@sweetoburrito/backstage-plugin-ai-assistant-backend-module-ingestor-azure-devops 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"repository.cjs.js","sources":["../../../src/services/ingestor/repository.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport { DEFAULT_FILE_TYPES } from '../../constants/default-file-types';\nimport {\n EmbeddingDocument,\n IngestorOptions,\n streamToString,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { AzureDevOpsService } from '../azure-devops';\nimport { Config } from '../../../config';\nimport { MODULE_ID } from '../../constants/module';\nimport { getProgressStats } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_REPO_FILE_BATCH_SIZE } from '../../constants/default-repo-file-batch-size';\nimport {\n GitItem,\n GitRepository,\n} from 'azure-devops-node-api/interfaces/GitInterfaces';\n\ntype RepositoryIngestorOptions = {\n config: RootConfigService;\n logger: LoggerService;\n azureDevOpsService: AzureDevOpsService;\n};\n\nexport const createRepositoryIngestor = async ({\n config,\n logger,\n azureDevOpsService,\n}: RepositoryIngestorOptions) => {\n // Get configuration values\n const repositoriesFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['azureDevOps']['repositories']\n >('aiAssistant.ingestors.azureDevOps.repositories');\n\n // Default to common file types if none are specified\n const fileTypes =\n config.getOptionalStringArray(\n 'aiAssistant.ingestors.azureDevOps.fileTypes',\n ) ?? DEFAULT_FILE_TYPES;\n\n // Get batch size for processing repository items (default to 50 items per batch)\n const itemsBatchSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestors.azureDevOps.filesBatchSize', // Reuse the same config for consistency\n ) ?? DEFAULT_REPO_FILE_BATCH_SIZE;\n\n /**\n * Ingest Azure DevOps repository items in batches\n * @param repository - The repository to ingest items from\n * @param items - The list of items to ingest from the repository\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the repository\n */\n const ingestRepoByFileBatch = async ({\n repository,\n items,\n saveDocumentsBatch,\n }: {\n repository: GitRepository;\n items: GitItem[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${items.length} items from repository \"${repository.name}\" in batches of ${itemsBatchSize}`,\n );\n\n logger.debug(`Items: ${JSON.stringify(items, null, 2)}`);\n\n let totalDocumentsIngested = 0;\n\n // Process items in batches to manage memory and performance\n const totalBatches = Math.ceil(items.length / itemsBatchSize);\n\n for (\n let batchStart = 0;\n batchStart < items.length;\n batchStart += itemsBatchSize\n ) {\n const batchEnd = Math.min(batchStart + itemsBatchSize, items.length);\n const itemsBatch = items.slice(batchStart, batchEnd);\n const batchNumber = Math.floor(batchStart / itemsBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${itemsBatch.length} items) for repository \"${repository.name}\"`,\n );\n\n // Generate embedding documents for each item in the current batch\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < itemsBatch.length; index++) {\n const item = itemsBatch[index];\n const globalIndex = batchStart + index;\n\n const content = await azureDevOpsService.getRepoItemContent(\n repository.id!,\n item.path!,\n );\n\n const completionStats = getProgressStats(globalIndex + 1, items.length);\n\n logger.info(\n `Retrieved content for Azure DevOps item: ${item.path} in repository: \"${repository.name}\" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of repository]`,\n );\n\n const text = await streamToString(content);\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${repository.id}:${item.path}`,\n url: item.url,\n organization: azureDevOpsService.organization,\n project: azureDevOpsService.project,\n repository: repository.name!,\n },\n content: text,\n };\n\n documents.push(document);\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for Azure DevOps repository: ${repository.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest Azure DevOps repositories in batches\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns void\n */\n const ingestRepositoriesBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const repositoriesList = await azureDevOpsService.getRepos();\n\n if (repositoriesList.length === 0) {\n logger.warn('No repositories found in the Azure DevOps project');\n return;\n }\n\n logger.info(\n `Filtering for repositories: ${repositoriesFilter\n ?.map(repo => repo.name)\n .join(', ')}`,\n );\n\n // Filter repositories if a filter is provided in the config\n const repositoriesToIngest = repositoriesFilter\n ? repositoriesList.filter(repo =>\n repositoriesFilter?.some(\n filteredRepo =>\n filteredRepo.name.toLowerCase() === repo.name!.toLowerCase(),\n ),\n )\n : repositoriesList;\n\n if (repositoriesToIngest.length === 0) {\n logger.warn(\n 'No repositories found for ingestion after applying the filter',\n );\n return;\n }\n\n logger.info(\n `Ingesting ${repositoriesToIngest.length} repositories from Azure DevOps`,\n );\n\n // Get items from each repository and create documents to be embedded\n for (const repo of repositoriesToIngest) {\n logger.info(\n `Beginning ingestion for repository: ${repo.name} (${repo.id})`,\n );\n\n // Determine the file types to use for this repository or use default\n const repositoryFileTypesFilter =\n repositoriesFilter?.find(\n r => r.name.toLowerCase() === repo.name!.toLowerCase(),\n )?.fileTypes ?? fileTypes;\n\n logger.info(\n `Processing file types for repository ${\n repo.name\n }: [${repositoryFileTypesFilter.join(', ')}]`,\n );\n\n // Get the items to be ingested from the repository based on the file types filter\n const items = await azureDevOpsService.getRepoItems(\n repo.id!,\n repositoryFileTypesFilter,\n );\n\n if (items.length === 0) {\n logger.warn(\n `No items found for ingestion in the Azure DevOps repository ${\n repo.name\n } (${\n repo.id\n }) with the specified file types filter: [${repositoryFileTypesFilter.join(\n ', ',\n )}]`,\n );\n continue;\n }\n\n const { totalDocumentsIngested } = await ingestRepoByFileBatch({\n repository: repo,\n items,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the Azure DevOps repository ${repo.name} (${repo.id})`,\n );\n continue;\n }\n\n logger.info(\n `Repository ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for Azure DevOps repository: ${repo.name}`,\n );\n }\n };\n\n return { ingestRepositoriesBatch };\n};\n"],"names":["DEFAULT_FILE_TYPES","DEFAULT_REPO_FILE_BATCH_SIZE","getProgressStats","streamToString","MODULE_ID"],"mappings":";;;;;;;;AA0BO,MAAM,2BAA2B,OAAO;AAAA,EAC7C,MAAA;AAAA,EACA,MAAA;AAAA,EACA;AACF,CAAA,KAAiC;AAE/B,EAAA,MAAM,kBAAA,GAAqB,MAAA,CAAO,WAAA,CAEhC,gDAAgD,CAAA;AAGlD,EAAA,MAAM,YACJ,MAAA,CAAO,sBAAA;AAAA,IACL;AAAA,GACF,IAAKA,mCAAA;AAGP,EAAA,MAAM,iBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA;AAAA,GACF,IAAKC,qDAAA;AASP,EAAA,MAAM,wBAAwB,OAAO;AAAA,IACnC,UAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,2BAA2B,UAAA,CAAW,IAAI,mBAAmB,cAAc,CAAA;AAAA,KACvG;AAEA,IAAA,MAAA,CAAO,KAAA,CAAM,UAAU,IAAA,CAAK,SAAA,CAAU,OAAO,IAAA,EAAM,CAAC,CAAC,CAAA,CAAE,CAAA;AAEvD,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAG7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAE5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,wBAAA,EAA2B,UAAA,CAAW,IAAI,CAAA,CAAA;AAAA,OACjH;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,CAAmB,kBAAA;AAAA,UACvC,UAAA,CAAW,EAAA;AAAA,UACX,IAAA,CAAK;AAAA,SACP;AAEA,QAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,WAAA,GAAc,CAAA,EAAG,MAAM,MAAM,CAAA;AAEtE,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,yCAAA,EAA4C,IAAA,CAAK,IAAI,CAAA,iBAAA,EAAoB,WAAW,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,2BAAA;AAAA,SAC3L;AAEA,QAAA,MAAM,IAAA,GAAO,MAAMC,6CAAA,CAAe,OAAO,CAAA;AAEzC,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,QAAA,EAAU;AAAA,YACR,MAAA,EAAQC,kBAAA;AAAA,YACR,IAAI,CAAA,EAAG,UAAA,CAAW,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,YACjC,KAAK,IAAA,CAAK,GAAA;AAAA,YACV,cAAc,kBAAA,CAAmB,YAAA;AAAA,YACjC,SAAS,kBAAA,CAAmB,OAAA;AAAA,YAC5B,YAAY,UAAA,CAAW;AAAA,WACzB;AAAA,UACA,OAAA,EAAS;AAAA,SACX;AAEA,QAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,MACzB;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,iDAAA,EAAoD,UAAA,CAAW,IAAI,CAAA;AAAA,OACxI;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAMA,EAAA,MAAM,uBAAA,GAA0B,OAC9B,kBAAA,KACG;AACH,IAAA,MAAM,gBAAA,GAAmB,MAAM,kBAAA,CAAmB,QAAA,EAAS;AAE3D,IAAA,IAAI,gBAAA,CAAiB,WAAW,CAAA,EAAG;AACjC,MAAA,MAAA,CAAO,KAAK,mDAAmD,CAAA;AAC/D,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,4BAAA,EAA+B,oBAC3B,GAAA,CAAI,CAAA,IAAA,KAAQ,KAAK,IAAI,CAAA,CACtB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAGA,IAAA,MAAM,oBAAA,GAAuB,qBACzB,gBAAA,CAAiB,MAAA;AAAA,MAAO,UACtB,kBAAA,EAAoB,IAAA;AAAA,QAClB,kBACE,YAAA,CAAa,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY;AAC/D,KACF,GACA,gBAAA;AAEJ,IAAA,IAAI,oBAAA,CAAqB,WAAW,CAAA,EAAG;AACrC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL;AAAA,OACF;AACA,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,UAAA,EAAa,qBAAqB,MAAM,CAAA,+BAAA;AAAA,KAC1C;AAGA,IAAA,KAAA,MAAW,QAAQ,oBAAA,EAAsB;AACvC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,oCAAA,EAAuC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,OAC9D;AAGA,MAAA,MAAM,4BACJ,kBAAA,EAAoB,IAAA;AAAA,QAClB,OAAK,CAAA,CAAE,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY,SACpD,SAAA,IAAa,SAAA;AAElB,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,wCACE,IAAA,CAAK,IACP,MAAM,yBAAA,CAA0B,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA;AAAA,OAC5C;AAGA,MAAA,MAAM,KAAA,GAAQ,MAAM,kBAAA,CAAmB,YAAA;AAAA,QACrC,IAAA,CAAK,EAAA;AAAA,QACL;AAAA,OACF;AAEA,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,+DACE,IAAA,CAAK,IACP,KACE,IAAA,CAAK,EACP,4CAA4C,yBAAA,CAA0B,IAAA;AAAA,YACpE;AAAA,WACD,CAAA,CAAA;AAAA,SACH;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MAAM,qBAAA,CAAsB;AAAA,QAC7D,UAAA,EAAY,IAAA;AAAA,QACZ,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,mFAAA,EAAsF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SAC7G;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,gCAAA,EAAmC,sBAAsB,CAAA,8EAAA,EAAiF,IAAA,CAAK,IAAI,CAAA;AAAA,OACrJ;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,OAAO,EAAE,uBAAA,EAAwB;AACnC;;;;"}
|
|
1
|
+
{"version":3,"file":"repository.cjs.js","sources":["../../../src/services/ingestor/repository.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport { DEFAULT_FILE_TYPES } from '../../constants/default-file-types';\nimport {\n EmbeddingDocument,\n IngestorOptions,\n streamToString,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { AzureDevOpsService } from '../azure-devops';\nimport { Config } from '../../../config';\nimport { MODULE_ID } from '../../constants/module';\nimport { getProgressStats } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_REPO_FILE_BATCH_SIZE } from '../../constants/default-repo-file-batch-size';\nimport {\n GitItem,\n GitRepository,\n} from 'azure-devops-node-api/interfaces/GitInterfaces';\n\ntype RepositoryIngestorOptions = {\n config: RootConfigService;\n logger: LoggerService;\n azureDevOpsService: AzureDevOpsService;\n};\n\nexport const createRepositoryIngestor = async ({\n config,\n logger,\n azureDevOpsService,\n}: RepositoryIngestorOptions) => {\n // Get configuration values\n const repositoriesFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['azureDevOps']['repositories']\n >('aiAssistant.ingestors.azureDevOps.repositories');\n\n // Default to common file types if none are specified\n const fileTypes =\n config.getOptionalStringArray(\n 'aiAssistant.ingestors.azureDevOps.fileTypes',\n ) ?? DEFAULT_FILE_TYPES;\n\n // Get batch size for processing repository items (default to 50 items per batch)\n const itemsBatchSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestors.azureDevOps.filesBatchSize', // Reuse the same config for consistency\n ) ?? DEFAULT_REPO_FILE_BATCH_SIZE;\n\n /**\n * Ingest Azure DevOps repository items in batches\n * @param repository - The repository to ingest items from\n * @param items - The list of items to ingest from the repository\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the repository\n */\n const ingestRepoByFileBatch = async ({\n repository,\n items,\n saveDocumentsBatch,\n }: {\n repository: GitRepository;\n items: GitItem[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${items.length} items from repository \"${repository.name}\" in batches of ${itemsBatchSize}`,\n );\n\n logger.debug(`Items: ${JSON.stringify(items, null, 2)}`);\n\n let totalDocumentsIngested = 0;\n\n // Process items in batches to manage memory and performance\n const totalBatches = Math.ceil(items.length / itemsBatchSize);\n\n for (\n let batchStart = 0;\n batchStart < items.length;\n batchStart += itemsBatchSize\n ) {\n const batchEnd = Math.min(batchStart + itemsBatchSize, items.length);\n const itemsBatch = items.slice(batchStart, batchEnd);\n const batchNumber = Math.floor(batchStart / itemsBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${itemsBatch.length} items) for repository \"${repository.name}\"`,\n );\n\n // Generate embedding documents for each item in the current batch\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < itemsBatch.length; index++) {\n const item = itemsBatch[index];\n const globalIndex = batchStart + index;\n\n const content = await azureDevOpsService.getRepoItemContent(\n repository.id!,\n item.path!,\n );\n\n const completionStats = getProgressStats(globalIndex + 1, items.length);\n\n logger.info(\n `Retrieved content for Azure DevOps item: ${item.path} in repository: \"${repository.name}\" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of repository]`,\n );\n\n const text = await streamToString(content);\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${repository.id}:${item.path}`,\n url: item.url!,\n organization: azureDevOpsService.organization,\n project: azureDevOpsService.project,\n repository: repository.name!,\n },\n content: text,\n };\n\n documents.push(document);\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for Azure DevOps repository: ${repository.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest Azure DevOps repositories in batches\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns void\n */\n const ingestRepositoriesBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const repositoriesList = await azureDevOpsService.getRepos();\n\n if (repositoriesList.length === 0) {\n logger.warn('No repositories found in the Azure DevOps project');\n return;\n }\n\n logger.info(\n `Filtering for repositories: ${repositoriesFilter\n ?.map(repo => repo.name)\n .join(', ')}`,\n );\n\n // Filter repositories if a filter is provided in the config\n const repositoriesToIngest = repositoriesFilter\n ? repositoriesList.filter(repo =>\n repositoriesFilter?.some(\n filteredRepo =>\n filteredRepo.name.toLowerCase() === repo.name!.toLowerCase(),\n ),\n )\n : repositoriesList;\n\n if (repositoriesToIngest.length === 0) {\n logger.warn(\n 'No repositories found for ingestion after applying the filter',\n );\n return;\n }\n\n logger.info(\n `Ingesting ${repositoriesToIngest.length} repositories from Azure DevOps`,\n );\n\n // Get items from each repository and create documents to be embedded\n for (const repo of repositoriesToIngest) {\n logger.info(\n `Beginning ingestion for repository: ${repo.name} (${repo.id})`,\n );\n\n // Determine the file types to use for this repository or use default\n const repositoryFileTypesFilter =\n repositoriesFilter?.find(\n r => r.name.toLowerCase() === repo.name!.toLowerCase(),\n )?.fileTypes ?? fileTypes;\n\n logger.info(\n `Processing file types for repository ${\n repo.name\n }: [${repositoryFileTypesFilter.join(', ')}]`,\n );\n\n // Get the items to be ingested from the repository based on the file types filter\n const items = await azureDevOpsService.getRepoItems(\n repo.id!,\n repositoryFileTypesFilter,\n );\n\n if (items.length === 0) {\n logger.warn(\n `No items found for ingestion in the Azure DevOps repository ${\n repo.name\n } (${\n repo.id\n }) with the specified file types filter: [${repositoryFileTypesFilter.join(\n ', ',\n )}]`,\n );\n continue;\n }\n\n const { totalDocumentsIngested } = await ingestRepoByFileBatch({\n repository: repo,\n items,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the Azure DevOps repository ${repo.name} (${repo.id})`,\n );\n continue;\n }\n\n logger.info(\n `Repository ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for Azure DevOps repository: ${repo.name}`,\n );\n }\n };\n\n return { ingestRepositoriesBatch };\n};\n"],"names":["DEFAULT_FILE_TYPES","DEFAULT_REPO_FILE_BATCH_SIZE","getProgressStats","streamToString","MODULE_ID"],"mappings":";;;;;;;;AA0BO,MAAM,2BAA2B,OAAO;AAAA,EAC7C,MAAA;AAAA,EACA,MAAA;AAAA,EACA;AACF,CAAA,KAAiC;AAE/B,EAAA,MAAM,kBAAA,GAAqB,MAAA,CAAO,WAAA,CAEhC,gDAAgD,CAAA;AAGlD,EAAA,MAAM,YACJ,MAAA,CAAO,sBAAA;AAAA,IACL;AAAA,GACF,IAAKA,mCAAA;AAGP,EAAA,MAAM,iBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA;AAAA,GACF,IAAKC,qDAAA;AASP,EAAA,MAAM,wBAAwB,OAAO;AAAA,IACnC,UAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,2BAA2B,UAAA,CAAW,IAAI,mBAAmB,cAAc,CAAA;AAAA,KACvG;AAEA,IAAA,MAAA,CAAO,KAAA,CAAM,UAAU,IAAA,CAAK,SAAA,CAAU,OAAO,IAAA,EAAM,CAAC,CAAC,CAAA,CAAE,CAAA;AAEvD,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAG7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAE5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,wBAAA,EAA2B,UAAA,CAAW,IAAI,CAAA,CAAA;AAAA,OACjH;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,CAAmB,kBAAA;AAAA,UACvC,UAAA,CAAW,EAAA;AAAA,UACX,IAAA,CAAK;AAAA,SACP;AAEA,QAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,WAAA,GAAc,CAAA,EAAG,MAAM,MAAM,CAAA;AAEtE,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,yCAAA,EAA4C,IAAA,CAAK,IAAI,CAAA,iBAAA,EAAoB,WAAW,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,2BAAA;AAAA,SAC3L;AAEA,QAAA,MAAM,IAAA,GAAO,MAAMC,6CAAA,CAAe,OAAO,CAAA;AAEzC,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,QAAA,EAAU;AAAA,YACR,MAAA,EAAQC,kBAAA;AAAA,YACR,IAAI,CAAA,EAAG,UAAA,CAAW,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,YACjC,KAAK,IAAA,CAAK,GAAA;AAAA,YACV,cAAc,kBAAA,CAAmB,YAAA;AAAA,YACjC,SAAS,kBAAA,CAAmB,OAAA;AAAA,YAC5B,YAAY,UAAA,CAAW;AAAA,WACzB;AAAA,UACA,OAAA,EAAS;AAAA,SACX;AAEA,QAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,MACzB;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,iDAAA,EAAoD,UAAA,CAAW,IAAI,CAAA;AAAA,OACxI;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAMA,EAAA,MAAM,uBAAA,GAA0B,OAC9B,kBAAA,KACG;AACH,IAAA,MAAM,gBAAA,GAAmB,MAAM,kBAAA,CAAmB,QAAA,EAAS;AAE3D,IAAA,IAAI,gBAAA,CAAiB,WAAW,CAAA,EAAG;AACjC,MAAA,MAAA,CAAO,KAAK,mDAAmD,CAAA;AAC/D,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,4BAAA,EAA+B,oBAC3B,GAAA,CAAI,CAAA,IAAA,KAAQ,KAAK,IAAI,CAAA,CACtB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAGA,IAAA,MAAM,oBAAA,GAAuB,qBACzB,gBAAA,CAAiB,MAAA;AAAA,MAAO,UACtB,kBAAA,EAAoB,IAAA;AAAA,QAClB,kBACE,YAAA,CAAa,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY;AAC/D,KACF,GACA,gBAAA;AAEJ,IAAA,IAAI,oBAAA,CAAqB,WAAW,CAAA,EAAG;AACrC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL;AAAA,OACF;AACA,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,UAAA,EAAa,qBAAqB,MAAM,CAAA,+BAAA;AAAA,KAC1C;AAGA,IAAA,KAAA,MAAW,QAAQ,oBAAA,EAAsB;AACvC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,oCAAA,EAAuC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,OAC9D;AAGA,MAAA,MAAM,4BACJ,kBAAA,EAAoB,IAAA;AAAA,QAClB,OAAK,CAAA,CAAE,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY,SACpD,SAAA,IAAa,SAAA;AAElB,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,wCACE,IAAA,CAAK,IACP,MAAM,yBAAA,CAA0B,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA;AAAA,OAC5C;AAGA,MAAA,MAAM,KAAA,GAAQ,MAAM,kBAAA,CAAmB,YAAA;AAAA,QACrC,IAAA,CAAK,EAAA;AAAA,QACL;AAAA,OACF;AAEA,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,+DACE,IAAA,CAAK,IACP,KACE,IAAA,CAAK,EACP,4CAA4C,yBAAA,CAA0B,IAAA;AAAA,YACpE;AAAA,WACD,CAAA,CAAA;AAAA,SACH;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MAAM,qBAAA,CAAsB;AAAA,QAC7D,UAAA,EAAY,IAAA;AAAA,QACZ,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,mFAAA,EAAsF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SAC7G;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,gCAAA,EAAmC,sBAAsB,CAAA,8EAAA,EAAiF,IAAA,CAAK,IAAI,CAAA;AAAA,OACrJ;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,OAAO,EAAE,uBAAA,EAAwB;AACnC;;;;"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"wiki.cjs.js","sources":["../../../src/services/ingestor/wiki.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport {\n EmbeddingDocument,\n IngestorOptions,\n streamToString,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { AzureDevOpsService } from '../azure-devops';\nimport { Config } from '../../../config';\nimport { MODULE_ID } from '../../constants/module';\nimport { getProgressStats } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_WIKI_PAGE_BATCH_SIZE } from '../../constants/default-wiki-page-batch-size';\nimport {\n WikiPage,\n WikiV2,\n} from 'azure-devops-node-api/interfaces/WikiInterfaces';\n\ntype WikiIngestorOptions = {\n config: RootConfigService;\n logger: LoggerService;\n azureDevOpsService: AzureDevOpsService;\n};\n\nexport const createWikiIngestor = async ({\n config,\n logger,\n azureDevOpsService,\n}: WikiIngestorOptions) => {\n // Get configuration values\n const wikisFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['azureDevOps']['wikis']\n >('aiAssistant.ingestors.azureDevOps.wikis');\n\n // Get batch size for processing pages (default to 50 pages per batch)\n const pagesBatchSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestors.azureDevOps.pagesBatchSize',\n ) ?? DEFAULT_WIKI_PAGE_BATCH_SIZE;\n\n /** Ingest Azure DevOps wiki pages in batches\n * @param wiki - The wiki to ingest pages from\n * @param pages - The list of pages to ingest from the wiki\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the wiki\n */\n const ingestWikiByPageBatch = async ({\n wiki,\n pages,\n saveDocumentsBatch,\n }: {\n wiki: WikiV2;\n pages: WikiPage[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${pages.length} pages from wiki \"${wiki.name}\" in batches of ${pagesBatchSize}`,\n );\n\n let totalDocumentsIngested = 0;\n\n // Process pages in batches to manage memory and performance\n\n // Calculate total number of batches\n const totalBatches = Math.ceil(pages.length / pagesBatchSize);\n\n // Process each batch\n for (\n let batchStart = 0;\n batchStart < pages.length;\n batchStart += pagesBatchSize\n ) {\n const batchEnd = Math.min(batchStart + pagesBatchSize, pages.length);\n const pagesBatch = pages.slice(batchStart, batchEnd);\n const batchNumber = Math.floor(batchStart / pagesBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${pagesBatch.length} pages) for wiki \"${wiki.name}\"`,\n );\n\n // Generate embedding documents for each page in the current batch\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < pagesBatch.length; index++) {\n const page = pagesBatch[index];\n const globalIndex = batchStart + index;\n\n const content = await azureDevOpsService.getWikiPageContent(\n wiki.id!,\n page.id!,\n );\n\n const completionStats = getProgressStats(globalIndex + 1, pages.length);\n\n logger.info(\n `Retrieved content for Azure DevOps page: \"${page.path}\" in wiki: \"${wiki.name}\" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of wiki]`,\n );\n\n const text = await streamToString(content);\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${wiki.id}:${page.path}`,\n url: page.url,\n organization: azureDevOpsService.organization,\n project: azureDevOpsService.project,\n wiki: wiki.name!,\n },\n content: text,\n };\n\n documents.push(document);\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for Azure DevOps wiki: ${wiki.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest Azure DevOps wikis in batches */\n const ingestWikisBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const wikisList = await azureDevOpsService.getWikis();\n\n if (wikisList.length === 0) {\n logger.warn('No wikis found in the Azure DevOps project');\n return;\n }\n\n logger.info(\n `Filtering for wikis: ${wikisFilter?.map(repo => repo.name).join(', ')}`,\n );\n\n // Filter wikis if a filter is provided in the config\n const wikisToIngest = wikisFilter\n ? wikisList.filter(wiki =>\n wikisFilter?.some(\n filteredWiki =>\n filteredWiki.name.toLowerCase() === wiki.name!.toLowerCase(),\n ),\n )\n : wikisList;\n\n if (wikisToIngest.length === 0) {\n logger.warn('No wikis found for ingestion after applying the filter');\n return;\n }\n\n logger.info(`Ingesting ${wikisToIngest.length} wikis from Azure DevOps`);\n\n // Get items from each wiki and create documents to be embedded\n for (const wiki of wikisToIngest) {\n logger.info(`Beginning ingestion for wiki: ${wiki.name} (${wiki.id})`);\n\n // Get the pages to be ingested from the wiki based on the file types filter\n const pages = await azureDevOpsService.getWikiPages(wiki.id!);\n\n if (pages.length === 0) {\n logger.warn(\n `No pages found for ingestion in the Azure DevOps wiki ${wiki.name} (${wiki.id})`,\n );\n continue;\n }\n\n const { totalDocumentsIngested } = await ingestWikiByPageBatch({\n wiki,\n pages,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the Azure DevOps wiki ${wiki.name} (${wiki.id})`,\n );\n continue;\n }\n\n logger.info(\n `Wiki ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for Azure DevOps wiki: ${wiki.name}`,\n );\n }\n };\n\n return { ingestWikisBatch };\n};\n"],"names":["DEFAULT_WIKI_PAGE_BATCH_SIZE","getProgressStats","streamToString","MODULE_ID"],"mappings":";;;;;;;AAyBO,MAAM,qBAAqB,OAAO;AAAA,EACvC,MAAA;AAAA,EACA,MAAA;AAAA,EACA;AACF,CAAA,KAA2B;AAEzB,EAAA,MAAM,WAAA,GAAc,MAAA,CAAO,WAAA,CAEzB,yCAAyC,CAAA;AAG3C,EAAA,MAAM,iBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA,GACF,IAAKA,qDAAA;AAQP,EAAA,MAAM,wBAAwB,OAAO;AAAA,IACnC,IAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,qBAAqB,IAAA,CAAK,IAAI,mBAAmB,cAAc,CAAA;AAAA,KAC3F;AAEA,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAK7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAG5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,kBAAA,EAAqB,IAAA,CAAK,IAAI,CAAA,CAAA;AAAA,OACrG;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,CAAmB,kBAAA;AAAA,UACvC,IAAA,CAAK,EAAA;AAAA,UACL,IAAA,CAAK;AAAA,SACP;AAEA,QAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,WAAA,GAAc,CAAA,EAAG,MAAM,MAAM,CAAA;AAEtE,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,0CAAA,EAA6C,IAAA,CAAK,IAAI,CAAA,YAAA,EAAe,KAAK,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,qBAAA;AAAA,SACjL;AAEA,QAAA,MAAM,IAAA,GAAO,MAAMC,6CAAA,CAAe,OAAO,CAAA;AAEzC,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,QAAA,EAAU;AAAA,YACR,MAAA,EAAQC,kBAAA;AAAA,YACR,IAAI,CAAA,EAAG,IAAA,CAAK,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,YAC3B,KAAK,IAAA,CAAK,GAAA;AAAA,YACV,cAAc,kBAAA,CAAmB,YAAA;AAAA,YACjC,SAAS,kBAAA,CAAmB,OAAA;AAAA,YAC5B,MAAM,IAAA,CAAK;AAAA,WACb;AAAA,UACA,OAAA,EAAS;AAAA,SACX;AAEA,QAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,MACzB;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,2CAAA,EAA8C,IAAA,CAAK,IAAI,CAAA;AAAA,OAC5H;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAGA,EAAA,MAAM,gBAAA,GAAmB,OACvB,kBAAA,KACG;AACH,IAAA,MAAM,SAAA,GAAY,MAAM,kBAAA,CAAmB,QAAA,EAAS;AAEpD,IAAA,IAAI,SAAA,CAAU,WAAW,CAAA,EAAG;AAC1B,MAAA,MAAA,CAAO,KAAK,4CAA4C,CAAA;AACxD,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,qBAAA,EAAwB,aAAa,GAAA,CAAI,CAAA,IAAA,KAAQ,KAAK,IAAI,CAAA,CAAE,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACxE;AAGA,IAAA,MAAM,aAAA,GAAgB,cAClB,SAAA,CAAU,MAAA;AAAA,MAAO,UACf,WAAA,EAAa,IAAA;AAAA,QACX,kBACE,YAAA,CAAa,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY;AAC/D,KACF,GACA,SAAA;AAEJ,IAAA,IAAI,aAAA,CAAc,WAAW,CAAA,EAAG;AAC9B,MAAA,MAAA,CAAO,KAAK,wDAAwD,CAAA;AACpE,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA,CAAK,CAAA,UAAA,EAAa,aAAA,CAAc,MAAM,CAAA,wBAAA,CAA0B,CAAA;AAGvE,IAAA,KAAA,MAAW,QAAQ,aAAA,EAAe;AAChC,MAAA,MAAA,CAAO,KAAK,CAAA,8BAAA,EAAiC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,IAAA,CAAK,EAAE,CAAA,CAAA,CAAG,CAAA;AAGrE,MAAA,MAAM,KAAA,GAAQ,MAAM,kBAAA,CAAmB,YAAA,CAAa,KAAK,EAAG,CAAA;AAE5D,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,sDAAA,EAAyD,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SAChF;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MAAM,qBAAA,CAAsB;AAAA,QAC7D,IAAA;AAAA,QACA,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,6EAAA,EAAgF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SACvG;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,0BAAA,EAA6B,sBAAsB,CAAA,wEAAA,EAA2E,IAAA,CAAK,IAAI,CAAA;AAAA,OACzI;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,OAAO,EAAE,gBAAA,EAAiB;AAC5B;;;;"}
|
|
1
|
+
{"version":3,"file":"wiki.cjs.js","sources":["../../../src/services/ingestor/wiki.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport {\n EmbeddingDocument,\n IngestorOptions,\n streamToString,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { AzureDevOpsService } from '../azure-devops';\nimport { Config } from '../../../config';\nimport { MODULE_ID } from '../../constants/module';\nimport { getProgressStats } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_WIKI_PAGE_BATCH_SIZE } from '../../constants/default-wiki-page-batch-size';\nimport {\n WikiPage,\n WikiV2,\n} from 'azure-devops-node-api/interfaces/WikiInterfaces';\n\ntype WikiIngestorOptions = {\n config: RootConfigService;\n logger: LoggerService;\n azureDevOpsService: AzureDevOpsService;\n};\n\nexport const createWikiIngestor = async ({\n config,\n logger,\n azureDevOpsService,\n}: WikiIngestorOptions) => {\n // Get configuration values\n const wikisFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['azureDevOps']['wikis']\n >('aiAssistant.ingestors.azureDevOps.wikis');\n\n // Get batch size for processing pages (default to 50 pages per batch)\n const pagesBatchSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestors.azureDevOps.pagesBatchSize',\n ) ?? DEFAULT_WIKI_PAGE_BATCH_SIZE;\n\n /** Ingest Azure DevOps wiki pages in batches\n * @param wiki - The wiki to ingest pages from\n * @param pages - The list of pages to ingest from the wiki\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the wiki\n */\n const ingestWikiByPageBatch = async ({\n wiki,\n pages,\n saveDocumentsBatch,\n }: {\n wiki: WikiV2;\n pages: WikiPage[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${pages.length} pages from wiki \"${wiki.name}\" in batches of ${pagesBatchSize}`,\n );\n\n let totalDocumentsIngested = 0;\n\n // Process pages in batches to manage memory and performance\n\n // Calculate total number of batches\n const totalBatches = Math.ceil(pages.length / pagesBatchSize);\n\n // Process each batch\n for (\n let batchStart = 0;\n batchStart < pages.length;\n batchStart += pagesBatchSize\n ) {\n const batchEnd = Math.min(batchStart + pagesBatchSize, pages.length);\n const pagesBatch = pages.slice(batchStart, batchEnd);\n const batchNumber = Math.floor(batchStart / pagesBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${pagesBatch.length} pages) for wiki \"${wiki.name}\"`,\n );\n\n // Generate embedding documents for each page in the current batch\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < pagesBatch.length; index++) {\n const page = pagesBatch[index];\n const globalIndex = batchStart + index;\n\n const content = await azureDevOpsService.getWikiPageContent(\n wiki.id!,\n page.id!,\n );\n\n const completionStats = getProgressStats(globalIndex + 1, pages.length);\n\n logger.info(\n `Retrieved content for Azure DevOps page: \"${page.path}\" in wiki: \"${wiki.name}\" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of wiki]`,\n );\n\n const text = await streamToString(content);\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${wiki.id}:${page.path}`,\n url: page.url!,\n organization: azureDevOpsService.organization,\n project: azureDevOpsService.project,\n wiki: wiki.name!,\n },\n content: text,\n };\n\n documents.push(document);\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for Azure DevOps wiki: ${wiki.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest Azure DevOps wikis in batches */\n const ingestWikisBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const wikisList = await azureDevOpsService.getWikis();\n\n if (wikisList.length === 0) {\n logger.warn('No wikis found in the Azure DevOps project');\n return;\n }\n\n logger.info(\n `Filtering for wikis: ${wikisFilter?.map(repo => repo.name).join(', ')}`,\n );\n\n // Filter wikis if a filter is provided in the config\n const wikisToIngest = wikisFilter\n ? wikisList.filter(wiki =>\n wikisFilter?.some(\n filteredWiki =>\n filteredWiki.name.toLowerCase() === wiki.name!.toLowerCase(),\n ),\n )\n : wikisList;\n\n if (wikisToIngest.length === 0) {\n logger.warn('No wikis found for ingestion after applying the filter');\n return;\n }\n\n logger.info(`Ingesting ${wikisToIngest.length} wikis from Azure DevOps`);\n\n // Get items from each wiki and create documents to be embedded\n for (const wiki of wikisToIngest) {\n logger.info(`Beginning ingestion for wiki: ${wiki.name} (${wiki.id})`);\n\n // Get the pages to be ingested from the wiki based on the file types filter\n const pages = await azureDevOpsService.getWikiPages(wiki.id!);\n\n if (pages.length === 0) {\n logger.warn(\n `No pages found for ingestion in the Azure DevOps wiki ${wiki.name} (${wiki.id})`,\n );\n continue;\n }\n\n const { totalDocumentsIngested } = await ingestWikiByPageBatch({\n wiki,\n pages,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the Azure DevOps wiki ${wiki.name} (${wiki.id})`,\n );\n continue;\n }\n\n logger.info(\n `Wiki ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for Azure DevOps wiki: ${wiki.name}`,\n );\n }\n };\n\n return { ingestWikisBatch };\n};\n"],"names":["DEFAULT_WIKI_PAGE_BATCH_SIZE","getProgressStats","streamToString","MODULE_ID"],"mappings":";;;;;;;AAyBO,MAAM,qBAAqB,OAAO;AAAA,EACvC,MAAA;AAAA,EACA,MAAA;AAAA,EACA;AACF,CAAA,KAA2B;AAEzB,EAAA,MAAM,WAAA,GAAc,MAAA,CAAO,WAAA,CAEzB,yCAAyC,CAAA;AAG3C,EAAA,MAAM,iBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA,GACF,IAAKA,qDAAA;AAQP,EAAA,MAAM,wBAAwB,OAAO;AAAA,IACnC,IAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,qBAAqB,IAAA,CAAK,IAAI,mBAAmB,cAAc,CAAA;AAAA,KAC3F;AAEA,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAK7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAG5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,kBAAA,EAAqB,IAAA,CAAK,IAAI,CAAA,CAAA;AAAA,OACrG;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,CAAmB,kBAAA;AAAA,UACvC,IAAA,CAAK,EAAA;AAAA,UACL,IAAA,CAAK;AAAA,SACP;AAEA,QAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,WAAA,GAAc,CAAA,EAAG,MAAM,MAAM,CAAA;AAEtE,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,0CAAA,EAA6C,IAAA,CAAK,IAAI,CAAA,YAAA,EAAe,KAAK,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,qBAAA;AAAA,SACjL;AAEA,QAAA,MAAM,IAAA,GAAO,MAAMC,6CAAA,CAAe,OAAO,CAAA;AAEzC,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,QAAA,EAAU;AAAA,YACR,MAAA,EAAQC,kBAAA;AAAA,YACR,IAAI,CAAA,EAAG,IAAA,CAAK,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,YAC3B,KAAK,IAAA,CAAK,GAAA;AAAA,YACV,cAAc,kBAAA,CAAmB,YAAA;AAAA,YACjC,SAAS,kBAAA,CAAmB,OAAA;AAAA,YAC5B,MAAM,IAAA,CAAK;AAAA,WACb;AAAA,UACA,OAAA,EAAS;AAAA,SACX;AAEA,QAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,MACzB;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,2CAAA,EAA8C,IAAA,CAAK,IAAI,CAAA;AAAA,OAC5H;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAGA,EAAA,MAAM,gBAAA,GAAmB,OACvB,kBAAA,KACG;AACH,IAAA,MAAM,SAAA,GAAY,MAAM,kBAAA,CAAmB,QAAA,EAAS;AAEpD,IAAA,IAAI,SAAA,CAAU,WAAW,CAAA,EAAG;AAC1B,MAAA,MAAA,CAAO,KAAK,4CAA4C,CAAA;AACxD,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,qBAAA,EAAwB,aAAa,GAAA,CAAI,CAAA,IAAA,KAAQ,KAAK,IAAI,CAAA,CAAE,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACxE;AAGA,IAAA,MAAM,aAAA,GAAgB,cAClB,SAAA,CAAU,MAAA;AAAA,MAAO,UACf,WAAA,EAAa,IAAA;AAAA,QACX,kBACE,YAAA,CAAa,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY;AAC/D,KACF,GACA,SAAA;AAEJ,IAAA,IAAI,aAAA,CAAc,WAAW,CAAA,EAAG;AAC9B,MAAA,MAAA,CAAO,KAAK,wDAAwD,CAAA;AACpE,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA,CAAK,CAAA,UAAA,EAAa,aAAA,CAAc,MAAM,CAAA,wBAAA,CAA0B,CAAA;AAGvE,IAAA,KAAA,MAAW,QAAQ,aAAA,EAAe;AAChC,MAAA,MAAA,CAAO,KAAK,CAAA,8BAAA,EAAiC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,IAAA,CAAK,EAAE,CAAA,CAAA,CAAG,CAAA;AAGrE,MAAA,MAAM,KAAA,GAAQ,MAAM,kBAAA,CAAmB,YAAA,CAAa,KAAK,EAAG,CAAA;AAE5D,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,sDAAA,EAAyD,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SAChF;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MAAM,qBAAA,CAAsB;AAAA,QAC7D,IAAA;AAAA,QACA,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,6EAAA,EAAgF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SACvG;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,0BAAA,EAA6B,sBAAsB,CAAA,wEAAA,EAA2E,IAAA,CAAK,IAAI,CAAA;AAAA,OACzI;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,OAAO,EAAE,gBAAA,EAAiB;AAC5B;;;;"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sweetoburrito/backstage-plugin-ai-assistant-backend-module-ingestor-azure-devops",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.2",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"description": "The ingestor-azure-devops backend module for the ai-assistant plugin.",
|
|
6
6
|
"main": "dist/index.cjs.js",
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
"dependencies": {
|
|
31
31
|
"@backstage/backend-plugin-api": "^1.4.1",
|
|
32
32
|
"@sweetoburrito/backstage-plugin-ai-assistant-common": "^0.5.0",
|
|
33
|
-
"@sweetoburrito/backstage-plugin-ai-assistant-node": "^0.5.
|
|
33
|
+
"@sweetoburrito/backstage-plugin-ai-assistant-node": "^0.5.1",
|
|
34
34
|
"azure-devops-node-api": "^15.1.1"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|