@sweetoburrito/backstage-plugin-ai-assistant-backend-module-ingestor-azure-devops 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.d.ts +5 -0
- package/dist/constants/default-repo-file-batch-size.cjs.js +6 -0
- package/dist/constants/default-repo-file-batch-size.cjs.js.map +1 -0
- package/dist/services/ingestor/repository.cjs.js +67 -26
- package/dist/services/ingestor/repository.cjs.js.map +1 -1
- package/package.json +1 -1
package/config.d.ts
CHANGED
|
@@ -41,6 +41,11 @@ export interface Config {
|
|
|
41
41
|
*/
|
|
42
42
|
fileTypes?: string[];
|
|
43
43
|
}[];
|
|
44
|
+
/**
|
|
45
|
+
* Optional batch size for processing repository files. Defaults to 50 files per batch.
|
|
46
|
+
* Lower values use less memory but may be slower, higher values are faster but use more memory.
|
|
47
|
+
*/
|
|
48
|
+
filesBatchSize?: number;
|
|
44
49
|
|
|
45
50
|
/**
|
|
46
51
|
* Optional list of wikis to ingest. If not specified, all wikis in the project will be ingested.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"default-repo-file-batch-size.cjs.js","sources":["../../src/constants/default-repo-file-batch-size.ts"],"sourcesContent":["export const DEFAULT_REPO_FILE_BATCH_SIZE = 50;\n"],"names":[],"mappings":";;AAAO,MAAM,4BAAA,GAA+B;;;;"}
|
|
@@ -4,6 +4,7 @@ var defaultFileTypes = require('../../constants/default-file-types.cjs.js');
|
|
|
4
4
|
var backstagePluginAiAssistantNode = require('@sweetoburrito/backstage-plugin-ai-assistant-node');
|
|
5
5
|
var module$1 = require('../../constants/module.cjs.js');
|
|
6
6
|
var backstagePluginAiAssistantCommon = require('@sweetoburrito/backstage-plugin-ai-assistant-common');
|
|
7
|
+
var defaultRepoFileBatchSize = require('../../constants/default-repo-file-batch-size.cjs.js');
|
|
7
8
|
|
|
8
9
|
const createRepositoryIngestor = async ({
|
|
9
10
|
config,
|
|
@@ -14,6 +15,62 @@ const createRepositoryIngestor = async ({
|
|
|
14
15
|
const fileTypes = config.getOptionalStringArray(
|
|
15
16
|
"aiAssistant.ingestors.azureDevOps.fileTypes"
|
|
16
17
|
) ?? defaultFileTypes.DEFAULT_FILE_TYPES;
|
|
18
|
+
const itemsBatchSize = config.getOptionalNumber(
|
|
19
|
+
"aiAssistant.ingestors.azureDevOps.filesBatchSize"
|
|
20
|
+
// Reuse the same config for consistency
|
|
21
|
+
) ?? defaultRepoFileBatchSize.DEFAULT_REPO_FILE_BATCH_SIZE;
|
|
22
|
+
const ingestRepoByFileBatch = async ({
|
|
23
|
+
repository,
|
|
24
|
+
items,
|
|
25
|
+
saveDocumentsBatch
|
|
26
|
+
}) => {
|
|
27
|
+
logger.info(
|
|
28
|
+
`Processing ${items.length} items from repository "${repository.name}" in batches of ${itemsBatchSize}`
|
|
29
|
+
);
|
|
30
|
+
logger.debug(`Items: ${JSON.stringify(items, null, 2)}`);
|
|
31
|
+
let totalDocumentsIngested = 0;
|
|
32
|
+
const totalBatches = Math.ceil(items.length / itemsBatchSize);
|
|
33
|
+
for (let batchStart = 0; batchStart < items.length; batchStart += itemsBatchSize) {
|
|
34
|
+
const batchEnd = Math.min(batchStart + itemsBatchSize, items.length);
|
|
35
|
+
const itemsBatch = items.slice(batchStart, batchEnd);
|
|
36
|
+
const batchNumber = Math.floor(batchStart / itemsBatchSize) + 1;
|
|
37
|
+
logger.info(
|
|
38
|
+
`Processing batch ${batchNumber}/${totalBatches} (${itemsBatch.length} items) for repository "${repository.name}"`
|
|
39
|
+
);
|
|
40
|
+
const documents = [];
|
|
41
|
+
for (let index = 0; index < itemsBatch.length; index++) {
|
|
42
|
+
const item = itemsBatch[index];
|
|
43
|
+
const globalIndex = batchStart + index;
|
|
44
|
+
const content = await azureDevOpsService.getRepoItemContent(
|
|
45
|
+
repository.id,
|
|
46
|
+
item.path
|
|
47
|
+
);
|
|
48
|
+
const completionStats = backstagePluginAiAssistantCommon.getProgressStats(globalIndex + 1, items.length);
|
|
49
|
+
logger.info(
|
|
50
|
+
`Retrieved content for Azure DevOps item: ${item.path} in repository: "${repository.name}" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of repository]`
|
|
51
|
+
);
|
|
52
|
+
const text = await backstagePluginAiAssistantNode.streamToString(content);
|
|
53
|
+
const document = {
|
|
54
|
+
metadata: {
|
|
55
|
+
source: module$1.MODULE_ID,
|
|
56
|
+
id: `${repository.id}:${item.path}`,
|
|
57
|
+
url: item.url,
|
|
58
|
+
organization: azureDevOpsService.organization,
|
|
59
|
+
project: azureDevOpsService.project,
|
|
60
|
+
repository: repository.name
|
|
61
|
+
},
|
|
62
|
+
content: text
|
|
63
|
+
};
|
|
64
|
+
documents.push(document);
|
|
65
|
+
}
|
|
66
|
+
await saveDocumentsBatch(documents);
|
|
67
|
+
totalDocumentsIngested += documents.length;
|
|
68
|
+
logger.info(
|
|
69
|
+
`Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for Azure DevOps repository: ${repository.name}`
|
|
70
|
+
);
|
|
71
|
+
}
|
|
72
|
+
return { totalDocumentsIngested };
|
|
73
|
+
};
|
|
17
74
|
const ingestRepositoriesBatch = async (saveDocumentsBatch) => {
|
|
18
75
|
const repositoriesList = await azureDevOpsService.getRepos();
|
|
19
76
|
if (repositoriesList.length === 0) {
|
|
@@ -59,35 +116,19 @@ const createRepositoryIngestor = async ({
|
|
|
59
116
|
);
|
|
60
117
|
continue;
|
|
61
118
|
}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
const completionStats = backstagePluginAiAssistantCommon.getProgressStats(index + 1, items.length);
|
|
71
|
-
logger.info(
|
|
72
|
-
`Retrieved content for Azure DevOps item: ${item.path} in repository: "${repo.name}" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of repository]`
|
|
119
|
+
const { totalDocumentsIngested } = await ingestRepoByFileBatch({
|
|
120
|
+
repository: repo,
|
|
121
|
+
items,
|
|
122
|
+
saveDocumentsBatch
|
|
123
|
+
});
|
|
124
|
+
if (totalDocumentsIngested === 0) {
|
|
125
|
+
logger.warn(
|
|
126
|
+
`No documents were ingested and sent for embedding from the Azure DevOps repository ${repo.name} (${repo.id})`
|
|
73
127
|
);
|
|
74
|
-
|
|
75
|
-
const document = {
|
|
76
|
-
metadata: {
|
|
77
|
-
source: module$1.MODULE_ID,
|
|
78
|
-
id: `${repo.id}:${item.path}`,
|
|
79
|
-
url: item.url,
|
|
80
|
-
organization: azureDevOpsService.organization,
|
|
81
|
-
project: azureDevOpsService.project,
|
|
82
|
-
repository: repo.name
|
|
83
|
-
},
|
|
84
|
-
content: text
|
|
85
|
-
};
|
|
86
|
-
documents.push(document);
|
|
128
|
+
continue;
|
|
87
129
|
}
|
|
88
|
-
await saveDocumentsBatch(documents);
|
|
89
130
|
logger.info(
|
|
90
|
-
|
|
131
|
+
`Repository ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for Azure DevOps repository: ${repo.name}`
|
|
91
132
|
);
|
|
92
133
|
}
|
|
93
134
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"repository.cjs.js","sources":["../../../src/services/ingestor/repository.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport { DEFAULT_FILE_TYPES } from '../../constants/default-file-types';\nimport {\n EmbeddingDocument,\n IngestorOptions,\n streamToString,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { AzureDevOpsService } from '../azure-devops';\nimport { Config } from '../../../config';\nimport { MODULE_ID } from '../../constants/module';\nimport { getProgressStats } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\n\ntype RepositoryIngestorOptions = {\n config: RootConfigService;\n logger: LoggerService;\n azureDevOpsService: AzureDevOpsService;\n};\n\nexport const createRepositoryIngestor = async ({\n config,\n logger,\n azureDevOpsService,\n}: RepositoryIngestorOptions) => {\n // Get configuration values\n const repositoriesFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['azureDevOps']['repositories']\n >('aiAssistant.ingestors.azureDevOps.repositories');\n\n // Default to common file types if none are specified\n const fileTypes =\n config.getOptionalStringArray(\n 'aiAssistant.ingestors.azureDevOps.fileTypes',\n ) ?? DEFAULT_FILE_TYPES;\n\n /** Ingest Azure DevOps repositories in batches */\n const ingestRepositoriesBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const repositoriesList = await azureDevOpsService.getRepos();\n\n if (repositoriesList.length === 0) {\n logger.warn('No repositories found in the Azure DevOps project');\n return;\n }\n\n logger.info(\n `Filtering for repositories: ${repositoriesFilter\n ?.map(repo => repo.name)\n .join(', ')}`,\n );\n\n // Filter repositories if a filter is provided in the config\n const repositoriesToIngest = repositoriesFilter\n ? 
repositoriesList.filter(repo =>\n repositoriesFilter?.some(\n filteredRepo =>\n filteredRepo.name.toLowerCase() === repo.name!.toLowerCase(),\n ),\n )\n : repositoriesList;\n\n if (repositoriesToIngest.length === 0) {\n logger.warn(\n 'No repositories found for ingestion after applying the filter',\n );\n return;\n }\n\n logger.info(\n `Ingesting ${repositoriesToIngest.length} repositories from Azure DevOps`,\n );\n\n // Get items from each repository and create documents to be embedded\n for (const repo of repositoriesToIngest) {\n logger.info(\n `Beginning ingestion for repository: ${repo.name} (${repo.id})`,\n );\n\n // Determine the file types to use for this repository or use default\n const repositoryFileTypesFilter =\n repositoriesFilter?.find(\n r => r.name.toLowerCase() === repo.name!.toLowerCase(),\n )?.fileTypes ?? fileTypes;\n\n logger.info(\n `Processing file types for repository ${\n repo.name\n }: [${repositoryFileTypesFilter.join(', ')}]`,\n );\n\n // Get the items to be ingested from the repository based on the file types filter\n const items = await azureDevOpsService.getRepoItems(\n repo.id!,\n repositoryFileTypesFilter,\n );\n\n if (items.length === 0) {\n logger.warn(\n `No items found for ingestion in the Azure DevOps repository ${\n repo.name\n } (${\n repo.id\n }) with the specified file types filter: [${repositoryFileTypesFilter.join(\n ', ',\n )}]`,\n );\n continue;\n }\n\n logger.debug(`Items: ${JSON.stringify(items, null, 2)}`);\n\n // Generate embedding documents for each item\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < items.length; index++) {\n const item = items[index];\n\n const content = await azureDevOpsService.getRepoItemContent(\n repo.id!,\n item.path!,\n );\n\n const completionStats = getProgressStats(index + 1, items.length);\n\n logger.info(\n `Retrieved content for Azure DevOps item: ${item.path} in repository: \"${repo.name}\" [Progress: ${completionStats.completed}/${completionStats.total} 
(${completionStats.percentage}%) completed of repository]`,\n );\n\n const text = await streamToString(content);\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${repo.id}:${item.path}`,\n url: item.url,\n organization: azureDevOpsService.organization,\n project: azureDevOpsService.project,\n repository: repo.name!,\n },\n content: text,\n };\n\n documents.push(document);\n }\n\n // Save the documents in batches\n await saveDocumentsBatch(documents);\n\n logger.info(\n `${documents.length} documents ingested and sent for embedding for Azure DevOps repository: ${repo.name}`,\n );\n }\n };\n\n return { ingestRepositoriesBatch };\n};\n"],"names":["DEFAULT_FILE_TYPES","getProgressStats","streamToString","MODULE_ID"],"mappings":";;;;;;;AAqBO,MAAM,2BAA2B,OAAO;AAAA,EAC7C,MAAA;AAAA,EACA,MAAA;AAAA,EACA;AACF,CAAA,KAAiC;AAE/B,EAAA,MAAM,kBAAA,GAAqB,MAAA,CAAO,WAAA,CAEhC,gDAAgD,CAAA;AAGlD,EAAA,MAAM,YACJ,MAAA,CAAO,sBAAA;AAAA,IACL;AAAA,GACF,IAAKA,mCAAA;AAGP,EAAA,MAAM,uBAAA,GAA0B,OAC9B,kBAAA,KACG;AACH,IAAA,MAAM,gBAAA,GAAmB,MAAM,kBAAA,CAAmB,QAAA,EAAS;AAE3D,IAAA,IAAI,gBAAA,CAAiB,WAAW,CAAA,EAAG;AACjC,MAAA,MAAA,CAAO,KAAK,mDAAmD,CAAA;AAC/D,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,4BAAA,EAA+B,oBAC3B,GAAA,CAAI,CAAA,IAAA,KAAQ,KAAK,IAAI,CAAA,CACtB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAGA,IAAA,MAAM,oBAAA,GAAuB,qBACzB,gBAAA,CAAiB,MAAA;AAAA,MAAO,UACtB,kBAAA,EAAoB,IAAA;AAAA,QAClB,kBACE,YAAA,CAAa,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY;AAC/D,KACF,GACA,gBAAA;AAEJ,IAAA,IAAI,oBAAA,CAAqB,WAAW,CAAA,EAAG;AACrC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL;AAAA,OACF;AACA,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,UAAA,EAAa,qBAAqB,MAAM,CAAA,+BAAA;AAAA,KAC1C;AAGA,IAAA,KAAA,MAAW,QAAQ,oBAAA,EAAsB;AACvC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,oCAAA,EAAuC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,OAC9D;AAGA,MAAA,MAAM,4BACJ,kBAAA,EAAoB,IAAA;AAAA,QAClB,OAAK,CAAA,CAAE,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY,SACpD,SAAA,IAAa,SAAA;AAElB,MAAA,MAAA,CAAO,I
AAA;AAAA,QACL,wCACE,IAAA,CAAK,IACP,MAAM,yBAAA,CAA0B,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA;AAAA,OAC5C;AAGA,MAAA,MAAM,KAAA,GAAQ,MAAM,kBAAA,CAAmB,YAAA;AAAA,QACrC,IAAA,CAAK,EAAA;AAAA,QACL;AAAA,OACF;AAEA,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,+DACE,IAAA,CAAK,IACP,KACE,IAAA,CAAK,EACP,4CAA4C,yBAAA,CAA0B,IAAA;AAAA,YACpE;AAAA,WACD,CAAA,CAAA;AAAA,SACH;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,KAAA,CAAM,UAAU,IAAA,CAAK,SAAA,CAAU,OAAO,IAAA,EAAM,CAAC,CAAC,CAAA,CAAE,CAAA;AAGvD,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,KAAA,CAAM,QAAQ,KAAA,EAAA,EAAS;AACjD,QAAA,MAAM,IAAA,GAAO,MAAM,KAAK,CAAA;AAExB,QAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,CAAmB,kBAAA;AAAA,UACvC,IAAA,CAAK,EAAA;AAAA,UACL,IAAA,CAAK;AAAA,SACP;AAEA,QAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,KAAA,GAAQ,CAAA,EAAG,MAAM,MAAM,CAAA;AAEhE,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,yCAAA,EAA4C,IAAA,CAAK,IAAI,CAAA,iBAAA,EAAoB,KAAK,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,2BAAA;AAAA,SACrL;AAEA,QAAA,MAAM,IAAA,GAAO,MAAMC,6CAAA,CAAe,OAAO,CAAA;AAEzC,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,QAAA,EAAU;AAAA,YACR,MAAA,EAAQC,kBAAA;AAAA,YACR,IAAI,CAAA,EAAG,IAAA,CAAK,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,YAC3B,KAAK,IAAA,CAAK,GAAA;AAAA,YACV,cAAc,kBAAA,CAAmB,YAAA;AAAA,YACjC,SAAS,kBAAA,CAAmB,OAAA;AAAA,YAC5B,YAAY,IAAA,CAAK;AAAA,WACnB;AAAA,UACA,OAAA,EAAS;AAAA,SACX;AAEA,QAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,MACzB;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,EAAG,SAAA,CAAU,MAAM,CAAA,wEAAA,EAA2E,KAAK,IAAI,CAAA;AAAA,OACzG;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,OAAO,EAAE,uBAAA,EAAwB;AACnC;;;;"}
|
|
1
|
+
{"version":3,"file":"repository.cjs.js","sources":["../../../src/services/ingestor/repository.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport { DEFAULT_FILE_TYPES } from '../../constants/default-file-types';\nimport {\n EmbeddingDocument,\n IngestorOptions,\n streamToString,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { AzureDevOpsService } from '../azure-devops';\nimport { Config } from '../../../config';\nimport { MODULE_ID } from '../../constants/module';\nimport { getProgressStats } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_REPO_FILE_BATCH_SIZE } from '../../constants/default-repo-file-batch-size';\nimport {\n GitItem,\n GitRepository,\n} from 'azure-devops-node-api/interfaces/GitInterfaces';\n\ntype RepositoryIngestorOptions = {\n config: RootConfigService;\n logger: LoggerService;\n azureDevOpsService: AzureDevOpsService;\n};\n\nexport const createRepositoryIngestor = async ({\n config,\n logger,\n azureDevOpsService,\n}: RepositoryIngestorOptions) => {\n // Get configuration values\n const repositoriesFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['azureDevOps']['repositories']\n >('aiAssistant.ingestors.azureDevOps.repositories');\n\n // Default to common file types if none are specified\n const fileTypes =\n config.getOptionalStringArray(\n 'aiAssistant.ingestors.azureDevOps.fileTypes',\n ) ?? DEFAULT_FILE_TYPES;\n\n // Get batch size for processing repository items (default to 50 items per batch)\n const itemsBatchSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestors.azureDevOps.filesBatchSize', // Reuse the same config for consistency\n ) ?? 
DEFAULT_REPO_FILE_BATCH_SIZE;\n\n /**\n * Ingest Azure DevOps repository items in batches\n * @param repository - The repository to ingest items from\n * @param items - The list of items to ingest from the repository\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the repository\n */\n const ingestRepoByFileBatch = async ({\n repository,\n items,\n saveDocumentsBatch,\n }: {\n repository: GitRepository;\n items: GitItem[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${items.length} items from repository \"${repository.name}\" in batches of ${itemsBatchSize}`,\n );\n\n logger.debug(`Items: ${JSON.stringify(items, null, 2)}`);\n\n let totalDocumentsIngested = 0;\n\n // Process items in batches to manage memory and performance\n const totalBatches = Math.ceil(items.length / itemsBatchSize);\n\n for (\n let batchStart = 0;\n batchStart < items.length;\n batchStart += itemsBatchSize\n ) {\n const batchEnd = Math.min(batchStart + itemsBatchSize, items.length);\n const itemsBatch = items.slice(batchStart, batchEnd);\n const batchNumber = Math.floor(batchStart / itemsBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${itemsBatch.length} items) for repository \"${repository.name}\"`,\n );\n\n // Generate embedding documents for each item in the current batch\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < itemsBatch.length; index++) {\n const item = itemsBatch[index];\n const globalIndex = batchStart + index;\n\n const content = await azureDevOpsService.getRepoItemContent(\n repository.id!,\n item.path!,\n );\n\n const completionStats = getProgressStats(globalIndex + 1, items.length);\n\n logger.info(\n `Retrieved content for Azure DevOps item: ${item.path} in repository: \"${repository.name}\" [Progress: 
${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of repository]`,\n );\n\n const text = await streamToString(content);\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${repository.id}:${item.path}`,\n url: item.url,\n organization: azureDevOpsService.organization,\n project: azureDevOpsService.project,\n repository: repository.name!,\n },\n content: text,\n };\n\n documents.push(document);\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for Azure DevOps repository: ${repository.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest Azure DevOps repositories in batches\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns void\n */\n const ingestRepositoriesBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const repositoriesList = await azureDevOpsService.getRepos();\n\n if (repositoriesList.length === 0) {\n logger.warn('No repositories found in the Azure DevOps project');\n return;\n }\n\n logger.info(\n `Filtering for repositories: ${repositoriesFilter\n ?.map(repo => repo.name)\n .join(', ')}`,\n );\n\n // Filter repositories if a filter is provided in the config\n const repositoriesToIngest = repositoriesFilter\n ? 
repositoriesList.filter(repo =>\n repositoriesFilter?.some(\n filteredRepo =>\n filteredRepo.name.toLowerCase() === repo.name!.toLowerCase(),\n ),\n )\n : repositoriesList;\n\n if (repositoriesToIngest.length === 0) {\n logger.warn(\n 'No repositories found for ingestion after applying the filter',\n );\n return;\n }\n\n logger.info(\n `Ingesting ${repositoriesToIngest.length} repositories from Azure DevOps`,\n );\n\n // Get items from each repository and create documents to be embedded\n for (const repo of repositoriesToIngest) {\n logger.info(\n `Beginning ingestion for repository: ${repo.name} (${repo.id})`,\n );\n\n // Determine the file types to use for this repository or use default\n const repositoryFileTypesFilter =\n repositoriesFilter?.find(\n r => r.name.toLowerCase() === repo.name!.toLowerCase(),\n )?.fileTypes ?? fileTypes;\n\n logger.info(\n `Processing file types for repository ${\n repo.name\n }: [${repositoryFileTypesFilter.join(', ')}]`,\n );\n\n // Get the items to be ingested from the repository based on the file types filter\n const items = await azureDevOpsService.getRepoItems(\n repo.id!,\n repositoryFileTypesFilter,\n );\n\n if (items.length === 0) {\n logger.warn(\n `No items found for ingestion in the Azure DevOps repository ${\n repo.name\n } (${\n repo.id\n }) with the specified file types filter: [${repositoryFileTypesFilter.join(\n ', ',\n )}]`,\n );\n continue;\n }\n\n const { totalDocumentsIngested } = await ingestRepoByFileBatch({\n repository: repo,\n items,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the Azure DevOps repository ${repo.name} (${repo.id})`,\n );\n continue;\n }\n\n logger.info(\n `Repository ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for Azure DevOps repository: ${repo.name}`,\n );\n }\n };\n\n return { ingestRepositoriesBatch 
};\n};\n"],"names":["DEFAULT_FILE_TYPES","DEFAULT_REPO_FILE_BATCH_SIZE","getProgressStats","streamToString","MODULE_ID"],"mappings":";;;;;;;;AA0BO,MAAM,2BAA2B,OAAO;AAAA,EAC7C,MAAA;AAAA,EACA,MAAA;AAAA,EACA;AACF,CAAA,KAAiC;AAE/B,EAAA,MAAM,kBAAA,GAAqB,MAAA,CAAO,WAAA,CAEhC,gDAAgD,CAAA;AAGlD,EAAA,MAAM,YACJ,MAAA,CAAO,sBAAA;AAAA,IACL;AAAA,GACF,IAAKA,mCAAA;AAGP,EAAA,MAAM,iBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA;AAAA,GACF,IAAKC,qDAAA;AASP,EAAA,MAAM,wBAAwB,OAAO;AAAA,IACnC,UAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,2BAA2B,UAAA,CAAW,IAAI,mBAAmB,cAAc,CAAA;AAAA,KACvG;AAEA,IAAA,MAAA,CAAO,KAAA,CAAM,UAAU,IAAA,CAAK,SAAA,CAAU,OAAO,IAAA,EAAM,CAAC,CAAC,CAAA,CAAE,CAAA;AAEvD,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAG7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAE5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,wBAAA,EAA2B,UAAA,CAAW,IAAI,CAAA,CAAA;AAAA,OACjH;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,CAAmB,kBAAA;AAAA,UACvC,UAAA,CAAW,EAAA;AAAA,UACX,IAAA,CAAK;AAAA,SACP;AAEA,QAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,WAAA,GAAc,CAAA,EAAG,MAAM,MAAM,CAAA;AAEtE,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,yCAAA,EAA4C,IAAA,CAAK,IAAI,CAAA,iBAAA,EAAoB,WAAW,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,2BAAA;AAAA,SAC3L;AAEA,QAAA,MAAM,IAAA,GAAO,MAAMC,6CAAA,CAAe,OAAO,CAAA;AAEzC,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,QAAA,EAAU;AAAA,YACR,MAAA,EAAQC,kBAAA;AAAA,YACR,IAAI,CAAA,EAAG,UAAA,CAAW,EAAE
,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,YACjC,KAAK,IAAA,CAAK,GAAA;AAAA,YACV,cAAc,kBAAA,CAAmB,YAAA;AAAA,YACjC,SAAS,kBAAA,CAAmB,OAAA;AAAA,YAC5B,YAAY,UAAA,CAAW;AAAA,WACzB;AAAA,UACA,OAAA,EAAS;AAAA,SACX;AAEA,QAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,MACzB;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,iDAAA,EAAoD,UAAA,CAAW,IAAI,CAAA;AAAA,OACxI;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAMA,EAAA,MAAM,uBAAA,GAA0B,OAC9B,kBAAA,KACG;AACH,IAAA,MAAM,gBAAA,GAAmB,MAAM,kBAAA,CAAmB,QAAA,EAAS;AAE3D,IAAA,IAAI,gBAAA,CAAiB,WAAW,CAAA,EAAG;AACjC,MAAA,MAAA,CAAO,KAAK,mDAAmD,CAAA;AAC/D,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,4BAAA,EAA+B,oBAC3B,GAAA,CAAI,CAAA,IAAA,KAAQ,KAAK,IAAI,CAAA,CACtB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAGA,IAAA,MAAM,oBAAA,GAAuB,qBACzB,gBAAA,CAAiB,MAAA;AAAA,MAAO,UACtB,kBAAA,EAAoB,IAAA;AAAA,QAClB,kBACE,YAAA,CAAa,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY;AAC/D,KACF,GACA,gBAAA;AAEJ,IAAA,IAAI,oBAAA,CAAqB,WAAW,CAAA,EAAG;AACrC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL;AAAA,OACF;AACA,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,UAAA,EAAa,qBAAqB,MAAM,CAAA,+BAAA;AAAA,KAC1C;AAGA,IAAA,KAAA,MAAW,QAAQ,oBAAA,EAAsB;AACvC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,oCAAA,EAAuC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,OAC9D;AAGA,MAAA,MAAM,4BACJ,kBAAA,EAAoB,IAAA;AAAA,QAClB,OAAK,CAAA,CAAE,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY,SACpD,SAAA,IAAa,SAAA;AAElB,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,wCACE,IAAA,CAAK,IACP,MAAM,yBAAA,CAA0B,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA;AAAA,OAC5C;AAGA,MAAA,MAAM,KAAA,GAAQ,MAAM,kBAAA,CAAmB,YAAA;AAAA,QACrC,IAAA,CAAK,EAAA;AAAA,QACL;AAAA,OACF;AAEA,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,+DACE,IAAA,CAAK,IACP,KACE,IAAA,CAAK,EACP,4CAA4C,yBAAA,CAA0B,IAAA;AAAA,YACpE;AAAA,WACD,CAAA,CAAA;AAAA,SACH;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MAAM,qBAAA,CAAsB;AAAA,QAC7D,UAAA,EAAY,IAAA;AA
AA,QACZ,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,mFAAA,EAAsF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SAC7G;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,gCAAA,EAAmC,sBAAsB,CAAA,8EAAA,EAAiF,IAAA,CAAK,IAAI,CAAA;AAAA,OACrJ;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,OAAO,EAAE,uBAAA,EAAwB;AACnC;;;;"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sweetoburrito/backstage-plugin-ai-assistant-backend-module-ingestor-azure-devops",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"description": "The ingestor-azure-devops backend module for the ai-assistant plugin.",
|
|
6
6
|
"main": "dist/index.cjs.js",
|