@sweetoburrito/backstage-plugin-ai-assistant-backend-module-ingestor-azure-devops 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/config.d.ts
CHANGED
|
@@ -29,23 +29,55 @@ export interface Config {
|
|
|
29
29
|
*/
|
|
30
30
|
fileTypes?: string[];
|
|
31
31
|
/**
|
|
32
|
-
* Optional
|
|
32
|
+
* Optional configuration for filtering repositories to ingest.
|
|
33
|
+
* If not specified, all repositories in the project will be ingested.
|
|
34
|
+
* Supports both exact name matching and regex patterns for flexible filtering.
|
|
33
35
|
*/
|
|
34
36
|
repositories?: {
|
|
35
37
|
/**
|
|
36
|
-
*
|
|
38
|
+
* List of repositories to include for ingestion.
|
|
39
|
+
* If specified, only repositories matching these criteria will be ingested (unless excluded).
|
|
37
40
|
*/
|
|
38
|
-
|
|
41
|
+
include?: {
|
|
42
|
+
/**
|
|
43
|
+
* Repository name or regular expression pattern.
|
|
44
|
+
* All values are treated as regex patterns for matching:
|
|
45
|
+
* - Plain strings (e.g., 'my-repo') match any repository whose name contains that string (case-sensitive); anchor the pattern (e.g., '^my-repo$') for an exact match
|
|
46
|
+
* - Regex patterns (e.g., '^backend-.*', '.*-service$') match using regex rules
|
|
47
|
+
* Examples:
|
|
48
|
+
* - '^backend-api$' matches only 'backend-api' (unanchored 'backend-api' would also match 'backend-api-v2')
|
|
49
|
+
* - '^backend-.*' matches 'backend-api', 'backend-service', etc.
|
|
50
|
+
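* - '^[Bb]ackend-.*' matches both 'Backend-api' and 'backend-api' (inline flags such as '(?i)' are not valid JavaScript regex syntax and cause the pattern to be rejected)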
|
|
51
|
+
*/
|
|
52
|
+
name: string;
|
|
53
|
+
/**
|
|
54
|
+
* Optional list of file types to ingest for this repository. Overrides the global fileTypes setting for this repository only.
|
|
55
|
+
*/
|
|
56
|
+
fileTypes?: string[];
|
|
57
|
+
/**
|
|
58
|
+
* Optional list of glob patterns to exclude files and directories from ingestion for this repository.
|
|
59
|
+
* Overrides the global pathExclusions setting for this repository only.
|
|
60
|
+
*/
|
|
61
|
+
pathExclusions?: string[];
|
|
62
|
+
}[];
|
|
39
63
|
/**
|
|
40
|
-
*
|
|
64
|
+
* List of repositories to exclude from ingestion.
|
|
65
|
+
* Exclusions are applied after inclusions.
|
|
41
66
|
*/
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
67
|
+
exclude?: {
|
|
68
|
+
/**
|
|
69
|
+
* Repository name or regular expression pattern to exclude.
|
|
70
|
+
* All values are treated as regex patterns for matching:
|
|
71
|
+
* - Plain strings (e.g., 'test-repo') match any repository whose name contains that string (case-sensitive); anchor the pattern (e.g., '^test-repo$') for an exact match
|
|
72
|
+
* - Regex patterns (e.g., '^test-.*', '.*-archived$') match using regex rules
|
|
73
|
+
* Examples:
|
|
74
|
+
* - '^temp-repo$' matches only 'temp-repo' (unanchored 'temp-repo' would also match 'temp-repo-old')
|
|
75
|
+
* - '^test-.*' matches 'test-api', 'test-service', etc.
|
|
76
|
+
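* - '[Aa]rchived' matches both 'Archived' and 'archived' (inline flags such as '(?i)' are not valid JavaScript regex syntax and cause the pattern to be rejected)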
|
|
77
|
+
*/
|
|
78
|
+
name: string;
|
|
79
|
+
}[];
|
|
80
|
+
};
|
|
49
81
|
/**
|
|
50
82
|
* Optional batch size for processing repository files. Defaults to 50 files per batch.
|
|
51
83
|
* Lower values use less memory but may be slower, higher values are faster but use more memory.
|
|
@@ -59,14 +91,46 @@ export interface Config {
|
|
|
59
91
|
pathExclusions?: string[];
|
|
60
92
|
|
|
61
93
|
/**
|
|
62
|
-
* Optional
|
|
94
|
+
* Optional configuration for filtering wikis to ingest.
|
|
95
|
+
* If not specified, all wikis in the project will be ingested.
|
|
96
|
+
* Supports both exact name matching and regex patterns for flexible filtering.
|
|
63
97
|
*/
|
|
64
98
|
wikis?: {
|
|
65
99
|
/**
|
|
66
|
-
*
|
|
100
|
+
* List of wikis to include for ingestion.
|
|
101
|
+
* If specified, only wikis matching these criteria will be ingested (unless excluded).
|
|
102
|
+
*/
|
|
103
|
+
include?: {
|
|
104
|
+
/**
|
|
105
|
+
* Wiki name or regular expression pattern.
|
|
106
|
+
* All values are treated as regex patterns for matching:
|
|
107
|
+
* - Plain strings (e.g., 'my-wiki') match exactly that string (case-sensitive)
|
|
108
|
+
* - Regex patterns (e.g., '^prod-.*', '.*-docs$') match using regex rules
|
|
109
|
+
* Examples:
|
|
110
|
+
* - 'production-wiki' matches only 'production-wiki'
|
|
111
|
+
* - '^prod-.*' matches 'prod-wiki', 'prod-docs', etc.
|
|
112
|
+
* - '[Dd]ocs' matches both 'Docs' and 'docs' (inline flags such as '(?i)' are not valid JavaScript regex syntax)
|
|
113
|
+
*/
|
|
114
|
+
name: string;
|
|
115
|
+
}[];
|
|
116
|
+
/**
|
|
117
|
+
* List of wikis to exclude from ingestion.
|
|
118
|
+
* Exclusions are applied after inclusions.
|
|
67
119
|
*/
|
|
68
|
-
|
|
69
|
-
|
|
120
|
+
exclude?: {
|
|
121
|
+
/**
|
|
122
|
+
* Wiki name or regular expression pattern to exclude.
|
|
123
|
+
* All values are treated as regex patterns for matching:
|
|
124
|
+
* - Plain strings (e.g., 'draft-wiki') match exactly that string (case-sensitive)
|
|
125
|
+
* - Regex patterns (e.g., '^draft-.*', '.*-test$') match using regex rules
|
|
126
|
+
* Examples:
|
|
127
|
+
* - 'temp-wiki' matches only 'temp-wiki'
|
|
128
|
+
* - '^draft-.*' matches 'draft-docs', 'draft-notes', etc.
|
|
129
|
+
* - '[Tt]est' matches both 'Test' and 'test' (inline flags such as '(?i)' are not valid JavaScript regex syntax)
|
|
130
|
+
*/
|
|
131
|
+
name: string;
|
|
132
|
+
}[];
|
|
133
|
+
};
|
|
70
134
|
|
|
71
135
|
/**
|
|
72
136
|
* Optional batch size for processing wiki pages. Defaults to 50 pages per batch.
|
|
@@ -13,6 +13,56 @@ const createRepositoryIngestor = async ({
|
|
|
13
13
|
azureDevOpsService
|
|
14
14
|
}) => {
|
|
15
15
|
const repositoriesFilter = config.getOptional("aiAssistant.ingestors.azureDevOps.repositories");
|
|
16
|
+
const includeMatchers = [];
|
|
17
|
+
const excludeMatchers = [];
|
|
18
|
+
if (repositoriesFilter?.include) {
|
|
19
|
+
for (const filter of repositoriesFilter.include) {
|
|
20
|
+
try {
|
|
21
|
+
const regex = new RegExp(filter.name);
|
|
22
|
+
includeMatchers.push({
|
|
23
|
+
value: filter.name,
|
|
24
|
+
regex,
|
|
25
|
+
fileTypes: filter.fileTypes,
|
|
26
|
+
pathExclusions: filter.pathExclusions
|
|
27
|
+
});
|
|
28
|
+
} catch (error) {
|
|
29
|
+
logger.error(
|
|
30
|
+
`Invalid regular expression in repository include '${filter.name}': ${error}`
|
|
31
|
+
);
|
|
32
|
+
throw new Error(
|
|
33
|
+
`Invalid repository include pattern '${filter.name}': ${error}`
|
|
34
|
+
);
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
if (repositoriesFilter?.exclude) {
|
|
39
|
+
for (const filter of repositoriesFilter.exclude) {
|
|
40
|
+
try {
|
|
41
|
+
const regex = new RegExp(filter.name);
|
|
42
|
+
excludeMatchers.push({
|
|
43
|
+
value: filter.name,
|
|
44
|
+
regex
|
|
45
|
+
});
|
|
46
|
+
} catch (error) {
|
|
47
|
+
logger.error(
|
|
48
|
+
`Invalid regular expression in repository exclude '${filter.name}': ${error}`
|
|
49
|
+
);
|
|
50
|
+
throw new Error(
|
|
51
|
+
`Invalid repository exclude pattern '${filter.name}': ${error}`
|
|
52
|
+
);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
if (includeMatchers.length > 0) {
|
|
57
|
+
logger.info(
|
|
58
|
+
`Repository include filters: ${includeMatchers.map((m) => `'${m.value}'`).join(", ")}`
|
|
59
|
+
);
|
|
60
|
+
}
|
|
61
|
+
if (excludeMatchers.length > 0) {
|
|
62
|
+
logger.info(
|
|
63
|
+
`Repository exclude filters: ${excludeMatchers.map((m) => `'${m.value}'`).join(", ")}`
|
|
64
|
+
);
|
|
65
|
+
}
|
|
16
66
|
const fileTypes = config.getOptionalStringArray(
|
|
17
67
|
"aiAssistant.ingestors.azureDevOps.fileTypes"
|
|
18
68
|
) ?? defaultFileTypes.DEFAULT_FILE_TYPES;
|
|
@@ -97,20 +147,42 @@ const createRepositoryIngestor = async ({
|
|
|
97
147
|
logger.warn("No repositories found in the Azure DevOps project");
|
|
98
148
|
return;
|
|
99
149
|
}
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
150
|
+
let repositoriesToIngest = repositoriesList;
|
|
151
|
+
if (includeMatchers.length > 0) {
|
|
152
|
+
logger.info(
|
|
153
|
+
`Include filter found. Only including repositories matching the following patterns for ingestion: ${includeMatchers.map((m) => `'${m.value}'`).join(", ")}`
|
|
154
|
+
);
|
|
155
|
+
repositoriesToIngest = repositoriesToIngest.filter((repo) => {
|
|
156
|
+
return includeMatchers.some((matcher) => matcher.regex.test(repo.name));
|
|
157
|
+
});
|
|
158
|
+
}
|
|
159
|
+
if (excludeMatchers.length > 0) {
|
|
160
|
+
logger.info(
|
|
161
|
+
`Exclude filter found. Excluding repositories matching the following patterns from ingestion: ${excludeMatchers.map((m) => `'${m.value}'`).join(", ")}`
|
|
162
|
+
);
|
|
163
|
+
const excludedRepos = repositoriesToIngest.filter((repo) => {
|
|
164
|
+
return excludeMatchers.some((matcher) => matcher.regex.test(repo.name));
|
|
165
|
+
});
|
|
166
|
+
if (excludedRepos.length > 0) {
|
|
167
|
+
logger.info(
|
|
168
|
+
`Excluding repositories: ${excludedRepos.map((r) => r.name).join(", ")}`
|
|
169
|
+
);
|
|
170
|
+
}
|
|
171
|
+
repositoriesToIngest = repositoriesToIngest.filter((repo) => {
|
|
172
|
+
return !excludeMatchers.some(
|
|
173
|
+
(matcher) => matcher.regex.test(repo.name)
|
|
174
|
+
);
|
|
175
|
+
});
|
|
176
|
+
}
|
|
108
177
|
if (repositoriesToIngest.length === 0) {
|
|
109
178
|
logger.warn(
|
|
110
179
|
"No repositories found for ingestion after applying the filter"
|
|
111
180
|
);
|
|
112
181
|
return;
|
|
113
182
|
}
|
|
183
|
+
logger.debug(
|
|
184
|
+
`Repositories to ingest: ${repositoriesToIngest.map((r) => r.name).join(", ")}`
|
|
185
|
+
);
|
|
114
186
|
logger.info(
|
|
115
187
|
`Ingesting ${repositoriesToIngest.length} repositories from Azure DevOps`
|
|
116
188
|
);
|
|
@@ -118,12 +190,11 @@ const createRepositoryIngestor = async ({
|
|
|
118
190
|
logger.info(
|
|
119
191
|
`Beginning ingestion for repository: ${repo.name} (${repo.id})`
|
|
120
192
|
);
|
|
121
|
-
const
|
|
122
|
-
(
|
|
123
|
-
)
|
|
124
|
-
const
|
|
125
|
-
|
|
126
|
-
)?.pathExclusions ?? globalPathExclusions;
|
|
193
|
+
const matchingMatcher = includeMatchers.find(
|
|
194
|
+
(matcher) => matcher.regex.test(repo.name)
|
|
195
|
+
);
|
|
196
|
+
const repositoryFileTypesFilter = matchingMatcher?.fileTypes ?? fileTypes;
|
|
197
|
+
const repositoryPathExclusions = matchingMatcher?.pathExclusions ?? globalPathExclusions;
|
|
127
198
|
logger.info(
|
|
128
199
|
`Processing file types for repository ${repo.name}: [${repositoryFileTypesFilter.join(", ")}]`
|
|
129
200
|
);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"repository.cjs.js","sources":["../../../src/services/ingestor/repository.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport { DEFAULT_FILE_TYPES } from '../../constants/default-file-types';\nimport {\n EmbeddingDocument,\n IngestorOptions,\n streamToString,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { AzureDevOpsService } from '../azure-devops';\nimport { Config } from '../../../config';\nimport { MODULE_ID } from '../../constants/module';\nimport {\n getProgressStats,\n createPathFilter,\n validateExclusionPatterns,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_REPO_FILE_BATCH_SIZE } from '../../constants/default-repo-file-batch-size';\nimport { DEFAULT_PATH_EXCLUSIONS } from '../../constants/default-path-exclusions';\nimport {\n GitItem,\n GitRepository,\n} from 'azure-devops-node-api/interfaces/GitInterfaces';\n\ntype RepositoryIngestorOptions = {\n config: RootConfigService;\n logger: LoggerService;\n azureDevOpsService: AzureDevOpsService;\n};\n\nexport const createRepositoryIngestor = async ({\n config,\n logger,\n azureDevOpsService,\n}: RepositoryIngestorOptions) => {\n // Get configuration values\n const repositoriesFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['azureDevOps']['repositories']\n >('aiAssistant.ingestors.azureDevOps.repositories');\n\n // Default to common file types if none are specified\n const fileTypes =\n config.getOptionalStringArray(\n 'aiAssistant.ingestors.azureDevOps.fileTypes',\n ) ?? DEFAULT_FILE_TYPES;\n\n // Get batch size for processing repository items (default to 50 items per batch)\n const itemsBatchSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestors.azureDevOps.filesBatchSize', // Reuse the same config for consistency\n ) ?? 
DEFAULT_REPO_FILE_BATCH_SIZE;\n\n // Get global path exclusion patterns from configuration, defaulting to predefined patterns\n const globalPathExclusions =\n config.getOptionalStringArray(\n 'aiAssistant.ingestors.azureDevOps.pathExclusions',\n ) ?? DEFAULT_PATH_EXCLUSIONS;\n\n // Validate exclusion patterns\n const validation = validateExclusionPatterns(globalPathExclusions);\n if (!validation.isValid) {\n logger.error(\n `Invalid path exclusion patterns in Azure DevOps ingestor configuration: ${validation.errors.join(\n ', ',\n )}`,\n );\n throw new Error(\n `Invalid path exclusion patterns: ${validation.errors.join(', ')}`,\n );\n }\n if (validation.warnings.length > 0) {\n logger.warn(\n `Path exclusion pattern warnings: ${validation.warnings.join(', ')}`,\n );\n }\n\n /**\n * Ingest Azure DevOps repository items in batches\n * @param repository - The repository to ingest items from\n * @param items - The list of items to ingest from the repository\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the repository\n */\n const ingestRepoByFileBatch = async ({\n repository,\n items,\n saveDocumentsBatch,\n }: {\n repository: GitRepository;\n items: GitItem[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${items.length} items from repository \"${repository.name}\" in batches of ${itemsBatchSize}`,\n );\n\n logger.debug(`Items: ${JSON.stringify(items, null, 2)}`);\n\n let totalDocumentsIngested = 0;\n\n // Process items in batches to manage memory and performance\n const totalBatches = Math.ceil(items.length / itemsBatchSize);\n\n for (\n let batchStart = 0;\n batchStart < items.length;\n batchStart += itemsBatchSize\n ) {\n const batchEnd = Math.min(batchStart + itemsBatchSize, items.length);\n const itemsBatch = items.slice(batchStart, batchEnd);\n const batchNumber = Math.floor(batchStart / 
itemsBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${itemsBatch.length} items) for repository \"${repository.name}\"`,\n );\n\n // Generate embedding documents for each item in the current batch\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < itemsBatch.length; index++) {\n const item = itemsBatch[index];\n const globalIndex = batchStart + index;\n\n const content = await azureDevOpsService.getRepoItemContent(\n repository.id!,\n item.path!,\n );\n\n const completionStats = getProgressStats(globalIndex + 1, items.length);\n\n logger.info(\n `Retrieved content for Azure DevOps item: ${item.path} in repository: \"${repository.name}\" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of repository]`,\n );\n\n const text = await streamToString(content);\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${repository.id}:${item.path}`,\n url: item.url!,\n organization: azureDevOpsService.organization,\n project: azureDevOpsService.project,\n repository: repository.name!,\n },\n content: text,\n };\n\n documents.push(document);\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for Azure DevOps repository: ${repository.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest Azure DevOps repositories in batches\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns void\n */\n const ingestRepositoriesBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const repositoriesList = await azureDevOpsService.getRepos();\n\n if (repositoriesList.length === 0) {\n logger.warn('No repositories found in the Azure DevOps project');\n return;\n }\n\n 
logger.info(\n `Filtering for repositories: ${repositoriesFilter\n ?.map(repo => repo.name)\n .join(', ')}`,\n );\n\n // Filter repositories if a filter is provided in the config\n const repositoriesToIngest = repositoriesFilter\n ? repositoriesList.filter(repo =>\n repositoriesFilter?.some(\n filteredRepo =>\n filteredRepo.name.toLowerCase() === repo.name!.toLowerCase(),\n ),\n )\n : repositoriesList;\n\n if (repositoriesToIngest.length === 0) {\n logger.warn(\n 'No repositories found for ingestion after applying the filter',\n );\n return;\n }\n\n logger.info(\n `Ingesting ${repositoriesToIngest.length} repositories from Azure DevOps`,\n );\n\n // Get items from each repository and create documents to be embedded\n for (const repo of repositoriesToIngest) {\n logger.info(\n `Beginning ingestion for repository: ${repo.name} (${repo.id})`,\n );\n\n // Determine the file types to use for this repository or use default\n const repositoryFileTypesFilter =\n repositoriesFilter?.find(\n r => r.name.toLowerCase() === repo.name!.toLowerCase(),\n )?.fileTypes ?? fileTypes;\n\n // Determine the path exclusions to use for this repository or use global default\n const repositoryPathExclusions =\n repositoriesFilter?.find(\n r => r.name.toLowerCase() === repo.name!.toLowerCase(),\n )?.pathExclusions ?? 
globalPathExclusions;\n\n logger.info(\n `Processing file types for repository ${\n repo.name\n }: [${repositoryFileTypesFilter.join(', ')}]`,\n );\n\n logger.info(\n `Using path exclusions for repository ${\n repo.name\n }: [${repositoryPathExclusions.join(', ')}]`,\n );\n\n // Get the items to be ingested from the repository based on the file types filter\n let items = await azureDevOpsService.getRepoItems(\n repo.id!,\n repositoryFileTypesFilter,\n );\n\n // Apply path exclusion filtering\n const pathFilter = createPathFilter({\n exclusionPatterns: repositoryPathExclusions,\n });\n\n const originalItemCount = items.length;\n\n // Log excluded items for debugging\n const excludedItems = items.filter(\n item => item.path && pathFilter.shouldExcludePath(item.path),\n );\n\n if (excludedItems.length > 0) {\n logger.debug(\n `Items excluded from repository ${repo.name}: ${excludedItems\n .map(i => i.path)\n .join(', ')}`,\n );\n }\n\n items = pathFilter.filterFiles(items);\n const filteredItemCount = originalItemCount - items.length;\n\n if (filteredItemCount > 0) {\n logger.info(\n `Filtered out ${filteredItemCount} items from repository ${repo.name} based on path exclusion patterns`,\n );\n }\n\n if (items.length === 0) {\n logger.warn(\n `No items found for ingestion in the Azure DevOps repository ${\n repo.name\n } (${\n repo.id\n }) with the specified file types filter: [${repositoryFileTypesFilter.join(\n ', ',\n )}]`,\n );\n continue;\n }\n\n const { totalDocumentsIngested } = await ingestRepoByFileBatch({\n repository: repo,\n items,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the Azure DevOps repository ${repo.name} (${repo.id})`,\n );\n continue;\n }\n\n logger.info(\n `Repository ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for Azure DevOps repository: ${repo.name}`,\n );\n }\n };\n\n return { 
ingestRepositoriesBatch };\n};\n"],"names":["DEFAULT_FILE_TYPES","DEFAULT_REPO_FILE_BATCH_SIZE","DEFAULT_PATH_EXCLUSIONS","validateExclusionPatterns","getProgressStats","streamToString","MODULE_ID","createPathFilter"],"mappings":";;;;;;;;;AA+BO,MAAM,2BAA2B,OAAO;AAAA,EAC7C,MAAA;AAAA,EACA,MAAA;AAAA,EACA;AACF,CAAA,KAAiC;AAE/B,EAAA,MAAM,kBAAA,GAAqB,MAAA,CAAO,WAAA,CAEhC,gDAAgD,CAAA;AAGlD,EAAA,MAAM,YACJ,MAAA,CAAO,sBAAA;AAAA,IACL;AAAA,GACF,IAAKA,mCAAA;AAGP,EAAA,MAAM,iBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA;AAAA,GACF,IAAKC,qDAAA;AAGP,EAAA,MAAM,uBACJ,MAAA,CAAO,sBAAA;AAAA,IACL;AAAA,GACF,IAAKC,6CAAA;AAGP,EAAA,MAAM,UAAA,GAAaC,2DAA0B,oBAAoB,CAAA;AACjE,EAAA,IAAI,CAAC,WAAW,OAAA,EAAS;AACvB,IAAA,MAAA,CAAO,KAAA;AAAA,MACL,CAAA,wEAAA,EAA2E,WAAW,MAAA,CAAO,IAAA;AAAA,QAC3F;AAAA,OACD,CAAA;AAAA,KACH;AACA,IAAA,MAAM,IAAI,KAAA;AAAA,MACR,CAAA,iCAAA,EAAoC,UAAA,CAAW,MAAA,CAAO,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KAClE;AAAA,EACF;AACA,EAAA,IAAI,UAAA,CAAW,QAAA,CAAS,MAAA,GAAS,CAAA,EAAG;AAClC,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,iCAAA,EAAoC,UAAA,CAAW,QAAA,CAAS,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACpE;AAAA,EACF;AASA,EAAA,MAAM,wBAAwB,OAAO;AAAA,IACnC,UAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,2BAA2B,UAAA,CAAW,IAAI,mBAAmB,cAAc,CAAA;AAAA,KACvG;AAEA,IAAA,MAAA,CAAO,KAAA,CAAM,UAAU,IAAA,CAAK,SAAA,CAAU,OAAO,IAAA,EAAM,CAAC,CAAC,CAAA,CAAE,CAAA;AAEvD,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAG7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAE5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,wBAAA,EAA2B,UAAA,CAAW,IAAI,CAAA,CAAA;AAAA,OACjH;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,
QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,CAAmB,kBAAA;AAAA,UACvC,UAAA,CAAW,EAAA;AAAA,UACX,IAAA,CAAK;AAAA,SACP;AAEA,QAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,WAAA,GAAc,CAAA,EAAG,MAAM,MAAM,CAAA;AAEtE,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,yCAAA,EAA4C,IAAA,CAAK,IAAI,CAAA,iBAAA,EAAoB,WAAW,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,2BAAA;AAAA,SAC3L;AAEA,QAAA,MAAM,IAAA,GAAO,MAAMC,6CAAA,CAAe,OAAO,CAAA;AAEzC,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,QAAA,EAAU;AAAA,YACR,MAAA,EAAQC,kBAAA;AAAA,YACR,IAAI,CAAA,EAAG,UAAA,CAAW,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,YACjC,KAAK,IAAA,CAAK,GAAA;AAAA,YACV,cAAc,kBAAA,CAAmB,YAAA;AAAA,YACjC,SAAS,kBAAA,CAAmB,OAAA;AAAA,YAC5B,YAAY,UAAA,CAAW;AAAA,WACzB;AAAA,UACA,OAAA,EAAS;AAAA,SACX;AAEA,QAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,MACzB;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,iDAAA,EAAoD,UAAA,CAAW,IAAI,CAAA;AAAA,OACxI;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAMA,EAAA,MAAM,uBAAA,GAA0B,OAC9B,kBAAA,KACG;AACH,IAAA,MAAM,gBAAA,GAAmB,MAAM,kBAAA,CAAmB,QAAA,EAAS;AAE3D,IAAA,IAAI,gBAAA,CAAiB,WAAW,CAAA,EAAG;AACjC,MAAA,MAAA,CAAO,KAAK,mDAAmD,CAAA;AAC/D,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,4BAAA,EAA+B,oBAC3B,GAAA,CAAI,CAAA,IAAA,KAAQ,KAAK,IAAI,CAAA,CACtB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAGA,IAAA,MAAM,oBAAA,GAAuB,qBACzB,gBAAA,CAAiB,MAAA;AAAA,MAAO,UACtB,kBAAA,EAAoB,IAAA;AAAA,QAClB,kBACE,YAAA,CAAa,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY;AAC/D,KACF,GACA,gBAAA;AAEJ,IAAA,IAAI,oBAAA,CAAqB,WAAW,CAAA,EAAG;AACrC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL;AAAA,OACF;AACA,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,UAAA,EAAa,qBAAqB,MAAM,CAAA,+BAAA;AAAA,KAC1C;AAGA,IAAA,KAAA,MAAW,QAAQ,oBAAA,EAAsB;AACvC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,oCAAA,EAAuC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK
,EAAE,CAAA,CAAA;AAAA,OAC9D;AAGA,MAAA,MAAM,4BACJ,kBAAA,EAAoB,IAAA;AAAA,QAClB,OAAK,CAAA,CAAE,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY,SACpD,SAAA,IAAa,SAAA;AAGlB,MAAA,MAAM,2BACJ,kBAAA,EAAoB,IAAA;AAAA,QAClB,OAAK,CAAA,CAAE,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY,SACpD,cAAA,IAAkB,oBAAA;AAEvB,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,wCACE,IAAA,CAAK,IACP,MAAM,yBAAA,CAA0B,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA;AAAA,OAC5C;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,wCACE,IAAA,CAAK,IACP,MAAM,wBAAA,CAAyB,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA;AAAA,OAC3C;AAGA,MAAA,IAAI,KAAA,GAAQ,MAAM,kBAAA,CAAmB,YAAA;AAAA,QACnC,IAAA,CAAK,EAAA;AAAA,QACL;AAAA,OACF;AAGA,MAAA,MAAM,aAAaC,iDAAA,CAAiB;AAAA,QAClC,iBAAA,EAAmB;AAAA,OACpB,CAAA;AAED,MAAA,MAAM,oBAAoB,KAAA,CAAM,MAAA;AAGhC,MAAA,MAAM,gBAAgB,KAAA,CAAM,MAAA;AAAA,QAC1B,UAAQ,IAAA,CAAK,IAAA,IAAQ,UAAA,CAAW,iBAAA,CAAkB,KAAK,IAAI;AAAA,OAC7D;AAEA,MAAA,IAAI,aAAA,CAAc,SAAS,CAAA,EAAG;AAC5B,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,+BAAA,EAAkC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,aAAA,CAC7C,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAE,IAAI,CAAA,CACf,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,SACf;AAAA,MACF;AAEA,MAAA,KAAA,GAAQ,UAAA,CAAW,YAAY,KAAK,CAAA;AACpC,MAAA,MAAM,iBAAA,GAAoB,oBAAoB,KAAA,CAAM,MAAA;AAEpD,MAAA,IAAI,oBAAoB,CAAA,EAAG;AACzB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,aAAA,EAAgB,iBAAiB,CAAA,uBAAA,EAA0B,IAAA,CAAK,IAAI,CAAA,iCAAA;AAAA,SACtE;AAAA,MACF;AAEA,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,+DACE,IAAA,CAAK,IACP,KACE,IAAA,CAAK,EACP,4CAA4C,yBAAA,CAA0B,IAAA;AAAA,YACpE;AAAA,WACD,CAAA,CAAA;AAAA,SACH;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MAAM,qBAAA,CAAsB;AAAA,QAC7D,UAAA,EAAY,IAAA;AAAA,QACZ,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,mFAAA,EAAsF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SAC7G;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,gCAAA,EAAmC,sBAAsB,CAAA,8EAAA,EAAiF,IAAA,CAAK,IAAI,CAAA;AAAA,OACrJ;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,OAAO,EAAE,uBAAA,EAAwB;AACnC;;;;"}
|
|
1
|
+
{"version":3,"file":"repository.cjs.js","sources":["../../../src/services/ingestor/repository.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport { DEFAULT_FILE_TYPES } from '../../constants/default-file-types';\nimport {\n EmbeddingDocument,\n IngestorOptions,\n streamToString,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { AzureDevOpsService } from '../azure-devops';\nimport { Config } from '../../../config';\nimport { MODULE_ID } from '../../constants/module';\nimport {\n getProgressStats,\n createPathFilter,\n validateExclusionPatterns,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_REPO_FILE_BATCH_SIZE } from '../../constants/default-repo-file-batch-size';\nimport { DEFAULT_PATH_EXCLUSIONS } from '../../constants/default-path-exclusions';\nimport {\n GitItem,\n GitRepository,\n} from 'azure-devops-node-api/interfaces/GitInterfaces';\n\ntype RepositoryIngestorOptions = {\n config: RootConfigService;\n logger: LoggerService;\n azureDevOpsService: AzureDevOpsService;\n};\n\nexport const createRepositoryIngestor = async ({\n config,\n logger,\n azureDevOpsService,\n}: RepositoryIngestorOptions) => {\n // Get configuration values\n const repositoriesFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['azureDevOps']['repositories']\n >('aiAssistant.ingestors.azureDevOps.repositories');\n\n // Process and validate repository filters\n type RepositoryMatcher = {\n value: string;\n regex: RegExp;\n fileTypes?: string[];\n pathExclusions?: string[];\n };\n\n const includeMatchers: RepositoryMatcher[] = [];\n const excludeMatchers: RepositoryMatcher[] = [];\n\n if (repositoriesFilter?.include) {\n for (const filter of repositoriesFilter.include) {\n try {\n // All strings are treated as regular expression patterns; escape special characters for exact literal matches\n const regex = new RegExp(filter.name);\n includeMatchers.push({\n 
value: filter.name,\n regex,\n fileTypes: filter.fileTypes,\n pathExclusions: filter.pathExclusions,\n });\n } catch (error) {\n logger.error(\n `Invalid regular expression in repository include '${filter.name}': ${error}`,\n );\n throw new Error(\n `Invalid repository include pattern '${filter.name}': ${error}`,\n );\n }\n }\n }\n\n if (repositoriesFilter?.exclude) {\n for (const filter of repositoriesFilter.exclude) {\n try {\n // All strings are valid regex - plain strings match exactly, patterns match as regex\n const regex = new RegExp(filter.name);\n excludeMatchers.push({\n value: filter.name,\n regex,\n });\n } catch (error) {\n logger.error(\n `Invalid regular expression in repository exclude '${filter.name}': ${error}`,\n );\n throw new Error(\n `Invalid repository exclude pattern '${filter.name}': ${error}`,\n );\n }\n }\n }\n\n if (includeMatchers.length > 0) {\n logger.info(\n `Repository include filters: ${includeMatchers\n .map(m => `'${m.value}'`)\n .join(', ')}`,\n );\n }\n if (excludeMatchers.length > 0) {\n logger.info(\n `Repository exclude filters: ${excludeMatchers\n .map(m => `'${m.value}'`)\n .join(', ')}`,\n );\n }\n\n // Default to common file types if none are specified\n const fileTypes =\n config.getOptionalStringArray(\n 'aiAssistant.ingestors.azureDevOps.fileTypes',\n ) ?? DEFAULT_FILE_TYPES;\n\n // Get batch size for processing repository items (default to 50 items per batch)\n const itemsBatchSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestors.azureDevOps.filesBatchSize', // Reuse the same config for consistency\n ) ?? DEFAULT_REPO_FILE_BATCH_SIZE;\n\n // Get global path exclusion patterns from configuration, defaulting to predefined patterns\n const globalPathExclusions =\n config.getOptionalStringArray(\n 'aiAssistant.ingestors.azureDevOps.pathExclusions',\n ) ?? 
DEFAULT_PATH_EXCLUSIONS;\n\n // Validate exclusion patterns\n const validation = validateExclusionPatterns(globalPathExclusions);\n if (!validation.isValid) {\n logger.error(\n `Invalid path exclusion patterns in Azure DevOps ingestor configuration: ${validation.errors.join(\n ', ',\n )}`,\n );\n throw new Error(\n `Invalid path exclusion patterns: ${validation.errors.join(', ')}`,\n );\n }\n if (validation.warnings.length > 0) {\n logger.warn(\n `Path exclusion pattern warnings: ${validation.warnings.join(', ')}`,\n );\n }\n\n /**\n * Ingest Azure DevOps repository items in batches\n * @param repository - The repository to ingest items from\n * @param items - The list of items to ingest from the repository\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the repository\n */\n const ingestRepoByFileBatch = async ({\n repository,\n items,\n saveDocumentsBatch,\n }: {\n repository: GitRepository;\n items: GitItem[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${items.length} items from repository \"${repository.name}\" in batches of ${itemsBatchSize}`,\n );\n\n logger.debug(`Items: ${JSON.stringify(items, null, 2)}`);\n\n let totalDocumentsIngested = 0;\n\n // Process items in batches to manage memory and performance\n const totalBatches = Math.ceil(items.length / itemsBatchSize);\n\n for (\n let batchStart = 0;\n batchStart < items.length;\n batchStart += itemsBatchSize\n ) {\n const batchEnd = Math.min(batchStart + itemsBatchSize, items.length);\n const itemsBatch = items.slice(batchStart, batchEnd);\n const batchNumber = Math.floor(batchStart / itemsBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${itemsBatch.length} items) for repository \"${repository.name}\"`,\n );\n\n // Generate embedding documents for each item in the current batch\n const documents: 
EmbeddingDocument[] = [];\n\n for (let index = 0; index < itemsBatch.length; index++) {\n const item = itemsBatch[index];\n const globalIndex = batchStart + index;\n\n const content = await azureDevOpsService.getRepoItemContent(\n repository.id!,\n item.path!,\n );\n\n const completionStats = getProgressStats(globalIndex + 1, items.length);\n\n logger.info(\n `Retrieved content for Azure DevOps item: ${item.path} in repository: \"${repository.name}\" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of repository]`,\n );\n\n const text = await streamToString(content);\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${repository.id}:${item.path}`,\n url: item.url!,\n organization: azureDevOpsService.organization,\n project: azureDevOpsService.project,\n repository: repository.name!,\n },\n content: text,\n };\n\n documents.push(document);\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for Azure DevOps repository: ${repository.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest Azure DevOps repositories in batches\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns void\n */\n const ingestRepositoriesBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const repositoriesList = await azureDevOpsService.getRepos();\n\n if (repositoriesList.length === 0) {\n logger.warn('No repositories found in the Azure DevOps project');\n return;\n }\n\n // Filter repositories using matchers\n let repositoriesToIngest = repositoriesList;\n\n // If include matchers exist, only include repos that match at least one\n if (includeMatchers.length > 0) {\n logger.info(\n `Include filter found. 
Only including repositories matching the following patterns for ingestion: ${includeMatchers\n .map(m => `'${m.value}'`)\n .join(', ')}`,\n );\n\n repositoriesToIngest = repositoriesToIngest.filter(repo => {\n return includeMatchers.some(matcher => matcher.regex!.test(repo.name!));\n });\n }\n\n // Apply exclusions\n if (excludeMatchers.length > 0) {\n logger.info(\n `Exclude filter found. Excluding repositories matching the following patterns from ingestion: ${excludeMatchers\n .map(m => `'${m.value}'`)\n .join(', ')}`,\n );\n\n const excludedRepos = repositoriesToIngest.filter(repo => {\n return excludeMatchers.some(matcher => matcher.regex!.test(repo.name!));\n });\n\n if (excludedRepos.length > 0) {\n logger.info(\n `Excluding repositories: ${excludedRepos\n .map(r => r.name)\n .join(', ')}`,\n );\n }\n repositoriesToIngest = repositoriesToIngest.filter(repo => {\n return !excludeMatchers.some(matcher =>\n matcher.regex!.test(repo.name!),\n );\n });\n }\n\n if (repositoriesToIngest.length === 0) {\n logger.warn(\n 'No repositories found for ingestion after applying the filter',\n );\n return;\n }\n\n logger.debug(\n `Repositories to ingest: ${repositoriesToIngest\n .map(r => r.name)\n .join(', ')}`,\n );\n\n logger.info(\n `Ingesting ${repositoriesToIngest.length} repositories from Azure DevOps`,\n );\n\n // Get items from each repository and create documents to be embedded\n for (const repo of repositoriesToIngest) {\n logger.info(\n `Beginning ingestion for repository: ${repo.name} (${repo.id})`,\n );\n\n // Find the matching include matcher for this repository\n const matchingMatcher = includeMatchers.find(matcher =>\n matcher.regex!.test(repo.name!),\n );\n\n // Determine the file types to use for this repository or use default\n const repositoryFileTypesFilter = matchingMatcher?.fileTypes ?? 
fileTypes;\n\n // Determine the path exclusions to use for this repository or use global default\n const repositoryPathExclusions =\n matchingMatcher?.pathExclusions ?? globalPathExclusions;\n\n logger.info(\n `Processing file types for repository ${\n repo.name\n }: [${repositoryFileTypesFilter.join(', ')}]`,\n );\n\n logger.info(\n `Using path exclusions for repository ${\n repo.name\n }: [${repositoryPathExclusions.join(', ')}]`,\n );\n\n // Get the items to be ingested from the repository based on the file types filter\n let items = await azureDevOpsService.getRepoItems(\n repo.id!,\n repositoryFileTypesFilter,\n );\n\n // Apply path exclusion filtering\n const pathFilter = createPathFilter({\n exclusionPatterns: repositoryPathExclusions,\n });\n\n const originalItemCount = items.length;\n\n // Log excluded items for debugging\n const excludedItems = items.filter(\n item => item.path && pathFilter.shouldExcludePath(item.path),\n );\n\n if (excludedItems.length > 0) {\n logger.debug(\n `Items excluded from repository ${repo.name}: ${excludedItems\n .map(i => i.path)\n .join(', ')}`,\n );\n }\n\n items = pathFilter.filterFiles(items);\n const filteredItemCount = originalItemCount - items.length;\n\n if (filteredItemCount > 0) {\n logger.info(\n `Filtered out ${filteredItemCount} items from repository ${repo.name} based on path exclusion patterns`,\n );\n }\n\n if (items.length === 0) {\n logger.warn(\n `No items found for ingestion in the Azure DevOps repository ${\n repo.name\n } (${\n repo.id\n }) with the specified file types filter: [${repositoryFileTypesFilter.join(\n ', ',\n )}]`,\n );\n continue;\n }\n\n const { totalDocumentsIngested } = await ingestRepoByFileBatch({\n repository: repo,\n items,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the Azure DevOps repository ${repo.name} (${repo.id})`,\n );\n continue;\n }\n\n logger.info(\n `Repository ingestion 
completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for Azure DevOps repository: ${repo.name}`,\n );\n }\n };\n\n return { ingestRepositoriesBatch };\n};\n"],"names":["DEFAULT_FILE_TYPES","DEFAULT_REPO_FILE_BATCH_SIZE","DEFAULT_PATH_EXCLUSIONS","validateExclusionPatterns","getProgressStats","streamToString","MODULE_ID","createPathFilter"],"mappings":";;;;;;;;;AA+BO,MAAM,2BAA2B,OAAO;AAAA,EAC7C,MAAA;AAAA,EACA,MAAA;AAAA,EACA;AACF,CAAA,KAAiC;AAE/B,EAAA,MAAM,kBAAA,GAAqB,MAAA,CAAO,WAAA,CAEhC,gDAAgD,CAAA;AAUlD,EAAA,MAAM,kBAAuC,EAAC;AAC9C,EAAA,MAAM,kBAAuC,EAAC;AAE9C,EAAA,IAAI,oBAAoB,OAAA,EAAS;AAC/B,IAAA,KAAA,MAAW,MAAA,IAAU,mBAAmB,OAAA,EAAS;AAC/C,MAAA,IAAI;AAEF,QAAA,MAAM,KAAA,GAAQ,IAAI,MAAA,CAAO,MAAA,CAAO,IAAI,CAAA;AACpC,QAAA,eAAA,CAAgB,IAAA,CAAK;AAAA,UACnB,OAAO,MAAA,CAAO,IAAA;AAAA,UACd,KAAA;AAAA,UACA,WAAW,MAAA,CAAO,SAAA;AAAA,UAClB,gBAAgB,MAAA,CAAO;AAAA,SACxB,CAAA;AAAA,MACH,SAAS,KAAA,EAAO;AACd,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,kDAAA,EAAqD,MAAA,CAAO,IAAI,CAAA,GAAA,EAAM,KAAK,CAAA;AAAA,SAC7E;AACA,QAAA,MAAM,IAAI,KAAA;AAAA,UACR,CAAA,oCAAA,EAAuC,MAAA,CAAO,IAAI,CAAA,GAAA,EAAM,KAAK,CAAA;AAAA,SAC/D;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,IAAI,oBAAoB,OAAA,EAAS;AAC/B,IAAA,KAAA,MAAW,MAAA,IAAU,mBAAmB,OAAA,EAAS;AAC/C,MAAA,IAAI;AAEF,QAAA,MAAM,KAAA,GAAQ,IAAI,MAAA,CAAO,MAAA,CAAO,IAAI,CAAA;AACpC,QAAA,eAAA,CAAgB,IAAA,CAAK;AAAA,UACnB,OAAO,MAAA,CAAO,IAAA;AAAA,UACd;AAAA,SACD,CAAA;AAAA,MACH,SAAS,KAAA,EAAO;AACd,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,kDAAA,EAAqD,MAAA,CAAO,IAAI,CAAA,GAAA,EAAM,KAAK,CAAA;AAAA,SAC7E;AACA,QAAA,MAAM,IAAI,KAAA;AAAA,UACR,CAAA,oCAAA,EAAuC,MAAA,CAAO,IAAI,CAAA,GAAA,EAAM,KAAK,CAAA;AAAA,SAC/D;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,IAAI,eAAA,CAAgB,SAAS,CAAA,EAAG;AAC9B,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,4BAAA,EAA+B,eAAA,CAC5B,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAA,EAAI,CAAA,CAAE,KAAK,CAAA,CAAA,CAAG,CAAA,CACvB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAAA,EACF;AACA,EAAA,IAAI,eAAA,CAAgB,SAAS,CAAA,EAAG;AAC9B,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,4BAAA,EAA+B,eAAA,CAC
5B,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAA,EAAI,CAAA,CAAE,KAAK,CAAA,CAAA,CAAG,CAAA,CACvB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAAA,EACF;AAGA,EAAA,MAAM,YACJ,MAAA,CAAO,sBAAA;AAAA,IACL;AAAA,GACF,IAAKA,mCAAA;AAGP,EAAA,MAAM,iBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA;AAAA,GACF,IAAKC,qDAAA;AAGP,EAAA,MAAM,uBACJ,MAAA,CAAO,sBAAA;AAAA,IACL;AAAA,GACF,IAAKC,6CAAA;AAGP,EAAA,MAAM,UAAA,GAAaC,2DAA0B,oBAAoB,CAAA;AACjE,EAAA,IAAI,CAAC,WAAW,OAAA,EAAS;AACvB,IAAA,MAAA,CAAO,KAAA;AAAA,MACL,CAAA,wEAAA,EAA2E,WAAW,MAAA,CAAO,IAAA;AAAA,QAC3F;AAAA,OACD,CAAA;AAAA,KACH;AACA,IAAA,MAAM,IAAI,KAAA;AAAA,MACR,CAAA,iCAAA,EAAoC,UAAA,CAAW,MAAA,CAAO,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KAClE;AAAA,EACF;AACA,EAAA,IAAI,UAAA,CAAW,QAAA,CAAS,MAAA,GAAS,CAAA,EAAG;AAClC,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,iCAAA,EAAoC,UAAA,CAAW,QAAA,CAAS,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACpE;AAAA,EACF;AASA,EAAA,MAAM,wBAAwB,OAAO;AAAA,IACnC,UAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,2BAA2B,UAAA,CAAW,IAAI,mBAAmB,cAAc,CAAA;AAAA,KACvG;AAEA,IAAA,MAAA,CAAO,KAAA,CAAM,UAAU,IAAA,CAAK,SAAA,CAAU,OAAO,IAAA,EAAM,CAAC,CAAC,CAAA,CAAE,CAAA;AAEvD,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAG7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAE5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,wBAAA,EAA2B,UAAA,CAAW,IAAI,CAAA,CAAA;AAAA,OACjH;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,CAAmB,kBAAA;AAAA,UACvC,UAAA,CAAW,EAAA;AAAA,UACX,IAAA,CAAK;AAAA,SACP;AAEA,QAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,WAAA,GAAc,C
AAA,EAAG,MAAM,MAAM,CAAA;AAEtE,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,yCAAA,EAA4C,IAAA,CAAK,IAAI,CAAA,iBAAA,EAAoB,WAAW,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,2BAAA;AAAA,SAC3L;AAEA,QAAA,MAAM,IAAA,GAAO,MAAMC,6CAAA,CAAe,OAAO,CAAA;AAEzC,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,QAAA,EAAU;AAAA,YACR,MAAA,EAAQC,kBAAA;AAAA,YACR,IAAI,CAAA,EAAG,UAAA,CAAW,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,YACjC,KAAK,IAAA,CAAK,GAAA;AAAA,YACV,cAAc,kBAAA,CAAmB,YAAA;AAAA,YACjC,SAAS,kBAAA,CAAmB,OAAA;AAAA,YAC5B,YAAY,UAAA,CAAW;AAAA,WACzB;AAAA,UACA,OAAA,EAAS;AAAA,SACX;AAEA,QAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,MACzB;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,iDAAA,EAAoD,UAAA,CAAW,IAAI,CAAA;AAAA,OACxI;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAMA,EAAA,MAAM,uBAAA,GAA0B,OAC9B,kBAAA,KACG;AACH,IAAA,MAAM,gBAAA,GAAmB,MAAM,kBAAA,CAAmB,QAAA,EAAS;AAE3D,IAAA,IAAI,gBAAA,CAAiB,WAAW,CAAA,EAAG;AACjC,MAAA,MAAA,CAAO,KAAK,mDAAmD,CAAA;AAC/D,MAAA;AAAA,IACF;AAGA,IAAA,IAAI,oBAAA,GAAuB,gBAAA;AAG3B,IAAA,IAAI,eAAA,CAAgB,SAAS,CAAA,EAAG;AAC9B,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iGAAA,EAAoG,eAAA,CACjG,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAA,EAAI,CAAA,CAAE,KAAK,CAAA,CAAA,CAAG,CAAA,CACvB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,OACf;AAEA,MAAA,oBAAA,GAAuB,oBAAA,CAAqB,OAAO,CAAA,IAAA,KAAQ;AACzD,QAAA,OAAO,eAAA,CAAgB,KAAK,CAAA,OAAA,KAAW,OAAA,CAAQ,MAAO,IAAA,CAAK,IAAA,CAAK,IAAK,CAAC,CAAA;AAAA,MACxE,CAAC,CAAA;AAAA,IACH;AAGA,IAAA,IAAI,eAAA,CAAgB,SAAS,CAAA,EAAG;AAC9B,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,6FAAA,EAAgG,eAAA,CAC7F,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAA,EAAI,CAAA,CAAE,KAAK,CAAA,CAAA,CAAG,CAAA,CACvB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,OACf;AAEA,MAAA,MAAM,aAAA,GAAgB,oBAAA,CAAqB,MAAA,CAAO,CAAA,IAAA,KAAQ;AACxD,QAAA,OAAO,eAAA,CAAgB,KAAK,CAAA,OAAA,KAAW,OAAA,CAAQ,MAAO,IAAA,CAAK,IAAA,CAAK,IAAK,CAAC,CAAA;AAAA,MACxE,CAAC,CAAA;AAED,MAAA,IAAI,aAAA,CAAc,SAAS,CAAA,EAAG;AAC5B,Q
AAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,wBAAA,EAA2B,cACxB,GAAA,CAAI,CAAA,CAAA,KAAK,EAAE,IAAI,CAAA,CACf,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,SACf;AAAA,MACF;AACA,MAAA,oBAAA,GAAuB,oBAAA,CAAqB,OAAO,CAAA,IAAA,KAAQ;AACzD,QAAA,OAAO,CAAC,eAAA,CAAgB,IAAA;AAAA,UAAK,CAAA,OAAA,KAC3B,OAAA,CAAQ,KAAA,CAAO,IAAA,CAAK,KAAK,IAAK;AAAA,SAChC;AAAA,MACF,CAAC,CAAA;AAAA,IACH;AAEA,IAAA,IAAI,oBAAA,CAAqB,WAAW,CAAA,EAAG;AACrC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL;AAAA,OACF;AACA,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,KAAA;AAAA,MACL,CAAA,wBAAA,EAA2B,qBACxB,GAAA,CAAI,CAAA,CAAA,KAAK,EAAE,IAAI,CAAA,CACf,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,UAAA,EAAa,qBAAqB,MAAM,CAAA,+BAAA;AAAA,KAC1C;AAGA,IAAA,KAAA,MAAW,QAAQ,oBAAA,EAAsB;AACvC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,oCAAA,EAAuC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,OAC9D;AAGA,MAAA,MAAM,kBAAkB,eAAA,CAAgB,IAAA;AAAA,QAAK,CAAA,OAAA,KAC3C,OAAA,CAAQ,KAAA,CAAO,IAAA,CAAK,KAAK,IAAK;AAAA,OAChC;AAGA,MAAA,MAAM,yBAAA,GAA4B,iBAAiB,SAAA,IAAa,SAAA;AAGhE,MAAA,MAAM,wBAAA,GACJ,iBAAiB,cAAA,IAAkB,oBAAA;AAErC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,wCACE,IAAA,CAAK,IACP,MAAM,yBAAA,CAA0B,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA;AAAA,OAC5C;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,wCACE,IAAA,CAAK,IACP,MAAM,wBAAA,CAAyB,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA;AAAA,OAC3C;AAGA,MAAA,IAAI,KAAA,GAAQ,MAAM,kBAAA,CAAmB,YAAA;AAAA,QACnC,IAAA,CAAK,EAAA;AAAA,QACL;AAAA,OACF;AAGA,MAAA,MAAM,aAAaC,iDAAA,CAAiB;AAAA,QAClC,iBAAA,EAAmB;AAAA,OACpB,CAAA;AAED,MAAA,MAAM,oBAAoB,KAAA,CAAM,MAAA;AAGhC,MAAA,MAAM,gBAAgB,KAAA,CAAM,MAAA;AAAA,QAC1B,UAAQ,IAAA,CAAK,IAAA,IAAQ,UAAA,CAAW,iBAAA,CAAkB,KAAK,IAAI;AAAA,OAC7D;AAEA,MAAA,IAAI,aAAA,CAAc,SAAS,CAAA,EAAG;AAC5B,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,+BAAA,EAAkC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,aAAA,CAC7C,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAE,IAAI,CAAA,CACf,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,SACf;AAAA,MACF;AAEA,MAAA,KAAA,GAAQ,UAAA,CAAW,YAAY,KAAK,CAAA;AACpC,MAAA,MAAM,iBAAA,GAAoB,oBAAoB,KAAA,CAAM,MAAA;AAEpD,MAAA,IAAI,oBAAoB,CAAA,EAAG;AACzB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,aAAA,EAAgB,iBAAiB
,CAAA,uBAAA,EAA0B,IAAA,CAAK,IAAI,CAAA,iCAAA;AAAA,SACtE;AAAA,MACF;AAEA,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,+DACE,IAAA,CAAK,IACP,KACE,IAAA,CAAK,EACP,4CAA4C,yBAAA,CAA0B,IAAA;AAAA,YACpE;AAAA,WACD,CAAA,CAAA;AAAA,SACH;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MAAM,qBAAA,CAAsB;AAAA,QAC7D,UAAA,EAAY,IAAA;AAAA,QACZ,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,mFAAA,EAAsF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SAC7G;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,gCAAA,EAAmC,sBAAsB,CAAA,8EAAA,EAAiF,IAAA,CAAK,IAAI,CAAA;AAAA,OACrJ;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,OAAO,EAAE,uBAAA,EAAwB;AACnC;;;;"}
|
|
@@ -11,6 +11,54 @@ const createWikiIngestor = async ({
|
|
|
11
11
|
azureDevOpsService
|
|
12
12
|
}) => {
|
|
13
13
|
const wikisFilter = config.getOptional("aiAssistant.ingestors.azureDevOps.wikis");
|
|
14
|
+
const includeMatchers = [];
|
|
15
|
+
const excludeMatchers = [];
|
|
16
|
+
if (wikisFilter?.include) {
|
|
17
|
+
for (const filter of wikisFilter.include) {
|
|
18
|
+
try {
|
|
19
|
+
const regex = new RegExp(filter.name);
|
|
20
|
+
includeMatchers.push({
|
|
21
|
+
value: filter.name,
|
|
22
|
+
regex
|
|
23
|
+
});
|
|
24
|
+
} catch (error) {
|
|
25
|
+
logger.error(
|
|
26
|
+
`Invalid regular expression in wiki include '${filter.name}': ${error}`
|
|
27
|
+
);
|
|
28
|
+
throw new Error(
|
|
29
|
+
`Invalid wiki include pattern '${filter.name}': ${error}`
|
|
30
|
+
);
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
if (wikisFilter?.exclude) {
|
|
35
|
+
for (const filter of wikisFilter.exclude) {
|
|
36
|
+
try {
|
|
37
|
+
const regex = new RegExp(filter.name);
|
|
38
|
+
excludeMatchers.push({
|
|
39
|
+
value: filter.name,
|
|
40
|
+
regex
|
|
41
|
+
});
|
|
42
|
+
} catch (error) {
|
|
43
|
+
logger.error(
|
|
44
|
+
`Invalid regular expression in wiki exclude '${filter.name}': ${error}`
|
|
45
|
+
);
|
|
46
|
+
throw new Error(
|
|
47
|
+
`Invalid wiki exclude pattern '${filter.name}': ${error}`
|
|
48
|
+
);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
if (includeMatchers.length > 0) {
|
|
53
|
+
logger.info(
|
|
54
|
+
`Wiki include filters: ${includeMatchers.map((m) => `'${m.value}'`).join(", ")}`
|
|
55
|
+
);
|
|
56
|
+
}
|
|
57
|
+
if (excludeMatchers.length > 0) {
|
|
58
|
+
logger.info(
|
|
59
|
+
`Wiki exclude filters: ${excludeMatchers.map((m) => `'${m.value}'`).join(", ")}`
|
|
60
|
+
);
|
|
61
|
+
}
|
|
14
62
|
const pagesBatchSize = config.getOptionalNumber(
|
|
15
63
|
"aiAssistant.ingestors.azureDevOps.pagesBatchSize"
|
|
16
64
|
) ?? defaultWikiPageBatchSize.DEFAULT_WIKI_PAGE_BATCH_SIZE;
|
|
@@ -84,18 +132,40 @@ const createWikiIngestor = async ({
|
|
|
84
132
|
logger.warn("No wikis found in the Azure DevOps project");
|
|
85
133
|
return;
|
|
86
134
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
135
|
+
let wikisToIngest = wikisList;
|
|
136
|
+
if (includeMatchers.length > 0) {
|
|
137
|
+
logger.info(
|
|
138
|
+
`Include filter found. Only including wikis matching the following patterns for ingestion: ${includeMatchers.map((m) => `'${m.value}'`).join(", ")}`
|
|
139
|
+
);
|
|
140
|
+
wikisToIngest = wikisToIngest.filter((wiki) => {
|
|
141
|
+
return includeMatchers.some((matcher) => matcher.regex.test(wiki.name));
|
|
142
|
+
});
|
|
143
|
+
}
|
|
144
|
+
if (excludeMatchers.length > 0) {
|
|
145
|
+
logger.info(
|
|
146
|
+
`Exclude filter found. Excluding wikis matching the following patterns from ingestion: ${excludeMatchers.map((m) => `'${m.value}'`).join(", ")}`
|
|
147
|
+
);
|
|
148
|
+
const excludedWikis = wikisToIngest.filter((wiki) => {
|
|
149
|
+
return excludeMatchers.some((matcher) => matcher.regex.test(wiki.name));
|
|
150
|
+
});
|
|
151
|
+
if (excludedWikis.length > 0) {
|
|
152
|
+
logger.info(
|
|
153
|
+
`Excluding wikis: ${excludedWikis.map((w) => w.name).join(", ")}`
|
|
154
|
+
);
|
|
155
|
+
}
|
|
156
|
+
wikisToIngest = wikisToIngest.filter((wiki) => {
|
|
157
|
+
return !excludeMatchers.some(
|
|
158
|
+
(matcher) => matcher.regex.test(wiki.name)
|
|
159
|
+
);
|
|
160
|
+
});
|
|
161
|
+
}
|
|
95
162
|
if (wikisToIngest.length === 0) {
|
|
96
163
|
logger.warn("No wikis found for ingestion after applying the filter");
|
|
97
164
|
return;
|
|
98
165
|
}
|
|
166
|
+
logger.debug(
|
|
167
|
+
`Wikis to ingest: ${wikisToIngest.map((w) => w.name).join(", ")}`
|
|
168
|
+
);
|
|
99
169
|
logger.info(`Ingesting ${wikisToIngest.length} wikis from Azure DevOps`);
|
|
100
170
|
for (const wiki of wikisToIngest) {
|
|
101
171
|
logger.info(`Beginning ingestion for wiki: ${wiki.name} (${wiki.id})`);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"wiki.cjs.js","sources":["../../../src/services/ingestor/wiki.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport {\n EmbeddingDocument,\n IngestorOptions,\n streamToString,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { AzureDevOpsService } from '../azure-devops';\nimport { Config } from '../../../config';\nimport { MODULE_ID } from '../../constants/module';\nimport { getProgressStats } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_WIKI_PAGE_BATCH_SIZE } from '../../constants/default-wiki-page-batch-size';\nimport {\n WikiPage,\n WikiV2,\n} from 'azure-devops-node-api/interfaces/WikiInterfaces';\n\ntype WikiIngestorOptions = {\n config: RootConfigService;\n logger: LoggerService;\n azureDevOpsService: AzureDevOpsService;\n};\n\nexport const createWikiIngestor = async ({\n config,\n logger,\n azureDevOpsService,\n}: WikiIngestorOptions) => {\n // Get configuration values\n const wikisFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['azureDevOps']['wikis']\n >('aiAssistant.ingestors.azureDevOps.wikis');\n\n // Get batch size for processing pages (default to 50 pages per batch)\n const pagesBatchSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestors.azureDevOps.pagesBatchSize',\n ) ?? 
DEFAULT_WIKI_PAGE_BATCH_SIZE;\n\n /** Ingest Azure DevOps wiki pages in batches\n * @param wiki - The wiki to ingest pages from\n * @param pages - The list of pages to ingest from the wiki\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the wiki\n */\n const ingestWikiByPageBatch = async ({\n wiki,\n pages,\n saveDocumentsBatch,\n }: {\n wiki: WikiV2;\n pages: WikiPage[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${pages.length} pages from wiki \"${wiki.name}\" in batches of ${pagesBatchSize}`,\n );\n\n let totalDocumentsIngested = 0;\n\n // Process pages in batches to manage memory and performance\n\n // Calculate total number of batches\n const totalBatches = Math.ceil(pages.length / pagesBatchSize);\n\n // Process each batch\n for (\n let batchStart = 0;\n batchStart < pages.length;\n batchStart += pagesBatchSize\n ) {\n const batchEnd = Math.min(batchStart + pagesBatchSize, pages.length);\n const pagesBatch = pages.slice(batchStart, batchEnd);\n const batchNumber = Math.floor(batchStart / pagesBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${pagesBatch.length} pages) for wiki \"${wiki.name}\"`,\n );\n\n // Generate embedding documents for each page in the current batch\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < pagesBatch.length; index++) {\n const page = pagesBatch[index];\n const globalIndex = batchStart + index;\n\n const content = await azureDevOpsService.getWikiPageContent(\n wiki.id!,\n page.path!,\n );\n\n const completionStats = getProgressStats(globalIndex + 1, pages.length);\n\n logger.info(\n `Retrieved content for Azure DevOps page: \"${page.path}\" in wiki: \"${wiki.name}\" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of wiki]`,\n );\n\n // The API 
returns plain markdown text directly\n const pageContent = await streamToString(content);\n\n logger.debug(\n `Raw response for page \"${page.path}\" (length: ${pageContent.length})`,\n );\n // Use remoteUrl which points to the user-facing wiki page, not the API endpoint\n const pageUrl = page.remoteUrl || page.url!;\n\n // Check if we have actual content (not empty or just whitespace)\n if (!pageContent || pageContent.trim().length === 0) {\n logger.warn(\n `No content found for Azure DevOps page: \"${page.path}\" in wiki: \"${wiki.name}\". Skipping.`,\n );\n continue;\n }\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${wiki.id}:${page.path}`,\n url: pageUrl,\n organization: azureDevOpsService.organization,\n project: azureDevOpsService.project,\n wiki: wiki.name!,\n },\n content: pageContent,\n };\n\n logger.debug(\n `Created embedding document for Azure DevOps page: \"${page.path}\" in wiki: \"${wiki.name}\" content length: \"${document.content.length}\", page url: \"${document.metadata.url}\"`,\n );\n\n documents.push(document);\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for Azure DevOps wiki: ${wiki.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest Azure DevOps wikis in batches */\n const ingestWikisBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const wikisList = await azureDevOpsService.getWikis();\n\n if (wikisList.length === 0) {\n logger.warn('No wikis found in the Azure DevOps project');\n return;\n }\n\n logger.info(\n `Filtering for wikis: ${wikisFilter?.map(repo => repo.name).join(', ')}`,\n );\n\n // Filter wikis if a filter is provided in the config\n const wikisToIngest = wikisFilter\n ? 
wikisList.filter(wiki =>\n wikisFilter?.some(\n filteredWiki =>\n filteredWiki.name.toLowerCase() === wiki.name!.toLowerCase(),\n ),\n )\n : wikisList;\n\n if (wikisToIngest.length === 0) {\n logger.warn('No wikis found for ingestion after applying the filter');\n return;\n }\n\n logger.info(`Ingesting ${wikisToIngest.length} wikis from Azure DevOps`);\n\n // Get items from each wiki and create documents to be embedded\n for (const wiki of wikisToIngest) {\n logger.info(`Beginning ingestion for wiki: ${wiki.name} (${wiki.id})`);\n\n // Get the pages to be ingested from the wiki based on the file types filter\n const pages = await azureDevOpsService.getWikiPages(wiki.id!);\n\n if (pages.length === 0) {\n logger.warn(\n `No pages found for ingestion in the Azure DevOps wiki ${wiki.name} (${wiki.id})`,\n );\n continue;\n }\n\n const { totalDocumentsIngested } = await ingestWikiByPageBatch({\n wiki,\n pages,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the Azure DevOps wiki ${wiki.name} (${wiki.id})`,\n );\n continue;\n }\n\n logger.info(\n `Wiki ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for Azure DevOps wiki: ${wiki.name}`,\n );\n }\n };\n\n return { ingestWikisBatch 
};\n};\n"],"names":["DEFAULT_WIKI_PAGE_BATCH_SIZE","getProgressStats","streamToString","MODULE_ID"],"mappings":";;;;;;;AAyBO,MAAM,qBAAqB,OAAO;AAAA,EACvC,MAAA;AAAA,EACA,MAAA;AAAA,EACA;AACF,CAAA,KAA2B;AAEzB,EAAA,MAAM,WAAA,GAAc,MAAA,CAAO,WAAA,CAEzB,yCAAyC,CAAA;AAG3C,EAAA,MAAM,iBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA,GACF,IAAKA,qDAAA;AAQP,EAAA,MAAM,wBAAwB,OAAO;AAAA,IACnC,IAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,qBAAqB,IAAA,CAAK,IAAI,mBAAmB,cAAc,CAAA;AAAA,KAC3F;AAEA,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAK7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAG5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,kBAAA,EAAqB,IAAA,CAAK,IAAI,CAAA,CAAA;AAAA,OACrG;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,CAAmB,kBAAA;AAAA,UACvC,IAAA,CAAK,EAAA;AAAA,UACL,IAAA,CAAK;AAAA,SACP;AAEA,QAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,WAAA,GAAc,CAAA,EAAG,MAAM,MAAM,CAAA;AAEtE,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,0CAAA,EAA6C,IAAA,CAAK,IAAI,CAAA,YAAA,EAAe,KAAK,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,qBAAA;AAAA,SACjL;AAGA,QAAA,MAAM,WAAA,GAAc,MAAMC,6CAAA,CAAe,OAAO,CAAA;AAEhD,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,uBAAA,EAA0B,IAAA,CAAK,IAAI,CAAA,WAAA,EAAc,YAAY,MAAM,CAAA,CAAA;AAAA,SACrE;AAEA,QAAA,MAAM,OAAA,GAAU,IAAA,CAAK,SAAA,IAAa,IAAA,CAAK,GAAA;AAGvC,QAAA,IAAI,CAAC,WAAA,IAAe,WAAA,CAAY,IAAA,EAAK,CAAE,WAAW,CAAA,EAAG;AACnD,UAAA,MAAA,CAAO,IAAA;AAAA,YACL,CAAA,yCAAA,EAA4C,IAAA,CAAK,I
AAI,CAAA,YAAA,EAAe,KAAK,IAAI,CAAA,YAAA;AAAA,WAC/E;AACA,UAAA;AAAA,QACF;AAEA,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,QAAA,EAAU;AAAA,YACR,MAAA,EAAQC,kBAAA;AAAA,YACR,IAAI,CAAA,EAAG,IAAA,CAAK,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,YAC3B,GAAA,EAAK,OAAA;AAAA,YACL,cAAc,kBAAA,CAAmB,YAAA;AAAA,YACjC,SAAS,kBAAA,CAAmB,OAAA;AAAA,YAC5B,MAAM,IAAA,CAAK;AAAA,WACb;AAAA,UACA,OAAA,EAAS;AAAA,SACX;AAEA,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,mDAAA,EAAsD,IAAA,CAAK,IAAI,CAAA,YAAA,EAAe,IAAA,CAAK,IAAI,CAAA,mBAAA,EAAsB,QAAA,CAAS,OAAA,CAAQ,MAAM,CAAA,cAAA,EAAiB,QAAA,CAAS,SAAS,GAAG,CAAA,CAAA;AAAA,SAC5K;AAEA,QAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,MACzB;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,2CAAA,EAA8C,IAAA,CAAK,IAAI,CAAA;AAAA,OAC5H;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAGA,EAAA,MAAM,gBAAA,GAAmB,OACvB,kBAAA,KACG;AACH,IAAA,MAAM,SAAA,GAAY,MAAM,kBAAA,CAAmB,QAAA,EAAS;AAEpD,IAAA,IAAI,SAAA,CAAU,WAAW,CAAA,EAAG;AAC1B,MAAA,MAAA,CAAO,KAAK,4CAA4C,CAAA;AACxD,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,qBAAA,EAAwB,aAAa,GAAA,CAAI,CAAA,IAAA,KAAQ,KAAK,IAAI,CAAA,CAAE,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACxE;AAGA,IAAA,MAAM,aAAA,GAAgB,cAClB,SAAA,CAAU,MAAA;AAAA,MAAO,UACf,WAAA,EAAa,IAAA;AAAA,QACX,kBACE,YAAA,CAAa,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAM,WAAA;AAAY;AAC/D,KACF,GACA,SAAA;AAEJ,IAAA,IAAI,aAAA,CAAc,WAAW,CAAA,EAAG;AAC9B,MAAA,MAAA,CAAO,KAAK,wDAAwD,CAAA;AACpE,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA,CAAK,CAAA,UAAA,EAAa,aAAA,CAAc,MAAM,CAAA,wBAAA,CAA0B,CAAA;AAGvE,IAAA,KAAA,MAAW,QAAQ,aAAA,EAAe;AAChC,MAAA,MAAA,CAAO,KAAK,CAAA,8BAAA,EAAiC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,IAAA,CAAK,EAAE,CAAA,CAAA,CAAG,CAAA;AAGrE,MAAA,MAAM,KAAA,GAAQ,MAAM,kBAAA,CAAmB,YAAA,CAAa,KAAK,EAAG,CAAA;AAE5D,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,sDAAA,EAAyD,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SAChF;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MA
AM,qBAAA,CAAsB;AAAA,QAC7D,IAAA;AAAA,QACA,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,6EAAA,EAAgF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SACvG;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,0BAAA,EAA6B,sBAAsB,CAAA,wEAAA,EAA2E,IAAA,CAAK,IAAI,CAAA;AAAA,OACzI;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,OAAO,EAAE,gBAAA,EAAiB;AAC5B;;;;"}
|
|
1
|
+
{"version":3,"file":"wiki.cjs.js","sources":["../../../src/services/ingestor/wiki.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport {\n EmbeddingDocument,\n IngestorOptions,\n streamToString,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { AzureDevOpsService } from '../azure-devops';\nimport { Config } from '../../../config';\nimport { MODULE_ID } from '../../constants/module';\nimport { getProgressStats } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_WIKI_PAGE_BATCH_SIZE } from '../../constants/default-wiki-page-batch-size';\nimport {\n WikiPage,\n WikiV2,\n} from 'azure-devops-node-api/interfaces/WikiInterfaces';\n\ntype WikiIngestorOptions = {\n config: RootConfigService;\n logger: LoggerService;\n azureDevOpsService: AzureDevOpsService;\n};\n\nexport const createWikiIngestor = async ({\n config,\n logger,\n azureDevOpsService,\n}: WikiIngestorOptions) => {\n // Get configuration values\n const wikisFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['azureDevOps']['wikis']\n >('aiAssistant.ingestors.azureDevOps.wikis');\n\n // Process and validate wiki filters\n type WikiMatcher = {\n value: string;\n regex: RegExp;\n };\n\n const includeMatchers: WikiMatcher[] = [];\n const excludeMatchers: WikiMatcher[] = [];\n\n if (wikisFilter?.include) {\n for (const filter of wikisFilter.include) {\n try {\n // All strings are treated as regex patterns; escape special characters for exact literal matches\n const regex = new RegExp(filter.name);\n includeMatchers.push({\n value: filter.name,\n regex,\n });\n } catch (error) {\n logger.error(\n `Invalid regular expression in wiki include '${filter.name}': ${error}`,\n );\n throw new Error(\n `Invalid wiki include pattern '${filter.name}': ${error}`,\n );\n }\n }\n }\n\n if (wikisFilter?.exclude) {\n for (const filter of wikisFilter.exclude) {\n try {\n // All strings are valid regex - 
plain strings match exactly, patterns match as regex\n const regex = new RegExp(filter.name);\n excludeMatchers.push({\n value: filter.name,\n regex,\n });\n } catch (error) {\n logger.error(\n `Invalid regular expression in wiki exclude '${filter.name}': ${error}`,\n );\n throw new Error(\n `Invalid wiki exclude pattern '${filter.name}': ${error}`,\n );\n }\n }\n }\n\n if (includeMatchers.length > 0) {\n logger.info(\n `Wiki include filters: ${includeMatchers\n .map(m => `'${m.value}'`)\n .join(', ')}`,\n );\n }\n if (excludeMatchers.length > 0) {\n logger.info(\n `Wiki exclude filters: ${excludeMatchers\n .map(m => `'${m.value}'`)\n .join(', ')}`,\n );\n }\n\n // Get batch size for processing pages (default to 50 pages per batch)\n const pagesBatchSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestors.azureDevOps.pagesBatchSize',\n ) ?? DEFAULT_WIKI_PAGE_BATCH_SIZE;\n\n /** Ingest Azure DevOps wiki pages in batches\n * @param wiki - The wiki to ingest pages from\n * @param pages - The list of pages to ingest from the wiki\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the wiki\n */\n const ingestWikiByPageBatch = async ({\n wiki,\n pages,\n saveDocumentsBatch,\n }: {\n wiki: WikiV2;\n pages: WikiPage[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${pages.length} pages from wiki \"${wiki.name}\" in batches of ${pagesBatchSize}`,\n );\n\n let totalDocumentsIngested = 0;\n\n // Process pages in batches to manage memory and performance\n\n // Calculate total number of batches\n const totalBatches = Math.ceil(pages.length / pagesBatchSize);\n\n // Process each batch\n for (\n let batchStart = 0;\n batchStart < pages.length;\n batchStart += pagesBatchSize\n ) {\n const batchEnd = Math.min(batchStart + pagesBatchSize, pages.length);\n const pagesBatch = pages.slice(batchStart, batchEnd);\n const 
batchNumber = Math.floor(batchStart / pagesBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${pagesBatch.length} pages) for wiki \"${wiki.name}\"`,\n );\n\n // Generate embedding documents for each page in the current batch\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < pagesBatch.length; index++) {\n const page = pagesBatch[index];\n const globalIndex = batchStart + index;\n\n const content = await azureDevOpsService.getWikiPageContent(\n wiki.id!,\n page.path!,\n );\n\n const completionStats = getProgressStats(globalIndex + 1, pages.length);\n\n logger.info(\n `Retrieved content for Azure DevOps page: \"${page.path}\" in wiki: \"${wiki.name}\" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of wiki]`,\n );\n\n // The API returns plain markdown text directly\n const pageContent = await streamToString(content);\n\n logger.debug(\n `Raw response for page \"${page.path}\" (length: ${pageContent.length})`,\n );\n // Use remoteUrl which points to the user-facing wiki page, not the API endpoint\n const pageUrl = page.remoteUrl || page.url!;\n\n // Check if we have actual content (not empty or just whitespace)\n if (!pageContent || pageContent.trim().length === 0) {\n logger.warn(\n `No content found for Azure DevOps page: \"${page.path}\" in wiki: \"${wiki.name}\". 
Skipping.`,\n );\n continue;\n }\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${wiki.id}:${page.path}`,\n url: pageUrl,\n organization: azureDevOpsService.organization,\n project: azureDevOpsService.project,\n wiki: wiki.name!,\n },\n content: pageContent,\n };\n\n logger.debug(\n `Created embedding document for Azure DevOps page: \"${page.path}\" in wiki: \"${wiki.name}\" content length: \"${document.content.length}\", page url: \"${document.metadata.url}\"`,\n );\n\n documents.push(document);\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for Azure DevOps wiki: ${wiki.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest Azure DevOps wikis in batches */\n const ingestWikisBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const wikisList = await azureDevOpsService.getWikis();\n\n if (wikisList.length === 0) {\n logger.warn('No wikis found in the Azure DevOps project');\n return;\n }\n\n // Filter wikis using matchers\n let wikisToIngest = wikisList;\n\n // If include matchers exist, only include wikis that match at least one\n if (includeMatchers.length > 0) {\n logger.info(\n `Include filter found. Only including wikis matching the following patterns for ingestion: ${includeMatchers\n .map(m => `'${m.value}'`)\n .join(', ')}`,\n );\n\n wikisToIngest = wikisToIngest.filter(wiki => {\n return includeMatchers.some(matcher => matcher.regex!.test(wiki.name!));\n });\n }\n\n // Apply exclusions\n if (excludeMatchers.length > 0) {\n logger.info(\n `Exclude filter found. 
Excluding wikis matching the following patterns from ingestion: ${excludeMatchers\n .map(m => `'${m.value}'`)\n .join(', ')}`,\n );\n\n const excludedWikis = wikisToIngest.filter(wiki => {\n return excludeMatchers.some(matcher => matcher.regex!.test(wiki.name!));\n });\n\n if (excludedWikis.length > 0) {\n logger.info(\n `Excluding wikis: ${excludedWikis.map(w => w.name).join(', ')}`,\n );\n }\n wikisToIngest = wikisToIngest.filter(wiki => {\n return !excludeMatchers.some(matcher =>\n matcher.regex!.test(wiki.name!),\n );\n });\n }\n\n if (wikisToIngest.length === 0) {\n logger.warn('No wikis found for ingestion after applying the filter');\n return;\n }\n\n logger.debug(\n `Wikis to ingest: ${wikisToIngest.map(w => w.name).join(', ')}`,\n );\n\n logger.info(`Ingesting ${wikisToIngest.length} wikis from Azure DevOps`);\n\n // Get items from each wiki and create documents to be embedded\n for (const wiki of wikisToIngest) {\n logger.info(`Beginning ingestion for wiki: ${wiki.name} (${wiki.id})`);\n\n // Get the pages to be ingested from the wiki based on the file types filter\n const pages = await azureDevOpsService.getWikiPages(wiki.id!);\n\n if (pages.length === 0) {\n logger.warn(\n `No pages found for ingestion in the Azure DevOps wiki ${wiki.name} (${wiki.id})`,\n );\n continue;\n }\n\n const { totalDocumentsIngested } = await ingestWikiByPageBatch({\n wiki,\n pages,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the Azure DevOps wiki ${wiki.name} (${wiki.id})`,\n );\n continue;\n }\n\n logger.info(\n `Wiki ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for Azure DevOps wiki: ${wiki.name}`,\n );\n }\n };\n\n return { ingestWikisBatch 
};\n};\n"],"names":["DEFAULT_WIKI_PAGE_BATCH_SIZE","getProgressStats","streamToString","MODULE_ID"],"mappings":";;;;;;;AAyBO,MAAM,qBAAqB,OAAO;AAAA,EACvC,MAAA;AAAA,EACA,MAAA;AAAA,EACA;AACF,CAAA,KAA2B;AAEzB,EAAA,MAAM,WAAA,GAAc,MAAA,CAAO,WAAA,CAEzB,yCAAyC,CAAA;AAQ3C,EAAA,MAAM,kBAAiC,EAAC;AACxC,EAAA,MAAM,kBAAiC,EAAC;AAExC,EAAA,IAAI,aAAa,OAAA,EAAS;AACxB,IAAA,KAAA,MAAW,MAAA,IAAU,YAAY,OAAA,EAAS;AACxC,MAAA,IAAI;AAEF,QAAA,MAAM,KAAA,GAAQ,IAAI,MAAA,CAAO,MAAA,CAAO,IAAI,CAAA;AACpC,QAAA,eAAA,CAAgB,IAAA,CAAK;AAAA,UACnB,OAAO,MAAA,CAAO,IAAA;AAAA,UACd;AAAA,SACD,CAAA;AAAA,MACH,SAAS,KAAA,EAAO;AACd,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,4CAAA,EAA+C,MAAA,CAAO,IAAI,CAAA,GAAA,EAAM,KAAK,CAAA;AAAA,SACvE;AACA,QAAA,MAAM,IAAI,KAAA;AAAA,UACR,CAAA,8BAAA,EAAiC,MAAA,CAAO,IAAI,CAAA,GAAA,EAAM,KAAK,CAAA;AAAA,SACzD;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,IAAI,aAAa,OAAA,EAAS;AACxB,IAAA,KAAA,MAAW,MAAA,IAAU,YAAY,OAAA,EAAS;AACxC,MAAA,IAAI;AAEF,QAAA,MAAM,KAAA,GAAQ,IAAI,MAAA,CAAO,MAAA,CAAO,IAAI,CAAA;AACpC,QAAA,eAAA,CAAgB,IAAA,CAAK;AAAA,UACnB,OAAO,MAAA,CAAO,IAAA;AAAA,UACd;AAAA,SACD,CAAA;AAAA,MACH,SAAS,KAAA,EAAO;AACd,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,4CAAA,EAA+C,MAAA,CAAO,IAAI,CAAA,GAAA,EAAM,KAAK,CAAA;AAAA,SACvE;AACA,QAAA,MAAM,IAAI,KAAA;AAAA,UACR,CAAA,8BAAA,EAAiC,MAAA,CAAO,IAAI,CAAA,GAAA,EAAM,KAAK,CAAA;AAAA,SACzD;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,EAAA,IAAI,eAAA,CAAgB,SAAS,CAAA,EAAG;AAC9B,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,sBAAA,EAAyB,eAAA,CACtB,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAA,EAAI,CAAA,CAAE,KAAK,CAAA,CAAA,CAAG,CAAA,CACvB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAAA,EACF;AACA,EAAA,IAAI,eAAA,CAAgB,SAAS,CAAA,EAAG;AAC9B,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,sBAAA,EAAyB,eAAA,CACtB,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAA,EAAI,CAAA,CAAE,KAAK,CAAA,CAAA,CAAG,CAAA,CACvB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAAA,EACF;AAGA,EAAA,MAAM,iBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA,GACF,IAAKA,qDAAA;AAQP,EAAA,MAAM,wBAAwB,OAAO;AAAA,IACnC,IAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,qBAAqB,IAAA,CA
AK,IAAI,mBAAmB,cAAc,CAAA;AAAA,KAC3F;AAEA,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAK7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAG5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,kBAAA,EAAqB,IAAA,CAAK,IAAI,CAAA,CAAA;AAAA,OACrG;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,MAAM,OAAA,GAAU,MAAM,kBAAA,CAAmB,kBAAA;AAAA,UACvC,IAAA,CAAK,EAAA;AAAA,UACL,IAAA,CAAK;AAAA,SACP;AAEA,QAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,WAAA,GAAc,CAAA,EAAG,MAAM,MAAM,CAAA;AAEtE,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,0CAAA,EAA6C,IAAA,CAAK,IAAI,CAAA,YAAA,EAAe,KAAK,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,qBAAA;AAAA,SACjL;AAGA,QAAA,MAAM,WAAA,GAAc,MAAMC,6CAAA,CAAe,OAAO,CAAA;AAEhD,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,uBAAA,EAA0B,IAAA,CAAK,IAAI,CAAA,WAAA,EAAc,YAAY,MAAM,CAAA,CAAA;AAAA,SACrE;AAEA,QAAA,MAAM,OAAA,GAAU,IAAA,CAAK,SAAA,IAAa,IAAA,CAAK,GAAA;AAGvC,QAAA,IAAI,CAAC,WAAA,IAAe,WAAA,CAAY,IAAA,EAAK,CAAE,WAAW,CAAA,EAAG;AACnD,UAAA,MAAA,CAAO,IAAA;AAAA,YACL,CAAA,yCAAA,EAA4C,IAAA,CAAK,IAAI,CAAA,YAAA,EAAe,KAAK,IAAI,CAAA,YAAA;AAAA,WAC/E;AACA,UAAA;AAAA,QACF;AAEA,QAAA,MAAM,QAAA,GAA8B;AAAA,UAClC,QAAA,EAAU;AAAA,YACR,MAAA,EAAQC,kBAAA;AAAA,YACR,IAAI,CAAA,EAAG,IAAA,CAAK,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,YAC3B,GAAA,EAAK,OAAA;AAAA,YACL,cAAc,kBAAA,CAAmB,YAAA;AAAA,YACjC,SAAS,kBAAA,CAAmB,OAAA;AAAA,YAC5B,MAAM,IAAA,CAAK;AAAA,WACb;AAAA,UACA,OAAA,EAAS;AAAA,SACX;AAEA,QAAA,MAAA,CAAO,KAAA;AAAA,UACL,CAAA,mDAAA,EAAsD,IAAA,CAAK,IAAI,CAAA,YAAA,EAAe,IAAA,CAAK,IAAI,CAAA,mB
AAA,EAAsB,QAAA,CAAS,OAAA,CAAQ,MAAM,CAAA,cAAA,EAAiB,QAAA,CAAS,SAAS,GAAG,CAAA,CAAA;AAAA,SAC5K;AAEA,QAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,MACzB;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,2CAAA,EAA8C,IAAA,CAAK,IAAI,CAAA;AAAA,OAC5H;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAGA,EAAA,MAAM,gBAAA,GAAmB,OACvB,kBAAA,KACG;AACH,IAAA,MAAM,SAAA,GAAY,MAAM,kBAAA,CAAmB,QAAA,EAAS;AAEpD,IAAA,IAAI,SAAA,CAAU,WAAW,CAAA,EAAG;AAC1B,MAAA,MAAA,CAAO,KAAK,4CAA4C,CAAA;AACxD,MAAA;AAAA,IACF;AAGA,IAAA,IAAI,aAAA,GAAgB,SAAA;AAGpB,IAAA,IAAI,eAAA,CAAgB,SAAS,CAAA,EAAG;AAC9B,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,0FAAA,EAA6F,eAAA,CAC1F,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAA,EAAI,CAAA,CAAE,KAAK,CAAA,CAAA,CAAG,CAAA,CACvB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,OACf;AAEA,MAAA,aAAA,GAAgB,aAAA,CAAc,OAAO,CAAA,IAAA,KAAQ;AAC3C,QAAA,OAAO,eAAA,CAAgB,KAAK,CAAA,OAAA,KAAW,OAAA,CAAQ,MAAO,IAAA,CAAK,IAAA,CAAK,IAAK,CAAC,CAAA;AAAA,MACxE,CAAC,CAAA;AAAA,IACH;AAGA,IAAA,IAAI,eAAA,CAAgB,SAAS,CAAA,EAAG;AAC9B,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,sFAAA,EAAyF,eAAA,CACtF,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAA,EAAI,CAAA,CAAE,KAAK,CAAA,CAAA,CAAG,CAAA,CACvB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,OACf;AAEA,MAAA,MAAM,aAAA,GAAgB,aAAA,CAAc,MAAA,CAAO,CAAA,IAAA,KAAQ;AACjD,QAAA,OAAO,eAAA,CAAgB,KAAK,CAAA,OAAA,KAAW,OAAA,CAAQ,MAAO,IAAA,CAAK,IAAA,CAAK,IAAK,CAAC,CAAA;AAAA,MACxE,CAAC,CAAA;AAED,MAAA,IAAI,aAAA,CAAc,SAAS,CAAA,EAAG;AAC5B,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,iBAAA,EAAoB,cAAc,GAAA,CAAI,CAAA,CAAA,KAAK,EAAE,IAAI,CAAA,CAAE,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,SAC/D;AAAA,MACF;AACA,MAAA,aAAA,GAAgB,aAAA,CAAc,OAAO,CAAA,IAAA,KAAQ;AAC3C,QAAA,OAAO,CAAC,eAAA,CAAgB,IAAA;AAAA,UAAK,CAAA,OAAA,KAC3B,OAAA,CAAQ,KAAA,CAAO,IAAA,CAAK,KAAK,IAAK;AAAA,SAChC;AAAA,MACF,CAAC,CAAA;AAAA,IACH;AAEA,IAAA,IAAI,aAAA,CAAc,WAAW,CAAA,EAAG;AAC9B,MAAA,MAAA,CAAO,KAAK,wDAAwD,CAAA;AACpE,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,KAAA;AAAA,MACL,CAAA,iBAAA,EAAoB,cAAc,GAAA,CAAI,CAAA,CAAA,KA
AK,EAAE,IAAI,CAAA,CAAE,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KAC/D;AAEA,IAAA,MAAA,CAAO,IAAA,CAAK,CAAA,UAAA,EAAa,aAAA,CAAc,MAAM,CAAA,wBAAA,CAA0B,CAAA;AAGvE,IAAA,KAAA,MAAW,QAAQ,aAAA,EAAe;AAChC,MAAA,MAAA,CAAO,KAAK,CAAA,8BAAA,EAAiC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,IAAA,CAAK,EAAE,CAAA,CAAA,CAAG,CAAA;AAGrE,MAAA,MAAM,KAAA,GAAQ,MAAM,kBAAA,CAAmB,YAAA,CAAa,KAAK,EAAG,CAAA;AAE5D,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,sDAAA,EAAyD,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SAChF;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MAAM,qBAAA,CAAsB;AAAA,QAC7D,IAAA;AAAA,QACA,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,6EAAA,EAAgF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SACvG;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,0BAAA,EAA6B,sBAAsB,CAAA,wEAAA,EAA2E,IAAA,CAAK,IAAI,CAAA;AAAA,OACzI;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,OAAO,EAAE,gBAAA,EAAiB;AAC5B;;;;"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sweetoburrito/backstage-plugin-ai-assistant-backend-module-ingestor-azure-devops",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"license": "Apache-2.0",
|
|
5
5
|
"description": "The ingestor-azure-devops backend module for the ai-assistant plugin.",
|
|
6
6
|
"main": "dist/index.cjs.js",
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
"dependencies": {
|
|
31
31
|
"@backstage/backend-plugin-api": "^1.4.1",
|
|
32
32
|
"@sweetoburrito/backstage-plugin-ai-assistant-common": "^0.8.0",
|
|
33
|
-
"@sweetoburrito/backstage-plugin-ai-assistant-node": "^0.
|
|
33
|
+
"@sweetoburrito/backstage-plugin-ai-assistant-node": "^0.10.0",
|
|
34
34
|
"azure-devops-node-api": "^15.1.1"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|