@equinor/fusion-framework-cli-plugin-ai-index 2.1.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/dist/esm/bin/embed.js +122 -46
- package/dist/esm/bin/embed.js.map +1 -1
- package/dist/esm/bin/get-diff.js +5 -0
- package/dist/esm/bin/get-diff.js.map +1 -1
- package/dist/esm/version.js +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types/delete-command.options.d.ts +1 -0
- package/dist/types/embeddings-command.options.d.ts +1 -0
- package/dist/types/version.d.ts +1 -1
- package/package.json +6 -6
- package/src/bin/embed.ts +146 -63
- package/src/bin/get-diff.ts +5 -0
- package/src/version.ts +1 -1
|
@@ -11,6 +11,7 @@ export declare const DeleteOptionsSchema: z.ZodObject<{
|
|
|
11
11
|
clientId: z.ZodOptional<z.ZodString>;
|
|
12
12
|
chatModel: z.ZodOptional<z.ZodString>;
|
|
13
13
|
embedModel: z.ZodOptional<z.ZodString>;
|
|
14
|
+
debug: z.ZodDefault<z.ZodCoercedBoolean<unknown>>;
|
|
14
15
|
indexName: z.ZodString;
|
|
15
16
|
dryRun: z.ZodBoolean;
|
|
16
17
|
filter: z.ZodOptional<z.ZodString>;
|
|
@@ -10,6 +10,7 @@ export declare const CommandOptionsSchema: z.ZodObject<{
|
|
|
10
10
|
tenantId: z.ZodOptional<z.ZodString>;
|
|
11
11
|
clientId: z.ZodOptional<z.ZodString>;
|
|
12
12
|
chatModel: z.ZodOptional<z.ZodString>;
|
|
13
|
+
debug: z.ZodDefault<z.ZodCoercedBoolean<unknown>>;
|
|
13
14
|
embedModel: z.ZodString;
|
|
14
15
|
indexName: z.ZodString;
|
|
15
16
|
dryRun: z.ZodBoolean;
|
package/dist/types/version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const version = "2.1.0";
|
|
1
|
+
export declare const version = "3.0.1";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@equinor/fusion-framework-cli-plugin-ai-index",
|
|
3
|
-
"version": "2.1.0",
|
|
3
|
+
"version": "3.0.1",
|
|
4
4
|
"description": "AI indexing plugin for Fusion Framework CLI providing document embedding and chunking utilities",
|
|
5
5
|
"main": "dist/esm/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -53,18 +53,18 @@
|
|
|
53
53
|
"tree-sitter-typescript": "^0.23.2",
|
|
54
54
|
"ts-morph": "^28.0.0",
|
|
55
55
|
"zod": "^4.3.6",
|
|
56
|
+
"@equinor/fusion-framework-cli-plugin-ai-base": "4.0.0",
|
|
56
57
|
"@equinor/fusion-framework-module": "6.0.0",
|
|
57
|
-
"@equinor/fusion-
|
|
58
|
-
"@equinor/fusion-
|
|
59
|
-
"@equinor/fusion-framework-module-ai": "4.0.0"
|
|
58
|
+
"@equinor/fusion-framework-module-ai": "4.0.0",
|
|
59
|
+
"@equinor/fusion-imports": "2.0.0"
|
|
60
60
|
},
|
|
61
61
|
"peerDependencies": {
|
|
62
|
-
"@equinor/fusion-framework-cli": "^
|
|
62
|
+
"@equinor/fusion-framework-cli": "^15.0.1"
|
|
63
63
|
},
|
|
64
64
|
"devDependencies": {
|
|
65
65
|
"typescript": "^5.9.3",
|
|
66
66
|
"vitest": "^4.1.0",
|
|
67
|
-
"@equinor/fusion-framework-cli": "^
|
|
67
|
+
"@equinor/fusion-framework-cli": "^15.0.1"
|
|
68
68
|
},
|
|
69
69
|
"scripts": {
|
|
70
70
|
"build": "tsc -b",
|
package/src/bin/embed.ts
CHANGED
|
@@ -36,9 +36,16 @@ import { generateChunkId } from '../utils/generate-chunk-id.js';
|
|
|
36
36
|
/** Braille spinner frames (same as ora's default). */
|
|
37
37
|
const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
|
|
38
38
|
|
|
39
|
+
/** Whether the process is running in a non-interactive environment (CI). */
|
|
40
|
+
const IS_CI = !process.stdout.isTTY || Boolean(process.env.CI);
|
|
41
|
+
|
|
39
42
|
/**
|
|
40
43
|
* Manages a fixed block of sticky progress lines with per-line spinners.
|
|
41
44
|
* Each line can be updated independently without overwriting the others.
|
|
45
|
+
*
|
|
46
|
+
* In non-interactive environments (CI) the ANSI cursor-movement dance is
|
|
47
|
+
* replaced with simple `console.log` lines so the output is readable in
|
|
48
|
+
* plain-text log viewers.
|
|
42
49
|
* @internal
|
|
43
50
|
*/
|
|
44
51
|
class ProgressDisplay {
|
|
@@ -48,17 +55,27 @@ class ProgressDisplay {
|
|
|
48
55
|
private frame = 0;
|
|
49
56
|
private timer: ReturnType<typeof setInterval> | undefined;
|
|
50
57
|
|
|
58
|
+
/** Tracks last CI log time per line to throttle output. */
|
|
59
|
+
private lastCiLog: number[] = [];
|
|
60
|
+
|
|
61
|
+
/** Minimum interval (ms) between CI progress lines for the same line slot. */
|
|
62
|
+
private static CI_LOG_INTERVAL_MS = 15_000;
|
|
63
|
+
|
|
51
64
|
/** Register the line labels up front and print empty placeholders. */
|
|
52
65
|
start(count: number): void {
|
|
53
66
|
this.lines = new Array<string>(count).fill('');
|
|
54
67
|
this.spinning = new Array<boolean>(count).fill(false);
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
68
|
+
this.lastCiLog = new Array<number>(count).fill(0);
|
|
69
|
+
|
|
70
|
+
if (!IS_CI) {
|
|
71
|
+
// Print placeholder lines so the cursor block exists
|
|
72
|
+
for (let i = 0; i < count; i++) {
|
|
73
|
+
process.stdout.write('\n');
|
|
74
|
+
}
|
|
75
|
+
// Tick spinner at 80ms (same cadence as ora)
|
|
76
|
+
this.timer = setInterval(() => this.tick(), 80);
|
|
58
77
|
}
|
|
59
78
|
this.started = true;
|
|
60
|
-
// Tick spinner at 80ms (same cadence as ora)
|
|
61
|
-
this.timer = setInterval(() => this.tick(), 80);
|
|
62
79
|
}
|
|
63
80
|
|
|
64
81
|
/** Update a specific line (0-indexed) without touching the others. */
|
|
@@ -66,14 +83,27 @@ class ProgressDisplay {
|
|
|
66
83
|
if (!this.started) return;
|
|
67
84
|
this.lines[line] = message;
|
|
68
85
|
this.spinning[line] = true;
|
|
86
|
+
if (IS_CI) {
|
|
87
|
+
const now = Date.now();
|
|
88
|
+
if (now - this.lastCiLog[line] >= ProgressDisplay.CI_LOG_INTERVAL_MS) {
|
|
89
|
+
this.lastCiLog[line] = now;
|
|
90
|
+
console.log(`⏳ ${message}`);
|
|
91
|
+
}
|
|
92
|
+
return;
|
|
93
|
+
}
|
|
69
94
|
this.render(line);
|
|
70
95
|
}
|
|
71
96
|
|
|
72
97
|
/** Mark a line as completed — stops its spinner and shows a checkmark. */
|
|
73
98
|
succeed(line: number, message: string): void {
|
|
74
99
|
if (!this.started) return;
|
|
75
|
-
|
|
100
|
+
const text = `✅ ${message}`;
|
|
101
|
+
this.lines[line] = text;
|
|
76
102
|
this.spinning[line] = false;
|
|
103
|
+
if (IS_CI) {
|
|
104
|
+
console.log(text);
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
77
107
|
this.render(line);
|
|
78
108
|
}
|
|
79
109
|
|
|
@@ -81,13 +111,15 @@ class ProgressDisplay {
|
|
|
81
111
|
clear(): void {
|
|
82
112
|
if (!this.started) return;
|
|
83
113
|
if (this.timer) clearInterval(this.timer);
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
114
|
+
if (!IS_CI) {
|
|
115
|
+
// Move up to the first progress line and clear each one
|
|
116
|
+
for (let i = 0; i < this.lines.length; i++) {
|
|
117
|
+
const linesUp = this.lines.length - i;
|
|
118
|
+
process.stdout.write(`\x1b[${linesUp}A\x1b[2K\r\x1b[${linesUp}B\r`);
|
|
119
|
+
}
|
|
120
|
+
// Move cursor up past the now-empty block
|
|
121
|
+
process.stdout.write(`\x1b[${this.lines.length}A\r`);
|
|
88
122
|
}
|
|
89
|
-
// Move cursor up past the now-empty block
|
|
90
|
-
process.stdout.write(`\x1b[${this.lines.length}A\r`);
|
|
91
123
|
this.started = false;
|
|
92
124
|
}
|
|
93
125
|
|
|
@@ -129,18 +161,38 @@ const GIT_CONCURRENCY = 20;
|
|
|
129
161
|
/** Maximum parallel upsert requests to the vector store. */
|
|
130
162
|
const UPSERT_CONCURRENCY = 10;
|
|
131
163
|
|
|
132
|
-
/**
|
|
133
|
-
|
|
164
|
+
/**
|
|
165
|
+
* Number of texts to embed per API request.
|
|
166
|
+
*
|
|
167
|
+
* Azure OpenAI accepts up to 2 048 inputs per embedding call.
|
|
168
|
+
* LangChain's `batchSize` default is 1 (!) so we also set it on the
|
|
169
|
+
* client constructor. This outer batch controls how many documents
|
|
170
|
+
* are grouped before handing them to the embed client.
|
|
171
|
+
*/
|
|
172
|
+
const EMBED_BATCH_SIZE = 500;
|
|
134
173
|
|
|
135
|
-
/**
|
|
136
|
-
|
|
174
|
+
/**
|
|
175
|
+
* Number of concurrent embedding API requests in flight.
|
|
176
|
+
*
|
|
177
|
+
* Each request now carries EMBED_BATCH_SIZE texts in a single HTTP call
|
|
178
|
+
* (LangChain batchSize is aligned), so 3 concurrent requests should
|
|
179
|
+
* saturate most Azure OpenAI TPM quotas without triggering rate limits.
|
|
180
|
+
*/
|
|
181
|
+
const EMBED_BATCH_CONCURRENCY = 3;
|
|
137
182
|
|
|
138
183
|
/**
|
|
139
184
|
* Maximum time (ms) to wait before flushing a partial embedding batch.
|
|
140
|
-
*
|
|
141
|
-
*
|
|
185
|
+
*
|
|
186
|
+
* A longer window lets more documents accumulate before triggering an
|
|
187
|
+
* HTTP call, which drastically cuts the number of round-trips when
|
|
188
|
+
* upstream (metadata enrichment) feeds documents slowly.
|
|
189
|
+
*
|
|
190
|
+
* Full batches (EMBED_BATCH_SIZE) are emitted immediately regardless of
|
|
191
|
+
* the timer, and `bufferTime` flushes any remainder the instant the
|
|
192
|
+
* source stream completes — so a large value only affects mid-stream
|
|
193
|
+
* partial batches, not tail latency.
|
|
142
194
|
*/
|
|
143
|
-
const EMBED_BUFFER_FLUSH_MS =
|
|
195
|
+
const EMBED_BUFFER_FLUSH_MS = 10_000;
|
|
144
196
|
|
|
145
197
|
/** Maximum retry attempts for transient / rate-limit errors per chunk. */
|
|
146
198
|
const MAX_RETRIES = 4;
|
|
@@ -152,9 +204,24 @@ const MAX_RETRIES = 4;
|
|
|
152
204
|
*/
|
|
153
205
|
export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
154
206
|
const { framework, options, config, filePatterns } = binOptions;
|
|
207
|
+
const debug = options.debug ?? false;
|
|
155
208
|
|
|
156
209
|
console.log(`📇 Index: ${options.indexName}`);
|
|
157
210
|
|
|
211
|
+
if (debug) {
|
|
212
|
+
console.debug('[debug] Embed model:', options.embedModel);
|
|
213
|
+
console.debug('[debug] File patterns:', filePatterns);
|
|
214
|
+
console.debug(
|
|
215
|
+
'[debug] Allowed patterns:',
|
|
216
|
+
config.index?.patterns ?? ['**/*.ts', '**/*.tsx', '**/*.md', '**/*.mdx'],
|
|
217
|
+
);
|
|
218
|
+
console.debug('[debug] Raw patterns:', config.index?.rawPatterns ?? []);
|
|
219
|
+
console.debug('[debug] Ignore patterns:', config.index?.ignore ?? defaultIgnore);
|
|
220
|
+
console.debug('[debug] Diff mode:', options.diff);
|
|
221
|
+
console.debug('[debug] Dry run:', options.dryRun);
|
|
222
|
+
console.debug('[debug] Clean:', options.clean);
|
|
223
|
+
}
|
|
224
|
+
|
|
158
225
|
const progress = new ProgressDisplay();
|
|
159
226
|
|
|
160
227
|
// Handle clean operation (destructive - deletes all existing documents)
|
|
@@ -227,6 +294,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
227
294
|
}),
|
|
228
295
|
filter((file) => {
|
|
229
296
|
const matches = multimatch(file.relativePath, allowedFilePatterns);
|
|
297
|
+
if (debug && matches.length === 0) {
|
|
298
|
+
console.debug('[debug] Skipped (no pattern match):', file.relativePath);
|
|
299
|
+
}
|
|
230
300
|
return matches.length > 0;
|
|
231
301
|
}),
|
|
232
302
|
tap((file) => {
|
|
@@ -283,6 +353,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
283
353
|
mergeMap(async (file) => {
|
|
284
354
|
const documents = await parseMarkdownFile(file);
|
|
285
355
|
docCount++;
|
|
356
|
+
if (debug) {
|
|
357
|
+
console.debug(`[debug] Markdown ${file.relativePath} → ${documents.length} chunk(s)`);
|
|
358
|
+
}
|
|
286
359
|
progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
|
|
287
360
|
return { status: file.status, documents };
|
|
288
361
|
}),
|
|
@@ -294,6 +367,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
294
367
|
map((file) => {
|
|
295
368
|
const documents = parseTsDocFromFileSync(file);
|
|
296
369
|
docCount++;
|
|
370
|
+
if (debug) {
|
|
371
|
+
console.debug(`[debug] TypeScript ${file.relativePath} → ${documents.length} chunk(s)`);
|
|
372
|
+
}
|
|
297
373
|
progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
|
|
298
374
|
return { status: file.status, documents };
|
|
299
375
|
}),
|
|
@@ -336,51 +412,52 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
336
412
|
// whichever comes first — prevents upstream starvation from blocking concurrency
|
|
337
413
|
bufferTime(EMBED_BUFFER_FLUSH_MS, null, EMBED_BATCH_SIZE),
|
|
338
414
|
filter((batch) => batch.length > 0),
|
|
339
|
-
mergeMap(
|
|
340
|
-
(
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
}
|
|
352
|
-
|
|
353
|
-
const retryAfterSec =
|
|
354
|
-
error?.response?.headers?.get?.('retry-after') ??
|
|
355
|
-
error?.responseHeaders?.['retry-after'];
|
|
356
|
-
const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
|
|
357
|
-
|
|
358
|
-
const backoffMs = 2 ** retryIndex * 1000;
|
|
359
|
-
const delayMs = Math.max(backoffMs, retryAfterMs);
|
|
360
|
-
|
|
361
|
-
console.warn(
|
|
362
|
-
`\n⏳ Retry ${retryIndex}/${MAX_RETRIES} for batch of ${batch.length} in ${delayMs}ms`,
|
|
363
|
-
);
|
|
364
|
-
return timer(delayMs);
|
|
365
|
-
},
|
|
366
|
-
}),
|
|
367
|
-
map((allEmbeddings) => {
|
|
368
|
-
return batch.map((document, i) => {
|
|
369
|
-
embeddedCount++;
|
|
370
|
-
const total = metadataDone ? metadataCount : 0;
|
|
371
|
-
const pct = total > 0 ? ` ${Math.round((embeddedCount / total) * 100)}%` : '';
|
|
372
|
-
const denominator = total > 0 ? `/${total}` : '';
|
|
373
|
-
progress.update(
|
|
374
|
-
LINE_EMBED,
|
|
375
|
-
`🧠 Embedding [${embeddedCount}${denominator}]${pct} — ${document.metadata.source}`,
|
|
415
|
+
mergeMap((batch) => {
|
|
416
|
+
if (debug) {
|
|
417
|
+
console.debug(`[debug] Embedding batch of ${batch.length} documents`);
|
|
418
|
+
}
|
|
419
|
+
return from(embeddingService.embedDocuments(batch.map((d) => d.pageContent))).pipe(
|
|
420
|
+
retry({
|
|
421
|
+
count: MAX_RETRIES,
|
|
422
|
+
delay: (error, retryIndex) => {
|
|
423
|
+
// Auth errors are terminal — abort immediately with actionable message
|
|
424
|
+
if (error?.name === 'NoAccountsError') {
|
|
425
|
+
console.error(
|
|
426
|
+
'\n🔒 Authentication expired. Run `ffc auth login` then retry with `--diff`.',
|
|
376
427
|
);
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
428
|
+
throw error;
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
const retryAfterSec =
|
|
432
|
+
error?.response?.headers?.get?.('retry-after') ??
|
|
433
|
+
error?.responseHeaders?.['retry-after'];
|
|
434
|
+
const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
|
|
435
|
+
|
|
436
|
+
const backoffMs = 2 ** retryIndex * 1000;
|
|
437
|
+
const delayMs = Math.max(backoffMs, retryAfterMs);
|
|
438
|
+
|
|
439
|
+
console.warn(
|
|
440
|
+
`\n⏳ Retry ${retryIndex}/${MAX_RETRIES} for batch of ${batch.length} in ${delayMs}ms`,
|
|
441
|
+
);
|
|
442
|
+
return timer(delayMs);
|
|
443
|
+
},
|
|
444
|
+
}),
|
|
445
|
+
map((allEmbeddings) => {
|
|
446
|
+
return batch.map((document, i) => {
|
|
447
|
+
embeddedCount++;
|
|
448
|
+
const total = metadataDone ? metadataCount : 0;
|
|
449
|
+
const pct = total > 0 ? ` ${Math.round((embeddedCount / total) * 100)}%` : '';
|
|
450
|
+
const denominator = total > 0 ? `/${total}` : '';
|
|
451
|
+
progress.update(
|
|
452
|
+
LINE_EMBED,
|
|
453
|
+
`🧠 Embedding [${embeddedCount}${denominator}]${pct} — ${document.metadata.source}`,
|
|
454
|
+
);
|
|
455
|
+
const metadata = { ...document.metadata, embedding: allEmbeddings[i] };
|
|
456
|
+
return { ...document, metadata };
|
|
457
|
+
});
|
|
458
|
+
}),
|
|
459
|
+
);
|
|
460
|
+
}, EMBED_BATCH_CONCURRENCY),
|
|
384
461
|
finalize(() => {
|
|
385
462
|
embeddingDone = true;
|
|
386
463
|
progress.succeed(LINE_EMBED, `🧠 Embedded ${embeddedCount} documents`);
|
|
@@ -398,6 +475,12 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
398
475
|
return undefined;
|
|
399
476
|
}
|
|
400
477
|
if (!options.dryRun) {
|
|
478
|
+
if (debug) {
|
|
479
|
+
console.debug(
|
|
480
|
+
`[debug] Upserting batch of ${documents.length} documents:`,
|
|
481
|
+
documents.map((d) => d.id),
|
|
482
|
+
);
|
|
483
|
+
}
|
|
401
484
|
await vectorStoreService.addDocuments(documents);
|
|
402
485
|
}
|
|
403
486
|
return {
|
package/src/bin/get-diff.ts
CHANGED
|
@@ -25,6 +25,11 @@ export async function getDiff(options: CommandOptions): Promise<ChangedFile[]> {
|
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
console.log(`📝 Found ${changedFiles.length} changed files matching patterns`);
|
|
28
|
+
if (options.debug) {
|
|
29
|
+
for (const file of changedFiles) {
|
|
30
|
+
console.debug(`[debug] ${file.status}: ${file.filepath}`);
|
|
31
|
+
}
|
|
32
|
+
}
|
|
28
33
|
return changedFiles;
|
|
29
34
|
} catch (error) {
|
|
30
35
|
console.error(`❌ Git diff error: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
package/src/version.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// Generated by genversion.
|
|
2
|
-
export const version = '2.1.0';
|
|
2
|
+
export const version = '3.0.1';
|