@equinor/fusion-framework-cli-plugin-ai-index 2.1.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/esm/bin/embed.js +113 -44
- package/dist/esm/bin/embed.js.map +1 -1
- package/dist/esm/bin/get-diff.js +5 -0
- package/dist/esm/bin/get-diff.js.map +1 -1
- package/dist/esm/version.js +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types/delete-command.options.d.ts +1 -0
- package/dist/types/embeddings-command.options.d.ts +1 -0
- package/dist/types/version.d.ts +1 -1
- package/package.json +6 -6
- package/src/bin/embed.ts +137 -61
- package/src/bin/get-diff.ts +5 -0
- package/src/version.ts +1 -1
|
@@ -11,6 +11,7 @@ export declare const DeleteOptionsSchema: z.ZodObject<{
|
|
|
11
11
|
clientId: z.ZodOptional<z.ZodString>;
|
|
12
12
|
chatModel: z.ZodOptional<z.ZodString>;
|
|
13
13
|
embedModel: z.ZodOptional<z.ZodString>;
|
|
14
|
+
debug: z.ZodDefault<z.ZodCoercedBoolean<unknown>>;
|
|
14
15
|
indexName: z.ZodString;
|
|
15
16
|
dryRun: z.ZodBoolean;
|
|
16
17
|
filter: z.ZodOptional<z.ZodString>;
|
|
@@ -10,6 +10,7 @@ export declare const CommandOptionsSchema: z.ZodObject<{
|
|
|
10
10
|
tenantId: z.ZodOptional<z.ZodString>;
|
|
11
11
|
clientId: z.ZodOptional<z.ZodString>;
|
|
12
12
|
chatModel: z.ZodOptional<z.ZodString>;
|
|
13
|
+
debug: z.ZodDefault<z.ZodCoercedBoolean<unknown>>;
|
|
13
14
|
embedModel: z.ZodString;
|
|
14
15
|
indexName: z.ZodString;
|
|
15
16
|
dryRun: z.ZodBoolean;
|
package/dist/types/version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const version = "2.1.0";
|
|
1
|
+
export declare const version = "3.0.0";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@equinor/fusion-framework-cli-plugin-ai-index",
|
|
3
|
-
"version": "2.1.0",
|
|
3
|
+
"version": "3.0.0",
|
|
4
4
|
"description": "AI indexing plugin for Fusion Framework CLI providing document embedding and chunking utilities",
|
|
5
5
|
"main": "dist/esm/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -53,18 +53,18 @@
|
|
|
53
53
|
"tree-sitter-typescript": "^0.23.2",
|
|
54
54
|
"ts-morph": "^28.0.0",
|
|
55
55
|
"zod": "^4.3.6",
|
|
56
|
+
"@equinor/fusion-framework-cli-plugin-ai-base": "4.0.0",
|
|
57
|
+
"@equinor/fusion-framework-module-ai": "4.0.0",
|
|
56
58
|
"@equinor/fusion-framework-module": "6.0.0",
|
|
57
|
-
"@equinor/fusion-imports": "2.0.0"
|
|
58
|
-
"@equinor/fusion-framework-cli-plugin-ai-base": "3.0.0",
|
|
59
|
-
"@equinor/fusion-framework-module-ai": "4.0.0"
|
|
59
|
+
"@equinor/fusion-imports": "2.0.0"
|
|
60
60
|
},
|
|
61
61
|
"peerDependencies": {
|
|
62
|
-
"@equinor/fusion-framework-cli": "^
|
|
62
|
+
"@equinor/fusion-framework-cli": "^15.0.0"
|
|
63
63
|
},
|
|
64
64
|
"devDependencies": {
|
|
65
65
|
"typescript": "^5.9.3",
|
|
66
66
|
"vitest": "^4.1.0",
|
|
67
|
-
"@equinor/fusion-framework-cli": "^
|
|
67
|
+
"@equinor/fusion-framework-cli": "^15.0.0"
|
|
68
68
|
},
|
|
69
69
|
"scripts": {
|
|
70
70
|
"build": "tsc -b",
|
package/src/bin/embed.ts
CHANGED
|
@@ -36,9 +36,16 @@ import { generateChunkId } from '../utils/generate-chunk-id.js';
|
|
|
36
36
|
/** Braille spinner frames (same as ora's default). */
|
|
37
37
|
const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
|
|
38
38
|
|
|
39
|
+
/** Whether the process is running in a non-interactive environment (CI). */
|
|
40
|
+
const IS_CI = !process.stdout.isTTY || Boolean(process.env.CI);
|
|
41
|
+
|
|
39
42
|
/**
|
|
40
43
|
* Manages a fixed block of sticky progress lines with per-line spinners.
|
|
41
44
|
* Each line can be updated independently without overwriting the others.
|
|
45
|
+
*
|
|
46
|
+
* In non-interactive environments (CI) the ANSI cursor-movement dance is
|
|
47
|
+
* replaced with simple `console.log` lines so the output is readable in
|
|
48
|
+
* plain-text log viewers.
|
|
42
49
|
* @internal
|
|
43
50
|
*/
|
|
44
51
|
class ProgressDisplay {
|
|
@@ -48,17 +55,27 @@ class ProgressDisplay {
|
|
|
48
55
|
private frame = 0;
|
|
49
56
|
private timer: ReturnType<typeof setInterval> | undefined;
|
|
50
57
|
|
|
58
|
+
/** Tracks last CI log time per line to throttle output. */
|
|
59
|
+
private lastCiLog: number[] = [];
|
|
60
|
+
|
|
61
|
+
/** Minimum interval (ms) between CI progress lines for the same line slot. */
|
|
62
|
+
private static CI_LOG_INTERVAL_MS = 15_000;
|
|
63
|
+
|
|
51
64
|
/** Register the line labels up front and print empty placeholders. */
|
|
52
65
|
start(count: number): void {
|
|
53
66
|
this.lines = new Array<string>(count).fill('');
|
|
54
67
|
this.spinning = new Array<boolean>(count).fill(false);
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
68
|
+
this.lastCiLog = new Array<number>(count).fill(0);
|
|
69
|
+
|
|
70
|
+
if (!IS_CI) {
|
|
71
|
+
// Print placeholder lines so the cursor block exists
|
|
72
|
+
for (let i = 0; i < count; i++) {
|
|
73
|
+
process.stdout.write('\n');
|
|
74
|
+
}
|
|
75
|
+
// Tick spinner at 80ms (same cadence as ora)
|
|
76
|
+
this.timer = setInterval(() => this.tick(), 80);
|
|
58
77
|
}
|
|
59
78
|
this.started = true;
|
|
60
|
-
// Tick spinner at 80ms (same cadence as ora)
|
|
61
|
-
this.timer = setInterval(() => this.tick(), 80);
|
|
62
79
|
}
|
|
63
80
|
|
|
64
81
|
/** Update a specific line (0-indexed) without touching the others. */
|
|
@@ -66,14 +83,27 @@ class ProgressDisplay {
|
|
|
66
83
|
if (!this.started) return;
|
|
67
84
|
this.lines[line] = message;
|
|
68
85
|
this.spinning[line] = true;
|
|
86
|
+
if (IS_CI) {
|
|
87
|
+
const now = Date.now();
|
|
88
|
+
if (now - this.lastCiLog[line] >= ProgressDisplay.CI_LOG_INTERVAL_MS) {
|
|
89
|
+
this.lastCiLog[line] = now;
|
|
90
|
+
console.log(`⏳ ${message}`);
|
|
91
|
+
}
|
|
92
|
+
return;
|
|
93
|
+
}
|
|
69
94
|
this.render(line);
|
|
70
95
|
}
|
|
71
96
|
|
|
72
97
|
/** Mark a line as completed — stops its spinner and shows a checkmark. */
|
|
73
98
|
succeed(line: number, message: string): void {
|
|
74
99
|
if (!this.started) return;
|
|
75
|
-
|
|
100
|
+
const text = `✅ ${message}`;
|
|
101
|
+
this.lines[line] = text;
|
|
76
102
|
this.spinning[line] = false;
|
|
103
|
+
if (IS_CI) {
|
|
104
|
+
console.log(text);
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
77
107
|
this.render(line);
|
|
78
108
|
}
|
|
79
109
|
|
|
@@ -81,13 +111,15 @@ class ProgressDisplay {
|
|
|
81
111
|
clear(): void {
|
|
82
112
|
if (!this.started) return;
|
|
83
113
|
if (this.timer) clearInterval(this.timer);
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
114
|
+
if (!IS_CI) {
|
|
115
|
+
// Move up to the first progress line and clear each one
|
|
116
|
+
for (let i = 0; i < this.lines.length; i++) {
|
|
117
|
+
const linesUp = this.lines.length - i;
|
|
118
|
+
process.stdout.write(`\x1b[${linesUp}A\x1b[2K\r\x1b[${linesUp}B\r`);
|
|
119
|
+
}
|
|
120
|
+
// Move cursor up past the now-empty block
|
|
121
|
+
process.stdout.write(`\x1b[${this.lines.length}A\r`);
|
|
88
122
|
}
|
|
89
|
-
// Move cursor up past the now-empty block
|
|
90
|
-
process.stdout.write(`\x1b[${this.lines.length}A\r`);
|
|
91
123
|
this.started = false;
|
|
92
124
|
}
|
|
93
125
|
|
|
@@ -129,18 +161,31 @@ const GIT_CONCURRENCY = 20;
|
|
|
129
161
|
/** Maximum parallel upsert requests to the vector store. */
|
|
130
162
|
const UPSERT_CONCURRENCY = 10;
|
|
131
163
|
|
|
132
|
-
/**
|
|
133
|
-
|
|
164
|
+
/**
|
|
165
|
+
* Number of texts to embed per API request.
|
|
166
|
+
*
|
|
167
|
+
* Azure OpenAI accepts up to 2 048 inputs per embedding call.
|
|
168
|
+
* LangChain's `batchSize` default is 1 (!) so we also set it on the
|
|
169
|
+
* client constructor. This outer batch controls how many documents
|
|
170
|
+
* are grouped before handing them to the embed client.
|
|
171
|
+
*/
|
|
172
|
+
const EMBED_BATCH_SIZE = 500;
|
|
134
173
|
|
|
135
|
-
/**
|
|
136
|
-
|
|
174
|
+
/**
|
|
175
|
+
* Number of concurrent embedding API requests in flight.
|
|
176
|
+
*
|
|
177
|
+
* Each request now carries EMBED_BATCH_SIZE texts in a single HTTP call
|
|
178
|
+
* (LangChain batchSize is aligned), so 2 concurrent requests already
|
|
179
|
+
* saturate most Azure OpenAI TPM quotas.
|
|
180
|
+
*/
|
|
181
|
+
const EMBED_BATCH_CONCURRENCY = 2;
|
|
137
182
|
|
|
138
183
|
/**
|
|
139
184
|
* Maximum time (ms) to wait before flushing a partial embedding batch.
|
|
140
185
|
* Without this, `bufferCount` waits indefinitely for a full batch, which
|
|
141
186
|
* starves `mergeMap` concurrency when upstream document throughput is slow.
|
|
142
187
|
*/
|
|
143
|
-
const EMBED_BUFFER_FLUSH_MS =
|
|
188
|
+
const EMBED_BUFFER_FLUSH_MS = 500;
|
|
144
189
|
|
|
145
190
|
/** Maximum retry attempts for transient / rate-limit errors per chunk. */
|
|
146
191
|
const MAX_RETRIES = 4;
|
|
@@ -152,9 +197,24 @@ const MAX_RETRIES = 4;
|
|
|
152
197
|
*/
|
|
153
198
|
export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
154
199
|
const { framework, options, config, filePatterns } = binOptions;
|
|
200
|
+
const debug = options.debug ?? false;
|
|
155
201
|
|
|
156
202
|
console.log(`📇 Index: ${options.indexName}`);
|
|
157
203
|
|
|
204
|
+
if (debug) {
|
|
205
|
+
console.debug('[debug] Embed model:', options.embedModel);
|
|
206
|
+
console.debug('[debug] File patterns:', filePatterns);
|
|
207
|
+
console.debug(
|
|
208
|
+
'[debug] Allowed patterns:',
|
|
209
|
+
config.index?.patterns ?? ['**/*.ts', '**/*.tsx', '**/*.md', '**/*.mdx'],
|
|
210
|
+
);
|
|
211
|
+
console.debug('[debug] Raw patterns:', config.index?.rawPatterns ?? []);
|
|
212
|
+
console.debug('[debug] Ignore patterns:', config.index?.ignore ?? defaultIgnore);
|
|
213
|
+
console.debug('[debug] Diff mode:', options.diff);
|
|
214
|
+
console.debug('[debug] Dry run:', options.dryRun);
|
|
215
|
+
console.debug('[debug] Clean:', options.clean);
|
|
216
|
+
}
|
|
217
|
+
|
|
158
218
|
const progress = new ProgressDisplay();
|
|
159
219
|
|
|
160
220
|
// Handle clean operation (destructive - deletes all existing documents)
|
|
@@ -227,6 +287,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
227
287
|
}),
|
|
228
288
|
filter((file) => {
|
|
229
289
|
const matches = multimatch(file.relativePath, allowedFilePatterns);
|
|
290
|
+
if (debug && matches.length === 0) {
|
|
291
|
+
console.debug('[debug] Skipped (no pattern match):', file.relativePath);
|
|
292
|
+
}
|
|
230
293
|
return matches.length > 0;
|
|
231
294
|
}),
|
|
232
295
|
tap((file) => {
|
|
@@ -283,6 +346,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
283
346
|
mergeMap(async (file) => {
|
|
284
347
|
const documents = await parseMarkdownFile(file);
|
|
285
348
|
docCount++;
|
|
349
|
+
if (debug) {
|
|
350
|
+
console.debug(`[debug] Markdown ${file.relativePath} → ${documents.length} chunk(s)`);
|
|
351
|
+
}
|
|
286
352
|
progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
|
|
287
353
|
return { status: file.status, documents };
|
|
288
354
|
}),
|
|
@@ -294,6 +360,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
294
360
|
map((file) => {
|
|
295
361
|
const documents = parseTsDocFromFileSync(file);
|
|
296
362
|
docCount++;
|
|
363
|
+
if (debug) {
|
|
364
|
+
console.debug(`[debug] TypeScript ${file.relativePath} → ${documents.length} chunk(s)`);
|
|
365
|
+
}
|
|
297
366
|
progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
|
|
298
367
|
return { status: file.status, documents };
|
|
299
368
|
}),
|
|
@@ -336,51 +405,52 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
336
405
|
// whichever comes first — prevents upstream starvation from blocking concurrency
|
|
337
406
|
bufferTime(EMBED_BUFFER_FLUSH_MS, null, EMBED_BATCH_SIZE),
|
|
338
407
|
filter((batch) => batch.length > 0),
|
|
339
|
-
mergeMap(
|
|
340
|
-
(
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
}
|
|
352
|
-
|
|
353
|
-
const retryAfterSec =
|
|
354
|
-
error?.response?.headers?.get?.('retry-after') ??
|
|
355
|
-
error?.responseHeaders?.['retry-after'];
|
|
356
|
-
const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
|
|
357
|
-
|
|
358
|
-
const backoffMs = 2 ** retryIndex * 1000;
|
|
359
|
-
const delayMs = Math.max(backoffMs, retryAfterMs);
|
|
360
|
-
|
|
361
|
-
console.warn(
|
|
362
|
-
`\n⏳ Retry ${retryIndex}/${MAX_RETRIES} for batch of ${batch.length} in ${delayMs}ms`,
|
|
363
|
-
);
|
|
364
|
-
return timer(delayMs);
|
|
365
|
-
},
|
|
366
|
-
}),
|
|
367
|
-
map((allEmbeddings) => {
|
|
368
|
-
return batch.map((document, i) => {
|
|
369
|
-
embeddedCount++;
|
|
370
|
-
const total = metadataDone ? metadataCount : 0;
|
|
371
|
-
const pct = total > 0 ? ` ${Math.round((embeddedCount / total) * 100)}%` : '';
|
|
372
|
-
const denominator = total > 0 ? `/${total}` : '';
|
|
373
|
-
progress.update(
|
|
374
|
-
LINE_EMBED,
|
|
375
|
-
`🧠 Embedding [${embeddedCount}${denominator}]${pct} — ${document.metadata.source}`,
|
|
408
|
+
mergeMap((batch) => {
|
|
409
|
+
if (debug) {
|
|
410
|
+
console.debug(`[debug] Embedding batch of ${batch.length} documents`);
|
|
411
|
+
}
|
|
412
|
+
return from(embeddingService.embedDocuments(batch.map((d) => d.pageContent))).pipe(
|
|
413
|
+
retry({
|
|
414
|
+
count: MAX_RETRIES,
|
|
415
|
+
delay: (error, retryIndex) => {
|
|
416
|
+
// Auth errors are terminal — abort immediately with actionable message
|
|
417
|
+
if (error?.name === 'NoAccountsError') {
|
|
418
|
+
console.error(
|
|
419
|
+
'\n🔒 Authentication expired. Run `ffc auth login` then retry with `--diff`.',
|
|
376
420
|
);
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
421
|
+
throw error;
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
const retryAfterSec =
|
|
425
|
+
error?.response?.headers?.get?.('retry-after') ??
|
|
426
|
+
error?.responseHeaders?.['retry-after'];
|
|
427
|
+
const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
|
|
428
|
+
|
|
429
|
+
const backoffMs = 2 ** retryIndex * 1000;
|
|
430
|
+
const delayMs = Math.max(backoffMs, retryAfterMs);
|
|
431
|
+
|
|
432
|
+
console.warn(
|
|
433
|
+
`\n⏳ Retry ${retryIndex}/${MAX_RETRIES} for batch of ${batch.length} in ${delayMs}ms`,
|
|
434
|
+
);
|
|
435
|
+
return timer(delayMs);
|
|
436
|
+
},
|
|
437
|
+
}),
|
|
438
|
+
map((allEmbeddings) => {
|
|
439
|
+
return batch.map((document, i) => {
|
|
440
|
+
embeddedCount++;
|
|
441
|
+
const total = metadataDone ? metadataCount : 0;
|
|
442
|
+
const pct = total > 0 ? ` ${Math.round((embeddedCount / total) * 100)}%` : '';
|
|
443
|
+
const denominator = total > 0 ? `/${total}` : '';
|
|
444
|
+
progress.update(
|
|
445
|
+
LINE_EMBED,
|
|
446
|
+
`🧠 Embedding [${embeddedCount}${denominator}]${pct} — ${document.metadata.source}`,
|
|
447
|
+
);
|
|
448
|
+
const metadata = { ...document.metadata, embedding: allEmbeddings[i] };
|
|
449
|
+
return { ...document, metadata };
|
|
450
|
+
});
|
|
451
|
+
}),
|
|
452
|
+
);
|
|
453
|
+
}, EMBED_BATCH_CONCURRENCY),
|
|
384
454
|
finalize(() => {
|
|
385
455
|
embeddingDone = true;
|
|
386
456
|
progress.succeed(LINE_EMBED, `🧠 Embedded ${embeddedCount} documents`);
|
|
@@ -398,6 +468,12 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
398
468
|
return undefined;
|
|
399
469
|
}
|
|
400
470
|
if (!options.dryRun) {
|
|
471
|
+
if (debug) {
|
|
472
|
+
console.debug(
|
|
473
|
+
`[debug] Upserting batch of ${documents.length} documents:`,
|
|
474
|
+
documents.map((d) => d.id),
|
|
475
|
+
);
|
|
476
|
+
}
|
|
401
477
|
await vectorStoreService.addDocuments(documents);
|
|
402
478
|
}
|
|
403
479
|
return {
|
package/src/bin/get-diff.ts
CHANGED
|
@@ -25,6 +25,11 @@ export async function getDiff(options: CommandOptions): Promise<ChangedFile[]> {
|
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
console.log(`📝 Found ${changedFiles.length} changed files matching patterns`);
|
|
28
|
+
if (options.debug) {
|
|
29
|
+
for (const file of changedFiles) {
|
|
30
|
+
console.debug(`[debug] ${file.status}: ${file.filepath}`);
|
|
31
|
+
}
|
|
32
|
+
}
|
|
28
33
|
return changedFiles;
|
|
29
34
|
} catch (error) {
|
|
30
35
|
console.error(`❌ Git diff error: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
package/src/version.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
// Generated by genversion.
|
|
2
|
-
export const version = '2.1.0';
|
|
2
|
+
export const version = '3.0.0';
|