@equinor/fusion-framework-cli-plugin-ai-index 2.1.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ export declare const DeleteOptionsSchema: z.ZodObject<{
11
11
  clientId: z.ZodOptional<z.ZodString>;
12
12
  chatModel: z.ZodOptional<z.ZodString>;
13
13
  embedModel: z.ZodOptional<z.ZodString>;
14
+ debug: z.ZodDefault<z.ZodCoercedBoolean<unknown>>;
14
15
  indexName: z.ZodString;
15
16
  dryRun: z.ZodBoolean;
16
17
  filter: z.ZodOptional<z.ZodString>;
@@ -10,6 +10,7 @@ export declare const CommandOptionsSchema: z.ZodObject<{
10
10
  tenantId: z.ZodOptional<z.ZodString>;
11
11
  clientId: z.ZodOptional<z.ZodString>;
12
12
  chatModel: z.ZodOptional<z.ZodString>;
13
+ debug: z.ZodDefault<z.ZodCoercedBoolean<unknown>>;
13
14
  embedModel: z.ZodString;
14
15
  indexName: z.ZodString;
15
16
  dryRun: z.ZodBoolean;
@@ -1 +1 @@
1
- export declare const version = "2.1.0";
1
+ export declare const version = "3.0.1";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@equinor/fusion-framework-cli-plugin-ai-index",
3
- "version": "2.1.0",
3
+ "version": "3.0.1",
4
4
  "description": "AI indexing plugin for Fusion Framework CLI providing document embedding and chunking utilities",
5
5
  "main": "dist/esm/index.js",
6
6
  "type": "module",
@@ -53,18 +53,18 @@
53
53
  "tree-sitter-typescript": "^0.23.2",
54
54
  "ts-morph": "^28.0.0",
55
55
  "zod": "^4.3.6",
56
+ "@equinor/fusion-framework-cli-plugin-ai-base": "4.0.0",
56
57
  "@equinor/fusion-framework-module": "6.0.0",
57
- "@equinor/fusion-imports": "2.0.0",
58
- "@equinor/fusion-framework-cli-plugin-ai-base": "3.0.0",
59
- "@equinor/fusion-framework-module-ai": "4.0.0"
58
+ "@equinor/fusion-framework-module-ai": "4.0.0",
59
+ "@equinor/fusion-imports": "2.0.0"
60
60
  },
61
61
  "peerDependencies": {
62
- "@equinor/fusion-framework-cli": "^14.2.7"
62
+ "@equinor/fusion-framework-cli": "^15.0.1"
63
63
  },
64
64
  "devDependencies": {
65
65
  "typescript": "^5.9.3",
66
66
  "vitest": "^4.1.0",
67
- "@equinor/fusion-framework-cli": "^14.2.7"
67
+ "@equinor/fusion-framework-cli": "^15.0.1"
68
68
  },
69
69
  "scripts": {
70
70
  "build": "tsc -b",
package/src/bin/embed.ts CHANGED
@@ -36,9 +36,16 @@ import { generateChunkId } from '../utils/generate-chunk-id.js';
36
36
  /** Braille spinner frames (same as ora's default). */
37
37
  const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
38
38
 
39
+ /** Whether the process is running in a non-interactive environment (CI). */
40
+ const IS_CI = !process.stdout.isTTY || Boolean(process.env.CI);
41
+
39
42
  /**
40
43
  * Manages a fixed block of sticky progress lines with per-line spinners.
41
44
  * Each line can be updated independently without overwriting the others.
45
+ *
46
+ * In non-interactive environments (CI) the ANSI cursor-movement dance is
47
+ * replaced with simple `console.log` lines so the output is readable in
48
+ * plain-text log viewers.
42
49
  * @internal
43
50
  */
44
51
  class ProgressDisplay {
@@ -48,17 +55,27 @@ class ProgressDisplay {
48
55
  private frame = 0;
49
56
  private timer: ReturnType<typeof setInterval> | undefined;
50
57
 
58
+ /** Tracks last CI log time per line to throttle output. */
59
+ private lastCiLog: number[] = [];
60
+
61
+ /** Minimum interval (ms) between CI progress lines for the same line slot. */
62
+ private static CI_LOG_INTERVAL_MS = 15_000;
63
+
51
64
  /** Register the line labels up front and print empty placeholders. */
52
65
  start(count: number): void {
53
66
  this.lines = new Array<string>(count).fill('');
54
67
  this.spinning = new Array<boolean>(count).fill(false);
55
- // Print placeholder lines so the cursor block exists
56
- for (let i = 0; i < count; i++) {
57
- process.stdout.write('\n');
68
+ this.lastCiLog = new Array<number>(count).fill(0);
69
+
70
+ if (!IS_CI) {
71
+ // Print placeholder lines so the cursor block exists
72
+ for (let i = 0; i < count; i++) {
73
+ process.stdout.write('\n');
74
+ }
75
+ // Tick spinner at 80ms (same cadence as ora)
76
+ this.timer = setInterval(() => this.tick(), 80);
58
77
  }
59
78
  this.started = true;
60
- // Tick spinner at 80ms (same cadence as ora)
61
- this.timer = setInterval(() => this.tick(), 80);
62
79
  }
63
80
 
64
81
  /** Update a specific line (0-indexed) without touching the others. */
@@ -66,14 +83,27 @@ class ProgressDisplay {
66
83
  if (!this.started) return;
67
84
  this.lines[line] = message;
68
85
  this.spinning[line] = true;
86
+ if (IS_CI) {
87
+ const now = Date.now();
88
+ if (now - this.lastCiLog[line] >= ProgressDisplay.CI_LOG_INTERVAL_MS) {
89
+ this.lastCiLog[line] = now;
90
+ console.log(`⏳ ${message}`);
91
+ }
92
+ return;
93
+ }
69
94
  this.render(line);
70
95
  }
71
96
 
72
97
  /** Mark a line as completed — stops its spinner and shows a checkmark. */
73
98
  succeed(line: number, message: string): void {
74
99
  if (!this.started) return;
75
- this.lines[line] = `✅ ${message}`;
100
+ const text = `✅ ${message}`;
101
+ this.lines[line] = text;
76
102
  this.spinning[line] = false;
103
+ if (IS_CI) {
104
+ console.log(text);
105
+ return;
106
+ }
77
107
  this.render(line);
78
108
  }
79
109
 
@@ -81,13 +111,15 @@ class ProgressDisplay {
81
111
  clear(): void {
82
112
  if (!this.started) return;
83
113
  if (this.timer) clearInterval(this.timer);
84
- // Move up to the first progress line and clear each one
85
- for (let i = 0; i < this.lines.length; i++) {
86
- const linesUp = this.lines.length - i;
87
- process.stdout.write(`\x1b[${linesUp}A\x1b[2K\r\x1b[${linesUp}B\r`);
114
+ if (!IS_CI) {
115
+ // Move up to the first progress line and clear each one
116
+ for (let i = 0; i < this.lines.length; i++) {
117
+ const linesUp = this.lines.length - i;
118
+ process.stdout.write(`\x1b[${linesUp}A\x1b[2K\r\x1b[${linesUp}B\r`);
119
+ }
120
+ // Move cursor up past the now-empty block
121
+ process.stdout.write(`\x1b[${this.lines.length}A\r`);
88
122
  }
89
- // Move cursor up past the now-empty block
90
- process.stdout.write(`\x1b[${this.lines.length}A\r`);
91
123
  this.started = false;
92
124
  }
93
125
 
@@ -129,18 +161,38 @@ const GIT_CONCURRENCY = 20;
129
161
  /** Maximum parallel upsert requests to the vector store. */
130
162
  const UPSERT_CONCURRENCY = 10;
131
163
 
132
- /** Number of texts to embed per API request. */
133
- const EMBED_BATCH_SIZE = 20;
164
+ /**
165
+ * Number of texts to embed per API request.
166
+ *
167
+ * Azure OpenAI accepts up to 2 048 inputs per embedding call.
168
+ * LangChain's `batchSize` default is 1 (!) so we also set it on the
169
+ * client constructor. This outer batch controls how many documents
170
+ * are grouped before handing them to the embed client.
171
+ */
172
+ const EMBED_BATCH_SIZE = 500;
134
173
 
135
- /** Number of concurrent batch requests in flight. */
136
- const EMBED_BATCH_CONCURRENCY = 4;
174
+ /**
175
+ * Number of concurrent embedding API requests in flight.
176
+ *
177
+ * Each request now carries EMBED_BATCH_SIZE texts in a single HTTP call
178
+ * (LangChain batchSize is aligned), so 3 concurrent requests should
179
+ * saturate most Azure OpenAI TPM quotas without triggering rate limits.
180
+ */
181
+ const EMBED_BATCH_CONCURRENCY = 3;
137
182
 
138
183
  /**
139
184
  * Maximum time (ms) to wait before flushing a partial embedding batch.
140
- * Without this, `bufferCount` waits indefinitely for a full batch, which
141
- * starves `mergeMap` concurrency when upstream document throughput is slow.
185
+ *
186
+ * A longer window lets more documents accumulate before triggering an
187
+ * HTTP call, which drastically cuts the number of round-trips when
188
+ * upstream (metadata enrichment) feeds documents slowly.
189
+ *
190
+ * Full batches (EMBED_BATCH_SIZE) are emitted immediately regardless of
191
+ * the timer, and `bufferTime` flushes any remainder the instant the
192
+ * source stream completes — so a large value only affects mid-stream
193
+ * partial batches, not tail latency.
142
194
  */
143
- const EMBED_BUFFER_FLUSH_MS = 250;
195
+ const EMBED_BUFFER_FLUSH_MS = 10_000;
144
196
 
145
197
  /** Maximum retry attempts for transient / rate-limit errors per chunk. */
146
198
  const MAX_RETRIES = 4;
@@ -152,9 +204,24 @@ const MAX_RETRIES = 4;
152
204
  */
153
205
  export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
154
206
  const { framework, options, config, filePatterns } = binOptions;
207
+ const debug = options.debug ?? false;
155
208
 
156
209
  console.log(`📇 Index: ${options.indexName}`);
157
210
 
211
+ if (debug) {
212
+ console.debug('[debug] Embed model:', options.embedModel);
213
+ console.debug('[debug] File patterns:', filePatterns);
214
+ console.debug(
215
+ '[debug] Allowed patterns:',
216
+ config.index?.patterns ?? ['**/*.ts', '**/*.tsx', '**/*.md', '**/*.mdx'],
217
+ );
218
+ console.debug('[debug] Raw patterns:', config.index?.rawPatterns ?? []);
219
+ console.debug('[debug] Ignore patterns:', config.index?.ignore ?? defaultIgnore);
220
+ console.debug('[debug] Diff mode:', options.diff);
221
+ console.debug('[debug] Dry run:', options.dryRun);
222
+ console.debug('[debug] Clean:', options.clean);
223
+ }
224
+
158
225
  const progress = new ProgressDisplay();
159
226
 
160
227
  // Handle clean operation (destructive - deletes all existing documents)
@@ -227,6 +294,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
227
294
  }),
228
295
  filter((file) => {
229
296
  const matches = multimatch(file.relativePath, allowedFilePatterns);
297
+ if (debug && matches.length === 0) {
298
+ console.debug('[debug] Skipped (no pattern match):', file.relativePath);
299
+ }
230
300
  return matches.length > 0;
231
301
  }),
232
302
  tap((file) => {
@@ -283,6 +353,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
283
353
  mergeMap(async (file) => {
284
354
  const documents = await parseMarkdownFile(file);
285
355
  docCount++;
356
+ if (debug) {
357
+ console.debug(`[debug] Markdown ${file.relativePath} → ${documents.length} chunk(s)`);
358
+ }
286
359
  progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
287
360
  return { status: file.status, documents };
288
361
  }),
@@ -294,6 +367,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
294
367
  map((file) => {
295
368
  const documents = parseTsDocFromFileSync(file);
296
369
  docCount++;
370
+ if (debug) {
371
+ console.debug(`[debug] TypeScript ${file.relativePath} → ${documents.length} chunk(s)`);
372
+ }
297
373
  progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
298
374
  return { status: file.status, documents };
299
375
  }),
@@ -336,51 +412,52 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
336
412
  // whichever comes first — prevents upstream starvation from blocking concurrency
337
413
  bufferTime(EMBED_BUFFER_FLUSH_MS, null, EMBED_BATCH_SIZE),
338
414
  filter((batch) => batch.length > 0),
339
- mergeMap(
340
- (batch) =>
341
- from(embeddingService.embedDocuments(batch.map((d) => d.pageContent))).pipe(
342
- retry({
343
- count: MAX_RETRIES,
344
- delay: (error, retryIndex) => {
345
- // Auth errors are terminal — abort immediately with actionable message
346
- if (error?.name === 'NoAccountsError') {
347
- console.error(
348
- '\n🔒 Authentication expired. Run `ffc auth login` then retry with `--diff`.',
349
- );
350
- throw error;
351
- }
352
-
353
- const retryAfterSec =
354
- error?.response?.headers?.get?.('retry-after') ??
355
- error?.responseHeaders?.['retry-after'];
356
- const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
357
-
358
- const backoffMs = 2 ** retryIndex * 1000;
359
- const delayMs = Math.max(backoffMs, retryAfterMs);
360
-
361
- console.warn(
362
- `\n⏳ Retry ${retryIndex}/${MAX_RETRIES} for batch of ${batch.length} in ${delayMs}ms`,
363
- );
364
- return timer(delayMs);
365
- },
366
- }),
367
- map((allEmbeddings) => {
368
- return batch.map((document, i) => {
369
- embeddedCount++;
370
- const total = metadataDone ? metadataCount : 0;
371
- const pct = total > 0 ? ` ${Math.round((embeddedCount / total) * 100)}%` : '';
372
- const denominator = total > 0 ? `/${total}` : '';
373
- progress.update(
374
- LINE_EMBED,
375
- `🧠 Embedding [${embeddedCount}${denominator}]${pct} — ${document.metadata.source}`,
415
+ mergeMap((batch) => {
416
+ if (debug) {
417
+ console.debug(`[debug] Embedding batch of ${batch.length} documents`);
418
+ }
419
+ return from(embeddingService.embedDocuments(batch.map((d) => d.pageContent))).pipe(
420
+ retry({
421
+ count: MAX_RETRIES,
422
+ delay: (error, retryIndex) => {
423
+ // Auth errors are terminal — abort immediately with actionable message
424
+ if (error?.name === 'NoAccountsError') {
425
+ console.error(
426
+ '\n🔒 Authentication expired. Run `ffc auth login` then retry with `--diff`.',
376
427
  );
377
- const metadata = { ...document.metadata, embedding: allEmbeddings[i] };
378
- return { ...document, metadata };
379
- });
380
- }),
381
- ),
382
- EMBED_BATCH_CONCURRENCY,
383
- ),
428
+ throw error;
429
+ }
430
+
431
+ const retryAfterSec =
432
+ error?.response?.headers?.get?.('retry-after') ??
433
+ error?.responseHeaders?.['retry-after'];
434
+ const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
435
+
436
+ const backoffMs = 2 ** retryIndex * 1000;
437
+ const delayMs = Math.max(backoffMs, retryAfterMs);
438
+
439
+ console.warn(
440
+ `\n⏳ Retry ${retryIndex}/${MAX_RETRIES} for batch of ${batch.length} in ${delayMs}ms`,
441
+ );
442
+ return timer(delayMs);
443
+ },
444
+ }),
445
+ map((allEmbeddings) => {
446
+ return batch.map((document, i) => {
447
+ embeddedCount++;
448
+ const total = metadataDone ? metadataCount : 0;
449
+ const pct = total > 0 ? ` ${Math.round((embeddedCount / total) * 100)}%` : '';
450
+ const denominator = total > 0 ? `/${total}` : '';
451
+ progress.update(
452
+ LINE_EMBED,
453
+ `🧠 Embedding [${embeddedCount}${denominator}]${pct} — ${document.metadata.source}`,
454
+ );
455
+ const metadata = { ...document.metadata, embedding: allEmbeddings[i] };
456
+ return { ...document, metadata };
457
+ });
458
+ }),
459
+ );
460
+ }, EMBED_BATCH_CONCURRENCY),
384
461
  finalize(() => {
385
462
  embeddingDone = true;
386
463
  progress.succeed(LINE_EMBED, `🧠 Embedded ${embeddedCount} documents`);
@@ -398,6 +475,12 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
398
475
  return undefined;
399
476
  }
400
477
  if (!options.dryRun) {
478
+ if (debug) {
479
+ console.debug(
480
+ `[debug] Upserting batch of ${documents.length} documents:`,
481
+ documents.map((d) => d.id),
482
+ );
483
+ }
401
484
  await vectorStoreService.addDocuments(documents);
402
485
  }
403
486
  return {
@@ -25,6 +25,11 @@ export async function getDiff(options: CommandOptions): Promise<ChangedFile[]> {
25
25
  }
26
26
 
27
27
  console.log(`📝 Found ${changedFiles.length} changed files matching patterns`);
28
+ if (options.debug) {
29
+ for (const file of changedFiles) {
30
+ console.debug(`[debug] ${file.status}: ${file.filepath}`);
31
+ }
32
+ }
28
33
  return changedFiles;
29
34
  } catch (error) {
30
35
  console.error(`❌ Git diff error: ${error instanceof Error ? error.message : 'Unknown error'}`);
package/src/version.ts CHANGED
@@ -1,2 +1,2 @@
1
1
  // Generated by genversion.
2
- export const version = '2.1.0';
2
+ export const version = '3.0.1';