@equinor/fusion-framework-cli-plugin-ai-index 2.1.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@ export declare const DeleteOptionsSchema: z.ZodObject<{
11
11
  clientId: z.ZodOptional<z.ZodString>;
12
12
  chatModel: z.ZodOptional<z.ZodString>;
13
13
  embedModel: z.ZodOptional<z.ZodString>;
14
+ debug: z.ZodDefault<z.ZodCoercedBoolean<unknown>>;
14
15
  indexName: z.ZodString;
15
16
  dryRun: z.ZodBoolean;
16
17
  filter: z.ZodOptional<z.ZodString>;
@@ -10,6 +10,7 @@ export declare const CommandOptionsSchema: z.ZodObject<{
10
10
  tenantId: z.ZodOptional<z.ZodString>;
11
11
  clientId: z.ZodOptional<z.ZodString>;
12
12
  chatModel: z.ZodOptional<z.ZodString>;
13
+ debug: z.ZodDefault<z.ZodCoercedBoolean<unknown>>;
13
14
  embedModel: z.ZodString;
14
15
  indexName: z.ZodString;
15
16
  dryRun: z.ZodBoolean;
@@ -1 +1 @@
1
- export declare const version = "2.1.0";
1
+ export declare const version = "3.0.0";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@equinor/fusion-framework-cli-plugin-ai-index",
3
- "version": "2.1.0",
3
+ "version": "3.0.0",
4
4
  "description": "AI indexing plugin for Fusion Framework CLI providing document embedding and chunking utilities",
5
5
  "main": "dist/esm/index.js",
6
6
  "type": "module",
@@ -53,18 +53,18 @@
53
53
  "tree-sitter-typescript": "^0.23.2",
54
54
  "ts-morph": "^28.0.0",
55
55
  "zod": "^4.3.6",
56
+ "@equinor/fusion-framework-cli-plugin-ai-base": "4.0.0",
57
+ "@equinor/fusion-framework-module-ai": "4.0.0",
56
58
  "@equinor/fusion-framework-module": "6.0.0",
57
- "@equinor/fusion-imports": "2.0.0",
58
- "@equinor/fusion-framework-cli-plugin-ai-base": "3.0.0",
59
- "@equinor/fusion-framework-module-ai": "4.0.0"
59
+ "@equinor/fusion-imports": "2.0.0"
60
60
  },
61
61
  "peerDependencies": {
62
- "@equinor/fusion-framework-cli": "^14.2.7"
62
+ "@equinor/fusion-framework-cli": "^15.0.0"
63
63
  },
64
64
  "devDependencies": {
65
65
  "typescript": "^5.9.3",
66
66
  "vitest": "^4.1.0",
67
- "@equinor/fusion-framework-cli": "^14.2.7"
67
+ "@equinor/fusion-framework-cli": "^15.0.0"
68
68
  },
69
69
  "scripts": {
70
70
  "build": "tsc -b",
package/src/bin/embed.ts CHANGED
@@ -36,9 +36,16 @@ import { generateChunkId } from '../utils/generate-chunk-id.js';
36
36
  /** Braille spinner frames (same as ora's default). */
37
37
  const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
38
38
 
39
+ /** Whether the process is running in a non-interactive environment (CI). */
40
+ const IS_CI = !process.stdout.isTTY || Boolean(process.env.CI);
41
+
39
42
  /**
40
43
  * Manages a fixed block of sticky progress lines with per-line spinners.
41
44
  * Each line can be updated independently without overwriting the others.
45
+ *
46
+ * In non-interactive environments (CI) the ANSI cursor-movement dance is
47
+ * replaced with simple `console.log` lines so the output is readable in
48
+ * plain-text log viewers.
42
49
  * @internal
43
50
  */
44
51
  class ProgressDisplay {
@@ -48,17 +55,27 @@ class ProgressDisplay {
48
55
  private frame = 0;
49
56
  private timer: ReturnType<typeof setInterval> | undefined;
50
57
 
58
+ /** Tracks last CI log time per line to throttle output. */
59
+ private lastCiLog: number[] = [];
60
+
61
+ /** Minimum interval (ms) between CI progress lines for the same line slot. */
62
+ private static CI_LOG_INTERVAL_MS = 15_000;
63
+
51
64
  /** Register the line labels up front and print empty placeholders. */
52
65
  start(count: number): void {
53
66
  this.lines = new Array<string>(count).fill('');
54
67
  this.spinning = new Array<boolean>(count).fill(false);
55
- // Print placeholder lines so the cursor block exists
56
- for (let i = 0; i < count; i++) {
57
- process.stdout.write('\n');
68
+ this.lastCiLog = new Array<number>(count).fill(0);
69
+
70
+ if (!IS_CI) {
71
+ // Print placeholder lines so the cursor block exists
72
+ for (let i = 0; i < count; i++) {
73
+ process.stdout.write('\n');
74
+ }
75
+ // Tick spinner at 80ms (same cadence as ora)
76
+ this.timer = setInterval(() => this.tick(), 80);
58
77
  }
59
78
  this.started = true;
60
- // Tick spinner at 80ms (same cadence as ora)
61
- this.timer = setInterval(() => this.tick(), 80);
62
79
  }
63
80
 
64
81
  /** Update a specific line (0-indexed) without touching the others. */
@@ -66,14 +83,27 @@ class ProgressDisplay {
66
83
  if (!this.started) return;
67
84
  this.lines[line] = message;
68
85
  this.spinning[line] = true;
86
+ if (IS_CI) {
87
+ const now = Date.now();
88
+ if (now - this.lastCiLog[line] >= ProgressDisplay.CI_LOG_INTERVAL_MS) {
89
+ this.lastCiLog[line] = now;
90
+ console.log(`⏳ ${message}`);
91
+ }
92
+ return;
93
+ }
69
94
  this.render(line);
70
95
  }
71
96
 
72
97
  /** Mark a line as completed — stops its spinner and shows a checkmark. */
73
98
  succeed(line: number, message: string): void {
74
99
  if (!this.started) return;
75
- this.lines[line] = `✅ ${message}`;
100
+ const text = `✅ ${message}`;
101
+ this.lines[line] = text;
76
102
  this.spinning[line] = false;
103
+ if (IS_CI) {
104
+ console.log(text);
105
+ return;
106
+ }
77
107
  this.render(line);
78
108
  }
79
109
 
@@ -81,13 +111,15 @@ class ProgressDisplay {
81
111
  clear(): void {
82
112
  if (!this.started) return;
83
113
  if (this.timer) clearInterval(this.timer);
84
- // Move up to the first progress line and clear each one
85
- for (let i = 0; i < this.lines.length; i++) {
86
- const linesUp = this.lines.length - i;
87
- process.stdout.write(`\x1b[${linesUp}A\x1b[2K\r\x1b[${linesUp}B\r`);
114
+ if (!IS_CI) {
115
+ // Move up to the first progress line and clear each one
116
+ for (let i = 0; i < this.lines.length; i++) {
117
+ const linesUp = this.lines.length - i;
118
+ process.stdout.write(`\x1b[${linesUp}A\x1b[2K\r\x1b[${linesUp}B\r`);
119
+ }
120
+ // Move cursor up past the now-empty block
121
+ process.stdout.write(`\x1b[${this.lines.length}A\r`);
88
122
  }
89
- // Move cursor up past the now-empty block
90
- process.stdout.write(`\x1b[${this.lines.length}A\r`);
91
123
  this.started = false;
92
124
  }
93
125
 
@@ -129,18 +161,31 @@ const GIT_CONCURRENCY = 20;
129
161
  /** Maximum parallel upsert requests to the vector store. */
130
162
  const UPSERT_CONCURRENCY = 10;
131
163
 
132
- /** Number of texts to embed per API request. */
133
- const EMBED_BATCH_SIZE = 20;
164
+ /**
165
+ * Number of texts to embed per API request.
166
+ *
167
+ * Azure OpenAI accepts up to 2 048 inputs per embedding call.
168
+ * LangChain's `batchSize` default is 1 (!) so we also set it on the
169
+ * client constructor. This outer batch controls how many documents
170
+ * are grouped before handing them to the embed client.
171
+ */
172
+ const EMBED_BATCH_SIZE = 500;
134
173
 
135
- /** Number of concurrent batch requests in flight. */
136
- const EMBED_BATCH_CONCURRENCY = 4;
174
+ /**
175
+ * Number of concurrent embedding API requests in flight.
176
+ *
177
+ * Each request now carries EMBED_BATCH_SIZE texts in a single HTTP call
178
+ * (LangChain batchSize is aligned), so 2 concurrent requests already
179
+ * saturate most Azure OpenAI TPM quotas.
180
+ */
181
+ const EMBED_BATCH_CONCURRENCY = 2;
137
182
 
138
183
  /**
139
184
  * Maximum time (ms) to wait before flushing a partial embedding batch.
140
185
  * Without this, `bufferCount` waits indefinitely for a full batch, which
141
186
  * starves `mergeMap` concurrency when upstream document throughput is slow.
142
187
  */
143
- const EMBED_BUFFER_FLUSH_MS = 250;
188
+ const EMBED_BUFFER_FLUSH_MS = 500;
144
189
 
145
190
  /** Maximum retry attempts for transient / rate-limit errors per chunk. */
146
191
  const MAX_RETRIES = 4;
@@ -152,9 +197,24 @@ const MAX_RETRIES = 4;
152
197
  */
153
198
  export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
154
199
  const { framework, options, config, filePatterns } = binOptions;
200
+ const debug = options.debug ?? false;
155
201
 
156
202
  console.log(`📇 Index: ${options.indexName}`);
157
203
 
204
+ if (debug) {
205
+ console.debug('[debug] Embed model:', options.embedModel);
206
+ console.debug('[debug] File patterns:', filePatterns);
207
+ console.debug(
208
+ '[debug] Allowed patterns:',
209
+ config.index?.patterns ?? ['**/*.ts', '**/*.tsx', '**/*.md', '**/*.mdx'],
210
+ );
211
+ console.debug('[debug] Raw patterns:', config.index?.rawPatterns ?? []);
212
+ console.debug('[debug] Ignore patterns:', config.index?.ignore ?? defaultIgnore);
213
+ console.debug('[debug] Diff mode:', options.diff);
214
+ console.debug('[debug] Dry run:', options.dryRun);
215
+ console.debug('[debug] Clean:', options.clean);
216
+ }
217
+
158
218
  const progress = new ProgressDisplay();
159
219
 
160
220
  // Handle clean operation (destructive - deletes all existing documents)
@@ -227,6 +287,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
227
287
  }),
228
288
  filter((file) => {
229
289
  const matches = multimatch(file.relativePath, allowedFilePatterns);
290
+ if (debug && matches.length === 0) {
291
+ console.debug('[debug] Skipped (no pattern match):', file.relativePath);
292
+ }
230
293
  return matches.length > 0;
231
294
  }),
232
295
  tap((file) => {
@@ -283,6 +346,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
283
346
  mergeMap(async (file) => {
284
347
  const documents = await parseMarkdownFile(file);
285
348
  docCount++;
349
+ if (debug) {
350
+ console.debug(`[debug] Markdown ${file.relativePath} → ${documents.length} chunk(s)`);
351
+ }
286
352
  progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
287
353
  return { status: file.status, documents };
288
354
  }),
@@ -294,6 +360,9 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
294
360
  map((file) => {
295
361
  const documents = parseTsDocFromFileSync(file);
296
362
  docCount++;
363
+ if (debug) {
364
+ console.debug(`[debug] TypeScript ${file.relativePath} → ${documents.length} chunk(s)`);
365
+ }
297
366
  progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
298
367
  return { status: file.status, documents };
299
368
  }),
@@ -336,51 +405,52 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
336
405
  // whichever comes first — prevents upstream starvation from blocking concurrency
337
406
  bufferTime(EMBED_BUFFER_FLUSH_MS, null, EMBED_BATCH_SIZE),
338
407
  filter((batch) => batch.length > 0),
339
- mergeMap(
340
- (batch) =>
341
- from(embeddingService.embedDocuments(batch.map((d) => d.pageContent))).pipe(
342
- retry({
343
- count: MAX_RETRIES,
344
- delay: (error, retryIndex) => {
345
- // Auth errors are terminal — abort immediately with actionable message
346
- if (error?.name === 'NoAccountsError') {
347
- console.error(
348
- '\n🔒 Authentication expired. Run `ffc auth login` then retry with `--diff`.',
349
- );
350
- throw error;
351
- }
352
-
353
- const retryAfterSec =
354
- error?.response?.headers?.get?.('retry-after') ??
355
- error?.responseHeaders?.['retry-after'];
356
- const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
357
-
358
- const backoffMs = 2 ** retryIndex * 1000;
359
- const delayMs = Math.max(backoffMs, retryAfterMs);
360
-
361
- console.warn(
362
- `\n⏳ Retry ${retryIndex}/${MAX_RETRIES} for batch of ${batch.length} in ${delayMs}ms`,
363
- );
364
- return timer(delayMs);
365
- },
366
- }),
367
- map((allEmbeddings) => {
368
- return batch.map((document, i) => {
369
- embeddedCount++;
370
- const total = metadataDone ? metadataCount : 0;
371
- const pct = total > 0 ? ` ${Math.round((embeddedCount / total) * 100)}%` : '';
372
- const denominator = total > 0 ? `/${total}` : '';
373
- progress.update(
374
- LINE_EMBED,
375
- `🧠 Embedding [${embeddedCount}${denominator}]${pct} — ${document.metadata.source}`,
408
+ mergeMap((batch) => {
409
+ if (debug) {
410
+ console.debug(`[debug] Embedding batch of ${batch.length} documents`);
411
+ }
412
+ return from(embeddingService.embedDocuments(batch.map((d) => d.pageContent))).pipe(
413
+ retry({
414
+ count: MAX_RETRIES,
415
+ delay: (error, retryIndex) => {
416
+ // Auth errors are terminal — abort immediately with actionable message
417
+ if (error?.name === 'NoAccountsError') {
418
+ console.error(
419
+ '\n🔒 Authentication expired. Run `ffc auth login` then retry with `--diff`.',
376
420
  );
377
- const metadata = { ...document.metadata, embedding: allEmbeddings[i] };
378
- return { ...document, metadata };
379
- });
380
- }),
381
- ),
382
- EMBED_BATCH_CONCURRENCY,
383
- ),
421
+ throw error;
422
+ }
423
+
424
+ const retryAfterSec =
425
+ error?.response?.headers?.get?.('retry-after') ??
426
+ error?.responseHeaders?.['retry-after'];
427
+ const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
428
+
429
+ const backoffMs = 2 ** retryIndex * 1000;
430
+ const delayMs = Math.max(backoffMs, retryAfterMs);
431
+
432
+ console.warn(
433
+ `\n⏳ Retry ${retryIndex}/${MAX_RETRIES} for batch of ${batch.length} in ${delayMs}ms`,
434
+ );
435
+ return timer(delayMs);
436
+ },
437
+ }),
438
+ map((allEmbeddings) => {
439
+ return batch.map((document, i) => {
440
+ embeddedCount++;
441
+ const total = metadataDone ? metadataCount : 0;
442
+ const pct = total > 0 ? ` ${Math.round((embeddedCount / total) * 100)}%` : '';
443
+ const denominator = total > 0 ? `/${total}` : '';
444
+ progress.update(
445
+ LINE_EMBED,
446
+ `🧠 Embedding [${embeddedCount}${denominator}]${pct} — ${document.metadata.source}`,
447
+ );
448
+ const metadata = { ...document.metadata, embedding: allEmbeddings[i] };
449
+ return { ...document, metadata };
450
+ });
451
+ }),
452
+ );
453
+ }, EMBED_BATCH_CONCURRENCY),
384
454
  finalize(() => {
385
455
  embeddingDone = true;
386
456
  progress.succeed(LINE_EMBED, `🧠 Embedded ${embeddedCount} documents`);
@@ -398,6 +468,12 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
398
468
  return undefined;
399
469
  }
400
470
  if (!options.dryRun) {
471
+ if (debug) {
472
+ console.debug(
473
+ `[debug] Upserting batch of ${documents.length} documents:`,
474
+ documents.map((d) => d.id),
475
+ );
476
+ }
401
477
  await vectorStoreService.addDocuments(documents);
402
478
  }
403
479
  return {
@@ -25,6 +25,11 @@ export async function getDiff(options: CommandOptions): Promise<ChangedFile[]> {
25
25
  }
26
26
 
27
27
  console.log(`📝 Found ${changedFiles.length} changed files matching patterns`);
28
+ if (options.debug) {
29
+ for (const file of changedFiles) {
30
+ console.debug(`[debug] ${file.status}: ${file.filepath}`);
31
+ }
32
+ }
28
33
  return changedFiles;
29
34
  } catch (error) {
30
35
  console.error(`❌ Git diff error: ${error instanceof Error ? error.message : 'Unknown error'}`);
package/src/version.ts CHANGED
@@ -1,2 +1,2 @@
1
1
  // Generated by genversion.
2
- export const version = '2.1.0';
2
+ export const version = '3.0.0';