@equinor/fusion-framework-cli-plugin-ai-index 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/CHANGELOG.md +52 -0
  2. package/dist/esm/bin/apply-metadata.js +15 -5
  3. package/dist/esm/bin/apply-metadata.js.map +1 -1
  4. package/dist/esm/bin/apply-schema.js +64 -0
  5. package/dist/esm/bin/apply-schema.js.map +1 -0
  6. package/dist/esm/bin/apply-schema.test.js +143 -0
  7. package/dist/esm/bin/apply-schema.test.js.map +1 -0
  8. package/dist/esm/bin/delete-removed-files.js +1 -1
  9. package/dist/esm/bin/delete-removed-files.js.map +1 -1
  10. package/dist/esm/bin/embed.js +188 -47
  11. package/dist/esm/bin/embed.js.map +1 -1
  12. package/dist/esm/create-command.js +186 -0
  13. package/dist/esm/create-command.js.map +1 -0
  14. package/dist/esm/delete-command.js +14 -2
  15. package/dist/esm/delete-command.js.map +1 -1
  16. package/dist/esm/delete-command.options.js +7 -31
  17. package/dist/esm/delete-command.options.js.map +1 -1
  18. package/dist/esm/delete-index-command.js +94 -0
  19. package/dist/esm/delete-index-command.js.map +1 -0
  20. package/dist/esm/embed-command.js +30 -0
  21. package/dist/esm/embed-command.js.map +1 -0
  22. package/dist/esm/embeddings-command.js +14 -17
  23. package/dist/esm/embeddings-command.js.map +1 -1
  24. package/dist/esm/embeddings-command.options.js +12 -43
  25. package/dist/esm/embeddings-command.options.js.map +1 -1
  26. package/dist/esm/index.js +12 -3
  27. package/dist/esm/index.js.map +1 -1
  28. package/dist/esm/schema.js +41 -0
  29. package/dist/esm/schema.js.map +1 -0
  30. package/dist/esm/search-command.js +17 -5
  31. package/dist/esm/search-command.js.map +1 -1
  32. package/dist/esm/utils/embedding-dimensions.js +37 -0
  33. package/dist/esm/utils/embedding-dimensions.js.map +1 -0
  34. package/dist/esm/utils/zod-to-azure-fields.js +120 -0
  35. package/dist/esm/utils/zod-to-azure-fields.js.map +1 -0
  36. package/dist/esm/utils/zod-to-azure-fields.test.js +112 -0
  37. package/dist/esm/utils/zod-to-azure-fields.test.js.map +1 -0
  38. package/dist/esm/version.js +1 -1
  39. package/dist/tsconfig.tsbuildinfo +1 -1
  40. package/dist/types/bin/apply-metadata.d.ts +2 -1
  41. package/dist/types/bin/apply-schema.d.ts +22 -0
  42. package/dist/types/bin/apply-schema.test.d.ts +1 -0
  43. package/dist/types/config.d.ts +14 -0
  44. package/dist/types/create-command.d.ts +6 -0
  45. package/dist/types/delete-command.options.d.ts +9 -23
  46. package/dist/types/delete-index-command.d.ts +6 -0
  47. package/dist/types/embed-command.d.ts +12 -0
  48. package/dist/types/embeddings-command.options.d.ts +9 -28
  49. package/dist/types/index.d.ts +1 -0
  50. package/dist/types/schema.d.ts +137 -0
  51. package/dist/types/utils/embedding-dimensions.d.ts +13 -0
  52. package/dist/types/utils/zod-to-azure-fields.d.ts +61 -0
  53. package/dist/types/utils/zod-to-azure-fields.test.d.ts +1 -0
  54. package/dist/types/version.d.ts +1 -1
  55. package/package.json +6 -6
  56. package/src/bin/apply-metadata.ts +20 -4
  57. package/src/bin/apply-schema.test.ts +170 -0
  58. package/src/bin/apply-schema.ts +86 -0
  59. package/src/bin/delete-removed-files.ts +1 -1
  60. package/src/bin/embed.ts +248 -76
  61. package/src/config.ts +15 -0
  62. package/src/create-command.ts +218 -0
  63. package/src/delete-command.options.ts +7 -37
  64. package/src/delete-command.ts +19 -2
  65. package/src/delete-index-command.ts +121 -0
  66. package/src/embed-command.ts +44 -0
  67. package/src/embeddings-command.options.ts +12 -50
  68. package/src/embeddings-command.ts +18 -18
  69. package/src/index.ts +12 -3
  70. package/src/schema.ts +149 -0
  71. package/src/search-command.ts +22 -5
  72. package/src/utils/embedding-dimensions.ts +39 -0
  73. package/src/utils/zod-to-azure-fields.test.ts +136 -0
  74. package/src/utils/zod-to-azure-fields.ts +177 -0
  75. package/src/version.ts +1 -1
@@ -0,0 +1,86 @@
1
+ import { map } from 'rxjs';
2
+ import type { Observable } from 'rxjs';
3
+ import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
4
+ import type { IndexSchemaConfig } from '../schema.js';
5
+
6
+ /**
7
+ * Creates an RxJS operator that resolves promoted schema fields for each
8
+ * document and separates them from the generic `attributes` bag.
9
+ *
10
+ * For each document in the batch:
11
+ * 1. Runs the optional `prepareAttributes` callback to enrich attributes
12
+ * with type-safe access to schema-declared fields
13
+ * 2. Calls the schema resolver to compute promoted field values
14
+ * 3. Validates the resolved values against the Zod shape
15
+ * 4. Stores promoted fields on `metadata.schemaFields`
16
+ * 5. Removes promoted keys from `metadata.attributes` to avoid duplication
17
+ *
18
+ * When no schema is configured, the stream passes through unchanged.
19
+ *
20
+ * @param document$ - Stream of document batches from the metadata enrichment step.
21
+ * @param schema - The index schema config, if defined. When `undefined`, documents pass through unchanged.
22
+ * @returns Stream of document batches with promoted fields resolved and stored.
23
+ */
24
+ export function applySchema(
25
+ document$: Observable<VectorStoreDocument[]>,
26
+ schema: IndexSchemaConfig | undefined,
27
+ ): Observable<VectorStoreDocument[]> {
28
+ // No schema configured — pass through unchanged (backward compatible)
29
+ if (!schema) {
30
+ return document$;
31
+ }
32
+
33
+ const promotedKeys = new Set(Object.keys(schema.shape.shape as Record<string, unknown>));
34
+
35
+ return document$.pipe(
36
+ map((documents) =>
37
+ documents.map((document) => {
38
+ // Run typed attribute processor before schema resolution so the
39
+ // resolver receives fully enriched attributes
40
+ let enrichedDocument = document;
41
+ if (schema.prepareAttributes) {
42
+ const enrichedAttributes = schema.prepareAttributes(
43
+ (document.metadata.attributes ?? {}) as Record<string, unknown>,
44
+ document,
45
+ );
46
+ enrichedDocument = {
47
+ ...document,
48
+ metadata: {
49
+ ...document.metadata,
50
+ attributes: enrichedAttributes as Record<string, unknown>,
51
+ },
52
+ };
53
+ }
54
+
55
+ // Resolve promoted field values from the fully enriched document
56
+ const resolved = schema.resolve(enrichedDocument);
57
+
58
+ // Validate against the Zod shape — throws on invalid data with
59
+ // a clear error message pointing to the offending field
60
+ const validated = schema.shape.parse(resolved) as Record<string, unknown>;
61
+
62
+ // Remove promoted keys from attributes to avoid storing them
63
+ // in both top-level fields and the generic attributes array
64
+ const currentAttributes = (enrichedDocument.metadata.attributes ?? {}) as Record<
65
+ string,
66
+ unknown
67
+ >;
68
+ const remainingAttributes: Record<string, unknown> = {};
69
+ for (const [key, value] of Object.entries(currentAttributes)) {
70
+ if (!promotedKeys.has(key)) {
71
+ remainingAttributes[key] = value;
72
+ }
73
+ }
74
+
75
+ return {
76
+ ...enrichedDocument,
77
+ metadata: {
78
+ ...enrichedDocument.metadata,
79
+ attributes: remainingAttributes,
80
+ schemaFields: validated,
81
+ },
82
+ };
83
+ }),
84
+ ),
85
+ );
86
+ }
@@ -33,7 +33,7 @@ export function createDeleteRemovedFilesStream(
33
33
  console.log('Removing entry from vector store', file.relativePath);
34
34
  }
35
35
  if (!options.dryRun) {
36
- const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
36
+ const vectorStoreService = framework.ai.useIndex(options.indexName);
37
37
  // Single batch deletion - one file can produce multiple document chunks
38
38
  await vectorStoreService.deleteDocuments({
39
39
  filter: { filterExpression: filterExpression ?? undefined },
package/src/bin/embed.ts CHANGED
@@ -1,8 +1,19 @@
1
1
  import { globbyStream } from 'globby';
2
2
  import { relative } from 'node:path';
3
3
  import multimatch from 'multimatch';
4
- import { concat, from, merge, timer } from 'rxjs';
5
- import { concatMap, filter, map, mergeMap, retry, shareReplay, toArray } from 'rxjs/operators';
4
+ import { from, merge, timer } from 'rxjs';
5
+ import {
6
+ bufferCount,
7
+ bufferTime,
8
+ concatMap,
9
+ filter,
10
+ finalize,
11
+ map,
12
+ mergeMap,
13
+ retry,
14
+ shareReplay,
15
+ tap,
16
+ } from 'rxjs/operators';
6
17
 
7
18
  import { isMarkdownFile, parseMarkdownFile } from '../utils/markdown/index.js';
8
19
  import { getFileStatus, resolveProjectRoot } from '../utils/git/index.js';
@@ -11,6 +22,7 @@ import { isTypescriptFile, parseTsDocFromFileSync } from '../utils/ts-doc/index.
11
22
  import { getDiff } from './get-diff.js';
12
23
  import { createDeleteRemovedFilesStream } from './delete-removed-files.js';
13
24
  import { applyMetadata } from './apply-metadata.js';
25
+ import { applySchema } from './apply-schema.js';
14
26
  import type {
15
27
  DocumentEntry,
16
28
  EmbeddingsBinOptions,
@@ -21,6 +33,89 @@ import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/li
21
33
  import { readFileSync } from 'node:fs';
22
34
  import { generateChunkId } from '../utils/generate-chunk-id.js';
23
35
 
36
/** Braille spinner frames (same as ora's default). */
const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];

/**
 * Manages a fixed block of sticky progress lines with per-line spinners.
 * Each line can be updated independently without overwriting the others.
 *
 * Rendering uses raw ANSI cursor-movement escapes and assumes the cursor
 * rests on the line immediately below the block between writes (the
 * position established by the placeholder newlines printed in `start()`).
 * NOTE(review): escape sequences are written unconditionally — on a
 * non-TTY stream (e.g. CI logs) they will appear verbatim; consider
 * guarding on `process.stdout.isTTY`.
 * @internal
 */
class ProgressDisplay {
  // Current message text per progress line ('' = nothing rendered yet).
  private lines: string[] = [];
  // Whether each line's spinner is animating (true) or settled (false).
  private spinning: boolean[] = [];
  // Guards update/succeed/clear against use before start().
  private started = false;
  // Index into SPINNER_FRAMES, advanced every tick.
  private frame = 0;
  // Interval handle driving the spinner animation; cleared in clear().
  private timer: ReturnType<typeof setInterval> | undefined;

  /** Register the line labels up front and print empty placeholders. */
  start(count: number): void {
    this.lines = new Array<string>(count).fill('');
    this.spinning = new Array<boolean>(count).fill(false);
    // Print placeholder lines so the cursor block exists
    for (let i = 0; i < count; i++) {
      process.stdout.write('\n');
    }
    this.started = true;
    // Tick spinner at 80ms (same cadence as ora)
    this.timer = setInterval(() => this.tick(), 80);
  }

  /** Update a specific line (0-indexed) without touching the others. */
  update(line: number, message: string): void {
    if (!this.started) return;
    this.lines[line] = message;
    this.spinning[line] = true;
    this.render(line);
  }

  /** Mark a line as completed — stops its spinner and shows a checkmark. */
  succeed(line: number, message: string): void {
    if (!this.started) return;
    this.lines[line] = `✅ ${message}`;
    this.spinning[line] = false;
    this.render(line);
  }

  /** Clear all progress lines and leave the cursor on a clean line. */
  clear(): void {
    if (!this.started) return;
    if (this.timer) clearInterval(this.timer);
    // Move up to the first progress line and clear each one.
    // Each write moves up N lines, erases that line (\x1b[2K), then moves
    // back down N lines — so the cursor returns to the baseline after
    // every erase and the loop math stays simple.
    for (let i = 0; i < this.lines.length; i++) {
      const linesUp = this.lines.length - i;
      process.stdout.write(`\x1b[${linesUp}A\x1b[2K\r\x1b[${linesUp}B\r`);
    }
    // Move cursor up past the now-empty block
    process.stdout.write(`\x1b[${this.lines.length}A\r`);
    this.started = false;
  }

  /** Advance the spinner frame and re-render all spinning lines. */
  private tick(): void {
    this.frame = (this.frame + 1) % SPINNER_FRAMES.length;
    for (let i = 0; i < this.lines.length; i++) {
      // Only lines that are actively spinning and have text need a redraw
      if (this.spinning[i] && this.lines[i]) {
        this.render(i);
      }
    }
  }

  /** Render a single line at its position. */
  private render(line: number): void {
    // Distance from the baseline (cursor home) up to this line's row
    const linesUp = this.lines.length - line;
    const prefix = this.spinning[line] ? SPINNER_FRAMES[this.frame] : '';
    const text = this.spinning[line] ? `${prefix} ${this.lines[line]}` : this.lines[line];
    // Up N rows, erase, write, back down N rows, return to column 0
    process.stdout.write(`\x1b[${linesUp}A\x1b[2K\r${text}\x1b[${linesUp}B\r`);
  }
}
112
+
113
/** Progress line indices (one sticky line per pipeline stage) */
const LINE_PARSE = 0; // file discovery + parsing stage
const LINE_META = 1; // metadata enrichment stage
const LINE_EMBED = 2; // embedding generation stage
const LINE_INDEX = 3; // vector-store upsert stage
118
+
24
119
  /**
25
120
  * Default directories to skip before expensive git operations.
26
121
  * These are common build artifacts and dependencies that should be ignored.
@@ -28,6 +123,28 @@ import { generateChunkId } from '../utils/generate-chunk-id.js';
28
123
  */
29
124
  const defaultIgnore = ['node_modules', '**/node_modules/**', 'dist', '**/dist/**', '.git'];
30
125
 
126
/**
 * Concurrency limit for git subprocess operations (status, log, etc.).
 * Passed as the `mergeMap` concurrency argument when resolving file status.
 */
const GIT_CONCURRENCY = 20;

/** Maximum parallel upsert requests to the vector store. */
const UPSERT_CONCURRENCY = 10;

/** Number of texts to embed per API request. */
const EMBED_BATCH_SIZE = 20;

/** Number of concurrent batch requests in flight. */
const EMBED_BATCH_CONCURRENCY = 4;

/**
 * Maximum time (ms) to wait before flushing a partial embedding batch.
 * Without this, `bufferCount` waits indefinitely for a full batch, which
 * starves `mergeMap` concurrency when upstream document throughput is slow.
 */
const EMBED_BUFFER_FLUSH_MS = 250;

/**
 * Maximum retry attempts for transient / rate-limit errors per chunk.
 * Used as `retry({ count: MAX_RETRIES })` around embedding batch requests.
 */
const MAX_RETRIES = 4;
147
+
31
148
  /**
32
149
  * Main entry point for the embeddings bin.
33
150
  * Orchestrates the entire embeddings generation pipeline.
@@ -36,10 +153,12 @@ const defaultIgnore = ['node_modules', '**/node_modules/**', 'dist', '**/dist/**
36
153
  export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
37
154
  const { framework, options, config, filePatterns } = binOptions;
38
155
 
39
- console.log(`📇 Index: ${options.azureSearchIndexName}`);
156
+ console.log(`📇 Index: ${options.indexName}`);
157
+
158
+ const progress = new ProgressDisplay();
40
159
 
41
160
  // Handle clean operation (destructive - deletes all existing documents)
42
- const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
161
+ const vectorStoreService = framework.ai.useIndex(options.indexName);
43
162
  if (options.clean && !options.dryRun) {
44
163
  console.log('🧹 Cleaning vector store: deleting all existing documents...');
45
164
  // OData filter: delete all documents with non-empty source (all indexed docs)
@@ -75,8 +194,8 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
75
194
  absolute: true,
76
195
  }),
77
196
  ).pipe(
78
- // Get git status concurrently, then flatten array results
79
- mergeMap((path) => getFileStatus(path)),
197
+ // Get git status concurrently (capped to avoid spawning too many git processes)
198
+ mergeMap((path) => getFileStatus(path), GIT_CONCURRENCY),
80
199
  concatMap((files) => from(files)),
81
200
  // Share stream for multiple subscribers (removedFiles$ and indexFiles$)
82
201
  shareReplay({ refCount: true }),
@@ -92,6 +211,7 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
92
211
  ];
93
212
 
94
213
  // Process files: enrich with metadata and filter by allowed patterns
214
+ let fileCount = 0;
95
215
  const processedFiles$ = files$.pipe(
96
216
  map((file) => {
97
217
  const { filepath, status } = file;
@@ -109,6 +229,11 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
109
229
  const matches = multimatch(file.relativePath, allowedFilePatterns);
110
230
  return matches.length > 0;
111
231
  }),
232
+ tap((file) => {
233
+ fileCount++;
234
+ const label = file.status === 'removed' ? '🗑️' : '📄';
235
+ progress.update(LINE_PARSE, `${label} Parsing [${fileCount}] ${file.relativePath}`);
236
+ }),
112
237
  // Share for multiple subscribers (removedFiles$, markdown$, typescript$)
113
238
  shareReplay({ refCount: true }),
114
239
  );
@@ -134,6 +259,7 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
134
259
  return false;
135
260
  };
136
261
 
262
+ let docCount = 0;
137
263
  const rawFiles$ = indexFiles$.pipe(
138
264
  filter(isRawFile),
139
265
  map((file): DocumentEntry => {
@@ -145,6 +271,8 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
145
271
  type: 'raw',
146
272
  },
147
273
  };
274
+ docCount++;
275
+ progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
148
276
  return { status: file.status, documents: [document] };
149
277
  }),
150
278
  );
@@ -154,6 +282,8 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
154
282
  filter((file) => isMarkdownFile(file.path)),
155
283
  mergeMap(async (file) => {
156
284
  const documents = await parseMarkdownFile(file);
285
+ docCount++;
286
+ progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
157
287
  return { status: file.status, documents };
158
288
  }),
159
289
  );
@@ -163,105 +293,134 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
163
293
  filter((file) => isTypescriptFile(file.path)),
164
294
  map((file) => {
165
295
  const documents = parseTsDocFromFileSync(file);
296
+ docCount++;
297
+ progress.update(LINE_PARSE, `📄 Parsing [${docCount}] ${file.relativePath}`);
166
298
  return { status: file.status, documents };
167
299
  }),
168
300
  );
169
301
 
302
+ // Merge parsed streams and signal when all parsing is done
303
+ const parsed$ = merge(rawFiles$, markdown$, typescript$).pipe(
304
+ finalize(() => {
305
+ progress.succeed(LINE_PARSE, `📄 Parsed ${docCount} files`);
306
+ }),
307
+ );
308
+
170
309
  // Apply metadata to documents
171
- const applyMetadata$ = applyMetadata(merge(rawFiles$, markdown$, typescript$), config.index);
172
-
173
- // Generate embeddings with concurrency limit and retry on rate-limit (429) errors
174
- const embeddingService = framework.ai.getService('embeddings', options.openaiEmbeddingDeployment);
175
-
176
- /** Maximum parallel embedding requests to avoid hitting Azure OpenAI TPM limits. */
177
- const EMBEDDING_CONCURRENCY = 5;
178
-
179
- /** Maximum retry attempts for transient / rate-limit errors per chunk. */
180
- const MAX_RETRIES = 4;
181
-
182
- const applyEmbedding$ = applyMetadata$.pipe(
183
- mergeMap((documents) =>
184
- from(documents).pipe(
185
- // Limit concurrency to avoid overwhelming the embedding API
186
- mergeMap(
187
- (document) =>
188
- from(embeddingService.embedQuery(document.pageContent)).pipe(
189
- retry({
190
- count: MAX_RETRIES,
191
- delay: (error, retryIndex) => {
192
- // Parse Retry-After header when available (Azure sends seconds)
193
- const retryAfterSec =
194
- error?.response?.headers?.get?.('retry-after') ??
195
- error?.responseHeaders?.['retry-after'];
196
- const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
197
-
198
- // Exponential backoff: 2s, 4s, 8s, 16s — or Retry-After if larger
199
- const backoffMs = 2 ** retryIndex * 1000;
200
- const delayMs = Math.max(backoffMs, retryAfterMs);
201
-
202
- console.warn(
203
- `⏳ Retry ${retryIndex}/${MAX_RETRIES} for "${document.metadata.source}" in ${delayMs}ms`,
204
- );
205
- return timer(delayMs);
206
- },
207
- }),
208
- map((embeddings) => {
209
- console.log('embedding document', document.metadata.source);
210
- const metadata = { ...document.metadata, embedding: embeddings };
211
- return { ...document, metadata };
212
- }),
213
- ),
214
- EMBEDDING_CONCURRENCY,
310
+ let metadataCount = 0;
311
+ let metadataDone = false;
312
+ const applyMetadata$ = applyMetadata(parsed$, config.index, (source) => {
313
+ metadataCount++;
314
+ progress.update(LINE_META, `🏷️ Metadata [${metadataCount}] ${source}`);
315
+ }).pipe(
316
+ finalize(() => {
317
+ metadataDone = true;
318
+ progress.succeed(LINE_META, `🏷️ Metadata ${metadataCount} documents`);
319
+ }),
320
+ );
321
+
322
+ // Resolve promoted schema fields (if schema is configured) — runs after
323
+ // metadata enrichment so the resolver has access to git, package, and
324
+ // custom attributes from attributeProcessor
325
+ const applySchema$ = applySchema(applyMetadata$, config.index?.schema);
326
+
327
+ // Generate embeddings in batches with retry on rate-limit (429) errors
328
+ const embeddingService = framework.ai.useEmbed(options.embedModel);
329
+
330
+ let embeddedCount = 0;
331
+ let embeddingDone = false;
332
+ const applyEmbedding$ = applySchema$.pipe(
333
+ // Flatten all documents from file-level batches, then re-batch for the API
334
+ concatMap((documents) => from(documents)),
335
+ // Flush when EMBED_BATCH_SIZE docs accumulate OR after EMBED_BUFFER_FLUSH_MS,
336
+ // whichever comes first — prevents upstream starvation from blocking concurrency
337
+ bufferTime(EMBED_BUFFER_FLUSH_MS, null, EMBED_BATCH_SIZE),
338
+ filter((batch) => batch.length > 0),
339
+ mergeMap(
340
+ (batch) =>
341
+ from(embeddingService.embedDocuments(batch.map((d) => d.pageContent))).pipe(
342
+ retry({
343
+ count: MAX_RETRIES,
344
+ delay: (error, retryIndex) => {
345
+ // Auth errors are terminal — abort immediately with actionable message
346
+ if (error?.name === 'NoAccountsError') {
347
+ console.error(
348
+ '\n🔒 Authentication expired. Run `ffc auth login` then retry with `--diff`.',
349
+ );
350
+ throw error;
351
+ }
352
+
353
+ const retryAfterSec =
354
+ error?.response?.headers?.get?.('retry-after') ??
355
+ error?.responseHeaders?.['retry-after'];
356
+ const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
357
+
358
+ const backoffMs = 2 ** retryIndex * 1000;
359
+ const delayMs = Math.max(backoffMs, retryAfterMs);
360
+
361
+ console.warn(
362
+ `\n⏳ Retry ${retryIndex}/${MAX_RETRIES} for batch of ${batch.length} in ${delayMs}ms`,
363
+ );
364
+ return timer(delayMs);
365
+ },
366
+ }),
367
+ map((allEmbeddings) => {
368
+ return batch.map((document, i) => {
369
+ embeddedCount++;
370
+ const total = metadataDone ? metadataCount : 0;
371
+ const pct = total > 0 ? ` ${Math.round((embeddedCount / total) * 100)}%` : '';
372
+ const denominator = total > 0 ? `/${total}` : '';
373
+ progress.update(
374
+ LINE_EMBED,
375
+ `🧠 Embedding [${embeddedCount}${denominator}]${pct} — ${document.metadata.source}`,
376
+ );
377
+ const metadata = { ...document.metadata, embedding: allEmbeddings[i] };
378
+ return { ...document, metadata };
379
+ });
380
+ }),
215
381
  ),
216
- toArray(),
217
- ),
382
+ EMBED_BATCH_CONCURRENCY,
218
383
  ),
384
+ finalize(() => {
385
+ embeddingDone = true;
386
+ progress.succeed(LINE_EMBED, `🧠 Embedded ${embeddedCount} documents`);
387
+ }),
219
388
  );
220
389
 
221
- // Update vector store
390
+ // Update vector store — batch documents and upsert concurrently
222
391
  const upsert$ = applyEmbedding$.pipe(
392
+ // Flatten file-level batches, then re-batch into groups of 20 for bulk upsert
393
+ concatMap((documents) => from(documents)),
394
+ bufferCount(20),
223
395
  mergeMap(async (documents) => {
224
- const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
396
+ const vectorStoreService = framework.ai.useIndex(options.indexName);
225
397
  if (documents.length === 0) {
226
398
  return undefined;
227
399
  }
228
- for (const document of documents) {
229
- console.log(`Adding entry [${document.id}] to vector store`, document.metadata.source);
230
- }
231
400
  if (!options.dryRun) {
232
- // For multiple chunks from same file, delete existing chunks first
233
- if (documents.length > 1) {
234
- const sources = documents
235
- .map((document) => document.metadata.source)
236
- .reduce((acc, source) => acc.add(source), new Set<string>());
237
-
238
- const filterExpression = Array.from(sources)
239
- .map((source) => `metadata/source eq '${source}'`)
240
- .join(' or ');
241
-
242
- // Fire-and-forget deletion (not awaited) - brief gap before new docs are indexed
243
- vectorStoreService.deleteDocuments({ filter: { filterExpression } });
244
- }
245
401
  await vectorStoreService.addDocuments(documents);
246
402
  }
247
403
  return {
248
404
  status: 'added',
249
405
  documents,
250
406
  } as UpdateVectorStoreResult;
251
- }),
407
+ }, UPSERT_CONCURRENCY),
252
408
  filter((result): result is UpdateVectorStoreResult => Boolean(result)),
253
409
  );
254
410
 
255
411
  // Execute pipeline
256
412
  // Track indexing results for reporting: deleted file paths and added document IDs
413
+ let indexedCount = 0;
257
414
  const indexingResults: { deleted: string[]; added: { source: string; id: string }[] } = {
258
415
  deleted: [],
259
416
  added: [],
260
417
  };
261
418
 
262
- // Execute pipeline: concat ensures deletions happen before additions
263
- // This subscription triggers lazy RxJS execution and tracks all results
264
- concat(delete$, upsert$).subscribe({
419
+ // Execute pipeline: merge runs deletions and additions concurrently so
420
+ // the embedding pipeline can start as soon as metadata-enriched documents
421
+ // are available, without waiting for all file discovery to complete.
422
+ progress.start(4);
423
+ merge(delete$, upsert$).subscribe({
265
424
  next: (result) => {
266
425
  // Track deleted files by relative path
267
426
  if (result.status === 'deleted') {
@@ -275,6 +434,12 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
275
434
  id: document.id,
276
435
  })),
277
436
  );
437
+ indexedCount += result.documents.length;
438
+ // Use embeddedCount as denominator — only show % once embedding is done
439
+ const total = embeddingDone ? embeddedCount : 0;
440
+ const pct = total > 0 ? ` ${Math.round((indexedCount / total) * 100)}%` : '';
441
+ const denominator = total > 0 ? `/${total}` : '';
442
+ progress.update(LINE_INDEX, `📤 Indexed [${indexedCount}${denominator}]${pct}`);
278
443
  }
279
444
  },
280
445
  error: (error) => {
@@ -282,8 +447,15 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
282
447
  process.exit(1);
283
448
  },
284
449
  complete: () => {
285
- // Pipeline completed - log results and exit
286
- console.log('🗂️ Indexing results:', indexingResults);
450
+ // Clear the progress block before final output
451
+ progress.clear();
452
+ // Pipeline completed - log summary
453
+ if (indexingResults.deleted.length > 0) {
454
+ console.log(`🗑️ Deleted: ${indexingResults.deleted.length} files`);
455
+ }
456
+ if (indexingResults.added.length > 0) {
457
+ console.log(`📥 Indexed: ${indexingResults.added.length} documents`);
458
+ }
287
459
  console.log('✅ Embeddings generation completed!');
288
460
  process.exit(0);
289
461
  },
package/src/config.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
2
2
  import type { FusionAIConfig } from '@equinor/fusion-framework-cli-plugin-ai-base';
3
+ import type { IndexSchemaConfig } from './schema.js';
3
4
 
4
5
  /**
5
6
  * Index-specific configuration for Fusion AI document indexing operations.
@@ -55,7 +56,21 @@ export interface IndexConfig {
55
56
  chunkSize?: number;
56
57
  /** Number of overlapping tokens between consecutive chunks. */
57
58
  chunkOverlap?: number;
59
+ /** Explicit vector dimensions for custom embedding models not in the known model map. */
60
+ dimensions?: number;
58
61
  };
62
+
63
+ /**
64
+ * Custom index schema that promotes frequently-filtered metadata to
65
+ * top-level Azure AI Search fields.
66
+ *
67
+ * When defined, the schema resolver runs after metadata enrichment and
68
+ * places resolved values as top-level document fields in Azure Search,
69
+ * enabling direct OData filters without the `any()` operator.
70
+ *
71
+ * @see {@link IndexSchemaConfig} for details and examples.
72
+ */
73
+ schema?: IndexSchemaConfig;
59
74
  }
60
75
 
61
76
  /**