@equinor/fusion-framework-cli-plugin-ai-index 1.0.6 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/CHANGELOG.md +74 -2
  2. package/README.md +105 -69
  3. package/dist/esm/bin/embed.js +28 -13
  4. package/dist/esm/bin/embed.js.map +1 -1
  5. package/dist/esm/delete-command.js +100 -0
  6. package/dist/esm/delete-command.js.map +1 -0
  7. package/dist/esm/delete-command.options.js +43 -0
  8. package/dist/esm/delete-command.options.js.map +1 -0
  9. package/dist/esm/{command.js → embeddings-command.js} +42 -28
  10. package/dist/esm/embeddings-command.js.map +1 -0
  11. package/dist/esm/{command.options.js → embeddings-command.options.js} +14 -7
  12. package/dist/esm/embeddings-command.options.js.map +1 -0
  13. package/dist/esm/index.js +37 -4
  14. package/dist/esm/index.js.map +1 -1
  15. package/dist/esm/search-command.js +198 -0
  16. package/dist/esm/search-command.js.map +1 -0
  17. package/dist/esm/utils/generate-chunk-id.js +17 -5
  18. package/dist/esm/utils/generate-chunk-id.js.map +1 -1
  19. package/dist/esm/utils/git/file-changes.js +26 -11
  20. package/dist/esm/utils/git/file-changes.js.map +1 -1
  21. package/dist/esm/utils/git/git-client.js +16 -7
  22. package/dist/esm/utils/git/git-client.js.map +1 -1
  23. package/dist/esm/utils/git/metadata.js +7 -3
  24. package/dist/esm/utils/git/metadata.js.map +1 -1
  25. package/dist/esm/utils/git/status.js +9 -3
  26. package/dist/esm/utils/git/status.js.map +1 -1
  27. package/dist/esm/utils/markdown/parser.js +53 -13
  28. package/dist/esm/utils/markdown/parser.js.map +1 -1
  29. package/dist/esm/utils/package-resolver.js +10 -6
  30. package/dist/esm/utils/package-resolver.js.map +1 -1
  31. package/dist/esm/utils/ts-doc/constants.js +4 -1
  32. package/dist/esm/utils/ts-doc/constants.js.map +1 -1
  33. package/dist/esm/utils/ts-doc/extractors.js +27 -13
  34. package/dist/esm/utils/ts-doc/extractors.js.map +1 -1
  35. package/dist/esm/utils/ts-doc/parser.js +19 -10
  36. package/dist/esm/utils/ts-doc/parser.js.map +1 -1
  37. package/dist/esm/version.js +1 -1
  38. package/dist/tsconfig.tsbuildinfo +1 -1
  39. package/dist/types/config.d.ts +51 -10
  40. package/dist/types/delete-command.d.ts +9 -0
  41. package/dist/types/delete-command.options.d.ts +32 -0
  42. package/dist/types/embeddings-command.d.ts +11 -0
  43. package/dist/types/embeddings-command.options.d.ts +40 -0
  44. package/dist/types/index.d.ts +19 -2
  45. package/dist/types/search-command.d.ts +8 -0
  46. package/dist/types/utils/generate-chunk-id.d.ts +17 -5
  47. package/dist/types/utils/git/file-changes.d.ts +26 -11
  48. package/dist/types/utils/git/git-client.d.ts +16 -7
  49. package/dist/types/utils/git/metadata.d.ts +7 -3
  50. package/dist/types/utils/git/status.d.ts +9 -3
  51. package/dist/types/utils/git/types.d.ts +15 -9
  52. package/dist/types/utils/markdown/parser.d.ts +23 -10
  53. package/dist/types/utils/markdown/types.d.ts +13 -2
  54. package/dist/types/utils/package-resolver.d.ts +8 -5
  55. package/dist/types/utils/ts-doc/constants.d.ts +4 -1
  56. package/dist/types/utils/ts-doc/extractors.d.ts +27 -13
  57. package/dist/types/utils/ts-doc/parser.d.ts +19 -10
  58. package/dist/types/utils/ts-doc/types.d.ts +12 -4
  59. package/dist/types/utils/types.d.ts +10 -6
  60. package/dist/types/version.d.ts +1 -1
  61. package/package.json +12 -10
  62. package/src/bin/delete-removed-files.ts +1 -1
  63. package/src/bin/embed.ts +47 -18
  64. package/src/bin/file-stream.ts +1 -1
  65. package/src/bin/get-diff.ts +1 -1
  66. package/src/bin/types.ts +1 -1
  67. package/src/config.ts +52 -10
  68. package/src/delete-command.options.ts +51 -0
  69. package/src/delete-command.ts +117 -0
  70. package/src/{command.options.ts → embeddings-command.options.ts} +16 -9
  71. package/src/{command.ts → embeddings-command.ts} +46 -28
  72. package/src/index.ts +38 -4
  73. package/src/search-command.ts +259 -0
  74. package/src/utils/generate-chunk-id.ts +17 -5
  75. package/src/utils/git/file-changes.ts +26 -11
  76. package/src/utils/git/git-client.ts +16 -7
  77. package/src/utils/git/metadata.ts +7 -3
  78. package/src/utils/git/status.ts +9 -3
  79. package/src/utils/git/types.ts +15 -9
  80. package/src/utils/markdown/parser.ts +54 -13
  81. package/src/utils/markdown/types.ts +13 -2
  82. package/src/utils/package-resolver.ts +10 -6
  83. package/src/utils/ts-doc/constants.ts +4 -1
  84. package/src/utils/ts-doc/extractors.ts +27 -13
  85. package/src/utils/ts-doc/parser.ts +19 -10
  86. package/src/utils/ts-doc/types.ts +12 -4
  87. package/src/utils/types.ts +10 -6
  88. package/src/version.ts +1 -1
  89. package/dist/esm/command.js.map +0 -1
  90. package/dist/esm/command.options.js.map +0 -1
  91. package/dist/types/command.d.ts +0 -2
  92. package/dist/types/command.options.d.ts +0 -33
@@ -1,20 +1,28 @@
1
1
  import type { VectorStoreDocument, VectorStoreDocumentMetadata } from '@equinor/fusion-framework-module-ai/lib';
2
2
  /**
3
- * TypeScript document metadata
3
+ * Metadata shape for documents generated from TypeScript source files.
4
+ *
5
+ * Extends the base vector-store metadata with TSDoc-specific fields.
4
6
  */
5
7
  export type TypescriptMetadata = VectorStoreDocumentMetadata<{
8
+ /** Discriminator identifying the document as extracted from TSDoc. */
6
9
  type: 'tsdoc';
10
+ /** The `ts-morph` syntax-kind name (e.g. `'FunctionDeclaration'`). */
7
11
  ts_kind: string;
12
+ /** Name of the TypeScript symbol (function, class, interface, etc.). */
8
13
  ts_name: string;
9
14
  }>;
10
15
  /**
11
- * TypeScript document with TSDoc metadata
16
+ * A vector-store document originating from a TypeScript source file.
17
+ *
18
+ * Contains the extracted TSDoc comment (and optionally the type signature)
19
+ * together with {@link TypescriptMetadata}.
12
20
  */
13
21
  export type TypescriptDocument = VectorStoreDocument<TypescriptMetadata>;
14
22
  /**
15
- * Options for parsing TypeScript documents
23
+ * Options for controlling TypeScript document parsing behaviour.
16
24
  */
17
25
  export interface ParseTsDocOptions {
18
- /** The project root path for generating relative paths */
26
+ /** Absolute path to the project root, used to compute relative source paths. */
19
27
  projectRoot?: string;
20
28
  }
@@ -1,17 +1,21 @@
1
1
  /**
2
- * File change status in git
2
+ * Git-tracked change status of a source file.
3
+ *
4
+ * - `'new'` — file is untracked or newly added.
5
+ * - `'modified'` — file is tracked and has been changed.
6
+ * - `'removed'` — file has been deleted.
3
7
  */
4
8
  export type FileStatus = 'new' | 'modified' | 'removed';
5
9
  /**
6
- * Source file information for processing
10
+ * Represents a source file to be indexed, enriched with path and git status info.
7
11
  */
8
12
  export type SourceFile = {
9
- /** Absolute file path */
13
+ /** Absolute file system path. */
10
14
  path: string;
11
- /** Project root directory (git repository root) */
15
+ /** Absolute path to the git repository root. */
12
16
  projectRoot?: string;
13
- /** Relative path from project root */
17
+ /** Path relative to {@link projectRoot}. */
14
18
  relativePath?: string;
15
- /** Git change status */
19
+ /** Current git change status. */
16
20
  status: FileStatus;
17
21
  };
@@ -1 +1 @@
1
- export declare const version = "1.0.6";
1
+ export declare const version = "2.0.0";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@equinor/fusion-framework-cli-plugin-ai-index",
3
- "version": "1.0.6",
3
+ "version": "2.0.0",
4
4
  "description": "AI indexing plugin for Fusion Framework CLI providing document embedding and chunking utilities",
5
5
  "main": "dist/esm/index.js",
6
6
  "type": "module",
@@ -39,6 +39,7 @@
39
39
  },
40
40
  "dependencies": {
41
41
  "@azure/search-documents": "^12.2.0",
42
+ "@langchain/core": "^1.0.1",
42
43
  "@langchain/textsplitters": "^1.0.0",
43
44
  "commander": "^14.0.1",
44
45
  "find-up": "^8.0.0",
@@ -47,22 +48,23 @@
47
48
  "multimatch": "^8.0.0",
48
49
  "read-package-up": "^12.0.0",
49
50
  "rxjs": "^7.8.1",
50
- "simple-git": "^3.28.0",
51
+ "simple-git": "^3.32.3",
51
52
  "tree-sitter": "^0.25.0",
52
53
  "tree-sitter-typescript": "^0.23.2",
53
54
  "ts-morph": "^27.0.2",
54
- "zod": "^4.3.5",
55
- "@equinor/fusion-framework-cli-plugin-ai-base": "1.0.5",
56
- "@equinor/fusion-framework-module": "5.0.6",
57
- "@equinor/fusion-imports": "1.1.11",
58
- "@equinor/fusion-framework-module-ai": "2.0.2"
55
+ "zod": "^4.3.6",
56
+ "@equinor/fusion-framework-cli-plugin-ai-base": "2.0.0",
57
+ "@equinor/fusion-framework-module": "6.0.0",
58
+ "@equinor/fusion-framework-module-ai": "3.0.0",
59
+ "@equinor/fusion-imports": "2.0.0"
59
60
  },
60
61
  "peerDependencies": {
61
- "@equinor/fusion-framework-cli": "^13.3.16"
62
+ "@equinor/fusion-framework-cli": "^14.0.0"
62
63
  },
63
64
  "devDependencies": {
64
- "typescript": "^5.8.2",
65
- "vitest": "^3.2.4"
65
+ "typescript": "^5.9.3",
66
+ "vitest": "^4.1.0",
67
+ "@equinor/fusion-framework-cli": "^14.0.0"
66
68
  },
67
69
  "scripts": {
68
70
  "build": "tsc -b",
@@ -2,7 +2,7 @@ import { map, mergeMap, toArray, filter } from 'rxjs';
2
2
  import type { Observable } from 'rxjs';
3
3
  import type { FrameworkInstance } from '@equinor/fusion-framework-cli-plugin-ai-base';
4
4
  import type { ProcessedFile, DeleteRemovedFilesResult } from './types.js';
5
- import type { CommandOptions } from '../command.options.js';
5
+ import type { CommandOptions } from '../embeddings-command.options.js';
6
6
 
7
7
  /**
8
8
  * Creates a stream that deletes removed files from the vector store.
package/src/bin/embed.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import { globbyStream } from 'globby';
2
2
  import { relative } from 'node:path';
3
3
  import multimatch from 'multimatch';
4
- import { concat, from, merge } from 'rxjs';
5
- import { concatMap, filter, map, mergeMap, shareReplay, toArray } from 'rxjs/operators';
4
+ import { concat, from, merge, timer } from 'rxjs';
5
+ import { concatMap, filter, map, mergeMap, retry, shareReplay, toArray } from 'rxjs/operators';
6
6
 
7
7
  import { isMarkdownFile, parseMarkdownFile } from '../utils/markdown/index.js';
8
8
  import { getFileStatus, resolveProjectRoot } from '../utils/git/index.js';
@@ -36,6 +36,8 @@ const defaultIgnore = ['node_modules', '**/node_modules/**', 'dist', '**/dist/**
36
36
  export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
37
37
  const { framework, options, config, filePatterns } = binOptions;
38
38
 
39
+ console.log(`📇 Index: ${options.azureSearchIndexName}`);
40
+
39
41
  // Handle clean operation (destructive - deletes all existing documents)
40
42
  const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
41
43
  if (options.clean && !options.dryRun) {
@@ -62,11 +64,14 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
62
64
  // to prevent traversing these directories entirely.
63
65
  const ignore = config.index?.ignore ?? defaultIgnore;
64
66
 
67
+ // Respect .gitignore by default; configs targeting build artifacts can opt out.
68
+ const gitignore = config.index?.gitignore ?? true;
69
+
65
70
  return from(
66
71
  globbyStream(filePatterns, {
67
72
  ignore,
68
73
  onlyFiles: true,
69
- gitignore: true,
74
+ gitignore,
70
75
  absolute: true,
71
76
  }),
72
77
  ).pipe(
@@ -165,25 +170,49 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
165
170
  // Apply metadata to documents
166
171
  const applyMetadata$ = applyMetadata(merge(rawFiles$, markdown$, typescript$), config.index);
167
172
 
168
- // Generate embeddings
173
+ // Generate embeddings with a concurrency limit; failed requests (e.g. 429 rate-limit errors) are retried with backoff
169
174
  const embeddingService = framework.ai.getService('embeddings', options.openaiEmbeddingDeployment);
175
+
176
+ /** Maximum parallel embedding requests to avoid hitting Azure OpenAI TPM limits. */
177
+ const EMBEDDING_CONCURRENCY = 5;
178
+
179
+ /** Maximum retry attempts for transient / rate-limit errors per chunk. */
180
+ const MAX_RETRIES = 4;
181
+
170
182
  const applyEmbedding$ = applyMetadata$.pipe(
171
183
  mergeMap((documents) =>
172
184
  from(documents).pipe(
173
- mergeMap(async (document) => {
174
- console.log('embedding document', document.metadata.source);
175
- const embeddings = await embeddingService
176
- .embedQuery(document.pageContent)
177
- .catch((error) => {
178
- console.error(
179
- `❌ Error: ${error instanceof Error ? error.message : 'Unknown error'}`,
180
- );
181
- console.error('document', document);
182
- process.exit(1);
183
- });
184
- const metadata = { ...document.metadata, embedding: embeddings };
185
- return { ...document, metadata };
186
- }),
185
+ // Limit concurrency to avoid overwhelming the embedding API
186
+ mergeMap(
187
+ (document) =>
188
+ from(embeddingService.embedQuery(document.pageContent)).pipe(
189
+ retry({
190
+ count: MAX_RETRIES,
191
+ delay: (error, retryIndex) => {
192
+ // Parse Retry-After header when available (Azure sends seconds)
193
+ const retryAfterSec =
194
+ error?.response?.headers?.get?.('retry-after') ??
195
+ error?.responseHeaders?.['retry-after'];
196
+ const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
197
+
198
+ // Exponential backoff: 2s, 4s, 8s, 16s — or Retry-After if larger
199
+ const backoffMs = 2 ** retryIndex * 1000;
200
+ const delayMs = Math.max(backoffMs, retryAfterMs);
201
+
202
+ console.warn(
203
+ `⏳ Retry ${retryIndex}/${MAX_RETRIES} for "${document.metadata.source}" in ${delayMs}ms`,
204
+ );
205
+ return timer(delayMs);
206
+ },
207
+ }),
208
+ map((embeddings) => {
209
+ console.log('embedding document', document.metadata.source);
210
+ const metadata = { ...document.metadata, embedding: embeddings };
211
+ return { ...document, metadata };
212
+ }),
213
+ ),
214
+ EMBEDDING_CONCURRENCY,
215
+ ),
187
216
  toArray(),
188
217
  ),
189
218
  ),
@@ -3,7 +3,7 @@ import { from, mergeMap, concatMap, shareReplay } from 'rxjs';
3
3
  import type { Observable } from 'rxjs';
4
4
  import { getFileStatus } from '../utils/git/index.js';
5
5
  import type { ChangedFile } from '../utils/git/index.js';
6
- import type { CommandOptions } from '../command.options.js';
6
+ import type { CommandOptions } from '../embeddings-command.options.js';
7
7
 
8
8
  /**
9
9
  * Creates a file stream based on diff mode or glob patterns.
@@ -1,6 +1,6 @@
1
1
  import type { ChangedFile } from '../utils/git/index.js';
2
2
  import { getChangedFiles, getGitStatus } from '../utils/git/index.js';
3
- import type { CommandOptions } from '../command.options.js';
3
+ import type { CommandOptions } from '../embeddings-command.options.js';
4
4
 
5
5
  /**
6
6
  * Handles diff-based processing to get changed files from git.
package/src/bin/types.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
2
2
  import type { ChangedFile } from '../utils/git/index.js';
3
- import type { CommandOptions } from '../command.options.js';
3
+ import type { CommandOptions } from '../embeddings-command.options.js';
4
4
  import type { FrameworkInstance } from '@equinor/fusion-framework-cli-plugin-ai-base';
5
5
  import type { FusionAIConfigWithIndex } from '../config.js';
6
6
 
package/src/config.ts CHANGED
@@ -2,38 +2,80 @@ import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/li
2
2
  import type { FusionAIConfig } from '@equinor/fusion-framework-cli-plugin-ai-base';
3
3
 
4
4
  /**
5
- * Index-specific configuration for Fusion AI operations
5
+ * Index-specific configuration for Fusion AI document indexing operations.
6
+ *
7
+ * Controls which files are collected, how they are chunked, and what metadata
8
+ * is attached before being sent to the Azure AI Search vector store.
9
+ *
10
+ * @example
11
+ * ```ts
12
+ * const indexConfig: IndexConfig = {
13
+ * patterns: ['src/\**\/*.ts', 'docs/\**\/*.md'],
14
+ * ignore: ['dist/\**', 'node_modules/\**'],
15
+ * metadata: { resolvePackage: true, resolveGit: true },
16
+ * embedding: { chunkSize: 2000, chunkOverlap: 300 },
17
+ * };
18
+ * ```
6
19
  */
7
20
  export interface IndexConfig {
21
+ /** Azure AI Search index name. Overridden by the `--azure-search-index-name` CLI flag. */
22
+ name?: string;
23
+ /** Azure OpenAI embedding deployment name. Overridden by the `--openai-embedding-deployment` CLI flag. */
24
+ model?: string;
25
+ // Glob patterns for files to process (defaults to ['**/*.ts', '**/*.md', '**/*.mdx']).
8
26
  patterns?: string[];
9
- /** Files will be processed as is, without any chunking or transformation */
27
+ /** Glob patterns for files that should be indexed as-is, without chunking or transformation. */
10
28
  rawPatterns?: string[];
11
- /** Globby patterns to ignored, only used when providing paths to the command */
29
+ /** Glob patterns to ignore; only applied when file paths are provided to the command. */
12
30
  ignore?: string[];
13
- /** Metadata processing configuration */
31
+ /** Respect `.gitignore` rules when globbing files. Defaults to `true`. Set to `false` for build-output directories that are gitignored. */
32
+ gitignore?: boolean;
33
+ /** Metadata processing configuration. */
14
34
  metadata?: {
15
- /** Automatically resolve package information from source file paths */
35
+ /** Automatically resolve the nearest `package.json` and attach package name/version/keywords. */
16
36
  resolvePackage?: boolean;
37
+ /** Resolve git metadata (commit hash, date, permalink) for each source file. Defaults to `true`. */
17
38
  resolveGit?: boolean;
18
- /** Custom metadata processors to transform metadata before embedding */
39
+ /**
40
+ * Custom callback to transform document attributes before embedding.
41
+ *
42
+ * @param metadata - The current attribute map for the document.
43
+ * @param document - The full vector-store document being processed.
44
+ * @returns The transformed attribute map.
45
+ */
19
46
  attributeProcessor?: (
20
47
  metadata: Record<string, unknown>,
21
48
  document: VectorStoreDocument,
22
49
  ) => Record<string, unknown>;
23
50
  };
24
51
 
25
- /** Embedding generation configuration */
52
+ /** Embedding generation configuration. */
26
53
  embedding?: {
27
- /** Size of text chunks for embedding */
54
+ /** Maximum token size of each text chunk sent for embedding generation. */
28
55
  chunkSize?: number;
29
- /** Overlap between chunks */
56
+ /** Number of overlapping tokens between consecutive chunks. */
30
57
  chunkOverlap?: number;
31
58
  };
32
59
  }
33
60
 
34
61
  /**
35
- * Extended Fusion AI configuration with index-specific settings
62
+ * Fusion AI configuration extended with {@link IndexConfig | index-specific settings}.
63
+ *
64
+ * Used as the return type of `configureFusionAI()` when the `ai index add` or
65
+ * `ai index remove` commands are configured.
66
+ *
67
+ * @example
68
+ * ```ts
69
+ * import { configureFusionAI, type FusionAIConfigWithIndex } from '@equinor/fusion-framework-cli-plugin-ai-index';
70
+ *
71
+ * export default configureFusionAI((): FusionAIConfigWithIndex => ({
72
+ * index: {
73
+ * patterns: ['packages/\**\/*.ts', 'packages/\**\/*.md'],
74
+ * },
75
+ * }));
76
+ * ```
36
77
  */
37
78
  export interface FusionAIConfigWithIndex extends FusionAIConfig {
79
+ /** Index-specific configuration for document collection, chunking, and metadata. */
38
80
  index?: IndexConfig;
39
81
  }
@@ -0,0 +1,51 @@
1
+ import { z } from 'zod';
2
+
3
+ import { AiOptionsSchema } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
4
+
5
+ /**
6
+ * Zod schema for validating options of the `ai index remove` command.
7
+ *
8
+ * Extends the base AI options schema ({@link AiOptionsSchema}) to require
9
+ * Azure Search credentials and the embedding deployment (needed to initialise
10
+ * the vector store service for document removal).
11
+ *
12
+ * @example
13
+ * ```ts
14
+ * const validated = await DeleteOptionsSchema.parseAsync(rawOptions);
15
+ * // validated.dryRun, validated.filter, validated.azureSearchEndpoint, etc.
16
+ * ```
17
+ */
18
+ export const DeleteOptionsSchema = AiOptionsSchema.extend({
19
+ openaiEmbeddingDeployment: z
20
+ .string({ message: 'Embedding deployment name is required to initialise the vector store.' })
21
+ .min(1, 'Embedding deployment name must be a non-empty string.')
22
+ .describe('Azure OpenAI embedding deployment name'),
23
+ azureSearchEndpoint: z
24
+ .string({ message: 'Azure Search endpoint is required for deletion.' })
25
+ .url('Azure Search endpoint must be a valid URL.')
26
+ .min(1, 'Azure Search endpoint must be a non-empty string.')
27
+ .describe('Azure Search endpoint URL'),
28
+ azureSearchApiKey: z
29
+ .string({ message: 'Azure Search API key is required for deletion.' })
30
+ .min(1, 'Azure Search API key must be a non-empty string.')
31
+ .describe('Azure Search API key'),
32
+ azureSearchIndexName: z
33
+ .string({ message: 'Azure Search index name is required for deletion.' })
34
+ .min(1, 'Azure Search index name must be a non-empty string.')
35
+ .describe('Azure Search index name'),
36
+ dryRun: z
37
+ .boolean({ message: 'dryRun must be a boolean value.' })
38
+ .describe('Preview what would be deleted without making changes'),
39
+ filter: z
40
+ .string()
41
+ .min(1, 'Filter expression must be a non-empty string.')
42
+ .optional()
43
+ .describe('Raw OData filter expression for selecting documents to delete'),
44
+ }).describe('Command options for the delete command');
45
+
46
+ /**
47
+ * Validated options for the `ai index remove` command.
48
+ *
49
+ * Inferred from {@link DeleteOptionsSchema}.
50
+ */
51
+ export type DeleteOptions = z.infer<typeof DeleteOptionsSchema>;
@@ -0,0 +1,117 @@
1
+ import { createCommand, createOption } from 'commander';
2
+
3
+ import { setupFramework } from '@equinor/fusion-framework-cli-plugin-ai-base';
4
+ import { withOptions as withAiOptions } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
5
+
6
+ import { DeleteOptionsSchema, type DeleteOptions } from './delete-command.options.js';
7
+
8
+ /**
9
+ * Builds an OData filter expression from source paths and/or a raw filter.
10
+ *
11
+ * Source paths are joined with `or`; a raw `--filter` expression is used
12
+ * directly. When both are supplied, source-path filters take precedence
13
+ * to prevent unintentional broad deletions.
14
+ *
15
+ * @param sources - Relative file paths to match against `metadata/source`.
16
+ * @param rawFilter - A raw OData filter expression supplied via `--filter`.
17
+ * @returns The resulting OData filter string, or `undefined` when neither
18
+ * sources nor a raw filter were provided.
19
+ */
20
+ function buildFilter(sources: string[], rawFilter?: string): string | undefined {
21
+ if (sources.length > 0) {
22
+ return sources.map((s) => `metadata/source eq '${s}'`).join(' or ');
23
+ }
24
+ return rawFilter;
25
+ }
26
+
27
+ /**
28
+ * CLI command: `ai index remove`
29
+ *
30
+ * Removes documents from the Azure AI Search index by source path or OData filter.
31
+ *
32
+ * Use this when you need to remove stale, renamed, or noisy documents from the
33
+ * vector store without running a full re-index.
34
+ *
35
+ * Usage:
36
+ * $ ffc ai index remove [options] [source-paths...]
37
+ *
38
+ * Arguments:
39
+ * source-paths One or more relative file paths whose indexed chunks should
40
+ * be removed (e.g. packages/modules/services/src/foo.ts).
41
+ *
42
+ * Options:
43
+ * --filter <expr> Raw OData filter expression for advanced selection
44
+ * (e.g. "metadata/source eq 'src/old-file.ts'").
45
+ * --dry-run Preview matching documents without deleting them.
46
+ *
47
+ * Examples:
48
+ * # Remove by source paths
49
+ * $ ffc ai index remove src/old-module.ts src/legacy/helper.ts
50
+ *
51
+ * # Preview what would be removed (dry-run)
52
+ * $ ffc ai index remove --dry-run src/old-module.ts
53
+ *
54
+ * # Remove using a raw OData filter
55
+ * $ ffc ai index remove --filter "metadata/source eq 'src/old-module.ts'"
56
+ *
57
+ * # Remove all chunks from a package
58
+ * $ ffc ai index remove --filter "metadata/attributes/any(a: a/key eq 'pkg_name' and a/value eq '@equinor/my-pkg')"
59
+ */
60
+ const _command = createCommand('remove')
61
+ .description('Remove documents from the search index by source path or OData filter')
62
+ .addOption(
63
+ createOption('--dry-run', 'Preview matching documents without deleting them').default(false),
64
+ )
65
+ .addOption(
66
+ createOption(
67
+ '--filter <expression>',
68
+ 'Raw OData filter expression for selecting documents to delete',
69
+ ),
70
+ )
71
+ .argument('[source-paths...]', 'Relative file paths whose indexed chunks should be removed')
72
+ .action(async (sources: string[], commandOptions: DeleteOptions) => {
73
+ const options = await DeleteOptionsSchema.parseAsync(commandOptions);
74
+ const filterExpression = buildFilter(sources, options.filter);
75
+
76
+ if (!filterExpression) {
77
+ throw new Error(
78
+ 'Nothing to delete. Provide source file paths as arguments or pass a --filter expression.',
79
+ );
80
+ }
81
+
82
+ if (sources.length > 0) {
83
+ console.log(`\nTargeting ${sources.length} source path(s):\n`);
84
+ for (const src of sources.sort()) {
85
+ console.log(` ${src}`);
86
+ }
87
+ } else {
88
+ console.log(`\nFilter: ${filterExpression}`);
89
+ }
90
+
91
+ if (options.dryRun) {
92
+ console.log('\n🔍 Dry run — no documents were deleted.');
93
+ console.log(` Would apply filter: ${filterExpression}`);
94
+ return;
95
+ }
96
+
97
+ const framework = await setupFramework(options);
98
+ const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
99
+ await vectorStoreService.deleteDocuments({
100
+ filter: { filterExpression },
101
+ });
102
+
103
+ console.log(`\n✅ Deleted chunks matching filter.`);
104
+ });
105
+
106
+ /**
107
+ * Configured Commander command for the `ai index remove` subcommand.
108
+ *
109
+ * This constant is the fully-configured {@link Command} instance with all
110
+ * AI-specific options (embedding deployment, Azure Search credentials) applied
111
+ * via `withAiOptions`. It is registered with the CLI automatically by
112
+ * {@link registerAiPlugin}.
113
+ */
114
+ export const deleteCommand = withAiOptions(_command, {
115
+ includeEmbedding: true,
116
+ includeSearch: true,
117
+ });
@@ -3,14 +3,21 @@ import { z } from 'zod';
3
3
  import { AiOptionsSchema } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
4
4
 
5
5
  /**
6
- * Zod schema for validating command options for the embeddings command.
6
+ * Zod schema for validating command options for the `ai index add` command.
7
7
  *
8
- * This schema extends the base AI options schema with embeddings-specific options,
9
- * ensuring type safety and runtime validation of command arguments.
8
+ * Extends the base AI options schema ({@link AiOptionsSchema}) with
9
+ * add-specific options such as `--dry-run`, `--diff`, `--config`,
10
+ * `--base-ref`, and `--clean`.
10
11
  *
11
- * Note: Some optional AI options become required for the embeddings command
12
- * (openaiEmbeddingDeployment, azureSearchEndpoint, azureSearchApiKey, azureSearchIndexName)
13
- * because the command uses withAiOptions with includeEmbedding and includeSearch set to true.
12
+ * Azure Search and embedding options that are optional in the base schema
13
+ * become **required** because the add command always writes to a
14
+ * vector store.
15
+ *
16
+ * @example
17
+ * ```ts
18
+ * const validated = await CommandOptionsSchema.parseAsync(rawOptions);
19
+ * // validated.dryRun, validated.azureSearchEndpoint, etc.
20
+ * ```
14
21
  */
15
22
  export const CommandOptionsSchema = AiOptionsSchema.extend({
16
23
  // Override optional AI options to make them required for embeddings command
@@ -50,9 +57,9 @@ export const CommandOptionsSchema = AiOptionsSchema.extend({
50
57
  }).describe('Command options for the embeddings command');
51
58
 
52
59
  /**
53
- * Type representing the validated command options.
60
+ * Validated options for the `ai index add` command.
54
61
  *
55
- * This type is inferred from the Zod schema and should be used throughout the command
56
- * to ensure type safety and consistency with the schema.
62
+ * Inferred from {@link CommandOptionsSchema} and used as the single
63
+ * source of truth for option types throughout the add/embeddings pipeline.
57
64
  */
58
65
  export type CommandOptions = z.infer<typeof CommandOptionsSchema>;