@equinor/fusion-framework-cli-plugin-ai-index 1.0.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +91 -2
- package/README.md +105 -69
- package/dist/esm/bin/embed.js +28 -13
- package/dist/esm/bin/embed.js.map +1 -1
- package/dist/esm/delete-command.js +100 -0
- package/dist/esm/delete-command.js.map +1 -0
- package/dist/esm/delete-command.options.js +43 -0
- package/dist/esm/delete-command.options.js.map +1 -0
- package/dist/esm/{command.js → embeddings-command.js} +42 -28
- package/dist/esm/embeddings-command.js.map +1 -0
- package/dist/esm/{command.options.js → embeddings-command.options.js} +14 -7
- package/dist/esm/embeddings-command.options.js.map +1 -0
- package/dist/esm/index.js +37 -4
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/search-command.js +198 -0
- package/dist/esm/search-command.js.map +1 -0
- package/dist/esm/utils/generate-chunk-id.js +17 -5
- package/dist/esm/utils/generate-chunk-id.js.map +1 -1
- package/dist/esm/utils/git/file-changes.js +26 -11
- package/dist/esm/utils/git/file-changes.js.map +1 -1
- package/dist/esm/utils/git/git-client.js +16 -7
- package/dist/esm/utils/git/git-client.js.map +1 -1
- package/dist/esm/utils/git/metadata.js +7 -3
- package/dist/esm/utils/git/metadata.js.map +1 -1
- package/dist/esm/utils/git/status.js +9 -3
- package/dist/esm/utils/git/status.js.map +1 -1
- package/dist/esm/utils/markdown/parser.js +53 -13
- package/dist/esm/utils/markdown/parser.js.map +1 -1
- package/dist/esm/utils/package-resolver.js +10 -6
- package/dist/esm/utils/package-resolver.js.map +1 -1
- package/dist/esm/utils/ts-doc/constants.js +4 -1
- package/dist/esm/utils/ts-doc/constants.js.map +1 -1
- package/dist/esm/utils/ts-doc/extractors.js +27 -13
- package/dist/esm/utils/ts-doc/extractors.js.map +1 -1
- package/dist/esm/utils/ts-doc/parser.js +19 -10
- package/dist/esm/utils/ts-doc/parser.js.map +1 -1
- package/dist/esm/version.js +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types/config.d.ts +51 -10
- package/dist/types/delete-command.d.ts +9 -0
- package/dist/types/delete-command.options.d.ts +32 -0
- package/dist/types/embeddings-command.d.ts +11 -0
- package/dist/types/embeddings-command.options.d.ts +40 -0
- package/dist/types/index.d.ts +19 -2
- package/dist/types/search-command.d.ts +8 -0
- package/dist/types/utils/generate-chunk-id.d.ts +17 -5
- package/dist/types/utils/git/file-changes.d.ts +26 -11
- package/dist/types/utils/git/git-client.d.ts +16 -7
- package/dist/types/utils/git/metadata.d.ts +7 -3
- package/dist/types/utils/git/status.d.ts +9 -3
- package/dist/types/utils/git/types.d.ts +15 -9
- package/dist/types/utils/markdown/parser.d.ts +23 -10
- package/dist/types/utils/markdown/types.d.ts +13 -2
- package/dist/types/utils/package-resolver.d.ts +8 -5
- package/dist/types/utils/ts-doc/constants.d.ts +4 -1
- package/dist/types/utils/ts-doc/extractors.d.ts +27 -13
- package/dist/types/utils/ts-doc/parser.d.ts +19 -10
- package/dist/types/utils/ts-doc/types.d.ts +12 -4
- package/dist/types/utils/types.d.ts +10 -6
- package/dist/types/version.d.ts +1 -1
- package/package.json +13 -11
- package/src/bin/delete-removed-files.ts +1 -1
- package/src/bin/embed.ts +47 -18
- package/src/bin/file-stream.ts +1 -1
- package/src/bin/get-diff.ts +1 -1
- package/src/bin/types.ts +1 -1
- package/src/config.ts +52 -10
- package/src/delete-command.options.ts +51 -0
- package/src/delete-command.ts +117 -0
- package/src/{command.options.ts → embeddings-command.options.ts} +16 -9
- package/src/{command.ts → embeddings-command.ts} +46 -28
- package/src/index.ts +38 -4
- package/src/search-command.ts +259 -0
- package/src/utils/generate-chunk-id.ts +17 -5
- package/src/utils/git/file-changes.ts +26 -11
- package/src/utils/git/git-client.ts +16 -7
- package/src/utils/git/metadata.ts +7 -3
- package/src/utils/git/status.ts +9 -3
- package/src/utils/git/types.ts +15 -9
- package/src/utils/markdown/parser.ts +54 -13
- package/src/utils/markdown/types.ts +13 -2
- package/src/utils/package-resolver.ts +10 -6
- package/src/utils/ts-doc/constants.ts +4 -1
- package/src/utils/ts-doc/extractors.ts +27 -13
- package/src/utils/ts-doc/parser.ts +19 -10
- package/src/utils/ts-doc/types.ts +12 -4
- package/src/utils/types.ts +10 -6
- package/src/version.ts +1 -1
- package/dist/esm/command.js.map +0 -1
- package/dist/esm/command.options.js.map +0 -1
- package/dist/types/command.d.ts +0 -2
- package/dist/types/command.options.d.ts +0 -33
|
@@ -1,20 +1,28 @@
|
|
|
1
1
|
import type { VectorStoreDocument, VectorStoreDocumentMetadata } from '@equinor/fusion-framework-module-ai/lib';
|
|
2
2
|
/**
|
|
3
|
-
* TypeScript
|
|
3
|
+
* Metadata shape for documents generated from TypeScript source files.
|
|
4
|
+
*
|
|
5
|
+
* Extends the base vector-store metadata with TSDoc-specific fields.
|
|
4
6
|
*/
|
|
5
7
|
export type TypescriptMetadata = VectorStoreDocumentMetadata<{
|
|
8
|
+
/** Discriminator identifying the document as extracted from TSDoc. */
|
|
6
9
|
type: 'tsdoc';
|
|
10
|
+
/** The `ts-morph` syntax-kind name (e.g. `'FunctionDeclaration'`). */
|
|
7
11
|
ts_kind: string;
|
|
12
|
+
/** Name of the TypeScript symbol (function, class, interface, etc.). */
|
|
8
13
|
ts_name: string;
|
|
9
14
|
}>;
|
|
10
15
|
/**
|
|
11
|
-
*
|
|
16
|
+
* A vector-store document originating from a TypeScript source file.
|
|
17
|
+
*
|
|
18
|
+
* Contains the extracted TSDoc comment (and optionally the type signature)
|
|
19
|
+
* together with {@link TypescriptMetadata}.
|
|
12
20
|
*/
|
|
13
21
|
export type TypescriptDocument = VectorStoreDocument<TypescriptMetadata>;
|
|
14
22
|
/**
|
|
15
|
-
* Options for
|
|
23
|
+
* Options for controlling TypeScript document parsing behaviour.
|
|
16
24
|
*/
|
|
17
25
|
export interface ParseTsDocOptions {
|
|
18
|
-
/**
|
|
26
|
+
/** Absolute path to the project root, used to compute relative source paths. */
|
|
19
27
|
projectRoot?: string;
|
|
20
28
|
}
|
|
@@ -1,17 +1,21 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Git-tracked change status of a source file.
|
|
3
|
+
*
|
|
4
|
+
* - `'new'` — file is untracked or newly added.
|
|
5
|
+
* - `'modified'` — file is tracked and has been changed.
|
|
6
|
+
* - `'removed'` — file has been deleted.
|
|
3
7
|
*/
|
|
4
8
|
export type FileStatus = 'new' | 'modified' | 'removed';
|
|
5
9
|
/**
|
|
6
|
-
*
|
|
10
|
+
* Represents a source file to be indexed, enriched with path and git status info.
|
|
7
11
|
*/
|
|
8
12
|
export type SourceFile = {
|
|
9
|
-
/** Absolute file path */
|
|
13
|
+
/** Absolute file system path. */
|
|
10
14
|
path: string;
|
|
11
|
-
/**
|
|
15
|
+
/** Absolute path to the git repository root. */
|
|
12
16
|
projectRoot?: string;
|
|
13
|
-
/**
|
|
17
|
+
/** Path relative to {@link projectRoot}. */
|
|
14
18
|
relativePath?: string;
|
|
15
|
-
/**
|
|
19
|
+
/** Current git change status. */
|
|
16
20
|
status: FileStatus;
|
|
17
21
|
};
|
package/dist/types/version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const version = "
|
|
1
|
+
export declare const version = "2.0.0";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@equinor/fusion-framework-cli-plugin-ai-index",
|
|
3
|
-
"version": "1.0.5",
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"description": "AI indexing plugin for Fusion Framework CLI providing document embedding and chunking utilities",
|
|
5
5
|
"main": "dist/esm/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -39,30 +39,32 @@
|
|
|
39
39
|
},
|
|
40
40
|
"dependencies": {
|
|
41
41
|
"@azure/search-documents": "^12.2.0",
|
|
42
|
+
"@langchain/core": "^1.0.1",
|
|
42
43
|
"@langchain/textsplitters": "^1.0.0",
|
|
43
44
|
"commander": "^14.0.1",
|
|
44
45
|
"find-up": "^8.0.0",
|
|
45
46
|
"globby": "^16.1.0",
|
|
46
47
|
"gray-matter": "^4.0.3",
|
|
47
|
-
"multimatch": "^
|
|
48
|
+
"multimatch": "^8.0.0",
|
|
48
49
|
"read-package-up": "^12.0.0",
|
|
49
50
|
"rxjs": "^7.8.1",
|
|
50
|
-
"simple-git": "^3.
|
|
51
|
+
"simple-git": "^3.32.3",
|
|
51
52
|
"tree-sitter": "^0.25.0",
|
|
52
53
|
"tree-sitter-typescript": "^0.23.2",
|
|
53
54
|
"ts-morph": "^27.0.2",
|
|
54
|
-
"zod": "^4.3.
|
|
55
|
-
"@equinor/fusion-framework-
|
|
56
|
-
"@equinor/fusion-framework-module
|
|
57
|
-
"@equinor/fusion-
|
|
58
|
-
"@equinor/fusion-
|
|
55
|
+
"zod": "^4.3.6",
|
|
56
|
+
"@equinor/fusion-framework-cli-plugin-ai-base": "2.0.0",
|
|
57
|
+
"@equinor/fusion-framework-module": "6.0.0",
|
|
58
|
+
"@equinor/fusion-framework-module-ai": "3.0.0",
|
|
59
|
+
"@equinor/fusion-imports": "2.0.0"
|
|
59
60
|
},
|
|
60
61
|
"peerDependencies": {
|
|
61
|
-
"@equinor/fusion-framework-cli": "^
|
|
62
|
+
"@equinor/fusion-framework-cli": "^14.0.0"
|
|
62
63
|
},
|
|
63
64
|
"devDependencies": {
|
|
64
|
-
"typescript": "^5.
|
|
65
|
-
"vitest": "^
|
|
65
|
+
"typescript": "^5.9.3",
|
|
66
|
+
"vitest": "^4.1.0",
|
|
67
|
+
"@equinor/fusion-framework-cli": "^14.0.0"
|
|
66
68
|
},
|
|
67
69
|
"scripts": {
|
|
68
70
|
"build": "tsc -b",
|
|
@@ -2,7 +2,7 @@ import { map, mergeMap, toArray, filter } from 'rxjs';
|
|
|
2
2
|
import type { Observable } from 'rxjs';
|
|
3
3
|
import type { FrameworkInstance } from '@equinor/fusion-framework-cli-plugin-ai-base';
|
|
4
4
|
import type { ProcessedFile, DeleteRemovedFilesResult } from './types.js';
|
|
5
|
-
import type { CommandOptions } from '../command.options.js';
|
|
5
|
+
import type { CommandOptions } from '../embeddings-command.options.js';
|
|
6
6
|
|
|
7
7
|
/**
|
|
8
8
|
* Creates a stream that deletes removed files from the vector store.
|
package/src/bin/embed.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { globbyStream } from 'globby';
|
|
2
2
|
import { relative } from 'node:path';
|
|
3
3
|
import multimatch from 'multimatch';
|
|
4
|
-
import { concat, from, merge } from 'rxjs';
|
|
5
|
-
import { concatMap, filter, map, mergeMap, shareReplay, toArray } from 'rxjs/operators';
|
|
4
|
+
import { concat, from, merge, timer } from 'rxjs';
|
|
5
|
+
import { concatMap, filter, map, mergeMap, retry, shareReplay, toArray } from 'rxjs/operators';
|
|
6
6
|
|
|
7
7
|
import { isMarkdownFile, parseMarkdownFile } from '../utils/markdown/index.js';
|
|
8
8
|
import { getFileStatus, resolveProjectRoot } from '../utils/git/index.js';
|
|
@@ -36,6 +36,8 @@ const defaultIgnore = ['node_modules', '**/node_modules/**', 'dist', '**/dist/**
|
|
|
36
36
|
export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
37
37
|
const { framework, options, config, filePatterns } = binOptions;
|
|
38
38
|
|
|
39
|
+
console.log(`📇 Index: ${options.azureSearchIndexName}`);
|
|
40
|
+
|
|
39
41
|
// Handle clean operation (destructive - deletes all existing documents)
|
|
40
42
|
const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
|
|
41
43
|
if (options.clean && !options.dryRun) {
|
|
@@ -62,11 +64,14 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
62
64
|
// to prevent traversing these directories entirely.
|
|
63
65
|
const ignore = config.index?.ignore ?? defaultIgnore;
|
|
64
66
|
|
|
67
|
+
// Respect .gitignore by default; configs targeting build artifacts can opt out.
|
|
68
|
+
const gitignore = config.index?.gitignore ?? true;
|
|
69
|
+
|
|
65
70
|
return from(
|
|
66
71
|
globbyStream(filePatterns, {
|
|
67
72
|
ignore,
|
|
68
73
|
onlyFiles: true,
|
|
69
|
-
gitignore
|
|
74
|
+
gitignore,
|
|
70
75
|
absolute: true,
|
|
71
76
|
}),
|
|
72
77
|
).pipe(
|
|
@@ -165,25 +170,49 @@ export async function embed(binOptions: EmbeddingsBinOptions): Promise<void> {
|
|
|
165
170
|
// Apply metadata to documents
|
|
166
171
|
const applyMetadata$ = applyMetadata(merge(rawFiles$, markdown$, typescript$), config.index);
|
|
167
172
|
|
|
168
|
-
// Generate embeddings
|
|
173
|
+
// Generate embeddings with concurrency limit and retry on rate-limit (429) errors
|
|
169
174
|
const embeddingService = framework.ai.getService('embeddings', options.openaiEmbeddingDeployment);
|
|
175
|
+
|
|
176
|
+
/** Maximum parallel embedding requests to avoid hitting Azure OpenAI TPM limits. */
|
|
177
|
+
const EMBEDDING_CONCURRENCY = 5;
|
|
178
|
+
|
|
179
|
+
/** Maximum retry attempts for transient / rate-limit errors per chunk. */
|
|
180
|
+
const MAX_RETRIES = 4;
|
|
181
|
+
|
|
170
182
|
const applyEmbedding$ = applyMetadata$.pipe(
|
|
171
183
|
mergeMap((documents) =>
|
|
172
184
|
from(documents).pipe(
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
.embedQuery(document.pageContent)
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
185
|
+
// Limit concurrency to avoid overwhelming the embedding API
|
|
186
|
+
mergeMap(
|
|
187
|
+
(document) =>
|
|
188
|
+
from(embeddingService.embedQuery(document.pageContent)).pipe(
|
|
189
|
+
retry({
|
|
190
|
+
count: MAX_RETRIES,
|
|
191
|
+
delay: (error, retryIndex) => {
|
|
192
|
+
// Parse Retry-After header when available (Azure sends seconds)
|
|
193
|
+
const retryAfterSec =
|
|
194
|
+
error?.response?.headers?.get?.('retry-after') ??
|
|
195
|
+
error?.responseHeaders?.['retry-after'];
|
|
196
|
+
const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
|
|
197
|
+
|
|
198
|
+
// Exponential backoff: 2s, 4s, 8s, 16s — or Retry-After if larger
|
|
199
|
+
const backoffMs = 2 ** retryIndex * 1000;
|
|
200
|
+
const delayMs = Math.max(backoffMs, retryAfterMs);
|
|
201
|
+
|
|
202
|
+
console.warn(
|
|
203
|
+
`⏳ Retry ${retryIndex}/${MAX_RETRIES} for "${document.metadata.source}" in ${delayMs}ms`,
|
|
204
|
+
);
|
|
205
|
+
return timer(delayMs);
|
|
206
|
+
},
|
|
207
|
+
}),
|
|
208
|
+
map((embeddings) => {
|
|
209
|
+
console.log('embedding document', document.metadata.source);
|
|
210
|
+
const metadata = { ...document.metadata, embedding: embeddings };
|
|
211
|
+
return { ...document, metadata };
|
|
212
|
+
}),
|
|
213
|
+
),
|
|
214
|
+
EMBEDDING_CONCURRENCY,
|
|
215
|
+
),
|
|
187
216
|
toArray(),
|
|
188
217
|
),
|
|
189
218
|
),
|
package/src/bin/file-stream.ts
CHANGED
|
@@ -3,7 +3,7 @@ import { from, mergeMap, concatMap, shareReplay } from 'rxjs';
|
|
|
3
3
|
import type { Observable } from 'rxjs';
|
|
4
4
|
import { getFileStatus } from '../utils/git/index.js';
|
|
5
5
|
import type { ChangedFile } from '../utils/git/index.js';
|
|
6
|
-
import type { CommandOptions } from '../command.options.js';
|
|
6
|
+
import type { CommandOptions } from '../embeddings-command.options.js';
|
|
7
7
|
|
|
8
8
|
/**
|
|
9
9
|
* Creates a file stream based on diff mode or glob patterns.
|
package/src/bin/get-diff.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { ChangedFile } from '../utils/git/index.js';
|
|
2
2
|
import { getChangedFiles, getGitStatus } from '../utils/git/index.js';
|
|
3
|
-
import type { CommandOptions } from '../command.options.js';
|
|
3
|
+
import type { CommandOptions } from '../embeddings-command.options.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Handles diff-based processing to get changed files from git.
|
package/src/bin/types.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
|
|
2
2
|
import type { ChangedFile } from '../utils/git/index.js';
|
|
3
|
-
import type { CommandOptions } from '../command.options.js';
|
|
3
|
+
import type { CommandOptions } from '../embeddings-command.options.js';
|
|
4
4
|
import type { FrameworkInstance } from '@equinor/fusion-framework-cli-plugin-ai-base';
|
|
5
5
|
import type { FusionAIConfigWithIndex } from '../config.js';
|
|
6
6
|
|
package/src/config.ts
CHANGED
|
@@ -2,38 +2,80 @@ import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/li
|
|
|
2
2
|
import type { FusionAIConfig } from '@equinor/fusion-framework-cli-plugin-ai-base';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
|
-
* Index-specific configuration for Fusion AI operations
|
|
5
|
+
* Index-specific configuration for Fusion AI document indexing operations.
|
|
6
|
+
*
|
|
7
|
+
* Controls which files are collected, how they are chunked, and what metadata
|
|
8
|
+
* is attached before being sent to the Azure AI Search vector store.
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* ```ts
|
|
12
|
+
* const indexConfig: IndexConfig = {
|
|
13
|
+
* patterns: ['src/\**\/*.ts', 'docs/\**\/*.md'],
|
|
14
|
+
* ignore: ['dist/\**', 'node_modules/\**'],
|
|
15
|
+
* metadata: { resolvePackage: true, resolveGit: true },
|
|
16
|
+
* embedding: { chunkSize: 2000, chunkOverlap: 300 },
|
|
17
|
+
* };
|
|
18
|
+
* ```
|
|
6
19
|
*/
|
|
7
20
|
export interface IndexConfig {
|
|
21
|
+
/** Azure Cognitive Search index name. Overridden by the `--azure-search-index-name` CLI flag. */
|
|
22
|
+
name?: string;
|
|
23
|
+
/** Azure OpenAI embedding deployment name. Overridden by the `--openai-embedding-deployment` CLI flag. */
|
|
24
|
+
model?: string;
|
|
25
|
+
// Glob patterns for files to process (defaults to ['**/*.ts', '**/*.md', '**/*.mdx']).
|
|
8
26
|
patterns?: string[];
|
|
9
|
-
/**
|
|
27
|
+
/** Glob patterns for files that should be indexed as-is, without chunking or transformation. */
|
|
10
28
|
rawPatterns?: string[];
|
|
11
|
-
/**
|
|
29
|
+
/** Glob patterns to ignore — only applied when file paths are provided to the command. */
|
|
12
30
|
ignore?: string[];
|
|
13
|
-
/**
|
|
31
|
+
/** Respect `.gitignore` rules when globbing files. Defaults to `true`. Set to `false` for build-output directories that are gitignored. */
|
|
32
|
+
gitignore?: boolean;
|
|
33
|
+
/** Metadata processing configuration. */
|
|
14
34
|
metadata?: {
|
|
15
|
-
/** Automatically resolve package
|
|
35
|
+
/** Automatically resolve the nearest `package.json` and attach package name/version/keywords. */
|
|
16
36
|
resolvePackage?: boolean;
|
|
37
|
+
/** Resolve git metadata (commit hash, date, permalink) for each source file. Defaults to `true`. */
|
|
17
38
|
resolveGit?: boolean;
|
|
18
|
-
/**
|
|
39
|
+
/**
|
|
40
|
+
* Custom callback to transform document attributes before embedding.
|
|
41
|
+
*
|
|
42
|
+
* @param metadata - The current attribute map for the document.
|
|
43
|
+
* @param document - The full vector-store document being processed.
|
|
44
|
+
* @returns The transformed attribute map.
|
|
45
|
+
*/
|
|
19
46
|
attributeProcessor?: (
|
|
20
47
|
metadata: Record<string, unknown>,
|
|
21
48
|
document: VectorStoreDocument,
|
|
22
49
|
) => Record<string, unknown>;
|
|
23
50
|
};
|
|
24
51
|
|
|
25
|
-
/** Embedding generation configuration */
|
|
52
|
+
/** Embedding generation configuration. */
|
|
26
53
|
embedding?: {
|
|
27
|
-
/**
|
|
54
|
+
/** Maximum token size of each text chunk sent for embedding generation. */
|
|
28
55
|
chunkSize?: number;
|
|
29
|
-
/**
|
|
56
|
+
/** Number of overlapping tokens between consecutive chunks. */
|
|
30
57
|
chunkOverlap?: number;
|
|
31
58
|
};
|
|
32
59
|
}
|
|
33
60
|
|
|
34
61
|
/**
|
|
35
|
-
*
|
|
62
|
+
* Fusion AI configuration extended with {@link IndexConfig | index-specific settings}.
|
|
63
|
+
*
|
|
64
|
+
* Used as the return type of `configureFusionAI()` when the `ai index add` or
|
|
65
|
+
* `ai index remove` commands are configured.
|
|
66
|
+
*
|
|
67
|
+
* @example
|
|
68
|
+
* ```ts
|
|
69
|
+
* import { configureFusionAI, type FusionAIConfigWithIndex } from '@equinor/fusion-framework-cli-plugin-ai-index';
|
|
70
|
+
*
|
|
71
|
+
* export default configureFusionAI((): FusionAIConfigWithIndex => ({
|
|
72
|
+
* index: {
|
|
73
|
+
* patterns: ['packages/\**\/*.ts', 'packages/\**\/*.md'],
|
|
74
|
+
* },
|
|
75
|
+
* }));
|
|
76
|
+
* ```
|
|
36
77
|
*/
|
|
37
78
|
export interface FusionAIConfigWithIndex extends FusionAIConfig {
|
|
79
|
+
/** Index-specific configuration for document collection, chunking, and metadata. */
|
|
38
80
|
index?: IndexConfig;
|
|
39
81
|
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
import { AiOptionsSchema } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
|
|
4
|
+
|
|
5
|
+
// Individual field validators for the `ai index remove` options. They are
// declared separately so the schema below reads as a plain field listing.

/** Embedding deployment name; must be a non-empty string. */
const embeddingDeploymentField = z
  .string({ message: 'Embedding deployment name is required to initialise the vector store.' })
  .min(1, 'Embedding deployment name must be a non-empty string.')
  .describe('Azure OpenAI embedding deployment name');

/** Azure Search endpoint; checked as a URL first, then for non-emptiness. */
const searchEndpointField = z
  .string({ message: 'Azure Search endpoint is required for deletion.' })
  .url('Azure Search endpoint must be a valid URL.')
  .min(1, 'Azure Search endpoint must be a non-empty string.')
  .describe('Azure Search endpoint URL');

/** Azure Search API key; must be a non-empty string. */
const searchApiKeyField = z
  .string({ message: 'Azure Search API key is required for deletion.' })
  .min(1, 'Azure Search API key must be a non-empty string.')
  .describe('Azure Search API key');

/** Azure Search index name; must be a non-empty string. */
const searchIndexNameField = z
  .string({ message: 'Azure Search index name is required for deletion.' })
  .min(1, 'Azure Search index name must be a non-empty string.')
  .describe('Azure Search index name');

/** Dry-run switch; a boolean is required (no default is applied by the schema). */
const dryRunField = z
  .boolean({ message: 'dryRun must be a boolean value.' })
  .describe('Preview what would be deleted without making changes');

/** Optional raw OData filter; when present it must be non-empty. */
const filterField = z
  .string()
  .min(1, 'Filter expression must be a non-empty string.')
  .optional()
  .describe('Raw OData filter expression for selecting documents to delete');

/**
 * Zod schema validating the options accepted by the `ai index remove` command.
 *
 * Built by extending {@link AiOptionsSchema}: the embedding deployment and the
 * Azure Search endpoint, API key and index name are declared as required,
 * non-empty strings, `dryRun` as a required boolean, and `filter` as an
 * optional non-empty string.
 *
 * @example
 * ```ts
 * const validated = await DeleteOptionsSchema.parseAsync(rawOptions);
 * // validated.dryRun, validated.filter, validated.azureSearchEndpoint, etc.
 * ```
 */
export const DeleteOptionsSchema = AiOptionsSchema.extend({
  openaiEmbeddingDeployment: embeddingDeploymentField,
  azureSearchEndpoint: searchEndpointField,
  azureSearchApiKey: searchApiKeyField,
  azureSearchIndexName: searchIndexNameField,
  dryRun: dryRunField,
  filter: filterField,
}).describe('Command options for the delete command');

/**
 * Option object produced by a successful {@link DeleteOptionsSchema} parse.
 *
 * The type is inferred from the schema, so the two cannot drift apart.
 */
export type DeleteOptions = z.infer<typeof DeleteOptionsSchema>;
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { createCommand, createOption } from 'commander';
|
|
2
|
+
|
|
3
|
+
import { setupFramework } from '@equinor/fusion-framework-cli-plugin-ai-base';
|
|
4
|
+
import { withOptions as withAiOptions } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
|
|
5
|
+
|
|
6
|
+
import { DeleteOptionsSchema, type DeleteOptions } from './delete-command.options.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Builds an OData filter expression from source paths and/or a raw filter.
 *
 * Each source path becomes a `metadata/source eq '<path>'` clause and the
 * clauses are joined with `or`. When no source paths are given, the raw
 * `--filter` expression is returned unchanged. When both are supplied, the
 * source paths win and the raw filter is ignored, to prevent unintentional
 * broad deletions.
 *
 * Single quotes inside a source path are doubled (`'` becomes `''`), which is
 * how OData string literals escape a quote. Without this, a path containing
 * an apostrophe would terminate the literal early and yield a malformed — or
 * differently-scoped — delete filter.
 *
 * @param sources - Relative file paths to match against `metadata/source`.
 * @param rawFilter - A raw OData filter expression supplied via `--filter`;
 *   passed through verbatim (the caller is responsible for its syntax).
 * @returns The OData filter string, or `undefined` when neither sources nor
 *   a raw filter were provided.
 */
function buildFilter(sources: string[], rawFilter?: string): string | undefined {
  if (sources.length > 0) {
    return sources
      .map((source) => `metadata/source eq '${source.replace(/'/g, "''")}'`)
      .join(' or ');
  }
  return rawFilter;
}
|
|
26
|
+
|
|
27
|
+
/**
 * CLI command: `ai index remove`
 *
 * Removes documents from the Azure AI Search index by source path or OData filter.
 *
 * Use this when you need to remove stale, renamed, or noisy documents from the
 * vector store without running a full re-index.
 *
 * Usage:
 *   $ ffc ai index remove [options] [source-paths...]
 *
 * Arguments:
 *   source-paths     One or more relative file paths whose indexed chunks should
 *                    be removed (e.g. packages/modules/services/src/foo.ts).
 *
 * Options:
 *   --filter <expr>  Raw OData filter expression for advanced selection
 *                    (e.g. "metadata/source eq 'src/old-file.ts'").
 *   --dry-run        Preview matching documents without deleting them.
 *
 * Examples:
 *   # Remove by source paths
 *   $ ffc ai index remove src/old-module.ts src/legacy/helper.ts
 *
 *   # Preview what would be removed (dry-run)
 *   $ ffc ai index remove --dry-run src/old-module.ts
 *
 *   # Remove using a raw OData filter
 *   $ ffc ai index remove --filter "metadata/source eq 'src/old-module.ts'"
 *
 *   # Remove all chunks from a package
 *   $ ffc ai index remove --filter "metadata/attributes/any(a: a/key eq 'pkg_name' and a/value eq '@equinor/my-pkg')"
 *
 * Behaviour of the action handler:
 * - Options are validated with `DeleteOptionsSchema.parseAsync`; invalid options reject.
 * - An `Error` is thrown when neither source paths nor `--filter` yield a filter.
 * - With `--dry-run` the handler returns before the framework is set up, so no
 *   service is initialised and nothing is deleted.
 */
const _command = createCommand('remove')
  .description('Remove documents from the search index by source path or OData filter')
  .addOption(
    createOption('--dry-run', 'Preview matching documents without deleting them').default(false),
  )
  .addOption(
    createOption(
      '--filter <expression>',
      'Raw OData filter expression for selecting documents to delete',
    ),
  )
  .argument('[source-paths...]', 'Relative file paths whose indexed chunks should be removed')
  .action(async (sources: string[], commandOptions: DeleteOptions) => {
    const options = await DeleteOptionsSchema.parseAsync(commandOptions);
    const filterExpression = buildFilter(sources, options.filter);

    if (!filterExpression) {
      throw new Error(
        'Nothing to delete. Provide source file paths as arguments or pass a --filter expression.',
      );
    }

    if (sources.length > 0) {
      console.log(`\nTargeting ${sources.length} source path(s):\n`);
      // Sort a copy for display only: Array.prototype.sort works in place and
      // would otherwise reorder the argument array handed to this handler.
      for (const src of [...sources].sort()) {
        console.log(` ${src}`);
      }
    } else {
      console.log(`\nFilter: ${filterExpression}`);
    }

    if (options.dryRun) {
      console.log('\n🔍 Dry run — no documents were deleted.');
      console.log(` Would apply filter: ${filterExpression}`);
      return;
    }

    // Only reached for a real deletion: initialise the framework and delete
    // every document matching the filter from the configured index.
    const framework = await setupFramework(options);
    const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
    await vectorStoreService.deleteDocuments({
      filter: { filterExpression },
    });

    console.log(`\n✅ Deleted chunks matching filter.`);
  });
|
|
105
|
+
|
|
106
|
+
/**
 * Configured Commander command for the `ai index remove` subcommand.
 *
 * Wraps the locally defined `remove` command with `withAiOptions`, requesting
 * both the embedding options (`includeEmbedding`) and the Azure Search options
 * (`includeSearch`) so that the values validated by the delete options schema
 * can be supplied on the command line.
 *
 * NOTE(review): the original comment states this command is registered with
 * the CLI automatically by `registerAiPlugin`; that registration lives outside
 * this file — confirm against the plugin entry point.
 */
export const deleteCommand = withAiOptions(_command, {
  includeEmbedding: true,
  includeSearch: true,
});
|
|
@@ -3,14 +3,21 @@ import { z } from 'zod';
|
|
|
3
3
|
import { AiOptionsSchema } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
* Zod schema for validating command options for the
|
|
6
|
+
* Zod schema for validating command options for the `ai index add` command.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
8
|
+
* Extends the base AI options schema ({@link AiOptionsSchema}) with
|
|
9
|
+
* add-specific options such as `--dry-run`, `--diff`, `--config`,
|
|
10
|
+
* `--base-ref`, and `--clean`.
|
|
10
11
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
12
|
+
* Azure Search and embedding options that are optional in the base schema
|
|
13
|
+
* become **required** because the add command always writes to a
|
|
14
|
+
* vector store.
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```ts
|
|
18
|
+
* const validated = await CommandOptionsSchema.parseAsync(rawOptions);
|
|
19
|
+
* // validated.dryRun, validated.azureSearchEndpoint, etc.
|
|
20
|
+
* ```
|
|
14
21
|
*/
|
|
15
22
|
export const CommandOptionsSchema = AiOptionsSchema.extend({
|
|
16
23
|
// Override optional AI options to make them required for embeddings command
|
|
@@ -50,9 +57,9 @@ export const CommandOptionsSchema = AiOptionsSchema.extend({
|
|
|
50
57
|
}).describe('Command options for the embeddings command');
|
|
51
58
|
|
|
52
59
|
/**
|
|
53
|
-
*
|
|
60
|
+
* Validated options for the `ai index add` command.
|
|
54
61
|
*
|
|
55
|
-
*
|
|
56
|
-
*
|
|
62
|
+
* Inferred from {@link CommandOptionsSchema} and used as the single
|
|
63
|
+
* source of truth for option types throughout the add/embeddings pipeline.
|
|
57
64
|
*/
|
|
58
65
|
export type CommandOptions = z.infer<typeof CommandOptionsSchema>;
|