@equinor/fusion-framework-cli-plugin-ai-index 1.0.6 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +74 -2
- package/README.md +105 -69
- package/dist/esm/bin/embed.js +28 -13
- package/dist/esm/bin/embed.js.map +1 -1
- package/dist/esm/delete-command.js +100 -0
- package/dist/esm/delete-command.js.map +1 -0
- package/dist/esm/delete-command.options.js +43 -0
- package/dist/esm/delete-command.options.js.map +1 -0
- package/dist/esm/{command.js → embeddings-command.js} +42 -28
- package/dist/esm/embeddings-command.js.map +1 -0
- package/dist/esm/{command.options.js → embeddings-command.options.js} +14 -7
- package/dist/esm/embeddings-command.options.js.map +1 -0
- package/dist/esm/index.js +37 -4
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/search-command.js +198 -0
- package/dist/esm/search-command.js.map +1 -0
- package/dist/esm/utils/generate-chunk-id.js +17 -5
- package/dist/esm/utils/generate-chunk-id.js.map +1 -1
- package/dist/esm/utils/git/file-changes.js +26 -11
- package/dist/esm/utils/git/file-changes.js.map +1 -1
- package/dist/esm/utils/git/git-client.js +16 -7
- package/dist/esm/utils/git/git-client.js.map +1 -1
- package/dist/esm/utils/git/metadata.js +7 -3
- package/dist/esm/utils/git/metadata.js.map +1 -1
- package/dist/esm/utils/git/status.js +9 -3
- package/dist/esm/utils/git/status.js.map +1 -1
- package/dist/esm/utils/markdown/parser.js +53 -13
- package/dist/esm/utils/markdown/parser.js.map +1 -1
- package/dist/esm/utils/package-resolver.js +10 -6
- package/dist/esm/utils/package-resolver.js.map +1 -1
- package/dist/esm/utils/ts-doc/constants.js +4 -1
- package/dist/esm/utils/ts-doc/constants.js.map +1 -1
- package/dist/esm/utils/ts-doc/extractors.js +27 -13
- package/dist/esm/utils/ts-doc/extractors.js.map +1 -1
- package/dist/esm/utils/ts-doc/parser.js +19 -10
- package/dist/esm/utils/ts-doc/parser.js.map +1 -1
- package/dist/esm/version.js +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types/config.d.ts +51 -10
- package/dist/types/delete-command.d.ts +9 -0
- package/dist/types/delete-command.options.d.ts +32 -0
- package/dist/types/embeddings-command.d.ts +11 -0
- package/dist/types/embeddings-command.options.d.ts +40 -0
- package/dist/types/index.d.ts +19 -2
- package/dist/types/search-command.d.ts +8 -0
- package/dist/types/utils/generate-chunk-id.d.ts +17 -5
- package/dist/types/utils/git/file-changes.d.ts +26 -11
- package/dist/types/utils/git/git-client.d.ts +16 -7
- package/dist/types/utils/git/metadata.d.ts +7 -3
- package/dist/types/utils/git/status.d.ts +9 -3
- package/dist/types/utils/git/types.d.ts +15 -9
- package/dist/types/utils/markdown/parser.d.ts +23 -10
- package/dist/types/utils/markdown/types.d.ts +13 -2
- package/dist/types/utils/package-resolver.d.ts +8 -5
- package/dist/types/utils/ts-doc/constants.d.ts +4 -1
- package/dist/types/utils/ts-doc/extractors.d.ts +27 -13
- package/dist/types/utils/ts-doc/parser.d.ts +19 -10
- package/dist/types/utils/ts-doc/types.d.ts +12 -4
- package/dist/types/utils/types.d.ts +10 -6
- package/dist/types/version.d.ts +1 -1
- package/package.json +12 -10
- package/src/bin/delete-removed-files.ts +1 -1
- package/src/bin/embed.ts +47 -18
- package/src/bin/file-stream.ts +1 -1
- package/src/bin/get-diff.ts +1 -1
- package/src/bin/types.ts +1 -1
- package/src/config.ts +52 -10
- package/src/delete-command.options.ts +51 -0
- package/src/delete-command.ts +117 -0
- package/src/{command.options.ts → embeddings-command.options.ts} +16 -9
- package/src/{command.ts → embeddings-command.ts} +46 -28
- package/src/index.ts +38 -4
- package/src/search-command.ts +259 -0
- package/src/utils/generate-chunk-id.ts +17 -5
- package/src/utils/git/file-changes.ts +26 -11
- package/src/utils/git/git-client.ts +16 -7
- package/src/utils/git/metadata.ts +7 -3
- package/src/utils/git/status.ts +9 -3
- package/src/utils/git/types.ts +15 -9
- package/src/utils/markdown/parser.ts +54 -13
- package/src/utils/markdown/types.ts +13 -2
- package/src/utils/package-resolver.ts +10 -6
- package/src/utils/ts-doc/constants.ts +4 -1
- package/src/utils/ts-doc/extractors.ts +27 -13
- package/src/utils/ts-doc/parser.ts +19 -10
- package/src/utils/ts-doc/types.ts +12 -4
- package/src/utils/types.ts +10 -6
- package/src/version.ts +1 -1
- package/dist/esm/command.js.map +0 -1
- package/dist/esm/command.options.js.map +0 -1
- package/dist/types/command.d.ts +0 -2
- package/dist/types/command.options.d.ts +0 -33
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,79 @@
|
|
|
1
1
|
# @equinor/fusion-framework-cli-plugin-ai-index
|
|
2
2
|
|
|
3
|
+
## 2.0.0
|
|
4
|
+
|
|
5
|
+
### Major Changes
|
|
6
|
+
|
|
7
|
+
- abffa53: Major version bump for Fusion Framework React 19 release.
|
|
8
|
+
|
|
9
|
+
All packages are bumped to the next major version as part of the React 19 upgrade. This release drops support for React versions below 18 and includes breaking changes across the framework.
|
|
10
|
+
|
|
11
|
+
**Breaking changes:**
|
|
12
|
+
- Peer dependencies now require React 18 or 19 (`^18.0.0 || ^19.0.0`)
|
|
13
|
+
- React Router upgraded from v6 to v7
|
|
14
|
+
- Navigation module refactored with new history API
|
|
15
|
+
- `renderComponent` and `renderApp` now use `createRoot` API
|
|
16
|
+
|
|
17
|
+
**Migration:**
|
|
18
|
+
- Update your React version to 18.0.0 or higher before upgrading
|
|
19
|
+
- Replace `NavigationProvider.createRouter()` with `@equinor/fusion-framework-react-router`
|
|
20
|
+
- See individual package changelogs for package-specific migration steps
|
|
21
|
+
|
|
22
|
+
### Patch Changes
|
|
23
|
+
|
|
24
|
+
- ae92f13: Require `simple-git` 3.32.3 or newer in published package manifests to align installs with the upstream fix for CVE-2026-28292.
|
|
25
|
+
|
|
26
|
+
This does not change the CLI API. It tightens the minimum allowed dependency version so fresh installs and manifest-based scanners resolve the first safe `simple-git` release.
|
|
27
|
+
|
|
28
|
+
- aaa3f74: fix(security): address undici multiple vulnerabilities (CVE-2026-1524, 1527, 1528, 2581)
|
|
29
|
+
|
|
30
|
+
Upgrade undici from 7.22.0 to 7.24.3 to fix multiple security vulnerabilities affecting WebSocket parsing, HTTP header validation, and request deduplication:
|
|
31
|
+
- **CVE-2026-1528** (HIGH): WebSocket 64-bit length integer overflow causing process crash
|
|
32
|
+
- **CVE-2026-1524** (MODERATE): HTTP/1.1 response field header injection
|
|
33
|
+
- **CVE-2026-1527** (MODERATE): CRLF injection via upgrade option enabling protocol smuggling
|
|
34
|
+
- **CVE-2026-2581** (MODERATE): Unbounded memory consumption in deduplication handler
|
|
35
|
+
|
|
36
|
+
These are non-breaking security patches that harden undici against untrusted upstream endpoints and malicious WebSocket frames.
|
|
37
|
+
|
|
38
|
+
**Advisories**: GHSA-f269-vfmq-vjvj, GHSA-v9p9-hfj2-hcw8, GHSA-4992-7rv2-5pvq, GHSA-phc3-fgpg-7m6h
|
|
39
|
+
**Fixed in**: undici 7.24.0+ (deployed 7.24.3)
|
|
40
|
+
|
|
41
|
+
- c123c39: chore: bump simple-git from 3.32.3 to 3.33.0
|
|
42
|
+
|
|
43
|
+
Includes security improvements:
|
|
44
|
+
- Pathspec input sanitization for git.clone() and git.mirror()
|
|
45
|
+
- Enhanced git -c safety checks
|
|
46
|
+
|
|
47
|
+
- 3de232c: fix(cli): break turbo workspace cycle for AI plugins
|
|
48
|
+
|
|
49
|
+
Upgrade turbo from 2.8.10 to 2.8.14. This version introduces stricter workspace cycle detection, requiring the AI plugin dependencies to be moved from the CLI package's devDependencies to the root package.json.
|
|
50
|
+
|
|
51
|
+
The CLI plugins are now configured at the repository root (fusion-cli.config.ts) instead of in the packages/cli package, ensuring a clean workspace dependency graph for turbo's build scheduler.
|
|
52
|
+
|
|
53
|
+
This change has no impact on the published CLI package's public API. Plugins continue to be wired identically; only the location where that wiring is defined has changed.
|
|
54
|
+
|
|
55
|
+
Additional improvements from turbo 2.8.14:
|
|
56
|
+
- Fix: Ensures turbo watch mode respects task dependencies on first run
|
|
57
|
+
- Perf: Skip irrelevant packages for faster monorepo builds
|
|
58
|
+
- Feature: AI agent telemetry support in turbo traces
|
|
59
|
+
|
|
60
|
+
- Updated dependencies [abffa53]
|
|
61
|
+
- Updated dependencies [abffa53]
|
|
62
|
+
- Updated dependencies [abffa53]
|
|
63
|
+
- Updated dependencies [ae92f13]
|
|
64
|
+
- Updated dependencies [abffa53]
|
|
65
|
+
- Updated dependencies [abffa53]
|
|
66
|
+
- Updated dependencies [abffa53]
|
|
67
|
+
- Updated dependencies [abffa53]
|
|
68
|
+
- Updated dependencies [c123c39]
|
|
69
|
+
- Updated dependencies [3de232c]
|
|
70
|
+
- Updated dependencies [32bcf83]
|
|
71
|
+
- @equinor/fusion-framework-cli@14.0.0
|
|
72
|
+
- @equinor/fusion-framework-cli-plugin-ai-base@2.0.0
|
|
73
|
+
- @equinor/fusion-framework-module@6.0.0
|
|
74
|
+
- @equinor/fusion-framework-module-ai@3.0.0
|
|
75
|
+
- @equinor/fusion-imports@2.0.0
|
|
76
|
+
|
|
3
77
|
## 1.0.6
|
|
4
78
|
|
|
5
79
|
### Patch Changes
|
|
@@ -88,7 +162,6 @@
|
|
|
88
162
|
This plugin extends the Fusion Framework CLI with comprehensive document embedding and indexing capabilities for building searchable vector stores from code and documentation.
|
|
89
163
|
|
|
90
164
|
**Features:**
|
|
91
|
-
|
|
92
165
|
- Document chunking and embedding generation
|
|
93
166
|
- Git metadata extraction for context-aware embeddings
|
|
94
167
|
- Markdown/MDX and TypeScript/TSX documentation parsing
|
|
@@ -99,7 +172,6 @@
|
|
|
99
172
|
- Dry-run mode for testing
|
|
100
173
|
|
|
101
174
|
**Quick Usage:**
|
|
102
|
-
|
|
103
175
|
1. Install the plugin:
|
|
104
176
|
|
|
105
177
|
```sh
|
package/README.md
CHANGED
|
@@ -1,6 +1,18 @@
|
|
|
1
1
|
# @equinor/fusion-framework-cli-plugin-ai-index
|
|
2
2
|
|
|
3
|
-
AI indexing plugin for Fusion Framework CLI
|
|
3
|
+
AI indexing plugin for the Fusion Framework CLI that chunks TypeScript, Markdown, and MDX source files, generates embeddings via Azure OpenAI, and upserts them into an Azure AI Search vector store.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Markdown / MDX chunking** — splits documents by heading hierarchy with YAML frontmatter extraction.
|
|
8
|
+
- **TypeScript / TSX TSDoc extraction** — extracts documented declarations (functions, classes, interfaces, types, enums) into individual vector-store documents.
|
|
9
|
+
- **Raw-file passthrough** — index files as-is when chunking is not needed.
|
|
10
|
+
- **Semantic search** — query the vector store to validate indexed content.
|
|
11
|
+
- **Git-diff workflow mode** — process only files changed since a base ref (`--diff`).
|
|
12
|
+
- **Dry-run mode** — preview what would be indexed without writing to the vector store.
|
|
13
|
+
- **Document removal** — remove stale documents by source path or OData filter.
|
|
14
|
+
- **Package & git metadata** — optionally resolve `package.json` info and git commit/permalink metadata per document.
|
|
15
|
+
- **Configurable patterns** — define file globs, ignore lists, chunk sizes, and custom attribute processors in `fusion-ai.config.ts`.
|
|
4
16
|
|
|
5
17
|
## Installation
|
|
6
18
|
|
|
@@ -8,107 +20,131 @@ AI indexing plugin for Fusion Framework CLI providing document embedding and chu
|
|
|
8
20
|
pnpm add -D @equinor/fusion-framework-cli-plugin-ai-index
|
|
9
21
|
```
|
|
10
22
|
|
|
11
|
-
##
|
|
23
|
+
## Usage
|
|
12
24
|
|
|
13
|
-
|
|
25
|
+
### Register the plugin
|
|
14
26
|
|
|
15
|
-
```
|
|
27
|
+
```ts
|
|
28
|
+
// fusion-cli.config.ts
|
|
16
29
|
import { defineFusionCli } from '@equinor/fusion-framework-cli';
|
|
17
30
|
|
|
18
31
|
export default defineFusionCli(() => ({
|
|
19
|
-
plugins: [
|
|
20
|
-
'@equinor/fusion-framework-cli-plugin-ai-index',
|
|
21
|
-
],
|
|
32
|
+
plugins: ['@equinor/fusion-framework-cli-plugin-ai-index'],
|
|
22
33
|
}));
|
|
23
34
|
```
|
|
24
35
|
|
|
25
|
-
|
|
36
|
+
### Add documents to the index
|
|
26
37
|
|
|
27
|
-
|
|
38
|
+
```sh
|
|
39
|
+
# Index all files matching default patterns
|
|
40
|
+
ffc ai index add
|
|
28
41
|
|
|
29
|
-
|
|
42
|
+
# Index specific globs
|
|
43
|
+
ffc ai index add "packages/**/*.ts" "packages/**/*.md"
|
|
30
44
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
- TypeScript/TSX TSDoc extraction and chunking
|
|
34
|
-
- Glob pattern support for file collection
|
|
35
|
-
- Git diff-based processing for workflow integration
|
|
36
|
-
- Dry-run mode for testing without actual processing
|
|
45
|
+
# Preview without writing (dry-run)
|
|
46
|
+
ffc ai index add --dry-run
|
|
37
47
|
|
|
38
|
-
|
|
48
|
+
# Process only files changed since origin/main
|
|
49
|
+
ffc ai index add --diff --base-ref origin/main
|
|
50
|
+
|
|
51
|
+
# Wipe the vector store before re-indexing
|
|
52
|
+
ffc ai index add --clean "**/*.ts"
|
|
53
|
+
```
|
|
39
54
|
|
|
40
|
-
|
|
55
|
+
### Search the index
|
|
41
56
|
|
|
42
57
|
```sh
|
|
43
|
-
#
|
|
44
|
-
ffc ai
|
|
58
|
+
# Semantic search
|
|
59
|
+
ffc ai index search "how to configure modules"
|
|
60
|
+
|
|
61
|
+
# Filter by package name
|
|
62
|
+
ffc ai index search "hooks" --filter "metadata/attributes/any(a: a/key eq 'pkg_name' and a/value eq '@equinor/fusion-framework-react')"
|
|
63
|
+
|
|
64
|
+
# JSON output
|
|
65
|
+
ffc ai index search "API reference" --json --limit 5
|
|
45
66
|
```
|
|
46
67
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
### `ai embeddings`
|
|
50
|
-
|
|
51
|
-
Document embedding utilities for Large Language Model processing.
|
|
52
|
-
|
|
53
|
-
**Features:**
|
|
54
|
-
- Markdown/MDX document chunking with frontmatter extraction
|
|
55
|
-
- TypeScript/TSX TSDoc extraction and chunking
|
|
56
|
-
- Glob pattern support for file collection
|
|
57
|
-
- Git diff-based processing for workflow integration
|
|
58
|
-
- Dry-run mode for testing without actual processing
|
|
59
|
-
- Configurable file patterns via fusion-ai.config.ts
|
|
60
|
-
|
|
61
|
-
**Options:**
|
|
62
|
-
- `--dry-run` - Show what would be processed without actually doing it
|
|
63
|
-
- `--config <config>` - Path to a config file (default: fusion-ai.config.ts)
|
|
64
|
-
- `--diff` - Process only changed files (workflow mode)
|
|
65
|
-
- `--base-ref <ref>` - Git reference to compare against (default: HEAD~1)
|
|
66
|
-
- `--clean` - Delete all existing documents from the vector store before processing
|
|
67
|
-
- `--openai-api-key <key>` - API key for Azure OpenAI
|
|
68
|
-
- `--openai-api-version <version>` - API version (default: 2024-02-15-preview)
|
|
69
|
-
- `--openai-instance <name>` - Azure OpenAI instance name
|
|
70
|
-
- `--openai-embedding-deployment <name>` - Azure OpenAI embedding deployment name
|
|
71
|
-
- `--azure-search-endpoint <url>` - Azure Search endpoint URL
|
|
72
|
-
- `--azure-search-api-key <key>` - Azure Search API key
|
|
73
|
-
- `--azure-search-index-name <name>` - Azure Search index name
|
|
74
|
-
|
|
75
|
-
**Examples:**
|
|
68
|
+
### Remove documents from the index
|
|
69
|
+
|
|
76
70
|
```sh
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
71
|
+
# Remove by source paths
|
|
72
|
+
ffc ai index remove src/old-module.ts src/legacy/helper.ts
|
|
73
|
+
|
|
74
|
+
# Preview what would be removed
|
|
75
|
+
ffc ai index remove --dry-run src/old-module.ts
|
|
76
|
+
|
|
77
|
+
# Remove with a raw OData filter
|
|
78
|
+
ffc ai index remove --filter "metadata/source eq 'src/old-module.ts'"
|
|
82
79
|
```
|
|
83
80
|
|
|
84
81
|
## Configuration
|
|
85
82
|
|
|
86
|
-
|
|
83
|
+
Create a `fusion-ai.config.ts` at the project root to customise file patterns, metadata enrichment, and chunk sizing:
|
|
87
84
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
```typescript
|
|
91
|
-
import { configureFusionAI } from '@equinor/fusion-framework-cli-plugin-ai-index';
|
|
92
|
-
import type { FusionAIConfigWithIndex } from '@equinor/fusion-framework-cli-plugin-ai-index';
|
|
85
|
+
```ts
|
|
86
|
+
import { configureFusionAI, type FusionAIConfigWithIndex } from '@equinor/fusion-framework-cli-plugin-ai-index';
|
|
93
87
|
|
|
94
88
|
export default configureFusionAI((): FusionAIConfigWithIndex => ({
|
|
95
89
|
index: {
|
|
96
|
-
patterns: ['
|
|
90
|
+
patterns: ['packages/**/src/**/*.ts', 'packages/**/*.md'],
|
|
91
|
+
rawPatterns: ['packages/**/README.md'],
|
|
92
|
+
ignore: ['**/dist/**', '**/node_modules/**'],
|
|
97
93
|
metadata: {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
94
|
+
resolvePackage: true,
|
|
95
|
+
resolveGit: true,
|
|
96
|
+
attributeProcessor: (attributes, _document) => ({
|
|
97
|
+
...attributes,
|
|
98
|
+
custom_tag: 'my-project',
|
|
99
|
+
}),
|
|
102
100
|
},
|
|
103
101
|
embedding: {
|
|
104
|
-
chunkSize:
|
|
105
|
-
chunkOverlap:
|
|
102
|
+
chunkSize: 2000,
|
|
103
|
+
chunkOverlap: 300,
|
|
106
104
|
},
|
|
107
105
|
},
|
|
108
106
|
}));
|
|
109
107
|
```
|
|
110
108
|
|
|
111
|
-
|
|
109
|
+
### Environment variables
|
|
110
|
+
|
|
111
|
+
Azure OpenAI and Azure AI Search credentials can be provided via CLI options or environment variables:
|
|
112
|
+
|
|
113
|
+
| Variable | Description |
|
|
114
|
+
|---|---|
|
|
115
|
+
| `AZURE_OPENAI_API_KEY` | Azure OpenAI API key |
|
|
116
|
+
| `AZURE_OPENAI_INSTANCE_NAME` | Azure OpenAI instance name |
|
|
117
|
+
| `AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME` | Embedding model deployment name |
|
|
118
|
+
| `AZURE_SEARCH_ENDPOINT` | Azure AI Search endpoint URL |
|
|
119
|
+
| `AZURE_SEARCH_API_KEY` | Azure AI Search admin API key |
|
|
120
|
+
| `AZURE_SEARCH_INDEX_NAME` | Target search index name |
|
|
121
|
+
|
|
122
|
+
## API Reference
|
|
123
|
+
|
|
124
|
+
### Entry point
|
|
125
|
+
|
|
126
|
+
| Export | Description |
|
|
127
|
+
|---|---|
|
|
128
|
+
| `registerAiPlugin(program)` | Registers the `ai index` command with `add`, `search`, and `remove` subcommands on a Commander program. |
|
|
129
|
+
| `configureFusionAI(fn)` | Re-exported helper to create a typed `fusion-ai.config.ts`. |
|
|
130
|
+
|
|
131
|
+
### Types
|
|
132
|
+
|
|
133
|
+
| Type | Description |
|
|
134
|
+
|---|---|
|
|
135
|
+
| `FusionAIConfigWithIndex` | Full config interface including `index` settings. |
|
|
136
|
+
| `IndexConfig` | Index-specific configuration (patterns, metadata, embedding). |
|
|
137
|
+
| `CommandOptions` | Validated options for the `ai index add` command. |
|
|
138
|
+
| `DeleteOptions` | Validated options for the `ai index remove` command. |
|
|
112
139
|
|
|
113
|
-
|
|
140
|
+
### Utilities (sub-path imports)
|
|
114
141
|
|
|
142
|
+
| Function / Type | Module | Description |
|
|
143
|
+
|---|---|---|
|
|
144
|
+
| `generateChunkId(path, index?)` | `utils/generate-chunk-id` | Deterministic, URL-safe document ID from a file path. |
|
|
145
|
+
| `parseMarkdown(content, source)` | `utils/markdown` | Chunk Markdown/MDX content into vector-store documents. |
|
|
146
|
+
| `parseTsDocSync(content, opts?)` | `utils/ts-doc` | Extract TSDoc documents from a TypeScript string. |
|
|
147
|
+
| `parseTsDocFromFileSync(file, opts?)` | `utils/ts-doc` | Extract TSDoc documents from a TypeScript file on disk. |
|
|
148
|
+
| `resolvePackage(filePath)` | `utils/package-resolver` | Resolve the nearest `package.json` for a file path. |
|
|
149
|
+
| `getChangedFiles(options)` | `utils/git` | List files changed between a base ref and HEAD. |
|
|
150
|
+
| `extractGitMetadata(filePath)` | `utils/git` | Extract commit hash, date, and GitHub permalink for a file. |
|
package/dist/esm/bin/embed.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { globbyStream } from 'globby';
|
|
2
2
|
import { relative } from 'node:path';
|
|
3
3
|
import multimatch from 'multimatch';
|
|
4
|
-
import { concat, from, merge } from 'rxjs';
|
|
5
|
-
import { concatMap, filter, map, mergeMap, shareReplay, toArray } from 'rxjs/operators';
|
|
4
|
+
import { concat, from, merge, timer } from 'rxjs';
|
|
5
|
+
import { concatMap, filter, map, mergeMap, retry, shareReplay, toArray } from 'rxjs/operators';
|
|
6
6
|
import { isMarkdownFile, parseMarkdownFile } from '../utils/markdown/index.js';
|
|
7
7
|
import { getFileStatus, resolveProjectRoot } from '../utils/git/index.js';
|
|
8
8
|
import { isTypescriptFile, parseTsDocFromFileSync } from '../utils/ts-doc/index.js';
|
|
@@ -24,6 +24,7 @@ const defaultIgnore = ['node_modules', '**/node_modules/**', 'dist', '**/dist/**
|
|
|
24
24
|
*/
|
|
25
25
|
export async function embed(binOptions) {
|
|
26
26
|
const { framework, options, config, filePatterns } = binOptions;
|
|
27
|
+
console.log(`📇 Index: ${options.azureSearchIndexName}`);
|
|
27
28
|
// Handle clean operation (destructive - deletes all existing documents)
|
|
28
29
|
const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
|
|
29
30
|
if (options.clean && !options.dryRun) {
|
|
@@ -46,10 +47,12 @@ export async function embed(binOptions) {
|
|
|
46
47
|
// contains negation patterns (like !.yarn/releases), so we add explicit ignore patterns
|
|
47
48
|
// to prevent traversing these directories entirely.
|
|
48
49
|
const ignore = config.index?.ignore ?? defaultIgnore;
|
|
50
|
+
// Respect .gitignore by default; configs targeting build artifacts can opt out.
|
|
51
|
+
const gitignore = config.index?.gitignore ?? true;
|
|
49
52
|
return from(globbyStream(filePatterns, {
|
|
50
53
|
ignore,
|
|
51
54
|
onlyFiles: true,
|
|
52
|
-
gitignore
|
|
55
|
+
gitignore,
|
|
53
56
|
absolute: true,
|
|
54
57
|
})).pipe(
|
|
55
58
|
// Get git status concurrently, then flatten array results
|
|
@@ -117,20 +120,32 @@ export async function embed(binOptions) {
|
|
|
117
120
|
}));
|
|
118
121
|
// Apply metadata to documents
|
|
119
122
|
const applyMetadata$ = applyMetadata(merge(rawFiles$, markdown$, typescript$), config.index);
|
|
120
|
-
// Generate embeddings
|
|
123
|
+
// Generate embeddings with concurrency limit and retry on rate-limit (429) errors
|
|
121
124
|
const embeddingService = framework.ai.getService('embeddings', options.openaiEmbeddingDeployment);
|
|
122
|
-
|
|
125
|
+
/** Maximum parallel embedding requests to avoid hitting Azure OpenAI TPM limits. */
|
|
126
|
+
const EMBEDDING_CONCURRENCY = 5;
|
|
127
|
+
/** Maximum retry attempts for transient / rate-limit errors per chunk. */
|
|
128
|
+
const MAX_RETRIES = 4;
|
|
129
|
+
const applyEmbedding$ = applyMetadata$.pipe(mergeMap((documents) => from(documents).pipe(
|
|
130
|
+
// Limit concurrency to avoid overwhelming the embedding API
|
|
131
|
+
mergeMap((document) => from(embeddingService.embedQuery(document.pageContent)).pipe(retry({
|
|
132
|
+
count: MAX_RETRIES,
|
|
133
|
+
delay: (error, retryIndex) => {
|
|
134
|
+
// Parse Retry-After header when available (Azure sends seconds)
|
|
135
|
+
const retryAfterSec = error?.response?.headers?.get?.('retry-after') ??
|
|
136
|
+
error?.responseHeaders?.['retry-after'];
|
|
137
|
+
const retryAfterMs = retryAfterSec ? Number(retryAfterSec) * 1000 : 0;
|
|
138
|
+
// Exponential backoff: 2s, 4s, 8s, 16s — or Retry-After if larger
|
|
139
|
+
const backoffMs = 2 ** retryIndex * 1000;
|
|
140
|
+
const delayMs = Math.max(backoffMs, retryAfterMs);
|
|
141
|
+
console.warn(`⏳ Retry ${retryIndex}/${MAX_RETRIES} for "${document.metadata.source}" in ${delayMs}ms`);
|
|
142
|
+
return timer(delayMs);
|
|
143
|
+
},
|
|
144
|
+
}), map((embeddings) => {
|
|
123
145
|
console.log('embedding document', document.metadata.source);
|
|
124
|
-
const embeddings = await embeddingService
|
|
125
|
-
.embedQuery(document.pageContent)
|
|
126
|
-
.catch((error) => {
|
|
127
|
-
console.error(`❌ Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
128
|
-
console.error('document', document);
|
|
129
|
-
process.exit(1);
|
|
130
|
-
});
|
|
131
146
|
const metadata = { ...document.metadata, embedding: embeddings };
|
|
132
147
|
return { ...document, metadata };
|
|
133
|
-
}), toArray())));
|
|
148
|
+
})), EMBEDDING_CONCURRENCY), toArray())));
|
|
134
149
|
// Update vector store
|
|
135
150
|
const upsert$ = applyEmbedding$.pipe(mergeMap(async (documents) => {
|
|
136
151
|
const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embed.js","sourceRoot":"","sources":["../../../src/bin/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,UAAU,MAAM,YAAY,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"embed.js","sourceRoot":"","sources":["../../../src/bin/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,UAAU,MAAM,YAAY,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,MAAM,CAAC;AAClD,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,gBAAgB,CAAC;AAE/F,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAC/E,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAC1E,OAAO,EAAE,gBAAgB,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAEpF,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EAAE,8BAA8B,EAAE,MAAM,2BAA2B,CAAC;AAC3E,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAQpD,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,eAAe,EAAE,MAAM,+BAA+B,CAAC;AAEhE;;;;GAIG;AACH,MAAM,aAAa,GAAG,CAAC,cAAc,EAAE,oBAAoB,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC,CAAC;AAE3F;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,UAAgC;IAC1D,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,GAAG,UAAU,CAAC;IAEhE,OAAO,CAAC,GAAG,CAAC,aAAa,OAAO,CAAC,oBAAoB,EAAE,CAAC,CAAC;IAEzD,wEAAwE;IACxE,MAAM,kBAAkB,GAAG,SAAS,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IAC3F,IAAI,OAAO,CAAC,KAAK,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,8DAA8D,CAAC,CAAC;QAC5E,8EAA8E;QAC9E,MAAM,kBAAkB,CAAC,eAAe,CAAC;YACvC,MAAM,EAAE,EAAE,gBAAgB,EAAE,uBAAuB,EAAE;SACtD,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;IACrD,CAAC;IAED,+CAA+C;IAC/C,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAEhE,0EAA0E;IAC1E,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE;QACnB,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,OAAO,IAAI,CAAC,YAAY,CAAC,CAAC;QAC5B,CAAC;QAED,uDAAuD;QACvD,8FAA8F;QAC9F,wFAAwF;QACxF,oDAAoD;QACpD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,EAAE,MAAM,IAAI,aAAa,CAAC;QAErD,gFAAgF;QAChF,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,EAAE,SAAS,IAAI,IAAI,CAAC;QAElD,OAAO,IAAI,CACT,YAAY,CAAC,YAAY,EAAE;YACzB,MAAM;YACN,SAAS,EAAE,IAAI;YACf,SAAS;YACT,QAAQ,EAAE,IAAI;SACf,CAAC,CACH,CAAC,IAAI;QACJ,0DAA0D;QA
C1D,QAAQ,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,EACvC,SAAS,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACjC,wEAAwE;QACxE,WAAW,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAChC,CAAC;IACJ,CAAC,CAAC,EAAE,CAAC;IAEL,qEAAqE;IACrE,MAAM,mBAAmB,GAAG,MAAM,CAAC,KAAK,EAAE,QAAQ,IAAI;QACpD,SAAS;QACT,UAAU;QACV,SAAS;QACT,UAAU;KACX,CAAC;IAEF,qEAAqE;IACrE,MAAM,eAAe,GAAG,MAAM,CAAC,IAAI,CACjC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACX,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;QAClC,MAAM,WAAW,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QAE9E,OAAO;YACL,IAAI,EAAE,QAAQ;YACd,MAAM;YACN,WAAW;YACX,YAAY;SACb,CAAC;IACJ,CAAC,CAAC,EACF,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACd,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,YAAY,EAAE,mBAAmB,CAAC,CAAC;QACnE,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;IAC5B,CAAC,CAAC;IACF,yEAAyE;IACzE,WAAW,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAChC,CAAC;IAEF,sEAAsE;IACtE,MAAM,aAAa,GAAG,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC;IAExF,4BAA4B;IAC5B,MAAM,OAAO,GAAG,8BAA8B,CAAC,aAAa,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;IAElF,kCAAkC;IAClC,MAAM,WAAW,GAAG,eAAe,CAAC,IAAI,CACtC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,KAAK,KAAK,IAAI,IAAI,CAAC,MAAM,KAAK,UAAU,CAAC;IACrE,gDAAgD;IAChD,WAAW,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAChC,CAAC;IAEF,MAAM,SAAS,GAAG,CAAC,IAAmB,EAAW,EAAE;QACjD,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,KAAK,EAAE,WAAW,IAAI,EAAE,CAAC,CAAC;QAC/E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC,CAAC;IAEF,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAChC,MAAM,CAAC,SAAS,CAAC,EACjB,GAAG,CAAC,CAAC,IAAI,EAAiB,EAAE;QAC1B,MAAM,QAAQ,GAAwB;YACpC,EAAE,EAAE,eAAe,CAAC,IAAI,CAAC,YAAY,CAAC;YACtC,WAAW,EAAE,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC;YAC5C,QAAQ,EAAE;gBACR,MAAM,EAAE,IAAI,CAAC,YAAY;gBACzB,IAAI,EAAE,KAAK;aACZ;SACF,CAAC;QACF,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC;IAC
xD,CAAC,CAAC,CACH,CAAC;IAEF,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAChC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAC5B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAC3C,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE;QACtB,MAAM,SAAS,GAAG,MAAM,iBAAiB,CAAC,IAAI,CAAC,CAAC;QAChD,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC;IAC5C,CAAC,CAAC,CACH,CAAC;IAEF,MAAM,WAAW,GAAG,WAAW,CAAC,IAAI,CAClC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAC5B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAC7C,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACX,MAAM,SAAS,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;QAC/C,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC;IAC5C,CAAC,CAAC,CACH,CAAC;IAEF,8BAA8B;IAC9B,MAAM,cAAc,GAAG,aAAa,CAAC,KAAK,CAAC,SAAS,EAAE,SAAS,EAAE,WAAW,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IAE7F,kFAAkF;IAClF,MAAM,gBAAgB,GAAG,SAAS,CAAC,EAAE,CAAC,UAAU,CAAC,YAAY,EAAE,OAAO,CAAC,yBAAyB,CAAC,CAAC;IAElG,oFAAoF;IACpF,MAAM,qBAAqB,GAAG,CAAC,CAAC;IAEhC,0EAA0E;IAC1E,MAAM,WAAW,GAAG,CAAC,CAAC;IAEtB,MAAM,eAAe,GAAG,cAAc,CAAC,IAAI,CACzC,QAAQ,CAAC,CAAC,SAAS,EAAE,EAAE,CACrB,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI;IAClB,4DAA4D;IAC5D,QAAQ,CACN,CAAC,QAAQ,EAAE,EAAE,CACX,IAAI,CAAC,gBAAgB,CAAC,UAAU,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAC1D,KAAK,CAAC;QACJ,KAAK,EAAE,WAAW;QAClB,KAAK,EAAE,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE;YAC3B,gEAAgE;YAChE,MAAM,aAAa,GACjB,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC,aAAa,CAAC;gBAC9C,KAAK,EAAE,eAAe,EAAE,CAAC,aAAa,CAAC,CAAC;YAC1C,MAAM,YAAY,GAAG,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;YAEtE,kEAAkE;YAClE,MAAM,SAAS,GAAG,CAAC,IAAI,UAAU,GAAG,IAAI,CAAC;YACzC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;YAElD,OAAO,CAAC,IAAI,CACV,WAAW,UAAU,IAAI,WAAW,SAAS,QAAQ,CAAC,QAAQ,CAAC,MAAM,QAAQ,OAAO,IAAI,CACzF,CAAC;YACF,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC;QACxB,CAAC;KACF,CAAC,EACF,GAAG,CAAC,CAAC,UAAU,EAAE,EAAE;QACjB,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC5D,MAAM,QAA
Q,GAAG,EAAE,GAAG,QAAQ,CAAC,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC;QACjE,OAAO,EAAE,GAAG,QAAQ,EAAE,QAAQ,EAAE,CAAC;IACnC,CAAC,CAAC,CACH,EACH,qBAAqB,CACtB,EACD,OAAO,EAAE,CACV,CACF,CACF,CAAC;IAEF,sBAAsB;IACtB,MAAM,OAAO,GAAG,eAAe,CAAC,IAAI,CAClC,QAAQ,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE;QAC3B,MAAM,kBAAkB,GAAG,SAAS,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;QAC3F,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,OAAO,CAAC,GAAG,CAAC,iBAAiB,QAAQ,CAAC,EAAE,mBAAmB,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACzF,CAAC;QACD,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YACpB,mEAAmE;YACnE,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,MAAM,OAAO,GAAG,SAAS;qBACtB,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC;qBAC3C,MAAM,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,IAAI,GAAG,EAAU,CAAC,CAAC;gBAE/D,MAAM,gBAAgB,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC;qBACzC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,uBAAuB,MAAM,GAAG,CAAC;qBACjD,IAAI,CAAC,MAAM,CAAC,CAAC;gBAEhB,iFAAiF;gBACjF,kBAAkB,CAAC,eAAe,CAAC,EAAE,MAAM,EAAE,EAAE,gBAAgB,EAAE,EAAE,CAAC,CAAC;YACvE,CAAC;YACD,MAAM,kBAAkB,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QACnD,CAAC;QACD,OAAO;YACL,MAAM,EAAE,OAAO;YACf,SAAS;SACiB,CAAC;IAC/B,CAAC,CAAC,EACF,MAAM,CAAC,CAAC,MAAM,EAAqC,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CACvE,CAAC;IAEF,mBAAmB;IACnB,kFAAkF;IAClF,MAAM,eAAe,GAAmE;QACtF,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,EAAE;KACV,CAAC;IAEF,qEAAqE;IACrE,wEAAwE;IACxE,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,SAAS,CAAC;QACjC,IAAI,EAAE,CAAC,MAAM,EAAE,EAAE;YACf,uCAAuC;YACvC,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAChC,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;YACjF,CAAC;YACD,+EAA+E;iBAC1E,IAAI,MAAM,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;gBACnC,eAAe,CAAC,KAAK,CAAC,IAAI,CACxB,GAAG,MAAM,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;oBACrC,MAAM,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM;oBAChC,EAAE,EAAE,QAAQ,CAAC,EAAE;iBAChB,CAA
C,CAAC,CACJ,CAAC;YACJ,CAAC;QACH,CAAC;QACD,KAAK,EAAE,CAAC,KAAK,EAAE,EAAE;YACf,OAAO,CAAC,KAAK,CAAC,YAAY,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;YACtF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,QAAQ,EAAE,GAAG,EAAE;YACb,4CAA4C;YAC5C,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE,eAAe,CAAC,CAAC;YACtD,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;KACF,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { createCommand, createOption } from 'commander';
|
|
2
|
+
import { setupFramework } from '@equinor/fusion-framework-cli-plugin-ai-base';
|
|
3
|
+
import { withOptions as withAiOptions } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
|
|
4
|
+
import { DeleteOptionsSchema } from './delete-command.options.js';
|
|
5
|
+
/**
 * Builds an OData filter expression from source paths and/or a raw filter.
 *
 * Source paths are joined with `or`; a raw `--filter` expression is used
 * directly. When both are supplied, source-path filters take precedence
 * to prevent unintentional broad deletions.
 *
 * Each source path is embedded as an OData string literal, so every single
 * quote inside a path is escaped by doubling it (`'` becomes `''`). Without
 * the escaping, a path such as `docs/what's-new.md` would produce a malformed
 * filter, and a crafted path could append extra clauses to a delete filter.
 *
 * @param sources - Relative file paths to match against `metadata/source`.
 * @param rawFilter - A raw OData filter expression supplied via `--filter`.
 *   It is returned verbatim (no escaping is applied to it).
 * @returns The combined OData filter string, or `undefined` when neither
 *   sources nor a raw filter were provided.
 */
function buildFilter(sources, rawFilter) {
    if (sources.length > 0) {
        return sources
            .map((source) => {
                // OData string literals escape a single quote by doubling it.
                const literal = String(source).replace(/'/g, "''");
                return `metadata/source eq '${literal}'`;
            })
            .join(' or ');
    }
    return rawFilter;
}
|
|
23
|
+
/**
 * CLI command: `ai index remove`
 *
 * Removes documents from the Azure AI Search index by source path or OData filter.
 *
 * Use this when you need to remove stale, renamed, or noisy documents from the
 * vector store without running a full re-index.
 *
 * Usage:
 * $ ffc ai index remove [options] [source-paths...]
 *
 * Arguments:
 * source-paths One or more relative file paths whose indexed chunks should
 * be removed (e.g. packages/modules/services/src/foo.ts).
 *
 * Options:
 * --filter <expr> Raw OData filter expression for advanced selection
 * (e.g. "metadata/source eq 'src/old-file.ts'").
 * Ignored (with a warning) when source paths are also given.
 * --dry-run Preview matching documents without deleting them.
 *
 * Examples:
 * # Remove by source paths
 * $ ffc ai index remove src/old-module.ts src/legacy/helper.ts
 *
 * # Preview what would be removed (dry-run)
 * $ ffc ai index remove --dry-run src/old-module.ts
 *
 * # Remove using a raw OData filter
 * $ ffc ai index remove --filter "metadata/source eq 'src/old-module.ts'"
 *
 * # Remove all chunks from a package
 * $ ffc ai index remove --filter "metadata/attributes/any(a: a/key eq 'pkg_name' and a/value eq '@equinor/my-pkg')"
 */
const _command = createCommand('remove')
    .description('Remove documents from the search index by source path or OData filter')
    .addOption(createOption('--dry-run', 'Preview matching documents without deleting them').default(false))
    .addOption(createOption('--filter <expression>', 'Raw OData filter expression for selecting documents to delete'))
    .argument('[source-paths...]', 'Relative file paths whose indexed chunks should be removed')
    .action(async (sources, commandOptions) => {
        // Options are validated before anything else, so a dry run is checked too.
        const options = await DeleteOptionsSchema.parseAsync(commandOptions);
        const filterExpression = buildFilter(sources, options.filter);
        if (!filterExpression) {
            throw new Error('Nothing to delete. Provide source file paths as arguments or pass a --filter expression.');
        }
        if (sources.length > 0) {
            if (options.filter) {
                // buildFilter prefers source paths; tell the user the raw filter is unused
                // instead of dropping it silently.
                console.warn('\nWarning: --filter is ignored because source paths were provided.');
            }
            console.log(`\nTargeting ${sources.length} source path(s):\n`);
            // Sort a copy: Array.prototype.sort works in place and the array
            // is handed to us by Commander, so it must not be reordered here.
            for (const src of [...sources].sort()) {
                console.log(` ${src}`);
            }
        }
        else {
            console.log(`\nFilter: ${filterExpression}`);
        }
        if (options.dryRun) {
            // Return before the framework is set up, so nothing is deleted.
            console.log('\n🔍 Dry run — no documents were deleted.');
            console.log(` Would apply filter: ${filterExpression}`);
            return;
        }
        const framework = await setupFramework(options);
        const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
        await vectorStoreService.deleteDocuments({
            filter: { filterExpression },
        });
        console.log(`\n✅ Deleted chunks matching filter.`);
    });
|
|
88
|
+
/**
 * The `ai index remove` subcommand, exported for registration with the CLI.
 *
 * Built by passing the base `remove` command through `withAiOptions` with
 * both the embedding and the search option groups switched on.
 *
 * NOTE(review): registration is expected to happen through
 * `registerAiPlugin`, which is not visible from this module — confirm.
 */
export const deleteCommand = withAiOptions(_command, { includeEmbedding: true, includeSearch: true });
|
|
100
|
+
//# sourceMappingURL=delete-command.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"delete-command.js","sourceRoot":"","sources":["../../src/delete-command.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAExD,OAAO,EAAE,cAAc,EAAE,MAAM,8CAA8C,CAAC;AAC9E,OAAO,EAAE,WAAW,IAAI,aAAa,EAAE,MAAM,8DAA8D,CAAC;AAE5G,OAAO,EAAE,mBAAmB,EAAsB,MAAM,6BAA6B,CAAC;AAEtF;;;;;;;;;;;GAWG;AACH,SAAS,WAAW,CAAC,OAAiB,EAAE,SAAkB;IACxD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,uBAAuB,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACtE,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AACH,MAAM,QAAQ,GAAG,aAAa,CAAC,QAAQ,CAAC;KACrC,WAAW,CAAC,uEAAuE,CAAC;KACpF,SAAS,CACR,YAAY,CAAC,WAAW,EAAE,kDAAkD,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAC7F;KACA,SAAS,CACR,YAAY,CACV,uBAAuB,EACvB,+DAA+D,CAChE,CACF;KACA,QAAQ,CAAC,mBAAmB,EAAE,4DAA4D,CAAC;KAC3F,MAAM,CAAC,KAAK,EAAE,OAAiB,EAAE,cAA6B,EAAE,EAAE;IACjE,MAAM,OAAO,GAAG,MAAM,mBAAmB,CAAC,UAAU,CAAC,cAAc,CAAC,CAAC;IACrE,MAAM,gBAAgB,GAAG,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IAE9D,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CACb,0FAA0F,CAC3F,CAAC;IACJ,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,MAAM,oBAAoB,CAAC,CAAC;QAC/D,KAAK,MAAM,GAAG,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YACjC,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,aAAa,gBAAgB,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QACzD,OAAO,CAAC,GAAG,CAAC,yBAAyB,gBAAgB,EAAE,CAAC,CAAC;QACzD,OAAO;IACT,CAAC;IAED,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,kBAAkB,GAAG,SAAS,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC;IAC3F,MAAM,kBAAkB,CAAC,eAAe,CAAC;QACvC,MAAM,EAAE,EAAE,gBAAgB,EAAE;KAC7B,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;AACrD,CAAC,CAAC,CAAC;AAEL;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,aAAa,GAAG,aAAa,CAAC,QAAQ,EAAE;IACnD,gBAAgB,EAAE,IAAI;IACtB,aAAa,EAAE,IAAI;CACpB,CAAC,CAAC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { AiOptionsSchema } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
|
|
3
|
+
/**
 * Creates a string validator that rejects empty strings.
 *
 * @param requiredMessage - Message passed to `z.string` for the field.
 * @param emptyMessage - Message reported when the string is empty.
 * @param description - Description attached to the field.
 * @returns The configured Zod string schema.
 */
function requiredText(requiredMessage, emptyMessage, description) {
    return z.string({ message: requiredMessage }).min(1, emptyMessage).describe(description);
}
/**
 * Validation schema for the options accepted by the `ai index remove` command.
 *
 * It extends {@link AiOptionsSchema} with the Azure Search settings and the
 * embedding deployment name used to initialise the vector store service,
 * plus the command's own `dryRun` flag and optional raw `filter` expression.
 *
 * @example
 * ```ts
 * const validated = await DeleteOptionsSchema.parseAsync(rawOptions);
 * // validated.dryRun, validated.filter, validated.azureSearchEndpoint, etc.
 * ```
 */
export const DeleteOptionsSchema = AiOptionsSchema.extend({
    openaiEmbeddingDeployment: requiredText(
        'Embedding deployment name is required to initialise the vector store.',
        'Embedding deployment name must be a non-empty string.',
        'Azure OpenAI embedding deployment name',
    ),
    // The endpoint keeps its own chain: the URL check runs before the length check.
    azureSearchEndpoint: z
        .string({ message: 'Azure Search endpoint is required for deletion.' })
        .url('Azure Search endpoint must be a valid URL.')
        .min(1, 'Azure Search endpoint must be a non-empty string.')
        .describe('Azure Search endpoint URL'),
    azureSearchApiKey: requiredText(
        'Azure Search API key is required for deletion.',
        'Azure Search API key must be a non-empty string.',
        'Azure Search API key',
    ),
    azureSearchIndexName: requiredText(
        'Azure Search index name is required for deletion.',
        'Azure Search index name must be a non-empty string.',
        'Azure Search index name',
    ),
    dryRun: z
        .boolean({ message: 'dryRun must be a boolean value.' })
        .describe('Preview what would be deleted without making changes'),
    // Optional: the command falls back to this only when no source paths are given.
    filter: z
        .string()
        .min(1, 'Filter expression must be a non-empty string.')
        .optional()
        .describe('Raw OData filter expression for selecting documents to delete'),
}).describe('Command options for the delete command');
|
|
43
|
+
//# sourceMappingURL=delete-command.options.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"delete-command.options.js","sourceRoot":"","sources":["../../src/delete-command.options.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,eAAe,EAAE,MAAM,8DAA8D,CAAC;AAE/F;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG,eAAe,CAAC,MAAM,CAAC;IACxD,yBAAyB,EAAE,CAAC;SACzB,MAAM,CAAC,EAAE,OAAO,EAAE,uEAAuE,EAAE,CAAC;SAC5F,GAAG,CAAC,CAAC,EAAE,uDAAuD,CAAC;SAC/D,QAAQ,CAAC,wCAAwC,CAAC;IACrD,mBAAmB,EAAE,CAAC;SACnB,MAAM,CAAC,EAAE,OAAO,EAAE,iDAAiD,EAAE,CAAC;SACtE,GAAG,CAAC,4CAA4C,CAAC;SACjD,GAAG,CAAC,CAAC,EAAE,mDAAmD,CAAC;SAC3D,QAAQ,CAAC,2BAA2B,CAAC;IACxC,iBAAiB,EAAE,CAAC;SACjB,MAAM,CAAC,EAAE,OAAO,EAAE,gDAAgD,EAAE,CAAC;SACrE,GAAG,CAAC,CAAC,EAAE,kDAAkD,CAAC;SAC1D,QAAQ,CAAC,sBAAsB,CAAC;IACnC,oBAAoB,EAAE,CAAC;SACpB,MAAM,CAAC,EAAE,OAAO,EAAE,mDAAmD,EAAE,CAAC;SACxE,GAAG,CAAC,CAAC,EAAE,qDAAqD,CAAC;SAC7D,QAAQ,CAAC,yBAAyB,CAAC;IACtC,MAAM,EAAE,CAAC;SACN,OAAO,CAAC,EAAE,OAAO,EAAE,iCAAiC,EAAE,CAAC;SACvD,QAAQ,CAAC,sDAAsD,CAAC;IACnE,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,EAAE,+CAA+C,CAAC;SACvD,QAAQ,EAAE;SACV,QAAQ,CAAC,+DAA+D,CAAC;CAC7E,CAAC,CAAC,QAAQ,CAAC,wCAAwC,CAAC,CAAC"}
|