@equinor/fusion-framework-cli-plugin-ai-index 1.0.6 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +74 -2
- package/README.md +105 -69
- package/dist/esm/bin/embed.js +28 -13
- package/dist/esm/bin/embed.js.map +1 -1
- package/dist/esm/delete-command.js +100 -0
- package/dist/esm/delete-command.js.map +1 -0
- package/dist/esm/delete-command.options.js +43 -0
- package/dist/esm/delete-command.options.js.map +1 -0
- package/dist/esm/{command.js → embeddings-command.js} +42 -28
- package/dist/esm/embeddings-command.js.map +1 -0
- package/dist/esm/{command.options.js → embeddings-command.options.js} +14 -7
- package/dist/esm/embeddings-command.options.js.map +1 -0
- package/dist/esm/index.js +37 -4
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/search-command.js +198 -0
- package/dist/esm/search-command.js.map +1 -0
- package/dist/esm/utils/generate-chunk-id.js +17 -5
- package/dist/esm/utils/generate-chunk-id.js.map +1 -1
- package/dist/esm/utils/git/file-changes.js +26 -11
- package/dist/esm/utils/git/file-changes.js.map +1 -1
- package/dist/esm/utils/git/git-client.js +16 -7
- package/dist/esm/utils/git/git-client.js.map +1 -1
- package/dist/esm/utils/git/metadata.js +7 -3
- package/dist/esm/utils/git/metadata.js.map +1 -1
- package/dist/esm/utils/git/status.js +9 -3
- package/dist/esm/utils/git/status.js.map +1 -1
- package/dist/esm/utils/markdown/parser.js +53 -13
- package/dist/esm/utils/markdown/parser.js.map +1 -1
- package/dist/esm/utils/package-resolver.js +10 -6
- package/dist/esm/utils/package-resolver.js.map +1 -1
- package/dist/esm/utils/ts-doc/constants.js +4 -1
- package/dist/esm/utils/ts-doc/constants.js.map +1 -1
- package/dist/esm/utils/ts-doc/extractors.js +27 -13
- package/dist/esm/utils/ts-doc/extractors.js.map +1 -1
- package/dist/esm/utils/ts-doc/parser.js +19 -10
- package/dist/esm/utils/ts-doc/parser.js.map +1 -1
- package/dist/esm/version.js +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types/config.d.ts +51 -10
- package/dist/types/delete-command.d.ts +9 -0
- package/dist/types/delete-command.options.d.ts +32 -0
- package/dist/types/embeddings-command.d.ts +11 -0
- package/dist/types/embeddings-command.options.d.ts +40 -0
- package/dist/types/index.d.ts +19 -2
- package/dist/types/search-command.d.ts +8 -0
- package/dist/types/utils/generate-chunk-id.d.ts +17 -5
- package/dist/types/utils/git/file-changes.d.ts +26 -11
- package/dist/types/utils/git/git-client.d.ts +16 -7
- package/dist/types/utils/git/metadata.d.ts +7 -3
- package/dist/types/utils/git/status.d.ts +9 -3
- package/dist/types/utils/git/types.d.ts +15 -9
- package/dist/types/utils/markdown/parser.d.ts +23 -10
- package/dist/types/utils/markdown/types.d.ts +13 -2
- package/dist/types/utils/package-resolver.d.ts +8 -5
- package/dist/types/utils/ts-doc/constants.d.ts +4 -1
- package/dist/types/utils/ts-doc/extractors.d.ts +27 -13
- package/dist/types/utils/ts-doc/parser.d.ts +19 -10
- package/dist/types/utils/ts-doc/types.d.ts +12 -4
- package/dist/types/utils/types.d.ts +10 -6
- package/dist/types/version.d.ts +1 -1
- package/package.json +12 -10
- package/src/bin/delete-removed-files.ts +1 -1
- package/src/bin/embed.ts +47 -18
- package/src/bin/file-stream.ts +1 -1
- package/src/bin/get-diff.ts +1 -1
- package/src/bin/types.ts +1 -1
- package/src/config.ts +52 -10
- package/src/delete-command.options.ts +51 -0
- package/src/delete-command.ts +117 -0
- package/src/{command.options.ts → embeddings-command.options.ts} +16 -9
- package/src/{command.ts → embeddings-command.ts} +46 -28
- package/src/index.ts +38 -4
- package/src/search-command.ts +259 -0
- package/src/utils/generate-chunk-id.ts +17 -5
- package/src/utils/git/file-changes.ts +26 -11
- package/src/utils/git/git-client.ts +16 -7
- package/src/utils/git/metadata.ts +7 -3
- package/src/utils/git/status.ts +9 -3
- package/src/utils/git/types.ts +15 -9
- package/src/utils/markdown/parser.ts +54 -13
- package/src/utils/markdown/types.ts +13 -2
- package/src/utils/package-resolver.ts +10 -6
- package/src/utils/ts-doc/constants.ts +4 -1
- package/src/utils/ts-doc/extractors.ts +27 -13
- package/src/utils/ts-doc/parser.ts +19 -10
- package/src/utils/ts-doc/types.ts +12 -4
- package/src/utils/types.ts +10 -6
- package/src/version.ts +1 -1
- package/dist/esm/command.js.map +0 -1
- package/dist/esm/command.options.js.map +0 -1
- package/dist/types/command.d.ts +0 -2
- package/dist/types/command.options.d.ts +0 -33
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
import { createCommand, createOption } from 'commander';
|
|
1
|
+
import { type Command, createCommand, createOption } from 'commander';
|
|
2
2
|
|
|
3
3
|
import { loadFusionAIConfig, setupFramework } from '@equinor/fusion-framework-cli-plugin-ai-base';
|
|
4
4
|
import { withOptions as withAiOptions } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
|
|
5
5
|
|
|
6
6
|
import { embed } from './bin/embed.js';
|
|
7
|
-
import { CommandOptionsSchema, type CommandOptions } from './command.options.js';
|
|
7
|
+
import { CommandOptionsSchema, type CommandOptions } from './embeddings-command.options.js';
|
|
8
8
|
import type { FusionAIConfigWithIndex } from './config.js';
|
|
9
9
|
|
|
10
10
|
/**
|
|
11
|
-
* CLI command: `ai
|
|
11
|
+
* CLI command: `ai index add`
|
|
12
12
|
*
|
|
13
|
-
*
|
|
13
|
+
* Add documents to the AI search index via embedding generation.
|
|
14
14
|
*
|
|
15
15
|
* Features:
|
|
16
16
|
* - Markdown/MDX document chunking with frontmatter extraction
|
|
@@ -21,7 +21,7 @@ import type { FusionAIConfigWithIndex } from './config.js';
|
|
|
21
21
|
* - Configurable file patterns via fusion-ai.config.ts
|
|
22
22
|
*
|
|
23
23
|
* Usage:
|
|
24
|
-
* $ ffc ai
|
|
24
|
+
* $ ffc ai index add [options] [glob-patterns...]
|
|
25
25
|
*
|
|
26
26
|
* Arguments:
|
|
27
27
|
* glob-patterns Glob patterns to match files (optional when using --diff)
|
|
@@ -34,24 +34,15 @@ import type { FusionAIConfigWithIndex } from './config.js';
|
|
|
34
34
|
* --base-ref <ref> Git reference to compare against (default: HEAD~1)
|
|
35
35
|
* --clean Delete all existing documents from the vector store before processing
|
|
36
36
|
*
|
|
37
|
-
* AI Options (required):
|
|
38
|
-
* --openai-api-key <key> Azure OpenAI API key (or AZURE_OPENAI_API_KEY env var)
|
|
39
|
-
* --openai-api-version <version> Azure OpenAI API version (default: 2024-02-15-preview)
|
|
40
|
-
* --openai-instance <name> Azure OpenAI instance name (or AZURE_OPENAI_INSTANCE_NAME env var)
|
|
41
|
-
* --openai-embedding-deployment <name> Azure OpenAI embedding deployment name (or AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME env var)
|
|
42
|
-
* --azure-search-endpoint <url> Azure Search endpoint URL (or AZURE_SEARCH_ENDPOINT env var)
|
|
43
|
-
* --azure-search-api-key <key> Azure Search API key (or AZURE_SEARCH_API_KEY env var)
|
|
44
|
-
* --azure-search-index-name <name> Azure Search index name (or AZURE_SEARCH_INDEX_NAME env var)
|
|
45
|
-
*
|
|
46
37
|
* Examples:
|
|
47
|
-
* $ ffc ai
|
|
48
|
-
* $ ffc ai
|
|
49
|
-
* $ ffc ai
|
|
50
|
-
* $ ffc ai
|
|
51
|
-
* $ ffc ai
|
|
38
|
+
* $ ffc ai index add --dry-run ./src
|
|
39
|
+
* $ ffc ai index add "*.ts" "*.md" "*.mdx"
|
|
40
|
+
* $ ffc ai index add --diff
|
|
41
|
+
* $ ffc ai index add --diff --base-ref origin/main
|
|
42
|
+
* $ ffc ai index add --clean "*.ts"
|
|
52
43
|
*/
|
|
53
|
-
const _command = createCommand('
|
|
54
|
-
.description('
|
|
44
|
+
const _command = createCommand('add')
|
|
45
|
+
.description('Add documents to the AI search index via embedding generation')
|
|
55
46
|
.addOption(
|
|
56
47
|
createOption('--dry-run', 'Show what would be processed without actually doing it').default(
|
|
57
48
|
false,
|
|
@@ -67,16 +58,35 @@ const _command = createCommand('embeddings')
|
|
|
67
58
|
).default(false),
|
|
68
59
|
)
|
|
69
60
|
.argument('[glob-patterns...]', 'Glob patterns to match files (optional when using --diff)')
|
|
70
|
-
.action(async (patterns: string[], commandOptions: CommandOptions)
|
|
71
|
-
|
|
61
|
+
.action(async function (this: Command, patterns: string[], commandOptions: CommandOptions) {
|
|
62
|
+
// Load configuration before validation so config values can fill gaps
|
|
63
|
+
const preOptions = commandOptions as Record<string, unknown>;
|
|
64
|
+
const config = await loadFusionAIConfig<FusionAIConfigWithIndex>(
|
|
65
|
+
(preOptions.config as string) ?? 'fusion-ai.config',
|
|
66
|
+
{ baseDir: process.cwd() },
|
|
67
|
+
);
|
|
68
|
+
const indexConfig = config.index ?? {};
|
|
72
69
|
|
|
73
|
-
//
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
70
|
+
// Config file values override env-var defaults but not explicit CLI flags.
|
|
71
|
+
// Commander merges env vars before the action runs, so we use
|
|
72
|
+
// getOptionValueSource to distinguish "user passed --flag" from "came from env".
|
|
73
|
+
const parentCommand = this.parent ?? this;
|
|
74
|
+
if (indexConfig.name) {
|
|
75
|
+
const source = parentCommand.getOptionValueSource('azureSearchIndexName');
|
|
76
|
+
if (source !== 'cli') {
|
|
77
|
+
preOptions.azureSearchIndexName = indexConfig.name;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
if (indexConfig.model) {
|
|
81
|
+
const source = parentCommand.getOptionValueSource('openaiEmbeddingDeployment');
|
|
82
|
+
if (source !== 'cli') {
|
|
83
|
+
preOptions.openaiEmbeddingDeployment = indexConfig.model;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
const options = await CommandOptionsSchema.parseAsync(preOptions);
|
|
77
88
|
|
|
78
89
|
// CLI args take precedence over config patterns
|
|
79
|
-
const indexConfig = config.index ?? {};
|
|
80
90
|
const allowedFilePatterns = indexConfig.patterns ?? ['**/*.ts', '**/*.md', '**/*.mdx'];
|
|
81
91
|
const filePatterns = patterns.length ? patterns : allowedFilePatterns;
|
|
82
92
|
|
|
@@ -92,6 +102,14 @@ const _command = createCommand('embeddings')
|
|
|
92
102
|
});
|
|
93
103
|
});
|
|
94
104
|
|
|
105
|
+
/**
|
|
106
|
+
* Configured Commander command for the `ai index add` subcommand.
|
|
107
|
+
*
|
|
108
|
+
* This constant is the fully-configured {@link Command} instance with all
|
|
109
|
+
* AI-specific options (embedding deployment, Azure Search credentials) applied
|
|
110
|
+
* via `withAiOptions`. It is registered with the CLI automatically by
|
|
111
|
+
* {@link registerAiPlugin}.
|
|
112
|
+
*/
|
|
95
113
|
export const command = withAiOptions(_command, {
|
|
96
114
|
includeEmbedding: true,
|
|
97
115
|
includeSearch: true,
|
package/src/index.ts
CHANGED
|
@@ -1,15 +1,49 @@
|
|
|
1
1
|
import type { Command } from 'commander';
|
|
2
|
+
import { createCommand } from 'commander';
|
|
2
3
|
import { registerAiPlugin as registerAiPluginBase } from '@equinor/fusion-framework-cli-plugin-ai-base';
|
|
3
|
-
import { command as
|
|
4
|
+
import { command as addCommand } from './embeddings-command.js';
|
|
5
|
+
import { deleteCommand as removeCommand } from './delete-command.js';
|
|
6
|
+
import { searchCommand } from './search-command.js';
|
|
4
7
|
|
|
5
8
|
export { FusionAIConfigWithIndex, IndexConfig } from './config.js';
|
|
6
9
|
|
|
7
10
|
/**
|
|
8
|
-
*
|
|
9
|
-
*
|
|
11
|
+
* Parent command for the `ai index` group.
|
|
12
|
+
*
|
|
13
|
+
* Owns three subcommands:
|
|
14
|
+
* - `add` — index documents into the Azure AI Search vector store.
|
|
15
|
+
* - `remove` — remove documents from the vector store.
|
|
16
|
+
* - `search` — query the vector store for indexed documents.
|
|
17
|
+
*/
|
|
18
|
+
const indexCommand = createCommand('index')
|
|
19
|
+
.description('Manage the AI search index (add, search, remove)')
|
|
20
|
+
.addCommand(addCommand)
|
|
21
|
+
.addCommand(removeCommand)
|
|
22
|
+
.addCommand(searchCommand);
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Registers the `ai index` command with the Fusion Framework CLI.
|
|
26
|
+
*
|
|
27
|
+
* Adds a single `index` command under `ai` with subcommands for indexing,
|
|
28
|
+
* searching, and removing documents in the Azure AI Search vector store.
|
|
29
|
+
*
|
|
30
|
+
* @param program - The root Commander {@link Command} instance to attach to.
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* ```ts
|
|
34
|
+
* import { Command } from 'commander';
|
|
35
|
+
* import { registerAiPlugin } from '@equinor/fusion-framework-cli-plugin-ai-index';
|
|
36
|
+
*
|
|
37
|
+
* const program = new Command();
|
|
38
|
+
* registerAiPlugin(program);
|
|
39
|
+
* program.parse();
|
|
40
|
+
* // ffc ai index add [glob-patterns...]
|
|
41
|
+
* // ffc ai index search <query>
|
|
42
|
+
* // ffc ai index remove [source-paths...]
|
|
43
|
+
* ```
|
|
10
44
|
*/
|
|
11
45
|
export function registerAiPlugin(program: Command): void {
|
|
12
|
-
registerAiPluginBase(program,
|
|
46
|
+
registerAiPluginBase(program, indexCommand);
|
|
13
47
|
}
|
|
14
48
|
|
|
15
49
|
export default registerAiPlugin;
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
import { createCommand, createOption } from 'commander';
|
|
2
|
+
import type { Document } from '@langchain/core/documents';
|
|
3
|
+
import { inspect } from 'node:util';
|
|
4
|
+
|
|
5
|
+
import { setupFramework } from '@equinor/fusion-framework-cli-plugin-ai-base';
|
|
6
|
+
import {
|
|
7
|
+
withOptions as withAiOptions,
|
|
8
|
+
type AiOptions,
|
|
9
|
+
} from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
|
|
10
|
+
import type { RetrieverOptions } from '@equinor/fusion-framework-module-ai/lib';
|
|
11
|
+
|
|
12
|
+
/**
 * Option bag received by the `ai index search` action handler.
 *
 * Combines the shared {@link AiOptions} with the flags declared on the
 * `search` subcommand itself.
 */
type CommandOptions = AiOptions & {
  /** Upper bound on the number of documents returned (the CLI flag defaults to `10`). */
  limit: number;
  /** Print diagnostic details (index name, query, limit, extra metadata) when `true`. */
  verbose: boolean;
  /** OData filter expression forwarded to the retriever; omitted when no filtering is wanted. */
  filter?: string;
  /** Emit each result as an object instead of the human-readable listing when `true`. */
  json: boolean;
  /** Keep the metadata exactly as returned by the vector store (no attribute flattening) when `true`. */
  raw: boolean;
  /** Retrieval strategy: `'similarity'` ranking or `'mmr'` (Maximum Marginal Relevance) re-ranking. */
  searchType: 'mmr' | 'similarity';
};
|
|
32
|
+
|
|
33
|
+
/**
 * Flatten the `attributes` array of a search document's metadata into plain
 * top-level properties.
 *
 * When `metadata.attributes` is an array, every entry shaped like
 * `{ key: string, value: unknown }` becomes a `key: value` property on the
 * returned object, so callers can read `metadata.source` directly instead of
 * scanning the array. Entries that are not objects, lack `key`/`value`, or
 * have a non-string `key` are ignored.
 *
 * String values are parsed as JSON when possible and kept verbatim otherwise.
 * Non-string values are passed through untouched (they are never coerced to a
 * string and re-parsed).
 *
 * Flattened attributes take precedence over top-level metadata properties of
 * the same name. The original `attributes` array is removed from the result
 * before the flattened entries are applied, so an attribute that is itself
 * named `attributes` is preserved.
 *
 * @param metadata - Raw metadata record of a search result document.
 * @returns A new object; `metadata` itself is never mutated. When
 *   `metadata.attributes` is not an array, a shallow copy is returned as-is.
 */
const normalizeMetadata = (metadata: Record<string, unknown>): Record<string, unknown> => {
  const { attributes, ...rest } = metadata;

  // Nothing to flatten: hand back a shallow copy, leaving `attributes` as it was.
  if (!Array.isArray(attributes)) {
    return { ...metadata };
  }

  const flattened: [string, unknown][] = [];
  for (const attr of attributes as unknown[]) {
    if (typeof attr !== 'object' || attr === null || !('key' in attr) || !('value' in attr)) {
      continue;
    }

    const { key, value } = attr as { key: unknown; value: unknown };
    if (typeof key !== 'string') {
      continue;
    }

    let parsed: unknown = value;
    if (typeof value === 'string') {
      try {
        parsed = JSON.parse(value);
      } catch {
        // Not valid JSON: keep the plain string.
      }
    }
    flattened.push([key, parsed]);
  }

  // `rest` already excludes the raw array, so flattened keys (including one
  // literally called `attributes`) are applied last and always survive.
  return { ...rest, ...Object.fromEntries(flattened) };
};
|
|
74
|
+
|
|
75
|
+
/**
 * Parse the value of `--limit` as a base-10 integer.
 *
 * Commander calls option parsers as `(value, previous)`. Handing it `parseInt`
 * directly makes the previous value act as the radix, which only works by
 * accident while the default is `10` and breaks as soon as the flag is
 * repeated. This wrapper pins the radix and rejects non-numeric input.
 *
 * @param value - Raw string supplied on the command line.
 * @returns The parsed integer.
 * @throws {Error} If `value` does not start with a base-10 integer.
 */
const parseLimit = (value: string): number => {
  const limit = Number.parseInt(value, 10);
  if (Number.isNaN(limit)) {
    throw new Error(`Invalid value for --limit: "${value}" is not a number`);
  }
  return limit;
};

/**
 * Commander subcommand: **`ai index search`**
 *
 * Runs a query against the vector store identified by
 * `options.azureSearchIndexName` and prints the matching documents. Use it to
 * validate that embeddings are indexed correctly, to explore the retrieval
 * corpus, or to test OData filter expressions.
 *
 * Supports two search types:
 * - **`similarity`** (default) — similarity ranking.
 * - **`mmr`** — Maximum Marginal Relevance re-ranking.
 *
 * Results are printed as human-readable text (default) or as one object per
 * result (`--json`). The `--raw` flag keeps the metadata as returned by the
 * vector store; without it, metadata attributes are flattened by
 * {@link normalizeMetadata}.
 *
 * Any failure while searching or printing is reported on stderr and the
 * process exits with code `1`.
 *
 * @example
 * ```sh
 * # Basic similarity search
 * ffc ai index search "how to configure modules"
 *
 * # Limit results and use MMR for diversity
 * ffc ai index search "authentication" --limit 5 --search-type mmr
 *
 * # Filter by package name
 * ffc ai index search "hooks" --filter "metadata/attributes/any(a: a/key eq 'pkg_name' and a/value eq '@equinor/fusion-framework-react')" --json
 *
 * # Verbose output with raw Azure metadata
 * ffc ai index search "API reference" --verbose --raw
 * ```
 */
const _command = createCommand('search')
  .description('Search the vector store to validate embeddings and retrieve relevant documents')
  .addOption(
    createOption('--limit <number>', 'Maximum number of results to return')
      .default(10)
      .argParser(parseLimit),
  )
  .addOption(
    createOption('--search-type <type>', 'Search type: mmr or similarity')
      .choices(['mmr', 'similarity'])
      .default('similarity'),
  )
  .addOption(
    createOption('--filter <expression>', 'OData filter expression for metadata filtering'),
  )
  .addOption(createOption('--json', 'Output results as JSON').default(false))
  .addOption(createOption('--raw', 'Output raw metadata without normalization').default(false))
  .addOption(createOption('--verbose', 'Enable verbose output').default(false))
  .argument('<query>', 'Search query string')
  .action(async (query: string, options: CommandOptions) => {
    if (options.verbose) {
      console.log('🔍 Initializing framework...');
    }

    const framework = await setupFramework(options);

    if (!options.azureSearchIndexName) {
      throw new Error('Azure Search index name is required');
    }

    if (options.verbose) {
      console.log('✅ Framework initialized successfully');
      console.log(`📇 Index: ${options.azureSearchIndexName}`);
      console.log(`🔎 Searching for: "${query}"`);
      console.log(`📊 Limit: ${options.limit}`);
      console.log(`🔍 Search type: ${options.searchType}`);
      if (options.filter) {
        console.log(`🔧 Filter: ${options.filter}`);
      }
      console.log('');
    }

    const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);

    try {
      const filter = options.filter ? { filterExpression: options.filter } : undefined;

      // Both branches are spelled out with a literal `searchType` so each
      // object is checked against the matching RetrieverOptions variant.
      const retrieverOptions: RetrieverOptions =
        options.searchType === 'mmr'
          ? {
              k: options.limit,
              searchType: 'mmr',
              ...(filter && { filter: filter as Record<string, unknown> }),
            }
          : {
              k: options.limit,
              searchType: 'similarity',
              ...(filter && { filter: filter as Record<string, unknown> }),
            };

      const retriever = vectorStoreService.asRetriever(retrieverOptions);
      const results = await retriever.invoke(query);

      if (!results || !Array.isArray(results)) {
        throw new Error(
          `Invalid search results: expected array but got ${results === null ? 'null' : typeof results}`,
        );
      }

      if (options.json) {
        // NOTE(review): both branches print via Node's object inspection
        // (the raw branch even with ANSI colours), which is not strict JSON.
        // Confirm whether consumers expect `JSON.stringify` output here.
        for (const doc of results) {
          if (options.raw) {
            console.log(inspect(doc, { depth: null, colors: true }));
          } else {
            const metadata = normalizeMetadata(doc.metadata as Record<string, unknown>);
            console.log({
              content: doc.pageContent,
              metadata,
              score: (metadata as { score?: number })?.score,
            });
          }
        }
      } else {
        if (results.length === 0) {
          console.log('❌ No results found');
          return;
        }

        console.log(`✅ Found ${results.length} result${results.length !== 1 ? 's' : ''}:\n`);

        results.forEach((doc: Document, index: number) => {
          const processedMetadata = options.raw
            ? (doc.metadata as Record<string, unknown>)
            : normalizeMetadata(doc.metadata as Record<string, unknown>);
          const metadata = processedMetadata as {
            source?: string;
            score?: number;
            [key: string]: unknown;
          };
          const score = metadata.score;
          const source = metadata.source || 'Unknown source';

          console.log(`${'─'.repeat(80)}`);
          // The cast above is unchecked, so only format the score when it
          // really is a number; anything else is omitted from the heading.
          console.log(
            `Result ${index + 1}${typeof score === 'number' ? ` (Score: ${score.toFixed(4)})` : ''}`,
          );
          console.log(`Source: ${source}`);

          if (options.verbose) {
            // Source and score are already shown above; print the remainder.
            const { source: _, score: __, ...otherMetadata } = metadata;
            if (Object.keys(otherMetadata).length > 0) {
              console.log(`Metadata:`, JSON.stringify(otherMetadata, null, 2));
            }
          }
          console.log('');

          // Keep the listing readable: show at most 500 characters per result.
          const content = doc.pageContent;
          const maxLength = 500;
          if (content.length > maxLength) {
            console.log(`${content.substring(0, maxLength)}...`);
            console.log(`\n[Content truncated - ${content.length} characters total]`);
          } else {
            console.log(content);
          }
          console.log('');
        });

        console.log(`${'─'.repeat(80)}`);
      }
    } catch (error) {
      console.error(
        `❌ Search failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
      );
      if (options.verbose && error instanceof Error && error.stack) {
        console.error(error.stack);
      }
      process.exit(1);
    }
  });
|
|
247
|
+
|
|
248
|
+
/**
 * The `ai index search` subcommand, ready to be attached to a parent command.
 *
 * Wraps the locally defined `search` command with `withAiOptions`, requesting
 * both the embedding and the search option groups.
 */
export const searchCommand = withAiOptions(_command, {
  includeSearch: true,
  includeEmbedding: true,
});

export default searchCommand;
|
|
@@ -1,9 +1,21 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Generates a
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
2
|
+
* Generates a deterministic, URL-safe identifier for a document chunk.
|
|
3
|
+
*
|
|
4
|
+
* The identifier is a Base64-encoded hash of the file path with all
|
|
5
|
+
* non-alphanumeric characters stripped, making it safe for use as an
|
|
6
|
+
* Azure AI Search document key.
|
|
7
|
+
*
|
|
8
|
+
* @param filePath - The relative file path to hash.
|
|
9
|
+
* @param chunkIndex - Optional zero-based chunk index appended to distinguish
|
|
10
|
+
* multiple chunks originating from the same file.
|
|
11
|
+
* @returns A stable, alphanumeric document ID string.
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```ts
|
|
15
|
+
* generateChunkId('packages/cli/src/index.ts'); // 'cGFja2FnZXMvY2xpL3NyYy9pbmRleC50cw'
|
|
16
|
+
* generateChunkId('packages/cli/src/index.ts', 0); // 'cGFja2FnZXMvY2xpL3NyYy9pbmRleC50cw-0'
|
|
17
|
+
* generateChunkId('packages/cli/src/index.ts', 3); // 'cGFja2FnZXMvY2xpL3NyYy9pbmRleC50cw-3'
|
|
18
|
+
* ```
|
|
7
19
|
*/
|
|
8
20
|
export const generateChunkId = (filePath: string, chunkIndex?: number): string => {
|
|
9
21
|
// Convert file path to base64 and remove non-alphanumeric characters
|
|
@@ -3,9 +3,15 @@ import type { ChangedFile, FileChangeStatus, GitDiffOptions } from './types.js';
|
|
|
3
3
|
import { resolveProjectRoot, getGit } from './git-client.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
6
|
+
* Returns a list of files changed between `baseRef` and HEAD.
|
|
7
|
+
*
|
|
8
|
+
* Parses the output of `git diff --name-status` to classify each file as
|
|
9
|
+
* `'new'`, `'modified'`, or `'removed'`. Renames are expanded into a
|
|
10
|
+
* `'removed'` entry for the old path and a `'new'` entry for the new path.
|
|
11
|
+
*
|
|
12
|
+
* @param options - Configuration controlling the diff reference and working directory.
|
|
13
|
+
* @returns Array of changed files with their status.
|
|
14
|
+
* @throws {Error} If the working directory is not inside a git repository.
|
|
9
15
|
*/
|
|
10
16
|
export const getChangedFiles = async (options: GitDiffOptions): Promise<ChangedFile[]> => {
|
|
11
17
|
const { diff, baseRef = 'HEAD~1', cwd = process.cwd() } = options;
|
|
@@ -78,10 +84,15 @@ export const getChangedFiles = async (options: GitDiffOptions): Promise<ChangedF
|
|
|
78
84
|
};
|
|
79
85
|
|
|
80
86
|
/**
|
|
81
|
-
*
|
|
82
|
-
*
|
|
83
|
-
*
|
|
84
|
-
*
|
|
87
|
+
* Determines the git change status of a single file.
|
|
88
|
+
*
|
|
89
|
+
* Checks tracked status, porcelain output, and rename/copy detection to
|
|
90
|
+
* produce one or two {@link ChangedFile} entries (two when a rename is
|
|
91
|
+
* detected — one `'removed'` for the old path and one `'new'` for the
|
|
92
|
+
* current path).
|
|
93
|
+
*
|
|
94
|
+
* @param filePath - Absolute path to the file to inspect.
|
|
95
|
+
* @returns Array with one or two changed-file entries.
|
|
85
96
|
*/
|
|
86
97
|
export const getFileStatus = async (filePath: string): Promise<ChangedFile[]> => {
|
|
87
98
|
const { git, gitRepoPath } = getGit(filePath) ?? {};
|
|
@@ -199,10 +210,14 @@ export const getFileStatus = async (filePath: string): Promise<ChangedFile[]> =>
|
|
|
199
210
|
};
|
|
200
211
|
|
|
201
212
|
/**
|
|
202
|
-
*
|
|
203
|
-
*
|
|
204
|
-
*
|
|
205
|
-
*
|
|
213
|
+
* Checks whether a file path appears in a list of changed files.
|
|
214
|
+
*
|
|
215
|
+
* When the changed-files list is empty (no diff filtering active), every
|
|
216
|
+
* file is considered changed so that all files are processed.
|
|
217
|
+
*
|
|
218
|
+
* @param filePath - Absolute file path to look up.
|
|
219
|
+
* @param changedFiles - Array of {@link ChangedFile} entries to search.
|
|
220
|
+
* @returns `true` if the file has changed or if diff filtering is disabled.
|
|
206
221
|
*/
|
|
207
222
|
export const isFileChanged = (filePath: string, changedFiles: ChangedFile[]): boolean => {
|
|
208
223
|
if (changedFiles.length === 0) {
|
|
@@ -6,9 +6,13 @@ import { existsSync } from 'node:fs';
|
|
|
6
6
|
const gitCache = new Map<string, SimpleGit>();
|
|
7
7
|
|
|
8
8
|
/**
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
9
|
+
* Resolves the git repository root for a given file path.
|
|
10
|
+
*
|
|
11
|
+
* Walks up the directory tree looking for a `.git` directory or file
|
|
12
|
+
* (to support worktrees) and returns the enclosing directory.
|
|
13
|
+
*
|
|
14
|
+
* @param filePath - Absolute file or directory path to resolve from.
|
|
15
|
+
* @returns Absolute path to the repository root, or `undefined` if not inside a git repo.
|
|
12
16
|
*/
|
|
13
17
|
export const resolveProjectRoot = (filePath: string): string | undefined => {
|
|
14
18
|
// if we are in the root of the git repository, return the root
|
|
@@ -21,10 +25,15 @@ export const resolveProjectRoot = (filePath: string): string | undefined => {
|
|
|
21
25
|
};
|
|
22
26
|
|
|
23
27
|
/**
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
+
* Returns a cached `SimpleGit` instance scoped to the repository that
|
|
29
|
+
* contains `filePath`.
|
|
30
|
+
*
|
|
31
|
+
* Instances are cached by repository root to avoid repeatedly spawning
|
|
32
|
+
* new git processes for the same repo.
|
|
33
|
+
*
|
|
34
|
+
* @param filePath - Absolute file path to locate the repository for.
|
|
35
|
+
* @returns An object containing the git client and the repository root path,
|
|
36
|
+
* or `undefined` when `filePath` is not inside a git repository.
|
|
28
37
|
*/
|
|
29
38
|
export const getGit = (
|
|
30
39
|
filePath: string,
|
|
@@ -24,9 +24,13 @@ const generateGithubPermalink = (
|
|
|
24
24
|
};
|
|
25
25
|
|
|
26
26
|
/**
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
*
|
|
27
|
+
* Extracts git metadata for a single source file.
|
|
28
|
+
*
|
|
29
|
+
* Resolves the latest commit hash, commit date, and a GitHub permalink
|
|
30
|
+
* (when the remote is a GitHub URL) by inspecting `git log` output.
|
|
31
|
+
*
|
|
32
|
+
* @param filePath - Absolute path to the file.
|
|
33
|
+
* @returns Git metadata, or `undefined` if the file is not inside a git repository.
|
|
30
34
|
*/
|
|
31
35
|
export const extractGitMetadata = async (filePath: string): Promise<GitMetadata | undefined> => {
|
|
32
36
|
const { git, gitRepoPath: gitRepoRoot } = getGit(filePath) ?? {};
|
package/src/utils/git/status.ts
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
import { resolveProjectRoot, getGit } from './git-client.js';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Retrieves a summary of the current git working-tree status.
|
|
5
|
+
*
|
|
6
|
+
* Returns the current branch name, abbreviated HEAD commit, and counts of
|
|
7
|
+
* staged / unstaged changes. Useful for informational output in CLI commands.
|
|
8
|
+
*
|
|
9
|
+
* @param cwd - Working directory for git operations. Defaults to `process.cwd()`.
|
|
10
|
+
* @returns An object with branch, commit, and file-change counts.
|
|
11
|
+
* @throws {Error} If the working directory is not inside a git repository or
|
|
12
|
+
* the git client cannot be initialised.
|
|
7
13
|
*/
|
|
8
14
|
export const getGitStatus = async (
|
|
9
15
|
cwd: string = process.cwd(),
|
package/src/utils/git/types.ts
CHANGED
|
@@ -1,36 +1,42 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Git metadata extracted from repository
|
|
2
|
+
* Git metadata extracted from the repository for a single source file.
|
|
3
|
+
*
|
|
4
|
+
* Attached to vector-store documents as part of `metadata.attributes`.
|
|
3
5
|
*/
|
|
4
6
|
export type GitMetadata = Partial<{
|
|
7
|
+
/** Remote origin URL of the git repository. */
|
|
5
8
|
git_remote_url: string;
|
|
9
|
+
/** Short SHA of the most recent commit that touched the file. */
|
|
6
10
|
git_commit_hash: string;
|
|
11
|
+
/** ISO-8601 date string of the most recent commit that touched the file. */
|
|
7
12
|
git_commit_date: string;
|
|
13
|
+
/** GitHub permalink to the file on the default branch. */
|
|
8
14
|
git_link: string;
|
|
9
15
|
}>;
|
|
10
16
|
|
|
11
17
|
/**
|
|
12
|
-
*
|
|
18
|
+
* Configuration for retrieving changed files via `git diff`.
|
|
13
19
|
*/
|
|
14
20
|
export interface GitDiffOptions {
|
|
15
|
-
/**
|
|
21
|
+
/** When `true`, enable diff-based file filtering. */
|
|
16
22
|
diff: boolean;
|
|
17
|
-
/** Git reference to compare against (
|
|
23
|
+
/** Git reference to compare against (e.g. `'HEAD~1'`, `'origin/main'`). Defaults to `'HEAD~1'`. */
|
|
18
24
|
baseRef?: string;
|
|
19
|
-
/** Working directory for git operations */
|
|
25
|
+
/** Working directory for git operations. Defaults to `process.cwd()`. */
|
|
20
26
|
cwd?: string;
|
|
21
27
|
}
|
|
22
28
|
|
|
23
29
|
/**
|
|
24
|
-
*
|
|
30
|
+
* Possible change statuses reported by git.
|
|
25
31
|
*/
|
|
26
32
|
export type FileChangeStatus = 'new' | 'modified' | 'removed';
|
|
27
33
|
|
|
28
34
|
/**
|
|
29
|
-
*
|
|
35
|
+
* Describes a single file that has changed according to git.
|
|
30
36
|
*/
|
|
31
37
|
export interface ChangedFile {
|
|
32
|
-
/** Absolute file path */
|
|
38
|
+
/** Absolute file-system path to the changed file. */
|
|
33
39
|
filepath: string;
|
|
34
|
-
/**
|
|
40
|
+
/** How the file was changed: added, modified, or deleted. */
|
|
35
41
|
status: FileChangeStatus;
|
|
36
42
|
}
|