@equinor/fusion-framework-cli-plugin-ai-index 1.0.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/CHANGELOG.md +91 -2
  2. package/README.md +105 -69
  3. package/dist/esm/bin/embed.js +28 -13
  4. package/dist/esm/bin/embed.js.map +1 -1
  5. package/dist/esm/delete-command.js +100 -0
  6. package/dist/esm/delete-command.js.map +1 -0
  7. package/dist/esm/delete-command.options.js +43 -0
  8. package/dist/esm/delete-command.options.js.map +1 -0
  9. package/dist/esm/{command.js → embeddings-command.js} +42 -28
  10. package/dist/esm/embeddings-command.js.map +1 -0
  11. package/dist/esm/{command.options.js → embeddings-command.options.js} +14 -7
  12. package/dist/esm/embeddings-command.options.js.map +1 -0
  13. package/dist/esm/index.js +37 -4
  14. package/dist/esm/index.js.map +1 -1
  15. package/dist/esm/search-command.js +198 -0
  16. package/dist/esm/search-command.js.map +1 -0
  17. package/dist/esm/utils/generate-chunk-id.js +17 -5
  18. package/dist/esm/utils/generate-chunk-id.js.map +1 -1
  19. package/dist/esm/utils/git/file-changes.js +26 -11
  20. package/dist/esm/utils/git/file-changes.js.map +1 -1
  21. package/dist/esm/utils/git/git-client.js +16 -7
  22. package/dist/esm/utils/git/git-client.js.map +1 -1
  23. package/dist/esm/utils/git/metadata.js +7 -3
  24. package/dist/esm/utils/git/metadata.js.map +1 -1
  25. package/dist/esm/utils/git/status.js +9 -3
  26. package/dist/esm/utils/git/status.js.map +1 -1
  27. package/dist/esm/utils/markdown/parser.js +53 -13
  28. package/dist/esm/utils/markdown/parser.js.map +1 -1
  29. package/dist/esm/utils/package-resolver.js +10 -6
  30. package/dist/esm/utils/package-resolver.js.map +1 -1
  31. package/dist/esm/utils/ts-doc/constants.js +4 -1
  32. package/dist/esm/utils/ts-doc/constants.js.map +1 -1
  33. package/dist/esm/utils/ts-doc/extractors.js +27 -13
  34. package/dist/esm/utils/ts-doc/extractors.js.map +1 -1
  35. package/dist/esm/utils/ts-doc/parser.js +19 -10
  36. package/dist/esm/utils/ts-doc/parser.js.map +1 -1
  37. package/dist/esm/version.js +1 -1
  38. package/dist/tsconfig.tsbuildinfo +1 -1
  39. package/dist/types/config.d.ts +51 -10
  40. package/dist/types/delete-command.d.ts +9 -0
  41. package/dist/types/delete-command.options.d.ts +32 -0
  42. package/dist/types/embeddings-command.d.ts +11 -0
  43. package/dist/types/embeddings-command.options.d.ts +40 -0
  44. package/dist/types/index.d.ts +19 -2
  45. package/dist/types/search-command.d.ts +8 -0
  46. package/dist/types/utils/generate-chunk-id.d.ts +17 -5
  47. package/dist/types/utils/git/file-changes.d.ts +26 -11
  48. package/dist/types/utils/git/git-client.d.ts +16 -7
  49. package/dist/types/utils/git/metadata.d.ts +7 -3
  50. package/dist/types/utils/git/status.d.ts +9 -3
  51. package/dist/types/utils/git/types.d.ts +15 -9
  52. package/dist/types/utils/markdown/parser.d.ts +23 -10
  53. package/dist/types/utils/markdown/types.d.ts +13 -2
  54. package/dist/types/utils/package-resolver.d.ts +8 -5
  55. package/dist/types/utils/ts-doc/constants.d.ts +4 -1
  56. package/dist/types/utils/ts-doc/extractors.d.ts +27 -13
  57. package/dist/types/utils/ts-doc/parser.d.ts +19 -10
  58. package/dist/types/utils/ts-doc/types.d.ts +12 -4
  59. package/dist/types/utils/types.d.ts +10 -6
  60. package/dist/types/version.d.ts +1 -1
  61. package/package.json +13 -11
  62. package/src/bin/delete-removed-files.ts +1 -1
  63. package/src/bin/embed.ts +47 -18
  64. package/src/bin/file-stream.ts +1 -1
  65. package/src/bin/get-diff.ts +1 -1
  66. package/src/bin/types.ts +1 -1
  67. package/src/config.ts +52 -10
  68. package/src/delete-command.options.ts +51 -0
  69. package/src/delete-command.ts +117 -0
  70. package/src/{command.options.ts → embeddings-command.options.ts} +16 -9
  71. package/src/{command.ts → embeddings-command.ts} +46 -28
  72. package/src/index.ts +38 -4
  73. package/src/search-command.ts +259 -0
  74. package/src/utils/generate-chunk-id.ts +17 -5
  75. package/src/utils/git/file-changes.ts +26 -11
  76. package/src/utils/git/git-client.ts +16 -7
  77. package/src/utils/git/metadata.ts +7 -3
  78. package/src/utils/git/status.ts +9 -3
  79. package/src/utils/git/types.ts +15 -9
  80. package/src/utils/markdown/parser.ts +54 -13
  81. package/src/utils/markdown/types.ts +13 -2
  82. package/src/utils/package-resolver.ts +10 -6
  83. package/src/utils/ts-doc/constants.ts +4 -1
  84. package/src/utils/ts-doc/extractors.ts +27 -13
  85. package/src/utils/ts-doc/parser.ts +19 -10
  86. package/src/utils/ts-doc/types.ts +12 -4
  87. package/src/utils/types.ts +10 -6
  88. package/src/version.ts +1 -1
  89. package/dist/esm/command.js.map +0 -1
  90. package/dist/esm/command.options.js.map +0 -1
  91. package/dist/types/command.d.ts +0 -2
  92. package/dist/types/command.options.d.ts +0 -33
@@ -1,16 +1,16 @@
1
- import { createCommand, createOption } from 'commander';
1
+ import { type Command, createCommand, createOption } from 'commander';
2
2
 
3
3
  import { loadFusionAIConfig, setupFramework } from '@equinor/fusion-framework-cli-plugin-ai-base';
4
4
  import { withOptions as withAiOptions } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
5
5
 
6
6
  import { embed } from './bin/embed.js';
7
- import { CommandOptionsSchema, type CommandOptions } from './command.options.js';
7
+ import { CommandOptionsSchema, type CommandOptions } from './embeddings-command.options.js';
8
8
  import type { FusionAIConfigWithIndex } from './config.js';
9
9
 
10
10
  /**
11
- * CLI command: `ai embeddings`
11
+ * CLI command: `ai index add`
12
12
  *
13
- * Document embedding utilities for Large Language Model processing.
13
+ * Add documents to the AI search index via embedding generation.
14
14
  *
15
15
  * Features:
16
16
  * - Markdown/MDX document chunking with frontmatter extraction
@@ -21,7 +21,7 @@ import type { FusionAIConfigWithIndex } from './config.js';
21
21
  * - Configurable file patterns via fusion-ai.config.ts
22
22
  *
23
23
  * Usage:
24
- * $ ffc ai embeddings [options] [glob-patterns...]
24
+ * $ ffc ai index add [options] [glob-patterns...]
25
25
  *
26
26
  * Arguments:
27
27
  * glob-patterns Glob patterns to match files (optional when using --diff)
@@ -34,24 +34,15 @@ import type { FusionAIConfigWithIndex } from './config.js';
34
34
  * --base-ref <ref> Git reference to compare against (default: HEAD~1)
35
35
  * --clean Delete all existing documents from the vector store before processing
36
36
  *
37
- * AI Options (required):
38
- * --openai-api-key <key> Azure OpenAI API key (or AZURE_OPENAI_API_KEY env var)
39
- * --openai-api-version <version> Azure OpenAI API version (default: 2024-02-15-preview)
40
- * --openai-instance <name> Azure OpenAI instance name (or AZURE_OPENAI_INSTANCE_NAME env var)
41
- * --openai-embedding-deployment <name> Azure OpenAI embedding deployment name (or AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME env var)
42
- * --azure-search-endpoint <url> Azure Search endpoint URL (or AZURE_SEARCH_ENDPOINT env var)
43
- * --azure-search-api-key <key> Azure Search API key (or AZURE_SEARCH_API_KEY env var)
44
- * --azure-search-index-name <name> Azure Search index name (or AZURE_SEARCH_INDEX_NAME env var)
45
- *
46
37
  * Examples:
47
- * $ ffc ai embeddings --dry-run ./src
48
- * $ ffc ai embeddings "*.ts" "*.md" "*.mdx"
49
- * $ ffc ai embeddings --diff
50
- * $ ffc ai embeddings --diff --base-ref origin/main
51
- * $ ffc ai embeddings --clean "*.ts"
38
+ * $ ffc ai index add --dry-run ./src
39
+ * $ ffc ai index add "*.ts" "*.md" "*.mdx"
40
+ * $ ffc ai index add --diff
41
+ * $ ffc ai index add --diff --base-ref origin/main
42
+ * $ ffc ai index add --clean "*.ts"
52
43
  */
53
- const _command = createCommand('embeddings')
54
- .description('Document embedding utilities for Large Language Model processing')
44
+ const _command = createCommand('add')
45
+ .description('Add documents to the AI search index via embedding generation')
55
46
  .addOption(
56
47
  createOption('--dry-run', 'Show what would be processed without actually doing it').default(
57
48
  false,
@@ -67,16 +58,35 @@ const _command = createCommand('embeddings')
67
58
  ).default(false),
68
59
  )
69
60
  .argument('[glob-patterns...]', 'Glob patterns to match files (optional when using --diff)')
70
- .action(async (patterns: string[], commandOptions: CommandOptions) => {
71
- const options = await CommandOptionsSchema.parseAsync(commandOptions);
61
+ .action(async function (this: Command, patterns: string[], commandOptions: CommandOptions) {
62
+ // Load configuration before validation so config values can fill gaps
63
+ const preOptions = commandOptions as Record<string, unknown>;
64
+ const config = await loadFusionAIConfig<FusionAIConfigWithIndex>(
65
+ (preOptions.config as string) ?? 'fusion-ai.config',
66
+ { baseDir: process.cwd() },
67
+ );
68
+ const indexConfig = config.index ?? {};
72
69
 
73
- // Load configuration
74
- const config = await loadFusionAIConfig<FusionAIConfigWithIndex>(options.config, {
75
- baseDir: process.cwd(),
76
- });
70
+ // Config file values override env-var defaults but not explicit CLI flags.
71
+ // Commander merges env vars before the action runs, so we use
72
+ // getOptionValueSource to distinguish "user passed --flag" from "came from env".
73
+ const parentCommand = this.parent ?? this;
74
+ if (indexConfig.name) {
75
+ const source = parentCommand.getOptionValueSource('azureSearchIndexName');
76
+ if (source !== 'cli') {
77
+ preOptions.azureSearchIndexName = indexConfig.name;
78
+ }
79
+ }
80
+ if (indexConfig.model) {
81
+ const source = parentCommand.getOptionValueSource('openaiEmbeddingDeployment');
82
+ if (source !== 'cli') {
83
+ preOptions.openaiEmbeddingDeployment = indexConfig.model;
84
+ }
85
+ }
86
+
87
+ const options = await CommandOptionsSchema.parseAsync(preOptions);
77
88
 
78
89
  // CLI args take precedence over config patterns
79
- const indexConfig = config.index ?? {};
80
90
  const allowedFilePatterns = indexConfig.patterns ?? ['**/*.ts', '**/*.md', '**/*.mdx'];
81
91
  const filePatterns = patterns.length ? patterns : allowedFilePatterns;
82
92
 
@@ -92,6 +102,14 @@ const _command = createCommand('embeddings')
92
102
  });
93
103
  });
94
104
 
105
+ /**
106
+ * Configured Commander command for the `ai index add` subcommand.
107
+ *
108
+ * This constant is the fully-configured {@link Command} instance with all
109
+ * AI-specific options (embedding deployment, Azure Search credentials) applied
110
+ * via `withAiOptions`. It is registered with the CLI automatically by
111
+ * {@link registerAiPlugin}.
112
+ */
95
113
  export const command = withAiOptions(_command, {
96
114
  includeEmbedding: true,
97
115
  includeSearch: true,
package/src/index.ts CHANGED
@@ -1,15 +1,49 @@
1
1
  import type { Command } from 'commander';
2
+ import { createCommand } from 'commander';
2
3
  import { registerAiPlugin as registerAiPluginBase } from '@equinor/fusion-framework-cli-plugin-ai-base';
3
- import { command as embeddingsCommand } from './command.js';
4
+ import { command as addCommand } from './embeddings-command.js';
5
+ import { deleteCommand as removeCommand } from './delete-command.js';
6
+ import { searchCommand } from './search-command.js';
4
7
 
5
8
  export { FusionAIConfigWithIndex, IndexConfig } from './config.js';
6
9
 
7
10
  /**
8
- * Registers the AI index plugin command with the CLI program
9
- * @param program - The Commander program instance to register commands with
11
+ * Parent command for the `ai index` group.
12
+ *
13
+ * Owns three subcommands:
14
+ * - `add` — index documents into the Azure AI Search vector store.
15
+ * - `remove` — remove documents from the vector store.
16
+ * - `search` — query the vector store for indexed documents.
17
+ */
18
+ const indexCommand = createCommand('index')
19
+ .description('Manage the AI search index (add, search, remove)')
20
+ .addCommand(addCommand)
21
+ .addCommand(removeCommand)
22
+ .addCommand(searchCommand);
23
+
24
+ /**
25
+ * Registers the `ai index` command with the Fusion Framework CLI.
26
+ *
27
+ * Adds a single `index` command under `ai` with subcommands for indexing,
28
+ * searching, and removing documents in the Azure AI Search vector store.
29
+ *
30
+ * @param program - The root Commander {@link Command} instance to attach to.
31
+ *
32
+ * @example
33
+ * ```ts
34
+ * import { Command } from 'commander';
35
+ * import { registerAiPlugin } from '@equinor/fusion-framework-cli-plugin-ai-index';
36
+ *
37
+ * const program = new Command();
38
+ * registerAiPlugin(program);
39
+ * program.parse();
40
+ * // ffc ai index add [glob-patterns...]
41
+ * // ffc ai index search <query>
42
+ * // ffc ai index remove [source-paths...]
43
+ * ```
10
44
  */
11
45
  export function registerAiPlugin(program: Command): void {
12
- registerAiPluginBase(program, embeddingsCommand);
46
+ registerAiPluginBase(program, indexCommand);
13
47
  }
14
48
 
15
49
  export default registerAiPlugin;
@@ -0,0 +1,259 @@
1
+ import { createCommand, createOption } from 'commander';
2
+ import type { Document } from '@langchain/core/documents';
3
+ import { inspect } from 'node:util';
4
+
5
+ import { setupFramework } from '@equinor/fusion-framework-cli-plugin-ai-base';
6
+ import {
7
+ withOptions as withAiOptions,
8
+ type AiOptions,
9
+ } from '@equinor/fusion-framework-cli-plugin-ai-base/command-options';
10
+ import type { RetrieverOptions } from '@equinor/fusion-framework-module-ai/lib';
11
+
12
+ /**
13
+ * Resolved option values for the `ai index search` CLI command.
14
+ *
15
+ * Extends {@link AiOptions} with search-specific flags such as result limits,
16
+ * output format toggles, filter expressions, and search-type selection.
17
+ */
18
+ type CommandOptions = AiOptions & {
19
+ /** Maximum number of search results to return (default `10`). */
20
+ limit: number;
21
+ /** When `true`, print diagnostic details such as index name and metadata. */
22
+ verbose: boolean;
23
+ /** Optional OData filter expression applied to document metadata before ranking. */
24
+ filter?: string;
25
+ /** When `true`, emit results as JSON objects instead of human-readable text. */
26
+ json: boolean;
27
+ /** When `true`, output the raw Azure Search metadata without flattening attributes. */
28
+ raw: boolean;
29
+ /** Search algorithm: `'similarity'` for cosine similarity or `'mmr'` for Maximum Marginal Relevance diversity re-ranking. */
30
+ searchType: 'mmr' | 'similarity';
31
+ };
32
+
33
+ /**
34
+ * Flatten Azure Cognitive Search metadata attributes into a plain object.
35
+ *
36
+ * Azure Search stores custom metadata as an array of `{ key, value }` pairs
37
+ * under an `attributes` property. This helper converts that array into a flat
38
+ * key-value map so consumers can access attributes directly
39
+ * (e.g. `metadata.source` instead of iterating the attributes array).
40
+ *
41
+ * JSON-encoded attribute values are transparently parsed; plain strings are
42
+ * kept as-is.
43
+ *
44
+ * @param metadata - Raw metadata record from an Azure Search document.
45
+ * @returns A shallow copy of `metadata` with the `attributes` array replaced by
46
+ * its flattened key-value entries.
47
+ */
48
+ const normalizeMetadata = (metadata: Record<string, unknown>): Record<string, unknown> => {
49
+ const normalized = { ...metadata };
50
+
51
+ if (Array.isArray(normalized.attributes)) {
52
+ const attributesObj: Record<string, unknown> = {};
53
+ for (const attr of normalized.attributes) {
54
+ if (
55
+ typeof attr === 'object' &&
56
+ attr !== null &&
57
+ 'key' in attr &&
58
+ 'value' in attr &&
59
+ typeof attr.key === 'string'
60
+ ) {
61
+ try {
62
+ attributesObj[attr.key] = JSON.parse(attr.value as string);
63
+ } catch {
64
+ attributesObj[attr.key] = attr.value;
65
+ }
66
+ }
67
+ }
68
+ Object.assign(normalized, attributesObj);
69
+ delete normalized.attributes;
70
+ }
71
+
72
+ return normalized;
73
+ };
74
+
75
+ /**
76
+ * Commander subcommand: **`ai index search`**
77
+ *
78
+ * Performs semantic vector-store search against an Azure Cognitive Search index
79
+ * and displays the matching documents. Use this command to validate that
80
+ * embeddings are indexed correctly, to explore the retrieval corpus, or to
81
+ * test OData filter expressions.
82
+ *
83
+ * Supports two search algorithms:
84
+ * - **`similarity`** (default) — pure cosine-similarity ranking.
85
+ * - **`mmr`** — Maximum Marginal Relevance, which re-ranks results to increase
86
+ * diversity while staying relevant.
87
+ *
88
+ * Results can be output as human-readable text (default) or as JSON objects
89
+ * (`--json`). The `--raw` flag preserves Azure Search's native metadata
90
+ * structure; without it, metadata attributes are flattened by
91
+ * {@link normalizeMetadata}.
92
+ *
93
+ * @example
94
+ * ```sh
95
+ * # Basic similarity search
96
+ * ffc ai index search "how to configure modules"
97
+ *
98
+ * # Limit results and use MMR for diversity
99
+ * ffc ai index search "authentication" --limit 5 --search-type mmr
100
+ *
101
+ * # Filter by package name
102
+ * ffc ai index search "hooks" --filter "metadata/attributes/any(a: a/key eq 'pkg_name' and a/value eq '@equinor/fusion-framework-react')" --json
103
+ *
104
+ * # Verbose output with raw Azure metadata
105
+ * ffc ai index search "API reference" --verbose --raw
106
+ * ```
107
+ */
108
+ const _command = createCommand('search')
109
+ .description('Search the vector store to validate embeddings and retrieve relevant documents')
110
+ .addOption(
111
+ createOption('--limit <number>', 'Maximum number of results to return')
112
+ .default(10)
113
+ .argParser(parseInt),
114
+ )
115
+ .addOption(
116
+ createOption('--search-type <type>', 'Search type: mmr or similarity')
117
+ .choices(['mmr', 'similarity'])
118
+ .default('similarity'),
119
+ )
120
+ .addOption(
121
+ createOption('--filter <expression>', 'OData filter expression for metadata filtering'),
122
+ )
123
+ .addOption(createOption('--json', 'Output results as JSON').default(false))
124
+ .addOption(createOption('--raw', 'Output raw metadata without normalization').default(false))
125
+ .addOption(createOption('--verbose', 'Enable verbose output').default(false))
126
+ .argument('<query>', 'Search query string')
127
+ .action(async (query: string, options: CommandOptions) => {
128
+ if (options.verbose) {
129
+ console.log('🔍 Initializing framework...');
130
+ }
131
+
132
+ const framework = await setupFramework(options);
133
+
134
+ if (!options.azureSearchIndexName) {
135
+ throw new Error('Azure Search index name is required');
136
+ }
137
+
138
+ if (options.verbose) {
139
+ console.log('✅ Framework initialized successfully');
140
+ console.log(`📇 Index: ${options.azureSearchIndexName}`);
141
+ console.log(`🔎 Searching for: "${query}"`);
142
+ console.log(`📊 Limit: ${options.limit}`);
143
+ console.log(`🔍 Search type: ${options.searchType}`);
144
+ if (options.filter) {
145
+ console.log(`🔧 Filter: ${options.filter}`);
146
+ }
147
+ console.log('');
148
+ }
149
+
150
+ const vectorStoreService = framework.ai.getService('search', options.azureSearchIndexName);
151
+
152
+ try {
153
+ const filter = options.filter ? { filterExpression: options.filter } : undefined;
154
+
155
+ const retrieverOptions: RetrieverOptions =
156
+ options.searchType === 'mmr'
157
+ ? {
158
+ k: options.limit,
159
+ searchType: 'mmr',
160
+ ...(filter && { filter: filter as Record<string, unknown> }),
161
+ }
162
+ : {
163
+ k: options.limit,
164
+ searchType: 'similarity',
165
+ ...(filter && { filter: filter as Record<string, unknown> }),
166
+ };
167
+
168
+ const retriever = vectorStoreService.asRetriever(retrieverOptions);
169
+ const results = await retriever.invoke(query);
170
+
171
+ if (!results || !Array.isArray(results)) {
172
+ throw new Error(
173
+ `Invalid search results: expected array but got ${results === null ? 'null' : typeof results}`,
174
+ );
175
+ }
176
+
177
+ if (options.json) {
178
+ for (const doc of results) {
179
+ if (options.raw) {
180
+ console.log(inspect(doc, { depth: null, colors: true }));
181
+ } else {
182
+ const metadata = normalizeMetadata(doc.metadata as Record<string, unknown>);
183
+ console.log({
184
+ content: doc.pageContent,
185
+ metadata,
186
+ score: (metadata as { score?: number })?.score,
187
+ });
188
+ }
189
+ }
190
+ } else {
191
+ if (results.length === 0) {
192
+ console.log('❌ No results found');
193
+ return;
194
+ }
195
+
196
+ console.log(`✅ Found ${results.length} result${results.length !== 1 ? 's' : ''}:\n`);
197
+
198
+ results.forEach((doc: Document, index: number) => {
199
+ const processedMetadata = options.raw
200
+ ? (doc.metadata as Record<string, unknown>)
201
+ : normalizeMetadata(doc.metadata as Record<string, unknown>);
202
+ const metadata = processedMetadata as {
203
+ source?: string;
204
+ score?: number;
205
+ [key: string]: unknown;
206
+ };
207
+ const score = metadata.score;
208
+ const source = metadata.source || 'Unknown source';
209
+
210
+ console.log(`${'─'.repeat(80)}`);
211
+ console.log(
212
+ `Result ${index + 1}${score !== undefined ? ` (Score: ${score.toFixed(4)})` : ''}`,
213
+ );
214
+ console.log(`Source: ${source}`);
215
+
216
+ if (options.verbose) {
217
+ const { source: _, score: __, ...otherMetadata } = metadata;
218
+ if (Object.keys(otherMetadata).length > 0) {
219
+ console.log(`Metadata:`, JSON.stringify(otherMetadata, null, 2));
220
+ }
221
+ }
222
+ console.log('');
223
+
224
+ const content = doc.pageContent;
225
+ const maxLength = 500;
226
+ if (content.length > maxLength) {
227
+ console.log(`${content.substring(0, maxLength)}...`);
228
+ console.log(`\n[Content truncated - ${content.length} characters total]`);
229
+ } else {
230
+ console.log(content);
231
+ }
232
+ console.log('');
233
+ });
234
+
235
+ console.log(`${'─'.repeat(80)}`);
236
+ }
237
+ } catch (error) {
238
+ console.error(
239
+ `❌ Search failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
240
+ );
241
+ if (options.verbose && error instanceof Error && error.stack) {
242
+ console.error(error.stack);
243
+ }
244
+ process.exit(1);
245
+ }
246
+ });
247
+
248
+ /**
249
+ * Configured Commander command for the `ai index search` subcommand.
250
+ *
251
+ * Fully-configured {@link Command} instance with all AI-specific options
252
+ * (embedding deployment, Azure Search credentials) applied via `withAiOptions`.
253
+ */
254
+ export const searchCommand = withAiOptions(_command, {
255
+ includeEmbedding: true,
256
+ includeSearch: true,
257
+ });
258
+
259
+ export default searchCommand;
@@ -1,9 +1,21 @@
1
1
  /**
2
- * Generates a unique identifier for a document chunk based on file path
3
- * Creates a deterministic, URL-safe hash from the file path for validation and checks
4
- * @param filePath - The file path to generate an ID from
5
- * @param chunkIndex - Optional chunk index to append for multi-chunk documents
6
- * @returns A base64-encoded hash of the file path, optionally suffixed with chunk index
2
+ * Generates a deterministic, URL-safe identifier for a document chunk.
3
+ *
4
+ * The identifier is a Base64-encoded hash of the file path with all
5
+ * non-alphanumeric characters stripped, making it safe for use as an
6
+ * Azure AI Search document key.
7
+ *
8
+ * @param filePath - The relative file path to hash.
9
+ * @param chunkIndex - Optional zero-based chunk index appended to distinguish
10
+ * multiple chunks originating from the same file.
11
+ * @returns A stable, alphanumeric document ID string.
12
+ *
13
+ * @example
14
+ * ```ts
15
+ * generateChunkId('packages/cli/src/index.ts'); // 'cGFja2FnZXMvY2xpL3NyYy9pbmRleC50cw'
16
+ * generateChunkId('packages/cli/src/index.ts', 0); // 'cGFja2FnZXMvY2xpL3NyYy9pbmRleC50cw-0'
17
+ * generateChunkId('packages/cli/src/index.ts', 3); // 'cGFja2FnZXMvY2xpL3NyYy9pbmRleC50cw-3'
18
+ * ```
7
19
  */
8
20
  export const generateChunkId = (filePath: string, chunkIndex?: number): string => {
9
21
  // Convert file path to base64 and remove non-alphanumeric characters
@@ -3,9 +3,15 @@ import type { ChangedFile, FileChangeStatus, GitDiffOptions } from './types.js';
3
3
  import { resolveProjectRoot, getGit } from './git-client.js';
4
4
 
5
5
  /**
6
- * Get list of changed files using git diff with status
7
- * @param options - Git diff configuration options
8
- * @returns Array of changed files with their status
6
+ * Returns a list of files changed between `baseRef` and HEAD.
7
+ *
8
+ * Parses the output of `git diff --name-status` to classify each file as
9
+ * `'new'`, `'modified'`, or `'removed'`. Renames are expanded into a
10
+ * `'removed'` entry for the old path and a `'new'` entry for the new path.
11
+ *
12
+ * @param options - Configuration controlling the diff reference and working directory.
13
+ * @returns Array of changed files with their status.
14
+ * @throws {Error} If the working directory is not inside a git repository.
9
15
  */
10
16
  export const getChangedFiles = async (options: GitDiffOptions): Promise<ChangedFile[]> => {
11
17
  const { diff, baseRef = 'HEAD~1', cwd = process.cwd() } = options;
@@ -78,10 +84,15 @@ export const getChangedFiles = async (options: GitDiffOptions): Promise<ChangedF
78
84
  };
79
85
 
80
86
  /**
81
- * Determine the git status of a file, including handling renames
82
- * Returns an array of ChangedFile objects - if the file was renamed, returns both old and new paths
83
- * @param filePath - Absolute file path to check
84
- * @returns Promise resolving to array of changed files (1 or 2 items if renamed)
87
+ * Determines the git change status of a single file.
88
+ *
89
+ * Checks tracked status, porcelain output, and rename/copy detection to
90
+ * produce one or two {@link ChangedFile} entries (two when a rename is
91
+ * detected — one `'removed'` for the old path and one `'new'` for the
92
+ * current path).
93
+ *
94
+ * @param filePath - Absolute path to the file to inspect.
95
+ * @returns Array with one or two changed-file entries.
85
96
  */
86
97
  export const getFileStatus = async (filePath: string): Promise<ChangedFile[]> => {
87
98
  const { git, gitRepoPath } = getGit(filePath) ?? {};
@@ -199,10 +210,14 @@ export const getFileStatus = async (filePath: string): Promise<ChangedFile[]> =>
199
210
  };
200
211
 
201
212
  /**
202
- * Check if a file path matches any of the changed files
203
- * @param filePath - File path to check
204
- * @param changedFiles - Array of changed file objects
205
- * @returns True if file has changed
213
+ * Checks whether a file path appears in a list of changed files.
214
+ *
215
+ * When the changed-files list is empty (no diff filtering active), every
216
+ * file is considered changed so that all files are processed.
217
+ *
218
+ * @param filePath - Absolute file path to look up.
219
+ * @param changedFiles - Array of {@link ChangedFile} entries to search.
220
+ * @returns `true` if the file has changed or if diff filtering is disabled.
206
221
  */
207
222
  export const isFileChanged = (filePath: string, changedFiles: ChangedFile[]): boolean => {
208
223
  if (changedFiles.length === 0) {
@@ -6,9 +6,13 @@ import { existsSync } from 'node:fs';
6
6
  const gitCache = new Map<string, SimpleGit>();
7
7
 
8
8
  /**
9
- * Resolve the project root (git repository root) for a given file path
10
- * @param filePath - File path to resolve from
11
- * @returns Project root path or undefined if not in a git repository
9
+ * Resolves the git repository root for a given file path.
10
+ *
11
+ * Walks up the directory tree looking for a `.git` directory or file
12
+ * (to support worktrees) and returns the enclosing directory.
13
+ *
14
+ * @param filePath - Absolute file or directory path to resolve from.
15
+ * @returns Absolute path to the repository root, or `undefined` if not inside a git repo.
12
16
  */
13
17
  export const resolveProjectRoot = (filePath: string): string | undefined => {
14
18
  // if we are in the root of the git repository, return the root
@@ -21,10 +25,15 @@ export const resolveProjectRoot = (filePath: string): string | undefined => {
21
25
  };
22
26
 
23
27
  /**
24
- * Get or create a SimpleGit instance for a given file path
25
- * Uses caching to avoid creating multiple instances for the same repository
26
- * @param filePath - File path to get git instance for
27
- * @returns Git instance and repository path, or undefined if not in a git repository
28
+ * Returns a cached `SimpleGit` instance scoped to the repository that
29
+ * contains `filePath`.
30
+ *
31
+ * Instances are cached by repository root to avoid repeatedly spawning
32
+ * new git processes for the same repo.
33
+ *
34
+ * @param filePath - Absolute file path to locate the repository for.
35
+ * @returns An object containing the git client and the repository root path,
36
+ * or `undefined` when `filePath` is not inside a git repository.
28
37
  */
29
38
  export const getGit = (
30
39
  filePath: string,
@@ -24,9 +24,13 @@ const generateGithubPermalink = (
24
24
  };
25
25
 
26
26
  /**
27
- * Extract git metadata for a file
28
- * @param filePath - Absolute file path
29
- * @returns Git metadata or undefined if not in a git repository
27
+ * Extracts git metadata for a single source file.
28
+ *
29
+ * Resolves the latest commit hash, commit date, and a GitHub permalink
30
+ * (when the remote is a GitHub URL) by inspecting `git log` output.
31
+ *
32
+ * @param filePath - Absolute path to the file.
33
+ * @returns Git metadata, or `undefined` if the file is not inside a git repository.
30
34
  */
31
35
  export const extractGitMetadata = async (filePath: string): Promise<GitMetadata | undefined> => {
32
36
  const { git, gitRepoPath: gitRepoRoot } = getGit(filePath) ?? {};
@@ -1,9 +1,15 @@
1
1
  import { resolveProjectRoot, getGit } from './git-client.js';
2
2
 
3
3
  /**
4
- * Get git status information for debugging
5
- * @param cwd - Working directory
6
- * @returns Git status information
4
+ * Retrieves a summary of the current git working-tree status.
5
+ *
6
+ * Returns the current branch name, abbreviated HEAD commit, and counts of
7
+ * staged / unstaged changes. Useful for informational output in CLI commands.
8
+ *
9
+ * @param cwd - Working directory for git operations. Defaults to `process.cwd()`.
10
+ * @returns An object with branch, commit, and file-change counts.
11
+ * @throws {Error} If the working directory is not inside a git repository or
12
+ * the git client cannot be initialised.
7
13
  */
8
14
  export const getGitStatus = async (
9
15
  cwd: string = process.cwd(),
@@ -1,36 +1,42 @@
1
1
  /**
2
- * Git metadata extracted from repository
2
+ * Git metadata extracted from the repository for a single source file.
3
+ *
4
+ * Attached to vector-store documents as part of `metadata.attributes`.
3
5
  */
4
6
  export type GitMetadata = Partial<{
7
+ /** Remote origin URL of the git repository. */
5
8
  git_remote_url: string;
9
+ /** Short SHA of the most recent commit that touched the file. */
6
10
  git_commit_hash: string;
11
+ /** ISO-8601 date string of the most recent commit that touched the file. */
7
12
  git_commit_date: string;
13
+ /** GitHub permalink to the file on the default branch. */
8
14
  git_link: string;
9
15
  }>;
10
16
 
11
17
  /**
12
- * Git diff options for filtering changed files
18
+ * Configuration for retrieving changed files via `git diff`.
13
19
  */
14
20
  export interface GitDiffOptions {
15
- /** Enable diff-based file filtering */
21
+ /** When `true`, enable diff-based file filtering. */
16
22
  diff: boolean;
17
- /** Git reference to compare against (default: HEAD~1) */
23
+ /** Git reference to compare against (e.g. `'HEAD~1'`, `'origin/main'`). Defaults to `'HEAD~1'`. */
18
24
  baseRef?: string;
19
- /** Working directory for git operations */
25
+ /** Working directory for git operations. Defaults to `process.cwd()`. */
20
26
  cwd?: string;
21
27
  }
22
28
 
23
29
  /**
24
- * File change status
30
+ * Possible change statuses reported by git.
25
31
  */
26
32
  export type FileChangeStatus = 'new' | 'modified' | 'removed';
27
33
 
28
34
  /**
29
- * Changed file information
35
+ * Describes a single file that has changed according to git.
30
36
  */
31
37
  export interface ChangedFile {
32
- /** Absolute file path */
38
+ /** Absolute file-system path to the changed file. */
33
39
  filepath: string;
34
- /** Change status: new, modified, or removed */
40
+ /** How the file was changed: added, modified, or deleted. */
35
41
  status: FileChangeStatus;
36
42
  }