@equinor/fusion-framework-cli-plugin-ai-index 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +64 -0
- package/dist/esm/bin/apply-metadata.js +15 -5
- package/dist/esm/bin/apply-metadata.js.map +1 -1
- package/dist/esm/bin/apply-schema.js +64 -0
- package/dist/esm/bin/apply-schema.js.map +1 -0
- package/dist/esm/bin/apply-schema.test.js +143 -0
- package/dist/esm/bin/apply-schema.test.js.map +1 -0
- package/dist/esm/bin/delete-removed-files.js +1 -1
- package/dist/esm/bin/delete-removed-files.js.map +1 -1
- package/dist/esm/bin/embed.js +188 -47
- package/dist/esm/bin/embed.js.map +1 -1
- package/dist/esm/create-command.js +186 -0
- package/dist/esm/create-command.js.map +1 -0
- package/dist/esm/delete-command.js +14 -2
- package/dist/esm/delete-command.js.map +1 -1
- package/dist/esm/delete-command.options.js +7 -31
- package/dist/esm/delete-command.options.js.map +1 -1
- package/dist/esm/delete-index-command.js +94 -0
- package/dist/esm/delete-index-command.js.map +1 -0
- package/dist/esm/embed-command.js +30 -0
- package/dist/esm/embed-command.js.map +1 -0
- package/dist/esm/embeddings-command.js +14 -17
- package/dist/esm/embeddings-command.js.map +1 -1
- package/dist/esm/embeddings-command.options.js +12 -43
- package/dist/esm/embeddings-command.options.js.map +1 -1
- package/dist/esm/index.js +12 -3
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/schema.js +41 -0
- package/dist/esm/schema.js.map +1 -0
- package/dist/esm/search-command.js +17 -5
- package/dist/esm/search-command.js.map +1 -1
- package/dist/esm/utils/embedding-dimensions.js +37 -0
- package/dist/esm/utils/embedding-dimensions.js.map +1 -0
- package/dist/esm/utils/zod-to-azure-fields.js +120 -0
- package/dist/esm/utils/zod-to-azure-fields.js.map +1 -0
- package/dist/esm/utils/zod-to-azure-fields.test.js +112 -0
- package/dist/esm/utils/zod-to-azure-fields.test.js.map +1 -0
- package/dist/esm/version.js +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types/bin/apply-metadata.d.ts +2 -1
- package/dist/types/bin/apply-schema.d.ts +22 -0
- package/dist/types/bin/apply-schema.test.d.ts +1 -0
- package/dist/types/config.d.ts +14 -0
- package/dist/types/create-command.d.ts +6 -0
- package/dist/types/delete-command.options.d.ts +9 -23
- package/dist/types/delete-index-command.d.ts +6 -0
- package/dist/types/embed-command.d.ts +12 -0
- package/dist/types/embeddings-command.options.d.ts +9 -28
- package/dist/types/index.d.ts +1 -0
- package/dist/types/schema.d.ts +137 -0
- package/dist/types/utils/embedding-dimensions.d.ts +13 -0
- package/dist/types/utils/zod-to-azure-fields.d.ts +61 -0
- package/dist/types/utils/zod-to-azure-fields.test.d.ts +1 -0
- package/dist/types/version.d.ts +1 -1
- package/package.json +7 -7
- package/src/bin/apply-metadata.ts +20 -4
- package/src/bin/apply-schema.test.ts +170 -0
- package/src/bin/apply-schema.ts +86 -0
- package/src/bin/delete-removed-files.ts +1 -1
- package/src/bin/embed.ts +248 -76
- package/src/config.ts +15 -0
- package/src/create-command.ts +218 -0
- package/src/delete-command.options.ts +7 -37
- package/src/delete-command.ts +19 -2
- package/src/delete-index-command.ts +121 -0
- package/src/embed-command.ts +44 -0
- package/src/embeddings-command.options.ts +12 -50
- package/src/embeddings-command.ts +18 -18
- package/src/index.ts +12 -3
- package/src/schema.ts +149 -0
- package/src/search-command.ts +22 -5
- package/src/utils/embedding-dimensions.ts +39 -0
- package/src/utils/zod-to-azure-fields.test.ts +136 -0
- package/src/utils/zod-to-azure-fields.ts +177 -0
- package/src/version.ts +1 -1
|
@@ -1 +1,2 @@
|
|
|
1
|
-
|
|
1
|
+
/** Callback invoked after each document is enriched with metadata. */
|
|
2
|
+
export type MetadataProgressCallback = (source: string) => void;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { Observable } from 'rxjs';
|
|
2
|
+
import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
|
|
3
|
+
import type { IndexSchemaConfig } from '../schema.js';
|
|
4
|
+
/**
|
|
5
|
+
* Creates an RxJS operator that resolves promoted schema fields for each
|
|
6
|
+
* document and separates them from the generic `attributes` bag.
|
|
7
|
+
*
|
|
8
|
+
* For each document in the batch:
|
|
9
|
+
* 1. Runs the optional `prepareAttributes` callback to enrich attributes
|
|
10
|
+
* with type-safe access to schema-declared fields
|
|
11
|
+
* 2. Calls the schema resolver to compute promoted field values
|
|
12
|
+
* 3. Validates the resolved values against the Zod shape
|
|
13
|
+
* 4. Stores promoted fields on `metadata.schemaFields`
|
|
14
|
+
* 5. Removes promoted keys from `metadata.attributes` to avoid duplication
|
|
15
|
+
*
|
|
16
|
+
* When no schema is configured, the stream passes through unchanged.
|
|
17
|
+
*
|
|
18
|
+
* @param document$ - Stream of document batches from the metadata enrichment step.
|
|
19
|
+
* @param schema - The index schema config, if defined. When `undefined`, documents pass through unchanged.
|
|
20
|
+
* @returns Stream of document batches with promoted fields resolved and stored.
|
|
21
|
+
*/
|
|
22
|
+
export declare function applySchema(document$: Observable<VectorStoreDocument[]>, schema: IndexSchemaConfig | undefined): Observable<VectorStoreDocument[]>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/types/config.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
|
|
2
2
|
import type { FusionAIConfig } from '@equinor/fusion-framework-cli-plugin-ai-base';
|
|
3
|
+
import type { IndexSchemaConfig } from './schema.js';
|
|
3
4
|
/**
|
|
4
5
|
* Index-specific configuration for Fusion AI document indexing operations.
|
|
5
6
|
*
|
|
@@ -49,7 +50,20 @@ export interface IndexConfig {
|
|
|
49
50
|
chunkSize?: number;
|
|
50
51
|
/** Number of overlapping tokens between consecutive chunks. */
|
|
51
52
|
chunkOverlap?: number;
|
|
53
|
+
/** Explicit vector dimensions for custom embedding models not in the known model map. */
|
|
54
|
+
dimensions?: number;
|
|
52
55
|
};
|
|
56
|
+
/**
|
|
57
|
+
* Custom index schema that promotes frequently-filtered metadata to
|
|
58
|
+
* top-level Azure AI Search fields.
|
|
59
|
+
*
|
|
60
|
+
* When defined, the schema resolver runs after metadata enrichment and
|
|
61
|
+
* places resolved values as top-level document fields in Azure Search,
|
|
62
|
+
* enabling direct OData filters without the `any()` operator.
|
|
63
|
+
*
|
|
64
|
+
* @see {@link IndexSchemaConfig} for details and examples.
|
|
65
|
+
*/
|
|
66
|
+
schema?: IndexSchemaConfig;
|
|
53
67
|
}
|
|
54
68
|
/**
|
|
55
69
|
* Fusion AI configuration extended with {@link IndexConfig | index-specific settings}.
|
|
@@ -1,32 +1,18 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
/**
|
|
3
|
-
* Zod schema for
|
|
3
|
+
* Zod schema for the `ai index remove` command.
|
|
4
4
|
*
|
|
5
|
-
* Extends the base AI options schema
|
|
6
|
-
* Azure Search credentials and the embedding deployment (needed to initialise
|
|
7
|
-
* the vector store service for document removal).
|
|
8
|
-
*
|
|
9
|
-
* @example
|
|
10
|
-
* ```ts
|
|
11
|
-
* const validated = await DeleteOptionsSchema.parseAsync(rawOptions);
|
|
12
|
-
* // validated.dryRun, validated.filter, validated.azureSearchEndpoint, etc.
|
|
13
|
-
* ```
|
|
5
|
+
* Extends the base AI options schema making `indexName` required.
|
|
14
6
|
*/
|
|
15
7
|
export declare const DeleteOptionsSchema: z.ZodObject<{
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
azureSearchIndexName: z.ZodString;
|
|
8
|
+
env: z.ZodOptional<z.ZodString>;
|
|
9
|
+
token: z.ZodOptional<z.ZodString>;
|
|
10
|
+
tenantId: z.ZodOptional<z.ZodString>;
|
|
11
|
+
clientId: z.ZodOptional<z.ZodString>;
|
|
12
|
+
chatModel: z.ZodOptional<z.ZodString>;
|
|
13
|
+
embedModel: z.ZodOptional<z.ZodString>;
|
|
14
|
+
indexName: z.ZodString;
|
|
24
15
|
dryRun: z.ZodBoolean;
|
|
25
16
|
filter: z.ZodOptional<z.ZodString>;
|
|
26
17
|
}, z.core.$strip>;
|
|
27
|
-
/**
|
|
28
|
-
* Validated options for the `ai index remove` command.
|
|
29
|
-
*
|
|
30
|
-
* Inferred from {@link DeleteOptionsSchema}.
|
|
31
|
-
*/
|
|
32
18
|
export type DeleteOptions = z.infer<typeof DeleteOptionsSchema>;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI command: `ai index embed <text>`
|
|
3
|
+
*
|
|
4
|
+
* Embeds a single text string and prints the resulting vector.
|
|
5
|
+
* Useful for verifying the embeddings endpoint and model are reachable.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```sh
|
|
9
|
+
* ffc ai index embed "hello world"
|
|
10
|
+
* ```
|
|
11
|
+
*/
|
|
12
|
+
export declare const embedCommand: import("commander").Command;
|
|
@@ -1,40 +1,21 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
/**
|
|
3
|
-
* Zod schema for
|
|
3
|
+
* Zod schema for the `ai index add` command.
|
|
4
4
|
*
|
|
5
|
-
* Extends the base AI options schema
|
|
6
|
-
* add-specific options such as `--dry-run`, `--diff`, `--config`,
|
|
7
|
-
* `--base-ref`, and `--clean`.
|
|
8
|
-
*
|
|
9
|
-
* Azure Search and embedding options that are optional in the base schema
|
|
10
|
-
* become **required** because the add command always writes to a
|
|
11
|
-
* vector store.
|
|
12
|
-
*
|
|
13
|
-
* @example
|
|
14
|
-
* ```ts
|
|
15
|
-
* const validated = await CommandOptionsSchema.parseAsync(rawOptions);
|
|
16
|
-
* // validated.dryRun, validated.azureSearchEndpoint, etc.
|
|
17
|
-
* ```
|
|
5
|
+
* Extends the base AI options schema making `embedModel` and `indexName` required.
|
|
18
6
|
*/
|
|
19
7
|
export declare const CommandOptionsSchema: z.ZodObject<{
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
azureSearchIndexName: z.ZodString;
|
|
8
|
+
env: z.ZodOptional<z.ZodString>;
|
|
9
|
+
token: z.ZodOptional<z.ZodString>;
|
|
10
|
+
tenantId: z.ZodOptional<z.ZodString>;
|
|
11
|
+
clientId: z.ZodOptional<z.ZodString>;
|
|
12
|
+
chatModel: z.ZodOptional<z.ZodString>;
|
|
13
|
+
embedModel: z.ZodString;
|
|
14
|
+
indexName: z.ZodString;
|
|
28
15
|
dryRun: z.ZodBoolean;
|
|
29
16
|
config: z.ZodString;
|
|
30
17
|
diff: z.ZodBoolean;
|
|
31
18
|
baseRef: z.ZodOptional<z.ZodString>;
|
|
32
19
|
clean: z.ZodBoolean;
|
|
33
20
|
}, z.core.$strip>;
|
|
34
|
-
/**
|
|
35
|
-
* Validated options for the `ai index add` command.
|
|
36
|
-
*
|
|
37
|
-
* Inferred from {@link CommandOptionsSchema} and used as the single
|
|
38
|
-
* source of truth for option types throughout the add/embeddings pipeline.
|
|
39
|
-
*/
|
|
40
21
|
export type CommandOptions = z.infer<typeof CommandOptionsSchema>;
|
package/dist/types/index.d.ts
CHANGED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import type { z } from 'zod';
|
|
2
|
+
import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
|
|
3
|
+
/**
|
|
4
|
+
* Attribute map type used by {@link IndexSchemaConfig.prepareAttributes}.
|
|
5
|
+
*
|
|
6
|
+
* Combines the schema-declared field types (all optional, since
|
|
7
|
+
* attributes are built up incrementally) with a `Record<string, unknown>`
|
|
8
|
+
* base so non-promoted attributes are still accessible.
|
|
9
|
+
*
|
|
10
|
+
* @template T - Zod object schema from which attribute types are derived.
|
|
11
|
+
*/
|
|
12
|
+
export type SchemaAttributes<T extends z.ZodObject> = Partial<z.input<T>> & Record<string, unknown>;
|
|
13
|
+
/**
|
|
14
|
+
* Configuration for a custom Azure AI Search index schema defined via a Zod
|
|
15
|
+
* object shape.
|
|
16
|
+
*
|
|
17
|
+
* Declares which metadata fields should be promoted to top-level Azure AI
|
|
18
|
+
* Search fields (instead of being stored in the generic `attributes` array)
|
|
19
|
+
* and how their values are resolved from each document.
|
|
20
|
+
*
|
|
21
|
+
* Promoted fields become filterable/facetable at the Azure Search level,
|
|
22
|
+
* eliminating the need for `any()` OData operators.
|
|
23
|
+
*
|
|
24
|
+
* @template T - Zod object schema type that defines the promoted field names and types.
|
|
25
|
+
*
|
|
26
|
+
* @example
|
|
27
|
+
* ```ts
|
|
28
|
+
* import { z } from 'zod';
|
|
29
|
+
* import { defineIndexSchema } from '@equinor/fusion-framework-cli-plugin-ai-index';
|
|
30
|
+
*
|
|
31
|
+
* const schema = defineIndexSchema({
|
|
32
|
+
* shape: z.object({
|
|
33
|
+
* pkg_name: z.string().optional(),
|
|
34
|
+
* type: z.string(),
|
|
35
|
+
* tags: z.array(z.string()).default([]),
|
|
36
|
+
* source_dir: z.string(),
|
|
37
|
+
* }),
|
|
38
|
+
* prepareAttributes: (attrs, doc) => {
|
|
39
|
+
* // attrs.tags is typed as string[] | undefined ✅
|
|
40
|
+
* attrs.tags ??= [];
|
|
41
|
+
* if (doc.metadata.source.includes('packages/')) {
|
|
42
|
+
* attrs.tags.push('package');
|
|
43
|
+
* }
|
|
44
|
+
* return attrs;
|
|
45
|
+
* },
|
|
46
|
+
* resolve: (doc) => ({
|
|
47
|
+
* pkg_name: doc.metadata.attributes?.pkg_name as string | undefined,
|
|
48
|
+
* type: (doc.metadata.attributes?.type as string) ?? 'unknown',
|
|
49
|
+
* tags: (doc.metadata.attributes?.tags as string[]) ?? [],
|
|
50
|
+
* source_dir: doc.metadata.source.split('/')[0],
|
|
51
|
+
* }),
|
|
52
|
+
* });
|
|
53
|
+
* ```
|
|
54
|
+
*/
|
|
55
|
+
export interface IndexSchemaConfig<T extends z.ZodObject = z.ZodObject> {
|
|
56
|
+
/**
|
|
57
|
+
* Zod object schema defining the promoted field names and their types.
|
|
58
|
+
*
|
|
59
|
+
* Each key becomes a top-level Azure AI Search field. The Zod type
|
|
60
|
+
* determines the Azure EDM field type:
|
|
61
|
+
* - `z.string()` → `Edm.String` (filterable, facetable)
|
|
62
|
+
* - `z.array(z.string())` → `Collection(Edm.String)` (filterable, facetable)
|
|
63
|
+
* - `z.number()` → `Edm.Double` (filterable, sortable)
|
|
64
|
+
* - `z.boolean()` → `Edm.Boolean` (filterable)
|
|
65
|
+
*/
|
|
66
|
+
shape: T;
|
|
67
|
+
/**
|
|
68
|
+
* Type-safe attribute processor that enriches document attributes before
|
|
69
|
+
* the schema resolver runs.
|
|
70
|
+
*
|
|
71
|
+
* Runs in addition to the untyped `metadata.attributeProcessor` callback
|
|
72
|
+
* when a schema is defined. The `attributes` parameter is typed from the
|
|
73
|
+
* Zod shape so that schema-declared fields (e.g. `tags`, `pkg_name`)
|
|
74
|
+
* have proper types while non-schema attributes remain accessible via
|
|
75
|
+
* the `Record<string, unknown>` base.
|
|
76
|
+
*
|
|
77
|
+
* Runs after git and package metadata enrichment and after
|
|
78
|
+
* `metadata.attributeProcessor`, before
|
|
79
|
+
* {@link IndexSchemaConfig.resolve | resolve}.
|
|
80
|
+
*
|
|
81
|
+
* @param attributes - The accumulated attributes for the document, typed
|
|
82
|
+
* from the schema shape. All schema fields are optional since they may
|
|
83
|
+
* not be populated yet.
|
|
84
|
+
* @param document - The vector-store document being processed.
|
|
85
|
+
* @returns The enriched attributes map.
|
|
86
|
+
*/
|
|
87
|
+
prepareAttributes?: (attributes: SchemaAttributes<T>, document: VectorStoreDocument) => SchemaAttributes<T>;
|
|
88
|
+
/**
|
|
89
|
+
* Per-document resolver that extracts or computes promoted field values.
|
|
90
|
+
*
|
|
91
|
+
* Runs after {@link IndexSchemaConfig.prepareAttributes | prepareAttributes}
|
|
92
|
+
* and metadata enrichment (git, package), so all enriched attributes are
|
|
93
|
+
* available on the document.
|
|
94
|
+
*
|
|
95
|
+
* @param document - The fully enriched vector-store document.
|
|
96
|
+
* @returns An object matching the Zod shape with resolved field values.
|
|
97
|
+
*/
|
|
98
|
+
resolve: (document: VectorStoreDocument) => z.output<T>;
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Type-safe factory for creating an {@link IndexSchemaConfig}.
|
|
102
|
+
*
|
|
103
|
+
* Infers `T` from the Zod shape and constrains both the
|
|
104
|
+
* `prepareAttributes` parameter types and the `resolve` return type,
|
|
105
|
+
* providing compile-time safety that attribute processing and resolution
|
|
106
|
+
* match the declared schema.
|
|
107
|
+
*
|
|
108
|
+
* @template T - Zod object schema type, inferred from `config.shape`.
|
|
109
|
+
* @param config - Schema configuration with a Zod shape, optional typed
|
|
110
|
+
* attribute processor, and a resolver function.
|
|
111
|
+
* @returns The same config object, narrowed to the inferred generic type.
|
|
112
|
+
*
|
|
113
|
+
* @example
|
|
114
|
+
* ```ts
|
|
115
|
+
* import { z } from 'zod';
|
|
116
|
+
* import { defineIndexSchema } from '@equinor/fusion-framework-cli-plugin-ai-index';
|
|
117
|
+
*
|
|
118
|
+
* const schema = defineIndexSchema({
|
|
119
|
+
* shape: z.object({
|
|
120
|
+
* tags: z.array(z.string()).default([]),
|
|
121
|
+
* type: z.string(),
|
|
122
|
+
* }),
|
|
123
|
+
* prepareAttributes: (attrs, doc) => {
|
|
124
|
+
* attrs.tags ??= []; // string[] | undefined — type-safe ✅
|
|
125
|
+
* if (doc.metadata.source.includes('cookbooks/')) {
|
|
126
|
+
* attrs.tags.push('cookbook');
|
|
127
|
+
* }
|
|
128
|
+
* return attrs;
|
|
129
|
+
* },
|
|
130
|
+
* resolve: (doc) => ({
|
|
131
|
+
* tags: (doc.metadata.attributes?.tags as string[]) ?? [],
|
|
132
|
+
* type: (doc.metadata.attributes?.type as string) ?? 'raw',
|
|
133
|
+
* }),
|
|
134
|
+
* });
|
|
135
|
+
* ```
|
|
136
|
+
*/
|
|
137
|
+
export declare function defineIndexSchema<T extends z.ZodObject>(config: IndexSchemaConfig<T>): IndexSchemaConfig<T>;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolve the embedding vector dimensions for a given model name.
|
|
3
|
+
*
|
|
4
|
+
* Checks the known model→dimensions map first. Falls back to an explicit
|
|
5
|
+
* `dimensions` override from the config. Throws if neither is available.
|
|
6
|
+
*
|
|
7
|
+
* @param model - The embedding model name (e.g. `'text-embedding-3-large'`).
|
|
8
|
+
* @param configDimensions - Optional explicit dimensions from config, used
|
|
9
|
+
* when the model is not in the known map.
|
|
10
|
+
* @returns The number of dimensions for the embedding vector.
|
|
11
|
+
* @throws {Error} When the model is unknown and no explicit dimensions are configured.
|
|
12
|
+
*/
|
|
13
|
+
export declare function resolveEmbeddingDimensions(model: string, configDimensions?: number): number;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { type z } from 'zod';
|
|
2
|
+
/**
|
|
3
|
+
* Azure AI Search EDM (Entity Data Model) type identifiers used in
|
|
4
|
+
* index field definitions.
|
|
5
|
+
*/
|
|
6
|
+
type AzureEdmType = 'Edm.String' | 'Edm.Int32' | 'Edm.Int64' | 'Edm.Double' | 'Edm.Boolean' | 'Collection(Edm.String)';
|
|
7
|
+
/**
|
|
8
|
+
* Azure AI Search field definition matching the REST API schema for
|
|
9
|
+
* index creation.
|
|
10
|
+
*
|
|
11
|
+
* @see https://learn.microsoft.com/en-us/rest/api/searchservice/indexes/create
|
|
12
|
+
*/
|
|
13
|
+
export interface AzureSearchField {
|
|
14
|
+
/** Field name as it appears in the index schema. */
|
|
15
|
+
name: string;
|
|
16
|
+
/** Azure EDM type for the field. */
|
|
17
|
+
type: AzureEdmType;
|
|
18
|
+
/** Whether the field can be used in `$filter` expressions. */
|
|
19
|
+
filterable: boolean;
|
|
20
|
+
/** Whether the field can be used in `$orderby` expressions. */
|
|
21
|
+
sortable: boolean;
|
|
22
|
+
/** Whether the field supports faceted navigation. */
|
|
23
|
+
facetable: boolean;
|
|
24
|
+
/** Whether the field is included in full-text search. */
|
|
25
|
+
searchable: boolean;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Convert a Zod object schema into an array of Azure AI Search field
|
|
29
|
+
* definitions.
|
|
30
|
+
*
|
|
31
|
+
* Walks the Zod shape, maps each field to its Azure EDM type, and assigns
|
|
32
|
+
* default capabilities (filterable, facetable, sortable). Used by the
|
|
33
|
+
* `ffc ai index create` command to generate the index schema.
|
|
34
|
+
*
|
|
35
|
+
* Uses public `instanceof` checks and `unwrap()` methods to avoid
|
|
36
|
+
* reliance on Zod's private `_zod.def` internals, ensuring compatibility
|
|
37
|
+
* across Zod versions.
|
|
38
|
+
*
|
|
39
|
+
* @param schema - A Zod object schema whose keys define the promoted fields.
|
|
40
|
+
* @returns An array of Azure AI Search field definitions.
|
|
41
|
+
* @throws {Error} When a field type cannot be mapped to an Azure EDM type.
|
|
42
|
+
*
|
|
43
|
+
* @example
|
|
44
|
+
* ```ts
|
|
45
|
+
* import { z } from 'zod';
|
|
46
|
+
* import { zodToAzureFields } from './zod-to-azure-fields.js';
|
|
47
|
+
*
|
|
48
|
+
* const fields = zodToAzureFields(
|
|
49
|
+
* z.object({
|
|
50
|
+
* pkg_name: z.string().optional(),
|
|
51
|
+
* tags: z.array(z.string()).default([]),
|
|
52
|
+
* }),
|
|
53
|
+
* );
|
|
54
|
+
* // [
|
|
55
|
+
* // { name: 'pkg_name', type: 'Edm.String', filterable: true, facetable: true, ... },
|
|
56
|
+
* // { name: 'tags', type: 'Collection(Edm.String)', filterable: true, facetable: true, ... },
|
|
57
|
+
* // ]
|
|
58
|
+
* ```
|
|
59
|
+
*/
|
|
60
|
+
export declare function zodToAzureFields(schema: z.ZodObject): AzureSearchField[];
|
|
61
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/types/version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const version = "2.0.0";
|
|
1
|
+
export declare const version = "2.1.0";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@equinor/fusion-framework-cli-plugin-ai-index",
|
|
3
|
-
"version": "2.0.0",
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "AI indexing plugin for Fusion Framework CLI providing document embedding and chunking utilities",
|
|
5
5
|
"main": "dist/esm/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -51,20 +51,20 @@
|
|
|
51
51
|
"simple-git": "^3.32.3",
|
|
52
52
|
"tree-sitter": "^0.25.0",
|
|
53
53
|
"tree-sitter-typescript": "^0.23.2",
|
|
54
|
-
"ts-morph": "^
|
|
54
|
+
"ts-morph": "^28.0.0",
|
|
55
55
|
"zod": "^4.3.6",
|
|
56
|
-
"@equinor/fusion-framework-cli-plugin-ai-base": "2.0.0",
|
|
57
56
|
"@equinor/fusion-framework-module": "6.0.0",
|
|
58
|
-
"@equinor/fusion-
|
|
59
|
-
"@equinor/fusion-
|
|
57
|
+
"@equinor/fusion-imports": "2.0.0",
|
|
58
|
+
"@equinor/fusion-framework-cli-plugin-ai-base": "3.0.0",
|
|
59
|
+
"@equinor/fusion-framework-module-ai": "4.0.0"
|
|
60
60
|
},
|
|
61
61
|
"peerDependencies": {
|
|
62
|
-
"@equinor/fusion-framework-cli": "^14.
|
|
62
|
+
"@equinor/fusion-framework-cli": "^14.2.7"
|
|
63
63
|
},
|
|
64
64
|
"devDependencies": {
|
|
65
65
|
"typescript": "^5.9.3",
|
|
66
66
|
"vitest": "^4.1.0",
|
|
67
|
-
"@equinor/fusion-framework-cli": "^14.
|
|
67
|
+
"@equinor/fusion-framework-cli": "^14.2.7"
|
|
68
68
|
},
|
|
69
69
|
"scripts": {
|
|
70
70
|
"build": "tsc -b",
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
-
import { from, mergeMap, map, toArray } from 'rxjs';
|
|
2
|
+
import { from, mergeMap, map, tap, toArray } from 'rxjs';
|
|
3
3
|
import type { Observable } from 'rxjs';
|
|
4
4
|
import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
|
|
5
5
|
import { extractGitMetadata } from '../utils/git/index.js';
|
|
@@ -7,6 +7,9 @@ import { resolvePackage } from '../utils/package-resolver.js';
|
|
|
7
7
|
import type { DocumentEntry } from './types.js';
|
|
8
8
|
import type { FusionAIConfigWithIndex } from '../config.js';
|
|
9
9
|
|
|
10
|
+
/** Callback invoked after each document is enriched with metadata. */
|
|
11
|
+
export type MetadataProgressCallback = (source: string) => void;
|
|
12
|
+
|
|
10
13
|
/**
|
|
11
14
|
* Creates a stream that applies metadata to documents.
|
|
12
15
|
* @internal
|
|
@@ -14,14 +17,25 @@ import type { FusionAIConfigWithIndex } from '../config.js';
|
|
|
14
17
|
export function applyMetadata(
|
|
15
18
|
document$: Observable<DocumentEntry>,
|
|
16
19
|
indexConfig: FusionAIConfigWithIndex['index'],
|
|
20
|
+
onProgress?: MetadataProgressCallback,
|
|
17
21
|
): Observable<VectorStoreDocument[]> {
|
|
18
22
|
// Resolve packages if enabled
|
|
19
23
|
const shouldResolvePackage = indexConfig?.metadata?.resolvePackage ?? false;
|
|
20
24
|
|
|
25
|
+
/** Cap concurrent git subprocess calls to avoid overwhelming the OS process table. */
|
|
26
|
+
const GIT_CONCURRENCY = 20;
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Cap the number of file entries processed in parallel.
|
|
30
|
+
* Each entry fans out to GIT_CONCURRENCY inner git calls, so
|
|
31
|
+
* total concurrent git processes ≤ ENTRY_CONCURRENCY × GIT_CONCURRENCY.
|
|
32
|
+
*/
|
|
33
|
+
const ENTRY_CONCURRENCY = 20;
|
|
34
|
+
|
|
21
35
|
return document$.pipe(
|
|
22
36
|
mergeMap((entry) => {
|
|
23
37
|
return from(entry.documents).pipe(
|
|
24
|
-
// Extract git metadata concurrently
|
|
38
|
+
// Extract git metadata concurrently (capped to limit parallel git processes)
|
|
25
39
|
mergeMap(async (document): Promise<VectorStoreDocument> => {
|
|
26
40
|
const rootPath = document.metadata.rootPath ?? process.cwd();
|
|
27
41
|
const sourcePath = path.join(rootPath, document.metadata.source);
|
|
@@ -54,7 +68,9 @@ export function applyMetadata(
|
|
|
54
68
|
},
|
|
55
69
|
},
|
|
56
70
|
};
|
|
57
|
-
}),
|
|
71
|
+
}, GIT_CONCURRENCY),
|
|
72
|
+
// Notify caller after each document is enriched
|
|
73
|
+
tap((document) => onProgress?.(document.metadata.source)),
|
|
58
74
|
// Apply custom attribute processor from config
|
|
59
75
|
map((document: VectorStoreDocument) => {
|
|
60
76
|
const attributeProcessor =
|
|
@@ -72,6 +88,6 @@ export function applyMetadata(
|
|
|
72
88
|
// Group back by file for batch deletion in next step
|
|
73
89
|
toArray(),
|
|
74
90
|
);
|
|
75
|
-
}),
|
|
91
|
+
}, ENTRY_CONCURRENCY),
|
|
76
92
|
);
|
|
77
93
|
}
|