@equinor/fusion-framework-cli-plugin-ai-index 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +64 -0
- package/dist/esm/bin/apply-metadata.js +15 -5
- package/dist/esm/bin/apply-metadata.js.map +1 -1
- package/dist/esm/bin/apply-schema.js +64 -0
- package/dist/esm/bin/apply-schema.js.map +1 -0
- package/dist/esm/bin/apply-schema.test.js +143 -0
- package/dist/esm/bin/apply-schema.test.js.map +1 -0
- package/dist/esm/bin/delete-removed-files.js +1 -1
- package/dist/esm/bin/delete-removed-files.js.map +1 -1
- package/dist/esm/bin/embed.js +188 -47
- package/dist/esm/bin/embed.js.map +1 -1
- package/dist/esm/create-command.js +186 -0
- package/dist/esm/create-command.js.map +1 -0
- package/dist/esm/delete-command.js +14 -2
- package/dist/esm/delete-command.js.map +1 -1
- package/dist/esm/delete-command.options.js +7 -31
- package/dist/esm/delete-command.options.js.map +1 -1
- package/dist/esm/delete-index-command.js +94 -0
- package/dist/esm/delete-index-command.js.map +1 -0
- package/dist/esm/embed-command.js +30 -0
- package/dist/esm/embed-command.js.map +1 -0
- package/dist/esm/embeddings-command.js +14 -17
- package/dist/esm/embeddings-command.js.map +1 -1
- package/dist/esm/embeddings-command.options.js +12 -43
- package/dist/esm/embeddings-command.options.js.map +1 -1
- package/dist/esm/index.js +12 -3
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/schema.js +41 -0
- package/dist/esm/schema.js.map +1 -0
- package/dist/esm/search-command.js +17 -5
- package/dist/esm/search-command.js.map +1 -1
- package/dist/esm/utils/embedding-dimensions.js +37 -0
- package/dist/esm/utils/embedding-dimensions.js.map +1 -0
- package/dist/esm/utils/zod-to-azure-fields.js +120 -0
- package/dist/esm/utils/zod-to-azure-fields.js.map +1 -0
- package/dist/esm/utils/zod-to-azure-fields.test.js +112 -0
- package/dist/esm/utils/zod-to-azure-fields.test.js.map +1 -0
- package/dist/esm/version.js +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types/bin/apply-metadata.d.ts +2 -1
- package/dist/types/bin/apply-schema.d.ts +22 -0
- package/dist/types/bin/apply-schema.test.d.ts +1 -0
- package/dist/types/config.d.ts +14 -0
- package/dist/types/create-command.d.ts +6 -0
- package/dist/types/delete-command.options.d.ts +9 -23
- package/dist/types/delete-index-command.d.ts +6 -0
- package/dist/types/embed-command.d.ts +12 -0
- package/dist/types/embeddings-command.options.d.ts +9 -28
- package/dist/types/index.d.ts +1 -0
- package/dist/types/schema.d.ts +137 -0
- package/dist/types/utils/embedding-dimensions.d.ts +13 -0
- package/dist/types/utils/zod-to-azure-fields.d.ts +61 -0
- package/dist/types/utils/zod-to-azure-fields.test.d.ts +1 -0
- package/dist/types/version.d.ts +1 -1
- package/package.json +7 -7
- package/src/bin/apply-metadata.ts +20 -4
- package/src/bin/apply-schema.test.ts +170 -0
- package/src/bin/apply-schema.ts +86 -0
- package/src/bin/delete-removed-files.ts +1 -1
- package/src/bin/embed.ts +248 -76
- package/src/config.ts +15 -0
- package/src/create-command.ts +218 -0
- package/src/delete-command.options.ts +7 -37
- package/src/delete-command.ts +19 -2
- package/src/delete-index-command.ts +121 -0
- package/src/embed-command.ts +44 -0
- package/src/embeddings-command.options.ts +12 -50
- package/src/embeddings-command.ts +18 -18
- package/src/index.ts +12 -3
- package/src/schema.ts +149 -0
- package/src/search-command.ts +22 -5
- package/src/utils/embedding-dimensions.ts +39 -0
- package/src/utils/zod-to-azure-fields.test.ts +136 -0
- package/src/utils/zod-to-azure-fields.ts +177 -0
- package/src/version.ts +1 -1
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import { of, lastValueFrom } from 'rxjs';
|
|
4
|
+
import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
|
|
5
|
+
|
|
6
|
+
import { defineIndexSchema } from '../schema.js';
|
|
7
|
+
import { applySchema } from './apply-schema.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Test helper that builds a minimal VectorStoreDocument.
 *
 * A fixed `id` and `pageContent` are used as the baseline; any field present
 * in `overrides` replaces the baseline value. `metadata` has no baseline and
 * must always be provided by the caller.
 *
 * @param overrides - Fields to place on the document; `metadata` is required.
 * @returns The baseline fields merged with `overrides`.
 */
function makeDocument(
  overrides: Partial<VectorStoreDocument> & { metadata: VectorStoreDocument['metadata'] },
): VectorStoreDocument {
  // Baseline first so that anything in `overrides` wins on key collisions
  const baseline = { id: 'test-id', pageContent: 'test content' };
  return { ...baseline, ...overrides };
}
|
|
19
|
+
|
|
20
|
+
describe('defineIndexSchema', () => {
  it('returns the same config object (type-narrowing only)', () => {
    // Keep the members on a local config so identity can be compared afterwards
    const config = {
      shape: z.object({ type: z.string() }),
      resolve: () => ({ type: 'tsdoc' }),
    };

    const defined = defineIndexSchema({ shape: config.shape, resolve: config.resolve });

    // Same references in, same references out
    expect(defined.shape).toBe(config.shape);
    expect(defined.resolve).toBe(config.resolve);
  });
});
|
|
31
|
+
|
|
32
|
+
describe('applySchema', () => {
  // Shared schema: three fields read from attributes plus one derived from
  // the first segment of the source path
  const schema = defineIndexSchema({
    shape: z.object({
      pkg_name: z.string().optional(),
      type: z.string(),
      tags: z.array(z.string()).default([]),
      source_dir: z.string(),
    }),
    resolve: (doc) => {
      const attributes = doc.metadata.attributes;
      return {
        pkg_name: attributes?.pkg_name as string | undefined,
        type: (attributes?.type as string) ?? 'unknown',
        tags: (attributes?.tags as string[]) ?? [],
        source_dir: doc.metadata.source.split('/')[0],
      };
    },
  });

  /**
   * Sends one single-document batch through `applySchema` and resolves with
   * the batch emitted by the resulting stream.
   */
  const runSingle = (
    input: VectorStoreDocument,
    config: Parameters<typeof applySchema>[1],
  ): Promise<VectorStoreDocument[]> => lastValueFrom(applySchema(of([input]), config));

  it('passes through unchanged when schema is undefined', async () => {
    const input = makeDocument({
      metadata: { source: 'packages/foo/src/index.ts', attributes: { type: 'tsdoc' } },
    });

    const output = await runSingle(input, undefined);

    expect(output).toEqual([input]);
  });

  it('resolves promoted fields and stores them on metadata.schemaFields', async () => {
    const input = makeDocument({
      metadata: {
        source: 'packages/foo/src/index.ts',
        attributes: {
          type: 'tsdoc',
          pkg_name: '@equinor/fusion-framework',
          tags: ['package', 'react'],
          other_attr: 'keep-me',
        },
      },
    });

    const output = await runSingle(input, schema);

    expect(output[0].metadata.schemaFields).toEqual({
      pkg_name: '@equinor/fusion-framework',
      type: 'tsdoc',
      tags: ['package', 'react'],
      source_dir: 'packages',
    });
  });

  it('removes promoted keys from attributes to avoid duplication', async () => {
    const input = makeDocument({
      metadata: {
        source: 'packages/foo/src/index.ts',
        attributes: {
          type: 'tsdoc',
          pkg_name: '@equinor/fusion-framework',
          tags: ['package'],
          git_commit_hash: 'abc123',
        },
      },
    });

    const output = await runSingle(input, schema);

    // Only the key that is not declared on the schema shape is left behind
    expect(output[0].metadata.attributes).toEqual({ git_commit_hash: 'abc123' });
  });

  it('handles documents with no attributes gracefully', async () => {
    const input = makeDocument({
      metadata: { source: 'cookbooks/app-react/src/App.tsx' },
    });

    const output = await runSingle(input, schema);
    const { schemaFields, attributes } = output[0].metadata;

    expect(schemaFields).toEqual({
      pkg_name: undefined,
      type: 'unknown',
      tags: [],
      source_dir: 'cookbooks',
    });
    expect(attributes).toEqual({});
  });

  it('throws when resolved values fail Zod validation', async () => {
    // The resolver always yields an empty string, which min(1) rejects
    const badSchema = defineIndexSchema({
      shape: z.object({ type: z.string().min(1) }),
      resolve: () => ({ type: '' }),
    });
    const input = makeDocument({
      metadata: { source: 'test.ts', attributes: {} },
    });

    await expect(runSingle(input, badSchema)).rejects.toThrow();
  });

  it('runs prepareAttributes before resolve to enrich attributes', async () => {
    const schemaWithPrepare = defineIndexSchema({
      shape: z.object({
        tags: z.array(z.string()).default([]),
        type: z.string(),
      }),
      prepareAttributes: (attrs, doc) => {
        // Ensure a tags array exists, then tag documents that live under packages/
        const tags = (attrs.tags ??= []);
        if (doc.metadata.source.includes('packages/')) {
          tags.push('package');
        }
        return attrs;
      },
      resolve: (doc) => {
        const attributes = doc.metadata.attributes;
        return {
          tags: (attributes?.tags as string[]) ?? [],
          type: (attributes?.type as string) ?? 'unknown',
        };
      },
    });

    const input = makeDocument({
      metadata: {
        source: 'packages/framework/src/index.ts',
        attributes: { type: 'tsdoc' },
      },
    });

    const output = await runSingle(input, schemaWithPrepare);

    // The 'package' tag can only be present if prepareAttributes ran first
    expect(output[0].metadata.schemaFields).toEqual({
      tags: ['package'],
      type: 'tsdoc',
    });
  });
});
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { map } from 'rxjs';
|
|
2
|
+
import type { Observable } from 'rxjs';
|
|
3
|
+
import type { VectorStoreDocument } from '@equinor/fusion-framework-module-ai/lib';
|
|
4
|
+
import type { IndexSchemaConfig } from '../schema.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Resolves promoted schema fields for each document in every batch and
 * separates them from the generic `attributes` bag.
 *
 * For each document in the batch:
 * 1. Runs the optional `prepareAttributes` callback on a shallow copy of the
 *    document's attributes, so in-place key assignments made by the callback
 *    do not leak back into the input document
 * 2. Calls the schema resolver to compute promoted field values
 * 3. Validates the resolved values against the Zod shape
 * 4. Stores promoted fields on `metadata.schemaFields`
 * 5. Removes promoted keys from `metadata.attributes` to avoid duplication
 *
 * When no schema is configured, the input stream itself is returned.
 *
 * A validation failure is thrown by `schema.shape.parse` inside the `map`
 * projection, so it surfaces as an error on the returned stream rather than
 * as a synchronous exception from this function.
 *
 * @param document$ - Stream of document batches from the metadata enrichment step.
 * @param schema - The index schema config, if defined. When `undefined`, documents pass through unchanged.
 * @returns Stream of document batches with promoted fields resolved and stored.
 */
export function applySchema(
  document$: Observable<VectorStoreDocument[]>,
  schema: IndexSchemaConfig | undefined,
): Observable<VectorStoreDocument[]> {
  // No schema configured — pass through unchanged (backward compatible)
  if (!schema) {
    return document$;
  }

  // Keys declared on the Zod object shape; computed once and reused per document
  const promotedKeys = new Set(Object.keys(schema.shape.shape as Record<string, unknown>));

  return document$.pipe(
    map((documents) =>
      documents.map((document) => {
        // Run typed attribute processor before schema resolution so the
        // resolver receives fully enriched attributes
        let enrichedDocument = document;
        if (schema.prepareAttributes) {
          // Hand the callback a shallow copy: callbacks commonly assign keys
          // in place, and that must not alter the document received from
          // upstream. Nested values (arrays, objects) are still shared.
          const enrichedAttributes = schema.prepareAttributes(
            { ...(document.metadata.attributes ?? {}) } as Record<string, unknown>,
            document,
          );
          enrichedDocument = {
            ...document,
            metadata: {
              ...document.metadata,
              attributes: enrichedAttributes as Record<string, unknown>,
            },
          };
        }

        // Resolve promoted field values from the fully enriched document
        const resolved = schema.resolve(enrichedDocument);

        // Validate against the Zod shape — throws on invalid data
        const validated = schema.shape.parse(resolved) as Record<string, unknown>;

        // Remove promoted keys from attributes to avoid storing them
        // in both top-level fields and the generic attributes bag
        const currentAttributes = (enrichedDocument.metadata.attributes ?? {}) as Record<
          string,
          unknown
        >;
        const remainingAttributes: Record<string, unknown> = {};
        for (const [key, value] of Object.entries(currentAttributes)) {
          if (!promotedKeys.has(key)) {
            remainingAttributes[key] = value;
          }
        }

        return {
          ...enrichedDocument,
          metadata: {
            ...enrichedDocument.metadata,
            attributes: remainingAttributes,
            schemaFields: validated,
          },
        };
      }),
    ),
  );
}
|
|
@@ -33,7 +33,7 @@ export function createDeleteRemovedFilesStream(
|
|
|
33
33
|
console.log('Removing entry from vector store', file.relativePath);
|
|
34
34
|
}
|
|
35
35
|
if (!options.dryRun) {
|
|
36
|
-
const vectorStoreService = framework.ai.
|
|
36
|
+
const vectorStoreService = framework.ai.useIndex(options.indexName);
|
|
37
37
|
// Single batch deletion - one file can produce multiple document chunks
|
|
38
38
|
await vectorStoreService.deleteDocuments({
|
|
39
39
|
filter: { filterExpression: filterExpression ?? undefined },
|