@soleri/core 9.14.4 → 9.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brain/brain.d.ts +9 -0
- package/dist/brain/brain.d.ts.map +1 -1
- package/dist/brain/brain.js +11 -1
- package/dist/brain/brain.js.map +1 -1
- package/dist/brain/intelligence.d.ts.map +1 -1
- package/dist/brain/intelligence.js +24 -0
- package/dist/brain/intelligence.js.map +1 -1
- package/dist/brain/types.d.ts +1 -0
- package/dist/brain/types.d.ts.map +1 -1
- package/dist/chat/chat-session.d.ts +6 -0
- package/dist/chat/chat-session.d.ts.map +1 -1
- package/dist/chat/chat-session.js +68 -17
- package/dist/chat/chat-session.js.map +1 -1
- package/dist/curator/curator.d.ts +6 -0
- package/dist/curator/curator.d.ts.map +1 -1
- package/dist/curator/curator.js +138 -0
- package/dist/curator/curator.js.map +1 -1
- package/dist/curator/types.d.ts +10 -0
- package/dist/curator/types.d.ts.map +1 -1
- package/dist/engine/bin/soleri-engine.js +0 -0
- package/dist/flows/types.d.ts +16 -16
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/intake/content-classifier.d.ts +10 -4
- package/dist/intake/content-classifier.d.ts.map +1 -1
- package/dist/intake/content-classifier.js +19 -5
- package/dist/intake/content-classifier.js.map +1 -1
- package/dist/intake/text-ingester.d.ts +18 -0
- package/dist/intake/text-ingester.d.ts.map +1 -1
- package/dist/intake/text-ingester.js +37 -13
- package/dist/intake/text-ingester.js.map +1 -1
- package/dist/planning/planner.d.ts +3 -0
- package/dist/planning/planner.d.ts.map +1 -1
- package/dist/planning/planner.js +43 -4
- package/dist/planning/planner.js.map +1 -1
- package/dist/plugins/types.d.ts +2 -2
- package/dist/runtime/admin-setup-ops.d.ts.map +1 -1
- package/dist/runtime/admin-setup-ops.js +59 -20
- package/dist/runtime/admin-setup-ops.js.map +1 -1
- package/dist/runtime/facades/orchestrate-facade.d.ts.map +1 -1
- package/dist/runtime/facades/orchestrate-facade.js +28 -1
- package/dist/runtime/facades/orchestrate-facade.js.map +1 -1
- package/dist/runtime/runtime.d.ts.map +1 -1
- package/dist/runtime/runtime.js +16 -0
- package/dist/runtime/runtime.js.map +1 -1
- package/dist/runtime/types.d.ts +19 -0
- package/dist/runtime/types.d.ts.map +1 -1
- package/dist/skills/validate-skills.d.ts +32 -0
- package/dist/skills/validate-skills.d.ts.map +1 -0
- package/dist/skills/validate-skills.js +396 -0
- package/dist/skills/validate-skills.js.map +1 -0
- package/dist/vault/default-canonical-tags.d.ts +15 -0
- package/dist/vault/default-canonical-tags.d.ts.map +1 -0
- package/dist/vault/default-canonical-tags.js +65 -0
- package/dist/vault/default-canonical-tags.js.map +1 -0
- package/dist/vault/tag-normalizer.d.ts +42 -0
- package/dist/vault/tag-normalizer.d.ts.map +1 -0
- package/dist/vault/tag-normalizer.js +157 -0
- package/dist/vault/tag-normalizer.js.map +1 -0
- package/package.json +5 -1
- package/src/__tests__/embeddings.test.ts +3 -3
- package/src/brain/brain.ts +25 -1
- package/src/brain/intelligence.ts +25 -0
- package/src/brain/types.ts +1 -0
- package/src/chat/chat-session.ts +75 -17
- package/src/chat/chat-transport.test.ts +31 -1
- package/src/curator/curator.ts +180 -0
- package/src/curator/types.ts +10 -0
- package/src/index.ts +7 -0
- package/src/intake/content-classifier.ts +22 -4
- package/src/intake/text-ingester.ts +61 -12
- package/src/planning/planner.test.ts +86 -90
- package/src/planning/planner.ts +48 -4
- package/src/runtime/admin-setup-ops.test.ts +44 -0
- package/src/runtime/admin-setup-ops.ts +59 -20
- package/src/runtime/facades/orchestrate-facade.ts +27 -1
- package/src/runtime/runtime.ts +18 -0
- package/src/runtime/types.ts +19 -0
- package/src/skills/validate-skills.test.ts +205 -0
- package/src/skills/validate-skills.ts +470 -0
- package/src/vault/default-canonical-tags.ts +64 -0
- package/src/vault/tag-normalizer.test.ts +214 -0
- package/src/vault/tag-normalizer.ts +188 -0
- package/dist/embeddings/index.d.ts +0 -5
- package/dist/embeddings/index.d.ts.map +0 -1
- package/dist/embeddings/index.js +0 -3
- package/dist/embeddings/index.js.map +0 -1
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
computeEditDistance,
|
|
4
|
+
normalizeTag,
|
|
5
|
+
normalizeTags,
|
|
6
|
+
isMetadataTag,
|
|
7
|
+
} from './tag-normalizer.js';
|
|
8
|
+
|
|
9
|
+
// ─── computeEditDistance ────────────────────────────────────────────────────
|
|
10
|
+
|
|
11
|
+
describe('computeEditDistance', () => {
  // Each row: [test title, a, b, expected Levenshtein distance].
  const exactCases: Array<[string, string, string, number]> = [
    ['returns 0 for identical strings', 'workflow', 'workflow', 0],
    ['returns length of b for empty a', '', 'abc', 3],
    ['returns length of a for empty b', 'abc', '', 3],
    ['single insertion: workflow → workflows', 'workflow', 'workflows', 1],
    ['single deletion: testing → testin', 'testing', 'testin', 1],
    ['single substitution: arch → arcs', 'arch', 'arcs', 1],
    [
      "distance 1: architcture → architecture (single insertion - missing 'e')",
      'architcture',
      'architecture',
      1,
    ],
    ['distance 2: archtecrure → architecture (two edits)', 'archtecrure', 'architecture', 2],
  ];

  for (const [title, a, b, expected] of exactCases) {
    it(title, () => {
      expect(computeEditDistance(a, b)).toBe(expected);
    });
  }

  it('large distance: typescript → javascript', () => {
    const distance = computeEditDistance('typescript', 'javascript');
    expect(distance).toBeGreaterThan(3);
  });

  it('is symmetric', () => {
    const forward = computeEditDistance('foo', 'bar');
    const backward = computeEditDistance('bar', 'foo');
    expect(forward).toBe(backward);
  });
});
|
|
52
|
+
|
|
53
|
+
// ─── isMetadataTag ──────────────────────────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
describe('isMetadataTag', () => {
  it('returns true when tag matches a prefix', () => {
    expect(isMetadataTag('source:article', ['source:'])).toBe(true);
  });

  it('returns true for exact prefix match', () => {
    // The tag is exactly the prefix, with nothing after it. The previous input
    // ('source:ingested') only repeated the "starts with prefix" case above.
    expect(isMetadataTag('source:', ['source:'])).toBe(true);
  });

  it('returns false when no prefix matches', () => {
    expect(isMetadataTag('typescript', ['source:'])).toBe(false);
  });

  it('returns false with empty prefix list', () => {
    expect(isMetadataTag('source:article', [])).toBe(false);
  });

  it('supports multiple prefixes', () => {
    expect(isMetadataTag('meta:foo', ['source:', 'meta:'])).toBe(true);
  });
});
|
|
76
|
+
|
|
77
|
+
// ─── normalizeTag ───────────────────────────────────────────────────────────
|
|
78
|
+
|
|
79
|
+
// Canonical tag fixture shared by the normalizeTag and normalizeTags suites in this file.
const CANONICAL = ['architecture', 'typescript', 'workflow', 'testing', 'performance'];
|
|
80
|
+
|
|
81
|
+
describe('normalizeTag — mode: off', () => {
  // In 'off' mode the tag must come back exactly as given, whether it is a
  // near-canonical tag or a noise word.
  it('returns tag as-is regardless of canonical list', () => {
    const untouched = normalizeTag('workflows', CANONICAL, 'off');
    expect(untouched).toBe('workflows');
  });

  it('passes through noise words in off mode', () => {
    const untouched = normalizeTag('new', CANONICAL, 'off');
    expect(untouched).toBe('new');
  });
});
|
|
90
|
+
|
|
91
|
+
describe('normalizeTag — noise stripping', () => {
  // Each row: [test title, raw tag, mode]. Every case must be dropped (null).
  const droppedCases: Array<[string, string, 'enforce' | 'suggest']> = [
    ['drops version strings (v1.2)', 'v1.2', 'suggest'],
    ['drops version strings (v10)', 'v10', 'suggest'],
    ['drops generic noise word: new', 'new', 'suggest'],
    ['drops generic noise word: via', 'via', 'suggest'],
    ['drops generic noise word: raw', 'raw', 'enforce'],
  ];

  for (const [title, rawTag, mode] of droppedCases) {
    it(title, () => {
      expect(normalizeTag(rawTag, CANONICAL, mode)).toBeNull();
    });
  }
});
|
|
112
|
+
|
|
113
|
+
describe('normalizeTag — mode: suggest', () => {
  // Small helper so every case reads as "input → expectation".
  const suggest = (raw: string): string | null => normalizeTag(raw, CANONICAL, 'suggest');

  it('returns canonical for exact match', () => {
    expect(suggest('typescript')).toBe('typescript');
  });

  it('maps within edit-distance 2 to canonical', () => {
    // 'workflows' → 'workflow' is a single deletion (distance 1).
    expect(suggest('workflows')).toBe('workflow');
  });

  it('lowercases tag before matching', () => {
    expect(suggest('TypeScript')).toBe('typescript');
  });

  it('passes through unknown tag with no close canonical (suggest passthrough)', () => {
    // Nothing in CANONICAL is within distance 2, so the tag is kept.
    expect(suggest('gamification')).toBe('gamification');
  });

  it('returns null for noise even in suggest mode', () => {
    expect(suggest('one')).toBeNull();
  });
});
|
|
137
|
+
|
|
138
|
+
describe('normalizeTag — mode: enforce', () => {
  it('returns canonical for exact match', () => {
    expect(normalizeTag('testing', CANONICAL, 'enforce')).toBe('testing');
  });

  it('maps within edit-distance 3 to canonical', () => {
    // 'archtecrure' is 2 away from 'architecture'
    expect(normalizeTag('archtecrure', CANONICAL, 'enforce')).toBe('architecture');
  });

  it('maps a tag exactly 3 edits away, which suggest mode would pass through', () => {
    // 'perfomce' is 'performance' with three characters deleted (r, a, n), so
    // its distance is exactly 3: inside the enforce threshold but outside the
    // suggest threshold of 2. This pins the boundary the distance-2 case above
    // cannot distinguish.
    expect(normalizeTag('perfomce', CANONICAL, 'enforce')).toBe('performance');
    expect(normalizeTag('perfomce', CANONICAL, 'suggest')).toBe('perfomce');
  });

  it('returns null for tag with no match within distance 3', () => {
    // 'gamification' is far from all CANONICAL entries
    expect(normalizeTag('gamification', CANONICAL, 'enforce')).toBeNull();
  });

  it('returns null for noise words', () => {
    expect(normalizeTag('full', CANONICAL, 'enforce')).toBeNull();
  });
});
|
|
157
|
+
|
|
158
|
+
describe('normalizeTag — empty canonical list', () => {
  // With nothing to match against, the two active modes diverge:
  // suggest keeps the tag, enforce drops it.
  const noCanonical: string[] = [];

  it('suggest mode: passes through non-noise tags', () => {
    const kept = normalizeTag('react', noCanonical, 'suggest');
    expect(kept).toBe('react');
  });

  it('enforce mode: drops all tags (no canonical to match)', () => {
    const dropped = normalizeTag('react', noCanonical, 'enforce');
    expect(dropped).toBeNull();
  });
});
|
|
167
|
+
|
|
168
|
+
// ─── normalizeTags ──────────────────────────────────────────────────────────
|
|
169
|
+
|
|
170
|
+
describe('normalizeTags', () => {
  const SOURCE_PREFIX = ['source:'];

  it('deduplicates tags that map to the same canonical', () => {
    // 'workflows' and 'workflow' both resolve to 'workflow', so one entry remains.
    expect(normalizeTags(['workflows', 'workflow'], CANONICAL, 'suggest')).toEqual(['workflow']);
  });

  it('preserves metadata tags unchanged', () => {
    const output = normalizeTags(
      ['source:article', 'typescript'],
      CANONICAL,
      'enforce',
      SOURCE_PREFIX,
    );
    expect(output).toContain('source:article');
    expect(output).toContain('typescript');
  });

  it('metadata tags bypass canonical normalization in enforce mode', () => {
    // 'source:mytype' matches no canonical entry, yet must survive enforce mode.
    const output = normalizeTags(['source:mytype'], CANONICAL, 'enforce', SOURCE_PREFIX);
    expect(output).toEqual(['source:mytype']);
  });

  it('returns empty array when all tags are noise', () => {
    const noiseOnly = ['new', 'via', 'raw', 'v1.2'];
    expect(normalizeTags(noiseOnly, CANONICAL, 'suggest')).toEqual([]);
  });

  it('in off mode, returns tags unchanged', () => {
    const original = ['new', 'workflows', 'v1.2'];
    expect(normalizeTags(original, CANONICAL, 'off')).toEqual(original);
  });

  it('batch normalizes a mixed tag list', () => {
    const mixed = ['TypeScript', 'workflows', 'new', 'source:article'];
    const output = normalizeTags(mixed, CANONICAL, 'suggest', SOURCE_PREFIX);

    for (const expected of ['typescript', 'workflow', 'source:article']) {
      expect(output).toContain(expected);
    }
    expect(output).not.toContain('new');
  });
});
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tag Normalizer — canonical tag taxonomy enforcement.
|
|
3
|
+
*
|
|
4
|
+
* Maps raw tags to nearest canonical via Levenshtein edit-distance.
|
|
5
|
+
* Strips noise words (version strings, single generic words).
|
|
6
|
+
* Respects metadata tag prefixes (e.g. 'source:').
|
|
7
|
+
*
|
|
8
|
+
* Three modes:
|
|
9
|
+
* - 'enforce': must match within edit-distance 3, else drop (return null)
|
|
10
|
+
* - 'suggest': map to nearest canonical within edit-distance 2 (passthrough if no match)
|
|
11
|
+
* - 'off': no normalization — return tag as-is
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// ─── Noise filter ────────────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/**
 * Version-string pattern: v1.2, v10, v1.2.3, etc. (case-insensitive).
 *
 * Anchored at both ends so the WHOLE tag must be a version string. Without the
 * trailing `$`, any tag that merely starts with `v<digit>` (e.g. 'v4l2',
 * 'v8-engine') would be treated as a version string.
 */
const VERSION_PATTERN = /^v\d+(\.\d+)*$/i;
|
|
20
|
+
|
|
21
|
+
/**
 * Lowercase single words treated as noise: tags equal to one of these
 * carry no signal.
 */
const NOISE_WORDS = new Set([
  // generic qualifiers
  'one', 'via', 'new', 'full', 'actual', 'raw',
  // articles, conjunctions, prepositions, demonstratives
  'the', 'and', 'for', 'with', 'this', 'that', 'from', 'into',
]);
|
|
40
|
+
|
|
41
|
+
/** True when the tag matches VERSION_PATTERN or, lowercased, is in NOISE_WORDS. */
function isNoisy(tag: string): boolean {
  return VERSION_PATTERN.test(tag) || NOISE_WORDS.has(tag.toLowerCase());
}
|
|
46
|
+
|
|
47
|
+
// ─── Levenshtein edit distance ───────────────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
/**
 * Levenshtein edit distance between `a` and `b`: the minimum number of
 * single-character insertions, deletions and substitutions turning one into
 * the other.
 *
 * Uses the rolling two-row dynamic programme: O(a.length * b.length) time,
 * O(b.length) extra space.
 */
export function computeEditDistance(a: string, b: string): number {
  if (a === b) return 0;

  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;

  // `above` holds distances for the previous prefix of `a`; `row` is being filled.
  let above: number[] = Array.from({ length: cols + 1 }, (_, k) => k);
  let row: number[] = new Array<number>(cols + 1).fill(0);

  for (let r = 1; r <= rows; r++) {
    const ch = a[r - 1];
    row[0] = r; // turning a[0..r) into the empty string costs r deletions

    for (let c = 1; c <= cols; c++) {
      const viaInsertion = row[c - 1] + 1;
      const viaDeletion = above[c] + 1;
      const viaSubstitution = above[c - 1] + (ch === b[c - 1] ? 0 : 1);
      row[c] = Math.min(viaInsertion, viaDeletion, viaSubstitution);
    }

    // Recycle the buffers: the row just filled becomes the "previous" row.
    const recycled = above;
    above = row;
    row = recycled;
  }

  return above[cols];
}
|
|
76
|
+
|
|
77
|
+
// ─── Metadata tag check ──────────────────────────────────────────────────────
|
|
78
|
+
|
|
79
|
+
/**
 * Reports whether `tag` begins with at least one entry of `prefixes`.
 * An empty prefix list never matches.
 */
export function isMetadataTag(tag: string, prefixes: string[]): boolean {
  for (const candidate of prefixes) {
    if (tag.startsWith(candidate)) {
      return true;
    }
  }
  return false;
}
|
|
86
|
+
|
|
87
|
+
// ─── Single tag normalization ────────────────────────────────────────────────
|
|
88
|
+
|
|
89
|
+
/**
 * Normalize a single tag against a canonical list.
 *
 * In 'off' mode the tag is returned untouched. Otherwise:
 *  1. The tag is lowercased and trimmed.
 *  2. Empty tags and noise (per `isNoisy`) are dropped.
 *  3. A case-insensitive exact match returns the canonical entry in the
 *     canonical list's own casing.
 *  4. Otherwise the canonical entry with the smallest edit distance is chosen
 *     (the earliest entry wins ties) and returned when the distance is within
 *     the mode's threshold: 3 for 'enforce', 2 for 'suggest'.
 *  5. With no close match, 'enforce' drops the tag; 'suggest' passes through
 *     the lowercased, trimmed tag.
 *
 * @param tag - Raw tag to normalize
 * @param canonical - Canonical tag list to map against
 * @param mode - Constraint mode: 'enforce' | 'suggest' | 'off'
 * @returns Normalized tag string, or null if the tag should be dropped.
 */
export function normalizeTag(
  tag: string,
  canonical: string[],
  mode: 'enforce' | 'suggest' | 'off',
): string | null {
  if (mode === 'off') return tag;

  const lower = tag.toLowerCase().trim();

  // An empty or whitespace-only tag carries no signal. Without this guard its
  // edit distance to a canonical entry equals that entry's length, so it would
  // be "corrected" to any canonical tag no longer than the threshold.
  if (lower.length === 0) return null;

  // Always drop noise words
  if (isNoisy(lower)) return null;

  if (canonical.length === 0) {
    // No canonical list configured — pass through in suggest, drop in enforce
    return mode === 'enforce' ? null : lower;
  }

  // Derive lowercase canonical for matching; preserve original casing for return
  const canonicalLower = canonical.map((x) => x.toLowerCase());

  // Exact match in canonical list — return canonical form (original casing)
  const exactIdx = canonicalLower.indexOf(lower);
  if (exactIdx !== -1) return canonical[exactIdx];

  // Find nearest canonical by edit distance (strict `<` keeps the first of equals)
  let bestMatch: string | null = null;
  let bestDist = Infinity;

  for (let i = 0; i < canonicalLower.length; i++) {
    const dist = computeEditDistance(lower, canonicalLower[i]);
    if (dist < bestDist) {
      bestDist = dist;
      bestMatch = canonical[i];
    }
  }

  // NOTE(review): thresholds are absolute, not scaled to tag length, so a very
  // short tag can map to an unrelated short canonical entry (e.g. 'go' is
  // distance 2 from 'ui'). Confirm this is acceptable for the canonical list in use.
  const threshold = mode === 'enforce' ? 3 : 2;

  if (bestDist <= threshold && bestMatch !== null) {
    return bestMatch;
  }

  // No close match found
  if (mode === 'enforce') {
    return null; // drop the tag
  }

  // 'suggest' mode — pass the tag through in its lowercased, trimmed form
  return lower;
}
|
|
147
|
+
|
|
148
|
+
// ─── Batch tag normalization ─────────────────────────────────────────────────
|
|
149
|
+
|
|
150
|
+
/**
 * Normalize a list of tags against a canonical list.
 *
 * In 'off' mode the input array is returned as-is. Otherwise each tag is either
 * kept verbatim (when it starts with one of `metadataPrefixes`) or passed
 * through `normalizeTag`; dropped tags (null) are omitted and duplicates are
 * removed, keeping first-occurrence order.
 *
 * @param tags - Raw tags
 * @param canonical - Canonical tag list
 * @param mode - Constraint mode
 * @param metadataPrefixes - Tags with these prefixes bypass normalization
 */
export function normalizeTags(
  tags: string[],
  canonical: string[],
  mode: 'enforce' | 'suggest' | 'off',
  metadataPrefixes: string[] = ['source:'],
): string[] {
  if (mode === 'off') return tags;

  // A Set iterates in insertion order, so it deduplicates and preserves the
  // order in which each distinct tag was first produced.
  const kept = new Set<string>();

  for (const raw of tags) {
    const candidate = isMetadataTag(raw, metadataPrefixes)
      ? raw
      : normalizeTag(raw, canonical, mode);

    if (candidate !== null) {
      kept.add(candidate);
    }
  }

  return Array.from(kept);
}
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
export type { EmbeddingProvider, EmbeddingResult, EmbeddingConfig, StoredVector, EmbeddingStats, } from './types.js';
|
|
2
|
-
export { OpenAIEmbeddingProvider } from './openai-provider.js';
|
|
3
|
-
export { EmbeddingPipeline } from './pipeline.js';
|
|
4
|
-
export type { BatchEmbedOptions, BatchEmbedResult } from './pipeline.js';
|
|
5
|
-
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/embeddings/index.ts"],"names":[],"mappings":"AAAA,YAAY,EACV,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,YAAY,EACZ,cAAc,GACf,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,uBAAuB,EAAE,MAAM,sBAAsB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClD,YAAY,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC"}
|
package/dist/embeddings/index.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/embeddings/index.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,uBAAuB,EAAE,MAAM,sBAAsB,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC"}
|