@soleri/core 9.14.0 → 9.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brain/brain.d.ts +9 -0
- package/dist/brain/brain.d.ts.map +1 -1
- package/dist/brain/brain.js +11 -1
- package/dist/brain/brain.js.map +1 -1
- package/dist/brain/intelligence.d.ts.map +1 -1
- package/dist/brain/intelligence.js +24 -0
- package/dist/brain/intelligence.js.map +1 -1
- package/dist/brain/types.d.ts +1 -0
- package/dist/brain/types.d.ts.map +1 -1
- package/dist/chat/chat-session.d.ts +6 -0
- package/dist/chat/chat-session.d.ts.map +1 -1
- package/dist/chat/chat-session.js +68 -17
- package/dist/chat/chat-session.js.map +1 -1
- package/dist/curator/curator.d.ts +6 -0
- package/dist/curator/curator.d.ts.map +1 -1
- package/dist/curator/curator.js +138 -0
- package/dist/curator/curator.js.map +1 -1
- package/dist/curator/types.d.ts +10 -0
- package/dist/curator/types.d.ts.map +1 -1
- package/dist/engine/bin/soleri-engine.js +0 -0
- package/dist/flows/types.d.ts +16 -16
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/intake/content-classifier.d.ts +10 -4
- package/dist/intake/content-classifier.d.ts.map +1 -1
- package/dist/intake/content-classifier.js +19 -5
- package/dist/intake/content-classifier.js.map +1 -1
- package/dist/intake/text-ingester.d.ts +18 -0
- package/dist/intake/text-ingester.d.ts.map +1 -1
- package/dist/intake/text-ingester.js +37 -13
- package/dist/intake/text-ingester.js.map +1 -1
- package/dist/planning/planner.d.ts +3 -0
- package/dist/planning/planner.d.ts.map +1 -1
- package/dist/planning/planner.js +43 -4
- package/dist/planning/planner.js.map +1 -1
- package/dist/plugins/types.d.ts +2 -2
- package/dist/runtime/admin-setup-ops.d.ts.map +1 -1
- package/dist/runtime/admin-setup-ops.js +59 -20
- package/dist/runtime/admin-setup-ops.js.map +1 -1
- package/dist/runtime/facades/orchestrate-facade.d.ts.map +1 -1
- package/dist/runtime/facades/orchestrate-facade.js +28 -1
- package/dist/runtime/facades/orchestrate-facade.js.map +1 -1
- package/dist/runtime/runtime.d.ts.map +1 -1
- package/dist/runtime/runtime.js +16 -0
- package/dist/runtime/runtime.js.map +1 -1
- package/dist/runtime/types.d.ts +19 -0
- package/dist/runtime/types.d.ts.map +1 -1
- package/dist/skills/sync-skills.d.ts.map +1 -1
- package/dist/skills/sync-skills.js +9 -3
- package/dist/skills/sync-skills.js.map +1 -1
- package/dist/skills/validate-skills.d.ts +32 -0
- package/dist/skills/validate-skills.d.ts.map +1 -0
- package/dist/skills/validate-skills.js +396 -0
- package/dist/skills/validate-skills.js.map +1 -0
- package/dist/vault/default-canonical-tags.d.ts +15 -0
- package/dist/vault/default-canonical-tags.d.ts.map +1 -0
- package/dist/vault/default-canonical-tags.js +65 -0
- package/dist/vault/default-canonical-tags.js.map +1 -0
- package/dist/vault/tag-normalizer.d.ts +42 -0
- package/dist/vault/tag-normalizer.d.ts.map +1 -0
- package/dist/vault/tag-normalizer.js +157 -0
- package/dist/vault/tag-normalizer.js.map +1 -0
- package/package.json +6 -2
- package/src/__tests__/embeddings.test.ts +3 -3
- package/src/brain/brain.ts +25 -1
- package/src/brain/intelligence.ts +25 -0
- package/src/brain/types.ts +1 -0
- package/src/chat/chat-session.ts +75 -17
- package/src/chat/chat-transport.test.ts +31 -1
- package/src/curator/curator.ts +180 -0
- package/src/curator/types.ts +10 -0
- package/src/index.ts +7 -0
- package/src/intake/content-classifier.ts +22 -4
- package/src/intake/text-ingester.ts +61 -12
- package/src/planning/planner.test.ts +86 -90
- package/src/planning/planner.ts +48 -4
- package/src/runtime/admin-setup-ops.test.ts +44 -0
- package/src/runtime/admin-setup-ops.ts +59 -20
- package/src/runtime/facades/orchestrate-facade.ts +27 -1
- package/src/runtime/runtime.ts +18 -0
- package/src/runtime/types.ts +19 -0
- package/src/skills/sync-skills.ts +9 -3
- package/src/skills/validate-skills.test.ts +205 -0
- package/src/skills/validate-skills.ts +470 -0
- package/src/vault/default-canonical-tags.ts +64 -0
- package/src/vault/tag-normalizer.test.ts +214 -0
- package/src/vault/tag-normalizer.ts +188 -0
- package/dist/embeddings/index.d.ts +0 -5
- package/dist/embeddings/index.d.ts.map +0 -1
- package/dist/embeddings/index.js +0 -3
- package/dist/embeddings/index.js.map +0 -1
- package/dist/knowledge-packs/knowledge-packs/community/.gitkeep +0 -0
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/soleri-pack.json +0 -10
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/accessibility.json +0 -53
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/design-tokens.json +0 -26
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/design.json +0 -33
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/styling.json +0 -44
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/ux-laws.json +0 -36
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/ux.json +0 -36
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/soleri-pack.json +0 -10
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/architecture.json +0 -143
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/commercial.json +0 -16
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/communication.json +0 -33
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/component.json +0 -16
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/express.json +0 -34
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/leadership.json +0 -33
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/methodology.json +0 -33
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/monorepo.json +0 -33
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/other.json +0 -73
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/performance.json +0 -35
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/prisma.json +0 -33
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/product-strategy.json +0 -42
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/react.json +0 -47
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/security.json +0 -34
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/testing.json +0 -33
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/tooling.json +0 -85
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/typescript.json +0 -34
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/workflow.json +0 -46
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-uipro/soleri-pack.json +0 -10
- package/dist/knowledge-packs/knowledge-packs/salvador/salvador-uipro/vault/design.json +0 -2589
- package/dist/knowledge-packs/knowledge-packs/starter/architecture/soleri-pack.json +0 -10
- package/dist/knowledge-packs/knowledge-packs/starter/architecture/vault/patterns.json +0 -137
- package/dist/knowledge-packs/knowledge-packs/starter/design/soleri-pack.json +0 -10
- package/dist/knowledge-packs/knowledge-packs/starter/design/vault/patterns.json +0 -137
- package/dist/knowledge-packs/knowledge-packs/starter/security/soleri-pack.json +0 -10
- package/dist/knowledge-packs/knowledge-packs/starter/security/vault/patterns.json +0 -137
- /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/api-design/soleri-pack.json +0 -0
- /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/api-design/vault/patterns.json +0 -0
- /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/nodejs/soleri-pack.json +0 -0
- /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/nodejs/vault/patterns.json +0 -0
- /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/react/soleri-pack.json +0 -0
- /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/react/vault/patterns.json +0 -0
- /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/testing/soleri-pack.json +0 -0
- /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/testing/vault/patterns.json +0 -0
- /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/typescript/soleri-pack.json +0 -0
- /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/typescript/vault/patterns.json +0 -0
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import { describe, test, expect, beforeEach, afterEach } from 'vitest';
|
|
7
|
-
import { mkdtempSync, rmSync } from 'node:fs';
|
|
7
|
+
import { mkdtempSync, rmSync, readFileSync, writeFileSync, existsSync } from 'node:fs';
|
|
8
8
|
import { join } from 'node:path';
|
|
9
9
|
import { tmpdir } from 'node:os';
|
|
10
10
|
import { ChatSessionManager } from './chat-session.js';
|
|
@@ -136,6 +136,19 @@ describe('ChatSessionManager', () => {
|
|
|
136
136
|
expect(all).toContain('chat-2');
|
|
137
137
|
});
|
|
138
138
|
|
|
139
|
+
// A stray JSON file sitting in the storage root must not be reported as a session.
test('listAll ignores non-session JSON files in the storage root', () => {
  const strayPath = join(dir, 'plans.json');
  const strayBody = JSON.stringify({ version: '1.0', plans: [] });

  manager.getOrCreate('chat-1');
  writeFileSync(strayPath, strayBody, 'utf-8');

  const sessionIds = manager.listAll();

  expect(sessionIds).toContain('chat-1');
  expect(sessionIds).not.toContain('plans');
});
|
|
151
|
+
|
|
139
152
|
test('setMeta updates metadata', () => {
|
|
140
153
|
manager.getOrCreate('chat-1');
|
|
141
154
|
manager.setMeta('chat-1', { mood: 'happy' });
|
|
@@ -162,6 +175,23 @@ describe('ChatSessionManager', () => {
|
|
|
162
175
|
manager2.close();
|
|
163
176
|
});
|
|
164
177
|
|
|
178
|
+
// Creating a session called 'plans' must leave a root-level plans.json untouched
// and write the session under the 'sessions' subdirectory instead.
test('session files are namespaced away from plans.json collisions', () => {
  const rootPlansPath = join(dir, 'plans.json');
  const rootPlansDoc = { version: '1.0', plans: [{ id: 'plan-1' }] };
  writeFileSync(rootPlansPath, JSON.stringify(rootPlansDoc), 'utf-8');

  const session = manager.getOrCreate('plans');

  expect(session.messages).toEqual([]);

  const rootPlansOnDisk = JSON.parse(readFileSync(rootPlansPath, 'utf-8'));
  expect(rootPlansOnDisk).toEqual(rootPlansDoc);

  const namespacedPath = join(dir, 'sessions', 'plans.json');
  expect(existsSync(namespacedPath)).toBe(true);
});
|
|
194
|
+
|
|
165
195
|
test('delete removes from disk', () => {
|
|
166
196
|
manager.getOrCreate('chat-1');
|
|
167
197
|
manager.delete('chat-1');
|
package/src/curator/curator.ts
CHANGED
|
@@ -40,6 +40,10 @@ import {
|
|
|
40
40
|
import { initializeTables } from './schema.js';
|
|
41
41
|
import { computeHealthAudit, type HealthDataProvider } from './health-audit.js';
|
|
42
42
|
import { enrichEntryMetadata } from './metadata-enricher.js';
|
|
43
|
+
import {
|
|
44
|
+
computeEditDistance,
|
|
45
|
+
normalizeTags as normalizeTagsCanonical,
|
|
46
|
+
} from '../vault/tag-normalizer.js';
|
|
43
47
|
|
|
44
48
|
// ─── Constants ──────────────────────────────────────────────────────
|
|
45
49
|
|
|
@@ -359,15 +363,141 @@ export class Curator {
|
|
|
359
363
|
if (batch.length < DEFAULT_BATCH_SIZE) break;
|
|
360
364
|
offset += DEFAULT_BATCH_SIZE;
|
|
361
365
|
}
|
|
366
|
+
|
|
367
|
+
// Synonym merge: detect tag pairs with edit-distance ≤ 1 and merge lower-frequency into higher
|
|
368
|
+
const synonymMerges = this.mergeSynonymTags();
|
|
369
|
+
|
|
362
370
|
return {
|
|
363
371
|
totalEntries,
|
|
364
372
|
groomedCount: totalEntries,
|
|
365
373
|
tagsNormalized,
|
|
366
374
|
staleCount,
|
|
367
375
|
durationMs: Date.now() - start,
|
|
376
|
+
synonymMerges,
|
|
368
377
|
};
|
|
369
378
|
}
|
|
370
379
|
|
|
380
|
+
/**
 * Merge near-duplicate tags across all entries.
 *
 * Two tags are treated as synonyms when `computeEditDistance` reports a
 * distance of at most 1 (e.g. 'workflow'/'workflows'). Within a pair, the tag
 * with the lower usage count is rewritten to the one with the higher count; on
 * a tie the tag visited first is kept. Each rewritten entry is persisted and
 * recorded through `logChange` with the 'synonym_merge' action.
 *
 * Rows whose `tags` column is not valid JSON, or does not decode to an array,
 * are skipped rather than aborting the whole pass.
 *
 * NOTE(review): a distance-1 rule presumably also pairs short, unrelated tags
 * (e.g. 'ui'/'ux') — confirm whether a minimum tag length is wanted.
 *
 * @returns Number of entries whose tag list was rewritten.
 */
private mergeSynonymTags(): number {
  const rows = this.provider.all<{ id: string; tags: string }>(
    'SELECT id, tags FROM entries WHERE tags IS NOT NULL',
  );

  // Decode every row once; the decoded lists feed both counting and rewriting.
  const decodedEntries: Array<{ id: string; tags: string[] }> = [];
  const tagCounts = new Map<string, number>();

  for (const row of rows) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(row.tags);
    } catch {
      continue;
    }
    // Valid JSON is not necessarily an array ('null', '{}', '"x"', ...).
    if (!Array.isArray(parsed)) continue;

    const tags: string[] = parsed;
    decodedEntries.push({ id: row.id, tags });

    for (const tag of tags) {
      if (typeof tag === 'string' && tag.length > 0) {
        tagCounts.set(tag, (tagCounts.get(tag) ?? 0) + 1);
      }
    }
  }

  const allTags = Array.from(tagCounts.keys());
  if (allTags.length < 2) return 0;

  // Group tags by length: only tags whose lengths differ by at most one are
  // compared against each other, so not every pair has to be checked.
  const buckets = new Map<number, string[]>();
  for (const tag of allTags) {
    const bucket = buckets.get(tag.length);
    if (bucket) {
      bucket.push(tag);
    } else {
      buckets.set(tag.length, [tag]);
    }
  }

  // minor tag → major tag. A tag that is already a key has been absorbed and
  // takes no further part in pairing.
  const mergeMap = new Map<string, string>();

  for (const a of allTags) {
    if (mergeMap.has(a)) continue;

    const candidates: string[] = [
      ...(buckets.get(a.length) ?? []),
      ...(buckets.get(a.length - 1) ?? []),
      ...(buckets.get(a.length + 1) ?? []),
    ];

    for (const b of candidates) {
      if (b === a || mergeMap.has(b)) continue;
      if (computeEditDistance(a, b) > 1) continue;

      const countA = tagCounts.get(a) ?? 0;
      const countB = tagCounts.get(b) ?? 0;
      if (countA >= countB) {
        mergeMap.set(b, a);
      } else {
        mergeMap.set(a, b);
        break; // `a` itself was absorbed; nothing more to pair it with
      }
    }
  }

  if (mergeMap.size === 0) return 0;

  // Rewrite every entry that carries at least one absorbed tag.
  let mergeCount = 0;
  for (const { id, tags } of decodedEntries) {
    if (!tags.some((tag) => mergeMap.has(tag))) continue;

    // Set removes the duplicate created when an entry held both tags of a pair.
    const updated = [...new Set(tags.map((tag) => mergeMap.get(tag) ?? tag))];

    this.provider.run('UPDATE entries SET tags = ?, updated_at = unixepoch() WHERE id = ?', [
      JSON.stringify(updated),
      id,
    ]);
    this.logChange(
      'synonym_merge',
      id,
      JSON.stringify(tags),
      JSON.stringify(updated),
      'Synonym tag merge (edit-distance ≤ 1)',
    );
    mergeCount++;
  }

  return mergeCount;
}
|
|
500
|
+
|
|
371
501
|
// ─── Consolidation ───────────────────────────────────────────
|
|
372
502
|
|
|
373
503
|
consolidate(options?: ConsolidationOptions): ConsolidationResult {
|
|
@@ -419,6 +549,55 @@ export class Curator {
|
|
|
419
549
|
}
|
|
420
550
|
}
|
|
421
551
|
}
|
|
552
|
+
|
|
553
|
+
// Retag: run all entries through canonical normalization if requested
|
|
554
|
+
let retagged: number | undefined;
|
|
555
|
+
if (options?.retag && options.canonicalTags && options.canonicalTags.length > 0) {
|
|
556
|
+
const tagMode = options.tagConstraintMode ?? 'suggest';
|
|
557
|
+
const metaPrefixes = options.metadataTagPrefixes ?? ['source:'];
|
|
558
|
+
retagged = 0;
|
|
559
|
+
|
|
560
|
+
const entryRows = this.provider.all<{ id: string; tags: string }>(
|
|
561
|
+
'SELECT id, tags FROM entries WHERE tags IS NOT NULL',
|
|
562
|
+
);
|
|
563
|
+
|
|
564
|
+
for (const row of entryRows) {
|
|
565
|
+
let tags: string[];
|
|
566
|
+
try {
|
|
567
|
+
tags = JSON.parse(row.tags) as string[];
|
|
568
|
+
} catch {
|
|
569
|
+
continue;
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
const normalized = normalizeTagsCanonical(
|
|
573
|
+
tags,
|
|
574
|
+
options.canonicalTags,
|
|
575
|
+
tagMode,
|
|
576
|
+
metaPrefixes,
|
|
577
|
+
);
|
|
578
|
+
const tagsChanged =
|
|
579
|
+
normalized.length !== tags.length || normalized.some((t, i) => t !== tags[i]);
|
|
580
|
+
|
|
581
|
+
if (tagsChanged) {
|
|
582
|
+
if (!dryRun) {
|
|
583
|
+
this.provider.run(
|
|
584
|
+
'UPDATE entries SET tags = ?, updated_at = unixepoch() WHERE id = ?',
|
|
585
|
+
[JSON.stringify(normalized), row.id],
|
|
586
|
+
);
|
|
587
|
+
this.logChange(
|
|
588
|
+
'retag',
|
|
589
|
+
row.id,
|
|
590
|
+
JSON.stringify(tags),
|
|
591
|
+
JSON.stringify(normalized),
|
|
592
|
+
'Canonical retag during consolidation',
|
|
593
|
+
);
|
|
594
|
+
mutations++;
|
|
595
|
+
}
|
|
596
|
+
retagged++;
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
|
|
422
601
|
return {
|
|
423
602
|
dryRun,
|
|
424
603
|
duplicates,
|
|
@@ -426,6 +605,7 @@ export class Curator {
|
|
|
426
605
|
contradictions,
|
|
427
606
|
mutations,
|
|
428
607
|
durationMs: Date.now() - start,
|
|
608
|
+
retagged,
|
|
429
609
|
};
|
|
430
610
|
}
|
|
431
611
|
|
package/src/curator/types.ts
CHANGED
|
@@ -62,6 +62,7 @@ export interface GroomAllResult {
|
|
|
62
62
|
tagsNormalized: number;
|
|
63
63
|
staleCount: number;
|
|
64
64
|
durationMs: number;
|
|
65
|
+
synonymMerges: number;
|
|
65
66
|
}
|
|
66
67
|
|
|
67
68
|
// ─── Consolidation ──────────────────────────────────────────────────
|
|
@@ -71,6 +72,14 @@ export interface ConsolidationOptions {
|
|
|
71
72
|
staleDaysThreshold?: number;
|
|
72
73
|
duplicateThreshold?: number;
|
|
73
74
|
contradictionThreshold?: number;
|
|
75
|
+
/** When true, run all entries through canonical tag normalization. Dry-run by default. */
|
|
76
|
+
retag?: boolean;
|
|
77
|
+
/** Canonical tag list for retag operation. Required when retag is true. */
|
|
78
|
+
canonicalTags?: string[];
|
|
79
|
+
/** Tag constraint mode for retag. Default: 'suggest'. */
|
|
80
|
+
tagConstraintMode?: 'enforce' | 'suggest' | 'off';
|
|
81
|
+
/** Metadata tag prefixes exempt from canonical normalization. Default: ['source:']. */
|
|
82
|
+
metadataTagPrefixes?: string[];
|
|
74
83
|
}
|
|
75
84
|
|
|
76
85
|
export interface ConsolidationResult {
|
|
@@ -80,6 +89,7 @@ export interface ConsolidationResult {
|
|
|
80
89
|
contradictions: Contradiction[];
|
|
81
90
|
mutations: number;
|
|
82
91
|
durationMs: number;
|
|
92
|
+
retagged?: number;
|
|
83
93
|
}
|
|
84
94
|
|
|
85
95
|
// ─── Changelog & Health ─────────────────────────────────────────────
|
package/src/index.ts
CHANGED
|
@@ -101,6 +101,13 @@ export type {
|
|
|
101
101
|
} from './vault/vault-types.js';
|
|
102
102
|
export { validatePlaybook, parsePlaybookFromEntry } from './vault/playbook.js';
|
|
103
103
|
export type { Playbook, PlaybookStep, PlaybookValidationResult } from './vault/playbook.js';
|
|
104
|
+
export { DEFAULT_CANONICAL_TAGS } from './vault/default-canonical-tags.js';
|
|
105
|
+
export {
|
|
106
|
+
normalizeTag as normalizeTagCanonical,
|
|
107
|
+
normalizeTags as normalizeTagsCanonical,
|
|
108
|
+
isMetadataTag,
|
|
109
|
+
computeEditDistance,
|
|
110
|
+
} from './vault/tag-normalizer.js';
|
|
104
111
|
|
|
105
112
|
// ─── Playbook System (registry, matching, seeding) ─────────────────
|
|
106
113
|
export {
|
|
@@ -45,22 +45,40 @@ Rules:
|
|
|
45
45
|
// CLASSIFIER
|
|
46
46
|
// =============================================================================
|
|
47
47
|
|
|
48
|
+
/**
 * Produce the system prompt used for chunk classification.
 *
 * With no canonical tags (argument omitted or an empty list) the base
 * CLASSIFICATION_PROMPT is returned unchanged. Otherwise a "Tag guidance"
 * paragraph listing the tags, comma-separated, is appended to it.
 *
 * @param canonicalTags - Optional approved tag vocabulary
 * @returns The prompt text to send as the system prompt
 */
export function buildClassificationPrompt(canonicalTags?: string[]): string {
  return canonicalTags?.length
    ? `${CLASSIFICATION_PROMPT}\n\nTag guidance: Use only tags from this approved list where possible: ${canonicalTags.join(', ')}. Create a new tag only when nothing from the list fits the concept.`
    : CLASSIFICATION_PROMPT;
}
|
|
62
|
+
|
|
48
63
|
/**
|
|
49
64
|
* Classify a text chunk into structured knowledge items using an LLM.
|
|
50
65
|
*
|
|
51
|
-
* @param llm
|
|
52
|
-
* @param chunkText
|
|
53
|
-
* @param citation
|
|
66
|
+
* @param llm - LLMClient instance
|
|
67
|
+
* @param chunkText - The text to classify
|
|
68
|
+
* @param citation - Source citation (e.g. "book.pdf, pages 12-15")
|
|
69
|
+
* @param canonicalTags - Optional canonical tag list to inject into the prompt
|
|
54
70
|
* @returns Classified items, or [] on any error
|
|
55
71
|
*/
|
|
56
72
|
export async function classifyChunk(
|
|
57
73
|
llm: LLMClient,
|
|
58
74
|
chunkText: string,
|
|
59
75
|
citation: string,
|
|
76
|
+
canonicalTags?: string[],
|
|
60
77
|
): Promise<ClassifiedItem[]> {
|
|
61
78
|
try {
|
|
79
|
+
const systemPrompt = buildClassificationPrompt(canonicalTags);
|
|
62
80
|
const result = await llm.complete({
|
|
63
|
-
systemPrompt
|
|
81
|
+
systemPrompt,
|
|
64
82
|
userPrompt: chunkText,
|
|
65
83
|
maxTokens: 4096,
|
|
66
84
|
temperature: 0.3,
|
|
@@ -11,6 +11,7 @@ import type { IntelligenceEntry } from '../intelligence/types.js';
|
|
|
11
11
|
import type { ClassifiedItem } from './types.js';
|
|
12
12
|
import { classifyChunk } from './content-classifier.js';
|
|
13
13
|
import { dedupItems } from './dedup-gate.js';
|
|
14
|
+
import { normalizeTags as normalizeTagsCanonical } from '../vault/tag-normalizer.js';
|
|
14
15
|
|
|
15
16
|
// ─── Types ───────────────────────────────────────────────────────────
|
|
16
17
|
|
|
@@ -26,6 +27,12 @@ export interface IngestOptions {
|
|
|
26
27
|
tags?: string[];
|
|
27
28
|
/** Max chars per chunk for LLM classification. Default 4000. */
|
|
28
29
|
chunkSize?: number;
|
|
30
|
+
/** Canonical tag list for normalization. If omitted, no canonical normalization. */
|
|
31
|
+
canonicalTags?: string[];
|
|
32
|
+
/** Tag constraint mode. Default: 'suggest'. */
|
|
33
|
+
tagConstraintMode?: 'enforce' | 'suggest' | 'off';
|
|
34
|
+
/** Metadata tag prefixes exempt from canonical normalization. Default: ['source:']. */
|
|
35
|
+
metadataTagPrefixes?: string[];
|
|
29
36
|
}
|
|
30
37
|
|
|
31
38
|
export interface IngestResult {
|
|
@@ -42,15 +49,30 @@ const FETCH_TIMEOUT_MS = 15000;
|
|
|
42
49
|
|
|
43
50
|
// ─── Class ───────────────────────────────────────────────────────────
|
|
44
51
|
|
|
52
|
+
/** Canonical-tag settings that TextIngester keeps as defaults for ingest calls. */
interface CanonicalTagConfig {
  /** Canonical tag list used for classification guidance and normalization. */
  canonicalTags: Array<string>;
  /** How canonical tags are applied during normalization. */
  tagConstraintMode: 'off' | 'suggest' | 'enforce';
  /** Tag prefixes treated as metadata and exempt from canonical normalization. */
  metadataTagPrefixes: Array<string>;
}
|
|
57
|
+
|
|
45
58
|
export class TextIngester {
|
|
46
59
|
private vault: Vault;
|
|
47
60
|
private llm: LLMClient | null;
|
|
61
|
+
private canonicalTagConfig: CanonicalTagConfig | null = null;
|
|
48
62
|
|
|
49
63
|
constructor(vault: Vault, llm: LLMClient | null) {
|
|
50
64
|
this.vault = vault;
|
|
51
65
|
this.llm = llm;
|
|
52
66
|
}
|
|
53
67
|
|
|
68
|
+
/**
 * Install the runtime-provided canonical tag configuration.
 *
 * The stored config acts as the fallback for ingest calls; values passed
 * explicitly in a call's options are preferred over it.
 * The object is stored as given (no copy is made).
 *
 * @param cfg - Canonical tag list, constraint mode and metadata prefixes
 */
setCanonicalTagConfig(cfg: CanonicalTagConfig): void {
  this.canonicalTagConfig = cfg;
}
|
|
75
|
+
|
|
54
76
|
/**
|
|
55
77
|
* Ingest a URL — fetch, strip HTML, classify, dedup, store.
|
|
56
78
|
*/
|
|
@@ -101,11 +123,19 @@ export class TextIngester {
|
|
|
101
123
|
const domain = opts?.domain ?? 'general';
|
|
102
124
|
const extraTags = opts?.tags ?? [];
|
|
103
125
|
|
|
126
|
+
// Resolve canonical config — caller opts take precedence over runtime-wired config
|
|
127
|
+
const canonicalTagsForClassify = opts?.canonicalTags ?? this.canonicalTagConfig?.canonicalTags;
|
|
128
|
+
|
|
104
129
|
// Classify all chunks
|
|
105
130
|
const allItems: ClassifiedItem[] = [];
|
|
106
131
|
for (const chunk of chunks) {
|
|
107
132
|
// oxlint-disable-next-line eslint(no-await-in-loop)
|
|
108
|
-
const items = await classifyChunk(
|
|
133
|
+
const items = await classifyChunk(
|
|
134
|
+
this.llm,
|
|
135
|
+
chunk,
|
|
136
|
+
`${source.type}: ${source.title}`,
|
|
137
|
+
canonicalTagsForClassify,
|
|
138
|
+
);
|
|
109
139
|
allItems.push(...items);
|
|
110
140
|
}
|
|
111
141
|
|
|
@@ -121,18 +151,37 @@ export class TextIngester {
|
|
|
121
151
|
// Build source attribution for context field
|
|
122
152
|
const attribution = buildAttribution(source);
|
|
123
153
|
|
|
154
|
+
// Metadata tags use 'source:' prefix so they're exempt from canonical normalization
|
|
155
|
+
const metadataTags = [`source:ingested`, `source:${source.type}`];
|
|
156
|
+
|
|
157
|
+
// Apply canonical tag normalization if configured
|
|
158
|
+
// Caller-provided options take precedence over runtime-wired config
|
|
159
|
+
const canonicalTags = opts?.canonicalTags ?? this.canonicalTagConfig?.canonicalTags;
|
|
160
|
+
const tagMode =
|
|
161
|
+
opts?.tagConstraintMode ?? this.canonicalTagConfig?.tagConstraintMode ?? 'suggest';
|
|
162
|
+
|
|
124
163
|
// Store in vault
|
|
125
|
-
const entries: IntelligenceEntry[] = unique.map((item, i) =>
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
164
|
+
const entries: IntelligenceEntry[] = unique.map((item, i) => {
|
|
165
|
+
const rawTags = [...(item.tags ?? []), ...extraTags];
|
|
166
|
+
// metaPrefixes not passed here — source: tags are added after normalization,
|
|
167
|
+
// so there is nothing to exempt at this point.
|
|
168
|
+
const normalizedTags =
|
|
169
|
+
canonicalTags && tagMode !== 'off'
|
|
170
|
+
? normalizeTagsCanonical(rawTags, canonicalTags, tagMode)
|
|
171
|
+
: rawTags;
|
|
172
|
+
|
|
173
|
+
return {
|
|
174
|
+
id: `ingest-${source.type}-${Date.now()}-${i}-${Math.random().toString(36).slice(2, 6)}`,
|
|
175
|
+
type: mapType(item.type),
|
|
176
|
+
domain,
|
|
177
|
+
title: item.title,
|
|
178
|
+
description: item.description,
|
|
179
|
+
severity: mapSeverity(item.severity),
|
|
180
|
+
tags: [...normalizedTags, ...metadataTags],
|
|
181
|
+
context: attribution,
|
|
182
|
+
origin: 'user' as const,
|
|
183
|
+
};
|
|
184
|
+
});
|
|
136
185
|
|
|
137
186
|
if (entries.length > 0) {
|
|
138
187
|
this.vault.seed(entries);
|