@soleri/core 9.14.0 → 9.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/dist/brain/brain.d.ts +9 -0
  2. package/dist/brain/brain.d.ts.map +1 -1
  3. package/dist/brain/brain.js +11 -1
  4. package/dist/brain/brain.js.map +1 -1
  5. package/dist/brain/intelligence.d.ts.map +1 -1
  6. package/dist/brain/intelligence.js +24 -0
  7. package/dist/brain/intelligence.js.map +1 -1
  8. package/dist/brain/types.d.ts +1 -0
  9. package/dist/brain/types.d.ts.map +1 -1
  10. package/dist/chat/chat-session.d.ts +6 -0
  11. package/dist/chat/chat-session.d.ts.map +1 -1
  12. package/dist/chat/chat-session.js +68 -17
  13. package/dist/chat/chat-session.js.map +1 -1
  14. package/dist/curator/curator.d.ts +6 -0
  15. package/dist/curator/curator.d.ts.map +1 -1
  16. package/dist/curator/curator.js +138 -0
  17. package/dist/curator/curator.js.map +1 -1
  18. package/dist/curator/types.d.ts +10 -0
  19. package/dist/curator/types.d.ts.map +1 -1
  20. package/dist/engine/bin/soleri-engine.js +0 -0
  21. package/dist/flows/types.d.ts +16 -16
  22. package/dist/index.d.ts +2 -0
  23. package/dist/index.d.ts.map +1 -1
  24. package/dist/index.js +2 -0
  25. package/dist/index.js.map +1 -1
  26. package/dist/intake/content-classifier.d.ts +10 -4
  27. package/dist/intake/content-classifier.d.ts.map +1 -1
  28. package/dist/intake/content-classifier.js +19 -5
  29. package/dist/intake/content-classifier.js.map +1 -1
  30. package/dist/intake/text-ingester.d.ts +18 -0
  31. package/dist/intake/text-ingester.d.ts.map +1 -1
  32. package/dist/intake/text-ingester.js +37 -13
  33. package/dist/intake/text-ingester.js.map +1 -1
  34. package/dist/planning/planner.d.ts +3 -0
  35. package/dist/planning/planner.d.ts.map +1 -1
  36. package/dist/planning/planner.js +43 -4
  37. package/dist/planning/planner.js.map +1 -1
  38. package/dist/plugins/types.d.ts +2 -2
  39. package/dist/runtime/admin-setup-ops.d.ts.map +1 -1
  40. package/dist/runtime/admin-setup-ops.js +59 -20
  41. package/dist/runtime/admin-setup-ops.js.map +1 -1
  42. package/dist/runtime/facades/orchestrate-facade.d.ts.map +1 -1
  43. package/dist/runtime/facades/orchestrate-facade.js +28 -1
  44. package/dist/runtime/facades/orchestrate-facade.js.map +1 -1
  45. package/dist/runtime/runtime.d.ts.map +1 -1
  46. package/dist/runtime/runtime.js +16 -0
  47. package/dist/runtime/runtime.js.map +1 -1
  48. package/dist/runtime/types.d.ts +19 -0
  49. package/dist/runtime/types.d.ts.map +1 -1
  50. package/dist/skills/sync-skills.d.ts.map +1 -1
  51. package/dist/skills/sync-skills.js +9 -3
  52. package/dist/skills/sync-skills.js.map +1 -1
  53. package/dist/skills/validate-skills.d.ts +32 -0
  54. package/dist/skills/validate-skills.d.ts.map +1 -0
  55. package/dist/skills/validate-skills.js +396 -0
  56. package/dist/skills/validate-skills.js.map +1 -0
  57. package/dist/vault/default-canonical-tags.d.ts +15 -0
  58. package/dist/vault/default-canonical-tags.d.ts.map +1 -0
  59. package/dist/vault/default-canonical-tags.js +65 -0
  60. package/dist/vault/default-canonical-tags.js.map +1 -0
  61. package/dist/vault/tag-normalizer.d.ts +42 -0
  62. package/dist/vault/tag-normalizer.d.ts.map +1 -0
  63. package/dist/vault/tag-normalizer.js +157 -0
  64. package/dist/vault/tag-normalizer.js.map +1 -0
  65. package/package.json +6 -2
  66. package/src/__tests__/embeddings.test.ts +3 -3
  67. package/src/brain/brain.ts +25 -1
  68. package/src/brain/intelligence.ts +25 -0
  69. package/src/brain/types.ts +1 -0
  70. package/src/chat/chat-session.ts +75 -17
  71. package/src/chat/chat-transport.test.ts +31 -1
  72. package/src/curator/curator.ts +180 -0
  73. package/src/curator/types.ts +10 -0
  74. package/src/index.ts +7 -0
  75. package/src/intake/content-classifier.ts +22 -4
  76. package/src/intake/text-ingester.ts +61 -12
  77. package/src/planning/planner.test.ts +86 -90
  78. package/src/planning/planner.ts +48 -4
  79. package/src/runtime/admin-setup-ops.test.ts +44 -0
  80. package/src/runtime/admin-setup-ops.ts +59 -20
  81. package/src/runtime/facades/orchestrate-facade.ts +27 -1
  82. package/src/runtime/runtime.ts +18 -0
  83. package/src/runtime/types.ts +19 -0
  84. package/src/skills/sync-skills.ts +9 -3
  85. package/src/skills/validate-skills.test.ts +205 -0
  86. package/src/skills/validate-skills.ts +470 -0
  87. package/src/vault/default-canonical-tags.ts +64 -0
  88. package/src/vault/tag-normalizer.test.ts +214 -0
  89. package/src/vault/tag-normalizer.ts +188 -0
  90. package/dist/embeddings/index.d.ts +0 -5
  91. package/dist/embeddings/index.d.ts.map +0 -1
  92. package/dist/embeddings/index.js +0 -3
  93. package/dist/embeddings/index.js.map +0 -1
  94. package/dist/knowledge-packs/knowledge-packs/community/.gitkeep +0 -0
  95. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/soleri-pack.json +0 -10
  96. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/accessibility.json +0 -53
  97. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/design-tokens.json +0 -26
  98. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/design.json +0 -33
  99. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/styling.json +0 -44
  100. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/ux-laws.json +0 -36
  101. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-craft/vault/ux.json +0 -36
  102. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/soleri-pack.json +0 -10
  103. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/architecture.json +0 -143
  104. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/commercial.json +0 -16
  105. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/communication.json +0 -33
  106. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/component.json +0 -16
  107. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/express.json +0 -34
  108. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/leadership.json +0 -33
  109. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/methodology.json +0 -33
  110. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/monorepo.json +0 -33
  111. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/other.json +0 -73
  112. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/performance.json +0 -35
  113. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/prisma.json +0 -33
  114. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/product-strategy.json +0 -42
  115. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/react.json +0 -47
  116. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/security.json +0 -34
  117. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/testing.json +0 -33
  118. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/tooling.json +0 -85
  119. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/typescript.json +0 -34
  120. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-engineering/vault/workflow.json +0 -46
  121. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-uipro/soleri-pack.json +0 -10
  122. package/dist/knowledge-packs/knowledge-packs/salvador/salvador-uipro/vault/design.json +0 -2589
  123. package/dist/knowledge-packs/knowledge-packs/starter/architecture/soleri-pack.json +0 -10
  124. package/dist/knowledge-packs/knowledge-packs/starter/architecture/vault/patterns.json +0 -137
  125. package/dist/knowledge-packs/knowledge-packs/starter/design/soleri-pack.json +0 -10
  126. package/dist/knowledge-packs/knowledge-packs/starter/design/vault/patterns.json +0 -137
  127. package/dist/knowledge-packs/knowledge-packs/starter/security/soleri-pack.json +0 -10
  128. package/dist/knowledge-packs/knowledge-packs/starter/security/vault/patterns.json +0 -137
  129. /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/api-design/soleri-pack.json +0 -0
  130. /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/api-design/vault/patterns.json +0 -0
  131. /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/nodejs/soleri-pack.json +0 -0
  132. /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/nodejs/vault/patterns.json +0 -0
  133. /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/react/soleri-pack.json +0 -0
  134. /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/react/vault/patterns.json +0 -0
  135. /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/testing/soleri-pack.json +0 -0
  136. /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/testing/vault/patterns.json +0 -0
  137. /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/typescript/soleri-pack.json +0 -0
  138. /package/dist/knowledge-packs/{knowledge-packs/starter → starter}/typescript/vault/patterns.json +0 -0
@@ -4,7 +4,7 @@
4
4
  */
5
5
 
6
6
  import { describe, test, expect, beforeEach, afterEach } from 'vitest';
7
- import { mkdtempSync, rmSync } from 'node:fs';
7
+ import { mkdtempSync, rmSync, readFileSync, writeFileSync, existsSync } from 'node:fs';
8
8
  import { join } from 'node:path';
9
9
  import { tmpdir } from 'node:os';
10
10
  import { ChatSessionManager } from './chat-session.js';
@@ -136,6 +136,19 @@ describe('ChatSessionManager', () => {
136
136
  expect(all).toContain('chat-2');
137
137
  });
138
138
 
139
+ test('listAll ignores non-session JSON files in the storage root', () => {
140
+ manager.getOrCreate('chat-1');
141
+ writeFileSync(
142
+ join(dir, 'plans.json'),
143
+ JSON.stringify({ version: '1.0', plans: [] }),
144
+ 'utf-8',
145
+ );
146
+
147
+ const all = manager.listAll();
148
+ expect(all).toContain('chat-1');
149
+ expect(all).not.toContain('plans');
150
+ });
151
+
139
152
  test('setMeta updates metadata', () => {
140
153
  manager.getOrCreate('chat-1');
141
154
  manager.setMeta('chat-1', { mood: 'happy' });
@@ -162,6 +175,23 @@ describe('ChatSessionManager', () => {
162
175
  manager2.close();
163
176
  });
164
177
 
178
+ test('session files are namespaced away from plans.json collisions', () => {
179
+ writeFileSync(
180
+ join(dir, 'plans.json'),
181
+ JSON.stringify({ version: '1.0', plans: [{ id: 'plan-1' }] }),
182
+ 'utf-8',
183
+ );
184
+
185
+ const session = manager.getOrCreate('plans');
186
+
187
+ expect(session.messages).toEqual([]);
188
+ expect(JSON.parse(readFileSync(join(dir, 'plans.json'), 'utf-8'))).toEqual({
189
+ version: '1.0',
190
+ plans: [{ id: 'plan-1' }],
191
+ });
192
+ expect(existsSync(join(dir, 'sessions', 'plans.json'))).toBe(true);
193
+ });
194
+
165
195
  test('delete removes from disk', () => {
166
196
  manager.getOrCreate('chat-1');
167
197
  manager.delete('chat-1');
@@ -40,6 +40,10 @@ import {
40
40
  import { initializeTables } from './schema.js';
41
41
  import { computeHealthAudit, type HealthDataProvider } from './health-audit.js';
42
42
  import { enrichEntryMetadata } from './metadata-enricher.js';
43
+ import {
44
+ computeEditDistance,
45
+ normalizeTags as normalizeTagsCanonical,
46
+ } from '../vault/tag-normalizer.js';
43
47
 
44
48
  // ─── Constants ──────────────────────────────────────────────────────
45
49
 
@@ -359,15 +363,141 @@ export class Curator {
359
363
  if (batch.length < DEFAULT_BATCH_SIZE) break;
360
364
  offset += DEFAULT_BATCH_SIZE;
361
365
  }
366
+
367
+ // Synonym merge: detect tag pairs with edit-distance ≤ 1 and merge lower-frequency into higher
368
+ const synonymMerges = this.mergeSynonymTags();
369
+
362
370
  return {
363
371
  totalEntries,
364
372
  groomedCount: totalEntries,
365
373
  tagsNormalized,
366
374
  staleCount,
367
375
  durationMs: Date.now() - start,
376
+ synonymMerges,
368
377
  };
369
378
  }
370
379
 
380
+ /**
381
+ * Detect tag pairs where edit-distance ≤ 1 (e.g. 'workflow'/'workflows') and merge
382
+ * the lower-frequency tag into the higher-frequency one across all entries.
383
+ * Returns the number of entries whose tag lists were rewritten (not the number of distinct tags merged).
384
+ */
385
+ private mergeSynonymTags(): number {
386
+ // Collect all unique tags and their usage counts
387
+ const rows = this.provider.all<{ tags: string }>(
388
+ 'SELECT tags FROM entries WHERE tags IS NOT NULL',
389
+ );
390
+ const tagCounts = new Map<string, number>();
391
+
392
+ for (const row of rows) {
393
+ let tags: string[];
394
+ try {
395
+ tags = JSON.parse(row.tags) as string[];
396
+ } catch {
397
+ continue;
398
+ }
399
+ for (const tag of tags) {
400
+ if (typeof tag === 'string' && tag.length > 0) {
401
+ tagCounts.set(tag, (tagCounts.get(tag) ?? 0) + 1);
402
+ }
403
+ }
404
+ }
405
+
406
+ const allTags = Array.from(tagCounts.keys());
407
+ if (allTags.length < 2) return 0;
408
+
409
+ // Build synonym merge map: minorTag → majorTag
410
+ // Only merge if edit-distance ≤ 1 and major has higher or equal frequency
411
+ const mergeMap = new Map<string, string>(); // minor → major
412
+ const processed = new Set<string>();
413
+
414
+ // Bucket tags by length to reduce comparisons from O(n²) to O(n * avg_bucket_size)
415
+ const buckets = new Map<number, string[]>();
416
+ for (const tag of allTags) {
417
+ const len = tag.length;
418
+ const bucket = buckets.get(len);
419
+ if (bucket) {
420
+ bucket.push(tag);
421
+ } else {
422
+ buckets.set(len, [tag]);
423
+ }
424
+ }
425
+
426
+ for (const a of allTags) {
427
+ if (processed.has(a)) continue;
428
+ // Only compare against tags of the same or adjacent length (edit distance ≤ 1)
429
+ const candidates: string[] = [
430
+ ...(buckets.get(a.length) ?? []),
431
+ ...(buckets.get(a.length - 1) ?? []),
432
+ ...(buckets.get(a.length + 1) ?? []),
433
+ ];
434
+ for (const b of candidates) {
435
+ if (b === a) continue;
436
+ if (processed.has(a) || processed.has(b)) continue;
437
+ if (computeEditDistance(a, b) <= 1) {
438
+ const countA = tagCounts.get(a) ?? 0;
439
+ const countB = tagCounts.get(b) ?? 0;
440
+ // Merge lower-frequency into higher-frequency
441
+ if (countA >= countB) {
442
+ mergeMap.set(b, a);
443
+ processed.add(b);
444
+ } else {
445
+ mergeMap.set(a, b);
446
+ processed.add(a);
447
+ }
448
+ }
449
+ }
450
+ }
451
+
452
+ if (mergeMap.size === 0) return 0;
453
+
454
+ // Apply merges to all affected entries
455
+ let mergeCount = 0;
456
+ const allEntryRows = this.provider.all<{ id: string; tags: string }>(
457
+ 'SELECT id, tags FROM entries WHERE tags IS NOT NULL',
458
+ );
459
+
460
+ for (const row of allEntryRows) {
461
+ let tags: string[];
462
+ try {
463
+ tags = JSON.parse(row.tags) as string[];
464
+ } catch {
465
+ continue;
466
+ }
467
+
468
+ let changed = false;
469
+ const updated = [
470
+ ...new Set(
471
+ tags.map((tag) => {
472
+ const replacement = mergeMap.get(tag);
473
+ if (replacement) {
474
+ changed = true;
475
+ return replacement;
476
+ }
477
+ return tag;
478
+ }),
479
+ ),
480
+ ];
481
+
482
+ if (changed) {
483
+ this.provider.run('UPDATE entries SET tags = ?, updated_at = unixepoch() WHERE id = ?', [
484
+ JSON.stringify(updated),
485
+ row.id,
486
+ ]);
487
+ this.logChange(
488
+ 'synonym_merge',
489
+ row.id,
490
+ JSON.stringify(tags),
491
+ JSON.stringify(updated),
492
+ 'Synonym tag merge (edit-distance ≤ 1)',
493
+ );
494
+ mergeCount++;
495
+ }
496
+ }
497
+
498
+ return mergeCount;
499
+ }
500
+
371
501
  // ─── Consolidation ───────────────────────────────────────────
372
502
 
373
503
  consolidate(options?: ConsolidationOptions): ConsolidationResult {
@@ -419,6 +549,55 @@ export class Curator {
419
549
  }
420
550
  }
421
551
  }
552
+
553
+ // Retag: run all entries through canonical normalization if requested
554
+ let retagged: number | undefined;
555
+ if (options?.retag && options.canonicalTags && options.canonicalTags.length > 0) {
556
+ const tagMode = options.tagConstraintMode ?? 'suggest';
557
+ const metaPrefixes = options.metadataTagPrefixes ?? ['source:'];
558
+ retagged = 0;
559
+
560
+ const entryRows = this.provider.all<{ id: string; tags: string }>(
561
+ 'SELECT id, tags FROM entries WHERE tags IS NOT NULL',
562
+ );
563
+
564
+ for (const row of entryRows) {
565
+ let tags: string[];
566
+ try {
567
+ tags = JSON.parse(row.tags) as string[];
568
+ } catch {
569
+ continue;
570
+ }
571
+
572
+ const normalized = normalizeTagsCanonical(
573
+ tags,
574
+ options.canonicalTags,
575
+ tagMode,
576
+ metaPrefixes,
577
+ );
578
+ const tagsChanged =
579
+ normalized.length !== tags.length || normalized.some((t, i) => t !== tags[i]);
580
+
581
+ if (tagsChanged) {
582
+ if (!dryRun) {
583
+ this.provider.run(
584
+ 'UPDATE entries SET tags = ?, updated_at = unixepoch() WHERE id = ?',
585
+ [JSON.stringify(normalized), row.id],
586
+ );
587
+ this.logChange(
588
+ 'retag',
589
+ row.id,
590
+ JSON.stringify(tags),
591
+ JSON.stringify(normalized),
592
+ 'Canonical retag during consolidation',
593
+ );
594
+ mutations++;
595
+ }
596
+ retagged++;
597
+ }
598
+ }
599
+ }
600
+
422
601
  return {
423
602
  dryRun,
424
603
  duplicates,
@@ -426,6 +605,7 @@ export class Curator {
426
605
  contradictions,
427
606
  mutations,
428
607
  durationMs: Date.now() - start,
608
+ retagged,
429
609
  };
430
610
  }
431
611
 
@@ -62,6 +62,7 @@ export interface GroomAllResult {
62
62
  tagsNormalized: number;
63
63
  staleCount: number;
64
64
  durationMs: number;
65
+ synonymMerges: number;
65
66
  }
66
67
 
67
68
  // ─── Consolidation ──────────────────────────────────────────────────
@@ -71,6 +72,14 @@ export interface ConsolidationOptions {
71
72
  staleDaysThreshold?: number;
72
73
  duplicateThreshold?: number;
73
74
  contradictionThreshold?: number;
75
+ /** When true, run all entries through canonical tag normalization. Dry-run by default. */
76
+ retag?: boolean;
77
+ /** Canonical tag list for retag operation. Required when retag is true. */
78
+ canonicalTags?: string[];
79
+ /** Tag constraint mode for retag. Default: 'suggest'. */
80
+ tagConstraintMode?: 'enforce' | 'suggest' | 'off';
81
+ /** Metadata tag prefixes exempt from canonical normalization. Default: ['source:']. */
82
+ metadataTagPrefixes?: string[];
74
83
  }
75
84
 
76
85
  export interface ConsolidationResult {
@@ -80,6 +89,7 @@ export interface ConsolidationResult {
80
89
  contradictions: Contradiction[];
81
90
  mutations: number;
82
91
  durationMs: number;
92
+ retagged?: number;
83
93
  }
84
94
 
85
95
  // ─── Changelog & Health ─────────────────────────────────────────────
package/src/index.ts CHANGED
@@ -101,6 +101,13 @@ export type {
101
101
  } from './vault/vault-types.js';
102
102
  export { validatePlaybook, parsePlaybookFromEntry } from './vault/playbook.js';
103
103
  export type { Playbook, PlaybookStep, PlaybookValidationResult } from './vault/playbook.js';
104
+ export { DEFAULT_CANONICAL_TAGS } from './vault/default-canonical-tags.js';
105
+ export {
106
+ normalizeTag as normalizeTagCanonical,
107
+ normalizeTags as normalizeTagsCanonical,
108
+ isMetadataTag,
109
+ computeEditDistance,
110
+ } from './vault/tag-normalizer.js';
104
111
 
105
112
  // ─── Playbook System (registry, matching, seeding) ─────────────────
106
113
  export {
@@ -45,22 +45,40 @@ Rules:
45
45
  // CLASSIFIER
46
46
  // =============================================================================
47
47
 
48
+ /**
49
+ * Build the classification system prompt, optionally injecting a canonical tag list.
50
+ * When canonical tags are provided, the LLM is guided to prefer them.
51
+ */
52
+ export function buildClassificationPrompt(canonicalTags?: string[]): string {
53
+ if (!canonicalTags || canonicalTags.length === 0) {
54
+ return CLASSIFICATION_PROMPT;
55
+ }
56
+ const tagList = canonicalTags.join(', ');
57
+ return (
58
+ CLASSIFICATION_PROMPT +
59
+ `\n\nTag guidance: Use only tags from this approved list where possible: ${tagList}. Create a new tag only when nothing from the list fits the concept.`
60
+ );
61
+ }
62
+
48
63
  /**
49
64
  * Classify a text chunk into structured knowledge items using an LLM.
50
65
  *
51
- * @param llm - LLMClient instance
52
- * @param chunkText - The text to classify
53
- * @param citation - Source citation (e.g. "book.pdf, pages 12-15")
66
+ * @param llm - LLMClient instance
67
+ * @param chunkText - The text to classify
68
+ * @param citation - Source citation (e.g. "book.pdf, pages 12-15")
69
+ * @param canonicalTags - Optional canonical tag list to inject into the prompt
54
70
  * @returns Classified items, or [] on any error
55
71
  */
56
72
  export async function classifyChunk(
57
73
  llm: LLMClient,
58
74
  chunkText: string,
59
75
  citation: string,
76
+ canonicalTags?: string[],
60
77
  ): Promise<ClassifiedItem[]> {
61
78
  try {
79
+ const systemPrompt = buildClassificationPrompt(canonicalTags);
62
80
  const result = await llm.complete({
63
- systemPrompt: CLASSIFICATION_PROMPT,
81
+ systemPrompt,
64
82
  userPrompt: chunkText,
65
83
  maxTokens: 4096,
66
84
  temperature: 0.3,
@@ -11,6 +11,7 @@ import type { IntelligenceEntry } from '../intelligence/types.js';
11
11
  import type { ClassifiedItem } from './types.js';
12
12
  import { classifyChunk } from './content-classifier.js';
13
13
  import { dedupItems } from './dedup-gate.js';
14
+ import { normalizeTags as normalizeTagsCanonical } from '../vault/tag-normalizer.js';
14
15
 
15
16
  // ─── Types ───────────────────────────────────────────────────────────
16
17
 
@@ -26,6 +27,12 @@ export interface IngestOptions {
26
27
  tags?: string[];
27
28
  /** Max chars per chunk for LLM classification. Default 4000. */
28
29
  chunkSize?: number;
30
+ /** Canonical tag list for normalization. If omitted, no canonical normalization. */
31
+ canonicalTags?: string[];
32
+ /** Tag constraint mode. Default: 'suggest'. */
33
+ tagConstraintMode?: 'enforce' | 'suggest' | 'off';
34
+ /** Metadata tag prefixes exempt from canonical normalization. Default: ['source:']. */
35
+ metadataTagPrefixes?: string[];
29
36
  }
30
37
 
31
38
  export interface IngestResult {
@@ -42,15 +49,30 @@ const FETCH_TIMEOUT_MS = 15000;
42
49
 
43
50
  // ─── Class ───────────────────────────────────────────────────────────
44
51
 
52
+ interface CanonicalTagConfig {
53
+ canonicalTags: string[];
54
+ tagConstraintMode: 'enforce' | 'suggest' | 'off';
55
+ metadataTagPrefixes: string[];
56
+ }
57
+
45
58
  export class TextIngester {
46
59
  private vault: Vault;
47
60
  private llm: LLMClient | null;
61
+ private canonicalTagConfig: CanonicalTagConfig | null = null;
48
62
 
49
63
  constructor(vault: Vault, llm: LLMClient | null) {
50
64
  this.vault = vault;
51
65
  this.llm = llm;
52
66
  }
53
67
 
68
+ /**
69
+ * Wire canonical tag config from runtime — used as defaults for all ingest calls.
70
+ * Caller-provided options in ingestText/ingestUrl/ingestBatch still take precedence.
71
+ */
72
+ setCanonicalTagConfig(cfg: CanonicalTagConfig): void {
73
+ this.canonicalTagConfig = cfg;
74
+ }
75
+
54
76
  /**
55
77
  * Ingest a URL — fetch, strip HTML, classify, dedup, store.
56
78
  */
@@ -101,11 +123,19 @@ export class TextIngester {
101
123
  const domain = opts?.domain ?? 'general';
102
124
  const extraTags = opts?.tags ?? [];
103
125
 
126
+ // Resolve canonical config — caller opts take precedence over runtime-wired config
127
+ const canonicalTagsForClassify = opts?.canonicalTags ?? this.canonicalTagConfig?.canonicalTags;
128
+
104
129
  // Classify all chunks
105
130
  const allItems: ClassifiedItem[] = [];
106
131
  for (const chunk of chunks) {
107
132
  // oxlint-disable-next-line eslint(no-await-in-loop)
108
- const items = await classifyChunk(this.llm, chunk, `${source.type}: ${source.title}`);
133
+ const items = await classifyChunk(
134
+ this.llm,
135
+ chunk,
136
+ `${source.type}: ${source.title}`,
137
+ canonicalTagsForClassify,
138
+ );
109
139
  allItems.push(...items);
110
140
  }
111
141
 
@@ -121,18 +151,37 @@ export class TextIngester {
121
151
  // Build source attribution for context field
122
152
  const attribution = buildAttribution(source);
123
153
 
154
+ // Metadata tags use 'source:' prefix so they're exempt from canonical normalization
155
+ const metadataTags = [`source:ingested`, `source:${source.type}`];
156
+
157
+ // Apply canonical tag normalization if configured
158
+ // Caller-provided options take precedence over runtime-wired config
159
+ const canonicalTags = opts?.canonicalTags ?? this.canonicalTagConfig?.canonicalTags;
160
+ const tagMode =
161
+ opts?.tagConstraintMode ?? this.canonicalTagConfig?.tagConstraintMode ?? 'suggest';
162
+
124
163
  // Store in vault
125
- const entries: IntelligenceEntry[] = unique.map((item, i) => ({
126
- id: `ingest-${source.type}-${Date.now()}-${i}-${Math.random().toString(36).slice(2, 6)}`,
127
- type: mapType(item.type),
128
- domain,
129
- title: item.title,
130
- description: item.description,
131
- severity: mapSeverity(item.severity),
132
- tags: [...(item.tags ?? []), ...extraTags, 'ingested', source.type],
133
- context: attribution,
134
- origin: 'user' as const,
135
- }));
164
+ const entries: IntelligenceEntry[] = unique.map((item, i) => {
165
+ const rawTags = [...(item.tags ?? []), ...extraTags];
166
+ // metaPrefixes not passed here — source: tags are added after normalization,
167
+ // so there is nothing to exempt at this point.
168
+ const normalizedTags =
169
+ canonicalTags && tagMode !== 'off'
170
+ ? normalizeTagsCanonical(rawTags, canonicalTags, tagMode)
171
+ : rawTags;
172
+
173
+ return {
174
+ id: `ingest-${source.type}-${Date.now()}-${i}-${Math.random().toString(36).slice(2, 6)}`,
175
+ type: mapType(item.type),
176
+ domain,
177
+ title: item.title,
178
+ description: item.description,
179
+ severity: mapSeverity(item.severity),
180
+ tags: [...normalizedTags, ...metadataTags],
181
+ context: attribution,
182
+ origin: 'user' as const,
183
+ };
184
+ });
136
185
 
137
186
  if (entries.length > 0) {
138
187
  this.vault.seed(entries);