@pella-labs/pinakes 0.3.15 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +16 -7
  2. package/dist/cli/audit-wiki.d.ts +45 -1
  3. package/dist/cli/audit-wiki.d.ts.map +1 -1
  4. package/dist/cli/audit-wiki.js +346 -117
  5. package/dist/cli/audit-wiki.js.map +1 -1
  6. package/dist/cli/claims.d.ts +49 -0
  7. package/dist/cli/claims.d.ts.map +1 -0
  8. package/dist/cli/claims.js +169 -0
  9. package/dist/cli/claims.js.map +1 -0
  10. package/dist/cli/contradiction.d.ts +46 -28
  11. package/dist/cli/contradiction.d.ts.map +1 -1
  12. package/dist/cli/contradiction.js +182 -115
  13. package/dist/cli/contradiction.js.map +1 -1
  14. package/dist/cli/index.js +4 -2
  15. package/dist/cli/index.js.map +1 -1
  16. package/dist/cli/progress.d.ts +19 -0
  17. package/dist/cli/progress.d.ts.map +1 -0
  18. package/dist/cli/progress.js +44 -0
  19. package/dist/cli/progress.js.map +1 -0
  20. package/dist/db/client.js +1 -1
  21. package/dist/db/migrations/0003_add_pinakes_claims.sql +13 -0
  22. package/dist/db/migrations/0004_add_confidence_score.sql +12 -0
  23. package/dist/db/migrations/meta/_journal.json +14 -0
  24. package/dist/db/schema.d.ts +161 -1
  25. package/dist/db/schema.d.ts.map +1 -1
  26. package/dist/db/schema.js +24 -1
  27. package/dist/db/schema.js.map +1 -1
  28. package/dist/gate/confidence.d.ts +82 -0
  29. package/dist/gate/confidence.d.ts.map +1 -0
  30. package/dist/gate/confidence.js +190 -0
  31. package/dist/gate/confidence.js.map +1 -0
  32. package/dist/ingest/ingester.d.ts.map +1 -1
  33. package/dist/ingest/ingester.js +4 -3
  34. package/dist/ingest/ingester.js.map +1 -1
  35. package/dist/ingest/repo-mirror.d.ts.map +1 -1
  36. package/dist/ingest/repo-mirror.js +5 -1
  37. package/dist/ingest/repo-mirror.js.map +1 -1
  38. package/dist/init/copy.d.ts.map +1 -1
  39. package/dist/init/copy.js +9 -0
  40. package/dist/init/copy.js.map +1 -1
  41. package/dist/init/scanner.js +7 -0
  42. package/dist/init/scanner.js.map +1 -1
  43. package/dist/mcp/tools/search.d.ts.map +1 -1
  44. package/dist/mcp/tools/search.js +2 -2
  45. package/dist/mcp/tools/search.js.map +1 -1
  46. package/dist/retrieval/fts.d.ts +1 -0
  47. package/dist/retrieval/fts.d.ts.map +1 -1
  48. package/dist/retrieval/fts.js +18 -2
  49. package/dist/retrieval/fts.js.map +1 -1
  50. package/dist/retrieval/hybrid.d.ts +1 -0
  51. package/dist/retrieval/hybrid.d.ts.map +1 -1
  52. package/dist/retrieval/hybrid.js +5 -0
  53. package/dist/retrieval/hybrid.js.map +1 -1
  54. package/dist/retrieval/vec.d.ts +1 -0
  55. package/dist/retrieval/vec.d.ts.map +1 -1
  56. package/dist/retrieval/vec.js +17 -2
  57. package/dist/retrieval/vec.js.map +1 -1
  58. package/dist/sandbox/bindings/pinakes.d.ts.map +1 -1
  59. package/dist/sandbox/bindings/pinakes.js +9 -2
  60. package/dist/sandbox/bindings/pinakes.js.map +1 -1
  61. package/package.json +30 -19
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Topic-claim extraction for the audit-wiki v2 contradiction pipeline (D41).
3
+ *
4
+ * Phase A of the two-phase pipeline:
5
+ * 1. For each wiki file, send content to LLM → extract {topic, claims[]} pairs
6
+ * 2. Persist claims to `pinakes_claims` table
7
+ * 3. Skip unchanged files (incremental via source_sha comparison)
8
+ */
9
+ import type { Database as BetterSqliteDatabase } from 'better-sqlite3';
10
+ import type { LlmProvider } from '../llm/provider.js';
11
+ export interface ExtractedClaim {
12
+ topic: string;
13
+ claim: string;
14
+ source_uri: string;
15
+ }
16
+ interface TopicClaims {
17
+ topic: string;
18
+ claims: string[];
19
+ }
20
+ export interface ClaimExtractionResult {
21
+ files_processed: number;
22
+ files_skipped: number;
23
+ claims_extracted: number;
24
+ }
25
+ /**
26
+ * Extract topics and claims from a single file's content via LLM.
27
+ */
28
+ export declare function extractClaimsFromFile(content: string, sourceUri: string, llmProvider: LlmProvider): Promise<ExtractedClaim[]>;
29
+ /**
30
+ * Parse LLM response, handling JSON wrapped in markdown code fences.
31
+ */
32
+ export declare function parseExtractionResponse(response: string): TopicClaims[] | null;
33
+ /**
34
+ * Extract claims from all wiki files in a scope, persisting to DB.
35
+ * Skips files whose source_sha hasn't changed since last extraction.
36
+ */
37
+ export declare function extractAllClaims(writer: BetterSqliteDatabase, scope: string, llmProvider: LlmProvider, onTick?: (sourceUri: string, detail: string) => void): Promise<ClaimExtractionResult>;
38
+ /**
39
+ * Query all persisted claims for a scope, optionally filtered by topic.
40
+ */
41
+ export declare function queryClaims(reader: BetterSqliteDatabase, scope: string, topic?: string): Array<{
42
+ id: number;
43
+ source_uri: string;
44
+ topic: string;
45
+ claim: string;
46
+ extracted_at: number;
47
+ }>;
48
+ export {};
49
+ //# sourceMappingURL=claims.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claims.d.ts","sourceRoot":"","sources":["../../src/cli/claims.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,QAAQ,IAAI,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAEvE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAOtD,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,UAAU,WAAW;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,qBAAqB;IACpC,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAuBD;;GAEG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,WAAW,GACvB,OAAO,CAAC,cAAc,EAAE,CAAC,CAwB3B;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,WAAW,EAAE,GAAG,IAAI,CAwB9E;AAMD;;;GAGG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,WAAW,EACxB,MAAM,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,GACnD,OAAO,CAAC,qBAAqB,CAAC,CAsFhC;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,MAAM,GACb,KAAK,CAAC;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,CAAC,CAkB/F"}
@@ -0,0 +1,169 @@
1
+ /**
2
+ * Topic-claim extraction for the audit-wiki v2 contradiction pipeline (D41).
3
+ *
4
+ * Phase A of the two-phase pipeline:
5
+ * 1. For each wiki file, send content to LLM → extract {topic, claims[]} pairs
6
+ * 2. Persist claims to `pinakes_claims` table
7
+ * 3. Skip unchanged files (incremental via source_sha comparison)
8
+ */
9
+ import { logger } from '../observability/logger.js';
10
+ // ----------------------------------------------------------------------------
11
+ // Extraction prompt
12
+ // ----------------------------------------------------------------------------
13
+ const EXTRACTION_SYSTEM = `You are a claim extractor for a knowledge wiki. Given the content of a wiki page, identify the key topics discussed and extract factual claims made about each topic.
14
+
15
+ Return ONLY valid JSON in this format:
16
+ {"topics":[{"topic":"topic name","claims":["claim 1","claim 2"]}]}
17
+
18
+ Rules:
19
+ - Use the most canonical, commonly-used name for each topic (e.g., "authentication" not "auth flow")
20
+ - Each claim should be a single, self-contained factual statement
21
+ - Only extract concrete claims (numbers, versions, choices, constraints), not vague descriptions
22
+ - Group related subtopics under their parent topic
23
+ - Limit to the 5-10 most important topics per page
24
+ - Each topic should have 1-5 claims`;
25
+ // ----------------------------------------------------------------------------
26
+ // Core extraction
27
+ // ----------------------------------------------------------------------------
28
+ /**
29
+ * Extract topics and claims from a single file's content via LLM.
30
+ */
31
+ export async function extractClaimsFromFile(content, sourceUri, llmProvider) {
32
+ const prompt = `Extract topics and claims from this wiki page:\n\n---\n${content.slice(0, 12000)}\n---`;
33
+ const response = await llmProvider.complete({
34
+ system: EXTRACTION_SYSTEM,
35
+ prompt,
36
+ maxTokens: 2000,
37
+ });
38
+ const parsed = parseExtractionResponse(response);
39
+ if (!parsed)
40
+ return [];
41
+ const claims = [];
42
+ for (const tc of parsed) {
43
+ for (const claim of tc.claims) {
44
+ claims.push({
45
+ topic: tc.topic.toLowerCase().trim(),
46
+ claim: claim.trim(),
47
+ source_uri: sourceUri,
48
+ });
49
+ }
50
+ }
51
+ return claims;
52
+ }
53
+ /**
54
+ * Parse LLM response, handling JSON wrapped in markdown code fences.
55
+ */
56
+ export function parseExtractionResponse(response) {
57
+ try {
58
+ // Try extracting JSON from code fences first
59
+ const fenceMatch = response.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
60
+ const jsonStr = fenceMatch ? fenceMatch[1] : response;
61
+ // Find the JSON object
62
+ const objMatch = jsonStr.match(/\{[\s\S]*\}/);
63
+ if (!objMatch)
64
+ return null;
65
+ const parsed = JSON.parse(objMatch[0]);
66
+ if (!Array.isArray(parsed.topics))
67
+ return null;
68
+ // Validate structure
69
+ return parsed.topics.filter((t) => typeof t.topic === 'string' &&
70
+ t.topic.length > 0 &&
71
+ Array.isArray(t.claims) &&
72
+ t.claims.every((c) => typeof c === 'string'));
73
+ }
74
+ catch {
75
+ return null;
76
+ }
77
+ }
78
+ // ----------------------------------------------------------------------------
79
+ // Persistence + incremental extraction
80
+ // ----------------------------------------------------------------------------
81
+ /**
82
+ * Extract claims from all wiki files in a scope, persisting to DB.
83
+ * Skips files whose source_sha hasn't changed since last extraction.
84
+ */
85
+ export async function extractAllClaims(writer, scope, llmProvider, onTick) {
86
+ // Get all wiki files with their content and hashes
87
+ const files = writer
88
+ .prepare(`SELECT source_uri, source_sha FROM pinakes_nodes WHERE scope = ? GROUP BY source_uri`)
89
+ .all(scope);
90
+ const result = {
91
+ files_processed: 0,
92
+ files_skipped: 0,
93
+ claims_extracted: 0,
94
+ };
95
+ for (const file of files) {
96
+ // Check if this file was already extracted with the same sha
97
+ const lastExtracted = writer
98
+ .prepare(`SELECT value FROM pinakes_meta WHERE key = ?`)
99
+ .get(`claims_sha:${scope}:${file.source_uri}`);
100
+ if (lastExtracted?.value === file.source_sha) {
101
+ result.files_skipped++;
102
+ onTick?.(file.source_uri, 'skipped (unchanged)');
103
+ continue;
104
+ }
105
+ // Get the full content for this file's chunks
106
+ const chunks = writer
107
+ .prepare(`SELECT c.text FROM pinakes_chunks c
108
+ JOIN pinakes_nodes n ON c.node_id = n.id
109
+ WHERE n.scope = ? AND n.source_uri = ?
110
+ ORDER BY c.chunk_index`)
111
+ .all(scope, file.source_uri);
112
+ const content = chunks.map((c) => c.text).join('\n\n');
113
+ if (!content.trim()) {
114
+ onTick?.(file.source_uri, 'skipped (empty)');
115
+ continue;
116
+ }
117
+ try {
118
+ const claims = await extractClaimsFromFile(content, file.source_uri, llmProvider);
119
+ // Delete old claims for this file and insert new ones
120
+ const now = Date.now();
121
+ writer.exec('BEGIN');
122
+ try {
123
+ writer
124
+ .prepare(`DELETE FROM pinakes_claims WHERE scope = ? AND source_uri = ?`)
125
+ .run(scope, file.source_uri);
126
+ const insertStmt = writer.prepare(`INSERT INTO pinakes_claims (scope, source_uri, topic, claim, extracted_at)
127
+ VALUES (?, ?, ?, ?, ?)`);
128
+ for (const claim of claims) {
129
+ insertStmt.run(scope, file.source_uri, claim.topic, claim.claim, now);
130
+ }
131
+ // Record the sha so we can skip this file next time
132
+ writer
133
+ .prepare(`INSERT OR REPLACE INTO pinakes_meta (key, value) VALUES (?, ?)`)
134
+ .run(`claims_sha:${scope}:${file.source_uri}`, file.source_sha);
135
+ writer.exec('COMMIT');
136
+ }
137
+ catch (err) {
138
+ writer.exec('ROLLBACK');
139
+ throw err;
140
+ }
141
+ result.files_processed++;
142
+ result.claims_extracted += claims.length;
143
+ onTick?.(file.source_uri, `${claims.length} claims from ${new Set(claims.map((c) => c.topic)).size} topics`);
144
+ }
145
+ catch (err) {
146
+ logger.warn({ err, source_uri: file.source_uri }, 'claim extraction failed for file');
147
+ onTick?.(file.source_uri, `failed: ${err instanceof Error ? err.message.slice(0, 60) : err}`);
148
+ }
149
+ }
150
+ return result;
151
+ }
152
+ /**
153
+ * Query all persisted claims for a scope, optionally filtered by topic.
154
+ */
155
+ export function queryClaims(reader, scope, topic) {
156
+ if (topic) {
157
+ return reader
158
+ .prepare(`SELECT id, source_uri, topic, claim, extracted_at
159
+ FROM pinakes_claims WHERE scope = ? AND topic = ?
160
+ ORDER BY source_uri`)
161
+ .all(scope, topic);
162
+ }
163
+ return reader
164
+ .prepare(`SELECT id, source_uri, topic, claim, extracted_at
165
+ FROM pinakes_claims WHERE scope = ?
166
+ ORDER BY topic, source_uri`)
167
+ .all(scope);
168
+ }
169
+ //# sourceMappingURL=claims.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claims.js","sourceRoot":"","sources":["../../src/cli/claims.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAKH,OAAO,EAAE,MAAM,EAAE,MAAM,4BAA4B,CAAC;AAuBpD,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E,MAAM,iBAAiB,GAAG;;;;;;;;;;;oCAWU,CAAC;AAErC,+EAA+E;AAC/E,kBAAkB;AAClB,+EAA+E;AAE/E;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAe,EACf,SAAiB,EACjB,WAAwB;IAExB,MAAM,MAAM,GAAG,0DAA0D,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,OAAO,CAAC;IAExG,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,QAAQ,CAAC;QAC1C,MAAM,EAAE,iBAAiB;QACzB,MAAM;QACN,SAAS,EAAE,IAAI;KAChB,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IACjD,IAAI,CAAC,MAAM;QAAE,OAAO,EAAE,CAAC;IAEvB,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;QACxB,KAAK,MAAM,KAAK,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC;YAC9B,MAAM,CAAC,IAAI,CAAC;gBACV,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE;gBACpC,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE;gBACnB,UAAU,EAAE,SAAS;aACtB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,IAAI,CAAC;QACH,6CAA6C;QAC7C,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;QACxE,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QAEvD,uBAAuB;QACvB,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC9C,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE3B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAA+B,CAAC;QACrE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC;YAAE,OAAO,IAAI,CAAC;QAE/C,qBAAqB;QACrB,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,CACzB,CAAC,CAAC,EAAoB,EAAE,CACtB,OAAO,CAAC,CAAC,KAAK,KAAK,QAAQ;YAC3B,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAClB,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;YACvB,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAC/C,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,+EAA+E;AAC/E,uCAAuC;AACvC,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,MAA4B,EAC5B,KAAa,EACb,WAAwB,EACxB,MAAoD;IAEpD,mDAAmD;IACnD,MAAM,KAAK,GAAG,MAAM;SACjB,OA
AO,CACN,sFAAsF,CACvF;SACA,GAAG,CAAC,KAAK,CAAC,CAAC;IAEd,MAAM,MAAM,GAA0B;QACpC,eAAe,EAAE,CAAC;QAClB,aAAa,EAAE,CAAC;QAChB,gBAAgB,EAAE,CAAC;KACpB,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,6DAA6D;QAC7D,MAAM,aAAa,GAAG,MAAM;aACzB,OAAO,CACN,8CAA8C,CAC/C;aACA,GAAG,CAAC,cAAc,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAEjD,IAAI,aAAa,EAAE,KAAK,KAAK,IAAI,CAAC,UAAU,EAAE,CAAC;YAC7C,MAAM,CAAC,aAAa,EAAE,CAAC;YACvB,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,qBAAqB,CAAC,CAAC;YACjD,SAAS;QACX,CAAC;QAED,8CAA8C;QAC9C,MAAM,MAAM,GAAG,MAAM;aAClB,OAAO,CACN;;;gCAGwB,CACzB;aACA,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAE/B,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvD,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YACpB,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAC;YAC7C,SAAS;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,qBAAqB,CAAC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,WAAW,CAAC,CAAC;YAElF,sDAAsD;YACtD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACvB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,IAAI,CAAC;gBACH,MAAM;qBACH,OAAO,CAAC,+DAA+D,CAAC;qBACxE,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;gBAE/B,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAC/B;kCACwB,CACzB,CAAC;gBACF,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;oBAC3B,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;gBACxE,CAAC;gBAED,oDAAoD;gBACpD,MAAM;qBACH,OAAO,CACN,gEAAgE,CACjE;qBACA,GAAG,CAAC,cAAc,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;gBAElE,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACxB,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;gBACxB,MAAM,GAAG,CAAC;YACZ,CAAC;YAED,MAAM,CAAC,eAAe,EAAE,CAAC;YACzB,MAAM,CAAC,gBAAgB,IAAI,MAAM,CAAC,MAAM,CAAC;YACzC,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,MAAM,CAAC,MAAM,gBAAgB,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC;QAC/G,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,UAAU,EAAE,
IAAI,CAAC,UAAU,EAAE,EAAE,kCAAkC,CAAC,CAAC;YACtF,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,WAAW,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QAChG,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,MAA4B,EAC5B,KAAa,EACb,KAAc;IAEd,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,MAAM;aACV,OAAO,CACN;;6BAEqB,CACtB;aACA,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IACvB,CAAC;IAED,OAAO,MAAM;SACV,OAAO,CACN;;kCAE4B,CAC7B;SACA,GAAG,CAAC,KAAK,CAAC,CAAC;AAChB,CAAC"}
@@ -1,59 +1,77 @@
1
- import type { DbBundle } from '../db/client.js';
2
- import type { LlmProvider } from '../llm/provider.js';
3
1
  /**
4
- * Contradiction detector CLI command (PRD Phase 8, stretch goal H).
2
+ * Contradiction detector v2 (D41 topic-clustered claim comparison).
3
+ *
4
+ * Two-phase pipeline:
5
+ * Phase A (claims.ts): Per-file LLM extraction of {topic, claims[]}
6
+ * Phase B (this file): Group claims by topic, compare cross-file via LLM
5
7
  *
6
- * Scans wiki chunks for contradictory claims using pairwise LLM judge.
7
- * Rate-limited to 1 scan per hour. Outputs contradictions.md to wiki root.
8
+ * Topic dedup uses embedding cosine similarity > threshold (default 0.85)
9
+ * to merge terminology variants like "OAuth2" / "OAuth 2.0".
8
10
  */
11
+ import type { DbBundle } from '../db/client.js';
12
+ import type { LlmProvider } from '../llm/provider.js';
13
+ import type { Embedder } from '../retrieval/embedder.js';
14
+ import type { ProgressReporter } from './progress.js';
9
15
  export interface ContradictionScanOpts {
10
16
  bundle: DbBundle;
11
17
  scope: 'project' | 'personal';
12
18
  llmProvider: LlmProvider;
13
19
  wikiRoot: string;
20
+ embedder?: Embedder;
21
+ topicSimilarity?: number;
22
+ progress?: ProgressReporter;
14
23
  }
15
24
  export interface Contradiction {
16
- chunkA: {
17
- id: string;
25
+ topic: string;
26
+ claimA: {
27
+ claim: string;
18
28
  source_uri: string;
19
- text: string;
20
29
  };
21
- chunkB: {
22
- id: string;
30
+ claimB: {
31
+ claim: string;
23
32
  source_uri: string;
24
- text: string;
25
33
  };
26
34
  explanation: string;
27
35
  confidence: 'high' | 'medium';
28
36
  }
29
37
  export interface ContradictionResult {
30
38
  scanned_pairs: number;
39
+ topics_scanned: number;
40
+ claims_extracted: number;
31
41
  contradictions: Contradiction[];
32
42
  rate_limited: boolean;
33
43
  }
34
44
  /**
35
- * Run a contradiction scan. Returns immediately if rate-limited.
45
+ * Run contradiction scan using topic-clustered claim comparison (D41).
46
+ * Requires claims to be extracted first (via extractAllClaims).
36
47
  */
37
48
  export declare function contradictionScan(opts: ContradictionScanOpts): Promise<ContradictionResult>;
38
- interface ChunkInfo {
39
- id: string;
40
- source_uri: string;
41
- text: string;
42
- }
43
- interface CandidatePair {
44
- a: ChunkInfo;
45
- b: ChunkInfo;
49
+ interface TopicGroup {
50
+ topic: string;
51
+ claims: Array<{
52
+ claim: string;
53
+ source_uri: string;
54
+ }>;
46
55
  }
56
+ declare function groupByTopic(claims: Array<{
57
+ topic: string;
58
+ claim: string;
59
+ source_uri: string;
60
+ }>): TopicGroup[];
47
61
  /**
48
- * Find candidate pairs for contradiction checking.
49
- * Uses vector similarity to find chunks that are semantically similar
50
- * (which is a prerequisite for them to potentially contradict).
62
+ * Merge topic groups whose topic strings are semantically similar
63
+ * (cosine similarity > threshold). Handles "OAuth2" / "OAuth 2.0" merging.
51
64
  */
52
- declare function findCandidatePairs(bundle: DbBundle, scope: string): CandidatePair[];
53
- declare function parseJudgment(response: string): {
54
- contradicts: boolean;
65
+ export declare function deduplicateTopics(groups: TopicGroup[], embedder: Embedder, threshold: number): Promise<TopicGroup[]>;
66
+ declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
67
+ interface ParsedContradiction {
68
+ claim_a: string;
69
+ source_a: string;
70
+ claim_b: string;
71
+ source_b: string;
55
72
  explanation: string;
56
73
  confidence: string;
57
- } | null;
58
- export { findCandidatePairs as _findCandidatePairs, parseJudgment as _parseJudgment };
74
+ }
75
+ export declare function parseContradictionResponse(response: string): ParsedContradiction[];
76
+ export { groupByTopic as _groupByTopic, cosineSimilarity as _cosineSimilarity };
59
77
  //# sourceMappingURL=contradiction.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"contradiction.d.ts","sourceRoot":"","sources":["../../src/cli/contradiction.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGtD;;;;;GAKG;AAEH,MAAM,WAAW,qBAAqB;IACpC,MAAM,EAAE,QAAQ,CAAC;IACjB,KAAK,EAAE,SAAS,GAAG,UAAU,CAAC;IAC9B,WAAW,EAAE,WAAW,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACzD,MAAM,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACzD,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,GAAG,QAAQ,CAAC;CAC/B;AAED,MAAM,WAAW,mBAAmB;IAClC,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,aAAa,EAAE,CAAC;IAChC,YAAY,EAAE,OAAO,CAAC;CACvB;AAQD;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,qBAAqB,GAC1B,OAAO,CAAC,mBAAmB,CAAC,CAmE9B;AAMD,UAAU,SAAS;IACjB,EAAE,EAAE,MAAM,CAAC;IACX,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;CACd;AAED,UAAU,aAAa;IACrB,CAAC,EAAE,SAAS,CAAC;IACb,CAAC,EAAE,SAAS,CAAC;CACd;AAED;;;;GAIG;AACH,iBAAS,kBAAkB,CAAC,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,GAAG,aAAa,EAAE,CAwD5E;AAED,iBAAS,aAAa,CACpB,QAAQ,EAAE,MAAM,GACf;IAAE,WAAW,EAAE,OAAO,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAkB1E;AAwDD,OAAO,EAAE,kBAAkB,IAAI,mBAAmB,EAAE,aAAa,IAAI,cAAc,EAAE,CAAC"}
1
+ {"version":3,"file":"contradiction.d.ts","sourceRoot":"","sources":["../../src/cli/contradiction.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAKH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAEzD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAMtD,MAAM,WAAW,qBAAqB;IACpC,MAAM,EAAE,QAAQ,CAAC;IACjB,KAAK,EAAE,SAAS,GAAG,UAAU,CAAC;IAC9B,WAAW,EAAE,WAAW,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9C,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9C,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,GAAG,QAAQ,CAAC;CAC/B;AAED,MAAM,WAAW,mBAAmB;IAClC,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,MAAM,CAAC;IACzB,cAAc,EAAE,aAAa,EAAE,CAAC;IAChC,YAAY,EAAE,OAAO,CAAC;CACvB;AAoBD;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,qBAAqB,GAC1B,OAAO,CAAC,mBAAmB,CAAC,CAmG9B;AAMD,UAAU,UAAU;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACtD;AAED,iBAAS,YAAY,CACnB,MAAM,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,GAClE,UAAU,EAAE,CAYd;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,MAAM,EAAE,UAAU,EAAE,EACpB,QAAQ,EAAE,QAAQ,EAClB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,UAAU,EAAE,CAAC,CA6CvB;AAED,iBAAS,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CASlE;AAcD,UAAU,mBAAmB;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,mBAAmB,EAAE,CAqBlF;AAqCD,OAAO,EAAE,YAAY,IAAI,aAAa,EAAE,gBAAgB,IAAI,iBAAiB,EAAE,CAAC"}