@pella-labs/pinakes 0.3.15 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -7
- package/dist/cli/audit-wiki.d.ts +45 -1
- package/dist/cli/audit-wiki.d.ts.map +1 -1
- package/dist/cli/audit-wiki.js +346 -117
- package/dist/cli/audit-wiki.js.map +1 -1
- package/dist/cli/claims.d.ts +49 -0
- package/dist/cli/claims.d.ts.map +1 -0
- package/dist/cli/claims.js +169 -0
- package/dist/cli/claims.js.map +1 -0
- package/dist/cli/contradiction.d.ts +46 -28
- package/dist/cli/contradiction.d.ts.map +1 -1
- package/dist/cli/contradiction.js +182 -115
- package/dist/cli/contradiction.js.map +1 -1
- package/dist/cli/index.js +4 -2
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/progress.d.ts +19 -0
- package/dist/cli/progress.d.ts.map +1 -0
- package/dist/cli/progress.js +44 -0
- package/dist/cli/progress.js.map +1 -0
- package/dist/db/client.js +1 -1
- package/dist/db/migrations/0003_add_pinakes_claims.sql +13 -0
- package/dist/db/migrations/0004_add_confidence_score.sql +12 -0
- package/dist/db/migrations/meta/_journal.json +14 -0
- package/dist/db/schema.d.ts +161 -1
- package/dist/db/schema.d.ts.map +1 -1
- package/dist/db/schema.js +24 -1
- package/dist/db/schema.js.map +1 -1
- package/dist/gate/confidence.d.ts +82 -0
- package/dist/gate/confidence.d.ts.map +1 -0
- package/dist/gate/confidence.js +190 -0
- package/dist/gate/confidence.js.map +1 -0
- package/dist/ingest/ingester.d.ts.map +1 -1
- package/dist/ingest/ingester.js +4 -3
- package/dist/ingest/ingester.js.map +1 -1
- package/dist/ingest/repo-mirror.d.ts.map +1 -1
- package/dist/ingest/repo-mirror.js +5 -1
- package/dist/ingest/repo-mirror.js.map +1 -1
- package/dist/init/copy.d.ts.map +1 -1
- package/dist/init/copy.js +9 -0
- package/dist/init/copy.js.map +1 -1
- package/dist/init/scanner.js +7 -0
- package/dist/init/scanner.js.map +1 -1
- package/dist/mcp/tools/search.d.ts.map +1 -1
- package/dist/mcp/tools/search.js +2 -2
- package/dist/mcp/tools/search.js.map +1 -1
- package/dist/retrieval/fts.d.ts +1 -0
- package/dist/retrieval/fts.d.ts.map +1 -1
- package/dist/retrieval/fts.js +18 -2
- package/dist/retrieval/fts.js.map +1 -1
- package/dist/retrieval/hybrid.d.ts +1 -0
- package/dist/retrieval/hybrid.d.ts.map +1 -1
- package/dist/retrieval/hybrid.js +5 -0
- package/dist/retrieval/hybrid.js.map +1 -1
- package/dist/retrieval/vec.d.ts +1 -0
- package/dist/retrieval/vec.d.ts.map +1 -1
- package/dist/retrieval/vec.js +17 -2
- package/dist/retrieval/vec.js.map +1 -1
- package/dist/sandbox/bindings/pinakes.d.ts.map +1 -1
- package/dist/sandbox/bindings/pinakes.js +9 -2
- package/dist/sandbox/bindings/pinakes.js.map +1 -1
- package/package.json +30 -19
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Topic-claim extraction for the audit-wiki v2 contradiction pipeline (D41).
|
|
3
|
+
*
|
|
4
|
+
* Phase A of the two-phase pipeline:
|
|
5
|
+
* 1. For each wiki file, send content to LLM → extract {topic, claims[]} pairs
|
|
6
|
+
* 2. Persist claims to `pinakes_claims` table
|
|
7
|
+
* 3. Skip unchanged files (incremental via source_sha comparison)
|
|
8
|
+
*/
|
|
9
|
+
import type { Database as BetterSqliteDatabase } from 'better-sqlite3';
|
|
10
|
+
import type { LlmProvider } from '../llm/provider.js';
|
|
11
|
+
export interface ExtractedClaim {
|
|
12
|
+
topic: string;
|
|
13
|
+
claim: string;
|
|
14
|
+
source_uri: string;
|
|
15
|
+
}
|
|
16
|
+
interface TopicClaims {
|
|
17
|
+
topic: string;
|
|
18
|
+
claims: string[];
|
|
19
|
+
}
|
|
20
|
+
export interface ClaimExtractionResult {
|
|
21
|
+
files_processed: number;
|
|
22
|
+
files_skipped: number;
|
|
23
|
+
claims_extracted: number;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Extract topics and claims from a single file's content via LLM.
|
|
27
|
+
*/
|
|
28
|
+
export declare function extractClaimsFromFile(content: string, sourceUri: string, llmProvider: LlmProvider): Promise<ExtractedClaim[]>;
|
|
29
|
+
/**
|
|
30
|
+
* Parse LLM response, handling JSON wrapped in markdown code fences.
|
|
31
|
+
*/
|
|
32
|
+
export declare function parseExtractionResponse(response: string): TopicClaims[] | null;
|
|
33
|
+
/**
|
|
34
|
+
* Extract claims from all wiki files in a scope, persisting to DB.
|
|
35
|
+
* Skips files whose source_sha hasn't changed since last extraction.
|
|
36
|
+
*/
|
|
37
|
+
export declare function extractAllClaims(writer: BetterSqliteDatabase, scope: string, llmProvider: LlmProvider, onTick?: (sourceUri: string, detail: string) => void): Promise<ClaimExtractionResult>;
|
|
38
|
+
/**
|
|
39
|
+
* Query all persisted claims for a scope, optionally filtered by topic.
|
|
40
|
+
*/
|
|
41
|
+
export declare function queryClaims(reader: BetterSqliteDatabase, scope: string, topic?: string): Array<{
|
|
42
|
+
id: number;
|
|
43
|
+
source_uri: string;
|
|
44
|
+
topic: string;
|
|
45
|
+
claim: string;
|
|
46
|
+
extracted_at: number;
|
|
47
|
+
}>;
|
|
48
|
+
export {};
|
|
49
|
+
//# sourceMappingURL=claims.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claims.d.ts","sourceRoot":"","sources":["../../src/cli/claims.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,QAAQ,IAAI,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAEvE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAOtD,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,UAAU,WAAW;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,qBAAqB;IACpC,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAuBD;;GAEG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,MAAM,EACjB,WAAW,EAAE,WAAW,GACvB,OAAO,CAAC,cAAc,EAAE,CAAC,CAwB3B;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,WAAW,EAAE,GAAG,IAAI,CAwB9E;AAMD;;;GAGG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,WAAW,EACxB,MAAM,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,IAAI,GACnD,OAAO,CAAC,qBAAqB,CAAC,CAsFhC;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,MAAM,GACb,KAAK,CAAC;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,CAAC,CAkB/F"}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Topic-claim extraction for the audit-wiki v2 contradiction pipeline (D41).
|
|
3
|
+
*
|
|
4
|
+
* Phase A of the two-phase pipeline:
|
|
5
|
+
* 1. For each wiki file, send content to LLM → extract {topic, claims[]} pairs
|
|
6
|
+
* 2. Persist claims to `pinakes_claims` table
|
|
7
|
+
* 3. Skip unchanged files (incremental via source_sha comparison)
|
|
8
|
+
*/
|
|
9
|
+
import { logger } from '../observability/logger.js';
|
|
10
|
+
// ----------------------------------------------------------------------------
// Extraction prompt
// ----------------------------------------------------------------------------
// System prompt sent with every per-file extraction request. The model is
// asked for bare JSON ({"topics": [...]}); parseExtractionResponse below also
// tolerates the JSON being wrapped in markdown code fences.
const EXTRACTION_SYSTEM = `You are a claim extractor for a knowledge wiki. Given the content of a wiki page, identify the key topics discussed and extract factual claims made about each topic.

Return ONLY valid JSON in this format:
{"topics":[{"topic":"topic name","claims":["claim 1","claim 2"]}]}

Rules:
- Use the most canonical, commonly-used name for each topic (e.g., "authentication" not "auth flow")
- Each claim should be a single, self-contained factual statement
- Only extract concrete claims (numbers, versions, choices, constraints), not vague descriptions
- Group related subtopics under their parent topic
- Limit to the 5-10 most important topics per page
- Each topic should have 1-5 claims`;
|
|
25
|
+
// ----------------------------------------------------------------------------
|
|
26
|
+
// Core extraction
|
|
27
|
+
// ----------------------------------------------------------------------------
|
|
28
|
+
/**
 * Extract topics and claims from a single file's content via LLM.
 *
 * Truncates the page to its first 12 000 characters, asks the model for
 * {topic, claims[]} groups, and flattens them into one ExtractedClaim per
 * claim string. Returns an empty array when the response cannot be parsed.
 */
export async function extractClaimsFromFile(content, sourceUri, llmProvider) {
    const excerpt = content.slice(0, 12000);
    const response = await llmProvider.complete({
        system: EXTRACTION_SYSTEM,
        prompt: `Extract topics and claims from this wiki page:\n\n---\n${excerpt}\n---`,
        maxTokens: 2000,
    });
    const topicGroups = parseExtractionResponse(response);
    if (topicGroups === null)
        return [];
    // Flatten {topic, claims[]} groups into one record per claim; topics are
    // normalized to lowercase so later phases can group them across files.
    return topicGroups.flatMap((group) => group.claims.map((text) => ({
        topic: group.topic.toLowerCase().trim(),
        claim: text.trim(),
        source_uri: sourceUri,
    })));
}
|
|
53
|
+
/**
 * Parse LLM response, handling JSON wrapped in markdown code fences.
 *
 * Returns null when no JSON object can be located, the JSON is invalid, or
 * the `topics` property is not an array. Malformed topic entries (wrong
 * types, empty topic names) are silently filtered out of the result.
 */
export function parseExtractionResponse(response) {
    try {
        // Prefer the contents of a ``` / ```json fence when one is present.
        const fenced = /```(?:json)?\s*\n?([\s\S]*?)\n?```/.exec(response);
        const candidate = fenced ? fenced[1] : response;
        // Grab the outermost {...} span so prose around the JSON is ignored.
        const braces = /\{[\s\S]*\}/.exec(candidate);
        if (!braces)
            return null;
        const payload = JSON.parse(braces[0]);
        if (!Array.isArray(payload.topics))
            return null;
        // Keep only well-formed {topic: string, claims: string[]} entries.
        return payload.topics.filter((entry) => typeof entry.topic === 'string' &&
            entry.topic.length > 0 &&
            Array.isArray(entry.claims) &&
            entry.claims.every((item) => typeof item === 'string'));
    }
    catch {
        return null;
    }
}
|
|
78
|
+
// ----------------------------------------------------------------------------
|
|
79
|
+
// Persistence + incremental extraction
|
|
80
|
+
// ----------------------------------------------------------------------------
|
|
81
|
+
/**
 * Extract claims from all wiki files in a scope, persisting to DB.
 * Skips files whose source_sha hasn't changed since last extraction.
 *
 * @param writer       better-sqlite3 handle with write access.
 * @param scope        Wiki scope whose files should be scanned.
 * @param llmProvider  LLM used for the per-file extraction call.
 * @param onTick       Optional progress callback, invoked once per file.
 * @returns Counts of processed and skipped files plus total claims stored.
 */
export async function extractAllClaims(writer, scope, llmProvider, onTick) {
    // One row per distinct wiki file in this scope.
    const files = writer
        .prepare(`SELECT source_uri, source_sha FROM pinakes_nodes WHERE scope = ? GROUP BY source_uri`)
        .all(scope);
    const result = {
        files_processed: 0,
        files_skipped: 0,
        claims_extracted: 0,
    };
    for (const file of files) {
        // Incremental: skip files whose content hash matches the last run.
        const lastExtracted = writer
            .prepare(`SELECT value FROM pinakes_meta WHERE key = ?`)
            .get(`claims_sha:${scope}:${file.source_uri}`);
        if (lastExtracted?.value === file.source_sha) {
            result.files_skipped++;
            onTick?.(file.source_uri, 'skipped (unchanged)');
            continue;
        }
        // Reassemble the file's full text from its ordered chunks.
        const chunks = writer
            .prepare(`SELECT c.text FROM pinakes_chunks c
       JOIN pinakes_nodes n ON c.node_id = n.id
       WHERE n.scope = ? AND n.source_uri = ?
       ORDER BY c.chunk_index`)
            .all(scope, file.source_uri);
        const content = chunks.map((c) => c.text).join('\n\n');
        if (!content.trim()) {
            // Fix: count empty files as skipped so the returned counts add up
            // (previously reported via onTick but left out of files_skipped).
            result.files_skipped++;
            onTick?.(file.source_uri, 'skipped (empty)');
            continue;
        }
        try {
            const claims = await extractClaimsFromFile(content, file.source_uri, llmProvider);
            // Replace this file's claims atomically: delete + insert + sha
            // bookkeeping happen inside one transaction.
            const now = Date.now();
            writer.exec('BEGIN');
            try {
                writer
                    .prepare(`DELETE FROM pinakes_claims WHERE scope = ? AND source_uri = ?`)
                    .run(scope, file.source_uri);
                const insertStmt = writer.prepare(`INSERT INTO pinakes_claims (scope, source_uri, topic, claim, extracted_at)
       VALUES (?, ?, ?, ?, ?)`);
                for (const claim of claims) {
                    insertStmt.run(scope, file.source_uri, claim.topic, claim.claim, now);
                }
                // Record the sha so we can skip this file next time
                writer
                    .prepare(`INSERT OR REPLACE INTO pinakes_meta (key, value) VALUES (?, ?)`)
                    .run(`claims_sha:${scope}:${file.source_uri}`, file.source_sha);
                writer.exec('COMMIT');
            }
            catch (err) {
                writer.exec('ROLLBACK');
                throw err;
            }
            result.files_processed++;
            result.claims_extracted += claims.length;
            onTick?.(file.source_uri, `${claims.length} claims from ${new Set(claims.map((c) => c.topic)).size} topics`);
        }
        catch (err) {
            // Per-file failures are logged and surfaced but do not abort the run.
            logger.warn({ err, source_uri: file.source_uri }, 'claim extraction failed for file');
            onTick?.(file.source_uri, `failed: ${err instanceof Error ? err.message.slice(0, 60) : err}`);
        }
    }
    return result;
}
|
|
152
|
+
/**
 * Query all persisted claims for a scope, optionally filtered by topic.
 *
 * Without a topic, rows come back grouped by topic then file; with a topic
 * filter they are ordered by file only.
 */
export function queryClaims(reader, scope, topic) {
    const stmt = topic
        ? reader.prepare(`SELECT id, source_uri, topic, claim, extracted_at
       FROM pinakes_claims WHERE scope = ? AND topic = ?
       ORDER BY source_uri`)
        : reader.prepare(`SELECT id, source_uri, topic, claim, extracted_at
       FROM pinakes_claims WHERE scope = ?
       ORDER BY topic, source_uri`);
    return topic ? stmt.all(scope, topic) : stmt.all(scope);
}
|
|
169
|
+
//# sourceMappingURL=claims.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"claims.js","sourceRoot":"","sources":["../../src/cli/claims.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAKH,OAAO,EAAE,MAAM,EAAE,MAAM,4BAA4B,CAAC;AAuBpD,+EAA+E;AAC/E,oBAAoB;AACpB,+EAA+E;AAE/E,MAAM,iBAAiB,GAAG;;;;;;;;;;;oCAWU,CAAC;AAErC,+EAA+E;AAC/E,kBAAkB;AAClB,+EAA+E;AAE/E;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAe,EACf,SAAiB,EACjB,WAAwB;IAExB,MAAM,MAAM,GAAG,0DAA0D,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,OAAO,CAAC;IAExG,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,QAAQ,CAAC;QAC1C,MAAM,EAAE,iBAAiB;QACzB,MAAM;QACN,SAAS,EAAE,IAAI;KAChB,CAAC,CAAC;IAEH,MAAM,MAAM,GAAG,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IACjD,IAAI,CAAC,MAAM;QAAE,OAAO,EAAE,CAAC;IAEvB,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;QACxB,KAAK,MAAM,KAAK,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC;YAC9B,MAAM,CAAC,IAAI,CAAC;gBACV,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE;gBACpC,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE;gBACnB,UAAU,EAAE,SAAS;aACtB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,IAAI,CAAC;QACH,6CAA6C;QAC7C,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;QACxE,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QAEvD,uBAAuB;QACvB,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;QAC9C,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE3B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAA+B,CAAC;QACrE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC;YAAE,OAAO,IAAI,CAAC;QAE/C,qBAAqB;QACrB,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,CACzB,CAAC,CAAC,EAAoB,EAAE,CACtB,OAAO,CAAC,CAAC,KAAK,KAAK,QAAQ;YAC3B,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YAClB,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;YACvB,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAC/C,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,+EAA+E;AAC/E,uCAAuC;AACvC,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,MAA4B,EAC5B,KAAa,EACb,WAAwB,EACxB,MAAoD;IAEpD,mDAAmD;IACnD,MAAM,KAAK,GAAG,MAAM;SACjB,OAAO
,CACN,sFAAsF,CACvF;SACA,GAAG,CAAC,KAAK,CAAC,CAAC;IAEd,MAAM,MAAM,GAA0B;QACpC,eAAe,EAAE,CAAC;QAClB,aAAa,EAAE,CAAC;QAChB,gBAAgB,EAAE,CAAC;KACpB,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,6DAA6D;QAC7D,MAAM,aAAa,GAAG,MAAM;aACzB,OAAO,CACN,8CAA8C,CAC/C;aACA,GAAG,CAAC,cAAc,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAEjD,IAAI,aAAa,EAAE,KAAK,KAAK,IAAI,CAAC,UAAU,EAAE,CAAC;YAC7C,MAAM,CAAC,aAAa,EAAE,CAAC;YACvB,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,qBAAqB,CAAC,CAAC;YACjD,SAAS;QACX,CAAC;QAED,8CAA8C;QAC9C,MAAM,MAAM,GAAG,MAAM;aAClB,OAAO,CACN;;;gCAGwB,CACzB;aACA,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAE/B,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvD,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YACpB,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAC;YAC7C,SAAS;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,qBAAqB,CAAC,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE,WAAW,CAAC,CAAC;YAElF,sDAAsD;YACtD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACvB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,IAAI,CAAC;gBACH,MAAM;qBACH,OAAO,CAAC,+DAA+D,CAAC;qBACxE,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;gBAE/B,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAC/B;kCACwB,CACzB,CAAC;gBACF,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;oBAC3B,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;gBACxE,CAAC;gBAED,oDAAoD;gBACpD,MAAM;qBACH,OAAO,CACN,gEAAgE,CACjE;qBACA,GAAG,CAAC,cAAc,KAAK,IAAI,IAAI,CAAC,UAAU,EAAE,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;gBAElE,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACxB,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;gBACxB,MAAM,GAAG,CAAC;YACZ,CAAC;YAED,MAAM,CAAC,eAAe,EAAE,CAAC;YACzB,MAAM,CAAC,gBAAgB,IAAI,MAAM,CAAC,MAAM,CAAC;YACzC,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,MAAM,CAAC,MAAM,gBAAgB,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC;QAC/G,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,UAAU,EAAE,IA
AI,CAAC,UAAU,EAAE,EAAE,kCAAkC,CAAC,CAAC;YACtF,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,WAAW,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QAChG,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,MAA4B,EAC5B,KAAa,EACb,KAAc;IAEd,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,MAAM;aACV,OAAO,CACN;;6BAEqB,CACtB;aACA,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IACvB,CAAC;IAED,OAAO,MAAM;SACV,OAAO,CACN;;kCAE4B,CAC7B;SACA,GAAG,CAAC,KAAK,CAAC,CAAC;AAChB,CAAC"}
|
|
@@ -1,59 +1,77 @@
|
|
|
1
|
-
import type { DbBundle } from '../db/client.js';
|
|
2
|
-
import type { LlmProvider } from '../llm/provider.js';
|
|
3
1
|
/**
|
|
4
|
-
* Contradiction detector
|
|
2
|
+
* Contradiction detector v2 (D41 — topic-clustered claim comparison).
|
|
3
|
+
*
|
|
4
|
+
* Two-phase pipeline:
|
|
5
|
+
* Phase A (claims.ts): Per-file LLM extraction of {topic, claims[]}
|
|
6
|
+
* Phase B (this file): Group claims by topic, compare cross-file via LLM
|
|
5
7
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
+
* Topic dedup uses embedding cosine similarity > threshold (default 0.85)
|
|
9
|
+
* to merge terminology variants like "OAuth2" / "OAuth 2.0".
|
|
8
10
|
*/
|
|
11
|
+
import type { DbBundle } from '../db/client.js';
|
|
12
|
+
import type { LlmProvider } from '../llm/provider.js';
|
|
13
|
+
import type { Embedder } from '../retrieval/embedder.js';
|
|
14
|
+
import type { ProgressReporter } from './progress.js';
|
|
9
15
|
export interface ContradictionScanOpts {
|
|
10
16
|
bundle: DbBundle;
|
|
11
17
|
scope: 'project' | 'personal';
|
|
12
18
|
llmProvider: LlmProvider;
|
|
13
19
|
wikiRoot: string;
|
|
20
|
+
embedder?: Embedder;
|
|
21
|
+
topicSimilarity?: number;
|
|
22
|
+
progress?: ProgressReporter;
|
|
14
23
|
}
|
|
15
24
|
export interface Contradiction {
|
|
16
|
-
|
|
17
|
-
|
|
25
|
+
topic: string;
|
|
26
|
+
claimA: {
|
|
27
|
+
claim: string;
|
|
18
28
|
source_uri: string;
|
|
19
|
-
text: string;
|
|
20
29
|
};
|
|
21
|
-
|
|
22
|
-
|
|
30
|
+
claimB: {
|
|
31
|
+
claim: string;
|
|
23
32
|
source_uri: string;
|
|
24
|
-
text: string;
|
|
25
33
|
};
|
|
26
34
|
explanation: string;
|
|
27
35
|
confidence: 'high' | 'medium';
|
|
28
36
|
}
|
|
29
37
|
export interface ContradictionResult {
|
|
30
38
|
scanned_pairs: number;
|
|
39
|
+
topics_scanned: number;
|
|
40
|
+
claims_extracted: number;
|
|
31
41
|
contradictions: Contradiction[];
|
|
32
42
|
rate_limited: boolean;
|
|
33
43
|
}
|
|
34
44
|
/**
|
|
35
|
-
* Run
|
|
45
|
+
* Run contradiction scan using topic-clustered claim comparison (D41).
|
|
46
|
+
* Requires claims to be extracted first (via extractAllClaims).
|
|
36
47
|
*/
|
|
37
48
|
export declare function contradictionScan(opts: ContradictionScanOpts): Promise<ContradictionResult>;
|
|
38
|
-
interface
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
a: ChunkInfo;
|
|
45
|
-
b: ChunkInfo;
|
|
49
|
+
interface TopicGroup {
|
|
50
|
+
topic: string;
|
|
51
|
+
claims: Array<{
|
|
52
|
+
claim: string;
|
|
53
|
+
source_uri: string;
|
|
54
|
+
}>;
|
|
46
55
|
}
|
|
56
|
+
declare function groupByTopic(claims: Array<{
|
|
57
|
+
topic: string;
|
|
58
|
+
claim: string;
|
|
59
|
+
source_uri: string;
|
|
60
|
+
}>): TopicGroup[];
|
|
47
61
|
/**
|
|
48
|
-
*
|
|
49
|
-
*
|
|
50
|
-
* (which is a prerequisite for them to potentially contradict).
|
|
62
|
+
* Merge topic groups whose topic strings are semantically similar
|
|
63
|
+
* (cosine similarity > threshold). Handles "OAuth2" / "OAuth 2.0" merging.
|
|
51
64
|
*/
|
|
52
|
-
declare function
|
|
53
|
-
declare function
|
|
54
|
-
|
|
65
|
+
export declare function deduplicateTopics(groups: TopicGroup[], embedder: Embedder, threshold: number): Promise<TopicGroup[]>;
|
|
66
|
+
declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
|
|
67
|
+
interface ParsedContradiction {
|
|
68
|
+
claim_a: string;
|
|
69
|
+
source_a: string;
|
|
70
|
+
claim_b: string;
|
|
71
|
+
source_b: string;
|
|
55
72
|
explanation: string;
|
|
56
73
|
confidence: string;
|
|
57
|
-
}
|
|
58
|
-
export
|
|
74
|
+
}
|
|
75
|
+
export declare function parseContradictionResponse(response: string): ParsedContradiction[];
|
|
76
|
+
export { groupByTopic as _groupByTopic, cosineSimilarity as _cosineSimilarity };
|
|
59
77
|
//# sourceMappingURL=contradiction.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"contradiction.d.ts","sourceRoot":"","sources":["../../src/cli/contradiction.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"contradiction.d.ts","sourceRoot":"","sources":["../../src/cli/contradiction.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAKH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AAEzD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAMtD,MAAM,WAAW,qBAAqB;IACpC,MAAM,EAAE,QAAQ,CAAC;IACjB,KAAK,EAAE,SAAS,GAAG,UAAU,CAAC;IAC9B,WAAW,EAAE,WAAW,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9C,MAAM,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9C,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,GAAG,QAAQ,CAAC;CAC/B;AAED,MAAM,WAAW,mBAAmB;IAClC,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,MAAM,CAAC;IACzB,cAAc,EAAE,aAAa,EAAE,CAAC;IAChC,YAAY,EAAE,OAAO,CAAC;CACvB;AAoBD;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,IAAI,EAAE,qBAAqB,GAC1B,OAAO,CAAC,mBAAmB,CAAC,CAmG9B;AAMD,UAAU,UAAU;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACtD;AAED,iBAAS,YAAY,CACnB,MAAM,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,GAClE,UAAU,EAAE,CAYd;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CACrC,MAAM,EAAE,UAAU,EAAE,EACpB,QAAQ,EAAE,QAAQ,EAClB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,UAAU,EAAE,CAAC,CA6CvB;AAED,iBAAS,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CASlE;AAcD,UAAU,mBAAmB;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,GAAG,mBAAmB,EAAE,CAqBlF;AAqCD,OAAO,EAAE,YAAY,IAAI,aAAa,EAAE,gBAAgB,IAAI,iBAAiB,EAAE,CAAC"}
|