@var-ia/cli 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/src/commands/analyze.d.ts +6 -2
- package/dist/src/commands/analyze.d.ts.map +1 -1
- package/dist/src/commands/analyze.js +240 -14
- package/dist/src/commands/analyze.js.map +1 -1
- package/dist/src/commands/cache.d.ts +2 -2
- package/dist/src/commands/cache.d.ts.map +1 -1
- package/dist/src/commands/cache.js.map +1 -1
- package/dist/src/commands/claim.d.ts +5 -1
- package/dist/src/commands/claim.d.ts.map +1 -1
- package/dist/src/commands/claim.js +34 -5
- package/dist/src/commands/claim.js.map +1 -1
- package/dist/src/commands/eval.d.ts +2 -0
- package/dist/src/commands/eval.d.ts.map +1 -0
- package/dist/src/commands/eval.js +38 -0
- package/dist/src/commands/eval.js.map +1 -0
- package/dist/src/commands/export.d.ts +2 -1
- package/dist/src/commands/export.d.ts.map +1 -1
- package/dist/src/commands/export.js +80 -8
- package/dist/src/commands/export.js.map +1 -1
- package/dist/src/commands/watch.d.ts +1 -1
- package/dist/src/commands/watch.d.ts.map +1 -1
- package/dist/src/commands/watch.js +2 -2
- package/dist/src/commands/watch.js.map +1 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +80 -30
- package/dist/src/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +11 -7
- package/src/commands/analyze.ts +291 -15
- package/src/commands/cache.ts +3 -2
- package/src/commands/claim.ts +39 -5
- package/src/commands/eval.ts +41 -0
- package/src/commands/export.ts +107 -8
- package/src/commands/watch.ts +2 -1
- package/src/index.ts +98 -35
package/src/commands/analyze.ts
CHANGED
|
@@ -1,7 +1,39 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
1
2
|
import { MediaWikiClient } from "@var-ia/ingestion";
|
|
3
|
+
import type { RevisionOptions } from "@var-ia/ingestion";
|
|
2
4
|
import { sectionDiffer, citationTracker, revertDetector, templateTracker } from "@var-ia/analyzers";
|
|
3
|
-
import type {
|
|
5
|
+
import type { TemplateType } from "@var-ia/analyzers";
|
|
6
|
+
import type { EvidenceEvent, EvidenceLayer, Revision, DeterministicFact } from "@var-ia/evidence-graph";
|
|
7
|
+
import { createAdapter } from "@var-ia/interpreter";
|
|
8
|
+
import type { ModelConfig } from "@var-ia/interpreter";
|
|
4
9
|
import { loadCachedRevisions, saveRevisions } from "./cache.js";
|
|
10
|
+
import { stripWikitext, fuzzyFindClaim, findSectionForText } from "./claim.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Outcome of analysing a single page as part of a batch run.
 */
interface BatchPageResult {
  /** Title of the page exactly as it was listed in the pages file. */
  pageTitle: string;
  /** Numeric page identifier; `runBatch` currently always stores `0` here. */
  pageId: number;
  /** Number of entries in {@link BatchPageResult.events}. */
  eventCount: number;
  /** Evidence events produced by the analysis of this page. */
  events: EvidenceEvent[];
}
|
|
18
|
+
|
|
19
|
+
/**
 * Aggregate summary of a batch analysis run over several pages.
 */
interface BatchResult {
  /** Discriminator marking this result as coming from batch mode. */
  mode: "batch";
  /** Number of page titles that were read from the pages file. */
  batchSize: number;
  /** Per-page results, in the order the pages were processed. */
  pages: BatchPageResult[];
  /** Total number of events collected across all pages. */
  totalEvents: number;
  /** ISO-8601 timestamp taken when the summary was assembled. */
  generatedAt: string;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Translates a template category into the policy dimension it signals.
 *
 * @param type - Template category attached to a tracked template.
 * @returns The policy dimension label for the category, or `null` when the
 *   category has no policy mapping.
 */
function templateTypeToPolicyDimension(type: TemplateType): string | null {
  if (type === "citation") return "verifiability";
  if (type === "neutrality") return "npov";
  if (type === "blp") return "blp";
  if (type === "dispute") return "due_weight";
  if (type === "protection") return "protection";
  return null;
}
|
|
5
37
|
|
|
6
38
|
export async function runAnalyze(
|
|
7
39
|
pageTitle: string,
|
|
@@ -9,8 +41,14 @@ export async function runAnalyze(
|
|
|
9
41
|
fromRevId?: number,
|
|
10
42
|
_toRevId?: number,
|
|
11
43
|
useCache = false,
|
|
12
|
-
|
|
13
|
-
|
|
44
|
+
modelConfig?: ModelConfig,
|
|
45
|
+
apiUrl?: string,
|
|
46
|
+
pagesFile?: string,
|
|
47
|
+
): Promise<{ events: EvidenceEvent[]; revisions: Revision[] }> {
|
|
48
|
+
if (pagesFile) {
|
|
49
|
+
return runBatch(pagesFile, depth, fromRevId, _toRevId, useCache, modelConfig, apiUrl);
|
|
50
|
+
}
|
|
51
|
+
const client = new MediaWikiClient(apiUrl ? { apiUrl } : undefined);
|
|
14
52
|
console.log(`Analyzing "${pageTitle}" at depth: ${depth}...`);
|
|
15
53
|
|
|
16
54
|
let revisions: Revision[] = [];
|
|
@@ -25,8 +63,16 @@ export async function runAnalyze(
|
|
|
25
63
|
|
|
26
64
|
if (revisions.length === 0) {
|
|
27
65
|
console.log(`Fetching revisions from Wikipedia...`);
|
|
28
|
-
const options:
|
|
29
|
-
|
|
66
|
+
const options: RevisionOptions = { direction: "newer" };
|
|
67
|
+
if (fromRevId) {
|
|
68
|
+
options.startRevId = fromRevId;
|
|
69
|
+
}
|
|
70
|
+
if (_toRevId) {
|
|
71
|
+
options.endRevId = _toRevId;
|
|
72
|
+
}
|
|
73
|
+
if (!fromRevId && !_toRevId) {
|
|
74
|
+
options.limit = 20;
|
|
75
|
+
}
|
|
30
76
|
revisions = await client.fetchRevisions(pageTitle, options);
|
|
31
77
|
console.log(`Fetched ${revisions.length} revisions.`);
|
|
32
78
|
|
|
@@ -38,7 +84,7 @@ export async function runAnalyze(
|
|
|
38
84
|
|
|
39
85
|
if (revisions.length < 2) {
|
|
40
86
|
console.log("Need at least 2 revisions to analyze.");
|
|
41
|
-
return [];
|
|
87
|
+
return { events: [], revisions: [] };
|
|
42
88
|
}
|
|
43
89
|
|
|
44
90
|
const events: EvidenceEvent[] = [];
|
|
@@ -46,10 +92,21 @@ export async function runAnalyze(
|
|
|
46
92
|
(a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
|
|
47
93
|
);
|
|
48
94
|
|
|
95
|
+
const allSeenSentences = new Set<string>();
|
|
96
|
+
|
|
49
97
|
for (let i = 1; i < sortedRevs.length; i++) {
|
|
50
98
|
const before = sortedRevs[i - 1];
|
|
51
99
|
const after = sortedRevs[i];
|
|
52
100
|
|
|
101
|
+
const isBrief = depth === "brief";
|
|
102
|
+
const isForensic = depth === "forensic";
|
|
103
|
+
const extraFacts: DeterministicFact[] = isForensic
|
|
104
|
+
? [
|
|
105
|
+
{ fact: "full_wikitext_before", detail: before.content },
|
|
106
|
+
{ fact: "full_wikitext_after", detail: after.content },
|
|
107
|
+
]
|
|
108
|
+
: [];
|
|
109
|
+
|
|
53
110
|
const beforeSections = sectionDiffer.extractSections(before.content);
|
|
54
111
|
const afterSections = sectionDiffer.extractSections(after.content);
|
|
55
112
|
const sectionChanges = sectionDiffer.diffSections(beforeSections, afterSections);
|
|
@@ -72,10 +129,11 @@ export async function runAnalyze(
|
|
|
72
129
|
fromRevisionId: before.revId,
|
|
73
130
|
toRevisionId: after.revId,
|
|
74
131
|
section: "body",
|
|
75
|
-
before: cit.before?.raw ?? "",
|
|
76
|
-
after: cit.after?.raw ?? "",
|
|
132
|
+
before: isBrief ? "" : (cit.before?.raw ?? ""),
|
|
133
|
+
after: isBrief ? "" : (cit.after?.raw ?? ""),
|
|
77
134
|
deterministicFacts: [
|
|
78
135
|
{ fact: "citation_changed", detail: `type=${cit.type}` },
|
|
136
|
+
...extraFacts,
|
|
79
137
|
],
|
|
80
138
|
layer,
|
|
81
139
|
timestamp: after.timestamp,
|
|
@@ -84,17 +142,40 @@ export async function runAnalyze(
|
|
|
84
142
|
|
|
85
143
|
for (const tpl of templateChanges) {
|
|
86
144
|
if (tpl.type === "unchanged") continue;
|
|
145
|
+
|
|
146
|
+
if (tpl.template.type === "protection") {
|
|
147
|
+
events.push({
|
|
148
|
+
eventType: "protection_changed",
|
|
149
|
+
fromRevisionId: before.revId,
|
|
150
|
+
toRevisionId: after.revId,
|
|
151
|
+
section: "body",
|
|
152
|
+
before: tpl.type === "removed" ? tpl.template.name : "",
|
|
153
|
+
after: tpl.type === "added" ? tpl.template.name : "",
|
|
154
|
+
deterministicFacts: [
|
|
155
|
+
{ fact: "protection_changed", detail: `name=${tpl.template.name} type=${tpl.type}` },
|
|
156
|
+
...extraFacts,
|
|
157
|
+
],
|
|
158
|
+
layer: "policy_coded",
|
|
159
|
+
timestamp: after.timestamp,
|
|
160
|
+
});
|
|
161
|
+
continue;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
const policyDimension = templateTypeToPolicyDimension(tpl.template.type);
|
|
165
|
+
const layer: EvidenceLayer = policyDimension ? "policy_coded" : "observed";
|
|
87
166
|
events.push({
|
|
88
167
|
eventType: tpl.type === "added" ? "template_added" : "template_removed",
|
|
89
168
|
fromRevisionId: before.revId,
|
|
90
169
|
toRevisionId: after.revId,
|
|
91
170
|
section: "body",
|
|
92
171
|
before: "",
|
|
93
|
-
after: tpl.template.name,
|
|
172
|
+
after: isBrief ? "" : tpl.template.name,
|
|
94
173
|
deterministicFacts: [
|
|
95
174
|
{ fact: "template_changed", detail: `name=${tpl.template.name} type=${tpl.type}` },
|
|
175
|
+
...(policyDimension ? [{ fact: "policy_signal", detail: `dimension=${policyDimension} signal=${tpl.template.name.toLowerCase().replace(/\s+/g, "_")}` }] : []),
|
|
176
|
+
...extraFacts,
|
|
96
177
|
],
|
|
97
|
-
layer
|
|
178
|
+
layer,
|
|
98
179
|
timestamp: after.timestamp,
|
|
99
180
|
});
|
|
100
181
|
}
|
|
@@ -106,10 +187,11 @@ export async function runAnalyze(
|
|
|
106
187
|
fromRevisionId: before.revId,
|
|
107
188
|
toRevisionId: after.revId,
|
|
108
189
|
section: sc.section,
|
|
109
|
-
before: sc.fromContent ?? "",
|
|
110
|
-
after: sc.toContent ?? "",
|
|
190
|
+
before: isBrief ? "" : (sc.fromContent ?? ""),
|
|
191
|
+
after: isBrief ? "" : (sc.toContent ?? ""),
|
|
111
192
|
deterministicFacts: [
|
|
112
193
|
{ fact: "section_changed", detail: `change=${sc.changeType}` },
|
|
194
|
+
...extraFacts,
|
|
113
195
|
],
|
|
114
196
|
layer: "observed",
|
|
115
197
|
timestamp: after.timestamp,
|
|
@@ -123,15 +205,209 @@ export async function runAnalyze(
|
|
|
123
205
|
toRevisionId: after.revId,
|
|
124
206
|
section: "",
|
|
125
207
|
before: "",
|
|
126
|
-
after: after.comment,
|
|
208
|
+
after: isBrief ? "" : after.comment,
|
|
127
209
|
deterministicFacts: [
|
|
128
210
|
{ fact: "revert_detected", detail: after.comment },
|
|
211
|
+
{ fact: "policy_signal", detail: "dimension=edit_warring signal=revert_detected" },
|
|
212
|
+
...extraFacts,
|
|
129
213
|
],
|
|
130
|
-
layer: "
|
|
214
|
+
layer: "policy_coded",
|
|
131
215
|
timestamp: after.timestamp,
|
|
132
216
|
});
|
|
133
217
|
}
|
|
218
|
+
|
|
219
|
+
const leadChange = sectionChanges.find(
|
|
220
|
+
sc => sc.section === "(lead)" && sc.changeType === "modified"
|
|
221
|
+
);
|
|
222
|
+
if (leadChange) {
|
|
223
|
+
const fromLen = leadChange.fromContent?.length ?? 0;
|
|
224
|
+
const toLen = leadChange.toContent?.length ?? 0;
|
|
225
|
+
const contentMovedOut = fromLen > toLen && toLen < fromLen * 0.5;
|
|
226
|
+
const contentMovedIn = toLen > fromLen && fromLen < toLen * 0.5;
|
|
227
|
+
|
|
228
|
+
if (contentMovedOut) {
|
|
229
|
+
const targetSection = sectionChanges.find(
|
|
230
|
+
sc => sc.section !== "(lead)" && (sc.changeType === "added" || sc.changeType === "modified")
|
|
231
|
+
);
|
|
232
|
+
if (targetSection) {
|
|
233
|
+
events.push({
|
|
234
|
+
eventType: "lead_demotion",
|
|
235
|
+
fromRevisionId: before.revId,
|
|
236
|
+
toRevisionId: after.revId,
|
|
237
|
+
section: targetSection.section,
|
|
238
|
+
before: isBrief ? "" : (leadChange.fromContent ?? ""),
|
|
239
|
+
after: isBrief ? "" : (leadChange.toContent ?? ""),
|
|
240
|
+
deterministicFacts: [
|
|
241
|
+
{ fact: "lead_content_moved", detail: `from=lead to=${targetSection.section}` },
|
|
242
|
+
...extraFacts,
|
|
243
|
+
],
|
|
244
|
+
layer: "observed",
|
|
245
|
+
timestamp: after.timestamp,
|
|
246
|
+
});
|
|
247
|
+
}
|
|
248
|
+
} else if (contentMovedIn) {
|
|
249
|
+
const sourceSection = sectionChanges.find(
|
|
250
|
+
sc => sc.section !== "(lead)" && (sc.changeType === "removed" || sc.changeType === "modified")
|
|
251
|
+
);
|
|
252
|
+
if (sourceSection) {
|
|
253
|
+
events.push({
|
|
254
|
+
eventType: "lead_promotion",
|
|
255
|
+
fromRevisionId: before.revId,
|
|
256
|
+
toRevisionId: after.revId,
|
|
257
|
+
section: sourceSection.section,
|
|
258
|
+
before: isBrief ? "" : (leadChange.fromContent ?? ""),
|
|
259
|
+
after: isBrief ? "" : (leadChange.toContent ?? ""),
|
|
260
|
+
deterministicFacts: [
|
|
261
|
+
{ fact: "lead_content_moved", detail: `from=${sourceSection.section} to=lead` },
|
|
262
|
+
...extraFacts,
|
|
263
|
+
],
|
|
264
|
+
layer: "observed",
|
|
265
|
+
timestamp: after.timestamp,
|
|
266
|
+
});
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
const beforePlain = stripWikitext(before.content);
|
|
272
|
+
const afterPlain = stripWikitext(after.content);
|
|
273
|
+
|
|
274
|
+
const beforeSentences = beforePlain.split(/[.!?]\s+/).filter(s => s.trim().length > 20);
|
|
275
|
+
const afterSentences = afterPlain.split(/[.!?]\s+/).filter(s => s.trim().length > 20);
|
|
276
|
+
|
|
277
|
+
for (const sentence of afterSentences) {
|
|
278
|
+
const trimmed = sentence.trim();
|
|
279
|
+
if (!trimmed) continue;
|
|
280
|
+
const foundInBefore = fuzzyFindClaim(trimmed, beforePlain);
|
|
281
|
+
if (!foundInBefore) {
|
|
282
|
+
const normalized = trimmed.toLowerCase().replace(/\s+/g, " ");
|
|
283
|
+
const wasSeenBefore = allSeenSentences.has(normalized);
|
|
284
|
+
const section = findSectionForText(after.content, trimmed);
|
|
285
|
+
events.push({
|
|
286
|
+
eventType: wasSeenBefore ? "claim_reintroduced" : "claim_first_seen",
|
|
287
|
+
fromRevisionId: before.revId,
|
|
288
|
+
toRevisionId: after.revId,
|
|
289
|
+
section,
|
|
290
|
+
before: "",
|
|
291
|
+
after: isBrief ? "" : trimmed,
|
|
292
|
+
deterministicFacts: [
|
|
293
|
+
{ fact: "claim_detected", detail: `sentence_length=${trimmed.length}` },
|
|
294
|
+
...extraFacts,
|
|
295
|
+
],
|
|
296
|
+
layer: "observed",
|
|
297
|
+
timestamp: after.timestamp,
|
|
298
|
+
});
|
|
299
|
+
} else {
|
|
300
|
+
const oldLen = foundInBefore.length;
|
|
301
|
+
const newLen = trimmed.length;
|
|
302
|
+
if (Math.abs(newLen - oldLen) > oldLen * 0.2) {
|
|
303
|
+
const section = findSectionForText(after.content, trimmed);
|
|
304
|
+
events.push({
|
|
305
|
+
eventType: "claim_reworded",
|
|
306
|
+
fromRevisionId: before.revId,
|
|
307
|
+
toRevisionId: after.revId,
|
|
308
|
+
section,
|
|
309
|
+
before: isBrief ? "" : foundInBefore,
|
|
310
|
+
after: isBrief ? "" : trimmed,
|
|
311
|
+
deterministicFacts: [
|
|
312
|
+
{ fact: "claim_reworded", detail: `old_length=${oldLen} new_length=${newLen}` },
|
|
313
|
+
...extraFacts,
|
|
314
|
+
],
|
|
315
|
+
layer: "observed",
|
|
316
|
+
timestamp: after.timestamp,
|
|
317
|
+
});
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
for (const sentence of beforeSentences) {
|
|
323
|
+
const trimmed = sentence.trim();
|
|
324
|
+
if (!trimmed) continue;
|
|
325
|
+
const foundInAfter = fuzzyFindClaim(trimmed, afterPlain);
|
|
326
|
+
if (!foundInAfter) {
|
|
327
|
+
const section = findSectionForText(before.content, trimmed);
|
|
328
|
+
events.push({
|
|
329
|
+
eventType: "claim_removed",
|
|
330
|
+
fromRevisionId: before.revId,
|
|
331
|
+
toRevisionId: after.revId,
|
|
332
|
+
section,
|
|
333
|
+
before: isBrief ? "" : trimmed,
|
|
334
|
+
after: "",
|
|
335
|
+
deterministicFacts: [
|
|
336
|
+
{ fact: "claim_removed", detail: `sentence_length=${trimmed.length}` },
|
|
337
|
+
...extraFacts,
|
|
338
|
+
],
|
|
339
|
+
layer: "observed",
|
|
340
|
+
timestamp: after.timestamp,
|
|
341
|
+
});
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
for (const s of afterSentences) {
|
|
346
|
+
const normalized = s.trim().toLowerCase().replace(/\s+/g, " ");
|
|
347
|
+
if (normalized) allSeenSentences.add(normalized);
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
if (modelConfig && events.length > 0) {
|
|
352
|
+
const adapter = createAdapter(modelConfig);
|
|
353
|
+
console.log(`Interpreting ${events.length} events with ${modelConfig.provider}...`);
|
|
354
|
+
const interpreted = await adapter.interpret(events);
|
|
355
|
+
for (let i = 0; i < interpreted.length; i++) {
|
|
356
|
+
interpreted[i].layer = events[i].layer;
|
|
357
|
+
}
|
|
358
|
+
console.log("Interpretation complete.");
|
|
359
|
+
return { events: interpreted, revisions: sortedRevs };
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
return { events, revisions: sortedRevs };
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
/**
 * Analyses every page title listed in a text file, one page after another.
 *
 * The file is read as UTF-8 and split on newlines. Each line is trimmed;
 * empty lines and lines starting with `#` are ignored. Every remaining line
 * is passed to `runAnalyze` as a page title. A per-page event count summary
 * is printed once all pages have been processed.
 *
 * @param pagesFile - Path of the file that lists the page titles.
 * @param depth - Analysis depth, forwarded unchanged to `runAnalyze`.
 * @param fromRevId - Optional start revision id, forwarded to `runAnalyze`.
 * @param toRevId - Optional end revision id, forwarded to `runAnalyze`.
 * @param useCache - Cache flag, forwarded to `runAnalyze`.
 * @param modelConfig - Optional model configuration, forwarded to `runAnalyze`.
 * @param apiUrl - Optional API URL, forwarded to `runAnalyze`.
 * @returns The events of all pages concatenated in file order; `revisions`
 *   is always an empty array in batch mode.
 */
async function runBatch(
  pagesFile: string,
  depth: string,
  fromRevId?: number,
  toRevId?: number,
  useCache = false,
  modelConfig?: ModelConfig,
  apiUrl?: string,
): Promise<{ events: EvidenceEvent[]; revisions: Revision[] }> {
  const titles: string[] = [];
  for (const rawLine of readFileSync(pagesFile, "utf-8").split("\n")) {
    const line = rawLine.trim();
    if (line.length === 0 || line.startsWith("#")) continue;
    titles.push(line);
  }

  console.log(`Batch mode: ${titles.length} pages from ${pagesFile}\n`);

  const pages: BatchPageResult[] = [];
  const allEvents: EvidenceEvent[] = [];

  for (let index = 0; index < titles.length; index++) {
    const title = titles[index];
    console.log(`--- Page ${index + 1}/${titles.length}: ${title} ---`);

    // pagesFile is passed as undefined so runAnalyze takes its single-page path.
    const analysis = await runAnalyze(title, depth, fromRevId, toRevId, useCache, modelConfig, apiUrl, undefined);
    const pageEvents = analysis.events;

    pages.push({
      pageTitle: title,
      pageId: 0,
      eventCount: pageEvents.length,
      events: pageEvents,
    });
    allEvents.push(...pageEvents);
  }

  const result: BatchResult = {
    mode: "batch",
    batchSize: titles.length,
    pages,
    totalEvents: allEvents.length,
    generatedAt: new Date().toISOString(),
  };

  console.log(`\n=== Batch Results ===`);
  console.log(`Pages processed: ${result.batchSize}`);
  console.log(`Total events: ${result.totalEvents}\n`);
  result.pages.forEach((page) => {
    console.log(` ${page.pageTitle}: ${page.eventCount} events`);
  });

  return { events: allEvents, revisions: [] };
}
|
package/src/commands/cache.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { Persistence } from "@var-ia/persistence";
|
|
2
|
+
import type { PersistenceAdapter } from "@var-ia/persistence";
|
|
2
3
|
import type { Revision } from "@var-ia/evidence-graph";
|
|
3
4
|
import { existsSync, mkdirSync } from "node:fs";
|
|
4
5
|
import { homedir } from "node:os";
|
|
@@ -7,9 +8,9 @@ import { join } from "node:path";
|
|
|
7
8
|
const CACHE_DIR = join(homedir(), ".wikihistory");
|
|
8
9
|
const DB_PATH = join(CACHE_DIR, "varia.db");
|
|
9
10
|
|
|
10
|
-
let _instance:
|
|
11
|
+
let _instance: PersistenceAdapter | null = null;
|
|
11
12
|
|
|
12
|
-
export function getPersistence():
|
|
13
|
+
export function getPersistence(): PersistenceAdapter {
|
|
13
14
|
if (!_instance) {
|
|
14
15
|
if (!existsSync(CACHE_DIR)) {
|
|
15
16
|
mkdirSync(CACHE_DIR, { recursive: true });
|
package/src/commands/claim.ts
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
import { MediaWikiClient } from "@var-ia/ingestion";
|
|
2
2
|
import { createClaimIdentity } from "@var-ia/evidence-graph";
|
|
3
|
-
import type { ClaimState, Revision } from "@var-ia/evidence-graph";
|
|
3
|
+
import type { ClaimState, EvidenceEvent, Revision } from "@var-ia/evidence-graph";
|
|
4
|
+
import { createAdapter } from "@var-ia/interpreter";
|
|
5
|
+
import type { ModelConfig } from "@var-ia/interpreter";
|
|
4
6
|
import { loadCachedRevisions, saveRevisions } from "./cache.js";
|
|
5
7
|
|
|
6
8
|
export async function runClaim(
|
|
7
9
|
pageTitle: string,
|
|
8
10
|
claimText: string,
|
|
9
11
|
useCache = false,
|
|
12
|
+
modelConfig?: ModelConfig,
|
|
13
|
+
apiUrl?: string,
|
|
10
14
|
): Promise<void> {
|
|
11
|
-
const client = new MediaWikiClient();
|
|
15
|
+
const client = new MediaWikiClient(apiUrl ? { apiUrl } : undefined);
|
|
12
16
|
console.log(`Tracking claim in "${pageTitle}"...`);
|
|
13
17
|
console.log(`Claim text: "${claimText}"\n`);
|
|
14
18
|
|
|
@@ -97,6 +101,36 @@ export async function runClaim(
|
|
|
97
101
|
return;
|
|
98
102
|
}
|
|
99
103
|
|
|
104
|
+
if (modelConfig && variants.length >= 2) {
|
|
105
|
+
const adapter = createAdapter(modelConfig);
|
|
106
|
+
const comparisonEvents: EvidenceEvent[] = [];
|
|
107
|
+
for (let i = 1; i < variants.length; i++) {
|
|
108
|
+
comparisonEvents.push({
|
|
109
|
+
eventType: "claim_reworded",
|
|
110
|
+
fromRevisionId: variants[i - 1].revisionId,
|
|
111
|
+
toRevisionId: variants[i].revisionId,
|
|
112
|
+
section: variants[i].section,
|
|
113
|
+
before: variants[i - 1].text,
|
|
114
|
+
after: variants[i].text,
|
|
115
|
+
deterministicFacts: [
|
|
116
|
+
{ fact: "claim_variant_compared", detail: `pair=${i}` },
|
|
117
|
+
],
|
|
118
|
+
layer: "observed",
|
|
119
|
+
timestamp: variants[i].observedAt,
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
if (comparisonEvents.length > 0) {
|
|
124
|
+
console.log(`\nSemantically comparing ${comparisonEvents.length} claim variant pairs with ${modelConfig.provider}...`);
|
|
125
|
+
const interpreted = await adapter.interpret(comparisonEvents);
|
|
126
|
+
for (const ie of interpreted) {
|
|
127
|
+
const conf = ie.modelInterpretation.confidence;
|
|
128
|
+
const label = conf >= 0.7 ? "similar" : conf >= 0.4 ? "moderate change" : "substantial change";
|
|
129
|
+
console.log(`[rev ${ie.fromRevisionId}→${ie.toRevisionId}] ${label} (confidence: ${conf.toFixed(2)}) — ${ie.modelInterpretation.semanticChange}`);
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
100
134
|
if (currentState !== "absent" && currentState !== "deleted") {
|
|
101
135
|
currentState = "stabilizing";
|
|
102
136
|
}
|
|
@@ -112,7 +146,7 @@ export async function runClaim(
|
|
|
112
146
|
|
|
113
147
|
export { runClaim as runClaimCommand };
|
|
114
148
|
|
|
115
|
-
function stripWikitext(wikitext: string): string {
|
|
149
|
+
export function stripWikitext(wikitext: string): string {
|
|
116
150
|
let text = wikitext;
|
|
117
151
|
text = text.replace(/<!--[\s\S]*?-->/g, "");
|
|
118
152
|
text = text.replace(/<ref\b[^>]*\/\s*>/gi, "");
|
|
@@ -127,7 +161,7 @@ function stripWikitext(wikitext: string): string {
|
|
|
127
161
|
return text.trim();
|
|
128
162
|
}
|
|
129
163
|
|
|
130
|
-
function fuzzyFindClaim(claimText: string, plainText: string): string {
|
|
164
|
+
export function fuzzyFindClaim(claimText: string, plainText: string): string {
|
|
131
165
|
const normalized = claimText.toLowerCase().replace(/\s+/g, " ").trim();
|
|
132
166
|
const searchText = plainText.toLowerCase().replace(/\s+/g, " ");
|
|
133
167
|
|
|
@@ -147,7 +181,7 @@ function fuzzyFindClaim(claimText: string, plainText: string): string {
|
|
|
147
181
|
return "";
|
|
148
182
|
}
|
|
149
183
|
|
|
150
|
-
function findSectionForText(wikitext: string, plainText: string): string {
|
|
184
|
+
export function findSectionForText(wikitext: string, plainText: string): string {
|
|
151
185
|
const headerRegex = /^(=+)\s*([^=]+?)\s*\1$/gm;
|
|
152
186
|
const lines = wikitext.split("\n");
|
|
153
187
|
let currentSection = "(lead)";
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { createEvalHarness } from "@var-ia/eval";
|
|
2
|
+
import { runAnalyze } from "./analyze.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Runs the benchmark cases supplied by the eval harness and prints a summary.
 *
 * Each selected case is analysed at "detailed" depth through `runAnalyze`, and
 * the resulting events are scored by the harness. If anything inside a case
 * throws, the error is logged and the case is recorded as failed with zero
 * precision and every expected event listed as a miss.
 *
 * @param pageTitleOverride - When set to a non-empty string, only benchmark
 *   cases whose `pageTitle` equals it are run; otherwise all cases are run.
 */
export async function runEval(pageTitleOverride?: string): Promise<void> {
  const harness = createEvalHarness();
  const allCases = harness.benchmarkPages();

  let selected = allCases;
  if (pageTitleOverride) {
    selected = allCases.filter((candidate) => candidate.pageTitle === pageTitleOverride);
  }

  console.log(`Running ${selected.length} benchmark tests...\n`);

  const results = [];
  for (const test of selected) {
    console.log(`[${test.id}] ${test.description}...`);
    try {
      const analysis = await runAnalyze(test.pageTitle, "detailed");
      const outcome = harness.evaluate(test, analysis.events);
      results.push(outcome);
      console.log(` ${outcome.passed ? "PASS" : "FAIL"} precision=${outcome.precision.toFixed(2)} events=${outcome.eventCount.actual}/${outcome.eventCount.expected}`);
    } catch (err) {
      console.log(` ERROR: ${err}`);
      // Record the case as a failure so it still counts in the summary.
      const expectedEvents = test.expectedEvents;
      results.push({
        testId: test.id,
        passed: false,
        precision: 0,
        eventCount: { expected: expectedEvents.length, actual: 0 },
        matches: [],
        misses: expectedEvents.map((expected) => ({ expected })),
        falsePositives: [],
      });
    }
  }

  const summary = harness.computeScores(results);
  const precisionPercent = (summary.overallPrecision * 100).toFixed(1);
  console.log(`\n=== Eval Summary ===`);
  console.log(`Passed: ${summary.testsPassed}/${summary.totalTests}`);
  console.log(`Overall precision: ${precisionPercent}%`);
}
|
package/src/commands/export.ts
CHANGED
|
@@ -1,11 +1,34 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
1
2
|
import { runAnalyze } from "./analyze.js";
|
|
2
|
-
import
|
|
3
|
+
import { createEventIdentity } from "@var-ia/evidence-graph";
|
|
4
|
+
import type { EvidenceEvent, Report, PolicySignal, Revision } from "@var-ia/evidence-graph";
|
|
5
|
+
import type { ModelConfig } from "@var-ia/interpreter";
|
|
6
|
+
|
|
7
|
+
/**
 * Self-describing export that pairs the analysed revisions with the events
 * derived from them, plus a hash over the bundle contents.
 */
interface EvidenceBundle {
  /** Format identifier and version of the bundle layout. */
  format: "varia-evidence-bundle/v1";
  /** ISO-8601 timestamp taken when the bundle was built. */
  generatedAt: string;
  /** Title of the page the bundle describes. */
  pageTitle: string;
  /** Revision ids of the first and last input revision (`0` when there are none). */
  revisionRange: { from: number; to: number };
  /** Revisions that were fed into the analysis. */
  inputRevisions: Revision[];
  /** Events produced by the analysis, each carrying an `eventId`. */
  outputEvents: EvidenceEvent[];
  /** Hex-encoded SHA-256 digest of the JSON-serialised bundle without this field. */
  bundleHash: string;
}
|
|
3
16
|
|
|
4
17
|
export async function runExport(
|
|
5
18
|
pageTitle: string,
|
|
6
19
|
format: string,
|
|
20
|
+
modelConfig?: ModelConfig,
|
|
21
|
+
apiUrl?: string,
|
|
22
|
+
bundle?: boolean,
|
|
7
23
|
): Promise<void> {
|
|
8
|
-
|
|
24
|
+
if (bundle) {
|
|
25
|
+
const { events, revisions } = await runAnalyze(pageTitle, "detailed", undefined, undefined, false, modelConfig, apiUrl);
|
|
26
|
+
const bundleData = buildBundle(pageTitle, events, revisions);
|
|
27
|
+
console.log(JSON.stringify(bundleData, null, 2));
|
|
28
|
+
return;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
const { events } = await runAnalyze(pageTitle, "detailed", undefined, undefined, false, modelConfig, apiUrl);
|
|
9
32
|
|
|
10
33
|
if (events.length === 0) {
|
|
11
34
|
console.log("No events to export.");
|
|
@@ -22,10 +45,49 @@ export async function runExport(
|
|
|
22
45
|
}
|
|
23
46
|
}
|
|
24
47
|
|
|
48
|
+
/**
 * Assembles an evidence bundle for one page and signs it with a content hash.
 *
 * @param pageTitle - Title of the analysed page.
 * @param events - Events produced by the analysis; an event that has no
 *   `eventId` gets one from `createEventIdentity`.
 * @param revisions - Revisions that were analysed; the first and last entries
 *   supply the revision range (`0` for either end when the list is empty).
 * @returns The bundle with `bundleHash` set to the hex SHA-256 digest of the
 *   JSON serialisation of all other bundle fields.
 */
function buildBundle(pageTitle: string, events: EvidenceEvent[], revisions: Revision[]): EvidenceBundle {
  const firstRevision = revisions[0];
  const lastRevision = revisions[revisions.length - 1];
  const revisionRange = {
    from: firstRevision?.revId ?? 0,
    to: lastRevision?.revId ?? 0,
  };

  const outputEvents = events.map((event) => {
    const eventId = event.eventId ?? createEventIdentity(event);
    return { ...event, eventId };
  });

  // Property order matters: it determines both the printed JSON and the hash input.
  const unsigned = {
    format: "varia-evidence-bundle/v1" as const,
    generatedAt: new Date().toISOString(),
    pageTitle,
    revisionRange,
    inputRevisions: revisions,
    outputEvents,
  };

  const hasher = createHash("sha256");
  hasher.update(JSON.stringify(unsigned));
  const bundleHash = hasher.digest("hex");

  return { ...unsigned, bundleHash };
}
|
|
72
|
+
|
|
25
73
|
function buildReport(pageTitle: string, events: EvidenceEvent[]): Report {
|
|
26
74
|
const sortedRevs = events.map((e) => e.toRevisionId).sort((a, b) => a - b);
|
|
27
75
|
const timestamps = events.map((e) => e.timestamp).sort();
|
|
28
76
|
|
|
77
|
+
const observedCount = events.length;
|
|
78
|
+
const policyCount = events.filter((e) => e.layer === "policy_coded").length;
|
|
79
|
+
const modelCount = events.filter((e) => e.modelInterpretation != null).length;
|
|
80
|
+
|
|
81
|
+
const layers: Report["layers"] = [{ label: "observed", description: "Deterministic", events: observedCount, reproducible: true }];
|
|
82
|
+
if (policyCount > 0) {
|
|
83
|
+
layers.push({ label: "policy_coded", description: "Wikipedia policy signals", events: policyCount, reproducible: true });
|
|
84
|
+
}
|
|
85
|
+
if (modelCount > 0) {
|
|
86
|
+
layers.push({ label: "model_interpretation", description: "Model-assisted semantic interpretation", events: modelCount, reproducible: false });
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
const hasModel = modelCount > 0;
|
|
90
|
+
|
|
29
91
|
return {
|
|
30
92
|
pageTitle,
|
|
31
93
|
pageId: 0,
|
|
@@ -35,9 +97,7 @@ function buildReport(pageTitle: string, events: EvidenceEvent[]): Report {
|
|
|
35
97
|
},
|
|
36
98
|
generatedAt: new Date().toISOString(),
|
|
37
99
|
depth: "detailed",
|
|
38
|
-
layers
|
|
39
|
-
{ label: "observed", description: "Deterministic", events: events.length, reproducible: true },
|
|
40
|
-
],
|
|
100
|
+
layers,
|
|
41
101
|
timeline: {
|
|
42
102
|
totalRevisions: sortedRevs.length,
|
|
43
103
|
analyzedRevisions: sortedRevs.length,
|
|
@@ -50,17 +110,56 @@ function buildReport(pageTitle: string, events: EvidenceEvent[]): Report {
|
|
|
50
110
|
timestamp: e.timestamp,
|
|
51
111
|
eventType: e.eventType,
|
|
52
112
|
summary: e.deterministicFacts.map((f) => f.fact).join("; "),
|
|
53
|
-
layer: e.layer,
|
|
113
|
+
layer: e.modelInterpretation ? "model_interpretation" : e.layer,
|
|
54
114
|
})),
|
|
55
115
|
},
|
|
56
116
|
claims: [],
|
|
57
117
|
sources: [],
|
|
58
|
-
policySignals:
|
|
59
|
-
caveats:
|
|
118
|
+
policySignals: extractPolicySignals(events),
|
|
119
|
+
caveats: hasModel
|
|
120
|
+
? ["Model-assisted interpretation applied — confidence scores are per-event and may vary between runs."]
|
|
121
|
+
: ["Deterministic analysis only — no model interpretation applied."],
|
|
60
122
|
phase: "Phase 1b",
|
|
61
123
|
};
|
|
62
124
|
}
|
|
63
125
|
|
|
126
|
+
/**
 * Collapses policy-coded events into one entry per distinct policy signal.
 *
 * Events are processed in chronological order. Only events on the
 * "policy_coded" layer that carry a `policy_signal` fact of the form
 * `dimension=<d> signal=<s>` are considered; all others are skipped.
 * Signals are keyed by `<d>:<s>`.
 *
 * For each key, `firstSeenRevisionId` is the target revision of the earliest
 * event, and `lastSeenRevisionId` is set to the target revision of the latest
 * event once the signal has been seen more than once. `active` reflects the
 * most recent event: a "template_removed" event marks the signal inactive and
 * any other event marks it active. Previously every event set `active` to
 * `true`, so a signal whose template had been removed was still reported as
 * active.
 *
 * @param events - Evidence events in any order; the array is not mutated.
 * @returns One policy signal per distinct dimension/signal pair, in order of
 *   first appearance.
 */
function extractPolicySignals(events: EvidenceEvent[]): PolicySignal[] {
  const signalMap = new Map<string, PolicySignal>();
  // Sort a copy so first/last-seen tracking follows time without touching the caller's array.
  const sortedEvents = [...events].sort(
    (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime(),
  );

  for (const event of sortedEvents) {
    if (event.layer !== "policy_coded") continue;
    const signalFact = event.deterministicFacts.find((f) => f.fact === "policy_signal");
    if (!signalFact?.detail) continue;

    // The detail is a space-separated list of key=value tokens.
    const parts = signalFact.detail.split(" ");
    const dimMatch = parts.find((p) => p.startsWith("dimension="));
    const sigMatch = parts.find((p) => p.startsWith("signal="));
    if (!dimMatch || !sigMatch) continue;

    const dimension = dimMatch.slice("dimension=".length);
    const signal = sigMatch.slice("signal=".length);
    const key = `${dimension}:${signal}`;

    // A removal means the signal no longer applies after this revision.
    const active = event.eventType !== "template_removed";

    const existing = signalMap.get(key);
    if (existing) {
      existing.lastSeenRevisionId = event.toRevisionId;
      existing.active = active;
    } else {
      signalMap.set(key, {
        dimension,
        signal,
        firstSeenRevisionId: event.toRevisionId,
        active,
      });
    }
  }

  return [...signalMap.values()];
}
|
|
162
|
+
|
|
64
163
|
function toCSV(events: EvidenceEvent[]): string {
|
|
65
164
|
const header = "timestamp,eventType,fromRevisionId,toRevisionId,section,before,after,facts";
|
|
66
165
|
const rows = events.map((e) => {
|