@var-ia/cli 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/src/commands/analyze.d.ts +5 -2
- package/dist/src/commands/analyze.d.ts.map +1 -1
- package/dist/src/commands/analyze.js +214 -13
- package/dist/src/commands/analyze.js.map +1 -1
- package/dist/src/commands/cache.d.ts +2 -2
- package/dist/src/commands/cache.d.ts.map +1 -1
- package/dist/src/commands/cache.js.map +1 -1
- package/dist/src/commands/claim.d.ts +4 -1
- package/dist/src/commands/claim.d.ts.map +1 -1
- package/dist/src/commands/claim.js +5 -5
- package/dist/src/commands/claim.js.map +1 -1
- package/dist/src/commands/eval.d.ts +2 -0
- package/dist/src/commands/eval.d.ts.map +1 -0
- package/dist/src/commands/eval.js +38 -0
- package/dist/src/commands/eval.js.map +1 -0
- package/dist/src/commands/export.d.ts +1 -1
- package/dist/src/commands/export.d.ts.map +1 -1
- package/dist/src/commands/export.js +30 -2
- package/dist/src/commands/export.js.map +1 -1
- package/dist/src/commands/watch.d.ts +1 -1
- package/dist/src/commands/watch.d.ts.map +1 -1
- package/dist/src/commands/watch.js +2 -2
- package/dist/src/commands/watch.js.map +1 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +46 -24
- package/dist/src/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +6 -5
- package/src/commands/analyze.ts +260 -14
- package/src/commands/cache.ts +3 -2
- package/src/commands/claim.ts +5 -4
- package/src/commands/eval.ts +41 -0
- package/src/commands/export.ts +48 -2
- package/src/commands/watch.ts +2 -1
- package/src/index.ts +62 -31
package/src/commands/analyze.ts
CHANGED
|
@@ -1,10 +1,28 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
1
2
|
import { MediaWikiClient } from "@var-ia/ingestion";
|
|
3
|
+
import type { RevisionOptions } from "@var-ia/ingestion";
|
|
2
4
|
import { sectionDiffer, citationTracker, revertDetector, templateTracker } from "@var-ia/analyzers";
|
|
3
5
|
import type { TemplateType } from "@var-ia/analyzers";
|
|
4
|
-
import type { EvidenceEvent, EvidenceLayer, Revision } from "@var-ia/evidence-graph";
|
|
6
|
+
import type { EvidenceEvent, EvidenceLayer, Revision, DeterministicFact } from "@var-ia/evidence-graph";
|
|
5
7
|
import { createAdapter } from "@var-ia/interpreter";
|
|
6
8
|
import type { ModelConfig } from "@var-ia/interpreter";
|
|
7
9
|
import { loadCachedRevisions, saveRevisions } from "./cache.js";
|
|
10
|
+
import { stripWikitext, fuzzyFindClaim, findSectionForText } from "./claim.js";
|
|
11
|
+
|
|
12
|
+
interface BatchPageResult {
|
|
13
|
+
pageTitle: string;
|
|
14
|
+
pageId: number;
|
|
15
|
+
eventCount: number;
|
|
16
|
+
events: EvidenceEvent[];
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
interface BatchResult {
|
|
20
|
+
mode: "batch";
|
|
21
|
+
batchSize: number;
|
|
22
|
+
pages: BatchPageResult[];
|
|
23
|
+
totalEvents: number;
|
|
24
|
+
generatedAt: string;
|
|
25
|
+
}
|
|
8
26
|
|
|
9
27
|
function templateTypeToPolicyDimension(type: TemplateType): string | null {
|
|
10
28
|
switch (type) {
|
|
@@ -24,8 +42,13 @@ export async function runAnalyze(
|
|
|
24
42
|
_toRevId?: number,
|
|
25
43
|
useCache = false,
|
|
26
44
|
modelConfig?: ModelConfig,
|
|
27
|
-
|
|
28
|
-
|
|
45
|
+
apiUrl?: string,
|
|
46
|
+
pagesFile?: string,
|
|
47
|
+
): Promise<{ events: EvidenceEvent[]; revisions: Revision[] }> {
|
|
48
|
+
if (pagesFile) {
|
|
49
|
+
return runBatch(pagesFile, depth, fromRevId, _toRevId, useCache, modelConfig, apiUrl);
|
|
50
|
+
}
|
|
51
|
+
const client = new MediaWikiClient(apiUrl ? { apiUrl } : undefined);
|
|
29
52
|
console.log(`Analyzing "${pageTitle}" at depth: ${depth}...`);
|
|
30
53
|
|
|
31
54
|
let revisions: Revision[] = [];
|
|
@@ -40,8 +63,16 @@ export async function runAnalyze(
|
|
|
40
63
|
|
|
41
64
|
if (revisions.length === 0) {
|
|
42
65
|
console.log(`Fetching revisions from Wikipedia...`);
|
|
43
|
-
const options:
|
|
44
|
-
|
|
66
|
+
const options: RevisionOptions = { direction: "newer" };
|
|
67
|
+
if (fromRevId) {
|
|
68
|
+
options.startRevId = fromRevId;
|
|
69
|
+
}
|
|
70
|
+
if (_toRevId) {
|
|
71
|
+
options.endRevId = _toRevId;
|
|
72
|
+
}
|
|
73
|
+
if (!fromRevId && !_toRevId) {
|
|
74
|
+
options.limit = 20;
|
|
75
|
+
}
|
|
45
76
|
revisions = await client.fetchRevisions(pageTitle, options);
|
|
46
77
|
console.log(`Fetched ${revisions.length} revisions.`);
|
|
47
78
|
|
|
@@ -53,7 +84,7 @@ export async function runAnalyze(
|
|
|
53
84
|
|
|
54
85
|
if (revisions.length < 2) {
|
|
55
86
|
console.log("Need at least 2 revisions to analyze.");
|
|
56
|
-
return [];
|
|
87
|
+
return { events: [], revisions: [] };
|
|
57
88
|
}
|
|
58
89
|
|
|
59
90
|
const events: EvidenceEvent[] = [];
|
|
@@ -61,10 +92,21 @@ export async function runAnalyze(
|
|
|
61
92
|
(a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
|
|
62
93
|
);
|
|
63
94
|
|
|
95
|
+
const allSeenSentences = new Set<string>();
|
|
96
|
+
|
|
64
97
|
for (let i = 1; i < sortedRevs.length; i++) {
|
|
65
98
|
const before = sortedRevs[i - 1];
|
|
66
99
|
const after = sortedRevs[i];
|
|
67
100
|
|
|
101
|
+
const isBrief = depth === "brief";
|
|
102
|
+
const isForensic = depth === "forensic";
|
|
103
|
+
const extraFacts: DeterministicFact[] = isForensic
|
|
104
|
+
? [
|
|
105
|
+
{ fact: "full_wikitext_before", detail: before.content },
|
|
106
|
+
{ fact: "full_wikitext_after", detail: after.content },
|
|
107
|
+
]
|
|
108
|
+
: [];
|
|
109
|
+
|
|
68
110
|
const beforeSections = sectionDiffer.extractSections(before.content);
|
|
69
111
|
const afterSections = sectionDiffer.extractSections(after.content);
|
|
70
112
|
const sectionChanges = sectionDiffer.diffSections(beforeSections, afterSections);
|
|
@@ -87,10 +129,11 @@ export async function runAnalyze(
|
|
|
87
129
|
fromRevisionId: before.revId,
|
|
88
130
|
toRevisionId: after.revId,
|
|
89
131
|
section: "body",
|
|
90
|
-
before: cit.before?.raw ?? "",
|
|
91
|
-
after: cit.after?.raw ?? "",
|
|
132
|
+
before: isBrief ? "" : (cit.before?.raw ?? ""),
|
|
133
|
+
after: isBrief ? "" : (cit.after?.raw ?? ""),
|
|
92
134
|
deterministicFacts: [
|
|
93
135
|
{ fact: "citation_changed", detail: `type=${cit.type}` },
|
|
136
|
+
...extraFacts,
|
|
94
137
|
],
|
|
95
138
|
layer,
|
|
96
139
|
timestamp: after.timestamp,
|
|
@@ -99,6 +142,25 @@ export async function runAnalyze(
|
|
|
99
142
|
|
|
100
143
|
for (const tpl of templateChanges) {
|
|
101
144
|
if (tpl.type === "unchanged") continue;
|
|
145
|
+
|
|
146
|
+
if (tpl.template.type === "protection") {
|
|
147
|
+
events.push({
|
|
148
|
+
eventType: "protection_changed",
|
|
149
|
+
fromRevisionId: before.revId,
|
|
150
|
+
toRevisionId: after.revId,
|
|
151
|
+
section: "body",
|
|
152
|
+
before: tpl.type === "removed" ? tpl.template.name : "",
|
|
153
|
+
after: tpl.type === "added" ? tpl.template.name : "",
|
|
154
|
+
deterministicFacts: [
|
|
155
|
+
{ fact: "protection_changed", detail: `name=${tpl.template.name} type=${tpl.type}` },
|
|
156
|
+
...extraFacts,
|
|
157
|
+
],
|
|
158
|
+
layer: "policy_coded",
|
|
159
|
+
timestamp: after.timestamp,
|
|
160
|
+
});
|
|
161
|
+
continue;
|
|
162
|
+
}
|
|
163
|
+
|
|
102
164
|
const policyDimension = templateTypeToPolicyDimension(tpl.template.type);
|
|
103
165
|
const layer: EvidenceLayer = policyDimension ? "policy_coded" : "observed";
|
|
104
166
|
events.push({
|
|
@@ -107,10 +169,11 @@ export async function runAnalyze(
|
|
|
107
169
|
toRevisionId: after.revId,
|
|
108
170
|
section: "body",
|
|
109
171
|
before: "",
|
|
110
|
-
after: tpl.template.name,
|
|
172
|
+
after: isBrief ? "" : tpl.template.name,
|
|
111
173
|
deterministicFacts: [
|
|
112
174
|
{ fact: "template_changed", detail: `name=${tpl.template.name} type=${tpl.type}` },
|
|
113
175
|
...(policyDimension ? [{ fact: "policy_signal", detail: `dimension=${policyDimension} signal=${tpl.template.name.toLowerCase().replace(/\s+/g, "_")}` }] : []),
|
|
176
|
+
...extraFacts,
|
|
114
177
|
],
|
|
115
178
|
layer,
|
|
116
179
|
timestamp: after.timestamp,
|
|
@@ -124,10 +187,11 @@ export async function runAnalyze(
|
|
|
124
187
|
fromRevisionId: before.revId,
|
|
125
188
|
toRevisionId: after.revId,
|
|
126
189
|
section: sc.section,
|
|
127
|
-
before: sc.fromContent ?? "",
|
|
128
|
-
after: sc.toContent ?? "",
|
|
190
|
+
before: isBrief ? "" : (sc.fromContent ?? ""),
|
|
191
|
+
after: isBrief ? "" : (sc.toContent ?? ""),
|
|
129
192
|
deterministicFacts: [
|
|
130
193
|
{ fact: "section_changed", detail: `change=${sc.changeType}` },
|
|
194
|
+
...extraFacts,
|
|
131
195
|
],
|
|
132
196
|
layer: "observed",
|
|
133
197
|
timestamp: after.timestamp,
|
|
@@ -141,15 +205,147 @@ export async function runAnalyze(
|
|
|
141
205
|
toRevisionId: after.revId,
|
|
142
206
|
section: "",
|
|
143
207
|
before: "",
|
|
144
|
-
after: after.comment,
|
|
208
|
+
after: isBrief ? "" : after.comment,
|
|
145
209
|
deterministicFacts: [
|
|
146
210
|
{ fact: "revert_detected", detail: after.comment },
|
|
147
211
|
{ fact: "policy_signal", detail: "dimension=edit_warring signal=revert_detected" },
|
|
212
|
+
...extraFacts,
|
|
148
213
|
],
|
|
149
214
|
layer: "policy_coded",
|
|
150
215
|
timestamp: after.timestamp,
|
|
151
216
|
});
|
|
152
217
|
}
|
|
218
|
+
|
|
219
|
+
const leadChange = sectionChanges.find(
|
|
220
|
+
sc => sc.section === "(lead)" && sc.changeType === "modified"
|
|
221
|
+
);
|
|
222
|
+
if (leadChange) {
|
|
223
|
+
const fromLen = leadChange.fromContent?.length ?? 0;
|
|
224
|
+
const toLen = leadChange.toContent?.length ?? 0;
|
|
225
|
+
const contentMovedOut = fromLen > toLen && toLen < fromLen * 0.5;
|
|
226
|
+
const contentMovedIn = toLen > fromLen && fromLen < toLen * 0.5;
|
|
227
|
+
|
|
228
|
+
if (contentMovedOut) {
|
|
229
|
+
const targetSection = sectionChanges.find(
|
|
230
|
+
sc => sc.section !== "(lead)" && (sc.changeType === "added" || sc.changeType === "modified")
|
|
231
|
+
);
|
|
232
|
+
if (targetSection) {
|
|
233
|
+
events.push({
|
|
234
|
+
eventType: "lead_demotion",
|
|
235
|
+
fromRevisionId: before.revId,
|
|
236
|
+
toRevisionId: after.revId,
|
|
237
|
+
section: targetSection.section,
|
|
238
|
+
before: isBrief ? "" : (leadChange.fromContent ?? ""),
|
|
239
|
+
after: isBrief ? "" : (leadChange.toContent ?? ""),
|
|
240
|
+
deterministicFacts: [
|
|
241
|
+
{ fact: "lead_content_moved", detail: `from=lead to=${targetSection.section}` },
|
|
242
|
+
...extraFacts,
|
|
243
|
+
],
|
|
244
|
+
layer: "observed",
|
|
245
|
+
timestamp: after.timestamp,
|
|
246
|
+
});
|
|
247
|
+
}
|
|
248
|
+
} else if (contentMovedIn) {
|
|
249
|
+
const sourceSection = sectionChanges.find(
|
|
250
|
+
sc => sc.section !== "(lead)" && (sc.changeType === "removed" || sc.changeType === "modified")
|
|
251
|
+
);
|
|
252
|
+
if (sourceSection) {
|
|
253
|
+
events.push({
|
|
254
|
+
eventType: "lead_promotion",
|
|
255
|
+
fromRevisionId: before.revId,
|
|
256
|
+
toRevisionId: after.revId,
|
|
257
|
+
section: sourceSection.section,
|
|
258
|
+
before: isBrief ? "" : (leadChange.fromContent ?? ""),
|
|
259
|
+
after: isBrief ? "" : (leadChange.toContent ?? ""),
|
|
260
|
+
deterministicFacts: [
|
|
261
|
+
{ fact: "lead_content_moved", detail: `from=${sourceSection.section} to=lead` },
|
|
262
|
+
...extraFacts,
|
|
263
|
+
],
|
|
264
|
+
layer: "observed",
|
|
265
|
+
timestamp: after.timestamp,
|
|
266
|
+
});
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
const beforePlain = stripWikitext(before.content);
|
|
272
|
+
const afterPlain = stripWikitext(after.content);
|
|
273
|
+
|
|
274
|
+
const beforeSentences = beforePlain.split(/[.!?]\s+/).filter(s => s.trim().length > 20);
|
|
275
|
+
const afterSentences = afterPlain.split(/[.!?]\s+/).filter(s => s.trim().length > 20);
|
|
276
|
+
|
|
277
|
+
for (const sentence of afterSentences) {
|
|
278
|
+
const trimmed = sentence.trim();
|
|
279
|
+
if (!trimmed) continue;
|
|
280
|
+
const foundInBefore = fuzzyFindClaim(trimmed, beforePlain);
|
|
281
|
+
if (!foundInBefore) {
|
|
282
|
+
const normalized = trimmed.toLowerCase().replace(/\s+/g, " ");
|
|
283
|
+
const wasSeenBefore = allSeenSentences.has(normalized);
|
|
284
|
+
const section = findSectionForText(after.content, trimmed);
|
|
285
|
+
events.push({
|
|
286
|
+
eventType: wasSeenBefore ? "claim_reintroduced" : "claim_first_seen",
|
|
287
|
+
fromRevisionId: before.revId,
|
|
288
|
+
toRevisionId: after.revId,
|
|
289
|
+
section,
|
|
290
|
+
before: "",
|
|
291
|
+
after: isBrief ? "" : trimmed,
|
|
292
|
+
deterministicFacts: [
|
|
293
|
+
{ fact: "claim_detected", detail: `sentence_length=${trimmed.length}` },
|
|
294
|
+
...extraFacts,
|
|
295
|
+
],
|
|
296
|
+
layer: "observed",
|
|
297
|
+
timestamp: after.timestamp,
|
|
298
|
+
});
|
|
299
|
+
} else {
|
|
300
|
+
const oldLen = foundInBefore.length;
|
|
301
|
+
const newLen = trimmed.length;
|
|
302
|
+
if (Math.abs(newLen - oldLen) > oldLen * 0.2) {
|
|
303
|
+
const section = findSectionForText(after.content, trimmed);
|
|
304
|
+
events.push({
|
|
305
|
+
eventType: "claim_reworded",
|
|
306
|
+
fromRevisionId: before.revId,
|
|
307
|
+
toRevisionId: after.revId,
|
|
308
|
+
section,
|
|
309
|
+
before: isBrief ? "" : foundInBefore,
|
|
310
|
+
after: isBrief ? "" : trimmed,
|
|
311
|
+
deterministicFacts: [
|
|
312
|
+
{ fact: "claim_reworded", detail: `old_length=${oldLen} new_length=${newLen}` },
|
|
313
|
+
...extraFacts,
|
|
314
|
+
],
|
|
315
|
+
layer: "observed",
|
|
316
|
+
timestamp: after.timestamp,
|
|
317
|
+
});
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
for (const sentence of beforeSentences) {
|
|
323
|
+
const trimmed = sentence.trim();
|
|
324
|
+
if (!trimmed) continue;
|
|
325
|
+
const foundInAfter = fuzzyFindClaim(trimmed, afterPlain);
|
|
326
|
+
if (!foundInAfter) {
|
|
327
|
+
const section = findSectionForText(before.content, trimmed);
|
|
328
|
+
events.push({
|
|
329
|
+
eventType: "claim_removed",
|
|
330
|
+
fromRevisionId: before.revId,
|
|
331
|
+
toRevisionId: after.revId,
|
|
332
|
+
section,
|
|
333
|
+
before: isBrief ? "" : trimmed,
|
|
334
|
+
after: "",
|
|
335
|
+
deterministicFacts: [
|
|
336
|
+
{ fact: "claim_removed", detail: `sentence_length=${trimmed.length}` },
|
|
337
|
+
...extraFacts,
|
|
338
|
+
],
|
|
339
|
+
layer: "observed",
|
|
340
|
+
timestamp: after.timestamp,
|
|
341
|
+
});
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
for (const s of afterSentences) {
|
|
346
|
+
const normalized = s.trim().toLowerCase().replace(/\s+/g, " ");
|
|
347
|
+
if (normalized) allSeenSentences.add(normalized);
|
|
348
|
+
}
|
|
153
349
|
}
|
|
154
350
|
|
|
155
351
|
if (modelConfig && events.length > 0) {
|
|
@@ -160,8 +356,58 @@ export async function runAnalyze(
|
|
|
160
356
|
interpreted[i].layer = events[i].layer;
|
|
161
357
|
}
|
|
162
358
|
console.log("Interpretation complete.");
|
|
163
|
-
return interpreted;
|
|
359
|
+
return { events: interpreted, revisions: sortedRevs };
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
return { events, revisions: sortedRevs };
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
async function runBatch(
|
|
366
|
+
pagesFile: string,
|
|
367
|
+
depth: string,
|
|
368
|
+
fromRevId?: number,
|
|
369
|
+
toRevId?: number,
|
|
370
|
+
useCache = false,
|
|
371
|
+
modelConfig?: ModelConfig,
|
|
372
|
+
apiUrl?: string,
|
|
373
|
+
): Promise<{ events: EvidenceEvent[]; revisions: Revision[] }> {
|
|
374
|
+
const content = readFileSync(pagesFile, "utf-8");
|
|
375
|
+
const titles = content
|
|
376
|
+
.split("\n")
|
|
377
|
+
.map(l => l.trim())
|
|
378
|
+
.filter(l => l.length > 0 && !l.startsWith("#"));
|
|
379
|
+
|
|
380
|
+
console.log(`Batch mode: ${titles.length} pages from ${pagesFile}\n`);
|
|
381
|
+
|
|
382
|
+
const pages: BatchPageResult[] = [];
|
|
383
|
+
const allEvents: EvidenceEvent[] = [];
|
|
384
|
+
|
|
385
|
+
for (const title of titles) {
|
|
386
|
+
console.log(`--- Page ${pages.length + 1}/${titles.length}: ${title} ---`);
|
|
387
|
+
const { events } = await runAnalyze(title, depth, fromRevId, toRevId, useCache, modelConfig, apiUrl, undefined);
|
|
388
|
+
pages.push({
|
|
389
|
+
pageTitle: title,
|
|
390
|
+
pageId: 0,
|
|
391
|
+
eventCount: events.length,
|
|
392
|
+
events,
|
|
393
|
+
});
|
|
394
|
+
allEvents.push(...events);
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
const result: BatchResult = {
|
|
398
|
+
mode: "batch",
|
|
399
|
+
batchSize: titles.length,
|
|
400
|
+
pages,
|
|
401
|
+
totalEvents: allEvents.length,
|
|
402
|
+
generatedAt: new Date().toISOString(),
|
|
403
|
+
};
|
|
404
|
+
|
|
405
|
+
console.log(`\n=== Batch Results ===`);
|
|
406
|
+
console.log(`Pages processed: ${result.batchSize}`);
|
|
407
|
+
console.log(`Total events: ${result.totalEvents}\n`);
|
|
408
|
+
for (const p of result.pages) {
|
|
409
|
+
console.log(` ${p.pageTitle}: ${p.eventCount} events`);
|
|
164
410
|
}
|
|
165
411
|
|
|
166
|
-
return events;
|
|
412
|
+
return { events: allEvents, revisions: [] };
|
|
167
413
|
}
|
package/src/commands/cache.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { Persistence } from "@var-ia/persistence";
|
|
2
|
+
import type { PersistenceAdapter } from "@var-ia/persistence";
|
|
2
3
|
import type { Revision } from "@var-ia/evidence-graph";
|
|
3
4
|
import { existsSync, mkdirSync } from "node:fs";
|
|
4
5
|
import { homedir } from "node:os";
|
|
@@ -7,9 +8,9 @@ import { join } from "node:path";
|
|
|
7
8
|
const CACHE_DIR = join(homedir(), ".wikihistory");
|
|
8
9
|
const DB_PATH = join(CACHE_DIR, "varia.db");
|
|
9
10
|
|
|
10
|
-
let _instance:
|
|
11
|
+
let _instance: PersistenceAdapter | null = null;
|
|
11
12
|
|
|
12
|
-
export function getPersistence():
|
|
13
|
+
export function getPersistence(): PersistenceAdapter {
|
|
13
14
|
if (!_instance) {
|
|
14
15
|
if (!existsSync(CACHE_DIR)) {
|
|
15
16
|
mkdirSync(CACHE_DIR, { recursive: true });
|
package/src/commands/claim.ts
CHANGED
|
@@ -10,8 +10,9 @@ export async function runClaim(
|
|
|
10
10
|
claimText: string,
|
|
11
11
|
useCache = false,
|
|
12
12
|
modelConfig?: ModelConfig,
|
|
13
|
+
apiUrl?: string,
|
|
13
14
|
): Promise<void> {
|
|
14
|
-
const client = new MediaWikiClient();
|
|
15
|
+
const client = new MediaWikiClient(apiUrl ? { apiUrl } : undefined);
|
|
15
16
|
console.log(`Tracking claim in "${pageTitle}"...`);
|
|
16
17
|
console.log(`Claim text: "${claimText}"\n`);
|
|
17
18
|
|
|
@@ -145,7 +146,7 @@ export async function runClaim(
|
|
|
145
146
|
|
|
146
147
|
export { runClaim as runClaimCommand };
|
|
147
148
|
|
|
148
|
-
function stripWikitext(wikitext: string): string {
|
|
149
|
+
export function stripWikitext(wikitext: string): string {
|
|
149
150
|
let text = wikitext;
|
|
150
151
|
text = text.replace(/<!--[\s\S]*?-->/g, "");
|
|
151
152
|
text = text.replace(/<ref\b[^>]*\/\s*>/gi, "");
|
|
@@ -160,7 +161,7 @@ function stripWikitext(wikitext: string): string {
|
|
|
160
161
|
return text.trim();
|
|
161
162
|
}
|
|
162
163
|
|
|
163
|
-
function fuzzyFindClaim(claimText: string, plainText: string): string {
|
|
164
|
+
export function fuzzyFindClaim(claimText: string, plainText: string): string {
|
|
164
165
|
const normalized = claimText.toLowerCase().replace(/\s+/g, " ").trim();
|
|
165
166
|
const searchText = plainText.toLowerCase().replace(/\s+/g, " ");
|
|
166
167
|
|
|
@@ -180,7 +181,7 @@ function fuzzyFindClaim(claimText: string, plainText: string): string {
|
|
|
180
181
|
return "";
|
|
181
182
|
}
|
|
182
183
|
|
|
183
|
-
function findSectionForText(wikitext: string, plainText: string): string {
|
|
184
|
+
export function findSectionForText(wikitext: string, plainText: string): string {
|
|
184
185
|
const headerRegex = /^(=+)\s*([^=]+?)\s*\1$/gm;
|
|
185
186
|
const lines = wikitext.split("\n");
|
|
186
187
|
let currentSection = "(lead)";
|
package/src/commands/eval.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { createEvalHarness } from "@var-ia/eval";
|
|
2
|
+
import { runAnalyze } from "./analyze.js";
|
|
3
|
+
|
|
4
|
+
export async function runEval(pageTitleOverride?: string): Promise<void> {
|
|
5
|
+
const harness = createEvalHarness();
|
|
6
|
+
const testCases = harness.benchmarkPages();
|
|
7
|
+
|
|
8
|
+
const filtered = pageTitleOverride
|
|
9
|
+
? testCases.filter(t => t.pageTitle === pageTitleOverride)
|
|
10
|
+
: testCases;
|
|
11
|
+
|
|
12
|
+
console.log(`Running ${filtered.length} benchmark tests...\n`);
|
|
13
|
+
|
|
14
|
+
const results = [];
|
|
15
|
+
for (const test of filtered) {
|
|
16
|
+
console.log(`[${test.id}] ${test.description}...`);
|
|
17
|
+
try {
|
|
18
|
+
const { events } = await runAnalyze(test.pageTitle, "detailed");
|
|
19
|
+
const result = harness.evaluate(test, events);
|
|
20
|
+
results.push(result);
|
|
21
|
+
const icon = result.passed ? "PASS" : "FAIL";
|
|
22
|
+
console.log(` ${icon} precision=${result.precision.toFixed(2)} events=${result.eventCount.actual}/${result.eventCount.expected}`);
|
|
23
|
+
} catch (err) {
|
|
24
|
+
console.log(` ERROR: ${err}`);
|
|
25
|
+
results.push({
|
|
26
|
+
testId: test.id,
|
|
27
|
+
passed: false,
|
|
28
|
+
precision: 0,
|
|
29
|
+
eventCount: { expected: test.expectedEvents.length, actual: 0 },
|
|
30
|
+
matches: [],
|
|
31
|
+
misses: test.expectedEvents.map(e => ({ expected: e })),
|
|
32
|
+
falsePositives: [],
|
|
33
|
+
});
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
const summary = harness.computeScores(results);
|
|
38
|
+
console.log(`\n=== Eval Summary ===`);
|
|
39
|
+
console.log(`Passed: ${summary.testsPassed}/${summary.totalTests}`);
|
|
40
|
+
console.log(`Overall precision: ${(summary.overallPrecision * 100).toFixed(1)}%`);
|
|
41
|
+
}
|
package/src/commands/export.ts
CHANGED
|
@@ -1,13 +1,34 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
1
2
|
import { runAnalyze } from "./analyze.js";
|
|
2
|
-
import
|
|
3
|
+
import { createEventIdentity } from "@var-ia/evidence-graph";
|
|
4
|
+
import type { EvidenceEvent, Report, PolicySignal, Revision } from "@var-ia/evidence-graph";
|
|
3
5
|
import type { ModelConfig } from "@var-ia/interpreter";
|
|
4
6
|
|
|
7
|
+
interface EvidenceBundle {
|
|
8
|
+
format: "varia-evidence-bundle/v1";
|
|
9
|
+
generatedAt: string;
|
|
10
|
+
pageTitle: string;
|
|
11
|
+
revisionRange: { from: number; to: number };
|
|
12
|
+
inputRevisions: Revision[];
|
|
13
|
+
outputEvents: EvidenceEvent[];
|
|
14
|
+
bundleHash: string;
|
|
15
|
+
}
|
|
16
|
+
|
|
5
17
|
export async function runExport(
|
|
6
18
|
pageTitle: string,
|
|
7
19
|
format: string,
|
|
8
20
|
modelConfig?: ModelConfig,
|
|
21
|
+
apiUrl?: string,
|
|
22
|
+
bundle?: boolean,
|
|
9
23
|
): Promise<void> {
|
|
10
|
-
|
|
24
|
+
if (bundle) {
|
|
25
|
+
const { events, revisions } = await runAnalyze(pageTitle, "detailed", undefined, undefined, false, modelConfig, apiUrl);
|
|
26
|
+
const bundleData = buildBundle(pageTitle, events, revisions);
|
|
27
|
+
console.log(JSON.stringify(bundleData, null, 2));
|
|
28
|
+
return;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
const { events } = await runAnalyze(pageTitle, "detailed", undefined, undefined, false, modelConfig, apiUrl);
|
|
11
32
|
|
|
12
33
|
if (events.length === 0) {
|
|
13
34
|
console.log("No events to export.");
|
|
@@ -24,6 +45,31 @@ export async function runExport(
|
|
|
24
45
|
}
|
|
25
46
|
}
|
|
26
47
|
|
|
48
|
+
function buildBundle(pageTitle: string, events: EvidenceEvent[], revisions: Revision[]): EvidenceBundle {
|
|
49
|
+
const from = revisions[0]?.revId ?? 0;
|
|
50
|
+
const to = revisions[revisions.length - 1]?.revId ?? 0;
|
|
51
|
+
|
|
52
|
+
const taggedEvents = events.map((e) => ({
|
|
53
|
+
...e,
|
|
54
|
+
eventId: e.eventId ?? createEventIdentity(e),
|
|
55
|
+
}));
|
|
56
|
+
|
|
57
|
+
const bundle = {
|
|
58
|
+
format: "varia-evidence-bundle/v1" as const,
|
|
59
|
+
generatedAt: new Date().toISOString(),
|
|
60
|
+
pageTitle,
|
|
61
|
+
revisionRange: { from, to },
|
|
62
|
+
inputRevisions: revisions,
|
|
63
|
+
outputEvents: taggedEvents,
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
const bundleHash = createHash("sha256")
|
|
67
|
+
.update(JSON.stringify(bundle))
|
|
68
|
+
.digest("hex");
|
|
69
|
+
|
|
70
|
+
return { ...bundle, bundleHash };
|
|
71
|
+
}
|
|
72
|
+
|
|
27
73
|
function buildReport(pageTitle: string, events: EvidenceEvent[]): Report {
|
|
28
74
|
const sortedRevs = events.map((e) => e.toRevisionId).sort((a, b) => a - b);
|
|
29
75
|
const timestamps = events.map((e) => e.timestamp).sort();
|
package/src/commands/watch.ts
CHANGED
|
@@ -7,8 +7,9 @@ const POLL_INTERVAL_MS = 60_000;
|
|
|
7
7
|
export async function runWatch(
|
|
8
8
|
pageTitle: string,
|
|
9
9
|
section?: string,
|
|
10
|
+
apiUrl?: string,
|
|
10
11
|
): Promise<void> {
|
|
11
|
-
const client = new MediaWikiClient();
|
|
12
|
+
const client = new MediaWikiClient(apiUrl ? { apiUrl } : undefined);
|
|
12
13
|
console.log(`Watching "${pageTitle}"${section ? ` section="${section}"` : ""}`);
|
|
13
14
|
console.log(`Polling every ${POLL_INTERVAL_MS / 1000}s. Press Ctrl+C to stop.\n`);
|
|
14
15
|
|