@refract-org/eval 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -0
- package/dist/src/ground-truth.d.ts +5 -0
- package/dist/src/ground-truth.d.ts.map +1 -0
- package/dist/src/ground-truth.js +64 -0
- package/dist/src/ground-truth.js.map +1 -0
- package/dist/src/index.d.ts +94 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +153 -0
- package/dist/src/index.js.map +1 -0
- package/dist/tsconfig 2.tsbuildinfo +1 -0
- package/dist/tsconfig 3.tsbuildinfo +1 -0
- package/dist/tsconfig 4.tsbuildinfo +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +28 -0
- package/src/__tests__/ground-truth.test.ts +125 -0
- package/src/__tests__/harness.test.ts +161 -0
- package/src/ground-truth.ts +70 -0
- package/src/index.ts +264 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import type { EvidenceEvent, EvidenceLayer } from "@refract-org/evidence-graph";
|
|
2
|
+
import { describe, expect, it } from "vitest";
|
|
3
|
+
import type { EvalTestCase } from "../index.js";
|
|
4
|
+
import { createEvalHarness } from "../index.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Builds a minimal evidence-event fixture for the harness tests.
 *
 * Only `eventType` and `section` vary between fixtures; every other field is a
 * fixed placeholder value.
 *
 * @param eventType - Event type to stamp on the fixture. It is cast to the
 *   event-type union without any validation.
 * @param section - Section name for the fixture; defaults to "body".
 * @returns An event spanning revisions 1 → 2 with empty before/after text, no
 *   deterministic facts, the "observed" layer and a fixed timestamp.
 */
function makeEvent(eventType: string, section = "body"): EvidenceEvent {
  const observedLayer: EvidenceLayer = "observed";
  const fixture: EvidenceEvent = {
    eventType: eventType as EvidenceEvent["eventType"],
    fromRevisionId: 1,
    toRevisionId: 2,
    section,
    before: "",
    after: "",
    deterministicFacts: [],
    layer: observedLayer,
    timestamp: "2026-01-01T00:00:00Z",
  };
  return fixture;
}
|
|
20
|
+
|
|
21
|
+
describe("createEvalHarness", () => {
  /**
   * Assembles an eval test case for page id 1 with a zeroed revision range.
   * Only the fields that differ between the tests below are parameters.
   */
  const buildCase = (
    id: string,
    description: string,
    expectedEvents: EvalTestCase["expectedEvents"],
    tolerance: EvalTestCase["tolerance"],
    pageTitle = "Test",
  ): EvalTestCase => ({
    id,
    description,
    pageTitle,
    pageId: 1,
    revisionRange: { from: 0, to: 0 },
    expectedEvents,
    tolerance,
  });

  describe("evaluate", () => {
    it("passes when all expected events are found", () => {
      const harness = createEvalHarness();
      const test = buildCase(
        "test-1",
        "All events present",
        [
          { eventType: "revert_detected", section: "body" },
          { eventType: "citation_added", section: "body" },
        ],
        { minPrecision: 0.5 },
      );
      const observed = [makeEvent("revert_detected"), makeEvent("citation_added")];

      const result = harness.evaluate(test, observed);

      expect(result.passed).toBe(true);
      expect(result.matches).toHaveLength(2);
      expect(result.misses).toHaveLength(0);
      expect(result.precision).toBe(1.0);
    });

    it("fails when expected events are missing", () => {
      const harness = createEvalHarness();
      const test = buildCase("test-2", "Missing events", [{ eventType: "revert_detected", section: "body" }], {
        minPrecision: 0.5,
      });

      // No events at all: the single expectation must be reported as a miss.
      const result = harness.evaluate(test, []);

      expect(result.passed).toBe(false);
      expect(result.misses).toHaveLength(1);
    });

    it("reports false positives", () => {
      const harness = createEvalHarness();
      const test = buildCase("test-3", "Unexpected events", [{ eventType: "revert_detected", section: "body" }], {
        minPrecision: 0.5,
      });
      // The citation event is not listed in expectedEvents.
      const observed = [makeEvent("revert_detected"), makeEvent("citation_added")];

      const result = harness.evaluate(test, observed);

      expect(result.falsePositives).toHaveLength(1);
    });

    it("respects event count tolerance", () => {
      const harness = createEvalHarness();
      const test = buildCase("test-4", "Event count bounds", [{ eventType: "revert_detected", section: "body" }], {
        minEventCount: 3,
      });

      // One event supplied, three required.
      const result = harness.evaluate(test, [makeEvent("revert_detected")]);

      expect(result.passed).toBe(false);
    });

    it("matches by event type and section", () => {
      const harness = createEvalHarness();
      const test = buildCase("test-5", "Section matching", [{ eventType: "section_reorganized", section: "(lead)" }], {
        minPrecision: 0.5,
      });

      // Same event type as expected, but in a different section.
      const wrongSection = makeEvent("section_reorganized", "body");
      const result = harness.evaluate(test, [wrongSection]);

      expect(result.misses).toHaveLength(1);
    });
  });

  describe("benchmarkPages", () => {
    it("returns at least 5 benchmark pages", () => {
      const harness = createEvalHarness();
      const pages = harness.benchmarkPages();

      expect(pages.length).toBeGreaterThanOrEqual(5);
    });

    it("each benchmark has required fields", () => {
      const harness = createEvalHarness();

      for (const page of harness.benchmarkPages()) {
        expect(page.id).toBeTruthy();
        expect(page.pageTitle).toBeTruthy();
        expect(page.expectedEvents.length).toBeGreaterThan(0);
      }
    });
  });

  describe("computeScores", () => {
    it("computes overall precision", () => {
      const harness = createEvalHarness();
      const test = buildCase("t", "t", [{ eventType: "revert_detected", section: "body" }], { minPrecision: 0.5 }, "T");

      // One fully matched run and one empty run average to 0.5.
      const r1 = harness.evaluate(test, [makeEvent("revert_detected")]);
      const r2 = harness.evaluate(test, []);
      const summary = harness.computeScores([r1, r2]);

      expect(summary.overallPrecision).toBe(0.5);
      expect(summary.testsPassed).toBe(1);
      expect(summary.testsFailed).toBe(1);
      expect(summary.totalTests).toBe(2);
    });

    it("returns zero for empty results", () => {
      const harness = createEvalHarness();
      const summary = harness.computeScores([]);

      expect(summary.overallPrecision).toBe(0);
      expect(summary.totalTests).toBe(0);
    });
  });
});
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import type { OutcomeLabel } from "./index.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Hand-written outcome labels used as ground truth for validation.
 *
 * Each entry names a page, an outcome, and the event types expected to appear
 * in that page's evidence events. Entries are kept in declaration order by the
 * lookup helpers in this module.
 */
export const GROUND_TRUTH_LABELS: OutcomeLabel[] = [
  // RFC closure: article kept, merge proposal rejected.
  {
    id: "covid-vaccine-mandate-rfc",
    source: "rfc_closure",
    pageTitle: "COVID-19 vaccine mandates in the United States",
    description: "RFC closed with consensus to keep the article, rejecting a merge proposal",
    observedAt: "2022-03-15T00:00:00Z",
    resolution: "keep",
    referenceUrl: "https://en.wikipedia.org/wiki/Talk:COVID-19_vaccine_mandates_in_the_United_States/Archive_1#RFC_on_merger",
    expectedEventTypes: ["sentence_first_seen", "revert_detected"],
    expectedSection: "body",
  },
  // Talk-page consensus: sections merged/reorganized.
  {
    id: "darth-vader-lightsaber-merge",
    source: "talk_page_consensus",
    pageTitle: "Darth Vader",
    description: "Discussion about merging Lightsaber combat sections into main article reached consensus for reorganization",
    observedAt: "2021-11-20T00:00:00Z",
    resolution: "merge",
    referenceUrl: "https://starwars.fandom.com/wiki/Talk:Darth_Vader?oldid=12345",
    expectedEventTypes: ["section_reorganized", "sentence_removed"],
    expectedSection: "(lead)",
  },
  // Page protection following an edit war.
  {
    id: "einstein-nobel-protection",
    source: "page_protection",
    pageTitle: "Albert Einstein",
    description: "Article was semi-protected after edit warring over Nobel Prize description",
    observedAt: "2020-06-10T00:00:00Z",
    resolution: "other",
    referenceUrl: "https://en.wikipedia.org/w/index.php?title=Special:Log&page=Albert+Einstein&type=protect",
    expectedEventTypes: ["protection_changed", "revert_detected"],
    // Empty string is falsy, so validateAgainstGroundTruth applies no section filter.
    expectedSection: "",
  },
  // RFC closure: no consensus to change the lead.
  {
    id: "trump-biographical-rfc",
    source: "rfc_closure",
    pageTitle: "Donald Trump",
    description:
      "RFC on whether to include detailed biographical information in the lead section ended with no consensus to remove",
    observedAt: "2023-08-01T00:00:00Z",
    resolution: "no_consensus",
    referenceUrl: "https://en.wikipedia.org/wiki/Talk:Donald_Trump/Archive_50#RFC_on_lead_biography_length",
    expectedEventTypes: ["lead_promotion", "lead_demotion", "section_reorganized", "revert_detected"],
    expectedSection: "(lead)",
  },
  // Talk-page consensus: outdated section removed.
  {
    id: "crispr-gene-editing-deletion",
    source: "talk_page_consensus",
    pageTitle: "CRISPR gene editing",
    description: "Discussion about deleting outdated safety information section resulted in removal",
    observedAt: "2022-05-10T00:00:00Z",
    resolution: "delete",
    referenceUrl: "https://en.wikipedia.org/wiki/Talk:CRISPR_gene_editing/Archive_2#Safety_section",
    expectedEventTypes: ["section_reorganized", "sentence_removed"],
    // Empty string is falsy, so validateAgainstGroundTruth applies no section filter.
    expectedSection: "",
  },
];
|
|
63
|
+
|
|
64
|
+
/**
 * Returns every ground-truth label recorded for a page.
 *
 * Titles are compared case-insensitively. Underscores are treated as spaces and
 * runs of whitespace are collapsed and trimmed, so a URL-style title such as
 * "Donald_Trump" (the form used by the benchmark test cases) finds labels that
 * are stored under the display title "Donald Trump".
 *
 * @param pageTitle - Page title in display form or URL (underscore) form.
 * @returns The matching labels in declaration order; an empty array when no
 *   label matches.
 */
export function getGroundTruthForPage(pageTitle: string): OutcomeLabel[] {
  // Canonical comparison key: underscores/whitespace runs → single space, trimmed, lower-cased.
  const toKey = (title: string): string => title.replace(/[_\s]+/g, " ").trim().toLowerCase();

  // Normalize the query once instead of once per label.
  const wanted = toKey(pageTitle);
  return GROUND_TRUTH_LABELS.filter((label) => toKey(label.pageTitle) === wanted);
}
|
|
67
|
+
|
|
68
|
+
/**
 * Looks up a single ground-truth label by its exact, case-sensitive id.
 *
 * @param id - Label id to search for.
 * @returns The first label whose `id` equals `id`, or `undefined` when there is none.
 */
export function getGroundTruthById(id: string): OutcomeLabel | undefined {
  for (const candidate of GROUND_TRUTH_LABELS) {
    if (candidate.id === id) {
      return candidate;
    }
  }
  return undefined;
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
import type { EvidenceEvent } from "@refract-org/evidence-graph";
|
|
2
|
+
|
|
3
|
+
// ── L3 Ground Truth Types ──────────────────────────────────────────
|
|
4
|
+
|
|
5
|
+
/**
 * A ground-truth outcome for a page, paired with the evidence event types that
 * are expected to accompany it.
 */
export interface OutcomeLabel {
  /** Identifier of the label; copied into `L3ValidationResult.outcomeId`. */
  id: string;
  /** Category of the record the outcome was taken from. */
  source:
    | "talk_page_consensus"
    | "rfc_closure"
    | "arbcom_decision"
    | "page_protection";
  /** Title of the page the outcome concerns. */
  pageTitle: string;
  /** Human-readable summary; copied into the validation result. */
  description: string;
  /** When the outcome was observed, as a string (presumably an ISO-8601 timestamp — the type does not enforce a format). */
  observedAt: string;
  /** How the outcome was resolved. */
  resolution:
    | "keep"
    | "merge"
    | "delete"
    | "no_consensus"
    | "redirect"
    | "other";
  /** Link to the record backing this label. */
  referenceUrl: string;
  /** Event types that `validateAgainstGroundTruth` searches for among the supplied events. */
  expectedEventTypes: string[];
  /**
   * When set to a non-empty string, only events whose `section` equals it are
   * counted. Omitted or empty means events from any section are counted.
   */
  expectedSection?: string;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Result of checking one outcome label against a list of evidence events, as
 * produced by `validateAgainstGroundTruth`.
 */
export interface L3ValidationResult {
  /** `id` of the outcome label that was checked. */
  outcomeId: string;
  /** Whether the outcome passed; `validateAgainstGroundTruth` sets this equal to `signalDetected`. */
  passed: boolean;
  /** Description copied from the outcome label. */
  description: string;
  /** True when at least one event matched the outcome's expected types (and section, if one is set). */
  signalDetected: boolean;
  /** The events that matched. */
  matchedEvents: EvidenceEvent[];
  /** Expected event types copied from the outcome label. */
  expectedEventTypes: string[];
  /**
   * As computed by `validateAgainstGroundTruth`: the fraction of expected event
   * types that have at least one matched event, or 0 when nothing matched.
   */
  precision: number;
  /** As computed by `validateAgainstGroundTruth`: 1 when any event matched, otherwise 0. */
  recall: number;
}
|
|
27
|
+
|
|
28
|
+
/** Aggregate of all per-outcome results returned by `validateAgainstGroundTruth`. */
export interface L3ValidationSummary {
  /** Number of outcome labels that were checked. */
  totalOutcomes: number;
  /** Number of outcomes whose result has `passed === true`. */
  passed: number;
  /** `totalOutcomes` minus `passed`. */
  failed: number;
  /** Arithmetic mean of the per-outcome `precision` values; 0 when there are no outcomes. */
  overallPrecision: number;
  /** Arithmetic mean of the per-outcome `recall` values; 0 when there are no outcomes. */
  overallRecall: number;
  /** One result per outcome label, in input order. */
  perOutcome: L3ValidationResult[];
}
|
|
36
|
+
|
|
37
|
+
/**
 * Checks each ground-truth outcome against a list of evidence events.
 *
 * An event matches an outcome when its type is one of the outcome's
 * `expectedEventTypes` and, if the outcome has a non-empty `expectedSection`,
 * its section equals that value. An outcome passes when at least one event
 * matches.
 *
 * Note on the metric names: per outcome, `precision` is the share of expected
 * event types that have at least one matching event, and `recall` is 1 when
 * anything matched and 0 otherwise.
 *
 * @param outcomes - Labels to validate.
 * @param events - Events to search; the same list is used for every outcome.
 * @returns Counts, mean precision/recall (0 for an empty `outcomes`), and the
 *   per-outcome results in input order.
 */
export function validateAgainstGroundTruth(outcomes: OutcomeLabel[], events: EvidenceEvent[]): L3ValidationSummary {
  const perOutcome: L3ValidationResult[] = [];
  let passedCount = 0;
  let precisionSum = 0;
  let recallSum = 0;

  for (const outcome of outcomes) {
    const expectedTypes = outcome.expectedEventTypes;

    const matchedEvents = events.filter((event) => {
      if (!expectedTypes.includes(event.eventType)) {
        return false;
      }
      // A missing or empty expectedSection disables the section filter.
      return !outcome.expectedSection || event.section === outcome.expectedSection;
    });
    const signalDetected = matchedEvents.length > 0;

    let precision = 0;
    if (signalDetected) {
      // Guarded by signalDetected, so expectedTypes is non-empty here and the division is safe.
      const matchedTypes = new Set<string>(matchedEvents.map((event) => event.eventType));
      const coveredTypes = expectedTypes.filter((eventType) => matchedTypes.has(eventType)).length;
      precision = coveredTypes / expectedTypes.length;
    }
    const recall = signalDetected ? 1.0 : 0.0;

    if (signalDetected) {
      passedCount += 1;
    }
    precisionSum += precision;
    recallSum += recall;

    perOutcome.push({
      outcomeId: outcome.id,
      passed: signalDetected,
      description: outcome.description,
      signalDetected,
      matchedEvents,
      expectedEventTypes: expectedTypes,
      precision,
      recall,
    });
  }

  const resultCount = perOutcome.length;
  const hasResults = resultCount > 0;

  return {
    totalOutcomes: outcomes.length,
    passed: passedCount,
    failed: outcomes.length - passedCount,
    overallPrecision: hasResults ? precisionSum / resultCount : 0,
    overallRecall: hasResults ? recallSum / resultCount : 0,
    perOutcome,
  };
}
|
|
76
|
+
|
|
77
|
+
/** One evaluation scenario: a page plus the events expected to be detected on it. */
export interface EvalTestCase {
  /** Identifier of the test; copied into `EvalResult.testId`. */
  id: string;
  /** Human-readable explanation of what the test checks. */
  description: string;
  /** Title of the page under test. */
  pageTitle: string;
  /** Numeric id of the page under test. */
  pageId: number;
  /** Revision bounds for the test. The harness's `evaluate` in this file does not read them. */
  revisionRange: {
    from: number;
    to: number;
  };
  /** Events that should be present among the events handed to `evaluate`. */
  expectedEvents: ExpectedEvent[];
  /** Optional pass/fail thresholds; defaults are applied when omitted. */
  tolerance?: EvalTolerance;
}
|
|
86
|
+
|
|
87
|
+
/** An event the harness should find: identified by its type and the section it occurs in. */
export interface ExpectedEvent {
  /** Event type that must be equal on the actual event. */
  eventType: string;
  /** Section that must be equal on the actual event. */
  section: string;
  /** Optional confidence threshold. NOTE(review): not consulted by `evaluate` in this file. */
  minConfidence?: number;
}
|
|
92
|
+
|
|
93
|
+
/** Thresholds that decide whether an evaluation passes. All fields are optional. */
export interface EvalTolerance {
  /** Minimum number of events supplied to `evaluate`; treated as 0 when omitted. */
  minEventCount?: number;
  /** Maximum number of events supplied to `evaluate`; treated as unbounded when omitted. */
  maxEventCount?: number;
  /** Minimum precision required to pass; treated as 0.5 when omitted. */
  minPrecision?: number;
}
|
|
98
|
+
|
|
99
|
+
/** Outcome of evaluating one test case against a list of events. */
export interface EvalResult {
  /** `id` of the test case that was evaluated. */
  testId: string;
  /** True when the precision and event-count thresholds were all met. */
  passed: boolean;
  /** Matched expected events divided by the number of expected events, as computed by `evaluate`. */
  precision: number;
  /** Number of expected events versus number of events supplied. */
  eventCount: {
    expected: number;
    actual: number;
  };
  /** Expected events that were found, each paired with the event that satisfied it. */
  matches: EventMatch[];
  /** Expected events for which no event was found. */
  misses: MissingEvent[];
  /** Supplied events that were classified as unexpected. */
  falsePositives: UnexpectedEvent[];
}
|
|
108
|
+
|
|
109
|
+
/** Pairs an expected event with the actual event that satisfied it. */
export interface EventMatch {
  /** The expectation that was met. */
  expected: ExpectedEvent;
  /** The event that met it. */
  actual: EvidenceEvent;
}
|
|
113
|
+
|
|
114
|
+
/** Records an expected event for which no actual event was found. */
export interface MissingEvent {
  /** The expectation that went unmet. */
  expected: ExpectedEvent;
}
|
|
117
|
+
|
|
118
|
+
/** Records an actual event that the test case did not expect. */
export interface UnexpectedEvent {
  /** The unexpected event itself. */
  event: EvidenceEvent;
}
|
|
121
|
+
|
|
122
|
+
/** Operations offered by the evaluation harness returned from `createEvalHarness`. */
export interface EvalHarness {
  /**
   * Compares the supplied events with a test case's expectations.
   *
   * @param test - The test case holding expected events and tolerances.
   * @param events - The events to check.
   * @returns Matches, misses, false positives, precision and the pass verdict.
   */
  evaluate(test: EvalTestCase, events: EvidenceEvent[]): EvalResult;

  /** Returns the built-in benchmark test cases. */
  benchmarkPages(): EvalTestCase[];

  /**
   * Aggregates individual evaluation results.
   *
   * @param results - Results previously returned by `evaluate`.
   * @returns Pass/fail counts, mean precision and a per-test digest.
   */
  computeScores(results: EvalResult[]): EvalScoreSummary;
}
|
|
127
|
+
|
|
128
|
+
/** Aggregate scores over a batch of evaluation results. */
export interface EvalScoreSummary {
  /** Arithmetic mean of the per-test precision values; 0 when there are no results. */
  overallPrecision: number;
  /** Number of results with `passed === true`. */
  testsPassed: number;
  /** Number of results that did not pass. */
  testsFailed: number;
  /** Total number of results aggregated. */
  totalTests: number;
  /** Digest of each result, in input order. */
  perTest: Array<{
    id: string;
    precision: number;
    passed: boolean;
  }>;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Creates the evaluation harness.
 *
 * The returned object is stateless: `evaluate` compares a list of events with a
 * test case, `benchmarkPages` returns a fresh array of built-in test cases, and
 * `computeScores` aggregates results from `evaluate`.
 *
 * @returns A harness implementing {@link EvalHarness}.
 */
export function createEvalHarness(): EvalHarness {
  // Single definition of "this event satisfies this expectation", shared by the
  // match/miss pass and the false-positive pass so the two cannot disagree.
  const fulfils = (expected: ExpectedEvent, event: EvidenceEvent): boolean =>
    event.eventType === expected.eventType && event.section === expected.section;

  return {
    /**
     * Compares `events` against `test.expectedEvents`.
     *
     * - An expected event is a match when some event has the same type and
     *   section (the first such event is recorded); otherwise it is a miss.
     * - An event is a false positive when it satisfies none of the expected
     *   events. This includes events whose type is expected but whose section
     *   is not, which were previously dropped from the report altogether.
     * - `precision` is matches divided by expected events. With no expected
     *   events it is 1.0 if `events` is empty and 0.0 otherwise.
     * - The test passes when precision and the total event count meet the
     *   tolerance (defaults: min count 0, max count unbounded, min precision 0.5).
     *
     * `ExpectedEvent.minConfidence` is not consulted.
     */
    evaluate(test, events) {
      const matches: EventMatch[] = [];
      const misses: MissingEvent[] = [];

      for (const expected of test.expectedEvents) {
        const found = events.find((event) => fulfils(expected, event));
        if (found) {
          matches.push({ expected, actual: found });
        } else {
          misses.push({ expected });
        }
      }

      // Use the same (type, section) criterion as matching; comparing on type
      // alone let wrong-section events be neither a match nor a false positive.
      const falsePositives: UnexpectedEvent[] = events
        .filter((event) => !test.expectedEvents.some((expected) => fulfils(expected, event)))
        .map((event) => ({ event }));

      const matchedCount = matches.length;
      const totalExpected = test.expectedEvents.length;
      const precision = totalExpected > 0 ? matchedCount / totalExpected : events.length === 0 ? 1.0 : 0.0;

      const tolerance = test.tolerance ?? {};
      const minEventCount = tolerance.minEventCount ?? 0;
      const maxEventCount = tolerance.maxEventCount ?? Infinity;
      const minPrecision = tolerance.minPrecision ?? 0.5;

      const passed = precision >= minPrecision && events.length >= minEventCount && events.length <= maxEventCount;

      return {
        testId: test.id,
        passed,
        precision,
        eventCount: { expected: totalExpected, actual: events.length },
        matches,
        misses,
        falsePositives,
      };
    },

    /** Returns the built-in benchmark test cases; a new array is built on every call. */
    benchmarkPages(): EvalTestCase[] {
      return [
        {
          id: "page-has-revisions",
          description: "Any active Wikipedia page returns at least 2 revisions and generates section events",
          pageTitle: "Earth",
          pageId: 9228,
          revisionRange: { from: 0, to: 0 },
          expectedEvents: [{ eventType: "section_reorganized", section: "(lead)" }],
          tolerance: { minEventCount: 1, minPrecision: 0.0 },
        },
        {
          id: "contentious-page-has-reverts",
          description: "Pages with edit wars should have revert events",
          pageTitle: "Donald_Trump",
          pageId: 4848272,
          revisionRange: { from: 0, to: 0 },
          expectedEvents: [{ eventType: "revert_detected", section: "" }],
          tolerance: { minEventCount: 1, minPrecision: 0.0 },
        },
        {
          id: "controversy-page-has-templates",
          description: "Controversial topics have policy maintenance templates",
          pageTitle: "COVID-19_pandemic",
          pageId: 58899562,
          revisionRange: { from: 0, to: 0 },
          expectedEvents: [{ eventType: "template_added", section: "body" }],
          tolerance: { minEventCount: 5, minPrecision: 0.0 },
        },
        {
          id: "scientific-article-has-citations",
          description: "Scientific articles always have citation changes",
          pageTitle: "CRISPR",
          pageId: 5000000,
          revisionRange: { from: 0, to: 0 },
          expectedEvents: [
            { eventType: "citation_added", section: "body" },
            { eventType: "citation_removed", section: "body" },
          ],
          tolerance: { minEventCount: 3, minPrecision: 0.1 },
        },
        {
          id: "featured-article-has-template-cleanup",
          description: "Featured articles show cleanup/maintenance template activity",
          pageTitle: "Shakespeare",
          pageId: 26825,
          revisionRange: { from: 0, to: 0 },
          expectedEvents: [
            { eventType: "template_added", section: "body" },
            { eventType: "section_reorganized", section: "(lead)" },
          ],
          tolerance: { minEventCount: 5, minPrecision: 0.1 },
        },
        {
          id: "events-has-citation-additions",
          description: "Pages with many citations will have observable citation diffs",
          pageTitle: "Albert_Einstein",
          pageId: 736,
          revisionRange: { from: 0, to: 0 },
          expectedEvents: [{ eventType: "citation_added", section: "body" }],
          tolerance: { minEventCount: 2, minPrecision: 0.0 },
        },
      ];
    },

    /**
     * Aggregates results from `evaluate`.
     *
     * `overallPrecision` is the arithmetic mean of the per-result precision
     * values, or 0 for an empty input.
     */
    computeScores(results) {
      const passed = results.filter((r) => r.passed);
      const totalPrecision = results.length > 0 ? results.reduce((sum, r) => sum + r.precision, 0) / results.length : 0;

      return {
        overallPrecision: totalPrecision,
        testsPassed: passed.length,
        testsFailed: results.length - passed.length,
        totalTests: results.length,
        perTest: results.map((r) => ({
          id: r.testId,
          precision: r.precision,
          passed: r.passed,
        })),
      };
    },
  };
}
|
|
263
|
+
|
|
264
|
+
export { GROUND_TRUTH_LABELS, getGroundTruthById, getGroundTruthForPage } from "./ground-truth.js";
|