@refract-org/analyzers 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -0
- package/dist/src/category-tracker.d.ts +8 -0
- package/dist/src/category-tracker.d.ts.map +1 -0
- package/dist/src/category-tracker.js +60 -0
- package/dist/src/category-tracker.js.map +1 -0
- package/dist/src/citation-tracker.d.ts +13 -0
- package/dist/src/citation-tracker.d.ts.map +1 -0
- package/dist/src/citation-tracker.js +200 -0
- package/dist/src/citation-tracker.js.map +1 -0
- package/dist/src/claim-differ.d.ts +2 -0
- package/dist/src/claim-differ.d.ts.map +1 -0
- package/dist/src/claim-differ.js +6 -0
- package/dist/src/claim-differ.js.map +1 -0
- package/dist/src/edit-cluster-detector.d.ts +14 -0
- package/dist/src/edit-cluster-detector.d.ts.map +1 -0
- package/dist/src/edit-cluster-detector.js +57 -0
- package/dist/src/edit-cluster-detector.js.map +1 -0
- package/dist/src/heuristic-classifier.d.ts +9 -0
- package/dist/src/heuristic-classifier.d.ts.map +1 -0
- package/dist/src/heuristic-classifier.js +33 -0
- package/dist/src/heuristic-classifier.js.map +1 -0
- package/dist/src/index.d.ts +70 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +16 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/observation-differ.d.ts +8 -0
- package/dist/src/observation-differ.d.ts.map +1 -0
- package/dist/src/observation-differ.js +16 -0
- package/dist/src/observation-differ.js.map +1 -0
- package/dist/src/page-move-detector.d.ts +11 -0
- package/dist/src/page-move-detector.d.ts.map +1 -0
- package/dist/src/page-move-detector.js +21 -0
- package/dist/src/page-move-detector.js.map +1 -0
- package/dist/src/protection-tracker.d.ts +23 -0
- package/dist/src/protection-tracker.d.ts.map +1 -0
- package/dist/src/protection-tracker.js +74 -0
- package/dist/src/protection-tracker.js.map +1 -0
- package/dist/src/revert-detector.d.ts +3 -0
- package/dist/src/revert-detector.d.ts.map +1 -0
- package/dist/src/revert-detector.js +43 -0
- package/dist/src/revert-detector.js.map +1 -0
- package/dist/src/section-differ.d.ts +26 -0
- package/dist/src/section-differ.d.ts.map +1 -0
- package/dist/src/section-differ.js +268 -0
- package/dist/src/section-differ.js.map +1 -0
- package/dist/src/talk-activity-detector.d.ts +16 -0
- package/dist/src/talk-activity-detector.d.ts.map +1 -0
- package/dist/src/talk-activity-detector.js +76 -0
- package/dist/src/talk-activity-detector.js.map +1 -0
- package/dist/src/talk-correlator.d.ts +7 -0
- package/dist/src/talk-correlator.d.ts.map +1 -0
- package/dist/src/talk-correlator.js +53 -0
- package/dist/src/talk-correlator.js.map +1 -0
- package/dist/src/talk-section-parser.d.ts +22 -0
- package/dist/src/talk-section-parser.d.ts.map +1 -0
- package/dist/src/talk-section-parser.js +109 -0
- package/dist/src/talk-section-parser.js.map +1 -0
- package/dist/src/template-tracker.d.ts +12 -0
- package/dist/src/template-tracker.d.ts.map +1 -0
- package/dist/src/template-tracker.js +225 -0
- package/dist/src/template-tracker.js.map +1 -0
- package/dist/src/wikilink-extractor.d.ts +8 -0
- package/dist/src/wikilink-extractor.d.ts.map +1 -0
- package/dist/src/wikilink-extractor.js +81 -0
- package/dist/src/wikilink-extractor.js.map +1 -0
- package/dist/src/wikitext-parser.d.ts +15 -0
- package/dist/src/wikitext-parser.d.ts.map +1 -0
- package/dist/src/wikitext-parser.js +85 -0
- package/dist/src/wikitext-parser.js.map +1 -0
- package/dist/tsconfig 2.tsbuildinfo +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +28 -0
- package/src/__tests__/category-tracker.test.ts +79 -0
- package/src/__tests__/citation-tracker.test.ts +185 -0
- package/src/__tests__/edit-cluster-detector.test.ts +79 -0
- package/src/__tests__/heuristic-classifier.test.ts +67 -0
- package/src/__tests__/observation-differ.test.ts +58 -0
- package/src/__tests__/page-move-detector.test.ts +64 -0
- package/src/__tests__/protection-tracker.test.ts +72 -0
- package/src/__tests__/revert-detector.test.ts +76 -0
- package/src/__tests__/section-differ.test.ts +120 -0
- package/src/__tests__/talk-activity-detector.test.ts +112 -0
- package/src/__tests__/talk-correlator.test.ts +71 -0
- package/src/__tests__/talk-section-parser.test.ts +105 -0
- package/src/__tests__/template-tracker.test.ts +159 -0
- package/src/__tests__/wikilink-extractor.test.ts +101 -0
- package/src/__tests__/wikitext-parser.test.ts +142 -0
- package/src/category-tracker.ts +75 -0
- package/src/citation-tracker.ts +226 -0
- package/src/claim-differ.ts +4 -0
- package/src/edit-cluster-detector.ts +78 -0
- package/src/heuristic-classifier.ts +59 -0
- package/src/index.ts +88 -0
- package/src/observation-differ.ts +26 -0
- package/src/page-move-detector.ts +32 -0
- package/src/protection-tracker.ts +103 -0
- package/src/revert-detector.ts +51 -0
- package/src/section-differ.ts +315 -0
- package/src/talk-activity-detector.ts +105 -0
- package/src/talk-correlator.ts +70 -0
- package/src/talk-section-parser.ts +151 -0
- package/src/template-tracker.ts +253 -0
- package/src/wikilink-extractor.ts +100 -0
- package/src/wikitext-parser.ts +92 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { buildSourceId, buildSourceLineage, citationTracker } from "../citation-tracker.js";
|
|
3
|
+
|
|
4
|
+
// Exercises citationTracker: extracting <ref> citations from wikitext, then
// diffing two citation lists for added / replaced / removed entries.
describe("citationTracker", () => {
  it("extracts named refs from wikitext", () => {
    const extracted = citationTracker.extractCitations(
      `Content<ref name="src1">{{cite web |url=https://example.edu/research |title=Research Paper}}</ref>`,
    );

    expect(extracted).toHaveLength(1);
    const [onlyRef] = extracted;
    expect(onlyRef.refName).toBe("src1");
    expect(onlyRef.url).toBe("https://example.edu/research");
  });

  it("diff detects added citations", () => {
    // Typed from the function signature so the empty "before" side needs no imported type.
    const nothingBefore: Parameters<typeof citationTracker.diffCitations>[0] = [];
    const oneAfter = citationTracker.extractCitations(
      `Content<ref name="a">{{cite web |url=https://example.edu/a}}</ref>`,
    );

    const delta = citationTracker.diffCitations(nothingBefore, oneAfter);

    expect(delta).toHaveLength(1);
    expect(delta[0].type).toBe("added");
  });

  it("diff detects replaced citations", () => {
    // Same ref name on both sides; only the cited URL differs.
    const oldSide = citationTracker.extractCitations(
      `Content<ref name="x">{{cite web |url=https://example.edu/old}}</ref>`,
    );
    const newSide = citationTracker.extractCitations(
      `Content<ref name="x">{{cite web |url=https://example.edu/new}}</ref>`,
    );

    const delta = citationTracker.diffCitations(oldSide, newSide);

    expect(delta).toHaveLength(1);
    expect(delta[0].type).toBe("replaced");
  });

  it("diff detects removed citations", () => {
    const oneBefore = citationTracker.extractCitations(
      `Content<ref name="y">{{cite web |url=https://example.edu/y}}</ref>`,
    );
    const nothingAfter: Parameters<typeof citationTracker.diffCitations>[1] = [];

    const delta = citationTracker.diffCitations(oneBefore, nothingAfter);

    expect(delta).toHaveLength(1);
    expect(delta[0].type).toBe("removed");
  });
});
|
|
45
|
+
|
|
46
|
+
// buildSourceId is checked for shape (16 lowercase hex characters) across the
// three input variants exercised here, and for determinism.
describe("buildSourceId", () => {
  // Every id in this suite is expected to match this pattern.
  const SOURCE_ID_PATTERN = /^[0-9a-f]{16}$/;

  it("hashes URL when present", () => {
    expect(buildSourceId({ url: "https://example.edu/doc", raw: "<ref>...</ref>" })).toMatch(SOURCE_ID_PATTERN);
  });

  it("hashes ref:name when no URL", () => {
    expect(buildSourceId({ refName: "Smith2024", raw: '<ref name="Smith2024"/>' })).toMatch(SOURCE_ID_PATTERN);
  });

  it("hashes raw text when no URL or refName", () => {
    expect(buildSourceId({ raw: "<ref>Some bare citation</ref>" })).toMatch(SOURCE_ID_PATTERN);
  });

  it("produces deterministic output", () => {
    const firstCall = buildSourceId({ url: "https://example.edu/doc", raw: "" });
    const secondCall = buildSourceId({ url: "https://example.edu/doc", raw: "" });

    expect(firstCall).toBe(secondCall);
  });
});
|
|
68
|
+
|
|
69
|
+
// Covers buildSourceLineage: source-record construction, source type/authority
// classification by URL, last-seen bookkeeping, and replacement lineage.
describe("buildSourceLineage", () => {
  // Both revisions carry the same citation, so exactly one source and an empty
  // lineage are expected; replacement is covered by the next test.
  it("builds source records without lineage when content is unchanged", () => {
    const rev1 = {
      revId: 1,
      timestamp: "2024-01-01T00:00:00Z",
      content: `Content<ref name="src1">{{cite web |url=https://example.edu/research |title=Original}}</ref>`,
    };
    const rev2 = {
      revId: 2,
      timestamp: "2024-01-02T00:00:00Z",
      content: `Content<ref name="src1">{{cite web |url=https://example.edu/research |title=Original}}</ref>`,
    };

    const result = buildSourceLineage([rev1, rev2]);
    expect(result.sources).toHaveLength(1);
    expect(result.lineage).toHaveLength(0);
    const src = result.sources[0];
    expect(src.sourceType).toBe("secondary");
    expect(src.authority).toBe("high");
    expect(src.firstSeenRevisionId).toBe(1);
  });

  it("tracks replacement when named ref changes content", () => {
    // The ref keeps its name "x" while its URL moves from example.edu to reuters.com.
    const rev1 = {
      revId: 1,
      timestamp: "2024-01-01T00:00:00Z",
      content: `Content<ref name="x">{{cite web |url=https://example.edu/old}}</ref>`,
    };
    const rev2 = {
      revId: 2,
      timestamp: "2024-01-02T00:00:00Z",
      content: `Content<ref name="x">{{cite web |url=https://reuters.com/article |title=New}}</ref>`,
    };

    const result = buildSourceLineage([rev1, rev2]);
    expect(result.sources).toHaveLength(2);
    expect(result.lineage).toHaveLength(1);

    const eduSource = result.sources.find((s) => s.url?.includes("example.edu"));
    if (!eduSource) throw new Error("Expected edu source to be found");
    const comSource = result.sources.find((s) => s.url?.includes("reuters.com"));
    if (!comSource) throw new Error("Expected com source to be found");

    expect(eduSource.sourceType).toBe("secondary");
    expect(eduSource.authority).toBe("high");
    expect(comSource.sourceType).toBe("news");
    expect(comSource.authority).toBe("medium");

    // The lineage entry belongs to the replaced (edu) source and points at its successor.
    expect(result.lineage[0].sourceId).toBe(eduSource.sourceId);
    expect(result.lineage[0].replacements).toHaveLength(1);
    expect(result.lineage[0].replacements[0].replacedById).toBe(comSource.sourceId);
    expect(result.lineage[0].replacements[0].atRevisionId).toBe(2);
  });

  it("classifies .gov source as government/high", () => {
    const refUrl = "https://www.nasa.gov/report";
    const content = `<ref name="g">{{cite web |url=${refUrl} |title=Report}}</ref>`;
    const result = buildSourceLineage([{ revId: 1, timestamp: "2024-01-01T00:00:00Z", content }]);
    const src = result.sources.find((s) => s.url === refUrl);
    if (!src) throw new Error("Expected source to be found");
    expect(src.sourceType).toBe("government");
    expect(src.authority).toBe("high");
  });

  it("classifies doi.org as academic/medium", () => {
    const refUrl = "https://doi.org/10.1234/test";
    const content = `<ref name="d">{{cite journal |url=${refUrl} |title=Study}}</ref>`;
    const result = buildSourceLineage([{ revId: 1, timestamp: "2024-01-01T00:00:00Z", content }]);
    const src = result.sources.find((s) => s.url === refUrl);
    if (!src) throw new Error("Expected source to be found");
    expect(src.sourceType).toBe("academic");
    expect(src.authority).toBe("medium");
  });

  it("classifies unknown URL as unknown/unrated", () => {
    const refUrl = "https://someblog.example/page";
    const content = `<ref name="u">{{cite web |url=${refUrl} |title=Blog}}</ref>`;
    const result = buildSourceLineage([{ revId: 1, timestamp: "2024-01-01T00:00:00Z", content }]);
    const src = result.sources.find((s) => s.url === refUrl);
    if (!src) throw new Error("Expected source to be found");
    expect(src.sourceType).toBe("unknown");
    expect(src.authority).toBe("unrated");
  });

  it("tracks removal and sets lastSeenRevisionId", () => {
    const rev1 = {
      revId: 1,
      timestamp: "2024-01-01T00:00:00Z",
      content: `Content<ref name="a">{{cite web |url=https://example.edu/a}}</ref>`,
    };
    const rev2 = {
      revId: 2,
      timestamp: "2024-01-02T00:00:00Z",
      content: "Content without citations",
    };

    const result = buildSourceLineage([rev1, rev2]);
    const src = result.sources.find((s) => s.url?.includes("example.edu"));
    if (!src) throw new Error("Expected source to be found");
    // Last seen is the final revision that still contained the citation (rev 1).
    expect(src.lastSeenRevisionId).toBe(1);
    expect(src.lastSeenAt).toBe("2024-01-01T00:00:00Z");
  });

  it("classifies no-url as unknown/unrated", () => {
    const result = buildSourceLineage([
      {
        revId: 1,
        timestamp: "2024-01-01T00:00:00Z",
        content: `Content<ref name="nourl">Some citation text</ref>`,
      },
    ]);
    const src = result.sources[0];
    // Same guard as the sibling tests: fail with a readable message instead of
    // a TypeError on `src.url` if no source was produced.
    if (!src) throw new Error("Expected source to be found");
    expect(src.url).toBeUndefined();
    expect(src.sourceType).toBe("unknown");
    expect(src.authority).toBe("unrated");
  });
});
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import type { Revision } from "@refract-org/evidence-graph";
|
|
2
|
+
import { describe, expect, it } from "vitest";
|
|
3
|
+
import { detectEditClusters } from "../edit-cluster-detector.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Builds a minimal Revision fixture for the edit-cluster tests.
 *
 * @param revId - Revision id to stamp on the fixture.
 * @param timestamp - Timestamp string stored as-is on the fixture.
 * @param user - Editor name; falls back to "Editor" when null or undefined.
 * @returns A Revision titled "Test Page" with empty comment and content.
 */
function makeRev(revId: number, timestamp: string, user?: string): Revision {
  const editor = user ?? "Editor";
  const fixture: Revision = {
    revId,
    title: "Test Page",
    timestamp,
    user: editor,
    comment: "",
    content: "",
  };
  return fixture;
}
|
|
15
|
+
|
|
16
|
+
// detectEditClusters: window handling, cluster boundaries, the single/multi
// editor fact, and the minClusterSize option.
describe("detectEditClusters", () => {
  // One-hour clustering window used by every windowed case below.
  const ONE_HOUR_MS = 60 * 60 * 1000;

  it("returns empty for fewer than min cluster size revisions", () => {
    const loneRevision = [makeRev(1, "2024-01-01T00:00:00Z")];

    expect(detectEditClusters(loneRevision)).toHaveLength(0);
  });

  it("detects a cluster of 3 rapid edits within 1 hour", () => {
    // Three edits, ten minutes apart.
    const burst = [
      makeRev(1, "2024-01-01T00:00:00Z"),
      makeRev(2, "2024-01-01T00:10:00Z"),
      makeRev(3, "2024-01-01T00:20:00Z"),
    ];

    const detected = detectEditClusters(burst, { windowMs: ONE_HOUR_MS });

    expect(detected).toHaveLength(1);
    const [cluster] = detected;
    expect(cluster.eventType).toBe("edit_cluster_detected");
    expect(cluster.fromRevisionId).toBe(1);
    expect(cluster.toRevisionId).toBe(3);
  });

  it("does not cluster edits outside the window", () => {
    // Two-hour gaps against a one-hour window.
    const spreadOut = [
      makeRev(1, "2024-01-01T00:00:00Z"),
      makeRev(2, "2024-01-01T02:00:00Z"),
      makeRev(3, "2024-01-01T04:00:00Z"),
    ];

    expect(detectEditClusters(spreadOut, { windowMs: ONE_HOUR_MS })).toHaveLength(0);
  });

  it("detects single-editor cluster", () => {
    const aliceOnly = [
      makeRev(1, "2024-01-01T00:00:00Z", "Alice"),
      makeRev(2, "2024-01-01T00:05:00Z", "Alice"),
      makeRev(3, "2024-01-01T00:10:00Z", "Alice"),
    ];

    const detected = detectEditClusters(aliceOnly, { windowMs: ONE_HOUR_MS });

    expect(detected).toHaveLength(1);
    expect(detected[0].deterministicFacts[0].detail).toContain("single_editor=true");
  });

  it("detects multi-editor cluster", () => {
    const threeEditors = [
      makeRev(1, "2024-01-01T00:00:00Z", "Alice"),
      makeRev(2, "2024-01-01T00:05:00Z", "Bob"),
      makeRev(3, "2024-01-01T00:10:00Z", "Charlie"),
    ];

    const detected = detectEditClusters(threeEditors, { windowMs: ONE_HOUR_MS });

    expect(detected).toHaveLength(1);
    expect(detected[0].deterministicFacts[0].detail).toContain("single_editor=false");
  });

  it("respects custom min cluster size", () => {
    // Five edits one minute apart: revision ids 1..5 at 00:00 through 00:04.
    const fiveEdits = Array.from({ length: 5 }, (_, minute) =>
      makeRev(minute + 1, `2024-01-01T00:0${minute}:00Z`),
    );

    const detected = detectEditClusters(fiveEdits, { minClusterSize: 5, windowMs: ONE_HOUR_MS });

    expect(detected).toHaveLength(1);
  });
});
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { classifyHeuristic } from "../heuristic-classifier.js";
|
|
3
|
+
|
|
4
|
+
// classifyHeuristic takes an edit comment and a size delta (plus optional
// thresholds) and returns a category label. Each case lists its samples as
// [comment, sizeDelta] pairs and checks them in order.
describe("classifyHeuristic", () => {
  type Sample = [comment: string, sizeDelta: number];

  // Asserts every sample classifies to `label`; stops at the first mismatch.
  const expectAll = (samples: Sample[], label: string): void => {
    for (const [comment, sizeDelta] of samples) {
      expect(classifyHeuristic(comment, sizeDelta)).toBe(label);
    }
  };

  it("classifies revert patterns in comment", () => {
    expectAll(
      [
        ["reverted edit", 100],
        ["Undid revision 12345", 50],
        ["Rollback vandalism", -300],
        ["rvv nonsense", 0],
      ],
      "revert",
    );
  });

  it("prioritizes revert over vandalism when both match", () => {
    expectAll([["Revert vandalism", -500]], "revert");
  });

  it("classifies vandalism patterns in comment", () => {
    expectAll(
      [
        ["vandal", 100],
        ["spam removal", -200],
        ["blanking section", -1000],
        ["test edit", 10],
      ],
      "vandalism",
    );
  });

  it("classifies sourcing patterns in comment", () => {
    expectAll(
      [
        ["added citation", 500],
        ["add ref", 100],
        ["rm bad source", -200],
        ["cite web", 0],
      ],
      "sourcing",
    );
  });

  it("classifies major additions by size", () => {
    expectAll(
      [
        ["update", 2500],
        ["expanded section", 5000],
      ],
      "major_addition",
    );
  });

  it("classifies major removals by size", () => {
    expectAll(
      [
        ["trim", -2500],
        ["cleanup", -10000],
      ],
      "major_removal",
    );
  });

  it("classifies cosmetic edits (small delta, empty comment)", () => {
    expectAll(
      [
        ["", 10],
        ["", 0],
        ["", 19],
      ],
      "cosmetic",
    );
  });

  it("classifies minor edits by size delta", () => {
    expectAll(
      [
        ["fix typo", 50],
        ["tweak", 99],
      ],
      "minor",
    );
  });

  it("returns unknown for unmatched edits", () => {
    expectAll(
      [
        ["meaningful edit", 500],
        ["", 1000],
      ],
      "unknown",
    );
  });

  it("respects custom thresholds", () => {
    // Each call overrides a single threshold, so these stay explicit.
    const withLowerMajor = classifyHeuristic("big edit", 1500, { majorAdditionThreshold: 1000 });
    const withWiderCosmetic = classifyHeuristic("", 50, { cosmeticThreshold: 100 });
    const withWiderMinor = classifyHeuristic("tweak", 200, { minorThreshold: 300 });

    expect(withLowerMajor).toBe("major_addition");
    expect(withWiderCosmetic).toBe("cosmetic");
    expect(withWiderMinor).toBe("minor");
  });

  it("is case-insensitive for comment matching", () => {
    expect(classifyHeuristic("REVERT", 0)).toBe("revert");
    expect(classifyHeuristic("CITATION ADDED", 100)).toBe("sourcing");
    expect(classifyHeuristic("VANDAL", 0)).toBe("vandalism");
  });
});
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import type { EvidenceEvent } from "@refract-org/evidence-graph";
|
|
2
|
+
import { describe, expect, it } from "vitest";
|
|
3
|
+
import { diffObservations } from "../observation-differ.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Builds a minimal EvidenceEvent fixture for the observation-differ tests.
 *
 * @param eventType - Event type name; cast to the EvidenceEvent union so tests can pass plain strings.
 * @param fromRevId - Stored as `fromRevisionId`.
 * @param toRevId - Stored as `toRevisionId`.
 * @param section - Section label, "body" by default.
 * @returns An "observed"-layer event with empty before/after text, no
 *   deterministic facts, and a fixed timestamp.
 */
function makeEvent(eventType: string, fromRevId: number, toRevId: number, section = "body"): EvidenceEvent {
  const typedEventType = eventType as EvidenceEvent["eventType"];
  const fixture: EvidenceEvent = {
    eventType: typedEventType,
    fromRevisionId: fromRevId,
    toRevisionId: toRevId,
    section,
    before: "",
    after: "",
    deterministicFacts: [],
    layer: "observed",
    timestamp: "2026-01-01T00:00:00Z",
  };
  return fixture;
}
|
|
18
|
+
|
|
19
|
+
// diffObservations compares a prior and a current event stream and reports
// which events are new, resolved (present before, gone now), or unchanged.
describe("diffObservations", () => {
  it("returns zero delta for identical streams", () => {
    const earlier = [makeEvent("revert_detected", 1, 2)];
    const later = [makeEvent("revert_detected", 1, 2)];

    const { new: added, resolved, unchanged } = diffObservations(earlier, later);

    expect(added).toHaveLength(0);
    expect(resolved).toHaveLength(0);
    expect(unchanged).toHaveLength(1);
  });

  it("detects new events in current stream", () => {
    const earlier = [makeEvent("revert_detected", 1, 2)];
    const later = [makeEvent("revert_detected", 1, 2), makeEvent("citation_added", 2, 3)];

    const { new: added, resolved } = diffObservations(earlier, later);

    expect(added).toHaveLength(1);
    expect(added[0].eventType).toBe("citation_added");
    expect(resolved).toHaveLength(0);
  });

  it("detects resolved events when prior has them and current does not", () => {
    const earlier = [makeEvent("revert_detected", 1, 2), makeEvent("template_added", 2, 3)];
    const later = [makeEvent("revert_detected", 1, 2)];

    const { new: added, resolved } = diffObservations(earlier, later);

    expect(resolved).toHaveLength(1);
    expect(resolved[0].eventType).toBe("template_added");
    expect(added).toHaveLength(0);
  });

  it("handles empty prior stream (first run)", () => {
    const later = [makeEvent("revert_detected", 1, 2)];

    const { new: added, resolved, unchanged } = diffObservations([], later);

    expect(added).toHaveLength(1);
    expect(resolved).toHaveLength(0);
    expect(unchanged).toHaveLength(0);
  });
});
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { buildPageMoveEvents } from "../page-move-detector.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Local shape of one page-move record passed to buildPageMoveEvents in this
 * test file.
 */
interface MoveRecord {
  /** Title before the move. */
  oldTitle: string;
  /** Title after the move. */
  newTitle: string;
  /** Timestamp string of the move. */
  timestamp: string;
  /** Revision id associated with the move. */
  revId: number;
  /** Free-text comment attached to the move. */
  comment: string;
}
|
|
11
|
+
|
|
12
|
+
// buildPageMoveEvents turns move records into "page_moved" evidence events.
describe("buildPageMoveEvents", () => {
  it("converts move records to EvidenceEvent[]", () => {
    const rename: MoveRecord = {
      oldTitle: "Old Page",
      newTitle: "New Page",
      timestamp: "2024-01-01T00:00:00Z",
      revId: 12345,
      comment: "Renamed for clarity",
    };
    const moves: MoveRecord[] = [rename];

    const produced = buildPageMoveEvents(moves);

    expect(produced).toHaveLength(1);
    const [moved] = produced;
    expect(moved.eventType).toBe("page_moved");
    // The move's revId becomes toRevisionId; fromRevisionId is expected to be 0.
    expect(moved.fromRevisionId).toBe(0);
    expect(moved.toRevisionId).toBe(12345);
    expect(moved.before).toBe("Old Page");
    expect(moved.after).toBe("New Page");
    expect(moved.section).toBe("");
    expect(moved.layer).toBe("observed");
    expect(moved.timestamp).toBe("2024-01-01T00:00:00Z");
    expect(moved.deterministicFacts[0].fact).toBe("page_moved");
  });

  it("handles empty moves", () => {
    expect(buildPageMoveEvents([])).toEqual([]);
  });

  it("handles multiple moves", () => {
    // A chained rename: Page A -> Page B -> Page C.
    const firstMove: MoveRecord = {
      oldTitle: "Page A",
      newTitle: "Page B",
      timestamp: "2024-01-01T00:00:00Z",
      revId: 1,
      comment: "First move",
    };
    const secondMove: MoveRecord = {
      oldTitle: "Page B",
      newTitle: "Page C",
      timestamp: "2024-01-02T00:00:00Z",
      revId: 2,
      comment: "Second move",
    };
    const moves: MoveRecord[] = [firstMove, secondMove];

    expect(buildPageMoveEvents(moves)).toHaveLength(2);
  });
});
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import type { ProtectionLogRecord } from "../protection-tracker.js";
|
|
3
|
+
import { protectionTracker } from "../protection-tracker.js";
|
|
4
|
+
|
|
5
|
+
// protectionTracker: building state from a protection log, diffing two state
// maps, and selecting log records inside a time range.
describe("protectionTracker", () => {
  describe("buildState", () => {
    it("builds state from a protection log", () => {
      const singleProtect: ProtectionLogRecord[] = [
        { logId: 1, pageTitle: "Test", timestamp: "2026-01-01T00:00:00Z", comment: "protecting", action: "protect" },
      ];

      expect(protectionTracker.buildState(singleProtect).size).toBeGreaterThan(0);
    });

    it("returns empty map for empty logs", () => {
      expect(protectionTracker.buildState([]).size).toBe(0);
    });
  });

  describe("diffState", () => {
    // Returns a fresh semi-protection entry per call so no test shares an object with another.
    const semiEntry = () => ({ level: "semi", sinceTimestamp: "2026-01-01T00:00:00Z", sinceLogId: 1 });

    it("detects added protections", () => {
      const emptyBefore = new Map();
      const protectedAfter = new Map();
      protectedAfter.set("Test", semiEntry());

      const additions = protectionTracker
        .diffState(emptyBefore, protectedAfter)
        .filter((change) => change.type === "added");

      expect(additions.length).toBeGreaterThanOrEqual(1);
    });

    it("detects removed protections", () => {
      const protectedBefore = new Map();
      protectedBefore.set("Test", semiEntry());
      const emptyAfter = new Map();

      const removals = protectionTracker
        .diffState(protectedBefore, emptyAfter)
        .filter((change) => change.type === "removed");

      expect(removals.length).toBeGreaterThanOrEqual(1);
    });

    it("returns empty for identical states", () => {
      // The same Map instance is passed as both sides.
      const sameState = new Map();
      sameState.set("Test", semiEntry());

      expect(protectionTracker.diffState(sameState, sameState)).toHaveLength(0);
    });
  });

  describe("findLogsBetween", () => {
    // Three records for one page: 1 Jan, 15 Jan and 1 Feb 2026.
    const timeline: ProtectionLogRecord[] = [
      { logId: 1, pageTitle: "Test", timestamp: "2026-01-01T00:00:00Z", comment: "first", action: "protect" },
      { logId: 2, pageTitle: "Test", timestamp: "2026-01-15T00:00:00Z", comment: "second", action: "modify" },
      { logId: 3, pageTitle: "Test", timestamp: "2026-02-01T00:00:00Z", comment: "third", action: "unprotect" },
    ];

    it("finds logs within a time range", () => {
      const inRange = protectionTracker.findLogsBetween(timeline, "2026-01-10T00:00:00Z", "2026-01-20T00:00:00Z");

      expect(inRange).toHaveLength(1);
      expect(inRange[0].logId).toBe(2);
    });

    it("returns empty for range with no logs", () => {
      // The whole range lies before the first record.
      const inRange = protectionTracker.findLogsBetween(timeline, "2025-01-01T00:00:00Z", "2025-12-31T00:00:00Z");

      expect(inRange).toHaveLength(0);
    });

    it("returns empty for empty logs array", () => {
      const inRange = protectionTracker.findLogsBetween([], "2026-01-01T00:00:00Z", "2026-02-01T00:00:00Z");

      expect(inRange).toEqual([]);
    });
  });
});
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import type { Revision } from "@refract-org/evidence-graph";
|
|
2
|
+
import { describe, expect, it } from "vitest";
|
|
3
|
+
import { revertDetector } from "../revert-detector.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Builds a minimal Revision fixture for the revert-detector tests.
 *
 * @param revId - Revision id; also used as the day of January 2026 (UTC) for the timestamp.
 * @param comment - Edit summary stored on the fixture.
 * @param content - Revision text, empty by default.
 * @returns A non-minor Revision for page 1 ("Test") with a fixed size of 100.
 */
function makeRev(revId: number, comment: string, content = ""): Revision {
  // Month index 0 is January; the revision id supplies the day.
  const timestamp = new Date(Date.UTC(2026, 0, revId)).toISOString();
  const fixture: Revision = {
    revId,
    pageId: 1,
    pageTitle: "Test",
    timestamp,
    comment,
    content,
    size: 100,
    minor: false,
  };
  return fixture;
}
|
|
17
|
+
|
|
18
|
+
// revertDetector.isRevert takes an edit comment and returns a boolean.
describe("isRevert", () => {
  // Called through the object so the method keeps its receiver.
  const flagged = (comment: string) => revertDetector.isRevert(comment);

  it("detects 'revert' in comment", () => {
    expect(flagged("reverted vandalism")).toBe(true);
  });

  it("detects 'undid revision' in comment", () => {
    expect(flagged("Undid revision 12345 by User")).toBe(true);
  });

  it("detects 'rvv' in comment", () => {
    expect(flagged("rvv nonsense")).toBe(true);
  });

  it("detects 'rollback' in comment", () => {
    expect(flagged("Rollback vandalism")).toBe(true);
  });

  it("detects 'restore' in comment", () => {
    expect(flagged("Restore previous version")).toBe(true);
  });

  it("detects '[[WP:ROLLBACK]]' in comment", () => {
    expect(flagged("[[WP:ROLLBACK]] vandalism")).toBe(true);
  });

  it("returns false for non-revert comments", () => {
    for (const ordinary of ["added citation", "fixed typo", "expanded section"]) {
      expect(flagged(ordinary)).toBe(false);
    }
  });

  it("is case-insensitive", () => {
    for (const shouted of ["REVERT", "Reverted"]) {
      expect(flagged(shouted)).toBe(true);
    }
  });
});
|
|
54
|
+
|
|
55
|
+
// revertDetector.detectRevertChain takes a revision list and returns an array
// of chains; these cases only check whether any chain is reported.
describe("detectRevertChain", () => {
  it("detects a single revert edit", () => {
    const history = [makeRev(1, "added content"), makeRev(2, "reverted")];

    const found = revertDetector.detectRevertChain(history);

    expect(found.length).toBeGreaterThanOrEqual(1);
  });

  it("returns empty for no reverts", () => {
    const history = [makeRev(1, "added content"), makeRev(2, "fixed typo"), makeRev(3, "expanded")];

    const found = revertDetector.detectRevertChain(history);

    expect(found).toEqual([]);
  });

  it("detects a multi-edit revert chain", () => {
    // A revert immediately followed by a revert of that revert.
    const history = [makeRev(1, "good edit"), makeRev(2, "revert"), makeRev(3, "re-revert")];

    const found = revertDetector.detectRevertChain(history);

    expect(found.length).toBeGreaterThanOrEqual(1);
  });

  it("returns empty for empty revision list", () => {
    const found = revertDetector.detectRevertChain([]);

    expect(found).toEqual([]);
  });
});
|