@refract-org/analyzers 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/README.md +38 -0
  2. package/dist/src/category-tracker.d.ts +8 -0
  3. package/dist/src/category-tracker.d.ts.map +1 -0
  4. package/dist/src/category-tracker.js +60 -0
  5. package/dist/src/category-tracker.js.map +1 -0
  6. package/dist/src/citation-tracker.d.ts +13 -0
  7. package/dist/src/citation-tracker.d.ts.map +1 -0
  8. package/dist/src/citation-tracker.js +200 -0
  9. package/dist/src/citation-tracker.js.map +1 -0
  10. package/dist/src/claim-differ.d.ts +2 -0
  11. package/dist/src/claim-differ.d.ts.map +1 -0
  12. package/dist/src/claim-differ.js +6 -0
  13. package/dist/src/claim-differ.js.map +1 -0
  14. package/dist/src/edit-cluster-detector.d.ts +14 -0
  15. package/dist/src/edit-cluster-detector.d.ts.map +1 -0
  16. package/dist/src/edit-cluster-detector.js +57 -0
  17. package/dist/src/edit-cluster-detector.js.map +1 -0
  18. package/dist/src/heuristic-classifier.d.ts +9 -0
  19. package/dist/src/heuristic-classifier.d.ts.map +1 -0
  20. package/dist/src/heuristic-classifier.js +33 -0
  21. package/dist/src/heuristic-classifier.js.map +1 -0
  22. package/dist/src/index.d.ts +70 -0
  23. package/dist/src/index.d.ts.map +1 -0
  24. package/dist/src/index.js +16 -0
  25. package/dist/src/index.js.map +1 -0
  26. package/dist/src/observation-differ.d.ts +8 -0
  27. package/dist/src/observation-differ.d.ts.map +1 -0
  28. package/dist/src/observation-differ.js +16 -0
  29. package/dist/src/observation-differ.js.map +1 -0
  30. package/dist/src/page-move-detector.d.ts +11 -0
  31. package/dist/src/page-move-detector.d.ts.map +1 -0
  32. package/dist/src/page-move-detector.js +21 -0
  33. package/dist/src/page-move-detector.js.map +1 -0
  34. package/dist/src/protection-tracker.d.ts +23 -0
  35. package/dist/src/protection-tracker.d.ts.map +1 -0
  36. package/dist/src/protection-tracker.js +74 -0
  37. package/dist/src/protection-tracker.js.map +1 -0
  38. package/dist/src/revert-detector.d.ts +3 -0
  39. package/dist/src/revert-detector.d.ts.map +1 -0
  40. package/dist/src/revert-detector.js +43 -0
  41. package/dist/src/revert-detector.js.map +1 -0
  42. package/dist/src/section-differ.d.ts +26 -0
  43. package/dist/src/section-differ.d.ts.map +1 -0
  44. package/dist/src/section-differ.js +268 -0
  45. package/dist/src/section-differ.js.map +1 -0
  46. package/dist/src/talk-activity-detector.d.ts +16 -0
  47. package/dist/src/talk-activity-detector.d.ts.map +1 -0
  48. package/dist/src/talk-activity-detector.js +76 -0
  49. package/dist/src/talk-activity-detector.js.map +1 -0
  50. package/dist/src/talk-correlator.d.ts +7 -0
  51. package/dist/src/talk-correlator.d.ts.map +1 -0
  52. package/dist/src/talk-correlator.js +53 -0
  53. package/dist/src/talk-correlator.js.map +1 -0
  54. package/dist/src/talk-section-parser.d.ts +22 -0
  55. package/dist/src/talk-section-parser.d.ts.map +1 -0
  56. package/dist/src/talk-section-parser.js +109 -0
  57. package/dist/src/talk-section-parser.js.map +1 -0
  58. package/dist/src/template-tracker.d.ts +12 -0
  59. package/dist/src/template-tracker.d.ts.map +1 -0
  60. package/dist/src/template-tracker.js +225 -0
  61. package/dist/src/template-tracker.js.map +1 -0
  62. package/dist/src/wikilink-extractor.d.ts +8 -0
  63. package/dist/src/wikilink-extractor.d.ts.map +1 -0
  64. package/dist/src/wikilink-extractor.js +81 -0
  65. package/dist/src/wikilink-extractor.js.map +1 -0
  66. package/dist/src/wikitext-parser.d.ts +15 -0
  67. package/dist/src/wikitext-parser.d.ts.map +1 -0
  68. package/dist/src/wikitext-parser.js +85 -0
  69. package/dist/src/wikitext-parser.js.map +1 -0
  70. package/dist/tsconfig 2.tsbuildinfo +1 -0
  71. package/dist/tsconfig.tsbuildinfo +1 -0
  72. package/package.json +28 -0
  73. package/src/__tests__/category-tracker.test.ts +79 -0
  74. package/src/__tests__/citation-tracker.test.ts +185 -0
  75. package/src/__tests__/edit-cluster-detector.test.ts +79 -0
  76. package/src/__tests__/heuristic-classifier.test.ts +67 -0
  77. package/src/__tests__/observation-differ.test.ts +58 -0
  78. package/src/__tests__/page-move-detector.test.ts +64 -0
  79. package/src/__tests__/protection-tracker.test.ts +72 -0
  80. package/src/__tests__/revert-detector.test.ts +76 -0
  81. package/src/__tests__/section-differ.test.ts +120 -0
  82. package/src/__tests__/talk-activity-detector.test.ts +112 -0
  83. package/src/__tests__/talk-correlator.test.ts +71 -0
  84. package/src/__tests__/talk-section-parser.test.ts +105 -0
  85. package/src/__tests__/template-tracker.test.ts +159 -0
  86. package/src/__tests__/wikilink-extractor.test.ts +101 -0
  87. package/src/__tests__/wikitext-parser.test.ts +142 -0
  88. package/src/category-tracker.ts +75 -0
  89. package/src/citation-tracker.ts +226 -0
  90. package/src/claim-differ.ts +4 -0
  91. package/src/edit-cluster-detector.ts +78 -0
  92. package/src/heuristic-classifier.ts +59 -0
  93. package/src/index.ts +88 -0
  94. package/src/observation-differ.ts +26 -0
  95. package/src/page-move-detector.ts +32 -0
  96. package/src/protection-tracker.ts +103 -0
  97. package/src/revert-detector.ts +51 -0
  98. package/src/section-differ.ts +315 -0
  99. package/src/talk-activity-detector.ts +105 -0
  100. package/src/talk-correlator.ts +70 -0
  101. package/src/talk-section-parser.ts +151 -0
  102. package/src/template-tracker.ts +253 -0
  103. package/src/wikilink-extractor.ts +100 -0
  104. package/src/wikitext-parser.ts +92 -0
@@ -0,0 +1,142 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import {
3
+ countCitations,
4
+ countKeywordMentions,
5
+ deriveSectionHeading,
6
+ extractAnchorSnippet,
7
+ extractHeadingMap,
8
+ sanitizeWikitext,
9
+ } from "../wikitext-parser.js";
10
+
11
// Wikitext fixture combining bold/italic markers, a template, a named <ref>,
// an HTML comment, plain and piped wikilinks, and three level-2 headings.
const SAMPLE = `'''Bold text''' and ''italic''.
== History ==
The {{citation needed}} theory was first proposed in 2005.<ref name="smith2005">Smith (2005)</ref>

<!-- This is a comment -->

[[Internal link]] and [[Piped|Display]].

== See also ==
* [[Related topic]]

== References ==
{{reflist}}`;

// Heading fixture: lead text followed by two level-2 sections, the first of
// which contains one level-3 subsection.
const HEADING_SAMPLE = `Lead text here.
== First section ==
Content.
=== Subsection ===
Deeper.
== Second section ==
More.`;
32
+
33
// Each case feeds sanitizeWikitext a single markup construct and checks the
// plain text that comes back.
describe("sanitizeWikitext", () => {
  it("strips HTML comments", () => {
    const cleaned = sanitizeWikitext("before <!-- comment --> after");
    expect(cleaned).not.toContain("comment");
  });

  it("strips ref tags", () => {
    const cleaned = sanitizeWikitext("text<ref>citation</ref>more");
    expect(cleaned).not.toContain("citation");
    // The text on either side of the ref must survive.
    expect(cleaned).toContain("text");
    expect(cleaned).toContain("more");
  });

  it("strips templates", () => {
    const cleaned = sanitizeWikitext("{{citation needed}}");
    expect(cleaned).toBe("");
  });

  it("strips wikilinks, keeping display text", () => {
    const cleaned = sanitizeWikitext("[[Foo]] and [[Bar|baz]]");
    expect(cleaned).toBe("Foo and baz");
  });

  it("strips bold and italic markers", () => {
    const cleaned = sanitizeWikitext("'''bold''' and ''italic''");
    expect(cleaned).toBe("bold and italic");
  });

  it("collapses all whitespace sequences to single spaces", () => {
    expect(sanitizeWikitext("a\n\n\n\nb")).toBe("a b");
  });
});
62
+
63
// HEADING_SAMPLE holds three headings; the first one in document order is
// "First section".
describe("extractHeadingMap", () => {
  it("extracts headings with positions", () => {
    const headings = extractHeadingMap(HEADING_SAMPLE);

    expect(headings.length).toBeGreaterThanOrEqual(3);

    const firstEntry = headings[0];
    expect(firstEntry.heading).toBe("First section");
  });
});
70
+
71
// Offsets are taken from HEADING_SAMPLE itself so the cases stay valid if the
// fixture text is edited.
describe("deriveSectionHeading", () => {
  const headingAt = (offset: number) => deriveSectionHeading(HEADING_SAMPLE, offset);

  it("returns heading for a position in a section", () => {
    const insideFirstSection = HEADING_SAMPLE.indexOf("Content");
    expect(headingAt(insideFirstSection)).toBe("First section");
  });

  it("returns null for lead area", () => {
    const insideLead = HEADING_SAMPLE.indexOf("Lead text");
    expect(headingAt(insideLead)).toBeNull();
  });

  it("handles positions at the end of content", () => {
    const lastCharacter = HEADING_SAMPLE.length - 1;
    expect(headingAt(lastCharacter)).toBe("Second section");
  });
});
87
+
88
// countCitations is expected to report the exact number of <ref> tags,
// including zero when the text has none.
describe("countCitations", () => {
  // Title corrected: the previous wording promised "at least 1", which the
  // zero-refs case below contradicts.
  it("counts every ref tag", () => {
    const wikitext = "One<ref>a</ref> two<ref name='b'>c</ref> three<ref>d</ref>";
    expect(countCitations(wikitext)).toBe(3);
  });

  it("counts combined refs", () => {
    // SAMPLE mixes a ref with templates, links and comments.
    expect(countCitations(SAMPLE)).toBeGreaterThan(0);
  });

  it("returns 0 when there are no refs", () => {
    expect(countCitations("Plain text with no citations.")).toBe(0);
  });
});
102
+
103
// totalMentions sums occurrences across all phrases; matchedPhrases counts
// how many of the supplied phrases occurred at least once.
describe("countKeywordMentions", () => {
  it("counts mentions of a single phrase", () => {
    const { totalMentions, matchedPhrases } = countKeywordMentions(
      "Earth is the third planet. Earth has one moon.",
      ["Earth"],
    );
    expect(totalMentions).toBe(2);
    expect(matchedPhrases).toBe(1);
  });

  it("counts multiple phrases", () => {
    const { totalMentions, matchedPhrases } = countKeywordMentions(
      "Apple and orange are fruits. Apple is red.",
      ["Apple", "orange"],
    );
    expect(totalMentions).toBe(3);
    expect(matchedPhrases).toBe(2);
  });

  it("returns 0 for no matches", () => {
    const { totalMentions, matchedPhrases } = countKeywordMentions("No relevant content here.", ["pineapple"]);
    expect(totalMentions).toBe(0);
    expect(matchedPhrases).toBe(0);
  });
});
122
+
123
// extractAnchorSnippet returns text around the first matching keyword, or
// null when none of the keywords occur.
describe("extractAnchorSnippet", () => {
  it("extracts surrounding context for a keyword", () => {
    const sentence = "The Earth is the third planet from the Sun and the only known planet to support life.";
    const found = extractAnchorSnippet(sentence, ["Earth"]);
    expect(found).toBeTruthy();
    expect(found?.length).toBeGreaterThan(0);
  });

  it("returns null if keyword not found", () => {
    expect(extractAnchorSnippet("No relevant content here.", ["Nonexistent"])).toBeNull();
  });

  it("respects radius parameter", () => {
    // 100 characters of padding on each side, far more than the radius of 10.
    const padded = ["A".repeat(100), "TARGET", "B".repeat(100)].join("");
    const found = extractAnchorSnippet(padded, ["TARGET"], 10);
    expect(found).toBeTruthy();
    expect(found?.length).toBeLessThan(50);
  });
});
@@ -0,0 +1,75 @@
1
+ import type { DeterministicFact, EvidenceEvent } from "@refract-org/evidence-graph";
2
+
3
// Matches [[Category:Name]] and [[Category:Name|sort key]], case-insensitively.
// Capture group 1 is the category name without the sort key.
const CATEGORY_REGEX = /\[\[Category:([^\]|]+)(?:\|[^\]]*)?\]\]/gi;

/**
 * Lists the distinct categories referenced in a piece of wikitext.
 *
 * Names are normalized so that equivalent spellings compare equal: underscores
 * become spaces, runs of whitespace collapse to one space, surrounding
 * whitespace is dropped and the result is lower-cased.
 *
 * @param wikitext Raw wikitext to scan.
 * @returns Normalized category names, de-duplicated, in order of first appearance.
 */
export function extractCategories(wikitext: string): string[] {
  // A Set iterates in insertion order, so it handles both de-duplication and
  // first-occurrence ordering.
  const seen = new Set<string>();

  // matchAll works on its own copy of the pattern, so the module-level /g
  // regex never carries lastIndex state between calls.
  for (const match of wikitext.matchAll(CATEGORY_REGEX)) {
    // Underscores are converted before trimming; otherwise "Foo_" would
    // normalize to "foo " and be treated as different from "Foo".
    const normalized = match[1].replace(/_/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
    if (normalized) seen.add(normalized);
  }

  return [...seen];
}
22
+
23
/**
 * Compares two category lists.
 *
 * @param before Categories present in the older text.
 * @param after Categories present in the newer text.
 * @returns `added`: entries of `after` missing from `before`; `removed`:
 *   entries of `before` missing from `after`. Each list keeps the order (and
 *   any duplicates) of the input it was taken from.
 */
export function diffCategories(before: string[], after: string[]): { added: string[]; removed: string[] } {
  const previous = new Set(before);
  const current = new Set(after);

  const added: string[] = [];
  for (const name of after) {
    if (!previous.has(name)) added.push(name);
  }

  const removed: string[] = [];
  for (const name of before) {
    if (!current.has(name)) removed.push(name);
  }

  return { added, removed };
}
32
+
33
/**
 * Produces one evidence event per category gained or lost between two
 * versions of a page's wikitext.
 *
 * @param beforeWikitext Wikitext of the older revision.
 * @param afterWikitext Wikitext of the newer revision.
 * @param fromRevId Revision id recorded as `fromRevisionId` on every event.
 * @param toRevId Revision id recorded as `toRevisionId` on every event.
 * @param timestamp Timestamp copied onto every event.
 * @param extraFacts Optional facts appended after each event's own fact.
 * @returns All `category_added` events followed by all `category_removed` events.
 */
export function buildCategoryEvents(
  beforeWikitext: string,
  afterWikitext: string,
  fromRevId: number,
  toRevId: number,
  timestamp: string,
  extraFacts?: DeterministicFact[],
): EvidenceEvent[] {
  const { added, removed } = diffCategories(extractCategories(beforeWikitext), extractCategories(afterWikitext));
  const sharedFacts = extraFacts ?? [];

  // Both event kinds share one shape; only the kind and which of
  // `before` / `after` carries the category name differ.
  const toEvent = (kind: "category_added" | "category_removed", category: string): EvidenceEvent => {
    const isAddition = kind === "category_added";
    return {
      eventType: kind,
      fromRevisionId: fromRevId,
      toRevisionId: toRevId,
      section: "",
      before: isAddition ? "" : category,
      after: isAddition ? category : "",
      deterministicFacts: [{ fact: kind, detail: `category=${category}` }, ...sharedFacts],
      layer: "observed",
      timestamp,
    };
  };

  return [
    ...added.map((category) => toEvent("category_added", category)),
    ...removed.map((category) => toEvent("category_removed", category)),
  ];
}
@@ -0,0 +1,226 @@
1
+ import { createHash } from "node:crypto";
2
+ import type { SourceAuthority, SourceLineage, SourceRecord, SourceType } from "@refract-org/evidence-graph";
3
+ import type { CitationChange, CitationRef, CitationTracker } from "./index.js";
4
+
5
/**
 * Reads the `name` attribute out of the attribute text of a `<ref>` tag.
 *
 * A quoted value is taken whole, so names containing spaces are preserved.
 * The last alternative keeps the lenient behavior for unquoted or
 * unterminated values: it stops at whitespace, a quote or `>`.
 *
 * @returns The trimmed name, or undefined when absent or empty.
 */
function parseRefName(attrs: string): string | undefined {
  const found = attrs.match(/\bname\s*=\s*(?:"([^"]*)"|'([^']*)'|["']?([^"'\s>]+))/i);
  const name = (found?.[1] ?? found?.[2] ?? found?.[3])?.trim();
  return name ? name : undefined;
}

/**
 * Extracts `<ref>` citations from wikitext and diffs two citation lists.
 */
export const citationTracker: CitationTracker = {
  /**
   * Collects the citations in `wikitext`.
   *
   * Paired `<ref ...>...</ref>` tags are collected first, then self-closing
   * `<ref name="..." />` tags. A ref is identified by its name when it has one
   * and by its full matched text otherwise; only the first ref per identity is
   * kept, so a self-closing reuse of an already-seen name adds nothing.
   *
   * @param wikitext Raw wikitext to scan.
   * @returns One entry per distinct citation, paired refs before self-closing ones.
   */
  extractCitations(wikitext: string): CitationRef[] {
    const refs: CitationRef[] = [];
    const seen = new Set<string>();

    // The lookbehind rejects a `>` that closes a self-closing tag (`/>`).
    // Without it `<ref name="a"/>` is taken as an opening tag and everything
    // up to the next `</ref>` is swallowed as its content.
    const refRegex = /<ref\b([^>]*?)(?<!\/\s*)>(.*?)<\/ref\s*>/gs;
    let match: RegExpExecArray | null;

    // biome-ignore lint/suspicious/noAssignInExpressions: Standard regex loop pattern
    while ((match = refRegex.exec(wikitext)) !== null) {
      const attrs = match[1];
      const content = match[2].trim();

      const refName = parseRefName(attrs);
      // Prefer the exact `url=` / `title=` parameter; fall back to any
      // parameter ending in those words (archive-url, trans-title, ...) so
      // refs that only carry such a variant keep the value they had before.
      const urlMatch =
        content.match(/(?<![\w-])url\s*=\s*([^\s|}\]]+)/i) ?? content.match(/url\s*=\s*([^\s|}\]]+)/i);
      const titleMatch =
        content.match(/(?<![\w-])title\s*=\s*([^|}\]]+?)(?:\s*[|}\]])/i) ??
        content.match(/title\s*=\s*([^|}\]]+?)(?:\s*[|}\]])/i);

      const raw = match[0];
      const key = refName ?? raw;

      if (seen.has(key)) continue;
      seen.add(key);

      refs.push({
        refName,
        url: urlMatch ? urlMatch[1].trim() : undefined,
        title: titleMatch ? titleMatch[1].trim() : undefined,
        raw,
      });
    }

    const selfClosingRegex = /<ref\b([^>]*?)\/\s*>/g;
    // biome-ignore lint/suspicious/noAssignInExpressions: Standard regex loop pattern
    while ((match = selfClosingRegex.exec(wikitext)) !== null) {
      // A self-closing ref has no content, so it is only meaningful when named.
      const key = parseRefName(match[1]);
      if (!key) continue;

      if (seen.has(key)) continue;
      seen.add(key);

      refs.push({
        refName: key,
        raw: match[0],
      });
    }

    return refs;
  },

  /**
   * Compares two citation lists, matching refs by name (or by raw text when
   * unnamed).
   *
   * @param before Citations of the older text.
   * @param after Citations of the newer text.
   * @returns One change per ref in `after` ("added", "replaced" when the raw
   *   text differs, otherwise "unchanged"), followed by one "removed" change
   *   per ref that exists only in `before`.
   */
  diffCitations(before: CitationRef[], after: CitationRef[]): CitationChange[] {
    const changes: CitationChange[] = [];
    const beforeMap = indexByKey(before);
    const afterMap = indexByKey(after);

    for (const [key, afterRef] of afterMap) {
      const beforeRef = beforeMap.get(key);
      if (!beforeRef) {
        changes.push({ type: "added", after: afterRef });
      } else if (beforeRef.raw !== afterRef.raw) {
        changes.push({ type: "replaced", before: beforeRef, after: afterRef });
      } else {
        changes.push({ type: "unchanged", after: afterRef });
      }
    }

    for (const [key, beforeRef] of beforeMap) {
      if (!afterMap.has(key)) {
        changes.push({ type: "removed", before: beforeRef });
      }
    }

    return changes;
  },
};
81
+
82
/**
 * Indexes citations by identity: the ref name when present, otherwise the raw
 * tag text. When several refs share a key the last one wins, while the key
 * keeps the position of its first occurrence.
 */
function indexByKey(refs: CitationRef[]): Map<string, CitationRef> {
  return new Map(refs.map((ref): [string, CitationRef] => [ref.refName ?? ref.raw, ref]));
}
90
+
91
/**
 * Walks a sequence of revisions and derives, from their citations, a record
 * per source plus the history of which source replaced which.
 *
 * Revisions are processed in the order given; each adjacent pair is diffed
 * with `citationTracker.diffCitations`.
 *
 * @param revisions Revisions to analyse, each with its id, timestamp and wikitext.
 * @returns `sources`: one record per distinct source id, in order of first
 *   appearance; `lineage`: for each source that was replaced by a different
 *   source, the list of its replacements.
 */
export function buildSourceLineage(revisions: { revId: number; timestamp: string; content: string }[]): {
  sources: SourceRecord[];
  lineage: SourceLineage[];
} {
  const sourceMap = new Map<string, SourceRecord>();
  const replacementMap = new Map<string, { replacedById: string; atRevisionId: number; atTimestamp: string }[]>();

  // Registers the source behind `ref` the first time it is encountered and
  // returns its id; later sightings leave the existing record untouched.
  function ensureSource(ref: CitationRef, seenAtRevId: number, seenAtTimestamp: string): string {
    const sourceId = buildSourceId(ref);
    if (!sourceMap.has(sourceId)) {
      sourceMap.set(sourceId, {
        sourceId,
        url: ref.url,
        title: ref.title,
        sourceType: classifySourceType(ref),
        authority: classifyAuthority(ref),
        firstSeenRevisionId: seenAtRevId,
        firstSeenAt: seenAtTimestamp,
        claimsReferencing: [],
      });
    }
    return sourceId;
  }

  const allCitations = revisions.map((r) => citationTracker.extractCitations(r.content));

  // Seed sources from the first revision
  if (revisions.length > 0) {
    for (const ref of allCitations[0]) {
      ensureSource(ref, revisions[0].revId, revisions[0].timestamp);
    }
  }

  for (let i = 0; i < revisions.length - 1; i++) {
    const before = revisions[i];
    const after = revisions[i + 1];
    const changes = citationTracker.diffCitations(allCitations[i], allCitations[i + 1]);

    for (const change of changes) {
      // Anything present in the newer revision is registered first, stamped
      // with that revision as its first sighting.
      const afterId = change.after ? ensureSource(change.after, after.revId, after.timestamp) : undefined;

      const disappeared = change.type === "removed" || change.type === "replaced";
      if (!disappeared || !change.before) continue;

      const beforeId = ensureSource(change.before, before.revId, before.timestamp);

      if (change.type === "replaced" && afterId !== undefined) {
        // A ref whose text changed but which still resolves to the same
        // source id (same URL, or same name with no URL) has not been
        // replaced: recording it would make the source its own successor and
        // mark it as last seen while it is still cited.
        if (afterId === beforeId) continue;

        const replacements = replacementMap.get(beforeId) ?? [];
        replacements.push({
          replacedById: afterId,
          atRevisionId: after.revId,
          atTimestamp: after.timestamp,
        });
        replacementMap.set(beforeId, replacements);
      }

      // The older revision of the pair is the last one that cited this source.
      const record = sourceMap.get(beforeId);
      if (record) {
        record.lastSeenRevisionId = before.revId;
        record.lastSeenAt = before.timestamp;
      }
    }
  }

  const sources = Array.from(sourceMap.values());
  const lineage: SourceLineage[] = [];
  for (const [sourceId, replacements] of replacementMap) {
    lineage.push({ sourceId, replacements });
  }

  return { sources, lineage };
}
166
+
167
/**
 * Derives a stable 16-hex-character identifier for a citation.
 *
 * The id is the truncated SHA-256 of the first available of: the URL, the
 * ref name (prefixed with `ref:`), or the raw tag text.
 */
export function buildSourceId(ref: CitationRef): string {
  let seed = ref.raw;
  if (ref.url) {
    seed = ref.url;
  } else if (ref.refName) {
    seed = `ref:${ref.refName}`;
  }

  const digest = createHash("sha256").update(seed).digest("hex");
  return digest.slice(0, 16);
}
176
+
177
// Registrable domains treated as news outlets. A host matches when it equals
// an entry or is a subdomain of one.
const NEWS_DOMAINS = [
  "cnn.com",
  "nytimes.com",
  "bbc.com",
  "reuters.com",
  "apnews.com",
  "washingtonpost.com",
  "wsj.com",
  "theguardian.com",
  "bloomberg.com",
  "npr.org",
  "thehill.com",
  "politico.com",
  "foxnews.com",
  "nbcnews.com",
  "cbsnews.com",
  "abcnews.net",
  // ABC News is served from these hosts; the entry above is kept so existing
  // classifications do not change.
  "abcnews.go.com",
  "abcnews.com",
  "usatoday.com",
  "latimes.com",
  "chicagotribune.com",
  "huffpost.com",
  "buzzfeednews.com",
];

/**
 * Extracts the lower-cased host name from a citation URL.
 * Scheme-less and protocol-relative values are parsed as if they were https.
 *
 * @returns The host name, or "" when the value cannot be parsed as a URL.
 */
function hostnameOf(url: string): string {
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(url);
  const candidate = hasScheme ? url : `https://${url.replace(/^\/+/, "")}`;
  try {
    return new URL(candidate).hostname.toLowerCase().replace(/\.$/, "");
  } catch {
    return "";
  }
}

/**
 * Tells whether a host sits under one of the given registry labels: either as
 * its top-level domain (`nih.gov`) or as the second-level label under a
 * two-letter country code (`service.gov.uk`).
 */
function hasRegistryLabel(hostname: string, wanted: readonly string[]): boolean {
  const labels = hostname.split(".");
  const tld = labels[labels.length - 1] ?? "";
  if (wanted.includes(tld)) return true;

  const secondLevel = labels[labels.length - 2];
  return tld.length === 2 && secondLevel !== undefined && wanted.includes(secondLevel);
}

/**
 * Classifies the kind of source a citation points to, based on its URL.
 *
 * Academic markers are searched for anywhere in the URL; the government,
 * education and news checks look at the host name only, so a marker that
 * merely appears in a path or query string does not count.
 *
 * @returns "unknown" when the ref has no URL or nothing matches.
 */
function classifySourceType(ref: CitationRef): SourceType {
  const url = ref.url?.toLowerCase() ?? "";
  if (!url) return "unknown";

  if (url.includes("doi.org") || /journal|jstor|springer|sciencedirect/i.test(url)) {
    return "academic";
  }

  const host = hostnameOf(url);
  if (!host) return "unknown";

  // "govt" keeps hosts such as *.govt.nz classified as government.
  if (hasRegistryLabel(host, ["gov", "govt"])) return "government";
  if (hasRegistryLabel(host, ["edu"])) return "secondary";
  if (NEWS_DOMAINS.some((d) => host === d || host.endsWith(`.${d}`))) return "news";

  return "unknown";
}

/**
 * Assigns a coarse authority rating to a citation, based on its URL.
 *
 * Academic markers anywhere in the URL rate "medium"; otherwise the host's
 * registry label decides: edu/gov/org rate "high", com/net rate "medium".
 *
 * @returns "unrated" when the ref has no URL or nothing matches.
 */
function classifyAuthority(ref: CitationRef): SourceAuthority {
  const url = ref.url?.toLowerCase() ?? "";
  if (!url) return "unrated";

  if (url.includes("doi.org") || /journal|jstor|springer/i.test(url)) {
    return "medium";
  }

  const host = hostnameOf(url);
  if (!host) return "unrated";

  if (hasRegistryLabel(host, ["edu", "gov", "org"])) return "high";
  if (hasRegistryLabel(host, ["com", "net"])) return "medium";

  return "unrated";
}
@@ -0,0 +1,4 @@
1
+ // claim-differ has been removed.
2
+ // Text comparison utilities (stripWikitext, fuzzyFindClaim) moved to wikitext-parser.ts.
3
+ // Semantic direction classification (classifyClaimChange) removed — Refract observes mechanical
4
+ // text appearance/disappearance, not semantic interpretation of direction.
@@ -0,0 +1,78 @@
1
+ import type { EvidenceEvent, Revision } from "@refract-org/evidence-graph";
2
+
3
// Defaults applied by detectEditClusters when the corresponding option is omitted.
const DEFAULT_WINDOW_MS = 60 * 60 * 1000; // 1 hour
const DEFAULT_MIN_CLUSTER_SIZE = 3;

/** Optional tuning for detectEditClusters. */
export interface EditClusterOptions {
  /** Length of the window, in milliseconds, measured from a cluster's first revision. */
  windowMs?: number;
  /** Minimum number of revisions inside one window for a cluster to be reported. */
  minClusterSize?: number;
}

/**
 * Summary shape for a single edit cluster.
 *
 * NOTE(review): nothing in this file constructs or returns this type —
 * detectEditClusters emits EvidenceEvent objects instead. Field meanings below
 * are inferred from their names; confirm against any external consumer.
 */
export interface EditCluster {
  // presumably the ids of the revisions that make up the cluster
  revisionIds: number[];
  // presumably the timestamp of the cluster's first revision — TODO confirm
  timestamp: string;
  editor?: string;
  section?: string;
  eventCount: number;
}
18
+
19
/**
 * Finds bursts of editing: groups of revisions that all fall within a fixed
 * time window starting at the group's earliest revision.
 *
 * Revisions are ordered by timestamp; revisions whose timestamp cannot be
 * parsed are ignored. Starting from the earliest unclustered revision, every
 * revision up to `windowMs` later joins the candidate group. A group with at
 * least `minClusterSize` members is reported and its members are not reused;
 * otherwise the scan moves on by one revision.
 *
 * @param revisions Revisions to scan, in any order. The array is not mutated.
 * @param options Window length and minimum cluster size (defaults: 1 hour, 3).
 * @returns One `edit_cluster_detected` event per cluster, in chronological
 *   order, spanning the cluster's first to last revision.
 */
export function detectEditClusters(revisions: Revision[], options?: EditClusterOptions): EvidenceEvent[] {
  const windowMs = options?.windowMs ?? DEFAULT_WINDOW_MS;
  const minSize = options?.minClusterSize ?? DEFAULT_MIN_CLUSTER_SIZE;
  const events: EvidenceEvent[] = [];

  if (revisions.length < minSize) return events;

  // Parse every timestamp once. Entries that do not parse are dropped: a NaN
  // in the comparator would leave the sort order unspecified.
  const timed = revisions
    .map((revision) => ({ revision, time: new Date(revision.timestamp).getTime() }))
    .filter(({ time }) => !Number.isNaN(time))
    .sort((a, b) => a.time - b.time);
  const sorted = timed.map(({ revision }) => revision);
  const times = timed.map(({ time }) => time);

  let i = 0;
  while (i < sorted.length) {
    const windowEnd = times[i] + windowMs;
    const cluster: number[] = [];
    for (let j = i; j < sorted.length && times[j] <= windowEnd; j++) {
      cluster.push(j);
    }

    // An empty group (possible only with a negative window) is never a
    // cluster, whatever the configured minimum.
    if (cluster.length === 0 || cluster.length < minSize) {
      i++;
      continue;
    }

    const first = sorted[cluster[0]];
    const last = sorted[cluster[cluster.length - 1]];
    const hasSingleEditor = singleEditorCluster(sorted, cluster);

    events.push({
      eventType: "edit_cluster_detected",
      fromRevisionId: first.revId,
      toRevisionId: last.revId,
      section: "",
      before: "",
      after: "",
      deterministicFacts: [
        {
          fact: "edit_cluster",
          detail: `revisions=${cluster.length} window_ms=${windowMs} single_editor=${hasSingleEditor}`,
        },
      ],
      layer: "observed",
      timestamp: first.timestamp,
    });

    // Members form a contiguous run, so resume right after it.
    i += cluster.length;
  }

  return events;
}

/**
 * Tells whether every revision at the given indices was made by the same,
 * known user. Returns false when the first revision has no user.
 */
function singleEditorCluster(revisions: Revision[], indices: number[]): boolean {
  const firstUser = revisions[indices[0]].user;
  if (!firstUser) return false;
  return indices.every((i) => revisions[i].user === firstUser);
}
@@ -0,0 +1,59 @@
1
/** Labels that classifyHeuristic can assign to an edit. */
export type HeuristicKind =
  | "revert"
  | "vandalism"
  | "major_addition"
  | "major_removal"
  | "sourcing"
  | "cosmetic"
  | "minor"
  | "unknown";

/**
 * Optional size thresholds for classifyHeuristic. Each one is compared against
 * the edit's size delta; omitted values fall back to the module defaults.
 */
export interface HeuristicOptions {
  /** A size delta strictly greater than this is a "major_addition". */
  majorAdditionThreshold?: number;
  /** A size delta strictly less than this is a "major_removal"; the default is negative. */
  majorRemovalThreshold?: number;
  /** With an empty comment, an absolute delta strictly below this is "cosmetic". */
  cosmeticThreshold?: number;
  /** An absolute delta strictly below this is "minor". */
  minorThreshold?: number;
}
17
+
18
// Whole-word markers looked for in the edit comment.
const VANDALISM_PATTERNS = /\b(vandal|vandalism|spam|blanking|test edit)\b/i;
const SOURCING_PATTERNS = /\b(cite|ref|source|reference|citation|add ref|rm ref)\b/i;
const REVERT_PATTERNS = /\b(rv|revert|reverted|undo|undid|rollback|rvv)\b/i;

// Size-delta thresholds used when the caller supplies none.
const DEFAULT_MAJOR_ADDITION = 2000;
const DEFAULT_MAJOR_REMOVAL = -2000;
const DEFAULT_COSMETIC = 20;
const DEFAULT_MINOR = 100;

/**
 * Labels an edit from its comment and size change.
 *
 * The comment is checked first, in the order revert, vandalism, sourcing. If
 * no marker is found the size delta decides: major addition, major removal,
 * cosmetic (tiny change with an empty comment), minor, and finally "unknown".
 *
 * @param comment Edit summary; matched case-insensitively after trimming.
 * @param sizeDelta Change in size caused by the edit (positive = growth).
 * @param options Overrides for the size thresholds.
 */
export function classifyHeuristic(comment: string, sizeDelta: number, options?: HeuristicOptions): HeuristicKind {
  const summary = comment.toLowerCase().trim();

  // Comment markers take precedence over size, in this fixed order.
  const commentRules: Array<[RegExp, HeuristicKind]> = [
    [REVERT_PATTERNS, "revert"],
    [VANDALISM_PATTERNS, "vandalism"],
    [SOURCING_PATTERNS, "sourcing"],
  ];
  for (const [pattern, kind] of commentRules) {
    if (pattern.test(summary)) return kind;
  }

  const additionLimit = options?.majorAdditionThreshold ?? DEFAULT_MAJOR_ADDITION;
  const removalLimit = options?.majorRemovalThreshold ?? DEFAULT_MAJOR_REMOVAL;
  const cosmeticLimit = options?.cosmeticThreshold ?? DEFAULT_COSMETIC;
  const minorLimit = options?.minorThreshold ?? DEFAULT_MINOR;
  const magnitude = Math.abs(sizeDelta);

  if (sizeDelta > additionLimit) return "major_addition";
  if (sizeDelta < removalLimit) return "major_removal";
  if (summary === "" && magnitude < cosmeticLimit) return "cosmetic";

  return magnitude < minorLimit ? "minor" : "unknown";
}
package/src/index.ts ADDED
@@ -0,0 +1,88 @@
1
+ import type { Revision, Section, SectionChange } from "@refract-org/evidence-graph";
2
+
3
/**
 * Contract for splitting wikitext into sections and comparing two section lists.
 * NOTE(review): presumably implemented by `sectionDiffer` in section-differ.ts,
 * which is not part of this view — confirm there.
 */
export interface SectionDiffer {
  /** Parses `wikitext` into its sections. */
  extractSections(wikitext: string): Section[];
  /** Describes how `after` differs from `before`. */
  diffSections(before: Section[], after: Section[]): SectionChange[];
}
7
+
8
/** Contract for extracting citations from wikitext and comparing two citation lists. */
export interface CitationTracker {
  /** Returns the citations found in `wikitext`. */
  extractCitations(wikitext: string): CitationRef[];
  /** Describes how the citations in `after` differ from those in `before`. */
  diffCitations(before: CitationRef[], after: CitationRef[]): CitationChange[];
}

/** A single citation found in wikitext. */
export interface CitationRef {
  /** URL found in the citation's content, if any. */
  url?: string;
  /** Title found in the citation's content, if any. */
  title?: string;
  /** Value of the ref tag's `name` attribute, if any. */
  refName?: string;
  /** The citation's original tag text. */
  raw: string;
}

/** One entry of a citation diff. */
export interface CitationChange {
  type: "added" | "removed" | "replaced" | "unchanged";
  /** The citation as it was before; set for "removed" and "replaced". */
  before?: CitationRef;
  /** The citation as it is after; set for "added", "replaced" and "unchanged". */
  after?: CitationRef;
}
25
+
26
/**
 * Contract for recognizing reverts.
 * NOTE(review): presumably implemented by `revertDetector` in revert-detector.ts,
 * which is not part of this view — confirm there.
 */
export interface RevertDetector {
  /** Tells whether an edit comment indicates a revert. */
  isRevert(comment: string): boolean;
  /** Finds revert chains within a list of revisions. */
  detectRevertChain(revisions: Revision[]): RevertChain[];
}

/**
 * A run of reverts.
 * NOTE(review): field meanings are inferred from their names only; verify
 * against the detector implementation.
 */
export interface RevertChain {
  startRevisionId: number;
  endRevisionId: number;
  revertedToRevisionId: number;
  // presumably the number of distinct editors involved — TODO confirm
  participants: number;
}
37
+
38
/**
 * Contract for extracting templates from wikitext and comparing two template lists.
 * NOTE(review): presumably implemented by `templateTracker` in template-tracker.ts,
 * which is not part of this view — confirm there.
 */
export interface TemplateTracker {
  /** Returns the templates found in `wikitext`. */
  extractTemplates(wikitext: string): Template[];
  /** Describes how the templates in `after` differ from those in `before`. */
  diffTemplates(before: Template[], after: Template[]): TemplateChange[];
}

/** A template occurrence in wikitext. */
export interface Template {
  name: string; // e.g., "Citation needed", "NPOV", "BLP"
  /** Category the template is classified into. */
  type: TemplateType;
  /** Template parameters as name/value pairs, when present. */
  params?: Record<string, string>;
}

/** Categories a template can be classified into; "other" is the catch-all. */
export type TemplateType = "citation" | "neutrality" | "blp" | "dispute" | "cleanup" | "protection" | "other";

/** One entry of a template diff. */
export interface TemplateChange {
  type: "added" | "removed" | "unchanged";
  /** The template this change refers to. */
  template: Template;
}
55
+
56
+ export { buildCategoryEvents, diffCategories, extractCategories } from "./category-tracker.js";
57
+ export { buildSourceId, buildSourceLineage, citationTracker } from "./citation-tracker.js";
58
+ export type { EditClusterOptions } from "./edit-cluster-detector.js";
59
+ export { detectEditClusters } from "./edit-cluster-detector.js";
60
+ export type { HeuristicKind, HeuristicOptions } from "./heuristic-classifier.js";
61
+ export { classifyHeuristic } from "./heuristic-classifier.js";
62
+ export type { ObservationDiff } from "./observation-differ.js";
63
+ export { diffObservations } from "./observation-differ.js";
64
+ export { buildPageMoveEvents } from "./page-move-detector.js";
65
+ export type { ProtectionChange, ProtectionTracker } from "./protection-tracker.js";
66
+ export { protectionTracker } from "./protection-tracker.js";
67
+ export { revertDetector } from "./revert-detector.js";
68
+ export type { SectionEvent, SectionLineage } from "./section-differ.js";
69
+ export { buildSectionLineage, sectionDiffer } from "./section-differ.js";
70
+ export type { TalkActivityOptions, TalkActivityResult } from "./talk-activity-detector.js";
71
+ export { detectTalkActivitySpikes } from "./talk-activity-detector.js";
72
+ export type { TalkCorrelationOptions } from "./talk-correlator.js";
73
+ export { correlateTalkRevisions } from "./talk-correlator.js";
74
+ export type { TalkReply, TalkThread, TalkThreadChange } from "./talk-section-parser.js";
75
+ export { buildTalkThreadEvents, diffTalkThreads, parseTalkThreads } from "./talk-section-parser.js";
76
+ export type { ParamChange } from "./template-tracker.js";
77
+ export { buildParamChangeEvents, diffTemplateParams, templateTracker } from "./template-tracker.js";
78
+ export { buildWikilinkEvents, diffWikilinks, extractWikilinks } from "./wikilink-extractor.js";
79
+ export type { HeadingPosition } from "./wikitext-parser.js";
80
+ export {
81
+ countCitations,
82
+ countKeywordMentions,
83
+ deriveSectionHeading,
84
+ extractAnchorSnippet,
85
+ extractHeadingMap,
86
+ sanitizeWikitext,
87
+ stripWikitext,
88
+ } from "./wikitext-parser.js";