@refract-org/analyzers 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/README.md +38 -0
  2. package/dist/src/category-tracker.d.ts +8 -0
  3. package/dist/src/category-tracker.d.ts.map +1 -0
  4. package/dist/src/category-tracker.js +60 -0
  5. package/dist/src/category-tracker.js.map +1 -0
  6. package/dist/src/citation-tracker.d.ts +13 -0
  7. package/dist/src/citation-tracker.d.ts.map +1 -0
  8. package/dist/src/citation-tracker.js +200 -0
  9. package/dist/src/citation-tracker.js.map +1 -0
  10. package/dist/src/claim-differ.d.ts +2 -0
  11. package/dist/src/claim-differ.d.ts.map +1 -0
  12. package/dist/src/claim-differ.js +6 -0
  13. package/dist/src/claim-differ.js.map +1 -0
  14. package/dist/src/edit-cluster-detector.d.ts +14 -0
  15. package/dist/src/edit-cluster-detector.d.ts.map +1 -0
  16. package/dist/src/edit-cluster-detector.js +57 -0
  17. package/dist/src/edit-cluster-detector.js.map +1 -0
  18. package/dist/src/heuristic-classifier.d.ts +9 -0
  19. package/dist/src/heuristic-classifier.d.ts.map +1 -0
  20. package/dist/src/heuristic-classifier.js +33 -0
  21. package/dist/src/heuristic-classifier.js.map +1 -0
  22. package/dist/src/index.d.ts +70 -0
  23. package/dist/src/index.d.ts.map +1 -0
  24. package/dist/src/index.js +16 -0
  25. package/dist/src/index.js.map +1 -0
  26. package/dist/src/observation-differ.d.ts +8 -0
  27. package/dist/src/observation-differ.d.ts.map +1 -0
  28. package/dist/src/observation-differ.js +16 -0
  29. package/dist/src/observation-differ.js.map +1 -0
  30. package/dist/src/page-move-detector.d.ts +11 -0
  31. package/dist/src/page-move-detector.d.ts.map +1 -0
  32. package/dist/src/page-move-detector.js +21 -0
  33. package/dist/src/page-move-detector.js.map +1 -0
  34. package/dist/src/protection-tracker.d.ts +23 -0
  35. package/dist/src/protection-tracker.d.ts.map +1 -0
  36. package/dist/src/protection-tracker.js +74 -0
  37. package/dist/src/protection-tracker.js.map +1 -0
  38. package/dist/src/revert-detector.d.ts +3 -0
  39. package/dist/src/revert-detector.d.ts.map +1 -0
  40. package/dist/src/revert-detector.js +43 -0
  41. package/dist/src/revert-detector.js.map +1 -0
  42. package/dist/src/section-differ.d.ts +26 -0
  43. package/dist/src/section-differ.d.ts.map +1 -0
  44. package/dist/src/section-differ.js +268 -0
  45. package/dist/src/section-differ.js.map +1 -0
  46. package/dist/src/talk-activity-detector.d.ts +16 -0
  47. package/dist/src/talk-activity-detector.d.ts.map +1 -0
  48. package/dist/src/talk-activity-detector.js +76 -0
  49. package/dist/src/talk-activity-detector.js.map +1 -0
  50. package/dist/src/talk-correlator.d.ts +7 -0
  51. package/dist/src/talk-correlator.d.ts.map +1 -0
  52. package/dist/src/talk-correlator.js +53 -0
  53. package/dist/src/talk-correlator.js.map +1 -0
  54. package/dist/src/talk-section-parser.d.ts +22 -0
  55. package/dist/src/talk-section-parser.d.ts.map +1 -0
  56. package/dist/src/talk-section-parser.js +109 -0
  57. package/dist/src/talk-section-parser.js.map +1 -0
  58. package/dist/src/template-tracker.d.ts +12 -0
  59. package/dist/src/template-tracker.d.ts.map +1 -0
  60. package/dist/src/template-tracker.js +225 -0
  61. package/dist/src/template-tracker.js.map +1 -0
  62. package/dist/src/wikilink-extractor.d.ts +8 -0
  63. package/dist/src/wikilink-extractor.d.ts.map +1 -0
  64. package/dist/src/wikilink-extractor.js +81 -0
  65. package/dist/src/wikilink-extractor.js.map +1 -0
  66. package/dist/src/wikitext-parser.d.ts +15 -0
  67. package/dist/src/wikitext-parser.d.ts.map +1 -0
  68. package/dist/src/wikitext-parser.js +85 -0
  69. package/dist/src/wikitext-parser.js.map +1 -0
  70. package/dist/tsconfig 2.tsbuildinfo +1 -0
  71. package/dist/tsconfig.tsbuildinfo +1 -0
  72. package/package.json +28 -0
  73. package/src/__tests__/category-tracker.test.ts +79 -0
  74. package/src/__tests__/citation-tracker.test.ts +185 -0
  75. package/src/__tests__/edit-cluster-detector.test.ts +79 -0
  76. package/src/__tests__/heuristic-classifier.test.ts +67 -0
  77. package/src/__tests__/observation-differ.test.ts +58 -0
  78. package/src/__tests__/page-move-detector.test.ts +64 -0
  79. package/src/__tests__/protection-tracker.test.ts +72 -0
  80. package/src/__tests__/revert-detector.test.ts +76 -0
  81. package/src/__tests__/section-differ.test.ts +120 -0
  82. package/src/__tests__/talk-activity-detector.test.ts +112 -0
  83. package/src/__tests__/talk-correlator.test.ts +71 -0
  84. package/src/__tests__/talk-section-parser.test.ts +105 -0
  85. package/src/__tests__/template-tracker.test.ts +159 -0
  86. package/src/__tests__/wikilink-extractor.test.ts +101 -0
  87. package/src/__tests__/wikitext-parser.test.ts +142 -0
  88. package/src/category-tracker.ts +75 -0
  89. package/src/citation-tracker.ts +226 -0
  90. package/src/claim-differ.ts +4 -0
  91. package/src/edit-cluster-detector.ts +78 -0
  92. package/src/heuristic-classifier.ts +59 -0
  93. package/src/index.ts +88 -0
  94. package/src/observation-differ.ts +26 -0
  95. package/src/page-move-detector.ts +32 -0
  96. package/src/protection-tracker.ts +103 -0
  97. package/src/revert-detector.ts +51 -0
  98. package/src/section-differ.ts +315 -0
  99. package/src/talk-activity-detector.ts +105 -0
  100. package/src/talk-correlator.ts +70 -0
  101. package/src/talk-section-parser.ts +151 -0
  102. package/src/template-tracker.ts +253 -0
  103. package/src/wikilink-extractor.ts +100 -0
  104. package/src/wikitext-parser.ts +92 -0
@@ -0,0 +1,151 @@
1
+ import type { EvidenceEvent } from "@refract-org/evidence-graph";
2
+
3
/** A single non-empty line of a talk-page thread, recorded as one reply. */
export interface TalkReply {
  /** Nesting level assigned by the parser; 1 is the shallowest level. */
  depth: number;
  /** Line content with any leading `:`, `*` or `#` markers removed. */
  text: string;
  /** User name taken from a `[[User:...]]` link on the line, when one is present. */
  author?: string;
  /** Signature timestamp found on the line (e.g. `12:34, 5 January 2024`), when present. */
  timestamp?: string;
}
9
+
10
/** One headed discussion on a talk page together with the replies parsed beneath it. */
export interface TalkThread {
  /** Heading text with the surrounding `=` markers stripped. */
  heading: string;
  /** First signature timestamp encountered in the thread, when any line carries one. */
  startedAt?: string;
  /** Non-empty lines under the heading, in document order. */
  replies: TalkReply[];
  /** Distinct authors, in order of first appearance. */
  participants: string[];
  /** True when a resolved/done/closed/archived marker template appears in the thread. */
  isResolved: boolean;
}
17
+
18
/** How one thread differs between two revisions of a talk page. */
export interface TalkThreadChange {
  /**
   * `opened`: the heading exists only in the newer revision;
   * `archived`: the heading exists only in the older revision;
   * `reply_added`: the newer revision has more replies under the heading;
   * `unchanged`: anything else.
   */
  type: "opened" | "archived" | "reply_added" | "unchanged";
  /** The thread concerned: the newer snapshot, or the older one for `archived`. */
  thread: TalkThread;
}
22
+
23
/** Matches a parameterless `{{resolved}}`, `{{done}}`, `{{closed}}` or `{{archived}}` template, ignoring case. */
const RESOLVED_PATTERN = /\{\{(resolved|done|closed|archived)\}\}/i;

/** Matches the run of `:`, `*` and `#` markers that wikitext uses to indent a talk-page reply. */
const INDENT_MARKERS = /^[:*#]+/;

/**
 * Pulls signature details out of a single talk-page line.
 *
 * @param text - One line of talk-page wikitext.
 * @returns `author` from the first `[[User:...]]` link on the line and
 *   `timestamp` from the first `HH:MM, D Month YYYY` pattern; either is
 *   `undefined` when not found.
 */
function extractSignatures(text: string): { author?: string; timestamp?: string } {
  const userMatch = text.match(/\[\[[Uu]ser:([^\]|]+)/);
  const tsMatch = text.match(/(\d{1,2}:\d{2},\s+\d{1,2}\s+\w+\s+\d{4})/);
  return {
    author: userMatch?.[1],
    timestamp: tsMatch?.[1],
  };
}

/**
 * Computes the nesting depth of one reply line.
 *
 * Leading `:`, `*` and `#` markers are what wikitext uses to indent replies, so
 * they take priority: depth is the marker count plus one. A line without
 * markers falls back to the whitespace rule (two columns per level), and an
 * unindented line has depth 1.
 */
function replyDepth(line: string): number {
  const markerCount = INDENT_MARKERS.exec(line)?.[0].length ?? 0;
  if (markerCount > 0) return markerCount + 1;

  const indent = line.search(/\S/);
  return indent > 0 ? Math.ceil(indent / 2) + 1 : 1;
}

/**
 * Splits talk-page wikitext into threads, one per level 1-3 heading.
 *
 * Every non-empty line below a heading becomes a reply. Lines that appear
 * before the first heading are ignored.
 *
 * @param wikitext - Full wikitext of a talk page revision.
 * @returns Threads in document order.
 */
export function parseTalkThreads(wikitext: string): TalkThread[] {
  const threads: TalkThread[] = [];
  // The back-reference requires the same number of `=` on both sides of the heading.
  const headerRegex = /^(={1,3})\s*([^=]+?)\s*\1\s*$/m;
  let currentThread: TalkThread | null = null;

  for (const line of wikitext.split("\n")) {
    const headerMatch = headerRegex.exec(line);
    if (headerMatch) {
      if (currentThread) {
        threads.push(currentThread);
      }
      currentThread = {
        heading: headerMatch[2].trim(),
        startedAt: undefined,
        replies: [],
        participants: [],
        isResolved: false,
      };
      continue;
    }

    if (!currentThread || !line.trim()) continue;

    if (RESOLVED_PATTERN.test(line)) {
      currentThread.isResolved = true;
    }

    const depth = replyDepth(line);
    const { author, timestamp } = extractSignatures(line);
    const text = line.replace(/^[:*#]+\s*/, "").trim();

    currentThread.replies.push({ depth, text, author, timestamp });
    if (author && !currentThread.participants.includes(author)) {
      currentThread.participants.push(author);
    }
    // Only the first timestamp seen in the thread is kept as its start.
    if (timestamp && !currentThread.startedAt) {
      currentThread.startedAt = timestamp;
    }
  }

  if (currentThread) {
    threads.push(currentThread);
  }

  return threads;
}
84
+
85
/**
 * Compares two thread lists by heading (case-insensitive).
 *
 * Threads from `after` are reported first, in order, as `opened`,
 * `reply_added` (more replies than before) or `unchanged`; threads present
 * only in `before` follow as `archived`. When several threads share a heading,
 * the last one with that heading is the one compared.
 */
export function diffTalkThreads(before: TalkThread[], after: TalkThread[]): TalkThreadChange[] {
  const indexByHeading = (threads: TalkThread[]): Map<string, TalkThread> => {
    const byHeading = new Map<string, TalkThread>();
    for (const thread of threads) {
      byHeading.set(thread.heading.toLowerCase(), thread);
    }
    return byHeading;
  };

  const older = indexByHeading(before);
  const newer = indexByHeading(after);
  const result: TalkThreadChange[] = [];

  newer.forEach((thread, key) => {
    const previous = older.get(key);
    let type: TalkThreadChange["type"];
    if (previous === undefined) {
      type = "opened";
    } else if (thread.replies.length > previous.replies.length) {
      type = "reply_added";
    } else {
      type = "unchanged";
    }
    result.push({ type, thread });
  });

  older.forEach((thread, key) => {
    if (!newer.has(key)) {
      result.push({ type: "archived", thread });
    }
  });

  return result;
}
109
+
110
/**
 * Turns the thread-level differences between two talk-page revisions into
 * evidence events.
 *
 * Both wikitexts are parsed into threads and diffed; every change other than
 * `unchanged` yields one `observed`-layer event whose `section` and `after`
 * carry the thread heading and whose single fact summarises the thread.
 *
 * @param beforeWikitext - Wikitext of the older revision.
 * @param afterWikitext - Wikitext of the newer revision.
 * @param fromRevId - Revision id recorded as `fromRevisionId`.
 * @param toRevId - Revision id recorded as `toRevisionId`.
 * @param timestamp - Timestamp copied onto every event.
 */
export function buildTalkThreadEvents(
  beforeWikitext: string,
  afterWikitext: string,
  fromRevId: number,
  toRevId: number,
  timestamp: string,
): EvidenceEvent[] {
  const eventTypeFor = {
    opened: "talk_thread_opened",
    archived: "talk_thread_archived",
    reply_added: "talk_reply_added",
  } as const;

  const changes = diffTalkThreads(parseTalkThreads(beforeWikitext), parseTalkThreads(afterWikitext));
  const events: EvidenceEvent[] = [];

  for (const { type, thread } of changes) {
    if (type === "unchanged") continue;

    const { heading, participants, replies, isResolved } = thread;
    events.push({
      eventType: eventTypeFor[type] as EvidenceEvent["eventType"],
      fromRevisionId: fromRevId,
      toRevisionId: toRevId,
      section: heading || "(unknown)",
      before: "",
      after: heading,
      deterministicFacts: [
        {
          fact: `talk_thread_${type}`,
          detail: `heading="${heading}" participants=${participants.length} replies=${replies.length} resolved=${isResolved}`,
        },
      ],
      layer: "observed",
      timestamp,
    });
  }

  return events;
}
@@ -0,0 +1,253 @@
1
+ import type { EvidenceEvent } from "@refract-org/evidence-graph";
2
+ import type { Template, TemplateChange, TemplateTracker, TemplateType } from "./index.js";
3
+
4
/**
 * Lookup from a normalised (lower-case, single-spaced) template name to the
 * maintenance category it signals.
 *
 * The table lives on a prototype-less object because it is indexed with names
 * taken straight from wikitext: on a plain object literal, names such as
 * `constructor` or `__proto__` would resolve to members inherited from
 * `Object.prototype` instead of being reported as unknown.
 */
const TEMPLATE_TYPE_MAP: Record<string, TemplateType> = Object.assign<
  Record<string, TemplateType>,
  Record<string, TemplateType>
>(Object.create(null), {
  // Sourcing problems
  "citation needed": "citation",
  cn: "citation",
  citation: "citation",
  cite: "citation",
  fact: "citation",
  unreferenced: "citation",
  refimprove: "citation",
  "need citation": "citation",
  "primary sources": "citation",
  // Neutrality
  npov: "neutrality",
  pov: "neutrality",
  undue: "neutrality",
  // Biographies of living persons
  blp: "blp",
  "blp sources": "blp",
  "living persons": "blp",
  // Factual disputes
  disputed: "dispute",
  dispute: "dispute",
  contradict: "dispute",
  inconsistent: "dispute",
  // General clean-up
  cleanup: "cleanup",
  "copy edit": "cleanup",
  tone: "cleanup",
  wikify: "cleanup",
  merge: "cleanup",
  split: "cleanup",
  // Page protection
  pp: "protection",
  protected: "protection",
  "pp-protected": "protection",
  "semi-protected": "protection",
});
35
+
36
/** Extraction and comparison of the templates used in a wikitext revision. */
export const templateTracker: TemplateTracker = {
  /**
   * Collects the outermost `{{...}}` templates in `wikitext`.
   *
   * Only the first occurrence of each name is kept; names are compared
   * lower-cased with whitespace runs collapsed. Templates nested inside
   * another template are not reported separately; they stay inside the
   * outer template's parameter text.
   *
   * @param wikitext - Raw wikitext to scan.
   * @returns Templates in order of first appearance.
   */
  extractTemplates(wikitext: string): Template[] {
    const templates: Template[] = [];
    const seen = new Set<string>();
    let i = 0;
    let depth = 0;
    let start = -1;

    while (i < wikitext.length) {
      if (wikitext[i] === "{" && wikitext[i + 1] === "{") {
        if (depth === 0) {
          start = i;
        }
        depth++;
        i += 2;
        continue;
      }

      if (wikitext[i] === "}" && wikitext[i + 1] === "}") {
        // A closer with nothing open is stray text. Skipping it keeps `depth`
        // from going negative, which would stop every later template from
        // ever reaching depth 0 again.
        if (depth > 0) {
          depth--;
          if (depth === 0 && start >= 0) {
            const raw = wikitext.slice(start, i + 2);
            const inner = raw.slice(2, -2).trim();
            const firstBar = inner.indexOf("|");

            // The name ends at the first `|`, or at the first line break when
            // the template has no parameters.
            let firstName: string;
            if (firstBar >= 0) {
              firstName = inner.slice(0, firstBar).trim();
            } else {
              const nl = inner.indexOf("\n");
              firstName = nl >= 0 ? inner.slice(0, nl).trim() : inner.trim();
            }
            const name = firstName.toLowerCase().replace(/\s+/g, " ");

            if (!seen.has(name)) {
              seen.add(name);
              const params = firstBar >= 0 ? parseParams(inner.slice(firstBar + 1)) : undefined;
              templates.push({
                name: firstName,
                type: classifyTemplate(name),
                params,
              });
            }
            start = -1;
          }
        }
        i += 2;
        continue;
      }

      i++;
    }

    return templates;
  },

  /**
   * Compares two template lists by lower-cased name.
   *
   * Templates from `after` come first, as `added` or `unchanged`; templates
   * found only in `before` follow as `removed`. Parameters are not compared.
   */
  diffTemplates(before: Template[], after: Template[]): TemplateChange[] {
    const changes: TemplateChange[] = [];
    const beforeMap = new Map<string, Template>();
    const afterMap = new Map<string, Template>();

    for (const t of before) beforeMap.set(t.name.toLowerCase(), t);
    for (const t of after) afterMap.set(t.name.toLowerCase(), t);

    for (const [name, t] of afterMap) {
      changes.push({ type: beforeMap.has(name) ? "unchanged" : "added", template: t });
    }

    for (const [name, t] of beforeMap) {
      if (!afterMap.has(name)) {
        changes.push({ type: "removed", template: t });
      }
    }

    return changes;
  },
};
113
+
114
/**
 * Resolves a template name to its maintenance category.
 *
 * @param name - Template name used verbatim as the lookup key; the caller in
 *   this file passes it lower-cased with whitespace runs collapsed.
 * @returns The mapped category, or `"other"` when the name is not in the table.
 */
function classifyTemplate(name: string): TemplateType {
  const mapped = TEMPLATE_TYPE_MAP[name];
  return mapped ?? "other";
}
117
+
118
/**
 * Parses the parameter section of a template (everything after the first `|`).
 *
 * A parameter is named when it contains an `=` outside any nested `{{...}}`
 * or `[[...]]`; otherwise it is positional and keyed `"1"`, `"2"`, ... in
 * order. Names and values are trimmed. A later parameter with the same key
 * overwrites an earlier one.
 *
 * @param raw - Parameter text without the template name or outer braces.
 */
function parseParams(raw: string): Record<string, string> {
  const params: Record<string, string> = {};
  let unnamedIndex = 1;

  for (const part of splitParams(raw)) {
    const eqIndex = findTopLevelEquals(part);
    if (eqIndex >= 0) {
      params[part.slice(0, eqIndex).trim()] = part.slice(eqIndex + 1).trim();
    } else {
      params[String(unnamedIndex)] = part.trim();
      unnamedIndex++;
    }
  }

  return params;
}

/**
 * Returns the index of the first `=` that is not inside a nested template or
 * link, or -1 when there is none. An `=` belonging to a nested construct
 * (e.g. the `a=b` in `{{foo|a=b}}`) must not turn the enclosing positional
 * parameter into a named one.
 */
function findTopLevelEquals(part: string): number {
  let braceDepth = 0;
  let linkDepth = 0;

  for (let i = 0; i < part.length; i++) {
    const ch = part[i];
    const next = part[i + 1];

    if (ch === "{" && next === "{") {
      braceDepth++;
      i++;
    } else if (ch === "}" && next === "}" && braceDepth > 0) {
      braceDepth--;
      i++;
    } else if (ch === "[" && next === "[") {
      linkDepth++;
      i++;
    } else if (ch === "]" && next === "]" && linkDepth > 0) {
      linkDepth--;
      i++;
    } else if (ch === "=" && braceDepth === 0 && linkDepth === 0) {
      return i;
    }
  }

  return -1;
}

/**
 * Splits parameter text on `|`, ignoring pipes that sit inside nested
 * `{{...}}` templates or `[[...]]` links at any nesting level.
 *
 * Each part is trimmed. Empty parts between two pipes are kept; an empty
 * trailing part is dropped.
 */
function splitParams(raw: string): string[] {
  const parts: string[] = [];
  let current = "";
  let braceDepth = 0;
  let linkDepth = 0;

  for (let i = 0; i < raw.length; i++) {
    const ch = raw[i];
    const next = raw[i + 1];

    if (ch === "{" && next === "{") {
      braceDepth++;
      current += "{{";
      i++;
    } else if (ch === "}" && next === "}" && braceDepth > 0) {
      braceDepth--;
      current += "}}";
      i++;
    } else if (ch === "[" && next === "[") {
      // Links nest (e.g. a link inside a file caption), so count every opener.
      linkDepth++;
      current += "[[";
      i++;
    } else if (ch === "]" && next === "]" && linkDepth > 0) {
      linkDepth--;
      current += "]]";
      i++;
    } else if (ch === "|" && braceDepth === 0 && linkDepth === 0) {
      parts.push(current.trim());
      current = "";
    } else {
      current += ch;
    }
  }

  const tail = current.trim();
  if (tail) {
    parts.push(tail);
  }

  return parts;
}
176
+
177
/** A single parameter difference on a template that exists in both revisions. */
export interface ParamChange {
  /** Template name as written in the newer revision. */
  templateName: string;
  /** Normalised (lower-cased, trimmed) parameter name. */
  paramName: string;
  /** Earlier value; absent when the parameter was added. */
  oldValue?: string;
  /** Later value; absent when the parameter was removed. */
  newValue?: string;
}
183
+
184
/**
 * Lists parameter-level differences for templates present in both lists.
 *
 * Templates are paired by lower-cased name. A pair is compared only when both
 * sides carry a `params` object; parameters are normalised first, so empty
 * values count as absent and names are compared lower-cased.
 *
 * @returns One entry per added, removed or modified parameter, in the order
 *   templates appear in `after`.
 */
export function diffTemplateParams(before: Template[], after: Template[]): ParamChange[] {
  const indexByName = (list: Template[]): Map<string, Template> => {
    const byName = new Map<string, Template>();
    for (const tmpl of list) byName.set(tmpl.name.toLowerCase(), tmpl);
    return byName;
  };

  const older = indexByName(before);
  const newer = indexByName(after);
  const result: ParamChange[] = [];

  for (const [name, current] of newer) {
    const previous = older.get(name);
    // Nothing to compare unless the template exists on both sides with parameters.
    if (!previous?.params || !current.params) continue;

    const oldParams = normalizeParams(previous.params);
    const newParams = normalizeParams(current.params);
    const paramNames = new Set([...Object.keys(oldParams), ...Object.keys(newParams)]);

    for (const paramName of paramNames) {
      const oldValue = oldParams[paramName];
      const newValue = newParams[paramName];
      if (oldValue === newValue) continue;

      const change: ParamChange = { templateName: current.name, paramName };
      if (oldValue !== undefined) change.oldValue = oldValue;
      if (newValue !== undefined) change.newValue = newValue;
      result.push(change);
    }
  }

  return result;
}
217
+
218
/**
 * Emits one `template_parameter_changed` event per parameter difference
 * between two template lists.
 *
 * `before` and `after` on each event hold the old and new parameter values
 * (empty string when absent); the fact detail includes each value truncated
 * to 100 characters.
 *
 * @param beforeTemplates - Templates of the older revision.
 * @param afterTemplates - Templates of the newer revision.
 * @param fromRevId - Revision id recorded as `fromRevisionId`.
 * @param toRevId - Revision id recorded as `toRevisionId`.
 * @param timestamp - Timestamp copied onto every event.
 */
export function buildParamChangeEvents(
  beforeTemplates: Template[],
  afterTemplates: Template[],
  fromRevId: number,
  toRevId: number,
  timestamp: string,
): EvidenceEvent[] {
  const describe = (change: ParamChange): string => {
    let detail = `template=${change.templateName} param=${change.paramName}`;
    if (change.oldValue !== undefined) detail += ` old=${change.oldValue.slice(0, 100)}`;
    if (change.newValue !== undefined) detail += ` new=${change.newValue.slice(0, 100)}`;
    return detail;
  };

  const events: EvidenceEvent[] = [];
  for (const change of diffTemplateParams(beforeTemplates, afterTemplates)) {
    events.push({
      eventType: "template_parameter_changed" as EvidenceEvent["eventType"],
      fromRevisionId: fromRevId,
      toRevisionId: toRevId,
      section: "body",
      before: change.oldValue ?? "",
      after: change.newValue ?? "",
      deterministicFacts: [
        {
          fact: "template_parameter_changed",
          detail: describe(change),
        },
      ],
      layer: "observed",
      timestamp,
    });
  }
  return events;
}
244
+
245
/**
 * Produces a comparison-friendly copy of a parameter map: keys are lower-cased
 * and trimmed, and parameters whose value is the empty string are left out.
 * When two keys normalise to the same name, the later one wins.
 */
function normalizeParams(params: Record<string, string>): Record<string, string | undefined> {
  const result: Record<string, string | undefined> = {};
  Object.keys(params).forEach((rawKey) => {
    const value = params[rawKey];
    if (value !== "") {
      result[rawKey.toLowerCase().trim()] = value;
    }
  });
  return result;
}
@@ -0,0 +1,100 @@
1
+ import type { DeterministicFact, EvidenceEvent } from "@refract-org/evidence-graph";
2
+
3
/** Matches `[[target]]` and `[[target|label]]`; group 1 is the target, group 2 the optional label. */
const WIKILINK_REGEX = /\[\[([^\]]+?)(?:\|([^\]]*))?\]\]/g;

/** Link-target prefixes (compared case-insensitively) whose links are left out of the result. */
const EXCLUDED_PREFIXES = [
  "File:",
  "Image:",
  "Category:",
  "wikipedia:",
  "w:",
  "mediawiki:",
  "mw:",
  "wiktionary:",
  "wikt:",
  "commons:",
  "c:",
  "d:",
  "n:",
  "q:",
  "s:",
  "v:",
  "voy:",
  "b:",
];

/**
 * Lists the distinct link targets found in `wikitext`.
 *
 * Targets are trimmed, lower-cased and have underscores turned into spaces;
 * links whose target starts with an excluded prefix are skipped.
 *
 * @returns Normalised targets in order of first appearance.
 */
export function extractWikilinks(wikitext: string): string[] {
  const targets = new Set<string>();

  for (const found of wikitext.matchAll(WIKILINK_REGEX)) {
    const lowered = found[1].trim().toLowerCase();
    const excluded = EXCLUDED_PREFIXES.some((prefix) => lowered.startsWith(prefix.toLowerCase()));
    if (!excluded) {
      // A Set keeps first-insertion order and silently ignores repeats.
      targets.add(lowered.replace(/_/g, " "));
    }
  }

  return [...targets];
}
46
+
47
/**
 * Compares two link lists.
 *
 * @returns `added`: entries of `after` that do not occur in `before`;
 *   `removed`: entries of `before` that do not occur in `after`. Input order
 *   and any repeated entries are preserved.
 */
export function diffWikilinks(before: string[], after: string[]): { added: string[]; removed: string[] } {
  const earlier = new Set(before);
  const later = new Set(after);

  const added: string[] = [];
  for (const link of after) {
    if (!earlier.has(link)) added.push(link);
  }

  const removed: string[] = [];
  for (const link of before) {
    if (!later.has(link)) removed.push(link);
  }

  return { added, removed };
}
56
+
57
/**
 * Builds `wikilink_added` / `wikilink_removed` events for the links that
 * differ between two pieces of wikitext.
 *
 * Added links come first, then removed ones. An added link is reported in
 * `after`, a removed link in `before`; the other field is an empty string.
 *
 * @param beforeWikitext - Wikitext of the older revision.
 * @param afterWikitext - Wikitext of the newer revision.
 * @param fromRevId - Revision id recorded as `fromRevisionId`.
 * @param toRevId - Revision id recorded as `toRevisionId`.
 * @param section - Section label copied onto every event.
 * @param timestamp - Timestamp copied onto every event.
 * @param extraFacts - Optional facts appended after each event's own fact.
 */
export function buildWikilinkEvents(
  beforeWikitext: string,
  afterWikitext: string,
  fromRevId: number,
  toRevId: number,
  section: string,
  timestamp: string,
  extraFacts?: DeterministicFact[],
): EvidenceEvent[] {
  const toEvent = (link: string, wasAdded: boolean): EvidenceEvent => {
    const kind = wasAdded ? "wikilink_added" : "wikilink_removed";
    return {
      eventType: kind,
      fromRevisionId: fromRevId,
      toRevisionId: toRevId,
      section,
      before: wasAdded ? "" : link,
      after: wasAdded ? link : "",
      deterministicFacts: [{ fact: kind, detail: `target=${link}` }, ...(extraFacts ?? [])],
      layer: "observed",
      timestamp,
    };
  };

  const linksBefore = extractWikilinks(beforeWikitext);
  const linksAfter = extractWikilinks(afterWikitext);
  const { added, removed } = diffWikilinks(linksBefore, linksAfter);

  return [...added.map((link) => toEvent(link, true)), ...removed.map((link) => toEvent(link, false))];
}
@@ -0,0 +1,92 @@
1
/**
 * Reduces wikitext to a single line of plain text.
 *
 * In order: HTML comments, self-closing `<ref .../>` tags, paired
 * `<ref ...>...</ref>` blocks and `{{...}}` templates are replaced by a space;
 * internal links collapse to their label (or target), external links to their
 * label; bold/italic quote markers are dropped; finally all whitespace runs
 * become one space and the result is trimmed.
 *
 * Self-closing refs are removed before paired ones so that the paired pattern
 * can accept any attribute text, including values that contain `/` such as
 * `<ref name="a/b">`, without mistaking a self-closing tag for an opener.
 *
 * @param value - Raw wikitext.
 */
export function sanitizeWikitext(value: string): string {
  return value
    .replace(/<!--[\s\S]*?-->/g, " ")
    .replace(/<ref[^>]*\/>/gi, " ")
    .replace(/<ref[^>]*>[\s\S]*?<\/ref>/gi, " ")
    .replace(/\{\{[^}]*\}\}/g, " ")
    .replace(/\[\[(?:[^|\]]*\|)?([^\]]+)\]\]/g, "$1")
    .replace(/\[https?:\/\/[^\s\]]+\s*([^\]]*)\]/g, "$1")
    .replace(/'''/g, "")
    .replace(/''/g, "")
    .replace(/\s+/g, " ")
    .trim();
}
15
+
16
/**
 * Removes wikitext markup while keeping the text's line structure.
 *
 * In order: HTML comments, `<ref>` tags (self-closing, then paired with their
 * content), any remaining HTML-style tags and `{{...}}` templates are deleted;
 * bold/italic markers and link brackets are unwrapped to their visible text;
 * runs of three or more newlines shrink to two; the result is trimmed.
 *
 * @param wikitext - Raw wikitext.
 */
export function stripWikitext(wikitext: string): string {
  let text = wikitext;
  text = text.replace(/<!--[\s\S]*?-->/g, "");
  text = text.replace(/<ref\b[^>]*\/\s*>/gi, "");
  text = text.replace(/<ref\b[^>]*>[\s\S]*?<\/ref\s*>/gi, "");
  text = text.replace(/<[^>]+>/g, "");

  // The pattern only matches templates with no braces inside, i.e. the
  // innermost ones. Repeating until nothing changes peels nested templates
  // from the inside out instead of leaving shells such as `{{a|}}` behind.
  let previous: string;
  do {
    previous = text;
    text = text.replace(/\{\{[^{}]*?\}\}/g, "");
  } while (text !== previous);

  text = text.replace(/'''(.+?)'''/g, "$1");
  text = text.replace(/''(.+?)''/g, "$1");
  text = text.replace(/\[\[([^\]|]+?)\]\]/g, "$1");
  text = text.replace(/\[\[[^\]]+?\|([^\]]+?)\]\]/g, "$1");
  text = text.replace(/\n{3,}/g, "\n\n");
  return text.trim();
}
30
+
31
/** A section heading together with where it starts in the wikitext. */
export interface HeadingPosition {
  /** Character offset of the start of the heading line. */
  position: number;
  /** Heading text without the surrounding `=` markers. */
  heading: string;
}
35
+
36
/**
 * Finds every heading line (two or more `=` on each side) in `wikitext`.
 *
 * @returns Headings in document order with the offset at which each line starts.
 */
export function extractHeadingMap(wikitext: string): HeadingPosition[] {
  const headings: HeadingPosition[] = [];
  for (const found of wikitext.matchAll(/^==+\s*(.*?)\s*==+\s*$/gm)) {
    headings.push({
      position: found.index ?? 0,
      heading: found[1]?.trim() ?? "",
    });
  }
  return headings;
}
42
+
43
/**
 * Names the section that contains a given offset.
 *
 * @param wikitext - Wikitext to scan for headings.
 * @param position - Character offset into `wikitext`.
 * @returns The last heading that starts at or before `position`, or `null`
 *   when the offset lies before the first heading.
 */
export function deriveSectionHeading(wikitext: string, position: number): string | null {
  let current: string | null = null;
  for (const { position: start, heading } of extractHeadingMap(wikitext)) {
    // Headings arrive in document order, so the first one past the offset ends the search.
    if (start > position) return current;
    current = heading;
  }
  return current;
}
51
+
52
/**
 * Counts `<ref` tag openings in `wikitext`, case-insensitively. Both paired
 * and self-closing refs count; longer tag names such as `<references>` do not.
 */
export function countCitations(wikitext: string): number {
  const openings = wikitext.match(/<ref\b/gi);
  return openings === null ? 0 : openings.length;
}
55
+
56
/**
 * Counts case-insensitive, non-overlapping occurrences of each phrase.
 *
 * Phrases are trimmed before matching; phrases that are empty after trimming
 * are skipped.
 *
 * @param wikitext - Text to search.
 * @param phrases - Phrases to look for.
 * @returns `totalMentions`: occurrences summed over all phrases;
 *   `matchedPhrases`: how many phrases occurred at least once.
 */
export function countKeywordMentions(
  wikitext: string,
  phrases: string[],
): { totalMentions: number; matchedPhrases: number } {
  const haystack = wikitext.toLowerCase();
  const tally = { totalMentions: 0, matchedPhrases: 0 };

  for (const phrase of phrases) {
    const needle = phrase.trim().toLowerCase();
    if (needle.length === 0) continue;

    let hits = 0;
    // Resume after each hit so overlapping occurrences are not counted twice.
    for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + needle.length)) {
      hits++;
    }

    tally.totalMentions += hits;
    if (hits > 0) tally.matchedPhrases++;
  }

  return tally;
}
79
+
80
/**
 * Returns the text surrounding the first phrase that occurs in `wikitext`.
 *
 * Phrases are tried in the order given, trimmed and matched
 * case-insensitively; phrases that are empty after trimming are skipped.
 *
 * @param wikitext - Text to search.
 * @param phrases - Candidate phrases, in priority order.
 * @param radius - Number of characters kept on each side of the match.
 * @returns The trimmed excerpt, or `null` when no phrase occurs.
 */
export function extractAnchorSnippet(wikitext: string, phrases: string[], radius = 200): string | null {
  const haystack = wikitext.toLowerCase();

  for (const phrase of phrases) {
    const needle = phrase.trim().toLowerCase();
    const at = needle ? haystack.indexOf(needle) : -1;
    if (at < 0) continue;

    const from = Math.max(0, at - radius);
    const to = Math.min(wikitext.length, at + needle.length + radius);
    return wikitext.slice(from, to).trim();
  }

  return null;
}