@refract-org/analyzers 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -0
- package/dist/src/category-tracker.d.ts +8 -0
- package/dist/src/category-tracker.d.ts.map +1 -0
- package/dist/src/category-tracker.js +60 -0
- package/dist/src/category-tracker.js.map +1 -0
- package/dist/src/citation-tracker.d.ts +13 -0
- package/dist/src/citation-tracker.d.ts.map +1 -0
- package/dist/src/citation-tracker.js +200 -0
- package/dist/src/citation-tracker.js.map +1 -0
- package/dist/src/claim-differ.d.ts +2 -0
- package/dist/src/claim-differ.d.ts.map +1 -0
- package/dist/src/claim-differ.js +6 -0
- package/dist/src/claim-differ.js.map +1 -0
- package/dist/src/edit-cluster-detector.d.ts +14 -0
- package/dist/src/edit-cluster-detector.d.ts.map +1 -0
- package/dist/src/edit-cluster-detector.js +57 -0
- package/dist/src/edit-cluster-detector.js.map +1 -0
- package/dist/src/heuristic-classifier.d.ts +9 -0
- package/dist/src/heuristic-classifier.d.ts.map +1 -0
- package/dist/src/heuristic-classifier.js +33 -0
- package/dist/src/heuristic-classifier.js.map +1 -0
- package/dist/src/index.d.ts +70 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +16 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/observation-differ.d.ts +8 -0
- package/dist/src/observation-differ.d.ts.map +1 -0
- package/dist/src/observation-differ.js +16 -0
- package/dist/src/observation-differ.js.map +1 -0
- package/dist/src/page-move-detector.d.ts +11 -0
- package/dist/src/page-move-detector.d.ts.map +1 -0
- package/dist/src/page-move-detector.js +21 -0
- package/dist/src/page-move-detector.js.map +1 -0
- package/dist/src/protection-tracker.d.ts +23 -0
- package/dist/src/protection-tracker.d.ts.map +1 -0
- package/dist/src/protection-tracker.js +74 -0
- package/dist/src/protection-tracker.js.map +1 -0
- package/dist/src/revert-detector.d.ts +3 -0
- package/dist/src/revert-detector.d.ts.map +1 -0
- package/dist/src/revert-detector.js +43 -0
- package/dist/src/revert-detector.js.map +1 -0
- package/dist/src/section-differ.d.ts +26 -0
- package/dist/src/section-differ.d.ts.map +1 -0
- package/dist/src/section-differ.js +268 -0
- package/dist/src/section-differ.js.map +1 -0
- package/dist/src/talk-activity-detector.d.ts +16 -0
- package/dist/src/talk-activity-detector.d.ts.map +1 -0
- package/dist/src/talk-activity-detector.js +76 -0
- package/dist/src/talk-activity-detector.js.map +1 -0
- package/dist/src/talk-correlator.d.ts +7 -0
- package/dist/src/talk-correlator.d.ts.map +1 -0
- package/dist/src/talk-correlator.js +53 -0
- package/dist/src/talk-correlator.js.map +1 -0
- package/dist/src/talk-section-parser.d.ts +22 -0
- package/dist/src/talk-section-parser.d.ts.map +1 -0
- package/dist/src/talk-section-parser.js +109 -0
- package/dist/src/talk-section-parser.js.map +1 -0
- package/dist/src/template-tracker.d.ts +12 -0
- package/dist/src/template-tracker.d.ts.map +1 -0
- package/dist/src/template-tracker.js +225 -0
- package/dist/src/template-tracker.js.map +1 -0
- package/dist/src/wikilink-extractor.d.ts +8 -0
- package/dist/src/wikilink-extractor.d.ts.map +1 -0
- package/dist/src/wikilink-extractor.js +81 -0
- package/dist/src/wikilink-extractor.js.map +1 -0
- package/dist/src/wikitext-parser.d.ts +15 -0
- package/dist/src/wikitext-parser.d.ts.map +1 -0
- package/dist/src/wikitext-parser.js +85 -0
- package/dist/src/wikitext-parser.js.map +1 -0
- package/dist/tsconfig 2.tsbuildinfo +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +28 -0
- package/src/__tests__/category-tracker.test.ts +79 -0
- package/src/__tests__/citation-tracker.test.ts +185 -0
- package/src/__tests__/edit-cluster-detector.test.ts +79 -0
- package/src/__tests__/heuristic-classifier.test.ts +67 -0
- package/src/__tests__/observation-differ.test.ts +58 -0
- package/src/__tests__/page-move-detector.test.ts +64 -0
- package/src/__tests__/protection-tracker.test.ts +72 -0
- package/src/__tests__/revert-detector.test.ts +76 -0
- package/src/__tests__/section-differ.test.ts +120 -0
- package/src/__tests__/talk-activity-detector.test.ts +112 -0
- package/src/__tests__/talk-correlator.test.ts +71 -0
- package/src/__tests__/talk-section-parser.test.ts +105 -0
- package/src/__tests__/template-tracker.test.ts +159 -0
- package/src/__tests__/wikilink-extractor.test.ts +101 -0
- package/src/__tests__/wikitext-parser.test.ts +142 -0
- package/src/category-tracker.ts +75 -0
- package/src/citation-tracker.ts +226 -0
- package/src/claim-differ.ts +4 -0
- package/src/edit-cluster-detector.ts +78 -0
- package/src/heuristic-classifier.ts +59 -0
- package/src/index.ts +88 -0
- package/src/observation-differ.ts +26 -0
- package/src/page-move-detector.ts +32 -0
- package/src/protection-tracker.ts +103 -0
- package/src/revert-detector.ts +51 -0
- package/src/section-differ.ts +315 -0
- package/src/talk-activity-detector.ts +105 -0
- package/src/talk-correlator.ts +70 -0
- package/src/talk-section-parser.ts +151 -0
- package/src/template-tracker.ts +253 -0
- package/src/wikilink-extractor.ts +100 -0
- package/src/wikitext-parser.ts +92 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { EvidenceEvent } from "@refract-org/evidence-graph";
|
|
2
|
+
|
|
3
|
+
/**
 * Outcome of comparing a prior set of evidence events with a current one.
 * Events are matched by a key built from event type, revision ids and section.
 */
export interface ObservationDiff {
  /** Events of the current set whose key does not occur in the prior set. */
  new: EvidenceEvent[];
  /** Events of the prior set whose key does not occur in the current set. */
  resolved: EvidenceEvent[];
  /** Events of the current set whose key also occurs in the prior set. */
  unchanged: EvidenceEvent[];
}
|
|
8
|
+
|
|
9
|
+
/**
 * Derives the identity string used to match events across observation runs.
 * Two events are considered the same observation when their type, both
 * revision ids and their section are all equal.
 */
function eventKey(event: EvidenceEvent): string {
  const { eventType, fromRevisionId, toRevisionId, section } = event;
  return `${eventType}|${fromRevisionId}|${toRevisionId}|${section}`;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Splits events into new / resolved / unchanged by comparing event keys.
 *
 * @param prior - Events from the earlier observation run.
 * @param current - Events from the later observation run.
 * @returns `new` and `unchanged` hold elements of `current` (in their original
 *   order); `resolved` holds elements of `prior` whose key no longer appears.
 */
export function diffObservations(prior: EvidenceEvent[], current: EvidenceEvent[]): ObservationDiff {
  const priorKeys = new Set<string>();
  for (const event of prior) {
    priorKeys.add(eventKey(event));
  }

  // Partition `current` in one pass while collecting its keys.
  const currentKeys = new Set<string>();
  const added: EvidenceEvent[] = [];
  const kept: EvidenceEvent[] = [];
  for (const event of current) {
    const key = eventKey(event);
    currentKeys.add(key);
    if (priorKeys.has(key)) {
      kept.push(event);
    } else {
      added.push(event);
    }
  }

  const gone = prior.filter((event) => !currentKeys.has(eventKey(event)));

  return { new: added, resolved: gone, unchanged: kept };
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import type { EvidenceEvent, EvidenceLayer } from "@refract-org/evidence-graph";
|
|
2
|
+
|
|
3
|
+
/** One page-move (rename) entry as consumed by `buildPageMoveEvents`. */
interface PageMoveRecord {
  /** Title before the move; becomes the event's `before` value. */
  oldTitle: string;
  /** Title after the move; becomes the event's `after` value. */
  newTitle: string;
  /** Timestamp of the move, copied verbatim onto the event. */
  timestamp: string;
  /** Revision id associated with the move; becomes the event's `toRevisionId`. */
  revId: number;
  /** Move comment, embedded in the event's fact detail string. */
  comment: string;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Converts page-move records into `page_moved` evidence events.
 *
 * Each record yields exactly one event, in input order. The old and new titles
 * are exposed as `before` / `after`, and a single deterministic fact repeats
 * both titles together with the move comment.
 *
 * @param moves - Move records to convert.
 * @returns One event per record; an empty array for empty input.
 */
export function buildPageMoveEvents(moves: PageMoveRecord[]): EvidenceEvent[] {
  const layer: EvidenceLayer = "observed";

  return moves.map(
    ({ oldTitle, newTitle, timestamp, revId, comment }): EvidenceEvent => ({
      eventType: "page_moved",
      fromRevisionId: 0,
      toRevisionId: revId,
      section: "",
      before: oldTitle,
      after: newTitle,
      deterministicFacts: [{ fact: "page_moved", detail: `from=${oldTitle} to=${newTitle} comment=${comment}` }],
      layer,
      timestamp,
    }),
  );
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/** A single protection-log entry for a page. */
export interface ProtectionLogRecord {
  /** Identifier of the log entry. */
  logId: number;
  /** Title of the page the entry applies to; used as the state-map key. */
  pageTitle: string;
  /** Entry timestamp; parsed with `new Date(...)` for ordering and range filters. */
  timestamp: string;
  /** Free-text comment attached to the entry. */
  comment: string;
  /** What the entry did to the page's protection. */
  action: "protect" | "unprotect" | "modify";
}
|
|
8
|
+
|
|
9
|
+
/** Protection status of one page as derived from its log history. */
export interface ProtectionState {
  /** Status label; `buildState` writes "protected" or "modified". */
  level: string;
  /** Timestamp of the log entry that established this state. */
  sinceTimestamp: string;
  /** Id of the log entry that established this state. */
  sinceLogId: number;
}
|
|
14
|
+
|
|
15
|
+
/** A difference between two protection-state snapshots for one page. */
export interface ProtectionChange {
  /** Whether protection appeared, disappeared, or was replaced by a different log entry. */
  type: "added" | "removed" | "modified";
  /** Log record describing the change. */
  logEvent: ProtectionLogRecord;
}
|
|
19
|
+
|
|
20
|
+
/** Operations for deriving and comparing page-protection state from log records. */
export interface ProtectionTracker {
  /** Replays log records and returns the resulting protection state keyed by page title. */
  buildState(logs: ProtectionLogRecord[]): Map<string, ProtectionState>;

  /** Lists the per-page differences between two state snapshots. */
  diffState(before: Map<string, ProtectionState>, after: Map<string, ProtectionState>): ProtectionChange[];

  /** Selects the log records whose timestamp falls between the two given timestamps. */
  findLogsBetween(logs: ProtectionLogRecord[], fromTimestamp: string, toTimestamp: string): ProtectionLogRecord[];
}
|
|
25
|
+
|
|
26
|
+
/**
 * Default `ProtectionTracker` implementation.
 *
 * State is reconstructed by replaying log records in chronological order, so
 * the last record for a page decides its state.
 */
export const protectionTracker: ProtectionTracker = {
  /**
   * Replays `logs` oldest-first and returns the resulting state per page title.
   *
   * "protect" and "modify" records (re)set the page's state; "unprotect"
   * removes it. Records sharing the same timestamp are ordered by `logId`,
   * so a same-second protect/unprotect pair is applied in log order instead
   * of whatever order the caller happened to pass them in.
   *
   * @param logs - Log records in any order; the array is not mutated.
   * @returns Map from page title to its current protection state.
   */
  buildState(logs: ProtectionLogRecord[]): Map<string, ProtectionState> {
    const state = new Map<string, ProtectionState>();

    // Parse each timestamp once, then sort by time with logId as tie-breaker.
    // `a.ts - b.ts` is 0 for ties (and NaN for unparseable timestamps); both
    // are falsy, so `||` falls through to the logId comparison.
    const ordered = logs
      .map((log) => ({ log, ts: new Date(log.timestamp).getTime() }))
      .sort((a, b) => a.ts - b.ts || a.log.logId - b.log.logId);

    for (const { log } of ordered) {
      switch (log.action) {
        case "protect":
        case "modify":
          state.set(log.pageTitle, {
            level: log.action === "protect" ? "protected" : "modified",
            sinceTimestamp: log.timestamp,
            sinceLogId: log.logId,
          });
          break;
        case "unprotect":
          state.delete(log.pageTitle);
          break;
      }
    }

    return state;
  },

  /**
   * Compares two state snapshots page by page.
   *
   * The returned `logEvent` is synthesized from the state entries (its
   * `comment` is always empty): for "added" and "modified" it carries the
   * log id and timestamp of the `after` entry; for "removed" it carries those
   * of the `before` entry, because the snapshots do not contain the removing
   * log record itself. A page counts as "modified" when both snapshots have
   * it but with different `sinceLogId` values.
   *
   * @param before - Earlier snapshot.
   * @param after - Later snapshot.
   * @returns Changes in the order titles appear in `before`, then `after`.
   */
  diffState(before: Map<string, ProtectionState>, after: Map<string, ProtectionState>): ProtectionChange[] {
    const changes: ProtectionChange[] = [];

    const record = (
      type: ProtectionChange["type"],
      action: ProtectionLogRecord["action"],
      pageTitle: string,
      source: ProtectionState,
    ): void => {
      changes.push({
        type,
        logEvent: {
          logId: source.sinceLogId,
          pageTitle,
          timestamp: source.sinceTimestamp,
          comment: "",
          action,
        },
      });
    };

    const allTitles = new Set([...before.keys(), ...after.keys()]);
    for (const title of allTitles) {
      const earlier = before.get(title);
      const later = after.get(title);

      if (!earlier && later) {
        record("added", "protect", title, later);
      } else if (earlier && !later) {
        record("removed", "unprotect", title, earlier);
      } else if (earlier && later && earlier.sinceLogId !== later.sinceLogId) {
        record("modified", "modify", title, later);
      }
    }

    return changes;
  },

  /**
   * Returns the log records with `fromTimestamp < timestamp <= toTimestamp`
   * (exclusive start, inclusive end), preserving input order. Records whose
   * timestamp cannot be parsed never match.
   */
  findLogsBetween(logs: ProtectionLogRecord[], fromTimestamp: string, toTimestamp: string): ProtectionLogRecord[] {
    const from = new Date(fromTimestamp).getTime();
    const to = new Date(toTimestamp).getTime();

    return logs.filter((log) => {
      const at = new Date(log.timestamp).getTime();
      return at > from && at <= to;
    });
  },
};
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { Revision } from "@refract-org/evidence-graph";
|
|
2
|
+
import type { RevertChain, RevertDetector } from "./index.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Edit-summary patterns treated as signs of a revert. All are case-insensitive
 * and none uses the `g` flag, so `test` carries no state between calls.
 */
const REVERT_PATTERNS = [
  /\brevert/i,
  /\bundid\s+revision/i,
  /\brvv\b/i,
  /\brollback\b/i,
  /\brestore/i,
  /\[\[WP:ROLLBACK\]\]/i,
];

/**
 * Comment-based revert detection.
 *
 * Neither method relies on `this`, so both can be destructured or passed
 * around as plain callbacks.
 */
export const revertDetector: RevertDetector = {
  /** Returns true when the edit summary matches any known revert pattern. */
  isRevert(comment: string): boolean {
    return REVERT_PATTERNS.some((pattern) => pattern.test(comment));
  },

  /**
   * Groups revert edits into chains.
   *
   * Revisions are processed oldest-first (the input array is not mutated).
   * A revert joins the first existing chain whose revision-id range it
   * touches or is adjacent to (within 1 on either side); otherwise it starts
   * a new chain. `participants` counts the revert revisions merged into the
   * chain.
   *
   * For a new chain, `revertedToRevisionId` is the number following the word
   * "revision" in the comment when present, else `revId - 1`.
   * NOTE(review): for "Undid revision N" summaries the captured id is the
   * revision named in the summary — confirm this is the intended target.
   *
   * @param revisions - Revisions in any order.
   * @returns Chains in order of their first revert.
   */
  detectRevertChain(revisions: Revision[]): RevertChain[] {
    const chains: RevertChain[] = [];

    // Parse timestamps once rather than on every comparator call.
    const ordered = revisions
      .map((rev) => ({ rev, ts: new Date(rev.timestamp).getTime() }))
      .sort((a, b) => a.ts - b.ts);

    for (const { rev } of ordered) {
      // Go through the exported object rather than `this`, which is undefined
      // when the method is called detached from the object.
      if (!revertDetector.isRevert(rev.comment)) continue;

      const existing = chains.find(
        (chain) => rev.revId <= chain.endRevisionId + 1 && rev.revId >= chain.startRevisionId - 1,
      );

      if (existing) {
        existing.startRevisionId = Math.min(existing.startRevisionId, rev.revId);
        existing.endRevisionId = Math.max(existing.endRevisionId, rev.revId);
        existing.participants++;
        continue;
      }

      const namedRevision = rev.comment.match(/revision\s+(\d+)/i);
      chains.push({
        startRevisionId: rev.revId,
        endRevisionId: rev.revId,
        revertedToRevisionId: namedRevision ? parseInt(namedRevision[1], 10) : rev.revId - 1,
        participants: 1,
      });
    }

    return chains;
  },
};
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
import type { Section, SectionChange } from "@refract-org/evidence-graph";
|
|
2
|
+
import type { SectionDiffer } from "./index.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Splits wikitext into sections and diffs two section lists.
 */
export const sectionDiffer: SectionDiffer = {
  /**
   * Splits wikitext into a lead section plus one section per header line.
   *
   * A header is a line of the form `== Title ==` with the same number of `=`
   * on both sides; the count becomes the section level. Text before the first
   * header is returned as a section with an empty title and level 1 (omitted
   * when it is blank, unless there are no headers at all).
   *
   * `byteOffset` is the UTF-8 byte offset of the header line's first byte
   * (0 for the lead). Section content is cut by line index, never by byte
   * offset: byte offsets and string indices diverge as soon as the text
   * contains a non-ASCII character.
   *
   * @param wikitext - Raw page source.
   * @returns Sections in document order, each with trimmed content.
   */
  extractSections(wikitext: string): Section[] {
    const lines = wikitext.split("\n");
    const headerRegex = /^(=+)\s*([^=]+?)\s*\1$/;

    // Byte offset of the start of every line, from one UTF-8 encoding pass.
    // 0x0a only ever encodes "\n" in UTF-8, so entries line up with `lines`.
    const bytes = new TextEncoder().encode(wikitext);
    const lineByteOffsets: number[] = [0];
    for (let i = 0; i < bytes.length; i++) {
      if (bytes[i] === 0x0a) {
        lineByteOffsets.push(i + 1);
      }
    }

    const headers: Array<{ lineIndex: number; byteOffset: number; level: number; title: string }> = [];
    lines.forEach((line, lineIndex) => {
      const match = headerRegex.exec(line);
      if (match) {
        headers.push({
          lineIndex,
          byteOffset: lineByteOffsets[lineIndex],
          level: match[1].length,
          title: match[2].trim(),
        });
      }
    });

    if (headers.length === 0) {
      return [{ title: "", level: 1, content: wikitext.trim(), byteOffset: 0 }];
    }

    const sections: Section[] = [];

    const leadContent = lines.slice(0, headers[0].lineIndex).join("\n").trim();
    if (leadContent) {
      sections.push({ title: "", level: 1, content: leadContent, byteOffset: 0 });
    }

    headers.forEach((header, k) => {
      // Body runs from the line after this header up to the next header line.
      const bodyEnd = k + 1 < headers.length ? headers[k + 1].lineIndex : lines.length;
      sections.push({
        title: header.title,
        level: header.level,
        content: lines.slice(header.lineIndex + 1, bodyEnd).join("\n").trim(),
        byteOffset: header.byteOffset,
      });
    });

    return sections;
  },

  /**
   * Compares two section lists, matching sections by level and
   * case-insensitive title.
   *
   * Emits one change per distinct section key in `after` ("added",
   * "modified" or "unchanged"), followed by a "removed" change for every key
   * present only in `before`. The lead section is reported as "(lead)".
   * When several sections share a key, the last one in the list is used.
   *
   * @param before - Sections of the earlier revision.
   * @param after - Sections of the later revision.
   * @returns Changes for `after` in order, then removals in `before` order.
   */
  diffSections(before: Section[], after: Section[]): SectionChange[] {
    const beforeByKey = new Map<string, Section>();
    const afterByKey = new Map<string, Section>();
    for (const s of before) beforeByKey.set(sectionKey(s), s);
    for (const s of after) afterByKey.set(sectionKey(s), s);

    const changes: SectionChange[] = [];

    for (const [key, afterSection] of afterByKey) {
      const name = afterSection.title || "(lead)";
      const beforeSection = beforeByKey.get(key);

      if (!beforeSection) {
        changes.push({ section: name, changeType: "added", toContent: afterSection.content });
      } else if (beforeSection.content !== afterSection.content) {
        changes.push({
          section: name,
          changeType: "modified",
          fromContent: beforeSection.content,
          toContent: afterSection.content,
        });
      } else {
        changes.push({ section: name, changeType: "unchanged" });
      }
    }

    for (const [key, beforeSection] of beforeByKey) {
      if (!afterByKey.has(key)) {
        changes.push({
          section: beforeSection.title || "(lead)",
          changeType: "removed",
          fromContent: beforeSection.content,
        });
      }
    }

    return changes;
  },
};
|
|
132
|
+
|
|
133
|
+
/** One step in a section's history. */
export interface SectionEvent {
  /** Revision in which the event was observed. */
  revisionId: number;
  /** Timestamp of that revision. */
  timestamp: string;
  /** What happened to the section in that revision. */
  eventType: "created" | "modified" | "removed" | "renamed";
  /** Section content associated with the event. */
  content?: string;
  /** Previous display name; set on "renamed" events. */
  oldName?: string;
  /** New display name; set on "renamed" events. */
  newName?: string;
}
|
|
141
|
+
|
|
142
|
+
/** History of a single section across a sequence of revisions. */
export interface SectionLineage {
  /** Current display name of the section ("(lead)" for the untitled lead). */
  sectionName: string;
  /** Header level of the section. */
  level: number;
  /** Revision in which the section first appeared. */
  firstSeenRevisionId: number;
  /** Timestamp of that first revision. */
  firstSeenAt: string;
  /** Most recent revision in which the section was present. */
  lastSeenRevisionId?: number;
  /** Timestamp of that most recent revision. */
  lastSeenAt?: string;
  /** Events recorded for the section, in the order they were observed. */
  events: SectionEvent[];
  /** False once the section has been recorded as removed. */
  isActive: boolean;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Builds a per-section history from an ordered list of revisions.
 *
 * Each revision is split into sections, and consecutive revisions are
 * compared by section key (level + case-insensitive title):
 *
 * - a key that disappears while a new key appears with byte-identical
 *   content is recorded as a "renamed" event on the existing lineage;
 * - any other disappearing key gets a "removed" event and becomes inactive;
 * - any other new key starts a fresh lineage with a "created" event;
 * - a key present in both revisions gets a "modified" event when its content
 *   changed, and its last-seen fields are advanced either way.
 *
 * Rename matching is one-to-one: each added section can absorb at most one
 * removed section, so several removed sections with identical content can no
 * longer collapse onto the same target and overwrite each other's lineage.
 *
 * NOTE(review): a section that is removed and later re-added under the same
 * key replaces the earlier (inactive) lineage in the result — confirm that
 * dropping the earlier history is intended.
 *
 * @param revisions - Revisions in the order they should be compared.
 * @returns One lineage per tracked section, sorted by section name.
 */
export function buildSectionLineage(
  revisions: Array<{ revId: number; timestamp: string; content: string }>,
): SectionLineage[] {
  if (revisions.length === 0) return [];

  const allSections = revisions.map((r) => sectionDiffer.extractSections(r.content));
  const lineages = new Map<string, SectionLineage>();

  // Every section of the first revision starts its own lineage.
  const firstRev = revisions[0];
  for (const section of allSections[0]) {
    lineages.set(sectionKey(section), {
      sectionName: section.title || "(lead)",
      level: section.level,
      firstSeenRevisionId: firstRev.revId,
      firstSeenAt: firstRev.timestamp,
      lastSeenRevisionId: firstRev.revId,
      lastSeenAt: firstRev.timestamp,
      events: [
        {
          revisionId: firstRev.revId,
          timestamp: firstRev.timestamp,
          eventType: "created",
          content: section.content,
        },
      ],
      isActive: true,
    });
  }

  for (let i = 0; i < revisions.length - 1; i++) {
    const prevRev = revisions[i];
    const currRev = revisions[i + 1];

    const prevByKey = new Map<string, Section>();
    const currByKey = new Map<string, Section>();
    for (const s of allSections[i]) prevByKey.set(sectionKey(s), s);
    for (const s of allSections[i + 1]) currByKey.set(sectionKey(s), s);

    const removedKeys = [...prevByKey.keys()].filter((k) => !currByKey.has(k));
    const addedKeys = [...currByKey.keys()].filter((k) => !prevByKey.has(k));

    // Index added sections by content. Each bucket is consumed from the end,
    // so with a single removed section the most recently listed added
    // section is chosen, and every added key is handed out at most once.
    const addedByContent = new Map<string, string[]>();
    for (const addKey of addedKeys) {
      const section = currByKey.get(addKey);
      if (!section) continue;
      const bucket = addedByContent.get(section.content);
      if (bucket) {
        bucket.push(addKey);
      } else {
        addedByContent.set(section.content, [addKey]);
      }
    }

    const renamedFromTo = new Map<string, string>();
    for (const remKey of removedKeys) {
      const remSection = prevByKey.get(remKey);
      if (!remSection) continue;
      const target = addedByContent.get(remSection.content)?.pop();
      if (target !== undefined) {
        renamedFromTo.set(remKey, target);
      }
    }
    const renamedToSet = new Set(renamedFromTo.values());

    // Sections that vanished: either renamed (lineage moves to the new key)
    // or removed (lineage stays under its key, marked inactive).
    for (const key of removedKeys) {
      const oldSection = prevByKey.get(key);
      if (!oldSection) continue;
      const lineage = lineages.get(key);
      const newKey = renamedFromTo.get(key);

      if (newKey !== undefined) {
        const newSection = currByKey.get(newKey);
        if (!newSection || !lineage) continue;
        lineage.events.push({
          revisionId: currRev.revId,
          timestamp: currRev.timestamp,
          eventType: "renamed",
          content: oldSection.content,
          oldName: oldSection.title || "(lead)",
          newName: newSection.title || "(lead)",
        });
        lineage.sectionName = newSection.title || "(lead)";
        lineage.level = newSection.level;
        lineage.lastSeenRevisionId = currRev.revId;
        lineage.lastSeenAt = currRev.timestamp;
        lineage.isActive = true;
        lineages.set(newKey, lineage);
        lineages.delete(key);
      } else if (lineage) {
        lineage.events.push({
          revisionId: currRev.revId,
          timestamp: currRev.timestamp,
          eventType: "removed",
          content: oldSection.content,
        });
        // The section was last present in the previous revision.
        lineage.lastSeenRevisionId = prevRev.revId;
        lineage.lastSeenAt = prevRev.timestamp;
        lineage.isActive = false;
      }
    }

    // Genuinely new sections (not the target of a rename).
    for (const key of addedKeys) {
      if (renamedToSet.has(key)) continue;
      const section = currByKey.get(key);
      if (!section) continue;
      lineages.set(key, {
        sectionName: section.title || "(lead)",
        level: section.level,
        firstSeenRevisionId: currRev.revId,
        firstSeenAt: currRev.timestamp,
        lastSeenRevisionId: currRev.revId,
        lastSeenAt: currRev.timestamp,
        events: [
          {
            revisionId: currRev.revId,
            timestamp: currRev.timestamp,
            eventType: "created",
            content: section.content,
          },
        ],
        isActive: true,
      });
    }

    // Sections present in both revisions.
    for (const [key, prevSection] of prevByKey) {
      const currSection = currByKey.get(key);
      if (!currSection) continue;
      const lineage = lineages.get(key);
      if (!lineage) continue;

      if (prevSection.content !== currSection.content) {
        lineage.events.push({
          revisionId: currRev.revId,
          timestamp: currRev.timestamp,
          eventType: "modified",
          content: currSection.content,
        });
      }
      lineage.lastSeenRevisionId = currRev.revId;
      lineage.lastSeenAt = currRev.timestamp;
      lineage.isActive = true;
    }
  }

  return [...lineages.values()].sort((a, b) => a.sectionName.localeCompare(b.sectionName));
}
|
|
312
|
+
|
|
313
|
+
/**
 * Identity of a section for matching across revisions: its header level and
 * lower-cased title, so titles differing only in case map to the same key.
 */
function sectionKey(s: Section): string {
  const normalizedTitle = s.title.toLowerCase();
  return `${s.level}:${normalizedTitle}`;
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import type { EvidenceEvent, Revision } from "@refract-org/evidence-graph";
|
|
2
|
+
|
|
3
|
+
/** Default look-back window for spike detection: 7 days, in milliseconds. */
const DEFAULT_SPIKE_WINDOW_MS = 1000 * 60 * 60 * 24 * 7;

/** Default multiplier applied to the moving average to obtain the spike threshold. */
const DEFAULT_SPIKE_FACTOR = 3.0;

/** Default number of day buckets averaged for the moving average. */
const DEFAULT_MA_PERIODS = 4;
|
|
6
|
+
|
|
7
|
+
/** Tuning knobs for `detectTalkActivitySpikes`; every field is optional. */
export interface TalkActivityOptions {
  /** How far back from the current time to look for spike days, in milliseconds. */
  lookbackWindowMs?: number;
  /** Multiplier applied to the moving average to obtain the spike threshold. */
  spikeFactor?: number;
  /** Number of day buckets averaged for the moving average. */
  movingAveragePeriods?: number;
}
|
|
12
|
+
|
|
13
|
+
/** Output of `detectTalkActivitySpikes`. */
export interface TalkActivityResult {
  /** One `talk_activity_spike` event per recent day that reached the threshold. */
  spikes: EvidenceEvent[];
  /** Talk-page edit counts per day, sorted by date. */
  activityByDay: Array<{ date: string; count: number }>;
  /** Latest moving average of daily counts; 0 when there was too little data. */
  movingAverage: number;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Flags recent days on which talk-page activity spiked.
 *
 * Talk revisions are bucketed per day. The moving average is the mean count
 * of the last `movingAveragePeriods` active days (days without any talk edit
 * are not part of the series). A day is a spike when it lies within
 * `lookbackWindowMs` of the current time and its count is at least
 * `max(movingAverage * spikeFactor, 3)`.
 *
 * Each spike event records the day, its count, the moving average, the
 * threshold, and how many article revisions fall on that same day.
 *
 * @param talkRevisions - Revisions of the talk page.
 * @param articleRevisions - Revisions of the article, used only for the
 *   same-day edit count.
 * @param options - Optional overrides. A `movingAveragePeriods` that is not
 *   a finite number >= 1 falls back to the default; fractions are floored.
 * @returns Spikes, the per-day activity series, and the latest moving
 *   average (0 when there are fewer active days than averaging periods).
 */
export function detectTalkActivitySpikes(
  talkRevisions: Revision[],
  articleRevisions: Revision[],
  options?: TalkActivityOptions,
): TalkActivityResult {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;

  const windowMs = options?.lookbackWindowMs ?? DEFAULT_SPIKE_WINDOW_MS;
  const spikeFactor = options?.spikeFactor ?? DEFAULT_SPIKE_FACTOR;

  // Guard against 0, negative, fractional or NaN periods, which would make
  // the average NaN and silently disable detection.
  const requestedPeriods = options?.movingAveragePeriods ?? DEFAULT_MA_PERIODS;
  const maPeriods =
    Number.isFinite(requestedPeriods) && requestedPeriods >= 1 ? Math.floor(requestedPeriods) : DEFAULT_MA_PERIODS;

  const spikes: EvidenceEvent[] = [];

  if (talkRevisions.length === 0) {
    return { spikes, activityByDay: [], movingAverage: 0 };
  }

  const dailyCounts = bucketByDay(talkRevisions);
  const sortedDays = Object.keys(dailyCounts).sort();
  const activityByDay = dailyActivity(dailyCounts);

  if (sortedDays.length < maPeriods) {
    return { spikes, activityByDay, movingAverage: 0 };
  }

  // Only the most recent window's average is used, so compute just that one.
  let windowSum = 0;
  for (let j = sortedDays.length - maPeriods; j < sortedDays.length; j++) {
    windowSum += dailyCounts[sortedDays[j]];
  }
  const latestMA = windowSum / maPeriods;

  // Never flag a day with fewer than 3 edits, however low the average is.
  const threshold = Math.max(latestMA * spikeFactor, 3);

  const recentStart = Date.now() - windowMs;
  const recentDays = sortedDays.filter((d) => new Date(d).getTime() >= recentStart);

  // Parsed lazily and at most once; only needed when a spike is found.
  let articleTimes: number[] | undefined;

  for (const day of recentDays) {
    const count = dailyCounts[day];
    if (count < threshold) continue;

    if (!articleTimes) {
      articleTimes = articleRevisions.map((r) => new Date(r.timestamp).getTime());
    }
    const dayStart = new Date(day).getTime();
    const dayEnd = dayStart + MS_PER_DAY;
    const nearbyArticleEdits = articleTimes.filter((t) => t >= dayStart && t < dayEnd).length;

    spikes.push({
      eventType: "talk_activity_spike",
      fromRevisionId: 0,
      toRevisionId: 0,
      section: "",
      before: "",
      after: "",
      deterministicFacts: [
        {
          fact: "talk_activity_spike",
          detail: `date=${day} talk_edits=${count} moving_average=${latestMA.toFixed(1)} threshold=${threshold.toFixed(1)} nearby_article_edits=${nearbyArticleEdits}`,
        },
      ],
      layer: "observed",
      timestamp: new Date(day).toISOString(),
    });
  }

  return {
    spikes,
    activityByDay,
    movingAverage: latestMA,
  };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Counts revisions per day, where the day is the first 10 characters of the
 * revision timestamp.
 */
function bucketByDay(revisions: Revision[]): Record<string, number> {
  return revisions.reduce<Record<string, number>>((tally, revision) => {
    const dayKey = revision.timestamp.slice(0, 10);
    tally[dayKey] = (tally[dayKey] ?? 0) + 1;
    return tally;
  }, {});
}
|
|
100
|
+
|
|
101
|
+
/**
 * Turns a day-to-count record into a list of `{ date, count }` rows sorted
 * by date string.
 */
function dailyActivity(counts: Record<string, number>): Array<{ date: string; count: number }> {
  const rows: Array<{ date: string; count: number }> = [];
  for (const date of Object.keys(counts)) {
    rows.push({ date, count: counts[date] });
  }
  rows.sort((left, right) => left.date.localeCompare(right.date));
  return rows;
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import type { EvidenceEvent, Revision } from "@refract-org/evidence-graph";
|
|
2
|
+
|
|
3
|
+
/** Default time before an article edit in which talk edits are considered: 7 days, in milliseconds. */
const DEFAULT_WINDOW_BEFORE_MS = 1000 * 60 * 60 * 24 * 7;

/** Default time after an article edit in which talk edits are considered: 3 days, in milliseconds. */
const DEFAULT_WINDOW_AFTER_MS = 1000 * 60 * 60 * 24 * 3;
|
|
5
|
+
|
|
6
|
+
/** Optional overrides for the correlation window used by `correlateTalkRevisions`. */
export interface TalkCorrelationOptions {
  /** How long before an article edit a talk edit may occur, in milliseconds. */
  windowBeforeMs?: number;
  /** How long after an article edit a talk edit may occur, in milliseconds. */
  windowAfterMs?: number;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Pairs each article revision with the talk-page revision closest in time.
 *
 * For every article revision, talk revisions between `windowBeforeMs` before
 * and `windowAfterMs` after it (both bounds inclusive) are candidates; the
 * one with the smallest absolute time difference wins, the earlier one on a
 * tie. Article revisions without a candidate produce no event.
 *
 * Each event carries the article revision id as `fromRevisionId`, the talk
 * revision id as `toRevisionId`, the talk revision's timestamp, and a fact
 * holding the time difference in hours (one decimal) plus the first 200
 * characters of the talk comment.
 *
 * @param articleRevs - Article revisions, processed in the given order.
 * @param talkRevs - Talk-page revisions in any order; not mutated.
 * @param options - Optional window overrides.
 * @returns At most one event per article revision, in article order.
 */
export function correlateTalkRevisions(
  articleRevs: Revision[],
  talkRevs: Revision[],
  options?: TalkCorrelationOptions,
): EvidenceEvent[] {
  const beforeMs = options?.windowBeforeMs ?? DEFAULT_WINDOW_BEFORE_MS;
  const afterMs = options?.windowAfterMs ?? DEFAULT_WINDOW_AFTER_MS;
  const correlated: EvidenceEvent[] = [];

  if (articleRevs.length === 0 || talkRevs.length === 0) {
    return correlated;
  }

  // Talk revisions in chronological order, each with its parsed time.
  const talkTimeline = talkRevs
    .map((rev) => ({ rev, at: new Date(rev.timestamp).getTime() }))
    .sort((x, y) => x.at - y.at);

  for (const article of articleRevs) {
    const editedAt = new Date(article.timestamp).getTime();
    const earliest = editedAt - beforeMs;
    const latest = editedAt + afterMs;

    let nearest: Revision | null = null;
    let nearestGap = Infinity;

    for (const candidate of talkTimeline) {
      if (candidate.at < earliest) continue;
      // The timeline is sorted, so nothing later can be inside the window.
      if (candidate.at > latest) break;

      const gap = Math.abs(candidate.at - editedAt);
      if (gap < nearestGap) {
        nearestGap = gap;
        nearest = candidate.rev;
      }
    }

    if (!nearest) continue;

    // Hours, rounded to one decimal place.
    const deltaHours = Math.round((nearestGap / (1000 * 60 * 60)) * 10) / 10;
    correlated.push({
      eventType: "talk_page_correlated",
      fromRevisionId: article.revId,
      toRevisionId: nearest.revId,
      section: "",
      before: "",
      after: "",
      deterministicFacts: [
        {
          fact: "talk_page_correlated",
          detail: `time_delta_hours=${deltaHours} talk_comment=${nearest.comment.slice(0, 200)}`,
        },
      ],
      layer: "observed",
      timestamp: nearest.timestamp,
    });
  }

  return correlated;
}
|