@refract-org/ingestion 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/src/index.d.ts +49 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +5 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/mediawiki-client.d.ts +24 -0
- package/dist/src/mediawiki-client.d.ts.map +1 -0
- package/dist/src/mediawiki-client.js +292 -0
- package/dist/src/mediawiki-client.js.map +1 -0
- package/dist/src/rate-limiter.d.ts +8 -0
- package/dist/src/rate-limiter.d.ts.map +1 -0
- package/dist/src/rate-limiter.js +28 -0
- package/dist/src/rate-limiter.js.map +1 -0
- package/dist/src/wikidata-mapper.d.ts +29 -0
- package/dist/src/wikidata-mapper.d.ts.map +1 -0
- package/dist/src/wikidata-mapper.js +138 -0
- package/dist/src/wikidata-mapper.js.map +1 -0
- package/dist/src/xml-dump-source.d.ts +8 -0
- package/dist/src/xml-dump-source.d.ts.map +1 -0
- package/dist/src/xml-dump-source.js +77 -0
- package/dist/src/xml-dump-source.js.map +1 -0
- package/dist/tsconfig 2.tsbuildinfo +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/package.json +28 -0
- package/src/__tests__/auth-integration.test.ts +59 -0
- package/src/__tests__/integration.test.ts +95 -0
- package/src/__tests__/mediawiki-client.test.ts +113 -0
- package/src/__tests__/page-move.test.ts +31 -0
- package/src/__tests__/rate-limiter.test.ts +30 -0
- package/src/__tests__/talk-page.test.ts +46 -0
- package/src/__tests__/wikidata-mapper.test.ts +134 -0
- package/src/__tests__/xml-dump-source.test.ts +151 -0
- package/src/index.ts +63 -0
- package/src/mediawiki-client.ts +420 -0
- package/src/rate-limiter.ts +29 -0
- package/src/wikidata-mapper.ts +197 -0
- package/src/xml-dump-source.ts +89 -0
package/README.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# @refract-org/ingestion
|
|
2
|
+
|
|
3
|
+
Wikimedia API adapters — revision fetching, diffing, rate limiting.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
bun add @refract-org/ingestion
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## Exports
|
|
10
|
+
|
|
11
|
+
### Interfaces
|
|
12
|
+
|
|
13
|
+
- `RevisionFetcher` — fetch revisions by page title
|
|
14
|
+
- `RevisionSource` — async iterable revision stream
|
|
15
|
+
- `DiffFetcher` — fetch diff between two revisions
|
|
16
|
+
|
|
17
|
+
### Classes
|
|
18
|
+
|
|
19
|
+
- `MediaWikiClient` — MediaWiki Action API (`api.php`) client with pagination, retries, and error handling
|
|
20
|
+
- `RateLimiter` — configurable request throttling
|
|
21
|
+
|
|
22
|
+
```ts
|
|
23
|
+
import { MediaWikiClient, RateLimiter } from "@refract-org/ingestion";
|
|
24
|
+
import type { RevisionFetcher, RevisionOptions } from "@refract-org/ingestion";
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
[Refract](https://github.com/refract-org/sequent) · [Docs](https://github.com/refract-org/sequent-docs) · [npm](https://www.npmjs.com/package/@refract-org/ingestion)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import type { DiffResult, Revision } from "@refract-org/evidence-graph";
|
|
2
|
+
/**
 * Optional credentials attached to outgoing MediaWikiClient requests.
 *
 * Every field is optional. The client uses `apiKey` for a Bearer
 * `Authorization` header when present, otherwise `apiUser` + `apiPassword`
 * for a Basic one; the OAuth pair is sent as separate `X-OAuth-Client-*`
 * headers when both halves are provided.
 */
export interface AuthConfig {
  /** Token sent as `Authorization: Bearer <apiKey>`. */
  apiKey?: string;
  /** User name for Basic authentication (only used together with `apiPassword`). */
  apiUser?: string;
  /** Password for Basic authentication (only used together with `apiUser`). */
  apiPassword?: string;
  /** OAuth client identifier (only used together with `oauthClientSecret`). */
  oauthClientId?: string;
  /** OAuth client secret (only used together with `oauthClientId`). */
  oauthClientSecret?: string;
}
|
|
9
|
+
/** Something that can load the revisions of a page, by title, as one array. */
export interface RevisionFetcher {
  /**
   * @param pageTitle Title of the page whose revisions are wanted.
   * @param options Optional range, direction and limit settings.
   * @returns A promise for the collected revisions.
   */
  fetchRevisions(pageTitle: string, options?: RevisionOptions): Promise<Array<Revision>>;
}
|
|
12
|
+
/** Something that exposes the revisions of a page as an async stream. */
export interface RevisionSource {
  /**
   * @param pageTitle Title of the page whose revisions are wanted.
   * @param options Optional range, direction and limit settings.
   * @returns An async iterable yielding one revision at a time.
   */
  revisions(pageTitle: string, options?: RevisionOptions): AsyncIterable<Revision>;
}
|
|
15
|
+
/** Something that can compute the difference between two revisions. */
export interface DiffFetcher {
  /**
   * @param fromRevId Revision id of the "before" side.
   * @param toRevId Revision id of the "after" side.
   * @returns A promise for the diff between the two revisions.
   */
  fetchDiff(fromRevId: number, toRevId: number): Promise<DiffResult>;
}
|
|
18
|
+
/** Something that can list the move (rename) log entries of a page. */
export interface MoveFetcher {
  /**
   * @param pageTitle Title used to look up move log entries.
   * @returns A promise for the recorded page moves.
   */
  fetchPageMoves(pageTitle: string): Promise<Array<PageMove>>;
}
|
|
21
|
+
/** One entry of a page's protection log. */
export interface ProtectionLogEvent {
  /** Identifier of the log entry. */
  logId: number;
  /** Title of the page the entry refers to. */
  pageTitle: string;
  /** Time of the log entry, as the timestamp string reported by the API. */
  timestamp: string;
  /** Log comment; MediaWikiClient substitutes an empty string when none is given. */
  comment: string;
  /** Kind of protection change that was logged. */
  action: "protect" | "unprotect" | "modify";
  /** Protection level taken from the entry's first protection detail, when available. */
  level?: string;
}
|
|
29
|
+
/** One entry of a page's move (rename) log. */
export interface PageMove {
  /** Title the page had before the move. */
  oldTitle: string;
  /** Title the page was moved to; MediaWikiClient uses an empty string when the log entry has no target. */
  newTitle: string;
  /** Time of the move, as the timestamp string reported by the API. */
  timestamp: string;
  /** Numeric identifier of the entry. Note: MediaWikiClient fills this from the log entry's `logid`. */
  revId: number;
  /** Log comment; MediaWikiClient substitutes an empty string when none is given. */
  comment: string;
}
|
|
36
|
+
/** Optional filters applied when listing the revisions of a page. */
export interface RevisionOptions {
  /** Number of revisions to collect; MediaWikiClient falls back to 500 when omitted. */
  limit?: number;
  /** One bound of the time range (sent to the API as `rvstart`/`rvend` depending on `direction`). */
  start?: Date;
  /** Other bound of the time range (sent to the API as `rvstart`/`rvend` depending on `direction`). */
  end?: Date;
  /** Enumeration order; MediaWikiClient treats anything other than `"newer"` as `"older"`. */
  direction?: "newer" | "older";
  /** Revision id to start enumerating from (sent as `rvstartid`). */
  startRevId?: number;
  /** Revision id to stop enumerating at (sent as `rvendid`). */
  endRevId?: number;
}
|
|
44
|
+
export { MediaWikiClient } from "./mediawiki-client.js";
|
|
45
|
+
export { RateLimiter } from "./rate-limiter.js";
|
|
46
|
+
export type { PageToEntityMap, WikidataClaim, WikidataEntity, WikidataValue } from "./wikidata-mapper.js";
|
|
47
|
+
export { fetchWikidataEntity, fetchWikidataId, mapPagesToEntities, mapPageToEntity, wikidataEntityToEvents, } from "./wikidata-mapper.js";
|
|
48
|
+
export { XmlDumpRevisionSource } from "./xml-dump-source.js";
|
|
49
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,6BAA6B,CAAC;AAExE,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,MAAM,WAAW,eAAe;IAC9B,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;CACnF;AAED,MAAM,WAAW,cAAc;IAC7B,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;CAClF;AAED,MAAM,WAAW,WAAW;IAC1B,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;CACpE;AAED,MAAM,WAAW,WAAW;IAC1B,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;CACxD;AAED,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,SAAS,GAAG,WAAW,GAAG,QAAQ,CAAC;IAC3C,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,QAAQ;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,IAAI,CAAC;IACb,GAAG,CAAC,EAAE,IAAI,CAAC;IACX,SAAS,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,YAAY,EAAE,eAAe,EAAE,aAAa,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAC1G,OAAO,EACL,mBAAmB,EACnB,eAAe,EACf,kBAAkB,EAClB,eAAe,EACf,sBAAsB,GACvB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { MediaWikiClient } from "./mediawiki-client.js";
|
|
2
|
+
export { RateLimiter } from "./rate-limiter.js";
|
|
3
|
+
export { fetchWikidataEntity, fetchWikidataId, mapPagesToEntities, mapPageToEntity, wikidataEntityToEvents, } from "./wikidata-mapper.js";
|
|
4
|
+
export { XmlDumpRevisionSource } from "./xml-dump-source.js";
|
|
5
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAoDA,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhD,OAAO,EACL,mBAAmB,EACnB,eAAe,EACf,kBAAkB,EAClB,eAAe,EACf,sBAAsB,GACvB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { DiffResult, Revision } from "@refract-org/evidence-graph";
|
|
2
|
+
import type { AuthConfig, DiffFetcher, MoveFetcher, PageMove, ProtectionLogEvent, RevisionFetcher, RevisionOptions, RevisionSource } from "./index.js";
|
|
3
|
+
/**
 * Type declaration of the MediaWiki API client.
 *
 * The class satisfies the revision-fetching, revision-streaming, diff and
 * page-move contracts declared in `index`.
 */
export declare class MediaWikiClient
  implements RevisionFetcher, RevisionSource, DiffFetcher, MoveFetcher
{
  private rateLimiter;
  private userAgent;
  private apiUrl;
  private auth?;

  /**
   * @param options Optional endpoint URL, User-Agent string, minimum delay
   *   between requests (milliseconds) and credentials.
   */
  constructor(options?: {
    apiUrl?: string;
    userAgent?: string;
    minDelayMs?: number;
    auth?: AuthConfig;
  });

  /** Fetches revisions of the talk page belonging to `pageTitle` (title is prefixed with `talkPrefix`). */
  fetchTalkRevisions(pageTitle: string, options?: RevisionOptions, talkPrefix?: string): Promise<Revision[]>;

  /** Fetches revisions of `pageTitle`. */
  fetchRevisions(pageTitle: string, options?: RevisionOptions): Promise<Revision[]>;

  /** Fetches the move log entries recorded for `pageTitle`. */
  fetchPageMoves(pageTitle: string): Promise<PageMove[]>;

  /** Fetches the protection log entries recorded for `pageTitle`. */
  fetchProtectionLogs(pageTitle: string): Promise<ProtectionLogEvent[]>;

  /** Fetches the diff between two revisions. */
  fetchDiff(fromRevId: number, toRevId: number): Promise<DiffResult>;

  private fetch;
  private sleep;

  /** Streams the revisions of `pageTitle` one at a time. */
  revisions(pageTitle: string, options?: RevisionOptions): AsyncIterable<Revision>;

  private mapRevision;
}
|
|
24
|
+
//# sourceMappingURL=mediawiki-client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mediawiki-client.d.ts","sourceRoot":"","sources":["../../src/mediawiki-client.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAY,UAAU,EAAE,QAAQ,EAAE,MAAM,6BAA6B,CAAC;AAClF,OAAO,KAAK,EACV,UAAU,EACV,WAAW,EACX,WAAW,EACX,QAAQ,EACR,kBAAkB,EAClB,eAAe,EACf,eAAe,EACf,cAAc,EACf,MAAM,YAAY,CAAC;AAqEpB,qBAAa,eAAgB,YAAW,eAAe,EAAE,cAAc,EAAE,WAAW,EAAE,WAAW;IAC/F,OAAO,CAAC,WAAW,CAAc;IACjC,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,IAAI,CAAC,CAAa;gBAEd,OAAO,CAAC,EAAE;QAAE,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,UAAU,CAAA;KAAE;IAO/F,kBAAkB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAM1G,cAAc,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAwEjF,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IA6CtD,mBAAmB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,EAAE,CAAC;IA4DrE,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC;YA6B1D,KAAK;IAgDnB,OAAO,CAAC,KAAK;IAIN,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,aAAa,CAAC,QAAQ,CAAC;IAOvF,OAAO,CAAC,WAAW;CAcpB"}
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
import { RateLimiter } from "./rate-limiter.js";
|
|
2
|
+
const DEFAULT_API_URL = "https://en.wikipedia.org/w/api.php";
|
|
3
|
+
const DEFAULT_USER_AGENT = "Refract/0.1.0 (https://github.com/refract-org/var-ia; sequent@nextconsensus.com)";
|
|
4
|
+
const MAX_REVISIONS_PER_REQUEST = 500;
|
|
5
|
+
/**
 * Client for a MediaWiki Action API endpoint (`api.php`).
 *
 * All requests go through the private `fetch` helper, which waits on the
 * shared rate limiter, attaches the User-Agent and authentication headers,
 * and retries HTTP 429 and 5xx responses.
 */
export class MediaWikiClient {
    rateLimiter;
    userAgent;
    apiUrl;
    auth;
    /**
     * @param options Optional settings:
     *   - `apiUrl`: endpoint URL (defaults to `DEFAULT_API_URL`)
     *   - `userAgent`: User-Agent header value (defaults to `DEFAULT_USER_AGENT`)
     *   - `minDelayMs`: minimum spacing between requests in ms (defaults to 100)
     *   - `auth`: credentials, see the header logic in `fetch`
     */
    constructor(options) {
        this.apiUrl = options?.apiUrl ?? DEFAULT_API_URL;
        this.userAgent = options?.userAgent ?? DEFAULT_USER_AGENT;
        this.rateLimiter = new RateLimiter(options?.minDelayMs ?? 100);
        this.auth = options?.auth;
    }
    /**
     * Fetches revisions of the talk page that belongs to `pageTitle`.
     *
     * @param pageTitle Title of the subject page (without talk prefix).
     * @param options Same options as {@link MediaWikiClient.fetchRevisions}.
     * @param talkPrefix Prefix prepended to the title; defaults to `"Talk:"`.
     * @returns The revisions of the prefixed page.
     */
    async fetchTalkRevisions(pageTitle, options, talkPrefix) {
        const prefix = talkPrefix ?? "Talk:";
        const talkTitle = `${prefix}${pageTitle}`;
        return this.fetchRevisions(talkTitle, options);
    }
    /**
     * Fetches revisions (including main-slot content) of a page, following
     * `rvcontinue` pagination until `options.limit` revisions were collected
     * or the API reports no further results.
     *
     * At most `options.limit` revisions are returned (500 when no limit is
     * given); a limit of zero or less yields an empty array without issuing
     * a request.
     *
     * @param pageTitle Title of the page.
     * @param options Optional limit, time range, direction and revision-id bounds.
     * @returns The collected revisions, in the order the API returned them.
     * @throws Error when the underlying HTTP request ultimately fails.
     */
    async fetchRevisions(pageTitle, options) {
        const revisions = [];
        const maxTotal = options?.limit ?? MAX_REVISIONS_PER_REQUEST;
        const isNewer = options?.direction === "newer";
        let rvcontinue;
        let pageInfo = null;
        while (revisions.length < maxTotal) {
            // Only ask for what is still missing so the final page cannot overshoot the limit.
            const remaining = maxTotal - revisions.length;
            const params = new URLSearchParams({
                action: "query",
                prop: "revisions",
                titles: pageTitle,
                rvprop: "content|ids|timestamp|flags|comment|size|user",
                rvslots: "main",
                rvlimit: String(Math.min(remaining, MAX_REVISIONS_PER_REQUEST)),
                format: "json",
                formatversion: "2",
            });
            params.set("rvdir", isNewer ? "newer" : "older");
            if (options?.start && options?.end) {
                // rvstart is where enumeration begins, so the bounds swap with the direction.
                params.set("rvstart", formatTimestamp(isNewer ? options.start : options.end));
                params.set("rvend", formatTimestamp(isNewer ? options.end : options.start));
            }
            else if (options?.start) {
                params.set("rvstart", formatTimestamp(options.start));
            }
            else if (options?.end) {
                params.set("rvend", formatTimestamp(options.end));
            }
            if (options?.startRevId) {
                params.set("rvstartid", String(options.startRevId));
            }
            if (options?.endRevId) {
                params.set("rvendid", String(options.endRevId));
            }
            if (rvcontinue) {
                params.set("rvcontinue", rvcontinue);
            }
            const response = await this.fetch(`${this.apiUrl}?${params.toString()}`);
            const data = await response.json();
            if (!data.query?.pages) {
                break;
            }
            for (const page of Object.values(data.query.pages)) {
                if (page.missing)
                    continue;
                // The first existing page supplies the id/title recorded on every revision.
                if (!pageInfo) {
                    pageInfo = { pageId: page.pageid, title: page.title };
                }
                for (const rev of page.revisions ?? []) {
                    revisions.push(this.mapRevision(rev, pageInfo));
                }
            }
            if (!data.continue?.rvcontinue) {
                break;
            }
            rvcontinue = data.continue.rvcontinue;
        }
        // Defensive trim in case the server returned more than was requested.
        return revisions.length > maxTotal ? revisions.slice(0, maxTotal) : revisions;
    }
    /**
     * Fetches all move log entries for a title, following `lecontinue`
     * pagination (50 entries per request).
     *
     * @param pageTitle Title passed to the log query as `letitle`.
     * @returns One `PageMove` per log entry; `revId` carries the entry's `logid`.
     * @throws Error when the underlying HTTP request ultimately fails.
     */
    async fetchPageMoves(pageTitle) {
        const moves = [];
        let lecontinue;
        while (true) {
            const params = new URLSearchParams({
                action: "query",
                list: "logevents",
                letype: "move",
                letitle: pageTitle,
                lelimit: "50",
                format: "json",
                formatversion: "2",
            });
            if (lecontinue)
                params.set("lecontinue", lecontinue);
            const response = await this.fetch(`${this.apiUrl}?${params.toString()}`);
            const data = await response.json();
            if (!data.query?.logevents)
                break;
            for (const entry of data.query.logevents) {
                moves.push({
                    oldTitle: entry.title,
                    newTitle: entry.params?.target_title ?? "",
                    timestamp: entry.timestamp,
                    revId: entry.logid,
                    comment: entry.comment ?? "",
                });
            }
            if (!data.continue?.lecontinue)
                break;
            lecontinue = data.continue.lecontinue;
        }
        return moves;
    }
    /**
     * Fetches all protection log entries for a title, following `lecontinue`
     * pagination (50 entries per request).
     *
     * @param pageTitle Title passed to the log query as `letitle`.
     * @returns One `ProtectionLogEvent` per log entry.
     * @throws Error when the underlying HTTP request ultimately fails.
     */
    async fetchProtectionLogs(pageTitle) {
        const events = [];
        let lecontinue;
        while (true) {
            const params = new URLSearchParams({
                action: "query",
                list: "logevents",
                letype: "protect",
                letitle: pageTitle,
                lelimit: "50",
                // `leprop` replaces the default property set, so every field read
                // below (logid, title, action, timestamp, comment, params) must be listed.
                leprop: "ids|title|type|timestamp|comment|details",
                format: "json",
                formatversion: "2",
            });
            if (lecontinue)
                params.set("lecontinue", lecontinue);
            const response = await this.fetch(`${this.apiUrl}?${params.toString()}`);
            const data = await response.json();
            for (const entry of data.query?.logevents ?? []) {
                // The API is believed to report protection settings under `params.details`;
                // `detail` is still accepted for backward compatibility with older callers/mocks.
                const protections = entry.params?.details ?? entry.params?.detail;
                events.push({
                    logId: entry.logid,
                    pageTitle: entry.title,
                    timestamp: entry.timestamp,
                    comment: entry.comment ?? "",
                    action: entry.action,
                    level: protections?.[0]?.level,
                });
            }
            if (!data.continue?.lecontinue)
                break;
            lecontinue = data.continue.lecontinue;
        }
        return events;
    }
    /**
     * Fetches the difference between two revisions via `action=compare`.
     *
     * @param fromRevId Revision id of the "before" side.
     * @param toRevId Revision id of the "after" side.
     * @returns Revision ids, parsed diff lines, an empty `sections` list and
     *   the byte-size delta (`tosize - fromsize`).
     * @throws Error when the response carries no `compare` object, or when the
     *   underlying HTTP request ultimately fails.
     */
    async fetchDiff(fromRevId, toRevId) {
        const params = new URLSearchParams({
            action: "compare",
            fromrev: String(fromRevId),
            torev: String(toRevId),
            // `size` is not part of the default prop set but is needed for sizeDelta.
            prop: "diff|ids|size",
            format: "json",
            formatversion: "2",
        });
        const response = await this.fetch(`${this.apiUrl}?${params.toString()}`);
        const data = await response.json();
        const compare = data.compare;
        if (!compare) {
            throw new Error(`Failed to fetch diff for revisions ${fromRevId} -> ${toRevId}`);
        }
        // formatversion=2 delivers the diff under `body`; `*` is the legacy key.
        // NOTE(review): the default diff body is an HTML table, while the parser below
        // expects unified-diff text — confirm whether `difftype=unified` should be requested.
        const diffBody = compare.body ?? compare["*"];
        const sizeDelta = compare.tosize - compare.fromsize;
        const lines = diffBody ? parseUnifiedDiff(diffBody) : [];
        return {
            fromRevId: compare.fromrevid,
            toRevId: compare.torevid,
            lines,
            sections: [],
            sizeDelta,
        };
    }
    /**
     * Performs a rate-limited GET with authentication headers and retries.
     *
     * HTTP 429 responses are retried after the `Retry-After` delay (seconds or
     * HTTP-date; 1 s when absent or unparsable) and 5xx responses after an
     * exponential back-off, as long as attempts remain. Each attempt is aborted
     * after 30 s.
     *
     * @param url Fully built request URL.
     * @param retries Maximum number of attempts (default 3).
     * @returns The first successful (`ok`) response.
     * @throws Error for non-retryable statuses or once the attempts are used up.
     */
    async fetch(url, retries = 3) {
        // Headers do not change between attempts, so build them once.
        const headers = {
            "User-Agent": this.userAgent,
            Accept: "application/json",
            "Accept-Encoding": "gzip",
        };
        if (this.auth?.apiKey) {
            headers.Authorization = `Bearer ${this.auth.apiKey}`;
        }
        else if (this.auth?.apiUser && this.auth?.apiPassword) {
            const encoded = btoa(`${this.auth.apiUser}:${this.auth.apiPassword}`);
            headers.Authorization = `Basic ${encoded}`;
        }
        if (this.auth?.oauthClientId && this.auth?.oauthClientSecret) {
            headers["X-OAuth-Client-Id"] = this.auth.oauthClientId;
            headers["X-OAuth-Client-Secret"] = this.auth.oauthClientSecret;
        }
        for (let attempt = 0; attempt < retries; attempt++) {
            await this.rateLimiter.acquire();
            const response = await fetch(url, {
                headers,
                signal: AbortSignal.timeout(30000),
            });
            if (response.ok)
                return response;
            const attemptsLeft = attempt < retries - 1;
            if (response.status === 429 && attemptsLeft) {
                const retryAfter = response.headers.get("Retry-After");
                let waitMs = 1000;
                if (retryAfter) {
                    const seconds = Number(retryAfter);
                    if (Number.isFinite(seconds)) {
                        waitMs = Math.max(0, seconds) * 1000;
                    }
                    else {
                        // Retry-After may also be an HTTP-date; wait until that moment.
                        const retryAt = Date.parse(retryAfter);
                        if (!Number.isNaN(retryAt)) {
                            waitMs = Math.max(0, retryAt - Date.now());
                        }
                    }
                }
                await this.sleep(waitMs);
                continue;
            }
            if (response.status >= 500 && attemptsLeft) {
                await this.sleep(2 ** attempt * 1000);
                continue;
            }
            throw new Error(`MediaWiki API error: ${response.status} ${response.statusText} for ${url}`);
        }
        throw new Error(`MediaWiki API request failed after ${retries} retries for ${url}`);
    }
    /**
     * @param ms Delay in milliseconds.
     * @returns A promise that resolves after the delay.
     */
    sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
    /**
     * Async-iterable view over {@link MediaWikiClient.fetchRevisions}: all
     * revisions are fetched first and then yielded one by one.
     *
     * @param pageTitle Title of the page.
     * @param options Same options as `fetchRevisions`.
     */
    async *revisions(pageTitle, options) {
        const revs = await this.fetchRevisions(pageTitle, options);
        for (const rev of revs) {
            yield rev;
        }
    }
    /**
     * Converts a raw API revision into the package's revision shape.
     *
     * @param raw Revision object as returned by the API.
     * @param page `{ pageId, title }` of the page the revision belongs to.
     * @returns The mapped revision; `user` is left undefined when the API marks
     *   it hidden, and missing comment/content/minor default to `""`/`""`/`false`.
     */
    mapRevision(raw, page) {
        const content = raw.slots?.main?.content ?? "";
        return {
            revId: raw.revid,
            pageId: page.pageId,
            pageTitle: page.title,
            timestamp: raw.timestamp,
            user: raw.userhidden ? undefined : raw.user,
            comment: raw.comment ?? "",
            content,
            size: raw.size,
            minor: raw.minor ?? false,
        };
    }
}
|
|
255
|
+
/**
 * Renders a date as an ISO-8601 UTC timestamp with second precision, i.e. the
 * `toISOString()` form with the fractional seconds removed.
 *
 * @param date Date to format.
 * @returns For example `2020-01-02T03:04:05Z`.
 */
function formatTimestamp(date) {
    const full = date.toISOString();
    // Drop the trailing ".sssZ" (five characters) and restore the "Z" suffix.
    const withoutFraction = full.substring(0, full.length - 5);
    return withoutFraction + "Z";
}
|
|
259
|
+
/**
 * Parses unified-diff text into a flat list of line records.
 *
 * Hunk headers (`@@ -a,b +c,d @@`) reset the running line numbers. `---` and
 * `+++` lines are treated as file headers only while the current hunk expects
 * no more removed/added lines; inside a hunk they are ordinary content (for
 * example a removed wikitext rule `----` appears as `-----`). Lines with any
 * other prefix are ignored.
 *
 * @param diffText Unified diff text, lines separated by `\n`.
 * @returns Records of the form `{ type, content, lineNumber }`, where `type`
 *   is `"unchanged"`, `"removed"` or `"added"`; removed lines carry the
 *   old-side line number, the others the new-side line number.
 */
function parseUnifiedDiff(diffText) {
    const lines = [];
    let fromLine = 0;
    let toLine = 0;
    // Old-/new-side lines the current hunk header still announces.
    let fromRemaining = 0;
    let toRemaining = 0;
    for (const line of diffText.split("\n")) {
        if (line.startsWith("@@")) {
            const match = line.match(/@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/);
            if (match) {
                fromLine = parseInt(match[1], 10);
                // An omitted hunk length means one line.
                fromRemaining = match[2] === undefined ? 1 : parseInt(match[2], 10);
                toLine = parseInt(match[3], 10);
                toRemaining = match[4] === undefined ? 1 : parseInt(match[4], 10);
            }
            continue;
        }
        const isOldFileHeader = fromRemaining <= 0 && line.startsWith("---");
        const isNewFileHeader = toRemaining <= 0 && line.startsWith("+++");
        if (isOldFileHeader || isNewFileHeader) {
            continue;
        }
        if (line.startsWith(" ")) {
            lines.push({ type: "unchanged", content: line.slice(1), lineNumber: toLine });
            fromLine++;
            toLine++;
            fromRemaining--;
            toRemaining--;
        }
        else if (line.startsWith("-")) {
            lines.push({ type: "removed", content: line.slice(1), lineNumber: fromLine });
            fromLine++;
            fromRemaining--;
        }
        else if (line.startsWith("+")) {
            lines.push({ type: "added", content: line.slice(1), lineNumber: toLine });
            toLine++;
            toRemaining--;
        }
    }
    return lines;
}
|
|
292
|
+
//# sourceMappingURL=mediawiki-client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mediawiki-client.js","sourceRoot":"","sources":["../../src/mediawiki-client.ts"],"names":[],"mappings":"AAWA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEhD,MAAM,eAAe,GAAG,oCAAoC,CAAC;AAC7D,MAAM,kBAAkB,GAAG,kFAAkF,CAAC;AAC9G,MAAM,yBAAyB,GAAG,GAAG,CAAC;AAgEtC,MAAM,OAAO,eAAe;IAClB,WAAW,CAAc;IACzB,SAAS,CAAS;IAClB,MAAM,CAAS;IACf,IAAI,CAAc;IAE1B,YAAY,OAAyF;QACnG,IAAI,CAAC,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,eAAe,CAAC;QACjD,IAAI,CAAC,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,kBAAkB,CAAC;QAC1D,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC,OAAO,EAAE,UAAU,IAAI,GAAG,CAAC,CAAC;QAC/D,IAAI,CAAC,IAAI,GAAG,OAAO,EAAE,IAAI,CAAC;IAC5B,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,SAAiB,EAAE,OAAyB,EAAE,UAAmB;QACxF,MAAM,MAAM,GAAG,UAAU,IAAI,OAAO,CAAC;QACrC,MAAM,SAAS,GAAG,GAAG,MAAM,GAAG,SAAS,EAAE,CAAC;QAC1C,OAAO,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IACjD,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,SAAiB,EAAE,OAAyB;QAC/D,MAAM,SAAS,GAAe,EAAE,CAAC;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,KAAK,IAAI,yBAAyB,EAAE,yBAAyB,CAAC,CAAC;QAC/F,IAAI,UAA8B,CAAC;QAEnC,IAAI,QAAQ,GAAoB,IAAI,CAAC;QAErC,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;gBACjC,MAAM,EAAE,OAAO;gBACf,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,SAAS;gBACjB,MAAM,EAAE,+CAA+C;gBACvD,OAAO,EAAE,MAAM;gBACf,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC;gBACtB,MAAM,EAAE,MAAM;gBACd,aAAa,EAAE,GAAG;aACnB,CAAC,CAAC;YAEH,MAAM,OAAO,GAAG,OAAO,EAAE,SAAS,KAAK,OAAO,CAAC;YAC/C,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YAEjD,IAAI,OAAO,EAAE,KAAK,IAAI,OAAO,EAAE,GAAG,EAAE,CAAC;gBACnC,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC9E,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;YAC9E,CAAC;iBAAM,IAAI,OAAO,EAAE,KAAK,EAAE,CAAC;gBAC1B,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,eAAe,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;YACxD,CAAC;iBAAM,IAAI,OAAO,EAAE,GAAG,EAAE,CAAC;gBACxB,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,eAAe,CAAC,OAAO,CAAC,GAAG,C
AAC,CAAC,CAAC;YACpD,CAAC;YACD,IAAI,OAAO,EAAE,UAAU,EAAE,CAAC;gBACxB,MAAM,CAAC,GAAG,CAAC,WAAW,EAAE,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;YACtD,CAAC;YACD,IAAI,OAAO,EAAE,QAAQ,EAAE,CAAC;gBACtB,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;YAClD,CAAC;YAED,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;YACvC,CAAC;YAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC;YAClD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACvC,MAAM,IAAI,GAA0B,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAE1D,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,CAAC;gBACvB,MAAM;YACR,CAAC;YAED,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBACnD,IAAI,IAAI,CAAC,OAAO;oBAAE,SAAS;gBAC3B,IAAI,CAAC,QAAQ,EAAE,CAAC;oBACd,QAAQ,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;gBACxD,CAAC;gBACD,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;oBACnB,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;wBACjC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC,CAAC;oBAClD,CAAC;gBACH,CAAC;YACH,CAAC;YAED,IAAI,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;gBAC9B,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;gBACtC,IAAI,SAAS,CAAC,MAAM,IAAI,CAAC,OAAO,EAAE,KAAK,IAAI,yBAAyB,CAAC;oBAAE,MAAM;YAC/E,CAAC;iBAAM,CAAC;gBACN,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,SAAiB;QACpC,MAAM,KAAK,GAAe,EAAE,CAAC;QAC7B,IAAI,UAA8B,CAAC;QAEnC,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;gBACjC,MAAM,EAAE,OAAO;gBACf,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE,SAAS;gBAClB,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE,MAAM;gBACd,aAAa,EAAE,GAAG;aACnB,CAAC,CAAC;YAEH,IAAI,UAAU;gBAAE,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;YAErD,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC;YAClD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACvC,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAElC,CAAC;YAEF,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,SAAS;gBAAE,MAAM;YAElC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,S
AAS,EAAE,CAAC;gBACzC,KAAK,CAAC,IAAI,CAAC;oBACT,QAAQ,EAAE,KAAK,CAAC,KAAK;oBACrB,QAAQ,EAAE,KAAK,CAAC,MAAM,EAAE,YAAY,IAAI,EAAE;oBAC1C,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,OAAO,EAAE,KAAK,CAAC,OAAO,IAAI,EAAE;iBAC7B,CAAC,CAAC;YACL,CAAC;YAED,IAAI,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;gBAC9B,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;YACxC,CAAC;iBAAM,CAAC;gBACN,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,mBAAmB,CAAC,SAAiB;QACzC,MAAM,MAAM,GAAyB,EAAE,CAAC;QACxC,IAAI,UAA8B,CAAC;QAEnC,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;gBACjC,MAAM,EAAE,OAAO;gBACf,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,SAAS;gBACjB,OAAO,EAAE,SAAS;gBAClB,OAAO,EAAE,IAAI;gBACb,MAAM,EAAE,SAAS;gBACjB,MAAM,EAAE,MAAM;gBACd,aAAa,EAAE,GAAG;aACnB,CAAC,CAAC;YAEH,IAAI,UAAU;gBAAE,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;YAErD,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC;YAClD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACvC,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAclC,CAAC;YAEF,IAAI,IAAI,CAAC,KAAK,EAAE,SAAS,EAAE,CAAC;gBAC1B,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;oBACzC,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC;oBAC/C,MAAM,CAAC,IAAI,CAAC;wBACV,KAAK,EAAE,KAAK,CAAC,KAAK;wBAClB,SAAS,EAAE,KAAK,CAAC,KAAK;wBACtB,SAAS,EAAE,KAAK,CAAC,SAAS;wBAC1B,OAAO,EAAE,KAAK,CAAC,OAAO,IAAI,EAAE;wBAC5B,MAAM,EAAE,KAAK,CAAC,MAA4C;wBAC1D,KAAK;qBACN,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,IAAI,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;gBAC9B,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;YACxC,CAAC;iBAAM,CAAC;gBACN,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,SAAiB,EAAE,OAAe;QAChD,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;YACjC,MAAM,EAAE,SAAS;YACjB,OAAO,EAAE,MAAM,CAAC,SAAS,CAAC;YAC1B,KAAK,EAAE,MAAM,CAAC,OAAO,CAAC;YACtB,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,GAAG;SACnB,CAAC,CAAC;QAEH,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC;QAClD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CA
AC,CAAC;QACvC,MAAM,IAAI,GAAoB,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEpD,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,sCAAsC,SAAS,OAAO,OAAO,EAAE,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC;QAC9D,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE3E,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,SAAS;YACjC,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,OAAO;YAC7B,KAAK;YACL,QAAQ,EAAE,EAAE;YACZ,SAAS;SACV,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,KAAK,CAAC,GAAW,EAAE,OAAO,GAAG,CAAC;QAC1C,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;YACnD,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,CAAC;YACjC,MAAM,OAAO,GAA2B;gBACtC,YAAY,EAAE,IAAI,CAAC,SAAS;gBAC5B,MAAM,EAAE,kBAAkB;gBAC1B,iBAAiB,EAAE,MAAM;aAC1B,CAAC;YAEF,IAAI,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC;gBACtB,OAAO,CAAC,aAAa,GAAG,UAAU,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACvD,CAAC;iBAAM,IAAI,IAAI,CAAC,IAAI,EAAE,OAAO,IAAI,IAAI,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC;gBACxD,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;gBACtE,OAAO,CAAC,aAAa,GAAG,SAAS,OAAO,EAAE,CAAC;YAC7C,CAAC;YAED,IAAI,IAAI,CAAC,IAAI,EAAE,aAAa,IAAI,IAAI,CAAC,IAAI,EAAE,iBAAiB,EAAE,CAAC;gBAC7D,OAAO,CAAC,mBAAmB,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC;gBACvD,OAAO,CAAC,uBAAuB,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC;YACjE,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAChC,OAAO;gBACP,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC;aACnC,CAAC,CAAC;YAEH,IAAI,QAAQ,CAAC,EAAE;gBAAE,OAAO,QAAQ,CAAC;YAEjC,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;gBAC5B,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;gBACvD,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;gBACnE,IAAI,OAAO,GAAG,OAAO,GAAG,CAAC,EAAE,CAAC;oBAC1B,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;oBACzB,SAAS;gBACX,CAAC;YACH,CAAC;YAED,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,IAAI,OAAO,GAAG,OAAO,GAAG,CAAC,EAAE,CAAC;
gBACpD,MAAM,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,OAAO,GAAG,IAAI,CAAC,CAAC;gBACtC,SAAS;YACX,CAAC;YAED,MAAM,IAAI,KAAK,CAAC,wBAAwB,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,QAAQ,GAAG,EAAE,CAAC,CAAC;QAC/F,CAAC;QAED,MAAM,IAAI,KAAK,CAAC,sCAAsC,OAAO,gBAAgB,GAAG,EAAE,CAAC,CAAC;IACtF,CAAC;IAEO,KAAK,CAAC,EAAU;QACtB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;IAC3D,CAAC;IAED,KAAK,CAAC,CAAC,SAAS,CAAC,SAAiB,EAAE,OAAyB;QAC3D,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAC3D,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,WAAW,CAAC,GAAgB,EAAE,IAAc;QAClD,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,OAAO,IAAI,EAAE,CAAC;QAC/C,OAAO;YACL,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,SAAS,EAAE,IAAI,CAAC,KAAK;YACrB,SAAS,EAAE,GAAG,CAAC,SAAS;YACxB,IAAI,EAAE,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI;YAC3C,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,EAAE;YAC1B,OAAO;YACP,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,KAAK,EAAE,GAAG,CAAC,KAAK,IAAI,KAAK;SAC1B,CAAC;IACJ,CAAC;CACF;AAED,SAAS,eAAe,CAAC,IAAU;IACjC,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IAC/B,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;AAChC,CAAC;AAED,SAAS,gBAAgB,CAAC,QAAgB;IACxC,MAAM,KAAK,GAAe,EAAE,CAAC;IAC7B,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAEvC,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,MAAM,GAAG,CAAC,CAAC;IAEf,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,wCAAwC,CAAC,CAAC;YACnE,IAAI,KAAK,EAAE,CAAC;gBACV,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAClC,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAClC,CAAC;YACD,SAAS;QACX,CAAC;QAED,IAAI,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YACrD,SAAS;QACX,CAAC;QAED,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACzB,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,
CAAC;YAC9E,QAAQ,EAAE,CAAC;YACX,MAAM,EAAE,CAAC;QACX,CAAC;aAAM,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC,CAAC;YAC9E,QAAQ,EAAE,CAAC;QACb,CAAC;aAAM,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;YAC1E,MAAM,EAAE,CAAC;QACX,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rate-limiter.d.ts","sourceRoot":"","sources":["../../src/rate-limiter.ts"],"names":[],"mappings":"AAAA,qBAAa,WAAW;IAGV,OAAO,CAAC,UAAU;IAF9B,OAAO,CAAC,QAAQ,CAAS;gBAEL,UAAU,GAAE,MAAY;IAItC,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAkB9B,OAAO,CAAC,KAAK;CAGd"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
export class RateLimiter {
|
|
2
|
+
minDelayMs;
|
|
3
|
+
nextSlot;
|
|
4
|
+
constructor(minDelayMs = 100) {
|
|
5
|
+
this.minDelayMs = minDelayMs;
|
|
6
|
+
this.nextSlot = Date.now();
|
|
7
|
+
}
|
|
8
|
+
async acquire() {
|
|
9
|
+
const now = Date.now();
|
|
10
|
+
let slot;
|
|
11
|
+
if (this.nextSlot <= now) {
|
|
12
|
+
slot = now;
|
|
13
|
+
this.nextSlot = now + this.minDelayMs;
|
|
14
|
+
}
|
|
15
|
+
else {
|
|
16
|
+
slot = this.nextSlot;
|
|
17
|
+
this.nextSlot += this.minDelayMs;
|
|
18
|
+
}
|
|
19
|
+
const waitMs = slot - now;
|
|
20
|
+
if (waitMs > 0) {
|
|
21
|
+
await this.sleep(waitMs);
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
sleep(ms) {
|
|
25
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
//# sourceMappingURL=rate-limiter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rate-limiter.js","sourceRoot":"","sources":["../../src/rate-limiter.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,WAAW;IAGF;IAFZ,QAAQ,CAAS;IAEzB,YAAoB,aAAqB,GAAG;QAAxB,eAAU,GAAV,UAAU,CAAc;QAC1C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,CAAC;IAED,KAAK,CAAC,OAAO;QACX,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,IAAI,IAAY,CAAC;QAEjB,IAAI,IAAI,CAAC,QAAQ,IAAI,GAAG,EAAE,CAAC;YACzB,IAAI,GAAG,GAAG,CAAC;YACX,IAAI,CAAC,QAAQ,GAAG,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC;QACxC,CAAC;aAAM,CAAC;YACN,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC;YACrB,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,UAAU,CAAC;QACnC,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,GAAG,GAAG,CAAC;QAC1B,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;YACf,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,EAAU;QACtB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;IAC3D,CAAC;CACF"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { EvidenceEvent } from "@refract-org/evidence-graph";
|
|
2
|
+
/**
 * Simplified view of a Wikidata entity as produced by `fetchWikidataEntity`.
 *
 * - `qid`: the entity ID that was requested.
 * - `label`: the English label; falls back to `qid` when there is none.
 * - `description`: the English description; empty string when there is none.
 * - `aliases`: the English aliases.
 * - `instanceOf`: item IDs taken from the entity's `P31` claims.
 * - `claims`: extracted claims keyed by property ID.
 */
export interface WikidataEntity {
|
|
3
|
+
qid: string;
|
|
4
|
+
label: string;
|
|
5
|
+
description: string;
|
|
6
|
+
aliases: string[];
|
|
7
|
+
instanceOf: string[];
|
|
8
|
+
claims: Record<string, WikidataClaim>;
|
|
9
|
+
}
|
|
10
|
+
/**
 * All extracted values of one property on an entity.
 *
 * - `property`: the property ID (the key under which the claim is stored).
 * - `propertyLabel`: display label; the mapper currently fills this with the
 *   property ID itself.
 * - `values`: the values that could be extracted for this property.
 */
export interface WikidataClaim {
|
|
11
|
+
property: string;
|
|
12
|
+
propertyLabel: string;
|
|
13
|
+
values: WikidataValue[];
|
|
14
|
+
}
|
|
15
|
+
/**
 * One claim value, flattened to a string.
 *
 * - `type`: which kind of value this is.
 * - `value`: the string form — an entity ID for `"wikibase-item"`, the time
 *   string for `"time"`, the stringified amount for `"quantity"`, and the raw
 *   text for `"string"` and `"url"`.
 */
export interface WikidataValue {
|
|
16
|
+
type: "wikibase-item" | "string" | "time" | "quantity" | "url";
|
|
17
|
+
value: string;
|
|
18
|
+
}
|
|
19
|
+
/**
 * Result of resolving one Wikipedia page title to Wikidata.
 *
 * - `pageTitle`: the title that was looked up.
 * - `qid`: the linked entity ID, or an empty string when none was found.
 * - `entity`: the fetched entity; absent when no ID was found or the entity
 *   could not be fetched.
 */
export interface PageToEntityMap {
|
|
20
|
+
pageTitle: string;
|
|
21
|
+
qid: string;
|
|
22
|
+
entity?: WikidataEntity;
|
|
23
|
+
}
|
|
24
|
+
/**
 * Looks up the Wikidata entity ID linked to an English Wikipedia page via the
 * page's `wikibase_item` page property.
 *
 * @returns the entity ID, or `null` when the response is not OK or no ID is found.
 */
export declare function fetchWikidataId(pageTitle: string): Promise<string | null>;
|
|
25
|
+
/**
 * Fetches the entity's JSON from Wikidata's `Special:EntityData` and reduces
 * it to a `WikidataEntity`.
 *
 * @returns the entity, or `null` when the response is not OK or the payload
 * has no entry for `qid`.
 */
export declare function fetchWikidataEntity(qid: string): Promise<WikidataEntity | null>;
|
|
26
|
+
/**
 * Resolves a page title to its Wikidata ID and entity. When no ID is found
 * the result carries an empty `qid` and no `entity`.
 */
export declare function mapPageToEntity(pageTitle: string): Promise<PageToEntityMap>;
|
|
27
|
+
/**
 * Resolves many page titles, working through them in consecutive batches of
 * `concurrency` titles (default 3). Results are returned in input order.
 */
export declare function mapPagesToEntities(pageTitles: string[], concurrency?: number): Promise<PageToEntityMap[]>;
|
|
28
|
+
/**
 * Converts an entity into evidence events: always one `sentence_first_seen`
 * event announcing the entity, plus one `category_added` event when the
 * entity has any `instanceOf` entries. `_pageTitle` is not used.
 */
export declare function wikidataEntityToEvents(entity: WikidataEntity, _pageTitle: string): EvidenceEvent[];
|
|
29
|
+
//# sourceMappingURL=wikidata-mapper.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"wikidata-mapper.d.ts","sourceRoot":"","sources":["../../src/wikidata-mapper.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AAKjE,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;CACvC;AAED,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,MAAM,CAAC;IACtB,MAAM,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,eAAe,GAAG,QAAQ,GAAG,MAAM,GAAG,UAAU,GAAG,KAAK,CAAC;IAC/D,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,cAAc,CAAC;CACzB;AAED,wBAAsB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAoB/E;AAED,wBAAsB,mBAAmB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC,CAgBrF;AAED,wBAAsB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAKjF;AAED,wBAAsB,kBAAkB,CAAC,UAAU,EAAE,MAAM,EAAE,EAAE,WAAW,SAAI,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAQ1G;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,GAAG,aAAa,EAAE,CAgClG"}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
// English Wikipedia API endpoint; queried with action=query&prop=pageprops to
// find the Wikidata ID linked to a page.
const WIKIPEDIA_API = "https://en.wikipedia.org/w/api.php";
|
|
2
|
+
// Base URL for per-entity JSON; requests are sent to `<base>/<qid>.json`.
const WIKIDATA_ENTITY_API = "https://www.wikidata.org/wiki/Special:EntityData";
|
|
3
|
+
/**
 * Looks up the Wikidata entity ID linked to an English Wikipedia page.
 *
 * Sends a `prop=pageprops` query for the title and reads `wikibase_item` from
 * the first returned page whose key is not "-1".
 *
 * @param pageTitle Title of the Wikipedia page.
 * @returns The entity ID, or `null` when the response is not OK, the payload
 *   has no pages, or the page carries no `wikibase_item` property.
 */
export async function fetchWikidataId(pageTitle) {
    const query = new URLSearchParams({
        action: "query",
        prop: "pageprops",
        titles: pageTitle,
        format: "json",
        origin: "*",
    });
    // Abort the request if it takes longer than 15 seconds.
    const response = await fetch(`${WIKIPEDIA_API}?${query}`, { signal: AbortSignal.timeout(15000) });
    if (!response.ok) {
        return null;
    }
    const payload = (await response.json());
    const pageMap = payload.query?.pages;
    if (!pageMap) {
        return null;
    }
    // The "-1" key is skipped; only the first remaining page is inspected.
    const firstPageId = Object.keys(pageMap).find((pageId) => pageId !== "-1");
    if (firstPageId === undefined) {
        return null;
    }
    return pageMap[firstPageId].pageprops?.wikibase_item ?? null;
}
|
|
26
|
+
/**
 * Fetches one entity from Wikidata's `Special:EntityData` endpoint and
 * reduces it to the simplified entity shape used by this package.
 *
 * @param qid Entity ID to fetch; also the key looked up in the response.
 * @returns The simplified entity, or `null` when the response is not OK or
 *   the payload has no entry under `qid`.
 */
export async function fetchWikidataEntity(qid) {
    const endpoint = `${WIKIDATA_ENTITY_API}/${encodeURIComponent(qid)}.json`;
    // Abort the request if it takes longer than 15 seconds.
    const response = await fetch(endpoint, { signal: AbortSignal.timeout(15000) });
    if (!response.ok) {
        return null;
    }
    const payload = (await response.json());
    const raw = payload.entities?.[qid];
    if (!raw) {
        return null;
    }
    // Only English terms are read; the label falls back to the ID itself.
    const label = raw.labels?.en?.value ?? qid;
    const description = raw.descriptions?.en?.value ?? "";
    const aliases = Object.values(raw.aliases?.en ?? {}).map((alias) => alias.value);
    return {
        qid,
        label,
        description,
        aliases,
        instanceOf: extractInstanceOf(raw),
        claims: extractClaims(raw),
    };
}
|
|
44
|
+
/**
 * Resolves a Wikipedia page title to its Wikidata ID and entity.
 *
 * @param pageTitle Title of the Wikipedia page.
 * @returns `{ pageTitle, qid, entity }`. `qid` is an empty string (and
 *   `entity` is absent) when no ID is found; `entity` is `undefined` when the
 *   ID is known but the entity could not be fetched.
 */
export async function mapPageToEntity(pageTitle) {
    const qid = await fetchWikidataId(pageTitle);
    if (qid) {
        const fetched = await fetchWikidataEntity(qid);
        // A failed entity fetch (null) is reported as "no entity".
        return { pageTitle, qid, entity: fetched ?? undefined };
    }
    return { pageTitle, qid: "" };
}
|
|
51
|
+
export async function mapPagesToEntities(pageTitles, concurrency = 3) {
|
|
52
|
+
const results = [];
|
|
53
|
+
for (let i = 0; i < pageTitles.length; i += concurrency) {
|
|
54
|
+
const batch = pageTitles.slice(i, i + concurrency);
|
|
55
|
+
const mapped = await Promise.all(batch.map((title) => mapPageToEntity(title)));
|
|
56
|
+
results.push(...mapped);
|
|
57
|
+
}
|
|
58
|
+
return results;
|
|
59
|
+
}
|
|
60
|
+
export function wikidataEntityToEvents(entity, _pageTitle) {
|
|
61
|
+
const events = [];
|
|
62
|
+
const props = Object.keys(entity.claims).join(", ");
|
|
63
|
+
const instanceOf = entity.instanceOf.join(", ");
|
|
64
|
+
events.push({
|
|
65
|
+
eventType: "sentence_first_seen",
|
|
66
|
+
fromRevisionId: 0,
|
|
67
|
+
toRevisionId: 0,
|
|
68
|
+
section: "",
|
|
69
|
+
before: "",
|
|
70
|
+
after: `Wikidata entity: ${entity.label}`,
|
|
71
|
+
deterministicFacts: [{ fact: "wikidata_entity_linked", detail: `qid=${entity.qid} label=${entity.label}` }],
|
|
72
|
+
layer: "observed",
|
|
73
|
+
timestamp: new Date().toISOString(),
|
|
74
|
+
});
|
|
75
|
+
if (instanceOf) {
|
|
76
|
+
events.push({
|
|
77
|
+
eventType: "category_added",
|
|
78
|
+
fromRevisionId: 0,
|
|
79
|
+
toRevisionId: 0,
|
|
80
|
+
section: "",
|
|
81
|
+
before: "",
|
|
82
|
+
after: instanceOf,
|
|
83
|
+
deterministicFacts: [{ fact: "wikidata_instance_of", detail: `types=${instanceOf} properties=${props}` }],
|
|
84
|
+
layer: "observed",
|
|
85
|
+
timestamp: new Date().toISOString(),
|
|
86
|
+
});
|
|
87
|
+
}
|
|
88
|
+
return events;
|
|
89
|
+
}
|
|
90
|
+
function extractInstanceOf(entity) {
|
|
91
|
+
const p31 = entity?.claims?.P31;
|
|
92
|
+
if (!p31)
|
|
93
|
+
return [];
|
|
94
|
+
return p31
|
|
95
|
+
.filter((c) => c.mainsnak?.snaktype === "value" && c.mainsnak?.datavalue?.type === "wikibase-item")
|
|
96
|
+
.map((c) => {
|
|
97
|
+
const dt = c.mainsnak?.datavalue;
|
|
98
|
+
if (!dt || dt.type !== "wikibase-item")
|
|
99
|
+
return "";
|
|
100
|
+
return dt.value.id;
|
|
101
|
+
})
|
|
102
|
+
.filter(Boolean);
|
|
103
|
+
}
|
|
104
|
+
function extractClaims(entity) {
|
|
105
|
+
const result = {};
|
|
106
|
+
if (!entity?.claims)
|
|
107
|
+
return result;
|
|
108
|
+
for (const [prop, statements] of Object.entries(entity.claims)) {
|
|
109
|
+
const values = [];
|
|
110
|
+
for (const stmt of statements) {
|
|
111
|
+
if (stmt.mainsnak?.snaktype !== "value" || !stmt.mainsnak?.datavalue)
|
|
112
|
+
continue;
|
|
113
|
+
const dt = stmt.mainsnak.datavalue;
|
|
114
|
+
switch (dt.type) {
|
|
115
|
+
case "wikibase-item":
|
|
116
|
+
values.push({ type: "wikibase-item", value: dt.value.id });
|
|
117
|
+
break;
|
|
118
|
+
case "string":
|
|
119
|
+
values.push({ type: "string", value: dt.value });
|
|
120
|
+
break;
|
|
121
|
+
case "time":
|
|
122
|
+
values.push({ type: "time", value: dt.value.time });
|
|
123
|
+
break;
|
|
124
|
+
case "quantity":
|
|
125
|
+
values.push({ type: "quantity", value: String(dt.value.amount) });
|
|
126
|
+
break;
|
|
127
|
+
case "url":
|
|
128
|
+
values.push({ type: "url", value: dt.value });
|
|
129
|
+
break;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
if (values.length > 0) {
|
|
133
|
+
result[prop] = { property: prop, propertyLabel: prop, values };
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
return result;
|
|
137
|
+
}
|
|
138
|
+
//# sourceMappingURL=wikidata-mapper.js.map
|