@refract-org/ingestion 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/README.md +27 -0
  2. package/dist/src/index.d.ts +49 -0
  3. package/dist/src/index.d.ts.map +1 -0
  4. package/dist/src/index.js +5 -0
  5. package/dist/src/index.js.map +1 -0
  6. package/dist/src/mediawiki-client.d.ts +24 -0
  7. package/dist/src/mediawiki-client.d.ts.map +1 -0
  8. package/dist/src/mediawiki-client.js +292 -0
  9. package/dist/src/mediawiki-client.js.map +1 -0
  10. package/dist/src/rate-limiter.d.ts +8 -0
  11. package/dist/src/rate-limiter.d.ts.map +1 -0
  12. package/dist/src/rate-limiter.js +28 -0
  13. package/dist/src/rate-limiter.js.map +1 -0
  14. package/dist/src/wikidata-mapper.d.ts +29 -0
  15. package/dist/src/wikidata-mapper.d.ts.map +1 -0
  16. package/dist/src/wikidata-mapper.js +138 -0
  17. package/dist/src/wikidata-mapper.js.map +1 -0
  18. package/dist/src/xml-dump-source.d.ts +8 -0
  19. package/dist/src/xml-dump-source.d.ts.map +1 -0
  20. package/dist/src/xml-dump-source.js +77 -0
  21. package/dist/src/xml-dump-source.js.map +1 -0
  22. package/dist/tsconfig 2.tsbuildinfo +1 -0
  23. package/dist/tsconfig.tsbuildinfo +1 -0
  24. package/package.json +28 -0
  25. package/src/__tests__/auth-integration.test.ts +59 -0
  26. package/src/__tests__/integration.test.ts +95 -0
  27. package/src/__tests__/mediawiki-client.test.ts +113 -0
  28. package/src/__tests__/page-move.test.ts +31 -0
  29. package/src/__tests__/rate-limiter.test.ts +30 -0
  30. package/src/__tests__/talk-page.test.ts +46 -0
  31. package/src/__tests__/wikidata-mapper.test.ts +134 -0
  32. package/src/__tests__/xml-dump-source.test.ts +151 -0
  33. package/src/index.ts +63 -0
  34. package/src/mediawiki-client.ts +420 -0
  35. package/src/rate-limiter.ts +29 -0
  36. package/src/wikidata-mapper.ts +197 -0
  37. package/src/xml-dump-source.ts +89 -0
@@ -0,0 +1,420 @@
1
+ import type { DiffLine, DiffResult, Revision } from "@refract-org/evidence-graph";
2
+ import type {
3
+ AuthConfig,
4
+ DiffFetcher,
5
+ MoveFetcher,
6
+ PageMove,
7
+ ProtectionLogEvent,
8
+ RevisionFetcher,
9
+ RevisionOptions,
10
+ RevisionSource,
11
+ } from "./index.js";
12
+ import { RateLimiter } from "./rate-limiter.js";
13
+
14
/** Endpoint used when the caller does not pass `apiUrl`. */
const DEFAULT_API_URL = "https://en.wikipedia.org/w/api.php";
/** User-Agent header value used when the caller does not pass `userAgent`. */
const DEFAULT_USER_AGENT = "Refract/0.1.0 (https://github.com/refract-org/var-ia; sequent@nextconsensus.com)";
/** Largest `rvlimit` value this module sends on a single revisions request. */
const MAX_REVISIONS_PER_REQUEST = 500;

/** Identity of the page a batch of revisions belongs to. */
interface PageInfo {
  pageId: number;
  title: string;
}

/** One entry of `query.pages[*].revisions` as read by this module. */
interface RawRevision {
  revid: number;
  parentid: number;
  timestamp: string;
  comment: string;
  size: number;
  minor?: boolean;
  user?: string;
  userhidden?: boolean;
  slots?: {
    main?: {
      content?: string;
    };
  };
}

/** Response shape of `action=query&prop=revisions`. */
interface RevisionQueryResponse {
  query?: {
    pages?: Record<
      string,
      {
        pageid: number;
        title: string;
        revisions?: RawRevision[];
        // Requests in this module use formatversion=2, where this flag is a
        // boolean; `string` is kept so the legacy ("") form still type-checks.
        missing?: boolean | string;
      }
    >;
  };
  continue?: {
    rvcontinue: string;
  };
}

/** Response shape of `action=query&list=logevents` for move log entries. */
interface LogEventResponse {
  query?: {
    logevents?: {
      logid: number;
      title: string;
      timestamp: string;
      comment: string;
      params?: {
        target_title: string;
      };
    }[];
  };
}

/** Response shape of `action=compare`. */
interface CompareResponse {
  compare?: {
    fromrevid: number;
    torevid: number;
    fromsize: number;
    tosize: number;
    /** Diff text under formatversion=2. */
    body?: string;
    /** Diff text under the legacy JSON format. */
    "*"?: string;
  };
}
79
+
80
/**
 * Rate-limited client for a MediaWiki Action API endpoint.
 *
 * Every request goes through the private `fetch` helper, which waits on the
 * rate limiter, attaches the configured headers, and retries on HTTP 429 and
 * 5xx responses.
 */
export class MediaWikiClient implements RevisionFetcher, RevisionSource, DiffFetcher, MoveFetcher {
  private rateLimiter: RateLimiter;
  private userAgent: string;
  private apiUrl: string;
  private auth?: AuthConfig;

  /**
   * @param options Optional overrides: `apiUrl` (defaults to DEFAULT_API_URL),
   *   `userAgent` (defaults to DEFAULT_USER_AGENT), `minDelayMs` passed to the
   *   rate limiter (defaults to 100) and `auth` credentials used to build
   *   request headers.
   */
  constructor(options?: { apiUrl?: string; userAgent?: string; minDelayMs?: number; auth?: AuthConfig }) {
    this.apiUrl = options?.apiUrl ?? DEFAULT_API_URL;
    this.userAgent = options?.userAgent ?? DEFAULT_USER_AGENT;
    this.rateLimiter = new RateLimiter(options?.minDelayMs ?? 100);
    this.auth = options?.auth;
  }

  /**
   * Fetches revisions of the talk page that belongs to `pageTitle`.
   *
   * @param pageTitle Title of the subject page (without the talk prefix).
   * @param options Forwarded unchanged to {@link fetchRevisions}.
   * @param talkPrefix Prefix prepended to `pageTitle`; defaults to `"Talk:"`.
   */
  async fetchTalkRevisions(pageTitle: string, options?: RevisionOptions, talkPrefix?: string): Promise<Revision[]> {
    const prefix = talkPrefix ?? "Talk:";
    const talkTitle = `${prefix}${pageTitle}`;
    return this.fetchRevisions(talkTitle, options);
  }

  /**
   * Fetches revisions of a page, following `rvcontinue` until the requested
   * number of revisions has been collected or the API reports no more data.
   *
   * @param pageTitle Title passed as `titles`.
   * @param options Optional direction, time/revision-id bounds and `limit`.
   *   When `limit` is omitted at most MAX_REVISIONS_PER_REQUEST revisions are
   *   returned.
   * @returns At most `limit` revisions, in the order the API returned them.
   *   An empty array is returned when the response has no `query.pages`.
   */
  async fetchRevisions(pageTitle: string, options?: RevisionOptions): Promise<Revision[]> {
    const revisions: Revision[] = [];
    const maxTotal = options?.limit ?? MAX_REVISIONS_PER_REQUEST;
    // A zero, negative or NaN limit can never be satisfied by a request.
    if (!(maxTotal > 0)) return revisions;

    const isNewer = options?.direction === "newer";
    let rvcontinue: string | undefined;
    let pageInfo: PageInfo | null = null;

    while (revisions.length < maxTotal) {
      // Ask only for what is still missing so the total never overshoots
      // `limit`, while staying within the per-request cap.
      const remaining = maxTotal - revisions.length;
      const params = new URLSearchParams({
        action: "query",
        prop: "revisions",
        titles: pageTitle,
        rvprop: "content|ids|timestamp|flags|comment|size|user",
        rvslots: "main",
        rvlimit: String(Math.min(remaining, MAX_REVISIONS_PER_REQUEST)),
        format: "json",
        formatversion: "2",
      });

      params.set("rvdir", isNewer ? "newer" : "older");

      // NOTE(review): with both bounds and the default "older" direction,
      // `end` is sent as rvstart and `start` as rvend, but a lone `start` is
      // always sent as rvstart and a lone `end` as rvend regardless of
      // direction — confirm this asymmetry is intended.
      if (options?.start && options?.end) {
        params.set("rvstart", formatTimestamp(isNewer ? options.start : options.end));
        params.set("rvend", formatTimestamp(isNewer ? options.end : options.start));
      } else if (options?.start) {
        params.set("rvstart", formatTimestamp(options.start));
      } else if (options?.end) {
        params.set("rvend", formatTimestamp(options.end));
      }
      if (options?.startRevId) {
        params.set("rvstartid", String(options.startRevId));
      }
      if (options?.endRevId) {
        params.set("rvendid", String(options.endRevId));
      }

      if (rvcontinue) {
        params.set("rvcontinue", rvcontinue);
      }

      const url = `${this.apiUrl}?${params.toString()}`;
      const response = await this.fetch(url);
      const data: RevisionQueryResponse = await response.json();

      if (!data.query?.pages) {
        break;
      }

      for (const page of Object.values(data.query.pages)) {
        if (page.missing) continue;
        if (!pageInfo) {
          // The first non-missing page supplies the id/title for every revision.
          pageInfo = { pageId: page.pageid, title: page.title };
        }
        if (page.revisions) {
          for (const rev of page.revisions) {
            revisions.push(this.mapRevision(rev, pageInfo));
          }
        }
      }

      if (!data.continue?.rvcontinue) {
        break;
      }
      rvcontinue = data.continue.rvcontinue;
    }

    // Guard against a server returning more rows than were requested.
    return revisions.length > maxTotal ? revisions.slice(0, maxTotal) : revisions;
  }

  /**
   * Fetches all move-log entries for a title, following `lecontinue`.
   *
   * @returns One PageMove per log entry; `newTitle` is `""` when the entry
   *   carries no `params.target_title`, and `revId` holds the log id.
   */
  async fetchPageMoves(pageTitle: string): Promise<PageMove[]> {
    const moves: PageMove[] = [];
    let lecontinue: string | undefined;

    while (true) {
      const params = new URLSearchParams({
        action: "query",
        list: "logevents",
        letype: "move",
        letitle: pageTitle,
        lelimit: "50",
        format: "json",
        formatversion: "2",
      });

      if (lecontinue) params.set("lecontinue", lecontinue);

      const url = `${this.apiUrl}?${params.toString()}`;
      const response = await this.fetch(url);
      const data = (await response.json()) as LogEventResponse & {
        continue?: { lecontinue: string };
      };

      if (!data.query?.logevents) break;

      for (const entry of data.query.logevents) {
        moves.push({
          oldTitle: entry.title,
          newTitle: entry.params?.target_title ?? "",
          timestamp: entry.timestamp,
          revId: entry.logid,
          comment: entry.comment ?? "",
        });
      }

      if (data.continue?.lecontinue) {
        lecontinue = data.continue.lecontinue;
      } else {
        break;
      }
    }

    return moves;
  }

  /**
   * Fetches all protection-log entries for a title, following `lecontinue`.
   *
   * @returns One ProtectionLogEvent per log entry; `level` is taken from the
   *   first element of the entry's protection details and may be undefined.
   */
  async fetchProtectionLogs(pageTitle: string): Promise<ProtectionLogEvent[]> {
    const events: ProtectionLogEvent[] = [];
    let lecontinue: string | undefined;

    while (true) {
      const params = new URLSearchParams({
        action: "query",
        list: "logevents",
        letype: "protect",
        letitle: pageTitle,
        lelimit: "50",
        // Every field read below must be requested explicitly: `ids` → logid,
        // `type` → action, plus title, timestamp, comment and details.
        leprop: "ids|title|type|timestamp|comment|details",
        format: "json",
        formatversion: "2",
      });

      if (lecontinue) params.set("lecontinue", lecontinue);

      const url = `${this.apiUrl}?${params.toString()}`;
      const response = await this.fetch(url);
      const data = (await response.json()) as {
        query?: {
          logevents?: Array<{
            logid: number;
            title: string;
            timestamp: string;
            comment: string;
            action: string;
            params?: {
              details?: Array<{ level?: string; expiry?: string }>;
              detail?: Array<{ level?: string; expiry?: string }>;
            };
          }>;
        };
        continue?: { lecontinue: string };
      };

      if (data.query?.logevents) {
        for (const entry of data.query.logevents) {
          // The API key is `details`; `detail` is still read as a fallback
          // for responses shaped like the previous typing.
          const details = entry.params?.details ?? entry.params?.detail;
          const level = details?.[0]?.level;
          events.push({
            logId: entry.logid,
            pageTitle: entry.title,
            timestamp: entry.timestamp,
            comment: entry.comment ?? "",
            action: entry.action as "protect" | "unprotect" | "modify",
            level,
          });
        }
      }

      if (data.continue?.lecontinue) {
        lecontinue = data.continue.lecontinue;
      } else {
        break;
      }
    }

    return events;
  }

  /**
   * Fetches the diff between two revisions via `action=compare`.
   *
   * @returns The revision ids echoed by the API, the parsed diff lines, an
   *   empty `sections` list and `tosize - fromsize` as `sizeDelta`.
   * @throws Error when the response has no `compare` object.
   */
  async fetchDiff(fromRevId: number, toRevId: number): Promise<DiffResult> {
    const params = new URLSearchParams({
      action: "compare",
      fromrev: String(fromRevId),
      torev: String(toRevId),
      // `size` is not part of the API's default prop set; without it
      // fromsize/tosize are absent and sizeDelta would be NaN.
      prop: "diff|ids|size",
      format: "json",
      formatversion: "2",
    });

    const url = `${this.apiUrl}?${params.toString()}`;
    const response = await this.fetch(url);
    const data = (await response.json()) as CompareResponse & { compare?: { body?: string } };
    const compare = data.compare;

    if (!compare) {
      throw new Error(`Failed to fetch diff for revisions ${fromRevId} -> ${toRevId}`);
    }

    const sizeDelta = compare.tosize - compare.fromsize;
    // formatversion=2 delivers the diff under `body`; `*` is the legacy key.
    // NOTE(review): no `difftype` is sent, and the API's default diff
    // rendering is presumably an HTML table rather than unified text, which
    // parseUnifiedDiff's line-prefix parsing would not recognise — confirm.
    const diffText = compare.body ?? compare["*"];
    const lines = diffText ? parseUnifiedDiff(diffText) : [];

    return {
      fromRevId: compare.fromrevid,
      toRevId: compare.torevid,
      lines,
      sections: [],
      sizeDelta,
    };
  }

  /**
   * Performs a rate-limited GET with authentication headers and retries.
   *
   * HTTP 429 waits for the `Retry-After` delay (1s when absent) and 5xx waits
   * `2 ** attempt` seconds; both retry until `retries` attempts are used.
   *
   * @throws Error on any other non-OK status, or when the last attempt fails.
   */
  private async fetch(url: string, retries = 3): Promise<Response> {
    for (let attempt = 0; attempt < retries; attempt++) {
      await this.rateLimiter.acquire();
      const headers: Record<string, string> = {
        "User-Agent": this.userAgent,
        Accept: "application/json",
        "Accept-Encoding": "gzip",
      };

      if (this.auth?.apiKey) {
        headers.Authorization = `Bearer ${this.auth.apiKey}`;
      } else if (this.auth?.apiUser && this.auth?.apiPassword) {
        const encoded = btoa(`${this.auth.apiUser}:${this.auth.apiPassword}`);
        headers.Authorization = `Basic ${encoded}`;
      }

      if (this.auth?.oauthClientId && this.auth?.oauthClientSecret) {
        headers["X-OAuth-Client-Id"] = this.auth.oauthClientId;
        headers["X-OAuth-Client-Secret"] = this.auth.oauthClientSecret;
      }

      const response = await fetch(url, {
        headers,
        signal: AbortSignal.timeout(30000),
      });

      if (response.ok) return response;

      const canRetry = attempt < retries - 1;

      if (response.status === 429 && canRetry) {
        await this.sleep(this.retryAfterMs(response.headers.get("Retry-After")));
        continue;
      }

      if (response.status >= 500 && canRetry) {
        await this.sleep(2 ** attempt * 1000);
        continue;
      }

      throw new Error(`MediaWiki API error: ${response.status} ${response.statusText} for ${url}`);
    }

    throw new Error(`MediaWiki API request failed after ${retries} retries for ${url}`);
  }

  /**
   * Converts a `Retry-After` header into a wait in milliseconds.
   *
   * Accepts the delay-seconds form and the HTTP-date form; falls back to
   * 1000ms when the header is missing or unparseable.
   */
  private retryAfterMs(header: string | null): number {
    const fallbackMs = 1000;
    if (!header) return fallbackMs;

    const seconds = parseInt(header, 10);
    if (!Number.isNaN(seconds)) return Math.max(0, seconds) * 1000;

    const retryAt = Date.parse(header);
    if (!Number.isNaN(retryAt)) return Math.max(0, retryAt - Date.now());

    return fallbackMs;
  }

  /** Resolves after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Async-iterable view over {@link fetchRevisions}: fetches the full list
   * first, then yields each revision in order.
   */
  async *revisions(pageTitle: string, options?: RevisionOptions): AsyncIterable<Revision> {
    const revs = await this.fetchRevisions(pageTitle, options);
    for (const rev of revs) {
      yield rev;
    }
  }

  /**
   * Converts an API revision into a Revision, defaulting missing content and
   * comment to `""`, missing `minor` to false, and hiding the user name when
   * the API marks it as `userhidden`.
   */
  private mapRevision(raw: RawRevision, page: PageInfo): Revision {
    const content = raw.slots?.main?.content ?? "";
    return {
      revId: raw.revid,
      pageId: page.pageId,
      pageTitle: page.title,
      timestamp: raw.timestamp,
      user: raw.userhidden ? undefined : raw.user,
      comment: raw.comment ?? "",
      content,
      size: raw.size,
      minor: raw.minor ?? false,
    };
  }
}
379
+
380
/**
 * Renders a Date as an ISO-8601 UTC timestamp with second precision, i.e.
 * `toISOString()` with the `.sss` millisecond fraction removed.
 *
 * @throws RangeError when `date` is invalid (raised by `toISOString`).
 */
function formatTimestamp(date: Date): string {
  // toISOString always ends in ".sssZ"; swap that tail for a bare "Z".
  return date.toISOString().replace(/\.\d{3}Z$/, "Z");
}
384
+
385
/**
 * Parses unified-diff text into a flat list of diff lines.
 *
 * Hunk headers (`@@ -a,b +c,d @@`) reset the running line numbers. Unchanged
 * and added lines are numbered by their position in the new text, removed
 * lines by their position in the old text. `---` / `+++` file-header lines
 * are skipped only while outside a hunk: inside a hunk (tracked through the
 * header's line counts) a line such as `-----` is a removed line whose
 * content happens to start with dashes, and must not be dropped.
 *
 * @param diffText Unified diff text, lines separated by `\n`.
 * @returns Parsed lines in input order; lines with any other prefix are ignored.
 */
function parseUnifiedDiff(diffText: string): DiffLine[] {
  const lines: DiffLine[] = [];
  const hunkHeader = /@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/;

  let fromLine = 0;
  let toLine = 0;
  // Old/new lines the current hunk still expects, taken from its header.
  let fromRemaining = 0;
  let toRemaining = 0;

  for (const line of diffText.split("\n")) {
    if (line.startsWith("@@")) {
      const match = line.match(hunkHeader);
      if (match) {
        fromLine = parseInt(match[1], 10);
        toLine = parseInt(match[3], 10);
        // An omitted count means the hunk side spans exactly one line.
        fromRemaining = match[2] === undefined ? 1 : parseInt(match[2], 10);
        toRemaining = match[4] === undefined ? 1 : parseInt(match[4], 10);
      }
      continue;
    }

    const insideHunk = fromRemaining > 0 || toRemaining > 0;
    if (!insideHunk && (line.startsWith("---") || line.startsWith("+++"))) {
      continue;
    }

    if (line.startsWith(" ")) {
      lines.push({ type: "unchanged", content: line.slice(1), lineNumber: toLine });
      fromLine++;
      toLine++;
      fromRemaining = Math.max(0, fromRemaining - 1);
      toRemaining = Math.max(0, toRemaining - 1);
    } else if (line.startsWith("-")) {
      lines.push({ type: "removed", content: line.slice(1), lineNumber: fromLine });
      fromLine++;
      fromRemaining = Math.max(0, fromRemaining - 1);
    } else if (line.startsWith("+")) {
      lines.push({ type: "added", content: line.slice(1), lineNumber: toLine });
      toLine++;
      toRemaining = Math.max(0, toRemaining - 1);
    }
  }

  return lines;
}
@@ -0,0 +1,29 @@
1
/**
 * Spaces callers at least `minDelayMs` apart by handing out consecutive time
 * slots: each `acquire()` reserves the next slot and resolves once that slot
 * has been reached.
 */
export class RateLimiter {
  /** Earliest time (ms since epoch) at which the next caller may proceed. */
  private nextSlot: number;

  /** @param minDelayMs Minimum spacing between two granted slots, in ms. */
  constructor(private minDelayMs: number = 100) {
    this.nextSlot = Date.now();
  }

  /** Reserves the next free slot and waits until it starts. */
  async acquire(): Promise<void> {
    const now = Date.now();
    // A free limiter grants "now"; a busy one grants the already-booked slot.
    const grantedSlot = this.nextSlot <= now ? now : this.nextSlot;
    this.nextSlot = grantedSlot + this.minDelayMs;

    const delay = grantedSlot - now;
    if (!(delay > 0)) return;
    await this.sleep(delay);
  }

  /** Resolves after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise<void>((done) => {
      setTimeout(done, ms);
    });
  }
}
@@ -0,0 +1,197 @@
1
+ import type { EvidenceEvent } from "@refract-org/evidence-graph";
2
+
3
/** Wikipedia Action API endpoint used to look up a page's Wikidata id. */
const WIKIPEDIA_API = "https://en.wikipedia.org/w/api.php";
/** Base URL of Wikidata's per-entity JSON export. */
const WIKIDATA_ENTITY_API = "https://www.wikidata.org/wiki/Special:EntityData";

/** Simplified view of a Wikidata entity. */
export interface WikidataEntity {
  /** Entity identifier the data was requested with. */
  qid: string;
  /** English label, or the qid when no English label exists. */
  label: string;
  /** English description, or `""` when absent. */
  description: string;
  /** English aliases. */
  aliases: Array<string>;
  /** Entity ids taken from the entity's P31 statements. */
  instanceOf: Array<string>;
  /** Extracted claims keyed by property id. */
  claims: Record<string, WikidataClaim>;
}

/** All extracted values of one property on an entity. */
export interface WikidataClaim {
  property: string;
  propertyLabel: string;
  values: Array<WikidataValue>;
}

/** One extracted claim value, tagged with the kind of data it holds. */
export interface WikidataValue {
  type: "wikibase-item" | "string" | "time" | "quantity" | "url";
  value: string;
}

/** Result of resolving a page title to its Wikidata entity. */
export interface PageToEntityMap {
  pageTitle: string;
  /** Resolved entity id; `""` when the page has none. */
  qid: string;
  entity?: WikidataEntity;
}
31
+
32
/**
 * Looks up the Wikidata item id linked to a Wikipedia page via the page's
 * `pageprops`.
 *
 * @param pageTitle Title passed as `titles` to the query.
 * @returns The `wikibase_item` of the first page whose key is not `"-1"`, or
 *   null when the request is not OK, no pages are returned, or that page has
 *   no `wikibase_item`.
 */
export async function fetchWikidataId(pageTitle: string): Promise<string | null> {
  const query = new URLSearchParams({
    action: "query",
    prop: "pageprops",
    titles: pageTitle,
    format: "json",
    origin: "*",
  });

  const response = await fetch(`${WIKIPEDIA_API}?${query}`, { signal: AbortSignal.timeout(15000) });
  if (!response.ok) return null;

  const payload = (await response.json()) as WikipediaQueryResponse;
  const pages = payload.query?.pages;
  if (!pages) return null;

  // Key "-1" is skipped; the first remaining page decides the result.
  const firstPageKey = Object.keys(pages).find((key) => key !== "-1");
  if (firstPageKey === undefined) return null;
  return pages[firstPageKey].pageprops?.wikibase_item ?? null;
}
53
+
54
/**
 * Fetches a Wikidata entity's JSON export and reduces it to a WikidataEntity.
 *
 * @param qid Entity id to request.
 * @returns The simplified entity, or null when the request is not OK or the
 *   response contains no usable entity. The returned `qid` is always the id
 *   that was requested.
 */
export async function fetchWikidataEntity(qid: string): Promise<WikidataEntity | null> {
  const url = `${WIKIDATA_ENTITY_API}/${encodeURIComponent(qid)}.json`;
  const res = await fetch(url, { signal: AbortSignal.timeout(15000) });
  if (!res.ok) return null;
  const data = (await res.json()) as WikidataEntityResponse;
  const entities = data.entities ?? {};

  let entity: WikidataEntityData | undefined = entities[qid];
  if (!entity) {
    // A redirected (merged) id is presumably answered with the target entity
    // keyed by the target's id; accept it when it is the only entity returned.
    const candidates = Object.values(entities);
    if (candidates.length === 1) entity = candidates[0];
  }
  if (!entity) return null;

  return {
    qid,
    label: entity.labels?.en?.value ?? qid,
    description: entity.descriptions?.en?.value ?? "",
    aliases: (entity.aliases?.en ?? []).map((alias) => alias.value),
    instanceOf: extractInstanceOf(entity),
    claims: extractClaims(entity),
  };
}
71
+
72
/**
 * Resolves a page title to its Wikidata id and, when one exists, the entity.
 *
 * @returns `{ pageTitle, qid: "" }` when no id is found; otherwise the id
 *   plus the fetched entity (`undefined` when the entity fetch yields null).
 */
export async function mapPageToEntity(pageTitle: string): Promise<PageToEntityMap> {
  const resolvedQid = await fetchWikidataId(pageTitle);
  if (resolvedQid) {
    const fetched = await fetchWikidataEntity(resolvedQid);
    return { pageTitle, qid: resolvedQid, entity: fetched === null ? undefined : fetched };
  }
  return { pageTitle, qid: "" };
}
78
+
79
/**
 * Maps many page titles to Wikidata entities in batches.
 *
 * Titles are processed `concurrency` at a time: each batch runs in parallel
 * and the next batch starts only after the previous one has finished.
 *
 * @param pageTitles Titles to resolve.
 * @param concurrency Batch size; values below 1 (or NaN) are treated as 1 so
 *   the batch loop always advances, and fractional values are rounded down.
 * @returns One result per title, in input order.
 */
export async function mapPagesToEntities(pageTitles: string[], concurrency = 3): Promise<PageToEntityMap[]> {
  // A step of 0 or less would never move past the first batch.
  const batchSize = concurrency >= 1 ? Math.floor(concurrency) : 1;
  const results: PageToEntityMap[] = [];
  for (let i = 0; i < pageTitles.length; i += batchSize) {
    const batch = pageTitles.slice(i, i + batchSize);
    const mapped = await Promise.all(batch.map((title) => mapPageToEntity(title)));
    results.push(...mapped);
  }
  return results;
}
88
+
89
/**
 * Builds evidence events describing a linked Wikidata entity.
 *
 * Always emits a `sentence_first_seen` event recording the entity link. When
 * the entity has at least one instance-of id, a `category_added` event
 * listing those ids and the entity's claim property ids is added as well.
 * Both events use revision id 0, an empty section, and the current time.
 *
 * @param entity Entity to describe.
 * @param _pageTitle Unused.
 */
export function wikidataEntityToEvents(entity: WikidataEntity, _pageTitle: string): EvidenceEvent[] {
  const propertyList = Object.keys(entity.claims).join(", ");
  const typeList = entity.instanceOf.join(", ");

  const linkedEvent: EvidenceEvent = {
    eventType: "sentence_first_seen",
    fromRevisionId: 0,
    toRevisionId: 0,
    section: "",
    before: "",
    after: `Wikidata entity: ${entity.label}`,
    deterministicFacts: [{ fact: "wikidata_entity_linked", detail: `qid=${entity.qid} label=${entity.label}` }],
    layer: "observed",
    timestamp: new Date().toISOString(),
  };

  // No instance-of ids means there is nothing to report as a category.
  if (!typeList) {
    return [linkedEvent];
  }

  const typeEvent: EvidenceEvent = {
    eventType: "category_added",
    fromRevisionId: 0,
    toRevisionId: 0,
    section: "",
    before: "",
    after: typeList,
    deterministicFacts: [{ fact: "wikidata_instance_of", detail: `types=${typeList} properties=${propertyList}` }],
    layer: "observed",
    timestamp: new Date().toISOString(),
  };

  return [linkedEvent, typeEvent];
}
122
+
123
/** Page entry of a `prop=pageprops` query, reduced to the field read here. */
type WikipediaPageProps = { pageprops?: { wikibase_item?: string } };

/** Response shape of the Wikipedia `pageprops` query. */
interface WikipediaQueryResponse {
  query?: {
    pages?: Record<string, WikipediaPageProps>;
  };
}

/** A single text value as it appears in labels, descriptions and aliases. */
type WikidataText = { value: string };

/** Raw entity as found under `entities[...]` in the entity JSON export. */
interface WikidataEntityData {
  /** Labels keyed by language code. */
  labels?: Record<string, WikidataText>;
  /** Descriptions keyed by language code. */
  descriptions?: Record<string, WikidataText>;
  /** Alias lists keyed by language code. */
  aliases?: Record<string, WikidataText[]>;
  /** Statement lists keyed by property id. */
  claims?: Record<string, WikidataStatement[]>;
}

/** Raw statement; only the main snak is modelled. */
interface WikidataStatement {
  mainsnak?: {
    snaktype: string;
    datavalue?: {
      type: string;
      /** Payload whose shape depends on `type`; narrowed by the readers. */
      value: unknown;
    };
    datatype?: string;
  };
}

/** Top-level shape of the entity JSON export. */
interface WikidataEntityResponse {
  entities?: Record<string, WikidataEntityData>;
}
150
+
151
/**
 * Collects the entity ids referenced by an entity's P31 statements.
 *
 * Only statements whose main snak has snaktype `"value"` and an entity-id
 * data value are considered. Wikibase JSON tags such data values with type
 * `"wikibase-entityid"`; `"wikibase-item"` is also accepted so inputs shaped
 * like the previous implementation's expectation keep working.
 *
 * @param entity Raw entity data, or undefined.
 * @returns Non-empty ids in statement order; `[]` when there are no P31 claims.
 */
function extractInstanceOf(entity: WikidataEntityData | undefined): string[] {
  const statements = entity?.claims?.P31;
  if (!statements) return [];

  const ids: string[] = [];
  for (const statement of statements) {
    const snak = statement.mainsnak;
    if (!snak || snak.snaktype !== "value") continue;

    const datavalue = snak.datavalue;
    if (!datavalue) continue;
    if (datavalue.type !== "wikibase-entityid" && datavalue.type !== "wikibase-item") continue;

    const id = (datavalue.value as { id?: string } | null | undefined)?.id;
    if (id) ids.push(id);
  }
  return ids;
}
165
+
166
/**
 * Reduces an entity's statements to simple typed values, keyed by property.
 *
 * Only main snaks with snaktype `"value"` and a data value are read. The
 * value kind is derived from the data value's `type`, refined by the snak's
 * `datatype` where the two differ in Wikibase JSON:
 * - entity ids arrive as `"wikibase-entityid"` (the legacy `"wikibase-item"`
 *   tag is still accepted) and are kept when the snak's datatype is absent or
 *   `"wikibase-item"`;
 * - URLs arrive as `"string"` data values on snaks whose datatype is `"url"`.
 * Other data value types are skipped, and properties that end up with no
 * values are omitted. `propertyLabel` is set to the property id.
 *
 * @param entity Raw entity data, or undefined.
 * @returns Claims keyed by property id; `{}` when the entity has no claims.
 */
function extractClaims(entity: WikidataEntityData | undefined): Record<string, WikidataClaim> {
  const result: Record<string, WikidataClaim> = {};
  if (!entity?.claims) return result;

  for (const [prop, statements] of Object.entries(entity.claims)) {
    const values: WikidataValue[] = [];

    for (const stmt of statements) {
      const snak = stmt.mainsnak;
      if (!snak || snak.snaktype !== "value" || !snak.datavalue) continue;

      const { type, value } = snak.datavalue;
      switch (type) {
        case "wikibase-entityid":
        case "wikibase-item": {
          // Entity-id values can also point at non-item entities; keep items only.
          if (snak.datatype !== undefined && snak.datatype !== "wikibase-item") break;
          const id = (value as { id?: string } | null | undefined)?.id;
          if (id) values.push({ type: "wikibase-item", value: id });
          break;
        }
        case "string":
          values.push({ type: snak.datatype === "url" ? "url" : "string", value: value as string });
          break;
        case "time":
          values.push({ type: "time", value: (value as { time: string }).time });
          break;
        case "quantity":
          values.push({ type: "quantity", value: String((value as { amount: string }).amount) });
          break;
        case "url":
          values.push({ type: "url", value: value as string });
          break;
      }
    }

    if (values.length > 0) {
      result[prop] = { property: prop, propertyLabel: prop, values };
    }
  }
  return result;
}