@rtbnext/core 2.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +11 -0
- package/dist/abstract/Cache.d.ts +9 -0
- package/dist/abstract/Cache.js +19 -0
- package/dist/abstract/Index.d.ts +22 -0
- package/dist/abstract/Index.js +81 -0
- package/dist/abstract/Job.d.ts +19 -0
- package/dist/abstract/Job.js +49 -0
- package/dist/abstract/Snapshot.d.ts +22 -0
- package/dist/abstract/Snapshot.js +78 -0
- package/dist/bin/cli.d.ts +2 -0
- package/dist/bin/cli.js +25 -0
- package/dist/bin/cron.d.ts +2 -0
- package/dist/bin/cron.js +4 -0
- package/dist/core/Config.d.ts +30 -0
- package/dist/core/Config.js +66 -0
- package/dist/core/Cron.d.ts +12 -0
- package/dist/core/Cron.js +52 -0
- package/dist/core/Fetch.d.ts +28 -0
- package/dist/core/Fetch.js +172 -0
- package/dist/core/Logger.d.ts +30 -0
- package/dist/core/Logger.js +92 -0
- package/dist/core/Queue.d.ts +37 -0
- package/dist/core/Queue.js +136 -0
- package/dist/core/Storage.d.ts +28 -0
- package/dist/core/Storage.js +166 -0
- package/dist/core/Utils.d.ts +33 -0
- package/dist/core/Utils.js +167 -0
- package/dist/interfaces/cache.d.ts +6 -0
- package/dist/interfaces/config.d.ts +21 -0
- package/dist/interfaces/cron.d.ts +3 -0
- package/dist/interfaces/fetch.d.ts +13 -0
- package/dist/interfaces/filter.d.ts +12 -0
- package/dist/interfaces/index.d.ts +30 -0
- package/dist/interfaces/job.d.ts +9 -0
- package/dist/interfaces/list.d.ts +9 -0
- package/dist/interfaces/logger.d.ts +20 -0
- package/dist/interfaces/mover.d.ts +7 -0
- package/dist/interfaces/parser.d.ts +68 -0
- package/dist/interfaces/profile.d.ts +30 -0
- package/dist/interfaces/queue.d.ts +17 -0
- package/dist/interfaces/snapshot.d.ts +16 -0
- package/dist/interfaces/stats.d.ts +45 -0
- package/dist/interfaces/storage.d.ts +16 -0
- package/dist/job/Alias.d.ts +8 -0
- package/dist/job/Alias.js +42 -0
- package/dist/job/Annual.d.ts +8 -0
- package/dist/job/Annual.js +41 -0
- package/dist/job/List.d.ts +11 -0
- package/dist/job/List.js +101 -0
- package/dist/job/Merge.d.ts +10 -0
- package/dist/job/Merge.js +59 -0
- package/dist/job/Move.d.ts +7 -0
- package/dist/job/Move.js +33 -0
- package/dist/job/Performance.d.ts +8 -0
- package/dist/job/Performance.js +27 -0
- package/dist/job/Profile.d.ts +11 -0
- package/dist/job/Profile.js +76 -0
- package/dist/job/Queue.d.ts +8 -0
- package/dist/job/Queue.js +54 -0
- package/dist/job/RTB.d.ts +12 -0
- package/dist/job/RTB.js +121 -0
- package/dist/job/Stats.d.ts +11 -0
- package/dist/job/Stats.js +46 -0
- package/dist/job/Top10.d.ts +9 -0
- package/dist/job/Top10.js +48 -0
- package/dist/job/Wiki.d.ts +9 -0
- package/dist/job/Wiki.js +40 -0
- package/dist/job/index.d.ts +26 -0
- package/dist/job/index.js +26 -0
- package/dist/lib/const.d.ts +31 -0
- package/dist/lib/const.js +74 -0
- package/dist/lib/list.d.ts +90 -0
- package/dist/lib/list.js +72 -0
- package/dist/lib/regex.d.ts +7 -0
- package/dist/lib/regex.js +7 -0
- package/dist/model/Filter.d.ts +28 -0
- package/dist/model/Filter.js +122 -0
- package/dist/model/List.d.ts +12 -0
- package/dist/model/List.js +43 -0
- package/dist/model/ListIndex.d.ts +8 -0
- package/dist/model/ListIndex.js +10 -0
- package/dist/model/Mover.d.ts +15 -0
- package/dist/model/Mover.js +74 -0
- package/dist/model/Profile.d.ts +49 -0
- package/dist/model/Profile.js +181 -0
- package/dist/model/ProfileIndex.d.ts +20 -0
- package/dist/model/ProfileIndex.js +140 -0
- package/dist/model/Stats.d.ts +56 -0
- package/dist/model/Stats.js +435 -0
- package/dist/parser/BillionairesListParser.d.ts +3 -0
- package/dist/parser/BillionairesListParser.js +2 -0
- package/dist/parser/ListParser.d.ts +7 -0
- package/dist/parser/ListParser.js +11 -0
- package/dist/parser/Parser.d.ts +43 -0
- package/dist/parser/Parser.js +146 -0
- package/dist/parser/PersonListParser.d.ts +29 -0
- package/dist/parser/PersonListParser.js +111 -0
- package/dist/parser/ProfileParser.d.ts +44 -0
- package/dist/parser/ProfileParser.js +193 -0
- package/dist/parser/RTBListParser.d.ts +15 -0
- package/dist/parser/RTBListParser.js +91 -0
- package/dist/types/annual.d.ts +7 -0
- package/dist/types/config.d.ts +35 -0
- package/dist/types/fetch.d.ts +3 -0
- package/dist/types/generic.d.ts +10 -0
- package/dist/types/job.d.ts +71 -0
- package/dist/types/list.d.ts +49 -0
- package/dist/types/parser.d.ts +7 -0
- package/dist/types/profile.d.ts +9 -0
- package/dist/types/queue.d.ts +15 -0
- package/dist/types/response.d.ts +183 -0
- package/dist/types/storage.d.ts +3 -0
- package/dist/types/wiki.d.ts +1 -0
- package/dist/utils/Annual.d.ts +7 -0
- package/dist/utils/Annual.js +99 -0
- package/dist/utils/Performance.d.ts +8 -0
- package/dist/utils/Performance.js +39 -0
- package/dist/utils/ProfileManager.d.ts +24 -0
- package/dist/utils/ProfileManager.js +60 -0
- package/dist/utils/ProfileMerger.d.ts +11 -0
- package/dist/utils/ProfileMerger.js +67 -0
- package/dist/utils/Ranking.d.ts +11 -0
- package/dist/utils/Ranking.js +77 -0
- package/dist/utils/Wiki.d.ts +11 -0
- package/dist/utils/Wiki.js +168 -0
- package/package.json +45 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import { Fetch } from '../core/Fetch.js';
|
|
2
|
+
import { log } from '../core/Logger.js';
|
|
3
|
+
import { Parser } from '../parser/Parser.js';
|
|
4
|
+
/**
 * Static helpers that try to link a profile to a Wikidata item, its English
 * Wikipedia article and a Wikimedia Commons image.
 *
 * All network access goes through the shared {@link Wiki.fetch} client. Every
 * query is wrapped in `log.catchAsync`, which presumably logs the given message
 * and resolves to `undefined` on failure (callers here treat the result as
 * possibly nullish) — confirm against the Logger implementation.
 */
export class Wiki {
    /** Shared fetch client used for the Wikidata, Wikipedia and Commons requests. */
    static fetch = Fetch.getInstance();

    /**
     * Heuristically scores how well a Wikidata SPARQL result row matches a profile.
     *
     * @param item - One binding row of the query built in {@link Wiki.queryWikidata};
     *   every field is read as `{ value }`.
     * @param data - Profile data; only `data.info` (name.shortName, gender,
     *   birthDate, citizenship) is read.
     * @returns A score clamped to the range [0, 1].
     */
    static scoreWDItem(item, data) {
        const { name: { shortName } = {}, gender, birthDate, citizenship } = data.info ?? {};
        let score = 0;

        // --- name matching ---
        // An exact label match is worth 0.2; any other label (regardless of its
        // language) still earns 0.1 because the row was found by name.
        if (item.itemLabel.value.trim() === shortName) score += 0.2;
        else score += 0.1;

        // --- birthdate matching ---
        // Full date prefix match > same year > known but different date (penalty).
        if (birthDate && item.birthdate?.value.startsWith(birthDate)) score += 0.2;
        else if (birthDate && item.birthdate?.value.startsWith(birthDate.substring(0, 4))) score += 0.1;
        else if (birthDate && item.birthdate?.value) score -= 0.1;

        // --- gender matching ---
        // 'm' / 'f' map to the Wikidata entity ids compared below; any other
        // profile value maps to '-' and therefore never matches.
        if (gender && item.gender?.value.endsWith(gender === 'm' ? 'Q6581097' : gender === 'f' ? 'Q6581072' : '-'))
            score += 0.1;
        // Penalise only when the profile actually states a gender that disagrees
        // with the item (mirrors the birthdate rule above). The previous guard
        // tested `score`, which penalised profiles without any gender data.
        else if (gender && item.gender?.value) score -= 0.2;

        // --- citizenship matching ---
        if (citizenship && item.iso2?.value === citizenship.toUpperCase()) score += 0.2;

        // --- media matching ---
        if (item.article) score += 0.1;
        if (item.image) score += 0.05;

        // --- occupation matching ---
        // NOTE(review): the listed ids are presumably business-related
        // occupations — verify against Wikidata.
        if (['Q131524', 'Q557880', 'Q911554', 'Q2462658'].some(e => item.occupation?.value.endsWith(e))) score += 0.2;
        else if (item.occupation) score += 0.05;

        // --- economic matching ---
        if (item.employer ?? item.ownerOf) score += 0.1;
        if (item.netWorth) score += 0.2;

        return Math.min(1, Math.max(0, score));
    }

    /**
     * Looks up the best matching Wikidata item for a profile by name.
     *
     * Builds name variants from `data.info.name.shortName`, queries Wikidata via
     * SPARQL, scores every returned row with {@link Wiki.scoreWDItem} and keeps
     * the best one. Rows scoring below 0.65 are rejected.
     *
     * @param data - Profile data; `data.info.name.shortName` is required.
     * @returns The result of `Parser.container` for `qid`, `confidence`,
     *   `article` and `image`, or whatever `log.catchAsync` yields on failure.
     */
    static async queryWikidata(data) {
        log.debug(`Querying Wikidata for: ${data.info?.name?.shortName}`);

        return await log.catchAsync(
            async () => {
                const shortName = data.info?.name?.shortName;
                if (!shortName || !shortName.trim()) throw new Error('No short name provided');

                // Split on any whitespace and drop empty parts so that leading or
                // repeated spaces cannot produce an empty first name.
                const [first, ...rest] = shortName.trim().split(/\s+/).filter(Boolean);
                const last = rest.pop();

                // Escape characters that would terminate or corrupt a SPARQL
                // double-quoted string literal.
                const escapeLiteral = s =>
                    s.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\n/g, '\\n').replace(/\r/g, '\\r');

                // Abbreviated and first/last variants only make sense when the name
                // has at least two parts; otherwise they would contain "undefined".
                const candidates = [shortName];
                if (first && last) candidates.push(`${first[0]}. ${last}`, `${first} ${last}`);

                // De-duplicate (two-word names make the last variant equal shortName).
                const nameVariants = [...new Set(candidates)]
                    .filter(Boolean)
                    .map(escapeLiteral)
                    .map(n => `"${n}"@en "${n}"@de`)
                    .join(' ');

                const sparql = `
                    SELECT DISTINCT
                    ?item ?itemLabel ?gender ?birthdate ?article ?image ?iso2 ?occupation ?employer ?ownerOf ?netWorth
                    WHERE {
                    VALUES ?name { ${nameVariants} }
                    ?item wdt:P31 wd:Q5 .
                    { { ?item rdfs:label ?name . } UNION { ?item skos:altLabel ?name . } }
                    OPTIONAL { ?item wdt:P21 ?gender . }
                    OPTIONAL { ?item wdt:P569 ?birthdate . }
                    OPTIONAL { ?article schema:about ?item ; schema:isPartOf <https://en.wikipedia.org/> . }
                    OPTIONAL { ?item wdt:P18 ?image . }
                    OPTIONAL { ?item wdt:P27 ?country . ?country wdt:P297 ?iso2 . }
                    OPTIONAL { ?item wdt:P106 ?occupation . }
                    OPTIONAL { ?item wdt:P108 ?employer . }
                    OPTIONAL { ?item wdt:P169 ?employer . }
                    OPTIONAL { ?item wdt:P127 ?ownerOf . }
                    OPTIONAL { ?item wdt:P1830 ?ownerOf . }
                    OPTIONAL { ?item wdt:P2218 ?netWorth . }
                    SERVICE wikibase:label { bd:serviceParam wikibase:language "en,de" . }
                    }
                    LIMIT 20
                `;

                const res = await Wiki.fetch.wikidata(sparql);

                let best;
                for (const item of res.data?.results?.bindings ?? []) {
                    const score = Wiki.scoreWDItem(item, data);
                    if (!best || score > best.score) best = { score, item };
                    // A perfect score cannot be beaten, so stop early.
                    if (best.score === 1) break;
                }

                if (!best || best.score < 0.65) throw new Error('No suitable Wikidata item found');
                log.debug(`Best Wikidata item for ${shortName} has score: ${best.score}`);

                // Entity, article and image values are URLs; only the last path
                // segment (QID / page title / file name) is kept.
                return Parser.container({
                    qid: { value: best.item.item.value.split('/').pop(), type: 'string' },
                    confidence: { value: best.score, type: 'number', args: [3] },
                    article: { value: best.item.article?.value.split('/').pop(), type: 'decodeURI' },
                    image: { value: best.item.image?.value.split('/').pop(), type: 'decodeURI' }
                });
            },
            `Failed to query Wikidata for: ${data.info?.name?.shortName ?? 'unknown'}`
        );
    }

    /**
     * Fetches URL and attribution metadata for a Wikimedia Commons file.
     *
     * @param title - File name without the `File:` prefix.
     * @returns The result of `Parser.container` for `url`, `file`, `thumb`,
     *   `caption`, `date` and `credits`, or whatever `log.catchAsync` yields on failure.
     */
    static async queryCommonsImage(title) {
        log.debug(`Querying Wikimedia Commons image: ${title}`);

        return await log.catchAsync(async () => {
            const res = await Wiki.fetch.commons({
                action: 'query',
                titles: `File:${title}`,
                prop: 'imageinfo',
                redirects: 1,
                iiprop: 'url|extmetadata',
                iiurlwidth: 400
            });

            const info = res.data?.query?.pages?.[0]?.imageinfo?.[0];
            if (!info) throw new Error(`No image info found for: ${title}`);
            log.debug(`Wikimedia Commons image info received for: ${title}`);

            const meta = info.extmetadata ?? {};
            // Prefer the explicitly requested thumbnail, else the first responsive URL.
            const thumbUrl = info.thumburl ?? Object.values(info.responsiveUrls ?? {}).at(0);
            const dateTime = meta.DateTimeOriginal?.value ?? meta.DateTime?.value;

            // Author, license and a fixed source note, joined into one credit line.
            // NOTE(review): Parser.list presumably drops empty entries — confirm.
            const credits = Parser.list([
                meta.Attribution?.value ?? meta.Artist?.value ?? meta.Credit?.value,
                meta.LicenseShortName?.value ?? meta.UsageTerms?.value,
                'via Wikimedia Commons'
            ]).join(', ');

            return Parser.container({
                url: { value: info.descriptionurl, type: 'string' },
                file: { value: info.url, type: 'string' },
                thumb: { value: thumbUrl, type: 'string' },
                caption: { value: meta.ImageDescription?.value, type: 'safeStr' },
                date: { value: dateTime, type: 'date', args: ['iso'] },
                credits: { value: credits, type: 'safeStr' }
            });
        }, `Failed to query Wikimedia Commons image: ${title}`);
    }

    /**
     * Fetches intro text and page metadata for a Wikipedia article.
     *
     * @param title - Article title to query.
     * @param qid - Known Wikidata id; falls back to the page's `wikibase_item` prop.
     * @param image - Already resolved Commons image data; when absent and the page
     *   reports a `pageimage`, that image is looked up on Commons instead.
     * @param confidence - Match confidence stored with the result (default 1).
     * @returns An object with `image` plus the fields produced by
     *   `Parser.container`, or whatever `log.catchAsync` yields on failure.
     */
    static async queryWikiPage(title, qid, image, confidence = 1) {
        log.debug(`Querying Wikipedia page: ${title}`);

        return await log.catchAsync(async () => {
            const res = await Wiki.fetch.wikipedia({
                action: 'query',
                prop: 'extracts|info|pageprops|pageimages',
                titles: title,
                redirects: 1,
                exintro: 1,
                explaintext: 1,
                exsectionformat: 'plain',
                piprop: 'name',
                pilimit: 1
            });

            // A response without a page list is reported as "not found" rather
            // than surfacing as a TypeError.
            if (!res?.success || !res.data?.query?.pages?.length)
                throw new Error(`No Wikipedia page found for: ${title}`);

            log.debug(`Wikipedia page info received for: ${title}`);
            const raw = res.data.query.pages[0];

            if (!image && raw.pageimage) {
                log.debug(`Querying page image from Wikimedia Commons: ${raw.pageimage}`);
                // Reference the class directly (as the other static calls do) so the
                // method also works when invoked detached from `Wiki`.
                image = await Wiki.queryCommonsImage(raw.pageimage);
            }

            return {
                image,
                ...Parser.container({
                    uri: { value: title, type: 'string' },
                    pageId: { value: raw.pageid, type: 'number' },
                    refId: { value: raw.lastrevid, type: 'number' },
                    confidence: { value: confidence, type: 'number', args: [3] },
                    name: { value: raw.title, type: 'string' },
                    lastModified: { value: raw.touched, type: 'date', args: ['iso'] },
                    summary: { value: raw.extract ?? '', type: 'list', args: ['safeStr', '\n'], strict: false },
                    sortKey: { value: raw.pageprops?.['defaultsort'], type: 'string' },
                    wikidata: { value: qid ?? raw.pageprops?.['wikibase_item'], type: 'string' },
                    desc: { value: raw.pageprops?.['wikibase-shortdesc'], type: 'safeStr' }
                })
            };
        }, `Failed to query Wikipedia page: ${title}`);
    }

    /**
     * Resolves the Wikipedia page data for a profile via a Wikidata lookup.
     *
     * @param data - Profile data passed to {@link Wiki.queryWikidata}.
     * @returns The {@link Wiki.queryWikiPage} result when the matched Wikidata item
     *   has an English Wikipedia article, otherwise `undefined`.
     */
    static async fromProfileData(data) {
        const { qid, confidence, article, image } = (await Wiki.queryWikidata(data)) ?? {};

        log.debug(
            `Query Wikidata for ${data.info?.name?.shortName ?? 'unknown'}: ${qid || 'no match'} ` +
                `(score: ${confidence || 0})`
        );

        if (!article) return undefined;

        const commonsImage = image ? await Wiki.queryCommonsImage(image) : undefined;

        // Forward the Wikidata match score; without it the page record would
        // always claim a confidence of 1 even for a heuristic match.
        return await Wiki.queryWikiPage(article, qid, commonsImage, confidence);
    }
}
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@rtbnext/core",
|
|
3
|
+
"description": "Internal core package for the rtbnext project",
|
|
4
|
+
"license": "MIT",
|
|
5
|
+
"version": "2.0.0-alpha.1",
|
|
6
|
+
"author": {
|
|
7
|
+
"name": "komed3 (Paul Köhler)",
|
|
8
|
+
"email": "webmaster@komed3.de",
|
|
9
|
+
"url": "https://komed3.de"
|
|
10
|
+
},
|
|
11
|
+
"files": [
|
|
12
|
+
"dist",
|
|
13
|
+
"README.md",
|
|
14
|
+
"LICENSE"
|
|
15
|
+
],
|
|
16
|
+
"bin": {
|
|
17
|
+
"rtbnext-cli": "./dist/bin/cli.js",
|
|
18
|
+
"rtbnext-cron": "./dist/bin/cron.js"
|
|
19
|
+
},
|
|
20
|
+
"type": "module",
|
|
21
|
+
"scripts": {
|
|
22
|
+
"format": "prettier --ignore-path \"\" --write \"./dist/**/*.@(js|d.ts)\" --log-level warn",
|
|
23
|
+
"build": "tsc && node scripts/copy-dts.mjs && tsc-alias && npm run format"
|
|
24
|
+
},
|
|
25
|
+
"dependencies": {
|
|
26
|
+
"@isodb/us-states": "^0.1.0",
|
|
27
|
+
"@komed3/deepmerge": "^1.0.0",
|
|
28
|
+
"@rtbnext/schema": "^2.0.0-alpha.20",
|
|
29
|
+
"axios": "^1.18.1",
|
|
30
|
+
"cmpstr": "^3.3.0",
|
|
31
|
+
"commander": "^15.0.0",
|
|
32
|
+
"csv-string": "^4.1.1",
|
|
33
|
+
"devtypes": "^2.2.0",
|
|
34
|
+
"i18n-iso-countries": "^7.14.0",
|
|
35
|
+
"js-sha256": "^0.11.1",
|
|
36
|
+
"nxtcron": "^1.0.1",
|
|
37
|
+
"yaml": "^2.9.0"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@types/node": "^26.1.0",
|
|
41
|
+
"prettier": "^3.9.4",
|
|
42
|
+
"tsc-alias": "^1.8.17",
|
|
43
|
+
"typescript": "^6.0.3"
|
|
44
|
+
}
|
|
45
|
+
}
|