@d-zero/beholder 2.1.6 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/dist/dom-evaluation.d.ts +72 -24
- package/dist/dom-evaluation.js +442 -84
- package/dist/index.d.ts +1 -1
- package/dist/meta/classify.d.ts +52 -0
- package/dist/meta/classify.js +731 -0
- package/dist/meta/id-extractors.d.ts +40 -0
- package/dist/meta/id-extractors.js +196 -0
- package/dist/meta/keys.d.ts +41 -0
- package/dist/meta/keys.js +507 -0
- package/dist/meta/parsers.d.ts +74 -0
- package/dist/meta/parsers.js +293 -0
- package/dist/meta/tag-detection.d.ts +59 -0
- package/dist/meta/tag-detection.js +120 -0
- package/dist/meta/types.d.ts +874 -0
- package/dist/meta/types.js +12 -0
- package/dist/scraper.js +15 -13
- package/dist/types.d.ts +3 -38
- package/package.json +5 -4
- package/src/dom-evaluation.spec.ts +301 -73
- package/src/dom-evaluation.ts +558 -88
- package/src/index.ts +43 -0
- package/src/meta/classify.spec.ts +281 -0
- package/src/meta/classify.ts +810 -0
- package/src/meta/id-extractors.spec.ts +69 -0
- package/src/meta/id-extractors.ts +206 -0
- package/src/meta/keys.ts +568 -0
- package/src/meta/parsers.spec.ts +178 -0
- package/src/meta/parsers.ts +304 -0
- package/src/meta/simple-wappalyzer.d.ts +37 -0
- package/src/meta/tag-detection.spec.ts +134 -0
- package/src/meta/tag-detection.ts +161 -0
- package/src/meta/types.ts +949 -0
- package/src/scraper.ts +19 -13
- package/src/types.ts +49 -55
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -0,0 +1,731 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure-function classifier that turns `RawHeadEntry[]` (collected on the browser
|
|
3
|
+
* side by `collectHead`) into a typed `Meta` object.
|
|
4
|
+
*
|
|
5
|
+
* The classifier is the **only place** where dot-paths from `keys.ts` get
|
|
6
|
+
* resolved against the `Meta` shape. Parsers (viewport/robots/refresh/etc.)
|
|
7
|
+
* are dispatched on the fly for the few entries that need value normalization.
|
|
8
|
+
*
|
|
9
|
+
* Unknown entries (names/properties/rels not in the lookup tables) are
|
|
10
|
+
* preserved in {@link Meta.others} so consumers never lose information.
|
|
11
|
+
* @module
|
|
12
|
+
*/
|
|
13
|
+
import { HTTP_EQUIV_MAP, ITEMPROP_MAP, LINK_REL_MAP, META_NAME_MAP, META_PROPERTY_MAP, } from './keys.js';
|
|
14
|
+
import { JSON_LD_TOTAL_LIMIT, capJsonLdContent, normalizeValue, parseFormatDetection, parseJsonLd, parseRefresh, parseReferrer, parseRobots, parseViewport, } from './parsers.js';
|
|
15
|
+
const THEME_COLOR_DARK_MEDIA = /prefers-color-scheme:\s*dark/i;
|
|
16
|
+
const THEME_COLOR_LIGHT_MEDIA = /prefers-color-scheme:\s*light/i;
|
|
17
|
+
/**
 * Creates a fresh `Meta` skeleton with every always-present field initialised:
 * an empty `title`, empty `originTrial` / `jsonLd` / `speculationRules` lists,
 * an empty `tags` container and an `others` bucket from `emptyOthersBucket()`.
 *
 * A new object graph is built on every call.
 * @returns The newly created skeleton.
 */
export function emptyMeta() {
    const skeleton = {
        title: '',
        originTrial: [],
        jsonLd: [],
        speculationRules: [],
        tags: { detected: {}, entries: [] },
        others: emptyOthersBucket(),
    };
    return skeleton;
}
|
|
31
|
+
/**
 * Guarantees that `meta.httpEquiv` exists and carries an `originTrialToken`
 * array, then returns that `httpEquiv` object.
 *
 * An existing `httpEquiv` object is kept; only a non-array
 * `originTrialToken` on it is replaced by a new empty array.
 * @param meta Object whose `httpEquiv` slot is created or repaired in place.
 * @returns The (possibly newly created) `meta.httpEquiv` object.
 */
function ensureHttpEquiv(meta) {
    const slot = meta.httpEquiv;
    if (slot === undefined) {
        meta.httpEquiv = { originTrialToken: [] };
        return meta.httpEquiv;
    }
    if (!Array.isArray(slot.originTrialToken)) {
        slot.originTrialToken = [];
    }
    return slot;
}
|
|
44
|
+
/**
 * Builds the empty container for entries that no lookup table recognises.
 *
 * `meta`, `property`, `httpEquiv` and `itemprop` start as empty objects;
 * `link`, `script` and `iframe` start as empty arrays.
 * @returns A new bucket object; nothing is shared between calls.
 */
function emptyOthersBucket() {
    const keyedBuckets = { meta: {}, property: {}, httpEquiv: {}, itemprop: {} };
    const listBuckets = { link: [], script: [], iframe: [] };
    return { ...keyedBuckets, ...listBuckets };
}
|
|
58
|
+
/**
 * Writes `value` to `target` along `dotPath`. Intermediate objects are created
 * on demand; an intermediate that is missing, `null`, a non-object or an array
 * is replaced by a new plain object.
 *
 * When `multi` is `true`, the leaf is treated as an array and `value` is
 * appended (a non-array leaf is replaced by `[value]`). Otherwise the first
 * assignment wins and subsequent calls are no-ops.
 *
 * The path is validated before anything is written: a path containing an
 * empty segment or a `__proto__` segment is ignored entirely, so a malformed
 * path never leaves half-built intermediates behind and can never reach
 * `Object.prototype`. Only own properties are consulted, so segments such as
 * `constructor` behave like any other key.
 *
 * Exported for the unit tests in `classify.spec.ts`.
 * @param target Object to write into (mutated in place).
 * @param dotPath Dot-separated property path, e.g. `"og.title"`.
 * @param value Value to store or append.
 * @param multi `true` to append to an array leaf, otherwise first-write-wins.
 */
export function setByPath(target, dotPath, value, multi) {
    const segments = dotPath.split('.');
    // `split` always yields at least one element, so checking the segments
    // themselves covers the empty-path case as well.
    if (segments.some((segment) => segment === '' || segment === '__proto__')) {
        return;
    }
    const leaf = segments.pop();
    let cursor = target;
    for (const segment of segments) {
        // Own-property read: inherited members must not be mistaken for data.
        const next = Object.hasOwn(cursor, segment) ? cursor[segment] : undefined;
        if (next == null || typeof next !== 'object' || Array.isArray(next)) {
            const created = {};
            cursor[segment] = created;
            cursor = created;
        }
        else {
            cursor = next;
        }
    }
    const existing = Object.hasOwn(cursor, leaf) ? cursor[leaf] : undefined;
    if (multi) {
        if (Array.isArray(existing)) {
            existing.push(value);
        }
        else {
            cursor[leaf] = [value];
        }
    }
    else if (existing === undefined) {
        cursor[leaf] = value;
    }
}
|
|
104
|
+
/**
 * Applies one key definition to `meta`: the raw value is normalised once via
 * `normalizeValue(rawValue, def.transform)` and the result is written to every
 * dot-path listed in `def.paths` through `setByPath`.
 * @param meta Object being populated (mutated in place).
 * @param def Key definition providing `paths`, and optionally `transform` and `multi`.
 * @param rawValue Unprocessed attribute value.
 */
function applyKeyDef(meta, def, rawValue) {
    const normalized = normalizeValue(rawValue, def.transform);
    for (const dotPath of def.paths) {
        // Only a literal `true` switches the write into append mode.
        const append = def.multi === true;
        setByPath(meta, dotPath, normalized, append);
    }
}
|
|
116
|
+
/**
 * Classifies a single `<meta name=… content=…>` entry into `meta`.
 *
 * Names that need value parsing (`viewport`, `robots`, `referrer`,
 * `format-detection`, `theme-color`, `google`, `googlebot`) are handled inline;
 * everything else is resolved through `META_NAME_MAP`. For the parsed fields
 * the first occurrence wins.
 *
 * `name` is compared verbatim against lowercase literals.
 * NOTE(review): presumably the collector lowercases it first — confirm.
 * @param meta Object being populated (mutated in place).
 * @param name Value of the `name` attribute.
 * @param content Value of the `content` attribute.
 * @param media Value of the `media` attribute, if any (only used for `theme-color`).
 * @returns `true` when the entry was consumed, `false` when the caller should
 *   preserve it elsewhere.
 */
function classifyMetaName(meta, name, content, media) {
    if (name === 'viewport') {
        if (meta.viewport === undefined) {
            meta.viewport = parseViewport(content);
        }
        return true;
    }
    if (name === 'robots') {
        if (meta.robots === undefined) {
            meta.robots = parseRobots(content);
        }
        return true;
    }
    if (name === 'referrer') {
        if (meta.referrer === undefined) {
            meta.referrer = parseReferrer(content);
        }
        return true;
    }
    if (name === 'format-detection') {
        const parsed = parseFormatDetection(content);
        if (meta.formatDetection === undefined) {
            meta.formatDetection = parsed;
        }
        // `telephone=no` is mirrored onto the `apple` group as well.
        if (parsed.telephone === false) {
            if (meta.apple === undefined) {
                meta.apple = { formatDetectionTelephone: false };
            }
            else {
                meta.apple.formatDetectionTelephone = false;
            }
        }
        return true;
    }
    if (name === 'theme-color') {
        // A `media` query selects the dark or light variant; dark is tested first.
        const target = media && THEME_COLOR_DARK_MEDIA.test(media)
            ? 'themeColorDark'
            : media && THEME_COLOR_LIGHT_MEDIA.test(media)
                ? 'themeColorLight'
                : 'themeColor';
        if (meta[target] === undefined) {
            meta[target] = content;
        }
        return true;
    }
    if (name === 'google') {
        const flag = content.trim().toLowerCase();
        if (flag === 'notranslate' ||
            flag === 'nositelinkssearchbox' ||
            flag === 'nopagereadaloud') {
            if (meta.google === undefined) {
                meta.google = {};
            }
            // None of the accepted flags contains a hyphen, so the flag is
            // already the property name.
            meta.google[flag] = true;
            return true;
        }
        // Any other `google` value falls through to the table lookup below.
    }
    if (name === 'googlebot' && content.trim().toLowerCase() === 'notranslate') {
        if (meta.google === undefined) {
            meta.google = {};
        }
        meta.google.googlebotNotranslate = true;
        // fall through to also write `googlebot` field
    }
    // Own-property lookup: `name` is collected from the page, so values such
    // as "constructor" or "__proto__" must not resolve to inherited members.
    const def = Object.hasOwn(META_NAME_MAP, name) ? META_NAME_MAP[name] : undefined;
    if (def) {
        applyKeyDef(meta, def, content);
        return true;
    }
    return false;
}
|
|
193
|
+
/**
 * Classifies a single `<meta property=… content=…>` entry through
 * `META_PROPERTY_MAP`.
 * @param meta Object being populated (mutated in place).
 * @param property Value of the `property` attribute.
 * @param content Value of the `content` attribute.
 * @returns `true` when a definition was found and applied, otherwise `false`.
 */
function classifyMetaProperty(meta, property, content) {
    // Own-property lookup: `property` is collected from the page, so values
    // such as "constructor" must not resolve to inherited members.
    const def = Object.hasOwn(META_PROPERTY_MAP, property)
        ? META_PROPERTY_MAP[property]
        : undefined;
    if (def) {
        applyKeyDef(meta, def, content);
        return true;
    }
    return false;
}
|
|
207
|
+
/**
 * Classifies a single `<meta http-equiv=… content=…>` entry.
 *
 * `refresh` is parsed with `parseRefresh` and stored on `meta.httpEquiv`
 * (first occurrence wins); every other key is resolved through
 * `HTTP_EQUIV_MAP`.
 * @param meta Object being populated (mutated in place).
 * @param key Value of the `http-equiv` attribute.
 * @param content Value of the `content` attribute.
 * @returns `true` when the entry was consumed, otherwise `false`.
 */
function classifyHttpEquiv(meta, key, content) {
    if (key === 'refresh') {
        const slot = ensureHttpEquiv(meta);
        if (slot.refresh === undefined) {
            slot.refresh = parseRefresh(content);
        }
        return true;
    }
    // Own-property lookup: `key` is collected from the page, so values such
    // as "constructor" must not resolve to inherited members.
    const def = Object.hasOwn(HTTP_EQUIV_MAP, key) ? HTTP_EQUIV_MAP[key] : undefined;
    if (def) {
        applyKeyDef(meta, def, content);
        return true;
    }
    return false;
}
|
|
228
|
+
/**
 * Classifies a single `<meta itemprop=… content=…>` entry through
 * `ITEMPROP_MAP`.
 * @param meta Object being populated (mutated in place).
 * @param key Value of the `itemprop` attribute.
 * @param content Value of the `content` attribute.
 * @returns `true` when a definition was found and applied, otherwise `false`.
 */
function classifyItemprop(meta, key, content) {
    // Own-property lookup: `key` is collected from the page, so values such
    // as "constructor" must not resolve to inherited members.
    const def = Object.hasOwn(ITEMPROP_MAP, key) ? ITEMPROP_MAP[key] : undefined;
    if (def) {
        applyKeyDef(meta, def, content);
        return true;
    }
    return false;
}
|
|
242
|
+
/**
 * Projects a raw link entry onto the twelve attributes kept for classified
 * links. Any other property of `entry` is dropped; attributes missing on
 * `entry` are still present on the result with the value `undefined`.
 * @param entry Raw link entry to copy from.
 * @returns A new object holding only the selected attributes.
 */
function makeLinkEntry(entry) {
    const { href, rel, type, media, sizes, title, hreflang, as, crossorigin, color, blocking, imagesrcset, } = entry;
    return {
        href,
        rel,
        type,
        media,
        sizes,
        title,
        hreflang,
        as,
        crossorigin,
        color,
        blocking,
        imagesrcset,
    };
}
|
|
262
|
+
/**
 * Stores a link entry on `meta.link` according to the definition's
 * cardinality:
 *
 * - `href-only`: stores `entry.href`, first occurrence wins;
 * - `single`: stores the entry itself, first occurrence wins;
 * - `array`: appends the entry to the list at `def.path`;
 * - `icon-sized`: like `array`, but only when `entry.sizes` is truthy.
 *
 * Any other cardinality leaves the record untouched. `meta.link` is created
 * through `createEmptyLinkMeta()` when it is still `undefined`.
 * @param meta Object being populated (mutated in place).
 * @param def Link definition providing `path` and `cardinality`.
 * @param entry Link entry to store.
 */
function applyLinkRel(meta, def, entry) {
    if (meta.link === undefined) {
        meta.link = createEmptyLinkMeta();
    }
    const record = meta.link;
    const mode = def.cardinality;
    if (mode === 'href-only' || mode === 'single') {
        if (record[def.path] === undefined) {
            record[def.path] = mode === 'single' ? entry : entry.href;
        }
        return;
    }
    const appendable = mode === 'array' || (mode === 'icon-sized' && Boolean(entry.sizes));
    if (!appendable) {
        return;
    }
    const current = record[def.path];
    if (Array.isArray(current)) {
        current.push(entry);
    }
    else {
        record[def.path] = [entry];
    }
}
|
|
310
|
+
/**
 * Builds the initial `meta.link` record: one independent empty array for each
 * list-valued link bucket. Single-valued link fields are not pre-created.
 * @returns A new record; no array is shared between calls or between fields.
 */
function createEmptyLinkMeta() {
    const listFields = [
        'alternateHreflang',
        'alternateMedia',
        'alternateRss',
        'alternateAtom',
        'alternateJsonFeed',
        'tag',
        'archives',
        'appendix',
        'chapter',
        'section',
        'subsection',
        'profile',
        'me',
        'enclosure',
        'external',
        'nofollow',
        'sponsored',
        'ugc',
        'noopener',
        'noreferrer',
        'opener',
        'dnsPrefetch',
        'preconnect',
        'prefetch',
        'prerender',
        'preload',
        'modulepreload',
        'expect',
        'stylesheet',
        'syndication',
        'related',
        'iconSized',
        'appleTouchIconSized',
        'appleTouchIconPrecomposed',
        'appleTouchStartupImage',
    ];
    return Object.fromEntries(listFields.map((field) => [field, []]));
}
|
|
352
|
+
/**
 * Sorts a `rel="alternate"` link into a sub-bucket of `meta.link` based on its
 * lower-cased `type`:
 *
 * - RSS / Atom / JSON Feed types are appended to their respective lists;
 * - oEmbed (JSON / XML) and ActivityPub JSON types fill a single slot, first
 *   occurrence wins;
 * - anything else goes to `alternateMedia` when `entry.media` is truthy, and
 *   to `alternateHreflang` otherwise.
 *
 * `meta.link` is created through `createEmptyLinkMeta()` when it is still
 * `undefined`.
 * @param meta Object being populated (mutated in place).
 * @param entry Link entry to file.
 */
function refineAlternate(meta, entry) {
    if (meta.link === undefined) {
        meta.link = createEmptyLinkMeta();
    }
    const links = meta.link;
    const mime = entry.type?.toLowerCase();
    const keepFirst = (slot) => {
        if (links[slot] === undefined) {
            links[slot] = entry;
        }
    };
    if (mime === 'application/rss+xml') {
        links.alternateRss.push(entry);
    }
    else if (mime === 'application/atom+xml') {
        links.alternateAtom.push(entry);
    }
    else if (mime === 'application/feed+json') {
        links.alternateJsonFeed.push(entry);
    }
    else if (mime === 'application/json+oembed') {
        keepFirst('oembedJson');
    }
    else if (mime === 'application/xml+oembed') {
        keepFirst('oembedXml');
    }
    else if (mime === 'application/activity+json') {
        keepFirst('alternateActivityJson');
    }
    else {
        const fallback = entry.media ? links.alternateMedia : links.alternateHreflang;
        fallback.push(entry);
    }
}
|
|
403
|
+
/**
 * Sorts a `rel="icon"` link into a slot of `meta.link`, checked in this order:
 *
 * 1. `type` is `image/svg+xml` → `iconSvg` (first occurrence wins);
 * 2. `sizes` is `any` → `iconAny` (first occurrence wins);
 * 3. any other truthy `sizes` → appended to `iconSized`;
 * 4. otherwise → `icon` (first occurrence wins).
 *
 * Both `type` and `sizes` are compared case-insensitively; MIME types are
 * case-insensitive, and this matches how the `alternate` refinement treats
 * `type`. `meta.link` is created through `createEmptyLinkMeta()` when it is
 * still `undefined`.
 * @param meta Object being populated (mutated in place).
 * @param entry Link entry to file.
 */
function refineIcon(meta, entry) {
    if (meta.link === undefined) {
        meta.link = createEmptyLinkMeta();
    }
    const links = meta.link;
    const mime = entry.type?.toLowerCase();
    const sizes = entry.sizes?.toLowerCase();
    if (mime === 'image/svg+xml') {
        if (links.iconSvg === undefined) {
            links.iconSvg = entry;
        }
        return;
    }
    if (sizes === 'any') {
        if (links.iconAny === undefined) {
            links.iconAny = entry;
        }
        return;
    }
    if (entry.sizes) {
        links.iconSized.push(entry);
        return;
    }
    if (links.icon === undefined) {
        links.icon = entry;
    }
}
|
|
433
|
+
/**
 * Files a `rel="apple-touch-icon"` link: entries with a truthy `sizes` are
 * appended to `meta.link.appleTouchIconSized`; the first entry without one
 * becomes `meta.link.appleTouchIcon`.
 *
 * `meta.link` is created through `createEmptyLinkMeta()` when it is still
 * `undefined`.
 * @param meta Object being populated (mutated in place).
 * @param entry Link entry to file.
 */
function refineAppleTouchIcon(meta, entry) {
    if (meta.link === undefined) {
        meta.link = createEmptyLinkMeta();
    }
    const links = meta.link;
    if (!entry.sizes) {
        if (links.appleTouchIcon === undefined) {
            links.appleTouchIcon = entry;
        }
        return;
    }
    links.appleTouchIconSized.push(entry);
}
|
|
450
|
+
/**
 * Files a `rel="apple-touch-startup-image"` link. Every entry is appended to
 * `meta.link.appleTouchStartupImage`; in addition, the `media` query selects
 * at most one device-specific slot, which keeps its first entry:
 *
 * - `device-width: 320px` → `appleTouchStartupImageIphone`;
 * - `device-width: 768px` with `portrait` → `appleTouchStartupImageIpadPortrait`;
 * - `device-width: 768px` with `landscape` → `appleTouchStartupImageIpadLandscape`.
 *
 * `meta.link` is created through `createEmptyLinkMeta()` when it is still
 * `undefined`.
 * @param meta Object being populated (mutated in place).
 * @param entry Link entry to file.
 */
function refineAppleTouchStartupImage(meta, entry) {
    if (meta.link === undefined) {
        meta.link = createEmptyLinkMeta();
    }
    const links = meta.link;
    links.appleTouchStartupImage.push(entry);
    const query = entry.media ?? '';
    let slot;
    if (/device-width:\s*320px/i.test(query)) {
        slot = 'appleTouchStartupImageIphone';
    }
    else if (/device-width:\s*768px/i.test(query)) {
        // Portrait is tested before landscape.
        if (/portrait/i.test(query)) {
            slot = 'appleTouchStartupImageIpadPortrait';
        }
        else if (/landscape/i.test(query)) {
            slot = 'appleTouchStartupImageIpadLandscape';
        }
    }
    if (slot !== undefined && links[slot] === undefined) {
        links[slot] = entry;
    }
}
|
|
477
|
+
/**
 * Classifies one `<link>` entry. Each rel token is lower-cased and handled
 * independently:
 *
 * - `alternate`, `icon`, `apple-touch-icon` and `apple-touch-startup-image`
 *   go to their dedicated refinement function;
 * - `me` additionally records the href in `meta.microformats.relMe`;
 * - every remaining token (including `me`) is looked up in `LINK_REL_MAP` and
 *   applied through `applyLinkRel`.
 *
 * When no token is recognised, the entry is preserved in `meta.others.link`.
 * @param meta Object being populated (mutated in place).
 * @param entry Raw link entry; `entry.rel` is iterated as a list of rel tokens.
 */
function classifyLink(meta, entry) {
    const linkEntry = makeLinkEntry(entry);
    let anyKnown = false;
    for (const rel of entry.rel) {
        const lower = rel.toLowerCase();
        let refine;
        switch (lower) {
            case 'alternate': {
                refine = refineAlternate;
                break;
            }
            case 'icon': {
                refine = refineIcon;
                break;
            }
            case 'apple-touch-icon': {
                refine = refineAppleTouchIcon;
                break;
            }
            case 'apple-touch-startup-image': {
                refine = refineAppleTouchStartupImage;
                break;
            }
            default: {
                break;
            }
        }
        if (refine !== undefined) {
            refine(meta, linkEntry);
            anyKnown = true;
            continue;
        }
        if (lower === 'me') {
            // `microformats` may already exist without a `relMe` list; repair
            // it rather than throwing on `.push`.
            if (meta.microformats === undefined) {
                meta.microformats = { relMe: [] };
            }
            else if (!Array.isArray(meta.microformats.relMe)) {
                meta.microformats.relMe = [];
            }
            meta.microformats.relMe.push(linkEntry.href);
            anyKnown = true;
        }
        // Own-property lookup: rel tokens are collected from the page, so a
        // token such as "constructor" must not resolve to an inherited member
        // and mark the link as known (which would drop it from `others`).
        const def = Object.hasOwn(LINK_REL_MAP, lower) ? LINK_REL_MAP[lower] : undefined;
        if (def) {
            applyLinkRel(meta, def, linkEntry);
            anyKnown = true;
        }
    }
    if (!anyKnown) {
        meta.others.link.push(linkEntry);
    }
}
|
|
525
|
+
/**
 * Classifies one `<script>` entry.
 *
 * Scripts whose lower-cased type is `application/ld+json` or
 * `speculationrules` are forwarded to `pushJsonLd`, subject to two size
 * limits; `totals.jsonLdBytes` accumulates the `.length` (UTF-16 code units)
 * of everything accepted so far:
 *
 * - if adding the full content would exceed `JSON_LD_TOTAL_LIMIT`, the content
 *   is cut to the remaining budget and stored unparsed with a `parseError`;
 * - otherwise the content is capped by `capJsonLdContent`, parsed by
 *   `parseJsonLd`, and flagged when it was truncated but still parsed.
 *
 * NOTE(review): the total-budget check runs on the uncapped length, so an
 * entry larger than the total limit takes the first path even if its capped
 * form would fit — confirm this is intended.
 *
 * Every other script is preserved in `meta.others.script`.
 * @param meta Object being populated (mutated in place).
 * @param entry Raw script entry.
 * @param totals Running counters shared across entries.
 * @param totals.jsonLdBytes Accumulated length of accepted JSON content.
 */
function classifyScript(meta, entry, totals) {
    const kind = entry.scriptType.toLowerCase();
    const isStructured = kind === 'application/ld+json' || kind === 'speculationrules';
    if (!isStructured) {
        meta.others.script.push({
            type: entry.scriptType,
            content: entry.content,
            src: entry.src,
            location: entry.location,
        });
        return;
    }
    const text = entry.content ?? '';
    const overBudget = totals.jsonLdBytes + text.length > JSON_LD_TOTAL_LIMIT;
    if (overBudget) {
        const room = Math.max(0, JSON_LD_TOTAL_LIMIT - totals.jsonLdBytes);
        const clipped = text.slice(0, room);
        totals.jsonLdBytes += clipped.length;
        pushJsonLd(meta, kind, {
            raw: clipped,
            parseError: 'truncated: total jsonLd bytes exceeded limit',
        });
        return;
    }
    const capResult = capJsonLdContent(text);
    totals.jsonLdBytes += capResult.content.length;
    const parsed = parseJsonLd(capResult.content);
    if (capResult.truncated && parsed.parseError === undefined) {
        parsed.parseError = 'truncated: per-entry size limit exceeded';
    }
    pushJsonLd(meta, kind, parsed);
}
|
|
563
|
+
/**
 * Appends a parsed JSON entry to the list matching its script type:
 * `application/ld+json` → `meta.jsonLd`, `speculationrules` →
 * `meta.speculationRules`. Any other type is ignored.
 * @param meta Object being populated (mutated in place).
 * @param type Lower-cased script type.
 * @param entry Entry to append.
 */
function pushJsonLd(meta, type, entry) {
    switch (type) {
        case 'application/ld+json': {
            meta.jsonLd.push(entry);
            return;
        }
        case 'speculationrules': {
            meta.speculationRules.push(entry);
            return;
        }
        default: {
            return;
        }
    }
}
|
|
577
|
+
/**
 * Top-level classifier: starts from an empty `Meta`, feeds every raw entry
 * through `classifyEntry` in order and returns the populated object.
 * @param raw Iterable of raw entries collected from the page.
 * @param options Optional settings.
 *   `options.tags`, when truthy, replaces the default empty `tags` container;
 *   `options.includeRaw`, when truthy, attaches the input itself as `_raw`.
 * @returns The populated `Meta`.
 */
export function classify(raw, options = {}) {
    const result = emptyMeta();
    // A single budget object is shared by all entries of this run.
    const budget = { jsonLdBytes: 0 };
    if (options.tags) {
        result.tags = options.tags;
    }
    for (const item of raw) {
        classifyEntry(result, item, budget);
    }
    if (options.includeRaw) {
        result._raw = raw;
    }
    return result;
}
|
|
597
|
+
/**
 * Copies the attributes of the `<html>` element entry onto `meta`.
 *
 * `lang`, `dir` and `xmlns` are copied as-is. `prefix`, `vocab` and `typeOf`
 * are written both at the top level and under `meta.rdfa`; `itemtype` is
 * written as `meta.itemType` and under `meta.microdata`; `itemscope` sets
 * `meta.microdata.itemscope`; `amp` / `lightning` set flags on `meta.amp`.
 * Only truthy attributes are applied.
 * @param meta Object being populated (mutated in place).
 * @param entry Raw `html` entry.
 */
function absorbHtmlEntry(meta, entry) {
    const group = (key) => {
        if (meta[key] === undefined) {
            meta[key] = {};
        }
        return meta[key];
    };
    for (const attr of ['lang', 'dir', 'xmlns']) {
        if (entry[attr]) {
            meta[attr] = entry[attr];
        }
    }
    for (const attr of ['prefix', 'vocab', 'typeOf']) {
        if (entry[attr]) {
            meta[attr] = entry[attr];
            group('rdfa')[attr] = entry[attr];
        }
    }
    if (entry.itemtype) {
        meta.itemType = entry.itemtype;
        group('microdata').itemtype = entry.itemtype;
    }
    if (entry.itemscope) {
        group('microdata').itemscope = true;
    }
    if (entry.amp || entry.lightning) {
        const amp = group('amp');
        if (entry.amp) {
            amp.enabled = true;
        }
        if (entry.lightning) {
            amp.lightning = true;
        }
    }
}
/**
 * Routes a `<meta>` element entry. The first truthy `charset` is recorded;
 * then each of `name`, `property`, `httpEquiv` and `itemprop` that is present
 * is offered to its classifier, and anything a classifier declines is kept in
 * the matching `meta.others` bucket via `pushMulti`.
 * @param meta Object being populated (mutated in place).
 * @param entry Raw `meta` entry.
 */
function absorbMetaEntry(meta, entry) {
    if (entry.charset && meta.charset === undefined) {
        meta.charset = entry.charset;
    }
    const text = entry.content ?? '';
    // [entry attribute, `others` bucket name, classifier]
    const routes = [
        ['name', 'meta', (key) => classifyMetaName(meta, key, text, entry.media)],
        ['property', 'property', (key) => classifyMetaProperty(meta, key, text)],
        ['httpEquiv', 'httpEquiv', (key) => classifyHttpEquiv(meta, key, text)],
        ['itemprop', 'itemprop', (key) => classifyItemprop(meta, key, text)],
    ];
    for (const [attr, bucketName, route] of routes) {
        const key = entry[attr];
        if (!key) {
            continue;
        }
        if (!route(key)) {
            pushMulti(meta.others[bucketName], key, text);
        }
    }
}
/**
 * Dispatches one raw entry on its `kind`:
 *
 * - `html`: document-level attributes (see `absorbHtmlEntry`);
 * - `title`: stored while `meta.title` is still the empty string;
 * - `base`: first truthy `href` / `target` become `baseHref` / `baseTarget`;
 * - `meta`: see `absorbMetaEntry`;
 * - `link` / `script`: delegated to `classifyLink` / `classifyScript`;
 * - `iframe`: preserved in `meta.others.iframe`;
 * - `window-global`: consumed by the tag-detection layer, ignored here.
 *
 * Entries of any other kind are ignored.
 * @param meta Object being populated (mutated in place).
 * @param entry Raw entry to classify.
 * @param totals Running counters shared across entries.
 * @param totals.jsonLdBytes Accumulated length of accepted JSON content.
 */
function classifyEntry(meta, entry, totals) {
    const kind = entry.kind;
    if (kind === 'html') {
        absorbHtmlEntry(meta, entry);
    }
    else if (kind === 'title') {
        if (meta.title === '') {
            meta.title = entry.content;
        }
    }
    else if (kind === 'base') {
        if (entry.href && meta.baseHref === undefined) {
            meta.baseHref = entry.href;
        }
        if (entry.target && meta.baseTarget === undefined) {
            meta.baseTarget = entry.target;
        }
    }
    else if (kind === 'meta') {
        absorbMetaEntry(meta, entry);
    }
    else if (kind === 'link') {
        classifyLink(meta, entry);
    }
    else if (kind === 'script') {
        classifyScript(meta, entry, totals);
    }
    else if (kind === 'iframe') {
        meta.others.iframe.push({ src: entry.src, location: entry.location });
    }
    // `window-global` entries are consumed by the tag-detection layer,
    // not by classify itself; they and unknown kinds are ignored here.
}
|
|
717
|
+
/**
 * Appends `value` to the list stored under `key` in `bucket`, creating the
 * list on first use.
 *
 * Only own properties count as an existing list, and new lists are defined as
 * own data properties. Keys that collide with inherited members (for example
 * `constructor`, `toString` or `__proto__`) are therefore stored like any
 * other key instead of throwing or changing the bucket's prototype.
 * @param bucket Plain object mapping keys to arrays of values (mutated in place).
 * @param key Key to append under.
 * @param value Value to append.
 */
function pushMulti(bucket, key, value) {
    const existing = Object.hasOwn(bucket, key) ? bucket[key] : undefined;
    if (Array.isArray(existing)) {
        existing.push(value);
        return;
    }
    // `defineProperty` rather than assignment: assigning to `__proto__` would
    // invoke the inherited setter instead of creating an entry.
    Object.defineProperty(bucket, key, {
        value: [value],
        writable: true,
        enumerable: true,
        configurable: true,
    });
}
|