@d-zero/beholder 2.1.6 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/README.md +26 -0
- package/dist/dom-evaluation.d.ts +72 -24
- package/dist/dom-evaluation.js +310 -84
- package/dist/extract-meta.d.ts +98 -0
- package/dist/extract-meta.js +75 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.js +1 -0
- package/dist/meta/classify.d.ts +52 -0
- package/dist/meta/classify.js +731 -0
- package/dist/meta/collect-head.d.ts +63 -0
- package/dist/meta/collect-head.js +223 -0
- package/dist/meta/id-extractors.d.ts +40 -0
- package/dist/meta/id-extractors.js +196 -0
- package/dist/meta/keys.d.ts +41 -0
- package/dist/meta/keys.js +507 -0
- package/dist/meta/parsers.d.ts +74 -0
- package/dist/meta/parsers.js +293 -0
- package/dist/meta/tag-detection.d.ts +59 -0
- package/dist/meta/tag-detection.js +120 -0
- package/dist/meta/types.d.ts +874 -0
- package/dist/meta/types.js +12 -0
- package/dist/scraper.js +15 -13
- package/dist/types.d.ts +3 -38
- package/package.json +8 -5
- package/src/dom-evaluation.spec.ts +301 -73
- package/src/dom-evaluation.ts +417 -88
- package/src/extract-meta.spec.ts +247 -0
- package/src/extract-meta.ts +121 -0
- package/src/index.ts +45 -0
- package/src/meta/classify.spec.ts +281 -0
- package/src/meta/classify.ts +810 -0
- package/src/meta/collect-head.ts +247 -0
- package/src/meta/id-extractors.spec.ts +69 -0
- package/src/meta/id-extractors.ts +206 -0
- package/src/meta/keys.ts +568 -0
- package/src/meta/parsers.spec.ts +178 -0
- package/src/meta/parsers.ts +304 -0
- package/src/meta/simple-wappalyzer.d.ts +37 -0
- package/src/meta/tag-detection.spec.ts +134 -0
- package/src/meta/tag-detection.ts +161 -0
- package/src/meta/types.ts +949 -0
- package/src/scraper.ts +19 -13
- package/src/types.ts +49 -55
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -0,0 +1,949 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for the `Meta` data extracted from a page's `<head>` and full document.
|
|
3
|
+
*
|
|
4
|
+
* Structure follows the reference table in `frontmatter-keys.md`, with one dot-path
|
|
5
|
+
* field per category. Optional fields are absent when not detected on the page.
|
|
6
|
+
* Array fields are required and default to `[]` so consumers can iterate without
|
|
7
|
+
* null-checks.
|
|
8
|
+
* @see {@link ./classify.ts} for the function that builds `Meta` from raw head entries
|
|
9
|
+
* @see {@link ./parsers.ts} for the value normalizers used by `classify`
|
|
10
|
+
* @module
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Top-level metadata extracted from a page's `<head>` and surrounding markup
|
|
15
|
+
* (`<html>`, `<base>`, `<noscript>`, `<iframe>` in body, `<script>` of known
|
|
16
|
+
* structured-data types).
|
|
17
|
+
*
|
|
18
|
+
* Required fields (`title`, `originTrial`, `jsonLd`, `speculationRules`, `tags`, `others`)
|
|
19
|
+
* always exist so downstream consumers can iterate without null-checking the
|
|
20
|
+
* top level.
|
|
21
|
+
*/
|
|
22
|
+
export type Meta = {
|
|
23
|
+
/** The text content of the `<title>` element. */
|
|
24
|
+
title: string;
|
|
25
|
+
|
|
26
|
+
/** The `lang` attribute of the `<html>` element. */
|
|
27
|
+
lang?: string;
|
|
28
|
+
/** The `dir` attribute of the `<html>` element. */
|
|
29
|
+
dir?: string;
|
|
30
|
+
/** The `xmlns` attribute of the `<html>` element (rare; RDFa contexts). */
|
|
31
|
+
xmlns?: string;
|
|
32
|
+
/** The `prefix` attribute of the `<html>` element (RDFa). */
|
|
33
|
+
prefix?: string;
|
|
34
|
+
/** The `vocab` attribute of the `<html>` element (RDFa). */
|
|
35
|
+
vocab?: string;
|
|
36
|
+
/** The `typeof` attribute of the `<html>` element (RDFa). */
|
|
37
|
+
typeOf?: string;
|
|
38
|
+
/** The `itemtype` attribute of the `<html>` element (Microdata). */
|
|
39
|
+
itemType?: string;
|
|
40
|
+
/** The `<meta charset>` value; the field is omitted when absent. */
|
|
41
|
+
charset?: string;
|
|
42
|
+
/** The `<base href>` value; the field is omitted when absent. */
|
|
43
|
+
baseHref?: string;
|
|
44
|
+
/** The `<base target>` value; the field is omitted when absent. */
|
|
45
|
+
baseTarget?: string;
|
|
46
|
+
|
|
47
|
+
/** `<meta name="description">` content. */
|
|
48
|
+
description?: string;
|
|
49
|
+
/** `<meta name="keywords">` content. */
|
|
50
|
+
keywords?: string;
|
|
51
|
+
/** `<meta name="application-name">` content. */
|
|
52
|
+
applicationName?: string;
|
|
53
|
+
/** `<meta name="author">` content. */
|
|
54
|
+
author?: string;
|
|
55
|
+
/** `<meta name="generator">` content. */
|
|
56
|
+
generator?: string;
|
|
57
|
+
/** `<meta name="creator">` content. */
|
|
58
|
+
creator?: string;
|
|
59
|
+
/** `<meta name="publisher">` content. */
|
|
60
|
+
publisher?: string;
|
|
61
|
+
/** `<meta name="theme-color">` (no `media` attribute) content. */
|
|
62
|
+
themeColor?: string;
|
|
63
|
+
/** `<meta name="theme-color" media="(prefers-color-scheme: light)">` content. */
|
|
64
|
+
themeColorLight?: string;
|
|
65
|
+
/** `<meta name="theme-color" media="(prefers-color-scheme: dark)">` content. */
|
|
66
|
+
themeColorDark?: string;
|
|
67
|
+
/** `<meta name="color-scheme">` content. */
|
|
68
|
+
colorScheme?: string;
|
|
69
|
+
/** `<meta name="supported-color-schemes">` content. */
|
|
70
|
+
supportedColorSchemes?: string;
|
|
71
|
+
|
|
72
|
+
/** Parsed `<meta name="viewport">`. */
|
|
73
|
+
viewport?: ViewportMeta;
|
|
74
|
+
/** Parsed `<meta name="robots">`. */
|
|
75
|
+
robots?: RobotsMeta;
|
|
76
|
+
/** Parsed `<meta name="referrer">` and its sub-policies. */
|
|
77
|
+
referrer?: ReferrerMeta;
|
|
78
|
+
/** Parsed `<meta name="format-detection">` and Apple cross-references. */
|
|
79
|
+
formatDetection?: FormatDetectionMeta;
|
|
80
|
+
|
|
81
|
+
/** `<meta name="googlebot">` and other crawler-specific directives. */
|
|
82
|
+
googlebot?: string;
|
|
83
|
+
googlebotNews?: string;
|
|
84
|
+
googlebotImage?: string;
|
|
85
|
+
googlebotVideo?: string;
|
|
86
|
+
bingbot?: string;
|
|
87
|
+
slurp?: string;
|
|
88
|
+
duckduckbot?: string;
|
|
89
|
+
yandex?: string;
|
|
90
|
+
baiduspider?: string;
|
|
91
|
+
iaArchiver?: string;
|
|
92
|
+
revisitAfter?: string;
|
|
93
|
+
rating?: string;
|
|
94
|
+
distribution?: string;
|
|
95
|
+
classification?: string;
|
|
96
|
+
category?: string;
|
|
97
|
+
subject?: string;
|
|
98
|
+
topic?: string;
|
|
99
|
+
summary?: string;
|
|
100
|
+
abstract?: string;
|
|
101
|
+
audience?: string;
|
|
102
|
+
target?: string;
|
|
103
|
+
copyright?: string;
|
|
104
|
+
designer?: string;
|
|
105
|
+
owner?: string;
|
|
106
|
+
replyTo?: string;
|
|
107
|
+
contact?: string;
|
|
108
|
+
identifierUrl?: string;
|
|
109
|
+
language?: string;
|
|
110
|
+
revision?: string;
|
|
111
|
+
build?: string;
|
|
112
|
+
version?: string;
|
|
113
|
+
handheldFriendly?: string;
|
|
114
|
+
mobileOptimized?: string;
|
|
115
|
+
mobileWebAppCapable?: string;
|
|
116
|
+
applicationUrl?: string;
|
|
117
|
+
theme?: string;
|
|
118
|
+
|
|
119
|
+
/** Parsed `http-equiv` attributes. */
|
|
120
|
+
httpEquiv?: HttpEquivMeta;
|
|
121
|
+
|
|
122
|
+
/** Open Graph tags (`og:*`, `article:*`, `book:*`, `profile:*`, `music:*`, `video:*`). */
|
|
123
|
+
og?: OpenGraphMeta;
|
|
124
|
+
/** Twitter Card tags (`twitter:*`). */
|
|
125
|
+
twitter?: TwitterMeta;
|
|
126
|
+
/** Facebook tags (`fb:*`). */
|
|
127
|
+
fb?: FbMeta;
|
|
128
|
+
/** Fediverse tags (`fediverse:*`). */
|
|
129
|
+
fediverse?: FediverseMeta;
|
|
130
|
+
/** Apple iOS tags. */
|
|
131
|
+
apple?: AppleMeta;
|
|
132
|
+
/** Microsoft application tile tags (`msapplication-*`). */
|
|
133
|
+
msapplication?: MsApplicationMeta;
|
|
134
|
+
/** Site verification tags. */
|
|
135
|
+
verification?: VerificationMeta;
|
|
136
|
+
/** Google-specific tags (`google`, `google-*`). */
|
|
137
|
+
google?: GoogleMeta;
|
|
138
|
+
/** Dublin Core (`DC.*`) tags. */
|
|
139
|
+
dc?: Record<string, string>;
|
|
140
|
+
/** DC Terms (`DCTERMS.*`) tags. */
|
|
141
|
+
dcterms?: Record<string, string>;
|
|
142
|
+
/** Geo tags. */
|
|
143
|
+
geo?: GeoMeta;
|
|
144
|
+
/** `<meta name="ICBM" content="{lat}, {lng}">` content. */
|
|
145
|
+
icbm?: string;
|
|
146
|
+
/** Academic citation (`citation_*`) tags. */
|
|
147
|
+
citation?: CitationMeta;
|
|
148
|
+
|
|
149
|
+
/** CSRF param name (`<meta name="csrf-param">`). */
|
|
150
|
+
csrfParam?: string;
|
|
151
|
+
/** CSRF token (`<meta name="csrf-token">`). */
|
|
152
|
+
csrfToken?: string;
|
|
153
|
+
|
|
154
|
+
/** Misc single-value tags. */
|
|
155
|
+
goImport?: string;
|
|
156
|
+
bitcoin?: string;
|
|
157
|
+
originTrial: string[];
|
|
158
|
+
monetization?: string;
|
|
159
|
+
paymentPointer?: string;
|
|
160
|
+
ampExperimentsOptIn?: string;
|
|
161
|
+
ampGoogleClientIdApi?: string;
|
|
162
|
+
|
|
163
|
+
/** `<meta itemprop="...">` tags (Microdata in head). */
|
|
164
|
+
itemprop?: {
|
|
165
|
+
name?: string;
|
|
166
|
+
description?: string;
|
|
167
|
+
image?: string;
|
|
168
|
+
} & Record<string, string | string[]>;
|
|
169
|
+
|
|
170
|
+
/** Parsed `<link>` elements. */
|
|
171
|
+
link?: LinkMeta;
|
|
172
|
+
|
|
173
|
+
/** All `<script type="application/ld+json">` entries. */
|
|
174
|
+
jsonLd: JsonLdEntry[];
|
|
175
|
+
/** All `<script type="speculationrules">` entries. */
|
|
176
|
+
speculationRules: JsonLdEntry[];
|
|
177
|
+
|
|
178
|
+
/** RDFa attributes on `<html>` (mirror of top-level fields, kept for explicit access). */
|
|
179
|
+
rdfa?: RdfaMeta;
|
|
180
|
+
/** Microdata attributes on `<html>`. */
|
|
181
|
+
microdata?: MicrodataMeta;
|
|
182
|
+
|
|
183
|
+
/** AMP-related markers. */
|
|
184
|
+
amp?: AmpMeta;
|
|
185
|
+
|
|
186
|
+
/** Legacy meta tags (kept for completeness). */
|
|
187
|
+
legacy?: LegacyMeta;
|
|
188
|
+
|
|
189
|
+
/** Mobile-specific meta tags. */
|
|
190
|
+
mobile?: MobileMeta;
|
|
191
|
+
|
|
192
|
+
/** Microformats2 markers in head. */
|
|
193
|
+
microformats?: MicroformatsMeta;
|
|
194
|
+
|
|
195
|
+
/** Pinterest-specific tags. */
|
|
196
|
+
pinterest?: PinterestMeta;
|
|
197
|
+
|
|
198
|
+
/** Slack/LinkedIn-specific notes (cross-references to og:* tags). */
|
|
199
|
+
slack?: SlackMeta;
|
|
200
|
+
linkedin?: LinkedInMeta;
|
|
201
|
+
|
|
202
|
+
/** Experimental / vendor-specific tags. */
|
|
203
|
+
experimental?: ExperimentalMeta;
|
|
204
|
+
|
|
205
|
+
/** Wikipedia / MediaWiki-specific tags. */
|
|
206
|
+
wiki?: WikiMeta;
|
|
207
|
+
|
|
208
|
+
/** Detected third-party tags (analytics, frameworks, libraries, etc.). */
|
|
209
|
+
tags: TagsMeta;
|
|
210
|
+
|
|
211
|
+
/** Unknown / future / vendor-specific markup not covered by typed fields. */
|
|
212
|
+
others: OthersBucket;
|
|
213
|
+
|
|
214
|
+
/** Raw head entries for debugging. Only present when `getMeta` is called with `includeRaw: true`. */
|
|
215
|
+
_raw?: readonly RawHeadEntry[];
|
|
216
|
+
};
|
|
217
|
+
|
|
218
|
+
/**
|
|
219
|
+
* Parsed `<meta name="viewport">` content.
|
|
220
|
+
* The `raw` string is always preserved so consumers can re-parse unknown directives.
|
|
221
|
+
*/
|
|
222
|
+
export type ViewportMeta = {
|
|
223
|
+
raw: string;
|
|
224
|
+
width?: string;
|
|
225
|
+
height?: string;
|
|
226
|
+
initialScale?: number;
|
|
227
|
+
minimumScale?: number;
|
|
228
|
+
maximumScale?: number;
|
|
229
|
+
userScalable?: boolean | string;
|
|
230
|
+
viewportFit?: string;
|
|
231
|
+
interactiveWidget?: string;
|
|
232
|
+
};
|
|
233
|
+
|
|
234
|
+
/**
|
|
235
|
+
* Parsed `<meta name="robots">` and crawler directives.
|
|
236
|
+
*/
|
|
237
|
+
export type RobotsMeta = {
|
|
238
|
+
raw: string;
|
|
239
|
+
index?: boolean;
|
|
240
|
+
noindex?: boolean;
|
|
241
|
+
follow?: boolean;
|
|
242
|
+
nofollow?: boolean;
|
|
243
|
+
none?: boolean;
|
|
244
|
+
all?: boolean;
|
|
245
|
+
noarchive?: boolean;
|
|
246
|
+
nosnippet?: boolean;
|
|
247
|
+
noimageindex?: boolean;
|
|
248
|
+
nocache?: boolean;
|
|
249
|
+
notranslate?: boolean;
|
|
250
|
+
noodp?: boolean;
|
|
251
|
+
noydir?: boolean;
|
|
252
|
+
indexifembedded?: boolean;
|
|
253
|
+
maxSnippet?: number;
|
|
254
|
+
maxImagePreview?: string;
|
|
255
|
+
maxVideoPreview?: number;
|
|
256
|
+
unavailableAfter?: string;
|
|
257
|
+
};
|
|
258
|
+
|
|
259
|
+
/**
|
|
260
|
+
* Parsed `<meta name="referrer">` and its individual policy values.
|
|
261
|
+
*/
|
|
262
|
+
export type ReferrerMeta = {
|
|
263
|
+
raw: string;
|
|
264
|
+
noReferrer?: boolean;
|
|
265
|
+
origin?: boolean;
|
|
266
|
+
originWhenCrossOrigin?: boolean;
|
|
267
|
+
strictOrigin?: boolean;
|
|
268
|
+
strictOriginWhenCrossOrigin?: boolean;
|
|
269
|
+
unsafeUrl?: boolean;
|
|
270
|
+
sameOrigin?: boolean;
|
|
271
|
+
noReferrerWhenDowngrade?: boolean;
|
|
272
|
+
};
|
|
273
|
+
|
|
274
|
+
/**
|
|
275
|
+
* Parsed `<meta name="format-detection">` content.
|
|
276
|
+
*/
|
|
277
|
+
export type FormatDetectionMeta = {
|
|
278
|
+
raw: string;
|
|
279
|
+
telephone?: boolean;
|
|
280
|
+
email?: boolean;
|
|
281
|
+
address?: boolean;
|
|
282
|
+
date?: boolean;
|
|
283
|
+
};
|
|
284
|
+
|
|
285
|
+
/**
|
|
286
|
+
* Parsed `http-equiv` attribute values.
|
|
287
|
+
*/
|
|
288
|
+
export type HttpEquivMeta = {
|
|
289
|
+
contentType?: string;
|
|
290
|
+
contentLanguage?: string;
|
|
291
|
+
defaultStyle?: string;
|
|
292
|
+
refresh?: HttpEquivRefresh;
|
|
293
|
+
xUaCompatible?: string;
|
|
294
|
+
contentSecurityPolicy?: string;
|
|
295
|
+
contentSecurityPolicyReportOnly?: string;
|
|
296
|
+
setCookie?: string;
|
|
297
|
+
pragma?: string;
|
|
298
|
+
cacheControl?: string;
|
|
299
|
+
expires?: string;
|
|
300
|
+
acceptCh?: string;
|
|
301
|
+
delegateCh?: string;
|
|
302
|
+
permissionsPolicy?: string;
|
|
303
|
+
originTrial?: string;
|
|
304
|
+
originTrialToken: string[];
|
|
305
|
+
xDnsPrefetchControl?: string;
|
|
306
|
+
windowTarget?: string;
|
|
307
|
+
imagetoolbar?: string;
|
|
308
|
+
cleartype?: string;
|
|
309
|
+
permissionsPolicyValue?: string;
|
|
310
|
+
};
|
|
311
|
+
|
|
312
|
+
/** Parsed `<meta http-equiv="refresh">` content. */
|
|
313
|
+
export type HttpEquivRefresh = {
|
|
314
|
+
raw: string;
|
|
315
|
+
seconds?: number;
|
|
316
|
+
url?: string;
|
|
317
|
+
};
|
|
318
|
+
|
|
319
|
+
/**
|
|
320
|
+
* Open Graph tags including all sub-namespaces (article, book, profile, music, video).
|
|
321
|
+
*/
|
|
322
|
+
export type OpenGraphMeta = {
|
|
323
|
+
title?: string;
|
|
324
|
+
type?: string;
|
|
325
|
+
url?: string;
|
|
326
|
+
siteName?: string;
|
|
327
|
+
description?: string;
|
|
328
|
+
determiner?: string;
|
|
329
|
+
locale?: string;
|
|
330
|
+
localeAlternate: string[];
|
|
331
|
+
|
|
332
|
+
image: string[];
|
|
333
|
+
imageUrl?: string;
|
|
334
|
+
imageSecureUrl?: string;
|
|
335
|
+
imageType?: string;
|
|
336
|
+
imageWidth?: string;
|
|
337
|
+
imageHeight?: string;
|
|
338
|
+
imageAlt?: string;
|
|
339
|
+
|
|
340
|
+
video: string[];
|
|
341
|
+
videoUrl?: string;
|
|
342
|
+
videoSecureUrl?: string;
|
|
343
|
+
videoType?: string;
|
|
344
|
+
videoWidth?: string;
|
|
345
|
+
videoHeight?: string;
|
|
346
|
+
videoAlt?: string;
|
|
347
|
+
|
|
348
|
+
audio: string[];
|
|
349
|
+
audioUrl?: string;
|
|
350
|
+
audioSecureUrl?: string;
|
|
351
|
+
audioType?: string;
|
|
352
|
+
|
|
353
|
+
article?: OgArticleMeta;
|
|
354
|
+
book?: OgBookMeta;
|
|
355
|
+
profile?: OgProfileMeta;
|
|
356
|
+
music?: OgMusicMeta;
|
|
357
|
+
videoNs?: OgVideoNsMeta;
|
|
358
|
+
};
|
|
359
|
+
|
|
360
|
+
/** Fields of the Open Graph `article` sub-namespace, held at `OpenGraphMeta.article`. `author` and `tag` are required arrays; every other field is optional. */
export type OgArticleMeta = {
|
|
361
|
+
publishedTime?: string;
|
|
362
|
+
modifiedTime?: string;
|
|
363
|
+
expirationTime?: string;
|
|
364
|
+
author: string[];
|
|
365
|
+
section?: string;
|
|
366
|
+
tag: string[];
|
|
367
|
+
publisher?: string;
|
|
368
|
+
};
|
|
369
|
+
|
|
370
|
+
/** Fields of the Open Graph `book` sub-namespace, held at `OpenGraphMeta.book`. `author` and `tag` are required arrays; the other fields are optional. */
export type OgBookMeta = {
|
|
371
|
+
author: string[];
|
|
372
|
+
isbn?: string;
|
|
373
|
+
releaseDate?: string;
|
|
374
|
+
tag: string[];
|
|
375
|
+
};
|
|
376
|
+
|
|
377
|
+
/** Fields of the Open Graph `profile` sub-namespace, held at `OpenGraphMeta.profile`. All fields are optional strings. */
export type OgProfileMeta = {
|
|
378
|
+
firstName?: string;
|
|
379
|
+
lastName?: string;
|
|
380
|
+
username?: string;
|
|
381
|
+
gender?: string;
|
|
382
|
+
};
|
|
383
|
+
|
|
384
|
+
/** Fields of the Open Graph `music` sub-namespace, held at `OpenGraphMeta.music`. `album`, `musician`, `song`, and `creator` are required arrays; the other fields are optional. */
export type OgMusicMeta = {
|
|
385
|
+
duration?: string;
|
|
386
|
+
album: string[];
|
|
387
|
+
albumDisc?: string;
|
|
388
|
+
albumTrack?: string;
|
|
389
|
+
musician: string[];
|
|
390
|
+
song: string[];
|
|
391
|
+
songDisc?: string;
|
|
392
|
+
songTrack?: string;
|
|
393
|
+
releaseDate?: string;
|
|
394
|
+
creator: string[];
|
|
395
|
+
};
|
|
396
|
+
|
|
397
|
+
/** Fields of the Open Graph `video` sub-namespace, held at `OpenGraphMeta.videoNs` (separate from the `video*` media fields declared directly on `OpenGraphMeta`). `actor`, `director`, `writer`, and `tag` are required arrays. */
export type OgVideoNsMeta = {
|
|
398
|
+
actor: string[];
|
|
399
|
+
actorRole?: string;
|
|
400
|
+
director: string[];
|
|
401
|
+
writer: string[];
|
|
402
|
+
duration?: string;
|
|
403
|
+
releaseDate?: string;
|
|
404
|
+
tag: string[];
|
|
405
|
+
series?: string;
|
|
406
|
+
};
|
|
407
|
+
|
|
408
|
+
/**
|
|
409
|
+
* Twitter Card tags.
|
|
410
|
+
*/
|
|
411
|
+
export type TwitterMeta = {
|
|
412
|
+
card?: string;
|
|
413
|
+
site?: string;
|
|
414
|
+
siteId?: string;
|
|
415
|
+
creator?: string;
|
|
416
|
+
creatorId?: string;
|
|
417
|
+
title?: string;
|
|
418
|
+
description?: string;
|
|
419
|
+
image?: string;
|
|
420
|
+
imageSrc?: string;
|
|
421
|
+
imageAlt?: string;
|
|
422
|
+
imageWidth?: string;
|
|
423
|
+
imageHeight?: string;
|
|
424
|
+
url?: string;
|
|
425
|
+
domain?: string;
|
|
426
|
+
player?: string;
|
|
427
|
+
playerWidth?: string;
|
|
428
|
+
playerHeight?: string;
|
|
429
|
+
playerStream?: string;
|
|
430
|
+
playerStreamContentType?: string;
|
|
431
|
+
appNameIphone?: string;
|
|
432
|
+
appIdIphone?: string;
|
|
433
|
+
appUrlIphone?: string;
|
|
434
|
+
appNameIpad?: string;
|
|
435
|
+
appIdIpad?: string;
|
|
436
|
+
appUrlIpad?: string;
|
|
437
|
+
appNameGoogleplay?: string;
|
|
438
|
+
appIdGoogleplay?: string;
|
|
439
|
+
appUrlGoogleplay?: string;
|
|
440
|
+
appCountry?: string;
|
|
441
|
+
label1?: string;
|
|
442
|
+
data1?: string;
|
|
443
|
+
label2?: string;
|
|
444
|
+
data2?: string;
|
|
445
|
+
widgetsCsp?: string;
|
|
446
|
+
widgetsNewEmbedDesign?: string;
|
|
447
|
+
dnt?: string;
|
|
448
|
+
};
|
|
449
|
+
|
|
450
|
+
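/** Facebook tags (`fb:*` and `facebook-domain-verification`). NOTE(review): no field here holds `facebook-domain-verification`; `VerificationMeta.facebook` looks like its home — confirm. */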
|
|
451
|
+
export type FbMeta = {
|
|
452
|
+
appId?: string;
|
|
453
|
+
admins: string[];
|
|
454
|
+
pages: string[];
|
|
455
|
+
};
|
|
456
|
+
|
|
457
|
+
/** Fediverse tags. */
|
|
458
|
+
export type FediverseMeta = {
|
|
459
|
+
creator?: string;
|
|
460
|
+
};
|
|
461
|
+
|
|
462
|
+
/** Apple iOS-specific tags. */
|
|
463
|
+
export type AppleMeta = {
|
|
464
|
+
mobileWebAppCapable?: boolean | string;
|
|
465
|
+
mobileWebAppStatusBarStyle?: string;
|
|
466
|
+
mobileWebAppTitle?: string;
|
|
467
|
+
touchFullscreen?: boolean | string;
|
|
468
|
+
itunesApp?: string;
|
|
469
|
+
mobileWebAppOrientations?: string;
|
|
470
|
+
touchIconTitle?: string;
|
|
471
|
+
touchStartupImage?: string;
|
|
472
|
+
formatDetectionTelephone?: boolean;
|
|
473
|
+
};
|
|
474
|
+
|
|
475
|
+
/** Microsoft application tile tags. */
|
|
476
|
+
export type MsApplicationMeta = {
|
|
477
|
+
tileColor?: string;
|
|
478
|
+
tileImage?: string;
|
|
479
|
+
config?: string;
|
|
480
|
+
configFile?: string;
|
|
481
|
+
navbuttonColor?: string;
|
|
482
|
+
square70x70logo?: string;
|
|
483
|
+
square150x150logo?: string;
|
|
484
|
+
square310x310logo?: string;
|
|
485
|
+
wide310x150logo?: string;
|
|
486
|
+
starturl?: string;
|
|
487
|
+
window?: string;
|
|
488
|
+
task: string[];
|
|
489
|
+
taskSeparator?: string;
|
|
490
|
+
tooltip?: string;
|
|
491
|
+
notification?: string;
|
|
492
|
+
badge?: string;
|
|
493
|
+
tapHighlight?: string;
|
|
494
|
+
allowDomainApiCalls?: string;
|
|
495
|
+
allowDomainMetaTags?: string;
|
|
496
|
+
cleartype?: string;
|
|
497
|
+
smartTagsPreventParsing?: string;
|
|
498
|
+
ieRmOff?: string;
|
|
499
|
+
};
|
|
500
|
+
|
|
501
|
+
/** Site verification tags. */
|
|
502
|
+
export type VerificationMeta = {
|
|
503
|
+
google?: string;
|
|
504
|
+
bing?: string;
|
|
505
|
+
yandex?: string;
|
|
506
|
+
baidu?: string;
|
|
507
|
+
naver?: string;
|
|
508
|
+
pinterest?: string;
|
|
509
|
+
facebook?: string;
|
|
510
|
+
alexa?: string;
|
|
511
|
+
norton?: string;
|
|
512
|
+
ahrefs?: string;
|
|
513
|
+
detectify?: string;
|
|
514
|
+
zoho?: string;
|
|
515
|
+
wot?: string;
|
|
516
|
+
seznam?: string;
|
|
517
|
+
shopify?: string;
|
|
518
|
+
brave?: string;
|
|
519
|
+
};
|
|
520
|
+
|
|
521
|
+
/** Google-specific tags. */
|
|
522
|
+
export type GoogleMeta = {
|
|
523
|
+
notranslate?: boolean;
|
|
524
|
+
nositelinkssearchbox?: boolean;
|
|
525
|
+
nopagereadaloud?: boolean;
|
|
526
|
+
translateCustomization?: string;
|
|
527
|
+
adsenseAccount?: string;
|
|
528
|
+
playApp?: string;
|
|
529
|
+
googlebotNotranslate?: boolean;
|
|
530
|
+
};
|
|
531
|
+
|
|
532
|
+
/** Geo tags. */
|
|
533
|
+
export type GeoMeta = {
|
|
534
|
+
region?: string;
|
|
535
|
+
placename?: string;
|
|
536
|
+
position?: string;
|
|
537
|
+
country?: string;
|
|
538
|
+
a1?: string;
|
|
539
|
+
a2?: string;
|
|
540
|
+
a3?: string;
|
|
541
|
+
lmk?: string;
|
|
542
|
+
};
|
|
543
|
+
|
|
544
|
+
/** Academic citation tags. */
|
|
545
|
+
export type CitationMeta = {
|
|
546
|
+
title?: string;
|
|
547
|
+
author: string[];
|
|
548
|
+
authorEmail: string[];
|
|
549
|
+
authorInstitution: string[];
|
|
550
|
+
publicationDate?: string;
|
|
551
|
+
date?: string;
|
|
552
|
+
journalTitle?: string;
|
|
553
|
+
journalAbbrev?: string;
|
|
554
|
+
conferenceTitle?: string;
|
|
555
|
+
publisher?: string;
|
|
556
|
+
volume?: string;
|
|
557
|
+
issue?: string;
|
|
558
|
+
firstpage?: string;
|
|
559
|
+
lastpage?: string;
|
|
560
|
+
doi?: string;
|
|
561
|
+
isbn?: string;
|
|
562
|
+
issn?: string;
|
|
563
|
+
language?: string;
|
|
564
|
+
keywords?: string;
|
|
565
|
+
pdfUrl?: string;
|
|
566
|
+
fulltextHtmlUrl?: string;
|
|
567
|
+
dissertationInstitution?: string;
|
|
568
|
+
technicalReportInstitution?: string;
|
|
569
|
+
technicalReportNumber?: string;
|
|
570
|
+
};
|
|
571
|
+
|
|
572
|
+
/** RDFa attributes mirrored from `<html>` element. */
|
|
573
|
+
export type RdfaMeta = {
|
|
574
|
+
prefix?: string;
|
|
575
|
+
vocab?: string;
|
|
576
|
+
typeOf?: string;
|
|
577
|
+
};
|
|
578
|
+
|
|
579
|
+
/** Microdata attributes mirrored from `<html>` element. */
|
|
580
|
+
export type MicrodataMeta = {
|
|
581
|
+
itemscope?: boolean;
|
|
582
|
+
itemtype?: string;
|
|
583
|
+
};
|
|
584
|
+
|
|
585
|
+
/** AMP markers. */
|
|
586
|
+
export type AmpMeta = {
|
|
587
|
+
enabled?: boolean;
|
|
588
|
+
lightning?: boolean;
|
|
589
|
+
canonicalFromAmp?: string;
|
|
590
|
+
amphtml?: string;
|
|
591
|
+
experimentsOptIn?: string;
|
|
592
|
+
runtimeScript?: boolean;
|
|
593
|
+
};
|
|
594
|
+
|
|
595
|
+
/** Legacy meta tags. */
|
|
596
|
+
export type LegacyMeta = {
|
|
597
|
+
msSmartTagsPreventParsing?: string;
|
|
598
|
+
imagetoolbar?: string;
|
|
599
|
+
pageVersion?: string;
|
|
600
|
+
audience?: string;
|
|
601
|
+
resourceType?: string;
|
|
602
|
+
distribution?: string;
|
|
603
|
+
docClass?: string;
|
|
604
|
+
docRights?: string;
|
|
605
|
+
docType?: string;
|
|
606
|
+
mobileOptimized?: string;
|
|
607
|
+
handheldFriendly?: string;
|
|
608
|
+
};
|
|
609
|
+
|
|
610
|
+
/** Mobile-specific meta tags. */
|
|
611
|
+
export type MobileMeta = {
|
|
612
|
+
handheldFriendly?: string;
|
|
613
|
+
mobileOptimized?: string;
|
|
614
|
+
mobileAgent?: string;
|
|
615
|
+
fullScreen?: string;
|
|
616
|
+
browsermode?: string;
|
|
617
|
+
x5Orientation?: string;
|
|
618
|
+
x5Fullscreen?: string;
|
|
619
|
+
x5PageMode?: string;
|
|
620
|
+
screenOrientation?: string;
|
|
621
|
+
layoutmode?: string;
|
|
622
|
+
imagemode?: string;
|
|
623
|
+
};
|
|
624
|
+
|
|
625
|
+
/** Microformats2 markers in head. */
|
|
626
|
+
export type MicroformatsMeta = {
|
|
627
|
+
relMe: string[];
|
|
628
|
+
};
|
|
629
|
+
|
|
630
|
+
/** Pinterest tags. */
|
|
631
|
+
export type PinterestMeta = {
|
|
632
|
+
richPin?: boolean;
|
|
633
|
+
nopin?: boolean;
|
|
634
|
+
disableRichPin?: boolean;
|
|
635
|
+
};
|
|
636
|
+
|
|
637
|
+
/** Slack-specific notes (cross-reference for og:image:width=1200). */
|
|
638
|
+
export type SlackMeta = {
|
|
639
|
+
ogImageWidth?: string;
|
|
640
|
+
};
|
|
641
|
+
|
|
642
|
+
/** LinkedIn-specific notes. */
|
|
643
|
+
export type LinkedInMeta = {
|
|
644
|
+
ogType?: string;
|
|
645
|
+
};
|
|
646
|
+
|
|
647
|
+
/** Experimental / vendor tags. */
|
|
648
|
+
export type ExperimentalMeta = {
|
|
649
|
+
darkreaderLock?: boolean;
|
|
650
|
+
turboCacheControl?: string;
|
|
651
|
+
turboVisitControl?: string;
|
|
652
|
+
viewTransition?: string;
|
|
653
|
+
};
|
|
654
|
+
|
|
655
|
+
/** Wikipedia / MediaWiki tags. */
|
|
656
|
+
export type WikiMeta = {
|
|
657
|
+
resourceLoaderDynamicStyles?: string;
|
|
658
|
+
mediawikiGenerator?: string;
|
|
659
|
+
};
|
|
660
|
+
|
|
661
|
+
/**
|
|
662
|
+
* Parsed `<link>` elements grouped by `rel`. Single-rel entries are stored on
|
|
663
|
+
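* named fields; multi-rel and unknown rels are stored on `others.link[]`. NOTE(review): `OthersBucket.link` is documented as holding only links whose every `rel` is unknown — confirm which rule applies to multi-rel links.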
|
|
664
|
+
*/
|
|
665
|
+
export type LinkMeta = {
|
|
666
|
+
canonical?: string;
|
|
667
|
+
alternateHreflang: LinkEntry[];
|
|
668
|
+
alternateMedia: LinkEntry[];
|
|
669
|
+
alternateRss: LinkEntry[];
|
|
670
|
+
alternateAtom: LinkEntry[];
|
|
671
|
+
alternateJsonFeed: LinkEntry[];
|
|
672
|
+
oembedJson?: LinkEntry;
|
|
673
|
+
oembedXml?: LinkEntry;
|
|
674
|
+
alternateActivityJson?: LinkEntry;
|
|
675
|
+
amphtml?: string;
|
|
676
|
+
author?: string;
|
|
677
|
+
bookmark?: string;
|
|
678
|
+
help?: string;
|
|
679
|
+
license?: string;
|
|
680
|
+
next?: string;
|
|
681
|
+
prev?: string;
|
|
682
|
+
previous?: string;
|
|
683
|
+
first?: string;
|
|
684
|
+
last?: string;
|
|
685
|
+
up?: string;
|
|
686
|
+
index?: string;
|
|
687
|
+
contents?: string;
|
|
688
|
+
start?: string;
|
|
689
|
+
search?: LinkEntry;
|
|
690
|
+
tag: LinkEntry[];
|
|
691
|
+
archives: LinkEntry[];
|
|
692
|
+
publisher?: string;
|
|
693
|
+
privacyPolicy?: string;
|
|
694
|
+
termsOfService?: string;
|
|
695
|
+
copyright?: string;
|
|
696
|
+
appendix: LinkEntry[];
|
|
697
|
+
chapter: LinkEntry[];
|
|
698
|
+
section: LinkEntry[];
|
|
699
|
+
subsection: LinkEntry[];
|
|
700
|
+
glossary?: string;
|
|
701
|
+
profile: LinkEntry[];
|
|
702
|
+
editUri?: string;
|
|
703
|
+
pingback?: string;
|
|
704
|
+
webmention?: string;
|
|
705
|
+
micropub?: string;
|
|
706
|
+
microsub?: string;
|
|
707
|
+
me: LinkEntry[];
|
|
708
|
+
authorizationEndpoint?: string;
|
|
709
|
+
tokenEndpoint?: string;
|
|
710
|
+
indieauthMetadata?: string;
|
|
711
|
+
openidServer?: string;
|
|
712
|
+
openidDelegate?: string;
|
|
713
|
+
openid2Provider?: string;
|
|
714
|
+
openid2LocalId?: string;
|
|
715
|
+
hub?: string;
|
|
716
|
+
self?: string;
|
|
717
|
+
payment?: string;
|
|
718
|
+
enclosure: LinkEntry[];
|
|
719
|
+
external: LinkEntry[];
|
|
720
|
+
nofollow: LinkEntry[];
|
|
721
|
+
sponsored: LinkEntry[];
|
|
722
|
+
ugc: LinkEntry[];
|
|
723
|
+
noopener: LinkEntry[];
|
|
724
|
+
noreferrer: LinkEntry[];
|
|
725
|
+
opener: LinkEntry[];
|
|
726
|
+
imageSrc?: string;
|
|
727
|
+
shortlink?: string;
|
|
728
|
+
dnsPrefetch: LinkEntry[];
|
|
729
|
+
preconnect: LinkEntry[];
|
|
730
|
+
prefetch: LinkEntry[];
|
|
731
|
+
prerender: LinkEntry[];
|
|
732
|
+
preload: LinkEntry[];
|
|
733
|
+
modulepreload: LinkEntry[];
|
|
734
|
+
expect: LinkEntry[];
|
|
735
|
+
stylesheet: LinkEntry[];
|
|
736
|
+
manifest?: string;
|
|
737
|
+
serviceworker?: string;
|
|
738
|
+
dpp?: string;
|
|
739
|
+
gbfs?: string;
|
|
740
|
+
syndication: LinkEntry[];
|
|
741
|
+
apiCatalog?: string;
|
|
742
|
+
memento?: string;
|
|
743
|
+
timegate?: string;
|
|
744
|
+
timemap?: string;
|
|
745
|
+
versionHistory?: string;
|
|
746
|
+
latestVersion?: string;
|
|
747
|
+
predecessorVersion?: string;
|
|
748
|
+
successorVersion?: string;
|
|
749
|
+
workingCopy?: string;
|
|
750
|
+
workingCopyOf?: string;
|
|
751
|
+
describedby?: string;
|
|
752
|
+
describes?: string;
|
|
753
|
+
via?: string;
|
|
754
|
+
related: LinkEntry[];
|
|
755
|
+
citeAs?: string;
|
|
756
|
+
disclosure?: string;
|
|
757
|
+
status?: string;
|
|
758
|
+
sunset?: string;
|
|
759
|
+
deprecation?: string;
|
|
760
|
+
lrdd?: string;
|
|
761
|
+
hosts?: string;
|
|
762
|
+
service?: string;
|
|
763
|
+
serviceDesc?: string;
|
|
764
|
+
serviceDoc?: string;
|
|
765
|
+
serviceMeta?: string;
|
|
766
|
+
c2paManifest?: string;
|
|
767
|
+
compressionDictionary?: string;
|
|
768
|
+
|
|
769
|
+
icon?: LinkEntry;
|
|
770
|
+
iconAny?: LinkEntry;
|
|
771
|
+
iconSvg?: LinkEntry;
|
|
772
|
+
iconSized: LinkEntry[];
|
|
773
|
+
shortcutIcon?: string;
|
|
774
|
+
appleTouchIcon?: LinkEntry;
|
|
775
|
+
appleTouchIconSized: LinkEntry[];
|
|
776
|
+
appleTouchIconPrecomposed: LinkEntry[];
|
|
777
|
+
appleTouchStartupImage: LinkEntry[];
|
|
778
|
+
appleTouchStartupImageIphone?: LinkEntry;
|
|
779
|
+
appleTouchStartupImageIpadPortrait?: LinkEntry;
|
|
780
|
+
appleTouchStartupImageIpadLandscape?: LinkEntry;
|
|
781
|
+
maskIcon?: LinkEntry;
|
|
782
|
+
fluidIcon?: LinkEntry;
|
|
783
|
+
|
|
784
|
+
securityTxt?: string;
|
|
785
|
+
};
|
|
786
|
+
|
|
787
|
+
/**
|
|
788
|
+
* Common shape of a parsed `<link>` element.
|
|
789
|
+
*/
|
|
790
|
+
export type LinkEntry = {
|
|
791
|
+
href: string;
|
|
792
|
+
rel: readonly string[];
|
|
793
|
+
type?: string;
|
|
794
|
+
media?: string;
|
|
795
|
+
sizes?: string;
|
|
796
|
+
title?: string;
|
|
797
|
+
hreflang?: string;
|
|
798
|
+
as?: string;
|
|
799
|
+
crossorigin?: string;
|
|
800
|
+
color?: string;
|
|
801
|
+
blocking?: string;
|
|
802
|
+
imagesrcset?: string;
|
|
803
|
+
};
|
|
804
|
+
|
|
805
|
+
/**
|
|
806
|
+
* A `<script type="application/ld+json">` or `<script type="speculationrules">`
|
|
807
|
+
* entry. `parsed` holds the result of `JSON.parse(raw)`; on parse failure
|
|
808
|
+
* `parseError` is set and `parsed` is `undefined`.
|
|
809
|
+
*/
|
|
810
|
+
export type JsonLdEntry = {
|
|
811
|
+
raw: string;
|
|
812
|
+
parsed?: unknown;
|
|
813
|
+
parseError?: string;
|
|
814
|
+
};
|
|
815
|
+
|
|
816
|
+
/**
|
|
817
|
+
* Catch-all bucket for markup not covered by typed fields above. Always present
|
|
818
|
+
* (empty values when no unknowns were found) so consumers can iterate without
|
|
819
|
+
* null-checking.
|
|
820
|
+
*/
|
|
821
|
+
export type OthersBucket = {
|
|
822
|
+
/** Unknown `<meta name>` → list of `content` values. */
|
|
823
|
+
meta: Record<string, string[]>;
|
|
824
|
+
/** Unknown `<meta property>` → list of `content` values. */
|
|
825
|
+
property: Record<string, string[]>;
|
|
826
|
+
/** Unknown `<meta http-equiv>` → list of `content` values. */
|
|
827
|
+
httpEquiv: Record<string, string[]>;
|
|
828
|
+
/** Unknown `<meta itemprop>` → list of `content` values. */
|
|
829
|
+
itemprop: Record<string, string[]>;
|
|
830
|
+
/** `<link>` elements whose every `rel` is unknown. */
|
|
831
|
+
link: LinkEntry[];
|
|
832
|
+
/** `<script>` elements with unknown `type` (kept for raw inspection). */
|
|
833
|
+
script: ScriptEntry[];
|
|
834
|
+
/** `<iframe>` elements (used to capture GTM noscript iframes, etc.). */
|
|
835
|
+
iframe: IframeEntry[];
|
|
836
|
+
};
|
|
837
|
+
|
|
838
|
+
/** Shape of one `<script>` element recorded in `OthersBucket.script`. `type` and `location` are always set; `content` and `src` are optional. */
export type ScriptEntry = {
|
|
839
|
+
type: string;
|
|
840
|
+
content?: string;
|
|
841
|
+
src?: string;
|
|
842
|
+
location: 'head' | 'body' | 'noscript';
|
|
843
|
+
};
|
|
844
|
+
|
|
845
|
+
/** Shape of one `<iframe>` element recorded in `OthersBucket.iframe`: its `src` plus the `location` marker (`head`, `body`, or `noscript`). */
export type IframeEntry = {
|
|
846
|
+
src: string;
|
|
847
|
+
location: 'head' | 'body' | 'noscript';
|
|
848
|
+
};
|
|
849
|
+
|
|
850
|
+
/**
|
|
851
|
+
* Detected third-party tags from the page (analytics, frameworks, libraries,
|
|
852
|
+
* etc.). Produced by `tag-detection.ts` by combining `simple-wappalyzer`
|
|
853
|
+
* results with ID extractors.
|
|
854
|
+
*/
|
|
855
|
+
export type TagsMeta = {
|
|
856
|
+
/** Wappalyzer category name → provider name → detection detail. */
|
|
857
|
+
detected: Record<string, Record<string, TagDetail>>;
|
|
858
|
+
/** Flat list of all detected entries (one per (provider, id) tuple). */
|
|
859
|
+
entries: TagEntry[];
|
|
860
|
+
};
|
|
861
|
+
|
|
862
|
+
/** Per-provider detection detail; the innermost value of `TagsMeta.detected` (category name → provider name → `TagDetail`). */
export type TagDetail = {
|
|
863
|
+
/** Real IDs extracted from the page (e.g., `G-XXXX`, `GTM-XXXX`). */
|
|
864
|
+
ids: string[];
|
|
865
|
+
/** Wappalyzer-reported version, if available. */
|
|
866
|
+
version?: string;
|
|
867
|
+
/** Wappalyzer-reported confidence (0-100), if available. */
|
|
868
|
+
confidence?: number;
|
|
869
|
+
};
|
|
870
|
+
|
|
871
|
+
/** One element of the flat `TagsMeta.entries` list. `provider`, `categories`, and `sources` are always set; `id`, `version`, and `confidence` are optional. */
export type TagEntry = {
|
|
872
|
+
provider: string;
|
|
873
|
+
categories: readonly string[];
|
|
874
|
+
id?: string;
|
|
875
|
+
version?: string;
|
|
876
|
+
confidence?: number;
|
|
877
|
+
sources: readonly TagSource[];
|
|
878
|
+
};
|
|
879
|
+
|
|
880
|
+
/** One element of `TagEntry.sources`. `type` is a closed set of string literals; `src`, `location`, and `globalName` are optional, and this type does not say which of them accompany a given `type`. */
export type TagSource = {
|
|
881
|
+
type:
|
|
882
|
+
| 'script-src'
|
|
883
|
+
| 'inline'
|
|
884
|
+
| 'iframe-src'
|
|
885
|
+
| 'window-global'
|
|
886
|
+
| 'img-src'
|
|
887
|
+
| 'header'
|
|
888
|
+
| 'meta'
|
|
889
|
+
| 'html';
|
|
890
|
+
src?: string;
|
|
891
|
+
location?: 'head' | 'body' | 'noscript';
|
|
892
|
+
globalName?: string;
|
|
893
|
+
};
|
|
894
|
+
|
|
895
|
+
/**
|
|
896
|
+
* Discriminated union of raw entries collected from the page by `collectHead`.
|
|
897
|
+
* Used as the input shape for `classify()`. Keeping this serializable lets us
|
|
898
|
+
* collect on the browser side and process on the Node side.
|
|
899
|
+
*/
|
|
900
|
+
export type RawHeadEntry =
|
|
901
|
+
| {
|
|
902
|
+
kind: 'html';
|
|
903
|
+
lang?: string;
|
|
904
|
+
dir?: string;
|
|
905
|
+
xmlns?: string;
|
|
906
|
+
prefix?: string;
|
|
907
|
+
vocab?: string;
|
|
908
|
+
typeOf?: string;
|
|
909
|
+
itemscope?: boolean;
|
|
910
|
+
itemtype?: string;
|
|
911
|
+
amp?: boolean;
|
|
912
|
+
lightning?: boolean;
|
|
913
|
+
}
|
|
914
|
+
| { kind: 'title'; content: string }
|
|
915
|
+
| { kind: 'base'; href?: string; target?: string }
|
|
916
|
+
| {
|
|
917
|
+
kind: 'meta';
|
|
918
|
+
name?: string;
|
|
919
|
+
property?: string;
|
|
920
|
+
httpEquiv?: string;
|
|
921
|
+
itemprop?: string;
|
|
922
|
+
charset?: string;
|
|
923
|
+
content?: string;
|
|
924
|
+
media?: string;
|
|
925
|
+
}
|
|
926
|
+
| {
|
|
927
|
+
kind: 'link';
|
|
928
|
+
rel: readonly string[];
|
|
929
|
+
href: string;
|
|
930
|
+
type?: string;
|
|
931
|
+
media?: string;
|
|
932
|
+
sizes?: string;
|
|
933
|
+
title?: string;
|
|
934
|
+
hreflang?: string;
|
|
935
|
+
as?: string;
|
|
936
|
+
crossorigin?: string;
|
|
937
|
+
color?: string;
|
|
938
|
+
blocking?: string;
|
|
939
|
+
imagesrcset?: string;
|
|
940
|
+
}
|
|
941
|
+
| {
|
|
942
|
+
kind: 'script';
|
|
943
|
+
scriptType: string;
|
|
944
|
+
content?: string;
|
|
945
|
+
src?: string;
|
|
946
|
+
location: 'head' | 'body' | 'noscript';
|
|
947
|
+
}
|
|
948
|
+
| { kind: 'iframe'; src: string; location: 'head' | 'body' | 'noscript' }
|
|
949
|
+
| { kind: 'window-global'; names: readonly string[] };
|