@d-zero/beholder 2.1.6 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/README.md +26 -0
- package/dist/dom-evaluation.d.ts +72 -24
- package/dist/dom-evaluation.js +310 -84
- package/dist/extract-meta.d.ts +98 -0
- package/dist/extract-meta.js +75 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.js +1 -0
- package/dist/meta/classify.d.ts +52 -0
- package/dist/meta/classify.js +731 -0
- package/dist/meta/collect-head.d.ts +63 -0
- package/dist/meta/collect-head.js +223 -0
- package/dist/meta/id-extractors.d.ts +40 -0
- package/dist/meta/id-extractors.js +196 -0
- package/dist/meta/keys.d.ts +41 -0
- package/dist/meta/keys.js +507 -0
- package/dist/meta/parsers.d.ts +74 -0
- package/dist/meta/parsers.js +293 -0
- package/dist/meta/tag-detection.d.ts +59 -0
- package/dist/meta/tag-detection.js +120 -0
- package/dist/meta/types.d.ts +874 -0
- package/dist/meta/types.js +12 -0
- package/dist/scraper.js +15 -13
- package/dist/types.d.ts +3 -38
- package/package.json +8 -5
- package/src/dom-evaluation.spec.ts +301 -73
- package/src/dom-evaluation.ts +417 -88
- package/src/extract-meta.spec.ts +247 -0
- package/src/extract-meta.ts +121 -0
- package/src/index.ts +45 -0
- package/src/meta/classify.spec.ts +281 -0
- package/src/meta/classify.ts +810 -0
- package/src/meta/collect-head.ts +247 -0
- package/src/meta/id-extractors.spec.ts +69 -0
- package/src/meta/id-extractors.ts +206 -0
- package/src/meta/keys.ts +568 -0
- package/src/meta/parsers.spec.ts +178 -0
- package/src/meta/parsers.ts +304 -0
- package/src/meta/simple-wappalyzer.d.ts +37 -0
- package/src/meta/tag-detection.spec.ts +134 -0
- package/src/meta/tag-detection.ts +161 -0
- package/src/meta/types.ts +949 -0
- package/src/scraper.ts +19 -13
- package/src/types.ts +49 -55
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -0,0 +1,507 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lookup tables mapping `<meta name>`, `<meta property>`, `<meta http-equiv>`,
|
|
3
|
+
* `<meta itemprop>`, and `<link rel>` to their dot-path in `Meta`.
|
|
4
|
+
*
|
|
5
|
+
* Each key has a single canonical lowercase form. Cross-reference keys
|
|
6
|
+
* (e.g., `format-detection` writes to both `formatDetection.*` and
|
|
7
|
+
* `apple.formatDetectionTelephone`) use `paths` with more than one entry; in the tables below, `handheldfriendly` and `msapplication-config` are examples.
|
|
8
|
+
*
|
|
9
|
+
* Values referenced from `frontmatter-keys.md` in `../../frontend-env/`.
|
|
10
|
+
* @module
|
|
11
|
+
*/
|
|
12
|
+
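/** `<meta name="X">` → dot-path(s) in `Meta`. Keys are lowercase; besides `paths`, an entry may set `transform` (`'boolean-yes'` or `'boolean-true'` here) and/or `multi: true`. */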
|
|
13
|
+
export const META_NAME_MAP = {
|
|
14
|
+
'application-name': { paths: ['applicationName'] },
|
|
15
|
+
author: { paths: ['author'] },
|
|
16
|
+
description: { paths: ['description'] },
|
|
17
|
+
generator: { paths: ['generator'] },
|
|
18
|
+
keywords: { paths: ['keywords'] },
|
|
19
|
+
creator: { paths: ['creator'] },
|
|
20
|
+
publisher: { paths: ['publisher'] },
|
|
21
|
+
'theme-color': { paths: ['themeColor'] },
|
|
22
|
+
'color-scheme': { paths: ['colorScheme'] },
|
|
23
|
+
'supported-color-schemes': { paths: ['supportedColorSchemes'] },
|
|
24
|
+
googlebot: { paths: ['googlebot'] },
|
|
25
|
+
'googlebot-news': { paths: ['googlebotNews'] },
|
|
26
|
+
'googlebot-image': { paths: ['googlebotImage'] },
|
|
27
|
+
'googlebot-video': { paths: ['googlebotVideo'] },
|
|
28
|
+
bingbot: { paths: ['bingbot'] },
|
|
29
|
+
slurp: { paths: ['slurp'] },
|
|
30
|
+
duckduckbot: { paths: ['duckduckbot'] },
|
|
31
|
+
yandex: { paths: ['yandex'] },
|
|
32
|
+
baiduspider: { paths: ['baiduspider'] },
|
|
33
|
+
ia_archiver: { paths: ['iaArchiver'] },
|
|
34
|
+
'revisit-after': { paths: ['revisitAfter'] },
|
|
35
|
+
rating: { paths: ['rating'] },
|
|
36
|
+
distribution: { paths: ['distribution'] },
|
|
37
|
+
classification: { paths: ['classification'] },
|
|
38
|
+
category: { paths: ['category'] },
|
|
39
|
+
subject: { paths: ['subject'] },
|
|
40
|
+
topic: { paths: ['topic'] },
|
|
41
|
+
summary: { paths: ['summary'] },
|
|
42
|
+
abstract: { paths: ['abstract'] },
|
|
43
|
+
audience: { paths: ['audience'] },
|
|
44
|
+
target: { paths: ['target'] },
|
|
45
|
+
copyright: { paths: ['copyright'] },
|
|
46
|
+
designer: { paths: ['designer'] },
|
|
47
|
+
owner: { paths: ['owner'] },
|
|
48
|
+
'reply-to': { paths: ['replyTo'] },
|
|
49
|
+
contact: { paths: ['contact'] },
|
|
50
|
+
'identifier-url': { paths: ['identifierUrl'] },
|
|
51
|
+
language: { paths: ['language'] },
|
|
52
|
+
revision: { paths: ['revision'] },
|
|
53
|
+
build: { paths: ['build'] },
|
|
54
|
+
version: { paths: ['version'] },
|
|
55
|
+
handheldfriendly: {
|
|
56
|
+
paths: ['handheldFriendly', 'mobile.handheldFriendly', 'legacy.handheldFriendly'],
|
|
57
|
+
},
|
|
58
|
+
mobileoptimized: {
|
|
59
|
+
paths: ['mobileOptimized', 'mobile.mobileOptimized', 'legacy.mobileOptimized'],
|
|
60
|
+
},
|
|
61
|
+
'mobile-web-app-capable': { paths: ['mobileWebAppCapable'] },
|
|
62
|
+
'application-url': { paths: ['applicationUrl'] },
|
|
63
|
+
theme: { paths: ['theme'] },
|
|
64
|
+
// Apple iOS
|
|
65
|
+
'apple-mobile-web-app-capable': {
|
|
66
|
+
paths: ['apple.mobileWebAppCapable'],
|
|
67
|
+
transform: 'boolean-yes',
|
|
68
|
+
},
|
|
69
|
+
'apple-mobile-web-app-status-bar-style': {
|
|
70
|
+
paths: ['apple.mobileWebAppStatusBarStyle'],
|
|
71
|
+
},
|
|
72
|
+
'apple-mobile-web-app-title': { paths: ['apple.mobileWebAppTitle'] },
|
|
73
|
+
'apple-touch-fullscreen': {
|
|
74
|
+
paths: ['apple.touchFullscreen'],
|
|
75
|
+
transform: 'boolean-yes',
|
|
76
|
+
},
|
|
77
|
+
'apple-itunes-app': { paths: ['apple.itunesApp'] },
|
|
78
|
+
'apple-mobile-web-app-orientations': { paths: ['apple.mobileWebAppOrientations'] },
|
|
79
|
+
'apple-touch-icon-title': { paths: ['apple.touchIconTitle'] },
|
|
80
|
+
'apple-touch-startup-image': { paths: ['apple.touchStartupImage'] },
|
|
81
|
+
// Microsoft
|
|
82
|
+
'msapplication-tilecolor': { paths: ['msapplication.tileColor'] },
|
|
83
|
+
'msapplication-tileimage': { paths: ['msapplication.tileImage'] },
|
|
84
|
+
'msapplication-config': { paths: ['msapplication.config', 'msapplication.configFile'] },
|
|
85
|
+
'msapplication-navbutton-color': { paths: ['msapplication.navbuttonColor'] },
|
|
86
|
+
'msapplication-square70x70logo': { paths: ['msapplication.square70x70logo'] },
|
|
87
|
+
'msapplication-square150x150logo': { paths: ['msapplication.square150x150logo'] },
|
|
88
|
+
'msapplication-square310x310logo': { paths: ['msapplication.square310x310logo'] },
|
|
89
|
+
'msapplication-wide310x150logo': { paths: ['msapplication.wide310x150logo'] },
|
|
90
|
+
'msapplication-starturl': { paths: ['msapplication.starturl'] },
|
|
91
|
+
'msapplication-window': { paths: ['msapplication.window'] },
|
|
92
|
+
'msapplication-task': { paths: ['msapplication.task'], multi: true },
|
|
93
|
+
'msapplication-task-separator': { paths: ['msapplication.taskSeparator'] },
|
|
94
|
+
'msapplication-tooltip': { paths: ['msapplication.tooltip'] },
|
|
95
|
+
'msapplication-notification': { paths: ['msapplication.notification'] },
|
|
96
|
+
'msapplication-badge': { paths: ['msapplication.badge'] },
|
|
97
|
+
'msapplication-tap-highlight': { paths: ['msapplication.tapHighlight'] },
|
|
98
|
+
'msapplication-allowdomainapicalls': { paths: ['msapplication.allowDomainApiCalls'] },
|
|
99
|
+
'msapplication-allowdomainmetatags': { paths: ['msapplication.allowDomainMetaTags'] },
|
|
100
|
+
mssmarttagspreventparsing: {
|
|
101
|
+
paths: ['msapplication.smartTagsPreventParsing', 'legacy.msSmartTagsPreventParsing'],
|
|
102
|
+
},
|
|
103
|
+
ie_rm_off: { paths: ['msapplication.ieRmOff'] },
|
|
104
|
+
// Verification
|
|
105
|
+
'google-site-verification': { paths: ['verification.google'] },
|
|
106
|
+
'msvalidate.01': { paths: ['verification.bing'] },
|
|
107
|
+
'yandex-verification': { paths: ['verification.yandex'] },
|
|
108
|
+
'baidu-site-verification': { paths: ['verification.baidu'] },
|
|
109
|
+
'naver-site-verification': { paths: ['verification.naver'] },
|
|
110
|
+
'p:domain_verify': { paths: ['verification.pinterest'] },
|
|
111
|
+
'facebook-domain-verification': { paths: ['verification.facebook'] },
|
|
112
|
+
alexaverifyid: { paths: ['verification.alexa'] },
|
|
113
|
+
'norton-safeweb-site-verification': { paths: ['verification.norton'] },
|
|
114
|
+
'ahrefs-site-verification': { paths: ['verification.ahrefs'] },
|
|
115
|
+
'detectify-verification': { paths: ['verification.detectify'] },
|
|
116
|
+
'zoho-verification': { paths: ['verification.zoho'] },
|
|
117
|
+
'wot-verification': { paths: ['verification.wot'] },
|
|
118
|
+
'seznam-wmt': { paths: ['verification.seznam'] },
|
|
119
|
+
'shopify-checkout-api-token': { paths: ['verification.shopify'] },
|
|
120
|
+
'brave-rewards-verification': { paths: ['verification.brave'] },
|
|
121
|
+
// Google-specific
|
|
122
|
+
'google-translate-customization': { paths: ['google.translateCustomization'] },
|
|
123
|
+
'google-adsense-account': { paths: ['google.adsenseAccount'] },
|
|
124
|
+
'google-play-app': { paths: ['google.playApp'] },
|
|
125
|
+
// Dublin Core
|
|
126
|
+
'dc.title': { paths: ['dc.title'] },
|
|
127
|
+
'dc.creator': { paths: ['dc.creator'] },
|
|
128
|
+
'dc.subject': { paths: ['dc.subject'] },
|
|
129
|
+
'dc.description': { paths: ['dc.description'] },
|
|
130
|
+
'dc.publisher': { paths: ['dc.publisher'] },
|
|
131
|
+
'dc.contributor': { paths: ['dc.contributor'] },
|
|
132
|
+
'dc.date': { paths: ['dc.date'] },
|
|
133
|
+
'dc.type': { paths: ['dc.type'] },
|
|
134
|
+
'dc.format': { paths: ['dc.format'] },
|
|
135
|
+
'dc.identifier': { paths: ['dc.identifier'] },
|
|
136
|
+
'dc.source': { paths: ['dc.source'] },
|
|
137
|
+
'dc.language': { paths: ['dc.language'] },
|
|
138
|
+
'dc.relation': { paths: ['dc.relation'] },
|
|
139
|
+
'dc.coverage': { paths: ['dc.coverage'] },
|
|
140
|
+
'dc.rights': { paths: ['dc.rights'] },
|
|
141
|
+
// DC Terms
|
|
142
|
+
'dcterms.abstract': { paths: ['dcterms.abstract'] },
|
|
143
|
+
'dcterms.accessrights': { paths: ['dcterms.accessRights'] },
|
|
144
|
+
'dcterms.accrualmethod': { paths: ['dcterms.accrualMethod'] },
|
|
145
|
+
'dcterms.accrualperiodicity': { paths: ['dcterms.accrualPeriodicity'] },
|
|
146
|
+
'dcterms.accrualpolicy': { paths: ['dcterms.accrualPolicy'] },
|
|
147
|
+
'dcterms.alternative': { paths: ['dcterms.alternative'] },
|
|
148
|
+
'dcterms.audience': { paths: ['dcterms.audience'] },
|
|
149
|
+
'dcterms.available': { paths: ['dcterms.available'] },
|
|
150
|
+
'dcterms.bibliographiccitation': { paths: ['dcterms.bibliographicCitation'] },
|
|
151
|
+
'dcterms.conformsto': { paths: ['dcterms.conformsTo'] },
|
|
152
|
+
'dcterms.created': { paths: ['dcterms.created'] },
|
|
153
|
+
'dcterms.dateaccepted': { paths: ['dcterms.dateAccepted'] },
|
|
154
|
+
'dcterms.datecopyrighted': { paths: ['dcterms.dateCopyrighted'] },
|
|
155
|
+
'dcterms.datesubmitted': { paths: ['dcterms.dateSubmitted'] },
|
|
156
|
+
'dcterms.educationlevel': { paths: ['dcterms.educationLevel'] },
|
|
157
|
+
'dcterms.extent': { paths: ['dcterms.extent'] },
|
|
158
|
+
'dcterms.hasformat': { paths: ['dcterms.hasFormat'] },
|
|
159
|
+
'dcterms.haspart': { paths: ['dcterms.hasPart'] },
|
|
160
|
+
'dcterms.hasversion': { paths: ['dcterms.hasVersion'] },
|
|
161
|
+
'dcterms.instructionalmethod': { paths: ['dcterms.instructionalMethod'] },
|
|
162
|
+
'dcterms.isformatof': { paths: ['dcterms.isFormatOf'] },
|
|
163
|
+
'dcterms.ispartof': { paths: ['dcterms.isPartOf'] },
|
|
164
|
+
'dcterms.isreferencedby': { paths: ['dcterms.isReferencedBy'] },
|
|
165
|
+
'dcterms.isreplacedby': { paths: ['dcterms.isReplacedBy'] },
|
|
166
|
+
'dcterms.isrequiredby': { paths: ['dcterms.isRequiredBy'] },
|
|
167
|
+
'dcterms.issued': { paths: ['dcterms.issued'] },
|
|
168
|
+
'dcterms.isversionof': { paths: ['dcterms.isVersionOf'] },
|
|
169
|
+
'dcterms.license': { paths: ['dcterms.license'] },
|
|
170
|
+
'dcterms.mediator': { paths: ['dcterms.mediator'] },
|
|
171
|
+
'dcterms.medium': { paths: ['dcterms.medium'] },
|
|
172
|
+
'dcterms.modified': { paths: ['dcterms.modified'] },
|
|
173
|
+
'dcterms.provenance': { paths: ['dcterms.provenance'] },
|
|
174
|
+
'dcterms.references': { paths: ['dcterms.references'] },
|
|
175
|
+
'dcterms.replaces': { paths: ['dcterms.replaces'] },
|
|
176
|
+
'dcterms.requires': { paths: ['dcterms.requires'] },
|
|
177
|
+
'dcterms.rightsholder': { paths: ['dcterms.rightsHolder'] },
|
|
178
|
+
'dcterms.spatial': { paths: ['dcterms.spatial'] },
|
|
179
|
+
'dcterms.tableofcontents': { paths: ['dcterms.tableOfContents'] },
|
|
180
|
+
'dcterms.temporal': { paths: ['dcterms.temporal'] },
|
|
181
|
+
'dcterms.valid': { paths: ['dcterms.valid'] },
|
|
182
|
+
// Geo
|
|
183
|
+
'geo.region': { paths: ['geo.region'] },
|
|
184
|
+
'geo.placename': { paths: ['geo.placename'] },
|
|
185
|
+
'geo.position': { paths: ['geo.position'] },
|
|
186
|
+
'geo.country': { paths: ['geo.country'] },
|
|
187
|
+
'geo.a1': { paths: ['geo.a1'] },
|
|
188
|
+
'geo.a2': { paths: ['geo.a2'] },
|
|
189
|
+
'geo.a3': { paths: ['geo.a3'] },
|
|
190
|
+
'geo.lmk': { paths: ['geo.lmk'] },
|
|
191
|
+
icbm: { paths: ['icbm'] },
|
|
192
|
+
// Citation
|
|
193
|
+
citation_title: { paths: ['citation.title'] },
|
|
194
|
+
citation_author: { paths: ['citation.author'], multi: true },
|
|
195
|
+
citation_author_email: { paths: ['citation.authorEmail'], multi: true },
|
|
196
|
+
citation_author_institution: { paths: ['citation.authorInstitution'], multi: true },
|
|
197
|
+
citation_publication_date: { paths: ['citation.publicationDate'] },
|
|
198
|
+
citation_date: { paths: ['citation.date'] },
|
|
199
|
+
citation_journal_title: { paths: ['citation.journalTitle'] },
|
|
200
|
+
citation_journal_abbrev: { paths: ['citation.journalAbbrev'] },
|
|
201
|
+
citation_conference_title: { paths: ['citation.conferenceTitle'] },
|
|
202
|
+
citation_publisher: { paths: ['citation.publisher'] },
|
|
203
|
+
citation_volume: { paths: ['citation.volume'] },
|
|
204
|
+
citation_issue: { paths: ['citation.issue'] },
|
|
205
|
+
citation_firstpage: { paths: ['citation.firstpage'] },
|
|
206
|
+
citation_lastpage: { paths: ['citation.lastpage'] },
|
|
207
|
+
citation_doi: { paths: ['citation.doi'] },
|
|
208
|
+
citation_isbn: { paths: ['citation.isbn'] },
|
|
209
|
+
citation_issn: { paths: ['citation.issn'] },
|
|
210
|
+
citation_language: { paths: ['citation.language'] },
|
|
211
|
+
citation_keywords: { paths: ['citation.keywords'] },
|
|
212
|
+
citation_pdf_url: { paths: ['citation.pdfUrl'] },
|
|
213
|
+
citation_fulltext_html_url: { paths: ['citation.fulltextHtmlUrl'] },
|
|
214
|
+
citation_dissertation_institution: { paths: ['citation.dissertationInstitution'] },
|
|
215
|
+
citation_technical_report_institution: {
|
|
216
|
+
paths: ['citation.technicalReportInstitution'],
|
|
217
|
+
},
|
|
218
|
+
citation_technical_report_number: { paths: ['citation.technicalReportNumber'] },
|
|
219
|
+
// CSRF
|
|
220
|
+
'csrf-param': { paths: ['csrfParam'] },
|
|
221
|
+
'csrf-token': { paths: ['csrfToken'] },
|
|
222
|
+
// Misc
|
|
223
|
+
'go-import': { paths: ['goImport'] },
|
|
224
|
+
bitcoin: { paths: ['bitcoin'] },
|
|
225
|
+
'origin-trial': { paths: ['originTrial'], multi: true },
|
|
226
|
+
monetization: { paths: ['monetization'] },
|
|
227
|
+
'payment-pointer': { paths: ['paymentPointer'] },
|
|
228
|
+
'amp-experiments-opt-in': { paths: ['ampExperimentsOptIn', 'amp.experimentsOptIn'] },
|
|
229
|
+
'amp-google-client-id-api': { paths: ['ampGoogleClientIdApi'] },
|
|
230
|
+
// Pinterest
|
|
231
|
+
'pinterest-rich-pin': { paths: ['pinterest.richPin'], transform: 'boolean-true' },
|
|
232
|
+
pinterest: { paths: ['pinterest.nopin'], transform: 'boolean-true' },
|
|
233
|
+
// Legacy
|
|
234
|
+
imagetoolbar: { paths: ['legacy.imagetoolbar'] },
|
|
235
|
+
'page-version': { paths: ['legacy.pageVersion'] },
|
|
236
|
+
'resource-type': { paths: ['legacy.resourceType'] },
|
|
237
|
+
'doc-class': { paths: ['legacy.docClass'] },
|
|
238
|
+
'doc-rights': { paths: ['legacy.docRights'] },
|
|
239
|
+
'doc-type': { paths: ['legacy.docType'] },
|
|
240
|
+
// Mobile-specific
|
|
241
|
+
'mobile-agent': { paths: ['mobile.mobileAgent'] },
|
|
242
|
+
'full-screen': { paths: ['mobile.fullScreen'] },
|
|
243
|
+
browsermode: { paths: ['mobile.browsermode'] },
|
|
244
|
+
'x5-orientation': { paths: ['mobile.x5Orientation'] },
|
|
245
|
+
'x5-fullscreen': { paths: ['mobile.x5Fullscreen'] },
|
|
246
|
+
'x5-page-mode': { paths: ['mobile.x5PageMode'] },
|
|
247
|
+
'screen-orientation': { paths: ['mobile.screenOrientation'] },
|
|
248
|
+
layoutmode: { paths: ['mobile.layoutmode'] },
|
|
249
|
+
imagemode: { paths: ['mobile.imagemode'] },
|
|
250
|
+
// Twitter Cards (treated as name in HTML even though logically property-like)
|
|
251
|
+
'twitter:card': { paths: ['twitter.card'] },
|
|
252
|
+
'twitter:site': { paths: ['twitter.site'] },
|
|
253
|
+
'twitter:site:id': { paths: ['twitter.siteId'] },
|
|
254
|
+
'twitter:creator': { paths: ['twitter.creator'] },
|
|
255
|
+
'twitter:creator:id': { paths: ['twitter.creatorId'] },
|
|
256
|
+
'twitter:title': { paths: ['twitter.title'] },
|
|
257
|
+
'twitter:description': { paths: ['twitter.description'] },
|
|
258
|
+
'twitter:image': { paths: ['twitter.image'] },
|
|
259
|
+
'twitter:image:src': { paths: ['twitter.imageSrc'] },
|
|
260
|
+
'twitter:image:alt': { paths: ['twitter.imageAlt'] },
|
|
261
|
+
'twitter:image:width': { paths: ['twitter.imageWidth'] },
|
|
262
|
+
'twitter:image:height': { paths: ['twitter.imageHeight'] },
|
|
263
|
+
'twitter:url': { paths: ['twitter.url'] },
|
|
264
|
+
'twitter:domain': { paths: ['twitter.domain'] },
|
|
265
|
+
'twitter:player': { paths: ['twitter.player'] },
|
|
266
|
+
'twitter:player:width': { paths: ['twitter.playerWidth'] },
|
|
267
|
+
'twitter:player:height': { paths: ['twitter.playerHeight'] },
|
|
268
|
+
'twitter:player:stream': { paths: ['twitter.playerStream'] },
|
|
269
|
+
'twitter:player:stream:content_type': { paths: ['twitter.playerStreamContentType'] },
|
|
270
|
+
'twitter:app:name:iphone': { paths: ['twitter.appNameIphone'] },
|
|
271
|
+
'twitter:app:id:iphone': { paths: ['twitter.appIdIphone'] },
|
|
272
|
+
'twitter:app:url:iphone': { paths: ['twitter.appUrlIphone'] },
|
|
273
|
+
'twitter:app:name:ipad': { paths: ['twitter.appNameIpad'] },
|
|
274
|
+
'twitter:app:id:ipad': { paths: ['twitter.appIdIpad'] },
|
|
275
|
+
'twitter:app:url:ipad': { paths: ['twitter.appUrlIpad'] },
|
|
276
|
+
'twitter:app:name:googleplay': { paths: ['twitter.appNameGoogleplay'] },
|
|
277
|
+
'twitter:app:id:googleplay': { paths: ['twitter.appIdGoogleplay'] },
|
|
278
|
+
'twitter:app:url:googleplay': { paths: ['twitter.appUrlGoogleplay'] },
|
|
279
|
+
'twitter:app:country': { paths: ['twitter.appCountry'] },
|
|
280
|
+
'twitter:label1': { paths: ['twitter.label1'] },
|
|
281
|
+
'twitter:data1': { paths: ['twitter.data1'] },
|
|
282
|
+
'twitter:label2': { paths: ['twitter.label2'] },
|
|
283
|
+
'twitter:data2': { paths: ['twitter.data2'] },
|
|
284
|
+
'twitter:widgets:csp': { paths: ['twitter.widgetsCsp'] },
|
|
285
|
+
'twitter:widgets:new-embed-design': { paths: ['twitter.widgetsNewEmbedDesign'] },
|
|
286
|
+
'twitter:dnt': { paths: ['twitter.dnt'] },
|
|
287
|
+
// Experimental / vendor
|
|
288
|
+
'darkreader-lock': {
|
|
289
|
+
paths: ['experimental.darkreaderLock'],
|
|
290
|
+
transform: 'boolean-true',
|
|
291
|
+
},
|
|
292
|
+
'turbo-cache-control': { paths: ['experimental.turboCacheControl'] },
|
|
293
|
+
'turbo-visit-control': { paths: ['experimental.turboVisitControl'] },
|
|
294
|
+
'view-transition': { paths: ['experimental.viewTransition'] },
|
|
295
|
+
// Wiki
|
|
296
|
+
resourceloaderdynamicstyles: { paths: ['wiki.resourceLoaderDynamicStyles'] },
|
|
297
|
+
};
|
|
298
|
+
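/** `<meta property="X">` → dot-path in `Meta`. `video:*` keys map to `og.videoNs.*`, separate from `og.video` (the `og:video` target); entries such as `og:image` and `article:tag` carry `multi: true`. */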
|
|
299
|
+
export const META_PROPERTY_MAP = {
|
|
300
|
+
'og:title': { paths: ['og.title'] },
|
|
301
|
+
'og:type': { paths: ['og.type'] },
|
|
302
|
+
'og:url': { paths: ['og.url'] },
|
|
303
|
+
'og:site_name': { paths: ['og.siteName'] },
|
|
304
|
+
'og:description': { paths: ['og.description'] },
|
|
305
|
+
'og:determiner': { paths: ['og.determiner'] },
|
|
306
|
+
'og:locale': { paths: ['og.locale'] },
|
|
307
|
+
'og:locale:alternate': { paths: ['og.localeAlternate'], multi: true },
|
|
308
|
+
'og:image': { paths: ['og.image'], multi: true },
|
|
309
|
+
'og:image:url': { paths: ['og.imageUrl'] },
|
|
310
|
+
'og:image:secure_url': { paths: ['og.imageSecureUrl'] },
|
|
311
|
+
'og:image:type': { paths: ['og.imageType'] },
|
|
312
|
+
'og:image:width': { paths: ['og.imageWidth'] },
|
|
313
|
+
'og:image:height': { paths: ['og.imageHeight'] },
|
|
314
|
+
'og:image:alt': { paths: ['og.imageAlt'] },
|
|
315
|
+
'og:video': { paths: ['og.video'], multi: true },
|
|
316
|
+
'og:video:url': { paths: ['og.videoUrl'] },
|
|
317
|
+
'og:video:secure_url': { paths: ['og.videoSecureUrl'] },
|
|
318
|
+
'og:video:type': { paths: ['og.videoType'] },
|
|
319
|
+
'og:video:width': { paths: ['og.videoWidth'] },
|
|
320
|
+
'og:video:height': { paths: ['og.videoHeight'] },
|
|
321
|
+
'og:video:alt': { paths: ['og.videoAlt'] },
|
|
322
|
+
'og:audio': { paths: ['og.audio'], multi: true },
|
|
323
|
+
'og:audio:url': { paths: ['og.audioUrl'] },
|
|
324
|
+
'og:audio:secure_url': { paths: ['og.audioSecureUrl'] },
|
|
325
|
+
'og:audio:type': { paths: ['og.audioType'] },
|
|
326
|
+
'article:published_time': { paths: ['og.article.publishedTime'] },
|
|
327
|
+
'article:modified_time': { paths: ['og.article.modifiedTime'] },
|
|
328
|
+
'article:expiration_time': { paths: ['og.article.expirationTime'] },
|
|
329
|
+
'article:author': { paths: ['og.article.author'], multi: true },
|
|
330
|
+
'article:section': { paths: ['og.article.section'] },
|
|
331
|
+
'article:tag': { paths: ['og.article.tag'], multi: true },
|
|
332
|
+
'article:publisher': { paths: ['og.article.publisher'] },
|
|
333
|
+
'book:author': { paths: ['og.book.author'], multi: true },
|
|
334
|
+
'book:isbn': { paths: ['og.book.isbn'] },
|
|
335
|
+
'book:release_date': { paths: ['og.book.releaseDate'] },
|
|
336
|
+
'book:tag': { paths: ['og.book.tag'], multi: true },
|
|
337
|
+
'profile:first_name': { paths: ['og.profile.firstName'] },
|
|
338
|
+
'profile:last_name': { paths: ['og.profile.lastName'] },
|
|
339
|
+
'profile:username': { paths: ['og.profile.username'] },
|
|
340
|
+
'profile:gender': { paths: ['og.profile.gender'] },
|
|
341
|
+
'music:duration': { paths: ['og.music.duration'] },
|
|
342
|
+
'music:album': { paths: ['og.music.album'], multi: true },
|
|
343
|
+
'music:album:disc': { paths: ['og.music.albumDisc'] },
|
|
344
|
+
'music:album:track': { paths: ['og.music.albumTrack'] },
|
|
345
|
+
'music:musician': { paths: ['og.music.musician'], multi: true },
|
|
346
|
+
'music:song': { paths: ['og.music.song'], multi: true },
|
|
347
|
+
'music:song:disc': { paths: ['og.music.songDisc'] },
|
|
348
|
+
'music:song:track': { paths: ['og.music.songTrack'] },
|
|
349
|
+
'music:release_date': { paths: ['og.music.releaseDate'] },
|
|
350
|
+
'music:creator': { paths: ['og.music.creator'], multi: true },
|
|
351
|
+
'video:actor': { paths: ['og.videoNs.actor'], multi: true },
|
|
352
|
+
'video:actor:role': { paths: ['og.videoNs.actorRole'] },
|
|
353
|
+
'video:director': { paths: ['og.videoNs.director'], multi: true },
|
|
354
|
+
'video:writer': { paths: ['og.videoNs.writer'], multi: true },
|
|
355
|
+
'video:duration': { paths: ['og.videoNs.duration'] },
|
|
356
|
+
'video:release_date': { paths: ['og.videoNs.releaseDate'] },
|
|
357
|
+
'video:tag': { paths: ['og.videoNs.tag'], multi: true },
|
|
358
|
+
'video:series': { paths: ['og.videoNs.series'] },
|
|
359
|
+
'fb:app_id': { paths: ['fb.appId'] },
|
|
360
|
+
'fb:admins': { paths: ['fb.admins'], multi: true },
|
|
361
|
+
'fb:pages': { paths: ['fb.pages'], multi: true },
|
|
362
|
+
'fediverse:creator': { paths: ['fediverse.creator'] },
|
|
363
|
+
};
|
|
364
|
+
/**
 * `<meta http-equiv="X">` → dot-path(s) in `Meta`, mostly under `Meta.httpEquiv`.
 *
 * Keys are the lowercase `http-equiv` value. Each entry lists its target(s)
 * in `paths`:
 * - `permissions-policy` and `origin-trial` each write to two `httpEquiv.*` paths.
 * - `cleartype` also writes outside `httpEquiv`, to `msapplication.cleartype`.
 * - `origin-trial` is the only entry flagged `multi: true`.
 *   NOTE(review): presumably `multi` means the tag may repeat and values are
 *   collected; confirm against the consumer of this table.
 */
|
|
365
|
+
export const HTTP_EQUIV_MAP = {
|
|
366
|
+
'content-type': { paths: ['httpEquiv.contentType'] },
|
|
367
|
+
'content-language': { paths: ['httpEquiv.contentLanguage'] },
|
|
368
|
+
'default-style': { paths: ['httpEquiv.defaultStyle'] },
|
|
369
|
+
refresh: { paths: ['httpEquiv.refresh'] },
|
|
370
|
+
'x-ua-compatible': { paths: ['httpEquiv.xUaCompatible'] },
|
|
371
|
+
'content-security-policy': { paths: ['httpEquiv.contentSecurityPolicy'] },
|
|
372
|
+
'content-security-policy-report-only': {
|
|
373
|
+
paths: ['httpEquiv.contentSecurityPolicyReportOnly'],
|
|
374
|
+
},
|
|
375
|
+
'set-cookie': { paths: ['httpEquiv.setCookie'] },
|
|
376
|
+
pragma: { paths: ['httpEquiv.pragma'] },
|
|
377
|
+
'cache-control': { paths: ['httpEquiv.cacheControl'] },
|
|
378
|
+
expires: { paths: ['httpEquiv.expires'] },
|
|
379
|
+
'accept-ch': { paths: ['httpEquiv.acceptCh'] },
|
|
380
|
+
'delegate-ch': { paths: ['httpEquiv.delegateCh'] },
|
|
381
|
+
'permissions-policy': {
|
|
382
|
+
paths: ['httpEquiv.permissionsPolicy', 'httpEquiv.permissionsPolicyValue'],
|
|
383
|
+
},
|
|
384
|
+
'origin-trial': {
|
|
385
|
+
paths: ['httpEquiv.originTrial', 'httpEquiv.originTrialToken'],
|
|
386
|
+
multi: true,
|
|
387
|
+
},
|
|
388
|
+
'x-dns-prefetch-control': { paths: ['httpEquiv.xDnsPrefetchControl'] },
|
|
389
|
+
'window-target': { paths: ['httpEquiv.windowTarget'] },
|
|
390
|
+
imagetoolbar: { paths: ['httpEquiv.imagetoolbar'] },
|
|
391
|
+
cleartype: { paths: ['httpEquiv.cleartype', 'msapplication.cleartype'] },
|
|
392
|
+
};
|
|
393
|
+
/**
 * `<meta itemprop="X">` → dot-path in `Meta.itemprop`.
 *
 * Only `name`, `description`, and `image` are mapped. Each entry lists its
 * target in `paths`, the same entry shape the other `<meta>` tables in this
 * module use.
 * NOTE(review): how other `itemprop` values are treated is decided by the
 * consumer of this table, which is not visible here.
 */
|
|
394
|
+
export const ITEMPROP_MAP = {
|
|
395
|
+
name: { paths: ['itemprop.name'] },
|
|
396
|
+
description: { paths: ['itemprop.description'] },
|
|
397
|
+
image: { paths: ['itemprop.image'] },
|
|
398
|
+
};
|
|
399
|
+
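/** `<link rel="X">` → dot-path in `Meta.link`. Unlike the `<meta>` tables, each entry has a single `path` plus a `cardinality` of `'href-only'`, `'single'`, or `'array'`. */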
|
|
400
|
+
export const LINK_REL_MAP = {
|
|
401
|
+
canonical: { path: 'canonical', cardinality: 'href-only' },
|
|
402
|
+
alternate: { path: 'alternateHreflang', cardinality: 'array' },
|
|
403
|
+
amphtml: { path: 'amphtml', cardinality: 'href-only' },
|
|
404
|
+
author: { path: 'author', cardinality: 'href-only' },
|
|
405
|
+
bookmark: { path: 'bookmark', cardinality: 'href-only' },
|
|
406
|
+
help: { path: 'help', cardinality: 'href-only' },
|
|
407
|
+
license: { path: 'license', cardinality: 'href-only' },
|
|
408
|
+
next: { path: 'next', cardinality: 'href-only' },
|
|
409
|
+
prev: { path: 'prev', cardinality: 'href-only' },
|
|
410
|
+
previous: { path: 'previous', cardinality: 'href-only' },
|
|
411
|
+
first: { path: 'first', cardinality: 'href-only' },
|
|
412
|
+
last: { path: 'last', cardinality: 'href-only' },
|
|
413
|
+
up: { path: 'up', cardinality: 'href-only' },
|
|
414
|
+
index: { path: 'index', cardinality: 'href-only' },
|
|
415
|
+
contents: { path: 'contents', cardinality: 'href-only' },
|
|
416
|
+
start: { path: 'start', cardinality: 'href-only' },
|
|
417
|
+
search: { path: 'search', cardinality: 'single' },
|
|
418
|
+
tag: { path: 'tag', cardinality: 'array' },
|
|
419
|
+
archives: { path: 'archives', cardinality: 'array' },
|
|
420
|
+
publisher: { path: 'publisher', cardinality: 'href-only' },
|
|
421
|
+
'privacy-policy': { path: 'privacyPolicy', cardinality: 'href-only' },
|
|
422
|
+
'terms-of-service': { path: 'termsOfService', cardinality: 'href-only' },
|
|
423
|
+
copyright: { path: 'copyright', cardinality: 'href-only' },
|
|
424
|
+
appendix: { path: 'appendix', cardinality: 'array' },
|
|
425
|
+
chapter: { path: 'chapter', cardinality: 'array' },
|
|
426
|
+
section: { path: 'section', cardinality: 'array' },
|
|
427
|
+
subsection: { path: 'subsection', cardinality: 'array' },
|
|
428
|
+
glossary: { path: 'glossary', cardinality: 'href-only' },
|
|
429
|
+
profile: { path: 'profile', cardinality: 'array' },
|
|
430
|
+
edituri: { path: 'editUri', cardinality: 'href-only' },
|
|
431
|
+
pingback: { path: 'pingback', cardinality: 'href-only' },
|
|
432
|
+
webmention: { path: 'webmention', cardinality: 'href-only' },
|
|
433
|
+
micropub: { path: 'micropub', cardinality: 'href-only' },
|
|
434
|
+
microsub: { path: 'microsub', cardinality: 'href-only' },
|
|
435
|
+
me: { path: 'me', cardinality: 'array' },
|
|
436
|
+
authorization_endpoint: { path: 'authorizationEndpoint', cardinality: 'href-only' },
|
|
437
|
+
token_endpoint: { path: 'tokenEndpoint', cardinality: 'href-only' },
|
|
438
|
+
'indieauth-metadata': { path: 'indieauthMetadata', cardinality: 'href-only' },
|
|
439
|
+
'openid.server': { path: 'openidServer', cardinality: 'href-only' },
|
|
440
|
+
'openid.delegate': { path: 'openidDelegate', cardinality: 'href-only' },
|
|
441
|
+
'openid2.provider': { path: 'openid2Provider', cardinality: 'href-only' },
|
|
442
|
+
'openid2.local_id': { path: 'openid2LocalId', cardinality: 'href-only' },
|
|
443
|
+
hub: { path: 'hub', cardinality: 'href-only' },
|
|
444
|
+
self: { path: 'self', cardinality: 'href-only' },
|
|
445
|
+
payment: { path: 'payment', cardinality: 'href-only' },
|
|
446
|
+
enclosure: { path: 'enclosure', cardinality: 'array' },
|
|
447
|
+
external: { path: 'external', cardinality: 'array' },
|
|
448
|
+
nofollow: { path: 'nofollow', cardinality: 'array' },
|
|
449
|
+
sponsored: { path: 'sponsored', cardinality: 'array' },
|
|
450
|
+
ugc: { path: 'ugc', cardinality: 'array' },
|
|
451
|
+
noopener: { path: 'noopener', cardinality: 'array' },
|
|
452
|
+
noreferrer: { path: 'noreferrer', cardinality: 'array' },
|
|
453
|
+
opener: { path: 'opener', cardinality: 'array' },
|
|
454
|
+
image_src: { path: 'imageSrc', cardinality: 'href-only' },
|
|
455
|
+
shortlink: { path: 'shortlink', cardinality: 'href-only' },
|
|
456
|
+
'dns-prefetch': { path: 'dnsPrefetch', cardinality: 'array' },
|
|
457
|
+
preconnect: { path: 'preconnect', cardinality: 'array' },
|
|
458
|
+
prefetch: { path: 'prefetch', cardinality: 'array' },
|
|
459
|
+
prerender: { path: 'prerender', cardinality: 'array' },
|
|
460
|
+
preload: { path: 'preload', cardinality: 'array' },
|
|
461
|
+
modulepreload: { path: 'modulepreload', cardinality: 'array' },
|
|
462
|
+
expect: { path: 'expect', cardinality: 'array' },
|
|
463
|
+
stylesheet: { path: 'stylesheet', cardinality: 'array' },
|
|
464
|
+
manifest: { path: 'manifest', cardinality: 'href-only' },
|
|
465
|
+
serviceworker: { path: 'serviceworker', cardinality: 'href-only' },
|
|
466
|
+
dpp: { path: 'dpp', cardinality: 'href-only' },
|
|
467
|
+
gbfs: { path: 'gbfs', cardinality: 'href-only' },
|
|
468
|
+
syndication: { path: 'syndication', cardinality: 'array' },
|
|
469
|
+
'api-catalog': { path: 'apiCatalog', cardinality: 'href-only' },
|
|
470
|
+
memento: { path: 'memento', cardinality: 'href-only' },
|
|
471
|
+
timegate: { path: 'timegate', cardinality: 'href-only' },
|
|
472
|
+
timemap: { path: 'timemap', cardinality: 'href-only' },
|
|
473
|
+
'version-history': { path: 'versionHistory', cardinality: 'href-only' },
|
|
474
|
+
'latest-version': { path: 'latestVersion', cardinality: 'href-only' },
|
|
475
|
+
'predecessor-version': { path: 'predecessorVersion', cardinality: 'href-only' },
|
|
476
|
+
'successor-version': { path: 'successorVersion', cardinality: 'href-only' },
|
|
477
|
+
'working-copy': { path: 'workingCopy', cardinality: 'href-only' },
|
|
478
|
+
'working-copy-of': { path: 'workingCopyOf', cardinality: 'href-only' },
|
|
479
|
+
describedby: { path: 'describedby', cardinality: 'href-only' },
|
|
480
|
+
describes: { path: 'describes', cardinality: 'href-only' },
|
|
481
|
+
via: { path: 'via', cardinality: 'href-only' },
|
|
482
|
+
related: { path: 'related', cardinality: 'array' },
|
|
483
|
+
'cite-as': { path: 'citeAs', cardinality: 'href-only' },
|
|
484
|
+
disclosure: { path: 'disclosure', cardinality: 'href-only' },
|
|
485
|
+
status: { path: 'status', cardinality: 'href-only' },
|
|
486
|
+
sunset: { path: 'sunset', cardinality: 'href-only' },
|
|
487
|
+
deprecation: { path: 'deprecation', cardinality: 'href-only' },
|
|
488
|
+
lrdd: { path: 'lrdd', cardinality: 'href-only' },
|
|
489
|
+
hosts: { path: 'hosts', cardinality: 'href-only' },
|
|
490
|
+
service: { path: 'service', cardinality: 'href-only' },
|
|
491
|
+
'service-desc': { path: 'serviceDesc', cardinality: 'href-only' },
|
|
492
|
+
'service-doc': { path: 'serviceDoc', cardinality: 'href-only' },
|
|
493
|
+
'service-meta': { path: 'serviceMeta', cardinality: 'href-only' },
|
|
494
|
+
'c2pa-manifest': { path: 'c2paManifest', cardinality: 'href-only' },
|
|
495
|
+
'compression-dictionary': { path: 'compressionDictionary', cardinality: 'href-only' },
|
|
496
|
+
icon: { path: 'icon', cardinality: 'single' },
|
|
497
|
+
'shortcut icon': { path: 'shortcutIcon', cardinality: 'href-only' },
|
|
498
|
+
'apple-touch-icon': { path: 'appleTouchIcon', cardinality: 'single' },
|
|
499
|
+
'apple-touch-icon-precomposed': {
|
|
500
|
+
path: 'appleTouchIconPrecomposed',
|
|
501
|
+
cardinality: 'array',
|
|
502
|
+
},
|
|
503
|
+
'apple-touch-startup-image': { path: 'appleTouchStartupImage', cardinality: 'array' },
|
|
504
|
+
'mask-icon': { path: 'maskIcon', cardinality: 'single' },
|
|
505
|
+
'fluid-icon': { path: 'fluidIcon', cardinality: 'single' },
|
|
506
|
+
'security.txt': { path: 'securityTxt', cardinality: 'href-only' },
|
|
507
|
+
};
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Value normalizers used by `classify()` to turn raw `content` strings into
|
|
3
|
+
* structured objects (viewport, robots, format-detection, etc.).
|
|
4
|
+
*
|
|
5
|
+
* Each parser is a pure function that takes the raw `content` string and
|
|
6
|
+
* returns a normalized structure. They never throw; on unrecognizable input
|
|
7
|
+
* they fall back to keeping the `raw` field only.
|
|
8
|
+
* @module
|
|
9
|
+
*/
|
|
10
|
+
import type { KeyTransform } from './keys.js';
|
|
11
|
+
import type { FormatDetectionMeta, HttpEquivRefresh, JsonLdEntry, ReferrerMeta, RobotsMeta, ViewportMeta } from './types.js';
|
|
12
|
+
/**
|
|
13
|
+
* Parses `<meta name="viewport">` content into a structured `ViewportMeta`.
|
|
14
|
+
* @param raw - The raw `content` attribute string of the viewport meta tag.
|
|
15
|
+
* @example parseViewport('width=device-width, initial-scale=1.0')
|
|
16
|
+
* → { raw: '...', width: 'device-width', initialScale: 1 }
|
|
17
|
+
*/
|
|
18
|
+
export declare function parseViewport(raw: string): ViewportMeta;
|
|
19
|
+
/**
|
|
20
|
+
* Parses `<meta name="robots">` content into a structured `RobotsMeta`.
|
|
21
|
+
* @param raw - The raw `content` attribute string of the robots meta tag.
|
|
22
|
+
* @example parseRobots('noindex, max-snippet:50, unavailable_after:2026-01-01')
|
|
23
|
+
* → { raw: '...', noindex: true, maxSnippet: 50, unavailableAfter: '2026-01-01' }
|
|
24
|
+
*/
|
|
25
|
+
export declare function parseRobots(raw: string): RobotsMeta;
|
|
26
|
+
/**
|
|
27
|
+
* Parses `<meta name="referrer">` content into a structured `ReferrerMeta`.
|
|
28
|
+
* @param raw - The raw `content` attribute string of the referrer meta tag.
|
|
29
|
+
*/
|
|
30
|
+
export declare function parseReferrer(raw: string): ReferrerMeta;
|
|
31
|
+
/**
|
|
32
|
+
* Parses `<meta name="format-detection">` content (e.g. `'telephone=no, address=no'`).
|
|
33
|
+
* @param raw - The raw `content` attribute string of the format-detection meta tag.
|
|
34
|
+
*/
|
|
35
|
+
export declare function parseFormatDetection(raw: string): FormatDetectionMeta;
|
|
36
|
+
/**
|
|
37
|
+
* Parses `<meta http-equiv="refresh">` content (e.g. `'5; url=https://...'`).
|
|
38
|
+
* @param raw - The raw `content` attribute string of the refresh meta tag.
|
|
39
|
+
*/
|
|
40
|
+
export declare function parseRefresh(raw: string): HttpEquivRefresh;
|
|
41
|
+
/**
|
|
42
|
+
* Parses a `<script type="application/ld+json">` (or speculationrules) body
|
|
43
|
+
* into a {@link JsonLdEntry}. On parse failure, the entry preserves the `raw`
|
|
44
|
+
* text and records the error message in `parseError`.
|
|
45
|
+
* @param content - The text body of the script element to parse.
|
|
46
|
+
*/
|
|
47
|
+
export declare function parseJsonLd(content: string): JsonLdEntry;
|
|
48
|
+
/**
|
|
49
|
+
* Normalizes a string value according to a {@link KeyTransform}.
|
|
50
|
+
*
|
|
51
|
+
* - `'boolean-yes'`: `'yes'` → `true`, `'no'` → `false`, anything else → raw string
|
|
52
|
+
* - `'boolean-on'`: `'on'`/`'true'`/`'1'` → `true`, `'off'`/`'false'`/`'0'` → `false`, else raw
|
|
53
|
+
* - `'boolean-true'`: `'true'` → `true`, `'false'` → `false`, else raw
|
|
54
|
+
* - `'number'`: parsed via `Number.parseFloat`, falls back to raw on NaN
|
|
55
|
+
* - `'string'` (default): returns the value unchanged
|
|
56
|
+
* @param value - The raw string value to normalize.
|
|
57
|
+
* @param transform - The transform to apply; when `undefined`, presumably the `'string'` default is used (confirm against the implementation).
|
|
58
|
+
*/
|
|
59
|
+
export declare function normalizeValue(value: string, transform: KeyTransform | undefined): string | number | boolean;
|
|
60
|
+
/**
|
|
61
|
+
* JSON-LD / speculationrules content size caps (bytes). Above these sizes the
|
|
62
|
+
* content is truncated and a `truncated` marker is emitted via `parseError`. `JSON_LD_PER_ENTRY_LIMIT` caps a single entry; `JSON_LD_TOTAL_LIMIT` presumably caps the combined size of all entries (confirm against the implementation).
|
|
63
|
+
*/
|
|
64
|
+
export declare const JSON_LD_PER_ENTRY_LIMIT = 200000;
|
|
65
|
+
export declare const JSON_LD_TOTAL_LIMIT = 1000000;
|
|
66
|
+
/**
|
|
67
|
+
* Caps a single JSON-LD entry's raw content to {@link JSON_LD_PER_ENTRY_LIMIT}.
|
|
68
|
+
* Returns the (possibly truncated) `content` string together with a `truncated` flag.
|
|
69
|
+
* @param content - The raw text of a single JSON-LD entry to cap.
|
|
70
|
+
*/
|
|
71
|
+
export declare function capJsonLdContent(content: string): {
|
|
72
|
+
content: string;
|
|
73
|
+
truncated: boolean;
|
|
74
|
+
};
|