magpie-html 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +50 -22
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +156 -108
- package/dist/index.d.ts +156 -108
- package/dist/index.js +50 -22
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -3101,6 +3101,12 @@ function parseHTML(html, baseUrl) {
|
|
|
3101
3101
|
});
|
|
3102
3102
|
return document;
|
|
3103
3103
|
}
|
|
3104
|
+
function ensureDocument(input, baseUrl) {
|
|
3105
|
+
if (typeof input === "string") {
|
|
3106
|
+
return parseHTML(input, baseUrl);
|
|
3107
|
+
}
|
|
3108
|
+
return input;
|
|
3109
|
+
}
|
|
3104
3110
|
|
|
3105
3111
|
// src/utils/meta-helpers.ts
|
|
3106
3112
|
function getMetaContent(doc, name) {
|
|
@@ -3128,7 +3134,8 @@ function getMetaHttpEquiv(doc, httpEquiv) {
|
|
|
3128
3134
|
}
|
|
3129
3135
|
|
|
3130
3136
|
// src/metadata/opengraph/extract.ts
|
|
3131
|
-
function extractOpenGraph(
|
|
3137
|
+
function extractOpenGraph(input) {
|
|
3138
|
+
const doc = ensureDocument(input);
|
|
3132
3139
|
const metadata = {};
|
|
3133
3140
|
metadata.title = getMetaProperty(doc, "og:title");
|
|
3134
3141
|
metadata.type = getMetaProperty(doc, "og:type");
|
|
@@ -3341,7 +3348,8 @@ function matchesAnyType(obj, targetTypes) {
|
|
|
3341
3348
|
}
|
|
3342
3349
|
|
|
3343
3350
|
// src/metadata/schema-org/extract.ts
|
|
3344
|
-
function extractSchemaOrg(
|
|
3351
|
+
function extractSchemaOrg(input) {
|
|
3352
|
+
const doc = ensureDocument(input);
|
|
3345
3353
|
const metadata = {
|
|
3346
3354
|
jsonLd: []
|
|
3347
3355
|
};
|
|
@@ -3418,7 +3426,8 @@ function organizeByType(metadata) {
|
|
|
3418
3426
|
}
|
|
3419
3427
|
|
|
3420
3428
|
// src/metadata/seo/extract.ts
|
|
3421
|
-
function extractSEO(
|
|
3429
|
+
function extractSEO(input) {
|
|
3430
|
+
const doc = ensureDocument(input);
|
|
3422
3431
|
const metadata = {};
|
|
3423
3432
|
const titleElement = doc.querySelector("title");
|
|
3424
3433
|
if (titleElement?.textContent) {
|
|
@@ -3450,7 +3459,8 @@ function extractSEO(doc) {
|
|
|
3450
3459
|
}
|
|
3451
3460
|
|
|
3452
3461
|
// src/metadata/twitter-card/extract.ts
|
|
3453
|
-
function extractTwitterCard(
|
|
3462
|
+
function extractTwitterCard(input) {
|
|
3463
|
+
const doc = ensureDocument(input);
|
|
3454
3464
|
const metadata = {};
|
|
3455
3465
|
metadata.card = getMetaContent(doc, "twitter:card");
|
|
3456
3466
|
metadata.site = getMetaContent(doc, "twitter:site");
|
|
@@ -3607,7 +3617,8 @@ function getAllLinksByPrefix(doc, relPrefix) {
|
|
|
3607
3617
|
}
|
|
3608
3618
|
|
|
3609
3619
|
// src/metadata/icons/extract.ts
|
|
3610
|
-
function extractIcons(
|
|
3620
|
+
function extractIcons(input) {
|
|
3621
|
+
const doc = ensureDocument(input);
|
|
3611
3622
|
const metadata = {};
|
|
3612
3623
|
const iconLinks = getAllLinksByRels(doc, ["icon", "shortcut icon"]);
|
|
3613
3624
|
for (const link of iconLinks) {
|
|
@@ -3788,7 +3799,8 @@ function parseSizeString(sizeStr) {
|
|
|
3788
3799
|
}
|
|
3789
3800
|
|
|
3790
3801
|
// src/metadata/language/extract.ts
|
|
3791
|
-
function extractLanguage(
|
|
3802
|
+
function extractLanguage(input) {
|
|
3803
|
+
const doc = ensureDocument(input);
|
|
3792
3804
|
const metadata = {};
|
|
3793
3805
|
const htmlElement = doc.querySelector("html");
|
|
3794
3806
|
if (htmlElement) {
|
|
@@ -3840,7 +3852,8 @@ function extractBestLanguage(doc) {
|
|
|
3840
3852
|
}
|
|
3841
3853
|
|
|
3842
3854
|
// src/metadata/links/extract.ts
|
|
3843
|
-
function extractLinks3(
|
|
3855
|
+
function extractLinks3(input, baseUrl, options = {}) {
|
|
3856
|
+
const doc = ensureDocument(input);
|
|
3844
3857
|
const opts = normalizeOptions3(options);
|
|
3845
3858
|
const effectiveBaseUrl = getEffectiveBaseUrl(doc, baseUrl);
|
|
3846
3859
|
const baseOrigin = effectiveBaseUrl ? getOrigin(effectiveBaseUrl) : null;
|
|
@@ -4171,7 +4184,8 @@ function getStringProperty3(obj, prop) {
|
|
|
4171
4184
|
}
|
|
4172
4185
|
|
|
4173
4186
|
// src/metadata/canonical/extract.ts
|
|
4174
|
-
function extractCanonical(
|
|
4187
|
+
function extractCanonical(input) {
|
|
4188
|
+
const doc = ensureDocument(input);
|
|
4175
4189
|
const metadata = {};
|
|
4176
4190
|
metadata.canonical = getLinkHref(doc, "canonical");
|
|
4177
4191
|
const alternateLinks = getAllLinks(doc, "alternate");
|
|
@@ -4407,7 +4421,8 @@ function generateFeedSuggestions(documentUrl) {
|
|
|
4407
4421
|
}
|
|
4408
4422
|
|
|
4409
4423
|
// src/metadata/feed-discovery/extract.ts
|
|
4410
|
-
function extractFeedDiscovery(
|
|
4424
|
+
function extractFeedDiscovery(input, documentUrl) {
|
|
4425
|
+
const doc = ensureDocument(input);
|
|
4411
4426
|
const metadata = {
|
|
4412
4427
|
feeds: []
|
|
4413
4428
|
};
|
|
@@ -4584,7 +4599,8 @@ async function gatherWebsite(url) {
|
|
|
4584
4599
|
}
|
|
4585
4600
|
|
|
4586
4601
|
// src/metadata/analytics/extract.ts
|
|
4587
|
-
function extractAnalytics(
|
|
4602
|
+
function extractAnalytics(input) {
|
|
4603
|
+
const doc = ensureDocument(input);
|
|
4588
4604
|
const metadata = {};
|
|
4589
4605
|
const scripts = doc.querySelectorAll("script");
|
|
4590
4606
|
const googleAnalytics = /* @__PURE__ */ new Set();
|
|
@@ -4676,7 +4692,8 @@ function extractAnalytics(doc) {
|
|
|
4676
4692
|
}
|
|
4677
4693
|
|
|
4678
4694
|
// src/metadata/assets/extract.ts
|
|
4679
|
-
function extractAssets(
|
|
4695
|
+
function extractAssets(input, baseUrl) {
|
|
4696
|
+
const doc = ensureDocument(input);
|
|
4680
4697
|
const metadata = {};
|
|
4681
4698
|
const effectiveBaseUrl = getEffectiveBaseUrl2(doc, baseUrl);
|
|
4682
4699
|
const images = extractImages3(doc, effectiveBaseUrl);
|
|
@@ -5003,7 +5020,8 @@ function extractConnectionHints(doc, baseUrl) {
|
|
|
5003
5020
|
}
|
|
5004
5021
|
|
|
5005
5022
|
// src/metadata/copyright/extract.ts
|
|
5006
|
-
function extractCopyright(
|
|
5023
|
+
function extractCopyright(input) {
|
|
5024
|
+
const doc = ensureDocument(input);
|
|
5007
5025
|
const metadata = {};
|
|
5008
5026
|
metadata.copyright = getMetaContent(doc, "copyright");
|
|
5009
5027
|
metadata.license = getLinkHref(doc, "license");
|
|
@@ -5039,7 +5057,8 @@ function parseCopyright(copyrightString) {
|
|
|
5039
5057
|
}
|
|
5040
5058
|
|
|
5041
5059
|
// src/metadata/dublin-core/extract.ts
|
|
5042
|
-
function extractDublinCore(
|
|
5060
|
+
function extractDublinCore(input) {
|
|
5061
|
+
const doc = ensureDocument(input);
|
|
5043
5062
|
const metadata = {};
|
|
5044
5063
|
metadata.title = getMetaContent(doc, "DC.title") || getMetaContent(doc, "dcterms.title");
|
|
5045
5064
|
metadata.description = getMetaContent(doc, "DC.description") || getMetaContent(doc, "dcterms.description");
|
|
@@ -5080,7 +5099,8 @@ function extractMultiValue(doc, field) {
|
|
|
5080
5099
|
}
|
|
5081
5100
|
|
|
5082
5101
|
// src/metadata/geo/extract.ts
|
|
5083
|
-
function extractGeo(
|
|
5102
|
+
function extractGeo(input) {
|
|
5103
|
+
const doc = ensureDocument(input);
|
|
5084
5104
|
const metadata = {};
|
|
5085
5105
|
const geoPosition = getMetaContent(doc, "geo.position");
|
|
5086
5106
|
if (geoPosition) {
|
|
@@ -5137,7 +5157,8 @@ function parseICBM(icbm) {
|
|
|
5137
5157
|
}
|
|
5138
5158
|
|
|
5139
5159
|
// src/metadata/monetization/extract.ts
|
|
5140
|
-
function extractMonetization(
|
|
5160
|
+
function extractMonetization(input) {
|
|
5161
|
+
const doc = ensureDocument(input);
|
|
5141
5162
|
const metadata = {};
|
|
5142
5163
|
metadata.webMonetization = getMetaContent(doc, "monetization");
|
|
5143
5164
|
metadata.paypalVerification = getMetaContent(doc, "paypal-site-verification");
|
|
@@ -5151,7 +5172,8 @@ function extractMonetization(doc) {
|
|
|
5151
5172
|
}
|
|
5152
5173
|
|
|
5153
5174
|
// src/metadata/news/extract.ts
|
|
5154
|
-
function extractNews2(
|
|
5175
|
+
function extractNews2(input) {
|
|
5176
|
+
const doc = ensureDocument(input);
|
|
5155
5177
|
const metadata = {};
|
|
5156
5178
|
const newsKeywords = getMetaContent(doc, "news_keywords");
|
|
5157
5179
|
if (newsKeywords) {
|
|
@@ -5169,7 +5191,8 @@ function extractNews2(doc) {
|
|
|
5169
5191
|
}
|
|
5170
5192
|
|
|
5171
5193
|
// src/metadata/pagination/extract.ts
|
|
5172
|
-
function extractPagination(
|
|
5194
|
+
function extractPagination(input) {
|
|
5195
|
+
const doc = ensureDocument(input);
|
|
5173
5196
|
const metadata = {};
|
|
5174
5197
|
metadata.prev = getLinkHref(doc, "prev") || getLinkHref(doc, "previous");
|
|
5175
5198
|
metadata.next = getLinkHref(doc, "next");
|
|
@@ -5268,7 +5291,8 @@ function parseKeyValueDirective(key, value, result) {
|
|
|
5268
5291
|
}
|
|
5269
5292
|
|
|
5270
5293
|
// src/metadata/robots/extract.ts
|
|
5271
|
-
function extractRobots(
|
|
5294
|
+
function extractRobots(input) {
|
|
5295
|
+
const doc = ensureDocument(input);
|
|
5272
5296
|
const metadata = {};
|
|
5273
5297
|
const robotsContent = getMetaContent(doc, "robots");
|
|
5274
5298
|
if (robotsContent) {
|
|
@@ -5302,7 +5326,8 @@ function extractRobots(doc) {
|
|
|
5302
5326
|
}
|
|
5303
5327
|
|
|
5304
5328
|
// src/metadata/security/extract.ts
|
|
5305
|
-
function extractSecurity(
|
|
5329
|
+
function extractSecurity(input) {
|
|
5330
|
+
const doc = ensureDocument(input);
|
|
5306
5331
|
const metadata = {};
|
|
5307
5332
|
metadata.referrerPolicy = getMetaContent(doc, "referrer");
|
|
5308
5333
|
metadata.contentSecurityPolicy = getMetaHttpEquiv(doc, "Content-Security-Policy");
|
|
@@ -5353,7 +5378,8 @@ function generateSitemapSuggestions(documentUrl) {
|
|
|
5353
5378
|
}
|
|
5354
5379
|
|
|
5355
5380
|
// src/metadata/sitemap-discovery/extract.ts
|
|
5356
|
-
function extractSitemapDiscovery(
|
|
5381
|
+
function extractSitemapDiscovery(input, documentUrl) {
|
|
5382
|
+
const doc = ensureDocument(input);
|
|
5357
5383
|
const metadata = {
|
|
5358
5384
|
sitemaps: []
|
|
5359
5385
|
};
|
|
@@ -5366,7 +5392,8 @@ function extractSitemapDiscovery(doc, documentUrl) {
|
|
|
5366
5392
|
}
|
|
5367
5393
|
|
|
5368
5394
|
// src/metadata/social-profiles/extract.ts
|
|
5369
|
-
function extractSocialProfiles(
|
|
5395
|
+
function extractSocialProfiles(input) {
|
|
5396
|
+
const doc = ensureDocument(input);
|
|
5370
5397
|
const metadata = {};
|
|
5371
5398
|
metadata.twitter = getMetaContent(doc, "twitter:site") || getMetaContent(doc, "twitter:creator") || extractFromProperty(doc, "twitter:site") || extractFromProperty(doc, "twitter:creator");
|
|
5372
5399
|
if (metadata.twitter) {
|
|
@@ -5519,7 +5546,8 @@ function categorizeSchemaProfile(url, metadata) {
|
|
|
5519
5546
|
}
|
|
5520
5547
|
|
|
5521
5548
|
// src/metadata/verification/extract.ts
|
|
5522
|
-
function extractVerification(
|
|
5549
|
+
function extractVerification(input) {
|
|
5550
|
+
const doc = ensureDocument(input);
|
|
5523
5551
|
const metadata = {};
|
|
5524
5552
|
metadata.googleSiteVerification = getMetaContent(doc, "google-site-verification");
|
|
5525
5553
|
metadata.msvalidate = getMetaContent(doc, "msvalidate.01");
|