magpie-html 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +50 -22
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +156 -108
- package/dist/index.d.ts +156 -108
- package/dist/index.js +50 -22
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -3095,6 +3095,12 @@ function parseHTML(html, baseUrl) {
|
|
|
3095
3095
|
});
|
|
3096
3096
|
return document;
|
|
3097
3097
|
}
|
|
3098
|
+
function ensureDocument(input, baseUrl) {
|
|
3099
|
+
if (typeof input === "string") {
|
|
3100
|
+
return parseHTML(input, baseUrl);
|
|
3101
|
+
}
|
|
3102
|
+
return input;
|
|
3103
|
+
}
|
|
3098
3104
|
|
|
3099
3105
|
// src/utils/meta-helpers.ts
|
|
3100
3106
|
function getMetaContent(doc, name) {
|
|
@@ -3122,7 +3128,8 @@ function getMetaHttpEquiv(doc, httpEquiv) {
|
|
|
3122
3128
|
}
|
|
3123
3129
|
|
|
3124
3130
|
// src/metadata/opengraph/extract.ts
|
|
3125
|
-
function extractOpenGraph(
|
|
3131
|
+
function extractOpenGraph(input) {
|
|
3132
|
+
const doc = ensureDocument(input);
|
|
3126
3133
|
const metadata = {};
|
|
3127
3134
|
metadata.title = getMetaProperty(doc, "og:title");
|
|
3128
3135
|
metadata.type = getMetaProperty(doc, "og:type");
|
|
@@ -3335,7 +3342,8 @@ function matchesAnyType(obj, targetTypes) {
|
|
|
3335
3342
|
}
|
|
3336
3343
|
|
|
3337
3344
|
// src/metadata/schema-org/extract.ts
|
|
3338
|
-
function extractSchemaOrg(
|
|
3345
|
+
function extractSchemaOrg(input) {
|
|
3346
|
+
const doc = ensureDocument(input);
|
|
3339
3347
|
const metadata = {
|
|
3340
3348
|
jsonLd: []
|
|
3341
3349
|
};
|
|
@@ -3412,7 +3420,8 @@ function organizeByType(metadata) {
|
|
|
3412
3420
|
}
|
|
3413
3421
|
|
|
3414
3422
|
// src/metadata/seo/extract.ts
|
|
3415
|
-
function extractSEO(
|
|
3423
|
+
function extractSEO(input) {
|
|
3424
|
+
const doc = ensureDocument(input);
|
|
3416
3425
|
const metadata = {};
|
|
3417
3426
|
const titleElement = doc.querySelector("title");
|
|
3418
3427
|
if (titleElement?.textContent) {
|
|
@@ -3444,7 +3453,8 @@ function extractSEO(doc) {
|
|
|
3444
3453
|
}
|
|
3445
3454
|
|
|
3446
3455
|
// src/metadata/twitter-card/extract.ts
|
|
3447
|
-
function extractTwitterCard(
|
|
3456
|
+
function extractTwitterCard(input) {
|
|
3457
|
+
const doc = ensureDocument(input);
|
|
3448
3458
|
const metadata = {};
|
|
3449
3459
|
metadata.card = getMetaContent(doc, "twitter:card");
|
|
3450
3460
|
metadata.site = getMetaContent(doc, "twitter:site");
|
|
@@ -3601,7 +3611,8 @@ function getAllLinksByPrefix(doc, relPrefix) {
|
|
|
3601
3611
|
}
|
|
3602
3612
|
|
|
3603
3613
|
// src/metadata/icons/extract.ts
|
|
3604
|
-
function extractIcons(
|
|
3614
|
+
function extractIcons(input) {
|
|
3615
|
+
const doc = ensureDocument(input);
|
|
3605
3616
|
const metadata = {};
|
|
3606
3617
|
const iconLinks = getAllLinksByRels(doc, ["icon", "shortcut icon"]);
|
|
3607
3618
|
for (const link of iconLinks) {
|
|
@@ -3782,7 +3793,8 @@ function parseSizeString(sizeStr) {
|
|
|
3782
3793
|
}
|
|
3783
3794
|
|
|
3784
3795
|
// src/metadata/language/extract.ts
|
|
3785
|
-
function extractLanguage(
|
|
3796
|
+
function extractLanguage(input) {
|
|
3797
|
+
const doc = ensureDocument(input);
|
|
3786
3798
|
const metadata = {};
|
|
3787
3799
|
const htmlElement = doc.querySelector("html");
|
|
3788
3800
|
if (htmlElement) {
|
|
@@ -3834,7 +3846,8 @@ function extractBestLanguage(doc) {
|
|
|
3834
3846
|
}
|
|
3835
3847
|
|
|
3836
3848
|
// src/metadata/links/extract.ts
|
|
3837
|
-
function extractLinks3(
|
|
3849
|
+
function extractLinks3(input, baseUrl, options = {}) {
|
|
3850
|
+
const doc = ensureDocument(input);
|
|
3838
3851
|
const opts = normalizeOptions3(options);
|
|
3839
3852
|
const effectiveBaseUrl = getEffectiveBaseUrl(doc, baseUrl);
|
|
3840
3853
|
const baseOrigin = effectiveBaseUrl ? getOrigin(effectiveBaseUrl) : null;
|
|
@@ -4165,7 +4178,8 @@ function getStringProperty3(obj, prop) {
|
|
|
4165
4178
|
}
|
|
4166
4179
|
|
|
4167
4180
|
// src/metadata/canonical/extract.ts
|
|
4168
|
-
function extractCanonical(
|
|
4181
|
+
function extractCanonical(input) {
|
|
4182
|
+
const doc = ensureDocument(input);
|
|
4169
4183
|
const metadata = {};
|
|
4170
4184
|
metadata.canonical = getLinkHref(doc, "canonical");
|
|
4171
4185
|
const alternateLinks = getAllLinks(doc, "alternate");
|
|
@@ -4401,7 +4415,8 @@ function generateFeedSuggestions(documentUrl) {
|
|
|
4401
4415
|
}
|
|
4402
4416
|
|
|
4403
4417
|
// src/metadata/feed-discovery/extract.ts
|
|
4404
|
-
function extractFeedDiscovery(
|
|
4418
|
+
function extractFeedDiscovery(input, documentUrl) {
|
|
4419
|
+
const doc = ensureDocument(input);
|
|
4405
4420
|
const metadata = {
|
|
4406
4421
|
feeds: []
|
|
4407
4422
|
};
|
|
@@ -4578,7 +4593,8 @@ async function gatherWebsite(url) {
|
|
|
4578
4593
|
}
|
|
4579
4594
|
|
|
4580
4595
|
// src/metadata/analytics/extract.ts
|
|
4581
|
-
function extractAnalytics(
|
|
4596
|
+
function extractAnalytics(input) {
|
|
4597
|
+
const doc = ensureDocument(input);
|
|
4582
4598
|
const metadata = {};
|
|
4583
4599
|
const scripts = doc.querySelectorAll("script");
|
|
4584
4600
|
const googleAnalytics = /* @__PURE__ */ new Set();
|
|
@@ -4670,7 +4686,8 @@ function extractAnalytics(doc) {
|
|
|
4670
4686
|
}
|
|
4671
4687
|
|
|
4672
4688
|
// src/metadata/assets/extract.ts
|
|
4673
|
-
function extractAssets(
|
|
4689
|
+
function extractAssets(input, baseUrl) {
|
|
4690
|
+
const doc = ensureDocument(input);
|
|
4674
4691
|
const metadata = {};
|
|
4675
4692
|
const effectiveBaseUrl = getEffectiveBaseUrl2(doc, baseUrl);
|
|
4676
4693
|
const images = extractImages3(doc, effectiveBaseUrl);
|
|
@@ -4997,7 +5014,8 @@ function extractConnectionHints(doc, baseUrl) {
|
|
|
4997
5014
|
}
|
|
4998
5015
|
|
|
4999
5016
|
// src/metadata/copyright/extract.ts
|
|
5000
|
-
function extractCopyright(
|
|
5017
|
+
function extractCopyright(input) {
|
|
5018
|
+
const doc = ensureDocument(input);
|
|
5001
5019
|
const metadata = {};
|
|
5002
5020
|
metadata.copyright = getMetaContent(doc, "copyright");
|
|
5003
5021
|
metadata.license = getLinkHref(doc, "license");
|
|
@@ -5033,7 +5051,8 @@ function parseCopyright(copyrightString) {
|
|
|
5033
5051
|
}
|
|
5034
5052
|
|
|
5035
5053
|
// src/metadata/dublin-core/extract.ts
|
|
5036
|
-
function extractDublinCore(
|
|
5054
|
+
function extractDublinCore(input) {
|
|
5055
|
+
const doc = ensureDocument(input);
|
|
5037
5056
|
const metadata = {};
|
|
5038
5057
|
metadata.title = getMetaContent(doc, "DC.title") || getMetaContent(doc, "dcterms.title");
|
|
5039
5058
|
metadata.description = getMetaContent(doc, "DC.description") || getMetaContent(doc, "dcterms.description");
|
|
@@ -5074,7 +5093,8 @@ function extractMultiValue(doc, field) {
|
|
|
5074
5093
|
}
|
|
5075
5094
|
|
|
5076
5095
|
// src/metadata/geo/extract.ts
|
|
5077
|
-
function extractGeo(
|
|
5096
|
+
function extractGeo(input) {
|
|
5097
|
+
const doc = ensureDocument(input);
|
|
5078
5098
|
const metadata = {};
|
|
5079
5099
|
const geoPosition = getMetaContent(doc, "geo.position");
|
|
5080
5100
|
if (geoPosition) {
|
|
@@ -5131,7 +5151,8 @@ function parseICBM(icbm) {
|
|
|
5131
5151
|
}
|
|
5132
5152
|
|
|
5133
5153
|
// src/metadata/monetization/extract.ts
|
|
5134
|
-
function extractMonetization(
|
|
5154
|
+
function extractMonetization(input) {
|
|
5155
|
+
const doc = ensureDocument(input);
|
|
5135
5156
|
const metadata = {};
|
|
5136
5157
|
metadata.webMonetization = getMetaContent(doc, "monetization");
|
|
5137
5158
|
metadata.paypalVerification = getMetaContent(doc, "paypal-site-verification");
|
|
@@ -5145,7 +5166,8 @@ function extractMonetization(doc) {
|
|
|
5145
5166
|
}
|
|
5146
5167
|
|
|
5147
5168
|
// src/metadata/news/extract.ts
|
|
5148
|
-
function extractNews2(
|
|
5169
|
+
function extractNews2(input) {
|
|
5170
|
+
const doc = ensureDocument(input);
|
|
5149
5171
|
const metadata = {};
|
|
5150
5172
|
const newsKeywords = getMetaContent(doc, "news_keywords");
|
|
5151
5173
|
if (newsKeywords) {
|
|
@@ -5163,7 +5185,8 @@ function extractNews2(doc) {
|
|
|
5163
5185
|
}
|
|
5164
5186
|
|
|
5165
5187
|
// src/metadata/pagination/extract.ts
|
|
5166
|
-
function extractPagination(
|
|
5188
|
+
function extractPagination(input) {
|
|
5189
|
+
const doc = ensureDocument(input);
|
|
5167
5190
|
const metadata = {};
|
|
5168
5191
|
metadata.prev = getLinkHref(doc, "prev") || getLinkHref(doc, "previous");
|
|
5169
5192
|
metadata.next = getLinkHref(doc, "next");
|
|
@@ -5262,7 +5285,8 @@ function parseKeyValueDirective(key, value, result) {
|
|
|
5262
5285
|
}
|
|
5263
5286
|
|
|
5264
5287
|
// src/metadata/robots/extract.ts
|
|
5265
|
-
function extractRobots(
|
|
5288
|
+
function extractRobots(input) {
|
|
5289
|
+
const doc = ensureDocument(input);
|
|
5266
5290
|
const metadata = {};
|
|
5267
5291
|
const robotsContent = getMetaContent(doc, "robots");
|
|
5268
5292
|
if (robotsContent) {
|
|
@@ -5296,7 +5320,8 @@ function extractRobots(doc) {
|
|
|
5296
5320
|
}
|
|
5297
5321
|
|
|
5298
5322
|
// src/metadata/security/extract.ts
|
|
5299
|
-
function extractSecurity(
|
|
5323
|
+
function extractSecurity(input) {
|
|
5324
|
+
const doc = ensureDocument(input);
|
|
5300
5325
|
const metadata = {};
|
|
5301
5326
|
metadata.referrerPolicy = getMetaContent(doc, "referrer");
|
|
5302
5327
|
metadata.contentSecurityPolicy = getMetaHttpEquiv(doc, "Content-Security-Policy");
|
|
@@ -5347,7 +5372,8 @@ function generateSitemapSuggestions(documentUrl) {
|
|
|
5347
5372
|
}
|
|
5348
5373
|
|
|
5349
5374
|
// src/metadata/sitemap-discovery/extract.ts
|
|
5350
|
-
function extractSitemapDiscovery(
|
|
5375
|
+
function extractSitemapDiscovery(input, documentUrl) {
|
|
5376
|
+
const doc = ensureDocument(input);
|
|
5351
5377
|
const metadata = {
|
|
5352
5378
|
sitemaps: []
|
|
5353
5379
|
};
|
|
@@ -5360,7 +5386,8 @@ function extractSitemapDiscovery(doc, documentUrl) {
|
|
|
5360
5386
|
}
|
|
5361
5387
|
|
|
5362
5388
|
// src/metadata/social-profiles/extract.ts
|
|
5363
|
-
function extractSocialProfiles(
|
|
5389
|
+
function extractSocialProfiles(input) {
|
|
5390
|
+
const doc = ensureDocument(input);
|
|
5364
5391
|
const metadata = {};
|
|
5365
5392
|
metadata.twitter = getMetaContent(doc, "twitter:site") || getMetaContent(doc, "twitter:creator") || extractFromProperty(doc, "twitter:site") || extractFromProperty(doc, "twitter:creator");
|
|
5366
5393
|
if (metadata.twitter) {
|
|
@@ -5513,7 +5540,8 @@ function categorizeSchemaProfile(url, metadata) {
|
|
|
5513
5540
|
}
|
|
5514
5541
|
|
|
5515
5542
|
// src/metadata/verification/extract.ts
|
|
5516
|
-
function extractVerification(
|
|
5543
|
+
function extractVerification(input) {
|
|
5544
|
+
const doc = ensureDocument(input);
|
|
5517
5545
|
const metadata = {};
|
|
5518
5546
|
metadata.googleSiteVerification = getMetaContent(doc, "google-site-verification");
|
|
5519
5547
|
metadata.msvalidate = getMetaContent(doc, "msvalidate.01");
|