magpie-html 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -3101,6 +3101,12 @@ function parseHTML(html, baseUrl) {
3101
3101
  });
3102
3102
  return document;
3103
3103
  }
3104
+ function ensureDocument(input, baseUrl) { // Normalizes input: strings are parsed with parseHTML, anything else is passed through unchanged.
3105
+ if (typeof input === "string") { // primitive string => treated as HTML text to parse (presumably raw markup; confirm against callers)
3106
+ return parseHTML(input, baseUrl); // baseUrl is only used on this parsing path
3107
+ }
3108
+ return input; // non-string input is returned as-is with no validation (presumably an already-parsed document)
3109
+ }
3104
3110
 
3105
3111
  // src/utils/meta-helpers.ts
3106
3112
  function getMetaContent(doc, name) {
@@ -3128,7 +3134,8 @@ function getMetaHttpEquiv(doc, httpEquiv) {
3128
3134
  }
3129
3135
 
3130
3136
  // src/metadata/opengraph/extract.ts
3131
- function extractOpenGraph(doc) {
3137
+ function extractOpenGraph(input) {
3138
+ const doc = ensureDocument(input);
3132
3139
  const metadata = {};
3133
3140
  metadata.title = getMetaProperty(doc, "og:title");
3134
3141
  metadata.type = getMetaProperty(doc, "og:type");
@@ -3341,7 +3348,8 @@ function matchesAnyType(obj, targetTypes) {
3341
3348
  }
3342
3349
 
3343
3350
  // src/metadata/schema-org/extract.ts
3344
- function extractSchemaOrg(doc) {
3351
+ function extractSchemaOrg(input) {
3352
+ const doc = ensureDocument(input);
3345
3353
  const metadata = {
3346
3354
  jsonLd: []
3347
3355
  };
@@ -3418,7 +3426,8 @@ function organizeByType(metadata) {
3418
3426
  }
3419
3427
 
3420
3428
  // src/metadata/seo/extract.ts
3421
- function extractSEO(doc) {
3429
+ function extractSEO(input) {
3430
+ const doc = ensureDocument(input);
3422
3431
  const metadata = {};
3423
3432
  const titleElement = doc.querySelector("title");
3424
3433
  if (titleElement?.textContent) {
@@ -3450,7 +3459,8 @@ function extractSEO(doc) {
3450
3459
  }
3451
3460
 
3452
3461
  // src/metadata/twitter-card/extract.ts
3453
- function extractTwitterCard(doc) {
3462
+ function extractTwitterCard(input) {
3463
+ const doc = ensureDocument(input);
3454
3464
  const metadata = {};
3455
3465
  metadata.card = getMetaContent(doc, "twitter:card");
3456
3466
  metadata.site = getMetaContent(doc, "twitter:site");
@@ -3607,7 +3617,8 @@ function getAllLinksByPrefix(doc, relPrefix) {
3607
3617
  }
3608
3618
 
3609
3619
  // src/metadata/icons/extract.ts
3610
- function extractIcons(doc) {
3620
+ function extractIcons(input) {
3621
+ const doc = ensureDocument(input);
3611
3622
  const metadata = {};
3612
3623
  const iconLinks = getAllLinksByRels(doc, ["icon", "shortcut icon"]);
3613
3624
  for (const link of iconLinks) {
@@ -3788,7 +3799,8 @@ function parseSizeString(sizeStr) {
3788
3799
  }
3789
3800
 
3790
3801
  // src/metadata/language/extract.ts
3791
- function extractLanguage(doc) {
3802
+ function extractLanguage(input) {
3803
+ const doc = ensureDocument(input);
3792
3804
  const metadata = {};
3793
3805
  const htmlElement = doc.querySelector("html");
3794
3806
  if (htmlElement) {
@@ -3840,7 +3852,8 @@ function extractBestLanguage(doc) {
3840
3852
  }
3841
3853
 
3842
3854
  // src/metadata/links/extract.ts
3843
- function extractLinks3(doc, baseUrl, options = {}) {
3855
+ function extractLinks3(input, baseUrl, options = {}) {
3856
+ const doc = ensureDocument(input);
3844
3857
  const opts = normalizeOptions3(options);
3845
3858
  const effectiveBaseUrl = getEffectiveBaseUrl(doc, baseUrl);
3846
3859
  const baseOrigin = effectiveBaseUrl ? getOrigin(effectiveBaseUrl) : null;
@@ -4171,7 +4184,8 @@ function getStringProperty3(obj, prop) {
4171
4184
  }
4172
4185
 
4173
4186
  // src/metadata/canonical/extract.ts
4174
- function extractCanonical(doc) {
4187
+ function extractCanonical(input) {
4188
+ const doc = ensureDocument(input);
4175
4189
  const metadata = {};
4176
4190
  metadata.canonical = getLinkHref(doc, "canonical");
4177
4191
  const alternateLinks = getAllLinks(doc, "alternate");
@@ -4407,7 +4421,8 @@ function generateFeedSuggestions(documentUrl) {
4407
4421
  }
4408
4422
 
4409
4423
  // src/metadata/feed-discovery/extract.ts
4410
- function extractFeedDiscovery(doc, documentUrl) {
4424
+ function extractFeedDiscovery(input, documentUrl) {
4425
+ const doc = ensureDocument(input);
4411
4426
  const metadata = {
4412
4427
  feeds: []
4413
4428
  };
@@ -4584,7 +4599,8 @@ async function gatherWebsite(url) {
4584
4599
  }
4585
4600
 
4586
4601
  // src/metadata/analytics/extract.ts
4587
- function extractAnalytics(doc) {
4602
+ function extractAnalytics(input) {
4603
+ const doc = ensureDocument(input);
4588
4604
  const metadata = {};
4589
4605
  const scripts = doc.querySelectorAll("script");
4590
4606
  const googleAnalytics = /* @__PURE__ */ new Set();
@@ -4676,7 +4692,8 @@ function extractAnalytics(doc) {
4676
4692
  }
4677
4693
 
4678
4694
  // src/metadata/assets/extract.ts
4679
- function extractAssets(doc, baseUrl) {
4695
+ function extractAssets(input, baseUrl) {
4696
+ const doc = ensureDocument(input);
4680
4697
  const metadata = {};
4681
4698
  const effectiveBaseUrl = getEffectiveBaseUrl2(doc, baseUrl);
4682
4699
  const images = extractImages3(doc, effectiveBaseUrl);
@@ -5003,7 +5020,8 @@ function extractConnectionHints(doc, baseUrl) {
5003
5020
  }
5004
5021
 
5005
5022
  // src/metadata/copyright/extract.ts
5006
- function extractCopyright(doc) {
5023
+ function extractCopyright(input) {
5024
+ const doc = ensureDocument(input);
5007
5025
  const metadata = {};
5008
5026
  metadata.copyright = getMetaContent(doc, "copyright");
5009
5027
  metadata.license = getLinkHref(doc, "license");
@@ -5039,7 +5057,8 @@ function parseCopyright(copyrightString) {
5039
5057
  }
5040
5058
 
5041
5059
  // src/metadata/dublin-core/extract.ts
5042
- function extractDublinCore(doc) {
5060
+ function extractDublinCore(input) {
5061
+ const doc = ensureDocument(input);
5043
5062
  const metadata = {};
5044
5063
  metadata.title = getMetaContent(doc, "DC.title") || getMetaContent(doc, "dcterms.title");
5045
5064
  metadata.description = getMetaContent(doc, "DC.description") || getMetaContent(doc, "dcterms.description");
@@ -5080,7 +5099,8 @@ function extractMultiValue(doc, field) {
5080
5099
  }
5081
5100
 
5082
5101
  // src/metadata/geo/extract.ts
5083
- function extractGeo(doc) {
5102
+ function extractGeo(input) {
5103
+ const doc = ensureDocument(input);
5084
5104
  const metadata = {};
5085
5105
  const geoPosition = getMetaContent(doc, "geo.position");
5086
5106
  if (geoPosition) {
@@ -5137,7 +5157,8 @@ function parseICBM(icbm) {
5137
5157
  }
5138
5158
 
5139
5159
  // src/metadata/monetization/extract.ts
5140
- function extractMonetization(doc) {
5160
+ function extractMonetization(input) {
5161
+ const doc = ensureDocument(input);
5141
5162
  const metadata = {};
5142
5163
  metadata.webMonetization = getMetaContent(doc, "monetization");
5143
5164
  metadata.paypalVerification = getMetaContent(doc, "paypal-site-verification");
@@ -5151,7 +5172,8 @@ function extractMonetization(doc) {
5151
5172
  }
5152
5173
 
5153
5174
  // src/metadata/news/extract.ts
5154
- function extractNews2(doc) {
5175
+ function extractNews2(input) {
5176
+ const doc = ensureDocument(input);
5155
5177
  const metadata = {};
5156
5178
  const newsKeywords = getMetaContent(doc, "news_keywords");
5157
5179
  if (newsKeywords) {
@@ -5169,7 +5191,8 @@ function extractNews2(doc) {
5169
5191
  }
5170
5192
 
5171
5193
  // src/metadata/pagination/extract.ts
5172
- function extractPagination(doc) {
5194
+ function extractPagination(input) {
5195
+ const doc = ensureDocument(input);
5173
5196
  const metadata = {};
5174
5197
  metadata.prev = getLinkHref(doc, "prev") || getLinkHref(doc, "previous");
5175
5198
  metadata.next = getLinkHref(doc, "next");
@@ -5268,7 +5291,8 @@ function parseKeyValueDirective(key, value, result) {
5268
5291
  }
5269
5292
 
5270
5293
  // src/metadata/robots/extract.ts
5271
- function extractRobots(doc) {
5294
+ function extractRobots(input) {
5295
+ const doc = ensureDocument(input);
5272
5296
  const metadata = {};
5273
5297
  const robotsContent = getMetaContent(doc, "robots");
5274
5298
  if (robotsContent) {
@@ -5302,7 +5326,8 @@ function extractRobots(doc) {
5302
5326
  }
5303
5327
 
5304
5328
  // src/metadata/security/extract.ts
5305
- function extractSecurity(doc) {
5329
+ function extractSecurity(input) {
5330
+ const doc = ensureDocument(input);
5306
5331
  const metadata = {};
5307
5332
  metadata.referrerPolicy = getMetaContent(doc, "referrer");
5308
5333
  metadata.contentSecurityPolicy = getMetaHttpEquiv(doc, "Content-Security-Policy");
@@ -5353,7 +5378,8 @@ function generateSitemapSuggestions(documentUrl) {
5353
5378
  }
5354
5379
 
5355
5380
  // src/metadata/sitemap-discovery/extract.ts
5356
- function extractSitemapDiscovery(doc, documentUrl) {
5381
+ function extractSitemapDiscovery(input, documentUrl) {
5382
+ const doc = ensureDocument(input);
5357
5383
  const metadata = {
5358
5384
  sitemaps: []
5359
5385
  };
@@ -5366,7 +5392,8 @@ function extractSitemapDiscovery(doc, documentUrl) {
5366
5392
  }
5367
5393
 
5368
5394
  // src/metadata/social-profiles/extract.ts
5369
- function extractSocialProfiles(doc) {
5395
+ function extractSocialProfiles(input) {
5396
+ const doc = ensureDocument(input);
5370
5397
  const metadata = {};
5371
5398
  metadata.twitter = getMetaContent(doc, "twitter:site") || getMetaContent(doc, "twitter:creator") || extractFromProperty(doc, "twitter:site") || extractFromProperty(doc, "twitter:creator");
5372
5399
  if (metadata.twitter) {
@@ -5519,7 +5546,8 @@ function categorizeSchemaProfile(url, metadata) {
5519
5546
  }
5520
5547
 
5521
5548
  // src/metadata/verification/extract.ts
5522
- function extractVerification(doc) {
5549
+ function extractVerification(input) {
5550
+ const doc = ensureDocument(input);
5523
5551
  const metadata = {};
5524
5552
  metadata.googleSiteVerification = getMetaContent(doc, "google-site-verification");
5525
5553
  metadata.msvalidate = getMetaContent(doc, "msvalidate.01");