magpie-html 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3095,6 +3095,12 @@ function parseHTML(html, baseUrl) {
3095
3095
  });
3096
3096
  return document;
3097
3097
  }
3098
/**
 * Normalizes an extractor argument: a string is handed to
 * parseHTML(input, baseUrl) and the result is returned; any other value is
 * returned unchanged.
 *
 * @param {*} input - Either a string (passed to parseHTML) or a value that is
 *   returned as-is. Non-string values are not validated here, so null or
 *   undefined pass straight through to the caller.
 * @param {*} [baseUrl] - Forwarded as the second argument to parseHTML; it is
 *   only read on the string path.
 * @returns {*} The result of parseHTML for string input, otherwise `input`.
 *
 * NOTE(review): every call site visible in this diff invokes
 * ensureDocument(input) with a single argument, including extractors that
 * themselves receive a baseUrl/documentUrl (extractLinks3, extractAssets,
 * extractFeedDiscovery, extractSitemapDiscovery). On those paths parseHTML is
 * called with baseUrl === undefined. Whether parseHTML needs it cannot be
 * determined from this hunk -- TODO confirm.
 */
+ function ensureDocument(input, baseUrl) {
3099
+ if (typeof input === "string") {
3100
+ return parseHTML(input, baseUrl);
3101
+ }
3102
+ return input;
3103
+ }
3098
3104
 
3099
3105
  // src/utils/meta-helpers.ts
3100
3106
  function getMetaContent(doc, name) {
@@ -3122,7 +3128,8 @@ function getMetaHttpEquiv(doc, httpEquiv) {
3122
3128
  }
3123
3129
 
3124
3130
  // src/metadata/opengraph/extract.ts
3125
- function extractOpenGraph(doc) {
3131
+ function extractOpenGraph(input) {
3132
+ const doc = ensureDocument(input);
3126
3133
  const metadata = {};
3127
3134
  metadata.title = getMetaProperty(doc, "og:title");
3128
3135
  metadata.type = getMetaProperty(doc, "og:type");
@@ -3335,7 +3342,8 @@ function matchesAnyType(obj, targetTypes) {
3335
3342
  }
3336
3343
 
3337
3344
  // src/metadata/schema-org/extract.ts
3338
- function extractSchemaOrg(doc) {
3345
+ function extractSchemaOrg(input) {
3346
+ const doc = ensureDocument(input);
3339
3347
  const metadata = {
3340
3348
  jsonLd: []
3341
3349
  };
@@ -3412,7 +3420,8 @@ function organizeByType(metadata) {
3412
3420
  }
3413
3421
 
3414
3422
  // src/metadata/seo/extract.ts
3415
- function extractSEO(doc) {
3423
+ function extractSEO(input) {
3424
+ const doc = ensureDocument(input);
3416
3425
  const metadata = {};
3417
3426
  const titleElement = doc.querySelector("title");
3418
3427
  if (titleElement?.textContent) {
@@ -3444,7 +3453,8 @@ function extractSEO(doc) {
3444
3453
  }
3445
3454
 
3446
3455
  // src/metadata/twitter-card/extract.ts
3447
- function extractTwitterCard(doc) {
3456
+ function extractTwitterCard(input) {
3457
+ const doc = ensureDocument(input);
3448
3458
  const metadata = {};
3449
3459
  metadata.card = getMetaContent(doc, "twitter:card");
3450
3460
  metadata.site = getMetaContent(doc, "twitter:site");
@@ -3601,7 +3611,8 @@ function getAllLinksByPrefix(doc, relPrefix) {
3601
3611
  }
3602
3612
 
3603
3613
  // src/metadata/icons/extract.ts
3604
- function extractIcons(doc) {
3614
+ function extractIcons(input) {
3615
+ const doc = ensureDocument(input);
3605
3616
  const metadata = {};
3606
3617
  const iconLinks = getAllLinksByRels(doc, ["icon", "shortcut icon"]);
3607
3618
  for (const link of iconLinks) {
@@ -3782,7 +3793,8 @@ function parseSizeString(sizeStr) {
3782
3793
  }
3783
3794
 
3784
3795
  // src/metadata/language/extract.ts
3785
- function extractLanguage(doc) {
3796
+ function extractLanguage(input) {
3797
+ const doc = ensureDocument(input);
3786
3798
  const metadata = {};
3787
3799
  const htmlElement = doc.querySelector("html");
3788
3800
  if (htmlElement) {
@@ -3834,7 +3846,8 @@ function extractBestLanguage(doc) {
3834
3846
  }
3835
3847
 
3836
3848
  // src/metadata/links/extract.ts
3837
- function extractLinks3(doc, baseUrl, options = {}) {
3849
+ function extractLinks3(input, baseUrl, options = {}) {
3850
+ const doc = ensureDocument(input);
3838
3851
  const opts = normalizeOptions3(options);
3839
3852
  const effectiveBaseUrl = getEffectiveBaseUrl(doc, baseUrl);
3840
3853
  const baseOrigin = effectiveBaseUrl ? getOrigin(effectiveBaseUrl) : null;
@@ -4165,7 +4178,8 @@ function getStringProperty3(obj, prop) {
4165
4178
  }
4166
4179
 
4167
4180
  // src/metadata/canonical/extract.ts
4168
- function extractCanonical(doc) {
4181
+ function extractCanonical(input) {
4182
+ const doc = ensureDocument(input);
4169
4183
  const metadata = {};
4170
4184
  metadata.canonical = getLinkHref(doc, "canonical");
4171
4185
  const alternateLinks = getAllLinks(doc, "alternate");
@@ -4401,7 +4415,8 @@ function generateFeedSuggestions(documentUrl) {
4401
4415
  }
4402
4416
 
4403
4417
  // src/metadata/feed-discovery/extract.ts
4404
- function extractFeedDiscovery(doc, documentUrl) {
4418
+ function extractFeedDiscovery(input, documentUrl) {
4419
+ const doc = ensureDocument(input);
4405
4420
  const metadata = {
4406
4421
  feeds: []
4407
4422
  };
@@ -4578,7 +4593,8 @@ async function gatherWebsite(url) {
4578
4593
  }
4579
4594
 
4580
4595
  // src/metadata/analytics/extract.ts
4581
- function extractAnalytics(doc) {
4596
+ function extractAnalytics(input) {
4597
+ const doc = ensureDocument(input);
4582
4598
  const metadata = {};
4583
4599
  const scripts = doc.querySelectorAll("script");
4584
4600
  const googleAnalytics = /* @__PURE__ */ new Set();
@@ -4670,7 +4686,8 @@ function extractAnalytics(doc) {
4670
4686
  }
4671
4687
 
4672
4688
  // src/metadata/assets/extract.ts
4673
- function extractAssets(doc, baseUrl) {
4689
+ function extractAssets(input, baseUrl) {
4690
+ const doc = ensureDocument(input);
4674
4691
  const metadata = {};
4675
4692
  const effectiveBaseUrl = getEffectiveBaseUrl2(doc, baseUrl);
4676
4693
  const images = extractImages3(doc, effectiveBaseUrl);
@@ -4997,7 +5014,8 @@ function extractConnectionHints(doc, baseUrl) {
4997
5014
  }
4998
5015
 
4999
5016
  // src/metadata/copyright/extract.ts
5000
- function extractCopyright(doc) {
5017
+ function extractCopyright(input) {
5018
+ const doc = ensureDocument(input);
5001
5019
  const metadata = {};
5002
5020
  metadata.copyright = getMetaContent(doc, "copyright");
5003
5021
  metadata.license = getLinkHref(doc, "license");
@@ -5033,7 +5051,8 @@ function parseCopyright(copyrightString) {
5033
5051
  }
5034
5052
 
5035
5053
  // src/metadata/dublin-core/extract.ts
5036
- function extractDublinCore(doc) {
5054
+ function extractDublinCore(input) {
5055
+ const doc = ensureDocument(input);
5037
5056
  const metadata = {};
5038
5057
  metadata.title = getMetaContent(doc, "DC.title") || getMetaContent(doc, "dcterms.title");
5039
5058
  metadata.description = getMetaContent(doc, "DC.description") || getMetaContent(doc, "dcterms.description");
@@ -5074,7 +5093,8 @@ function extractMultiValue(doc, field) {
5074
5093
  }
5075
5094
 
5076
5095
  // src/metadata/geo/extract.ts
5077
- function extractGeo(doc) {
5096
+ function extractGeo(input) {
5097
+ const doc = ensureDocument(input);
5078
5098
  const metadata = {};
5079
5099
  const geoPosition = getMetaContent(doc, "geo.position");
5080
5100
  if (geoPosition) {
@@ -5131,7 +5151,8 @@ function parseICBM(icbm) {
5131
5151
  }
5132
5152
 
5133
5153
  // src/metadata/monetization/extract.ts
5134
- function extractMonetization(doc) {
5154
+ function extractMonetization(input) {
5155
+ const doc = ensureDocument(input);
5135
5156
  const metadata = {};
5136
5157
  metadata.webMonetization = getMetaContent(doc, "monetization");
5137
5158
  metadata.paypalVerification = getMetaContent(doc, "paypal-site-verification");
@@ -5145,7 +5166,8 @@ function extractMonetization(doc) {
5145
5166
  }
5146
5167
 
5147
5168
  // src/metadata/news/extract.ts
5148
- function extractNews2(doc) {
5169
+ function extractNews2(input) {
5170
+ const doc = ensureDocument(input);
5149
5171
  const metadata = {};
5150
5172
  const newsKeywords = getMetaContent(doc, "news_keywords");
5151
5173
  if (newsKeywords) {
@@ -5163,7 +5185,8 @@ function extractNews2(doc) {
5163
5185
  }
5164
5186
 
5165
5187
  // src/metadata/pagination/extract.ts
5166
- function extractPagination(doc) {
5188
+ function extractPagination(input) {
5189
+ const doc = ensureDocument(input);
5167
5190
  const metadata = {};
5168
5191
  metadata.prev = getLinkHref(doc, "prev") || getLinkHref(doc, "previous");
5169
5192
  metadata.next = getLinkHref(doc, "next");
@@ -5262,7 +5285,8 @@ function parseKeyValueDirective(key, value, result) {
5262
5285
  }
5263
5286
 
5264
5287
  // src/metadata/robots/extract.ts
5265
- function extractRobots(doc) {
5288
+ function extractRobots(input) {
5289
+ const doc = ensureDocument(input);
5266
5290
  const metadata = {};
5267
5291
  const robotsContent = getMetaContent(doc, "robots");
5268
5292
  if (robotsContent) {
@@ -5296,7 +5320,8 @@ function extractRobots(doc) {
5296
5320
  }
5297
5321
 
5298
5322
  // src/metadata/security/extract.ts
5299
- function extractSecurity(doc) {
5323
+ function extractSecurity(input) {
5324
+ const doc = ensureDocument(input);
5300
5325
  const metadata = {};
5301
5326
  metadata.referrerPolicy = getMetaContent(doc, "referrer");
5302
5327
  metadata.contentSecurityPolicy = getMetaHttpEquiv(doc, "Content-Security-Policy");
@@ -5347,7 +5372,8 @@ function generateSitemapSuggestions(documentUrl) {
5347
5372
  }
5348
5373
 
5349
5374
  // src/metadata/sitemap-discovery/extract.ts
5350
- function extractSitemapDiscovery(doc, documentUrl) {
5375
+ function extractSitemapDiscovery(input, documentUrl) {
5376
+ const doc = ensureDocument(input);
5351
5377
  const metadata = {
5352
5378
  sitemaps: []
5353
5379
  };
@@ -5360,7 +5386,8 @@ function extractSitemapDiscovery(doc, documentUrl) {
5360
5386
  }
5361
5387
 
5362
5388
  // src/metadata/social-profiles/extract.ts
5363
- function extractSocialProfiles(doc) {
5389
+ function extractSocialProfiles(input) {
5390
+ const doc = ensureDocument(input);
5364
5391
  const metadata = {};
5365
5392
  metadata.twitter = getMetaContent(doc, "twitter:site") || getMetaContent(doc, "twitter:creator") || extractFromProperty(doc, "twitter:site") || extractFromProperty(doc, "twitter:creator");
5366
5393
  if (metadata.twitter) {
@@ -5513,7 +5540,8 @@ function categorizeSchemaProfile(url, metadata) {
5513
5540
  }
5514
5541
 
5515
5542
  // src/metadata/verification/extract.ts
5516
- function extractVerification(doc) {
5543
+ function extractVerification(input) {
5544
+ const doc = ensureDocument(input);
5517
5545
  const metadata = {};
5518
5546
  metadata.googleSiteVerification = getMetaContent(doc, "google-site-verification");
5519
5547
  metadata.msvalidate = getMetaContent(doc, "msvalidate.01");