omnifetch-lib 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,728 @@
1
+ "use strict";
2
// Bundler interop helpers (CommonJS <-> ES module namespace objects).
// Short aliases for the Object built-ins used by the helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Installs an enumerable getter on `target` for every enumerable key of `all`;
 * each value in `all` is used directly as the getter function.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { enumerable: true, get: all[name] });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 * Keys already present on `to`, and the key named by `except`, are skipped.
 * Enumerability follows the source descriptor (enumerable when none exists).
 * Returns `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/**
 * Wraps a required module in a namespace-like object.
 *
 * If the importer is in node compatibility mode, or the module is not an ESM
 * file converted to CommonJS by a Babel-compatible transform (i.e.
 * "__esModule" has not been set), "default" is set to the CommonJS
 * "module.exports" value for node compatibility.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  const seeded = needsDefault
    ? __defProp(target, "default", { value: mod, enumerable: true })
    : target;
  return __copyProps(seeded, mod);
};

/** Copies `mod` onto a fresh object flagged with a non-enumerable `__esModule: true`. */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
29
+
30
+ // src/index.ts
31
// Export table for the bundled entry point. Each entry is a getter function,
// so the referenced bindings (declared further down the file) are only read
// when a consumer accesses the property, not when this table is built.
+ var index_exports = {};
32
+ __export(index_exports, {
33
+ DEFAULT_CONFIG: () => DEFAULT_CONFIG,
34
+ default: () => index_default,
35
+ fetchJson: () => fetchJson,
36
+ fetchText: () => fetchText,
37
+ omniFetch: () => omniFetch
38
+ });
39
// Publish the table as the CommonJS exports, flagged with `__esModule`.
+ module.exports = __toCommonJS(index_exports);
40
+
41
+ // src/utils/detector.ts
42
+ var cheerio = __toESM(require("cheerio"));
43
/**
 * Lower-case markers checked by isSkeletonPage: a body counts as a loading
 * shell when a class attribute starts with one of these, or when the marker
 * followed by "-" appears anywhere in the body HTML.
 */
var SKELETON_INDICATORS = [
  "loading", "skeleton", "placeholder", "spinner",
  "shimmer", "lazy-load", "js-loading"
];

/**
 * Lower-case phrases counted by isPaywalled; each phrase found in the body
 * HTML adds one point to the paywall score.
 */
var PAYWALL_INDICATORS = [
  "paywall", "subscribe", "subscription",
  "sign-in", "sign in", "log-in", "log in", "login",
  "create account", "register to read", "premium content",
  "members only", "subscriber only", "paid content",
  "unlock this", "continue reading", "read more for", "free trial"
];

/** Minimum whitespace-collapsed text length (characters) treated as real content. */
var MIN_CONTENT_LENGTH = 500;
73
/**
 * Heuristically reports whether `html` looks like an unrendered loading shell.
 *
 * True when any of these hold:
 *  - the body HTML carries a SKELETON_INDICATORS marker (class prefix or
 *    "<marker>-" substring);
 *  - the body text is shorter than MIN_CONTENT_LENGTH and more than half of
 *    all <div> elements are empty;
 *  - the body text is shorter than MIN_CONTENT_LENGTH and <noscript> holds
 *    more than 100 characters of text.
 *
 * @param {string} html - Full page markup.
 * @returns {boolean}
 */
function isSkeletonPage(html) {
  const $ = cheerio.load(html);
  const $body = $("body");
  const markup = ($body.html() || "").toLowerCase();

  const hasLoadingMarker = SKELETON_INDICATORS.some(
    (marker) =>
      markup.includes(`class="${marker}`) ||
      markup.includes(`class='${marker}`) ||
      markup.includes(`${marker}-`)
  );
  if (hasLoadingMarker) {
    return true;
  }

  const visibleText = $body.text().replace(/\s+/g, " ").trim();
  const isThin = visibleText.length < MIN_CONTENT_LENGTH;

  if (isThin) {
    const emptyCount = $("div:empty").length;
    const divCount = $("div").length;
    if (divCount > 0 && emptyCount / divCount > 0.5) {
      return true;
    }
  }

  const noscriptText = $("noscript").text().trim();
  return noscriptText.length > 100 && isThin;
}
97
/**
 * Heuristically reports whether `html` is gated behind a paywall or login.
 *
 * True when two or more PAYWALL_INDICATORS phrases occur in the body HTML,
 * when an element matching a paywall-style class/id/data selector exists, or
 * when the first article-like container has under 1000 characters of text
 * and at least one phrase matched.
 *
 * @param {string} html - Full page markup.
 * @returns {boolean}
 */
function isPaywalled(html) {
  const $ = cheerio.load(html);
  const markup = ($("body").html() || "").toLowerCase();

  const phraseHits = PAYWALL_INDICATORS.filter((phrase) => markup.includes(phrase)).length;
  if (phraseHits >= 2) {
    return true;
  }

  const wallSelectors = [
    '[class*="paywall"]',
    '[class*="subscribe-wall"]',
    '[class*="registration-wall"]',
    '[id*="paywall"]',
    "[data-paywall]"
  ];
  if (wallSelectors.some((selector) => $(selector).length > 0)) {
    return true;
  }

  const $article = $('article, [class*="article"], [class*="content"]').first();
  if ($article.length === 0) {
    return false;
  }
  // A short article plus a single paywall phrase is treated as a truncated teaser.
  const articleLength = $article.text().trim().length;
  return articleLength < 1e3 && phraseHits >= 1;
}
131
/**
 * Extracts a cleaned page title from `html`.
 *
 * Candidates are tried in order: og:title, twitter:title, <title>, first <h1>.
 * For each candidate, whitespace is collapsed, everything from the first "|"
 * onward is dropped, and a trailing " - Site Name" suffix is removed.
 *
 * The suffix is only cut at a hyphen surrounded by spaces, so hyphenated
 * words ("COVID-19", "Self-driving") stay intact. A candidate that cleans
 * down to an empty string is skipped so the next source can supply a title.
 *
 * @param {string} html - Full page markup.
 * @returns {string} The cleaned title, or "" when no source yields one.
 */
function extractTitle(html) {
  const $ = cheerio.load(html);
  const candidates = [
    () => $('meta[property="og:title"]').attr("content"),
    () => $('meta[name="twitter:title"]').attr("content"),
    () => $("title").text(),
    () => $("h1").first().text()
  ];
  for (const readCandidate of candidates) {
    const raw = readCandidate()?.trim();
    if (!raw) {
      continue;
    }
    let cleaned = raw.replace(/\s+/g, " ").replace(/\|.*$/, "").trim();
    // Cut at the LAST " - " so "Title - Subtitle - Site" keeps its subtitle.
    const separatorAt = cleaned.lastIndexOf(" - ");
    if (separatorAt > 0) {
      cleaned = cleaned.slice(0, separatorAt).trim();
    }
    if (cleaned.length > 0) {
      return cleaned;
    }
  }
  return "";
}
147
/**
 * Reports whether `html` carries at least MIN_CONTENT_LENGTH characters of
 * whitespace-collapsed text, either in the first element matching one of the
 * content-container selectors or, failing that, in the whole <body>.
 *
 * @param {string} html - Full page markup.
 * @returns {boolean}
 */
function hasValidContent(html) {
  const $ = cheerio.load(html);
  const collapsedLength = (raw) => raw.replace(/\s+/g, " ").trim().length;

  const containerSelectors = [
    "article",
    '[class*="article"]',
    '[class*="content"]',
    '[class*="post"]',
    "main",
    '[role="main"]'
  ];
  const hasRichContainer = containerSelectors.some((selector) => {
    const $first = $(selector).first();
    return $first.length > 0 && collapsedLength($first.text()) >= MIN_CONTENT_LENGTH;
  });
  if (hasRichContainer) {
    return true;
  }

  return collapsedLength($("body").text()) >= MIN_CONTENT_LENGTH;
}
169
+
170
+ // src/tiers/light.ts
171
/**
 * Default request headers sent by lightFetch. They describe a desktop
 * Chrome 120 navigation on Windows; caller-supplied headers are merged over
 * these by lightFetch.
 */
var BROWSER_HEADERS = {
  // Client identity and content negotiation.
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
  "Accept-Language": "en-US,en;q=0.9",
  "Accept-Encoding": "gzip, deflate, br",

  // Ask for a fresh copy rather than a cached one.
  "Cache-Control": "no-cache",
  Pragma: "no-cache",

  // Client-hint headers.
  "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
  "Sec-Ch-Ua-Mobile": "?0",
  "Sec-Ch-Ua-Platform": '"Windows"',

  // Fetch-metadata headers for a top-level navigation.
  "Sec-Fetch-Dest": "document",
  "Sec-Fetch-Mode": "navigate",
  "Sec-Fetch-Site": "none",
  "Sec-Fetch-User": "?1",
  "Upgrade-Insecure-Requests": "1"
};
187
/**
 * Tier 1: fetches `url` with a plain HTTP GET using browser-like headers and
 * classifies the response.
 *
 * The timeout covers the whole exchange, including the body download: the
 * abort timer is released in `finally`, not as soon as the response headers
 * arrive, so a server that stalls mid-body is still aborted.
 *
 * @param {string} url - Page to fetch.
 * @param {number} [timeout=30000] - Milliseconds before the request is aborted.
 * @param {Object} [customHeaders={}] - Headers merged over BROWSER_HEADERS.
 * @returns {Promise<Object>} Result object. On success:
 *   `{ success: true, html, title, shouldFallback: false, finalUrl }`.
 *   On failure `success` is false, `shouldFallback` is true and
 *   `fallbackReason` is "paywall" (HTTP 401/403), "empty", "skeleton"
 *   (loading shell or too little content; `html` and `title` are included) or
 *   "error". Never rejects; thrown errors are reported in `error`.
 */
async function lightFetch(url, timeout = 3e4, customHeaders = {}) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);
  try {
    const response = await fetch(url, {
      method: "GET",
      headers: {
        ...BROWSER_HEADERS,
        ...customHeaders
      },
      redirect: "follow",
      signal: controller.signal
    });
    if (!response.ok) {
      if (response.status === 403 || response.status === 401) {
        return {
          success: false,
          shouldFallback: true,
          fallbackReason: "paywall",
          error: `HTTP ${response.status}: Access denied`
        };
      }
      return {
        success: false,
        shouldFallback: true,
        fallbackReason: "error",
        error: `HTTP ${response.status}: ${response.statusText}`
      };
    }
    // Still guarded by the abort timer.
    const html = await response.text();
    const finalUrl = response.url;
    if (!html || html.trim().length === 0) {
      return {
        success: false,
        shouldFallback: true,
        fallbackReason: "empty",
        error: "Empty response received"
      };
    }
    // A loading shell and a page with too little text are reported the same way.
    if (isSkeletonPage(html) || !hasValidContent(html)) {
      return {
        success: false,
        html,
        title: extractTitle(html),
        shouldFallback: true,
        fallbackReason: "skeleton",
        finalUrl
      };
    }
    return {
      success: true,
      html,
      title: extractTitle(html),
      shouldFallback: false,
      finalUrl
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : "Unknown error";
    const timedOut =
      (error instanceof Error && error.name === "AbortError") || errorMessage.includes("abort");
    return {
      success: false,
      shouldFallback: true,
      fallbackReason: "error",
      error: timedOut ? "Request timeout" : errorMessage
    };
  } finally {
    clearTimeout(timeoutId);
  }
}
273
+
274
+ // src/tiers/headless.ts
275
/**
 * Tier 2: asks a remote rendering endpoint to load `url` and return its HTML.
 *
 * The endpoint receives `POST { url }` as JSON and is expected to answer with
 * `{ success, html, finalUrl, error }`. The timeout covers the whole
 * exchange, including reading the response body: the abort timer is released
 * in `finally`, not when the headers arrive.
 *
 * @param {string} url - Page to render.
 * @param {string} netlifyEndpoint - URL of the rendering endpoint; when falsy
 *   an error result is returned without any network call.
 * @param {number} [timeout=30000] - Milliseconds before the request is aborted.
 * @returns {Promise<Object>} Result object. On success:
 *   `{ success: true, html, title, shouldFallback: false, finalUrl }`.
 *   A page flagged by isPaywalled yields `success: false` with
 *   `fallbackReason: "paywall"` and still carries `html` and `title`; every
 *   other failure uses `fallbackReason: "error"`. Never rejects.
 */
async function headlessFetch(url, netlifyEndpoint, timeout = 3e4) {
  if (!netlifyEndpoint) {
    return {
      success: false,
      shouldFallback: true,
      fallbackReason: "error",
      error: "No Netlify endpoint configured. Set netlifyEndpoint in config."
    };
  }
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);
  try {
    const response = await fetch(netlifyEndpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ url }),
      signal: controller.signal
    });
    if (!response.ok) {
      const errorText = await response.text();
      return {
        success: false,
        shouldFallback: true,
        fallbackReason: "error",
        error: `Headless fetch failed: ${response.status} - ${errorText}`
      };
    }
    const result = await response.json();
    // `result` may be null when the endpoint answers with a literal JSON null.
    if (!result?.success || !result.html) {
      return {
        success: false,
        shouldFallback: true,
        fallbackReason: "error",
        error: result?.error || "No HTML content returned from headless fetch"
      };
    }
    const html = result.html;
    const title = extractTitle(html);
    if (isPaywalled(html)) {
      return {
        success: false,
        html,
        title,
        shouldFallback: true,
        fallbackReason: "paywall",
        finalUrl: result.finalUrl
      };
    }
    return {
      success: true,
      html,
      title,
      shouldFallback: false,
      finalUrl: result.finalUrl
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : "Unknown error";
    const timedOut =
      (error instanceof Error && error.name === "AbortError") || errorMessage.includes("abort");
    return {
      success: false,
      shouldFallback: true,
      fallbackReason: "error",
      error: timedOut ? "Headless fetch timeout" : `Headless fetch error: ${errorMessage}`
    };
  } finally {
    clearTimeout(timeoutId);
  }
}
352
+
353
+ // src/tiers/search.ts
354
/** Search endpoint queried by searchFallback; the query is appended as `?q=`. */
var DDG_SEARCH_URL = "https://html.duckduckgo.com/html/";
355
/**
 * Scores how alike two titles are on a 0-100 scale.
 *
 * Both strings are lower-cased and trimmed. Identical strings score 100.
 * Otherwise each string is split on whitespace into its set of unique words
 * longer than two characters, and the score is
 * `2 * shared / (sizeA + sizeB) * 100`, rounded to the nearest integer.
 * An empty string or an empty word set on either side scores 0.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} Integer in [0, 100].
 */
function calculateSimilarity(str1, str2) {
  const normalize = (value) => value.toLowerCase().trim();
  const left = normalize(str1);
  const right = normalize(str2);

  if (left === right) {
    return 100;
  }
  if (left.length === 0 || right.length === 0) {
    return 0;
  }

  const significantWords = (text) =>
    new Set(text.split(/\s+/).filter((word) => word.length > 2));
  const leftWords = significantWords(left);
  const rightWords = significantWords(right);
  if (leftWords.size === 0 || rightWords.size === 0) {
    return 0;
  }

  const shared = [...leftWords].filter((word) => rightWords.has(word)).length;
  return Math.round(shared * 2 / (leftWords.size + rightWords.size) * 100);
}
370
/**
 * Parses the search results page into a list of result records.
 *
 * Result links are anchors with class "result__a"; snippets are anchors with
 * class "result__snippet". The target URL is taken from the `uddg=` redirect
 * parameter when present, otherwise from the raw href.
 *
 * Hardening over a plain regex scan:
 *  - a redirect parameter that is not valid percent-encoding is skipped
 *    instead of throwing and discarding every other result;
 *  - titles and snippets may contain inline tags (e.g. <b>), which are
 *    stripped from the returned text;
 *  - a snippet is paired with its link by position on the page, so dropping
 *    a non-http or malformed link does not shift later snippets.
 *
 * @param {string} html - Markup of the search results page.
 * @returns {Array<{url: string, title: string, snippet: string, matchScore: number}>}
 *   Results in page order; `matchScore` is always 0 here and is filled in by
 *   the caller.
 */
function parseSearchResults(html) {
  const linkPattern = /<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi;
  const snippetPattern = /<a[^>]*class="result__snippet"[^>]*>([\s\S]*?)<\/a>/gi;

  const stripTags = (fragment) => fragment.replace(/<[^>]*>/g, "").trim();
  const decodeTarget = (href) => {
    const encoded = href.replace(/.*uddg=/, "").split("&")[0];
    try {
      return decodeURIComponent(encoded);
    } catch {
      // Malformed percent-encoding: treat this single result as unusable.
      return null;
    }
  };

  const snippets = [];
  for (const [, snippetBody] of html.matchAll(snippetPattern)) {
    snippets.push(stripTags(snippetBody));
  }

  const results = [];
  let position = 0;
  for (const [, href, label] of html.matchAll(linkPattern)) {
    // Consume the snippet slot even when the link itself is rejected.
    const snippet = snippets[position] || "";
    position += 1;

    const url = decodeTarget(href);
    if (url === null || !url.startsWith("http")) {
      continue;
    }
    results.push({
      url,
      title: stripTags(label),
      snippet,
      matchScore: 0
      // Will be calculated later
    });
  }
  return results;
}
398
/**
 * Tier 3: searches for `title` and tries to fetch a copy of the article from
 * a different site.
 *
 * Results hosted on the original site (same hostname or one of its
 * subdomains) are discarded; the rest are scored against `title` with
 * calculateSimilarity. When a result scores exactly 100 it is fetched with
 * lightFetch and, if that succeeds, returned as the content. Otherwise the
 * five best-scoring URLs are returned as `suggestions`.
 *
 * The search request itself is bounded by `timeout`; the mirror fetch gets
 * its own `timeout` budget through lightFetch.
 *
 * Hosts are compared by parsed hostname, not by substring, so a mirror whose
 * path merely embeds the original domain (e.g. an archive URL) is kept.
 *
 * @param {string} title - Title to search for; blank titles fail immediately.
 * @param {string} originalUrl - URL of the page that could not be read.
 * @param {number} [timeout=30000] - Milliseconds allowed per network request.
 * @returns {Promise<Object>} `{ success: true, html, title, shouldFallback:
 *   false, finalUrl }` for a fetched mirror, otherwise `{ success: false,
 *   shouldFallback: false, error, suggestions }`. Never rejects.
 */
async function searchFallback(title, originalUrl, timeout = 3e4) {
  if (!title || title.trim().length === 0) {
    return {
      success: false,
      shouldFallback: false,
      error: "No title available for search fallback",
      suggestions: []
    };
  }
  try {
    const searchUrl = `${DDG_SEARCH_URL}?q=${encodeURIComponent(title)}`;
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeout);
    let response;
    let searchHtml = "";
    try {
      response = await fetch(searchUrl, {
        method: "GET",
        headers: {
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
          "Accept": "text/html"
        },
        signal: controller.signal
      });
      if (response.ok) {
        searchHtml = await response.text();
      }
    } finally {
      clearTimeout(timeoutId);
    }
    if (!response.ok) {
      return {
        success: false,
        shouldFallback: false,
        error: `Search failed: ${response.status}`,
        suggestions: []
      };
    }

    const originalDomain = new URL(originalUrl).hostname;
    const isOnOriginalSite = (candidateUrl) => {
      try {
        const host = new URL(candidateUrl).hostname;
        return host === originalDomain || host.endsWith(`.${originalDomain}`);
      } catch {
        // An unparsable URL cannot be fetched as a mirror; drop it.
        return true;
      }
    };

    const scoredResults = parseSearchResults(searchHtml)
      .filter((candidate) => !isOnOriginalSite(candidate.url))
      .map((candidate) => ({
        ...candidate,
        matchScore: calculateSimilarity(title, candidate.title)
      }))
      .sort((a, b) => b.matchScore - a.matchScore);

    const perfectMatch = scoredResults.find((candidate) => candidate.matchScore === 100);
    if (perfectMatch) {
      const mirrorResult = await lightFetch(perfectMatch.url, timeout);
      if (mirrorResult.success && mirrorResult.html) {
        return {
          success: true,
          html: mirrorResult.html,
          title: mirrorResult.title || title,
          shouldFallback: false,
          finalUrl: perfectMatch.url
        };
      }
    }
    const suggestions = scoredResults.slice(0, 5).map((candidate) => candidate.url);
    return {
      success: false,
      shouldFallback: false,
      error: "No exact match found",
      suggestions
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : "Unknown error";
    return {
      success: false,
      shouldFallback: false,
      error: `Search fallback error: ${errorMessage}`,
      suggestions: []
    };
  }
}
461
+
462
+ // src/parsers/json.ts
463
+ var cheerio2 = __toESM(require("cheerio"));
464
/**
 * Collects structured metadata from `html`.
 *
 * The result may contain:
 *  - `jsonLd`: parsed JSON-LD script contents (a single value, or an array
 *    when several scripts parse); scripts holding invalid JSON are skipped;
 *  - `openGraph` / `twitter`: meta tags keyed by name without the prefix;
 *  - `meta`: description, keywords, author and robots when present;
 *  - `title`: <title>, else og:title, else first <h1>, else "";
 *  - `article`: headline and the first 1000 characters of the first <article>;
 *  - `links`: up to 20 absolute (http...) links that have visible text.
 *
 * Returns null when nothing at all was found, i.e. the title is empty and no
 * other section was produced. `title` is always assigned, so checking only
 * whether the result has keys could never yield null.
 *
 * @param {string} html - Full page markup.
 * @returns {Object|null}
 */
function extractJson(html) {
  const $ = cheerio2.load(html);
  const result = {};

  const jsonLdData = [];
  $('script[type="application/ld+json"]').each((_, el) => {
    const content = $(el).html();
    if (!content) {
      return;
    }
    try {
      jsonLdData.push(JSON.parse(content));
    } catch {
      // Best effort: a malformed JSON-LD block is ignored, the rest are kept.
    }
  });
  if (jsonLdData.length > 0) {
    result.jsonLd = jsonLdData.length === 1 ? jsonLdData[0] : jsonLdData;
  }

  // Gathers <meta> tags whose `attribute` starts with `prefix`, keyed without it.
  const collectPrefixed = (attribute, prefix) => {
    const collected = {};
    $(`meta[${attribute}^="${prefix}"]`).each((_, el) => {
      const key = $(el).attr(attribute)?.replace(prefix, "");
      const content = $(el).attr("content");
      if (key && content) {
        collected[key] = content;
      }
    });
    return collected;
  };

  const ogData = collectPrefixed("property", "og:");
  if (Object.keys(ogData).length > 0) {
    result.openGraph = ogData;
  }
  const twitterData = collectPrefixed("name", "twitter:");
  if (Object.keys(twitterData).length > 0) {
    result.twitter = twitterData;
  }

  const meta = {};
  for (const name of ["description", "keywords", "author", "robots"]) {
    const content = $(`meta[name="${name}"]`).attr("content");
    if (content) {
      meta[name] = content;
    }
  }
  if (Object.keys(meta).length > 0) {
    result.meta = meta;
  }

  result.title = $("title").text().trim() || $('meta[property="og:title"]').attr("content") || $("h1").first().text().trim() || "";

  const article = $("article").first();
  if (article.length > 0) {
    result.article = {
      headline: article.find("h1, h2").first().text().trim(),
      content: article.text().replace(/\s+/g, " ").trim().substring(0, 1e3)
    };
  }

  const links = [];
  $("a[href]").each((_, el) => {
    const href = $(el).attr("href");
    const text = $(el).text().trim();
    if (href && text && href.startsWith("http") && links.length < 20) {
      links.push({ text, href });
    }
  });
  if (links.length > 0) {
    result.links = links;
  }

  const foundNothing = result.title === "" && Object.keys(result).length === 1;
  return foundNothing ? null : result;
}
541
+
542
+ // src/parsers/text.ts
543
+ var cheerio3 = __toESM(require("cheerio"));
544
+ var import_turndown = __toESM(require("turndown"));
545
// Shared HTML-to-Markdown converter used by extractText: ATX ("#") headings,
// fenced code blocks and "-" bullets.
var turndown = new import_turndown.default({
  bulletListMarker: "-",
  codeBlockStyle: "fenced",
  headingStyle: "atx"
});

// Elements whose content is dropped from the Markdown output entirely.
turndown.remove([
  "script", "style",
  "nav", "footer", "header", "aside",
  "iframe", "noscript"
]);
551
/**
 * Tidies whitespace in Markdown without destroying its structure.
 *
 * Runs of spaces/tabs inside a line collapse to one space, whitespace-only
 * lines become empty, and three or more consecutive newlines collapse to a
 * blank line. Leading indentation and everything between code-fence lines is
 * left untouched, because indentation is what marks nested list items and
 * code; collapsing it flattens nested lists and breaks indented code.
 */
function normalizeMarkdownWhitespace(markdown) {
  let insideFence = false;
  const tidiedLines = markdown.split("\n").map((line) => {
    if (/^[ \t]*(```|~~~)/.test(line)) {
      insideFence = !insideFence;
      return line;
    }
    if (insideFence) {
      return line;
    }
    const indent = /^[ \t]*/.exec(line)[0];
    const rest = line.slice(indent.length);
    return rest === "" ? "" : indent + rest.replace(/[ \t]+/g, " ");
  });
  return tidiedLines.join("\n").replace(/\n{3,}/g, "\n\n").trim();
}

/**
 * Extracts the main readable content of `html`.
 *
 * Scripts, styles, navigation, headers, footers, sidebars, menus and ad
 * containers are removed first. The content root is then the first match of:
 * <article>, <main>, [role="main"], a known content-class container holding
 * more than 200 characters of text, or finally <body>.
 *
 * @param {string} html - Full page markup.
 * @param {boolean} [asMarkdown=true] - Convert the content to Markdown when
 *   true; otherwise return its text with all whitespace collapsed to single
 *   spaces.
 * @returns {string}
 */
function extractText(html, asMarkdown = true) {
  const $ = cheerio3.load(html);
  $('script, style, nav, footer, header, aside, iframe, noscript, [role="navigation"], [role="banner"], [role="contentinfo"]').remove();
  $('[class*="sidebar"], [class*="navigation"], [class*="menu"], [class*="footer"], [class*="header"], [class*="ad-"], [class*="advertisement"], [id*="sidebar"], [id*="navigation"], [id*="menu"], [id*="footer"], [id*="header"], [id*="ad-"]').remove();

  let contentElement = $("article").first();
  if (contentElement.length === 0) {
    contentElement = $("main").first();
  }
  if (contentElement.length === 0) {
    contentElement = $('[role="main"]').first();
  }
  if (contentElement.length === 0) {
    const contentSelectors = [
      '[class*="article-content"]',
      '[class*="post-content"]',
      '[class*="entry-content"]',
      '[class*="content-body"]',
      '[class*="story-body"]',
      ".content",
      "#content"
    ];
    for (const selector of contentSelectors) {
      const el = $(selector).first();
      if (el.length > 0 && el.text().trim().length > 200) {
        contentElement = el;
        break;
      }
    }
  }
  if (contentElement.length === 0) {
    contentElement = $("body");
  }

  if (asMarkdown) {
    const contentHtml = contentElement.html() || "";
    return normalizeMarkdownWhitespace(turndown.turndown(contentHtml));
  }
  // Collapsing every whitespace run already removes all newlines.
  return contentElement.text().replace(/\s+/g, " ").trim();
}
594
+
595
+ // src/fetcher.ts
596
/**
 * Runs the tiered fetch pipeline for `url` and returns a formatted result.
 *
 * Order of attempts:
 *  1. lightFetch. A successful result is returned immediately (tier 1).
 *  2. Unless `config.skipHeadless` is set, and only when tier 1 asked for a
 *     fallback: with a configured endpoint, headlessFetch is tried (tier 2).
 *     If it fails for a reason other than a paywall and search is enabled,
 *     any tier-1 HTML is returned as-is. If a paywall is detected and search
 *     is enabled, the search fallback runs. Without an endpoint, a skeleton
 *     page's HTML is returned with an "incomplete" note in `error`.
 *  3. With search enabled and a paywall detected on tier 1, the search
 *     fallback runs.
 *  4. Otherwise any tier-1 HTML is returned (carrying the tier-1 error, if
 *     any), or a failure object when there is no HTML at all.
 *
 * @param {string} url - Page to fetch.
 * @param {Object} config - Settings: mode, timeout, headers, netlifyEndpoint,
 *   skipHeadless, skipSearch.
 * @returns {Promise<Object>} Formatted result or a `{ success: false, ... }` object.
 */
async function fetchContent(url, config) {
  const timeout = config.timeout ?? 3e4;
  const headers = config.headers ?? {};
  const { mode } = config;

  const light = await lightFetch(url, timeout, headers);
  const formatLight = () => formatResult(light.html, mode, 1, light.finalUrl, light.title);

  if (light.success && light.html) {
    return formatLight();
  }

  if (!config.skipHeadless && light.shouldFallback) {
    const endpoint = config.netlifyEndpoint;
    if (endpoint) {
      const rendered = await headlessFetch(url, endpoint, timeout);
      if (rendered.success && rendered.html) {
        return formatResult(rendered.html, mode, 2, rendered.finalUrl, rendered.title);
      }

      const renderedHitPaywall = rendered.fallbackReason === "paywall";
      if (!renderedHitPaywall && !config.skipSearch && light.html) {
        return formatLight();
      }
      if ((renderedHitPaywall || isPaywalled(light.html || "")) && !config.skipSearch) {
        const searchTitle = rendered.title || light.title || "";
        return await executeSearchFallback(searchTitle, url, mode, timeout);
      }
    } else if (light.fallbackReason === "skeleton" && light.html) {
      const partial = formatLight();
      partial.error = "Content may be incomplete (no headless endpoint configured)";
      return partial;
    }
  }

  const lightHitPaywall = () =>
    light.fallbackReason === "paywall" || isPaywalled(light.html || "");
  if (!config.skipSearch && lightHitPaywall()) {
    return await executeSearchFallback(light.title || "", url, mode, timeout);
  }

  if (!light.html) {
    return {
      success: false,
      error: light.error || "Failed to fetch content"
    };
  }
  const degraded = formatLight();
  if (light.error) {
    degraded.error = light.error;
  }
  return degraded;
}
645
/**
 * Runs searchFallback and converts its outcome into a caller-facing result.
 *
 * A fetched mirror is formatted as a tier-3 result. Otherwise a
 * "Content not found" failure is returned carrying the suggested URLs
 * (an empty list when none were provided) and the title that was searched.
 *
 * @param {string} title - Title to search for.
 * @param {string} originalUrl - URL of the page that could not be read.
 * @param {string} mode - Output mode forwarded to formatResult.
 * @param {number} timeout - Milliseconds forwarded to searchFallback.
 * @returns {Promise<Object>}
 */
async function executeSearchFallback(title, originalUrl, mode, timeout) {
  const outcome = await searchFallback(title, originalUrl, timeout);
  const foundMirror = outcome.success && outcome.html;
  if (!foundMirror) {
    return {
      success: false,
      error: "Content not found",
      suggestions: outcome.suggestions || [],
      title
    };
  }
  return formatResult(outcome.html, mode, 3, outcome.finalUrl, outcome.title);
}
657
/**
 * Builds the success result for fetched HTML.
 *
 * In "JSON" mode the content is the metadata from extractJson, or
 * `{ rawHtml: html }` when extractJson returns a falsy value. In every other
 * mode the content is the Markdown produced by extractText.
 *
 * @param {string} html - Fetched page markup.
 * @param {string} mode - "JSON" for structured output; anything else is text.
 * @param {number} tier - Pipeline tier that produced the HTML.
 * @param {string} [finalUrl] - URL the content was ultimately served from.
 * @param {string} [title] - Extracted page title.
 * @returns {{success: true, content: (Object|string), tier: number, finalUrl: *, title: *}}
 */
function formatResult(html, mode, tier, finalUrl, title) {
  const wantsJson = mode === "JSON";
  const content = wantsJson
    ? extractJson(html) || { rawHtml: html }
    : extractText(html, true);
  return { success: true, content, tier, finalUrl, title };
}
678
+
679
+ // src/types.ts
680
/**
 * Values omniFetch uses for any option the caller leaves unset.
 */
var DEFAULT_CONFIG = {
  // Output format: "TEXT" or "JSON".
  mode: "TEXT",
  // Per-request timeout in milliseconds.
  timeout: 30000,
  // Headless rendering endpoint; empty means none is configured.
  netlifyEndpoint: "",
  // Extra request headers for the plain HTTP fetch.
  headers: {},
  // When true, the headless rendering tier is not attempted.
  skipHeadless: false,
  // When true, the search fallback is not attempted.
  skipSearch: false
};
688
+
689
+ // src/index.ts
690
/**
 * Public entry point: validates `url`, fills in default options and runs the
 * fetch pipeline.
 *
 * Invalid input is reported through the returned object, not by throwing. In
 * keeping with that, a `null` config is treated like an omitted one (default
 * parameters only cover `undefined`).
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {Object|null} [config={}] - Optional overrides for DEFAULT_CONFIG;
 *   options left null/undefined fall back to the default.
 * @returns {Promise<Object>} `{ success: false, error }` for an invalid URL,
 *   otherwise whatever fetchContent resolves to.
 */
async function omniFetch(url, config = {}) {
  if (!url || typeof url !== "string") {
    return {
      success: false,
      error: "Invalid URL provided"
    };
  }
  try {
    new URL(url);
  } catch {
    return {
      success: false,
      error: "Invalid URL format"
    };
  }
  const overrides = config ?? {};
  const fullConfig = {
    mode: overrides.mode ?? DEFAULT_CONFIG.mode,
    timeout: overrides.timeout ?? DEFAULT_CONFIG.timeout,
    netlifyEndpoint: overrides.netlifyEndpoint ?? DEFAULT_CONFIG.netlifyEndpoint,
    headers: overrides.headers ?? DEFAULT_CONFIG.headers,
    skipHeadless: overrides.skipHeadless ?? DEFAULT_CONFIG.skipHeadless,
    skipSearch: overrides.skipSearch ?? DEFAULT_CONFIG.skipSearch
  };
  return await fetchContent(url, fullConfig);
}
715
/**
 * Convenience wrapper around omniFetch that always requests "TEXT" output,
 * overriding any `mode` present in `options`.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {Object} [options] - Other omniFetch options.
 * @returns {Promise<Object>} The omniFetch result.
 */
async function fetchText(url, options) {
  const textOptions = { ...options, mode: "TEXT" };
  return omniFetch(url, textOptions);
}
718
/**
 * Convenience wrapper around omniFetch that always requests "JSON" output,
 * overriding any `mode` present in `options`.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {Object} [options] - Other omniFetch options.
 * @returns {Promise<Object>} The omniFetch result.
 */
async function fetchJson(url, options) {
  const jsonOptions = { ...options, mode: "JSON" };
  return omniFetch(url, jsonOptions);
}
721
// Binding exposed as the module's `default` export (see the export table,
// whose `default` getter returns index_default).
+ var index_default = omniFetch;
722
+ // Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so the assignment below never runs; the
// statement is present only so the export names appear literally in the file.
723
+ 0 && (module.exports = {
724
+ DEFAULT_CONFIG,
725
+ fetchJson,
726
+ fetchText,
727
+ omniFetch
728
+ });