@steipete/summarize-core 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/cjs/content/index.js +14 -0
- package/dist/cjs/content/index.js.map +1 -0
- package/dist/cjs/content/link-preview/client.js +31 -0
- package/dist/cjs/content/link-preview/client.js.map +1 -0
- package/dist/cjs/content/link-preview/content/article.js +164 -0
- package/dist/cjs/content/link-preview/content/article.js.map +1 -0
- package/dist/cjs/content/link-preview/content/cleaner.js +63 -0
- package/dist/cjs/content/link-preview/content/cleaner.js.map +1 -0
- package/dist/cjs/content/link-preview/content/constants.js +10 -0
- package/dist/cjs/content/link-preview/content/constants.js.map +1 -0
- package/dist/cjs/content/link-preview/content/fetcher.js +128 -0
- package/dist/cjs/content/link-preview/content/fetcher.js.map +1 -0
- package/dist/cjs/content/link-preview/content/firecrawl.js +90 -0
- package/dist/cjs/content/link-preview/content/firecrawl.js.map +1 -0
- package/dist/cjs/content/link-preview/content/html.js +165 -0
- package/dist/cjs/content/link-preview/content/html.js.map +1 -0
- package/dist/cjs/content/link-preview/content/index.js +348 -0
- package/dist/cjs/content/link-preview/content/index.js.map +1 -0
- package/dist/cjs/content/link-preview/content/jsonld.js +80 -0
- package/dist/cjs/content/link-preview/content/jsonld.js.map +1 -0
- package/dist/cjs/content/link-preview/content/parsers.js +81 -0
- package/dist/cjs/content/link-preview/content/parsers.js.map +1 -0
- package/dist/cjs/content/link-preview/content/podcast-utils.js +85 -0
- package/dist/cjs/content/link-preview/content/podcast-utils.js.map +1 -0
- package/dist/cjs/content/link-preview/content/readability.js +90 -0
- package/dist/cjs/content/link-preview/content/readability.js.map +1 -0
- package/dist/cjs/content/link-preview/content/twitter-utils.js +74 -0
- package/dist/cjs/content/link-preview/content/twitter-utils.js.map +1 -0
- package/dist/cjs/content/link-preview/content/types.js +7 -0
- package/dist/cjs/content/link-preview/content/types.js.map +1 -0
- package/dist/cjs/content/link-preview/content/utils.js +177 -0
- package/dist/cjs/content/link-preview/content/utils.js.map +1 -0
- package/dist/cjs/content/link-preview/content/video.js +99 -0
- package/dist/cjs/content/link-preview/content/video.js.map +1 -0
- package/dist/cjs/content/link-preview/content/youtube.js +85 -0
- package/dist/cjs/content/link-preview/content/youtube.js.map +1 -0
- package/dist/cjs/content/link-preview/deps.js +23 -0
- package/dist/cjs/content/link-preview/deps.js.map +1 -0
- package/dist/cjs/content/link-preview/fetch-with-timeout.js +38 -0
- package/dist/cjs/content/link-preview/fetch-with-timeout.js.map +1 -0
- package/dist/cjs/content/link-preview/types.js +5 -0
- package/dist/cjs/content/link-preview/types.js.map +1 -0
- package/dist/cjs/content/transcript/cache.js +85 -0
- package/dist/cjs/content/transcript/cache.js.map +1 -0
- package/dist/cjs/content/transcript/index.js +134 -0
- package/dist/cjs/content/transcript/index.js.map +1 -0
- package/dist/cjs/content/transcript/normalize.js +49 -0
- package/dist/cjs/content/transcript/normalize.js.map +1 -0
- package/dist/cjs/content/transcript/providers/generic.js +16 -0
- package/dist/cjs/content/transcript/providers/generic.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/apple-flow.js +226 -0
- package/dist/cjs/content/transcript/providers/podcast/apple-flow.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/apple.js +43 -0
- package/dist/cjs/content/transcript/providers/podcast/apple.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/constants.js +11 -0
- package/dist/cjs/content/transcript/providers/podcast/constants.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/flow-context.js +3 -0
- package/dist/cjs/content/transcript/providers/podcast/flow-context.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/itunes.js +139 -0
- package/dist/cjs/content/transcript/providers/podcast/itunes.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/json.js +43 -0
- package/dist/cjs/content/transcript/providers/podcast/json.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/media.js +355 -0
- package/dist/cjs/content/transcript/providers/podcast/media.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/results.js +32 -0
- package/dist/cjs/content/transcript/providers/podcast/results.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/rss.js +262 -0
- package/dist/cjs/content/transcript/providers/podcast/rss.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/spotify-flow.js +221 -0
- package/dist/cjs/content/transcript/providers/podcast/spotify-flow.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/spotify.js +119 -0
- package/dist/cjs/content/transcript/providers/podcast/spotify.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast.js +260 -0
- package/dist/cjs/content/transcript/providers/podcast.js.map +1 -0
- package/dist/cjs/content/transcript/providers/youtube/api.js +264 -0
- package/dist/cjs/content/transcript/providers/youtube/api.js.map +1 -0
- package/dist/cjs/content/transcript/providers/youtube/apify.js +59 -0
- package/dist/cjs/content/transcript/providers/youtube/apify.js.map +1 -0
- package/dist/cjs/content/transcript/providers/youtube/captions.js +413 -0
- package/dist/cjs/content/transcript/providers/youtube/captions.js.map +1 -0
- package/dist/cjs/content/transcript/providers/youtube/yt-dlp.js +170 -0
- package/dist/cjs/content/transcript/providers/youtube/yt-dlp.js.map +1 -0
- package/dist/cjs/content/transcript/providers/youtube.js +178 -0
- package/dist/cjs/content/transcript/providers/youtube.js.map +1 -0
- package/dist/cjs/content/transcript/types.js +3 -0
- package/dist/cjs/content/transcript/types.js.map +1 -0
- package/dist/cjs/content/transcript/utils.js +303 -0
- package/dist/cjs/content/transcript/utils.js.map +1 -0
- package/dist/cjs/index.js +22 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/language.js +132 -0
- package/dist/cjs/language.js.map +1 -0
- package/dist/cjs/package.json +3 -0
- package/dist/cjs/prompts/cli.js +23 -0
- package/dist/cjs/prompts/cli.js.map +1 -0
- package/dist/cjs/prompts/file.js +52 -0
- package/dist/cjs/prompts/file.js.map +1 -0
- package/dist/cjs/prompts/index.js +14 -0
- package/dist/cjs/prompts/index.js.map +1 -0
- package/dist/cjs/prompts/link-summary.js +122 -0
- package/dist/cjs/prompts/link-summary.js.map +1 -0
- package/dist/cjs/shared/contracts.js +5 -0
- package/dist/cjs/shared/contracts.js.map +1 -0
- package/dist/cjs/transcription/whisper/constants.js +11 -0
- package/dist/cjs/transcription/whisper/constants.js.map +1 -0
- package/dist/cjs/transcription/whisper/core.js +307 -0
- package/dist/cjs/transcription/whisper/core.js.map +1 -0
- package/dist/cjs/transcription/whisper/fal.js +44 -0
- package/dist/cjs/transcription/whisper/fal.js.map +1 -0
- package/dist/cjs/transcription/whisper/ffmpeg.js +187 -0
- package/dist/cjs/transcription/whisper/ffmpeg.js.map +1 -0
- package/dist/cjs/transcription/whisper/openai.js +51 -0
- package/dist/cjs/transcription/whisper/openai.js.map +1 -0
- package/dist/cjs/transcription/whisper/types.js +3 -0
- package/dist/cjs/transcription/whisper/types.js.map +1 -0
- package/dist/cjs/transcription/whisper/utils.js +70 -0
- package/dist/cjs/transcription/whisper/utils.js.map +1 -0
- package/dist/cjs/transcription/whisper/whisper-cpp.js +232 -0
- package/dist/cjs/transcription/whisper/whisper-cpp.js.map +1 -0
- package/dist/cjs/transcription/whisper.js +15 -0
- package/dist/cjs/transcription/whisper.js.map +1 -0
- package/package.json +15 -12
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Peter Steinberger
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CACHE_MODES = exports.ProgressKind = exports.DEFAULT_TIMEOUT_MS = exports.DEFAULT_MAX_CONTENT_CHARACTERS = exports.DEFAULT_CACHE_MODE = exports.createLinkPreviewClient = void 0;
|
|
4
|
+
var client_js_1 = require("./link-preview/client.js");
|
|
5
|
+
Object.defineProperty(exports, "createLinkPreviewClient", { enumerable: true, get: function () { return client_js_1.createLinkPreviewClient; } });
|
|
6
|
+
var types_js_1 = require("./link-preview/content/types.js");
|
|
7
|
+
Object.defineProperty(exports, "DEFAULT_CACHE_MODE", { enumerable: true, get: function () { return types_js_1.DEFAULT_CACHE_MODE; } });
|
|
8
|
+
Object.defineProperty(exports, "DEFAULT_MAX_CONTENT_CHARACTERS", { enumerable: true, get: function () { return types_js_1.DEFAULT_MAX_CONTENT_CHARACTERS; } });
|
|
9
|
+
Object.defineProperty(exports, "DEFAULT_TIMEOUT_MS", { enumerable: true, get: function () { return types_js_1.DEFAULT_TIMEOUT_MS; } });
|
|
10
|
+
var deps_js_1 = require("./link-preview/deps.js");
|
|
11
|
+
Object.defineProperty(exports, "ProgressKind", { enumerable: true, get: function () { return deps_js_1.ProgressKind; } });
|
|
12
|
+
var types_js_2 = require("./link-preview/types.js");
|
|
13
|
+
Object.defineProperty(exports, "CACHE_MODES", { enumerable: true, get: function () { return types_js_2.CACHE_MODES; } });
|
|
14
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/content/index.ts"],"names":[],"mappings":";;;AAAA,sDAIiC;AAH/B,oHAAA,uBAAuB,OAAA;AAIzB,4DAMwC;AALtC,8GAAA,kBAAkB,OAAA;AAClB,0HAAA,8BAA8B,OAAA;AAC9B,8GAAA,kBAAkB,OAAA;AAapB,kDAAqD;AAA5C,uGAAA,YAAY,OAAA;AACrB,oDAKgC;AAJ9B,uGAAA,WAAW,OAAA"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createLinkPreviewClient = createLinkPreviewClient;
|
|
4
|
+
const index_js_1 = require("./content/index.js");
|
|
5
|
+
function createLinkPreviewClient(options = {}) {
|
|
6
|
+
const fetchImpl = options.fetch ?? ((...args) => globalThis.fetch(...args));
|
|
7
|
+
const scrape = options.scrapeWithFirecrawl ?? null;
|
|
8
|
+
const apifyApiToken = typeof options.apifyApiToken === 'string' ? options.apifyApiToken : null;
|
|
9
|
+
const ytDlpPath = typeof options.ytDlpPath === 'string' ? options.ytDlpPath : null;
|
|
10
|
+
const falApiKey = typeof options.falApiKey === 'string' ? options.falApiKey : null;
|
|
11
|
+
const openaiApiKey = typeof options.openaiApiKey === 'string' ? options.openaiApiKey : null;
|
|
12
|
+
const convertHtmlToMarkdown = options.convertHtmlToMarkdown ?? null;
|
|
13
|
+
const transcriptCache = options.transcriptCache ?? null;
|
|
14
|
+
const readTweetWithBird = typeof options.readTweetWithBird === 'function' ? options.readTweetWithBird : null;
|
|
15
|
+
const onProgress = typeof options.onProgress === 'function' ? options.onProgress : null;
|
|
16
|
+
return {
|
|
17
|
+
fetchLinkContent: (url, contentOptions) => (0, index_js_1.fetchLinkContent)(url, contentOptions, {
|
|
18
|
+
fetch: fetchImpl,
|
|
19
|
+
scrapeWithFirecrawl: scrape,
|
|
20
|
+
apifyApiToken,
|
|
21
|
+
ytDlpPath,
|
|
22
|
+
falApiKey,
|
|
23
|
+
openaiApiKey,
|
|
24
|
+
convertHtmlToMarkdown,
|
|
25
|
+
transcriptCache,
|
|
26
|
+
readTweetWithBird,
|
|
27
|
+
onProgress,
|
|
28
|
+
}),
|
|
29
|
+
};
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../../../src/content/link-preview/client.ts"],"names":[],"mappings":";;AA2BA,0DA6BC;AAxDD,iDAAqD;AA2BrD,SAAgB,uBAAuB,CAAC,UAAoC,EAAE;IAC5E,MAAM,SAAS,GACb,OAAO,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,IAA8B,EAAE,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC,CAAA;IACrF,MAAM,MAAM,GAA+B,OAAO,CAAC,mBAAmB,IAAI,IAAI,CAAA;IAC9E,MAAM,aAAa,GAAG,OAAO,OAAO,CAAC,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAA;IAC9F,MAAM,SAAS,GAAG,OAAO,OAAO,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAA;IAClF,MAAM,SAAS,GAAG,OAAO,OAAO,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAA;IAClF,MAAM,YAAY,GAAG,OAAO,OAAO,CAAC,YAAY,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAA;IAC3F,MAAM,qBAAqB,GAAiC,OAAO,CAAC,qBAAqB,IAAI,IAAI,CAAA;IACjG,MAAM,eAAe,GAA2B,OAAO,CAAC,eAAe,IAAI,IAAI,CAAA;IAC/E,MAAM,iBAAiB,GACrB,OAAO,OAAO,CAAC,iBAAiB,KAAK,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC,CAAC,IAAI,CAAA;IACpF,MAAM,UAAU,GAAG,OAAO,OAAO,CAAC,UAAU,KAAK,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;IAEvF,OAAO;QACL,gBAAgB,EAAE,CAAC,GAAW,EAAE,cAAwC,EAAE,EAAE,CAC1E,IAAA,2BAAgB,EAAC,GAAG,EAAE,cAAc,EAAE;YACpC,KAAK,EAAE,SAAS;YAChB,mBAAmB,EAAE,MAAM;YAC3B,aAAa;YACb,SAAS;YACT,SAAS;YACT,YAAY;YACZ,qBAAqB;YACrB,eAAe;YACf,iBAAiB;YACjB,UAAU;SACX,CAAC;KACL,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.sanitizeHtmlForMarkdownConversion = sanitizeHtmlForMarkdownConversion;
|
|
7
|
+
exports.extractArticleContent = extractArticleContent;
|
|
8
|
+
exports.collectSegmentsFromHtml = collectSegmentsFromHtml;
|
|
9
|
+
exports.extractPlainText = extractPlainText;
|
|
10
|
+
const cheerio_1 = require("cheerio");
|
|
11
|
+
const sanitize_html_1 = __importDefault(require("sanitize-html"));
|
|
12
|
+
const cleaner_js_1 = require("./cleaner.js");
|
|
13
|
+
const MIN_SEGMENT_LENGTH = 30;
|
|
14
|
+
function sanitizeHtmlForMarkdownConversion(html) {
|
|
15
|
+
return (0, sanitize_html_1.default)(html, {
|
|
16
|
+
allowedTags: [
|
|
17
|
+
'article',
|
|
18
|
+
'section',
|
|
19
|
+
'div',
|
|
20
|
+
'p',
|
|
21
|
+
'h1',
|
|
22
|
+
'h2',
|
|
23
|
+
'h3',
|
|
24
|
+
'h4',
|
|
25
|
+
'h5',
|
|
26
|
+
'h6',
|
|
27
|
+
'ol',
|
|
28
|
+
'ul',
|
|
29
|
+
'li',
|
|
30
|
+
'blockquote',
|
|
31
|
+
'pre',
|
|
32
|
+
'code',
|
|
33
|
+
'span',
|
|
34
|
+
'strong',
|
|
35
|
+
'em',
|
|
36
|
+
'br',
|
|
37
|
+
'a',
|
|
38
|
+
],
|
|
39
|
+
allowedAttributes: {
|
|
40
|
+
a: ['href'],
|
|
41
|
+
},
|
|
42
|
+
nonTextTags: [
|
|
43
|
+
'style',
|
|
44
|
+
'script',
|
|
45
|
+
'noscript',
|
|
46
|
+
'template',
|
|
47
|
+
'svg',
|
|
48
|
+
'canvas',
|
|
49
|
+
'iframe',
|
|
50
|
+
'object',
|
|
51
|
+
'embed',
|
|
52
|
+
],
|
|
53
|
+
textFilter(text) {
|
|
54
|
+
return (0, cleaner_js_1.decodeHtmlEntities)(text);
|
|
55
|
+
},
|
|
56
|
+
});
|
|
57
|
+
}
|
|
58
|
+
function extractArticleContent(html) {
|
|
59
|
+
const segments = collectSegmentsFromHtml(html);
|
|
60
|
+
if (segments.length > 0) {
|
|
61
|
+
return segments.join('\n');
|
|
62
|
+
}
|
|
63
|
+
const fallback = (0, cleaner_js_1.normalizeWhitespace)(extractPlainText(html));
|
|
64
|
+
return fallback ?? '';
|
|
65
|
+
}
|
|
66
|
+
function collectSegmentsFromHtml(html) {
|
|
67
|
+
const sanitized = (0, sanitize_html_1.default)(html, {
|
|
68
|
+
allowedTags: [
|
|
69
|
+
'article',
|
|
70
|
+
'section',
|
|
71
|
+
'div',
|
|
72
|
+
'p',
|
|
73
|
+
'h1',
|
|
74
|
+
'h2',
|
|
75
|
+
'h3',
|
|
76
|
+
'h4',
|
|
77
|
+
'h5',
|
|
78
|
+
'h6',
|
|
79
|
+
'ol',
|
|
80
|
+
'ul',
|
|
81
|
+
'li',
|
|
82
|
+
'blockquote',
|
|
83
|
+
'pre',
|
|
84
|
+
'code',
|
|
85
|
+
'span',
|
|
86
|
+
'strong',
|
|
87
|
+
'em',
|
|
88
|
+
'br',
|
|
89
|
+
],
|
|
90
|
+
allowedAttributes: {},
|
|
91
|
+
nonTextTags: [
|
|
92
|
+
'style',
|
|
93
|
+
'script',
|
|
94
|
+
'noscript',
|
|
95
|
+
'template',
|
|
96
|
+
'svg',
|
|
97
|
+
'canvas',
|
|
98
|
+
'iframe',
|
|
99
|
+
'object',
|
|
100
|
+
'embed',
|
|
101
|
+
],
|
|
102
|
+
textFilter(text) {
|
|
103
|
+
return (0, cleaner_js_1.decodeHtmlEntities)(text);
|
|
104
|
+
},
|
|
105
|
+
});
|
|
106
|
+
const $ = (0, cheerio_1.load)(sanitized);
|
|
107
|
+
const segments = [];
|
|
108
|
+
$('h1,h2,h3,h4,h5,h6,li,p,blockquote,pre').each((_, element) => {
|
|
109
|
+
if (!('tagName' in element) || typeof element.tagName !== 'string') {
|
|
110
|
+
return;
|
|
111
|
+
}
|
|
112
|
+
const tag = element.tagName.toLowerCase();
|
|
113
|
+
const raw = $(element).text();
|
|
114
|
+
const text = (0, cleaner_js_1.normalizeWhitespace)(raw).replaceAll(/\n+/g, ' ');
|
|
115
|
+
if (!text || text.length === 0) {
|
|
116
|
+
return;
|
|
117
|
+
}
|
|
118
|
+
if (tag.startsWith('h')) {
|
|
119
|
+
if (text.length >= 10) {
|
|
120
|
+
segments.push(text);
|
|
121
|
+
}
|
|
122
|
+
return;
|
|
123
|
+
}
|
|
124
|
+
if (tag === 'li') {
|
|
125
|
+
if (text.length >= 20) {
|
|
126
|
+
segments.push(`• ${text}`);
|
|
127
|
+
}
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
if (text.length < MIN_SEGMENT_LENGTH) {
|
|
131
|
+
return;
|
|
132
|
+
}
|
|
133
|
+
segments.push(text);
|
|
134
|
+
});
|
|
135
|
+
if (segments.length === 0) {
|
|
136
|
+
const fallback = (0, cleaner_js_1.normalizeWhitespace)($('body').text() || sanitized);
|
|
137
|
+
return fallback ? [fallback] : [];
|
|
138
|
+
}
|
|
139
|
+
return mergeConsecutiveSegments(segments);
|
|
140
|
+
}
|
|
141
|
+
function extractPlainText(html) {
|
|
142
|
+
const stripped = (0, sanitize_html_1.default)(html, {
|
|
143
|
+
allowedTags: [],
|
|
144
|
+
allowedAttributes: {},
|
|
145
|
+
nonTextTags: [
|
|
146
|
+
'style',
|
|
147
|
+
'script',
|
|
148
|
+
'noscript',
|
|
149
|
+
'template',
|
|
150
|
+
'svg',
|
|
151
|
+
'canvas',
|
|
152
|
+
'iframe',
|
|
153
|
+
'object',
|
|
154
|
+
'embed',
|
|
155
|
+
],
|
|
156
|
+
});
|
|
157
|
+
return (0, cleaner_js_1.decodeHtmlEntities)(stripped);
|
|
158
|
+
}
|
|
159
|
+
function mergeConsecutiveSegments(segments) {
|
|
160
|
+
// Keep headings as separate segments; merging short segments mostly collapses headings into the
|
|
161
|
+
// previous paragraph ("... Conclusion"), which reads worse than a standalone heading line.
|
|
162
|
+
return segments.filter(Boolean);
|
|
163
|
+
}
|
|
164
|
+
//# sourceMappingURL=article.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"article.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/article.ts"],"names":[],"mappings":";;;;;AAOA,8EA2CC;AAED,sDAOC;AAED,0DAoFC;AAED,4CAiBC;AApKD,qCAA8B;AAC9B,kEAAwC;AAExC,6CAAsE;AAEtE,MAAM,kBAAkB,GAAG,EAAE,CAAA;AAE7B,SAAgB,iCAAiC,CAAC,IAAY;IAC5D,OAAO,IAAA,uBAAY,EAAC,IAAI,EAAE;QACxB,WAAW,EAAE;YACX,SAAS;YACT,SAAS;YACT,KAAK;YACL,GAAG;YACH,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,YAAY;YACZ,KAAK;YACL,MAAM;YACN,MAAM;YACN,QAAQ;YACR,IAAI;YACJ,IAAI;YACJ,GAAG;SACJ;QACD,iBAAiB,EAAE;YACjB,CAAC,EAAE,CAAC,MAAM,CAAC;SACZ;QACD,WAAW,EAAE;YACX,OAAO;YACP,QAAQ;YACR,UAAU;YACV,UAAU;YACV,KAAK;YACL,QAAQ;YACR,QAAQ;YACR,QAAQ;YACR,OAAO;SACR;QACD,UAAU,CAAC,IAAY;YACrB,OAAO,IAAA,+BAAkB,EAAC,IAAI,CAAC,CAAA;QACjC,CAAC;KACF,CAAC,CAAA;AACJ,CAAC;AAED,SAAgB,qBAAqB,CAAC,IAAY;IAChD,MAAM,QAAQ,GAAG,uBAAuB,CAAC,IAAI,CAAC,CAAA;IAC9C,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC5B,CAAC;IACD,MAAM,QAAQ,GAAG,IAAA,gCAAmB,EAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAA;IAC5D,OAAO,QAAQ,IAAI,EAAE,CAAA;AACvB,CAAC;AAED,SAAgB,uBAAuB,CAAC,IAAY;IAClD,MAAM,SAAS,GAAG,IAAA,uBAAY,EAAC,IAAI,EAAE;QACnC,WAAW,EAAE;YACX,SAAS;YACT,SAAS;YACT,KAAK;YACL,GAAG;YACH,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,YAAY;YACZ,KAAK;YACL,MAAM;YACN,MAAM;YACN,QAAQ;YACR,IAAI;YACJ,IAAI;SACL;QACD,iBAAiB,EAAE,EAAE;QACrB,WAAW,EAAE;YACX,OAAO;YACP,QAAQ;YACR,UAAU;YACV,UAAU;YACV,KAAK;YACL,QAAQ;YACR,QAAQ;YACR,QAAQ;YACR,OAAO;SACR;QACD,UAAU,CAAC,IAAY;YACrB,OAAO,IAAA,+BAAkB,EAAC,IAAI,CAAC,CAAA;QACjC,CAAC;KACF,CAAC,CAAA;IAEF,MAAM,CAAC,GAAG,IAAA,cAAI,EAAC,SAAS,CAAC,CAAA;IACzB,MAAM,QAAQ,GAAa,EAAE,CAAA;IAE7B,CAAC,CAAC,uCAAuC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;QAC7D,IAAI,CAAC,CAAC,SAAS,IAAI,OAAO,CAAC,IAAI,OAAO,OAAO,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACnE,OAAM;QACR,CAAC;QAED,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,CAAA;QAEzC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IA
AI,EAAE,CAAA;QAC7B,MAAM,IAAI,GAAG,IAAA,gCAAmB,EAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;QAC7D,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,OAAM;QACR,CAAC;QAED,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;gBACtB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACrB,CAAC;YACD,OAAM;QACR,CAAC;QAED,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;YACjB,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;gBACtB,QAAQ,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAA;YAC5B,CAAC;YACD,OAAM;QACR,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,GAAG,kBAAkB,EAAE,CAAC;YACrC,OAAM;QACR,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACrB,CAAC,CAAC,CAAA;IAEF,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,MAAM,QAAQ,GAAG,IAAA,gCAAmB,EAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAC,CAAA;QACnE,OAAO,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IACnC,CAAC;IAED,OAAO,wBAAwB,CAAC,QAAQ,CAAC,CAAA;AAC3C,CAAC;AAED,SAAgB,gBAAgB,CAAC,IAAY;IAC3C,MAAM,QAAQ,GAAG,IAAA,uBAAY,EAAC,IAAI,EAAE;QAClC,WAAW,EAAE,EAAE;QACf,iBAAiB,EAAE,EAAE;QACrB,WAAW,EAAE;YACX,OAAO;YACP,QAAQ;YACR,UAAU;YACV,UAAU;YACV,KAAK;YACL,QAAQ;YACR,QAAQ;YACR,QAAQ;YACR,OAAO;SACR;KACF,CAAC,CAAA;IACF,OAAO,IAAA,+BAAkB,EAAC,QAAQ,CAAC,CAAA;AACrC,CAAC;AAED,SAAS,wBAAwB,CAAC,QAAkB;IAClD,gGAAgG;IAChG,2FAA2F;IAC3F,OAAO,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;AACjC,CAAC"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.normalizeForPrompt = normalizeForPrompt;
|
|
4
|
+
exports.normalizeWhitespace = normalizeWhitespace;
|
|
5
|
+
exports.decodeHtmlEntities = decodeHtmlEntities;
|
|
6
|
+
exports.normalizeCandidate = normalizeCandidate;
|
|
7
|
+
exports.clipAtSentenceBoundary = clipAtSentenceBoundary;
|
|
8
|
+
exports.applyContentBudget = applyContentBudget;
|
|
9
|
+
const es_toolkit_1 = require("es-toolkit");
|
|
10
|
+
const WORD_SPLIT_PATTERN = /\s+/g;
|
|
11
|
+
function normalizeForPrompt(input) {
|
|
12
|
+
return input
|
|
13
|
+
.replaceAll('\u00A0', ' ')
|
|
14
|
+
.replaceAll(/[\t ]+/g, ' ')
|
|
15
|
+
.replaceAll(/\s*\n\s*/g, '\n')
|
|
16
|
+
.replaceAll(/\n{3,}/g, '\n\n')
|
|
17
|
+
.trim();
|
|
18
|
+
}
|
|
19
|
+
function normalizeWhitespace(input) {
|
|
20
|
+
return input
|
|
21
|
+
.replaceAll('\u00A0', ' ')
|
|
22
|
+
.replaceAll(/[\t ]+/g, ' ')
|
|
23
|
+
.replaceAll(/\s*\n\s*/g, '\n')
|
|
24
|
+
.trim();
|
|
25
|
+
}
|
|
26
|
+
function decodeHtmlEntities(input) {
|
|
27
|
+
return input
|
|
28
|
+
.replaceAll('&amp;', '&')
|
|
29
|
+
.replaceAll('&lt;', '<')
|
|
30
|
+
.replaceAll('&gt;', '>')
|
|
31
|
+
.replaceAll('&quot;', '"')
|
|
32
|
+
.replaceAll('&#39;', "'")
|
|
33
|
+
.replaceAll('&#x27;', "'")
|
|
34
|
+
.replaceAll('&#x2F;', '/')
|
|
35
|
+
.replaceAll('&nbsp;', ' ');
|
|
36
|
+
}
|
|
37
|
+
function normalizeCandidate(value) {
|
|
38
|
+
if (!value) {
|
|
39
|
+
return null;
|
|
40
|
+
}
|
|
41
|
+
const trimmed = value.replaceAll(/\s+/g, ' ').trim();
|
|
42
|
+
return trimmed.length > 0 ? trimmed : null;
|
|
43
|
+
}
|
|
44
|
+
function clipAtSentenceBoundary(input, maxLength) {
|
|
45
|
+
if (input.length <= maxLength) {
|
|
46
|
+
return input;
|
|
47
|
+
}
|
|
48
|
+
const slice = input.slice(0, maxLength);
|
|
49
|
+
const lastSentenceBreak = Math.max(slice.lastIndexOf('. '), slice.lastIndexOf('! '), slice.lastIndexOf('? '), slice.lastIndexOf('\n\n'));
|
|
50
|
+
if (lastSentenceBreak > maxLength * 0.5) {
|
|
51
|
+
return slice.slice(0, lastSentenceBreak + 1);
|
|
52
|
+
}
|
|
53
|
+
return slice;
|
|
54
|
+
}
|
|
55
|
+
function applyContentBudget(baseContent, maxCharacters) {
|
|
56
|
+
const totalCharacters = baseContent.length;
|
|
57
|
+
const truncated = totalCharacters > maxCharacters;
|
|
58
|
+
const clipped = truncated ? clipAtSentenceBoundary(baseContent, maxCharacters) : baseContent;
|
|
59
|
+
const content = clipped.trim();
|
|
60
|
+
const wordCount = content.length > 0 ? (0, es_toolkit_1.compact)(content.split(WORD_SPLIT_PATTERN)).length : 0;
|
|
61
|
+
return { content, truncated, totalCharacters, wordCount };
|
|
62
|
+
}
|
|
63
|
+
//# sourceMappingURL=cleaner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cleaner.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/cleaner.ts"],"names":[],"mappings":";;AAWA,gDAOC;AAED,kDAMC;AAED,gDAUC;AAED,gDAMC;AAED,wDAeC;AAED,gDAUC;AA3ED,2CAAoC;AAEpC,MAAM,kBAAkB,GAAG,MAAM,CAAA;AASjC,SAAgB,kBAAkB,CAAC,KAAa;IAC9C,OAAO,KAAK;SACT,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;SACzB,UAAU,CAAC,SAAS,EAAE,GAAG,CAAC;SAC1B,UAAU,CAAC,WAAW,EAAE,IAAI,CAAC;SAC7B,UAAU,CAAC,SAAS,EAAE,MAAM,CAAC;SAC7B,IAAI,EAAE,CAAA;AACX,CAAC;AAED,SAAgB,mBAAmB,CAAC,KAAa;IAC/C,OAAO,KAAK;SACT,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;SACzB,UAAU,CAAC,SAAS,EAAE,GAAG,CAAC;SAC1B,UAAU,CAAC,WAAW,EAAE,IAAI,CAAC;SAC7B,IAAI,EAAE,CAAA;AACX,CAAC;AAED,SAAgB,kBAAkB,CAAC,KAAa;IAC9C,OAAO,KAAK;SACT,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC;SACxB,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC;SACvB,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC;SACvB,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;SACzB,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC;SACxB,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;SACzB,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;SACzB,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAA;AAC9B,CAAC;AAED,SAAgB,kBAAkB,CAAC,KAAgC;IACjE,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACpD,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAA;AAC5C,CAAC;AAED,SAAgB,sBAAsB,CAAC,KAAa,EAAE,SAAiB;IACrE,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAA;IACd,CAAC;IACD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAA;IACvC,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAChC,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,EACvB,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,EACvB,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,EACvB,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAC1B,CAAA;IACD,IAAI,iBAAiB,GAAG,SAAS,GAAG,GAAG,EAAE,CAAC;QACxC,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,iBAAiB,GAAG,CAAC,CAAC,CAAA;IAC9C,CAAC;IACD,OAAO,KAAK,CAAA;AACd,CAAC;AAED,SAAgB,kBAAkB,CAChC,WAAmB,EACnB,aAAqB;IAErB,MAAM,eAAe,GAAG,WAAW,CAAC,MAAM,CAAA;IAC1C,MAAM,SAAS,GAAG,eAAe,GAAG,aAAa,CAAA;IACjD,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,sBAAsB,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC
,CAAC,CAAC,WAAW,CAAA;IAC5F,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;IAC9B,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAA,oBAAO,EAAC,OAAO,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAC5F,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,SAAS,EAAE,CAAA;AAC3D,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.MIN_HTML_DOCUMENT_CHARACTERS_FOR_FALLBACK = exports.READABILITY_RELATIVE_THRESHOLD = exports.MIN_METADATA_DESCRIPTION_CHARACTERS = exports.MIN_READABILITY_CONTENT_CHARACTERS = exports.MIN_HTML_CONTENT_CHARACTERS = exports.BLOCKED_HTML_HINT_PATTERN = void 0;
|
|
4
|
+
exports.BLOCKED_HTML_HINT_PATTERN = /access denied|attention required|captcha|cloudflare|enable javascript|forbidden|please turn javascript on|verify you are human/i;
|
|
5
|
+
exports.MIN_HTML_CONTENT_CHARACTERS = 200;
|
|
6
|
+
exports.MIN_READABILITY_CONTENT_CHARACTERS = 200;
|
|
7
|
+
exports.MIN_METADATA_DESCRIPTION_CHARACTERS = 120;
|
|
8
|
+
exports.READABILITY_RELATIVE_THRESHOLD = 0.6;
|
|
9
|
+
exports.MIN_HTML_DOCUMENT_CHARACTERS_FOR_FALLBACK = 5000;
|
|
10
|
+
//# sourceMappingURL=constants.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/constants.ts"],"names":[],"mappings":";;;AAAa,QAAA,yBAAyB,GACpC,iIAAiI,CAAA;AACtH,QAAA,2BAA2B,GAAG,GAAG,CAAA;AACjC,QAAA,kCAAkC,GAAG,GAAG,CAAA;AACxC,QAAA,mCAAmC,GAAG,GAAG,CAAA;AACzC,QAAA,8BAA8B,GAAG,GAAG,CAAA;AACpC,QAAA,yCAAyC,GAAG,IAAI,CAAA"}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.fetchHtmlDocument = fetchHtmlDocument;
|
|
4
|
+
exports.fetchWithFirecrawl = fetchWithFirecrawl;
|
|
5
|
+
const utils_js_1 = require("../../transcript/utils.js");
|
|
6
|
+
const utils_js_2 = require("./utils.js");
|
|
7
|
+
const REQUEST_HEADERS = {
|
|
8
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
|
|
9
|
+
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
|
10
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
11
|
+
'Cache-Control': 'no-cache',
|
|
12
|
+
Pragma: 'no-cache',
|
|
13
|
+
};
|
|
14
|
+
const DEFAULT_REQUEST_TIMEOUT_MS = 5000;
|
|
15
|
+
/**
 * Downloads `url` with the supplied fetch implementation and returns the body decoded as text.
 *
 * Progress is reported through `onProgress`: one `fetch-html-start`, then (only when the
 * response exposes a readable body) `fetch-html-progress` events while chunks arrive, and a
 * final `fetch-html-done` carrying the downloaded byte count.
 *
 * @param {Function} fetchImpl - fetch-compatible function, invoked as
 *   `fetchImpl(url, { headers, redirect, signal })`.
 * @param {string} url - Address of the document to download.
 * @param {object} [options]
 * @param {number} [options.timeoutMs] - Delay before the request is aborted. Anything that is
 *   not a finite number falls back to DEFAULT_REQUEST_TIMEOUT_MS.
 * @param {Function} [options.onProgress] - Optional progress listener.
 * @returns {Promise<string>} The decoded response body.
 * @throws {Error} When the response status is not ok, when the content-type is neither
 *   HTML/XML/feed-like nor `text/*`, or with the message "Fetching HTML document timed out"
 *   when the request was aborted. Any other failure is rethrown unchanged.
 */
async function fetchHtmlDocument(fetchImpl, url, { timeoutMs, onProgress, } = {}) {
    onProgress?.({ kind: 'fetch-html-start', url });
    const controller = new AbortController();
    const effectiveTimeoutMs = typeof timeoutMs === 'number' && Number.isFinite(timeoutMs)
        ? timeoutMs
        : DEFAULT_REQUEST_TIMEOUT_MS;
    const timeout = setTimeout(() => {
        controller.abort();
    }, effectiveTimeoutMs);
    try {
        const response = await fetchImpl(url, {
            headers: REQUEST_HEADERS,
            redirect: 'follow',
            signal: controller.signal,
        });
        if (!response.ok) {
            throw new Error(`Failed to fetch HTML document (status ${response.status})`);
        }
        // A missing content-type header is tolerated; only an explicit non-text type is rejected.
        const contentType = response.headers.get('content-type')?.toLowerCase() ?? null;
        if (contentType &&
            !contentType.includes('text/html') &&
            !contentType.includes('application/xhtml+xml') &&
            !contentType.includes('application/xml') &&
            !contentType.includes('text/xml') &&
            !contentType.includes('application/rss+xml') &&
            !contentType.includes('application/atom+xml') &&
            !contentType.startsWith('text/')) {
            throw new Error(`Unsupported content-type for HTML document fetch: ${contentType}`);
        }
        // Expected size from content-length, or null when absent, non-numeric or not positive.
        const totalBytes = (() => {
            const raw = response.headers.get('content-length');
            if (!raw)
                return null;
            const parsed = Number(raw);
            return Number.isFinite(parsed) && parsed > 0 ? Math.floor(parsed) : null;
        })();
        const body = response.body;
        if (!body) {
            // No stream available: read everything at once and measure the UTF-8 size afterwards.
            const text = await response.text();
            const bytes = new TextEncoder().encode(text).byteLength;
            onProgress?.({ kind: 'fetch-html-done', url, downloadedBytes: bytes, totalBytes });
            return text;
        }
        const reader = body.getReader();
        const decoder = new TextDecoder();
        let downloadedBytes = 0;
        let text = '';
        onProgress?.({ kind: 'fetch-html-progress', url, downloadedBytes: 0, totalBytes });
        while (true) {
            const { value, done } = await reader.read();
            if (done)
                break;
            if (!value)
                continue;
            downloadedBytes += value.byteLength;
            // stream: true keeps multi-byte sequences that straddle chunk boundaries intact.
            text += decoder.decode(value, { stream: true });
            onProgress?.({ kind: 'fetch-html-progress', url, downloadedBytes, totalBytes });
        }
        // Flush whatever the streaming decoder is still buffering.
        text += decoder.decode();
        onProgress?.({ kind: 'fetch-html-done', url, downloadedBytes, totalBytes });
        return text;
    }
    catch (error) {
        // Recognise aborts by name instead of by class: `fetchImpl` is injected, and fetch
        // polyfills or wrappers may reject with an AbortError that is not a DOMException.
        const isAbortError = typeof error === 'object' && error !== null && error.name === 'AbortError';
        if (isAbortError) {
            throw new Error('Fetching HTML document timed out', { cause: error });
        }
        throw error;
    }
    finally {
        clearTimeout(timeout);
    }
}
|
|
87
|
+
/**
 * Tries to obtain page content for `url` through the provided Firecrawl scraper.
 *
 * Never throws for scraper failures: every outcome is returned as
 * `{ payload, diagnostics }`, where `payload` is null when Firecrawl was skipped, is not
 * configured, returned nothing or raised an error, and `diagnostics.notes` records why.
 *
 * @param {string} url - Page to scrape.
 * @param {Function|null|undefined} scrapeWithFirecrawl - Scraper invoked as
 *   `scrapeWithFirecrawl(url, { timeoutMs, cacheMode })`; a falsy value means "not configured".
 * @param {object} [options]
 * @param {number} [options.timeoutMs] - Forwarded to the scraper as-is.
 * @param {string} [options.cacheMode] - Defaults to 'default'; 'bypass' marks the cache status as 'bypassed'.
 * @param {Function} [options.onProgress] - Receives `firecrawl-start` / `firecrawl-done` events.
 * @param {string} [options.reason] - Reported in the start event; 'firecrawl' when not a string.
 * @returns {Promise<{payload: object|null, diagnostics: object}>}
 */
async function fetchWithFirecrawl(url, scrapeWithFirecrawl, options = {}) {
    const { timeoutMs } = options;
    const cacheMode = options.cacheMode ?? 'default';
    const notify = typeof options.onProgress === 'function' ? options.onProgress : null;
    const startReason = typeof options.reason === 'string' ? options.reason : 'firecrawl';
    const diagnostics = {
        attempted: false,
        used: false,
        cacheMode,
        cacheStatus: cacheMode === 'bypass' ? 'bypassed' : 'unknown',
        notes: null,
    };
    // Small local helpers so every exit path records its note and result the same way.
    const addNote = (message) => {
        diagnostics.notes = (0, utils_js_2.appendNote)(diagnostics.notes, message);
    };
    const withoutPayload = () => ({ payload: null, diagnostics });
    const reportFailure = () => {
        notify?.({ kind: 'firecrawl-done', url, ok: false, markdownBytes: null, htmlBytes: null });
    };

    if ((0, utils_js_1.isYouTubeUrl)(url)) {
        addNote('Skipped Firecrawl for YouTube URL');
        return withoutPayload();
    }
    if (!scrapeWithFirecrawl) {
        addNote('Firecrawl is not configured');
        return withoutPayload();
    }

    diagnostics.attempted = true;
    notify?.({ kind: 'firecrawl-start', url, reason: startReason });
    try {
        const payload = await scrapeWithFirecrawl(url, { timeoutMs, cacheMode });
        if (!payload) {
            addNote('Firecrawl returned no content payload');
            reportFailure();
            return withoutPayload();
        }
        // Sizes are reported in UTF-8 bytes; a missing field is reported as null.
        const encoder = new TextEncoder();
        const utf8Size = (value) => (typeof value === 'string' ? encoder.encode(value).byteLength : null);
        const markdownBytes = utf8Size(payload.markdown);
        const htmlBytes = utf8Size(payload.html);
        notify?.({ kind: 'firecrawl-done', url, ok: true, markdownBytes, htmlBytes });
        return { payload, diagnostics };
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : 'unknown error';
        addNote(`Firecrawl error: ${detail}`);
        reportFailure();
        return withoutPayload();
    }
}
|
|
128
|
+
//# sourceMappingURL=fetcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetcher.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/fetcher.ts"],"names":[],"mappings":";;AA2BA,8CAsFC;AAED,gDA2DC;AA9KD,wDAAwD;AAQxD,yCAAuC;AAEvC,MAAM,eAAe,GAA2B;IAC9C,YAAY,EACV,iHAAiH;IACnH,MAAM,EACJ,kGAAkG;IACpG,iBAAiB,EAAE,gBAAgB;IACnC,eAAe,EAAE,UAAU;IAC3B,MAAM,EAAE,UAAU;CACnB,CAAA;AAED,MAAM,0BAA0B,GAAG,IAAI,CAAA;AAOhC,KAAK,UAAU,iBAAiB,CACrC,SAAuB,EACvB,GAAW,EACX,EACE,SAAS,EACT,UAAU,MACiF,EAAE;IAE/F,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,kBAAkB,EAAE,GAAG,EAAE,CAAC,CAAA;IAE/C,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAA;IACxC,MAAM,kBAAkB,GACtB,OAAO,SAAS,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;QACzD,CAAC,CAAC,SAAS;QACX,CAAC,CAAC,0BAA0B,CAAA;IAChC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE;QAC9B,UAAU,CAAC,KAAK,EAAE,CAAA;IACpB,CAAC,EAAE,kBAAkB,CAAC,CAAA;IAEtB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,GAAG,EAAE;YACpC,OAAO,EAAE,eAAe;YACxB,QAAQ,EAAE,QAAQ;YAClB,MAAM,EAAE,UAAU,CAAC,MAAM;SAC1B,CAAC,CAAA;QAEF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,yCAAyC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAA;QAC9E,CAAC;QAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,WAAW,EAAE,IAAI,IAAI,CAAA;QAC/E,IACE,WAAW;YACX,CAAC,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC;YAClC,CAAC,WAAW,CAAC,QAAQ,CAAC,uBAAuB,CAAC;YAC9C,CAAC,WAAW,CAAC,QAAQ,CAAC,iBAAiB,CAAC;YACxC,CAAC,WAAW,CAAC,QAAQ,CAAC,UAAU,CAAC;YACjC,CAAC,WAAW,CAAC,QAAQ,CAAC,qBAAqB,CAAC;YAC5C,CAAC,WAAW,CAAC,QAAQ,CAAC,sBAAsB,CAAC;YAC7C,CAAC,WAAW,CAAC,UAAU,CAAC,OAAO,CAAC,EAChC,CAAC;YACD,MAAM,IAAI,KAAK,CAAC,qDAAqD,WAAW,EAAE,CAAC,CAAA;QACrF,CAAC;QAED,MAAM,UAAU,GAAG,CAAC,GAAG,EAAE;YACvB,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAA;YAClD,IAAI,CAAC,GAAG;gBAAE,OAAO,IAAI,CAAA;YACrB,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAA;YAC1B,OAAO,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;QAC1E,CAAC,CAAC,EAAE,CAAA;QAEJ,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAA;QAC1B,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE
,CAAA;YAClC,MAAM,KAAK,GAAG,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,UAAU,CAAA;YACvD,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,GAAG,EAAE,eAAe,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAA;YAClF,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAA;QAC/B,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAA;QACjC,IAAI,eAAe,GAAG,CAAC,CAAA;QACvB,IAAI,IAAI,GAAG,EAAE,CAAA;QAEb,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,qBAAqB,EAAE,GAAG,EAAE,eAAe,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,CAAA;QAElF,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAA;YAC3C,IAAI,IAAI;gBAAE,MAAK;YACf,IAAI,CAAC,KAAK;gBAAE,SAAQ;YACpB,eAAe,IAAI,KAAK,CAAC,UAAU,CAAA;YACnC,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAA;YAC/C,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,qBAAqB,EAAE,GAAG,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC,CAAA;QACjF,CAAC;QAED,IAAI,IAAI,OAAO,CAAC,MAAM,EAAE,CAAA;QACxB,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,GAAG,EAAE,eAAe,EAAE,UAAU,EAAE,CAAC,CAAA;QAC3E,OAAO,IAAI,CAAA;IACb,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,YAAY,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YACjE,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAA;QACrD,CAAC;QACD,MAAM,KAAK,CAAA;IACb,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,OAAO,CAAC,CAAA;IACvB,CAAC;AACH,CAAC;AAEM,KAAK,UAAU,kBAAkB,CACtC,GAAW,EACX,mBAA+C,EAC/C,UAKI,EAAE;IAEN,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,CAAA;IACnC,MAAM,SAAS,GAAc,OAAO,CAAC,SAAS,IAAI,SAAS,CAAA;IAC3D,MAAM,UAAU,GAAG,OAAO,OAAO,CAAC,UAAU,KAAK,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;IACvF,MAAM,MAAM,GAAG,OAAO,OAAO,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAA;IACzE,MAAM,WAAW,GAAyB;QACxC,SAAS,EAAE,KAAK;QAChB,IAAI,EAAE,KAAK;QACX,SAAS;QACT,WAAW,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;QAC5D,KAAK,EAAE,IAAI;KACZ,CAAA;IAED,IAAI,IAAA,uBAAY,EAAC,GAAG,CAAC,EAAE,CAAC;QACtB,WAAW,CAAC,KAAK,GAAG,IAAA,qBAAU,EAAC,WAAW,CAAC,KAAK,EAAE,mCAAmC,CAAC,CAAA;QACtF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IACvC,CAAC;IAED,IAAI,CAAC,mBAAmB,EAAE,CAAC;QACzB,WAAW,CAAC,KAAK,G
AAG,IAAA,qBAAU,EAAC,WAAW,CAAC,KAAK,EAAE,6BAA6B,CAAC,CAAA;QAChF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IACvC,CAAC;IAED,WAAW,CAAC,SAAS,GAAG,IAAI,CAAA;IAC5B,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,iBAAiB,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,IAAI,WAAW,EAAE,CAAC,CAAA;IAE7E,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,mBAAmB,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAA;QACxE,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,WAAW,CAAC,KAAK,GAAG,IAAA,qBAAU,EAAC,WAAW,CAAC,KAAK,EAAE,uCAAuC,CAAC,CAAA;YAC1F,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,EAAE,aAAa,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;YAC9F,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;QACvC,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAA;QACjC,MAAM,aAAa,GACjB,OAAO,OAAO,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;QAC3F,MAAM,SAAS,GACb,OAAO,OAAO,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;QACnF,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,GAAG,EAAE,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE,SAAS,EAAE,CAAC,CAAA;QAEjF,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,CAAA;IACjC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,WAAW,CAAC,KAAK,GAAG,IAAA,qBAAU,EAC5B,WAAW,CAAC,KAAK,EACjB,oBAAoB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAC/E,CAAA;QACD,UAAU,EAAE,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,GAAG,EAAE,EAAE,EAAE,KAAK,EAAE,aAAa,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;QAC9F,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IACvC,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.shouldFallbackToFirecrawl = shouldFallbackToFirecrawl;
|
|
4
|
+
exports.buildResultFromFirecrawl = buildResultFromFirecrawl;
|
|
5
|
+
const index_js_1 = require("../../transcript/index.js");
|
|
6
|
+
const article_js_1 = require("./article.js");
|
|
7
|
+
const cleaner_js_1 = require("./cleaner.js");
|
|
8
|
+
const constants_js_1 = require("./constants.js");
|
|
9
|
+
const jsonld_js_1 = require("./jsonld.js");
|
|
10
|
+
const parsers_js_1 = require("./parsers.js");
|
|
11
|
+
const podcast_utils_js_1 = require("./podcast-utils.js");
|
|
12
|
+
const utils_js_1 = require("./utils.js");
|
|
13
|
+
const video_js_1 = require("./video.js");
|
|
14
|
+
/**
 * Decides whether a directly fetched HTML document should be re-fetched through Firecrawl.
 *
 * Returns true when the normalized plain text matches BLOCKED_HTML_HINT_PATTERN, or when the
 * extracted article content is shorter than MIN_HTML_CONTENT_CHARACTERS while the raw HTML is
 * at least MIN_HTML_DOCUMENT_CHARACTERS_FOR_FALLBACK characters long.
 *
 * @param {string} html - Raw HTML of the fetched document.
 * @returns {boolean}
 */
function shouldFallbackToFirecrawl(html) {
    const visibleText = (0, cleaner_js_1.normalizeForPrompt)((0, article_js_1.extractPlainText)(html));
    const looksBlocked = constants_js_1.BLOCKED_HTML_HINT_PATTERN.test(visibleText);
    if (looksBlocked) {
        return true;
    }
    const articleText = (0, cleaner_js_1.normalizeForPrompt)((0, article_js_1.extractArticleContent)(html));
    const hasEnoughContent = articleText.length >= constants_js_1.MIN_HTML_CONTENT_CHARACTERS;
    // Short extracted content alone is not a reason to spend a Firecrawl call: tiny, simple pages
    // (e.g. https://example.com) are short but most likely complete. "Thin" content only counts
    // as a fallback signal when the HTML document itself is large (SSR/app-shell pages, blocked
    // pages that did not match the hint pattern, etc.).
    const documentIsLarge = () => html.length >= constants_js_1.MIN_HTML_DOCUMENT_CHARACTERS_FOR_FALLBACK;
    return hasEnoughContent ? false : documentIsLarge();
}
|
|
27
|
+
/**
 * Turns a Firecrawl payload into the finalized extracted-link result.
 *
 * Title, description and site name are picked from JSON-LD, Firecrawl metadata and HTML
 * metadata (in that order of preference; site name falls back to the URL's hostname). The
 * base content is either the normalized markdown or, under the conditions below, the
 * normalized description, and is then combined with the resolved transcript text via
 * `selectBaseContent`.
 *
 * Side effects: appends notes to `firecrawlDiagnostics.notes` on the empty-content paths and
 * sets `firecrawlDiagnostics.used = true` once usable content has been found.
 *
 * @returns The value produced by `finalizeExtractedLinkContent`, or null when the markdown or
 *   the selected base content is empty.
 */
async function buildResultFromFirecrawl({ url, payload, cacheMode, maxCharacters, youtubeTranscriptMode, firecrawlDiagnostics, markdownRequested, deps, }) {
    const addNote = (message) => {
        firecrawlDiagnostics.notes = (0, utils_js_1.appendNote)(firecrawlDiagnostics.notes, message);
    };

    const markdownText = (0, cleaner_js_1.normalizeForPrompt)(payload.markdown ?? '');
    if (markdownText.length === 0) {
        addNote('Firecrawl markdown normalization yielded empty text');
        return null;
    }

    let jsonLd = null;
    if (payload.html) {
        jsonLd = (0, jsonld_js_1.extractJsonLdContent)(payload.html);
    }
    const isPodcastJsonLd = (0, podcast_utils_js_1.isPodcastLikeJsonLdType)(jsonLd?.type);

    const transcriptOptions = { youtubeTranscriptMode, cacheMode };
    const transcriptResolution = await (0, index_js_1.resolveTranscriptForLink)(url, payload.html ?? null, deps, transcriptOptions);

    let htmlMetadata = { title: null, description: null, siteName: null };
    if (payload.html) {
        htmlMetadata = (0, parsers_js_1.extractMetadataFromHtml)(payload.html, url);
    }
    const firecrawlMetadata = (0, parsers_js_1.extractMetadataFromFirecrawl)(payload.metadata ?? null);

    const title = (0, utils_js_1.pickFirstText)([jsonLd?.title, firecrawlMetadata.title, htmlMetadata.title]);
    const description = (0, utils_js_1.pickFirstText)([jsonLd?.description, firecrawlMetadata.description, htmlMetadata.description]);
    const siteName = (0, utils_js_1.pickFirstText)([
        firecrawlMetadata.siteName,
        htmlMetadata.siteName,
        (0, utils_js_1.safeHostname)(url),
    ]);

    // The description replaces the markdown as base content only when it is long enough AND one
    // of: the page is podcast-like (JSON-LD type or host), the markdown is thin, or the
    // description is large relative to the markdown.
    const descriptionText = description ? (0, cleaner_js_1.normalizeForPrompt)(description) : '';
    let preferDescription = false;
    if (descriptionText.length >= constants_js_1.MIN_METADATA_DESCRIPTION_CHARACTERS) {
        preferDescription =
            isPodcastJsonLd ||
                (0, podcast_utils_js_1.isPodcastHost)(url) ||
                markdownText.length < constants_js_1.MIN_HTML_CONTENT_CHARACTERS ||
                descriptionText.length >= markdownText.length * constants_js_1.READABILITY_RELATIVE_THRESHOLD;
    }

    const baseContent = (0, utils_js_1.selectBaseContent)(preferDescription ? descriptionText : markdownText, transcriptResolution.text);
    if (baseContent.length === 0) {
        addNote('Firecrawl produced content that normalized to an empty string');
        return null;
    }

    firecrawlDiagnostics.used = true;
    const transcriptDiagnostics = (0, utils_js_1.ensureTranscriptDiagnostics)(transcriptResolution, cacheMode ?? 'default');

    let video = null;
    if (payload.html) {
        video = (0, video_js_1.detectPrimaryVideoFromHtml)(payload.html, url);
    }
    // "Video only": no transcript text, thin markdown, and a primary video was detected.
    const isVideoOnly = !transcriptResolution.text &&
        markdownText.length < constants_js_1.MIN_HTML_CONTENT_CHARACTERS &&
        video !== null;

    const diagnostics = {
        strategy: 'firecrawl',
        firecrawl: firecrawlDiagnostics,
        markdown: {
            requested: markdownRequested,
            used: true,
            provider: 'firecrawl',
        },
        transcript: transcriptDiagnostics,
    };
    return (0, utils_js_1.finalizeExtractedLinkContent)({
        url,
        baseContent,
        maxCharacters,
        title,
        description,
        siteName,
        transcriptResolution,
        video,
        isVideoOnly,
        diagnostics,
    });
}
|
|
90
|
+
//# sourceMappingURL=firecrawl.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"firecrawl.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/firecrawl.ts"],"names":[],"mappings":";;AA0BA,8DAYC;AAED,4DAmGC;AA3ID,wDAAoE;AAGpE,6CAAsE;AACtE,6CAAiD;AACjD,iDAMuB;AACvB,2CAAkD;AAClD,6CAAoF;AACpF,yDAA2E;AAE3E,yCAOmB;AACnB,yCAAuD;AAEvD,SAAgB,yBAAyB,CAAC,IAAY;IACpD,MAAM,SAAS,GAAG,IAAA,+BAAkB,EAAC,IAAA,6BAAgB,EAAC,IAAI,CAAC,CAAC,CAAA;IAC5D,IAAI,wCAAyB,CAAC,IAAI,CAAC,SAAS,CAAC;QAAE,OAAO,IAAI,CAAA;IAC1D,MAAM,UAAU,GAAG,IAAA,+BAAkB,EAAC,IAAA,kCAAqB,EAAC,IAAI,CAAC,CAAC,CAAA;IAClE,IAAI,UAAU,CAAC,MAAM,IAAI,0CAA2B,EAAE,CAAC;QACrD,OAAO,KAAK,CAAA;IACd,CAAC;IAED,qGAAqG;IACrG,mGAAmG;IACnG,gGAAgG;IAChG,OAAO,IAAI,CAAC,MAAM,IAAI,wDAAyC,CAAA;AACjE,CAAC;AAEM,KAAK,UAAU,wBAAwB,CAAC,EAC7C,GAAG,EACH,OAAO,EACP,SAAS,EACT,aAAa,EACb,qBAAqB,EACrB,oBAAoB,EACpB,iBAAiB,EACjB,IAAI,GAUL;IACC,MAAM,kBAAkB,GAAG,IAAA,+BAAkB,EAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAA;IACrE,IAAI,kBAAkB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpC,oBAAoB,CAAC,KAAK,GAAG,IAAA,qBAAU,EACrC,oBAAoB,CAAC,KAAK,EAC1B,qDAAqD,CACtD,CAAA;QACD,OAAO,IAAI,CAAA;IACb,CAAC;IAED,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAA,gCAAoB,EAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IACvE,MAAM,eAAe,GAAG,IAAA,0CAAuB,EAAC,MAAM,EAAE,IAAI,CAAC,CAAA;IAE7D,MAAM,oBAAoB,GAAG,MAAM,IAAA,mCAAwB,EAAC,GAAG,EAAE,OAAO,CAAC,IAAI,IAAI,IAAI,EAAE,IAAI,EAAE;QAC3F,qBAAqB;QACrB,SAAS;KACV,CAAC,CAAA;IACF,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI;QAC/B,CAAC,CAAC,IAAA,oCAAuB,EAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;QAC5C,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;IACtD,MAAM,QAAQ,GAAG,IAAA,yCAA4B,EAAC,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAA;IAEvE,MAAM,KAAK,GAAG,IAAA,wBAAa,EAAC,CAAC,MAAM,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC,CAAA;IAChF,MAAM,WAAW,GAAG,IAAA,wBAAa,EAAC;QAChC,MAAM,EAAE,WAAW;QACnB,QAAQ,CAAC,WAAW;QACpB,YAAY,CAAC,WAAW;KACzB,CAAC,CAAA;IACF,MAAM,QAAQ,GAAG,IAAA,wBAAa,EAAC,CAAC,QAAQ,CAAC,QAAQ,EAAE,YAAY,CAAC,QAAQ,EAAE,IAAA,uBAAY,EAAC,GAAG,CAAC,CAAC,CAAC,CAAA;IAE7F,MAAM,oBAAoB,G
AAG,WAAW,CAAC,CAAC,CAAC,IAAA,+BAAkB,EAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC/E,MAAM,iBAAiB,GACrB,oBAAoB,CAAC,MAAM,IAAI,kDAAmC;QAClE,CAAC,eAAe;YACd,IAAA,gCAAa,EAAC,GAAG,CAAC;YAClB,kBAAkB,CAAC,MAAM,GAAG,0CAA2B;YACvD,oBAAoB,CAAC,MAAM,IAAI,kBAAkB,CAAC,MAAM,GAAG,6CAA8B,CAAC,CAAA;IAC9F,MAAM,aAAa,GAAG,iBAAiB,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,kBAAkB,CAAA;IACnF,MAAM,WAAW,GAAG,IAAA,4BAAiB,EAAC,aAAa,EAAE,oBAAoB,CAAC,IAAI,CAAC,CAAA;IAC/E,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,oBAAoB,CAAC,KAAK,GAAG,IAAA,qBAAU,EACrC,oBAAoB,CAAC,KAAK,EAC1B,+DAA+D,CAChE,CAAA;QACD,OAAO,IAAI,CAAA;IACb,CAAC;IAED,oBAAoB,CAAC,IAAI,GAAG,IAAI,CAAA;IAEhC,MAAM,qBAAqB,GAAG,IAAA,sCAA2B,EACvD,oBAAoB,EACpB,SAAS,IAAI,SAAS,CACvB,CAAA;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAA,qCAA0B,EAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IACjF,MAAM,WAAW,GACf,CAAC,oBAAoB,CAAC,IAAI;QAC1B,kBAAkB,CAAC,MAAM,GAAG,0CAA2B;QACvD,KAAK,KAAK,IAAI,CAAA;IAEhB,OAAO,IAAA,uCAA4B,EAAC;QAClC,GAAG;QACH,WAAW;QACX,aAAa;QACb,KAAK;QACL,WAAW;QACX,QAAQ;QACR,oBAAoB;QACpB,KAAK;QACL,WAAW;QACX,WAAW,EAAE;YACX,QAAQ,EAAE,WAAW;YACrB,SAAS,EAAE,oBAAoB;YAC/B,QAAQ,EAAE;gBACR,SAAS,EAAE,iBAAiB;gBAC5B,IAAI,EAAE,IAAI;gBACV,QAAQ,EAAE,WAAW;aACtB;YACD,UAAU,EAAE,qBAAqB;SAClC;KACF,CAAC,CAAA;AACJ,CAAC"}
|