@steipete/summarize-core 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/cjs/content/index.js +14 -0
- package/dist/cjs/content/index.js.map +1 -0
- package/dist/cjs/content/link-preview/client.js +31 -0
- package/dist/cjs/content/link-preview/client.js.map +1 -0
- package/dist/cjs/content/link-preview/content/article.js +164 -0
- package/dist/cjs/content/link-preview/content/article.js.map +1 -0
- package/dist/cjs/content/link-preview/content/cleaner.js +63 -0
- package/dist/cjs/content/link-preview/content/cleaner.js.map +1 -0
- package/dist/cjs/content/link-preview/content/constants.js +10 -0
- package/dist/cjs/content/link-preview/content/constants.js.map +1 -0
- package/dist/cjs/content/link-preview/content/fetcher.js +128 -0
- package/dist/cjs/content/link-preview/content/fetcher.js.map +1 -0
- package/dist/cjs/content/link-preview/content/firecrawl.js +90 -0
- package/dist/cjs/content/link-preview/content/firecrawl.js.map +1 -0
- package/dist/cjs/content/link-preview/content/html.js +165 -0
- package/dist/cjs/content/link-preview/content/html.js.map +1 -0
- package/dist/cjs/content/link-preview/content/index.js +348 -0
- package/dist/cjs/content/link-preview/content/index.js.map +1 -0
- package/dist/cjs/content/link-preview/content/jsonld.js +80 -0
- package/dist/cjs/content/link-preview/content/jsonld.js.map +1 -0
- package/dist/cjs/content/link-preview/content/parsers.js +81 -0
- package/dist/cjs/content/link-preview/content/parsers.js.map +1 -0
- package/dist/cjs/content/link-preview/content/podcast-utils.js +85 -0
- package/dist/cjs/content/link-preview/content/podcast-utils.js.map +1 -0
- package/dist/cjs/content/link-preview/content/readability.js +90 -0
- package/dist/cjs/content/link-preview/content/readability.js.map +1 -0
- package/dist/cjs/content/link-preview/content/twitter-utils.js +74 -0
- package/dist/cjs/content/link-preview/content/twitter-utils.js.map +1 -0
- package/dist/cjs/content/link-preview/content/types.js +7 -0
- package/dist/cjs/content/link-preview/content/types.js.map +1 -0
- package/dist/cjs/content/link-preview/content/utils.js +177 -0
- package/dist/cjs/content/link-preview/content/utils.js.map +1 -0
- package/dist/cjs/content/link-preview/content/video.js +99 -0
- package/dist/cjs/content/link-preview/content/video.js.map +1 -0
- package/dist/cjs/content/link-preview/content/youtube.js +85 -0
- package/dist/cjs/content/link-preview/content/youtube.js.map +1 -0
- package/dist/cjs/content/link-preview/deps.js +23 -0
- package/dist/cjs/content/link-preview/deps.js.map +1 -0
- package/dist/cjs/content/link-preview/fetch-with-timeout.js +38 -0
- package/dist/cjs/content/link-preview/fetch-with-timeout.js.map +1 -0
- package/dist/cjs/content/link-preview/types.js +5 -0
- package/dist/cjs/content/link-preview/types.js.map +1 -0
- package/dist/cjs/content/transcript/cache.js +85 -0
- package/dist/cjs/content/transcript/cache.js.map +1 -0
- package/dist/cjs/content/transcript/index.js +134 -0
- package/dist/cjs/content/transcript/index.js.map +1 -0
- package/dist/cjs/content/transcript/normalize.js +49 -0
- package/dist/cjs/content/transcript/normalize.js.map +1 -0
- package/dist/cjs/content/transcript/providers/generic.js +16 -0
- package/dist/cjs/content/transcript/providers/generic.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/apple-flow.js +226 -0
- package/dist/cjs/content/transcript/providers/podcast/apple-flow.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/apple.js +43 -0
- package/dist/cjs/content/transcript/providers/podcast/apple.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/constants.js +11 -0
- package/dist/cjs/content/transcript/providers/podcast/constants.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/flow-context.js +3 -0
- package/dist/cjs/content/transcript/providers/podcast/flow-context.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/itunes.js +139 -0
- package/dist/cjs/content/transcript/providers/podcast/itunes.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/json.js +43 -0
- package/dist/cjs/content/transcript/providers/podcast/json.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/media.js +355 -0
- package/dist/cjs/content/transcript/providers/podcast/media.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/results.js +32 -0
- package/dist/cjs/content/transcript/providers/podcast/results.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/rss.js +262 -0
- package/dist/cjs/content/transcript/providers/podcast/rss.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/spotify-flow.js +221 -0
- package/dist/cjs/content/transcript/providers/podcast/spotify-flow.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast/spotify.js +119 -0
- package/dist/cjs/content/transcript/providers/podcast/spotify.js.map +1 -0
- package/dist/cjs/content/transcript/providers/podcast.js +260 -0
- package/dist/cjs/content/transcript/providers/podcast.js.map +1 -0
- package/dist/cjs/content/transcript/providers/youtube/api.js +264 -0
- package/dist/cjs/content/transcript/providers/youtube/api.js.map +1 -0
- package/dist/cjs/content/transcript/providers/youtube/apify.js +59 -0
- package/dist/cjs/content/transcript/providers/youtube/apify.js.map +1 -0
- package/dist/cjs/content/transcript/providers/youtube/captions.js +413 -0
- package/dist/cjs/content/transcript/providers/youtube/captions.js.map +1 -0
- package/dist/cjs/content/transcript/providers/youtube/yt-dlp.js +170 -0
- package/dist/cjs/content/transcript/providers/youtube/yt-dlp.js.map +1 -0
- package/dist/cjs/content/transcript/providers/youtube.js +178 -0
- package/dist/cjs/content/transcript/providers/youtube.js.map +1 -0
- package/dist/cjs/content/transcript/types.js +3 -0
- package/dist/cjs/content/transcript/types.js.map +1 -0
- package/dist/cjs/content/transcript/utils.js +303 -0
- package/dist/cjs/content/transcript/utils.js.map +1 -0
- package/dist/cjs/index.js +22 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/language.js +132 -0
- package/dist/cjs/language.js.map +1 -0
- package/dist/cjs/package.json +3 -0
- package/dist/cjs/prompts/cli.js +23 -0
- package/dist/cjs/prompts/cli.js.map +1 -0
- package/dist/cjs/prompts/file.js +52 -0
- package/dist/cjs/prompts/file.js.map +1 -0
- package/dist/cjs/prompts/index.js +14 -0
- package/dist/cjs/prompts/index.js.map +1 -0
- package/dist/cjs/prompts/link-summary.js +122 -0
- package/dist/cjs/prompts/link-summary.js.map +1 -0
- package/dist/cjs/shared/contracts.js +5 -0
- package/dist/cjs/shared/contracts.js.map +1 -0
- package/dist/cjs/transcription/whisper/constants.js +11 -0
- package/dist/cjs/transcription/whisper/constants.js.map +1 -0
- package/dist/cjs/transcription/whisper/core.js +307 -0
- package/dist/cjs/transcription/whisper/core.js.map +1 -0
- package/dist/cjs/transcription/whisper/fal.js +44 -0
- package/dist/cjs/transcription/whisper/fal.js.map +1 -0
- package/dist/cjs/transcription/whisper/ffmpeg.js +187 -0
- package/dist/cjs/transcription/whisper/ffmpeg.js.map +1 -0
- package/dist/cjs/transcription/whisper/openai.js +51 -0
- package/dist/cjs/transcription/whisper/openai.js.map +1 -0
- package/dist/cjs/transcription/whisper/types.js +3 -0
- package/dist/cjs/transcription/whisper/types.js.map +1 -0
- package/dist/cjs/transcription/whisper/utils.js +70 -0
- package/dist/cjs/transcription/whisper/utils.js.map +1 -0
- package/dist/cjs/transcription/whisper/whisper-cpp.js +232 -0
- package/dist/cjs/transcription/whisper/whisper-cpp.js.map +1 -0
- package/dist/cjs/transcription/whisper.js +15 -0
- package/dist/cjs/transcription/whisper.js.map +1 -0
- package/package.json +15 -12
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.extractMetadataFromHtml = extractMetadataFromHtml;
|
|
4
|
+
exports.extractMetadataFromFirecrawl = extractMetadataFromFirecrawl;
|
|
5
|
+
const cheerio_1 = require("cheerio");
|
|
6
|
+
const cleaner_js_1 = require("./cleaner.js");
|
|
7
|
+
const utils_js_1 = require("./utils.js");
|
|
8
|
+
// Allow-list of tag names extractTagText may read text from; any other tag name yields null.
const ALLOWED_TEXT_TAGS = new Set(['title']);
|
|
9
|
+
/**
 * Derive preview metadata (title, description, site name) from an HTML document.
 *
 * Each field is the first usable candidate, in priority order:
 *  - title:       og:title (property), og:title (name), twitter:title, then the <title> tag
 *  - description: og:description, description, twitter:description
 *  - siteName:    og:site_name, application-name, then the hostname derived from `url`
 *
 * @param {string} html - Raw HTML markup to parse.
 * @param {string} url - Page URL, used only as the site-name fallback.
 * @returns {{title: *, description: *, siteName: *}} Whatever pickFirstText selects per field.
 */
function extractMetadataFromHtml(html, url) {
    const document = (0, cheerio_1.load)(html);

    const titleSelectors = [
        { attribute: 'property', value: 'og:title' },
        { attribute: 'name', value: 'og:title' },
        { attribute: 'name', value: 'twitter:title' },
    ];
    const descriptionSelectors = [
        { attribute: 'property', value: 'og:description' },
        { attribute: 'name', value: 'description' },
        { attribute: 'name', value: 'twitter:description' },
    ];
    const siteNameSelectors = [
        { attribute: 'property', value: 'og:site_name' },
        { attribute: 'name', value: 'application-name' },
    ];

    const titleCandidates = [
        pickMetaContent(document, titleSelectors),
        extractTagText(document, 'title'),
    ];
    const descriptionCandidates = [pickMetaContent(document, descriptionSelectors)];
    const siteNameCandidates = [
        pickMetaContent(document, siteNameSelectors),
        (0, utils_js_1.safeHostname)(url),
    ];

    const title = (0, utils_js_1.pickFirstText)(titleCandidates);
    const description = (0, utils_js_1.pickFirstText)(descriptionCandidates);
    const siteName = (0, utils_js_1.pickFirstText)(siteNameCandidates);
    return { title, description, siteName };
}
|
|
35
|
+
/**
 * Map a Firecrawl metadata record onto the preview metadata shape.
 *
 * For every output field the plain key is preferred and the Open Graph
 * variant (`og*`) is the fallback; selection is delegated to pickFirstText.
 *
 * @param {object|null|undefined} metadata - Firecrawl metadata record (may be absent).
 * @returns {{title: *, description: *, siteName: *}} Selected values per field.
 */
function extractMetadataFromFirecrawl(metadata) {
    // Resolve the listed keys in order and let pickFirstText choose among them.
    const firstOf = (...keys) => (0, utils_js_1.pickFirstText)(keys.map((key) => metadataString(metadata, key)));
    const title = firstOf('title', 'ogTitle');
    const description = firstOf('description', 'ogDescription');
    const siteName = firstOf('siteName', 'ogSiteName');
    return { title, description, siteName };
}
|
|
48
|
+
/**
 * Return the first non-empty, normalized <meta> value matching one of `selectors`.
 *
 * Selectors are tried in order. For the first matching element the `content`
 * attribute is read, falling back to `value`, then to an empty string; the raw
 * text is entity-decoded and normalized before being accepted.
 *
 * @param {Function} $ - Query function for the loaded document (cheerio-style).
 * @param {Array<{attribute: string, value: string}>} selectors - Attribute/value pairs to match.
 * @returns {*} The normalized value, or null when no selector produces one.
 */
function pickMetaContent($, selectors) {
    for (const { attribute, value } of selectors) {
        const node = $(`meta[${attribute}="${value}"]`).first();
        if (node.length !== 0) {
            const raw = node.attr('content') ?? node.attr('value') ?? '';
            const cleaned = (0, cleaner_js_1.normalizeCandidate)((0, cleaner_js_1.decodeHtmlEntities)(raw));
            if (cleaned) {
                return cleaned;
            }
        }
    }
    return null;
}
|
|
62
|
+
/**
 * Read the normalized text of the first element with the given tag name.
 *
 * The tag name is trimmed and lower-cased, and must be present in
 * ALLOWED_TEXT_TAGS; anything else is rejected with null.
 *
 * @param {Function} $ - Query function for the loaded document (cheerio-style).
 * @param {string} tagName - Tag to look up.
 * @returns {*} Normalized text, or null when the tag is disallowed or absent.
 */
function extractTagText($, tagName) {
    const tag = tagName.trim().toLowerCase();
    if (ALLOWED_TEXT_TAGS.has(tag)) {
        const match = $(tag).first();
        if (match.length !== 0) {
            const decoded = (0, cleaner_js_1.decodeHtmlEntities)(match.text());
            return (0, cleaner_js_1.normalizeCandidate)(decoded);
        }
    }
    return null;
}
|
|
74
|
+
/**
 * Fetch `metadata[key]` as a normalized string.
 *
 * @param {object|null|undefined} metadata - Record to read from (may be absent).
 * @param {string} key - Property name to look up.
 * @returns {*} The normalized value when the property is a string; null when
 *   `metadata` is falsy or the property is not a string.
 */
function metadataString(metadata, key) {
    if (metadata) {
        const raw = metadata[key];
        if (typeof raw === 'string') {
            return (0, cleaner_js_1.normalizeCandidate)(raw);
        }
    }
    return null;
}
|
|
81
|
+
//# sourceMappingURL=parsers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parsers.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/parsers.ts"],"names":[],"mappings":";;AAkBA,0DA6BC;AAED,oEAcC;AA/DD,qCAA+C;AAE/C,6CAAqE;AACrE,yCAAwD;AAExD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAA;AAa5C,SAAgB,uBAAuB,CAAC,IAAY,EAAE,GAAW;IAC/D,MAAM,CAAC,GAAG,IAAA,cAAI,EAAC,IAAI,CAAC,CAAA;IAEpB,MAAM,KAAK,GAAG,IAAA,wBAAa,EAAC;QAC1B,eAAe,CAAC,CAAC,EAAE;YACjB,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,UAAU,EAAE;YAC5C,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE;YACxC,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,eAAe,EAAE;SAC9C,CAAC;QACF,cAAc,CAAC,CAAC,EAAE,OAAO,CAAC;KAC3B,CAAC,CAAA;IAEF,MAAM,WAAW,GAAG,IAAA,wBAAa,EAAC;QAChC,eAAe,CAAC,CAAC,EAAE;YACjB,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,gBAAgB,EAAE;YAClD,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE;YAC3C,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,qBAAqB,EAAE;SACpD,CAAC;KACH,CAAC,CAAA;IAEF,MAAM,QAAQ,GAAG,IAAA,wBAAa,EAAC;QAC7B,eAAe,CAAC,CAAC,EAAE;YACjB,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,cAAc,EAAE;YAChD,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,kBAAkB,EAAE;SACjD,CAAC;QACF,IAAA,uBAAY,EAAC,GAAG,CAAC;KAClB,CAAC,CAAA;IAEF,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAA;AACzC,CAAC;AAED,SAAgB,4BAA4B,CAC1C,QAAoD;IAEpD,OAAO;QACL,KAAK,EAAE,IAAA,wBAAa,EAAC,CAAC,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC;QAC9F,WAAW,EAAE,IAAA,wBAAa,EAAC;YACzB,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC;YACvC,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC;SAC1C,CAAC;QACF,QAAQ,EAAE,IAAA,wBAAa,EAAC;YACtB,cAAc,CAAC,QAAQ,EAAE,UAAU,CAAC;YACpC,cAAc,CAAC,QAAQ,EAAE,YAAY,CAAC;SACvC,CAAC;KACH,CAAA;AACH,CAAC;AAED,SAAS,eAAe,CAAC,CAAa,EAAE,SAAyB;IAC/D,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,QAAQ,CAAC,SAAS,KAAK,QAAQ,CAAC,KAAK,IAAI,CAAC,CAAC,KAAK,EAAE,CAAA;QACzE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,SAAQ;QACV,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;QAC9D,MAAM,UAAU,GAAG,IAAA,+BAAkB,EAAC,IAAA,+BAAkB,EAAC,KAAK,CAAC,CAAC,CAAA;QAChE,IA
AI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,cAAc,CAAC,CAAa,EAAE,OAAe;IACpD,MAAM,aAAa,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAClD,IAAI,CAAC,iBAAiB,CAAC,GAAG,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1C,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,OAAO,GAAG,CAAC,CAAC,aAAa,CAAC,CAAC,KAAK,EAAE,CAAA;IACxC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,IAAI,GAAG,IAAA,+BAAkB,EAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAA;IAC/C,OAAO,IAAA,+BAAkB,EAAC,IAAI,CAAC,CAAA;AACjC,CAAC;AAED,SAAS,cAAc,CACrB,QAAoD,EACpD,GAAW;IAEX,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;IAC3B,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAA,+BAAkB,EAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;AACrE,CAAC"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.extractSpotifyEpisodeId = extractSpotifyEpisodeId;
|
|
4
|
+
exports.extractApplePodcastIds = extractApplePodcastIds;
|
|
5
|
+
exports.isPodcastLikeJsonLdType = isPodcastLikeJsonLdType;
|
|
6
|
+
exports.isPodcastHost = isPodcastHost;
|
|
7
|
+
// Domains treated as podcast hosts. isPodcastHost accepts a hostname that equals
// one of these entries or ends with ".<entry>" (i.e. any subdomain of it).
const PODCAST_HOST_SUFFIXES = [
|
|
8
|
+
'spotify.com',
|
|
9
|
+
'podcasts.apple.com',
|
|
10
|
+
'podchaser.com',
|
|
11
|
+
'podbean.com',
|
|
12
|
+
'buzzsprout.com',
|
|
13
|
+
'spreaker.com',
|
|
14
|
+
'simplecast.com',
|
|
15
|
+
'rss.com',
|
|
16
|
+
'libsyn.com',
|
|
17
|
+
'omny.fm',
|
|
18
|
+
'acast.com',
|
|
19
|
+
'transistor.fm',
|
|
20
|
+
'captivate.fm',
|
|
21
|
+
'soundcloud.com',
|
|
22
|
+
'ivoox.com',
|
|
23
|
+
'iheart.com',
|
|
24
|
+
'megaphone.fm',
|
|
25
|
+
'pca.st',
|
|
26
|
+
'player.fm',
|
|
27
|
+
'castbox.fm',
|
|
28
|
+
];
|
|
29
|
+
/**
 * Extract the episode id from a Spotify episode URL.
 *
 * The host (lower-cased, leading "www." removed) must be `spotify.com` or one
 * of its subdomains. The id is the path segment immediately following the
 * first `episode` segment and must be purely alphanumeric.
 *
 * @param {string} url - Candidate URL.
 * @returns {string|null} The episode id, or null when the URL is unparsable,
 *   is not on a Spotify host, or carries no valid episode id.
 */
function extractSpotifyEpisodeId(url) {
    try {
        const parsed = new URL(url);
        const host = parsed.hostname.toLowerCase().replace(/^www\./, '');
        // Match on a label boundary: a bare endsWith('spotify.com') would also
        // accept unrelated look-alike domains such as "notspotify.com".
        if (host !== 'spotify.com' && !host.endsWith('.spotify.com')) {
            return null;
        }
        const parts = parsed.pathname.split('/').filter(Boolean);
        const idx = parts.indexOf('episode');
        const id = idx >= 0 ? parts[idx + 1] : null;
        return id && /^[A-Za-z0-9]+$/.test(id) ? id : null;
    }
    catch {
        // new URL() throws on malformed input; treat that as "not a Spotify episode".
        return null;
    }
}
|
|
44
|
+
/**
 * Pull the show id (and, when present, the episode id) out of an Apple Podcasts URL.
 *
 * The host (lower-cased, leading "www." removed) must be exactly
 * `podcasts.apple.com`. The show id is the digit run of an `/id<digits>` path
 * segment; the episode id is the `i` query parameter when it is all digits.
 *
 * @param {string} url - Candidate URL.
 * @returns {{showId: string, episodeId: string|null}|null} The ids, or null when
 *   the URL is unparsable, on another host, or has no show id.
 */
function extractApplePodcastIds(url) {
    try {
        const { hostname, pathname, searchParams } = new URL(url);
        if (hostname.toLowerCase().replace(/^www\./, '') !== 'podcasts.apple.com') {
            return null;
        }
        const showMatch = /\/id(\d+)(?:\/|$)/.exec(pathname);
        if (showMatch === null) {
            return null;
        }
        const showId = showMatch[1];
        const candidate = searchParams.get('i');
        let episodeId = null;
        if (candidate && /^\d+$/.test(candidate)) {
            episodeId = candidate;
        }
        return { showId, episodeId };
    }
    catch {
        return null;
    }
}
|
|
61
|
+
/**
 * Decide whether a JSON-LD type name denotes podcast-like audio content.
 *
 * Comparison is case-insensitive. Any type containing "podcast" qualifies, as
 * do the exact types AudioObject, Episode, RadioEpisode and MusicRecording.
 *
 * @param {string|null|undefined} type - JSON-LD type name.
 * @returns {boolean} True for podcast-like types, false otherwise (including empty input).
 */
function isPodcastLikeJsonLdType(type) {
    if (!type) {
        return false;
    }
    const lowered = type.toLowerCase();
    const exactMatches = ['audioobject', 'episode', 'radioepisode', 'musicrecording'];
    return lowered.includes('podcast') || exactMatches.includes(lowered);
}
|
|
72
|
+
/**
 * Report whether a URL points at a known podcast hosting domain.
 *
 * Amazon Music is special-cased: any `music.amazon.*` host counts when the
 * path contains `/podcasts/`. Otherwise the host (lower-cased, leading "www."
 * removed) must equal, or be a subdomain of, an entry in PODCAST_HOST_SUFFIXES.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} True for podcast hosts; false otherwise or when the URL cannot be parsed.
 */
function isPodcastHost(url) {
    try {
        const { hostname, pathname } = new URL(url);
        const bareHost = hostname.toLowerCase().replace(/^www\./, '');
        const isAmazonPodcast = bareHost.startsWith('music.amazon.') && pathname.includes('/podcasts/');
        if (isAmazonPodcast) {
            return true;
        }
        for (const suffix of PODCAST_HOST_SUFFIXES) {
            if (bareHost === suffix || bareHost.endsWith(`.${suffix}`)) {
                return true;
            }
        }
        return false;
    }
    catch {
        return false;
    }
}
|
|
85
|
+
//# sourceMappingURL=podcast-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"podcast-utils.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/podcast-utils.ts"],"names":[],"mappings":";;AAuBA,0DAYC;AAED,wDAeC;AAED,0DAUC;AAED,sCAWC;AA7ED,MAAM,qBAAqB,GAAG;IAC5B,aAAa;IACb,oBAAoB;IACpB,eAAe;IACf,aAAa;IACb,gBAAgB;IAChB,cAAc;IACd,gBAAgB;IAChB,SAAS;IACT,YAAY;IACZ,SAAS;IACT,WAAW;IACX,eAAe;IACf,cAAc;IACd,gBAAgB;IAChB,WAAW;IACX,YAAY;IACZ,cAAc;IACd,QAAQ;IACR,WAAW;IACX,YAAY;CACb,CAAA;AAED,SAAgB,uBAAuB,CAAC,GAAW;IACjD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC;YAAE,OAAO,IAAI,CAAA;QAC9C,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACxD,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;QACpC,MAAM,EAAE,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;QAC3C,OAAO,EAAE,IAAI,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;IACpD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAgB,sBAAsB,CACpC,GAAW;IAEX,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,IAAI,KAAK,oBAAoB;YAAE,OAAO,IAAI,CAAA;QAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAA;QACtE,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAA;QACxB,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;QACjD,MAAM,SAAS,GAAG,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAA;QAClF,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,CAAA;IAC9B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAgB,uBAAuB,CAAC,IAA+B;IACrE,IAAI,CAAC,IAAI;QAAE,OAAO,KAAK,CAAA;IACvB,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAA;IACrC,IAAI,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC;QAAE,OAAO,IAAI,CAAA;IAC/C,OAAO,CACL,UAAU,KAAK,aAAa;QAC5B,UAAU,KAAK,SAAS;QA
CxB,UAAU,KAAK,cAAc;QAC7B,UAAU,KAAK,gBAAgB,CAChC,CAAA;AACH,CAAC;AAED,SAAgB,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,IAAI,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAA;QACb,CAAC;QACD,OAAO,qBAAqB,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,KAAK,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,MAAM,EAAE,CAAC,CAAC,CAAA;IAC/F,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAA;IACd,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted interop helper: re-exposes property `k` of module `m` on object `o`
// under the name `k2` (defaults to `k`). An existing `this.__createBinding` is reused if present.
// With Object.create available, the property is defined via a descriptor: the source's own
// descriptor is reused unless it is missing, is an accessor on a non-__esModule object, or is a
// writable/configurable data property - in those cases an enumerable getter reading m[k] is used.
// Without Object.create, the value is simply copied (o[k2] = m[k]).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
// Compiler-emitted interop helper: sets `o.default` to `v`. Uses an enumerable data
// property via Object.defineProperty when Object.create exists, plain assignment otherwise.
// An existing `this.__setModuleDefault` is reused if present.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
// Compiler-emitted interop helper for namespace-style imports of a required module.
// A module flagged __esModule is returned unchanged. Otherwise a fresh object is built that
// re-exposes every own property name of `mod` except "default" (via __createBinding) and
// whose `default` is `mod` itself (via __setModuleDefault).
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in/hasOwnProperty fallback when that function is unavailable.
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.extractReadabilityFromHtml = extractReadabilityFromHtml;
|
|
37
|
+
exports.toReadabilityHtml = toReadabilityHtml;
|
|
38
|
+
/**
 * Run Mozilla Readability over an HTML document and return the extracted article.
 *
 * <style> blocks are stripped first, then the markup is parsed with jsdom
 * (both libraries are loaded lazily on first use). jsdom "Could not parse CSS
 * stylesheet" errors are silenced; other jsdom errors are logged to console.error.
 *
 * @param {string} html - Raw HTML markup.
 * @param {string} [url] - Document URL handed to jsdom when truthy.
 * @returns {Promise<{text: string, html: *, title: *, excerpt: *}|null>} The article with
 *   whitespace-collapsed text, or null when Readability finds nothing or any step throws.
 */
async function extractReadabilityFromHtml(html, url) {
    try {
        const cleanedHtml = stripCssFromHtml(html);
        const readabilityModule = await Promise.resolve().then(() => __importStar(require('@mozilla/readability')));
        const jsdomModule = await Promise.resolve().then(() => __importStar(require('jsdom')));

        const virtualConsole = new jsdomModule.VirtualConsole();
        virtualConsole.on('jsdomError', (err) => {
            let message = '';
            if (err && typeof err === 'object' && 'message' in err) {
                message = String(err.message ?? '');
            }
            if (!message.includes('Could not parse CSS stylesheet')) {
                console.error(err);
            }
        });

        const domOptions = url ? { url, virtualConsole } : { virtualConsole };
        const dom = new jsdomModule.JSDOM(cleanedHtml, domOptions);
        const article = new readabilityModule.Readability(dom.window.document).parse();
        if (!article) {
            return null;
        }

        const rawText = article.textContent ?? '';
        return {
            text: rawText.replace(/\s+/g, ' ').trim(),
            html: article.content ?? null,
            title: article.title ?? null,
            excerpt: article.excerpt ?? null,
        };
    }
    catch {
        // Best-effort extraction: any failure is reported as "no article".
        return null;
    }
}
|
|
69
|
+
/**
 * Produce an HTML string for a readability result.
 *
 * Prefers the result's own `html`; otherwise wraps the HTML-escaped `text`
 * in `<article><p>...</p></article>`.
 *
 * @param {{html?: string|null, text?: string|null}|null|undefined} result - Readability result.
 * @returns {string|null} HTML markup, or null when there is no result or it has neither html nor text.
 */
function toReadabilityHtml(result) {
    if (result) {
        if (result.html) {
            return result.html;
        }
        if (result.text) {
            return `<article><p>${escapeHtml(result.text)}</p></article>`;
        }
    }
    return null;
}
|
|
78
|
+
/**
 * Escape the five HTML-significant characters so `input` can be embedded in markup as text.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their entity forms in a single
 * pass, so an ampersand produced by one replacement is never escaped again.
 *
 * @param {string} input - Plain text to escape.
 * @returns {string} Text that is safe to place inside element content or a quoted attribute.
 */
function escapeHtml(input) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return input.replace(/[&<>"']/g, (character) => entities[character]);
}
|
|
86
|
+
/**
 * Remove every <style>...</style> block from the markup.
 *
 * Readability has no use for CSS, and jsdom's stylesheet parsing can be
 * extremely slow on some pages, so the blocks are dropped before parsing.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} The markup without <style> elements.
 */
function stripCssFromHtml(html) {
    const styleBlockPattern = /<style\b[^>]*>[\s\S]*?<\/style>/gi;
    return html.replace(styleBlockPattern, '');
}
|
|
90
|
+
//# sourceMappingURL=readability.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"readability.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/readability.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAOA,gEAiCC;AAED,8CAKC;AAxCM,KAAK,UAAU,0BAA0B,CAC9C,IAAY,EACZ,GAAY;IAEZ,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAA;QAC1C,MAAM,EAAE,WAAW,EAAE,GAAG,wDAAa,sBAAsB,GAAC,CAAA;QAC5D,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE,GAAG,wDAAa,OAAO,GAAC,CAAA;QACvD,MAAM,cAAc,GAAG,IAAI,cAAc,EAAE,CAAA;QAC3C,cAAc,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,GAAG,EAAE,EAAE;YACtC,MAAM,OAAO,GACX,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,SAAS,IAAI,GAAG;gBAChD,CAAC,CAAC,MAAM,CAAE,GAA6B,CAAC,OAAO,IAAI,EAAE,CAAC;gBACtD,CAAC,CAAC,EAAE,CAAA;YACR,IAAI,OAAO,CAAC,QAAQ,CAAC,gCAAgC,CAAC;gBAAE,OAAM;YAC9D,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QACpB,CAAC,CAAC,CAAA;QAEF,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,EAAE,cAAc,EAAE,CAAC,CAAA;QACtF,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;QACnD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAA;QAC9B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAA;QAEzB,MAAM,IAAI,GAAG,CAAC,OAAO,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;QACpE,OAAO;YACL,IAAI;YACJ,IAAI,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI;YAC7B,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,IAAI;YAC5B,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI;SACjC,CAAA;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAgB,iBAAiB,CAAC,MAAgC;IAChE,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,IAAI,MAAM,CAAC,IAAI;QAAE,OAAO,MAAM,CAAC,IAAI,CAAA;IACnC,IAAI,CAAC,MAAM,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IAC7B,OAAO,eAAe,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAA;AAC/D,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,OAAO,KAAK;SACT,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC;SACxB,UAAU,CAAC,GAAG,EAAE,MAAM,CAAC;SACvB,UAAU,CAAC,GAAG,EAAE,MAAM,CAAC;SACvB,UAAU,CAAC,GAAG,EAAE,QAAQ,CAAC;SACzB,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAA;AAC7B,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,yFAAyF;IACzF,OAAO,IAAI,CAAC,OAAO,CAAC,mCAAmC,EAAE,E
AAE,CAAC,CAAA;AAC9D,CAAC"}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.isTwitterStatusUrl = isTwitterStatusUrl;
|
|
4
|
+
exports.toNitterUrls = toNitterUrls;
|
|
5
|
+
exports.isBlockedTwitterContent = isBlockedTwitterContent;
|
|
6
|
+
exports.isAnubisHtml = isAnubisHtml;
|
|
7
|
+
// Hostnames recognised as Twitter/X. Callers compare against the lower-cased host with a leading "www." removed.
const TWITTER_HOSTS = new Set(['x.com', 'twitter.com', 'mobile.twitter.com']);
|
|
8
|
+
// Nitter mirror hostnames. toNitterUrls substitutes each of these for the Twitter host,
// starting from a position derived from a hash of the URL's path and query.
const NITTER_HOSTS = [
|
|
9
|
+
'nitter.net',
|
|
10
|
+
'nitter.poast.org',
|
|
11
|
+
'nitter.catsarch.com',
|
|
12
|
+
'nitter.privacydev.net',
|
|
13
|
+
'nitter.1d4.us',
|
|
14
|
+
];
|
|
15
|
+
// Case-insensitive phrases that isBlockedTwitterContent treats as a sign of blocked/error content.
// NOTE(review): the last alternative already contains "try again", so it can never change the result.
const TWITTER_BLOCKED_TEXT_PATTERN = /something went wrong|try again|privacy related extensions|please disable them and try again/i;
|
|
16
|
+
// Lower-case substrings that isAnubisHtml searches for in the lower-cased page markup.
const ANUBIS_TOKENS = ['anubis', 'proof-of-work', 'proof of work', 'hashcash', 'jshelter'];
|
|
17
|
+
/**
 * Report whether a URL is a Twitter/X status (tweet) link.
 *
 * The host (lower-cased, leading "www." removed) must be in TWITTER_HOSTS and
 * the path must contain `/status/<digits>`.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} True for status URLs; false otherwise or when the URL cannot be parsed.
 */
function isTwitterStatusUrl(url) {
    try {
        const { hostname, pathname } = new URL(url);
        const bareHost = hostname.toLowerCase().replace(/^www\./, '');
        return TWITTER_HOSTS.has(bareHost) ? /\/status\/\d+/.test(pathname) : false;
    }
    catch {
        return false;
    }
}
|
|
29
|
+
/**
 * Return a rotated copy of `values`, starting at an offset derived from `seed`.
 *
 * The offset is `|seed| mod values.length`; elements before the offset are
 * moved to the end. The input array is never modified.
 *
 * @param {Array} values - Items to rotate.
 * @param {number} seed - Integer seed; its sign is ignored.
 * @returns {Array} A new array with the same items in rotated order.
 */
function rotateHosts(values, seed) {
    const count = values.length;
    if (count <= 1) {
        return values.slice();
    }
    const start = Math.abs(seed) % count;
    const tail = values.slice(start);
    const head = values.slice(0, start);
    return [...tail, ...head];
}
|
|
35
|
+
/**
 * Compute a deterministic 32-bit signed hash of a string.
 *
 * Folds the UTF-16 code units left to right as `hash * 31 + unit`, truncating
 * to a signed 32-bit integer after every step.
 *
 * @param {string} input - Text to hash.
 * @returns {number} Signed 32-bit hash (0 for the empty string).
 */
function hashSeed(input) {
    // split('') yields one entry per UTF-16 code unit, matching an index-based walk.
    return input
        .split('')
        .reduce((hash, unit) => (hash * 31 + unit.charCodeAt(0)) | 0, 0);
}
|
|
42
|
+
/**
 * Build the list of Nitter mirror URLs for a Twitter/X URL.
 *
 * Every host in NITTER_HOSTS is substituted for the original host (with the
 * protocol forced to https). The mirrors are rotated by a hash of the path and
 * query, so the same URL always yields the same ordering.
 *
 * @param {string} url - Candidate Twitter/X URL.
 * @returns {string[]} Mirror URLs; empty when the host is not in TWITTER_HOSTS
 *   or the URL cannot be parsed.
 */
function toNitterUrls(url) {
    try {
        const source = new URL(url);
        const bareHost = source.hostname.toLowerCase().replace(/^www\./, '');
        if (!TWITTER_HOSTS.has(bareHost)) {
            return [];
        }
        const seed = hashSeed(source.pathname + source.search);
        const mirrors = [];
        for (const nitterHost of rotateHosts(NITTER_HOSTS, seed)) {
            const mirror = new URL(source.toString());
            mirror.hostname = nitterHost;
            mirror.protocol = 'https:';
            mirrors.push(mirror.toString());
        }
        return mirrors;
    }
    catch {
        return [];
    }
}
|
|
61
|
+
/**
 * Report whether fetched content matches TWITTER_BLOCKED_TEXT_PATTERN.
 *
 * @param {string|null|undefined} content - Text or markup that was fetched.
 * @returns {boolean} True when the pattern matches; false for empty input or no match.
 */
function isBlockedTwitterContent(content) {
    return content ? TWITTER_BLOCKED_TEXT_PATTERN.test(content) : false;
}
|
|
66
|
+
/**
 * Report whether markup mentions "anubis" (case-insensitive) together with a
 * token from ANUBIS_TOKENS.
 *
 * NOTE(review): ANUBIS_TOKENS itself contains 'anubis', so once the first
 * guard passes the token scan always succeeds - in effect this returns whether
 * the markup contains "anubis". Confirm whether a second, distinct marker was
 * meant to be required.
 *
 * @param {string|null|undefined} html - Page markup.
 * @returns {boolean} True when the markers are present; false for empty input or no match.
 */
function isAnubisHtml(html) {
    if (!html) {
        return false;
    }
    const haystack = html.toLowerCase();
    if (haystack.includes('anubis')) {
        for (const marker of ANUBIS_TOKENS) {
            if (haystack.includes(marker)) {
                return true;
            }
        }
    }
    return false;
}
|
|
74
|
+
//# sourceMappingURL=twitter-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"twitter-utils.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/twitter-utils.ts"],"names":[],"mappings":";;AAYA,gDASC;AAgBD,oCAgBC;AAED,0DAGC;AAED,oCAKC;AAjED,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,aAAa,EAAE,oBAAoB,CAAC,CAAC,CAAA;AAC7E,MAAM,YAAY,GAAG;IACnB,YAAY;IACZ,kBAAkB;IAClB,qBAAqB;IACrB,uBAAuB;IACvB,eAAe;CAChB,CAAA;AACD,MAAM,4BAA4B,GAChC,8FAA8F,CAAA;AAChG,MAAM,aAAa,GAAG,CAAC,QAAQ,EAAE,eAAe,EAAE,eAAe,EAAE,UAAU,EAAE,UAAU,CAAC,CAAA;AAE1F,SAAgB,kBAAkB,CAAC,GAAW;IAC5C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAA;QAC1C,OAAO,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAA;IACd,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAI,MAAW,EAAE,IAAY;IAC/C,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC,KAAK,EAAE,CAAA;IAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IAC7C,OAAO,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAA;AAC7D,CAAC;AAED,SAAS,QAAQ,CAAC,KAAa;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,IAAI,GAAG,CAAC,IAAI,GAAG,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IAC9C,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAgB,YAAY,CAAC,GAAW;IACtC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,OAAO,EAAE,CAAA;QACvC,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAA;QAC3D,MAAM,OAAO,GAAG,WAAW,CAAC,YAAY,EAAE,IAAI,CAAC,CAAA;QAC/C,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,EAAE;YAChC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAA;YACvC,IAAI,CAAC,QAAQ,GAAG,UAAU,CAAA;YAC1B,IAA
I,CAAC,QAAQ,GAAG,QAAQ,CAAA;YACxB,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAA;QACxB,CAAC,CAAC,CAAA;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC;AAED,SAAgB,uBAAuB,CAAC,OAAe;IACrD,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAA;IAC1B,OAAO,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;AACnD,CAAC;AAED,SAAgB,YAAY,CAAC,IAAY;IACvC,IAAI,CAAC,IAAI;QAAE,OAAO,KAAK,CAAA;IACvB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAA;IAChC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAAE,OAAO,KAAK,CAAA;IAC3C,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAA;AAC7D,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.DEFAULT_CACHE_MODE = exports.DEFAULT_MAX_CONTENT_CHARACTERS = exports.DEFAULT_TIMEOUT_MS = void 0;
|
|
4
|
+
// Default timeout; going by the _MS suffix this is milliseconds, i.e. 120 seconds.
exports.DEFAULT_TIMEOUT_MS = 120_000;
|
|
5
|
+
// Default cap on content length, in characters (per the name; the code applying it is not in this module).
exports.DEFAULT_MAX_CONTENT_CHARACTERS = 8000;
|
|
6
|
+
// Default cache mode identifier.
exports.DEFAULT_CACHE_MODE = 'default';
|
|
7
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/types.ts"],"names":[],"mappings":";;;AAOa,QAAA,kBAAkB,GAAG,OAAO,CAAA;AAC5B,QAAA,8BAA8B,GAAG,IAAI,CAAA;AACrC,QAAA,kBAAkB,GAAc,SAAS,CAAA"}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.resolveCacheMode = resolveCacheMode;
|
|
4
|
+
exports.resolveMaxCharacters = resolveMaxCharacters;
|
|
5
|
+
exports.resolveTimeoutMs = resolveTimeoutMs;
|
|
6
|
+
exports.resolveFirecrawlMode = resolveFirecrawlMode;
|
|
7
|
+
exports.appendNote = appendNote;
|
|
8
|
+
exports.safeHostname = safeHostname;
|
|
9
|
+
exports.pickFirstText = pickFirstText;
|
|
10
|
+
exports.selectBaseContent = selectBaseContent;
|
|
11
|
+
exports.summarizeTranscript = summarizeTranscript;
|
|
12
|
+
exports.ensureTranscriptDiagnostics = ensureTranscriptDiagnostics;
|
|
13
|
+
exports.finalizeExtractedLinkContent = finalizeExtractedLinkContent;
|
|
14
|
+
const cleaner_js_1 = require("./cleaner.js");
|
|
15
|
+
const types_js_1 = require("./types.js");
|
|
16
|
+
const WWW_PREFIX_PATTERN = /^www\./i;
|
|
17
|
+
const TRANSCRIPT_LINE_SPLIT_PATTERN = /\r?\n/;
|
|
18
|
+
const WORD_SPLIT_PATTERN = /\s+/g;
|
|
19
|
+
/**
 * Read a usable media duration from transcript metadata.
 *
 * `metadata.durationSeconds` is checked first, then `metadata.media.durationSeconds`.
 * A value counts only when it is a finite number greater than zero.
 *
 * @param {object|null|undefined} metadata - Transcript metadata; may be missing.
 * @returns {number|null} The first acceptable duration, otherwise `null`.
 */
function resolveMediaDurationSecondsFromTranscriptMetadata(metadata) {
    const isUsableDuration = (value) => typeof value === 'number' && Number.isFinite(value) && value > 0;
    if (!metadata) {
        return null;
    }
    const topLevel = metadata.durationSeconds;
    if (isUsableDuration(topLevel)) {
        return topLevel;
    }
    const mediaInfo = metadata.media;
    // `typeof null` is 'object', so null has to be excluded explicitly.
    if (mediaInfo === null || typeof mediaInfo !== 'object') {
        return null;
    }
    const fromMedia = mediaInfo.durationSeconds;
    return isUsableDuration(fromMedia) ? fromMedia : null;
}
|
|
35
|
+
/**
 * Read the transcription provider name from transcript metadata.
 *
 * @param {object|null|undefined} metadata - Transcript metadata; may be missing.
 * @returns {string|null} The trimmed `transcriptionProvider` string, or `null`
 *   when it is absent, not a string, or blank after trimming.
 */
function resolveTranscriptionProviderFromTranscriptMetadata(metadata) {
    if (!metadata) {
        return null;
    }
    const raw = metadata.transcriptionProvider;
    if (typeof raw !== 'string') {
        return null;
    }
    const trimmed = raw.trim();
    return trimmed.length > 0 ? trimmed : null;
}
|
|
41
|
+
/**
 * Pick the cache mode to use.
 *
 * @param {object|null|undefined} options - Caller options; may be missing.
 * @returns {*} `options.cacheMode` when it is neither `null` nor `undefined`,
 *   otherwise `DEFAULT_CACHE_MODE` from `./types.js`.
 */
function resolveCacheMode(options) {
    const requested = options?.cacheMode;
    if (requested === undefined || requested === null) {
        return types_js_1.DEFAULT_CACHE_MODE;
    }
    return requested;
}
|
|
44
|
+
/**
 * Work out the content character budget from caller options.
 *
 * @param {object|null|undefined} options - Caller options; may be missing.
 * @returns {number|null} `null` when `options.maxCharacters` is not a finite
 *   number greater than zero; `DEFAULT_MAX_CONTENT_CHARACTERS` when the value
 *   is at or below that default; otherwise the value rounded down.
 */
function resolveMaxCharacters(options) {
    const requested = options?.maxCharacters;
    const isPositiveFinite = typeof requested === 'number' && Number.isFinite(requested) && requested > 0;
    if (!isPositiveFinite) {
        return null;
    }
    // The default acts as a lower bound: smaller requests are raised to it.
    const lowerBound = types_js_1.DEFAULT_MAX_CONTENT_CHARACTERS;
    return requested <= lowerBound ? lowerBound : Math.floor(requested);
}
|
|
54
|
+
/**
 * Work out the timeout, in whole milliseconds, from caller options.
 *
 * @param {object|null|undefined} options - Caller options; may be missing.
 * @returns {number} `options.timeoutMs` rounded down when that yields at least
 *   1; otherwise `DEFAULT_TIMEOUT_MS` from `./types.js` (missing, non-numeric,
 *   non-finite, zero, negative, or fractional values below 1).
 */
function resolveTimeoutMs(options) {
    const candidate = options?.timeoutMs;
    if (typeof candidate !== 'number' || !Number.isFinite(candidate)) {
        return types_js_1.DEFAULT_TIMEOUT_MS;
    }
    // Round down BEFORE validating: a value such as 0.5 is "> 0" but floors to
    // 0, which would otherwise be returned as a zero-millisecond timeout.
    const wholeMs = Math.floor(candidate);
    return wholeMs >= 1 ? wholeMs : types_js_1.DEFAULT_TIMEOUT_MS;
}
|
|
61
|
+
/**
 * Pick the Firecrawl mode from caller options.
 *
 * @param {object|null|undefined} options - Caller options; may be missing.
 * @returns {'off'|'auto'|'always'} `options.firecrawl` when it is one of the
 *   three recognised strings, otherwise `'auto'`.
 */
function resolveFirecrawlMode(options) {
    const requested = options?.firecrawl;
    switch (requested) {
        case 'off':
        case 'auto':
        case 'always':
            return requested;
        default:
            return 'auto';
    }
}
|
|
68
|
+
/**
 * Combine an existing note with a new one, separated by "; ".
 *
 * @param {string|null|undefined} existing - Notes collected so far.
 * @param {string|null|undefined} next - Note to add.
 * @returns {string} `existing` (or `''` when it is null/undefined) if `next`
 *   is falsy; `next` alone if `existing` is falsy or empty; otherwise both
 *   joined as `existing; next`.
 */
function appendNote(existing, next) {
    if (!next) {
        return existing ?? '';
    }
    const hasExisting = Boolean(existing) && existing.length !== 0;
    return hasExisting ? `${existing}; ${next}` : next;
}
|
|
77
|
+
/**
 * Get the hostname of a URL with any leading "www." removed.
 *
 * @param {string} rawUrl - URL text to parse.
 * @returns {string|null} The hostname, or `null` when parsing throws.
 */
function safeHostname(rawUrl) {
    try {
        const { hostname } = new URL(rawUrl);
        return hostname.replace(WWW_PREFIX_PATTERN, '');
    }
    catch {
        // Unparseable input is reported as "no hostname" rather than an error.
        return null;
    }
}
|
|
85
|
+
/**
 * Return the first candidate that normalizes to a truthy value.
 *
 * Each candidate is passed through `normalizeCandidate` from `./cleaner.js`;
 * iteration stops at the first truthy result.
 *
 * @param {Iterable<*>} candidates - Values to try, in priority order.
 * @returns {*} The first truthy normalized value, or `null` if there is none.
 */
function pickFirstText(candidates) {
    for (const raw of candidates) {
        const text = (0, cleaner_js_1.normalizeCandidate)(raw);
        if (!text) {
            continue;
        }
        return text;
    }
    return null;
}
|
|
94
|
+
/**
 * Choose between page content and a transcript as the base text.
 *
 * @param {string} sourceContent - Content to fall back to.
 * @param {string|null|undefined} transcriptText - Transcript text, if any.
 * @returns {string} `"Transcript:\n"` followed by the transcript passed through
 *   `normalizeForPrompt` when that result is non-empty; otherwise
 *   `sourceContent` unchanged.
 */
function selectBaseContent(sourceContent, transcriptText) {
    const transcript = transcriptText ? (0, cleaner_js_1.normalizeForPrompt)(transcriptText) : '';
    return transcript.length === 0 ? sourceContent : `Transcript:\n${transcript}`;
}
|
|
104
|
+
/**
 * Compute size statistics for a transcript.
 *
 * Lines are split on `TRANSCRIPT_LINE_SPLIT_PATTERN` and words on
 * `WORD_SPLIT_PATTERN`; pieces that are empty after trimming are not counted.
 *
 * @param {string|null|undefined} transcriptText - Transcript text, if any.
 * @returns {{transcriptCharacters: number|null, transcriptLines: number|null, transcriptWordCount: number|null}}
 *   Each count, or `null` in place of any count that is not greater than zero.
 *   All three are `null` when `transcriptText` is falsy.
 */
function summarizeTranscript(transcriptText) {
    if (!transcriptText) {
        return { transcriptCharacters: null, transcriptLines: null, transcriptWordCount: null };
    }
    const countNonBlank = (pieces) => pieces.reduce((total, piece) => (piece.trim().length > 0 ? total + 1 : total), 0);
    const positiveOrNull = (count) => (count > 0 ? count : null);
    const characterCount = transcriptText.length;
    const lineCount = countNonBlank(transcriptText.split(TRANSCRIPT_LINE_SPLIT_PATTERN));
    const wordCount = characterCount > 0 ? countNonBlank(transcriptText.split(WORD_SPLIT_PATTERN)) : 0;
    return {
        transcriptCharacters: positiveOrNull(characterCount),
        transcriptLines: positiveOrNull(lineCount),
        transcriptWordCount: positiveOrNull(wordCount),
    };
}
|
|
123
|
+
/**
 * Return the diagnostics attached to a transcript resolution, or build a
 * fallback record when none is attached.
 *
 * In the fallback, `cacheStatus` is `'bypassed'` when `cacheMode` is
 * `'bypass'`, else `'miss'` when the resolution carries non-empty string text,
 * else `'unknown'`.
 *
 * @param {object} resolution - Transcript resolution; `diagnostics`, `text`
 *   and `source` are read from it.
 * @param {string} cacheMode - Cache mode that was in effect.
 * @returns {object} `resolution.diagnostics` when truthy, otherwise the
 *   fallback record.
 */
function ensureTranscriptDiagnostics(resolution, cacheMode) {
    const attached = resolution.diagnostics;
    if (attached) {
        return attached;
    }
    const { text, source } = resolution;
    const bypassRequested = cacheMode === 'bypass';
    const textProvided = typeof text === 'string' && text.length > 0;
    let cacheStatus;
    if (bypassRequested) {
        cacheStatus = 'bypassed';
    }
    else if (textProvided) {
        cacheStatus = 'miss';
    }
    else {
        cacheStatus = 'unknown';
    }
    return {
        cacheMode,
        cacheStatus,
        textProvided,
        provider: source,
        attemptedProviders: source ? [source] : [],
        notes: bypassRequested ? 'Cache bypass requested' : null,
    };
}
|
|
138
|
+
/**
 * Assemble the final extracted-content record for a link.
 *
 * `baseContent` is passed through `normalizeForPrompt`. When `maxCharacters`
 * is a number the result goes through `applyContentBudget`; otherwise the
 * normalized text is used in full, marked as not truncated, and its words are
 * counted locally. Transcript statistics, the transcription provider and the
 * media duration are derived from `transcriptResolution`.
 *
 * @param {object} input - Named inputs.
 * @param {string} input.url - Passed through unchanged.
 * @param {string} input.baseContent - Text to normalize and budget.
 * @param {number|null} input.maxCharacters - Budget; non-numbers disable it.
 * @param {*} input.title - Passed through unchanged.
 * @param {*} input.description - Passed through unchanged.
 * @param {*} input.siteName - Passed through unchanged.
 * @param {object} input.transcriptResolution - `text`, `source` and `metadata` are read.
 * @param {*} input.video - Passed through unchanged.
 * @param {*} input.isVideoOnly - Passed through unchanged.
 * @param {*} input.diagnostics - Passed through unchanged.
 * @returns {object} The combined record.
 */
function finalizeExtractedLinkContent({ url, baseContent, maxCharacters, title, description, siteName, transcriptResolution, video, isVideoOnly, diagnostics, }) {
    const normalized = (0, cleaner_js_1.normalizeForPrompt)(baseContent);
    let budgeted;
    if (typeof maxCharacters === 'number') {
        budgeted = (0, cleaner_js_1.applyContentBudget)(normalized, maxCharacters);
    }
    else {
        // No budget: keep everything and count whitespace-separated words here.
        let words = 0;
        if (normalized.length > 0) {
            for (const piece of normalized.split(WORD_SPLIT_PATTERN)) {
                if (piece.trim().length > 0) {
                    words += 1;
                }
            }
        }
        budgeted = {
            content: normalized,
            truncated: false,
            totalCharacters: normalized.length,
            wordCount: words,
        };
    }
    const { content, truncated, totalCharacters, wordCount } = budgeted;
    const { transcriptCharacters, transcriptLines, transcriptWordCount } = summarizeTranscript(transcriptResolution.text);
    const transcriptionProvider = resolveTranscriptionProviderFromTranscriptMetadata(transcriptResolution.metadata);
    const mediaDurationSeconds = resolveMediaDurationSecondsFromTranscriptMetadata(transcriptResolution.metadata);
    const transcriptSource = transcriptResolution.source;
    const transcriptMetadata = transcriptResolution.metadata ?? null;
    return {
        url,
        title,
        description,
        siteName,
        content,
        truncated,
        totalCharacters,
        wordCount,
        transcriptCharacters,
        transcriptLines,
        transcriptWordCount,
        transcriptSource,
        transcriptionProvider,
        transcriptMetadata,
        mediaDurationSeconds,
        video,
        isVideoOnly,
        diagnostics,
    };
}
|
|
177
|
+
//# sourceMappingURL=utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/utils.ts"],"names":[],"mappings":";;AA2CA,4CAEC;AAED,oDASC;AAED,4CAMC;AAED,oDAMC;AAED,gCAQC;AAED,oCAMC;AAED,sCAQC;AAED,8CASC;AAED,kDAmBC;AAED,kEAiBC;AAED,oEA0DC;AAlND,6CAAyF;AACzF,yCASmB;AAEnB,MAAM,kBAAkB,GAAG,SAAS,CAAA;AACpC,MAAM,6BAA6B,GAAG,OAAO,CAAA;AAC7C,MAAM,kBAAkB,GAAG,MAAM,CAAA;AAEjC,SAAS,iDAAiD,CACxD,QAAoD;IAEpD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,MAAM,MAAM,GAAI,QAA0C,CAAC,eAAe,CAAA;IAC1E,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QACxE,OAAO,MAAM,CAAA;IACf,CAAC;IACD,MAAM,KAAK,GAAI,QAAgC,CAAC,KAAK,CAAA;IACrD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QAChD,MAAM,MAAM,GAAI,KAAuC,CAAC,eAAe,CAAA;QACvE,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;YACxE,OAAO,MAAM,CAAA;QACf,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,kDAAkD,CACzD,QAAoD;IAEpD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,MAAM,QAAQ,GAAI,QAAgD,CAAC,qBAAqB,CAAA;IACxF,OAAO,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;AAC5F,CAAC;AAED,SAAgB,gBAAgB,CAAC,OAAiC;IAChE,OAAO,OAAO,EAAE,SAAS,IAAI,6BAAkB,CAAA;AACjD,CAAC;AAED,SAAgB,oBAAoB,CAAC,OAAiC;IACpE,MAAM,SAAS,GAAG,OAAO,EAAE,aAAa,CAAA;IACxC,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnF,OAAO,IAAI,CAAA;IACb,CAAC;IACD,IAAI,SAAS,IAAI,yCAA8B,EAAE,CAAC;QAChD,OAAO,yCAA8B,CAAA;IACvC,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;AAC9B,CAAC;AAED,SAAgB,gBAAgB,CAAC,OAAiC;IAChE,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,CAAA;IACpC,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnF,OAAO,6BAAkB,CAAA;IAC3B,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;AAC9B,CAAC;AAED,SAAgB,oBAAoB,CAAC,OAAiC;IACpE,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,CAAA;IACpC,IAAI,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,MAAM,IAAI,S
AAS,KAAK,QAAQ,EAAE,CAAC;QAC1E,OAAO,SAAS,CAAA;IAClB,CAAC;IACD,OAAO,MAAM,CAAA;AACf,CAAC;AAED,SAAgB,UAAU,CAAC,QAAmC,EAAE,IAAY;IAC1E,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,QAAQ,IAAI,EAAE,CAAA;IACvB,CAAC;IACD,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvC,OAAO,IAAI,CAAA;IACb,CAAC;IACD,OAAO,GAAG,QAAQ,KAAK,IAAI,EAAE,CAAA;AAC/B,CAAC;AAED,SAAgB,YAAY,CAAC,MAAc;IACzC,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAA;IACjE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAgB,aAAa,CAAC,UAA4C;IACxE,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,UAAU,GAAG,IAAA,+BAAkB,EAAC,SAAS,CAAC,CAAA;QAChD,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAgB,iBAAiB,CAAC,aAAqB,EAAE,cAA6B;IACpF,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,aAAa,CAAA;IACtB,CAAC;IACD,MAAM,oBAAoB,GAAG,IAAA,+BAAkB,EAAC,cAAc,CAAC,CAAA;IAC/D,IAAI,oBAAoB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,aAAa,CAAA;IACtB,CAAC;IACD,OAAO,gBAAgB,oBAAoB,EAAE,CAAA;AAC/C,CAAC;AAED,SAAgB,mBAAmB,CAAC,cAA6B;IAC/D,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,EAAE,oBAAoB,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,CAAA;IACzF,CAAC;IACD,MAAM,oBAAoB,GAAG,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAA;IACrF,MAAM,kBAAkB,GAAG,cAAc;SACtC,KAAK,CAAC,6BAA6B,CAAC;SACpC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAA;IAC3C,MAAM,eAAe,GAAG,kBAAkB,GAAG,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAA;IAC1E,MAAM,sBAAsB,GAC1B,cAAc,CAAC,MAAM,GAAG,CAAC;QACvB,CAAC,CAAC,cAAc;aACX,KAAK,CAAC,kBAAkB,CAAC;aACzB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;aAC5B,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM;QAC/C,CAAC,CAAC,CAAC,CAAA;IACP,MAAM,mBAAmB,GAAG,sBAAsB,GAAG,CAAC,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,IAAI,CAAA;IACtF,OAAO,EAAE,oBAAoB,EAAE,eAAe,EAAE,mBAAmB,EAAE,CAAA;AACvE,
CAAC;AAED,SAAgB,2BAA2B,CACzC,UAAgC,EAChC,SAAoB;IAEpB,IAAI,UAAU,CAAC,WAAW,EAAE,CAAC;QAC3B,OAAO,UAAU,CAAC,WAAW,CAAA;IAC/B,CAAC;IACD,MAAM,OAAO,GAAG,OAAO,UAAU,CAAC,IAAI,KAAK,QAAQ,IAAI,UAAU,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAA;IACjF,MAAM,WAAW,GAAG,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAA;IACtF,OAAO;QACL,SAAS;QACT,WAAW;QACX,YAAY,EAAE,OAAO;QACrB,QAAQ,EAAE,UAAU,CAAC,MAAM;QAC3B,kBAAkB,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;QAChE,KAAK,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,IAAI;KAChE,CAAA;AACH,CAAC;AAED,SAAgB,4BAA4B,CAAC,EAC3C,GAAG,EACH,WAAW,EACX,aAAa,EACb,KAAK,EACL,WAAW,EACX,QAAQ,EACR,oBAAoB,EACpB,KAAK,EACL,WAAW,EACX,WAAW,GACW;IACtB,MAAM,UAAU,GAAG,IAAA,+BAAkB,EAAC,WAAW,CAAC,CAAA;IAClD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,SAAS,EAAE,GACtD,OAAO,aAAa,KAAK,QAAQ;QAC/B,CAAC,CAAC,IAAA,+BAAkB,EAAC,UAAU,EAAE,aAAa,CAAC;QAC/C,CAAC,CAAC;YACE,OAAO,EAAE,UAAU;YACnB,SAAS,EAAE,KAAK;YAChB,eAAe,EAAE,UAAU,CAAC,MAAM;YAClC,SAAS,EACP,UAAU,CAAC,MAAM,GAAG,CAAC;gBACnB,CAAC,CAAC,UAAU;qBACP,KAAK,CAAC,kBAAkB,CAAC;qBACzB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;qBAC5B,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM;gBAC/C,CAAC,CAAC,CAAC;SACR,CAAA;IACP,MAAM,EAAE,oBAAoB,EAAE,eAAe,EAAE,mBAAmB,EAAE,GAAG,mBAAmB,CACxF,oBAAoB,CAAC,IAAI,CAC1B,CAAA;IACD,MAAM,qBAAqB,GAAG,kDAAkD,CAC9E,oBAAoB,CAAC,QAAQ,CAC9B,CAAA;IACD,MAAM,oBAAoB,GAAG,iDAAiD,CAC5E,oBAAoB,CAAC,QAAQ,CAC9B,CAAA;IAED,OAAO;QACL,GAAG;QACH,KAAK;QACL,WAAW;QACX,QAAQ;QACR,OAAO;QACP,SAAS;QACT,eAAe;QACf,SAAS;QACT,oBAAoB;QACpB,eAAe;QACf,mBAAmB;QACnB,gBAAgB,EAAE,oBAAoB,CAAC,MAAM;QAC7C,qBAAqB;QACrB,kBAAkB,EAAE,oBAAoB,CAAC,QAAQ,IAAI,IAAI;QACzD,oBAAoB;QACpB,KAAK;QACL,WAAW;QACX,WAAW;KACZ,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.detectPrimaryVideoFromHtml = detectPrimaryVideoFromHtml;
|
|
4
|
+
const cheerio_1 = require("cheerio");
|
|
5
|
+
const VIDEO_EXTENSIONS = new Set(['.mp4', '.webm', '.mov', '.m4v']);
|
|
6
|
+
/**
 * Resolve a possibly-relative URL against a base URL.
 *
 * @param {string} candidate - URL text; surrounding whitespace is trimmed.
 * @param {string} baseUrl - Base used to resolve relative candidates.
 * @returns {string|null} The absolute URL string, or `null` when the trimmed
 *   candidate is empty or URL construction throws.
 */
function resolveAbsoluteUrl(candidate, baseUrl) {
    const cleaned = candidate.trim();
    if (cleaned === '') {
        return null;
    }
    try {
        const absolute = new URL(cleaned, baseUrl);
        return absolute.toString();
    }
    catch {
        return null;
    }
}
|
|
17
|
+
/**
 * Tell whether a URL's path ends in one of the `VIDEO_EXTENSIONS`.
 *
 * The comparison uses the lower-cased pathname only, so query strings and
 * fragments are ignored.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} `true` on an extension match; `false` otherwise,
 *   including when an error is thrown while checking.
 */
function isDirectVideoUrl(url) {
    try {
        const pathname = new URL(url).pathname.toLowerCase();
        return Array.from(VIDEO_EXTENSIONS).some((extension) => pathname.endsWith(extension));
    }
    catch {
        return false;
    }
}
|
|
31
|
+
/**
 * Extract the 11-character YouTube video id from an embed-style URL.
 *
 * Recognised forms:
 *  - `/embed/<id>` on `youtube.com`, `youtube-nocookie.com` (the host used by
 *    YouTube's privacy-enhanced embeds), or any subdomain of either;
 *  - `youtu.be/<id>`.
 *
 * @param {string} raw - Absolute URL to inspect.
 * @returns {string|null} The video id, or `null` when the URL cannot be
 *   parsed, the host is not recognised, or no id is present.
 */
function extractYouTubeVideoIdFromEmbedUrl(raw) {
    try {
        const u = new URL(raw);
        const host = u.hostname.toLowerCase().replace(/^www\./, '');
        // Match the bare domain or a true subdomain only, so look-alike hosts
        // such as "notyoutube.com" are rejected.
        const isEmbedHost = ['youtube.com', 'youtube-nocookie.com'].some((domain) => host === domain || host.endsWith(`.${domain}`));
        if (isEmbedHost) {
            const m = u.pathname.match(/\/embed\/([a-zA-Z0-9_-]{11})/);
            return m?.[1] ?? null;
        }
        if (host === 'youtu.be') {
            const id = u.pathname.replace(/^\//, '').trim();
            return /^[a-zA-Z0-9_-]{11}$/.test(id) ? id : null;
        }
        return null;
    }
    catch {
        return null;
    }
}
|
|
49
|
+
/**
 * Find the first non-empty value among a list of `<meta>` tag lookups.
 *
 * For each `{ attribute, value }` pair the first `meta[attribute="value"]`
 * element is queried; its `content` attribute is used, falling back to its
 * `value` attribute, and the result is trimmed.
 *
 * @param {Function} $ - Selector function for the loaded document.
 * @param {Array<{attribute: string, value: string}>} selectors - Lookups in priority order.
 * @returns {string|null} The first non-empty trimmed value, or `null`.
 */
function metaContent($, selectors) {
    for (const { attribute, value } of selectors) {
        const tag = $(`meta[${attribute}="${value}"]`).first();
        if (tag.length === 0) {
            continue;
        }
        const text = (tag.attr('content') ?? tag.attr('value') ?? '').trim();
        if (text) {
            return text;
        }
    }
    return null;
}
|
|
60
|
+
/**
 * Look for the main video referenced by an HTML page.
 *
 * Sources are tried in this order, and the first hit wins:
 *  1. the first `<iframe>` whose `src` contains a YouTube embed or youtu.be URL;
 *  2. OpenGraph video meta tags (`og:video`, `og:video:url`,
 *     `og:video:secure_url`, looked up by `property` and then by `name`);
 *  3. the first `<video src>`, falling back to the first `<video><source src>`.
 *
 * @param {string} html - Page markup.
 * @param {string} url - Page URL, used to resolve relative references.
 * @returns {{kind: 'youtube'|'direct', url: string}|null} A YouTube watch URL
 *   or a direct video file URL, or `null` when nothing qualifies.
 */
function detectPrimaryVideoFromHtml(html, url) {
    const $ = (0, cheerio_1.load)(html);
    const asYouTube = (videoId) => ({ kind: 'youtube', url: `https://www.youtube.com/watch?v=${videoId}` });
    const asDirect = (directUrl) => ({ kind: 'direct', url: directUrl });
    // Step 1: embedded YouTube player.
    const embedSrc = $('iframe[src*="youtube.com/embed/"], iframe[src*="youtu.be/"]').first().attr('src') ?? null;
    if (embedSrc) {
        const absolute = resolveAbsoluteUrl(embedSrc, url);
        const embedId = absolute ? extractYouTubeVideoIdFromEmbedUrl(absolute) : null;
        if (embedId) {
            return asYouTube(embedId);
        }
    }
    // Step 2: OpenGraph video tags; all `property` lookups come before `name`.
    const ogSelectors = ['property', 'name'].flatMap((attribute) => ['og:video', 'og:video:url', 'og:video:secure_url'].map((value) => ({ attribute, value })));
    const ogVideo = metaContent($, ogSelectors);
    if (ogVideo) {
        const absolute = resolveAbsoluteUrl(ogVideo, url);
        if (absolute) {
            if (isDirectVideoUrl(absolute)) {
                return asDirect(absolute);
            }
            const ogId = extractYouTubeVideoIdFromEmbedUrl(absolute);
            if (ogId) {
                return asYouTube(ogId);
            }
        }
    }
    // Step 3: <video> elements.
    const tagSrc = $('video[src]').first().attr('src') ?? $('video source[src]').first().attr('src') ?? null;
    if (tagSrc) {
        const absolute = resolveAbsoluteUrl(tagSrc, url);
        if (absolute && isDirectVideoUrl(absolute)) {
            return asDirect(absolute);
        }
    }
    return null;
}
|
|
99
|
+
//# sourceMappingURL=video.js.map
|