@steipete/summarize-core 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/content/cache/types.js +2 -0
- package/dist/esm/content/cache/types.js.map +1 -0
- package/dist/esm/content/index.js +1 -0
- package/dist/esm/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/client.js +3 -0
- package/dist/esm/content/link-preview/client.js.map +1 -1
- package/dist/esm/content/link-preview/content/fetcher.js +1 -1
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
- package/dist/esm/content/link-preview/content/html.js +1 -1
- package/dist/esm/content/link-preview/content/html.js.map +1 -1
- package/dist/esm/content/link-preview/content/index.js +22 -3
- package/dist/esm/content/link-preview/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/deps.js.map +1 -1
- package/dist/esm/content/transcript/index.js +1 -0
- package/dist/esm/content/transcript/index.js.map +1 -1
- package/dist/esm/content/transcript/providers/generic.js +84 -4
- package/dist/esm/content/transcript/providers/generic.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast.js +1 -0
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/captions.js +35 -14
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +84 -12
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube.js +38 -2
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
- package/dist/esm/content/transcript/utils.js +25 -69
- package/dist/esm/content/transcript/utils.js.map +1 -1
- package/dist/esm/content/url.js +76 -0
- package/dist/esm/content/url.js.map +1 -0
- package/dist/esm/prompts/cli.js +25 -5
- package/dist/esm/prompts/cli.js.map +1 -1
- package/dist/esm/prompts/file.js +51 -12
- package/dist/esm/prompts/file.js.map +1 -1
- package/dist/esm/prompts/format.js +26 -0
- package/dist/esm/prompts/format.js.map +1 -0
- package/dist/esm/prompts/link-summary.js +51 -22
- package/dist/esm/prompts/link-summary.js.map +1 -1
- package/dist/types/content/cache/types.d.ts +25 -0
- package/dist/types/content/index.d.ts +3 -1
- package/dist/types/content/link-preview/client.d.ts +6 -1
- package/dist/types/content/link-preview/content/types.d.ts +1 -1
- package/dist/types/content/link-preview/deps.d.ts +11 -20
- package/dist/types/content/transcript/cache.d.ts +1 -1
- package/dist/types/content/transcript/providers/generic.d.ts +1 -1
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +3 -1
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +3 -1
- package/dist/types/content/transcript/types.d.ts +2 -1
- package/dist/types/content/transcript/utils.d.ts +1 -3
- package/dist/types/content/url.d.ts +8 -0
- package/dist/types/prompts/cli.d.ts +4 -1
- package/dist/types/prompts/file.d.ts +9 -2
- package/dist/types/prompts/format.d.ts +14 -0
- package/dist/types/prompts/link-summary.d.ts +4 -1
- package/package.json +10 -10
- package/dist/cjs/content/index.js +0 -14
- package/dist/cjs/content/index.js.map +0 -1
- package/dist/cjs/content/link-preview/client.js +0 -31
- package/dist/cjs/content/link-preview/client.js.map +0 -1
- package/dist/cjs/content/link-preview/content/article.js +0 -164
- package/dist/cjs/content/link-preview/content/article.js.map +0 -1
- package/dist/cjs/content/link-preview/content/cleaner.js +0 -63
- package/dist/cjs/content/link-preview/content/cleaner.js.map +0 -1
- package/dist/cjs/content/link-preview/content/constants.js +0 -10
- package/dist/cjs/content/link-preview/content/constants.js.map +0 -1
- package/dist/cjs/content/link-preview/content/fetcher.js +0 -128
- package/dist/cjs/content/link-preview/content/fetcher.js.map +0 -1
- package/dist/cjs/content/link-preview/content/firecrawl.js +0 -90
- package/dist/cjs/content/link-preview/content/firecrawl.js.map +0 -1
- package/dist/cjs/content/link-preview/content/html.js +0 -165
- package/dist/cjs/content/link-preview/content/html.js.map +0 -1
- package/dist/cjs/content/link-preview/content/index.js +0 -348
- package/dist/cjs/content/link-preview/content/index.js.map +0 -1
- package/dist/cjs/content/link-preview/content/jsonld.js +0 -80
- package/dist/cjs/content/link-preview/content/jsonld.js.map +0 -1
- package/dist/cjs/content/link-preview/content/parsers.js +0 -81
- package/dist/cjs/content/link-preview/content/parsers.js.map +0 -1
- package/dist/cjs/content/link-preview/content/podcast-utils.js +0 -85
- package/dist/cjs/content/link-preview/content/podcast-utils.js.map +0 -1
- package/dist/cjs/content/link-preview/content/readability.js +0 -90
- package/dist/cjs/content/link-preview/content/readability.js.map +0 -1
- package/dist/cjs/content/link-preview/content/twitter-utils.js +0 -74
- package/dist/cjs/content/link-preview/content/twitter-utils.js.map +0 -1
- package/dist/cjs/content/link-preview/content/types.js +0 -7
- package/dist/cjs/content/link-preview/content/types.js.map +0 -1
- package/dist/cjs/content/link-preview/content/utils.js +0 -177
- package/dist/cjs/content/link-preview/content/utils.js.map +0 -1
- package/dist/cjs/content/link-preview/content/video.js +0 -99
- package/dist/cjs/content/link-preview/content/video.js.map +0 -1
- package/dist/cjs/content/link-preview/content/youtube.js +0 -85
- package/dist/cjs/content/link-preview/content/youtube.js.map +0 -1
- package/dist/cjs/content/link-preview/deps.js +0 -23
- package/dist/cjs/content/link-preview/deps.js.map +0 -1
- package/dist/cjs/content/link-preview/fetch-with-timeout.js +0 -38
- package/dist/cjs/content/link-preview/fetch-with-timeout.js.map +0 -1
- package/dist/cjs/content/link-preview/types.js +0 -5
- package/dist/cjs/content/link-preview/types.js.map +0 -1
- package/dist/cjs/content/transcript/cache.js +0 -85
- package/dist/cjs/content/transcript/cache.js.map +0 -1
- package/dist/cjs/content/transcript/index.js +0 -134
- package/dist/cjs/content/transcript/index.js.map +0 -1
- package/dist/cjs/content/transcript/normalize.js +0 -49
- package/dist/cjs/content/transcript/normalize.js.map +0 -1
- package/dist/cjs/content/transcript/providers/generic.js +0 -16
- package/dist/cjs/content/transcript/providers/generic.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/apple-flow.js +0 -226
- package/dist/cjs/content/transcript/providers/podcast/apple-flow.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/apple.js +0 -43
- package/dist/cjs/content/transcript/providers/podcast/apple.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/constants.js +0 -11
- package/dist/cjs/content/transcript/providers/podcast/constants.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/flow-context.js +0 -3
- package/dist/cjs/content/transcript/providers/podcast/flow-context.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/itunes.js +0 -139
- package/dist/cjs/content/transcript/providers/podcast/itunes.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/json.js +0 -43
- package/dist/cjs/content/transcript/providers/podcast/json.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/media.js +0 -355
- package/dist/cjs/content/transcript/providers/podcast/media.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/results.js +0 -32
- package/dist/cjs/content/transcript/providers/podcast/results.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/rss.js +0 -262
- package/dist/cjs/content/transcript/providers/podcast/rss.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/spotify-flow.js +0 -221
- package/dist/cjs/content/transcript/providers/podcast/spotify-flow.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast/spotify.js +0 -119
- package/dist/cjs/content/transcript/providers/podcast/spotify.js.map +0 -1
- package/dist/cjs/content/transcript/providers/podcast.js +0 -260
- package/dist/cjs/content/transcript/providers/podcast.js.map +0 -1
- package/dist/cjs/content/transcript/providers/youtube/api.js +0 -264
- package/dist/cjs/content/transcript/providers/youtube/api.js.map +0 -1
- package/dist/cjs/content/transcript/providers/youtube/apify.js +0 -59
- package/dist/cjs/content/transcript/providers/youtube/apify.js.map +0 -1
- package/dist/cjs/content/transcript/providers/youtube/captions.js +0 -413
- package/dist/cjs/content/transcript/providers/youtube/captions.js.map +0 -1
- package/dist/cjs/content/transcript/providers/youtube/yt-dlp.js +0 -170
- package/dist/cjs/content/transcript/providers/youtube/yt-dlp.js.map +0 -1
- package/dist/cjs/content/transcript/providers/youtube.js +0 -178
- package/dist/cjs/content/transcript/providers/youtube.js.map +0 -1
- package/dist/cjs/content/transcript/types.js +0 -3
- package/dist/cjs/content/transcript/types.js.map +0 -1
- package/dist/cjs/content/transcript/utils.js +0 -303
- package/dist/cjs/content/transcript/utils.js.map +0 -1
- package/dist/cjs/index.js +0 -22
- package/dist/cjs/index.js.map +0 -1
- package/dist/cjs/language.js +0 -132
- package/dist/cjs/language.js.map +0 -1
- package/dist/cjs/package.json +0 -3
- package/dist/cjs/prompts/cli.js +0 -23
- package/dist/cjs/prompts/cli.js.map +0 -1
- package/dist/cjs/prompts/file.js +0 -52
- package/dist/cjs/prompts/file.js.map +0 -1
- package/dist/cjs/prompts/index.js +0 -14
- package/dist/cjs/prompts/index.js.map +0 -1
- package/dist/cjs/prompts/link-summary.js +0 -122
- package/dist/cjs/prompts/link-summary.js.map +0 -1
- package/dist/cjs/shared/contracts.js +0 -5
- package/dist/cjs/shared/contracts.js.map +0 -1
- package/dist/cjs/transcription/whisper/constants.js +0 -11
- package/dist/cjs/transcription/whisper/constants.js.map +0 -1
- package/dist/cjs/transcription/whisper/core.js +0 -307
- package/dist/cjs/transcription/whisper/core.js.map +0 -1
- package/dist/cjs/transcription/whisper/fal.js +0 -44
- package/dist/cjs/transcription/whisper/fal.js.map +0 -1
- package/dist/cjs/transcription/whisper/ffmpeg.js +0 -187
- package/dist/cjs/transcription/whisper/ffmpeg.js.map +0 -1
- package/dist/cjs/transcription/whisper/openai.js +0 -51
- package/dist/cjs/transcription/whisper/openai.js.map +0 -1
- package/dist/cjs/transcription/whisper/types.js +0 -3
- package/dist/cjs/transcription/whisper/types.js.map +0 -1
- package/dist/cjs/transcription/whisper/utils.js +0 -70
- package/dist/cjs/transcription/whisper/utils.js.map +0 -1
- package/dist/cjs/transcription/whisper/whisper-cpp.js +0 -232
- package/dist/cjs/transcription/whisper/whisper-cpp.js.map +0 -1
- package/dist/cjs/transcription/whisper.js +0 -15
- package/dist/cjs/transcription/whisper.js.map +0 -1
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.extractMetadataFromHtml = extractMetadataFromHtml;
|
|
4
|
-
exports.extractMetadataFromFirecrawl = extractMetadataFromFirecrawl;
|
|
5
|
-
const cheerio_1 = require("cheerio");
|
|
6
|
-
const cleaner_js_1 = require("./cleaner.js");
|
|
7
|
-
const utils_js_1 = require("./utils.js");
|
|
8
|
-
// Allow-list of tag names that extractTagText is willing to read text from;
// any tag name not in this set makes extractTagText return null.
const ALLOWED_TEXT_TAGS = new Set(['title']);
|
|
9
|
-
/**
 * Extract link-preview metadata (title, description, site name) from HTML.
 *
 * Every field is resolved by handing an ordered candidate list to
 * `pickFirstText`:
 *  - title: `og:title` (as `property`, then as `name`), `twitter:title`,
 *    then the text of the `<title>` tag.
 *  - description: `og:description`, `description`, `twitter:description`.
 *  - siteName: `og:site_name`, `application-name`, then `safeHostname(url)`.
 *
 * @param {string} html - Markup passed to cheerio's `load`.
 * @param {string} url - Page URL; only used for the site-name fallback.
 * @returns {{ title: *, description: *, siteName: * }} The `pickFirstText`
 *   result for each field (presumably a string or null — confirm in utils.js).
 */
function extractMetadataFromHtml(html, url) {
    const $ = (0, cheerio_1.load)(html);
    const byProperty = (value) => ({ attribute: 'property', value });
    const byName = (value) => ({ attribute: 'name', value });

    const titleCandidates = [
        pickMetaContent($, [byProperty('og:title'), byName('og:title'), byName('twitter:title')]),
        extractTagText($, 'title'),
    ];
    const title = (0, utils_js_1.pickFirstText)(titleCandidates);

    const descriptionCandidates = [
        pickMetaContent($, [
            byProperty('og:description'),
            byName('description'),
            byName('twitter:description'),
        ]),
    ];
    const description = (0, utils_js_1.pickFirstText)(descriptionCandidates);

    const siteNameCandidates = [
        pickMetaContent($, [byProperty('og:site_name'), byName('application-name')]),
        (0, utils_js_1.safeHostname)(url),
    ];
    const siteName = (0, utils_js_1.pickFirstText)(siteNameCandidates);

    return { title, description, siteName };
}
|
|
35
|
-
/**
 * Build title / description / siteName from a Firecrawl metadata record.
 *
 * For each field the plain key is tried first and its `og*` counterpart
 * second (`title`/`ogTitle`, `description`/`ogDescription`,
 * `siteName`/`ogSiteName`); `pickFirstText` chooses between the two.
 *
 * @param {object|null|undefined} metadata - Firecrawl metadata; may be absent.
 * @returns {{ title: *, description: *, siteName: * }} The `pickFirstText`
 *   result for each field.
 */
function extractMetadataFromFirecrawl(metadata) {
    const firstOf = (primaryKey, fallbackKey) => (0, utils_js_1.pickFirstText)([
        metadataString(metadata, primaryKey),
        metadataString(metadata, fallbackKey),
    ]);
    const title = firstOf('title', 'ogTitle');
    const description = firstOf('description', 'ogDescription');
    const siteName = firstOf('siteName', 'ogSiteName');
    return { title, description, siteName };
}
|
|
48
|
-
/**
 * Return the first usable `<meta>` value for an ordered list of selectors.
 *
 * For each `{ attribute, value }` pair the first `meta[attribute="value"]`
 * element is looked up. Its `content` attribute (or, failing that, `value`)
 * is entity-decoded and normalised; the first truthy result wins.
 *
 * @param {Function} $ - Cheerio root returned by `load`.
 * @param {Array<{attribute: string, value: string}>} selectors - Lookup order.
 * @returns {*} The first truthy normalised value, or null when none qualifies.
 */
function pickMetaContent($, selectors) {
    for (const { attribute, value } of selectors) {
        const match = $(`meta[${attribute}="${value}"]`).first();
        if (match.length !== 0) {
            const raw = match.attr('content') ?? match.attr('value') ?? '';
            const cleaned = (0, cleaner_js_1.normalizeCandidate)((0, cleaner_js_1.decodeHtmlEntities)(raw));
            if (cleaned) {
                return cleaned;
            }
        }
    }
    return null;
}
|
|
62
|
-
/**
 * Read the normalised text of the first element with the given tag name.
 *
 * The tag name is trimmed and lower-cased, then checked against
 * ALLOWED_TEXT_TAGS so that arbitrary selectors are never evaluated.
 *
 * @param {Function} $ - Cheerio root returned by `load`.
 * @param {string} tagName - Tag to read, e.g. 'title'.
 * @returns {*} The `normalizeCandidate` result for the entity-decoded text,
 *   or null when the tag is not allowed or no such element exists.
 */
function extractTagText($, tagName) {
    const tag = tagName.trim().toLowerCase();
    const firstMatch = ALLOWED_TEXT_TAGS.has(tag) ? $(tag).first() : null;
    if (firstMatch === null || firstMatch.length === 0) {
        return null;
    }
    const decoded = (0, cleaner_js_1.decodeHtmlEntities)(firstMatch.text());
    return (0, cleaner_js_1.normalizeCandidate)(decoded);
}
|
|
74
|
-
/**
 * Fetch one string-valued entry from a metadata record.
 *
 * @param {object|null|undefined} metadata - Record to read from; may be absent.
 * @param {string} key - Property name to look up.
 * @returns {*} The `normalizeCandidate` result when the entry is a string;
 *   null when the record is missing or the entry is not a string.
 */
function metadataString(metadata, key) {
    const value = metadata ? metadata[key] : undefined;
    if (typeof value !== 'string') {
        return null;
    }
    return (0, cleaner_js_1.normalizeCandidate)(value);
}
|
|
81
|
-
//# sourceMappingURL=parsers.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"parsers.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/parsers.ts"],"names":[],"mappings":";;AAkBA,0DA6BC;AAED,oEAcC;AA/DD,qCAA+C;AAE/C,6CAAqE;AACrE,yCAAwD;AAExD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAA;AAa5C,SAAgB,uBAAuB,CAAC,IAAY,EAAE,GAAW;IAC/D,MAAM,CAAC,GAAG,IAAA,cAAI,EAAC,IAAI,CAAC,CAAA;IAEpB,MAAM,KAAK,GAAG,IAAA,wBAAa,EAAC;QAC1B,eAAe,CAAC,CAAC,EAAE;YACjB,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,UAAU,EAAE;YAC5C,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE;YACxC,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,eAAe,EAAE;SAC9C,CAAC;QACF,cAAc,CAAC,CAAC,EAAE,OAAO,CAAC;KAC3B,CAAC,CAAA;IAEF,MAAM,WAAW,GAAG,IAAA,wBAAa,EAAC;QAChC,eAAe,CAAC,CAAC,EAAE;YACjB,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,gBAAgB,EAAE;YAClD,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE;YAC3C,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,qBAAqB,EAAE;SACpD,CAAC;KACH,CAAC,CAAA;IAEF,MAAM,QAAQ,GAAG,IAAA,wBAAa,EAAC;QAC7B,eAAe,CAAC,CAAC,EAAE;YACjB,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,cAAc,EAAE;YAChD,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,kBAAkB,EAAE;SACjD,CAAC;QACF,IAAA,uBAAY,EAAC,GAAG,CAAC;KAClB,CAAC,CAAA;IAEF,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAA;AACzC,CAAC;AAED,SAAgB,4BAA4B,CAC1C,QAAoD;IAEpD,OAAO;QACL,KAAK,EAAE,IAAA,wBAAa,EAAC,CAAC,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC;QAC9F,WAAW,EAAE,IAAA,wBAAa,EAAC;YACzB,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC;YACvC,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC;SAC1C,CAAC;QACF,QAAQ,EAAE,IAAA,wBAAa,EAAC;YACtB,cAAc,CAAC,QAAQ,EAAE,UAAU,CAAC;YACpC,cAAc,CAAC,QAAQ,EAAE,YAAY,CAAC;SACvC,CAAC;KACH,CAAA;AACH,CAAC;AAED,SAAS,eAAe,CAAC,CAAa,EAAE,SAAyB;IAC/D,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,QAAQ,CAAC,SAAS,KAAK,QAAQ,CAAC,KAAK,IAAI,CAAC,CAAC,KAAK,EAAE,CAAA;QACzE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,SAAQ;QACV,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;QAC9D,MAAM,UAAU,GAAG,IAAA,+BAAkB,EAAC,IAAA,+BAAkB,EAAC,KAAK,CAAC,CAAC,CAAA;QAChE,IA
AI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,cAAc,CAAC,CAAa,EAAE,OAAe;IACpD,MAAM,aAAa,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAClD,IAAI,CAAC,iBAAiB,CAAC,GAAG,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1C,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,OAAO,GAAG,CAAC,CAAC,aAAa,CAAC,CAAC,KAAK,EAAE,CAAA;IACxC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,IAAI,GAAG,IAAA,+BAAkB,EAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAA;IAC/C,OAAO,IAAA,+BAAkB,EAAC,IAAI,CAAC,CAAA;AACjC,CAAC;AAED,SAAS,cAAc,CACrB,QAAoD,EACpD,GAAW;IAEX,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;IAC3B,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAA,+BAAkB,EAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;AACrE,CAAC"}
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.extractSpotifyEpisodeId = extractSpotifyEpisodeId;
|
|
4
|
-
exports.extractApplePodcastIds = extractApplePodcastIds;
|
|
5
|
-
exports.isPodcastLikeJsonLdType = isPodcastLikeJsonLdType;
|
|
6
|
-
exports.isPodcastHost = isPodcastHost;
|
|
7
|
-
// Host names treated as podcast platforms. isPodcastHost matches a URL's host
// against each entry exactly or as a dot-separated parent domain.
const PODCAST_HOST_SUFFIXES = [
    'spotify.com', 'podcasts.apple.com', 'podchaser.com', 'podbean.com',
    'buzzsprout.com', 'spreaker.com', 'simplecast.com', 'rss.com',
    'libsyn.com', 'omny.fm', 'acast.com', 'transistor.fm',
    'captivate.fm', 'soundcloud.com', 'ivoox.com', 'iheart.com',
    'megaphone.fm', 'pca.st', 'player.fm', 'castbox.fm',
];
|
|
29
|
-
/**
 * Extract the episode id from a Spotify episode URL.
 *
 * The host must be `spotify.com` or one of its subdomains (a leading `www.`
 * is ignored). The id is the path segment that follows `episode` and must be
 * purely alphanumeric.
 *
 * @param {string} url - Candidate URL.
 * @returns {string|null} The episode id, or null when the URL is unparsable,
 *   is not on a Spotify host, or carries no valid episode id.
 */
function extractSpotifyEpisodeId(url) {
    try {
        const parsed = new URL(url);
        const host = parsed.hostname.toLowerCase().replace(/^www\./, '');
        // Require an exact match or a dot-bounded suffix: a bare
        // endsWith('spotify.com') would also accept hosts like "notspotify.com".
        const isSpotifyHost = host === 'spotify.com' || host.endsWith('.spotify.com');
        if (!isSpotifyHost) {
            return null;
        }
        const segments = parsed.pathname.split('/').filter(Boolean);
        const episodeIndex = segments.indexOf('episode');
        if (episodeIndex < 0) {
            return null;
        }
        const id = segments[episodeIndex + 1];
        return id && /^[A-Za-z0-9]+$/.test(id) ? id : null;
    }
    catch {
        // new URL() throws on unparsable input; treat that as "no id".
        return null;
    }
}
|
|
44
|
-
/**
 * Extract show and episode ids from an Apple Podcasts URL.
 *
 * Only `podcasts.apple.com` (optionally prefixed by `www.`) is accepted. The
 * show id is the digits of an `/id<digits>` path segment; the episode id is
 * the all-digit `i` query parameter when present.
 *
 * @param {string} url - Candidate URL.
 * @returns {{ showId: string, episodeId: string|null }|null} The ids, or null
 *   when the URL is unparsable, on another host, or has no show id.
 */
function extractApplePodcastIds(url) {
    let target;
    try {
        target = new URL(url);
    }
    catch {
        return null;
    }
    const hostname = target.hostname.toLowerCase().replace(/^www\./, '');
    if (hostname !== 'podcasts.apple.com') {
        return null;
    }
    const showMatch = /\/id(\d+)(?:\/|$)/.exec(target.pathname);
    const showId = showMatch?.[1] ?? null;
    if (!showId) {
        return null;
    }
    const rawEpisode = target.searchParams.get('i');
    let episodeId = null;
    if (rawEpisode && /^\d+$/.test(rawEpisode)) {
        episodeId = rawEpisode;
    }
    return { showId, episodeId };
}
|
|
61
|
-
/**
 * Decide whether a JSON-LD `@type` value looks like podcast/audio content.
 *
 * Matching is case-insensitive: any type containing "podcast" qualifies, as
 * do the exact types AudioObject, Episode, RadioEpisode and MusicRecording.
 *
 * @param {string|null|undefined} type - The JSON-LD type name.
 * @returns {boolean} True for podcast-like types, false otherwise (including
 *   empty or missing input).
 */
function isPodcastLikeJsonLdType(type) {
    if (!type) {
        return false;
    }
    const lowered = type.toLowerCase();
    switch (lowered) {
        case 'audioobject':
        case 'episode':
        case 'radioepisode':
        case 'musicrecording':
            return true;
        default:
            return lowered.includes('podcast');
    }
}
|
|
72
|
-
/**
 * Report whether a URL points at a known podcast platform.
 *
 * Amazon Music is special-cased: any `music.amazon.*` host counts when the
 * path contains `/podcasts/`. Otherwise the host (minus a leading `www.`)
 * must equal an entry of PODCAST_HOST_SUFFIXES or be a subdomain of one.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} True for podcast hosts; false otherwise, including when
 *   the URL cannot be parsed.
 */
function isPodcastHost(url) {
    try {
        const { hostname, pathname } = new URL(url);
        const host = hostname.toLowerCase().replace(/^www\./, '');
        const isAmazonPodcast = host.startsWith('music.amazon.') && pathname.includes('/podcasts/');
        if (isAmazonPodcast) {
            return true;
        }
        for (const suffix of PODCAST_HOST_SUFFIXES) {
            if (host === suffix || host.endsWith(`.${suffix}`)) {
                return true;
            }
        }
        return false;
    }
    catch {
        return false;
    }
}
|
|
85
|
-
//# sourceMappingURL=podcast-utils.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"podcast-utils.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/podcast-utils.ts"],"names":[],"mappings":";;AAuBA,0DAYC;AAED,wDAeC;AAED,0DAUC;AAED,sCAWC;AA7ED,MAAM,qBAAqB,GAAG;IAC5B,aAAa;IACb,oBAAoB;IACpB,eAAe;IACf,aAAa;IACb,gBAAgB;IAChB,cAAc;IACd,gBAAgB;IAChB,SAAS;IACT,YAAY;IACZ,SAAS;IACT,WAAW;IACX,eAAe;IACf,cAAc;IACd,gBAAgB;IAChB,WAAW;IACX,YAAY;IACZ,cAAc;IACd,QAAQ;IACR,WAAW;IACX,YAAY;CACb,CAAA;AAED,SAAgB,uBAAuB,CAAC,GAAW;IACjD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC;YAAE,OAAO,IAAI,CAAA;QAC9C,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACxD,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;QACpC,MAAM,EAAE,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;QAC3C,OAAO,EAAE,IAAI,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;IACpD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAgB,sBAAsB,CACpC,GAAW;IAEX,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,IAAI,KAAK,oBAAoB;YAAE,OAAO,IAAI,CAAA;QAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAA;QACtE,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAA;QACxB,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;QACjD,MAAM,SAAS,GAAG,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAA;QAClF,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,CAAA;IAC9B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAgB,uBAAuB,CAAC,IAA+B;IACrE,IAAI,CAAC,IAAI;QAAE,OAAO,KAAK,CAAA;IACvB,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAA;IACrC,IAAI,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC;QAAE,OAAO,IAAI,CAAA;IAC/C,OAAO,CACL,UAAU,KAAK,aAAa;QAC5B,UAAU,KAAK,SAAS;QA
CxB,UAAU,KAAK,cAAc;QAC7B,UAAU,KAAK,gBAAgB,CAChC,CAAA;AACH,CAAC;AAED,SAAgB,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,IAAI,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAA;QACb,CAAC;QACD,OAAO,qBAAqB,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,KAAK,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,MAAM,EAAE,CAAC,CAAC,CAAA;IAC/F,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAA;IACd,CAAC;AACH,CAAC"}
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.extractReadabilityFromHtml = extractReadabilityFromHtml;
|
|
37
|
-
exports.toReadabilityHtml = toReadabilityHtml;
|
|
38
|
-
/**
 * Run Mozilla Readability over an HTML document inside jsdom.
 *
 * `<style>` blocks are stripped before parsing, and jsdom's
 * "Could not parse CSS stylesheet" errors are muted; every other jsdom error
 * is forwarded to `console.error`. Both libraries are loaded lazily on each
 * call.
 *
 * @param {string} html - Raw page markup.
 * @param {string} [url] - Page URL; passed to jsdom as the document URL when
 *   truthy.
 * @returns {Promise<{text: string, html: string|null, title: string|null,
 *   excerpt: string|null}|null>} The extracted article with
 *   whitespace-collapsed text, or null when Readability yields nothing or any
 *   step throws (failures are deliberately swallowed as best-effort).
 */
async function extractReadabilityFromHtml(html, url) {
    try {
        const cleanedHtml = stripCssFromHtml(html);
        const readabilityModule = await Promise.resolve().then(() => __importStar(require('@mozilla/readability')));
        const jsdomModule = await Promise.resolve().then(() => __importStar(require('jsdom')));
        const { Readability } = readabilityModule;
        const { JSDOM, VirtualConsole } = jsdomModule;

        const virtualConsole = new VirtualConsole();
        virtualConsole.on('jsdomError', (err) => {
            let message = '';
            if (err && typeof err === 'object' && 'message' in err) {
                message = String(err.message ?? '');
            }
            // CSS parse failures are noise for text extraction; report anything else.
            if (!message.includes('Could not parse CSS stylesheet')) {
                console.error(err);
            }
        });

        const jsdomOptions = url ? { url, virtualConsole } : { virtualConsole };
        const dom = new JSDOM(cleanedHtml, jsdomOptions);
        const article = new Readability(dom.window.document).parse();
        if (!article) {
            return null;
        }
        return {
            text: (article.textContent ?? '').replace(/\s+/g, ' ').trim(),
            html: article.content ?? null,
            title: article.title ?? null,
            excerpt: article.excerpt ?? null,
        };
    }
    catch {
        return null;
    }
}
|
|
69
|
-
/**
 * Turn a readability result into an HTML string.
 *
 * The extracted HTML is preferred. When only plain text exists it is
 * escaped and wrapped in `<article><p>…</p></article>`.
 *
 * @param {{html?: string|null, text?: string|null}|null|undefined} result -
 *   Output of extractReadabilityFromHtml.
 * @returns {string|null} HTML markup, or null when there is no result or it
 *   carries neither HTML nor text.
 */
function toReadabilityHtml(result) {
    if (result?.html) {
        return result.html;
    }
    if (!result?.text) {
        return null;
    }
    const escapedText = escapeHtml(result.text);
    return `<article><p>${escapedText}</p></article>`;
}
|
|
78
|
-
/**
 * Escape the five HTML-significant characters so text can be embedded in
 * markup: `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`,
 * `'` → `&#39;`.
 *
 * The replacement strings had been entity-decoded in this listing (turning
 * every replacement into a no-op and the last one into a syntax error); they
 * are restored here. A single pass with a lookup table also removes any
 * dependence on replacement order, so `&` is never double-escaped.
 *
 * @param {string} input - Plain text.
 * @returns {string} The text with the five characters replaced by entities.
 */
function escapeHtml(input) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    return input.replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
|
86
|
-
/**
 * Remove every `<style>…</style>` block from an HTML string.
 *
 * Readability has no use for CSS, and jsdom's CSS parsing can be extremely
 * slow on some pages, so the blocks are dropped before the DOM is built.
 *
 * @param {string} html - Raw markup.
 * @returns {string} The markup without inline style blocks.
 */
function stripCssFromHtml(html) {
    const styleBlockPattern = /<style\b[^>]*>[\s\S]*?<\/style>/gi;
    return html.replaceAll(styleBlockPattern, '');
}
|
|
90
|
-
//# sourceMappingURL=readability.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"readability.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/readability.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAOA,gEAiCC;AAED,8CAKC;AAxCM,KAAK,UAAU,0BAA0B,CAC9C,IAAY,EACZ,GAAY;IAEZ,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAA;QAC1C,MAAM,EAAE,WAAW,EAAE,GAAG,wDAAa,sBAAsB,GAAC,CAAA;QAC5D,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE,GAAG,wDAAa,OAAO,GAAC,CAAA;QACvD,MAAM,cAAc,GAAG,IAAI,cAAc,EAAE,CAAA;QAC3C,cAAc,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,GAAG,EAAE,EAAE;YACtC,MAAM,OAAO,GACX,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,SAAS,IAAI,GAAG;gBAChD,CAAC,CAAC,MAAM,CAAE,GAA6B,CAAC,OAAO,IAAI,EAAE,CAAC;gBACtD,CAAC,CAAC,EAAE,CAAA;YACR,IAAI,OAAO,CAAC,QAAQ,CAAC,gCAAgC,CAAC;gBAAE,OAAM;YAC9D,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QACpB,CAAC,CAAC,CAAA;QAEF,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,EAAE,cAAc,EAAE,CAAC,CAAA;QACtF,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;QACnD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAA;QAC9B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAA;QAEzB,MAAM,IAAI,GAAG,CAAC,OAAO,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;QACpE,OAAO;YACL,IAAI;YACJ,IAAI,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI;YAC7B,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,IAAI;YAC5B,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI;SACjC,CAAA;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAgB,iBAAiB,CAAC,MAAgC;IAChE,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,IAAI,MAAM,CAAC,IAAI;QAAE,OAAO,MAAM,CAAC,IAAI,CAAA;IACnC,IAAI,CAAC,MAAM,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IAC7B,OAAO,eAAe,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAA;AAC/D,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,OAAO,KAAK;SACT,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC;SACxB,UAAU,CAAC,GAAG,EAAE,MAAM,CAAC;SACvB,UAAU,CAAC,GAAG,EAAE,MAAM,CAAC;SACvB,UAAU,CAAC,GAAG,EAAE,QAAQ,CAAC;SACzB,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAA;AAC7B,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,yFAAyF;IACzF,OAAO,IAAI,CAAC,OAAO,CAAC,mCAAmC,EAAE,E
AAE,CAAC,CAAA;AAC9D,CAAC"}
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.isTwitterStatusUrl = isTwitterStatusUrl;
|
|
4
|
-
exports.toNitterUrls = toNitterUrls;
|
|
5
|
-
exports.isBlockedTwitterContent = isBlockedTwitterContent;
|
|
6
|
-
exports.isAnubisHtml = isAnubisHtml;
|
|
7
|
-
// Hosts (after stripping a leading "www.") that count as Twitter/X.
const TWITTER_HOSTS = new Set([
    'x.com',
    'twitter.com',
    'mobile.twitter.com',
]);
// Nitter mirrors that toNitterUrls rewrites Twitter URLs onto.
const NITTER_HOSTS = ['nitter.net', 'nitter.poast.org', 'nitter.catsarch.com', 'nitter.privacydev.net', 'nitter.1d4.us'];
// Phrases that isBlockedTwitterContent treats as a blocked/error page.
const TWITTER_BLOCKED_TEXT_PATTERN = /something went wrong|try again|privacy related extensions|please disable them and try again/i;
// Substrings that isAnubisHtml scans for in lower-cased HTML.
const ANUBIS_TOKENS = [
    'anubis',
    'proof-of-work',
    'proof of work',
    'hashcash',
    'jshelter',
];
|
|
17
|
-
/**
 * Report whether a URL is a Twitter/X status (tweet) link.
 *
 * The host, minus a leading `www.`, must be in TWITTER_HOSTS and the path
 * must contain `/status/<digits>`.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} True for tweet URLs; false otherwise, including when the
 *   URL cannot be parsed.
 */
function isTwitterStatusUrl(url) {
    try {
        const { hostname, pathname } = new URL(url);
        const host = hostname.toLowerCase().replace(/^www\./, '');
        return TWITTER_HOSTS.has(host) ? /\/status\/\d+/.test(pathname) : false;
    }
    catch {
        return false;
    }
}
|
|
29
|
-
/**
 * Return a copy of `values` rotated left by `|seed| mod length` positions.
 *
 * @param {Array} values - Items to rotate; never mutated.
 * @param {number} seed - Integer whose absolute value selects the offset.
 * @returns {Array} A new array with the same items in rotated order.
 */
function rotateHosts(values, seed) {
    if (values.length <= 1) {
        return values.slice();
    }
    const shift = Math.abs(seed) % values.length;
    const head = values.slice(shift);
    const tail = values.slice(0, shift);
    return head.concat(tail);
}
|
|
35
|
-
/**
 * Compute a deterministic signed 32-bit hash of a string.
 *
 * Each UTF-16 code unit is folded in as `hash * 31 + unit`, truncated to a
 * signed 32-bit integer after every step.
 *
 * @param {string} input - Text to hash.
 * @returns {number} A signed 32-bit integer; 0 for the empty string.
 */
function hashSeed(input) {
    // split('') yields UTF-16 code units, matching index-based charCodeAt.
    return input
        .split('')
        .reduce((acc, unit) => (acc * 31 + unit.charCodeAt(0)) | 0, 0);
}
|
|
42
|
-
/**
 * Map a Twitter/X URL onto every configured Nitter mirror.
 *
 * The mirror order is rotated by a hash of the URL's path and query string.
 * Each result keeps the original path and query but uses the mirror's host
 * name and the https scheme.
 *
 * @param {string} url - Candidate URL.
 * @returns {string[]} One URL per Nitter host, or an empty array when the URL
 *   is unparsable or not on a Twitter host.
 */
function toNitterUrls(url) {
    try {
        const source = new URL(url);
        const host = source.hostname.toLowerCase().replace(/^www\./, '');
        if (!TWITTER_HOSTS.has(host)) {
            return [];
        }
        const seed = hashSeed(`${source.pathname}${source.search}`);
        const mirrors = [];
        for (const nitterHost of rotateHosts(NITTER_HOSTS, seed)) {
            const mirror = new URL(source.toString());
            mirror.hostname = nitterHost;
            mirror.protocol = 'https:';
            mirrors.push(mirror.toString());
        }
        return mirrors;
    }
    catch {
        return [];
    }
}
|
|
61
|
-
/**
 * Report whether fetched Twitter content looks like a block or error page.
 *
 * @param {string|null|undefined} content - Text to inspect.
 * @returns {boolean} True when the text matches TWITTER_BLOCKED_TEXT_PATTERN;
 *   false for empty or missing content.
 */
function isBlockedTwitterContent(content) {
    return content ? TWITTER_BLOCKED_TEXT_PATTERN.test(content) : false;
}
|
|
66
|
-
/**
 * Report whether an HTML page looks like an Anubis challenge page.
 *
 * The lower-cased markup must contain "anubis" and at least one entry of
 * ANUBIS_TOKENS.
 *
 * NOTE(review): ANUBIS_TOKENS itself contains 'anubis', so once the first
 * check passes the token scan always succeeds — the result is effectively
 * "mentions anubis". Confirm whether a second, distinct token was meant to be
 * required.
 *
 * @param {string|null|undefined} html - Page markup.
 * @returns {boolean} True when the page matches; false for empty or missing
 *   input.
 */
function isAnubisHtml(html) {
    if (!html) {
        return false;
    }
    const haystack = html.toLowerCase();
    const mentionsAnubis = haystack.includes('anubis');
    return mentionsAnubis && ANUBIS_TOKENS.some((token) => haystack.includes(token));
}
|
|
74
|
-
//# sourceMappingURL=twitter-utils.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"twitter-utils.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/twitter-utils.ts"],"names":[],"mappings":";;AAYA,gDASC;AAgBD,oCAgBC;AAED,0DAGC;AAED,oCAKC;AAjED,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,aAAa,EAAE,oBAAoB,CAAC,CAAC,CAAA;AAC7E,MAAM,YAAY,GAAG;IACnB,YAAY;IACZ,kBAAkB;IAClB,qBAAqB;IACrB,uBAAuB;IACvB,eAAe;CAChB,CAAA;AACD,MAAM,4BAA4B,GAChC,8FAA8F,CAAA;AAChG,MAAM,aAAa,GAAG,CAAC,QAAQ,EAAE,eAAe,EAAE,eAAe,EAAE,UAAU,EAAE,UAAU,CAAC,CAAA;AAE1F,SAAgB,kBAAkB,CAAC,GAAW;IAC5C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAA;QAC1C,OAAO,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAA;IACd,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAI,MAAW,EAAE,IAAY;IAC/C,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC,KAAK,EAAE,CAAA;IAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IAC7C,OAAO,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAA;AAC7D,CAAC;AAED,SAAS,QAAQ,CAAC,KAAa;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,IAAI,GAAG,CAAC,IAAI,GAAG,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IAC9C,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAgB,YAAY,CAAC,GAAW;IACtC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,OAAO,EAAE,CAAA;QACvC,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAA;QAC3D,MAAM,OAAO,GAAG,WAAW,CAAC,YAAY,EAAE,IAAI,CAAC,CAAA;QAC/C,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,EAAE;YAChC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAA;YACvC,IAAI,CAAC,QAAQ,GAAG,UAAU,CAAA;YAC1B,IAA
I,CAAC,QAAQ,GAAG,QAAQ,CAAA;YACxB,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAA;QACxB,CAAC,CAAC,CAAA;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC;AAED,SAAgB,uBAAuB,CAAC,OAAe;IACrD,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAA;IAC1B,OAAO,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;AACnD,CAAC;AAED,SAAgB,YAAY,CAAC,IAAY;IACvC,IAAI,CAAC,IAAI;QAAE,OAAO,KAAK,CAAA;IACvB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAA;IAChC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAAE,OAAO,KAAK,CAAA;IAC3C,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAA;AAC7D,CAAC"}
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.DEFAULT_CACHE_MODE = exports.DEFAULT_MAX_CONTENT_CHARACTERS = exports.DEFAULT_TIMEOUT_MS = void 0;
|
|
4
|
-
// Fallback timeout in milliseconds (per the name): 120 000.
exports.DEFAULT_TIMEOUT_MS = 120_000;
|
|
5
|
-
// Default content budget in characters.
exports.DEFAULT_MAX_CONTENT_CHARACTERS = 8000;
|
|
6
|
-
// Cache mode used when the caller does not specify one.
exports.DEFAULT_CACHE_MODE = 'default';
|
|
7
|
-
//# sourceMappingURL=types.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/types.ts"],"names":[],"mappings":";;;AAOa,QAAA,kBAAkB,GAAG,OAAO,CAAA;AAC5B,QAAA,8BAA8B,GAAG,IAAI,CAAA;AACrC,QAAA,kBAAkB,GAAc,SAAS,CAAA"}
|
|
@@ -1,177 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.resolveCacheMode = resolveCacheMode;
|
|
4
|
-
exports.resolveMaxCharacters = resolveMaxCharacters;
|
|
5
|
-
exports.resolveTimeoutMs = resolveTimeoutMs;
|
|
6
|
-
exports.resolveFirecrawlMode = resolveFirecrawlMode;
|
|
7
|
-
exports.appendNote = appendNote;
|
|
8
|
-
exports.safeHostname = safeHostname;
|
|
9
|
-
exports.pickFirstText = pickFirstText;
|
|
10
|
-
exports.selectBaseContent = selectBaseContent;
|
|
11
|
-
exports.summarizeTranscript = summarizeTranscript;
|
|
12
|
-
exports.ensureTranscriptDiagnostics = ensureTranscriptDiagnostics;
|
|
13
|
-
exports.finalizeExtractedLinkContent = finalizeExtractedLinkContent;
|
|
14
|
-
const cleaner_js_1 = require("./cleaner.js");
|
|
15
|
-
const types_js_1 = require("./types.js");
|
|
16
|
-
// Leading "www." in any letter case; safeHostname() strips it.
const WWW_PREFIX_PATTERN = /^www\./i;
|
|
17
|
-
// Line separator for transcripts: LF, optionally preceded by CR.
const TRANSCRIPT_LINE_SPLIT_PATTERN = /\r?\n/;
|
|
18
|
-
// Word separator: any run of whitespace. Used with String.prototype.split for word counts.
const WORD_SPLIT_PATTERN = /\s+/g;
|
|
19
|
-
/**
 * Reads a media duration from transcript metadata.
 *
 * `metadata.durationSeconds` is preferred; `metadata.media.durationSeconds`
 * is the fallback. A value only counts when it is a finite number greater
 * than zero.
 *
 * @param {object|null|undefined} metadata - Transcript metadata, if any.
 * @returns {number|null} The first usable duration, or `null`.
 */
function resolveMediaDurationSecondsFromTranscriptMetadata(metadata) {
    const isUsableDuration = (value) => typeof value === 'number' && Number.isFinite(value) && value > 0;
    if (!metadata) {
        return null;
    }
    const topLevel = metadata.durationSeconds;
    if (isUsableDuration(topLevel)) {
        return topLevel;
    }
    const container = metadata.media;
    if (container === null || typeof container !== 'object') {
        return null;
    }
    const inner = container.durationSeconds;
    return isUsableDuration(inner) ? inner : null;
}
|
|
35
|
-
/**
 * Reads `metadata.transcriptionProvider` as a trimmed, non-empty string.
 *
 * @param {object|null|undefined} metadata - Transcript metadata, if any.
 * @returns {string|null} The trimmed provider name, or `null` when the field
 *   is missing, not a string, or blank.
 */
function resolveTranscriptionProviderFromTranscriptMetadata(metadata) {
    if (!metadata) {
        return null;
    }
    const raw = metadata.transcriptionProvider;
    if (typeof raw !== 'string') {
        return null;
    }
    const cleaned = raw.trim();
    return cleaned.length > 0 ? cleaned : null;
}
|
|
41
|
-
/**
 * Picks the cache mode for a request.
 *
 * @param {object|null|undefined} options - Caller options.
 * @returns {*} `options.cacheMode` when it is neither `null` nor `undefined`,
 *   otherwise DEFAULT_CACHE_MODE.
 */
function resolveCacheMode(options) {
    const requested = options?.cacheMode;
    if (requested === undefined || requested === null) {
        return types_js_1.DEFAULT_CACHE_MODE;
    }
    return requested;
}
|
|
44
|
-
/**
 * Resolves the content budget from `options.maxCharacters`.
 *
 * @param {object|null|undefined} options - Caller options.
 * @returns {number|null} `null` when no usable budget was given (missing,
 *   non-numeric, non-finite, or not positive). Otherwise the requested value
 *   rounded down, but never less than DEFAULT_MAX_CONTENT_CHARACTERS.
 */
function resolveMaxCharacters(options) {
    const requested = options?.maxCharacters;
    const usable = typeof requested === 'number' && Number.isFinite(requested) && requested > 0;
    if (!usable) {
        return null;
    }
    // The default acts as a lower bound for explicit requests.
    const minimum = types_js_1.DEFAULT_MAX_CONTENT_CHARACTERS;
    return requested <= minimum ? minimum : Math.floor(requested);
}
|
|
54
|
-
/**
 * Resolves the request timeout from `options.timeoutMs`.
 *
 * The value is rounded down to whole milliseconds. Anything that is not a
 * finite number, or that rounds down to less than 1 ms (including fractional
 * values such as 0.5), falls back to DEFAULT_TIMEOUT_MS so the result is
 * never a zero timeout.
 *
 * @param {object|null|undefined} options - Caller options.
 * @returns {number} A whole number of milliseconds, always >= 1 for explicit values.
 */
function resolveTimeoutMs(options) {
    const candidate = options?.timeoutMs;
    if (typeof candidate !== 'number' || !Number.isFinite(candidate)) {
        return types_js_1.DEFAULT_TIMEOUT_MS;
    }
    // Validate after flooring: 0 < candidate < 1 would otherwise become 0.
    const wholeMs = Math.floor(candidate);
    return wholeMs >= 1 ? wholeMs : types_js_1.DEFAULT_TIMEOUT_MS;
}
|
|
61
|
-
/**
 * Resolves the Firecrawl mode from `options.firecrawl`.
 *
 * @param {object|null|undefined} options - Caller options.
 * @returns {'off'|'auto'|'always'} The requested mode when it is one of the
 *   three recognised strings, otherwise `'auto'`.
 */
function resolveFirecrawlMode(options) {
    const requested = options?.firecrawl;
    switch (requested) {
        case 'off':
        case 'auto':
        case 'always':
            return requested;
        default:
            return 'auto';
    }
}
|
|
68
|
-
/**
 * Joins two diagnostic notes with "; ".
 *
 * @param {string|null|undefined} existing - Notes collected so far.
 * @param {string} next - Note to add.
 * @returns {string} `existing` (or `''` when it is null/undefined) if `next`
 *   is empty; `next` alone if there is nothing to append to; otherwise
 *   `"<existing>; <next>"`.
 */
function appendNote(existing, next) {
    if (!next) {
        return existing ?? '';
    }
    const hasExisting = Boolean(existing) && existing.length !== 0;
    return hasExisting ? `${existing}; ${next}` : next;
}
|
|
77
|
-
/**
 * Extracts the hostname of `rawUrl` with any leading "www." removed.
 *
 * @param {string} rawUrl - URL to parse.
 * @returns {string|null} The hostname, or `null` when the URL cannot be parsed.
 */
function safeHostname(rawUrl) {
    let parsed;
    try {
        parsed = new URL(rawUrl);
    }
    catch {
        return null;
    }
    return parsed.hostname.replace(WWW_PREFIX_PATTERN, '');
}
|
|
85
|
-
/**
 * Returns the first candidate for which `normalizeCandidate` (from the
 * cleaner module) yields a truthy value. Candidates after the first hit are
 * not normalised.
 *
 * @param {Iterable<*>} candidates - Values to try, in priority order.
 * @returns {*} The normalised value of the first usable candidate, or `null`.
 */
function pickFirstText(candidates) {
    const { normalizeCandidate } = cleaner_js_1;
    for (const raw of candidates) {
        const text = normalizeCandidate(raw);
        if (!text) {
            continue;
        }
        return text;
    }
    return null;
}
|
|
94
|
-
/**
 * Chooses the text that downstream processing should use.
 *
 * @param {string} sourceContent - Content extracted from the page itself.
 * @param {string|null|undefined} transcriptText - Transcript text, if any.
 * @returns {string} `"Transcript:\n<normalised transcript>"` when the
 *   transcript is present and non-empty after `normalizeForPrompt`;
 *   otherwise `sourceContent` unchanged.
 */
function selectBaseContent(sourceContent, transcriptText) {
    const cleaned = transcriptText ? (0, cleaner_js_1.normalizeForPrompt)(transcriptText) : '';
    return cleaned.length === 0 ? sourceContent : `Transcript:\n${cleaned}`;
}
|
|
104
|
-
/**
 * Computes size statistics for a transcript.
 *
 * @param {string|null|undefined} transcriptText - Raw transcript text.
 * @returns {{transcriptCharacters: number|null, transcriptLines: number|null, transcriptWordCount: number|null}}
 *   Character count (string length), number of non-blank lines, and number of
 *   whitespace-separated words. Each field is `null` instead of 0, and all
 *   three are `null` when no transcript is given.
 */
function summarizeTranscript(transcriptText) {
    if (!transcriptText) {
        return { transcriptCharacters: null, transcriptLines: null, transcriptWordCount: null };
    }
    // Counts the pieces that still contain something after trimming.
    const countNonBlank = (pieces) => pieces.reduce((total, piece) => (piece.trim().length > 0 ? total + 1 : total), 0);
    const positiveOrNull = (count) => (count > 0 ? count : null);
    const characterTotal = transcriptText.length;
    const lineTotal = countNonBlank(transcriptText.split(TRANSCRIPT_LINE_SPLIT_PATTERN));
    const wordTotal = characterTotal > 0 ? countNonBlank(transcriptText.split(WORD_SPLIT_PATTERN)) : 0;
    return {
        transcriptCharacters: positiveOrNull(characterTotal),
        transcriptLines: positiveOrNull(lineTotal),
        transcriptWordCount: positiveOrNull(wordTotal),
    };
}
|
|
123
|
-
/**
 * Returns diagnostics for a transcript resolution, synthesising them when the
 * resolution did not bring its own.
 *
 * Synthesised diagnostics report `cacheStatus` as 'bypassed' when `cacheMode`
 * is 'bypass', else 'miss' when the resolution carries non-empty text, else
 * 'unknown'.
 *
 * @param {object} resolution - Transcript resolution (`diagnostics`, `text`, `source`).
 * @param {string} cacheMode - Cache mode the lookup ran with.
 * @returns {object} `resolution.diagnostics` when truthy, otherwise a new
 *   diagnostics object.
 */
function ensureTranscriptDiagnostics(resolution, cacheMode) {
    if (resolution.diagnostics) {
        return resolution.diagnostics;
    }
    const { text, source } = resolution;
    const bypassing = cacheMode === 'bypass';
    const textProvided = typeof text === 'string' && text.length > 0;
    let cacheStatus = 'unknown';
    if (bypassing) {
        cacheStatus = 'bypassed';
    }
    else if (textProvided) {
        cacheStatus = 'miss';
    }
    return {
        cacheMode,
        cacheStatus,
        textProvided,
        provider: source,
        attemptedProviders: source ? [source] : [],
        notes: bypassing ? 'Cache bypass requested' : null,
    };
}
|
|
138
|
-
/**
 * Assembles the final extraction result for a link.
 *
 * The base content is normalised with `normalizeForPrompt`. When
 * `maxCharacters` is a number the normalised text is passed through
 * `applyContentBudget`; otherwise it is kept whole and its word count is
 * computed locally. Transcript statistics, transcription provider and media
 * duration are derived from `transcriptResolution`.
 *
 * @param {object} params
 * @param {string} params.url
 * @param {string} params.baseContent - Text to normalise and (optionally) budget.
 * @param {number|null} params.maxCharacters - Budget; any non-number means "no budget".
 * @param {*} params.title
 * @param {*} params.description
 * @param {*} params.siteName
 * @param {object} params.transcriptResolution - Provides `text`, `source`, `metadata`.
 * @param {*} params.video
 * @param {*} params.isVideoOnly
 * @param {*} params.diagnostics
 * @returns {object} The combined content, transcript and media fields.
 */
function finalizeExtractedLinkContent({ url, baseContent, maxCharacters, title, description, siteName, transcriptResolution, video, isVideoOnly, diagnostics, }) {
    const normalized = (0, cleaner_js_1.normalizeForPrompt)(baseContent);
    let budgeted;
    if (typeof maxCharacters === 'number') {
        budgeted = (0, cleaner_js_1.applyContentBudget)(normalized, maxCharacters);
    }
    else {
        // No budget: keep everything and count the words here.
        const words = normalized.length > 0
            ? normalized.split(WORD_SPLIT_PATTERN).filter((piece) => piece.trim().length > 0).length
            : 0;
        budgeted = {
            content: normalized,
            truncated: false,
            totalCharacters: normalized.length,
            wordCount: words,
        };
    }
    const { content, truncated, totalCharacters, wordCount } = budgeted;
    const transcriptStats = summarizeTranscript(transcriptResolution.text);
    const transcriptionProvider = resolveTranscriptionProviderFromTranscriptMetadata(transcriptResolution.metadata);
    const mediaDurationSeconds = resolveMediaDurationSecondsFromTranscriptMetadata(transcriptResolution.metadata);
    return {
        url,
        title,
        description,
        siteName,
        content,
        truncated,
        totalCharacters,
        wordCount,
        transcriptCharacters: transcriptStats.transcriptCharacters,
        transcriptLines: transcriptStats.transcriptLines,
        transcriptWordCount: transcriptStats.transcriptWordCount,
        transcriptSource: transcriptResolution.source,
        transcriptionProvider,
        transcriptMetadata: transcriptResolution.metadata ?? null,
        mediaDurationSeconds,
        video,
        isVideoOnly,
        diagnostics,
    };
}
|
|
177
|
-
//# sourceMappingURL=utils.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/utils.ts"],"names":[],"mappings":";;AA2CA,4CAEC;AAED,oDASC;AAED,4CAMC;AAED,oDAMC;AAED,gCAQC;AAED,oCAMC;AAED,sCAQC;AAED,8CASC;AAED,kDAmBC;AAED,kEAiBC;AAED,oEA0DC;AAlND,6CAAyF;AACzF,yCASmB;AAEnB,MAAM,kBAAkB,GAAG,SAAS,CAAA;AACpC,MAAM,6BAA6B,GAAG,OAAO,CAAA;AAC7C,MAAM,kBAAkB,GAAG,MAAM,CAAA;AAEjC,SAAS,iDAAiD,CACxD,QAAoD;IAEpD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,MAAM,MAAM,GAAI,QAA0C,CAAC,eAAe,CAAA;IAC1E,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QACxE,OAAO,MAAM,CAAA;IACf,CAAC;IACD,MAAM,KAAK,GAAI,QAAgC,CAAC,KAAK,CAAA;IACrD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QAChD,MAAM,MAAM,GAAI,KAAuC,CAAC,eAAe,CAAA;QACvE,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;YACxE,OAAO,MAAM,CAAA;QACf,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,kDAAkD,CACzD,QAAoD;IAEpD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,MAAM,QAAQ,GAAI,QAAgD,CAAC,qBAAqB,CAAA;IACxF,OAAO,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;AAC5F,CAAC;AAED,SAAgB,gBAAgB,CAAC,OAAiC;IAChE,OAAO,OAAO,EAAE,SAAS,IAAI,6BAAkB,CAAA;AACjD,CAAC;AAED,SAAgB,oBAAoB,CAAC,OAAiC;IACpE,MAAM,SAAS,GAAG,OAAO,EAAE,aAAa,CAAA;IACxC,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnF,OAAO,IAAI,CAAA;IACb,CAAC;IACD,IAAI,SAAS,IAAI,yCAA8B,EAAE,CAAC;QAChD,OAAO,yCAA8B,CAAA;IACvC,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;AAC9B,CAAC;AAED,SAAgB,gBAAgB,CAAC,OAAiC;IAChE,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,CAAA;IACpC,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnF,OAAO,6BAAkB,CAAA;IAC3B,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;AAC9B,CAAC;AAED,SAAgB,oBAAoB,CAAC,OAAiC;IACpE,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,CAAA;IACpC,IAAI,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,MAAM,IAAI,S
AAS,KAAK,QAAQ,EAAE,CAAC;QAC1E,OAAO,SAAS,CAAA;IAClB,CAAC;IACD,OAAO,MAAM,CAAA;AACf,CAAC;AAED,SAAgB,UAAU,CAAC,QAAmC,EAAE,IAAY;IAC1E,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,QAAQ,IAAI,EAAE,CAAA;IACvB,CAAC;IACD,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvC,OAAO,IAAI,CAAA;IACb,CAAC;IACD,OAAO,GAAG,QAAQ,KAAK,IAAI,EAAE,CAAA;AAC/B,CAAC;AAED,SAAgB,YAAY,CAAC,MAAc;IACzC,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAA;IACjE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAgB,aAAa,CAAC,UAA4C;IACxE,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,UAAU,GAAG,IAAA,+BAAkB,EAAC,SAAS,CAAC,CAAA;QAChD,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAgB,iBAAiB,CAAC,aAAqB,EAAE,cAA6B;IACpF,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,aAAa,CAAA;IACtB,CAAC;IACD,MAAM,oBAAoB,GAAG,IAAA,+BAAkB,EAAC,cAAc,CAAC,CAAA;IAC/D,IAAI,oBAAoB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,aAAa,CAAA;IACtB,CAAC;IACD,OAAO,gBAAgB,oBAAoB,EAAE,CAAA;AAC/C,CAAC;AAED,SAAgB,mBAAmB,CAAC,cAA6B;IAC/D,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,EAAE,oBAAoB,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,CAAA;IACzF,CAAC;IACD,MAAM,oBAAoB,GAAG,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAA;IACrF,MAAM,kBAAkB,GAAG,cAAc;SACtC,KAAK,CAAC,6BAA6B,CAAC;SACpC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAA;IAC3C,MAAM,eAAe,GAAG,kBAAkB,GAAG,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAA;IAC1E,MAAM,sBAAsB,GAC1B,cAAc,CAAC,MAAM,GAAG,CAAC;QACvB,CAAC,CAAC,cAAc;aACX,KAAK,CAAC,kBAAkB,CAAC;aACzB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;aAC5B,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM;QAC/C,CAAC,CAAC,CAAC,CAAA;IACP,MAAM,mBAAmB,GAAG,sBAAsB,GAAG,CAAC,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,IAAI,CAAA;IACtF,OAAO,EAAE,oBAAoB,EAAE,eAAe,EAAE,mBAAmB,EAAE,CAAA;AACvE,
CAAC;AAED,SAAgB,2BAA2B,CACzC,UAAgC,EAChC,SAAoB;IAEpB,IAAI,UAAU,CAAC,WAAW,EAAE,CAAC;QAC3B,OAAO,UAAU,CAAC,WAAW,CAAA;IAC/B,CAAC;IACD,MAAM,OAAO,GAAG,OAAO,UAAU,CAAC,IAAI,KAAK,QAAQ,IAAI,UAAU,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAA;IACjF,MAAM,WAAW,GAAG,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAA;IACtF,OAAO;QACL,SAAS;QACT,WAAW;QACX,YAAY,EAAE,OAAO;QACrB,QAAQ,EAAE,UAAU,CAAC,MAAM;QAC3B,kBAAkB,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;QAChE,KAAK,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,IAAI;KAChE,CAAA;AACH,CAAC;AAED,SAAgB,4BAA4B,CAAC,EAC3C,GAAG,EACH,WAAW,EACX,aAAa,EACb,KAAK,EACL,WAAW,EACX,QAAQ,EACR,oBAAoB,EACpB,KAAK,EACL,WAAW,EACX,WAAW,GACW;IACtB,MAAM,UAAU,GAAG,IAAA,+BAAkB,EAAC,WAAW,CAAC,CAAA;IAClD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,SAAS,EAAE,GACtD,OAAO,aAAa,KAAK,QAAQ;QAC/B,CAAC,CAAC,IAAA,+BAAkB,EAAC,UAAU,EAAE,aAAa,CAAC;QAC/C,CAAC,CAAC;YACE,OAAO,EAAE,UAAU;YACnB,SAAS,EAAE,KAAK;YAChB,eAAe,EAAE,UAAU,CAAC,MAAM;YAClC,SAAS,EACP,UAAU,CAAC,MAAM,GAAG,CAAC;gBACnB,CAAC,CAAC,UAAU;qBACP,KAAK,CAAC,kBAAkB,CAAC;qBACzB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;qBAC5B,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM;gBAC/C,CAAC,CAAC,CAAC;SACR,CAAA;IACP,MAAM,EAAE,oBAAoB,EAAE,eAAe,EAAE,mBAAmB,EAAE,GAAG,mBAAmB,CACxF,oBAAoB,CAAC,IAAI,CAC1B,CAAA;IACD,MAAM,qBAAqB,GAAG,kDAAkD,CAC9E,oBAAoB,CAAC,QAAQ,CAC9B,CAAA;IACD,MAAM,oBAAoB,GAAG,iDAAiD,CAC5E,oBAAoB,CAAC,QAAQ,CAC9B,CAAA;IAED,OAAO;QACL,GAAG;QACH,KAAK;QACL,WAAW;QACX,QAAQ;QACR,OAAO;QACP,SAAS;QACT,eAAe;QACf,SAAS;QACT,oBAAoB;QACpB,eAAe;QACf,mBAAmB;QACnB,gBAAgB,EAAE,oBAAoB,CAAC,MAAM;QAC7C,qBAAqB;QACrB,kBAAkB,EAAE,oBAAoB,CAAC,QAAQ,IAAI,IAAI;QACzD,oBAAoB;QACpB,KAAK;QACL,WAAW;QACX,WAAW;KACZ,CAAA;AACH,CAAC"}
|
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.detectPrimaryVideoFromHtml = detectPrimaryVideoFromHtml;
|
|
4
|
-
const cheerio_1 = require("cheerio");
|
|
5
|
-
// Lower-case file extensions that isDirectVideoUrl() accepts at the end of a URL path.
const VIDEO_EXTENSIONS = new Set(['.mp4', '.webm', '.mov', '.m4v']);
|
|
6
|
-
function resolveAbsoluteUrl(candidate, baseUrl) {
|
|
7
|
-
const trimmed = candidate.trim();
|
|
8
|
-
if (trimmed.length === 0)
|
|
9
|
-
return null;
|
|
10
|
-
try {
|
|
11
|
-
return new URL(trimmed, baseUrl).toString();
|
|
12
|
-
}
|
|
13
|
-
catch {
|
|
14
|
-
return null;
|
|
15
|
-
}
|
|
16
|
-
}
|
|
17
|
-
/**
 * Reports whether the path of `url` ends in one of VIDEO_EXTENSIONS.
 * The comparison is case-insensitive and ignores the query string and fragment.
 *
 * @param {string} url - Absolute URL.
 * @returns {boolean} `false` when the URL cannot be parsed.
 */
function isDirectVideoUrl(url) {
    try {
        const path = new URL(url).pathname.toLowerCase();
        return [...VIDEO_EXTENSIONS].some((extension) => path.endsWith(extension));
    }
    catch {
        return false;
    }
}
|
|
31
|
-
/**
 * Extracts the 11-character video ID from a YouTube embed or short URL.
 *
 * Recognised forms:
 *  - `https://[*.]youtube.com/embed/<id>` and the privacy-enhanced
 *    `https://[*.]youtube-nocookie.com/embed/<id>`
 *  - `https://youtu.be/<id>` (a trailing slash or further segments are tolerated)
 *
 * The ID must fill its whole path segment. The reserved embed keywords
 * `videoseries` (playlist player) and `live_stream` (channel live player)
 * are also exactly 11 characters long, so they are rejected explicitly.
 *
 * @param {string} raw - Absolute URL.
 * @returns {string|null} The video ID, or `null` when the URL is not a
 *   recognised single-video URL or cannot be parsed.
 */
function extractYouTubeVideoIdFromEmbedUrl(raw) {
    const RESERVED_EMBED_SEGMENTS = new Set(['videoseries', 'live_stream']);
    try {
        const u = new URL(raw);
        const host = u.hostname.toLowerCase().replace(/^www\./, '');
        const isEmbedHost = host === 'youtube.com' ||
            host.endsWith('.youtube.com') ||
            host === 'youtube-nocookie.com' ||
            host.endsWith('.youtube-nocookie.com');
        if (isEmbedHost) {
            // The lookahead pins the ID to the end of its path segment.
            const m = u.pathname.match(/\/embed\/([a-zA-Z0-9_-]{11})(?=\/|$)/);
            const id = m?.[1] ?? null;
            return id !== null && !RESERVED_EMBED_SEGMENTS.has(id) ? id : null;
        }
        if (host === 'youtu.be') {
            // pathname always starts with "/", so index 1 is the first segment.
            const id = u.pathname.split('/')[1] ?? '';
            return /^[a-zA-Z0-9_-]{11}$/.test(id) ? id : null;
        }
        return null;
    }
    catch {
        return null;
    }
}
|
|
49
|
-
/**
 * Looks up the first non-blank `<meta>` value among several attribute/value pairs.
 *
 * For each selector, in order, the first `meta[<attribute>="<value>"]`
 * element is inspected. Its `content` attribute is used, falling back to
 * `value` only when `content` is absent. The first trimmed, non-empty result wins.
 *
 * @param {Function} $ - Query function for the loaded document (cheerio-style).
 * @param {Array<{attribute: string, value: string}>} selectors - Pairs to try in priority order.
 * @returns {string|null} The trimmed value, or `null` when nothing matched.
 */
function metaContent($, selectors) {
    for (const { attribute, value: expected } of selectors) {
        const node = $(`meta[${attribute}="${expected}"]`).first();
        if (node.length !== 0) {
            const text = (node.attr('content') ?? node.attr('value') ?? '').trim();
            if (text) {
                return text;
            }
        }
    }
    return null;
}
|
|
60
|
-
/**
 * Finds the main video referenced by an HTML page.
 *
 * Sources are tried in this order and the first hit wins:
 *  1. the first YouTube `<iframe>` embed,
 *  2. OpenGraph `og:video*` meta tags (a direct video file, else a YouTube URL),
 *  3. the first `<video src>` or `<video><source src>` pointing at a direct video file.
 *
 * @param {string} html - Page markup.
 * @param {string} url - Page URL, used to resolve relative references.
 * @returns {{kind: 'youtube'|'direct', url: string}|null} The detected video, or `null`.
 */
function detectPrimaryVideoFromHtml(html, url) {
    const $ = (0, cheerio_1.load)(html);
    const youtubeWatch = (videoId) => ({ kind: 'youtube', url: `https://www.youtube.com/watch?v=${videoId}` });
    const directFile = (href) => ({ kind: 'direct', url: href });
    // 1) YouTube embeds (preferred, stable)
    const embedSrc = $('iframe[src*="youtube.com/embed/"], iframe[src*="youtu.be/"]').first().attr('src') ?? null;
    if (embedSrc) {
        const absolute = resolveAbsoluteUrl(embedSrc, url);
        const embedId = absolute ? extractYouTubeVideoIdFromEmbedUrl(absolute) : null;
        if (embedId) {
            return youtubeWatch(embedId);
        }
    }
    // 2) OpenGraph video: every og:video key, first as `property`, then as `name`
    const ogSelectors = ['property', 'name'].flatMap((attribute) => ['og:video', 'og:video:url', 'og:video:secure_url'].map((value) => ({ attribute, value })));
    const ogVideo = metaContent($, ogSelectors);
    if (ogVideo) {
        const absolute = resolveAbsoluteUrl(ogVideo, url);
        if (absolute) {
            if (isDirectVideoUrl(absolute)) {
                return directFile(absolute);
            }
            const ogId = extractYouTubeVideoIdFromEmbedUrl(absolute);
            if (ogId) {
                return youtubeWatch(ogId);
            }
        }
    }
    // 3) <video> tags
    const tagSrc = $('video[src]').first().attr('src') ?? $('video source[src]').first().attr('src') ?? null;
    if (tagSrc) {
        const absolute = resolveAbsoluteUrl(tagSrc, url);
        if (absolute && isDirectVideoUrl(absolute)) {
            return directFile(absolute);
        }
    }
    return null;
}
|
|
99
|
-
//# sourceMappingURL=video.js.map
|