@steipete/summarize-core 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/esm/content/index.js +5 -0
- package/dist/esm/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/client.js +28 -0
- package/dist/esm/content/link-preview/client.js.map +1 -0
- package/dist/esm/content/link-preview/content/article.js +155 -0
- package/dist/esm/content/link-preview/content/article.js.map +1 -0
- package/dist/esm/content/link-preview/content/cleaner.js +55 -0
- package/dist/esm/content/link-preview/content/cleaner.js.map +1 -0
- package/dist/esm/content/link-preview/content/constants.js +7 -0
- package/dist/esm/content/link-preview/content/constants.js.map +1 -0
- package/dist/esm/content/link-preview/content/fetcher.js +124 -0
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -0
- package/dist/esm/content/link-preview/content/firecrawl.js +86 -0
- package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -0
- package/dist/esm/content/link-preview/content/html.js +162 -0
- package/dist/esm/content/link-preview/content/html.js.map +1 -0
- package/dist/esm/content/link-preview/content/index.js +345 -0
- package/dist/esm/content/link-preview/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/content/jsonld.js +77 -0
- package/dist/esm/content/link-preview/content/jsonld.js.map +1 -0
- package/dist/esm/content/link-preview/content/parsers.js +77 -0
- package/dist/esm/content/link-preview/content/parsers.js.map +1 -0
- package/dist/esm/content/link-preview/content/podcast-utils.js +79 -0
- package/dist/esm/content/link-preview/content/podcast-utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/readability.js +53 -0
- package/dist/esm/content/link-preview/content/readability.js.map +1 -0
- package/dist/esm/content/link-preview/content/twitter-utils.js +68 -0
- package/dist/esm/content/link-preview/content/twitter-utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/types.js +4 -0
- package/dist/esm/content/link-preview/content/types.js.map +1 -0
- package/dist/esm/content/link-preview/content/utils.js +164 -0
- package/dist/esm/content/link-preview/content/utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/video.js +96 -0
- package/dist/esm/content/link-preview/content/video.js.map +1 -0
- package/dist/esm/content/link-preview/content/youtube.js +82 -0
- package/dist/esm/content/link-preview/content/youtube.js.map +1 -0
- package/dist/esm/content/link-preview/deps.js +20 -0
- package/dist/esm/content/link-preview/deps.js.map +1 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js +35 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -0
- package/dist/esm/content/link-preview/types.js +2 -0
- package/dist/esm/content/link-preview/types.js.map +1 -0
- package/dist/esm/content/transcript/cache.js +79 -0
- package/dist/esm/content/transcript/cache.js.map +1 -0
- package/dist/esm/content/transcript/index.js +130 -0
- package/dist/esm/content/transcript/index.js.map +1 -0
- package/dist/esm/content/transcript/normalize.js +43 -0
- package/dist/esm/content/transcript/normalize.js.map +1 -0
- package/dist/esm/content/transcript/providers/generic.js +11 -0
- package/dist/esm/content/transcript/providers/generic.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js +222 -0
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/apple.js +38 -0
- package/dist/esm/content/transcript/providers/podcast/apple.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/constants.js +8 -0
- package/dist/esm/content/transcript/providers/podcast/constants.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/flow-context.js +2 -0
- package/dist/esm/content/transcript/providers/podcast/flow-context.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/itunes.js +134 -0
- package/dist/esm/content/transcript/providers/podcast/itunes.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/json.js +34 -0
- package/dist/esm/content/transcript/providers/podcast/json.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/media.js +345 -0
- package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/results.js +28 -0
- package/dist/esm/content/transcript/providers/podcast/results.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js +253 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js +218 -0
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/spotify.js +113 -0
- package/dist/esm/content/transcript/providers/podcast/spotify.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast.js +222 -0
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/api.js +257 -0
- package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/apify.js +55 -0
- package/dist/esm/content/transcript/providers/youtube/apify.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js +409 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +166 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube.js +173 -0
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -0
- package/dist/esm/content/transcript/types.js +2 -0
- package/dist/esm/content/transcript/types.js.map +1 -0
- package/dist/esm/content/transcript/utils.js +259 -0
- package/dist/esm/content/transcript/utils.js.map +1 -0
- package/dist/esm/index.js +4 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/language.js +126 -0
- package/dist/esm/language.js.map +1 -0
- package/dist/esm/prompts/cli.js +20 -0
- package/dist/esm/prompts/cli.js.map +1 -0
- package/dist/esm/prompts/file.js +48 -0
- package/dist/esm/prompts/file.js.map +1 -0
- package/dist/esm/prompts/index.js +4 -0
- package/dist/esm/prompts/index.js.map +1 -0
- package/dist/esm/prompts/link-summary.js +116 -0
- package/dist/esm/prompts/link-summary.js.map +1 -0
- package/dist/esm/shared/contracts.js +2 -0
- package/dist/esm/shared/contracts.js.map +1 -0
- package/dist/esm/transcription/whisper/constants.js +8 -0
- package/dist/esm/transcription/whisper/constants.js.map +1 -0
- package/dist/esm/transcription/whisper/core.js +303 -0
- package/dist/esm/transcription/whisper/core.js.map +1 -0
- package/dist/esm/transcription/whisper/fal.js +41 -0
- package/dist/esm/transcription/whisper/fal.js.map +1 -0
- package/dist/esm/transcription/whisper/ffmpeg.js +179 -0
- package/dist/esm/transcription/whisper/ffmpeg.js.map +1 -0
- package/dist/esm/transcription/whisper/openai.js +47 -0
- package/dist/esm/transcription/whisper/openai.js.map +1 -0
- package/dist/esm/transcription/whisper/types.js +2 -0
- package/dist/esm/transcription/whisper/types.js.map +1 -0
- package/dist/esm/transcription/whisper/utils.js +63 -0
- package/dist/esm/transcription/whisper/utils.js.map +1 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js +227 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -0
- package/dist/esm/transcription/whisper.js +5 -0
- package/dist/esm/transcription/whisper.js.map +1 -0
- package/dist/types/content/index.d.ts +5 -0
- package/dist/types/content/link-preview/client.d.ts +18 -0
- package/dist/types/content/link-preview/content/article.d.ts +4 -0
- package/dist/types/content/link-preview/content/cleaner.d.ts +12 -0
- package/dist/types/content/link-preview/content/constants.d.ts +6 -0
- package/dist/types/content/link-preview/content/fetcher.d.ts +16 -0
- package/dist/types/content/link-preview/content/firecrawl.d.ts +14 -0
- package/dist/types/content/link-preview/content/html.d.ts +17 -0
- package/dist/types/content/link-preview/content/index.d.ts +4 -0
- package/dist/types/content/link-preview/content/jsonld.d.ts +6 -0
- package/dist/types/content/link-preview/content/parsers.d.ts +7 -0
- package/dist/types/content/link-preview/content/podcast-utils.d.ts +7 -0
- package/dist/types/content/link-preview/content/readability.d.ts +8 -0
- package/dist/types/content/link-preview/content/twitter-utils.d.ts +4 -0
- package/dist/types/content/link-preview/content/types.d.ts +61 -0
- package/dist/types/content/link-preview/content/utils.d.ts +17 -0
- package/dist/types/content/link-preview/content/video.d.ts +5 -0
- package/dist/types/content/link-preview/content/youtube.d.ts +1 -0
- package/dist/types/content/link-preview/deps.d.ts +167 -0
- package/dist/types/content/link-preview/fetch-with-timeout.d.ts +4 -0
- package/dist/types/content/link-preview/types.d.ts +37 -0
- package/dist/types/content/transcript/cache.d.ts +29 -0
- package/dist/types/content/transcript/index.d.ts +9 -0
- package/dist/types/content/transcript/normalize.d.ts +3 -0
- package/dist/types/content/transcript/providers/generic.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/apple-flow.d.ts +4 -0
- package/dist/types/content/transcript/providers/podcast/apple.d.ts +6 -0
- package/dist/types/content/transcript/providers/podcast/constants.d.ts +7 -0
- package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +11 -0
- package/dist/types/content/transcript/providers/podcast/itunes.d.ts +17 -0
- package/dist/types/content/transcript/providers/podcast/json.d.ts +8 -0
- package/dist/types/content/transcript/providers/podcast/media.d.ts +42 -0
- package/dist/types/content/transcript/providers/podcast/results.d.ts +10 -0
- package/dist/types/content/transcript/providers/podcast/rss.d.ts +22 -0
- package/dist/types/content/transcript/providers/podcast/spotify-flow.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/spotify.d.ts +24 -0
- package/dist/types/content/transcript/providers/podcast.d.ts +20 -0
- package/dist/types/content/transcript/providers/youtube/api.d.ts +26 -0
- package/dist/types/content/transcript/providers/youtube/apify.d.ts +1 -0
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +7 -0
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +17 -0
- package/dist/types/content/transcript/providers/youtube.d.ts +3 -0
- package/dist/types/content/transcript/types.d.ts +30 -0
- package/dist/types/content/transcript/utils.d.ts +8 -0
- package/dist/types/index.d.ts +4 -0
- package/dist/types/language.d.ts +25 -0
- package/dist/types/prompts/cli.d.ts +10 -0
- package/dist/types/prompts/file.d.ts +17 -0
- package/dist/types/prompts/index.d.ts +4 -0
- package/dist/types/prompts/link-summary.d.ts +29 -0
- package/dist/types/shared/contracts.d.ts +2 -0
- package/dist/types/transcription/whisper/constants.d.ts +7 -0
- package/dist/types/transcription/whisper/core.d.ts +20 -0
- package/dist/types/transcription/whisper/fal.d.ts +1 -0
- package/dist/types/transcription/whisper/ffmpeg.d.ts +16 -0
- package/dist/types/transcription/whisper/openai.d.ts +2 -0
- package/dist/types/transcription/whisper/types.d.ts +17 -0
- package/dist/types/transcription/whisper/utils.d.ts +5 -0
- package/dist/types/transcription/whisper/whisper-cpp.d.ts +9 -0
- package/dist/types/transcription/whisper.d.ts +5 -0
- package/package.json +54 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { load } from 'cheerio';
|
|
2
|
+
import { decodeHtmlEntities, normalizeCandidate } from './cleaner.js';
|
|
3
|
+
import { pickFirstText, safeHostname } from './utils.js';
|
|
4
|
+
// Allow-list of tag names that extractTagText may query; any other tag name makes it return null.
const ALLOWED_TEXT_TAGS = new Set(['title']);
|
|
5
|
+
/**
 * Parse an HTML document with cheerio and collect its title, description and
 * site name.
 *
 * Every field is chosen by passing an ordered candidate list to pickFirstText
 * (presumably "first usable entry wins" - confirm against ./utils.js):
 *   - title:       og:title / twitter:title meta tags, then the <title> element
 *   - description: og:description / description / twitter:description meta tags
 *   - siteName:    og:site_name / application-name meta tags, then safeHostname(url)
 *
 * @param {string} html Raw HTML markup to inspect.
 * @param {string} url  URL handed to safeHostname for the site-name fallback.
 * @returns {{ title: *, description: *, siteName: * }} Whatever pickFirstText yields per field.
 */
export function extractMetadataFromHtml(html, url) {
    const $ = load(html);

    const titleSelectors = [
        { attribute: 'property', value: 'og:title' },
        { attribute: 'name', value: 'og:title' },
        { attribute: 'name', value: 'twitter:title' },
    ];
    const descriptionSelectors = [
        { attribute: 'property', value: 'og:description' },
        { attribute: 'name', value: 'description' },
        { attribute: 'name', value: 'twitter:description' },
    ];
    const siteNameSelectors = [
        { attribute: 'property', value: 'og:site_name' },
        { attribute: 'name', value: 'application-name' },
    ];

    const title = pickFirstText([pickMetaContent($, titleSelectors), extractTagText($, 'title')]);
    const description = pickFirstText([pickMetaContent($, descriptionSelectors)]);
    const siteName = pickFirstText([pickMetaContent($, siteNameSelectors), safeHostname(url)]);

    return { title, description, siteName };
}
|
|
31
|
+
/**
 * Build the { title, description, siteName } triple from a Firecrawl metadata
 * record.
 *
 * For each field the plain key is tried before its Open Graph twin
 * (title/ogTitle, description/ogDescription, siteName/ogSiteName); the pair is
 * handed to pickFirstText, which makes the final choice.
 *
 * @param {object|null|undefined} metadata Firecrawl metadata; a falsy value makes every lookup null.
 * @returns {{ title: *, description: *, siteName: * }} Whatever pickFirstText yields per field.
 */
export function extractMetadataFromFirecrawl(metadata) {
    const read = (key) => metadataString(metadata, key);

    const title = pickFirstText([read('title'), read('ogTitle')]);
    const description = pickFirstText([read('description'), read('ogDescription')]);
    const siteName = pickFirstText([read('siteName'), read('ogSiteName')]);

    return { title, description, siteName };
}
|
|
44
|
+
/**
 * Walk the selectors in order and return the first <meta> value that survives
 * normalisation.
 *
 * For each selector the first element matching `meta[attribute="value"]` is
 * read: its `content` attribute, else its `value` attribute, else ''. The raw
 * string is entity-decoded and passed through normalizeCandidate; a truthy
 * result is returned immediately.
 *
 * @param {Function} $ Cheerio-style query function for the parsed document.
 * @param {Array<{attribute: string, value: string}>} selectors Ordered attribute/value pairs.
 * @returns {*} The first truthy normalised value, or null when nothing qualifies.
 */
function pickMetaContent($, selectors) {
    for (const { attribute, value } of selectors) {
        const match = $(`meta[${attribute}="${value}"]`).first();
        if (match.length !== 0) {
            const raw = match.attr('content') ?? match.attr('value') ?? '';
            const candidate = normalizeCandidate(decodeHtmlEntities(raw));
            if (candidate) {
                return candidate;
            }
        }
    }
    return null;
}
|
|
58
|
+
/**
 * Return the normalised text of the first element with the given tag name.
 *
 * The tag name is trimmed and lower-cased, then checked against
 * ALLOWED_TEXT_TAGS; tags outside that allow-list are never queried.
 *
 * @param {Function} $ Cheerio-style query function for the parsed document.
 * @param {string} tagName Tag to look up.
 * @returns {*} normalizeCandidate's result for the entity-decoded text, or null
 *   when the tag is not allowed or no such element exists.
 */
function extractTagText($, tagName) {
    const tag = tagName.trim().toLowerCase();
    const node = ALLOWED_TEXT_TAGS.has(tag) ? $(tag).first() : null;
    if (node === null || node.length === 0) {
        return null;
    }
    return normalizeCandidate(decodeHtmlEntities(node.text()));
}
|
|
70
|
+
/**
 * Read one key from a metadata record, accepting string values only.
 *
 * @param {object|null|undefined} metadata Record to read from; falsy yields null.
 * @param {string} key Property name to read.
 * @returns {*} normalizeCandidate(value) when the value is a string, otherwise null.
 */
function metadataString(metadata, key) {
    const raw = metadata ? metadata[key] : undefined;
    if (typeof raw !== 'string') {
        return null;
    }
    return normalizeCandidate(raw);
}
|
|
77
|
+
//# sourceMappingURL=parsers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parsers.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/parsers.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,IAAI,EAAE,MAAM,SAAS,CAAA;AAE/C,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAA;AACrE,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAExD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAA;AAa5C,MAAM,UAAU,uBAAuB,CAAC,IAAY,EAAE,GAAW;IAC/D,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAA;IAEpB,MAAM,KAAK,GAAG,aAAa,CAAC;QAC1B,eAAe,CAAC,CAAC,EAAE;YACjB,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,UAAU,EAAE;YAC5C,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE;YACxC,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,eAAe,EAAE;SAC9C,CAAC;QACF,cAAc,CAAC,CAAC,EAAE,OAAO,CAAC;KAC3B,CAAC,CAAA;IAEF,MAAM,WAAW,GAAG,aAAa,CAAC;QAChC,eAAe,CAAC,CAAC,EAAE;YACjB,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,gBAAgB,EAAE;YAClD,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE;YAC3C,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,qBAAqB,EAAE;SACpD,CAAC;KACH,CAAC,CAAA;IAEF,MAAM,QAAQ,GAAG,aAAa,CAAC;QAC7B,eAAe,CAAC,CAAC,EAAE;YACjB,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,cAAc,EAAE;YAChD,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,kBAAkB,EAAE;SACjD,CAAC;QACF,YAAY,CAAC,GAAG,CAAC;KAClB,CAAC,CAAA;IAEF,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAA;AACzC,CAAC;AAED,MAAM,UAAU,4BAA4B,CAC1C,QAAoD;IAEpD,OAAO;QACL,KAAK,EAAE,aAAa,CAAC,CAAC,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC;QAC9F,WAAW,EAAE,aAAa,CAAC;YACzB,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC;YACvC,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC;SAC1C,CAAC;QACF,QAAQ,EAAE,aAAa,CAAC;YACtB,cAAc,CAAC,QAAQ,EAAE,UAAU,CAAC;YACpC,cAAc,CAAC,QAAQ,EAAE,YAAY,CAAC;SACvC,CAAC;KACH,CAAA;AACH,CAAC;AAED,SAAS,eAAe,CAAC,CAAa,EAAE,SAAyB;IAC/D,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,QAAQ,CAAC,SAAS,KAAK,QAAQ,CAAC,KAAK,IAAI,CAAC,CAAC,KAAK,EAAE,CAAA;QACzE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,SAAQ;QACV,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;QAC9D,MAAM,UAAU,GAAG,kBAAkB,CAAC,kBAAkB,
CAAC,KAAK,CAAC,CAAC,CAAA;QAChE,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,cAAc,CAAC,CAAa,EAAE,OAAe;IACpD,MAAM,aAAa,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAClD,IAAI,CAAC,iBAAiB,CAAC,GAAG,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1C,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,OAAO,GAAG,CAAC,CAAC,aAAa,CAAC,CAAC,KAAK,EAAE,CAAA;IACxC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,IAAI,GAAG,kBAAkB,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAA;IAC/C,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAA;AACjC,CAAC;AAED,SAAS,cAAc,CACrB,QAAoD,EACpD,GAAW;IAEX,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;IAC3B,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;AACrE,CAAC"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
// Registrable host names treated as podcast platforms. isPodcastHost accepts a
// URL whose host equals an entry or ends with ".<entry>".
const PODCAST_HOST_SUFFIXES = [
    'spotify.com', 'podcasts.apple.com', 'podchaser.com', 'podbean.com',
    'buzzsprout.com', 'spreaker.com', 'simplecast.com', 'rss.com',
    'libsyn.com', 'omny.fm', 'acast.com', 'transistor.fm',
    'captivate.fm', 'soundcloud.com', 'ivoox.com', 'iheart.com',
    'megaphone.fm', 'pca.st', 'player.fm', 'castbox.fm',
];
|
|
23
|
+
/**
 * Extract the episode id from a Spotify episode URL.
 *
 * The id is the path segment that directly follows the first `episode`
 * segment (so `/episode/<id>` and `/embed/episode/<id>` both work) and must be
 * purely alphanumeric.
 *
 * @param {string} url Candidate URL.
 * @returns {string|null} The episode id, or null when the URL is unparsable,
 *   is not hosted on spotify.com (or one of its subdomains), or carries no
 *   valid episode id.
 */
export function extractSpotifyEpisodeId(url) {
    try {
        const parsed = new URL(url);
        const host = parsed.hostname.toLowerCase().replace(/^www\./, '');
        // Match the bare domain or a dot-delimited subdomain only. A plain
        // endsWith('spotify.com') would also accept look-alike hosts such as
        // "evilspotify.com".
        if (host !== 'spotify.com' && !host.endsWith('.spotify.com')) {
            return null;
        }
        const parts = parsed.pathname.split('/').filter(Boolean);
        const idx = parts.indexOf('episode');
        const id = idx >= 0 ? parts[idx + 1] : null;
        return id && /^[A-Za-z0-9]+$/.test(id) ? id : null;
    } catch {
        // Unparsable URLs are an expected input, not an error.
        return null;
    }
}
|
|
38
|
+
/**
 * Pull the show id and optional episode id out of an Apple Podcasts URL.
 *
 * The show id is the digit run after `/id` in the path (it must be followed by
 * `/` or the end of the path). The episode id comes from the `i` query
 * parameter and is kept only when it is all digits.
 *
 * @param {string} url Candidate URL.
 * @returns {{ showId: string, episodeId: string|null }|null} The ids, or null
 *   when the URL is unparsable, the host is not podcasts.apple.com (a leading
 *   "www." is ignored), or the path has no show id.
 */
export function extractApplePodcastIds(url) {
    try {
        const { hostname, pathname, searchParams } = new URL(url);
        if (hostname.toLowerCase().replace(/^www\./, '') !== 'podcasts.apple.com') {
            return null;
        }
        const showMatch = /\/id(\d+)(?:\/|$)/.exec(pathname);
        if (showMatch === null) {
            return null;
        }
        const rawEpisode = searchParams.get('i');
        const hasNumericEpisode = rawEpisode !== null && /^\d+$/.test(rawEpisode);
        return {
            showId: showMatch[1],
            episodeId: hasNumericEpisode ? rawEpisode : null,
        };
    } catch {
        return null;
    }
}
|
|
55
|
+
/**
 * Decide whether a JSON-LD `@type` value describes podcast-like audio content.
 *
 * The comparison is case-insensitive. Any type containing "podcast" matches,
 * as do the exact types AudioObject, Episode, RadioEpisode and MusicRecording.
 *
 * @param {string|null|undefined} type JSON-LD type name.
 * @returns {boolean} True for a podcast-like type; false otherwise, including falsy input.
 */
export function isPodcastLikeJsonLdType(type) {
    if (!type) {
        return false;
    }
    const lowered = type.toLowerCase();
    if (lowered.includes('podcast')) {
        return true;
    }
    switch (lowered) {
        case 'audioobject':
        case 'episode':
        case 'radioepisode':
        case 'musicrecording':
            return true;
        default:
            return false;
    }
}
|
|
66
|
+
/**
 * Report whether a URL points at a known podcast platform.
 *
 * Two rules are applied to the lower-cased host with any leading "www." removed:
 *   1. hosts starting with "music.amazon." count when the path contains "/podcasts/";
 *   2. otherwise the host must equal, or be a dot-delimited subdomain of, an
 *      entry in PODCAST_HOST_SUFFIXES.
 *
 * @param {string} url Candidate URL.
 * @returns {boolean} True on a match; false otherwise or when the URL cannot be parsed.
 */
export function isPodcastHost(url) {
    try {
        const { hostname, pathname } = new URL(url);
        const host = hostname.toLowerCase().replace(/^www\./, '');
        const isAmazonPodcast = host.startsWith('music.amazon.') && pathname.includes('/podcasts/');
        if (isAmazonPodcast) {
            return true;
        }
        for (const suffix of PODCAST_HOST_SUFFIXES) {
            if (host === suffix || host.endsWith(`.${suffix}`)) {
                return true;
            }
        }
        return false;
    } catch {
        return false;
    }
}
|
|
79
|
+
//# sourceMappingURL=podcast-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"podcast-utils.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/podcast-utils.ts"],"names":[],"mappings":"AAAA,MAAM,qBAAqB,GAAG;IAC5B,aAAa;IACb,oBAAoB;IACpB,eAAe;IACf,aAAa;IACb,gBAAgB;IAChB,cAAc;IACd,gBAAgB;IAChB,SAAS;IACT,YAAY;IACZ,SAAS;IACT,WAAW;IACX,eAAe;IACf,cAAc;IACd,gBAAgB;IAChB,WAAW;IACX,YAAY;IACZ,cAAc;IACd,QAAQ;IACR,WAAW;IACX,YAAY;CACb,CAAA;AAED,MAAM,UAAU,uBAAuB,CAAC,GAAW;IACjD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC;YAAE,OAAO,IAAI,CAAA;QAC9C,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QACxD,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAA;QACpC,MAAM,EAAE,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;QAC3C,OAAO,EAAE,IAAI,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;IACpD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,MAAM,UAAU,sBAAsB,CACpC,GAAW;IAEX,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,IAAI,KAAK,oBAAoB;YAAE,OAAO,IAAI,CAAA;QAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAA;QACtE,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAA;QACxB,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;QACjD,MAAM,SAAS,GAAG,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAA;QAClF,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,CAAA;IAC9B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,IAA+B;IACrE,IAAI,CAAC,IAAI;QAAE,OAAO,KAAK,CAAA;IACvB,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAA;IACrC,IAAI,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC;QAAE,OAAO,IAAI,CAAA;IAC/C,OAAO,CACL,UAAU,KAAK,aAAa;QAC5B,UAAU,KAAK,SAAS;QACxB,UAAU,KAAK,cAAc;QAC7B,UAAU,KAAK,g
BAAgB,CAChC,CAAA;AACH,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,IAAI,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAA;QACb,CAAC;QACD,OAAO,qBAAqB,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,KAAK,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,MAAM,EAAE,CAAC,CAAC,CAAA;IAC/F,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAA;IACd,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
 * Run Mozilla Readability over an HTML string inside a jsdom document.
 *
 * <style> elements are stripped first, then `@mozilla/readability` and `jsdom`
 * are loaded lazily via dynamic import. jsdom errors are forwarded to
 * console.error, except "Could not parse CSS stylesheet" messages, which are
 * dropped. This is best-effort: any exception, including a failed import,
 * results in null.
 *
 * @param {string} html Raw page markup.
 * @param {string} [url] Document URL passed to jsdom when truthy.
 * @returns {Promise<{ text: string, html: string|null, title: string|null, excerpt: string|null }|null>}
 *   The extracted article (text has whitespace runs collapsed to single spaces
 *   and is trimmed), or null when Readability finds nothing or anything throws.
 */
export async function extractReadabilityFromHtml(html, url) {
    // True for the stylesheet-parse errors that should not reach the console.
    const isCssParseNoise = (err) => {
        const hasMessage = err && typeof err === 'object' && 'message' in err;
        const message = hasMessage ? String(err.message ?? '') : '';
        return message.includes('Could not parse CSS stylesheet');
    };

    try {
        const cleanedHtml = stripCssFromHtml(html);
        const readabilityModule = await import('@mozilla/readability');
        const jsdomModule = await import('jsdom');

        const virtualConsole = new jsdomModule.VirtualConsole();
        virtualConsole.on('jsdomError', (err) => {
            if (!isCssParseNoise(err)) {
                console.error(err);
            }
        });

        const jsdomOptions = url ? { url, virtualConsole } : { virtualConsole };
        const dom = new jsdomModule.JSDOM(cleanedHtml, jsdomOptions);
        const article = new readabilityModule.Readability(dom.window.document).parse();
        if (!article) {
            return null;
        }

        const collapsedText = (article.textContent ?? '').replace(/\s+/g, ' ').trim();
        return {
            text: collapsedText,
            html: article.content ?? null,
            title: article.title ?? null,
            excerpt: article.excerpt ?? null,
        };
    } catch {
        return null;
    }
}
|
|
32
|
+
/**
 * Turn a readability result into an HTML string.
 *
 * The extracted HTML is preferred. When only text is available it is
 * HTML-escaped and wrapped as `<article><p>...</p></article>`.
 *
 * @param {{ html?: string|null, text?: string|null }|null|undefined} result Readability result.
 * @returns {string|null} HTML markup, or null when there is no result or it has neither html nor text.
 */
export function toReadabilityHtml(result) {
    if (result && result.html) {
        return result.html;
    }
    if (result && result.text) {
        return `<article><p>${escapeHtml(result.text)}</p></article>`;
    }
    return null;
}
|
|
41
|
+
/**
 * Escape the five HTML-significant characters so plain text can be embedded in
 * markup safely.
 *
 * @param {string} input Plain text.
 * @returns {string} The text with &, <, >, " and ' replaced by HTML entities.
 */
function escapeHtml(input) {
    // The ampersand must be replaced first; otherwise the "&" introduced by the
    // later entities would be escaped a second time.
    return input
        .replaceAll('&', '&amp;')
        .replaceAll('<', '&lt;')
        .replaceAll('>', '&gt;')
        .replaceAll('"', '&quot;')
        .replaceAll("'", '&#39;');
}
|
|
49
|
+
/**
 * Remove every <style>...</style> element (case-insensitive) from the markup.
 *
 * Readability doesn't need CSS; jsdom's CSS parsing can be extremely slow on
 * some pages.
 *
 * @param {string} html Raw page markup.
 * @returns {string} The markup without inline style elements.
 */
function stripCssFromHtml(html) {
    const styleElementPattern = /<style\b[^>]*>[\s\S]*?<\/style>/gi;
    return html.replaceAll(styleElementPattern, '');
}
|
|
53
|
+
//# sourceMappingURL=readability.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"readability.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/readability.ts"],"names":[],"mappings":"AAOA,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAC9C,IAAY,EACZ,GAAY;IAEZ,IAAI,CAAC;QACH,MAAM,WAAW,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAA;QAC1C,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAA;QAC5D,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAA;QACvD,MAAM,cAAc,GAAG,IAAI,cAAc,EAAE,CAAA;QAC3C,cAAc,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,GAAG,EAAE,EAAE;YACtC,MAAM,OAAO,GACX,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,SAAS,IAAI,GAAG;gBAChD,CAAC,CAAC,MAAM,CAAE,GAA6B,CAAC,OAAO,IAAI,EAAE,CAAC;gBACtD,CAAC,CAAC,EAAE,CAAA;YACR,IAAI,OAAO,CAAC,QAAQ,CAAC,gCAAgC,CAAC;gBAAE,OAAM;YAC9D,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QACpB,CAAC,CAAC,CAAA;QAEF,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,EAAE,cAAc,EAAE,CAAC,CAAA;QACtF,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;QACnD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAA;QAC9B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAA;QAEzB,MAAM,IAAI,GAAG,CAAC,OAAO,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;QACpE,OAAO;YACL,IAAI;YACJ,IAAI,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI;YAC7B,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,IAAI;YAC5B,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI;SACjC,CAAA;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,MAAgC;IAChE,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,IAAI,MAAM,CAAC,IAAI;QAAE,OAAO,MAAM,CAAC,IAAI,CAAA;IACnC,IAAI,CAAC,MAAM,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IAC7B,OAAO,eAAe,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAA;AAC/D,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,OAAO,KAAK;SACT,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC;SACxB,UAAU,CAAC,GAAG,EAAE,MAAM,CAAC;SACvB,UAAU,CAAC,GAAG,EAAE,MAAM,CAAC;SACvB,UAAU,CAAC,GAAG,EAAE,QAAQ,CAAC;SACzB,UAAU,CAAC,GAAG,EAAE,OAAO,CAAC,CAAA;AAC7B,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,yFAAyF;IACzF,OAAO,IAAI,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAA;AAC9D,CAAC"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
// Hosts recognised as Twitter/X; callers strip a leading "www." before the lookup.
const TWITTER_HOSTS = new Set(['x.com', 'twitter.com', 'mobile.twitter.com']);

// Nitter hosts that toNitterUrls substitutes for the Twitter host.
const NITTER_HOSTS = [
    'nitter.net', 'nitter.poast.org', 'nitter.catsarch.com',
    'nitter.privacydev.net', 'nitter.1d4.us',
];

// Case-insensitive phrases that isBlockedTwitterContent treats as a blocked page.
const TWITTER_BLOCKED_TEXT_PATTERN = /something went wrong|try again|privacy related extensions|please disable them and try again/i;

// Lower-case substrings that isAnubisHtml searches for.
const ANUBIS_TOKENS = ['anubis', 'proof-of-work', 'proof of work', 'hashcash', 'jshelter'];
|
|
11
|
+
/**
 * Check whether a URL is a Twitter/X status (tweet) link.
 *
 * The host (lower-cased, leading "www." removed) must be in TWITTER_HOSTS and
 * the path must contain `/status/` followed by at least one digit.
 *
 * @param {string} url Candidate URL.
 * @returns {boolean} True for a status URL; false otherwise or when the URL cannot be parsed.
 */
export function isTwitterStatusUrl(url) {
    try {
        const { hostname, pathname } = new URL(url);
        const host = hostname.toLowerCase().replace(/^www\./, '');
        return TWITTER_HOSTS.has(host) && /\/status\/\d+/.test(pathname);
    } catch {
        return false;
    }
}
|
|
23
|
+
/**
 * Return a copy of `values` rotated left by `|seed| mod length` positions.
 *
 * @param {Array} values Items to rotate; the input array is never mutated.
 * @param {number} seed Any integer; its absolute value picks the starting index.
 * @returns {Array} A new array holding the same items in rotated order.
 */
function rotateHosts(values, seed) {
    if (values.length <= 1) {
        return [...values];
    }
    const start = Math.abs(seed) % values.length;
    const head = values.slice(start);
    const tail = values.slice(0, start);
    return [...head, ...tail];
}
|
|
29
|
+
/**
 * Hash a string to a signed 32-bit integer using a base-31 polynomial over its
 * UTF-16 code units, truncating to 32 bits after every step.
 *
 * @param {string} input Text to hash.
 * @returns {number} Signed 32-bit hash; 0 for the empty string.
 */
function hashSeed(input) {
    // split('') yields individual UTF-16 code units, matching charCodeAt(i) indexing.
    return input
        .split('')
        .reduce((acc, unit) => (acc * 31 + unit.charCodeAt(0)) | 0, 0);
}
|
|
36
|
+
/**
 * Map a Twitter/X URL onto every host in NITTER_HOSTS.
 *
 * The host list is rotated by a hash of the URL's path and query string, so
 * the same URL always produces the same ordering. Each result keeps the
 * original URL apart from the hostname, and is forced to https.
 *
 * @param {string} url Candidate Twitter/X URL.
 * @returns {string[]} One URL per Nitter host, or an empty array when the host
 *   is not in TWITTER_HOSTS or the URL cannot be parsed.
 */
export function toNitterUrls(url) {
    try {
        const source = new URL(url);
        const host = source.hostname.toLowerCase().replace(/^www\./, '');
        if (!TWITTER_HOSTS.has(host)) {
            return [];
        }
        const seed = hashSeed(`${source.pathname}${source.search}`);
        const mirrors = [];
        for (const nitterHost of rotateHosts(NITTER_HOSTS, seed)) {
            const mirror = new URL(source.href);
            mirror.hostname = nitterHost;
            mirror.protocol = 'https:';
            mirrors.push(mirror.href);
        }
        return mirrors;
    } catch {
        return [];
    }
}
|
|
55
|
+
/**
 * Report whether fetched text matches TWITTER_BLOCKED_TEXT_PATTERN.
 *
 * @param {string} content Extracted page text.
 * @returns {boolean} True when the pattern matches; false for falsy input.
 */
export function isBlockedTwitterContent(content) {
    return Boolean(content) && TWITTER_BLOCKED_TEXT_PATTERN.test(content);
}
|
|
60
|
+
/**
 * Report whether the markup contains the substring "anubis" plus at least one
 * ANUBIS_TOKENS entry (comparison is case-insensitive).
 *
 * NOTE(review): ANUBIS_TOKENS itself contains 'anubis', so once the "anubis"
 * gate passes the token scan always succeeds; in effect this is just the
 * "anubis" substring check. Confirm whether a second, independent token was
 * meant to be required.
 *
 * @param {string} html Raw page markup.
 * @returns {boolean} True when the markup matches; false otherwise, including falsy input.
 */
export function isAnubisHtml(html) {
    if (!html) {
        return false;
    }
    const haystack = html.toLowerCase();
    if (haystack.includes('anubis')) {
        for (const token of ANUBIS_TOKENS) {
            if (haystack.includes(token)) {
                return true;
            }
        }
    }
    return false;
}
|
|
68
|
+
//# sourceMappingURL=twitter-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"twitter-utils.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/twitter-utils.ts"],"names":[],"mappings":"AAAA,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,aAAa,EAAE,oBAAoB,CAAC,CAAC,CAAA;AAC7E,MAAM,YAAY,GAAG;IACnB,YAAY;IACZ,kBAAkB;IAClB,qBAAqB;IACrB,uBAAuB;IACvB,eAAe;CAChB,CAAA;AACD,MAAM,4BAA4B,GAChC,8FAA8F,CAAA;AAChG,MAAM,aAAa,GAAG,CAAC,QAAQ,EAAE,eAAe,EAAE,eAAe,EAAE,UAAU,EAAE,UAAU,CAAC,CAAA;AAE1F,MAAM,UAAU,kBAAkB,CAAC,GAAW;IAC5C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAA;QAC1C,OAAO,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAA;IACd,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAI,MAAW,EAAE,IAAY;IAC/C,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC,KAAK,EAAE,CAAA;IAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IAC7C,OAAO,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAA;AAC7D,CAAC;AAED,SAAS,QAAQ,CAAC,KAAa;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,IAAI,GAAG,CAAC,IAAI,GAAG,EAAE,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IAC9C,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,GAAW;IACtC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAChE,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,OAAO,EAAE,CAAA;QACvC,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,MAAM,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,CAAA;QAC3D,MAAM,OAAO,GAAG,WAAW,CAAC,YAAY,EAAE,IAAI,CAAC,CAAA;QAC/C,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,EAAE;YAChC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAA;YACvC,IAAI,CAAC,QAAQ,GAAG,UAAU,CAAA;YAC1B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAA;YACxB,OAAO,IAA
I,CAAC,QAAQ,EAAE,CAAA;QACxB,CAAC,CAAC,CAAA;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,OAAe;IACrD,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAA;IAC1B,OAAO,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;AACnD,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,IAAI,CAAC,IAAI;QAAE,OAAO,KAAK,CAAA;IACvB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAA;IAChC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAAE,OAAO,KAAK,CAAA;IAC3C,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAA;AAC7D,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/types.ts"],"names":[],"mappings":"AAOA,MAAM,CAAC,MAAM,kBAAkB,GAAG,OAAO,CAAA;AACzC,MAAM,CAAC,MAAM,8BAA8B,GAAG,IAAI,CAAA;AAClD,MAAM,CAAC,MAAM,kBAAkB,GAAc,SAAS,CAAA"}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import { applyContentBudget, normalizeCandidate, normalizeForPrompt } from './cleaner.js';
|
|
2
|
+
import { DEFAULT_CACHE_MODE, DEFAULT_MAX_CONTENT_CHARACTERS, DEFAULT_TIMEOUT_MS, } from './types.js';
|
|
3
|
+
// Matches a leading "www." (case-insensitive); used by safeHostname to drop that prefix.
const WWW_PREFIX_PATTERN = /^www\./i;
|
|
4
|
+
// Splits transcript text into lines on LF or CRLF line endings.
const TRANSCRIPT_LINE_SPLIT_PATTERN = /\r?\n/;
|
|
5
|
+
// Splits text on runs of whitespace when counting words. In the code visible here it is
// only passed to String.prototype.split.
const WORD_SPLIT_PATTERN = /\s+/g;
|
|
6
|
+
/**
 * Reads a media duration from transcript metadata.
 *
 * Checks `metadata.durationSeconds` first and falls back to
 * `metadata.media.durationSeconds`. A value is accepted only when it is a
 * finite number strictly greater than zero.
 *
 * @param {object | null | undefined} metadata - Transcript metadata bag.
 * @returns {number | null} The accepted duration, or null when none is usable.
 */
function resolveMediaDurationSecondsFromTranscriptMetadata(metadata) {
    const asPositiveFinite = (value) =>
        typeof value === 'number' && Number.isFinite(value) && value > 0 ? value : null;

    if (!metadata) {
        return null;
    }
    const topLevel = asPositiveFinite(metadata.durationSeconds);
    if (topLevel !== null) {
        return topLevel;
    }
    const container = metadata.media;
    if (container === null || typeof container !== 'object') {
        return null;
    }
    return asPositiveFinite(container.durationSeconds);
}
|
|
22
|
+
/**
 * Reads the transcription provider name from transcript metadata.
 *
 * @param {object | null | undefined} metadata - Transcript metadata bag.
 * @returns {string | null} The trimmed `transcriptionProvider` string, or null
 *   when metadata is missing, the field is not a string, or it is blank.
 */
function resolveTranscriptionProviderFromTranscriptMetadata(metadata) {
    if (!metadata) {
        return null;
    }
    const rawProvider = metadata.transcriptionProvider;
    if (typeof rawProvider !== 'string') {
        return null;
    }
    const cleaned = rawProvider.trim();
    return cleaned.length > 0 ? cleaned : null;
}
|
|
28
|
+
/**
 * Picks the cache mode for a request.
 *
 * @param {object | null | undefined} options - Caller options; `cacheMode` is read if present.
 * @returns {*} `options.cacheMode` unless it is null/undefined, in which case
 *   DEFAULT_CACHE_MODE is returned.
 */
export function resolveCacheMode(options) {
    const requested = options?.cacheMode;
    if (requested === null || requested === undefined) {
        return DEFAULT_CACHE_MODE;
    }
    return requested;
}
|
|
31
|
+
/**
 * Resolves the content character budget requested by the caller.
 *
 * @param {object | null | undefined} options - Caller options; `maxCharacters` is read if present.
 * @returns {number | null} null when no usable budget was supplied (not a
 *   finite number greater than zero). Otherwise DEFAULT_MAX_CONTENT_CHARACTERS
 *   when the request is at or below that default, or the request rounded down
 *   to an integer when it is larger.
 */
export function resolveMaxCharacters(options) {
    const requested = options?.maxCharacters;
    const isUsable = typeof requested === 'number' && Number.isFinite(requested) && requested > 0;
    if (!isUsable) {
        return null;
    }
    // Requests at or below the default are raised to the default rather than honoured.
    return requested > DEFAULT_MAX_CONTENT_CHARACTERS
        ? Math.floor(requested)
        : DEFAULT_MAX_CONTENT_CHARACTERS;
}
|
|
41
|
+
/**
 * Resolves the request timeout in whole milliseconds.
 *
 * @param {object | null | undefined} options - Caller options; `timeoutMs` is read if present.
 * @returns {number} DEFAULT_TIMEOUT_MS when `timeoutMs` is missing, not a
 *   finite number, or not greater than zero. Otherwise the value rounded down
 *   to an integer, never less than 1.
 */
export function resolveTimeoutMs(options) {
    const candidate = options?.timeoutMs;
    if (typeof candidate !== 'number' || !Number.isFinite(candidate) || candidate <= 0) {
        return DEFAULT_TIMEOUT_MS;
    }
    // A positive fraction below 1 (e.g. 0.5) passes the guard above but floors
    // to 0. Clamp to 1 so the result stays a positive integer.
    return Math.max(1, Math.floor(candidate));
}
|
|
48
|
+
/**
 * Resolves the Firecrawl usage mode.
 *
 * @param {object | null | undefined} options - Caller options; `firecrawl` is read if present.
 * @returns {'off' | 'auto' | 'always'} The requested mode when it is one of the
 *   three recognised strings, otherwise 'auto'.
 */
export function resolveFirecrawlMode(options) {
    const requested = options?.firecrawl;
    switch (requested) {
        case 'off':
        case 'auto':
        case 'always':
            return requested;
        default:
            return 'auto';
    }
}
|
|
55
|
+
/**
 * Appends a note to an existing "; "-separated note string.
 *
 * @param {string | null | undefined} existing - Notes accumulated so far.
 * @param {string | null | undefined} next - Note to add.
 * @returns {string} `existing` (or '' when it is null/undefined) if `next` is
 *   falsy; `next` alone if `existing` is empty; otherwise `existing; next`.
 */
export function appendNote(existing, next) {
    const hasExisting = Boolean(existing) && existing.length !== 0;
    if (next) {
        return hasExisting ? `${existing}; ${next}` : next;
    }
    return existing ?? '';
}
|
|
64
|
+
/**
 * Extracts the hostname from a URL string, minus any leading "www.".
 *
 * @param {string} rawUrl - URL to parse.
 * @returns {string | null} The hostname, or null when `rawUrl` cannot be parsed.
 */
export function safeHostname(rawUrl) {
    let parsed;
    try {
        parsed = new URL(rawUrl);
    }
    catch {
        // Unparseable input is an expected case, not an error.
        return null;
    }
    return parsed.hostname.replace(WWW_PREFIX_PATTERN, '');
}
|
|
72
|
+
/**
 * Returns the first candidate that is still truthy after `normalizeCandidate`.
 *
 * @param {Iterable<*>} candidates - Values to try, in priority order.
 * @returns {* | null} The first truthy normalized value, or null when none qualifies.
 */
export function pickFirstText(candidates) {
    for (const raw of candidates) {
        const text = normalizeCandidate(raw);
        if (!text) {
            continue;
        }
        return text;
    }
    return null;
}
|
|
81
|
+
/**
 * Chooses the text that later processing is based on.
 *
 * A transcript takes priority when one is present and is non-empty after
 * `normalizeForPrompt`; it is returned with a "Transcript:" header line.
 * Otherwise the source content is returned unchanged.
 *
 * @param {string} sourceContent - Content extracted from the page.
 * @param {string | null | undefined} transcriptText - Transcript text, if any.
 * @returns {string} The selected base content.
 */
export function selectBaseContent(sourceContent, transcriptText) {
    const cleanedTranscript = transcriptText ? normalizeForPrompt(transcriptText) : '';
    return cleanedTranscript.length === 0
        ? sourceContent
        : `Transcript:\n${cleanedTranscript}`;
}
|
|
91
|
+
/**
 * Computes size statistics for a transcript.
 *
 * @param {string | null | undefined} transcriptText - Raw transcript text.
 * @returns {{ transcriptCharacters: number | null, transcriptLines: number | null, transcriptWordCount: number | null }}
 *   Character count, count of non-blank lines, and count of whitespace-separated
 *   words. Each value is null instead of zero, and all three are null when no
 *   transcript is given.
 */
export function summarizeTranscript(transcriptText) {
    if (!transcriptText) {
        return { transcriptCharacters: null, transcriptLines: null, transcriptWordCount: null };
    }
    // Counts the pieces that still contain something after trimming.
    const countNonBlank = (pieces) =>
        pieces.reduce((total, piece) => (piece.trim().length > 0 ? total + 1 : total), 0);
    const positiveOrNull = (count) => (count > 0 ? count : null);

    const hasCharacters = transcriptText.length > 0;
    const lineTotal = countNonBlank(transcriptText.split(TRANSCRIPT_LINE_SPLIT_PATTERN));
    const wordTotal = hasCharacters
        ? countNonBlank(transcriptText.split(WORD_SPLIT_PATTERN))
        : 0;
    return {
        transcriptCharacters: hasCharacters ? transcriptText.length : null,
        transcriptLines: positiveOrNull(lineTotal),
        transcriptWordCount: positiveOrNull(wordTotal),
    };
}
|
|
110
|
+
/**
 * Returns the diagnostics attached to a transcript resolution, building a
 * minimal record when the resolution has none.
 *
 * In the built record, `cacheStatus` is 'bypassed' when the cache mode is
 * 'bypass', else 'miss' when non-empty text was resolved, else 'unknown'.
 *
 * @param {object} resolution - Transcript resolution; `diagnostics`, `text` and `source` are read.
 * @param {string} cacheMode - Cache mode in effect for the request.
 * @returns {object} The existing diagnostics, or the newly built record.
 */
export function ensureTranscriptDiagnostics(resolution, cacheMode) {
    const existing = resolution.diagnostics;
    if (existing) {
        return existing;
    }
    const { text, source } = resolution;
    const textProvided = typeof text === 'string' && text.length > 0;
    const bypassRequested = cacheMode === 'bypass';

    let cacheStatus;
    if (bypassRequested) {
        cacheStatus = 'bypassed';
    }
    else if (textProvided) {
        cacheStatus = 'miss';
    }
    else {
        cacheStatus = 'unknown';
    }
    return {
        cacheMode,
        cacheStatus,
        textProvided,
        provider: source,
        attemptedProviders: source ? [source] : [],
        notes: bypassRequested ? 'Cache bypass requested' : null,
    };
}
|
|
125
|
+
/**
 * Assembles the final extracted-content record for a link.
 *
 * The base content is passed through `normalizeForPrompt`. When
 * `maxCharacters` is a number, `applyContentBudget` supplies the content and
 * its size fields; otherwise the normalized text is used as-is, marked as not
 * truncated, with its words counted here. Transcript statistics, transcription
 * provider and media duration are derived from `transcriptResolution`.
 *
 * @param {object} params
 * @param {string} params.url - Link the content was extracted from.
 * @param {string} params.baseContent - Text to normalize and budget.
 * @param {number | null | undefined} params.maxCharacters - Budget; applied only when a number.
 * @param {*} params.title - Passed through unchanged.
 * @param {*} params.description - Passed through unchanged.
 * @param {*} params.siteName - Passed through unchanged.
 * @param {object} params.transcriptResolution - `text`, `metadata` and `source` are read.
 * @param {*} params.video - Passed through unchanged.
 * @param {*} params.isVideoOnly - Passed through unchanged.
 * @param {*} params.diagnostics - Passed through unchanged.
 * @returns {object} The assembled content record.
 */
export function finalizeExtractedLinkContent({ url, baseContent, maxCharacters, title, description, siteName, transcriptResolution, video, isVideoOnly, diagnostics, }) {
    const promptText = normalizeForPrompt(baseContent);

    let budget;
    if (typeof maxCharacters === 'number') {
        budget = applyContentBudget(promptText, maxCharacters);
    }
    else {
        // No budget requested: keep everything and count the words locally.
        let words = 0;
        if (promptText.length > 0) {
            for (const token of promptText.split(WORD_SPLIT_PATTERN)) {
                if (token.trim().length > 0) {
                    words += 1;
                }
            }
        }
        budget = {
            content: promptText,
            truncated: false,
            totalCharacters: promptText.length,
            wordCount: words,
        };
    }
    const { content, truncated, totalCharacters, wordCount } = budget;

    const transcriptStats = summarizeTranscript(transcriptResolution.text);
    const transcriptionProvider = resolveTranscriptionProviderFromTranscriptMetadata(transcriptResolution.metadata);
    const mediaDurationSeconds = resolveMediaDurationSecondsFromTranscriptMetadata(transcriptResolution.metadata);

    return {
        url,
        title,
        description,
        siteName,
        content,
        truncated,
        totalCharacters,
        wordCount,
        transcriptCharacters: transcriptStats.transcriptCharacters,
        transcriptLines: transcriptStats.transcriptLines,
        transcriptWordCount: transcriptStats.transcriptWordCount,
        transcriptSource: transcriptResolution.source,
        transcriptionProvider,
        transcriptMetadata: transcriptResolution.metadata ?? null,
        mediaDurationSeconds,
        video,
        isVideoOnly,
        diagnostics,
    };
}
|
|
164
|
+
//# sourceMappingURL=utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/utils.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAA;AACzF,OAAO,EACL,kBAAkB,EAClB,8BAA8B,EAC9B,kBAAkB,GAMnB,MAAM,YAAY,CAAA;AAEnB,MAAM,kBAAkB,GAAG,SAAS,CAAA;AACpC,MAAM,6BAA6B,GAAG,OAAO,CAAA;AAC7C,MAAM,kBAAkB,GAAG,MAAM,CAAA;AAEjC,SAAS,iDAAiD,CACxD,QAAoD;IAEpD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,MAAM,MAAM,GAAI,QAA0C,CAAC,eAAe,CAAA;IAC1E,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QACxE,OAAO,MAAM,CAAA;IACf,CAAC;IACD,MAAM,KAAK,GAAI,QAAgC,CAAC,KAAK,CAAA;IACrD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QAChD,MAAM,MAAM,GAAI,KAAuC,CAAC,eAAe,CAAA;QACvE,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;YACxE,OAAO,MAAM,CAAA;QACf,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,kDAAkD,CACzD,QAAoD;IAEpD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,MAAM,QAAQ,GAAI,QAAgD,CAAC,qBAAqB,CAAA;IACxF,OAAO,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;AAC5F,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,OAAiC;IAChE,OAAO,OAAO,EAAE,SAAS,IAAI,kBAAkB,CAAA;AACjD,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,OAAiC;IACpE,MAAM,SAAS,GAAG,OAAO,EAAE,aAAa,CAAA;IACxC,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnF,OAAO,IAAI,CAAA;IACb,CAAC;IACD,IAAI,SAAS,IAAI,8BAA8B,EAAE,CAAC;QAChD,OAAO,8BAA8B,CAAA;IACvC,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;AAC9B,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,OAAiC;IAChE,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,CAAA;IACpC,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnF,OAAO,kBAAkB,CAAA;IAC3B,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;AAC9B,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,OAAiC;IACpE,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,CAAA;IACpC,IAAI,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,MAAM,IAAI,SA
AS,KAAK,QAAQ,EAAE,CAAC;QAC1E,OAAO,SAAS,CAAA;IAClB,CAAC;IACD,OAAO,MAAM,CAAA;AACf,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,QAAmC,EAAE,IAAY;IAC1E,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,QAAQ,IAAI,EAAE,CAAA;IACvB,CAAC;IACD,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvC,OAAO,IAAI,CAAA;IACb,CAAC;IACD,OAAO,GAAG,QAAQ,KAAK,IAAI,EAAE,CAAA;AAC/B,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,MAAc;IACzC,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAA;IACjE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,UAA4C;IACxE,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,UAAU,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAA;QAChD,IAAI,UAAU,EAAE,CAAC;YACf,OAAO,UAAU,CAAA;QACnB,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,aAAqB,EAAE,cAA6B;IACpF,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,aAAa,CAAA;IACtB,CAAC;IACD,MAAM,oBAAoB,GAAG,kBAAkB,CAAC,cAAc,CAAC,CAAA;IAC/D,IAAI,oBAAoB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,aAAa,CAAA;IACtB,CAAC;IACD,OAAO,gBAAgB,oBAAoB,EAAE,CAAA;AAC/C,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,cAA6B;IAC/D,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,EAAE,oBAAoB,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,CAAA;IACzF,CAAC;IACD,MAAM,oBAAoB,GAAG,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAA;IACrF,MAAM,kBAAkB,GAAG,cAAc;SACtC,KAAK,CAAC,6BAA6B,CAAC;SACpC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAA;IAC3C,MAAM,eAAe,GAAG,kBAAkB,GAAG,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAA;IAC1E,MAAM,sBAAsB,GAC1B,cAAc,CAAC,MAAM,GAAG,CAAC;QACvB,CAAC,CAAC,cAAc;aACX,KAAK,CAAC,kBAAkB,CAAC;aACzB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;aAC5B,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM;QAC/C,CAAC,CAAC,CAAC,CAAA;IACP,MAAM,mBAAmB,GAAG,sBAAsB,GAAG,CAAC,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,IAAI,CAAA;IACtF,OAAO,EAAE,oBAAoB,EAAE,eAAe,EAAE,mBAAmB,EAAE,CA
AA;AACvE,CAAC;AAED,MAAM,UAAU,2BAA2B,CACzC,UAAgC,EAChC,SAAoB;IAEpB,IAAI,UAAU,CAAC,WAAW,EAAE,CAAC;QAC3B,OAAO,UAAU,CAAC,WAAW,CAAA;IAC/B,CAAC;IACD,MAAM,OAAO,GAAG,OAAO,UAAU,CAAC,IAAI,KAAK,QAAQ,IAAI,UAAU,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAA;IACjF,MAAM,WAAW,GAAG,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAA;IACtF,OAAO;QACL,SAAS;QACT,WAAW;QACX,YAAY,EAAE,OAAO;QACrB,QAAQ,EAAE,UAAU,CAAC,MAAM;QAC3B,kBAAkB,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;QAChE,KAAK,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,IAAI;KAChE,CAAA;AACH,CAAC;AAED,MAAM,UAAU,4BAA4B,CAAC,EAC3C,GAAG,EACH,WAAW,EACX,aAAa,EACb,KAAK,EACL,WAAW,EACX,QAAQ,EACR,oBAAoB,EACpB,KAAK,EACL,WAAW,EACX,WAAW,GACW;IACtB,MAAM,UAAU,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAA;IAClD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,SAAS,EAAE,GACtD,OAAO,aAAa,KAAK,QAAQ;QAC/B,CAAC,CAAC,kBAAkB,CAAC,UAAU,EAAE,aAAa,CAAC;QAC/C,CAAC,CAAC;YACE,OAAO,EAAE,UAAU;YACnB,SAAS,EAAE,KAAK;YAChB,eAAe,EAAE,UAAU,CAAC,MAAM;YAClC,SAAS,EACP,UAAU,CAAC,MAAM,GAAG,CAAC;gBACnB,CAAC,CAAC,UAAU;qBACP,KAAK,CAAC,kBAAkB,CAAC;qBACzB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;qBAC5B,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM;gBAC/C,CAAC,CAAC,CAAC;SACR,CAAA;IACP,MAAM,EAAE,oBAAoB,EAAE,eAAe,EAAE,mBAAmB,EAAE,GAAG,mBAAmB,CACxF,oBAAoB,CAAC,IAAI,CAC1B,CAAA;IACD,MAAM,qBAAqB,GAAG,kDAAkD,CAC9E,oBAAoB,CAAC,QAAQ,CAC9B,CAAA;IACD,MAAM,oBAAoB,GAAG,iDAAiD,CAC5E,oBAAoB,CAAC,QAAQ,CAC9B,CAAA;IAED,OAAO;QACL,GAAG;QACH,KAAK;QACL,WAAW;QACX,QAAQ;QACR,OAAO;QACP,SAAS;QACT,eAAe;QACf,SAAS;QACT,oBAAoB;QACpB,eAAe;QACf,mBAAmB;QACnB,gBAAgB,EAAE,oBAAoB,CAAC,MAAM;QAC7C,qBAAqB;QACrB,kBAAkB,EAAE,oBAAoB,CAAC,QAAQ,IAAI,IAAI;QACzD,oBAAoB;QACpB,KAAK;QACL,WAAW;QACX,WAAW;KACZ,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { load } from 'cheerio';
|
|
2
|
+
// Lower-case file extensions that isDirectVideoUrl accepts as a direct video file.
const VIDEO_EXTENSIONS = new Set(['.mp4', '.webm', '.mov', '.m4v']);
|
|
3
|
+
/**
 * Resolves a possibly relative URL reference against a base URL.
 *
 * @param {string} candidate - URL reference; surrounding whitespace is ignored.
 * @param {string} baseUrl - Base used when `candidate` is relative.
 * @returns {string | null} The absolute URL string, or null when `candidate`
 *   is blank or the pair cannot be parsed.
 */
function resolveAbsoluteUrl(candidate, baseUrl) {
    const reference = candidate.trim();
    if (reference === '') {
        return null;
    }
    let absolute;
    try {
        absolute = new URL(reference, baseUrl);
    }
    catch {
        return null;
    }
    return absolute.toString();
}
|
|
14
|
+
/**
 * Tells whether a URL points straight at a video file, judged by the
 * lower-cased path ending in one of VIDEO_EXTENSIONS. Query string and
 * fragment are not considered.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} true on an extension match; false otherwise, including
 *   when `url` cannot be parsed.
 */
function isDirectVideoUrl(url) {
    let pathname;
    try {
        pathname = new URL(url).pathname.toLowerCase();
    }
    catch {
        return false;
    }
    return [...VIDEO_EXTENSIONS].some((extension) => pathname.endsWith(extension));
}
|
|
28
|
+
/**
 * Extracts the 11-character YouTube video id from an embed or short URL.
 *
 * Recognised shapes:
 *  - `https://youtube.com/embed/<id>` and any `*.youtube.com` subdomain
 *  - `https://www.youtube-nocookie.com/embed/<id>` (privacy-enhanced embeds)
 *  - `https://youtu.be/<id>`
 *
 * @param {string} raw - Absolute URL to inspect.
 * @returns {string | null} The video id, or null when the URL is unparseable,
 *   is on another host, or does not carry a single-video id.
 */
function extractYouTubeVideoIdFromEmbedUrl(raw) {
    // `/embed/videoseries?list=…` (playlist) and `/embed/live_stream?channel=…`
    // (channel live stream) are real embed paths whose segment is exactly 11
    // id-legal characters. They are not video ids and must not be returned.
    const reservedEmbedSegments = new Set(['videoseries', 'live_stream']);

    let parsed;
    try {
        parsed = new URL(raw);
    }
    catch {
        return null;
    }
    const host = parsed.hostname.toLowerCase().replace(/^www\./, '');

    const isYouTubeHost = host === 'youtube.com' ||
        host.endsWith('.youtube.com') ||
        host === 'youtube-nocookie.com';
    if (isYouTubeHost) {
        // Anchored at the start of the path, and the id must end at 11 characters,
        // so a longer segment is rejected instead of being cut to its first 11.
        const match = parsed.pathname.match(/^\/embed\/([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])/);
        const id = match?.[1] ?? null;
        return id !== null && !reservedEmbedSegments.has(id) ? id : null;
    }
    if (host === 'youtu.be') {
        const id = parsed.pathname.replace(/^\//, '').trim();
        return /^[a-zA-Z0-9_-]{11}$/.test(id) ? id : null;
    }
    return null;
}
|
|
46
|
+
/**
 * Finds the first non-empty `<meta>` value among a list of selectors.
 *
 * For each selector, the first `meta[attribute="value"]` element is looked up;
 * its `content` attribute (or `value` when `content` is absent) is trimmed and
 * returned if non-empty.
 *
 * @param {Function} $ - Query function for the loaded document (created via `load` in this file).
 * @param {Array<{ attribute: string, value: string }>} selectors - Tried in order.
 * @returns {string | null} The first non-empty trimmed value, or null.
 */
function metaContent($, selectors) {
    for (const { attribute, value } of selectors) {
        const element = $(`meta[${attribute}="${value}"]`).first();
        if (element.length !== 0) {
            const text = (element.attr('content') ?? element.attr('value') ?? '').trim();
            if (text) {
                return text;
            }
        }
    }
    return null;
}
|
|
57
|
+
/**
 * Detects the primary video referenced by an HTML page.
 *
 * Sources are tried in this order and the first hit wins:
 *  1. the first YouTube `<iframe>` embed,
 *  2. OpenGraph video meta tags (`property` attributes before `name`),
 *  3. the first `<video src>` or `<video><source src>` element.
 *
 * @param {string} html - Page markup.
 * @param {string} url - Page URL, used to resolve relative references.
 * @returns {{ kind: 'youtube' | 'direct', url: string } | null} A YouTube
 *   watch URL, a direct video file URL, or null when nothing is detected.
 */
export function detectPrimaryVideoFromHtml(html, url) {
    const $ = load(html);
    const youtubeResult = (id) => ({ kind: 'youtube', url: `https://www.youtube.com/watch?v=${id}` });
    const directResult = (href) => ({ kind: 'direct', url: href });

    // 1) YouTube embeds (preferred, stable)
    const embedSrc = $('iframe[src*="youtube.com/embed/"], iframe[src*="youtu.be/"]').first().attr('src') ?? null;
    if (embedSrc) {
        const absolute = resolveAbsoluteUrl(embedSrc, url);
        const id = absolute ? extractYouTubeVideoIdFromEmbedUrl(absolute) : null;
        if (id) {
            return youtubeResult(id);
        }
    }

    // 2) OpenGraph video
    const ogKeys = ['og:video', 'og:video:url', 'og:video:secure_url'];
    const ogSelectors = ['property', 'name'].flatMap((attribute) => ogKeys.map((value) => ({ attribute, value })));
    const ogVideo = metaContent($, ogSelectors);
    if (ogVideo) {
        const absolute = resolveAbsoluteUrl(ogVideo, url);
        if (absolute) {
            if (isDirectVideoUrl(absolute)) {
                return directResult(absolute);
            }
            const id = extractYouTubeVideoIdFromEmbedUrl(absolute);
            if (id) {
                return youtubeResult(id);
            }
        }
    }

    // 3) <video> tags
    const tagSrc = $('video[src]').first().attr('src') ?? $('video source[src]').first().attr('src') ?? null;
    if (tagSrc) {
        const absolute = resolveAbsoluteUrl(tagSrc, url);
        if (absolute && isDirectVideoUrl(absolute)) {
            return directResult(absolute);
        }
    }
    return null;
}
|
|
96
|
+
//# sourceMappingURL=video.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"video.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/video.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,IAAI,EAAE,MAAM,SAAS,CAAA;AAO/C,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;AAEnE,SAAS,kBAAkB,CAAC,SAAiB,EAAE,OAAe;IAC5D,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,CAAA;IAChC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAA;IACrC,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,QAAQ,EAAE,CAAA;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAW;IACnC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC3B,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAA;QAC3C,KAAK,MAAM,GAAG,IAAI,gBAAgB,EAAE,CAAC;YACnC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;gBAAE,OAAO,IAAI,CAAA;QACtC,CAAC;QACD,OAAO,KAAK,CAAA;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAA;IACd,CAAC;AACH,CAAC;AAED,SAAS,iCAAiC,CAAC,GAAW;IACpD,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QACtB,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;QAC3D,IAAI,IAAI,KAAK,aAAa,IAAI,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,EAAE,CAAC;YAC5D,MAAM,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAA;YAC1D,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAA;QACvB,CAAC;QACD,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;YACxB,MAAM,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;YAC/C,OAAO,qBAAqB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAA;QACnD,CAAC;QACD,OAAO,IAAI,CAAA;IACb,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAClB,CAAa,EACb,SAAmE;IAEnE,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,GAAG,CAAC,SAAS,KAAK,GAAG,CAAC,KAAK,IAAI,CAAC,CAAC,KAAK,EAAE,CAAA;QAC/D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAC/B,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;QACvE,IAAI,KAAK;YAAE,OAAO,KAAK,CAAA;IACzB
,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,IAAY,EAAE,GAAW;IAClE,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAA;IAEpB,wCAAwC;IACxC,MAAM,SAAS,GACb,CAAC,CAAC,6DAA6D,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,CAAA;IAC9F,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,QAAQ,GAAG,kBAAkB,CAAC,SAAS,EAAE,GAAG,CAAC,CAAA;QACnD,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,iCAAiC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;QAC7E,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,mCAAmC,OAAO,EAAE,EAAE,CAAA;QAC/E,CAAC;IACH,CAAC;IAED,qBAAqB;IACrB,MAAM,OAAO,GAAG,WAAW,CAAC,CAAC,EAAE;QAC7B,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,UAAU,EAAE;QAC5C,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,cAAc,EAAE;QAChD,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,qBAAqB,EAAE;QACvD,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE;QACxC,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE;QAC5C,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,qBAAqB,EAAE;KACpD,CAAC,CAAA;IACF,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,QAAQ,GAAG,kBAAkB,CAAC,OAAO,EAAE,GAAG,CAAC,CAAA;QACjD,IAAI,QAAQ,IAAI,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC3C,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAA;QAC1C,CAAC;QACD,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,iCAAiC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;QAC1E,IAAI,IAAI;YAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,mCAAmC,IAAI,EAAE,EAAE,CAAA;IACtF,CAAC;IAED,kBAAkB;IAClB,MAAM,QAAQ,GACZ,CAAC,CAAC,YAAY,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,mBAAmB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,CAAA;IAC3F,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,kBAAkB,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAA;QAClD,IAAI,QAAQ,IAAI,gBAAgB,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC3C,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAA;QAC1C,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC"}
|