@steipete/summarize-core 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/esm/content/index.js +5 -0
- package/dist/esm/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/client.js +28 -0
- package/dist/esm/content/link-preview/client.js.map +1 -0
- package/dist/esm/content/link-preview/content/article.js +155 -0
- package/dist/esm/content/link-preview/content/article.js.map +1 -0
- package/dist/esm/content/link-preview/content/cleaner.js +55 -0
- package/dist/esm/content/link-preview/content/cleaner.js.map +1 -0
- package/dist/esm/content/link-preview/content/constants.js +7 -0
- package/dist/esm/content/link-preview/content/constants.js.map +1 -0
- package/dist/esm/content/link-preview/content/fetcher.js +124 -0
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -0
- package/dist/esm/content/link-preview/content/firecrawl.js +86 -0
- package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -0
- package/dist/esm/content/link-preview/content/html.js +162 -0
- package/dist/esm/content/link-preview/content/html.js.map +1 -0
- package/dist/esm/content/link-preview/content/index.js +345 -0
- package/dist/esm/content/link-preview/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/content/jsonld.js +77 -0
- package/dist/esm/content/link-preview/content/jsonld.js.map +1 -0
- package/dist/esm/content/link-preview/content/parsers.js +77 -0
- package/dist/esm/content/link-preview/content/parsers.js.map +1 -0
- package/dist/esm/content/link-preview/content/podcast-utils.js +79 -0
- package/dist/esm/content/link-preview/content/podcast-utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/readability.js +53 -0
- package/dist/esm/content/link-preview/content/readability.js.map +1 -0
- package/dist/esm/content/link-preview/content/twitter-utils.js +68 -0
- package/dist/esm/content/link-preview/content/twitter-utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/types.js +4 -0
- package/dist/esm/content/link-preview/content/types.js.map +1 -0
- package/dist/esm/content/link-preview/content/utils.js +164 -0
- package/dist/esm/content/link-preview/content/utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/video.js +96 -0
- package/dist/esm/content/link-preview/content/video.js.map +1 -0
- package/dist/esm/content/link-preview/content/youtube.js +82 -0
- package/dist/esm/content/link-preview/content/youtube.js.map +1 -0
- package/dist/esm/content/link-preview/deps.js +20 -0
- package/dist/esm/content/link-preview/deps.js.map +1 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js +35 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -0
- package/dist/esm/content/link-preview/types.js +2 -0
- package/dist/esm/content/link-preview/types.js.map +1 -0
- package/dist/esm/content/transcript/cache.js +79 -0
- package/dist/esm/content/transcript/cache.js.map +1 -0
- package/dist/esm/content/transcript/index.js +130 -0
- package/dist/esm/content/transcript/index.js.map +1 -0
- package/dist/esm/content/transcript/normalize.js +43 -0
- package/dist/esm/content/transcript/normalize.js.map +1 -0
- package/dist/esm/content/transcript/providers/generic.js +11 -0
- package/dist/esm/content/transcript/providers/generic.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js +222 -0
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/apple.js +38 -0
- package/dist/esm/content/transcript/providers/podcast/apple.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/constants.js +8 -0
- package/dist/esm/content/transcript/providers/podcast/constants.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/flow-context.js +2 -0
- package/dist/esm/content/transcript/providers/podcast/flow-context.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/itunes.js +134 -0
- package/dist/esm/content/transcript/providers/podcast/itunes.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/json.js +34 -0
- package/dist/esm/content/transcript/providers/podcast/json.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/media.js +345 -0
- package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/results.js +28 -0
- package/dist/esm/content/transcript/providers/podcast/results.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js +253 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js +218 -0
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/spotify.js +113 -0
- package/dist/esm/content/transcript/providers/podcast/spotify.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast.js +222 -0
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/api.js +257 -0
- package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/apify.js +55 -0
- package/dist/esm/content/transcript/providers/youtube/apify.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js +409 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +166 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube.js +173 -0
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -0
- package/dist/esm/content/transcript/types.js +2 -0
- package/dist/esm/content/transcript/types.js.map +1 -0
- package/dist/esm/content/transcript/utils.js +259 -0
- package/dist/esm/content/transcript/utils.js.map +1 -0
- package/dist/esm/index.js +4 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/language.js +126 -0
- package/dist/esm/language.js.map +1 -0
- package/dist/esm/prompts/cli.js +20 -0
- package/dist/esm/prompts/cli.js.map +1 -0
- package/dist/esm/prompts/file.js +48 -0
- package/dist/esm/prompts/file.js.map +1 -0
- package/dist/esm/prompts/index.js +4 -0
- package/dist/esm/prompts/index.js.map +1 -0
- package/dist/esm/prompts/link-summary.js +116 -0
- package/dist/esm/prompts/link-summary.js.map +1 -0
- package/dist/esm/shared/contracts.js +2 -0
- package/dist/esm/shared/contracts.js.map +1 -0
- package/dist/esm/transcription/whisper/constants.js +8 -0
- package/dist/esm/transcription/whisper/constants.js.map +1 -0
- package/dist/esm/transcription/whisper/core.js +303 -0
- package/dist/esm/transcription/whisper/core.js.map +1 -0
- package/dist/esm/transcription/whisper/fal.js +41 -0
- package/dist/esm/transcription/whisper/fal.js.map +1 -0
- package/dist/esm/transcription/whisper/ffmpeg.js +179 -0
- package/dist/esm/transcription/whisper/ffmpeg.js.map +1 -0
- package/dist/esm/transcription/whisper/openai.js +47 -0
- package/dist/esm/transcription/whisper/openai.js.map +1 -0
- package/dist/esm/transcription/whisper/types.js +2 -0
- package/dist/esm/transcription/whisper/types.js.map +1 -0
- package/dist/esm/transcription/whisper/utils.js +63 -0
- package/dist/esm/transcription/whisper/utils.js.map +1 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js +227 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -0
- package/dist/esm/transcription/whisper.js +5 -0
- package/dist/esm/transcription/whisper.js.map +1 -0
- package/dist/types/content/index.d.ts +5 -0
- package/dist/types/content/link-preview/client.d.ts +18 -0
- package/dist/types/content/link-preview/content/article.d.ts +4 -0
- package/dist/types/content/link-preview/content/cleaner.d.ts +12 -0
- package/dist/types/content/link-preview/content/constants.d.ts +6 -0
- package/dist/types/content/link-preview/content/fetcher.d.ts +16 -0
- package/dist/types/content/link-preview/content/firecrawl.d.ts +14 -0
- package/dist/types/content/link-preview/content/html.d.ts +17 -0
- package/dist/types/content/link-preview/content/index.d.ts +4 -0
- package/dist/types/content/link-preview/content/jsonld.d.ts +6 -0
- package/dist/types/content/link-preview/content/parsers.d.ts +7 -0
- package/dist/types/content/link-preview/content/podcast-utils.d.ts +7 -0
- package/dist/types/content/link-preview/content/readability.d.ts +8 -0
- package/dist/types/content/link-preview/content/twitter-utils.d.ts +4 -0
- package/dist/types/content/link-preview/content/types.d.ts +61 -0
- package/dist/types/content/link-preview/content/utils.d.ts +17 -0
- package/dist/types/content/link-preview/content/video.d.ts +5 -0
- package/dist/types/content/link-preview/content/youtube.d.ts +1 -0
- package/dist/types/content/link-preview/deps.d.ts +167 -0
- package/dist/types/content/link-preview/fetch-with-timeout.d.ts +4 -0
- package/dist/types/content/link-preview/types.d.ts +37 -0
- package/dist/types/content/transcript/cache.d.ts +29 -0
- package/dist/types/content/transcript/index.d.ts +9 -0
- package/dist/types/content/transcript/normalize.d.ts +3 -0
- package/dist/types/content/transcript/providers/generic.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/apple-flow.d.ts +4 -0
- package/dist/types/content/transcript/providers/podcast/apple.d.ts +6 -0
- package/dist/types/content/transcript/providers/podcast/constants.d.ts +7 -0
- package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +11 -0
- package/dist/types/content/transcript/providers/podcast/itunes.d.ts +17 -0
- package/dist/types/content/transcript/providers/podcast/json.d.ts +8 -0
- package/dist/types/content/transcript/providers/podcast/media.d.ts +42 -0
- package/dist/types/content/transcript/providers/podcast/results.d.ts +10 -0
- package/dist/types/content/transcript/providers/podcast/rss.d.ts +22 -0
- package/dist/types/content/transcript/providers/podcast/spotify-flow.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/spotify.d.ts +24 -0
- package/dist/types/content/transcript/providers/podcast.d.ts +20 -0
- package/dist/types/content/transcript/providers/youtube/api.d.ts +26 -0
- package/dist/types/content/transcript/providers/youtube/apify.d.ts +1 -0
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +7 -0
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +17 -0
- package/dist/types/content/transcript/providers/youtube.d.ts +3 -0
- package/dist/types/content/transcript/types.d.ts +30 -0
- package/dist/types/content/transcript/utils.d.ts +8 -0
- package/dist/types/index.d.ts +4 -0
- package/dist/types/language.d.ts +25 -0
- package/dist/types/prompts/cli.d.ts +10 -0
- package/dist/types/prompts/file.d.ts +17 -0
- package/dist/types/prompts/index.d.ts +4 -0
- package/dist/types/prompts/link-summary.d.ts +29 -0
- package/dist/types/shared/contracts.d.ts +2 -0
- package/dist/types/transcription/whisper/constants.d.ts +7 -0
- package/dist/types/transcription/whisper/core.d.ts +20 -0
- package/dist/types/transcription/whisper/fal.d.ts +1 -0
- package/dist/types/transcription/whisper/ffmpeg.d.ts +16 -0
- package/dist/types/transcription/whisper/openai.d.ts +2 -0
- package/dist/types/transcription/whisper/types.d.ts +17 -0
- package/dist/types/transcription/whisper/utils.d.ts +5 -0
- package/dist/types/transcription/whisper/whisper-cpp.d.ts +9 -0
- package/dist/types/transcription/whisper.d.ts +5 -0
- package/package.json +54 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import { normalizeWhitespace } from './cleaner.js';
|
|
2
|
+
/**
 * Returns the first brace-balanced `{ ... }` substring of `source` that begins at or after `startAt`.
 *
 * Braces that appear inside single- or double-quoted strings are ignored, and a backslash inside a
 * string escapes the character that follows it. The result is the raw text (not parsed).
 *
 * @param {string} source - Text to scan.
 * @param {number} startAt - Index from which the opening `{` is searched.
 * @returns {string | null} The balanced object text, or `null` when there is no `{` or it never closes.
 */
function extractBalancedJsonObject(source, startAt) {
    const openIndex = source.indexOf('{', startAt);
    if (openIndex < 0) {
        return null;
    }
    let nesting = 0;
    // Quote character of the string literal currently being skipped; null while outside any string.
    let activeQuote = null;
    // True when the previous in-string character was a backslash.
    let skipNext = false;
    for (let pos = openIndex; pos < source.length; pos += 1) {
        const char = source[pos];
        if (!char) {
            continue;
        }
        if (activeQuote !== null) {
            if (skipNext) {
                skipNext = false;
            }
            else if (char === '\\') {
                skipNext = true;
            }
            else if (char === activeQuote) {
                activeQuote = null;
            }
            continue;
        }
        switch (char) {
            case '"':
            case "'":
                activeQuote = char;
                break;
            case '{':
                nesting += 1;
                break;
            case '}':
                nesting -= 1;
                if (nesting === 0) {
                    return source.slice(openIndex, pos + 1);
                }
                break;
            default:
                break;
        }
    }
    return null;
}
|
|
49
|
+
/**
 * Parses one candidate object literal and pulls `videoDetails.shortDescription` out of it.
 *
 * @param {string | null} objectText - Raw `{ ... }` text, or `null` when none was found.
 * @returns {string | null} The whitespace-normalized description, or `null` when the text is not
 *   valid JSON, lacks a string `videoDetails.shortDescription`, or normalizes to an empty value.
 */
function readShortDescriptionFromObjectText(objectText) {
    if (!objectText) {
        return null;
    }
    try {
        const parsed = JSON.parse(objectText);
        if (!parsed || typeof parsed !== 'object') {
            return null;
        }
        const videoDetails = parsed.videoDetails;
        if (!videoDetails || typeof videoDetails !== 'object') {
            return null;
        }
        const description = videoDetails.shortDescription;
        if (typeof description !== 'string') {
            return null;
        }
        const normalized = normalizeWhitespace(description);
        return normalized && normalized.length > 0 ? normalized : null;
    }
    catch {
        // Best-effort extraction: text that is not valid JSON simply yields no description.
        return null;
    }
}
/**
 * Extracts the video description from the `ytInitialPlayerResponse` object embedded in page HTML.
 *
 * For each occurrence of the `ytInitialPlayerResponse` token, the first `=` after it is located,
 * the next brace-balanced object after that `=` is parsed as JSON, and its
 * `videoDetails.shortDescription` is returned after whitespace normalization. Every occurrence is
 * tried in document order, so an earlier mention of the token that is not followed by a usable
 * object does not prevent a later assignment from being used.
 *
 * @param {string} html - Page HTML to search.
 * @returns {string | null} The normalized description, or `null` when `html` is not a string or no
 *   occurrence yields a non-empty description.
 */
export function extractYouTubeShortDescription(html) {
    if (typeof html !== 'string') {
        return null;
    }
    const token = 'ytInitialPlayerResponse';
    let tokenIndex = html.indexOf(token);
    while (tokenIndex >= 0) {
        const assignmentIndex = html.indexOf('=', tokenIndex);
        if (assignmentIndex < 0) {
            // No '=' after this occurrence means none after any later occurrence either.
            return null;
        }
        const description = readShortDescriptionFromObjectText(extractBalancedJsonObject(html, assignmentIndex));
        if (description !== null) {
            return description;
        }
        tokenIndex = html.indexOf(token, tokenIndex + token.length);
    }
    return null;
}
|
|
82
|
+
//# sourceMappingURL=youtube.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/youtube.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAA;AAElD,SAAS,yBAAyB,CAAC,MAAc,EAAE,OAAe;IAChE,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,CAAA;IAC1C,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,QAAQ,GAAG,KAAK,CAAA;IACpB,IAAI,KAAK,GAAqB,IAAI,CAAA;IAClC,IAAI,QAAQ,GAAG,KAAK,CAAA;IAEpB,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC9C,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAA;QACpB,IAAI,CAAC,EAAE,EAAE,CAAC;YACR,SAAQ;QACV,CAAC;QAED,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,QAAQ,EAAE,CAAC;gBACb,QAAQ,GAAG,KAAK,CAAA;gBAChB,SAAQ;YACV,CAAC;YACD,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;gBAChB,QAAQ,GAAG,IAAI,CAAA;gBACf,SAAQ;YACV,CAAC;YACD,IAAI,KAAK,IAAI,EAAE,KAAK,KAAK,EAAE,CAAC;gBAC1B,QAAQ,GAAG,KAAK,CAAA;gBAChB,KAAK,GAAG,IAAI,CAAA;YACd,CAAC;YACD,SAAQ;QACV,CAAC;QAED,IAAI,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YAC7B,QAAQ,GAAG,IAAI,CAAA;YACf,KAAK,GAAG,EAAE,CAAA;YACV,SAAQ;QACV,CAAC;QAED,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,IAAI,CAAC,CAAA;YACV,SAAQ;QACV,CAAC;QACD,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;YACf,KAAK,IAAI,CAAC,CAAA;YACV,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;gBAChB,OAAO,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,CAAA;YACnC,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED,MAAM,UAAU,8BAA8B,CAAC,IAAY;IACzD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,yBAAyB,CAAC,CAAA;IAC1D,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;QACnB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,UAAU,CAAC,CAAA;IACrD,IAAI,eAAe,GAAG,CAAC,EAAE,CAAC;QACxB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,UAAU,GAAG,yBAAyB,CAAC,IAAI,EAAE,eAAe,CAAC,CAAA;IACnE,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,IAAI,CAAA;IACb,CAAC;IAED,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAY,CAAA;QAChD,IAAI,CAAC,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;YAC1C,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,YAAY,GAAI,MAAkC,CA
AC,YAAY,CAAA;QACrE,IAAI,CAAC,YAAY,IAAI,OAAO,YAAY,KAAK,QAAQ,EAAE,CAAC;YACtD,OAAO,IAAI,CAAA;QACb,CAAC;QACD,MAAM,WAAW,GAAI,YAAwC,CAAC,gBAAgB,CAAA;QAC9E,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;YACpC,OAAO,IAAI,CAAA;QACb,CAAC;QAED,MAAM,UAAU,GAAG,mBAAmB,CAAC,WAAW,CAAC,CAAA;QACnD,OAAO,UAAU,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;IAChE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAA;IACb,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
// Enum-like constants for progress kinds (keeps call sites typo-resistant without TS `enum` runtime quirks).
// Frozen so that no consumer of this shared object can reassign, add or delete a kind at runtime.
/**
 * String identifiers for progress events, keyed by a PascalCase name.
 * Each value is the kebab-case form of its key.
 */
export const ProgressKind = Object.freeze({
    FetchHtmlStart: 'fetch-html-start',
    FetchHtmlProgress: 'fetch-html-progress',
    FetchHtmlDone: 'fetch-html-done',
    TranscriptMediaDownloadStart: 'transcript-media-download-start',
    TranscriptMediaDownloadProgress: 'transcript-media-download-progress',
    TranscriptMediaDownloadDone: 'transcript-media-download-done',
    TranscriptWhisperStart: 'transcript-whisper-start',
    TranscriptWhisperProgress: 'transcript-whisper-progress',
    TranscriptStart: 'transcript-start',
    TranscriptDone: 'transcript-done',
    FirecrawlStart: 'firecrawl-start',
    FirecrawlDone: 'firecrawl-done',
    NitterStart: 'nitter-start',
    NitterDone: 'nitter-done',
    BirdStart: 'bird-start',
    BirdDone: 'bird-done',
});
|
|
20
|
+
//# sourceMappingURL=deps.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"deps.js","sourceRoot":"","sources":["../../../../src/content/link-preview/deps.ts"],"names":[],"mappings":"AAEA,6GAA6G;AAC7G,MAAM,CAAC,MAAM,YAAY,GAAG;IAC1B,cAAc,EAAE,kBAAkB;IAClC,iBAAiB,EAAE,qBAAqB;IACxC,aAAa,EAAE,iBAAiB;IAEhC,4BAA4B,EAAE,iCAAiC;IAC/D,+BAA+B,EAAE,oCAAoC;IACrE,2BAA2B,EAAE,gCAAgC;IAE7D,sBAAsB,EAAE,0BAA0B;IAClD,yBAAyB,EAAE,6BAA6B;IAExD,eAAe,EAAE,kBAAkB;IACnC,cAAc,EAAE,iBAAiB;IAEjC,cAAc,EAAE,iBAAiB;IACjC,aAAa,EAAE,gBAAgB;IAE/B,WAAW,EAAE,cAAc;IAC3B,UAAU,EAAE,aAAa;IAEzB,SAAS,EAAE,YAAY;IACvB,QAAQ,EAAE,WAAW;CACb,CAAA"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// Timeout applied when the caller passes none, or passes a non-finite value.
const DEFAULT_TIMEOUT_MS = 120_000;
// Largest delay a timer accepts (2^31 - 1 ms). Node coerces anything larger to 1 ms, which would
// turn a "very long" timeout into an almost immediate abort.
const MAX_TIMER_DELAY_MS = 2_147_483_647;
/**
 * Calls `fetchImpl(input, init)` and aborts the request if it has not settled within `timeoutMs`.
 *
 * When `init.signal` is already set, the call is forwarded unchanged and NO timeout is applied:
 * the caller's signal is the only way to cancel in that case.
 *
 * @param {Function} fetchImpl - fetch-compatible function invoked as `fetchImpl(input, init)`.
 * @param {*} input - First argument passed through to `fetchImpl`.
 * @param {object} [init] - Request options; copied and extended with an abort signal.
 * @param {number} [timeoutMs] - Timeout in milliseconds. Non-finite values fall back to the
 *   default; the value is clamped to the range [0, 2147483647].
 * @returns {Promise<*>} Whatever `fetchImpl` resolves with.
 * @throws {Error} With `name === 'FetchTimeoutError'` (and the abort error as `cause`) when
 *   `fetchImpl` rejects with an `AbortError`; any other rejection is rethrown unchanged.
 */
export async function fetchWithTimeout(fetchImpl, input, init, timeoutMs = DEFAULT_TIMEOUT_MS) {
    if (init?.signal) {
        return fetchImpl(input, init);
    }
    const controller = new AbortController();
    const requestedTimeoutMs = Number.isFinite(timeoutMs) ? timeoutMs : DEFAULT_TIMEOUT_MS;
    const clampedTimeoutMs = Math.min(MAX_TIMER_DELAY_MS, Math.max(0, requestedTimeoutMs));
    const timer = setTimeout(() => {
        if (typeof DOMException === 'function') {
            controller.abort(new DOMException('Request timed out', 'AbortError'));
            return;
        }
        controller.abort();
    }, clampedTimeoutMs);
    try {
        return await fetchImpl(input, { ...init, signal: controller.signal });
    }
    catch (error) {
        if (error instanceof Error && error.name === 'AbortError') {
            // Keep the original abort error reachable for debugging.
            const timeoutError = new Error(`Fetch aborted after ${clampedTimeoutMs}ms`, { cause: error });
            timeoutError.name = 'FetchTimeoutError';
            throw timeoutError;
        }
        throw error;
    }
    finally {
        // Always release the timer so a settled request never keeps the process alive.
        clearTimeout(timer);
    }
}
|
|
35
|
+
//# sourceMappingURL=fetch-with-timeout.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-with-timeout.js","sourceRoot":"","sources":["../../../../src/content/link-preview/fetch-with-timeout.ts"],"names":[],"mappings":"AAAA,MAAM,kBAAkB,GAAG,OAAO,CAAA;AAKlC,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,SAAoB,EACpB,KAAwB,EACxB,IAAwB,EACxB,YAAoB,kBAAkB;IAEtC,IAAI,IAAI,EAAE,MAAM,EAAE,CAAC;QACjB,OAAO,SAAS,CAAC,KAAK,EAAE,IAAI,IAAI,EAAE,CAAC,CAAA;IACrC,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAA;IACxC,MAAM,mBAAmB,GAAG,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,kBAAkB,CAAA;IACvF,MAAM,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,mBAAmB,CAAC,CAAA;IAEzD,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;QAC5B,IAAI,OAAO,YAAY,KAAK,UAAU,EAAE,CAAC;YACvC,UAAU,CAAC,KAAK,CAAC,IAAI,YAAY,CAAC,mBAAmB,EAAE,YAAY,CAAC,CAAC,CAAA;YACrE,OAAM;QACR,CAAC;QACD,UAAU,CAAC,KAAK,EAAE,CAAA;IACpB,CAAC,EAAE,gBAAgB,CAAC,CAAA;IAEpB,IAAI,CAAC;QACH,MAAM,SAAS,GAAgB;YAC7B,GAAG,IAAI;YACP,MAAM,EAAE,UAAU,CAAC,MAAM;SAC1B,CAAA;QACD,OAAO,MAAM,SAAS,CAAC,KAAK,EAAE,SAAS,CAAC,CAAA;IAC1C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YAC1D,MAAM,YAAY,GAAG,IAAI,KAAK,CAAC,uBAAuB,gBAAgB,IAAI,CAAC,CAAA;YAC3E,YAAY,CAAC,IAAI,GAAG,mBAAmB,CAAA;YACvC,MAAM,YAAY,CAAA;QACpB,CAAC;QACD,MAAM,KAAK,CAAA;IACb,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,KAAK,CAAC,CAAA;IACrB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/content/link-preview/types.ts"],"names":[],"mappings":"AAWA,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,SAAS,EAAE,QAAQ,CAAU,CAAA"}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/** Seven days, in milliseconds. */
export const DEFAULT_TTL_MS = 7 * 24 * 60 * 60 * 1000;
/** Six hours, in milliseconds. */
export const NEGATIVE_TTL_MS = 6 * 60 * 60 * 1000;
|
|
3
|
+
/**
 * Reads the cached transcript entry for `url` and describes the outcome.
 *
 * The returned object always has three fields:
 * - `cached`: the raw entry from `transcriptCache.get({ url })`, or `null` when there is no cache
 *   or no entry;
 * - `resolution`: `{ text, source, metadata }` built from the entry, but only for a usable hit
 *   (entry present, `cacheMode` is not `'bypass'`, entry not marked `expired`); otherwise `null`;
 * - `diagnostics`: cache mode/status, mapped provider, whether text was present, and notes.
 *
 * @param {object} params
 * @param {string} params.url - Key passed to the cache lookup.
 * @param {string} params.cacheMode - `'bypass'` disables serving from cache.
 * @param {object | null | undefined} params.transcriptCache - Cache with an async `get({ url })`.
 * @returns {Promise<{ cached: object | null, resolution: object | null, diagnostics: object }>}
 */
export const readTranscriptCache = async ({ url, cacheMode, transcriptCache }) => {
    let entry = null;
    if (transcriptCache) {
        entry = await transcriptCache.get({ url });
    }
    const diagnostics = buildBaseDiagnostics(cacheMode);
    if (!entry) {
        return { cached: null, resolution: null, diagnostics };
    }

    // Record what the cache held, even when the entry ends up not being served.
    const source = mapCachedSource(entry.source);
    diagnostics.provider = source;
    diagnostics.attemptedProviders = source ? [source] : [];
    diagnostics.textProvided = (entry.content?.length ?? 0) > 0;

    if (cacheMode === 'bypass') {
        diagnostics.notes = appendNote(diagnostics.notes, 'Cached transcript ignored due to bypass request');
        return { cached: entry, resolution: null, diagnostics };
    }
    if (entry.expired) {
        diagnostics.cacheStatus = 'expired';
        diagnostics.notes = appendNote(diagnostics.notes, 'Cached transcript expired; fetching fresh copy');
        return { cached: entry, resolution: null, diagnostics };
    }

    diagnostics.cacheStatus = 'hit';
    diagnostics.notes = appendNote(diagnostics.notes, 'Served transcript from cache');
    return {
        cached: entry,
        resolution: {
            text: entry.content,
            source,
            metadata: entry.metadata ?? null,
        },
        diagnostics,
    };
};
|
|
31
|
+
/**
 * Creates the initial diagnostics record for a cache lookup.
 *
 * Starts as a `'miss'` with no provider and no text; when `cacheMode` is `'bypass'` the status is
 * `'bypassed'` and a note saying so is pre-filled.
 *
 * @param {string} cacheMode - Requested cache mode.
 * @returns {object} A fresh diagnostics object (new `attemptedProviders` array on every call).
 */
const buildBaseDiagnostics = (cacheMode) => {
    const bypassing = cacheMode === 'bypass';
    return {
        cacheMode,
        cacheStatus: bypassing ? 'bypassed' : 'miss',
        provider: null,
        attemptedProviders: [],
        textProvided: false,
        notes: bypassing ? 'Cache bypass requested' : null,
    };
};
|
|
39
|
+
/**
 * Joins a new note onto an existing notes string using "; " as the separator.
 *
 * @param {string | null | undefined} existing - Notes so far; any falsy value counts as "none".
 * @param {string} next - Note to add.
 * @returns {string} `next` alone when there were no notes, otherwise `existing; next`.
 */
const appendNote = (existing, next) => (existing ? `${existing}; ${next}` : next);
|
|
45
|
+
/**
 * Normalizes the `source` value stored on a cache entry.
 *
 * @param {string | null} source - Source recorded in the cache.
 * @returns {string | null} `null` for `null`; the value itself when it is one of the recognized
 *   source names listed below; `'unknown'` for anything else (including `undefined`).
 */
export const mapCachedSource = (source) => {
    switch (source) {
        case null:
            return null;
        case 'youtubei':
        case 'captionTracks':
        case 'yt-dlp':
        case 'podcastTranscript':
        case 'whisper':
        case 'apify':
        case 'html':
        case 'unavailable':
            return source;
        default:
            return 'unknown';
    }
};
|
|
60
|
+
/**
 * Stores a provider result in the transcript cache.
 *
 * Nothing is written when there is no cache, or when the result has both `source === null` and
 * `text === null`. Results with text are stored with `DEFAULT_TTL_MS`; results without text are
 * stored with `NEGATIVE_TTL_MS`. A missing source is recorded as `'unknown'` when text exists and
 * `'unavailable'` when it does not.
 *
 * @param {object} params
 * @param {string} params.url - Cache key.
 * @param {string} params.service - Forwarded to the cache as-is.
 * @param {*} params.resourceKey - Forwarded to the cache as-is.
 * @param {{ text: string | null, source: string | null, metadata?: * }} params.result - Result to store.
 * @param {object | null | undefined} params.transcriptCache - Cache with an async `set(entry)`.
 * @returns {Promise<void>}
 */
export const writeTranscriptCache = async ({ url, service, resourceKey, result, transcriptCache }) => {
    if (!transcriptCache || (result.source === null && result.text === null)) {
        return;
    }
    const { text, source, metadata } = result;
    const hasText = Boolean(text);
    const entry = {
        url,
        service,
        resourceKey,
        ttlMs: hasText ? DEFAULT_TTL_MS : NEGATIVE_TTL_MS,
        content: text,
        source: source ?? (hasText ? 'unknown' : 'unavailable'),
        metadata: metadata ?? null,
    };
    await transcriptCache.set(entry);
};
|
|
79
|
+
//# sourceMappingURL=cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../../../src/content/transcript/cache.ts"],"names":[],"mappings":"AAQA,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAA;AACrD,MAAM,CAAC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAA;AAmBjD,MAAM,CAAC,MAAM,mBAAmB,GAAG,KAAK,EAAE,EACxC,GAAG,EACH,SAAS,EACT,eAAe,GACI,EAAkC,EAAE;IACvD,MAAM,MAAM,GAAG,eAAe,CAAC,CAAC,CAAC,MAAM,eAAe,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IAC1E,MAAM,WAAW,GAAG,oBAAoB,CAAC,SAAS,CAAC,CAAA;IAEnD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IACxD,CAAC;IAED,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;IAC/C,WAAW,CAAC,QAAQ,GAAG,QAAQ,CAAA;IAC/B,WAAW,CAAC,kBAAkB,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC3D,WAAW,CAAC,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAE/E,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC3B,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,iDAAiD,CAClD,CAAA;QACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IAClD,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,WAAW,CAAC,WAAW,GAAG,SAAS,CAAA;QACnC,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,gDAAgD,CACjD,CAAA;QACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IAClD,CAAC;IAED,WAAW,CAAC,WAAW,GAAG,KAAK,CAAA;IAC/B,WAAW,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,8BAA8B,CAAC,CAAA;IAEjF,MAAM,UAAU,GAAyB;QACvC,IAAI,EAAE,MAAM,CAAC,OAAO;QACpB,MAAM,EAAE,QAAQ;QAChB,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,IAAI;KAClC,CAAA;IACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,CAAA;AAC5C,CAAC,CAAA;AAED,MAAM,oBAAoB,GAAG,CAAC,SAAoB,EAAoB,EAAE,CAAC,CAAC;IACxE,SAAS;IACT,WAAW,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,MAAM;IACzD,QAAQ,EAAE,IAAI;IACd,kBAAkB,EAAE,EAAE;IACtB,YAAY,EAAE,KAAK;IACnB,KAAK,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,IAAI;CAChE,CAAC,CAAA;AAEF,MAAM,UAAU,GAAG,CAAC,QAAmC,EAAE,IAAY,EAAU,EAAE;IAC/E,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAA
O,IAAI,CAAA;IACb,CAAC;IACD,OAAO,GAAG,QAAQ,KAAK,IAAI,EAAE,CAAA;AAC/B,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,MAAqB,EAA2B,EAAE;IAChF,IAAI,MAAM,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IAChC,IACE,MAAM,KAAK,UAAU;QACrB,MAAM,KAAK,eAAe;QAC1B,MAAM,KAAK,QAAQ;QACnB,MAAM,KAAK,mBAAmB;QAC9B,MAAM,KAAK,SAAS;QACpB,MAAM,KAAK,OAAO;QAClB,MAAM,KAAK,MAAM;QACjB,MAAM,KAAK,aAAa,EACxB,CAAC;QACD,OAAO,MAAM,CAAA;IACf,CAAC;IACD,OAAO,SAAS,CAAA;AAClB,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,oBAAoB,GAAG,KAAK,EAAE,EACzC,GAAG,EACH,OAAO,EACP,WAAW,EACX,MAAM,EACN,eAAe,GAWhB,EAAiB,EAAE;IAClB,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,OAAM;IACR,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,KAAK,IAAI,IAAI,MAAM,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;QACnD,OAAM;IACR,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,eAAe,CAAA;IAC5D,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa,CAAC,CAAA;IAEjF,MAAM,eAAe,CAAC,GAAG,CAAC;QACxB,GAAG;QACH,OAAO;QACP,WAAW;QACX,KAAK;QACL,OAAO,EAAE,MAAM,CAAC,IAAI;QACpB,MAAM,EAAE,cAAc;QACtB,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,IAAI;KAClC,CAAC,CAAA;AACJ,CAAC,CAAA"}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { mapCachedSource, readTranscriptCache, writeTranscriptCache } from './cache.js';
|
|
2
|
+
import { canHandle as canHandleGeneric, fetchTranscript as fetchGeneric, } from './providers/generic.js';
|
|
3
|
+
import { canHandle as canHandlePodcast, fetchTranscript as fetchPodcast, } from './providers/podcast.js';
|
|
4
|
+
import { canHandle as canHandleYoutube, fetchTranscript as fetchYoutube, } from './providers/youtube.js';
|
|
5
|
+
import { extractEmbeddedYouTubeUrlFromHtml, extractYouTubeVideoId as extractYouTubeVideoIdInternal, isYouTubeUrl as isYouTubeUrlInternal, } from './utils.js';
|
|
6
|
+
// Id of the generic provider entry in PROVIDERS.
const GENERIC_PROVIDER_ID = 'generic';
// Registry of transcript providers: each entry pairs an id with that provider's `canHandle` and
// `fetchTranscript` functions.
// NOTE(review): the generic entry is listed last, presumably as the fallback; the selection logic
// is not visible here, so confirm before reordering.
const PROVIDERS = [
    {
        id: 'youtube',
        canHandle: canHandleYoutube,
        fetchTranscript: fetchYoutube,
    },
    {
        id: 'podcast',
        canHandle: canHandlePodcast,
        fetchTranscript: fetchPodcast,
    },
    {
        id: GENERIC_PROVIDER_ID,
        canHandle: canHandleGeneric,
        fetchTranscript: fetchGeneric,
    },
];
|
|
12
|
+
/**
 * Resolves a transcript for a link, trying the transcript cache first and the
 * selected provider second.
 *
 * Steps, in order: trim the URL, substitute an embedded YouTube URL when the
 * page HTML yields one, select a provider, read the cache, run the provider,
 * write the provider result back to the cache, and finally fall back to
 * previously cached content when the provider returned no text.
 *
 * @param {string} url - Link to resolve; trimmed before use.
 * @param {string | null | undefined} html - Page HTML; only scanned for an
 *   embedded YouTube URL when `url` is not itself a YouTube URL.
 * @param {object} deps - Collaborators read by this function: `transcriptCache`,
 *   `fetch`, `scrapeWithFirecrawl`, `apifyApiToken`, `ytDlpPath`, `falApiKey`,
 *   `openaiApiKey` and the optional `onProgress` callback.
 * @param {object} [options]
 * @param {string} [options.youtubeTranscriptMode] - Forwarded to the provider; 'auto' when omitted.
 * @param {string} [options.cacheMode] - 'default' when omitted; 'bypass' disables the cached-content fallback.
 * @returns {Promise<object>} On a cache hit, the cached resolution plus `diagnostics`;
 *   otherwise `{ text, source, metadata, diagnostics }`.
 */
export const resolveTranscriptForLink = async (url, html, deps, { youtubeTranscriptMode, cacheMode: providedCacheMode } = {}) => {
    const normalizedUrl = url.trim();

    // Only look inside the HTML when the link itself is not a YouTube URL.
    let embeddedYoutubeUrl = null;
    if (!isYouTubeUrlInternal(normalizedUrl) && html) {
        embeddedYoutubeUrl = await extractEmbeddedYouTubeUrlFromHtml(html);
    }

    const effectiveUrl = embeddedYoutubeUrl ?? normalizedUrl;
    const resourceKey = extractResourceKey(effectiveUrl);
    const baseContext = { url: effectiveUrl, html, resourceKey };
    const provider = selectProvider(baseContext);
    const cacheMode = providedCacheMode ?? 'default';

    // The cache is keyed by the URL the caller passed in, not the embedded one.
    const cacheOutcome = await readTranscriptCache({
        url: normalizedUrl,
        cacheMode,
        transcriptCache: deps.transcriptCache,
    });

    const cacheDiagnostics = cacheOutcome.diagnostics;
    const diagnostics = {
        cacheMode,
        cacheStatus: cacheDiagnostics.cacheStatus,
        textProvided: cacheDiagnostics.textProvided,
        provider: cacheDiagnostics.provider,
        attemptedProviders: [],
        notes: cacheDiagnostics.notes ?? null,
    };

    if (cacheOutcome.resolution) {
        return { ...cacheOutcome.resolution, diagnostics };
    }

    // Progress events are only emitted for the youtube and podcast providers.
    const startHints = {
        youtube: 'YouTube: resolving transcript',
        podcast: 'Podcast: resolving transcript',
    };
    const shouldReportProgress = provider.id === 'youtube' || provider.id === 'podcast';

    if (shouldReportProgress) {
        deps.onProgress?.({
            kind: 'transcript-start',
            url: normalizedUrl,
            service: provider.id,
            hint: startHints[provider.id],
        });
    }

    const providerOptions = {
        fetch: deps.fetch,
        scrapeWithFirecrawl: deps.scrapeWithFirecrawl,
        apifyApiToken: deps.apifyApiToken,
        ytDlpPath: deps.ytDlpPath,
        falApiKey: deps.falApiKey,
        openaiApiKey: deps.openaiApiKey,
        onProgress: deps.onProgress ?? null,
        youtubeTranscriptMode: youtubeTranscriptMode ?? 'auto',
    };
    const providerResult = await executeProvider(provider, baseContext, providerOptions);

    if (shouldReportProgress) {
        const doneHint = providerResult.source ? `${provider.id}/${providerResult.source}` : provider.id;
        deps.onProgress?.({
            kind: 'transcript-done',
            url: normalizedUrl,
            ok: Boolean(providerResult.text && providerResult.text.length > 0),
            service: provider.id,
            source: providerResult.source,
            hint: doneHint,
        });
    }

    diagnostics.provider = providerResult.source;
    diagnostics.attemptedProviders = providerResult.attemptedProviders;
    diagnostics.textProvided = Boolean(providerResult.text && providerResult.text.length > 0);
    if (providerResult.notes) {
        diagnostics.notes = appendNote(diagnostics.notes, providerResult.notes);
    }

    // Skip the cache write only when the provider reported neither a source nor any text.
    const providerReportedNothing = providerResult.source === null && providerResult.text === null;
    if (!providerReportedNothing) {
        await writeTranscriptCache({
            url: normalizedUrl,
            service: provider.id,
            resourceKey,
            result: providerResult,
            transcriptCache: deps.transcriptCache,
        });
    }

    // Provider miss: reuse cached content if there is any, unless the cache is bypassed.
    const cachedEntry = cacheOutcome.cached;
    if (!providerResult.text && cachedEntry?.content && cacheMode !== 'bypass') {
        diagnostics.cacheStatus = 'fallback';
        diagnostics.provider = mapCachedSource(cachedEntry.source);
        diagnostics.textProvided = Boolean(cachedEntry.content && cachedEntry.content.length > 0);
        diagnostics.notes = appendNote(diagnostics.notes, 'Falling back to cached transcript content after provider miss');
        return {
            text: cachedEntry.content,
            source: diagnostics.provider,
            metadata: cachedEntry.metadata ?? null,
            diagnostics,
        };
    }

    return {
        text: providerResult.text,
        source: providerResult.source,
        metadata: providerResult.metadata ?? null,
        diagnostics,
    };
};
|
|
106
|
+
/**
 * Derives the resource key for a URL: the YouTube video id for YouTube URLs,
 * `null` for everything else.
 *
 * @param {string} url - URL to inspect.
 * @returns {*} Result of extractYouTubeVideoIdInternal for YouTube URLs, otherwise `null`.
 */
const extractResourceKey = (url) => (isYouTubeUrlInternal(url) ? extractYouTubeVideoIdInternal(url) : null);
|
|
112
|
+
/**
 * Chooses the provider that will handle `context`.
 *
 * The first non-generic provider whose `canHandle(context)` is truthy wins;
 * otherwise the generic provider is returned.
 *
 * @param {object} context - Context passed to each provider's `canHandle`.
 * @returns {object} The selected entry from PROVIDERS.
 * @throws {Error} When no specialized provider matches and no generic provider is registered.
 */
const selectProvider = (context) => {
    const isGeneric = (candidate) => candidate.id === GENERIC_PROVIDER_ID;
    const fallback = PROVIDERS.find((candidate) => isGeneric(candidate));
    const specialized = PROVIDERS.find((candidate) => !isGeneric(candidate) && candidate.canHandle(context));
    const chosen = specialized || fallback;
    if (!chosen) {
        throw new Error('Generic transcript provider is not registered');
    }
    return chosen;
};
|
|
123
|
+
/**
 * Runs a provider's `fetchTranscript` with the given context and options.
 *
 * @param {object} provider - Provider exposing `fetchTranscript(context, options)`.
 * @param {object} context - Passed through unchanged as the first argument.
 * @param {object} options - Passed through unchanged as the second argument.
 * @returns {Promise<*>} Whatever the provider's `fetchTranscript` resolves to.
 */
const executeProvider = async (provider, context, options) => {
    // Called as a method so the provider keeps its own `this`.
    return provider.fetchTranscript(context, options);
};
|
|
124
|
+
/**
 * Joins two diagnostic notes with "; ".
 *
 * @param {string | null | undefined} existing - Notes collected so far; any falsy value counts as "none".
 * @param {string} next - Note to add.
 * @returns {string} `next` alone when there are no existing notes, otherwise `existing; next`.
 */
const appendNote = (existing, next) => (existing ? `${existing}; ${next}` : next);
|
|
130
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/content/transcript/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAA;AACvF,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAA;AAC/B,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAA;AAC/B,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAA;AAO/B,OAAO,EACL,iCAAiC,EACjC,qBAAqB,IAAI,6BAA6B,EACtD,YAAY,IAAI,oBAAoB,GACrC,MAAM,YAAY,CAAA;AAOnB,MAAM,SAAS,GAAqB;IAClC,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;IAC7E,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;IAC7E,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;CAC9E,CAAA;AACD,MAAM,mBAAmB,GAAG,SAAS,CAAA;AAErC,MAAM,CAAC,MAAM,wBAAwB,GAAG,KAAK,EAC3C,GAAW,EACX,IAAmB,EACnB,IAAqB,EACrB,EAAE,qBAAqB,EAAE,SAAS,EAAE,iBAAiB,KAA+B,EAAE,EACvD,EAAE;IACjC,MAAM,aAAa,GAAG,GAAG,CAAC,IAAI,EAAE,CAAA;IAChC,MAAM,kBAAkB,GACtB,CAAC,oBAAoB,CAAC,aAAa,CAAC,IAAI,IAAI;QAC1C,CAAC,CAAC,MAAM,iCAAiC,CAAC,IAAI,CAAC;QAC/C,CAAC,CAAC,IAAI,CAAA;IACV,MAAM,YAAY,GAAG,kBAAkB,IAAI,aAAa,CAAA;IACxD,MAAM,WAAW,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAAA;IACpD,MAAM,WAAW,GAAoB,EAAE,GAAG,EAAE,YAAY,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IAC7E,MAAM,QAAQ,GAAmB,cAAc,CAAC,WAAW,CAAC,CAAA;IAC5D,MAAM,SAAS,GAAc,iBAAiB,IAAI,SAAS,CAAA;IAE3D,MAAM,YAAY,GAAG,MAAM,mBAAmB,CAAC;QAC7C,GAAG,EAAE,aAAa;QAClB,SAAS;QACT,eAAe,EAAE,IAAI,CAAC,eAAe;KACtC,CAAC,CAAA;IAEF,MAAM,WAAW,GAA0B;QACzC,SAAS;QACT,WAAW,EAAE,YAAY,CAAC,WAAW,CAAC,WAAW;QACjD,YAAY,EAAE,YAAY,CAAC,WAAW,CAAC,YAAY;QACnD,QAAQ,EAAE,YAAY,CAAC,WAAW,CAAC,QAAQ;QAC3C,kBAAkB,EAAE,EAAE;QACtB,KAAK,EAAE,YAAY,CAAC,WAAW,CAAC,KAAK,IAAI,IAAI;KAC9C,CAAA;IAED,IAAI,YAAY,CAAC,UAAU,EAAE,CAAC;QAC5B,OAAO;YACL,GAAG,YAAY,CAAC,UAAU;YAC1B,WAAW;SACZ,CAAA;IACH,CAAC;IAED,MAAM,oBAAoB,GAAG,QAAQ,CAAC,EAAE,KAAK,SAAS,IAAI,QAAQ,CAAC,EAAE,KAAK,SAAS,CAAA;IACnF,IAAI,oBAAoB,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,IAAI,EAAE,kBAAkB;YACxB,GAAG,EAAE,aA
Aa;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,IAAI,EACF,QAAQ,CAAC,EAAE,KAAK,SAAS;gBACvB,CAAC,CAAC,+BAA+B;gBACjC,CAAC,CAAC,+BAA+B;SACtC,CAAC,CAAA;IACJ,CAAC;IAED,MAAM,cAAc,GAAG,MAAM,eAAe,CAAC,QAAQ,EAAE,WAAW,EAAE;QAClE,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;QAC7C,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,qBAAqB,EAAE,qBAAqB,IAAI,MAAM;KACvD,CAAC,CAAA;IAEF,IAAI,oBAAoB,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,IAAI,EAAE,iBAAiB;YACvB,GAAG,EAAE,aAAa;YAClB,EAAE,EAAE,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAClE,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,cAAc,CAAC,MAAM;YAC7B,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,EAAE,IAAI,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE;SACtF,CAAC,CAAA;IACJ,CAAC;IAED,WAAW,CAAC,QAAQ,GAAG,cAAc,CAAC,MAAM,CAAA;IAC5C,WAAW,CAAC,kBAAkB,GAAG,cAAc,CAAC,kBAAkB,CAAA;IAClE,WAAW,CAAC,YAAY,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IACzF,IAAI,cAAc,CAAC,KAAK,EAAE,CAAC;QACzB,WAAW,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,cAAc,CAAC,KAAK,CAAC,CAAA;IACzE,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,KAAK,IAAI,IAAI,cAAc,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;QACnE,MAAM,oBAAoB,CAAC;YACzB,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,WAAW;YACX,MAAM,EAAE,cAAc;YACtB,eAAe,EAAE,IAAI,CAAC,eAAe;SACtC,CAAC,CAAA;IACJ,CAAC;IAED,IAAI,CAAC,cAAc,CAAC,IAAI,IAAI,YAAY,CAAC,MAAM,EAAE,OAAO,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QACnF,WAAW,CAAC,WAAW,GAAG,UAAU,CAAA;QACpC,WAAW,CAAC,QAAQ,GAAG,eAAe,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;QAClE,WAAW,CAAC,YAAY,GAAG,OAAO,CAChC,YAAY,CAAC,MAAM,CAAC,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CACtE,CAAA;QACD,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,+DAA+D,CAChE,CAAA;QAED,OAAO;YACL,IAAI,EAAE,YAAY,CAAC,MAAM,CAAC,OAAO;YACjC,MAAM,EAAE,WAAW,CAAC,QAAQ;YAC5B,QAAQ,EAAE,YAAY,CAAC,MAAM,CAAC,QAAQ,IAAI,IAAI;YAC9C,WAAW;SACZ,CAAA;IACH,CA
AC;IAED,OAAO;QACL,IAAI,EAAE,cAAc,CAAC,IAAI;QACzB,MAAM,EAAE,cAAc,CAAC,MAAM;QAC7B,QAAQ,EAAE,cAAc,CAAC,QAAQ,IAAI,IAAI;QACzC,WAAW;KACZ,CAAA;AACH,CAAC,CAAA;AAED,MAAM,kBAAkB,GAAG,CAAC,GAAW,EAAiB,EAAE;IACxD,IAAI,oBAAoB,CAAC,GAAG,CAAC,EAAE,CAAC;QAC9B,OAAO,6BAA6B,CAAC,GAAG,CAAC,CAAA;IAC3C,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC,CAAA;AAED,MAAM,cAAc,GAAG,CAAC,OAAwB,EAAkB,EAAE;IAClE,MAAM,qBAAqB,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,KAAK,mBAAmB,CAAC,CAAA;IAE/F,MAAM,mBAAmB,GAAG,SAAS,CAAC,IAAI,CACxC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,KAAK,mBAAmB,IAAI,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,CACjF,CAAA;IACD,IAAI,mBAAmB,EAAE,CAAC;QACxB,OAAO,mBAAmB,CAAA;IAC5B,CAAC;IAED,IAAI,qBAAqB,EAAE,CAAC;QAC1B,OAAO,qBAAqB,CAAA;IAC9B,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAA;AAClE,CAAC,CAAA;AAED,MAAM,eAAe,GAAG,KAAK,EAC3B,QAAwB,EACxB,OAAwB,EACxB,OAA6B,EACJ,EAAE,CAAC,QAAQ,CAAC,eAAe,CAAC,OAAO,EAAE,OAAO,CAAC,CAAA;AAExE,MAAM,UAAU,GAAG,CAAC,QAAmC,EAAE,IAAY,EAAU,EAAE;IAC/E,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IACD,OAAO,GAAG,QAAQ,KAAK,IAAI,EAAE,CAAA;AAC/B,CAAC,CAAA"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { isRecord } from './utils.js';
|
|
2
|
+
/**
 * Normalizes whitespace in transcript text.
 *
 * Non-breaking spaces become regular spaces, runs of tabs/spaces collapse to a
 * single space, whitespace around line breaks is removed, and the result is
 * trimmed.
 *
 * @param {string} input - Raw transcript text.
 * @returns {string} The normalized text (possibly empty).
 */
export const normalizeTranscriptText = (input) => {
    const withoutNbsp = input.replaceAll('\u00A0', ' ');
    const singleSpaced = withoutNbsp.replace(/[\t ]+/g, ' ');
    // `\s` also matches `\n`, so each whitespace run containing a line break becomes one `\n`.
    const tightLineBreaks = singleSpaced.replace(/\s*\n\s*/g, '\n');
    // NOTE(review): after the previous step no consecutive newlines appear to remain,
    // so this cap looks like a no-op; kept to preserve behavior — confirm intent.
    const cappedLineBreaks = tightLineBreaks.replace(/\n{3,}/g, '\n\n');
    return cappedLineBreaks.trim();
};
|
|
8
|
+
/**
 * Joins transcript lines with newlines and normalizes the result.
 *
 * @param {string[]} lines - Transcript lines.
 * @returns {string | null} Normalized text, or `null` when `lines` is empty or
 *   the normalized text is empty.
 */
export const normalizeTranscriptLines = (lines) => {
    if (lines.length === 0) {
        return null;
    }
    const joined = lines.join('\n');
    const cleaned = normalizeTranscriptText(joined);
    return cleaned.length === 0 ? null : cleaned;
};
|
|
15
|
+
/**
 * Normalizes a transcript payload that may arrive in one of three shapes:
 * a plain string, an array of records carrying a string `text` field, or a
 * single record carrying a string `text` field.
 *
 * @param {unknown} raw - Payload to normalize.
 * @returns {string | null} Normalized transcript text, or `null` when no
 *   non-empty text can be extracted.
 */
export const normalizeApifyTranscript = (raw) => {
    // Shared tail for the string-shaped inputs: normalize, then map '' to null.
    const normalizeOrNull = (value) => {
        const cleaned = normalizeTranscriptText(value);
        return cleaned.length > 0 ? cleaned : null;
    };

    if (typeof raw === 'string') {
        return normalizeOrNull(raw);
    }

    if (Array.isArray(raw)) {
        // Keep only trimmed, non-empty `text` values from record entries.
        const lines = raw.flatMap((entry) => {
            if (!isRecord(entry)) {
                return [];
            }
            const textValue = entry.text;
            if (typeof textValue !== 'string') {
                return [];
            }
            const trimmedLine = textValue.trim();
            return trimmedLine.length > 0 ? [trimmedLine] : [];
        });
        if (lines.length > 0) {
            return normalizeTranscriptLines(lines) ?? null;
        }
        // An array with no usable lines falls through to the record check below.
    }

    if (isRecord(raw)) {
        const singleText = raw.text;
        if (typeof singleText === 'string') {
            return normalizeOrNull(singleText);
        }
    }

    return null;
};
|
|
43
|
+
//# sourceMappingURL=normalize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize.js","sourceRoot":"","sources":["../../../../src/content/transcript/normalize.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAA;AAErC,MAAM,CAAC,MAAM,uBAAuB,GAAG,CAAC,KAAa,EAAU,EAAE,CAC/D,KAAK;KACF,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;KACzB,UAAU,CAAC,SAAS,EAAE,GAAG,CAAC;KAC1B,UAAU,CAAC,WAAW,EAAE,IAAI,CAAC;KAC7B,UAAU,CAAC,SAAS,EAAE,MAAM,CAAC;KAC7B,IAAI,EAAE,CAAA;AAEX,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,KAAwB,EAAiB,EAAE;IAClF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,UAAU,GAAG,uBAAuB,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;IAC5D,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;AAClD,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,wBAAwB,GAAG,CAAC,GAAY,EAAiB,EAAE;IACtE,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAA;QAC5C,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAA;IAC5C,CAAC;IAED,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QAEvB,MAAM,KAAK,GAAG,GAAG;aACd,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;YACb,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACrB,OAAO,EAAE,CAAA;YACX,CAAC;YACD,MAAM,SAAS,GAAI,KAAuB,CAAC,IAAI,CAAA;YAC/C,OAAO,OAAO,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;QAC9D,CAAC,CAAC;aACD,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;QACpC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,OAAO,wBAAwB,CAAC,KAAK,CAAC,IAAI,IAAI,CAAA;QAChD,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAClB,MAAM,UAAU,GAAI,GAA0B,CAAC,IAAI,CAAA;QACnD,IAAI,OAAO,UAAU,KAAK,QAAQ,EAAE,CAAC;YACnC,MAAM,OAAO,GAAG,uBAAuB,CAAC,UAAU,CAAC,CAAA;YACnD,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAA;QAC5C,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC,CAAA"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Generic provider predicate: always reports that it can handle the request.
 *
 * @returns {boolean} Always `true`.
 */
export const canHandle = () => {
    return true;
};
|
|
2
|
+
/**
 * Generic provider fetch: a placeholder that never produces a transcript.
 *
 * @param {object} _context - Unused.
 * @param {object} _options - Unused.
 * @returns {Promise<object>} A result with `text` and `source` set to `null`,
 *   no attempted providers, and metadata marking the provider as not implemented.
 */
export const fetchTranscript = async (_context, _options) => {
    await Promise.resolve();
    const emptyResult = {
        text: null,
        source: null,
        attemptedProviders: [],
        metadata: { provider: 'generic', reason: 'not_implemented' },
    };
    return emptyResult;
};
|
|
11
|
+
//# sourceMappingURL=generic.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generic.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/generic.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,SAAS,GAAG,GAAY,EAAE,CAAC,IAAI,CAAA;AAE5C,MAAM,CAAC,MAAM,eAAe,GAAG,KAAK,EAClC,QAAyB,EACzB,QAA8B,EACL,EAAE;IAC3B,MAAM,OAAO,CAAC,OAAO,EAAE,CAAA;IACvB,OAAO;QACL,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,IAAI;QACZ,kBAAkB,EAAE,EAAE;QACtB,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,iBAAiB,EAAE;KAC7D,CAAA;AACH,CAAC,CAAA"}
|