@steipete/summarize-core 0.10.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/dist/esm/content/index.js +5 -5
- package/dist/esm/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/client.js +20 -9
- package/dist/esm/content/link-preview/client.js.map +1 -1
- package/dist/esm/content/link-preview/content/article.js +84 -83
- package/dist/esm/content/link-preview/content/article.js.map +1 -1
- package/dist/esm/content/link-preview/content/cleaner.js +23 -20
- package/dist/esm/content/link-preview/content/cleaner.js.map +1 -1
- package/dist/esm/content/link-preview/content/constants.js.map +1 -1
- package/dist/esm/content/link-preview/content/fetcher.js +46 -40
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
- package/dist/esm/content/link-preview/content/firecrawl.js +16 -16
- package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -1
- package/dist/esm/content/link-preview/content/html.js +29 -27
- package/dist/esm/content/link-preview/content/html.js.map +1 -1
- package/dist/esm/content/link-preview/content/index.js +141 -88
- package/dist/esm/content/link-preview/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/content/jsonld.js +12 -12
- package/dist/esm/content/link-preview/content/jsonld.js.map +1 -1
- package/dist/esm/content/link-preview/content/parsers.js +20 -20
- package/dist/esm/content/link-preview/content/parsers.js.map +1 -1
- package/dist/esm/content/link-preview/content/podcast-utils.js +34 -34
- package/dist/esm/content/link-preview/content/podcast-utils.js.map +1 -1
- package/dist/esm/content/link-preview/content/readability.js +16 -15
- package/dist/esm/content/link-preview/content/readability.js.map +1 -1
- package/dist/esm/content/link-preview/content/twitter-utils.js +24 -11
- package/dist/esm/content/link-preview/content/twitter-utils.js.map +1 -1
- package/dist/esm/content/link-preview/content/types.js +1 -1
- package/dist/esm/content/link-preview/content/types.js.map +1 -1
- package/dist/esm/content/link-preview/content/utils.js +17 -17
- package/dist/esm/content/link-preview/content/utils.js.map +1 -1
- package/dist/esm/content/link-preview/content/video.js +19 -19
- package/dist/esm/content/link-preview/content/video.js.map +1 -1
- package/dist/esm/content/link-preview/content/visibility.js +121 -0
- package/dist/esm/content/link-preview/content/visibility.js.map +1 -0
- package/dist/esm/content/link-preview/content/youtube.js +10 -10
- package/dist/esm/content/link-preview/content/youtube.js.map +1 -1
- package/dist/esm/content/link-preview/deps.js +16 -16
- package/dist/esm/content/link-preview/deps.js.map +1 -1
- package/dist/esm/content/link-preview/fetch-with-timeout.js +4 -4
- package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -1
- package/dist/esm/content/link-preview/types.js +1 -1
- package/dist/esm/content/link-preview/types.js.map +1 -1
- package/dist/esm/content/transcript/cache.js +22 -22
- package/dist/esm/content/transcript/cache.js.map +1 -1
- package/dist/esm/content/transcript/index.js +34 -24
- package/dist/esm/content/transcript/index.js.map +1 -1
- package/dist/esm/content/transcript/normalize.js +10 -10
- package/dist/esm/content/transcript/normalize.js.map +1 -1
- package/dist/esm/content/transcript/parse.js +31 -31
- package/dist/esm/content/transcript/parse.js.map +1 -1
- package/dist/esm/content/transcript/providers/generic.js +74 -78
- package/dist/esm/content/transcript/providers/generic.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js +36 -36
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/apple.js +5 -5
- package/dist/esm/content/transcript/providers/podcast/apple.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/constants.js +2 -2
- package/dist/esm/content/transcript/providers/podcast/constants.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/itunes.js +44 -42
- package/dist/esm/content/transcript/providers/podcast/itunes.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/json.js +4 -4
- package/dist/esm/content/transcript/providers/podcast/json.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/media.js +58 -49
- package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/results.js +2 -2
- package/dist/esm/content/transcript/providers/podcast/results.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/rss.js +29 -29
- package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js +38 -38
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/spotify.js +32 -32
- package/dist/esm/content/transcript/providers/podcast/spotify.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast.js +43 -47
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
- package/dist/esm/content/transcript/providers/transcription-start.js +59 -31
- package/dist/esm/content/transcript/providers/transcription-start.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/api.js +56 -56
- package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/apify.js +7 -7
- package/dist/esm/content/transcript/providers/youtube/apify.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/captions.js +76 -76
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +82 -75
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube.js +84 -77
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
- package/dist/esm/content/transcript/timestamps.js +8 -8
- package/dist/esm/content/transcript/timestamps.js.map +1 -1
- package/dist/esm/content/transcript/transcription-config.js +14 -0
- package/dist/esm/content/transcript/transcription-config.js.map +1 -0
- package/dist/esm/content/transcript/utils.js +35 -35
- package/dist/esm/content/transcript/utils.js.map +1 -1
- package/dist/esm/content/url.js +59 -28
- package/dist/esm/content/url.js.map +1 -1
- package/dist/esm/index.js +4 -3
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/language.js +77 -77
- package/dist/esm/language.js.map +1 -1
- package/dist/esm/openai/base-url.js +35 -0
- package/dist/esm/openai/base-url.js.map +1 -0
- package/dist/esm/processes.js +16 -16
- package/dist/esm/processes.js.map +1 -1
- package/dist/esm/prompts/cli.js +17 -17
- package/dist/esm/prompts/cli.js.map +1 -1
- package/dist/esm/prompts/file.js +54 -54
- package/dist/esm/prompts/file.js.map +1 -1
- package/dist/esm/prompts/format.js +2 -2
- package/dist/esm/prompts/format.js.map +1 -1
- package/dist/esm/prompts/index.js +5 -5
- package/dist/esm/prompts/index.js.map +1 -1
- package/dist/esm/prompts/link-summary.js +65 -65
- package/dist/esm/prompts/link-summary.js.map +1 -1
- package/dist/esm/prompts/summary-lengths.js +10 -10
- package/dist/esm/prompts/summary-lengths.js.map +1 -1
- package/dist/esm/prompts/summary-system.js +9 -9
- package/dist/esm/prompts/summary-system.js.map +1 -1
- package/dist/esm/shared/contracts.js +1 -1
- package/dist/esm/shared/contracts.js.map +1 -1
- package/dist/esm/transcription/onnx-cli.js +69 -69
- package/dist/esm/transcription/onnx-cli.js.map +1 -1
- package/dist/esm/transcription/whisper/constants.js +3 -3
- package/dist/esm/transcription/whisper/constants.js.map +1 -1
- package/dist/esm/transcription/whisper/core.js +148 -59
- package/dist/esm/transcription/whisper/core.js.map +1 -1
- package/dist/esm/transcription/whisper/fal.js +14 -14
- package/dist/esm/transcription/whisper/fal.js.map +1 -1
- package/dist/esm/transcription/whisper/ffmpeg.js +106 -106
- package/dist/esm/transcription/whisper/ffmpeg.js.map +1 -1
- package/dist/esm/transcription/whisper/groq.js +46 -0
- package/dist/esm/transcription/whisper/groq.js.map +1 -0
- package/dist/esm/transcription/whisper/openai.js +19 -13
- package/dist/esm/transcription/whisper/openai.js.map +1 -1
- package/dist/esm/transcription/whisper/utils.js +19 -19
- package/dist/esm/transcription/whisper/utils.js.map +1 -1
- package/dist/esm/transcription/whisper/whisper-cpp.js +64 -64
- package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -1
- package/dist/esm/transcription/whisper.js +4 -4
- package/dist/esm/transcription/whisper.js.map +1 -1
- package/dist/types/content/cache/types.d.ts +1 -1
- package/dist/types/content/index.d.ts +7 -7
- package/dist/types/content/link-preview/client.d.ts +7 -4
- package/dist/types/content/link-preview/content/cleaner.d.ts +1 -0
- package/dist/types/content/link-preview/content/fetcher.d.ts +2 -2
- package/dist/types/content/link-preview/content/firecrawl.d.ts +7 -7
- package/dist/types/content/link-preview/content/html.d.ts +8 -8
- package/dist/types/content/link-preview/content/index.d.ts +3 -3
- package/dist/types/content/link-preview/content/twitter-utils.d.ts +1 -0
- package/dist/types/content/link-preview/content/types.d.ts +8 -8
- package/dist/types/content/link-preview/content/utils.d.ts +3 -3
- package/dist/types/content/link-preview/content/video.d.ts +1 -1
- package/dist/types/content/link-preview/content/visibility.d.ts +1 -0
- package/dist/types/content/link-preview/deps.d.ts +36 -33
- package/dist/types/content/link-preview/types.d.ts +4 -4
- package/dist/types/content/transcript/cache.d.ts +4 -4
- package/dist/types/content/transcript/index.d.ts +7 -7
- package/dist/types/content/transcript/parse.d.ts +1 -1
- package/dist/types/content/transcript/providers/generic.d.ts +1 -1
- package/dist/types/content/transcript/providers/podcast/apple-flow.d.ts +2 -2
- package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +4 -4
- package/dist/types/content/transcript/providers/podcast/media.d.ts +9 -6
- package/dist/types/content/transcript/providers/podcast/results.d.ts +3 -3
- package/dist/types/content/transcript/providers/podcast/rss.d.ts +1 -1
- package/dist/types/content/transcript/providers/podcast/spotify-flow.d.ts +2 -2
- package/dist/types/content/transcript/providers/podcast/spotify.d.ts +2 -2
- package/dist/types/content/transcript/providers/podcast.d.ts +5 -5
- package/dist/types/content/transcript/providers/transcription-start.d.ts +14 -8
- package/dist/types/content/transcript/providers/youtube/api.d.ts +1 -1
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +1 -1
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +11 -8
- package/dist/types/content/transcript/providers/youtube.d.ts +1 -1
- package/dist/types/content/transcript/timestamps.d.ts +1 -1
- package/dist/types/content/transcript/transcription-config.d.ts +15 -0
- package/dist/types/content/transcript/types.d.ts +12 -9
- package/dist/types/content/transcript/utils.d.ts +1 -1
- package/dist/types/content/url.d.ts +5 -3
- package/dist/types/index.d.ts +5 -4
- package/dist/types/language.d.ts +4 -4
- package/dist/types/openai/base-url.d.ts +14 -0
- package/dist/types/processes.d.ts +2 -2
- package/dist/types/prompts/cli.d.ts +3 -3
- package/dist/types/prompts/file.d.ts +2 -2
- package/dist/types/prompts/index.d.ts +6 -6
- package/dist/types/prompts/link-summary.d.ts +3 -3
- package/dist/types/prompts/summary-lengths.d.ts +1 -1
- package/dist/types/transcription/onnx-cli.d.ts +3 -3
- package/dist/types/transcription/whisper/core.d.ts +6 -3
- package/dist/types/transcription/whisper/groq.d.ts +2 -0
- package/dist/types/transcription/whisper/openai.d.ts +6 -1
- package/dist/types/transcription/whisper/types.d.ts +1 -1
- package/dist/types/transcription/whisper/whisper-cpp.d.ts +1 -1
- package/dist/types/transcription/whisper.d.ts +5 -5
- package/package.json +13 -13
|
@@ -1,27 +1,29 @@
|
|
|
1
|
-
import { normalizeTranscriptText } from
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
1
|
+
import { normalizeTranscriptText } from "../normalize.js";
|
|
2
|
+
import { resolveTranscriptionConfig } from "../transcription-config.js";
|
|
3
|
+
import { extractYouTubeVideoId } from "../utils.js";
|
|
4
|
+
import { resolveTranscriptionAvailability } from "./transcription-start.js";
|
|
5
|
+
import { extractYoutubeiTranscriptConfig, fetchTranscriptFromTranscriptEndpoint, } from "./youtube/api.js";
|
|
6
|
+
import { fetchTranscriptWithApify } from "./youtube/apify.js";
|
|
7
|
+
import { extractYoutubeDurationSeconds, fetchTranscriptFromCaptionTracks, fetchYoutubeDurationSecondsViaPlayer, } from "./youtube/captions.js";
|
|
8
|
+
import { fetchDurationSecondsWithYtDlp, fetchTranscriptWithYtDlp } from "./youtube/yt-dlp.js";
|
|
8
9
|
const YOUTUBE_URL_PATTERN = /youtube\.com|youtu\.be/i;
|
|
9
10
|
export const canHandle = ({ url }) => YOUTUBE_URL_PATTERN.test(url);
|
|
10
11
|
export const fetchTranscript = async (context, options) => {
|
|
11
12
|
// Diagnostics: used for logging/UX and for tests asserting provider order.
|
|
12
13
|
const attemptedProviders = [];
|
|
13
14
|
const notes = [];
|
|
15
|
+
const transcription = resolveTranscriptionConfig(options);
|
|
14
16
|
const { html: initialHtml, url } = context;
|
|
15
17
|
let html = initialHtml;
|
|
16
|
-
const hasYoutubeConfig = typeof html ===
|
|
18
|
+
const hasYoutubeConfig = typeof html === "string" && /ytcfg\.set|ytInitialPlayerResponse/.test(html);
|
|
17
19
|
if (!hasYoutubeConfig) {
|
|
18
20
|
// Many callers don't pass through the raw watch page HTML. When we don't see the usual
|
|
19
21
|
// bootstrap tokens, do a best-effort fetch so downstream extractors can work.
|
|
20
22
|
try {
|
|
21
23
|
const response = await options.fetch(url, {
|
|
22
24
|
headers: {
|
|
23
|
-
|
|
24
|
-
Accept:
|
|
25
|
+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0 Safari/537.36",
|
|
26
|
+
Accept: "text/html,application/xhtml+xml",
|
|
25
27
|
},
|
|
26
28
|
});
|
|
27
29
|
if (response.ok) {
|
|
@@ -33,54 +35,55 @@ export const fetchTranscript = async (context, options) => {
|
|
|
33
35
|
}
|
|
34
36
|
}
|
|
35
37
|
const mode = options.youtubeTranscriptMode;
|
|
36
|
-
const progress = typeof options.onProgress ===
|
|
38
|
+
const progress = typeof options.onProgress === "function" ? options.onProgress : null;
|
|
37
39
|
const transcriptionAvailability = await resolveTranscriptionAvailability({
|
|
38
|
-
|
|
39
|
-
openaiApiKey: options.openaiApiKey,
|
|
40
|
-
falApiKey: options.falApiKey,
|
|
40
|
+
transcription,
|
|
41
41
|
});
|
|
42
42
|
const hasYtDlpCredentials = transcriptionAvailability.hasAnyProvider;
|
|
43
43
|
// yt-dlp fallback only makes sense if we have the binary *and* some transcription path.
|
|
44
44
|
const canRunYtDlp = Boolean(options.ytDlpPath && hasYtDlpCredentials);
|
|
45
45
|
const pushHint = (hint) => {
|
|
46
|
-
progress?.({ kind:
|
|
46
|
+
progress?.({ kind: "transcript-start", url, service: "youtube", hint });
|
|
47
47
|
};
|
|
48
|
-
if (mode ===
|
|
49
|
-
throw new Error(
|
|
48
|
+
if (mode === "yt-dlp" && !options.ytDlpPath) {
|
|
49
|
+
throw new Error("Missing yt-dlp binary for --youtube yt-dlp (set YT_DLP_PATH or install yt-dlp)");
|
|
50
50
|
}
|
|
51
|
-
if (mode ===
|
|
52
|
-
throw new Error(
|
|
51
|
+
if (mode === "yt-dlp" && !hasYtDlpCredentials) {
|
|
52
|
+
throw new Error("Missing transcription provider for --youtube yt-dlp (install whisper-cpp or set OPENAI_API_KEY/FAL_KEY)");
|
|
53
53
|
}
|
|
54
|
-
|
|
54
|
+
// In explicit apify mode we can continue without HTML.
|
|
55
|
+
if (!html && mode !== "apify") {
|
|
55
56
|
return { text: null, source: null, attemptedProviders };
|
|
56
57
|
}
|
|
57
58
|
const tryApify = async (hint) => {
|
|
58
59
|
if (!options.apifyApiToken)
|
|
59
60
|
return null;
|
|
60
61
|
pushHint(hint);
|
|
61
|
-
attemptedProviders.push(
|
|
62
|
+
attemptedProviders.push("apify");
|
|
62
63
|
const apifyTranscript = await fetchTranscriptWithApify(options.fetch, options.apifyApiToken, url);
|
|
63
64
|
if (!apifyTranscript)
|
|
64
65
|
return null;
|
|
65
66
|
return {
|
|
66
67
|
text: normalizeTranscriptText(apifyTranscript),
|
|
67
|
-
source:
|
|
68
|
-
metadata: { provider:
|
|
68
|
+
source: "apify",
|
|
69
|
+
metadata: { provider: "apify", ...(durationMetadata ?? {}) },
|
|
69
70
|
attemptedProviders,
|
|
70
71
|
};
|
|
71
72
|
};
|
|
72
73
|
const effectiveVideoIdCandidate = context.resourceKey ?? extractYouTubeVideoId(url);
|
|
73
74
|
// Prefer the caller-provided resource key (e.g. from cache routing) over URL parsing.
|
|
74
|
-
const effectiveVideoId = typeof effectiveVideoIdCandidate ===
|
|
75
|
+
const effectiveVideoId = typeof effectiveVideoIdCandidate === "string" && effectiveVideoIdCandidate.trim().length > 0
|
|
75
76
|
? effectiveVideoIdCandidate.trim()
|
|
76
77
|
: null;
|
|
77
|
-
|
|
78
|
+
const htmlText = html ?? "";
|
|
79
|
+
// In explicit apify mode we can continue without a parsed video id.
|
|
80
|
+
if (!effectiveVideoId && mode !== "apify") {
|
|
78
81
|
return { text: null, source: null, attemptedProviders };
|
|
79
82
|
}
|
|
80
|
-
let durationSeconds = extractYoutubeDurationSeconds(
|
|
81
|
-
if (!durationSeconds) {
|
|
83
|
+
let durationSeconds = extractYoutubeDurationSeconds(htmlText);
|
|
84
|
+
if (!durationSeconds && effectiveVideoId) {
|
|
82
85
|
durationSeconds = await fetchYoutubeDurationSecondsViaPlayer(options.fetch, {
|
|
83
|
-
html,
|
|
86
|
+
html: htmlText,
|
|
84
87
|
videoId: effectiveVideoId,
|
|
85
88
|
});
|
|
86
89
|
}
|
|
@@ -90,17 +93,20 @@ export const fetchTranscript = async (context, options) => {
|
|
|
90
93
|
url,
|
|
91
94
|
});
|
|
92
95
|
}
|
|
93
|
-
const durationMetadata = typeof durationSeconds ===
|
|
96
|
+
const durationMetadata = typeof durationSeconds === "number" && Number.isFinite(durationSeconds) && durationSeconds > 0
|
|
94
97
|
? { durationSeconds }
|
|
95
98
|
: null;
|
|
96
99
|
// Try no-auto mode (skip auto-generated captions, fall back to yt-dlp)
|
|
97
|
-
if (mode ===
|
|
100
|
+
if (mode === "no-auto") {
|
|
101
|
+
if (!effectiveVideoId) {
|
|
102
|
+
return { text: null, source: null, attemptedProviders };
|
|
103
|
+
}
|
|
98
104
|
// "no-auto" is intentionally strict: only accept creator captions (and skip ASR/auto tracks).
|
|
99
105
|
// We *only* require yt-dlp once we know captions aren't available.
|
|
100
|
-
pushHint(
|
|
101
|
-
attemptedProviders.push(
|
|
106
|
+
pushHint("YouTube: checking creator captions only (skipping auto-generated)");
|
|
107
|
+
attemptedProviders.push("captionTracks");
|
|
102
108
|
const manualTranscript = await fetchTranscriptFromCaptionTracks(options.fetch, {
|
|
103
|
-
html,
|
|
109
|
+
html: htmlText,
|
|
104
110
|
originalUrl: url,
|
|
105
111
|
videoId: effectiveVideoId,
|
|
106
112
|
skipAutoGenerated: true,
|
|
@@ -108,23 +114,26 @@ export const fetchTranscript = async (context, options) => {
|
|
|
108
114
|
if (manualTranscript?.text) {
|
|
109
115
|
return {
|
|
110
116
|
text: normalizeTranscriptText(manualTranscript.text),
|
|
111
|
-
source:
|
|
117
|
+
source: "captionTracks",
|
|
112
118
|
segments: options.transcriptTimestamps ? (manualTranscript.segments ?? null) : null,
|
|
113
|
-
metadata: { provider:
|
|
119
|
+
metadata: { provider: "captionTracks", manualOnly: true, ...(durationMetadata ?? {}) },
|
|
114
120
|
attemptedProviders,
|
|
115
121
|
};
|
|
116
122
|
}
|
|
117
123
|
// No creator captions found, fall through to yt-dlp below
|
|
118
|
-
notes.push(
|
|
124
|
+
notes.push("No creator captions found, using yt-dlp transcription");
|
|
119
125
|
}
|
|
120
126
|
// Try web methods (youtubei, captionTracks) if mode is 'auto' or 'web'
|
|
121
|
-
if (mode ===
|
|
127
|
+
if (mode === "auto" || mode === "web") {
|
|
128
|
+
if (!effectiveVideoId) {
|
|
129
|
+
return { text: null, source: null, attemptedProviders };
|
|
130
|
+
}
|
|
122
131
|
// youtubei is preferred when available: it returns a clean transcript payload without having
|
|
123
132
|
// to download/parse caption track formats.
|
|
124
|
-
pushHint(
|
|
125
|
-
const config = extractYoutubeiTranscriptConfig(
|
|
133
|
+
pushHint("YouTube: checking captions (youtubei)");
|
|
134
|
+
const config = extractYoutubeiTranscriptConfig(htmlText);
|
|
126
135
|
if (config) {
|
|
127
|
-
attemptedProviders.push(
|
|
136
|
+
attemptedProviders.push("youtubei");
|
|
128
137
|
const transcript = await fetchTranscriptFromTranscriptEndpoint(options.fetch, {
|
|
129
138
|
config,
|
|
130
139
|
originalUrl: url,
|
|
@@ -132,59 +141,57 @@ export const fetchTranscript = async (context, options) => {
|
|
|
132
141
|
if (transcript?.text) {
|
|
133
142
|
return {
|
|
134
143
|
text: normalizeTranscriptText(transcript.text),
|
|
135
|
-
source:
|
|
144
|
+
source: "youtubei",
|
|
136
145
|
segments: options.transcriptTimestamps ? (transcript.segments ?? null) : null,
|
|
137
|
-
metadata: { provider:
|
|
146
|
+
metadata: { provider: "youtubei", ...(durationMetadata ?? {}) },
|
|
138
147
|
attemptedProviders,
|
|
139
148
|
};
|
|
140
149
|
}
|
|
141
150
|
}
|
|
142
151
|
if (!config) {
|
|
143
|
-
pushHint(
|
|
152
|
+
pushHint("YouTube: youtubei unavailable; checking caption tracks");
|
|
144
153
|
}
|
|
145
154
|
else {
|
|
146
|
-
pushHint(
|
|
155
|
+
pushHint("YouTube: youtubei empty; checking caption tracks");
|
|
147
156
|
}
|
|
148
|
-
attemptedProviders.push(
|
|
157
|
+
attemptedProviders.push("captionTracks");
|
|
149
158
|
const captionTranscript = await fetchTranscriptFromCaptionTracks(options.fetch, {
|
|
150
|
-
html,
|
|
159
|
+
html: htmlText,
|
|
151
160
|
originalUrl: url,
|
|
152
161
|
videoId: effectiveVideoId,
|
|
153
162
|
});
|
|
154
163
|
if (captionTranscript?.text) {
|
|
155
164
|
return {
|
|
156
165
|
text: normalizeTranscriptText(captionTranscript.text),
|
|
157
|
-
source:
|
|
166
|
+
source: "captionTracks",
|
|
158
167
|
segments: options.transcriptTimestamps ? (captionTranscript.segments ?? null) : null,
|
|
159
|
-
metadata: { provider:
|
|
168
|
+
metadata: { provider: "captionTracks", ...(durationMetadata ?? {}) },
|
|
160
169
|
attemptedProviders,
|
|
161
170
|
};
|
|
162
171
|
}
|
|
163
172
|
}
|
|
164
173
|
// Try yt-dlp (audio download + OpenAI/FAL transcription) if mode is 'auto', 'no-auto', or 'yt-dlp'
|
|
165
|
-
if (mode ===
|
|
166
|
-
if (mode ===
|
|
167
|
-
throw new Error(
|
|
174
|
+
if (mode === "yt-dlp" || mode === "no-auto" || (mode === "auto" && canRunYtDlp)) {
|
|
175
|
+
if (mode === "no-auto" && !canRunYtDlp) {
|
|
176
|
+
throw new Error("--youtube no-auto requires yt-dlp and a transcription provider (whisper-cpp, OPENAI_API_KEY, or FAL_KEY) for fallback");
|
|
168
177
|
}
|
|
169
|
-
if (mode ===
|
|
170
|
-
pushHint(
|
|
178
|
+
if (mode === "auto") {
|
|
179
|
+
pushHint("YouTube: captions unavailable; falling back to yt-dlp audio");
|
|
171
180
|
}
|
|
172
|
-
else if (mode ===
|
|
173
|
-
pushHint(
|
|
181
|
+
else if (mode === "no-auto") {
|
|
182
|
+
pushHint("YouTube: no creator captions; falling back to yt-dlp audio");
|
|
174
183
|
}
|
|
175
184
|
else {
|
|
176
|
-
pushHint(
|
|
185
|
+
pushHint("YouTube: downloading audio (yt-dlp)");
|
|
177
186
|
}
|
|
178
|
-
attemptedProviders.push(
|
|
187
|
+
attemptedProviders.push("yt-dlp");
|
|
179
188
|
const ytdlpResult = await fetchTranscriptWithYtDlp({
|
|
180
189
|
ytDlpPath: options.ytDlpPath,
|
|
181
|
-
|
|
182
|
-
openaiApiKey: options.openaiApiKey,
|
|
183
|
-
falApiKey: options.falApiKey,
|
|
190
|
+
transcription,
|
|
184
191
|
mediaCache: options.mediaCache ?? null,
|
|
185
192
|
url,
|
|
186
193
|
onProgress: progress,
|
|
187
|
-
mediaKind:
|
|
194
|
+
mediaKind: "video",
|
|
188
195
|
});
|
|
189
196
|
if (ytdlpResult.notes.length > 0) {
|
|
190
197
|
notes.push(...ytdlpResult.notes);
|
|
@@ -192,52 +199,52 @@ export const fetchTranscript = async (context, options) => {
|
|
|
192
199
|
if (ytdlpResult.text) {
|
|
193
200
|
return {
|
|
194
201
|
text: normalizeTranscriptText(ytdlpResult.text),
|
|
195
|
-
source:
|
|
202
|
+
source: "yt-dlp",
|
|
196
203
|
metadata: {
|
|
197
|
-
provider:
|
|
204
|
+
provider: "yt-dlp",
|
|
198
205
|
transcriptionProvider: ytdlpResult.provider,
|
|
199
206
|
...(durationMetadata ?? {}),
|
|
200
207
|
},
|
|
201
208
|
attemptedProviders,
|
|
202
|
-
notes: notes.length > 0 ? notes.join(
|
|
209
|
+
notes: notes.length > 0 ? notes.join("; ") : null,
|
|
203
210
|
};
|
|
204
211
|
}
|
|
205
|
-
if (mode ===
|
|
212
|
+
if (mode === "yt-dlp" && ytdlpResult.error) {
|
|
206
213
|
throw ytdlpResult.error;
|
|
207
214
|
}
|
|
208
215
|
// Auto mode: only try Apify after yt-dlp fails (last resort).
|
|
209
|
-
if (mode ===
|
|
210
|
-
const apifyResult = await tryApify(
|
|
216
|
+
if (mode === "auto") {
|
|
217
|
+
const apifyResult = await tryApify("YouTube: yt-dlp transcription failed; trying Apify");
|
|
211
218
|
if (apifyResult)
|
|
212
219
|
return apifyResult;
|
|
213
220
|
}
|
|
214
221
|
}
|
|
215
222
|
// Explicit apify mode: allow forcing it, but require a token.
|
|
216
|
-
if (mode ===
|
|
223
|
+
if (mode === "apify") {
|
|
217
224
|
if (!options.apifyApiToken) {
|
|
218
|
-
throw new Error(
|
|
225
|
+
throw new Error("Missing APIFY_API_TOKEN for --youtube apify");
|
|
219
226
|
}
|
|
220
|
-
const apifyResult = await tryApify(
|
|
227
|
+
const apifyResult = await tryApify("YouTube: fetching transcript (Apify)");
|
|
221
228
|
if (apifyResult)
|
|
222
229
|
return apifyResult;
|
|
223
230
|
}
|
|
224
231
|
// Auto mode: if yt-dlp cannot run (no binary/credentials), fall back to Apify last-last.
|
|
225
|
-
if (mode ===
|
|
226
|
-
const apifyResult = await tryApify(
|
|
232
|
+
if (mode === "auto" && !canRunYtDlp) {
|
|
233
|
+
const apifyResult = await tryApify("YouTube: captions unavailable; trying Apify");
|
|
227
234
|
if (apifyResult)
|
|
228
235
|
return apifyResult;
|
|
229
236
|
}
|
|
230
|
-
attemptedProviders.push(
|
|
237
|
+
attemptedProviders.push("unavailable");
|
|
231
238
|
return {
|
|
232
239
|
text: null,
|
|
233
|
-
source:
|
|
240
|
+
source: "unavailable",
|
|
234
241
|
metadata: {
|
|
235
|
-
provider:
|
|
236
|
-
reason:
|
|
242
|
+
provider: "youtube",
|
|
243
|
+
reason: "no_transcript_available",
|
|
237
244
|
...(durationMetadata ?? {}),
|
|
238
245
|
},
|
|
239
246
|
attemptedProviders,
|
|
240
|
-
notes: notes.length > 0 ? notes.join(
|
|
247
|
+
notes: notes.length > 0 ? notes.join("; ") : null,
|
|
241
248
|
};
|
|
242
249
|
};
|
|
243
250
|
//# sourceMappingURL=youtube.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/youtube.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"youtube.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/youtube.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAC1D,OAAO,EAAE,0BAA0B,EAAE,MAAM,4BAA4B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,gCAAgC,EAAE,MAAM,0BAA0B,CAAC;AAC5E,OAAO,EACL,+BAA+B,EAC/B,qCAAqC,GACtC,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,wBAAwB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,EACL,6BAA6B,EAC7B,gCAAgC,EAChC,oCAAoC,GACrC,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,6BAA6B,EAAE,wBAAwB,EAAE,MAAM,qBAAqB,CAAC;AAE9F,MAAM,mBAAmB,GAAG,yBAAyB,CAAC;AAEtD,MAAM,CAAC,MAAM,SAAS,GAAG,CAAC,EAAE,GAAG,EAAmB,EAAW,EAAE,CAAC,mBAAmB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAE9F,MAAM,CAAC,MAAM,eAAe,GAAG,KAAK,EAClC,OAAwB,EACxB,OAA6B,EACJ,EAAE;IAC3B,2EAA2E;IAC3E,MAAM,kBAAkB,GAAuB,EAAE,CAAC;IAClD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,aAAa,GAAG,0BAA0B,CAAC,OAAO,CAAC,CAAC;IAC1D,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,EAAE,GAAG,OAAO,CAAC;IAC3C,IAAI,IAAI,GAAG,WAAW,CAAC;IACvB,MAAM,gBAAgB,GACpB,OAAO,IAAI,KAAK,QAAQ,IAAI,oCAAoC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9E,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,uFAAuF;QACvF,8EAA8E;QAC9E,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE;gBACxC,OAAO,EAAE;oBACP,YAAY,EACV,mHAAmH;oBACrH,MAAM,EAAE,iCAAiC;iBAC1C;aACF,CAAC,CAAC;YACH,IAAI,QAAQ,CAAC,EAAE,EAAE,CAAC;gBAChB,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAC/B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,wCAAwC;QAC1C,CAAC;IACH,CAAC;IACD,MAAM,IAAI,GAAG,OAAO,CAAC,qBAAqB,CAAC;IAC3C,MAAM,QAAQ,GAAG,OAAO,OAAO,CAAC,UAAU,KAAK,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC;IACtF,MAAM,yBAAyB,GAAG,MAAM,gCAAgC,CAAC;QACvE,aAAa;KACd,CAAC,CAAC;IACH,MAAM,mBAAmB,GAAG,yBAAyB,CAAC,cAAc,CAAC;IACrE,wFAAwF;IACxF,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,IAAI,mBAAmB,CAAC,CAAC;IACtE,MAAM,QAAQ,GAAG,CAAC,IAAY,EAAE,EAAE;QAChC,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,kBAAkB,EAAE,GAAG,EAAE,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1E,CAAC,CAAC;IAEF,IAAI,IAAI,KAAK,QAAQ,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC;QAC5C,MAAM,IAAI,KAAK,CACb,gFAAgF,CACjF,CAAC;IACJ,CAAC;IACD,IAAI,IAAI,KAAK,QAAQ,IAAI,CAAC,mBAAmB,EAAE,CAAC;QAC9C,MAAM,IAAI,KAAK,CACb,yGAAyG,CAC1G,CAAC;IACJ,CAAC;IAED,uDAAuD;IACvD,IAAI,CAAC,IAAI,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;QAC9B,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,EAAE,CAAC;IAC1D,CAAC;IAED,MAAM,QAAQ,GAAG,KAAK,EAAE,IAAY,EAAkC,EAAE;QACtE,IAAI,CAAC,OAAO,CAAC,aAAa;YAAE,OAAO,IAAI,CAAC;QACxC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACf,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACjC,MAAM,eAAe,GAAG,MAAM,wBAAwB,CACpD,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,aAAa,EACrB,GAAG,CACJ,CAAC;QACF,IAAI,CAAC,eAAe;YAAE,OAAO,IAAI,CAAC;QAClC,OAAO;YACL,IAAI,EAAE,uBAAuB,CAAC,eAAe,CAAC;YAC9C,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE;YAC5D,kBAAkB;SACnB,CAAC;IACJ,CAAC,CAAC;IAEF,MAAM,yBAAyB,GAAG,OAAO,CAAC,WAAW,IAAI,qBAAqB,CAAC,GAAG,CAAC,CAAC;IACpF,sFAAsF;IACtF,MAAM,gBAAgB,GACpB,OAAO,yBAAyB,KAAK,QAAQ,IAAI,yBAAyB,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;QAC1F,CAAC,CAAC,yBAAyB,CAAC,IAAI,EAAE;QAClC,CAAC,CAAC,IAAI,CAAC;IACX,MAAM,QAAQ,GAAG,IAAI,IAAI,EAAE,CAAC;IAC5B,oEAAoE;IACpE,IAAI,CAAC,gBAAgB,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;QAC1C,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,EAAE,CAAC;IAC1D,CAAC;IAED,IAAI,eAAe,GAAG,6BAA6B,CAAC,QAAQ,CAAC,CAAC;IAC9D,IAAI,CAAC,eAAe,IAAI,gBAAgB,EAAE,CAAC;QACzC,eAAe,GAAG,MAAM,oCAAoC,CAAC,OAAO,CAAC,KAAK,EAAE;YAC1E,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,gBAAgB;SAC1B,CAAC,CAAC;IACL,CAAC;IACD,IAAI,CAAC,eAAe,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;QAC1C,eAAe,GAAG,MAAM,6BAA6B,CAAC;YACpD,SAAS,EAAE,OAAO,CAAC,SAAS;YAC5B,GAAG;SACJ,CAAC,CAAC;IACL,CAAC;IACD,MAAM,gBAAgB,GACpB,OAAO,eAAe,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC,IAAI,eAAe,GAAG,CAAC;QAC5F,CAAC,CAAC,EAAE,eAAe,EAAE;QACrB,CAAC,CAAC,IAAI,CAAC;IAEX,uEAAuE;IACvE,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QACvB,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,EAAE,CAAC;QAC1D,CAAC;QACD,8FAA8F;QAC9F,mEAAmE;QACnE,QAAQ,CAAC,mEAAmE,CAAC,CAAC;QAC9E,kBAAkB,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QACzC,MAAM,gBAAgB,GAAG,MAAM,gCAAgC,CAAC,OAAO,CAAC,KAAK,EAAE;YAC7E,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,GAAG;YAChB,OAAO,EAAE,gBAAgB;YACzB,iBAAiB,EAAE,IAAI;SACxB,CAAC,CAAC;QACH,IAAI,gBAAgB,EAAE,IAAI,EAAE,CAAC;YAC3B,OAAO;gBACL,IAAI,EAAE,uBAAuB,CAAC,gBAAgB,CAAC,IAAI,CAAC;gBACpD,MAAM,EAAE,eAAe;gBACvB,QAAQ,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;gBACnF,QAAQ,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE,UAAU,EAAE,IAAI,EAAE,GAAG,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE;gBACtF,kBAAkB;aACnB,CAAC;QACJ,CAAC;QACD,0DAA0D;QAC1D,KAAK,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;IACtE,CAAC;IAED,uEAAuE;IACvE,IAAI,IAAI,KAAK,MAAM,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACtC,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,kBAAkB,EAAE,CAAC;QAC1D,CAAC;QACD,6FAA6F;QAC7F,2CAA2C;QAC3C,QAAQ,CAAC,uCAAuC,CAAC,CAAC;QAClD,MAAM,MAAM,GAAG,+BAA+B,CAAC,QAAQ,CAAC,CAAC;QACzD,IAAI,MAAM,EAAE,CAAC;YACX,kBAAkB,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACpC,MAAM,UAAU,GAAG,MAAM,qCAAqC,CAAC,OAAO,CAAC,KAAK,EAAE;gBAC5E,MAAM;gBACN,WAAW,EAAE,GAAG;aACjB,CAAC,CAAC;YACH,IAAI,UAAU,EAAE,IAAI,EAAE,CAAC;gBACrB,OAAO;oBACL,IAAI,EAAE,uBAAuB,CAAC,UAAU,CAAC,IAAI,CAAC;oBAC9C,MAAM,EAAE,UAAU;oBAClB,QAAQ,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;oBAC7E,QAAQ,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE;oBAC/D,kBAAkB;iBACnB,CAAC;YACJ,CAAC;QACH,CAAC;QAED,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,QAAQ,CAAC,wDAAwD,CAAC,CAAC;QACrE,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,kDAAkD,CAAC,CAAC;QAC/D,CAAC;QACD,kBAAkB,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QACzC,MAAM,iBAAiB,GAAG,MAAM,gCAAgC,CAAC,OAAO,CAAC,KAAK,EAAE;YAC9E,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,GAAG;YAChB,OAAO,EAAE,gBAAgB;SAC1B,CAAC,CAAC;QACH,IAAI,iBAAiB,EAAE,IAAI,EAAE,CAAC;YAC5B,OAAO;gBACL,IAAI,EAAE,uBAAuB,CAAC,iBAAiB,CAAC,IAAI,CAAC;gBACrD,MAAM,EAAE,eAAe;gBACvB,QAAQ,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;gBACpF,QAAQ,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE,GAAG,CAAC,gBAAgB,IAAI,EAAE,CAAC,EAAE;gBACpE,kBAAkB;aACnB,CAAC;QACJ,CAAC;IACH,CAAC;IAED,mGAAmG;IACnG,IAAI,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,SAAS,IAAI,CAAC,IAAI,KAAK,MAAM,IAAI,WAAW,CAAC,EAAE,CAAC;QAChF,IAAI,IAAI,KAAK,SAAS,IAAI,CAAC,WAAW,EAAE,CAAC;YACvC,MAAM,IAAI,KAAK,CACb,uHAAuH,CACxH,CAAC;QACJ,CAAC;QACD,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YACpB,QAAQ,CAAC,6DAA6D,CAAC,CAAC;QAC1E,CAAC;aAAM,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YAC9B,QAAQ,CAAC,4DAA4D,CAAC,CAAC;QACzE,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,qCAAqC,CAAC,CAAC;QAClD,CAAC;QACD,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAClC,MAAM,WAAW,GAAG,MAAM,wBAAwB,CAAC;YACjD,SAAS,EAAE,OAAO,CAAC,SAAS;YAC5B,aAAa;YACb,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,IAAI;YACtC,GAAG;YACH,UAAU,EAAE,QAAQ;YACpB,SAAS,EAAE,OAAO;SACnB,CAAC,CAAC;QACH,IAAI,WAAW,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,KAAK,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;QACnC,CAAC;QACD,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC;YACrB,OAAO;gBACL,IAAI,EAAE,uBAAuB,CAAC,WAAW,CAAC,IAAI,CAAC;gBAC/C,MAAM,EAAE,QAAQ;gBAChB,QAAQ,EAAE;oBACR,QAAQ,EAAE,QAAQ;oBAClB,qBAAqB,EAAE,WAAW,CAAC,QAAQ;oBAC3C,GAAG,CAAC,gBAAgB,IAAI,EAAE,CAAC;iBAC5B;gBACD,kBAAkB;gBAClB,KAAK,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;aAClD,CAAC;QACJ,CAAC;QACD,IAAI,IAAI,KAAK,QAAQ,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;YAC3C,MAAM,WAAW,CAAC,KAAK,CAAC;QAC1B,CAAC;QAED,8DAA8D;QAC9D,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YACpB,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,oDAAoD,CAAC,CAAC;YACzF,IAAI,WAAW;gBAAE,OAAO,WAAW,CAAC;QACtC,CAAC;IACH,CAAC;IAED,8DAA8D;IAC9D,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;QACrB,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;QACjE,CAAC;QACD,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,sCAAsC,CAAC,CAAC;QAC3E,IAAI,WAAW;YAAE,OAAO,WAAW,CAAC;IACtC,CAAC;IAED,yFAAyF;IACzF,IAAI,IAAI,KAAK,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,6CAA6C,CAAC,CAAC;QAClF,IAAI,WAAW;YAAE,OAAO,WAAW,CAAC;IACtC,CAAC;IAED,kBAAkB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACvC,OAAO;QACL,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,aAAa;QACrB,QAAQ,EAAE;YACR,QAAQ,EAAE,SAAS;YACnB,MAAM,EAAE,yBAAyB;YACjC,GAAG,CAAC,gBAAgB,IAAI,EAAE,CAAC;SAC5B;QACD,kBAAkB;QAClB,KAAK,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;KAClD,CAAC;AACJ,CAAC,CAAC"}
|
|
@@ -6,9 +6,9 @@ export function formatTimestampMs(ms) {
|
|
|
6
6
|
const minutes = Math.floor((totalSeconds % 3600) / 60);
|
|
7
7
|
const seconds = totalSeconds % 60;
|
|
8
8
|
if (hours > 0) {
|
|
9
|
-
return `${hours}:${minutes.toString().padStart(2,
|
|
9
|
+
return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
10
10
|
}
|
|
11
|
-
return `${minutes}:${seconds.toString().padStart(2,
|
|
11
|
+
return `${minutes}:${seconds.toString().padStart(2, "0")}`;
|
|
12
12
|
}
|
|
13
13
|
export function parseTimestampStringToMs(value) {
|
|
14
14
|
const trimmed = value.trim();
|
|
@@ -24,7 +24,7 @@ export function parseTimestampStringToMs(value) {
|
|
|
24
24
|
const secondsPart = parts.pop();
|
|
25
25
|
if (secondsPart == null)
|
|
26
26
|
return null;
|
|
27
|
-
const seconds = Number(secondsPart.replace(
|
|
27
|
+
const seconds = Number(secondsPart.replace(",", "."));
|
|
28
28
|
if (!Number.isFinite(seconds) || seconds < 0)
|
|
29
29
|
return null;
|
|
30
30
|
const minutesPart = parts.pop();
|
|
@@ -40,16 +40,16 @@ export function parseTimestampStringToMs(value) {
|
|
|
40
40
|
return Math.round((hours * 3600 + minutes * 60 + seconds) * 1000);
|
|
41
41
|
}
|
|
42
42
|
export function parseTimestampToMs(value, assumeSeconds = false) {
|
|
43
|
-
if (typeof value ===
|
|
43
|
+
if (typeof value === "number") {
|
|
44
44
|
if (!Number.isFinite(value) || value < 0)
|
|
45
45
|
return null;
|
|
46
46
|
return assumeSeconds ? Math.round(value * 1000) : Math.round(value);
|
|
47
47
|
}
|
|
48
|
-
if (typeof value ===
|
|
48
|
+
if (typeof value === "string") {
|
|
49
49
|
const trimmed = value.trim();
|
|
50
50
|
if (!trimmed)
|
|
51
51
|
return null;
|
|
52
|
-
if (trimmed.includes(
|
|
52
|
+
if (trimmed.includes(":")) {
|
|
53
53
|
const parsed = parseTimestampStringToMs(trimmed);
|
|
54
54
|
if (parsed != null)
|
|
55
55
|
return parsed;
|
|
@@ -66,7 +66,7 @@ export function formatTranscriptSegments(segments) {
|
|
|
66
66
|
return null;
|
|
67
67
|
const lines = segments
|
|
68
68
|
.map((segment) => {
|
|
69
|
-
const text = segment.text.replace(/\s+/g,
|
|
69
|
+
const text = segment.text.replace(/\s+/g, " ").trim();
|
|
70
70
|
if (!text)
|
|
71
71
|
return null;
|
|
72
72
|
return `[${formatTimestampMs(segment.startMs)}] ${text}`;
|
|
@@ -74,6 +74,6 @@ export function formatTranscriptSegments(segments) {
|
|
|
74
74
|
.filter((line) => Boolean(line));
|
|
75
75
|
if (lines.length === 0)
|
|
76
76
|
return null;
|
|
77
|
-
return lines.join(
|
|
77
|
+
return lines.join("\n");
|
|
78
78
|
}
|
|
79
79
|
//# sourceMappingURL=timestamps.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"timestamps.js","sourceRoot":"","sources":["../../../../src/content/transcript/timestamps.ts"],"names":[],"mappings":"AAEA,MAAM,uBAAuB,GAAG,GAAG,
|
|
1
|
+
{"version":3,"file":"timestamps.js","sourceRoot":"","sources":["../../../../src/content/transcript/timestamps.ts"],"names":[],"mappings":"AAEA,MAAM,uBAAuB,GAAG,GAAG,CAAC;AAEpC,MAAM,UAAU,iBAAiB,CAAC,EAAU;IAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;IACzC,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;IAC7C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,IAAI,CAAC,CAAC;IAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;IACvD,MAAM,OAAO,GAAG,YAAY,GAAG,EAAE,CAAC;IAClC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACd,OAAO,GAAG,KAAK,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;IAClG,CAAC;IACD,OAAO,GAAG,OAAO,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;AAC7D,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,KAAa;IACpD,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;IAC7B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;QAChC,OAAO,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACtF,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;IAChF,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACtD,MAAM,WAAW,GAAG,KAAK,CAAC,GAAG,EAAE,CAAC;IAChC,IAAI,WAAW,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IACrC,MAAM,OAAO,GAAG,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IACtD,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,OAAO,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAE1D,MAAM,WAAW,GAAG,KAAK,CAAC,GAAG,EAAE,CAAC;IAChC,IAAI,WAAW,IAAI,IAAI;QAAE,OAAO,IAAI,CAAC;IACrC,MAAM,OAAO,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC;IACpC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,OAAO,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAE1D,MAAM,SAAS,GAAG,KAAK,CAAC,GAAG,EAAE,CAAC;IAC9B,MAAM,KAAK,GAAG,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACxD,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtD,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,GAAG,IAAI,GAAG,OAAO,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC;AACpE,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAc,EAAE,aAAa,GAAG,KAAK;IACtE,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACtD,OAAO,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACtE,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAC1B,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,MAAM,MAAM,GAAG,wBAAwB,CAAC,OAAO,CAAC,CAAC;YACjD,IAAI,MAAM,IAAI,IAAI;gBAAE,OAAO,MAAM,CAAC;QACpC,CAAC;QACD,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;QAChC,IAAI,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,OAAO,IAAI,CAAC,EAAE,CAAC;YAC7C,OAAO,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAC1E,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,QAA6B;IACpE,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACpD,MAAM,KAAK,GAAG,QAAQ;SACnB,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;QACf,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACtD,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QACvB,OAAO,IAAI,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IAC3D,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;IACnD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACpC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
function normalizeKey(raw) {
|
|
2
|
+
const trimmed = typeof raw === "string" ? raw.trim() : "";
|
|
3
|
+
return trimmed.length > 0 ? trimmed : null;
|
|
4
|
+
}
|
|
5
|
+
export function resolveTranscriptionConfig(input) {
|
|
6
|
+
const fromObject = input.transcription ?? null;
|
|
7
|
+
return {
|
|
8
|
+
env: fromObject?.env ?? input.env,
|
|
9
|
+
groqApiKey: normalizeKey(fromObject?.groqApiKey ?? input.groqApiKey),
|
|
10
|
+
openaiApiKey: normalizeKey(fromObject?.openaiApiKey ?? input.openaiApiKey),
|
|
11
|
+
falApiKey: normalizeKey(fromObject?.falApiKey ?? input.falApiKey),
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=transcription-config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcription-config.js","sourceRoot":"","sources":["../../../../src/content/transcript/transcription-config.ts"],"names":[],"mappings":"AAeA,SAAS,YAAY,CAAC,GAA8B;IAClD,MAAM,OAAO,GAAG,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC1D,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7C,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,KAA+B;IACxE,MAAM,UAAU,GAAG,KAAK,CAAC,aAAa,IAAI,IAAI,CAAC;IAC/C,OAAO;QACL,GAAG,EAAE,UAAU,EAAE,GAAG,IAAI,KAAK,CAAC,GAAG;QACjC,UAAU,EAAE,YAAY,CAAC,UAAU,EAAE,UAAU,IAAI,KAAK,CAAC,UAAU,CAAC;QACpE,YAAY,EAAE,YAAY,CAAC,UAAU,EAAE,YAAY,IAAI,KAAK,CAAC,YAAY,CAAC;QAC1E,SAAS,EAAE,YAAY,CAAC,UAAU,EAAE,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC;KAClE,CAAC;AACJ,CAAC"}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { load } from
|
|
2
|
-
import { extractYouTubeVideoId } from
|
|
3
|
-
export { extractYouTubeVideoId, isYouTubeUrl, isYouTubeVideoUrl } from
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
import { extractYouTubeVideoId } from "../url.js";
|
|
3
|
+
export { extractYouTubeVideoId, isYouTubeUrl, isYouTubeVideoUrl } from "../url.js";
|
|
4
4
|
export function isRecord(value) {
|
|
5
|
-
return typeof value ===
|
|
5
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
6
6
|
}
|
|
7
7
|
const MAX_EMBED_YOUTUBE_TEXT_CHARS = 2000;
|
|
8
8
|
const MAX_EMBED_YOUTUBE_READABILITY_CHARS = 2000;
|
|
@@ -11,8 +11,8 @@ async function loadReadabilityDeps() {
|
|
|
11
11
|
if (!readabilityDepsPromise) {
|
|
12
12
|
readabilityDepsPromise = (async () => {
|
|
13
13
|
const [{ Readability }, { JSDOM, VirtualConsole }] = await Promise.all([
|
|
14
|
-
import(
|
|
15
|
-
import(
|
|
14
|
+
import("@mozilla/readability"),
|
|
15
|
+
import("jsdom"),
|
|
16
16
|
]);
|
|
17
17
|
return { Readability, JSDOM, VirtualConsole };
|
|
18
18
|
})();
|
|
@@ -24,32 +24,32 @@ async function extractReadabilityText(html) {
|
|
|
24
24
|
const cleanedHtml = stripCssFromHtml(html);
|
|
25
25
|
const { Readability, JSDOM, VirtualConsole } = await loadReadabilityDeps();
|
|
26
26
|
const virtualConsole = new VirtualConsole();
|
|
27
|
-
virtualConsole.on(
|
|
28
|
-
const message = err && typeof err ===
|
|
29
|
-
? String(err.message ??
|
|
30
|
-
:
|
|
31
|
-
if (message.includes(
|
|
27
|
+
virtualConsole.on("jsdomError", (err) => {
|
|
28
|
+
const message = err && typeof err === "object" && "message" in err
|
|
29
|
+
? String(err.message ?? "")
|
|
30
|
+
: "";
|
|
31
|
+
if (message.includes("Could not parse CSS stylesheet"))
|
|
32
32
|
return;
|
|
33
33
|
console.error(err);
|
|
34
34
|
});
|
|
35
35
|
const dom = new JSDOM(cleanedHtml, { virtualConsole });
|
|
36
36
|
const reader = new Readability(dom.window.document);
|
|
37
37
|
const article = reader.parse();
|
|
38
|
-
const text = (article?.textContent ??
|
|
38
|
+
const text = (article?.textContent ?? "").replace(/\s+/g, " ").trim();
|
|
39
39
|
return text;
|
|
40
40
|
}
|
|
41
41
|
catch {
|
|
42
|
-
return
|
|
42
|
+
return "";
|
|
43
43
|
}
|
|
44
44
|
}
|
|
45
45
|
function stripCssFromHtml(html) {
|
|
46
|
-
return html.replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi,
|
|
46
|
+
return html.replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, "");
|
|
47
47
|
}
|
|
48
48
|
export async function extractEmbeddedYouTubeUrlFromHtml(html, maxTextChars = MAX_EMBED_YOUTUBE_TEXT_CHARS, maxReadabilityChars = MAX_EMBED_YOUTUBE_READABILITY_CHARS) {
|
|
49
49
|
try {
|
|
50
50
|
const $ = load(html);
|
|
51
|
-
const rawText = $(
|
|
52
|
-
const normalizedText = rawText.replace(/\s+/g,
|
|
51
|
+
const rawText = $("body").text() || $.text();
|
|
52
|
+
const normalizedText = rawText.replace(/\s+/g, " ").trim();
|
|
53
53
|
if (normalizedText.length > maxTextChars) {
|
|
54
54
|
const readabilityText = await extractReadabilityText(html);
|
|
55
55
|
if (readabilityText.length > 0) {
|
|
@@ -61,21 +61,21 @@ export async function extractEmbeddedYouTubeUrlFromHtml(html, maxTextChars = MAX
|
|
|
61
61
|
}
|
|
62
62
|
}
|
|
63
63
|
const candidates = [];
|
|
64
|
-
const iframeSrc = $('iframe[src*="youtube.com/embed/"], iframe[src*="youtu.be/"]').first().attr(
|
|
64
|
+
const iframeSrc = $('iframe[src*="youtube.com/embed/"], iframe[src*="youtu.be/"]').first().attr("src") ?? null;
|
|
65
65
|
if (iframeSrc)
|
|
66
66
|
candidates.push(iframeSrc);
|
|
67
67
|
const ogVideo = $('meta[property="og:video"], meta[property="og:video:url"], meta[property="og:video:secure_url"], meta[name="og:video"], meta[name="og:video:url"], meta[name="og:video:secure_url"]')
|
|
68
68
|
.first()
|
|
69
|
-
.attr(
|
|
69
|
+
.attr("content") ?? null;
|
|
70
70
|
if (ogVideo)
|
|
71
71
|
candidates.push(ogVideo);
|
|
72
72
|
for (const candidate of candidates) {
|
|
73
73
|
let url = candidate.trim();
|
|
74
74
|
if (!url)
|
|
75
75
|
continue;
|
|
76
|
-
if (url.startsWith(
|
|
76
|
+
if (url.startsWith("//"))
|
|
77
77
|
url = `https:${url}`;
|
|
78
|
-
if (url.startsWith(
|
|
78
|
+
if (url.startsWith("/"))
|
|
79
79
|
url = `https://www.youtube.com${url}`;
|
|
80
80
|
const id = extractYouTubeVideoId(url);
|
|
81
81
|
if (id)
|
|
@@ -96,19 +96,19 @@ export function sanitizeYoutubeJsonResponse(input) {
|
|
|
96
96
|
}
|
|
97
97
|
export function decodeHtmlEntities(input) {
|
|
98
98
|
return input
|
|
99
|
-
.replaceAll(
|
|
100
|
-
.replaceAll(
|
|
101
|
-
.replaceAll(
|
|
102
|
-
.replaceAll(
|
|
103
|
-
.replaceAll(
|
|
104
|
-
.replaceAll(
|
|
105
|
-
.replaceAll(
|
|
106
|
-
.replaceAll(
|
|
99
|
+
.replaceAll("&", "&")
|
|
100
|
+
.replaceAll("<", "<")
|
|
101
|
+
.replaceAll(">", ">")
|
|
102
|
+
.replaceAll(""", '"')
|
|
103
|
+
.replaceAll("'", "'")
|
|
104
|
+
.replaceAll("'", "'")
|
|
105
|
+
.replaceAll("/", "/")
|
|
106
|
+
.replaceAll(" ", " ");
|
|
107
107
|
}
|
|
108
108
|
export function extractYoutubeBootstrapConfig(html) {
|
|
109
109
|
try {
|
|
110
110
|
const $ = load(html);
|
|
111
|
-
const scripts = $(
|
|
111
|
+
const scripts = $("script").toArray();
|
|
112
112
|
for (const script of scripts) {
|
|
113
113
|
const source = $(script).html();
|
|
114
114
|
if (!source) {
|
|
@@ -125,10 +125,10 @@ export function extractYoutubeBootstrapConfig(html) {
|
|
|
125
125
|
}
|
|
126
126
|
return parseBootstrapFromScript(html);
|
|
127
127
|
}
|
|
128
|
-
const YTCFG_SET_TOKEN =
|
|
129
|
-
const YTCFG_VAR_TOKEN =
|
|
128
|
+
const YTCFG_SET_TOKEN = "ytcfg.set";
|
|
129
|
+
const YTCFG_VAR_TOKEN = "var ytcfg";
|
|
130
130
|
function extractBalancedJsonObject(source, startAt) {
|
|
131
|
-
const start = source.indexOf(
|
|
131
|
+
const start = source.indexOf("{", startAt);
|
|
132
132
|
if (start < 0) {
|
|
133
133
|
return null;
|
|
134
134
|
}
|
|
@@ -146,7 +146,7 @@ function extractBalancedJsonObject(source, startAt) {
|
|
|
146
146
|
escaping = false;
|
|
147
147
|
continue;
|
|
148
148
|
}
|
|
149
|
-
if (ch ===
|
|
149
|
+
if (ch === "\\") {
|
|
150
150
|
escaping = true;
|
|
151
151
|
continue;
|
|
152
152
|
}
|
|
@@ -161,11 +161,11 @@ function extractBalancedJsonObject(source, startAt) {
|
|
|
161
161
|
quote = ch;
|
|
162
162
|
continue;
|
|
163
163
|
}
|
|
164
|
-
if (ch ===
|
|
164
|
+
if (ch === "{") {
|
|
165
165
|
depth += 1;
|
|
166
166
|
continue;
|
|
167
167
|
}
|
|
168
|
-
if (ch ===
|
|
168
|
+
if (ch === "}") {
|
|
169
169
|
depth -= 1;
|
|
170
170
|
if (depth === 0) {
|
|
171
171
|
return source.slice(start, i + 1);
|