@steipete/summarize-core 0.10.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/dist/esm/content/index.js +5 -5
- package/dist/esm/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/client.js +20 -9
- package/dist/esm/content/link-preview/client.js.map +1 -1
- package/dist/esm/content/link-preview/content/article.js +84 -83
- package/dist/esm/content/link-preview/content/article.js.map +1 -1
- package/dist/esm/content/link-preview/content/cleaner.js +23 -20
- package/dist/esm/content/link-preview/content/cleaner.js.map +1 -1
- package/dist/esm/content/link-preview/content/constants.js.map +1 -1
- package/dist/esm/content/link-preview/content/fetcher.js +46 -40
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
- package/dist/esm/content/link-preview/content/firecrawl.js +16 -16
- package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -1
- package/dist/esm/content/link-preview/content/html.js +29 -27
- package/dist/esm/content/link-preview/content/html.js.map +1 -1
- package/dist/esm/content/link-preview/content/index.js +141 -88
- package/dist/esm/content/link-preview/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/content/jsonld.js +12 -12
- package/dist/esm/content/link-preview/content/jsonld.js.map +1 -1
- package/dist/esm/content/link-preview/content/parsers.js +20 -20
- package/dist/esm/content/link-preview/content/parsers.js.map +1 -1
- package/dist/esm/content/link-preview/content/podcast-utils.js +34 -34
- package/dist/esm/content/link-preview/content/podcast-utils.js.map +1 -1
- package/dist/esm/content/link-preview/content/readability.js +16 -15
- package/dist/esm/content/link-preview/content/readability.js.map +1 -1
- package/dist/esm/content/link-preview/content/twitter-utils.js +24 -11
- package/dist/esm/content/link-preview/content/twitter-utils.js.map +1 -1
- package/dist/esm/content/link-preview/content/types.js +1 -1
- package/dist/esm/content/link-preview/content/types.js.map +1 -1
- package/dist/esm/content/link-preview/content/utils.js +17 -17
- package/dist/esm/content/link-preview/content/utils.js.map +1 -1
- package/dist/esm/content/link-preview/content/video.js +19 -19
- package/dist/esm/content/link-preview/content/video.js.map +1 -1
- package/dist/esm/content/link-preview/content/visibility.js +121 -0
- package/dist/esm/content/link-preview/content/visibility.js.map +1 -0
- package/dist/esm/content/link-preview/content/youtube.js +10 -10
- package/dist/esm/content/link-preview/content/youtube.js.map +1 -1
- package/dist/esm/content/link-preview/deps.js +16 -16
- package/dist/esm/content/link-preview/deps.js.map +1 -1
- package/dist/esm/content/link-preview/fetch-with-timeout.js +4 -4
- package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -1
- package/dist/esm/content/link-preview/types.js +1 -1
- package/dist/esm/content/link-preview/types.js.map +1 -1
- package/dist/esm/content/transcript/cache.js +22 -22
- package/dist/esm/content/transcript/cache.js.map +1 -1
- package/dist/esm/content/transcript/index.js +34 -24
- package/dist/esm/content/transcript/index.js.map +1 -1
- package/dist/esm/content/transcript/normalize.js +10 -10
- package/dist/esm/content/transcript/normalize.js.map +1 -1
- package/dist/esm/content/transcript/parse.js +31 -31
- package/dist/esm/content/transcript/parse.js.map +1 -1
- package/dist/esm/content/transcript/providers/generic.js +74 -78
- package/dist/esm/content/transcript/providers/generic.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js +36 -36
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/apple.js +5 -5
- package/dist/esm/content/transcript/providers/podcast/apple.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/constants.js +2 -2
- package/dist/esm/content/transcript/providers/podcast/constants.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/itunes.js +44 -42
- package/dist/esm/content/transcript/providers/podcast/itunes.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/json.js +4 -4
- package/dist/esm/content/transcript/providers/podcast/json.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/media.js +58 -49
- package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/results.js +2 -2
- package/dist/esm/content/transcript/providers/podcast/results.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/rss.js +29 -29
- package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js +38 -38
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/spotify.js +32 -32
- package/dist/esm/content/transcript/providers/podcast/spotify.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast.js +43 -47
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
- package/dist/esm/content/transcript/providers/transcription-start.js +59 -31
- package/dist/esm/content/transcript/providers/transcription-start.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/api.js +56 -56
- package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/apify.js +7 -7
- package/dist/esm/content/transcript/providers/youtube/apify.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/captions.js +76 -76
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +82 -75
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube.js +84 -77
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
- package/dist/esm/content/transcript/timestamps.js +8 -8
- package/dist/esm/content/transcript/timestamps.js.map +1 -1
- package/dist/esm/content/transcript/transcription-config.js +14 -0
- package/dist/esm/content/transcript/transcription-config.js.map +1 -0
- package/dist/esm/content/transcript/utils.js +35 -35
- package/dist/esm/content/transcript/utils.js.map +1 -1
- package/dist/esm/content/url.js +59 -28
- package/dist/esm/content/url.js.map +1 -1
- package/dist/esm/index.js +4 -3
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/language.js +77 -77
- package/dist/esm/language.js.map +1 -1
- package/dist/esm/openai/base-url.js +35 -0
- package/dist/esm/openai/base-url.js.map +1 -0
- package/dist/esm/processes.js +16 -16
- package/dist/esm/processes.js.map +1 -1
- package/dist/esm/prompts/cli.js +17 -17
- package/dist/esm/prompts/cli.js.map +1 -1
- package/dist/esm/prompts/file.js +54 -54
- package/dist/esm/prompts/file.js.map +1 -1
- package/dist/esm/prompts/format.js +2 -2
- package/dist/esm/prompts/format.js.map +1 -1
- package/dist/esm/prompts/index.js +5 -5
- package/dist/esm/prompts/index.js.map +1 -1
- package/dist/esm/prompts/link-summary.js +65 -65
- package/dist/esm/prompts/link-summary.js.map +1 -1
- package/dist/esm/prompts/summary-lengths.js +10 -10
- package/dist/esm/prompts/summary-lengths.js.map +1 -1
- package/dist/esm/prompts/summary-system.js +9 -9
- package/dist/esm/prompts/summary-system.js.map +1 -1
- package/dist/esm/shared/contracts.js +1 -1
- package/dist/esm/shared/contracts.js.map +1 -1
- package/dist/esm/transcription/onnx-cli.js +69 -69
- package/dist/esm/transcription/onnx-cli.js.map +1 -1
- package/dist/esm/transcription/whisper/constants.js +3 -3
- package/dist/esm/transcription/whisper/constants.js.map +1 -1
- package/dist/esm/transcription/whisper/core.js +148 -59
- package/dist/esm/transcription/whisper/core.js.map +1 -1
- package/dist/esm/transcription/whisper/fal.js +14 -14
- package/dist/esm/transcription/whisper/fal.js.map +1 -1
- package/dist/esm/transcription/whisper/ffmpeg.js +106 -106
- package/dist/esm/transcription/whisper/ffmpeg.js.map +1 -1
- package/dist/esm/transcription/whisper/groq.js +46 -0
- package/dist/esm/transcription/whisper/groq.js.map +1 -0
- package/dist/esm/transcription/whisper/openai.js +19 -13
- package/dist/esm/transcription/whisper/openai.js.map +1 -1
- package/dist/esm/transcription/whisper/utils.js +19 -19
- package/dist/esm/transcription/whisper/utils.js.map +1 -1
- package/dist/esm/transcription/whisper/whisper-cpp.js +64 -64
- package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -1
- package/dist/esm/transcription/whisper.js +4 -4
- package/dist/esm/transcription/whisper.js.map +1 -1
- package/dist/types/content/cache/types.d.ts +1 -1
- package/dist/types/content/index.d.ts +7 -7
- package/dist/types/content/link-preview/client.d.ts +7 -4
- package/dist/types/content/link-preview/content/cleaner.d.ts +1 -0
- package/dist/types/content/link-preview/content/fetcher.d.ts +2 -2
- package/dist/types/content/link-preview/content/firecrawl.d.ts +7 -7
- package/dist/types/content/link-preview/content/html.d.ts +8 -8
- package/dist/types/content/link-preview/content/index.d.ts +3 -3
- package/dist/types/content/link-preview/content/twitter-utils.d.ts +1 -0
- package/dist/types/content/link-preview/content/types.d.ts +8 -8
- package/dist/types/content/link-preview/content/utils.d.ts +3 -3
- package/dist/types/content/link-preview/content/video.d.ts +1 -1
- package/dist/types/content/link-preview/content/visibility.d.ts +1 -0
- package/dist/types/content/link-preview/deps.d.ts +36 -33
- package/dist/types/content/link-preview/types.d.ts +4 -4
- package/dist/types/content/transcript/cache.d.ts +4 -4
- package/dist/types/content/transcript/index.d.ts +7 -7
- package/dist/types/content/transcript/parse.d.ts +1 -1
- package/dist/types/content/transcript/providers/generic.d.ts +1 -1
- package/dist/types/content/transcript/providers/podcast/apple-flow.d.ts +2 -2
- package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +4 -4
- package/dist/types/content/transcript/providers/podcast/media.d.ts +9 -6
- package/dist/types/content/transcript/providers/podcast/results.d.ts +3 -3
- package/dist/types/content/transcript/providers/podcast/rss.d.ts +1 -1
- package/dist/types/content/transcript/providers/podcast/spotify-flow.d.ts +2 -2
- package/dist/types/content/transcript/providers/podcast/spotify.d.ts +2 -2
- package/dist/types/content/transcript/providers/podcast.d.ts +5 -5
- package/dist/types/content/transcript/providers/transcription-start.d.ts +14 -8
- package/dist/types/content/transcript/providers/youtube/api.d.ts +1 -1
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +1 -1
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +11 -8
- package/dist/types/content/transcript/providers/youtube.d.ts +1 -1
- package/dist/types/content/transcript/timestamps.d.ts +1 -1
- package/dist/types/content/transcript/transcription-config.d.ts +15 -0
- package/dist/types/content/transcript/types.d.ts +12 -9
- package/dist/types/content/transcript/utils.d.ts +1 -1
- package/dist/types/content/url.d.ts +5 -3
- package/dist/types/index.d.ts +5 -4
- package/dist/types/language.d.ts +4 -4
- package/dist/types/openai/base-url.d.ts +14 -0
- package/dist/types/processes.d.ts +2 -2
- package/dist/types/prompts/cli.d.ts +3 -3
- package/dist/types/prompts/file.d.ts +2 -2
- package/dist/types/prompts/index.d.ts +6 -6
- package/dist/types/prompts/link-summary.d.ts +3 -3
- package/dist/types/prompts/summary-lengths.d.ts +1 -1
- package/dist/types/transcription/onnx-cli.d.ts +3 -3
- package/dist/types/transcription/whisper/core.d.ts +6 -3
- package/dist/types/transcription/whisper/groq.d.ts +2 -0
- package/dist/types/transcription/whisper/openai.d.ts +6 -1
- package/dist/types/transcription/whisper/types.d.ts +1 -1
- package/dist/types/transcription/whisper/whisper-cpp.d.ts +1 -1
- package/dist/types/transcription/whisper.d.ts +5 -5
- package/package.json +13 -13
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
import { BLOCKED_HTML_HINT_PATTERN, TRANSCRIPTION_TIMEOUT_MS } from
|
|
2
|
-
import { getJsonNumber, getJsonPath, getJsonString } from
|
|
1
|
+
import { BLOCKED_HTML_HINT_PATTERN, TRANSCRIPTION_TIMEOUT_MS } from "./constants.js";
|
|
2
|
+
import { getJsonNumber, getJsonPath, getJsonString } from "./json.js";
|
|
3
3
|
export function extractSpotifyEpisodeId(url) {
|
|
4
4
|
try {
|
|
5
5
|
const parsed = new URL(url);
|
|
6
6
|
const host = parsed.hostname.toLowerCase();
|
|
7
|
-
if (!host.endsWith(
|
|
7
|
+
if (!host.endsWith("spotify.com"))
|
|
8
8
|
return null;
|
|
9
|
-
const parts = parsed.pathname.split(
|
|
10
|
-
const idx = parts.indexOf(
|
|
9
|
+
const parts = parsed.pathname.split("/").filter(Boolean);
|
|
10
|
+
const idx = parts.indexOf("episode");
|
|
11
11
|
const id = idx >= 0 ? parts[idx + 1] : null;
|
|
12
12
|
return id && /^[A-Za-z0-9]+$/.test(id) ? id : null;
|
|
13
13
|
}
|
|
@@ -21,31 +21,31 @@ export function extractSpotifyEmbedData(html) {
|
|
|
21
21
|
return null;
|
|
22
22
|
try {
|
|
23
23
|
const json = JSON.parse(match[1]);
|
|
24
|
-
const showTitle = (getJsonString(json, [
|
|
25
|
-
const episodeTitle = (getJsonString(json, [
|
|
24
|
+
const showTitle = (getJsonString(json, ["props", "pageProps", "state", "data", "entity", "subtitle"]) ?? "").trim();
|
|
25
|
+
const episodeTitle = (getJsonString(json, ["props", "pageProps", "state", "data", "entity", "title"]) ?? "").trim();
|
|
26
26
|
const durationMs = getJsonNumber(json, [
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
27
|
+
"props",
|
|
28
|
+
"pageProps",
|
|
29
|
+
"state",
|
|
30
|
+
"data",
|
|
31
|
+
"entity",
|
|
32
|
+
"duration",
|
|
33
33
|
]);
|
|
34
34
|
const drmFormat = getJsonString(json, [
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
35
|
+
"props",
|
|
36
|
+
"pageProps",
|
|
37
|
+
"state",
|
|
38
|
+
"data",
|
|
39
|
+
"defaultAudioFileObject",
|
|
40
|
+
"format",
|
|
41
41
|
]) ?? null;
|
|
42
|
-
const audioUrl = pickSpotifyEmbedAudioUrl(getJsonPath(json, [
|
|
42
|
+
const audioUrl = pickSpotifyEmbedAudioUrl(getJsonPath(json, ["props", "pageProps", "state", "data", "defaultAudioFileObject", "url"]));
|
|
43
43
|
if (!showTitle || !episodeTitle)
|
|
44
44
|
return null;
|
|
45
45
|
return {
|
|
46
46
|
showTitle,
|
|
47
47
|
episodeTitle,
|
|
48
|
-
durationSeconds: typeof durationMs ===
|
|
48
|
+
durationSeconds: typeof durationMs === "number" && Number.isFinite(durationMs) ? durationMs / 1000 : null,
|
|
49
49
|
drmFormat,
|
|
50
50
|
audioUrl,
|
|
51
51
|
};
|
|
@@ -60,10 +60,10 @@ export async function fetchSpotifyEmbedHtml({ embedUrl, episodeId, fetchImpl, sc
|
|
|
60
60
|
const embedResponse = await fetchImpl(embedUrl, {
|
|
61
61
|
signal: AbortSignal.timeout(TRANSCRIPTION_TIMEOUT_MS),
|
|
62
62
|
headers: {
|
|
63
|
-
accept:
|
|
64
|
-
|
|
63
|
+
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
64
|
+
"accept-language": "en-US,en;q=0.9",
|
|
65
65
|
referer: `https://open.spotify.com/episode/${episodeId}`,
|
|
66
|
-
|
|
66
|
+
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
|
|
67
67
|
},
|
|
68
68
|
});
|
|
69
69
|
if (!embedResponse.ok) {
|
|
@@ -71,9 +71,9 @@ export async function fetchSpotifyEmbedHtml({ embedUrl, episodeId, fetchImpl, sc
|
|
|
71
71
|
}
|
|
72
72
|
const embedHtml = await embedResponse.text();
|
|
73
73
|
if (!looksLikeBlockedHtml(embedHtml)) {
|
|
74
|
-
return { html: embedHtml, via:
|
|
74
|
+
return { html: embedHtml, via: "fetch" };
|
|
75
75
|
}
|
|
76
|
-
throw new Error(
|
|
76
|
+
throw new Error("Spotify embed HTML looked blocked (captcha)");
|
|
77
77
|
}
|
|
78
78
|
catch (error) {
|
|
79
79
|
if (!scrapeWithFirecrawl) {
|
|
@@ -81,29 +81,29 @@ export async function fetchSpotifyEmbedHtml({ embedUrl, episodeId, fetchImpl, sc
|
|
|
81
81
|
}
|
|
82
82
|
// Firecrawl is optional and only used as a fallback when Spotify blocks direct fetches.
|
|
83
83
|
const payload = await scrapeWithFirecrawl(embedUrl, {
|
|
84
|
-
cacheMode:
|
|
84
|
+
cacheMode: "bypass",
|
|
85
85
|
timeoutMs: TRANSCRIPTION_TIMEOUT_MS,
|
|
86
86
|
});
|
|
87
|
-
const text = (payload?.html ?? payload?.markdown ??
|
|
87
|
+
const text = (payload?.html ?? payload?.markdown ?? "").trim();
|
|
88
88
|
if (!text) {
|
|
89
89
|
throw new Error(`Spotify embed fetch failed and Firecrawl returned empty content (${error instanceof Error ? error.message : String(error)})`);
|
|
90
90
|
}
|
|
91
91
|
if (looksLikeBlockedHtml(text)) {
|
|
92
|
-
throw new Error(
|
|
92
|
+
throw new Error("Spotify embed blocked even via Firecrawl (captcha)");
|
|
93
93
|
}
|
|
94
|
-
return { html: text, via:
|
|
94
|
+
return { html: text, via: "firecrawl" };
|
|
95
95
|
}
|
|
96
96
|
}
|
|
97
97
|
export function looksLikeBlockedHtml(html) {
|
|
98
98
|
const head = html.slice(0, 20000).toLowerCase();
|
|
99
99
|
// Spotify embed pages include `__NEXT_DATA__` even when the rest of the HTML is minimal; treat that
|
|
100
100
|
// as a strong "not blocked" signal to avoid unnecessary Firecrawl fallbacks.
|
|
101
|
-
if (head.includes(
|
|
101
|
+
if (head.includes("__next_data__"))
|
|
102
102
|
return false;
|
|
103
103
|
return BLOCKED_HTML_HINT_PATTERN.test(head);
|
|
104
104
|
}
|
|
105
105
|
function pickSpotifyEmbedAudioUrl(raw) {
|
|
106
|
-
const urls = Array.isArray(raw) ? raw.filter((v) => typeof v ===
|
|
106
|
+
const urls = Array.isArray(raw) ? raw.filter((v) => typeof v === "string") : [];
|
|
107
107
|
const normalized = urls.map((u) => u.trim()).filter((u) => /^https?:\/\//i.test(u));
|
|
108
108
|
if (normalized.length === 0)
|
|
109
109
|
return null;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"spotify.js","sourceRoot":"","sources":["../../../../../../src/content/transcript/providers/podcast/spotify.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,
|
|
1
|
+
{"version":3,"file":"spotify.js","sourceRoot":"","sources":["../../../../../../src/content/transcript/providers/podcast/spotify.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,yBAAyB,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AACrF,OAAO,EAAE,aAAa,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAEtE,MAAM,UAAU,uBAAuB,CAAC,GAAW;IACjD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QAC3C,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC;YAAE,OAAO,IAAI,CAAC;QAE/C,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACzD,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QACrC,MAAM,EAAE,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAC5C,OAAO,EAAE,IAAI,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACrD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,IAAY;IAOlD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,iEAAiE,CAAC,CAAC;IAC5F,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IAC7B,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAY,CAAC;QAC7C,MAAM,SAAS,GAAG,CAChB,aAAa,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC,IAAI,EAAE,CACzF,CAAC,IAAI,EAAE,CAAC;QACT,MAAM,YAAY,GAAG,CACnB,aAAa,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CACtF,CAAC,IAAI,EAAE,CAAC;QACT,MAAM,UAAU,GAAG,aAAa,CAAC,IAAI,EAAE;YACrC,OAAO;YACP,WAAW;YACX,OAAO;YACP,MAAM;YACN,QAAQ;YACR,UAAU;SACX,CAAC,CAAC;QACH,MAAM,SAAS,GACb,aAAa,CAAC,IAAI,EAAE;YAClB,OAAO;YACP,WAAW;YACX,OAAO;YACP,MAAM;YACN,wBAAwB;YACxB,QAAQ;SACT,CAAC,IAAI,IAAI,CAAC;QACb,MAAM,QAAQ,GAAG,wBAAwB,CACvC,WAAW,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,EAAE,wBAAwB,EAAE,KAAK,CAAC,CAAC,CAC5F,CAAC;QACF,IAAI,CAAC,SAAS,IAAI,CAAC,YAAY;YAAE,OAAO,IAAI,CAAC;QAC7C,OAAO;YACL,SAAS;YACT,YAAY;YACZ,eAAe,EACb,OAAO,UAAU,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI;YAC1F,SAAS;YACT,QAAQ;SACT,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,EAC1C,QAAQ,EACR,SAAS,EACT,SAAS,EACT,mBAAmB,GAWpB;IACC,IAAI,CAAC;QACH,qFAAqF;QACrF,MAAM,aAAa,GAAG,MAAM,SAAS,CAAC,QAAQ,EAAE;YAC9C,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,wBAAwB,CAAC;YACrD,OAAO,EAAE;gBACP,MAAM,EAAE,iEAAiE;gBACzE,iBAAiB,EAAE,gBAAgB;gBACnC,OAAO,EAAE,oCAAoC,SAAS,EAAE;gBACxD,YAAY,EACV,iHAAiH;aACpH;SACF,CAAC,CAAC;QACH,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,+BAA+B,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC;QAC1E,CAAC;QACD,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,CAAC;QAC7C,IAAI,CAAC,oBAAoB,CAAC,SAAS,CAAC,EAAE,CAAC;YACrC,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC;QAC3C,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;IACjE,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,CAAC,mBAAmB,EAAE,CAAC;YACzB,MAAM,KAAK,CAAC;QACd,CAAC;QAED,wFAAwF;QACxF,MAAM,OAAO,GAAG,MAAM,mBAAmB,CAAC,QAAQ,EAAE;YAClD,SAAS,EAAE,QAAQ;YACnB,SAAS,EAAE,wBAAwB;SACpC,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,CAAC,OAAO,EAAE,IAAI,IAAI,OAAO,EAAE,QAAQ,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAC/D,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CACb,oEAAoE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAC9H,CAAC;QACJ,CAAC;QACD,IAAI,oBAAoB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;QACxE,CAAC;QACD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,WAAW,EAAE,CAAC;IAC1C,CAAC;AACH,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,IAAY;IAC/C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;IAChD,oGAAoG;IACpG,6EAA6E;IAC7E,IAAI,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC;QAAE,OAAO,KAAK,CAAC;IACjD,OAAO,yBAAyB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,wBAAwB,CAAC,GAAY;IAC5C,MAAM,IAAI,GAAa,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC1F,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IACpF,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACzC,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IACzD,OAAO,IAAI,IAAI,UAAU,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;AACvC,CAAC"}
|
|
@@ -1,19 +1,20 @@
|
|
|
1
|
-
import { isDirectMediaUrl } from
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import { fetchSpotifyTranscript } from
|
|
10
|
-
import {
|
|
1
|
+
import { isDirectMediaUrl } from "../../url.js";
|
|
2
|
+
import { resolveTranscriptionConfig } from "../transcription-config.js";
|
|
3
|
+
import { fetchAppleTranscriptFromEmbeddedHtml, fetchAppleTranscriptFromItunesLookup, } from "./podcast/apple-flow.js";
|
|
4
|
+
import { FEED_HINT_URL_PATTERN, PODCAST_PLATFORM_HOST_PATTERN } from "./podcast/constants.js";
|
|
5
|
+
import { resolvePodcastFeedUrlFromItunesSearch } from "./podcast/itunes.js";
|
|
6
|
+
import { downloadCappedBytes, downloadToFile, filenameFromUrl, formatBytes, normalizeHeaderType, parseContentLength, probeRemoteMedia, transcribeMediaUrl, } from "./podcast/media.js";
|
|
7
|
+
import { buildWhisperResult, joinNotes } from "./podcast/results.js";
|
|
8
|
+
import { decodeXmlEntities, extractEnclosureForEpisode, extractEnclosureFromFeed, extractItemDurationSeconds, looksLikeRssOrAtomFeed, tryFetchTranscriptFromFeedXml, } from "./podcast/rss.js";
|
|
9
|
+
import { fetchSpotifyTranscript } from "./podcast/spotify-flow.js";
|
|
10
|
+
import { looksLikeBlockedHtml } from "./podcast/spotify.js";
|
|
11
|
+
import { resolveTranscriptionAvailability } from "./transcription-start.js";
|
|
11
12
|
export const canHandle = ({ url, html }) => {
|
|
12
13
|
// Direct media URLs (e.g., .mp3, .wav) should be handled by the generic provider
|
|
13
14
|
// even if the URL contains "podcast" in the path (like "rt_podcast996.mp3")
|
|
14
15
|
if (isDirectMediaUrl(url))
|
|
15
16
|
return false;
|
|
16
|
-
if (typeof html ===
|
|
17
|
+
if (typeof html === "string" && looksLikeRssOrAtomFeed(html))
|
|
17
18
|
return true;
|
|
18
19
|
if (PODCAST_PLATFORM_HOST_PATTERN.test(url))
|
|
19
20
|
return true;
|
|
@@ -22,35 +23,32 @@ export const canHandle = ({ url, html }) => {
|
|
|
22
23
|
export const fetchTranscript = async (context, options) => {
|
|
23
24
|
const attemptedProviders = [];
|
|
24
25
|
const notes = [];
|
|
26
|
+
const transcription = resolveTranscriptionConfig(options);
|
|
25
27
|
const pushOnce = (provider) => {
|
|
26
28
|
if (!attemptedProviders.includes(provider))
|
|
27
29
|
attemptedProviders.push(provider);
|
|
28
30
|
};
|
|
29
31
|
const transcriptionAvailability = await resolveTranscriptionAvailability({
|
|
30
|
-
|
|
31
|
-
openaiApiKey: options.openaiApiKey,
|
|
32
|
-
falApiKey: options.falApiKey,
|
|
32
|
+
transcription,
|
|
33
33
|
});
|
|
34
34
|
const missingTranscriptionProviderResult = () => ({
|
|
35
35
|
text: null,
|
|
36
36
|
source: null,
|
|
37
37
|
attemptedProviders,
|
|
38
|
-
metadata: { provider:
|
|
39
|
-
notes:
|
|
38
|
+
metadata: { provider: "podcast", reason: "missing_transcription_keys" },
|
|
39
|
+
notes: "Missing transcription provider (install whisper-cpp or set OPENAI_API_KEY/FAL_KEY)",
|
|
40
40
|
});
|
|
41
41
|
const ensureTranscriptionProvider = () => {
|
|
42
42
|
return !transcriptionAvailability.hasAnyProvider ? missingTranscriptionProviderResult() : null;
|
|
43
43
|
};
|
|
44
44
|
const progress = {
|
|
45
45
|
url: context.url,
|
|
46
|
-
service:
|
|
46
|
+
service: "podcast",
|
|
47
47
|
onProgress: options.onProgress ?? null,
|
|
48
48
|
};
|
|
49
49
|
const transcribe = (request) => transcribeMediaUrl({
|
|
50
50
|
fetchImpl: options.fetch,
|
|
51
|
-
|
|
52
|
-
openaiApiKey: options.openaiApiKey,
|
|
53
|
-
falApiKey: options.falApiKey,
|
|
51
|
+
transcription,
|
|
54
52
|
notes,
|
|
55
53
|
progress,
|
|
56
54
|
...request,
|
|
@@ -64,9 +62,9 @@ export const fetchTranscript = async (context, options) => {
|
|
|
64
62
|
ensureTranscriptionProvider,
|
|
65
63
|
transcribe,
|
|
66
64
|
};
|
|
67
|
-
const feedHtml = typeof context.html ===
|
|
65
|
+
const feedHtml = typeof context.html === "string" ? context.html : null;
|
|
68
66
|
if (feedHtml && /podcast:transcript/i.test(feedHtml)) {
|
|
69
|
-
pushOnce(
|
|
67
|
+
pushOnce("podcastTranscript");
|
|
70
68
|
const direct = await tryFetchTranscriptFromFeedXml({
|
|
71
69
|
fetchImpl: options.fetch,
|
|
72
70
|
feedXml: feedHtml,
|
|
@@ -76,13 +74,13 @@ export const fetchTranscript = async (context, options) => {
|
|
|
76
74
|
if (direct) {
|
|
77
75
|
return {
|
|
78
76
|
text: direct.text,
|
|
79
|
-
source:
|
|
77
|
+
source: "podcastTranscript",
|
|
80
78
|
segments: options.transcriptTimestamps ? (direct.segments ?? null) : null,
|
|
81
79
|
attemptedProviders,
|
|
82
80
|
notes: joinNotes(notes),
|
|
83
81
|
metadata: {
|
|
84
|
-
provider:
|
|
85
|
-
kind:
|
|
82
|
+
provider: "podcast",
|
|
83
|
+
kind: "rss_podcast_transcript",
|
|
86
84
|
transcriptUrl: direct.transcriptUrl,
|
|
87
85
|
transcriptType: direct.transcriptType,
|
|
88
86
|
},
|
|
@@ -106,10 +104,10 @@ export const fetchTranscript = async (context, options) => {
|
|
|
106
104
|
const missing = ensureTranscriptionProvider();
|
|
107
105
|
if (missing)
|
|
108
106
|
return missing;
|
|
109
|
-
pushOnce(
|
|
107
|
+
pushOnce("whisper");
|
|
110
108
|
const transcript = await transcribe({
|
|
111
109
|
url: resolvedUrl,
|
|
112
|
-
filenameHint:
|
|
110
|
+
filenameHint: "episode.mp3",
|
|
113
111
|
durationSecondsHint: durationSeconds,
|
|
114
112
|
});
|
|
115
113
|
return buildWhisperResult({
|
|
@@ -118,8 +116,8 @@ export const fetchTranscript = async (context, options) => {
|
|
|
118
116
|
outcome: transcript,
|
|
119
117
|
includeProviderOnFailure: true,
|
|
120
118
|
metadata: {
|
|
121
|
-
provider:
|
|
122
|
-
kind:
|
|
119
|
+
provider: "podcast",
|
|
120
|
+
kind: "rss_enclosure",
|
|
123
121
|
enclosureUrl: resolvedUrl,
|
|
124
122
|
durationSeconds,
|
|
125
123
|
},
|
|
@@ -131,27 +129,27 @@ export const fetchTranscript = async (context, options) => {
|
|
|
131
129
|
source: null,
|
|
132
130
|
attemptedProviders,
|
|
133
131
|
notes: `Podcast enclosure download failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
134
|
-
metadata: { provider:
|
|
132
|
+
metadata: { provider: "podcast", kind: "rss_enclosure", enclosureUrl: resolvedUrl },
|
|
135
133
|
};
|
|
136
134
|
}
|
|
137
135
|
}
|
|
138
136
|
const ogAudioUrl = feedHtml ? extractOgAudioUrl(feedHtml) : null;
|
|
139
137
|
if (ogAudioUrl) {
|
|
140
|
-
attemptedProviders.push(
|
|
138
|
+
attemptedProviders.push("whisper");
|
|
141
139
|
const result = await transcribe({
|
|
142
140
|
url: ogAudioUrl,
|
|
143
|
-
filenameHint:
|
|
141
|
+
filenameHint: "audio.mp3",
|
|
144
142
|
durationSecondsHint: null,
|
|
145
143
|
});
|
|
146
144
|
if (result.text) {
|
|
147
|
-
notes.push(
|
|
145
|
+
notes.push("Used og:audio media (may be a preview clip, not the full episode)");
|
|
148
146
|
return buildWhisperResult({
|
|
149
147
|
attemptedProviders,
|
|
150
148
|
notes,
|
|
151
149
|
outcome: result,
|
|
152
150
|
metadata: {
|
|
153
|
-
provider:
|
|
154
|
-
kind:
|
|
151
|
+
provider: "podcast",
|
|
152
|
+
kind: "og_audio",
|
|
155
153
|
ogAudioUrl,
|
|
156
154
|
},
|
|
157
155
|
});
|
|
@@ -161,31 +159,29 @@ export const fetchTranscript = async (context, options) => {
|
|
|
161
159
|
source: null,
|
|
162
160
|
attemptedProviders,
|
|
163
161
|
notes: result.error?.message ?? null,
|
|
164
|
-
metadata: { provider:
|
|
162
|
+
metadata: { provider: "podcast", kind: "og_audio", ogAudioUrl },
|
|
165
163
|
};
|
|
166
164
|
}
|
|
167
165
|
if (options.ytDlpPath) {
|
|
168
|
-
attemptedProviders.push(
|
|
166
|
+
attemptedProviders.push("yt-dlp");
|
|
169
167
|
try {
|
|
170
|
-
const mod = await import(
|
|
168
|
+
const mod = await import("./youtube/yt-dlp.js");
|
|
171
169
|
const result = await mod.fetchTranscriptWithYtDlp({
|
|
172
170
|
ytDlpPath: options.ytDlpPath,
|
|
173
|
-
|
|
174
|
-
openaiApiKey: options.openaiApiKey,
|
|
175
|
-
falApiKey: options.falApiKey,
|
|
171
|
+
transcription,
|
|
176
172
|
mediaCache: options.mediaCache ?? null,
|
|
177
173
|
url: context.url,
|
|
178
|
-
service:
|
|
179
|
-
mediaKind:
|
|
174
|
+
service: "podcast",
|
|
175
|
+
mediaKind: "audio",
|
|
180
176
|
});
|
|
181
177
|
if (result.notes.length > 0)
|
|
182
178
|
notes.push(...result.notes);
|
|
183
179
|
return {
|
|
184
180
|
text: result.text,
|
|
185
|
-
source: result.text ?
|
|
181
|
+
source: result.text ? "yt-dlp" : null,
|
|
186
182
|
attemptedProviders,
|
|
187
183
|
notes: joinNotes(notes),
|
|
188
|
-
metadata: { provider:
|
|
184
|
+
metadata: { provider: "podcast", kind: "yt_dlp", transcriptionProvider: result.provider },
|
|
189
185
|
};
|
|
190
186
|
}
|
|
191
187
|
catch (error) {
|
|
@@ -194,7 +190,7 @@ export const fetchTranscript = async (context, options) => {
|
|
|
194
190
|
source: null,
|
|
195
191
|
attemptedProviders,
|
|
196
192
|
notes: `yt-dlp transcription failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
197
|
-
metadata: { provider:
|
|
193
|
+
metadata: { provider: "podcast", kind: "yt_dlp" },
|
|
198
194
|
};
|
|
199
195
|
}
|
|
200
196
|
}
|
|
@@ -205,7 +201,7 @@ export const fetchTranscript = async (context, options) => {
|
|
|
205
201
|
text: null,
|
|
206
202
|
source: null,
|
|
207
203
|
attemptedProviders,
|
|
208
|
-
metadata: { provider:
|
|
204
|
+
metadata: { provider: "podcast", reason: "no_enclosure_and_no_yt_dlp" },
|
|
209
205
|
};
|
|
210
206
|
};
|
|
211
207
|
function extractOgAudioUrl(html) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"podcast.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/podcast.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"podcast.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/podcast.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,0BAA0B,EAAE,MAAM,4BAA4B,CAAC;AACxE,OAAO,EACL,oCAAoC,EACpC,oCAAoC,GACrC,MAAM,yBAAyB,CAAC;AACjC,OAAO,EAAE,qBAAqB,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AAC9F,OAAO,EAAE,qCAAqC,EAAE,MAAM,qBAAqB,CAAC;AAC5E,OAAO,EACL,mBAAmB,EACnB,cAAc,EACd,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,kBAAkB,EAClB,gBAAgB,EAGhB,kBAAkB,GACnB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,kBAAkB,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EACL,iBAAiB,EACjB,0BAA0B,EAC1B,wBAAwB,EACxB,0BAA0B,EAC1B,sBAAsB,EACtB,6BAA6B,GAC9B,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,EAAE,gCAAgC,EAAE,MAAM,0BAA0B,CAAC;AAE5E,MAAM,CAAC,MAAM,SAAS,GAAG,CAAC,EAAE,GAAG,EAAE,IAAI,EAAmB,EAAW,EAAE;IACnE,iFAAiF;IACjF,4EAA4E;IAC5E,IAAI,gBAAgB,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,sBAAsB,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1E,IAAI,6BAA6B,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACzD,OAAO,qBAAqB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzC,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,eAAe,GAAG,KAAK,EAClC,OAAwB,EACxB,OAA6B,EACJ,EAAE;IAC3B,MAAM,kBAAkB,GAAyC,EAAE,CAAC;IACpE,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,aAAa,GAAG,0BAA0B,CAAC,OAAO,CAAC,CAAC;IAE1D,MAAM,QAAQ,GAAG,CAAC,QAAsD,EAAE,EAAE;QAC1E,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,QAAQ,CAAC;YAAE,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAChF,CAAC,CAAC;IAEF,MAAM,yBAAyB,GAAG,MAAM,gCAAgC,CAAC;QACvE,aAAa;KACd,CAAC,CAAC;IAEH,MAAM,kCAAkC,GAAG,GAAmB,EAAE,CAAC,CAAC;QAChE,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,IAAI;QACZ,kBAAkB;QAClB,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,4BAA4B,EAAE;QACvE,KAAK,EAAE,oFAAoF;KAC5F,CAAC,CAAC;IAEH,MAAM,2BAA2B,GAAG,GAA0B,EAAE;QAC9D,OAAO,CAAC,yBAAyB,CAAC,cAAc,CAAC,CAAC,CAAC,kCAAkC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACjG,CAAC,CAAC;IAEF,MAAM,QAAQ,GAAG;QACf,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,OAAO,EAAE,SAAkB;QAC3B,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,IAAI;KACvC,CAAC;IAEF,MAAM,UAAU,GAAG,CAAC,OAA0B,EAAgC,EAAE,CAC9E,kBAAkB,CAAC;QACjB,SAAS,EAAE,OAAO,CAAC,KAAK;QACxB,aAAa;QACb,KAAK;QACL,QAAQ;QACR,GAAG,OAAO;KACX,CAAC,CAAC;IAEL,MAAM,IAAI,GAAuB;QAC/B,OAAO;QACP,OAAO;QACP,kBAAkB;QAClB,KAAK;QACL,QAAQ;QACR,2BAA2B;QAC3B,UAAU;KACX,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,OAAO,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;IACxE,IAAI,QAAQ,IAAI,qBAAqB,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACrD,QAAQ,CAAC,mBAAmB,CAAC,CAAC;QAC9B,MAAM,MAAM,GAAG,MAAM,6BAA6B,CAAC;YACjD,SAAS,EAAE,OAAO,CAAC,KAAK;YACxB,OAAO,EAAE,QAAQ;YACjB,YAAY,EAAE,IAAI;YAClB,KAAK;SACN,CAAC,CAAC;QACH,IAAI,MAAM,EAAE,CAAC;YACX,OAAO;gBACL,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,MAAM,EAAE,mBAAmB;gBAC3B,QAAQ,EAAE,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;gBACzE,kBAAkB;gBAClB,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC;gBACvB,QAAQ,EAAE;oBACR,QAAQ,EAAE,SAAS;oBACnB,IAAI,EAAE,wBAAwB;oBAC9B,aAAa,EAAE,MAAM,CAAC,aAAa;oBACnC,cAAc,EAAE,MAAM,CAAC,cAAc;iBACtC;aACF,CAAC;QACJ,CAAC;IACH,CAAC;IAED,MAAM,aAAa,GAAG,MAAM,sBAAsB,CAAC,IAAI,CAAC,CAAC;IACzD,IAAI,aAAa;QAAE,OAAO,aAAa,CAAC;IAExC,MAAM,iBAAiB,GAAG,MAAM,oCAAoC,CAAC,IAAI,CAAC,CAAC;IAC3E,IAAI,iBAAiB;QAAE,OAAO,iBAAiB,CAAC;IAEhD,MAAM,mBAAmB,GAAG,MAAM,oCAAoC,CAAC,IAAI,CAAC,CAAC;IAC7E,IAAI,mBAAmB;QAAE,OAAO,mBAAmB,CAAC;IAEpD,MAAM,gBAAgB,GAAG,QAAQ,CAAC,CAAC,CAAC,wBAAwB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9E,IAAI,gBAAgB,IAAI,QAAQ,EAAE,CAAC;QACjC,MAAM,WAAW,GAAG,iBAAiB,CAAC,gBAAgB,CAAC,YAAY,CAAC,CAAC;QACrE,MAAM,eAAe,GAAG,gBAAgB,CAAC,eAAe,CAAC;QACzD,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,2BAA2B,EAAE,CAAC;YAC9C,IAAI,OAAO;gBAAE,OAAO,OAAO,CAAC;YAC5B,QAAQ,CAAC,SAAS,CAAC,CAAC;YACpB,MAAM,UAAU,GAAG,MAAM,UAAU,CAAC;gBAClC,GAAG,EAAE,WAAW;gBAChB,YAAY,EAAE,aAAa;gBAC3B,mBAAmB,EAAE,eAAe;aACrC,CAAC,CAAC;YACH,OAAO,kBAAkB,CAAC;gBACxB,kBAAkB;gBAClB,KAAK;gBACL,OAAO,EAAE,UAAU;gBACnB,wBAAwB,EAAE,IAAI;gBAC9B,QAAQ,EAAE;oBACR,QAAQ,EAAE,SAAS;oBACnB,IAAI,EAAE,eAAe;oBACrB,YAAY,EAAE,WAAW;oBACzB,eAAe;iBAChB;aACF,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,IAAI,EAAE,IAAI;gBACV,MAAM,EAAE,IAAI;gBACZ,kBAAkB;gBAClB,KAAK,EAAE,sCAAsC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE;gBACrG,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,eAAe,EAAE,YAAY,EAAE,WAAW,EAAE;aACpF,CAAC;QACJ,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACjE,IAAI,UAAU,EAAE,CAAC;QACf,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACnC,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC;YAC9B,GAAG,EAAE,UAAU;YACf,YAAY,EAAE,WAAW;YACzB,mBAAmB,EAAE,IAAI;SAC1B,CAAC,CAAC;QACH,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAChB,KAAK,CAAC,IAAI,CAAC,mEAAmE,CAAC,CAAC;YAChF,OAAO,kBAAkB,CAAC;gBACxB,kBAAkB;gBAClB,KAAK;gBACL,OAAO,EAAE,MAAM;gBACf,QAAQ,EAAE;oBACR,QAAQ,EAAE,SAAS;oBACnB,IAAI,EAAE,UAAU;oBAChB,UAAU;iBACX;aACF,CAAC,CAAC;QACL,CAAC;QACD,OAAO;YACL,IAAI,EAAE,IAAI;YACV,MAAM,EAAE,IAAI;YACZ,kBAAkB;YAClB,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,OAAO,IAAI,IAAI;YACpC,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,UAAU,EAAE;SAChE,CAAC;IACJ,CAAC;IAED,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;QACtB,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAClC,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;YAChD,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,wBAAwB,CAAC;gBAChD,SAAS,EAAE,OAAO,CAAC,SAAS;gBAC5B,aAAa;gBACb,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,IAAI;gBACtC,GAAG,EAAE,OAAO,CAAC,GAAG;gBAChB,OAAO,EAAE,SAAS;gBAClB,SAAS,EAAE,OAAO;aACnB,CAAC,CAAC;YACH,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;gBAAE,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI;gBACrC,kBAAkB;gBAClB,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC;gBACvB,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE,qBAAqB,EAAE,MAAM,CAAC,QAAQ,EAAE;aAC1F,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,IAAI,EAAE,IAAI;gBACV,MAAM,EAAE,IAAI;gBACZ,kBAAkB;gBAClB,KAAK,EAAE,gCAAgC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE;gBAC/F,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE;aAClD,CAAC;QACJ,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,2BAA2B,EAAE,CAAC;IAC9C,IAAI,OAAO;QAAE,OAAO,OAAO,CAAC;IAE5B,OAAO;QACL,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,IAAI;QACZ,kBAAkB;QAClB,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,4BAA4B,EAAE;KACxE,CAAC;AACJ,CAAC,CAAC;AAEF,SAAS,iBAAiB,CAAC,IAAY;IACrC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,qEAAqE,CAAC,CAAC;IAChG,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IAC7B,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAClC,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAC5B,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC;QAAE,OAAO,IAAI,CAAC;IAClD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,6EAA6E;AAC7E,MAAM,CAAC,MAAM,QAAQ,GAAG;IACtB,gBAAgB;IAChB,mBAAmB;IACnB,cAAc;IACd,mBAAmB;IACnB,kBAAkB;IAClB,eAAe;IACf,oBAAoB;IACpB,0BAA0B;IAC1B,0BAA0B;IAC1B,qCAAqC;IACrC,WAAW;CACZ,CAAC"}
|
|
@@ -1,50 +1,78 @@
|
|
|
1
|
-
import { isOnnxCliConfigured, resolvePreferredOnnxModel } from
|
|
2
|
-
import { isWhisperCppReady, resolveWhisperCppModelNameForDisplay, } from
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
import { isOnnxCliConfigured, resolvePreferredOnnxModel } from "../../../transcription/onnx-cli.js";
|
|
2
|
+
import { isWhisperCppReady, resolveWhisperCppModelNameForDisplay, } from "../../../transcription/whisper.js";
|
|
3
|
+
import { resolveTranscriptionConfig } from "../transcription-config.js";
|
|
4
|
+
export async function resolveTranscriptionAvailability({ env, transcription, groqApiKey, openaiApiKey, falApiKey, }) {
|
|
5
|
+
const effective = resolveTranscriptionConfig({
|
|
6
|
+
env,
|
|
7
|
+
transcription,
|
|
8
|
+
groqApiKey,
|
|
9
|
+
openaiApiKey,
|
|
10
|
+
falApiKey,
|
|
11
|
+
});
|
|
12
|
+
const effectiveEnv = effective.env ?? process.env;
|
|
5
13
|
const preferredOnnxModel = resolvePreferredOnnxModel(effectiveEnv);
|
|
6
14
|
const onnxReady = preferredOnnxModel
|
|
7
15
|
? isOnnxCliConfigured(preferredOnnxModel, effectiveEnv)
|
|
8
16
|
: false;
|
|
9
17
|
const hasLocalWhisper = await isWhisperCppReady();
|
|
10
|
-
const
|
|
11
|
-
const
|
|
12
|
-
const
|
|
18
|
+
const hasGroq = Boolean(effective.groqApiKey);
|
|
19
|
+
const hasOpenai = Boolean(effective.openaiApiKey);
|
|
20
|
+
const hasFal = Boolean(effective.falApiKey);
|
|
21
|
+
const hasAnyProvider = onnxReady || hasLocalWhisper || hasGroq || hasOpenai || hasFal;
|
|
13
22
|
return {
|
|
14
23
|
preferredOnnxModel,
|
|
15
24
|
onnxReady,
|
|
16
25
|
hasLocalWhisper,
|
|
26
|
+
hasGroq,
|
|
17
27
|
hasOpenai,
|
|
18
28
|
hasFal,
|
|
19
29
|
hasAnyProvider,
|
|
20
30
|
};
|
|
21
31
|
}
|
|
22
|
-
export async function resolveTranscriptionStartInfo({ env, openaiApiKey, falApiKey, }) {
|
|
23
|
-
const availability = await resolveTranscriptionAvailability({
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
32
|
+
export async function resolveTranscriptionStartInfo({ env, transcription, groqApiKey, openaiApiKey, falApiKey, }) {
|
|
33
|
+
const availability = await resolveTranscriptionAvailability({
|
|
34
|
+
env,
|
|
35
|
+
transcription,
|
|
36
|
+
groqApiKey,
|
|
37
|
+
openaiApiKey,
|
|
38
|
+
falApiKey,
|
|
39
|
+
});
|
|
40
|
+
const providerHint = availability.hasGroq
|
|
41
|
+
? availability.hasOpenai && availability.hasFal
|
|
42
|
+
? "groq->openai->fal"
|
|
43
|
+
: availability.hasOpenai
|
|
44
|
+
? "groq->openai"
|
|
45
|
+
: availability.hasFal
|
|
46
|
+
? "groq->fal"
|
|
47
|
+
: "groq"
|
|
48
|
+
: availability.onnxReady
|
|
49
|
+
? "onnx"
|
|
50
|
+
: availability.hasLocalWhisper
|
|
51
|
+
? "cpp"
|
|
52
|
+
: availability.hasOpenai && availability.hasFal
|
|
53
|
+
? "openai->fal"
|
|
54
|
+
: availability.hasOpenai
|
|
55
|
+
? "openai"
|
|
56
|
+
: availability.hasFal
|
|
57
|
+
? "fal"
|
|
58
|
+
: "unknown";
|
|
59
|
+
const modelId = providerHint === "onnx"
|
|
36
60
|
? availability.preferredOnnxModel
|
|
37
61
|
? `onnx/${availability.preferredOnnxModel}`
|
|
38
|
-
:
|
|
39
|
-
: providerHint ===
|
|
40
|
-
? ((await resolveWhisperCppModelNameForDisplay()) ??
|
|
41
|
-
: availability
|
|
42
|
-
? 'whisper-1->fal-ai/wizper'
|
|
43
|
-
: availability.hasOpenai
|
|
44
|
-
? 'whisper-1'
|
|
45
|
-
: availability.hasFal
|
|
46
|
-
? 'fal-ai/wizper'
|
|
47
|
-
: null;
|
|
62
|
+
: "onnx"
|
|
63
|
+
: providerHint === "cpp"
|
|
64
|
+
? ((await resolveWhisperCppModelNameForDisplay()) ?? "whisper.cpp")
|
|
65
|
+
: resolveCloudModelId(availability);
|
|
48
66
|
return { availability, providerHint, modelId };
|
|
49
67
|
}
|
|
68
|
+
function resolveCloudModelId(availability) {
|
|
69
|
+
const parts = [];
|
|
70
|
+
if (availability.hasGroq)
|
|
71
|
+
parts.push("groq/whisper-large-v3-turbo");
|
|
72
|
+
if (availability.hasOpenai)
|
|
73
|
+
parts.push("whisper-1");
|
|
74
|
+
if (availability.hasFal)
|
|
75
|
+
parts.push("fal-ai/wizper");
|
|
76
|
+
return parts.length > 0 ? parts.join("->") : null;
|
|
77
|
+
}
|
|
50
78
|
//# sourceMappingURL=transcription-start.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transcription-start.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/transcription-start.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"transcription-start.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/transcription-start.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,yBAAyB,EAAE,MAAM,oCAAoC,CAAC;AACpG,OAAO,EACL,iBAAiB,EACjB,oCAAoC,GACrC,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,0BAA0B,EAA4B,MAAM,4BAA4B,CAAC;AAclG,MAAM,CAAC,KAAK,UAAU,gCAAgC,CAAC,EACrD,GAAG,EACH,aAAa,EACb,UAAU,EACV,YAAY,EACZ,SAAS,GAOV;IACC,MAAM,SAAS,GAAG,0BAA0B,CAAC;QAC3C,GAAG;QACH,aAAa;QACb,UAAU;QACV,YAAY;QACZ,SAAS;KACV,CAAC,CAAC;IACH,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,CAAC;IAClD,MAAM,kBAAkB,GAAG,yBAAyB,CAAC,YAAY,CAAC,CAAC;IACnE,MAAM,SAAS,GAAG,kBAAkB;QAClC,CAAC,CAAC,mBAAmB,CAAC,kBAAkB,EAAE,YAAY,CAAC;QACvD,CAAC,CAAC,KAAK,CAAC;IAEV,MAAM,eAAe,GAAG,MAAM,iBAAiB,EAAE,CAAC;IAClD,MAAM,OAAO,GAAG,OAAO,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAC9C,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IAClD,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;IAC5C,MAAM,cAAc,GAAG,SAAS,IAAI,eAAe,IAAI,OAAO,IAAI,SAAS,IAAI,MAAM,CAAC;IAEtF,OAAO;QACL,kBAAkB;QAClB,SAAS;QACT,eAAe;QACf,OAAO;QACP,SAAS;QACT,MAAM;QACN,cAAc;KACf,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,6BAA6B,CAAC,EAClD,GAAG,EACH,aAAa,EACb,UAAU,EACV,YAAY,EACZ,SAAS,GAOV;IAKC,MAAM,YAAY,GAAG,MAAM,gCAAgC,CAAC;QAC1D,GAAG;QACH,aAAa;QACb,UAAU;QACV,YAAY;QACZ,SAAS;KACV,CAAC,CAAC;IAEH,MAAM,YAAY,GAA8B,YAAY,CAAC,OAAO;QAClE,CAAC,CAAC,YAAY,CAAC,SAAS,IAAI,YAAY,CAAC,MAAM;YAC7C,CAAC,CAAC,mBAAmB;YACrB,CAAC,CAAC,YAAY,CAAC,SAAS;gBACtB,CAAC,CAAC,cAAc;gBAChB,CAAC,CAAC,YAAY,CAAC,MAAM;oBACnB,CAAC,CAAC,WAAW;oBACb,CAAC,CAAC,MAAM;QACd,CAAC,CAAC,YAAY,CAAC,SAAS;YACtB,CAAC,CAAC,MAAM;YACR,CAAC,CAAC,YAAY,CAAC,eAAe;gBAC5B,CAAC,CAAC,KAAK;gBACP,CAAC,CAAC,YAAY,CAAC,SAAS,IAAI,YAAY,CAAC,MAAM;oBAC7C,CAAC,CAAC,aAAa;oBACf,CAAC,CAAC,YAAY,CAAC,SAAS;wBACtB,CAAC,CAAC,QAAQ;wBACV,CAAC,CAAC,YAAY,CAAC,MAAM;4BACnB,CAAC,CAAC,KAAK;4BACP,CAAC,CAAC,SAAS,CAAC;IAExB,MAAM,OAAO,GACX,YAAY,KAAK,MAAM;QACrB,CAAC,CAAC,YAAY,CAAC,kBAAkB;YAC/B,CAAC,CAAC,QAAQ,YAAY,CAAC,kBAAkB,EAAE;YAC3C,CAAC,CAAC,MAAM;QACV,CAAC,CAAC,YAAY,KAAK,KAAK;YACtB,CAAC,CAAC,CAAC,CAAC,MAAM,oCAAoC,EAAE,CAAC,IAAI,aAAa,CAAC;YACnE,CAAC,CAAC,mBAAmB,CAAC,YAAY,CAAC,CAAC;IAE1C,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;AACjD,CAAC;AAED,SAAS,mBAAmB,CAAC,YAAuC;IAClE,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,YAAY,CAAC,OAAO;QAAE,KAAK,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IACpE,IAAI,YAAY,CAAC,SAAS;QAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACpD,IAAI,YAAY,CAAC,MAAM;QAAE,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IACrD,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACpD,CAAC"}
|