@steipete/summarize-core 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/esm/content/bun.js +21 -0
- package/dist/esm/content/bun.js.map +1 -0
- package/dist/esm/content/index.js +1 -0
- package/dist/esm/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/client.js +6 -0
- package/dist/esm/content/link-preview/client.js.map +1 -1
- package/dist/esm/content/link-preview/content/fetcher.js +19 -2
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
- package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -1
- package/dist/esm/content/link-preview/content/html.js.map +1 -1
- package/dist/esm/content/link-preview/content/index.js +29 -12
- package/dist/esm/content/link-preview/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/content/utils.js.map +1 -1
- package/dist/esm/content/transcript/index.js +2 -0
- package/dist/esm/content/transcript/index.js.map +1 -1
- package/dist/esm/content/transcript/providers/generic.js +10 -11
- package/dist/esm/content/transcript/providers/generic.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/media.js +9 -1
- package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/provider-flow.js +157 -0
- package/dist/esm/content/transcript/providers/podcast/provider-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss-feed.js +123 -0
- package/dist/esm/content/transcript/providers/podcast/rss-feed.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss-transcript.js +113 -0
- package/dist/esm/content/transcript/providers/podcast/rss-transcript.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js +2 -226
- package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast.js +26 -155
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
- package/dist/esm/content/transcript/providers/transcription-capability.js +22 -0
- package/dist/esm/content/transcript/providers/transcription-capability.js.map +1 -0
- package/dist/esm/content/transcript/providers/transcription-start.js +40 -30
- package/dist/esm/content/transcript/providers/transcription-start.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/api.js +3 -2
- package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/captions-player.js +173 -0
- package/dist/esm/content/transcript/providers/youtube/captions-player.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions-shared.js +8 -0
- package/dist/esm/content/transcript/providers/youtube/captions-shared.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions-transcript.js +361 -0
- package/dist/esm/content/transcript/providers/youtube/captions-transcript.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js +2 -557
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/provider-flow.js +189 -0
- package/dist/esm/content/transcript/providers/youtube/provider-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +7 -2
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube.js +42 -194
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
- package/dist/esm/content/transcript/transcription-config.js +24 -4
- package/dist/esm/content/transcript/transcription-config.js.map +1 -1
- package/dist/esm/content/url.js +3 -3
- package/dist/esm/content/url.js.map +1 -1
- package/dist/esm/processes.js.map +1 -1
- package/dist/esm/prompts/format.js +6 -0
- package/dist/esm/prompts/format.js.map +1 -1
- package/dist/esm/prompts/link-summary.js +27 -3
- package/dist/esm/prompts/link-summary.js.map +1 -1
- package/dist/esm/transcription/onnx-cli.js.map +1 -1
- package/dist/esm/transcription/whisper/assemblyai.js +132 -0
- package/dist/esm/transcription/whisper/assemblyai.js.map +1 -0
- package/dist/esm/transcription/whisper/chunking.js +64 -0
- package/dist/esm/transcription/whisper/chunking.js.map +1 -0
- package/dist/esm/transcription/whisper/cloud-providers.js +69 -0
- package/dist/esm/transcription/whisper/cloud-providers.js.map +1 -0
- package/dist/esm/transcription/whisper/core.js +316 -390
- package/dist/esm/transcription/whisper/core.js.map +1 -1
- package/dist/esm/transcription/whisper/gemini.js +324 -0
- package/dist/esm/transcription/whisper/gemini.js.map +1 -0
- package/dist/esm/transcription/whisper/preferences.js +16 -0
- package/dist/esm/transcription/whisper/preferences.js.map +1 -0
- package/dist/esm/transcription/whisper/provider-setup.js +62 -0
- package/dist/esm/transcription/whisper/provider-setup.js.map +1 -0
- package/dist/esm/transcription/whisper/remote-provider-attempts.js +189 -0
- package/dist/esm/transcription/whisper/remote-provider-attempts.js.map +1 -0
- package/dist/esm/transcription/whisper/remote.js +220 -0
- package/dist/esm/transcription/whisper/remote.js.map +1 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -1
- package/dist/types/content/bun.d.ts +6 -0
- package/dist/types/content/index.d.ts +1 -0
- package/dist/types/content/link-preview/client.d.ts +3 -1
- package/dist/types/content/link-preview/content/fetcher.d.ts +1 -1
- package/dist/types/content/link-preview/content/html.d.ts +1 -1
- package/dist/types/content/link-preview/deps.d.ts +8 -2
- package/dist/types/content/link-preview/types.d.ts +1 -1
- package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/media.d.ts +4 -2
- package/dist/types/content/transcript/providers/podcast/provider-flow.d.ts +7 -0
- package/dist/types/content/transcript/providers/podcast/rss-feed.d.ts +15 -0
- package/dist/types/content/transcript/providers/podcast/rss-transcript.d.ts +12 -0
- package/dist/types/content/transcript/providers/podcast/rss.d.ts +2 -24
- package/dist/types/content/transcript/providers/transcription-capability.d.ts +18 -0
- package/dist/types/content/transcript/providers/transcription-start.d.ts +10 -3
- package/dist/types/content/transcript/providers/youtube/captions-player.d.ts +12 -0
- package/dist/types/content/transcript/providers/youtube/captions-shared.d.ts +42 -0
- package/dist/types/content/transcript/providers/youtube/captions-transcript.d.ts +4 -0
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +2 -19
- package/dist/types/content/transcript/providers/youtube/provider-flow.d.ts +34 -0
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +4 -2
- package/dist/types/content/transcript/transcription-config.d.ts +6 -0
- package/dist/types/content/transcript/types.d.ts +1 -0
- package/dist/types/prompts/format.d.ts +1 -0
- package/dist/types/prompts/link-summary.d.ts +2 -1
- package/dist/types/transcription/whisper/assemblyai.d.ts +17 -0
- package/dist/types/transcription/whisper/chunking.d.ts +11 -0
- package/dist/types/transcription/whisper/cloud-providers.d.ts +22 -0
- package/dist/types/transcription/whisper/core.d.ts +12 -14
- package/dist/types/transcription/whisper/gemini.d.ts +14 -0
- package/dist/types/transcription/whisper/preferences.d.ts +4 -0
- package/dist/types/transcription/whisper/provider-setup.d.ts +30 -0
- package/dist/types/transcription/whisper/remote-provider-attempts.d.ts +51 -0
- package/dist/types/transcription/whisper/remote.d.ts +51 -0
- package/dist/types/transcription/whisper/types.d.ts +1 -1
- package/package.json +15 -16
|
@@ -1,227 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
export function looksLikeRssOrAtomFeed(xml) {
|
|
4
|
-
const head = xml.slice(0, 4096).trimStart().toLowerCase();
|
|
5
|
-
if (head.startsWith("<rss") || head.includes("<rss"))
|
|
6
|
-
return true;
|
|
7
|
-
if (head.startsWith("<?xml") && (head.includes("<rss") || head.includes("<feed")))
|
|
8
|
-
return true;
|
|
9
|
-
if (head.startsWith("<feed") || head.includes("<feed"))
|
|
10
|
-
return true;
|
|
11
|
-
return false;
|
|
12
|
-
}
|
|
13
|
-
export function extractEnclosureFromFeed(xml) {
|
|
14
|
-
const items = xml.match(/<item\b[\s\S]*?<\/item>/gi) ?? [];
|
|
15
|
-
for (const item of items) {
|
|
16
|
-
const enclosureUrl = extractEnclosureUrlFromItem(item);
|
|
17
|
-
if (!enclosureUrl)
|
|
18
|
-
continue;
|
|
19
|
-
return { enclosureUrl, durationSeconds: extractItemDurationSeconds(item) };
|
|
20
|
-
}
|
|
21
|
-
const enclosureMatch = xml.match(/<enclosure\b[^>]*\burl\s*=\s*(['"])([^'"]+)\1/i);
|
|
22
|
-
if (enclosureMatch?.[2]) {
|
|
23
|
-
return { enclosureUrl: enclosureMatch[2], durationSeconds: extractItemDurationSeconds(xml) };
|
|
24
|
-
}
|
|
25
|
-
const atomMatch = xml.match(/<link\b[^>]*\brel\s*=\s*(['"])enclosure\1[^>]*\bhref\s*=\s*(['"])([^'"]+)\2/i);
|
|
26
|
-
if (atomMatch?.[3]) {
|
|
27
|
-
return { enclosureUrl: atomMatch[3], durationSeconds: extractItemDurationSeconds(xml) };
|
|
28
|
-
}
|
|
29
|
-
return null;
|
|
30
|
-
}
|
|
31
|
-
export function extractEnclosureForEpisode(feedXml, episodeTitle) {
|
|
32
|
-
const normalizedTarget = normalizeLooseTitle(episodeTitle);
|
|
33
|
-
const items = feedXml.match(/<item\b[\s\S]*?<\/item>/gi) ?? [];
|
|
34
|
-
for (const item of items) {
|
|
35
|
-
const title = extractItemTitle(item);
|
|
36
|
-
if (!title)
|
|
37
|
-
continue;
|
|
38
|
-
if (normalizeLooseTitle(title) !== normalizedTarget)
|
|
39
|
-
continue;
|
|
40
|
-
const enclosureUrl = extractEnclosureUrlFromItem(item);
|
|
41
|
-
if (!enclosureUrl)
|
|
42
|
-
continue;
|
|
43
|
-
return { enclosureUrl, durationSeconds: extractItemDurationSeconds(item) };
|
|
44
|
-
}
|
|
45
|
-
return null;
|
|
46
|
-
}
|
|
47
|
-
export function extractItemDurationSeconds(itemXml) {
|
|
48
|
-
const match = itemXml.match(/<itunes:duration>([\s\S]*?)<\/itunes:duration>/i);
|
|
49
|
-
if (!match?.[1])
|
|
50
|
-
return null;
|
|
51
|
-
const raw = match[1]
|
|
52
|
-
.replaceAll(/<!\[CDATA\[/gi, "")
|
|
53
|
-
.replaceAll(/\]\]>/g, "")
|
|
54
|
-
.trim();
|
|
55
|
-
if (!raw)
|
|
56
|
-
return null;
|
|
57
|
-
// common forms: "HH:MM:SS", "MM:SS", "SS"
|
|
58
|
-
if (/^\d+$/.test(raw)) {
|
|
59
|
-
const seconds = Number(raw);
|
|
60
|
-
return Number.isFinite(seconds) && seconds > 0 ? seconds : null;
|
|
61
|
-
}
|
|
62
|
-
const parts = raw
|
|
63
|
-
.split(":")
|
|
64
|
-
.map((value) => value.trim())
|
|
65
|
-
.filter(Boolean);
|
|
66
|
-
if (parts.length < 2 || parts.length > 3)
|
|
67
|
-
return null;
|
|
68
|
-
const nums = parts.map((value) => Number(value));
|
|
69
|
-
if (nums.some((n) => !Number.isFinite(n) || n < 0))
|
|
70
|
-
return null;
|
|
71
|
-
const seconds = (() => {
|
|
72
|
-
if (nums.length === 3) {
|
|
73
|
-
const [hours, minutes, secondsRaw] = nums;
|
|
74
|
-
if (hours === undefined || minutes === undefined || secondsRaw === undefined)
|
|
75
|
-
return null;
|
|
76
|
-
return Math.round(hours * 3600 + minutes * 60 + secondsRaw);
|
|
77
|
-
}
|
|
78
|
-
const [minutes, secondsRaw] = nums;
|
|
79
|
-
if (minutes === undefined || secondsRaw === undefined)
|
|
80
|
-
return null;
|
|
81
|
-
return Math.round(minutes * 60 + secondsRaw);
|
|
82
|
-
})();
|
|
83
|
-
if (seconds === null)
|
|
84
|
-
return null;
|
|
85
|
-
return seconds > 0 ? seconds : null;
|
|
86
|
-
}
|
|
87
|
-
export function decodeXmlEntities(value) {
|
|
88
|
-
return value
|
|
89
|
-
.replaceAll(/&/gi, "&")
|
|
90
|
-
.replaceAll(/&/g, "&")
|
|
91
|
-
.replaceAll(/</gi, "<")
|
|
92
|
-
.replaceAll(/>/gi, ">")
|
|
93
|
-
.replaceAll(/"/gi, '"')
|
|
94
|
-
.replaceAll(/'/gi, "'");
|
|
95
|
-
}
|
|
96
|
-
export function normalizeLooseTitle(value) {
|
|
97
|
-
return value
|
|
98
|
-
.toLowerCase()
|
|
99
|
-
.normalize("NFKD")
|
|
100
|
-
.replaceAll(/\p{Diacritic}+/gu, "")
|
|
101
|
-
.replaceAll(/[^a-z0-9]+/g, " ")
|
|
102
|
-
.trim();
|
|
103
|
-
}
|
|
104
|
-
export async function tryFetchTranscriptFromFeedXml({ fetchImpl, feedXml, episodeTitle, notes, }) {
|
|
105
|
-
const items = feedXml.match(/<item\b[\s\S]*?<\/item>/gi) ?? [];
|
|
106
|
-
const normalizedTarget = episodeTitle ? normalizeLooseTitle(episodeTitle) : null;
|
|
107
|
-
for (const item of items) {
|
|
108
|
-
if (normalizedTarget) {
|
|
109
|
-
const title = extractItemTitle(item);
|
|
110
|
-
if (!title || normalizeLooseTitle(title) !== normalizedTarget)
|
|
111
|
-
continue;
|
|
112
|
-
}
|
|
113
|
-
const candidates = extractPodcastTranscriptCandidatesFromItem(item);
|
|
114
|
-
const preferred = selectPreferredTranscriptCandidate(candidates);
|
|
115
|
-
if (!preferred) {
|
|
116
|
-
if (normalizedTarget)
|
|
117
|
-
return null;
|
|
118
|
-
continue;
|
|
119
|
-
}
|
|
120
|
-
const transcriptUrl = decodeXmlEntities(preferred.url);
|
|
121
|
-
try {
|
|
122
|
-
const res = await fetchImpl(transcriptUrl, {
|
|
123
|
-
redirect: "follow",
|
|
124
|
-
signal: AbortSignal.timeout(TRANSCRIPTION_TIMEOUT_MS),
|
|
125
|
-
headers: { accept: "text/vtt,text/plain,application/json;q=0.9,*/*;q=0.8" },
|
|
126
|
-
});
|
|
127
|
-
if (!res.ok)
|
|
128
|
-
throw new Error(`transcript fetch failed (${res.status})`);
|
|
129
|
-
const contentType = res.headers.get("content-type")?.toLowerCase().split(";")[0]?.trim() ?? null;
|
|
130
|
-
const hintedType = preferred.type?.toLowerCase().split(";")[0]?.trim() ?? null;
|
|
131
|
-
const effectiveType = hintedType ?? contentType;
|
|
132
|
-
const body = await res.text();
|
|
133
|
-
const text = (() => {
|
|
134
|
-
if (effectiveType === "application/json" || transcriptUrl.toLowerCase().endsWith(".json")) {
|
|
135
|
-
try {
|
|
136
|
-
return jsonTranscriptToPlainText(JSON.parse(body));
|
|
137
|
-
}
|
|
138
|
-
catch {
|
|
139
|
-
return null;
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
if (effectiveType === "text/vtt" || transcriptUrl.toLowerCase().endsWith(".vtt")) {
|
|
143
|
-
const plain = vttToPlainText(body);
|
|
144
|
-
return plain.length > 0 ? plain : null;
|
|
145
|
-
}
|
|
146
|
-
const plain = body.trim();
|
|
147
|
-
return plain.length > 0 ? plain : null;
|
|
148
|
-
})();
|
|
149
|
-
if (!text) {
|
|
150
|
-
if (normalizedTarget)
|
|
151
|
-
return null;
|
|
152
|
-
continue;
|
|
153
|
-
}
|
|
154
|
-
const segments = (() => {
|
|
155
|
-
if (effectiveType === "application/json" || transcriptUrl.toLowerCase().endsWith(".json")) {
|
|
156
|
-
try {
|
|
157
|
-
return jsonTranscriptToSegments(JSON.parse(body));
|
|
158
|
-
}
|
|
159
|
-
catch {
|
|
160
|
-
return null;
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
if (effectiveType === "text/vtt" || transcriptUrl.toLowerCase().endsWith(".vtt")) {
|
|
164
|
-
return vttToSegments(body);
|
|
165
|
-
}
|
|
166
|
-
return null;
|
|
167
|
-
})();
|
|
168
|
-
notes.push("Used RSS <podcast:transcript> (skipped Whisper)");
|
|
169
|
-
return { text, transcriptUrl, transcriptType: effectiveType, segments };
|
|
170
|
-
}
|
|
171
|
-
catch (error) {
|
|
172
|
-
if (normalizedTarget) {
|
|
173
|
-
notes.push(`RSS <podcast:transcript> fetch failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
174
|
-
return null;
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
}
|
|
178
|
-
return null;
|
|
179
|
-
}
|
|
180
|
-
function extractEnclosureUrlFromItem(xml) {
|
|
181
|
-
const enclosureMatch = xml.match(/<enclosure\b[^>]*\burl\s*=\s*(['"])([^'"]+)\1/i);
|
|
182
|
-
if (enclosureMatch?.[2])
|
|
183
|
-
return enclosureMatch[2];
|
|
184
|
-
const atomMatch = xml.match(/<link\b[^>]*\brel\s*=\s*(['"])enclosure\1[^>]*\bhref\s*=\s*(['"])([^'"]+)\2/i);
|
|
185
|
-
if (atomMatch?.[3])
|
|
186
|
-
return atomMatch[3];
|
|
187
|
-
return null;
|
|
188
|
-
}
|
|
189
|
-
function extractItemTitle(itemXml) {
|
|
190
|
-
const match = itemXml.match(/<title>([\s\S]*?)<\/title>/i);
|
|
191
|
-
if (!match?.[1])
|
|
192
|
-
return null;
|
|
193
|
-
const raw = match[1]
|
|
194
|
-
.replaceAll(/<!\[CDATA\[/gi, "")
|
|
195
|
-
.replaceAll(/\]\]>/g, "")
|
|
196
|
-
.trim();
|
|
197
|
-
return raw.length > 0 ? raw : null;
|
|
198
|
-
}
|
|
199
|
-
function extractPodcastTranscriptCandidatesFromItem(itemXml) {
|
|
200
|
-
const matches = itemXml.matchAll(/<podcast:transcript\b[^>]*\burl\s*=\s*(['"])([^'"]+)\1[^>]*>/gi);
|
|
201
|
-
const results = [];
|
|
202
|
-
for (const match of matches) {
|
|
203
|
-
const tag = match[0];
|
|
204
|
-
const url = match[2]?.trim();
|
|
205
|
-
if (!url)
|
|
206
|
-
continue;
|
|
207
|
-
const type = tag.match(/\btype\s*=\s*(['"])([^'"]+)\1/i)?.[2]?.trim() ?? null;
|
|
208
|
-
results.push({ url, type });
|
|
209
|
-
}
|
|
210
|
-
return results;
|
|
211
|
-
}
|
|
212
|
-
function selectPreferredTranscriptCandidate(candidates) {
|
|
213
|
-
if (candidates.length === 0)
|
|
214
|
-
return null;
|
|
215
|
-
const normalized = candidates.map((c) => ({
|
|
216
|
-
...c,
|
|
217
|
-
type: c.type?.toLowerCase().split(";")[0]?.trim() ?? null,
|
|
218
|
-
}));
|
|
219
|
-
const json = normalized.find((c) => c.type === "application/json" || c.url.toLowerCase().endsWith(".json"));
|
|
220
|
-
if (json)
|
|
221
|
-
return json;
|
|
222
|
-
const vtt = normalized.find((c) => c.type === "text/vtt" || c.url.toLowerCase().endsWith(".vtt"));
|
|
223
|
-
if (vtt)
|
|
224
|
-
return vtt;
|
|
225
|
-
return normalized[0] ?? null;
|
|
226
|
-
}
|
|
1
|
+
export { decodeXmlEntities, extractEnclosureForEpisode, extractEnclosureFromFeed, extractItemDurationSeconds, looksLikeRssOrAtomFeed, normalizeLooseTitle, } from "./rss-feed.js";
|
|
2
|
+
export { tryFetchTranscriptFromFeedXml } from "./rss-transcript.js";
|
|
227
3
|
//# sourceMappingURL=rss.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rss.js","sourceRoot":"","sources":["../../../../../../src/content/transcript/providers/podcast/rss.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"rss.js","sourceRoot":"","sources":["../../../../../../src/content/transcript/providers/podcast/rss.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,iBAAiB,EACjB,0BAA0B,EAC1B,wBAAwB,EACxB,0BAA0B,EAC1B,sBAAsB,EACtB,mBAAmB,GACpB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,6BAA6B,EAAE,MAAM,qBAAqB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"spotify-flow.js","sourceRoot":"","sources":["../../../../../../src/content/transcript/providers/podcast/spotify-flow.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"spotify-flow.js","sourceRoot":"","sources":["../../../../../../src/content/transcript/providers/podcast/spotify-flow.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAE1D,OAAO,EACL,qCAAqC,EACrC,qCAAqC,GACtC,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,kBAAkB,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC7D,OAAO,EACL,iBAAiB,EACjB,0BAA0B,EAC1B,6BAA6B,GAC9B,MAAM,UAAU,CAAC;AAClB,OAAO,EACL,uBAAuB,EACvB,uBAAuB,EACvB,qBAAqB,GACtB,MAAM,cAAc,CAAC;AAEtB,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,IAAwB;IAExB,MAAM,gBAAgB,GAAG,uBAAuB,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACnE,IAAI,CAAC,gBAAgB;QAAE,OAAO,IAAI,CAAC;IAEnC,IAAI,CAAC;QACH,sFAAsF;QACtF,0DAA0D;QAC1D,0EAA0E;QAC1E,sEAAsE;QACtE,qFAAqF;QACrF,MAAM,QAAQ,GAAG,0CAA0C,gBAAgB,EAAE,CAAC;QAC9E,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,GAAG,MAAM,qBAAqB,CAAC;YAC3D,QAAQ;YACR,SAAS,EAAE,gBAAgB;YAC3B,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK;YAC7B,mBAAmB,EAAE,IAAI,CAAC,OAAO,CAAC,mBAAmB,IAAI,IAAI;SAC9D,CAAC,CAAC;QAEH,MAAM,SAAS,GAAG,uBAAuB,CAAC,SAAS,CAAC,CAAC;QACrD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,sDAAsD,CAAC,CAAC;QAC1E,CAAC;QACD,MAAM,SAAS,GAAG,SAAS,CAAC,SAAS,CAAC;QACtC,MAAM,YAAY,GAAG,SAAS,CAAC,YAAY,CAAC;QAC5C,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC;QACzC,MAAM,oBAAoB,GAAG,SAAS,CAAC,eAAe,CAAC;QAEvD,IAAI,aAAa,EAAE,CAAC;YAClB,MAAM,OAAO,GAAG,IAAI,CAAC,2BAA2B,EAAE,CAAC;YACnD,IAAI,OAAO;gBAAE,OAAO,OAAO,CAAC;YAC5B,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;YACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC;gBACnC,GAAG,EAAE,aAAa;gBAClB,YAAY,EAAE,aAAa;gBAC3B,mBAAmB,EAAE,oBAAoB;aAC1C,CAAC,CAAC;YACH,MAAM,oBAAoB,GAAG,MAAM,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,MAAM,IAAI,CAAC,CAAC;YAC7D,MAAM,oBAAoB,GACxB,oBAAoB,GAAG,CAAC;gBACxB,CAAC,oBAAoB,GAAG,GAAG;oBACzB,CAAC,oBAAoB,GAAG,GAAG;wBACzB,CAAC,oBAAoB,IAAI,IAAI;4BAC3B,CAAC,OAAO,oBAAoB,KAAK,QAAQ,IAAI,oBAAoB,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAErF,IAAI,MAAM,CAAC,IAAI,IAAI,CAAC,oBAAoB,EAAE,CAAC;gBACzC,IAAI,CAAC,KAAK,CAAC,IAAI,CACb,GAAG,KAAK,WAAW;oBACjB,CAAC,CAAC,4CAA4C;oBAC9C,CAAC,CAAC,8BAA8B,CACnC,CAAC;gBACF,OAAO,kBA
AkB,CAAC;oBACxB,kBAAkB,EAAE,IAAI,CAAC,kBAAkB;oBAC3C,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,OAAO,EAAE,MAAM;oBACf,QAAQ,EAAE;wBACR,QAAQ,EAAE,SAAS;wBACnB,IAAI,EAAE,qBAAqB;wBAC3B,SAAS,EAAE,gBAAgB;wBAC3B,SAAS;wBACT,YAAY;wBACZ,QAAQ,EAAE,aAAa;wBACvB,eAAe,EAAE,oBAAoB;wBACrC,SAAS,EAAE,SAAS,CAAC,SAAS;qBAC/B;iBACF,CAAC,CAAC;YACL,CAAC;YACD,IAAI,oBAAoB,EAAE,CAAC;gBACzB,IAAI,CAAC,KAAK,CAAC,IAAI,CACb,iDAAiD,oBAAoB,qCAAqC,CAC3G,CAAC;YACJ,CAAC;YACD,IAAI,CAAC,KAAK,CAAC,IAAI,CACb,yEACE,MAAM,CAAC,KAAK,EAAE,OAAO,IAAI,eAC3B,EAAE,CACH,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,qCAAqC,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QAC3F,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,iBAAiB,GAAG,MAAM,qCAAqC,CACnE,IAAI,CAAC,OAAO,CAAC,KAAK,EAClB,SAAS,EACT,YAAY,CACb,CAAC;YACF,IAAI,iBAAiB,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,IAAI,CAAC,2BAA2B,EAAE,CAAC;gBACnD,IAAI,OAAO;oBAAE,OAAO,OAAO,CAAC;gBAC5B,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;gBACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC;oBACnC,GAAG,EAAE,iBAAiB,CAAC,UAAU;oBACjC,YAAY,EAAE,aAAa;oBAC3B,mBAAmB,EAAE,iBAAiB,CAAC,eAAe;iBACvD,CAAC,CAAC;gBACH,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;oBAChB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;oBACtE,OAAO,kBAAkB,CAAC;wBACxB,kBAAkB,EAAE,IAAI,CAAC,kBAAkB;wBAC3C,KAAK,EAAE,IAAI,CAAC,KAAK;wBACjB,OAAO,EAAE,MAAM;wBACf,QAAQ,EAAE;4BACR,QAAQ,EAAE,SAAS;4BACnB,IAAI,EAAE,+BAA+B;4BACrC,SAAS,EAAE,gBAAgB;4BAC3B,SAAS;4BACT,YAAY,EAAE,iBAAiB,CAAC,YAAY;4BAC5C,UAAU,EAAE,iBAAiB,CAAC,UAAU;4BACxC,eAAe,EAAE,iBAAiB,CAAC,eAAe;yBACnD;qBACF,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YACD,MAAM,IAAI,KAAK,CACb,2GAA2G,SAAS,GAAG,CACxH,CAAC;QACJ,CAAC;QAED,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,EAAE;YACrD,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,wBAAwB,CAAC;SACtD,CAAC,CAAC;QACH,IAAI,CAAC,YAAY,CAAC,EAAE,EAAE,CAAC;YACrB,MAAM,IAAI,KAAK,CAAC,8BAA8B,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;QACxE,CAAC;QACD,MAAM,OAAO,GAAG,MAAM,YAAY,CAAC,IAAI,EAAE,CAAC;QAC1C,IAAI,eAAe,GAA8D,IAAI,CAAC;QACtF,IAAI,qBAAqB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YACxC,IAAI,CAAC,QAAQ,CAAC,mBAAmB,CAAC,CAAC;YACnC,eAAe,GAAG,MAAM,6BA
A6B,CAAC;gBACpD,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK;gBAC7B,OAAO;gBACP,YAAY;gBACZ,KAAK,EAAE,IAAI,CAAC,KAAK;aAClB,CAAC,CAAC;QACL,CAAC;QACD,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO;gBACL,IAAI,EAAE,eAAe,CAAC,IAAI;gBAC1B,MAAM,EAAE,mBAAmB;gBAC3B,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;gBACvF,kBAAkB,EAAE,IAAI,CAAC,kBAAkB;gBAC3C,KAAK,EAAE,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC;gBAC5B,QAAQ,EAAE;oBACR,QAAQ,EAAE,SAAS;oBACnB,IAAI,EAAE,+BAA+B;oBACrC,SAAS,EAAE,gBAAgB;oBAC3B,SAAS;oBACT,YAAY;oBACZ,OAAO;oBACP,aAAa,EAAE,eAAe,CAAC,aAAa;oBAC5C,cAAc,EAAE,eAAe,CAAC,cAAc;iBAC/C;aACF,CAAC;QACJ,CAAC;QACD,MAAM,KAAK,GAAG,0BAA0B,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;QAChE,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,iBAAiB,GAAG,MAAM,qCAAqC,CACnE,IAAI,CAAC,OAAO,CAAC,KAAK,EAClB,SAAS,EACT,YAAY,CACb,CAAC;YACF,IAAI,iBAAiB,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,IAAI,CAAC,2BAA2B,EAAE,CAAC;gBACnD,IAAI,OAAO;oBAAE,OAAO,OAAO,CAAC;gBAC5B,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;gBACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC;oBACnC,GAAG,EAAE,iBAAiB,CAAC,UAAU;oBACjC,YAAY,EAAE,aAAa;oBAC3B,mBAAmB,EAAE,iBAAiB,CAAC,eAAe;iBACvD,CAAC,CAAC;gBACH,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;oBAChB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;oBACtE,OAAO,kBAAkB,CAAC;wBACxB,kBAAkB,EAAE,IAAI,CAAC,kBAAkB;wBAC3C,KAAK,EAAE,IAAI,CAAC,KAAK;wBACjB,OAAO,EAAE,MAAM;wBACf,QAAQ,EAAE;4BACR,QAAQ,EAAE,SAAS;4BACnB,IAAI,EAAE,+BAA+B;4BACrC,SAAS,EAAE,gBAAgB;4BAC3B,SAAS;4BACT,YAAY,EAAE,iBAAiB,CAAC,YAAY;4BAC5C,UAAU,EAAE,iBAAiB,CAAC,UAAU;4BACxC,eAAe,EAAE,iBAAiB,CAAC,eAAe;yBACnD;qBACF,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YACD,MAAM,IAAI,KAAK,CAAC,gDAAgD,YAAY,GAAG,CAAC,CAAC;QACnF,CAAC;QACD,MAAM,YAAY,GAAG,iBAAiB,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QAC3D,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe,CAAC;QAE9C,IAAI,CAAC,KAAK,CAAC,IAAI,CACb,GAAG,KAAK,WAAW;YACjB,CAAC,CAAC,2DAA2D;YAC7D,CAAC,CAAC,yCAAyC,CAC9C,CAAC;QACF,MAAM,OAAO,GAAG,IAAI,CAAC,2BAA2B,EAAE,CAAC;QACnD,IAAI,OAAO;YAAE,OAAO,OAAO,CAAC;QAC5B,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,
UAAU,CAAC;YACnC,GAAG,EAAE,YAAY;YACjB,YAAY,EAAE,aAAa;YAC3B,mBAAmB,EAAE,eAAe;SACrC,CAAC,CAAC;QACH,OAAO,kBAAkB,CAAC;YACxB,kBAAkB,EAAE,IAAI,CAAC,kBAAkB;YAC3C,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,OAAO,EAAE,MAAM;YACf,wBAAwB,EAAE,IAAI;YAC9B,QAAQ,EAAE;gBACR,QAAQ,EAAE,SAAS;gBACnB,IAAI,EAAE,8BAA8B;gBACpC,SAAS,EAAE,gBAAgB;gBAC3B,SAAS;gBACT,YAAY;gBACZ,OAAO;gBACP,YAAY;gBACZ,eAAe;aAChB;SACF,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,IAAI,EAAE,IAAI;YACV,MAAM,EAAE,IAAI;YACZ,kBAAkB,EAAE,IAAI,CAAC,kBAAkB;YAC3C,KAAK,EAAE,iCAAiC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE;YAChG,QAAQ,EAAE;gBACR,QAAQ,EAAE,SAAS;gBACnB,IAAI,EAAE,8BAA8B;gBACpC,SAAS,EAAE,gBAAgB;aAC5B;SACF,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -4,11 +4,11 @@ import { fetchAppleTranscriptFromEmbeddedHtml, fetchAppleTranscriptFromItunesLoo
|
|
|
4
4
|
import { FEED_HINT_URL_PATTERN, PODCAST_PLATFORM_HOST_PATTERN } from "./podcast/constants.js";
|
|
5
5
|
import { resolvePodcastFeedUrlFromItunesSearch } from "./podcast/itunes.js";
|
|
6
6
|
import { downloadCappedBytes, downloadToFile, filenameFromUrl, formatBytes, normalizeHeaderType, parseContentLength, probeRemoteMedia, transcribeMediaUrl, } from "./podcast/media.js";
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
7
|
+
import { buildNoTranscriptResult, tryFeedEnclosureTranscript, tryOgAudioTranscript, tryPodcastTranscriptFromFeed, tryPodcastYtDlpTranscript, } from "./podcast/provider-flow.js";
|
|
8
|
+
import { extractEnclosureForEpisode, extractItemDurationSeconds, looksLikeRssOrAtomFeed, } from "./podcast/rss.js";
|
|
9
9
|
import { fetchSpotifyTranscript } from "./podcast/spotify-flow.js";
|
|
10
10
|
import { looksLikeBlockedHtml } from "./podcast/spotify.js";
|
|
11
|
-
import {
|
|
11
|
+
import { buildMissingTranscriptionProviderResult, resolveTranscriptProviderCapabilities, } from "./transcription-capability.js";
|
|
12
12
|
export const canHandle = ({ url, html }) => {
|
|
13
13
|
// Direct media URLs (e.g., .mp3, .wav) should be handled by the generic provider
|
|
14
14
|
// even if the URL contains "podcast" in the path (like "rt_podcast996.mp3")
|
|
@@ -28,18 +28,17 @@ export const fetchTranscript = async (context, options) => {
|
|
|
28
28
|
if (!attemptedProviders.includes(provider))
|
|
29
29
|
attemptedProviders.push(provider);
|
|
30
30
|
};
|
|
31
|
-
const
|
|
31
|
+
const transcriptionCapabilities = await resolveTranscriptProviderCapabilities({
|
|
32
32
|
transcription,
|
|
33
|
-
|
|
34
|
-
const missingTranscriptionProviderResult = () => ({
|
|
35
|
-
text: null,
|
|
36
|
-
source: null,
|
|
37
|
-
attemptedProviders,
|
|
38
|
-
metadata: { provider: "podcast", reason: "missing_transcription_keys" },
|
|
39
|
-
notes: "Missing transcription provider (install whisper-cpp or set OPENAI_API_KEY/FAL_KEY)",
|
|
33
|
+
ytDlpPath: options.ytDlpPath,
|
|
40
34
|
});
|
|
41
35
|
const ensureTranscriptionProvider = () => {
|
|
42
|
-
return !
|
|
36
|
+
return !transcriptionCapabilities.canTranscribe
|
|
37
|
+
? buildMissingTranscriptionProviderResult({
|
|
38
|
+
attemptedProviders,
|
|
39
|
+
metadata: { provider: "podcast", reason: "missing_transcription_keys" },
|
|
40
|
+
})
|
|
41
|
+
: null;
|
|
43
42
|
};
|
|
44
43
|
const progress = {
|
|
45
44
|
url: context.url,
|
|
@@ -56,37 +55,17 @@ export const fetchTranscript = async (context, options) => {
|
|
|
56
55
|
const flow = {
|
|
57
56
|
context,
|
|
58
57
|
options,
|
|
58
|
+
transcription,
|
|
59
|
+
feedHtml: typeof context.html === "string" ? context.html : null,
|
|
59
60
|
attemptedProviders,
|
|
60
61
|
notes,
|
|
61
62
|
pushOnce,
|
|
62
63
|
ensureTranscriptionProvider,
|
|
63
64
|
transcribe,
|
|
64
65
|
};
|
|
65
|
-
const
|
|
66
|
-
if (
|
|
67
|
-
|
|
68
|
-
const direct = await tryFetchTranscriptFromFeedXml({
|
|
69
|
-
fetchImpl: options.fetch,
|
|
70
|
-
feedXml: feedHtml,
|
|
71
|
-
episodeTitle: null,
|
|
72
|
-
notes,
|
|
73
|
-
});
|
|
74
|
-
if (direct) {
|
|
75
|
-
return {
|
|
76
|
-
text: direct.text,
|
|
77
|
-
source: "podcastTranscript",
|
|
78
|
-
segments: options.transcriptTimestamps ? (direct.segments ?? null) : null,
|
|
79
|
-
attemptedProviders,
|
|
80
|
-
notes: joinNotes(notes),
|
|
81
|
-
metadata: {
|
|
82
|
-
provider: "podcast",
|
|
83
|
-
kind: "rss_podcast_transcript",
|
|
84
|
-
transcriptUrl: direct.transcriptUrl,
|
|
85
|
-
transcriptType: direct.transcriptType,
|
|
86
|
-
},
|
|
87
|
-
};
|
|
88
|
-
}
|
|
89
|
-
}
|
|
66
|
+
const directResult = await tryPodcastTranscriptFromFeed(flow);
|
|
67
|
+
if (directResult)
|
|
68
|
+
return directResult;
|
|
90
69
|
const spotifyResult = await fetchSpotifyTranscript(flow);
|
|
91
70
|
if (spotifyResult)
|
|
92
71
|
return spotifyResult;
|
|
@@ -96,125 +75,17 @@ export const fetchTranscript = async (context, options) => {
|
|
|
96
75
|
const appleEmbeddedResult = await fetchAppleTranscriptFromEmbeddedHtml(flow);
|
|
97
76
|
if (appleEmbeddedResult)
|
|
98
77
|
return appleEmbeddedResult;
|
|
99
|
-
const
|
|
100
|
-
if (
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
url: resolvedUrl,
|
|
110
|
-
filenameHint: "episode.mp3",
|
|
111
|
-
durationSecondsHint: durationSeconds,
|
|
112
|
-
});
|
|
113
|
-
return buildWhisperResult({
|
|
114
|
-
attemptedProviders,
|
|
115
|
-
notes,
|
|
116
|
-
outcome: transcript,
|
|
117
|
-
includeProviderOnFailure: true,
|
|
118
|
-
metadata: {
|
|
119
|
-
provider: "podcast",
|
|
120
|
-
kind: "rss_enclosure",
|
|
121
|
-
enclosureUrl: resolvedUrl,
|
|
122
|
-
durationSeconds,
|
|
123
|
-
},
|
|
124
|
-
});
|
|
125
|
-
}
|
|
126
|
-
catch (error) {
|
|
127
|
-
return {
|
|
128
|
-
text: null,
|
|
129
|
-
source: null,
|
|
130
|
-
attemptedProviders,
|
|
131
|
-
notes: `Podcast enclosure download failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
132
|
-
metadata: { provider: "podcast", kind: "rss_enclosure", enclosureUrl: resolvedUrl },
|
|
133
|
-
};
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
const ogAudioUrl = feedHtml ? extractOgAudioUrl(feedHtml) : null;
|
|
137
|
-
if (ogAudioUrl) {
|
|
138
|
-
attemptedProviders.push("whisper");
|
|
139
|
-
const result = await transcribe({
|
|
140
|
-
url: ogAudioUrl,
|
|
141
|
-
filenameHint: "audio.mp3",
|
|
142
|
-
durationSecondsHint: null,
|
|
143
|
-
});
|
|
144
|
-
if (result.text) {
|
|
145
|
-
notes.push("Used og:audio media (may be a preview clip, not the full episode)");
|
|
146
|
-
return buildWhisperResult({
|
|
147
|
-
attemptedProviders,
|
|
148
|
-
notes,
|
|
149
|
-
outcome: result,
|
|
150
|
-
metadata: {
|
|
151
|
-
provider: "podcast",
|
|
152
|
-
kind: "og_audio",
|
|
153
|
-
ogAudioUrl,
|
|
154
|
-
},
|
|
155
|
-
});
|
|
156
|
-
}
|
|
157
|
-
return {
|
|
158
|
-
text: null,
|
|
159
|
-
source: null,
|
|
160
|
-
attemptedProviders,
|
|
161
|
-
notes: result.error?.message ?? null,
|
|
162
|
-
metadata: { provider: "podcast", kind: "og_audio", ogAudioUrl },
|
|
163
|
-
};
|
|
164
|
-
}
|
|
165
|
-
if (options.ytDlpPath) {
|
|
166
|
-
attemptedProviders.push("yt-dlp");
|
|
167
|
-
try {
|
|
168
|
-
const mod = await import("./youtube/yt-dlp.js");
|
|
169
|
-
const result = await mod.fetchTranscriptWithYtDlp({
|
|
170
|
-
ytDlpPath: options.ytDlpPath,
|
|
171
|
-
transcription,
|
|
172
|
-
mediaCache: options.mediaCache ?? null,
|
|
173
|
-
url: context.url,
|
|
174
|
-
service: "podcast",
|
|
175
|
-
mediaKind: "audio",
|
|
176
|
-
});
|
|
177
|
-
if (result.notes.length > 0)
|
|
178
|
-
notes.push(...result.notes);
|
|
179
|
-
return {
|
|
180
|
-
text: result.text,
|
|
181
|
-
source: result.text ? "yt-dlp" : null,
|
|
182
|
-
attemptedProviders,
|
|
183
|
-
notes: joinNotes(notes),
|
|
184
|
-
metadata: { provider: "podcast", kind: "yt_dlp", transcriptionProvider: result.provider },
|
|
185
|
-
};
|
|
186
|
-
}
|
|
187
|
-
catch (error) {
|
|
188
|
-
return {
|
|
189
|
-
text: null,
|
|
190
|
-
source: null,
|
|
191
|
-
attemptedProviders,
|
|
192
|
-
notes: `yt-dlp transcription failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
193
|
-
metadata: { provider: "podcast", kind: "yt_dlp" },
|
|
194
|
-
};
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
const missing = ensureTranscriptionProvider();
|
|
198
|
-
if (missing)
|
|
199
|
-
return missing;
|
|
200
|
-
return {
|
|
201
|
-
text: null,
|
|
202
|
-
source: null,
|
|
203
|
-
attemptedProviders,
|
|
204
|
-
metadata: { provider: "podcast", reason: "no_enclosure_and_no_yt_dlp" },
|
|
205
|
-
};
|
|
78
|
+
const enclosureResult = await tryFeedEnclosureTranscript(flow);
|
|
79
|
+
if (enclosureResult)
|
|
80
|
+
return enclosureResult;
|
|
81
|
+
const ogAudioResult = await tryOgAudioTranscript(flow);
|
|
82
|
+
if (ogAudioResult)
|
|
83
|
+
return ogAudioResult;
|
|
84
|
+
const ytDlpResult = await tryPodcastYtDlpTranscript(flow);
|
|
85
|
+
if (ytDlpResult)
|
|
86
|
+
return ytDlpResult;
|
|
87
|
+
return buildNoTranscriptResult(flow);
|
|
206
88
|
};
|
|
207
|
-
/**
 * Extract the absolute http(s) URL advertised by an Open Graph `og:audio`
 * `<meta>` tag in an HTML document.
 *
 * Attributes may appear in any order and may be double-quoted, single-quoted
 * or unquoted. The tag is identified by its `property` attribute (falling back
 * to `name`, which some sites emit instead), compared case-insensitively.
 * HTML-escaped ampersands in the URL are decoded so query strings survive.
 * Tags whose content is empty or not an absolute http(s) URL are skipped and
 * scanning continues with the next `<meta>` tag.
 *
 * @param {string} html - Raw HTML to scan.
 * @returns {string | null} The first usable og:audio URL, or null when none is found.
 */
function extractOgAudioUrl(html) {
    if (typeof html !== "string" || html.length === 0)
        return null;
    // Quote-aware so a ">" inside an attribute value does not end the tag early.
    const metaTagPattern = /<meta\b((?:[^>"']|"[^"]*"|'[^']*')*)>/gi;
    const attributePattern = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>`]+))/g;
    for (const [, rawAttributes] of html.matchAll(metaTagPattern)) {
        const attributes = new Map();
        for (const [, name, doubleQuoted, singleQuoted, bare] of rawAttributes.matchAll(attributePattern)) {
            const key = name.toLowerCase();
            // Per HTML parsing rules the first occurrence of a duplicated attribute wins.
            if (!attributes.has(key))
                attributes.set(key, doubleQuoted ?? singleQuoted ?? bare ?? "");
        }
        const property = (attributes.get("property") ?? attributes.get("name") ?? "").trim().toLowerCase();
        if (property !== "og:audio")
            continue;
        const content = attributes.get("content");
        if (content === undefined)
            continue;
        // Attribute values are HTML-escaped; "&amp;" must become "&" for the URL to be fetchable.
        const candidate = content.replace(/&(?:amp|#38|#x26);/gi, "&").trim();
        if (/^https?:\/\//i.test(candidate))
            return candidate;
    }
    return null;
}
|
|
218
89
|
// Test-only exports (not part of the public API; may change without notice).
|
|
219
90
|
export const __test__ = {
|
|
220
91
|
probeRemoteMedia,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"podcast.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/podcast.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"podcast.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/podcast.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,0BAA0B,EAAE,MAAM,4BAA4B,CAAC;AAExE,OAAO,EACL,oCAAoC,EACpC,oCAAoC,GACrC,MAAM,yBAAyB,CAAC;AACjC,OAAO,EAAE,qBAAqB,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AAE9F,OAAO,EAAE,qCAAqC,EAAE,MAAM,qBAAqB,CAAC;AAC5E,OAAO,EACL,mBAAmB,EACnB,cAAc,EACd,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,kBAAkB,EAClB,gBAAgB,EAGhB,kBAAkB,GACnB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,uBAAuB,EACvB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,yBAAyB,GAC1B,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,0BAA0B,EAC1B,0BAA0B,EAC1B,sBAAsB,GACvB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,EACL,uCAAuC,EACvC,qCAAqC,GACtC,MAAM,+BAA+B,CAAC;AAEvC,MAAM,CAAC,MAAM,SAAS,GAAG,CAAC,EAAE,GAAG,EAAE,IAAI,EAAmB,EAAW,EAAE;IACnE,iFAAiF;IACjF,4EAA4E;IAC5E,IAAI,gBAAgB,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,sBAAsB,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1E,IAAI,6BAA6B,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACzD,OAAO,qBAAqB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzC,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,eAAe,GAAG,KAAK,EAClC,OAAwB,EACxB,OAA6B,EACJ,EAAE;IAC3B,MAAM,kBAAkB,GAAyC,EAAE,CAAC;IACpE,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,aAAa,GAAG,0BAA0B,CAAC,OAAO,CAAC,CAAC;IAE1D,MAAM,QAAQ,GAAG,CAAC,QAAsD,EAAE,EAAE;QAC1E,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,QAAQ,CAAC;YAAE,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAChF,CAAC,CAAC;IAEF,MAAM,yBAAyB,GAAG,MAAM,qCAAqC,CAAC;QAC5E,aAAa;QACb,SAAS,EAAE,OAAO,CAAC,SAAS;KAC7B,CAAC,CAAC;IAEH,MAAM,2BAA2B,GAAG,GAA0B,EAAE;QAC9D,OAAO,CAAC,yBAAyB,CAAC,aAAa;YAC7C,CAAC,CAAC,uCAAuC,CAAC;gBACtC,kBAAkB;gBAClB,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,4BAA4B,EAAE;aACxE,CAAC;YACJ,CAAC,CAAC,IAAI,CAAC;IACX,CAAC,CAAC;IAEF,MAAM,QAAQ,GAAG;QACf,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,OAAO,EAAE,SAAkB;QAC3B,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,IAAI;KACvC,CAAC;IAEF,MAAM,UAAU,GAAG,CAAC,OAA0B,
EAAgC,EAAE,CAC9E,kBAAkB,CAAC;QACjB,SAAS,EAAE,OAAO,CAAC,KAAK;QACxB,aAAa;QACb,KAAK;QACL,QAAQ;QACR,GAAG,OAAO;KACX,CAAC,CAAC;IAEL,MAAM,IAAI,GAAuB;QAC/B,OAAO;QACP,OAAO;QACP,aAAa;QACb,QAAQ,EAAE,OAAO,OAAO,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI;QAChE,kBAAkB;QAClB,KAAK;QACL,QAAQ;QACR,2BAA2B;QAC3B,UAAU;KACX,CAAC;IAEF,MAAM,YAAY,GAAG,MAAM,4BAA4B,CAAC,IAAI,CAAC,CAAC;IAC9D,IAAI,YAAY;QAAE,OAAO,YAAY,CAAC;IAEtC,MAAM,aAAa,GAAG,MAAM,sBAAsB,CAAC,IAAI,CAAC,CAAC;IACzD,IAAI,aAAa;QAAE,OAAO,aAAa,CAAC;IAExC,MAAM,iBAAiB,GAAG,MAAM,oCAAoC,CAAC,IAAI,CAAC,CAAC;IAC3E,IAAI,iBAAiB;QAAE,OAAO,iBAAiB,CAAC;IAEhD,MAAM,mBAAmB,GAAG,MAAM,oCAAoC,CAAC,IAAI,CAAC,CAAC;IAC7E,IAAI,mBAAmB;QAAE,OAAO,mBAAmB,CAAC;IAEpD,MAAM,eAAe,GAAG,MAAM,0BAA0B,CAAC,IAAI,CAAC,CAAC;IAC/D,IAAI,eAAe;QAAE,OAAO,eAAe,CAAC;IAE5C,MAAM,aAAa,GAAG,MAAM,oBAAoB,CAAC,IAAI,CAAC,CAAC;IACvD,IAAI,aAAa;QAAE,OAAO,aAAa,CAAC;IAExC,MAAM,WAAW,GAAG,MAAM,yBAAyB,CAAC,IAAI,CAAC,CAAC;IAC1D,IAAI,WAAW;QAAE,OAAO,WAAW,CAAC;IAEpC,OAAO,uBAAuB,CAAC,IAAI,CAAC,CAAC;AACvC,CAAC,CAAC;AAEF,6EAA6E;AAC7E,MAAM,CAAC,MAAM,QAAQ,GAAG;IACtB,gBAAgB;IAChB,mBAAmB;IACnB,cAAc;IACd,mBAAmB;IACnB,kBAAkB;IAClB,eAAe;IACf,oBAAoB;IACpB,0BAA0B;IAC1B,0BAA0B;IAC1B,qCAAqC;IACrC,WAAW;CACZ,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { buildMissingTranscriptionProviderNote } from "../../../transcription/whisper/provider-setup.js";
|
|
2
|
+
import { resolveTranscriptionAvailability, } from "./transcription-start.js";
|
|
3
|
+
/**
 * Resolve what the current configuration is able to do for transcripts.
 *
 * Awaits `resolveTranscriptionAvailability` for the given transcription config
 * and derives the capability flags from its `hasAnyProvider` field.
 *
 * @param {object} params
 * @param {*} params.transcription - Transcription config forwarded to the availability check.
 * @param {*} params.ytDlpPath - yt-dlp path; any falsy value disables `canRunYtDlp`.
 * @returns {Promise<object>} `{ availability, canTranscribe, canRunYtDlp, missingProviderNote }`.
 */
export async function resolveTranscriptProviderCapabilities({ transcription, ytDlpPath, }) {
    const availability = await resolveTranscriptionAvailability({ transcription });
    const capabilities = { availability };
    capabilities.canTranscribe = availability.hasAnyProvider;
    // yt-dlp is only usable when a path is set AND a transcription provider exists.
    capabilities.canRunYtDlp = !!(ytDlpPath && availability.hasAnyProvider);
    capabilities.missingProviderNote = buildMissingTranscriptionProviderNote();
    return capabilities;
}
|
|
12
|
+
/**
 * Build the "no transcript" result returned when no transcription provider is
 * configured.
 *
 * The `notes` string always starts with the missing-provider note, followed by
 * any caller-supplied notes that are not blank, joined with "; ".
 *
 * @param {object} args
 * @param {*} args.attemptedProviders - Passed through unchanged.
 * @param {*} args.metadata - Passed through unchanged.
 * @param {string[] | null | undefined} [args.notes] - Extra notes; whitespace-only entries are dropped.
 * @returns {object} `{ text: null, source: null, attemptedProviders, metadata, notes }`.
 */
export function buildMissingTranscriptionProviderResult(args) {
    const isNonBlank = (note) => note.trim().length > 0;
    const extraNotes = args.notes?.filter(isNonBlank) ?? [];
    const result = {
        text: null,
        source: null,
        attemptedProviders: args.attemptedProviders,
        metadata: args.metadata,
    };
    const noteParts = [buildMissingTranscriptionProviderNote()];
    noteParts.push(...extraNotes);
    result.notes = noteParts.join("; ");
    return result;
}
|
|
22
|
+
//# sourceMappingURL=transcription-capability.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcription-capability.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/transcription-capability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,qCAAqC,EAAE,MAAM,kDAAkD,CAAC;AAGzG,OAAO,EACL,gCAAgC,GAEjC,MAAM,0BAA0B,CAAC;AASlC,MAAM,CAAC,KAAK,UAAU,qCAAqC,CAAC,EAC1D,aAAa,EACb,SAAS,GAIV;IACC,MAAM,YAAY,GAAG,MAAM,gCAAgC,CAAC,EAAE,aAAa,EAAE,CAAC,CAAC;IAC/E,OAAO;QACL,YAAY;QACZ,aAAa,EAAE,YAAY,CAAC,cAAc;QAC1C,WAAW,EAAE,OAAO,CAAC,SAAS,IAAI,YAAY,CAAC,cAAc,CAAC;QAC9D,mBAAmB,EAAE,qCAAqC,EAAE;KAC7D,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uCAAuC,CAAC,IAIvD;IACC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACzE,OAAO;QACL,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,IAAI;QACZ,kBAAkB,EAAE,IAAI,CAAC,kBAAkB;QAC3C,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,KAAK,EAAE,CAAC,qCAAqC,EAAE,EAAE,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;KACtE,CAAC;AACJ,CAAC"}
|