@steipete/summarize-core 0.11.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/content/bun.js +21 -0
- package/dist/esm/content/bun.js.map +1 -0
- package/dist/esm/content/direct-media.js +100 -0
- package/dist/esm/content/direct-media.js.map +1 -0
- package/dist/esm/content/index.js +2 -1
- package/dist/esm/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/client.js +6 -0
- package/dist/esm/content/link-preview/client.js.map +1 -1
- package/dist/esm/content/link-preview/content/fetcher.js +19 -2
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
- package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -1
- package/dist/esm/content/link-preview/content/html.js.map +1 -1
- package/dist/esm/content/link-preview/content/index.js +29 -12
- package/dist/esm/content/link-preview/content/index.js.map +1 -1
- package/dist/esm/content/link-preview/content/utils.js.map +1 -1
- package/dist/esm/content/link-preview/content/video.js +1 -1
- package/dist/esm/content/link-preview/content/video.js.map +1 -1
- package/dist/esm/content/local-file.js +58 -0
- package/dist/esm/content/local-file.js.map +1 -0
- package/dist/esm/content/transcript/index.js +2 -0
- package/dist/esm/content/transcript/index.js.map +1 -1
- package/dist/esm/content/transcript/providers/generic-direct-media.js +47 -0
- package/dist/esm/content/transcript/providers/generic-direct-media.js.map +1 -0
- package/dist/esm/content/transcript/providers/generic-embedded.js +126 -0
- package/dist/esm/content/transcript/providers/generic-embedded.js.map +1 -0
- package/dist/esm/content/transcript/providers/generic-twitter.js +78 -0
- package/dist/esm/content/transcript/providers/generic-twitter.js.map +1 -0
- package/dist/esm/content/transcript/providers/generic.js +12 -248
- package/dist/esm/content/transcript/providers/generic.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/media.js +9 -1
- package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/provider-flow.js +157 -0
- package/dist/esm/content/transcript/providers/podcast/provider-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss-feed.js +123 -0
- package/dist/esm/content/transcript/providers/podcast/rss-feed.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss-transcript.js +113 -0
- package/dist/esm/content/transcript/providers/podcast/rss-transcript.js.map +1 -0
- package/dist/esm/content/transcript/providers/podcast/rss.js +2 -226
- package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -1
- package/dist/esm/content/transcript/providers/podcast.js +26 -155
- package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
- package/dist/esm/content/transcript/providers/transcription-capability.js +22 -0
- package/dist/esm/content/transcript/providers/transcription-capability.js.map +1 -0
- package/dist/esm/content/transcript/providers/transcription-start.js +43 -32
- package/dist/esm/content/transcript/providers/transcription-start.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/api.js +3 -2
- package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/captions-player.js +173 -0
- package/dist/esm/content/transcript/providers/youtube/captions-player.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions-shared.js +8 -0
- package/dist/esm/content/transcript/providers/youtube/captions-shared.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions-transcript.js +361 -0
- package/dist/esm/content/transcript/providers/youtube/captions-transcript.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/captions.js +2 -557
- package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube/provider-flow.js +217 -0
- package/dist/esm/content/transcript/providers/youtube/provider-flow.js.map +1 -0
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +33 -9
- package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
- package/dist/esm/content/transcript/providers/youtube.js +42 -194
- package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
- package/dist/esm/content/transcript/transcription-config.js +24 -4
- package/dist/esm/content/transcript/transcription-config.js.map +1 -1
- package/dist/esm/content/url.js +5 -33
- package/dist/esm/content/url.js.map +1 -1
- package/dist/esm/processes.js.map +1 -1
- package/dist/esm/prompts/format.js +6 -0
- package/dist/esm/prompts/format.js.map +1 -1
- package/dist/esm/prompts/link-summary.js +27 -3
- package/dist/esm/prompts/link-summary.js.map +1 -1
- package/dist/esm/transcription/onnx-cli.js.map +1 -1
- package/dist/esm/transcription/whisper/assemblyai.js +132 -0
- package/dist/esm/transcription/whisper/assemblyai.js.map +1 -0
- package/dist/esm/transcription/whisper/chunking.js +64 -0
- package/dist/esm/transcription/whisper/chunking.js.map +1 -0
- package/dist/esm/transcription/whisper/cloud-providers.js +69 -0
- package/dist/esm/transcription/whisper/cloud-providers.js.map +1 -0
- package/dist/esm/transcription/whisper/core.js +320 -390
- package/dist/esm/transcription/whisper/core.js.map +1 -1
- package/dist/esm/transcription/whisper/gemini.js +324 -0
- package/dist/esm/transcription/whisper/gemini.js.map +1 -0
- package/dist/esm/transcription/whisper/groq.js +62 -1
- package/dist/esm/transcription/whisper/groq.js.map +1 -1
- package/dist/esm/transcription/whisper/preferences.js +16 -0
- package/dist/esm/transcription/whisper/preferences.js.map +1 -0
- package/dist/esm/transcription/whisper/provider-setup.js +62 -0
- package/dist/esm/transcription/whisper/provider-setup.js.map +1 -0
- package/dist/esm/transcription/whisper/remote-provider-attempts.js +189 -0
- package/dist/esm/transcription/whisper/remote-provider-attempts.js.map +1 -0
- package/dist/esm/transcription/whisper/remote.js +220 -0
- package/dist/esm/transcription/whisper/remote.js.map +1 -0
- package/dist/esm/transcription/whisper/whisper-cpp.js +21 -18
- package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -1
- package/dist/types/content/bun.d.ts +6 -0
- package/dist/types/content/direct-media.d.ts +9 -0
- package/dist/types/content/index.d.ts +2 -1
- package/dist/types/content/link-preview/client.d.ts +3 -1
- package/dist/types/content/link-preview/content/fetcher.d.ts +1 -1
- package/dist/types/content/link-preview/content/html.d.ts +1 -1
- package/dist/types/content/link-preview/deps.d.ts +8 -2
- package/dist/types/content/link-preview/types.d.ts +1 -1
- package/dist/types/content/local-file.d.ts +16 -0
- package/dist/types/content/transcript/providers/generic-direct-media.d.ts +11 -0
- package/dist/types/content/transcript/providers/generic-embedded.d.ts +16 -0
- package/dist/types/content/transcript/providers/generic-twitter.d.ts +11 -0
- package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +3 -0
- package/dist/types/content/transcript/providers/podcast/media.d.ts +4 -2
- package/dist/types/content/transcript/providers/podcast/provider-flow.d.ts +7 -0
- package/dist/types/content/transcript/providers/podcast/rss-feed.d.ts +15 -0
- package/dist/types/content/transcript/providers/podcast/rss-transcript.d.ts +12 -0
- package/dist/types/content/transcript/providers/podcast/rss.d.ts +2 -24
- package/dist/types/content/transcript/providers/transcription-capability.d.ts +18 -0
- package/dist/types/content/transcript/providers/transcription-start.d.ts +11 -3
- package/dist/types/content/transcript/providers/youtube/captions-player.d.ts +12 -0
- package/dist/types/content/transcript/providers/youtube/captions-shared.d.ts +42 -0
- package/dist/types/content/transcript/providers/youtube/captions-transcript.d.ts +4 -0
- package/dist/types/content/transcript/providers/youtube/captions.d.ts +2 -19
- package/dist/types/content/transcript/providers/youtube/provider-flow.d.ts +34 -0
- package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +4 -2
- package/dist/types/content/transcript/transcription-config.d.ts +6 -0
- package/dist/types/content/transcript/types.d.ts +1 -0
- package/dist/types/content/url.d.ts +2 -3
- package/dist/types/prompts/format.d.ts +1 -0
- package/dist/types/prompts/link-summary.d.ts +2 -1
- package/dist/types/transcription/whisper/assemblyai.d.ts +17 -0
- package/dist/types/transcription/whisper/chunking.d.ts +11 -0
- package/dist/types/transcription/whisper/cloud-providers.d.ts +22 -0
- package/dist/types/transcription/whisper/core.d.ts +12 -14
- package/dist/types/transcription/whisper/gemini.d.ts +14 -0
- package/dist/types/transcription/whisper/preferences.d.ts +4 -0
- package/dist/types/transcription/whisper/provider-setup.d.ts +30 -0
- package/dist/types/transcription/whisper/remote-provider-attempts.d.ts +51 -0
- package/dist/types/transcription/whisper/remote.d.ts +51 -0
- package/dist/types/transcription/whisper/types.d.ts +1 -1
- package/dist/types/transcription/whisper/whisper-cpp.d.ts +4 -3
- package/package.json +14 -10
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { statSync } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
4
|
+
import { inferDirectMediaKind, resolveDirectMediaExtension, resolveDirectMediaType, } from "./direct-media.js";
|
|
5
|
+
/**
 * Report whether `value` parses as a URL whose scheme is `file:`.
 *
 * @param value - Candidate URL, handed to the `URL` constructor.
 * @returns {boolean} `true` for a parseable `file:` URL; `false` for any
 *   other scheme or when the `URL` constructor throws.
 */
export function isLocalFileUrl(value) {
    let parsed;
    try {
        parsed = new URL(value);
    }
    catch {
        // Unparseable input (plain paths, empty strings, ...) is simply "not a file URL".
        return false;
    }
    return parsed.protocol === "file:";
}
|
|
13
|
+
/**
 * Resolve a `file:` URL or a filesystem path to a descriptor of an existing
 * regular file.
 *
 * `file:` URLs have their query string and fragment removed before being
 * converted to a path; any other input is resolved with `path.resolve`.
 *
 * @param value - A `file:` URL or a filesystem path.
 * @returns An object with `filePath`, `fileUrl`, `filename` and `mtimeMs`
 *   (falling back to 0 when the stat result has none), or `null` when the
 *   target is not a regular file or any step throws.
 */
export function resolveLocalFileReference(value) {
    try {
        let filePath;
        if (isLocalFileUrl(value)) {
            const bareUrl = stripFileUrlSearchAndHash(new URL(value));
            filePath = fileURLToPath(bareUrl);
        }
        else {
            filePath = path.resolve(value);
        }
        const info = statSync(filePath);
        if (info.isFile()) {
            return {
                filePath,
                fileUrl: pathToFileURL(filePath).href,
                filename: path.basename(filePath),
                mtimeMs: info.mtimeMs ?? 0,
            };
        }
        // Directories, sockets, etc. are not usable as local media files.
        return null;
    }
    catch {
        // Best-effort lookup: a missing file or an unconvertible URL yields null.
        return null;
    }
}
|
|
32
|
+
/**
 * Look up the modification time of a local file reference.
 *
 * @param value - A `file:` URL or filesystem path, forwarded to
 *   `resolveLocalFileReference`.
 * @returns The reference's `mtimeMs`, or `null` when the reference cannot be
 *   resolved.
 */
export function resolveLocalFileMtime(value) {
    const reference = resolveLocalFileReference(value);
    const modifiedAt = reference?.mtimeMs;
    return modifiedAt ?? null;
}
|
|
35
|
+
/**
 * Resolve a local file reference and classify it as direct media.
 *
 * The media kind comes from `kindHint` when given, otherwise it is inferred
 * from the file path and then from the file URL. The media type is resolved
 * the same way (path first, URL second).
 *
 * @param value - A `file:` URL or filesystem path.
 * @param kindHint - Optional media kind that overrides inference (default `null`).
 * @returns The resolved file descriptor extended with `extension`,
 *   `mediaKind` and `mediaType`, or `null` when the file, its kind, or its
 *   type cannot be determined.
 */
export function resolveLocalDirectMediaSource(value, kindHint = null) {
    const file = resolveLocalFileReference(value);
    if (!file) {
        return null;
    }
    let mediaKind = kindHint;
    if (mediaKind === null || mediaKind === undefined) {
        mediaKind = inferDirectMediaKind(file.filePath);
    }
    if (mediaKind === null || mediaKind === undefined) {
        mediaKind = inferDirectMediaKind(file.fileUrl);
    }
    if (!mediaKind) {
        return null;
    }
    let mediaType = resolveDirectMediaType(file.filePath, mediaKind);
    if (mediaType === null || mediaType === undefined) {
        mediaType = resolveDirectMediaType(file.fileUrl, mediaKind);
    }
    if (!mediaType) {
        return null;
    }
    const extension = resolveDirectMediaExtension(file.filePath);
    return { ...file, extension, mediaKind, mediaType };
}
|
|
53
|
+
/**
 * Return a copy of `url` with its query string and fragment removed.
 *
 * The argument itself is left untouched: the stripping happens on a fresh
 * `URL` instance so a caller-owned URL object is never modified behind the
 * caller's back.
 *
 * @param {URL} url - URL to strip.
 * @returns {URL} A new URL without `search` and `hash`.
 */
function stripFileUrlSearchAndHash(url) {
    const stripped = new URL(url);
    stripped.search = "";
    stripped.hash = "";
    return stripped;
}
|
|
58
|
+
//# sourceMappingURL=local-file.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"local-file.js","sourceRoot":"","sources":["../../../src/content/local-file.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACnC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACxD,OAAO,EACL,oBAAoB,EACpB,2BAA2B,EAC3B,sBAAsB,GAEvB,MAAM,mBAAmB,CAAC;AAe3B,MAAM,UAAU,cAAc,CAAC,KAAa;IAC1C,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,KAAa;IACrD,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC;YACpC,CAAC,CAAC,aAAa,CAAC,yBAAyB,CAAC,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC;YAC1D,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACxB,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAAE,OAAO,IAAI,CAAC;QAChC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,aAAa,CAAC,QAAQ,CAAC,CAAC,IAAI;YACrC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;YACjC,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,CAAC;SAC3B,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,KAAa;IACjD,OAAO,yBAAyB,CAAC,KAAK,CAAC,EAAE,OAAO,IAAI,IAAI,CAAC;AAC3D,CAAC;AAED,MAAM,UAAU,6BAA6B,CAC3C,KAAa,EACb,WAAmC,IAAI;IAEvC,MAAM,IAAI,GAAG,yBAAyB,CAAC,KAAK,CAAC,CAAC;IAC9C,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,MAAM,SAAS,GACb,QAAQ,IAAI,oBAAoB,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,oBAAoB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACxF,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAC5B,MAAM,SAAS,GACb,sBAAsB,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;QAChD,sBAAsB,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAClD,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAC5B,OAAO;QACL,GAAG,IAAI;QACP,SAAS,EAAE,2BAA2B,CAAC,IAAI,CAAC,QAAQ,CAAC;QACrD,SAAS;QACT,SAAS;KACV,CAAC;AACJ,CAAC;AAED,SAAS,yBAAyB,CAAC,GAAQ;IACzC,GAAG,CAAC,MAAM,GAAG,EAAE,CAAC;IAChB,GAAG,CAAC,IAAI,GAAG,EAAE,CAAC;IACd,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -57,6 +57,7 @@ export const resolveTranscriptForLink = async (url, html, deps, { youtubeTranscr
|
|
|
57
57
|
transcription: deps.transcription ?? null,
|
|
58
58
|
falApiKey: deps.falApiKey,
|
|
59
59
|
groqApiKey: deps.groqApiKey,
|
|
60
|
+
geminiApiKey: deps.geminiApiKey,
|
|
60
61
|
openaiApiKey: deps.openaiApiKey,
|
|
61
62
|
});
|
|
62
63
|
const providerResult = await executeProvider(provider, baseContext, {
|
|
@@ -68,6 +69,7 @@ export const resolveTranscriptForLink = async (url, html, deps, { youtubeTranscr
|
|
|
68
69
|
transcription,
|
|
69
70
|
falApiKey: transcription.falApiKey,
|
|
70
71
|
groqApiKey: transcription.groqApiKey,
|
|
72
|
+
geminiApiKey: transcription.geminiApiKey,
|
|
71
73
|
openaiApiKey: transcription.openaiApiKey,
|
|
72
74
|
mediaCache: deps.mediaCache ?? null,
|
|
73
75
|
resolveTwitterCookies: deps.resolveTwitterCookies ?? null,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/content/transcript/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/content/transcript/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AACxF,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,0BAA0B,EAAE,MAAM,2BAA2B,CAAC;AAOvE,OAAO,EACL,iCAAiC,EACjC,qBAAqB,IAAI,6BAA6B,EACtD,YAAY,IAAI,oBAAoB,GACrC,MAAM,YAAY,CAAC;AAWpB,MAAM,SAAS,GAAqB;IAClC,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;IAC7E,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;IAC7E,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;CAC9E,CAAC;AACF,MAAM,mBAAmB,GAAG,SAAS,CAAC;AAEtC,MAAM,CAAC,MAAM,wBAAwB,GAAG,KAAK,EAC3C,GAAW,EACX,IAAmB,EACnB,IAAqB,EACrB,EACE,qBAAqB,EACrB,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,EAAE,iBAAiB,EAC5B,SAAS,MACmB,EAAE,EACD,EAAE;IACjC,MAAM,aAAa,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;IACjC,MAAM,kBAAkB,GACtB,CAAC,oBAAoB,CAAC,aAAa,CAAC,IAAI,IAAI;QAC1C,CAAC,CAAC,MAAM,iCAAiC,CAAC,IAAI,CAAC;QAC/C,CAAC,CAAC,IAAI,CAAC;IACX,MAAM,YAAY,GAAG,kBAAkB,IAAI,aAAa,CAAC;IACzD,MAAM,WAAW,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAAC;IACrD,MAAM,WAAW,GAAoB,EAAE,GAAG,EAAE,YAAY,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;IAC9E,MAAM,QAAQ,GAAmB,cAAc,CAAC,WAAW,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAc,iBAAiB,IAAI,SAAS,CAAC;IAE5D,MAAM,YAAY,GAAG,MAAM,mBAAmB,CAAC;QAC7C,GAAG,EAAE,aAAa;QAClB,SAAS;QACT,eAAe,EAAE,IAAI,CAAC,eAAe;QACrC,oBAAoB,EAAE,OAAO,CAAC,oBAAoB,CAAC;QACnD,SAAS,EAAE,SAAS,IAAI,IAAI;KAC7B,CAAC,CAAC;IAEH,MAAM,WAAW,GAA0B;QACzC,SAAS;QACT,WAAW,EAAE,YAAY,CAAC,WAAW,CAAC,WAAW;QACjD,YAAY,EAAE,YAAY,CAAC,WAAW,CAAC,YAAY;QACnD,QAAQ,EAAE,YAAY,CAAC,WAAW,CAAC,QAAQ;QAC3C,kBAAkB,EAAE,EAAE;QACtB,KAAK,EAAE,YAAY,CAAC,WAAW,CAAC,KAAK,IAAI,IAAI;KAC9C,CAAC;IAEF,IAAI,YAAY,CAAC,UAAU,EAAE,CAAC;QAC5B,OAAO;YACL,GAAG,YAAY,CAAC,UAAU;YAC1B,WAAW;SACZ,CAAC;IACJ,CAAC;IAED,MAAM,oBAAoB,GA
AG,QAAQ,CAAC,EAAE,KAAK,SAAS,IAAI,QAAQ,CAAC,EAAE,KAAK,SAAS,CAAC;IACpF,IAAI,oBAAoB,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,IAAI,EAAE,kBAAkB;YACxB,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,IAAI,EACF,QAAQ,CAAC,EAAE,KAAK,SAAS;gBACvB,CAAC,CAAC,+BAA+B;gBACjC,CAAC,CAAC,+BAA+B;SACtC,CAAC,CAAC;IACL,CAAC;IAED,MAAM,aAAa,GAAG,0BAA0B,CAAC;QAC/C,GAAG,EAAE,IAAI,CAAC,GAAG;QACb,aAAa,EAAE,IAAI,CAAC,aAAa,IAAI,IAAI;QACzC,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,UAAU,EAAE,IAAI,CAAC,UAAU;QAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,YAAY,EAAE,IAAI,CAAC,YAAY;KAChC,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,MAAM,eAAe,CAAC,QAAQ,EAAE,WAAW,EAAE;QAClE,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,GAAG,EAAE,IAAI,CAAC,GAAG;QACb,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;QAC7C,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,aAAa;QACb,SAAS,EAAE,aAAa,CAAC,SAAS;QAClC,UAAU,EAAE,aAAa,CAAC,UAAU;QACpC,YAAY,EAAE,aAAa,CAAC,YAAY;QACxC,YAAY,EAAE,aAAa,CAAC,YAAY;QACxC,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,qBAAqB,EAAE,IAAI,CAAC,qBAAqB,IAAI,IAAI;QACzD,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,qBAAqB,EAAE,qBAAqB,IAAI,MAAM;QACtD,mBAAmB,EAAE,mBAAmB,IAAI,MAAM;QAClD,aAAa,EAAE,aAAa,IAAI,IAAI;QACpC,oBAAoB,EAAE,oBAAoB,IAAI,KAAK;KACpD,CAAC,CAAC;IAEH,IAAI,oBAAoB,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,IAAI,EAAE,iBAAiB;YACvB,GAAG,EAAE,aAAa;YAClB,EAAE,EAAE,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAClE,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,cAAc,CAAC,MAAM;YAC7B,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,EAAE,IAAI,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE;SACtF,CAAC,CAAC;IACL,CAAC;IAED,WAAW,CAAC,QAAQ,GAAG,cAAc,CAAC,MAAM,CAAC;IAC7C,WAAW,CAAC,kBAAkB,GAAG,cAAc,CAAC,kBAAkB,CAAC;IACnE,WAAW,CAAC,YAAY,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC1F,IAAI,cAAc,CAAC,KAAK,EAAE,CAAC;QACzB,WAAW,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,cAAc,CAAC,KAAK,CAAC,CAAC;IAC1E,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,KAAK,IAAI,IAAI,cAAc,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;QACnE,IAAI,oBAAoB,EAAE,
CAAC;YACzB,MAAM,QAAQ,GAAG,EAAE,GAAG,CAAC,cAAc,CAAC,QAAQ,IAAI,EAAE,CAAC,EAAE,CAAC;YACxD,IAAI,cAAc,CAAC,QAAQ,IAAI,cAAc,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClE,QAAQ,CAAC,UAAU,GAAG,IAAI,CAAC;gBAC3B,QAAQ,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC;YAC9C,CAAC;iBAAM,IAAI,QAAQ,CAAC,UAAU,IAAI,IAAI,EAAE,CAAC;gBACvC,QAAQ,CAAC,UAAU,GAAG,KAAK,CAAC;YAC9B,CAAC;YACD,cAAc,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACrC,CAAC;aAAM,IAAI,cAAc,CAAC,QAAQ,IAAI,cAAc,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzE,cAAc,CAAC,QAAQ,GAAG;gBACxB,GAAG,CAAC,cAAc,CAAC,QAAQ,IAAI,EAAE,CAAC;gBAClC,QAAQ,EAAE,cAAc,CAAC,QAAQ;aAClC,CAAC;QACJ,CAAC;QACD,MAAM,oBAAoB,CAAC;YACzB,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,WAAW;YACX,MAAM,EAAE,cAAc;YACtB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,SAAS;SACV,CAAC,CAAC;IACL,CAAC;IAED,IAAI,CAAC,cAAc,CAAC,IAAI,IAAI,YAAY,CAAC,MAAM,EAAE,OAAO,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QACnF,WAAW,CAAC,WAAW,GAAG,UAAU,CAAC;QACrC,WAAW,CAAC,QAAQ,GAAG,eAAe,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACnE,WAAW,CAAC,YAAY,GAAG,OAAO,CAChC,YAAY,CAAC,MAAM,CAAC,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CACtE,CAAC;QACF,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,+DAA+D,CAChE,CAAC;QAEF,OAAO;YACL,IAAI,EAAE,YAAY,CAAC,MAAM,CAAC,OAAO;YACjC,MAAM,EAAE,WAAW,CAAC,QAAQ;YAC5B,QAAQ,EAAE,YAAY,CAAC,MAAM,CAAC,QAAQ,IAAI,IAAI;YAC9C,WAAW;YACX,QAAQ,EAAE,oBAAoB;gBAC5B,CAAC,CAAC,2BAA2B,CAAC,YAAY,CAAC,MAAM,CAAC,QAAQ,CAAC;gBAC3D,CAAC,CAAC,IAAI;SACT,CAAC;IACJ,CAAC;IAED,OAAO;QACL,IAAI,EAAE,cAAc,CAAC,IAAI;QACzB,MAAM,EAAE,cAAc,CAAC,MAAM;QAC7B,QAAQ,EAAE,cAAc,CAAC,QAAQ,IAAI,IAAI;QACzC,WAAW;QACX,QAAQ,EAAE,oBAAoB,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;KAC1E,CAAC;AACJ,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,GAAW,EAAiB,EAAE;IACxD,IAAI,oBAAoB,CAAC,GAAG,CAAC,EAAE,CAAC;QAC9B,OAAO,6BAA6B,CAAC,GAAG,CAAC,CAAC;IAC5C,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC,CAAC;AAEF,MAAM,cAAc,GAAG,CAAC,OAAwB,EAAkB,EAAE;IAClE,MAAM,qBAAqB,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,KAAK,mBAAmB,CAAC,CAAC;IAEhG,MAAM,mBAAm
B,GAAG,SAAS,CAAC,IAAI,CACxC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,KAAK,mBAAmB,IAAI,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,CACjF,CAAC;IACF,IAAI,mBAAmB,EAAE,CAAC;QACxB,OAAO,mBAAmB,CAAC;IAC7B,CAAC;IAED,IAAI,qBAAqB,EAAE,CAAC;QAC1B,OAAO,qBAAqB,CAAC;IAC/B,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;AACnE,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,KAAK,EAC3B,QAAwB,EACxB,OAAwB,EACxB,OAA6B,EACJ,EAAE,CAAC,QAAQ,CAAC,eAAe,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;AAEzE,MAAM,UAAU,GAAG,CAAC,QAAmC,EAAE,IAAY,EAAU,EAAE;IAC/E,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,GAAG,QAAQ,KAAK,IAAI,EAAE,CAAC;AAChC,CAAC,CAAC;AAEF,MAAM,2BAA2B,GAAG,CAAC,QAAyC,EAAE,EAAE;IAChF,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC3B,MAAM,QAAQ,GAAI,QAAmC,CAAC,QAAQ,CAAC;IAC/D,OAAO,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;QACnD,CAAC,CAAE,QAA6C;QAChD,CAAC,CAAC,IAAI,CAAC;AACX,CAAC,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { normalizeTranscriptText } from "../normalize.js";
|
|
2
|
+
import { resolveTranscriptProviderCapabilities } from "./transcription-capability.js";
|
|
3
|
+
/**
 * Transcribe a direct media URL through the yt-dlp pipeline.
 *
 * Diagnostic messages are appended to the caller-supplied `notes` array and
 * `"yt-dlp"` is appended to `attemptedProviders` once the attempt starts.
 *
 * @param {object} params
 * @param params.url - Media URL handed to yt-dlp.
 * @param params.options - Provider options; reads `ytDlpPath`, `mediaCache`,
 *   `onProgress` and `mediaKindHint`.
 * @param params.transcription - Transcription configuration forwarded to the
 *   capability check and to yt-dlp.
 * @param {string[]} params.notes - Mutable list of diagnostic notes.
 * @param {string[]} params.attemptedProviders - Mutable list of providers tried.
 * @param params.kind - Media kind; falls back to `options.mediaKindHint`.
 * @returns A transcript result object when yt-dlp produced text, otherwise
 *   `null` (yt-dlp not configured, no transcription provider, or no text).
 */
export async function fetchDirectMediaTranscript({ url, options, transcription, notes, attemptedProviders, kind, }) {
    if (!options.ytDlpPath) {
        notes.push("yt-dlp is not configured (set YT_DLP_PATH or ensure yt-dlp is on PATH)");
        return null;
    }
    const capabilities = await resolveTranscriptProviderCapabilities({
        transcription,
        ytDlpPath: options.ytDlpPath,
    });
    if (!capabilities.canTranscribe) {
        notes.push(capabilities.missingProviderNote);
        return null;
    }
    attemptedProviders.push("yt-dlp");
    // Loaded lazily so the yt-dlp module is only pulled in when actually needed.
    const ytDlpModule = await import("./youtube/yt-dlp.js");
    const outcome = await ytDlpModule.fetchTranscriptWithYtDlp({
        ytDlpPath: options.ytDlpPath,
        transcription,
        mediaCache: options.mediaCache ?? null,
        url,
        onProgress: options.onProgress ?? null,
        service: "generic",
        mediaKind: kind ?? options.mediaKindHint ?? null,
    });
    if (outcome.notes.length > 0) {
        notes.push(...outcome.notes);
    }
    if (!outcome.text) {
        if (outcome.error) {
            notes.push(`yt-dlp transcription failed: ${outcome.error.message}`);
        }
        return null;
    }
    return {
        text: normalizeTranscriptText(outcome.text),
        source: "yt-dlp",
        attemptedProviders,
        metadata: {
            provider: "generic",
            kind: kind ?? "media",
            transcriptionProvider: outcome.provider,
        },
        notes: notes.length > 0 ? notes.join("; ") : null,
    };
}
|
|
47
|
+
//# sourceMappingURL=generic-direct-media.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generic-direct-media.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/generic-direct-media.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAG1D,OAAO,EAAE,qCAAqC,EAAE,MAAM,+BAA+B,CAAC;AAEtF,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,EAC/C,GAAG,EACH,OAAO,EACP,aAAa,EACb,KAAK,EACL,kBAAkB,EAClB,IAAI,GAQL;IACC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC;QACvB,KAAK,CAAC,IAAI,CAAC,wEAAwE,CAAC,CAAC;QACrF,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,yBAAyB,GAAG,MAAM,qCAAqC,CAAC;QAC5E,aAAa;QACb,SAAS,EAAE,OAAO,CAAC,SAAS;KAC7B,CAAC,CAAC;IACH,IAAI,CAAC,yBAAyB,CAAC,aAAa,EAAE,CAAC;QAC7C,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC,mBAAmB,CAAC,CAAC;QAC1D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAElC,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;IAChD,MAAM,WAAW,GAAG,MAAM,GAAG,CAAC,wBAAwB,CAAC;QACrD,SAAS,EAAE,OAAO,CAAC,SAAS;QAC5B,aAAa;QACb,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,IAAI;QACtC,GAAG;QACH,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,IAAI;QACtC,OAAO,EAAE,SAAS;QAClB,SAAS,EAAE,IAAI,IAAI,OAAO,CAAC,aAAa,IAAI,IAAI;KACjD,CAAC,CAAC;IACH,IAAI,WAAW,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,KAAK,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;IAEnE,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC;QACrB,OAAO;YACL,IAAI,EAAE,uBAAuB,CAAC,WAAW,CAAC,IAAI,CAAC;YAC/C,MAAM,EAAE,QAAQ;YAChB,kBAAkB;YAClB,QAAQ,EAAE;gBACR,QAAQ,EAAE,SAAS;gBACnB,IAAI,EAAE,IAAI,IAAI,OAAO;gBACrB,qBAAqB,EAAE,WAAW,CAAC,QAAQ;aAC5C;YACD,KAAK,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;SAClD,CAAC;IACJ,CAAC;IAED,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,gCAAgC,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
import { isDirectMediaUrl } from "../../url.js";
|
|
3
|
+
import { jsonTranscriptToPlainText, jsonTranscriptToSegments, vttToPlainText, vttToSegments, } from "../parse.js";
|
|
4
|
+
/**
 * Scan an HTML document for embedded audio/video and caption tracks.
 *
 * Caption/subtitle `<track>` elements are collected and one preferred track
 * is chosen. Media URLs are taken from `<video>`/`<audio>` elements and from
 * `og:video`/`og:audio` meta tags; video wins over audio.
 *
 * @param html - HTML markup to parse.
 * @param baseUrl - Base used to make relative URLs absolute.
 * @returns `{ kind, mediaUrl, track }`, where `mediaUrl` is `null` when only
 *   a bare media tag or a caption track was found; `null` when the page has
 *   no embedded media at all.
 */
export function detectEmbeddedMedia(html, baseUrl) {
    const $ = load(html);
    const tracks = [];
    $('track[kind="captions"], track[kind="subtitles"]').each((_index, element) => {
        const node = $(element);
        const src = node.attr("src")?.trim();
        const url = src ? resolveAbsoluteUrl(src, baseUrl) : null;
        if (url) {
            tracks.push({
                url,
                type: node.attr("type")?.trim() ?? null,
                language: node.attr("srclang")?.trim() ?? node.attr("lang")?.trim() ?? null,
            });
        }
    });
    const track = selectPreferredTrack(tracks);
    // Video takes precedence over audio, so probe the kinds in that order.
    for (const kind of ["video", "audio"]) {
        const tagUrl = resolveFirstMediaUrl($, baseUrl, kind);
        const ogUrl = resolveOgMediaUrl($, baseUrl, kind);
        if (tagUrl || ogUrl) {
            return { kind, mediaUrl: pickMediaUrl([tagUrl, ogUrl]), track };
        }
    }
    const videoTagPresent = $("video").length > 0;
    const audioTagPresent = !videoTagPresent && $("audio").length > 0;
    if (!track && !videoTagPresent && !audioTagPresent) {
        return null;
    }
    // No usable media URL, but a tag or caption track still signals embedded media.
    return { kind: audioTagPresent ? "audio" : "video", mediaUrl: null, track };
}
|
|
36
|
+
/**
 * Download an embedded caption track and convert it to plain text.
 *
 * The payload format is chosen from the track's declared `type`, the
 * response `content-type`, and the URL's `.vtt` extension: JSON is parsed
 * with the JSON transcript helpers, WebVTT with the VTT helpers, and anything
 * else is returned as trimmed raw text.
 *
 * @param fetchImpl - `fetch`-compatible function used for the request.
 * @param track - Track descriptor; reads `url` and `type`.
 * @param {string[]} notes - Mutable list that receives failure diagnostics.
 * @param {boolean} includeSegments - Whether to also build timed segments.
 * @returns {Promise<{text: string, segments: unknown} | null>} The transcript
 *   text (plus segments when requested and supported), or `null` when the
 *   fetch fails, the payload cannot be parsed, or the result is empty.
 *   Errors are never thrown; they are recorded in `notes`.
 */
export async function fetchCaptionTrack(fetchImpl, track, notes, includeSegments) {
    try {
        const res = await fetchImpl(track.url, {
            headers: { accept: "text/vtt,text/plain,application/json;q=0.9,*/*;q=0.8" },
        });
        if (!res.ok) {
            notes.push(`Embedded captions fetch failed (${res.status})`);
            return null;
        }
        const body = await res.text();
        const contentType = res.headers.get("content-type")?.toLowerCase() ?? "";
        const type = track.type?.toLowerCase() ?? "";
        if (type.includes("application/json") || contentType.includes("application/json")) {
            try {
                const parsed = JSON.parse(body);
                const text = jsonTranscriptToPlainText(parsed);
                if (!text)
                    return null;
                const segments = includeSegments ? jsonTranscriptToSegments(parsed) : null;
                return { text, segments };
            }
            catch {
                notes.push("Embedded captions JSON parse failed");
                return null;
            }
        }
        if (type.includes("text/vtt") ||
            contentType.includes("text/vtt") ||
            hasVttExtension(track.url)) {
            const plain = vttToPlainText(body);
            if (plain.length === 0)
                return null;
            const segments = includeSegments ? vttToSegments(body) : null;
            return { text: plain, segments };
        }
        // Unknown format: hand back the raw body as-is.
        const trimmed = body.trim();
        return trimmed.length > 0 ? { text: trimmed, segments: null } : null;
    }
    catch (error) {
        notes.push(`Embedded captions fetch failed: ${error instanceof Error ? error.message : error}`);
        return null;
    }
}
/**
 * Report whether a caption URL points at a `.vtt` resource.
 *
 * Besides the plain "URL ends with .vtt" check, the URL is also tested with
 * its query string and fragment removed, so signed or cache-busted URLs such
 * as `captions.vtt?token=abc` are still recognised as WebVTT.
 */
function hasVttExtension(url) {
    const lowered = url.toLowerCase();
    if (lowered.endsWith(".vtt"))
        return true;
    return lowered.split(/[?#]/, 1)[0].endsWith(".vtt");
}
|
|
79
|
+
/**
 * Choose the caption track to use from a list of candidates.
 *
 * Each candidate is copied with its `language` lower-cased (or `null`). The
 * first copy whose language starts with "en" wins; otherwise the first copy
 * in the list is used.
 *
 * @param {Array<object>} tracks - Candidate tracks; reads `language`.
 * @returns The chosen (copied) track, or `null` for an empty list.
 */
function selectPreferredTrack(tracks) {
    const lowered = [];
    for (const track of tracks) {
        lowered.push({ ...track, language: track.language?.toLowerCase() ?? null });
    }
    for (const candidate of lowered) {
        if (candidate.language?.startsWith("en")) {
            return candidate;
        }
    }
    return lowered.length > 0 ? lowered[0] : null;
}
|
|
89
|
+
/**
 * Find the first media URL for a given tag name.
 *
 * Looks at the first `<tag src>` element, then at the first nested
 * `<source src>` when the former has no `src` attribute value.
 *
 * @param $ - Query function for the parsed document.
 * @param baseUrl - Base used to make the URL absolute.
 * @param {string} tag - Element name to search for.
 * @returns The absolute URL, or `null` when no usable `src` is present.
 */
function resolveFirstMediaUrl($, baseUrl, tag) {
    const ownSrc = $(`${tag}[src]`).first().attr("src");
    const src = ownSrc ?? $(`${tag} source[src]`).first().attr("src") ?? null;
    return src ? resolveAbsoluteUrl(src, baseUrl) : null;
}
|
|
95
|
+
/**
 * Read a media URL from Open Graph meta tags.
 *
 * Matches `og:<kind>`, `og:<kind>:url` and `og:<kind>:secure_url`, declared
 * through either the `property` or the `name` attribute, and uses the
 * `content` of the first matching element.
 *
 * @param $ - Query function for the parsed document.
 * @param baseUrl - Base used to make the URL absolute.
 * @param {string} kind - Media kind inserted into the `og:` keys.
 * @returns The absolute URL, or `null` when no tag with content is found.
 */
function resolveOgMediaUrl($, baseUrl, kind) {
    const selectors = [];
    for (const attribute of ["property", "name"]) {
        for (const suffix of ["", ":url", ":secure_url"]) {
            selectors.push(`meta[${attribute}="og:${kind}${suffix}"]`);
        }
    }
    const content = $(selectors.join(", ")).first().attr("content");
    return content ? resolveAbsoluteUrl(content, baseUrl) : null;
}
|
|
103
|
+
/**
 * Turn a possibly relative URL into an absolute one.
 *
 * @param {string} candidate - URL text; surrounding whitespace is ignored.
 * @param baseUrl - Base against which relative URLs are resolved.
 * @returns {string | null} The serialized absolute URL, or `null` when the
 *   candidate is blank or the `URL` constructor rejects it.
 */
function resolveAbsoluteUrl(candidate, baseUrl) {
    const cleaned = candidate.trim();
    if (cleaned === "") {
        return null;
    }
    let resolved;
    try {
        resolved = new URL(cleaned, baseUrl);
    }
    catch {
        return null;
    }
    return resolved.toString();
}
|
|
114
|
+
/**
 * Pick the best media URL from an ordered list of candidates.
 *
 * Empty entries are ignored. The first candidate accepted by
 * `isDirectMediaUrl` is preferred; failing that, the first non-empty
 * candidate is used.
 *
 * @param {Array<string | null | undefined>} candidates - URLs in priority order.
 * @returns {string | null} The selected URL, or `null` when every entry is empty.
 */
function pickMediaUrl(candidates) {
    const usable = candidates.filter(Boolean);
    const direct = usable.find((candidate) => isDirectMediaUrl(candidate));
    return direct ?? usable[0] ?? null;
}
|
|
126
|
+
//# sourceMappingURL=generic-embedded.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generic-embedded.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/generic-embedded.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAE/B,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EACL,yBAAyB,EACzB,wBAAwB,EACxB,cAAc,EACd,aAAa,GACd,MAAM,aAAa,CAAC;AAcrB,MAAM,UAAU,mBAAmB,CAAC,IAAY,EAAE,OAAe;IAC/D,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,MAAM,eAAe,GAAoB,EAAE,CAAC;IAC5C,CAAC,CAAC,iDAAiD,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,EAAE,EAAE;QACrE,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;QACtC,IAAI,CAAC,GAAG;YAAE,OAAO;QACjB,MAAM,GAAG,GAAG,kBAAkB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QAC7C,IAAI,CAAC,GAAG;YAAE,OAAO;QACjB,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,IAAI,IAAI,CAAC;QAChD,MAAM,QAAQ,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,IAAI,IAAI,CAAC;QACrF,eAAe,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,oBAAoB,CAAC,eAAe,CAAC,CAAC;IACpD,MAAM,QAAQ,GAAG,oBAAoB,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,oBAAoB,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC3D,MAAM,OAAO,GAAG,iBAAiB,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IACvD,MAAM,OAAO,GAAG,iBAAiB,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAEvD,IAAI,QAAQ,IAAI,OAAO,EAAE,CAAC;QACxB,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,YAAY,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC;IAC/E,CAAC;IACD,IAAI,QAAQ,IAAI,OAAO,EAAE,CAAC;QACxB,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,YAAY,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC;IAC/E,CAAC;IAED,MAAM,WAAW,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC1C,MAAM,WAAW,GAAG,CAAC,WAAW,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC1D,IAAI,KAAK,IAAI,WAAW,IAAI,WAAW,EAAE,CAAC;QACxC,OAAO,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;I
AC1E,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,SAAuB,EACvB,KAAoB,EACpB,KAAe,EACf,eAAwB;IAExB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,GAAG,EAAE;YACrC,OAAO,EAAE,EAAE,MAAM,EAAE,sDAAsD,EAAE;SAC5E,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,KAAK,CAAC,IAAI,CAAC,mCAAmC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;YAC7D,OAAO,IAAI,CAAC;QACd,CAAC;QACD,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;QACzE,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;QAE7C,IAAI,IAAI,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,WAAW,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;YAClF,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAChC,MAAM,IAAI,GAAG,yBAAyB,CAAC,MAAM,CAAC,CAAC;gBAC/C,IAAI,CAAC,IAAI;oBAAE,OAAO,IAAI,CAAC;gBACvB,MAAM,QAAQ,GAAG,eAAe,CAAC,CAAC,CAAC,wBAAwB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;gBAC3E,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;YAC5B,CAAC;YAAC,MAAM,CAAC;gBACP,KAAK,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;gBAClD,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QAED,IACE,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;YACzB,WAAW,CAAC,QAAQ,CAAC,UAAU,CAAC;YAChC,KAAK,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,EACxC,CAAC;YACD,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;YACnC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,IAAI,CAAC;YACpC,MAAM,QAAQ,GAAG,eAAe,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC9D,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;QACnC,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IACvE,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,mCAAmC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QAChG,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,oBAAoB,CAAC,MAAuB;IACnD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACrC,MAAM,UAAU,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACxC,GA
AG,KAAK;QACR,QAAQ,EAAE,KAAK,CAAC,QAAQ,EAAE,WAAW,EAAE,IAAI,IAAI;KAChD,CAAC,CAAC,CAAC;IACJ,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,EAAE,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7E,OAAO,OAAO,IAAI,UAAU,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;AAC1C,CAAC;AAED,SAAS,oBAAoB,CAC3B,CAA0B,EAC1B,OAAe,EACf,GAAsB;IAEtB,MAAM,MAAM,GACV,CAAC,CAAC,GAAG,GAAG,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,GAAG,cAAc,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC;IAC9F,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IACzB,OAAO,kBAAkB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAC7C,CAAC;AAED,SAAS,iBAAiB,CACxB,CAA0B,EAC1B,OAAe,EACf,IAAuB;IAEvB,MAAM,IAAI,GAAG,CAAC,CACZ,qBAAqB,IAAI,yBAAyB,IAAI,6BAA6B,IAAI,gCAAgC,IAAI,qBAAqB,IAAI,yBAAyB,IAAI,eAAe,CACjM;SACE,KAAK,EAAE;SACP,IAAI,CAAC,SAAS,CAAC,CAAC;IACnB,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,OAAO,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,kBAAkB,CAAC,SAAiB,EAAE,OAAe;IAC5D,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IACjC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,QAAQ,EAAE,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,YAAY,CAAC,UAAgC;IACpD,IAAI,QAAQ,GAAkB,IAAI,CAAC;IACnC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,IAAI,CAAC,SAAS;YAAE,SAAS;QACzB,IAAI,gBAAgB,CAAC,SAAS,CAAC;YAAE,OAAO,SAAS,CAAC;QAClD,IAAI,CAAC,QAAQ;YAAE,QAAQ,GAAG,SAAS,CAAC;IACtC,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { normalizeTranscriptText } from "../normalize.js";
|
|
2
|
+
import { buildMissingTranscriptionProviderResult, resolveTranscriptProviderCapabilities, } from "./transcription-capability.js";
|
|
3
|
+
/**
 * Try to obtain a transcript for media attached to a Twitter/X URL by handing
 * the URL to yt-dlp and transcribing what it downloads.
 *
 * The function bails out early (without touching `attemptedProviders`) when no
 * yt-dlp path is configured or when the capability check reports that
 * transcription is not possible. Otherwise it records "yt-dlp" as attempted,
 * optionally resolves browser cookies through `options.resolveTwitterCookies`,
 * and delegates to `fetchTranscriptWithYtDlp` from `./youtube/yt-dlp.js`.
 *
 * Note: `attemptedProviders` and `notes` are caller-owned arrays and are
 * appended to in place.
 *
 * @param {object} params
 * @param {object} params.context - Request context; only `context.url` is read here.
 * @param {object} params.options - Reads `ytDlpPath`, `resolveTwitterCookies`,
 *   `mediaCache` and `onProgress`.
 * @param {*} params.transcription - Passed through unchanged to the capability
 *   check and to the yt-dlp transcript fetcher.
 * @param {string[]} params.attemptedProviders - Mutated: receives "yt-dlp" once
 *   the preconditions pass.
 * @param {string[]} params.notes - Mutated: receives cookie warnings, the cookie
 *   source note, yt-dlp notes and any failure message.
 * @param {*} params.mediaKindHint - Forwarded to the fetcher as `mediaKind`.
 * @returns {Promise<object>} A result with `text`, `source`, `attemptedProviders`,
 *   `metadata` and `notes`; `text`/`source` are null when no transcript was produced.
 */
export async function fetchTwitterMediaTranscript({ context, options, transcription, attemptedProviders, notes, mediaKindHint, }) {
    const { ytDlpPath } = options;
    const baseMetadata = { provider: "generic", kind: "twitter" };
    // Collapse the accumulated notes into the single string callers receive.
    const summarizeNotes = () => (notes.length > 0 ? notes.join("; ") : null);

    // Guard 1: yt-dlp is mandatory for this provider.
    if (!ytDlpPath) {
        return {
            text: null,
            source: null,
            attemptedProviders,
            metadata: { ...baseMetadata, reason: "missing_yt_dlp" },
            notes: "yt-dlp is not configured (set YT_DLP_PATH or ensure yt-dlp is on PATH)",
        };
    }

    // Guard 2: a transcription backend must be available.
    const capabilities = await resolveTranscriptProviderCapabilities({
        transcription,
        ytDlpPath,
    });
    if (!capabilities.canTranscribe) {
        return buildMissingTranscriptionProviderResult({
            attemptedProviders,
            metadata: { ...baseMetadata, reason: "missing_transcription_keys" },
        });
    }

    attemptedProviders.push("yt-dlp");

    // Cookie resolution is optional; without a resolver we proceed with none.
    let cookies = null;
    if (options.resolveTwitterCookies) {
        cookies = await options.resolveTwitterCookies({ url: context.url });
    }
    const cookieWarnings = cookies?.warnings;
    if (cookieWarnings?.length) {
        notes.push(...cookieWarnings);
    }

    // Stays undefined unless browser cookies were resolved, so the fetcher
    // only receives extra arguments when there is something to pass.
    let extraArgs;
    if (cookies?.cookiesFromBrowser) {
        extraArgs = ["--cookies-from-browser", cookies.cookiesFromBrowser];
        if (cookies.source) {
            notes.push(`Using X cookies from ${cookies.source}`);
        }
    }

    // Loaded on demand, only once the preconditions above have passed.
    const ytDlpModule = await import("./youtube/yt-dlp.js");
    const outcome = await ytDlpModule.fetchTranscriptWithYtDlp({
        ytDlpPath,
        transcription,
        mediaCache: options.mediaCache ?? null,
        url: context.url,
        onProgress: options.onProgress ?? null,
        service: "generic",
        extraArgs,
        mediaKind: mediaKindHint,
    });
    if (outcome.notes.length > 0) {
        notes.push(...outcome.notes);
    }

    // Success: normalize the transcript and report where cookies came from.
    if (outcome.text) {
        return {
            text: normalizeTranscriptText(outcome.text),
            source: "yt-dlp",
            attemptedProviders,
            metadata: {
                ...baseMetadata,
                transcriptionProvider: outcome.provider,
                cookieSource: cookies?.source ?? null,
            },
            notes: summarizeNotes(),
        };
    }

    // Failure: distinguish a yt-dlp error from an empty transcript.
    const failed = Boolean(outcome.error);
    if (failed) {
        notes.push(`yt-dlp transcription failed: ${outcome.error.message}`);
    }
    return {
        text: null,
        source: null,
        attemptedProviders,
        metadata: {
            ...baseMetadata,
            reason: failed ? "yt_dlp_failed" : "no_transcript",
            transcriptionProvider: outcome.provider,
        },
        notes: summarizeNotes(),
    };
}
|
|
78
|
+
//# sourceMappingURL=generic-twitter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generic-twitter.js","sourceRoot":"","sources":["../../../../../src/content/transcript/providers/generic-twitter.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAG1D,OAAO,EACL,uCAAuC,EACvC,qCAAqC,GACtC,MAAM,+BAA+B,CAAC;AAEvC,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,EAChD,OAAO,EACP,OAAO,EACP,aAAa,EACb,kBAAkB,EAClB,KAAK,EACL,aAAa,GAQd;IACC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC;QACvB,OAAO;YACL,IAAI,EAAE,IAAI;YACV,MAAM,EAAE,IAAI;YACZ,kBAAkB;YAClB,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,gBAAgB,EAAE;YAC5E,KAAK,EAAE,wEAAwE;SAChF,CAAC;IACJ,CAAC;IAED,MAAM,yBAAyB,GAAG,MAAM,qCAAqC,CAAC;QAC5E,aAAa;QACb,SAAS,EAAE,OAAO,CAAC,SAAS;KAC7B,CAAC,CAAC;IACH,IAAI,CAAC,yBAAyB,CAAC,aAAa,EAAE,CAAC;QAC7C,OAAO,uCAAuC,CAAC;YAC7C,kBAAkB;YAClB,QAAQ,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,4BAA4B,EAAE;SACzF,CAAC,CAAC;IACL,CAAC;IAED,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAElC,MAAM,QAAQ,GAAG,OAAO,CAAC,qBAAqB;QAC5C,CAAC,CAAC,MAAM,OAAO,CAAC,qBAAqB,CAAC,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,CAAC;QAC3D,CAAC,CAAC,IAAI,CAAC;IACT,IAAI,QAAQ,EAAE,QAAQ,EAAE,MAAM;QAAE,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAEjE,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,IAAI,QAAQ,EAAE,kBAAkB,EAAE,CAAC;QACjC,SAAS,CAAC,IAAI,CAAC,wBAAwB,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAAC;QACtE,IAAI,QAAQ,CAAC,MAAM;YAAE,KAAK,CAAC,IAAI,CAAC,wBAAwB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;IAChD,MAAM,WAAW,GAAG,MAAM,GAAG,CAAC,wBAAwB,CAAC;QACrD,SAAS,EAAE,OAAO,CAAC,SAAS;QAC5B,aAAa;QACb,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,IAAI;QACtC,GAAG,EAAE,OAAO,CAAC,GAAG;QAChB,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,IAAI;QACtC,OAAO,EAAE,SAAS;QAClB,SAAS,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;QACvD,SAAS,EAAE,aAAa;KACzB,CAAC,CAAC;IACH,IAAI,WAAW,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,KAAK,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;IAEnE,IAAI,WAAW,CAAC,IAAI,EAAE,CAAC;QACrB,OAAO;YACL,IAAI,EAAE,uBAAuB,CAAC,WAAW,CAAC,IAAI,CAAC;YAC/C,MAAM,EAAE,Q
AAQ;YAChB,kBAAkB;YAClB,QAAQ,EAAE;gBACR,QAAQ,EAAE,SAAS;gBACnB,IAAI,EAAE,SAAS;gBACf,qBAAqB,EAAE,WAAW,CAAC,QAAQ;gBAC3C,YAAY,EAAE,QAAQ,EAAE,MAAM,IAAI,IAAI;aACvC;YACD,KAAK,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;SAClD,CAAC;IACJ,CAAC;IAED,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,gCAAgC,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;IAC1E,CAAC;IAED,OAAO;QACL,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,IAAI;QACZ,kBAAkB;QAClB,QAAQ,EAAE;YACR,QAAQ,EAAE,SAAS;YACnB,IAAI,EAAE,SAAS;YACf,MAAM,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,eAAe;YAC7D,qBAAqB,EAAE,WAAW,CAAC,QAAQ;SAC5C;QACD,KAAK,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;KAClD,CAAC;AACJ,CAAC"}
|