@steipete/summarize-core 0.8.2 → 0.10.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/LICENSE +1 -1
  2. package/dist/esm/content/index.js.map +1 -1
  3. package/dist/esm/content/link-preview/client.js +4 -0
  4. package/dist/esm/content/link-preview/client.js.map +1 -1
  5. package/dist/esm/content/link-preview/content/fetcher.js +3 -2
  6. package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
  7. package/dist/esm/content/link-preview/content/firecrawl.js +3 -1
  8. package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -1
  9. package/dist/esm/content/link-preview/content/html.js +3 -1
  10. package/dist/esm/content/link-preview/content/html.js.map +1 -1
  11. package/dist/esm/content/link-preview/content/index.js +101 -19
  12. package/dist/esm/content/link-preview/content/index.js.map +1 -1
  13. package/dist/esm/content/link-preview/content/types.js.map +1 -1
  14. package/dist/esm/content/link-preview/content/utils.js +15 -7
  15. package/dist/esm/content/link-preview/content/utils.js.map +1 -1
  16. package/dist/esm/content/link-preview/types.js.map +1 -1
  17. package/dist/esm/content/transcript/cache.js +29 -3
  18. package/dist/esm/content/transcript/cache.js.map +1 -1
  19. package/dist/esm/content/transcript/index.js +38 -1
  20. package/dist/esm/content/transcript/index.js.map +1 -1
  21. package/dist/esm/content/transcript/parse.js +154 -0
  22. package/dist/esm/content/transcript/parse.js.map +1 -0
  23. package/dist/esm/content/transcript/providers/generic.js +235 -4
  24. package/dist/esm/content/transcript/providers/generic.js.map +1 -1
  25. package/dist/esm/content/transcript/providers/podcast/apple-flow.js +2 -0
  26. package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -1
  27. package/dist/esm/content/transcript/providers/podcast/media.js +21 -20
  28. package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -1
  29. package/dist/esm/content/transcript/providers/podcast/rss.js +16 -42
  30. package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -1
  31. package/dist/esm/content/transcript/providers/podcast/spotify-flow.js +1 -0
  32. package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -1
  33. package/dist/esm/content/transcript/providers/podcast.js +17 -4
  34. package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
  35. package/dist/esm/content/transcript/providers/transcription-start.js +50 -0
  36. package/dist/esm/content/transcript/providers/transcription-start.js.map +1 -0
  37. package/dist/esm/content/transcript/providers/youtube/api.js +24 -7
  38. package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -1
  39. package/dist/esm/content/transcript/providers/youtube/captions.js +132 -4
  40. package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
  41. package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +153 -50
  42. package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
  43. package/dist/esm/content/transcript/providers/youtube.js +60 -26
  44. package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
  45. package/dist/esm/content/transcript/timestamps.js +79 -0
  46. package/dist/esm/content/transcript/timestamps.js.map +1 -0
  47. package/dist/esm/language.js +1 -1
  48. package/dist/esm/language.js.map +1 -1
  49. package/dist/esm/processes.js +121 -0
  50. package/dist/esm/processes.js.map +1 -0
  51. package/dist/esm/prompts/cli.js +9 -1
  52. package/dist/esm/prompts/cli.js.map +1 -1
  53. package/dist/esm/prompts/file.js +34 -0
  54. package/dist/esm/prompts/file.js.map +1 -1
  55. package/dist/esm/prompts/index.js +2 -0
  56. package/dist/esm/prompts/index.js.map +1 -1
  57. package/dist/esm/prompts/link-summary.js +68 -47
  58. package/dist/esm/prompts/link-summary.js.map +1 -1
  59. package/dist/esm/prompts/summary-lengths.js +56 -0
  60. package/dist/esm/prompts/summary-lengths.js.map +1 -0
  61. package/dist/esm/prompts/summary-system.js +13 -0
  62. package/dist/esm/prompts/summary-system.js.map +1 -0
  63. package/dist/esm/transcription/onnx-cli.js +319 -0
  64. package/dist/esm/transcription/onnx-cli.js.map +1 -0
  65. package/dist/esm/transcription/whisper/core.js +72 -4
  66. package/dist/esm/transcription/whisper/core.js.map +1 -1
  67. package/dist/esm/transcription/whisper/ffmpeg.js +57 -5
  68. package/dist/esm/transcription/whisper/ffmpeg.js.map +1 -1
  69. package/dist/esm/transcription/whisper/whisper-cpp.js +13 -3
  70. package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -1
  71. package/dist/types/content/cache/types.d.ts +24 -0
  72. package/dist/types/content/index.d.ts +1 -1
  73. package/dist/types/content/link-preview/client.d.ts +3 -1
  74. package/dist/types/content/link-preview/content/fetcher.d.ts +5 -1
  75. package/dist/types/content/link-preview/content/firecrawl.d.ts +3 -1
  76. package/dist/types/content/link-preview/content/html.d.ts +3 -1
  77. package/dist/types/content/link-preview/content/types.d.ts +8 -1
  78. package/dist/types/content/link-preview/content/utils.d.ts +1 -1
  79. package/dist/types/content/link-preview/deps.d.ts +15 -2
  80. package/dist/types/content/link-preview/types.d.ts +7 -1
  81. package/dist/types/content/transcript/cache.d.ts +5 -2
  82. package/dist/types/content/transcript/index.d.ts +5 -1
  83. package/dist/types/content/transcript/parse.d.ts +9 -0
  84. package/dist/types/content/transcript/providers/podcast/media.d.ts +2 -1
  85. package/dist/types/content/transcript/providers/podcast/rss.d.ts +2 -0
  86. package/dist/types/content/transcript/providers/transcription-start.d.ts +26 -0
  87. package/dist/types/content/transcript/providers/youtube/api.d.ts +7 -2
  88. package/dist/types/content/transcript/providers/youtube/captions.d.ts +11 -1
  89. package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +10 -1
  90. package/dist/types/content/transcript/timestamps.d.ts +5 -0
  91. package/dist/types/content/transcript/types.d.ts +7 -1
  92. package/dist/types/processes.d.ts +50 -0
  93. package/dist/types/prompts/cli.d.ts +1 -1
  94. package/dist/types/prompts/file.d.ts +1 -1
  95. package/dist/types/prompts/index.d.ts +2 -0
  96. package/dist/types/prompts/link-summary.d.ts +8 -2
  97. package/dist/types/prompts/summary-lengths.d.ts +15 -0
  98. package/dist/types/prompts/summary-system.d.ts +1 -0
  99. package/dist/types/transcription/onnx-cli.d.ts +25 -0
  100. package/dist/types/transcription/whisper/core.d.ts +6 -2
  101. package/dist/types/transcription/whisper/ffmpeg.d.ts +5 -0
  102. package/dist/types/transcription/whisper/types.d.ts +1 -1
  103. package/package.json +11 -6
@@ -1 +1 @@
1
- {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/content/link-preview/types.ts"],"names":[],"mappings":"AAWA,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,SAAS,EAAE,QAAQ,CAAU,CAAA"}
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/content/link-preview/types.ts"],"names":[],"mappings":"AAkBA,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,SAAS,EAAE,QAAQ,CAAU,CAAA"}
@@ -1,7 +1,9 @@
1
1
  export const DEFAULT_TTL_MS = 1000 * 60 * 60 * 24 * 7;
2
2
  export const NEGATIVE_TTL_MS = 1000 * 60 * 60 * 6;
3
- export const readTranscriptCache = async ({ url, cacheMode, transcriptCache, }) => {
4
- const cached = transcriptCache ? await transcriptCache.get({ url }) : null;
3
+ export const readTranscriptCache = async ({ url, cacheMode, transcriptCache, transcriptTimestamps = false, fileMtime, }) => {
4
+ const cached = transcriptCache
5
+ ? await transcriptCache.get({ url, fileMtime: fileMtime ?? null })
6
+ : null;
5
7
  const diagnostics = buildBaseDiagnostics(cacheMode);
6
8
  if (!cached) {
7
9
  return { cached: null, resolution: null, diagnostics };
@@ -21,10 +23,23 @@ export const readTranscriptCache = async ({ url, cacheMode, transcriptCache, })
21
23
  }
22
24
  diagnostics.cacheStatus = 'hit';
23
25
  diagnostics.notes = appendNote(diagnostics.notes, 'Served transcript from cache');
26
+ const cachedSegments = extractSegments(cached.metadata);
27
+ const hasSegments = Boolean(cachedSegments && cachedSegments.length > 0);
28
+ const timestampsFlag = cached.metadata?.timestamps;
29
+ if (transcriptTimestamps &&
30
+ timestampsFlag !== false &&
31
+ (cachedSegments == null || cachedSegments.length === 0)) {
32
+ diagnostics.notes = appendNote(diagnostics.notes, 'Cached transcript missing timestamps; fetching fresh copy');
33
+ return { cached, resolution: null, diagnostics };
34
+ }
35
+ if (transcriptTimestamps && timestampsFlag === false) {
36
+ diagnostics.notes = appendNote(diagnostics.notes, 'Transcript timestamps unavailable');
37
+ }
24
38
  const resolution = {
25
39
  text: cached.content,
26
40
  source: provider,
27
41
  metadata: cached.metadata ?? null,
42
+ segments: transcriptTimestamps && hasSegments ? cachedSegments : null,
28
43
  };
29
44
  return { cached, resolution, diagnostics };
30
45
  };
@@ -47,6 +62,7 @@ export const mapCachedSource = (source) => {
47
62
  return null;
48
63
  if (source === 'youtubei' ||
49
64
  source === 'captionTracks' ||
65
+ source === 'embedded' ||
50
66
  source === 'yt-dlp' ||
51
67
  source === 'podcastTranscript' ||
52
68
  source === 'whisper' ||
@@ -57,7 +73,7 @@ export const mapCachedSource = (source) => {
57
73
  }
58
74
  return 'unknown';
59
75
  };
60
- export const writeTranscriptCache = async ({ url, service, resourceKey, result, transcriptCache, }) => {
76
+ export const writeTranscriptCache = async ({ url, service, resourceKey, result, transcriptCache, fileMtime, }) => {
61
77
  if (!transcriptCache) {
62
78
  return;
63
79
  }
@@ -74,6 +90,16 @@ export const writeTranscriptCache = async ({ url, service, resourceKey, result,
74
90
  content: result.text,
75
91
  source: resolvedSource,
76
92
  metadata: result.metadata ?? null,
93
+ fileMtime,
77
94
  });
78
95
  };
96
+ function extractSegments(metadata) {
97
+ if (!metadata)
98
+ return null;
99
+ const segments = metadata.segments;
100
+ if (!Array.isArray(segments))
101
+ return null;
102
+ const normalized = segments.filter((segment) => segment && typeof segment === 'object');
103
+ return normalized.length > 0 ? normalized : null;
104
+ }
79
105
  //# sourceMappingURL=cache.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"cache.js","sourceRoot":"","sources":["../../../../src/content/transcript/cache.ts"],"names":[],"mappings":"AAQA,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAA;AACrD,MAAM,CAAC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAA;AAmBjD,MAAM,CAAC,MAAM,mBAAmB,GAAG,KAAK,EAAE,EACxC,GAAG,EACH,SAAS,EACT,eAAe,GACI,EAAkC,EAAE;IACvD,MAAM,MAAM,GAAG,eAAe,CAAC,CAAC,CAAC,MAAM,eAAe,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;IAC1E,MAAM,WAAW,GAAG,oBAAoB,CAAC,SAAS,CAAC,CAAA;IAEnD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IACxD,CAAC;IAED,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;IAC/C,WAAW,CAAC,QAAQ,GAAG,QAAQ,CAAA;IAC/B,WAAW,CAAC,kBAAkB,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC3D,WAAW,CAAC,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAE/E,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC3B,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,iDAAiD,CAClD,CAAA;QACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IAClD,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,WAAW,CAAC,WAAW,GAAG,SAAS,CAAA;QACnC,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,gDAAgD,CACjD,CAAA;QACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IAClD,CAAC;IAED,WAAW,CAAC,WAAW,GAAG,KAAK,CAAA;IAC/B,WAAW,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,8BAA8B,CAAC,CAAA;IAEjF,MAAM,UAAU,GAAyB;QACvC,IAAI,EAAE,MAAM,CAAC,OAAO;QACpB,MAAM,EAAE,QAAQ;QAChB,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,IAAI;KAClC,CAAA;IACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,CAAA;AAC5C,CAAC,CAAA;AAED,MAAM,oBAAoB,GAAG,CAAC,SAAoB,EAAoB,EAAE,CAAC,CAAC;IACxE,SAAS;IACT,WAAW,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,MAAM;IACzD,QAAQ,EAAE,IAAI;IACd,kBAAkB,EAAE,EAAE;IACtB,YAAY,EAAE,KAAK;IACnB,KAAK,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,IAAI;CAChE,CAAC,CAAA;AAEF,MAAM,UAAU,GAAG,CAAC,QAAmC,EAAE,IAAY,EAAU,EAAE;IAC/E,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,O
AAO,IAAI,CAAA;IACb,CAAC;IACD,OAAO,GAAG,QAAQ,KAAK,IAAI,EAAE,CAAA;AAC/B,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,MAAqB,EAA2B,EAAE;IAChF,IAAI,MAAM,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IAChC,IACE,MAAM,KAAK,UAAU;QACrB,MAAM,KAAK,eAAe;QAC1B,MAAM,KAAK,QAAQ;QACnB,MAAM,KAAK,mBAAmB;QAC9B,MAAM,KAAK,SAAS;QACpB,MAAM,KAAK,OAAO;QAClB,MAAM,KAAK,MAAM;QACjB,MAAM,KAAK,aAAa,EACxB,CAAC;QACD,OAAO,MAAM,CAAA;IACf,CAAC;IACD,OAAO,SAAS,CAAA;AAClB,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,oBAAoB,GAAG,KAAK,EAAE,EACzC,GAAG,EACH,OAAO,EACP,WAAW,EACX,MAAM,EACN,eAAe,GAWhB,EAAiB,EAAE;IAClB,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,OAAM;IACR,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,KAAK,IAAI,IAAI,MAAM,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;QACnD,OAAM;IACR,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,eAAe,CAAA;IAC5D,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa,CAAC,CAAA;IAEjF,MAAM,eAAe,CAAC,GAAG,CAAC;QACxB,GAAG;QACH,OAAO;QACP,WAAW;QACX,KAAK;QACL,OAAO,EAAE,MAAM,CAAC,IAAI;QACpB,MAAM,EAAE,cAAc;QACtB,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,IAAI;KAClC,CAAC,CAAA;AACJ,CAAC,CAAA"}
1
+ {"version":3,"file":"cache.js","sourceRoot":"","sources":["../../../../src/content/transcript/cache.ts"],"names":[],"mappings":"AAQA,MAAM,CAAC,MAAM,cAAc,GAAG,IAAI,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAA;AACrD,MAAM,CAAC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAA;AAqBjD,MAAM,CAAC,MAAM,mBAAmB,GAAG,KAAK,EAAE,EACxC,GAAG,EACH,SAAS,EACT,eAAe,EACf,oBAAoB,GAAG,KAAK,EAC5B,SAAS,GACU,EAAkC,EAAE;IACvD,MAAM,MAAM,GAAG,eAAe;QAC5B,CAAC,CAAC,MAAM,eAAe,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,SAAS,IAAI,IAAI,EAAE,CAAC;QAClE,CAAC,CAAC,IAAI,CAAA;IACR,MAAM,WAAW,GAAG,oBAAoB,CAAC,SAAS,CAAC,CAAA;IAEnD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IACxD,CAAC;IAED,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;IAC/C,WAAW,CAAC,QAAQ,GAAG,QAAQ,CAAA;IAC/B,WAAW,CAAC,kBAAkB,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC3D,WAAW,CAAC,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAE/E,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC3B,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,iDAAiD,CAClD,CAAA;QACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IAClD,CAAC;IAED,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QACnB,WAAW,CAAC,WAAW,GAAG,SAAS,CAAA;QACnC,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,gDAAgD,CACjD,CAAA;QACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IAClD,CAAC;IAED,WAAW,CAAC,WAAW,GAAG,KAAK,CAAA;IAC/B,WAAW,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,8BAA8B,CAAC,CAAA;IAEjF,MAAM,cAAc,GAAG,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;IACvD,MAAM,WAAW,GAAG,OAAO,CAAC,cAAc,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IACxE,MAAM,cAAc,GAAG,MAAM,CAAC,QAAQ,EAAE,UAAU,CAAA;IAClD,IACE,oBAAoB;QACpB,cAAc,KAAK,KAAK;QACxB,CAAC,cAAc,IAAI,IAAI,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,CAAC,EACvD,CAAC;QACD,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,2DAA2D,CAC5D,CAAA;QACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IAClD,CAAC;IACD,IAAI,oBAAoB,IAAI,cAAc,KAAK,KAAK,EAAE,CAAC;QACrD,WAAW,CAAC,K
AAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,mCAAmC,CAAC,CAAA;IACxF,CAAC;IAED,MAAM,UAAU,GAAyB;QACvC,IAAI,EAAE,MAAM,CAAC,OAAO;QACpB,MAAM,EAAE,QAAQ;QAChB,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,IAAI;QACjC,QAAQ,EAAE,oBAAoB,IAAI,WAAW,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI;KACtE,CAAA;IACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,CAAA;AAC5C,CAAC,CAAA;AAED,MAAM,oBAAoB,GAAG,CAAC,SAAoB,EAAoB,EAAE,CAAC,CAAC;IACxE,SAAS;IACT,WAAW,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,MAAM;IACzD,QAAQ,EAAE,IAAI;IACd,kBAAkB,EAAE,EAAE;IACtB,YAAY,EAAE,KAAK;IACnB,KAAK,EAAE,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,IAAI;CAChE,CAAC,CAAA;AAEF,MAAM,UAAU,GAAG,CAAC,QAAmC,EAAE,IAAY,EAAU,EAAE;IAC/E,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IACD,OAAO,GAAG,QAAQ,KAAK,IAAI,EAAE,CAAA;AAC/B,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,MAAqB,EAA2B,EAAE;IAChF,IAAI,MAAM,KAAK,IAAI;QAAE,OAAO,IAAI,CAAA;IAChC,IACE,MAAM,KAAK,UAAU;QACrB,MAAM,KAAK,eAAe;QAC1B,MAAM,KAAK,UAAU;QACrB,MAAM,KAAK,QAAQ;QACnB,MAAM,KAAK,mBAAmB;QAC9B,MAAM,KAAK,SAAS;QACpB,MAAM,KAAK,OAAO;QAClB,MAAM,KAAK,MAAM;QACjB,MAAM,KAAK,aAAa,EACxB,CAAC;QACD,OAAO,MAAM,CAAA;IACf,CAAC;IACD,OAAO,SAAS,CAAA;AAClB,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,oBAAoB,GAAG,KAAK,EAAE,EACzC,GAAG,EACH,OAAO,EACP,WAAW,EACX,MAAM,EACN,eAAe,EACf,SAAS,GAYV,EAAiB,EAAE;IAClB,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,OAAM;IACR,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,KAAK,IAAI,IAAI,MAAM,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;QACnD,OAAM;IACR,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,eAAe,CAAA;IAC5D,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa,CAAC,CAAA;IAEjF,MAAM,eAAe,CAAC,GAAG,CAAC;QACxB,GAAG;QACH,OAAO;QACP,WAAW;QACX,KAAK;QACL,OAAO,EAAE,MAAM,CAAC,IAAI;QACpB,MAAM,EAAE,cAAc;QACtB,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,IAAI;QACjC,SAAS;KACV,CAAC,CAAA;AACJ,CAAC,CAAA;AAED,SAAS,eAAe,CAAC,QAAoD;IAC3E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,MAAM,QAAQ,GAAI,QAAmC,CAAC,QAAQ,CAAA;IAC9D,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAA;IACz
C,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,IAAI,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAA;IACvF,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,UAA+C,CAAC,CAAC,CAAC,IAAI,CAAA;AACxF,CAAC"}
@@ -9,7 +9,7 @@ const PROVIDERS = [
9
9
  { id: 'generic', canHandle: canHandleGeneric, fetchTranscript: fetchGeneric },
10
10
  ];
11
11
  const GENERIC_PROVIDER_ID = 'generic';
12
- export const resolveTranscriptForLink = async (url, html, deps, { youtubeTranscriptMode, cacheMode: providedCacheMode } = {}) => {
12
+ export const resolveTranscriptForLink = async (url, html, deps, { youtubeTranscriptMode, mediaTranscriptMode, mediaKindHint, transcriptTimestamps, cacheMode: providedCacheMode, fileMtime, } = {}) => {
13
13
  const normalizedUrl = url.trim();
14
14
  const embeddedYoutubeUrl = !isYouTubeUrlInternal(normalizedUrl) && html
15
15
  ? await extractEmbeddedYouTubeUrlFromHtml(html)
@@ -23,6 +23,8 @@ export const resolveTranscriptForLink = async (url, html, deps, { youtubeTranscr
23
23
  url: normalizedUrl,
24
24
  cacheMode,
25
25
  transcriptCache: deps.transcriptCache,
26
+ transcriptTimestamps: Boolean(transcriptTimestamps),
27
+ fileMtime: fileMtime ?? null,
26
28
  });
27
29
  const diagnostics = {
28
30
  cacheMode,
@@ -51,14 +53,19 @@ export const resolveTranscriptForLink = async (url, html, deps, { youtubeTranscr
51
53
  }
52
54
  const providerResult = await executeProvider(provider, baseContext, {
53
55
  fetch: deps.fetch,
56
+ env: deps.env,
54
57
  scrapeWithFirecrawl: deps.scrapeWithFirecrawl,
55
58
  apifyApiToken: deps.apifyApiToken,
56
59
  ytDlpPath: deps.ytDlpPath,
57
60
  falApiKey: deps.falApiKey,
58
61
  openaiApiKey: deps.openaiApiKey,
62
+ mediaCache: deps.mediaCache ?? null,
59
63
  resolveTwitterCookies: deps.resolveTwitterCookies ?? null,
60
64
  onProgress: deps.onProgress ?? null,
61
65
  youtubeTranscriptMode: youtubeTranscriptMode ?? 'auto',
66
+ mediaTranscriptMode: mediaTranscriptMode ?? 'auto',
67
+ mediaKindHint: mediaKindHint ?? null,
68
+ transcriptTimestamps: transcriptTimestamps ?? false,
62
69
  });
63
70
  if (shouldReportProgress) {
64
71
  deps.onProgress?.({
@@ -77,12 +84,30 @@ export const resolveTranscriptForLink = async (url, html, deps, { youtubeTranscr
77
84
  diagnostics.notes = appendNote(diagnostics.notes, providerResult.notes);
78
85
  }
79
86
  if (providerResult.source !== null || providerResult.text !== null) {
87
+ if (transcriptTimestamps) {
88
+ const nextMeta = { ...(providerResult.metadata ?? {}) };
89
+ if (providerResult.segments && providerResult.segments.length > 0) {
90
+ nextMeta.timestamps = true;
91
+ nextMeta.segments = providerResult.segments;
92
+ }
93
+ else if (nextMeta.timestamps == null) {
94
+ nextMeta.timestamps = false;
95
+ }
96
+ providerResult.metadata = nextMeta;
97
+ }
98
+ else if (providerResult.segments && providerResult.segments.length > 0) {
99
+ providerResult.metadata = {
100
+ ...(providerResult.metadata ?? {}),
101
+ segments: providerResult.segments,
102
+ };
103
+ }
80
104
  await writeTranscriptCache({
81
105
  url: normalizedUrl,
82
106
  service: provider.id,
83
107
  resourceKey,
84
108
  result: providerResult,
85
109
  transcriptCache: deps.transcriptCache,
110
+ fileMtime,
86
111
  });
87
112
  }
88
113
  if (!providerResult.text && cacheOutcome.cached?.content && cacheMode !== 'bypass') {
@@ -95,6 +120,9 @@ export const resolveTranscriptForLink = async (url, html, deps, { youtubeTranscr
95
120
  source: diagnostics.provider,
96
121
  metadata: cacheOutcome.cached.metadata ?? null,
97
122
  diagnostics,
123
+ segments: transcriptTimestamps
124
+ ? resolveSegmentsFromMetadata(cacheOutcome.cached.metadata)
125
+ : null,
98
126
  };
99
127
  }
100
128
  return {
@@ -102,6 +130,7 @@ export const resolveTranscriptForLink = async (url, html, deps, { youtubeTranscr
102
130
  source: providerResult.source,
103
131
  metadata: providerResult.metadata ?? null,
104
132
  diagnostics,
133
+ segments: transcriptTimestamps ? (providerResult.segments ?? null) : null,
105
134
  };
106
135
  };
107
136
  const extractResourceKey = (url) => {
@@ -128,4 +157,12 @@ const appendNote = (existing, next) => {
128
157
  }
129
158
  return `${existing}; ${next}`;
130
159
  };
160
+ const resolveSegmentsFromMetadata = (metadata) => {
161
+ if (!metadata)
162
+ return null;
163
+ const segments = metadata.segments;
164
+ return Array.isArray(segments) && segments.length > 0
165
+ ? segments
166
+ : null;
167
+ };
131
168
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/content/transcript/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAA;AACvF,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAA;AAC/B,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAA;AAC/B,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAA;AAO/B,OAAO,EACL,iCAAiC,EACjC,qBAAqB,IAAI,6BAA6B,EACtD,YAAY,IAAI,oBAAoB,GACrC,MAAM,YAAY,CAAA;AAOnB,MAAM,SAAS,GAAqB;IAClC,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;IAC7E,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;IAC7E,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;CAC9E,CAAA;AACD,MAAM,mBAAmB,GAAG,SAAS,CAAA;AAErC,MAAM,CAAC,MAAM,wBAAwB,GAAG,KAAK,EAC3C,GAAW,EACX,IAAmB,EACnB,IAAqB,EACrB,EAAE,qBAAqB,EAAE,SAAS,EAAE,iBAAiB,KAA+B,EAAE,EACvD,EAAE;IACjC,MAAM,aAAa,GAAG,GAAG,CAAC,IAAI,EAAE,CAAA;IAChC,MAAM,kBAAkB,GACtB,CAAC,oBAAoB,CAAC,aAAa,CAAC,IAAI,IAAI;QAC1C,CAAC,CAAC,MAAM,iCAAiC,CAAC,IAAI,CAAC;QAC/C,CAAC,CAAC,IAAI,CAAA;IACV,MAAM,YAAY,GAAG,kBAAkB,IAAI,aAAa,CAAA;IACxD,MAAM,WAAW,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAAA;IACpD,MAAM,WAAW,GAAoB,EAAE,GAAG,EAAE,YAAY,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IAC7E,MAAM,QAAQ,GAAmB,cAAc,CAAC,WAAW,CAAC,CAAA;IAC5D,MAAM,SAAS,GAAc,iBAAiB,IAAI,SAAS,CAAA;IAE3D,MAAM,YAAY,GAAG,MAAM,mBAAmB,CAAC;QAC7C,GAAG,EAAE,aAAa;QAClB,SAAS;QACT,eAAe,EAAE,IAAI,CAAC,eAAe;KACtC,CAAC,CAAA;IAEF,MAAM,WAAW,GAA0B;QACzC,SAAS;QACT,WAAW,EAAE,YAAY,CAAC,WAAW,CAAC,WAAW;QACjD,YAAY,EAAE,YAAY,CAAC,WAAW,CAAC,YAAY;QACnD,QAAQ,EAAE,YAAY,CAAC,WAAW,CAAC,QAAQ;QAC3C,kBAAkB,EAAE,EAAE;QACtB,KAAK,EAAE,YAAY,CAAC,WAAW,CAAC,KAAK,IAAI,IAAI;KAC9C,CAAA;IAED,IAAI,YAAY,CAAC,UAAU,EAAE,CAAC;QAC5B,OAAO;YACL,GAAG,YAAY,CAAC,UAAU;YAC1B,WAAW;SACZ,CAAA;IACH,CAAC;IAED,MAAM,oBAAoB,GAAG,QAAQ,CAAC,EAAE,KAAK,SAAS,IAAI,QAAQ,CAAC,EAAE,KAAK,SAAS,CAAA;IACnF,IAAI,oBAAoB,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,IAAI,EAAE,kBAAkB;YACxB,GAAG,EAAE,
aAAa;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,IAAI,EACF,QAAQ,CAAC,EAAE,KAAK,SAAS;gBACvB,CAAC,CAAC,+BAA+B;gBACjC,CAAC,CAAC,+BAA+B;SACtC,CAAC,CAAA;IACJ,CAAC;IAED,MAAM,cAAc,GAAG,MAAM,eAAe,CAAC,QAAQ,EAAE,WAAW,EAAE;QAClE,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;QAC7C,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,qBAAqB,EAAE,IAAI,CAAC,qBAAqB,IAAI,IAAI;QACzD,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,qBAAqB,EAAE,qBAAqB,IAAI,MAAM;KACvD,CAAC,CAAA;IAEF,IAAI,oBAAoB,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,IAAI,EAAE,iBAAiB;YACvB,GAAG,EAAE,aAAa;YAClB,EAAE,EAAE,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAClE,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,cAAc,CAAC,MAAM;YAC7B,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,EAAE,IAAI,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE;SACtF,CAAC,CAAA;IACJ,CAAC;IAED,WAAW,CAAC,QAAQ,GAAG,cAAc,CAAC,MAAM,CAAA;IAC5C,WAAW,CAAC,kBAAkB,GAAG,cAAc,CAAC,kBAAkB,CAAA;IAClE,WAAW,CAAC,YAAY,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IACzF,IAAI,cAAc,CAAC,KAAK,EAAE,CAAC;QACzB,WAAW,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,cAAc,CAAC,KAAK,CAAC,CAAA;IACzE,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,KAAK,IAAI,IAAI,cAAc,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;QACnE,MAAM,oBAAoB,CAAC;YACzB,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,WAAW;YACX,MAAM,EAAE,cAAc;YACtB,eAAe,EAAE,IAAI,CAAC,eAAe;SACtC,CAAC,CAAA;IACJ,CAAC;IAED,IAAI,CAAC,cAAc,CAAC,IAAI,IAAI,YAAY,CAAC,MAAM,EAAE,OAAO,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QACnF,WAAW,CAAC,WAAW,GAAG,UAAU,CAAA;QACpC,WAAW,CAAC,QAAQ,GAAG,eAAe,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;QAClE,WAAW,CAAC,YAAY,GAAG,OAAO,CAChC,YAAY,CAAC,MAAM,CAAC,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CACtE,CAAA;QACD,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,+DAA+D,CAChE,CAAA;QAED,OAAO;YACL,IAAI,EAAE,YAAY,CAAC,MAAM,CAAC,OAAO;YACjC,MAAM,EAAE,WAAW,CAAC,QAAQ;YAC5B,QAAQ,EAAE,YAAY,CAAC,MAAM,C
AAC,QAAQ,IAAI,IAAI;YAC9C,WAAW;SACZ,CAAA;IACH,CAAC;IAED,OAAO;QACL,IAAI,EAAE,cAAc,CAAC,IAAI;QACzB,MAAM,EAAE,cAAc,CAAC,MAAM;QAC7B,QAAQ,EAAE,cAAc,CAAC,QAAQ,IAAI,IAAI;QACzC,WAAW;KACZ,CAAA;AACH,CAAC,CAAA;AAED,MAAM,kBAAkB,GAAG,CAAC,GAAW,EAAiB,EAAE;IACxD,IAAI,oBAAoB,CAAC,GAAG,CAAC,EAAE,CAAC;QAC9B,OAAO,6BAA6B,CAAC,GAAG,CAAC,CAAA;IAC3C,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC,CAAA;AAED,MAAM,cAAc,GAAG,CAAC,OAAwB,EAAkB,EAAE;IAClE,MAAM,qBAAqB,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,KAAK,mBAAmB,CAAC,CAAA;IAE/F,MAAM,mBAAmB,GAAG,SAAS,CAAC,IAAI,CACxC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,KAAK,mBAAmB,IAAI,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,CACjF,CAAA;IACD,IAAI,mBAAmB,EAAE,CAAC;QACxB,OAAO,mBAAmB,CAAA;IAC5B,CAAC;IAED,IAAI,qBAAqB,EAAE,CAAC;QAC1B,OAAO,qBAAqB,CAAA;IAC9B,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAA;AAClE,CAAC,CAAA;AAED,MAAM,eAAe,GAAG,KAAK,EAC3B,QAAwB,EACxB,OAAwB,EACxB,OAA6B,EACJ,EAAE,CAAC,QAAQ,CAAC,eAAe,CAAC,OAAO,EAAE,OAAO,CAAC,CAAA;AAExE,MAAM,UAAU,GAAG,CAAC,QAAmC,EAAE,IAAY,EAAU,EAAE;IAC/E,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IACD,OAAO,GAAG,QAAQ,KAAK,IAAI,EAAE,CAAA;AAC/B,CAAC,CAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/content/transcript/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAA;AACvF,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAA;AAC/B,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAA;AAC/B,OAAO,EACL,SAAS,IAAI,gBAAgB,EAC7B,eAAe,IAAI,YAAY,GAChC,MAAM,wBAAwB,CAAA;AAO/B,OAAO,EACL,iCAAiC,EACjC,qBAAqB,IAAI,6BAA6B,EACtD,YAAY,IAAI,oBAAoB,GACrC,MAAM,YAAY,CAAA;AAWnB,MAAM,SAAS,GAAqB;IAClC,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;IAC7E,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;IAC7E,EAAE,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,gBAAgB,EAAE,eAAe,EAAE,YAAY,EAAE;CAC9E,CAAA;AACD,MAAM,mBAAmB,GAAG,SAAS,CAAA;AAErC,MAAM,CAAC,MAAM,wBAAwB,GAAG,KAAK,EAC3C,GAAW,EACX,IAAmB,EACnB,IAAqB,EACrB,EACE,qBAAqB,EACrB,mBAAmB,EACnB,aAAa,EACb,oBAAoB,EACpB,SAAS,EAAE,iBAAiB,EAC5B,SAAS,MACmB,EAAE,EACD,EAAE;IACjC,MAAM,aAAa,GAAG,GAAG,CAAC,IAAI,EAAE,CAAA;IAChC,MAAM,kBAAkB,GACtB,CAAC,oBAAoB,CAAC,aAAa,CAAC,IAAI,IAAI;QAC1C,CAAC,CAAC,MAAM,iCAAiC,CAAC,IAAI,CAAC;QAC/C,CAAC,CAAC,IAAI,CAAA;IACV,MAAM,YAAY,GAAG,kBAAkB,IAAI,aAAa,CAAA;IACxD,MAAM,WAAW,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAAA;IACpD,MAAM,WAAW,GAAoB,EAAE,GAAG,EAAE,YAAY,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;IAC7E,MAAM,QAAQ,GAAmB,cAAc,CAAC,WAAW,CAAC,CAAA;IAC5D,MAAM,SAAS,GAAc,iBAAiB,IAAI,SAAS,CAAA;IAE3D,MAAM,YAAY,GAAG,MAAM,mBAAmB,CAAC;QAC7C,GAAG,EAAE,aAAa;QAClB,SAAS;QACT,eAAe,EAAE,IAAI,CAAC,eAAe;QACrC,oBAAoB,EAAE,OAAO,CAAC,oBAAoB,CAAC;QACnD,SAAS,EAAE,SAAS,IAAI,IAAI;KAC7B,CAAC,CAAA;IAEF,MAAM,WAAW,GAA0B;QACzC,SAAS;QACT,WAAW,EAAE,YAAY,CAAC,WAAW,CAAC,WAAW;QACjD,YAAY,EAAE,YAAY,CAAC,WAAW,CAAC,YAAY;QACnD,QAAQ,EAAE,YAAY,CAAC,WAAW,CAAC,QAAQ;QAC3C,kBAAkB,EAAE,EAAE;QACtB,KAAK,EAAE,YAAY,CAAC,WAAW,CAAC,KAAK,IAAI,IAAI;KAC9C,CAAA;IAED,IAAI,YAAY,CAAC,UAAU,EAAE,CAAC;QAC5B,OAAO;YACL,GAAG,YAAY,CAAC,UAAU;YAC1B,WAAW;SACZ,CAAA;IACH,CAAC;IAED,MAAM,oBAAoB,GAAG,QAAQ,CAAC,EAAE,KAAK,SAAS,IAAI,QAAQ,CAAC,
EAAE,KAAK,SAAS,CAAA;IACnF,IAAI,oBAAoB,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,IAAI,EAAE,kBAAkB;YACxB,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,IAAI,EACF,QAAQ,CAAC,EAAE,KAAK,SAAS;gBACvB,CAAC,CAAC,+BAA+B;gBACjC,CAAC,CAAC,+BAA+B;SACtC,CAAC,CAAA;IACJ,CAAC;IAED,MAAM,cAAc,GAAG,MAAM,eAAe,CAAC,QAAQ,EAAE,WAAW,EAAE;QAClE,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,GAAG,EAAE,IAAI,CAAC,GAAG;QACb,mBAAmB,EAAE,IAAI,CAAC,mBAAmB;QAC7C,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,YAAY,EAAE,IAAI,CAAC,YAAY;QAC/B,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,qBAAqB,EAAE,IAAI,CAAC,qBAAqB,IAAI,IAAI;QACzD,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,qBAAqB,EAAE,qBAAqB,IAAI,MAAM;QACtD,mBAAmB,EAAE,mBAAmB,IAAI,MAAM;QAClD,aAAa,EAAE,aAAa,IAAI,IAAI;QACpC,oBAAoB,EAAE,oBAAoB,IAAI,KAAK;KACpD,CAAC,CAAA;IAEF,IAAI,oBAAoB,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,IAAI,EAAE,iBAAiB;YACvB,GAAG,EAAE,aAAa;YAClB,EAAE,EAAE,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAClE,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,cAAc,CAAC,MAAM;YAC7B,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,EAAE,IAAI,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE;SACtF,CAAC,CAAA;IACJ,CAAC;IAED,WAAW,CAAC,QAAQ,GAAG,cAAc,CAAC,MAAM,CAAA;IAC5C,WAAW,CAAC,kBAAkB,GAAG,cAAc,CAAC,kBAAkB,CAAA;IAClE,WAAW,CAAC,YAAY,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IACzF,IAAI,cAAc,CAAC,KAAK,EAAE,CAAC;QACzB,WAAW,CAAC,KAAK,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,EAAE,cAAc,CAAC,KAAK,CAAC,CAAA;IACzE,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,KAAK,IAAI,IAAI,cAAc,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;QACnE,IAAI,oBAAoB,EAAE,CAAC;YACzB,MAAM,QAAQ,GAAG,EAAE,GAAG,CAAC,cAAc,CAAC,QAAQ,IAAI,EAAE,CAAC,EAAE,CAAA;YACvD,IAAI,cAAc,CAAC,QAAQ,IAAI,cAAc,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClE,QAAQ,CAAC,UAAU,GAAG,IAAI,CAAA;gBAC1B,QAAQ,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAA;YAC7C,CAAC;iBAAM,IAAI,QAAQ,CAAC,UAAU,IAAI,IAAI,EAAE,CAAC;gBACvC,QAAQ,CAAC,UAAU,GAAG,KAAK,CAAA;YAC7B,CAAC;YACD,cAAc,CA
AC,QAAQ,GAAG,QAAQ,CAAA;QACpC,CAAC;aAAM,IAAI,cAAc,CAAC,QAAQ,IAAI,cAAc,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzE,cAAc,CAAC,QAAQ,GAAG;gBACxB,GAAG,CAAC,cAAc,CAAC,QAAQ,IAAI,EAAE,CAAC;gBAClC,QAAQ,EAAE,cAAc,CAAC,QAAQ;aAClC,CAAA;QACH,CAAC;QACD,MAAM,oBAAoB,CAAC;YACzB,GAAG,EAAE,aAAa;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,WAAW;YACX,MAAM,EAAE,cAAc;YACtB,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,SAAS;SACV,CAAC,CAAA;IACJ,CAAC;IAED,IAAI,CAAC,cAAc,CAAC,IAAI,IAAI,YAAY,CAAC,MAAM,EAAE,OAAO,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;QACnF,WAAW,CAAC,WAAW,GAAG,UAAU,CAAA;QACpC,WAAW,CAAC,QAAQ,GAAG,eAAe,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;QAClE,WAAW,CAAC,YAAY,GAAG,OAAO,CAChC,YAAY,CAAC,MAAM,CAAC,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CACtE,CAAA;QACD,WAAW,CAAC,KAAK,GAAG,UAAU,CAC5B,WAAW,CAAC,KAAK,EACjB,+DAA+D,CAChE,CAAA;QAED,OAAO;YACL,IAAI,EAAE,YAAY,CAAC,MAAM,CAAC,OAAO;YACjC,MAAM,EAAE,WAAW,CAAC,QAAQ;YAC5B,QAAQ,EAAE,YAAY,CAAC,MAAM,CAAC,QAAQ,IAAI,IAAI;YAC9C,WAAW;YACX,QAAQ,EAAE,oBAAoB;gBAC5B,CAAC,CAAC,2BAA2B,CAAC,YAAY,CAAC,MAAM,CAAC,QAAQ,CAAC;gBAC3D,CAAC,CAAC,IAAI;SACT,CAAA;IACH,CAAC;IAED,OAAO;QACL,IAAI,EAAE,cAAc,CAAC,IAAI;QACzB,MAAM,EAAE,cAAc,CAAC,MAAM;QAC7B,QAAQ,EAAE,cAAc,CAAC,QAAQ,IAAI,IAAI;QACzC,WAAW;QACX,QAAQ,EAAE,oBAAoB,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;KAC1E,CAAA;AACH,CAAC,CAAA;AAED,MAAM,kBAAkB,GAAG,CAAC,GAAW,EAAiB,EAAE;IACxD,IAAI,oBAAoB,CAAC,GAAG,CAAC,EAAE,CAAC;QAC9B,OAAO,6BAA6B,CAAC,GAAG,CAAC,CAAA;IAC3C,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC,CAAA;AAED,MAAM,cAAc,GAAG,CAAC,OAAwB,EAAkB,EAAE;IAClE,MAAM,qBAAqB,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,KAAK,mBAAmB,CAAC,CAAA;IAE/F,MAAM,mBAAmB,GAAG,SAAS,CAAC,IAAI,CACxC,CAAC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,KAAK,mBAAmB,IAAI,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,CACjF,CAAA;IACD,IAAI,mBAAmB,EAAE,CAAC;QACxB,OAAO,mBAAmB,CAAA;IAC5B,CAAC;IAED,IAAI,qBAAqB,EAAE,CAAC;QAC1B,OAAO,qBAAqB,CAAA;IAC9B,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAA;AAClE,CAAC,CAAA;AAED,MAAM,eAAe,GAAG,KAAK,EAC3B,QAAwB,EACxB,OAAwB,EAC
xB,OAA6B,EACJ,EAAE,CAAC,QAAQ,CAAC,eAAe,CAAC,OAAO,EAAE,OAAO,CAAC,CAAA;AAExE,MAAM,UAAU,GAAG,CAAC,QAAmC,EAAE,IAAY,EAAU,EAAE;IAC/E,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACb,CAAC;IACD,OAAO,GAAG,QAAQ,KAAK,IAAI,EAAE,CAAA;AAC/B,CAAC,CAAA;AAED,MAAM,2BAA2B,GAAG,CAAC,QAAyC,EAAE,EAAE;IAChF,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,MAAM,QAAQ,GAAI,QAAmC,CAAC,QAAQ,CAAA;IAC9D,OAAO,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;QACnD,CAAC,CAAE,QAA6C;QAChD,CAAC,CAAC,IAAI,CAAA;AACV,CAAC,CAAA"}
@@ -0,0 +1,154 @@
1
+ import { parseTimestampStringToMs, parseTimestampToMs } from './timestamps.js';
2
/**
 * Parse WebVTT caption text into timed transcript segments.
 *
 * Only lines containing `-->` are treated as cue timing lines; the `WEBVTT`
 * header, NOTE/STYLE/REGION block headers, cue identifiers and blank lines
 * between cues are skipped. Every non-blank line that follows a timing line
 * (up to the next blank line) is cue payload and is kept verbatim, so spoken
 * text such as "Note that ..." is no longer discarded.
 *
 * @param {string} raw - Raw WebVTT document.
 * @returns {Array<{startMs: *, endMs: *, text: string}> | null} One entry per
 *   cue that has a parseable start time and non-empty text (timestamps are
 *   whatever `parseTimestampStringToMs` returns; `endMs` is `null` when the
 *   end time could not be parsed), or `null` when no such cue exists.
 */
export function vttToSegments(raw) {
    // WebVTT allows CRLF, LF and bare CR as line terminators; fold them all to LF.
    const lines = raw.replace(/\r\n?/g, '\n').split('\n');
    const segments = [];
    let idx = 0;
    while (idx < lines.length) {
        const line = lines[idx]?.trim() ?? '';
        const isHeaderOrBlockStart = line.toUpperCase() === 'WEBVTT' || /^(NOTE|STYLE|REGION)\b/i.test(line);
        if (!line || isHeaderOrBlockStart || !line.includes('-->')) {
            idx += 1;
            continue;
        }
        const [startRaw, rest] = line.split('-->');
        // Cue settings (e.g. "align:start") may follow the end timestamp; keep only the first token.
        const endRaw = rest?.trim().split(/\s+/)[0] ?? '';
        const startMs = parseTimestampStringToMs(startRaw?.trim() ?? '');
        const endMs = parseTimestampStringToMs(endRaw);
        idx += 1;
        // Collect the cue payload: everything up to the next blank line.
        const textLines = [];
        while (idx < lines.length) {
            const cueLine = lines[idx]?.trim() ?? '';
            if (cueLine.length === 0) {
                break;
            }
            textLines.push(cueLine);
            idx += 1;
        }
        // Step over the blank line that terminated the cue.
        idx += 1;
        if (startMs == null) {
            continue;
        }
        const text = textLines.join(' ').replace(/\s+/g, ' ').trim();
        if (!text) {
            continue;
        }
        segments.push({
            startMs,
            endMs: endMs ?? null,
            text,
        });
    }
    return segments.length > 0 ? segments : null;
}
46
/**
 * Convert WebVTT caption text to plain text, one cue per line.
 *
 * Prefers the structured parse from `vttToSegments`. When that yields nothing,
 * falls back to a line filter that removes the header, timing lines, numeric
 * cue identifiers and NOTE/STYLE/REGION lines and returns what is left.
 *
 * @param {string} raw - Raw WebVTT document.
 * @returns {string} Trimmed plain text (possibly empty).
 */
export function vttToPlainText(raw) {
    const segments = vttToSegments(raw);
    if (segments) {
        return segments
            .map((segment) => segment.text)
            .join('\n')
            .trim();
    }
    const keptLines = raw
        // WebVTT allows CRLF, LF and bare CR as line terminators.
        .replace(/\r\n?/g, '\n')
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line.length > 0)
        .filter((line) => line.toUpperCase() !== 'WEBVTT')
        // The fallback is reached mostly when timing lines could not be parsed
        // (short "mm:ss.mmm" form, comma milliseconds, ...), so drop every line
        // carrying the "-->" cue-timing arrow rather than one exact format.
        .filter((line) => !line.includes('-->'))
        .filter((line) => !/^\d+$/.test(line))
        .filter((line) => !/^(NOTE|STYLE|REGION)\b/i.test(line));
    return keptLines.join('\n').trim();
}
65
/**
 * Build timed segments from an array of JSON transcript rows.
 *
 * A row contributes a segment when it is an object with string `text` (or,
 * failing that, string `utf8`) that is non-empty after whitespace collapsing,
 * and a start time given as `startMs` or, as a fallback, `start`. The second
 * argument passed to `parseTimestampToMs` is `false` for the `*Ms` fields and
 * `true` for `start`/`end` (presumably "value is in seconds" — confirm against
 * ./timestamps.js).
 *
 * @param {Array<*>} items - Candidate rows; non-object entries are ignored.
 * @returns {Array<{startMs: *, endMs: *, text: string}>} Parsed segments
 *   (possibly empty). `endMs` is `null` when no end time could be parsed.
 */
function parseSegmentsFromJsonArray(items) {
    const segments = [];
    for (const item of items) {
        if (!item || typeof item !== 'object') {
            continue;
        }
        const record = item;
        const rawText = typeof record.text === 'string'
            ? record.text
            : typeof record.utf8 === 'string'
                ? record.utf8
                : null;
        if (!rawText) {
            continue;
        }
        // Normalise before the emptiness check so whitespace-only rows do not
        // become segments with empty text (mirrors how VTT cues are handled).
        const text = rawText.replace(/\s+/g, ' ').trim();
        if (!text) {
            continue;
        }
        const start = parseTimestampToMs(record.startMs, false) ?? parseTimestampToMs(record.start, true);
        if (start == null) {
            continue;
        }
        const end = parseTimestampToMs(record.endMs, false) ?? parseTimestampToMs(record.end, true);
        segments.push({
            startMs: start,
            endMs: end ?? null,
            text,
        });
    }
    return segments;
}
94
/**
 * Extract timed segments from a JSON transcript payload.
 *
 * Accepts either an array of rows or an object whose `segments` property is
 * such an array; the rows are handed to `parseSegmentsFromJsonArray`.
 *
 * @param {*} payload - Parsed JSON value.
 * @returns {Array<object> | null} The parsed segments, or `null` when the
 *   payload has no row array or no row produced a segment.
 */
export function jsonTranscriptToSegments(payload) {
    let rows = null;
    if (Array.isArray(payload)) {
        rows = payload;
    }
    else if (payload && typeof payload === 'object' && Array.isArray(payload.segments)) {
        rows = payload.segments;
    }
    if (rows === null) {
        return null;
    }
    const parsed = parseSegmentsFromJsonArray(rows);
    return parsed.length > 0 ? parsed : null;
}
109
/**
 * Turn an array of transcript rows into newline-joined plain text.
 *
 * Uses the timed segments when `jsonTranscriptToSegments` finds any; otherwise
 * falls back to the rows' own `text` (or `utf8`) strings, trimmed, with empty
 * entries removed.
 *
 * @param {Array<*>} rows - Candidate transcript rows.
 * @returns {string | null} Joined text, or `null` when nothing usable remains.
 */
function transcriptRowsToPlainText(rows) {
    const segments = jsonTranscriptToSegments(rows);
    const parts = segments
        ? segments.map((segment) => segment.text)
        : rows
            .map((row) => {
                if (!row || typeof row !== 'object') {
                    return null;
                }
                // Accept `utf8` as well as `text`, like the segment parser does.
                if (typeof row.text === 'string') {
                    return row.text;
                }
                return typeof row.utf8 === 'string' ? row.utf8 : null;
            })
            .filter((t) => typeof t === 'string')
            .map((t) => t.trim())
            .filter(Boolean);
    const text = parts.join('\n').trim();
    return text.length > 0 ? text : null;
}

/**
 * Extract plain transcript text from a JSON payload.
 *
 * Supported shapes, checked in this order:
 *  - an array of rows;
 *  - an object with a non-blank string `transcript`;
 *  - an object with a non-blank string `text`;
 *  - an object with a `segments` array of rows.
 *
 * @param {*} payload - Parsed JSON value.
 * @returns {string | null} Trimmed transcript text, or `null` when the payload
 *   matches none of the shapes or contains no text.
 */
export function jsonTranscriptToPlainText(payload) {
    if (Array.isArray(payload)) {
        return transcriptRowsToPlainText(payload);
    }
    if (!payload || typeof payload !== 'object') {
        return null;
    }
    const record = payload;
    if (typeof record.transcript === 'string' && record.transcript.trim()) {
        return record.transcript.trim();
    }
    if (typeof record.text === 'string' && record.text.trim()) {
        return record.text.trim();
    }
    return Array.isArray(record.segments) ? transcriptRowsToPlainText(record.segments) : null;
}
154
+ //# sourceMappingURL=parse.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parse.js","sourceRoot":"","sources":["../../../../src/content/transcript/parse.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAA;AAO9E,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,MAAM,UAAU,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;IAC7C,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;IACpC,MAAM,QAAQ,GAAwB,EAAE,CAAA;IAExC,IAAI,GAAG,GAAG,CAAC,CAAA;IACX,OAAO,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;QACrC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,WAAW,EAAE,KAAK,QAAQ,IAAI,yBAAyB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACrF,GAAG,IAAI,CAAC,CAAA;YACR,SAAQ;QACV,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC1B,GAAG,IAAI,CAAC,CAAA;YACR,SAAQ;QACV,CAAC;QAED,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;QAC1C,MAAM,MAAM,GAAG,IAAI,EAAE,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QACjD,MAAM,OAAO,GAAG,wBAAwB,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAA;QAChE,MAAM,KAAK,GAAG,wBAAwB,CAAC,MAAM,CAAC,CAAA;QAC9C,GAAG,IAAI,CAAC,CAAA;QAER,MAAM,SAAS,GAAa,EAAE,CAAA;QAC9B,OAAO,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC1B,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,CAAA;YAC1B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;gBAAE,MAAK;YAClD,IAAI,CAAC,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;gBACpD,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAA;YAChC,CAAC;YACD,GAAG,IAAI,CAAC,CAAA;QACV,CAAC;QACD,GAAG,IAAI,CAAC,CAAA;QAER,IAAI,OAAO,IAAI,IAAI;YAAE,SAAQ;QAC7B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;QAC5D,IAAI,CAAC,IAAI;YAAE,SAAQ;QACnB,QAAQ,CAAC,IAAI,CAAC;YACZ,OAAO;YACP,KAAK,EAAE,KAAK,IAAI,IAAI;YACpB,IAAI;SACL,CAAC,CAAA;IACJ,CAAC;IAED,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAA;AAC9C,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,MAAM,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,CAAA;IACnC,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,QAAQ;aACZ,GAAG,CAAC,CAAC,OAAO,EAAE,EA
AE,CAAC,OAAO,CAAC,IAAI,CAAC;aAC9B,IAAI,CAAC,IAAI,CAAC;aACV,IAAI,EAAE,CAAA;IACX,CAAC;IAED,MAAM,KAAK,GAAG,GAAG;SACd,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;SACtB,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;SACjC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,QAAQ,CAAC;SACjD,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,4DAA4D,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SAC1F,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SACrC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,yBAAyB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;IAC1D,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;AAChC,CAAC;AAED,SAAS,0BAA0B,CAAC,KAAgB;IAClD,MAAM,QAAQ,GAAwB,EAAE,CAAA;IACxC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;YAAE,SAAQ;QAC/C,MAAM,MAAM,GAAG,IAA+B,CAAA;QAC9C,MAAM,IAAI,GACR,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ;YAC7B,CAAC,CAAC,MAAM,CAAC,IAAI;YACb,CAAC,CAAC,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ;gBAC/B,CAAC,CAAC,MAAM,CAAC,IAAI;gBACb,CAAC,CAAC,IAAI,CAAA;QACZ,IAAI,CAAC,IAAI;YAAE,SAAQ;QACnB,MAAM,OAAO,GAAG,kBAAkB,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAA;QACzD,MAAM,KAAK,GAAG,kBAAkB,CAAC,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;QACrD,MAAM,YAAY,GAAG,kBAAkB,CAAC,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;QAC3D,MAAM,UAAU,GAAG,kBAAkB,CAAC,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,CAAA;QACvD,MAAM,KAAK,GAAG,OAAO,IAAI,YAAY,CAAA;QACrC,MAAM,GAAG,GAAG,KAAK,IAAI,UAAU,CAAA;QAC/B,IAAI,KAAK,IAAI,IAAI;YAAE,SAAQ;QAC3B,QAAQ,CAAC,IAAI,CAAC;YACZ,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,GAAG,IAAI,IAAI;YAClB,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE;SACvC,CAAC,CAAA;IACJ,CAAC;IACD,OAAO,QAAQ,CAAA;AACjB,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,OAAgB;IACvD,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,0BAA0B,CAAC,OAAO,CAAC,CAAA;QACpD,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAA;IAC9C,CAAC;IAED,IAAI,OAAO,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAG,O
AAkC,CAAA;QACjD,MAAM,eAAe,GAAG,MAAM,CAAC,QAAQ,CAAA;QACvC,IAAI,KAAK,CAAC,OAAO,CAAC,eAAe,CAAC,EAAE,CAAC;YACnC,MAAM,QAAQ,GAAG,0BAA0B,CAAC,eAAe,CAAC,CAAA;YAC5D,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAA;QAC9C,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,OAAgB;IACxD,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,wBAAwB,CAAC,OAAO,CAAC,CAAA;QAClD,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,IAAI,GAAG,QAAQ;iBAClB,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;iBAC9B,IAAI,CAAC,IAAI,CAAC;iBACV,IAAI,EAAE,CAAA;YACT,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAA;QACtC,CAAC;QACD,MAAM,KAAK,GAAG,OAAO;aAClB,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAE,GAA+B,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;aAC7F,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC;aACjD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,OAAO,CAAC,CAAA;QAClB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;QACpC,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAA;IACtC,CAAC;IAED,IAAI,OAAO,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;QAC3C,MAAM,MAAM,GAAG,OAAkC,CAAA;QACjD,IAAI,OAAO,MAAM,CAAC,UAAU,KAAK,QAAQ,IAAI,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE;YACnE,OAAO,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE,CAAA;QACjC,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE;YAAE,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;QACpF,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAA;QAChC,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5B,MAAM,WAAW,GAAG,wBAAwB,CAAC,MAAM,CAAC,CAAA;YACpD,IAAI,WAAW,EAAE,CAAC;gBAChB,MAAM,IAAI,GAAG,WAAW;qBACrB,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;qBAC9B,IAAI,CAAC,IAAI,CAAC;qBACV,IAAI,EAAE,CAAA;gBACT,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAA;YACtC,CAAC;YACD,MAAM,KAAK,GAAG,QAAQ;iBACnB,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CACX,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAE,GAA+B,CAAC,IAA
I,CAAC,CAAC,CAAC,IAAI,CAC9E;iBACA,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC;iBACjD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;iBACpB,MAAM,CAAC,OAAO,CAAC,CAAA;YAClB,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAA;YACpC,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAA;QACtC,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAA;AACb,CAAC"}
@@ -1,16 +1,69 @@
1
- import { isWhisperCppReady } from '../../../transcription/whisper.js';
1
+ import { load } from 'cheerio';
2
2
  import { isTwitterStatusUrl } from '../../link-preview/content/twitter-utils.js';
3
+ import { isDirectMediaUrl } from '../../url.js';
3
4
  import { normalizeTranscriptText } from '../normalize.js';
5
+ import { jsonTranscriptToPlainText, jsonTranscriptToSegments, vttToPlainText, vttToSegments, } from '../parse.js';
6
+ import { resolveTranscriptionAvailability } from './transcription-start.js';
4
7
  export const canHandle = () => true;
5
8
  export const fetchTranscript = async (context, options) => {
6
9
  const attemptedProviders = [];
7
10
  const notes = [];
11
+ const embedded = context.html ? detectEmbeddedMedia(context.html, context.url) : null;
12
+ const twitterStatus = isTwitterStatusUrl(context.url);
13
+ const hasEmbeddedMedia = Boolean(embedded?.mediaUrl || embedded?.kind);
14
+ const mediaKindHint = options.mediaKindHint ?? embedded?.kind ?? null;
15
+ if (embedded?.track) {
16
+ attemptedProviders.push('embedded');
17
+ const caption = await fetchCaptionTrack(options.fetch, embedded.track, notes, Boolean(options.transcriptTimestamps));
18
+ if (caption?.text) {
19
+ return {
20
+ text: normalizeTranscriptText(caption.text),
21
+ source: 'embedded',
22
+ segments: options.transcriptTimestamps ? (caption.segments ?? null) : null,
23
+ attemptedProviders,
24
+ metadata: {
25
+ provider: 'embedded',
26
+ kind: embedded.kind,
27
+ trackUrl: embedded.track.url,
28
+ trackType: embedded.track.type,
29
+ trackLanguage: embedded.track.language,
30
+ },
31
+ notes: notes.length > 0 ? notes.join('; ') : null,
32
+ };
33
+ }
34
+ }
35
+ const shouldAttemptMediaTranscript = options.mediaTranscriptMode === 'prefer' || (twitterStatus && hasEmbeddedMedia);
36
+ const mediaUrl = shouldAttemptMediaTranscript
37
+ ? (embedded?.mediaUrl ?? (isDirectMediaUrl(context.url) ? context.url : null))
38
+ : null;
39
+ if (shouldAttemptMediaTranscript &&
40
+ (mediaUrl || embedded?.kind || isDirectMediaUrl(context.url))) {
41
+ const result = await fetchDirectMediaTranscript({
42
+ url: mediaUrl ?? context.url,
43
+ options,
44
+ notes,
45
+ attemptedProviders,
46
+ kind: embedded?.kind ?? null,
47
+ });
48
+ if (result)
49
+ return result;
50
+ }
51
+ if (twitterStatus && options.mediaTranscriptMode !== 'prefer' && !hasEmbeddedMedia) {
52
+ return {
53
+ text: null,
54
+ source: null,
55
+ attemptedProviders,
56
+ metadata: { provider: 'generic', kind: 'twitter', reason: 'media_mode_auto' },
57
+ notes: 'Twitter transcript skipped (media transcript mode is auto; enable --video-mode transcript to force audio).',
58
+ };
59
+ }
8
60
  if (!isTwitterStatusUrl(context.url)) {
9
61
  return {
10
62
  text: null,
11
63
  source: null,
12
64
  attemptedProviders,
13
65
  metadata: { provider: 'generic', reason: 'not_implemented' },
66
+ notes: notes.length > 0 ? notes.join('; ') : null,
14
67
  };
15
68
  }
16
69
  if (!options.ytDlpPath) {
@@ -22,9 +75,12 @@ export const fetchTranscript = async (context, options) => {
22
75
  notes: 'yt-dlp is not configured (set YT_DLP_PATH or ensure yt-dlp is on PATH)',
23
76
  };
24
77
  }
25
- const hasTranscriptionKeys = Boolean(options.openaiApiKey || options.falApiKey);
26
- const hasLocalWhisper = await isWhisperCppReady();
27
- if (!hasTranscriptionKeys && !hasLocalWhisper) {
78
+ const transcriptionAvailability = await resolveTranscriptionAvailability({
79
+ env: options.env,
80
+ openaiApiKey: options.openaiApiKey,
81
+ falApiKey: options.falApiKey,
82
+ });
83
+ if (!transcriptionAvailability.hasAnyProvider) {
28
84
  return {
29
85
  text: null,
30
86
  source: null,
@@ -48,12 +104,15 @@ export const fetchTranscript = async (context, options) => {
48
104
  const mod = await import('./youtube/yt-dlp.js');
49
105
  const ytdlpResult = await mod.fetchTranscriptWithYtDlp({
50
106
  ytDlpPath: options.ytDlpPath,
107
+ env: options.env,
51
108
  openaiApiKey: options.openaiApiKey,
52
109
  falApiKey: options.falApiKey,
110
+ mediaCache: options.mediaCache ?? null,
53
111
  url: context.url,
54
112
  onProgress: options.onProgress ?? null,
55
113
  service: 'generic',
56
114
  extraArgs: extraArgs.length > 0 ? extraArgs : undefined,
115
+ mediaKind: mediaKindHint,
57
116
  });
58
117
  if (ytdlpResult.notes.length > 0) {
59
118
  notes.push(...ytdlpResult.notes);
@@ -88,4 +147,176 @@ export const fetchTranscript = async (context, options) => {
88
147
  notes: notes.length > 0 ? notes.join('; ') : null,
89
148
  };
90
149
  };
150
/**
 * Inspect a page's HTML for embedded audio/video and caption tracks.
 *
 * Looks at `<track kind="captions|subtitles">` elements, `<video>`/`<audio>`
 * sources and `og:video`/`og:audio` meta tags. Video is checked before audio.
 *
 * @param {string} html - Page markup.
 * @param {string} baseUrl - URL used to resolve relative references.
 * @returns {{kind: string, mediaUrl: (string|null), track: (object|null)} | null}
 *   Detected media description, or `null` when the page has no track, no
 *   media URL and no `<video>`/`<audio>` element.
 */
function detectEmbeddedMedia(html, baseUrl) {
    const $ = load(html);
    const foundTracks = [];
    $('track[kind="captions"], track[kind="subtitles"]').each((_idx, el) => {
        const node = $(el);
        const src = node.attr('src')?.trim();
        const url = src ? resolveAbsoluteUrl(src, baseUrl) : null;
        if (url) {
            foundTracks.push({
                url,
                type: node.attr('type')?.trim() ?? null,
                language: node.attr('srclang')?.trim() ?? node.attr('lang')?.trim() ?? null,
            });
        }
    });
    const track = selectPreferredTrack(foundTracks);
    // Video wins over audio; within a kind, element sources are offered before Open Graph tags.
    for (const kind of ['video', 'audio']) {
        const elementUrl = resolveFirstMediaUrl($, baseUrl, kind);
        const openGraphUrl = resolveOgMediaUrl($, baseUrl, kind);
        if (elementUrl || openGraphUrl) {
            return { kind, mediaUrl: pickMediaUrl([elementUrl, openGraphUrl]), track };
        }
    }
    // No URL found: report bare media elements or a lone caption track.
    const videoTagPresent = $('video').length > 0;
    const audioTagOnly = !videoTagPresent && $('audio').length > 0;
    if (!track && !videoTagPresent && !audioTagOnly) {
        return null;
    }
    return { kind: audioTagOnly ? 'audio' : 'video', mediaUrl: null, track };
}
184
/**
 * Choose the caption track to use from the discovered candidates.
 *
 * Each candidate is copied with its `language` lower-cased (or `null`). The
 * first copy whose language starts with "en" is returned; otherwise the first
 * candidate.
 *
 * @param {Array<{url: string, type: (string|null), language: (string|null)}>} tracks
 * @returns {object | null} The chosen (copied) track, or `null` for an empty list.
 */
function selectPreferredTrack(tracks) {
    const lowered = tracks.map((entry) => Object.assign({}, entry, { language: entry.language?.toLowerCase() ?? null }));
    for (const entry of lowered) {
        if (entry.language?.startsWith('en')) {
            return entry;
        }
    }
    return lowered.length > 0 ? lowered[0] : null;
}
194
/**
 * Resolve the first `src` found on a media element or on one of its
 * `<source>` children.
 *
 * @param {Function} $ - Query function for the loaded document.
 * @param {string} baseUrl - URL used to resolve relative references.
 * @param {string} tag - Element name to look for (the caller passes 'video' or 'audio').
 * @returns {string | null} Absolute URL, or `null` when no usable `src` exists.
 */
function resolveFirstMediaUrl($, baseUrl, tag) {
    let src = $(`${tag}[src]`).first().attr('src');
    if (src == null) {
        // The element itself carries no src attribute; try nested <source> elements.
        src = $(`${tag} source[src]`).first().attr('src');
    }
    return src ? resolveAbsoluteUrl(src, baseUrl) : null;
}
200
/**
 * Resolve the media URL advertised through Open Graph meta tags.
 *
 * Queries `og:<kind>`, `og:<kind>:url` and `og:<kind>:secure_url`, declared via
 * either the `property` or the `name` attribute, and uses the `content` of the
 * first match.
 *
 * @param {Function} $ - Query function for the loaded document.
 * @param {string} baseUrl - URL used to resolve relative references.
 * @param {string} kind - Open Graph media kind (the caller passes 'video' or 'audio').
 * @returns {string | null} Absolute URL, or `null` when no tag has usable content.
 */
function resolveOgMediaUrl($, baseUrl, kind) {
    const selector = ['property', 'name']
        .flatMap((attr) => ['', ':url', ':secure_url'].map((suffix) => `meta[${attr}="og:${kind}${suffix}"]`))
        .join(', ');
    const content = $(selector).first().attr('content');
    return content ? resolveAbsoluteUrl(content, baseUrl) : null;
}
208
/**
 * Resolve a possibly relative URL reference against a base URL.
 *
 * @param {string} candidate - URL reference as found in the markup.
 * @param {string} baseUrl - Base used for relative references.
 * @returns {string | null} Serialized absolute URL, or `null` when the
 *   reference is blank or cannot be parsed.
 */
function resolveAbsoluteUrl(candidate, baseUrl) {
    const reference = candidate.trim();
    if (reference === '') {
        return null;
    }
    let resolved;
    try {
        resolved = new URL(reference, baseUrl);
    }
    catch {
        // Unparseable reference or base: treat as "no URL".
        return null;
    }
    return resolved.toString();
}
219
/**
 * Choose the best media URL from an ordered candidate list.
 *
 * Returns the first candidate that `isDirectMediaUrl` accepts; failing that,
 * the first non-empty candidate.
 *
 * @param {Array<string | null | undefined>} candidates - URLs in priority order.
 * @returns {string | null} The selected URL, or `null` when every entry is empty.
 */
function pickMediaUrl(candidates) {
    const usable = candidates.filter(Boolean);
    const direct = usable.find((candidate) => isDirectMediaUrl(candidate));
    return direct ?? usable[0] ?? null;
}
231
/**
 * Download an embedded caption track and convert it to transcript text.
 *
 * The payload format is decided in this order:
 *  1. JSON, when the track's declared type or the response content type says so;
 *  2. WebVTT, when either type says so, when the URL ends in `.vtt` (with or
 *     without a trailing query string / fragment), or when the body starts
 *     with the `WEBVTT` file signature;
 *  3. otherwise the trimmed body is used as-is.
 *
 * Failures never throw: a human-readable note is appended to `notes` and
 * `null` is returned.
 *
 * @param {Function} fetchImpl - fetch-compatible function used for the request.
 * @param {{url: string, type?: (string|null)}} track - Track to download.
 * @param {string[]} notes - Mutable list that receives diagnostic messages.
 * @param {boolean} includeSegments - Whether timed segments should be parsed too.
 * @returns {Promise<{text: string, segments: (Array<object>|null)} | null>}
 *   The transcript, or `null` when the fetch failed or produced no text.
 */
async function fetchCaptionTrack(fetchImpl, track, notes, includeSegments) {
    try {
        const res = await fetchImpl(track.url, {
            headers: { accept: 'text/vtt,text/plain,application/json;q=0.9,*/*;q=0.8' },
        });
        if (!res.ok) {
            notes.push(`Embedded captions fetch failed (${res.status})`);
            return null;
        }
        const body = await res.text();
        const contentType = res.headers.get('content-type')?.toLowerCase() ?? '';
        const type = track.type?.toLowerCase() ?? '';
        if (type.includes('application/json') || contentType.includes('application/json')) {
            try {
                const parsed = JSON.parse(body);
                const text = jsonTranscriptToPlainText(parsed);
                if (!text) {
                    return null;
                }
                const segments = includeSegments ? jsonTranscriptToSegments(parsed) : null;
                return { text, segments };
            }
            catch {
                notes.push('Embedded captions JSON parse failed');
                return null;
            }
        }
        const lowerUrl = track.url.toLowerCase();
        // Signed/CDN caption URLs often look like "captions.vtt?token=...": test the
        // part before any query string or fragment as well as the full URL.
        const hasVttExtension = lowerUrl.endsWith('.vtt') || lowerUrl.split(/[?#]/, 1)[0].endsWith('.vtt');
        // A WebVTT file starts with an optional BOM and "WEBVTT", followed by
        // whitespace, a line terminator or end of input.
        const hasVttSignature = /^\uFEFF?WEBVTT(?:[ \t\r\n]|$)/.test(body);
        if (type.includes('text/vtt') ||
            contentType.includes('text/vtt') ||
            hasVttExtension ||
            hasVttSignature) {
            const plain = vttToPlainText(body);
            if (plain.length === 0) {
                return null;
            }
            const segments = includeSegments ? vttToSegments(body) : null;
            return { text: plain, segments };
        }
        const trimmed = body.trim();
        return trimmed.length > 0 ? { text: trimmed, segments: null } : null;
    }
    catch (error) {
        notes.push(`Embedded captions fetch failed: ${error instanceof Error ? error.message : error}`);
        return null;
    }
}
274
/**
 * Transcribe a media URL through the yt-dlp based pipeline.
 *
 * Bails out early (recording a note, returning `null`) when yt-dlp is not
 * configured or `resolveTranscriptionAvailability` reports no provider.
 * Otherwise records the 'yt-dlp' attempt, runs `fetchTranscriptWithYtDlp` and
 * wraps a successful transcript in a provider result.
 *
 * @param {object} params
 * @param {string} params.url - Media (or page) URL handed to yt-dlp.
 * @param {object} params.options - Provider options (ytDlpPath, env, API keys, mediaCache, onProgress, mediaKindHint).
 * @param {string[]} params.notes - Mutable list that receives diagnostic messages.
 * @param {string[]} params.attemptedProviders - Mutable list of providers tried so far.
 * @param {string | null} params.kind - Detected media kind, if any.
 * @returns {Promise<object | null>} Transcript result, or `null` when no text was produced.
 */
async function fetchDirectMediaTranscript({ url, options, notes, attemptedProviders, kind, }) {
    const { ytDlpPath, env, openaiApiKey, falApiKey } = options;
    if (!ytDlpPath) {
        notes.push('yt-dlp is not configured (set YT_DLP_PATH or ensure yt-dlp is on PATH)');
        return null;
    }
    const { hasAnyProvider } = await resolveTranscriptionAvailability({ env, openaiApiKey, falApiKey });
    if (!hasAnyProvider) {
        notes.push('Missing transcription provider (install whisper-cpp or set OPENAI_API_KEY/FAL_KEY)');
        return null;
    }
    attemptedProviders.push('yt-dlp');
    // Loaded on demand, only once a transcription attempt is actually going to run.
    const ytDlpModule = await import('./youtube/yt-dlp.js');
    const outcome = await ytDlpModule.fetchTranscriptWithYtDlp({
        ytDlpPath,
        env,
        openaiApiKey,
        falApiKey,
        mediaCache: options.mediaCache ?? null,
        url,
        onProgress: options.onProgress ?? null,
        service: 'generic',
        mediaKind: kind ?? options.mediaKindHint ?? null,
    });
    if (outcome.notes.length > 0) {
        notes.push(...outcome.notes);
    }
    if (!outcome.text) {
        if (outcome.error) {
            notes.push(`yt-dlp transcription failed: ${outcome.error.message}`);
        }
        return null;
    }
    return {
        text: normalizeTranscriptText(outcome.text),
        source: 'yt-dlp',
        attemptedProviders,
        metadata: {
            provider: 'generic',
            kind: kind ?? 'media',
            transcriptionProvider: outcome.provider,
        },
        notes: notes.length > 0 ? notes.join('; ') : null,
    };
}
91
322
  //# sourceMappingURL=generic.js.map