@steipete/summarize-core 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. package/README.md +0 -1
  2. package/dist/esm/content/index.js +5 -5
  3. package/dist/esm/content/index.js.map +1 -1
  4. package/dist/esm/content/link-preview/client.js +20 -9
  5. package/dist/esm/content/link-preview/client.js.map +1 -1
  6. package/dist/esm/content/link-preview/content/article.js +84 -83
  7. package/dist/esm/content/link-preview/content/article.js.map +1 -1
  8. package/dist/esm/content/link-preview/content/cleaner.js +23 -20
  9. package/dist/esm/content/link-preview/content/cleaner.js.map +1 -1
  10. package/dist/esm/content/link-preview/content/constants.js.map +1 -1
  11. package/dist/esm/content/link-preview/content/fetcher.js +46 -40
  12. package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
  13. package/dist/esm/content/link-preview/content/firecrawl.js +16 -16
  14. package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -1
  15. package/dist/esm/content/link-preview/content/html.js +29 -27
  16. package/dist/esm/content/link-preview/content/html.js.map +1 -1
  17. package/dist/esm/content/link-preview/content/index.js +141 -88
  18. package/dist/esm/content/link-preview/content/index.js.map +1 -1
  19. package/dist/esm/content/link-preview/content/jsonld.js +12 -12
  20. package/dist/esm/content/link-preview/content/jsonld.js.map +1 -1
  21. package/dist/esm/content/link-preview/content/parsers.js +20 -20
  22. package/dist/esm/content/link-preview/content/parsers.js.map +1 -1
  23. package/dist/esm/content/link-preview/content/podcast-utils.js +34 -34
  24. package/dist/esm/content/link-preview/content/podcast-utils.js.map +1 -1
  25. package/dist/esm/content/link-preview/content/readability.js +16 -15
  26. package/dist/esm/content/link-preview/content/readability.js.map +1 -1
  27. package/dist/esm/content/link-preview/content/twitter-utils.js +24 -11
  28. package/dist/esm/content/link-preview/content/twitter-utils.js.map +1 -1
  29. package/dist/esm/content/link-preview/content/types.js +1 -1
  30. package/dist/esm/content/link-preview/content/types.js.map +1 -1
  31. package/dist/esm/content/link-preview/content/utils.js +17 -17
  32. package/dist/esm/content/link-preview/content/utils.js.map +1 -1
  33. package/dist/esm/content/link-preview/content/video.js +19 -19
  34. package/dist/esm/content/link-preview/content/video.js.map +1 -1
  35. package/dist/esm/content/link-preview/content/visibility.js +121 -0
  36. package/dist/esm/content/link-preview/content/visibility.js.map +1 -0
  37. package/dist/esm/content/link-preview/content/youtube.js +10 -10
  38. package/dist/esm/content/link-preview/content/youtube.js.map +1 -1
  39. package/dist/esm/content/link-preview/deps.js +16 -16
  40. package/dist/esm/content/link-preview/deps.js.map +1 -1
  41. package/dist/esm/content/link-preview/fetch-with-timeout.js +4 -4
  42. package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -1
  43. package/dist/esm/content/link-preview/types.js +1 -1
  44. package/dist/esm/content/link-preview/types.js.map +1 -1
  45. package/dist/esm/content/transcript/cache.js +22 -22
  46. package/dist/esm/content/transcript/cache.js.map +1 -1
  47. package/dist/esm/content/transcript/index.js +34 -24
  48. package/dist/esm/content/transcript/index.js.map +1 -1
  49. package/dist/esm/content/transcript/normalize.js +10 -10
  50. package/dist/esm/content/transcript/normalize.js.map +1 -1
  51. package/dist/esm/content/transcript/parse.js +31 -31
  52. package/dist/esm/content/transcript/parse.js.map +1 -1
  53. package/dist/esm/content/transcript/providers/generic.js +74 -78
  54. package/dist/esm/content/transcript/providers/generic.js.map +1 -1
  55. package/dist/esm/content/transcript/providers/podcast/apple-flow.js +36 -36
  56. package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -1
  57. package/dist/esm/content/transcript/providers/podcast/apple.js +5 -5
  58. package/dist/esm/content/transcript/providers/podcast/apple.js.map +1 -1
  59. package/dist/esm/content/transcript/providers/podcast/constants.js +2 -2
  60. package/dist/esm/content/transcript/providers/podcast/constants.js.map +1 -1
  61. package/dist/esm/content/transcript/providers/podcast/itunes.js +44 -42
  62. package/dist/esm/content/transcript/providers/podcast/itunes.js.map +1 -1
  63. package/dist/esm/content/transcript/providers/podcast/json.js +4 -4
  64. package/dist/esm/content/transcript/providers/podcast/json.js.map +1 -1
  65. package/dist/esm/content/transcript/providers/podcast/media.js +58 -49
  66. package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -1
  67. package/dist/esm/content/transcript/providers/podcast/results.js +2 -2
  68. package/dist/esm/content/transcript/providers/podcast/results.js.map +1 -1
  69. package/dist/esm/content/transcript/providers/podcast/rss.js +29 -29
  70. package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -1
  71. package/dist/esm/content/transcript/providers/podcast/spotify-flow.js +38 -38
  72. package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -1
  73. package/dist/esm/content/transcript/providers/podcast/spotify.js +32 -32
  74. package/dist/esm/content/transcript/providers/podcast/spotify.js.map +1 -1
  75. package/dist/esm/content/transcript/providers/podcast.js +43 -47
  76. package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
  77. package/dist/esm/content/transcript/providers/transcription-start.js +59 -31
  78. package/dist/esm/content/transcript/providers/transcription-start.js.map +1 -1
  79. package/dist/esm/content/transcript/providers/youtube/api.js +56 -56
  80. package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -1
  81. package/dist/esm/content/transcript/providers/youtube/apify.js +7 -7
  82. package/dist/esm/content/transcript/providers/youtube/apify.js.map +1 -1
  83. package/dist/esm/content/transcript/providers/youtube/captions.js +76 -76
  84. package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
  85. package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +82 -75
  86. package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
  87. package/dist/esm/content/transcript/providers/youtube.js +84 -77
  88. package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
  89. package/dist/esm/content/transcript/timestamps.js +8 -8
  90. package/dist/esm/content/transcript/timestamps.js.map +1 -1
  91. package/dist/esm/content/transcript/transcription-config.js +14 -0
  92. package/dist/esm/content/transcript/transcription-config.js.map +1 -0
  93. package/dist/esm/content/transcript/utils.js +35 -35
  94. package/dist/esm/content/transcript/utils.js.map +1 -1
  95. package/dist/esm/content/url.js +59 -28
  96. package/dist/esm/content/url.js.map +1 -1
  97. package/dist/esm/index.js +4 -3
  98. package/dist/esm/index.js.map +1 -1
  99. package/dist/esm/language.js +77 -77
  100. package/dist/esm/language.js.map +1 -1
  101. package/dist/esm/openai/base-url.js +35 -0
  102. package/dist/esm/openai/base-url.js.map +1 -0
  103. package/dist/esm/processes.js +16 -16
  104. package/dist/esm/processes.js.map +1 -1
  105. package/dist/esm/prompts/cli.js +17 -17
  106. package/dist/esm/prompts/cli.js.map +1 -1
  107. package/dist/esm/prompts/file.js +54 -54
  108. package/dist/esm/prompts/file.js.map +1 -1
  109. package/dist/esm/prompts/format.js +2 -2
  110. package/dist/esm/prompts/format.js.map +1 -1
  111. package/dist/esm/prompts/index.js +5 -5
  112. package/dist/esm/prompts/index.js.map +1 -1
  113. package/dist/esm/prompts/link-summary.js +65 -65
  114. package/dist/esm/prompts/link-summary.js.map +1 -1
  115. package/dist/esm/prompts/summary-lengths.js +10 -10
  116. package/dist/esm/prompts/summary-lengths.js.map +1 -1
  117. package/dist/esm/prompts/summary-system.js +9 -9
  118. package/dist/esm/prompts/summary-system.js.map +1 -1
  119. package/dist/esm/shared/contracts.js +1 -1
  120. package/dist/esm/shared/contracts.js.map +1 -1
  121. package/dist/esm/transcription/onnx-cli.js +69 -69
  122. package/dist/esm/transcription/onnx-cli.js.map +1 -1
  123. package/dist/esm/transcription/whisper/constants.js +3 -3
  124. package/dist/esm/transcription/whisper/constants.js.map +1 -1
  125. package/dist/esm/transcription/whisper/core.js +148 -59
  126. package/dist/esm/transcription/whisper/core.js.map +1 -1
  127. package/dist/esm/transcription/whisper/fal.js +14 -14
  128. package/dist/esm/transcription/whisper/fal.js.map +1 -1
  129. package/dist/esm/transcription/whisper/ffmpeg.js +106 -106
  130. package/dist/esm/transcription/whisper/ffmpeg.js.map +1 -1
  131. package/dist/esm/transcription/whisper/groq.js +46 -0
  132. package/dist/esm/transcription/whisper/groq.js.map +1 -0
  133. package/dist/esm/transcription/whisper/openai.js +19 -13
  134. package/dist/esm/transcription/whisper/openai.js.map +1 -1
  135. package/dist/esm/transcription/whisper/utils.js +19 -19
  136. package/dist/esm/transcription/whisper/utils.js.map +1 -1
  137. package/dist/esm/transcription/whisper/whisper-cpp.js +64 -64
  138. package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -1
  139. package/dist/esm/transcription/whisper.js +4 -4
  140. package/dist/esm/transcription/whisper.js.map +1 -1
  141. package/dist/types/content/cache/types.d.ts +1 -1
  142. package/dist/types/content/index.d.ts +7 -7
  143. package/dist/types/content/link-preview/client.d.ts +7 -4
  144. package/dist/types/content/link-preview/content/cleaner.d.ts +1 -0
  145. package/dist/types/content/link-preview/content/fetcher.d.ts +2 -2
  146. package/dist/types/content/link-preview/content/firecrawl.d.ts +7 -7
  147. package/dist/types/content/link-preview/content/html.d.ts +8 -8
  148. package/dist/types/content/link-preview/content/index.d.ts +3 -3
  149. package/dist/types/content/link-preview/content/twitter-utils.d.ts +1 -0
  150. package/dist/types/content/link-preview/content/types.d.ts +8 -8
  151. package/dist/types/content/link-preview/content/utils.d.ts +3 -3
  152. package/dist/types/content/link-preview/content/video.d.ts +1 -1
  153. package/dist/types/content/link-preview/content/visibility.d.ts +1 -0
  154. package/dist/types/content/link-preview/deps.d.ts +36 -33
  155. package/dist/types/content/link-preview/types.d.ts +4 -4
  156. package/dist/types/content/transcript/cache.d.ts +4 -4
  157. package/dist/types/content/transcript/index.d.ts +7 -7
  158. package/dist/types/content/transcript/parse.d.ts +1 -1
  159. package/dist/types/content/transcript/providers/generic.d.ts +1 -1
  160. package/dist/types/content/transcript/providers/podcast/apple-flow.d.ts +2 -2
  161. package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +4 -4
  162. package/dist/types/content/transcript/providers/podcast/media.d.ts +9 -6
  163. package/dist/types/content/transcript/providers/podcast/results.d.ts +3 -3
  164. package/dist/types/content/transcript/providers/podcast/rss.d.ts +1 -1
  165. package/dist/types/content/transcript/providers/podcast/spotify-flow.d.ts +2 -2
  166. package/dist/types/content/transcript/providers/podcast/spotify.d.ts +2 -2
  167. package/dist/types/content/transcript/providers/podcast.d.ts +5 -5
  168. package/dist/types/content/transcript/providers/transcription-start.d.ts +14 -8
  169. package/dist/types/content/transcript/providers/youtube/api.d.ts +1 -1
  170. package/dist/types/content/transcript/providers/youtube/captions.d.ts +1 -1
  171. package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +11 -8
  172. package/dist/types/content/transcript/providers/youtube.d.ts +1 -1
  173. package/dist/types/content/transcript/timestamps.d.ts +1 -1
  174. package/dist/types/content/transcript/transcription-config.d.ts +15 -0
  175. package/dist/types/content/transcript/types.d.ts +12 -9
  176. package/dist/types/content/transcript/utils.d.ts +1 -1
  177. package/dist/types/content/url.d.ts +5 -3
  178. package/dist/types/index.d.ts +5 -4
  179. package/dist/types/language.d.ts +4 -4
  180. package/dist/types/openai/base-url.d.ts +14 -0
  181. package/dist/types/processes.d.ts +2 -2
  182. package/dist/types/prompts/cli.d.ts +3 -3
  183. package/dist/types/prompts/file.d.ts +2 -2
  184. package/dist/types/prompts/index.d.ts +6 -6
  185. package/dist/types/prompts/link-summary.d.ts +3 -3
  186. package/dist/types/prompts/summary-lengths.d.ts +1 -1
  187. package/dist/types/transcription/onnx-cli.d.ts +3 -3
  188. package/dist/types/transcription/whisper/core.d.ts +6 -3
  189. package/dist/types/transcription/whisper/groq.d.ts +2 -0
  190. package/dist/types/transcription/whisper/openai.d.ts +6 -1
  191. package/dist/types/transcription/whisper/types.d.ts +1 -1
  192. package/dist/types/transcription/whisper/whisper-cpp.d.ts +1 -1
  193. package/dist/types/transcription/whisper.d.ts +5 -5
  194. package/package.json +13 -13
@@ -1,5 +1,6 @@
1
- import type { MediaCache, TranscriptCache } from '../cache/types.js';
2
- import type { CacheMode, TranscriptSource } from './types.js';
1
+ import type { MediaCache, TranscriptCache } from "../cache/types.js";
2
+ import type { TranscriptionConfig } from "../transcript/transcription-config.js";
3
+ import type { CacheMode, TranscriptSource } from "./types.js";
3
4
  export declare const ProgressKind: {
4
5
  readonly FetchHtmlStart: "fetch-html-start";
5
6
  readonly FetchHtmlProgress: "fetch-html-progress";
@@ -18,93 +19,93 @@ export declare const ProgressKind: {
18
19
  readonly BirdStart: "bird-start";
19
20
  readonly BirdDone: "bird-done";
20
21
  };
21
- export type TranscriptionProviderHint = 'cpp' | 'onnx' | 'openai' | 'fal' | 'openai->fal' | 'unknown';
22
+ export type TranscriptionProviderHint = "cpp" | "onnx" | "groq" | "groq->openai" | "groq->fal" | "groq->openai->fal" | "openai" | "fal" | "openai->fal" | "unknown";
22
23
  /** Public progress events emitted by link preview fetchers. */
23
24
  export type LinkPreviewProgressEvent = {
24
- kind: 'fetch-html-start';
25
+ kind: "fetch-html-start";
25
26
  url: string;
26
27
  } | {
27
- kind: 'fetch-html-progress';
28
+ kind: "fetch-html-progress";
28
29
  url: string;
29
30
  downloadedBytes: number;
30
31
  totalBytes: number | null;
31
32
  } | {
32
- kind: 'fetch-html-done';
33
+ kind: "fetch-html-done";
33
34
  url: string;
34
35
  downloadedBytes: number;
35
36
  totalBytes: number | null;
36
37
  } | {
37
- kind: 'transcript-media-download-start';
38
+ kind: "transcript-media-download-start";
38
39
  url: string;
39
- service: 'youtube' | 'podcast' | 'generic';
40
+ service: "youtube" | "podcast" | "generic";
40
41
  mediaUrl: string | null;
41
- mediaKind?: 'video' | 'audio' | null;
42
+ mediaKind?: "video" | "audio" | null;
42
43
  totalBytes: number | null;
43
44
  } | {
44
- kind: 'transcript-media-download-progress';
45
+ kind: "transcript-media-download-progress";
45
46
  url: string;
46
- service: 'youtube' | 'podcast' | 'generic';
47
+ service: "youtube" | "podcast" | "generic";
47
48
  downloadedBytes: number;
48
49
  totalBytes: number | null;
49
- mediaKind?: 'video' | 'audio' | null;
50
+ mediaKind?: "video" | "audio" | null;
50
51
  } | {
51
- kind: 'transcript-media-download-done';
52
+ kind: "transcript-media-download-done";
52
53
  url: string;
53
- service: 'youtube' | 'podcast' | 'generic';
54
+ service: "youtube" | "podcast" | "generic";
54
55
  downloadedBytes: number;
55
56
  totalBytes: number | null;
56
- mediaKind?: 'video' | 'audio' | null;
57
+ mediaKind?: "video" | "audio" | null;
57
58
  } | {
58
- kind: 'transcript-whisper-start';
59
+ kind: "transcript-whisper-start";
59
60
  url: string;
60
- service: 'youtube' | 'podcast' | 'generic';
61
+ service: "youtube" | "podcast" | "generic";
61
62
  providerHint: TranscriptionProviderHint;
62
63
  modelId: string | null;
63
64
  totalDurationSeconds: number | null;
64
65
  parts: number | null;
65
66
  } | {
66
- kind: 'transcript-whisper-progress';
67
+ kind: "transcript-whisper-progress";
67
68
  url: string;
68
- service: 'youtube' | 'podcast' | 'generic';
69
+ service: "youtube" | "podcast" | "generic";
69
70
  processedDurationSeconds: number | null;
70
71
  totalDurationSeconds: number | null;
71
72
  partIndex: number | null;
72
73
  parts: number | null;
73
74
  } | {
74
- kind: 'transcript-start';
75
+ kind: "transcript-start";
75
76
  url: string;
76
- service: 'youtube' | 'podcast' | 'generic';
77
+ service: "youtube" | "podcast" | "generic";
77
78
  hint: string | null;
78
79
  } | {
79
- kind: 'transcript-done';
80
+ kind: "transcript-done";
80
81
  url: string;
81
82
  ok: boolean;
82
- service: 'youtube' | 'podcast' | 'generic';
83
+ service: "youtube" | "podcast" | "generic";
83
84
  source: TranscriptSource | null;
84
85
  hint: string | null;
85
86
  } | {
86
- kind: 'firecrawl-start';
87
+ kind: "firecrawl-start";
87
88
  url: string;
88
89
  reason: string;
89
90
  } | {
90
- kind: 'firecrawl-done';
91
+ kind: "firecrawl-done";
91
92
  url: string;
92
93
  ok: boolean;
93
94
  markdownBytes: number | null;
94
95
  htmlBytes: number | null;
95
96
  } | {
96
- kind: 'nitter-start';
97
+ kind: "nitter-start";
97
98
  url: string;
98
99
  } | {
99
- kind: 'nitter-done';
100
+ kind: "nitter-done";
100
101
  url: string;
101
102
  ok: boolean;
102
103
  textBytes: number | null;
103
104
  } | {
104
- kind: 'bird-start';
105
+ kind: "bird-start";
105
106
  url: string;
106
107
  } | {
107
- kind: 'bird-done';
108
+ kind: "bird-done";
108
109
  url: string;
109
110
  ok: boolean;
110
111
  textBytes: number | null;
@@ -126,10 +127,10 @@ export type ConvertHtmlToMarkdown = (args: {
126
127
  timeoutMs: number;
127
128
  }) => Promise<string>;
128
129
  export type BirdTweetMedia = {
129
- kind: 'video' | 'audio';
130
+ kind: "video" | "audio";
130
131
  urls: string[];
131
132
  preferredUrl: string | null;
132
- source: 'extended_entities' | 'card' | 'entities';
133
+ source: "extended_entities" | "card" | "entities";
133
134
  };
134
135
  export type BirdTweetPayload = {
135
136
  id?: string;
@@ -160,8 +161,10 @@ export interface LinkPreviewDeps {
160
161
  scrapeWithFirecrawl: ScrapeWithFirecrawl | null;
161
162
  apifyApiToken: string | null;
162
163
  ytDlpPath: string | null;
163
- falApiKey: string | null;
164
- openaiApiKey: string | null;
164
+ transcription?: TranscriptionConfig | null;
165
+ falApiKey?: string | null;
166
+ groqApiKey?: string | null;
167
+ openaiApiKey?: string | null;
165
168
  convertHtmlToMarkdown: ConvertHtmlToMarkdown | null;
166
169
  transcriptCache: TranscriptCache | null;
167
170
  mediaCache?: MediaCache | null;
@@ -1,4 +1,4 @@
1
- export type TranscriptSource = 'youtubei' | 'captionTracks' | 'embedded' | 'yt-dlp' | 'podcastTranscript' | 'whisper' | 'apify' | 'html' | 'unavailable' | 'unknown';
1
+ export type TranscriptSource = "youtubei" | "captionTracks" | "embedded" | "yt-dlp" | "podcastTranscript" | "whisper" | "apify" | "html" | "unavailable" | "unknown";
2
2
  export type TranscriptSegment = {
3
3
  startMs: number;
4
4
  endMs?: number | null;
@@ -6,7 +6,7 @@ export type TranscriptSegment = {
6
6
  };
7
7
  export declare const CACHE_MODES: readonly ["default", "bypass"];
8
8
  export type CacheMode = (typeof CACHE_MODES)[number];
9
- export type CacheStatus = 'hit' | 'miss' | 'expired' | 'bypassed' | 'fallback' | 'unknown';
9
+ export type CacheStatus = "hit" | "miss" | "expired" | "bypassed" | "fallback" | "unknown";
10
10
  export interface TranscriptDiagnostics {
11
11
  cacheMode: CacheMode;
12
12
  cacheStatus: CacheStatus;
@@ -25,11 +25,11 @@ export interface FirecrawlDiagnostics {
25
25
  export interface MarkdownDiagnostics {
26
26
  requested: boolean;
27
27
  used: boolean;
28
- provider: 'firecrawl' | 'llm' | null;
28
+ provider: "firecrawl" | "llm" | null;
29
29
  notes?: string | null;
30
30
  }
31
31
  export interface ContentFetchDiagnostics {
32
- strategy: 'bird' | 'firecrawl' | 'html' | 'nitter';
32
+ strategy: "bird" | "firecrawl" | "html" | "nitter";
33
33
  firecrawl: FirecrawlDiagnostics;
34
34
  markdown: MarkdownDiagnostics;
35
35
  transcript: TranscriptDiagnostics;
@@ -1,8 +1,8 @@
1
- import type { TranscriptCache } from '../cache/types.js';
2
- import type { CacheMode, TranscriptDiagnostics, TranscriptResolution, TranscriptSource } from '../link-preview/types.js';
1
+ import type { TranscriptCache } from "../cache/types.js";
2
+ import type { CacheMode, TranscriptDiagnostics, TranscriptResolution, TranscriptSource } from "../link-preview/types.js";
3
3
  export declare const DEFAULT_TTL_MS: number;
4
4
  export declare const NEGATIVE_TTL_MS: number;
5
- type CacheDiagnostics = Pick<TranscriptDiagnostics, 'cacheStatus' | 'notes' | 'provider' | 'textProvided' | 'cacheMode' | 'attemptedProviders'>;
5
+ type CacheDiagnostics = Pick<TranscriptDiagnostics, "cacheStatus" | "notes" | "provider" | "textProvided" | "cacheMode" | "attemptedProviders">;
6
6
  export interface CacheReadArguments {
7
7
  url: string;
8
8
  cacheMode: CacheMode;
@@ -11,7 +11,7 @@ export interface CacheReadArguments {
11
11
  fileMtime?: number | null;
12
12
  }
13
13
  export interface TranscriptCacheLookup {
14
- cached: Awaited<ReturnType<TranscriptCache['get']>> | null;
14
+ cached: Awaited<ReturnType<TranscriptCache["get"]>> | null;
15
15
  resolution: TranscriptResolution | null;
16
16
  diagnostics: CacheDiagnostics;
17
17
  }
@@ -1,11 +1,11 @@
1
- import type { LinkPreviewDeps } from '../link-preview/deps.js';
2
- import type { CacheMode, TranscriptResolution } from '../link-preview/types.js';
3
- import type { ProviderFetchOptions } from './types.js';
1
+ import type { LinkPreviewDeps } from "../link-preview/deps.js";
2
+ import type { CacheMode, TranscriptResolution } from "../link-preview/types.js";
3
+ import type { ProviderFetchOptions } from "./types.js";
4
4
  interface ResolveTranscriptOptions {
5
- youtubeTranscriptMode?: ProviderFetchOptions['youtubeTranscriptMode'];
6
- mediaTranscriptMode?: ProviderFetchOptions['mediaTranscriptMode'];
7
- mediaKindHint?: ProviderFetchOptions['mediaKindHint'];
8
- transcriptTimestamps?: ProviderFetchOptions['transcriptTimestamps'];
5
+ youtubeTranscriptMode?: ProviderFetchOptions["youtubeTranscriptMode"];
6
+ mediaTranscriptMode?: ProviderFetchOptions["mediaTranscriptMode"];
7
+ mediaKindHint?: ProviderFetchOptions["mediaKindHint"];
8
+ transcriptTimestamps?: ProviderFetchOptions["transcriptTimestamps"];
9
9
  cacheMode?: CacheMode;
10
10
  fileMtime?: number | null;
11
11
  }
@@ -1,4 +1,4 @@
1
- import type { TranscriptSegment } from '../link-preview/types.js';
1
+ import type { TranscriptSegment } from "../link-preview/types.js";
2
2
  export type TranscriptParseResult = {
3
3
  text: string | null;
4
4
  segments: TranscriptSegment[] | null;
@@ -1,3 +1,3 @@
1
- import type { ProviderContext, ProviderFetchOptions, ProviderResult } from '../types.js';
1
+ import type { ProviderContext, ProviderFetchOptions, ProviderResult } from "../types.js";
2
2
  export declare const canHandle: () => boolean;
3
3
  export declare const fetchTranscript: (context: ProviderContext, options: ProviderFetchOptions) => Promise<ProviderResult>;
@@ -1,4 +1,4 @@
1
- import type { ProviderResult } from '../../types.js';
2
- import type { PodcastFlowContext } from './flow-context.js';
1
+ import type { ProviderResult } from "../../types.js";
2
+ import type { PodcastFlowContext } from "./flow-context.js";
3
3
  export declare function fetchAppleTranscriptFromItunesLookup(flow: PodcastFlowContext): Promise<ProviderResult | null>;
4
4
  export declare function fetchAppleTranscriptFromEmbeddedHtml(flow: PodcastFlowContext): Promise<ProviderResult | null>;
@@ -1,11 +1,11 @@
1
- import type { ProviderContext, ProviderFetchOptions, ProviderResult } from '../../types.js';
2
- import type { TranscribeRequest, TranscriptionResult } from './media.js';
1
+ import type { ProviderContext, ProviderFetchOptions, ProviderResult } from "../../types.js";
2
+ import type { TranscribeRequest, TranscriptionResult } from "./media.js";
3
3
  export type PodcastFlowContext = {
4
4
  context: ProviderContext;
5
5
  options: ProviderFetchOptions;
6
- attemptedProviders: ProviderResult['attemptedProviders'];
6
+ attemptedProviders: ProviderResult["attemptedProviders"];
7
7
  notes: string[];
8
- pushOnce: (provider: ProviderResult['attemptedProviders'][number]) => void;
8
+ pushOnce: (provider: ProviderResult["attemptedProviders"][number]) => void;
9
9
  ensureTranscriptionProvider: () => ProviderResult | null;
10
10
  transcribe: (request: TranscribeRequest) => Promise<TranscriptionResult>;
11
11
  };
@@ -1,4 +1,5 @@
1
- import type { ProviderFetchOptions } from '../../types.js';
1
+ import type { ProviderFetchOptions } from "../../types.js";
2
+ import { type TranscriptionConfig } from "../../transcription-config.js";
2
3
  export type TranscribeRequest = {
3
4
  url: string;
4
5
  filenameHint: string;
@@ -9,19 +10,21 @@ export type TranscriptionResult = {
9
10
  provider: string | null;
10
11
  error: Error | null;
11
12
  };
12
- export declare function transcribeMediaUrl({ fetchImpl, env, url, filenameHint, durationSecondsHint, openaiApiKey, falApiKey, notes, progress, }: {
13
+ export declare function transcribeMediaUrl({ fetchImpl, transcription, env, url, filenameHint, durationSecondsHint, groqApiKey, openaiApiKey, falApiKey, notes, progress, }: {
13
14
  fetchImpl: typeof fetch;
15
+ transcription?: Partial<TranscriptionConfig> | null;
14
16
  env?: Record<string, string | undefined>;
15
17
  url: string;
16
18
  filenameHint: string;
17
19
  durationSecondsHint: number | null;
18
- openaiApiKey: string | null;
19
- falApiKey: string | null;
20
+ groqApiKey?: string | null;
21
+ openaiApiKey?: string | null;
22
+ falApiKey?: string | null;
20
23
  notes: string[];
21
24
  progress: {
22
25
  url: string;
23
- service: 'podcast';
24
- onProgress: ProviderFetchOptions['onProgress'] | null;
26
+ service: "podcast";
27
+ onProgress: ProviderFetchOptions["onProgress"] | null;
25
28
  } | null;
26
29
  }): Promise<TranscriptionResult>;
27
30
  export declare function probeRemoteMedia(fetchImpl: typeof fetch, url: string): Promise<{
@@ -1,8 +1,8 @@
1
- import type { ProviderResult } from '../../types.js';
2
- import type { TranscriptionResult } from './media.js';
1
+ import type { ProviderResult } from "../../types.js";
2
+ import type { TranscriptionResult } from "./media.js";
3
3
  export declare function joinNotes(notes: string[]): string | null;
4
4
  export declare function buildWhisperResult({ attemptedProviders, notes, outcome, metadata, includeProviderOnFailure, }: {
5
- attemptedProviders: ProviderResult['attemptedProviders'];
5
+ attemptedProviders: ProviderResult["attemptedProviders"];
6
6
  notes: string[];
7
7
  outcome: TranscriptionResult;
8
8
  metadata: Record<string, unknown>;
@@ -1,4 +1,4 @@
1
- import type { TranscriptSegment } from '../../../link-preview/types.js';
1
+ import type { TranscriptSegment } from "../../../link-preview/types.js";
2
2
  export declare function looksLikeRssOrAtomFeed(xml: string): boolean;
3
3
  export declare function extractEnclosureFromFeed(xml: string): {
4
4
  enclosureUrl: string;
@@ -1,3 +1,3 @@
1
- import type { ProviderResult } from '../../types.js';
2
- import type { PodcastFlowContext } from './flow-context.js';
1
+ import type { ProviderResult } from "../../types.js";
2
+ import type { PodcastFlowContext } from "./flow-context.js";
3
3
  export declare function fetchSpotifyTranscript(flow: PodcastFlowContext): Promise<ProviderResult | null>;
@@ -11,7 +11,7 @@ export declare function fetchSpotifyEmbedHtml({ embedUrl, episodeId, fetchImpl,
11
11
  episodeId: string;
12
12
  fetchImpl: typeof fetch;
13
13
  scrapeWithFirecrawl: ((url: string, options?: {
14
- cacheMode?: 'default' | 'bypass';
14
+ cacheMode?: "default" | "bypass";
15
15
  timeoutMs?: number;
16
16
  }) => Promise<{
17
17
  html?: string | null;
@@ -19,6 +19,6 @@ export declare function fetchSpotifyEmbedHtml({ embedUrl, episodeId, fetchImpl,
19
19
  } | null>) | null;
20
20
  }): Promise<{
21
21
  html: string;
22
- via: 'fetch' | 'firecrawl';
22
+ via: "fetch" | "firecrawl";
23
23
  }>;
24
24
  export declare function looksLikeBlockedHtml(html: string): boolean;
@@ -1,8 +1,8 @@
1
- import type { ProviderContext, ProviderFetchOptions, ProviderResult } from '../types.js';
2
- import { resolvePodcastFeedUrlFromItunesSearch } from './podcast/itunes.js';
3
- import { downloadCappedBytes, downloadToFile, filenameFromUrl, formatBytes, normalizeHeaderType, parseContentLength, probeRemoteMedia } from './podcast/media.js';
4
- import { extractEnclosureForEpisode, extractItemDurationSeconds } from './podcast/rss.js';
5
- import { looksLikeBlockedHtml } from './podcast/spotify.js';
1
+ import type { ProviderContext, ProviderFetchOptions, ProviderResult } from "../types.js";
2
+ import { resolvePodcastFeedUrlFromItunesSearch } from "./podcast/itunes.js";
3
+ import { downloadCappedBytes, downloadToFile, filenameFromUrl, formatBytes, normalizeHeaderType, parseContentLength, probeRemoteMedia } from "./podcast/media.js";
4
+ import { extractEnclosureForEpisode, extractItemDurationSeconds } from "./podcast/rss.js";
5
+ import { looksLikeBlockedHtml } from "./podcast/spotify.js";
6
6
  export declare const canHandle: ({ url, html }: ProviderContext) => boolean;
7
7
  export declare const fetchTranscript: (context: ProviderContext, options: ProviderFetchOptions) => Promise<ProviderResult>;
8
8
  export declare const __test__: {
@@ -1,23 +1,29 @@
1
- import { resolvePreferredOnnxModel } from '../../../transcription/onnx-cli.js';
2
- import type { TranscriptionProviderHint } from '../../link-preview/deps.js';
1
+ import type { TranscriptionProviderHint } from "../../link-preview/deps.js";
2
+ import { resolvePreferredOnnxModel } from "../../../transcription/onnx-cli.js";
3
+ import { type TranscriptionConfig } from "../transcription-config.js";
3
4
  type Env = Record<string, string | undefined>;
4
5
  export type TranscriptionAvailability = {
5
6
  preferredOnnxModel: ReturnType<typeof resolvePreferredOnnxModel>;
6
7
  onnxReady: boolean;
7
8
  hasLocalWhisper: boolean;
9
+ hasGroq: boolean;
8
10
  hasOpenai: boolean;
9
11
  hasFal: boolean;
10
12
  hasAnyProvider: boolean;
11
13
  };
12
- export declare function resolveTranscriptionAvailability({ env, openaiApiKey, falApiKey, }: {
14
+ export declare function resolveTranscriptionAvailability({ env, transcription, groqApiKey, openaiApiKey, falApiKey, }: {
13
15
  env?: Env;
14
- openaiApiKey: string | null;
15
- falApiKey: string | null;
16
+ transcription?: Partial<TranscriptionConfig> | null;
17
+ groqApiKey?: string | null;
18
+ openaiApiKey?: string | null;
19
+ falApiKey?: string | null;
16
20
  }): Promise<TranscriptionAvailability>;
17
- export declare function resolveTranscriptionStartInfo({ env, openaiApiKey, falApiKey, }: {
21
+ export declare function resolveTranscriptionStartInfo({ env, transcription, groqApiKey, openaiApiKey, falApiKey, }: {
18
22
  env?: Env;
19
- openaiApiKey: string | null;
20
- falApiKey: string | null;
23
+ transcription?: Partial<TranscriptionConfig> | null;
24
+ groqApiKey?: string | null;
25
+ openaiApiKey?: string | null;
26
+ falApiKey?: string | null;
21
27
  }): Promise<{
22
28
  availability: TranscriptionAvailability;
23
29
  providerHint: TranscriptionProviderHint;
@@ -1,4 +1,4 @@
1
- import type { TranscriptSegment } from '../../../link-preview/types.js';
1
+ import type { TranscriptSegment } from "../../../link-preview/types.js";
2
2
  export interface YoutubeTranscriptConfig {
3
3
  apiKey: string;
4
4
  context: Record<string, unknown>;
@@ -1,4 +1,4 @@
1
- import type { TranscriptSegment } from '../../../link-preview/types.js';
1
+ import type { TranscriptSegment } from "../../../link-preview/types.js";
2
2
  interface YoutubeTranscriptContext {
3
3
  html: string;
4
4
  originalUrl: string;
@@ -1,6 +1,7 @@
1
- import { type TranscriptionProvider } from '../../../../transcription/whisper.js';
2
- import type { MediaCache } from '../../../cache/types.js';
3
- import type { LinkPreviewProgressEvent } from '../../../link-preview/deps.js';
1
+ import type { MediaCache } from "../../../cache/types.js";
2
+ import type { LinkPreviewProgressEvent } from "../../../link-preview/deps.js";
3
+ import { type TranscriptionProvider } from "../../../../transcription/whisper.js";
4
+ import { type TranscriptionConfig } from "../../transcription-config.js";
4
5
  type YtDlpTranscriptResult = {
5
6
  text: string | null;
6
7
  provider: TranscriptionProvider | null;
@@ -9,13 +10,15 @@ type YtDlpTranscriptResult = {
9
10
  };
10
11
  type YtDlpRequest = {
11
12
  ytDlpPath: string | null;
13
+ transcription?: Partial<TranscriptionConfig> | null;
12
14
  env?: Record<string, string | undefined>;
13
- openaiApiKey: string | null;
14
- falApiKey: string | null;
15
+ groqApiKey?: string | null;
16
+ openaiApiKey?: string | null;
17
+ falApiKey?: string | null;
15
18
  url: string;
16
19
  onProgress?: ((event: LinkPreviewProgressEvent) => void) | null;
17
- service?: 'youtube' | 'podcast' | 'generic';
18
- mediaKind?: 'video' | 'audio' | null;
20
+ service?: "youtube" | "podcast" | "generic";
21
+ mediaKind?: "video" | "audio" | null;
19
22
  mediaCache?: MediaCache | null;
20
23
  extraArgs?: string[];
21
24
  };
@@ -23,6 +26,6 @@ type YtDlpDurationRequest = {
23
26
  ytDlpPath: string | null;
24
27
  url: string;
25
28
  };
26
- export declare const fetchTranscriptWithYtDlp: ({ ytDlpPath, env, openaiApiKey, falApiKey, url, onProgress, service, mediaKind, mediaCache, extraArgs, }: YtDlpRequest) => Promise<YtDlpTranscriptResult>;
29
+ export declare const fetchTranscriptWithYtDlp: ({ ytDlpPath, transcription, env, groqApiKey, openaiApiKey, falApiKey, url, onProgress, service, mediaKind, mediaCache, extraArgs, }: YtDlpRequest) => Promise<YtDlpTranscriptResult>;
27
30
  export declare const fetchDurationSecondsWithYtDlp: ({ ytDlpPath, url, }: YtDlpDurationRequest) => Promise<number | null>;
28
31
  export {};
@@ -1,3 +1,3 @@
1
- import type { ProviderContext, ProviderFetchOptions, ProviderResult } from '../types.js';
1
+ import type { ProviderContext, ProviderFetchOptions, ProviderResult } from "../types.js";
2
2
  export declare const canHandle: ({ url }: ProviderContext) => boolean;
3
3
  export declare const fetchTranscript: (context: ProviderContext, options: ProviderFetchOptions) => Promise<ProviderResult>;
@@ -1,4 +1,4 @@
1
- import type { TranscriptSegment } from '../link-preview/types.js';
1
+ import type { TranscriptSegment } from "../link-preview/types.js";
2
2
  export declare function formatTimestampMs(ms: number): string;
3
3
  export declare function parseTimestampStringToMs(value: string): number | null;
4
4
  export declare function parseTimestampToMs(value: unknown, assumeSeconds?: boolean): number | null;
@@ -0,0 +1,15 @@
1
+ export type TranscriptionConfig = {
2
+ env?: Record<string, string | undefined>;
3
+ groqApiKey: string | null;
4
+ openaiApiKey: string | null;
5
+ falApiKey: string | null;
6
+ };
7
+ type TranscriptionConfigInput = {
8
+ env?: Record<string, string | undefined>;
9
+ transcription?: Partial<TranscriptionConfig> | null;
10
+ groqApiKey?: string | null;
11
+ openaiApiKey?: string | null;
12
+ falApiKey?: string | null;
13
+ };
14
+ export declare function resolveTranscriptionConfig(input: TranscriptionConfigInput): TranscriptionConfig;
15
+ export {};
@@ -1,8 +1,9 @@
1
- import type { MediaCache } from '../cache/types.js';
2
- import type { MediaTranscriptMode, YoutubeTranscriptMode } from '../link-preview/content/types.js';
3
- import type { LinkPreviewProgressEvent, ResolveTwitterCookies, ScrapeWithFirecrawl } from '../link-preview/deps.js';
4
- import type { TranscriptResolution, TranscriptSource } from '../link-preview/types.js';
5
- export type TranscriptService = 'youtube' | 'podcast' | 'generic';
1
+ import type { MediaCache } from "../cache/types.js";
2
+ import type { MediaTranscriptMode, YoutubeTranscriptMode } from "../link-preview/content/types.js";
3
+ import type { LinkPreviewProgressEvent, ResolveTwitterCookies, ScrapeWithFirecrawl } from "../link-preview/deps.js";
4
+ import type { TranscriptResolution, TranscriptSource } from "../link-preview/types.js";
5
+ import type { TranscriptionConfig } from "./transcription-config.js";
6
+ export type TranscriptService = "youtube" | "podcast" | "generic";
6
7
  export interface ProviderContext {
7
8
  url: string;
8
9
  html: string | null;
@@ -15,11 +16,13 @@ export interface ProviderFetchOptions {
15
16
  apifyApiToken: string | null;
16
17
  youtubeTranscriptMode: YoutubeTranscriptMode;
17
18
  mediaTranscriptMode: MediaTranscriptMode;
18
- mediaKindHint?: 'video' | 'audio' | null;
19
+ mediaKindHint?: "video" | "audio" | null;
19
20
  transcriptTimestamps?: boolean;
20
21
  ytDlpPath: string | null;
21
- falApiKey: string | null;
22
- openaiApiKey: string | null;
22
+ transcription?: TranscriptionConfig;
23
+ falApiKey?: string | null;
24
+ groqApiKey?: string | null;
25
+ openaiApiKey?: string | null;
23
26
  mediaCache?: MediaCache | null;
24
27
  resolveTwitterCookies?: ResolveTwitterCookies | null;
25
28
  onProgress?: ((event: LinkPreviewProgressEvent) => void) | null;
@@ -34,4 +37,4 @@ export interface ProviderModule {
34
37
  canHandle(context: ProviderContext): boolean;
35
38
  fetchTranscript(context: ProviderContext, options: ProviderFetchOptions): Promise<ProviderResult>;
36
39
  }
37
- export type { TranscriptSource } from '../link-preview/types.js';
40
+ export type { TranscriptSource } from "../link-preview/types.js";
@@ -1,4 +1,4 @@
1
- export { extractYouTubeVideoId, isYouTubeUrl, isYouTubeVideoUrl } from '../url.js';
1
+ export { extractYouTubeVideoId, isYouTubeUrl, isYouTubeVideoUrl } from "../url.js";
2
2
  export declare function isRecord(value: unknown): value is Record<string, unknown>;
3
3
  export declare function extractEmbeddedYouTubeUrlFromHtml(html: string, maxTextChars?: number, maxReadabilityChars?: number): Promise<string | null>;
4
4
  export declare function sanitizeYoutubeJsonResponse(input: string): string;
@@ -1,8 +1,10 @@
1
- import { isPodcastHost } from './link-preview/content/podcast-utils.js';
2
- import { isTwitterStatusUrl } from './link-preview/content/twitter-utils.js';
1
+ import { isPodcastHost } from "./link-preview/content/podcast-utils.js";
2
+ import { isTwitterBroadcastUrl, isTwitterStatusUrl } from "./link-preview/content/twitter-utils.js";
3
3
  export declare const isYouTubeUrl: (rawUrl: string) => boolean;
4
+ export declare const DIRECT_MEDIA_EXTENSIONS: readonly ["mp4", "mov", "m4v", "mkv", "webm", "mpeg", "mpg", "avi", "wmv", "flv", "mp3", "m4a", "wav", "flac", "aac", "ogg", "opus", "aiff", "wma"];
4
5
  export declare function isYouTubeVideoUrl(rawUrl: string): boolean;
5
6
  export declare function extractYouTubeVideoId(rawUrl: string): string | null;
6
7
  export declare function isDirectMediaUrl(url: string): boolean;
8
+ export declare function isDirectMediaExtension(ext: string): boolean;
7
9
  export declare function shouldPreferUrlMode(url: string): boolean;
8
- export { isTwitterStatusUrl, isPodcastHost };
10
+ export { isTwitterBroadcastUrl, isTwitterStatusUrl, isPodcastHost };
@@ -1,4 +1,5 @@
1
- export * from './content/index.js';
2
- export * from './prompts/index.js';
3
- export type { SummaryLength } from './shared/contracts.js';
4
- export { SUMMARY_LENGTHS } from './shared/contracts.js';
1
+ export * from "./content/index.js";
2
+ export { isOpenRouterBaseUrl, normalizeBaseUrl, resolveConfiguredBaseUrl, resolveOpenAiWhisperBaseUrl, } from "./openai/base-url.js";
3
+ export * from "./prompts/index.js";
4
+ export type { SummaryLength } from "./shared/contracts.js";
5
+ export { SUMMARY_LENGTHS } from "./shared/contracts.js";
@@ -1,7 +1,7 @@
1
1
  export type OutputLanguage = {
2
- kind: 'auto';
2
+ kind: "auto";
3
3
  } | {
4
- kind: 'fixed';
4
+ kind: "fixed";
5
5
  /**
6
6
  * BCP-47-ish language tag (e.g. "en", "de", "en-US").
7
7
  *
@@ -17,9 +17,9 @@ export declare function parseOutputLanguage(raw: string): OutputLanguage;
17
17
  export declare function resolveOutputLanguage(raw: string | null | undefined): OutputLanguage;
18
18
  export declare function formatOutputLanguageInstruction(language: OutputLanguage): string;
19
19
  export declare function formatOutputLanguageForJson(language: OutputLanguage): {
20
- mode: 'auto';
20
+ mode: "auto";
21
21
  } | {
22
- mode: 'fixed';
22
+ mode: "fixed";
23
23
  tag: string;
24
24
  label: string;
25
25
  };
@@ -0,0 +1,14 @@
1
+ type Env = Record<string, string | undefined>;
2
+ export declare function normalizeBaseUrl(raw: string | null | undefined): string | null;
3
+ export declare function isOpenRouterBaseUrl(baseUrl: string): boolean;
4
+ export declare function resolveConfiguredBaseUrl({ envValue, configValue, }: {
5
+ envValue: string | null | undefined;
6
+ configValue: string | null | undefined;
7
+ }): string | null;
8
+ export declare function resolveOpenAiWhisperBaseUrl({ explicitBaseUrl, env, allowOpenRouterFallback, defaultBaseUrl, }: {
9
+ explicitBaseUrl?: string | null;
10
+ env?: Env;
11
+ allowOpenRouterFallback?: boolean;
12
+ defaultBaseUrl?: string;
13
+ }): string;
14
+ export {};
@@ -1,4 +1,4 @@
1
- import type { ChildProcess, ExecFileException, ExecFileOptions, SpawnOptions } from 'node:child_process';
1
+ import type { ChildProcess, ExecFileException, ExecFileOptions, SpawnOptions } from "node:child_process";
2
2
  export type ProcessContext = {
3
3
  runId?: string | null;
4
4
  source?: string | null;
@@ -17,7 +17,7 @@ export type ProcessRegistration = {
17
17
  export type ProcessHandle = {
18
18
  id: string;
19
19
  setPid: (pid: number | null) => void;
20
- appendOutput: (stream: 'stdout' | 'stderr', line: string) => void;
20
+ appendOutput: (stream: "stdout" | "stderr", line: string) => void;
21
21
  setProgress: (progress: number | null, detail?: string | null) => void;
22
22
  setStatus: (text: string | null) => void;
23
23
  finish: (result: {