@steipete/summarize-core 0.11.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/dist/esm/content/bun.js +21 -0
  2. package/dist/esm/content/bun.js.map +1 -0
  3. package/dist/esm/content/direct-media.js +100 -0
  4. package/dist/esm/content/direct-media.js.map +1 -0
  5. package/dist/esm/content/index.js +2 -1
  6. package/dist/esm/content/index.js.map +1 -1
  7. package/dist/esm/content/link-preview/client.js +6 -0
  8. package/dist/esm/content/link-preview/client.js.map +1 -1
  9. package/dist/esm/content/link-preview/content/fetcher.js +19 -2
  10. package/dist/esm/content/link-preview/content/fetcher.js.map +1 -1
  11. package/dist/esm/content/link-preview/content/firecrawl.js.map +1 -1
  12. package/dist/esm/content/link-preview/content/html.js.map +1 -1
  13. package/dist/esm/content/link-preview/content/index.js +29 -12
  14. package/dist/esm/content/link-preview/content/index.js.map +1 -1
  15. package/dist/esm/content/link-preview/content/utils.js.map +1 -1
  16. package/dist/esm/content/link-preview/content/video.js +1 -1
  17. package/dist/esm/content/link-preview/content/video.js.map +1 -1
  18. package/dist/esm/content/local-file.js +58 -0
  19. package/dist/esm/content/local-file.js.map +1 -0
  20. package/dist/esm/content/transcript/index.js +2 -0
  21. package/dist/esm/content/transcript/index.js.map +1 -1
  22. package/dist/esm/content/transcript/providers/generic-direct-media.js +47 -0
  23. package/dist/esm/content/transcript/providers/generic-direct-media.js.map +1 -0
  24. package/dist/esm/content/transcript/providers/generic-embedded.js +126 -0
  25. package/dist/esm/content/transcript/providers/generic-embedded.js.map +1 -0
  26. package/dist/esm/content/transcript/providers/generic-twitter.js +78 -0
  27. package/dist/esm/content/transcript/providers/generic-twitter.js.map +1 -0
  28. package/dist/esm/content/transcript/providers/generic.js +12 -248
  29. package/dist/esm/content/transcript/providers/generic.js.map +1 -1
  30. package/dist/esm/content/transcript/providers/podcast/apple-flow.js.map +1 -1
  31. package/dist/esm/content/transcript/providers/podcast/media.js +9 -1
  32. package/dist/esm/content/transcript/providers/podcast/media.js.map +1 -1
  33. package/dist/esm/content/transcript/providers/podcast/provider-flow.js +157 -0
  34. package/dist/esm/content/transcript/providers/podcast/provider-flow.js.map +1 -0
  35. package/dist/esm/content/transcript/providers/podcast/rss-feed.js +123 -0
  36. package/dist/esm/content/transcript/providers/podcast/rss-feed.js.map +1 -0
  37. package/dist/esm/content/transcript/providers/podcast/rss-transcript.js +113 -0
  38. package/dist/esm/content/transcript/providers/podcast/rss-transcript.js.map +1 -0
  39. package/dist/esm/content/transcript/providers/podcast/rss.js +2 -226
  40. package/dist/esm/content/transcript/providers/podcast/rss.js.map +1 -1
  41. package/dist/esm/content/transcript/providers/podcast/spotify-flow.js.map +1 -1
  42. package/dist/esm/content/transcript/providers/podcast.js +26 -155
  43. package/dist/esm/content/transcript/providers/podcast.js.map +1 -1
  44. package/dist/esm/content/transcript/providers/transcription-capability.js +22 -0
  45. package/dist/esm/content/transcript/providers/transcription-capability.js.map +1 -0
  46. package/dist/esm/content/transcript/providers/transcription-start.js +43 -32
  47. package/dist/esm/content/transcript/providers/transcription-start.js.map +1 -1
  48. package/dist/esm/content/transcript/providers/youtube/api.js +3 -2
  49. package/dist/esm/content/transcript/providers/youtube/api.js.map +1 -1
  50. package/dist/esm/content/transcript/providers/youtube/captions-player.js +173 -0
  51. package/dist/esm/content/transcript/providers/youtube/captions-player.js.map +1 -0
  52. package/dist/esm/content/transcript/providers/youtube/captions-shared.js +8 -0
  53. package/dist/esm/content/transcript/providers/youtube/captions-shared.js.map +1 -0
  54. package/dist/esm/content/transcript/providers/youtube/captions-transcript.js +361 -0
  55. package/dist/esm/content/transcript/providers/youtube/captions-transcript.js.map +1 -0
  56. package/dist/esm/content/transcript/providers/youtube/captions.js +2 -557
  57. package/dist/esm/content/transcript/providers/youtube/captions.js.map +1 -1
  58. package/dist/esm/content/transcript/providers/youtube/provider-flow.js +217 -0
  59. package/dist/esm/content/transcript/providers/youtube/provider-flow.js.map +1 -0
  60. package/dist/esm/content/transcript/providers/youtube/yt-dlp.js +33 -9
  61. package/dist/esm/content/transcript/providers/youtube/yt-dlp.js.map +1 -1
  62. package/dist/esm/content/transcript/providers/youtube.js +42 -194
  63. package/dist/esm/content/transcript/providers/youtube.js.map +1 -1
  64. package/dist/esm/content/transcript/transcription-config.js +24 -4
  65. package/dist/esm/content/transcript/transcription-config.js.map +1 -1
  66. package/dist/esm/content/url.js +5 -33
  67. package/dist/esm/content/url.js.map +1 -1
  68. package/dist/esm/processes.js.map +1 -1
  69. package/dist/esm/prompts/format.js +6 -0
  70. package/dist/esm/prompts/format.js.map +1 -1
  71. package/dist/esm/prompts/link-summary.js +27 -3
  72. package/dist/esm/prompts/link-summary.js.map +1 -1
  73. package/dist/esm/transcription/onnx-cli.js.map +1 -1
  74. package/dist/esm/transcription/whisper/assemblyai.js +132 -0
  75. package/dist/esm/transcription/whisper/assemblyai.js.map +1 -0
  76. package/dist/esm/transcription/whisper/chunking.js +64 -0
  77. package/dist/esm/transcription/whisper/chunking.js.map +1 -0
  78. package/dist/esm/transcription/whisper/cloud-providers.js +69 -0
  79. package/dist/esm/transcription/whisper/cloud-providers.js.map +1 -0
  80. package/dist/esm/transcription/whisper/core.js +320 -390
  81. package/dist/esm/transcription/whisper/core.js.map +1 -1
  82. package/dist/esm/transcription/whisper/gemini.js +324 -0
  83. package/dist/esm/transcription/whisper/gemini.js.map +1 -0
  84. package/dist/esm/transcription/whisper/groq.js +62 -1
  85. package/dist/esm/transcription/whisper/groq.js.map +1 -1
  86. package/dist/esm/transcription/whisper/preferences.js +16 -0
  87. package/dist/esm/transcription/whisper/preferences.js.map +1 -0
  88. package/dist/esm/transcription/whisper/provider-setup.js +62 -0
  89. package/dist/esm/transcription/whisper/provider-setup.js.map +1 -0
  90. package/dist/esm/transcription/whisper/remote-provider-attempts.js +189 -0
  91. package/dist/esm/transcription/whisper/remote-provider-attempts.js.map +1 -0
  92. package/dist/esm/transcription/whisper/remote.js +220 -0
  93. package/dist/esm/transcription/whisper/remote.js.map +1 -0
  94. package/dist/esm/transcription/whisper/whisper-cpp.js +21 -18
  95. package/dist/esm/transcription/whisper/whisper-cpp.js.map +1 -1
  96. package/dist/types/content/bun.d.ts +6 -0
  97. package/dist/types/content/direct-media.d.ts +9 -0
  98. package/dist/types/content/index.d.ts +2 -1
  99. package/dist/types/content/link-preview/client.d.ts +3 -1
  100. package/dist/types/content/link-preview/content/fetcher.d.ts +1 -1
  101. package/dist/types/content/link-preview/content/html.d.ts +1 -1
  102. package/dist/types/content/link-preview/deps.d.ts +8 -2
  103. package/dist/types/content/link-preview/types.d.ts +1 -1
  104. package/dist/types/content/local-file.d.ts +16 -0
  105. package/dist/types/content/transcript/providers/generic-direct-media.d.ts +11 -0
  106. package/dist/types/content/transcript/providers/generic-embedded.d.ts +16 -0
  107. package/dist/types/content/transcript/providers/generic-twitter.d.ts +11 -0
  108. package/dist/types/content/transcript/providers/podcast/flow-context.d.ts +3 -0
  109. package/dist/types/content/transcript/providers/podcast/media.d.ts +4 -2
  110. package/dist/types/content/transcript/providers/podcast/provider-flow.d.ts +7 -0
  111. package/dist/types/content/transcript/providers/podcast/rss-feed.d.ts +15 -0
  112. package/dist/types/content/transcript/providers/podcast/rss-transcript.d.ts +12 -0
  113. package/dist/types/content/transcript/providers/podcast/rss.d.ts +2 -24
  114. package/dist/types/content/transcript/providers/transcription-capability.d.ts +18 -0
  115. package/dist/types/content/transcript/providers/transcription-start.d.ts +11 -3
  116. package/dist/types/content/transcript/providers/youtube/captions-player.d.ts +12 -0
  117. package/dist/types/content/transcript/providers/youtube/captions-shared.d.ts +42 -0
  118. package/dist/types/content/transcript/providers/youtube/captions-transcript.d.ts +4 -0
  119. package/dist/types/content/transcript/providers/youtube/captions.d.ts +2 -19
  120. package/dist/types/content/transcript/providers/youtube/provider-flow.d.ts +34 -0
  121. package/dist/types/content/transcript/providers/youtube/yt-dlp.d.ts +4 -2
  122. package/dist/types/content/transcript/transcription-config.d.ts +6 -0
  123. package/dist/types/content/transcript/types.d.ts +1 -0
  124. package/dist/types/content/url.d.ts +2 -3
  125. package/dist/types/prompts/format.d.ts +1 -0
  126. package/dist/types/prompts/link-summary.d.ts +2 -1
  127. package/dist/types/transcription/whisper/assemblyai.d.ts +17 -0
  128. package/dist/types/transcription/whisper/chunking.d.ts +11 -0
  129. package/dist/types/transcription/whisper/cloud-providers.d.ts +22 -0
  130. package/dist/types/transcription/whisper/core.d.ts +12 -14
  131. package/dist/types/transcription/whisper/gemini.d.ts +14 -0
  132. package/dist/types/transcription/whisper/preferences.d.ts +4 -0
  133. package/dist/types/transcription/whisper/provider-setup.d.ts +30 -0
  134. package/dist/types/transcription/whisper/remote-provider-attempts.d.ts +51 -0
  135. package/dist/types/transcription/whisper/remote.d.ts +51 -0
  136. package/dist/types/transcription/whisper/types.d.ts +1 -1
  137. package/dist/types/transcription/whisper/whisper-cpp.d.ts +4 -3
  138. package/package.json +14 -10
@@ -0,0 +1,30 @@
1
+ type Env = Record<string, string | undefined>;
2
+ export declare const DEFAULT_GEMINI_TRANSCRIPTION_MODEL = "gemini-2.5-flash";
3
+ export declare const GEMINI_TRANSCRIPTION_MODEL_ENV = "SUMMARIZE_GEMINI_TRANSCRIPTION_MODEL";
4
+ export declare const TRANSCRIPTION_PROVIDER_ENV_LIST: readonly ["GROQ_API_KEY", "ASSEMBLYAI_API_KEY", "GEMINI_API_KEY", "OPENAI_API_KEY", "FAL_KEY"];
5
+ export declare const TRANSCRIPTION_PROVIDER_ENV_LABEL = "GROQ_API_KEY, ASSEMBLYAI_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or FAL_KEY";
6
+ export declare function normalizeApiKey(raw: string | null | undefined): string | null;
7
+ export declare function resolveGeminiApiKey({ env, geminiApiKey, }: {
8
+ env?: Env;
9
+ geminiApiKey?: string | null;
10
+ }): string | null;
11
+ export declare function resolveGroqApiKey({ env, groqApiKey, }: {
12
+ env?: Env;
13
+ groqApiKey?: string | null;
14
+ }): string | null;
15
+ export declare function resolveAssemblyAiApiKey({ env, assemblyaiApiKey, }: {
16
+ env?: Env;
17
+ assemblyaiApiKey?: string | null;
18
+ }): string | null;
19
+ export declare function resolveOpenAiTranscriptionApiKey({ env, openaiApiKey, }: {
20
+ env?: Env;
21
+ openaiApiKey?: string | null;
22
+ }): string | null;
23
+ export declare function resolveFalApiKey({ env, falApiKey, }: {
24
+ env?: Env;
25
+ falApiKey?: string | null;
26
+ }): string | null;
27
+ export declare function resolveGeminiTranscriptionModel(env?: Env): string;
28
+ export declare function buildMissingTranscriptionProviderMessage(): string;
29
+ export declare function buildMissingTranscriptionProviderNote(): string;
30
+ export {};
@@ -0,0 +1,51 @@
1
+ import type { CloudProvider } from "./cloud-providers.js";
2
+ import type { WhisperProgressEvent, WhisperTranscriptionResult } from "./types.js";
3
+ type Env = Record<string, string | undefined>;
4
+ export type RemoteByteState = {
5
+ bytes: Uint8Array;
6
+ mediaType: string;
7
+ filename: string | null;
8
+ };
9
+ type RemoteByteAttemptResult = {
10
+ state: RemoteByteState;
11
+ result: WhisperTranscriptionResult | null;
12
+ error: Error | null;
13
+ skipped?: boolean;
14
+ };
15
+ type RemoteFileAttemptResult = {
16
+ kind: "result";
17
+ result: WhisperTranscriptionResult;
18
+ } | {
19
+ kind: "error";
20
+ error: Error;
21
+ } | {
22
+ kind: "delegate-to-bytes";
23
+ };
24
+ type TranscribeOversizedBytesWithChunking = (args: {
25
+ bytes: Uint8Array;
26
+ mediaType: string;
27
+ filename: string | null;
28
+ onProgress?: ((event: WhisperProgressEvent) => void) | null;
29
+ }) => Promise<WhisperTranscriptionResult>;
30
+ export declare function attemptRemoteBytesProvider(args: {
31
+ provider: CloudProvider;
32
+ state: RemoteByteState;
33
+ assemblyaiApiKey: string | null;
34
+ geminiApiKey: string | null;
35
+ openaiApiKey: string | null;
36
+ falApiKey: string | null;
37
+ env: Env;
38
+ notes: string[];
39
+ onProgress?: ((event: WhisperProgressEvent) => void) | null;
40
+ transcribeOversizedBytesWithChunking?: TranscribeOversizedBytesWithChunking;
41
+ }): Promise<RemoteByteAttemptResult>;
42
+ export declare function attemptRemoteFileProvider(args: {
43
+ provider: CloudProvider;
44
+ filePath: string;
45
+ mediaType: string;
46
+ filename: string | null;
47
+ assemblyaiApiKey: string | null;
48
+ geminiApiKey: string | null;
49
+ env: Env;
50
+ }): Promise<RemoteFileAttemptResult>;
51
+ export {};
@@ -0,0 +1,51 @@
1
+ import type { WhisperProgressEvent, WhisperTranscriptionResult } from "./types.js";
2
+ type Env = Record<string, string | undefined>;
3
+ type CloudArgs = {
4
+ groqApiKey: string | null;
5
+ groqError?: Error | null;
6
+ assemblyaiApiKey: string | null;
7
+ geminiApiKey: string | null;
8
+ openaiApiKey: string | null;
9
+ falApiKey: string | null;
10
+ env: Env;
11
+ };
12
+ export declare function transcribeBytesWithRemoteFallbacks({ bytes, mediaType, filename, notes, groqApiKey, groqError, assemblyaiApiKey, geminiApiKey, openaiApiKey, falApiKey, env, onProgress, transcribeOversizedBytesWithChunking, }: {
13
+ bytes: Uint8Array;
14
+ mediaType: string;
15
+ filename: string | null;
16
+ notes: string[];
17
+ onProgress?: ((event: WhisperProgressEvent) => void) | null;
18
+ transcribeOversizedBytesWithChunking: (args: {
19
+ bytes: Uint8Array;
20
+ mediaType: string;
21
+ filename: string | null;
22
+ onProgress?: ((event: WhisperProgressEvent) => void) | null;
23
+ }) => Promise<WhisperTranscriptionResult>;
24
+ } & CloudArgs): Promise<WhisperTranscriptionResult>;
25
+ export declare function transcribeFileWithRemoteFallbacks({ filePath, mediaType, filename, notes, groqApiKey, groqError, assemblyaiApiKey, geminiApiKey, openaiApiKey, falApiKey, env, totalDurationSeconds, onProgress, transcribeChunkedFile, }: {
26
+ filePath: string;
27
+ mediaType: string;
28
+ filename: string | null;
29
+ notes: string[];
30
+ totalDurationSeconds: number | null;
31
+ onProgress?: ((event: WhisperProgressEvent) => void) | null;
32
+ transcribeChunkedFile: (args: {
33
+ filePath: string;
34
+ segmentSeconds: number;
35
+ totalDurationSeconds: number | null;
36
+ onProgress?: ((event: WhisperProgressEvent) => void) | null;
37
+ }) => Promise<WhisperTranscriptionResult>;
38
+ } & CloudArgs): Promise<WhisperTranscriptionResult>;
39
+ export declare function transcribeOversizedBytesViaTempFile({ bytes, mediaType, filename, onProgress, transcribeFile, }: {
40
+ bytes: Uint8Array;
41
+ mediaType: string;
42
+ filename: string | null;
43
+ onProgress?: ((event: WhisperProgressEvent) => void) | null;
44
+ transcribeFile: (args: {
45
+ filePath: string;
46
+ mediaType: string;
47
+ filename: string | null;
48
+ onProgress?: ((event: WhisperProgressEvent) => void) | null;
49
+ }) => Promise<WhisperTranscriptionResult>;
50
+ }): Promise<WhisperTranscriptionResult>;
51
+ export {};
@@ -1,4 +1,4 @@
1
- export type TranscriptionProvider = "groq" | "openai" | "fal" | "whisper.cpp" | "onnx-parakeet" | "onnx-canary";
1
+ export type TranscriptionProvider = "groq" | "assemblyai" | "gemini" | "openai" | "fal" | "whisper.cpp" | "onnx-parakeet" | "onnx-canary";
2
2
  export type WhisperTranscriptionResult = {
3
3
  text: string | null;
4
4
  provider: TranscriptionProvider | null;
@@ -1,9 +1,10 @@
1
1
  import type { WhisperProgressEvent, WhisperTranscriptionResult } from "./types.js";
2
- export declare function isWhisperCppReady(): Promise<boolean>;
3
- export declare function resolveWhisperCppModelNameForDisplay(): Promise<string | null>;
4
- export declare function transcribeWithWhisperCppFile({ filePath, mediaType, totalDurationSeconds, onProgress, }: {
2
+ export declare function isWhisperCppReady(env?: Record<string, string | undefined>): Promise<boolean>;
3
+ export declare function resolveWhisperCppModelNameForDisplay(env?: Record<string, string | undefined>): Promise<string | null>;
4
+ export declare function transcribeWithWhisperCppFile({ filePath, mediaType, totalDurationSeconds, onProgress, env, }: {
5
5
  filePath: string;
6
6
  mediaType: string;
7
7
  totalDurationSeconds: number | null;
8
8
  onProgress?: ((event: WhisperProgressEvent) => void) | null;
9
+ env?: Record<string, string | undefined>;
9
10
  }): Promise<WhisperTranscriptionResult>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@steipete/summarize-core",
3
- "version": "0.11.1",
3
+ "version": "0.13.0",
4
4
  "description": "Summarize core library (content extraction + prompts).",
5
5
  "files": [
6
6
  "dist",
@@ -23,6 +23,10 @@
23
23
  "types": "./dist/types/content/url.d.ts",
24
24
  "import": "./dist/esm/content/url.js"
25
25
  },
26
+ "./content/local-file": {
27
+ "types": "./dist/types/content/local-file.d.ts",
28
+ "import": "./dist/esm/content/local-file.js"
29
+ },
26
30
  "./prompts": {
27
31
  "types": "./dist/types/prompts/index.d.ts",
28
32
  "import": "./dist/esm/prompts/index.js"
@@ -37,19 +41,19 @@
37
41
  }
38
42
  },
39
43
  "dependencies": {
40
- "@fal-ai/client": "^1.9.1",
44
+ "@fal-ai/client": "^1.9.5",
41
45
  "@mozilla/readability": "0.6.0",
42
46
  "cheerio": "^1.2.0",
43
- "es-toolkit": "^1.44.0",
44
- "jsdom": "28.0.0",
45
- "sanitize-html": "^2.17.0"
47
+ "es-toolkit": "^1.45.1",
48
+ "jsdom": "29.0.2",
49
+ "sanitize-html": "^2.17.2"
46
50
  },
47
51
  "devDependencies": {
48
- "@types/jsdom": "^27.0.0",
49
- "@types/node": "^25.2.3",
50
- "@types/sanitize-html": "^2.16.0",
51
- "rimraf": "^6.1.2",
52
- "typescript": "^5.9.3"
52
+ "@types/jsdom": "^28.0.1",
53
+ "@types/node": "^25.5.2",
54
+ "@types/sanitize-html": "^2.16.1",
55
+ "rimraf": "^6.1.3",
56
+ "typescript": "^6.0.2"
53
57
  },
54
58
  "engines": {
55
59
  "node": ">=22"