@steipete/summarize 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. package/CHANGELOG.md +52 -0
  2. package/LICENSE +21 -0
  3. package/README.md +185 -0
  4. package/dist/cli.cjs +74333 -0
  5. package/dist/cli.cjs.map +7 -0
  6. package/dist/esm/cli-main.js +80 -0
  7. package/dist/esm/cli-main.js.map +1 -0
  8. package/dist/esm/cli.js +18 -0
  9. package/dist/esm/cli.js.map +1 -0
  10. package/dist/esm/config.js +33 -0
  11. package/dist/esm/config.js.map +1 -0
  12. package/dist/esm/content/asset.js +167 -0
  13. package/dist/esm/content/asset.js.map +1 -0
  14. package/dist/esm/content/index.js +4 -0
  15. package/dist/esm/content/index.js.map +1 -0
  16. package/dist/esm/content/link-preview/client.js +20 -0
  17. package/dist/esm/content/link-preview/client.js.map +1 -0
  18. package/dist/esm/content/link-preview/content/article.js +150 -0
  19. package/dist/esm/content/link-preview/content/article.js.map +1 -0
  20. package/dist/esm/content/link-preview/content/cleaner.js +55 -0
  21. package/dist/esm/content/link-preview/content/cleaner.js.map +1 -0
  22. package/dist/esm/content/link-preview/content/fetcher.js +120 -0
  23. package/dist/esm/content/link-preview/content/fetcher.js.map +1 -0
  24. package/dist/esm/content/link-preview/content/index.js +275 -0
  25. package/dist/esm/content/link-preview/content/index.js.map +1 -0
  26. package/dist/esm/content/link-preview/content/parsers.js +77 -0
  27. package/dist/esm/content/link-preview/content/parsers.js.map +1 -0
  28. package/dist/esm/content/link-preview/content/types.js +4 -0
  29. package/dist/esm/content/link-preview/content/types.js.map +1 -0
  30. package/dist/esm/content/link-preview/content/utils.js +127 -0
  31. package/dist/esm/content/link-preview/content/utils.js.map +1 -0
  32. package/dist/esm/content/link-preview/content/youtube.js +82 -0
  33. package/dist/esm/content/link-preview/content/youtube.js.map +1 -0
  34. package/dist/esm/content/link-preview/deps.js +2 -0
  35. package/dist/esm/content/link-preview/deps.js.map +1 -0
  36. package/dist/esm/content/link-preview/fetch-with-timeout.js +35 -0
  37. package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -0
  38. package/dist/esm/content/link-preview/transcript/cache.js +73 -0
  39. package/dist/esm/content/link-preview/transcript/cache.js.map +1 -0
  40. package/dist/esm/content/link-preview/transcript/index.js +95 -0
  41. package/dist/esm/content/link-preview/transcript/index.js.map +1 -0
  42. package/dist/esm/content/link-preview/transcript/normalize.js +43 -0
  43. package/dist/esm/content/link-preview/transcript/normalize.js.map +1 -0
  44. package/dist/esm/content/link-preview/transcript/providers/generic.js +11 -0
  45. package/dist/esm/content/link-preview/transcript/providers/generic.js.map +1 -0
  46. package/dist/esm/content/link-preview/transcript/providers/podcast.js +12 -0
  47. package/dist/esm/content/link-preview/transcript/providers/podcast.js.map +1 -0
  48. package/dist/esm/content/link-preview/transcript/providers/twitter.js +12 -0
  49. package/dist/esm/content/link-preview/transcript/providers/twitter.js.map +1 -0
  50. package/dist/esm/content/link-preview/transcript/providers/youtube/api.js +257 -0
  51. package/dist/esm/content/link-preview/transcript/providers/youtube/api.js.map +1 -0
  52. package/dist/esm/content/link-preview/transcript/providers/youtube/apify.js +55 -0
  53. package/dist/esm/content/link-preview/transcript/providers/youtube/apify.js.map +1 -0
  54. package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js +409 -0
  55. package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js.map +1 -0
  56. package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js +114 -0
  57. package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js.map +1 -0
  58. package/dist/esm/content/link-preview/transcript/providers/youtube.js +74 -0
  59. package/dist/esm/content/link-preview/transcript/providers/youtube.js.map +1 -0
  60. package/dist/esm/content/link-preview/transcript/types.js +2 -0
  61. package/dist/esm/content/link-preview/transcript/types.js.map +1 -0
  62. package/dist/esm/content/link-preview/transcript/utils.js +193 -0
  63. package/dist/esm/content/link-preview/transcript/utils.js.map +1 -0
  64. package/dist/esm/content/link-preview/types.js +2 -0
  65. package/dist/esm/content/link-preview/types.js.map +1 -0
  66. package/dist/esm/costs.js +57 -0
  67. package/dist/esm/costs.js.map +1 -0
  68. package/dist/esm/firecrawl.js +54 -0
  69. package/dist/esm/firecrawl.js.map +1 -0
  70. package/dist/esm/flags.js +97 -0
  71. package/dist/esm/flags.js.map +1 -0
  72. package/dist/esm/index.js +4 -0
  73. package/dist/esm/index.js.map +1 -0
  74. package/dist/esm/llm/generate-text.js +296 -0
  75. package/dist/esm/llm/generate-text.js.map +1 -0
  76. package/dist/esm/llm/google-models.js +112 -0
  77. package/dist/esm/llm/google-models.js.map +1 -0
  78. package/dist/esm/llm/html-to-markdown.js +44 -0
  79. package/dist/esm/llm/html-to-markdown.js.map +1 -0
  80. package/dist/esm/llm/model-id.js +45 -0
  81. package/dist/esm/llm/model-id.js.map +1 -0
  82. package/dist/esm/pricing/litellm.js +25 -0
  83. package/dist/esm/pricing/litellm.js.map +1 -0
  84. package/dist/esm/prompts/file.js +14 -0
  85. package/dist/esm/prompts/file.js.map +1 -0
  86. package/dist/esm/prompts/index.js +3 -0
  87. package/dist/esm/prompts/index.js.map +1 -0
  88. package/dist/esm/prompts/link-summary.js +105 -0
  89. package/dist/esm/prompts/link-summary.js.map +1 -0
  90. package/dist/esm/run.js +1674 -0
  91. package/dist/esm/run.js.map +1 -0
  92. package/dist/esm/shared/contracts.js +2 -0
  93. package/dist/esm/shared/contracts.js.map +1 -0
  94. package/dist/esm/summarizeHome.js +20 -0
  95. package/dist/esm/summarizeHome.js.map +1 -0
  96. package/dist/esm/tty/live-markdown.js +52 -0
  97. package/dist/esm/tty/live-markdown.js.map +1 -0
  98. package/dist/esm/tty/osc-progress.js +8 -0
  99. package/dist/esm/tty/osc-progress.js.map +1 -0
  100. package/dist/esm/tty/spinner.js +33 -0
  101. package/dist/esm/tty/spinner.js.map +1 -0
  102. package/dist/esm/version.js +44 -0
  103. package/dist/esm/version.js.map +1 -0
  104. package/dist/types/cli-main.d.ts +11 -0
  105. package/dist/types/cli.d.ts +1 -0
  106. package/dist/types/config.d.ts +15 -0
  107. package/dist/types/content/asset.d.ts +44 -0
  108. package/dist/types/content/index.d.ts +4 -0
  109. package/dist/types/content/link-preview/client.d.ts +14 -0
  110. package/dist/types/content/link-preview/content/article.d.ts +4 -0
  111. package/dist/types/content/link-preview/content/cleaner.d.ts +12 -0
  112. package/dist/types/content/link-preview/content/fetcher.d.ts +16 -0
  113. package/dist/types/content/link-preview/content/index.d.ts +4 -0
  114. package/dist/types/content/link-preview/content/parsers.d.ts +7 -0
  115. package/dist/types/content/link-preview/content/types.d.ts +44 -0
  116. package/dist/types/content/link-preview/content/utils.d.ts +16 -0
  117. package/dist/types/content/link-preview/content/youtube.d.ts +1 -0
  118. package/dist/types/content/link-preview/deps.d.ts +70 -0
  119. package/dist/types/content/link-preview/fetch-with-timeout.d.ts +4 -0
  120. package/dist/types/content/link-preview/transcript/cache.d.ts +29 -0
  121. package/dist/types/content/link-preview/transcript/index.d.ts +9 -0
  122. package/dist/types/content/link-preview/transcript/normalize.d.ts +3 -0
  123. package/dist/types/content/link-preview/transcript/providers/generic.d.ts +3 -0
  124. package/dist/types/content/link-preview/transcript/providers/podcast.d.ts +3 -0
  125. package/dist/types/content/link-preview/transcript/providers/twitter.d.ts +3 -0
  126. package/dist/types/content/link-preview/transcript/providers/youtube/api.d.ts +26 -0
  127. package/dist/types/content/link-preview/transcript/providers/youtube/apify.d.ts +1 -0
  128. package/dist/types/content/link-preview/transcript/providers/youtube/captions.d.ts +7 -0
  129. package/dist/types/content/link-preview/transcript/providers/youtube/ytdlp.d.ts +3 -0
  130. package/dist/types/content/link-preview/transcript/providers/youtube.d.ts +3 -0
  131. package/dist/types/content/link-preview/transcript/types.d.ts +23 -0
  132. package/dist/types/content/link-preview/transcript/utils.d.ts +7 -0
  133. package/dist/types/content/link-preview/types.d.ts +36 -0
  134. package/dist/types/costs.d.ts +31 -0
  135. package/dist/types/firecrawl.d.ts +5 -0
  136. package/dist/types/flags.d.ts +23 -0
  137. package/dist/types/index.d.ts +4 -0
  138. package/dist/types/llm/generate-text.d.ts +43 -0
  139. package/dist/types/llm/google-models.d.ts +10 -0
  140. package/dist/types/llm/html-to-markdown.d.ts +15 -0
  141. package/dist/types/llm/model-id.d.ts +14 -0
  142. package/dist/types/pricing/litellm.d.ts +13 -0
  143. package/dist/types/prompts/file.d.ts +6 -0
  144. package/dist/types/prompts/index.d.ts +3 -0
  145. package/dist/types/prompts/link-summary.d.ts +27 -0
  146. package/dist/types/run.d.ts +8 -0
  147. package/dist/types/shared/contracts.d.ts +2 -0
  148. package/dist/types/summarizeHome.d.ts +6 -0
  149. package/dist/types/tty/live-markdown.d.ts +10 -0
  150. package/dist/types/tty/osc-progress.d.ts +3 -0
  151. package/dist/types/tty/spinner.d.ts +10 -0
  152. package/dist/types/version.d.ts +2 -0
  153. package/docs/README.md +11 -0
  154. package/docs/config.md +28 -0
  155. package/docs/extract-only.md +13 -0
  156. package/docs/firecrawl.md +17 -0
  157. package/docs/llm.md +33 -0
  158. package/docs/openai.md +18 -0
  159. package/docs/site/.nojekyll +1 -0
  160. package/docs/site/404.html +37 -0
  161. package/docs/site/assets/site.css +577 -0
  162. package/docs/site/assets/site.js +69 -0
  163. package/docs/site/docs/config.html +73 -0
  164. package/docs/site/docs/extract-only.html +79 -0
  165. package/docs/site/docs/firecrawl.html +72 -0
  166. package/docs/site/docs/index.html +89 -0
  167. package/docs/site/docs/llm.html +70 -0
  168. package/docs/site/docs/openai.html +66 -0
  169. package/docs/site/docs/website.html +70 -0
  170. package/docs/site/docs/youtube.html +62 -0
  171. package/docs/site/index.html +125 -0
  172. package/docs/website.md +27 -0
  173. package/docs/youtube.md +32 -0
  174. package/package.json +76 -0
package/dist/types/content/link-preview/transcript/cache.d.ts ADDED
@@ -0,0 +1,29 @@
1
+ import type { TranscriptCache } from '../deps.js';
2
+ import type { CacheMode, TranscriptDiagnostics, TranscriptResolution, TranscriptSource } from '../types.js';
3
+ export declare const DEFAULT_TTL_MS: number;
4
+ export declare const NEGATIVE_TTL_MS: number;
5
+ type CacheDiagnostics = Pick<TranscriptDiagnostics, 'cacheStatus' | 'notes' | 'provider' | 'textProvided' | 'cacheMode' | 'attemptedProviders'>;
6
+ export interface CacheReadArguments {
7
+ url: string;
8
+ cacheMode: CacheMode;
9
+ transcriptCache: TranscriptCache | null;
10
+ }
11
+ export interface TranscriptCacheLookup {
12
+ cached: Awaited<ReturnType<TranscriptCache['get']>> | null;
13
+ resolution: TranscriptResolution | null;
14
+ diagnostics: CacheDiagnostics;
15
+ }
16
+ export declare const readTranscriptCache: ({ url, cacheMode, transcriptCache, }: CacheReadArguments) => Promise<TranscriptCacheLookup>;
17
+ export declare const mapCachedSource: (source: string | null) => TranscriptSource | null;
18
+ export declare const writeTranscriptCache: ({ url, service, resourceKey, result, transcriptCache, }: {
19
+ url: string;
20
+ service: string;
21
+ resourceKey: string | null;
22
+ result: {
23
+ text: string | null;
24
+ source: TranscriptSource | null;
25
+ metadata?: Record<string, unknown> | undefined;
26
+ };
27
+ transcriptCache: TranscriptCache | null;
28
+ }) => Promise<void>;
29
+ export {};
@@ -0,0 +1,9 @@
1
+ import type { LinkPreviewDeps } from '../deps.js';
2
+ import type { CacheMode, TranscriptResolution } from '../types.js';
3
+ import type { ProviderFetchOptions } from './types.js';
4
+ interface ResolveTranscriptOptions {
5
+ youtubeTranscriptMode?: ProviderFetchOptions['youtubeTranscriptMode'];
6
+ cacheMode?: CacheMode;
7
+ }
8
+ export declare const resolveTranscriptForLink: (url: string, html: string | null, deps: LinkPreviewDeps, { youtubeTranscriptMode, cacheMode: providedCacheMode }?: ResolveTranscriptOptions) => Promise<TranscriptResolution>;
9
+ export {};
@@ -0,0 +1,3 @@
1
+ export declare const normalizeTranscriptText: (input: string) => string;
2
+ export declare const normalizeTranscriptLines: (lines: readonly string[]) => string | null;
3
+ export declare const normalizeApifyTranscript: (raw: unknown) => string | null;
@@ -0,0 +1,3 @@
1
+ import type { ProviderContext, ProviderFetchOptions, ProviderResult } from '../types.js';
2
+ export declare const canHandle: () => boolean;
3
+ export declare const fetchTranscript: (_context: ProviderContext, _options: ProviderFetchOptions) => Promise<ProviderResult>;
@@ -0,0 +1,3 @@
1
+ import type { ProviderContext, ProviderFetchOptions, ProviderResult } from '../types.js';
2
+ export declare const canHandle: ({ url }: ProviderContext) => boolean;
3
+ export declare const fetchTranscript: (_context: ProviderContext, _options: ProviderFetchOptions) => Promise<ProviderResult>;
@@ -0,0 +1,3 @@
1
+ import type { ProviderContext, ProviderFetchOptions, ProviderResult } from '../types.js';
2
+ export declare const canHandle: ({ url }: ProviderContext) => boolean;
3
+ export declare const fetchTranscript: (_context: ProviderContext, _options: ProviderFetchOptions) => Promise<ProviderResult>;
@@ -0,0 +1,26 @@
1
+ export interface YoutubeTranscriptConfig {
2
+ apiKey: string;
3
+ context: Record<string, unknown>;
4
+ params: string;
5
+ visitorData?: string | null;
6
+ clientName?: string | null;
7
+ clientVersion?: string | null;
8
+ pageCl?: number | null;
9
+ pageLabel?: string | null;
10
+ }
11
+ export declare const extractYoutubeiTranscriptConfig: (html: string) => YoutubeTranscriptConfig | null;
12
+ export declare const fetchTranscriptFromTranscriptEndpoint: (fetchImpl: typeof fetch, { config, originalUrl, }: {
13
+ config: YoutubeTranscriptConfig;
14
+ originalUrl: string;
15
+ }) => Promise<string | null>;
16
+ export declare const extractTranscriptFromTranscriptEndpoint: (data: unknown) => string | null;
17
+ export declare const extractYoutubeiBootstrap: (html: string) => {
18
+ apiKey: string | null;
19
+ context: Record<string, unknown>;
20
+ clientVersion: string | null;
21
+ clientName: string | null;
22
+ visitorData: string | null;
23
+ pageCl: number | null;
24
+ pageLabel: string | null;
25
+ xsrfToken: string | null;
26
+ } | null;
@@ -0,0 +1 @@
1
+ export declare const fetchTranscriptWithApify: (fetchImpl: typeof fetch, apifyApiToken: string | null, url: string) => Promise<string | null>;
@@ -0,0 +1,7 @@
1
+ interface YoutubeTranscriptContext {
2
+ html: string;
3
+ originalUrl: string;
4
+ videoId: string;
5
+ }
6
+ export declare const fetchTranscriptFromCaptionTracks: (fetchImpl: typeof fetch, { html, originalUrl, videoId }: YoutubeTranscriptContext) => Promise<string | null>;
7
+ export {};
@@ -0,0 +1,3 @@
1
+ export declare function fetchTranscriptWithYtDlp(fetchImpl: typeof fetch, url: string, { timeoutMs }?: {
2
+ timeoutMs?: number;
3
+ }): Promise<string | null>;
@@ -0,0 +1,3 @@
1
+ import type { ProviderContext, ProviderFetchOptions, ProviderResult } from '../types.js';
2
+ export declare const canHandle: ({ url }: ProviderContext) => boolean;
3
+ export declare const fetchTranscript: (context: ProviderContext, options: ProviderFetchOptions) => Promise<ProviderResult>;
@@ -0,0 +1,23 @@
1
+ import type { YoutubeTranscriptMode } from '../content/types.js';
2
+ import type { TranscriptResolution, TranscriptSource } from '../types.js';
3
+ export type TranscriptService = 'youtube' | 'podcast' | 'generic';
4
+ export interface ProviderContext {
5
+ url: string;
6
+ html: string | null;
7
+ resourceKey: string | null;
8
+ }
9
+ export interface ProviderFetchOptions {
10
+ fetch: typeof fetch;
11
+ apifyApiToken: string | null;
12
+ youtubeTranscriptMode: YoutubeTranscriptMode;
13
+ }
14
+ export interface ProviderResult extends TranscriptResolution {
15
+ metadata?: Record<string, unknown>;
16
+ attemptedProviders: TranscriptSource[];
17
+ }
18
+ export interface ProviderModule {
19
+ id: TranscriptService;
20
+ canHandle(context: ProviderContext): boolean;
21
+ fetchTranscript(context: ProviderContext, options: ProviderFetchOptions): Promise<ProviderResult>;
22
+ }
23
+ export type { TranscriptSource } from '../types.js';
@@ -0,0 +1,7 @@
1
+ export declare function isRecord(value: unknown): value is Record<string, unknown>;
2
+ export declare const isYouTubeUrl: (rawUrl: string) => boolean;
3
+ export declare function isYouTubeVideoUrl(rawUrl: string): boolean;
4
+ export declare function extractYouTubeVideoId(rawUrl: string): string | null;
5
+ export declare function sanitizeYoutubeJsonResponse(input: string): string;
6
+ export declare function decodeHtmlEntities(input: string): string;
7
+ export declare function extractYoutubeBootstrapConfig(html: string): Record<string, unknown> | null;
@@ -0,0 +1,36 @@
1
+ export type TranscriptSource = 'youtubei' | 'captionTracks' | 'yt-dlp' | 'apify' | 'html' | 'unavailable' | 'unknown';
2
+ export declare const CACHE_MODES: readonly ["default", "bypass"];
3
+ export type CacheMode = (typeof CACHE_MODES)[number];
4
+ export type CacheStatus = 'hit' | 'miss' | 'expired' | 'bypassed' | 'fallback' | 'unknown';
5
+ export interface TranscriptDiagnostics {
6
+ cacheMode: CacheMode;
7
+ cacheStatus: CacheStatus;
8
+ textProvided: boolean;
9
+ provider: TranscriptSource | null;
10
+ attemptedProviders: TranscriptSource[];
11
+ notes?: string | null;
12
+ }
13
+ export interface FirecrawlDiagnostics {
14
+ attempted: boolean;
15
+ used: boolean;
16
+ cacheMode: CacheMode;
17
+ cacheStatus: CacheStatus;
18
+ notes?: string | null;
19
+ }
20
+ export interface MarkdownDiagnostics {
21
+ requested: boolean;
22
+ used: boolean;
23
+ provider: 'firecrawl' | 'llm' | null;
24
+ notes?: string | null;
25
+ }
26
+ export interface ContentFetchDiagnostics {
27
+ strategy: 'firecrawl' | 'html';
28
+ firecrawl: FirecrawlDiagnostics;
29
+ markdown: MarkdownDiagnostics;
30
+ transcript: TranscriptDiagnostics;
31
+ }
32
+ export interface TranscriptResolution {
33
+ text: string | null;
34
+ source: TranscriptSource | null;
35
+ diagnostics?: TranscriptDiagnostics;
36
+ }
@@ -0,0 +1,31 @@
1
+ import type { LlmTokenUsage } from './llm/generate-text.js';
2
+ export type LlmProvider = 'xai' | 'openai' | 'google' | 'anthropic';
3
+ export type LlmCall = {
4
+ provider: LlmProvider;
5
+ model: string;
6
+ usage: LlmTokenUsage | null;
7
+ purpose: 'summary' | 'chunk-notes' | 'markdown';
8
+ };
9
+ export type RunMetricsReport = {
10
+ llm: Array<{
11
+ provider: LlmProvider;
12
+ model: string;
13
+ calls: number;
14
+ promptTokens: number | null;
15
+ completionTokens: number | null;
16
+ totalTokens: number | null;
17
+ }>;
18
+ services: {
19
+ firecrawl: {
20
+ requests: number;
21
+ };
22
+ apify: {
23
+ requests: number;
24
+ };
25
+ };
26
+ };
27
+ export declare function buildRunMetricsReport({ llmCalls, firecrawlRequests, apifyRequests, }: {
28
+ llmCalls: LlmCall[];
29
+ firecrawlRequests: number;
30
+ apifyRequests: number;
31
+ }): RunMetricsReport;
@@ -0,0 +1,5 @@
1
+ import type { ScrapeWithFirecrawl } from './content/link-preview/deps.js';
2
+ export declare function createFirecrawlScraper({ apiKey, fetchImpl, }: {
3
+ apiKey: string;
4
+ fetchImpl: typeof fetch;
5
+ }): ScrapeWithFirecrawl;
@@ -0,0 +1,23 @@
1
+ import type { SummaryLength } from './shared/contracts.js';
2
+ export type YoutubeMode = 'auto' | 'web' | 'apify';
3
+ export type FirecrawlMode = 'off' | 'auto' | 'always';
4
+ export type MarkdownMode = 'off' | 'auto' | 'llm';
5
+ export type StreamMode = 'auto' | 'on' | 'off';
6
+ export type RenderMode = 'auto' | 'md' | 'md-live' | 'plain';
7
+ export type MetricsMode = 'off' | 'on' | 'detailed';
8
+ export type LengthArg = {
9
+ kind: 'preset';
10
+ preset: SummaryLength;
11
+ } | {
12
+ kind: 'chars';
13
+ maxCharacters: number;
14
+ };
15
+ export declare function parseYoutubeMode(raw: string): YoutubeMode;
16
+ export declare function parseFirecrawlMode(raw: string): FirecrawlMode;
17
+ export declare function parseMarkdownMode(raw: string): MarkdownMode;
18
+ export declare function parseStreamMode(raw: string): StreamMode;
19
+ export declare function parseRenderMode(raw: string): RenderMode;
20
+ export declare function parseMetricsMode(raw: string): MetricsMode;
21
+ export declare function parseDurationMs(raw: string): number;
22
+ export declare function parseLengthArg(raw: string): LengthArg;
23
+ export declare function parseMaxOutputTokensArg(raw: string | undefined): number | null;
@@ -0,0 +1,4 @@
1
+ export * from './content/index.js';
2
+ export * from './prompts/index.js';
3
+ export type { SummaryLength } from './shared/contracts.js';
4
+ export { SUMMARY_LENGTHS } from './shared/contracts.js';
@@ -0,0 +1,43 @@
1
+ import type { ModelMessage } from 'ai';
2
+ export type LlmApiKeys = {
3
+ xaiApiKey: string | null;
4
+ openaiApiKey: string | null;
5
+ googleApiKey: string | null;
6
+ anthropicApiKey: string | null;
7
+ };
8
+ export type LlmTokenUsage = {
9
+ promptTokens: number | null;
10
+ completionTokens: number | null;
11
+ totalTokens: number | null;
12
+ };
13
+ export declare function generateTextWithModelId({ modelId, apiKeys, system, prompt, temperature, maxOutputTokens, timeoutMs, fetchImpl, }: {
14
+ modelId: string;
15
+ apiKeys: LlmApiKeys;
16
+ system?: string;
17
+ prompt: string | ModelMessage[];
18
+ temperature?: number;
19
+ maxOutputTokens?: number;
20
+ timeoutMs: number;
21
+ fetchImpl: typeof fetch;
22
+ }): Promise<{
23
+ text: string;
24
+ canonicalModelId: string;
25
+ provider: 'xai' | 'openai' | 'google' | 'anthropic';
26
+ usage: LlmTokenUsage | null;
27
+ }>;
28
+ export declare function streamTextWithModelId({ modelId, apiKeys, system, prompt, temperature, maxOutputTokens, timeoutMs, fetchImpl, }: {
29
+ modelId: string;
30
+ apiKeys: LlmApiKeys;
31
+ system?: string;
32
+ prompt: string | ModelMessage[];
33
+ temperature?: number;
34
+ maxOutputTokens?: number;
35
+ timeoutMs: number;
36
+ fetchImpl: typeof fetch;
37
+ }): Promise<{
38
+ textStream: AsyncIterable<string>;
39
+ canonicalModelId: string;
40
+ provider: 'xai' | 'openai' | 'google' | 'anthropic';
41
+ usage: Promise<LlmTokenUsage | null>;
42
+ lastError: () => unknown;
43
+ }>;
@@ -0,0 +1,10 @@
1
+ export declare function resolveGoogleModelForUsage({ requestedModelId, apiKey, fetchImpl, timeoutMs, }: {
2
+ requestedModelId: string;
3
+ apiKey: string;
4
+ fetchImpl: typeof fetch;
5
+ timeoutMs: number;
6
+ }): Promise<{
7
+ resolvedModelId: string;
8
+ supportedMethods: string[];
9
+ note: string | null;
10
+ }>;
@@ -0,0 +1,15 @@
1
+ import type { ConvertHtmlToMarkdown } from '../content/link-preview/deps.js';
2
+ import type { LlmTokenUsage } from './generate-text.js';
3
+ export declare function createHtmlToMarkdownConverter({ modelId, xaiApiKey, googleApiKey, openaiApiKey, anthropicApiKey, fetchImpl, onUsage, }: {
4
+ modelId: string;
5
+ xaiApiKey: string | null;
6
+ googleApiKey: string | null;
7
+ openaiApiKey: string | null;
8
+ fetchImpl: typeof fetch;
9
+ anthropicApiKey: string | null;
10
+ onUsage?: (usage: {
11
+ model: string;
12
+ provider: 'xai' | 'openai' | 'google' | 'anthropic';
13
+ usage: LlmTokenUsage | null;
14
+ }) => void;
15
+ }): ConvertHtmlToMarkdown;
@@ -0,0 +1,14 @@
1
+ export type LlmProvider = 'xai' | 'openai' | 'google' | 'anthropic';
2
+ export type ParsedModelId = {
3
+ provider: LlmProvider;
4
+ /**
5
+ * Provider-native model id (no prefix), e.g. `grok-4-fast-non-reasoning`.
6
+ */
7
+ model: string;
8
+ /**
9
+ * Canonical gateway-style id, e.g. `xai/grok-4-fast-non-reasoning`.
10
+ */
11
+ canonical: string;
12
+ };
13
+ export declare function normalizeGatewayStyleModelId(raw: string): string;
14
+ export declare function parseGatewayStyleModelId(raw: string): ParsedModelId;
@@ -0,0 +1,13 @@
1
+ import { type LiteLlmCatalog, type LiteLlmLoadResult } from 'tokentally/node';
2
+ export type { LiteLlmCatalog, LiteLlmLoadResult };
3
+ export declare function loadLiteLlmCatalog({ env, fetchImpl, nowMs, }: {
4
+ env: Record<string, string | undefined>;
5
+ fetchImpl: typeof fetch;
6
+ nowMs?: number;
7
+ }): Promise<LiteLlmLoadResult>;
8
+ export type LlmPerTokenPricing = {
9
+ inputUsdPerToken: number;
10
+ outputUsdPerToken: number;
11
+ };
12
+ export declare function resolveLiteLlmPricingForModelId(catalog: LiteLlmCatalog, modelId: string): LlmPerTokenPricing | null;
13
+ export declare function resolveLiteLlmMaxOutputTokensForModelId(catalog: LiteLlmCatalog, modelId: string): number | null;
@@ -0,0 +1,6 @@
1
+ import type { SummaryLengthTarget } from './link-summary.js';
2
+ export declare function buildFileSummaryPrompt({ filename, mediaType, summaryLength, }: {
3
+ filename: string | null;
4
+ mediaType: string | null;
5
+ summaryLength: SummaryLengthTarget;
6
+ }): string;
@@ -0,0 +1,3 @@
1
+ export type { SummaryLength } from '../shared/contracts.js';
2
+ export { buildFileSummaryPrompt } from './file.js';
3
+ export { buildLinkSummaryPrompt, estimateMaxCompletionTokensForCharacters, pickSummaryLengthForCharacters, type ShareContextEntry, SUMMARY_LENGTH_TO_TOKENS, type SummaryLengthTarget, } from './link-summary.js';
@@ -0,0 +1,27 @@
1
+ import type { SummaryLength } from '../shared/contracts.js';
2
+ export declare const SUMMARY_LENGTH_TO_TOKENS: Record<SummaryLength, number>;
3
+ export type SummaryLengthTarget = SummaryLength | {
4
+ maxCharacters: number;
5
+ };
6
+ export declare function pickSummaryLengthForCharacters(maxCharacters: number): SummaryLength;
7
+ export declare function estimateMaxCompletionTokensForCharacters(maxCharacters: number): number;
8
+ export type ShareContextEntry = {
9
+ author: string;
10
+ handle?: string | null;
11
+ text: string;
12
+ likeCount?: number | null;
13
+ reshareCount?: number | null;
14
+ replyCount?: number | null;
15
+ timestamp?: string | null;
16
+ };
17
+ export declare function buildLinkSummaryPrompt({ url, title, siteName, description, content, truncated, hasTranscript, summaryLength, shares, }: {
18
+ url: string;
19
+ title: string | null;
20
+ siteName: string | null;
21
+ description: string | null;
22
+ content: string;
23
+ truncated: boolean;
24
+ hasTranscript: boolean;
25
+ summaryLength: SummaryLengthTarget;
26
+ shares: ShareContextEntry[];
27
+ }): string;
@@ -0,0 +1,8 @@
1
+ type RunEnv = {
2
+ env: Record<string, string | undefined>;
3
+ fetch: typeof fetch;
4
+ stdout: NodeJS.WritableStream;
5
+ stderr: NodeJS.WritableStream;
6
+ };
7
+ export declare function runCli(argv: string[], { env, fetch, stdout, stderr }: RunEnv): Promise<void>;
8
+ export {};
@@ -0,0 +1,2 @@
1
+ export declare const SUMMARY_LENGTHS: readonly ["short", "medium", "long", "xl", "xxl"];
2
+ export type SummaryLength = (typeof SUMMARY_LENGTHS)[number];
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Test-only hook: avoid mutating process.env (shared across Vitest worker threads).
3
+ * This override is scoped to the current Node worker.
4
+ */
5
+ export declare function setSummarizeHomeDirOverrideForTest(dir: string | null): void;
6
+ export declare function getSummarizeHomeDir(env: Record<string, string | undefined>): string;
@@ -0,0 +1,10 @@
1
+ type LiveMarkdownRenderer = {
2
+ render: (markdown: string) => void;
3
+ finish: () => void;
4
+ };
5
+ export declare function createLiveMarkdownRenderer({ stdout, width, color, }: {
6
+ stdout: NodeJS.WritableStream;
7
+ width: number;
8
+ color: boolean;
9
+ }): LiveMarkdownRenderer;
10
+ export {};
@@ -0,0 +1,3 @@
1
+ export type { OscProgressOptions } from 'osc-progress';
2
+ export declare function startOscProgress(options: import('osc-progress').OscProgressOptions): () => void;
3
+ export declare function supportsOscProgress(env: Record<string, string | undefined>, isTty: boolean): boolean;
@@ -0,0 +1,10 @@
1
+ export declare function startSpinner({ text, enabled, stream, }: {
2
+ text: string;
3
+ enabled: boolean;
4
+ stream: NodeJS.WritableStream;
5
+ }): {
6
+ stop: () => void;
7
+ clear: () => void;
8
+ stopAndClear: () => void;
9
+ setText: (next: string) => void;
10
+ };
@@ -0,0 +1,2 @@
1
+ export declare const FALLBACK_VERSION = "0.1.0";
2
+ export declare function resolvePackageVersion(importMetaUrl?: string): string;
package/docs/README.md ADDED
@@ -0,0 +1,11 @@
1
+ # Docs
2
+
3
+ - `docs/website.md` — normal websites (HTML extraction + Firecrawl fallback)
4
+ - `docs/youtube.md` — YouTube transcript extraction (youtubei / captionTracks / Apify)
5
+ - `docs/firecrawl.md` — Firecrawl mode + API key
6
+ - `docs/llm.md` — LLM summarization + model config (Gateway/OpenAI)
7
+ - `docs/extract-only.md` — extraction mode (no LLM call)
8
+
9
+ ## Website
10
+
11
+ - Static site source: `docs/site/` (GitHub Pages via Actions)
package/docs/config.md ADDED
@@ -0,0 +1,28 @@
1
+ # Config
2
+
3
+ `summarize` supports an optional JSON config file for defaults.
4
+
5
+ ## Location
6
+
7
+ Default path:
8
+
9
+ - `~/.summarize/config.json`
10
+
11
+ ## Precedence
12
+
13
+ For `model`:
14
+
15
+ 1. CLI flag `--model`
16
+ 2. Env `SUMMARIZE_MODEL`
17
+ 3. Config file `model`
18
+ 4. Built-in default (`google/gemini-3-flash-preview`)
19
+
20
+ ## Format
21
+
22
+ `~/.summarize/config.json`:
23
+
24
+ ```json
25
+ {
26
+ "model": "google/gemini-3-flash-preview"
27
+ }
28
+ ```
package/docs/extract-only.md ADDED
@@ -0,0 +1,13 @@
1
+ # Extract-only mode
2
+
3
+ `--extract-only` prints the extracted content and exits.
4
+
5
+ ## Notes
6
+
7
+ - No summarization LLM call happens in this mode.
8
+ - `--markdown llm` / `--markdown auto` may still call the configured LLM for HTML → Markdown conversion.
9
+ - `--length` is intended for summarization guidance; extraction prints full content.
10
+ - For non-YouTube URLs, the CLI prefers Firecrawl Markdown by default when `FIRECRAWL_API_KEY` is configured.
11
+ - Force plain HTML extraction with `--firecrawl off`.
12
+ - For non-YouTube URLs, `--markdown auto` can convert HTML → Markdown via an LLM when configured.
13
+ - Force it with `--markdown llm`.
package/docs/firecrawl.md ADDED
@@ -0,0 +1,17 @@
1
+ # Firecrawl mode
2
+
3
+ Firecrawl is a fallback for sites that block direct HTML fetching or don’t render meaningful content without JS.
4
+
5
+ ## `--firecrawl off|auto|always`
6
+
7
+ - `off`: never use Firecrawl.
8
+ - `auto` (default): use Firecrawl only when HTML extraction looks blocked/thin.
9
+ - `always`: try Firecrawl first (falls back to HTML if Firecrawl is unavailable/empty).
10
+
11
+ ## Extract-only default
12
+
13
+ When `--extract-only` is used for non-YouTube URLs and `FIRECRAWL_API_KEY` is configured, the CLI defaults to `--firecrawl always` to return Markdown.
14
+
15
+ ## API key
16
+
17
+ - `FIRECRAWL_API_KEY` (required for Firecrawl requests)
package/docs/llm.md ADDED
@@ -0,0 +1,33 @@
1
+ # LLM / summarization mode
2
+
3
+ By default `summarize` will call an LLM using **direct provider API keys**.
4
+
5
+ ## Defaults
6
+
7
+ - Default model: `google/gemini-3-flash-preview`
8
+ - Override with `SUMMARIZE_MODEL`, config file (`model`), or `--model`.
9
+
10
+ ## Env
11
+
12
+ - `XAI_API_KEY` (required for `xai/...` models)
13
+ - `OPENAI_API_KEY` (required for `openai/...` models)
14
+ - `OPENAI_BASE_URL` (optional; OpenAI-compatible API endpoint, e.g. OpenRouter)
15
+ - `OPENROUTER_API_KEY` (optional; used when `OPENAI_BASE_URL` points to OpenRouter)
16
+ - `GEMINI_API_KEY` (required for `google/...` models; also accepts `GOOGLE_GENERATIVE_AI_API_KEY` / `GOOGLE_API_KEY`)
17
+ - `ANTHROPIC_API_KEY` (required for `anthropic/...` models)
18
+ - `SUMMARIZE_MODEL` (optional; overrides default model selection)
19
+
20
+ ## Flags
21
+
22
+ - `--model <model>`
23
+ - Examples:
24
+ - `google/gemini-3-flash-preview`
25
+ - `openai/gpt-5.2`
26
+ - `xai/grok-4-fast-non-reasoning`
27
+ - `google/gemini-2.0-flash`
28
+ - `anthropic/claude-sonnet-4-5`
29
+ - `--length short|medium|long|xl|xxl|<chars>`
30
+ - This is *soft guidance* to the model (no hard truncation).
31
+ - `--max-output-tokens <count>`
32
+ - Hard cap for output tokens (optional).
33
+ - `--json` (includes prompt + summary in one JSON object)
package/docs/openai.md ADDED
@@ -0,0 +1,18 @@
1
+ # OpenAI models
2
+
3
+ Use OpenAI directly by choosing an `openai/...` model id.
4
+
5
+ For the full model/provider matrix, see `docs/llm.md`.
6
+
7
+ ## Env
8
+
9
+ - `OPENAI_API_KEY` (required for `openai/...` models)
10
+
11
+ ## Flags
12
+
13
+ - `--model openai/<model>`
14
+ - `--length short|medium|long|xl|xxl|<chars>`
15
+ - This is *soft guidance* to the model (no hard truncation).
16
+ - `--max-output-tokens <count>`
17
+ - Hard cap for output tokens (optional).
18
+ - `--json` (includes prompt + summary in one JSON object)
package/docs/site/.nojekyll ADDED
@@ -0,0 +1 @@
1
+
package/docs/site/404.html ADDED
@@ -0,0 +1,37 @@
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width,initial-scale=1" />
6
+ <meta name="color-scheme" content="dark light" />
7
+ <title>Not found — summarize</title>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
10
+ <link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;600;700&display=swap" rel="stylesheet" />
11
+ <link rel="stylesheet" href="./assets/site.css" />
12
+ </head>
13
+ <body>
14
+ <main class="shell">
15
+ <header class="top">
16
+ <a class="brand" href="./index.html" aria-label="summarize home">
17
+ <span class="brand__mark" aria-hidden="true">s</span>
18
+ <span class="brand__word">summarize</span>
19
+ </a>
20
+ </header>
21
+
22
+ <section class="hero hero--compact">
23
+ <div class="hero__card">
24
+ <p class="kicker">404</p>
25
+ <h1 class="title">Page not found.</h1>
26
+ <p class="lede">Try the docs index or head back home.</p>
27
+ <div class="ctaRow">
28
+ <a class="btn btn--primary" href="./index.html">Home</a>
29
+ <a class="btn" href="./docs/index.html">Docs</a>
30
+ </div>
31
+ </div>
32
+ </section>
33
+ </main>
34
+
35
+ <script type="module" src="./assets/site.js"></script>
36
+ </body>
37
+ </html>