@steipete/summarize 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/CHANGELOG.md +52 -0
  2. package/LICENSE +21 -0
  3. package/README.md +185 -0
  4. package/dist/cli.cjs +74333 -0
  5. package/dist/cli.cjs.map +7 -0
  6. package/dist/esm/cli-main.js +80 -0
  7. package/dist/esm/cli-main.js.map +1 -0
  8. package/dist/esm/cli.js +18 -0
  9. package/dist/esm/cli.js.map +1 -0
  10. package/dist/esm/config.js +33 -0
  11. package/dist/esm/config.js.map +1 -0
  12. package/dist/esm/content/asset.js +167 -0
  13. package/dist/esm/content/asset.js.map +1 -0
  14. package/dist/esm/content/index.js +4 -0
  15. package/dist/esm/content/index.js.map +1 -0
  16. package/dist/esm/content/link-preview/client.js +20 -0
  17. package/dist/esm/content/link-preview/client.js.map +1 -0
  18. package/dist/esm/content/link-preview/content/article.js +150 -0
  19. package/dist/esm/content/link-preview/content/article.js.map +1 -0
  20. package/dist/esm/content/link-preview/content/cleaner.js +55 -0
  21. package/dist/esm/content/link-preview/content/cleaner.js.map +1 -0
  22. package/dist/esm/content/link-preview/content/fetcher.js +120 -0
  23. package/dist/esm/content/link-preview/content/fetcher.js.map +1 -0
  24. package/dist/esm/content/link-preview/content/index.js +275 -0
  25. package/dist/esm/content/link-preview/content/index.js.map +1 -0
  26. package/dist/esm/content/link-preview/content/parsers.js +77 -0
  27. package/dist/esm/content/link-preview/content/parsers.js.map +1 -0
  28. package/dist/esm/content/link-preview/content/types.js +4 -0
  29. package/dist/esm/content/link-preview/content/types.js.map +1 -0
  30. package/dist/esm/content/link-preview/content/utils.js +127 -0
  31. package/dist/esm/content/link-preview/content/utils.js.map +1 -0
  32. package/dist/esm/content/link-preview/content/youtube.js +82 -0
  33. package/dist/esm/content/link-preview/content/youtube.js.map +1 -0
  34. package/dist/esm/content/link-preview/deps.js +2 -0
  35. package/dist/esm/content/link-preview/deps.js.map +1 -0
  36. package/dist/esm/content/link-preview/fetch-with-timeout.js +35 -0
  37. package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -0
  38. package/dist/esm/content/link-preview/transcript/cache.js +73 -0
  39. package/dist/esm/content/link-preview/transcript/cache.js.map +1 -0
  40. package/dist/esm/content/link-preview/transcript/index.js +95 -0
  41. package/dist/esm/content/link-preview/transcript/index.js.map +1 -0
  42. package/dist/esm/content/link-preview/transcript/normalize.js +43 -0
  43. package/dist/esm/content/link-preview/transcript/normalize.js.map +1 -0
  44. package/dist/esm/content/link-preview/transcript/providers/generic.js +11 -0
  45. package/dist/esm/content/link-preview/transcript/providers/generic.js.map +1 -0
  46. package/dist/esm/content/link-preview/transcript/providers/podcast.js +12 -0
  47. package/dist/esm/content/link-preview/transcript/providers/podcast.js.map +1 -0
  48. package/dist/esm/content/link-preview/transcript/providers/twitter.js +12 -0
  49. package/dist/esm/content/link-preview/transcript/providers/twitter.js.map +1 -0
  50. package/dist/esm/content/link-preview/transcript/providers/youtube/api.js +257 -0
  51. package/dist/esm/content/link-preview/transcript/providers/youtube/api.js.map +1 -0
  52. package/dist/esm/content/link-preview/transcript/providers/youtube/apify.js +55 -0
  53. package/dist/esm/content/link-preview/transcript/providers/youtube/apify.js.map +1 -0
  54. package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js +409 -0
  55. package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js.map +1 -0
  56. package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js +114 -0
  57. package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js.map +1 -0
  58. package/dist/esm/content/link-preview/transcript/providers/youtube.js +74 -0
  59. package/dist/esm/content/link-preview/transcript/providers/youtube.js.map +1 -0
  60. package/dist/esm/content/link-preview/transcript/types.js +2 -0
  61. package/dist/esm/content/link-preview/transcript/types.js.map +1 -0
  62. package/dist/esm/content/link-preview/transcript/utils.js +193 -0
  63. package/dist/esm/content/link-preview/transcript/utils.js.map +1 -0
  64. package/dist/esm/content/link-preview/types.js +2 -0
  65. package/dist/esm/content/link-preview/types.js.map +1 -0
  66. package/dist/esm/costs.js +57 -0
  67. package/dist/esm/costs.js.map +1 -0
  68. package/dist/esm/firecrawl.js +54 -0
  69. package/dist/esm/firecrawl.js.map +1 -0
  70. package/dist/esm/flags.js +97 -0
  71. package/dist/esm/flags.js.map +1 -0
  72. package/dist/esm/index.js +4 -0
  73. package/dist/esm/index.js.map +1 -0
  74. package/dist/esm/llm/generate-text.js +296 -0
  75. package/dist/esm/llm/generate-text.js.map +1 -0
  76. package/dist/esm/llm/google-models.js +112 -0
  77. package/dist/esm/llm/google-models.js.map +1 -0
  78. package/dist/esm/llm/html-to-markdown.js +44 -0
  79. package/dist/esm/llm/html-to-markdown.js.map +1 -0
  80. package/dist/esm/llm/model-id.js +45 -0
  81. package/dist/esm/llm/model-id.js.map +1 -0
  82. package/dist/esm/pricing/litellm.js +25 -0
  83. package/dist/esm/pricing/litellm.js.map +1 -0
  84. package/dist/esm/prompts/file.js +14 -0
  85. package/dist/esm/prompts/file.js.map +1 -0
  86. package/dist/esm/prompts/index.js +3 -0
  87. package/dist/esm/prompts/index.js.map +1 -0
  88. package/dist/esm/prompts/link-summary.js +105 -0
  89. package/dist/esm/prompts/link-summary.js.map +1 -0
  90. package/dist/esm/run.js +1674 -0
  91. package/dist/esm/run.js.map +1 -0
  92. package/dist/esm/shared/contracts.js +2 -0
  93. package/dist/esm/shared/contracts.js.map +1 -0
  94. package/dist/esm/summarizeHome.js +20 -0
  95. package/dist/esm/summarizeHome.js.map +1 -0
  96. package/dist/esm/tty/live-markdown.js +52 -0
  97. package/dist/esm/tty/live-markdown.js.map +1 -0
  98. package/dist/esm/tty/osc-progress.js +8 -0
  99. package/dist/esm/tty/osc-progress.js.map +1 -0
  100. package/dist/esm/tty/spinner.js +33 -0
  101. package/dist/esm/tty/spinner.js.map +1 -0
  102. package/dist/esm/version.js +44 -0
  103. package/dist/esm/version.js.map +1 -0
  104. package/dist/types/cli-main.d.ts +11 -0
  105. package/dist/types/cli.d.ts +1 -0
  106. package/dist/types/config.d.ts +15 -0
  107. package/dist/types/content/asset.d.ts +44 -0
  108. package/dist/types/content/index.d.ts +4 -0
  109. package/dist/types/content/link-preview/client.d.ts +14 -0
  110. package/dist/types/content/link-preview/content/article.d.ts +4 -0
  111. package/dist/types/content/link-preview/content/cleaner.d.ts +12 -0
  112. package/dist/types/content/link-preview/content/fetcher.d.ts +16 -0
  113. package/dist/types/content/link-preview/content/index.d.ts +4 -0
  114. package/dist/types/content/link-preview/content/parsers.d.ts +7 -0
  115. package/dist/types/content/link-preview/content/types.d.ts +44 -0
  116. package/dist/types/content/link-preview/content/utils.d.ts +16 -0
  117. package/dist/types/content/link-preview/content/youtube.d.ts +1 -0
  118. package/dist/types/content/link-preview/deps.d.ts +70 -0
  119. package/dist/types/content/link-preview/fetch-with-timeout.d.ts +4 -0
  120. package/dist/types/content/link-preview/transcript/cache.d.ts +29 -0
  121. package/dist/types/content/link-preview/transcript/index.d.ts +9 -0
  122. package/dist/types/content/link-preview/transcript/normalize.d.ts +3 -0
  123. package/dist/types/content/link-preview/transcript/providers/generic.d.ts +3 -0
  124. package/dist/types/content/link-preview/transcript/providers/podcast.d.ts +3 -0
  125. package/dist/types/content/link-preview/transcript/providers/twitter.d.ts +3 -0
  126. package/dist/types/content/link-preview/transcript/providers/youtube/api.d.ts +26 -0
  127. package/dist/types/content/link-preview/transcript/providers/youtube/apify.d.ts +1 -0
  128. package/dist/types/content/link-preview/transcript/providers/youtube/captions.d.ts +7 -0
  129. package/dist/types/content/link-preview/transcript/providers/youtube/ytdlp.d.ts +3 -0
  130. package/dist/types/content/link-preview/transcript/providers/youtube.d.ts +3 -0
  131. package/dist/types/content/link-preview/transcript/types.d.ts +23 -0
  132. package/dist/types/content/link-preview/transcript/utils.d.ts +7 -0
  133. package/dist/types/content/link-preview/types.d.ts +36 -0
  134. package/dist/types/costs.d.ts +31 -0
  135. package/dist/types/firecrawl.d.ts +5 -0
  136. package/dist/types/flags.d.ts +23 -0
  137. package/dist/types/index.d.ts +4 -0
  138. package/dist/types/llm/generate-text.d.ts +43 -0
  139. package/dist/types/llm/google-models.d.ts +10 -0
  140. package/dist/types/llm/html-to-markdown.d.ts +15 -0
  141. package/dist/types/llm/model-id.d.ts +14 -0
  142. package/dist/types/pricing/litellm.d.ts +13 -0
  143. package/dist/types/prompts/file.d.ts +6 -0
  144. package/dist/types/prompts/index.d.ts +3 -0
  145. package/dist/types/prompts/link-summary.d.ts +27 -0
  146. package/dist/types/run.d.ts +8 -0
  147. package/dist/types/shared/contracts.d.ts +2 -0
  148. package/dist/types/summarizeHome.d.ts +6 -0
  149. package/dist/types/tty/live-markdown.d.ts +10 -0
  150. package/dist/types/tty/osc-progress.d.ts +3 -0
  151. package/dist/types/tty/spinner.d.ts +10 -0
  152. package/dist/types/version.d.ts +2 -0
  153. package/docs/README.md +11 -0
  154. package/docs/config.md +28 -0
  155. package/docs/extract-only.md +13 -0
  156. package/docs/firecrawl.md +17 -0
  157. package/docs/llm.md +33 -0
  158. package/docs/openai.md +18 -0
  159. package/docs/site/.nojekyll +1 -0
  160. package/docs/site/404.html +37 -0
  161. package/docs/site/assets/site.css +577 -0
  162. package/docs/site/assets/site.js +69 -0
  163. package/docs/site/docs/config.html +73 -0
  164. package/docs/site/docs/extract-only.html +79 -0
  165. package/docs/site/docs/firecrawl.html +72 -0
  166. package/docs/site/docs/index.html +89 -0
  167. package/docs/site/docs/llm.html +70 -0
  168. package/docs/site/docs/openai.html +66 -0
  169. package/docs/site/docs/website.html +70 -0
  170. package/docs/site/docs/youtube.html +62 -0
  171. package/docs/site/index.html +125 -0
  172. package/docs/website.md +27 -0
  173. package/docs/youtube.md +32 -0
  174. package/package.json +76 -0
@@ -0,0 +1,27 @@
1
+ # Website mode
2
+
3
+ Use this for non-YouTube URLs.
4
+
5
+ ## What it does
6
+
7
+ - Fetches the page HTML.
8
+ - Extracts “article-ish” content and normalizes it into clean text.
9
+ - If extraction looks blocked or too thin, it can retry via Firecrawl (Markdown).
10
+ - In `--extract-only` mode, the CLI prefers Firecrawl Markdown by default when `FIRECRAWL_API_KEY` is configured.
11
+ - In `--extract-only` mode, `--markdown auto|llm` can also convert HTML → Markdown via an LLM using the configured `--model` (no provider fallback).
12
+
13
+ ## Flags
14
+
15
+ - `--firecrawl off|auto|always`
16
+ - `--markdown off|auto|llm` (default: `auto`; only affects `--extract-only` for non-YouTube URLs)
17
+ - Plain-text mode: use `--firecrawl off --markdown off`.
18
+ - `--timeout 30s|30|2m|5000ms` (default: `2m`)
19
+ - `--extract-only` (print extracted content; no summary LLM call)
20
+ - `--json` (emit a single JSON object)
21
+ - `--verbose` (progress + which extractor was used)
22
+ - `--metrics off|on|detailed` (default: `on`; `detailed` prints a breakdown to stderr)
23
+
24
+ ## API keys
25
+
26
+ - Optional: `FIRECRAWL_API_KEY` (for the Firecrawl fallback / preferred Markdown output)
27
+ - Optional: `XAI_API_KEY` / `OPENAI_API_KEY` / `GEMINI_API_KEY` (also accepts `GOOGLE_GENERATIVE_AI_API_KEY` / `GOOGLE_API_KEY`) (required only when `--markdown llm` is used, or when `--markdown auto` falls back to LLM conversion)
@@ -0,0 +1,32 @@
1
+ # YouTube mode
2
+
3
+ YouTube URLs use transcript-first extraction.
4
+
5
+ ## `--youtube auto|web|apify`
6
+
7
+ - `auto` (default): try `youtubei` → `captionTracks` → Apify (if token exists)
8
+ - `web`: try `youtubei` → `captionTracks` only
9
+ - `apify`: Apify only
10
+
11
+ ## `youtubei` vs `captionTracks`
12
+
13
+ - `youtubei`:
14
+ - Calls YouTube’s internal transcript endpoint (`/youtubei/v1/get_transcript`).
15
+ - Needs a bootstrapped `INNERTUBE_API_KEY`, context, and `getTranscriptEndpoint.params` from the watch page HTML.
16
+ - When it works, you get a nice list of transcript segments.
17
+ - `captionTracks`:
18
+ - Downloads caption tracks listed in `ytInitialPlayerResponse.captions.playerCaptionsTracklistRenderer.captionTracks`.
19
+ - Fetches `fmt=json3` first and falls back to XML-like caption payloads if needed.
20
+ - Often works even when the transcript endpoint doesn’t.
21
+
22
+ ## Fallbacks
23
+
24
+ - If no transcript is available, we still extract `ytInitialPlayerResponse.videoDetails.shortDescription` so YouTube links can still summarize meaningfully.
25
+ - Apify is an optional fallback (needs `APIFY_API_TOKEN`).
26
+ - By default, we use the actor id `faVsWy9VTSNVIhWpR` (Pinto Studio’s “Youtube Transcript Scraper”).
27
+
28
+ ## Example
29
+
30
+ ```bash
31
+ pnpm summarize -- --extract-only "https://www.youtube.com/watch?v=I845O57ZSy4&t=11s"
32
+ ```
package/package.json ADDED
@@ -0,0 +1,76 @@
1
+ {
2
+ "name": "@steipete/summarize",
3
+ "version": "0.1.0",
4
+ "description": "Link → clean text → summary.",
5
+ "type": "module",
6
+ "bin": {
7
+ "summarize": "./dist/cli.cjs"
8
+ },
9
+ "main": "./dist/esm/index.js",
10
+ "module": "./dist/esm/index.js",
11
+ "types": "./dist/types/index.d.ts",
12
+ "exports": {
13
+ ".": {
14
+ "types": "./dist/types/index.d.ts",
15
+ "import": "./dist/esm/index.js"
16
+ },
17
+ "./content": {
18
+ "types": "./dist/types/content/index.d.ts",
19
+ "import": "./dist/esm/content/index.js"
20
+ },
21
+ "./prompts": {
22
+ "types": "./dist/types/prompts/index.d.ts",
23
+ "import": "./dist/esm/prompts/index.js"
24
+ }
25
+ },
26
+ "files": [
27
+ "dist",
28
+ "docs",
29
+ "CHANGELOG.md",
30
+ "README.md",
31
+ "LICENSE"
32
+ ],
33
+ "dependencies": {
34
+ "cheerio": "^1.1.2",
35
+ "es-toolkit": "^1.43.0",
36
+ "sanitize-html": "^2.17.0"
37
+ },
38
+ "devDependencies": {
39
+ "@ai-sdk/anthropic": "3.0.0-beta.91",
40
+ "@ai-sdk/google": "3.0.0-beta.81",
41
+ "@ai-sdk/openai": "3.0.0-beta.104",
42
+ "@ai-sdk/xai": "3.0.0-beta.62",
43
+ "@biomejs/biome": "^2.3.10",
44
+ "@types/node": "^25.0.3",
45
+ "@types/sanitize-html": "^2.16.0",
46
+ "@vitest/coverage-v8": "^4.0.16",
47
+ "ai": "6.0.0-beta.159",
48
+ "commander": "^14.0.2",
49
+ "esbuild": "^0.27.2",
50
+ "file-type": "^21.1.1",
51
+ "markdansi": "0.1.7",
52
+ "mime": "^4.1.0",
53
+ "osc-progress": "^0.1.0",
54
+ "ora": "^9.0.0",
55
+ "tokentally": "github:steipete/tokentally#v0.1.0",
56
+ "tsx": "^4.21.0",
57
+ "typescript": "^5.9.3",
58
+ "vitest": "^4.0.16"
59
+ },
60
+ "scripts": {
61
+ "build": "pnpm build:lib && pnpm build:cli",
62
+ "build:lib": "tsc -p tsconfig.build.json",
63
+ "build:cli": "node scripts/build-cli.mjs",
64
+ "build:bun": "bun scripts/build-bun.js",
65
+ "build:bun:test": "bun scripts/build-bun.js --test",
66
+ "typecheck": "tsc -p tsconfig.build.json --noEmit",
67
+ "summarize": "tsx src/cli.ts",
68
+ "format": "biome format --write .",
69
+ "lint": "biome check .",
70
+ "lint:fix": "biome check --write .",
71
+ "check": "pnpm lint && pnpm test:coverage",
72
+ "test": "pnpm build && vitest run",
73
+ "test:coverage": "pnpm build && vitest run --coverage",
74
+ "release": "bash scripts/release.sh"
75
+ }
76
+ }