@pencil-agent/nano-pencil 2.0.0-beta.8 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. package/README.md +267 -267
  2. package/dist/build-meta.json +3 -3
  3. package/dist/core/export-html/AGENT.md +11 -11
  4. package/dist/core/export-html/template.css +971 -971
  5. package/dist/core/export-html/template.html +54 -54
  6. package/dist/core/extensions-host/index.d.ts +1 -1
  7. package/dist/core/extensions-host/loader.js +1 -1
  8. package/dist/core/extensions-host/runner.d.ts +1 -0
  9. package/dist/core/extensions-host/runner.js +2 -2
  10. package/dist/core/extensions-host/types.d.ts +17 -22
  11. package/dist/core/lib/ai/src/types.d.ts +12 -2
  12. package/dist/core/persona/persona-manager.js +5 -2
  13. package/dist/core/runtime/agent-session.js +3 -3
  14. package/dist/core/runtime/extension-core-bindings.d.ts +1 -0
  15. package/dist/core/runtime/extension-core-bindings.js +2 -2
  16. package/dist/extensions/builtin/AGENT.md +115 -115
  17. package/dist/extensions/builtin/browser/AGENT.md +17 -17
  18. package/dist/extensions/builtin/browser/agent-workspace/agent_helpers.py +12 -12
  19. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/amazon/product-search.md +198 -198
  20. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/archive-org/scraping.md +341 -341
  21. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/arxiv/scraping.md +311 -311
  22. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/arxiv-bulk/scraping.md +333 -333
  23. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/atlas/overview.md +70 -70
  24. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/booking-com/scraping.md +578 -578
  25. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/capterra/scraping.md +440 -440
  26. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/centilebrain/generate-estimates.md +110 -110
  27. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coingecko/scraping.md +325 -325
  28. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coinmarketcap/scraping.md +463 -463
  29. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coursera/scraping.md +360 -360
  30. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/craigslist/scraping.md +390 -390
  31. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/crossref/scraping.md +568 -568
  32. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/dev-to/scraping.md +323 -323
  33. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/duckduckgo/scraping.md +349 -349
  34. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/ebay/scraping.md +435 -435
  35. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/etsy/scraping.md +506 -506
  36. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/eventbrite/scraping.md +363 -363
  37. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/expedia/automation.md +168 -168
  38. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/facebook/groups.md +236 -236
  39. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/facebook/pages.md +295 -295
  40. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/framer/editor.md +108 -108
  41. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/fred/scraping.md +493 -493
  42. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/g2/scraping.md +580 -580
  43. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/genius/scraping.md +511 -511
  44. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/github/repo-actions.md +65 -65
  45. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/github/scraping.md +184 -184
  46. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/glassdoor/scraping.md +543 -543
  47. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/gmail/compose.md +122 -122
  48. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/goodreads/scraping.md +461 -461
  49. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/gutenberg/scraping.md +383 -383
  50. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/hackernews/scraping.md +243 -243
  51. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/howlongtobeat/scraping.md +473 -473
  52. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/imdb/scraping.md +271 -271
  53. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/itch-io/scraping.md +436 -436
  54. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/job-boards/indeed-glassdoor.md +1021 -1021
  55. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/letterboxd/scraping.md +349 -349
  56. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/linkedin/invitation-manager.md +109 -109
  57. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/loom/folder-enumeration.md +170 -170
  58. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/macrotrends/scraping.md +537 -537
  59. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/medium/article-hydration.md +120 -120
  60. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/medium/scraping.md +414 -414
  61. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/metacritic/scraping.md +477 -477
  62. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/musicbrainz/scraping.md +478 -478
  63. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/nasa/scraping.md +339 -339
  64. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/news-aggregation/multi-source.md +205 -205
  65. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/open-library/scraping.md +472 -472
  66. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/openalex/scraping.md +470 -470
  67. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/openstreetmap/scraping.md +490 -490
  68. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/package-registries/npm-pypi.md +478 -478
  69. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/polymarket/scraping.md +234 -234
  70. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/producthunt/scraping.md +307 -307
  71. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/pubmed/scraping.md +421 -421
  72. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/quora/scraping.md +364 -364
  73. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/rawg/scraping.md +352 -352
  74. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/reddit/scraping.md +124 -124
  75. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/rest-countries/scraping.md +233 -233
  76. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/sec-edgar/scraping.md +361 -361
  77. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/README.md +36 -36
  78. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/embedded-apps.md +72 -72
  79. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/knowledge-base.md +109 -109
  80. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/polaris-inputs.md +137 -137
  81. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/soundcloud/scraping.md +362 -362
  82. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/spotify/scraping.md +339 -339
  83. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/stackoverflow/scraping.md +435 -435
  84. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/steam/scraping.md +575 -575
  85. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/substack/scraping.md +338 -338
  86. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/thetechgeeks/pricing.md +52 -52
  87. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/tiktok/upload.md +107 -107
  88. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/tradingview/scraping.md +309 -309
  89. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/trello/boards-and-lists.md +88 -88
  90. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/trustpilot/scraping.md +375 -375
  91. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/walmart/scraping.md +444 -444
  92. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/wayback-machine/scraping.md +306 -306
  93. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/weather/scraping.md +398 -398
  94. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/wellfound/scraping.md +596 -596
  95. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/world-bank/scraping.md +356 -356
  96. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/xiaohongshu/scraping.md +84 -84
  97. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/youtube/scraping.md +418 -418
  98. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/zillow/scraping.md +433 -433
  99. package/dist/extensions/builtin/browser/browser.md +73 -73
  100. package/dist/extensions/builtin/browser/install.md +142 -142
  101. package/dist/extensions/builtin/browser/interaction-skills/connection.md +48 -48
  102. package/dist/extensions/builtin/browser/interaction-skills/cookies.md +3 -3
  103. package/dist/extensions/builtin/browser/interaction-skills/cross-origin-iframes.md +3 -3
  104. package/dist/extensions/builtin/browser/interaction-skills/dialogs.md +64 -64
  105. package/dist/extensions/builtin/browser/interaction-skills/downloads.md +3 -3
  106. package/dist/extensions/builtin/browser/interaction-skills/drag-and-drop.md +3 -3
  107. package/dist/extensions/builtin/browser/interaction-skills/dropdowns.md +3 -3
  108. package/dist/extensions/builtin/browser/interaction-skills/iframes.md +3 -3
  109. package/dist/extensions/builtin/browser/interaction-skills/network-requests.md +3 -3
  110. package/dist/extensions/builtin/browser/interaction-skills/print-as-pdf.md +3 -3
  111. package/dist/extensions/builtin/browser/interaction-skills/profile-sync.md +90 -90
  112. package/dist/extensions/builtin/browser/interaction-skills/screenshots.md +17 -17
  113. package/dist/extensions/builtin/browser/interaction-skills/scrolling.md +3 -3
  114. package/dist/extensions/builtin/browser/interaction-skills/shadow-dom.md +3 -3
  115. package/dist/extensions/builtin/browser/interaction-skills/tabs.md +69 -69
  116. package/dist/extensions/builtin/browser/interaction-skills/uploads.md +1 -1
  117. package/dist/extensions/builtin/browser/interaction-skills/viewport.md +3 -3
  118. package/dist/extensions/builtin/browser/src/browser_harness/AGENT.md +15 -15
  119. package/dist/extensions/builtin/browser/src/browser_harness/__init__.py +8 -8
  120. package/dist/extensions/builtin/browser/src/browser_harness/_ipc.py +90 -90
  121. package/dist/extensions/builtin/browser/src/browser_harness/admin.py +722 -722
  122. package/dist/extensions/builtin/browser/src/browser_harness/daemon.py +328 -328
  123. package/dist/extensions/builtin/browser/src/browser_harness/helpers.py +396 -396
  124. package/dist/extensions/builtin/browser/src/browser_harness/run.py +103 -103
  125. package/dist/extensions/builtin/discipline/skills/brainstorming/SKILL.md +33 -33
  126. package/dist/extensions/builtin/discipline/skills/executing-plans/SKILL.md +25 -25
  127. package/dist/extensions/builtin/discipline/skills/finishing-development-branch/SKILL.md +25 -25
  128. package/dist/extensions/builtin/discipline/skills/receiving-code-review/SKILL.md +22 -22
  129. package/dist/extensions/builtin/discipline/skills/requesting-code-review/SKILL.md +31 -31
  130. package/dist/extensions/builtin/discipline/skills/systematic-debugging/SKILL.md +28 -28
  131. package/dist/extensions/builtin/discipline/skills/test-driven-development/SKILL.md +32 -32
  132. package/dist/extensions/builtin/discipline/skills/using-git-worktrees/SKILL.md +25 -25
  133. package/dist/extensions/builtin/discipline/skills/verification-before-completion/SKILL.md +27 -27
  134. package/dist/extensions/builtin/discipline/skills/writing-plans/SKILL.md +26 -26
  135. package/dist/extensions/builtin/goal/README.md +67 -67
  136. package/dist/extensions/builtin/goal/goal-controller.d.ts +39 -10
  137. package/dist/extensions/builtin/goal/goal-controller.js +1 -1
  138. package/dist/extensions/builtin/goal/goal-format.js +1 -1
  139. package/dist/extensions/builtin/goal/goal-prompts.d.ts +2 -0
  140. package/dist/extensions/builtin/goal/goal-prompts.js +5 -4
  141. package/dist/extensions/builtin/goal/goal-store.js +1 -1
  142. package/dist/extensions/builtin/goal/index.d.ts +1 -1
  143. package/dist/extensions/builtin/goal/index.js +10 -7
  144. package/dist/extensions/builtin/grub/README.md +112 -112
  145. package/dist/extensions/builtin/link-world/agent-workspace/README.md +16 -16
  146. package/dist/extensions/builtin/link-world/index.js +6 -6
  147. package/dist/extensions/builtin/link-world/internet-search/internet-search.md +65 -65
  148. package/dist/extensions/builtin/link-world/link-world-agent.md +82 -82
  149. package/dist/extensions/builtin/link-world/linkworld.md +313 -313
  150. package/dist/extensions/builtin/link-world/{network-routing.md → network-routing/network-routing.md} +67 -67
  151. package/dist/extensions/builtin/loop/README.md +92 -92
  152. package/dist/extensions/builtin/mcp/figma-design.md +68 -68
  153. package/dist/extensions/builtin/mcp/mcp-management.md +85 -85
  154. package/dist/extensions/builtin/plan/index.js +1 -1
  155. package/dist/extensions/builtin/recap/AGENT.md +15 -15
  156. package/dist/extensions/builtin/sal/README.md +72 -72
  157. package/dist/extensions/builtin/security-audit/README.md +289 -289
  158. package/dist/extensions/builtin/task/task-store.d.ts +4 -0
  159. package/dist/extensions/builtin/task/task-store.js +1 -1
  160. package/dist/extensions/builtin/team/AGENT.md +112 -112
  161. package/dist/extensions/builtin/team/TESTING.md +299 -299
  162. package/dist/extensions/builtin/token-save/README.md +56 -56
  163. package/dist/extensions/optional/AGENT.md +10 -10
  164. package/dist/index.d.ts +5 -30
  165. package/dist/index.js +1 -1
  166. package/dist/models.d.ts +7 -0
  167. package/dist/models.js +1 -0
  168. package/dist/modes/interactive/components/footer.js +1 -1
  169. package/dist/modes/interactive/components/task-status-panel.d.ts +36 -0
  170. package/dist/modes/interactive/components/task-status-panel.js +1 -0
  171. package/dist/modes/interactive/controllers/stream-render-controller.d.ts +7 -0
  172. package/dist/modes/interactive/controllers/stream-render-controller.js +2 -2
  173. package/dist/modes/interactive/interactive-mode.js +40 -40
  174. package/dist/modes/interactive/state/interactive-state.d.ts +2 -0
  175. package/dist/modes/interactive/state/interactive-state.js +1 -1
  176. package/dist/modes/interactive/theme/dark.json +85 -85
  177. package/dist/modes/interactive/theme/light.json +84 -84
  178. package/dist/modes/interactive/theme/theme-schema.json +335 -335
  179. package/dist/modes/interactive/theme/warm.json +81 -81
  180. package/dist/node_modules/@pencil-agent/ai/dist/cli.js +0 -0
  181. package/dist/node_modules/@pencil-agent/ai/dist/models.generated.js +1 -1
  182. package/dist/node_modules/@pencil-agent/ai/dist/providers/anthropic.js +2 -2
  183. package/dist/node_modules/@pencil-agent/ai/dist/providers/openai-completions.js +5 -5
  184. package/dist/node_modules/@pencil-agent/ai/dist/providers/openai-responses.js +1 -1
  185. package/dist/node_modules/@pencil-agent/ai/dist/stream.js +1 -1
  186. package/dist/packages/protocol/src/commands.d.ts +33 -0
  187. package/dist/packages/protocol/src/flags.d.ts +20 -0
  188. package/dist/packages/protocol/src/hooks.d.ts +17 -0
  189. package/dist/packages/protocol/src/hooks.js +0 -0
  190. package/dist/packages/{extension-sdk → protocol}/src/index.d.ts +7 -4
  191. package/dist/packages/protocol/src/index.js +1 -0
  192. package/dist/packages/{extension-sdk → protocol}/src/lifecycle.d.ts +15 -27
  193. package/dist/packages/protocol/src/lifecycle.js +0 -0
  194. package/dist/packages/{extension-sdk → protocol}/src/tools.d.ts +1 -1
  195. package/dist/packages/protocol/src/tools.js +0 -0
  196. package/dist/public-config.d.ts +12 -0
  197. package/dist/public-config.js +1 -0
  198. package/dist/runtime.d.ts +9 -0
  199. package/dist/runtime.js +1 -0
  200. package/dist/session-compaction.d.ts +7 -0
  201. package/dist/session-compaction.js +1 -0
  202. package/dist/session.d.ts +7 -0
  203. package/dist/session.js +1 -0
  204. package/dist/skills.d.ts +7 -0
  205. package/dist/skills.js +1 -0
  206. package/dist/tools.d.ts +7 -0
  207. package/dist/tools.js +1 -0
  208. package/docs/ACP/345/215/217/350/256/256/351/233/206/346/210/220/345/274/200/345/217/221/346/226/207/346/241/243.md +851 -0
  209. package/docs/SDK-TESTING.md +364 -0
  210. package/docs/codex-goal-command-impl.md +1055 -1055
  211. package/docs/codex-goal-vs-grub.md +500 -500
  212. package/docs/custom-provider.md +27 -27
  213. package/docs/extensions.md +27 -27
  214. package/docs/keybindings.md +27 -27
  215. package/docs/loop 重构完成总结.md +250 -250
  216. package/docs/loop 重构完成报告.md +122 -122
  217. package/docs/loop 重构方案.md +1222 -1222
  218. package/docs/loop 重构方案实现报告.md +158 -158
  219. package/docs/loop 重构方案对比分析.md +128 -128
  220. package/docs/loop 重构计划.md +320 -320
  221. package/docs/loop-usage-examples.md +214 -214
  222. package/docs/mem-core/346/212/200/346/234/257/346/226/207/346/241/243.md +593 -0
  223. package/docs/models.md +27 -27
  224. package/docs/packages.md +27 -27
  225. package/docs/pi-design-philosophy.md +457 -457
  226. package/docs/planmode.md +1987 -1987
  227. package/docs/prompt-templates.md +27 -27
  228. package/docs/providers.md +27 -27
  229. package/docs/sdk.md +27 -27
  230. package/docs/skills.md +27 -27
  231. package/docs/startup-performance-optimization.md +301 -0
  232. package/docs/themes.md +27 -27
  233. package/docs/tui.md +27 -27
  234. package/docs/认知地图.md +47 -0
  235. package/package.json +190 -162
  236. package/dist/packages/extension-sdk/src/index.js +0 -1
  237. package/docs/cc-agent-design.md +0 -1297
  238. package/docs/cc-tui-design.md +0 -1333
  239. package/docs//345/257/271/346/240/207Claude-Code.md +0 -1775
  240. /package/dist/packages/{extension-sdk/src/lifecycle.js → protocol/src/commands.js} +0 -0
  241. /package/dist/packages/{extension-sdk/src/tools.js → protocol/src/flags.js} +0 -0
@@ -1,338 +1,338 @@
1
- # Substack — Data Extraction
2
-
3
- Field-tested against multiple Substack publications on 2026-04-27.
4
- No authentication required for any approach documented here.
5
- All endpoints work via `http_get` without a browser.
6
-
7
- ---
8
-
9
- ## TL;DR
10
-
11
- Substack exposes a clean public REST API at `{publication}.substack.com/api/v1/`.
12
- Every publication hosted on Substack (custom domain or `{name}.substack.com`)
13
- responds to the same API paths. No API key, no login, no browser required.
14
-
15
- **What you can do:**
16
- - List all posts from any publication (`/api/v1/posts`)
17
- - Fetch full post content by slug (`/api/v1/posts/{slug}`)
18
- - Fetch post comments (`/api/v1/post/{post_id}/comments`)
19
- - Read the RSS feed (`/feed`) for title/date/link/description metadata
20
-
21
- **Limitations:**
22
- - Paid-only post bodies return a truncated HTML preview for `body_html` (not the full article)
23
- - No cross-publication search API accessible without a logged-in session
24
- - Comment endpoint uses `post_id` (integer), not slug
25
-
26
- ---
27
-
28
- ## Approach 1 (Recommended): Publication Post List
29
-
30
- `GET https://{subdomain}.substack.com/api/v1/posts?limit=N&offset=N`
31
-
32
- Works for any Substack publication. Returns posts sorted newest-first.
33
-
34
- ```python
35
- from helpers import http_get
36
- import json
37
-
38
- def substack_list_posts(publication_url, limit=20, offset=0):
39
- """List posts from a Substack publication.
40
-
41
- Args:
42
- publication_url: Base URL of the publication, e.g.
43
- 'https://www.slowboring.com' or
44
- 'https://simonwillison.substack.com'
45
- limit: Number of posts to return (max observed: 100)
46
- offset: Pagination offset
47
-
48
- Returns list of post dicts with keys: title, subtitle, slug,
49
- canonical_url, post_date, audience, wordcount, reactions, restacks.
50
- audience is 'everyone' (free) or 'only_paid' (paywalled).
51
- """
52
- url = f"{publication_url.rstrip('/')}/api/v1/posts?limit={limit}&offset={offset}"
53
- posts = json.loads(http_get(url))
54
- return [
55
- {
56
- "title": p.get("title"),
57
- "subtitle": p.get("subtitle"),
58
- "slug": p.get("slug"),
59
- "url": p.get("canonical_url"),
60
- "post_date": p.get("post_date"),
61
- "audience": p.get("audience"), # 'everyone' or 'only_paid'
62
- "wordcount": p.get("wordcount"),
63
- "reactions": p.get("reactions"), # e.g. {"❤": 221}
64
- "restacks": p.get("restacks"),
65
- "cover_image": p.get("cover_image"),
66
- "post_id": p.get("id"),
67
- }
68
- for p in posts
69
- ]
70
-
71
- posts = substack_list_posts("https://www.slowboring.com", limit=10)
72
- # [
73
- # {
74
- # "title": "What to make of the generic ballot",
75
- # "subtitle": "Plus ties, Mamdani, the Obama legacy, and fundraising's diminishing returns",
76
- # "slug": "what-to-make-of-the-generic-ballot",
77
- # "url": "https://www.slowboring.com/p/what-to-make-of-the-generic-ballot",
78
- # "post_date": "2026-04-24T10:03:26.581Z",
79
- # "audience": "everyone",
80
- # "wordcount": 4369,
81
- # "reactions": {"❤": 221},
82
- # "restacks": 10,
83
- # "post_id": 194950421,
84
- # },
85
- # ...
86
- # ]
87
-
88
- # Filter for free (non-paywalled) posts only
89
- free_posts = [p for p in posts if p["audience"] == "everyone"]
90
- ```
91
-
92
- ### Pagination
93
-
94
- ```python
95
- def substack_all_posts(publication_url, max_posts=200):
96
- """Fetch all posts from a publication via paginated API."""
97
- all_posts = []
98
- offset = 0
99
- batch_size = 50
100
- while len(all_posts) < max_posts:
101
- batch = substack_list_posts(publication_url, limit=batch_size, offset=offset)
102
- if not batch:
103
- break
104
- all_posts.extend(batch)
105
- if len(batch) < batch_size:
106
- break # last page
107
- offset += batch_size
108
- return all_posts[:max_posts]
109
- ```
110
-
111
- ---
112
-
113
- ## Approach 2: Full Post Content by Slug
114
-
115
- `GET https://{subdomain}.substack.com/api/v1/posts/{slug}`
116
-
117
- Returns the full post including `body_html` for free posts. Paywalled posts
118
- return a truncated HTML preview for `body_html` (not the full article).
119
-
120
- ```python
121
- from helpers import http_get
122
- import json, re
123
-
124
- def substack_get_post(publication_url, slug):
125
- """Fetch full content of a single Substack post by slug.
126
-
127
- Returns title, body as plain text, body_html, author, date,
128
- and metadata. body_html is a truncated preview for paywalled posts.
129
- """
130
- url = f"{publication_url.rstrip('/')}/api/v1/posts/{slug}"
131
- post = json.loads(http_get(url))
132
-
133
- body_html = post.get("body_html")
134
- body_text = None
135
- if body_html:
136
- # Strip HTML tags for plain text
137
- body_text = re.sub(r'<[^>]+>', ' ', body_html)
138
- body_text = re.sub(r'\s+', ' ', body_text).strip()
139
-
140
- return {
141
- "title": post.get("title"),
142
- "subtitle": post.get("subtitle"),
143
- "slug": post.get("slug"),
144
- "url": post.get("canonical_url"),
145
- "post_date": post.get("post_date"),
146
- "audience": post.get("audience"),
147
- "wordcount": post.get("wordcount"),
148
- "reactions": post.get("reactions"),
149
- "restacks": post.get("restacks"),
150
- "body_html": body_html, # full article if free; truncated preview if paywalled
151
- "body_text": body_text, # full plain text if free; truncated if paywalled
152
- "truncated_preview": post.get("truncated_body_text"), # always present
153
- "post_id": post.get("id"),
154
- "publication_id": post.get("publication_id"),
155
- }
156
-
157
- post = substack_get_post(
158
- "https://www.slowboring.com",
159
- "what-to-make-of-the-generic-ballot"
160
- )
161
- # Free post (audience == "everyone"):
162
- # {
163
- # "title": "What to make of the generic ballot",
164
- # "audience": "everyone",
165
- # "wordcount": 4369,
166
- # "body_html": "<p>I suppose this isn't a huge surprise...</p>...", # ~40KB full article
167
- # "body_text": "I suppose this isn't a huge surprise ...", # ~25KB plain text
168
- # "post_id": 194950421,
169
- # }
170
-
171
- # Paywalled post (audience == "only_paid"):
172
- # post["body_html"] -> truncated HTML preview (a few hundred bytes, not the full article)
173
- # post["body_text"] -> truncated plain text (stripped from truncated HTML)
174
- # post["truncated_preview"] -> short plaintext excerpt (separate, always present)
175
- # Use audience == "everyone" as the reliable signal for full content availability.
176
- ```
177
-
178
- ---
179
-
180
- ## Approach 3: Post Comments
181
-
182
- `GET https://{subdomain}.substack.com/api/v1/post/{post_id}/comments?limit=N`
183
-
184
- Note: uses **integer `post_id`**, not slug. Get `post_id` from the post list
185
- or post detail responses.
186
-
187
- ```python
188
- from helpers import http_get
189
- import json
190
-
191
- def substack_get_comments(publication_url, post_id, limit=50):
192
- """Fetch top-level comments for a Substack post.
193
-
194
- Args:
195
- publication_url: Base URL of the publication
196
- post_id: Integer post ID (from post list or post detail)
197
- limit: Max comments to return
198
-
199
- Returns list of comment dicts.
200
- """
201
- url = f"{publication_url.rstrip('/')}/api/v1/post/{post_id}/comments?limit={limit}"
202
- data = json.loads(http_get(url))
203
- comments = data.get("comments", [])
204
- return [
205
- {
206
- "comment_id": c.get("id"),
207
- "author": c.get("name"),
208
- "author_handle": c.get("handle"),
209
- "body": c.get("body"),
210
- "date": c.get("date"),
211
- "reaction_count": c.get("reaction_count"), # e.g. {"❤": 99}
212
- "children_count": c.get("children_count"), # reply count
213
- "restacks": c.get("restacks"),
214
- }
215
- for c in comments
216
- if not c.get("deleted")
217
- ]
218
-
219
- comments = substack_get_comments("https://www.slowboring.com", 194950421, limit=10)
220
- # [
221
- # {
222
- # "comment_id": 248392394,
223
- # "author": "John from FL",
224
- # "body": "Sam asks: \"don't they kind of have a point...\"",
225
- # "date": "2026-04-24T10:20:21.997Z",
226
- # "reaction_count": {"❤": 99},
227
- # "children_count": 3,
228
- # },
229
- # ...
230
- # ]
231
- ```
232
-
233
- ---
234
-
235
- ## Approach 4: RSS Feed (Lightweight Metadata)
236
-
237
- `GET https://{subdomain}.substack.com/feed`
238
-
239
- Returns an RSS 2.0 feed. Useful when you only need title/date/link/description
240
- without hitting the JSON API. Works as a quick check without parsing JSON.
241
-
242
- ```python
243
- from helpers import http_get
244
- import re
245
-
246
- def substack_rss(publication_url, max_items=20):
247
- """Fetch recent post metadata via RSS feed.
248
-
249
- Lighter than the JSON API — only returns title, link, pubDate,
250
- and description (short excerpt). Does not include body_html or wordcount.
251
- """
252
- rss = http_get(f"{publication_url.rstrip('/')}/feed")
253
- items = re.findall(
254
- r'<item>(.*?)</item>',
255
- rss,
256
- re.DOTALL
257
- )[:max_items]
258
-
259
- results = []
260
- for item in items:
261
- title = re.search(r'<title><!\[CDATA\[(.*?)\]\]></title>', item)
262
- link = re.search(r'<link>(https?://[^<]+)</link>', item)
263
- date = re.search(r'<pubDate>(.*?)</pubDate>', item)
264
- desc = re.search(r'<description><!\[CDATA\[(.*?)\]\]></description>', item, re.DOTALL)
265
- results.append({
266
- "title": title.group(1) if title else None,
267
- "link": link.group(1) if link else None,
268
- "pub_date": date.group(1) if date else None,
269
- "description": desc.group(1).strip() if desc else None,
270
- })
271
- return results
272
-
273
- feed = substack_rss("https://www.slowboring.com", max_items=5)
274
- # [
275
- # {
276
- # "title": "Sunday Mailbag + Thread",
277
- # "link": "https://www.slowboring.com/p/sunday-mailbag-thread-48b",
278
- # "pub_date": "Sun, 26 Apr 2026 17:02:04 GMT",
279
- # "description": "Ask your questions below.",
280
- # },
281
- # ...
282
- # ]
283
- ```
284
-
285
- ---
286
-
287
- ## Publication URL Formats
288
-
289
- Substack publications use one of two URL formats:
290
-
291
- ```python
292
- # Format 1: native subdomain (older or simpler publications)
293
- "https://simonwillison.substack.com"
294
-
295
- # Format 2: custom domain (larger publications, purchased domain)
296
- "https://www.slowboring.com" # Matthew Yglesias — Slow Boring
297
- "https://unchartedterritories.tomaspueyo.com" # Tomas Pueyo
298
-
299
- # Both formats use identical API paths:
300
- # {base_url}/api/v1/posts
301
- # {base_url}/api/v1/posts/{slug}
302
- # {base_url}/api/v1/post/{post_id}/comments
303
- # {base_url}/feed
304
- ```
305
-
306
- If you only know a publication's Substack handle (e.g., `matthewyglesias`),
307
- the canonical subdomain URL is `https://matthewyglesias.substack.com`. Custom
308
- domain URLs are listed on the publication's about page or in the RSS feed's
309
- `<link>` element.
310
-
311
- ---
312
-
313
- ## Gotchas
314
-
315
- - **Paywalled post `body_html` is a truncated preview, not `null`** — the API
316
- returns a short HTML excerpt (typically a few hundred to a few KB). It is
317
- never `null`. The reliable way to detect full content availability is
318
- `audience == "everyone"`. For paywalled posts, compare `len(body_html)` to
319
- `wordcount * ~7` (average bytes per word) — a large gap means truncation.
320
- `truncated_body_text` (plaintext) is always present regardless of audience.
321
- - **Comments endpoint uses integer `post_id`, not slug** — `/api/v1/post/{id}/comments`
322
- is correct. `/api/v1/posts/{slug}/comments` returns 404.
323
- - **`reactions` field is a dict with emoji keys**, e.g. `{"❤": 221}` — not a
324
- plain integer. Sum the values for total reaction count:
325
- `total = sum(post["reactions"].values())`.
326
- - **`limit` on post list is not strictly capped** — values up to at least 100
327
- work; beyond that behavior is untested.
328
- - **Custom domains and `{name}.substack.com` are interchangeable** — use
329
- whichever you have. The `x-sub` response header always reflects the internal
330
- publication handle.
331
- - **`audience` values**: only `"everyone"` and `"only_paid"` observed. A third
332
- value `"founding"` exists in Substack's data model but is rare.
333
- - **No unauthenticated cross-publication search** — `substack.com/api/v1/search`
334
- returns HTML (a React page), not JSON. To find publications, use external
335
- search engines (`site:substack.com {query}`) or the RSS discovery approach.
336
- - **Podcast posts** have `type == "podcast"` and `podcast_url` set; their
337
- `body_html` may be a show-notes HTML block. Check `type` to distinguish
338
- newsletter posts from podcast episodes.
1
+ # Substack — Data Extraction
2
+
3
+ Field-tested against multiple Substack publications on 2026-04-27.
4
+ No authentication required for any approach documented here.
5
+ All endpoints work via `http_get` without a browser.
6
+
7
+ ---
8
+
9
+ ## TL;DR
10
+
11
+ Substack exposes a clean public REST API at `{publication}.substack.com/api/v1/`.
12
+ Every publication hosted on Substack (custom domain or `{name}.substack.com`)
13
+ responds to the same API paths. No API key, no login, no browser required.
14
+
15
+ **What you can do:**
16
+ - List all posts from any publication (`/api/v1/posts`)
17
+ - Fetch full post content by slug (`/api/v1/posts/{slug}`)
18
+ - Fetch post comments (`/api/v1/post/{post_id}/comments`)
19
+ - Read the RSS feed (`/feed`) for title/date/link/description metadata
20
+
21
+ **Limitations:**
22
+ - Paid-only posts return only a truncated HTML preview in `body_html` (not the full article)
23
+ - No cross-publication search API accessible without a logged-in session
24
+ - Comment endpoint uses `post_id` (integer), not slug
25
+
26
+ ---
27
+
28
+ ## Approach 1 (Recommended): Publication Post List
29
+
30
+ `GET https://{subdomain}.substack.com/api/v1/posts?limit=N&offset=M`
31
+
32
+ Works for any Substack publication. Returns posts sorted newest-first.
33
+
34
+ ```python
35
+ from helpers import http_get
36
+ import json
37
+
38
+ def substack_list_posts(publication_url, limit=20, offset=0):
39
+ """List posts from a Substack publication.
40
+
41
+ Args:
42
+ publication_url: Base URL of the publication, e.g.
43
+ 'https://www.slowboring.com' or
44
+ 'https://simonwillison.substack.com'
45
+ limit: Number of posts to return (values up to 100 verified; larger values untested)
46
+ offset: Pagination offset
47
+
48
+ Returns list of post dicts with keys: title, subtitle, slug, url,
49
+ post_date, audience, wordcount, reactions, restacks, cover_image, post_id.
50
+ audience is 'everyone' (free) or 'only_paid' (paywalled).
51
+ """
52
+ url = f"{publication_url.rstrip('/')}/api/v1/posts?limit={limit}&offset={offset}"
53
+ posts = json.loads(http_get(url))
54
+ return [
55
+ {
56
+ "title": p.get("title"),
57
+ "subtitle": p.get("subtitle"),
58
+ "slug": p.get("slug"),
59
+ "url": p.get("canonical_url"),
60
+ "post_date": p.get("post_date"),
61
+ "audience": p.get("audience"), # 'everyone' or 'only_paid'
62
+ "wordcount": p.get("wordcount"),
63
+ "reactions": p.get("reactions"), # e.g. {"❤": 221}
64
+ "restacks": p.get("restacks"),
65
+ "cover_image": p.get("cover_image"),
66
+ "post_id": p.get("id"),
67
+ }
68
+ for p in posts
69
+ ]
70
+
71
+ posts = substack_list_posts("https://www.slowboring.com", limit=10)
72
+ # [
73
+ # {
74
+ # "title": "What to make of the generic ballot",
75
+ # "subtitle": "Plus ties, Mamdani, the Obama legacy, and fundraising's diminishing returns",
76
+ # "slug": "what-to-make-of-the-generic-ballot",
77
+ # "url": "https://www.slowboring.com/p/what-to-make-of-the-generic-ballot",
78
+ # "post_date": "2026-04-24T10:03:26.581Z",
79
+ # "audience": "everyone",
80
+ # "wordcount": 4369,
81
+ # "reactions": {"❤": 221},
82
+ # "restacks": 10,
83
+ # "post_id": 194950421,
84
+ # },
85
+ # ...
86
+ # ]
87
+
88
+ # Filter for free (non-paywalled) posts only
89
+ free_posts = [p for p in posts if p["audience"] == "everyone"]
90
+ ```
91
+
92
+ ### Pagination
93
+
94
+ ```python
95
+ def substack_all_posts(publication_url, max_posts=200):
96
+ """Fetch all posts from a publication via paginated API."""
97
+ all_posts = []
98
+ offset = 0
99
+ batch_size = 50
100
+ while len(all_posts) < max_posts:
101
+ batch = substack_list_posts(publication_url, limit=batch_size, offset=offset)
102
+ if not batch:
103
+ break
104
+ all_posts.extend(batch)
105
+ if len(batch) < batch_size:
106
+ break # last page
107
+ offset += batch_size
108
+ return all_posts[:max_posts]
109
+ ```
110
+
111
+ ---
112
+
113
+ ## Approach 2: Full Post Content by Slug
114
+
115
+ `GET https://{subdomain}.substack.com/api/v1/posts/{slug}`
116
+
117
+ Returns the full post including `body_html` for free posts. Paywalled posts
118
+ return a truncated HTML preview for `body_html` (not the full article).
119
+
120
+ ```python
121
+ from helpers import http_get
122
+ import json, re
123
+
124
+ def substack_get_post(publication_url, slug):
125
+ """Fetch full content of a single Substack post by slug.
126
+
127
+ Returns title, body as plain text, body_html, author, date,
128
+ and metadata. body_html is a truncated preview for paywalled posts.
129
+ """
130
+ url = f"{publication_url.rstrip('/')}/api/v1/posts/{slug}"
131
+ post = json.loads(http_get(url))
132
+
133
+ body_html = post.get("body_html")
134
+ body_text = None
135
+ if body_html:
136
+ # Strip HTML tags for plain text
137
+ body_text = re.sub(r'<[^>]+>', ' ', body_html)
138
+ body_text = re.sub(r'\s+', ' ', body_text).strip()
139
+
140
+ return {
141
+ "title": post.get("title"),
142
+ "subtitle": post.get("subtitle"),
143
+ "slug": post.get("slug"),
144
+ "url": post.get("canonical_url"),
145
+ "post_date": post.get("post_date"),
146
+ "audience": post.get("audience"),
147
+ "wordcount": post.get("wordcount"),
148
+ "reactions": post.get("reactions"),
149
+ "restacks": post.get("restacks"),
150
+ "body_html": body_html, # full article if free; truncated preview if paywalled
151
+ "body_text": body_text, # full plain text if free; truncated if paywalled
152
+ "truncated_preview": post.get("truncated_body_text"), # always present
153
+ "post_id": post.get("id"),
154
+ "publication_id": post.get("publication_id"),
155
+ }
156
+
157
+ post = substack_get_post(
158
+ "https://www.slowboring.com",
159
+ "what-to-make-of-the-generic-ballot"
160
+ )
161
+ # Free post (audience == "everyone"):
162
+ # {
163
+ # "title": "What to make of the generic ballot",
164
+ # "audience": "everyone",
165
+ # "wordcount": 4369,
166
+ # "body_html": "<p>I suppose this isn't a huge surprise...</p>...", # ~40KB full article
167
+ # "body_text": "I suppose this isn't a huge surprise ...", # ~25KB plain text
168
+ # "post_id": 194950421,
169
+ # }
170
+
171
+ # Paywalled post (audience == "only_paid"):
172
+ # post["body_html"] -> truncated HTML preview (a few hundred bytes, not the full article)
173
+ # post["body_text"] -> truncated plain text (stripped from truncated HTML)
174
+ # post["truncated_preview"] -> short plaintext excerpt (separate, always present)
175
+ # Use audience == "everyone" as the reliable signal for full content availability.
176
+ ```
177
+
178
+ ---
179
+
180
+ ## Approach 3: Post Comments
181
+
182
+ `GET https://{subdomain}.substack.com/api/v1/post/{post_id}/comments?limit=N`
183
+
184
+ Note: uses **integer `post_id`**, not slug. Get `post_id` from the post list
185
+ or post detail responses.
186
+
187
+ ```python
188
+ from helpers import http_get
189
+ import json
190
+
191
+ def substack_get_comments(publication_url, post_id, limit=50):
192
+ """Fetch top-level comments for a Substack post.
193
+
194
+ Args:
195
+ publication_url: Base URL of the publication
196
+ post_id: Integer post ID (from post list or post detail)
197
+ limit: Max comments to return
198
+
199
+ Returns list of comment dicts.
200
+ """
201
+ url = f"{publication_url.rstrip('/')}/api/v1/post/{post_id}/comments?limit={limit}"
202
+ data = json.loads(http_get(url))
203
+ comments = data.get("comments", [])
204
+ return [
205
+ {
206
+ "comment_id": c.get("id"),
207
+ "author": c.get("name"),
208
+ "author_handle": c.get("handle"),
209
+ "body": c.get("body"),
210
+ "date": c.get("date"),
211
+ "reaction_count": c.get("reaction_count"), # e.g. {"❤": 99}
212
+ "children_count": c.get("children_count"), # reply count
213
+ "restacks": c.get("restacks"),
214
+ }
215
+ for c in comments
216
+ if not c.get("deleted")
217
+ ]
218
+
219
+ comments = substack_get_comments("https://www.slowboring.com", 194950421, limit=10)
220
+ # [
221
+ # {
222
+ # "comment_id": 248392394,
223
+ # "author": "John from FL",
224
+ # "body": "Sam asks: \"don't they kind of have a point...\"",
225
+ # "date": "2026-04-24T10:20:21.997Z",
226
+ # "reaction_count": {"❤": 99},
227
+ # "children_count": 3,
228
+ # },
229
+ # ...
230
+ # ]
231
+ ```
232
+
233
+ ---
234
+
235
+ ## Approach 4: RSS Feed (Lightweight Metadata)
236
+
237
+ `GET https://{subdomain}.substack.com/feed`
238
+
239
+ Returns an RSS 2.0 feed. Useful when you only need title/date/link/description
240
+ without hitting the JSON API. Works as a quick check without parsing JSON.
241
+
242
+ ```python
243
+ from helpers import http_get
244
+ import re
245
+
246
+ def substack_rss(publication_url, max_items=20):
247
+ """Fetch recent post metadata via RSS feed.
248
+
249
+ Lighter than the JSON API — only returns title, link, pubDate,
250
+ and description (short excerpt). Does not include body_html or wordcount.
251
+ """
252
+ rss = http_get(f"{publication_url.rstrip('/')}/feed")
253
+ items = re.findall(
254
+ r'<item>(.*?)</item>',
255
+ rss,
256
+ re.DOTALL
257
+ )[:max_items]
258
+
259
+ results = []
260
+ for item in items:
261
+ title = re.search(r'<title><!\[CDATA\[(.*?)\]\]></title>', item)
262
+ link = re.search(r'<link>(https?://[^<]+)</link>', item)
263
+ date = re.search(r'<pubDate>(.*?)</pubDate>', item)
264
+ desc = re.search(r'<description><!\[CDATA\[(.*?)\]\]></description>', item, re.DOTALL)
265
+ results.append({
266
+ "title": title.group(1) if title else None,
267
+ "link": link.group(1) if link else None,
268
+ "pub_date": date.group(1) if date else None,
269
+ "description": desc.group(1).strip() if desc else None,
270
+ })
271
+ return results
272
+
273
+ feed = substack_rss("https://www.slowboring.com", max_items=5)
274
+ # [
275
+ # {
276
+ # "title": "Sunday Mailbag + Thread",
277
+ # "link": "https://www.slowboring.com/p/sunday-mailbag-thread-48b",
278
+ # "pub_date": "Sun, 26 Apr 2026 17:02:04 GMT",
279
+ # "description": "Ask your questions below.",
280
+ # },
281
+ # ...
282
+ # ]
283
+ ```
284
+
285
+ ---
286
+
287
+ ## Publication URL Formats
288
+
289
+ Substack publications use one of two URL formats:
290
+
291
+ ```python
292
+ # Format 1: native subdomain (older or simpler publications)
293
+ "https://simonwillison.substack.com"
294
+
295
+ # Format 2: custom domain (larger publications, purchased domain)
296
+ "https://www.slowboring.com" # Matthew Yglesias — Slow Boring
297
+ "https://unchartedterritories.tomaspueyo.com" # Tomas Pueyo
298
+
299
+ # Both formats use identical API paths:
300
+ # {base_url}/api/v1/posts
301
+ # {base_url}/api/v1/posts/{slug}
302
+ # {base_url}/api/v1/post/{post_id}/comments
303
+ # {base_url}/feed
304
+ ```
305
+
306
+ If you only know a publication's Substack handle (e.g., `matthewyglesias`),
307
+ the canonical subdomain URL is `https://matthewyglesias.substack.com`. Custom
308
+ domain URLs are listed on the publication's about page or in the RSS feed's
309
+ `<link>` element.
310
+
311
+ ---
312
+
313
+ ## Gotchas
314
+
315
+ - **Paywalled post `body_html` is a truncated preview, not `null`** — the API
316
+ returns a short HTML excerpt (typically a few hundred bytes to a few KB). It is
317
+ never `null`. The reliable way to detect full content availability is
318
+ `audience == "everyone"`. For paywalled posts, compare `len(body_html)` to
319
+ `wordcount * ~7` (average bytes per word) — a large gap means truncation.
320
+ `truncated_body_text` (plaintext) is always present regardless of audience.
321
+ - **Comments endpoint uses integer `post_id`, not slug** — `/api/v1/post/{id}/comments`
322
+ is correct. `/api/v1/posts/{slug}/comments` returns 404.
323
+ - **`reactions` field is a dict with emoji keys**, e.g. `{"❤": 221}` — not a
324
+ plain integer. Sum the values for total reaction count:
325
+ `total = sum(post["reactions"].values())`.
326
+ - **`limit` on post list is not strictly capped** — values up to at least 100
327
+ work; beyond that behavior is untested.
328
+ - **Custom domains and `{name}.substack.com` are interchangeable** — use
329
+ whichever you have. The `x-sub` response header always reflects the internal
330
+ publication handle.
331
+ - **`audience` values**: only `"everyone"` and `"only_paid"` observed. A third
332
+ value `"founding"` exists in Substack's data model but is rare.
333
+ - **No unauthenticated cross-publication search** — `substack.com/api/v1/search`
334
+ returns HTML (a React page), not JSON. To find publications, use external
335
+ search engines (`site:substack.com {query}`) or the RSS discovery approach.
336
+ - **Podcast posts** have `type == "podcast"` and `podcast_url` set; their
337
+ `body_html` may be a show-notes HTML block. Check `type` to distinguish
338
+ newsletter posts from podcast episodes.