@pencil-agent/nano-pencil 2.0.0-beta.8 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. package/README.md +267 -267
  2. package/dist/build-meta.json +3 -3
  3. package/dist/core/export-html/AGENT.md +11 -11
  4. package/dist/core/export-html/template.css +971 -971
  5. package/dist/core/export-html/template.html +54 -54
  6. package/dist/core/extensions-host/index.d.ts +1 -1
  7. package/dist/core/extensions-host/loader.js +1 -1
  8. package/dist/core/extensions-host/runner.d.ts +1 -0
  9. package/dist/core/extensions-host/runner.js +2 -2
  10. package/dist/core/extensions-host/types.d.ts +17 -22
  11. package/dist/core/lib/ai/src/types.d.ts +12 -2
  12. package/dist/core/persona/persona-manager.js +5 -2
  13. package/dist/core/runtime/agent-session.js +3 -3
  14. package/dist/core/runtime/extension-core-bindings.d.ts +1 -0
  15. package/dist/core/runtime/extension-core-bindings.js +2 -2
  16. package/dist/extensions/builtin/AGENT.md +115 -115
  17. package/dist/extensions/builtin/browser/AGENT.md +17 -17
  18. package/dist/extensions/builtin/browser/agent-workspace/agent_helpers.py +12 -12
  19. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/amazon/product-search.md +198 -198
  20. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/archive-org/scraping.md +341 -341
  21. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/arxiv/scraping.md +311 -311
  22. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/arxiv-bulk/scraping.md +333 -333
  23. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/atlas/overview.md +70 -70
  24. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/booking-com/scraping.md +578 -578
  25. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/capterra/scraping.md +440 -440
  26. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/centilebrain/generate-estimates.md +110 -110
  27. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coingecko/scraping.md +325 -325
  28. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coinmarketcap/scraping.md +463 -463
  29. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coursera/scraping.md +360 -360
  30. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/craigslist/scraping.md +390 -390
  31. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/crossref/scraping.md +568 -568
  32. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/dev-to/scraping.md +323 -323
  33. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/duckduckgo/scraping.md +349 -349
  34. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/ebay/scraping.md +435 -435
  35. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/etsy/scraping.md +506 -506
  36. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/eventbrite/scraping.md +363 -363
  37. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/expedia/automation.md +168 -168
  38. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/facebook/groups.md +236 -236
  39. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/facebook/pages.md +295 -295
  40. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/framer/editor.md +108 -108
  41. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/fred/scraping.md +493 -493
  42. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/g2/scraping.md +580 -580
  43. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/genius/scraping.md +511 -511
  44. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/github/repo-actions.md +65 -65
  45. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/github/scraping.md +184 -184
  46. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/glassdoor/scraping.md +543 -543
  47. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/gmail/compose.md +122 -122
  48. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/goodreads/scraping.md +461 -461
  49. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/gutenberg/scraping.md +383 -383
  50. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/hackernews/scraping.md +243 -243
  51. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/howlongtobeat/scraping.md +473 -473
  52. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/imdb/scraping.md +271 -271
  53. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/itch-io/scraping.md +436 -436
  54. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/job-boards/indeed-glassdoor.md +1021 -1021
  55. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/letterboxd/scraping.md +349 -349
  56. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/linkedin/invitation-manager.md +109 -109
  57. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/loom/folder-enumeration.md +170 -170
  58. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/macrotrends/scraping.md +537 -537
  59. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/medium/article-hydration.md +120 -120
  60. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/medium/scraping.md +414 -414
  61. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/metacritic/scraping.md +477 -477
  62. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/musicbrainz/scraping.md +478 -478
  63. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/nasa/scraping.md +339 -339
  64. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/news-aggregation/multi-source.md +205 -205
  65. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/open-library/scraping.md +472 -472
  66. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/openalex/scraping.md +470 -470
  67. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/openstreetmap/scraping.md +490 -490
  68. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/package-registries/npm-pypi.md +478 -478
  69. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/polymarket/scraping.md +234 -234
  70. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/producthunt/scraping.md +307 -307
  71. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/pubmed/scraping.md +421 -421
  72. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/quora/scraping.md +364 -364
  73. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/rawg/scraping.md +352 -352
  74. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/reddit/scraping.md +124 -124
  75. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/rest-countries/scraping.md +233 -233
  76. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/sec-edgar/scraping.md +361 -361
  77. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/README.md +36 -36
  78. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/embedded-apps.md +72 -72
  79. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/knowledge-base.md +109 -109
  80. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/polaris-inputs.md +137 -137
  81. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/soundcloud/scraping.md +362 -362
  82. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/spotify/scraping.md +339 -339
  83. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/stackoverflow/scraping.md +435 -435
  84. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/steam/scraping.md +575 -575
  85. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/substack/scraping.md +338 -338
  86. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/thetechgeeks/pricing.md +52 -52
  87. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/tiktok/upload.md +107 -107
  88. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/tradingview/scraping.md +309 -309
  89. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/trello/boards-and-lists.md +88 -88
  90. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/trustpilot/scraping.md +375 -375
  91. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/walmart/scraping.md +444 -444
  92. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/wayback-machine/scraping.md +306 -306
  93. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/weather/scraping.md +398 -398
  94. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/wellfound/scraping.md +596 -596
  95. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/world-bank/scraping.md +356 -356
  96. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/xiaohongshu/scraping.md +84 -84
  97. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/youtube/scraping.md +418 -418
  98. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/zillow/scraping.md +433 -433
  99. package/dist/extensions/builtin/browser/browser.md +73 -73
  100. package/dist/extensions/builtin/browser/install.md +142 -142
  101. package/dist/extensions/builtin/browser/interaction-skills/connection.md +48 -48
  102. package/dist/extensions/builtin/browser/interaction-skills/cookies.md +3 -3
  103. package/dist/extensions/builtin/browser/interaction-skills/cross-origin-iframes.md +3 -3
  104. package/dist/extensions/builtin/browser/interaction-skills/dialogs.md +64 -64
  105. package/dist/extensions/builtin/browser/interaction-skills/downloads.md +3 -3
  106. package/dist/extensions/builtin/browser/interaction-skills/drag-and-drop.md +3 -3
  107. package/dist/extensions/builtin/browser/interaction-skills/dropdowns.md +3 -3
  108. package/dist/extensions/builtin/browser/interaction-skills/iframes.md +3 -3
  109. package/dist/extensions/builtin/browser/interaction-skills/network-requests.md +3 -3
  110. package/dist/extensions/builtin/browser/interaction-skills/print-as-pdf.md +3 -3
  111. package/dist/extensions/builtin/browser/interaction-skills/profile-sync.md +90 -90
  112. package/dist/extensions/builtin/browser/interaction-skills/screenshots.md +17 -17
  113. package/dist/extensions/builtin/browser/interaction-skills/scrolling.md +3 -3
  114. package/dist/extensions/builtin/browser/interaction-skills/shadow-dom.md +3 -3
  115. package/dist/extensions/builtin/browser/interaction-skills/tabs.md +69 -69
  116. package/dist/extensions/builtin/browser/interaction-skills/uploads.md +1 -1
  117. package/dist/extensions/builtin/browser/interaction-skills/viewport.md +3 -3
  118. package/dist/extensions/builtin/browser/src/browser_harness/AGENT.md +15 -15
  119. package/dist/extensions/builtin/browser/src/browser_harness/__init__.py +8 -8
  120. package/dist/extensions/builtin/browser/src/browser_harness/_ipc.py +90 -90
  121. package/dist/extensions/builtin/browser/src/browser_harness/admin.py +722 -722
  122. package/dist/extensions/builtin/browser/src/browser_harness/daemon.py +328 -328
  123. package/dist/extensions/builtin/browser/src/browser_harness/helpers.py +396 -396
  124. package/dist/extensions/builtin/browser/src/browser_harness/run.py +103 -103
  125. package/dist/extensions/builtin/discipline/skills/brainstorming/SKILL.md +33 -33
  126. package/dist/extensions/builtin/discipline/skills/executing-plans/SKILL.md +25 -25
  127. package/dist/extensions/builtin/discipline/skills/finishing-development-branch/SKILL.md +25 -25
  128. package/dist/extensions/builtin/discipline/skills/receiving-code-review/SKILL.md +22 -22
  129. package/dist/extensions/builtin/discipline/skills/requesting-code-review/SKILL.md +31 -31
  130. package/dist/extensions/builtin/discipline/skills/systematic-debugging/SKILL.md +28 -28
  131. package/dist/extensions/builtin/discipline/skills/test-driven-development/SKILL.md +32 -32
  132. package/dist/extensions/builtin/discipline/skills/using-git-worktrees/SKILL.md +25 -25
  133. package/dist/extensions/builtin/discipline/skills/verification-before-completion/SKILL.md +27 -27
  134. package/dist/extensions/builtin/discipline/skills/writing-plans/SKILL.md +26 -26
  135. package/dist/extensions/builtin/goal/README.md +67 -67
  136. package/dist/extensions/builtin/goal/goal-controller.d.ts +39 -10
  137. package/dist/extensions/builtin/goal/goal-controller.js +1 -1
  138. package/dist/extensions/builtin/goal/goal-format.js +1 -1
  139. package/dist/extensions/builtin/goal/goal-prompts.d.ts +2 -0
  140. package/dist/extensions/builtin/goal/goal-prompts.js +5 -4
  141. package/dist/extensions/builtin/goal/goal-store.js +1 -1
  142. package/dist/extensions/builtin/goal/index.d.ts +1 -1
  143. package/dist/extensions/builtin/goal/index.js +10 -7
  144. package/dist/extensions/builtin/grub/README.md +112 -112
  145. package/dist/extensions/builtin/link-world/agent-workspace/README.md +16 -16
  146. package/dist/extensions/builtin/link-world/index.js +6 -6
  147. package/dist/extensions/builtin/link-world/internet-search/internet-search.md +65 -65
  148. package/dist/extensions/builtin/link-world/link-world-agent.md +82 -82
  149. package/dist/extensions/builtin/link-world/linkworld.md +313 -313
  150. package/dist/extensions/builtin/link-world/{network-routing.md → network-routing/network-routing.md} +67 -67
  151. package/dist/extensions/builtin/loop/README.md +92 -92
  152. package/dist/extensions/builtin/mcp/figma-design.md +68 -68
  153. package/dist/extensions/builtin/mcp/mcp-management.md +85 -85
  154. package/dist/extensions/builtin/plan/index.js +1 -1
  155. package/dist/extensions/builtin/recap/AGENT.md +15 -15
  156. package/dist/extensions/builtin/sal/README.md +72 -72
  157. package/dist/extensions/builtin/security-audit/README.md +289 -289
  158. package/dist/extensions/builtin/task/task-store.d.ts +4 -0
  159. package/dist/extensions/builtin/task/task-store.js +1 -1
  160. package/dist/extensions/builtin/team/AGENT.md +112 -112
  161. package/dist/extensions/builtin/team/TESTING.md +299 -299
  162. package/dist/extensions/builtin/token-save/README.md +56 -56
  163. package/dist/extensions/optional/AGENT.md +10 -10
  164. package/dist/index.d.ts +5 -30
  165. package/dist/index.js +1 -1
  166. package/dist/models.d.ts +7 -0
  167. package/dist/models.js +1 -0
  168. package/dist/modes/interactive/components/footer.js +1 -1
  169. package/dist/modes/interactive/components/task-status-panel.d.ts +36 -0
  170. package/dist/modes/interactive/components/task-status-panel.js +1 -0
  171. package/dist/modes/interactive/controllers/stream-render-controller.d.ts +7 -0
  172. package/dist/modes/interactive/controllers/stream-render-controller.js +2 -2
  173. package/dist/modes/interactive/interactive-mode.js +40 -40
  174. package/dist/modes/interactive/state/interactive-state.d.ts +2 -0
  175. package/dist/modes/interactive/state/interactive-state.js +1 -1
  176. package/dist/modes/interactive/theme/dark.json +85 -85
  177. package/dist/modes/interactive/theme/light.json +84 -84
  178. package/dist/modes/interactive/theme/theme-schema.json +335 -335
  179. package/dist/modes/interactive/theme/warm.json +81 -81
  180. package/dist/node_modules/@pencil-agent/ai/dist/cli.js +0 -0
  181. package/dist/node_modules/@pencil-agent/ai/dist/models.generated.js +1 -1
  182. package/dist/node_modules/@pencil-agent/ai/dist/providers/anthropic.js +2 -2
  183. package/dist/node_modules/@pencil-agent/ai/dist/providers/openai-completions.js +5 -5
  184. package/dist/node_modules/@pencil-agent/ai/dist/providers/openai-responses.js +1 -1
  185. package/dist/node_modules/@pencil-agent/ai/dist/stream.js +1 -1
  186. package/dist/packages/protocol/src/commands.d.ts +33 -0
  187. package/dist/packages/protocol/src/flags.d.ts +20 -0
  188. package/dist/packages/protocol/src/hooks.d.ts +17 -0
  189. package/dist/packages/protocol/src/hooks.js +0 -0
  190. package/dist/packages/{extension-sdk → protocol}/src/index.d.ts +7 -4
  191. package/dist/packages/protocol/src/index.js +1 -0
  192. package/dist/packages/{extension-sdk → protocol}/src/lifecycle.d.ts +15 -27
  193. package/dist/packages/protocol/src/lifecycle.js +0 -0
  194. package/dist/packages/{extension-sdk → protocol}/src/tools.d.ts +1 -1
  195. package/dist/packages/protocol/src/tools.js +0 -0
  196. package/dist/public-config.d.ts +12 -0
  197. package/dist/public-config.js +1 -0
  198. package/dist/runtime.d.ts +9 -0
  199. package/dist/runtime.js +1 -0
  200. package/dist/session-compaction.d.ts +7 -0
  201. package/dist/session-compaction.js +1 -0
  202. package/dist/session.d.ts +7 -0
  203. package/dist/session.js +1 -0
  204. package/dist/skills.d.ts +7 -0
  205. package/dist/skills.js +1 -0
  206. package/dist/tools.d.ts +7 -0
  207. package/dist/tools.js +1 -0
  208. package/docs/ACP/345/215/217/350/256/256/351/233/206/346/210/220/345/274/200/345/217/221/346/226/207/346/241/243.md +851 -0
  209. package/docs/SDK-TESTING.md +364 -0
  210. package/docs/codex-goal-command-impl.md +1055 -1055
  211. package/docs/codex-goal-vs-grub.md +500 -500
  212. package/docs/custom-provider.md +27 -27
  213. package/docs/extensions.md +27 -27
  214. package/docs/keybindings.md +27 -27
  215. package/docs/loop 重构完成总结.md +250 -250
  216. package/docs/loop 重构完成报告.md +122 -122
  217. package/docs/loop 重构方案.md +1222 -1222
  218. package/docs/loop 重构方案实现报告.md +158 -158
  219. package/docs/loop 重构方案对比分析.md +128 -128
  220. package/docs/loop 重构计划.md +320 -320
  221. package/docs/loop-usage-examples.md +214 -214
  222. package/docs/mem-core/346/212/200/346/234/257/346/226/207/346/241/243.md +593 -0
  223. package/docs/models.md +27 -27
  224. package/docs/packages.md +27 -27
  225. package/docs/pi-design-philosophy.md +457 -457
  226. package/docs/planmode.md +1987 -1987
  227. package/docs/prompt-templates.md +27 -27
  228. package/docs/providers.md +27 -27
  229. package/docs/sdk.md +27 -27
  230. package/docs/skills.md +27 -27
  231. package/docs/startup-performance-optimization.md +301 -0
  232. package/docs/themes.md +27 -27
  233. package/docs/tui.md +27 -27
  234. package/docs/认知地图.md +47 -0
  235. package/package.json +190 -162
  236. package/dist/packages/extension-sdk/src/index.js +0 -1
  237. package/docs/cc-agent-design.md +0 -1297
  238. package/docs/cc-tui-design.md +0 -1333
  239. package/docs//345/257/271/346/240/207Claude-Code.md +0 -1775
  240. /package/dist/packages/{extension-sdk/src/lifecycle.js → protocol/src/commands.js} +0 -0
  241. /package/dist/packages/{extension-sdk/src/tools.js → protocol/src/flags.js} +0 -0
@@ -1,596 +1,596 @@
1
- # Wellfound (AngelList) — Startup Jobs & Company Profiles
2
-
3
- Field-tested against wellfound.com on 2026-04-18.
4
- All confirmed via live HTTP probes and response header analysis.
5
-
6
- ---
7
-
8
- ## Anti-bot verdict: browser required, no http_get workaround exists
9
-
10
- **`http_get` returns HTTP 403 on every Wellfound URL without exception** (except `robots.txt`).
11
-
12
- Tested endpoints (all 403):
13
- - `/company/stripe`
14
- - `/jobs`
15
- - `/jobs?role=engineer&location=remote`
16
- - `/company/stripe/jobs`
17
- - `/sitemap.xml`, `/sitemap_index.xml`
18
- - `/jobs.rss`
19
- - `POST /graphql` (HTTP 403, Cloudflare managed challenge)
20
-
21
- Old AngelList public API (`api.angel.co/1/...`) returns `404 Not Found` — permanently shut down.
22
-
23
- **Dual anti-bot stack confirmed from response headers:**
24
-
25
- | Layer | System | Evidence |
26
- |-------|--------|----------|
27
- | Page GETs | DataDome | `X-DataDome: protected`, `X-DD-B: 2`, `Set-Cookie: datadome=...` |
28
- | API POSTs | Cloudflare Bot Management | `Cf-Mitigated: challenge` |
29
-
30
- The 403 response body contains a DataDome captcha challenge script (`geo.captcha-delivery.com`) AND an embedded Cloudflare challenge (`window.__CF$cv$params`). Both fire simultaneously. Neither cookie can be replayed — both are TLS-fingerprint-bound.
31
-
32
- **Use `new_tab()` + `wait()` exclusively. Never use `http_get` for Wellfound.**
33
-
34
- ---
35
-
36
- ## Tech stack (confirmed from response headers)
37
-
38
- Wellfound is a **Ruby on Rails + React + Apollo GraphQL** hybrid app — NOT a pure Next.js app.
39
-
40
- Confirmed headers from `robots.txt` (the only accessible endpoint):
41
- ```
42
- x-runtime: 0.006700 → Rails rack middleware timer
43
- x-request-id: 4645fd66... → Rails request ID
44
- x-xss-protection: 1; mode=block → Rails security defaults
45
- Set-Cookie: _wellfound=... → Rails session cookie
46
- Server: cloudflare → Cloudflare CDN
47
- ```
48
-
49
- Implications:
50
- - **`__NEXT_DATA__` is NOT present** — not a Next.js app
51
- - **`window.__APOLLO_STATE__` or `window.gon` may be present** — check these instead
52
- - CSRF token is in a `<meta name="csrf-token">` tag (Rails default)
53
- - Session cookie is `_wellfound=...` for anonymous sessions; login sessions add `_wellfound_session=...`
54
-
55
- ---
56
-
57
- ## Do this first: open in new tab, wait for DataDome to resolve
58
-
59
- ```python
60
- new_tab("https://wellfound.com/company/stripe")
61
- wait_for_load()
62
- wait(5) # DataDome JS fingerprinting runs ~2-4s after readyState=complete
63
- ```
64
-
65
- Verify you are past the DataDome challenge before extracting:
66
-
67
- ```python
68
- title = js("document.title")
69
- url = page_info()["url"]
70
-
71
- if "wellfound.com" not in url or not title or "Just a moment" in title:
72
- # DataDome or CF challenge did not resolve — wait longer
73
- wait(8)
74
- title = js("document.title")
75
- if "Just a moment" in title or not title:
76
- capture_screenshot("/tmp/wellfound_block.png")
77
- raise RuntimeError("DataDome/CF challenge did not resolve — see screenshot")
78
- ```
79
-
80
- DataDome resolves **silently** in a real Chrome session via CDP — no user interaction required.
81
- The challenge is a JS fingerprint check that passes automatically when running in a real browser.
82
-
83
- ---
84
-
85
- ## URL patterns
86
-
87
- | Goal | URL |
88
- |------|-----|
89
- | Company profile | `https://wellfound.com/company/{slug}` |
90
- | Company jobs | `https://wellfound.com/company/{slug}/jobs` |
91
- | Company culture | `https://wellfound.com/company/{slug}/culture` |
92
- | Job board (all) | `https://wellfound.com/jobs` |
93
- | Job board filtered | `https://wellfound.com/jobs` — then use UI filters (query params are disallowed by robots.txt) |
94
- | Investor profile | `https://wellfound.com/investor/{slug}` |
95
- | User profile | `https://wellfound.com/u/{username}` (disallowed by robots.txt, login wall) |
96
-
97
- **Note on query params:** `robots.txt` disallows `?role=*`, `?jobId=*`, `?jobSlug=*`, `?location=*`.
98
- Wellfound enforces these with login walls or redirects for most filtered job searches.
99
-
100
- ---
101
-
102
- ## Workflow 1: Company profile — name, description, team size, funding, tags
103
-
104
- Navigate to the company page and extract structured data. Most fields are visible without login.
105
-
106
- ```python
107
- import json
108
-
109
- new_tab("https://wellfound.com/company/stripe")
110
- wait_for_load()
111
- wait(5)
112
-
113
- # Check for Apollo state (Rails + React app, not Next.js)
114
- # Wellfound embeds data in window.gon or inline script tags
115
- apollo_raw = js("""
116
- (function() {
117
- // Try window.__APOLLO_STATE__ (Apollo Client cache)
118
- if (window.__APOLLO_STATE__) return JSON.stringify(window.__APOLLO_STATE__);
119
- // Try window.gon (Rails Gon gem)
120
- if (window.gon) return JSON.stringify(window.gon);
121
- // Try inline <script> tags containing startup data
122
- var scripts = Array.from(document.querySelectorAll('script:not([src])'));
123
- for (var s of scripts) {
124
- var t = s.textContent || '';
125
- if (t.includes('"name"') && t.includes('"description"') && t.includes('teamSize')) {
126
- return t.substring(0, 5000);
127
- }
128
- }
129
- return null;
130
- })()
131
- """)
132
-
133
- if apollo_raw:
134
- try:
135
- data = json.loads(apollo_raw)
136
- # Apollo State: look for Startup:{id} keys
137
- for key, val in data.items():
138
- if key.startswith("Startup:") and isinstance(val, dict):
139
- print("Company:", val.get("name"))
140
- print("Description:", val.get("description") or val.get("highConcept"))
141
- print("Team size:", val.get("teamSize"))
142
- print("Total raised:", val.get("totalRaised"))
143
- print("Hiring:", val.get("hiring"))
144
- print(json.dumps(data, indent=2)[:3000])
145
- except json.JSONDecodeError:
146
- # Raw script tag — parse key fields with regex
147
- import re
148
- name = re.search(r'"name"\s*:\s*"([^"]+)"', apollo_raw)
149
- desc = re.search(r'"description"\s*:\s*"([^"]+)"', apollo_raw)
150
- print("Name:", name.group(1) if name else "not found")
151
- print("Desc:", desc.group(1) if desc else "not found")
152
- ```
153
-
154
- If the structured data path fails, fall back to DOM extraction:
155
-
156
- ```python
157
- # DOM extraction — company profile page
158
- profile = js("""
159
- (function() {
160
- // Company name — first h1 on the page
161
- var nameEl = document.querySelector('h1');
162
-
163
- // Description — first substantial paragraph or div with class containing 'description'
164
- var descEl = (
165
- document.querySelector('[class*="description"]') ||
166
- document.querySelector('[class*="about"]') ||
167
- document.querySelector('p[class*="startupDescription"]')
168
- );
169
-
170
- // Tags — market/role tags are links with /jobs?role= or /location/ in href
171
- // Wellfound uses Tailwind (no stable class names) — use href pattern
172
- var roleLinks = Array.from(document.querySelectorAll('a[href*="/jobs?role="]')).map(a => a.innerText.trim());
173
- var locationLinks = Array.from(document.querySelectorAll('a[href*="/location/"]')).map(a => a.innerText.trim());
174
-
175
- // Team size / funding — look in page text for patterns
176
- var bodyText = document.body.innerText;
177
-
178
- // Company size: "11-50 employees" or "51-200 people" pattern
179
- var sizeMatch = bodyText.match(/(\d+[-–]\d+)\s+(employees|people)/i);
180
- var teamSize = sizeMatch ? sizeMatch[0] : null;
181
-
182
- // Funding: "$X.XM" or "Raised $X" pattern
183
- var fundingMatch = bodyText.match(/\$[\d,.]+[KMBkm]\s*(raised|in funding|Series [A-Z])?/i);
184
- var funding = fundingMatch ? fundingMatch[0] : null;
185
-
186
- // Stage: "Series A", "Seed", "Series B", etc.
187
- var stageMatch = bodyText.match(/\b(Seed|Series [A-Z]\+?|Pre-seed|Angel|Late Stage|Public)\b/);
188
- var stage = stageMatch ? stageMatch[0] : null;
189
-
190
- return JSON.stringify({
191
- name: nameEl ? nameEl.innerText.trim() : null,
192
- desc: descEl ? descEl.innerText.trim().substring(0, 500) : null,
193
- teamSize: teamSize,
194
- funding: funding,
195
- stage: stage,
196
- roles: roleLinks.slice(0, 10),
197
- locations: locationLinks.slice(0, 5),
198
- });
199
- })()
200
- """)
201
-
202
- data = json.loads(profile)
203
- print(json.dumps(data, indent=2))
204
- ```
205
-
206
- ---
207
-
208
- ## Workflow 2: Company jobs listing
209
-
210
- ```python
211
- import json
212
-
213
- company_slug = "stripe"
214
- new_tab(f"https://wellfound.com/company/{company_slug}/jobs")
215
- wait_for_load()
216
- wait(5)
217
-
218
- jobs = js("""
219
- (function() {
220
- // Job listing cards — Wellfound uses role="listitem" or li elements in job list
221
- var cards = document.querySelectorAll('[data-test^="StartupJobListing"], li[class*="job"], div[class*="JobListing"]');
222
- if (!cards.length) {
223
- // Broad fallback: all anchor tags with /jobs/ in href
224
- var links = Array.from(document.querySelectorAll('a[href*="/jobs/"]'));
225
- return JSON.stringify(links.map(a => ({
226
- title: a.innerText.trim().split('\\n')[0],
227
- href: a.href,
228
- })).filter(j => j.title && j.title.length > 2).slice(0, 30));
229
- }
230
- return JSON.stringify(Array.from(cards).map(card => {
231
- var titleEl = card.querySelector('h2, h3, [class*="title"], [class*="jobTitle"]');
232
- var locEl = card.querySelector('[class*="location"], [class*="Location"]');
233
- var compEl = card.querySelector('[class*="salary"], [class*="comp"], [class*="equity"]');
234
- var linkEl = card.querySelector('a[href*="/jobs/"]');
235
- return {
236
- title: titleEl ? titleEl.innerText.trim() : '',
237
- location: locEl ? locEl.innerText.trim() : '',
238
- comp: compEl ? compEl.innerText.trim() : '',
239
- href: linkEl ? linkEl.href : '',
240
- };
241
- }).filter(j => j.title));
242
- })()
243
- """)
244
-
245
- results = json.loads(jobs)
246
- print(f"Found {len(results)} jobs")
247
- for j in results:
248
- print(f" {j['title']} | {j.get('location','?')} | {j.get('comp','?')}")
249
- ```
250
-
251
- ---
252
-
253
- ## Workflow 3: Job board — browse all jobs
254
-
255
- The main `/jobs` page shows a curated job feed. Filters are not accessible via URL params (DataDome blocks `?role=...`). Use the UI dropdown filters after loading the page.
256
-
257
- ```python
258
- import json
259
-
260
- new_tab("https://wellfound.com/jobs")
261
- wait_for_load()
262
- wait(5)
263
-
264
- # Extract visible job cards
265
- jobs = js("""
266
- (function() {
267
- // Job cards on the main /jobs board
268
- var cards = document.querySelectorAll(
269
- '[data-test*="job"], [class*="JobCard"], [class*="jobListing"], ' +
270
- 'li[class*="job"], article[class*="job"]'
271
- );
272
- if (!cards.length) {
273
- // Fallback: links to job detail pages
274
- var links = Array.from(document.querySelectorAll('a[href*="/company/"][href*="/jobs/"]'));
275
- return JSON.stringify(links.map(a => ({
276
- href: a.href,
277
- text: a.innerText.trim().substring(0, 100),
278
- })).slice(0, 30));
279
- }
280
- return JSON.stringify(Array.from(cards).map(card => {
281
- var titleEl = card.querySelector('h2, h3, [class*="title"]');
282
- var companyEl = card.querySelector('[class*="company"], [class*="startup"]');
283
- var locEl = card.querySelector('[class*="location"]');
284
- var linkEl = card.querySelector('a[href*="/jobs/"]');
285
- return {
286
- title: titleEl ? titleEl.innerText.trim() : '',
287
- company: companyEl ? companyEl.innerText.trim() : '',
288
- location: locEl ? locEl.innerText.trim() : '',
289
- href: linkEl ? linkEl.href : '',
290
- };
291
- }).filter(j => j.title));
292
- })()
293
- """)
294
-
295
- results = json.loads(jobs)
296
- print(f"Found {len(results)} jobs")
297
- ```
298
-
299
- ---
300
-
301
- ## Workflow 4: GraphQL API (authenticated sessions only)
302
-
303
- Wellfound's GraphQL endpoint (`/graphql`) requires:
304
- 1. A valid `_wellfound` session cookie from a real browser load
305
- 2. A CSRF token from the page's `<meta name="csrf-token">` tag
306
- 3. Cloudflare Bot Management to have passed (only happens in a real Chrome session)
307
-
308
- **This approach only works from inside a browser session (after navigating to any Wellfound page).**
309
-
310
- ```python
311
- import json
312
-
313
- # Step 1: Load any Wellfound page so the session cookie + DataDome cookie are set
314
- new_tab("https://wellfound.com/")
315
- wait_for_load()
316
- wait(5)
317
-
318
- # Step 2: Extract CSRF token from meta tag
319
- csrf = js("document.querySelector('meta[name=\"csrf-token\"]') ? document.querySelector('meta[name=\"csrf-token\"]').getAttribute('content') : null")
320
- if not csrf:
321
- raise RuntimeError("CSRF token not found — page may not have loaded correctly")
322
-
323
- print(f"CSRF token: {csrf[:20]}...")
324
-
325
- # Step 3: Execute GraphQL query via fetch() from within the browser
326
- # This uses the browser's existing cookies automatically
327
- result = js(f"""
328
- (async function() {{
329
- try {{
330
- var resp = await fetch('/graphql', {{
331
- method: 'POST',
332
- credentials: 'include',
333
- headers: {{
334
- 'Content-Type': 'application/json',
335
- 'Accept': 'application/json',
336
- 'x-csrf-token': '{csrf}',
337
- 'x-requested-with': 'XMLHttpRequest',
338
- }},
339
- body: JSON.stringify({{
340
- query: `query StartupShow($slug: String!) {{
341
- startup(slug: $slug) {{
342
- id
343
- name
344
- description: highConcept
345
- productDesc
346
- teamSize
347
- locations {{ displayName }}
348
- markets {{ displayName }}
349
- totalRaised
350
- fundingStage
351
- badges
352
- hiring
353
- jobListingsCount
354
- }}
355
- }}`,
356
- variables: {{ slug: "stripe" }}
357
- }})
358
- }});
359
- var data = await resp.json();
360
- return JSON.stringify(data);
361
- }} catch(e) {{
362
- return JSON.stringify({{error: e.message}});
363
- }}
364
- }})()
365
- """)
366
-
367
- # js() with async returns a Promise — use js_async() if available, or eval trick:
368
- # Note: the above may return None if js() doesn't await Promises.
369
- # Use this pattern instead if js() doesn't handle async:
370
- result_sync = js("""
371
- var done = false, out = null;
372
- fetch('/graphql', {
373
- method: 'POST',
374
- credentials: 'include',
375
- headers: {
376
- 'Content-Type': 'application/json',
377
- 'Accept': 'application/json',
378
- 'x-csrf-token': document.querySelector('meta[name="csrf-token"]').content,
379
- 'x-requested-with': 'XMLHttpRequest',
380
- },
381
- body: JSON.stringify({
382
- query: '{ __typename }',
383
- })
384
- }).then(r => r.json()).then(d => { window._wf_gql_result = JSON.stringify(d); });
385
- 'pending'
386
- """)
387
- # Wait for async result
388
- import time; time.sleep(3)
389
- gql_result = js("window._wf_gql_result || null")
390
- if gql_result:
391
- data = json.loads(gql_result)
392
- print("GraphQL response:", json.dumps(data, indent=2)[:1000])
393
- ```
394
-
395
- ### Known GraphQL operations
396
-
397
- | Operation | Purpose |
398
- |-----------|---------|
399
- | `StartupShow` | Full company profile (name, desc, funding, team size, markets) |
400
- | `JobListingsIndex` | Paginated job board |
401
- | `JobSearch` | Filtered job search by role/location |
402
- | `UserProfile` | User/candidate profile |
403
- | `InvestorShow` | VC/investor profile |
404
-
405
- ---
406
-
407
- ## Handling the login wall
408
-
409
- Wellfound shows a sign-in modal on:
410
- - Job detail pages (immediately or after 2-3 seconds)
411
- - Candidate profile pages (immediately)
412
- - Some company pages after scrolling
413
-
414
- Company overview pages typically show content without login. Job listings require login to see full details and apply.
415
-
416
- ```python
417
- def dismiss_wellfound_login_modal():
418
- """Close the Wellfound sign-in modal. Safe to call if no modal is present."""
419
- closed = js("""
420
- (function() {
421
- var selectors = [
422
- 'button[aria-label="Close"]',
423
- 'button[class*="close"]',
424
- 'button[class*="Close"]',
425
- '[data-test="close-modal"]',
426
- '[aria-label="Dismiss"]',
427
- 'button[class*="dismiss"]',
428
- // Wellfound-specific: modal overlay dismiss
429
- 'div[class*="Modal"] button[type="button"]',
430
- ];
431
- for (var s of selectors) {
432
- var btn = document.querySelector(s);
433
- if (btn && btn.offsetParent !== null) {
434
- btn.click();
435
- return s;
436
- }
437
- }
438
- // Try pressing Escape
439
- document.dispatchEvent(new KeyboardEvent('keydown', {key: 'Escape', keyCode: 27, bubbles: true}));
440
- return 'escape';
441
- })()
442
- """)
443
- if closed:
444
- wait(1)
445
- return closed
446
- ```
447
-
448
- ---
449
-
450
- ## Detecting DataDome / challenge page
451
-
452
- After `new_tab()` + `wait(5)`, verify you are on a real Wellfound page:
453
-
454
- ```python
455
- def wellfound_is_blocked() -> bool:
456
- """True if DataDome or Cloudflare challenge is still showing."""
457
- title = js("document.title") or ""
458
- url = page_info()["url"]
459
- # DataDome challenge page has no useful title; CF shows "Just a moment..."
460
- blocked = (
461
- "Just a moment" in title or
462
- "wellfound.com" not in url or
463
- "captcha-delivery.com" in js("document.body.innerHTML or ''") or
464
- not title
465
- )
466
- return blocked
467
-
468
- # Usage
469
- new_tab("https://wellfound.com/company/stripe")
470
- wait_for_load()
471
- wait(5)
472
-
473
- if wellfound_is_blocked():
474
- wait(8) # DataDome sometimes needs up to 10s total
475
- if wellfound_is_blocked():
476
- capture_screenshot("/tmp/wellfound_blocked.png")
477
- raise RuntimeError("DataDome/CF challenge did not resolve — see /tmp/wellfound_blocked.png")
478
- ```
479
-
480
- ---
481
-
482
- ## Key selectors reference
483
-
484
- Wellfound uses **Tailwind CSS** — no stable semantic class names. These patterns are robust:
485
-
486
- | Target | Selector strategy |
487
- |--------|------------------|
488
- | Company name | `h1` (first on page) |
489
- | Company description | `[class*="description"]`, `[class*="about"]` |
490
- | Team size | Text search: `/\d+[-–]\d+\s+(employees\|people)/i` |
491
- | Funding amount | Text search: `/\$[\d,.]+[KMBkm]/i` |
492
- | Funding stage | Text search: `/\b(Seed\|Series [A-Z]\+?\|Pre-seed\|Late Stage)\b/` |
493
- | Role/market tags | `a[href*="/jobs?role="]` |
494
- | Location tags | `a[href*="/location/"]` |
495
- | Job cards | `a[href*="/company/"][href*="/jobs/"]` (broad fallback) |
496
- | Job title | `h2`, `h3`, `[class*="title"]` within card |
497
- | CSRF token | `meta[name="csrf-token"]` |
498
- | Login modal | `button[aria-label="Close"]`, Escape key |
499
-
500
- ---
501
-
502
- ## Common pitfalls
503
-
504
- 1. **`http_get` is permanently blocked.** DataDome intercepts all non-browser HTTP requests with
505
- a 403 + captcha challenge. No User-Agent, header combination, or cookie replay works.
506
- `api.angel.co` is HTTP 404 (shut down). Use `new_tab()` exclusively.
507
-
508
- 2. **NOT a Next.js app.** Wellfound is Ruby on Rails + React. There is no `__NEXT_DATA__` JSON
509
- blob. Look for `window.__APOLLO_STATE__`, `window.gon`, or inline `<script>` tags instead.
510
-
511
- 3. **`wait(5)` minimum after `wait_for_load()`.** DataDome runs JS fingerprinting probes for
512
- 2-4 seconds after `readyState = complete`. Extracting before this resolves returns the challenge
513
- page HTML, not real content.
514
-
515
- 4. **Tailwind CSS — no stable class names.** Wellfound uses Tailwind utility classes. Never
516
- hardcode a specific class name. Use `href` attribute patterns, `data-test` attributes if present,
517
- or semantic element selectors (`h1`, `h2`, `li`, `article`).
518
-
519
- 5. **GraphQL requires both CSRF token AND browser session cookies.** The CSRF token is a
520
- per-session value from `<meta name="csrf-token">`. Cloudflare Bot Management blocks
521
- `POST /graphql` from non-browser sessions. Always fire GraphQL via `fetch()` inside the
522
- browser session (not from Python's `http_get`).
523
-
524
- 6. **`?role=` and `?location=` params are robots.txt-disallowed.** Wellfound may redirect or
525
- show a login wall for filtered job search URLs. Load `/jobs` unfiltered and use in-page
526
- UI filters (dropdowns) to narrow results.
527
-
528
- 7. **Login wall on job details and user profiles.** Company overview pages load without login.
529
- Individual job detail pages, and all `/u/{username}` profiles, hit a login modal immediately.
530
- Call `dismiss_wellfound_login_modal()` right after `wait(5)` on these pages.
531
-
532
- 8. **Rate limiting.** After ~5-10 rapid page navigations DataDome may harden. Use `wait(3)` between
533
- `goto_url()` calls. If you get a captcha that does not auto-resolve, wait 30-60 seconds.
534
-
535
- 9. **`new_tab()` over `goto_url()` for the first Wellfound page.** `goto_url()` in an existing tab
536
- may inherit a stale DataDome fingerprint. `new_tab()` gives a clean origin context that
537
- DataDome processes cleanly.
538
-
539
- ---
540
-
541
- ## Anti-bot response identification
542
-
543
- What you see in the 403 body when NOT in a browser:
544
-
545
- ```html
546
- <!-- DataDome challenge (page GETs) -->
547
- <script>var dd={'rt':'c','cid':'...','t':'bv','host':'geo.captcha-delivery.com',...}</script>
548
- <script src="https://ct.captcha-delivery.com/c.js"></script>
549
- <!-- rt='c' = captcha required; rt='i' = invisible solve; rt='b' = blocked -->
550
-
551
- <!-- Cloudflare challenge (API POSTs) -->
552
- <title>Just a moment...</title>
553
- <script>window.__CF$cv$params={r:'...',t:'...'}</script>
554
- ```
555
-
556
- In a real Chrome browser, both challenges resolve automatically without user interaction.
557
-
558
- ---
559
-
560
- ## Minimal working example
561
-
562
- ```python
563
- import json
564
-
565
- # Open Wellfound company page
566
- new_tab("https://wellfound.com/company/openai")
567
- wait_for_load()
568
- wait(5)
569
-
570
- # Verify not blocked
571
- title = js("document.title")
572
- assert "Just a moment" not in (title or ""), f"Still on challenge page: {title}"
573
-
574
- # Extract company overview
575
- data = js("""
576
- (function() {
577
- var name = document.querySelector('h1');
578
- var bodyText = document.body.innerText;
579
- var sizeMatch = bodyText.match(/(\\d+[-\\u2013]\\d+)\\s+(employees|people)/i);
580
- var fundingMatch = bodyText.match(/\\$[\\d,.]+[KMBkm](?:\\s+(?:raised|total))?/i);
581
- var stageMatch = bodyText.match(/\\b(Seed|Series [A-Z]\\+?|Pre-seed|Late Stage|Public)\\b/);
582
- var tags = Array.from(document.querySelectorAll('a[href*="/jobs?role="]')).map(a => a.innerText.trim());
583
- var locs = Array.from(document.querySelectorAll('a[href*="/location/"]')).map(a => a.innerText.trim());
584
- return JSON.stringify({
585
- name: name ? name.innerText.trim() : null,
586
- teamSize: sizeMatch ? sizeMatch[0] : null,
587
- funding: fundingMatch ? fundingMatch[0] : null,
588
- stage: stageMatch ? stageMatch[0] : null,
589
- roles: tags.slice(0, 8),
590
- locations: locs.slice(0, 5),
591
- });
592
- })()
593
- """)
594
-
595
- print(json.dumps(json.loads(data), indent=2))
596
- ```
1
+ # Wellfound (AngelList) — Startup Jobs & Company Profiles
2
+
3
+ Field-tested against wellfound.com on 2026-04-18.
4
+ All confirmed via live HTTP probes and response header analysis.
5
+
6
+ ---
7
+
8
+ ## Anti-bot verdict: browser required, no http_get workaround exists
9
+
10
+ **`http_get` returns HTTP 403 on every Wellfound URL without exception** (except `robots.txt`).
11
+
12
+ Tested endpoints (all 403):
13
+ - `/company/stripe`
14
+ - `/jobs`
15
+ - `/jobs?role=engineer&location=remote`
16
+ - `/company/stripe/jobs`
17
+ - `/sitemap.xml`, `/sitemap_index.xml`
18
+ - `/jobs.rss`
19
+ - `POST /graphql` (HTTP 403, Cloudflare managed challenge)
20
+
21
+ Old AngelList public API (`api.angel.co/1/...`) returns `404 Not Found` — permanently shut down.
22
+
23
+ **Dual anti-bot stack confirmed from response headers:**
24
+
25
+ | Layer | System | Evidence |
26
+ |-------|--------|----------|
27
+ | Page GETs | DataDome | `X-DataDome: protected`, `X-DD-B: 2`, `Set-Cookie: datadome=...` |
28
+ | API POSTs | Cloudflare Bot Management | `Cf-Mitigated: challenge` |
29
+
30
+ The 403 response body contains a DataDome captcha challenge script (`geo.captcha-delivery.com`) AND an embedded Cloudflare challenge (`window.__CF$cv$params`). Both fire simultaneously. Neither cookie can be replayed — both are TLS-fingerprint-bound.
31
+
32
+ **Use `new_tab()` + `wait()` exclusively. Never use `http_get` for Wellfound.**
33
+
34
+ ---
35
+
36
+ ## Tech stack (confirmed from response headers)
37
+
38
+ Wellfound is a **Ruby on Rails + React + Apollo GraphQL** hybrid app — NOT a Next.js app.
39
+
40
+ Confirmed headers from `robots.txt` (the only accessible endpoint):
41
+ ```
42
+ x-runtime: 0.006700 → Rails rack middleware timer
43
+ x-request-id: 4645fd66... → Rails request ID
44
+ x-xss-protection: 1; mode=block → Rails security defaults
45
+ Set-Cookie: _wellfound=... → Rails session cookie
46
+ Server: cloudflare → Cloudflare CDN
47
+ ```
48
+
49
+ Implications:
50
+ - **`__NEXT_DATA__` is NOT present** — not a Next.js app
51
+ - **`window.__APOLLO_STATE__` or `window.gon` may be present** — check these instead
52
+ - CSRF token is in a `<meta name="csrf-token">` tag (Rails default)
53
+ - Session cookie is `_wellfound=...` for anonymous sessions; login sessions add `_wellfound_session=...`
54
+
55
+ ---
56
+
57
+ ## Do this first: open in new tab, wait for DataDome to resolve
58
+
59
+ ```python
60
+ new_tab("https://wellfound.com/company/stripe")
61
+ wait_for_load()
62
+ wait(5) # DataDome JS fingerprinting runs ~2-4s after readyState=complete
63
+ ```
64
+
65
+ Verify you are past the DataDome challenge before extracting:
66
+
67
+ ```python
68
+ title = js("document.title")
69
+ url = page_info()["url"]
70
+
71
+ if "wellfound.com" not in url or not title or "Just a moment" in title:
72
+ # DataDome or CF challenge did not resolve — wait longer
73
+ wait(8)
74
+ title = js("document.title")
75
+ if not title or "Just a moment" in title:
76
+ capture_screenshot("/tmp/wellfound_block.png")
77
+ raise RuntimeError("DataDome/CF challenge did not resolve — see screenshot")
78
+ ```
79
+
80
+ DataDome resolves **silently** in a real Chrome session via CDP — no user interaction required.
81
+ The challenge is a JS fingerprint check that passes automatically when running in a real browser.
82
+
83
+ ---
84
+
85
+ ## URL patterns
86
+
87
+ | Goal | URL |
88
+ |------|-----|
89
+ | Company profile | `https://wellfound.com/company/{slug}` |
90
+ | Company jobs | `https://wellfound.com/company/{slug}/jobs` |
91
+ | Company culture | `https://wellfound.com/company/{slug}/culture` |
92
+ | Job board (all) | `https://wellfound.com/jobs` |
93
+ | Job board filtered | `https://wellfound.com/jobs` — then use UI filters (query params are disallowed by robots.txt) |
94
+ | Investor profile | `https://wellfound.com/investor/{slug}` |
95
+ | User profile | `https://wellfound.com/u/{username}` (disallowed by robots.txt, login wall) |
96
+
97
+ **Note on query params:** `robots.txt` disallows `?role=*`, `?jobId=*`, `?jobSlug=*`, `?location=*`.
98
+ Wellfound enforces these with login walls or redirects for most filtered job searches.
99
+
100
+ ---
101
+
102
+ ## Workflow 1: Company profile — name, description, team size, funding, tags
103
+
104
+ Navigate to the company page and extract structured data. Most fields are visible without login.
105
+
106
+ ```python
107
+ import json
108
+
109
+ new_tab("https://wellfound.com/company/stripe")
110
+ wait_for_load()
111
+ wait(5)
112
+
113
+ # Check for Apollo state (Rails + React app, not Next.js)
114
+ # Wellfound embeds data in window.gon or inline script tags
115
+ apollo_raw = js("""
116
+ (function() {
117
+ // Try window.__APOLLO_STATE__ (Apollo Client cache)
118
+ if (window.__APOLLO_STATE__) return JSON.stringify(window.__APOLLO_STATE__);
119
+ // Try window.gon (Rails Gon gem)
120
+ if (window.gon) return JSON.stringify(window.gon);
121
+ // Try inline <script> tags containing startup data
122
+ var scripts = Array.from(document.querySelectorAll('script:not([src])'));
123
+ for (var s of scripts) {
124
+ var t = s.textContent || '';
125
+ if (t.includes('"name"') && t.includes('"description"') && t.includes('teamSize')) {
126
+ return t.substring(0, 5000);
127
+ }
128
+ }
129
+ return null;
130
+ })()
131
+ """)
132
+
133
+ if apollo_raw:
134
+ try:
135
+ data = json.loads(apollo_raw)
136
+ # Apollo State: look for Startup:{id} keys
137
+ for key, val in data.items():
138
+ if key.startswith("Startup:") and isinstance(val, dict):
139
+ print("Company:", val.get("name"))
140
+ print("Description:", val.get("description") or val.get("highConcept"))
141
+ print("Team size:", val.get("teamSize"))
142
+ print("Total raised:", val.get("totalRaised"))
143
+ print("Hiring:", val.get("hiring"))
144
+ print(json.dumps(data, indent=2)[:3000])
145
+ except json.JSONDecodeError:
146
+ # Raw script tag — parse key fields with regex
147
+ import re
148
+ name = re.search(r'"name"\s*:\s*"([^"]+)"', apollo_raw)
149
+ desc = re.search(r'"description"\s*:\s*"([^"]+)"', apollo_raw)
150
+ print("Name:", name.group(1) if name else "not found")
151
+ print("Desc:", desc.group(1) if desc else "not found")
152
+ ```
153
+
154
+ If the structured data path fails, fall back to DOM extraction:
155
+
156
+ ```python
157
+ # DOM extraction — company profile page
158
+ profile = js("""
159
+ (function() {
160
+ // Company name — first h1 on the page
161
+ var nameEl = document.querySelector('h1');
162
+
163
+ // Description — first substantial paragraph or div with class containing 'description'
164
+ var descEl = (
165
+ document.querySelector('[class*="description"]') ||
166
+ document.querySelector('[class*="about"]') ||
167
+ document.querySelector('p[class*="startupDescription"]')
168
+ );
169
+
170
+ // Tags — market/role tags are links with /jobs?role= or /location/ in href
171
+ // Wellfound uses Tailwind (no stable class names) — use href pattern
172
+ var roleLinks = Array.from(document.querySelectorAll('a[href*="/jobs?role="]')).map(a => a.innerText.trim());
173
+ var locationLinks = Array.from(document.querySelectorAll('a[href*="/location/"]')).map(a => a.innerText.trim());
174
+
175
+ // Team size / funding — look in page text for patterns
176
+ var bodyText = document.body.innerText;
177
+
178
+ // Company size: "11-50 employees" or "51-200 people" pattern
179
+ var sizeMatch = bodyText.match(/(\d+[-–]\d+)\s+(employees|people)/i);
180
+ var teamSize = sizeMatch ? sizeMatch[0] : null;
181
+
182
+ // Funding: "$X.XM" or "Raised $X" pattern
183
+ var fundingMatch = bodyText.match(/\$[\d,.]+[KMBkm]\s*(raised|in funding|Series [A-Z])?/i);
184
+ var funding = fundingMatch ? fundingMatch[0] : null;
185
+
186
+ // Stage: "Series A", "Seed", "Series B", etc.
187
+ var stageMatch = bodyText.match(/\\b(Seed|Series [A-Z]\\+?|Pre-seed|Angel|Late Stage|Public)\\b/);
188
+ var stage = stageMatch ? stageMatch[0] : null;
189
+
190
+ return JSON.stringify({
191
+ name: nameEl ? nameEl.innerText.trim() : null,
192
+ desc: descEl ? descEl.innerText.trim().substring(0, 500) : null,
193
+ teamSize: teamSize,
194
+ funding: funding,
195
+ stage: stage,
196
+ roles: roleLinks.slice(0, 10),
197
+ locations: locationLinks.slice(0, 5),
198
+ });
199
+ })()
200
+ """)
201
+
202
+ data = json.loads(profile)
203
+ print(json.dumps(data, indent=2))
204
+ ```
205
+
206
+ ---
207
+
208
+ ## Workflow 2: Company jobs listing
209
+
210
+ ```python
211
+ import json
212
+
213
+ company_slug = "stripe"
214
+ new_tab(f"https://wellfound.com/company/{company_slug}/jobs")
215
+ wait_for_load()
216
+ wait(5)
217
+
218
+ jobs = js("""
219
+ (function() {
220
+ // Job listing cards — Wellfound uses role="listitem" or li elements in job list
221
+ var cards = document.querySelectorAll('[data-test^="StartupJobListing"], li[class*="job"], div[class*="JobListing"]');
222
+ if (!cards.length) {
223
+ // Broad fallback: all anchor tags with /jobs/ in href
224
+ var links = Array.from(document.querySelectorAll('a[href*="/jobs/"]'));
225
+ return JSON.stringify(links.map(a => ({
226
+ title: a.innerText.trim().split('\\n')[0],
227
+ href: a.href,
228
+ })).filter(j => j.title && j.title.length > 2).slice(0, 30));
229
+ }
230
+ return JSON.stringify(Array.from(cards).map(card => {
231
+ var titleEl = card.querySelector('h2, h3, [class*="title"], [class*="jobTitle"]');
232
+ var locEl = card.querySelector('[class*="location"], [class*="Location"]');
233
+ var compEl = card.querySelector('[class*="salary"], [class*="comp"], [class*="equity"]');
234
+ var linkEl = card.querySelector('a[href*="/jobs/"]');
235
+ return {
236
+ title: titleEl ? titleEl.innerText.trim() : '',
237
+ location: locEl ? locEl.innerText.trim() : '',
238
+ comp: compEl ? compEl.innerText.trim() : '',
239
+ href: linkEl ? linkEl.href : '',
240
+ };
241
+ }).filter(j => j.title));
242
+ })()
243
+ """)
244
+
245
+ results = json.loads(jobs)
246
+ print(f"Found {len(results)} jobs")
247
+ for j in results:
248
+ print(f" {j['title']} | {j.get('location','?')} | {j.get('comp','?')}")
249
+ ```
250
+
251
+ ---
252
+
253
+ ## Workflow 3: Job board — browse all jobs
254
+
255
+ The main `/jobs` page shows a curated job feed. Filters cannot be applied reliably via URL params: `?role=...` and `?location=...` are disallowed by robots.txt and typically trigger a login wall or redirect. Use the UI dropdown filters after loading the page.
256
+
257
+ ```python
258
+ import json
259
+
260
+ new_tab("https://wellfound.com/jobs")
261
+ wait_for_load()
262
+ wait(5)
263
+
264
+ # Extract visible job cards
265
+ jobs = js("""
266
+ (function() {
267
+ // Job cards on the main /jobs board
268
+ var cards = document.querySelectorAll(
269
+ '[data-test*="job"], [class*="JobCard"], [class*="jobListing"], ' +
270
+ 'li[class*="job"], article[class*="job"]'
271
+ );
272
+ if (!cards.length) {
273
+ // Fallback: links to job detail pages
274
+ var links = Array.from(document.querySelectorAll('a[href*="/company/"][href*="/jobs/"]'));
275
+ return JSON.stringify(links.map(a => ({
276
+ href: a.href,
277
+ text: a.innerText.trim().substring(0, 100),
278
+ })).slice(0, 30));
279
+ }
280
+ return JSON.stringify(Array.from(cards).map(card => {
281
+ var titleEl = card.querySelector('h2, h3, [class*="title"]');
282
+ var companyEl = card.querySelector('[class*="company"], [class*="startup"]');
283
+ var locEl = card.querySelector('[class*="location"]');
284
+ var linkEl = card.querySelector('a[href*="/jobs/"]');
285
+ return {
286
+ title: titleEl ? titleEl.innerText.trim() : '',
287
+ company: companyEl ? companyEl.innerText.trim() : '',
288
+ location: locEl ? locEl.innerText.trim() : '',
289
+ href: linkEl ? linkEl.href : '',
290
+ };
291
+ }).filter(j => j.title));
292
+ })()
293
+ """)
294
+
295
+ results = json.loads(jobs)
296
+ print(f"Found {len(results)} jobs")
297
+ ```
298
+
299
+ ---
300
+
301
+ ## Workflow 4: GraphQL API (in-browser sessions only)
302
+
303
+ Wellfound's GraphQL endpoint (`/graphql`) requires:
304
+ 1. A valid `_wellfound` session cookie from a real browser load
305
+ 2. A CSRF token from the page's `<meta name="csrf-token">` tag
306
+ 3. Cloudflare Bot Management to have passed (only happens in a real Chrome session)
307
+
308
+ **This approach only works from inside a browser session (after navigating to any Wellfound page).**
309
+
310
+ ```python
311
+ import json
312
+
313
+ # Step 1: Load any Wellfound page so the session cookie + DataDome cookie are set
314
+ new_tab("https://wellfound.com/")
315
+ wait_for_load()
316
+ wait(5)
317
+
318
+ # Step 2: Extract CSRF token from meta tag
319
+ csrf = js("document.querySelector('meta[name=\"csrf-token\"]') ? document.querySelector('meta[name=\"csrf-token\"]').getAttribute('content') : null")
320
+ if not csrf:
321
+ raise RuntimeError("CSRF token not found — page may not have loaded correctly")
322
+
323
+ print(f"CSRF token: {csrf[:20]}...")
324
+
325
+ # Step 3: Execute GraphQL query via fetch() from within the browser
326
+ # This uses the browser's existing cookies automatically
327
+ result = js(f"""
328
+ (async function() {{
329
+ try {{
330
+ var resp = await fetch('/graphql', {{
331
+ method: 'POST',
332
+ credentials: 'include',
333
+ headers: {{
334
+ 'Content-Type': 'application/json',
335
+ 'Accept': 'application/json',
336
+ 'x-csrf-token': '{csrf}',
337
+ 'x-requested-with': 'XMLHttpRequest',
338
+ }},
339
+ body: JSON.stringify({{
340
+ query: `query StartupShow($slug: String!) {{
341
+ startup(slug: $slug) {{
342
+ id
343
+ name
344
+ description: highConcept
345
+ productDesc
346
+ teamSize
347
+ locations {{ displayName }}
348
+ markets {{ displayName }}
349
+ totalRaised
350
+ fundingStage
351
+ badges
352
+ hiring
353
+ jobListingsCount
354
+ }}
355
+ }}`,
356
+ variables: {{ slug: "stripe" }}
357
+ }})
358
+ }});
359
+ var data = await resp.json();
360
+ return JSON.stringify(data);
361
+ }} catch(e) {{
362
+ return JSON.stringify({{error: e.message}});
363
+ }}
364
+ }})()
365
+ """)
366
+
367
+ # NOTE: the async function above evaluates to a Promise — use js_async() if available.
368
+ # If js() does not await Promises, `result` may be None.
369
+ # In that case use this pattern instead: store the response on `window` and read it back afterwards:
370
+ result_sync = js("""
371
+ var done = false, out = null;
372
+ fetch('/graphql', {
373
+ method: 'POST',
374
+ credentials: 'include',
375
+ headers: {
376
+ 'Content-Type': 'application/json',
377
+ 'Accept': 'application/json',
378
+ 'x-csrf-token': document.querySelector('meta[name="csrf-token"]').content,
379
+ 'x-requested-with': 'XMLHttpRequest',
380
+ },
381
+ body: JSON.stringify({
382
+ query: '{ __typename }',
383
+ })
384
+ }).then(r => r.json()).then(d => { window._wf_gql_result = JSON.stringify(d); });
385
+ 'pending'
386
+ """)
387
+ # Wait for async result
388
+ import time; time.sleep(3)
389
+ gql_result = js("window._wf_gql_result || null")
390
+ if gql_result:
391
+ data = json.loads(gql_result)
392
+ print("GraphQL response:", json.dumps(data, indent=2)[:1000])
393
+ ```
394
+
395
+ ### Known GraphQL operations
396
+
397
+ | Operation | Purpose |
398
+ |-----------|---------|
399
+ | `StartupShow` | Full company profile (name, desc, funding, team size, markets) |
400
+ | `JobListingsIndex` | Paginated job board |
401
+ | `JobSearch` | Filtered job search by role/location |
402
+ | `UserProfile` | User/candidate profile |
403
+ | `InvestorShow` | VC/investor profile |
404
+
405
+ ---
406
+
407
+ ## Handling the login wall
408
+
409
+ Wellfound shows a sign-in modal on:
410
+ - Job detail pages (immediately or after 2-3 seconds)
411
+ - Candidate profile pages (immediately)
412
+ - Some company pages after scrolling
413
+
414
+ Company overview pages typically show content without login. Job listings require login to see full details and apply.
415
+
416
+ ```python
417
+ def dismiss_wellfound_login_modal():
418
+ """Close the Wellfound sign-in modal. Safe to call if no modal is present."""
419
+ closed = js("""
420
+ (function() {
421
+ var selectors = [
422
+ 'button[aria-label="Close"]',
423
+ 'button[class*="close"]',
424
+ 'button[class*="Close"]',
425
+ '[data-test="close-modal"]',
426
+ '[aria-label="Dismiss"]',
427
+ 'button[class*="dismiss"]',
428
+ // Wellfound-specific: modal overlay dismiss
429
+ 'div[class*="Modal"] button[type="button"]',
430
+ ];
431
+ for (var s of selectors) {
432
+ var btn = document.querySelector(s);
433
+ if (btn && btn.offsetParent !== null) {
434
+ btn.click();
435
+ return s;
436
+ }
437
+ }
438
+ // Try pressing Escape
439
+ document.dispatchEvent(new KeyboardEvent('keydown', {key: 'Escape', keyCode: 27, bubbles: true}));
440
+ return 'escape';
441
+ })()
442
+ """)
443
+ if closed:
444
+ wait(1)
445
+ return closed
446
+ ```
447
+
448
+ ---
449
+
450
+ ## Detecting DataDome / challenge page
451
+
452
+ After `new_tab()` + `wait(5)`, verify you are on a real Wellfound page:
453
+
454
+ ```python
455
+ def wellfound_is_blocked() -> bool:
456
+ """True if DataDome or Cloudflare challenge is still showing."""
457
+ title = js("document.title") or ""
458
+ url = page_info()["url"]
459
+ # DataDome challenge page has no useful title; CF shows "Just a moment..."
460
+ blocked = (
461
+ "Just a moment" in title or
462
+ "wellfound.com" not in url or
463
+ "captcha-delivery.com" in (js("document.body.innerHTML || ''") or "") or
464
+ not title
465
+ )
466
+ return blocked
467
+
468
+ # Usage
469
+ new_tab("https://wellfound.com/company/stripe")
470
+ wait_for_load()
471
+ wait(5)
472
+
473
+ if wellfound_is_blocked():
474
+ wait(8) # DataDome sometimes needs up to 10s total
475
+ if wellfound_is_blocked():
476
+ capture_screenshot("/tmp/wellfound_blocked.png")
477
+ raise RuntimeError("DataDome/CF challenge did not resolve — see /tmp/wellfound_blocked.png")
478
+ ```
479
+
480
+ ---
481
+
482
+ ## Key selectors reference
483
+
484
+ Wellfound uses **Tailwind CSS** — no stable semantic class names. These patterns are robust:
485
+
486
+ | Target | Selector strategy |
487
+ |--------|------------------|
488
+ | Company name | `h1` (first on page) |
489
+ | Company description | `[class*="description"]`, `[class*="about"]` |
490
+ | Team size | Text search: `/\d+[-–]\d+\s+(employees\|people)/i` |
491
+ | Funding amount | Text search: `/\$[\d,.]+[KMBkm]/i` |
492
+ | Funding stage | Text search: `/\b(Seed\|Series [A-Z]\+?\|Pre-seed\|Late Stage)\b/` |
493
+ | Role/market tags | `a[href*="/jobs?role="]` |
494
+ | Location tags | `a[href*="/location/"]` |
495
+ | Job cards | `a[href*="/company/"][href*="/jobs/"]` (broad fallback) |
496
+ | Job title | `h2`, `h3`, `[class*="title"]` within card |
497
+ | CSRF token | `meta[name="csrf-token"]` |
498
+ | Login modal | `button[aria-label="Close"]`, Escape key |
499
+
500
+ ---
501
+
502
+ ## Common pitfalls
503
+
504
+ 1. **`http_get` is permanently blocked.** DataDome intercepts all non-browser HTTP requests with
505
+ a 403 + captcha challenge. No User-Agent, header combination, or cookie replay works.
506
+ `api.angel.co` is HTTP 404 (shut down). Use `new_tab()` exclusively.
507
+
508
+ 2. **NOT a Next.js app.** Wellfound is Ruby on Rails + React. There is no `__NEXT_DATA__` JSON
509
+ blob. Look for `window.__APOLLO_STATE__`, `window.gon`, or inline `<script>` tags instead.
510
+
511
+ 3. **`wait(5)` minimum after `wait_for_load()`.** DataDome runs JS fingerprinting probes for
512
+ 2-4 seconds after `readyState = complete`. Extracting before this resolves returns the challenge
513
+ page HTML, not real content.
514
+
515
+ 4. **Tailwind CSS — no stable class names.** Wellfound uses Tailwind utility classes. Never
516
+ hardcode a specific class name. Use `href` attribute patterns, `data-test` attributes if present,
517
+ or semantic element selectors (`h1`, `h2`, `li`, `article`).
518
+
519
+ 5. **GraphQL requires both CSRF token AND browser session cookies.** The CSRF token is a
520
+ per-session value from `<meta name="csrf-token">`. Cloudflare Bot Management blocks
521
+ `POST /graphql` from non-browser sessions. Always fire GraphQL via `fetch()` inside the
522
+ browser session (not from Python's `http_get`).
523
+
524
+ 6. **`?role=` and `?location=` params are robots.txt-disallowed.** Wellfound may redirect or
525
+ show a login wall for filtered job search URLs. Load `/jobs` unfiltered and use in-page
526
+ UI filters (dropdowns) to narrow results.
527
+
528
+ 7. **Login wall on job details and user profiles.** Company overview pages load without login.
529
+ Individual job detail pages, and all `/u/{username}` profiles, hit a login modal immediately.
530
+ Call `dismiss_wellfound_login_modal()` right after `wait(5)` on these pages.
531
+
532
+ 8. **Rate limiting.** After ~5-10 rapid page navigations DataDome may escalate to stricter challenges. Use `wait(3)` between
533
+ `goto_url()` calls. If you get a captcha that does not auto-resolve, wait 30-60 seconds.
534
+
535
+ 9. **`new_tab()` over `goto_url()` for the first Wellfound page.** `goto_url()` in an existing tab
536
+ may inherit a stale DataDome fingerprint. `new_tab()` gives a fresh origin context that
537
+ DataDome processes cleanly.
538
+
539
+ ---
540
+
541
+ ## Anti-bot response identification
542
+
543
+ What you see in the 403 body when NOT in a browser:
544
+
545
+ ```html
546
+ <!-- DataDome challenge (page GETs) -->
547
+ <script>var dd={'rt':'c','cid':'...','t':'bv','host':'geo.captcha-delivery.com',...}</script>
548
+ <script src="https://ct.captcha-delivery.com/c.js"></script>
549
+ <!-- rt='c' = captcha required; rt='i' = invisible solve; rt='b' = blocked -->
550
+
551
+ <!-- Cloudflare challenge (API POSTs) -->
552
+ <title>Just a moment...</title>
553
+ <script>window.__CF$cv$params={r:'...',t:'...'}</script>
554
+ ```
555
+
556
+ In a real Chrome browser, both challenges normally resolve automatically without user interaction (see pitfall 8 for the rate-limited case where a captcha persists).
557
+
558
+ ---
559
+
560
+ ## Minimal working example
561
+
562
+ ```python
563
+ import json
564
+
565
+ # Open Wellfound company page
566
+ new_tab("https://wellfound.com/company/openai")
567
+ wait_for_load()
568
+ wait(5)
569
+
570
+ # Verify not blocked
571
+ title = js("document.title")
572
+ assert "Just a moment" not in (title or ""), f"Still on challenge page: {title}"
573
+
574
+ # Extract company overview
575
+ data = js("""
576
+ (function() {
577
+ var name = document.querySelector('h1');
578
+ var bodyText = document.body.innerText;
579
+ var sizeMatch = bodyText.match(/(\\d+[-\\u2013]\\d+)\\s+(employees|people)/i);
580
+ var fundingMatch = bodyText.match(/\\$[\\d,.]+[KMBkm](?:\\s+(?:raised|total))?/i);
581
+ var stageMatch = bodyText.match(/\\b(Seed|Series [A-Z]\\+?|Pre-seed|Late Stage|Public)\\b/);
582
+ var tags = Array.from(document.querySelectorAll('a[href*="/jobs?role="]')).map(a => a.innerText.trim());
583
+ var locs = Array.from(document.querySelectorAll('a[href*="/location/"]')).map(a => a.innerText.trim());
584
+ return JSON.stringify({
585
+ name: name ? name.innerText.trim() : null,
586
+ teamSize: sizeMatch ? sizeMatch[0] : null,
587
+ funding: fundingMatch ? fundingMatch[0] : null,
588
+ stage: stageMatch ? stageMatch[0] : null,
589
+ roles: tags.slice(0, 8),
590
+ locations: locs.slice(0, 5),
591
+ });
592
+ })()
593
+ """)
594
+
595
+ print(json.dumps(json.loads(data), indent=2))
596
+ ```