@pencil-agent/nano-pencil 2.0.0-beta.8 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. package/README.md +267 -267
  2. package/dist/build-meta.json +3 -3
  3. package/dist/core/export-html/AGENT.md +11 -11
  4. package/dist/core/export-html/template.css +971 -971
  5. package/dist/core/export-html/template.html +54 -54
  6. package/dist/core/extensions-host/index.d.ts +1 -1
  7. package/dist/core/extensions-host/loader.js +1 -1
  8. package/dist/core/extensions-host/runner.d.ts +1 -0
  9. package/dist/core/extensions-host/runner.js +2 -2
  10. package/dist/core/extensions-host/types.d.ts +17 -22
  11. package/dist/core/lib/ai/src/types.d.ts +12 -2
  12. package/dist/core/persona/persona-manager.js +5 -2
  13. package/dist/core/runtime/agent-session.js +3 -3
  14. package/dist/core/runtime/extension-core-bindings.d.ts +1 -0
  15. package/dist/core/runtime/extension-core-bindings.js +2 -2
  16. package/dist/extensions/builtin/AGENT.md +115 -115
  17. package/dist/extensions/builtin/browser/AGENT.md +17 -17
  18. package/dist/extensions/builtin/browser/agent-workspace/agent_helpers.py +12 -12
  19. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/amazon/product-search.md +198 -198
  20. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/archive-org/scraping.md +341 -341
  21. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/arxiv/scraping.md +311 -311
  22. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/arxiv-bulk/scraping.md +333 -333
  23. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/atlas/overview.md +70 -70
  24. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/booking-com/scraping.md +578 -578
  25. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/capterra/scraping.md +440 -440
  26. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/centilebrain/generate-estimates.md +110 -110
  27. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coingecko/scraping.md +325 -325
  28. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coinmarketcap/scraping.md +463 -463
  29. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coursera/scraping.md +360 -360
  30. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/craigslist/scraping.md +390 -390
  31. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/crossref/scraping.md +568 -568
  32. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/dev-to/scraping.md +323 -323
  33. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/duckduckgo/scraping.md +349 -349
  34. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/ebay/scraping.md +435 -435
  35. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/etsy/scraping.md +506 -506
  36. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/eventbrite/scraping.md +363 -363
  37. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/expedia/automation.md +168 -168
  38. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/facebook/groups.md +236 -236
  39. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/facebook/pages.md +295 -295
  40. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/framer/editor.md +108 -108
  41. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/fred/scraping.md +493 -493
  42. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/g2/scraping.md +580 -580
  43. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/genius/scraping.md +511 -511
  44. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/github/repo-actions.md +65 -65
  45. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/github/scraping.md +184 -184
  46. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/glassdoor/scraping.md +543 -543
  47. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/gmail/compose.md +122 -122
  48. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/goodreads/scraping.md +461 -461
  49. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/gutenberg/scraping.md +383 -383
  50. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/hackernews/scraping.md +243 -243
  51. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/howlongtobeat/scraping.md +473 -473
  52. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/imdb/scraping.md +271 -271
  53. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/itch-io/scraping.md +436 -436
  54. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/job-boards/indeed-glassdoor.md +1021 -1021
  55. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/letterboxd/scraping.md +349 -349
  56. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/linkedin/invitation-manager.md +109 -109
  57. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/loom/folder-enumeration.md +170 -170
  58. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/macrotrends/scraping.md +537 -537
  59. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/medium/article-hydration.md +120 -120
  60. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/medium/scraping.md +414 -414
  61. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/metacritic/scraping.md +477 -477
  62. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/musicbrainz/scraping.md +478 -478
  63. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/nasa/scraping.md +339 -339
  64. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/news-aggregation/multi-source.md +205 -205
  65. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/open-library/scraping.md +472 -472
  66. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/openalex/scraping.md +470 -470
  67. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/openstreetmap/scraping.md +490 -490
  68. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/package-registries/npm-pypi.md +478 -478
  69. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/polymarket/scraping.md +234 -234
  70. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/producthunt/scraping.md +307 -307
  71. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/pubmed/scraping.md +421 -421
  72. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/quora/scraping.md +364 -364
  73. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/rawg/scraping.md +352 -352
  74. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/reddit/scraping.md +124 -124
  75. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/rest-countries/scraping.md +233 -233
  76. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/sec-edgar/scraping.md +361 -361
  77. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/README.md +36 -36
  78. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/embedded-apps.md +72 -72
  79. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/knowledge-base.md +109 -109
  80. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/polaris-inputs.md +137 -137
  81. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/soundcloud/scraping.md +362 -362
  82. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/spotify/scraping.md +339 -339
  83. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/stackoverflow/scraping.md +435 -435
  84. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/steam/scraping.md +575 -575
  85. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/substack/scraping.md +338 -338
  86. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/thetechgeeks/pricing.md +52 -52
  87. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/tiktok/upload.md +107 -107
  88. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/tradingview/scraping.md +309 -309
  89. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/trello/boards-and-lists.md +88 -88
  90. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/trustpilot/scraping.md +375 -375
  91. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/walmart/scraping.md +444 -444
  92. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/wayback-machine/scraping.md +306 -306
  93. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/weather/scraping.md +398 -398
  94. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/wellfound/scraping.md +596 -596
  95. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/world-bank/scraping.md +356 -356
  96. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/xiaohongshu/scraping.md +84 -84
  97. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/youtube/scraping.md +418 -418
  98. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/zillow/scraping.md +433 -433
  99. package/dist/extensions/builtin/browser/browser.md +73 -73
  100. package/dist/extensions/builtin/browser/install.md +142 -142
  101. package/dist/extensions/builtin/browser/interaction-skills/connection.md +48 -48
  102. package/dist/extensions/builtin/browser/interaction-skills/cookies.md +3 -3
  103. package/dist/extensions/builtin/browser/interaction-skills/cross-origin-iframes.md +3 -3
  104. package/dist/extensions/builtin/browser/interaction-skills/dialogs.md +64 -64
  105. package/dist/extensions/builtin/browser/interaction-skills/downloads.md +3 -3
  106. package/dist/extensions/builtin/browser/interaction-skills/drag-and-drop.md +3 -3
  107. package/dist/extensions/builtin/browser/interaction-skills/dropdowns.md +3 -3
  108. package/dist/extensions/builtin/browser/interaction-skills/iframes.md +3 -3
  109. package/dist/extensions/builtin/browser/interaction-skills/network-requests.md +3 -3
  110. package/dist/extensions/builtin/browser/interaction-skills/print-as-pdf.md +3 -3
  111. package/dist/extensions/builtin/browser/interaction-skills/profile-sync.md +90 -90
  112. package/dist/extensions/builtin/browser/interaction-skills/screenshots.md +17 -17
  113. package/dist/extensions/builtin/browser/interaction-skills/scrolling.md +3 -3
  114. package/dist/extensions/builtin/browser/interaction-skills/shadow-dom.md +3 -3
  115. package/dist/extensions/builtin/browser/interaction-skills/tabs.md +69 -69
  116. package/dist/extensions/builtin/browser/interaction-skills/uploads.md +1 -1
  117. package/dist/extensions/builtin/browser/interaction-skills/viewport.md +3 -3
  118. package/dist/extensions/builtin/browser/src/browser_harness/AGENT.md +15 -15
  119. package/dist/extensions/builtin/browser/src/browser_harness/__init__.py +8 -8
  120. package/dist/extensions/builtin/browser/src/browser_harness/_ipc.py +90 -90
  121. package/dist/extensions/builtin/browser/src/browser_harness/admin.py +722 -722
  122. package/dist/extensions/builtin/browser/src/browser_harness/daemon.py +328 -328
  123. package/dist/extensions/builtin/browser/src/browser_harness/helpers.py +396 -396
  124. package/dist/extensions/builtin/browser/src/browser_harness/run.py +103 -103
  125. package/dist/extensions/builtin/discipline/skills/brainstorming/SKILL.md +33 -33
  126. package/dist/extensions/builtin/discipline/skills/executing-plans/SKILL.md +25 -25
  127. package/dist/extensions/builtin/discipline/skills/finishing-development-branch/SKILL.md +25 -25
  128. package/dist/extensions/builtin/discipline/skills/receiving-code-review/SKILL.md +22 -22
  129. package/dist/extensions/builtin/discipline/skills/requesting-code-review/SKILL.md +31 -31
  130. package/dist/extensions/builtin/discipline/skills/systematic-debugging/SKILL.md +28 -28
  131. package/dist/extensions/builtin/discipline/skills/test-driven-development/SKILL.md +32 -32
  132. package/dist/extensions/builtin/discipline/skills/using-git-worktrees/SKILL.md +25 -25
  133. package/dist/extensions/builtin/discipline/skills/verification-before-completion/SKILL.md +27 -27
  134. package/dist/extensions/builtin/discipline/skills/writing-plans/SKILL.md +26 -26
  135. package/dist/extensions/builtin/goal/README.md +67 -67
  136. package/dist/extensions/builtin/goal/goal-controller.d.ts +39 -10
  137. package/dist/extensions/builtin/goal/goal-controller.js +1 -1
  138. package/dist/extensions/builtin/goal/goal-format.js +1 -1
  139. package/dist/extensions/builtin/goal/goal-prompts.d.ts +2 -0
  140. package/dist/extensions/builtin/goal/goal-prompts.js +5 -4
  141. package/dist/extensions/builtin/goal/goal-store.js +1 -1
  142. package/dist/extensions/builtin/goal/index.d.ts +1 -1
  143. package/dist/extensions/builtin/goal/index.js +10 -7
  144. package/dist/extensions/builtin/grub/README.md +112 -112
  145. package/dist/extensions/builtin/link-world/agent-workspace/README.md +16 -16
  146. package/dist/extensions/builtin/link-world/index.js +6 -6
  147. package/dist/extensions/builtin/link-world/internet-search/internet-search.md +65 -65
  148. package/dist/extensions/builtin/link-world/link-world-agent.md +82 -82
  149. package/dist/extensions/builtin/link-world/linkworld.md +313 -313
  150. package/dist/extensions/builtin/link-world/{network-routing.md → network-routing/network-routing.md} +67 -67
  151. package/dist/extensions/builtin/loop/README.md +92 -92
  152. package/dist/extensions/builtin/mcp/figma-design.md +68 -68
  153. package/dist/extensions/builtin/mcp/mcp-management.md +85 -85
  154. package/dist/extensions/builtin/plan/index.js +1 -1
  155. package/dist/extensions/builtin/recap/AGENT.md +15 -15
  156. package/dist/extensions/builtin/sal/README.md +72 -72
  157. package/dist/extensions/builtin/security-audit/README.md +289 -289
  158. package/dist/extensions/builtin/task/task-store.d.ts +4 -0
  159. package/dist/extensions/builtin/task/task-store.js +1 -1
  160. package/dist/extensions/builtin/team/AGENT.md +112 -112
  161. package/dist/extensions/builtin/team/TESTING.md +299 -299
  162. package/dist/extensions/builtin/token-save/README.md +56 -56
  163. package/dist/extensions/optional/AGENT.md +10 -10
  164. package/dist/index.d.ts +5 -30
  165. package/dist/index.js +1 -1
  166. package/dist/models.d.ts +7 -0
  167. package/dist/models.js +1 -0
  168. package/dist/modes/interactive/components/footer.js +1 -1
  169. package/dist/modes/interactive/components/task-status-panel.d.ts +36 -0
  170. package/dist/modes/interactive/components/task-status-panel.js +1 -0
  171. package/dist/modes/interactive/controllers/stream-render-controller.d.ts +7 -0
  172. package/dist/modes/interactive/controllers/stream-render-controller.js +2 -2
  173. package/dist/modes/interactive/interactive-mode.js +40 -40
  174. package/dist/modes/interactive/state/interactive-state.d.ts +2 -0
  175. package/dist/modes/interactive/state/interactive-state.js +1 -1
  176. package/dist/modes/interactive/theme/dark.json +85 -85
  177. package/dist/modes/interactive/theme/light.json +84 -84
  178. package/dist/modes/interactive/theme/theme-schema.json +335 -335
  179. package/dist/modes/interactive/theme/warm.json +81 -81
  180. package/dist/node_modules/@pencil-agent/ai/dist/cli.js +0 -0
  181. package/dist/node_modules/@pencil-agent/ai/dist/models.generated.js +1 -1
  182. package/dist/node_modules/@pencil-agent/ai/dist/providers/anthropic.js +2 -2
  183. package/dist/node_modules/@pencil-agent/ai/dist/providers/openai-completions.js +5 -5
  184. package/dist/node_modules/@pencil-agent/ai/dist/providers/openai-responses.js +1 -1
  185. package/dist/node_modules/@pencil-agent/ai/dist/stream.js +1 -1
  186. package/dist/packages/protocol/src/commands.d.ts +33 -0
  187. package/dist/packages/protocol/src/flags.d.ts +20 -0
  188. package/dist/packages/protocol/src/hooks.d.ts +17 -0
  189. package/dist/packages/protocol/src/hooks.js +0 -0
  190. package/dist/packages/{extension-sdk → protocol}/src/index.d.ts +7 -4
  191. package/dist/packages/protocol/src/index.js +1 -0
  192. package/dist/packages/{extension-sdk → protocol}/src/lifecycle.d.ts +15 -27
  193. package/dist/packages/protocol/src/lifecycle.js +0 -0
  194. package/dist/packages/{extension-sdk → protocol}/src/tools.d.ts +1 -1
  195. package/dist/packages/protocol/src/tools.js +0 -0
  196. package/dist/public-config.d.ts +12 -0
  197. package/dist/public-config.js +1 -0
  198. package/dist/runtime.d.ts +9 -0
  199. package/dist/runtime.js +1 -0
  200. package/dist/session-compaction.d.ts +7 -0
  201. package/dist/session-compaction.js +1 -0
  202. package/dist/session.d.ts +7 -0
  203. package/dist/session.js +1 -0
  204. package/dist/skills.d.ts +7 -0
  205. package/dist/skills.js +1 -0
  206. package/dist/tools.d.ts +7 -0
  207. package/dist/tools.js +1 -0
  208. package/docs/ACP/345/215/217/350/256/256/351/233/206/346/210/220/345/274/200/345/217/221/346/226/207/346/241/243.md +851 -0
  209. package/docs/SDK-TESTING.md +364 -0
  210. package/docs/codex-goal-command-impl.md +1055 -1055
  211. package/docs/codex-goal-vs-grub.md +500 -500
  212. package/docs/custom-provider.md +27 -27
  213. package/docs/extensions.md +27 -27
  214. package/docs/keybindings.md +27 -27
  215. package/docs/loop 重构完成总结.md +250 -250
  216. package/docs/loop 重构完成报告.md +122 -122
  217. package/docs/loop 重构方案.md +1222 -1222
  218. package/docs/loop 重构方案实现报告.md +158 -158
  219. package/docs/loop 重构方案对比分析.md +128 -128
  220. package/docs/loop 重构计划.md +320 -320
  221. package/docs/loop-usage-examples.md +214 -214
  222. package/docs/mem-core/346/212/200/346/234/257/346/226/207/346/241/243.md +593 -0
  223. package/docs/models.md +27 -27
  224. package/docs/packages.md +27 -27
  225. package/docs/pi-design-philosophy.md +457 -457
  226. package/docs/planmode.md +1987 -1987
  227. package/docs/prompt-templates.md +27 -27
  228. package/docs/providers.md +27 -27
  229. package/docs/sdk.md +27 -27
  230. package/docs/skills.md +27 -27
  231. package/docs/startup-performance-optimization.md +301 -0
  232. package/docs/themes.md +27 -27
  233. package/docs/tui.md +27 -27
  234. package/docs/认知地图.md +47 -0
  235. package/package.json +190 -162
  236. package/dist/packages/extension-sdk/src/index.js +0 -1
  237. package/docs/cc-agent-design.md +0 -1297
  238. package/docs/cc-tui-design.md +0 -1333
  239. package/docs//345/257/271/346/240/207Claude-Code.md +0 -1775
  240. /package/dist/packages/{extension-sdk/src/lifecycle.js → protocol/src/commands.js} +0 -0
  241. /package/dist/packages/{extension-sdk/src/tools.js → protocol/src/flags.js} +0 -0
@@ -1,236 +1,236 @@
1
- # Facebook Groups — mining feeds for posts + external URLs
2
-
3
- Field-tested against a logged-in Jay account on 2026-04-18.
4
- **Requires:** Browser Harness driving a real Chrome that is (a) signed into
5
- Facebook and (b) already a member of the target group. Non-member or logged-out
6
- views serve a stripped landing page with no post content.
7
-
8
- ## What this skill is for
9
-
10
- 1. Pull the N most recent posts from a named FB group
11
- 2. Harvest every external URL that members have shared
12
- 3. Hand that URL list to Firecrawl (or `http_get`) for structured scraping at scale
13
- 4. Cache post text + author + timestamp for downstream keyword matching
14
-
15
- It is NOT for: replying in groups, DMing members, or any write action.
16
-
17
- ## URL patterns
18
-
19
- | What | URL |
20
- |------|-----|
21
- | Group main feed | `https://www.facebook.com/groups/{id_or_slug}` |
22
- | Group "Discussion" tab (canonical feed) | `https://www.facebook.com/groups/{id_or_slug}/?sorting_setting=CHRONOLOGICAL` |
23
- | Single post (permalink) | `https://www.facebook.com/groups/{id_or_slug}/posts/{post_id}/` |
24
- | User's joined-groups feed | `https://www.facebook.com/groups/feed/` |
25
- | List of YOUR groups | `https://www.facebook.com/groups/joins/` |
26
-
27
- The `?sorting_setting=CHRONOLOGICAL` flag matters — without it, FB inserts an
28
- algorithmic ranking that hides older posts and shows the same handful of "popular"
29
- items every visit, which kills monitoring use cases.
30
-
31
- ## DOM anchors (verified 2026-04-18)
32
-
33
- FB rewrites class names every few weeks but ARIA roles and stable URL patterns
34
- hold up well. Anchor on those, not on hashed CSS classes.
35
-
36
- | Anchor | Selector | Notes |
37
- |--------|----------|-------|
38
- | Each post container | `div[role="article"]` | Stable. One per visible post. |
39
- | Post permalink | `a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]` | First match per article = the post link |
40
- | Post body text | `div[data-ad-preview="message"], div[data-ad-comet-preview="message"]` | One of these is the visible body |
41
- | Post author | `h3 a, h4 a` (first inside the article) | Falls back to `strong a` |
42
- | Post timestamp | `a[href*="/posts/"] abbr, a[role="link"] > span > span` (relative time text) | Hover gets the absolute time but the relative string is fine for sorting |
43
- | External link (FB redirector) | `a[href^="https://l.facebook.com/l.php?u="]` | Decode the `u=` param to get the real URL |
44
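- | "See more" button on long posts | `div[role="button"]` whose text is "See more" (`:contains()` is jQuery-only and throws in `querySelector`; use XPath `//div[@role="button"][.//span[contains(., "See more")]]` or filter the matches by `innerText`) | Click before reading body or posts get truncated |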
45
-
46
- If selectors stop returning results, run the self-inspection block at the bottom
47
- of this file and update this table — that's the workflow, not a fallback.
48
-
49
-
50
- ## Scrolling the feed (lazy load)
51
-
52
- FB virtualizes the feed: scrolled-past posts get unmounted from the DOM. So
53
- "scroll then collect" misses old posts. Pattern that works: **collect-as-you-go.**
54
-
55
- ```python
56
- seen = {} # post_url -> dict
57
- TARGET = 50 # how many posts to collect
58
- MAX_SCROLLS = 30
59
-
60
- for i in range(MAX_SCROLLS):
61
- new_posts = js("""
62
- Array.from(document.querySelectorAll('div[role="article"]')).map(el => {
63
- const link = el.querySelector('a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]');
64
- const body = el.querySelector('div[data-ad-preview="message"], div[data-ad-comet-preview="message"]');
65
- const author = el.querySelector('h3 a, h4 a, strong a');
66
- const time = el.querySelector('abbr, a[role="link"] > span > span');
67
- const externals = Array.from(el.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]'))
68
- .map(a => a.href);
69
- return {
70
- url: link?.href || null,
71
- author: author?.innerText || null,
72
- time: time?.innerText || null,
73
- body: body?.innerText?.slice(0, 4000) || null,
74
- externals: externals,
75
- };
76
- }).filter(p => p.url)
77
- """) or []
78
- for p in new_posts:
79
- seen.setdefault(p["url"], p)
80
- if len(seen) >= TARGET:
81
- break
82
- scroll(640, 400, dy=900) # scroll near middle of viewport
83
- wait(2.5) # FB needs ~2s to render new batch + a little buffer
84
- ```
85
-
86
- `wait(2.5)` is the floor. Faster than that and you'll see empty post containers
87
- because React hasn't hydrated them yet.
88
-
89
-
90
- ## Decoding the external-URL redirector
91
-
92
- Every external link gets wrapped in `https://l.facebook.com/l.php?u={URL-encoded real URL}&h=...`.
93
- You want the real URL, not the redirector.
94
-
95
- ```python
96
- from urllib.parse import urlparse, parse_qs, unquote
97
- def decode_fb_link(href):
98
- if not href.startswith("https://l.facebook.com/l.php"):
99
- return href
100
- q = parse_qs(urlparse(href).query)
101
- return unquote(q["u"][0]) if "u" in q else href
102
- ```
103
-
104
- ## Handoff to Firecrawl (for the public outbound URLs)
105
-
106
- Once you have the harvested external list, those URLs are outside FB's walled
107
- garden — public, scrapable by anything. Firecrawl's schema-native extraction
108
- shines here because you want typed results across heterogeneous sources.
109
-
110
- ```python
111
- # After the scroll loop:
112
- external_urls = []
113
- for p in seen.values():
114
- for raw in p["externals"]:
115
- external_urls.append(decode_fb_link(raw))
116
- external_urls = sorted(set(external_urls))
117
- print(f"harvested {len(external_urls)} unique external URLs")
118
-
119
- # Hand off to Firecrawl MCP in the calling conversation:
120
- # firecrawl_extract(
121
- # urls=external_urls,
122
- # prompt="Extract product/listing name, price, location, year, and key features.",
123
- # schema={...}
124
- # )
125
- ```
126
-
127
- When Firecrawl isn't available or the pages are simple, `http_get(url)` from
128
- Harness itself is fine — it does a plain HTTP fetch without a browser, works
129
- for static pages, and is the fastest option for bulk.
130
-
131
-
132
- ## Rate-limit discipline
133
-
134
- FB notices automation patterns at the account level, not the IP level. Driving
135
- a real logged-in session means Jay's account is the one getting rate-limited if
136
- you get greedy. Keep these floors:
137
-
138
- - **≥2 seconds between scrolls** in the collect loop (the `wait(2.5)` above)
139
- - **≥3 seconds between groups** if you're sweeping multiple
140
- - **No more than ~6 groups per hour** for sustained monitoring
141
- - **Don't open the same group more than every 15 minutes** — repeated visits
142
- within a short window are a heuristic that triggers checkpoints
143
-
144
- Symptoms of over-pacing: article containers start rendering with empty bodies,
145
- `/groups/{id}/` redirects to `/checkpoint/`, or the account briefly gets asked
146
- to re-verify a phone or confirm a login from a new device. If that happens,
147
- **stop immediately** and let Jay deal with the UI — don't try to auto-resolve.
148
-
149
- ## Self-inspection block (run this when selectors stop working)
150
-
151
- Paste this into a Harness stdin block to see what anchors currently exist in the
152
- visible feed. Run it on a group you're a member of.
153
-
154
- ```python
155
- print(js("""
156
- ({
157
- articles: document.querySelectorAll('div[role="article"]').length,
158
- body_preview_a: document.querySelectorAll('div[data-ad-preview="message"]').length,
159
- body_preview_b: document.querySelectorAll('div[data-ad-comet-preview="message"]').length,
160
- external_redirectors: document.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]').length,
161
- permalink_posts: document.querySelectorAll('a[href*="/groups/"][href*="/posts/"]').length,
162
- permalink_permalinks: document.querySelectorAll('a[href*="/groups/"][href*="/permalink/"]').length,
163
- })
164
- """))
165
- # If any count is 0, the selector drifted. Open DevTools, right-click a visible
166
- # post, inspect, find the new stable attribute (aria-*, data-*), and update the
167
- # DOM anchors table above.
168
- ```
169
-
170
-
171
- ## Full example — mine one group, emit JSON for downstream tools
172
-
173
- ```bash
174
- cd ~/Developer/browser-harness && uv run browser-harness <<'PY'
175
- import json, sys
176
- from urllib.parse import urlparse, parse_qs, unquote
177
-
178
- GROUP = "riceLakeBoating" # slug or numeric id
179
- TARGET = 50 # how many posts to collect
180
- MAX_SCROLLS = 30
181
-
182
- goto_url(f"https://www.facebook.com/groups/{GROUP}/?sorting_setting=CHRONOLOGICAL")
183
- wait_for_load()
184
- wait(2)
185
-
186
- # Abort if FB bounced us
187
- info = page_info()
188
- if "/checkpoint/" in info["url"] or "/login" in info["url"]:
189
- sys.exit("AUTH_WALL — stop and have Jay re-verify the account.")
190
-
191
- seen = {}
192
- for _ in range(MAX_SCROLLS):
193
- batch = js("""
194
- Array.from(document.querySelectorAll('div[role="article"]')).map(el => {
195
- const link = el.querySelector('a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]');
196
- const body = el.querySelector('div[data-ad-preview="message"], div[data-ad-comet-preview="message"]');
197
- const author = el.querySelector('h3 a, h4 a, strong a');
198
- const time = el.querySelector('abbr, a[role="link"] > span > span');
199
- const externals = Array.from(el.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]')).map(a => a.href);
200
- return { url: link?.href, author: author?.innerText, time: time?.innerText,
201
- body: body?.innerText?.slice(0, 4000), externals };
202
- }).filter(p => p.url)
203
- """) or []
204
- for p in batch:
205
- seen.setdefault(p["url"], p)
206
- if len(seen) >= TARGET:
207
- break
208
- scroll(640, 400, dy=900)
209
- wait(2.5)
210
-
211
- def decode(u):
212
- if not u.startswith("https://l.facebook.com/l.php"): return u
213
- q = parse_qs(urlparse(u).query)
214
- return unquote(q["u"][0]) if "u" in q else u
215
-
216
- posts = list(seen.values())
217
- all_externals = sorted({decode(x) for p in posts for x in p["externals"]})
218
- capture_screenshot(f"/tmp/fb-group-{GROUP}.png", full=True)
219
- print(json.dumps({
220
- "group": GROUP,
221
- "post_count": len(posts),
222
- "posts": posts,
223
- "external_urls": all_externals,
224
- }, ensure_ascii=False))
225
- PY
226
- ```
227
-
228
- The JSON on stdout is the handoff payload — parse it in the calling agent and
229
- route `external_urls` into `firecrawl_extract` with whatever schema matches the
230
- downstream task (competitor inventory, pricing intel, boat listings, etc).
231
-
232
- ## Gotchas log (append when you hit something new)
233
-
234
- - **2026-04-18:** Fresh install verified. People-search URL requires login;
235
- page search `/search/pages/?q=` works the same way. Groups feed defaults to
236
- algorithmic sort — always append `?sorting_setting=CHRONOLOGICAL`.
1
+ # Facebook Groups — mining feeds for posts + external URLs
2
+
3
+ Field-tested against a logged-in Jay account on 2026-04-18.
4
+ **Requires:** Browser Harness driving a real Chrome that is (a) signed into
5
+ Facebook and (b) already a member of the target group. Non-member or logged-out
6
+ views serve a stripped landing page with no post content.
7
+
8
+ ## What this skill is for
9
+
10
+ 1. Pull the N most recent posts from a named FB group
11
+ 2. Harvest every external URL that members have shared
12
+ 3. Hand that URL list to Firecrawl (or `http_get`) for structured scraping at scale
13
+ 4. Cache post text + author + timestamp for downstream keyword matching
14
+
15
+ It is NOT for: replying in groups, DMing members, or any write action.
16
+
17
+ ## URL patterns
18
+
19
+ | What | URL |
20
+ |------|-----|
21
+ | Group main feed | `https://www.facebook.com/groups/{id_or_slug}` |
22
+ | Group "Discussion" tab (canonical feed) | `https://www.facebook.com/groups/{id_or_slug}/?sorting_setting=CHRONOLOGICAL` |
23
+ | Single post (permalink) | `https://www.facebook.com/groups/{id_or_slug}/posts/{post_id}/` |
24
+ | User's joined-groups feed | `https://www.facebook.com/groups/feed/` |
25
+ | List of YOUR groups | `https://www.facebook.com/groups/joins/` |
26
+
27
+ The `?sorting_setting=CHRONOLOGICAL` flag matters — without it, FB inserts an
28
+ algorithmic ranking that hides older posts and shows the same handful of "popular"
29
+ items every visit, which kills monitoring use cases.
30
+
31
+ ## DOM anchors (verified 2026-04-18)
32
+
33
+ FB rewrites class names every few weeks but ARIA roles and stable URL patterns
34
+ hold up well. Anchor on those, not on hashed CSS classes.
35
+
36
+ | Anchor | Selector | Notes |
37
+ |--------|----------|-------|
38
+ | Each post container | `div[role="article"]` | Stable. One per visible post. |
39
+ | Post permalink | `a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]` | First match per article = the post link |
40
+ | Post body text | `div[data-ad-preview="message"], div[data-ad-comet-preview="message"]` | One of these is the visible body |
41
+ | Post author | `h3 a, h4 a` (first inside the article) | Falls back to `strong a` |
42
+ | Post timestamp | `a[href*="/posts/"] abbr, a[role="link"] > span > span` (relative time text) | Hover gets the absolute time but the relative string is fine for sorting |
43
+ | External link (FB redirector) | `a[href^="https://l.facebook.com/l.php?u="]` | Decode the `u=` param to get the real URL |
44
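+ | "See more" button on long posts | `div[role="button"]:has(span:contains("See more"))` (jQuery-style pseudo-code: `:contains` is not valid CSS, so `querySelector` rejects it — in practice use the XPath `//div[@role="button"][.//span[contains(., "See more")]]`) | Click before reading body or posts get truncated |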
45
+
46
+ If selectors stop returning results, run the self-inspection block at the bottom
47
+ of this file and update this table — that's the workflow, not a fallback.
48
+
49
+
50
+ ## Scrolling the feed (lazy load)
51
+
52
+ FB virtualizes the feed: scrolled-past posts get unmounted from the DOM. So
53
+ "scroll then collect" misses old posts. Pattern that works: **collect-as-you-go.**
54
+
55
+ ```python
56
+ seen = {} # post_url -> dict
57
+ TARGET = 50 # how many posts to collect
58
+ MAX_SCROLLS = 30
59
+
60
+ for i in range(MAX_SCROLLS):
61
+ new_posts = js("""
62
+ Array.from(document.querySelectorAll('div[role="article"]')).map(el => {
63
+ const link = el.querySelector('a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]');
64
+ const body = el.querySelector('div[data-ad-preview="message"], div[data-ad-comet-preview="message"]');
65
+ const author = el.querySelector('h3 a, h4 a, strong a');
66
+ const time = el.querySelector('abbr, a[role="link"] > span > span');
67
+ const externals = Array.from(el.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]'))
68
+ .map(a => a.href);
69
+ return {
70
+ url: link?.href || null,
71
+ author: author?.innerText || null,
72
+ time: time?.innerText || null,
73
+ body: body?.innerText?.slice(0, 4000) || null,
74
+ externals: externals,
75
+ };
76
+ }).filter(p => p.url)
77
+ """) or []
78
+ for p in new_posts:
79
+ seen.setdefault(p["url"], p)
80
+ if len(seen) >= TARGET:
81
+ break
82
+ scroll(640, 400, dy=900) # scroll near middle of viewport
83
+ wait(2.5) # FB needs ~2s to render new batch + a little buffer
84
+ ```
85
+
86
+ `wait(2.5)` is the floor. Faster than that and you'll see empty post containers
87
+ because React hasn't hydrated them yet.
88
+
89
+
90
+ ## Decoding the external-URL redirector
91
+
92
+ Every external link gets wrapped in `https://l.facebook.com/l.php?u={URL-encoded real URL}&h=...`.
93
+ You want the real URL, not the redirector.
94
+
95
+ ```python
96
+ from urllib.parse import urlparse, parse_qs, unquote
97
+ def decode_fb_link(href):
98
+ if not href.startswith("https://l.facebook.com/l.php"):
99
+ return href
100
+ q = parse_qs(urlparse(href).query)
101
+ return unquote(q["u"][0]) if "u" in q else href
102
+ ```
103
+
104
+ ## Handoff to Firecrawl (for the public outbound URLs)
105
+
106
+ Once you have the harvested external list, those URLs are outside FB's walled
107
+ garden — public, scrapable by anything. Firecrawl's schema-native extraction
108
+ shines here because you want typed results across heterogeneous sources.
109
+
110
+ ```python
111
+ # After the scroll loop:
112
+ external_urls = []
113
+ for p in seen.values():
114
+ for raw in p["externals"]:
115
+ external_urls.append(decode_fb_link(raw))
116
+ external_urls = sorted(set(external_urls))
117
+ print(f"harvested {len(external_urls)} unique external URLs")
118
+
119
+ # Hand off to Firecrawl MCP in the calling conversation:
120
+ # firecrawl_extract(
121
+ # urls=external_urls,
122
+ # prompt="Extract product/listing name, price, location, year, and key features.",
123
+ # schema={...}
124
+ # )
125
+ ```
126
+
127
+ When Firecrawl isn't available or the pages are simple, `http_get(url)` from
128
+ Harness itself is fine — it does a plain HTTP fetch without a browser, works
129
+ for static pages, and is the fastest option for bulk.
130
+
131
+
132
+ ## Rate-limit discipline
133
+
134
+ FB notices automation patterns at the account level, not the IP level. Driving
135
+ a real logged-in session means Jay's account is the one getting rate-limited if
136
+ you get greedy. Keep these floors:
137
+
138
+ - **≥2.5 seconds between scrolls** in the collect loop (the `wait(2.5)` above)
139
+ - **≥3 seconds between groups** if you're sweeping multiple
140
+ - **No more than ~6 groups per hour** for sustained monitoring
141
+ - **Don't open the same group more than once every 15 minutes** — repeated visits
142
+ within a short window are a heuristic that triggers checkpoints
143
+
144
+ Symptoms of pushing past these limits: article containers start rendering with empty bodies,
145
+ `/groups/{id}/` redirects to `/checkpoint/`, or the account briefly gets asked
146
+ to re-verify a phone or confirm a login from a new device. If that happens,
147
+ **stop immediately** and let Jay deal with the UI — don't try to auto-resolve.
148
+
149
+ ## Self-inspection block (run this when selectors stop working)
150
+
151
+ Paste this into a Harness stdin block to see what anchors currently exist in the
152
+ visible feed. Run it on a group you're a member of.
153
+
154
+ ```python
155
+ print(js("""
156
+ ({
157
+ articles: document.querySelectorAll('div[role="article"]').length,
158
+ body_preview_a: document.querySelectorAll('div[data-ad-preview="message"]').length,
159
+ body_preview_b: document.querySelectorAll('div[data-ad-comet-preview="message"]').length,
160
+ external_redirectors: document.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]').length,
161
+ permalink_posts: document.querySelectorAll('a[href*="/groups/"][href*="/posts/"]').length,
162
+ permalink_permalinks: document.querySelectorAll('a[href*="/groups/"][href*="/permalink/"]').length,
163
+ })
164
+ """))
165
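+ # A count of 0 suggests that selector drifted (body_preview_a/b and the two permalink_*
166
+ # selectors are alternates, so only a pair that is 0 on both sides is a problem). Open DevTools,
167
+ # right-click a visible post, inspect, find the new stable attribute (aria-*, data-*), and update the DOM anchors table above.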
168
+ ```
169
+
170
+
171
+ ## Full example — mine one group, emit JSON for downstream tools
172
+
173
+ ```bash
174
+ cd ~/Developer/browser-harness && uv run browser-harness <<'PY'
175
+ import json, sys
176
+ from urllib.parse import urlparse, parse_qs, unquote
177
+
178
+ GROUP = "riceLakeBoating" # slug or numeric id
179
+ TARGET = 50 # how many posts to collect
180
+ MAX_SCROLLS = 30
181
+
182
+ goto_url(f"https://www.facebook.com/groups/{GROUP}/?sorting_setting=CHRONOLOGICAL")
183
+ wait_for_load()
184
+ wait(2)
185
+
186
+ # Abort if FB bounced us
187
+ info = page_info()
188
+ if "/checkpoint/" in info["url"] or "/login" in info["url"]:
189
+ sys.exit("AUTH_WALL — stop and have Jay re-verify the account.")
190
+
191
+ seen = {}
192
+ for _ in range(MAX_SCROLLS):
193
+ batch = js("""
194
+ Array.from(document.querySelectorAll('div[role="article"]')).map(el => {
195
+ const link = el.querySelector('a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]');
196
+ const body = el.querySelector('div[data-ad-preview="message"], div[data-ad-comet-preview="message"]');
197
+ const author = el.querySelector('h3 a, h4 a, strong a');
198
+ const time = el.querySelector('abbr, a[role="link"] > span > span');
199
+ const externals = Array.from(el.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]')).map(a => a.href);
200
+ return { url: link?.href, author: author?.innerText, time: time?.innerText,
201
+ body: body?.innerText?.slice(0, 4000), externals };
202
+ }).filter(p => p.url)
203
+ """) or []
204
+ for p in batch:
205
+ seen.setdefault(p["url"], p)
206
+ if len(seen) >= TARGET:
207
+ break
208
+ scroll(640, 400, dy=900)
209
+ wait(2.5)
210
+
211
+ def decode(u):
212
+ if not u.startswith("https://l.facebook.com/l.php"): return u
213
+ q = parse_qs(urlparse(u).query)
214
+ return unquote(q["u"][0]) if "u" in q else u
215
+
216
+ posts = list(seen.values())
217
+ all_externals = sorted({decode(x) for p in posts for x in p["externals"]})
218
+ capture_screenshot(f"/tmp/fb-group-{GROUP}.png", full=True)
219
+ print(json.dumps({
220
+ "group": GROUP,
221
+ "post_count": len(posts),
222
+ "posts": posts,
223
+ "external_urls": all_externals,
224
+ }, ensure_ascii=False))
225
+ PY
226
+ ```
227
+
228
+ The JSON on stdout is the handoff payload — parse it in the calling agent and
229
+ route `external_urls` into `firecrawl_extract` with whatever schema matches the
230
+ downstream task (competitor inventory, pricing intel, boat listings, etc).
231
+
232
+ ## Gotchas log (append when you hit something new)
233
+
234
+ - **2026-04-18:** Fresh install verified. People-search URL requires login;
235
+ page search `/search/pages/?q=` works the same way. Groups feed defaults to
236
+ algorithmic sort — always append `?sorting_setting=CHRONOLOGICAL`.