@pencil-agent/nano-pencil 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/README.md +267 -267
  2. package/dist/build-meta.json +3 -3
  3. package/dist/core/export-html/AGENT.md +11 -11
  4. package/dist/core/export-html/template.css +971 -971
  5. package/dist/core/export-html/template.html +54 -54
  6. package/dist/core/mcp/mcp-client.d.ts +3 -1
  7. package/dist/core/mcp/mcp-client.js +6 -6
  8. package/dist/core/mcp/mcp-config.d.ts +3 -3
  9. package/dist/core/mcp/mcp-config.js +1 -1
  10. package/dist/core/mcp/mcp-manager.d.ts +5 -1
  11. package/dist/core/mcp/mcp-manager.js +1 -1
  12. package/dist/core/platform/config/resource-loader.d.ts +2 -0
  13. package/dist/core/platform/config/resource-loader.js +2 -2
  14. package/dist/core/runtime/agent-session.d.ts +12 -0
  15. package/dist/core/runtime/agent-session.js +8 -8
  16. package/dist/core/runtime/sdk.d.ts +8 -0
  17. package/dist/core/runtime/sdk.js +1 -1
  18. package/dist/extensions/builtin/AGENT.md +115 -115
  19. package/dist/extensions/builtin/browser/AGENT.md +17 -17
  20. package/dist/extensions/builtin/browser/agent-workspace/agent_helpers.py +12 -12
  21. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/amazon/product-search.md +198 -198
  22. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/archive-org/scraping.md +341 -341
  23. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/arxiv/scraping.md +311 -311
  24. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/arxiv-bulk/scraping.md +333 -333
  25. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/atlas/overview.md +70 -70
  26. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/booking-com/scraping.md +578 -578
  27. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/capterra/scraping.md +440 -440
  28. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/centilebrain/generate-estimates.md +110 -110
  29. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coingecko/scraping.md +325 -325
  30. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coinmarketcap/scraping.md +463 -463
  31. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/coursera/scraping.md +360 -360
  32. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/craigslist/scraping.md +390 -390
  33. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/crossref/scraping.md +568 -568
  34. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/dev-to/scraping.md +323 -323
  35. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/duckduckgo/scraping.md +349 -349
  36. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/ebay/scraping.md +435 -435
  37. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/etsy/scraping.md +506 -506
  38. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/eventbrite/scraping.md +363 -363
  39. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/expedia/automation.md +168 -168
  40. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/facebook/groups.md +236 -236
  41. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/facebook/pages.md +295 -295
  42. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/framer/editor.md +108 -108
  43. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/fred/scraping.md +493 -493
  44. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/g2/scraping.md +580 -580
  45. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/genius/scraping.md +511 -511
  46. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/github/repo-actions.md +65 -65
  47. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/github/scraping.md +184 -184
  48. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/glassdoor/scraping.md +543 -543
  49. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/gmail/compose.md +122 -122
  50. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/goodreads/scraping.md +461 -461
  51. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/gutenberg/scraping.md +383 -383
  52. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/hackernews/scraping.md +243 -243
  53. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/howlongtobeat/scraping.md +473 -473
  54. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/imdb/scraping.md +271 -271
  55. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/itch-io/scraping.md +436 -436
  56. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/job-boards/indeed-glassdoor.md +1021 -1021
  57. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/letterboxd/scraping.md +349 -349
  58. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/linkedin/invitation-manager.md +109 -109
  59. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/loom/folder-enumeration.md +170 -170
  60. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/macrotrends/scraping.md +537 -537
  61. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/medium/article-hydration.md +120 -120
  62. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/medium/scraping.md +414 -414
  63. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/metacritic/scraping.md +477 -477
  64. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/musicbrainz/scraping.md +478 -478
  65. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/nasa/scraping.md +339 -339
  66. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/news-aggregation/multi-source.md +205 -205
  67. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/open-library/scraping.md +472 -472
  68. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/openalex/scraping.md +470 -470
  69. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/openstreetmap/scraping.md +490 -490
  70. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/package-registries/npm-pypi.md +478 -478
  71. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/polymarket/scraping.md +234 -234
  72. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/producthunt/scraping.md +307 -307
  73. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/pubmed/scraping.md +421 -421
  74. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/quora/scraping.md +364 -364
  75. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/rawg/scraping.md +352 -352
  76. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/reddit/scraping.md +124 -124
  77. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/rest-countries/scraping.md +233 -233
  78. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/sec-edgar/scraping.md +361 -361
  79. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/README.md +36 -36
  80. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/embedded-apps.md +72 -72
  81. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/knowledge-base.md +109 -109
  82. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/shopify-admin/polaris-inputs.md +137 -137
  83. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/soundcloud/scraping.md +362 -362
  84. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/spotify/scraping.md +339 -339
  85. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/stackoverflow/scraping.md +435 -435
  86. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/steam/scraping.md +575 -575
  87. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/substack/scraping.md +338 -338
  88. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/thetechgeeks/pricing.md +52 -52
  89. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/tiktok/upload.md +107 -107
  90. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/tradingview/scraping.md +309 -309
  91. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/trello/boards-and-lists.md +88 -88
  92. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/trustpilot/scraping.md +375 -375
  93. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/walmart/scraping.md +444 -444
  94. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/wayback-machine/scraping.md +306 -306
  95. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/weather/scraping.md +398 -398
  96. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/wellfound/scraping.md +596 -596
  97. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/world-bank/scraping.md +356 -356
  98. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/xiaohongshu/scraping.md +84 -84
  99. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/youtube/scraping.md +418 -418
  100. package/dist/extensions/builtin/browser/agent-workspace/domain-skills/zillow/scraping.md +433 -433
  101. package/dist/extensions/builtin/browser/browser.md +73 -73
  102. package/dist/extensions/builtin/browser/install.md +142 -142
  103. package/dist/extensions/builtin/browser/interaction-skills/connection.md +48 -48
  104. package/dist/extensions/builtin/browser/interaction-skills/cookies.md +3 -3
  105. package/dist/extensions/builtin/browser/interaction-skills/cross-origin-iframes.md +3 -3
  106. package/dist/extensions/builtin/browser/interaction-skills/dialogs.md +64 -64
  107. package/dist/extensions/builtin/browser/interaction-skills/downloads.md +3 -3
  108. package/dist/extensions/builtin/browser/interaction-skills/drag-and-drop.md +3 -3
  109. package/dist/extensions/builtin/browser/interaction-skills/dropdowns.md +3 -3
  110. package/dist/extensions/builtin/browser/interaction-skills/iframes.md +3 -3
  111. package/dist/extensions/builtin/browser/interaction-skills/network-requests.md +3 -3
  112. package/dist/extensions/builtin/browser/interaction-skills/print-as-pdf.md +3 -3
  113. package/dist/extensions/builtin/browser/interaction-skills/profile-sync.md +90 -90
  114. package/dist/extensions/builtin/browser/interaction-skills/screenshots.md +17 -17
  115. package/dist/extensions/builtin/browser/interaction-skills/scrolling.md +3 -3
  116. package/dist/extensions/builtin/browser/interaction-skills/shadow-dom.md +3 -3
  117. package/dist/extensions/builtin/browser/interaction-skills/tabs.md +69 -69
  118. package/dist/extensions/builtin/browser/interaction-skills/uploads.md +1 -1
  119. package/dist/extensions/builtin/browser/interaction-skills/viewport.md +3 -3
  120. package/dist/extensions/builtin/browser/src/browser_harness/AGENT.md +15 -15
  121. package/dist/extensions/builtin/browser/src/browser_harness/__init__.py +8 -8
  122. package/dist/extensions/builtin/browser/src/browser_harness/_ipc.py +90 -90
  123. package/dist/extensions/builtin/browser/src/browser_harness/admin.py +722 -722
  124. package/dist/extensions/builtin/browser/src/browser_harness/daemon.py +328 -328
  125. package/dist/extensions/builtin/browser/src/browser_harness/helpers.py +396 -396
  126. package/dist/extensions/builtin/browser/src/browser_harness/run.py +103 -103
  127. package/dist/extensions/builtin/discipline/skills/brainstorming/SKILL.md +33 -33
  128. package/dist/extensions/builtin/discipline/skills/executing-plans/SKILL.md +25 -25
  129. package/dist/extensions/builtin/discipline/skills/finishing-development-branch/SKILL.md +25 -25
  130. package/dist/extensions/builtin/discipline/skills/receiving-code-review/SKILL.md +22 -22
  131. package/dist/extensions/builtin/discipline/skills/requesting-code-review/SKILL.md +31 -31
  132. package/dist/extensions/builtin/discipline/skills/systematic-debugging/SKILL.md +28 -28
  133. package/dist/extensions/builtin/discipline/skills/test-driven-development/SKILL.md +32 -32
  134. package/dist/extensions/builtin/discipline/skills/using-git-worktrees/SKILL.md +25 -25
  135. package/dist/extensions/builtin/discipline/skills/verification-before-completion/SKILL.md +27 -27
  136. package/dist/extensions/builtin/discipline/skills/writing-plans/SKILL.md +26 -26
  137. package/dist/extensions/builtin/goal/README.md +67 -67
  138. package/dist/extensions/builtin/grub/README.md +112 -112
  139. package/dist/extensions/builtin/link-world/agent-workspace/README.md +16 -16
  140. package/dist/extensions/builtin/link-world/internet-search/internet-search.md +65 -65
  141. package/dist/extensions/builtin/link-world/link-world-agent.md +82 -82
  142. package/dist/extensions/builtin/link-world/linkworld.md +313 -313
  143. package/dist/extensions/builtin/link-world/network-routing/network-routing.md +67 -67
  144. package/dist/extensions/builtin/loop/README.md +92 -92
  145. package/dist/extensions/builtin/mcp/figma-design.md +68 -68
  146. package/dist/extensions/builtin/mcp/mcp-management.md +85 -85
  147. package/dist/extensions/builtin/recap/AGENT.md +15 -15
  148. package/dist/extensions/builtin/sal/README.md +72 -72
  149. package/dist/extensions/builtin/security-audit/README.md +289 -289
  150. package/dist/extensions/builtin/team/AGENT.md +112 -112
  151. package/dist/extensions/builtin/team/TESTING.md +299 -299
  152. package/dist/extensions/builtin/token-save/README.md +56 -56
  153. package/dist/extensions/optional/AGENT.md +10 -10
  154. package/dist/modes/interactive/interactive-mode.js +36 -36
  155. package/dist/modes/interactive/theme/dark.json +85 -85
  156. package/dist/modes/interactive/theme/light.json +84 -84
  157. package/dist/modes/interactive/theme/theme-schema.json +335 -335
  158. package/dist/modes/interactive/theme/warm.json +81 -81
  159. package/dist/node_modules/@pencil-agent/agent-core/dist/agent-loop.js +3 -2
  160. package/dist/node_modules/@pencil-agent/agent-core/dist/structured-adaptive-agent-loop.js +2 -1
  161. package/dist/node_modules/@pencil-agent/ai/dist/cli.js +0 -0
  162. package/docs/cc-agent-design.md +1297 -0
  163. package/docs/cc-tui-design.md +1333 -0
  164. package/docs/codex-goal-command-impl.md +1055 -1055
  165. package/docs/codex-goal-vs-grub.md +500 -500
  166. package/docs/custom-provider.md +27 -27
  167. package/docs/extensions.md +27 -27
  168. package/docs/keybindings.md +27 -27
  169. package/docs/loop 重构完成总结.md +250 -250
  170. package/docs/loop 重构完成报告.md +122 -122
  171. package/docs/loop 重构方案.md +1222 -1222
  172. package/docs/loop 重构方案实现报告.md +158 -158
  173. package/docs/loop 重构方案对比分析.md +128 -128
  174. package/docs/loop 重构计划.md +320 -320
  175. package/docs/loop-usage-examples.md +214 -214
  176. package/docs/models.md +27 -27
  177. package/docs/nanoPencil-学习计划.md +170 -0
  178. package/docs/packages.md +27 -27
  179. package/docs/pi-design-philosophy.md +457 -457
  180. package/docs/planmode.md +1987 -1987
  181. package/docs/prompt-templates.md +27 -27
  182. package/docs/providers.md +27 -27
  183. package/docs/scan-report.md +3820 -0
  184. package/docs/sdk.md +27 -27
  185. package/docs/skills.md +27 -27
  186. package/docs/themes.md +27 -27
  187. package/docs/tui.md +27 -27
  188. package/docs//345/257/271/346/240/207Claude-Code.md +1775 -0
  189. package/docs//351/230/277/351/207/214/345/267/264/345/267/264/350/264/242/346/212/245/345/210/206/346/236/220/344/271/246.md +261 -0
  190. package/package.json +190 -190
  191. package/docs/ACP/345/215/217/350/256/256/351/233/206/346/210/220/345/274/200/345/217/221/346/226/207/346/241/243.md +0 -851
  192. package/docs/SDK-TESTING.md +0 -364
  193. package/docs/mem-core/346/212/200/346/234/257/346/226/207/346/241/243.md +0 -593
  194. package/docs/startup-performance-optimization.md +0 -301
  195. package/docs/认知地图.md +0 -47
@@ -1,236 +1,236 @@
1
- # Facebook Groups — mining feeds for posts + external URLs
2
-
3
- Field-tested against a logged-in Jay account on 2026-04-18.
4
- **Requires:** Browser Harness driving a real Chrome that is (a) signed into
5
- Facebook and (b) already a member of the target group. Non-member or logged-out
6
- views serve a stripped landing page with no post content.
7
-
8
- ## What this skill is for
9
-
10
- 1. Pull the N most recent posts from a named FB group
11
- 2. Harvest every external URL that members have shared
12
- 3. Hand that URL list to Firecrawl (or `http_get`) for structured scraping at scale
13
- 4. Cache post text + author + timestamp for downstream keyword matching
14
-
15
- It is NOT for: replying in groups, DMing members, or any write action.
16
-
17
- ## URL patterns
18
-
19
- | What | URL |
20
- |------|-----|
21
- | Group main feed | `https://www.facebook.com/groups/{id_or_slug}` |
22
- | Group "Discussion" tab (canonical feed) | `https://www.facebook.com/groups/{id_or_slug}/?sorting_setting=CHRONOLOGICAL` |
23
- | Single post (permalink) | `https://www.facebook.com/groups/{id_or_slug}/posts/{post_id}/` |
24
- | User's joined-groups feed | `https://www.facebook.com/groups/feed/` |
25
- | List of YOUR groups | `https://www.facebook.com/groups/joins/` |
26
-
27
- The `?sorting_setting=CHRONOLOGICAL` flag matters — without it, FB inserts an
28
- algorithmic ranking that hides older posts and shows the same handful of "popular"
29
- items every visit, which kills monitoring use cases.
30
-
31
- ## DOM anchors (verified 2026-04-18)
32
-
33
- FB rewrites class names every few weeks but ARIA roles and stable URL patterns
34
- hold up well. Anchor on those, not on hashed CSS classes.
35
-
36
- | Anchor | Selector | Notes |
37
- |--------|----------|-------|
38
- | Each post container | `div[role="article"]` | Stable. One per visible post. |
39
- | Post permalink | `a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]` | First match per article = the post link |
40
- | Post body text | `div[data-ad-preview="message"], div[data-ad-comet-preview="message"]` | One of these is the visible body |
41
- | Post author | `h3 a, h4 a` (first inside the article) | Falls back to `strong a` |
42
- | Post timestamp | `a[href*="/posts/"] abbr, a[role="link"] > span > span` (relative time text) | Hover gets the absolute time but the relative string is fine for sorting |
43
- | External link (FB redirector) | `a[href^="https://l.facebook.com/l.php?u="]` | Decode the `u=` param to get the real URL |
44
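- | "See more" button on long posts | `div[role="button"]:has(span:contains("See more"))` (jQuery-style: `:contains()` is not valid in native `querySelector`, so filter `div[role="button"]` by `innerText` in JS or use an XPath fallback) | Click before reading body or posts get truncated |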
45
-
46
- If selectors stop returning results, run the self-inspection block at the bottom
47
- of this file and update this table — that's the workflow, not a fallback.
48
-
49
-
50
- ## Scrolling the feed (lazy load)
51
-
52
- FB virtualizes the feed: scrolled-past posts get unmounted from the DOM. So
53
- "scroll then collect" misses old posts. Pattern that works: **collect-as-you-go.**
54
-
55
- ```python
56
- seen = {} # post_url -> dict
57
- TARGET = 50 # how many posts to collect
58
- MAX_SCROLLS = 30
59
-
60
- for i in range(MAX_SCROLLS):
61
- new_posts = js("""
62
- Array.from(document.querySelectorAll('div[role="article"]')).map(el => {
63
- const link = el.querySelector('a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]');
64
- const body = el.querySelector('div[data-ad-preview="message"], div[data-ad-comet-preview="message"]');
65
- const author = el.querySelector('h3 a, h4 a, strong a');
66
- const time = el.querySelector('abbr, a[role="link"] > span > span');
67
- const externals = Array.from(el.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]'))
68
- .map(a => a.href);
69
- return {
70
- url: link?.href || null,
71
- author: author?.innerText || null,
72
- time: time?.innerText || null,
73
- body: body?.innerText?.slice(0, 4000) || null,
74
- externals: externals,
75
- };
76
- }).filter(p => p.url)
77
- """) or []
78
- for p in new_posts:
79
- seen.setdefault(p["url"], p)
80
- if len(seen) >= TARGET:
81
- break
82
- scroll(640, 400, dy=900) # scroll near middle of viewport
83
- wait(2.5) # FB needs ~2s to render new batch + a little buffer
84
- ```
85
-
86
- `wait(2.5)` is the floor. Faster than that and you'll see empty post containers
87
- because React hasn't hydrated them yet.
88
-
89
-
90
- ## Decoding the external-URL redirector
91
-
92
- Every external link gets wrapped in `https://l.facebook.com/l.php?u={URL-encoded real URL}&h=...`.
93
- You want the real URL, not the redirector.
94
-
95
- ```python
96
- from urllib.parse import urlparse, parse_qs, unquote
97
- def decode_fb_link(href):
98
- if not href.startswith("https://l.facebook.com/l.php"):
99
- return href
100
- q = parse_qs(urlparse(href).query)
101
- return unquote(q["u"][0]) if "u" in q else href
102
- ```
103
-
104
- ## Handoff to Firecrawl (for the public outbound URLs)
105
-
106
- Once you have the harvested external list, those URLs are outside FB's walled
107
- garden — public, scrapable by anything. Firecrawl's schema-native extraction
108
- shines here because you want typed results across heterogeneous sources.
109
-
110
- ```python
111
- # After the scroll loop:
112
- external_urls = []
113
- for p in seen.values():
114
- for raw in p["externals"]:
115
- external_urls.append(decode_fb_link(raw))
116
- external_urls = sorted(set(external_urls))
117
- print(f"harvested {len(external_urls)} unique external URLs")
118
-
119
- # Hand off to Firecrawl MCP in the calling conversation:
120
- # firecrawl_extract(
121
- # urls=external_urls,
122
- # prompt="Extract product/listing name, price, location, year, and key features.",
123
- # schema={...}
124
- # )
125
- ```
126
-
127
- When Firecrawl isn't available or the pages are simple, `http_get(url)` from
128
- Harness itself is fine — it does a plain HTTP fetch without a browser, works
129
- for static pages, and is the fastest option for bulk.
130
-
131
-
132
- ## Rate-limit discipline
133
-
134
- FB notices automation patterns at the account level, not the IP level. Driving
135
- a real logged-in session means Jay's account is the one getting rate-limited if
136
- you get greedy. Keep these floors:
137
-
138
- - **≥2 seconds between scrolls** in the collect loop (the `wait(2.5)` above)
139
- - **≥3 seconds between groups** if you're sweeping multiple
140
- - **No more than ~6 groups per hour** for sustained monitoring
141
- - **Don't open the same group more than every 15 minutes** — repeated visits
142
- within a short window trip a heuristic that triggers checkpoints
143
-
144
- Symptoms of over-pacing: article containers start rendering with empty bodies,
145
- `/groups/{id}/` redirects to `/checkpoint/`, or the account briefly gets asked
146
- to re-verify a phone or confirm a login from a new device. If that happens,
147
- **stop immediately** and let Jay deal with the UI — don't try to auto-resolve.
148
-
149
- ## Self-inspection block (run this when selectors stop working)
150
-
151
- Paste this into a Harness stdin block to see what anchors currently exist in the
152
- visible feed. Run it on a group you're a member of.
153
-
154
- ```python
155
- print(js("""
156
- ({
157
- articles: document.querySelectorAll('div[role="article"]').length,
158
- body_preview_a: document.querySelectorAll('div[data-ad-preview="message"]').length,
159
- body_preview_b: document.querySelectorAll('div[data-ad-comet-preview="message"]').length,
160
- external_redirectors: document.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]').length,
161
- permalink_posts: document.querySelectorAll('a[href*="/groups/"][href*="/posts/"]').length,
162
- permalink_permalinks: document.querySelectorAll('a[href*="/groups/"][href*="/permalink/"]').length,
163
- })
164
- """))
165
- # If any count is 0, the selector drifted. Open DevTools, right-click a visible
166
- # post, inspect, find the new stable attribute (aria-*, data-*), and update the
167
- # DOM anchors table above.
168
- ```
169
-
170
-
171
- ## Full example — mine one group, emit JSON for downstream tools
172
-
173
- ```bash
174
- cd ~/Developer/browser-harness && uv run browser-harness <<'PY'
175
- import json, sys
176
- from urllib.parse import urlparse, parse_qs, unquote
177
-
178
- GROUP = "riceLakeBoating" # slug or numeric id
179
- TARGET = 50 # how many posts to collect
180
- MAX_SCROLLS = 30
181
-
182
- goto_url(f"https://www.facebook.com/groups/{GROUP}/?sorting_setting=CHRONOLOGICAL")
183
- wait_for_load()
184
- wait(2)
185
-
186
- # Abort if FB bounced us
187
- info = page_info()
188
- if "/checkpoint/" in info["url"] or "/login" in info["url"]:
189
- sys.exit("AUTH_WALL — stop and have Jay re-verify the account.")
190
-
191
- seen = {}
192
- for _ in range(MAX_SCROLLS):
193
- batch = js("""
194
- Array.from(document.querySelectorAll('div[role="article"]')).map(el => {
195
- const link = el.querySelector('a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]');
196
- const body = el.querySelector('div[data-ad-preview="message"], div[data-ad-comet-preview="message"]');
197
- const author = el.querySelector('h3 a, h4 a, strong a');
198
- const time = el.querySelector('abbr, a[role="link"] > span > span');
199
- const externals = Array.from(el.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]')).map(a => a.href);
200
- return { url: link?.href, author: author?.innerText, time: time?.innerText,
201
- body: body?.innerText?.slice(0, 4000), externals };
202
- }).filter(p => p.url)
203
- """) or []
204
- for p in batch:
205
- seen.setdefault(p["url"], p)
206
- if len(seen) >= TARGET:
207
- break
208
- scroll(640, 400, dy=900)
209
- wait(2.5)
210
-
211
- def decode(u):
212
- if not u.startswith("https://l.facebook.com/l.php"): return u
213
- q = parse_qs(urlparse(u).query)
214
- return unquote(q["u"][0]) if "u" in q else u
215
-
216
- posts = list(seen.values())
217
- all_externals = sorted({decode(x) for p in posts for x in p["externals"]})
218
- capture_screenshot(f"/tmp/fb-group-{GROUP}.png", full=True)
219
- print(json.dumps({
220
- "group": GROUP,
221
- "post_count": len(posts),
222
- "posts": posts,
223
- "external_urls": all_externals,
224
- }, ensure_ascii=False))
225
- PY
226
- ```
227
-
228
- The JSON on stdout is the handoff payload — parse it in the calling agent and
229
- route `external_urls` into `firecrawl_extract` with whatever schema matches the
230
- downstream task (competitor inventory, pricing intel, boat listings, etc).
231
-
232
- ## Gotchas log (append when you hit something new)
233
-
234
- - **2026-04-18:** Fresh install verified. People-search URL requires login;
235
- page search `/search/pages/?q=` works the same way. Groups feed defaults to
236
- algorithmic sort — always append `?sorting_setting=CHRONOLOGICAL`.
1
+ # Facebook Groups — mining feeds for posts + external URLs
2
+
3
+ Field-tested against a logged-in Jay account on 2026-04-18.
4
+ **Requires:** Browser Harness driving a real Chrome that is (a) signed into
5
+ Facebook and (b) already a member of the target group. Non-member or logged-out
6
+ views serve a stripped landing page with no post content.
7
+
8
+ ## What this skill is for
9
+
10
+ 1. Pull the N most recent posts from a named FB group
11
+ 2. Harvest every external URL that members have shared
12
+ 3. Hand that URL list to Firecrawl (or `http_get`) for structured scraping at scale
13
+ 4. Cache post text + author + timestamp for downstream keyword matching
14
+
15
+ It is NOT for: replying in groups, DMing members, or any write action.
16
+
17
+ ## URL patterns
18
+
19
+ | What | URL |
20
+ |------|-----|
21
+ | Group main feed | `https://www.facebook.com/groups/{id_or_slug}` |
22
+ | Group "Discussion" tab (canonical feed) | `https://www.facebook.com/groups/{id_or_slug}/?sorting_setting=CHRONOLOGICAL` |
23
+ | Single post (permalink) | `https://www.facebook.com/groups/{id_or_slug}/posts/{post_id}/` |
24
+ | User's joined-groups feed | `https://www.facebook.com/groups/feed/` |
25
+ | List of YOUR groups | `https://www.facebook.com/groups/joins/` |
26
+
27
+ The `?sorting_setting=CHRONOLOGICAL` flag matters — without it, FB inserts an
28
+ algorithmic ranking that hides older posts and shows the same handful of "popular"
29
+ items every visit, which kills monitoring use cases.
30
+
31
+ ## DOM anchors (verified 2026-04-18)
32
+
33
+ FB rewrites class names every few weeks but ARIA roles and stable URL patterns
34
+ hold up well. Anchor on those, not on hashed CSS classes.
35
+
36
+ | Anchor | Selector | Notes |
37
+ |--------|----------|-------|
38
+ | Each post container | `div[role="article"]` | Stable. One per visible post. |
39
+ | Post permalink | `a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]` | First match per article = the post link |
40
+ | Post body text | `div[data-ad-preview="message"], div[data-ad-comet-preview="message"]` | One of these is the visible body |
41
+ | Post author | `h3 a, h4 a` (first inside the article) | Falls back to `strong a` |
42
+ | Post timestamp | `a[href*="/posts/"] abbr, a[role="link"] > span > span` (relative time text) | Hover gets the absolute time but the relative string is fine for sorting |
43
+ | External link (FB redirector) | `a[href^="https://l.facebook.com/l.php?u="]` | Decode the `u=` param to get the real URL |
44
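+ | "See more" button on long posts | `div[role="button"]:has(span:contains("See more"))` (jQuery-style: `:contains()` is not valid in native `querySelector`, so filter `div[role="button"]` by `innerText` in JS or use an XPath fallback) | Click before reading body or posts get truncated |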
45
+
46
+ If selectors stop returning results, run the self-inspection block at the bottom
47
+ of this file and update this table — that's the workflow, not a fallback.
48
+
49
+
50
+ ## Scrolling the feed (lazy load)
51
+
52
+ FB virtualizes the feed: scrolled-past posts get unmounted from the DOM. So
53
+ "scroll then collect" misses old posts. Pattern that works: **collect-as-you-go.**
54
+
55
+ ```python
56
+ seen = {} # post_url -> dict
57
+ TARGET = 50 # how many posts to collect
58
+ MAX_SCROLLS = 30
59
+
60
+ for i in range(MAX_SCROLLS):
61
+ new_posts = js("""
62
+ Array.from(document.querySelectorAll('div[role="article"]')).map(el => {
63
+ const link = el.querySelector('a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]');
64
+ const body = el.querySelector('div[data-ad-preview="message"], div[data-ad-comet-preview="message"]');
65
+ const author = el.querySelector('h3 a, h4 a, strong a');
66
+ const time = el.querySelector('abbr, a[role="link"] > span > span');
67
+ const externals = Array.from(el.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]'))
68
+ .map(a => a.href);
69
+ return {
70
+ url: link?.href || null,
71
+ author: author?.innerText || null,
72
+ time: time?.innerText || null,
73
+ body: body?.innerText?.slice(0, 4000) || null,
74
+ externals: externals,
75
+ };
76
+ }).filter(p => p.url)
77
+ """) or []
78
+ for p in new_posts:
79
+ seen.setdefault(p["url"], p)
80
+ if len(seen) >= TARGET:
81
+ break
82
+ scroll(640, 400, dy=900) # scroll near middle of viewport
83
+ wait(2.5) # FB needs ~2s to render new batch + a little buffer
84
+ ```
85
+
86
+ `wait(2.5)` is the floor. Faster than that and you'll see empty post containers
87
+ because React hasn't hydrated them yet.
88
+
89
+
90
+ ## Decoding the external-URL redirector
91
+
92
+ Every external link gets wrapped in `https://l.facebook.com/l.php?u={URL-encoded real URL}&h=...`.
93
+ You want the real URL, not the redirector.
94
+
95
+ ```python
96
+ from urllib.parse import urlparse, parse_qs, unquote
97
+ def decode_fb_link(href):
98
+ if not href.startswith("https://l.facebook.com/l.php"):
99
+ return href
100
+ q = parse_qs(urlparse(href).query)
101
+ return q["u"][0] if "u" in q else href  # parse_qs already percent-decodes; a second unquote would corrupt literal %XX in the target URL
102
+ ```
103
+
104
+ ## Handoff to Firecrawl (for the public outbound URLs)
105
+
106
+ Once you have the harvested external list, those URLs are outside FB's walled
107
+ garden — public, scrapable by anything. Firecrawl's schema-native extraction
108
+ shines here because you want typed results across heterogeneous sources.
109
+
110
+ ```python
111
+ # After the scroll loop:
112
+ external_urls = []
113
+ for p in seen.values():
114
+ for raw in p["externals"]:
115
+ external_urls.append(decode_fb_link(raw))
116
+ external_urls = sorted(set(external_urls))
117
+ print(f"harvested {len(external_urls)} unique external URLs")
118
+
119
+ # Hand off to Firecrawl MCP in the calling conversation:
120
+ # firecrawl_extract(
121
+ # urls=external_urls,
122
+ # prompt="Extract product/listing name, price, location, year, and key features.",
123
+ # schema={...}
124
+ # )
125
+ ```
126
+
127
+ When Firecrawl isn't available or the pages are simple, `http_get(url)` from
128
+ Harness itself is fine — it does a plain HTTP fetch without a browser, works
129
+ for static pages, and is the fastest option for bulk.
130
+
131
+
132
+ ## Rate-limit discipline
133
+
134
+ FB notices automation patterns at the account level, not the IP level. Driving
135
+ a real logged-in session means Jay's account is the one getting rate-limited if
136
+ you get greedy. Keep these floors:
137
+
138
+ - **≥2 seconds between scrolls** in the collect loop (the `wait(2.5)` above)
139
+ - **≥3 seconds between groups** if you're sweeping multiple
140
+ - **No more than ~6 groups per hour** for sustained monitoring
141
+ - **Don't open the same group more than once every 15 minutes** — repeated visits
142
+ within a short window are a heuristic that triggers checkpoints
143
+
144
+ Symptoms of over-pacing: article containers start rendering with empty bodies,
145
+ `/groups/{id}/` redirects to `/checkpoint/`, or the account briefly gets asked
146
+ to re-verify a phone or confirm a login from a new device. If that happens,
147
+ **stop immediately** and let Jay deal with the UI — don't try to auto-resolve.
148
+
149
+ ## Self-inspection block (run this when selectors stop working)
150
+
151
+ Paste this into a Harness stdin block to see what anchors currently exist in the
152
+ visible feed. Run it on a group you're a member of.
153
+
154
+ ```python
155
+ print(js("""
156
+ ({
157
+ articles: document.querySelectorAll('div[role="article"]').length,
158
+ body_preview_a: document.querySelectorAll('div[data-ad-preview="message"]').length,
159
+ body_preview_b: document.querySelectorAll('div[data-ad-comet-preview="message"]').length,
160
+ external_redirectors: document.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]').length,
161
+ permalink_posts: document.querySelectorAll('a[href*="/groups/"][href*="/posts/"]').length,
162
+ permalink_permalinks: document.querySelectorAll('a[href*="/groups/"][href*="/permalink/"]').length,
163
+ })
164
+ """))
165
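+ # If articles is 0, or both counts of an alternate pair (body_preview_a/b, permalink_posts/permalink_permalinks) are 0, the selector drifted (external_redirectors can legitimately be 0 when no visible post links out). Open DevTools, right-click a visible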
166
+ # post, inspect, find the new stable attribute (aria-*, data-*), and update the
167
+ # DOM anchors table above.
168
+ ```
169
+
170
+
171
+ ## Full example — mine one group, emit JSON for downstream tools
172
+
173
+ ```bash
174
+ cd ~/Developer/browser-harness && uv run browser-harness <<'PY'
175
+ import json, sys
176
+ from urllib.parse import urlparse, parse_qs, unquote
177
+
178
+ GROUP = "riceLakeBoating" # slug or numeric id
179
+ TARGET = 50 # how many posts to collect
180
+ MAX_SCROLLS = 30
181
+
182
+ goto_url(f"https://www.facebook.com/groups/{GROUP}/?sorting_setting=CHRONOLOGICAL")
183
+ wait_for_load()
184
+ wait(2)
185
+
186
+ # Abort if FB bounced us
187
+ info = page_info()
188
+ if "/checkpoint/" in info["url"] or "/login" in info["url"]:
189
+ sys.exit("AUTH_WALL — stop and have Jay re-verify the account.")
190
+
191
+ seen = {}
192
+ for _ in range(MAX_SCROLLS):
193
+ batch = js("""
194
+ Array.from(document.querySelectorAll('div[role="article"]')).map(el => {
195
+ const link = el.querySelector('a[href*="/groups/"][href*="/posts/"], a[href*="/groups/"][href*="/permalink/"]');
196
+ const body = el.querySelector('div[data-ad-preview="message"], div[data-ad-comet-preview="message"]');
197
+ const author = el.querySelector('h3 a, h4 a, strong a');
198
+ const time = el.querySelector('abbr, a[role="link"] > span > span');
199
+ const externals = Array.from(el.querySelectorAll('a[href^="https://l.facebook.com/l.php?u="]')).map(a => a.href);
200
+ return { url: link?.href, author: author?.innerText, time: time?.innerText,
201
+ body: body?.innerText?.slice(0, 4000), externals };
202
+ }).filter(p => p.url)
203
+ """) or []
204
+ for p in batch:
205
+ seen.setdefault(p["url"], p)
206
+ if len(seen) >= TARGET:
207
+ break
208
+ scroll(640, 400, dy=900)
209
+ wait(2.5)
210
+
211
+ def decode(u):
212
+ if not u.startswith("https://l.facebook.com/l.php"): return u
213
+ q = parse_qs(urlparse(u).query)
214
+ return q["u"][0] if "u" in q else u  # parse_qs already percent-decodes; a second unquote would corrupt literal %XX in the target URL
215
+
216
+ posts = list(seen.values())
217
+ all_externals = sorted({decode(x) for p in posts for x in p["externals"]})
218
+ capture_screenshot(f"/tmp/fb-group-{GROUP}.png", full=True)
219
+ print(json.dumps({
220
+ "group": GROUP,
221
+ "post_count": len(posts),
222
+ "posts": posts,
223
+ "external_urls": all_externals,
224
+ }, ensure_ascii=False))
225
+ PY
226
+ ```
227
+
228
+ The JSON on stdout is the handoff payload — parse it in the calling agent and
229
+ route `external_urls` into `firecrawl_extract` with whatever schema matches the
230
+ downstream task (competitor inventory, pricing intel, boat listings, etc).
231
+
232
+ ## Gotchas log (append when you hit something new)
233
+
234
+ - **2026-04-18:** Fresh install verified. People-search URL requires login;
235
+ page search `/search/pages/?q=` works the same way. Groups feed defaults to
236
+ algorithmic sort — always append `?sorting_setting=CHRONOLOGICAL`.