mindforge-cc 11.5.1 → 11.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/.agent/mindforge/skill-tdd.md +53 -0
  2. package/.agent/mindforge/skills-index.md +118 -0
  3. package/.agent/mindforge/systematic-debug.md +60 -0
  4. package/.agent/mindforge/wf-catalog.md +37 -0
  5. package/.agent/mindforge/wf-code-audit.md +31 -0
  6. package/.agent/mindforge/wf-competitive-analysis.md +31 -0
  7. package/.agent/mindforge/wf-deep-research.md +32 -0
  8. package/.agent/mindforge/wf-feature-planner.md +31 -0
  9. package/.agent/mindforge/wf-incident-response.md +31 -0
  10. package/.agent/mindforge/wf-onboard-codebase.md +31 -0
  11. package/.agent/mindforge/wf-perf-optimize.md +31 -0
  12. package/.agent/mindforge/wf-pr-review.md +31 -0
  13. package/.agent/mindforge/wf-refactor-plan.md +31 -0
  14. package/.agent/mindforge/wf-release-prep.md +31 -0
  15. package/.agent/mindforge/wf-tdd-sprint.md +31 -0
  16. package/.agent/mindforge/wf-tech-evaluation.md +31 -0
  17. package/.agent/skills/1password-skill/SKILL.md +156 -0
  18. package/.agent/skills/1password-skill/references/cli-examples.md +31 -0
  19. package/.agent/skills/1password-skill/references/get-started.md +21 -0
  20. package/.agent/skills/article-illustrator/SKILL.md +199 -0
  21. package/.agent/skills/article-illustrator/references/prompt-construction.md +426 -0
  22. package/.agent/skills/article-illustrator/references/style-presets.md +80 -0
  23. package/.agent/skills/article-illustrator/references/styles.md +224 -0
  24. package/.agent/skills/article-illustrator/references/usage.md +50 -0
  25. package/.agent/skills/article-illustrator/references/workflow.md +332 -0
  26. package/.agent/skills/arxiv/SKILL.md +275 -0
  27. package/.agent/skills/blogwatcher/SKILL.md +130 -0
  28. package/.agent/skills/code-wiki/SKILL.md +438 -0
  29. package/.agent/skills/code-wiki/templates/README.md +31 -0
  30. package/.agent/skills/code-wiki/templates/architecture.md +30 -0
  31. package/.agent/skills/code-wiki/templates/getting-started.md +47 -0
  32. package/.agent/skills/code-wiki/templates/module.md +38 -0
  33. package/.agent/skills/codebase-inspection/SKILL.md +109 -0
  34. package/.agent/skills/comic-creator/SKILL.md +240 -0
  35. package/.agent/skills/comic-creator/references/analysis-framework.md +176 -0
  36. package/.agent/skills/comic-creator/references/auto-selection.md +71 -0
  37. package/.agent/skills/comic-creator/references/base-prompt.md +98 -0
  38. package/.agent/skills/comic-creator/references/character-template.md +180 -0
  39. package/.agent/skills/comic-creator/references/ohmsha-guide.md +85 -0
  40. package/.agent/skills/comic-creator/references/partial-workflows.md +106 -0
  41. package/.agent/skills/comic-creator/references/storyboard-template.md +143 -0
  42. package/.agent/skills/comic-creator/references/workflow.md +401 -0
  43. package/.agent/skills/concept-diagrams/SKILL.md +355 -0
  44. package/.agent/skills/concept-diagrams/references/dashboard-patterns.md +43 -0
  45. package/.agent/skills/concept-diagrams/references/infrastructure-patterns.md +144 -0
  46. package/.agent/skills/concept-diagrams/references/physical-shape-cookbook.md +42 -0
  47. package/.agent/skills/creative-ideation/SKILL.md +144 -0
  48. package/.agent/skills/creative-ideation/references/full-prompt-library.md +110 -0
  49. package/.agent/skills/devops-cli/SKILL.md +149 -0
  50. package/.agent/skills/devops-cli/references/app-discovery.md +112 -0
  51. package/.agent/skills/devops-cli/references/authentication.md +59 -0
  52. package/.agent/skills/devops-cli/references/cli-reference.md +104 -0
  53. package/.agent/skills/devops-cli/references/running-apps.md +171 -0
  54. package/.agent/skills/devops-watchers/SKILL.md +103 -0
  55. package/.agent/skills/docker-management/SKILL.md +273 -0
  56. package/.agent/skills/domain-intel/SKILL.md +96 -0
  57. package/.agent/skills/duckduckgo-search/SKILL.md +230 -0
  58. package/.agent/skills/github-auth/SKILL.md +240 -0
  59. package/.agent/skills/github-code-review/SKILL.md +474 -0
  60. package/.agent/skills/github-code-review/references/review-output-template.md +74 -0
  61. package/.agent/skills/github-issues/SKILL.md +363 -0
  62. package/.agent/skills/github-issues/templates/bug-report.md +35 -0
  63. package/.agent/skills/github-issues/templates/feature-request.md +31 -0
  64. package/.agent/skills/github-pr-workflow/SKILL.md +360 -0
  65. package/.agent/skills/github-pr-workflow/references/ci-troubleshooting.md +183 -0
  66. package/.agent/skills/github-pr-workflow/references/conventional-commits.md +71 -0
  67. package/.agent/skills/github-pr-workflow/templates/pr-body-bugfix.md +35 -0
  68. package/.agent/skills/github-pr-workflow/templates/pr-body-feature.md +33 -0
  69. package/.agent/skills/github-repo-management/SKILL.md +509 -0
  70. package/.agent/skills/github-repo-management/references/github-api-cheatsheet.md +161 -0
  71. package/.agent/skills/godmode/SKILL.md +396 -0
  72. package/.agent/skills/godmode/references/jailbreak-templates.md +128 -0
  73. package/.agent/skills/godmode/references/refusal-detection.md +142 -0
  74. package/.agent/skills/hyperframes/SKILL.md +182 -0
  75. package/.agent/skills/hyperframes/references/cli.md +185 -0
  76. package/.agent/skills/hyperframes/references/composition.md +129 -0
  77. package/.agent/skills/hyperframes/references/features.md +289 -0
  78. package/.agent/skills/hyperframes/references/gsap.md +136 -0
  79. package/.agent/skills/hyperframes/references/troubleshooting.md +137 -0
  80. package/.agent/skills/hyperframes/references/website-to-video.md +145 -0
  81. package/.agent/skills/jupyter-live-kernel/SKILL.md +160 -0
  82. package/.agent/skills/kanban-orchestrator/SKILL.md +209 -0
  83. package/.agent/skills/kanban-worker/SKILL.md +188 -0
  84. package/.agent/skills/llm-wiki/SKILL.md +499 -0
  85. package/.agent/skills/meme-generation/SKILL.md +122 -0
  86. package/.agent/skills/node-inspect-debugger/SKILL.md +312 -0
  87. package/.agent/skills/obsidian/SKILL.md +60 -0
  88. package/.agent/skills/osint-investigation/SKILL.md +269 -0
  89. package/.agent/skills/osint-investigation/templates/source-template.md +59 -0
  90. package/.agent/skills/oss-forensics/SKILL.md +422 -0
  91. package/.agent/skills/oss-forensics/references/evidence-types.md +89 -0
  92. package/.agent/skills/oss-forensics/references/github-archive-guide.md +184 -0
  93. package/.agent/skills/oss-forensics/references/investigation-templates.md +131 -0
  94. package/.agent/skills/oss-forensics/references/recovery-techniques.md +164 -0
  95. package/.agent/skills/oss-forensics/templates/forensic-report.md +151 -0
  96. package/.agent/skills/oss-forensics/templates/malicious-package-report.md +43 -0
  97. package/.agent/skills/parallel-cli/SKILL.md +384 -0
  98. package/.agent/skills/pinggy-tunnel/SKILL.md +302 -0
  99. package/.agent/skills/pixel-art/SKILL.md +209 -0
  100. package/.agent/skills/pixel-art/references/palettes.md +49 -0
  101. package/.agent/skills/plan/SKILL.md +331 -0
  102. package/.agent/skills/polymarket/SKILL.md +75 -0
  103. package/.agent/skills/polymarket/references/api-endpoints.md +220 -0
  104. package/.agent/skills/python-debugpy/SKILL.md +368 -0
  105. package/.agent/skills/requesting-code-review/SKILL.md +273 -0
  106. package/.agent/skills/research-paper-writing/SKILL.md +2367 -0
  107. package/.agent/skills/research-paper-writing/references/autoreason-methodology.md +394 -0
  108. package/.agent/skills/research-paper-writing/references/checklists.md +434 -0
  109. package/.agent/skills/research-paper-writing/references/citation-workflow.md +563 -0
  110. package/.agent/skills/research-paper-writing/references/experiment-patterns.md +728 -0
  111. package/.agent/skills/research-paper-writing/references/human-evaluation.md +476 -0
  112. package/.agent/skills/research-paper-writing/references/paper-types.md +481 -0
  113. package/.agent/skills/research-paper-writing/references/reviewer-guidelines.md +433 -0
  114. package/.agent/skills/research-paper-writing/references/sources.md +191 -0
  115. package/.agent/skills/research-paper-writing/references/writing-guide.md +474 -0
  116. package/.agent/skills/research-paper-writing/templates/README.md +251 -0
  117. package/.agent/skills/rest-graphql-debug/SKILL.md +507 -0
  118. package/.agent/skills/s6-container-supervision/SKILL.md +171 -0
  119. package/.agent/skills/scrapling/SKILL.md +328 -0
  120. package/.agent/skills/sherlock/SKILL.md +186 -0
  121. package/.agent/skills/simplify-code/SKILL.md +168 -0
  122. package/.agent/skills/skill-authoring/SKILL.md +158 -0
  123. package/.agent/skills/spike/SKILL.md +190 -0
  124. package/.agent/skills/subagent-driven-development/SKILL.md +345 -0
  125. package/.agent/skills/subagent-driven-development/references/context-budget-discipline.md +53 -0
  126. package/.agent/skills/subagent-driven-development/references/gates-taxonomy.md +93 -0
  127. package/.agent/skills/systematic-debugging/SKILL.md +360 -0
  128. package/.agent/skills/test-driven-development/SKILL.md +336 -0
  129. package/.agent/skills/video-orchestrator/SKILL.md +194 -0
  130. package/.agent/skills/video-orchestrator/references/examples.md +227 -0
  131. package/.agent/skills/video-orchestrator/references/intake.md +166 -0
  132. package/.agent/skills/video-orchestrator/references/kanban-setup.md +278 -0
  133. package/.agent/skills/video-orchestrator/references/monitoring.md +180 -0
  134. package/.agent/skills/video-orchestrator/references/role-archetypes.md +298 -0
  135. package/.agent/skills/video-orchestrator/references/tool-matrix.md +317 -0
  136. package/.agent/skills/web-pentest/SKILL.md +332 -0
  137. package/.agent/skills/web-pentest/references/bypass-techniques.md +133 -0
  138. package/.agent/skills/web-pentest/references/exploitation-techniques.md +204 -0
  139. package/.agent/skills/web-pentest/references/scope-enforcement.md +110 -0
  140. package/.agent/skills/web-pentest/references/vuln-taxonomy.md +81 -0
  141. package/.agent/skills/web-pentest/templates/authorization.md +69 -0
  142. package/.agent/skills/web-pentest/templates/pentest-report.md +178 -0
  143. package/.claude/commands/mindforge/skill-tdd.md +53 -0
  144. package/.claude/commands/mindforge/skills-index.md +118 -0
  145. package/.claude/commands/mindforge/systematic-debug.md +60 -0
  146. package/.claude/commands/mindforge/wf-catalog.md +37 -0
  147. package/.claude/commands/mindforge/wf-code-audit.md +31 -0
  148. package/.claude/commands/mindforge/wf-competitive-analysis.md +31 -0
  149. package/.claude/commands/mindforge/wf-deep-research.md +32 -0
  150. package/.claude/commands/mindforge/wf-feature-planner.md +31 -0
  151. package/.claude/commands/mindforge/wf-incident-response.md +31 -0
  152. package/.claude/commands/mindforge/wf-onboard-codebase.md +31 -0
  153. package/.claude/commands/mindforge/wf-perf-optimize.md +31 -0
  154. package/.claude/commands/mindforge/wf-pr-review.md +31 -0
  155. package/.claude/commands/mindforge/wf-refactor-plan.md +31 -0
  156. package/.claude/commands/mindforge/wf-release-prep.md +31 -0
  157. package/.claude/commands/mindforge/wf-tdd-sprint.md +31 -0
  158. package/.claude/commands/mindforge/wf-tech-evaluation.md +31 -0
  159. package/.mindforge/config.json +2 -2
  160. package/.mindforge/dynamic-workflows/REGISTRY.md +65 -0
  161. package/.mindforge/dynamic-workflows/index.json +171 -0
  162. package/.mindforge/dynamic-workflows/scripts/code-audit.js +103 -0
  163. package/.mindforge/dynamic-workflows/scripts/competitive-analysis.js +85 -0
  164. package/.mindforge/dynamic-workflows/scripts/deep-research.js +151 -0
  165. package/.mindforge/dynamic-workflows/scripts/feature-planner.js +104 -0
  166. package/.mindforge/dynamic-workflows/scripts/incident-response.js +106 -0
  167. package/.mindforge/dynamic-workflows/scripts/onboard-codebase.js +102 -0
  168. package/.mindforge/dynamic-workflows/scripts/perf-optimize.js +128 -0
  169. package/.mindforge/dynamic-workflows/scripts/pr-review.js +87 -0
  170. package/.mindforge/dynamic-workflows/scripts/refactor-plan.js +121 -0
  171. package/.mindforge/dynamic-workflows/scripts/release-prep.js +102 -0
  172. package/.mindforge/dynamic-workflows/scripts/tdd-sprint.js +103 -0
  173. package/.mindforge/dynamic-workflows/scripts/tech-evaluation.js +72 -0
  174. package/.mindforge/memory/sync-manifest.json +1 -1
  175. package/.mindforge/skills/arxiv/SKILL.md +294 -0
  176. package/.mindforge/skills/blogwatcher/SKILL.md +147 -0
  177. package/.mindforge/skills/code-wiki/SKILL.md +457 -0
  178. package/.mindforge/skills/codebase-inspection/SKILL.md +126 -0
  179. package/.mindforge/skills/concept-diagrams/SKILL.md +373 -0
  180. package/.mindforge/skills/creative-ideation/SKILL.md +162 -0
  181. package/.mindforge/skills/domain-intel/SKILL.md +116 -0
  182. package/.mindforge/skills/duckduckgo-search/SKILL.md +249 -0
  183. package/.mindforge/skills/github-code-review/SKILL.md +493 -0
  184. package/.mindforge/skills/github-issues/SKILL.md +382 -0
  185. package/.mindforge/skills/github-pr-workflow/SKILL.md +379 -0
  186. package/.mindforge/skills/jupyter-live-kernel/SKILL.md +179 -0
  187. package/.mindforge/skills/kanban-orchestrator/SKILL.md +227 -0
  188. package/.mindforge/skills/kanban-worker/SKILL.md +206 -0
  189. package/.mindforge/skills/meme-generation/SKILL.md +141 -0
  190. package/.mindforge/skills/obsidian/SKILL.md +80 -0
  191. package/.mindforge/skills/osint-investigation/SKILL.md +288 -0
  192. package/.mindforge/skills/oss-forensics/SKILL.md +421 -0
  193. package/.mindforge/skills/pixel-art/SKILL.md +228 -0
  194. package/.mindforge/skills/plan/SKILL.md +350 -0
  195. package/.mindforge/skills/requesting-code-review/SKILL.md +292 -0
  196. package/.mindforge/skills/research-paper-writing/SKILL.md +2384 -0
  197. package/.mindforge/skills/scrapling/SKILL.md +345 -0
  198. package/.mindforge/skills/sherlock/SKILL.md +203 -0
  199. package/.mindforge/skills/simplify-code/SKILL.md +187 -0
  200. package/.mindforge/skills/spike/SKILL.md +209 -0
  201. package/.mindforge/skills/subagent-driven-development/SKILL.md +364 -0
  202. package/.mindforge/skills/systematic-debugging/SKILL.md +379 -0
  203. package/.mindforge/skills/test-driven-development/SKILL.md +355 -0
  204. package/.mindforge/skills/web-pentest/SKILL.md +327 -0
  205. package/CHANGELOG.md +71 -0
  206. package/MINDFORGE.md +2 -2
  207. package/README.md +72 -3
  208. package/RELEASENOTES.md +109 -0
  209. package/bin/installer-core.js +6 -2
  210. package/bin/mindforge-cli.js +7 -0
  211. package/bin/workflows/workflow-runner.js +110 -0
  212. package/docs/commands-reference.md +25 -0
  213. package/docs/getting-started.md +42 -5
  214. package/package.json +2 -1
@@ -0,0 +1,328 @@
1
+ ---
2
+ name: scrapling
3
+ description: Web scraping with Scrapling - HTTP fetching, stealth browser automation, Cloudflare bypass, and spider crawling via CLI and Python.
4
+ version: 1.0.0
5
+ prerequisites:
6
+ commands: [scrapling, python]
7
+ ---
8
+
9
+ # Scrapling
10
+
11
+ [Scrapling](https://github.com/D4Vinci/Scrapling) is a web scraping framework with anti-bot bypass, stealth browser automation, and a spider framework. It provides three fetching strategies (HTTP, dynamic JS, stealth/Cloudflare) and a full CLI.
12
+
13
+ **This skill is for educational and research purposes only.** Users must comply with local/international data scraping laws and respect website Terms of Service.
14
+
15
+ ## When to Use
16
+
17
+ - Scraping static HTML pages (faster than browser tools)
18
+ - Scraping JS-rendered pages that need a real browser
19
+ - Bypassing Cloudflare Turnstile or bot detection
20
+ - Crawling multiple pages with a spider
21
+ - When the built-in `web_extract` tool does not return the data you need
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ pip install "scrapling[all]"
27
+ scrapling install
28
+ ```
29
+
30
+ Minimal install (HTTP only, no browser):
31
+ ```bash
32
+ pip install scrapling
33
+ ```
34
+
35
+ With browser automation only:
36
+ ```bash
37
+ pip install "scrapling[fetchers]"
38
+ scrapling install
39
+ ```
40
+
41
+ ## Quick Reference
42
+
43
+ | Approach | Class | Use When |
44
+ |----------|-------|----------|
45
+ | HTTP | `Fetcher` / `FetcherSession` | Static pages, APIs, fast bulk requests |
46
+ | Dynamic | `DynamicFetcher` / `DynamicSession` | JS-rendered content, SPAs |
47
+ | Stealth | `StealthyFetcher` / `StealthySession` | Cloudflare, anti-bot protected sites |
48
+ | Spider | `Spider` | Multi-page crawling with link following |
49
+
50
+ ## CLI Usage
51
+
52
+ ### Extract Static Page
53
+
54
+ ```bash
55
+ scrapling extract get 'https://example.com' output.md
56
+ ```
57
+
58
+ With CSS selector and browser impersonation:
59
+
60
+ ```bash
61
+ scrapling extract get 'https://example.com' output.md \
62
+ --css-selector '.content' \
63
+ --impersonate 'chrome'
64
+ ```
65
+
66
+ ### Extract JS-Rendered Page
67
+
68
+ ```bash
69
+ scrapling extract fetch 'https://example.com' output.md \
70
+ --css-selector '.dynamic-content' \
71
+ --disable-resources \
72
+ --network-idle
73
+ ```
74
+
75
+ ### Extract Cloudflare-Protected Page
76
+
77
+ ```bash
78
+ scrapling extract stealthy-fetch 'https://protected-site.com' output.html \
79
+ --solve-cloudflare \
80
+ --block-webrtc \
81
+ --hide-canvas
82
+ ```
83
+
84
+ ### POST Request
85
+
86
+ ```bash
87
+ scrapling extract post 'https://example.com/api' output.json \
88
+ --json '{"query": "search term"}'
89
+ ```
90
+
91
+ ### Output Formats
92
+
93
+ The output format is determined by the file extension:
94
+ - `.html` -- raw HTML
95
+ - `.md` -- converted to Markdown
96
+ - `.txt` -- plain text
97
+ - `.json` / `.jsonl` -- JSON
98
+
99
+ ## Python: HTTP Scraping
100
+
101
+ ### Single Request
102
+
103
+ ```python
104
+ from scrapling.fetchers import Fetcher
105
+
106
+ page = Fetcher.get('https://quotes.toscrape.com/')
107
+ quotes = page.css('.quote .text::text').getall()
108
+ for q in quotes:
109
+ print(q)
110
+ ```
111
+
112
+ ### Session (Persistent Cookies)
113
+
114
+ ```python
115
+ from scrapling.fetchers import FetcherSession
116
+
117
+ with FetcherSession(impersonate='chrome') as session:
118
+ page = session.get('https://example.com/', stealthy_headers=True)
119
+ links = page.css('a::attr(href)').getall()
120
+ for link in links[:5]:
121
+ sub = session.get(page.urljoin(link))  # hrefs may be relative; resolve against the page URL
122
+ print(sub.css('h1::text').get())
123
+ ```
124
+
125
+ ### POST / PUT / DELETE
126
+
127
+ ```python
128
+ page = Fetcher.post('https://api.example.com/data', json={"key": "value"})
129
+ page = Fetcher.put('https://api.example.com/item/1', data={"name": "updated"})
130
+ page = Fetcher.delete('https://api.example.com/item/1')
131
+ ```
132
+
133
+ ### With Proxy
134
+
135
+ ```python
136
+ page = Fetcher.get('https://example.com', proxy='http://user:pass@proxy:8080')
137
+ ```
138
+
139
+ ## Python: Dynamic Pages (JS-Rendered)
140
+
141
+ For pages that require JavaScript execution (SPAs, lazy-loaded content):
142
+
143
+ ```python
144
+ from scrapling.fetchers import DynamicFetcher
145
+
146
+ page = DynamicFetcher.fetch('https://example.com', headless=True)
147
+ data = page.css('.js-loaded-content::text').getall()
148
+ ```
149
+
150
+ ### Wait for Specific Element
151
+
152
+ ```python
153
+ page = DynamicFetcher.fetch(
154
+ 'https://example.com',
155
+ wait_selector=('.results', 'visible'),
156
+ network_idle=True,
157
+ )
158
+ ```
159
+
160
+ ### Disable Resources for Speed
161
+
162
+ Blocks fonts, images, media, stylesheets (~25% faster):
163
+
164
+ ```python
165
+ from scrapling.fetchers import DynamicSession
166
+
167
+ with DynamicSession(headless=True, disable_resources=True, network_idle=True) as session:
168
+ page = session.fetch('https://example.com')
169
+ items = page.css('.item::text').getall()
170
+ ```
171
+
172
+ ### Custom Page Automation
173
+
174
+ ```python
175
+ from playwright.sync_api import Page
176
+ from scrapling.fetchers import DynamicFetcher
177
+
178
+ def scroll_and_click(page: Page):
179
+ page.mouse.wheel(0, 3000)
180
+ page.wait_for_timeout(1000)
181
+ page.click('button.load-more')
182
+ page.wait_for_selector('.extra-results')
183
+
184
+ page = DynamicFetcher.fetch('https://example.com', page_action=scroll_and_click)
185
+ results = page.css('.extra-results .item::text').getall()
186
+ ```
187
+
188
+ ## Python: Stealth Mode (Anti-Bot Bypass)
189
+
190
+ For Cloudflare-protected or heavily fingerprinted sites:
191
+
192
+ ```python
193
+ from scrapling.fetchers import StealthyFetcher
194
+
195
+ page = StealthyFetcher.fetch(
196
+ 'https://protected-site.com',
197
+ headless=True,
198
+ solve_cloudflare=True,
199
+ block_webrtc=True,
200
+ hide_canvas=True,
201
+ )
202
+ content = page.css('.protected-content::text').getall()
203
+ ```
204
+
205
+ ### Stealth Session
206
+
207
+ ```python
208
+ from scrapling.fetchers import StealthySession
209
+
210
+ with StealthySession(headless=True, solve_cloudflare=True) as session:
211
+ page1 = session.fetch('https://protected-site.com/page1')
212
+ page2 = session.fetch('https://protected-site.com/page2')
213
+ ```
214
+
215
+ ## Element Selection
216
+
217
+ All fetchers return a `Selector` object with these methods:
218
+
219
+ ### CSS Selectors
220
+
221
+ ```python
222
+ page.css('h1::text').get() # First h1 text
223
+ page.css('a::attr(href)').getall() # All link hrefs
224
+ page.css('.quote .text::text').getall() # Nested selection
225
+ ```
226
+
227
+ ### XPath
228
+
229
+ ```python
230
+ page.xpath('//div[@class="content"]/text()').getall()
231
+ page.xpath('//a/@href').getall()
232
+ ```
233
+
234
+ ### Find Methods
235
+
236
+ ```python
237
+ page.find_all('div', class_='quote') # By tag + attribute
238
+ page.find_by_text('Read more', tag='a') # By text content
239
+ page.find_by_regex(r'\$\d+\.\d{2}') # By regex pattern
240
+ ```
241
+
242
+ ### Similar Elements
243
+
244
+ Find elements with similar structure (useful for product listings, etc.):
245
+
246
+ ```python
247
+ first_product = page.css('.product')[0]
248
+ all_similar = first_product.find_similar()
249
+ ```
250
+
251
+ ### Navigation
252
+
253
+ ```python
254
+ el = page.css('.target')[0]
255
+ el.parent # Parent element
256
+ el.children # Child elements
257
+ el.next_sibling # Next sibling
258
+ el.prev_sibling # Previous sibling
259
+ ```
260
+
261
+ ## Python: Spider Framework
262
+
263
+ For multi-page crawling with link following:
264
+
265
+ ```python
266
+ from scrapling.spiders import Spider, Request, Response
267
+
268
+ class QuotesSpider(Spider):
269
+ name = "quotes"
270
+ start_urls = ["https://quotes.toscrape.com/"]
271
+ concurrent_requests = 10
272
+ download_delay = 1
273
+
274
+ async def parse(self, response: Response):
275
+ for quote in response.css('.quote'):
276
+ yield {
277
+ "text": quote.css('.text::text').get(),
278
+ "author": quote.css('.author::text').get(),
279
+ "tags": quote.css('.tag::text').getall(),
280
+ }
281
+
282
+ next_page = response.css('.next a::attr(href)').get()
283
+ if next_page:
284
+ yield response.follow(next_page)
285
+
286
+ result = QuotesSpider().start()
287
+ print(f"Scraped {len(result.items)} quotes")
288
+ result.items.to_json("quotes.json")
289
+ ```
290
+
291
+ ### Multi-Session Spider
292
+
293
+ Route requests to different fetcher types:
294
+
295
+ ```python
296
+ from scrapling.fetchers import FetcherSession, AsyncStealthySession
297
+
298
+ class SmartSpider(Spider):
299
+ name = "smart"
300
+ start_urls = ["https://example.com/"]
301
+
302
+ def configure_sessions(self, manager):
303
+ manager.add("fast", FetcherSession(impersonate="chrome"))
304
+ manager.add("stealth", AsyncStealthySession(headless=True), lazy=True)
305
+
306
+ async def parse(self, response: Response):
307
+ for link in response.css('a::attr(href)').getall():
308
+ if "protected" in link:
309
+ yield Request(link, sid="stealth")
310
+ else:
311
+ yield Request(link, sid="fast", callback=self.parse)
312
+ ```
313
+
314
+ ### Pause/Resume Crawling
315
+
316
+ ```python
317
+ spider = QuotesSpider(crawldir="./crawl_checkpoint")
318
+ spider.start() # Ctrl+C to pause, re-run to resume from checkpoint
319
+ ```
320
+
321
+ ## Pitfalls
322
+
323
+ - **Browser install required**: run `scrapling install` after pip install -- without it, `DynamicFetcher` and `StealthyFetcher` will fail
324
+ - **Timeouts**: DynamicFetcher/StealthyFetcher timeout is in **milliseconds** (default 30000), Fetcher timeout is in **seconds**
325
+ - **Cloudflare bypass**: `solve_cloudflare=True` adds 5-15 seconds to fetch time -- only enable when needed
326
+ - **Resource usage**: StealthyFetcher runs a real browser -- limit concurrent usage
327
+ - **Legal**: always check robots.txt and website ToS before scraping. This library is for educational and research purposes
328
+ - **Python version**: requires Python 3.10+
@@ -0,0 +1,186 @@
1
+ ---
2
+ name: sherlock
3
+ description: OSINT username search across 400+ social networks. Hunt down social media accounts by username.
4
+ version: 1.0.0
5
+ prerequisites:
6
+ commands: [sherlock]
7
+ ---
8
+
9
+ # Sherlock OSINT Username Search
10
+
11
+ Hunt down social media accounts by username across 400+ social networks using the [Sherlock Project](https://github.com/sherlock-project/sherlock).
12
+
13
+ ## When to Use
14
+
15
+ - User asks to find accounts associated with a username
16
+ - User wants to check username availability across platforms
17
+ - User is conducting OSINT or reconnaissance research
18
+ - User asks "where is this username registered?" or similar
19
+
20
+ ## Requirements
21
+
22
+ - Sherlock CLI installed: `pipx install sherlock-project` or `pip install sherlock-project`
23
+ - Alternatively: Docker available (`docker run -it --rm sherlock/sherlock`)
24
+ - Network access to query social platforms
25
+
26
+ ## Procedure
27
+
28
+ ### 1. Check if Sherlock is Installed
29
+
30
+ **Before doing anything else**, verify sherlock is available:
31
+
32
+ ```bash
33
+ sherlock --version
34
+ ```
35
+
36
+ If the command fails:
37
+ - Offer to install: `pipx install sherlock-project` (recommended) or `pip install sherlock-project`
38
+ - **Do NOT** try multiple installation methods — pick one and proceed
39
+ - If installation fails, inform the user and stop
40
+
41
+ ### 2. Extract Username
42
+
43
+ **Extract the username directly from the user's message if clearly stated.**
44
+
45
+ Examples where you should **NOT** use clarify:
46
+ - "Find accounts for nasa" → username is `nasa`
47
+ - "Search for johndoe123" → username is `johndoe123`
48
+ - "Check if alice exists on social media" → username is `alice`
49
+ - "Look up user bob on social networks" → username is `bob`
50
+
51
+ **Only use clarify if:**
52
+ - Multiple potential usernames mentioned ("search for alice or bob")
53
+ - Ambiguous phrasing ("search for my username" without specifying)
54
+ - No username mentioned at all ("do an OSINT search")
55
+
56
+ When extracting, take the **exact** username as stated — preserve case, numbers, underscores, etc.
57
+
58
+ ### 3. Build Command
59
+
60
+ **Default command** (use this unless user specifically requests otherwise):
61
+ ```bash
62
+ sherlock --print-found --no-color "<username>" --timeout 90
63
+ ```
64
+
65
+ **Optional flags** (only add if user explicitly requests):
66
+ - `--nsfw` — Include NSFW sites (only if user asks)
67
+ - `--tor` — Route through Tor (only if user asks for anonymity)
68
+
69
+ **Do NOT ask about options via clarify** — just run the default search. Users can request specific options if needed.
70
+
71
+ ### 4. Execute Search
72
+
73
+ Run via the `terminal` tool. The command typically takes 30-120 seconds depending on network conditions and site count.
74
+
75
+ **Example terminal call:**
76
+ ```json
77
+ {
78
+ "command": "sherlock --print-found --no-color \"target_username\" --timeout 90",
79
+ "timeout": 180
80
+ }
81
+ ```
82
+
83
+ ### 5. Parse and Present Results
84
+
85
+ Sherlock outputs found accounts in a simple format. Parse the output and present:
86
+
87
+ 1. **Summary line:** "Found X accounts for username 'Y'"
88
+ 2. **Categorized links:** Group by platform type if helpful (social, professional, forums, etc.)
89
+ 3. **Output file location:** Sherlock saves results to `<username>.txt` by default
90
+
91
+ **Example output parsing:**
92
+ ```
93
+ [+] Instagram: https://instagram.com/username
94
+ [+] Twitter: https://twitter.com/username
95
+ [+] GitHub: https://github.com/username
96
+ ```
97
+
98
+ Present findings as clickable links when possible.
99
+
100
+ ## Pitfalls
101
+
102
+ ### No Results Found
103
+ If Sherlock finds no accounts, this is often correct — the username may not be registered on checked platforms. Suggest:
104
+ - Checking spelling/variation
105
+ - Trying similar usernames with the `{?}` wildcard (expands to `_`, `-`, and `.`): `sherlock "user{?}name"`
106
+ - The user may have privacy settings or deleted accounts
107
+
108
+ ### Timeout Issues
109
+ Some sites are slow or block automated requests. Use `--timeout 120` to increase wait time, or `--site` to limit scope.
110
+
111
+ ### Tor Configuration
112
+ `--tor` requires Tor daemon running. If user wants anonymity but Tor isn't available, suggest:
113
+ - Installing Tor service
114
+ - Using `--proxy` with an alternative proxy
115
+
116
+ ### False Positives
117
+ Some sites always return "found" due to their response structure. Cross-reference unexpected results with manual checks.
118
+
119
+ ### Rate Limiting
120
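+ Aggressive searches may trigger rate limits. For bulk username searches, add delays between calls or narrow the scope with `--site`. Note that `--local` only forces use of the local `data.json` site list; it does not cache results or reduce the number of requests.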
121
+
122
+ ## Installation
123
+
124
+ ### pipx (recommended)
125
+ ```bash
126
+ pipx install sherlock-project
127
+ ```
128
+
129
+ ### pip
130
+ ```bash
131
+ pip install sherlock-project
132
+ ```
133
+
134
+ ### Docker
135
+ ```bash
136
+ docker pull sherlock/sherlock
137
+ docker run -it --rm sherlock/sherlock <username>
138
+ ```
139
+
140
+ ### Linux packages
141
+ Available on Debian 13+, Ubuntu 22.10+, Homebrew, Kali, BlackArch.
142
+
143
+ ## Ethical Use
144
+
145
+ This tool is for legitimate OSINT and research purposes only. Remind users:
146
+ - Only search usernames they own or have permission to investigate
147
+ - Respect platform terms of service
148
+ - Do not use for harassment, stalking, or illegal activities
149
+ - Consider privacy implications before sharing results
150
+
151
+ ## Verification
152
+
153
+ After running sherlock, verify:
154
+ 1. Output lists found sites with URLs
155
+ 2. `<username>.txt` file created (default output) if using file output
156
+ 3. If `--print-found` used, output should only contain `[+]` lines for matches
157
+
158
+ ## Example Interaction
159
+
160
+ **User:** "Can you check if the username 'johndoe123' exists on social media?"
161
+
162
+ **Agent procedure:**
163
+ 1. Check `sherlock --version` (verify installed)
164
+ 2. Username provided — proceed directly
165
+ 3. Run: `sherlock --print-found --no-color "johndoe123" --timeout 90`
166
+ 4. Parse output and present links
167
+
168
+ **Response format:**
169
+ > Found 12 accounts for username 'johndoe123':
170
+ >
171
+ > • https://twitter.com/johndoe123
172
+ > • https://github.com/johndoe123
173
+ > • https://instagram.com/johndoe123
174
+ > • [... additional links]
175
+ >
176
+ > Results saved to: johndoe123.txt
177
+
178
+ ---
179
+
180
+ **User:** "Search for username 'alice' including NSFW sites"
181
+
182
+ **Agent procedure:**
183
+ 1. Check sherlock installed
184
+ 2. Username + NSFW flag both provided
185
+ 3. Run: `sherlock --print-found --no-color --nsfw "alice" --timeout 90`
186
+ 4. Present results
@@ -0,0 +1,168 @@
1
+ ---
2
+ name: simplify-code
3
+ description: "Parallel 3-agent cleanup of recent code changes."
4
+ version: 1.0.0
5
+ ---
6
+
7
+ # Simplify Code — Parallel Review & Cleanup
8
+
9
+ Review your recent code changes with three focused reviewers running in
10
+ parallel, aggregate their findings, and apply the fixes worth applying.
11
+
12
+ **Core principle:** Three narrow reviewers beat one broad reviewer. Each one
13
+ deeply searches the codebase for a single class of problem — reuse, quality,
14
+ efficiency — without diluting its attention across all three. They run
15
+ concurrently, so you pay the latency of one review, not three.
16
+
17
+ ## When to Use
18
+
19
+ Trigger this skill when the user says any of:
20
+
21
+ - "simplify" / "simplify my changes" / "simplify these changes"
22
+ - "review my code" / "review my recent changes" / "clean up my changes"
23
+ - "/simplify" (if they're carrying the Claude Code habit over)
24
+
25
+ Optional modifiers the user may add — honor them:
26
+
27
+ - **Focus:** "simplify focus on efficiency" → run only the efficiency reviewer
28
+ (or weight the aggregation toward it). Recognized focuses: `reuse`,
29
+ `quality`, `efficiency`.
30
+ - **Dry run:** "simplify but don't change anything" / "just report" → run the
31
+ three reviewers, present findings, and apply NOTHING unless the user then approves.
32
+ - **Scope:** "simplify the last commit" / "simplify staged" / "simplify
33
+ src/foo.py" → narrow the diff source accordingly (see Phase 1).
34
+
35
+ Do NOT auto-run this after every edit. It costs three subagents' worth of
36
+ tokens — invoke it only when the user explicitly asks.
37
+
38
+ ## The Process
39
+
40
+ ### Phase 1 — Identify the changes
41
+
42
+ Capture the diff to review. Pick the source by what the user asked for, in
43
+ this default order:
44
+
45
+ ```bash
46
+ # 1. Default: unstaged working-tree changes (tracked files)
47
+ git diff
48
+
49
+ # 2. If that's empty, include staged changes
50
+ git diff HEAD
51
+
52
+ # 3. Scoped variants the user may request:
53
+ git diff --staged # "staged changes"
54
+ git diff HEAD~1 HEAD # "the last commit"
55
+ git diff main...HEAD # "this branch" / "my PR"
56
+ git diff -- src/foo.py # specific file(s)
57
+ ```
58
+
59
+ If there's no git repo, or `git diff` and `git diff HEAD` are both empty,
60
+ fall back to the files the user explicitly named or that were
61
+ recently created/edited in this session. If you genuinely can't find any
62
+ changed code, say so and stop — there's nothing to simplify.
63
+
64
+ Capture the full diff text. Note its size: if it's very large (say >2000
65
+ changed lines), warn the user that three subagents each carrying the full diff
66
+ will be token-heavy, and offer to scope it down (per-directory, per-commit)
67
+ before proceeding.
68
+
69
+ ### Phase 2 — Launch three reviewers in parallel
70
+
71
+ Use `delegate_task` **batch mode** — pass all three tasks in one `tasks`
72
+ array so they run concurrently. Three is the right fan-out for this pattern;
73
+ it's well within the `delegation.max_concurrent_children` budget on any
74
+ default install.
75
+
76
+ Give **every** reviewer the **complete diff** (not fragments — cross-file
77
+ issues hide in the gaps) plus the absolute repo path so they can search the
78
+ wider codebase. Each reviewer gets `terminal`, `file`, and `search`
79
+ toolsets (so they can `git`, `read_file`, and `search_files`/grep).
80
+
81
+ Tell each reviewer to:
82
+ - Search the existing codebase for evidence (don't reason from the diff alone).
83
+ - Report findings as a concrete list: `file:line → problem → suggested fix`.
84
+ - Rank each finding `high` / `medium` / `low` confidence.
85
+ - Skip nits and style-only churn. Only flag things that materially improve
86
+ the code.
87
+
88
+ Pass these three goals (drop any the user's focus excludes):
89
+
90
+ **Reviewer 1 — Code Reuse**
91
+ > Review this diff for code that duplicates functionality already in the
92
+ > codebase. Search utility modules, shared helpers, and adjacent files
93
+ > (use search_files / grep) for existing functions, constants, or patterns
94
+ > the new code could call instead of reimplementing. Flag: new functions
95
+ > that duplicate existing ones; hand-rolled logic that an existing utility
96
+ > already does (manual string/path manipulation, custom env checks, ad-hoc
97
+ > type guards, re-implemented parsing). For each, name the existing thing to
98
+ > use and where it lives.
99
+
100
+ **Reviewer 2 — Code Quality**
101
+ > Review this diff for quality problems. Look for: redundant state (values
102
+ > that duplicate or could be derived from existing state; caches that don't
103
+ > need to exist); parameter sprawl (new params bolted on where the function
104
+ > should have been restructured); copy-paste-with-variation (near-duplicate
105
+ > blocks that should share an abstraction); leaky abstractions (exposing
106
+ > internals, breaking an existing encapsulation boundary); stringly-typed
107
+ > code (raw strings where a constant/enum/registry already exists — check the
108
+ > canonical registries before flagging). For each, give the concrete refactor.
109
+
110
+ **Reviewer 3 — Efficiency**
111
+ > Review this diff for efficiency problems. Look for: unnecessary work
112
+ > (redundant computation, repeated file reads, duplicate API calls, N+1
113
+ > access patterns); missed concurrency (independent ops run sequentially);
114
+ > hot-path bloat (heavy/blocking work on startup or per-request paths);
115
+ > TOCTOU anti-patterns (existence pre-checks before an op instead of doing
116
+ > the op and handling the error); memory issues (unbounded growth, missing
117
+ > cleanup, listener/handle leaks); overly broad reads (loading whole files
118
+ > when a slice would do). For each, give the concrete fix and why it's faster
119
+ > or lighter.
120
+
121
+ ### Phase 3 — Aggregate and apply
122
+
123
+ Wait for all three to return (batch mode returns them together).
124
+
125
+ 1. **Merge** the findings into one list, deduping where reviewers overlap.
126
+ 2. **Discard false positives** — you have the most context; you don't have to
127
+ argue with a reviewer, just drop weak or wrong suggestions silently.
128
+ 3. **Resolve conflicts.** Reviewers can disagree (Reviewer 1: "use existing
129
+ util X"; Reviewer 3: "X is slow, inline it"). Default resolution order:
130
+ **correctness > the user's stated focus > readability/reuse > micro-perf.**
131
+ Don't apply a perf "fix" that hurts clarity unless the path is genuinely
132
+ hot. When two suggestions are mutually exclusive and both defensible, pick
133
+ the one that touches less code and note the alternative.
134
+ 4. **Apply** the surviving fixes directly with `patch` / `write_file` — unless
135
+ the user asked for a dry run, in which case present the list and ask first.
136
+ 5. **Verify** you didn't break anything: run the project's targeted tests for
137
+ the touched files (not the full suite), and re-run any linter/type check the
138
+ repo uses. If a fix breaks a test, revert that one fix and report it.
139
+ 6. **Summarize** what you changed: a short list of applied fixes grouped by
140
+ reviewer category, plus any findings you deliberately skipped and why.
141
+
142
+ ## Pitfalls
143
+
144
+ - **Don't fan out wider than ~3.** More reviewers means more cost and more
145
+ conflicting suggestions to reconcile, not better coverage. Three categories
146
+ cover the space.
147
+ - **Give the WHOLE diff to each reviewer.** Splitting the diff across reviewers
148
+ defeats the design — cross-file duplication and N+1s only show up with the
149
+ full picture.
150
+ - **Reviewers search, they don't guess.** A reuse finding with no pointer to
151
+ the existing utility ("there's probably a helper for this") is noise. Require
152
+ `file:line` evidence; drop findings that lack it.
153
+ - **Apply ≠ rewrite.** This is cleanup of the user's recent changes, not a
154
+ license to refactor the whole module. Keep edits scoped to what the diff
155
+ touched plus the minimal surrounding change a fix requires.
156
+ - **Respect project conventions.** If the repo has AGENTS.md / CLAUDE.md /
157
+ HERMES.md or a linter config, fold those rules into the reviewer prompts so
158
+ suggestions match house style instead of fighting it.
159
+ - **Large diffs blow context.** If the diff is huge, scope it down before
160
+ delegating — three subagents each carrying a 5000-line diff is expensive and
161
+ may truncate.
162
+
163
+ ## Related
164
+
165
+ If your install has the `subagent-driven-development` skill (optional), it
166
+ covers the complementary case: parallel review *during* implementation, per
167
+ task. This skill is the standalone *after-the-fact* cleanup pass. Use
168
+ `requesting-code-review` for the pre-commit security/quality gate.