aidevops 2.52.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/.agent/AGENTS.md +614 -0
  2. package/.agent/accounts.md +65 -0
  3. package/.agent/aidevops/add-new-mcp-to-aidevops.md +456 -0
  4. package/.agent/aidevops/api-integrations.md +335 -0
  5. package/.agent/aidevops/architecture.md +510 -0
  6. package/.agent/aidevops/configs.md +274 -0
  7. package/.agent/aidevops/docs.md +244 -0
  8. package/.agent/aidevops/extension.md +311 -0
  9. package/.agent/aidevops/mcp-integrations.md +340 -0
  10. package/.agent/aidevops/mcp-troubleshooting.md +162 -0
  11. package/.agent/aidevops/memory-patterns.md +172 -0
  12. package/.agent/aidevops/providers.md +217 -0
  13. package/.agent/aidevops/recommendations.md +321 -0
  14. package/.agent/aidevops/requirements.md +301 -0
  15. package/.agent/aidevops/resources.md +214 -0
  16. package/.agent/aidevops/security-requirements.md +174 -0
  17. package/.agent/aidevops/security.md +350 -0
  18. package/.agent/aidevops/service-links.md +400 -0
  19. package/.agent/aidevops/services.md +357 -0
  20. package/.agent/aidevops/setup.md +153 -0
  21. package/.agent/aidevops/troubleshooting.md +389 -0
  22. package/.agent/aidevops.md +124 -0
  23. package/.agent/build-plus.md +244 -0
  24. package/.agent/content/guidelines.md +109 -0
  25. package/.agent/content.md +87 -0
  26. package/.agent/health.md +59 -0
  27. package/.agent/legal.md +59 -0
  28. package/.agent/loop-state/full-loop.local.md +16 -0
  29. package/.agent/loop-state/ralph-loop.local.md +10 -0
  30. package/.agent/marketing.md +440 -0
  31. package/.agent/memory/README.md +260 -0
  32. package/.agent/onboarding.md +796 -0
  33. package/.agent/plan-plus.md +245 -0
  34. package/.agent/research.md +100 -0
  35. package/.agent/sales.md +333 -0
  36. package/.agent/scripts/101domains-helper.sh +701 -0
  37. package/.agent/scripts/add-missing-returns.sh +140 -0
  38. package/.agent/scripts/agent-browser-helper.sh +311 -0
  39. package/.agent/scripts/agno-setup.sh +712 -0
  40. package/.agent/scripts/ahrefs-mcp-wrapper.js +168 -0
  41. package/.agent/scripts/aidevops-update-check.sh +71 -0
  42. package/.agent/scripts/ampcode-cli.sh +522 -0
  43. package/.agent/scripts/auto-version-bump.sh +156 -0
  44. package/.agent/scripts/autogen-helper.sh +512 -0
  45. package/.agent/scripts/beads-sync-helper.sh +596 -0
  46. package/.agent/scripts/closte-helper.sh +5 -0
  47. package/.agent/scripts/cloudron-helper.sh +321 -0
  48. package/.agent/scripts/codacy-cli-chunked.sh +581 -0
  49. package/.agent/scripts/codacy-cli.sh +442 -0
  50. package/.agent/scripts/code-audit-helper.sh +5 -0
  51. package/.agent/scripts/coderabbit-cli.sh +417 -0
  52. package/.agent/scripts/coderabbit-pro-analysis.sh +238 -0
  53. package/.agent/scripts/commands/code-simplifier.md +86 -0
  54. package/.agent/scripts/commands/full-loop.md +246 -0
  55. package/.agent/scripts/commands/postflight-loop.md +103 -0
  56. package/.agent/scripts/commands/recall.md +182 -0
  57. package/.agent/scripts/commands/remember.md +132 -0
  58. package/.agent/scripts/commands/save-todo.md +175 -0
  59. package/.agent/scripts/commands/session-review.md +154 -0
  60. package/.agent/scripts/comprehensive-quality-fix.sh +106 -0
  61. package/.agent/scripts/context-builder-helper.sh +522 -0
  62. package/.agent/scripts/coolify-cli-helper.sh +674 -0
  63. package/.agent/scripts/coolify-helper.sh +380 -0
  64. package/.agent/scripts/crawl4ai-examples.sh +401 -0
  65. package/.agent/scripts/crawl4ai-helper.sh +1078 -0
  66. package/.agent/scripts/crewai-helper.sh +681 -0
  67. package/.agent/scripts/dev-browser-helper.sh +513 -0
  68. package/.agent/scripts/dns-helper.sh +396 -0
  69. package/.agent/scripts/domain-research-helper.sh +917 -0
  70. package/.agent/scripts/dspy-helper.sh +285 -0
  71. package/.agent/scripts/dspyground-helper.sh +291 -0
  72. package/.agent/scripts/eeat-score-helper.sh +1242 -0
  73. package/.agent/scripts/efficient-return-fix.sh +92 -0
  74. package/.agent/scripts/extract-opencode-prompts.sh +128 -0
  75. package/.agent/scripts/find-missing-returns.sh +113 -0
  76. package/.agent/scripts/fix-auth-headers.sh +104 -0
  77. package/.agent/scripts/fix-common-strings.sh +254 -0
  78. package/.agent/scripts/fix-content-type.sh +100 -0
  79. package/.agent/scripts/fix-error-messages.sh +130 -0
  80. package/.agent/scripts/fix-misplaced-returns.sh +74 -0
  81. package/.agent/scripts/fix-remaining-literals.sh +152 -0
  82. package/.agent/scripts/fix-return-statements.sh +41 -0
  83. package/.agent/scripts/fix-s131-default-cases.sh +249 -0
  84. package/.agent/scripts/fix-sc2155-simple.sh +102 -0
  85. package/.agent/scripts/fix-shellcheck-critical.sh +187 -0
  86. package/.agent/scripts/fix-string-literals.sh +273 -0
  87. package/.agent/scripts/full-loop-helper.sh +773 -0
  88. package/.agent/scripts/generate-opencode-agents.sh +497 -0
  89. package/.agent/scripts/generate-opencode-commands.sh +1629 -0
  90. package/.agent/scripts/generate-skills.sh +366 -0
  91. package/.agent/scripts/git-platforms-helper.sh +640 -0
  92. package/.agent/scripts/gitea-cli-helper.sh +743 -0
  93. package/.agent/scripts/github-cli-helper.sh +702 -0
  94. package/.agent/scripts/gitlab-cli-helper.sh +682 -0
  95. package/.agent/scripts/gsc-add-user-helper.sh +325 -0
  96. package/.agent/scripts/gsc-sitemap-helper.sh +678 -0
  97. package/.agent/scripts/hetzner-helper.sh +485 -0
  98. package/.agent/scripts/hostinger-helper.sh +229 -0
  99. package/.agent/scripts/keyword-research-helper.sh +1815 -0
  100. package/.agent/scripts/langflow-helper.sh +544 -0
  101. package/.agent/scripts/linkedin-automation.py +241 -0
  102. package/.agent/scripts/linter-manager.sh +599 -0
  103. package/.agent/scripts/linters-local.sh +434 -0
  104. package/.agent/scripts/list-keys-helper.sh +488 -0
  105. package/.agent/scripts/local-browser-automation.py +339 -0
  106. package/.agent/scripts/localhost-helper.sh +744 -0
  107. package/.agent/scripts/loop-common.sh +806 -0
  108. package/.agent/scripts/mainwp-helper.sh +728 -0
  109. package/.agent/scripts/markdown-formatter.sh +338 -0
  110. package/.agent/scripts/markdown-lint-fix.sh +311 -0
  111. package/.agent/scripts/mass-fix-returns.sh +58 -0
  112. package/.agent/scripts/mcp-diagnose.sh +167 -0
  113. package/.agent/scripts/mcp-inspector-helper.sh +449 -0
  114. package/.agent/scripts/memory-helper.sh +650 -0
  115. package/.agent/scripts/monitor-code-review.sh +255 -0
  116. package/.agent/scripts/onboarding-helper.sh +706 -0
  117. package/.agent/scripts/opencode-github-setup-helper.sh +797 -0
  118. package/.agent/scripts/opencode-test-helper.sh +213 -0
  119. package/.agent/scripts/pagespeed-helper.sh +464 -0
  120. package/.agent/scripts/pandoc-helper.sh +362 -0
  121. package/.agent/scripts/postflight-check.sh +555 -0
  122. package/.agent/scripts/pre-commit-hook.sh +259 -0
  123. package/.agent/scripts/pre-edit-check.sh +169 -0
  124. package/.agent/scripts/qlty-cli.sh +356 -0
  125. package/.agent/scripts/quality-cli-manager.sh +525 -0
  126. package/.agent/scripts/quality-feedback-helper.sh +462 -0
  127. package/.agent/scripts/quality-fix.sh +263 -0
  128. package/.agent/scripts/quality-loop-helper.sh +1108 -0
  129. package/.agent/scripts/ralph-loop-helper.sh +836 -0
  130. package/.agent/scripts/ralph-upstream-check.sh +341 -0
  131. package/.agent/scripts/secretlint-helper.sh +847 -0
  132. package/.agent/scripts/servers-helper.sh +241 -0
  133. package/.agent/scripts/ses-helper.sh +619 -0
  134. package/.agent/scripts/session-review-helper.sh +404 -0
  135. package/.agent/scripts/setup-linters-wizard.sh +379 -0
  136. package/.agent/scripts/setup-local-api-keys.sh +330 -0
  137. package/.agent/scripts/setup-mcp-integrations.sh +472 -0
  138. package/.agent/scripts/shared-constants.sh +246 -0
  139. package/.agent/scripts/site-crawler-helper.sh +1487 -0
  140. package/.agent/scripts/snyk-helper.sh +940 -0
  141. package/.agent/scripts/sonarcloud-autofix.sh +193 -0
  142. package/.agent/scripts/sonarcloud-cli.sh +191 -0
  143. package/.agent/scripts/sonarscanner-cli.sh +455 -0
  144. package/.agent/scripts/spaceship-helper.sh +747 -0
  145. package/.agent/scripts/stagehand-helper.sh +321 -0
  146. package/.agent/scripts/stagehand-python-helper.sh +321 -0
  147. package/.agent/scripts/stagehand-python-setup.sh +441 -0
  148. package/.agent/scripts/stagehand-setup.sh +439 -0
  149. package/.agent/scripts/system-cleanup.sh +340 -0
  150. package/.agent/scripts/terminal-title-helper.sh +388 -0
  151. package/.agent/scripts/terminal-title-setup.sh +549 -0
  152. package/.agent/scripts/test-stagehand-both-integration.sh +317 -0
  153. package/.agent/scripts/test-stagehand-integration.sh +309 -0
  154. package/.agent/scripts/test-stagehand-python-integration.sh +341 -0
  155. package/.agent/scripts/todo-ready.sh +263 -0
  156. package/.agent/scripts/tool-version-check.sh +362 -0
  157. package/.agent/scripts/toon-helper.sh +469 -0
  158. package/.agent/scripts/twilio-helper.sh +917 -0
  159. package/.agent/scripts/updown-helper.sh +279 -0
  160. package/.agent/scripts/validate-mcp-integrations.sh +250 -0
  161. package/.agent/scripts/validate-version-consistency.sh +131 -0
  162. package/.agent/scripts/vaultwarden-helper.sh +597 -0
  163. package/.agent/scripts/vercel-cli-helper.sh +816 -0
  164. package/.agent/scripts/verify-mirrors.sh +169 -0
  165. package/.agent/scripts/version-manager.sh +831 -0
  166. package/.agent/scripts/webhosting-helper.sh +471 -0
  167. package/.agent/scripts/webhosting-verify.sh +238 -0
  168. package/.agent/scripts/wordpress-mcp-helper.sh +508 -0
  169. package/.agent/scripts/worktree-helper.sh +595 -0
  170. package/.agent/scripts/worktree-sessions.sh +577 -0
  171. package/.agent/seo/dataforseo.md +215 -0
  172. package/.agent/seo/domain-research.md +532 -0
  173. package/.agent/seo/eeat-score.md +659 -0
  174. package/.agent/seo/google-search-console.md +366 -0
  175. package/.agent/seo/gsc-sitemaps.md +282 -0
  176. package/.agent/seo/keyword-research.md +521 -0
  177. package/.agent/seo/serper.md +278 -0
  178. package/.agent/seo/site-crawler.md +387 -0
  179. package/.agent/seo.md +236 -0
  180. package/.agent/services/accounting/quickfile.md +159 -0
  181. package/.agent/services/communications/telfon.md +470 -0
  182. package/.agent/services/communications/twilio.md +569 -0
  183. package/.agent/services/crm/fluentcrm.md +449 -0
  184. package/.agent/services/email/ses.md +399 -0
  185. package/.agent/services/hosting/101domains.md +378 -0
  186. package/.agent/services/hosting/closte.md +177 -0
  187. package/.agent/services/hosting/cloudflare.md +251 -0
  188. package/.agent/services/hosting/cloudron.md +478 -0
  189. package/.agent/services/hosting/dns-providers.md +335 -0
  190. package/.agent/services/hosting/domain-purchasing.md +344 -0
  191. package/.agent/services/hosting/hetzner.md +327 -0
  192. package/.agent/services/hosting/hostinger.md +287 -0
  193. package/.agent/services/hosting/localhost.md +419 -0
  194. package/.agent/services/hosting/spaceship.md +353 -0
  195. package/.agent/services/hosting/webhosting.md +330 -0
  196. package/.agent/social-media.md +69 -0
  197. package/.agent/templates/plans-template.md +114 -0
  198. package/.agent/templates/prd-template.md +129 -0
  199. package/.agent/templates/tasks-template.md +108 -0
  200. package/.agent/templates/todo-template.md +89 -0
  201. package/.agent/tools/ai-assistants/agno.md +471 -0
  202. package/.agent/tools/ai-assistants/capsolver.md +326 -0
  203. package/.agent/tools/ai-assistants/configuration.md +221 -0
  204. package/.agent/tools/ai-assistants/overview.md +209 -0
  205. package/.agent/tools/ai-assistants/status.md +171 -0
  206. package/.agent/tools/ai-assistants/windsurf.md +193 -0
  207. package/.agent/tools/ai-orchestration/autogen.md +406 -0
  208. package/.agent/tools/ai-orchestration/crewai.md +445 -0
  209. package/.agent/tools/ai-orchestration/langflow.md +405 -0
  210. package/.agent/tools/ai-orchestration/openprose.md +487 -0
  211. package/.agent/tools/ai-orchestration/overview.md +362 -0
  212. package/.agent/tools/ai-orchestration/packaging.md +647 -0
  213. package/.agent/tools/browser/agent-browser.md +464 -0
  214. package/.agent/tools/browser/browser-automation.md +400 -0
  215. package/.agent/tools/browser/chrome-devtools.md +282 -0
  216. package/.agent/tools/browser/crawl4ai-integration.md +422 -0
  217. package/.agent/tools/browser/crawl4ai-resources.md +277 -0
  218. package/.agent/tools/browser/crawl4ai-usage.md +416 -0
  219. package/.agent/tools/browser/crawl4ai.md +585 -0
  220. package/.agent/tools/browser/dev-browser.md +341 -0
  221. package/.agent/tools/browser/pagespeed.md +260 -0
  222. package/.agent/tools/browser/playwright.md +266 -0
  223. package/.agent/tools/browser/playwriter.md +310 -0
  224. package/.agent/tools/browser/stagehand-examples.md +456 -0
  225. package/.agent/tools/browser/stagehand-python.md +483 -0
  226. package/.agent/tools/browser/stagehand.md +421 -0
  227. package/.agent/tools/build-agent/agent-review.md +224 -0
  228. package/.agent/tools/build-agent/build-agent.md +784 -0
  229. package/.agent/tools/build-mcp/aidevops-plugin.md +476 -0
  230. package/.agent/tools/build-mcp/api-wrapper.md +445 -0
  231. package/.agent/tools/build-mcp/build-mcp.md +240 -0
  232. package/.agent/tools/build-mcp/deployment.md +401 -0
  233. package/.agent/tools/build-mcp/server-patterns.md +632 -0
  234. package/.agent/tools/build-mcp/transports.md +366 -0
  235. package/.agent/tools/code-review/auditing.md +383 -0
  236. package/.agent/tools/code-review/automation.md +219 -0
  237. package/.agent/tools/code-review/best-practices.md +203 -0
  238. package/.agent/tools/code-review/codacy.md +151 -0
  239. package/.agent/tools/code-review/code-simplifier.md +174 -0
  240. package/.agent/tools/code-review/code-standards.md +309 -0
  241. package/.agent/tools/code-review/coderabbit.md +101 -0
  242. package/.agent/tools/code-review/management.md +155 -0
  243. package/.agent/tools/code-review/qlty.md +248 -0
  244. package/.agent/tools/code-review/secretlint.md +565 -0
  245. package/.agent/tools/code-review/setup.md +250 -0
  246. package/.agent/tools/code-review/snyk.md +563 -0
  247. package/.agent/tools/code-review/tools.md +230 -0
  248. package/.agent/tools/content/summarize.md +353 -0
  249. package/.agent/tools/context/augment-context-engine.md +468 -0
  250. package/.agent/tools/context/context-builder-agent.md +76 -0
  251. package/.agent/tools/context/context-builder.md +375 -0
  252. package/.agent/tools/context/context7.md +371 -0
  253. package/.agent/tools/context/dspy.md +302 -0
  254. package/.agent/tools/context/dspyground.md +374 -0
  255. package/.agent/tools/context/llm-tldr.md +219 -0
  256. package/.agent/tools/context/osgrep.md +488 -0
  257. package/.agent/tools/context/prompt-optimization.md +338 -0
  258. package/.agent/tools/context/toon.md +292 -0
  259. package/.agent/tools/conversion/pandoc.md +304 -0
  260. package/.agent/tools/credentials/api-key-management.md +154 -0
  261. package/.agent/tools/credentials/api-key-setup.md +224 -0
  262. package/.agent/tools/credentials/environment-variables.md +180 -0
  263. package/.agent/tools/credentials/vaultwarden.md +382 -0
  264. package/.agent/tools/data-extraction/outscraper.md +974 -0
  265. package/.agent/tools/deployment/coolify-cli.md +388 -0
  266. package/.agent/tools/deployment/coolify-setup.md +353 -0
  267. package/.agent/tools/deployment/coolify.md +345 -0
  268. package/.agent/tools/deployment/vercel.md +390 -0
  269. package/.agent/tools/git/authentication.md +132 -0
  270. package/.agent/tools/git/gitea-cli.md +193 -0
  271. package/.agent/tools/git/github-actions.md +207 -0
  272. package/.agent/tools/git/github-cli.md +223 -0
  273. package/.agent/tools/git/gitlab-cli.md +190 -0
  274. package/.agent/tools/git/opencode-github-security.md +350 -0
  275. package/.agent/tools/git/opencode-github.md +328 -0
  276. package/.agent/tools/git/opencode-gitlab.md +252 -0
  277. package/.agent/tools/git/security.md +196 -0
  278. package/.agent/tools/git.md +207 -0
  279. package/.agent/tools/opencode/oh-my-opencode.md +375 -0
  280. package/.agent/tools/opencode/opencode-anthropic-auth.md +446 -0
  281. package/.agent/tools/opencode/opencode.md +651 -0
  282. package/.agent/tools/social-media/bird.md +437 -0
  283. package/.agent/tools/task-management/beads.md +336 -0
  284. package/.agent/tools/terminal/terminal-title.md +251 -0
  285. package/.agent/tools/ui/shadcn.md +196 -0
  286. package/.agent/tools/ui/ui-skills.md +115 -0
  287. package/.agent/tools/wordpress/localwp.md +311 -0
  288. package/.agent/tools/wordpress/mainwp.md +391 -0
  289. package/.agent/tools/wordpress/scf.md +527 -0
  290. package/.agent/tools/wordpress/wp-admin.md +729 -0
  291. package/.agent/tools/wordpress/wp-dev.md +940 -0
  292. package/.agent/tools/wordpress/wp-preferred.md +398 -0
  293. package/.agent/tools/wordpress.md +95 -0
  294. package/.agent/workflows/branch/bugfix.md +63 -0
  295. package/.agent/workflows/branch/chore.md +95 -0
  296. package/.agent/workflows/branch/experiment.md +115 -0
  297. package/.agent/workflows/branch/feature.md +59 -0
  298. package/.agent/workflows/branch/hotfix.md +98 -0
  299. package/.agent/workflows/branch/refactor.md +92 -0
  300. package/.agent/workflows/branch/release.md +96 -0
  301. package/.agent/workflows/branch.md +347 -0
  302. package/.agent/workflows/bug-fixing.md +267 -0
  303. package/.agent/workflows/changelog.md +129 -0
  304. package/.agent/workflows/code-audit-remote.md +279 -0
  305. package/.agent/workflows/conversation-starter.md +69 -0
  306. package/.agent/workflows/error-feedback.md +578 -0
  307. package/.agent/workflows/feature-development.md +355 -0
  308. package/.agent/workflows/git-workflow.md +702 -0
  309. package/.agent/workflows/multi-repo-workspace.md +268 -0
  310. package/.agent/workflows/plans.md +709 -0
  311. package/.agent/workflows/postflight.md +604 -0
  312. package/.agent/workflows/pr.md +571 -0
  313. package/.agent/workflows/preflight.md +278 -0
  314. package/.agent/workflows/ralph-loop.md +773 -0
  315. package/.agent/workflows/release.md +498 -0
  316. package/.agent/workflows/session-manager.md +254 -0
  317. package/.agent/workflows/session-review.md +311 -0
  318. package/.agent/workflows/sql-migrations.md +631 -0
  319. package/.agent/workflows/version-bump.md +283 -0
  320. package/.agent/workflows/wiki-update.md +333 -0
  321. package/.agent/workflows/worktree.md +477 -0
  322. package/LICENSE +21 -0
  323. package/README.md +1446 -0
  324. package/VERSION +1 -0
  325. package/aidevops.sh +1746 -0
  326. package/bin/aidevops +21 -0
  327. package/package.json +75 -0
  328. package/scripts/npm-postinstall.js +60 -0
  329. package/setup.sh +2366 -0
@@ -0,0 +1,585 @@
1
+ ---
2
+ description: AI-powered web crawling and content extraction
3
+ mode: subagent
4
+ tools:
5
+ read: true
6
+ write: false
7
+ edit: false
8
+ bash: true
9
+ glob: true
10
+ grep: true
11
+ webfetch: true
12
+ task: true
13
+ ---
14
+
15
+ # Crawl4AI Integration Guide
16
+
17
+ <!-- AI-CONTEXT-START -->
18
+
19
+ ## Quick Reference
20
+
21
+ - **Purpose**: #1 AI/LLM web crawler - markdown output for RAG pipelines
22
+ - **Install**: `./.agent/scripts/crawl4ai-helper.sh install`
23
+ - **Docker**: `./.agent/scripts/crawl4ai-helper.sh docker-start`
24
+ - **MCP Setup**: `./.agent/scripts/crawl4ai-helper.sh mcp-setup`
25
+
26
+ **Endpoints** (Docker):
27
+ - API: http://localhost:11235
28
+ - Dashboard: http://localhost:11235/dashboard
29
+ - Playground: http://localhost:11235/playground
30
+
31
+ **Commands**: `install|docker-setup|docker-start|mcp-setup|capsolver-setup|status|crawl|extract|captcha-crawl`
32
+
33
+ **Key Features**:
34
+ - LLM-ready markdown output
35
+ - CSS/XPath/LLM extraction strategies
36
+ - CAPTCHA solving via CapSolver
37
+ - Parallel async crawling
38
+ - Session management & browser pool
39
+
40
+ **Env Vars**: `OPENAI_API_KEY`, `CAPSOLVER_API_KEY`, `CRAWL4AI_MAX_PAGES=50`
41
+ <!-- AI-CONTEXT-END -->
42
+
43
+ ## 🚀 Overview
44
+
45
+ Crawl4AI is the #1 trending open-source web crawler on GitHub, specifically designed for AI and LLM applications. This integration provides comprehensive web crawling and data extraction capabilities for the AI DevOps Framework.
46
+
47
+ ### Key Features
48
+
49
+ - **🤖 LLM-Ready Output**: Clean markdown generation perfect for RAG pipelines
50
+ - **📊 Structured Extraction**: CSS selectors, XPath, and LLM-based data extraction
51
+ - **🎛️ Advanced Browser Control**: Hooks, proxies, stealth modes, session management
52
+ - **⚡ High Performance**: Parallel crawling, async operations, real-time processing
53
+ - **🔌 AI Integration**: Native MCP support for AI assistants like Claude
54
+ - **📈 Enterprise Features**: Monitoring dashboard, job queues, webhook notifications
55
+ - **🤖 CAPTCHA Solving**: Integrated CapSolver support for automated CAPTCHA bypass
56
+ - **🛡️ Anti-Bot Measures**: Handle Cloudflare, AWS WAF, and other protection systems
57
+
58
+ ## 🛠️ Quick Start
59
+
60
+ ### Installation
61
+
62
+ ```bash
63
+ # Install Crawl4AI Python package
64
+ ./.agent/scripts/crawl4ai-helper.sh install
65
+
66
+ # Setup Docker deployment with monitoring
67
+ ./.agent/scripts/crawl4ai-helper.sh docker-setup
68
+
69
+ # Start Docker container
70
+ ./.agent/scripts/crawl4ai-helper.sh docker-start
71
+
72
+ # Setup MCP integration for AI assistants
73
+ ./.agent/scripts/crawl4ai-helper.sh mcp-setup
74
+
75
+ # Setup CapSolver for CAPTCHA solving
76
+ ./.agent/scripts/crawl4ai-helper.sh capsolver-setup
77
+
78
+ # Check status
79
+ ./.agent/scripts/crawl4ai-helper.sh status
80
+ ```
81
+
82
+ ### Basic Usage
83
+
84
+ ```bash
85
+ # Crawl a single URL
86
+ ./.agent/scripts/crawl4ai-helper.sh crawl https://example.com markdown output.json
87
+
88
+ # Extract structured data
89
+ ./.agent/scripts/crawl4ai-helper.sh extract https://example.com '{"title":"h1","content":".article"}' data.json
90
+
91
+ # Crawl with CAPTCHA solving (requires CapSolver API key)
92
+ export CAPSOLVER_API_KEY="CAP-xxxxxxxxxxxxxxxxxxxxx"
93
+ ./.agent/scripts/crawl4ai-helper.sh captcha-crawl https://example.com recaptcha_v2 6LfW6wATAAAAAHLqO2pb8bDBahxlMxNdo9g947u9
94
+ ```
95
+
96
+ ## 🐳 Docker Deployment
97
+
98
+ The Docker deployment includes a comprehensive suite of features:
99
+
100
+ ### Services Available
101
+
102
+ - **API Server**: http://localhost:11235
103
+ - **Monitoring Dashboard**: http://localhost:11235/dashboard
104
+ - **Interactive Playground**: http://localhost:11235/playground
105
+ - **Health Check**: http://localhost:11235/health
106
+ - **Metrics**: http://localhost:11235/metrics
107
+
108
+ ### Key Features
109
+
110
+ - **Real-time Monitoring**: System health, memory usage, request tracking
111
+ - **Browser Pool Management**: Efficient browser instance management
112
+ - **Job Queue System**: Asynchronous processing with webhook notifications
113
+ - **WebSocket Streaming**: Real-time crawl results
114
+ - **Multi-architecture Support**: AMD64 and ARM64 compatibility
115
+
116
+ ## 🔌 MCP Integration
117
+
118
+ Crawl4AI provides native Model Context Protocol (MCP) support for AI assistants:
119
+
120
+ ### Claude Desktop Setup
121
+
122
+ Add the following to your Claude Desktop configuration file (`claude_desktop_config.json`):
123
+
124
+ ```json
125
+ {
126
+ "mcpServers": {
127
+ "crawl4ai": {
128
+ "command": "npx",
129
+ "args": ["crawl4ai-mcp-server@latest"],
130
+ "env": {
131
+ "CRAWL4AI_API_URL": "http://localhost:11235"
132
+ }
133
+ }
134
+ }
135
+ }
136
+ ```
137
+
138
+ ### Available MCP Tools
139
+
140
+ - **crawl_url**: Crawl single URL with format options
141
+ - **crawl_multiple**: Batch crawl multiple URLs
142
+ - **extract_structured**: Extract data using CSS selectors or LLM
143
+ - **take_screenshot**: Capture webpage screenshots
144
+ - **generate_pdf**: Convert webpages to PDF
145
+ - **execute_javascript**: Run custom JavaScript on pages
146
+ - **solve_captcha**: Solve CAPTCHA challenges using CapSolver
147
+ - **crawl_with_captcha**: Crawl URLs with automatic CAPTCHA solving
148
+ - **check_captcha_balance**: Monitor CapSolver account balance
149
+
150
+ ## 🤖 CapSolver Integration for CAPTCHA Solving
151
+
152
+ Crawl4AI integrates with CapSolver, the world's leading automated CAPTCHA solving service, to handle anti-bot measures seamlessly.
153
+
154
+ ### Supported CAPTCHA Types
155
+
156
+ - **reCAPTCHA v2/v3**: Including Enterprise versions with high success rates
157
+ - **Cloudflare Turnstile**: Modern CAPTCHA alternative bypass
158
+ - **Cloudflare Challenge**: 5-second shield and anti-bot protection
159
+ - **AWS WAF**: Web Application Firewall bypass
160
+ - **GeeTest v3/v4**: Popular CAPTCHA system in Asia
161
+ - **Image-to-Text**: Traditional OCR-based CAPTCHAs
162
+
163
+ ### Quick Setup
164
+
165
+ ```bash
166
+ # Setup CapSolver integration
167
+ ./.agent/scripts/crawl4ai-helper.sh capsolver-setup
168
+
169
+ # Get API key from https://dashboard.capsolver.com/
170
+ export CAPSOLVER_API_KEY="CAP-xxxxxxxxxxxxxxxxxxxxx"
171
+
172
+ # Crawl with CAPTCHA solving
173
+ ./.agent/scripts/crawl4ai-helper.sh captcha-crawl https://example.com recaptcha_v2 site_key_here
174
+ ```
175
+
176
+ ### Pricing & Performance
177
+
178
+ - **Cost**: Starting from $0.40 per 1,000 requests
179
+ - **Speed**: Most CAPTCHAs solved in < 10 seconds
180
+ - **Success Rate**: 99.9% accuracy
181
+ - **Package Discounts**: Up to 60% savings available
182
+
183
+ ### Integration Methods
184
+
185
+ 1. **API Integration** (Recommended): Direct Python SDK integration
186
+ 2. **Browser Extension**: Automatic detection and solving
187
+
188
+ ## 📊 Core Capabilities
189
+
190
+ ### 1. Web Crawling
191
+
192
+ ```python
193
+ import asyncio
194
+ from crawl4ai import AsyncWebCrawler
195
+
196
+ async def basic_crawl():
197
+ async with AsyncWebCrawler() as crawler:
198
+ result = await crawler.arun(url="https://example.com")
199
+ return result.markdown
200
+ ```
201
+
202
+ ### 2. Structured Data Extraction
203
+
204
+ ```python
205
+ from crawl4ai import JsonCssExtractionStrategy
206
+
207
+ schema = {
208
+ "name": "Product Schema",
209
+ "baseSelector": ".product",
210
+ "fields": [
211
+ {"name": "title", "selector": "h2", "type": "text"},
212
+ {"name": "price", "selector": ".price", "type": "text"},
213
+ {"name": "image", "selector": "img", "type": "attribute", "attribute": "src"}
214
+ ]
215
+ }
216
+
217
+ extraction_strategy = JsonCssExtractionStrategy(schema)
218
+ result = await crawler.arun(url="https://shop.com", extraction_strategy=extraction_strategy)
219
+ ```
220
+
221
+ ### 3. LLM-Powered Extraction
222
+
223
+ ```python
224
+ from crawl4ai import LLMExtractionStrategy, LLMConfig
225
+
226
+ llm_strategy = LLMExtractionStrategy(
227
+ llm_config=LLMConfig(provider="openai/gpt-4o"),
228
+ instruction="Extract key information and create a summary"
229
+ )
230
+
231
+ result = await crawler.arun(url="https://article.com", extraction_strategy=llm_strategy)
232
+ ```
233
+
234
+ ### 4. Advanced Browser Control
235
+
236
+ ```python
237
+ # Custom hooks for advanced control
238
+ async def setup_hook(page, context, **kwargs):
239
+ # Block images for faster crawling
240
+ await context.route("**/*.{png,jpg,gif}", lambda r: r.abort())
241
+ # Set custom viewport
242
+ await page.set_viewport_size({"width": 1920, "height": 1080})
243
+ return page
244
+
245
+ result = await crawler.arun(
246
+ url="https://example.com",
247
+ hooks={"on_page_context_created": setup_hook}
248
+ )
249
+ ```
250
+
251
+ ## 🔄 Job Queue & Webhooks
252
+
253
+ ### Asynchronous Processing
254
+
255
+ ```python
256
+ import requests
257
+
258
+ # Submit crawl job
259
+ response = requests.post("http://localhost:11235/crawl/job", json={
260
+ "urls": ["https://example.com"],
261
+ "webhook_config": {
262
+ "webhook_url": "https://your-app.com/webhook",
263
+ "webhook_data_in_payload": True,
264
+ "webhook_headers": {
265
+ "X-Webhook-Secret": "your-secret-token"
266
+ }
267
+ }
268
+ })
269
+
270
+ task_id = response.json()["task_id"]
271
+ ```
272
+
273
+ ### Webhook Handler
274
+
275
+ ```python
276
+ from flask import Flask, request, jsonify
277
+
278
+ app = Flask(__name__)
279
+
280
+ @app.route('/webhook', methods=['POST'])
281
+ def handle_webhook():
282
+ payload = request.json
283
+
284
+ if payload['status'] == 'completed':
285
+ # Process successful crawl
286
+ data = payload['data']
287
+ markdown = data.get('markdown', '')
288
+ extracted = data.get('extracted_content', {})
289
+
290
+ # Your processing logic here
291
+ print(f"Crawl completed: {len(markdown)} characters extracted")
292
+
293
+ elif payload['status'] == 'failed':
294
+ # Handle failure
295
+ error = payload.get('error', 'Unknown error')
296
+ print(f"Crawl failed: {error}")
297
+
298
+ return jsonify({"status": "received"}), 200
299
+ ```
300
+
301
+ ## 🎯 Use Cases
302
+
303
+ ### 1. Content Research & Analysis
304
+
305
+ ```bash
306
+ # Research articles and papers
307
+ ./.agent/scripts/crawl4ai-helper.sh extract https://research-paper.com '{
308
+ "title": "h1",
309
+ "authors": ".authors",
310
+ "abstract": ".abstract",
311
+ "sections": {
312
+ "selector": ".section",
313
+ "fields": [
314
+ {"name": "heading", "selector": "h2", "type": "text"},
315
+ {"name": "content", "selector": "p", "type": "text"}
316
+ ]
317
+ }
318
+ }' research.json
319
+ ```
320
+
321
+ ### 2. E-commerce Data Collection
322
+
323
+ ```bash
324
+ # Product information extraction
325
+ ./.agent/scripts/crawl4ai-helper.sh extract https://ecommerce.com/product '{
326
+ "name": "h1.product-title",
327
+ "price": ".price-current",
328
+ "description": ".product-description",
329
+ "specifications": {
330
+ "selector": ".specs tr",
331
+ "fields": [
332
+ {"name": "feature", "selector": "td:first-child", "type": "text"},
333
+ {"name": "value", "selector": "td:last-child", "type": "text"}
334
+ ]
335
+ },
336
+ "images": {"selector": ".product-images img", "type": "attribute", "attribute": "src"}
337
+ }' product.json
338
+ ```
339
+
340
+ ### 3. News Aggregation
341
+
342
+ ```bash
343
+ # Multiple news sources
344
+ urls=("https://news1.com" "https://news2.com" "https://news3.com")
345
+
346
+ for url in "${urls[@]}"; do
347
+ ./.agent/scripts/crawl4ai-helper.sh extract "$url" '{
348
+ "headline": "h1",
349
+ "summary": ".article-summary",
350
+ "author": ".byline",
351
+ "date": ".publish-date",
352
+ "content": ".article-body"
353
+ }' "news-$(basename "$url").json"
354
+ done
355
+ ```
356
+
357
+ ### 4. Documentation Processing
358
+
359
+ ```bash
360
+ # API documentation extraction
361
+ ./.agent/scripts/crawl4ai-helper.sh extract https://api-docs.com '{
362
+ "endpoints": {
363
+ "selector": ".endpoint",
364
+ "fields": [
365
+ {"name": "method", "selector": ".method", "type": "text"},
366
+ {"name": "path", "selector": ".path", "type": "text"},
367
+ {"name": "description", "selector": ".description", "type": "text"},
368
+ {"name": "parameters", "selector": ".params", "type": "html"},
369
+ {"name": "examples", "selector": ".examples", "type": "html"}
370
+ ]
371
+ }
372
+ }' api-docs.json
373
+ ```
374
+
375
+ ## 🔧 Configuration
376
+
377
+ ### Environment Variables
378
+
379
+ ```bash
380
+ # LLM Configuration
381
+ OPENAI_API_KEY=sk-your-key
382
+ ANTHROPIC_API_KEY=your-anthropic-key
383
+ LLM_PROVIDER=openai/gpt-4o-mini
384
+ LLM_TEMPERATURE=0.7
385
+
386
+ # Crawl4AI Settings
387
+ CRAWL4AI_MAX_PAGES=50
388
+ CRAWL4AI_TIMEOUT=60
389
+ CRAWL4AI_DEFAULT_FORMAT=markdown
390
+ CRAWL4AI_CONCURRENT_REQUESTS=5
391
+ ```
392
+
393
+ ### Docker Configuration
394
+
395
+ ```yaml
396
+ # docker-compose.yml
397
+ services:
398
+ crawl4ai:
399
+ image: unclecode/crawl4ai:latest
400
+ ports:
401
+ - "11235:11235"
402
+ environment:
403
+ - OPENAI_API_KEY=${OPENAI_API_KEY}
404
+ - LLM_PROVIDER=openai/gpt-4o-mini
405
+ volumes:
406
+ - /dev/shm:/dev/shm
407
+ shm_size: 1g
408
+ ```
409
+
410
+ ## 📊 Monitoring & Analytics
411
+
412
+ ### Dashboard Features
413
+
414
+ The monitoring dashboard at http://localhost:11235/dashboard provides:
415
+
416
+ - **System Metrics**: CPU, memory, network utilization
417
+ - **Request Analytics**: Success rates, response times, error tracking
418
+ - **Browser Pool**: Active/hot/cold browser instances
419
+ - **Job Queue**: Pending, processing, completed jobs
420
+ - **Real-time Logs**: Live system and application logs
421
+
422
+ ### API Metrics
423
+
424
+ ```bash
425
+ # Prometheus metrics
426
+ curl http://localhost:11235/metrics
427
+
428
+ # Health status
429
+ curl http://localhost:11235/health | jq '.'
430
+
431
+ # API schema
432
+ curl http://localhost:11235/schema | jq '.'
433
+ ```
434
+
435
+ ## 🔒 Security & Best Practices
436
+
437
+ ### Rate Limiting
438
+
439
+ ```yaml
440
+ rate_limiting:
441
+ enabled: true
442
+ default_limit: "1000/minute"
443
+ trusted_proxies: []
444
+ ```
445
+
446
+ ### Security Headers
447
+
448
+ ```yaml
449
+ security:
450
+ headers:
451
+ x_content_type_options: "nosniff"
452
+ x_frame_options: "DENY"
453
+ content_security_policy: "default-src 'self'"
454
+ strict_transport_security: "max-age=63072000"
455
+ ```
456
+
457
+ ### Safe Crawling
458
+
459
+ - **Respect robots.txt**: Enabled by default
460
+ - **Rate limiting**: Built-in delays between requests
461
+ - **User agent identification**: Clear identification as Crawl4AI
462
+ - **Timeout protection**: Prevents hanging requests
463
+ - **Resource blocking**: Block unnecessary resources for performance
464
+
465
+ ## 🛠️ Advanced Features
466
+
467
+ ### Adaptive Crawling
468
+
469
+ ```python
470
+ from crawl4ai import AdaptiveCrawler, AdaptiveConfig
471
+
472
+ config = AdaptiveConfig(
473
+ confidence_threshold=0.7,
474
+ max_depth=5,
475
+ max_pages=20,
476
+ strategy="statistical"
477
+ )
478
+
479
+ adaptive_crawler = AdaptiveCrawler(crawler, config)
480
+ state = await adaptive_crawler.digest(
481
+ start_url="https://news.example.com",
482
+ query="latest technology news"
483
+ )
484
+ ```
485
+
486
+ ### Virtual Scroll Support
487
+
488
+ ```python
489
+ from crawl4ai import VirtualScrollConfig
490
+
491
+ scroll_config = VirtualScrollConfig(
492
+ container_selector="[data-testid='feed']",
493
+ scroll_count=20,
494
+ scroll_by="container_height",
495
+ wait_after_scroll=1.0
496
+ )
497
+
498
+ result = await crawler.arun(
499
+ url="https://infinite-scroll-site.com",
500
+ virtual_scroll_config=scroll_config
501
+ )
502
+ ```
503
+
504
+ ### Session Management
505
+
506
+ ```python
507
+ # Persistent browser sessions (requires: from crawl4ai import AsyncWebCrawler, BrowserConfig)
508
+ browser_config = BrowserConfig(
509
+ use_persistent_context=True,
510
+ user_data_dir="/path/to/profile",
511
+ headless=True
512
+ )
513
+
514
+ async with AsyncWebCrawler(config=browser_config) as crawler:
515
+ # Session persists across requests
516
+ result1 = await crawler.arun("https://site.com/login")
517
+ result2 = await crawler.arun("https://site.com/dashboard")
518
+ ```
519
+
520
+ ## 🔧 Troubleshooting
521
+
522
+ ### Common Issues
523
+
524
+ 1. **Container won't start**: Check Docker shared-memory allocation (`--shm-size`)
525
+
526
+ ```bash
527
+ docker run -d -p 11235:11235 --name crawl4ai --shm-size=1g unclecode/crawl4ai:latest
528
+ ```
529
+
530
+ 2. **API not responding**: Verify container status and port mapping
531
+
532
+ ```bash
533
+ docker ps | grep crawl4ai
534
+ curl http://localhost:11235/health
535
+ ```
536
+
537
+ 3. **Extraction failing**: Validate CSS selectors or LLM configuration
538
+
539
+ ```bash
540
+ # Test in playground (macOS `open`; on Linux use `xdg-open`)
541
+ open http://localhost:11235/playground
542
+ ```
543
+
544
+ ### Debug Commands
545
+
546
+ ```bash
547
+ # Check comprehensive status
548
+ ./.agent/scripts/crawl4ai-helper.sh status
549
+
550
+ # View container logs
551
+ docker logs crawl4ai --tail 50 --follow
552
+
553
+ # Test basic functionality
554
+ curl -X POST http://localhost:11235/crawl \
555
+ -H "Content-Type: application/json" \
556
+ -d '{"urls": ["https://httpbin.org/html"]}'
557
+ ```
558
+
559
+ ## 📚 Resources
560
+
561
+ ### Framework Integration
562
+
563
+ - **Helper Script**: `.agent/scripts/crawl4ai-helper.sh`
564
+ - **Configuration Template**: `configs/crawl4ai-config.json.txt`
565
+ - **MCP Configuration**: `configs/mcp-templates/crawl4ai-mcp-config.json`
566
+ - **Integration Guide**: `.agent/wiki/crawl4ai-integration.md`
567
+ - **Usage Guide**: `.agent/spec/crawl4ai-usage.md`
568
+
569
+ ### Official Resources
570
+
571
+ - **Documentation**: https://docs.crawl4ai.com/
572
+ - **GitHub Repository**: https://github.com/unclecode/crawl4ai
573
+ - **Docker Hub**: https://hub.docker.com/r/unclecode/crawl4ai
574
+ - **Discord Community**: https://discord.gg/jP8KfhDhyN
575
+
576
+ ## 🎯 Next Steps
577
+
578
+ 1. **Install and Setup**: Run `./.agent/scripts/crawl4ai-helper.sh install`
579
+ 2. **Start Docker Services**: Run `./.agent/scripts/crawl4ai-helper.sh docker-start`
580
+ 3. **Explore Dashboard**: Visit http://localhost:11235/dashboard
581
+ 4. **Try Playground**: Test crawling at http://localhost:11235/playground
582
+ 5. **Setup MCP**: Run `./.agent/scripts/crawl4ai-helper.sh mcp-setup`
583
+ 6. **Build Applications**: Use the API for your specific use cases
584
+
585
+ Crawl4AI transforms web data into AI-ready formats, making it perfect for RAG systems, data pipelines, and AI-powered applications within the AI DevOps Framework.