aidevops 2.52.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/.agent/AGENTS.md +614 -0
  2. package/.agent/accounts.md +65 -0
  3. package/.agent/aidevops/add-new-mcp-to-aidevops.md +456 -0
  4. package/.agent/aidevops/api-integrations.md +335 -0
  5. package/.agent/aidevops/architecture.md +510 -0
  6. package/.agent/aidevops/configs.md +274 -0
  7. package/.agent/aidevops/docs.md +244 -0
  8. package/.agent/aidevops/extension.md +311 -0
  9. package/.agent/aidevops/mcp-integrations.md +340 -0
  10. package/.agent/aidevops/mcp-troubleshooting.md +162 -0
  11. package/.agent/aidevops/memory-patterns.md +172 -0
  12. package/.agent/aidevops/providers.md +217 -0
  13. package/.agent/aidevops/recommendations.md +321 -0
  14. package/.agent/aidevops/requirements.md +301 -0
  15. package/.agent/aidevops/resources.md +214 -0
  16. package/.agent/aidevops/security-requirements.md +174 -0
  17. package/.agent/aidevops/security.md +350 -0
  18. package/.agent/aidevops/service-links.md +400 -0
  19. package/.agent/aidevops/services.md +357 -0
  20. package/.agent/aidevops/setup.md +153 -0
  21. package/.agent/aidevops/troubleshooting.md +389 -0
  22. package/.agent/aidevops.md +124 -0
  23. package/.agent/build-plus.md +244 -0
  24. package/.agent/content/guidelines.md +109 -0
  25. package/.agent/content.md +87 -0
  26. package/.agent/health.md +59 -0
  27. package/.agent/legal.md +59 -0
  28. package/.agent/loop-state/full-loop.local.md +16 -0
  29. package/.agent/loop-state/ralph-loop.local.md +10 -0
  30. package/.agent/marketing.md +440 -0
  31. package/.agent/memory/README.md +260 -0
  32. package/.agent/onboarding.md +796 -0
  33. package/.agent/plan-plus.md +245 -0
  34. package/.agent/research.md +100 -0
  35. package/.agent/sales.md +333 -0
  36. package/.agent/scripts/101domains-helper.sh +701 -0
  37. package/.agent/scripts/add-missing-returns.sh +140 -0
  38. package/.agent/scripts/agent-browser-helper.sh +311 -0
  39. package/.agent/scripts/agno-setup.sh +712 -0
  40. package/.agent/scripts/ahrefs-mcp-wrapper.js +168 -0
  41. package/.agent/scripts/aidevops-update-check.sh +71 -0
  42. package/.agent/scripts/ampcode-cli.sh +522 -0
  43. package/.agent/scripts/auto-version-bump.sh +156 -0
  44. package/.agent/scripts/autogen-helper.sh +512 -0
  45. package/.agent/scripts/beads-sync-helper.sh +596 -0
  46. package/.agent/scripts/closte-helper.sh +5 -0
  47. package/.agent/scripts/cloudron-helper.sh +321 -0
  48. package/.agent/scripts/codacy-cli-chunked.sh +581 -0
  49. package/.agent/scripts/codacy-cli.sh +442 -0
  50. package/.agent/scripts/code-audit-helper.sh +5 -0
  51. package/.agent/scripts/coderabbit-cli.sh +417 -0
  52. package/.agent/scripts/coderabbit-pro-analysis.sh +238 -0
  53. package/.agent/scripts/commands/code-simplifier.md +86 -0
  54. package/.agent/scripts/commands/full-loop.md +246 -0
  55. package/.agent/scripts/commands/postflight-loop.md +103 -0
  56. package/.agent/scripts/commands/recall.md +182 -0
  57. package/.agent/scripts/commands/remember.md +132 -0
  58. package/.agent/scripts/commands/save-todo.md +175 -0
  59. package/.agent/scripts/commands/session-review.md +154 -0
  60. package/.agent/scripts/comprehensive-quality-fix.sh +106 -0
  61. package/.agent/scripts/context-builder-helper.sh +522 -0
  62. package/.agent/scripts/coolify-cli-helper.sh +674 -0
  63. package/.agent/scripts/coolify-helper.sh +380 -0
  64. package/.agent/scripts/crawl4ai-examples.sh +401 -0
  65. package/.agent/scripts/crawl4ai-helper.sh +1078 -0
  66. package/.agent/scripts/crewai-helper.sh +681 -0
  67. package/.agent/scripts/dev-browser-helper.sh +513 -0
  68. package/.agent/scripts/dns-helper.sh +396 -0
  69. package/.agent/scripts/domain-research-helper.sh +917 -0
  70. package/.agent/scripts/dspy-helper.sh +285 -0
  71. package/.agent/scripts/dspyground-helper.sh +291 -0
  72. package/.agent/scripts/eeat-score-helper.sh +1242 -0
  73. package/.agent/scripts/efficient-return-fix.sh +92 -0
  74. package/.agent/scripts/extract-opencode-prompts.sh +128 -0
  75. package/.agent/scripts/find-missing-returns.sh +113 -0
  76. package/.agent/scripts/fix-auth-headers.sh +104 -0
  77. package/.agent/scripts/fix-common-strings.sh +254 -0
  78. package/.agent/scripts/fix-content-type.sh +100 -0
  79. package/.agent/scripts/fix-error-messages.sh +130 -0
  80. package/.agent/scripts/fix-misplaced-returns.sh +74 -0
  81. package/.agent/scripts/fix-remaining-literals.sh +152 -0
  82. package/.agent/scripts/fix-return-statements.sh +41 -0
  83. package/.agent/scripts/fix-s131-default-cases.sh +249 -0
  84. package/.agent/scripts/fix-sc2155-simple.sh +102 -0
  85. package/.agent/scripts/fix-shellcheck-critical.sh +187 -0
  86. package/.agent/scripts/fix-string-literals.sh +273 -0
  87. package/.agent/scripts/full-loop-helper.sh +773 -0
  88. package/.agent/scripts/generate-opencode-agents.sh +497 -0
  89. package/.agent/scripts/generate-opencode-commands.sh +1629 -0
  90. package/.agent/scripts/generate-skills.sh +366 -0
  91. package/.agent/scripts/git-platforms-helper.sh +640 -0
  92. package/.agent/scripts/gitea-cli-helper.sh +743 -0
  93. package/.agent/scripts/github-cli-helper.sh +702 -0
  94. package/.agent/scripts/gitlab-cli-helper.sh +682 -0
  95. package/.agent/scripts/gsc-add-user-helper.sh +325 -0
  96. package/.agent/scripts/gsc-sitemap-helper.sh +678 -0
  97. package/.agent/scripts/hetzner-helper.sh +485 -0
  98. package/.agent/scripts/hostinger-helper.sh +229 -0
  99. package/.agent/scripts/keyword-research-helper.sh +1815 -0
  100. package/.agent/scripts/langflow-helper.sh +544 -0
  101. package/.agent/scripts/linkedin-automation.py +241 -0
  102. package/.agent/scripts/linter-manager.sh +599 -0
  103. package/.agent/scripts/linters-local.sh +434 -0
  104. package/.agent/scripts/list-keys-helper.sh +488 -0
  105. package/.agent/scripts/local-browser-automation.py +339 -0
  106. package/.agent/scripts/localhost-helper.sh +744 -0
  107. package/.agent/scripts/loop-common.sh +806 -0
  108. package/.agent/scripts/mainwp-helper.sh +728 -0
  109. package/.agent/scripts/markdown-formatter.sh +338 -0
  110. package/.agent/scripts/markdown-lint-fix.sh +311 -0
  111. package/.agent/scripts/mass-fix-returns.sh +58 -0
  112. package/.agent/scripts/mcp-diagnose.sh +167 -0
  113. package/.agent/scripts/mcp-inspector-helper.sh +449 -0
  114. package/.agent/scripts/memory-helper.sh +650 -0
  115. package/.agent/scripts/monitor-code-review.sh +255 -0
  116. package/.agent/scripts/onboarding-helper.sh +706 -0
  117. package/.agent/scripts/opencode-github-setup-helper.sh +797 -0
  118. package/.agent/scripts/opencode-test-helper.sh +213 -0
  119. package/.agent/scripts/pagespeed-helper.sh +464 -0
  120. package/.agent/scripts/pandoc-helper.sh +362 -0
  121. package/.agent/scripts/postflight-check.sh +555 -0
  122. package/.agent/scripts/pre-commit-hook.sh +259 -0
  123. package/.agent/scripts/pre-edit-check.sh +169 -0
  124. package/.agent/scripts/qlty-cli.sh +356 -0
  125. package/.agent/scripts/quality-cli-manager.sh +525 -0
  126. package/.agent/scripts/quality-feedback-helper.sh +462 -0
  127. package/.agent/scripts/quality-fix.sh +263 -0
  128. package/.agent/scripts/quality-loop-helper.sh +1108 -0
  129. package/.agent/scripts/ralph-loop-helper.sh +836 -0
  130. package/.agent/scripts/ralph-upstream-check.sh +341 -0
  131. package/.agent/scripts/secretlint-helper.sh +847 -0
  132. package/.agent/scripts/servers-helper.sh +241 -0
  133. package/.agent/scripts/ses-helper.sh +619 -0
  134. package/.agent/scripts/session-review-helper.sh +404 -0
  135. package/.agent/scripts/setup-linters-wizard.sh +379 -0
  136. package/.agent/scripts/setup-local-api-keys.sh +330 -0
  137. package/.agent/scripts/setup-mcp-integrations.sh +472 -0
  138. package/.agent/scripts/shared-constants.sh +246 -0
  139. package/.agent/scripts/site-crawler-helper.sh +1487 -0
  140. package/.agent/scripts/snyk-helper.sh +940 -0
  141. package/.agent/scripts/sonarcloud-autofix.sh +193 -0
  142. package/.agent/scripts/sonarcloud-cli.sh +191 -0
  143. package/.agent/scripts/sonarscanner-cli.sh +455 -0
  144. package/.agent/scripts/spaceship-helper.sh +747 -0
  145. package/.agent/scripts/stagehand-helper.sh +321 -0
  146. package/.agent/scripts/stagehand-python-helper.sh +321 -0
  147. package/.agent/scripts/stagehand-python-setup.sh +441 -0
  148. package/.agent/scripts/stagehand-setup.sh +439 -0
  149. package/.agent/scripts/system-cleanup.sh +340 -0
  150. package/.agent/scripts/terminal-title-helper.sh +388 -0
  151. package/.agent/scripts/terminal-title-setup.sh +549 -0
  152. package/.agent/scripts/test-stagehand-both-integration.sh +317 -0
  153. package/.agent/scripts/test-stagehand-integration.sh +309 -0
  154. package/.agent/scripts/test-stagehand-python-integration.sh +341 -0
  155. package/.agent/scripts/todo-ready.sh +263 -0
  156. package/.agent/scripts/tool-version-check.sh +362 -0
  157. package/.agent/scripts/toon-helper.sh +469 -0
  158. package/.agent/scripts/twilio-helper.sh +917 -0
  159. package/.agent/scripts/updown-helper.sh +279 -0
  160. package/.agent/scripts/validate-mcp-integrations.sh +250 -0
  161. package/.agent/scripts/validate-version-consistency.sh +131 -0
  162. package/.agent/scripts/vaultwarden-helper.sh +597 -0
  163. package/.agent/scripts/vercel-cli-helper.sh +816 -0
  164. package/.agent/scripts/verify-mirrors.sh +169 -0
  165. package/.agent/scripts/version-manager.sh +831 -0
  166. package/.agent/scripts/webhosting-helper.sh +471 -0
  167. package/.agent/scripts/webhosting-verify.sh +238 -0
  168. package/.agent/scripts/wordpress-mcp-helper.sh +508 -0
  169. package/.agent/scripts/worktree-helper.sh +595 -0
  170. package/.agent/scripts/worktree-sessions.sh +577 -0
  171. package/.agent/seo/dataforseo.md +215 -0
  172. package/.agent/seo/domain-research.md +532 -0
  173. package/.agent/seo/eeat-score.md +659 -0
  174. package/.agent/seo/google-search-console.md +366 -0
  175. package/.agent/seo/gsc-sitemaps.md +282 -0
  176. package/.agent/seo/keyword-research.md +521 -0
  177. package/.agent/seo/serper.md +278 -0
  178. package/.agent/seo/site-crawler.md +387 -0
  179. package/.agent/seo.md +236 -0
  180. package/.agent/services/accounting/quickfile.md +159 -0
  181. package/.agent/services/communications/telfon.md +470 -0
  182. package/.agent/services/communications/twilio.md +569 -0
  183. package/.agent/services/crm/fluentcrm.md +449 -0
  184. package/.agent/services/email/ses.md +399 -0
  185. package/.agent/services/hosting/101domains.md +378 -0
  186. package/.agent/services/hosting/closte.md +177 -0
  187. package/.agent/services/hosting/cloudflare.md +251 -0
  188. package/.agent/services/hosting/cloudron.md +478 -0
  189. package/.agent/services/hosting/dns-providers.md +335 -0
  190. package/.agent/services/hosting/domain-purchasing.md +344 -0
  191. package/.agent/services/hosting/hetzner.md +327 -0
  192. package/.agent/services/hosting/hostinger.md +287 -0
  193. package/.agent/services/hosting/localhost.md +419 -0
  194. package/.agent/services/hosting/spaceship.md +353 -0
  195. package/.agent/services/hosting/webhosting.md +330 -0
  196. package/.agent/social-media.md +69 -0
  197. package/.agent/templates/plans-template.md +114 -0
  198. package/.agent/templates/prd-template.md +129 -0
  199. package/.agent/templates/tasks-template.md +108 -0
  200. package/.agent/templates/todo-template.md +89 -0
  201. package/.agent/tools/ai-assistants/agno.md +471 -0
  202. package/.agent/tools/ai-assistants/capsolver.md +326 -0
  203. package/.agent/tools/ai-assistants/configuration.md +221 -0
  204. package/.agent/tools/ai-assistants/overview.md +209 -0
  205. package/.agent/tools/ai-assistants/status.md +171 -0
  206. package/.agent/tools/ai-assistants/windsurf.md +193 -0
  207. package/.agent/tools/ai-orchestration/autogen.md +406 -0
  208. package/.agent/tools/ai-orchestration/crewai.md +445 -0
  209. package/.agent/tools/ai-orchestration/langflow.md +405 -0
  210. package/.agent/tools/ai-orchestration/openprose.md +487 -0
  211. package/.agent/tools/ai-orchestration/overview.md +362 -0
  212. package/.agent/tools/ai-orchestration/packaging.md +647 -0
  213. package/.agent/tools/browser/agent-browser.md +464 -0
  214. package/.agent/tools/browser/browser-automation.md +400 -0
  215. package/.agent/tools/browser/chrome-devtools.md +282 -0
  216. package/.agent/tools/browser/crawl4ai-integration.md +422 -0
  217. package/.agent/tools/browser/crawl4ai-resources.md +277 -0
  218. package/.agent/tools/browser/crawl4ai-usage.md +416 -0
  219. package/.agent/tools/browser/crawl4ai.md +585 -0
  220. package/.agent/tools/browser/dev-browser.md +341 -0
  221. package/.agent/tools/browser/pagespeed.md +260 -0
  222. package/.agent/tools/browser/playwright.md +266 -0
  223. package/.agent/tools/browser/playwriter.md +310 -0
  224. package/.agent/tools/browser/stagehand-examples.md +456 -0
  225. package/.agent/tools/browser/stagehand-python.md +483 -0
  226. package/.agent/tools/browser/stagehand.md +421 -0
  227. package/.agent/tools/build-agent/agent-review.md +224 -0
  228. package/.agent/tools/build-agent/build-agent.md +784 -0
  229. package/.agent/tools/build-mcp/aidevops-plugin.md +476 -0
  230. package/.agent/tools/build-mcp/api-wrapper.md +445 -0
  231. package/.agent/tools/build-mcp/build-mcp.md +240 -0
  232. package/.agent/tools/build-mcp/deployment.md +401 -0
  233. package/.agent/tools/build-mcp/server-patterns.md +632 -0
  234. package/.agent/tools/build-mcp/transports.md +366 -0
  235. package/.agent/tools/code-review/auditing.md +383 -0
  236. package/.agent/tools/code-review/automation.md +219 -0
  237. package/.agent/tools/code-review/best-practices.md +203 -0
  238. package/.agent/tools/code-review/codacy.md +151 -0
  239. package/.agent/tools/code-review/code-simplifier.md +174 -0
  240. package/.agent/tools/code-review/code-standards.md +309 -0
  241. package/.agent/tools/code-review/coderabbit.md +101 -0
  242. package/.agent/tools/code-review/management.md +155 -0
  243. package/.agent/tools/code-review/qlty.md +248 -0
  244. package/.agent/tools/code-review/secretlint.md +565 -0
  245. package/.agent/tools/code-review/setup.md +250 -0
  246. package/.agent/tools/code-review/snyk.md +563 -0
  247. package/.agent/tools/code-review/tools.md +230 -0
  248. package/.agent/tools/content/summarize.md +353 -0
  249. package/.agent/tools/context/augment-context-engine.md +468 -0
  250. package/.agent/tools/context/context-builder-agent.md +76 -0
  251. package/.agent/tools/context/context-builder.md +375 -0
  252. package/.agent/tools/context/context7.md +371 -0
  253. package/.agent/tools/context/dspy.md +302 -0
  254. package/.agent/tools/context/dspyground.md +374 -0
  255. package/.agent/tools/context/llm-tldr.md +219 -0
  256. package/.agent/tools/context/osgrep.md +488 -0
  257. package/.agent/tools/context/prompt-optimization.md +338 -0
  258. package/.agent/tools/context/toon.md +292 -0
  259. package/.agent/tools/conversion/pandoc.md +304 -0
  260. package/.agent/tools/credentials/api-key-management.md +154 -0
  261. package/.agent/tools/credentials/api-key-setup.md +224 -0
  262. package/.agent/tools/credentials/environment-variables.md +180 -0
  263. package/.agent/tools/credentials/vaultwarden.md +382 -0
  264. package/.agent/tools/data-extraction/outscraper.md +974 -0
  265. package/.agent/tools/deployment/coolify-cli.md +388 -0
  266. package/.agent/tools/deployment/coolify-setup.md +353 -0
  267. package/.agent/tools/deployment/coolify.md +345 -0
  268. package/.agent/tools/deployment/vercel.md +390 -0
  269. package/.agent/tools/git/authentication.md +132 -0
  270. package/.agent/tools/git/gitea-cli.md +193 -0
  271. package/.agent/tools/git/github-actions.md +207 -0
  272. package/.agent/tools/git/github-cli.md +223 -0
  273. package/.agent/tools/git/gitlab-cli.md +190 -0
  274. package/.agent/tools/git/opencode-github-security.md +350 -0
  275. package/.agent/tools/git/opencode-github.md +328 -0
  276. package/.agent/tools/git/opencode-gitlab.md +252 -0
  277. package/.agent/tools/git/security.md +196 -0
  278. package/.agent/tools/git.md +207 -0
  279. package/.agent/tools/opencode/oh-my-opencode.md +375 -0
  280. package/.agent/tools/opencode/opencode-anthropic-auth.md +446 -0
  281. package/.agent/tools/opencode/opencode.md +651 -0
  282. package/.agent/tools/social-media/bird.md +437 -0
  283. package/.agent/tools/task-management/beads.md +336 -0
  284. package/.agent/tools/terminal/terminal-title.md +251 -0
  285. package/.agent/tools/ui/shadcn.md +196 -0
  286. package/.agent/tools/ui/ui-skills.md +115 -0
  287. package/.agent/tools/wordpress/localwp.md +311 -0
  288. package/.agent/tools/wordpress/mainwp.md +391 -0
  289. package/.agent/tools/wordpress/scf.md +527 -0
  290. package/.agent/tools/wordpress/wp-admin.md +729 -0
  291. package/.agent/tools/wordpress/wp-dev.md +940 -0
  292. package/.agent/tools/wordpress/wp-preferred.md +398 -0
  293. package/.agent/tools/wordpress.md +95 -0
  294. package/.agent/workflows/branch/bugfix.md +63 -0
  295. package/.agent/workflows/branch/chore.md +95 -0
  296. package/.agent/workflows/branch/experiment.md +115 -0
  297. package/.agent/workflows/branch/feature.md +59 -0
  298. package/.agent/workflows/branch/hotfix.md +98 -0
  299. package/.agent/workflows/branch/refactor.md +92 -0
  300. package/.agent/workflows/branch/release.md +96 -0
  301. package/.agent/workflows/branch.md +347 -0
  302. package/.agent/workflows/bug-fixing.md +267 -0
  303. package/.agent/workflows/changelog.md +129 -0
  304. package/.agent/workflows/code-audit-remote.md +279 -0
  305. package/.agent/workflows/conversation-starter.md +69 -0
  306. package/.agent/workflows/error-feedback.md +578 -0
  307. package/.agent/workflows/feature-development.md +355 -0
  308. package/.agent/workflows/git-workflow.md +702 -0
  309. package/.agent/workflows/multi-repo-workspace.md +268 -0
  310. package/.agent/workflows/plans.md +709 -0
  311. package/.agent/workflows/postflight.md +604 -0
  312. package/.agent/workflows/pr.md +571 -0
  313. package/.agent/workflows/preflight.md +278 -0
  314. package/.agent/workflows/ralph-loop.md +773 -0
  315. package/.agent/workflows/release.md +498 -0
  316. package/.agent/workflows/session-manager.md +254 -0
  317. package/.agent/workflows/session-review.md +311 -0
  318. package/.agent/workflows/sql-migrations.md +631 -0
  319. package/.agent/workflows/version-bump.md +283 -0
  320. package/.agent/workflows/wiki-update.md +333 -0
  321. package/.agent/workflows/worktree.md +477 -0
  322. package/LICENSE +21 -0
  323. package/README.md +1446 -0
  324. package/VERSION +1 -0
  325. package/aidevops.sh +1746 -0
  326. package/bin/aidevops +21 -0
  327. package/package.json +75 -0
  328. package/scripts/npm-postinstall.js +60 -0
  329. package/setup.sh +2366 -0
@@ -0,0 +1,1078 @@
1
+ #!/bin/bash
2
+ # shellcheck disable=SC2034,SC2155,SC2317,SC2329,SC2016,SC2181,SC1091,SC2154,SC2015,SC2086,SC2129,SC2030,SC2031,SC2119,SC2120,SC2001,SC2162,SC2088,SC2089,SC2090,SC2029,SC2006,SC2153
3
+
4
+ # Crawl4AI Helper Script
5
+ # AI-powered web crawler and scraper for LLM-friendly data extraction
6
+ #
7
+ # This script provides comprehensive management for Crawl4AI including:
8
+ # - Docker deployment with monitoring dashboard
9
+ # - Python package installation and setup
10
+ # - MCP server integration for AI assistants
11
+ # - Web scraping and data extraction operations
12
+ # - CapSolver integration for CAPTCHA solving and anti-bot bypass
13
+ #
14
+ # Usage: ./crawl4ai-helper.sh [command] [options]
15
+ # Commands:
16
+ # install - Install Crawl4AI Python package
17
+ # docker-setup - Setup Docker deployment with monitoring
18
+ # docker-start - Start Docker container
19
+ # docker-stop - Stop Docker container
20
+ # mcp-setup - Setup MCP server integration
21
+ # capsolver-setup - Setup CapSolver integration for CAPTCHA solving
22
+ # crawl - Perform web crawling operation
23
+ # extract - Extract structured data from URL
24
+ # captcha-crawl - Crawl with CAPTCHA solving capabilities
25
+ # status - Check Crawl4AI service status
26
+ # help - Show this help message
27
+ #
28
+ # Author: AI DevOps Framework
29
+ # Version: 1.0.0
30
+ # License: MIT
31
+
32
# Colors for output (ANSI escape sequences; the print helpers interpret them)
readonly GREEN='\033[0;32m'
readonly BLUE='\033[0;34m'
readonly YELLOW='\033[1;33m'
readonly RED='\033[0;31m'
readonly PURPLE='\033[0;35m'
readonly NC='\033[0m' # No Color

# Common constants
readonly ERROR_UNKNOWN_COMMAND="Unknown command:"
# The previous definition assigned this variable to itself, which left it empty
# unless the calling environment happened to export it. A value inherited from
# the environment is still honoured; otherwise fall back to the JSON header.
# NOTE(review): assumes consumers pass this as a complete HTTP header line
# (e.g. curl -H "$CONTENT_TYPE_JSON") - confirm against its uses in this file.
readonly CONTENT_TYPE_JSON="${CONTENT_TYPE_JSON:-Content-Type: application/json}"

# Paths and service settings
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit
readonly SCRIPT_DIR
readonly CONFIG_DIR="$SCRIPT_DIR/../configs"
readonly DOCKER_IMAGE="unclecode/crawl4ai:latest"
readonly DOCKER_CONTAINER="crawl4ai"
readonly DOCKER_PORT="11235"
readonly MCP_PORT="3009"
readonly HELP_SHOW_MESSAGE="Show this help message"
54
+
55
+ # Print functions
56
# Print a green success line to stdout.
# Arguments: $1 - text to display
# Returns:   always 0
print_success() {
    local text="$1"
    # %b interprets backslash escapes in the whole string, as echo -e does.
    printf '%b\n' "${GREEN}✅ ${text}${NC}"
    return 0
}
61
+
62
# Print a blue informational line to stdout.
# Arguments: $1 - text to display
# Returns:   always 0
print_info() {
    local text="$1"
    # %b interprets backslash escapes in the whole string, as echo -e does.
    printf '%b\n' "${BLUE}ℹ️ ${text}${NC}"
    return 0
}
67
+
68
# Print a yellow warning line to stdout.
# Arguments: $1 - text to display
# Returns:   always 0
print_warning() {
    local text="$1"
    # %b interprets backslash escapes in the whole string, as echo -e does.
    printf '%b\n' "${YELLOW}⚠️ ${text}${NC}"
    return 0
}
73
+
74
# Print a red error line.
# Arguments: $1 - text to display
# Outputs:   writes the message to stderr so that error diagnostics do not mix
#            with data a caller may capture from stdout
# Returns:   always 0 (callers decide their own exit status)
print_error() {
    local message="$1"
    echo -e "${RED}❌ $message${NC}" >&2
    return 0
}
79
+
80
# Print a purple section header line to stdout.
# Arguments: $1 - text to display
# Returns:   always 0
print_header() {
    local text="$1"
    # %b interprets backslash escapes in the whole string, as echo -e does.
    printf '%b\n' "${PURPLE}🚀 ${text}${NC}"
    return 0
}
85
+
86
+ # Check if Docker is available
87
# Confirm that the docker CLI is on PATH and that `docker info` succeeds.
# Outputs: an explanatory message through print_error on failure
# Returns: 0 when both checks pass, 1 otherwise
check_docker() {
    command -v docker > /dev/null 2>&1 || {
        print_error "Docker is not installed. Please install Docker first."
        return 1
    }

    docker info > /dev/null 2>&1 || {
        print_error "Docker daemon is not running. Please start Docker."
        return 1
    }

    return 0
}
100
+
101
+ # Check if Python is available
102
# Confirm that python3 is on PATH and is version 3.8 or newer.
# Outputs: an explanatory message through print_error on failure
# Returns: 0 when a suitable interpreter is found, 1 otherwise
check_python() {
    if ! command -v python3 &> /dev/null; then
        print_error "Python 3 is not installed. Please install Python 3.8+ first."
        return 1
    fi

    local python_version
    # %-formatting rather than an f-string so the probe itself also runs on
    # interpreters older than 3.6 and can report them as too old.
    python_version=$(python3 -c "import sys; print('%d.%d' % sys.version_info[:2])")

    if [[ ! "$python_version" =~ ^([0-9]+)\.([0-9]+)$ ]]; then
        print_error "Unable to determine Python version."
        return 1
    fi

    local major="${BASH_REMATCH[1]}"
    local minor="${BASH_REMATCH[2]}"

    # Compare major and minor as integers. Comparing "3.10" as a decimal
    # number treats it as 3.1, which wrongly rejects Python 3.10 and later.
    if (( major < 3 || (major == 3 && minor < 8) )); then
        print_error "Python 3.8+ is required. Current version: $python_version"
        return 1
    fi

    return 0
}
118
+
119
+ # Install Crawl4AI Python package
120
# Install the Crawl4AI Python package and run its setup and doctor commands.
# Outputs: progress and result messages through the print_* helpers
# Returns: 1 if the Python check or the package installation fails, otherwise 0
#          (a failing crawl4ai-setup or crawl4ai-doctor only produces a warning)
install_crawl4ai() {
    print_header "Installing Crawl4AI Python Package"

    if ! check_python; then
        return 1
    fi

    print_info "Installing Crawl4AI with pip..."
    # Invoke pip through python3 so the package lands in the same interpreter
    # that check_python just validated; a bare pip3 on PATH may belong to a
    # different Python installation.
    if python3 -m pip install -U crawl4ai; then
        print_success "Crawl4AI installed successfully"
    else
        print_error "Failed to install Crawl4AI"
        return 1
    fi

    print_info "Running post-installation setup..."
    if crawl4ai-setup; then
        print_success "Crawl4AI setup completed"
    else
        print_warning "Setup completed with warnings. Run 'crawl4ai-doctor' to check."
    fi

    print_info "Verifying installation..."
    if crawl4ai-doctor; then
        print_success "Crawl4AI installation verified"
    else
        print_warning "Installation verification completed with warnings"
    fi

    return 0
}
151
+
152
+ # Setup Docker deployment
153
# Pull the Crawl4AI Docker image and, if absent, create a template env file.
# Globals: DOCKER_IMAGE (read), CONFIG_DIR (read)
# Outputs: progress and result messages through the print_* helpers; writes
#          $CONFIG_DIR/.crawl4ai.env when it does not exist yet
# Returns: 0 on success; 1 if Docker is unavailable, the pull fails, or the
#          env file cannot be created
docker_setup() {
    print_header "Setting up Crawl4AI Docker Deployment"

    if ! check_docker; then
        return 1
    fi

    print_info "Pulling Crawl4AI Docker image..."
    if docker pull "$DOCKER_IMAGE"; then
        print_success "Docker image pulled successfully"
    else
        print_error "Failed to pull Docker image"
        return 1
    fi

    # Create environment file if it doesn't exist
    local env_file="$CONFIG_DIR/.crawl4ai.env"
    if [[ ! -f "$env_file" ]]; then
        print_info "Creating environment configuration..."

        # The configs directory may not exist yet; without it the redirect
        # below fails and no env file is written.
        if ! mkdir -p "$CONFIG_DIR"; then
            print_error "Failed to create config directory: $CONFIG_DIR"
            return 1
        fi

        # Quoted delimiter: the template is written literally, no expansion.
        cat > "$env_file" << 'EOF' || { print_error "Failed to write $env_file"; return 1; }
# Crawl4AI Environment Configuration
# Add your API keys here for LLM integration

# OpenAI
# OPENAI_API_KEY=sk-your-key

# Anthropic
# ANTHROPIC_API_KEY=your-anthropic-key

# Other providers
# DEEPSEEK_API_KEY=your-deepseek-key
# GROQ_API_KEY=your-groq-key
# TOGETHER_API_KEY=your-together-key
# MISTRAL_API_KEY=your-mistral-key
# GEMINI_API_TOKEN=your-gemini-token

# Global LLM settings
# LLM_PROVIDER=openai/gpt-4o-mini
# LLM_TEMPERATURE=0.7
EOF
        # The file is intended to hold API keys, so restrict it to the owner.
        chmod 600 "$env_file"

        print_success "Environment file created at $env_file"
        print_warning "Please edit $env_file to add your API keys"
    fi

    return 0
}
199
+
200
+ # Start Docker container
201
# Start the Crawl4AI container, replacing any existing one of the same name.
# Globals: DOCKER_CONTAINER, DOCKER_PORT, DOCKER_IMAGE, CONFIG_DIR (all read)
# Outputs: progress and result messages through the print_* helpers
# Returns: 0 when `docker run` succeeds; 1 if Docker is unavailable or the
#          container fails to start
docker_start() {
    print_header "Starting Crawl4AI Docker Container"

    if ! check_docker; then
        return 1
    fi

    # Remove an existing container with this exact name, running or stopped.
    # Listing only running containers would miss a stopped one, and the
    # `docker run --name` below would then fail with a name conflict.
    if docker container inspect "$DOCKER_CONTAINER" > /dev/null 2>&1; then
        print_info "Stopping existing container..."
        docker stop "$DOCKER_CONTAINER" > /dev/null 2>&1
        docker rm "$DOCKER_CONTAINER" > /dev/null 2>&1
    fi

    local env_file="$CONFIG_DIR/.crawl4ai.env"
    local docker_args=(
        "-d"
        "-p" "$DOCKER_PORT:$DOCKER_PORT"
        "--name" "$DOCKER_CONTAINER"
        "--shm-size=1g"
    )

    # The env file is optional; pass it only when it exists.
    if [[ -f "$env_file" ]]; then
        docker_args+=("--env-file" "$env_file")
    fi

    # The image must come last, after all options.
    docker_args+=("$DOCKER_IMAGE")

    print_info "Starting Docker container..."
    if docker run "${docker_args[@]}"; then
        print_success "Crawl4AI container started successfully"
        print_info "Dashboard: http://localhost:$DOCKER_PORT/dashboard"
        print_info "Playground: http://localhost:$DOCKER_PORT/playground"
        print_info "API: http://localhost:$DOCKER_PORT"
    else
        print_error "Failed to start Docker container"
        return 1
    fi

    return 0
}
242
+
243
+ # Stop Docker container
244
# Stop and remove the Crawl4AI container when `docker ps` lists it as running.
# Globals: DOCKER_CONTAINER (read)
# Outputs: progress and result messages through the print_* helpers
# Returns: 1 if Docker is unavailable or stop/remove fails; 0 otherwise,
#          including when no running container is found
docker_stop() {
    print_header "Stopping Crawl4AI Docker Container"

    check_docker || return 1

    local active_ids
    active_ids="$(docker ps -q -f name="$DOCKER_CONTAINER")"

    # Nothing listed as running: warn and finish successfully.
    if [[ -z "$active_ids" ]]; then
        print_warning "Container is not running"
        return 0
    fi

    print_info "Stopping container..."
    if ! { docker stop "$DOCKER_CONTAINER" && docker rm "$DOCKER_CONTAINER"; }; then
        print_error "Failed to stop container"
        return 1
    fi

    print_success "Container stopped and removed"
    return 0
}
265
+
266
+ # Setup MCP server integration
267
# Write the Crawl4AI MCP server configuration file and print usage hints.
# Globals: CONFIG_DIR, MCP_PORT, DOCKER_PORT (all read)
# Outputs: writes $CONFIG_DIR/crawl4ai-mcp-config.json; progress and result
#          messages through the print_* helpers
# Returns: 0 on success; 1 if the directory or file cannot be written
mcp_setup() {
    print_header "Setting up Crawl4AI MCP Server Integration"

    local mcp_config="$CONFIG_DIR/crawl4ai-mcp-config.json"

    print_info "Creating MCP server configuration..."

    # The configs directory may not exist yet; the redirect below needs it.
    if ! mkdir -p "$CONFIG_DIR"; then
        print_error "Failed to create config directory: $CONFIG_DIR"
        return 1
    fi

    # Unquoted delimiter on purpose: $MCP_PORT and $DOCKER_PORT are expanded.
    # The body must contain JSON only; a stray shell statement such as
    # `return 0` inside it ends up in the file and makes the JSON invalid.
    cat > "$mcp_config" << EOF || { print_error "Failed to write $mcp_config"; return 1; }
{
  "provider": "crawl4ai",
  "description": "Crawl4AI MCP server for AI-powered web crawling and data extraction",
  "mcp_server": {
    "name": "crawl4ai",
    "command": "npx",
    "args": ["crawl4ai-mcp-server@latest"],
    "port": $MCP_PORT,
    "transport": "stdio",
    "description": "Crawl4AI MCP server for web scraping and LLM-friendly data extraction",
    "env": {
      "CRAWL4AI_API_URL": "http://localhost:$DOCKER_PORT",
      "CRAWL4AI_TIMEOUT": "60"
    }
  },
  "capabilities": [
    "web_crawling",
    "markdown_generation",
    "structured_extraction",
    "llm_extraction",
    "screenshot_capture",
    "pdf_generation",
    "javascript_execution"
  ]
}
EOF

    print_success "MCP configuration created at $mcp_config"
    print_info "To use with Claude Desktop, add this to your MCP settings:"
    print_info " \"crawl4ai\": {"
    print_info " \"command\": \"npx\","
    print_info " \"args\": [\"crawl4ai-mcp-server@latest\"]"
    print_info " }"

    return 0
}
311
+
312
+ # Setup CapSolver integration for CAPTCHA solving
313
+ capsolver_setup() {
314
+ print_header "Setting up CapSolver Integration for CAPTCHA Solving"
315
+
316
+ local capsolver_config="$CONFIG_DIR/capsolver-config.json"
317
+
318
+ print_info "Creating CapSolver configuration..."
319
+ cat > "$capsolver_config" << EOF
320
+ {
321
+ "provider": "capsolver",
322
+ "description": "CapSolver configuration for automated CAPTCHA solving with Crawl4AI",
323
+ "service_type": "captcha_solver",
324
+ "version": "latest",
325
+ "api": {
326
+ "base_url": "https://api.capsolver.com",
327
+ "endpoints": {
328
+ "create_task": "/createTask",
329
+ "get_task_result": "/getTaskResult",
330
+ "get_balance": "/getBalance"
331
+ },
332
+ "authentication": {
333
+ "type": "api_key",
334
+ "header": "clientKey"
335
+ }
336
+ },
337
+ "supported_captcha_types": {
338
+ "recaptcha_v2": {
339
+ "type": "ReCaptchaV2TaskProxyLess",
340
+ "description": "reCAPTCHA v2 checkbox solving",
341
+ "response_field": "gRecaptchaResponse",
342
+ "injection_target": "g-recaptcha-response",
343
+ "pricing": "$0.5/1000 requests",
344
+ "avg_solve_time": "< 9 seconds"
345
+ },
346
+ "recaptcha_v3": {
347
+ "type": "ReCaptchaV3TaskProxyLess",
348
+ "description": "reCAPTCHA v3 invisible solving with score ≥0.7",
349
+ "response_field": "gRecaptchaResponse",
350
+ "injection_method": "fetch_hook",
351
+ "pricing": "$0.5/1000 requests",
352
+ "avg_solve_time": "< 3 seconds"
353
+ },
354
+ "recaptcha_v2_enterprise": {
355
+ "type": "ReCaptchaV2EnterpriseTaskProxyLess",
356
+ "description": "reCAPTCHA v2 Enterprise solving",
357
+ "response_field": "gRecaptchaResponse",
358
+ "pricing": "$_arg1/1000 requests",
359
+ "avg_solve_time": "< 9 seconds"
360
+ },
361
+ "recaptcha_v3_enterprise": {
362
+ "type": "ReCaptchaV3EnterpriseTaskProxyLess",
363
+ "description": "reCAPTCHA v3 Enterprise solving with score ≥0.9",
364
+ "response_field": "gRecaptchaResponse",
365
+ "pricing": "$_arg3/1000 requests",
366
+ "avg_solve_time": "< 3 seconds"
367
+ },
368
+ "cloudflare_turnstile": {
369
+ "type": "AntiTurnstileTaskProxyLess",
370
+ "description": "Cloudflare Turnstile CAPTCHA solving",
371
+ "response_field": "token",
372
+ "injection_target": "cf-turnstile-response",
373
+ "pricing": "$_arg3/1000 requests",
374
+ "avg_solve_time": "< 3 seconds"
375
+ },
376
+ "cloudflare_challenge": {
377
+ "type": "AntiCloudflareTask",
378
+ "description": "Cloudflare Challenge (5s shield) solving",
379
+ "response_field": "cookies",
380
+ "requires_proxy": true,
381
+ "pricing": "Contact for pricing",
382
+ "avg_solve_time": "< 10 seconds"
383
+ },
384
+ "aws_waf": {
385
+ "type": "AntiAwsWafTaskProxyLess",
386
+ "description": "AWS WAF CAPTCHA solving",
387
+ "response_field": "cookie",
388
+ "injection_method": "cookie_set",
389
+ "pricing": "Contact for pricing",
390
+ "avg_solve_time": "< 5 seconds"
391
+ },
392
+ "geetest_v3": {
393
+ "type": "GeeTestTaskProxyLess",
394
+ "description": "GeeTest v3 CAPTCHA solving",
395
+ "response_field": "challenge",
396
+ "pricing": "$0.5/1000 requests",
397
+ "avg_solve_time": "< 5 seconds"
398
+ },
399
+ "geetest_v4": {
400
+ "type": "GeeTestV4TaskProxyLess",
401
+ "description": "GeeTest v4 CAPTCHA solving",
402
+ "response_field": "captcha_output",
403
+ "pricing": "$0.5/1000 requests",
404
+ "avg_solve_time": "< 5 seconds"
405
+ },
406
+ "image_to_text": {
407
+ "type": "ImageToTextTask",
408
+ "description": "OCR image CAPTCHA solving",
409
+ "response_field": "text",
410
+ "pricing": "$0.4/1000 requests",
411
+ "avg_solve_time": "< 1 second"
412
+ }
413
+ },
414
+ "integration_methods": {
415
+ "api_integration": {
416
+ "description": "Direct API integration with Python capsolver SDK",
417
+ "advantages": ["More flexible", "Precise control", "Better error handling"],
418
+ "recommended": true
419
+ },
420
+ "browser_extension": {
421
+ "description": "CapSolver browser extension integration",
422
+ "advantages": ["Easy setup", "Automatic detection", "No coding required"],
423
+ "extension_url": "https://chrome.google.com/webstore/detail/capsolver/pgojnojmmhpofjgdmaebadhbocahppod"
424
+ }
425
+ },
426
+ "python_sdk": {
427
+ "installation": "pip install capsolver",
428
+ "import": "import capsolver",
429
+ "usage": "capsolver.api_key = 'CAP-xxxxxxxxxxxxxxxxxxxxx'"
430
+ },
431
+ "pricing": {
432
+ "pay_per_usage": "Standard pricing per request",
433
+ "package_discounts": "Up to 60% savings with packages",
434
+ "developer_plan": "Contact for better pricing",
435
+ "balance_check": "GET /getBalance endpoint"
436
+ }
437
+ return 0
438
+ }
439
+ EOF
440
+
441
+ print_success "CapSolver configuration created at $capsolver_config"
442
+
443
+ # Create Python example script
444
+ local example_script="$CONFIG_DIR/capsolver-example.py"
445
+ cat > "$example_script" << 'EOF'
446
+ #!/usr/bin/env python3
447
+ """
448
+ CapSolver + Crawl4AI Integration Example
449
+ Demonstrates CAPTCHA solving with various types
450
+ """
451
+
452
+ import asyncio
453
+ import capsolver
454
+ from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
455
+
456
+ # TODO: Set your CapSolver API key
457
+ # Get your API key from: https://dashboard.capsolver.com/dashboard/overview
458
+ CAPSOLVER_API_KEY = "CAP-xxxxxxxxxxxxxxxxxxxxx"
459
+ capsolver.api_key = CAPSOLVER_API_KEY
460
+
461
+ async def solve_recaptcha_v2_example():
462
+ """Example: Solving reCAPTCHA v2 checkbox"""
463
+ site_url = "https://recaptcha-demo.appspot.com/recaptcha-v2-checkbox.php"
464
+ site_key = "6LfW6wATAAAAAHLqO2pb8bDBahxlMxNdo9g947u9"
465
+
466
+ browser_config = BrowserConfig(
467
+ verbose=True,
468
+ headless=False,
469
+ use_persistent_context=True,
470
+ )
471
+
472
+ async with AsyncWebCrawler(config=browser_config) as crawler:
473
+ # Initial page load
474
+ await crawler.arun(
475
+ url=site_url,
476
+ cache_mode=CacheMode.BYPASS,
477
+ session_id="captcha_session"
478
+ )
479
+
480
+ # Solve CAPTCHA using CapSolver
481
+ print("🔄 Solving reCAPTCHA v2...")
482
+ solution = capsolver.solve({
483
+ "type": "ReCaptchaV2TaskProxyLess",
484
+ "websiteURL": site_url,
485
+ "websiteKey": site_key,
486
+ })
487
+ token = solution["gRecaptchaResponse"]
488
+ print(f"✅ Token obtained: {token[:50]}...")
489
+
490
+ # Inject token and submit
491
+ js_code = f"""
492
+ const textarea = document.getElementById('g-recaptcha-response');
493
+ if (textarea) {{
494
+ textarea.value = '{token}';
495
+ document.querySelector('button.form-field[type="submit"]').click();
496
+ }}
497
+ """
498
+
499
+ wait_condition = """() => {
500
+ const items = document.querySelectorAll('h2');
501
+ return items.length > 1;
502
+ }"""
503
+
504
+ run_config = CrawlerRunConfig(
505
+ cache_mode=CacheMode.BYPASS,
506
+ session_id="captcha_session",
507
+ js_code=js_code,
508
+ js_only=True,
509
+ wait_for=f"js:{wait_condition}"
510
+ )
511
+
512
+ result = await crawler.arun(url=site_url, config=run_config)
513
+ print("🎉 CAPTCHA solved successfully!")
514
+ return result.markdown
515
+
516
+ async def solve_cloudflare_turnstile_example():
517
+ """Example: Solving Cloudflare Turnstile"""
518
+ site_url = "https://clifford.io/demo/cloudflare-turnstile"
519
+ site_key = "0x4AAAAAAAGlwMzq_9z6S9Mh"
520
+
521
+ browser_config = BrowserConfig(
522
+ verbose=True,
523
+ headless=False,
524
+ use_persistent_context=True,
525
+ )
526
+
527
+ async with AsyncWebCrawler(config=browser_config) as crawler:
528
+ # Initial page load
529
+ await crawler.arun(
530
+ url=site_url,
531
+ cache_mode=CacheMode.BYPASS,
532
+ session_id="turnstile_session"
533
+ )
534
+
535
+ # Solve Turnstile using CapSolver
536
+ print("🔄 Solving Cloudflare Turnstile...")
537
+ solution = capsolver.solve({
538
+ "type": "AntiTurnstileTaskProxyLess",
539
+ "websiteURL": site_url,
540
+ "websiteKey": site_key,
541
+ })
542
+ token = solution["token"]
543
+ print(f"✅ Token obtained: {token[:50]}...")
544
+
545
+ # Inject token and submit
546
+ js_code = f"""
547
+ document.querySelector('input[name="cf-turnstile-response"]').value = '{token}';
548
+ document.querySelector('button[type="submit"]').click();
549
+ """
550
+
551
+ wait_condition = """() => {
552
+ const items = document.querySelectorAll('h1');
553
+ return items.length === 0;
554
+ }"""
555
+
556
+ run_config = CrawlerRunConfig(
557
+ cache_mode=CacheMode.BYPASS,
558
+ session_id="turnstile_session",
559
+ js_code=js_code,
560
+ js_only=True,
561
+ wait_for=f"js:{wait_condition}"
562
+ )
563
+
564
+ result = await crawler.arun(url=site_url, config=run_config)
565
+ print("🎉 Turnstile solved successfully!")
566
+ return result.markdown
567
+
568
+ async def main():
569
+ """Main function to run examples"""
570
+ print("🚀 CapSolver + Crawl4AI Integration Examples")
571
+ print("=" * 50)
572
+
573
+ try:
574
+ # Example 1: reCAPTCHA v2
575
+ print("\n📋 Example 1: reCAPTCHA v2")
576
+ result1 = await solve_recaptcha_v2_example()
577
+
578
+ # Example 2: Cloudflare Turnstile
579
+ print("\n📋 Example 2: Cloudflare Turnstile")
580
+ result2 = await solve_cloudflare_turnstile_example()
581
+
582
+ print("\n✅ All examples completed successfully!")
583
+
584
+ except Exception as e:
585
+ print(f"❌ Error: {e}")
586
+ print("💡 Make sure to set your CapSolver API key!")
587
+
588
+ if __name__ == "__main__":
589
+ asyncio.run(main())
590
+ EOF
591
+
592
+ chmod +x "$example_script"
593
+ print_success "Python example script created at $example_script"
594
+
595
+ print_info "CapSolver Integration Setup Complete!"
596
+ print_info ""
597
+ print_info "📋 Next Steps:"
598
+ print_info "1. Get API key: https://dashboard.capsolver.com/dashboard/overview"
599
+ print_info "2. Install Python SDK: pip install capsolver"
600
+ print_info "3. Set API key in example script: $example_script"
601
+ print_info "4. Run example: python3 $example_script"
602
+ print_info ""
603
+ print_info "📚 Supported CAPTCHA Types:"
604
+ print_info "• reCAPTCHA v2/v3 (including Enterprise)"
605
+ print_info "• Cloudflare Turnstile & Challenge"
606
+ print_info "• AWS WAF"
607
+ print_info "• GeeTest v3/v4"
608
+ print_info "• Image-to-Text OCR"
609
+ print_info ""
610
+ print_info "💰 Pricing: Starting from $0.4/1000 requests"
611
+ print_info "🔗 Documentation: https://docs.capsolver.com/"
612
+
613
+ return 0
614
+ }
615
+
616
#######################################
# Crawl a single URL through the Crawl4AI REST API of the local Docker
# container, starting the container first when it is not running.
# Globals:
#   DOCKER_CONTAINER   (read) name filter passed to `docker ps`
#   DOCKER_PORT        (read) local port the API is reached on
#   CONTENT_TYPE_JSON  (read) header line sent with the request
# Arguments:
#   $1 - URL to crawl (required)
#   $2 - output format; accepted for CLI compatibility, not used here
#   $3 - optional file that receives the raw API response
# Outputs:
#   Without $3 the response is written to stdout (pretty-printed by jq
#   when jq is installed, raw otherwise).
# Returns:
#   0 on success; 1 on missing URL, container start failure, request
#   failure or an unwritable output file.
#######################################
crawl_url() {
  local url="${1:-}"
  local output_file="${3:-}"

  if [[ -z "$url" ]]; then
    print_error "URL is required"
    return 1
  fi

  print_header "Crawling URL: $url"

  # Check if Docker container is running
  if ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
    print_warning "Docker container is not running. Starting it..."
    if ! docker_start; then
      return 1
    fi
    sleep 5 # Wait for container to be ready
  fi

  local api_url="http://localhost:$DOCKER_PORT/crawl"

  # Escape backslashes and double quotes so the URL cannot break out of
  # the JSON string it is embedded in.
  local backslash='\' dquote='"'
  local json_url=${url//"$backslash"/"$backslash$backslash"}
  json_url=${json_url//"$dquote"/"$backslash$dquote"}

  # The heredoc must contain JSON only: any shell statement placed in it
  # would be sent to the API verbatim and invalidate the request body.
  local payload
  payload=$(
    cat << EOF
{
  "urls": ["$json_url"],
  "crawler_config": {
    "type": "CrawlerRunConfig",
    "params": {
      "cache_mode": "bypass"
    }
  }
}
EOF
  )

  print_info "Sending crawl request..."
  local response
  # The header is quoted: it contains a space and must reach curl as ONE
  # argument, otherwise its second word is treated as an extra URL.
  if ! response=$(curl -sS -X POST "$api_url" \
    -H "${CONTENT_TYPE_JSON:-Content-Type: application/json}" \
    -d "$payload"); then
    print_error "Failed to crawl URL"
    return 1
  fi

  if [[ -n "$output_file" ]]; then
    if ! printf '%s\n' "$response" > "$output_file"; then
      print_error "Failed to write results to $output_file"
      return 1
    fi
    print_success "Results saved to $output_file"
  elif command -v jq > /dev/null 2>&1; then
    printf '%s\n' "$response" | jq '.'
  else
    # jq is only a pretty-printer here; fall back to the raw response.
    printf '%s\n' "$response"
  fi

  print_success "Crawl completed successfully"
  return 0
}
674
+
675
#######################################
# Extract structured data from a URL by sending a
# JsonCssExtractionStrategy request to the Crawl4AI REST API of the local
# Docker container (the container is started first when not running).
# Globals:
#   DOCKER_CONTAINER   (read) name filter passed to `docker ps`
#   DOCKER_PORT        (read) local port the API is reached on
#   CONTENT_TYPE_JSON  (read) header line sent with the request
# Arguments:
#   $1 - URL to process (required)
#   $2 - extraction schema as a JSON document (required); it is embedded
#        in the request as a JSON value, not as a string
#   $3 - optional file that receives the raw API response
# Outputs:
#   Without $3, `.results[0].extracted_content` of the response is
#   printed via jq (raw response when jq is not installed).
# Returns:
#   0 on success; 1 on missing/invalid arguments, container start
#   failure, request failure or an unwritable output file.
#######################################
extract_structured() {
  local url="${1:-}"
  local schema="${2:-}"
  local output_file="${3:-}"

  if [[ -z "$url" || -z "$schema" ]]; then
    print_error "URL and schema are required"
    return 1
  fi

  # The schema is spliced into the payload verbatim, so reject anything
  # that is not valid JSON when jq is available to check it.
  if command -v jq > /dev/null 2>&1 &&
    ! printf '%s' "$schema" | jq empty > /dev/null 2>&1; then
    print_error "Schema is not valid JSON"
    return 1
  fi

  print_header "Extracting structured data from: $url"

  # Check if Docker container is running
  if ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
    print_warning "Docker container is not running. Starting it..."
    if ! docker_start; then
      return 1
    fi
    sleep 5
  fi

  local api_url="http://localhost:$DOCKER_PORT/crawl"

  # Escape backslashes and double quotes so the URL stays inside its
  # JSON string.
  local backslash='\' dquote='"'
  local json_url=${url//"$backslash"/"$backslash$backslash"}
  json_url=${json_url//"$dquote"/"$backslash$dquote"}

  # JSON only inside the heredoc: shell statements here would be sent to
  # the API as part of the body and make it unparseable.
  local payload
  payload=$(
    cat << EOF
{
  "urls": ["$json_url"],
  "crawler_config": {
    "type": "CrawlerRunConfig",
    "params": {
      "extraction_strategy": {
        "type": "JsonCssExtractionStrategy",
        "params": {
          "schema": {
            "type": "dict",
            "value": $schema
          }
        }
      },
      "cache_mode": "bypass"
    }
  }
}
EOF
  )

  print_info "Sending extraction request..."
  local response
  # Quoted so the header (which contains a space) is a single argument.
  if ! response=$(curl -sS -X POST "$api_url" \
    -H "${CONTENT_TYPE_JSON:-Content-Type: application/json}" \
    -d "$payload"); then
    print_error "Failed to extract data"
    return 1
  fi

  if [[ -n "$output_file" ]]; then
    if ! printf '%s\n' "$response" > "$output_file"; then
      print_error "Failed to write results to $output_file"
      return 1
    fi
    print_success "Results saved to $output_file"
  elif command -v jq > /dev/null 2>&1; then
    printf '%s\n' "$response" | jq '.results[0].extracted_content'
  else
    printf '%s\n' "$response"
  fi

  print_success "Extraction completed successfully"
  return 0
}
743
+
744
#######################################
# Crawl a page that is protected by a CAPTCHA: a temporary Python script
# solves the challenge through the CapSolver SDK and injects the result
# into a Crawl4AI browser session.
# Globals:
#   CAPSOLVER_API_KEY  (read) CapSolver credential, required
#   DOCKER_CONTAINER   (read) name filter passed to `docker ps`
#   TMPDIR             (read) base directory for the temporary script
# Arguments:
#   $1 - URL to crawl (required)
#   $2 - CAPTCHA type: recaptcha_v2 | recaptcha_v3 | turnstile | aws_waf
#   $3 - site key (required for every type except aws_waf)
#   $4 - optional file that receives the script's stdout and stderr
# Outputs:
#   Script output on stdout; with $4 it is captured to the file first and
#   then echoed.
# Returns:
#   0 on success, 1 on invalid arguments, missing API key, container
#   start failure or a failed crawl.
#######################################
captcha_crawl() {
  local url="${1:-}"
  local captcha_type="${2:-}"
  local site_key="${3:-}"
  local output_file="${4:-}"

  if [[ -z "$url" || -z "$captcha_type" ]]; then
    print_error "URL and CAPTCHA type are required"
    print_info "Usage: captcha-crawl <url> <captcha_type> [site_key] [output_file]"
    print_info "CAPTCHA types: recaptcha_v2, recaptcha_v3, turnstile, aws_waf"
    return 1
  fi

  # Validate up front so a typo never starts Docker or reaches the paid
  # solving API.
  case "$captcha_type" in
    recaptcha_v2 | recaptcha_v3 | turnstile)
      if [[ -z "$site_key" ]]; then
        print_error "site_key is required for CAPTCHA type: $captcha_type"
        return 1
      fi
      ;;
    aws_waf) ;;
    *)
      print_error "Unsupported CAPTCHA type: $captcha_type"
      print_info "CAPTCHA types: recaptcha_v2, recaptcha_v3, turnstile, aws_waf"
      return 1
      ;;
  esac

  # Check if CapSolver API key is set
  if [[ -z "${CAPSOLVER_API_KEY:-}" ]]; then
    print_error "CAPSOLVER_API_KEY environment variable not set"
    print_info "Set it with: export CAPSOLVER_API_KEY='CAP-xxxxxxxxxxxxxxxxxxxxx'"
    print_info "Get your API key from: https://dashboard.capsolver.com/dashboard/overview"
    return 1
  fi

  print_header "Crawling with CAPTCHA Solving: $url"
  print_info "CAPTCHA Type: $captcha_type"

  # Check if Docker container is running
  if ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
    print_warning "Docker container is not running. Starting it..."
    if ! docker_start; then
      return 1
    fi
    sleep 5
  fi

  # mktemp gives an unpredictable, exclusively created file; a name
  # derived from the PID could be pre-created by another local user.
  local temp_script
  if ! temp_script=$(mktemp "${TMPDIR:-/tmp}/captcha_crawl.XXXXXX"); then
    print_error "Failed to create temporary script"
    return 1
  fi

  # Quoted delimiter: nothing is expanded inside the script. The
  # arguments travel through environment variables instead of being
  # pasted into Python source, where quotes in a URL could inject code.
  cat > "$temp_script" << 'EOF'
#!/usr/bin/env python3
import asyncio
import os
import sys

import capsolver
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode

# Get CapSolver API key from environment
api_key = os.getenv('CAPSOLVER_API_KEY')
if not api_key:
    print("❌ Error: CAPSOLVER_API_KEY environment variable not set")
    print("💡 Set it with: export CAPSOLVER_API_KEY='CAP-xxxxxxxxxxxxxxxxxxxxx'")
    sys.exit(1)

capsolver.api_key = api_key


async def crawl_with_captcha():
    url = os.environ["CAPTCHA_CRAWL_URL"]
    captcha_type = os.environ["CAPTCHA_CRAWL_TYPE"]
    site_key = os.environ.get("CAPTCHA_CRAWL_SITE_KEY", "")

    browser_config = BrowserConfig(
        verbose=True,
        headless=False,
        use_persistent_context=True,
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        # Initial page load
        print(f"🔄 Loading page: {url}")
        await crawler.arun(
            url=url,
            cache_mode=CacheMode.BYPASS,
            session_id="captcha_crawl_session"
        )

        # Solve CAPTCHA based on type
        if captcha_type == "recaptcha_v2":
            if not site_key:
                print("❌ Error: site_key required for reCAPTCHA v2")
                return None

            print("🔄 Solving reCAPTCHA v2...")
            solution = capsolver.solve({
                "type": "ReCaptchaV2TaskProxyLess",
                "websiteURL": url,
                "websiteKey": site_key,
            })
            token = solution["gRecaptchaResponse"]

            js_code = f'''
            const textarea = document.getElementById('g-recaptcha-response');
            if (textarea) {{
                textarea.value = '{token}';
                console.log('✅ reCAPTCHA v2 token injected');
            }}
            '''

        elif captcha_type == "recaptcha_v3":
            if not site_key:
                print("❌ Error: site_key required for reCAPTCHA v3")
                return None

            print("🔄 Solving reCAPTCHA v3...")
            solution = capsolver.solve({
                "type": "ReCaptchaV3TaskProxyLess",
                "websiteURL": url,
                "websiteKey": site_key,
                "pageAction": "submit",
            })
            token = solution["gRecaptchaResponse"]

            js_code = f'''
            const originalFetch = window.fetch;
            window.fetch = function(...args) {{
                if (typeof args[0] === 'string' && args[0].includes('recaptcha')) {{
                    console.log('🔄 Hooking reCAPTCHA v3 request');
                    // Replace token in request
                }}
                return originalFetch.apply(this, args);
            }};
            console.log('✅ reCAPTCHA v3 hook installed');
            '''

        elif captcha_type == "turnstile":
            if not site_key:
                print("❌ Error: site_key required for Cloudflare Turnstile")
                return None

            print("🔄 Solving Cloudflare Turnstile...")
            solution = capsolver.solve({
                "type": "AntiTurnstileTaskProxyLess",
                "websiteURL": url,
                "websiteKey": site_key,
            })
            token = solution["token"]

            js_code = f'''
            const input = document.querySelector('input[name="cf-turnstile-response"]');
            if (input) {{
                input.value = '{token}';
                console.log('✅ Turnstile token injected');
            }}
            '''

        elif captcha_type == "aws_waf":
            print("🔄 Solving AWS WAF...")
            solution = capsolver.solve({
                "type": "AntiAwsWafTaskProxyLess",
                "websiteURL": url,
            })
            cookie = solution["cookie"]

            js_code = f'''
            document.cookie = 'aws-waf-token={cookie};path=/';
            console.log('✅ AWS WAF cookie set');
            location.reload();
            '''

        else:
            print(f"❌ Error: Unsupported CAPTCHA type: {captcha_type}")
            return None

        # Execute JavaScript and continue crawling
        run_config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
            session_id="captcha_crawl_session",
            js_code=js_code,
            js_only=True,
        )

        result = await crawler.arun(url=url, config=run_config)
        print("🎉 CAPTCHA solved and page crawled successfully!")

        return result.markdown


if __name__ == "__main__":
    result = asyncio.run(crawl_with_captcha())
    if result is None:
        # Nothing was crawled; report failure to the calling shell.
        sys.exit(1)
    if result:
        print("📄 Crawled content:")
        print(result[:500] + "..." if len(result) > 500 else result)
EOF

  print_info "Running CAPTCHA-enabled crawl..."

  # Run exactly once: every run is a billed solve, so the output is
  # captured during that single run rather than by running again.
  local status=0
  if [[ -n "$output_file" ]]; then
    CAPTCHA_CRAWL_URL="$url" \
      CAPTCHA_CRAWL_TYPE="$captcha_type" \
      CAPTCHA_CRAWL_SITE_KEY="$site_key" \
      python3 "$temp_script" > "$output_file" 2>&1 || status=$?
    if [[ -r "$output_file" ]]; then
      cat -- "$output_file"
    fi
  else
    CAPTCHA_CRAWL_URL="$url" \
      CAPTCHA_CRAWL_TYPE="$captcha_type" \
      CAPTCHA_CRAWL_SITE_KEY="$site_key" \
      python3 "$temp_script" || status=$?
  fi

  rm -f -- "$temp_script"

  if ((status != 0)); then
    print_error "CAPTCHA crawl failed"
    return 1
  fi

  print_success "CAPTCHA crawl completed successfully"
  if [[ -n "$output_file" ]]; then
    print_info "Results saved to: $output_file"
  fi
  return 0
}
940
+
941
#######################################
# Report the state of the Crawl4AI installation: Python package, Docker
# container, HTTP API and MCP configuration file.
# Globals:
#   DOCKER_CONTAINER  (read) name filter passed to `docker ps`
#   DOCKER_PORT       (read) local port the API is reached on
#   CONFIG_DIR        (read) directory holding crawl4ai-mcp-config.json
# Arguments:
#   None
# Outputs:
#   One status line per component through the print_* helpers.
# Returns:
#   Always 0; problems are reported as warnings, not as a failure.
#######################################
check_status() {
  print_header "Checking Crawl4AI Service Status"

  # Check Python package
  if command -v crawl4ai-doctor &> /dev/null; then
    print_info "Python package: Installed"
    if crawl4ai-doctor &> /dev/null; then
      print_success "Python package: Working"
    else
      print_warning "Python package: Issues detected"
    fi
  else
    print_warning "Python package: Not installed"
  fi

  # Check Docker container
  if ! check_docker; then
    print_warning "Docker: Not available"
  elif ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
    print_warning "Docker container: Not running"
  else
    print_success "Docker container: Running"

    # Check API health. -f turns HTTP error responses into a non-zero
    # exit (plain -s reports success for a 5xx reply), and --max-time
    # keeps a hung endpoint from blocking the status report.
    local health_url="http://localhost:$DOCKER_PORT/health"
    if curl -sf --max-time 5 "$health_url" &> /dev/null; then
      print_success "API endpoint: Healthy"
      print_info "Dashboard: http://localhost:$DOCKER_PORT/dashboard"
      print_info "Playground: http://localhost:$DOCKER_PORT/playground"
    else
      print_warning "API endpoint: Not responding"
    fi
  fi

  # Check MCP configuration
  local mcp_config="$CONFIG_DIR/crawl4ai-mcp-config.json"
  if [[ -f "$mcp_config" ]]; then
    print_success "MCP configuration: Available"
  else
    print_warning "MCP configuration: Not setup"
  fi

  return 0
}
988
+
989
#######################################
# Print the command reference for this helper script.
# Globals:
#   HELP_SHOW_MESSAGE  (read) description shown next to the help command
# Arguments:
#   None
# Outputs:
#   Usage, command list, examples and documentation links on stdout.
#   $0 is expanded so the examples show the name the script was run as.
# Returns:
#   0
#######################################
show_help() {
  cat << EOF
Crawl4AI Helper Script
Usage: $0 [command] [options]

Commands:
 install - Install Crawl4AI Python package
 docker-setup - Setup Docker deployment with monitoring
 docker-start - Start Docker container
 docker-stop - Stop Docker container
 mcp-setup - Setup MCP server integration
 capsolver-setup - Setup CapSolver CAPTCHA solving integration
 crawl [url] [format] [file] - Crawl URL and extract content
 extract [url] [schema] [file] - Extract structured data
 captcha-crawl [url] [type] [key] [file] - Crawl with CAPTCHA solving
 status - Check Crawl4AI service status
 help - $HELP_SHOW_MESSAGE

Examples:
 $0 install
 $0 docker-setup
 $0 docker-start
 $0 crawl https://example.com markdown output.json
 $0 extract https://example.com '{"title":"h1"}' data.json
 $0 captcha-crawl https://example.com recaptcha_v2 6LfW6wATAAAAAHLqO2pb8bDBahxlMxNdo9g947u9
 $0 status

Documentation:
 GitHub: https://github.com/unclecode/crawl4ai
 Docs: https://docs.crawl4ai.com/
 Framework docs: .agent/CRAWL4AI.md
EOF
  return 0
}
1022
+
1023
#######################################
# Command dispatcher: routes the first CLI argument to the matching
# handler and forwards the remaining positional arguments to it.
# Globals:
#   ERROR_UNKNOWN_COMMAND  (read) message prefix for unknown commands
# Arguments:
#   $1   - command name (defaults to "help")
#   $2-5 - arguments handed on to the selected handler
# Returns:
#   The exit status of the selected handler, so a failed crawl or setup
#   step is visible to the caller; 1 for an unknown command.
#######################################
main() {
  # Assign positional parameters to local variables
  local command="${1:-help}"
  local param2="${2:-}"
  local param3="${3:-}"
  local param4="${4:-}"
  local param5="${5:-}"

  # Collected with `|| status=$?` so the handler's result is kept without
  # tripping `set -e` should the script enable it.
  local status=0

  case "$command" in
    "install")
      install_crawl4ai || status=$?
      ;;
    "docker-setup")
      docker_setup || status=$?
      ;;
    "docker-start")
      docker_start || status=$?
      ;;
    "docker-stop")
      docker_stop || status=$?
      ;;
    "mcp-setup")
      mcp_setup || status=$?
      ;;
    "capsolver-setup")
      capsolver_setup || status=$?
      ;;
    "crawl")
      crawl_url "$param2" "$param3" "$param4" || status=$?
      ;;
    "extract")
      extract_structured "$param2" "$param3" "$param4" || status=$?
      ;;
    "captcha-crawl")
      captcha_crawl "$param2" "$param3" "$param4" "$param5" || status=$?
      ;;
    "status")
      check_status || status=$?
      ;;
    "help" | "-h" | "--help" | "")
      show_help || status=$?
      ;;
    *)
      print_error "$ERROR_UNKNOWN_COMMAND $command"
      show_help
      return 1
      ;;
  esac

  return "$status"
}
1075
+
1076
# Run the dispatcher and exit with its status; a fixed `exit 0` would
# report success to callers and CI even when the command failed.
main "$@"
exit $?