aidevops 2.52.1 → 2.53.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (334) hide show
  1. package/README.md +1 -1
  2. package/VERSION +1 -1
  3. package/aidevops.sh +15 -9
  4. package/package.json +4 -4
  5. package/scripts/npm-postinstall.js +6 -7
  6. package/setup.sh +1 -1
  7. package/templates/deploy-templates.sh +144 -0
  8. package/templates/home/.agent/README.md +33 -0
  9. package/templates/home/AGENTS.md +96 -0
  10. package/templates/home/git/.agent/README.md +48 -0
  11. package/templates/home/git/AGENTS.md +97 -0
  12. package/templates/standard-functions.sh +179 -0
  13. package/templates/wordpress-performance-workflow.md +217 -0
  14. package/.agent/AGENTS.md +0 -614
  15. package/.agent/accounts.md +0 -65
  16. package/.agent/aidevops/add-new-mcp-to-aidevops.md +0 -456
  17. package/.agent/aidevops/api-integrations.md +0 -335
  18. package/.agent/aidevops/architecture.md +0 -510
  19. package/.agent/aidevops/configs.md +0 -274
  20. package/.agent/aidevops/docs.md +0 -244
  21. package/.agent/aidevops/extension.md +0 -311
  22. package/.agent/aidevops/mcp-integrations.md +0 -340
  23. package/.agent/aidevops/mcp-troubleshooting.md +0 -162
  24. package/.agent/aidevops/memory-patterns.md +0 -172
  25. package/.agent/aidevops/providers.md +0 -217
  26. package/.agent/aidevops/recommendations.md +0 -321
  27. package/.agent/aidevops/requirements.md +0 -301
  28. package/.agent/aidevops/resources.md +0 -214
  29. package/.agent/aidevops/security-requirements.md +0 -174
  30. package/.agent/aidevops/security.md +0 -350
  31. package/.agent/aidevops/service-links.md +0 -400
  32. package/.agent/aidevops/services.md +0 -357
  33. package/.agent/aidevops/setup.md +0 -153
  34. package/.agent/aidevops/troubleshooting.md +0 -389
  35. package/.agent/aidevops.md +0 -124
  36. package/.agent/build-plus.md +0 -244
  37. package/.agent/content/guidelines.md +0 -109
  38. package/.agent/content.md +0 -87
  39. package/.agent/health.md +0 -59
  40. package/.agent/legal.md +0 -59
  41. package/.agent/loop-state/full-loop.local.md +0 -16
  42. package/.agent/loop-state/ralph-loop.local.md +0 -10
  43. package/.agent/marketing.md +0 -440
  44. package/.agent/memory/README.md +0 -260
  45. package/.agent/onboarding.md +0 -796
  46. package/.agent/plan-plus.md +0 -245
  47. package/.agent/research.md +0 -100
  48. package/.agent/sales.md +0 -333
  49. package/.agent/scripts/101domains-helper.sh +0 -701
  50. package/.agent/scripts/add-missing-returns.sh +0 -140
  51. package/.agent/scripts/agent-browser-helper.sh +0 -311
  52. package/.agent/scripts/agno-setup.sh +0 -712
  53. package/.agent/scripts/ahrefs-mcp-wrapper.js +0 -168
  54. package/.agent/scripts/aidevops-update-check.sh +0 -71
  55. package/.agent/scripts/ampcode-cli.sh +0 -522
  56. package/.agent/scripts/auto-version-bump.sh +0 -156
  57. package/.agent/scripts/autogen-helper.sh +0 -512
  58. package/.agent/scripts/beads-sync-helper.sh +0 -596
  59. package/.agent/scripts/closte-helper.sh +0 -5
  60. package/.agent/scripts/cloudron-helper.sh +0 -321
  61. package/.agent/scripts/codacy-cli-chunked.sh +0 -581
  62. package/.agent/scripts/codacy-cli.sh +0 -442
  63. package/.agent/scripts/code-audit-helper.sh +0 -5
  64. package/.agent/scripts/coderabbit-cli.sh +0 -417
  65. package/.agent/scripts/coderabbit-pro-analysis.sh +0 -238
  66. package/.agent/scripts/commands/code-simplifier.md +0 -86
  67. package/.agent/scripts/commands/full-loop.md +0 -246
  68. package/.agent/scripts/commands/postflight-loop.md +0 -103
  69. package/.agent/scripts/commands/recall.md +0 -182
  70. package/.agent/scripts/commands/remember.md +0 -132
  71. package/.agent/scripts/commands/save-todo.md +0 -175
  72. package/.agent/scripts/commands/session-review.md +0 -154
  73. package/.agent/scripts/comprehensive-quality-fix.sh +0 -106
  74. package/.agent/scripts/context-builder-helper.sh +0 -522
  75. package/.agent/scripts/coolify-cli-helper.sh +0 -674
  76. package/.agent/scripts/coolify-helper.sh +0 -380
  77. package/.agent/scripts/crawl4ai-examples.sh +0 -401
  78. package/.agent/scripts/crawl4ai-helper.sh +0 -1078
  79. package/.agent/scripts/crewai-helper.sh +0 -681
  80. package/.agent/scripts/dev-browser-helper.sh +0 -513
  81. package/.agent/scripts/dns-helper.sh +0 -396
  82. package/.agent/scripts/domain-research-helper.sh +0 -917
  83. package/.agent/scripts/dspy-helper.sh +0 -285
  84. package/.agent/scripts/dspyground-helper.sh +0 -291
  85. package/.agent/scripts/eeat-score-helper.sh +0 -1242
  86. package/.agent/scripts/efficient-return-fix.sh +0 -92
  87. package/.agent/scripts/extract-opencode-prompts.sh +0 -128
  88. package/.agent/scripts/find-missing-returns.sh +0 -113
  89. package/.agent/scripts/fix-auth-headers.sh +0 -104
  90. package/.agent/scripts/fix-common-strings.sh +0 -254
  91. package/.agent/scripts/fix-content-type.sh +0 -100
  92. package/.agent/scripts/fix-error-messages.sh +0 -130
  93. package/.agent/scripts/fix-misplaced-returns.sh +0 -74
  94. package/.agent/scripts/fix-remaining-literals.sh +0 -152
  95. package/.agent/scripts/fix-return-statements.sh +0 -41
  96. package/.agent/scripts/fix-s131-default-cases.sh +0 -249
  97. package/.agent/scripts/fix-sc2155-simple.sh +0 -102
  98. package/.agent/scripts/fix-shellcheck-critical.sh +0 -187
  99. package/.agent/scripts/fix-string-literals.sh +0 -273
  100. package/.agent/scripts/full-loop-helper.sh +0 -773
  101. package/.agent/scripts/generate-opencode-agents.sh +0 -497
  102. package/.agent/scripts/generate-opencode-commands.sh +0 -1629
  103. package/.agent/scripts/generate-skills.sh +0 -366
  104. package/.agent/scripts/git-platforms-helper.sh +0 -640
  105. package/.agent/scripts/gitea-cli-helper.sh +0 -743
  106. package/.agent/scripts/github-cli-helper.sh +0 -702
  107. package/.agent/scripts/gitlab-cli-helper.sh +0 -682
  108. package/.agent/scripts/gsc-add-user-helper.sh +0 -325
  109. package/.agent/scripts/gsc-sitemap-helper.sh +0 -678
  110. package/.agent/scripts/hetzner-helper.sh +0 -485
  111. package/.agent/scripts/hostinger-helper.sh +0 -229
  112. package/.agent/scripts/keyword-research-helper.sh +0 -1815
  113. package/.agent/scripts/langflow-helper.sh +0 -544
  114. package/.agent/scripts/linkedin-automation.py +0 -241
  115. package/.agent/scripts/linter-manager.sh +0 -599
  116. package/.agent/scripts/linters-local.sh +0 -434
  117. package/.agent/scripts/list-keys-helper.sh +0 -488
  118. package/.agent/scripts/local-browser-automation.py +0 -339
  119. package/.agent/scripts/localhost-helper.sh +0 -744
  120. package/.agent/scripts/loop-common.sh +0 -806
  121. package/.agent/scripts/mainwp-helper.sh +0 -728
  122. package/.agent/scripts/markdown-formatter.sh +0 -338
  123. package/.agent/scripts/markdown-lint-fix.sh +0 -311
  124. package/.agent/scripts/mass-fix-returns.sh +0 -58
  125. package/.agent/scripts/mcp-diagnose.sh +0 -167
  126. package/.agent/scripts/mcp-inspector-helper.sh +0 -449
  127. package/.agent/scripts/memory-helper.sh +0 -650
  128. package/.agent/scripts/monitor-code-review.sh +0 -255
  129. package/.agent/scripts/onboarding-helper.sh +0 -706
  130. package/.agent/scripts/opencode-github-setup-helper.sh +0 -797
  131. package/.agent/scripts/opencode-test-helper.sh +0 -213
  132. package/.agent/scripts/pagespeed-helper.sh +0 -464
  133. package/.agent/scripts/pandoc-helper.sh +0 -362
  134. package/.agent/scripts/postflight-check.sh +0 -555
  135. package/.agent/scripts/pre-commit-hook.sh +0 -259
  136. package/.agent/scripts/pre-edit-check.sh +0 -169
  137. package/.agent/scripts/qlty-cli.sh +0 -356
  138. package/.agent/scripts/quality-cli-manager.sh +0 -525
  139. package/.agent/scripts/quality-feedback-helper.sh +0 -462
  140. package/.agent/scripts/quality-fix.sh +0 -263
  141. package/.agent/scripts/quality-loop-helper.sh +0 -1108
  142. package/.agent/scripts/ralph-loop-helper.sh +0 -836
  143. package/.agent/scripts/ralph-upstream-check.sh +0 -341
  144. package/.agent/scripts/secretlint-helper.sh +0 -847
  145. package/.agent/scripts/servers-helper.sh +0 -241
  146. package/.agent/scripts/ses-helper.sh +0 -619
  147. package/.agent/scripts/session-review-helper.sh +0 -404
  148. package/.agent/scripts/setup-linters-wizard.sh +0 -379
  149. package/.agent/scripts/setup-local-api-keys.sh +0 -330
  150. package/.agent/scripts/setup-mcp-integrations.sh +0 -472
  151. package/.agent/scripts/shared-constants.sh +0 -246
  152. package/.agent/scripts/site-crawler-helper.sh +0 -1487
  153. package/.agent/scripts/snyk-helper.sh +0 -940
  154. package/.agent/scripts/sonarcloud-autofix.sh +0 -193
  155. package/.agent/scripts/sonarcloud-cli.sh +0 -191
  156. package/.agent/scripts/sonarscanner-cli.sh +0 -455
  157. package/.agent/scripts/spaceship-helper.sh +0 -747
  158. package/.agent/scripts/stagehand-helper.sh +0 -321
  159. package/.agent/scripts/stagehand-python-helper.sh +0 -321
  160. package/.agent/scripts/stagehand-python-setup.sh +0 -441
  161. package/.agent/scripts/stagehand-setup.sh +0 -439
  162. package/.agent/scripts/system-cleanup.sh +0 -340
  163. package/.agent/scripts/terminal-title-helper.sh +0 -388
  164. package/.agent/scripts/terminal-title-setup.sh +0 -549
  165. package/.agent/scripts/test-stagehand-both-integration.sh +0 -317
  166. package/.agent/scripts/test-stagehand-integration.sh +0 -309
  167. package/.agent/scripts/test-stagehand-python-integration.sh +0 -341
  168. package/.agent/scripts/todo-ready.sh +0 -263
  169. package/.agent/scripts/tool-version-check.sh +0 -362
  170. package/.agent/scripts/toon-helper.sh +0 -469
  171. package/.agent/scripts/twilio-helper.sh +0 -917
  172. package/.agent/scripts/updown-helper.sh +0 -279
  173. package/.agent/scripts/validate-mcp-integrations.sh +0 -250
  174. package/.agent/scripts/validate-version-consistency.sh +0 -131
  175. package/.agent/scripts/vaultwarden-helper.sh +0 -597
  176. package/.agent/scripts/vercel-cli-helper.sh +0 -816
  177. package/.agent/scripts/verify-mirrors.sh +0 -169
  178. package/.agent/scripts/version-manager.sh +0 -831
  179. package/.agent/scripts/webhosting-helper.sh +0 -471
  180. package/.agent/scripts/webhosting-verify.sh +0 -238
  181. package/.agent/scripts/wordpress-mcp-helper.sh +0 -508
  182. package/.agent/scripts/worktree-helper.sh +0 -595
  183. package/.agent/scripts/worktree-sessions.sh +0 -577
  184. package/.agent/seo/dataforseo.md +0 -215
  185. package/.agent/seo/domain-research.md +0 -532
  186. package/.agent/seo/eeat-score.md +0 -659
  187. package/.agent/seo/google-search-console.md +0 -366
  188. package/.agent/seo/gsc-sitemaps.md +0 -282
  189. package/.agent/seo/keyword-research.md +0 -521
  190. package/.agent/seo/serper.md +0 -278
  191. package/.agent/seo/site-crawler.md +0 -387
  192. package/.agent/seo.md +0 -236
  193. package/.agent/services/accounting/quickfile.md +0 -159
  194. package/.agent/services/communications/telfon.md +0 -470
  195. package/.agent/services/communications/twilio.md +0 -569
  196. package/.agent/services/crm/fluentcrm.md +0 -449
  197. package/.agent/services/email/ses.md +0 -399
  198. package/.agent/services/hosting/101domains.md +0 -378
  199. package/.agent/services/hosting/closte.md +0 -177
  200. package/.agent/services/hosting/cloudflare.md +0 -251
  201. package/.agent/services/hosting/cloudron.md +0 -478
  202. package/.agent/services/hosting/dns-providers.md +0 -335
  203. package/.agent/services/hosting/domain-purchasing.md +0 -344
  204. package/.agent/services/hosting/hetzner.md +0 -327
  205. package/.agent/services/hosting/hostinger.md +0 -287
  206. package/.agent/services/hosting/localhost.md +0 -419
  207. package/.agent/services/hosting/spaceship.md +0 -353
  208. package/.agent/services/hosting/webhosting.md +0 -330
  209. package/.agent/social-media.md +0 -69
  210. package/.agent/templates/plans-template.md +0 -114
  211. package/.agent/templates/prd-template.md +0 -129
  212. package/.agent/templates/tasks-template.md +0 -108
  213. package/.agent/templates/todo-template.md +0 -89
  214. package/.agent/tools/ai-assistants/agno.md +0 -471
  215. package/.agent/tools/ai-assistants/capsolver.md +0 -326
  216. package/.agent/tools/ai-assistants/configuration.md +0 -221
  217. package/.agent/tools/ai-assistants/overview.md +0 -209
  218. package/.agent/tools/ai-assistants/status.md +0 -171
  219. package/.agent/tools/ai-assistants/windsurf.md +0 -193
  220. package/.agent/tools/ai-orchestration/autogen.md +0 -406
  221. package/.agent/tools/ai-orchestration/crewai.md +0 -445
  222. package/.agent/tools/ai-orchestration/langflow.md +0 -405
  223. package/.agent/tools/ai-orchestration/openprose.md +0 -487
  224. package/.agent/tools/ai-orchestration/overview.md +0 -362
  225. package/.agent/tools/ai-orchestration/packaging.md +0 -647
  226. package/.agent/tools/browser/agent-browser.md +0 -464
  227. package/.agent/tools/browser/browser-automation.md +0 -400
  228. package/.agent/tools/browser/chrome-devtools.md +0 -282
  229. package/.agent/tools/browser/crawl4ai-integration.md +0 -422
  230. package/.agent/tools/browser/crawl4ai-resources.md +0 -277
  231. package/.agent/tools/browser/crawl4ai-usage.md +0 -416
  232. package/.agent/tools/browser/crawl4ai.md +0 -585
  233. package/.agent/tools/browser/dev-browser.md +0 -341
  234. package/.agent/tools/browser/pagespeed.md +0 -260
  235. package/.agent/tools/browser/playwright.md +0 -266
  236. package/.agent/tools/browser/playwriter.md +0 -310
  237. package/.agent/tools/browser/stagehand-examples.md +0 -456
  238. package/.agent/tools/browser/stagehand-python.md +0 -483
  239. package/.agent/tools/browser/stagehand.md +0 -421
  240. package/.agent/tools/build-agent/agent-review.md +0 -224
  241. package/.agent/tools/build-agent/build-agent.md +0 -784
  242. package/.agent/tools/build-mcp/aidevops-plugin.md +0 -476
  243. package/.agent/tools/build-mcp/api-wrapper.md +0 -445
  244. package/.agent/tools/build-mcp/build-mcp.md +0 -240
  245. package/.agent/tools/build-mcp/deployment.md +0 -401
  246. package/.agent/tools/build-mcp/server-patterns.md +0 -632
  247. package/.agent/tools/build-mcp/transports.md +0 -366
  248. package/.agent/tools/code-review/auditing.md +0 -383
  249. package/.agent/tools/code-review/automation.md +0 -219
  250. package/.agent/tools/code-review/best-practices.md +0 -203
  251. package/.agent/tools/code-review/codacy.md +0 -151
  252. package/.agent/tools/code-review/code-simplifier.md +0 -174
  253. package/.agent/tools/code-review/code-standards.md +0 -309
  254. package/.agent/tools/code-review/coderabbit.md +0 -101
  255. package/.agent/tools/code-review/management.md +0 -155
  256. package/.agent/tools/code-review/qlty.md +0 -248
  257. package/.agent/tools/code-review/secretlint.md +0 -565
  258. package/.agent/tools/code-review/setup.md +0 -250
  259. package/.agent/tools/code-review/snyk.md +0 -563
  260. package/.agent/tools/code-review/tools.md +0 -230
  261. package/.agent/tools/content/summarize.md +0 -353
  262. package/.agent/tools/context/augment-context-engine.md +0 -468
  263. package/.agent/tools/context/context-builder-agent.md +0 -76
  264. package/.agent/tools/context/context-builder.md +0 -375
  265. package/.agent/tools/context/context7.md +0 -371
  266. package/.agent/tools/context/dspy.md +0 -302
  267. package/.agent/tools/context/dspyground.md +0 -374
  268. package/.agent/tools/context/llm-tldr.md +0 -219
  269. package/.agent/tools/context/osgrep.md +0 -488
  270. package/.agent/tools/context/prompt-optimization.md +0 -338
  271. package/.agent/tools/context/toon.md +0 -292
  272. package/.agent/tools/conversion/pandoc.md +0 -304
  273. package/.agent/tools/credentials/api-key-management.md +0 -154
  274. package/.agent/tools/credentials/api-key-setup.md +0 -224
  275. package/.agent/tools/credentials/environment-variables.md +0 -180
  276. package/.agent/tools/credentials/vaultwarden.md +0 -382
  277. package/.agent/tools/data-extraction/outscraper.md +0 -974
  278. package/.agent/tools/deployment/coolify-cli.md +0 -388
  279. package/.agent/tools/deployment/coolify-setup.md +0 -353
  280. package/.agent/tools/deployment/coolify.md +0 -345
  281. package/.agent/tools/deployment/vercel.md +0 -390
  282. package/.agent/tools/git/authentication.md +0 -132
  283. package/.agent/tools/git/gitea-cli.md +0 -193
  284. package/.agent/tools/git/github-actions.md +0 -207
  285. package/.agent/tools/git/github-cli.md +0 -223
  286. package/.agent/tools/git/gitlab-cli.md +0 -190
  287. package/.agent/tools/git/opencode-github-security.md +0 -350
  288. package/.agent/tools/git/opencode-github.md +0 -328
  289. package/.agent/tools/git/opencode-gitlab.md +0 -252
  290. package/.agent/tools/git/security.md +0 -196
  291. package/.agent/tools/git.md +0 -207
  292. package/.agent/tools/opencode/oh-my-opencode.md +0 -375
  293. package/.agent/tools/opencode/opencode-anthropic-auth.md +0 -446
  294. package/.agent/tools/opencode/opencode.md +0 -651
  295. package/.agent/tools/social-media/bird.md +0 -437
  296. package/.agent/tools/task-management/beads.md +0 -336
  297. package/.agent/tools/terminal/terminal-title.md +0 -251
  298. package/.agent/tools/ui/shadcn.md +0 -196
  299. package/.agent/tools/ui/ui-skills.md +0 -115
  300. package/.agent/tools/wordpress/localwp.md +0 -311
  301. package/.agent/tools/wordpress/mainwp.md +0 -391
  302. package/.agent/tools/wordpress/scf.md +0 -527
  303. package/.agent/tools/wordpress/wp-admin.md +0 -729
  304. package/.agent/tools/wordpress/wp-dev.md +0 -940
  305. package/.agent/tools/wordpress/wp-preferred.md +0 -398
  306. package/.agent/tools/wordpress.md +0 -95
  307. package/.agent/workflows/branch/bugfix.md +0 -63
  308. package/.agent/workflows/branch/chore.md +0 -95
  309. package/.agent/workflows/branch/experiment.md +0 -115
  310. package/.agent/workflows/branch/feature.md +0 -59
  311. package/.agent/workflows/branch/hotfix.md +0 -98
  312. package/.agent/workflows/branch/refactor.md +0 -92
  313. package/.agent/workflows/branch/release.md +0 -96
  314. package/.agent/workflows/branch.md +0 -347
  315. package/.agent/workflows/bug-fixing.md +0 -267
  316. package/.agent/workflows/changelog.md +0 -129
  317. package/.agent/workflows/code-audit-remote.md +0 -279
  318. package/.agent/workflows/conversation-starter.md +0 -69
  319. package/.agent/workflows/error-feedback.md +0 -578
  320. package/.agent/workflows/feature-development.md +0 -355
  321. package/.agent/workflows/git-workflow.md +0 -702
  322. package/.agent/workflows/multi-repo-workspace.md +0 -268
  323. package/.agent/workflows/plans.md +0 -709
  324. package/.agent/workflows/postflight.md +0 -604
  325. package/.agent/workflows/pr.md +0 -571
  326. package/.agent/workflows/preflight.md +0 -278
  327. package/.agent/workflows/ralph-loop.md +0 -773
  328. package/.agent/workflows/release.md +0 -498
  329. package/.agent/workflows/session-manager.md +0 -254
  330. package/.agent/workflows/session-review.md +0 -311
  331. package/.agent/workflows/sql-migrations.md +0 -631
  332. package/.agent/workflows/version-bump.md +0 -283
  333. package/.agent/workflows/wiki-update.md +0 -333
  334. package/.agent/workflows/worktree.md +0 -477
@@ -1,1078 +0,0 @@
1
- #!/bin/bash
2
- # shellcheck disable=SC2034,SC2155,SC2317,SC2329,SC2016,SC2181,SC1091,SC2154,SC2015,SC2086,SC2129,SC2030,SC2031,SC2119,SC2120,SC2001,SC2162,SC2088,SC2089,SC2090,SC2029,SC2006,SC2153
3
-
4
- # Crawl4AI Helper Script
5
- # AI-powered web crawler and scraper for LLM-friendly data extraction
6
- #
7
- # This script provides comprehensive management for Crawl4AI including:
8
- # - Docker deployment with monitoring dashboard
9
- # - Python package installation and setup
10
- # - MCP server integration for AI assistants
11
- # - Web scraping and data extraction operations
12
- # - CapSolver integration for CAPTCHA solving and anti-bot bypass
13
- #
14
- # Usage: ./crawl4ai-helper.sh [command] [options]
15
- # Commands:
16
- # install - Install Crawl4AI Python package
17
- # docker-setup - Setup Docker deployment with monitoring
18
- # docker-start - Start Docker container
19
- # docker-stop - Stop Docker container
20
- # mcp-setup - Setup MCP server integration
21
- # capsolver-setup - Setup CapSolver integration for CAPTCHA solving
22
- # crawl - Perform web crawling operation
23
- # extract - Extract structured data from URL
24
- # captcha-crawl - Crawl with CAPTCHA solving capabilities
25
- # status - Check Crawl4AI service status
26
- # help - Show this help message
27
- #
28
- # Author: AI DevOps Framework
29
- # Version: 1.0.0
30
- # License: MIT
31
-
32
- # Colors for output
33
- readonly GREEN='\033[0;32m'
34
- readonly BLUE='\033[0;34m'
35
- readonly YELLOW='\033[1;33m'
36
- readonly RED='\033[0;31m'
37
- readonly PURPLE='\033[0;35m'
38
- readonly NC='\033[0m' # No Color
39
-
40
- # Common constants
41
- readonly ERROR_UNKNOWN_COMMAND="Unknown command:"
42
- # Common constants
43
- readonly CONTENT_TYPE_JSON=$CONTENT_TYPE_JSON
44
-
45
- # Constants
46
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit
47
- readonly SCRIPT_DIR
48
- readonly CONFIG_DIR="$SCRIPT_DIR/../configs"
49
- readonly DOCKER_IMAGE="unclecode/crawl4ai:latest"
50
- readonly DOCKER_CONTAINER="crawl4ai"
51
- readonly DOCKER_PORT="11235"
52
- readonly MCP_PORT="3009"
53
- readonly HELP_SHOW_MESSAGE="Show this help message"
54
-
55
- # Print functions
56
- print_success() {
57
- local message="$1"
58
- echo -e "${GREEN}✅ $message${NC}"
59
- return 0
60
- }
61
-
62
- print_info() {
63
- local message="$1"
64
- echo -e "${BLUE}ℹ️ $message${NC}"
65
- return 0
66
- }
67
-
68
- print_warning() {
69
- local message="$1"
70
- echo -e "${YELLOW}⚠️ $message${NC}"
71
- return 0
72
- }
73
-
74
- print_error() {
75
- local message="$1"
76
- echo -e "${RED}❌ $message${NC}"
77
- return 0
78
- }
79
-
80
- print_header() {
81
- local message="$1"
82
- echo -e "${PURPLE}🚀 $message${NC}"
83
- return 0
84
- }
85
-
86
- # Check if Docker is available
87
- check_docker() {
88
- if ! command -v docker &> /dev/null; then
89
- print_error "Docker is not installed. Please install Docker first."
90
- return 1
91
- fi
92
-
93
- if ! docker info &> /dev/null; then
94
- print_error "Docker daemon is not running. Please start Docker."
95
- return 1
96
- fi
97
-
98
- return 0
99
- }
100
-
101
- # Check if Python is available
102
- check_python() {
103
- if ! command -v python3 &> /dev/null; then
104
- print_error "Python 3 is not installed. Please install Python 3.8+ first."
105
- return 1
106
- fi
107
-
108
- local python_version
109
- python_version=$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
110
-
111
- if [[ $(echo "$python_version < 3.8" | bc -l) -eq 1 ]]; then
112
- print_error "Python 3.8+ is required. Current version: $python_version"
113
- return 1
114
- fi
115
-
116
- return 0
117
- }
118
-
119
- # Install Crawl4AI Python package
120
- install_crawl4ai() {
121
- print_header "Installing Crawl4AI Python Package"
122
-
123
- if ! check_python; then
124
- return 1
125
- fi
126
-
127
- print_info "Installing Crawl4AI with pip..."
128
- if pip3 install -U crawl4ai; then
129
- print_success "Crawl4AI installed successfully"
130
- else
131
- print_error "Failed to install Crawl4AI"
132
- return 1
133
- fi
134
-
135
- print_info "Running post-installation setup..."
136
- if crawl4ai-setup; then
137
- print_success "Crawl4AI setup completed"
138
- else
139
- print_warning "Setup completed with warnings. Run 'crawl4ai-doctor' to check."
140
- fi
141
-
142
- print_info "Verifying installation..."
143
- if crawl4ai-doctor; then
144
- print_success "Crawl4AI installation verified"
145
- else
146
- print_warning "Installation verification completed with warnings"
147
- fi
148
-
149
- return 0
150
- }
151
-
152
- # Setup Docker deployment
153
- docker_setup() {
154
- print_header "Setting up Crawl4AI Docker Deployment"
155
-
156
- if ! check_docker; then
157
- return 1
158
- fi
159
-
160
- print_info "Pulling Crawl4AI Docker image..."
161
- if docker pull "$DOCKER_IMAGE"; then
162
- print_success "Docker image pulled successfully"
163
- else
164
- print_error "Failed to pull Docker image"
165
- return 1
166
- fi
167
-
168
- # Create environment file if it doesn't exist
169
- local env_file="$CONFIG_DIR/.crawl4ai.env"
170
- if [[ ! -f "$env_file" ]]; then
171
- print_info "Creating environment configuration..."
172
- cat > "$env_file" << 'EOF'
173
- # Crawl4AI Environment Configuration
174
- # Add your API keys here for LLM integration
175
-
176
- # OpenAI
177
- # OPENAI_API_KEY=sk-your-key
178
-
179
- # Anthropic
180
- # ANTHROPIC_API_KEY=your-anthropic-key
181
-
182
- # Other providers
183
- # DEEPSEEK_API_KEY=your-deepseek-key
184
- # GROQ_API_KEY=your-groq-key
185
- # TOGETHER_API_KEY=your-together-key
186
- # MISTRAL_API_KEY=your-mistral-key
187
- # GEMINI_API_TOKEN=your-gemini-token
188
-
189
- # Global LLM settings
190
- # LLM_PROVIDER=openai/gpt-4o-mini
191
- # LLM_TEMPERATURE=0.7
192
- EOF
193
- print_success "Environment file created at $env_file"
194
- print_warning "Please edit $env_file to add your API keys"
195
- fi
196
-
197
- return 0
198
- }
199
-
200
- # Start Docker container
201
- docker_start() {
202
- print_header "Starting Crawl4AI Docker Container"
203
-
204
- if ! check_docker; then
205
- return 1
206
- fi
207
-
208
- # Stop existing container if running
209
- if docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
210
- print_info "Stopping existing container..."
211
- docker stop "$DOCKER_CONTAINER" > /dev/null 2>&1
212
- docker rm "$DOCKER_CONTAINER" > /dev/null 2>&1
213
- fi
214
-
215
- local env_file="$CONFIG_DIR/.crawl4ai.env"
216
- local docker_args=(
217
- "-d"
218
- "-p" "$DOCKER_PORT:$DOCKER_PORT"
219
- "--name" "$DOCKER_CONTAINER"
220
- "--shm-size=1g"
221
- )
222
-
223
- if [[ -f "$env_file" ]]; then
224
- docker_args+=("--env-file" "$env_file")
225
- fi
226
-
227
- docker_args+=("$DOCKER_IMAGE")
228
-
229
- print_info "Starting Docker container..."
230
- if docker run "${docker_args[@]}"; then
231
- print_success "Crawl4AI container started successfully"
232
- print_info "Dashboard: http://localhost:$DOCKER_PORT/dashboard"
233
- print_info "Playground: http://localhost:$DOCKER_PORT/playground"
234
- print_info "API: http://localhost:$DOCKER_PORT"
235
- else
236
- print_error "Failed to start Docker container"
237
- return 1
238
- fi
239
-
240
- return 0
241
- }
242
-
243
- # Stop Docker container
244
- docker_stop() {
245
- print_header "Stopping Crawl4AI Docker Container"
246
-
247
- if ! check_docker; then
248
- return 1
249
- fi
250
-
251
- if docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
252
- print_info "Stopping container..."
253
- if docker stop "$DOCKER_CONTAINER" && docker rm "$DOCKER_CONTAINER"; then
254
- print_success "Container stopped and removed"
255
- else
256
- print_error "Failed to stop container"
257
- return 1
258
- fi
259
- else
260
- print_warning "Container is not running"
261
- fi
262
-
263
- return 0
264
- }
265
-
266
- # Setup MCP server integration
267
- mcp_setup() {
268
- print_header "Setting up Crawl4AI MCP Server Integration"
269
-
270
- local mcp_config="$CONFIG_DIR/crawl4ai-mcp-config.json"
271
-
272
- print_info "Creating MCP server configuration..."
273
- cat > "$mcp_config" << EOF
274
- {
275
- "provider": "crawl4ai",
276
- "description": "Crawl4AI MCP server for AI-powered web crawling and data extraction",
277
- "mcp_server": {
278
- "name": "crawl4ai",
279
- "command": "npx",
280
- "args": ["crawl4ai-mcp-server@latest"],
281
- "port": $MCP_PORT,
282
- "transport": "stdio",
283
- "description": "Crawl4AI MCP server for web scraping and LLM-friendly data extraction",
284
- "env": {
285
- "CRAWL4AI_API_URL": "http://localhost:$DOCKER_PORT",
286
- "CRAWL4AI_TIMEOUT": "60"
287
- }
288
- },
289
- "capabilities": [
290
- "web_crawling",
291
- "markdown_generation",
292
- "structured_extraction",
293
- "llm_extraction",
294
- "screenshot_capture",
295
- "pdf_generation",
296
- "javascript_execution"
297
- ]
298
- return 0
299
- }
300
- EOF
301
-
302
- print_success "MCP configuration created at $mcp_config"
303
- print_info "To use with Claude Desktop, add this to your MCP settings:"
304
- print_info " \"crawl4ai\": {"
305
- print_info " \"command\": \"npx\","
306
- print_info " \"args\": [\"crawl4ai-mcp-server@latest\"]"
307
- print_info " }"
308
-
309
- return 0
310
- }
311
-
312
- # Setup CapSolver integration for CAPTCHA solving
313
- capsolver_setup() {
314
- print_header "Setting up CapSolver Integration for CAPTCHA Solving"
315
-
316
- local capsolver_config="$CONFIG_DIR/capsolver-config.json"
317
-
318
- print_info "Creating CapSolver configuration..."
319
- cat > "$capsolver_config" << EOF
320
- {
321
- "provider": "capsolver",
322
- "description": "CapSolver configuration for automated CAPTCHA solving with Crawl4AI",
323
- "service_type": "captcha_solver",
324
- "version": "latest",
325
- "api": {
326
- "base_url": "https://api.capsolver.com",
327
- "endpoints": {
328
- "create_task": "/createTask",
329
- "get_task_result": "/getTaskResult",
330
- "get_balance": "/getBalance"
331
- },
332
- "authentication": {
333
- "type": "api_key",
334
- "header": "clientKey"
335
- }
336
- },
337
- "supported_captcha_types": {
338
- "recaptcha_v2": {
339
- "type": "ReCaptchaV2TaskProxyLess",
340
- "description": "reCAPTCHA v2 checkbox solving",
341
- "response_field": "gRecaptchaResponse",
342
- "injection_target": "g-recaptcha-response",
343
- "pricing": "$0.5/1000 requests",
344
- "avg_solve_time": "< 9 seconds"
345
- },
346
- "recaptcha_v3": {
347
- "type": "ReCaptchaV3TaskProxyLess",
348
- "description": "reCAPTCHA v3 invisible solving with score ≥0.7",
349
- "response_field": "gRecaptchaResponse",
350
- "injection_method": "fetch_hook",
351
- "pricing": "$0.5/1000 requests",
352
- "avg_solve_time": "< 3 seconds"
353
- },
354
- "recaptcha_v2_enterprise": {
355
- "type": "ReCaptchaV2EnterpriseTaskProxyLess",
356
- "description": "reCAPTCHA v2 Enterprise solving",
357
- "response_field": "gRecaptchaResponse",
358
- "pricing": "$_arg1/1000 requests",
359
- "avg_solve_time": "< 9 seconds"
360
- },
361
- "recaptcha_v3_enterprise": {
362
- "type": "ReCaptchaV3EnterpriseTaskProxyLess",
363
- "description": "reCAPTCHA v3 Enterprise solving with score ≥0.9",
364
- "response_field": "gRecaptchaResponse",
365
- "pricing": "$_arg3/1000 requests",
366
- "avg_solve_time": "< 3 seconds"
367
- },
368
- "cloudflare_turnstile": {
369
- "type": "AntiTurnstileTaskProxyLess",
370
- "description": "Cloudflare Turnstile CAPTCHA solving",
371
- "response_field": "token",
372
- "injection_target": "cf-turnstile-response",
373
- "pricing": "$_arg3/1000 requests",
374
- "avg_solve_time": "< 3 seconds"
375
- },
376
- "cloudflare_challenge": {
377
- "type": "AntiCloudflareTask",
378
- "description": "Cloudflare Challenge (5s shield) solving",
379
- "response_field": "cookies",
380
- "requires_proxy": true,
381
- "pricing": "Contact for pricing",
382
- "avg_solve_time": "< 10 seconds"
383
- },
384
- "aws_waf": {
385
- "type": "AntiAwsWafTaskProxyLess",
386
- "description": "AWS WAF CAPTCHA solving",
387
- "response_field": "cookie",
388
- "injection_method": "cookie_set",
389
- "pricing": "Contact for pricing",
390
- "avg_solve_time": "< 5 seconds"
391
- },
392
- "geetest_v3": {
393
- "type": "GeeTestTaskProxyLess",
394
- "description": "GeeTest v3 CAPTCHA solving",
395
- "response_field": "challenge",
396
- "pricing": "$0.5/1000 requests",
397
- "avg_solve_time": "< 5 seconds"
398
- },
399
- "geetest_v4": {
400
- "type": "GeeTestV4TaskProxyLess",
401
- "description": "GeeTest v4 CAPTCHA solving",
402
- "response_field": "captcha_output",
403
- "pricing": "$0.5/1000 requests",
404
- "avg_solve_time": "< 5 seconds"
405
- },
406
- "image_to_text": {
407
- "type": "ImageToTextTask",
408
- "description": "OCR image CAPTCHA solving",
409
- "response_field": "text",
410
- "pricing": "$0.4/1000 requests",
411
- "avg_solve_time": "< 1 second"
412
- }
413
- },
414
- "integration_methods": {
415
- "api_integration": {
416
- "description": "Direct API integration with Python capsolver SDK",
417
- "advantages": ["More flexible", "Precise control", "Better error handling"],
418
- "recommended": true
419
- },
420
- "browser_extension": {
421
- "description": "CapSolver browser extension integration",
422
- "advantages": ["Easy setup", "Automatic detection", "No coding required"],
423
- "extension_url": "https://chrome.google.com/webstore/detail/capsolver/pgojnojmmhpofjgdmaebadhbocahppod"
424
- }
425
- },
426
- "python_sdk": {
427
- "installation": "pip install capsolver",
428
- "import": "import capsolver",
429
- "usage": "capsolver.api_key = 'CAP-xxxxxxxxxxxxxxxxxxxxx'"
430
- },
431
- "pricing": {
432
- "pay_per_usage": "Standard pricing per request",
433
- "package_discounts": "Up to 60% savings with packages",
434
- "developer_plan": "Contact for better pricing",
435
- "balance_check": "GET /getBalance endpoint"
436
- }
437
- return 0
438
- }
439
- EOF
440
-
441
- print_success "CapSolver configuration created at $capsolver_config"
442
-
443
- # Create Python example script
444
- local example_script="$CONFIG_DIR/capsolver-example.py"
445
- cat > "$example_script" << 'EOF'
446
- #!/usr/bin/env python3
447
- """
448
- CapSolver + Crawl4AI Integration Example
449
- Demonstrates CAPTCHA solving with various types
450
- """
451
-
452
- import asyncio
453
- import capsolver
454
- from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
455
-
456
- # TODO: Set your CapSolver API key
457
- # Get your API key from: https://dashboard.capsolver.com/dashboard/overview
458
- CAPSOLVER_API_KEY = "CAP-xxxxxxxxxxxxxxxxxxxxx"
459
- capsolver.api_key = CAPSOLVER_API_KEY
460
-
461
- async def solve_recaptcha_v2_example():
462
- """Example: Solving reCAPTCHA v2 checkbox"""
463
- site_url = "https://recaptcha-demo.appspot.com/recaptcha-v2-checkbox.php"
464
- site_key = "6LfW6wATAAAAAHLqO2pb8bDBahxlMxNdo9g947u9"
465
-
466
- browser_config = BrowserConfig(
467
- verbose=True,
468
- headless=False,
469
- use_persistent_context=True,
470
- )
471
-
472
- async with AsyncWebCrawler(config=browser_config) as crawler:
473
- # Initial page load
474
- await crawler.arun(
475
- url=site_url,
476
- cache_mode=CacheMode.BYPASS,
477
- session_id="captcha_session"
478
- )
479
-
480
- # Solve CAPTCHA using CapSolver
481
- print("🔄 Solving reCAPTCHA v2...")
482
- solution = capsolver.solve({
483
- "type": "ReCaptchaV2TaskProxyLess",
484
- "websiteURL": site_url,
485
- "websiteKey": site_key,
486
- })
487
- token = solution["gRecaptchaResponse"]
488
- print(f"✅ Token obtained: {token[:50]}...")
489
-
490
- # Inject token and submit
491
- js_code = f"""
492
- const textarea = document.getElementById('g-recaptcha-response');
493
- if (textarea) {{
494
- textarea.value = '{token}';
495
- document.querySelector('button.form-field[type="submit"]').click();
496
- }}
497
- """
498
-
499
- wait_condition = """() => {
500
- const items = document.querySelectorAll('h2');
501
- return items.length > 1;
502
- }"""
503
-
504
- run_config = CrawlerRunConfig(
505
- cache_mode=CacheMode.BYPASS,
506
- session_id="captcha_session",
507
- js_code=js_code,
508
- js_only=True,
509
- wait_for=f"js:{wait_condition}"
510
- )
511
-
512
- result = await crawler.arun(url=site_url, config=run_config)
513
- print("🎉 CAPTCHA solved successfully!")
514
- return result.markdown
515
-
516
- async def solve_cloudflare_turnstile_example():
517
- """Example: Solving Cloudflare Turnstile"""
518
- site_url = "https://clifford.io/demo/cloudflare-turnstile"
519
- site_key = "0x4AAAAAAAGlwMzq_9z6S9Mh"
520
-
521
- browser_config = BrowserConfig(
522
- verbose=True,
523
- headless=False,
524
- use_persistent_context=True,
525
- )
526
-
527
- async with AsyncWebCrawler(config=browser_config) as crawler:
528
- # Initial page load
529
- await crawler.arun(
530
- url=site_url,
531
- cache_mode=CacheMode.BYPASS,
532
- session_id="turnstile_session"
533
- )
534
-
535
- # Solve Turnstile using CapSolver
536
- print("🔄 Solving Cloudflare Turnstile...")
537
- solution = capsolver.solve({
538
- "type": "AntiTurnstileTaskProxyLess",
539
- "websiteURL": site_url,
540
- "websiteKey": site_key,
541
- })
542
- token = solution["token"]
543
- print(f"✅ Token obtained: {token[:50]}...")
544
-
545
- # Inject token and submit
546
- js_code = f"""
547
- document.querySelector('input[name="cf-turnstile-response"]').value = '{token}';
548
- document.querySelector('button[type="submit"]').click();
549
- """
550
-
551
- wait_condition = """() => {
552
- const items = document.querySelectorAll('h1');
553
- return items.length === 0;
554
- }"""
555
-
556
- run_config = CrawlerRunConfig(
557
- cache_mode=CacheMode.BYPASS,
558
- session_id="turnstile_session",
559
- js_code=js_code,
560
- js_only=True,
561
- wait_for=f"js:{wait_condition}"
562
- )
563
-
564
- result = await crawler.arun(url=site_url, config=run_config)
565
- print("🎉 Turnstile solved successfully!")
566
- return result.markdown
567
-
568
- async def main():
569
- """Main function to run examples"""
570
- print("🚀 CapSolver + Crawl4AI Integration Examples")
571
- print("=" * 50)
572
-
573
- try:
574
- # Example 1: reCAPTCHA v2
575
- print("\n📋 Example 1: reCAPTCHA v2")
576
- result1 = await solve_recaptcha_v2_example()
577
-
578
- # Example 2: Cloudflare Turnstile
579
- print("\n📋 Example 2: Cloudflare Turnstile")
580
- result2 = await solve_cloudflare_turnstile_example()
581
-
582
- print("\n✅ All examples completed successfully!")
583
-
584
- except Exception as e:
585
- print(f"❌ Error: {e}")
586
- print("💡 Make sure to set your CapSolver API key!")
587
-
588
- if __name__ == "__main__":
589
- asyncio.run(main())
590
- EOF
591
-
592
- chmod +x "$example_script"
593
- print_success "Python example script created at $example_script"
594
-
595
- print_info "CapSolver Integration Setup Complete!"
596
- print_info ""
597
- print_info "📋 Next Steps:"
598
- print_info "1. Get API key: https://dashboard.capsolver.com/dashboard/overview"
599
- print_info "2. Install Python SDK: pip install capsolver"
600
- print_info "3. Set API key in example script: $example_script"
601
- print_info "4. Run example: python3 $example_script"
602
- print_info ""
603
- print_info "📚 Supported CAPTCHA Types:"
604
- print_info "• reCAPTCHA v2/v3 (including Enterprise)"
605
- print_info "• Cloudflare Turnstile & Challenge"
606
- print_info "• AWS WAF"
607
- print_info "• GeeTest v3/v4"
608
- print_info "• Image-to-Text OCR"
609
- print_info ""
610
- print_info "💰 Pricing: Starting from $0.4/1000 requests"
611
- print_info "🔗 Documentation: https://docs.capsolver.com/"
612
-
613
- return 0
614
- }
615
-
616
#######################################
# Crawl a single URL through the Crawl4AI REST API on localhost.
# Globals:
#   DOCKER_CONTAINER   (read) name filter used to detect a running container
#   DOCKER_PORT        (read) port the API is reached on
#   CONTENT_TYPE_JSON  (read) header line passed to `curl -H`
#                      (presumably "Content-Type: application/json" - confirm)
# Arguments:
#   $1 - URL to crawl (required)
#   $2 - format; accepted by the CLI ("crawl [url] [format] [file]") but not
#        read by this function
#   $3 - optional file that receives the raw response
# Outputs:
#   The response piped through `jq '.'` on stdout when no file is given.
# Returns:
#   0 on success; 1 when the URL is missing, the container cannot be started,
#   or curl fails.
#######################################
crawl_url() {
  local url="${1:-}"
  local output_file="${3:-}"

  if [[ -z "$url" ]]; then
    print_error "URL is required"
    return 1
  fi

  print_header "Crawling URL: $url"

  # Check if Docker container is running
  if ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
    print_warning "Docker container is not running. Starting it..."
    if ! docker_start; then
      return 1
    fi
    sleep 5 # Wait for container to be ready
  fi

  # Escape backslashes and double quotes so the URL remains a valid JSON
  # string even when it contains those characters.
  local url_json
  url_json=${url//\\/\\\\}
  url_json=${url_json//\"/\\\"}

  local api_url="http://localhost:$DOCKER_PORT/crawl"
  local payload
  # Build the request body in one piece; nothing but JSON may end up in here.
  printf -v payload \
    '{"urls": ["%s"], "crawler_config": {"type": "CrawlerRunConfig", "params": {"cache_mode": "bypass"}}}' \
    "$url_json"

  print_info "Sending crawl request..."
  local response
  # The header must be quoted: it contains a space and would otherwise be
  # split into two separate curl arguments.
  if ! response=$(curl -s -X POST "$api_url" \
    -H "$CONTENT_TYPE_JSON" \
    -d "$payload"); then
    print_error "Failed to crawl URL"
    return 1
  fi

  if [[ -n "$output_file" ]]; then
    printf '%s\n' "$response" > "$output_file"
    print_success "Results saved to $output_file"
  else
    printf '%s\n' "$response" | jq '.'
  fi

  print_success "Crawl completed successfully"
  return 0
}
674
-
675
#######################################
# Extract structured data from a URL using a JsonCssExtractionStrategy
# request against the Crawl4AI REST API on localhost.
# Globals:
#   DOCKER_CONTAINER   (read) name filter used to detect a running container
#   DOCKER_PORT        (read) port the API is reached on
#   CONTENT_TYPE_JSON  (read) header line passed to `curl -H`
#                      (presumably "Content-Type: application/json" - confirm)
# Arguments:
#   $1 - URL to process (required)
#   $2 - extraction schema as a JSON value (required); it is embedded
#        verbatim into the request body, so it must already be valid JSON
#   $3 - optional file that receives the raw response
# Outputs:
#   `.results[0].extracted_content` of the response (via jq) on stdout when
#   no file is given.
# Returns:
#   0 on success; 1 when an argument is missing, the container cannot be
#   started, or curl fails.
#######################################
extract_structured() {
  local url="${1:-}"
  local schema="${2:-}"
  local output_file="${3:-}"

  if [[ -z "$url" || -z "$schema" ]]; then
    print_error "URL and schema are required"
    return 1
  fi

  print_header "Extracting structured data from: $url"

  # Check if Docker container is running
  if ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
    print_warning "Docker container is not running. Starting it..."
    if ! docker_start; then
      return 1
    fi
    sleep 5
  fi

  # Escape backslashes and double quotes so the URL remains a valid JSON
  # string. The schema is intentionally NOT escaped: it is a JSON value.
  local url_json
  url_json=${url//\\/\\\\}
  url_json=${url_json//\"/\\\"}

  local api_url="http://localhost:$DOCKER_PORT/crawl"
  local payload
  # Build the request body in one piece; nothing but JSON may end up in here.
  printf -v payload \
    '{"urls": ["%s"], "crawler_config": {"type": "CrawlerRunConfig", "params": {"extraction_strategy": {"type": "JsonCssExtractionStrategy", "params": {"schema": {"type": "dict", "value": %s}}}, "cache_mode": "bypass"}}}' \
    "$url_json" "$schema"

  print_info "Sending extraction request..."
  local response
  # The header must be quoted: it contains a space and would otherwise be
  # split into two separate curl arguments.
  if ! response=$(curl -s -X POST "$api_url" \
    -H "$CONTENT_TYPE_JSON" \
    -d "$payload"); then
    print_error "Failed to extract data"
    return 1
  fi

  if [[ -n "$output_file" ]]; then
    printf '%s\n' "$response" > "$output_file"
    print_success "Results saved to $output_file"
  else
    printf '%s\n' "$response" | jq '.results[0].extracted_content'
  fi

  print_success "Extraction completed successfully"
  return 0
}
743
-
744
#######################################
# Crawl a CAPTCHA-protected page: solve the CAPTCHA with CapSolver, inject
# the result with JavaScript, and print the crawled markdown.
#
# The work is done by a generated Python script. All request parameters are
# handed to it through environment variables (never spliced into the Python
# source), so quotes or other special characters in the arguments cannot
# change the generated code.
#
# Globals:
#   DOCKER_CONTAINER   (read) name filter used to detect a running container
#   CAPSOLVER_API_KEY  (read) CapSolver API key; must be non-empty
# Arguments:
#   $1 - URL to crawl (required)
#   $2 - CAPTCHA type (required): recaptcha_v2, recaptcha_v3, turnstile,
#        aws_waf
#   $3 - site key; required for every type except aws_waf
#   $4 - optional file that receives the script's stdout and stderr
# Outputs:
#   The Python script's output. When $4 is given the output is captured to
#   that file first and then echoed, so the script runs exactly once.
# Returns:
#   0 on success; 1 on invalid arguments, missing API key, container start
#   failure, temp-file failure, or a failing Python run.
#######################################
captcha_crawl() {
  local url="${1:-}"
  local captcha_type="${2:-}"
  local site_key="${3:-}"
  local output_file="${4:-}"

  if [[ -z "$url" || -z "$captcha_type" ]]; then
    print_error "URL and CAPTCHA type are required"
    print_info "Usage: captcha-crawl <url> <captcha_type> [site_key] [output_file]"
    print_info "CAPTCHA types: recaptcha_v2, recaptcha_v3, turnstile, aws_waf"
    return 1
  fi

  print_header "Crawling with CAPTCHA Solving: $url"
  print_info "CAPTCHA Type: $captcha_type"

  # Validate cheap preconditions before starting containers or paying for a
  # CAPTCHA solve, and so that these errors yield a non-zero status.
  case "$captcha_type" in
    recaptcha_v2 | recaptcha_v3 | turnstile)
      if [[ -z "$site_key" ]]; then
        print_error "site_key is required for CAPTCHA type: $captcha_type"
        return 1
      fi
      ;;
    aws_waf) ;;
    *)
      print_error "Unsupported CAPTCHA type: $captcha_type"
      print_info "CAPTCHA types: recaptcha_v2, recaptcha_v3, turnstile, aws_waf"
      return 1
      ;;
  esac

  # Check if CapSolver API key is set
  if [[ -z "${CAPSOLVER_API_KEY:-}" ]]; then
    print_error "CAPSOLVER_API_KEY environment variable not set"
    print_info "Set it with: export CAPSOLVER_API_KEY='CAP-xxxxxxxxxxxxxxxxxxxxx'"
    print_info "Get your API key from: https://dashboard.capsolver.com/dashboard/overview"
    return 1
  fi

  # Check if Docker container is running
  if ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
    print_warning "Docker container is not running. Starting it..."
    if ! docker_start; then
      return 1
    fi
    sleep 5
  fi

  # Create Python script for CAPTCHA crawling. mktemp gives an unpredictable
  # name, unlike a PID-based path in a shared temp directory.
  local temp_script
  if ! temp_script=$(mktemp "${TMPDIR:-/tmp}/captcha_crawl.XXXXXX"); then
    print_error "Failed to create temporary script"
    return 1
  fi

  # Quoted delimiter: the script is written literally, with no shell expansion.
  cat > "$temp_script" << 'PYEOF'
#!/usr/bin/env python3
import asyncio
import capsolver
import os
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode

# Get CapSolver API key from environment
api_key = os.getenv('CAPSOLVER_API_KEY')
if not api_key:
    print("❌ Error: CAPSOLVER_API_KEY environment variable not set")
    print("💡 Set it with: export CAPSOLVER_API_KEY='CAP-xxxxxxxxxxxxxxxxxxxxx'")
    exit(1)

capsolver.api_key = api_key

async def crawl_with_captcha():
    # Request parameters are provided by the calling shell function.
    url = os.environ["CAPTCHA_CRAWL_URL"]
    captcha_type = os.environ["CAPTCHA_CRAWL_TYPE"]
    site_key = os.environ.get("CAPTCHA_CRAWL_SITE_KEY", "")

    browser_config = BrowserConfig(
        verbose=True,
        headless=False,
        use_persistent_context=True,
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        # Initial page load
        print(f"🔄 Loading page: {url}")
        await crawler.arun(
            url=url,
            cache_mode=CacheMode.BYPASS,
            session_id="captcha_crawl_session"
        )

        # Solve CAPTCHA based on type
        if captcha_type == "recaptcha_v2":
            if not site_key:
                print("❌ Error: site_key required for reCAPTCHA v2")
                return

            print("🔄 Solving reCAPTCHA v2...")
            solution = capsolver.solve({
                "type": "ReCaptchaV2TaskProxyLess",
                "websiteURL": url,
                "websiteKey": site_key,
            })
            token = solution["gRecaptchaResponse"]

            js_code = f'''
            const textarea = document.getElementById('g-recaptcha-response');
            if (textarea) {{
                textarea.value = '{token}';
                console.log('✅ reCAPTCHA v2 token injected');
            }}
            '''

        elif captcha_type == "recaptcha_v3":
            if not site_key:
                print("❌ Error: site_key required for reCAPTCHA v3")
                return

            print("🔄 Solving reCAPTCHA v3...")
            solution = capsolver.solve({
                "type": "ReCaptchaV3TaskProxyLess",
                "websiteURL": url,
                "websiteKey": site_key,
                "pageAction": "submit",
            })
            token = solution["gRecaptchaResponse"]

            # NOTE(review): the hook below only logs; the solved token is not
            # yet substituted into the intercepted request.
            js_code = f'''
            const originalFetch = window.fetch;
            window.fetch = function(...args) {{
                if (typeof args[0] === 'string' && args[0].includes('recaptcha')) {{
                    console.log('🔄 Hooking reCAPTCHA v3 request');
                    // Replace token in request
                }}
                return originalFetch.apply(this, args);
            }};
            console.log('✅ reCAPTCHA v3 hook installed');
            '''

        elif captcha_type == "turnstile":
            if not site_key:
                print("❌ Error: site_key required for Cloudflare Turnstile")
                return

            print("🔄 Solving Cloudflare Turnstile...")
            solution = capsolver.solve({
                "type": "AntiTurnstileTaskProxyLess",
                "websiteURL": url,
                "websiteKey": site_key,
            })
            token = solution["token"]

            js_code = f'''
            const input = document.querySelector('input[name="cf-turnstile-response"]');
            if (input) {{
                input.value = '{token}';
                console.log('✅ Turnstile token injected');
            }}
            '''

        elif captcha_type == "aws_waf":
            print("🔄 Solving AWS WAF...")
            solution = capsolver.solve({
                "type": "AntiAwsWafTaskProxyLess",
                "websiteURL": url,
            })
            cookie = solution["cookie"]

            js_code = f'''
            document.cookie = 'aws-waf-token={cookie};path=/';
            console.log('✅ AWS WAF cookie set');
            location.reload();
            '''

        else:
            print(f"❌ Error: Unsupported CAPTCHA type: {captcha_type}")
            return

        # Execute JavaScript and continue crawling
        run_config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
            session_id="captcha_crawl_session",
            js_code=js_code,
            js_only=True,
        )

        result = await crawler.arun(url=url, config=run_config)
        print("🎉 CAPTCHA solved and page crawled successfully!")

        return result.markdown

if __name__ == "__main__":
    result = asyncio.run(crawl_with_captcha())
    if result:
        print("📄 Crawled content:")
        print(result[:500] + "..." if len(result) > 500 else result)
PYEOF

  print_info "Running CAPTCHA-enabled crawl..."
  local status=0
  # Run exactly once. The subshell keeps the exported parameters out of the
  # caller's environment; the API key is exported too so a shell-only
  # (non-exported) CAPSOLVER_API_KEY still reaches Python.
  (
    export CAPSOLVER_API_KEY
    export CAPTCHA_CRAWL_URL="$url"
    export CAPTCHA_CRAWL_TYPE="$captcha_type"
    export CAPTCHA_CRAWL_SITE_KEY="$site_key"
    if [[ -n "$output_file" ]]; then
      python3 "$temp_script" > "$output_file" 2>&1
    else
      python3 "$temp_script"
    fi
  ) || status=$?

  rm -f -- "$temp_script"

  # Echo the captured output so the caller still sees it on the terminal.
  if [[ -n "$output_file" && -f "$output_file" ]]; then
    cat -- "$output_file"
  fi

  if ((status != 0)); then
    print_error "CAPTCHA crawl failed"
    return 1
  fi

  print_success "CAPTCHA crawl completed successfully"
  if [[ -n "$output_file" ]]; then
    print_info "Results saved to: $output_file"
  fi
  return 0
}
940
-
941
#######################################
# Report the state of the Crawl4AI installation: Python package, Docker
# container, HTTP API and MCP configuration file.
# Globals:
#   DOCKER_CONTAINER  (read) name filter used to detect a running container
#   DOCKER_PORT       (read) port used to build the health/dashboard URLs
#   CONFIG_DIR        (read) directory expected to hold
#                     crawl4ai-mcp-config.json
# Arguments:
#   None
# Outputs:
#   One status line per component through the print_* helpers.
# Returns:
#   Always 0; problems are reported as warnings, not as a failure status.
#######################################
check_status() {
  print_header "Checking Crawl4AI Service Status"

  # Check Python package
  if command -v crawl4ai-doctor &> /dev/null; then
    print_info "Python package: Installed"
    if crawl4ai-doctor &> /dev/null; then
      print_success "Python package: Working"
    else
      print_warning "Python package: Issues detected"
    fi
  else
    print_warning "Python package: Not installed"
  fi

  # Check Docker container
  if check_docker; then
    if docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
      print_success "Docker container: Running"

      # Check API health. --fail makes HTTP error responses (>= 400) count
      # as unhealthy instead of succeeding on any answer, and --max-time
      # keeps a hung endpoint from blocking the status report.
      local health_url="http://localhost:$DOCKER_PORT/health"
      if curl --silent --fail --max-time 5 "$health_url" &> /dev/null; then
        print_success "API endpoint: Healthy"
        print_info "Dashboard: http://localhost:$DOCKER_PORT/dashboard"
        print_info "Playground: http://localhost:$DOCKER_PORT/playground"
      else
        print_warning "API endpoint: Not responding"
      fi
    else
      print_warning "Docker container: Not running"
    fi
  else
    print_warning "Docker: Not available"
  fi

  # Check MCP configuration
  local mcp_config="$CONFIG_DIR/crawl4ai-mcp-config.json"
  if [[ -f "$mcp_config" ]]; then
    print_success "MCP configuration: Available"
  else
    print_warning "MCP configuration: Not setup"
  fi

  return 0
}
988
-
989
#######################################
# Print the command overview, usage examples and documentation links.
# Globals:
#   HELP_SHOW_MESSAGE  (read) description shown next to the "help" command
# Arguments:
#   None
# Outputs:
#   The help text on stdout, one line per entry.
# Returns:
#   Always 0.
#######################################
show_help() {
  # A single printf emits every argument on its own line.
  printf '%s\n' \
    "Crawl4AI Helper Script" \
    "Usage: $0 [command] [options]" \
    "" \
    "Commands:" \
    " install - Install Crawl4AI Python package" \
    " docker-setup - Setup Docker deployment with monitoring" \
    " docker-start - Start Docker container" \
    " docker-stop - Stop Docker container" \
    " mcp-setup - Setup MCP server integration" \
    " capsolver-setup - Setup CapSolver CAPTCHA solving integration" \
    " crawl [url] [format] [file] - Crawl URL and extract content" \
    " extract [url] [schema] [file] - Extract structured data" \
    " captcha-crawl [url] [type] [key] [file] - Crawl with CAPTCHA solving" \
    " status - Check Crawl4AI service status" \
    " help - $HELP_SHOW_MESSAGE" \
    "" \
    "Examples:" \
    " $0 install" \
    " $0 docker-setup" \
    " $0 docker-start" \
    " $0 crawl https://example.com markdown output.json" \
    " $0 extract https://example.com '{\"title\":\"h1\"}' data.json" \
    " $0 captcha-crawl https://example.com recaptcha_v2 6LfW6wATAAAAAHLqO2pb8bDBahxlMxNdo9g947u9" \
    " $0 status" \
    "" \
    "Documentation:" \
    " GitHub: https://github.com/unclecode/crawl4ai" \
    " Docs: https://docs.crawl4ai.com/" \
    " Framework docs: .agent/CRAWL4AI.md"
  return 0
}
1022
-
1023
#######################################
# Command dispatcher: routes the first argument to the matching helper and
# forwards the remaining positional arguments.
# Globals:
#   ERROR_UNKNOWN_COMMAND  (read) message prefix for unrecognised commands
# Arguments:
#   $1     - command name; defaults to "help"
#   $2..$5 - command-specific parameters, passed through positionally
# Returns:
#   The exit status of the dispatched command, so failures reach the caller
#   instead of being masked; 1 for an unknown command.
#######################################
main() {
  # Assign positional parameters to local variables. The ":-" defaults keep
  # absent arguments from being an error when nounset is active.
  local command="${1:-help}"
  local param2="${2:-}"
  local param3="${3:-}"
  local param4="${4:-}"
  local param5="${5:-}"
  local status=0

  # Main command handler
  case "$command" in
    "install")
      install_crawl4ai || status=$?
      ;;
    "docker-setup")
      docker_setup || status=$?
      ;;
    "docker-start")
      docker_start || status=$?
      ;;
    "docker-stop")
      docker_stop || status=$?
      ;;
    "mcp-setup")
      mcp_setup || status=$?
      ;;
    "capsolver-setup")
      capsolver_setup || status=$?
      ;;
    "crawl")
      crawl_url "$param2" "$param3" "$param4" || status=$?
      ;;
    "extract")
      extract_structured "$param2" "$param3" "$param4" || status=$?
      ;;
    "captcha-crawl")
      captcha_crawl "$param2" "$param3" "$param4" "$param5" || status=$?
      ;;
    "status")
      check_status || status=$?
      ;;
    "help" | "-h" | "--help" | "")
      show_help
      ;;
    *)
      print_error "$ERROR_UNKNOWN_COMMAND $command"
      show_help
      status=1
      ;;
  esac

  return "$status"
}
1075
-
1076
# Script entry point: run the dispatcher with the command-line arguments and
# exit with its status, so callers can detect a failed command rather than
# always seeing success.
main "$@"
exit $?