aidevops 2.52.1 → 2.53.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/VERSION +1 -1
- package/aidevops.sh +15 -9
- package/package.json +4 -4
- package/scripts/npm-postinstall.js +6 -7
- package/setup.sh +1 -1
- package/templates/deploy-templates.sh +144 -0
- package/templates/home/.agent/README.md +33 -0
- package/templates/home/AGENTS.md +96 -0
- package/templates/home/git/.agent/README.md +48 -0
- package/templates/home/git/AGENTS.md +97 -0
- package/templates/standard-functions.sh +179 -0
- package/templates/wordpress-performance-workflow.md +217 -0
- package/.agent/AGENTS.md +0 -614
- package/.agent/accounts.md +0 -65
- package/.agent/aidevops/add-new-mcp-to-aidevops.md +0 -456
- package/.agent/aidevops/api-integrations.md +0 -335
- package/.agent/aidevops/architecture.md +0 -510
- package/.agent/aidevops/configs.md +0 -274
- package/.agent/aidevops/docs.md +0 -244
- package/.agent/aidevops/extension.md +0 -311
- package/.agent/aidevops/mcp-integrations.md +0 -340
- package/.agent/aidevops/mcp-troubleshooting.md +0 -162
- package/.agent/aidevops/memory-patterns.md +0 -172
- package/.agent/aidevops/providers.md +0 -217
- package/.agent/aidevops/recommendations.md +0 -321
- package/.agent/aidevops/requirements.md +0 -301
- package/.agent/aidevops/resources.md +0 -214
- package/.agent/aidevops/security-requirements.md +0 -174
- package/.agent/aidevops/security.md +0 -350
- package/.agent/aidevops/service-links.md +0 -400
- package/.agent/aidevops/services.md +0 -357
- package/.agent/aidevops/setup.md +0 -153
- package/.agent/aidevops/troubleshooting.md +0 -389
- package/.agent/aidevops.md +0 -124
- package/.agent/build-plus.md +0 -244
- package/.agent/content/guidelines.md +0 -109
- package/.agent/content.md +0 -87
- package/.agent/health.md +0 -59
- package/.agent/legal.md +0 -59
- package/.agent/loop-state/full-loop.local.md +0 -16
- package/.agent/loop-state/ralph-loop.local.md +0 -10
- package/.agent/marketing.md +0 -440
- package/.agent/memory/README.md +0 -260
- package/.agent/onboarding.md +0 -796
- package/.agent/plan-plus.md +0 -245
- package/.agent/research.md +0 -100
- package/.agent/sales.md +0 -333
- package/.agent/scripts/101domains-helper.sh +0 -701
- package/.agent/scripts/add-missing-returns.sh +0 -140
- package/.agent/scripts/agent-browser-helper.sh +0 -311
- package/.agent/scripts/agno-setup.sh +0 -712
- package/.agent/scripts/ahrefs-mcp-wrapper.js +0 -168
- package/.agent/scripts/aidevops-update-check.sh +0 -71
- package/.agent/scripts/ampcode-cli.sh +0 -522
- package/.agent/scripts/auto-version-bump.sh +0 -156
- package/.agent/scripts/autogen-helper.sh +0 -512
- package/.agent/scripts/beads-sync-helper.sh +0 -596
- package/.agent/scripts/closte-helper.sh +0 -5
- package/.agent/scripts/cloudron-helper.sh +0 -321
- package/.agent/scripts/codacy-cli-chunked.sh +0 -581
- package/.agent/scripts/codacy-cli.sh +0 -442
- package/.agent/scripts/code-audit-helper.sh +0 -5
- package/.agent/scripts/coderabbit-cli.sh +0 -417
- package/.agent/scripts/coderabbit-pro-analysis.sh +0 -238
- package/.agent/scripts/commands/code-simplifier.md +0 -86
- package/.agent/scripts/commands/full-loop.md +0 -246
- package/.agent/scripts/commands/postflight-loop.md +0 -103
- package/.agent/scripts/commands/recall.md +0 -182
- package/.agent/scripts/commands/remember.md +0 -132
- package/.agent/scripts/commands/save-todo.md +0 -175
- package/.agent/scripts/commands/session-review.md +0 -154
- package/.agent/scripts/comprehensive-quality-fix.sh +0 -106
- package/.agent/scripts/context-builder-helper.sh +0 -522
- package/.agent/scripts/coolify-cli-helper.sh +0 -674
- package/.agent/scripts/coolify-helper.sh +0 -380
- package/.agent/scripts/crawl4ai-examples.sh +0 -401
- package/.agent/scripts/crawl4ai-helper.sh +0 -1078
- package/.agent/scripts/crewai-helper.sh +0 -681
- package/.agent/scripts/dev-browser-helper.sh +0 -513
- package/.agent/scripts/dns-helper.sh +0 -396
- package/.agent/scripts/domain-research-helper.sh +0 -917
- package/.agent/scripts/dspy-helper.sh +0 -285
- package/.agent/scripts/dspyground-helper.sh +0 -291
- package/.agent/scripts/eeat-score-helper.sh +0 -1242
- package/.agent/scripts/efficient-return-fix.sh +0 -92
- package/.agent/scripts/extract-opencode-prompts.sh +0 -128
- package/.agent/scripts/find-missing-returns.sh +0 -113
- package/.agent/scripts/fix-auth-headers.sh +0 -104
- package/.agent/scripts/fix-common-strings.sh +0 -254
- package/.agent/scripts/fix-content-type.sh +0 -100
- package/.agent/scripts/fix-error-messages.sh +0 -130
- package/.agent/scripts/fix-misplaced-returns.sh +0 -74
- package/.agent/scripts/fix-remaining-literals.sh +0 -152
- package/.agent/scripts/fix-return-statements.sh +0 -41
- package/.agent/scripts/fix-s131-default-cases.sh +0 -249
- package/.agent/scripts/fix-sc2155-simple.sh +0 -102
- package/.agent/scripts/fix-shellcheck-critical.sh +0 -187
- package/.agent/scripts/fix-string-literals.sh +0 -273
- package/.agent/scripts/full-loop-helper.sh +0 -773
- package/.agent/scripts/generate-opencode-agents.sh +0 -497
- package/.agent/scripts/generate-opencode-commands.sh +0 -1629
- package/.agent/scripts/generate-skills.sh +0 -366
- package/.agent/scripts/git-platforms-helper.sh +0 -640
- package/.agent/scripts/gitea-cli-helper.sh +0 -743
- package/.agent/scripts/github-cli-helper.sh +0 -702
- package/.agent/scripts/gitlab-cli-helper.sh +0 -682
- package/.agent/scripts/gsc-add-user-helper.sh +0 -325
- package/.agent/scripts/gsc-sitemap-helper.sh +0 -678
- package/.agent/scripts/hetzner-helper.sh +0 -485
- package/.agent/scripts/hostinger-helper.sh +0 -229
- package/.agent/scripts/keyword-research-helper.sh +0 -1815
- package/.agent/scripts/langflow-helper.sh +0 -544
- package/.agent/scripts/linkedin-automation.py +0 -241
- package/.agent/scripts/linter-manager.sh +0 -599
- package/.agent/scripts/linters-local.sh +0 -434
- package/.agent/scripts/list-keys-helper.sh +0 -488
- package/.agent/scripts/local-browser-automation.py +0 -339
- package/.agent/scripts/localhost-helper.sh +0 -744
- package/.agent/scripts/loop-common.sh +0 -806
- package/.agent/scripts/mainwp-helper.sh +0 -728
- package/.agent/scripts/markdown-formatter.sh +0 -338
- package/.agent/scripts/markdown-lint-fix.sh +0 -311
- package/.agent/scripts/mass-fix-returns.sh +0 -58
- package/.agent/scripts/mcp-diagnose.sh +0 -167
- package/.agent/scripts/mcp-inspector-helper.sh +0 -449
- package/.agent/scripts/memory-helper.sh +0 -650
- package/.agent/scripts/monitor-code-review.sh +0 -255
- package/.agent/scripts/onboarding-helper.sh +0 -706
- package/.agent/scripts/opencode-github-setup-helper.sh +0 -797
- package/.agent/scripts/opencode-test-helper.sh +0 -213
- package/.agent/scripts/pagespeed-helper.sh +0 -464
- package/.agent/scripts/pandoc-helper.sh +0 -362
- package/.agent/scripts/postflight-check.sh +0 -555
- package/.agent/scripts/pre-commit-hook.sh +0 -259
- package/.agent/scripts/pre-edit-check.sh +0 -169
- package/.agent/scripts/qlty-cli.sh +0 -356
- package/.agent/scripts/quality-cli-manager.sh +0 -525
- package/.agent/scripts/quality-feedback-helper.sh +0 -462
- package/.agent/scripts/quality-fix.sh +0 -263
- package/.agent/scripts/quality-loop-helper.sh +0 -1108
- package/.agent/scripts/ralph-loop-helper.sh +0 -836
- package/.agent/scripts/ralph-upstream-check.sh +0 -341
- package/.agent/scripts/secretlint-helper.sh +0 -847
- package/.agent/scripts/servers-helper.sh +0 -241
- package/.agent/scripts/ses-helper.sh +0 -619
- package/.agent/scripts/session-review-helper.sh +0 -404
- package/.agent/scripts/setup-linters-wizard.sh +0 -379
- package/.agent/scripts/setup-local-api-keys.sh +0 -330
- package/.agent/scripts/setup-mcp-integrations.sh +0 -472
- package/.agent/scripts/shared-constants.sh +0 -246
- package/.agent/scripts/site-crawler-helper.sh +0 -1487
- package/.agent/scripts/snyk-helper.sh +0 -940
- package/.agent/scripts/sonarcloud-autofix.sh +0 -193
- package/.agent/scripts/sonarcloud-cli.sh +0 -191
- package/.agent/scripts/sonarscanner-cli.sh +0 -455
- package/.agent/scripts/spaceship-helper.sh +0 -747
- package/.agent/scripts/stagehand-helper.sh +0 -321
- package/.agent/scripts/stagehand-python-helper.sh +0 -321
- package/.agent/scripts/stagehand-python-setup.sh +0 -441
- package/.agent/scripts/stagehand-setup.sh +0 -439
- package/.agent/scripts/system-cleanup.sh +0 -340
- package/.agent/scripts/terminal-title-helper.sh +0 -388
- package/.agent/scripts/terminal-title-setup.sh +0 -549
- package/.agent/scripts/test-stagehand-both-integration.sh +0 -317
- package/.agent/scripts/test-stagehand-integration.sh +0 -309
- package/.agent/scripts/test-stagehand-python-integration.sh +0 -341
- package/.agent/scripts/todo-ready.sh +0 -263
- package/.agent/scripts/tool-version-check.sh +0 -362
- package/.agent/scripts/toon-helper.sh +0 -469
- package/.agent/scripts/twilio-helper.sh +0 -917
- package/.agent/scripts/updown-helper.sh +0 -279
- package/.agent/scripts/validate-mcp-integrations.sh +0 -250
- package/.agent/scripts/validate-version-consistency.sh +0 -131
- package/.agent/scripts/vaultwarden-helper.sh +0 -597
- package/.agent/scripts/vercel-cli-helper.sh +0 -816
- package/.agent/scripts/verify-mirrors.sh +0 -169
- package/.agent/scripts/version-manager.sh +0 -831
- package/.agent/scripts/webhosting-helper.sh +0 -471
- package/.agent/scripts/webhosting-verify.sh +0 -238
- package/.agent/scripts/wordpress-mcp-helper.sh +0 -508
- package/.agent/scripts/worktree-helper.sh +0 -595
- package/.agent/scripts/worktree-sessions.sh +0 -577
- package/.agent/seo/dataforseo.md +0 -215
- package/.agent/seo/domain-research.md +0 -532
- package/.agent/seo/eeat-score.md +0 -659
- package/.agent/seo/google-search-console.md +0 -366
- package/.agent/seo/gsc-sitemaps.md +0 -282
- package/.agent/seo/keyword-research.md +0 -521
- package/.agent/seo/serper.md +0 -278
- package/.agent/seo/site-crawler.md +0 -387
- package/.agent/seo.md +0 -236
- package/.agent/services/accounting/quickfile.md +0 -159
- package/.agent/services/communications/telfon.md +0 -470
- package/.agent/services/communications/twilio.md +0 -569
- package/.agent/services/crm/fluentcrm.md +0 -449
- package/.agent/services/email/ses.md +0 -399
- package/.agent/services/hosting/101domains.md +0 -378
- package/.agent/services/hosting/closte.md +0 -177
- package/.agent/services/hosting/cloudflare.md +0 -251
- package/.agent/services/hosting/cloudron.md +0 -478
- package/.agent/services/hosting/dns-providers.md +0 -335
- package/.agent/services/hosting/domain-purchasing.md +0 -344
- package/.agent/services/hosting/hetzner.md +0 -327
- package/.agent/services/hosting/hostinger.md +0 -287
- package/.agent/services/hosting/localhost.md +0 -419
- package/.agent/services/hosting/spaceship.md +0 -353
- package/.agent/services/hosting/webhosting.md +0 -330
- package/.agent/social-media.md +0 -69
- package/.agent/templates/plans-template.md +0 -114
- package/.agent/templates/prd-template.md +0 -129
- package/.agent/templates/tasks-template.md +0 -108
- package/.agent/templates/todo-template.md +0 -89
- package/.agent/tools/ai-assistants/agno.md +0 -471
- package/.agent/tools/ai-assistants/capsolver.md +0 -326
- package/.agent/tools/ai-assistants/configuration.md +0 -221
- package/.agent/tools/ai-assistants/overview.md +0 -209
- package/.agent/tools/ai-assistants/status.md +0 -171
- package/.agent/tools/ai-assistants/windsurf.md +0 -193
- package/.agent/tools/ai-orchestration/autogen.md +0 -406
- package/.agent/tools/ai-orchestration/crewai.md +0 -445
- package/.agent/tools/ai-orchestration/langflow.md +0 -405
- package/.agent/tools/ai-orchestration/openprose.md +0 -487
- package/.agent/tools/ai-orchestration/overview.md +0 -362
- package/.agent/tools/ai-orchestration/packaging.md +0 -647
- package/.agent/tools/browser/agent-browser.md +0 -464
- package/.agent/tools/browser/browser-automation.md +0 -400
- package/.agent/tools/browser/chrome-devtools.md +0 -282
- package/.agent/tools/browser/crawl4ai-integration.md +0 -422
- package/.agent/tools/browser/crawl4ai-resources.md +0 -277
- package/.agent/tools/browser/crawl4ai-usage.md +0 -416
- package/.agent/tools/browser/crawl4ai.md +0 -585
- package/.agent/tools/browser/dev-browser.md +0 -341
- package/.agent/tools/browser/pagespeed.md +0 -260
- package/.agent/tools/browser/playwright.md +0 -266
- package/.agent/tools/browser/playwriter.md +0 -310
- package/.agent/tools/browser/stagehand-examples.md +0 -456
- package/.agent/tools/browser/stagehand-python.md +0 -483
- package/.agent/tools/browser/stagehand.md +0 -421
- package/.agent/tools/build-agent/agent-review.md +0 -224
- package/.agent/tools/build-agent/build-agent.md +0 -784
- package/.agent/tools/build-mcp/aidevops-plugin.md +0 -476
- package/.agent/tools/build-mcp/api-wrapper.md +0 -445
- package/.agent/tools/build-mcp/build-mcp.md +0 -240
- package/.agent/tools/build-mcp/deployment.md +0 -401
- package/.agent/tools/build-mcp/server-patterns.md +0 -632
- package/.agent/tools/build-mcp/transports.md +0 -366
- package/.agent/tools/code-review/auditing.md +0 -383
- package/.agent/tools/code-review/automation.md +0 -219
- package/.agent/tools/code-review/best-practices.md +0 -203
- package/.agent/tools/code-review/codacy.md +0 -151
- package/.agent/tools/code-review/code-simplifier.md +0 -174
- package/.agent/tools/code-review/code-standards.md +0 -309
- package/.agent/tools/code-review/coderabbit.md +0 -101
- package/.agent/tools/code-review/management.md +0 -155
- package/.agent/tools/code-review/qlty.md +0 -248
- package/.agent/tools/code-review/secretlint.md +0 -565
- package/.agent/tools/code-review/setup.md +0 -250
- package/.agent/tools/code-review/snyk.md +0 -563
- package/.agent/tools/code-review/tools.md +0 -230
- package/.agent/tools/content/summarize.md +0 -353
- package/.agent/tools/context/augment-context-engine.md +0 -468
- package/.agent/tools/context/context-builder-agent.md +0 -76
- package/.agent/tools/context/context-builder.md +0 -375
- package/.agent/tools/context/context7.md +0 -371
- package/.agent/tools/context/dspy.md +0 -302
- package/.agent/tools/context/dspyground.md +0 -374
- package/.agent/tools/context/llm-tldr.md +0 -219
- package/.agent/tools/context/osgrep.md +0 -488
- package/.agent/tools/context/prompt-optimization.md +0 -338
- package/.agent/tools/context/toon.md +0 -292
- package/.agent/tools/conversion/pandoc.md +0 -304
- package/.agent/tools/credentials/api-key-management.md +0 -154
- package/.agent/tools/credentials/api-key-setup.md +0 -224
- package/.agent/tools/credentials/environment-variables.md +0 -180
- package/.agent/tools/credentials/vaultwarden.md +0 -382
- package/.agent/tools/data-extraction/outscraper.md +0 -974
- package/.agent/tools/deployment/coolify-cli.md +0 -388
- package/.agent/tools/deployment/coolify-setup.md +0 -353
- package/.agent/tools/deployment/coolify.md +0 -345
- package/.agent/tools/deployment/vercel.md +0 -390
- package/.agent/tools/git/authentication.md +0 -132
- package/.agent/tools/git/gitea-cli.md +0 -193
- package/.agent/tools/git/github-actions.md +0 -207
- package/.agent/tools/git/github-cli.md +0 -223
- package/.agent/tools/git/gitlab-cli.md +0 -190
- package/.agent/tools/git/opencode-github-security.md +0 -350
- package/.agent/tools/git/opencode-github.md +0 -328
- package/.agent/tools/git/opencode-gitlab.md +0 -252
- package/.agent/tools/git/security.md +0 -196
- package/.agent/tools/git.md +0 -207
- package/.agent/tools/opencode/oh-my-opencode.md +0 -375
- package/.agent/tools/opencode/opencode-anthropic-auth.md +0 -446
- package/.agent/tools/opencode/opencode.md +0 -651
- package/.agent/tools/social-media/bird.md +0 -437
- package/.agent/tools/task-management/beads.md +0 -336
- package/.agent/tools/terminal/terminal-title.md +0 -251
- package/.agent/tools/ui/shadcn.md +0 -196
- package/.agent/tools/ui/ui-skills.md +0 -115
- package/.agent/tools/wordpress/localwp.md +0 -311
- package/.agent/tools/wordpress/mainwp.md +0 -391
- package/.agent/tools/wordpress/scf.md +0 -527
- package/.agent/tools/wordpress/wp-admin.md +0 -729
- package/.agent/tools/wordpress/wp-dev.md +0 -940
- package/.agent/tools/wordpress/wp-preferred.md +0 -398
- package/.agent/tools/wordpress.md +0 -95
- package/.agent/workflows/branch/bugfix.md +0 -63
- package/.agent/workflows/branch/chore.md +0 -95
- package/.agent/workflows/branch/experiment.md +0 -115
- package/.agent/workflows/branch/feature.md +0 -59
- package/.agent/workflows/branch/hotfix.md +0 -98
- package/.agent/workflows/branch/refactor.md +0 -92
- package/.agent/workflows/branch/release.md +0 -96
- package/.agent/workflows/branch.md +0 -347
- package/.agent/workflows/bug-fixing.md +0 -267
- package/.agent/workflows/changelog.md +0 -129
- package/.agent/workflows/code-audit-remote.md +0 -279
- package/.agent/workflows/conversation-starter.md +0 -69
- package/.agent/workflows/error-feedback.md +0 -578
- package/.agent/workflows/feature-development.md +0 -355
- package/.agent/workflows/git-workflow.md +0 -702
- package/.agent/workflows/multi-repo-workspace.md +0 -268
- package/.agent/workflows/plans.md +0 -709
- package/.agent/workflows/postflight.md +0 -604
- package/.agent/workflows/pr.md +0 -571
- package/.agent/workflows/preflight.md +0 -278
- package/.agent/workflows/ralph-loop.md +0 -773
- package/.agent/workflows/release.md +0 -498
- package/.agent/workflows/session-manager.md +0 -254
- package/.agent/workflows/session-review.md +0 -311
- package/.agent/workflows/sql-migrations.md +0 -631
- package/.agent/workflows/version-bump.md +0 -283
- package/.agent/workflows/wiki-update.md +0 -333
- package/.agent/workflows/worktree.md +0 -477
|
@@ -1,1078 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# shellcheck disable=SC2034,SC2155,SC2317,SC2329,SC2016,SC2181,SC1091,SC2154,SC2015,SC2086,SC2129,SC2030,SC2031,SC2119,SC2120,SC2001,SC2162,SC2088,SC2089,SC2090,SC2029,SC2006,SC2153
|
|
3
|
-
|
|
4
|
-
# Crawl4AI Helper Script
|
|
5
|
-
# AI-powered web crawler and scraper for LLM-friendly data extraction
|
|
6
|
-
#
|
|
7
|
-
# This script provides comprehensive management for Crawl4AI including:
|
|
8
|
-
# - Docker deployment with monitoring dashboard
|
|
9
|
-
# - Python package installation and setup
|
|
10
|
-
# - MCP server integration for AI assistants
|
|
11
|
-
# - Web scraping and data extraction operations
|
|
12
|
-
# - CapSolver integration for CAPTCHA solving and anti-bot bypass
|
|
13
|
-
#
|
|
14
|
-
# Usage: ./crawl4ai-helper.sh [command] [options]
|
|
15
|
-
# Commands:
|
|
16
|
-
# install - Install Crawl4AI Python package
|
|
17
|
-
# docker-setup - Setup Docker deployment with monitoring
|
|
18
|
-
# docker-start - Start Docker container
|
|
19
|
-
# docker-stop - Stop Docker container
|
|
20
|
-
# mcp-setup - Setup MCP server integration
|
|
21
|
-
# capsolver-setup - Setup CapSolver integration for CAPTCHA solving
|
|
22
|
-
# crawl - Perform web crawling operation
|
|
23
|
-
# extract - Extract structured data from URL
|
|
24
|
-
# captcha-crawl - Crawl with CAPTCHA solving capabilities
|
|
25
|
-
# status - Check Crawl4AI service status
|
|
26
|
-
# help - Show this help message
|
|
27
|
-
#
|
|
28
|
-
# Author: AI DevOps Framework
|
|
29
|
-
# Version: 1.0.0
|
|
30
|
-
# License: MIT
|
|
31
|
-
|
|
32
|
-
# Colors for output
|
|
33
|
-
readonly GREEN='\033[0;32m'
|
|
34
|
-
readonly BLUE='\033[0;34m'
|
|
35
|
-
readonly YELLOW='\033[1;33m'
|
|
36
|
-
readonly RED='\033[0;31m'
|
|
37
|
-
readonly PURPLE='\033[0;35m'
|
|
38
|
-
readonly NC='\033[0m' # No Color
|
|
39
|
-
|
|
40
|
-
# Common constants
|
|
41
|
-
readonly ERROR_UNKNOWN_COMMAND="Unknown command:"
|
|
42
|
-
# Common constants
|
|
43
|
-
readonly CONTENT_TYPE_JSON=$CONTENT_TYPE_JSON
|
|
44
|
-
|
|
45
|
-
# Constants
|
|
46
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit
|
|
47
|
-
readonly SCRIPT_DIR
|
|
48
|
-
readonly CONFIG_DIR="$SCRIPT_DIR/../configs"
|
|
49
|
-
readonly DOCKER_IMAGE="unclecode/crawl4ai:latest"
|
|
50
|
-
readonly DOCKER_CONTAINER="crawl4ai"
|
|
51
|
-
readonly DOCKER_PORT="11235"
|
|
52
|
-
readonly MCP_PORT="3009"
|
|
53
|
-
readonly HELP_SHOW_MESSAGE="Show this help message"
|
|
54
|
-
|
|
55
|
-
# Print functions
|
|
56
|
-
print_success() {
|
|
57
|
-
local message="$1"
|
|
58
|
-
echo -e "${GREEN}✅ $message${NC}"
|
|
59
|
-
return 0
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
print_info() {
|
|
63
|
-
local message="$1"
|
|
64
|
-
echo -e "${BLUE}ℹ️ $message${NC}"
|
|
65
|
-
return 0
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
print_warning() {
|
|
69
|
-
local message="$1"
|
|
70
|
-
echo -e "${YELLOW}⚠️ $message${NC}"
|
|
71
|
-
return 0
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
print_error() {
|
|
75
|
-
local message="$1"
|
|
76
|
-
echo -e "${RED}❌ $message${NC}"
|
|
77
|
-
return 0
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
print_header() {
|
|
81
|
-
local message="$1"
|
|
82
|
-
echo -e "${PURPLE}🚀 $message${NC}"
|
|
83
|
-
return 0
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
# Check if Docker is available
|
|
87
|
-
check_docker() {
|
|
88
|
-
if ! command -v docker &> /dev/null; then
|
|
89
|
-
print_error "Docker is not installed. Please install Docker first."
|
|
90
|
-
return 1
|
|
91
|
-
fi
|
|
92
|
-
|
|
93
|
-
if ! docker info &> /dev/null; then
|
|
94
|
-
print_error "Docker daemon is not running. Please start Docker."
|
|
95
|
-
return 1
|
|
96
|
-
fi
|
|
97
|
-
|
|
98
|
-
return 0
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
# Check if Python is available
|
|
102
|
-
check_python() {
|
|
103
|
-
if ! command -v python3 &> /dev/null; then
|
|
104
|
-
print_error "Python 3 is not installed. Please install Python 3.8+ first."
|
|
105
|
-
return 1
|
|
106
|
-
fi
|
|
107
|
-
|
|
108
|
-
local python_version
|
|
109
|
-
python_version=$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")
|
|
110
|
-
|
|
111
|
-
if [[ $(echo "$python_version < 3.8" | bc -l) -eq 1 ]]; then
|
|
112
|
-
print_error "Python 3.8+ is required. Current version: $python_version"
|
|
113
|
-
return 1
|
|
114
|
-
fi
|
|
115
|
-
|
|
116
|
-
return 0
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
# Install Crawl4AI Python package
|
|
120
|
-
install_crawl4ai() {
|
|
121
|
-
print_header "Installing Crawl4AI Python Package"
|
|
122
|
-
|
|
123
|
-
if ! check_python; then
|
|
124
|
-
return 1
|
|
125
|
-
fi
|
|
126
|
-
|
|
127
|
-
print_info "Installing Crawl4AI with pip..."
|
|
128
|
-
if pip3 install -U crawl4ai; then
|
|
129
|
-
print_success "Crawl4AI installed successfully"
|
|
130
|
-
else
|
|
131
|
-
print_error "Failed to install Crawl4AI"
|
|
132
|
-
return 1
|
|
133
|
-
fi
|
|
134
|
-
|
|
135
|
-
print_info "Running post-installation setup..."
|
|
136
|
-
if crawl4ai-setup; then
|
|
137
|
-
print_success "Crawl4AI setup completed"
|
|
138
|
-
else
|
|
139
|
-
print_warning "Setup completed with warnings. Run 'crawl4ai-doctor' to check."
|
|
140
|
-
fi
|
|
141
|
-
|
|
142
|
-
print_info "Verifying installation..."
|
|
143
|
-
if crawl4ai-doctor; then
|
|
144
|
-
print_success "Crawl4AI installation verified"
|
|
145
|
-
else
|
|
146
|
-
print_warning "Installation verification completed with warnings"
|
|
147
|
-
fi
|
|
148
|
-
|
|
149
|
-
return 0
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
# Setup Docker deployment
|
|
153
|
-
docker_setup() {
|
|
154
|
-
print_header "Setting up Crawl4AI Docker Deployment"
|
|
155
|
-
|
|
156
|
-
if ! check_docker; then
|
|
157
|
-
return 1
|
|
158
|
-
fi
|
|
159
|
-
|
|
160
|
-
print_info "Pulling Crawl4AI Docker image..."
|
|
161
|
-
if docker pull "$DOCKER_IMAGE"; then
|
|
162
|
-
print_success "Docker image pulled successfully"
|
|
163
|
-
else
|
|
164
|
-
print_error "Failed to pull Docker image"
|
|
165
|
-
return 1
|
|
166
|
-
fi
|
|
167
|
-
|
|
168
|
-
# Create environment file if it doesn't exist
|
|
169
|
-
local env_file="$CONFIG_DIR/.crawl4ai.env"
|
|
170
|
-
if [[ ! -f "$env_file" ]]; then
|
|
171
|
-
print_info "Creating environment configuration..."
|
|
172
|
-
cat > "$env_file" << 'EOF'
|
|
173
|
-
# Crawl4AI Environment Configuration
|
|
174
|
-
# Add your API keys here for LLM integration
|
|
175
|
-
|
|
176
|
-
# OpenAI
|
|
177
|
-
# OPENAI_API_KEY=sk-your-key
|
|
178
|
-
|
|
179
|
-
# Anthropic
|
|
180
|
-
# ANTHROPIC_API_KEY=your-anthropic-key
|
|
181
|
-
|
|
182
|
-
# Other providers
|
|
183
|
-
# DEEPSEEK_API_KEY=your-deepseek-key
|
|
184
|
-
# GROQ_API_KEY=your-groq-key
|
|
185
|
-
# TOGETHER_API_KEY=your-together-key
|
|
186
|
-
# MISTRAL_API_KEY=your-mistral-key
|
|
187
|
-
# GEMINI_API_TOKEN=your-gemini-token
|
|
188
|
-
|
|
189
|
-
# Global LLM settings
|
|
190
|
-
# LLM_PROVIDER=openai/gpt-4o-mini
|
|
191
|
-
# LLM_TEMPERATURE=0.7
|
|
192
|
-
EOF
|
|
193
|
-
print_success "Environment file created at $env_file"
|
|
194
|
-
print_warning "Please edit $env_file to add your API keys"
|
|
195
|
-
fi
|
|
196
|
-
|
|
197
|
-
return 0
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
# Start Docker container
|
|
201
|
-
docker_start() {
|
|
202
|
-
print_header "Starting Crawl4AI Docker Container"
|
|
203
|
-
|
|
204
|
-
if ! check_docker; then
|
|
205
|
-
return 1
|
|
206
|
-
fi
|
|
207
|
-
|
|
208
|
-
# Stop existing container if running
|
|
209
|
-
if docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
|
|
210
|
-
print_info "Stopping existing container..."
|
|
211
|
-
docker stop "$DOCKER_CONTAINER" > /dev/null 2>&1
|
|
212
|
-
docker rm "$DOCKER_CONTAINER" > /dev/null 2>&1
|
|
213
|
-
fi
|
|
214
|
-
|
|
215
|
-
local env_file="$CONFIG_DIR/.crawl4ai.env"
|
|
216
|
-
local docker_args=(
|
|
217
|
-
"-d"
|
|
218
|
-
"-p" "$DOCKER_PORT:$DOCKER_PORT"
|
|
219
|
-
"--name" "$DOCKER_CONTAINER"
|
|
220
|
-
"--shm-size=1g"
|
|
221
|
-
)
|
|
222
|
-
|
|
223
|
-
if [[ -f "$env_file" ]]; then
|
|
224
|
-
docker_args+=("--env-file" "$env_file")
|
|
225
|
-
fi
|
|
226
|
-
|
|
227
|
-
docker_args+=("$DOCKER_IMAGE")
|
|
228
|
-
|
|
229
|
-
print_info "Starting Docker container..."
|
|
230
|
-
if docker run "${docker_args[@]}"; then
|
|
231
|
-
print_success "Crawl4AI container started successfully"
|
|
232
|
-
print_info "Dashboard: http://localhost:$DOCKER_PORT/dashboard"
|
|
233
|
-
print_info "Playground: http://localhost:$DOCKER_PORT/playground"
|
|
234
|
-
print_info "API: http://localhost:$DOCKER_PORT"
|
|
235
|
-
else
|
|
236
|
-
print_error "Failed to start Docker container"
|
|
237
|
-
return 1
|
|
238
|
-
fi
|
|
239
|
-
|
|
240
|
-
return 0
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
# Stop Docker container
|
|
244
|
-
docker_stop() {
|
|
245
|
-
print_header "Stopping Crawl4AI Docker Container"
|
|
246
|
-
|
|
247
|
-
if ! check_docker; then
|
|
248
|
-
return 1
|
|
249
|
-
fi
|
|
250
|
-
|
|
251
|
-
if docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
|
|
252
|
-
print_info "Stopping container..."
|
|
253
|
-
if docker stop "$DOCKER_CONTAINER" && docker rm "$DOCKER_CONTAINER"; then
|
|
254
|
-
print_success "Container stopped and removed"
|
|
255
|
-
else
|
|
256
|
-
print_error "Failed to stop container"
|
|
257
|
-
return 1
|
|
258
|
-
fi
|
|
259
|
-
else
|
|
260
|
-
print_warning "Container is not running"
|
|
261
|
-
fi
|
|
262
|
-
|
|
263
|
-
return 0
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
# Setup MCP server integration
|
|
267
|
-
mcp_setup() {
|
|
268
|
-
print_header "Setting up Crawl4AI MCP Server Integration"
|
|
269
|
-
|
|
270
|
-
local mcp_config="$CONFIG_DIR/crawl4ai-mcp-config.json"
|
|
271
|
-
|
|
272
|
-
print_info "Creating MCP server configuration..."
|
|
273
|
-
cat > "$mcp_config" << EOF
|
|
274
|
-
{
|
|
275
|
-
"provider": "crawl4ai",
|
|
276
|
-
"description": "Crawl4AI MCP server for AI-powered web crawling and data extraction",
|
|
277
|
-
"mcp_server": {
|
|
278
|
-
"name": "crawl4ai",
|
|
279
|
-
"command": "npx",
|
|
280
|
-
"args": ["crawl4ai-mcp-server@latest"],
|
|
281
|
-
"port": $MCP_PORT,
|
|
282
|
-
"transport": "stdio",
|
|
283
|
-
"description": "Crawl4AI MCP server for web scraping and LLM-friendly data extraction",
|
|
284
|
-
"env": {
|
|
285
|
-
"CRAWL4AI_API_URL": "http://localhost:$DOCKER_PORT",
|
|
286
|
-
"CRAWL4AI_TIMEOUT": "60"
|
|
287
|
-
}
|
|
288
|
-
},
|
|
289
|
-
"capabilities": [
|
|
290
|
-
"web_crawling",
|
|
291
|
-
"markdown_generation",
|
|
292
|
-
"structured_extraction",
|
|
293
|
-
"llm_extraction",
|
|
294
|
-
"screenshot_capture",
|
|
295
|
-
"pdf_generation",
|
|
296
|
-
"javascript_execution"
|
|
297
|
-
]
|
|
298
|
-
return 0
|
|
299
|
-
}
|
|
300
|
-
EOF
|
|
301
|
-
|
|
302
|
-
print_success "MCP configuration created at $mcp_config"
|
|
303
|
-
print_info "To use with Claude Desktop, add this to your MCP settings:"
|
|
304
|
-
print_info " \"crawl4ai\": {"
|
|
305
|
-
print_info " \"command\": \"npx\","
|
|
306
|
-
print_info " \"args\": [\"crawl4ai-mcp-server@latest\"]"
|
|
307
|
-
print_info " }"
|
|
308
|
-
|
|
309
|
-
return 0
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
# Setup CapSolver integration for CAPTCHA solving
|
|
313
|
-
capsolver_setup() {
|
|
314
|
-
print_header "Setting up CapSolver Integration for CAPTCHA Solving"
|
|
315
|
-
|
|
316
|
-
local capsolver_config="$CONFIG_DIR/capsolver-config.json"
|
|
317
|
-
|
|
318
|
-
print_info "Creating CapSolver configuration..."
|
|
319
|
-
cat > "$capsolver_config" << EOF
|
|
320
|
-
{
|
|
321
|
-
"provider": "capsolver",
|
|
322
|
-
"description": "CapSolver configuration for automated CAPTCHA solving with Crawl4AI",
|
|
323
|
-
"service_type": "captcha_solver",
|
|
324
|
-
"version": "latest",
|
|
325
|
-
"api": {
|
|
326
|
-
"base_url": "https://api.capsolver.com",
|
|
327
|
-
"endpoints": {
|
|
328
|
-
"create_task": "/createTask",
|
|
329
|
-
"get_task_result": "/getTaskResult",
|
|
330
|
-
"get_balance": "/getBalance"
|
|
331
|
-
},
|
|
332
|
-
"authentication": {
|
|
333
|
-
"type": "api_key",
|
|
334
|
-
"header": "clientKey"
|
|
335
|
-
}
|
|
336
|
-
},
|
|
337
|
-
"supported_captcha_types": {
|
|
338
|
-
"recaptcha_v2": {
|
|
339
|
-
"type": "ReCaptchaV2TaskProxyLess",
|
|
340
|
-
"description": "reCAPTCHA v2 checkbox solving",
|
|
341
|
-
"response_field": "gRecaptchaResponse",
|
|
342
|
-
"injection_target": "g-recaptcha-response",
|
|
343
|
-
"pricing": "$0.5/1000 requests",
|
|
344
|
-
"avg_solve_time": "< 9 seconds"
|
|
345
|
-
},
|
|
346
|
-
"recaptcha_v3": {
|
|
347
|
-
"type": "ReCaptchaV3TaskProxyLess",
|
|
348
|
-
"description": "reCAPTCHA v3 invisible solving with score ≥0.7",
|
|
349
|
-
"response_field": "gRecaptchaResponse",
|
|
350
|
-
"injection_method": "fetch_hook",
|
|
351
|
-
"pricing": "$0.5/1000 requests",
|
|
352
|
-
"avg_solve_time": "< 3 seconds"
|
|
353
|
-
},
|
|
354
|
-
"recaptcha_v2_enterprise": {
|
|
355
|
-
"type": "ReCaptchaV2EnterpriseTaskProxyLess",
|
|
356
|
-
"description": "reCAPTCHA v2 Enterprise solving",
|
|
357
|
-
"response_field": "gRecaptchaResponse",
|
|
358
|
-
"pricing": "$_arg1/1000 requests",
|
|
359
|
-
"avg_solve_time": "< 9 seconds"
|
|
360
|
-
},
|
|
361
|
-
"recaptcha_v3_enterprise": {
|
|
362
|
-
"type": "ReCaptchaV3EnterpriseTaskProxyLess",
|
|
363
|
-
"description": "reCAPTCHA v3 Enterprise solving with score ≥0.9",
|
|
364
|
-
"response_field": "gRecaptchaResponse",
|
|
365
|
-
"pricing": "$_arg3/1000 requests",
|
|
366
|
-
"avg_solve_time": "< 3 seconds"
|
|
367
|
-
},
|
|
368
|
-
"cloudflare_turnstile": {
|
|
369
|
-
"type": "AntiTurnstileTaskProxyLess",
|
|
370
|
-
"description": "Cloudflare Turnstile CAPTCHA solving",
|
|
371
|
-
"response_field": "token",
|
|
372
|
-
"injection_target": "cf-turnstile-response",
|
|
373
|
-
"pricing": "$_arg3/1000 requests",
|
|
374
|
-
"avg_solve_time": "< 3 seconds"
|
|
375
|
-
},
|
|
376
|
-
"cloudflare_challenge": {
|
|
377
|
-
"type": "AntiCloudflareTask",
|
|
378
|
-
"description": "Cloudflare Challenge (5s shield) solving",
|
|
379
|
-
"response_field": "cookies",
|
|
380
|
-
"requires_proxy": true,
|
|
381
|
-
"pricing": "Contact for pricing",
|
|
382
|
-
"avg_solve_time": "< 10 seconds"
|
|
383
|
-
},
|
|
384
|
-
"aws_waf": {
|
|
385
|
-
"type": "AntiAwsWafTaskProxyLess",
|
|
386
|
-
"description": "AWS WAF CAPTCHA solving",
|
|
387
|
-
"response_field": "cookie",
|
|
388
|
-
"injection_method": "cookie_set",
|
|
389
|
-
"pricing": "Contact for pricing",
|
|
390
|
-
"avg_solve_time": "< 5 seconds"
|
|
391
|
-
},
|
|
392
|
-
"geetest_v3": {
|
|
393
|
-
"type": "GeeTestTaskProxyLess",
|
|
394
|
-
"description": "GeeTest v3 CAPTCHA solving",
|
|
395
|
-
"response_field": "challenge",
|
|
396
|
-
"pricing": "$0.5/1000 requests",
|
|
397
|
-
"avg_solve_time": "< 5 seconds"
|
|
398
|
-
},
|
|
399
|
-
"geetest_v4": {
|
|
400
|
-
"type": "GeeTestV4TaskProxyLess",
|
|
401
|
-
"description": "GeeTest v4 CAPTCHA solving",
|
|
402
|
-
"response_field": "captcha_output",
|
|
403
|
-
"pricing": "$0.5/1000 requests",
|
|
404
|
-
"avg_solve_time": "< 5 seconds"
|
|
405
|
-
},
|
|
406
|
-
"image_to_text": {
|
|
407
|
-
"type": "ImageToTextTask",
|
|
408
|
-
"description": "OCR image CAPTCHA solving",
|
|
409
|
-
"response_field": "text",
|
|
410
|
-
"pricing": "$0.4/1000 requests",
|
|
411
|
-
"avg_solve_time": "< 1 second"
|
|
412
|
-
}
|
|
413
|
-
},
|
|
414
|
-
"integration_methods": {
|
|
415
|
-
"api_integration": {
|
|
416
|
-
"description": "Direct API integration with Python capsolver SDK",
|
|
417
|
-
"advantages": ["More flexible", "Precise control", "Better error handling"],
|
|
418
|
-
"recommended": true
|
|
419
|
-
},
|
|
420
|
-
"browser_extension": {
|
|
421
|
-
"description": "CapSolver browser extension integration",
|
|
422
|
-
"advantages": ["Easy setup", "Automatic detection", "No coding required"],
|
|
423
|
-
"extension_url": "https://chrome.google.com/webstore/detail/capsolver/pgojnojmmhpofjgdmaebadhbocahppod"
|
|
424
|
-
}
|
|
425
|
-
},
|
|
426
|
-
"python_sdk": {
|
|
427
|
-
"installation": "pip install capsolver",
|
|
428
|
-
"import": "import capsolver",
|
|
429
|
-
"usage": "capsolver.api_key = 'CAP-xxxxxxxxxxxxxxxxxxxxx'"
|
|
430
|
-
},
|
|
431
|
-
"pricing": {
|
|
432
|
-
"pay_per_usage": "Standard pricing per request",
|
|
433
|
-
"package_discounts": "Up to 60% savings with packages",
|
|
434
|
-
"developer_plan": "Contact for better pricing",
|
|
435
|
-
"balance_check": "GET /getBalance endpoint"
|
|
436
|
-
}
|
|
437
|
-
return 0
|
|
438
|
-
}
|
|
439
|
-
EOF
|
|
440
|
-
|
|
441
|
-
print_success "CapSolver configuration created at $capsolver_config"
|
|
442
|
-
|
|
443
|
-
# Create Python example script
|
|
444
|
-
local example_script="$CONFIG_DIR/capsolver-example.py"
|
|
445
|
-
cat > "$example_script" << 'EOF'
|
|
446
|
-
#!/usr/bin/env python3
|
|
447
|
-
"""
|
|
448
|
-
CapSolver + Crawl4AI Integration Example
|
|
449
|
-
Demonstrates CAPTCHA solving with various types
|
|
450
|
-
"""
|
|
451
|
-
|
|
452
|
-
import asyncio
|
|
453
|
-
import capsolver
|
|
454
|
-
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
|
|
455
|
-
|
|
456
|
-
# TODO: Set your CapSolver API key
|
|
457
|
-
# Get your API key from: https://dashboard.capsolver.com/dashboard/overview
|
|
458
|
-
CAPSOLVER_API_KEY = "CAP-xxxxxxxxxxxxxxxxxxxxx"
|
|
459
|
-
capsolver.api_key = CAPSOLVER_API_KEY
|
|
460
|
-
|
|
461
|
-
async def solve_recaptcha_v2_example():
|
|
462
|
-
"""Example: Solving reCAPTCHA v2 checkbox"""
|
|
463
|
-
site_url = "https://recaptcha-demo.appspot.com/recaptcha-v2-checkbox.php"
|
|
464
|
-
site_key = "6LfW6wATAAAAAHLqO2pb8bDBahxlMxNdo9g947u9"
|
|
465
|
-
|
|
466
|
-
browser_config = BrowserConfig(
|
|
467
|
-
verbose=True,
|
|
468
|
-
headless=False,
|
|
469
|
-
use_persistent_context=True,
|
|
470
|
-
)
|
|
471
|
-
|
|
472
|
-
async with AsyncWebCrawler(config=browser_config) as crawler:
|
|
473
|
-
# Initial page load
|
|
474
|
-
await crawler.arun(
|
|
475
|
-
url=site_url,
|
|
476
|
-
cache_mode=CacheMode.BYPASS,
|
|
477
|
-
session_id="captcha_session"
|
|
478
|
-
)
|
|
479
|
-
|
|
480
|
-
# Solve CAPTCHA using CapSolver
|
|
481
|
-
print("🔄 Solving reCAPTCHA v2...")
|
|
482
|
-
solution = capsolver.solve({
|
|
483
|
-
"type": "ReCaptchaV2TaskProxyLess",
|
|
484
|
-
"websiteURL": site_url,
|
|
485
|
-
"websiteKey": site_key,
|
|
486
|
-
})
|
|
487
|
-
token = solution["gRecaptchaResponse"]
|
|
488
|
-
print(f"✅ Token obtained: {token[:50]}...")
|
|
489
|
-
|
|
490
|
-
# Inject token and submit
|
|
491
|
-
js_code = f"""
|
|
492
|
-
const textarea = document.getElementById('g-recaptcha-response');
|
|
493
|
-
if (textarea) {{
|
|
494
|
-
textarea.value = '{token}';
|
|
495
|
-
document.querySelector('button.form-field[type="submit"]').click();
|
|
496
|
-
}}
|
|
497
|
-
"""
|
|
498
|
-
|
|
499
|
-
wait_condition = """() => {
|
|
500
|
-
const items = document.querySelectorAll('h2');
|
|
501
|
-
return items.length > 1;
|
|
502
|
-
}"""
|
|
503
|
-
|
|
504
|
-
run_config = CrawlerRunConfig(
|
|
505
|
-
cache_mode=CacheMode.BYPASS,
|
|
506
|
-
session_id="captcha_session",
|
|
507
|
-
js_code=js_code,
|
|
508
|
-
js_only=True,
|
|
509
|
-
wait_for=f"js:{wait_condition}"
|
|
510
|
-
)
|
|
511
|
-
|
|
512
|
-
result = await crawler.arun(url=site_url, config=run_config)
|
|
513
|
-
print("🎉 CAPTCHA solved successfully!")
|
|
514
|
-
return result.markdown
|
|
515
|
-
|
|
516
|
-
async def solve_cloudflare_turnstile_example():
|
|
517
|
-
"""Example: Solving Cloudflare Turnstile"""
|
|
518
|
-
site_url = "https://clifford.io/demo/cloudflare-turnstile"
|
|
519
|
-
site_key = "0x4AAAAAAAGlwMzq_9z6S9Mh"
|
|
520
|
-
|
|
521
|
-
browser_config = BrowserConfig(
|
|
522
|
-
verbose=True,
|
|
523
|
-
headless=False,
|
|
524
|
-
use_persistent_context=True,
|
|
525
|
-
)
|
|
526
|
-
|
|
527
|
-
async with AsyncWebCrawler(config=browser_config) as crawler:
|
|
528
|
-
# Initial page load
|
|
529
|
-
await crawler.arun(
|
|
530
|
-
url=site_url,
|
|
531
|
-
cache_mode=CacheMode.BYPASS,
|
|
532
|
-
session_id="turnstile_session"
|
|
533
|
-
)
|
|
534
|
-
|
|
535
|
-
# Solve Turnstile using CapSolver
|
|
536
|
-
print("🔄 Solving Cloudflare Turnstile...")
|
|
537
|
-
solution = capsolver.solve({
|
|
538
|
-
"type": "AntiTurnstileTaskProxyLess",
|
|
539
|
-
"websiteURL": site_url,
|
|
540
|
-
"websiteKey": site_key,
|
|
541
|
-
})
|
|
542
|
-
token = solution["token"]
|
|
543
|
-
print(f"✅ Token obtained: {token[:50]}...")
|
|
544
|
-
|
|
545
|
-
# Inject token and submit
|
|
546
|
-
js_code = f"""
|
|
547
|
-
document.querySelector('input[name="cf-turnstile-response"]').value = '{token}';
|
|
548
|
-
document.querySelector('button[type="submit"]').click();
|
|
549
|
-
"""
|
|
550
|
-
|
|
551
|
-
wait_condition = """() => {
|
|
552
|
-
const items = document.querySelectorAll('h1');
|
|
553
|
-
return items.length === 0;
|
|
554
|
-
}"""
|
|
555
|
-
|
|
556
|
-
run_config = CrawlerRunConfig(
|
|
557
|
-
cache_mode=CacheMode.BYPASS,
|
|
558
|
-
session_id="turnstile_session",
|
|
559
|
-
js_code=js_code,
|
|
560
|
-
js_only=True,
|
|
561
|
-
wait_for=f"js:{wait_condition}"
|
|
562
|
-
)
|
|
563
|
-
|
|
564
|
-
result = await crawler.arun(url=site_url, config=run_config)
|
|
565
|
-
print("🎉 Turnstile solved successfully!")
|
|
566
|
-
return result.markdown
|
|
567
|
-
|
|
568
|
-
async def main():
|
|
569
|
-
"""Main function to run examples"""
|
|
570
|
-
print("🚀 CapSolver + Crawl4AI Integration Examples")
|
|
571
|
-
print("=" * 50)
|
|
572
|
-
|
|
573
|
-
try:
|
|
574
|
-
# Example 1: reCAPTCHA v2
|
|
575
|
-
print("\n📋 Example 1: reCAPTCHA v2")
|
|
576
|
-
result1 = await solve_recaptcha_v2_example()
|
|
577
|
-
|
|
578
|
-
# Example 2: Cloudflare Turnstile
|
|
579
|
-
print("\n📋 Example 2: Cloudflare Turnstile")
|
|
580
|
-
result2 = await solve_cloudflare_turnstile_example()
|
|
581
|
-
|
|
582
|
-
print("\n✅ All examples completed successfully!")
|
|
583
|
-
|
|
584
|
-
except Exception as e:
|
|
585
|
-
print(f"❌ Error: {e}")
|
|
586
|
-
print("💡 Make sure to set your CapSolver API key!")
|
|
587
|
-
|
|
588
|
-
if __name__ == "__main__":
|
|
589
|
-
asyncio.run(main())
|
|
590
|
-
EOF
|
|
591
|
-
|
|
592
|
-
chmod +x "$example_script"
|
|
593
|
-
print_success "Python example script created at $example_script"
|
|
594
|
-
|
|
595
|
-
print_info "CapSolver Integration Setup Complete!"
|
|
596
|
-
print_info ""
|
|
597
|
-
print_info "📋 Next Steps:"
|
|
598
|
-
print_info "1. Get API key: https://dashboard.capsolver.com/dashboard/overview"
|
|
599
|
-
print_info "2. Install Python SDK: pip install capsolver"
|
|
600
|
-
print_info "3. Set API key in example script: $example_script"
|
|
601
|
-
print_info "4. Run example: python3 $example_script"
|
|
602
|
-
print_info ""
|
|
603
|
-
print_info "📚 Supported CAPTCHA Types:"
|
|
604
|
-
print_info "• reCAPTCHA v2/v3 (including Enterprise)"
|
|
605
|
-
print_info "• Cloudflare Turnstile & Challenge"
|
|
606
|
-
print_info "• AWS WAF"
|
|
607
|
-
print_info "• GeeTest v3/v4"
|
|
608
|
-
print_info "• Image-to-Text OCR"
|
|
609
|
-
print_info ""
|
|
610
|
-
print_info "💰 Pricing: Starting from $0.4/1000 requests"
|
|
611
|
-
print_info "🔗 Documentation: https://docs.capsolver.com/"
|
|
612
|
-
|
|
613
|
-
return 0
|
|
614
|
-
}
|
|
615
|
-
|
|
616
|
-
# Perform web crawling operation
#
# Arguments:
#   $1 - URL to crawl (required)
#   $2 - output format (currently unused; reserved for future use)
#   $3 - optional output file; when omitted, the raw response is piped to jq
# Globals:
#   DOCKER_CONTAINER, DOCKER_PORT, CONTENT_TYPE_JSON (read)
# Returns:
#   0 on success, 1 on failure
crawl_url() {
    local url="$1"
    local output_file="$3"

    if [[ -z "$url" ]]; then
        print_error "URL is required"
        return 1
    fi

    print_header "Crawling URL: $url"

    # Ensure the Crawl4AI Docker container is up before issuing API calls
    if ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
        print_warning "Docker container is not running. Starting it..."
        if ! docker_start; then
            return 1
        fi
        sleep 5 # Wait for container to be ready
    fi

    local api_url="http://localhost:$DOCKER_PORT/crawl"
    local payload
    # Fixed: removed stray "return 0" / "}" lines that previously made this
    # heredoc emit invalid JSON to the crawl endpoint.
    # NOTE(review): $url is interpolated unescaped; a URL containing a double
    # quote would still break the JSON — acceptable for a helper script.
    payload=$(cat << EOF
{
  "urls": ["$url"],
  "crawler_config": {
    "type": "CrawlerRunConfig",
    "params": {
      "cache_mode": "bypass"
    }
  }
}
EOF
)

    print_info "Sending crawl request..."
    local response
    # Quote the header variable: unquoted it word-splits on the space in
    # "Content-Type: application/json" and sends a broken header.
    if response=$(curl -s -X POST "$api_url" \
        -H "$CONTENT_TYPE_JSON" \
        -d "$payload"); then

        if [[ -n "$output_file" ]]; then
            echo "$response" > "$output_file"
            print_success "Results saved to $output_file"
        else
            echo "$response" | jq '.'
        fi

        print_success "Crawl completed successfully"
    else
        print_error "Failed to crawl URL"
        return 1
    fi

    return 0
}
|
|
674
|
-
|
|
675
|
-
# Extract structured data
#
# Arguments:
#   $1 - URL to extract from (required)
#   $2 - CSS extraction schema as a JSON string (required)
#   $3 - optional output file; when omitted, extracted content prints to stdout
# Globals:
#   DOCKER_CONTAINER, DOCKER_PORT, CONTENT_TYPE_JSON (read)
# Returns:
#   0 on success, 1 on failure
extract_structured() {
    local url="$1"
    local schema="$2"
    local output_file="$3"

    if [[ -z "$url" || -z "$schema" ]]; then
        print_error "URL and schema are required"
        return 1
    fi

    print_header "Extracting structured data from: $url"

    # Ensure the Crawl4AI Docker container is up before issuing API calls
    if ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
        print_warning "Docker container is not running. Starting it..."
        if ! docker_start; then
            return 1
        fi
        sleep 5
    fi

    local api_url="http://localhost:$DOCKER_PORT/crawl"
    local payload
    # Fixed: removed stray "return 0" / "}" lines that previously corrupted
    # the JSON payload sent to the crawl endpoint.
    # NOTE(review): $schema is embedded verbatim and must itself be valid JSON.
    payload=$(cat << EOF
{
  "urls": ["$url"],
  "crawler_config": {
    "type": "CrawlerRunConfig",
    "params": {
      "extraction_strategy": {
        "type": "JsonCssExtractionStrategy",
        "params": {
          "schema": {
            "type": "dict",
            "value": $schema
          }
        }
      },
      "cache_mode": "bypass"
    }
  }
}
EOF
)

    print_info "Sending extraction request..."
    local response
    # Quote the header variable so the "Content-Type: application/json"
    # value is passed as a single argument to curl.
    if response=$(curl -s -X POST "$api_url" \
        -H "$CONTENT_TYPE_JSON" \
        -d "$payload"); then

        if [[ -n "$output_file" ]]; then
            echo "$response" > "$output_file"
            print_success "Results saved to $output_file"
        else
            echo "$response" | jq '.results[0].extracted_content'
        fi

        print_success "Extraction completed successfully"
    else
        print_error "Failed to extract data"
        return 1
    fi

    return 0
}
|
|
743
|
-
|
|
744
|
-
# Crawl with CAPTCHA solving capabilities
#
# Generates a temporary Python script that drives Crawl4AI plus the CapSolver
# SDK, then executes it once with python3.
#
# Arguments:
#   $1 - URL to crawl (required)
#   $2 - CAPTCHA type: recaptcha_v2, recaptcha_v3, turnstile, aws_waf (required)
#   $3 - site key (required for recaptcha_v2/v3 and turnstile)
#   $4 - optional output file for the crawl results
# Globals:
#   CAPSOLVER_API_KEY, DOCKER_CONTAINER, DOCKER_PORT (read)
# Returns:
#   0 on success, 1 on failure
captcha_crawl() {
    local url="$1"
    local captcha_type="$2"
    local site_key="$3"
    local output_file="$4"

    if [[ -z "$url" || -z "$captcha_type" ]]; then
        print_error "URL and CAPTCHA type are required"
        print_info "Usage: captcha-crawl <url> <captcha_type> [site_key] [output_file]"
        print_info "CAPTCHA types: recaptcha_v2, recaptcha_v3, turnstile, aws_waf"
        return 1
    fi

    # Fail fast: validate the API key before creating any temp files
    # (previously this was checked only after the script had been written).
    if [[ -z "$CAPSOLVER_API_KEY" ]]; then
        print_error "CAPSOLVER_API_KEY environment variable not set"
        print_info "Set it with: export CAPSOLVER_API_KEY='CAP-xxxxxxxxxxxxxxxxxxxxx'"
        print_info "Get your API key from: https://dashboard.capsolver.com/dashboard/overview"
        return 1
    fi

    print_header "Crawling with CAPTCHA Solving: $url"
    print_info "CAPTCHA Type: $captcha_type"

    # Ensure the Crawl4AI Docker container is up
    if ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
        print_warning "Docker container is not running. Starting it..."
        if ! docker_start; then
            return 1
        fi
        sleep 5
    fi

    # Use mktemp instead of the predictable /tmp/captcha_crawl_$$.py name
    # (python3 does not need a .py extension to run the file).
    local temp_script
    if ! temp_script=$(mktemp "${TMPDIR:-/tmp}/captcha_crawl_XXXXXX"); then
        print_error "Failed to create temporary script"
        return 1
    fi

    # NOTE(review): $url, $captcha_type and $site_key are interpolated into
    # the generated Python source; values containing quotes would break it.
    cat > "$temp_script" << EOF
#!/usr/bin/env python3
import asyncio
import capsolver
import os
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode

# Get CapSolver API key from environment
api_key = os.getenv('CAPSOLVER_API_KEY')
if not api_key:
    print("❌ Error: CAPSOLVER_API_KEY environment variable not set")
    print("💡 Set it with: export CAPSOLVER_API_KEY='CAP-xxxxxxxxxxxxxxxxxxxxx'")
    exit(1)

capsolver.api_key = api_key

async def crawl_with_captcha():
    url = "$url"
    captcha_type = "$captcha_type"
    site_key = "$site_key"

    browser_config = BrowserConfig(
        verbose=True,
        headless=False,
        use_persistent_context=True,
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        # Initial page load
        print(f"🔄 Loading page: {url}")
        await crawler.arun(
            url=url,
            cache_mode=CacheMode.BYPASS,
            session_id="captcha_crawl_session"
        )

        # Solve CAPTCHA based on type
        if captcha_type == "recaptcha_v2":
            if not site_key:
                print("❌ Error: site_key required for reCAPTCHA v2")
                return

            print("🔄 Solving reCAPTCHA v2...")
            solution = capsolver.solve({
                "type": "ReCaptchaV2TaskProxyLess",
                "websiteURL": url,
                "websiteKey": site_key,
            })
            token = solution["gRecaptchaResponse"]

            js_code = f'''
            const textarea = document.getElementById('g-recaptcha-response');
            if (textarea) {{
                textarea.value = '{token}';
                console.log('✅ reCAPTCHA v2 token injected');
            }}
            '''

        elif captcha_type == "recaptcha_v3":
            if not site_key:
                print("❌ Error: site_key required for reCAPTCHA v3")
                return

            print("🔄 Solving reCAPTCHA v3...")
            solution = capsolver.solve({
                "type": "ReCaptchaV3TaskProxyLess",
                "websiteURL": url,
                "websiteKey": site_key,
                "pageAction": "submit",
            })
            token = solution["gRecaptchaResponse"]

            js_code = f'''
            const originalFetch = window.fetch;
            window.fetch = function(...args) {{
                if (typeof args[0] === 'string' && args[0].includes('recaptcha')) {{
                    console.log('🔄 Hooking reCAPTCHA v3 request');
                    // Replace token in request
                }}
                return originalFetch.apply(this, args);
            }};
            console.log('✅ reCAPTCHA v3 hook installed');
            '''

        elif captcha_type == "turnstile":
            if not site_key:
                print("❌ Error: site_key required for Cloudflare Turnstile")
                return

            print("🔄 Solving Cloudflare Turnstile...")
            solution = capsolver.solve({
                "type": "AntiTurnstileTaskProxyLess",
                "websiteURL": url,
                "websiteKey": site_key,
            })
            token = solution["token"]

            js_code = f'''
            const input = document.querySelector('input[name="cf-turnstile-response"]');
            if (input) {{
                input.value = '{token}';
                console.log('✅ Turnstile token injected');
            }}
            '''

        elif captcha_type == "aws_waf":
            print("🔄 Solving AWS WAF...")
            solution = capsolver.solve({
                "type": "AntiAwsWafTaskProxyLess",
                "websiteURL": url,
            })
            cookie = solution["cookie"]

            js_code = f'''
            document.cookie = 'aws-waf-token={cookie};path=/';
            console.log('✅ AWS WAF cookie set');
            location.reload();
            '''

        else:
            print(f"❌ Error: Unsupported CAPTCHA type: {captcha_type}")
            return

        # Execute JavaScript and continue crawling
        run_config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
            session_id="captcha_crawl_session",
            js_code=js_code,
            js_only=True,
        )

        result = await crawler.arun(url=url, config=run_config)
        print("🎉 CAPTCHA solved and page crawled successfully!")

        return result.markdown

if __name__ == "__main__":
    result = asyncio.run(crawl_with_captcha())
    if result:
        print("📄 Crawled content:")
        print(result[:500] + "..." if len(result) > 500 else result)
EOF

    print_info "Running CAPTCHA-enabled crawl..."
    # Run the script exactly once and capture its output; the previous
    # implementation re-ran the whole script a second time just to populate
    # the output file, which solved (and paid for) the CAPTCHA twice.
    local crawl_output
    if crawl_output=$(python3 "$temp_script" 2>&1); then
        print_success "CAPTCHA crawl completed successfully"
        if [[ -n "$output_file" ]]; then
            printf '%s\n' "$crawl_output" > "$output_file"
            print_info "Results saved to: $output_file"
        else
            printf '%s\n' "$crawl_output"
        fi
    else
        print_error "CAPTCHA crawl failed"
        printf '%s\n' "$crawl_output" >&2
        rm -f "$temp_script"
        return 1
    fi

    rm -f "$temp_script"
    return 0
}
|
|
940
|
-
|
|
941
|
-
# Check service status
#
# Reports the state of the Crawl4AI Python package, the Docker container and
# its HTTP API, and the local MCP configuration file.
# Globals:
#   DOCKER_CONTAINER, DOCKER_PORT, CONFIG_DIR (read)
# Returns:
#   0 always (results are reported via print_* helpers)
check_status() {
    print_header "Checking Crawl4AI Service Status"

    # --- Python package ---
    if ! command -v crawl4ai-doctor &> /dev/null; then
        print_warning "Python package: Not installed"
    else
        print_info "Python package: Installed"
        if crawl4ai-doctor &> /dev/null; then
            print_success "Python package: Working"
        else
            print_warning "Python package: Issues detected"
        fi
    fi

    # --- Docker container and API endpoint ---
    if ! check_docker; then
        print_warning "Docker: Not available"
    elif ! docker ps -q -f name="$DOCKER_CONTAINER" | grep -q .; then
        print_warning "Docker container: Not running"
    else
        print_success "Docker container: Running"

        local endpoint="http://localhost:$DOCKER_PORT/health"
        if curl -s "$endpoint" &> /dev/null; then
            print_success "API endpoint: Healthy"
            print_info "Dashboard: http://localhost:$DOCKER_PORT/dashboard"
            print_info "Playground: http://localhost:$DOCKER_PORT/playground"
        else
            print_warning "API endpoint: Not responding"
        fi
    fi

    # --- MCP configuration ---
    local mcp_file="$CONFIG_DIR/crawl4ai-mcp-config.json"
    if [[ -f "$mcp_file" ]]; then
        print_success "MCP configuration: Available"
    else
        print_warning "MCP configuration: Not setup"
    fi

    return 0
}
|
|
988
|
-
|
|
989
|
-
# Show help
#
# Prints usage, the command list, examples, and documentation pointers via a
# single here-document instead of a chain of echo statements.
# Globals: HELP_SHOW_MESSAGE (read)
# Returns: 0 always
show_help() {
    cat << EOF
Crawl4AI Helper Script
Usage: $0 [command] [options]

Commands:
 install - Install Crawl4AI Python package
 docker-setup - Setup Docker deployment with monitoring
 docker-start - Start Docker container
 docker-stop - Stop Docker container
 mcp-setup - Setup MCP server integration
 capsolver-setup - Setup CapSolver CAPTCHA solving integration
 crawl [url] [format] [file] - Crawl URL and extract content
 extract [url] [schema] [file] - Extract structured data
 captcha-crawl [url] [type] [key] [file] - Crawl with CAPTCHA solving
 status - Check Crawl4AI service status
 help - $HELP_SHOW_MESSAGE

Examples:
 $0 install
 $0 docker-setup
 $0 docker-start
 $0 crawl https://example.com markdown output.json
 $0 extract https://example.com '{"title":"h1"}' data.json
 $0 captcha-crawl https://example.com recaptcha_v2 6LfW6wATAAAAAHLqO2pb8bDBahxlMxNdo9g947u9
 $0 status

Documentation:
 GitHub: https://github.com/unclecode/crawl4ai
 Docs: https://docs.crawl4ai.com/
 Framework docs: .agent/CRAWL4AI.md
EOF
    return 0
}
|
|
1022
|
-
|
|
1023
|
-
# Main function
#
# Dispatches the first CLI argument to the matching command handler; remaining
# arguments are forwarded positionally. Defaults to the help screen.
# Returns:
#   0 on success, 1 on unknown command
main() {
    local cmd="${1:-help}"

    case "$cmd" in
        install)         install_crawl4ai ;;
        docker-setup)    docker_setup ;;
        docker-start)    docker_start ;;
        docker-stop)     docker_stop ;;
        mcp-setup)       mcp_setup ;;
        capsolver-setup) capsolver_setup ;;
        crawl)           crawl_url "${2-}" "${3-}" "${4-}" ;;
        extract)         extract_structured "${2-}" "${3-}" "${4-}" ;;
        captcha-crawl)   captcha_crawl "${2-}" "${3-}" "${4-}" "${5-}" ;;
        status)          check_status ;;
        help | -h | --help | "") show_help ;;
        *)
            print_error "$ERROR_UNKNOWN_COMMAND $cmd"
            show_help
            return 1
            ;;
    esac

    return 0
}
|
|
1075
|
-
|
|
1076
|
-
# Dispatch all CLI arguments to the command handler.
main "$@"

# Propagate main's exit status; the previous unconditional "exit 0" masked
# failures (e.g. an unknown command makes main return 1, but the script
# still exited successfully).
exit $?