aidevops 2.52.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/AGENTS.md +614 -0
- package/.agent/accounts.md +65 -0
- package/.agent/aidevops/add-new-mcp-to-aidevops.md +456 -0
- package/.agent/aidevops/api-integrations.md +335 -0
- package/.agent/aidevops/architecture.md +510 -0
- package/.agent/aidevops/configs.md +274 -0
- package/.agent/aidevops/docs.md +244 -0
- package/.agent/aidevops/extension.md +311 -0
- package/.agent/aidevops/mcp-integrations.md +340 -0
- package/.agent/aidevops/mcp-troubleshooting.md +162 -0
- package/.agent/aidevops/memory-patterns.md +172 -0
- package/.agent/aidevops/providers.md +217 -0
- package/.agent/aidevops/recommendations.md +321 -0
- package/.agent/aidevops/requirements.md +301 -0
- package/.agent/aidevops/resources.md +214 -0
- package/.agent/aidevops/security-requirements.md +174 -0
- package/.agent/aidevops/security.md +350 -0
- package/.agent/aidevops/service-links.md +400 -0
- package/.agent/aidevops/services.md +357 -0
- package/.agent/aidevops/setup.md +153 -0
- package/.agent/aidevops/troubleshooting.md +389 -0
- package/.agent/aidevops.md +124 -0
- package/.agent/build-plus.md +244 -0
- package/.agent/content/guidelines.md +109 -0
- package/.agent/content.md +87 -0
- package/.agent/health.md +59 -0
- package/.agent/legal.md +59 -0
- package/.agent/loop-state/full-loop.local.md +16 -0
- package/.agent/loop-state/ralph-loop.local.md +10 -0
- package/.agent/marketing.md +440 -0
- package/.agent/memory/README.md +260 -0
- package/.agent/onboarding.md +796 -0
- package/.agent/plan-plus.md +245 -0
- package/.agent/research.md +100 -0
- package/.agent/sales.md +333 -0
- package/.agent/scripts/101domains-helper.sh +701 -0
- package/.agent/scripts/add-missing-returns.sh +140 -0
- package/.agent/scripts/agent-browser-helper.sh +311 -0
- package/.agent/scripts/agno-setup.sh +712 -0
- package/.agent/scripts/ahrefs-mcp-wrapper.js +168 -0
- package/.agent/scripts/aidevops-update-check.sh +71 -0
- package/.agent/scripts/ampcode-cli.sh +522 -0
- package/.agent/scripts/auto-version-bump.sh +156 -0
- package/.agent/scripts/autogen-helper.sh +512 -0
- package/.agent/scripts/beads-sync-helper.sh +596 -0
- package/.agent/scripts/closte-helper.sh +5 -0
- package/.agent/scripts/cloudron-helper.sh +321 -0
- package/.agent/scripts/codacy-cli-chunked.sh +581 -0
- package/.agent/scripts/codacy-cli.sh +442 -0
- package/.agent/scripts/code-audit-helper.sh +5 -0
- package/.agent/scripts/coderabbit-cli.sh +417 -0
- package/.agent/scripts/coderabbit-pro-analysis.sh +238 -0
- package/.agent/scripts/commands/code-simplifier.md +86 -0
- package/.agent/scripts/commands/full-loop.md +246 -0
- package/.agent/scripts/commands/postflight-loop.md +103 -0
- package/.agent/scripts/commands/recall.md +182 -0
- package/.agent/scripts/commands/remember.md +132 -0
- package/.agent/scripts/commands/save-todo.md +175 -0
- package/.agent/scripts/commands/session-review.md +154 -0
- package/.agent/scripts/comprehensive-quality-fix.sh +106 -0
- package/.agent/scripts/context-builder-helper.sh +522 -0
- package/.agent/scripts/coolify-cli-helper.sh +674 -0
- package/.agent/scripts/coolify-helper.sh +380 -0
- package/.agent/scripts/crawl4ai-examples.sh +401 -0
- package/.agent/scripts/crawl4ai-helper.sh +1078 -0
- package/.agent/scripts/crewai-helper.sh +681 -0
- package/.agent/scripts/dev-browser-helper.sh +513 -0
- package/.agent/scripts/dns-helper.sh +396 -0
- package/.agent/scripts/domain-research-helper.sh +917 -0
- package/.agent/scripts/dspy-helper.sh +285 -0
- package/.agent/scripts/dspyground-helper.sh +291 -0
- package/.agent/scripts/eeat-score-helper.sh +1242 -0
- package/.agent/scripts/efficient-return-fix.sh +92 -0
- package/.agent/scripts/extract-opencode-prompts.sh +128 -0
- package/.agent/scripts/find-missing-returns.sh +113 -0
- package/.agent/scripts/fix-auth-headers.sh +104 -0
- package/.agent/scripts/fix-common-strings.sh +254 -0
- package/.agent/scripts/fix-content-type.sh +100 -0
- package/.agent/scripts/fix-error-messages.sh +130 -0
- package/.agent/scripts/fix-misplaced-returns.sh +74 -0
- package/.agent/scripts/fix-remaining-literals.sh +152 -0
- package/.agent/scripts/fix-return-statements.sh +41 -0
- package/.agent/scripts/fix-s131-default-cases.sh +249 -0
- package/.agent/scripts/fix-sc2155-simple.sh +102 -0
- package/.agent/scripts/fix-shellcheck-critical.sh +187 -0
- package/.agent/scripts/fix-string-literals.sh +273 -0
- package/.agent/scripts/full-loop-helper.sh +773 -0
- package/.agent/scripts/generate-opencode-agents.sh +497 -0
- package/.agent/scripts/generate-opencode-commands.sh +1629 -0
- package/.agent/scripts/generate-skills.sh +366 -0
- package/.agent/scripts/git-platforms-helper.sh +640 -0
- package/.agent/scripts/gitea-cli-helper.sh +743 -0
- package/.agent/scripts/github-cli-helper.sh +702 -0
- package/.agent/scripts/gitlab-cli-helper.sh +682 -0
- package/.agent/scripts/gsc-add-user-helper.sh +325 -0
- package/.agent/scripts/gsc-sitemap-helper.sh +678 -0
- package/.agent/scripts/hetzner-helper.sh +485 -0
- package/.agent/scripts/hostinger-helper.sh +229 -0
- package/.agent/scripts/keyword-research-helper.sh +1815 -0
- package/.agent/scripts/langflow-helper.sh +544 -0
- package/.agent/scripts/linkedin-automation.py +241 -0
- package/.agent/scripts/linter-manager.sh +599 -0
- package/.agent/scripts/linters-local.sh +434 -0
- package/.agent/scripts/list-keys-helper.sh +488 -0
- package/.agent/scripts/local-browser-automation.py +339 -0
- package/.agent/scripts/localhost-helper.sh +744 -0
- package/.agent/scripts/loop-common.sh +806 -0
- package/.agent/scripts/mainwp-helper.sh +728 -0
- package/.agent/scripts/markdown-formatter.sh +338 -0
- package/.agent/scripts/markdown-lint-fix.sh +311 -0
- package/.agent/scripts/mass-fix-returns.sh +58 -0
- package/.agent/scripts/mcp-diagnose.sh +167 -0
- package/.agent/scripts/mcp-inspector-helper.sh +449 -0
- package/.agent/scripts/memory-helper.sh +650 -0
- package/.agent/scripts/monitor-code-review.sh +255 -0
- package/.agent/scripts/onboarding-helper.sh +706 -0
- package/.agent/scripts/opencode-github-setup-helper.sh +797 -0
- package/.agent/scripts/opencode-test-helper.sh +213 -0
- package/.agent/scripts/pagespeed-helper.sh +464 -0
- package/.agent/scripts/pandoc-helper.sh +362 -0
- package/.agent/scripts/postflight-check.sh +555 -0
- package/.agent/scripts/pre-commit-hook.sh +259 -0
- package/.agent/scripts/pre-edit-check.sh +169 -0
- package/.agent/scripts/qlty-cli.sh +356 -0
- package/.agent/scripts/quality-cli-manager.sh +525 -0
- package/.agent/scripts/quality-feedback-helper.sh +462 -0
- package/.agent/scripts/quality-fix.sh +263 -0
- package/.agent/scripts/quality-loop-helper.sh +1108 -0
- package/.agent/scripts/ralph-loop-helper.sh +836 -0
- package/.agent/scripts/ralph-upstream-check.sh +341 -0
- package/.agent/scripts/secretlint-helper.sh +847 -0
- package/.agent/scripts/servers-helper.sh +241 -0
- package/.agent/scripts/ses-helper.sh +619 -0
- package/.agent/scripts/session-review-helper.sh +404 -0
- package/.agent/scripts/setup-linters-wizard.sh +379 -0
- package/.agent/scripts/setup-local-api-keys.sh +330 -0
- package/.agent/scripts/setup-mcp-integrations.sh +472 -0
- package/.agent/scripts/shared-constants.sh +246 -0
- package/.agent/scripts/site-crawler-helper.sh +1487 -0
- package/.agent/scripts/snyk-helper.sh +940 -0
- package/.agent/scripts/sonarcloud-autofix.sh +193 -0
- package/.agent/scripts/sonarcloud-cli.sh +191 -0
- package/.agent/scripts/sonarscanner-cli.sh +455 -0
- package/.agent/scripts/spaceship-helper.sh +747 -0
- package/.agent/scripts/stagehand-helper.sh +321 -0
- package/.agent/scripts/stagehand-python-helper.sh +321 -0
- package/.agent/scripts/stagehand-python-setup.sh +441 -0
- package/.agent/scripts/stagehand-setup.sh +439 -0
- package/.agent/scripts/system-cleanup.sh +340 -0
- package/.agent/scripts/terminal-title-helper.sh +388 -0
- package/.agent/scripts/terminal-title-setup.sh +549 -0
- package/.agent/scripts/test-stagehand-both-integration.sh +317 -0
- package/.agent/scripts/test-stagehand-integration.sh +309 -0
- package/.agent/scripts/test-stagehand-python-integration.sh +341 -0
- package/.agent/scripts/todo-ready.sh +263 -0
- package/.agent/scripts/tool-version-check.sh +362 -0
- package/.agent/scripts/toon-helper.sh +469 -0
- package/.agent/scripts/twilio-helper.sh +917 -0
- package/.agent/scripts/updown-helper.sh +279 -0
- package/.agent/scripts/validate-mcp-integrations.sh +250 -0
- package/.agent/scripts/validate-version-consistency.sh +131 -0
- package/.agent/scripts/vaultwarden-helper.sh +597 -0
- package/.agent/scripts/vercel-cli-helper.sh +816 -0
- package/.agent/scripts/verify-mirrors.sh +169 -0
- package/.agent/scripts/version-manager.sh +831 -0
- package/.agent/scripts/webhosting-helper.sh +471 -0
- package/.agent/scripts/webhosting-verify.sh +238 -0
- package/.agent/scripts/wordpress-mcp-helper.sh +508 -0
- package/.agent/scripts/worktree-helper.sh +595 -0
- package/.agent/scripts/worktree-sessions.sh +577 -0
- package/.agent/seo/dataforseo.md +215 -0
- package/.agent/seo/domain-research.md +532 -0
- package/.agent/seo/eeat-score.md +659 -0
- package/.agent/seo/google-search-console.md +366 -0
- package/.agent/seo/gsc-sitemaps.md +282 -0
- package/.agent/seo/keyword-research.md +521 -0
- package/.agent/seo/serper.md +278 -0
- package/.agent/seo/site-crawler.md +387 -0
- package/.agent/seo.md +236 -0
- package/.agent/services/accounting/quickfile.md +159 -0
- package/.agent/services/communications/telfon.md +470 -0
- package/.agent/services/communications/twilio.md +569 -0
- package/.agent/services/crm/fluentcrm.md +449 -0
- package/.agent/services/email/ses.md +399 -0
- package/.agent/services/hosting/101domains.md +378 -0
- package/.agent/services/hosting/closte.md +177 -0
- package/.agent/services/hosting/cloudflare.md +251 -0
- package/.agent/services/hosting/cloudron.md +478 -0
- package/.agent/services/hosting/dns-providers.md +335 -0
- package/.agent/services/hosting/domain-purchasing.md +344 -0
- package/.agent/services/hosting/hetzner.md +327 -0
- package/.agent/services/hosting/hostinger.md +287 -0
- package/.agent/services/hosting/localhost.md +419 -0
- package/.agent/services/hosting/spaceship.md +353 -0
- package/.agent/services/hosting/webhosting.md +330 -0
- package/.agent/social-media.md +69 -0
- package/.agent/templates/plans-template.md +114 -0
- package/.agent/templates/prd-template.md +129 -0
- package/.agent/templates/tasks-template.md +108 -0
- package/.agent/templates/todo-template.md +89 -0
- package/.agent/tools/ai-assistants/agno.md +471 -0
- package/.agent/tools/ai-assistants/capsolver.md +326 -0
- package/.agent/tools/ai-assistants/configuration.md +221 -0
- package/.agent/tools/ai-assistants/overview.md +209 -0
- package/.agent/tools/ai-assistants/status.md +171 -0
- package/.agent/tools/ai-assistants/windsurf.md +193 -0
- package/.agent/tools/ai-orchestration/autogen.md +406 -0
- package/.agent/tools/ai-orchestration/crewai.md +445 -0
- package/.agent/tools/ai-orchestration/langflow.md +405 -0
- package/.agent/tools/ai-orchestration/openprose.md +487 -0
- package/.agent/tools/ai-orchestration/overview.md +362 -0
- package/.agent/tools/ai-orchestration/packaging.md +647 -0
- package/.agent/tools/browser/agent-browser.md +464 -0
- package/.agent/tools/browser/browser-automation.md +400 -0
- package/.agent/tools/browser/chrome-devtools.md +282 -0
- package/.agent/tools/browser/crawl4ai-integration.md +422 -0
- package/.agent/tools/browser/crawl4ai-resources.md +277 -0
- package/.agent/tools/browser/crawl4ai-usage.md +416 -0
- package/.agent/tools/browser/crawl4ai.md +585 -0
- package/.agent/tools/browser/dev-browser.md +341 -0
- package/.agent/tools/browser/pagespeed.md +260 -0
- package/.agent/tools/browser/playwright.md +266 -0
- package/.agent/tools/browser/playwriter.md +310 -0
- package/.agent/tools/browser/stagehand-examples.md +456 -0
- package/.agent/tools/browser/stagehand-python.md +483 -0
- package/.agent/tools/browser/stagehand.md +421 -0
- package/.agent/tools/build-agent/agent-review.md +224 -0
- package/.agent/tools/build-agent/build-agent.md +784 -0
- package/.agent/tools/build-mcp/aidevops-plugin.md +476 -0
- package/.agent/tools/build-mcp/api-wrapper.md +445 -0
- package/.agent/tools/build-mcp/build-mcp.md +240 -0
- package/.agent/tools/build-mcp/deployment.md +401 -0
- package/.agent/tools/build-mcp/server-patterns.md +632 -0
- package/.agent/tools/build-mcp/transports.md +366 -0
- package/.agent/tools/code-review/auditing.md +383 -0
- package/.agent/tools/code-review/automation.md +219 -0
- package/.agent/tools/code-review/best-practices.md +203 -0
- package/.agent/tools/code-review/codacy.md +151 -0
- package/.agent/tools/code-review/code-simplifier.md +174 -0
- package/.agent/tools/code-review/code-standards.md +309 -0
- package/.agent/tools/code-review/coderabbit.md +101 -0
- package/.agent/tools/code-review/management.md +155 -0
- package/.agent/tools/code-review/qlty.md +248 -0
- package/.agent/tools/code-review/secretlint.md +565 -0
- package/.agent/tools/code-review/setup.md +250 -0
- package/.agent/tools/code-review/snyk.md +563 -0
- package/.agent/tools/code-review/tools.md +230 -0
- package/.agent/tools/content/summarize.md +353 -0
- package/.agent/tools/context/augment-context-engine.md +468 -0
- package/.agent/tools/context/context-builder-agent.md +76 -0
- package/.agent/tools/context/context-builder.md +375 -0
- package/.agent/tools/context/context7.md +371 -0
- package/.agent/tools/context/dspy.md +302 -0
- package/.agent/tools/context/dspyground.md +374 -0
- package/.agent/tools/context/llm-tldr.md +219 -0
- package/.agent/tools/context/osgrep.md +488 -0
- package/.agent/tools/context/prompt-optimization.md +338 -0
- package/.agent/tools/context/toon.md +292 -0
- package/.agent/tools/conversion/pandoc.md +304 -0
- package/.agent/tools/credentials/api-key-management.md +154 -0
- package/.agent/tools/credentials/api-key-setup.md +224 -0
- package/.agent/tools/credentials/environment-variables.md +180 -0
- package/.agent/tools/credentials/vaultwarden.md +382 -0
- package/.agent/tools/data-extraction/outscraper.md +974 -0
- package/.agent/tools/deployment/coolify-cli.md +388 -0
- package/.agent/tools/deployment/coolify-setup.md +353 -0
- package/.agent/tools/deployment/coolify.md +345 -0
- package/.agent/tools/deployment/vercel.md +390 -0
- package/.agent/tools/git/authentication.md +132 -0
- package/.agent/tools/git/gitea-cli.md +193 -0
- package/.agent/tools/git/github-actions.md +207 -0
- package/.agent/tools/git/github-cli.md +223 -0
- package/.agent/tools/git/gitlab-cli.md +190 -0
- package/.agent/tools/git/opencode-github-security.md +350 -0
- package/.agent/tools/git/opencode-github.md +328 -0
- package/.agent/tools/git/opencode-gitlab.md +252 -0
- package/.agent/tools/git/security.md +196 -0
- package/.agent/tools/git.md +207 -0
- package/.agent/tools/opencode/oh-my-opencode.md +375 -0
- package/.agent/tools/opencode/opencode-anthropic-auth.md +446 -0
- package/.agent/tools/opencode/opencode.md +651 -0
- package/.agent/tools/social-media/bird.md +437 -0
- package/.agent/tools/task-management/beads.md +336 -0
- package/.agent/tools/terminal/terminal-title.md +251 -0
- package/.agent/tools/ui/shadcn.md +196 -0
- package/.agent/tools/ui/ui-skills.md +115 -0
- package/.agent/tools/wordpress/localwp.md +311 -0
- package/.agent/tools/wordpress/mainwp.md +391 -0
- package/.agent/tools/wordpress/scf.md +527 -0
- package/.agent/tools/wordpress/wp-admin.md +729 -0
- package/.agent/tools/wordpress/wp-dev.md +940 -0
- package/.agent/tools/wordpress/wp-preferred.md +398 -0
- package/.agent/tools/wordpress.md +95 -0
- package/.agent/workflows/branch/bugfix.md +63 -0
- package/.agent/workflows/branch/chore.md +95 -0
- package/.agent/workflows/branch/experiment.md +115 -0
- package/.agent/workflows/branch/feature.md +59 -0
- package/.agent/workflows/branch/hotfix.md +98 -0
- package/.agent/workflows/branch/refactor.md +92 -0
- package/.agent/workflows/branch/release.md +96 -0
- package/.agent/workflows/branch.md +347 -0
- package/.agent/workflows/bug-fixing.md +267 -0
- package/.agent/workflows/changelog.md +129 -0
- package/.agent/workflows/code-audit-remote.md +279 -0
- package/.agent/workflows/conversation-starter.md +69 -0
- package/.agent/workflows/error-feedback.md +578 -0
- package/.agent/workflows/feature-development.md +355 -0
- package/.agent/workflows/git-workflow.md +702 -0
- package/.agent/workflows/multi-repo-workspace.md +268 -0
- package/.agent/workflows/plans.md +709 -0
- package/.agent/workflows/postflight.md +604 -0
- package/.agent/workflows/pr.md +571 -0
- package/.agent/workflows/preflight.md +278 -0
- package/.agent/workflows/ralph-loop.md +773 -0
- package/.agent/workflows/release.md +498 -0
- package/.agent/workflows/session-manager.md +254 -0
- package/.agent/workflows/session-review.md +311 -0
- package/.agent/workflows/sql-migrations.md +631 -0
- package/.agent/workflows/version-bump.md +283 -0
- package/.agent/workflows/wiki-update.md +333 -0
- package/.agent/workflows/worktree.md +477 -0
- package/LICENSE +21 -0
- package/README.md +1446 -0
- package/VERSION +1 -0
- package/aidevops.sh +1746 -0
- package/bin/aidevops +21 -0
- package/package.json +75 -0
- package/scripts/npm-postinstall.js +60 -0
- package/setup.sh +2366 -0
|
@@ -0,0 +1,585 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: AI-powered web crawling and content extraction
|
|
3
|
+
mode: subagent
|
|
4
|
+
tools:
|
|
5
|
+
read: true
|
|
6
|
+
write: false
|
|
7
|
+
edit: false
|
|
8
|
+
bash: true
|
|
9
|
+
glob: true
|
|
10
|
+
grep: true
|
|
11
|
+
webfetch: true
|
|
12
|
+
task: true
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
# Crawl4AI Integration Guide
|
|
16
|
+
|
|
17
|
+
<!-- AI-CONTEXT-START -->
|
|
18
|
+
|
|
19
|
+
## Quick Reference
|
|
20
|
+
|
|
21
|
+
- **Purpose**: #1 AI/LLM web crawler - markdown output for RAG pipelines
|
|
22
|
+
- **Install**: `./.agent/scripts/crawl4ai-helper.sh install`
|
|
23
|
+
- **Docker**: `./.agent/scripts/crawl4ai-helper.sh docker-start`
|
|
24
|
+
- **MCP Setup**: `./.agent/scripts/crawl4ai-helper.sh mcp-setup`
|
|
25
|
+
|
|
26
|
+
**Endpoints** (Docker):
|
|
27
|
+
- API: http://localhost:11235
|
|
28
|
+
- Dashboard: http://localhost:11235/dashboard
|
|
29
|
+
- Playground: http://localhost:11235/playground
|
|
30
|
+
|
|
31
|
+
**Commands**: `install|docker-setup|docker-start|mcp-setup|capsolver-setup|status|crawl|extract|captcha-crawl`
|
|
32
|
+
|
|
33
|
+
**Key Features**:
|
|
34
|
+
- LLM-ready markdown output
|
|
35
|
+
- CSS/XPath/LLM extraction strategies
|
|
36
|
+
- CAPTCHA solving via CapSolver
|
|
37
|
+
- Parallel async crawling
|
|
38
|
+
- Session management & browser pool
|
|
39
|
+
|
|
40
|
+
**Env Vars**: `OPENAI_API_KEY`, `CAPSOLVER_API_KEY`, `CRAWL4AI_MAX_PAGES=50`
|
|
41
|
+
<!-- AI-CONTEXT-END -->
|
|
42
|
+
|
|
43
|
+
## 🚀 Overview
|
|
44
|
+
|
|
45
|
+
Crawl4AI is the #1 trending open-source web crawler on GitHub, specifically designed for AI and LLM applications. This integration provides comprehensive web crawling and data extraction capabilities for the AI DevOps Framework.
|
|
46
|
+
|
|
47
|
+
### Key Features
|
|
48
|
+
|
|
49
|
+
- **🤖 LLM-Ready Output**: Clean markdown generation perfect for RAG pipelines
|
|
50
|
+
- **📊 Structured Extraction**: CSS selectors, XPath, and LLM-based data extraction
|
|
51
|
+
- **🎛️ Advanced Browser Control**: Hooks, proxies, stealth modes, session management
|
|
52
|
+
- **⚡ High Performance**: Parallel crawling, async operations, real-time processing
|
|
53
|
+
- **🔌 AI Integration**: Native MCP support for AI assistants like Claude
|
|
54
|
+
- **📈 Enterprise Features**: Monitoring dashboard, job queues, webhook notifications
|
|
55
|
+
- **🤖 CAPTCHA Solving**: Integrated CapSolver support for automated CAPTCHA bypass
|
|
56
|
+
- **🛡️ Anti-Bot Measures**: Handle Cloudflare, AWS WAF, and other protection systems
|
|
57
|
+
|
|
58
|
+
## 🛠️ Quick Start
|
|
59
|
+
|
|
60
|
+
### Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# Install Crawl4AI Python package
|
|
64
|
+
./.agent/scripts/crawl4ai-helper.sh install
|
|
65
|
+
|
|
66
|
+
# Setup Docker deployment with monitoring
|
|
67
|
+
./.agent/scripts/crawl4ai-helper.sh docker-setup
|
|
68
|
+
|
|
69
|
+
# Start Docker container
|
|
70
|
+
./.agent/scripts/crawl4ai-helper.sh docker-start
|
|
71
|
+
|
|
72
|
+
# Setup MCP integration for AI assistants
|
|
73
|
+
./.agent/scripts/crawl4ai-helper.sh mcp-setup
|
|
74
|
+
|
|
75
|
+
# Setup CapSolver for CAPTCHA solving
|
|
76
|
+
./.agent/scripts/crawl4ai-helper.sh capsolver-setup
|
|
77
|
+
|
|
78
|
+
# Check status
|
|
79
|
+
./.agent/scripts/crawl4ai-helper.sh status
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Basic Usage
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# Crawl a single URL
|
|
86
|
+
./.agent/scripts/crawl4ai-helper.sh crawl https://example.com markdown output.json
|
|
87
|
+
|
|
88
|
+
# Extract structured data
|
|
89
|
+
./.agent/scripts/crawl4ai-helper.sh extract https://example.com '{"title":"h1","content":".article"}' data.json
|
|
90
|
+
|
|
91
|
+
# Crawl with CAPTCHA solving (requires CapSolver API key)
|
|
92
|
+
export CAPSOLVER_API_KEY="CAP-xxxxxxxxxxxxxxxxxxxxx"
|
|
93
|
+
./.agent/scripts/crawl4ai-helper.sh captcha-crawl https://example.com recaptcha_v2 6LfW6wATAAAAAHLqO2pb8bDBahxlMxNdo9g947u9
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## 🐳 Docker Deployment
|
|
97
|
+
|
|
98
|
+
The Docker deployment includes a comprehensive suite of features:
|
|
99
|
+
|
|
100
|
+
### Services Available
|
|
101
|
+
|
|
102
|
+
- **API Server**: http://localhost:11235
|
|
103
|
+
- **Monitoring Dashboard**: http://localhost:11235/dashboard
|
|
104
|
+
- **Interactive Playground**: http://localhost:11235/playground
|
|
105
|
+
- **Health Check**: http://localhost:11235/health
|
|
106
|
+
- **Metrics**: http://localhost:11235/metrics
|
|
107
|
+
|
|
108
|
+
### Key Features
|
|
109
|
+
|
|
110
|
+
- **Real-time Monitoring**: System health, memory usage, request tracking
|
|
111
|
+
- **Browser Pool Management**: Efficient browser instance management
|
|
112
|
+
- **Job Queue System**: Asynchronous processing with webhook notifications
|
|
113
|
+
- **WebSocket Streaming**: Real-time crawl results
|
|
114
|
+
- **Multi-architecture Support**: AMD64 and ARM64 compatibility
|
|
115
|
+
|
|
116
|
+
## 🔌 MCP Integration
|
|
117
|
+
|
|
118
|
+
Crawl4AI provides native Model Context Protocol (MCP) support for AI assistants:
|
|
119
|
+
|
|
120
|
+
### Claude Desktop Setup
|
|
121
|
+
|
|
122
|
+
Add to your Claude Desktop configuration:
|
|
123
|
+
|
|
124
|
+
```json
|
|
125
|
+
{
|
|
126
|
+
"mcpServers": {
|
|
127
|
+
"crawl4ai": {
|
|
128
|
+
"command": "npx",
|
|
129
|
+
"args": ["crawl4ai-mcp-server@latest"],
|
|
130
|
+
"env": {
|
|
131
|
+
"CRAWL4AI_API_URL": "http://localhost:11235"
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Available MCP Tools
|
|
139
|
+
|
|
140
|
+
- **crawl_url**: Crawl single URL with format options
|
|
141
|
+
- **crawl_multiple**: Batch crawl multiple URLs
|
|
142
|
+
- **extract_structured**: Extract data using CSS selectors or LLM
|
|
143
|
+
- **take_screenshot**: Capture webpage screenshots
|
|
144
|
+
- **generate_pdf**: Convert webpages to PDF
|
|
145
|
+
- **execute_javascript**: Run custom JavaScript on pages
|
|
146
|
+
- **solve_captcha**: Solve CAPTCHA challenges using CapSolver
|
|
147
|
+
- **crawl_with_captcha**: Crawl URLs with automatic CAPTCHA solving
|
|
148
|
+
- **check_captcha_balance**: Monitor CapSolver account balance
|
|
149
|
+
|
|
150
|
+
## 🤖 CapSolver Integration for CAPTCHA Solving
|
|
151
|
+
|
|
152
|
+
Crawl4AI integrates with CapSolver, the world's leading automated CAPTCHA solving service, to handle anti-bot measures seamlessly.
|
|
153
|
+
|
|
154
|
+
### Supported CAPTCHA Types
|
|
155
|
+
|
|
156
|
+
- **reCAPTCHA v2/v3**: Including Enterprise versions with high success rates
|
|
157
|
+
- **Cloudflare Turnstile**: Modern CAPTCHA alternative bypass
|
|
158
|
+
- **Cloudflare Challenge**: 5-second shield and anti-bot protection
|
|
159
|
+
- **AWS WAF**: Web Application Firewall bypass
|
|
160
|
+
- **GeeTest v3/v4**: Popular CAPTCHA system in Asia
|
|
161
|
+
- **Image-to-Text**: Traditional OCR-based CAPTCHAs
|
|
162
|
+
|
|
163
|
+
### Quick Setup
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
# Setup CapSolver integration
|
|
167
|
+
./.agent/scripts/crawl4ai-helper.sh capsolver-setup
|
|
168
|
+
|
|
169
|
+
# Get API key from https://dashboard.capsolver.com/
|
|
170
|
+
export CAPSOLVER_API_KEY="CAP-xxxxxxxxxxxxxxxxxxxxx"
|
|
171
|
+
|
|
172
|
+
# Crawl with CAPTCHA solving
|
|
173
|
+
./.agent/scripts/crawl4ai-helper.sh captcha-crawl https://example.com recaptcha_v2 site_key_here
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Pricing & Performance
|
|
177
|
+
|
|
178
|
+
- **Cost**: Starting from $0.4/1000 requests
|
|
179
|
+
- **Speed**: Most CAPTCHAs solved in < 10 seconds
|
|
180
|
+
- **Success Rate**: 99.9% accuracy
|
|
181
|
+
- **Package Discounts**: Up to 60% savings available
|
|
182
|
+
|
|
183
|
+
### Integration Methods
|
|
184
|
+
|
|
185
|
+
1. **API Integration** (Recommended): Direct Python SDK integration
|
|
186
|
+
2. **Browser Extension**: Automatic detection and solving
|
|
187
|
+
|
|
188
|
+
## 📊 Core Capabilities
|
|
189
|
+
|
|
190
|
+
### 1. Web Crawling
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
import asyncio
|
|
194
|
+
from crawl4ai import AsyncWebCrawler
|
|
195
|
+
|
|
196
|
+
async def basic_crawl():
|
|
197
|
+
async with AsyncWebCrawler() as crawler:
|
|
198
|
+
result = await crawler.arun(url="https://example.com")
|
|
199
|
+
return result.markdown
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### 2. Structured Data Extraction
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from crawl4ai import JsonCssExtractionStrategy
|
|
206
|
+
|
|
207
|
+
schema = {
|
|
208
|
+
"name": "Product Schema",
|
|
209
|
+
"baseSelector": ".product",
|
|
210
|
+
"fields": [
|
|
211
|
+
{"name": "title", "selector": "h2", "type": "text"},
|
|
212
|
+
{"name": "price", "selector": ".price", "type": "text"},
|
|
213
|
+
{"name": "image", "selector": "img", "type": "attribute", "attribute": "src"}
|
|
214
|
+
]
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
extraction_strategy = JsonCssExtractionStrategy(schema)
|
|
218
|
+
result = await crawler.arun(url="https://shop.com", extraction_strategy=extraction_strategy)
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### 3. LLM-Powered Extraction
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
from crawl4ai import LLMExtractionStrategy, LLMConfig
|
|
225
|
+
|
|
226
|
+
llm_strategy = LLMExtractionStrategy(
|
|
227
|
+
llm_config=LLMConfig(provider="openai/gpt-4o"),
|
|
228
|
+
instruction="Extract key information and create a summary"
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
result = await crawler.arun(url="https://article.com", extraction_strategy=llm_strategy)
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
### 4. Advanced Browser Control
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
# Custom hooks for advanced control
|
|
238
|
+
async def setup_hook(page, context, **kwargs):
|
|
239
|
+
# Block images for faster crawling
|
|
240
|
+
await context.route("**/*.{png,jpg,gif}", lambda r: r.abort())
|
|
241
|
+
# Set custom viewport
|
|
242
|
+
await page.set_viewport_size({"width": 1920, "height": 1080})
|
|
243
|
+
return page
|
|
244
|
+
|
|
245
|
+
result = await crawler.arun(
|
|
246
|
+
url="https://example.com",
|
|
247
|
+
hooks={"on_page_context_created": setup_hook}
|
|
248
|
+
)
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## 🔄 Job Queue & Webhooks

### Asynchronous Processing

```python
import requests

# Submit crawl job
response = requests.post("http://localhost:11235/crawl/job", json={
    "urls": ["https://example.com"],
    "webhook_config": {
        "webhook_url": "https://your-app.com/webhook",
        "webhook_data_in_payload": True,
        "webhook_headers": {
            "X-Webhook-Secret": "your-secret-token"
        }
    }
})

task_id = response.json()["task_id"]
```

### Webhook Handler

```python
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/webhook', methods=['POST'])
def handle_webhook():
    payload = request.json

    if payload['status'] == 'completed':
        # Process successful crawl
        data = payload['data']
        markdown = data.get('markdown', '')
        extracted = data.get('extracted_content', {})

        # Your processing logic here
        print(f"Crawl completed: {len(markdown)} characters extracted")

    elif payload['status'] == 'failed':
        # Handle failure
        error = payload.get('error', 'Unknown error')
        print(f"Crawl failed: {error}")

    return jsonify({"status": "received"}), 200
```
## 🎯 Use Cases

### 1. Content Research & Analysis

```bash
# Research articles and papers
./.agent/scripts/crawl4ai-helper.sh extract https://research-paper.com '{
  "title": "h1",
  "authors": ".authors",
  "abstract": ".abstract",
  "sections": {
    "selector": ".section",
    "fields": [
      {"name": "heading", "selector": "h2", "type": "text"},
      {"name": "content", "selector": "p", "type": "text"}
    ]
  }
}' research.json
```

### 2. E-commerce Data Collection

```bash
# Product information extraction
./.agent/scripts/crawl4ai-helper.sh extract https://ecommerce.com/product '{
  "name": "h1.product-title",
  "price": ".price-current",
  "description": ".product-description",
  "specifications": {
    "selector": ".specs tr",
    "fields": [
      {"name": "feature", "selector": "td:first-child", "type": "text"},
      {"name": "value", "selector": "td:last-child", "type": "text"}
    ]
  },
  "images": {"selector": ".product-images img", "type": "attribute", "attribute": "src"}
}' product.json
```

### 3. News Aggregation

```bash
# Multiple news sources
urls=("https://news1.com" "https://news2.com" "https://news3.com")

for url in "${urls[@]}"; do
  ./.agent/scripts/crawl4ai-helper.sh extract "$url" '{
    "headline": "h1",
    "summary": ".article-summary",
    "author": ".byline",
    "date": ".publish-date",
    "content": ".article-body"
  }' "news-$(basename "$url").json"
done
```

### 4. Documentation Processing

```bash
# API documentation extraction
./.agent/scripts/crawl4ai-helper.sh extract https://api-docs.com '{
  "endpoints": {
    "selector": ".endpoint",
    "fields": [
      {"name": "method", "selector": ".method", "type": "text"},
      {"name": "path", "selector": ".path", "type": "text"},
      {"name": "description", "selector": ".description", "type": "text"},
      {"name": "parameters", "selector": ".params", "type": "html"},
      {"name": "examples", "selector": ".examples", "type": "html"}
    ]
  }
}' api-docs.json
```
## 🔧 Configuration

### Environment Variables

```bash
# LLM Configuration
OPENAI_API_KEY=sk-your-key
ANTHROPIC_API_KEY=your-anthropic-key
LLM_PROVIDER=openai/gpt-4o-mini
LLM_TEMPERATURE=0.7

# Crawl4AI Settings
CRAWL4AI_MAX_PAGES=50
CRAWL4AI_TIMEOUT=60
CRAWL4AI_DEFAULT_FORMAT=markdown
CRAWL4AI_CONCURRENT_REQUESTS=5
```

### Docker Configuration

```yaml
# docker-compose.yml
services:
  crawl4ai:
    image: unclecode/crawl4ai:latest
    ports:
      - "11235:11235"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - LLM_PROVIDER=openai/gpt-4o-mini
    volumes:
      - /dev/shm:/dev/shm
    shm_size: 1g
```
## 📊 Monitoring & Analytics

### Dashboard Features

Access the monitoring dashboard at http://localhost:11235/dashboard:

- **System Metrics**: CPU, memory, network utilization
- **Request Analytics**: Success rates, response times, error tracking
- **Browser Pool**: Active/hot/cold browser instances
- **Job Queue**: Pending, processing, completed jobs
- **Real-time Logs**: Live system and application logs

### API Metrics

```bash
# Prometheus metrics
curl http://localhost:11235/metrics

# Health status
curl http://localhost:11235/health | jq '.'

# API schema
curl http://localhost:11235/schema | jq '.'
```
## 🔒 Security & Best Practices

### Rate Limiting

```yaml
rate_limiting:
  enabled: true
  default_limit: "1000/minute"
  trusted_proxies: []
```

### Security Headers

```yaml
security:
  headers:
    x_content_type_options: "nosniff"
    x_frame_options: "DENY"
    content_security_policy: "default-src 'self'"
    strict_transport_security: "max-age=63072000"
```

### Safe Crawling

- **Respect robots.txt**: Enabled by default
- **Rate limiting**: Built-in delays between requests
- **User agent identification**: Clear identification as Crawl4AI
- **Timeout protection**: Prevents hanging requests
- **Resource blocking**: Block unnecessary resources for performance
## 🛠️ Advanced Features

### Adaptive Crawling

```python
from crawl4ai import AdaptiveCrawler, AdaptiveConfig

config = AdaptiveConfig(
    confidence_threshold=0.7,
    max_depth=5,
    max_pages=20,
    strategy="statistical"
)

adaptive_crawler = AdaptiveCrawler(crawler, config)
state = await adaptive_crawler.digest(
    start_url="https://news.example.com",
    query="latest technology news"
)
```

### Virtual Scroll Support

```python
from crawl4ai import VirtualScrollConfig

scroll_config = VirtualScrollConfig(
    container_selector="[data-testid='feed']",
    scroll_count=20,
    scroll_by="container_height",
    wait_after_scroll=1.0
)

result = await crawler.arun(
    url="https://infinite-scroll-site.com",
    virtual_scroll_config=scroll_config
)
```

### Session Management

```python
# Persistent browser sessions
browser_config = BrowserConfig(
    use_persistent_context=True,
    user_data_dir="/path/to/profile",
    headless=True
)

async with AsyncWebCrawler(config=browser_config) as crawler:
    # Session persists across requests
    result1 = await crawler.arun("https://site.com/login")
    result2 = await crawler.arun("https://site.com/dashboard")
```
## 🔧 Troubleshooting

### Common Issues

1. **Container won't start**: Check Docker memory allocation

   ```bash
   docker run --shm-size=1g unclecode/crawl4ai:latest
   ```

2. **API not responding**: Verify container status and port mapping

   ```bash
   docker ps | grep crawl4ai
   curl http://localhost:11235/health
   ```

3. **Extraction failing**: Validate CSS selectors or LLM configuration

   ```bash
   # Test in playground
   open http://localhost:11235/playground
   ```

### Debug Commands

```bash
# Check comprehensive status
./.agent/scripts/crawl4ai-helper.sh status

# View container logs
docker logs crawl4ai --tail 50 --follow

# Test basic functionality
curl -X POST http://localhost:11235/crawl \
  -H "Content-Type: application/json" \
  -d '{"urls": ["https://httpbin.org/html"]}'
```
## 📚 Resources

### Framework Integration

- **Helper Script**: `.agent/scripts/crawl4ai-helper.sh`
- **Configuration Template**: `configs/crawl4ai-config.json.txt`
- **MCP Configuration**: `configs/mcp-templates/crawl4ai-mcp-config.json`
- **Integration Guide**: `.agent/wiki/crawl4ai-integration.md`
- **Usage Guide**: `.agent/spec/crawl4ai-usage.md`

### Official Resources

- **Documentation**: https://docs.crawl4ai.com/
- **GitHub Repository**: https://github.com/unclecode/crawl4ai
- **Docker Hub**: https://hub.docker.com/r/unclecode/crawl4ai
- **Discord Community**: https://discord.gg/jP8KfhDhyN

## 🎯 Next Steps

1. **Install and Setup**: Run `./.agent/scripts/crawl4ai-helper.sh install`
2. **Start Docker Services**: Run `./.agent/scripts/crawl4ai-helper.sh docker-start`
3. **Explore Dashboard**: Visit http://localhost:11235/dashboard
4. **Try Playground**: Test crawling at http://localhost:11235/playground
5. **Setup MCP**: Run `./.agent/scripts/crawl4ai-helper.sh mcp-setup`
6. **Build Applications**: Use the API for your specific use cases

Crawl4AI transforms web data into AI-ready formats, making it perfect for RAG systems, data pipelines, and AI-powered applications within the AI DevOps Framework.