aidevops 2.52.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/AGENTS.md +614 -0
- package/.agent/accounts.md +65 -0
- package/.agent/aidevops/add-new-mcp-to-aidevops.md +456 -0
- package/.agent/aidevops/api-integrations.md +335 -0
- package/.agent/aidevops/architecture.md +510 -0
- package/.agent/aidevops/configs.md +274 -0
- package/.agent/aidevops/docs.md +244 -0
- package/.agent/aidevops/extension.md +311 -0
- package/.agent/aidevops/mcp-integrations.md +340 -0
- package/.agent/aidevops/mcp-troubleshooting.md +162 -0
- package/.agent/aidevops/memory-patterns.md +172 -0
- package/.agent/aidevops/providers.md +217 -0
- package/.agent/aidevops/recommendations.md +321 -0
- package/.agent/aidevops/requirements.md +301 -0
- package/.agent/aidevops/resources.md +214 -0
- package/.agent/aidevops/security-requirements.md +174 -0
- package/.agent/aidevops/security.md +350 -0
- package/.agent/aidevops/service-links.md +400 -0
- package/.agent/aidevops/services.md +357 -0
- package/.agent/aidevops/setup.md +153 -0
- package/.agent/aidevops/troubleshooting.md +389 -0
- package/.agent/aidevops.md +124 -0
- package/.agent/build-plus.md +244 -0
- package/.agent/content/guidelines.md +109 -0
- package/.agent/content.md +87 -0
- package/.agent/health.md +59 -0
- package/.agent/legal.md +59 -0
- package/.agent/loop-state/full-loop.local.md +16 -0
- package/.agent/loop-state/ralph-loop.local.md +10 -0
- package/.agent/marketing.md +440 -0
- package/.agent/memory/README.md +260 -0
- package/.agent/onboarding.md +796 -0
- package/.agent/plan-plus.md +245 -0
- package/.agent/research.md +100 -0
- package/.agent/sales.md +333 -0
- package/.agent/scripts/101domains-helper.sh +701 -0
- package/.agent/scripts/add-missing-returns.sh +140 -0
- package/.agent/scripts/agent-browser-helper.sh +311 -0
- package/.agent/scripts/agno-setup.sh +712 -0
- package/.agent/scripts/ahrefs-mcp-wrapper.js +168 -0
- package/.agent/scripts/aidevops-update-check.sh +71 -0
- package/.agent/scripts/ampcode-cli.sh +522 -0
- package/.agent/scripts/auto-version-bump.sh +156 -0
- package/.agent/scripts/autogen-helper.sh +512 -0
- package/.agent/scripts/beads-sync-helper.sh +596 -0
- package/.agent/scripts/closte-helper.sh +5 -0
- package/.agent/scripts/cloudron-helper.sh +321 -0
- package/.agent/scripts/codacy-cli-chunked.sh +581 -0
- package/.agent/scripts/codacy-cli.sh +442 -0
- package/.agent/scripts/code-audit-helper.sh +5 -0
- package/.agent/scripts/coderabbit-cli.sh +417 -0
- package/.agent/scripts/coderabbit-pro-analysis.sh +238 -0
- package/.agent/scripts/commands/code-simplifier.md +86 -0
- package/.agent/scripts/commands/full-loop.md +246 -0
- package/.agent/scripts/commands/postflight-loop.md +103 -0
- package/.agent/scripts/commands/recall.md +182 -0
- package/.agent/scripts/commands/remember.md +132 -0
- package/.agent/scripts/commands/save-todo.md +175 -0
- package/.agent/scripts/commands/session-review.md +154 -0
- package/.agent/scripts/comprehensive-quality-fix.sh +106 -0
- package/.agent/scripts/context-builder-helper.sh +522 -0
- package/.agent/scripts/coolify-cli-helper.sh +674 -0
- package/.agent/scripts/coolify-helper.sh +380 -0
- package/.agent/scripts/crawl4ai-examples.sh +401 -0
- package/.agent/scripts/crawl4ai-helper.sh +1078 -0
- package/.agent/scripts/crewai-helper.sh +681 -0
- package/.agent/scripts/dev-browser-helper.sh +513 -0
- package/.agent/scripts/dns-helper.sh +396 -0
- package/.agent/scripts/domain-research-helper.sh +917 -0
- package/.agent/scripts/dspy-helper.sh +285 -0
- package/.agent/scripts/dspyground-helper.sh +291 -0
- package/.agent/scripts/eeat-score-helper.sh +1242 -0
- package/.agent/scripts/efficient-return-fix.sh +92 -0
- package/.agent/scripts/extract-opencode-prompts.sh +128 -0
- package/.agent/scripts/find-missing-returns.sh +113 -0
- package/.agent/scripts/fix-auth-headers.sh +104 -0
- package/.agent/scripts/fix-common-strings.sh +254 -0
- package/.agent/scripts/fix-content-type.sh +100 -0
- package/.agent/scripts/fix-error-messages.sh +130 -0
- package/.agent/scripts/fix-misplaced-returns.sh +74 -0
- package/.agent/scripts/fix-remaining-literals.sh +152 -0
- package/.agent/scripts/fix-return-statements.sh +41 -0
- package/.agent/scripts/fix-s131-default-cases.sh +249 -0
- package/.agent/scripts/fix-sc2155-simple.sh +102 -0
- package/.agent/scripts/fix-shellcheck-critical.sh +187 -0
- package/.agent/scripts/fix-string-literals.sh +273 -0
- package/.agent/scripts/full-loop-helper.sh +773 -0
- package/.agent/scripts/generate-opencode-agents.sh +497 -0
- package/.agent/scripts/generate-opencode-commands.sh +1629 -0
- package/.agent/scripts/generate-skills.sh +366 -0
- package/.agent/scripts/git-platforms-helper.sh +640 -0
- package/.agent/scripts/gitea-cli-helper.sh +743 -0
- package/.agent/scripts/github-cli-helper.sh +702 -0
- package/.agent/scripts/gitlab-cli-helper.sh +682 -0
- package/.agent/scripts/gsc-add-user-helper.sh +325 -0
- package/.agent/scripts/gsc-sitemap-helper.sh +678 -0
- package/.agent/scripts/hetzner-helper.sh +485 -0
- package/.agent/scripts/hostinger-helper.sh +229 -0
- package/.agent/scripts/keyword-research-helper.sh +1815 -0
- package/.agent/scripts/langflow-helper.sh +544 -0
- package/.agent/scripts/linkedin-automation.py +241 -0
- package/.agent/scripts/linter-manager.sh +599 -0
- package/.agent/scripts/linters-local.sh +434 -0
- package/.agent/scripts/list-keys-helper.sh +488 -0
- package/.agent/scripts/local-browser-automation.py +339 -0
- package/.agent/scripts/localhost-helper.sh +744 -0
- package/.agent/scripts/loop-common.sh +806 -0
- package/.agent/scripts/mainwp-helper.sh +728 -0
- package/.agent/scripts/markdown-formatter.sh +338 -0
- package/.agent/scripts/markdown-lint-fix.sh +311 -0
- package/.agent/scripts/mass-fix-returns.sh +58 -0
- package/.agent/scripts/mcp-diagnose.sh +167 -0
- package/.agent/scripts/mcp-inspector-helper.sh +449 -0
- package/.agent/scripts/memory-helper.sh +650 -0
- package/.agent/scripts/monitor-code-review.sh +255 -0
- package/.agent/scripts/onboarding-helper.sh +706 -0
- package/.agent/scripts/opencode-github-setup-helper.sh +797 -0
- package/.agent/scripts/opencode-test-helper.sh +213 -0
- package/.agent/scripts/pagespeed-helper.sh +464 -0
- package/.agent/scripts/pandoc-helper.sh +362 -0
- package/.agent/scripts/postflight-check.sh +555 -0
- package/.agent/scripts/pre-commit-hook.sh +259 -0
- package/.agent/scripts/pre-edit-check.sh +169 -0
- package/.agent/scripts/qlty-cli.sh +356 -0
- package/.agent/scripts/quality-cli-manager.sh +525 -0
- package/.agent/scripts/quality-feedback-helper.sh +462 -0
- package/.agent/scripts/quality-fix.sh +263 -0
- package/.agent/scripts/quality-loop-helper.sh +1108 -0
- package/.agent/scripts/ralph-loop-helper.sh +836 -0
- package/.agent/scripts/ralph-upstream-check.sh +341 -0
- package/.agent/scripts/secretlint-helper.sh +847 -0
- package/.agent/scripts/servers-helper.sh +241 -0
- package/.agent/scripts/ses-helper.sh +619 -0
- package/.agent/scripts/session-review-helper.sh +404 -0
- package/.agent/scripts/setup-linters-wizard.sh +379 -0
- package/.agent/scripts/setup-local-api-keys.sh +330 -0
- package/.agent/scripts/setup-mcp-integrations.sh +472 -0
- package/.agent/scripts/shared-constants.sh +246 -0
- package/.agent/scripts/site-crawler-helper.sh +1487 -0
- package/.agent/scripts/snyk-helper.sh +940 -0
- package/.agent/scripts/sonarcloud-autofix.sh +193 -0
- package/.agent/scripts/sonarcloud-cli.sh +191 -0
- package/.agent/scripts/sonarscanner-cli.sh +455 -0
- package/.agent/scripts/spaceship-helper.sh +747 -0
- package/.agent/scripts/stagehand-helper.sh +321 -0
- package/.agent/scripts/stagehand-python-helper.sh +321 -0
- package/.agent/scripts/stagehand-python-setup.sh +441 -0
- package/.agent/scripts/stagehand-setup.sh +439 -0
- package/.agent/scripts/system-cleanup.sh +340 -0
- package/.agent/scripts/terminal-title-helper.sh +388 -0
- package/.agent/scripts/terminal-title-setup.sh +549 -0
- package/.agent/scripts/test-stagehand-both-integration.sh +317 -0
- package/.agent/scripts/test-stagehand-integration.sh +309 -0
- package/.agent/scripts/test-stagehand-python-integration.sh +341 -0
- package/.agent/scripts/todo-ready.sh +263 -0
- package/.agent/scripts/tool-version-check.sh +362 -0
- package/.agent/scripts/toon-helper.sh +469 -0
- package/.agent/scripts/twilio-helper.sh +917 -0
- package/.agent/scripts/updown-helper.sh +279 -0
- package/.agent/scripts/validate-mcp-integrations.sh +250 -0
- package/.agent/scripts/validate-version-consistency.sh +131 -0
- package/.agent/scripts/vaultwarden-helper.sh +597 -0
- package/.agent/scripts/vercel-cli-helper.sh +816 -0
- package/.agent/scripts/verify-mirrors.sh +169 -0
- package/.agent/scripts/version-manager.sh +831 -0
- package/.agent/scripts/webhosting-helper.sh +471 -0
- package/.agent/scripts/webhosting-verify.sh +238 -0
- package/.agent/scripts/wordpress-mcp-helper.sh +508 -0
- package/.agent/scripts/worktree-helper.sh +595 -0
- package/.agent/scripts/worktree-sessions.sh +577 -0
- package/.agent/seo/dataforseo.md +215 -0
- package/.agent/seo/domain-research.md +532 -0
- package/.agent/seo/eeat-score.md +659 -0
- package/.agent/seo/google-search-console.md +366 -0
- package/.agent/seo/gsc-sitemaps.md +282 -0
- package/.agent/seo/keyword-research.md +521 -0
- package/.agent/seo/serper.md +278 -0
- package/.agent/seo/site-crawler.md +387 -0
- package/.agent/seo.md +236 -0
- package/.agent/services/accounting/quickfile.md +159 -0
- package/.agent/services/communications/telfon.md +470 -0
- package/.agent/services/communications/twilio.md +569 -0
- package/.agent/services/crm/fluentcrm.md +449 -0
- package/.agent/services/email/ses.md +399 -0
- package/.agent/services/hosting/101domains.md +378 -0
- package/.agent/services/hosting/closte.md +177 -0
- package/.agent/services/hosting/cloudflare.md +251 -0
- package/.agent/services/hosting/cloudron.md +478 -0
- package/.agent/services/hosting/dns-providers.md +335 -0
- package/.agent/services/hosting/domain-purchasing.md +344 -0
- package/.agent/services/hosting/hetzner.md +327 -0
- package/.agent/services/hosting/hostinger.md +287 -0
- package/.agent/services/hosting/localhost.md +419 -0
- package/.agent/services/hosting/spaceship.md +353 -0
- package/.agent/services/hosting/webhosting.md +330 -0
- package/.agent/social-media.md +69 -0
- package/.agent/templates/plans-template.md +114 -0
- package/.agent/templates/prd-template.md +129 -0
- package/.agent/templates/tasks-template.md +108 -0
- package/.agent/templates/todo-template.md +89 -0
- package/.agent/tools/ai-assistants/agno.md +471 -0
- package/.agent/tools/ai-assistants/capsolver.md +326 -0
- package/.agent/tools/ai-assistants/configuration.md +221 -0
- package/.agent/tools/ai-assistants/overview.md +209 -0
- package/.agent/tools/ai-assistants/status.md +171 -0
- package/.agent/tools/ai-assistants/windsurf.md +193 -0
- package/.agent/tools/ai-orchestration/autogen.md +406 -0
- package/.agent/tools/ai-orchestration/crewai.md +445 -0
- package/.agent/tools/ai-orchestration/langflow.md +405 -0
- package/.agent/tools/ai-orchestration/openprose.md +487 -0
- package/.agent/tools/ai-orchestration/overview.md +362 -0
- package/.agent/tools/ai-orchestration/packaging.md +647 -0
- package/.agent/tools/browser/agent-browser.md +464 -0
- package/.agent/tools/browser/browser-automation.md +400 -0
- package/.agent/tools/browser/chrome-devtools.md +282 -0
- package/.agent/tools/browser/crawl4ai-integration.md +422 -0
- package/.agent/tools/browser/crawl4ai-resources.md +277 -0
- package/.agent/tools/browser/crawl4ai-usage.md +416 -0
- package/.agent/tools/browser/crawl4ai.md +585 -0
- package/.agent/tools/browser/dev-browser.md +341 -0
- package/.agent/tools/browser/pagespeed.md +260 -0
- package/.agent/tools/browser/playwright.md +266 -0
- package/.agent/tools/browser/playwriter.md +310 -0
- package/.agent/tools/browser/stagehand-examples.md +456 -0
- package/.agent/tools/browser/stagehand-python.md +483 -0
- package/.agent/tools/browser/stagehand.md +421 -0
- package/.agent/tools/build-agent/agent-review.md +224 -0
- package/.agent/tools/build-agent/build-agent.md +784 -0
- package/.agent/tools/build-mcp/aidevops-plugin.md +476 -0
- package/.agent/tools/build-mcp/api-wrapper.md +445 -0
- package/.agent/tools/build-mcp/build-mcp.md +240 -0
- package/.agent/tools/build-mcp/deployment.md +401 -0
- package/.agent/tools/build-mcp/server-patterns.md +632 -0
- package/.agent/tools/build-mcp/transports.md +366 -0
- package/.agent/tools/code-review/auditing.md +383 -0
- package/.agent/tools/code-review/automation.md +219 -0
- package/.agent/tools/code-review/best-practices.md +203 -0
- package/.agent/tools/code-review/codacy.md +151 -0
- package/.agent/tools/code-review/code-simplifier.md +174 -0
- package/.agent/tools/code-review/code-standards.md +309 -0
- package/.agent/tools/code-review/coderabbit.md +101 -0
- package/.agent/tools/code-review/management.md +155 -0
- package/.agent/tools/code-review/qlty.md +248 -0
- package/.agent/tools/code-review/secretlint.md +565 -0
- package/.agent/tools/code-review/setup.md +250 -0
- package/.agent/tools/code-review/snyk.md +563 -0
- package/.agent/tools/code-review/tools.md +230 -0
- package/.agent/tools/content/summarize.md +353 -0
- package/.agent/tools/context/augment-context-engine.md +468 -0
- package/.agent/tools/context/context-builder-agent.md +76 -0
- package/.agent/tools/context/context-builder.md +375 -0
- package/.agent/tools/context/context7.md +371 -0
- package/.agent/tools/context/dspy.md +302 -0
- package/.agent/tools/context/dspyground.md +374 -0
- package/.agent/tools/context/llm-tldr.md +219 -0
- package/.agent/tools/context/osgrep.md +488 -0
- package/.agent/tools/context/prompt-optimization.md +338 -0
- package/.agent/tools/context/toon.md +292 -0
- package/.agent/tools/conversion/pandoc.md +304 -0
- package/.agent/tools/credentials/api-key-management.md +154 -0
- package/.agent/tools/credentials/api-key-setup.md +224 -0
- package/.agent/tools/credentials/environment-variables.md +180 -0
- package/.agent/tools/credentials/vaultwarden.md +382 -0
- package/.agent/tools/data-extraction/outscraper.md +974 -0
- package/.agent/tools/deployment/coolify-cli.md +388 -0
- package/.agent/tools/deployment/coolify-setup.md +353 -0
- package/.agent/tools/deployment/coolify.md +345 -0
- package/.agent/tools/deployment/vercel.md +390 -0
- package/.agent/tools/git/authentication.md +132 -0
- package/.agent/tools/git/gitea-cli.md +193 -0
- package/.agent/tools/git/github-actions.md +207 -0
- package/.agent/tools/git/github-cli.md +223 -0
- package/.agent/tools/git/gitlab-cli.md +190 -0
- package/.agent/tools/git/opencode-github-security.md +350 -0
- package/.agent/tools/git/opencode-github.md +328 -0
- package/.agent/tools/git/opencode-gitlab.md +252 -0
- package/.agent/tools/git/security.md +196 -0
- package/.agent/tools/git.md +207 -0
- package/.agent/tools/opencode/oh-my-opencode.md +375 -0
- package/.agent/tools/opencode/opencode-anthropic-auth.md +446 -0
- package/.agent/tools/opencode/opencode.md +651 -0
- package/.agent/tools/social-media/bird.md +437 -0
- package/.agent/tools/task-management/beads.md +336 -0
- package/.agent/tools/terminal/terminal-title.md +251 -0
- package/.agent/tools/ui/shadcn.md +196 -0
- package/.agent/tools/ui/ui-skills.md +115 -0
- package/.agent/tools/wordpress/localwp.md +311 -0
- package/.agent/tools/wordpress/mainwp.md +391 -0
- package/.agent/tools/wordpress/scf.md +527 -0
- package/.agent/tools/wordpress/wp-admin.md +729 -0
- package/.agent/tools/wordpress/wp-dev.md +940 -0
- package/.agent/tools/wordpress/wp-preferred.md +398 -0
- package/.agent/tools/wordpress.md +95 -0
- package/.agent/workflows/branch/bugfix.md +63 -0
- package/.agent/workflows/branch/chore.md +95 -0
- package/.agent/workflows/branch/experiment.md +115 -0
- package/.agent/workflows/branch/feature.md +59 -0
- package/.agent/workflows/branch/hotfix.md +98 -0
- package/.agent/workflows/branch/refactor.md +92 -0
- package/.agent/workflows/branch/release.md +96 -0
- package/.agent/workflows/branch.md +347 -0
- package/.agent/workflows/bug-fixing.md +267 -0
- package/.agent/workflows/changelog.md +129 -0
- package/.agent/workflows/code-audit-remote.md +279 -0
- package/.agent/workflows/conversation-starter.md +69 -0
- package/.agent/workflows/error-feedback.md +578 -0
- package/.agent/workflows/feature-development.md +355 -0
- package/.agent/workflows/git-workflow.md +702 -0
- package/.agent/workflows/multi-repo-workspace.md +268 -0
- package/.agent/workflows/plans.md +709 -0
- package/.agent/workflows/postflight.md +604 -0
- package/.agent/workflows/pr.md +571 -0
- package/.agent/workflows/preflight.md +278 -0
- package/.agent/workflows/ralph-loop.md +773 -0
- package/.agent/workflows/release.md +498 -0
- package/.agent/workflows/session-manager.md +254 -0
- package/.agent/workflows/session-review.md +311 -0
- package/.agent/workflows/sql-migrations.md +631 -0
- package/.agent/workflows/version-bump.md +283 -0
- package/.agent/workflows/wiki-update.md +333 -0
- package/.agent/workflows/worktree.md +477 -0
- package/LICENSE +21 -0
- package/README.md +1446 -0
- package/VERSION +1 -0
- package/aidevops.sh +1746 -0
- package/bin/aidevops +21 -0
- package/package.json +75 -0
- package/scripts/npm-postinstall.js +60 -0
- package/setup.sh +2366 -0
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Crawl4AI MCP server integration setup
|
|
3
|
+
mode: subagent
|
|
4
|
+
tools:
|
|
5
|
+
read: true
|
|
6
|
+
write: true
|
|
7
|
+
edit: true
|
|
8
|
+
bash: true
|
|
9
|
+
glob: true
|
|
10
|
+
grep: true
|
|
11
|
+
webfetch: true
|
|
12
|
+
task: true
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
# Crawl4AI Integration Guide
|
|
16
|
+
|
|
17
|
+
<!-- AI-CONTEXT-START -->
|
|
18
|
+
|
|
19
|
+
## Quick Reference
|
|
20
|
+
|
|
21
|
+
- Crawl4AI: #1 trending open-source web crawler for AI/LLM applications
|
|
22
|
+
- Install: `./.agent/scripts/crawl4ai-helper.sh install`
|
|
23
|
+
- Docker setup: `./.agent/scripts/crawl4ai-helper.sh docker-setup`
|
|
24
|
+
- Start: `./.agent/scripts/crawl4ai-helper.sh docker-start`
|
|
25
|
+
- MCP setup: `./.agent/scripts/crawl4ai-helper.sh mcp-setup`
|
|
26
|
+
- URLs: Dashboard http://localhost:11235/dashboard, Playground http://localhost:11235/playground, REST API http://localhost:11235
|
|
27
|
+
- Crawl: `./.agent/scripts/crawl4ai-helper.sh crawl URL markdown output.json`
|
|
28
|
+
- Extract: `./.agent/scripts/crawl4ai-helper.sh extract URL '{"title":"h1"}' data.json` (schema is a JSON object mapping field names to CSS selectors)
|
|
29
|
+
- Features: LLM-ready markdown, CSS/XPath/LLM extraction, async parallel crawling
|
|
30
|
+
- MCP tools: crawl_url, crawl_multiple, extract_structured, take_screenshot, generate_pdf, execute_javascript
|
|
31
|
+
- Config: `configs/crawl4ai-config.json.txt`, `configs/mcp-templates/crawl4ai-mcp-config.json`
|
|
32
|
+
- Debug: `./.agent/scripts/crawl4ai-helper.sh status`, `docker logs crawl4ai`
|
|
33
|
+
<!-- AI-CONTEXT-END -->
|
|
34
|
+
|
|
35
|
+
## Overview
|
|
36
|
+
|
|
37
|
+
Crawl4AI is the #1 trending open-source web crawler on GitHub, specifically designed for AI and LLM applications. This integration provides comprehensive web crawling and data extraction capabilities for the AI DevOps Framework.
|
|
38
|
+
|
|
39
|
+
### Key Features
|
|
40
|
+
|
|
41
|
+
- **LLM-Ready Output**: Clean markdown generation perfect for RAG pipelines
|
|
42
|
+
- **Structured Extraction**: CSS selectors, XPath, and LLM-based data extraction
|
|
43
|
+
- **Advanced Browser Control**: Hooks, proxies, stealth modes, session management
|
|
44
|
+
- **High Performance**: Parallel crawling, async operations, real-time processing
|
|
45
|
+
- **AI Integration**: Native MCP support for AI assistants like Claude
|
|
46
|
+
- **Enterprise Features**: Monitoring dashboard, job queues, webhook notifications
|
|
47
|
+
|
|
48
|
+
## 🛠️ Installation & Setup
|
|
49
|
+
|
|
50
|
+
### Quick Start
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Install Python package
|
|
54
|
+
./.agent/scripts/crawl4ai-helper.sh install
|
|
55
|
+
|
|
56
|
+
# Setup Docker deployment
|
|
57
|
+
./.agent/scripts/crawl4ai-helper.sh docker-setup
|
|
58
|
+
|
|
59
|
+
# Start Docker container with monitoring dashboard
|
|
60
|
+
./.agent/scripts/crawl4ai-helper.sh docker-start
|
|
61
|
+
|
|
62
|
+
# Setup MCP integration for AI assistants
|
|
63
|
+
./.agent/scripts/crawl4ai-helper.sh mcp-setup
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Docker Deployment
|
|
67
|
+
|
|
68
|
+
The Docker deployment includes:
|
|
69
|
+
|
|
70
|
+
- **Real-time Monitoring Dashboard**: http://localhost:11235/dashboard
|
|
71
|
+
- **Interactive Playground**: http://localhost:11235/playground
|
|
72
|
+
- **REST API**: http://localhost:11235
|
|
73
|
+
- **WebSocket Streaming**: Real-time crawl results
|
|
74
|
+
- **Job Queue System**: Asynchronous processing with webhooks
|
|
75
|
+
|
|
76
|
+
### MCP Integration
|
|
77
|
+
|
|
78
|
+
Crawl4AI provides native MCP (Model Context Protocol) support for AI assistants:
|
|
79
|
+
|
|
80
|
+
```json
|
|
81
|
+
{
|
|
82
|
+
"crawl4ai": {
|
|
83
|
+
"command": "npx",
|
|
84
|
+
"args": ["crawl4ai-mcp-server@latest"],
|
|
85
|
+
"env": {
|
|
86
|
+
"CRAWL4AI_API_URL": "http://localhost:11235"
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## 🎯 Core Capabilities
|
|
93
|
+
|
|
94
|
+
### 1. Web Crawling
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# Basic crawling
|
|
98
|
+
./.agent/scripts/crawl4ai-helper.sh crawl https://example.com markdown output.json
|
|
99
|
+
|
|
100
|
+
# With structured extraction
|
|
101
|
+
./.agent/scripts/crawl4ai-helper.sh extract https://example.com '{"title":"h1","content":".article"}' data.json
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### 2. LLM-Powered Extraction
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
import asyncio
|
|
108
|
+
from crawl4ai import AsyncWebCrawler, LLMExtractionStrategy, LLMConfig
|
|
109
|
+
|
|
110
|
+
async def extract_with_llm():
|
|
111
|
+
async with AsyncWebCrawler() as crawler:
|
|
112
|
+
result = await crawler.arun(
|
|
113
|
+
url="https://example.com",
|
|
114
|
+
extraction_strategy=LLMExtractionStrategy(
|
|
115
|
+
llm_config=LLMConfig(provider="openai/gpt-4o"),
|
|
116
|
+
instruction="Extract key information and summarize"
|
|
117
|
+
)
|
|
118
|
+
)
|
|
119
|
+
return result.extracted_content
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### 3. Advanced Browser Control
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
# Custom hooks for advanced control
|
|
126
|
+
async def setup_hook(page, context, **kwargs):
|
|
127
|
+
# Block images for faster crawling
|
|
128
|
+
await context.route("**/*.{png,jpg,gif}", lambda r: r.abort())
|
|
129
|
+
# Set custom viewport
|
|
130
|
+
await page.set_viewport_size({"width": 1920, "height": 1080})
|
|
131
|
+
return page
|
|
132
|
+
|
|
133
|
+
result = await crawler.arun(
|
|
134
|
+
url="https://example.com",
|
|
135
|
+
hooks={"on_page_context_created": setup_hook}
|
|
136
|
+
)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### 4. Adaptive Crawling
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from crawl4ai import AdaptiveCrawler, AdaptiveConfig
|
|
143
|
+
|
|
144
|
+
config = AdaptiveConfig(
|
|
145
|
+
confidence_threshold=0.7,
|
|
146
|
+
max_depth=5,
|
|
147
|
+
max_pages=20,
|
|
148
|
+
strategy="statistical"
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
adaptive_crawler = AdaptiveCrawler(crawler, config)
|
|
152
|
+
state = await adaptive_crawler.digest(
|
|
153
|
+
start_url="https://news.example.com",
|
|
154
|
+
query="latest technology news"
|
|
155
|
+
)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## 🔧 Configuration
|
|
159
|
+
|
|
160
|
+
### Environment Variables
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
# LLM Provider Configuration
|
|
164
|
+
OPENAI_API_KEY=sk-your-key
|
|
165
|
+
ANTHROPIC_API_KEY=your-anthropic-key
|
|
166
|
+
LLM_PROVIDER=openai/gpt-4o-mini
|
|
167
|
+
LLM_TEMPERATURE=0.7
|
|
168
|
+
|
|
169
|
+
# Crawl4AI Settings
|
|
170
|
+
CRAWL4AI_MAX_PAGES=50
|
|
171
|
+
CRAWL4AI_TIMEOUT=60
|
|
172
|
+
CRAWL4AI_DEFAULT_FORMAT=markdown
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Browser Configuration
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
browser_config = BrowserConfig(
|
|
179
|
+
headless=True,
|
|
180
|
+
viewport={"width": 1920, "height": 1080},
|
|
181
|
+
user_agent="Mozilla/5.0 (compatible; Crawl4AI/0.7.7)",
|
|
182
|
+
timeout=30000,
|
|
183
|
+
extra_args=["--disable-blink-features=AutomationControlled"]
|
|
184
|
+
)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### Crawler Configuration
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
crawler_config = CrawlerRunConfig(
|
|
191
|
+
cache_mode=CacheMode.ENABLED,
|
|
192
|
+
max_depth=3,
|
|
193
|
+
delay_between_requests=1.0,
|
|
194
|
+
respect_robots_txt=True,
|
|
195
|
+
follow_redirects=True,
|
|
196
|
+
extraction_strategy=JsonCssExtractionStrategy(schema=your_schema)
|
|
197
|
+
)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## 📊 Monitoring & Analytics
|
|
201
|
+
|
|
202
|
+
### Dashboard Features
|
|
203
|
+
|
|
204
|
+
- **Real-time Metrics**: System health, memory usage, request tracking
|
|
205
|
+
- **Browser Pool Management**: Active/hot/cold browser instances
|
|
206
|
+
- **Request Analytics**: Success rates, response times, error tracking
|
|
207
|
+
- **Resource Monitoring**: CPU, memory, network utilization
|
|
208
|
+
|
|
209
|
+
### API Endpoints
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
# Health check
|
|
213
|
+
curl http://localhost:11235/health
|
|
214
|
+
|
|
215
|
+
# Prometheus metrics
|
|
216
|
+
curl http://localhost:11235/metrics
|
|
217
|
+
|
|
218
|
+
# API schema
|
|
219
|
+
curl http://localhost:11235/schema
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## 🔄 Job Queue & Webhooks
|
|
223
|
+
|
|
224
|
+
### Asynchronous Processing
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
# Submit crawl job
|
|
228
|
+
response = requests.post("http://localhost:11235/crawl/job", json={
|
|
229
|
+
"urls": ["https://example.com"],
|
|
230
|
+
"webhook_config": {
|
|
231
|
+
"webhook_url": "https://your-app.com/webhook",
|
|
232
|
+
"webhook_data_in_payload": True
|
|
233
|
+
}
|
|
234
|
+
})
|
|
235
|
+
|
|
236
|
+
task_id = response.json()["task_id"]
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
### Webhook Notifications
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
@app.route('/webhook', methods=['POST'])
|
|
243
|
+
def handle_webhook():
|
|
244
|
+
payload = request.json
|
|
245
|
+
if payload['status'] == 'completed':
|
|
246
|
+
process_results(payload['data'])
|
|
247
|
+
return "OK", 200
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## 🤖 AI Assistant Integration
|
|
251
|
+
|
|
252
|
+
### Claude Desktop Setup
|
|
253
|
+
|
|
254
|
+
Add to your Claude Desktop MCP configuration:
|
|
255
|
+
|
|
256
|
+
```json
|
|
257
|
+
{
|
|
258
|
+
"mcpServers": {
|
|
259
|
+
"crawl4ai": {
|
|
260
|
+
"command": "npx",
|
|
261
|
+
"args": ["crawl4ai-mcp-server@latest"]
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### Available MCP Tools
|
|
268
|
+
|
|
269
|
+
- `crawl_url`: Crawl single URL with format options
|
|
270
|
+
- `crawl_multiple`: Batch crawl multiple URLs
|
|
271
|
+
- `extract_structured`: Extract data using CSS or LLM
|
|
272
|
+
- `take_screenshot`: Capture webpage screenshots
|
|
273
|
+
- `generate_pdf`: Convert webpages to PDF
|
|
274
|
+
- `execute_javascript`: Run custom JavaScript on pages
|
|
275
|
+
|
|
276
|
+
## 🔒 Security & Best Practices
|
|
277
|
+
|
|
278
|
+
### Rate Limiting
|
|
279
|
+
|
|
280
|
+
```yaml
|
|
281
|
+
rate_limiting:
|
|
282
|
+
enabled: true
|
|
283
|
+
default_limit: "1000/minute"
|
|
284
|
+
trusted_proxies: []
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### Security Headers
|
|
288
|
+
|
|
289
|
+
```yaml
|
|
290
|
+
security:
|
|
291
|
+
headers:
|
|
292
|
+
x_content_type_options: "nosniff"
|
|
293
|
+
x_frame_options: "DENY"
|
|
294
|
+
content_security_policy: "default-src 'self'"
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
### Hook Security
|
|
298
|
+
|
|
299
|
+
- Never trust user-provided hook code
|
|
300
|
+
- Validate and sandbox hook execution
|
|
301
|
+
- Use timeouts to prevent infinite loops
|
|
302
|
+
- Audit hook code before deployment
|
|
303
|
+
|
|
304
|
+
## 📚 Use Cases
|
|
305
|
+
|
|
306
|
+
### 1. Content Aggregation
|
|
307
|
+
|
|
308
|
+
```python
|
|
309
|
+
# News aggregation
|
|
310
|
+
urls = ["https://news1.com", "https://news2.com", "https://news3.com"]
|
|
311
|
+
results = await crawler.arun_many(urls, extraction_strategy=news_schema)
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
### 2. E-commerce Data
|
|
315
|
+
|
|
316
|
+
```python
|
|
317
|
+
# Product information extraction
|
|
318
|
+
product_schema = {
|
|
319
|
+
"name": "h1.product-title",
|
|
320
|
+
"price": ".price",
|
|
321
|
+
"description": ".product-description",
|
|
322
|
+
"images": {"selector": "img.product-image", "type": "attribute", "attribute": "src"}
|
|
323
|
+
}
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### 3. Research & Analysis
|
|
327
|
+
|
|
328
|
+
```python
|
|
329
|
+
# Academic paper extraction
|
|
330
|
+
paper_extraction = LLMExtractionStrategy(
|
|
331
|
+
instruction="Extract title, authors, abstract, and key findings",
|
|
332
|
+
schema=paper_schema
|
|
333
|
+
)
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
### 4. SEO & Marketing
|
|
337
|
+
|
|
338
|
+
```python
|
|
339
|
+
# SEO data extraction
|
|
340
|
+
seo_schema = {
|
|
341
|
+
"title": "title",
|
|
342
|
+
"meta_description": "meta[name='description']",
|
|
343
|
+
"headings": "h1, h2, h3",
|
|
344
|
+
"links": {"selector": "a", "type": "attribute", "attribute": "href"}
|
|
345
|
+
}
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
## 🚀 Advanced Features
|
|
349
|
+
|
|
350
|
+
### Virtual Scroll Support
|
|
351
|
+
|
|
352
|
+
```python
|
|
353
|
+
scroll_config = VirtualScrollConfig(
|
|
354
|
+
container_selector="[data-testid='feed']",
|
|
355
|
+
scroll_count=20,
|
|
356
|
+
scroll_by="container_height",
|
|
357
|
+
wait_after_scroll=1.0
|
|
358
|
+
)
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
### Session Management
|
|
362
|
+
|
|
363
|
+
```python
|
|
364
|
+
# Persistent browser sessions
|
|
365
|
+
browser_config = BrowserConfig(
|
|
366
|
+
use_persistent_context=True,
|
|
367
|
+
user_data_dir="/path/to/profile"
|
|
368
|
+
)
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
### Proxy Support
|
|
372
|
+
|
|
373
|
+
```python
|
|
374
|
+
# Proxy configuration
|
|
375
|
+
browser_config = BrowserConfig(
|
|
376
|
+
proxy={
|
|
377
|
+
"server": "http://proxy.example.com:8080",
|
|
378
|
+
"username": "user",
|
|
379
|
+
"password": "pass"
|
|
380
|
+
}
|
|
381
|
+
)
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
## 🔧 Troubleshooting
|
|
385
|
+
|
|
386
|
+
### Common Issues
|
|
387
|
+
|
|
388
|
+
1. **Browser not starting**: Check the container's shared-memory size (`/dev/shm`); start it with `--shm-size=1g` or larger
|
|
389
|
+
2. **API not responding**: Verify that the container is running and that port 11235 is accessible
|
|
390
|
+
3. **Extraction failing**: Validate CSS selectors or LLM configuration
|
|
391
|
+
4. **Memory issues**: Adjust browser pool size and cleanup intervals
|
|
392
|
+
|
|
393
|
+
### Debug Commands
|
|
394
|
+
|
|
395
|
+
```bash
|
|
396
|
+
# Check service status
|
|
397
|
+
./.agent/scripts/crawl4ai-helper.sh status
|
|
398
|
+
|
|
399
|
+
# View container logs
|
|
400
|
+
docker logs crawl4ai
|
|
401
|
+
|
|
402
|
+
# Test API health
|
|
403
|
+
curl http://localhost:11235/health
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
## 📖 Resources
|
|
407
|
+
|
|
408
|
+
- **Official Documentation**: https://docs.crawl4ai.com/
|
|
409
|
+
- **GitHub Repository**: https://github.com/unclecode/crawl4ai
|
|
410
|
+
- **Framework Integration**: `.agent/scripts/crawl4ai-helper.sh`
|
|
411
|
+
- **Configuration Templates**: `configs/crawl4ai-config.json.txt`
|
|
412
|
+
- **MCP Configuration**: `configs/mcp-templates/crawl4ai-mcp-config.json`
|
|
413
|
+
|
|
414
|
+
## 🎯 Next Steps
|
|
415
|
+
|
|
416
|
+
1. **Install and Set Up**: Run the helper script to get started
|
|
417
|
+
2. **Explore Dashboard**: Visit http://localhost:11235/dashboard
|
|
418
|
+
3. **Try Playground**: Test crawling at http://localhost:11235/playground
|
|
419
|
+
4. **Set Up MCP**: Integrate with your AI assistant
|
|
420
|
+
5. **Build Applications**: Use the API for your specific use cases
|
|
421
|
+
|
|
422
|
+
Crawl4AI transforms web data into AI-ready formats, making it perfect for RAG systems, data pipelines, and AI-powered applications.
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Crawl4AI documentation and resource links
|
|
3
|
+
mode: subagent
|
|
4
|
+
tools:
|
|
5
|
+
read: true
|
|
6
|
+
write: false
|
|
7
|
+
edit: false
|
|
8
|
+
bash: false
|
|
9
|
+
glob: true
|
|
10
|
+
grep: true
|
|
11
|
+
webfetch: true
|
|
12
|
+
task: true
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
# Crawl4AI Resources & Links
|
|
16
|
+
|
|
17
|
+
<!-- AI-CONTEXT-START -->
|
|
18
|
+
|
|
19
|
+
## Quick Reference
|
|
20
|
+
|
|
21
|
+
- **Docs**: https://docs.crawl4ai.com/
|
|
22
|
+
- **GitHub**: https://github.com/unclecode/crawl4ai
|
|
23
|
+
- **Docker**: `unclecode/crawl4ai:latest`
|
|
24
|
+
- **PyPI**: https://pypi.org/project/crawl4ai/
|
|
25
|
+
- **MCP NPM**: `npx crawl4ai-mcp-server@latest`
|
|
26
|
+
- **Discord**: https://discord.gg/jP8KfhDhyN
|
|
27
|
+
- **CapSolver**: https://www.capsolver.com/ (CAPTCHA integration)
|
|
28
|
+
- **API Endpoints**: `/crawl`, `/crawl/job`, `/health`, `/metrics`, `/screenshot`, `/pdf`
|
|
29
|
+
- **Framework Files**: `.agent/scripts/crawl4ai-helper.sh`, `configs/crawl4ai-config.json.txt`
|
|
30
|
+
- **Current Version**: v0.7.7 (November 2025)
|
|
31
|
+
<!-- AI-CONTEXT-END -->
|
|
32
|
+
|
|
33
|
+
## Official Resources
|
|
34
|
+
|
|
35
|
+
### Primary Documentation
|
|
36
|
+
|
|
37
|
+
- **Official Documentation**: https://docs.crawl4ai.com/
|
|
38
|
+
- **GitHub Repository**: https://github.com/unclecode/crawl4ai
|
|
39
|
+
- **Docker Hub**: https://hub.docker.com/r/unclecode/crawl4ai
|
|
40
|
+
- **PyPI Package**: https://pypi.org/project/crawl4ai/
|
|
41
|
+
|
|
42
|
+
### Community & Support
|
|
43
|
+
|
|
44
|
+
- **Discord Community**: https://discord.gg/jP8KfhDhyN
|
|
45
|
+
- **GitHub Issues**: https://github.com/unclecode/crawl4ai/issues
|
|
46
|
+
- **GitHub Discussions**: https://github.com/unclecode/crawl4ai/discussions
|
|
47
|
+
- **Changelog**: https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md
|
|
48
|
+
|
|
49
|
+
### CapSolver Integration
|
|
50
|
+
|
|
51
|
+
- **CapSolver Homepage**: https://www.capsolver.com/
|
|
52
|
+
- **CapSolver Dashboard**: https://dashboard.capsolver.com/dashboard/overview
|
|
53
|
+
- **CapSolver Documentation**: https://docs.capsolver.com/
|
|
54
|
+
- **Crawl4AI Partnership**: https://www.capsolver.com/blog/Partners/crawl4ai-capsolver/
|
|
55
|
+
- **Chrome Extension**: https://chrome.google.com/webstore/detail/capsolver/pgojnojmmhpofjgdmaebadhbocahppod
|
|
56
|
+
|
|
57
|
+
## Documentation Sections
|
|
58
|
+
|
|
59
|
+
### Core Documentation
|
|
60
|
+
|
|
61
|
+
- **Quick Start**: https://docs.crawl4ai.com/core/quickstart/
|
|
62
|
+
- **Installation**: https://docs.crawl4ai.com/core/installation/
|
|
63
|
+
- **Docker Deployment**: https://docs.crawl4ai.com/core/docker-deployment/
|
|
64
|
+
- **API Reference**: https://docs.crawl4ai.com/api-reference/
|
|
65
|
+
|
|
66
|
+
### Advanced Features
|
|
67
|
+
|
|
68
|
+
- **Adaptive Crawling**: https://docs.crawl4ai.com/advanced/adaptive-strategies/
|
|
69
|
+
- **Virtual Scroll**: https://docs.crawl4ai.com/advanced/virtual-scroll/
|
|
70
|
+
- **Hooks & Authentication**: https://docs.crawl4ai.com/advanced/hooks-auth/
|
|
71
|
+
- **Session Management**: https://docs.crawl4ai.com/advanced/session-management/
|
|
72
|
+
|
|
73
|
+
### Extraction Strategies
|
|
74
|
+
|
|
75
|
+
- **LLM-Free Strategies**: https://docs.crawl4ai.com/extraction/llm-free-strategies/
|
|
76
|
+
- **LLM Strategies**: https://docs.crawl4ai.com/extraction/llm-strategies/
|
|
77
|
+
- **Clustering Strategies**: https://docs.crawl4ai.com/extraction/clustering-strategies/
|
|
78
|
+
- **Chunking**: https://docs.crawl4ai.com/extraction/chunking/
|
|
79
|
+
|
|
80
|
+
## Framework Integration
|
|
81
|
+
|
|
82
|
+
### Helper Scripts
|
|
83
|
+
|
|
84
|
+
- **Main Helper**: `.agent/scripts/crawl4ai-helper.sh`
|
|
85
|
+
- **Examples Script**: `.agent/scripts/crawl4ai-examples.sh`
|
|
86
|
+
- **Configuration Template**: `configs/crawl4ai-config.json.txt`
|
|
87
|
+
- **MCP Configuration**: `configs/mcp-templates/crawl4ai-mcp-config.json`
|
|
88
|
+
|
|
89
|
+
### Documentation Files
|
|
90
|
+
|
|
91
|
+
- **Main Guide**: `.agent/crawl4ai.md`
|
|
92
|
+
- **Integration Guide**: `.agent/wiki/crawl4ai-integration.md`
|
|
93
|
+
- **Usage Guide**: `.agent/spec/crawl4ai-usage.md`
|
|
94
|
+
- **Resources**: `.agent/links/crawl4ai-resources.md` (this file)
|
|
95
|
+
|
|
96
|
+
## MCP Integration
|
|
97
|
+
|
|
98
|
+
### MCP Server
|
|
99
|
+
|
|
100
|
+
- **NPM Package**: https://www.npmjs.com/package/crawl4ai-mcp-server
|
|
101
|
+
- **Installation**: `npx crawl4ai-mcp-server@latest`
|
|
102
|
+
- **Documentation**: https://docs.crawl4ai.com/core/docker-deployment/#mcp-model-context-protocol-support
|
|
103
|
+
|
|
104
|
+
### Claude Desktop Integration
|
|
105
|
+
|
|
106
|
+
```json
|
|
107
|
+
{
|
|
108
|
+
"mcpServers": {
|
|
109
|
+
"crawl4ai": {
|
|
110
|
+
"command": "npx",
|
|
111
|
+
"args": ["crawl4ai-mcp-server@latest"]
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Docker Resources
|
|
118
|
+
|
|
119
|
+
### Docker Images
|
|
120
|
+
|
|
121
|
+
- **Latest Release**: `unclecode/crawl4ai:latest`
|
|
122
|
+
- **Specific Version**: `unclecode/crawl4ai:0.7.7`
|
|
123
|
+
- **Multi-Architecture**: Supports AMD64 and ARM64
|
|
124
|
+
|
|
125
|
+
### Docker Compose
|
|
126
|
+
|
|
127
|
+
- **Example Compose**: https://github.com/unclecode/crawl4ai/blob/main/docker-compose.yml
|
|
128
|
+
- **Environment Variables**: https://docs.crawl4ai.com/core/docker-deployment/#environment-setup-api-keys
|
|
129
|
+
|
|
130
|
+
## Use Case Examples
|
|
131
|
+
|
|
132
|
+
### Content Research
|
|
133
|
+
|
|
134
|
+
- **News Aggregation**: Extract articles from multiple news sources
|
|
135
|
+
- **Academic Papers**: Extract titles, authors, abstracts, and citations
|
|
136
|
+
- **Documentation**: Process API docs and technical documentation
|
|
137
|
+
|
|
138
|
+
### E-commerce Data
|
|
139
|
+
|
|
140
|
+
- **Product Information**: Extract names, prices, descriptions, specifications
|
|
141
|
+
- **Inventory Tracking**: Monitor stock levels and price changes
|
|
142
|
+
- **Competitor Analysis**: Compare products across different sites
|
|
143
|
+
|
|
144
|
+
### SEO & Marketing
|
|
145
|
+
|
|
146
|
+
- **Content Analysis**: Extract headings, meta tags, and content structure
|
|
147
|
+
- **Link Analysis**: Discover internal and external link patterns
|
|
148
|
+
- **Performance Monitoring**: Track page changes and updates
|
|
149
|
+
|
|
150
|
+
## API Endpoints
|
|
151
|
+
|
|
152
|
+
### Core Endpoints
|
|
153
|
+
|
|
154
|
+
- **Crawl**: `POST /crawl` - Synchronous crawling
|
|
155
|
+
- **Crawl Job**: `POST /crawl/job` - Asynchronous crawling with webhooks
|
|
156
|
+
- **LLM Job**: `POST /llm/job` - LLM extraction with webhooks
|
|
157
|
+
- **Job Status**: `GET /job/{task_id}` - Check job status
|
|
158
|
+
|
|
159
|
+
### Utility Endpoints
|
|
160
|
+
|
|
161
|
+
- **Health**: `GET /health` - Service health check
|
|
162
|
+
- **Metrics**: `GET /metrics` - Prometheus metrics
|
|
163
|
+
- **Schema**: `GET /schema` - API schema documentation
|
|
164
|
+
- **Dashboard**: `GET /dashboard` - Monitoring dashboard
|
|
165
|
+
- **Playground**: `GET /playground` - Interactive testing interface
|
|
166
|
+
|
|
167
|
+
### Media Endpoints
|
|
168
|
+
|
|
169
|
+
- **Screenshot**: `POST /screenshot` - Capture page screenshots
|
|
170
|
+
- **PDF**: `POST /pdf` - Generate PDF from webpage
|
|
171
|
+
- **HTML**: `POST /html` - Extract raw HTML
|
|
172
|
+
- **JavaScript**: `POST /js` - Execute JavaScript on page
|
|
173
|
+
|
|
174
|
+
## Security Resources
|
|
175
|
+
|
|
176
|
+
### Best Practices
|
|
177
|
+
|
|
178
|
+
- **Rate Limiting**: Built-in protection against abuse
|
|
179
|
+
- **User Agent**: Clear identification as Crawl4AI
|
|
180
|
+
- **Robots.txt**: Optional robots.txt compliance (off by default; enable with `check_robots_txt=True`)
|
|
181
|
+
- **Timeout Protection**: Prevents hanging requests
|
|
182
|
+
|
|
183
|
+
### Authentication
|
|
184
|
+
|
|
185
|
+
- **JWT Support**: Optional JWT authentication for API access
|
|
186
|
+
- **API Keys**: Secure API key management for LLM providers
|
|
187
|
+
- **Webhook Security**: Custom headers for webhook authentication
|
|
188
|
+
|
|
189
|
+
## Monitoring & Analytics
|
|
190
|
+
|
|
191
|
+
### Dashboard Features
|
|
192
|
+
|
|
193
|
+
- **System Metrics**: CPU, memory, network utilization
|
|
194
|
+
- **Request Analytics**: Success rates, response times, error tracking
|
|
195
|
+
- **Browser Pool**: Active/hot/cold browser instances management
|
|
196
|
+
- **Job Queue**: Real-time job processing status
|
|
197
|
+
|
|
198
|
+
### Metrics Integration
|
|
199
|
+
|
|
200
|
+
- **Prometheus**: Native Prometheus metrics export
|
|
201
|
+
- **Health Checks**: Comprehensive health monitoring
|
|
202
|
+
- **Performance Tracking**: Request timing and resource usage
|
|
203
|
+
|
|
204
|
+
## Performance Optimization
|
|
205
|
+
|
|
206
|
+
### Configuration Tips
|
|
207
|
+
|
|
208
|
+
- **Browser Pool Size**: Optimize based on available resources
|
|
209
|
+
- **Concurrent Requests**: Balance speed vs resource usage
|
|
210
|
+
- **Memory Management**: Configure cleanup intervals and thresholds
|
|
211
|
+
- **Caching**: Use appropriate cache modes for your use case
|
|
212
|
+
|
|
213
|
+
### Resource Management
|
|
214
|
+
|
|
215
|
+
- **Docker Memory**: Allocate sufficient shared memory (`--shm-size=1g`)
|
|
216
|
+
- **CPU Throttling**: Configure CPU limits for container
|
|
217
|
+
- **Network Optimization**: Use appropriate timeouts and retry policies
|
|
218
|
+
|
|
219
|
+
## Version Information
|
|
220
|
+
|
|
221
|
+
### Current Version
|
|
222
|
+
|
|
223
|
+
- **Latest Stable**: v0.7.7
|
|
224
|
+
- **Release Date**: November 2025
|
|
225
|
+
- **Breaking Changes**: Check CHANGELOG.md for migration notes
|
|
226
|
+
|
|
227
|
+
### Version History
|
|
228
|
+
|
|
229
|
+
- **v0.7.7**: Self-hosting platform with real-time monitoring
|
|
230
|
+
- **v0.7.6**: Complete webhook infrastructure for job queue API
|
|
231
|
+
- **v0.7.5**: Docker hooks system with function-based API
|
|
232
|
+
- **v0.7.4**: Intelligent table extraction & performance updates
|
|
233
|
+
|
|
234
|
+
## Learning Resources
|
|
235
|
+
|
|
236
|
+
### Tutorials & Guides
|
|
237
|
+
|
|
238
|
+
- **Video Tutorial**: Available on documentation homepage
|
|
239
|
+
- **Code Examples**: https://github.com/unclecode/crawl4ai/tree/main/docs/examples
|
|
240
|
+
- **Blog Posts**: Check GitHub discussions for community tutorials
|
|
241
|
+
|
|
242
|
+
### Community Examples
|
|
243
|
+
|
|
244
|
+
- **GitHub Examples**: Real-world usage examples in repository
|
|
245
|
+
- **Discord Discussions**: Community-shared patterns and solutions
|
|
246
|
+
- **Stack Overflow**: Tagged questions and answers
|
|
247
|
+
|
|
248
|
+
## Contributing
|
|
249
|
+
|
|
250
|
+
### Development
|
|
251
|
+
|
|
252
|
+
- **Contributing Guide**: https://github.com/unclecode/crawl4ai/blob/main/CONTRIBUTING.md
|
|
253
|
+
- **Code of Conduct**: https://github.com/unclecode/crawl4ai/blob/main/CODE_OF_CONDUCT.md
|
|
254
|
+
- **Development Setup**: Local development instructions in README
|
|
255
|
+
|
|
256
|
+
### Sponsorship
|
|
257
|
+
|
|
258
|
+
- **GitHub Sponsors**: Support the project development
|
|
259
|
+
- **Enterprise Support**: Commercial support options available
|
|
260
|
+
- **Community Recognition**: Contributors acknowledged in project
|
|
261
|
+
|
|
262
|
+
## Support Channels
|
|
263
|
+
|
|
264
|
+
### Technical Support
|
|
265
|
+
|
|
266
|
+
1. **GitHub Issues**: Bug reports and feature requests
|
|
267
|
+
2. **Discord Community**: Real-time community support
|
|
268
|
+
3. **Documentation**: Comprehensive guides and API reference
|
|
269
|
+
4. **Stack Overflow**: Tag questions with `crawl4ai`
|
|
270
|
+
|
|
271
|
+
### Enterprise Support
|
|
272
|
+
|
|
273
|
+
- **Commercial Licensing**: Available for enterprise use
|
|
274
|
+
- **Priority Support**: Dedicated support channels
|
|
275
|
+
- **Custom Development**: Tailored solutions and integrations
|
|
276
|
+
|
|
277
|
+
This resource collection provides comprehensive access to all Crawl4AI documentation, tools, and community resources for effective integration within the AI DevOps Framework.
|