aidevops 2.52.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/AGENTS.md +614 -0
- package/.agent/accounts.md +65 -0
- package/.agent/aidevops/add-new-mcp-to-aidevops.md +456 -0
- package/.agent/aidevops/api-integrations.md +335 -0
- package/.agent/aidevops/architecture.md +510 -0
- package/.agent/aidevops/configs.md +274 -0
- package/.agent/aidevops/docs.md +244 -0
- package/.agent/aidevops/extension.md +311 -0
- package/.agent/aidevops/mcp-integrations.md +340 -0
- package/.agent/aidevops/mcp-troubleshooting.md +162 -0
- package/.agent/aidevops/memory-patterns.md +172 -0
- package/.agent/aidevops/providers.md +217 -0
- package/.agent/aidevops/recommendations.md +321 -0
- package/.agent/aidevops/requirements.md +301 -0
- package/.agent/aidevops/resources.md +214 -0
- package/.agent/aidevops/security-requirements.md +174 -0
- package/.agent/aidevops/security.md +350 -0
- package/.agent/aidevops/service-links.md +400 -0
- package/.agent/aidevops/services.md +357 -0
- package/.agent/aidevops/setup.md +153 -0
- package/.agent/aidevops/troubleshooting.md +389 -0
- package/.agent/aidevops.md +124 -0
- package/.agent/build-plus.md +244 -0
- package/.agent/content/guidelines.md +109 -0
- package/.agent/content.md +87 -0
- package/.agent/health.md +59 -0
- package/.agent/legal.md +59 -0
- package/.agent/loop-state/full-loop.local.md +16 -0
- package/.agent/loop-state/ralph-loop.local.md +10 -0
- package/.agent/marketing.md +440 -0
- package/.agent/memory/README.md +260 -0
- package/.agent/onboarding.md +796 -0
- package/.agent/plan-plus.md +245 -0
- package/.agent/research.md +100 -0
- package/.agent/sales.md +333 -0
- package/.agent/scripts/101domains-helper.sh +701 -0
- package/.agent/scripts/add-missing-returns.sh +140 -0
- package/.agent/scripts/agent-browser-helper.sh +311 -0
- package/.agent/scripts/agno-setup.sh +712 -0
- package/.agent/scripts/ahrefs-mcp-wrapper.js +168 -0
- package/.agent/scripts/aidevops-update-check.sh +71 -0
- package/.agent/scripts/ampcode-cli.sh +522 -0
- package/.agent/scripts/auto-version-bump.sh +156 -0
- package/.agent/scripts/autogen-helper.sh +512 -0
- package/.agent/scripts/beads-sync-helper.sh +596 -0
- package/.agent/scripts/closte-helper.sh +5 -0
- package/.agent/scripts/cloudron-helper.sh +321 -0
- package/.agent/scripts/codacy-cli-chunked.sh +581 -0
- package/.agent/scripts/codacy-cli.sh +442 -0
- package/.agent/scripts/code-audit-helper.sh +5 -0
- package/.agent/scripts/coderabbit-cli.sh +417 -0
- package/.agent/scripts/coderabbit-pro-analysis.sh +238 -0
- package/.agent/scripts/commands/code-simplifier.md +86 -0
- package/.agent/scripts/commands/full-loop.md +246 -0
- package/.agent/scripts/commands/postflight-loop.md +103 -0
- package/.agent/scripts/commands/recall.md +182 -0
- package/.agent/scripts/commands/remember.md +132 -0
- package/.agent/scripts/commands/save-todo.md +175 -0
- package/.agent/scripts/commands/session-review.md +154 -0
- package/.agent/scripts/comprehensive-quality-fix.sh +106 -0
- package/.agent/scripts/context-builder-helper.sh +522 -0
- package/.agent/scripts/coolify-cli-helper.sh +674 -0
- package/.agent/scripts/coolify-helper.sh +380 -0
- package/.agent/scripts/crawl4ai-examples.sh +401 -0
- package/.agent/scripts/crawl4ai-helper.sh +1078 -0
- package/.agent/scripts/crewai-helper.sh +681 -0
- package/.agent/scripts/dev-browser-helper.sh +513 -0
- package/.agent/scripts/dns-helper.sh +396 -0
- package/.agent/scripts/domain-research-helper.sh +917 -0
- package/.agent/scripts/dspy-helper.sh +285 -0
- package/.agent/scripts/dspyground-helper.sh +291 -0
- package/.agent/scripts/eeat-score-helper.sh +1242 -0
- package/.agent/scripts/efficient-return-fix.sh +92 -0
- package/.agent/scripts/extract-opencode-prompts.sh +128 -0
- package/.agent/scripts/find-missing-returns.sh +113 -0
- package/.agent/scripts/fix-auth-headers.sh +104 -0
- package/.agent/scripts/fix-common-strings.sh +254 -0
- package/.agent/scripts/fix-content-type.sh +100 -0
- package/.agent/scripts/fix-error-messages.sh +130 -0
- package/.agent/scripts/fix-misplaced-returns.sh +74 -0
- package/.agent/scripts/fix-remaining-literals.sh +152 -0
- package/.agent/scripts/fix-return-statements.sh +41 -0
- package/.agent/scripts/fix-s131-default-cases.sh +249 -0
- package/.agent/scripts/fix-sc2155-simple.sh +102 -0
- package/.agent/scripts/fix-shellcheck-critical.sh +187 -0
- package/.agent/scripts/fix-string-literals.sh +273 -0
- package/.agent/scripts/full-loop-helper.sh +773 -0
- package/.agent/scripts/generate-opencode-agents.sh +497 -0
- package/.agent/scripts/generate-opencode-commands.sh +1629 -0
- package/.agent/scripts/generate-skills.sh +366 -0
- package/.agent/scripts/git-platforms-helper.sh +640 -0
- package/.agent/scripts/gitea-cli-helper.sh +743 -0
- package/.agent/scripts/github-cli-helper.sh +702 -0
- package/.agent/scripts/gitlab-cli-helper.sh +682 -0
- package/.agent/scripts/gsc-add-user-helper.sh +325 -0
- package/.agent/scripts/gsc-sitemap-helper.sh +678 -0
- package/.agent/scripts/hetzner-helper.sh +485 -0
- package/.agent/scripts/hostinger-helper.sh +229 -0
- package/.agent/scripts/keyword-research-helper.sh +1815 -0
- package/.agent/scripts/langflow-helper.sh +544 -0
- package/.agent/scripts/linkedin-automation.py +241 -0
- package/.agent/scripts/linter-manager.sh +599 -0
- package/.agent/scripts/linters-local.sh +434 -0
- package/.agent/scripts/list-keys-helper.sh +488 -0
- package/.agent/scripts/local-browser-automation.py +339 -0
- package/.agent/scripts/localhost-helper.sh +744 -0
- package/.agent/scripts/loop-common.sh +806 -0
- package/.agent/scripts/mainwp-helper.sh +728 -0
- package/.agent/scripts/markdown-formatter.sh +338 -0
- package/.agent/scripts/markdown-lint-fix.sh +311 -0
- package/.agent/scripts/mass-fix-returns.sh +58 -0
- package/.agent/scripts/mcp-diagnose.sh +167 -0
- package/.agent/scripts/mcp-inspector-helper.sh +449 -0
- package/.agent/scripts/memory-helper.sh +650 -0
- package/.agent/scripts/monitor-code-review.sh +255 -0
- package/.agent/scripts/onboarding-helper.sh +706 -0
- package/.agent/scripts/opencode-github-setup-helper.sh +797 -0
- package/.agent/scripts/opencode-test-helper.sh +213 -0
- package/.agent/scripts/pagespeed-helper.sh +464 -0
- package/.agent/scripts/pandoc-helper.sh +362 -0
- package/.agent/scripts/postflight-check.sh +555 -0
- package/.agent/scripts/pre-commit-hook.sh +259 -0
- package/.agent/scripts/pre-edit-check.sh +169 -0
- package/.agent/scripts/qlty-cli.sh +356 -0
- package/.agent/scripts/quality-cli-manager.sh +525 -0
- package/.agent/scripts/quality-feedback-helper.sh +462 -0
- package/.agent/scripts/quality-fix.sh +263 -0
- package/.agent/scripts/quality-loop-helper.sh +1108 -0
- package/.agent/scripts/ralph-loop-helper.sh +836 -0
- package/.agent/scripts/ralph-upstream-check.sh +341 -0
- package/.agent/scripts/secretlint-helper.sh +847 -0
- package/.agent/scripts/servers-helper.sh +241 -0
- package/.agent/scripts/ses-helper.sh +619 -0
- package/.agent/scripts/session-review-helper.sh +404 -0
- package/.agent/scripts/setup-linters-wizard.sh +379 -0
- package/.agent/scripts/setup-local-api-keys.sh +330 -0
- package/.agent/scripts/setup-mcp-integrations.sh +472 -0
- package/.agent/scripts/shared-constants.sh +246 -0
- package/.agent/scripts/site-crawler-helper.sh +1487 -0
- package/.agent/scripts/snyk-helper.sh +940 -0
- package/.agent/scripts/sonarcloud-autofix.sh +193 -0
- package/.agent/scripts/sonarcloud-cli.sh +191 -0
- package/.agent/scripts/sonarscanner-cli.sh +455 -0
- package/.agent/scripts/spaceship-helper.sh +747 -0
- package/.agent/scripts/stagehand-helper.sh +321 -0
- package/.agent/scripts/stagehand-python-helper.sh +321 -0
- package/.agent/scripts/stagehand-python-setup.sh +441 -0
- package/.agent/scripts/stagehand-setup.sh +439 -0
- package/.agent/scripts/system-cleanup.sh +340 -0
- package/.agent/scripts/terminal-title-helper.sh +388 -0
- package/.agent/scripts/terminal-title-setup.sh +549 -0
- package/.agent/scripts/test-stagehand-both-integration.sh +317 -0
- package/.agent/scripts/test-stagehand-integration.sh +309 -0
- package/.agent/scripts/test-stagehand-python-integration.sh +341 -0
- package/.agent/scripts/todo-ready.sh +263 -0
- package/.agent/scripts/tool-version-check.sh +362 -0
- package/.agent/scripts/toon-helper.sh +469 -0
- package/.agent/scripts/twilio-helper.sh +917 -0
- package/.agent/scripts/updown-helper.sh +279 -0
- package/.agent/scripts/validate-mcp-integrations.sh +250 -0
- package/.agent/scripts/validate-version-consistency.sh +131 -0
- package/.agent/scripts/vaultwarden-helper.sh +597 -0
- package/.agent/scripts/vercel-cli-helper.sh +816 -0
- package/.agent/scripts/verify-mirrors.sh +169 -0
- package/.agent/scripts/version-manager.sh +831 -0
- package/.agent/scripts/webhosting-helper.sh +471 -0
- package/.agent/scripts/webhosting-verify.sh +238 -0
- package/.agent/scripts/wordpress-mcp-helper.sh +508 -0
- package/.agent/scripts/worktree-helper.sh +595 -0
- package/.agent/scripts/worktree-sessions.sh +577 -0
- package/.agent/seo/dataforseo.md +215 -0
- package/.agent/seo/domain-research.md +532 -0
- package/.agent/seo/eeat-score.md +659 -0
- package/.agent/seo/google-search-console.md +366 -0
- package/.agent/seo/gsc-sitemaps.md +282 -0
- package/.agent/seo/keyword-research.md +521 -0
- package/.agent/seo/serper.md +278 -0
- package/.agent/seo/site-crawler.md +387 -0
- package/.agent/seo.md +236 -0
- package/.agent/services/accounting/quickfile.md +159 -0
- package/.agent/services/communications/telfon.md +470 -0
- package/.agent/services/communications/twilio.md +569 -0
- package/.agent/services/crm/fluentcrm.md +449 -0
- package/.agent/services/email/ses.md +399 -0
- package/.agent/services/hosting/101domains.md +378 -0
- package/.agent/services/hosting/closte.md +177 -0
- package/.agent/services/hosting/cloudflare.md +251 -0
- package/.agent/services/hosting/cloudron.md +478 -0
- package/.agent/services/hosting/dns-providers.md +335 -0
- package/.agent/services/hosting/domain-purchasing.md +344 -0
- package/.agent/services/hosting/hetzner.md +327 -0
- package/.agent/services/hosting/hostinger.md +287 -0
- package/.agent/services/hosting/localhost.md +419 -0
- package/.agent/services/hosting/spaceship.md +353 -0
- package/.agent/services/hosting/webhosting.md +330 -0
- package/.agent/social-media.md +69 -0
- package/.agent/templates/plans-template.md +114 -0
- package/.agent/templates/prd-template.md +129 -0
- package/.agent/templates/tasks-template.md +108 -0
- package/.agent/templates/todo-template.md +89 -0
- package/.agent/tools/ai-assistants/agno.md +471 -0
- package/.agent/tools/ai-assistants/capsolver.md +326 -0
- package/.agent/tools/ai-assistants/configuration.md +221 -0
- package/.agent/tools/ai-assistants/overview.md +209 -0
- package/.agent/tools/ai-assistants/status.md +171 -0
- package/.agent/tools/ai-assistants/windsurf.md +193 -0
- package/.agent/tools/ai-orchestration/autogen.md +406 -0
- package/.agent/tools/ai-orchestration/crewai.md +445 -0
- package/.agent/tools/ai-orchestration/langflow.md +405 -0
- package/.agent/tools/ai-orchestration/openprose.md +487 -0
- package/.agent/tools/ai-orchestration/overview.md +362 -0
- package/.agent/tools/ai-orchestration/packaging.md +647 -0
- package/.agent/tools/browser/agent-browser.md +464 -0
- package/.agent/tools/browser/browser-automation.md +400 -0
- package/.agent/tools/browser/chrome-devtools.md +282 -0
- package/.agent/tools/browser/crawl4ai-integration.md +422 -0
- package/.agent/tools/browser/crawl4ai-resources.md +277 -0
- package/.agent/tools/browser/crawl4ai-usage.md +416 -0
- package/.agent/tools/browser/crawl4ai.md +585 -0
- package/.agent/tools/browser/dev-browser.md +341 -0
- package/.agent/tools/browser/pagespeed.md +260 -0
- package/.agent/tools/browser/playwright.md +266 -0
- package/.agent/tools/browser/playwriter.md +310 -0
- package/.agent/tools/browser/stagehand-examples.md +456 -0
- package/.agent/tools/browser/stagehand-python.md +483 -0
- package/.agent/tools/browser/stagehand.md +421 -0
- package/.agent/tools/build-agent/agent-review.md +224 -0
- package/.agent/tools/build-agent/build-agent.md +784 -0
- package/.agent/tools/build-mcp/aidevops-plugin.md +476 -0
- package/.agent/tools/build-mcp/api-wrapper.md +445 -0
- package/.agent/tools/build-mcp/build-mcp.md +240 -0
- package/.agent/tools/build-mcp/deployment.md +401 -0
- package/.agent/tools/build-mcp/server-patterns.md +632 -0
- package/.agent/tools/build-mcp/transports.md +366 -0
- package/.agent/tools/code-review/auditing.md +383 -0
- package/.agent/tools/code-review/automation.md +219 -0
- package/.agent/tools/code-review/best-practices.md +203 -0
- package/.agent/tools/code-review/codacy.md +151 -0
- package/.agent/tools/code-review/code-simplifier.md +174 -0
- package/.agent/tools/code-review/code-standards.md +309 -0
- package/.agent/tools/code-review/coderabbit.md +101 -0
- package/.agent/tools/code-review/management.md +155 -0
- package/.agent/tools/code-review/qlty.md +248 -0
- package/.agent/tools/code-review/secretlint.md +565 -0
- package/.agent/tools/code-review/setup.md +250 -0
- package/.agent/tools/code-review/snyk.md +563 -0
- package/.agent/tools/code-review/tools.md +230 -0
- package/.agent/tools/content/summarize.md +353 -0
- package/.agent/tools/context/augment-context-engine.md +468 -0
- package/.agent/tools/context/context-builder-agent.md +76 -0
- package/.agent/tools/context/context-builder.md +375 -0
- package/.agent/tools/context/context7.md +371 -0
- package/.agent/tools/context/dspy.md +302 -0
- package/.agent/tools/context/dspyground.md +374 -0
- package/.agent/tools/context/llm-tldr.md +219 -0
- package/.agent/tools/context/osgrep.md +488 -0
- package/.agent/tools/context/prompt-optimization.md +338 -0
- package/.agent/tools/context/toon.md +292 -0
- package/.agent/tools/conversion/pandoc.md +304 -0
- package/.agent/tools/credentials/api-key-management.md +154 -0
- package/.agent/tools/credentials/api-key-setup.md +224 -0
- package/.agent/tools/credentials/environment-variables.md +180 -0
- package/.agent/tools/credentials/vaultwarden.md +382 -0
- package/.agent/tools/data-extraction/outscraper.md +974 -0
- package/.agent/tools/deployment/coolify-cli.md +388 -0
- package/.agent/tools/deployment/coolify-setup.md +353 -0
- package/.agent/tools/deployment/coolify.md +345 -0
- package/.agent/tools/deployment/vercel.md +390 -0
- package/.agent/tools/git/authentication.md +132 -0
- package/.agent/tools/git/gitea-cli.md +193 -0
- package/.agent/tools/git/github-actions.md +207 -0
- package/.agent/tools/git/github-cli.md +223 -0
- package/.agent/tools/git/gitlab-cli.md +190 -0
- package/.agent/tools/git/opencode-github-security.md +350 -0
- package/.agent/tools/git/opencode-github.md +328 -0
- package/.agent/tools/git/opencode-gitlab.md +252 -0
- package/.agent/tools/git/security.md +196 -0
- package/.agent/tools/git.md +207 -0
- package/.agent/tools/opencode/oh-my-opencode.md +375 -0
- package/.agent/tools/opencode/opencode-anthropic-auth.md +446 -0
- package/.agent/tools/opencode/opencode.md +651 -0
- package/.agent/tools/social-media/bird.md +437 -0
- package/.agent/tools/task-management/beads.md +336 -0
- package/.agent/tools/terminal/terminal-title.md +251 -0
- package/.agent/tools/ui/shadcn.md +196 -0
- package/.agent/tools/ui/ui-skills.md +115 -0
- package/.agent/tools/wordpress/localwp.md +311 -0
- package/.agent/tools/wordpress/mainwp.md +391 -0
- package/.agent/tools/wordpress/scf.md +527 -0
- package/.agent/tools/wordpress/wp-admin.md +729 -0
- package/.agent/tools/wordpress/wp-dev.md +940 -0
- package/.agent/tools/wordpress/wp-preferred.md +398 -0
- package/.agent/tools/wordpress.md +95 -0
- package/.agent/workflows/branch/bugfix.md +63 -0
- package/.agent/workflows/branch/chore.md +95 -0
- package/.agent/workflows/branch/experiment.md +115 -0
- package/.agent/workflows/branch/feature.md +59 -0
- package/.agent/workflows/branch/hotfix.md +98 -0
- package/.agent/workflows/branch/refactor.md +92 -0
- package/.agent/workflows/branch/release.md +96 -0
- package/.agent/workflows/branch.md +347 -0
- package/.agent/workflows/bug-fixing.md +267 -0
- package/.agent/workflows/changelog.md +129 -0
- package/.agent/workflows/code-audit-remote.md +279 -0
- package/.agent/workflows/conversation-starter.md +69 -0
- package/.agent/workflows/error-feedback.md +578 -0
- package/.agent/workflows/feature-development.md +355 -0
- package/.agent/workflows/git-workflow.md +702 -0
- package/.agent/workflows/multi-repo-workspace.md +268 -0
- package/.agent/workflows/plans.md +709 -0
- package/.agent/workflows/postflight.md +604 -0
- package/.agent/workflows/pr.md +571 -0
- package/.agent/workflows/preflight.md +278 -0
- package/.agent/workflows/ralph-loop.md +773 -0
- package/.agent/workflows/release.md +498 -0
- package/.agent/workflows/session-manager.md +254 -0
- package/.agent/workflows/session-review.md +311 -0
- package/.agent/workflows/sql-migrations.md +631 -0
- package/.agent/workflows/version-bump.md +283 -0
- package/.agent/workflows/wiki-update.md +333 -0
- package/.agent/workflows/worktree.md +477 -0
- package/LICENSE +21 -0
- package/README.md +1446 -0
- package/VERSION +1 -0
- package/aidevops.sh +1746 -0
- package/bin/aidevops +21 -0
- package/package.json +75 -0
- package/scripts/npm-postinstall.js +60 -0
- package/setup.sh +2366 -0
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Crawl4AI usage patterns and best practices
|
|
3
|
+
mode: subagent
|
|
4
|
+
tools:
|
|
5
|
+
read: true
|
|
6
|
+
write: false
|
|
7
|
+
edit: false
|
|
8
|
+
bash: true
|
|
9
|
+
glob: true
|
|
10
|
+
grep: true
|
|
11
|
+
webfetch: true
|
|
12
|
+
task: true
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
# Crawl4AI Usage Guide for AI Assistants
|
|
16
|
+
|
|
17
|
+
<!-- AI-CONTEXT-START -->
|
|
18
|
+
|
|
19
|
+
## Quick Reference
|
|
20
|
+
|
|
21
|
+
- **Helper**: `.agent/scripts/crawl4ai-helper.sh`
|
|
22
|
+
- **API Port**: `localhost:11235`
|
|
23
|
+
- **Commands**: `install | docker-setup | docker-start | docker-stop | status | crawl | extract | mcp-setup`
|
|
24
|
+
- **Crawl**: `./crawl4ai-helper.sh crawl URL markdown output.json`
|
|
25
|
+
- **Extract**: `./crawl4ai-helper.sh extract URL '{"title":"h1"}' data.json`
|
|
26
|
+
- **MCP Tools**: `crawl_url | crawl_multiple | extract_structured | take_screenshot | generate_pdf`
|
|
27
|
+
- **Dashboard**: `http://localhost:11235/dashboard`
|
|
28
|
+
- **Playground**: `http://localhost:11235/playground`
|
|
29
|
+
- **Output**: JSON with markdown, html, extracted_content, links, media, metadata
|
|
30
|
+
- **Process results**: `jq -r '.results[0].markdown' output.json`
|
|
31
|
+
<!-- AI-CONTEXT-END -->
|
|
32
|
+
|
|
33
|
+
## Purpose
|
|
34
|
+
|
|
35
|
+
This guide provides AI assistants with comprehensive instructions for using Crawl4AI within the AI DevOps Framework for web crawling, data extraction, and content processing tasks.
|
|
36
|
+
|
|
37
|
+
## Quick Start Commands
|
|
38
|
+
|
|
39
|
+
### Basic Setup
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Install Crawl4AI
|
|
43
|
+
./.agent/scripts/crawl4ai-helper.sh install
|
|
44
|
+
|
|
45
|
+
# Setup Docker deployment
|
|
46
|
+
./.agent/scripts/crawl4ai-helper.sh docker-setup
|
|
47
|
+
|
|
48
|
+
# Start services
|
|
49
|
+
./.agent/scripts/crawl4ai-helper.sh docker-start
|
|
50
|
+
|
|
51
|
+
# Check status
|
|
52
|
+
./.agent/scripts/crawl4ai-helper.sh status
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### MCP Integration
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
# Setup MCP server for AI assistants
|
|
59
|
+
./.agent/scripts/crawl4ai-helper.sh mcp-setup
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Core Operations
|
|
63
|
+
|
|
64
|
+
### 1. Web Crawling
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Basic crawling - extract markdown
|
|
68
|
+
./.agent/scripts/crawl4ai-helper.sh crawl https://example.com markdown output.json
|
|
69
|
+
|
|
70
|
+
# Crawl with specific format
|
|
71
|
+
./.agent/scripts/crawl4ai-helper.sh crawl https://news.com html news.json
|
|
72
|
+
|
|
73
|
+
# Save to a specific output path
|
|
74
|
+
./.agent/scripts/crawl4ai-helper.sh crawl https://docs.com markdown ~/Downloads/docs.json
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 2. Structured Data Extraction
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# Extract with CSS selectors
|
|
81
|
+
./.agent/scripts/crawl4ai-helper.sh extract https://example.com '{"title":"h1","content":".article"}' data.json
|
|
82
|
+
|
|
83
|
+
# Complex schema extraction
|
|
84
|
+
./.agent/scripts/crawl4ai-helper.sh extract https://ecommerce.com '{
|
|
85
|
+
"products": {
|
|
86
|
+
"selector": ".product",
|
|
87
|
+
"fields": [
|
|
88
|
+
{"name": "title", "selector": "h2", "type": "text"},
|
|
89
|
+
{"name": "price", "selector": ".price", "type": "text"},
|
|
90
|
+
{"name": "image", "selector": "img", "type": "attribute", "attribute": "src"}
|
|
91
|
+
]
|
|
92
|
+
}
|
|
93
|
+
}' products.json
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## AI Assistant Integration Patterns
|
|
97
|
+
|
|
98
|
+
### For Claude Desktop
|
|
99
|
+
|
|
100
|
+
1. **Setup MCP Configuration**:
|
|
101
|
+
|
|
102
|
+
```json
|
|
103
|
+
{
|
|
104
|
+
"mcpServers": {
|
|
105
|
+
"crawl4ai": {
|
|
106
|
+
"command": "npx",
|
|
107
|
+
"args": ["crawl4ai-mcp-server@latest"]
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
2. **Available Tools**:
|
|
114
|
+
- `crawl_url`: Single URL crawling
|
|
115
|
+
- `crawl_multiple`: Batch URL processing
|
|
116
|
+
- `extract_structured`: Data extraction
|
|
117
|
+
- `take_screenshot`: Page screenshots
|
|
118
|
+
- `generate_pdf`: PDF conversion
|
|
119
|
+
|
|
120
|
+
### For Other AI Assistants
|
|
121
|
+
|
|
122
|
+
Use the REST API directly:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import requests
|
|
126
|
+
|
|
127
|
+
# Basic crawl
|
|
128
|
+
response = requests.post("http://localhost:11235/crawl", json={
|
|
129
|
+
"urls": ["https://example.com"],
|
|
130
|
+
"crawler_config": {
|
|
131
|
+
"type": "CrawlerRunConfig",
|
|
132
|
+
"params": {"cache_mode": "bypass"}
|
|
133
|
+
}
|
|
134
|
+
})
|
|
135
|
+
|
|
136
|
+
# Extract structured data
|
|
137
|
+
response = requests.post("http://localhost:11235/crawl", json={
|
|
138
|
+
"urls": ["https://example.com"],
|
|
139
|
+
"crawler_config": {
|
|
140
|
+
"type": "CrawlerRunConfig",
|
|
141
|
+
"params": {
|
|
142
|
+
"extraction_strategy": {
|
|
143
|
+
"type": "JsonCssExtractionStrategy",
|
|
144
|
+
"params": {
|
|
145
|
+
"schema": {
|
|
146
|
+
"type": "dict",
|
|
147
|
+
"value": {"title": "h1", "content": ".article"}
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
})
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Common Use Cases
|
|
157
|
+
|
|
158
|
+
### 1. Content Research
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
# Research articles
|
|
162
|
+
./.agent/scripts/crawl4ai-helper.sh crawl https://research-site.com markdown research.json
|
|
163
|
+
|
|
164
|
+
# Extract key information
|
|
165
|
+
./.agent/scripts/crawl4ai-helper.sh extract https://paper.com '{
|
|
166
|
+
"title": "h1",
|
|
167
|
+
"authors": ".authors",
|
|
168
|
+
"abstract": ".abstract",
|
|
169
|
+
"keywords": ".keywords"
|
|
170
|
+
}' paper-data.json
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### 2. News Aggregation
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
# Multiple news sources
|
|
177
|
+
for url in "https://news1.com" "https://news2.com" "https://news3.com"; do
|
|
178
|
+
./.agent/scripts/crawl4ai-helper.sh crawl "$url" markdown "news-$(basename "$url").json"
|
|
179
|
+
done
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### 3. E-commerce Data
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
# Product information
|
|
186
|
+
./.agent/scripts/crawl4ai-helper.sh extract https://shop.com/product '{
|
|
187
|
+
"name": "h1.product-title",
|
|
188
|
+
"price": ".price-current",
|
|
189
|
+
"description": ".product-description",
|
|
190
|
+
"specs": {
|
|
191
|
+
"selector": ".specs tr",
|
|
192
|
+
"fields": [
|
|
193
|
+
{"name": "feature", "selector": "td:first-child", "type": "text"},
|
|
194
|
+
{"name": "value", "selector": "td:last-child", "type": "text"}
|
|
195
|
+
]
|
|
196
|
+
}
|
|
197
|
+
}' product.json
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### 4. Documentation Processing
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
# API documentation
|
|
204
|
+
./.agent/scripts/crawl4ai-helper.sh extract https://api-docs.com '{
|
|
205
|
+
"endpoints": {
|
|
206
|
+
"selector": ".endpoint",
|
|
207
|
+
"fields": [
|
|
208
|
+
{"name": "method", "selector": ".method", "type": "text"},
|
|
209
|
+
{"name": "path", "selector": ".path", "type": "text"},
|
|
210
|
+
{"name": "description", "selector": ".description", "type": "text"},
|
|
211
|
+
{"name": "parameters", "selector": ".params", "type": "html"}
|
|
212
|
+
]
|
|
213
|
+
}
|
|
214
|
+
}' api-docs.json
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## Advanced Workflows
|
|
218
|
+
|
|
219
|
+
### Batch Processing
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
#!/bin/bash
|
|
223
|
+
# Process multiple URLs with different strategies
|
|
224
|
+
|
|
225
|
+
urls=(
|
|
226
|
+
"https://news.com"
|
|
227
|
+
"https://blog.com"
|
|
228
|
+
"https://docs.com"
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
for url in "${urls[@]}"; do
|
|
232
|
+
echo "Processing: $url"
|
|
233
|
+
./.agent/scripts/crawl4ai-helper.sh crawl "$url" markdown "output-$(date +%s).json"
|
|
234
|
+
sleep 2 # Rate limiting
|
|
235
|
+
done
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### Content Analysis Pipeline
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
#!/bin/bash
|
|
242
|
+
# Complete content analysis workflow
|
|
243
|
+
|
|
244
|
+
URL="https://example.com"
|
|
245
|
+
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
|
246
|
+
|
|
247
|
+
# 1. Basic crawl
|
|
248
|
+
./.agent/scripts/crawl4ai-helper.sh crawl "$URL" markdown "raw-$TIMESTAMP.json"
|
|
249
|
+
|
|
250
|
+
# 2. Extract structured data
|
|
251
|
+
./.agent/scripts/crawl4ai-helper.sh extract "$URL" '{
|
|
252
|
+
"title": "h1",
|
|
253
|
+
"headings": "h2, h3",
|
|
254
|
+
"links": {"selector": "a", "type": "attribute", "attribute": "href"},
|
|
255
|
+
"images": {"selector": "img", "type": "attribute", "attribute": "src"}
|
|
256
|
+
}' "structured-$TIMESTAMP.json"
|
|
257
|
+
|
|
258
|
+
echo "Analysis complete: raw-$TIMESTAMP.json and structured-$TIMESTAMP.json"
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
## Configuration Best Practices
|
|
262
|
+
|
|
263
|
+
### Environment Setup
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
# Create dedicated environment file
|
|
267
|
+
cat > ~/.aidevops/.agent-workspace/tmp/crawl4ai.env << EOF
|
|
268
|
+
OPENAI_API_KEY=your-key-here
|
|
269
|
+
LLM_PROVIDER=openai/gpt-4o-mini
|
|
270
|
+
LLM_TEMPERATURE=0.7
|
|
271
|
+
CRAWL4AI_MAX_PAGES=50
|
|
272
|
+
CRAWL4AI_TIMEOUT=60
|
|
273
|
+
EOF
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Performance Optimization
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
# For high-volume crawling
|
|
280
|
+
export CRAWL4AI_CONCURRENT_REQUESTS=5
|
|
281
|
+
export CRAWL4AI_BROWSER_POOL_SIZE=3
|
|
282
|
+
export CRAWL4AI_MEMORY_THRESHOLD=90
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
## Monitoring & Debugging
|
|
286
|
+
|
|
287
|
+
### Status Checks
|
|
288
|
+
|
|
289
|
+
```bash
|
|
290
|
+
# Comprehensive status
|
|
291
|
+
./.agent/scripts/crawl4ai-helper.sh status
|
|
292
|
+
|
|
293
|
+
# Docker container status
|
|
294
|
+
docker ps | grep crawl4ai
|
|
295
|
+
|
|
296
|
+
# API health
|
|
297
|
+
curl -s http://localhost:11235/health | jq '.'
|
|
298
|
+
|
|
299
|
+
# Metrics
|
|
300
|
+
curl -s http://localhost:11235/metrics
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
### Dashboard Access
|
|
304
|
+
|
|
305
|
+
- **Monitoring Dashboard**: http://localhost:11235/dashboard
|
|
306
|
+
- **Interactive Playground**: http://localhost:11235/playground
|
|
307
|
+
- **API Documentation**: http://localhost:11235/schema
|
|
308
|
+
|
|
309
|
+
### Troubleshooting
|
|
310
|
+
|
|
311
|
+
```bash
|
|
312
|
+
# Container logs
|
|
313
|
+
docker logs crawl4ai --tail 50
|
|
314
|
+
|
|
315
|
+
# Restart services
|
|
316
|
+
./.agent/scripts/crawl4ai-helper.sh docker-stop
|
|
317
|
+
./.agent/scripts/crawl4ai-helper.sh docker-start
|
|
318
|
+
|
|
319
|
+
# Test basic functionality
|
|
320
|
+
curl -X POST http://localhost:11235/crawl \
|
|
321
|
+
-H "Content-Type: application/json" \
|
|
322
|
+
-d '{"urls": ["https://httpbin.org/html"]}'
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
## Output Processing
|
|
326
|
+
|
|
327
|
+
### JSON Response Structure
|
|
328
|
+
|
|
329
|
+
```json
|
|
330
|
+
{
|
|
331
|
+
"success": true,
|
|
332
|
+
"results": [
|
|
333
|
+
{
|
|
334
|
+
"url": "https://example.com",
|
|
335
|
+
"success": true,
|
|
336
|
+
"markdown": "# Page Title\n\nContent...",
|
|
337
|
+
"html": "<html>...</html>",
|
|
338
|
+
"extracted_content": {...},
|
|
339
|
+
"links": {...},
|
|
340
|
+
"media": {...},
|
|
341
|
+
"metadata": {...}
|
|
342
|
+
}
|
|
343
|
+
]
|
|
344
|
+
}
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
### Processing Results
|
|
348
|
+
|
|
349
|
+
```bash
|
|
350
|
+
# Extract just the markdown
|
|
351
|
+
jq -r '.results[0].markdown' output.json > content.md
|
|
352
|
+
|
|
353
|
+
# Get extracted data
|
|
354
|
+
jq '.results[0].extracted_content' output.json > data.json
|
|
355
|
+
|
|
356
|
+
# List all links
|
|
357
|
+
jq -r '.results[0].links.internal[]' output.json
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
## Security Considerations
|
|
361
|
+
|
|
362
|
+
### Safe Crawling Practices
|
|
363
|
+
|
|
364
|
+
1. **Respect robots.txt**: Always enabled by default
|
|
365
|
+
2. **Rate limiting**: Built-in delays between requests
|
|
366
|
+
3. **User agent**: Identifies as Crawl4AI
|
|
367
|
+
4. **Timeout protection**: Prevents hanging requests
|
|
368
|
+
|
|
369
|
+
### Data Privacy
|
|
370
|
+
|
|
371
|
+
```bash
|
|
372
|
+
# Use cache mode for repeated requests
|
|
373
|
+
./.agent/scripts/crawl4ai-helper.sh crawl https://example.com markdown output.json
|
|
374
|
+
|
|
375
|
+
# Clear cache when needed
|
|
376
|
+
docker exec crawl4ai redis-cli FLUSHALL
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
## Integration Tips
|
|
380
|
+
|
|
381
|
+
### With Other Framework Tools
|
|
382
|
+
|
|
383
|
+
```bash
|
|
384
|
+
# Combine with conversion tools (e.g. Pandoc) to turn crawled markdown into a PDF
|
|
385
|
+
./.agent/scripts/crawl4ai-helper.sh crawl https://docs.com markdown docs.json
|
|
386
|
+
cat docs.json | jq -r '.results[0].markdown' | ./.agent/scripts/pandoc-helper.sh convert - pdf docs.pdf
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
### With AI Workflows
|
|
390
|
+
|
|
391
|
+
```bash
|
|
392
|
+
# Extract content for AI processing
|
|
393
|
+
./.agent/scripts/crawl4ai-helper.sh crawl https://article.com markdown article.json
|
|
394
|
+
CONTENT=$(jq -r '.results[0].markdown' article.json)
|
|
395
|
+
echo "$CONTENT" | your-ai-command  # Replace your-ai-command with your AI pipeline's entry point
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
## Resources
|
|
399
|
+
|
|
400
|
+
- **Helper Script**: `.agent/scripts/crawl4ai-helper.sh`
|
|
401
|
+
- **Configuration**: `configs/crawl4ai-config.json.txt`
|
|
402
|
+
- **MCP Setup**: `configs/mcp-templates/crawl4ai-mcp-config.json`
|
|
403
|
+
- **Integration Guide**: `.agent/tools/browser/crawl4ai-integration.md`
|
|
404
|
+
- **Official Docs**: https://docs.crawl4ai.com/
|
|
405
|
+
|
|
406
|
+
## Success Checklist
|
|
407
|
+
|
|
408
|
+
- [ ] Crawl4AI installed and running
|
|
409
|
+
- [ ] Docker container started successfully
|
|
410
|
+
- [ ] MCP integration configured
|
|
411
|
+
- [ ] Basic crawling test completed
|
|
412
|
+
- [ ] Structured extraction working
|
|
413
|
+
- [ ] Dashboard accessible
|
|
414
|
+
- [ ] API endpoints responding
|
|
415
|
+
|
|
416
|
+
Use this guide to effectively leverage Crawl4AI's powerful web crawling and data extraction capabilities within your AI workflows.
|