ultimate-pi 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/.agents/skills/harness-decisions/SKILL.md +37 -0
  2. package/.agents/skills/harness-governor/SKILL.md +1 -1
  3. package/.agents/skills/harness-orchestration/SKILL.md +54 -0
  4. package/.agents/skills/harness-plan/SKILL.md +4 -3
  5. package/.agents/skills/harness-sentrux-setup/SKILL.md +57 -0
  6. package/.agents/skills/scrapling-web/SKILL.md +93 -0
  7. package/.pi/PACKAGING.md +2 -2
  8. package/.pi/SYSTEM.md +13 -15
  9. package/.pi/agents/harness/adversary.md +3 -0
  10. package/.pi/agents/harness/evaluator.md +3 -0
  11. package/.pi/agents/harness/executor.md +4 -1
  12. package/.pi/agents/harness/meta-optimizer.md +2 -1
  13. package/.pi/agents/harness/planner.md +22 -1
  14. package/.pi/agents/harness/sentrux-bootstrap.md +42 -0
  15. package/.pi/agents/harness/tie-breaker.md +2 -0
  16. package/.pi/extensions/harness-ask-user.ts +74 -0
  17. package/.pi/extensions/harness-subagents.ts +9 -0
  18. package/.pi/extensions/lib/ask-user/dialog.ts +260 -0
  19. package/.pi/extensions/lib/ask-user/fallback.ts +78 -0
  20. package/.pi/extensions/lib/ask-user/render.ts +66 -0
  21. package/.pi/extensions/lib/ask-user/schema.ts +69 -0
  22. package/.pi/extensions/lib/ask-user/types.ts +41 -0
  23. package/.pi/extensions/lib/ask-user/validate-core.mjs +79 -0
  24. package/.pi/extensions/lib/ask-user/validate.ts +92 -0
  25. package/.pi/extensions/lib/harness-subagents/agent-loader.ts +126 -0
  26. package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +119 -0
  27. package/.pi/extensions/lib/harness-subagents/agent-parser.ts +87 -0
  28. package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +118 -0
  29. package/.pi/extensions/lib/harness-subagents/blackboard.ts +175 -0
  30. package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +27 -0
  31. package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +27 -0
  32. package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +553 -0
  33. package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +637 -0
  34. package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +175 -0
  35. package/.pi/extensions/lib/harness-subagents/vendored/context.ts +59 -0
  36. package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +134 -0
  37. package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +5 -0
  38. package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +123 -0
  39. package/.pi/extensions/lib/harness-subagents/vendored/env.ts +43 -0
  40. package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +144 -0
  41. package/.pi/extensions/lib/harness-subagents/vendored/index.ts +2447 -0
  42. package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +52 -0
  43. package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +182 -0
  44. package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +92 -0
  45. package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +115 -0
  46. package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +103 -0
  47. package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +177 -0
  48. package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +416 -0
  49. package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +210 -0
  50. package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +108 -0
  51. package/.pi/extensions/lib/harness-subagents/vendored/types.ts +187 -0
  52. package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +637 -0
  53. package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +324 -0
  54. package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +110 -0
  55. package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +71 -0
  56. package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +195 -0
  57. package/.pi/harness/README.md +2 -1
  58. package/.pi/harness/agents.manifest.json +80 -0
  59. package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +9 -5
  60. package/.pi/harness/env.harness.template +28 -0
  61. package/.pi/harness/sentrux/architecture.manifest.json +6 -1
  62. package/.pi/prompts/harness-auto.md +2 -2
  63. package/.pi/prompts/harness-plan.md +2 -2
  64. package/.pi/prompts/harness-router-tune.md +2 -2
  65. package/.pi/prompts/harness-run.md +1 -0
  66. package/.pi/prompts/harness-setup.md +178 -339
  67. package/.pi/scripts/README.md +6 -1
  68. package/.pi/scripts/harness-agents-manifest.mjs +123 -0
  69. package/.pi/scripts/harness-cli-verify.sh +60 -11
  70. package/.pi/scripts/harness-generate-model-router.mjs +242 -0
  71. package/.pi/scripts/harness-graphify-bootstrap.sh +1 -6
  72. package/.pi/scripts/harness-resolve-up-pkg.mjs +71 -0
  73. package/.pi/scripts/harness-seed-project-contracts.mjs +33 -1
  74. package/.pi/scripts/harness-sentrux-bootstrap.mjs +146 -0
  75. package/.pi/scripts/harness-sync-env.mjs +148 -0
  76. package/.pi/scripts/harness-verify.mjs +19 -0
  77. package/.pi/scripts/harness-web-search.md +33 -0
  78. package/.pi/scripts/harness-web.py +177 -0
  79. package/.pi/scripts/harness_web/__init__.py +1 -0
  80. package/.pi/scripts/harness_web/config.py +80 -0
  81. package/.pi/scripts/harness_web/output.py +55 -0
  82. package/.pi/scripts/harness_web/scrape.py +120 -0
  83. package/.pi/scripts/harness_web/search_ddg.py +106 -0
  84. package/.pi/scripts/release.sh +338 -0
  85. package/.pi/scripts/sentrux-rules-sync.mjs +29 -7
  86. package/.pi/settings.example.json +0 -1
  87. package/.sentrux/rules.toml +1 -1
  88. package/AGENTS.md +1 -1
  89. package/CHANGELOG.md +12 -0
  90. package/THIRD_PARTY_NOTICES.md +22 -0
  91. package/package.json +12 -9
  92. package/.agents/skills/firecrawl/SKILL.md +0 -150
  93. package/.agents/skills/firecrawl/rules/install.md +0 -82
  94. package/.agents/skills/firecrawl/rules/security.md +0 -26
  95. package/.agents/skills/firecrawl-agent/SKILL.md +0 -57
  96. package/.agents/skills/firecrawl-build-interact/SKILL.md +0 -67
  97. package/.agents/skills/firecrawl-build-onboarding/SKILL.md +0 -102
  98. package/.agents/skills/firecrawl-build-onboarding/references/auth-flow.md +0 -39
  99. package/.agents/skills/firecrawl-build-onboarding/references/project-setup.md +0 -20
  100. package/.agents/skills/firecrawl-build-onboarding/references/sdk-installation.md +0 -17
  101. package/.agents/skills/firecrawl-build-scrape/SKILL.md +0 -68
  102. package/.agents/skills/firecrawl-build-search/SKILL.md +0 -68
  103. package/.agents/skills/firecrawl-crawl/SKILL.md +0 -58
  104. package/.agents/skills/firecrawl-download/SKILL.md +0 -69
  105. package/.agents/skills/firecrawl-interact/SKILL.md +0 -83
  106. package/.agents/skills/firecrawl-map/SKILL.md +0 -50
  107. package/.agents/skills/firecrawl-parse/SKILL.md +0 -61
  108. package/.agents/skills/firecrawl-scrape/SKILL.md +0 -68
  109. package/.agents/skills/firecrawl-search/SKILL.md +0 -59
  110. package/firecrawl/.env.template +0 -62
  111. package/firecrawl/README.md +0 -49
  112. package/firecrawl/docker-compose.yaml +0 -201
  113. package/firecrawl/searxng/searxng.env +0 -3
  114. package/firecrawl/searxng/settings.yml +0 -85
@@ -1,68 +0,0 @@
1
- ---
2
- name: firecrawl-build-scrape
3
- description: Integrate Firecrawl `/scrape` into product code for single-page extraction. Use when an app already has a URL and needs markdown, HTML, links, screenshots, metadata, or structured page output. Prefer this skill over broader crawl patterns when the feature is page-level.
4
- license: ISC
5
- metadata:
6
- author: firecrawl
7
- version: "0.1.0"
8
- homepage: https://www.firecrawl.dev
9
- source: https://github.com/firecrawl/skills
10
- inputs:
11
- - name: FIRECRAWL_API_KEY
12
- description: Firecrawl API key for hosted Firecrawl requests.
13
- required: true
14
- - name: FIRECRAWL_API_URL
15
- description: Optional base URL for self-hosted Firecrawl deployments.
16
- required: false
17
- ---
18
-
19
- # Firecrawl Build Scrape
20
-
21
- Use this when the application already has the URL and needs content from one page.
22
-
23
- ## Use This When
24
-
25
- - the feature starts from a known URL
26
- - you need page content for retrieval, summarization, enrichment, or monitoring
27
- - you want the default extraction primitive before considering `/interact`
28
-
29
- ## Default Recommendations
30
-
31
- - Return `markdown` unless the feature truly needs another format.
32
- - Use `onlyMainContent` for article-like pages where nav and chrome add noise.
33
- - Add waits or other rendering options only when the page needs them.
34
-
35
- ## Common Product Patterns
36
-
37
- - knowledge ingestion from known URLs
38
- - enrichment from a company, product, or docs page
39
- - pricing, changelog, and documentation extraction
40
- - page-level quality checks or monitoring
41
-
42
- ## Escalation Rules
43
-
44
- - If you do not have the URL yet, start with [firecrawl-build-search](../firecrawl-build-search/SKILL.md).
45
- - If content requires clicks, typing, or multi-step navigation, escalate to [firecrawl-build-interact](../firecrawl-build-interact/SKILL.md).
46
-
47
- ## Implementation Notes
48
-
49
- - Keep the integration narrow: one feature, one URL, one extraction contract.
50
- - Treat `/scrape` as the default primitive for downstream LLM or indexing pipelines.
51
- - Request richer formats only when the consumer needs them, such as links, screenshots, or branding data.
52
-
53
- ## Docs (Source of Truth)
54
-
55
- Read the source-of-truth page for your project language before writing integration code:
56
-
57
- - **Node / TypeScript**: [docs.firecrawl.dev/agent-source-of-truth/node](https://docs.firecrawl.dev/agent-source-of-truth/node)
58
- - **Python**: [docs.firecrawl.dev/agent-source-of-truth/python](https://docs.firecrawl.dev/agent-source-of-truth/python)
59
- - **Rust**: [docs.firecrawl.dev/agent-source-of-truth/rust](https://docs.firecrawl.dev/agent-source-of-truth/rust)
60
- - **Java**: [docs.firecrawl.dev/agent-source-of-truth/java](https://docs.firecrawl.dev/agent-source-of-truth/java)
61
- - **Elixir**: [docs.firecrawl.dev/agent-source-of-truth/elixir](https://docs.firecrawl.dev/agent-source-of-truth/elixir)
62
- - **cURL / REST**: [docs.firecrawl.dev/agent-source-of-truth/curl](https://docs.firecrawl.dev/agent-source-of-truth/curl)
63
-
64
- ## See Also
65
-
66
- - [firecrawl-build](../firecrawl-build/SKILL.md)
67
- - [firecrawl-build-search](../firecrawl-build-search/SKILL.md)
68
- - [firecrawl-build-interact](../firecrawl-build-interact/SKILL.md)
@@ -1,68 +0,0 @@
1
- ---
2
- name: firecrawl-build-search
3
- description: Integrate Firecrawl `/search` into product code and agent workflows. Use when an app needs discovery before extraction, when the feature starts with a query instead of a URL, or when the system should search the web and optionally hydrate result content.
4
- license: ISC
5
- metadata:
6
- author: firecrawl
7
- version: "0.1.0"
8
- homepage: https://www.firecrawl.dev
9
- source: https://github.com/firecrawl/skills
10
- inputs:
11
- - name: FIRECRAWL_API_KEY
12
- description: Firecrawl API key for hosted Firecrawl requests.
13
- required: true
14
- - name: FIRECRAWL_API_URL
15
- description: Optional base URL for self-hosted Firecrawl deployments.
16
- required: false
17
- ---
18
-
19
- # Firecrawl Build Search
20
-
21
- Use this when the application starts with a query, not a URL.
22
-
23
- ## Use This When
24
-
25
- - the user asks a question and the product must discover sources first
26
- - the feature needs current web results
27
- - you want to turn a search query into a shortlist of pages for later scraping
28
-
29
- ## Default Recommendations
30
-
31
- - Use `/search` first when URL discovery is part of the product behavior.
32
- - Keep search and extraction conceptually separate unless scraping search results is clearly required.
33
- - Prefer selective follow-up extraction over broad hydration when cost or latency matters.
34
-
35
- ## Common Product Patterns
36
-
37
- - answer generation with cited sources
38
- - company, competitor, or topic discovery
39
- - research workflows that produce a shortlist before deeper extraction
40
- - query-to-URL pipelines for later `/scrape` or `/interact`
41
-
42
- ## Escalation Rules
43
-
44
- - If you already have the URL, use [firecrawl-build-scrape](../firecrawl-build-scrape/SKILL.md).
45
- - If the result page then requires clicks or form interaction, escalate to [firecrawl-build-interact](../firecrawl-build-interact/SKILL.md).
46
-
47
- ## Implementation Notes
48
-
49
- - Treat `/search` as discovery, ranking, and source selection.
50
- - Be explicit about whether the product needs snippets, URLs, or full result content.
51
- - Keep the query contract stable so downstream scraping logic stays predictable.
52
-
53
- ## Docs (Source of Truth)
54
-
55
- Read the source-of-truth page for your project language before writing integration code:
56
-
57
- - **Node / TypeScript**: [docs.firecrawl.dev/agent-source-of-truth/node](https://docs.firecrawl.dev/agent-source-of-truth/node)
58
- - **Python**: [docs.firecrawl.dev/agent-source-of-truth/python](https://docs.firecrawl.dev/agent-source-of-truth/python)
59
- - **Rust**: [docs.firecrawl.dev/agent-source-of-truth/rust](https://docs.firecrawl.dev/agent-source-of-truth/rust)
60
- - **Java**: [docs.firecrawl.dev/agent-source-of-truth/java](https://docs.firecrawl.dev/agent-source-of-truth/java)
61
- - **Elixir**: [docs.firecrawl.dev/agent-source-of-truth/elixir](https://docs.firecrawl.dev/agent-source-of-truth/elixir)
62
- - **cURL / REST**: [docs.firecrawl.dev/agent-source-of-truth/curl](https://docs.firecrawl.dev/agent-source-of-truth/curl)
63
-
64
- ## See Also
65
-
66
- - [firecrawl-build](../firecrawl-build/SKILL.md)
67
- - [firecrawl-build-scrape](../firecrawl-build-scrape/SKILL.md)
68
- - [firecrawl-build-interact](../firecrawl-build-interact/SKILL.md)
@@ -1,58 +0,0 @@
1
- ---
2
- name: firecrawl-crawl
3
- description: |
4
- Bulk extract content from an entire website or site section. Use this skill when the user wants to crawl a site, extract all pages from a docs section, bulk-scrape multiple pages following links, or says "crawl", "get all the pages", "extract everything under /docs", "bulk extract", or needs content from many pages on the same site. Handles depth limits, path filtering, and concurrent extraction.
5
- allowed-tools:
6
- - Bash(firecrawl *)
7
- - Bash(npx firecrawl *)
8
- ---
9
-
10
- # firecrawl crawl
11
-
12
- Bulk extract content from a website. Crawls pages following links up to a depth/limit.
13
-
14
- ## When to use
15
-
16
- - You need content from many pages on a site (e.g., all `/docs/`)
17
- - You want to extract an entire site section
18
- - Step 4 in the [workflow escalation pattern](firecrawl-cli): search → scrape → map → **crawl** → interact
19
-
20
- ## Quick start
21
-
22
- ```bash
23
- # Crawl a docs section
24
- firecrawl crawl "<url>" --include-paths /docs --limit 50 --wait -o .firecrawl/crawl.json
25
-
26
- # Full crawl with depth limit
27
- firecrawl crawl "<url>" --max-depth 3 --wait --progress -o .firecrawl/crawl.json
28
-
29
- # Check status of a running crawl
30
- firecrawl crawl <job-id>
31
- ```
32
-
33
- ## Options
34
-
35
- | Option | Description |
36
- | ------------------------- | ------------------------------------------- |
37
- | `--wait` | Wait for crawl to complete before returning |
38
- | `--progress` | Show progress while waiting |
39
- | `--limit <n>` | Max pages to crawl |
40
- | `--max-depth <n>` | Max link depth to follow |
41
- | `--include-paths <paths>` | Only crawl URLs matching these paths |
42
- | `--exclude-paths <paths>` | Skip URLs matching these paths |
43
- | `--delay <ms>` | Delay between requests |
44
- | `--max-concurrency <n>` | Max parallel crawl workers |
45
- | `--pretty` | Pretty print JSON output |
46
- | `-o, --output <path>` | Output file path |
47
-
48
- ## Tips
49
-
50
- - Always use `--wait` when you need the results immediately. Without it, crawl returns a job ID for async polling.
51
- - Use `--include-paths` to scope the crawl — don't crawl an entire site when you only need one section.
52
- - Crawl consumes credits per page. Check `firecrawl credit-usage` before large crawls.
53
-
54
- ## See also
55
-
56
- - [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — scrape individual pages
57
- - [firecrawl-map](../firecrawl-map/SKILL.md) — discover URLs before deciding to crawl
58
- - [firecrawl-download](../firecrawl-download/SKILL.md) — download site to local files (uses map + scrape)
@@ -1,69 +0,0 @@
1
- ---
2
- name: firecrawl-download
3
- description: |
4
- Download an entire website as local files — markdown, screenshots, or multiple formats per page. Use this skill when the user wants to save a site locally, download documentation for offline use, bulk-save pages as files, or says "download the site", "save as local files", "offline copy", "download all the docs", or "save for reference". Combines site mapping and scraping into organized local directories.
5
- allowed-tools:
6
- - Bash(firecrawl *)
7
- - Bash(npx firecrawl *)
8
- ---
9
-
10
- # firecrawl download
11
-
12
- > **Experimental.** Convenience command that combines `map` + `scrape` to save an entire site as local files.
13
-
14
- Maps the site first to discover pages, then scrapes each one into nested directories under `.firecrawl/`. All scrape options work with download. Always pass `-y` to skip the confirmation prompt.
15
-
16
- ## When to use
17
-
18
- - You want to save an entire site (or section) to local files
19
- - You need offline access to documentation or content
20
- - Bulk content extraction with organized file structure
21
-
22
- ## Quick start
23
-
24
- ```bash
25
- # Interactive wizard (picks format, screenshots, paths for you)
26
- firecrawl download https://docs.example.com
27
-
28
- # With screenshots
29
- firecrawl download https://docs.example.com --screenshot --limit 20 -y
30
-
31
- # Multiple formats (each saved as its own file per page)
32
- firecrawl download https://docs.example.com --format markdown,links --screenshot --limit 20 -y
33
- # Creates per page: index.md + links.txt + screenshot.png
34
-
35
- # Filter to specific sections
36
- firecrawl download https://docs.example.com --include-paths "/features,/sdks"
37
-
38
- # Skip translations
39
- firecrawl download https://docs.example.com --exclude-paths "/zh,/ja,/fr,/es,/pt-BR"
40
-
41
- # Full combo
42
- firecrawl download https://docs.example.com \
43
- --include-paths "/features,/sdks" \
44
- --exclude-paths "/zh,/ja" \
45
- --only-main-content \
46
- --screenshot \
47
- -y
48
- ```
49
-
50
- ## Download options
51
-
52
- | Option | Description |
53
- | ------------------------- | -------------------------------------------------------- |
54
- | `--limit <n>` | Max pages to download |
55
- | `--search <query>` | Filter URLs by search query |
56
- | `--include-paths <paths>` | Only download matching paths |
57
- | `--exclude-paths <paths>` | Skip matching paths |
58
- | `--allow-subdomains` | Include subdomain pages |
59
- | `-y` | Skip confirmation prompt (always use in automated flows) |
60
-
61
- ## Scrape options (all work with download)
62
-
63
- `-f <formats>`, `-H`, `-S`, `--screenshot`, `--full-page-screenshot`, `--only-main-content`, `--include-tags`, `--exclude-tags`, `--wait-for`, `--max-age`, `--country`, `--languages`
64
-
65
- ## See also
66
-
67
- - [firecrawl-map](../firecrawl-map/SKILL.md) — just discover URLs without downloading
68
- - [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — scrape individual pages
69
- - [firecrawl-crawl](../firecrawl-crawl/SKILL.md) — bulk extract as JSON (not local files)
@@ -1,83 +0,0 @@
1
- ---
2
- name: firecrawl-interact
3
- description: |
4
- Control and interact with a live browser session on any scraped page — click buttons, fill forms, navigate flows, and extract data using natural language prompts or code. Use when the user needs to interact with a webpage beyond simple scraping: logging into a site, submitting forms, clicking through pagination, handling infinite scroll, navigating multi-step checkout or wizard flows, or when a regular scrape failed because content is behind JavaScript interaction. Also useful for authenticated scraping via profiles. Triggers on "interact", "click", "fill out the form", "log in to", "sign in", "submit", "paginated", "next page", "infinite scroll", "interact with the page", "navigate to", "open a session", or "scrape failed".
5
- allowed-tools:
6
- - Bash(firecrawl *)
7
- - Bash(npx firecrawl *)
8
- ---
9
-
10
- # firecrawl interact
11
-
12
- Interact with scraped pages in a live browser session. Scrape a page first, then use natural language prompts or code to click, fill forms, navigate, and extract data.
13
-
14
- ## When to use
15
-
16
- - Content requires interaction: clicks, form fills, pagination, login
17
- - `scrape` failed because content is behind JavaScript interaction
18
- - You need to navigate a multi-step flow
19
- - Last resort in the [workflow escalation pattern](firecrawl-cli): search → scrape → map → crawl → **interact**
20
- - **Never use interact for web searches** — use `search` instead
21
-
22
- ## Quick start
23
-
24
- ```bash
25
- # 1. Scrape a page (scrape ID is saved automatically)
26
- firecrawl scrape "<url>"
27
-
28
- # 2. Interact with the page using natural language
29
- firecrawl interact --prompt "Click the login button"
30
- firecrawl interact --prompt "Fill in the email field with test@example.com"
31
- firecrawl interact --prompt "Extract the pricing table"
32
-
33
- # 3. Or use code for precise control
34
- firecrawl interact --code "agent-browser click @e5" --language bash
35
- firecrawl interact --code "agent-browser snapshot -i" --language bash
36
-
37
- # 4. Stop the session when done
38
- firecrawl interact stop
39
- ```
40
-
41
- ## Options
42
-
43
- | Option | Description |
44
- | --------------------- | ------------------------------------------------- |
45
- | `--prompt <text>` | Natural language instruction (use this OR --code) |
46
- | `--code <code>` | Code to execute in the browser session |
47
- | `--language <lang>` | Language for code: bash, python, node |
48
- | `--timeout <seconds>` | Execution timeout (default: 30, max: 300) |
49
- | `--scrape-id <id>` | Target a specific scrape (default: last scrape) |
50
- | `-o, --output <path>` | Output file path |
51
-
52
- ## Profiles
53
-
54
- Use `--profile` on the scrape to persist browser state (cookies, localStorage) across scrapes:
55
-
56
- ```bash
57
- # Session 1: Login and save state
58
- firecrawl scrape "https://app.example.com/login" --profile my-app
59
- firecrawl interact --prompt "Fill in email with user@example.com and click login"
60
-
61
- # Session 2: Come back authenticated
62
- firecrawl scrape "https://app.example.com/dashboard" --profile my-app
63
- firecrawl interact --prompt "Extract the dashboard data"
64
- ```
65
-
66
- Read-only reconnect (no writes to profile state):
67
-
68
- ```bash
69
- firecrawl scrape "https://app.example.com" --profile my-app --no-save-changes
70
- ```
71
-
72
- ## Tips
73
-
74
- - Always scrape first — `interact` requires a scrape ID from a previous `firecrawl scrape` call
75
- - The scrape ID is saved automatically, so you don't need `--scrape-id` for subsequent interact calls
76
- - Use `firecrawl interact stop` to free resources when done
77
- - For parallel work, scrape multiple pages and interact with each using `--scrape-id`
78
-
79
- ## See also
80
-
81
- - [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — try scrape first, escalate to interact only when needed
82
- - [firecrawl-search](../firecrawl-search/SKILL.md) — for web searches (never use interact for searching)
83
- - [firecrawl-agent](../firecrawl-agent/SKILL.md) — AI-powered extraction (less manual control)
@@ -1,50 +0,0 @@
1
- ---
2
- name: firecrawl-map
3
- description: |
4
- Discover and list all URLs on a website, with optional search filtering. Use this skill when the user wants to find a specific page on a large site, list all URLs, see the site structure, find where something is on a domain, or says "map the site", "find the URL for", "what pages are on", or "list all pages". Essential when the user knows which site but not which exact page.
5
- allowed-tools:
6
- - Bash(firecrawl *)
7
- - Bash(npx firecrawl *)
8
- ---
9
-
10
- # firecrawl map
11
-
12
- Discover URLs on a site. Use `--search` to find a specific page within a large site.
13
-
14
- ## When to use
15
-
16
- - You need to find a specific subpage on a large site
17
- - You want a list of all URLs on a site before scraping or crawling
18
- - Step 3 in the [workflow escalation pattern](firecrawl-cli): search → scrape → **map** → crawl → interact
19
-
20
- ## Quick start
21
-
22
- ```bash
23
- # Find a specific page on a large site
24
- firecrawl map "<url>" --search "authentication" -o .firecrawl/filtered.txt
25
-
26
- # Get all URLs
27
- firecrawl map "<url>" --limit 500 --json -o .firecrawl/urls.json
28
- ```
29
-
30
- ## Options
31
-
32
- | Option | Description |
33
- | --------------------------------- | ---------------------------- |
34
- | `--limit <n>` | Max number of URLs to return |
35
- | `--search <query>` | Filter URLs by search query |
36
- | `--sitemap <include\|skip\|only>` | Sitemap handling strategy |
37
- | `--include-subdomains` | Include subdomain URLs |
38
- | `--json` | Output as JSON |
39
- | `-o, --output <path>` | Output file path |
40
-
41
- ## Tips
42
-
43
- - **Map + scrape is a common pattern**: use `map --search` to find the right URL, then `scrape` it.
44
- - Example: `map https://docs.example.com --search "auth"` → found `/docs/api/authentication` → `scrape` that URL.
45
-
46
- ## See also
47
-
48
- - [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — scrape the URLs you discover
49
- - [firecrawl-crawl](../firecrawl-crawl/SKILL.md) — bulk extract instead of map + scrape
50
- - [firecrawl-download](../firecrawl-download/SKILL.md) — download entire site (uses map internally)
@@ -1,61 +0,0 @@
1
- ---
2
- name: firecrawl-parse
3
- description: |
4
- Efficiently extract and convert the contents of any local file—such as PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, or HTML—into clean, well-formatted markdown saved to disk. Use this skill whenever the user requests to parse, read, or extract information from a file on their computer, including phrases like “parse this PDF”, “convert this document”, “read this file”, “extract text from”, or when a local file path (not a URL) is provided. This skill offers advanced options like generating AI-powered summaries and answering questions based on the file's content. Prefer this tool over `scrape` when handling local files to deliver precise, structured outputs for downstream tasks.
5
- allowed-tools:
6
- - Bash(firecrawl *)
7
- - Bash(npx firecrawl *)
8
- ---
9
-
10
- # firecrawl parse
11
-
12
- Turn a local document into clean markdown on disk. Supports **PDF, DOCX, DOC, ODT, RTF, XLSX, XLS, HTML/HTM/XHTML**.
13
-
14
- ## When to use
15
-
16
- - You have a file on disk (not a URL) and want its text as markdown
17
- - The user provides a PDF/DOCX and asks what it says or asks for a summary
18
- - Use `scrape` instead when the source is a URL
19
-
20
- ## Quick start
21
-
22
- Always save to `.firecrawl/` with `-o` — parsed docs can be hundreds of KB and blow up context if streamed to stdout. Add `.firecrawl/` to `.gitignore`.
23
-
24
- ```bash
25
- mkdir -p .firecrawl
26
-
27
- # File → markdown
28
- firecrawl parse ./paper.pdf -o .firecrawl/paper.md
29
-
30
- # AI summary
31
- firecrawl parse ./paper.pdf -S -o .firecrawl/paper-summary.md
32
-
33
- # Ask a question about the doc
34
- firecrawl parse ./paper.pdf -Q "What are the main conclusions?" \
35
- -o .firecrawl/paper-qa.md
36
- ```
37
-
38
- Then inspect the output with `head`, `grep`, `rg`, etc., or read the file incrementally — don't load the whole thing at once.
39
-
40
- ## Options
41
-
42
- | Option | Description |
43
- | ---------------------- | --------------------------------------- |
44
- | `-S, --summary` | AI-generated summary |
45
- | `-Q, --query <prompt>` | Ask a question about the parsed content |
46
- | `-o, --output <path>` | Output file path — **always use this** |
47
- | `-f, --format <fmt>` | `markdown` (default), `html`, `summary` |
48
- | `--timeout <ms>` | Timeout for the parse job |
49
- | `--timing` | Show request duration |
50
-
51
- ## Tips
52
-
53
- - Quote paths with spaces: `firecrawl parse "./My Doc.pdf" -o .firecrawl/mydoc.md`.
54
- - Max upload size: **50 MB** per file.
55
- - Credits: ~1 credit per PDF page; an HTML file costs a flat 1 credit.
56
- - Check `.firecrawl/` before re-parsing the same file.
57
- - To check your credit balance (recommended for batch processing and similar workflows), use the `firecrawl credit-usage` command.
58
-
59
- ## See also
60
-
61
- - [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — same idea for URLs
@@ -1,68 +0,0 @@
1
- ---
2
- name: firecrawl-scrape
3
- description: |
4
- Extract clean markdown from any URL, including JavaScript-rendered SPAs. Use this skill whenever the user provides a URL and wants its content, says "scrape", "grab", "fetch", "pull", "get the page", "extract from this URL", or "read this webpage". Handles JS-rendered pages, multiple concurrent URLs, and returns LLM-optimized markdown. Use this instead of WebFetch for any webpage content extraction.
5
- allowed-tools:
6
- - Bash(firecrawl *)
7
- - Bash(npx firecrawl *)
8
- ---
9
-
10
- # firecrawl scrape
11
-
12
- Scrape one or more URLs. Returns clean, LLM-optimized markdown. Multiple URLs are scraped concurrently.
13
-
14
- ## When to use
15
-
16
- - You have a specific URL and want its content
17
- - The page is static or JS-rendered (SPA)
18
- - Step 2 in the [workflow escalation pattern](firecrawl-cli): search → **scrape** → map → crawl → interact
19
-
20
- ## Quick start
21
-
22
- ```bash
23
- # Basic markdown extraction
24
- firecrawl scrape "<url>" -o .firecrawl/page.md
25
-
26
- # Main content only, no nav/footer
27
- firecrawl scrape "<url>" --only-main-content -o .firecrawl/page.md
28
-
29
- # Wait for JS to render, then scrape
30
- firecrawl scrape "<url>" --wait-for 3000 -o .firecrawl/page.md
31
-
32
- # Multiple URLs (each saved to .firecrawl/)
33
- firecrawl scrape https://example.com https://example.com/blog https://example.com/docs
34
-
35
- # Get markdown and links together
36
- firecrawl scrape "<url>" --format markdown,links -o .firecrawl/page.json
37
-
38
- # Ask a question about the page
39
- firecrawl scrape "https://example.com/pricing" --query "What is the enterprise plan price?"
40
- ```
41
-
42
- ## Options
43
-
44
- | Option | Description |
45
- | ------------------------ | ---------------------------------------------------------------- |
46
- | `-f, --format <formats>` | Output formats: markdown, html, rawHtml, links, screenshot, json |
47
- | `-Q, --query <prompt>` | Ask a question about the page content (5 credits) |
48
- | `-H` | Include HTTP headers in output |
49
- | `--only-main-content` | Strip nav, footer, sidebar — main content only |
50
- | `--wait-for <ms>` | Wait for JS rendering before scraping |
51
- | `--include-tags <tags>` | Only include these HTML tags |
52
- | `--exclude-tags <tags>` | Exclude these HTML tags |
53
- | `-o, --output <path>` | Output file path |
54
-
55
- ## Tips
56
-
57
- - **Prefer plain scrape over `--query`.** Scrape to a file, then use `grep`, `head`, or read the markdown directly — you can search and reason over the full content yourself. Use `--query` only when you want a single targeted answer without saving the page (costs 5 extra credits).
58
- - **Try scrape before interact.** Scrape handles static pages and JS-rendered SPAs. Only escalate to `interact` when you need interaction (clicks, form fills, pagination).
59
- - Multiple URLs are scraped concurrently — check `firecrawl --status` for your concurrency limit.
60
- - A single format outputs raw content; multiple formats (e.g., `--format markdown,links`) output JSON.
61
- - Always quote URLs — the shell interprets `?` and `&` as special characters.
62
- - Naming convention: `.firecrawl/{site}-{path}.md`
63
-
64
- ## See also
65
-
66
- - [firecrawl-search](../firecrawl-search/SKILL.md) — find pages when you don't have a URL
67
- - [firecrawl-interact](../firecrawl-interact/SKILL.md) — when scrape can't get the content, use `interact` to click, fill forms, etc.
68
- - [firecrawl-download](../firecrawl-download/SKILL.md) — bulk download an entire site to local files
@@ -1,59 +0,0 @@
1
- ---
2
- name: firecrawl-search
3
- description: |
4
- Web search with full page content extraction. Use this skill whenever the user asks to search the web, find articles, research a topic, look something up, find recent news, discover sources, or says "search for", "find me", "look up", "what are people saying about", or "find articles about". Returns real search results with optional full-page markdown — not just snippets. Provides capabilities beyond Claude's built-in WebSearch.
5
- allowed-tools:
6
- - Bash(firecrawl *)
7
- - Bash(npx firecrawl *)
8
- ---
9
-
10
- # firecrawl search
11
-
12
- Web search with optional content scraping. Returns search results as JSON, optionally with full page content.
13
-
14
- ## When to use
15
-
16
- - You don't have a specific URL yet
17
- - You need to find pages, answer questions, or discover sources
18
- - First step in the [workflow escalation pattern](../firecrawl-cli/SKILL.md): search → scrape → map → crawl → interact
19
-
20
- ## Quick start
21
-
22
- ```bash
23
- # Basic search
24
- firecrawl search "your query" -o .firecrawl/result.json --json
25
-
26
- # Search and scrape full page content from results
27
- firecrawl search "your query" --scrape -o .firecrawl/scraped.json --json
28
-
29
- # News from the past day
30
- firecrawl search "your query" --sources news --tbs qdr:d -o .firecrawl/news.json --json
31
- ```
32
-
33
- ## Options
34
-
35
- | Option | Description |
36
- | ------------------------------------ | --------------------------------------------- |
37
- | `--limit <n>` | Max number of results |
38
- | `--sources <web,images,news>` | Source types to search |
39
- | `--categories <github,research,pdf>` | Filter by category |
40
- | `--tbs <qdr:h\|d\|w\|m\|y>` | Time-based search filter |
41
- | `--location` | Location for search results |
42
- | `--country <code>` | Country code for search |
43
- | `--scrape` | Also scrape full page content for each result |
44
- | `--scrape-formats` | Formats when scraping (default: markdown) |
45
- | `-o, --output <path>` | Output file path |
46
- | `--json` | Output as JSON |
47
-
48
- ## Tips
49
-
50
- - **`--scrape` fetches full content** — don't re-scrape URLs from search results. This saves credits and avoids redundant fetches.
51
- - Always write results to `.firecrawl/` with `-o` to avoid context window bloat.
52
- - Use `jq` to extract URLs or titles: `jq -r '.data.web[].url' .firecrawl/search.json`
53
- - Naming convention: `.firecrawl/search-{query}.json` or `.firecrawl/search-{query}-scraped.json`
54
-
55
- ## See also
56
-
57
- - [firecrawl-scrape](../firecrawl-scrape/SKILL.md) — scrape a specific URL
58
- - [firecrawl-map](../firecrawl-map/SKILL.md) — discover URLs within a site
59
- - [firecrawl-crawl](../firecrawl-crawl/SKILL.md) — bulk extract from a site
@@ -1,62 +0,0 @@
1
- # Firecrawl Self-Hosted Configuration Template
2
- # Copy to .env and adjust values as needed.
3
-
4
- # === API Service ===
5
- PORT=3002
6
- INTERNAL_PORT=3002
7
- HOST=0.0.0.0
8
- ENV=local
9
-
10
- # === Redis ===
11
- REDIS_URL=redis://redis:6379
12
-
13
- # === PostgreSQL (NUQ) ===
14
- POSTGRES_USER=postgres
15
- POSTGRES_PASSWORD=postgres
16
- POSTGRES_DB=postgres
17
- POSTGRES_HOST=nuq-postgres
18
- POSTGRES_PORT=5432
19
- USE_DB_AUTHENTICATION=false
20
-
21
- # === Queue / Workers ===
22
- NUM_WORKERS_PER_QUEUE=8
23
- CRAWL_CONCURRENT_REQUESTS=10
24
- MAX_CONCURRENT_JOBS=5
25
- BROWSER_POOL_SIZE=5
26
- BULL_AUTH_KEY=changeme
27
- TEST_API_KEY=
28
-
29
- # === Playwright Browser Service ===
30
- PLAYWRIGHT_MICROSERVICE_URL=http://playwright-service:3000/scrape
31
- BLOCK_MEDIA=false
32
- ALLOW_LOCAL_WEBHOOKS=true
33
- MAX_CONCURRENT_PAGES=10
34
-
35
- # === AI / LLM (optional) ===
36
- # OPENAI_API_KEY=
37
- # OPENAI_BASE_URL=
38
- # MODEL_NAME=
39
- # MODEL_EMBEDDING_NAME=
40
- # OLLAMA_BASE_URL=
41
-
42
- # === Integrations ===
43
- # AUTUMN_SECRET_KEY=
44
- # SLACK_WEBHOOK_URL=
45
- # SUPABASE_ANON_TOKEN=
46
- # SUPABASE_URL=
47
- # SUPABASE_SERVICE_TOKEN=
48
- # SELF_HOSTED_WEBHOOK_URL=
49
-
50
- # === Proxy (optional) ===
51
- # PROXY_SERVER=
52
- # PROXY_USERNAME=
53
- # PROXY_PASSWORD=
54
-
55
- # === Logging ===
56
- LOGGING_LEVEL=info
57
-
58
- # === SearXNG ===
59
- SEARXNG_ENDPOINT=http://searxng:8080
60
- SEARXNG_ENGINES=google,bing,duckduckgo
61
- SEARXNG_CATEGORIES=general,science,technology
62
- SEARXNG_EXTERNAL_PORT=8080