ultimate-pi 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/ck-search/SKILL.md +99 -0
- package/.agents/skills/defuddle/SKILL.md +90 -0
- package/.agents/skills/find-skills/SKILL.md +142 -0
- package/.agents/skills/firecrawl/SKILL.md +150 -0
- package/.agents/skills/firecrawl/rules/install.md +82 -0
- package/.agents/skills/firecrawl/rules/security.md +26 -0
- package/.agents/skills/firecrawl-agent/SKILL.md +57 -0
- package/.agents/skills/firecrawl-build-interact/SKILL.md +67 -0
- package/.agents/skills/firecrawl-build-onboarding/SKILL.md +102 -0
- package/.agents/skills/firecrawl-build-onboarding/references/auth-flow.md +39 -0
- package/.agents/skills/firecrawl-build-onboarding/references/project-setup.md +20 -0
- package/.agents/skills/firecrawl-build-onboarding/references/sdk-installation.md +17 -0
- package/.agents/skills/firecrawl-build-scrape/SKILL.md +68 -0
- package/.agents/skills/firecrawl-build-search/SKILL.md +68 -0
- package/.agents/skills/firecrawl-crawl/SKILL.md +58 -0
- package/.agents/skills/firecrawl-download/SKILL.md +69 -0
- package/.agents/skills/firecrawl-interact/SKILL.md +83 -0
- package/.agents/skills/firecrawl-map/SKILL.md +50 -0
- package/.agents/skills/firecrawl-parse/SKILL.md +61 -0
- package/.agents/skills/firecrawl-scrape/SKILL.md +68 -0
- package/.agents/skills/firecrawl-search/SKILL.md +59 -0
- package/.agents/skills/obsidian-bases/SKILL.md +299 -0
- package/.agents/skills/obsidian-markdown/SKILL.md +237 -0
- package/.agents/skills/posthog-analyst/SKILL.md +306 -0
- package/.agents/skills/posthog-analyst/evals/evals.json +23 -0
- package/.agents/skills/wiki/SKILL.md +215 -0
- package/.agents/skills/wiki/references/css-snippets.md +122 -0
- package/.agents/skills/wiki/references/frontmatter.md +107 -0
- package/.agents/skills/wiki/references/git-setup.md +58 -0
- package/.agents/skills/wiki/references/mcp-setup.md +149 -0
- package/.agents/skills/wiki/references/modes.md +259 -0
- package/.agents/skills/wiki/references/plugins.md +96 -0
- package/.agents/skills/wiki/references/rest-api.md +124 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +211 -0
- package/.agents/skills/wiki-autoresearch/references/program.md +75 -0
- package/.agents/skills/wiki-fold/SKILL.md +204 -0
- package/.agents/skills/wiki-fold/references/fold-template.md +133 -0
- package/.agents/skills/wiki-ingest/SKILL.md +288 -0
- package/.agents/skills/wiki-lint/SKILL.md +183 -0
- package/.agents/skills/wiki-query/SKILL.md +176 -0
- package/.agents/skills/wiki-save/SKILL.md +128 -0
- package/.ckignore +41 -0
- package/.env.example +9 -0
- package/.github/workflows/lint.yml +33 -0
- package/.github/workflows/publish-github-packages.yml +35 -0
- package/.github/workflows/publish-npm.yml +1 -1
- package/.pi/SYSTEM.md +107 -40
- package/.pi/agents/pi-pi/agent-expert.md +205 -0
- package/.pi/agents/pi-pi/cli-expert.md +47 -0
- package/.pi/agents/pi-pi/config-expert.md +67 -0
- package/.pi/agents/pi-pi/ext-expert.md +53 -0
- package/.pi/agents/pi-pi/keybinding-expert.md +123 -0
- package/.pi/agents/pi-pi/pi-orchestrator.md +103 -0
- package/.pi/agents/pi-pi/prompt-expert.md +83 -0
- package/.pi/agents/pi-pi/skill-expert.md +52 -0
- package/.pi/agents/pi-pi/theme-expert.md +46 -0
- package/.pi/agents/pi-pi/tui-expert.md +100 -0
- package/.pi/agents/rethink.md +140 -0
- package/.pi/agents/wiki-ingest.md +67 -0
- package/.pi/agents/wiki-lint.md +75 -0
- package/.pi/auto-commit.json +20 -0
- package/.pi/extensions/banner.png +0 -0
- package/.pi/extensions/ck-enforce.ts +216 -0
- package/.pi/extensions/custom-footer.ts +308 -0
- package/.pi/extensions/custom-header.ts +116 -0
- package/.pi/extensions/dotenv-loader.ts +170 -0
- package/.pi/internal/cursor-sdk-transcript-parser.ts +59 -0
- package/.pi/model-router.json +95 -0
- package/.pi/npm/.gitignore +2 -0
- package/.pi/prompts/git-sync.md +124 -0
- package/.pi/prompts/harness-setup.md +509 -0
- package/.pi/prompts/save.md +16 -0
- package/.pi/prompts/wiki-autoresearch.md +19 -0
- package/.pi/prompts/wiki.md +23 -0
- package/.pi/providers/cursor-sdk-provider.test.mjs +476 -0
- package/.pi/providers/cursor-sdk-provider.ts +1085 -0
- package/.pi/settings.json +14 -4
- package/.pi/skills/agent-router/SKILL.md +174 -0
- package/.pi/sounds/alert/1-kaching-track.mp3 +0 -0
- package/.pi/sounds/error/1-ksi-wth-track.mp3 +0 -0
- package/.pi/sounds/error/2-smash-track.mp3 +0 -0
- package/.pi/sounds/error/3-buzzer-track.mp3 +0 -0
- package/.pi/sounds/notification/1-soft-notification-track.mp3 +0 -0
- package/.pi/sounds/project-sounds.json +25 -0
- package/.pi/sounds/reminder/1-soft-notification-track.mp3 +0 -0
- package/.pi/sounds/success/1-tada-track.mp3 +0 -0
- package/.pi/sounds/success/2-jobs-done-track.mp3 +0 -0
- package/.pi/sounds/success/3-yay-track.mp3 +0 -0
- package/CONTRIBUTING.md +116 -0
- package/README.md +32 -39
- package/biome.json +34 -0
- package/firecrawl/.env.template +58 -0
- package/firecrawl/README.md +49 -0
- package/firecrawl/docker-compose.yaml +201 -0
- package/firecrawl/searxng/searxng.env +3 -0
- package/firecrawl/searxng/settings.yml +85 -0
- package/lefthook.yml +8 -0
- package/package.json +55 -24
- package/vault/AGENTS.md +37 -0
- package/vault/wiki/_templates/comparison.md +39 -0
- package/vault/wiki/_templates/concept.md +40 -0
- package/vault/wiki/_templates/decision.md +21 -0
- package/vault/wiki/_templates/entity.md +32 -0
- package/vault/wiki/_templates/flow.md +14 -0
- package/vault/wiki/_templates/module.md +18 -0
- package/vault/wiki/_templates/question.md +31 -0
- package/vault/wiki/_templates/source.md +39 -0
- package/vault/wiki/concepts/AST-Aware Code Chunking.md +44 -0
- package/vault/wiki/concepts/Build-Time Prompt Compilation.md +107 -0
- package/vault/wiki/concepts/Context Engine (AI Coding).md +47 -0
- package/vault/wiki/concepts/Context-Aware System Reminders.md +61 -0
- package/vault/wiki/concepts/Contextualized Text Embedding.md +42 -0
- package/vault/wiki/concepts/Contractor vs Employee AI Model.md +55 -0
- package/vault/wiki/concepts/Dual-Model Agent Architecture.md +65 -0
- package/vault/wiki/concepts/Late Chunking vs Early Chunking.md +43 -0
- package/vault/wiki/concepts/Majority Vote Ensembling.md +68 -0
- package/vault/wiki/concepts/Meta-Harness.md +16 -0
- package/vault/wiki/concepts/Multi-Agent AI Coding Architecture.md +75 -0
- package/vault/wiki/concepts/Prompt Enhancement.md +90 -0
- package/vault/wiki/concepts/Prompt Renderer.md +89 -0
- package/vault/wiki/concepts/Semantic Codebase Indexing.md +67 -0
- package/vault/wiki/concepts/additive-config-hierarchy.md +16 -0
- package/vault/wiki/concepts/agent-artifacts-verifiable-deliverables.md +71 -0
- package/vault/wiki/concepts/agent-browser-browser-automation.md +99 -0
- package/vault/wiki/concepts/agent-codebase-interface.md +43 -0
- package/vault/wiki/concepts/agent-harness-architecture.md +67 -0
- package/vault/wiki/concepts/agent-loop-detection-patterns.md +133 -0
- package/vault/wiki/concepts/agent-search-enforcement.md +126 -0
- package/vault/wiki/concepts/agent-skills-ecosystem.md +74 -0
- package/vault/wiki/concepts/agent-skills-pattern.md +68 -0
- package/vault/wiki/concepts/agentic-harness-context-enforcement.md +91 -0
- package/vault/wiki/concepts/agentic-harness.md +34 -0
- package/vault/wiki/concepts/agentic-orchestration-pipeline.md +56 -0
- package/vault/wiki/concepts/agentic-search-no-embeddings.md +18 -0
- package/vault/wiki/concepts/anthropic-context-engineering.md +13 -0
- package/vault/wiki/concepts/antigravity-agent-first-architecture.md +61 -0
- package/vault/wiki/concepts/ast-compression.md +19 -0
- package/vault/wiki/concepts/ast-truncation.md +66 -0
- package/vault/wiki/concepts/barrel-files.md +37 -0
- package/vault/wiki/concepts/browser-harness-agent.md +41 -0
- package/vault/wiki/concepts/browser-subagent-visual-verification.md +82 -0
- package/vault/wiki/concepts/codebase-intelligence-ecosystem-comparison.md +192 -0
- package/vault/wiki/concepts/codebase-intelligence-harness-integration.md +161 -0
- package/vault/wiki/concepts/codebase-to-context-ingestion.md +46 -0
- package/vault/wiki/concepts/codex-harness-innovations.md +147 -0
- package/vault/wiki/concepts/consensus-debate-flow.md +17 -0
- package/vault/wiki/concepts/consensus-debate.md +206 -0
- package/vault/wiki/concepts/content-addressed-spec-identity.md +166 -0
- package/vault/wiki/concepts/context-anxiety.md +57 -0
- package/vault/wiki/concepts/context-compression-techniques.md +19 -0
- package/vault/wiki/concepts/context-continuity.md +22 -0
- package/vault/wiki/concepts/context-drift-in-agents.md +106 -0
- package/vault/wiki/concepts/context-engineering.md +62 -0
- package/vault/wiki/concepts/context-folding.md +67 -0
- package/vault/wiki/concepts/context-mode.md +38 -0
- package/vault/wiki/concepts/cursor-harness-innovations.md +107 -0
- package/vault/wiki/concepts/deterministic-session-compaction.md +79 -0
- package/vault/wiki/concepts/drift-detection-unified.md +296 -0
- package/vault/wiki/concepts/execution-feedback-loop.md +46 -0
- package/vault/wiki/concepts/feedforward-feedback-harness.md +60 -0
- package/vault/wiki/concepts/five-root-cause-metrics-sentrux.md +40 -0
- package/vault/wiki/concepts/fork-safe-spec-storage.md +89 -0
- package/vault/wiki/concepts/fts5-sandbox.md +19 -0
- package/vault/wiki/concepts/fuzzy-edit-matching.md +71 -0
- package/vault/wiki/concepts/gemini-cli-architecture.md +104 -0
- package/vault/wiki/concepts/generator-evaluator-architecture.md +64 -0
- package/vault/wiki/concepts/guardian-agent-pattern.md +67 -0
- package/vault/wiki/concepts/harness-configuration-layers.md +89 -0
- package/vault/wiki/concepts/harness-control-frameworks.md +155 -0
- package/vault/wiki/concepts/harness-engineering-first-principles.md +90 -0
- package/vault/wiki/concepts/harness-h-formalism.md +53 -0
- package/vault/wiki/concepts/hybrid-code-search.md +61 -0
- package/vault/wiki/concepts/inline-post-edit-validation.md +112 -0
- package/vault/wiki/concepts/legendary-engineering-patterns-harness.md +110 -0
- package/vault/wiki/concepts/lifecycle-hooks.md +94 -0
- package/vault/wiki/concepts/mcp-tool-routing.md +102 -0
- package/vault/wiki/concepts/memory-system-of-record-vs-ephemeral-cache.md +47 -0
- package/vault/wiki/concepts/meta-agent-context-pruning.md +151 -0
- package/vault/wiki/concepts/model-adaptive-harness.md +122 -0
- package/vault/wiki/concepts/model-routing-agents.md +101 -0
- package/vault/wiki/concepts/monorepo-architecture.md +45 -0
- package/vault/wiki/concepts/multi-agent-specialization.md +61 -0
- package/vault/wiki/concepts/permission-subsystem.md +16 -0
- package/vault/wiki/concepts/pi-messenger-analysis.md +243 -0
- package/vault/wiki/concepts/pi-vscode-extension-landscape.md +37 -0
- package/vault/wiki/concepts/policy-engine-pattern.md +78 -0
- package/vault/wiki/concepts/progressive-disclosure-agents.md +53 -0
- package/vault/wiki/concepts/progressive-skill-disclosure.md +17 -0
- package/vault/wiki/concepts/provider-native-prompting.md +203 -0
- package/vault/wiki/concepts/quality-signal-sentrux.md +37 -0
- package/vault/wiki/concepts/repo-map-ranking.md +42 -0
- package/vault/wiki/concepts/result-monad-error-handling.md +47 -0
- package/vault/wiki/concepts/safety-defense-in-depth.md +83 -0
- package/vault/wiki/concepts/sandbox-os-enforcement.md +18 -0
- package/vault/wiki/concepts/selective-debate-routing.md +70 -0
- package/vault/wiki/concepts/self-evolving-harness.md +60 -0
- package/vault/wiki/concepts/sentrux-mcp-integration.md +36 -0
- package/vault/wiki/concepts/sentrux-rules-engine.md +49 -0
- package/vault/wiki/concepts/shell-pattern-compression.md +24 -0
- package/vault/wiki/concepts/skill-first-architecture.md +166 -0
- package/vault/wiki/concepts/structured-compaction.md +78 -0
- package/vault/wiki/concepts/subagent-orchestration.md +17 -0
- package/vault/wiki/concepts/subagent-worktree-isolation.md +68 -0
- package/vault/wiki/concepts/superpowers-methodology.md +78 -0
- package/vault/wiki/concepts/think-in-code.md +73 -0
- package/vault/wiki/concepts/ts-execution-layer.md +100 -0
- package/vault/wiki/concepts/typescript-strict-mode.md +37 -0
- package/vault/wiki/concepts/vcc-conversation-compaction-for-pi.md +51 -0
- package/vault/wiki/concepts/verification-drift-detection.md +19 -0
- package/vault/wiki/consensus/consensus-records.md +58 -0
- package/vault/wiki/decisions/2026-04-30-pi-lean-ctx-native.md +122 -0
- package/vault/wiki/decisions/adr-008.md +40 -0
- package/vault/wiki/decisions/adr-009.md +46 -0
- package/vault/wiki/decisions/adr-010.md +55 -0
- package/vault/wiki/decisions/adr-011.md +165 -0
- package/vault/wiki/decisions/adr-012.md +102 -0
- package/vault/wiki/decisions/adr-013.md +59 -0
- package/vault/wiki/decisions/adr-014.md +73 -0
- package/vault/wiki/decisions/adr-015.md +81 -0
- package/vault/wiki/decisions/adr-016.md +91 -0
- package/vault/wiki/decisions/adr-017.md +79 -0
- package/vault/wiki/decisions/adr-018.md +100 -0
- package/vault/wiki/decisions/adr-019.md +75 -0
- package/vault/wiki/decisions/adr-020.md +106 -0
- package/vault/wiki/decisions/adr-021.md +86 -0
- package/vault/wiki/decisions/adr-022.md +113 -0
- package/vault/wiki/decisions/adr-023.md +113 -0
- package/vault/wiki/decisions/adr-024.md +73 -0
- package/vault/wiki/decisions/adr-025.md +130 -0
- package/vault/wiki/decisions/adr-026.md +56 -0
- package/vault/wiki/decisions/colocate-wiki.md +34 -0
- package/vault/wiki/entities/Anders Hejlsberg.md +29 -0
- package/vault/wiki/entities/Anthropic.md +17 -0
- package/vault/wiki/entities/Augment Code.md +49 -0
- package/vault/wiki/entities/Bjarne Stroustrup.md +26 -0
- package/vault/wiki/entities/Bolt.new (StackBlitz).md +39 -0
- package/vault/wiki/entities/Boris Cherny.md +11 -0
- package/vault/wiki/entities/Claude Code.md +19 -0
- package/vault/wiki/entities/Dennis Ritchie.md +26 -0
- package/vault/wiki/entities/Emergent Labs.md +32 -0
- package/vault/wiki/entities/Google Cloud.md +16 -0
- package/vault/wiki/entities/Guido van Rossum.md +28 -0
- package/vault/wiki/entities/Ken Thompson.md +28 -0
- package/vault/wiki/entities/Lee et al.md +16 -0
- package/vault/wiki/entities/Linus Torvalds.md +28 -0
- package/vault/wiki/entities/Lovable (company).md +40 -0
- package/vault/wiki/entities/Martin Fowler.md +16 -0
- package/vault/wiki/entities/Meng et al.md +16 -0
- package/vault/wiki/entities/OpenAI.md +16 -0
- package/vault/wiki/entities/Rocket.new.md +38 -0
- package/vault/wiki/entities/VILA-Lab.md +15 -0
- package/vault/wiki/entities/autodev-codebase.md +18 -0
- package/vault/wiki/entities/ck-tool.md +59 -0
- package/vault/wiki/entities/codesearch.md +18 -0
- package/vault/wiki/entities/disler-indydevdan.md +33 -0
- package/vault/wiki/entities/gsd-get-shit-done.md +56 -0
- package/vault/wiki/entities/javascript-runtimes.md +48 -0
- package/vault/wiki/entities/jesse-vincent.md +38 -0
- package/vault/wiki/entities/lean-ctx.md +32 -0
- package/vault/wiki/entities/opendev.md +41 -0
- package/vault/wiki/entities/ops-codegraph-tool.md +18 -0
- package/vault/wiki/entities/pi-coding-agent.md +53 -0
- package/vault/wiki/entities/sentrux.md +54 -0
- package/vault/wiki/entities/vgrep-tool.md +57 -0
- package/vault/wiki/entities/vitest.md +41 -0
- package/vault/wiki/flows/harness-wiki-pipeline.md +204 -0
- package/vault/wiki/hot.md +932 -0
- package/vault/wiki/index.md +437 -0
- package/vault/wiki/log.md +418 -0
- package/vault/wiki/meta/dashboard.md +30 -0
- package/vault/wiki/meta/lint-report-2026-04-30.md +86 -0
- package/vault/wiki/meta/lint-report-2026-05-02.md +251 -0
- package/vault/wiki/meta/overview.canvas +43 -0
- package/vault/wiki/modules/adversarial-verification.md +57 -0
- package/vault/wiki/modules/automated-observability.md +54 -0
- package/vault/wiki/modules/bench.md +20 -0
- package/vault/wiki/modules/extensions.md +23 -0
- package/vault/wiki/modules/grounding-checkpoints.md +62 -0
- package/vault/wiki/modules/harness-implementation-plan.md +345 -0
- package/vault/wiki/modules/harness-wiki-skill-mapping.md +135 -0
- package/vault/wiki/modules/harness.md +86 -0
- package/vault/wiki/modules/persistent-memory.md +85 -0
- package/vault/wiki/modules/schema-orchestration.md +68 -0
- package/vault/wiki/modules/skills.md +27 -0
- package/vault/wiki/modules/spec-hardening.md +58 -0
- package/vault/wiki/modules/structured-planning.md +53 -0
- package/vault/wiki/modules/think-in-code-enforcement.md +153 -0
- package/vault/wiki/modules/wiki-query-interface.md +64 -0
- package/vault/wiki/overview.md +51 -0
- package/vault/wiki/questions/Research-pi-vs-claude-code-agentic-orchestration-pipeline.md +87 -0
- package/vault/wiki/questions/Research-sentrux-dev.md +123 -0
- package/vault/wiki/questions/Research-superpowers-skill-for-agentic-coding-agents.md +164 -0
- package/vault/wiki/questions/Research: Augment Code Context Engine.md +244 -0
- package/vault/wiki/questions/Research: Automating Software Engineering - Lovable, Bolt, Emergent, Rocket.md +112 -0
- package/vault/wiki/questions/Research: Claude Code State-of-the-Art Harness Improvements.md +209 -0
- package/vault/wiki/questions/Research: Codex State-of-the-Art Harness Improvements.md +99 -0
- package/vault/wiki/questions/Research: Engineering Workflows of Legendary Programmers and AI Harness Mapping.md +107 -0
- package/vault/wiki/questions/Research: Fallow Codebase Intelligence Harness Integration.md +72 -0
- package/vault/wiki/questions/Research: Gemini CLI SOTA Harness Integration.md +166 -0
- package/vault/wiki/questions/Research: GitHub Issues as Harness Spec Storage.md +188 -0
- package/vault/wiki/questions/Research: Google Antigravity Harness Integration.md +120 -0
- package/vault/wiki/questions/Research: Meta-Agent Context Drift Detection.md +236 -0
- package/vault/wiki/questions/Research: Model-Adaptive Agent Harness Design.md +95 -0
- package/vault/wiki/questions/Research: Model-Specific Prompting Guides.md +165 -0
- package/vault/wiki/questions/Research: Prompt Renderer for Multi-Model Agent Harness.md +216 -0
- package/vault/wiki/questions/Research: Skill-First Harness Architecture.md +91 -0
- package/vault/wiki/questions/Research: TypeScript Best Practices and Codebase Structure.md +88 -0
- package/vault/wiki/questions/Research: TypeScript Execution Layer for Agent Tool Calling.md +81 -0
- package/vault/wiki/questions/Research: claude-mem over Obsidian for Harness Layer.md +71 -0
- package/vault/wiki/questions/Research: claude-mem over obsidian wiki as the knowledge base for our agentic harness pipeline. think from first principles. does this replace or complement our current setup? no hard feelings about previous decisions. gimme accurate points.md +80 -0
- package/vault/wiki/questions/Research: context-mode vs lean-ctx.md +72 -0
- package/vault/wiki/questions/Research: cursor.sh Harness Innovations.md +92 -0
- package/vault/wiki/questions/Research: executor.sh Harness Integration.md +170 -0
- package/vault/wiki/questions/Research: how GSD fits into our coding harness setup.md +97 -0
- package/vault/wiki/questions/Research: how claude-mem fits into our workflow. and whether it should replace obsidian in the codebase. no hard feelings about previous actions, rethink from first principles always.md +80 -0
- package/vault/wiki/questions/Research: pi-vcc.md +113 -0
- package/vault/wiki/questions/Research: semantic code search tools.md +69 -0
- package/vault/wiki/questions/Research: vcc extension for pi coding agent.md +73 -0
- package/vault/wiki/questions/how-to-enable-semantic-code-search-now.md +111 -0
- package/vault/wiki/questions/mvp-implementation-blueprint.md +552 -0
- package/vault/wiki/questions/research-agent-first-codebase-exploration.md +199 -0
- package/vault/wiki/questions/research-agentic-coding-harness-latest-papers.md +142 -0
- package/vault/wiki/questions/research-gitingest-gitreverse-integration.md +100 -0
- package/vault/wiki/questions/research-wozcode-token-reduction.md +67 -0
- package/vault/wiki/questions/resolved-context-pruning-inplace-vs-restart.md +95 -0
- package/vault/wiki/questions/resolved-context-window-economics.md +167 -0
- package/vault/wiki/questions/resolved-imad-debate-gating-transfer.md +126 -0
- package/vault/wiki/questions/resolved-mcp-tool-preference.md +112 -0
- package/vault/wiki/questions/resolved-small-model-meta-agents.md +107 -0
- package/vault/wiki/questions/resolved-treesitter-dynamic-languages.md +95 -0
- package/vault/wiki/sources/Auggie Context MCP Server.md +63 -0
- package/vault/wiki/sources/Augment Code Codacy AI Giants.md +61 -0
- package/vault/wiki/sources/Augment Code MCP SiliconAngle.md +49 -0
- package/vault/wiki/sources/Augment Code WorkOS ERC 2025.md +55 -0
- package/vault/wiki/sources/Augment Context Engine Official.md +71 -0
- package/vault/wiki/sources/Augment SWE-bench Agent GitHub.md +74 -0
- package/vault/wiki/sources/Augment SWE-bench Pro Blog.md +58 -0
- package/vault/wiki/sources/Source: AgentBus Jinja2 Prompt Pipelines.md +75 -0
- package/vault/wiki/sources/Source: Arxiv — Don't Break the Cache.md +85 -0
- package/vault/wiki/sources/Source: Augment - Harness Engineering for AI Coding Agents.md +58 -0
- package/vault/wiki/sources/Source: Blake Crosley Agent Architecture Guide.md +100 -0
- package/vault/wiki/sources/Source: Bolt.new Architecture & Case Study.md +75 -0
- package/vault/wiki/sources/Source: Build-Time Prompt Compilation Architecture.md +107 -0
- package/vault/wiki/sources/Source: Claude API Agent Skills Overview.md +70 -0
- package/vault/wiki/sources/Source: Gemini CLI Changelogs.md +88 -0
- package/vault/wiki/sources/Source: Google Blog - Gemini CLI Announcement.md +57 -0
- package/vault/wiki/sources/Source: Google Gemini CLI Architecture Docs.md +53 -0
- package/vault/wiki/sources/Source: LangChain - Anatomy of Agent Harness.md +65 -0
- package/vault/wiki/sources/Source: Lovable Architecture & Clone Analysis.md +83 -0
- package/vault/wiki/sources/Source: Martin Fowler - Harness Engineering.md +70 -0
- package/vault/wiki/sources/Source: OpenAI Harness Engineering Five Principles.md +58 -0
- package/vault/wiki/sources/Source: OpenAI Harness Engineering — 0 Lines of Human Code.md +101 -0
- package/vault/wiki/sources/Source: OpenDev — Building AI Coding Agents for the Terminal.md +100 -0
- package/vault/wiki/sources/Source: Render AI Coding Agents Benchmark 2025.md +53 -0
- package/vault/wiki/sources/Source: Rocket.new — Vibe Solutioning Platform.md +70 -0
- package/vault/wiki/sources/Source: SwirlAI Agent Skills Progressive Disclosure.md +71 -0
- package/vault/wiki/sources/Source: TianPan Prompt Caching Architecture.md +89 -0
- package/vault/wiki/sources/Source: Vercel Labs agent-browser.md +155 -0
- package/vault/wiki/sources/Source: browser-harness CDP Harness.md +126 -0
- package/vault/wiki/sources/agent-drift-academic-paper.md +79 -0
- package/vault/wiki/sources/aider-repomap-tree-sitter.md +42 -0
- package/vault/wiki/sources/anthropic-compaction-api.md +58 -0
- package/vault/wiki/sources/anthropic-effective-harnesses.md +42 -0
- package/vault/wiki/sources/anthropic-prompt-best-practices.md +100 -0
- package/vault/wiki/sources/anthropic2026-harness-design.md +63 -0
- package/vault/wiki/sources/barrel-files-tkdodo.md +38 -0
- package/vault/wiki/sources/birth-of-unix-kernighan-interview.md +57 -0
- package/vault/wiki/sources/bockeler2026-harness-engineering.md +69 -0
- package/vault/wiki/sources/cast-code-chunking-paper.md +50 -0
- package/vault/wiki/sources/ck-semantic-search.md +78 -0
- package/vault/wiki/sources/claude-code-architecture-karaxai-2026.md +71 -0
- package/vault/wiki/sources/claude-code-architecture-qubytes-2026.md +50 -0
- package/vault/wiki/sources/claude-code-architecture-vila-lab-2026.md +64 -0
- package/vault/wiki/sources/claude-code-security-architecture-penligent-2026.md +70 -0
- package/vault/wiki/sources/claude-context-editing-docs.md +13 -0
- package/vault/wiki/sources/cloudflare-codemode.md +63 -0
- package/vault/wiki/sources/code-chunk-library-supermemory.md +63 -0
- package/vault/wiki/sources/codeact-apple-2024.md +62 -0
- package/vault/wiki/sources/codex-dsc-rfc-8573.md +41 -0
- package/vault/wiki/sources/codex-open-source-agent-2026.md +110 -0
- package/vault/wiki/sources/coir-code-retrieval-benchmark.md +51 -0
- package/vault/wiki/sources/colinmcnamara-context-optimization-codemode.md +48 -0
- package/vault/wiki/sources/context-folding-paper.md +61 -0
- package/vault/wiki/sources/context-mode-website.md +63 -0
- package/vault/wiki/sources/cursor-agent-best-practices-2026.md +62 -0
- package/vault/wiki/sources/cursor-fork-29b-2025.md +50 -0
- package/vault/wiki/sources/cursor-harness-april-2026.md +76 -0
- package/vault/wiki/sources/cursor-instant-apply-2024.md +45 -0
- package/vault/wiki/sources/cursor-shadow-workspace-2024.md +52 -0
- package/vault/wiki/sources/cursor-shipped-coding-agent-2026.md +53 -0
- package/vault/wiki/sources/cursor-vs-antigravity-2026.md +51 -0
- package/vault/wiki/sources/disler-pi-vs-claude-code.md +69 -0
- package/vault/wiki/sources/distill-deterministic-context-compression.md +53 -0
- package/vault/wiki/sources/embedding-models-benchmark-supermemory-2025.md +48 -0
- package/vault/wiki/sources/executor-rhyssullivan.md +122 -0
- package/vault/wiki/sources/fallow-rs-codebase-intelligence.md +125 -0
- package/vault/wiki/sources/fan2025-imad.md +60 -0
- package/vault/wiki/sources/forgecode-gpt5-agent-improvements.md +63 -0
- package/vault/wiki/sources/gemini-3-prompting-guide.md +78 -0
- package/vault/wiki/sources/gh-cli-sub-issue-rfc.md +50 -0
- package/vault/wiki/sources/gh-sub-issue-extension.md +72 -0
- package/vault/wiki/sources/github-fork-issues-discussion.md +44 -0
- package/vault/wiki/sources/github-issue-dependencies-docs.md +49 -0
- package/vault/wiki/sources/github-sub-issues-docs.md +51 -0
- package/vault/wiki/sources/gitingest.md +91 -0
- package/vault/wiki/sources/gitreverse.md +63 -0
- package/vault/wiki/sources/google-antigravity-official-blog.md +47 -0
- package/vault/wiki/sources/google-antigravity-wikipedia.md +53 -0
- package/vault/wiki/sources/gsd-codecentric-deep-dive.md +57 -0
- package/vault/wiki/sources/gsd-github-repo.md +51 -0
- package/vault/wiki/sources/gsd-hn-discussion.md +59 -0
- package/vault/wiki/sources/guido-python-design-philosophy.md +56 -0
- package/vault/wiki/sources/hejlsberg-7-learnings.md +48 -0
- package/vault/wiki/sources/ironclaw-drift-monitor.md +80 -0
- package/vault/wiki/sources/langsight-loop-detection.md +80 -0
- package/vault/wiki/sources/leanctx-website.md +69 -0
- package/vault/wiki/sources/lee2026-meta-harness.md +59 -0
- package/vault/wiki/sources/linux-kernel-coding-workflow.md +50 -0
- package/vault/wiki/sources/lou2026-autoharness.md +53 -0
- package/vault/wiki/sources/martin-fowler-harness-engineering.md +73 -0
- package/vault/wiki/sources/mcp-architecture-docs.md +13 -0
- package/vault/wiki/sources/meng2026-agent-harness-survey.md +79 -0
- package/vault/wiki/sources/mindstudio-four-agent-types.md +68 -0
- package/vault/wiki/sources/ms-chat-history-management.md +13 -0
- package/vault/wiki/sources/openai-prompt-guidance.md +104 -0
- package/vault/wiki/sources/openclaw-session-pruning.md +13 -0
- package/vault/wiki/sources/opencode-dcp.md +13 -0
- package/vault/wiki/sources/opendev-arxiv-2603.05344v1.md +79 -0
- package/vault/wiki/sources/openhands-platform.md +39 -0
- package/vault/wiki/sources/oss-guide-codebase-exploration.md +53 -0
- package/vault/wiki/sources/pi-compaction-extensions-ecosystem.md +102 -0
- package/vault/wiki/sources/pi-context-prune-github-repo.md +38 -0
- package/vault/wiki/sources/pi-mono-compaction-docs.md +38 -0
- package/vault/wiki/sources/pi-omni-compact-github-repo.md +50 -0
- package/vault/wiki/sources/pi-rtk-optimizer-github-repo.md +45 -0
- package/vault/wiki/sources/pi-vcc-github-repo.md +69 -0
- package/vault/wiki/sources/pi-vscode-marketplace.md +41 -0
- package/vault/wiki/sources/pi-vscode-model-provider-marketplace.md +39 -0
- package/vault/wiki/sources/py-tree-sitter.md +13 -0
- package/vault/wiki/sources/sentrux-dev-landing.md +40 -0
- package/vault/wiki/sources/sentrux-docs-pro-architecture.md +75 -0
- package/vault/wiki/sources/sentrux-docs-quality-signal.md +46 -0
- package/vault/wiki/sources/sentrux-docs-root-cause-metrics.md +57 -0
- package/vault/wiki/sources/sentrux-docs-rules-engine.md +58 -0
- package/vault/wiki/sources/sentrux-github-repo.md +56 -0
- package/vault/wiki/sources/superpowers-github-repo.md +56 -0
- package/vault/wiki/sources/superpowers-release-blog.md +54 -0
- package/vault/wiki/sources/superpowers-termdock-analysis.md +45 -0
- package/vault/wiki/sources/swe-agent-aci.md +42 -0
- package/vault/wiki/sources/swe-bench.md +45 -0
- package/vault/wiki/sources/swe-pruner-context-pruning.md +13 -0
- package/vault/wiki/sources/think-in-code-blog.md +48 -0
- package/vault/wiki/sources/tree-sitter-docs.md +13 -0
- package/vault/wiki/sources/ts-best-practices-2025-devto.md +42 -0
- package/vault/wiki/sources/ts-folder-structure-mingyang.md +58 -0
- package/vault/wiki/sources/ts-monorepo-koerselman.md +44 -0
- package/vault/wiki/sources/ts-result-error-handling-kkalamarski.md +52 -0
- package/vault/wiki/sources/ts-runtimes-comparison-betterstack.md +42 -0
- package/vault/wiki/sources/ts-strict-mode-rishikc.md +43 -0
- package/vault/wiki/sources/unix-philosophy.md +48 -0
- package/vault/wiki/sources/vectara-chunking-vs-embedding-naacl2025.md +39 -0
- package/vault/wiki/sources/vectara-guardian-agents.md +79 -0
- package/vault/wiki/sources/vgrep-semantic-search.md +76 -0
- package/vault/wiki/sources/vitest-official.md +41 -0
- package/vault/wiki/sources/vscode-pi-community-extension.md +40 -0
- package/vault/wiki/sources/wozcode.md +79 -0
- package/.agents/skills/compress/SKILL.md +0 -111
- package/.agents/skills/compress/scripts/__init__.py +0 -9
- package/.agents/skills/compress/scripts/__main__.py +0 -3
- package/.agents/skills/compress/scripts/benchmark.py +0 -78
- package/.agents/skills/compress/scripts/cli.py +0 -73
- package/.agents/skills/compress/scripts/compress.py +0 -227
- package/.agents/skills/compress/scripts/detect.py +0 -121
- package/.agents/skills/compress/scripts/validate.py +0 -189
- package/.agents/skills/emil-design-eng/SKILL.md +0 -679
- package/.agents/skills/lean-ctx/SKILL.md +0 -149
- package/.agents/skills/lean-ctx/scripts/install.sh +0 -95
- package/.agents/skills/scrapling-official/LICENSE.txt +0 -28
- package/.agents/skills/scrapling-official/SKILL.md +0 -390
- package/.agents/skills/scrapling-official/examples/01_fetcher_session.py +0 -26
- package/.agents/skills/scrapling-official/examples/02_dynamic_session.py +0 -26
- package/.agents/skills/scrapling-official/examples/03_stealthy_session.py +0 -26
- package/.agents/skills/scrapling-official/examples/04_spider.py +0 -58
- package/.agents/skills/scrapling-official/examples/README.md +0 -45
- package/.agents/skills/scrapling-official/references/fetching/choosing.md +0 -78
- package/.agents/skills/scrapling-official/references/fetching/dynamic.md +0 -352
- package/.agents/skills/scrapling-official/references/fetching/static.md +0 -432
- package/.agents/skills/scrapling-official/references/fetching/stealthy.md +0 -255
- package/.agents/skills/scrapling-official/references/mcp-server.md +0 -214
- package/.agents/skills/scrapling-official/references/migrating_from_beautifulsoup.md +0 -86
- package/.agents/skills/scrapling-official/references/parsing/adaptive.md +0 -212
- package/.agents/skills/scrapling-official/references/parsing/main_classes.md +0 -586
- package/.agents/skills/scrapling-official/references/parsing/selection.md +0 -494
- package/.agents/skills/scrapling-official/references/spiders/advanced.md +0 -344
- package/.agents/skills/scrapling-official/references/spiders/architecture.md +0 -94
- package/.agents/skills/scrapling-official/references/spiders/getting-started.md +0 -164
- package/.agents/skills/scrapling-official/references/spiders/proxy-blocking.md +0 -235
- package/.agents/skills/scrapling-official/references/spiders/requests-responses.md +0 -196
- package/.agents/skills/scrapling-official/references/spiders/sessions.md +0 -205
- package/PLAN.md +0 -11
- package/extensions/lean-ctx-enforce.ts +0 -166
- package/skills-lock.json +0 -35
- package/wiki/README.md +0 -19
- package/wiki/decisions/0001-establish-project-wiki-and-decision-record-format.md +0 -25
- package/wiki/decisions/0002-add-project-banner-to-readme.md +0 -26
- package/wiki/decisions/0003-remove-redundant-readme-title-heading.md +0 -26
- package/wiki/decisions/0004-publish-package-to-npm-as-ultimate-pi.md +0 -26
- package/wiki/decisions/0005-automate-npm-publish-with-github-actions.md +0 -27
- package/wiki/decisions/0006-switch-to-npm-trusted-publishing.md +0 -26
- package/wiki/decisions/0007-use-absolute-banner-url-for-npm-readme-rendering.md +0 -26
- package/wiki/decisions/0008-rename-banner-asset-for-cache-busting.md +0 -26
- package/wiki/decisions/0009-force-oidc-path-by-clearing-node-auth-token-in-publish-step.md +0 -25
- package/wiki/decisions/0010-simplify-setup-node-for-npm-trusted-publishing.md +0 -26
- package/wiki/decisions/0011-add-noop-workflow-change-to-force-fresh-publish-run.md +0 -25
- package/wiki/decisions/0012-align-workflow-runtime-with-npm-trusted-publishing-requirements.md +0 -26
- package/wiki/decisions/0013-add-package-repository-url-for-provenance-validation.md +0 -25
|
@@ -1,432 +0,0 @@
|
|
|
1
|
-
# HTTP requests
|
|
2
|
-
|
|
3
|
-
The `Fetcher` class provides rapid and lightweight HTTP requests using the high-performance `curl_cffi` library with a lot of stealth capabilities.
|
|
4
|
-
|
|
5
|
-
## Basic Usage
|
|
6
|
-
Import the Fetcher (same import pattern for all fetchers):
|
|
7
|
-
|
|
8
|
-
```python
|
|
9
|
-
>>> from scrapling.fetchers import Fetcher
|
|
10
|
-
```
|
|
11
|
-
Check out how to configure the parsing options [here](choosing.md#parser-configuration-in-all-fetchers)
|
|
12
|
-
|
|
13
|
-
### Shared arguments
|
|
14
|
-
All methods for making requests here share some arguments, so let's discuss them first.
|
|
15
|
-
|
|
16
|
-
- **url**: The targeted URL
|
|
17
|
-
- **stealthy_headers**: If enabled (default), it creates and adds real browser headers. It also sets a Google referer header.
|
|
18
|
-
- **follow_redirects**: Controls redirect behavior. **Defaults to `"safe"`**, which follows redirects but rejects those targeting internal/private IPs (SSRF protection). Pass `True` to follow all redirects without restriction, or `False` to disable redirects entirely.
|
|
19
|
-
- **timeout**: The number of seconds to wait for each request to be finished. **Defaults to 30 seconds**.
|
|
20
|
-
- **retries**: The number of retries that the fetcher will do for failed requests. **Defaults to three retries**.
|
|
21
|
-
- **retry_delay**: Number of seconds to wait between retry attempts. **Defaults to 1 second**.
|
|
22
|
-
- **impersonate**: Impersonate specific browsers' TLS fingerprints. Accepts browser strings or a list of them like `"chrome110"`, `"firefox102"`, `"safari15_5"` to use specific versions or `"chrome"`, `"firefox"`, `"safari"`, `"edge"` to automatically use the latest version available. This makes your requests appear to come from real browsers at the TLS level. If you pass it a list of strings, it will choose a random one with each request. **Defaults to the latest available Chrome version.**
|
|
23
|
-
- **http3**: Use HTTP/3 protocol for requests. **Defaults to False**. It might be problematic if used with `impersonate`.
|
|
24
|
-
- **cookies**: Cookies to use in the request. Can be a dictionary of `name→value` or a list of dictionaries.
|
|
25
|
-
- **proxy**: The proxy used to route all traffic (HTTP and HTTPS) for this request. The accepted format is `http://username:password@localhost:8030`.
|
|
26
|
-
- **proxy_auth**: HTTP basic auth for proxy, tuple of (username, password).
|
|
27
|
-
- **proxies**: Dict of proxies to use. Format: `{"http": proxy_url, "https": proxy_url}`.
|
|
28
|
-
- **proxy_rotator**: A `ProxyRotator` instance for automatic proxy rotation. Cannot be combined with `proxy` or `proxies`.
|
|
29
|
-
- **headers**: Headers to include in the request. Can override any header generated by the `stealthy_headers` argument
|
|
30
|
-
- **max_redirects**: Maximum number of redirects. **Defaults to 30**, use -1 for unlimited.
|
|
31
|
-
- **verify**: Whether to verify HTTPS certificates. **Defaults to True**.
|
|
32
|
-
- **cert**: Tuple of (cert, key) filenames for the client certificate.
|
|
33
|
-
- **selector_config**: A dictionary of custom parsing arguments to be used when creating the final `Selector`/`Response` class.
|
|
34
|
-
|
|
35
|
-
**Notes:**
|
|
36
|
-
1. The currently available browsers to impersonate are (`"edge"`, `"chrome"`, `"chrome_android"`, `"safari"`, `"safari_beta"`, `"safari_ios"`, `"safari_ios_beta"`, `"firefox"`, `"tor"`)
|
|
37
|
-
2. The available browsers to impersonate, along with their corresponding versions, are automatically displayed in the argument autocompletion and updated with each `curl_cffi` update.
|
|
38
|
-
3. If either of the arguments `impersonate` or `stealthy_headers` is enabled, the fetchers will automatically generate real browser headers that match the browser version used.
|
|
39
|
-
|
|
40
|
-
Other than this, for further customization, you can pass any arguments that `curl_cffi` supports for any method if that method doesn't already support them.
|
|
41
|
-
|
|
42
|
-
### HTTP Methods
|
|
43
|
-
There are additional arguments for each method, depending on the method, such as `params` for GET requests and `data`/`json` for POST/PUT/DELETE requests.
|
|
44
|
-
|
|
45
|
-
Examples are the best way to explain this:
|
|
46
|
-
|
|
47
|
-
> Note: The `OPTIONS` and `HEAD` methods are not supported.
|
|
48
|
-
#### GET
|
|
49
|
-
```python
|
|
50
|
-
>>> from scrapling.fetchers import Fetcher
|
|
51
|
-
>>> # Basic GET
|
|
52
|
-
>>> page = Fetcher.get('https://example.com')
|
|
53
|
-
>>> page = Fetcher.get('https://scrapling.requestcatcher.com/get', stealthy_headers=True)
|
|
54
|
-
>>> page = Fetcher.get('https://scrapling.requestcatcher.com/get', proxy='http://username:password@localhost:8030')
|
|
55
|
-
>>> # With parameters
|
|
56
|
-
>>> page = Fetcher.get('https://example.com/search', params={'q': 'query'})
|
|
57
|
-
>>>
|
|
58
|
-
>>> # With headers
|
|
59
|
-
>>> page = Fetcher.get('https://example.com', headers={'User-Agent': 'Custom/1.0'})
|
|
60
|
-
>>> # Basic HTTP authentication
|
|
61
|
-
>>> page = Fetcher.get("https://example.com", auth=("my_user", "password123"))
|
|
62
|
-
>>> # Browser impersonation
|
|
63
|
-
>>> page = Fetcher.get('https://example.com', impersonate='chrome')
|
|
64
|
-
>>> # HTTP/3 support
|
|
65
|
-
>>> page = Fetcher.get('https://example.com', http3=True)
|
|
66
|
-
```
|
|
67
|
-
And for asynchronous requests, it's a small adjustment:
|
|
68
|
-
```python
|
|
69
|
-
>>> from scrapling.fetchers import AsyncFetcher
|
|
70
|
-
>>> # Basic GET
|
|
71
|
-
>>> page = await AsyncFetcher.get('https://example.com')
|
|
72
|
-
>>> page = await AsyncFetcher.get('https://scrapling.requestcatcher.com/get', stealthy_headers=True)
|
|
73
|
-
>>> page = await AsyncFetcher.get('https://scrapling.requestcatcher.com/get', proxy='http://username:password@localhost:8030')
|
|
74
|
-
>>> # With parameters
|
|
75
|
-
>>> page = await AsyncFetcher.get('https://example.com/search', params={'q': 'query'})
|
|
76
|
-
>>>
|
|
77
|
-
>>> # With headers
|
|
78
|
-
>>> page = await AsyncFetcher.get('https://example.com', headers={'User-Agent': 'Custom/1.0'})
|
|
79
|
-
>>> # Basic HTTP authentication
|
|
80
|
-
>>> page = await AsyncFetcher.get("https://example.com", auth=("my_user", "password123"))
|
|
81
|
-
>>> # Browser impersonation
|
|
82
|
-
>>> page = await AsyncFetcher.get('https://example.com', impersonate='chrome110')
|
|
83
|
-
>>> # HTTP/3 support
|
|
84
|
-
>>> page = await AsyncFetcher.get('https://example.com', http3=True)
|
|
85
|
-
```
|
|
86
|
-
The `page` object in all cases is a [Response](choosing.md#response-object) object, which is a [Selector](parsing/main_classes.md#selector), so you can use it directly:
|
|
87
|
-
```python
|
|
88
|
-
>>> page.css('.something.something')
|
|
89
|
-
|
|
90
|
-
>>> page = Fetcher.get('https://api.github.com/events')
|
|
91
|
-
>>> page.json()
|
|
92
|
-
[{'id': '<redacted>',
|
|
93
|
-
'type': 'PushEvent',
|
|
94
|
-
'actor': {'id': '<redacted>',
|
|
95
|
-
'login': '<redacted>',
|
|
96
|
-
'display_login': '<redacted>',
|
|
97
|
-
'gravatar_id': '',
|
|
98
|
-
'url': 'https://api.github.com/users/<redacted>',
|
|
99
|
-
'avatar_url': 'https://avatars.githubusercontent.com/u/<redacted>'},
|
|
100
|
-
'repo': {'id': '<redacted>',
|
|
101
|
-
...
|
|
102
|
-
```
|
|
103
|
-
#### POST
|
|
104
|
-
```python
|
|
105
|
-
>>> from scrapling.fetchers import Fetcher
|
|
106
|
-
>>> # Basic POST
|
|
107
|
-
>>> page = Fetcher.post('https://scrapling.requestcatcher.com/post', data={'key': 'value'}, params={'q': 'query'})
|
|
108
|
-
>>> page = Fetcher.post('https://scrapling.requestcatcher.com/post', data={'key': 'value'}, stealthy_headers=True)
|
|
109
|
-
>>> page = Fetcher.post('https://scrapling.requestcatcher.com/post', data={'key': 'value'}, proxy='http://username:password@localhost:8030', impersonate="chrome")
|
|
110
|
-
>>> # Another example of form-encoded data
|
|
111
|
-
>>> page = Fetcher.post('https://example.com/submit', data={'username': 'user', 'password': 'pass'}, http3=True)
|
|
112
|
-
>>> # JSON data
|
|
113
|
-
>>> page = Fetcher.post('https://example.com/api', json={'key': 'value'})
|
|
114
|
-
```
|
|
115
|
-
And for asynchronous requests, it's a small adjustment:
|
|
116
|
-
```python
|
|
117
|
-
>>> from scrapling.fetchers import AsyncFetcher
|
|
118
|
-
>>> # Basic POST
|
|
119
|
-
>>> page = await AsyncFetcher.post('https://scrapling.requestcatcher.com/post', data={'key': 'value'})
|
|
120
|
-
>>> page = await AsyncFetcher.post('https://scrapling.requestcatcher.com/post', data={'key': 'value'}, stealthy_headers=True)
|
|
121
|
-
>>> page = await AsyncFetcher.post('https://scrapling.requestcatcher.com/post', data={'key': 'value'}, proxy='http://username:password@localhost:8030', impersonate="chrome")
|
|
122
|
-
>>> # Another example of form-encoded data
|
|
123
|
-
>>> page = await AsyncFetcher.post('https://example.com/submit', data={'username': 'user', 'password': 'pass'}, http3=True)
|
|
124
|
-
>>> # JSON data
|
|
125
|
-
>>> page = await AsyncFetcher.post('https://example.com/api', json={'key': 'value'})
|
|
126
|
-
```
|
|
127
|
-
#### PUT
|
|
128
|
-
```python
|
|
129
|
-
>>> from scrapling.fetchers import Fetcher
|
|
130
|
-
>>> # Basic PUT
|
|
131
|
-
>>> page = Fetcher.put('https://example.com/update', data={'status': 'updated'})
|
|
132
|
-
>>> page = Fetcher.put('https://example.com/update', data={'status': 'updated'}, stealthy_headers=True, impersonate="chrome")
|
|
133
|
-
>>> page = Fetcher.put('https://example.com/update', data={'status': 'updated'}, proxy='http://username:password@localhost:8030')
|
|
134
|
-
>>> # Another example of form-encoded data
|
|
135
|
-
>>> page = Fetcher.put("https://scrapling.requestcatcher.com/put", data={'key': ['value1', 'value2']})
|
|
136
|
-
```
|
|
137
|
-
And for asynchronous requests, it's a small adjustment:
|
|
138
|
-
```python
|
|
139
|
-
>>> from scrapling.fetchers import AsyncFetcher
|
|
140
|
-
>>> # Basic PUT
|
|
141
|
-
>>> page = await AsyncFetcher.put('https://example.com/update', data={'status': 'updated'})
|
|
142
|
-
>>> page = await AsyncFetcher.put('https://example.com/update', data={'status': 'updated'}, stealthy_headers=True, impersonate="chrome")
|
|
143
|
-
>>> page = await AsyncFetcher.put('https://example.com/update', data={'status': 'updated'}, proxy='http://username:password@localhost:8030')
|
|
144
|
-
>>> # Another example of form-encoded data
|
|
145
|
-
>>> page = await AsyncFetcher.put("https://scrapling.requestcatcher.com/put", data={'key': ['value1', 'value2']})
|
|
146
|
-
```
|
|
147
|
-
|
|
148
|
-
#### DELETE
|
|
149
|
-
```python
|
|
150
|
-
>>> from scrapling.fetchers import Fetcher
|
|
151
|
-
>>> page = Fetcher.delete('https://example.com/resource/123')
|
|
152
|
-
>>> page = Fetcher.delete('https://example.com/resource/123', stealthy_headers=True, impersonate="chrome")
|
|
153
|
-
>>> page = Fetcher.delete('https://example.com/resource/123', proxy='http://username:password@localhost:8030')
|
|
154
|
-
```
|
|
155
|
-
And for asynchronous requests, it's a small adjustment:
|
|
156
|
-
```python
|
|
157
|
-
>>> from scrapling.fetchers import AsyncFetcher
|
|
158
|
-
>>> page = await AsyncFetcher.delete('https://example.com/resource/123')
|
|
159
|
-
>>> page = await AsyncFetcher.delete('https://example.com/resource/123', stealthy_headers=True, impersonate="chrome")
|
|
160
|
-
>>> page = await AsyncFetcher.delete('https://example.com/resource/123', proxy='http://username:password@localhost:8030')
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
## Session Management
|
|
164
|
-
|
|
165
|
-
For making multiple requests with the same configuration, use the `FetcherSession` class. It can be used in both synchronous and asynchronous code without issue; the class automatically detects and changes the session type, without requiring a different import.
|
|
166
|
-
|
|
167
|
-
The `FetcherSession` class can accept nearly all the arguments that the methods can take, which enables you to specify a config for the entire session and later choose a different config for one of the requests effortlessly, as you will see in the following examples.
|
|
168
|
-
|
|
169
|
-
```python
|
|
170
|
-
from scrapling.fetchers import FetcherSession
|
|
171
|
-
|
|
172
|
-
# Create a session with default configuration
|
|
173
|
-
with FetcherSession(
|
|
174
|
-
impersonate='chrome',
|
|
175
|
-
http3=True,
|
|
176
|
-
stealthy_headers=True,
|
|
177
|
-
timeout=30,
|
|
178
|
-
retries=3
|
|
179
|
-
) as session:
|
|
180
|
-
# Make multiple requests with the same settings and the same cookies
|
|
181
|
-
page1 = session.get('https://scrapling.requestcatcher.com/get')
|
|
182
|
-
page2 = session.post('https://scrapling.requestcatcher.com/post', data={'key': 'value'})
|
|
183
|
-
page3 = session.get('https://api.github.com/events')
|
|
184
|
-
|
|
185
|
-
# All requests share the same session and connection pool
|
|
186
|
-
```
|
|
187
|
-
|
|
188
|
-
You can also use a `ProxyRotator` with `FetcherSession` for automatic proxy rotation across requests:
|
|
189
|
-
|
|
190
|
-
```python
|
|
191
|
-
from scrapling.fetchers import FetcherSession, ProxyRotator
|
|
192
|
-
|
|
193
|
-
rotator = ProxyRotator([
|
|
194
|
-
'http://proxy1:8080',
|
|
195
|
-
'http://proxy2:8080',
|
|
196
|
-
'http://proxy3:8080',
|
|
197
|
-
])
|
|
198
|
-
|
|
199
|
-
with FetcherSession(proxy_rotator=rotator, impersonate='chrome') as session:
|
|
200
|
-
# Each request automatically uses the next proxy in rotation
|
|
201
|
-
page1 = session.get('https://example.com/page1')
|
|
202
|
-
page2 = session.get('https://example.com/page2')
|
|
203
|
-
|
|
204
|
-
# You can check which proxy was used via the response metadata
|
|
205
|
-
print(page1.meta['proxy'])
|
|
206
|
-
```
|
|
207
|
-
|
|
208
|
-
You can also override the session proxy (or rotator) for a specific request by passing `proxy=` directly to the request method:
|
|
209
|
-
|
|
210
|
-
```python
|
|
211
|
-
with FetcherSession(proxy='http://default-proxy:8080') as session:
|
|
212
|
-
# Uses the session proxy
|
|
213
|
-
page1 = session.get('https://example.com/page1')
|
|
214
|
-
|
|
215
|
-
# Override the proxy for this specific request
|
|
216
|
-
page2 = session.get('https://example.com/page2', proxy='http://special-proxy:9090')
|
|
217
|
-
```
|
|
218
|
-
|
|
219
|
-
And here's an async example
|
|
220
|
-
|
|
221
|
-
```python
|
|
222
|
-
async with FetcherSession(impersonate='firefox', http3=True) as session:
|
|
223
|
-
# All standard HTTP methods available
|
|
224
|
-
response = await session.get('https://example.com')
|
|
225
|
-
response = await session.post('https://scrapling.requestcatcher.com/post', json={'data': 'value'})
|
|
226
|
-
response = await session.put('https://scrapling.requestcatcher.com/put', data={'update': 'info'})
|
|
227
|
-
response = await session.delete('https://scrapling.requestcatcher.com/delete')
|
|
228
|
-
```
|
|
229
|
-
Or, better, run the requests concurrently:
|
|
230
|
-
```python
|
|
231
|
-
import asyncio
|
|
232
|
-
from scrapling.fetchers import FetcherSession
|
|
233
|
-
|
|
234
|
-
# Async session usage
|
|
235
|
-
async with FetcherSession(impersonate="safari") as session:
|
|
236
|
-
urls = ['https://example.com/page1', 'https://example.com/page2']
|
|
237
|
-
|
|
238
|
-
tasks = [
|
|
239
|
-
session.get(url) for url in urls
|
|
240
|
-
]
|
|
241
|
-
|
|
242
|
-
pages = await asyncio.gather(*tasks)
|
|
243
|
-
```
|
|
244
|
-
|
|
245
|
-
The `Fetcher` class uses `FetcherSession` to create a temporary session with each request you make.
|
|
246
|
-
|
|
247
|
-
### Session Benefits
|
|
248
|
-
|
|
249
|
-
- **A lot faster**: 10 times faster than creating a new session for each request
|
|
250
|
-
- **Cookie persistence**: Automatic cookie handling across requests
|
|
251
|
-
- **Resource efficiency**: Better memory and CPU usage for multiple requests
|
|
252
|
-
- **Centralized configuration**: Single place to manage request settings
|
|
253
|
-
|
|
254
|
-
## Examples
|
|
255
|
-
Some well-rounded examples to aid newcomers to Web Scraping
|
|
256
|
-
|
|
257
|
-
### Basic HTTP Request
|
|
258
|
-
|
|
259
|
-
```python
|
|
260
|
-
from scrapling.fetchers import Fetcher
|
|
261
|
-
|
|
262
|
-
# Make a request
|
|
263
|
-
page = Fetcher.get('https://example.com')
|
|
264
|
-
|
|
265
|
-
# Check the status
|
|
266
|
-
if page.status == 200:
|
|
267
|
-
# Extract title
|
|
268
|
-
title = page.css('title::text').get()
|
|
269
|
-
print(f"Page title: {title}")
|
|
270
|
-
|
|
271
|
-
# Extract all links
|
|
272
|
-
links = page.css('a::attr(href)').getall()
|
|
273
|
-
print(f"Found {len(links)} links")
|
|
274
|
-
```
|
|
275
|
-
|
|
276
|
-
### Product Scraping
|
|
277
|
-
|
|
278
|
-
```python
|
|
279
|
-
from scrapling.fetchers import Fetcher
|
|
280
|
-
|
|
281
|
-
def scrape_products():
|
|
282
|
-
page = Fetcher.get('https://example.com/products')
|
|
283
|
-
|
|
284
|
-
# Find all product elements
|
|
285
|
-
products = page.css('.product')
|
|
286
|
-
|
|
287
|
-
results = []
|
|
288
|
-
for product in products:
|
|
289
|
-
results.append({
|
|
290
|
-
'title': product.css('.title::text').get(),
|
|
291
|
-
'price': product.css('.price::text').re_first(r'\d+\.\d{2}'),
|
|
292
|
-
'description': product.css('.description::text').get(),
|
|
293
|
-
'in_stock': product.has_class('in-stock')
|
|
294
|
-
})
|
|
295
|
-
|
|
296
|
-
return results
|
|
297
|
-
```
|
|
298
|
-
|
|
299
|
-
### Downloading Files
|
|
300
|
-
|
|
301
|
-
```python
|
|
302
|
-
from scrapling.fetchers import Fetcher
|
|
303
|
-
|
|
304
|
-
page = Fetcher.get('https://raw.githubusercontent.com/D4Vinci/Scrapling/main/images/main_cover.png')
|
|
305
|
-
with open(file='main_cover.png', mode='wb') as f:
|
|
306
|
-
f.write(page.body)
|
|
307
|
-
```
|
|
308
|
-
|
|
309
|
-
### Pagination Handling
|
|
310
|
-
|
|
311
|
-
```python
|
|
312
|
-
from scrapling.fetchers import Fetcher
|
|
313
|
-
|
|
314
|
-
def scrape_all_pages():
|
|
315
|
-
base_url = 'https://example.com/products?page={}'
|
|
316
|
-
page_num = 1
|
|
317
|
-
all_products = []
|
|
318
|
-
|
|
319
|
-
while True:
|
|
320
|
-
# Get current page
|
|
321
|
-
page = Fetcher.get(base_url.format(page_num))
|
|
322
|
-
|
|
323
|
-
# Find products
|
|
324
|
-
products = page.css('.product')
|
|
325
|
-
if not products:
|
|
326
|
-
break
|
|
327
|
-
|
|
328
|
-
# Process products
|
|
329
|
-
for product in products:
|
|
330
|
-
all_products.append({
|
|
331
|
-
'name': product.css('.name::text').get(),
|
|
332
|
-
'price': product.css('.price::text').get()
|
|
333
|
-
})
|
|
334
|
-
|
|
335
|
-
# Next page
|
|
336
|
-
page_num += 1
|
|
337
|
-
|
|
338
|
-
return all_products
|
|
339
|
-
```
|
|
340
|
-
|
|
341
|
-
### Form Submission
|
|
342
|
-
|
|
343
|
-
```python
|
|
344
|
-
from scrapling.fetchers import Fetcher
|
|
345
|
-
|
|
346
|
-
# Submit login form
|
|
347
|
-
response = Fetcher.post(
|
|
348
|
-
'https://example.com/login',
|
|
349
|
-
data={
|
|
350
|
-
'username': 'user@example.com',
|
|
351
|
-
'password': 'password123'
|
|
352
|
-
}
|
|
353
|
-
)
|
|
354
|
-
|
|
355
|
-
# Check login success
|
|
356
|
-
if response.status == 200:
|
|
357
|
-
# Extract user info
|
|
358
|
-
user_name = response.css('.user-name::text').get()
|
|
359
|
-
print(f"Logged in as: {user_name}")
|
|
360
|
-
```
|
|
361
|
-
|
|
362
|
-
### Table Extraction
|
|
363
|
-
|
|
364
|
-
```python
|
|
365
|
-
from scrapling.fetchers import Fetcher
|
|
366
|
-
|
|
367
|
-
def extract_table():
|
|
368
|
-
page = Fetcher.get('https://example.com/data')
|
|
369
|
-
|
|
370
|
-
# Find table
|
|
371
|
-
table = page.css('table')[0]
|
|
372
|
-
|
|
373
|
-
# Extract headers
|
|
374
|
-
headers = [
|
|
375
|
-
th.text for th in table.css('thead th')
|
|
376
|
-
]
|
|
377
|
-
|
|
378
|
-
# Extract rows
|
|
379
|
-
rows = []
|
|
380
|
-
for row in table.css('tbody tr'):
|
|
381
|
-
cells = [td.text for td in row.css('td')]
|
|
382
|
-
rows.append(dict(zip(headers, cells)))
|
|
383
|
-
|
|
384
|
-
return rows
|
|
385
|
-
```
|
|
386
|
-
|
|
387
|
-
### Navigation Menu
|
|
388
|
-
|
|
389
|
-
```python
|
|
390
|
-
from scrapling.fetchers import Fetcher
|
|
391
|
-
|
|
392
|
-
def extract_menu():
|
|
393
|
-
page = Fetcher.get('https://example.com')
|
|
394
|
-
|
|
395
|
-
# Find navigation
|
|
396
|
-
nav = page.css('nav')[0]
|
|
397
|
-
|
|
398
|
-
menu = {}
|
|
399
|
-
for item in nav.css('li'):
|
|
400
|
-
links = item.css('a')
|
|
401
|
-
if links:
|
|
402
|
-
link = links[0]
|
|
403
|
-
menu[link.text] = {
|
|
404
|
-
'url': link['href'],
|
|
405
|
-
'has_submenu': bool(item.css('.submenu'))
|
|
406
|
-
}
|
|
407
|
-
|
|
408
|
-
return menu
|
|
409
|
-
```
|
|
410
|
-
|
|
411
|
-
## When to Use
|
|
412
|
-
|
|
413
|
-
Use `Fetcher` when:
|
|
414
|
-
|
|
415
|
-
- Need rapid HTTP requests.
|
|
416
|
-
- Want minimal overhead.
|
|
417
|
-
- Don't need JavaScript execution (the website can be scraped through requests).
|
|
418
|
-
- Need some stealth features (e.g., the targeted website is using protection but doesn't use JavaScript challenges).
|
|
419
|
-
|
|
420
|
-
Use `FetcherSession` when:
|
|
421
|
-
|
|
422
|
-
- Making multiple requests to the same or different sites.
|
|
423
|
-
- Need to maintain cookies/authentication between requests.
|
|
424
|
-
- Want connection pooling for better performance.
|
|
425
|
-
- Require consistent configuration across requests.
|
|
426
|
-
- Working with APIs that require a session state.
|
|
427
|
-
|
|
428
|
-
Use other fetchers when:
|
|
429
|
-
|
|
430
|
-
- Need browser automation.
|
|
431
|
-
- Need advanced anti-bot/stealth capabilities.
|
|
432
|
-
- Need JavaScript support or need to interact with dynamic content.
|