hyperresearch 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. hyperresearch-0.2.0/.github/workflows/ci.yml +23 -0
  2. hyperresearch-0.2.0/.github/workflows/publish.yml +47 -0
  3. hyperresearch-0.2.0/.gitignore +13 -0
  4. hyperresearch-0.2.0/CHANGELOG.md +72 -0
  5. hyperresearch-0.2.0/CONTRIBUTING.md +76 -0
  6. hyperresearch-0.2.0/LICENSE +21 -0
  7. hyperresearch-0.2.0/PKG-INFO +172 -0
  8. hyperresearch-0.2.0/PLAN.md +330 -0
  9. hyperresearch-0.2.0/README.md +132 -0
  10. hyperresearch-0.2.0/pyproject.toml +78 -0
  11. hyperresearch-0.2.0/src/hyperresearch/__init__.py +3 -0
  12. hyperresearch-0.2.0/src/hyperresearch/__main__.py +5 -0
  13. hyperresearch-0.2.0/src/hyperresearch/cli/__init__.py +117 -0
  14. hyperresearch-0.2.0/src/hyperresearch/cli/_output.py +131 -0
  15. hyperresearch-0.2.0/src/hyperresearch/cli/assets.py +114 -0
  16. hyperresearch-0.2.0/src/hyperresearch/cli/batch.py +301 -0
  17. hyperresearch-0.2.0/src/hyperresearch/cli/config_cmd.py +143 -0
  18. hyperresearch-0.2.0/src/hyperresearch/cli/dedup.py +138 -0
  19. hyperresearch-0.2.0/src/hyperresearch/cli/export.py +117 -0
  20. hyperresearch-0.2.0/src/hyperresearch/cli/fetch.py +294 -0
  21. hyperresearch-0.2.0/src/hyperresearch/cli/fetch_batch.py +231 -0
  22. hyperresearch-0.2.0/src/hyperresearch/cli/git_cmd.py +161 -0
  23. hyperresearch-0.2.0/src/hyperresearch/cli/graph.py +286 -0
  24. hyperresearch-0.2.0/src/hyperresearch/cli/import_cmd.py +85 -0
  25. hyperresearch-0.2.0/src/hyperresearch/cli/index.py +77 -0
  26. hyperresearch-0.2.0/src/hyperresearch/cli/install.py +160 -0
  27. hyperresearch-0.2.0/src/hyperresearch/cli/link.py +81 -0
  28. hyperresearch-0.2.0/src/hyperresearch/cli/lint.py +212 -0
  29. hyperresearch-0.2.0/src/hyperresearch/cli/main.py +170 -0
  30. hyperresearch-0.2.0/src/hyperresearch/cli/mcp_cmd.py +19 -0
  31. hyperresearch-0.2.0/src/hyperresearch/cli/note.py +481 -0
  32. hyperresearch-0.2.0/src/hyperresearch/cli/repair.py +254 -0
  33. hyperresearch-0.2.0/src/hyperresearch/cli/research.py +313 -0
  34. hyperresearch-0.2.0/src/hyperresearch/cli/search.py +125 -0
  35. hyperresearch-0.2.0/src/hyperresearch/cli/serve.py +26 -0
  36. hyperresearch-0.2.0/src/hyperresearch/cli/setup.py +356 -0
  37. hyperresearch-0.2.0/src/hyperresearch/cli/sources.py +97 -0
  38. hyperresearch-0.2.0/src/hyperresearch/cli/tag.py +121 -0
  39. hyperresearch-0.2.0/src/hyperresearch/cli/template.py +58 -0
  40. hyperresearch-0.2.0/src/hyperresearch/cli/topic.py +163 -0
  41. hyperresearch-0.2.0/src/hyperresearch/cli/watch.py +113 -0
  42. hyperresearch-0.2.0/src/hyperresearch/core/__init__.py +1 -0
  43. hyperresearch-0.2.0/src/hyperresearch/core/agent_docs.py +331 -0
  44. hyperresearch-0.2.0/src/hyperresearch/core/config.py +113 -0
  45. hyperresearch-0.2.0/src/hyperresearch/core/db.py +162 -0
  46. hyperresearch-0.2.0/src/hyperresearch/core/enrich.py +110 -0
  47. hyperresearch-0.2.0/src/hyperresearch/core/fetcher.py +127 -0
  48. hyperresearch-0.2.0/src/hyperresearch/core/frontmatter.py +55 -0
  49. hyperresearch-0.2.0/src/hyperresearch/core/hooks.py +358 -0
  50. hyperresearch-0.2.0/src/hyperresearch/core/linker.py +138 -0
  51. hyperresearch-0.2.0/src/hyperresearch/core/migrations.py +80 -0
  52. hyperresearch-0.2.0/src/hyperresearch/core/note.py +117 -0
  53. hyperresearch-0.2.0/src/hyperresearch/core/patterns.py +12 -0
  54. hyperresearch-0.2.0/src/hyperresearch/core/similarity.py +81 -0
  55. hyperresearch-0.2.0/src/hyperresearch/core/sync.py +301 -0
  56. hyperresearch-0.2.0/src/hyperresearch/core/templates.py +209 -0
  57. hyperresearch-0.2.0/src/hyperresearch/core/vault.py +153 -0
  58. hyperresearch-0.2.0/src/hyperresearch/export/__init__.py +1 -0
  59. hyperresearch-0.2.0/src/hyperresearch/graph/__init__.py +1 -0
  60. hyperresearch-0.2.0/src/hyperresearch/indexgen/__init__.py +1 -0
  61. hyperresearch-0.2.0/src/hyperresearch/indexgen/generator.py +256 -0
  62. hyperresearch-0.2.0/src/hyperresearch/mcp/__init__.py +1 -0
  63. hyperresearch-0.2.0/src/hyperresearch/mcp/server.py +404 -0
  64. hyperresearch-0.2.0/src/hyperresearch/models/__init__.py +1 -0
  65. hyperresearch-0.2.0/src/hyperresearch/models/graph.py +21 -0
  66. hyperresearch-0.2.0/src/hyperresearch/models/note.py +89 -0
  67. hyperresearch-0.2.0/src/hyperresearch/models/output.py +28 -0
  68. hyperresearch-0.2.0/src/hyperresearch/models/search.py +24 -0
  69. hyperresearch-0.2.0/src/hyperresearch/py.typed +0 -0
  70. hyperresearch-0.2.0/src/hyperresearch/search/__init__.py +1 -0
  71. hyperresearch-0.2.0/src/hyperresearch/search/filters.py +95 -0
  72. hyperresearch-0.2.0/src/hyperresearch/search/fts.py +139 -0
  73. hyperresearch-0.2.0/src/hyperresearch/serve/__init__.py +1 -0
  74. hyperresearch-0.2.0/src/hyperresearch/serve/renderer.py +124 -0
  75. hyperresearch-0.2.0/src/hyperresearch/serve/server.py +588 -0
  76. hyperresearch-0.2.0/src/hyperresearch/skills/__init__.py +1 -0
  77. hyperresearch-0.2.0/src/hyperresearch/skills/research.md +172 -0
  78. hyperresearch-0.2.0/src/hyperresearch/web/__init__.py +1 -0
  79. hyperresearch-0.2.0/src/hyperresearch/web/base.py +101 -0
  80. hyperresearch-0.2.0/src/hyperresearch/web/builtin.py +111 -0
  81. hyperresearch-0.2.0/src/hyperresearch/web/crawl4ai_provider.py +253 -0
  82. hyperresearch-0.2.0/tests/__init__.py +0 -0
  83. hyperresearch-0.2.0/tests/conftest.py +83 -0
  84. hyperresearch-0.2.0/tests/test_cli/__init__.py +0 -0
  85. hyperresearch-0.2.0/tests/test_cli/test_commands.py +176 -0
  86. hyperresearch-0.2.0/tests/test_cli/test_note_ops.py +70 -0
  87. hyperresearch-0.2.0/tests/test_core/__init__.py +0 -0
  88. hyperresearch-0.2.0/tests/test_core/test_frontmatter.py +72 -0
  89. hyperresearch-0.2.0/tests/test_core/test_note.py +104 -0
  90. hyperresearch-0.2.0/tests/test_core/test_sync.py +98 -0
  91. hyperresearch-0.2.0/tests/test_core/test_vault.py +67 -0
  92. hyperresearch-0.2.0/tests/test_graph/__init__.py +0 -0
  93. hyperresearch-0.2.0/tests/test_graph/test_links.py +77 -0
  94. hyperresearch-0.2.0/tests/test_search/__init__.py +0 -0
  95. hyperresearch-0.2.0/tests/test_search/test_fts.py +58 -0
@@ -0,0 +1,23 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11", "3.12", "3.13"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ - uses: actions/setup-python@v5
19
+ with:
20
+ python-version: ${{ matrix.python-version }}
21
+ - run: pip install -e ".[dev]"
22
+ - run: ruff check src/ tests/
23
+ - run: pytest tests/ -v
@@ -0,0 +1,47 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ contents: read
9
+
10
+ jobs:
11
+ build:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.13"
19
+
20
+ - name: Install build tools
21
+ run: pip install build
22
+
23
+ - name: Build package
24
+ run: python -m build
25
+
26
+ - name: Upload artifact
27
+ uses: actions/upload-artifact@v4
28
+ with:
29
+ name: dist
30
+ path: dist/
31
+
32
+ publish:
33
+ needs: build
34
+ runs-on: ubuntu-latest
35
+ environment: pypi
36
+ permissions:
37
+ id-token: write
38
+
39
+ steps:
40
+ - name: Download artifact
41
+ uses: actions/download-artifact@v4
42
+ with:
43
+ name: dist
44
+ path: dist/
45
+
46
+ - name: Publish to PyPI
47
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .venv/
7
+ .idea/
8
+ *.db
9
+ .hyperresearch/
10
+ .pytest_cache/
11
+ .mypy_cache/
12
+ .ruff_cache/
13
+ *.egg
@@ -0,0 +1,72 @@
1
+ # Changelog
2
+
3
+ ## [0.2.0] - 2026-04-10
4
+
5
+ ### New
6
+
7
+ - **`/research` skill** — Scripted deep research workflow as a Claude Code slash command. Clarifies ambiguous requests, searches broadly, fetches aggressively, follows rabbit holes, auto-curates, synthesizes, and presents findings with hub notes
8
+ - **`hyperresearch setup`** — Interactive TUI onboarding: web provider, browser profile selection/creation, agent hooks. Auto-launches on first `install`
9
+ - **`hyperresearch fetch-batch`** — Concurrent multi-URL fetch with batched sync (O(1) syncs instead of O(n))
10
+ - **`hyperresearch link --auto`** — Holistic auto-linking: scans notes for mentions of other notes' titles and appends wiki-links
11
+ - **`hyperresearch assets list/path`** — Browse downloaded screenshots and images
12
+ - **`--save-assets` flag** — Opt-in screenshot + content image download on fetch
13
+ - **`--visible` flag** — Non-headless browser for stubborn auth sites (auto-enabled for LinkedIn, Twitter, Facebook, Instagram, TikTok)
14
+ - **`--max-tokens` on search** — Token budget truncation for context-aware agents
15
+ - **Auto-curation at fetch time** — Notes arrive with auto-generated tags and summaries
16
+ - **MCP write tools** — `fetch_url`, `create_note`, `update_note` (MCP server is now read-write)
17
+ - **MinHash+LSH dedup** — O(n) approximate dedup for large vaults (200+ notes), falls back to brute-force for small vaults
18
+ - **Hub notes auto-surfaced** after research sessions
19
+ - **Synthesis notes** saved as feedback loop (agent Q&A becomes searchable)
20
+ - **`hyperresearch-fetcher` subagent** — Haiku-powered URL fetcher installed to `.claude/agents/`
21
+ - **Login wall detection** — `AUTH_REQUIRED` error instead of saving login page junk
22
+ - **Smart SPA wait** — Polls DOM stability (2s initial + 10s ceiling) instead of fixed delays
23
+
24
+ ### Changed
25
+
26
+ - **crawl4ai is the sole browser provider** — Removed firecrawl, tavily, trafilatura
27
+ - **crawl4ai v0.8.x API** — AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, arun/arun_many
28
+ - **Authenticated crawling** via crawl4ai browser profiles (`crwl profiles` or setup TUI)
29
+ - **CLI path baked into CLAUDE.md** — Works without venv activation (forward slashes for Windows bash)
30
+ - **Deep research philosophy** — Agent docs say "over-collect, then prune" and "go down rabbit holes"
31
+ - **Windows encoding fix** — `stream.reconfigure(encoding="utf-8")` at startup, no more charmap crashes
32
+ - **Note slugs capped at 80 chars** — Avoids Windows MAX_PATH issues
33
+ - **Anti-bot stealth always on** when crawl4ai is used (no setup question)
34
+ - **Config commands** now support `web.provider`, `web.profile`, `web.magic`
35
+
36
+ ### Removed
37
+
38
+ - Dead fields: `confidence`, `superseded_by`, `llm_compiled`, `llm_model`, `compile_source`
39
+ - Tag plural normalization (use explicit `tag_aliases` instead)
40
+ - `deprecated-no-successor` and `low-confidence` lint rules
41
+ - Firecrawl, Tavily, Trafilatura web providers
42
+
43
+ ## [0.1.0] - 2026-04-09
44
+
45
+ Initial release. Forked from [llm-kasten](https://github.com/jordan-gibbs/llm-kasten) and repositioned for agent-driven research workflows.
46
+
47
+ ### New
48
+
49
+ - **`hyperresearch install`** — One-step setup: init vault + inject agent docs + install PreToolUse hooks for Claude Code, Codex, Cursor, Gemini CLI
50
+ - **`hyperresearch fetch <url>`** — Fetch a URL, extract content, save as a research note with source tracking
51
+ - **`hyperresearch research <topic>`** — Deep research: web search, fetch results, follow links, save as linked notes, generate synthesis MOC
52
+ - **`hyperresearch sources list/check`** — List and query fetched web sources
53
+ - **Web provider plugin system** — Pluggable backends: builtin (stdlib), crawl4ai (local headless browser)
54
+ - **Agent hook system** — PreToolUse hooks that remind agents to check the research base before web searches
55
+ - **Sources table** — URL deduplication, domain tracking, fetch metadata
56
+ - **Extended frontmatter** — `source_domain`, `fetched_at`, `fetch_provider` fields
57
+ - **MCP server** with 10 tools including `check_source` and `list_sources`
58
+
59
+ ### From kasten (the backbone)
60
+
61
+ - SQLite FTS5 full-text search with BM25 ranking
62
+ - Markdown notes with YAML frontmatter as source of truth
63
+ - `[[wiki-link]]` tracking with backlinks
64
+ - `--json` / `-j` structured output on every command
65
+ - Note lifecycle: draft → review → evergreen → stale → deprecated → archive
66
+ - Auto-sync (mtime + SHA-256 change detection)
67
+ - Agent doc injection (CLAUDE.md, AGENTS.md, GEMINI.md, copilot-instructions.md)
68
+ - Web viewer with force-directed knowledge graph
69
+ - 70 tests
70
+
71
+ [0.2.0]: https://github.com/jordan-gibbs/hyperresearch/releases/tag/v0.2.0
72
+ [0.1.0]: https://github.com/jordan-gibbs/hyperresearch/releases/tag/v0.1.0
@@ -0,0 +1,76 @@
1
+ # Contributing to hyperresearch
2
+
3
+ Thank you for your interest in contributing.
4
+
5
+ ## Development setup
6
+
7
+ ```bash
8
+ git clone https://github.com/jordan-gibbs/hyperresearch.git
9
+ cd hyperresearch
10
+ python -m venv .venv
11
+ source .venv/bin/activate # or .venv\Scripts\activate on Windows
12
+ pip install -e ".[dev]"
13
+ ```
14
+
15
+ ## Running tests
16
+
17
+ ```bash
18
+ python -m pytest tests/ -v
19
+ ```
20
+
21
+ All tests must pass before submitting a PR. The test suite runs in under 30 seconds.
22
+
23
+ ## Code style
24
+
25
+ This project uses [ruff](https://docs.astral.sh/ruff/) for linting:
26
+
27
+ ```bash
28
+ ruff check src/
29
+ ruff format src/
30
+ ```
31
+
32
+ Configuration is in `pyproject.toml`. Line length is 100 characters.
33
+
34
+ ## Type checking
35
+
36
+ ```bash
37
+ mypy src/hyperresearch/
38
+ ```
39
+
40
+ Strict mode is enabled in `pyproject.toml`.
41
+
42
+ ## Submitting changes
43
+
44
+ 1. Fork the repository.
45
+ 2. Create a branch from `main`.
46
+ 3. Make your changes with tests.
47
+ 4. Run `ruff check src/` and `python -m pytest tests/`.
48
+ 5. Open a pull request against `main`.
49
+
50
+ Keep PRs focused on a single change. Include a clear description of what changed and why.
51
+
52
+ ## Reporting issues
53
+
54
+ Open an issue at [github.com/jordan-gibbs/hyperresearch/issues](https://github.com/jordan-gibbs/hyperresearch/issues). Include:
55
+
56
+ - What you expected to happen
57
+ - What actually happened
58
+ - Steps to reproduce
59
+ - hyperresearch version (`hyperresearch --version`)
60
+ - Python version and OS
61
+
62
+ ## Architecture overview
63
+
64
+ - `src/hyperresearch/core/` -- vault management, sync engine, frontmatter parsing, SQLite schema
65
+ - `src/hyperresearch/cli/` -- typer CLI commands, output formatting
66
+ - `src/hyperresearch/search/` -- FTS5 search engine, filters, ranking
67
+ - `src/hyperresearch/models/` -- Pydantic models for notes, output envelopes
68
+ - `src/hyperresearch/graph/` -- link parsing (shared patterns)
69
+ - `src/hyperresearch/indexgen/` -- auto-generated index page builder
70
+ - `src/hyperresearch/web/` -- web fetch provider plugins (builtin, crawl4ai)
71
+ - `src/hyperresearch/mcp/` -- MCP server exposing vault tools
72
+ - `src/hyperresearch/skills/` -- bundled agent skills (the `/research` workflow)
73
+ - `src/hyperresearch/serve/` -- lightweight web UI server
74
+ - `tests/` -- pytest test suite
75
+
76
+ The key design principle: markdown files are the source of truth, SQLite is a derived cache that can be rebuilt at any time with `hyperresearch sync --force`.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jordan Gibbs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,172 @@
1
+ Metadata-Version: 2.4
2
+ Name: hyperresearch
3
+ Version: 0.2.0
4
+ Summary: Agent-driven research knowledge base. Browse, collect, and synthesize web sources into a searchable wiki.
5
+ Project-URL: Homepage, https://github.com/jordan-gibbs/hyperresearch
6
+ Project-URL: Repository, https://github.com/jordan-gibbs/hyperresearch
7
+ Project-URL: Issues, https://github.com/jordan-gibbs/hyperresearch/issues
8
+ Author-email: Jordan Gibbs <jordan@jordangibbs.xyz>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: agent,cli,knowledge-base,llm,markdown,research,wiki,zettelkasten
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
20
+ Requires-Python: >=3.11
21
+ Requires-Dist: jinja2>=3.1
22
+ Requires-Dist: platformdirs>=4.0
23
+ Requires-Dist: pydantic>=2.0
24
+ Requires-Dist: pyyaml>=6.0
25
+ Requires-Dist: rich>=13.0
26
+ Requires-Dist: typer>=0.9.0
27
+ Provides-Extra: all
28
+ Requires-Dist: crawl4ai>=0.4; extra == 'all'
29
+ Requires-Dist: mcp>=1.6; extra == 'all'
30
+ Provides-Extra: crawl4ai
31
+ Requires-Dist: crawl4ai>=0.4; extra == 'crawl4ai'
32
+ Provides-Extra: dev
33
+ Requires-Dist: mypy>=1.8; extra == 'dev'
34
+ Requires-Dist: pytest-cov>=4.1; extra == 'dev'
35
+ Requires-Dist: pytest>=7.4; extra == 'dev'
36
+ Requires-Dist: ruff>=0.3; extra == 'dev'
37
+ Provides-Extra: mcp
38
+ Requires-Dist: mcp>=1.6; extra == 'mcp'
39
+ Description-Content-Type: text/markdown
40
+
41
+ # Hyperresearch
42
+
43
+ Agent-driven research knowledge base. Install it, and your AI coding agent can collect, search, and synthesize web research into a persistent, searchable wiki — across sessions.
44
+
45
+ ```bash
46
+ pip install hyperresearch
47
+ hyperresearch install # init vault + hook your agent
48
+ ```
49
+
50
+ That's it. Your agent now checks the research base before searching the web, saves useful findings automatically, and builds a knowledge graph over time.
51
+
52
+ ## How it works
53
+
54
+ 1. **Agent finds something useful** (via its own web search, browsing, or your input)
55
+ 2. **Agent saves it**: `hyperresearch fetch "https://..." --tag ml -j` or `hyperresearch note new "Title" --body-file content.md -j`
56
+ 3. **Next time it needs info**, the PreToolUse hook reminds it: *"check hyperresearch first"*
57
+ 4. **Agent searches the KB**: `hyperresearch search "attention mechanisms" -j`
58
+ 5. **Knowledge compounds** across sessions — no redundant fetches, no lost context
59
+
60
+ ```
61
+ your-repo/
62
+ .hyperresearch/ # Hidden: config, SQLite index, hook script
63
+ research/
64
+ notes/ # Markdown notes (source of truth)
65
+ index/ # Auto-generated wiki pages
66
+ CLAUDE.md # Agent docs (auto-injected)
67
+ ```
68
+
69
+ ## Commands
70
+
71
+ ```bash
72
+ # Setup
73
+ hyperresearch install # Init + hooks (Claude Code, Cursor, Codex, Gemini)
74
+ hyperresearch install --platform all # Hook all supported platforms
75
+
76
+ # Collect
77
+ hyperresearch fetch <url> --tag t -j # Save a URL as a note
78
+ hyperresearch research "topic" --max 5 -j # Search → fetch → link → synthesize (needs crawl4ai)
79
+
80
+ # Search & read
81
+ hyperresearch search "query" -j # Full-text search
82
+ hyperresearch note show <id> -j # Read a note
83
+ hyperresearch note list --tag ml -j # List notes by tag
84
+
85
+ # Manage
86
+ hyperresearch sources list -j # What URLs have been fetched
87
+ hyperresearch sources check <url> -j # Has this URL been fetched?
88
+ hyperresearch repair -j # Fix links, promote notes, rebuild indexes
89
+ hyperresearch status -j # Vault health overview
90
+ ```
91
+
92
+ Every command returns `{"ok": true, "data": {...}}` with `-j`.
93
+
94
+ ## Agent integration
95
+
96
+ `hyperresearch install` does three things:
97
+
98
+ 1. **Creates the vault** (`.hyperresearch/` + `research/`)
99
+ 2. **Injects usage docs** into CLAUDE.md (or AGENTS.md, GEMINI.md, copilot-instructions.md)
100
+ 3. **Installs PreToolUse hooks** that fire before web searches:
101
+
102
+ | Platform | Hook | Trigger |
103
+ |----------|------|---------|
104
+ | Claude Code | `.claude/settings.json` | Before Glob, Grep, WebSearch, WebFetch |
105
+ | Codex | `.codex/hooks.json` | Before Bash |
106
+ | Cursor | `.cursor/rules/hyperresearch.mdc` | Always-apply rule |
107
+ | Gemini CLI | `.gemini/settings.json` | Before tool calls |
108
+
109
+ The hook doesn't block — it reminds the agent to check the research base first.
110
+
111
+ ## Web providers
112
+
113
+ By default, agents use their own web tools (WebSearch, WebFetch) and pipe content into hyperresearch. For JS-rendered pages, blocked sites, or authenticated content, install crawl4ai (local headless Chromium):
114
+
115
+ ```bash
116
+ pip install hyperresearch[crawl4ai]
117
+ crawl4ai-setup # Install browser (one-time)
118
+ ```
119
+
120
+ Configure in `.hyperresearch/config.toml`:
121
+ ```toml
122
+ [web]
123
+ provider = "crawl4ai" # or "builtin" (stdlib urllib, no JS)
124
+ profile = "" # Browser profile name for authenticated crawling (optional)
125
+ magic = false # Anti-bot stealth mode (recommended for social media)
126
+ ```
127
+
128
+ ### Authenticated crawling
129
+
130
+ Access login-gated content (LinkedIn, Twitter, paywalled sites) by creating a login profile:
131
+
132
+ ```bash
133
+ hyperresearch setup # Choose option 1 — browser opens, log into your sites, done
134
+ # Or manually:
135
+ crwl profiles # Create profile, log in, press q when done
136
+ ```
137
+
138
+ ```toml
139
+ # .hyperresearch/config.toml
140
+ [web]
141
+ profile = "research" # Your profile name
142
+ ```
143
+
144
+ ## MCP server
145
+
146
+ For Claude Desktop, Cursor inline, or any MCP-compatible agent:
147
+
148
+ ```bash
149
+ pip install hyperresearch[mcp]
150
+ ```
151
+
152
+ ```json
153
+ {"mcpServers": {"hyperresearch": {"command": "hyperresearch", "args": ["mcp"]}}}
154
+ ```
155
+
156
+ 13 tools: `search_notes`, `read_note`, `read_many`, `list_notes`, `get_backlinks`, `get_hubs`, `vault_status`, `lint_vault`, `check_source`, `list_sources`, `fetch_url`, `create_note`, `update_note`.
157
+
158
+ ## Philosophy
159
+
160
+ - **The agent IS the LLM** — hyperresearch is a dumb tool that stores, indexes, and searches. It never calls an LLM.
161
+ - **Files are truth** — markdown notes survive the tool dying. SQLite is a rebuildable cache.
162
+ - **Agents already have web access** — hyperresearch is where they *store* what they find, not how they find it.
163
+ - **Check before you fetch** — the hook system prevents redundant web searches across sessions.
164
+
165
+ ## Requirements
166
+
167
+ - Python 3.11+
168
+ - Works on Windows, macOS, Linux
169
+
170
+ ## License
171
+
172
+ [MIT](LICENSE)