@askalf/deepdive 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DISCLAIMER.md +158 -0
- package/LICENSE +21 -0
- package/README.md +140 -0
- package/dist/agent.d.ts +55 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +100 -0
- package/dist/agent.js.map +1 -0
- package/dist/browser.d.ts +26 -0
- package/dist/browser.d.ts.map +1 -0
- package/dist/browser.js +75 -0
- package/dist/browser.js.map +1 -0
- package/dist/citations.d.ts +10 -0
- package/dist/citations.d.ts.map +1 -0
- package/dist/citations.js +25 -0
- package/dist/citations.js.map +1 -0
- package/dist/cli.d.ts +11 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +206 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +26 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +63 -0
- package/dist/config.js.map +1 -0
- package/dist/extract.d.ts +10 -0
- package/dist/extract.d.ts.map +1 -0
- package/dist/extract.js +52 -0
- package/dist/extract.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +11 -0
- package/dist/index.js.map +1 -0
- package/dist/llm.d.ts +19 -0
- package/dist/llm.d.ts.map +1 -0
- package/dist/llm.js +33 -0
- package/dist/llm.js.map +1 -0
- package/dist/plan.d.ts +8 -0
- package/dist/plan.d.ts.map +1 -0
- package/dist/plan.js +72 -0
- package/dist/plan.js.map +1 -0
- package/dist/search/brave.d.ts +8 -0
- package/dist/search/brave.d.ts.map +1 -0
- package/dist/search/brave.js +31 -0
- package/dist/search/brave.js.map +1 -0
- package/dist/search/duckduckgo.d.ts +7 -0
- package/dist/search/duckduckgo.d.ts.map +1 -0
- package/dist/search/duckduckgo.js +94 -0
- package/dist/search/duckduckgo.js.map +1 -0
- package/dist/search/searxng.d.ts +8 -0
- package/dist/search/searxng.d.ts.map +1 -0
- package/dist/search/searxng.js +29 -0
- package/dist/search/searxng.js.map +1 -0
- package/dist/search/tavily.d.ts +8 -0
- package/dist/search/tavily.d.ts.map +1 -0
- package/dist/search/tavily.js +38 -0
- package/dist/search/tavily.js.map +1 -0
- package/dist/search.d.ts +13 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +47 -0
- package/dist/search.js.map +1 -0
- package/dist/synthesize.d.ts +8 -0
- package/dist/synthesize.d.ts.map +1 -0
- package/dist/synthesize.js +40 -0
- package/dist/synthesize.js.map +1 -0
- package/package.json +67 -0
package/DISCLAIMER.md
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# Disclaimer
|
|
2
|
+
|
|
3
|
+
**Last updated: 2026-04-21**
|
|
4
|
+
|
|
5
|
+
This document is a plain-language expansion of the MIT License that ships with deepdive. In case of conflict, the MIT [LICENSE](LICENSE) controls.
|
|
6
|
+
|
|
7
|
+
By downloading, installing, running, linking against, or otherwise using deepdive (the "Software"), you acknowledge and agree to everything below. If you do not agree, do not use the Software.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## 1. Provided "AS IS"
|
|
12
|
+
|
|
13
|
+
The Software is provided **"AS IS" and "AS AVAILABLE"**, without warranty of any kind, express, implied, or statutory, including but not limited to:
|
|
14
|
+
|
|
15
|
+
- warranties of merchantability, fitness for a particular purpose, title, or non-infringement
|
|
16
|
+
- warranties that the Software will be error-free, uninterrupted, secure, or free of harmful components
|
|
17
|
+
- warranties that any defect or bug will be corrected
|
|
18
|
+
- warranties regarding the accuracy, reliability, completeness, timeliness, or usefulness of any output produced by or through the Software, including research reports and cited content
|
|
19
|
+
|
|
20
|
+
Research output from the Software is machine-generated and may contain factual errors, omissions, outdated information, misattributed citations, or hallucinated content. **Do not rely on deepdive output for medical, legal, financial, safety-critical, or other high-stakes decisions without independent verification.**
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## 2. Limitation of liability
|
|
25
|
+
|
|
26
|
+
To the maximum extent permitted by applicable law, in no event shall the authors, maintainers, contributors, copyright holders, or any person associated with the project be liable for any:
|
|
27
|
+
|
|
28
|
+
- direct, indirect, incidental, special, exemplary, consequential, punitive, or any other damages
|
|
29
|
+
- loss of profits, revenue, data, goodwill, use, opportunity, or business
|
|
30
|
+
- service interruption, computer failure or malfunction, subscription loss, suspension, or termination
|
|
31
|
+
- costs of procurement of substitute goods or services
|
|
32
|
+
- claims by third parties arising out of content produced by or through the Software
|
|
33
|
+
|
|
34
|
+
arising out of or in connection with the Software, its use, its inability to be used, its interaction with any third-party service, or any content produced by it, whether based on warranty, contract, tort (including negligence), strict liability, statute, or any other legal theory, and whether or not the project has been advised of the possibility of such damages.
|
|
35
|
+
|
|
36
|
+
Where liability cannot be fully excluded under applicable law, it is limited to the maximum extent permitted.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## 3. No affiliation
|
|
41
|
+
|
|
42
|
+
deepdive is an **independent, unofficial, third-party project**. It is:
|
|
43
|
+
|
|
44
|
+
- **not affiliated with, endorsed by, sponsored by, or in any way officially connected to** Anthropic PBC, OpenAI OpCo LLC, Google LLC, DuckDuckGo, SearXNG, Brave Software, Tavily AI, Perplexity AI, Microsoft, the Playwright project, or any other company, product, or service mentioned in the documentation, source code, or default configuration
|
|
45
|
+
- **not an official client, SDK, integration, or partner** of any of the above
|
|
46
|
+
- **not authorized to speak on behalf of** any of the above
|
|
47
|
+
|
|
48
|
+
All product names, logos, brands, trademarks, and registered trademarks referenced anywhere in this project are property of their respective owners. Use of those names is for identification and interoperability purposes only and does not imply endorsement.
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## 4. User responsibility
|
|
53
|
+
|
|
54
|
+
You are solely responsible for:
|
|
55
|
+
|
|
56
|
+
- **Your use of any third-party service reached through deepdive**, including search engines, LLM providers, and the arbitrary web pages deepdive fetches on your behalf. Your use of each service is governed by that service's own terms of service, acceptable-use policy, `robots.txt`, rate limits, and any other agreement you have with that service. Review them. Follow them.
|
|
57
|
+
- **Web scraping and fetching.** deepdive drives a headless browser against the URLs that your search backend returns for the sub-queries the LLM planned. You are responsible for ensuring your use complies with the terms of service of the sites you cause deepdive to visit, as well as applicable laws such as the Computer Fraud and Abuse Act (US), the Computer Misuse Act (UK), GDPR, and any other relevant regime. Do not use deepdive to access sites that prohibit automated access, to bypass paywalls or authentication, or to scrape at a volume that would constitute abuse.
|
|
58
|
+
- **Your subscriptions, API keys, and accounts.** You are responsible for all activity conducted under them and for keeping them secure.
|
|
59
|
+
- **Compliance with all laws applicable to you and your use**, including but not limited to export control, sanctions, privacy, data protection, consumer protection, copyright, and industry-specific regulations (HIPAA, PCI-DSS, FedRAMP, GDPR, CCPA, etc.).
|
|
60
|
+
- **The content you send through the Software and the content you receive back.** The project does not moderate, filter, store, or review this content. You are responsible for ensuring your inputs and outputs are lawful, ethical, and appropriate for your context.
|
|
61
|
+
- **Determining whether the Software is appropriate for your use case.** The Software is a general-purpose research tool. It is not intended for, and is not warranted as suitable for, safety-critical, life-critical, mission-critical, regulated, or production-grade environments without your own independent review, hardening, and diligence.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## 5. Accuracy of research output
|
|
66
|
+
|
|
67
|
+
**The Software does not guarantee factual accuracy.** Research output is produced by:
|
|
68
|
+
|
|
69
|
+
1. A language model decomposing your question into sub-queries,
|
|
70
|
+
2. Third-party search engines ranking web pages,
|
|
71
|
+
3. A headless browser fetching whatever HTML those pages happen to serve,
|
|
72
|
+
4. A language model synthesizing an answer from that content and inventing citation numbers.
|
|
73
|
+
|
|
74
|
+
Each step introduces the possibility of error. Citation numbers may be mapped to sources that do not actually support the cited claim. Sources may themselves be inaccurate, outdated, satirical, AI-generated, or deliberately misleading. The language model may hallucinate. The search engine may be biased or manipulated.
|
|
75
|
+
|
|
76
|
+
**Treat all deepdive output as a research lead that needs independent verification, not as a finished answer.**
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## 6. No support obligation
|
|
81
|
+
|
|
82
|
+
The project is operated on a **best-effort, volunteer basis**. There is no obligation, express or implied, to:
|
|
83
|
+
|
|
84
|
+
- respond to issues, discussions, pull requests, emails, or other communications
|
|
85
|
+
- fix bugs, address vulnerabilities, or publish updates on any timeline
|
|
86
|
+
- maintain backward compatibility between versions, except where explicitly stated in release notes
|
|
87
|
+
- continue the project at all
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## 7. No availability or continuity guarantee
|
|
92
|
+
|
|
93
|
+
The Software may stop working at any time, for any reason, including but not limited to:
|
|
94
|
+
|
|
95
|
+
- changes to search engines' HTML output, rate limits, or terms
|
|
96
|
+
- changes to LLM provider APIs or authentication flows
|
|
97
|
+
- changes to Playwright, Chromium, operating systems, runtimes, or dependencies
|
|
98
|
+
- the project entering maintenance mode, archive status, or being discontinued
|
|
99
|
+
|
|
100
|
+
You should have a fallback plan if continuous availability matters to your workflow.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## 8. Third-party services and content
|
|
105
|
+
|
|
106
|
+
deepdive reaches out to services and web pages you or your LLM's plan selects. When you use it:
|
|
107
|
+
|
|
108
|
+
- **You initiate the connection.** The project neither controls nor hosts any upstream service or content.
|
|
109
|
+
- **You are the party contracting with each upstream service** under its terms, not through the project.
|
|
110
|
+
- **The project does not warrant, endorse, or take responsibility for** the availability, accuracy, legality, quality, safety, or any other aspect of content, services, or responses provided by any upstream service or web page.
|
|
111
|
+
- **The project does not process, store, or transmit your data outside of your local machine**, except insofar as it forwards your queries to the search and LLM services you configured, and fetches the URLs those services returned.
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## 9. Credentials and local data
|
|
116
|
+
|
|
117
|
+
The Software may read API keys and OAuth tokens from environment variables or CLI flags on your behalf. It does not persist credentials to disk.
|
|
118
|
+
|
|
119
|
+
- You are responsible for the security of your machine, your user account, your shell history, and any system where credentials might be exposed.
|
|
120
|
+
- The project is not responsible for credential compromise resulting from the security of your environment, your configuration choices, or third-party software running on your system.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## 10. Export, sanctions, regulated use
|
|
125
|
+
|
|
126
|
+
The Software is distributed from the United States. You are responsible for complying with all applicable export-control laws, sanctions regimes, and regulations in your jurisdiction.
|
|
127
|
+
|
|
128
|
+
The Software is **not designed, tested, or warranted for use** in environments requiring specific regulatory certifications (HIPAA, PCI-DSS, FedRAMP, SOC 2, ISO 27001, FDA, FAA, NERC-CIP, etc.). If your use falls under such a regime, you are solely responsible for determining suitability and performing any required controls, audits, or risk assessments.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## 11. Indemnification
|
|
133
|
+
|
|
134
|
+
To the maximum extent permitted by applicable law, you agree to indemnify, defend, and hold harmless the authors, maintainers, contributors, and copyright holders of the Software from and against any and all claims, damages, losses, liabilities, costs, and expenses (including reasonable attorneys' fees) arising out of or in connection with:
|
|
135
|
+
|
|
136
|
+
- your use of the Software
|
|
137
|
+
- your violation of any third-party terms, policies, or agreements (including search-engine or site-operator terms)
|
|
138
|
+
- your violation of any law or regulation
|
|
139
|
+
- your violation of any third-party right, including privacy, publicity, or intellectual property rights
|
|
140
|
+
- any content you transmit through, cause to be fetched by, or cause to be produced by the Software
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## 12. Changes to this disclaimer
|
|
145
|
+
|
|
146
|
+
This document may be updated from time to time. Changes take effect on the date shown at the top of the file. Continued use of the Software after a change indicates acceptance of the updated disclaimer.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## 13. Governing law and severability
|
|
151
|
+
|
|
152
|
+
This disclaimer is to be interpreted consistently with the MIT License. If any provision is held to be unenforceable under applicable law, the remaining provisions remain in full force and effect, and the unenforceable provision shall be modified to the minimum extent necessary to make it enforceable while preserving its intent.
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## 14. Questions
|
|
157
|
+
|
|
158
|
+
For questions about this disclaimer, open a GitHub discussion. For security issues, follow [SECURITY.md](SECURITY.md). The project does not provide legal advice; if you need legal advice, consult a qualified attorney in your jurisdiction.
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 AskAlf
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<h1 align="center">deepdive</h1>
|
|
3
|
+
<p align="center"><strong>A local research agent. One command, cited answer.</strong><br>Decomposes your question into sub-queries, runs web searches, fetches pages through a real headless browser, and hands everything to an LLM that writes a cited markdown report. Every LLM call goes through your own router — default target is <a href="https://github.com/askalf/dario">dario</a> at <code>localhost:3456</code>, so synthesis runs on your Claude Max / Pro subscription, your own OpenAI key, or any local model. Any Anthropic-compat endpoint works.</p>
|
|
4
|
+
</p>
|
|
5
|
+
|
|
6
|
+
<p align="center"><em>Zero hosted dependencies. MIT. Independent, unofficial, third-party — see <a href="DISCLAIMER.md">DISCLAIMER.md</a>.</em></p>
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## 60 seconds
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
# 1. Have dario running (or any Anthropic-compat endpoint at a local URL).
|
|
14
|
+
# See: https://github.com/askalf/dario
|
|
15
|
+
dario proxy # http://localhost:3456, routes to Claude Max / OpenAI / etc.
|
|
16
|
+
|
|
17
|
+
# 2. Install deepdive.
|
|
18
|
+
npm install -g @askalf/deepdive
|
|
19
|
+
npx playwright install chromium # first run only, ~300 MB
|
|
20
|
+
|
|
21
|
+
# 3. Ask.
|
|
22
|
+
deepdive "how does claude's rate limiter work"
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Output is cited markdown printed to stdout. Pipe it, save it with `--out=report.md`, or stream progress with `--verbose`:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
deepdive "how does claude's rate limiter work" --verbose --out=rate-limiter.md
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Under the hood:
|
|
32
|
+
1. **Plan.** LLM decomposes your question into 3–5 searchable sub-queries.
|
|
33
|
+
2. **Search.** DuckDuckGo HTML by default (no API key). Pluggable: `--search=searxng|brave|tavily` with your own endpoint or key.
|
|
34
|
+
3. **Fetch.** Playwright-driven Chromium renders each result page (JS-rendered SPAs included).
|
|
35
|
+
4. **Extract.** Boilerplate stripped, main content capped to a word budget.
|
|
36
|
+
5. **Synthesize.** LLM writes the answer with inline `[N]` citations referencing the source list.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Why this exists
|
|
41
|
+
|
|
42
|
+
Every hosted research tool (Perplexity, OpenAI Deep Research, Gemini Deep Research) sends your queries to someone else's server, charges per query, and gives you no say in which model synthesizes the answer or which sources get read. deepdive is the self-hosted alternative: your machine, your LLM subscription, your model choice, your search backend.
|
|
43
|
+
|
|
44
|
+
Pair it with [dario](https://github.com/askalf/dario) and every research query routes through your Claude Max / Pro subscription instead of per-token API pricing — a single deep query can be 50k–200k tokens, which is exactly the workload subscription billing was built for.
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## LLM routing (default: dario)
|
|
49
|
+
|
|
50
|
+
deepdive speaks the Anthropic Messages API. The default target is `http://localhost:3456` (dario's default port), with `x-api-key: dario`:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
deepdive "…" --base-url=http://localhost:3456 --api-key=dario --model=claude-sonnet-4-6
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Any Anthropic-compat endpoint works. If you already have Claude Code credentials and dario installed, there is nothing else to configure.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Search adapters
|
|
61
|
+
|
|
62
|
+
| Adapter | Flag | Needs | Notes |
|
|
63
|
+
|---|---|---|---|
|
|
64
|
+
| DuckDuckGo HTML | `--search=duckduckgo` (default) | nothing | Scrapes `html.duckduckgo.com`. No key, no account. Quality is fine for v1; if DDG changes their HTML, the parser may need an update. |
|
|
65
|
+
| SearXNG | `--search=searxng` | `DEEPDIVE_SEARXNG_URL` | Self-hosted metasearch. Best quality/privacy combo. |
|
|
66
|
+
| Brave Search | `--search=brave` | `DEEPDIVE_BRAVE_KEY` | Brave Search API. Paid, but high quality. |
|
|
67
|
+
| Tavily | `--search=tavily` | `DEEPDIVE_TAVILY_KEY` | Research-tuned API. Tavily returns pre-extracted content, but deepdive only uses the result URLs and re-fetches each page itself, so every adapter goes through the same fetch/extract path. |
|
|
68
|
+
|
|
69
|
+
Adding a new adapter is ~30 lines — implement `SearchAdapter` in `src/search/*.ts` and register in `src/search.ts`.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## Flags
|
|
74
|
+
|
|
75
|
+
Run `deepdive --help` for the full list. The ones you'll actually use:
|
|
76
|
+
|
|
77
|
+
| Flag | Default | Description |
|
|
78
|
+
|---|---|---|
|
|
79
|
+
| `--base-url=<url>` | `http://localhost:3456` | LLM endpoint (dario / Anthropic / any compat URL) |
|
|
80
|
+
| `--api-key=<key>` | `dario` | LLM key |
|
|
81
|
+
| `--model=<name>` | `claude-sonnet-4-6` | Model for both planning and synthesis |
|
|
82
|
+
| `--search=<adapter>` | `duckduckgo` | Search backend (see table above) |
|
|
83
|
+
| `--max-sources=<n>` | `12` | Total pages fetched per run, across all sub-queries |
|
|
84
|
+
| `--results-per-query=<n>` | `5` | Candidates pulled per sub-query |
|
|
85
|
+
| `--max-words-per-source=<n>` | `2000` | Per-source content cap before synthesis |
|
|
86
|
+
| `--timeout-ms=<ms>` | `30000` | Per-fetch timeout |
|
|
87
|
+
| `--out=<path>` | — | Also write markdown to file |
|
|
88
|
+
| `--verbose`, `-v` | — | Stream progress events to stderr |
|
|
89
|
+
|
|
90
|
+
All flags mirror to `DEEPDIVE_*` env vars (e.g. `DEEPDIVE_MODEL`, `DEEPDIVE_MAX_SOURCES`). CLI flags win over env vars.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Library mode
|
|
95
|
+
|
|
96
|
+
```ts
|
|
97
|
+
import { runAgent, resolveSearchAdapter, resolveConfig } from "@askalf/deepdive";
|
|
98
|
+
|
|
99
|
+
const config = resolveConfig({}, process.env);
|
|
100
|
+
const search = await resolveSearchAdapter(config.searchAdapter, process.env);
|
|
101
|
+
|
|
102
|
+
const result = await runAgent("how does claude's rate limiter work", {
|
|
103
|
+
llm: config.llm,
|
|
104
|
+
search,
|
|
105
|
+
browser: config.browser,
|
|
106
|
+
resultsPerQuery: config.resultsPerQuery,
|
|
107
|
+
maxSources: config.maxSources,
|
|
108
|
+
maxWordsPerSource: config.maxWordsPerSource,
|
|
109
|
+
onEvent: (e) => console.error(e),
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
console.log(result.markdown);
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Trust and transparency
|
|
118
|
+
|
|
119
|
+
| Signal | Status |
|
|
120
|
+
|---|---|
|
|
121
|
+
| **Source** | One TypeScript package, small enough to audit in an evening |
|
|
122
|
+
| **Runtime dependencies** | One — `playwright`. No hosted services, no telemetry. |
|
|
123
|
+
| **Credentials** | API keys live in env vars or CLI flags; deepdive never persists them |
|
|
124
|
+
| **Network scope** | LLM endpoint (your choice), search backend (your choice), and the result URLs that backend returned for the LLM-planned sub-queries |
|
|
125
|
+
| **Telemetry** | None. Zero analytics, tracking, or data collection. |
|
|
126
|
+
| **License** | MIT |
|
|
127
|
+
|
|
128
|
+
See [DISCLAIMER.md](DISCLAIMER.md) for the full AS IS / no-affiliation / user-responsibility terms.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## Contributing
|
|
133
|
+
|
|
134
|
+
PRs welcome. Code style matches [dario](https://github.com/askalf/dario) — small TypeScript, pure decision functions, `node --test` assertions on anything with logic in it. Run `npm run build && npm test` before submitting.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## License
|
|
139
|
+
|
|
140
|
+
MIT — see [LICENSE](LICENSE) and [DISCLAIMER.md](DISCLAIMER.md).
|
package/dist/agent.d.ts
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import type { LLMConfig } from "./llm.js";
|
|
2
|
+
import type { SearchAdapter } from "./search.js";
|
|
3
|
+
import { type Plan } from "./plan.js";
|
|
4
|
+
import { type BrowserOptions } from "./browser.js";
|
|
5
|
+
import { type Source } from "./citations.js";
|
|
6
|
+
/**
 * Everything `runAgent` needs for one research run.
 */
export interface AgentConfig {
    /** LLM settings handed to both the planning step and the synthesis step. */
    llm: LLMConfig;
    /** Search backend; its `search()` is called once per planned sub-query. */
    search: SearchAdapter;
    /** Options used to construct the `BrowserSession` that fetches pages. */
    browser: BrowserOptions;
    /** Result count requested from the search adapter for each sub-query. */
    resultsPerQuery: number;
    /** Upper bound on how many candidate URLs are fetched in one run, across all sub-queries. */
    maxSources: number;
    /** Word budget passed to content extraction for each fetched page. */
    maxWordsPerSource: number;
    /** Optional listener invoked with each progress event as the run advances. */
    onEvent?: (event: AgentEvent) => void;
}
|
|
15
|
+
/**
 * Progress events delivered to `AgentConfig.onEvent`, discriminated by `type`.
 *
 * - `plan.*`        — around the LLM planning call.
 * - `search.*`      — around each sub-query; `count` is the number of results
 *                     not already seen in this run.
 * - `fetch.*`       — around each page fetch; a fetch that throws is reported
 *                     with `ok: false`, `status: 0`, `words: 0`.
 * - `synthesize.*`  — around the LLM synthesis call.
 */
export type AgentEvent =
    | { type: "plan.start"; question: string }
    | { type: "plan.done"; plan: Plan }
    | { type: "search.start"; query: string }
    | { type: "search.done"; query: string; count: number }
    | { type: "fetch.start"; url: string }
    | { type: "fetch.done"; url: string; ok: boolean; status: number; words: number }
    | { type: "synthesize.start"; sourceCount: number }
    | { type: "synthesize.done" };
|
|
43
|
+
/**
 * Value resolved by `runAgent`.
 */
export interface AgentResult {
    /** The question exactly as passed to `runAgent`. */
    question: string;
    /** The plan produced by the planning step. */
    plan: Plan;
    /** Sources whose extraction yielded non-empty content; these are what the synthesizer saw. */
    sources: Source[];
    /** Rendered markdown report built from the question, the answer and `sources`. */
    markdown: string;
    /** Simple counters describing the run. */
    usage: {
        /** Number of sub-queries in the plan. */
        queries: number;
        /** Number of candidate URLs a fetch was attempted for (after the `maxSources` cap). */
        fetched: number;
        /** Number of sources kept for synthesis (same as `sources.length`). */
        kept: number;
    };
}
|
|
54
|
+
/**
 * Run one single-pass research cycle: plan → search → fetch → extract → synthesize.
 *
 * @param question - The research question.
 * @param config - LLM, search, browser and budget settings, plus an optional event listener.
 * @param signal - Optional abort signal; checked before each search and each fetch.
 * @returns The plan, the kept sources, the rendered markdown report and usage counters.
 */
export declare function runAgent(
    question: string,
    config: AgentConfig,
    signal?: AbortSignal,
): Promise<AgentResult>;
|
|
55
|
+
//# sourceMappingURL=agent.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent.d.ts","sourceRoot":"","sources":["../src/agent.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAC1C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAEjD,OAAO,EAAe,KAAK,IAAI,EAAE,MAAM,WAAW,CAAC;AACnD,OAAO,EAAkB,KAAK,cAAc,EAAoB,MAAM,cAAc,CAAC;AAErF,OAAO,EAA0C,KAAK,MAAM,EAAE,MAAM,gBAAgB,CAAC;AAGrF,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,SAAS,CAAC;IACf,MAAM,EAAE,aAAa,CAAC;IACtB,OAAO,EAAE,cAAc,CAAC;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,UAAU,KAAK,IAAI,CAAC;CACvC;AAED,MAAM,MAAM,UAAU,GAClB;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,GACxC;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,IAAI,EAAE,IAAI,CAAA;CAAE,GACjC;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GACvC;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GACrD;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,GACpC;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,GAC/E;IAAE,IAAI,EAAE,kBAAkB,CAAC;IAAC,WAAW,EAAE,MAAM,CAAA;CAAE,GACjD;IAAE,IAAI,EAAE,iBAAiB,CAAA;CAAE,CAAC;AAEhC,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,IAAI,CAAC;IACX,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;CAC3D;AAED,wBAAsB,QAAQ,CAC5B,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,WAAW,EACnB,MAAM,CAAC,EAAE,WAAW,GACnB,OAAO,CAAC,WAAW,CAAC,CA2FtB"}
|
package/dist/agent.js
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
// Main agent loop: plan → search → fetch → extract → synthesize.
|
|
2
|
+
//
|
|
3
|
+
// Intentionally single-pass for v0.1.0. A follow-up loop (read the answer,
|
|
4
|
+
// decide if more searches are needed) is the obvious next step and belongs
|
|
5
|
+
// behind a --deep flag so v1 stays cheap and predictable.
|
|
6
|
+
import { dedupeByUrl } from "./search.js";
|
|
7
|
+
import { planQueries } from "./plan.js";
|
|
8
|
+
import { BrowserSession } from "./browser.js";
|
|
9
|
+
import { extractContent } from "./extract.js";
|
|
10
|
+
import { buildSourceTable, renderAnswerMarkdown } from "./citations.js";
|
|
11
|
+
import { synthesize } from "./synthesize.js";
|
|
12
|
+
/**
 * Run one single-pass research cycle for `question`:
 * plan → search → fetch → extract → synthesize.
 *
 * Progress is reported through the optional `config.onEvent` listener.
 *
 * @param {string} question - The research question.
 * @param {object} config - Agent configuration: `llm`, `search`, `browser`,
 *   `resultsPerQuery`, `maxSources`, `maxWordsPerSource` and optional `onEvent`.
 * @param {AbortSignal} [signal] - Optional abort signal. It is checked before
 *   every search and every fetch, and forwarded to the planner, the search
 *   adapter and the synthesizer.
 * @returns {Promise<object>} `{ question, plan, sources, markdown, usage }`.
 *   `usage.fetched` counts the URLs a fetch was attempted for; `usage.kept`
 *   counts the sources that reached synthesis.
 * @throws {Error} With message "aborted" when `signal` is already aborted at
 *   one of the checkpoints. Errors from planning, searching, browser start-up
 *   and synthesis propagate unchanged; individual page-fetch errors do not.
 */
export async function runAgent(question, config, signal) {
    emit(config, { type: "plan.start", question });
    const plan = await planQueries(question, config.llm, signal);
    emit(config, { type: "plan.done", plan });

    const candidates = await collectCandidates(plan.queries, config, signal);
    // Candidates stay in discovery order, so the cap favours earlier sub-queries.
    const toFetch = candidates.slice(0, config.maxSources);
    const fetched = await fetchPages(toFetch, config, signal);
    const extracted = extractSources(fetched, config.maxWordsPerSource);

    emit(config, { type: "synthesize.start", sourceCount: extracted.length });
    const answer = await synthesize(question, extracted, config.llm, signal);
    emit(config, { type: "synthesize.done" });

    const markdown = renderAnswerMarkdown(question, answer, extracted);
    return {
        question,
        plan,
        sources: extracted,
        markdown,
        usage: {
            queries: plan.queries.length,
            fetched: toFetch.length,
            kept: extracted.length,
        },
    };
}

/** Throw the agent's "aborted" error if `signal` exists and is already aborted. */
function throwIfAborted(signal) {
    if (signal?.aborted) {
        throw new Error("aborted");
    }
}

/**
 * Run every planned sub-query and collect result URLs not yet seen in this run,
 * in discovery order. Each candidate remembers the sub-query that surfaced it.
 */
async function collectCandidates(queries, config, signal) {
    const seenUrls = new Set();
    const candidates = [];
    for (const query of queries) {
        throwIfAborted(signal);
        emit(config, { type: "search.start", query });
        const results = await config.search.search(query, config.resultsPerQuery, signal);
        // dedupeByUrl handles repeats within one result set; seenUrls handles
        // repeats across sub-queries.
        const fresh = dedupeByUrl(results).filter((r) => !seenUrls.has(r.url));
        for (const { url, title, snippet } of fresh) {
            seenUrls.add(url);
            candidates.push({ url, title, snippet, query });
        }
        emit(config, { type: "search.done", query, count: fresh.length });
    }
    return candidates;
}

/**
 * Fetch each candidate through one BrowserSession and return the usable pages
 * (status 200–399 and more than 50 whitespace-separated words), each tagged
 * with the search-result title as `origTitle`.
 */
async function fetchPages(toFetch, config, signal) {
    const browser = new BrowserSession(config.browser);
    await browser.start();
    const fetched = [];
    try {
        for (const candidate of toFetch) {
            throwIfAborted(signal);
            emit(config, { type: "fetch.start", url: candidate.url });
            try {
                const page = await browser.fetch(candidate.url);
                const words = (page.text.match(/\S+/g) ?? []).length;
                const ok = page.status >= 200 && page.status < 400;
                emit(config, {
                    type: "fetch.done",
                    url: candidate.url,
                    ok,
                    status: page.status,
                    words,
                });
                if (ok && words > 50) {
                    fetched.push({ ...page, origTitle: candidate.title });
                }
            } catch {
                // Best effort: one unreachable page must not sink the whole run.
                // The failure is surfaced to listeners as a failed fetch.done.
                emit(config, {
                    type: "fetch.done",
                    url: candidate.url,
                    ok: false,
                    status: 0,
                    words: 0,
                });
            }
        }
    } finally {
        // Always release the browser, including on abort.
        await browser.close();
    }
    return fetched;
}

/**
 * Build the source table for the fetched pages and attach extracted content
 * to each row, dropping pages whose extraction came back empty.
 */
function extractSources(fetched, maxWordsPerSource) {
    const extracted = [];
    const sourceRows = buildSourceTable(fetched.map((page) => ({
        url: page.finalUrl || page.url,
        // Fall back to the search-result title when the page has no title.
        title: page.title || page.origTitle,
        fetchedAt: page.fetchedAt,
    })));
    fetched.forEach((page, index) => {
        const ex = extractContent(page.text, page.title || page.origTitle, maxWordsPerSource);
        if (ex.text.length > 0) {
            extracted.push({ ...sourceRows[index], content: ex.text });
        }
    });
    return extracted;
}
|
|
97
|
+
/**
 * Deliver `event` to the optional `config.onEvent` listener.
 * Does nothing when no listener is configured.
 */
function emit(config, event) {
    if (config.onEvent == null) {
        return;
    }
    config.onEvent(event);
}
|
|
100
|
+
//# sourceMappingURL=agent.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent.js","sourceRoot":"","sources":["../src/agent.ts"],"names":[],"mappings":"AAAA,iEAAiE;AACjE,EAAE;AACF,2EAA2E;AAC3E,2EAA2E;AAC3E,0DAA0D;AAI1D,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC1C,OAAO,EAAE,WAAW,EAAa,MAAM,WAAW,CAAC;AACnD,OAAO,EAAE,cAAc,EAAyC,MAAM,cAAc,CAAC;AACrF,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,EAAE,gBAAgB,EAAE,oBAAoB,EAAe,MAAM,gBAAgB,CAAC;AACrF,OAAO,EAAE,UAAU,EAA0B,MAAM,iBAAiB,CAAC;AA8BrE,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,QAAgB,EAChB,MAAmB,EACnB,MAAoB;IAEpB,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC/C,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,QAAQ,EAAE,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IAC7D,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC;IAE1C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,MAAM,UAAU,GAAqE,EAAE,CAAC;IAExF,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;QACjC,IAAI,MAAM,EAAE,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC;QAChD,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC,CAAC;QAC9C,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,eAAe,EAAE,MAAM,CAAC,CAAC;QAClF,MAAM,KAAK,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACvE,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACpB,UAAU,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;QAC7E,CAAC;QACD,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IACpE,CAAC;IAED,MAAM,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAEvD,MAAM,OAAO,GAAG,IAAI,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACnD,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IAEtB,MAAM,OAAO,GAA4C,EAAE,CAAC;IAC5D,IAAI,CAAC;QACH,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,IAAI,MAAM,EAAE,OAAO;gBAAE,MAAM,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC;YAChD,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;YAClD,IAAI,
CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACxC,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;gBACrD,IAAI,CAAC,MAAM,EAAE;oBACX,IAAI,EAAE,YAAY;oBAClB,GAAG,EAAE,CAAC,CAAC,GAAG;oBACV,EAAE,EAAE,IAAI,CAAC,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;oBAC3C,MAAM,EAAE,IAAI,CAAC,MAAM;oBACnB,KAAK;iBACN,CAAC,CAAC;gBACH,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,IAAI,KAAK,GAAG,EAAE,EAAE,CAAC;oBAC1D,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;gBAChD,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,IAAI,CAAC,MAAM,EAAE;oBACX,IAAI,EAAE,YAAY;oBAClB,GAAG,EAAE,CAAC,CAAC,GAAG;oBACV,EAAE,EAAE,KAAK;oBACT,MAAM,EAAE,CAAC;oBACT,KAAK,EAAE,CAAC;iBACT,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;YAAS,CAAC;QACT,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IACxB,CAAC;IAED,MAAM,SAAS,GAAwB,EAAE,CAAC;IAC1C,MAAM,UAAU,GAAG,gBAAgB,CACjC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAClB,GAAG,EAAE,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,GAAG;QACxB,KAAK,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,SAAS;QAC7B,SAAS,EAAE,CAAC,CAAC,SAAS;KACvB,CAAC,CAAC,CACJ,CAAC;IACF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,GAAG,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAC1B,MAAM,EAAE,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,SAAS,EAAE,MAAM,CAAC,iBAAiB,CAAC,CAAC;QACpF,IAAI,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS,CAAC,IAAI,CAAC,EAAE,GAAG,GAAG,EAAE,OAAO,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,WAAW,EAAE,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1E,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,QAAQ,EAAE,SAAS,EAAE,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IACzE,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,iBAAiB,EAAE,CAAC,CAAC;IAE1C,MAAM,QAAQ,GAAG,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAEnE,OAAO;QACL,QAAQ;QACR,IAAI;QACJ,OAAO,EAAE,SAAS;QAClB,QAAQ;QACR,KAAK,EAAE;YACL,OAAO,EAAE,IAAI
,CAAC,OAAO,CAAC,MAAM;YAC5B,OAAO,EAAE,OAAO,CAAC,MAAM;YACvB,IAAI,EAAE,SAAS,CAAC,MAAM;SACvB;KACF,CAAC;AACJ,CAAC;AAED,SAAS,IAAI,CAAC,MAAmB,EAAE,KAAiB;IAClD,MAAM,CAAC,OAAO,EAAE,CAAC,KAAK,CAAC,CAAC;AAC1B,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Snapshot of a single page load, as returned by BrowserSession.fetch().
 *
 * - url:       the URL that was passed to fetch().
 * - finalUrl:  the page's URL as reported after navigation completed.
 * - status:    status of the navigation response, or 0 when no response
 *              object was available.
 * - title:     document title ("" when it could not be read).
 * - text:      document.body.innerText ("" when there is no body),
 *              truncated with String.slice(0, maxBytes).
 * - html:      serialized page content, truncated the same way.
 * - fetchedAt: Date.now() value taken when the result was assembled.
 */
export interface FetchedPage {
|
|
2
|
+
url: string;
|
|
3
|
+
finalUrl: string;
|
|
4
|
+
status: number;
|
|
5
|
+
title: string;
|
|
6
|
+
text: string;
|
|
7
|
+
html: string;
|
|
8
|
+
fetchedAt: number;
|
|
9
|
+
}
|
|
10
|
+
/**
 * Settings consumed by BrowserSession.
 *
 * - headless:  forwarded to chromium.launch().
 * - timeoutMs: navigation timeout for page.goto(); the follow-up
 *              "networkidle" wait is capped at min(5000, timeoutMs).
 * - maxBytes:  upper bound applied to both `html` and `text` through
 *              String.slice(0, maxBytes). NOTE(review): slice counts UTF-16
 *              code units, not encoded bytes, despite the name.
 * - userAgent: optional override; DEFAULT_USER_AGENT is used when omitted.
 */
export interface BrowserOptions {
|
|
11
|
+
headless: boolean;
|
|
12
|
+
timeoutMs: number;
|
|
13
|
+
maxBytes: number;
|
|
14
|
+
userAgent?: string;
|
|
15
|
+
}
|
|
16
|
+
/** User-agent string applied to the browser context when BrowserOptions.userAgent is not set. */
export declare const DEFAULT_USER_AGENT: string;
|
|
17
|
+
/**
 * Wrapper around one Playwright Chromium browser and one browser context.
 *
 * - start(): launches Chromium and creates the context.
 * - fetch(): loads `url` in a newly opened page, returns a FetchedPage and
 *            closes the page afterwards; rejects with "browser not started"
 *            when no context exists.
 * - close(): closes the context and the browser (close errors are ignored)
 *            and resets both references.
 */
export declare class BrowserSession {
|
|
18
|
+
private readonly opts;
|
|
19
|
+
private browser;
|
|
20
|
+
private context;
|
|
21
|
+
constructor(opts: BrowserOptions);
|
|
22
|
+
start(): Promise<void>;
|
|
23
|
+
fetch(url: string): Promise<FetchedPage>;
|
|
24
|
+
close(): Promise<void>;
|
|
25
|
+
}
|
|
26
|
+
//# sourceMappingURL=browser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../src/browser.ts"],"names":[],"mappings":"AAMA,MAAM,WAAW,WAAW;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,eAAO,MAAM,kBAAkB,QAEuB,CAAC;AAYvD,qBAAa,cAAc;IAIb,OAAO,CAAC,QAAQ,CAAC,IAAI;IAHjC,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA+B;gBAEjB,IAAI,EAAE,cAAc;IAE3C,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IActB,KAAK,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAmCxC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAM7B"}
|
package/dist/browser.js
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// Thin Playwright wrapper (a fresh page per fetch). One browser and one context per agent run,
|
|
2
|
+
// re-used across fetches. Stealth-flavored Chrome args ported from the
|
|
3
|
+
// substrate browser container.
|
|
4
|
+
import { chromium } from "playwright";
|
|
5
|
+
// User-agent sent by the browser context when the caller supplies none.
// Split in two pieces purely to keep the line length manageable.
export const DEFAULT_USER_AGENT = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ",
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
].join("");
|
|
7
|
+
// Extra Chromium command-line switches handed to chromium.launch().
// The window size mirrors the 1920x1080 viewport configured on the context.
const STEALTH_ARGS = [
    "--disable-blink-features=AutomationControlled",
    "--disable-features=IsolateOrigins,site-per-process",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--window-size=1920,1080",
    "--lang=en-US,en",
    "--font-render-hinting=medium",
    "--disable-dev-shm-usage",
];
|
|
16
|
+
/**
 * Wrapper around one Playwright Chromium browser and one browser context.
 * Each fetch() opens its own page inside the shared context and closes it
 * again before returning.
 */
export class BrowserSession {
    opts;
    browser = null;
    context = null;
    /**
     * @param opts Session settings: `headless`, `timeoutMs`, `maxBytes` and
     *   an optional `userAgent`.
     */
    constructor(opts) {
        this.opts = opts;
    }
    /**
     * Launches Chromium and creates the browser context.
     *
     * Calling start() on an already started session is a no-op, so a repeated
     * call cannot orphan the running browser. If the context cannot be
     * created, the freshly launched browser is closed before the error is
     * rethrown, leaving the session in its "not started" state.
     *
     * @returns {Promise<void>}
     * @throws Whatever chromium.launch() or browser.newContext() rejects with.
     */
    async start() {
        if (this.context)
            return;
        const browser = await chromium.launch({
            headless: this.opts.headless,
            args: STEALTH_ARGS,
            ignoreDefaultArgs: ["--enable-automation"],
        });
        try {
            const context = await browser.newContext({
                userAgent: this.opts.userAgent ?? DEFAULT_USER_AGENT,
                viewport: { width: 1920, height: 1080 },
                locale: "en-US",
                javaScriptEnabled: true,
            });
            // Publish both handles only once the session is fully usable.
            this.browser = browser;
            this.context = context;
        }
        catch (err) {
            // Do not leave a browser process behind that nobody can close.
            await browser.close().catch(() => undefined);
            throw err;
        }
    }
    /**
     * Loads `url` in a new page and captures its final URL, status, title,
     * visible text and HTML.
     *
     * @param {string} url Address to navigate to.
     * @returns {Promise<object>} `{ url, finalUrl, status, title, text, html, fetchedAt }`;
     *   `text` and `html` are cut with String.slice(0, opts.maxBytes).
     * @throws {Error} "browser not started" when start() has not succeeded;
     *   navigation and content-extraction errors propagate unchanged.
     */
    async fetch(url) {
        if (!this.context)
            throw new Error("browser not started");
        const page = await this.context.newPage();
        try {
            const response = await page.goto(url, {
                waitUntil: "domcontentloaded",
                timeout: this.opts.timeoutMs,
            });
            // Give SPAs a moment to finish painting after domcontentloaded.
            // A timeout here is expected on chatty pages and is not an error.
            await page
                .waitForLoadState("networkidle", { timeout: Math.min(5000, this.opts.timeoutMs) })
                .catch(() => undefined);
            const finalUrl = page.url();
            // goto() may resolve without a response object; report 0 then.
            const status = response?.status() ?? 0;
            const title = await page.title().catch(() => "");
            const html = (await page.content()).slice(0, this.opts.maxBytes);
            const text = (await page.evaluate(() => document.body?.innerText ?? "")).slice(0, this.opts.maxBytes);
            return {
                url,
                finalUrl,
                status,
                title,
                text,
                html,
                fetchedAt: Date.now(),
            };
        }
        finally {
            // Always release the page, even when navigation failed.
            await page.close().catch(() => undefined);
        }
    }
    /**
     * Closes the context and the browser, ignoring close errors, and resets
     * the session so that fetch() rejects until start() is called again.
     *
     * @returns {Promise<void>}
     */
    async close() {
        await this.context?.close().catch(() => undefined);
        await this.browser?.close().catch(() => undefined);
        this.context = null;
        this.browser = null;
    }
}
|
|
75
|
+
//# sourceMappingURL=browser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"browser.js","sourceRoot":"","sources":["../src/browser.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAC5E,uEAAuE;AACvE,+BAA+B;AAE/B,OAAO,EAAE,QAAQ,EAAgD,MAAM,YAAY,CAAC;AAmBpF,MAAM,CAAC,MAAM,kBAAkB,GAC7B,+DAA+D;IAC/D,oDAAoD,CAAC;AAEvD,MAAM,YAAY,GAAG;IACnB,+CAA+C;IAC/C,oDAAoD;IACpD,0DAA0D;IAC1D,yBAAyB;IACzB,iBAAiB;IACjB,8BAA8B;IAC9B,yBAAyB;CAC1B,CAAC;AAEF,MAAM,OAAO,cAAc;IAII;IAHrB,OAAO,GAAmB,IAAI,CAAC;IAC/B,OAAO,GAA0B,IAAI,CAAC;IAE9C,YAA6B,IAAoB;QAApB,SAAI,GAAJ,IAAI,CAAgB;IAAG,CAAC;IAErD,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC;YACnC,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ;YAC5B,IAAI,EAAE,YAAY;YAClB,iBAAiB,EAAE,CAAC,qBAAqB,CAAC;SAC3C,CAAC,CAAC;QACH,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC;YAC3C,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,kBAAkB;YACpD,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;YACvC,MAAM,EAAE,OAAO;YACf,iBAAiB,EAAE,IAAI;SACxB,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,GAAW;QACrB,IAAI,CAAC,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;QAC1D,MAAM,IAAI,GAAS,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAChD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBACpC,SAAS,EAAE,kBAAkB;gBAC7B,OAAO,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS;aAC7B,CAAC,CAAC;YACH,gEAAgE;YAChE,MAAM,IAAI;iBACP,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;iBACjF,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;YAE1B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAG,QAAQ,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC;YACvC,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;YACjD,MAAM,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACjE,MAAM,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,CAC5E,CAAC,EACD,IAAI,CAAC,IAAI,CAAC,QAAQ,CACnB,CAAC;YACF,OAAO;gBACL,GAAG;gBACH,QAAQ;gBACR,MAAM;gBACN,KAAK;gBACL,IAAI;gBACJ,IAAI;gBACJ,SAA
S,EAAE,IAAI,CAAC,GAAG,EAAE;aACtB,CAAC;QACJ,CAAC;gBAAS,CAAC;YACT,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,MAAM,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;QACnD,MAAM,IAAI,CAAC,OAAO,EAAE,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;QACnD,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACpB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;IACtB,CAAC;CACF"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * One entry of the numbered source list.
 *
 * - id:        number shown in front of the entry; buildSourceTable() assigns
 *              it from the entry's position, starting at 1.
 * - url:       link target; also used as the link label when the title is empty.
 * - title:     link label.
 * - fetchedAt: timestamp passed to `new Date(...)`; rendered as YYYY-MM-DD.
 */
export interface Source {
|
|
2
|
+
id: number;
|
|
3
|
+
url: string;
|
|
4
|
+
title: string;
|
|
5
|
+
fetchedAt: number;
|
|
6
|
+
}
|
|
7
|
+
/** Returns a copy of each source with an `id` equal to its 1-based position in `sources`. */
export declare function buildSourceTable(sources: Omit<Source, "id">[]): Source[];
|
|
8
|
+
/**
 * Renders a "## Sources" markdown section with one `id. [title](url) — fetched YYYY-MM-DD`
 * line per source. Returns "" when `sources` is empty.
 */
export declare function renderSourcesMarkdown(sources: Source[]): string;
|
|
9
|
+
/**
 * Builds the full markdown document: a `# question` heading, the trimmed answer,
 * then the output of renderSourcesMarkdown(sources).
 */
export declare function renderAnswerMarkdown(question: string, answer: string, sources: Source[]): string;
|
|
10
|
+
//# sourceMappingURL=citations.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citations.d.ts","sourceRoot":"","sources":["../src/citations.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,MAAM;IACrB,EAAE,EAAE,MAAM,CAAC;IACX,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,GAAG,MAAM,EAAE,CAExE;AAED,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,CAQ/D;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,CAIhG"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
// Citation formatting — turns a list of sources into a numbered footnote
|
|
2
|
+
// block, gives each source a stable [n] id, and provides utilities for
|
|
3
|
+
// rendering inline citations into markdown.
|
|
4
|
+
/**
 * Gives every source a sequential id, starting at 1, in input order.
 *
 * Any `id` property already present on an input object is discarded so it
 * cannot overwrite the sequential number the citations rely on.
 *
 * @param {Array<object>} sources Sources without ids (`url`, `title`, `fetchedAt`).
 * @returns {Array<object>} New objects of the form `{ id, ...source }`; the
 *   inputs are not mutated.
 */
export function buildSourceTable(sources) {
    return sources.map((source, index) => {
        // Drop a stray incoming id; the position in the list is authoritative.
        const { id: _ignoredId, ...fields } = source;
        return { id: index + 1, ...fields };
    });
}
|
|
7
|
+
/**
 * Renders the numbered "## Sources" section.
 *
 * Each line has the form `id. [label](url) — fetched YYYY-MM-DD`. The label
 * is the escaped title; when the title is missing, empty or consists only of
 * whitespace or line breaks, the URL is used so the link is never blank.
 *
 * @param {Array<object>} sources Entries with `id`, `url`, `title`, `fetchedAt`.
 * @returns {string} The markdown section, or "" when `sources` is empty.
 * @throws {RangeError} When a `fetchedAt` value is not a valid date
 *   (raised by Date.prototype.toISOString).
 */
export function renderSourcesMarkdown(sources) {
    if (sources.length === 0)
        return "";
    const lines = sources.map((s) => {
        const date = new Date(s.fetchedAt).toISOString().slice(0, 10);
        // Trim after escaping: a title of only line breaks becomes " ", which
        // would otherwise count as a usable label.
        const safeTitle = escapeMd(s.title ?? "").trim() || s.url;
        return `${s.id}. [${safeTitle}](${s.url}) — fetched ${date}`;
    });
    return "## Sources\n\n" + lines.join("\n") + "\n";
}
|
|
17
|
+
/**
 * Assembles the final markdown document: a level-one heading with the
 * escaped question, the trimmed answer, and the sources section.
 *
 * @param {string} question Rendered as the `# ...` heading.
 * @param {string} answer Answer body; surrounding whitespace is trimmed.
 * @param {Array<object>} sources Passed to renderSourcesMarkdown().
 * @returns {string} The complete markdown text.
 */
export function renderAnswerMarkdown(question, answer, sources) {
    const sections = [
        `# ${escapeMd(question)}\n\n`,
        answer.trim() + "\n\n",
        renderSourcesMarkdown(sources),
    ];
    return sections.join("");
}
|
|
22
|
+
/**
 * Makes a string safe to use as markdown link text or heading text:
 * runs of line breaks collapse to one space and square brackets become
 * parentheses.
 *
 * @param {string} s Raw text.
 * @returns {string} Single-line text without `[` or `]`.
 */
function escapeMd(s) {
    const singleLine = s.replace(/[\r\n]+/g, " ");
    const withoutOpeners = singleLine.replace(/\[/g, "(");
    return withoutOpeners.replace(/\]/g, ")");
}
|
|
25
|
+
//# sourceMappingURL=citations.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citations.js","sourceRoot":"","sources":["../src/citations.ts"],"names":[],"mappings":"AAAA,yEAAyE;AACzE,uEAAuE;AACvE,4CAA4C;AAS5C,MAAM,UAAU,gBAAgB,CAAC,OAA6B;IAC5D,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,OAAiB;IACrD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAC9B,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC9D,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC;QAC7C,OAAO,GAAG,CAAC,CAAC,EAAE,MAAM,SAAS,KAAK,CAAC,CAAC,GAAG,eAAe,IAAI,EAAE,CAAC;IAC/D,CAAC,CAAC,CAAC;IACH,OAAO,gBAAgB,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;AACpD,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,QAAgB,EAAE,MAAc,EAAE,OAAiB;IACtF,MAAM,MAAM,GAAG,KAAK,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC;IAC7C,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,EAAE,GAAG,MAAM,CAAC;IACpC,OAAO,MAAM,GAAG,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,QAAQ,CAAC,CAAS;IACzB,OAAO,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;AAC5E,CAAC"}
|