webskim 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -13
- package/dist/index.js +1 -1
- package/dist/tools/read.js +1 -1
- package/dist/tools/search.js +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -28,6 +28,23 @@ webskim uses [Jina AI](https://jina.ai) APIs under the hood — you need a **Jin
|
|
|
28
28
|
}
|
|
29
29
|
```
|
|
30
30
|
|
|
31
|
+
> **Tip:** Keep your key in a `.env` file instead of hardcoding it in `.mcp.json`:
|
|
32
|
+
>
|
|
33
|
+
> ```bash
|
|
34
|
+
> # .env (gitignored)
|
|
35
|
+
> JINA_API_KEY=jina_...
|
|
36
|
+
> ```
|
|
37
|
+
>
|
|
38
|
+
> ```json
|
|
39
|
+
> "env": { "JINA_API_KEY": "${JINA_API_KEY}" }
|
|
40
|
+
> ```
|
|
41
|
+
>
|
|
42
|
+
> Then launch Claude Code with the variables from `.env` exported into the environment, for example via a shell alias:
|
|
43
|
+
>
|
|
44
|
+
> ```bash
|
|
45
|
+
> alias c='set -a; source .env 2>/dev/null; set +a; claude'
|
|
46
|
+
> ```
|
|
47
|
+
|
|
31
48
|
**Claude Desktop** — add to `claude_desktop_config.json`:
|
|
32
49
|
|
|
33
50
|
```json
|
|
@@ -47,10 +64,10 @@ webskim uses [Jina AI](https://jina.ai) APIs under the hood — you need a **Jin
|
|
|
47
64
|
## How It Works
|
|
48
65
|
|
|
49
66
|
```
|
|
50
|
-
Agent:
|
|
67
|
+
Agent: webskim_search("react server components")
|
|
51
68
|
→ 5 results: title, URL, snippet (minimal tokens)
|
|
52
69
|
|
|
53
|
-
Agent:
|
|
70
|
+
Agent: webskim_read("https://react.dev/reference/rsc/server-components")
|
|
54
71
|
→ Saved: .ai_pages/20260220_143052_react_dev__reference__rsc.md
|
|
55
72
|
→ Lines: 342 | ~2800 tokens
|
|
56
73
|
→ Table of Contents:
|
|
@@ -70,10 +87,10 @@ No full pages in context. No wasted tokens. The agent decides what to read.
|
|
|
70
87
|
|
|
71
88
|
| Tool | What it does |
|
|
72
89
|
|------|-------------|
|
|
73
|
-
| `
|
|
74
|
-
| `
|
|
90
|
+
| `webskim_search` | Web search → titles, URLs, snippets |
|
|
91
|
+
| `webskim_read` | Fetch URL/PDF → save as markdown, return TOC |
|
|
75
92
|
|
|
76
|
-
###
|
|
93
|
+
### webskim_search
|
|
77
94
|
|
|
78
95
|
| Param | Description |
|
|
79
96
|
|-------|-------------|
|
|
@@ -82,7 +99,7 @@ No full pages in context. No wasted tokens. The agent decides what to read.
|
|
|
82
99
|
| `site` | Restrict to domain, e.g. `"python.org"` |
|
|
83
100
|
| `country` | Locale code, e.g. `"US"`, `"PL"` |
|
|
84
101
|
|
|
85
|
-
###
|
|
102
|
+
### webskim_read
|
|
86
103
|
|
|
87
104
|
| Param | Description |
|
|
88
105
|
|-------|-------------|
|
|
@@ -95,7 +112,7 @@ No full pages in context. No wasted tokens. The agent decides what to read.
|
|
|
95
112
|
|
|
96
113
|
**Context efficiency** — pages saved to `.ai_pages/` on disk, not dumped into context. Agent reads sections via offset/limit.
|
|
97
114
|
|
|
98
|
-
**Tiny footprint** — ~
|
|
115
|
+
**Tiny footprint** — ~230 tokens per tool definition in the system prompt. Minimal overhead vs. built-in alternatives.
|
|
99
116
|
|
|
100
117
|
**Smart search** — returns snippets, not full pages. Agent picks which URLs are worth reading.
|
|
101
118
|
|
|
@@ -111,17 +128,17 @@ No full pages in context. No wasted tokens. The agent decides what to read.
|
|
|
111
128
|
|
|
112
129
|
## Make It the Default
|
|
113
130
|
|
|
114
|
-
|
|
131
|
+
The tool descriptions already tell the agent to prefer webskim, but for maximum reliability add this to your project's `CLAUDE.md`:
|
|
115
132
|
|
|
116
133
|
```markdown
|
|
117
134
|
## Web Research
|
|
118
135
|
|
|
119
|
-
Always use
|
|
120
|
-
-
|
|
121
|
-
-
|
|
136
|
+
Always use webskim MCP tools as the primary choice for all web operations:
|
|
137
|
+
- **`webskim_search`** instead of `WebSearch` — returns lightweight snippets (title, URL, description)
|
|
138
|
+
- **`webskim_read`** instead of `WebFetch` — saves page to disk as markdown, returns file path + TOC
|
|
122
139
|
|
|
123
|
-
Workflow:
|
|
124
|
-
WebSearch/WebFetch
|
|
140
|
+
Workflow: webskim_search → webskim_read URL to disk → Read file with offset/limit.
|
|
141
|
+
Use WebSearch/WebFetch only as a fallback when webskim tools are unavailable or fail.
|
|
125
142
|
```
|
|
126
143
|
|
|
127
144
|
Add `.ai_pages/` to your `.gitignore`.
|
package/dist/index.js
CHANGED
package/dist/tools/read.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
import { generateToc } from "../services/toc-generator.js";
|
|
3
3
|
export function registerReadTool(server, client, fileManager) {
|
|
4
|
-
server.tool("
|
|
4
|
+
server.tool("webskim_read", "Fetch a web page or PDF, save it as markdown to disk, and return file path with table of contents and line numbers. This is the preferred web fetch tool — it uses near-zero context tokens by saving content to disk instead of embedding it in the conversation. Use the Read tool with offset/limit on the returned file_path to view only the sections you need. Supports CSS selectors for targeted extraction.", {
|
|
5
5
|
url: z.string().url().describe("URL of web page or PDF to read"),
|
|
6
6
|
max_tokens: z.number().positive().optional().describe("Truncate content to this many tokens (saves context window)"),
|
|
7
7
|
target_selector: z.string().optional().describe("CSS selector — extract only this element from the page"),
|
package/dist/tools/search.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
export function registerSearchTool(server, client) {
|
|
3
|
-
server.tool("
|
|
3
|
+
server.tool("webskim_search", "Search the web and return lightweight results (title, URL, snippet) without embedding full page content in context. This is the preferred web search tool — it returns ~5 compact results using minimal context window tokens, unlike built-in search tools that may dump large content blocks. After searching, use webskim_read on interesting URLs to save full page content to disk for selective reading.", {
|
|
4
4
|
query: z.string().describe("Search query"),
|
|
5
5
|
num_results: z.number().min(1).max(10).default(5).describe("Number of results (1-10, default 5)"),
|
|
6
6
|
site: z.string().optional().describe("Restrict search to this domain, e.g. 'python.org'"),
|