langchain-skim 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_skim-0.1.0/.gitignore +49 -0
- langchain_skim-0.1.0/LICENSE +21 -0
- langchain_skim-0.1.0/PKG-INFO +204 -0
- langchain_skim-0.1.0/README.md +175 -0
- langchain_skim-0.1.0/langchain_skim/__init__.py +6 -0
- langchain_skim-0.1.0/langchain_skim/tool.py +198 -0
- langchain_skim-0.1.0/pyproject.toml +54 -0
- langchain_skim-0.1.0/tests/test_tool.py +161 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# See https://docs.github.com/en/get-started/getting-started-with-git/ignoring-files for more about ignoring files.
|
|
2
|
+
|
|
3
|
+
# compiled output
|
|
4
|
+
dist
|
|
5
|
+
tmp
|
|
6
|
+
out-tsc
|
|
7
|
+
*.tsbuildinfo
|
|
8
|
+
.expo
|
|
9
|
+
.expo-shared
|
|
10
|
+
|
|
11
|
+
# dependencies
|
|
12
|
+
node_modules
|
|
13
|
+
|
|
14
|
+
# IDEs and editors
|
|
15
|
+
/.idea
|
|
16
|
+
.project
|
|
17
|
+
.classpath
|
|
18
|
+
.c9/
|
|
19
|
+
*.launch
|
|
20
|
+
.settings/
|
|
21
|
+
*.sublime-workspace
|
|
22
|
+
|
|
23
|
+
# IDE - VSCode
|
|
24
|
+
.vscode/*
|
|
25
|
+
!.vscode/settings.json
|
|
26
|
+
!.vscode/tasks.json
|
|
27
|
+
!.vscode/launch.json
|
|
28
|
+
!.vscode/extensions.json
|
|
29
|
+
|
|
30
|
+
# misc
|
|
31
|
+
/.sass-cache
|
|
32
|
+
/connect.lock
|
|
33
|
+
/coverage
|
|
34
|
+
/libpeerconnection.log
|
|
35
|
+
npm-debug.log
|
|
36
|
+
yarn-error.log
|
|
37
|
+
testem.log
|
|
38
|
+
/typings
|
|
39
|
+
|
|
40
|
+
# System Files
|
|
41
|
+
.DS_Store
|
|
42
|
+
Thumbs.db
|
|
43
|
+
|
|
44
|
+
.cursor/rules/nx-rules.mdc
|
|
45
|
+
.github/instructions/nx.instructions.md
|
|
46
|
+
|
|
47
|
+
# Replit
|
|
48
|
+
.cache/
|
|
49
|
+
.local/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Skim
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: langchain-skim
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LangChain tool for Skim — clean web reader for AI agents. Pays $0.002/call in USDC over x402. No signup, no API keys.
|
|
5
|
+
Project-URL: Homepage, https://skim402.com
|
|
6
|
+
Project-URL: Documentation, https://skim402.com/docs
|
|
7
|
+
Project-URL: Repository, https://github.com/JessieJanie/skim402
|
|
8
|
+
Project-URL: x402 protocol, https://x402.org
|
|
9
|
+
Author-email: Skim <hello@skim402.com>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: agent,ai,langchain,llm,markdown,reader,skim,web-scraping,x402
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: eth-account>=0.13.0
|
|
23
|
+
Requires-Dist: langchain-core>=0.3.0
|
|
24
|
+
Requires-Dist: requests>=2.31.0
|
|
25
|
+
Requires-Dist: x402[evm]>=2.0.0
|
|
26
|
+
Provides-Extra: test
|
|
27
|
+
Requires-Dist: pytest>=8.0; extra == 'test'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# langchain-skim
|
|
31
|
+
|
|
32
|
+
**Give your LangChain agent the ability to read any URL — clean Markdown, no ads, no nav, no boilerplate. It pays for each call automatically. No signup, no API key.**
|
|
33
|
+
|
|
34
|
+
[langchain-skim on PyPI](https://pypi.org/project/langchain-skim/)
|
|
35
|
+
[License: MIT](LICENSE)
|
|
36
|
+
|
|
37
|
+
`langchain-skim` is the official [LangChain](https://python.langchain.com) tool for [Skim](https://skim402.com) — the canonical [x402](https://x402.org) clean reader API. It exposes one tool, `SkimReader`, that your agent can call to fetch any web page as agent-ready Markdown plus structured metadata (title, byline, published date, language, excerpt). Each call costs **$0.002 in USDC on Base**, paid automatically by your local wallet over HTTP 402.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Install
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install langchain-skim
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
This pulls in the x402 client with EVM support, so there's nothing else to install.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Quickstart (60 seconds)
|
|
52
|
+
|
|
53
|
+
### 1. Fund a Base wallet with $1 of USDC
|
|
54
|
+
|
|
55
|
+
A dollar funds roughly 500 reads. Full step-by-step (with screenshots, for non-crypto-native devs): **<https://skim402.com/wallet>**.
|
|
56
|
+
|
|
57
|
+
> **Use a fresh wallet, not your personal one.** This wallet's private key signs payment authorizations on your machine — treat it like a hot wallet for paying $0.002 tolls, not a savings account.
|
|
58
|
+
|
|
59
|
+
### 2. Point the tool at your wallet
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
export SKIM_WALLET_PRIVATE_KEY=0xYOUR_BASE_WALLET_PRIVATE_KEY
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### 3. Use it
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from langchain_skim import SkimReader
|
|
69
|
+
|
|
70
|
+
reader = SkimReader() # reads SKIM_WALLET_PRIVATE_KEY from the environment
|
|
71
|
+
|
|
72
|
+
markdown = reader.invoke({"url": "https://en.wikipedia.org/wiki/HTTP_402"})
|
|
73
|
+
print(markdown)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
The tool signs an EIP-3009 USDC authorization for $0.002, Skim returns clean Markdown, and you get back the article body with a YAML frontmatter block of metadata. The payment shows up in your wallet's transaction history on [BaseScan](https://basescan.org/).
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Use it in an agent
|
|
81
|
+
|
|
82
|
+
`SkimReader` is a standard LangChain `BaseTool`, so it drops straight into any agent's tool list:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from langchain_skim import SkimReader
|
|
86
|
+
from langgraph.prebuilt import create_react_agent  # or any agent constructor
|
|
87
|
+
from langchain_openai import ChatOpenAI
|
|
88
|
+
|
|
89
|
+
llm = ChatOpenAI(model="gpt-4o-mini")
|
|
90
|
+
tools = [SkimReader()]
|
|
91
|
+
|
|
92
|
+
agent = create_react_agent(llm, tools)
|
|
93
|
+
agent.invoke({"messages": [("user", "Read https://example.com/article and summarize it.")]})
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
The agent decides when to call `skim_read`, the wallet pays per read, and the model gets clean Markdown instead of raw HTML.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Output shape
|
|
101
|
+
|
|
102
|
+
`SkimReader` returns Markdown with a YAML frontmatter block of the page metadata:
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
---
|
|
106
|
+
title: Example article
|
|
107
|
+
byline: Jane Doe
|
|
108
|
+
publishedAt: 2025-01-15
|
|
109
|
+
lang: en
|
|
110
|
+
excerpt: A short summary...
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
# Example article
|
|
114
|
+
|
|
115
|
+
The cleaned article body in Markdown...
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Set `include_metadata=False` to get just the Markdown body.
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Configuration
|
|
123
|
+
|
|
124
|
+
`SkimReader` takes the following parameters (all optional except the wallet key):
|
|
125
|
+
|
|
126
|
+
| Parameter | Default | Notes |
|
|
127
|
+
| ------------------ | ----------------------- | ------------------------------------------------------------------------------------------------------------------------- |
|
|
128
|
+
| `private_key` | `$SKIM_WALLET_PRIVATE_KEY` | Hex private key for the Base wallet that pays for reads. With or without `0x`. Use a dedicated wallet — never your personal one. |
|
|
129
|
+
| `base_url` | `https://skim402.com` | Override the API base URL. For self-hosting or local development. |
|
|
130
|
+
| `max_price_usd` | `0.01` | Hard cap on per-call price in USD. The wallet refuses to sign for anything above this. Skim is `$0.002`/call. |
|
|
131
|
+
| `include_metadata` | `True` | Prepend a YAML frontmatter block of page metadata to the returned Markdown. |
|
|
132
|
+
| `timeout` | `60` | Per-request timeout in seconds. |
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
reader = SkimReader(
|
|
136
|
+
private_key="0x...", # or rely on the env var
|
|
137
|
+
max_price_usd=0.005,
|
|
138
|
+
include_metadata=False,
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## How it actually works
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
your agent ──► SkimReader ──► POST https://skim402.com/api/v1/read
|
|
148
|
+
▲ │
|
|
149
|
+
│ ▼
|
|
150
|
+
│ 402 Payment Required
|
|
151
|
+
│ (x402 challenge)
|
|
152
|
+
│ │
|
|
153
|
+
▼ │
|
|
154
|
+
x402 signs EIP-3009 USDC ◄─────────────┘
|
|
155
|
+
transfer authorization (locally)
|
|
156
|
+
│
|
|
157
|
+
▼
|
|
158
|
+
retry POST with X-PAYMENT header
|
|
159
|
+
│
|
|
160
|
+
▼
|
|
161
|
+
Skim verifies + settles via Coinbase CDP facilitator
|
|
162
|
+
│
|
|
163
|
+
▼
|
|
164
|
+
200 OK + clean Markdown
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
Your private key never leaves your machine — it only signs authorizations locally.
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## Security
|
|
172
|
+
|
|
173
|
+
- **Dedicated wallet, always.** Fund it with only as much USDC as you're willing to spend in a runaway loop. The `max_price_usd` cap catches accidental price escalations.
|
|
174
|
+
- **No outbound telemetry from this package.** `langchain-skim` only talks to `skim402.com` (or whatever you set as `base_url`). No analytics, no error reporting, no phone-home.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Try it without an agent
|
|
179
|
+
|
|
180
|
+
Skeptical? Test the upstream endpoint directly — it'll return a 402 challenge so you can see the protocol in action:
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
curl -i -X POST https://skim402.com/api/v1/read \
|
|
184
|
+
-H 'content-type: application/json' \
|
|
185
|
+
-d '{"url":"https://en.wikipedia.org/wiki/HTTP_402"}'
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
You'll get back `HTTP/1.1 402 Payment Required` with the x402 challenge in the response body.
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## Links
|
|
193
|
+
|
|
194
|
+
- **Skim website** — <https://skim402.com>
|
|
195
|
+
- **Wallet setup guide** — <https://skim402.com/wallet>
|
|
196
|
+
- **API docs** — <https://skim402.com/docs>
|
|
197
|
+
- **x402 protocol** — <https://x402.org>
|
|
198
|
+
- **GitHub** — <https://github.com/JessieJanie/skim402>
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
MIT
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# langchain-skim
|
|
2
|
+
|
|
3
|
+
**Give your LangChain agent the ability to read any URL — clean Markdown, no ads, no nav, no boilerplate. It pays for each call automatically. No signup, no API key.**
|
|
4
|
+
|
|
5
|
+
[langchain-skim on PyPI](https://pypi.org/project/langchain-skim/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
`langchain-skim` is the official [LangChain](https://python.langchain.com) tool for [Skim](https://skim402.com) — the canonical [x402](https://x402.org) clean reader API. It exposes one tool, `SkimReader`, that your agent can call to fetch any web page as agent-ready Markdown plus structured metadata (title, byline, published date, language, excerpt). Each call costs **$0.002 in USDC on Base**, paid automatically by your local wallet over HTTP 402.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install langchain-skim
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
This pulls in the x402 client with EVM support, so there's nothing else to install.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Quickstart (60 seconds)
|
|
23
|
+
|
|
24
|
+
### 1. Fund a Base wallet with $1 of USDC
|
|
25
|
+
|
|
26
|
+
A dollar funds roughly 500 reads. Full step-by-step (with screenshots, for non-crypto-native devs): **<https://skim402.com/wallet>**.
|
|
27
|
+
|
|
28
|
+
> **Use a fresh wallet, not your personal one.** This wallet's private key signs payment authorizations on your machine — treat it like a hot wallet for paying $0.002 tolls, not a savings account.
|
|
29
|
+
|
|
30
|
+
### 2. Point the tool at your wallet
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
export SKIM_WALLET_PRIVATE_KEY=0xYOUR_BASE_WALLET_PRIVATE_KEY
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### 3. Use it
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from langchain_skim import SkimReader
|
|
40
|
+
|
|
41
|
+
reader = SkimReader() # reads SKIM_WALLET_PRIVATE_KEY from the environment
|
|
42
|
+
|
|
43
|
+
markdown = reader.invoke({"url": "https://en.wikipedia.org/wiki/HTTP_402"})
|
|
44
|
+
print(markdown)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
The tool signs an EIP-3009 USDC authorization for $0.002, Skim returns clean Markdown, and you get back the article body with a YAML frontmatter block of metadata. The payment shows up in your wallet's transaction history on [BaseScan](https://basescan.org/).
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Use it in an agent
|
|
52
|
+
|
|
53
|
+
`SkimReader` is a standard LangChain `BaseTool`, so it drops straight into any agent's tool list:
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from langchain_skim import SkimReader
|
|
57
|
+
from langgraph.prebuilt import create_react_agent  # or any agent constructor
|
|
58
|
+
from langchain_openai import ChatOpenAI
|
|
59
|
+
|
|
60
|
+
llm = ChatOpenAI(model="gpt-4o-mini")
|
|
61
|
+
tools = [SkimReader()]
|
|
62
|
+
|
|
63
|
+
agent = create_react_agent(llm, tools)
|
|
64
|
+
agent.invoke({"messages": [("user", "Read https://example.com/article and summarize it.")]})
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
The agent decides when to call `skim_read`, the wallet pays per read, and the model gets clean Markdown instead of raw HTML.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Output shape
|
|
72
|
+
|
|
73
|
+
`SkimReader` returns Markdown with a YAML frontmatter block of the page metadata:
|
|
74
|
+
|
|
75
|
+
```
|
|
76
|
+
---
|
|
77
|
+
title: Example article
|
|
78
|
+
byline: Jane Doe
|
|
79
|
+
publishedAt: 2025-01-15
|
|
80
|
+
lang: en
|
|
81
|
+
excerpt: A short summary...
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
# Example article
|
|
85
|
+
|
|
86
|
+
The cleaned article body in Markdown...
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Set `include_metadata=False` to get just the Markdown body.
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Configuration
|
|
94
|
+
|
|
95
|
+
`SkimReader` takes the following parameters (all optional except the wallet key):
|
|
96
|
+
|
|
97
|
+
| Parameter | Default | Notes |
|
|
98
|
+
| ------------------ | ----------------------- | ------------------------------------------------------------------------------------------------------------------------- |
|
|
99
|
+
| `private_key` | `$SKIM_WALLET_PRIVATE_KEY` | Hex private key for the Base wallet that pays for reads. With or without `0x`. Use a dedicated wallet — never your personal one. |
|
|
100
|
+
| `base_url` | `https://skim402.com` | Override the API base URL. For self-hosting or local development. |
|
|
101
|
+
| `max_price_usd` | `0.01` | Hard cap on per-call price in USD. The wallet refuses to sign for anything above this. Skim is `$0.002`/call. |
|
|
102
|
+
| `include_metadata` | `True` | Prepend a YAML frontmatter block of page metadata to the returned Markdown. |
|
|
103
|
+
| `timeout` | `60` | Per-request timeout in seconds. |
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
reader = SkimReader(
|
|
107
|
+
private_key="0x...", # or rely on the env var
|
|
108
|
+
max_price_usd=0.005,
|
|
109
|
+
include_metadata=False,
|
|
110
|
+
)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## How it actually works
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
your agent ──► SkimReader ──► POST https://skim402.com/api/v1/read
|
|
119
|
+
▲ │
|
|
120
|
+
│ ▼
|
|
121
|
+
│ 402 Payment Required
|
|
122
|
+
│ (x402 challenge)
|
|
123
|
+
│ │
|
|
124
|
+
▼ │
|
|
125
|
+
x402 signs EIP-3009 USDC ◄─────────────┘
|
|
126
|
+
transfer authorization (locally)
|
|
127
|
+
│
|
|
128
|
+
▼
|
|
129
|
+
retry POST with X-PAYMENT header
|
|
130
|
+
│
|
|
131
|
+
▼
|
|
132
|
+
Skim verifies + settles via Coinbase CDP facilitator
|
|
133
|
+
│
|
|
134
|
+
▼
|
|
135
|
+
200 OK + clean Markdown
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Your private key never leaves your machine — it only signs authorizations locally.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Security
|
|
143
|
+
|
|
144
|
+
- **Dedicated wallet, always.** Fund it with only as much USDC as you're willing to spend in a runaway loop. The `max_price_usd` cap catches accidental price escalations.
|
|
145
|
+
- **No outbound telemetry from this package.** `langchain-skim` only talks to `skim402.com` (or whatever you set as `base_url`). No analytics, no error reporting, no phone-home.
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Try it without an agent
|
|
150
|
+
|
|
151
|
+
Skeptical? Test the upstream endpoint directly — it'll return a 402 challenge so you can see the protocol in action:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
curl -i -X POST https://skim402.com/api/v1/read \
|
|
155
|
+
-H 'content-type: application/json' \
|
|
156
|
+
-d '{"url":"https://en.wikipedia.org/wiki/HTTP_402"}'
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
You'll get back `HTTP/1.1 402 Payment Required` with the x402 challenge in the response body.
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Links
|
|
164
|
+
|
|
165
|
+
- **Skim website** — <https://skim402.com>
|
|
166
|
+
- **Wallet setup guide** — <https://skim402.com/wallet>
|
|
167
|
+
- **API docs** — <https://skim402.com/docs>
|
|
168
|
+
- **x402 protocol** — <https://x402.org>
|
|
169
|
+
- **GitHub** — <https://github.com/JessieJanie/skim402>
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## License
|
|
174
|
+
|
|
175
|
+
MIT
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""LangChain tool for Skim — the x402-native clean reader API for AI agents.
|
|
2
|
+
|
|
3
|
+
Exposes :class:`SkimReader`, a LangChain ``BaseTool`` that fetches any URL and
|
|
4
|
+
returns clean, agent-ready Markdown plus structured metadata. Each call is paid
|
|
5
|
+
automatically over the x402 protocol ($0.002 in USDC on Base) using a wallet you
|
|
6
|
+
control. The private key never leaves your machine — it only signs an EIP-3009
|
|
7
|
+
USDC authorization locally.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import os
|
|
13
|
+
from typing import Any, Optional, Type
|
|
14
|
+
|
|
15
|
+
from langchain_core.callbacks import CallbackManagerForToolRun
|
|
16
|
+
from langchain_core.tools import BaseTool, ToolException
|
|
17
|
+
from pydantic import BaseModel, Field, PrivateAttr, SecretStr
|
|
18
|
+
|
|
19
|
+
DEFAULT_BASE_URL = "https://skim402.com"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _yaml_scalar(value: Any) -> str:
    """Render a metadata value as a safe single-line YAML scalar.

    The value is converted with ``str()`` and every run of whitespace
    (including newlines) is collapsed to a single space, with leading and
    trailing whitespace removed. The result is wrapped in double quotes, with
    backslashes and double quotes escaped, whenever emitting it as a plain
    scalar could produce invalid or ambiguous YAML:

    * it is empty;
    * it starts with a YAML indicator character;
    * it contains ``": "`` or ends with ``":"`` (would read as a mapping);
    * it contains ``" #"`` (the rest would be read as a comment and dropped).

    Args:
        value: Any metadata value; it is stringified before rendering.

    Returns:
        A single-line string that can follow ``key: `` in a frontmatter block.
    """
    # str.split() with no argument also strips the ends, so ``text`` never
    # starts or ends with whitespace after this line.
    text = " ".join(str(value).split())
    needs_quoting = (
        not text
        or text[0] in "!&*?|>%@`\"'#,[]{}:-"
        or ": " in text
        or " #" in text
        or text.endswith(":")
    )
    if not needs_quoting:
        return text
    # Escape backslashes first so the escapes added for quotes stay intact.
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Description shown to the model for the ``skim_read`` tool. The literals are
# split at sentence/clause boundaries; the joined text is what matters.
_TOOL_DESCRIPTION = (
    "Fetch any URL and return clean, agent-ready Markdown via Skim "
    "(skim402.com). "
    "Strips nav, ads, and boilerplate; preserves the article body plus "
    "structured metadata (title, byline, published date, language, excerpt). "
    "Pays $0.002 per call in USDC on Base over the x402 protocol — "
    "no API keys, no signup. "
    "Use this whenever you need to read web content: articles, docs, blog "
    "posts, GitHub READMEs, research papers, and similar pages."
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class SkimReadInput(BaseModel):
    """Input schema for :class:`SkimReader`."""

    # The only argument the tool accepts; required (no default is given).
    url: str = Field(description="The fully-qualified URL to fetch and clean (https://...).")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class SkimReader(BaseTool):
    """Read any URL as clean Markdown via Skim, paying per call over x402.

    The tool lazily builds a payment-aware HTTP session the first time it runs,
    using your Base wallet's private key to sign USDC authorizations on demand.

    Args:
        private_key: Hex private key (with or without ``0x``) for the Base wallet
            that pays for reads. Falls back to the ``SKIM_WALLET_PRIVATE_KEY``
            environment variable. Use a dedicated wallet, never your personal one.
        base_url: Skim API base URL. Defaults to ``https://skim402.com``.
        max_price_usd: Hard per-call price cap in USD. The wallet refuses to sign
            for anything above this. Defaults to ``0.01`` (Skim is ``$0.002``).
        include_metadata: When ``True`` (default), prepend a YAML frontmatter block
            of the page metadata to the returned Markdown.
        timeout: Per-request timeout in seconds. Defaults to ``60``.

    Raises:
        ImportError: On first use, if the x402 / requests / eth-account
            dependencies cannot be imported.
        ValueError: On first use, if no wallet key is configured or the key is
            not a 64-character hex string.
        ToolException: If the request fails, Skim answers with an error status,
            or the response body is not a JSON object.

    Example:
        .. code-block:: python

            from langchain_skim import SkimReader

            reader = SkimReader()  # reads SKIM_WALLET_PRIVATE_KEY from the env
            markdown = reader.invoke({"url": "https://en.wikipedia.org/wiki/HTTP_402"})
    """

    name: str = "skim_read"
    description: str = _TOOL_DESCRIPTION
    args_schema: Type[BaseModel] = SkimReadInput

    # Held as a SecretStr and excluded from serialization and repr.
    private_key: Optional[SecretStr] = Field(default=None, exclude=True, repr=False)
    base_url: str = DEFAULT_BASE_URL
    max_price_usd: float = 0.01
    include_metadata: bool = True
    timeout: float = 60.0

    # Session built on first use by _get_session() and reused afterwards.
    _session: Any = PrivateAttr(default=None)

    def _resolve_private_key(self) -> str:
        """Return the wallet key as a ``0x``-prefixed 64-digit hex string.

        The explicit ``private_key`` field wins; otherwise the
        ``SKIM_WALLET_PRIVATE_KEY`` environment variable is used. Surrounding
        whitespace is ignored so a trailing newline from an env file or secret
        mount does not invalidate an otherwise good key.

        Raises:
            ValueError: If no key is configured or it is not 64 hex characters.
        """
        raw = (
            self.private_key.get_secret_value()
            if self.private_key is not None
            else os.environ.get("SKIM_WALLET_PRIVATE_KEY")
        )
        key = (raw or "").strip()
        if not key:
            raise ValueError(
                "Skim requires payment via x402. Provide a Base wallet funded with "
                "USDC by setting the SKIM_WALLET_PRIVATE_KEY environment variable, or "
                "by passing private_key=... to SkimReader(). The key never leaves your "
                "machine — it only signs payment authorizations locally."
            )

        # Accept both "0x" and "0X" prefixes.
        normalized = key[2:] if key[:2] in ("0x", "0X") else key
        if len(normalized) != 64 or any(
            c not in "0123456789abcdefABCDEF" for c in normalized
        ):
            # The key may have come from either source, so name both; never
            # echo the key material itself.
            raise ValueError(
                "The wallet private key (private_key=... or SKIM_WALLET_PRIVATE_KEY) "
                "must be a 64-character hex string (with or without a 0x prefix)."
            )
        return "0x" + normalized

    def _get_session(self) -> Any:
        """Build (and cache) a requests Session that auto-pays 402 responses."""
        if self._session is not None:
            return self._session

        # Imported lazily so that importing this module does not require the
        # payment stack until a read is actually attempted.
        try:
            import requests
            from eth_account import Account
            from x402 import x402ClientSync
            from x402.client import max_amount
            from x402.http.clients.requests import wrapRequestsWithPayment
            from x402.mechanisms.evm.exact.register import register_exact_evm_client
            from x402.mechanisms.evm.signers import EthAccountSigner
        except ImportError as exc:  # pragma: no cover - import-guard
            raise ImportError(
                "langchain-skim needs the x402 client with EVM support. Install it "
                "with: pip install langchain-skim (which pulls x402[evm]). If you "
                "installed manually, run: pip install 'x402[evm]' requests eth-account"
            ) from exc

        account = Account.from_key(self._resolve_private_key())
        cap_atomic = int(round(self.max_price_usd * 1_000_000))  # USDC has 6 decimals
        client = x402ClientSync()
        register_exact_evm_client(
            client,
            EthAccountSigner(account),
            policies=[max_amount(cap_atomic)],
        )
        self._session = wrapRequestsWithPayment(requests.Session(), client)
        return self._session

    def _run(
        self,
        url: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """POST ``url`` to the Skim read endpoint and return the page as Markdown.

        Args:
            url: The page to fetch and clean.
            run_manager: LangChain callback manager; accepted but not used here.

        Returns:
            The ``markdown`` field of the response (falling back to ``text``,
            then to an empty string), preceded by a YAML frontmatter block when
            ``include_metadata`` is set and the response carries metadata.

        Raises:
            ToolException: If the request raises, the status is an error, the
                body is not JSON, or the JSON is not an object.
        """
        session = self._get_session()
        endpoint = self.base_url.rstrip("/") + "/api/v1/read"

        # Deliberately broad: any network or payment-signing failure is
        # reported to the agent as a tool error.
        try:
            res = session.post(
                endpoint,
                json={"url": url, "mode": "basic"},
                timeout=self.timeout,
            )
        except Exception as exc:  # network / payment-signing failure
            raise ToolException(
                f"Skim request failed: {exc}. Common causes: the wallet has no USDC "
                f"on Base, or the price exceeded max_price_usd (${self.max_price_usd})."
            ) from exc

        if not getattr(res, "ok", res.status_code < 400):
            body = (res.text or "").strip()
            raise ToolException(
                f"Skim returned {res.status_code} {getattr(res, 'reason', '')}: "
                f"{body or '(no body)'}"
            )

        try:
            data = res.json()
        except ValueError as exc:
            raise ToolException(
                "Skim returned a non-JSON response. This usually means the request "
                f"did not reach the Skim API. Underlying error: {exc}"
            ) from exc

        # Valid JSON is not necessarily an object (it may be a list, string or
        # null); calling .get() on those would raise AttributeError.
        if not isinstance(data, dict):
            raise ToolException(
                "Skim returned an unexpected JSON payload: expected an object, "
                f"got {type(data).__name__}."
            )

        markdown = data.get("markdown") or data.get("text") or ""

        metadata = data.get("metadata")
        if self.include_metadata and isinstance(metadata, dict):
            # Skip empty/None values so the frontmatter only lists known fields.
            meta_lines = [
                f"{k}: {_yaml_scalar(v)}"
                for k, v in metadata.items()
                if v is not None and v != ""
            ]
            if meta_lines:
                markdown = "---\n" + "\n".join(meta_lines) + "\n---\n\n" + markdown

        return markdown
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "langchain-skim"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "LangChain tool for Skim — clean web reader for AI agents. Pays $0.002/call in USDC over x402. No signup, no API keys."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "Skim", email = "hello@skim402.com" }]
|
|
13
|
+
keywords = [
|
|
14
|
+
"langchain",
|
|
15
|
+
"x402",
|
|
16
|
+
"skim",
|
|
17
|
+
"ai",
|
|
18
|
+
"agent",
|
|
19
|
+
"reader",
|
|
20
|
+
"markdown",
|
|
21
|
+
"web-scraping",
|
|
22
|
+
"llm",
|
|
23
|
+
]
|
|
24
|
+
classifiers = [
|
|
25
|
+
"Development Status :: 4 - Beta",
|
|
26
|
+
"Intended Audience :: Developers",
|
|
27
|
+
"License :: OSI Approved :: MIT License",
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3.10",
|
|
30
|
+
"Programming Language :: Python :: 3.11",
|
|
31
|
+
"Programming Language :: Python :: 3.12",
|
|
32
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
|
+
]
|
|
34
|
+
dependencies = [
|
|
35
|
+
"langchain-core>=0.3.0",
|
|
36
|
+
"x402[evm]>=2.0.0",
|
|
37
|
+
"requests>=2.31.0",
|
|
38
|
+
"eth-account>=0.13.0",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[project.optional-dependencies]
|
|
42
|
+
test = ["pytest>=8.0"]
|
|
43
|
+
|
|
44
|
+
[project.urls]
|
|
45
|
+
Homepage = "https://skim402.com"
|
|
46
|
+
Documentation = "https://skim402.com/docs"
|
|
47
|
+
Repository = "https://github.com/JessieJanie/skim402"
|
|
48
|
+
"x402 protocol" = "https://x402.org"
|
|
49
|
+
|
|
50
|
+
[tool.hatch.build.targets.wheel]
|
|
51
|
+
packages = ["langchain_skim"]
|
|
52
|
+
|
|
53
|
+
[tool.pytest.ini_options]
|
|
54
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Unit tests for SkimReader.
|
|
2
|
+
|
|
3
|
+
These tests inject a fake payment-aware session (via the cached ``_session``
|
|
4
|
+
private attribute), so they never touch the network or sign a real payment.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from langchain_skim import SkimReader
|
|
10
|
+
from langchain_core.tools import ToolException
|
|
11
|
+
|
|
12
|
+
VALID_KEY = "0x" + "ab" * 32
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class _FakeResp:
    """Canned HTTP response exposing the attributes the tool reads."""

    def __init__(self, status=200, payload=None, text="", reason="OK"):
        # Any falsy payload (None, {}) is normalised to a fresh empty dict.
        self._payload = payload if payload else {}
        self.status_code, self.reason, self.text = status, reason, text
        # Only 2xx statuses count as ok.
        self.ok = status >= 200 and status < 300

    def json(self):
        """Return the payload given at construction time."""
        return self._payload
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class _FakeSession:
    """Session double: logs each ``post`` call and returns one fixed response."""

    def __init__(self, resp):
        self.calls = []
        self._resp = resp

    def post(self, url, json=None, timeout=None):
        """Record the call arguments, then hand back the canned response."""
        record = dict(url=url, json=json, timeout=timeout)
        self.calls.append(record)
        return self._resp
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_read_assembles_markdown_with_metadata_frontmatter():
    """Metadata is rendered as frontmatter before the body; blank values are dropped."""
    payload = {
        "markdown": "# Title\n\nBody text.",
        "metadata": {
            "title": "Title",
            "byline": "Jane Doe",
            "lang": "en",
            "excerpt": "",  # empty values are filtered out
            "siteName": None,  # None values are filtered out
        },
    }
    session = _FakeSession(_FakeResp(payload=payload))
    reader = SkimReader(private_key=VALID_KEY)
    reader._session = session

    result = reader.invoke({"url": "https://example.com/a"})

    assert result.startswith("---\n")
    for expected in ("title: Title", "byline: Jane Doe", "lang: en"):
        assert expected in result
    for dropped in ("excerpt:", "siteName:"):
        assert dropped not in result
    assert result.endswith("# Title\n\nBody text.")

    # The request must hit the read endpoint with the basic-mode payload.
    request = session.calls[0]
    assert request["url"].endswith("/api/v1/read")
    assert request["json"] == {"url": "https://example.com/a", "mode": "basic"}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_include_metadata_false_returns_plain_markdown():
    """With include_metadata=False the body comes back without frontmatter."""
    response = _FakeResp(payload={"markdown": "# Title", "metadata": {"title": "Title"}})
    reader = SkimReader(private_key=VALID_KEY, include_metadata=False)
    reader._session = _FakeSession(response)

    result = reader.invoke({"url": "https://example.com/a"})

    assert result == "# Title"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_falls_back_to_text_when_no_markdown():
    """A payload with only a ``text`` field is returned as-is."""
    reader = SkimReader(private_key=VALID_KEY, include_metadata=False)
    reader._session = _FakeSession(_FakeResp(payload={"text": "plain text"}))

    result = reader.invoke({"url": "https://example.com/a"})

    assert result == "plain text"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_custom_base_url_is_used():
    """A trailing slash on base_url does not produce a double slash in the endpoint."""
    session = _FakeSession(_FakeResp(payload={"markdown": "x"}))
    reader = SkimReader(private_key=VALID_KEY, base_url="https://example.test/")
    reader._session = session

    reader.invoke({"url": "https://example.com/a"})

    requested_url = session.calls[0]["url"]
    assert requested_url == "https://example.test/api/v1/read"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_metadata_values_are_yaml_safe():
    """Multi-line and colon-bearing values are flattened and quoted in frontmatter."""
    payload = {
        "markdown": "body",
        "metadata": {
            "title": "Breaking: it works\nline two",
            "excerpt": "a: b",
            "lang": "en",
        },
    }
    reader = SkimReader(private_key=VALID_KEY)
    reader._session = _FakeSession(_FakeResp(payload=payload))

    result = reader.invoke({"url": "https://example.com/a"})

    for expected in (
        'title: "Breaking: it works line two"',
        'excerpt: "a: b"',
        "lang: en",
    ):
        assert expected in result
    # No raw newline should leak into the frontmatter block.
    frontmatter = result.partition("---\n\n")[0]
    assert "\nline two" not in frontmatter
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def test_non_json_response_raises_tool_exception():
    """A body that fails JSON decoding surfaces as a ToolException."""

    class _BadJsonResp(_FakeResp):
        # Mimic a decode failure from the response's json() method.
        def json(self):
            raise ValueError("Expecting value")

    reader = SkimReader(private_key=VALID_KEY)
    reader._session = _FakeSession(_BadJsonResp(text="<html>oops</html>"))

    with pytest.raises(ToolException):
        reader._run("https://example.com/a")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def test_http_error_raises_tool_exception():
    """An error status is reported as a ToolException that names the status code."""
    bad_gateway = _FakeResp(status=502, text="upstream boom", reason="Bad Gateway")
    reader = SkimReader(private_key=VALID_KEY)
    reader._session = _FakeSession(bad_gateway)

    with pytest.raises(ToolException) as excinfo:
        reader._run("https://example.com/a")

    message = str(excinfo.value)
    assert "502" in message
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def test_missing_key_raises_value_error(monkeypatch):
    """Building the session without any configured key raises ValueError."""
    # Make sure a key from the developer's environment cannot satisfy the lookup.
    monkeypatch.delenv("SKIM_WALLET_PRIVATE_KEY", raising=False)
    reader = SkimReader()

    with pytest.raises(ValueError):
        reader._get_session()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def test_malformed_key_raises_value_error(monkeypatch):
    """A key that is not 64 hex characters is rejected when the session is built."""
    monkeypatch.delenv("SKIM_WALLET_PRIVATE_KEY", raising=False)
    reader = SkimReader(private_key="not-a-hex-key")

    with pytest.raises(ValueError):
        reader._get_session()
|