langchain-skim 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
1
+ # See https://docs.github.com/en/get-started/getting-started-with-git/ignoring-files for more about ignoring files.
2
+
3
+ # compiled output
4
+ dist
5
+ tmp
6
+ out-tsc
7
+ *.tsbuildinfo
8
+ .expo
9
+ .expo-shared
10
+
11
+ # dependencies
12
+ node_modules
13
+
14
+ # IDEs and editors
15
+ /.idea
16
+ .project
17
+ .classpath
18
+ .c9/
19
+ *.launch
20
+ .settings/
21
+ *.sublime-workspace
22
+
23
+ # IDE - VSCode
24
+ .vscode/*
25
+ !.vscode/settings.json
26
+ !.vscode/tasks.json
27
+ !.vscode/launch.json
28
+ !.vscode/extensions.json
29
+
30
+ # misc
31
+ /.sass-cache
32
+ /connect.lock
33
+ /coverage
34
+ /libpeerconnection.log
35
+ npm-debug.log
36
+ yarn-error.log
37
+ testem.log
38
+ /typings
39
+
40
+ # System Files
41
+ .DS_Store
42
+ Thumbs.db
43
+
44
+ .cursor/rules/nx-rules.mdc
45
+ .github/instructions/nx.instructions.md
46
+
47
+ # Replit
48
+ .cache/
49
+ .local/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Skim
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,204 @@
1
+ Metadata-Version: 2.4
2
+ Name: langchain-skim
3
+ Version: 0.1.0
4
+ Summary: LangChain tool for Skim — clean web reader for AI agents. Pays $0.002/call in USDC over x402. No signup, no API keys.
5
+ Project-URL: Homepage, https://skim402.com
6
+ Project-URL: Documentation, https://skim402.com/docs
7
+ Project-URL: Repository, https://github.com/JessieJanie/skim402
8
+ Project-URL: x402 protocol, https://x402.org
9
+ Author-email: Skim <hello@skim402.com>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: agent,ai,langchain,llm,markdown,reader,skim,web-scraping,x402
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: eth-account>=0.13.0
23
+ Requires-Dist: langchain-core>=0.3.0
24
+ Requires-Dist: requests>=2.31.0
25
+ Requires-Dist: x402[evm]>=2.0.0
26
+ Provides-Extra: test
27
+ Requires-Dist: pytest>=8.0; extra == 'test'
28
+ Description-Content-Type: text/markdown
29
+
30
+ # langchain-skim
31
+
32
+ **Give your LangChain agent the ability to read any URL — clean Markdown, no ads, no nav, no boilerplate. Pays per call automatically. No signup, no API key.**
33
+
34
+ [![PyPI version](https://img.shields.io/pypi/v/langchain-skim.svg)](https://pypi.org/project/langchain-skim/)
35
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
36
+
37
+ `langchain-skim` is the official [LangChain](https://python.langchain.com) tool for [Skim](https://skim402.com) — the canonical [x402](https://x402.org) clean reader API. It exposes one tool, `SkimReader`, that your agent can call to fetch any web page as agent-ready Markdown plus structured metadata (title, byline, published date, language, excerpt). Each call costs **$0.002 in USDC on Base**, paid automatically by your local wallet over HTTP 402.
38
+
39
+ ---
40
+
41
+ ## Install
42
+
43
+ ```bash
44
+ pip install langchain-skim
45
+ ```
46
+
47
+ This pulls in the x402 client with EVM support, so there's nothing else to install.
48
+
49
+ ---
50
+
51
+ ## Quickstart (60 seconds)
52
+
53
+ ### 1. Fund a Base wallet with $1 of USDC
54
+
55
+ A dollar funds roughly 500 reads. Full step-by-step (with screenshots, for non-crypto-native devs): **<https://skim402.com/wallet>**.
56
+
57
+ > **Use a fresh wallet, not your personal one.** This wallet's private key signs payment authorizations on your machine — treat it like a hot wallet for paying $0.002 tolls, not a savings account.
58
+
59
+ ### 2. Point the tool at your wallet
60
+
61
+ ```bash
62
+ export SKIM_WALLET_PRIVATE_KEY=0xYOUR_BASE_WALLET_PRIVATE_KEY
63
+ ```
64
+
65
+ ### 3. Use it
66
+
67
+ ```python
68
+ from langchain_skim import SkimReader
69
+
70
+ reader = SkimReader() # reads SKIM_WALLET_PRIVATE_KEY from the environment
71
+
72
+ markdown = reader.invoke({"url": "https://en.wikipedia.org/wiki/HTTP_402"})
73
+ print(markdown)
74
+ ```
75
+
76
+ The tool signs an EIP-3009 USDC authorization for $0.002, Skim returns clean Markdown, and you get back the article body with a YAML frontmatter block of metadata. The payment shows up in your wallet's transaction history on [BaseScan](https://basescan.org/).
77
+
78
+ ---
79
+
80
+ ## Use it in an agent
81
+
82
+ `SkimReader` is a standard LangChain `BaseTool`, so it drops straight into any agent's tool list:
83
+
84
+ ```python
85
+ from langchain_skim import SkimReader
86
+ from langgraph.prebuilt import create_react_agent  # or any agent constructor
87
+ from langchain_openai import ChatOpenAI
88
+
89
+ llm = ChatOpenAI(model="gpt-4o-mini")
90
+ tools = [SkimReader()]
91
+
92
+ agent = create_react_agent(llm, tools)
93
+ agent.invoke({"messages": [("user", "Read https://example.com/article and summarize it.")]})
94
+ ```
95
+
96
+ The agent decides when to call `skim_read`, the wallet pays per read, and the model gets clean Markdown instead of raw HTML.
97
+
98
+ ---
99
+
100
+ ## Output shape
101
+
102
+ `SkimReader` returns Markdown with a YAML frontmatter block of the page metadata:
103
+
104
+ ```
105
+ ---
106
+ title: Example article
107
+ byline: Jane Doe
108
+ publishedAt: 2025-01-15
109
+ lang: en
110
+ excerpt: A short summary...
111
+ ---
112
+
113
+ # Example article
114
+
115
+ The cleaned article body in Markdown...
116
+ ```
117
+
118
+ Set `include_metadata=False` to get just the Markdown body.
119
+
120
+ ---
121
+
122
+ ## Configuration
123
+
124
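+ `SkimReader` takes the following parameters. All are optional; the wallet key must be supplied either as `private_key` or through the `SKIM_WALLET_PRIVATE_KEY` environment variable: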
125
+
126
+ | Parameter | Default | Notes |
127
+ | ------------------ | ----------------------- | ------------------------------------------------------------------------------------------------------------------------- |
128
+ | `private_key` | `$SKIM_WALLET_PRIVATE_KEY` | Hex private key for the Base wallet that pays for reads. With or without `0x`. Use a dedicated wallet — never your personal one. |
129
+ | `base_url` | `https://skim402.com` | Override the API base URL. For self-hosting or local development. |
130
+ | `max_price_usd` | `0.01` | Hard cap on per-call price in USD. The wallet refuses to sign for anything above this. Skim is `$0.002`/call. |
131
+ | `include_metadata` | `True` | Prepend a YAML frontmatter block of page metadata to the returned Markdown. |
132
+ | `timeout` | `60` | Per-request timeout in seconds. |
133
+
134
+ ```python
135
+ reader = SkimReader(
136
+ private_key="0x...", # or rely on the env var
137
+ max_price_usd=0.005,
138
+ include_metadata=False,
139
+ )
140
+ ```
141
+
142
+ ---
143
+
144
+ ## How it actually works
145
+
146
+ ```
147
+ your agent ──► SkimReader ──► POST https://skim402.com/api/v1/read
148
+ ▲ │
149
+ │ ▼
150
+ │ 402 Payment Required
151
+ │ (x402 challenge)
152
+ │ │
153
+ ▼ │
154
+ x402 signs EIP-3009 USDC ◄─────────────┘
155
+ transfer authorization (locally)
156
+
157
+
158
+ retry POST with X-PAYMENT header
159
+
160
+
161
+ Skim verifies + settles via Coinbase CDP facilitator
162
+
163
+
164
+ 200 OK + clean Markdown
165
+ ```
166
+
167
+ Your private key never leaves your machine — it only signs authorizations locally.
168
+
169
+ ---
170
+
171
+ ## Security
172
+
173
+ - **Dedicated wallet, always.** Fund it with only as much USDC as you're willing to spend in a runaway loop. The `max_price_usd` cap catches accidental price escalations.
174
+ - **No outbound telemetry from this package.** `langchain-skim` only talks to `skim402.com` (or whatever you set as `base_url`). No analytics, no error reporting, no phone-home.
175
+
176
+ ---
177
+
178
+ ## Try it without an agent
179
+
180
+ Skeptical? Test the upstream endpoint directly — it'll return a 402 challenge so you can see the protocol in action:
181
+
182
+ ```bash
183
+ curl -i -X POST https://skim402.com/api/v1/read \
184
+ -H 'content-type: application/json' \
185
+ -d '{"url":"https://en.wikipedia.org/wiki/HTTP_402"}'
186
+ ```
187
+
188
+ You'll get back `HTTP/1.1 402 Payment Required` with the x402 challenge in the response body.
189
+
190
+ ---
191
+
192
+ ## Links
193
+
194
+ - **Skim website** — <https://skim402.com>
195
+ - **Wallet setup guide** — <https://skim402.com/wallet>
196
+ - **API docs** — <https://skim402.com/docs>
197
+ - **x402 protocol** — <https://x402.org>
198
+ - **GitHub** — <https://github.com/JessieJanie/skim402>
199
+
200
+ ---
201
+
202
+ ## License
203
+
204
+ MIT
@@ -0,0 +1,175 @@
1
+ # langchain-skim
2
+
3
+ **Give your LangChain agent the ability to read any URL — clean Markdown, no ads, no nav, no boilerplate. Pays per call automatically. No signup, no API key.**
4
+
5
+ [![PyPI version](https://img.shields.io/pypi/v/langchain-skim.svg)](https://pypi.org/project/langchain-skim/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
7
+
8
+ `langchain-skim` is the official [LangChain](https://python.langchain.com) tool for [Skim](https://skim402.com) — the canonical [x402](https://x402.org) clean reader API. It exposes one tool, `SkimReader`, that your agent can call to fetch any web page as agent-ready Markdown plus structured metadata (title, byline, published date, language, excerpt). Each call costs **$0.002 in USDC on Base**, paid automatically by your local wallet over HTTP 402.
9
+
10
+ ---
11
+
12
+ ## Install
13
+
14
+ ```bash
15
+ pip install langchain-skim
16
+ ```
17
+
18
+ This pulls in the x402 client with EVM support, so there's nothing else to install.
19
+
20
+ ---
21
+
22
+ ## Quickstart (60 seconds)
23
+
24
+ ### 1. Fund a Base wallet with $1 of USDC
25
+
26
+ A dollar funds roughly 500 reads. Full step-by-step (with screenshots, for non-crypto-native devs): **<https://skim402.com/wallet>**.
27
+
28
+ > **Use a fresh wallet, not your personal one.** This wallet's private key signs payment authorizations on your machine — treat it like a hot wallet for paying $0.002 tolls, not a savings account.
29
+
30
+ ### 2. Point the tool at your wallet
31
+
32
+ ```bash
33
+ export SKIM_WALLET_PRIVATE_KEY=0xYOUR_BASE_WALLET_PRIVATE_KEY
34
+ ```
35
+
36
+ ### 3. Use it
37
+
38
+ ```python
39
+ from langchain_skim import SkimReader
40
+
41
+ reader = SkimReader() # reads SKIM_WALLET_PRIVATE_KEY from the environment
42
+
43
+ markdown = reader.invoke({"url": "https://en.wikipedia.org/wiki/HTTP_402"})
44
+ print(markdown)
45
+ ```
46
+
47
+ The tool signs an EIP-3009 USDC authorization for $0.002, Skim returns clean Markdown, and you get back the article body with a YAML frontmatter block of metadata. The payment shows up in your wallet's transaction history on [BaseScan](https://basescan.org/).
48
+
49
+ ---
50
+
51
+ ## Use it in an agent
52
+
53
+ `SkimReader` is a standard LangChain `BaseTool`, so it drops straight into any agent's tool list:
54
+
55
+ ```python
56
+ from langchain_skim import SkimReader
57
+ from langgraph.prebuilt import create_react_agent  # or any agent constructor
58
+ from langchain_openai import ChatOpenAI
59
+
60
+ llm = ChatOpenAI(model="gpt-4o-mini")
61
+ tools = [SkimReader()]
62
+
63
+ agent = create_react_agent(llm, tools)
64
+ agent.invoke({"messages": [("user", "Read https://example.com/article and summarize it.")]})
65
+ ```
66
+
67
+ The agent decides when to call `skim_read`, the wallet pays per read, and the model gets clean Markdown instead of raw HTML.
68
+
69
+ ---
70
+
71
+ ## Output shape
72
+
73
+ `SkimReader` returns Markdown with a YAML frontmatter block of the page metadata:
74
+
75
+ ```
76
+ ---
77
+ title: Example article
78
+ byline: Jane Doe
79
+ publishedAt: 2025-01-15
80
+ lang: en
81
+ excerpt: A short summary...
82
+ ---
83
+
84
+ # Example article
85
+
86
+ The cleaned article body in Markdown...
87
+ ```
88
+
89
+ Set `include_metadata=False` to get just the Markdown body.
90
+
91
+ ---
92
+
93
+ ## Configuration
94
+
95
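+ `SkimReader` takes the following parameters. All are optional; the wallet key must be supplied either as `private_key` or through the `SKIM_WALLET_PRIVATE_KEY` environment variable: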
96
+
97
+ | Parameter | Default | Notes |
98
+ | ------------------ | ----------------------- | ------------------------------------------------------------------------------------------------------------------------- |
99
+ | `private_key` | `$SKIM_WALLET_PRIVATE_KEY` | Hex private key for the Base wallet that pays for reads. With or without `0x`. Use a dedicated wallet — never your personal one. |
100
+ | `base_url` | `https://skim402.com` | Override the API base URL. For self-hosting or local development. |
101
+ | `max_price_usd` | `0.01` | Hard cap on per-call price in USD. The wallet refuses to sign for anything above this. Skim is `$0.002`/call. |
102
+ | `include_metadata` | `True` | Prepend a YAML frontmatter block of page metadata to the returned Markdown. |
103
+ | `timeout` | `60` | Per-request timeout in seconds. |
104
+
105
+ ```python
106
+ reader = SkimReader(
107
+ private_key="0x...", # or rely on the env var
108
+ max_price_usd=0.005,
109
+ include_metadata=False,
110
+ )
111
+ ```
112
+
113
+ ---
114
+
115
+ ## How it actually works
116
+
117
+ ```
118
+ your agent ──► SkimReader ──► POST https://skim402.com/api/v1/read
119
+ ▲ │
120
+ │ ▼
121
+ │ 402 Payment Required
122
+ │ (x402 challenge)
123
+ │ │
124
+ ▼ │
125
+ x402 signs EIP-3009 USDC ◄─────────────┘
126
+ transfer authorization (locally)
127
+
128
+
129
+ retry POST with X-PAYMENT header
130
+
131
+
132
+ Skim verifies + settles via Coinbase CDP facilitator
133
+
134
+
135
+ 200 OK + clean Markdown
136
+ ```
137
+
138
+ Your private key never leaves your machine — it only signs authorizations locally.
139
+
140
+ ---
141
+
142
+ ## Security
143
+
144
+ - **Dedicated wallet, always.** Fund it with only as much USDC as you're willing to spend in a runaway loop. The `max_price_usd` cap catches accidental price escalations.
145
+ - **No outbound telemetry from this package.** `langchain-skim` only talks to `skim402.com` (or whatever you set as `base_url`). No analytics, no error reporting, no phone-home.
146
+
147
+ ---
148
+
149
+ ## Try it without an agent
150
+
151
+ Skeptical? Test the upstream endpoint directly — it'll return a 402 challenge so you can see the protocol in action:
152
+
153
+ ```bash
154
+ curl -i -X POST https://skim402.com/api/v1/read \
155
+ -H 'content-type: application/json' \
156
+ -d '{"url":"https://en.wikipedia.org/wiki/HTTP_402"}'
157
+ ```
158
+
159
+ You'll get back `HTTP/1.1 402 Payment Required` with the x402 challenge in the response body.
160
+
161
+ ---
162
+
163
+ ## Links
164
+
165
+ - **Skim website** — <https://skim402.com>
166
+ - **Wallet setup guide** — <https://skim402.com/wallet>
167
+ - **API docs** — <https://skim402.com/docs>
168
+ - **x402 protocol** — <https://x402.org>
169
+ - **GitHub** — <https://github.com/JessieJanie/skim402>
170
+
171
+ ---
172
+
173
+ ## License
174
+
175
+ MIT
@@ -0,0 +1,6 @@
1
+ """langchain-skim — LangChain tool for the Skim x402 clean reader API."""
2
+
3
+ from langchain_skim.tool import SkimReader, SkimReadInput
4
+
5
+ __all__ = ["SkimReader", "SkimReadInput"]
6
+ __version__ = "0.1.0"
@@ -0,0 +1,198 @@
1
+ """LangChain tool for Skim — the x402-native clean reader API for AI agents.
2
+
3
+ Exposes :class:`SkimReader`, a LangChain ``BaseTool`` that fetches any URL and
4
+ returns clean, agent-ready Markdown plus structured metadata. Each call is paid
5
+ automatically over the x402 protocol ($0.002 in USDC on Base) using a wallet you
6
+ control. The private key never leaves your machine — it only signs an EIP-3009
7
+ USDC authorization locally.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ from typing import Any, Optional, Type
14
+
15
+ from langchain_core.callbacks import CallbackManagerForToolRun
16
+ from langchain_core.tools import BaseTool, ToolException
17
+ from pydantic import BaseModel, Field, PrivateAttr, SecretStr
18
+
19
# Default origin of the Skim API; used as the default for SkimReader.base_url.
DEFAULT_BASE_URL = "https://skim402.com"
20
+
21
+
22
+ def _yaml_scalar(value: Any) -> str:
23
+ """Render a metadata value as a safe single-line YAML scalar.
24
+
25
+ Collapses internal whitespace/newlines and double-quotes the value when it
26
+ contains characters that could otherwise produce invalid or ambiguous YAML.
27
+ """
28
+ text = " ".join(str(value).split())
29
+ needs_quoting = (
30
+ text == ""
31
+ or text[0] in "!&*?|>%@`\"'#,[]{}:-"
32
+ or ": " in text
33
+ or text.endswith(":")
34
+ or text[0] in " "
35
+ )
36
+ if needs_quoting:
37
+ escaped = text.replace("\\", "\\\\").replace('"', '\\"')
38
+ return f'"{escaped}"'
39
+ return text
40
+
41
+
42
+ _TOOL_DESCRIPTION = (
43
+ "Fetch any URL and return clean, agent-ready Markdown via Skim (skim402.com). "
44
+ "Strips nav, ads, and boilerplate; preserves the article body plus structured "
45
+ "metadata (title, byline, published date, language, excerpt). Pays $0.002 per "
46
+ "call in USDC on Base over the x402 protocol — no API keys, no signup. Use this "
47
+ "whenever you need to read web content: articles, docs, blog posts, GitHub "
48
+ "READMEs, research papers, and similar pages."
49
+ )
50
+
51
+
52
class SkimReadInput(BaseModel):
    """Arguments accepted by :class:`SkimReader`: the single URL to read."""

    url: str = Field(description="The fully-qualified URL to fetch and clean (https://...).")
58
+
59
+
60
class SkimReader(BaseTool):
    """Read any URL as clean Markdown via Skim, paying per call over x402.

    The tool lazily builds a payment-aware HTTP session the first time it runs,
    using your Base wallet's private key to sign USDC authorizations on demand.

    Args:
        private_key: Hex private key (with or without ``0x``) for the Base wallet
            that pays for reads. Falls back to the ``SKIM_WALLET_PRIVATE_KEY``
            environment variable. Use a dedicated wallet, never your personal one.
        base_url: Skim API base URL. Defaults to ``https://skim402.com``.
        max_price_usd: Hard per-call price cap in USD. The wallet refuses to sign
            for anything above this. Defaults to ``0.01`` (Skim is ``$0.002``).
        include_metadata: When ``True`` (default), prepend a YAML frontmatter block
            of the page metadata to the returned Markdown.
        timeout: Per-request timeout in seconds. Defaults to ``60``.

    Example:
        .. code-block:: python

            from langchain_skim import SkimReader

            reader = SkimReader()  # reads SKIM_WALLET_PRIVATE_KEY from the env
            markdown = reader.invoke({"url": "https://en.wikipedia.org/wiki/HTTP_402"})
    """

    name: str = "skim_read"
    description: str = _TOOL_DESCRIPTION
    args_schema: Type[BaseModel] = SkimReadInput

    # Stored as SecretStr and excluded from serialization/repr so the key is
    # not exposed when the tool is dumped or printed.
    private_key: Optional[SecretStr] = Field(default=None, exclude=True, repr=False)
    base_url: str = DEFAULT_BASE_URL
    max_price_usd: float = 0.01
    include_metadata: bool = True
    timeout: float = 60.0

    # Payment-aware session, created on first use by _get_session().
    _session: Any = PrivateAttr(default=None)

    def _get_session(self) -> Any:
        """Build (and cache) a requests Session that auto-pays 402 responses.

        Returns:
            The cached session, or a newly built one wrapped with the x402
            payment client.

        Raises:
            ImportError: If the x402 / eth-account / requests packages are missing.
            ValueError: If no private key is configured, or the key is not a
                64-character hex string (optionally ``0x``-prefixed).
        """
        if self._session is not None:
            return self._session

        try:
            import requests
            from eth_account import Account
            from x402 import x402ClientSync
            from x402.client import max_amount
            from x402.http.clients.requests import wrapRequestsWithPayment
            from x402.mechanisms.evm.exact.register import register_exact_evm_client
            from x402.mechanisms.evm.signers import EthAccountSigner
        except ImportError as exc:  # pragma: no cover - import-guard
            raise ImportError(
                "langchain-skim needs the x402 client with EVM support. Install it "
                "with: pip install langchain-skim (which pulls x402[evm]). If you "
                "installed manually, run: pip install 'x402[evm]' requests eth-account"
            ) from exc

        raw_key = (
            self.private_key.get_secret_value()
            if self.private_key is not None
            else os.environ.get("SKIM_WALLET_PRIVATE_KEY")
        )
        # Keys loaded from .env files or shell substitution often carry stray
        # whitespace or a trailing newline; trim it so a valid key is accepted.
        key = (raw_key or "").strip()
        if not key:
            raise ValueError(
                "Skim requires payment via x402. Provide a Base wallet funded with "
                "USDC by setting the SKIM_WALLET_PRIVATE_KEY environment variable, or "
                "by passing private_key=... to SkimReader(). The key never leaves your "
                "machine — it only signs payment authorizations locally."
            )

        # Accept the hex prefix in either case.
        normalized = key[2:] if key.startswith(("0x", "0X")) else key
        if len(normalized) != 64 or any(
            c not in "0123456789abcdefABCDEF" for c in normalized
        ):
            raise ValueError(
                "SKIM_WALLET_PRIVATE_KEY must be a 64-character hex string (with or "
                "without a 0x prefix)."
            )

        account = Account.from_key("0x" + normalized)
        cap_atomic = int(round(self.max_price_usd * 1_000_000))  # USDC has 6 decimals
        client = x402ClientSync()
        register_exact_evm_client(
            client,
            EthAccountSigner(account),
            policies=[max_amount(cap_atomic)],
        )
        self._session = wrapRequestsWithPayment(requests.Session(), client)
        return self._session

    def _run(
        self,
        url: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Fetch ``url`` through the Skim read endpoint and return Markdown.

        Args:
            url: The page to fetch and clean.
            run_manager: Callback manager supplied by LangChain; unused here.

        Returns:
            The Markdown (or plain-text fallback) from the response, preceded by
            a YAML frontmatter block when ``include_metadata`` is true and the
            response carries non-empty metadata.

        Raises:
            ToolException: If the request fails, the server answers with an
                error status, or the body is not a JSON object.
        """
        session = self._get_session()
        endpoint = self.base_url.rstrip("/") + "/api/v1/read"

        try:
            res = session.post(
                endpoint,
                json={"url": url, "mode": "basic"},
                timeout=self.timeout,
            )
        except Exception as exc:  # network / payment-signing failure
            raise ToolException(
                f"Skim request failed: {exc}. Common causes: the wallet has no USDC "
                f"on Base, or the price exceeded max_price_usd (${self.max_price_usd})."
            ) from exc

        if not getattr(res, "ok", res.status_code < 400):
            body = (res.text or "").strip()
            raise ToolException(
                f"Skim returned {res.status_code} {getattr(res, 'reason', '')}: "
                f"{body or '(no body)'}"
            )

        try:
            data = res.json()
        except ValueError as exc:
            raise ToolException(
                "Skim returned a non-JSON response. This usually means the request "
                f"did not reach the Skim API. Underlying error: {exc}"
            ) from exc

        # Valid JSON may still be a list, string, number or null; without this
        # guard the .get() calls below would raise a raw AttributeError.
        if not isinstance(data, dict):
            raise ToolException(
                "Skim returned an unexpected JSON payload: expected an object, got "
                f"{type(data).__name__}."
            )

        markdown = data.get("markdown") or data.get("text") or ""

        metadata = data.get("metadata")
        if self.include_metadata and isinstance(metadata, dict):
            # Skip None/empty values so the frontmatter only lists known fields.
            meta_lines = [
                f"{k}: {_yaml_scalar(v)}"
                for k, v in metadata.items()
                if v is not None and v != ""
            ]
            if meta_lines:
                markdown = "---\n" + "\n".join(meta_lines) + "\n---\n\n" + markdown

        return markdown
@@ -0,0 +1,54 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "langchain-skim"
7
+ version = "0.1.0"
8
+ description = "LangChain tool for Skim — clean web reader for AI agents. Pays $0.002/call in USDC over x402. No signup, no API keys."
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.10"
12
+ authors = [{ name = "Skim", email = "hello@skim402.com" }]
13
+ keywords = [
14
+ "langchain",
15
+ "x402",
16
+ "skim",
17
+ "ai",
18
+ "agent",
19
+ "reader",
20
+ "markdown",
21
+ "web-scraping",
22
+ "llm",
23
+ ]
24
+ classifiers = [
25
+ "Development Status :: 4 - Beta",
26
+ "Intended Audience :: Developers",
27
+ "License :: OSI Approved :: MIT License",
28
+ "Programming Language :: Python :: 3",
29
+ "Programming Language :: Python :: 3.10",
30
+ "Programming Language :: Python :: 3.11",
31
+ "Programming Language :: Python :: 3.12",
32
+ "Topic :: Software Development :: Libraries :: Python Modules",
33
+ ]
34
+ dependencies = [
35
+ "langchain-core>=0.3.0",
36
+ "x402[evm]>=2.0.0",
37
+ "requests>=2.31.0",
38
+ "eth-account>=0.13.0",
39
+ ]
40
+
41
+ [project.optional-dependencies]
42
+ test = ["pytest>=8.0"]
43
+
44
+ [project.urls]
45
+ Homepage = "https://skim402.com"
46
+ Documentation = "https://skim402.com/docs"
47
+ Repository = "https://github.com/JessieJanie/skim402"
48
+ "x402 protocol" = "https://x402.org"
49
+
50
+ [tool.hatch.build.targets.wheel]
51
+ packages = ["langchain_skim"]
52
+
53
+ [tool.pytest.ini_options]
54
+ testpaths = ["tests"]
@@ -0,0 +1,161 @@
1
+ """Unit tests for SkimReader.
2
+
3
+ These tests inject a fake payment-aware session (via the cached ``_session``
4
+ private attribute), so they never touch the network or sign a real payment.
5
+ """
6
+
7
+ import pytest
8
+
9
+ from langchain_skim import SkimReader
10
+ from langchain_core.tools import ToolException
11
+
12
# Well-formed dummy key ("0x" + 64 hex characters) passed to SkimReader in tests.
VALID_KEY = "0x" + "ab" * 32
13
+
14
+
15
+ class _FakeResp:
16
+ def __init__(self, status=200, payload=None, text="", reason="OK"):
17
+ self.status_code = status
18
+ self._payload = payload or {}
19
+ self.text = text
20
+ self.reason = reason
21
+ self.ok = 200 <= status < 300
22
+
23
+ def json(self):
24
+ return self._payload
25
+
26
+
27
+ class _FakeSession:
28
+ def __init__(self, resp):
29
+ self._resp = resp
30
+ self.calls = []
31
+
32
+ def post(self, url, json=None, timeout=None):
33
+ self.calls.append({"url": url, "json": json, "timeout": timeout})
34
+ return self._resp
35
+
36
+
37
def test_read_assembles_markdown_with_metadata_frontmatter():
    """Frontmatter is prepended, empty/None metadata is dropped, body is kept."""
    metadata = {
        "title": "Title",
        "byline": "Jane Doe",
        "lang": "en",
        "excerpt": "",  # empty values are filtered out
        "siteName": None,  # None values are filtered out
    }
    response = _FakeResp(
        payload={"markdown": "# Title\n\nBody text.", "metadata": metadata}
    )
    fake = _FakeSession(response)
    tool = SkimReader(private_key=VALID_KEY)
    tool._session = fake

    out = tool.invoke({"url": "https://example.com/a"})

    assert out.startswith("---\n")
    for kept in ("title: Title", "byline: Jane Doe", "lang: en"):
        assert kept in out
    for dropped in ("excerpt:", "siteName:"):
        assert dropped not in out
    assert out.endswith("# Title\n\nBody text.")

    first_call = fake.calls[0]
    assert first_call["url"].endswith("/api/v1/read")
    assert first_call["json"] == {"url": "https://example.com/a", "mode": "basic"}
68
+
69
+
70
def test_include_metadata_false_returns_plain_markdown():
    """With include_metadata=False the Markdown comes back without frontmatter."""
    payload = {"markdown": "# Title", "metadata": {"title": "Title"}}
    tool = SkimReader(private_key=VALID_KEY, include_metadata=False)
    tool._session = _FakeSession(_FakeResp(payload=payload))

    result = tool.invoke({"url": "https://example.com/a"})

    assert result == "# Title"
79
+
80
+
81
def test_falls_back_to_text_when_no_markdown():
    """The ``text`` field is returned when the payload has no ``markdown``."""
    response = _FakeResp(payload={"text": "plain text"})
    tool = SkimReader(private_key=VALID_KEY, include_metadata=False)
    tool._session = _FakeSession(response)

    result = tool.invoke({"url": "https://example.com/a"})

    assert result == "plain text"
86
+
87
+
88
def test_custom_base_url_is_used():
    """A custom base_url (trailing slash included) yields a clean endpoint URL."""
    fake = _FakeSession(_FakeResp(payload={"markdown": "x"}))
    tool = SkimReader(private_key=VALID_KEY, base_url="https://example.test/")
    tool._session = fake

    tool.invoke({"url": "https://example.com/a"})

    posted_to = fake.calls[0]["url"]
    assert posted_to == "https://example.test/api/v1/read"
96
+
97
+
98
def test_metadata_values_are_yaml_safe():
    """Values with colons or newlines are quoted and flattened to one line."""
    payload = {
        "markdown": "body",
        "metadata": {
            "title": "Breaking: it works\nline two",
            "excerpt": "a: b",
            "lang": "en",
        },
    }
    tool = SkimReader(private_key=VALID_KEY)
    tool._session = _FakeSession(_FakeResp(payload=payload))

    out = tool.invoke({"url": "https://example.com/a"})

    assert 'title: "Breaking: it works line two"' in out
    assert 'excerpt: "a: b"' in out
    assert "lang: en" in out
    # No raw newline should leak into the frontmatter block.
    frontmatter = out.partition("---\n\n")[0]
    assert "\nline two" not in frontmatter
121
+
122
+
123
def test_non_json_response_raises_tool_exception():
    """A body that cannot be decoded as JSON surfaces as ToolException."""

    class _BadJsonResp(_FakeResp):
        def json(self):
            raise ValueError("Expecting value")

    broken = _BadJsonResp(text="<html>oops</html>")
    tool = SkimReader(private_key=VALID_KEY)
    tool._session = _FakeSession(broken)

    with pytest.raises(ToolException):
        tool._run("https://example.com/a")
134
+
135
+
136
def test_http_error_raises_tool_exception():
    """An error status raises ToolException that mentions the status code."""
    failing = _FakeResp(status=502, text="upstream boom", reason="Bad Gateway")
    tool = SkimReader(private_key=VALID_KEY)
    tool._session = _FakeSession(failing)

    with pytest.raises(ToolException) as caught:
        tool._run("https://example.com/a")

    message = str(caught.value)
    assert "502" in message
146
+
147
+
148
def test_missing_key_raises_value_error(monkeypatch):
    """With no argument and no environment variable, session setup fails."""
    monkeypatch.delenv("SKIM_WALLET_PRIVATE_KEY", raising=False)
    keyless_tool = SkimReader()

    with pytest.raises(ValueError):
        keyless_tool._get_session()
154
+
155
+
156
def test_malformed_key_raises_value_error(monkeypatch):
    """A key that is not 64 hex characters is rejected during session setup."""
    monkeypatch.delenv("SKIM_WALLET_PRIVATE_KEY", raising=False)
    bad_key_tool = SkimReader(private_key="not-a-hex-key")

    with pytest.raises(ValueError):
        bad_key_tool._get_session()