langchain-proxyclaw 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 IPLoop
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,44 @@
1
+ .PHONY: help install install-dev test lint format clean build publish
2
+
3
+ help:
4
+ @echo "Available commands:"
5
+ @echo " install - Install package"
6
+ @echo " install-dev - Install with dev dependencies"
7
+ @echo " test - Run tests"
8
+ @echo " lint - Run linters"
9
+ @echo " format - Format code with black"
10
+ @echo " clean - Clean build artifacts"
11
+ @echo " build - Build package"
12
+ @echo " publish - Publish to PyPI"
13
+
14
+ install:
15
+ pip install -e .
16
+
17
+ install-dev:
18
+ pip install -e ".[dev]"
19
+
20
+ test:
21
+ pytest tests/ -v --cov=langchain_proxyclaw --cov-report=term-missing
22
+
23
+ lint:
24
+ mypy langchain_proxyclaw/
25
+ flake8 langchain_proxyclaw/ tests/
26
+
27
+ format:
28
+ black langchain_proxyclaw/ tests/
29
+
30
+ clean:
31
+ rm -rf build/
32
+ rm -rf dist/
33
+ rm -rf *.egg-info/
34
+ find . -type d -name __pycache__ -exec rm -rf {} +
35
+ find . -type f -name "*.pyc" -delete
36
+
37
+ build: clean
38
+ python -m build
39
+
40
+ publish: build
41
+ python -m twine upload dist/*
42
+
43
+ check: lint test
44
+ @echo "All checks passed!"
@@ -0,0 +1,291 @@
1
+ Metadata-Version: 2.4
2
+ Name: langchain-proxyclaw
3
+ Version: 0.1.0
4
+ Summary: LangChain integration for ProxyClaw - residential proxy network
5
+ Project-URL: Homepage, https://proxyclaw.ai
6
+ Project-URL: Documentation, https://docs.proxyclaw.ai
7
+ Project-URL: Repository, https://github.com/iploop/langchain-proxyclaw
8
+ Project-URL: Bug Tracker, https://github.com/iploop/langchain-proxyclaw/issues
9
+ Author-email: IPLoop Team <support@iploop.io>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: iploop,langchain,proxy,proxyclaw,residential,scraping
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Internet :: WWW/HTTP
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Requires-Python: >=3.9
24
+ Requires-Dist: iploop-sdk>=1.8.0
25
+ Requires-Dist: langchain-core>=0.1.0
26
+ Requires-Dist: pydantic>=2.0.0
27
+ Provides-Extra: dev
28
+ Requires-Dist: beautifulsoup4>=4.12.0; extra == 'dev'
29
+ Requires-Dist: black>=23.0.0; extra == 'dev'
30
+ Requires-Dist: mypy>=1.0.0; extra == 'dev'
31
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
32
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # LangChain ProxyClaw Integration
36
+
37
+ [![PyPI version](https://badge.fury.io/py/langchain-proxyclaw.svg)](https://badge.fury.io/py/langchain-proxyclaw)
38
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
39
+
40
+ Official LangChain integration for [ProxyClaw](https://proxyclaw.ai) - a residential proxy network with 10M+ IPs across 111+ countries.
41
+
42
+ ## Overview
43
+
44
+ This package provides LangChain Tools for routing HTTP requests through ProxyClaw's residential proxy network, enabling AI agents to scrape websites with:
45
+
46
+ - 🌍 **Global IP coverage** - 111+ countries
47
+ - 🔄 **Automatic rotation** - Fresh IPs per request or sticky sessions
48
+ - 🛡️ **Anti-detection** - Built-in fingerprint spoofing
49
+ - ⚡ **High success rate** - Residential IPs bypass most blocks
50
+
51
+ ## Installation
52
+
53
+ ```bash
54
+ pip install langchain-proxyclaw
55
+ ```
56
+
57
+ For development:
58
+
59
+ ```bash
60
+ pip install langchain-proxyclaw[dev]
61
+ ```
62
+
63
+ ## Quick Start
64
+
65
+ ```python
66
+ from langchain_proxyclaw import ProxyClawTool
67
+ from langchain.agents import AgentType, initialize_agent
68
+ from langchain_openai import ChatOpenAI
69
+
70
+ # Initialize the tool
71
+ tool = ProxyClawTool(api_key="your_api_key")
72
+
73
+ # Use with an agent
74
+ llm = ChatOpenAI(temperature=0)
75
+ agent = initialize_agent(
76
+ [tool],
77
+ llm,
78
+ agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
79
+ verbose=True
80
+ )
81
+
82
+ # Ask the agent to scrape a website
83
+ agent.run("Get the content from https://example.com using a US proxy")
84
+ ```
85
+
86
+ ## Tools
87
+
88
+ ### 1. ProxyClawTool
89
+
90
+ Basic HTTP requests through ProxyClaw proxies.
91
+
92
+ ```python
93
+ from langchain_proxyclaw import ProxyClawTool
94
+
95
+ tool = ProxyClawTool(api_key="your_api_key")
96
+
97
+ # Simple GET request
98
+ result = tool.invoke({
99
+ "url": "https://example.com",
100
+ "country": "US"
101
+ })
102
+
103
+ # POST request with data
104
+ result = tool.invoke({
105
+ "url": "https://api.example.com/data",
106
+ "method": "POST",
107
+ "data": {"key": "value"},
108
+ "headers": {"Authorization": "Bearer token"},
109
+ "country": "GB"
110
+ })
111
+ ```
112
+
113
+ **Parameters:**
114
+ - `url` (str, required): Target URL
115
+ - `method` (str): HTTP method (GET, POST, PUT, DELETE) - default: GET
116
+ - `headers` (dict): Optional HTTP headers
117
+ - `data` (str/dict): Request body data
118
+ - `country` (str): Country code for proxy location (e.g., "US", "GB", "DE")
119
+ - `session_id` (str): Session ID for sticky sessions
120
+
121
+ ### 2. ProxyClawSessionTool
122
+
123
+ Sticky proxy sessions for multi-step workflows.
124
+
125
+ ```python
126
+ from langchain_proxyclaw import ProxyClawSessionTool
127
+
128
+ tool = ProxyClawSessionTool(api_key="your_api_key")
129
+
130
+ # Scrape multiple pages with the same IP
131
+ result = tool.invoke({
132
+ "urls": [
133
+ "https://site.com/login",
134
+ "https://site.com/dashboard",
135
+ "https://site.com/profile"
136
+ ],
137
+ "country": "US"
138
+ })
139
+ ```
140
+
141
+ **Parameters:**
142
+ - `urls` (list[str], required): List of URLs to fetch
143
+ - `country` (str): Country code
144
+ - `session_lifetime` (int): Session duration in minutes - default: 30
145
+
146
+ ### 3. ProxyClawScraperTool
147
+
148
+ Advanced scraper with retries and auto-rotation.
149
+
150
+ ```python
151
+ from langchain_proxyclaw import ProxyClawScraperTool
152
+
153
+ tool = ProxyClawScraperTool(api_key="your_api_key")
154
+
155
+ # Scrape with retries and link extraction
156
+ result = tool.invoke({
157
+ "url": "https://example.com/products",
158
+ "country": "US",
159
+ "retries": 3,
160
+ "timeout": 30,
161
+ "extract_links": True
162
+ })
163
+ ```
164
+
165
+ **Parameters:**
166
+ - `url` (str, required): Target URL
167
+ - `country` (str): Country code
168
+ - `retries` (int): Number of retry attempts - default: 3
169
+ - `timeout` (int): Request timeout in seconds - default: 30
170
+ - `extract_links` (bool): Extract all links from the page - default: False
171
+
172
+ ## Authentication
173
+
174
+ Get your API key from [ProxyClaw Dashboard](https://platform.proxyclaw.ai):
175
+
176
+ ```python
177
+ api_key = "pk_live_xxxxxxxxxxxxxxxx"
178
+ ```
179
+
180
+ ## Country Codes
181
+
182
+ Common country codes:
183
+ - `US` - United States
184
+ - `GB` - United Kingdom
185
+ - `DE` - Germany
186
+ - `FR` - France
187
+ - `JP` - Japan
188
+ - `IN` - India
189
+ - `BR` - Brazil
190
+
191
+ Full list: 111+ countries supported. Use ISO 3166-1 alpha-2 codes.
192
+
193
+ ## Use Cases
194
+
195
+ ### Web Scraping for AI Agents
196
+
197
+ ```python
198
+ from langchain_proxyclaw import ProxyClawScraperTool
199
+ from langchain.agents import AgentType, initialize_agent
200
+
201
+ scraper = ProxyClawScraperTool(api_key="your_api_key")
202
+
203
+ agent = initialize_agent(
204
+ [scraper],
205
+ llm,
206
+ agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION
207
+ )
208
+
209
+ # Agent can now scrape without being blocked
210
+ agent.run("Find pricing information from https://competitor.com/pricing")
211
+ ```
212
+
213
+ ### E-commerce Monitoring
214
+
215
+ ```python
216
+ session_tool = ProxyClawSessionTool(api_key="your_api_key")
217
+
218
+ # Same IP for login + price check
219
+ result = session_tool.invoke({
220
+ "urls": [
221
+ "https://shop.com/login",
222
+ "https://shop.com/product/123"
223
+ ],
224
+ "country": "US"
225
+ })
226
+ ```
227
+
228
+ ### Market Research
229
+
230
+ ```python
231
+ from langchain_proxyclaw import ProxyClawTool
232
+
233
+ tool = ProxyClawTool(api_key="your_api_key")
234
+
235
+ # Check how a site appears from different countries
236
+ for country in ["US", "GB", "DE", "JP"]:
237
+ result = tool.invoke({
238
+ "url": "https://global-site.com",
239
+ "country": country
240
+ })
241
+ print(f"{country}: {result}")
242
+ ```
243
+
244
+ ## LangChain Integration Tests
245
+
246
+ To run LangChain's standard integration tests:
247
+
248
+ ```bash
249
+ pip install langchain-proxyclaw[dev]
250
+ pytest tests/ -v
251
+ ```
252
+
253
+ ## Documentation
254
+
255
+ Full documentation: [docs.proxyclaw.ai](https://docs.proxyclaw.ai)
256
+
257
+ LangChain docs: [python.langchain.com](https://python.langchain.com)
258
+
259
+ ## API Reference
260
+
261
+ See [ProxyClaw API Docs](https://docs.proxyclaw.ai/api) for details on:
262
+ - Authentication formats
263
+ - Country targeting
264
+ - Session management
265
+ - Bandwidth tracking
266
+
267
+ ## Pricing
268
+
269
+ ProxyClaw uses pay-as-you-go pricing:
270
+ - $0.35/GB under 10TB
271
+ - $0.25/GB over 10TB
272
+
273
+ No minimums, no commitments. [Sign up](https://proxyclaw.ai)
274
+
275
+ ## Support
276
+
277
+ - 📧 Email: support@iploop.io
278
+ - 💬 Discord: [discord.gg/clawd](https://discord.gg/clawd)
279
+ - 📖 Docs: [docs.proxyclaw.ai](https://docs.proxyclaw.ai)
280
+
281
+ ## License
282
+
283
+ MIT License - see [LICENSE](LICENSE) file.
284
+
285
+ ## Contributing
286
+
287
+ Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
288
+
289
+ ---
290
+
291
+ Built with ❤️ by the [IPLoop](https://iploop.io) team.
@@ -0,0 +1,257 @@
1
+ # LangChain ProxyClaw Integration
2
+
3
+ [![PyPI version](https://badge.fury.io/py/langchain-proxyclaw.svg)](https://badge.fury.io/py/langchain-proxyclaw)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+
6
+ Official LangChain integration for [ProxyClaw](https://proxyclaw.ai) - a residential proxy network with 10M+ IPs across 111+ countries.
7
+
8
+ ## Overview
9
+
10
+ This package provides LangChain Tools for routing HTTP requests through ProxyClaw's residential proxy network, enabling AI agents to scrape websites with:
11
+
12
+ - 🌍 **Global IP coverage** - 111+ countries
13
+ - 🔄 **Automatic rotation** - Fresh IPs per request or sticky sessions
14
+ - 🛡️ **Anti-detection** - Built-in fingerprint spoofing
15
+ - ⚡ **High success rate** - Residential IPs bypass most blocks
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install langchain-proxyclaw
21
+ ```
22
+
23
+ For development:
24
+
25
+ ```bash
26
+ pip install langchain-proxyclaw[dev]
27
+ ```
28
+
29
+ ## Quick Start
30
+
31
+ ```python
32
+ from langchain_proxyclaw import ProxyClawTool
33
+ from langchain.agents import AgentType, initialize_agent
34
+ from langchain_openai import ChatOpenAI
35
+
36
+ # Initialize the tool
37
+ tool = ProxyClawTool(api_key="your_api_key")
38
+
39
+ # Use with an agent
40
+ llm = ChatOpenAI(temperature=0)
41
+ agent = initialize_agent(
42
+ [tool],
43
+ llm,
44
+ agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
45
+ verbose=True
46
+ )
47
+
48
+ # Ask the agent to scrape a website
49
+ agent.run("Get the content from https://example.com using a US proxy")
50
+ ```
51
+
52
+ ## Tools
53
+
54
+ ### 1. ProxyClawTool
55
+
56
+ Basic HTTP requests through ProxyClaw proxies.
57
+
58
+ ```python
59
+ from langchain_proxyclaw import ProxyClawTool
60
+
61
+ tool = ProxyClawTool(api_key="your_api_key")
62
+
63
+ # Simple GET request
64
+ result = tool.invoke({
65
+ "url": "https://example.com",
66
+ "country": "US"
67
+ })
68
+
69
+ # POST request with data
70
+ result = tool.invoke({
71
+ "url": "https://api.example.com/data",
72
+ "method": "POST",
73
+ "data": {"key": "value"},
74
+ "headers": {"Authorization": "Bearer token"},
75
+ "country": "GB"
76
+ })
77
+ ```
78
+
79
+ **Parameters:**
80
+ - `url` (str, required): Target URL
81
+ - `method` (str): HTTP method (GET, POST, PUT, DELETE) - default: GET
82
+ - `headers` (dict): Optional HTTP headers
83
+ - `data` (str/dict): Request body data
84
+ - `country` (str): Country code for proxy location (e.g., "US", "GB", "DE")
85
+ - `session_id` (str): Session ID for sticky sessions
86
+
87
+ ### 2. ProxyClawSessionTool
88
+
89
+ Sticky proxy sessions for multi-step workflows.
90
+
91
+ ```python
92
+ from langchain_proxyclaw import ProxyClawSessionTool
93
+
94
+ tool = ProxyClawSessionTool(api_key="your_api_key")
95
+
96
+ # Scrape multiple pages with the same IP
97
+ result = tool.invoke({
98
+ "urls": [
99
+ "https://site.com/login",
100
+ "https://site.com/dashboard",
101
+ "https://site.com/profile"
102
+ ],
103
+ "country": "US"
104
+ })
105
+ ```
106
+
107
+ **Parameters:**
108
+ - `urls` (list[str], required): List of URLs to fetch
109
+ - `country` (str): Country code
110
+ - `session_lifetime` (int): Session duration in minutes - default: 30
111
+
112
+ ### 3. ProxyClawScraperTool
113
+
114
+ Advanced scraper with retries and auto-rotation.
115
+
116
+ ```python
117
+ from langchain_proxyclaw import ProxyClawScraperTool
118
+
119
+ tool = ProxyClawScraperTool(api_key="your_api_key")
120
+
121
+ # Scrape with retries and link extraction
122
+ result = tool.invoke({
123
+ "url": "https://example.com/products",
124
+ "country": "US",
125
+ "retries": 3,
126
+ "timeout": 30,
127
+ "extract_links": True
128
+ })
129
+ ```
130
+
131
+ **Parameters:**
132
+ - `url` (str, required): Target URL
133
+ - `country` (str): Country code
134
+ - `retries` (int): Number of retry attempts - default: 3
135
+ - `timeout` (int): Request timeout in seconds - default: 30
136
+ - `extract_links` (bool): Extract all links from the page - default: False
137
+
138
+ ## Authentication
139
+
140
+ Get your API key from [ProxyClaw Dashboard](https://platform.proxyclaw.ai):
141
+
142
+ ```python
143
+ api_key = "pk_live_xxxxxxxxxxxxxxxx"
144
+ ```
145
+
146
+ ## Country Codes
147
+
148
+ Common country codes:
149
+ - `US` - United States
150
+ - `GB` - United Kingdom
151
+ - `DE` - Germany
152
+ - `FR` - France
153
+ - `JP` - Japan
154
+ - `IN` - India
155
+ - `BR` - Brazil
156
+
157
+ Full list: 111+ countries supported. Use ISO 3166-1 alpha-2 codes.
158
+
159
+ ## Use Cases
160
+
161
+ ### Web Scraping for AI Agents
162
+
163
+ ```python
164
+ from langchain_proxyclaw import ProxyClawScraperTool
165
+ from langchain.agents import AgentType, initialize_agent
166
+
167
+ scraper = ProxyClawScraperTool(api_key="your_api_key")
168
+
169
+ agent = initialize_agent(
170
+ [scraper],
171
+ llm,
172
+ agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION
173
+ )
174
+
175
+ # Agent can now scrape without being blocked
176
+ agent.run("Find pricing information from https://competitor.com/pricing")
177
+ ```
178
+
179
+ ### E-commerce Monitoring
180
+
181
+ ```python
182
+ session_tool = ProxyClawSessionTool(api_key="your_api_key")
183
+
184
+ # Same IP for login + price check
185
+ result = session_tool.invoke({
186
+ "urls": [
187
+ "https://shop.com/login",
188
+ "https://shop.com/product/123"
189
+ ],
190
+ "country": "US"
191
+ })
192
+ ```
193
+
194
+ ### Market Research
195
+
196
+ ```python
197
+ from langchain_proxyclaw import ProxyClawTool
198
+
199
+ tool = ProxyClawTool(api_key="your_api_key")
200
+
201
+ # Check how a site appears from different countries
202
+ for country in ["US", "GB", "DE", "JP"]:
203
+ result = tool.invoke({
204
+ "url": "https://global-site.com",
205
+ "country": country
206
+ })
207
+ print(f"{country}: {result}")
208
+ ```
209
+
210
+ ## LangChain Integration Tests
211
+
212
+ To run LangChain's standard integration tests:
213
+
214
+ ```bash
215
+ pip install langchain-proxyclaw[dev]
216
+ pytest tests/ -v
217
+ ```
218
+
219
+ ## Documentation
220
+
221
+ Full documentation: [docs.proxyclaw.ai](https://docs.proxyclaw.ai)
222
+
223
+ LangChain docs: [python.langchain.com](https://python.langchain.com)
224
+
225
+ ## API Reference
226
+
227
+ See [ProxyClaw API Docs](https://docs.proxyclaw.ai/api) for details on:
228
+ - Authentication formats
229
+ - Country targeting
230
+ - Session management
231
+ - Bandwidth tracking
232
+
233
+ ## Pricing
234
+
235
+ ProxyClaw uses pay-as-you-go pricing:
236
+ - $0.35/GB under 10TB
237
+ - $0.25/GB over 10TB
238
+
239
+ No minimums, no commitments. [Sign up](https://proxyclaw.ai)
240
+
241
+ ## Support
242
+
243
+ - 📧 Email: support@iploop.io
244
+ - 💬 Discord: [discord.gg/clawd](https://discord.gg/clawd)
245
+ - 📖 Docs: [docs.proxyclaw.ai](https://docs.proxyclaw.ai)
246
+
247
+ ## License
248
+
249
+ MIT License - see [LICENSE](LICENSE) file.
250
+
251
+ ## Contributing
252
+
253
+ Contributions welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
254
+
255
+ ---
256
+
257
+ Built with ❤️ by the [IPLoop](https://iploop.io) team.
@@ -0,0 +1,14 @@
1
+ """LangChain integration for ProxyClaw residential proxy network."""
2
+
3
+ from langchain_proxyclaw.tools import (
4
+ ProxyClawTool,
5
+ ProxyClawSessionTool,
6
+ ProxyClawScraperTool,
7
+ )
8
+
9
+ __version__ = "0.1.0"
10
+ __all__ = [
11
+ "ProxyClawTool",
12
+ "ProxyClawSessionTool",
13
+ "ProxyClawScraperTool",
14
+ ]
@@ -0,0 +1,318 @@
1
+ """LangChain Tools for ProxyClaw proxy network."""
2
+
3
+ from typing import Any, Dict, List, Optional, Type, Union
4
+ from urllib.parse import urljoin, urlparse
5
+
6
+ from langchain_core.callbacks import CallbackManagerForToolRun
7
+ from langchain_core.tools import BaseTool
8
+ from pydantic import BaseModel, Field
9
+
10
+ from iploop import IPLoop
11
+
12
+
13
class ProxyClawRequestInput(BaseModel):
    """Input schema for a single HTTP request routed through ProxyClaw.

    Mirrors the keyword arguments accepted by ``ProxyClawTool._run``.
    """
    # Target URL for the request (required).
    url: str = Field(..., description="URL to fetch")
    # HTTP verb; the tool compares it case-insensitively.
    method: str = Field(default="GET", description="HTTP method (GET, POST, PUT, DELETE)")
    # Extra request headers, forwarded verbatim to the proxy client.
    headers: Optional[Dict[str, str]] = Field(default=None, description="Optional HTTP headers")
    # Body payload; the tool only forwards it for POST/PUT requests.
    data: Optional[Union[Dict[str, Any], str]] = Field(default=None, description="Request body data")
    # ISO 3166-1 alpha-2 country code selecting the proxy exit location.
    country: Optional[str] = Field(default=None, description="Country code for proxy location (e.g., 'US', 'GB')")
20
+
21
+
22
class ProxyClawSessionInput(BaseModel):
    """Input schema for fetching several URLs through one proxy session."""
    # URLs fetched sequentially by ``ProxyClawSessionTool._run``.
    urls: List[str] = Field(..., description="List of URLs to fetch in the same session")
    # ISO 3166-1 alpha-2 country code selecting the proxy exit location.
    country: Optional[str] = Field(default=None, description="Country code for proxy location")
26
+
27
+
28
class ProxyClawScraperInput(BaseModel):
    """Input schema for the retrying scraper tool."""
    # Page to scrape (required).
    url: str = Field(..., description="URL to scrape")
    # ISO 3166-1 alpha-2 country code selecting the proxy exit location.
    country: Optional[str] = Field(default=None, description="Country code for proxy location")
    # Maximum number of fetch attempts before the tool gives up.
    retries: int = Field(default=3, description="Number of retry attempts")
    # NOTE(review): declared here but ``ProxyClawScraperTool._run`` never
    # forwards it to the underlying client — confirm intended behavior.
    timeout: int = Field(default=30, description="Request timeout in seconds")
    # When True, anchor hrefs are extracted with BeautifulSoup (optional dep).
    extract_links: bool = Field(default=False, description="Whether to extract links from the page")
35
+
36
+
37
class ProxyClawTool(BaseTool):
    """HTTP request tool backed by the ProxyClaw residential proxy network.

    Routes a single HTTP request through ProxyClaw, which exposes 10M+
    residential IPs across 111+ countries with anti-detection features.

    Example:
        .. code-block:: python

            from langchain_proxyclaw import ProxyClawTool

            tool = ProxyClawTool(api_key="your_api_key")
            result = tool.invoke({
                "url": "https://example.com",
                "country": "US"
            })
    """

    name: str = "proxyclaw_request"
    description: str = (
        "Make HTTP requests through ProxyClaw residential proxies. "
        "Provides access to millions of residential IPs with anti-detection. "
        "Input: URL, optional country code, headers. "
        "Output: Response content, status code, headers."
    )
    args_schema: Type[BaseModel] = ProxyClawRequestInput

    client: Any = Field(default=None, exclude=True)
    api_key: Optional[str] = Field(default=None, exclude=True)

    def __init__(self, api_key: Optional[str] = None, **kwargs: Any):
        """Create the tool.

        Args:
            api_key: ProxyClaw API key; when omitted, every call returns an
                error string instead of raising at construction time.
            **kwargs: Forwarded to ``BaseTool``.
        """
        super().__init__(**kwargs)
        self.api_key = api_key
        self.client = IPLoop(api_key=api_key) if api_key else None

    def _run(
        self,
        url: str,
        method: str = "GET",
        headers: Optional[Dict[str, str]] = None,
        data: Optional[Union[Dict[str, Any], str]] = None,
        country: Optional[str] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Perform one proxied HTTP request and describe the outcome.

        Args:
            url: Target URL.
            method: HTTP verb, matched case-insensitively.
            headers: Optional request headers.
            data: Body payload (forwarded only for POST/PUT).
            country: Proxy exit country code.
            run_manager: LangChain callback manager (unused here).

        Returns:
            ``str()`` of a result dict on success, or an ``"Error: ..."``
            string on any failure — errors are reported, never raised.
        """
        try:
            if self.client is None:
                return "Error: No API key provided"

            # A per-call client is built only when a country override is given.
            http_client = self.client
            if country:
                http_client = IPLoop(api_key=self.api_key, country=country)

            # Dispatch table instead of an if/elif chain; unknown verbs
            # fall through to the unsupported-method error below.
            dispatch = {
                "GET": lambda: http_client.get(url, headers=headers),
                "POST": lambda: http_client.post(url, data=data, headers=headers),
                "PUT": lambda: http_client.put(url, data=data, headers=headers),
                "DELETE": lambda: http_client.delete(url, headers=headers),
            }
            perform = dispatch.get(method.upper())
            if perform is None:
                return f"Error: Unsupported method {method}"
            response = perform()

            # Tolerate client responses lacking requests-style attributes.
            body = response.text if hasattr(response, 'text') else str(response)
            result = {
                "status_code": getattr(response, 'status_code', 200),
                "url": url,
                "content": body[:10000],  # cap the payload handed to the LLM
                "success": True,
            }

            return str(result)

        except Exception as e:
            return f"Error: {str(e)}"
132
+
133
+
134
class ProxyClawSessionTool(BaseTool):
    """Fetch several URLs through ProxyClaw as one logical session.

    Intended for sites that expect consecutive requests to share context
    (login flows, checkout, etc.).

    Example:
        .. code-block:: python

            tool = ProxyClawSessionTool(api_key="your_api_key")
            result = tool.invoke({
                "urls": ["https://site.com/page1", "https://site.com/page2"],
                "country": "GB"
            })
    """

    name: str = "proxyclaw_session"
    description: str = (
        "Create a proxy session for multiple requests. "
        "Useful for sites requiring consistent IP (login, checkout, etc.). "
        "Input: List of URLs, country. "
        "Output: List of responses."
    )
    args_schema: Type[BaseModel] = ProxyClawSessionInput

    client: Any = Field(default=None, exclude=True)
    api_key: Optional[str] = Field(default=None, exclude=True)

    def __init__(self, api_key: Optional[str] = None, **kwargs: Any):
        """Store the API key and build a default client when one is given."""
        super().__init__(**kwargs)
        self.api_key = api_key
        self.client = IPLoop(api_key=api_key) if api_key else None

    def _run(
        self,
        urls: List[str],
        country: Optional[str] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Fetch every URL in order and summarize the per-URL outcomes.

        Args:
            urls: URLs to fetch sequentially.
            country: Proxy exit country code.
            run_manager: LangChain callback manager (unused here).

        Returns:
            ``str()`` of a dict with per-URL results plus aggregate counts,
            or an ``"Error: ..."`` string when no API key was supplied.
        """
        if self.client is None:
            return "Error: No API key provided"

        # Country override gets a dedicated client; otherwise reuse the default.
        session_client = (
            IPLoop(api_key=self.api_key, country=country) if country else self.client
        )

        outcomes = []
        for target in urls:
            # Per-URL failures are recorded, not raised, so one bad URL
            # does not abort the rest of the batch.
            try:
                resp = session_client.get(target)
                payload = resp.text if hasattr(resp, 'text') else str(resp)
                outcomes.append({
                    "url": target,
                    "status": getattr(resp, 'status_code', 200),
                    "success": True,
                    "preview": payload[:500],
                })
            except Exception as exc:
                outcomes.append({"url": target, "error": str(exc)})

        summary = {
            "results": outcomes,
            "total": len(urls),
            "successful": len([entry for entry in outcomes if entry.get("success")]),
        }
        return str(summary)
209
+
210
+
211
class ProxyClawScraperTool(BaseTool):
    """Retrying scraper that rotates proxy IPs between attempts.

    When a country is requested, each retry builds a fresh client so the
    next attempt can land on a different residential IP. Optionally pulls
    anchor links out of the fetched page.

    Example:
        .. code-block:: python

            tool = ProxyClawScraperTool(api_key="your_api_key")
            result = tool.invoke({
                "url": "https://example.com/products",
                "country": "US",
                "retries": 3,
                "extract_links": True
            })
    """

    name: str = "proxyclaw_scraper"
    description: str = (
        "Advanced web scraper with automatic retries and IP rotation. "
        "Best for challenging sites with anti-bot protection. "
        "Input: URL, country, retries, timeout, extract_links. "
        "Output: Page content, extracted links, success status."
    )
    args_schema: Type[BaseModel] = ProxyClawScraperInput

    client: Any = Field(default=None, exclude=True)
    api_key: Optional[str] = Field(default=None, exclude=True)

    def __init__(self, api_key: Optional[str] = None, **kwargs: Any):
        """Store the API key and build a default client when one is given."""
        super().__init__(**kwargs)
        self.api_key = api_key
        self.client = IPLoop(api_key=api_key) if api_key else None

    def _run(
        self,
        url: str,
        country: Optional[str] = None,
        retries: int = 3,
        timeout: int = 30,
        extract_links: bool = False,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Fetch *url*, retrying on failure, and report the first success.

        Args:
            url: Target URL.
            country: Proxy exit country code.
            retries: Maximum number of attempts.
            timeout: Accepted for schema compatibility; currently not
                forwarded to the underlying client.
            extract_links: When True, collect anchor hrefs from the page.
            run_manager: LangChain callback manager (unused here).

        Returns:
            ``str()`` of a result dict — a success payload after the first
            good attempt, or a failure summary once retries are exhausted.
        """
        if self.client is None:
            return "Error: No API key provided"

        failure_reason = None

        for attempt_number in range(1, retries + 1):
            try:
                # Rebuild the client on every attempt when a country is
                # requested so each retry may use a fresh IP.
                scrape_client = self.client
                if country:
                    scrape_client = IPLoop(api_key=self.api_key, country=country)

                response = scrape_client.get(url)

                page = response.text if hasattr(response, 'text') else str(response)
                result = {
                    "url": url,
                    "status_code": getattr(response, 'status_code', 200),
                    "success": True,
                    "attempts": attempt_number,
                    "content_length": len(page),
                    "content_preview": page[:2000],
                }

                if extract_links:
                    try:
                        from bs4 import BeautifulSoup
                        soup = BeautifulSoup(page, 'html.parser')
                        hrefs = [anchor.get('href') for anchor in soup.find_all('a', href=True)]
                        # Resolve root-relative links against the page origin.
                        origin = f"{urlparse(url).scheme}://{urlparse(url).netloc}"
                        resolved = [
                            urljoin(origin, href) if href.startswith('/') else href
                            for href in hrefs
                        ]
                        result["extracted_links"] = list(set(resolved))[:50]  # cap output size
                    except ImportError:
                        result["extracted_links"] = ["beautifulsoup4 not installed"]

                return str(result)

            except Exception as exc:
                failure_reason = str(exc)
                continue

        return str({
            "url": url,
            "success": False,
            "attempts": retries,
            "error": failure_reason,
        })
@@ -0,0 +1,76 @@
1
+ """Utility functions for langchain-proxyclaw."""
2
+
3
+ from typing import Any, Dict, Optional
4
+
5
+
6
def build_auth_string(
    base_auth: str,
    country: Optional[str] = None,
    session_id: Optional[str] = None,
    session_lifetime: Optional[int] = None,
    city: Optional[str] = None,
    rotation: bool = False,
) -> str:
    """Assemble a ProxyClaw proxy-auth string from targeting options.

    Options are appended to *base_auth* as ``-key-value`` segments in a
    fixed order: country, city, session (with optional lifetime), rotation.

    Args:
        base_auth: Base auth string (customer_id:api_key).
        country: Country code (e.g., 'US', 'GB').
        session_id: Session ID for sticky sessions.
        session_lifetime: Session lifetime in minutes; only emitted when a
            session_id is also given.
        city: City name for geo-targeting.
        rotation: Whether to enable IP rotation.

    Returns:
        The formatted auth string.
    """
    segments = [base_auth]

    if country:
        segments.append(f"country-{country}")
    if city:
        segments.append(f"city-{city}")
    if session_id:
        segments.append(f"session-{session_id}")
        # Lifetime is meaningless without a session, so it is nested here.
        if session_lifetime:
            segments.append(f"lifetime-{session_lifetime}")
    if rotation:
        segments.append("rotation-true")

    return "-".join(segments)
44
+
45
+
46
def validate_url(url: str) -> bool:
    """Return True when *url* is a well-formed http(s) URL.

    Args:
        url: URL to validate.

    Returns:
        True only when the URL has an ``http``/``https`` scheme and a
        non-empty network location.
    """
    from urllib.parse import urlparse

    try:
        parsed = urlparse(url)
    except Exception:
        # urlparse raises only on pathological inputs; treat them as invalid.
        return False

    return parsed.scheme in ('http', 'https') and bool(parsed.netloc)
62
+
63
+
64
def truncate_content(content: str, max_length: int = 10000) -> str:
    """Cap *content* at *max_length* characters, noting the original size.

    Args:
        content: Content to truncate.
        max_length: Maximum number of characters to keep.

    Returns:
        The content unchanged when it fits, otherwise the first
        *max_length* characters followed by a truncation marker that
        records the original length.
    """
    total = len(content)
    if total > max_length:
        return f"{content[:max_length]}\n... [truncated, total: {total} chars]"
    return content
@@ -0,0 +1,60 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "langchain-proxyclaw"
7
+ version = "0.1.0"
8
+ description = "LangChain integration for ProxyClaw - residential proxy network"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ {name = "IPLoop Team", email = "support@iploop.io"}
14
+ ]
15
+ keywords = ["langchain", "proxy", "scraping", "residential", "proxyclaw", "iploop"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Topic :: Internet :: WWW/HTTP",
26
+ "Topic :: Software Development :: Libraries :: Python Modules",
27
+ ]
28
+ dependencies = [
29
+ "langchain-core>=0.1.0",
30
+ "iploop-sdk>=1.8.0",
31
+ "pydantic>=2.0.0",
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ dev = [
36
+ "pytest>=7.0.0",
37
+ "pytest-asyncio>=0.21.0",
38
+ "black>=23.0.0",
39
+ "mypy>=1.0.0",
40
+ "beautifulsoup4>=4.12.0",
41
+ ]
42
+
43
+ [project.urls]
44
+ Homepage = "https://proxyclaw.ai"
45
+ Documentation = "https://docs.proxyclaw.ai"
46
+ Repository = "https://github.com/iploop/langchain-proxyclaw"
47
+ "Bug Tracker" = "https://github.com/iploop/langchain-proxyclaw/issues"
48
+
49
+ [tool.hatch.build.targets.wheel]
50
+ packages = ["langchain_proxyclaw"]
51
+
52
+ [tool.black]
53
+ line-length = 100
54
+ target-version = ['py39']
55
+
56
+ [tool.mypy]
57
+ python_version = "3.9"
58
+ warn_return_any = true
59
+ warn_unused_configs = true
60
+ disallow_untyped_defs = true
@@ -0,0 +1 @@
1
+ """Tests for langchain-proxyclaw."""
@@ -0,0 +1,153 @@
1
+ """Tests for ProxyClaw LangChain tools."""
2
+
3
+ import pytest
4
+ from unittest.mock import MagicMock, patch
5
+
6
+ from langchain_proxyclaw import ProxyClawTool, ProxyClawSessionTool, ProxyClawScraperTool
7
+
8
+
9
class MockResponse:
    """Minimal stand-in for an HTTP response object used by the tools."""

    def __init__(self, status_code=200, text="test content", url="https://example.com"):
        # Mirror the attributes the tools read off a real response.
        self.url = url
        self.text = text
        self.status_code = status_code
        self.headers = {"Content-Type": "text/html"}
16
+
17
+
18
@pytest.fixture
def mock_iploop():
    """Patch the IPLoop client class and yield the mocked instance.

    The tools construct their own IPLoop client, so tests receive the
    MagicMock that any such construction will return.
    """
    with patch("langchain_proxyclaw.tools.IPLoop") as patched_cls:
        client = MagicMock()
        patched_cls.return_value = client
        yield client
25
+
26
+
27
class TestProxyClawTool:
    """Unit tests for the basic ProxyClawTool."""

    def test_initialization(self):
        """The tool exposes its registered name and stores the API key."""
        instance = ProxyClawTool(api_key="test_key")
        assert instance.api_key == "test_key"
        assert instance.name == "proxyclaw_request"

    def test_basic_request(self, mock_iploop):
        """A plain GET is routed through the proxied client exactly once."""
        mock_iploop.get.return_value = MockResponse()

        output = ProxyClawTool(api_key="test_key")._run(url="https://example.com")

        mock_iploop.get.assert_called_once()
        assert "status_code" in output
        assert "200" in output or "success" in output

    def test_request_with_country(self, mock_iploop):
        """Passing a country code still yields a successful result."""
        mock_iploop.get.return_value = MockResponse()

        output = ProxyClawTool(api_key="test_key")._run(
            url="https://example.com", country="US"
        )

        # Should succeed - country param is processed
        assert "status_code" in output or "success" in output

    def test_post_request(self, mock_iploop):
        """POST requests with a payload are dispatched via the client's post."""
        mock_iploop.post.return_value = MockResponse()

        ProxyClawTool(api_key="test_key")._run(
            url="https://example.com/api",
            method="POST",
            data={"key": "value"},
        )

        mock_iploop.post.assert_called_once()
69
+
70
+
71
class TestProxyClawSessionTool:
    """Unit tests for ProxyClawSessionTool."""

    def test_initialization(self):
        """The tool exposes its registered name."""
        assert ProxyClawSessionTool(api_key="test_key").name == "proxyclaw_session"

    def test_session_requests(self, mock_iploop):
        """Every URL in the batch triggers one proxied GET."""
        mock_iploop.get.return_value = MockResponse()
        targets = ["https://example.com/1", "https://example.com/2"]

        output = ProxyClawSessionTool(api_key="test_key")._run(
            urls=targets,
            country="GB",
        )

        assert "results" in output
        assert mock_iploop.get.call_count == len(targets)
91
+
92
+
93
class TestProxyClawScraperTool:
    """Unit tests for ProxyClawScraperTool."""

    def test_initialization(self):
        """The tool exposes its registered name."""
        assert ProxyClawScraperTool(api_key="test_key").name == "proxyclaw_scraper"

    def test_successful_scrape(self, mock_iploop):
        """A page with links scrapes successfully."""
        html = '<html><body><a href="/page2">Link</a></body></html>'
        mock_iploop.get.return_value = MockResponse(text=html)

        output = ProxyClawScraperTool(api_key="test_key")._run(url="https://example.com")

        assert "success" in output
        assert "True" in output

    def test_scrape_with_retries(self, mock_iploop):
        """A transient failure is retried and the second attempt succeeds."""
        # First call raises, second returns a normal response.
        mock_iploop.get.side_effect = [
            Exception("Connection error"),
            MockResponse(),
        ]

        ProxyClawScraperTool(api_key="test_key")._run(
            url="https://example.com", retries=2
        )

        assert mock_iploop.get.call_count == 2
125
+
126
+
127
class TestToolSchema:
    """Verify each tool's args_schema declares its documented fields."""

    def test_proxyclaw_tool_schema(self):
        """ProxyClawTool accepts url, method and country arguments."""
        fields = ProxyClawTool().args_schema.model_fields
        for expected in ("url", "method", "country"):
            assert expected in fields

    def test_session_tool_schema(self):
        """ProxyClawSessionTool accepts a urls argument."""
        assert "urls" in ProxyClawSessionTool().args_schema.model_fields

    def test_scraper_tool_schema(self):
        """ProxyClawScraperTool accepts url, retries and extract_links."""
        fields = ProxyClawScraperTool().args_schema.model_fields
        for expected in ("url", "retries", "extract_links"):
            assert expected in fields