iflow-mcp_bashirk-inbound-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/METADATA +348 -0
- iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/RECORD +8 -0
- iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/WHEEL +5 -0
- iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/top_level.txt +1 -0
- lead_server/__init__.py +11 -0
- lead_server/lead_server.py +250 -0
- lead_server/requirements.txt +26 -0
iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,348 @@
Metadata-Version: 2.4
Name: iflow-mcp_bashirk-inbound-mcp
Version: 0.1.0
Summary: A production-grade lead generation MCP server using FastMCP, Crawl4AI and external enrichment services
Author-email: bashirk <hi@kobotai.co>
License: Apache-2.0
Project-URL: Homepage, https://github.com/bashirk/inbound-mcp
Project-URL: Repository, https://github.com/bashirk/inbound-mcp
Project-URL: Issues, https://github.com/bashirk/inbound-mcp/issues
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.10
Description-Content-Type: text/markdown
Requires-Dist: mcp>=1.26.0
Requires-Dist: crawl4ai>=0.4.3
Requires-Dist: aiocache>=0.12.0
Requires-Dist: aiohttp>=3.9.0
Requires-Dist: uvloop>=0.19.0
Requires-Dist: python-dotenv>=1.0.0
Requires-Dist: pydantic>=2.5.0
Requires-Dist: playwright>=1.40.0
Provides-Extra: dev
Requires-Dist: pytest>=7.4.0; extra == "dev"
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
Requires-Dist: pytest-benchmark>=4.0.0; extra == "dev"
Requires-Dist: black>=23.9.0; extra == "dev"
Requires-Dist: mypy>=1.5.0; extra == "dev"
Requires-Dist: isort>=5.12.0; extra == "dev"
Provides-Extra: monitoring
Requires-Dist: prometheus-client>=0.17.0; extra == "monitoring"
Requires-Dist: sentry-sdk>=1.30.0; extra == "monitoring"
Provides-Extra: server
Requires-Dist: gunicorn>=21.2.0; extra == "server"
Requires-Dist: uvicorn>=0.23.0; extra == "server"

# Lead Generation Server Documentation




## Table of Contents
1. [Overview](#overview)
2. [Features](#features)
3. [Architecture](#architecture)
4. [Prerequisites](#prerequisites)
5. [Installation](#installation)
6. [Configuration](#configuration)
7. [Running the Server](#running-the-server)
8. [API Documentation](#api-documentation)
9. [Examples](#examples)
10. [Advanced Configuration](#advanced-configuration)
11. [Troubleshooting](#troubleshooting)
12. [Contributing](#contributing)
13. [License](#license)
14. [Roadmap](#roadmap)
15. [Support](#support)

---

## Overview <a name="overview"></a>
A production-grade lead generation system built on:
- **MCP Python SDK** for protocol-compliant AI services
- **Crawl4AI** for intelligent web crawling
- **AsyncIO** for high-concurrency operations

Implements the full lead lifecycle, from discovery to enrichment, with:
- UUID-based lead tracking (see the record sketch after this list)
- Multi-source data aggregation
- Smart caching strategies
- Enterprise-grade error handling

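A minimal sketch of the per-lead record, with field names taken from the `lead_store` entries created in `lead_server.py` (the example values are illustrative):

```python
import uuid
from datetime import datetime

# Shape of one lead_store entry as created by the lead_generation tool
lead = {
    "id": str(uuid.uuid4()),                      # UUID-based tracking key
    "search_terms": "Anthropic",                  # illustrative value
    "domain": "https://www.anthropic.com",        # illustrative value
    "created_at": datetime.utcnow().isoformat(),
    "status": "initial",                          # becomes "enriched" later
    "raw_data": {},
}
```
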
---

## Features <a name="features"></a>
| Feature | Tech Stack | Throughput |
|---------|------------|------------|
| Lead Generation | Google CSE, Crawl4AI | 120 req/min |
| Data Enrichment | Hunter.io, Clearbit (HubSpot Breeze) | 80 req/min |
| LinkedIn Scraping | Playwright, Stealth Mode | 40 req/min |
| Caching | aiocache, Redis | 10K ops/sec |
| Monitoring | Prometheus, Custom Metrics | Real-time |

---

## Architecture <a name="architecture"></a>
```mermaid
graph TD
    A[Client] --> B[MCP Server]
    B --> C{Lead Manager}
    C --> D[Google CSE]
    C --> E[Crawl4AI]
    C --> F[Hunter.io]
    C --> G[Clearbit]
    C --> H[LinkedIn Scraper]
    C --> I[(Redis Cache)]
    C --> J[Lead Store]
```

---

## Prerequisites <a name="prerequisites"></a>
- Python 3.10+
- API keys (see the loading sketch after this list):
  ```bash
  export HUNTER_API_KEY="your_key"
  export CLEARBIT_API_KEY="your_key"
  export GOOGLE_CSE_ID="your_id"
  export GOOGLE_API_KEY="your_key"
  ```
- LinkedIn session cookie (for scraping)
- 4GB+ RAM (8GB recommended for heavy scraping)

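A minimal sketch of loading these keys at startup, assuming a `.env` file in the working directory (python-dotenv is already in the dependency list; the variable names match those read by `lead_server.py`):

```python
import os
from dotenv import load_dotenv

load_dotenv()  # read .env into the process environment

REQUIRED = ["HUNTER_API_KEY", "CLEARBIT_API_KEY", "GOOGLE_CSE_ID", "GOOGLE_API_KEY"]
missing = [name for name in REQUIRED if not os.getenv(name)]
if missing:
    raise RuntimeError(f"Missing required API keys: {', '.join(missing)}")
```
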
---

## Installation <a name="installation"></a>

### Production Setup
```bash
# Create a virtual environment
python -m venv .venv && source .venv/bin/activate

# Install with production dependencies
pip install mcp "crawl4ai[all]" aiocache aiohttp uvloop

# Set up browser dependencies
python -m playwright install chromium
```

### Docker Deployment
```dockerfile
FROM python:3.10-slim

RUN apt-get update && apt-get install -y \
    gcc \
    libpython3-dev \
    chromium \
    && rm -rf /var/lib/apt/lists/*

COPY . /app
WORKDIR /app

RUN pip install --no-cache-dir -r requirements.txt
CMD ["python", "-m", "mcp", "run", "lead_server.py"]
```

---

## Configuration <a name="configuration"></a>
`config.yaml`
```yaml
services:
  hunter:
    api_key: ${HUNTER_API_KEY}
    rate_limit: 50/60s

  clearbit:
    api_key: ${CLEARBIT_API_KEY}
    cache_ttl: 86400

scraping:
  stealth_mode: true
  headless: true
  timeout: 30
  max_retries: 3

cache:
  backend: redis://localhost:6379/0
  default_ttl: 3600
```

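YAML itself does not expand the `${VAR}` placeholders; a minimal loader sketch, assuming PyYAML is installed (it is not in the dependency list) and that values should be substituted from the environment before use. The file name and key layout are the ones shown above:

```python
import os
import yaml  # PyYAML, an assumed extra dependency

def load_config(path: str = "config.yaml") -> dict:
    """Read the config file and expand ${VAR} references from the environment."""
    with open(path) as fh:
        return yaml.safe_load(os.path.expandvars(fh.read()))

config = load_config()
hunter_key = config["services"]["hunter"]["api_key"]
```
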
---

## Running the Server <a name="running-the-server"></a>

### Development Mode
```bash
mcp dev lead_server.py --reload --port 8080
```

### Production
```bash
gunicorn -w 4 -k uvicorn.workers.UvicornWorker lead_server:app
```

### Docker
```bash
docker build -t lead-server .
docker run -p 8080:8080 -e HUNTER_API_KEY=your_key lead-server
```

---

## API Documentation <a name="api-documentation"></a>

### 1. Generate Lead
```http
POST /tools/lead_generation
Content-Type: application/json

{
  "search_terms": "OpenAI"
}
```

Response:
```json
{
  "lead_id": "550e8400-e29b-41d4-a716-446655440000",
  "status": "pending",
  "estimated_time": 15
}
```

### 2. Enrich Lead
```http
POST /tools/data_enrichment
Content-Type: application/json

{
  "lead_id": "550e8400-e29b-41d4-a716-446655440000"
}
```

### 3. Monitor Leads
```http
GET /tools/lead_maintenance
```

---

## Examples <a name="examples"></a>

### Python Client
```python
import asyncio
from mcp.client import Client

async def main():
    async with Client() as client:
        # Generate lead
        lead = await client.call_tool(
            "lead_generation",
            {"search_terms": "Anthropic"}
        )

        # Enrich with all services
        enriched = await client.call_tool(
            "data_enrichment",
            {"lead_id": lead["lead_id"]}
        )

        # Get full lead data
        status = await client.call_tool(
            "lead_status",
            {"lead_id": lead["lead_id"]}
        )

asyncio.run(main())
```

### cURL
```bash
# Generate lead
curl -X POST http://localhost:8080/tools/lead_generation \
  -H "Content-Type: application/json" \
  -d '{"search_terms": "Cohere AI"}'
```

---

## Advanced Configuration <a name="advanced-configuration"></a>

### Caching Strategies
```python
from aiocache import Cache

# Configure Redis cluster
Cache.from_url(
    "redis://cluster-node1:6379/0",
    timeout=10,
    retry=True,
    retry_timeout=2
)
```

### Rate Limiting
```python
from mcp.server.middleware import RateLimiter

mcp.add_middleware(
    RateLimiter(
        rules={
            "lead_generation": "100/1m",
            "data_enrichment": "50/1m"
        }
    )
)
```

---

## Troubleshooting <a name="troubleshooting"></a>

| Error | Solution |
|-------|----------|
| `403 Forbidden` from Google | Rotate IPs or use the official CSE API |
| `429 Too Many Requests` | Implement exponential backoff (see the sketch below) |
| `Playwright Timeout` | Increase `scraping.timeout` in config |
| `Cache Miss` | Verify Redis connection and TTL settings |
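
For the `429` case, a minimal retry-helper sketch built on aiohttp (already a dependency); the retry count and delays are illustrative, not values taken from this package:

```python
import asyncio
import aiohttp

async def get_with_backoff(session: aiohttp.ClientSession, url: str,
                           retries: int = 5, base_delay: float = 1.0) -> dict:
    """Retry a GET with exponential backoff while the API answers 429."""
    for attempt in range(retries):
        async with session.get(url) as response:
            if response.status != 429:
                response.raise_for_status()
                return await response.json()
        await asyncio.sleep(base_delay * 2 ** attempt)  # 1s, 2s, 4s, ...
    raise RuntimeError(f"Still rate-limited after {retries} attempts: {url}")
```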

---

## Contributing <a name="contributing"></a>
1. Fork the repository
2. Create a feature branch: `git checkout -b feature/new-enrichment`
3. Commit your changes: `git commit -am 'Add Clearbit alternative'`
4. Push the branch: `git push origin feature/new-enrichment`
5. Submit a pull request

---

## License <a name="license"></a>
Apache 2.0 - See [LICENSE](LICENSE) for details.

---

## Roadmap <a name="roadmap"></a>
- [ ] **Q2 2025**: AI-powered lead scoring
- [ ] **Q3 2025**: Distributed crawling cluster support

---

## Support <a name="support"></a>
For enterprise support and custom integrations:
📧 Email: [hi@kobotai.co](mailto:hi@kobotai.co)
🐦 Twitter: [@KobotAIco](https://x.com/KobotAIco)

---

```bash
# Run benchmark tests
pytest tests/ --benchmark-json=results.json
```


iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,8 @@
lead_server/__init__.py,sha256=4E05K6TrVEeLvtfvvclC_Q7i2t7wkrrs_KZzgRHKMBA,204
lead_server/lead_server.py,sha256=X9P0phZep0q5ovPDPx6HBWjRmJJ9ofQRoNf2DOlYGp4,8513
lead_server/requirements.txt,sha256=p2bmyKw5P4sEQ4LYCyK6Uv3qhY8-jz1WUZKT6sw6PIk,381
iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/METADATA,sha256=e6ZLv2mQmCbrWHPEQ1mbPyD5u9weufcm_nHDmM-rBjI,8302
iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/entry_points.txt,sha256=k41T1A_96On2z38B4dJ5ZcwbxW-L0C0-Mjr47frhSO4,79
iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/top_level.txt,sha256=_qpSFoyE1Rz9YUHQIID0aiudO0G_7T1-hytoiwA6fX4,12
iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/RECORD,,
iflow_mcp_bashirk_inbound_mcp-0.1.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
lead_server
lead_server/__init__.py ADDED

lead_server/lead_server.py ADDED
@@ -0,0 +1,250 @@
import asyncio
import logging
import uuid
import json
import os
import sys
from datetime import datetime, timedelta
from typing import Dict, Optional
from mcp.server.fastmcp import FastMCP, Context
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, JsonCssExtractionStrategy, CacheMode
import aiohttp

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("LeadGenServer")
mcp = FastMCP("LeadGenServer")

# External API configurations - load from environment or use defaults
HUNTER_API_KEY = os.getenv("HUNTER_API_KEY", "your_hunter_api_key")
CLEARBIT_API_KEY = os.getenv("CLEARBIT_API_KEY", "your_clearbit_api_key")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "your_google_api_key")
GOOGLE_CSE_ID = os.getenv("GOOGLE_CSE_ID", "your_search_engine_id")
LINKEDIN_COOKIE = os.getenv("LINKEDIN_COOKIE", "your_linkedin_cookie")

# In-memory lead store with async locking
lead_store = {}
lead_lock = asyncio.Lock()

# Simple in-memory cache
_domain_cache = {}
_cache_lock = asyncio.Lock()

async def cached_get(key: str, ttl: int, func):
    """Simple caching function"""
    async with _cache_lock:
        if key in _domain_cache:
            data, timestamp = _domain_cache[key]
            if datetime.utcnow() - timestamp < timedelta(seconds=ttl):
                return data

    result = await func()

    async with _cache_lock:
        _domain_cache[key] = (result, datetime.utcnow())

    return result

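# Example usage (illustrative - cached_get is defined here but not called
# elsewhere in this module):
#     domain = await cached_get(f"domain:{term}", ttl=3600,
#                               func=lambda: get_company_domain(term))
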
# --------------------------------------------------
# Helper Functions
# --------------------------------------------------
async def get_company_domain(search_term: str) -> str:
    """Search Google for company website using official API"""
    # Use a mock implementation for testing if API keys are not set
    if GOOGLE_API_KEY == "your_google_api_key":
        logger.warning("Using mock domain search - API key not configured")
        return f"https://www.{search_term.lower().replace(' ', '')}.com"

    async with aiohttp.ClientSession() as session:
        params = {
            "q": f"{search_term} company",
            "key": GOOGLE_API_KEY,
            "cx": GOOGLE_CSE_ID,
            "num": 1
        }

        async with session.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params
        ) as response:
            results = await response.json()
            return results["items"][0]["link"]

async def hunter_domain_search(domain: str) -> dict:
    """Fetch email patterns from Hunter.io"""
    if HUNTER_API_KEY == "your_hunter_api_key":
        logger.warning("Using mock Hunter.io response - API key not configured")
        return {
            "data": {
                "domain": domain,
                "emails": [
                    {"value": f"info@{domain}", "type": "generic"},
                    {"value": f"contact@{domain}", "type": "generic"}
                ]
            }
        }

    async with aiohttp.ClientSession() as session:
        url = f"https://api.hunter.io/v2/domain-search?domain={domain}&api_key={HUNTER_API_KEY}"
        async with session.get(url) as response:
            return await response.json()

async def clearbit_enrichment(domain: str) -> dict:
    """Get company info from Clearbit"""
    if CLEARBIT_API_KEY == "your_clearbit_api_key":
        logger.warning("Using mock Clearbit response - API key not configured")
        return {
            "name": domain.split(".")[0].capitalize(),
            "domain": domain,
            "industry": "Technology",
            "employees": 100
        }

    async with aiohttp.ClientSession() as session:
        url = f"https://company.clearbit.com/v2/companies/find?domain={domain}"
        headers = {"Authorization": f"Bearer {CLEARBIT_API_KEY}"}

        async with session.get(url, headers=headers) as response:
            if response.status == 404:
                return {}
            return await response.json()

async def linkedin_scrape(company: str, ctx: Context) -> dict:
    """Scrape LinkedIn Sales Navigator (use with caution)"""
    if LINKEDIN_COOKIE == "your_linkedin_cookie":
        logger.warning("Using mock LinkedIn scrape - cookie not configured")
        return {
            "employees": [
                {"name": "John Doe"},
                {"name": "Jane Smith"}
            ]
        }

    url = f"https://www.linkedin.com/company/{company}/people/"

    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url=url,
            config=CrawlerRunConfig(
                cookies={"li_at": LINKEDIN_COOKIE},
                stealth_mode=True,
                js_code="window.scrollTo(0, document.body.scrollHeight);"
            )
        )

        # Extract employee data
        strategy = JsonCssExtractionStrategy({
            "name": "Employees",
            "baseSelector": ".org-people-profile-card__profile-info",
            "fields": [{
                "name": "name",
                "selector": ".artdeco-entity-lockup__title",
                "type": "text"
            }]
        })

        return json.loads(strategy.extract(result.html))

# --------------------------------------------------
# Core Tools
# --------------------------------------------------
@mcp.tool()
async def lead_generation(search_terms: str, ctx: Context) -> Dict:
    """Generate lead with proper ID and storage"""
    if not search_terms:
        raise ValueError("Search terms required")

    lead_id = str(uuid.uuid4())
    domain = await get_company_domain(search_terms)

    async with lead_lock:
        lead_store[lead_id] = {
            "id": lead_id,
            "search_terms": search_terms,
            "domain": domain,
            "created_at": datetime.utcnow().isoformat(),
            "status": "initial",
            "raw_data": {}
        }

    logger.info(f"Created lead {lead_id} for {search_terms}")
    return {"lead_id": lead_id, "status": "created"}

@mcp.tool()
async def data_enrichment(lead_id: str, ctx: Context) -> Dict:
    """Full enrichment pipeline with external services"""
    async with lead_lock:
        lead = lead_store.get(lead_id)
        if not lead:
            raise ValueError("Lead not found")

    if lead.get("enriched"):
        return {"lead": lead}

    # Parallel API calls
    hunter_task = hunter_domain_search(lead["domain"])
    clearbit_task = clearbit_enrichment(lead["domain"])
    linkedin_task = linkedin_scrape(lead["domain"], ctx)

    results = await asyncio.gather(
        hunter_task,
        clearbit_task,
        linkedin_task,
        return_exceptions=True
    )

    # Process results
    updates = {
        "hunter": results[0] if not isinstance(results[0], Exception) else None,
        "clearbit": results[1] if not isinstance(results[1], Exception) else None,
        "linkedin": results[2] if not isinstance(results[2], Exception) else None,
        "enriched_at": datetime.utcnow().isoformat(),
        "status": "enriched"
    }

    lead_store[lead_id].update({
        "enriched_data": updates,
        "status": "enriched"
    })

    return {
        "lead_id": lead_id,
        "enriched_data": updates,
        "cache_status": {
            "hunter": isinstance(results[0], Exception),
            "clearbit": isinstance(results[1], Exception),
            "linkedin": isinstance(results[2], Exception)
        }
    }

# --------------------------------------------------
# Maintenance Tools
# --------------------------------------------------
@mcp.tool()
async def lead_maintenance(ctx: Context) -> Dict:
    """Clean up old leads and cache"""
    async with lead_lock:
        now = datetime.utcnow()
        expired_leads = [
            lead_id for lead_id, lead in lead_store.items()
            if now - datetime.fromisoformat(lead["created_at"]) > timedelta(days=7)
        ]

        for lead_id in expired_leads:
            del lead_store[lead_id]

    async with _cache_lock:
        _domain_cache.clear()

    return {
        "cleaned_leads": len(expired_leads),
        "remaining_leads": len(lead_store)
    }

def main():
    """Main entry point for the MCP server"""
    logger.info("Starting production LeadGenServer...")
    mcp.run(transport="stdio")

if __name__ == "__main__":
    main()
lead_server/requirements.txt ADDED
@@ -0,0 +1,26 @@
# Core Dependencies
mcp>=2.1.0
crawl4ai>=0.4.3
aiocache>=0.12.0
aiohttp>=3.9.0
uvloop>=0.19.0
python-dotenv>=1.0.0
pydantic>=2.5.0
playwright>=1.40.0

# Development
pytest>=7.4.0
pytest-asyncio>=0.23.0
pytest-benchmark>=4.0.0
black>=23.9.0
mypy>=1.5.0
isort>=5.12.0

# Monitoring
prometheus-client>=0.17.0
sentry-sdk>=1.30.0

# CLI Tools
uv>=0.1.0
gunicorn>=21.2.0
uvicorn>=0.23.0