cogniscrape 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +308 -0
- package/dist/graphs/AbstractGraph.d.ts +27 -0
- package/dist/graphs/AbstractGraph.d.ts.map +1 -0
- package/dist/graphs/AbstractGraph.js +44 -0
- package/dist/graphs/AbstractGraph.js.map +1 -0
- package/dist/graphs/BaseGraph.d.ts +30 -0
- package/dist/graphs/BaseGraph.d.ts.map +1 -0
- package/dist/graphs/BaseGraph.js +62 -0
- package/dist/graphs/BaseGraph.js.map +1 -0
- package/dist/graphs/CSVScraperGraph.d.ts +16 -0
- package/dist/graphs/CSVScraperGraph.d.ts.map +1 -0
- package/dist/graphs/CSVScraperGraph.js +84 -0
- package/dist/graphs/CSVScraperGraph.js.map +1 -0
- package/dist/graphs/DepthSearchGraph.d.ts +14 -0
- package/dist/graphs/DepthSearchGraph.d.ts.map +1 -0
- package/dist/graphs/DepthSearchGraph.js +45 -0
- package/dist/graphs/DepthSearchGraph.js.map +1 -0
- package/dist/graphs/JSONScraperGraph.d.ts +18 -0
- package/dist/graphs/JSONScraperGraph.d.ts.map +1 -0
- package/dist/graphs/JSONScraperGraph.js +100 -0
- package/dist/graphs/JSONScraperGraph.js.map +1 -0
- package/dist/graphs/SearchGraph.d.ts +14 -0
- package/dist/graphs/SearchGraph.d.ts.map +1 -0
- package/dist/graphs/SearchGraph.js +42 -0
- package/dist/graphs/SearchGraph.js.map +1 -0
- package/dist/graphs/SmartScraperGraph.d.ts +16 -0
- package/dist/graphs/SmartScraperGraph.d.ts.map +1 -0
- package/dist/graphs/SmartScraperGraph.js +57 -0
- package/dist/graphs/SmartScraperGraph.js.map +1 -0
- package/dist/graphs/SmartScraperMultiGraph.d.ts +17 -0
- package/dist/graphs/SmartScraperMultiGraph.d.ts.map +1 -0
- package/dist/graphs/SmartScraperMultiGraph.js +71 -0
- package/dist/graphs/SmartScraperMultiGraph.js.map +1 -0
- package/dist/graphs/index.d.ts +12 -0
- package/dist/graphs/index.d.ts.map +1 -0
- package/dist/graphs/index.js +23 -0
- package/dist/graphs/index.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +43 -0
- package/dist/index.js.map +1 -0
- package/dist/models/GeminiModel.d.ts +16 -0
- package/dist/models/GeminiModel.d.ts.map +1 -0
- package/dist/models/GeminiModel.js +127 -0
- package/dist/models/GeminiModel.js.map +1 -0
- package/dist/models/OllamaModel.d.ts +15 -0
- package/dist/models/OllamaModel.d.ts.map +1 -0
- package/dist/models/OllamaModel.js +134 -0
- package/dist/models/OllamaModel.js.map +1 -0
- package/dist/models/index.d.ts +8 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +24 -0
- package/dist/models/index.js.map +1 -0
- package/dist/nodes/BaseNode.d.ts +37 -0
- package/dist/nodes/BaseNode.d.ts.map +1 -0
- package/dist/nodes/BaseNode.js +116 -0
- package/dist/nodes/BaseNode.js.map +1 -0
- package/dist/nodes/CSVExporterNode.d.ts +16 -0
- package/dist/nodes/CSVExporterNode.d.ts.map +1 -0
- package/dist/nodes/CSVExporterNode.js +85 -0
- package/dist/nodes/CSVExporterNode.js.map +1 -0
- package/dist/nodes/ConditionalNode.d.ts +16 -0
- package/dist/nodes/ConditionalNode.d.ts.map +1 -0
- package/dist/nodes/ConditionalNode.js +68 -0
- package/dist/nodes/ConditionalNode.js.map +1 -0
- package/dist/nodes/FetchNode.d.ts +15 -0
- package/dist/nodes/FetchNode.d.ts.map +1 -0
- package/dist/nodes/FetchNode.js +182 -0
- package/dist/nodes/FetchNode.js.map +1 -0
- package/dist/nodes/GenerateAnswerNode.d.ts +14 -0
- package/dist/nodes/GenerateAnswerNode.d.ts.map +1 -0
- package/dist/nodes/GenerateAnswerNode.js +86 -0
- package/dist/nodes/GenerateAnswerNode.js.map +1 -0
- package/dist/nodes/JSONExporterNode.d.ts +16 -0
- package/dist/nodes/JSONExporterNode.d.ts.map +1 -0
- package/dist/nodes/JSONExporterNode.js +42 -0
- package/dist/nodes/JSONExporterNode.js.map +1 -0
- package/dist/nodes/MergeNode.d.ts +10 -0
- package/dist/nodes/MergeNode.d.ts.map +1 -0
- package/dist/nodes/MergeNode.js +51 -0
- package/dist/nodes/MergeNode.js.map +1 -0
- package/dist/nodes/PDFScraperNode.d.ts +10 -0
- package/dist/nodes/PDFScraperNode.d.ts.map +1 -0
- package/dist/nodes/PDFScraperNode.js +80 -0
- package/dist/nodes/PDFScraperNode.js.map +1 -0
- package/dist/nodes/ParseNode.d.ts +12 -0
- package/dist/nodes/ParseNode.d.ts.map +1 -0
- package/dist/nodes/ParseNode.js +44 -0
- package/dist/nodes/ParseNode.js.map +1 -0
- package/dist/nodes/RAGNode.d.ts +13 -0
- package/dist/nodes/RAGNode.d.ts.map +1 -0
- package/dist/nodes/RAGNode.js +64 -0
- package/dist/nodes/RAGNode.js.map +1 -0
- package/dist/nodes/ReasoningNode.d.ts +10 -0
- package/dist/nodes/ReasoningNode.d.ts.map +1 -0
- package/dist/nodes/ReasoningNode.js +51 -0
- package/dist/nodes/ReasoningNode.js.map +1 -0
- package/dist/nodes/SearchNode.d.ts +13 -0
- package/dist/nodes/SearchNode.d.ts.map +1 -0
- package/dist/nodes/SearchNode.js +81 -0
- package/dist/nodes/SearchNode.js.map +1 -0
- package/dist/nodes/XMLScraperNode.d.ts +11 -0
- package/dist/nodes/XMLScraperNode.d.ts.map +1 -0
- package/dist/nodes/XMLScraperNode.js +99 -0
- package/dist/nodes/XMLScraperNode.js.map +1 -0
- package/dist/nodes/index.d.ts +17 -0
- package/dist/nodes/index.d.ts.map +1 -0
- package/dist/nodes/index.js +33 -0
- package/dist/nodes/index.js.map +1 -0
- package/dist/prompts/index.d.ts +12 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +117 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/types.d.ts +106 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +13 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/cache.d.ts +28 -0
- package/dist/utils/cache.d.ts.map +1 -0
- package/dist/utils/cache.js +72 -0
- package/dist/utils/cache.js.map +1 -0
- package/dist/utils/chunking.d.ts +8 -0
- package/dist/utils/chunking.d.ts.map +1 -0
- package/dist/utils/chunking.js +51 -0
- package/dist/utils/chunking.js.map +1 -0
- package/dist/utils/cleanupHtml.d.ts +7 -0
- package/dist/utils/cleanupHtml.d.ts.map +1 -0
- package/dist/utils/cleanupHtml.js +81 -0
- package/dist/utils/cleanupHtml.js.map +1 -0
- package/dist/utils/convertToMarkdown.d.ts +6 -0
- package/dist/utils/convertToMarkdown.d.ts.map +1 -0
- package/dist/utils/convertToMarkdown.js +61 -0
- package/dist/utils/convertToMarkdown.js.map +1 -0
- package/dist/utils/index.d.ts +13 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +40 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/logger.d.ts +14 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +35 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/proxy.d.ts +30 -0
- package/dist/utils/proxy.d.ts.map +1 -0
- package/dist/utils/proxy.js +62 -0
- package/dist/utils/proxy.js.map +1 -0
- package/dist/utils/rateLimiter.d.ts +24 -0
- package/dist/utils/rateLimiter.d.ts.map +1 -0
- package/dist/utils/rateLimiter.js +61 -0
- package/dist/utils/rateLimiter.js.map +1 -0
- package/dist/utils/retry.d.ts +17 -0
- package/dist/utils/retry.d.ts.map +1 -0
- package/dist/utils/retry.js +43 -0
- package/dist/utils/retry.js.map +1 -0
- package/dist/utils/schemaValidator.d.ts +69 -0
- package/dist/utils/schemaValidator.d.ts.map +1 -0
- package/dist/utils/schemaValidator.js +133 -0
- package/dist/utils/schemaValidator.js.map +1 -0
- package/package.json +64 -0
package/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2026 [Your Name]

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

package/README.md
ADDED
@@ -0,0 +1,308 @@
# 🕷️ CogniScrape

[](https://www.npmjs.com/package/cogniscrape)
[](https://opensource.org/licenses/MIT)

**Intelligent Web Scraping with LLMs** - A TypeScript library that combines traditional web scraping with Large Language Models for intelligent, structured data extraction.

## ✨ Features

- 🤖 **Dual LLM Support**: Ollama (free/local) + Google Gemini (cloud)
- 📊 **Graph-Based Architecture**: Composable, reusable node pipelines
- 🚀 **Production-Ready**: Built-in caching, retries, rate limiting, and proxy rotation
- 🎯 **Smart Parsing**: Automatic HTML→Markdown conversion and intelligent chunking
- ✅ **Schema Validation**: Zod integration for type-safe outputs
- 📝 **Multiple Formats**: JSON, CSV, XML, PDF support
- 🌐 **Browser Automation**: Playwright for dynamic content
- 🧠 **RAG Integration**: Retrieval-Augmented Generation for better accuracy

## 📦 Installation

```bash
npm install cogniscrape
```

## 🚀 Quick Start

### Basic Web Scraping with Gemini

```typescript
import { SmartScraperGraph } from 'cogniscrape';

const scraper = new SmartScraperGraph({
  prompt: 'Extract all product names and prices',
  source: 'https://example.com/products',
  config: {
    llm: {
      provider: 'gemini',
      model: 'gemini-2.0-flash-exp',
      apiKey: process.env.GEMINI_API_KEY,
    },
    verbose: true,
  },
});

const result = await scraper.run();
console.log(result);
```

### Using Ollama (100% Free & Local)

```typescript
import { SmartScraperGraph } from 'cogniscrape';

const scraper = new SmartScraperGraph({
  prompt: 'List all article titles and summaries',
  source: 'https://news.example.com',
  config: {
    llm: {
      provider: 'ollama',
      model: 'llama2', // or 'mistral', 'codellama', etc.
      baseUrl: 'http://localhost:11434',
    },
  },
});

const result = await scraper.run();
```

## 🎯 Available Graphs

| Graph | Purpose | Use Case |
|-------|---------|----------|
| `SmartScraperGraph` | Basic scraping | Extract data from single URL |
| `SmartScraperMultiGraph` | Multi-URL scraping | Scrape multiple sources (parallel/sequential) |
| `SearchGraph` | Internet search + scrape | Search engines + content extraction |
| `DepthSearchGraph` | Deep analysis | Search + reasoning + comprehensive analysis |
| `CSVScraperGraph` | CSV export | Scrape data → export to CSV |
| `JSONScraperGraph` | JSON export | Schema-validated JSON output |

## 📚 Examples

### Multi-URL Scraping

```typescript
import { SmartScraperMultiGraph, createLLM } from 'cogniscrape';

const llm = createLLM({
  provider: 'gemini',
  model: 'gemini-2.0-flash-exp',
  apiKey: process.env.GEMINI_API_KEY,
});

const scraper = new SmartScraperMultiGraph(
  'Extract company names and descriptions',
  [
    'https://company1.com',
    'https://company2.com',
    'https://company3.com',
  ],
  { llm },
  llm,
  true // parallel execution
);

const result = await scraper.run();
```

### CSV Export with Schema Validation

```typescript
import { CSVScraperGraph, createLLM } from 'cogniscrape';
import { z } from 'zod';

const schema = z.object({
  products: z.array(z.object({
    name: z.string(),
    price: z.number(),
    rating: z.number().optional(),
  })),
});

const llm = createLLM({
  provider: 'gemini',
  model: 'gemini-2.0-flash-exp',
  apiKey: process.env.GEMINI_API_KEY,
});

const scraper = new CSVScraperGraph(
  'Extract all products with their prices',
  'https://shop.example.com',
  {
    llm: {
      provider: 'gemini',
      model: 'gemini-2.0-flash-exp',
      apiKey: process.env.GEMINI_API_KEY,
    },
    schema,
  },
  llm,
  'products.csv'
);

await scraper.run();
```

### Internet Search Graph

```typescript
import { SearchGraph, createLLM } from 'cogniscrape';

const llm = createLLM({
  provider: 'gemini',
  model: 'gemini-2.0-flash-exp',
  apiKey: process.env.GEMINI_API_KEY,
});

const searchGraph = new SearchGraph(
  'Latest news about AI developments in 2026',
  {
    llm,
    searchEngine: 'duckduckgo',
    maxDepth: 3,
  },
  llm
);

const result = await searchGraph.run();
```
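
### Deep Search (sketch)

`DepthSearchGraph` has no example of its own in this README; the snippet below is a minimal sketch based on the constructor declared in `dist/graphs/DepthSearchGraph.d.ts` (`prompt`, `config`, `llmModel`) and on the SearchGraph example above. The query text and config values are illustrative assumptions, and it presumes the class is exported from the package root like the other graphs.

```typescript
import { DepthSearchGraph, createLLM } from 'cogniscrape';

const llm = createLLM({
  provider: 'gemini',
  model: 'gemini-2.0-flash-exp',
  apiKey: process.env.GEMINI_API_KEY,
});

// Constructor shape follows DepthSearchGraph.d.ts: (prompt, config, llmModel)
const deepSearch = new DepthSearchGraph(
  'Summarize the current state of open-source LLM tooling', // hypothetical query
  {
    llm,
    verbose: true,
  },
  llm
);

const result = await deepSearch.run(); // resolves with the final GraphState
```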

## ⚙️ Configuration Options

```typescript
interface ScraperConfig {
  llm: LLMConfig;
  verbose?: boolean;           // Enable logging
  headless?: boolean;          // Headless browser mode
  timeout?: number;            // Request timeout (ms)
  cut?: boolean;               // Enable HTML minification
  htmlMode?: boolean;          // Skip parsing (use raw HTML)

  // Production features
  proxy?: ProxyConfig;         // Proxy configuration
  retry?: RetryConfig;         // Retry with backoff
  rateLimit?: RateLimitConfig; // Rate limiting
  cache?: CacheConfig;         // Response caching

  // Advanced
  schema?: any;                // Zod schema for validation
  additionalInfo?: string;     // Extra context for LLM
  reasoning?: boolean;         // Enable reasoning mode
}
```
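
The `additionalInfo` and `reasoning` options are not exercised by the examples in this README; the snippet below is a minimal sketch of how they would be passed. Only the field names come from `ScraperConfig` above — the prompt, URL, and values are illustrative assumptions.

```typescript
import { SmartScraperGraph } from 'cogniscrape';

const scraper = new SmartScraperGraph({
  prompt: 'Extract the speaker list with affiliations',
  source: 'https://conference.example.com/speakers', // hypothetical URL
  config: {
    llm: {
      provider: 'ollama',
      model: 'llama2',
      baseUrl: 'http://localhost:11434',
    },
    timeout: 30000,  // request timeout in ms
    additionalInfo: 'Affiliations appear in parentheses after each speaker name.', // extra context for the LLM
    reasoning: true, // enable reasoning mode
  },
});

const result = await scraper.run();
```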

## 🔧 Production Features

### Proxy Rotation

```typescript
const config = {
  llm: { /* ... */ },
  proxy: {
    enabled: true,
    proxies: [
      'http://proxy1.com:8080',
      'http://proxy2.com:8080',
    ],
  },
};
```

### Retry with Exponential Backoff

```typescript
const config = {
  llm: { /* ... */ },
  retry: {
    maxRetries: 3,
    initialDelay: 1000,
    maxDelay: 10000,
    backoffMultiplier: 2,
  },
};
```

### Rate Limiting

```typescript
const config = {
  llm: { /* ... */ },
  rateLimit: {
    maxRequests: 10,
    windowMs: 1000,
    minDelay: 100,
  },
};
```

### Caching

```typescript
const config = {
  llm: { /* ... */ },
  cache: {
    enabled: true,
    ttl: 3600000, // 1 hour
    maxSize: 1000,
  },
};
```

## 🧪 Testing

```bash
npm test
```

## 🛠️ Development

```bash
# Install dependencies
npm install

# Build the project
npm run build

# Watch mode
npm run dev

# Run examples
npx ts-node examples/smart-scraper-gemini.ts
```

## 📖 API Reference

### Models

- `OllamaModel` - Local LLM support
- `GeminiModel` - Google Gemini integration
- `createLLM(config)` - Factory function
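
A minimal sketch of the factory on its own (the model name is an assumption; `maxTokens` is optional — the compiled `AbstractGraph` falls back to 8192 when it is omitted):

```typescript
import { createLLM } from 'cogniscrape';

// Local, Ollama-backed model; provider/model/baseUrl mirror the Quick Start config.
const llm = createLLM({
  provider: 'ollama',
  model: 'mistral',                  // any model available in your local Ollama
  baseUrl: 'http://localhost:11434',
  maxTokens: 8192,
});
```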

### Graphs

- `SmartScraperGraph` - Basic web scraping
- `SmartScraperMultiGraph` - Multi-URL scraping
- `SearchGraph` - Search + scrape
- `DepthSearchGraph` - Deep search with reasoning
- `CSVScraperGraph` - Export to CSV
- `JSONScraperGraph` - Export to JSON

### Nodes

- `FetchNode` - Fetch content
- `ParseNode` - Parse & chunk
- `GenerateAnswerNode` - LLM answer generation
- `RAGNode` - Retrieval-Augmented Generation
- `SearchNode` - Internet search
- `MergeNode` - Merge results
- `PDFScraperNode` - PDF extraction
- `XMLScraperNode` - XML parsing
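
The graph classes wire these nodes together internally. For reference, a hand-rolled pipeline along the lines of what `CSVScraperGraph` does under the hood might look like the sketch below; the `new FetchNode(inputKey, outputKeys, config)` shape and the `execute(state)` contract are taken from the compiled `dist/graphs/CSVScraperGraph.js`, while the URL and prompt are placeholders and it assumes the node classes are exported from the package root.

```typescript
import { FetchNode, ParseNode, GenerateAnswerNode, createLLM } from 'cogniscrape';

const config = {
  llm: {
    provider: 'gemini',
    model: 'gemini-2.0-flash-exp',
    apiKey: process.env.GEMINI_API_KEY,
  },
  verbose: true,
};

const llmModel = createLLM(config.llm);

// State flows through the pipeline; each node reads its input key(s) and writes its output key(s).
let state: any = {
  url: 'https://example.com/products', // placeholder URL
  userPrompt: 'Extract all product names and prices',
};

state = await new FetchNode('url', ['doc'], config).execute(state);
state = await new ParseNode('doc', ['parsedDoc'], config).execute(state);
state = await new GenerateAnswerNode(
  'userPrompt & parsedDoc',
  ['answer'],
  { ...config, llmModel }
).execute(state);

console.log(state.answer);
```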

## 🤝 Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

## 📄 License

MIT License - see LICENSE file for details

## 📬 Support

- 📧 Email: your.email@example.com
- 🐛 Issues: [GitHub Issues](https://github.com/yourusername/cogniscrape/issues)
- 💬 Discussions: [GitHub Discussions](https://github.com/yourusername/cogniscrape/discussions)

---

**Made with ❤️ for the TypeScript community**

package/dist/graphs/AbstractGraph.d.ts
ADDED
@@ -0,0 +1,27 @@
/**
 * AbstractGraph - Base class for all scraping graphs
 */
import { BaseGraph } from './BaseGraph';
import { ScraperConfig, BaseLLM } from '../types';
import { Logger } from '../utils/logger';
export declare abstract class AbstractGraph {
    protected prompt: string;
    protected source: string;
    protected config: ScraperConfig;
    protected schema?: any;
    protected llmModel: BaseLLM;
    protected modelToken: number;
    protected verbose: boolean;
    protected logger: Logger;
    protected graph?: BaseGraph;
    constructor(prompt: string, config: ScraperConfig, source: string, schema?: any);
    /**
     * Run the graph pipeline
     */
    run(): Promise<any>;
    /**
     * Create the graph structure - to be implemented by subclasses
     */
    protected abstract createGraph(): BaseGraph;
}
//# sourceMappingURL=AbstractGraph.d.ts.map
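
The `createGraph()` hook declared above is the extension point for concrete graphs. As a rough sketch only: a subclass would build a `BaseGraph` from nodes and edges along these lines (constructor shapes taken from the compiled `BaseGraph` and `CSVScraperGraph`; the class name is hypothetical, and it assumes `AbstractGraph`, `BaseGraph`, and the node classes are exported from the package root).

```typescript
import { AbstractGraph, BaseGraph, FetchNode, ParseNode, GenerateAnswerNode } from 'cogniscrape';

// Hypothetical subclass: wires fetch -> parse -> generate into a BaseGraph.
class MyScraperGraph extends AbstractGraph {
  protected createGraph(): BaseGraph {
    const fetchNode = new FetchNode('url', ['doc'], this.config);
    const parseNode = new ParseNode('doc', ['parsedDoc'], this.config);
    const generateNode = new GenerateAnswerNode(
      'userPrompt & parsedDoc',
      ['answer'],
      { ...this.config, llmModel: this.llmModel }
    );
    // BaseGraph takes (nodes, edges, entryPoint, verbose)
    return new BaseGraph(
      [fetchNode, parseNode, generateNode],
      [[fetchNode, parseNode], [parseNode, generateNode]],
      fetchNode,
      this.verbose
    );
  }
}
```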

package/dist/graphs/AbstractGraph.d.ts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"AbstractGraph.d.ts","sourceRoot":"","sources":["../../src/graphs/AbstractGraph.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAc,aAAa,EAAE,OAAO,EAAE,MAAM,UAAU,CAAC;AAE9D,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAEzC,8BAAsB,aAAa;IACjC,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;IACzB,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;IACzB,SAAS,CAAC,MAAM,EAAE,aAAa,CAAC;IAChC,SAAS,CAAC,MAAM,CAAC,EAAE,GAAG,CAAC;IACvB,SAAS,CAAC,QAAQ,EAAE,OAAO,CAAC;IAC5B,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;IAC7B,SAAS,CAAC,OAAO,EAAE,OAAO,CAAC;IAC3B,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;IACzB,SAAS,CAAC,KAAK,CAAC,EAAE,SAAS,CAAC;gBAEhB,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG;IAe/E;;OAEG;IACG,GAAG,IAAI,OAAO,CAAC,GAAG,CAAC;IAwBzB;;OAEG;IACH,SAAS,CAAC,QAAQ,CAAC,WAAW,IAAI,SAAS;CAC5C"}

package/dist/graphs/AbstractGraph.js
ADDED
@@ -0,0 +1,44 @@
"use strict";
/**
 * AbstractGraph - Base class for all scraping graphs
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.AbstractGraph = void 0;
const models_1 = require("../models");
const logger_1 = require("../utils/logger");
class AbstractGraph {
    constructor(prompt, config, source, schema) {
        this.prompt = prompt;
        this.source = source;
        this.config = config;
        this.schema = schema;
        this.verbose = config.verbose ?? false;
        this.logger = new logger_1.Logger('AbstractGraph', this.verbose);
        // Create LLM model
        this.llmModel = (0, models_1.createLLM)(config.llm);
        // Set model token limit
        this.modelToken = config.llm.maxTokens ?? 8192;
    }
    /**
     * Run the graph pipeline
     */
    async run() {
        this.logger.info('Initializing graph');
        // Create the graph
        this.graph = this.createGraph();
        // Determine input key based on source type
        const inputKey = this.source.startsWith('http') ? 'url' : 'localDir';
        // Create initial state
        const initialState = {
            userPrompt: this.prompt,
            [inputKey]: this.source,
        };
        this.logger.info(`Running graph with ${inputKey}: ${this.source}`);
        // Execute the graph
        const finalState = await this.graph.execute(initialState);
        // Return the answer
        return finalState.answer;
    }
}
exports.AbstractGraph = AbstractGraph;
//# sourceMappingURL=AbstractGraph.js.map

package/dist/graphs/AbstractGraph.js.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"AbstractGraph.js","sourceRoot":"","sources":["../../src/graphs/AbstractGraph.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAIH,sCAAsC;AACtC,4CAAyC;AAEzC,MAAsB,aAAa;IAWjC,YAAY,MAAc,EAAE,MAAqB,EAAE,MAAc,EAAE,MAAY;QAC7E,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK,CAAC;QACvC,IAAI,CAAC,MAAM,GAAG,IAAI,eAAM,CAAC,eAAe,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAExD,mBAAmB;QACnB,IAAI,CAAC,QAAQ,GAAG,IAAA,kBAAS,EAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAEtC,wBAAwB;QACxB,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,GAAG,CAAC,SAAS,IAAI,IAAI,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,GAAG;QACP,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QAEvC,mBAAmB;QACnB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEhC,2CAA2C;QAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC;QAErE,uBAAuB;QACvB,MAAM,YAAY,GAAe;YAC/B,UAAU,EAAE,IAAI,CAAC,MAAM;YACvB,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,MAAM;SACxB,CAAC;QAEF,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,sBAAsB,QAAQ,KAAK,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAEnE,oBAAoB;QACpB,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;QAE1D,oBAAoB;QACpB,OAAO,UAAU,CAAC,MAAM,CAAC;IAC3B,CAAC;CAMF;AAzDD,sCAyDC"}

package/dist/graphs/BaseGraph.d.ts
ADDED
@@ -0,0 +1,30 @@
/**
 * BaseGraph - Core graph execution engine
 */
import { GraphState } from '../types';
import { BaseNode } from '../nodes/BaseNode';
import { Logger } from '../utils/logger';
export declare class BaseGraph {
    protected nodes: BaseNode[];
    protected edges: [BaseNode, BaseNode][];
    protected entryPoint: BaseNode;
    protected logger: Logger;
    constructor(nodes: BaseNode[], edges: [BaseNode, BaseNode][], entryPoint: BaseNode, verbose?: boolean);
    /**
     * Execute the graph pipeline
     */
    execute(initialState: GraphState): Promise<GraphState>;
    /**
     * Get the next node in the pipeline
     */
    private getNextNode;
    /**
     * Get all nodes in the graph
     */
    getNodes(): BaseNode[];
    /**
     * Get all edges in the graph
     */
    getEdges(): [BaseNode, BaseNode][];
}
//# sourceMappingURL=BaseGraph.d.ts.map

package/dist/graphs/BaseGraph.d.ts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"BaseGraph.d.ts","sourceRoot":"","sources":["../../src/graphs/BaseGraph.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AACtC,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAEzC,qBAAa,SAAS;IACpB,SAAS,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC;IAC5B,SAAS,CAAC,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,CAAC,EAAE,CAAC;IACxC,SAAS,CAAC,UAAU,EAAE,QAAQ,CAAC;IAC/B,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;gBAEb,KAAK,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,CAAC,EAAE,EAAE,UAAU,EAAE,QAAQ,EAAE,OAAO,GAAE,OAAe;IAO5G;;OAEG;IACG,OAAO,CAAC,YAAY,EAAE,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;IA0B5D;;OAEG;IACH,OAAO,CAAC,WAAW;IASnB;;OAEG;IACH,QAAQ,IAAI,QAAQ,EAAE;IAItB;;OAEG;IACH,QAAQ,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,EAAE;CAGnC"}

package/dist/graphs/BaseGraph.js
ADDED
@@ -0,0 +1,62 @@
"use strict";
/**
 * BaseGraph - Core graph execution engine
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.BaseGraph = void 0;
const logger_1 = require("../utils/logger");
class BaseGraph {
    constructor(nodes, edges, entryPoint, verbose = false) {
        this.nodes = nodes;
        this.edges = edges;
        this.entryPoint = entryPoint;
        this.logger = new logger_1.Logger('BaseGraph', verbose);
    }
    /**
     * Execute the graph pipeline
     */
    async execute(initialState) {
        this.logger.info('Starting graph execution');
        let currentState = initialState;
        let currentNode = this.entryPoint;
        const visitedNodes = new Set();
        while (currentNode) {
            if (visitedNodes.has(currentNode)) {
                this.logger.warn(`Node ${currentNode['nodeName']} already visited, breaking to prevent loop`);
                break;
            }
            visitedNodes.add(currentNode);
            // Execute current node
            currentState = await currentNode.execute(currentState);
            // Find next node
            currentNode = this.getNextNode(currentNode);
        }
        this.logger.success('Graph execution completed');
        return currentState;
    }
    /**
     * Get the next node in the pipeline
     */
    getNextNode(currentNode) {
        for (const [from, to] of this.edges) {
            if (from === currentNode) {
                return to;
            }
        }
        return null;
    }
    /**
     * Get all nodes in the graph
     */
    getNodes() {
        return this.nodes;
    }
    /**
     * Get all edges in the graph
     */
    getEdges() {
        return this.edges;
    }
}
exports.BaseGraph = BaseGraph;
//# sourceMappingURL=BaseGraph.js.map

package/dist/graphs/BaseGraph.js.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"BaseGraph.js","sourceRoot":"","sources":["../../src/graphs/BaseGraph.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAIH,4CAAyC;AAEzC,MAAa,SAAS;IAMpB,YAAY,KAAiB,EAAE,KAA6B,EAAE,UAAoB,EAAE,UAAmB,KAAK;QAC1G,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,MAAM,GAAG,IAAI,eAAM,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO,CAAC,YAAwB;QACpC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QAE7C,IAAI,YAAY,GAAG,YAAY,CAAC;QAChC,IAAI,WAAW,GAAoB,IAAI,CAAC,UAAU,CAAC;QACnD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAY,CAAC;QAEzC,OAAO,WAAW,EAAE,CAAC;YACnB,IAAI,YAAY,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;gBAClC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,WAAW,CAAC,UAAU,CAAC,4CAA4C,CAAC,CAAC;gBAC9F,MAAM;YACR,CAAC;YAED,YAAY,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YAE9B,uBAAuB;YACvB,YAAY,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YAEvD,iBAAiB;YACjB,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;QAC9C,CAAC;QAED,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,2BAA2B,CAAC,CAAC;QACjD,OAAO,YAAY,CAAC;IACtB,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,WAAqB;QACvC,KAAK,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACpC,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;gBACzB,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;CACF;AAnED,8BAmEC"}

package/dist/graphs/CSVScraperGraph.d.ts
ADDED
@@ -0,0 +1,16 @@
/**
 * CSVScraperGraph - Scrape data and export to CSV
 */
import { BaseLLM, GraphState, ScraperConfig } from '../types';
import { Logger } from '../utils/logger';
export declare class CSVScraperGraph {
    protected prompt: string;
    protected source: string;
    protected config: ScraperConfig;
    protected llmModel: BaseLLM;
    protected logger: Logger;
    protected outputPath?: string;
    constructor(prompt: string, source: string, config: ScraperConfig, llmModel: BaseLLM, outputPath?: string);
    run(): Promise<GraphState>;
}
//# sourceMappingURL=CSVScraperGraph.d.ts.map

package/dist/graphs/CSVScraperGraph.d.ts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"CSVScraperGraph.d.ts","sourceRoot":"","sources":["../../src/graphs/CSVScraperGraph.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAK9D,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAEzC,qBAAa,eAAe;IAC1B,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;IACzB,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;IACzB,SAAS,CAAC,MAAM,EAAE,aAAa,CAAC;IAChC,SAAS,CAAC,QAAQ,EAAE,OAAO,CAAC;IAC5B,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;IACzB,SAAS,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC;gBAG5B,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,OAAO,EACjB,UAAU,CAAC,EAAE,MAAM;IAUf,GAAG,IAAI,OAAO,CAAC,UAAU,CAAC;CA4CjC"}

package/dist/graphs/CSVScraperGraph.js
ADDED
@@ -0,0 +1,84 @@
"use strict";
/**
 * CSVScraperGraph - Scrape data and export to CSV
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.CSVScraperGraph = void 0;
const FetchNode_1 = require("../nodes/FetchNode");
const ParseNode_1 = require("../nodes/ParseNode");
const GenerateAnswerNode_1 = require("../nodes/GenerateAnswerNode");
const CSVExporterNode_1 = require("../nodes/CSVExporterNode");
const logger_1 = require("../utils/logger");
class CSVScraperGraph {
    constructor(prompt, source, config, llmModel, outputPath) {
        this.prompt = prompt;
        this.source = source;
        this.config = config;
        this.llmModel = llmModel;
        this.outputPath = outputPath;
        this.logger = new logger_1.Logger('CSVScraperGraph', config.verbose ?? false);
    }
    async run() {
        this.logger.info('Starting CSVScraperGraph execution');
        this.logger.info(`Scraping: ${this.source}`);
        let state = {
            url: this.source,
            userPrompt: this.prompt,
        };
        // Fetch content
        const fetchNode = new FetchNode_1.FetchNode('url', ['doc'], this.config);
        state = await fetchNode.execute(state);
        // Parse content
        const parseNode = new ParseNode_1.ParseNode('doc', ['parsedDoc'], this.config);
        state = await parseNode.execute(state);
        // Generate answer
        const generateNode = new GenerateAnswerNode_1.GenerateAnswerNode('userPrompt & parsedDoc', ['answer'], { ...this.config, llmModel: this.llmModel });
        state = await generateNode.execute(state);
        // Export to CSV
        const csvExporter = new CSVExporterNode_1.CSVExporterNode('answer', ['csv'], { ...this.config, outputPath: this.outputPath });
        state = await csvExporter.execute(state);
        this.logger.success('CSVScraperGraph execution completed');
        // Save to file if path provided
        if (this.outputPath && state.csv) {
            const fs = await Promise.resolve().then(() => __importStar(require('fs')));
            fs.writeFileSync(this.outputPath, state.csv, 'utf-8');
            this.logger.success(`CSV saved to: ${this.outputPath}`);
        }
        return state;
    }
}
exports.CSVScraperGraph = CSVScraperGraph;
//# sourceMappingURL=CSVScraperGraph.js.map

package/dist/graphs/CSVScraperGraph.js.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"CSVScraperGraph.js","sourceRoot":"","sources":["../../src/graphs/CSVScraperGraph.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAGH,kDAA+C;AAC/C,kDAA+C;AAC/C,oEAAiE;AACjE,8DAA2D;AAC3D,4CAAyC;AAEzC,MAAa,eAAe;IAQ1B,YACE,MAAc,EACd,MAAc,EACd,MAAqB,EACrB,QAAiB,EACjB,UAAmB;QAEnB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,MAAM,GAAG,IAAI,eAAM,CAAC,iBAAiB,EAAE,MAAM,CAAC,OAAO,IAAI,KAAK,CAAC,CAAC;IACvE,CAAC;IAED,KAAK,CAAC,GAAG;QACP,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;QACvD,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAE7C,IAAI,KAAK,GAAe;YACtB,GAAG,EAAE,IAAI,CAAC,MAAM;YAChB,UAAU,EAAE,IAAI,CAAC,MAAM;SACxB,CAAC;QAEF,gBAAgB;QAChB,MAAM,SAAS,GAAG,IAAI,qBAAS,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7D,KAAK,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAEvC,gBAAgB;QAChB,MAAM,SAAS,GAAG,IAAI,qBAAS,CAAC,KAAK,EAAE,CAAC,WAAW,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACnE,KAAK,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAEvC,kBAAkB;QAClB,MAAM,YAAY,GAAG,IAAI,uCAAkB,CACzC,wBAAwB,EACxB,CAAC,QAAQ,CAAC,EACV,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAC5C,CAAC;QACF,KAAK,GAAG,MAAM,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAE1C,gBAAgB;QAChB,MAAM,WAAW,GAAG,IAAI,iCAAe,CACrC,QAAQ,EACR,CAAC,KAAK,CAAC,EACP,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,CAChD,CAAC;QACF,KAAK,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAEzC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,qCAAqC,CAAC,CAAC;QAE3D,gCAAgC;QAChC,IAAI,IAAI,CAAC,UAAU,IAAI,KAAK,CAAC,GAAG,EAAE,CAAC;YACjC,MAAM,EAAE,GAAG,wDAAa,IAAI,GAAC,CAAC;YAC9B,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YACtD,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,iBAAiB,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAC1D,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;CACF;AAnED,0CAmEC"}

package/dist/graphs/DepthSearchGraph.d.ts
ADDED
@@ -0,0 +1,14 @@
/**
 * DepthSearchGraph - Search the internet, scrape results, and perform deep analysis
 */
import { BaseLLM, GraphState, ScraperConfig } from '../types';
import { Logger } from '../utils/logger';
export declare class DepthSearchGraph {
    protected prompt: string;
    protected config: ScraperConfig;
    protected llmModel: BaseLLM;
    protected logger: Logger;
    constructor(prompt: string, config: ScraperConfig, llmModel: BaseLLM);
    run(): Promise<GraphState>;
}
//# sourceMappingURL=DepthSearchGraph.d.ts.map

package/dist/graphs/DepthSearchGraph.d.ts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"DepthSearchGraph.d.ts","sourceRoot":"","sources":["../../src/graphs/DepthSearchGraph.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAO9D,OAAO,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAEzC,qBAAa,gBAAgB;IAC3B,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;IACzB,SAAS,CAAC,MAAM,EAAE,aAAa,CAAC;IAChC,SAAS,CAAC,QAAQ,EAAE,OAAO,CAAC;IAC5B,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;gBAGvB,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,OAAO;IAQb,GAAG,IAAI,OAAO,CAAC,UAAU,CAAC;CAuCjC"}