@kadoa/node-sdk 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +225 -188
- package/dist/browser/index.global.js +13 -13
- package/dist/browser/index.global.js.map +1 -1
- package/dist/index.d.mts +280 -100
- package/dist/index.d.ts +280 -100
- package/dist/index.js +247 -201
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +248 -203
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -12,309 +12,346 @@ yarn add @kadoa/node-sdk
|
|
|
12
12
|
pnpm add @kadoa/node-sdk
|
|
13
13
|
```
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
## Quick Start
|
|
17
16
|
|
|
18
17
|
```typescript
|
|
19
18
|
import { KadoaClient } from '@kadoa/node-sdk';
|
|
20
19
|
|
|
21
|
-
// Initialize the client
|
|
22
20
|
const client = new KadoaClient({
|
|
23
21
|
apiKey: 'your-api-key'
|
|
24
22
|
});
|
|
25
23
|
|
|
26
|
-
//
|
|
24
|
+
// AI automatically detects and extracts data
|
|
27
25
|
const result = await client.extraction.run({
|
|
28
|
-
urls: ['https://example.com'],
|
|
29
|
-
name: '
|
|
26
|
+
urls: ['https://example.com/products'],
|
|
27
|
+
name: 'Product Extraction'
|
|
30
28
|
});
|
|
31
29
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
}
|
|
30
|
+
console.log(`Extracted ${result.data?.length} items`);
|
|
31
|
+
// Output: Extracted 25 items
|
|
35
32
|
```
|
|
36
33
|
|
|
37
|
-
##
|
|
34
|
+
## Extraction Methods
|
|
38
35
|
|
|
39
|
-
###
|
|
36
|
+
### Auto-Detection
|
|
37
|
+
|
|
38
|
+
The simplest way to extract data — the AI automatically detects structured content:
|
|
40
39
|
|
|
41
40
|
```typescript
|
|
42
|
-
const
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
timeout: 30000 // optional, in ms
|
|
41
|
+
const result = await client.extraction.run({
|
|
42
|
+
urls: ['https://example.com'],
|
|
43
|
+
name: 'My Extraction'
|
|
46
44
|
});
|
|
47
|
-
```
|
|
48
45
|
|
|
49
|
-
|
|
46
|
+
// Returns:
|
|
47
|
+
// {
|
|
48
|
+
// workflowId: "abc123",
|
|
49
|
+
// workflow: { id: "abc123", state: "FINISHED", ... },
|
|
50
|
+
// data: [
|
|
51
|
+
// { title: "Item 1", price: "$10" },
|
|
52
|
+
// { title: "Item 2", price: "$20" }
|
|
53
|
+
// ],
|
|
54
|
+
// pagination: { page: 1, totalPages: 3, hasMore: true }
|
|
55
|
+
// }
|
|
56
|
+
```
|
|
50
57
|
|
|
51
|
-
|
|
58
|
+
**When to use:** Quick extractions, exploratory data gathering, or when you don't know the exact schema.
|
|
52
59
|
|
|
53
|
-
|
|
54
|
-
const client = new KadoaClient({
|
|
55
|
-
apiKey: 'tk-your-team-api-key', // Must be a team API key
|
|
56
|
-
enableRealtime: true,
|
|
57
|
-
realtimeConfig: {
|
|
58
|
-
autoConnect: true, // optional, default: true
|
|
59
|
-
reconnectDelay: 5000, // optional, default: 5000ms
|
|
60
|
-
heartbeatInterval: 10000 // optional, default: 10000ms
|
|
61
|
-
}
|
|
62
|
-
});
|
|
63
|
-
```
|
|
60
|
+
### Builder API (Custom Schemas)
|
|
64
61
|
|
|
65
|
-
|
|
62
|
+
Define exactly what data you want to extract using the fluent builder pattern:
|
|
66
63
|
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
64
|
+
```typescript
|
|
65
|
+
const extraction = await client.extract({
|
|
66
|
+
urls: ['https://example.com/products'],
|
|
67
|
+
name: 'Product Extraction',
|
|
68
|
+
extraction: builder => builder
|
|
69
|
+
.schema('Product')
|
|
70
|
+
.field('title', 'Product name', 'STRING', { example: 'Laptop' })
|
|
71
|
+
.field('price', 'Product price', 'CURRENCY')
|
|
72
|
+
.field('inStock', 'Stock status', 'BOOLEAN')
|
|
73
|
+
.field('rating', 'Star rating', 'NUMBER')
|
|
74
|
+
}).create();
|
|
75
|
+
|
|
76
|
+
// Run extraction
|
|
77
|
+
const result = await extraction.run();
|
|
78
|
+
const data = await result.fetchData({});
|
|
79
|
+
|
|
80
|
+
// Returns:
|
|
81
|
+
// {
|
|
82
|
+
// data: [
|
|
83
|
+
// { title: "Dell XPS", price: "$999", inStock: true, rating: 4.5 },
|
|
84
|
+
// { title: "MacBook", price: "$1299", inStock: false, rating: 4.8 }
|
|
85
|
+
// ],
|
|
86
|
+
// pagination: { ... }
|
|
87
|
+
// }
|
|
73
88
|
```
|
|
74
89
|
|
|
75
|
-
|
|
76
|
-
import { KadoaClient } from '@kadoa/node-sdk';
|
|
77
|
-
import { config } from 'dotenv';
|
|
90
|
+
**When to use:** Production applications, consistent schema requirements, or data validation needs.
|
|
78
91
|
|
|
79
|
-
|
|
92
|
+
#### Builder Patterns
|
|
80
93
|
|
|
81
|
-
|
|
82
|
-
apiKey: process.env.KADOA_API_KEY!,
|
|
83
|
-
baseUrl: process.env.KADOA_PUBLIC_API_URI,
|
|
84
|
-
timeout: parseInt(process.env.KADOA_TIMEOUT || '30000')
|
|
85
|
-
});
|
|
86
|
-
```
|
|
94
|
+
**Raw Content Extraction**
|
|
87
95
|
|
|
88
|
-
|
|
96
|
+
Extract page content without structure transformation:
|
|
89
97
|
|
|
90
98
|
```typescript
|
|
91
|
-
|
|
99
|
+
// Single format
|
|
100
|
+
extraction: builder => builder.raw('markdown')
|
|
92
101
|
|
|
93
|
-
//
|
|
94
|
-
|
|
95
|
-
console.log('Event:', event);
|
|
96
|
-
});
|
|
97
|
-
|
|
98
|
-
// Event types:
|
|
99
|
-
// - entity:detected
|
|
100
|
-
// - extraction:started
|
|
101
|
-
// - extraction:status_changed
|
|
102
|
-
// - extraction:data_available
|
|
103
|
-
// - extraction:completed
|
|
104
|
-
// - realtime:connected (when WebSocket enabled)
|
|
105
|
-
// - realtime:disconnected
|
|
106
|
-
// - realtime:event
|
|
107
|
-
// - realtime:heartbeat
|
|
108
|
-
// - realtime:error
|
|
102
|
+
// Multiple formats
|
|
103
|
+
extraction: builder => builder.raw(['html', 'markdown', 'url'])
|
|
109
104
|
```
|
|
110
105
|
|
|
111
|
-
|
|
106
|
+
**Classification Fields**
|
|
112
107
|
|
|
113
|
-
|
|
108
|
+
Categorize content into predefined labels:
|
|
114
109
|
|
|
115
110
|
```typescript
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
console.log('Received realtime data:', event.payload.data);
|
|
125
|
-
}
|
|
126
|
-
});
|
|
111
|
+
extraction: builder => builder
|
|
112
|
+
.schema('Article')
|
|
113
|
+
.classify('sentiment', 'Content sentiment', [
|
|
114
|
+
{ title: 'Positive', definition: 'Optimistic or favorable tone' },
|
|
115
|
+
{ title: 'Negative', definition: 'Critical or unfavorable tone' },
|
|
116
|
+
{ title: 'Neutral', definition: 'Balanced or objective tone' }
|
|
117
|
+
])
|
|
118
|
+
```
|
|
127
119
|
|
|
128
|
-
|
|
129
|
-
const realtime = client.connectRealtime();
|
|
120
|
+
**Hybrid Extraction**
|
|
130
121
|
|
|
131
|
-
|
|
132
|
-
if (client.isRealtimeConnected()) {
|
|
133
|
-
console.log('Connected to realtime server');
|
|
134
|
-
}
|
|
122
|
+
Combine structured fields with raw content:
|
|
135
123
|
|
|
136
|
-
|
|
137
|
-
|
|
124
|
+
```typescript
|
|
125
|
+
extraction: builder => builder
|
|
126
|
+
.schema('Product')
|
|
127
|
+
.field('title', 'Product name', 'STRING', { example: 'Item' })
|
|
128
|
+
.field('price', 'Product price', 'CURRENCY')
|
|
129
|
+
.raw('html') // Include raw HTML alongside structured fields
|
|
138
130
|
```
|
|
139
131
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
The SDK uses the [debug](https://www.npmjs.com/package/debug) package for logging, which is disabled by default. Enable debug logs using the `DEBUG` environment variable:
|
|
132
|
+
**Reference Existing Schema**
|
|
143
133
|
|
|
144
|
-
|
|
145
|
-
# Enable all Kadoa SDK logs
|
|
146
|
-
DEBUG=kadoa:* node app.js
|
|
134
|
+
Reuse a previously defined schema:
|
|
147
135
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
DEBUG=kadoa:wss node app.js # WebSocket logs only
|
|
151
|
-
DEBUG=kadoa:extraction node app.js # Extraction module logs
|
|
152
|
-
DEBUG=kadoa:http node app.js # HTTP request/response logs
|
|
153
|
-
DEBUG=kadoa:workflow node app.js # Workflow operations
|
|
154
|
-
|
|
155
|
-
# Enable multiple modules
|
|
156
|
-
DEBUG=kadoa:client,kadoa:extraction node app.js
|
|
136
|
+
```typescript
|
|
137
|
+
extraction: builder => builder.useSchema('schema-id-123')
|
|
157
138
|
```
|
|
158
139
|
|
|
159
|
-
|
|
140
|
+
### Working with Results
|
|
160
141
|
|
|
161
|
-
|
|
142
|
+
**Fetch Specific Page**
|
|
162
143
|
|
|
163
144
|
```typescript
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
145
|
+
const page = await client.extraction.fetchData({
|
|
146
|
+
workflowId: 'workflow-id',
|
|
147
|
+
page: 2,
|
|
148
|
+
limit: 50
|
|
168
149
|
});
|
|
150
|
+
```
|
|
169
151
|
|
|
170
|
-
|
|
171
|
-
const result = await client.extraction.run({
|
|
172
|
-
urls: ['https://sandbox.kadoa.com/ecommerce'],
|
|
173
|
-
name: 'My Extraction Workflow'
|
|
174
|
-
});
|
|
152
|
+
**Iterate Through All Pages**
|
|
175
153
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
console.log('Data:', page1.data?.slice(0, 5));
|
|
184
|
-
console.log('Pagination:', page1.pagination);
|
|
154
|
+
```typescript
|
|
155
|
+
for await (const page of client.extraction.fetchDataPages({
|
|
156
|
+
workflowId: 'workflow-id'
|
|
157
|
+
})) {
|
|
158
|
+
console.log(`Processing ${page.data.length} items`);
|
|
159
|
+
// Process page.data
|
|
185
160
|
}
|
|
186
161
|
```
|
|
187
162
|
|
|
188
|
-
|
|
163
|
+
**Fetch All Data at Once**
|
|
189
164
|
|
|
190
165
|
```typescript
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
apiKey: 'tk-your-team-api-key',
|
|
194
|
-
enableRealtime: true,
|
|
166
|
+
const allData = await client.extraction.fetchAllData({
|
|
167
|
+
workflowId: 'workflow-id'
|
|
195
168
|
});
|
|
196
169
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
console.log('Event received:', event);
|
|
200
|
-
});
|
|
170
|
+
console.log(`Total items: ${allData.length}`);
|
|
171
|
+
```
|
|
201
172
|
|
|
202
|
-
|
|
203
|
-
const availableEvents = await client.notification.settings.listAllEvents();
|
|
173
|
+
### Advanced Workflow Control
|
|
204
174
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
175
|
+
For scheduled extractions, monitoring, and notifications:
|
|
176
|
+
|
|
177
|
+
```typescript
|
|
178
|
+
const extraction = await client.extract({
|
|
179
|
+
urls: ['https://example.com'],
|
|
180
|
+
name: 'Scheduled Extraction',
|
|
181
|
+
extraction: builder => builder
|
|
182
|
+
.schema('Product')
|
|
183
|
+
.field('title', 'Product name', 'STRING', { example: 'Item' })
|
|
184
|
+
.field('price', 'Price', 'CURRENCY')
|
|
185
|
+
})
|
|
186
|
+
.setInterval({ interval: 'DAILY' }) // Schedule: HOURLY, DAILY, WEEKLY, MONTHLY
|
|
187
|
+
.withNotifications({
|
|
188
|
+
events: 'all',
|
|
189
|
+
channels: { WEBSOCKET: true }
|
|
190
|
+
})
|
|
191
|
+
.bypassPreview() // Skip approval step
|
|
192
|
+
.create();
|
|
193
|
+
|
|
194
|
+
const result = await extraction.run();
|
|
215
195
|
```
|
|
216
196
|
|
|
217
|
-
|
|
197
|
+
## Data Validation
|
|
198
|
+
|
|
199
|
+
Kadoa can automatically suggest validation rules and detect anomalies:
|
|
218
200
|
|
|
219
201
|
```typescript
|
|
220
202
|
import { KadoaClient, pollUntil } from '@kadoa/node-sdk';
|
|
221
203
|
|
|
222
204
|
const client = new KadoaClient({ apiKey: 'your-api-key' });
|
|
223
205
|
|
|
224
|
-
// 1. Run
|
|
206
|
+
// 1. Run extraction
|
|
225
207
|
const result = await client.extraction.run({
|
|
226
|
-
urls: ['https://
|
|
208
|
+
urls: ['https://example.com']
|
|
227
209
|
});
|
|
228
210
|
|
|
229
|
-
// 2. Wait for
|
|
230
|
-
const
|
|
211
|
+
// 2. Wait for AI-suggested validation rules
|
|
212
|
+
const rules = await pollUntil(
|
|
231
213
|
async () => await client.validation.listRules({
|
|
232
|
-
workflowId: result.workflowId
|
|
214
|
+
workflowId: result.workflowId
|
|
233
215
|
}),
|
|
234
216
|
(result) => result.data.length > 0,
|
|
235
217
|
{ pollIntervalMs: 1000, timeoutMs: 30000 }
|
|
236
218
|
);
|
|
237
219
|
|
|
238
|
-
// 3. Approve
|
|
239
|
-
|
|
220
|
+
// 3. Approve and run validation
|
|
221
|
+
await client.validation.bulkApproveRules({
|
|
240
222
|
workflowId: result.workflowId,
|
|
241
|
-
ruleIds:
|
|
223
|
+
ruleIds: rules.result.data.map(r => r.id)
|
|
242
224
|
});
|
|
243
225
|
|
|
244
|
-
// 4. Run validation check
|
|
245
226
|
const validation = await client.validation.scheduleValidation(
|
|
246
227
|
result.workflowId,
|
|
247
228
|
result.workflow?.jobId || ''
|
|
248
229
|
);
|
|
249
230
|
|
|
250
|
-
//
|
|
231
|
+
// 4. Check for anomalies
|
|
251
232
|
const completed = await client.validation.waitUntilCompleted(
|
|
252
233
|
validation.validationId
|
|
253
234
|
);
|
|
254
|
-
|
|
255
235
|
const anomalies = await client.validation.getValidationAnomalies(
|
|
256
236
|
validation.validationId
|
|
257
237
|
);
|
|
258
238
|
|
|
259
|
-
console.log(
|
|
239
|
+
console.log(`Found ${anomalies.length} anomalies`);
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## Configuration
|
|
243
|
+
|
|
244
|
+
### Basic Setup
|
|
245
|
+
|
|
246
|
+
```typescript
|
|
247
|
+
const client = new KadoaClient({
|
|
248
|
+
apiKey: 'your-api-key',
|
|
249
|
+
timeout: 30000 // optional, in ms
|
|
250
|
+
});
|
|
260
251
|
```
|
|
261
252
|
|
|
262
|
-
###
|
|
253
|
+
### Environment Variables
|
|
263
254
|
|
|
264
255
|
```typescript
|
|
265
|
-
import assert from 'node:assert';
|
|
266
256
|
import { KadoaClient } from '@kadoa/node-sdk';
|
|
257
|
+
import { config } from 'dotenv';
|
|
258
|
+
|
|
259
|
+
config();
|
|
260
|
+
|
|
261
|
+
const client = new KadoaClient({
|
|
262
|
+
apiKey: process.env.KADOA_API_KEY!
|
|
263
|
+
});
|
|
264
|
+
```
|
|
267
265
|
|
|
268
|
-
|
|
269
|
-
const apiKey = process.env.KADOA_API_KEY;
|
|
270
|
-
assert(apiKey, 'KADOA_API_KEY is not set');
|
|
266
|
+
### WebSocket & Realtime Events
|
|
271
267
|
|
|
272
|
-
|
|
268
|
+
Enable realtime notifications using a team API key (starts with `tk-`):
|
|
273
269
|
|
|
274
|
-
|
|
270
|
+
```typescript
|
|
271
|
+
const client = new KadoaClient({
|
|
272
|
+
apiKey: 'tk-your-team-api-key',
|
|
273
|
+
enableRealtime: true
|
|
274
|
+
});
|
|
275
|
+
|
|
276
|
+
// Listen to events
|
|
277
|
+
client.realtime?.onEvent((event) => {
|
|
278
|
+
console.log('Event:', event);
|
|
279
|
+
});
|
|
280
|
+
|
|
281
|
+
// Use with extractions
|
|
282
|
+
const extraction = await client.extract({
|
|
283
|
+
urls: ['https://example.com'],
|
|
284
|
+
name: 'Monitored Extraction',
|
|
285
|
+
extraction: builder => builder.raw('markdown')
|
|
286
|
+
})
|
|
287
|
+
.withNotifications({
|
|
288
|
+
events: 'all',
|
|
289
|
+
channels: { WEBSOCKET: true }
|
|
290
|
+
})
|
|
291
|
+
.create();
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
**Connection control:**
|
|
295
|
+
|
|
296
|
+
```typescript
|
|
297
|
+
const realtime = client.connectRealtime(); // Connect manually
|
|
298
|
+
const connected = client.isRealtimeConnected(); // Check status
|
|
299
|
+
client.disconnectRealtime(); // Disconnect
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
## Error Handling
|
|
303
|
+
|
|
304
|
+
```typescript
|
|
305
|
+
import { KadoaClient, KadoaSdkException, KadoaHttpException } from '@kadoa/node-sdk';
|
|
306
|
+
|
|
307
|
+
try {
|
|
275
308
|
const result = await client.extraction.run({
|
|
276
|
-
urls: ['https://
|
|
309
|
+
urls: ['https://example.com']
|
|
277
310
|
});
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
console.log('Page 1 Data:');
|
|
287
|
-
console.log('--------------------------------');
|
|
288
|
-
console.log(page1.data?.slice(0, 5));
|
|
289
|
-
console.log(page1.pagination);
|
|
290
|
-
console.log('--------------------------------');
|
|
311
|
+
} catch (error) {
|
|
312
|
+
if (error instanceof KadoaHttpException) {
|
|
313
|
+
console.error('API Error:', error.message);
|
|
314
|
+
console.error('Status:', error.httpStatus);
|
|
315
|
+
} else if (error instanceof KadoaSdkException) {
|
|
316
|
+
console.error('SDK Error:', error.message);
|
|
317
|
+
console.error('Code:', error.code);
|
|
291
318
|
}
|
|
292
|
-
|
|
293
|
-
console.log('Initial result:', result.data?.slice(0, 5));
|
|
294
319
|
}
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
## Debugging
|
|
323
|
+
|
|
324
|
+
Enable debug logs using the `DEBUG` environment variable:
|
|
325
|
+
|
|
326
|
+
```bash
|
|
327
|
+
# All SDK logs
|
|
328
|
+
DEBUG=kadoa:* node app.js
|
|
295
329
|
|
|
296
|
-
|
|
330
|
+
# Specific modules
|
|
331
|
+
DEBUG=kadoa:extraction node app.js
|
|
332
|
+
DEBUG=kadoa:http node app.js
|
|
333
|
+
DEBUG=kadoa:client,kadoa:extraction node app.js
|
|
297
334
|
```
|
|
298
335
|
|
|
299
|
-
|
|
336
|
+
## More Examples
|
|
300
337
|
|
|
301
|
-
See the [examples directory](https://github.com/kadoa-org/kadoa-sdks/tree/main/examples/node-examples) for
|
|
302
|
-
- Advanced extraction configurations
|
|
303
|
-
- Custom validation rules
|
|
304
|
-
- Error handling patterns
|
|
338
|
+
See the [examples directory](https://github.com/kadoa-org/kadoa-sdks/tree/main/examples/node-examples) for complete examples, including:
|
|
305
339
|
- Batch processing
|
|
340
|
+
- Custom error handling
|
|
306
341
|
- Integration patterns
|
|
342
|
+
- Advanced validation workflows
|
|
307
343
|
|
|
308
344
|
## Requirements
|
|
309
345
|
|
|
310
346
|
- Node.js 22+
|
|
311
347
|
|
|
312
|
-
##
|
|
348
|
+
## Support
|
|
313
349
|
|
|
314
|
-
|
|
350
|
+
- **Documentation:** [docs.kadoa.com](https://docs.kadoa.com)
|
|
351
|
+
- **API Reference:** [docs.kadoa.com/api](https://docs.kadoa.com/api)
|
|
352
|
+
- **Support:** [support@kadoa.com](mailto:support@kadoa.com)
|
|
353
|
+
- **Issues:** [GitHub Issues](https://github.com/kadoa-org/kadoa-sdks/issues)
|
|
315
354
|
|
|
316
|
-
##
|
|
355
|
+
## License
|
|
317
356
|
|
|
318
|
-
|
|
319
|
-
- Support: [support@kadoa.com](mailto:support@kadoa.com)
|
|
320
|
-
- Issues: [GitHub Issues](https://github.com/kadoa-org/kadoa-sdks/issues)
|
|
357
|
+
MIT
|