@knowledgesdk/node 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +495 -0
- package/dist/api/classify.d.ts +24 -0
- package/dist/api/classify.js +19 -0
- package/dist/api/classify.js.map +1 -0
- package/dist/api/extract.d.ts +118 -0
- package/dist/api/extract.js +60 -0
- package/dist/api/extract.js.map +1 -0
- package/dist/api/jobs.d.ts +35 -0
- package/dist/api/jobs.js +43 -0
- package/dist/api/jobs.js.map +1 -0
- package/dist/api/scrape.d.ts +18 -0
- package/dist/api/scrape.js +18 -0
- package/dist/api/scrape.js.map +1 -0
- package/dist/api/screenshot.d.ts +15 -0
- package/dist/api/screenshot.js +18 -0
- package/dist/api/screenshot.js.map +1 -0
- package/dist/api/search.d.ts +29 -0
- package/dist/api/search.js +22 -0
- package/dist/api/search.js.map +1 -0
- package/dist/api/sitemap.d.ts +17 -0
- package/dist/api/sitemap.js +19 -0
- package/dist/api/sitemap.js.map +1 -0
- package/dist/api/webhooks.d.ts +40 -0
- package/dist/api/webhooks.js +39 -0
- package/dist/api/webhooks.js.map +1 -0
- package/dist/constants.d.ts +5 -0
- package/dist/constants.js +9 -0
- package/dist/constants.js.map +1 -0
- package/dist/errors.d.ts +32 -0
- package/dist/errors.js +52 -0
- package/dist/errors.js.map +1 -0
- package/dist/esm/api/classify.d.ts +24 -0
- package/dist/esm/api/classify.js +15 -0
- package/dist/esm/api/classify.js.map +1 -0
- package/dist/esm/api/extract.d.ts +118 -0
- package/dist/esm/api/extract.js +56 -0
- package/dist/esm/api/extract.js.map +1 -0
- package/dist/esm/api/jobs.d.ts +35 -0
- package/dist/esm/api/jobs.js +39 -0
- package/dist/esm/api/jobs.js.map +1 -0
- package/dist/esm/api/scrape.d.ts +18 -0
- package/dist/esm/api/scrape.js +14 -0
- package/dist/esm/api/scrape.js.map +1 -0
- package/dist/esm/api/screenshot.d.ts +15 -0
- package/dist/esm/api/screenshot.js +14 -0
- package/dist/esm/api/screenshot.js.map +1 -0
- package/dist/esm/api/search.d.ts +29 -0
- package/dist/esm/api/search.js +18 -0
- package/dist/esm/api/search.js.map +1 -0
- package/dist/esm/api/sitemap.d.ts +17 -0
- package/dist/esm/api/sitemap.js +15 -0
- package/dist/esm/api/sitemap.js.map +1 -0
- package/dist/esm/api/webhooks.d.ts +40 -0
- package/dist/esm/api/webhooks.js +35 -0
- package/dist/esm/api/webhooks.js.map +1 -0
- package/dist/esm/constants.d.ts +5 -0
- package/dist/esm/constants.js +6 -0
- package/dist/esm/constants.js.map +1 -0
- package/dist/esm/errors.d.ts +32 -0
- package/dist/esm/errors.js +43 -0
- package/dist/esm/errors.js.map +1 -0
- package/dist/esm/index.d.ts +100 -0
- package/dist/esm/index.js +91 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/utils/http-client.d.ts +62 -0
- package/dist/esm/utils/http-client.js +354 -0
- package/dist/esm/utils/http-client.js.map +1 -0
- package/dist/index.d.ts +100 -0
- package/dist/index.js +102 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +2 -0
- package/dist/index.mjs.map +1 -0
- package/dist/utils/http-client.d.ts +62 -0
- package/dist/utils/http-client.js +361 -0
- package/dist/utils/http-client.js.map +1 -0
- package/package.json +93 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 KnowledgeSDK
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
# @knowledgesdk/node
|
|
2
|
+
|
|
3
|
+
Official Node.js SDK for the [KnowledgeSDK](https://knowledgesdk.com) API. Extract structured knowledge from any website — business profiles, content, screenshots, sitemaps, and more.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @knowledgesdk/node
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
yarn add @knowledgesdk/node
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
```typescript
|
|
18
|
+
import { KnowledgeSDK } from '@knowledgesdk/node';
|
|
19
|
+
|
|
20
|
+
const client = new KnowledgeSDK('sk_ks_your_api_key');
|
|
21
|
+
|
|
22
|
+
// Run the full extraction pipeline on a website
|
|
23
|
+
const result = await client.extract.run('https://example.com');
|
|
24
|
+
console.log(result.business.businessName);
|
|
25
|
+
console.log(result.knowledgeItems);
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Authentication
|
|
29
|
+
|
|
30
|
+
All API calls require an API key. Keys are prefixed with `sk_ks_`. Pass your key to the constructor:
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
const client = new KnowledgeSDK('sk_ks_your_api_key');
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
You can also store the key in an environment variable such as `KNOWLEDGE_SDK_API_KEY` and pass it to the constructor explicitly:
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
const client = new KnowledgeSDK(process.env.KNOWLEDGE_SDK_API_KEY!);
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Configuration
|
|
43
|
+
|
|
44
|
+
```typescript
|
|
45
|
+
const client = new KnowledgeSDK('sk_ks_your_api_key', {
|
|
46
|
+
baseUrl: 'https://api.knowledgesdk.com', // default
|
|
47
|
+
maxRetries: 3, // default — retries on 429 and 5xx
|
|
48
|
+
timeout: 30000, // default — 30 seconds
|
|
49
|
+
});
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Resources
|
|
53
|
+
|
|
54
|
+
### `extract`
|
|
55
|
+
|
|
56
|
+
Run the full pipeline against a URL: scrape, classify, and return structured knowledge items.
|
|
57
|
+
|
|
58
|
+
#### Synchronous
|
|
59
|
+
|
|
60
|
+
```typescript
|
|
61
|
+
const result = await client.extract.run('https://acme.com', {
|
|
62
|
+
maxPages: 20,
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
console.log(result.business.businessName); // "Acme Corp"
|
|
66
|
+
console.log(result.business.industrySector); // "SaaS"
|
|
67
|
+
console.log(result.business.confidenceScore); // 0.92
|
|
68
|
+
console.log(result.pagesScraped); // 18
|
|
69
|
+
console.log(result.sitemapUrls); // 54
|
|
70
|
+
console.log(result.knowledgeItems.length); // 12
|
|
71
|
+
|
|
72
|
+
result.knowledgeItems.forEach((item) => {
|
|
73
|
+
console.log(`[${item.category}] ${item.title}`);
|
|
74
|
+
console.log(item.content);
|
|
75
|
+
});
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**`ExtractResult` shape:**
|
|
79
|
+
|
|
80
|
+
```typescript
|
|
81
|
+
{
|
|
82
|
+
business: {
|
|
83
|
+
businessName: string;
|
|
84
|
+
businessType: string;
|
|
85
|
+
industrySector: string;
|
|
86
|
+
targetAudience: string;
|
|
87
|
+
description: string;
|
|
88
|
+
valueProposition: string;
|
|
89
|
+
painPoints: string[];
|
|
90
|
+
uniqueSellingPoints: string[];
|
|
91
|
+
keyInsights: string[];
|
|
92
|
+
confidenceScore: number; // 0-1
|
|
93
|
+
};
|
|
94
|
+
knowledgeItems: Array<{
|
|
95
|
+
title: string;
|
|
96
|
+
description: string;
|
|
97
|
+
content: string;
|
|
98
|
+
category: string;
|
|
99
|
+
source: string; // URL of source page
|
|
100
|
+
}>;
|
|
101
|
+
pagesScraped: number;
|
|
102
|
+
sitemapUrls: number;
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
#### Asynchronous
|
|
107
|
+
|
|
108
|
+
For long-running extractions, use `runAsync` to get a job ID and poll for the result:
|
|
109
|
+
|
|
110
|
+
```typescript
|
|
111
|
+
const { jobId, status } = await client.extract.runAsync('https://acme.com', {
|
|
112
|
+
maxPages: 50,
|
|
113
|
+
callbackUrl: 'https://your-server.com/webhooks/knowledgesdk',
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
// Poll until complete
|
|
117
|
+
const job = await client.jobs.poll(jobId, {
|
|
118
|
+
intervalMs: 3000, // check every 3 seconds
|
|
119
|
+
timeoutMs: 300000, // give up after 5 minutes
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
if (job.status === 'completed') {
|
|
123
|
+
const result = job.result as ExtractResult;
|
|
124
|
+
console.log(result.business.businessName);
|
|
125
|
+
}
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
### `scrape`
|
|
131
|
+
|
|
132
|
+
Scrape a single page and receive its content as Markdown along with metadata.
|
|
133
|
+
|
|
134
|
+
```typescript
|
|
135
|
+
const page = await client.scrape.run('https://acme.com/pricing');
|
|
136
|
+
|
|
137
|
+
console.log(page.title); // "Pricing — Acme Corp"
|
|
138
|
+
console.log(page.description); // "Simple, transparent pricing..."
|
|
139
|
+
console.log(page.markdown); // Full page content in Markdown
|
|
140
|
+
console.log(page.links); // Array of hrefs found on the page
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**`ScrapeResult` shape:**
|
|
144
|
+
|
|
145
|
+
```typescript
|
|
146
|
+
{
|
|
147
|
+
url: string;
|
|
148
|
+
markdown: string;
|
|
149
|
+
title: string | null;
|
|
150
|
+
description: string | null;
|
|
151
|
+
links: string[];
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
### `classify`
|
|
158
|
+
|
|
159
|
+
Classify a business by analyzing its website. Returns a structured profile without scraping the full site.
|
|
160
|
+
|
|
161
|
+
```typescript
|
|
162
|
+
const classification = await client.classify.run('https://acme.com');
|
|
163
|
+
|
|
164
|
+
console.log(classification.businessName); // "Acme Corp"
|
|
165
|
+
console.log(classification.businessType); // "B2B Software"
|
|
166
|
+
console.log(classification.industrySector); // "Project Management"
|
|
167
|
+
console.log(classification.targetAudience); // "SMBs and mid-market teams"
|
|
168
|
+
console.log(classification.valueProposition); // "Simplify team collaboration"
|
|
169
|
+
console.log(classification.painPoints); // ["Too many tools", "Poor visibility"]
|
|
170
|
+
console.log(classification.uniqueSellingPoints); // ["Real-time sync", "One-click reporting"]
|
|
171
|
+
console.log(classification.confidenceScore); // 0.89
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
### `screenshot`
|
|
177
|
+
|
|
178
|
+
Capture a full-page screenshot of any URL. Returns a base64-encoded PNG.
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
const { url, screenshot } = await client.screenshot.run('https://acme.com');
|
|
182
|
+
|
|
183
|
+
// Write to disk
|
|
184
|
+
import { writeFileSync } from 'fs';
|
|
185
|
+
const buffer = Buffer.from(screenshot, 'base64');
|
|
186
|
+
writeFileSync('screenshot.png', buffer);
|
|
187
|
+
|
|
188
|
+
// Or use inline in HTML
|
|
189
|
+
const dataUrl = `data:image/png;base64,${screenshot}`;
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
### `sitemap`
|
|
195
|
+
|
|
196
|
+
Discover all publicly accessible URLs for a website via its sitemap or by crawling.
|
|
197
|
+
|
|
198
|
+
```typescript
|
|
199
|
+
const { url, urls, count } = await client.sitemap.run('https://acme.com');
|
|
200
|
+
|
|
201
|
+
console.log(`Found ${count} URLs`);
|
|
202
|
+
urls.forEach((u) => console.log(u));
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
**`SitemapResult` shape:**
|
|
206
|
+
|
|
207
|
+
```typescript
|
|
208
|
+
{
|
|
209
|
+
url: string;
|
|
210
|
+
urls: string[];
|
|
211
|
+
count: number;
|
|
212
|
+
}
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
### `search`
|
|
218
|
+
|
|
219
|
+
Perform a semantic search across your indexed knowledge items.
|
|
220
|
+
|
|
221
|
+
```typescript
|
|
222
|
+
const results = await client.search.run('how do I cancel my subscription', {
|
|
223
|
+
limit: 10,
|
|
224
|
+
});
|
|
225
|
+
|
|
226
|
+
console.log(`${results.total} results for "${results.query}"`);
|
|
227
|
+
|
|
228
|
+
results.hits.forEach((hit) => {
|
|
229
|
+
console.log(`[score: ${hit.score.toFixed(2)}] ${hit.title}`);
|
|
230
|
+
console.log(`Category: ${hit.category} | Source: ${hit.source}`);
|
|
231
|
+
console.log(hit.content);
|
|
232
|
+
});
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
**`SearchResult` shape:**
|
|
236
|
+
|
|
237
|
+
```typescript
|
|
238
|
+
{
|
|
239
|
+
hits: Array<{
|
|
240
|
+
id: string;
|
|
241
|
+
title: string;
|
|
242
|
+
content: string;
|
|
243
|
+
category: string;
|
|
244
|
+
source: string;
|
|
245
|
+
score: number; // relevance score, higher is better
|
|
246
|
+
}>;
|
|
247
|
+
total: number;
|
|
248
|
+
query: string;
|
|
249
|
+
}
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
### `webhooks`
|
|
255
|
+
|
|
256
|
+
Register webhook endpoints to receive real-time event notifications.
|
|
257
|
+
|
|
258
|
+
#### Create a webhook
|
|
259
|
+
|
|
260
|
+
```typescript
|
|
261
|
+
const webhook = await client.webhooks.create({
|
|
262
|
+
url: 'https://your-server.com/webhooks/knowledgesdk',
|
|
263
|
+
events: ['extract.completed', 'extract.failed'],
|
|
264
|
+
displayName: 'My Production Webhook',
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
console.log(webhook.id); // "wh_abc123"
|
|
268
|
+
console.log(webhook.status); // "active"
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
#### List webhooks
|
|
272
|
+
|
|
273
|
+
```typescript
|
|
274
|
+
const webhooks = await client.webhooks.list();
|
|
275
|
+
webhooks.forEach((wh) => {
|
|
276
|
+
console.log(`${wh.id}: ${wh.url} — ${wh.status}`);
|
|
277
|
+
});
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
#### Delete a webhook
|
|
281
|
+
|
|
282
|
+
```typescript
|
|
283
|
+
await client.webhooks.delete('wh_abc123');
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
#### Test a webhook
|
|
287
|
+
|
|
288
|
+
Send a test payload to verify your endpoint is reachable:
|
|
289
|
+
|
|
290
|
+
```typescript
|
|
291
|
+
await client.webhooks.test('wh_abc123');
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
### `jobs`
|
|
297
|
+
|
|
298
|
+
Retrieve or poll the result of an asynchronous job.
|
|
299
|
+
|
|
300
|
+
#### Get a job by ID
|
|
301
|
+
|
|
302
|
+
```typescript
|
|
303
|
+
const job = await client.jobs.get('job_abc123');
|
|
304
|
+
|
|
305
|
+
console.log(job.status); // 'pending' | 'processing' | 'completed' | 'failed'
|
|
306
|
+
console.log(job.createdAt);
|
|
307
|
+
console.log(job.completedAt);
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
#### Poll until complete
|
|
311
|
+
|
|
312
|
+
```typescript
|
|
313
|
+
import { TimeoutError } from '@knowledgesdk/node';
|
|
314
|
+
|
|
315
|
+
try {
|
|
316
|
+
const job = await client.jobs.poll('job_abc123', {
|
|
317
|
+
intervalMs: 2000, // poll every 2 seconds (default)
|
|
318
|
+
timeoutMs: 120000, // give up after 2 minutes (default)
|
|
319
|
+
});
|
|
320
|
+
|
|
321
|
+
if (job.status === 'completed') {
|
|
322
|
+
console.log('Job completed:', job.result);
|
|
323
|
+
} else {
|
|
324
|
+
console.error('Job failed:', job.error);
|
|
325
|
+
}
|
|
326
|
+
} catch (err) {
|
|
327
|
+
if (err instanceof TimeoutError) {
|
|
328
|
+
console.error('Job timed out — try polling again later');
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
---
|
|
334
|
+
|
|
335
|
+
## Error Handling
|
|
336
|
+
|
|
337
|
+
All errors extend `KnowledgeSDKError` and carry structured metadata.
|
|
338
|
+
|
|
339
|
+
```typescript
|
|
340
|
+
import {
|
|
341
|
+
KnowledgeSDK,
|
|
342
|
+
KnowledgeSDKError,
|
|
343
|
+
APIError,
|
|
344
|
+
AuthenticationError,
|
|
345
|
+
NetworkError,
|
|
346
|
+
RateLimitError,
|
|
347
|
+
TimeoutError,
|
|
348
|
+
} from '@knowledgesdk/node';
|
|
349
|
+
|
|
350
|
+
const client = new KnowledgeSDK('sk_ks_your_api_key');
|
|
351
|
+
|
|
352
|
+
try {
|
|
353
|
+
const result = await client.extract.run('https://acme.com');
|
|
354
|
+
} catch (err) {
|
|
355
|
+
if (err instanceof AuthenticationError) {
|
|
356
|
+
console.error('Invalid API key:', err.message);
|
|
357
|
+
} else if (err instanceof RateLimitError) {
|
|
358
|
+
console.error('Rate limit hit. Retry after:', err.retryAfter);
|
|
359
|
+
} else if (err instanceof TimeoutError) {
|
|
360
|
+
console.error('Request timed out:', err.message);
|
|
361
|
+
} else if (err instanceof NetworkError) {
|
|
362
|
+
console.error('Network error:', err.message);
|
|
363
|
+
} else if (err instanceof APIError) {
|
|
364
|
+
console.error(`API error ${err.statusCode}:`, err.message);
|
|
365
|
+
console.error('Error code:', err.code);
|
|
366
|
+
console.error('Request ID:', err.requestId);
|
|
367
|
+
} else if (err instanceof KnowledgeSDKError) {
|
|
368
|
+
console.error('KnowledgeSDK error:', err.message);
|
|
369
|
+
} else {
|
|
370
|
+
throw err;
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
### Error Classes
|
|
376
|
+
|
|
377
|
+
| Class | Description |
|
|
378
|
+
|---|---|
|
|
379
|
+
| `KnowledgeSDKError` | Base class for all SDK errors |
|
|
380
|
+
| `APIError` | 4xx/5xx responses from the API |
|
|
381
|
+
| `AuthenticationError` | Missing or invalid API key (401) |
|
|
382
|
+
| `NetworkError` | Network connectivity issues |
|
|
383
|
+
| `RateLimitError` | API rate limit exceeded (429) |
|
|
384
|
+
| `TimeoutError` | Request or job polling timed out |
|
|
385
|
+
|
|
386
|
+
All errors expose:
|
|
387
|
+
- `message` — human-readable description
|
|
388
|
+
- `statusCode` — HTTP status code (where applicable)
|
|
389
|
+
- `code` — machine-readable error code
|
|
390
|
+
- `requestId` — request ID from the API (for support)
|
|
391
|
+
- `data` — raw response body (where available)
|
|
392
|
+
|
|
393
|
+
---
|
|
394
|
+
|
|
395
|
+
## Debug Mode
|
|
396
|
+
|
|
397
|
+
Enable request/response logging for development:
|
|
398
|
+
|
|
399
|
+
```typescript
|
|
400
|
+
const client = new KnowledgeSDK('sk_ks_your_api_key');
|
|
401
|
+
client.setDebugMode(true);
|
|
402
|
+
|
|
403
|
+
// All requests and responses will now be printed to the console
|
|
404
|
+
const result = await client.scrape.run('https://acme.com');
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
---
|
|
408
|
+
|
|
409
|
+
## Advanced Usage
|
|
410
|
+
|
|
411
|
+
### Custom headers
|
|
412
|
+
|
|
413
|
+
```typescript
|
|
414
|
+
client.setHeaders({
|
|
415
|
+
'x-custom-header': 'my-value',
|
|
416
|
+
'x-trace-id': requestId,
|
|
417
|
+
});
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
### Retry configuration
|
|
421
|
+
|
|
422
|
+
By default, the SDK retries up to 3 times on rate-limit (429) and server (5xx) errors, using exponential backoff with jitter. To change this, set `maxRetries` (and, if needed, `timeout`) when constructing the client:
|
|
423
|
+
|
|
424
|
+
```typescript
|
|
425
|
+
const client = new KnowledgeSDK('sk_ks_your_api_key', {
|
|
426
|
+
maxRetries: 5,
|
|
427
|
+
timeout: 60000, // 60 seconds
|
|
428
|
+
});
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
### Full async extraction workflow
|
|
432
|
+
|
|
433
|
+
```typescript
|
|
434
|
+
import { KnowledgeSDK, ExtractResult, TimeoutError } from '@knowledgesdk/node';
|
|
435
|
+
|
|
436
|
+
const client = new KnowledgeSDK(process.env.KNOWLEDGE_SDK_API_KEY!);
|
|
437
|
+
|
|
438
|
+
async function extractWebsite(url: string): Promise<ExtractResult> {
|
|
439
|
+
// Kick off async job
|
|
440
|
+
const { jobId } = await client.extract.runAsync(url, {
|
|
441
|
+
maxPages: 100,
|
|
442
|
+
callbackUrl: 'https://your-server.com/webhooks/knowledgesdk',
|
|
443
|
+
});
|
|
444
|
+
|
|
445
|
+
console.log(`Started job ${jobId}, polling for result...`);
|
|
446
|
+
|
|
447
|
+
// Poll until complete (up to 10 minutes)
|
|
448
|
+
const job = await client.jobs.poll(jobId, {
|
|
449
|
+
intervalMs: 5000,
|
|
450
|
+
timeoutMs: 600000,
|
|
451
|
+
});
|
|
452
|
+
|
|
453
|
+
if (job.status === 'failed') {
|
|
454
|
+
throw new Error(`Extraction failed: ${job.error}`);
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
return job.result as ExtractResult;
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
const result = await extractWebsite('https://large-site.com');
|
|
461
|
+
console.log(`Extracted ${result.knowledgeItems.length} knowledge items`);
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
---
|
|
465
|
+
|
|
466
|
+
## TypeScript
|
|
467
|
+
|
|
468
|
+
The SDK is written in TypeScript and exports all types. No additional `@types` package is required.
|
|
469
|
+
|
|
470
|
+
```typescript
|
|
471
|
+
import type {
|
|
472
|
+
ExtractResult,
|
|
473
|
+
ExtractOptions,
|
|
474
|
+
ExtractAsyncResult,
|
|
475
|
+
KnowledgeItem,
|
|
476
|
+
BusinessProfile,
|
|
477
|
+
ScrapeResult,
|
|
478
|
+
BusinessClassification,
|
|
479
|
+
ScreenshotResult,
|
|
480
|
+
SitemapResult,
|
|
481
|
+
SearchResult,
|
|
482
|
+
SearchHit,
|
|
483
|
+
WebhookFull,
|
|
484
|
+
WebhookCreateOptions,
|
|
485
|
+
JobResult,
|
|
486
|
+
JobStatus,
|
|
487
|
+
KnowledgeSDKOptions,
|
|
488
|
+
} from '@knowledgesdk/node';
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
---
|
|
492
|
+
|
|
493
|
+
## License
|
|
494
|
+
|
|
495
|
+
MIT
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { HttpClient } from '../utils/http-client';
|
|
2
|
+
/**
 * Structured business profile; this is the value `Classify.run` resolves with.
 */
export interface BusinessClassification {
    /** Name of the business. */
    businessName: string;
    /** Kind of business (the README example shows "B2B Software"). */
    businessType: string;
    /** Industry or sector (the README example shows "Project Management"). */
    industrySector: string;
    /** Audience the business targets. */
    targetAudience: string;
    /** Free-text description of the business. */
    description: string;
    /** Value proposition of the business. */
    valueProposition: string;
    /** Pain points, one entry per item. */
    painPoints: Array<string>;
    /** Unique selling points, one entry per item. */
    uniqueSellingPoints: Array<string>;
    /** Key insights, one entry per item. */
    keyInsights: Array<string>;
    /** Confidence score; presumably in the 0-1 range (the README example shows 0.89). */
    confidenceScore: number;
}
|
|
14
|
+
/**
 * API resource for business classification.
 */
export declare class Classify {
    private httpClient;
    /**
     * @param httpClient HTTP client this resource is constructed with
     */
    constructor(httpClient: HttpClient);
    /**
     * Analyze a business website and classify the business behind it.
     *
     * The resolved profile is structured and covers industry, target audience,
     * and key insights.
     *
     * @param url URL of the business website to classify
     * @returns The structured business classification
     */
    run(url: string): Promise<BusinessClassification>;
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.Classify = void 0;
|
|
4
|
+
class Classify {
    /**
     * @param httpClient HTTP client used to issue the classify request
     */
    constructor(httpClient) {
        this.httpClient = httpClient;
    }
    /**
     * Analyze a business website and classify the business behind it.
     *
     * Sends the URL to the `/classify` endpoint via POST and hands back
     * whatever the HTTP client resolves with.
     *
     * @param url URL of the business website to classify
     * @returns The structured business classification
     */
    async run(url) {
        const payload = { url };
        return this.httpClient.post('/classify', payload);
    }
}
|
|
18
|
+
exports.Classify = Classify;
|
|
19
|
+
//# sourceMappingURL=classify.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"classify.js","sourceRoot":"","sources":["../../src/api/classify.ts"],"names":[],"mappings":";;;AAeA,MAAa,QAAQ;IAGnB,YAAY,UAAsB;QAChC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAyB,WAAW,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;IAC5E,CAAC;CACF;AAhBD,4BAgBC"}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { HttpClient } from '../utils/http-client';
|
|
2
|
+
/**
 * One knowledge item produced by an extraction run.
 */
export interface KnowledgeItem {
    /** Title of the item. */
    title: string;
    /** Description of the item. */
    description: string;
    /** Content of the item. */
    content: string;
    /** Category label; the README prints it as `[category] title`. */
    category: string;
    /** Source of the item; the README describes it as the URL of the source page. */
    source: string;
}
|
|
9
|
+
/**
 * Business profile carried in the `business` field of an extraction result.
 */
export interface BusinessProfile {
    /** Name of the business. */
    businessName: string;
    /** Kind of business. */
    businessType: string;
    /** Industry or sector (the README example shows "SaaS"). */
    industrySector: string;
    /** Audience the business targets. */
    targetAudience: string;
    /** Free-text description of the business. */
    description: string;
    /** Value proposition of the business. */
    valueProposition: string;
    /** Pain points, one entry per item. */
    painPoints: Array<string>;
    /** Unique selling points, one entry per item. */
    uniqueSellingPoints: Array<string>;
    /** Key insights, one entry per item. */
    keyInsights: Array<string>;
    /** Confidence score; the README documents it as 0-1. */
    confidenceScore: number;
}
|
|
21
|
+
/**
 * Complete output of an extraction run.
 */
export interface ExtractResult {
    /** Profile of the business the site belongs to. */
    business: BusinessProfile;
    /** Knowledge items produced by the run. */
    knowledgeItems: Array<KnowledgeItem>;
    /** Presumably the number of pages scraped (the README example shows 18). */
    pagesScraped: number;
    /** Presumably the number of sitemap URLs found (the README example shows 54). */
    sitemapUrls: number;
}
|
|
27
|
+
/**
 * Optional settings accepted by `Extract.run`.
 */
export interface ExtractOptions {
    /** Optional page limit; presumably caps how many pages are scraped — confirm against the API docs. */
    maxPages?: number;
}
|
|
30
|
+
/**
 * Optional settings accepted by `Extract.runStream`.
 */
export interface ExtractStreamOptions {
    /** Optional page limit; presumably caps how many pages are scraped — confirm against the API docs. */
    maxPages?: number;
}
|
|
33
|
+
/**
 * Event yielded by `Extract.runStream`, discriminated by its `type` tag.
 *
 * Narrow on `type` before reading the variant-specific fields.
 */
export type ExtractStreamEvent =
    | {
        type: 'connected';
        message: string;
    }
    | {
        type: 'progress';
        message: string;
    }
    | {
        type: 'business_classified';
        // Note: this payload uses `industry`, not `industrySector` as in BusinessProfile.
        business: {
            businessName: string;
            businessType: string;
            industry: string;
            description: string;
        };
    }
    | {
        type: 'pages_planned';
        pages: Array<{
            url: string;
            purpose: string;
        }>;
    }
    | {
        type: 'page_scraped';
        url: string;
        // The doc example on `runStream` prints `index + 1`, i.e. treats `index` as zero-based.
        index: number;
        total: number;
        status: 'done' | 'failed';
    }
    | {
        type: 'urls_triaged';
        suggestedUrls: Array<{
            url: string;
            reason: string;
        }>;
    }
    | {
        type: 'complete';
        result: ExtractResult;
    }
    | {
        type: 'error';
        message: string;
    };
|
|
72
|
+
/**
 * Optional settings accepted by `Extract.runAsync`.
 */
export interface ExtractAsyncOptions {
    /** Optional page limit; presumably caps how many pages are scraped — confirm against the API docs. */
    maxPages?: number;
    /** Optional callback URL; the README passes a webhook endpoint on the caller's own server. */
    callbackUrl?: string;
}
|
|
76
|
+
/**
 * Value `Extract.runAsync` resolves with once the job has been started.
 */
export interface ExtractAsyncResult {
    /** Job identifier; pass it to `jobs.get()` or `jobs.poll()` to retrieve the result. */
    jobId: string;
    /** Initial status of the job. */
    status: string;
}
|
|
80
|
+
/**
 * API resource for the extraction pipeline, with one-shot, job-based,
 * and streaming entry points.
 */
export declare class Extract {
    private httpClient;
    /**
     * @param httpClient HTTP client this resource is constructed with
     */
    constructor(httpClient: HttpClient);
    /**
     * Execute the extraction pipeline for a URL and resolve with the full result.
     *
     * The site is scraped, the business is classified, and structured
     * knowledge items are returned.
     *
     * @param url URL to extract knowledge from
     * @param options Optional extraction settings
     * @returns The extraction result, including business profile and knowledge items
     */
    run(url: string, options?: ExtractOptions): Promise<ExtractResult>;
    /**
     * Kick off the extraction pipeline as an asynchronous job.
     *
     * Only the job ID and initial status are returned; fetch the outcome
     * later with `jobs.poll()` or `jobs.get()`.
     *
     * @param url URL to extract knowledge from
     * @param options Optional async settings, including a `callbackUrl`
     * @returns The job ID and initial status
     */
    runAsync(url: string, options?: ExtractAsyncOptions): Promise<ExtractAsyncResult>;
    /**
     * Run the extraction pipeline and stream its progress as server-sent events.
     *
     * Typed events are yielded while the pipeline runs: classification,
     * page discovery, per-page scraping, and finally the complete result.
     * Requires Node.js 18+ (native fetch).
     *
     * @param url URL to extract knowledge from
     * @param options Optional streaming settings
     * @returns An async generator of stream events
     *
     * @example
     * ```typescript
     * for await (const event of client.extract.runStream('https://stripe.com')) {
     *   if (event.type === 'page_scraped') {
     *     console.log(`Scraped ${event.index + 1}/${event.total}: ${event.url}`);
     *   }
     *   if (event.type === 'complete') {
     *     console.log(event.result.knowledgeItems);
     *   }
     * }
     * ```
     */
    runStream(url: string, options?: ExtractStreamOptions): AsyncGenerator<ExtractStreamEvent>;
}
|