@knowledgesdk/node 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/LICENSE +21 -0
  2. package/README.md +495 -0
  3. package/dist/api/classify.d.ts +24 -0
  4. package/dist/api/classify.js +19 -0
  5. package/dist/api/classify.js.map +1 -0
  6. package/dist/api/extract.d.ts +118 -0
  7. package/dist/api/extract.js +60 -0
  8. package/dist/api/extract.js.map +1 -0
  9. package/dist/api/jobs.d.ts +35 -0
  10. package/dist/api/jobs.js +43 -0
  11. package/dist/api/jobs.js.map +1 -0
  12. package/dist/api/scrape.d.ts +18 -0
  13. package/dist/api/scrape.js +18 -0
  14. package/dist/api/scrape.js.map +1 -0
  15. package/dist/api/screenshot.d.ts +15 -0
  16. package/dist/api/screenshot.js +18 -0
  17. package/dist/api/screenshot.js.map +1 -0
  18. package/dist/api/search.d.ts +29 -0
  19. package/dist/api/search.js +22 -0
  20. package/dist/api/search.js.map +1 -0
  21. package/dist/api/sitemap.d.ts +17 -0
  22. package/dist/api/sitemap.js +19 -0
  23. package/dist/api/sitemap.js.map +1 -0
  24. package/dist/api/webhooks.d.ts +40 -0
  25. package/dist/api/webhooks.js +39 -0
  26. package/dist/api/webhooks.js.map +1 -0
  27. package/dist/constants.d.ts +5 -0
  28. package/dist/constants.js +9 -0
  29. package/dist/constants.js.map +1 -0
  30. package/dist/errors.d.ts +32 -0
  31. package/dist/errors.js +52 -0
  32. package/dist/errors.js.map +1 -0
  33. package/dist/esm/api/classify.d.ts +24 -0
  34. package/dist/esm/api/classify.js +15 -0
  35. package/dist/esm/api/classify.js.map +1 -0
  36. package/dist/esm/api/extract.d.ts +118 -0
  37. package/dist/esm/api/extract.js +56 -0
  38. package/dist/esm/api/extract.js.map +1 -0
  39. package/dist/esm/api/jobs.d.ts +35 -0
  40. package/dist/esm/api/jobs.js +39 -0
  41. package/dist/esm/api/jobs.js.map +1 -0
  42. package/dist/esm/api/scrape.d.ts +18 -0
  43. package/dist/esm/api/scrape.js +14 -0
  44. package/dist/esm/api/scrape.js.map +1 -0
  45. package/dist/esm/api/screenshot.d.ts +15 -0
  46. package/dist/esm/api/screenshot.js +14 -0
  47. package/dist/esm/api/screenshot.js.map +1 -0
  48. package/dist/esm/api/search.d.ts +29 -0
  49. package/dist/esm/api/search.js +18 -0
  50. package/dist/esm/api/search.js.map +1 -0
  51. package/dist/esm/api/sitemap.d.ts +17 -0
  52. package/dist/esm/api/sitemap.js +15 -0
  53. package/dist/esm/api/sitemap.js.map +1 -0
  54. package/dist/esm/api/webhooks.d.ts +40 -0
  55. package/dist/esm/api/webhooks.js +35 -0
  56. package/dist/esm/api/webhooks.js.map +1 -0
  57. package/dist/esm/constants.d.ts +5 -0
  58. package/dist/esm/constants.js +6 -0
  59. package/dist/esm/constants.js.map +1 -0
  60. package/dist/esm/errors.d.ts +32 -0
  61. package/dist/esm/errors.js +43 -0
  62. package/dist/esm/errors.js.map +1 -0
  63. package/dist/esm/index.d.ts +100 -0
  64. package/dist/esm/index.js +91 -0
  65. package/dist/esm/index.js.map +1 -0
  66. package/dist/esm/utils/http-client.d.ts +62 -0
  67. package/dist/esm/utils/http-client.js +354 -0
  68. package/dist/esm/utils/http-client.js.map +1 -0
  69. package/dist/index.d.ts +100 -0
  70. package/dist/index.js +102 -0
  71. package/dist/index.js.map +1 -0
  72. package/dist/index.mjs +2 -0
  73. package/dist/index.mjs.map +1 -0
  74. package/dist/utils/http-client.d.ts +62 -0
  75. package/dist/utils/http-client.js +361 -0
  76. package/dist/utils/http-client.js.map +1 -0
  77. package/package.json +93 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 KnowledgeSDK
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,495 @@
1
+ # @knowledgesdk/node
2
+
3
+ Official Node.js SDK for the [KnowledgeSDK](https://knowledgesdk.com) API. Extract structured knowledge from any website — business profiles, content, screenshots, sitemaps, and more.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @knowledgesdk/node
9
+ ```
10
+
11
+ ```bash
12
+ yarn add @knowledgesdk/node
13
+ ```
14
+
15
+ ## Quick Start
16
+
17
+ ```typescript
18
+ import { KnowledgeSDK } from '@knowledgesdk/node';
19
+
20
+ const client = new KnowledgeSDK('sk_ks_your_api_key');
21
+
22
+ // Run the full extraction pipeline on a website
23
+ const result = await client.extract.run('https://example.com');
24
+ console.log(result.business.businessName);
25
+ console.log(result.knowledgeItems);
26
+ ```
27
+
28
+ ## Authentication
29
+
30
+ All API calls require an API key. Keys are prefixed with `sk_ks_`. Pass your key to the constructor:
31
+
32
+ ```typescript
33
+ const client = new KnowledgeSDK('sk_ks_your_api_key');
34
+ ```
35
+
36
+ You can also store your key in the `KNOWLEDGE_SDK_API_KEY` environment variable and pass it to the constructor explicitly:
37
+
38
+ ```typescript
39
+ const client = new KnowledgeSDK(process.env.KNOWLEDGE_SDK_API_KEY!);
40
+ ```
41
+
42
+ ## Configuration
43
+
44
+ ```typescript
45
+ const client = new KnowledgeSDK('sk_ks_your_api_key', {
46
+ baseUrl: 'https://api.knowledgesdk.com', // default
47
+ maxRetries: 3, // default — retries on 429 and 5xx
48
+ timeout: 30000, // default — 30 seconds
49
+ });
50
+ ```
51
+
52
+ ## Resources
53
+
54
+ ### `extract`
55
+
56
+ Run the full pipeline against a URL: scrape, classify, and return structured knowledge items.
57
+
58
+ #### Synchronous
59
+
60
+ ```typescript
61
+ const result = await client.extract.run('https://acme.com', {
62
+ maxPages: 20,
63
+ });
64
+
65
+ console.log(result.business.businessName); // "Acme Corp"
66
+ console.log(result.business.industrySector); // "SaaS"
67
+ console.log(result.business.confidenceScore); // 0.92
68
+ console.log(result.pagesScraped); // 18
69
+ console.log(result.sitemapUrls); // 54
70
+ console.log(result.knowledgeItems.length); // 12
71
+
72
+ result.knowledgeItems.forEach((item) => {
73
+ console.log(`[${item.category}] ${item.title}`);
74
+ console.log(item.content);
75
+ });
76
+ ```
77
+
78
+ **`ExtractResult` shape:**
79
+
80
+ ```typescript
81
+ {
82
+ business: {
83
+ businessName: string;
84
+ businessType: string;
85
+ industrySector: string;
86
+ targetAudience: string;
87
+ description: string;
88
+ valueProposition: string;
89
+ painPoints: string[];
90
+ uniqueSellingPoints: string[];
91
+ keyInsights: string[];
92
+ confidenceScore: number; // 0-1
93
+ };
94
+ knowledgeItems: Array<{
95
+ title: string;
96
+ description: string;
97
+ content: string;
98
+ category: string;
99
+ source: string; // URL of source page
100
+ }>;
101
+ pagesScraped: number;
102
+ sitemapUrls: number;
103
+ }
104
+ ```
105
+
106
+ #### Asynchronous
107
+
108
+ For long-running extractions, use `runAsync` to get a job ID and poll for the result:
109
+
110
+ ```typescript
111
+ const { jobId, status } = await client.extract.runAsync('https://acme.com', {
112
+ maxPages: 50,
113
+ callbackUrl: 'https://your-server.com/webhooks/knowledgesdk',
114
+ });
115
+
116
+ // Poll until complete
117
+ const job = await client.jobs.poll(jobId, {
118
+ intervalMs: 3000, // check every 3 seconds
119
+ timeoutMs: 300000, // give up after 5 minutes
120
+ });
121
+
122
+ if (job.status === 'completed') {
123
+ const result = job.result as ExtractResult;
124
+ console.log(result.business.businessName);
125
+ }
126
+ ```
127
+
128
+ ---
129
+
130
+ ### `scrape`
131
+
132
+ Scrape a single page and receive its content as Markdown along with metadata.
133
+
134
+ ```typescript
135
+ const page = await client.scrape.run('https://acme.com/pricing');
136
+
137
+ console.log(page.title); // "Pricing — Acme Corp"
138
+ console.log(page.description); // "Simple, transparent pricing..."
139
+ console.log(page.markdown); // Full page content in Markdown
140
+ console.log(page.links); // Array of hrefs found on the page
141
+ ```
142
+
143
+ **`ScrapeResult` shape:**
144
+
145
+ ```typescript
146
+ {
147
+ url: string;
148
+ markdown: string;
149
+ title: string | null;
150
+ description: string | null;
151
+ links: string[];
152
+ }
153
+ ```
154
+
155
+ ---
156
+
157
+ ### `classify`
158
+
159
+ Classify a business by analyzing its website. Returns a structured profile without scraping the full site.
160
+
161
+ ```typescript
162
+ const classification = await client.classify.run('https://acme.com');
163
+
164
+ console.log(classification.businessName); // "Acme Corp"
165
+ console.log(classification.businessType); // "B2B Software"
166
+ console.log(classification.industrySector); // "Project Management"
167
+ console.log(classification.targetAudience); // "SMBs and mid-market teams"
168
+ console.log(classification.valueProposition); // "Simplify team collaboration"
169
+ console.log(classification.painPoints); // ["Too many tools", "Poor visibility"]
170
+ console.log(classification.uniqueSellingPoints); // ["Real-time sync", "One-click reporting"]
171
+ console.log(classification.confidenceScore); // 0.89
172
+ ```
173
+
174
+ ---
175
+
176
+ ### `screenshot`
177
+
178
+ Capture a full-page screenshot of any URL. Returns a base64-encoded PNG.
179
+
180
+ ```typescript
181
+ const { url, screenshot } = await client.screenshot.run('https://acme.com');
182
+
183
+ // Write to disk
184
+ import { writeFileSync } from 'fs';
185
+ const buffer = Buffer.from(screenshot, 'base64');
186
+ writeFileSync('screenshot.png', buffer);
187
+
188
+ // Or use inline in HTML
189
+ const dataUrl = `data:image/png;base64,${screenshot}`;
190
+ ```
191
+
192
+ ---
193
+
194
+ ### `sitemap`
195
+
196
+ Discover all publicly accessible URLs for a website via its sitemap or by crawling.
197
+
198
+ ```typescript
199
+ const { url, urls, count } = await client.sitemap.run('https://acme.com');
200
+
201
+ console.log(`Found ${count} URLs`);
202
+ urls.forEach((u) => console.log(u));
203
+ ```
204
+
205
+ **`SitemapResult` shape:**
206
+
207
+ ```typescript
208
+ {
209
+ url: string;
210
+ urls: string[];
211
+ count: number;
212
+ }
213
+ ```
214
+
215
+ ---
216
+
217
+ ### `search`
218
+
219
+ Perform a semantic search across your indexed knowledge items.
220
+
221
+ ```typescript
222
+ const results = await client.search.run('how do I cancel my subscription', {
223
+ limit: 10,
224
+ });
225
+
226
+ console.log(`${results.total} results for "${results.query}"`);
227
+
228
+ results.hits.forEach((hit) => {
229
+ console.log(`[score: ${hit.score.toFixed(2)}] ${hit.title}`);
230
+ console.log(`Category: ${hit.category} | Source: ${hit.source}`);
231
+ console.log(hit.content);
232
+ });
233
+ ```
234
+
235
+ **`SearchResult` shape:**
236
+
237
+ ```typescript
238
+ {
239
+ hits: Array<{
240
+ id: string;
241
+ title: string;
242
+ content: string;
243
+ category: string;
244
+ source: string;
245
+ score: number; // relevance score, higher is better
246
+ }>;
247
+ total: number;
248
+ query: string;
249
+ }
250
+ ```
251
+
252
+ ---
253
+
254
+ ### `webhooks`
255
+
256
+ Register webhook endpoints to receive real-time event notifications.
257
+
258
+ #### Create a webhook
259
+
260
+ ```typescript
261
+ const webhook = await client.webhooks.create({
262
+ url: 'https://your-server.com/webhooks/knowledgesdk',
263
+ events: ['extract.completed', 'extract.failed'],
264
+ displayName: 'My Production Webhook',
265
+ });
266
+
267
+ console.log(webhook.id); // "wh_abc123"
268
+ console.log(webhook.status); // "active"
269
+ ```
270
+
271
+ #### List webhooks
272
+
273
+ ```typescript
274
+ const webhooks = await client.webhooks.list();
275
+ webhooks.forEach((wh) => {
276
+ console.log(`${wh.id}: ${wh.url} — ${wh.status}`);
277
+ });
278
+ ```
279
+
280
+ #### Delete a webhook
281
+
282
+ ```typescript
283
+ await client.webhooks.delete('wh_abc123');
284
+ ```
285
+
286
+ #### Test a webhook
287
+
288
+ Send a test payload to verify your endpoint is reachable:
289
+
290
+ ```typescript
291
+ await client.webhooks.test('wh_abc123');
292
+ ```
293
+
294
+ ---
295
+
296
+ ### `jobs`
297
+
298
+ Retrieve or poll the result of an asynchronous job.
299
+
300
+ #### Get a job by ID
301
+
302
+ ```typescript
303
+ const job = await client.jobs.get('job_abc123');
304
+
305
+ console.log(job.status); // 'pending' | 'processing' | 'completed' | 'failed'
306
+ console.log(job.createdAt);
307
+ console.log(job.completedAt);
308
+ ```
309
+
310
+ #### Poll until complete
311
+
312
+ ```typescript
313
+ import { TimeoutError } from '@knowledgesdk/node';
314
+
315
+ try {
316
+ const job = await client.jobs.poll('job_abc123', {
317
+ intervalMs: 2000, // poll every 2 seconds (default)
318
+ timeoutMs: 120000, // give up after 2 minutes (default)
319
+ });
320
+
321
+ if (job.status === 'completed') {
322
+ console.log('Job completed:', job.result);
323
+ } else {
324
+ console.error('Job failed:', job.error);
325
+ }
326
+ } catch (err) {
327
+ if (err instanceof TimeoutError) {
328
+ console.error('Job timed out — try polling again later');
329
+ }
330
+ }
331
+ ```
332
+
333
+ ---
334
+
335
+ ## Error Handling
336
+
337
+ All errors extend `KnowledgeSDKError` and carry structured metadata.
338
+
339
+ ```typescript
340
+ import {
341
+ KnowledgeSDK,
342
+ KnowledgeSDKError,
343
+ APIError,
344
+ AuthenticationError,
345
+ NetworkError,
346
+ RateLimitError,
347
+ TimeoutError,
348
+ } from '@knowledgesdk/node';
349
+
350
+ const client = new KnowledgeSDK('sk_ks_your_api_key');
351
+
352
+ try {
353
+ const result = await client.extract.run('https://acme.com');
354
+ } catch (err) {
355
+ if (err instanceof AuthenticationError) {
356
+ console.error('Invalid API key:', err.message);
357
+ } else if (err instanceof RateLimitError) {
358
+ console.error('Rate limit hit. Retry after:', err.retryAfter);
359
+ } else if (err instanceof TimeoutError) {
360
+ console.error('Request timed out:', err.message);
361
+ } else if (err instanceof NetworkError) {
362
+ console.error('Network error:', err.message);
363
+ } else if (err instanceof APIError) {
364
+ console.error(`API error ${err.statusCode}:`, err.message);
365
+ console.error('Error code:', err.code);
366
+ console.error('Request ID:', err.requestId);
367
+ } else if (err instanceof KnowledgeSDKError) {
368
+ console.error('KnowledgeSDK error:', err.message);
369
+ } else {
370
+ throw err;
371
+ }
372
+ }
373
+ ```
374
+
375
+ ### Error Classes
376
+
377
+ | Class | Description |
378
+ |---|---|
379
+ | `KnowledgeSDKError` | Base class for all SDK errors |
380
+ | `APIError` | 4xx/5xx responses from the API |
381
+ | `AuthenticationError` | Missing or invalid API key (401) |
382
+ | `NetworkError` | Network connectivity issues |
383
+ | `RateLimitError` | API rate limit exceeded (429) |
384
+ | `TimeoutError` | Request or job polling timed out |
385
+
386
+ All errors expose:
387
+ - `message` — human-readable description
388
+ - `statusCode` — HTTP status code (where applicable)
389
+ - `code` — machine-readable error code
390
+ - `requestId` — request ID from the API (for support)
391
+ - `data` — raw response body (where available)
392
+
393
+ ---
394
+
395
+ ## Debug Mode
396
+
397
+ Enable request/response logging for development:
398
+
399
+ ```typescript
400
+ const client = new KnowledgeSDK('sk_ks_your_api_key');
401
+ client.setDebugMode(true);
402
+
403
+ // All requests and responses will now be printed to the console
404
+ const result = await client.scrape.run('https://acme.com');
405
+ ```
406
+
407
+ ---
408
+
409
+ ## Advanced Usage
410
+
411
+ ### Custom headers
412
+
413
+ ```typescript
414
+ client.setHeaders({
415
+ 'x-custom-header': 'my-value',
416
+ 'x-trace-id': requestId,
417
+ });
418
+ ```
419
+
420
+ ### Retry configuration
421
+
422
+ By default, the SDK retries up to 3 times on rate-limit (429) and server-error (5xx) responses, using exponential backoff with jitter. Configure this when constructing the client:
423
+
424
+ ```typescript
425
+ const client = new KnowledgeSDK('sk_ks_your_api_key', {
426
+ maxRetries: 5,
427
+ timeout: 60000, // 60 seconds
428
+ });
429
+ ```
430
+
431
+ ### Full async extraction workflow
432
+
433
+ ```typescript
434
+ import { KnowledgeSDK, ExtractResult, TimeoutError } from '@knowledgesdk/node';
435
+
436
+ const client = new KnowledgeSDK(process.env.KNOWLEDGE_SDK_API_KEY!);
437
+
438
+ async function extractWebsite(url: string): Promise<ExtractResult> {
439
+ // Kick off async job
440
+ const { jobId } = await client.extract.runAsync(url, {
441
+ maxPages: 100,
442
+ callbackUrl: 'https://your-server.com/webhooks/knowledgesdk',
443
+ });
444
+
445
+ console.log(`Started job ${jobId}, polling for result...`);
446
+
447
+ // Poll until complete (up to 10 minutes)
448
+ const job = await client.jobs.poll(jobId, {
449
+ intervalMs: 5000,
450
+ timeoutMs: 600000,
451
+ });
452
+
453
+ if (job.status === 'failed') {
454
+ throw new Error(`Extraction failed: ${job.error}`);
455
+ }
456
+
457
+ return job.result as ExtractResult;
458
+ }
459
+
460
+ const result = await extractWebsite('https://large-site.com');
461
+ console.log(`Extracted ${result.knowledgeItems.length} knowledge items`);
462
+ ```
463
+
464
+ ---
465
+
466
+ ## TypeScript
467
+
468
+ The SDK is written in TypeScript and exports all types. No additional `@types` package is required.
469
+
470
+ ```typescript
471
+ import type {
472
+ ExtractResult,
473
+ ExtractOptions,
474
+ ExtractAsyncResult,
475
+ KnowledgeItem,
476
+ BusinessProfile,
477
+ ScrapeResult,
478
+ BusinessClassification,
479
+ ScreenshotResult,
480
+ SitemapResult,
481
+ SearchResult,
482
+ SearchHit,
483
+ WebhookFull,
484
+ WebhookCreateOptions,
485
+ JobResult,
486
+ JobStatus,
487
+ KnowledgeSDKOptions,
488
+ } from '@knowledgesdk/node';
489
+ ```
490
+
491
+ ---
492
+
493
+ ## License
494
+
495
+ MIT
package/dist/api/classify.d.ts ADDED
@@ -0,0 +1,24 @@
1
+ import { HttpClient } from '../utils/http-client';
2
+ export interface BusinessClassification {
3
+ businessName: string;
4
+ businessType: string;
5
+ industrySector: string;
6
+ targetAudience: string;
7
+ description: string;
8
+ valueProposition: string;
9
+ painPoints: string[];
10
+ uniqueSellingPoints: string[];
11
+ keyInsights: string[];
12
+ confidenceScore: number;
13
+ }
14
+ export declare class Classify {
15
+ private httpClient;
16
+ constructor(httpClient: HttpClient);
17
+ /**
18
+ * Classify a business by analyzing its website URL.
19
+ * Returns a structured business profile with industry, target audience, and key insights.
20
+ * @param url The URL of the business website to classify
21
+ * @returns A structured business classification
22
+ */
23
+ run(url: string): Promise<BusinessClassification>;
24
+ }
package/dist/api/classify.js ADDED
@@ -0,0 +1,19 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Classify = void 0;
4
+ class Classify {
5
+ constructor(httpClient) {
6
+ this.httpClient = httpClient;
7
+ }
8
+ /**
9
+ * Classify a business by analyzing its website URL.
10
+ * Returns a structured business profile with industry, target audience, and key insights.
11
+ * @param url The URL of the business website to classify
12
+ * @returns A structured business classification
13
+ */
14
+ async run(url) {
15
+ return this.httpClient.post('/classify', { url });
16
+ }
17
+ }
18
+ exports.Classify = Classify;
19
+ //# sourceMappingURL=classify.js.map
package/dist/api/classify.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"classify.js","sourceRoot":"","sources":["../../src/api/classify.ts"],"names":[],"mappings":";;;AAeA,MAAa,QAAQ;IAGnB,YAAY,UAAsB;QAChC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAyB,WAAW,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;IAC5E,CAAC;CACF;AAhBD,4BAgBC"}
package/dist/api/extract.d.ts ADDED
@@ -0,0 +1,118 @@
1
+ import { HttpClient } from '../utils/http-client';
2
+ export interface KnowledgeItem {
3
+ title: string;
4
+ description: string;
5
+ content: string;
6
+ category: string;
7
+ source: string;
8
+ }
9
+ export interface BusinessProfile {
10
+ businessName: string;
11
+ businessType: string;
12
+ industrySector: string;
13
+ targetAudience: string;
14
+ description: string;
15
+ valueProposition: string;
16
+ painPoints: string[];
17
+ uniqueSellingPoints: string[];
18
+ keyInsights: string[];
19
+ confidenceScore: number;
20
+ }
21
+ export interface ExtractResult {
22
+ business: BusinessProfile;
23
+ knowledgeItems: KnowledgeItem[];
24
+ pagesScraped: number;
25
+ sitemapUrls: number;
26
+ }
27
+ export interface ExtractOptions {
28
+ maxPages?: number;
29
+ }
30
+ export interface ExtractStreamOptions {
31
+ maxPages?: number;
32
+ }
33
+ export type ExtractStreamEvent = {
34
+ type: 'connected';
35
+ message: string;
36
+ } | {
37
+ type: 'progress';
38
+ message: string;
39
+ } | {
40
+ type: 'business_classified';
41
+ business: {
42
+ businessName: string;
43
+ businessType: string;
44
+ industry: string;
45
+ description: string;
46
+ };
47
+ } | {
48
+ type: 'pages_planned';
49
+ pages: Array<{
50
+ url: string;
51
+ purpose: string;
52
+ }>;
53
+ } | {
54
+ type: 'page_scraped';
55
+ url: string;
56
+ index: number;
57
+ total: number;
58
+ status: 'done' | 'failed';
59
+ } | {
60
+ type: 'urls_triaged';
61
+ suggestedUrls: Array<{
62
+ url: string;
63
+ reason: string;
64
+ }>;
65
+ } | {
66
+ type: 'complete';
67
+ result: ExtractResult;
68
+ } | {
69
+ type: 'error';
70
+ message: string;
71
+ };
72
+ export interface ExtractAsyncOptions {
73
+ maxPages?: number;
74
+ callbackUrl?: string;
75
+ }
76
+ export interface ExtractAsyncResult {
77
+ jobId: string;
78
+ status: string;
79
+ }
80
+ export declare class Extract {
81
+ private httpClient;
82
+ constructor(httpClient: HttpClient);
83
+ /**
84
+ * Run a synchronous extraction pipeline against a URL.
85
+ * Scrapes the site, classifies the business, and returns structured knowledge items.
86
+ * @param url The URL to extract knowledge from
87
+ * @param options Optional extraction options
88
+ * @returns The full extraction result including business profile and knowledge items
89
+ */
90
+ run(url: string, options?: ExtractOptions): Promise<ExtractResult>;
91
+ /**
92
+ * Start an asynchronous extraction pipeline and return a job ID.
93
+ * Use jobs.poll() or jobs.get() to retrieve the result when complete.
94
+ * @param url The URL to extract knowledge from
95
+ * @param options Optional async extraction options including a callbackUrl
96
+ * @returns The job ID and initial status
97
+ */
98
+ runAsync(url: string, options?: ExtractAsyncOptions): Promise<ExtractAsyncResult>;
99
+ /**
100
+ * Stream extraction progress as server-sent events.
101
+ * Yields typed events as the pipeline runs: classification, page discovery,
102
+ * per-page scraping, and the final complete result.
103
+ * Requires Node.js 18+ (native fetch).
104
+ *
105
+ * @example
106
+ * ```typescript
107
+ * for await (const event of client.extract.runStream('https://stripe.com')) {
108
+ * if (event.type === 'page_scraped') {
109
+ * console.log(`Scraped ${event.index + 1}/${event.total}: ${event.url}`);
110
+ * }
111
+ * if (event.type === 'complete') {
112
+ * console.log(event.result.knowledgeItems);
113
+ * }
114
+ * }
115
+ * ```
116
+ */
117
+ runStream(url: string, options?: ExtractStreamOptions): AsyncGenerator<ExtractStreamEvent>;
118
+ }