@dealcrawl/sdk 2.10.0 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +223 -37
- package/dist/index.d.mts +1145 -26
- package/dist/index.d.ts +1145 -26
- package/dist/index.js +631 -9
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +628 -10
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1122,6 +1122,245 @@ var AgentResource = class {
|
|
|
1122
1122
|
}
|
|
1123
1123
|
};
|
|
1124
1124
|
|
|
1125
|
+
// src/resources/auth.ts
|
|
1126
|
+
var AuthResource = class {
  /**
   * @param ctx - Client context forwarded unchanged to the shared
   *   `post`/`get` request helpers.
   */
  constructor(ctx) {
    this.ctx = ctx;
  }

  /**
   * Create a token for authenticating an SSE connection.
   *
   * Browsers' EventSource API cannot send custom headers, so the API key
   * cannot travel as a Bearer header on the stream itself; the token is
   * passed in the query string instead. According to the API notes the
   * token is short-lived (5 minutes), revocable, and may be scoped to a
   * single job by passing `jobId`.
   *
   * @param options - Optional request body, e.g. `{ jobId: "job_abc123" }`.
   *   `null`/`undefined` is sent as an empty object.
   * @returns The `data` member of the helper's result (documented usage
   *   reads `token` and `expiresAt` from it).
   *
   * @example
   * ```ts
   * const { token, expiresAt } = await client.auth.generateSSEToken();
   * const source = new EventSource(`/v1/events?token=${token}`);
   *
   * // Scoped to one job
   * const jobToken = await client.auth.generateSSEToken({ jobId: "job_abc123" });
   * const jobSource = new EventSource(`/v1/events/job_abc123?token=${jobToken.token}`);
   * ```
   */
  async generateSSEToken(options) {
    const payload = options ?? {};
    const { data } = await post(this.ctx, "/v1/auth/sse-token", payload);
    return data;
  }

  /**
   * Fetch the SSE connection limits for the caller's tier.
   *
   * Documented tier limits: Free 2, Pro 10, Enterprise 50 concurrent
   * connections.
   *
   * @returns The `data` member of the helper's result (documented usage
   *   reads `tier`, `sse.maxConnections`, `sse.currentConnections` and
   *   `sse.available`).
   *
   * @example
   * ```ts
   * const limits = await client.auth.getLimits();
   * if (limits.sse.available > 0) {
   *   const { token } = await client.auth.generateSSEToken();
   *   const source = new EventSource(`/v1/events?token=${token}`);
   * } else {
   *   console.error('No available SSE connection slots');
   * }
   * ```
   */
  async getLimits() {
    const { data } = await get(this.ctx, "/v1/auth/limits");
    return data;
  }
};
|
|
1206
|
+
|
|
1207
|
+
// src/resources/convert.ts
|
|
1208
|
+
var ConvertResource = class {
  /**
   * @param ctx - Client context forwarded unchanged to the shared `post`
   *   request helper.
   */
  constructor(ctx) {
    this.ctx = ctx;
  }

  /**
   * Convert HTML to Markdown via `POST /v1/convert`.
   *
   * Documented server-side features: GitHub Flavored Markdown output
   * (tables, strikethrough, task lists), noise removal (scripts, ads,
   * navigation), relative-URL resolution against `baseUrl`, element
   * exclusion by CSS selector, and output length limiting.
   *
   * Only `html`, `baseUrl` and `options` are forwarded; any other property
   * on the argument is ignored.
   *
   * @param options - `{ html, baseUrl?, options? }` where the nested
   *   `options` carries conversion flags such as `gfmTables`, `removeNoise`,
   *   `excludeSelectors`, `absoluteUrls`, `maxLength`, `includeImages`,
   *   `includeLinks`, `onlyMainContent`.
   * @returns The `data` member of the helper's result (markdown, metadata
   *   and warnings per the API description).
   *
   * @example
   * ```ts
   * const result = await client.convert.htmlToMarkdown({
   *   html: htmlContent,
   *   baseUrl: "https://shop.example.com",
   *   options: {
   *     removeNoise: true,
   *     excludeSelectors: [".advertisement", "#sidebar"],
   *     maxLength: 100000
   *   }
   * });
   * console.log(result.data.markdown);
   * if (result.data.warnings?.length) {
   *   console.warn("Conversion warnings:", result.data.warnings);
   * }
   * ```
   */
  async htmlToMarkdown(options) {
    const { html, baseUrl, options: conversionOptions } = options;
    const response = await post(this.ctx, "/v1/convert", {
      html,
      baseUrl,
      options: conversionOptions
    });
    return response.data;
  }

  /**
   * Shorter name for {@link htmlToMarkdown}; takes and returns the same
   * values.
   *
   * @example
   * ```ts
   * const result = await client.convert.toMarkdown({ html: "<h1>Hello</h1>" });
   * ```
   */
  async toMarkdown(options) {
    return this.htmlToMarkdown(options);
  }

  /**
   * Convert HTML without passing any conversion flags, leaving every
   * setting at the server's default.
   *
   * @param html - HTML content to convert
   * @param baseUrl - Optional base URL for resolving relative links
   * @returns Same result as {@link htmlToMarkdown}
   *
   * @example
   * ```ts
   * const result = await client.convert.quick(
   *   "<h1>Title</h1><p>Content</p>",
   *   "https://example.com"
   * );
   * ```
   */
  async quick(html, baseUrl) {
    const request = { html, baseUrl };
    return this.htmlToMarkdown(request);
  }

  /**
   * Convert HTML with noise removal switched on and a fixed list of
   * clutter selectors (navigation, footer, asides, ads, sidebar, comments)
   * excluded.
   *
   * @param html - HTML content to convert
   * @param baseUrl - Optional base URL
   * @returns Same result as {@link htmlToMarkdown}
   *
   * @example
   * ```ts
   * const result = await client.convert.clean(messyHtml, "https://example.com");
   * ```
   */
  async clean(html, baseUrl) {
    // Built per call so callers never share (and cannot mutate) one array.
    const clutterSelectors = [
      "nav",
      "footer",
      "aside",
      ".advertisement",
      ".ad",
      ".sidebar",
      "#comments"
    ];
    const request = {
      html,
      baseUrl,
      options: { removeNoise: true, excludeSelectors: clutterSelectors }
    };
    return this.htmlToMarkdown(request);
  }
};
|
|
1363
|
+
|
|
1125
1364
|
// src/resources/crawl.ts
|
|
1126
1365
|
var CRAWL_TEMPLATES = {
|
|
1127
1366
|
ecommerce: {
|
|
@@ -1724,6 +1963,236 @@ var DorkResource = class {
|
|
|
1724
1963
|
}
|
|
1725
1964
|
};
|
|
1726
1965
|
|
|
1966
|
+
// src/resources/events.ts
|
|
1967
|
+
var EventsResource = class {
  /**
   * @param ctx - Client context; this class reads `ctx.baseUrl` and
   *   `ctx.apiKey`.
   */
  constructor(ctx) {
    this.ctx = ctx;
  }

  /**
   * Open an EventSource for `path` and attach the optional callbacks.
   * Shared by `subscribe()` and `subscribeToJob()` so both wire handlers
   * identically.
   *
   * @param path - Absolute API path (already encoded) resolved against
   *   `ctx.baseUrl`.
   * @param token - SSE token, sent as the `token` query parameter.
   * @param options - Optional `{ onEvent, onError, onOpen }` callbacks.
   * @returns The opened EventSource.
   * @throws {Error} When no global `EventSource` exists (e.g. Node.js).
   */
  #openStream(path, token, options) {
    if (typeof EventSource === "undefined") {
      throw new Error(
        "EventSource is not available. SSE subscriptions only work in browsers. For Node.js, use polling via client.status.get() instead."
      );
    }
    const url = new URL(path, this.ctx.baseUrl);
    url.searchParams.set("token", token);
    const eventSource = new EventSource(url.toString());
    if (options?.onEvent) {
      eventSource.onmessage = options.onEvent;
    }
    if (options?.onError) {
      // The native error event carries no detail, so callers get a plain Error.
      eventSource.onerror = () => {
        options.onError(
          new Error("SSE connection error. Will auto-reconnect if enabled.")
        );
      };
    }
    if (options?.onOpen) {
      eventSource.onopen = options.onOpen;
    }
    return eventSource;
  }

  /**
   * Subscribe to all events for the authenticated client.
   *
   * Opens an SSE connection to `/v1/events`. Requires a token from
   * `client.auth.generateSSEToken()`.
   *
   * Documented event types:
   * - Job lifecycle: job.created, job.queued, job.started, job.progress,
   *   job.completed, job.failed, job.cancelled
   * - Job details: job.log, job.metric, job.alert, job.checkpoint
   * - Deals: deal.found, deal.validated
   * - System: ping, connection.open, connection.close, error
   *
   * Documented stream behavior: automatic reconnection, replay via
   * Last-Event-ID, keepalive pings every 15 seconds, 1 hour max connection.
   *
   * NOTE: `onEvent` is assigned to `onmessage`, which per the EventSource
   * specification fires only for events without a custom `event:` name.
   * Use `addEventListener(type, …)` on the returned object for named
   * events such as `job.completed`.
   *
   * @param token - SSE authentication token
   * @param options - Optional `{ onEvent, onError, onOpen }` callbacks
   * @returns The EventSource; call `.close()` on it to unsubscribe.
   * @throws {Error} When `EventSource` is unavailable (non-browser runtime).
   *
   * @example
   * ```ts
   * const { token } = await client.auth.generateSSEToken();
   * const eventSource = client.events.subscribe(token, {
   *   onError: (error) => console.error('SSE error:', error),
   *   onOpen: () => console.log('SSE connection opened')
   * });
   * eventSource.addEventListener('job.completed', (event) => {
   *   console.log('Job completed:', JSON.parse(event.data));
   * });
   * eventSource.close();
   * ```
   */
  subscribe(token, options) {
    return this.#openStream("/v1/events", token, options);
  }

  /**
   * Subscribe to the events of a single job (`/v1/events/{jobId}`).
   *
   * @param jobId - Job ID to subscribe to; percent-encoded before being
   *   placed in the path so reserved characters cannot alter the URL.
   * @param token - SSE authentication token
   * @param options - Optional `{ onEvent, onError, onOpen }` callbacks
   * @returns The EventSource; call `.close()` on it to unsubscribe.
   * @throws {Error} When `EventSource` is unavailable (non-browser runtime).
   *
   * @example
   * ```ts
   * const job = await client.scrape.create({ url: "https://example.com" });
   * const { token } = await client.auth.generateSSEToken({ jobId: job.jobId });
   * const eventSource = client.events.subscribeToJob(job.jobId, token);
   * eventSource.addEventListener('job.progress', (event) => {
   *   console.log(`Progress: ${JSON.parse(event.data).progress}%`);
   * });
   * eventSource.addEventListener('job.failed', (event) => {
   *   console.error('Job failed:', JSON.parse(event.data).error);
   *   eventSource.close();
   * });
   * ```
   */
  subscribeToJob(jobId, token, options) {
    const path = `/v1/events/${encodeURIComponent(jobId)}`;
    return this.#openStream(path, token, options);
  }

  /**
   * Wait for a job to finish using SSE.
   *
   * Requests a job-scoped SSE token, subscribes to the job, and settles on
   * the first terminal event. The connection is closed before settling.
   *
   * @param jobId - Job ID to wait for
   * @param onProgress - Optional callback invoked with `data.progress` for
   *   every `job.progress` event
   * @returns Resolves with the parsed `job.completed` payload.
   * @throws {Error} Rejects when EventSource is unavailable, the token
   *   request fails or yields no token, the connection errors, or the job
   *   fails or is cancelled.
   *
   * @example
   * ```ts
   * const job = await client.scrape.create({ url: "https://example.com" });
   * const result = await client.events.waitForCompletion(job.jobId, (progress) => {
   *   console.log(`Progress: ${progress}%`);
   * });
   * ```
   */
  async waitForCompletion(jobId, onProgress) {
    if (typeof EventSource === "undefined") {
      throw new Error(
        "waitForCompletion() only works in browsers. For Node.js, use client.waitForResult() instead."
      );
    }

    // Fetch the token before constructing the Promise: an async executor
    // would swallow network/parse failures and leave the promise pending.
    const tokenResponse = await fetch(
      `${this.ctx.baseUrl}/v1/auth/sse-token`,
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${this.ctx.apiKey}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify({ jobId })
      }
    );
    if (!tokenResponse.ok) {
      throw new Error("Failed to generate SSE token");
    }
    const payload = await tokenResponse.json();
    // NOTE(review): the response envelope is not visible from this file;
    // accept the token at the top level or nested under `data` — confirm.
    const token = payload?.token ?? payload?.data?.token;
    if (typeof token !== "string" || token === "") {
      throw new Error("SSE token response did not include a token");
    }

    return new Promise((resolve, reject) => {
      const eventSource = this.subscribeToJob(jobId, token, {
        onError: (error) => {
          eventSource.close();
          reject(error);
        }
      });

      if (onProgress) {
        eventSource.addEventListener("job.progress", (event) => {
          onProgress(JSON.parse(event.data).progress);
        });
      }

      eventSource.addEventListener("job.completed", (event) => {
        eventSource.close();
        try {
          resolve(JSON.parse(event.data));
        } catch (error) {
          reject(
            new Error("Received malformed job.completed payload", { cause: error })
          );
        }
      });

      eventSource.addEventListener("job.failed", (event) => {
        eventSource.close();
        let message = "Job failed";
        try {
          message = JSON.parse(event.data).error?.message || message;
        } catch {
          // Unparseable payload: the job still failed, keep the generic text.
        }
        reject(new Error(message));
      });

      eventSource.addEventListener("job.cancelled", () => {
        eventSource.close();
        reject(new Error("Job was cancelled"));
      });
    });
  }
};
|
|
2195
|
+
|
|
1727
2196
|
// src/resources/extract.ts
|
|
1728
2197
|
var ExtractResource = class {
|
|
1729
2198
|
constructor(ctx) {
|
|
@@ -2164,6 +2633,7 @@ var ScrapeResource = class {
|
|
|
2164
2633
|
async create(options) {
|
|
2165
2634
|
const body = {
|
|
2166
2635
|
url: options.url,
|
|
2636
|
+
noStore: options.noStore,
|
|
2167
2637
|
detectSignals: options.detectSignals ?? true,
|
|
2168
2638
|
extractWithAI: options.extractWithAI,
|
|
2169
2639
|
extractDeal: options.extractDeal,
|
|
@@ -2176,7 +2646,10 @@ var ScrapeResource = class {
|
|
|
2176
2646
|
excludeSelectors: options.excludeSelectors,
|
|
2177
2647
|
onlyMainContent: options.onlyMainContent,
|
|
2178
2648
|
headers: options.headers,
|
|
2179
|
-
timeout: options.timeout
|
|
2649
|
+
timeout: options.timeout,
|
|
2650
|
+
outputMarkdown: options.outputMarkdown,
|
|
2651
|
+
markdownBaseUrl: options.markdownBaseUrl,
|
|
2652
|
+
actions: options.actions
|
|
2180
2653
|
};
|
|
2181
2654
|
const result = await post(this.ctx, "/v1/scrape", body);
|
|
2182
2655
|
return result.data;
|
|
@@ -2252,7 +2725,8 @@ var ScrapeResource = class {
|
|
|
2252
2725
|
* { url: "https://shop1.com/product1" },
|
|
2253
2726
|
* { url: "https://shop2.com/deal", extractDeal: true }
|
|
2254
2727
|
* ],
|
|
2255
|
-
* defaults: { detectSignals: true }
|
|
2728
|
+
* defaults: { detectSignals: true },
|
|
2729
|
+
* ignoreInvalidURLs: true
|
|
2256
2730
|
* });
|
|
2257
2731
|
* console.log(batch.batchId, batch.results);
|
|
2258
2732
|
* ```
|
|
@@ -2263,7 +2737,8 @@ var ScrapeResource = class {
|
|
|
2263
2737
|
defaults: options.defaults,
|
|
2264
2738
|
webhookUrl: options.webhookUrl,
|
|
2265
2739
|
priority: options.priority,
|
|
2266
|
-
delayMs: options.
|
|
2740
|
+
delayMs: options.delayMs,
|
|
2741
|
+
ignoreInvalidURLs: options.ignoreInvalidURLs
|
|
2267
2742
|
};
|
|
2268
2743
|
const result = await post(
|
|
2269
2744
|
this.ctx,
|
|
@@ -2313,6 +2788,57 @@ var ScrapeResource = class {
|
|
|
2313
2788
|
}
|
|
2314
2789
|
};
|
|
2315
2790
|
|
|
2791
|
+
// src/resources/screenshots.ts
|
|
2792
|
+
var ScreenshotsResource = class {
  /**
   * @param ctx - Client context forwarded unchanged to the shared
   *   `post`/`get` request helpers.
   */
  constructor(ctx) {
    this.ctx = ctx;
  }

  /**
   * Request a fresh signed URL for a stored screenshot before the current
   * one expires (`POST /v1/screenshots/refresh`).
   *
   * Only `path`, `ttl` and `bucket` are forwarded from the argument.
   *
   * @param options - `{ path, ttl?, bucket? }`; in the documented example
   *   `ttl` is given in seconds (604800 = 7 days).
   * @returns The `data` member of the helper's result (documented usage
   *   reads `url`, `expiresAt` and `tierLimits`).
   *
   * @example
   * ```ts
   * const refreshed = await client.screenshots.refresh({
   *   path: "job_abc123/1234567890_nanoid_example.png",
   *   ttl: 604800 // 7 days
   * });
   * console.log(refreshed.url);       // New signed URL
   * console.log(refreshed.expiresAt); // "2026-01-25T12:00:00Z"
   * ```
   */
  async refresh(options) {
    const { path, ttl, bucket } = options;
    const response = await post(this.ctx, "/v1/screenshots/refresh", {
      path,
      ttl,
      bucket
    });
    return response.data;
  }

  /**
   * Fetch the signed-URL TTL limits for the caller's tier
   * (`GET /v1/screenshots/limits`).
   *
   * @returns The `data` member of the helper's result (documented usage
   *   reads `tier`, `limits.max` and `formattedLimits.max`).
   *
   * @example
   * ```ts
   * const limits = await client.screenshots.getLimits();
   * console.log(limits.tier);                // "pro"
   * console.log(limits.formattedLimits.max); // "7 days"
   * ```
   */
  async getLimits() {
    const response = await get(this.ctx, "/v1/screenshots/limits");
    return response.data;
  }
};
|
|
2841
|
+
|
|
2316
2842
|
// src/resources/search.ts
|
|
2317
2843
|
var SearchResource = class {
|
|
2318
2844
|
constructor(ctx) {
|
|
@@ -2325,7 +2851,7 @@ var SearchResource = class {
|
|
|
2325
2851
|
* ```ts
|
|
2326
2852
|
* const result = await client.search.create({
|
|
2327
2853
|
* query: "laptop deals black friday",
|
|
2328
|
-
*
|
|
2854
|
+
* limit: 20,
|
|
2329
2855
|
* useDealScoring: true
|
|
2330
2856
|
* });
|
|
2331
2857
|
* ```
|
|
@@ -2333,9 +2859,9 @@ var SearchResource = class {
|
|
|
2333
2859
|
async create(options) {
|
|
2334
2860
|
const body = {
|
|
2335
2861
|
query: options.query,
|
|
2336
|
-
limit: options.
|
|
2337
|
-
scrapeResults: options.
|
|
2338
|
-
maxScrapeResults: options.
|
|
2862
|
+
limit: options.limit,
|
|
2863
|
+
scrapeResults: options.scrapeResults,
|
|
2864
|
+
maxScrapeResults: options.maxScrapeResults,
|
|
2339
2865
|
useAiOptimization: options.useAiOptimization,
|
|
2340
2866
|
aiProvider: options.aiProvider,
|
|
2341
2867
|
aiModel: options.aiModel,
|
|
@@ -2389,7 +2915,7 @@ var SearchResource = class {
|
|
|
2389
2915
|
* @example
|
|
2390
2916
|
* ```ts
|
|
2391
2917
|
* const result = await client.search.andScrape("promo codes", {
|
|
2392
|
-
*
|
|
2918
|
+
* maxScrapeResults: 5
|
|
2393
2919
|
* });
|
|
2394
2920
|
* console.log(result.data.scrapedJobIds);
|
|
2395
2921
|
* ```
|
|
@@ -2397,7 +2923,7 @@ var SearchResource = class {
|
|
|
2397
2923
|
async andScrape(query, options) {
|
|
2398
2924
|
return this.create({
|
|
2399
2925
|
query,
|
|
2400
|
-
|
|
2926
|
+
scrapeResults: true,
|
|
2401
2927
|
...options
|
|
2402
2928
|
});
|
|
2403
2929
|
}
|
|
@@ -2811,6 +3337,20 @@ var DealCrawl = class {
|
|
|
2811
3337
|
* ```
|
|
2812
3338
|
*/
|
|
2813
3339
|
dork;
|
|
3340
|
+
/**
|
|
3341
|
+
* Convert resource - HTML to Markdown conversion
|
|
3342
|
+
*
|
|
3343
|
+
* @example
|
|
3344
|
+
* ```ts
|
|
3345
|
+
* const result = await client.convert.htmlToMarkdown({
|
|
3346
|
+
* html: "<h1>Title</h1><p>Content</p>",
|
|
3347
|
+
* baseUrl: "https://example.com",
|
|
3348
|
+
* options: { removeNoise: true }
|
|
3349
|
+
* });
|
|
3350
|
+
* console.log(result.data.markdown);
|
|
3351
|
+
* ```
|
|
3352
|
+
*/
|
|
3353
|
+
convert;
|
|
2814
3354
|
/**
|
|
2815
3355
|
* Agent resource - AI-powered autonomous web navigation
|
|
2816
3356
|
*
|
|
@@ -2892,6 +3432,80 @@ var DealCrawl = class {
|
|
|
2892
3432
|
* ```
|
|
2893
3433
|
*/
|
|
2894
3434
|
account;
|
|
3435
|
+
/**
|
|
3436
|
+
* Screenshots resource - Screenshot signed URL management
|
|
3437
|
+
*
|
|
3438
|
+
* @example
|
|
3439
|
+
* ```ts
|
|
3440
|
+
* // Refresh a signed URL before expiration
|
|
3441
|
+
* const refreshed = await client.screenshots.refresh({
|
|
3442
|
+
* path: "job_abc123/1234567890_nanoid_example.png",
|
|
3443
|
+
* ttl: 604800 // 7 days
|
|
3444
|
+
* });
|
|
3445
|
+
*
|
|
3446
|
+
* // Get tier-specific TTL limits
|
|
3447
|
+
* const limits = await client.screenshots.getLimits();
|
|
3448
|
+
* console.log(limits.formattedLimits.max); // "7 days"
|
|
3449
|
+
* ```
|
|
3450
|
+
*/
|
|
3451
|
+
screenshots;
|
|
3452
|
+
/**
|
|
3453
|
+
* Auth resource - SSE (Server-Sent Events) authentication
|
|
3454
|
+
*
|
|
3455
|
+
* @example
|
|
3456
|
+
* ```ts
|
|
3457
|
+
* // Generate SSE token for browser EventSource
|
|
3458
|
+
* const { token, expiresAt } = await client.auth.generateSSEToken();
|
|
3459
|
+
*
|
|
3460
|
+
* // Use in browser
|
|
3461
|
+
* const eventSource = new EventSource(`/v1/events?token=${token}`);
|
|
3462
|
+
*
|
|
3463
|
+
* // Generate token for specific job
|
|
3464
|
+
* const jobToken = await client.auth.generateSSEToken({ jobId: "job_123" });
|
|
3465
|
+
*
|
|
3466
|
+
* // Check connection limits
|
|
3467
|
+
* const limits = await client.auth.getLimits();
|
|
3468
|
+
* console.log(`Available connections: ${limits.sse.available}`);
|
|
3469
|
+
* ```
|
|
3470
|
+
*/
|
|
3471
|
+
auth;
|
|
3472
|
+
/**
|
|
3473
|
+
* Events resource - Real-time SSE event streaming (Browser only)
|
|
3474
|
+
*
|
|
3475
|
+
* IMPORTANT: This resource only works in browsers. For Node.js, use polling via client.status.get()
|
|
3476
|
+
*
|
|
3477
|
+
* @example Browser Usage
|
|
3478
|
+
* ```ts
|
|
3479
|
+
* // 1. Generate SSE token
|
|
3480
|
+
* const { token } = await client.auth.generateSSEToken();
|
|
3481
|
+
*
|
|
3482
|
+
* // 2. Subscribe to all events
|
|
3483
|
+
* const eventSource = client.events.subscribe(token, {
|
|
3484
|
+
* onEvent: (event) => {
|
|
3485
|
+
* console.log('Event:', event.type, JSON.parse(event.data));
|
|
3486
|
+
* }
|
|
3487
|
+
* });
|
|
3488
|
+
*
|
|
3489
|
+
* // 3. Or subscribe to specific job
|
|
3490
|
+
* const jobEvents = client.events.subscribeToJob('job_123', token, {
|
|
3491
|
+
* onEvent: (event) => {
|
|
3492
|
+
* const data = JSON.parse(event.data);
|
|
3493
|
+
* console.log(`Progress: ${data.progress}%`);
|
|
3494
|
+
* }
|
|
3495
|
+
* });
|
|
3496
|
+
*
|
|
3497
|
+
* // 4. Listen for specific events
|
|
3498
|
+
* eventSource.addEventListener('job.completed', (event) => {
|
|
3499
|
+
* const data = JSON.parse(event.data);
|
|
3500
|
+
* console.log('Job completed!', data.summary);
|
|
3501
|
+
* eventSource.close();
|
|
3502
|
+
* });
|
|
3503
|
+
*
|
|
3504
|
+
* // 5. Clean up
|
|
3505
|
+
* eventSource.close();
|
|
3506
|
+
* ```
|
|
3507
|
+
*/
|
|
3508
|
+
events;
|
|
2895
3509
|
// ============================================
|
|
2896
3510
|
// CONSTRUCTOR
|
|
2897
3511
|
// ============================================
|
|
@@ -2942,12 +3556,16 @@ var DealCrawl = class {
|
|
|
2942
3556
|
this.crawl = new CrawlResource(this.ctx);
|
|
2943
3557
|
this.extract = new ExtractResource(this.ctx);
|
|
2944
3558
|
this.dork = new DorkResource(this.ctx);
|
|
3559
|
+
this.convert = new ConvertResource(this.ctx);
|
|
2945
3560
|
this.agent = new AgentResource(this.ctx);
|
|
2946
3561
|
this.status = new StatusResource(this.ctx);
|
|
2947
3562
|
this.data = new DataResource(this.ctx);
|
|
2948
3563
|
this.webhooks = new WebhooksResource(this.ctx);
|
|
2949
3564
|
this.keys = new KeysResource(this.ctx);
|
|
2950
3565
|
this.account = new AccountResource(this.ctx);
|
|
3566
|
+
this.screenshots = new ScreenshotsResource(this.ctx);
|
|
3567
|
+
this.auth = new AuthResource(this.ctx);
|
|
3568
|
+
this.events = new EventsResource(this.ctx);
|
|
2951
3569
|
}
|
|
2952
3570
|
// ============================================
|
|
2953
3571
|
// POLLING METHODS
|
|
@@ -3093,6 +3711,8 @@ var DealCrawl = class {
|
|
|
3093
3711
|
|
|
3094
3712
|
exports.AccountResource = AccountResource;
|
|
3095
3713
|
exports.AgentResource = AgentResource;
|
|
3714
|
+
exports.AuthResource = AuthResource;
|
|
3715
|
+
exports.ConvertResource = ConvertResource;
|
|
3096
3716
|
exports.CrawlResource = CrawlResource;
|
|
3097
3717
|
exports.DEFAULT_CONFIG = DEFAULT_CONFIG;
|
|
3098
3718
|
exports.DataResource = DataResource;
|
|
@@ -3101,9 +3721,11 @@ exports.DealCrawlError = DealCrawlError;
|
|
|
3101
3721
|
exports.DorkResource = DorkResource;
|
|
3102
3722
|
exports.ERROR_CODES = ERROR_CODES;
|
|
3103
3723
|
exports.ERROR_MESSAGES = ERROR_MESSAGES;
|
|
3724
|
+
exports.EventsResource = EventsResource;
|
|
3104
3725
|
exports.ExtractResource = ExtractResource;
|
|
3105
3726
|
exports.KeysResource = KeysResource;
|
|
3106
3727
|
exports.ScrapeResource = ScrapeResource;
|
|
3728
|
+
exports.ScreenshotsResource = ScreenshotsResource;
|
|
3107
3729
|
exports.SearchResource = SearchResource;
|
|
3108
3730
|
exports.StatusResource = StatusResource;
|
|
3109
3731
|
exports.WebhooksResource = WebhooksResource;
|