@dealcrawl/sdk 2.10.0 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +223 -37
- package/dist/index.d.mts +1145 -26
- package/dist/index.d.ts +1145 -26
- package/dist/index.js +631 -9
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +628 -10
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1118,6 +1118,245 @@ var AgentResource = class {
|
|
|
1118
1118
|
}
|
|
1119
1119
|
};
|
|
1120
1120
|
|
|
1121
|
+
// src/resources/auth.ts
|
|
1122
|
+
var AuthResource = class {
  /**
   * @param ctx - Request context; stored as-is and handed to the `post` / `get`
   *   helpers on every call.
   */
  constructor(ctx) {
    this.ctx = ctx;
  }

  /**
   * Request an SSE authentication token.
   *
   * Sends `POST /v1/auth/sse-token` with `options` as the request body (an empty
   * object when `options` is null or undefined) and resolves with the `data`
   * property of whatever `post` returns.
   *
   * Background carried over from the upstream SDK notes (server-side behaviour,
   * not verifiable from this file):
   * - Browser `EventSource` cannot send custom headers, hence a token in the URL.
   * - The request itself is authenticated with the API key (Bearer token).
   * - Tokens are short-lived (5 minutes), stored in Redis and revocable.
   * - A token can be restricted to a single job by passing `jobId`.
   *
   * @param options - Optional request body, e.g. `{ jobId: "job_abc123" }`.
   * @returns The `data` payload of the response; upstream examples destructure
   *   `token` and `expiresAt` from it.
   *
   * @example
   * ```ts
   * // 1. Generate token
   * const { token, expiresAt } = await client.auth.generateSSEToken();
   * console.log(`Token expires at: ${expiresAt}`);
   *
   * // 2. Use in browser EventSource
   * const eventSource = new EventSource(`/v1/events?token=${token}`);
   *
   * // 3. For specific job only
   * const jobToken = await client.auth.generateSSEToken({ jobId: "job_abc123" });
   * const jobEvents = new EventSource(`/v1/events/job_abc123?token=${jobToken.token}`);
   * ```
   */
  async generateSSEToken(options) {
    const requestBody = options ?? {};
    const { data } = await post(this.ctx, "/v1/auth/sse-token", requestBody);
    return data;
  }

  /**
   * Fetch the SSE connection limits that apply to the current account tier.
   *
   * Sends `GET /v1/auth/limits` and resolves with the `data` property of
   * whatever `get` returns.
   *
   * Tier limits listed in the upstream SDK notes (server-side, may change):
   * - Free: 2 concurrent connections
   * - Pro: 10 concurrent connections
   * - Enterprise: 50 concurrent connections
   *
   * @returns The `data` payload of the response; upstream examples read `tier`,
   *   `sse.maxConnections`, `sse.currentConnections` and `sse.available`.
   *
   * @example
   * ```ts
   * const limits = await client.auth.getLimits();
   *
   * // Check before opening new connection
   * if (limits.sse.available > 0) {
   *   const token = await client.auth.generateSSEToken();
   *   const eventSource = new EventSource(`/v1/events?token=${token.token}`);
   * } else {
   *   console.error('No available SSE connection slots');
   * }
   * ```
   */
  async getLimits() {
    const { data } = await get(this.ctx, "/v1/auth/limits");
    return data;
  }
};
|
|
1202
|
+
|
|
1203
|
+
// src/resources/convert.ts
|
|
1204
|
+
var ConvertResource = class {
  /**
   * @param ctx - Request context; stored as-is and handed to the `post` helper.
   */
  constructor(ctx) {
    this.ctx = ctx;
  }

  /**
   * Convert HTML to Markdown via `POST /v1/convert`.
   *
   * Only three fields of `options` are forwarded: `html`, `baseUrl` and the
   * nested `options` object. Anything else on the argument is dropped.
   *
   * Capabilities described by the upstream SDK notes (implemented server-side):
   * GitHub Flavored Markdown output (tables, strikethrough, task lists), noise
   * removal (scripts, ads, navigation), relative URL resolution, element
   * exclusion via CSS selectors, and output length limiting.
   *
   * @param options - Conversion request: `{ html, baseUrl?, options? }`.
   * @returns The `data` property of the value returned by `post`.
   *   NOTE(review): upstream examples read `result.data.markdown` from this
   *   return value, which implies one more `data` level — confirm against the API.
   *
   * @example With conversion options
   * ```ts
   * const result = await client.convert.htmlToMarkdown({
   *   html: htmlContent,
   *   baseUrl: "https://shop.example.com",
   *   options: {
   *     gfmTables: true,
   *     removeNoise: true,
   *     excludeSelectors: [".advertisement", "#sidebar"],
   *     absoluteUrls: true,
   *     maxLength: 100000,
   *     includeImages: true,
   *     includeLinks: true
   *   }
   * });
   * ```
   *
   * @example Converting scraped HTML
   * ```ts
   * const scrapeJob = await client.scrape.create({ url: "https://example.com/article" });
   * const scrapeResult = await client.waitForResult(scrapeJob.jobId);
   *
   * const markdown = await client.convert.htmlToMarkdown({
   *   html: scrapeResult.data.html,
   *   baseUrl: scrapeResult.data.url,
   *   options: { removeNoise: true, onlyMainContent: true }
   * });
   * ```
   */
  async htmlToMarkdown(options) {
    const { html, baseUrl, options: conversionOptions } = options;
    const response = await post(this.ctx, "/v1/convert", {
      html,
      baseUrl,
      options: conversionOptions
    });
    return response.data;
  }

  /**
   * Alias of {@link htmlToMarkdown}; the argument is passed through untouched.
   *
   * @example
   * ```ts
   * const result = await client.convert.toMarkdown({
   *   html: "<h1>Hello</h1>"
   * });
   * ```
   */
  async toMarkdown(options) {
    return this.htmlToMarkdown(options);
  }

  /**
   * Convert HTML without sending any conversion options.
   *
   * @param html - HTML content to convert
   * @param baseUrl - Optional base URL for resolving relative links
   * @returns Same value as {@link htmlToMarkdown}
   *
   * @example
   * ```ts
   * const result = await client.convert.quick(
   *   "<h1>Title</h1><p>Content</p>",
   *   "https://example.com"
   * );
   * ```
   */
  async quick(html, baseUrl) {
    const request = { html, baseUrl };
    return this.htmlToMarkdown(request);
  }

  /**
   * Convert HTML with noise removal switched on and a fixed list of excluded
   * selectors (navigation, footer, asides, ads, sidebar, comments).
   *
   * @param html - HTML content to convert
   * @param baseUrl - Optional base URL
   * @returns Same value as {@link htmlToMarkdown}
   *
   * @example
   * ```ts
   * // Extract just the main content from a messy page
   * const result = await client.convert.clean(messyHtml, "https://example.com");
   * ```
   */
  async clean(html, baseUrl) {
    // Built per call so callers never share (or mutate) a common array.
    const clutterSelectors = [
      "nav",
      "footer",
      "aside",
      ".advertisement",
      ".ad",
      ".sidebar",
      "#comments"
    ];
    const options = { removeNoise: true, excludeSelectors: clutterSelectors };
    return this.htmlToMarkdown({ html, baseUrl, options });
  }
};
|
|
1359
|
+
|
|
1121
1360
|
// src/resources/crawl.ts
|
|
1122
1361
|
var CRAWL_TEMPLATES = {
|
|
1123
1362
|
ecommerce: {
|
|
@@ -1720,6 +1959,236 @@ var DorkResource = class {
|
|
|
1720
1959
|
}
|
|
1721
1960
|
};
|
|
1722
1961
|
|
|
1962
|
+
// src/resources/events.ts
|
|
1963
|
+
var EventsResource = class {
  /**
   * @param ctx - Request context. This class reads `ctx.baseUrl` (base for the
   *   SSE endpoints) and `ctx.apiKey` (Bearer token used by `waitForCompletion`).
   */
  constructor(ctx) {
    this.ctx = ctx;
  }

  /**
   * Throw `message` when the runtime has no global `EventSource`.
   */
  #requireEventSource(message) {
    if (typeof EventSource === "undefined") {
      throw new Error(message);
    }
  }

  /**
   * Open an EventSource on `url` (with `token` added as a query parameter) and
   * attach the optional `onEvent` / `onError` / `onOpen` callbacks.
   */
  #openStream(url, token, options) {
    url.searchParams.set("token", token);
    const eventSource = new EventSource(url.toString());
    if (options?.onEvent) {
      eventSource.onmessage = options.onEvent;
    }
    if (options?.onError) {
      // The native error event carries no details, so callers get a plain Error.
      eventSource.onerror = () => {
        options.onError(
          new Error("SSE connection error. Will auto-reconnect if enabled.")
        );
      };
    }
    if (options?.onOpen) {
      eventSource.onopen = options.onOpen;
    }
    return eventSource;
  }

  /**
   * Subscribe to events for the authenticated client.
   *
   * Opens an SSE connection to `/v1/events` (resolved against `ctx.baseUrl`)
   * with the token passed as the `token` query parameter.
   *
   * `onEvent` is installed as `eventSource.onmessage`, so it only receives
   * events dispatched with the default `message` type. Named events (for
   * example `job.completed`) must be observed with
   * `eventSource.addEventListener(name, handler)` on the returned object.
   *
   * Event names listed by the upstream SDK notes:
   * - Job lifecycle: job.created, job.queued, job.started, job.progress,
   *   job.completed, job.failed, job.cancelled
   * - Job details: job.log, job.metric, job.alert, job.checkpoint
   * - Deals: deal.found, deal.validated
   * - System: ping, connection.open, connection.close, error
   *
   * Server behaviour listed by the upstream SDK notes (not verifiable here):
   * event replay via Last-Event-ID, keepalive pings every 15 seconds, and a
   * maximum connection time of 1 hour.
   *
   * @param token - SSE authentication token from client.auth.generateSSEToken()
   * @param options - Optional callbacks: `onEvent`, `onError`, `onOpen`
   * @returns The underlying `EventSource`; call `.close()` to stop listening
   * @throws {Error} When `EventSource` is not available in the runtime
   *
   * @example
   * ```ts
   * const { token } = await client.auth.generateSSEToken();
   *
   * const eventSource = client.events.subscribe(token, {
   *   onEvent: (event) => console.log('Message:', JSON.parse(event.data)),
   *   onError: (error) => console.error('SSE error:', error),
   *   onOpen: () => console.log('SSE connection opened')
   * });
   *
   * // Listen for specific event types
   * eventSource.addEventListener('job.completed', (event) => {
   *   console.log('Job completed:', JSON.parse(event.data));
   * });
   *
   * // Clean up
   * eventSource.close();
   * ```
   */
  subscribe(token, options) {
    this.#requireEventSource(
      "EventSource is not available. SSE subscriptions only work in browsers. For Node.js, use polling via client.status.get() instead."
    );
    const url = new URL("/v1/events", this.ctx.baseUrl);
    return this.#openStream(url, token, options);
  }

  /**
   * Subscribe to events for a specific job.
   *
   * Opens an SSE connection to `/v1/events/{jobId}`. The job id is
   * percent-encoded so characters such as `/`, `?` or `#` cannot change the
   * request path. Callback wiring is identical to {@link subscribe}.
   *
   * @param jobId - Job ID to subscribe to
   * @param token - SSE authentication token
   * @param options - Optional callbacks: `onEvent`, `onError`, `onOpen`
   * @returns The underlying `EventSource`
   * @throws {Error} When `EventSource` is not available in the runtime
   *
   * @example
   * ```ts
   * const job = await client.scrape.create({ url: "https://example.com" });
   * const { token } = await client.auth.generateSSEToken({ jobId: job.jobId });
   *
   * const eventSource = client.events.subscribeToJob(job.jobId, token);
   *
   * eventSource.addEventListener('job.progress', (event) => {
   *   console.log(`Progress: ${JSON.parse(event.data).progress}%`);
   * });
   * eventSource.addEventListener('job.completed', (event) => {
   *   console.log('Scrape completed!', JSON.parse(event.data).summary);
   *   eventSource.close();
   * });
   * eventSource.addEventListener('job.failed', (event) => {
   *   console.error('Job failed:', JSON.parse(event.data).error);
   *   eventSource.close();
   * });
   * ```
   */
  subscribeToJob(jobId, token, options) {
    this.#requireEventSource(
      "EventSource is not available. SSE subscriptions only work in browsers. For Node.js, use polling via client.status.get() instead."
    );
    const url = new URL(
      `/v1/events/${encodeURIComponent(jobId)}`,
      this.ctx.baseUrl
    );
    return this.#openStream(url, token, options);
  }

  /**
   * Helper: wait for job completion via SSE.
   *
   * Requests a job-scoped SSE token, subscribes to the job, and settles when a
   * terminal event arrives. The stream is closed before the promise settles.
   *
   * - `job.progress`  -> `onProgress(data.progress)` (if a callback was given)
   * - `job.completed` -> resolves with the parsed event payload
   * - `job.failed`    -> rejects with `data.error.message` or "Job failed"
   * - `job.cancelled` -> rejects with "Job was cancelled"
   * - connection error -> rejects (no reconnection is attempted here)
   *
   * Every failure on the way (network error, non-OK token response, unparsable
   * token response, malformed completion payload) rejects the returned promise
   * instead of leaving it pending.
   *
   * @param jobId - Job ID to wait for
   * @param onProgress - Optional progress callback
   * @returns Promise resolving with the parsed `job.completed` payload
   *
   * @example
   * ```ts
   * const job = await client.scrape.create({ url: "https://example.com" });
   *
   * const result = await client.events.waitForCompletion(job.jobId, (progress) => {
   *   console.log(`Progress: ${progress}%`);
   * });
   *
   * console.log('Job completed:', result);
   * ```
   */
  async waitForCompletion(jobId, onProgress) {
    this.#requireEventSource(
      "waitForCompletion() only works in browsers. For Node.js, use client.waitForResult() instead."
    );

    // Awaited outside the Promise executor so failures reject the caller's
    // promise rather than becoming unhandled rejections.
    const tokenResponse = await fetch(
      `${this.ctx.baseUrl}/v1/auth/sse-token`,
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${this.ctx.apiKey}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify({ jobId })
      }
    );
    if (!tokenResponse.ok) {
      throw new Error("Failed to generate SSE token");
    }
    const payload = await tokenResponse.json();
    // NOTE(review): the raw response shape is not visible from this file; accept
    // both a top-level `token` and a `data.token` envelope — confirm with the API.
    const token = payload?.token ?? payload?.data?.token;
    if (!token) {
      throw new Error("Failed to generate SSE token: response did not contain a token");
    }

    return new Promise((resolve, reject) => {
      let eventSource;
      // Close the stream first, then settle, so no further events are processed.
      const finish = (settle, value) => {
        eventSource?.close();
        settle(value);
      };

      eventSource = this.subscribeToJob(jobId, token, {
        onError: (error) => finish(reject, error)
      });

      eventSource.addEventListener("job.progress", (event) => {
        const data = JSON.parse(event.data);
        if (onProgress) {
          onProgress(data.progress);
        }
      });

      eventSource.addEventListener("job.completed", (event) => {
        let data;
        try {
          data = JSON.parse(event.data);
        } catch (error) {
          // A malformed terminal payload must not leave the promise pending.
          finish(reject, error);
          return;
        }
        finish(resolve, data);
      });

      eventSource.addEventListener("job.failed", (event) => {
        let message;
        try {
          message = JSON.parse(event.data)?.error?.message;
        } catch {
          // Unparsable failure payload: fall back to the generic message below.
          message = undefined;
        }
        finish(reject, new Error(message || "Job failed"));
      });

      eventSource.addEventListener("job.cancelled", () => {
        finish(reject, new Error("Job was cancelled"));
      });
    });
  }
};
|
|
2191
|
+
|
|
1723
2192
|
// src/resources/extract.ts
|
|
1724
2193
|
var ExtractResource = class {
|
|
1725
2194
|
constructor(ctx) {
|
|
@@ -2160,6 +2629,7 @@ var ScrapeResource = class {
|
|
|
2160
2629
|
async create(options) {
|
|
2161
2630
|
const body = {
|
|
2162
2631
|
url: options.url,
|
|
2632
|
+
noStore: options.noStore,
|
|
2163
2633
|
detectSignals: options.detectSignals ?? true,
|
|
2164
2634
|
extractWithAI: options.extractWithAI,
|
|
2165
2635
|
extractDeal: options.extractDeal,
|
|
@@ -2172,7 +2642,10 @@ var ScrapeResource = class {
|
|
|
2172
2642
|
excludeSelectors: options.excludeSelectors,
|
|
2173
2643
|
onlyMainContent: options.onlyMainContent,
|
|
2174
2644
|
headers: options.headers,
|
|
2175
|
-
timeout: options.timeout
|
|
2645
|
+
timeout: options.timeout,
|
|
2646
|
+
outputMarkdown: options.outputMarkdown,
|
|
2647
|
+
markdownBaseUrl: options.markdownBaseUrl,
|
|
2648
|
+
actions: options.actions
|
|
2176
2649
|
};
|
|
2177
2650
|
const result = await post(this.ctx, "/v1/scrape", body);
|
|
2178
2651
|
return result.data;
|
|
@@ -2248,7 +2721,8 @@ var ScrapeResource = class {
|
|
|
2248
2721
|
* { url: "https://shop1.com/product1" },
|
|
2249
2722
|
* { url: "https://shop2.com/deal", extractDeal: true }
|
|
2250
2723
|
* ],
|
|
2251
|
-
* defaults: { detectSignals: true }
|
|
2724
|
+
* defaults: { detectSignals: true },
|
|
2725
|
+
* ignoreInvalidURLs: true
|
|
2252
2726
|
* });
|
|
2253
2727
|
* console.log(batch.batchId, batch.results);
|
|
2254
2728
|
* ```
|
|
@@ -2259,7 +2733,8 @@ var ScrapeResource = class {
|
|
|
2259
2733
|
defaults: options.defaults,
|
|
2260
2734
|
webhookUrl: options.webhookUrl,
|
|
2261
2735
|
priority: options.priority,
|
|
2262
|
-
delayMs: options.
|
|
2736
|
+
delayMs: options.delayMs,
|
|
2737
|
+
ignoreInvalidURLs: options.ignoreInvalidURLs
|
|
2263
2738
|
};
|
|
2264
2739
|
const result = await post(
|
|
2265
2740
|
this.ctx,
|
|
@@ -2309,6 +2784,57 @@ var ScrapeResource = class {
|
|
|
2309
2784
|
}
|
|
2310
2785
|
};
|
|
2311
2786
|
|
|
2787
|
+
// src/resources/screenshots.ts
|
|
2788
|
+
var ScreenshotsResource = class {
  /**
   * @param ctx - Request context; stored as-is and handed to the `post` / `get`
   *   helpers on every call.
   */
  constructor(ctx) {
    this.ctx = ctx;
  }

  /**
   * Refresh a screenshot's signed URL before it expires.
   *
   * Sends `POST /v1/screenshots/refresh` with exactly three fields taken from
   * `options`: `path`, `ttl` and `bucket`. Resolves with the `data` property
   * of whatever `post` returns.
   *
   * @param options - `{ path, ttl?, bucket? }`; upstream examples pass `ttl`
   *   in seconds (e.g. `604800` for 7 days).
   * @returns The `data` payload of the response; upstream examples read `url`,
   *   `expiresAt` and `tierLimits` from it.
   *
   * @example
   * ```ts
   * const refreshed = await client.screenshots.refresh({
   *   path: "job_abc123/1234567890_nanoid_example.png",
   *   ttl: 604800 // 7 days
   * });
   * console.log(refreshed.url);        // New signed URL
   * console.log(refreshed.expiresAt);  // "2026-01-25T12:00:00Z"
   * console.log(refreshed.tierLimits); // { min: 3600, max: 604800, default: 604800 }
   * ```
   */
  async refresh(options) {
    const { path, ttl, bucket } = options;
    const { data } = await post(this.ctx, "/v1/screenshots/refresh", {
      path,
      ttl,
      bucket
    });
    return data;
  }

  /**
   * Fetch the signed-URL TTL limits for the current tier.
   *
   * Sends `GET /v1/screenshots/limits` and resolves with the `data` property
   * of whatever `get` returns.
   *
   * @returns The `data` payload of the response; upstream examples read `tier`,
   *   `limits.max` and `formattedLimits.max` from it.
   *
   * @example
   * ```ts
   * const limits = await client.screenshots.getLimits();
   * console.log(limits.tier);                // "pro"
   * console.log(limits.limits.max);          // 604800 (7 days in seconds)
   * console.log(limits.formattedLimits.max); // "7 days"
   * ```
   */
  async getLimits() {
    const { data } = await get(this.ctx, "/v1/screenshots/limits");
    return data;
  }
};
|
|
2837
|
+
|
|
2312
2838
|
// src/resources/search.ts
|
|
2313
2839
|
var SearchResource = class {
|
|
2314
2840
|
constructor(ctx) {
|
|
@@ -2321,7 +2847,7 @@ var SearchResource = class {
|
|
|
2321
2847
|
* ```ts
|
|
2322
2848
|
* const result = await client.search.create({
|
|
2323
2849
|
* query: "laptop deals black friday",
|
|
2324
|
-
*
|
|
2850
|
+
* limit: 20,
|
|
2325
2851
|
* useDealScoring: true
|
|
2326
2852
|
* });
|
|
2327
2853
|
* ```
|
|
@@ -2329,9 +2855,9 @@ var SearchResource = class {
|
|
|
2329
2855
|
async create(options) {
|
|
2330
2856
|
const body = {
|
|
2331
2857
|
query: options.query,
|
|
2332
|
-
limit: options.
|
|
2333
|
-
scrapeResults: options.
|
|
2334
|
-
maxScrapeResults: options.
|
|
2858
|
+
limit: options.limit,
|
|
2859
|
+
scrapeResults: options.scrapeResults,
|
|
2860
|
+
maxScrapeResults: options.maxScrapeResults,
|
|
2335
2861
|
useAiOptimization: options.useAiOptimization,
|
|
2336
2862
|
aiProvider: options.aiProvider,
|
|
2337
2863
|
aiModel: options.aiModel,
|
|
@@ -2385,7 +2911,7 @@ var SearchResource = class {
|
|
|
2385
2911
|
* @example
|
|
2386
2912
|
* ```ts
|
|
2387
2913
|
* const result = await client.search.andScrape("promo codes", {
|
|
2388
|
-
*
|
|
2914
|
+
* maxScrapeResults: 5
|
|
2389
2915
|
* });
|
|
2390
2916
|
* console.log(result.data.scrapedJobIds);
|
|
2391
2917
|
* ```
|
|
@@ -2393,7 +2919,7 @@ var SearchResource = class {
|
|
|
2393
2919
|
async andScrape(query, options) {
|
|
2394
2920
|
return this.create({
|
|
2395
2921
|
query,
|
|
2396
|
-
|
|
2922
|
+
scrapeResults: true,
|
|
2397
2923
|
...options
|
|
2398
2924
|
});
|
|
2399
2925
|
}
|
|
@@ -2807,6 +3333,20 @@ var DealCrawl = class {
|
|
|
2807
3333
|
* ```
|
|
2808
3334
|
*/
|
|
2809
3335
|
dork;
|
|
3336
|
+
/**
|
|
3337
|
+
* Convert resource - HTML to Markdown conversion
|
|
3338
|
+
*
|
|
3339
|
+
* @example
|
|
3340
|
+
* ```ts
|
|
3341
|
+
* const result = await client.convert.htmlToMarkdown({
|
|
3342
|
+
* html: "<h1>Title</h1><p>Content</p>",
|
|
3343
|
+
* baseUrl: "https://example.com",
|
|
3344
|
+
* options: { removeNoise: true }
|
|
3345
|
+
* });
|
|
3346
|
+
* console.log(result.data.markdown);
|
|
3347
|
+
* ```
|
|
3348
|
+
*/
|
|
3349
|
+
convert;
|
|
2810
3350
|
/**
|
|
2811
3351
|
* Agent resource - AI-powered autonomous web navigation
|
|
2812
3352
|
*
|
|
@@ -2888,6 +3428,80 @@ var DealCrawl = class {
|
|
|
2888
3428
|
* ```
|
|
2889
3429
|
*/
|
|
2890
3430
|
account;
|
|
3431
|
+
/**
|
|
3432
|
+
* Screenshots resource - Screenshot signed URL management
|
|
3433
|
+
*
|
|
3434
|
+
* @example
|
|
3435
|
+
* ```ts
|
|
3436
|
+
* // Refresh a signed URL before expiration
|
|
3437
|
+
* const refreshed = await client.screenshots.refresh({
|
|
3438
|
+
* path: "job_abc123/1234567890_nanoid_example.png",
|
|
3439
|
+
* ttl: 604800 // 7 days
|
|
3440
|
+
* });
|
|
3441
|
+
*
|
|
3442
|
+
* // Get tier-specific TTL limits
|
|
3443
|
+
* const limits = await client.screenshots.getLimits();
|
|
3444
|
+
* console.log(limits.formattedLimits.max); // "7 days"
|
|
3445
|
+
* ```
|
|
3446
|
+
*/
|
|
3447
|
+
screenshots;
|
|
3448
|
+
/**
|
|
3449
|
+
* Auth resource - SSE (Server-Sent Events) authentication
|
|
3450
|
+
*
|
|
3451
|
+
* @example
|
|
3452
|
+
* ```ts
|
|
3453
|
+
* // Generate SSE token for browser EventSource
|
|
3454
|
+
* const { token, expiresAt } = await client.auth.generateSSEToken();
|
|
3455
|
+
*
|
|
3456
|
+
* // Use in browser
|
|
3457
|
+
* const eventSource = new EventSource(`/v1/events?token=${token}`);
|
|
3458
|
+
*
|
|
3459
|
+
* // Generate token for specific job
|
|
3460
|
+
* const jobToken = await client.auth.generateSSEToken({ jobId: "job_123" });
|
|
3461
|
+
*
|
|
3462
|
+
* // Check connection limits
|
|
3463
|
+
* const limits = await client.auth.getLimits();
|
|
3464
|
+
* console.log(`Available connections: ${limits.sse.available}`);
|
|
3465
|
+
* ```
|
|
3466
|
+
*/
|
|
3467
|
+
auth;
|
|
3468
|
+
/**
|
|
3469
|
+
* Events resource - Real-time SSE event streaming (Browser only)
|
|
3470
|
+
*
|
|
3471
|
+
* IMPORTANT: This resource only works in browsers. For Node.js, use polling via client.status.get()
|
|
3472
|
+
*
|
|
3473
|
+
* @example Browser Usage
|
|
3474
|
+
* ```ts
|
|
3475
|
+
* // 1. Generate SSE token
|
|
3476
|
+
* const { token } = await client.auth.generateSSEToken();
|
|
3477
|
+
*
|
|
3478
|
+
* // 2. Subscribe to all events
|
|
3479
|
+
* const eventSource = client.events.subscribe(token, {
|
|
3480
|
+
* onEvent: (event) => {
|
|
3481
|
+
* console.log('Event:', event.type, JSON.parse(event.data));
|
|
3482
|
+
* }
|
|
3483
|
+
* });
|
|
3484
|
+
*
|
|
3485
|
+
* // 3. Or subscribe to specific job
|
|
3486
|
+
* const jobEvents = client.events.subscribeToJob('job_123', token, {
|
|
3487
|
+
* onEvent: (event) => {
|
|
3488
|
+
* const data = JSON.parse(event.data);
|
|
3489
|
+
* console.log(`Progress: ${data.progress}%`);
|
|
3490
|
+
* }
|
|
3491
|
+
* });
|
|
3492
|
+
*
|
|
3493
|
+
* // 4. Listen for specific events
|
|
3494
|
+
* eventSource.addEventListener('job.completed', (event) => {
|
|
3495
|
+
* const data = JSON.parse(event.data);
|
|
3496
|
+
* console.log('Job completed!', data.summary);
|
|
3497
|
+
* eventSource.close();
|
|
3498
|
+
* });
|
|
3499
|
+
*
|
|
3500
|
+
* // 5. Clean up
|
|
3501
|
+
* eventSource.close();
|
|
3502
|
+
* ```
|
|
3503
|
+
*/
|
|
3504
|
+
events;
|
|
2891
3505
|
// ============================================
|
|
2892
3506
|
// CONSTRUCTOR
|
|
2893
3507
|
// ============================================
|
|
@@ -2938,12 +3552,16 @@ var DealCrawl = class {
|
|
|
2938
3552
|
this.crawl = new CrawlResource(this.ctx);
|
|
2939
3553
|
this.extract = new ExtractResource(this.ctx);
|
|
2940
3554
|
this.dork = new DorkResource(this.ctx);
|
|
3555
|
+
this.convert = new ConvertResource(this.ctx);
|
|
2941
3556
|
this.agent = new AgentResource(this.ctx);
|
|
2942
3557
|
this.status = new StatusResource(this.ctx);
|
|
2943
3558
|
this.data = new DataResource(this.ctx);
|
|
2944
3559
|
this.webhooks = new WebhooksResource(this.ctx);
|
|
2945
3560
|
this.keys = new KeysResource(this.ctx);
|
|
2946
3561
|
this.account = new AccountResource(this.ctx);
|
|
3562
|
+
this.screenshots = new ScreenshotsResource(this.ctx);
|
|
3563
|
+
this.auth = new AuthResource(this.ctx);
|
|
3564
|
+
this.events = new EventsResource(this.ctx);
|
|
2947
3565
|
}
|
|
2948
3566
|
// ============================================
|
|
2949
3567
|
// POLLING METHODS
|
|
@@ -3087,6 +3705,6 @@ var DealCrawl = class {
|
|
|
3087
3705
|
}
|
|
3088
3706
|
};
|
|
3089
3707
|
|
|
3090
|
-
export { AccountResource, AgentResource, CrawlResource, DEFAULT_CONFIG, DataResource, DealCrawl, DealCrawlError, DorkResource, ERROR_CODES, ERROR_MESSAGES, ExtractResource, KeysResource, ScrapeResource, SearchResource, StatusResource, WebhooksResource, DealCrawl as default, getErrorMessage, pollUntil, waitForAll, waitForAny, waitForResult };
|
|
3708
|
+
export { AccountResource, AgentResource, AuthResource, ConvertResource, CrawlResource, DEFAULT_CONFIG, DataResource, DealCrawl, DealCrawlError, DorkResource, ERROR_CODES, ERROR_MESSAGES, EventsResource, ExtractResource, KeysResource, ScrapeResource, ScreenshotsResource, SearchResource, StatusResource, WebhooksResource, DealCrawl as default, getErrorMessage, pollUntil, waitForAll, waitForAny, waitForResult };
|
|
3091
3709
|
//# sourceMappingURL=index.mjs.map
|
|
3092
3710
|
//# sourceMappingURL=index.mjs.map
|