scrapfly-sdk 0.6.2 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/esm/client.d.ts +13 -13
- package/esm/client.d.ts.map +1 -1
- package/esm/client.js +26 -24
- package/esm/polyfill.d.ts +6 -0
- package/esm/polyfill.d.ts.map +1 -0
- package/esm/polyfill.js +20 -0
- package/esm/utils.d.ts +7 -1
- package/esm/utils.d.ts.map +1 -1
- package/esm/utils.js +6 -2
- package/package.json +1 -1
- package/script/client.d.ts +13 -13
- package/script/client.d.ts.map +1 -1
- package/script/client.js +26 -24
- package/script/polyfill.d.ts +6 -0
- package/script/polyfill.d.ts.map +1 -0
- package/script/polyfill.js +48 -0
- package/script/utils.d.ts +7 -1
- package/script/utils.d.ts.map +1 -1
- package/script/utils.js +6 -2
package/README.md
CHANGED
````diff
@@ -24,8 +24,10 @@ The SDK is distributed through:
 3. Start scraping: 🚀
 
 ```javascript
-// node
+// node
 import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
+// bun
+import { ScrapflyClient, ScrapeConfig} from '@scrapfly/scrapfly-sdk';
 // deno:
 import { ScrapflyClient, ScrapeConfig } from 'jsr:@scrapfly/scrapfly-sdk';
 
````
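For orientation, the imports above slot into a quick-start like the sketch below. This is illustrative only: the API key and target URL are placeholders, and top-level `await` assumes an ESM context.

```typescript
// Minimal quick-start sketch (placeholder key and URL, not from this diff).
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';

const client = new ScrapflyClient({ key: 'YOUR_SCRAPFLY_API_KEY' });
const result = await client.scrape(
    new ScrapeConfig({ url: 'https://httpbin.dev/html' }),
);
console.log(result); // a ScrapeResult, per the scrape() signature in client.d.ts below
```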
package/esm/client.d.ts
CHANGED
```diff
@@ -30,21 +30,21 @@ export declare class ScrapflyClient {
      */
     scrape(config: ScrapeConfig): Promise<ScrapeResult>;
     /**
-
-
+    Concurrently scrape multiple configs
+    This is a async generator call it like this:
 
-
-
-
-
-
-
-
-
-
+    const results = [];
+    const errors = [];
+    for await (const resultOrError of client.concurrentScrape(configs)) {
+        if (resultOrError instanceof Error) {
+            errors.push(resultOrError);
+        } else {
+            results.push(resultOrError);
+        }
+    }
 
-
-
+    @param concurrencyLimit: if not set it will be taken from your account info
+    */
     concurrentScrape(configs: ScrapeConfig[], concurrencyLimit?: number): AsyncGenerator<ScrapeResult | Error | undefined, void, undefined>;
     /**
      * Save screenshot response to a file
```
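The restored docstring shows the intended consumption pattern. Fleshed out into a hedged, self-contained sketch (the client key, URLs, and `unknown[]` typing are illustrative assumptions; the `undefined` filter follows the generator's declared yield type):

```typescript
// Illustrative driver for the concurrentScrape async generator declared above.
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';

const client = new ScrapflyClient({ key: 'YOUR_SCRAPFLY_API_KEY' }); // placeholder
const configs = [
    new ScrapeConfig({ url: 'https://httpbin.dev/html' }),
    new ScrapeConfig({ url: 'https://httpbin.dev/json' }),
];

const results: unknown[] = []; // successful ScrapeResult values
const errors: Error[] = [];    // per-config failures yielded as Error values
for await (const resultOrError of client.concurrentScrape(configs)) {
    if (resultOrError instanceof Error) {
        errors.push(resultOrError);
    } else if (resultOrError !== undefined) {
        // the signature also admits undefined, so filter it out explicitly
        results.push(resultOrError);
    }
}
console.log(`${results.length} scraped, ${errors.length} failed`);
```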
package/esm/client.d.ts.map
CHANGED
```diff
@@ -1 +1 @@
-{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../src/client.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AACtC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,KAAK,WAAW,EAAE,gBAAgB,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIjG,qBAAa,cAAc;IAClB,IAAI,SAA6B;IACxC,OAAO,CAAC,GAAG,CAAS;IACpB,OAAO,CAAC,EAAE,CAAS;IACnB,KAAK,oBAAc;gBAEP,OAAO,EAAE;QAAE,GAAG,EAAE,MAAM,CAAA;KAAE;IAQpC;;OAEG;IACH,SAAS,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,GAAG,MAAM,CAAC,aAAa;IAkEzE;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,GAAG,YAAY;IAUtE;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,WAAW,CAAC;
+{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../src/client.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AACtC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,KAAK,WAAW,EAAE,gBAAgB,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIjG,qBAAa,cAAc;IAClB,IAAI,SAA6B;IACxC,OAAO,CAAC,GAAG,CAAS;IACpB,OAAO,CAAC,EAAE,CAAS;IACnB,KAAK,oBAAc;gBAEP,OAAO,EAAE;QAAE,GAAG,EAAE,MAAM,CAAA;KAAE;IAQpC;;OAEG;IACH,SAAS,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,GAAG,MAAM,CAAC,aAAa;IAkEzE;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,GAAG,YAAY;IAUtE;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,WAAW,CAAC;IA8BrC;;OAEG;IACG,MAAM,CAAC,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IA4CzD;;;;;;;;;;;;;;;MAeE;IACK,gBAAgB,CACrB,OAAO,EAAE,YAAY,EAAE,EACvB,gBAAgB,CAAC,EAAE,MAAM,GACxB,cAAc,CAAC,YAAY,GAAG,KAAK,GAAG,SAAS,EAAE,IAAI,EAAE,SAAS,CAAC;IAqCpE;;OAEG;IACG,cAAc,CAAC,MAAM,EAAE,gBAAgB,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC;IAoB7F;;OAEG;IACG,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAkB7E;;OAEG;IACG,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAuBrE;;OAEG;IACG,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAkB7E;;OAEG;IACG,OAAO,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC;CA6BnE"}
```
package/esm/client.js
CHANGED
```diff
@@ -1,5 +1,5 @@
-import * as dntShim from "./_dnt.shims.js";
 import { path } from './deps.js';
+import { mkdir, writeFile } from './polyfill.js';
 import { fetchRetry } from './utils.js';
 import * as errors from './errors.js';
 import { ExtractionResult, ScrapeResult, ScreenshotResult } from './result.js';
@@ -127,14 +127,15 @@ export class ScrapflyClient {
             const url = new URL(this.HOST + '/account');
             const params = { key: this.key };
             url.search = new URLSearchParams(params).toString();
-            response = await this.fetch(
+            response = await this.fetch({
+                url: url.toString(),
                 method: 'GET',
                 headers: {
                     'user-agent': this.ua,
                     'accept-encoding': 'gzip, deflate, br',
                     accept: 'application/json',
                 },
-            })
+            });
         }
         catch (e) {
             log.error('error', e);
@@ -159,7 +160,8 @@ export class ScrapflyClient {
             const url = new URL(this.HOST + '/scrape');
             const params = config.toApiParams({ key: this.key });
             url.search = new URLSearchParams(params).toString();
-            response = await this.fetch(
+            response = await this.fetch({
+                url: url.toString(),
                 method: config.method,
                 headers: {
                     'user-agent': this.ua,
@@ -170,7 +172,7 @@ export class ScrapflyClient {
                     accept: 'application/json',
                 },
                 body: config.body,
-            })
+            });
         }
         catch (e) {
             log.error('error', e);
@@ -193,21 +195,21 @@ export class ScrapflyClient {
         return result;
     }
     /**
-
-
+    Concurrently scrape multiple configs
+    This is a async generator call it like this:
 
-
-
-
-
-
-
-
-
-
+    const results = [];
+    const errors = [];
+    for await (const resultOrError of client.concurrentScrape(configs)) {
+        if (resultOrError instanceof Error) {
+            errors.push(resultOrError);
+        } else {
+            results.push(resultOrError);
+        }
+    }
 
-
-
+    @param concurrencyLimit: if not set it will be taken from your account info
+    */
     async *concurrentScrape(configs, concurrencyLimit) {
         if (concurrencyLimit === undefined) {
             const account = await this.account();
@@ -250,7 +252,7 @@ export class ScrapflyClient {
         const extension_name = result.metadata.extension_name;
         let file_path;
         if (savePath) {
-
+            await mkdir(savePath, { recursive: true });
             file_path = path.join(savePath, `${name}.${extension_name}`);
         }
         else {
@@ -258,7 +260,7 @@ export class ScrapflyClient {
         }
         const content = new Uint8Array(result.image);
         // Use Deno's write file method
-        await
+        await writeFile(file_path, content);
     }
     /**
      * Turn scrapfly screenshot API response to ScreenshotResult or raise one of ScrapflyError
@@ -289,7 +291,8 @@ export class ScrapflyClient {
             const url = new URL(this.HOST + '/screenshot');
             const params = config.toApiParams({ key: this.key });
             url.search = new URLSearchParams(params).toString();
-
+            response = await this.fetch({
+                url: url.toString(),
                 method: 'GET',
                 headers: {
                     'user-agent': this.ua,
@@ -297,7 +300,6 @@ export class ScrapflyClient {
                     accept: 'application/json',
                 },
             });
-            response = await this.fetch(req);
         }
         catch (e) {
             log.error('error', e);
@@ -345,12 +347,12 @@ export class ScrapflyClient {
             if (config.document_compression_format && config.document_compression_format) {
                 headers['content-encoding'] = config.document_compression_format;
             }
-
+            response = await this.fetch({
+                url: url.toString(),
                 method: 'POST',
                 headers: headers,
                 body: config.body,
             });
-            response = await this.fetch(req);
         }
         catch (e) {
             log.error('error', e);
```
package/esm/polyfill.d.ts
ADDED
```diff
@@ -0,0 +1,6 @@
+/// <reference types="node" />
+import * as dntShim from "./_dnt.shims.js";
+export declare const isDeno: boolean;
+export declare function mkdir(path: string | URL, options: dntShim.Deno.MkdirOptions): Promise<void>;
+export declare function writeFile(path: string | URL, data: Uint8Array, options?: dntShim.Deno.WriteFileOptions): Promise<void>;
+//# sourceMappingURL=polyfill.d.ts.map
```
package/esm/polyfill.d.ts.map
ADDED
```diff
@@ -0,0 +1 @@
+{"version":3,"file":"polyfill.d.ts","sourceRoot":"","sources":["../src/polyfill.ts"],"names":[],"mappings":";AAAA,OAAO,KAAK,OAAO,MAAM,iBAAiB,CAAC;AAC3C,eAAO,MAAM,MAAM,SAAsC,CAAC;AAE1D,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,EAAE,OAAO,EAAE,OAAO,CAAC,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAOjG;AAED,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,GAAG,GAAG,EAClB,IAAI,EAAE,UAAU,EAChB,OAAO,GAAE,OAAO,CAAC,IAAI,CAAC,gBAAqB,GAC1C,OAAO,CAAC,IAAI,CAAC,CAOf"}
```
package/esm/polyfill.js
ADDED
```diff
@@ -0,0 +1,20 @@
+import * as dntShim from "./_dnt.shims.js";
+export const isDeno = typeof dntShim.Deno !== 'undefined';
+export async function mkdir(path, options) {
+    if (isDeno) {
+        await dntShim.Deno.mkdir(path, options);
+    }
+    else {
+        // @ts-ignore: type for Bun
+        await Bun.mkdir(path.toString(), options);
+    }
+}
+export async function writeFile(path, data, options = {}) {
+    if (isDeno) {
+        await dntShim.Deno.writeFile(path, data, options);
+    }
+    else {
+        // @ts-ignore: type for Bun
+        await Bun.write(path.toString(), data, options);
+    }
+}
```
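The new polyfill is plain runtime feature detection: probe for a Deno global and branch per runtime. A self-contained sketch of the same idea is below; note it substitutes `node:fs/promises` (which Node and Bun both implement) for the package's `Bun.*` calls, so it is a variant of the pattern, not the package's code.

```typescript
// Runtime-detection file helpers, sketched after the polyfill above.
// Assumption: a Deno-like global may or may not exist at runtime.
import { mkdir as fsMkdir, writeFile as fsWriteFile } from 'node:fs/promises';

declare const Deno: any; // present under Deno, undefined elsewhere

export const isDeno = typeof Deno !== 'undefined';

export async function mkdir(path: string, options?: { recursive?: boolean }): Promise<void> {
    if (isDeno) {
        await Deno.mkdir(path, options); // Deno's native filesystem API
    } else {
        await fsMkdir(path, options); // Node and Bun via node:fs/promises
    }
}

export async function writeFile(path: string, data: Uint8Array): Promise<void> {
    if (isDeno) {
        await Deno.writeFile(path, data);
    } else {
        await fsWriteFile(path, data);
    }
}
```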
package/esm/utils.d.ts
CHANGED
```diff
@@ -1,4 +1,10 @@
 /// <reference types="node" />
 export declare function urlsafe_b64encode(data: string): string;
-export
+export type RequestOptions = {
+    url: string;
+    method?: string;
+    headers?: any;
+    body?: string | Uint8Array;
+};
+export declare function fetchRetry(config: RequestOptions, retries?: number, retryDelay?: number): Promise<Response>;
 //# sourceMappingURL=utils.d.ts.map
```
package/esm/utils.d.ts.map
CHANGED
```diff
@@ -1 +1 @@
-{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":";
+{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":";AAEA,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAQtD;AAED,MAAM,MAAM,cAAc,GAAG;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,GAAG,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,GAAG,UAAU,CAAC;CAC5B,CAAC;AAEF,wBAAsB,UAAU,CAC9B,MAAM,EAAE,cAAc,EACtB,OAAO,GAAE,MAAU,EACnB,UAAU,GAAE,MAAa,GACxB,OAAO,CAAC,QAAQ,CAAC,CA+BnB"}
```
package/esm/utils.js
CHANGED
```diff
@@ -1,3 +1,4 @@
+import { log } from './logger.js';
 export function urlsafe_b64encode(data) {
     const encoder = new TextEncoder();
     const encoded = encoder.encode(data);
@@ -11,12 +12,15 @@ export async function fetchRetry(config, retries = 3, retryDelay = 1000) {
     let lastError = null;
     for (let attempt = 1; attempt <= retries; attempt++) {
         try {
-            // XXX:
-            const
+            // XXX: note that cloudflare workers don't support init options
+            const { url, ...reqInit } = config;
+            const response = await fetch(new Request(url, reqInit));
             // retry 5xx status codes
             if (response.status >= 500 && response.status < 600) {
+                const _text = await response.text(); // consume response to prevent leak
                 lastError = new Error(`Fetch failed with status: ${response.status}`);
                 if (attempt < retries) {
+                    log.debug(`request failed ${response.status} (${_text}): retry ${attempt}/${retries} after ${retryDelay}ms`);
                     await new Promise((resolve) => setTimeout(resolve, retryDelay));
                 }
             }
```
CHANGED
package/script/client.d.ts
CHANGED
|
@@ -30,21 +30,21 @@ export declare class ScrapflyClient {
|
|
|
30
30
|
*/
|
|
31
31
|
scrape(config: ScrapeConfig): Promise<ScrapeResult>;
|
|
32
32
|
/**
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
Concurrently scrape multiple configs
|
|
34
|
+
This is a async generator call it like this:
|
|
35
35
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
36
|
+
const results = [];
|
|
37
|
+
const errors = [];
|
|
38
|
+
for await (const resultOrError of client.concurrentScrape(configs)) {
|
|
39
|
+
if (resultOrError instanceof Error) {
|
|
40
|
+
errors.push(resultOrError);
|
|
41
|
+
} else {
|
|
42
|
+
results.push(resultOrError);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
45
|
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
@param concurrencyLimit: if not set it will be taken from your account info
|
|
47
|
+
*/
|
|
48
48
|
concurrentScrape(configs: ScrapeConfig[], concurrencyLimit?: number): AsyncGenerator<ScrapeResult | Error | undefined, void, undefined>;
|
|
49
49
|
/**
|
|
50
50
|
* Save screenshot response to a file
|
package/script/client.d.ts.map
CHANGED
```diff
@@ -1 +1 @@
-{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../src/client.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AACtC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,KAAK,WAAW,EAAE,gBAAgB,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIjG,qBAAa,cAAc;IAClB,IAAI,SAA6B;IACxC,OAAO,CAAC,GAAG,CAAS;IACpB,OAAO,CAAC,EAAE,CAAS;IACnB,KAAK,oBAAc;gBAEP,OAAO,EAAE;QAAE,GAAG,EAAE,MAAM,CAAA;KAAE;IAQpC;;OAEG;IACH,SAAS,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,GAAG,MAAM,CAAC,aAAa;IAkEzE;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,GAAG,YAAY;IAUtE;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,WAAW,CAAC;
+{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../src/client.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AACxC,OAAO,KAAK,MAAM,MAAM,aAAa,CAAC;AACtC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,EAAE,KAAK,WAAW,EAAE,gBAAgB,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIjG,qBAAa,cAAc;IAClB,IAAI,SAA6B;IACxC,OAAO,CAAC,GAAG,CAAS;IACpB,OAAO,CAAC,EAAE,CAAS;IACnB,KAAK,oBAAc;gBAEP,OAAO,EAAE;QAAE,GAAG,EAAE,MAAM,CAAA;KAAE;IAQpC;;OAEG;IACH,SAAS,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,GAAG,MAAM,CAAC,aAAa;IAkEzE;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,GAAG,YAAY;IAUtE;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,WAAW,CAAC;IA8BrC;;OAEG;IACG,MAAM,CAAC,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;IA4CzD;;;;;;;;;;;;;;;MAeE;IACK,gBAAgB,CACrB,OAAO,EAAE,YAAY,EAAE,EACvB,gBAAgB,CAAC,EAAE,MAAM,GACxB,cAAc,CAAC,YAAY,GAAG,KAAK,GAAG,SAAS,EAAE,IAAI,EAAE,SAAS,CAAC;IAqCpE;;OAEG;IACG,cAAc,CAAC,MAAM,EAAE,gBAAgB,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC;IAoB7F;;OAEG;IACG,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAkB7E;;OAEG;IACG,UAAU,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAuBrE;;OAEG;IACG,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAkB7E;;OAEG;IACG,OAAO,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC;CA6BnE"}
```
package/script/client.js
CHANGED
```diff
@@ -24,8 +24,8 @@ var __importStar = (this && this.__importStar) || function (mod) {
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ScrapflyClient = void 0;
-const dntShim = __importStar(require("./_dnt.shims.js"));
 const deps_js_1 = require("./deps.js");
+const polyfill_js_1 = require("./polyfill.js");
 const utils_js_1 = require("./utils.js");
 const errors = __importStar(require("./errors.js"));
 const result_js_1 = require("./result.js");
@@ -153,14 +153,15 @@ class ScrapflyClient {
             const url = new URL(this.HOST + '/account');
             const params = { key: this.key };
             url.search = new URLSearchParams(params).toString();
-            response = await this.fetch(
+            response = await this.fetch({
+                url: url.toString(),
                 method: 'GET',
                 headers: {
                     'user-agent': this.ua,
                     'accept-encoding': 'gzip, deflate, br',
                     accept: 'application/json',
                 },
-            })
+            });
         }
         catch (e) {
             logger_js_1.log.error('error', e);
@@ -185,7 +186,8 @@ class ScrapflyClient {
             const url = new URL(this.HOST + '/scrape');
             const params = config.toApiParams({ key: this.key });
             url.search = new URLSearchParams(params).toString();
-            response = await this.fetch(
+            response = await this.fetch({
+                url: url.toString(),
                 method: config.method,
                 headers: {
                     'user-agent': this.ua,
@@ -196,7 +198,7 @@ class ScrapflyClient {
                     accept: 'application/json',
                 },
                 body: config.body,
-            })
+            });
         }
         catch (e) {
             logger_js_1.log.error('error', e);
@@ -219,21 +221,21 @@ class ScrapflyClient {
         return result;
     }
     /**
-
-
+    Concurrently scrape multiple configs
+    This is a async generator call it like this:
 
-
-
-
-
-
-
-
-
-
+    const results = [];
+    const errors = [];
+    for await (const resultOrError of client.concurrentScrape(configs)) {
+        if (resultOrError instanceof Error) {
+            errors.push(resultOrError);
+        } else {
+            results.push(resultOrError);
+        }
+    }
 
-
-
+    @param concurrencyLimit: if not set it will be taken from your account info
+    */
     async *concurrentScrape(configs, concurrencyLimit) {
         if (concurrencyLimit === undefined) {
             const account = await this.account();
@@ -276,7 +278,7 @@ class ScrapflyClient {
         const extension_name = result.metadata.extension_name;
         let file_path;
         if (savePath) {
-
+            await (0, polyfill_js_1.mkdir)(savePath, { recursive: true });
             file_path = deps_js_1.path.join(savePath, `${name}.${extension_name}`);
         }
         else {
@@ -284,7 +286,7 @@ class ScrapflyClient {
         }
         const content = new Uint8Array(result.image);
         // Use Deno's write file method
-        await
+        await (0, polyfill_js_1.writeFile)(file_path, content);
     }
     /**
      * Turn scrapfly screenshot API response to ScreenshotResult or raise one of ScrapflyError
@@ -315,7 +317,8 @@ class ScrapflyClient {
             const url = new URL(this.HOST + '/screenshot');
             const params = config.toApiParams({ key: this.key });
             url.search = new URLSearchParams(params).toString();
-
+            response = await this.fetch({
+                url: url.toString(),
                 method: 'GET',
                 headers: {
                     'user-agent': this.ua,
@@ -323,7 +326,6 @@ class ScrapflyClient {
                     accept: 'application/json',
                 },
             });
-            response = await this.fetch(req);
         }
         catch (e) {
             logger_js_1.log.error('error', e);
@@ -371,12 +373,12 @@ class ScrapflyClient {
             if (config.document_compression_format && config.document_compression_format) {
                 headers['content-encoding'] = config.document_compression_format;
             }
-
+            response = await this.fetch({
+                url: url.toString(),
                 method: 'POST',
                 headers: headers,
                 body: config.body,
             });
-            response = await this.fetch(req);
         }
         catch (e) {
             logger_js_1.log.error('error', e);
```
package/script/polyfill.d.ts
ADDED
```diff
@@ -0,0 +1,6 @@
+/// <reference types="node" />
+import * as dntShim from "./_dnt.shims.js";
+export declare const isDeno: boolean;
+export declare function mkdir(path: string | URL, options: dntShim.Deno.MkdirOptions): Promise<void>;
+export declare function writeFile(path: string | URL, data: Uint8Array, options?: dntShim.Deno.WriteFileOptions): Promise<void>;
+//# sourceMappingURL=polyfill.d.ts.map
```
package/script/polyfill.d.ts.map
ADDED
```diff
@@ -0,0 +1 @@
+{"version":3,"file":"polyfill.d.ts","sourceRoot":"","sources":["../src/polyfill.ts"],"names":[],"mappings":";AAAA,OAAO,KAAK,OAAO,MAAM,iBAAiB,CAAC;AAC3C,eAAO,MAAM,MAAM,SAAsC,CAAC;AAE1D,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,EAAE,OAAO,EAAE,OAAO,CAAC,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAOjG;AAED,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,GAAG,GAAG,EAClB,IAAI,EAAE,UAAU,EAChB,OAAO,GAAE,OAAO,CAAC,IAAI,CAAC,gBAAqB,GAC1C,OAAO,CAAC,IAAI,CAAC,CAOf"}
```
package/script/polyfill.js
ADDED
```diff
@@ -0,0 +1,48 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.writeFile = exports.mkdir = exports.isDeno = void 0;
+const dntShim = __importStar(require("./_dnt.shims.js"));
+exports.isDeno = typeof dntShim.Deno !== 'undefined';
+async function mkdir(path, options) {
+    if (exports.isDeno) {
+        await dntShim.Deno.mkdir(path, options);
+    }
+    else {
+        // @ts-ignore: type for Bun
+        await Bun.mkdir(path.toString(), options);
+    }
+}
+exports.mkdir = mkdir;
+async function writeFile(path, data, options = {}) {
+    if (exports.isDeno) {
+        await dntShim.Deno.writeFile(path, data, options);
+    }
+    else {
+        // @ts-ignore: type for Bun
+        await Bun.write(path.toString(), data, options);
+    }
+}
+exports.writeFile = writeFile;
```
package/script/utils.d.ts
CHANGED
```diff
@@ -1,4 +1,10 @@
 /// <reference types="node" />
 export declare function urlsafe_b64encode(data: string): string;
-export
+export type RequestOptions = {
+    url: string;
+    method?: string;
+    headers?: any;
+    body?: string | Uint8Array;
+};
+export declare function fetchRetry(config: RequestOptions, retries?: number, retryDelay?: number): Promise<Response>;
 //# sourceMappingURL=utils.d.ts.map
```
package/script/utils.d.ts.map
CHANGED
```diff
@@ -1 +1 @@
-{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":";
+{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":";AAEA,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAQtD;AAED,MAAM,MAAM,cAAc,GAAG;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,GAAG,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,GAAG,UAAU,CAAC;CAC5B,CAAC;AAEF,wBAAsB,UAAU,CAC9B,MAAM,EAAE,cAAc,EACtB,OAAO,GAAE,MAAU,EACnB,UAAU,GAAE,MAAa,GACxB,OAAO,CAAC,QAAQ,CAAC,CA+BnB"}
```
package/script/utils.js
CHANGED
```diff
@@ -1,6 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.fetchRetry = exports.urlsafe_b64encode = void 0;
+const logger_js_1 = require("./logger.js");
 function urlsafe_b64encode(data) {
     const encoder = new TextEncoder();
     const encoded = encoder.encode(data);
@@ -15,12 +16,15 @@ async function fetchRetry(config, retries = 3, retryDelay = 1000) {
     let lastError = null;
     for (let attempt = 1; attempt <= retries; attempt++) {
         try {
-            // XXX:
-            const
+            // XXX: note that cloudflare workers don't support init options
+            const { url, ...reqInit } = config;
+            const response = await fetch(new Request(url, reqInit));
             // retry 5xx status codes
             if (response.status >= 500 && response.status < 600) {
+                const _text = await response.text(); // consume response to prevent leak
                 lastError = new Error(`Fetch failed with status: ${response.status}`);
                 if (attempt < retries) {
+                    logger_js_1.log.debug(`request failed ${response.status} (${_text}): retry ${attempt}/${retries} after ${retryDelay}ms`);
                     await new Promise((resolve) => setTimeout(resolve, retryDelay));
                 }
             }
```