urllib 3.0.0-alpha.1 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/HttpClient.ts CHANGED
@@ -1,25 +1,87 @@
1
1
  import { EventEmitter } from 'events';
2
+ import { LookupFunction } from 'net';
2
3
  import { debuglog } from 'util';
3
- import { Readable, isReadable } from 'stream';
4
- import { pipeline } from 'stream/promises';
4
+ import {
5
+ createGunzip,
6
+ createBrotliDecompress,
7
+ gunzipSync,
8
+ brotliDecompressSync,
9
+ } from 'zlib';
5
10
  import { Blob } from 'buffer';
6
- import { createReadStream } from 'fs';
11
+ import { Readable, pipeline } from 'stream';
12
+ import stream from 'stream';
7
13
  import { basename } from 'path';
14
+ import { createReadStream } from 'fs';
15
+ import { IncomingHttpHeaders } from 'http';
16
+ import { performance } from 'perf_hooks';
8
17
  import {
9
- fetch, RequestInit, Headers, FormData,
18
+ FormData as FormDataNative,
19
+ request as undiciRequest,
20
+ Dispatcher,
10
21
  } from 'undici';
22
+ import { FormData as FormDataNode } from 'formdata-node';
23
+ import { FormDataEncoder } from 'form-data-encoder';
11
24
  import createUserAgent from 'default-user-agent';
12
25
  import mime from 'mime-types';
13
- import { RequestURL, RequestOptions } from './Request';
14
- import { HttpClientResponseMeta, HttpClientResponse, ReadableStreamWithMeta } from './Response';
15
- import { parseJSON } from './utils';
26
+ import pump from 'pump';
27
+ import { HttpAgent, CheckAddressFunction } from './HttpAgent';
28
+ import { RequestURL, RequestOptions, HttpMethod } from './Request';
29
+ import { HttpClientResponseMeta, HttpClientResponse, ReadableWithMeta } from './Response';
30
+ import { parseJSON, sleep } from './utils';
31
+
32
+ const FormData = FormDataNative ?? FormDataNode;
33
+ // impl isReadable on Node.js 14
34
+ const isReadable = stream.isReadable ?? function isReadable(stream: any) {
35
+ return stream && typeof stream.read === 'function';
36
+ };
37
+ // impl promise pipeline on Node.js 14
38
+ const pipelinePromise = stream.promises?.pipeline ?? function pipeline(...args: any[]) {
39
+ return new Promise<void>((resolve, reject) => {
40
+ pump(...args, (err?: Error) => {
41
+ if (err) return reject(err);
42
+ resolve();
43
+ });
44
+ });
45
+ };
46
+
47
+ function noop() {
48
+ // noop
49
+ }
50
+
51
+ const MAX_REQURE_ID_VALUE = Math.pow(2, 31) - 10;
52
+ let globalRequestId = 0;
16
53
 
17
54
  const debug = debuglog('urllib');
18
55
 
19
56
  export type ClientOptions = {
20
57
  defaultArgs?: RequestOptions;
58
+ /**
59
+ * Custom DNS lookup function, default is `dns.lookup`.
60
+ */
61
+ lookup?: LookupFunction;
62
+ /**
63
+ * Check the request address to protect against SSRF and similar attacks.
64
+ * It receives two arguments (ip and family) and should return true or false to indicate whether the address is legal or not.
65
+ * It relies on lookup and has the same version requirement.
66
+ */
67
+ checkAddress?: CheckAddressFunction;
68
+ connect?: {
69
+ key?: string | Buffer;
70
+ /**
71
+ * A string or Buffer containing the certificate key of the client in PEM format.
72
+ * Note: This is necessary only when using client certificate authentication.
73
+ */
74
+ cert?: string | Buffer;
75
+ /**
76
+ * If true, the server certificate is verified against the list of supplied CAs.
77
+ * An 'error' event is emitted if verification fails. Default: true.
78
+ */
79
+ rejectUnauthorized?: boolean;
80
+ },
21
81
  };
22
82
 
83
+ type UndiciRquestOptions = { dispatcher?: Dispatcher } & Omit<Dispatcher.RequestOptions, 'origin' | 'path' | 'method'> & Partial<Pick<Dispatcher.RequestOptions, 'method'>>;
84
+
23
85
  // https://github.com/octet-stream/form-data
24
86
  class BlobFromStream {
25
87
  #stream;
@@ -51,7 +113,7 @@ class HttpClientRequestTimeoutError extends Error {
51
113
  }
52
114
  }
53
115
 
54
- const HEADER_USER_AGENT = createUserAgent('node-urllib', '3.0.0');
116
+ export const HEADER_USER_AGENT = createUserAgent('node-urllib', '3.0.0');
55
117
 
56
118
  function getFileName(stream: Readable) {
57
119
  const filePath: string = (stream as any).path;
@@ -61,69 +123,140 @@ function getFileName(stream: Readable) {
61
123
  return '';
62
124
  }
63
125
 
126
+ function defaultIsRetry(response: HttpClientResponse) {
127
+ return response.status >= 500;
128
+ }
129
+
130
+ function performanceTime(startTime: number) {
131
+ return Math.floor((performance.now() - startTime) * 1000) / 1000;
132
+ }
133
+
134
+ type RequestContext = {
135
+ retries: number;
136
+ };
137
+
64
138
  export class HttpClient extends EventEmitter {
65
- defaultArgs?: RequestOptions;
139
+ #defaultArgs?: RequestOptions;
140
+ #dispatcher?: Dispatcher;
66
141
 
67
142
  constructor(clientOptions?: ClientOptions) {
68
143
  super();
69
- this.defaultArgs = clientOptions?.defaultArgs;
144
+ this.#defaultArgs = clientOptions?.defaultArgs;
145
+ if (clientOptions?.lookup || clientOptions?.checkAddress || clientOptions?.connect) {
146
+ this.#dispatcher = new HttpAgent({
147
+ lookup: clientOptions.lookup,
148
+ checkAddress: clientOptions.checkAddress,
149
+ connect: clientOptions.connect,
150
+ });
151
+ }
70
152
  }
71
153
 
72
154
  async request(url: RequestURL, options?: RequestOptions) {
155
+ return await this.#requestInternal(url, options);
156
+ }
157
+
158
+ async #requestInternal(url: RequestURL, options?: RequestOptions, requestContext?: RequestContext): Promise<HttpClientResponse> {
159
+ if (globalRequestId >= MAX_REQURE_ID_VALUE) {
160
+ globalRequestId = 0;
161
+ }
162
+ const requestId = ++globalRequestId;
163
+
73
164
  const requestUrl = typeof url === 'string' ? new URL(url) : url;
74
165
  const args = {
75
- ...this.defaultArgs,
166
+ retry: 0,
167
+ ...this.#defaultArgs,
76
168
  ...options,
77
- emitter: this,
78
169
  };
79
- const requestStartTime = Date.now();
170
+ requestContext = {
171
+ retries: 0,
172
+ ...requestContext,
173
+ };
174
+ const requestStartTime = performance.now();
175
+
176
+ const reqMeta = {
177
+ requestId,
178
+ url: requestUrl.href,
179
+ args,
180
+ ctx: args.ctx,
181
+ };
80
182
  // keep urllib createCallbackResponse style
81
- const resHeaders: Record<string, string> = {};
183
+ const resHeaders: IncomingHttpHeaders = {};
82
184
  const res: HttpClientResponseMeta = {
83
185
  status: -1,
84
186
  statusCode: -1,
85
- statusMessage: '',
86
187
  headers: resHeaders,
87
188
  size: 0,
88
189
  aborted: false,
89
190
  rt: 0,
90
191
  keepAliveSocket: true,
91
- requestUrls: [ url.toString() ],
192
+ requestUrls: [],
92
193
  timing: {
194
+ waiting: 0,
93
195
  contentDownload: 0,
94
196
  },
95
197
  };
96
198
 
97
- let requestTimeout = 5000;
199
+ let headersTimeout = 5000;
200
+ let bodyTimeout = 5000;
98
201
  if (args.timeout) {
99
202
  if (Array.isArray(args.timeout)) {
100
- requestTimeout = args.timeout[args.timeout.length - 1] ?? requestTimeout;
203
+ headersTimeout = args.timeout[0] ?? headersTimeout;
204
+ bodyTimeout = args.timeout[1] ?? bodyTimeout;
101
205
  } else {
102
- requestTimeout = args.timeout;
206
+ headersTimeout = bodyTimeout = args.timeout;
103
207
  }
104
208
  }
105
209
 
106
- const requestTimeoutController = new AbortController();
107
- const requestTimerId = setTimeout(() => requestTimeoutController.abort(), requestTimeout);
108
- const method = (args.method ?? 'GET').toUpperCase();
109
-
110
- try {
111
- const headers = new Headers(args.headers ?? {});
112
- if (!headers.has('user-agent')) {
113
- // need to set user-agent
114
- headers.set('user-agent', HEADER_USER_AGENT);
115
- }
116
- if (args.dataType === 'json' && !headers.has('accept')) {
117
- headers.set('accept', 'application/json');
210
+ const method = (args.method ?? 'GET').toUpperCase() as HttpMethod;
211
+ const headers: IncomingHttpHeaders = {};
212
+ if (args.headers) {
213
+ // convert headers to lower-case
214
+ for (const name in args.headers) {
215
+ headers[name.toLowerCase()] = args.headers[name];
118
216
  }
217
+ }
218
+ // hidden user-agent
219
+ const hiddenUserAgent = 'user-agent' in headers && !headers['user-agent'];
220
+ if (hiddenUserAgent) {
221
+ delete headers['user-agent'];
222
+ } else if (!headers['user-agent']) {
223
+ // need to set user-agent
224
+ headers['user-agent'] = HEADER_USER_AGENT;
225
+ }
226
+ // Alias to dataType = 'stream'
227
+ if (args.streaming || args.customResponse) {
228
+ args.dataType = 'stream';
229
+ }
230
+ if (args.dataType === 'json' && !headers.accept) {
231
+ headers.accept = 'application/json';
232
+ }
233
+ // gzip alias to compressed
234
+ if (args.gzip && args.compressed !== false) {
235
+ args.compressed = true;
236
+ }
237
+ if (args.compressed && !headers['accept-encoding']) {
238
+ headers['accept-encoding'] = 'gzip, br';
239
+ }
240
+ if (requestContext.retries > 0) {
241
+ headers['x-urllib-retry'] = `${requestContext.retries}/${args.retry}`;
242
+ }
243
+ if (args.auth && !headers.authorization) {
244
+ headers.authorization = `Basic ${Buffer.from(args.auth).toString('base64')}`;
245
+ }
119
246
 
120
- const requestOptions: RequestInit = {
247
+ let opaque = args.opaque;
248
+ try {
249
+ const requestOptions: UndiciRquestOptions = {
121
250
  method,
122
251
  keepalive: true,
123
- signal: requestTimeoutController.signal,
252
+ maxRedirections: args.maxRedirects ?? 10,
253
+ headersTimeout,
254
+ bodyTimeout,
255
+ opaque,
256
+ dispatcher: this.#dispatcher,
124
257
  };
125
258
  if (args.followRedirect === false) {
126
- requestOptions.redirect = 'manual';
259
+ requestOptions.maxRedirections = 0;
127
260
  }
128
261
 
129
262
  const isGETOrHEAD = requestOptions.method === 'GET' || requestOptions.method === 'HEAD';
@@ -164,8 +297,8 @@ export class HttpClient extends EventEmitter {
164
297
  // const fileName = encodeURIComponent(basename(file));
165
298
  // formData.append(field, await fileFromPath(file, `utf-8''${fileName}`, { type: mime.lookup(fileName) || '' }));
166
299
  const fileName = basename(file);
167
- const fileReader = createReadStream(file);
168
- formData.append(field, new BlobFromStream(fileReader, mime.lookup(fileName) || ''), fileName);
300
+ const fileReadable = createReadStream(file);
301
+ formData.append(field, new BlobFromStream(fileReadable, mime.lookup(fileName) || ''), fileName);
169
302
  } else if (Buffer.isBuffer(file)) {
170
303
  formData.append(field, new Blob([ file ]), `bufferfile${index}`);
171
304
  } else if (file instanceof Readable || isReadable(file as any)) {
@@ -173,17 +306,27 @@ export class HttpClient extends EventEmitter {
173
306
  formData.append(field, new BlobFromStream(file, mime.lookup(fileName) || ''), fileName);
174
307
  }
175
308
  }
176
- requestOptions.body = formData;
309
+
310
+ if (FormDataNative) {
311
+ requestOptions.body = formData;
312
+ } else {
313
+ // Node.js 14 does not support spec-compliant FormData
314
+ // https://github.com/octet-stream/form-data#usage
315
+ const encoder = new FormDataEncoder(formData as any);
316
+ Object.assign(headers, encoder.headers);
317
+ // fix "Content-Length":"NaN"
318
+ delete headers['Content-Length'];
319
+ requestOptions.body = Readable.from(encoder);
320
+ }
177
321
  } else if (args.content) {
178
322
  if (!isGETOrHEAD) {
179
- if (isReadable(args.content as Readable)) {
180
- // disable keepalive
181
- requestOptions.keepalive = false;
182
- }
183
323
  // handle content
184
324
  requestOptions.body = args.content;
185
325
  if (args.contentType) {
186
- headers.set('content-type', args.contentType);
326
+ headers['content-type'] = args.contentType;
327
+ }
328
+ if (typeof args.content === 'string' && !headers['content-type']) {
329
+ headers['content-type'] = 'text/plain;charset=UTF-8';
187
330
  }
188
331
  }
189
332
  } else if (args.data) {
@@ -198,98 +341,174 @@ export class HttpClient extends EventEmitter {
198
341
  }
199
342
  } else {
200
343
  if (isStringOrBufferOrReadable) {
201
- if (isReadable(args.data as Readable)) {
202
- // disable keepalive
203
- requestOptions.keepalive = false;
204
- }
205
344
  requestOptions.body = args.data;
206
345
  } else {
207
346
  if (args.contentType === 'json'
208
347
  || args.contentType === 'application/json'
209
- || headers.get('content-type')?.startsWith('application/json')) {
348
+ || headers['content-type']?.startsWith('application/json')) {
210
349
  requestOptions.body = JSON.stringify(args.data);
211
- if (!headers.has('content-type')) {
212
- headers.set('content-type', 'application/json');
350
+ if (!headers['content-type']) {
351
+ headers['content-type'] = 'application/json';
213
352
  }
214
353
  } else {
215
- requestOptions.body = new URLSearchParams(args.data);
354
+ headers['content-type'] = 'application/x-www-form-urlencoded;charset=UTF-8';
355
+ requestOptions.body = new URLSearchParams(args.data).toString();
216
356
  }
217
357
  }
218
358
  }
219
359
  }
220
360
 
221
- debug('%s %s, headers: %j, timeout: %s', requestOptions.method, url, headers, requestTimeout);
361
+ debug('Request#%d %s %s, headers: %j, headersTimeout: %s, bodyTimeout: %s',
362
+ requestId, requestOptions.method, requestUrl.href, headers, headersTimeout, bodyTimeout);
222
363
  requestOptions.headers = headers;
364
+ if (this.listenerCount('request') > 0) {
365
+ this.emit('request', reqMeta);
366
+ }
223
367
 
224
- const response = await fetch(requestUrl, requestOptions);
225
- for (const [ name, value ] of response.headers) {
226
- res.headers[name] = value;
368
+ const response = await undiciRequest(requestUrl, requestOptions);
369
+ opaque = response.opaque;
370
+ if (args.timing) {
371
+ res.timing.waiting = performanceTime(requestStartTime);
227
372
  }
228
- res.status = res.statusCode = response.status;
229
- res.statusMessage = response.statusText;
230
- if (response.redirected) {
231
- res.requestUrls.push(response.url);
373
+
374
+ const context = response.context as { history: URL[] };
375
+ let lastUrl = '';
376
+ if (context?.history) {
377
+ for (const urlObject of context?.history) {
378
+ res.requestUrls.push(urlObject.href);
379
+ lastUrl = urlObject.href;
380
+ }
381
+ } else {
382
+ res.requestUrls.push(requestUrl.href);
383
+ lastUrl = requestUrl.href;
232
384
  }
385
+ const contentEncoding = response.headers['content-encoding'];
386
+ const isCompressedContent = contentEncoding === 'gzip' || contentEncoding === 'br';
387
+
388
+ res.headers = response.headers;
389
+ res.status = res.statusCode = response.statusCode;
233
390
  if (res.headers['content-length']) {
234
391
  res.size = parseInt(res.headers['content-length']);
235
392
  }
236
393
 
237
394
  let data: any = null;
238
- let responseBodyStream: ReadableStreamWithMeta | undefined;
239
- if (args.streaming || args.dataType === 'stream') {
395
+ let responseBodyStream: ReadableWithMeta | undefined;
396
+ if (args.dataType === 'stream') {
397
+ // streaming mode will disable retry
398
+ args.retry = 0;
240
399
  const meta = {
241
400
  status: res.status,
242
401
  statusCode: res.statusCode,
243
- statusMessage: res.statusMessage,
244
402
  headers: res.headers,
245
403
  };
246
- if (typeof Readable.fromWeb === 'function') {
247
- responseBodyStream = Object.assign(Readable.fromWeb(response.body!), meta);
404
+ if (isCompressedContent) {
405
+ // gzip or br
406
+ const decoder = contentEncoding === 'gzip' ? createGunzip() : createBrotliDecompress();
407
+ responseBodyStream = Object.assign(pipeline(response.body, decoder, noop), meta);
248
408
  } else {
249
- responseBodyStream = Object.assign(response.body!, meta);
409
+ responseBodyStream = Object.assign(response.body, meta);
250
410
  }
251
411
  } else if (args.writeStream) {
252
- await pipeline(response.body!, args.writeStream);
253
- } else if (args.dataType === 'text') {
254
- data = await response.text();
255
- } else if (args.dataType === 'json') {
256
- if (requestOptions.method === 'HEAD') {
257
- data = {};
412
+ // streaming mode will disable retry
413
+ args.retry = 0;
414
+ if (isCompressedContent) {
415
+ const decoder = contentEncoding === 'gzip' ? createGunzip() : createBrotliDecompress();
416
+ await pipelinePromise(response.body, decoder, args.writeStream);
258
417
  } else {
259
- data = await response.text();
418
+ await pipelinePromise(response.body, args.writeStream);
419
+ }
420
+ } else {
421
+ // buffer
422
+ data = Buffer.from(await response.body.arrayBuffer());
423
+ if (isCompressedContent) {
424
+ try {
425
+ data = contentEncoding === 'gzip' ? gunzipSync(data) : brotliDecompressSync(data);
426
+ } catch (err: any) {
427
+ if (err.name === 'Error') {
428
+ err.name = 'UnzipError';
429
+ }
430
+ throw err;
431
+ }
432
+ }
433
+ if (args.dataType === 'text') {
434
+ data = data.toString();
435
+ } else if (args.dataType === 'json') {
260
436
  if (data.length === 0) {
261
437
  data = null;
262
438
  } else {
263
- data = parseJSON(data, args.fixJSONCtlChars);
439
+ data = parseJSON(data.toString(), args.fixJSONCtlChars);
264
440
  }
265
441
  }
266
- } else {
267
- // buffer
268
- data = Buffer.from(await response.arrayBuffer());
269
442
  }
270
- res.rt = res.timing.contentDownload = Date.now() - requestStartTime;
443
+ res.rt = performanceTime(requestStartTime);
444
+ if (args.timing) {
445
+ res.timing.contentDownload = res.rt;
446
+ }
271
447
 
272
448
  const clientResponse: HttpClientResponse = {
273
- status: res.status,
449
+ opaque,
274
450
  data,
451
+ status: res.status,
275
452
  headers: res.headers,
276
- url: response.url,
277
- redirected: response.redirected,
453
+ url: lastUrl,
454
+ redirected: res.requestUrls.length > 1,
455
+ requestUrls: res.requestUrls,
278
456
  res: responseBodyStream ?? res,
279
457
  };
458
+
459
+ if (args.retry > 0 && requestContext.retries < args.retry) {
460
+ const isRetry = args.isRetry ?? defaultIsRetry;
461
+ if (isRetry(clientResponse)) {
462
+ if (args.retryDelay) {
463
+ await sleep(args.retryDelay);
464
+ }
465
+ requestContext.retries++;
466
+ return await this.#requestInternal(url, options, requestContext);
467
+ }
468
+ }
469
+
470
+ if (this.listenerCount('response') > 0) {
471
+ this.emit('response', {
472
+ requestId,
473
+ error: null,
474
+ ctx: args.ctx,
475
+ req: reqMeta,
476
+ res,
477
+ });
478
+ }
479
+
280
480
  return clientResponse;
281
481
  } catch (e: any) {
482
+ debug('Request#%d throw error: %s', requestId, e);
282
483
  let err = e;
283
- if (requestTimeoutController.signal.aborted) {
284
- err = new HttpClientRequestTimeoutError(requestTimeout, { cause: e });
484
+ if (err.name === 'HeadersTimeoutError') {
485
+ err = new HttpClientRequestTimeoutError(headersTimeout, { cause: e });
486
+ } else if (err.name === 'BodyTimeoutError') {
487
+ err = new HttpClientRequestTimeoutError(bodyTimeout, { cause: e });
285
488
  }
286
- err.res = res;
489
+ err.opaque = opaque;
287
490
  err.status = res.status;
288
491
  err.headers = res.headers;
289
- // console.error(err);
492
+ err.res = res;
493
+ // make sure requestUrls not empty
494
+ if (res.requestUrls.length === 0) {
495
+ res.requestUrls.push(requestUrl.href);
496
+ }
497
+ res.rt = performanceTime(requestStartTime);
498
+ if (args.timing) {
499
+ res.timing.contentDownload = res.rt;
500
+ }
501
+
502
+ if (this.listenerCount('response') > 0) {
503
+ this.emit('response', {
504
+ requestId,
505
+ error: err,
506
+ ctx: args.ctx,
507
+ req: reqMeta,
508
+ res,
509
+ });
510
+ }
290
511
  throw err;
291
- } finally {
292
- clearTimeout(requestTimerId);
293
512
  }
294
513
  }
295
514
  }