urllib 3.0.0-alpha.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/HttpClient.ts CHANGED
@@ -1,22 +1,87 @@
1
1
  import { EventEmitter } from 'events';
2
+ import { LookupFunction } from 'net';
2
3
  import { debuglog } from 'util';
3
- import { Readable, isReadable } from 'stream';
4
+ import {
5
+ createGunzip,
6
+ createBrotliDecompress,
7
+ gunzipSync,
8
+ brotliDecompressSync,
9
+ } from 'zlib';
4
10
  import { Blob } from 'buffer';
5
- import { createReadStream } from 'fs';
11
+ import { Readable, pipeline } from 'stream';
12
+ import stream from 'stream';
6
13
  import { basename } from 'path';
14
+ import { createReadStream } from 'fs';
15
+ import { IncomingHttpHeaders } from 'http';
16
+ import { performance } from 'perf_hooks';
7
17
  import {
8
- fetch, RequestInit, Headers, FormData,
18
+ FormData as FormDataNative,
19
+ request as undiciRequest,
20
+ Dispatcher,
9
21
  } from 'undici';
22
+ import { FormData as FormDataNode } from 'formdata-node';
23
+ import { FormDataEncoder } from 'form-data-encoder';
10
24
  import createUserAgent from 'default-user-agent';
11
25
  import mime from 'mime-types';
12
- import { RequestURL, RequestOptions } from './Request';
26
+ import pump from 'pump';
27
+ import { HttpAgent, CheckAddressFunction } from './HttpAgent';
28
+ import { RequestURL, RequestOptions, HttpMethod } from './Request';
29
+ import { HttpClientResponseMeta, HttpClientResponse, ReadableWithMeta } from './Response';
30
+ import { parseJSON, sleep } from './utils';
31
+
32
+ const FormData = FormDataNative ?? FormDataNode;
33
+ // impl isReadable on Node.js 14
34
+ const isReadable = stream.isReadable ?? function isReadable(stream: any) {
35
+ return stream && typeof stream.read === 'function';
36
+ };
37
+ // impl promise pipeline on Node.js 14
38
+ const pipelinePromise = stream.promises?.pipeline ?? function pipeline(...args: any[]) {
39
+ return new Promise<void>((resolve, reject) => {
40
+ pump(...args, (err?: Error) => {
41
+ if (err) return reject(err);
42
+ resolve();
43
+ });
44
+ });
45
+ };
46
+
47
+ function noop() {
48
+ // noop
49
+ }
50
+
51
+ const MAX_REQURE_ID_VALUE = Math.pow(2, 31) - 10;
52
+ let globalRequestId = 0;
13
53
 
14
54
  const debug = debuglog('urllib');
15
55
 
16
56
  export type ClientOptions = {
17
57
  defaultArgs?: RequestOptions;
58
+ /**
59
+ * Custom DNS lookup function, default is `dns.lookup`.
60
+ */
61
+ lookup?: LookupFunction;
62
+ /**
63
+ * check request address to protect from SSRF and similar attacks.
64
+ * It receives two arguments (ip and family) and should return true or false to indicate whether the address is legal.
65
+ * It relies on lookup and has the same version requirement.
66
+ */
67
+ checkAddress?: CheckAddressFunction;
68
+ connect?: {
69
+ key?: string | Buffer;
70
+ /**
71
+ * A string or Buffer containing the certificate key of the client in PEM format.
72
+ * Note: This is necessary only when using client certificate authentication.
73
+ */
74
+ cert?: string | Buffer;
75
+ /**
76
+ * If true, the server certificate is verified against the list of supplied CAs.
77
+ * An 'error' event is emitted if verification fails. Default: true.
78
+ */
79
+ rejectUnauthorized?: boolean;
80
+ },
18
81
  };
19
82
 
83
+ type UndiciRquestOptions = { dispatcher?: Dispatcher } & Omit<Dispatcher.RequestOptions, 'origin' | 'path' | 'method'> & Partial<Pick<Dispatcher.RequestOptions, 'method'>>;
84
+
20
85
  // https://github.com/octet-stream/form-data
21
86
  class BlobFromStream {
22
87
  #stream;
@@ -48,7 +113,7 @@ class HttpClientRequestTimeoutError extends Error {
48
113
  }
49
114
  }
50
115
 
51
- const HEADER_USER_AGENT = createUserAgent('node-urllib', '3.0.0');
116
+ export const HEADER_USER_AGENT = createUserAgent('node-urllib', '3.0.0');
52
117
 
53
118
  function getFileName(stream: Readable) {
54
119
  const filePath: string = (stream as any).path;
@@ -58,79 +123,147 @@ function getFileName(stream: Readable) {
58
123
  return '';
59
124
  }
60
125
 
126
+ function defaultIsRetry(response: HttpClientResponse) {
127
+ return response.status >= 500;
128
+ }
129
+
130
+ function performanceTime(startTime: number) {
131
+ return Math.floor((performance.now() - startTime) * 1000) / 1000;
132
+ }
133
+
134
+ type RequestContext = {
135
+ retries: number;
136
+ };
137
+
61
138
  export class HttpClient extends EventEmitter {
62
- defaultArgs?: RequestOptions;
139
+ #defaultArgs?: RequestOptions;
140
+ #dispatcher?: Dispatcher;
63
141
 
64
142
  constructor(clientOptions?: ClientOptions) {
65
143
  super();
66
- this.defaultArgs = clientOptions?.defaultArgs;
144
+ this.#defaultArgs = clientOptions?.defaultArgs;
145
+ if (clientOptions?.lookup || clientOptions?.checkAddress || clientOptions?.connect) {
146
+ this.#dispatcher = new HttpAgent({
147
+ lookup: clientOptions.lookup,
148
+ checkAddress: clientOptions.checkAddress,
149
+ connect: clientOptions.connect,
150
+ });
151
+ }
67
152
  }
68
153
 
69
154
  async request(url: RequestURL, options?: RequestOptions) {
155
+ return await this.#requestInternal(url, options);
156
+ }
157
+
158
+ async #requestInternal(url: RequestURL, options?: RequestOptions, requestContext?: RequestContext): Promise<HttpClientResponse> {
159
+ if (globalRequestId >= MAX_REQURE_ID_VALUE) {
160
+ globalRequestId = 0;
161
+ }
162
+ const requestId = ++globalRequestId;
163
+
70
164
  const requestUrl = typeof url === 'string' ? new URL(url) : url;
71
165
  const args = {
72
- ...this.defaultArgs,
166
+ retry: 0,
167
+ ...this.#defaultArgs,
73
168
  ...options,
74
- emitter: this,
75
169
  };
76
- const requestStartTime = Date.now();
170
+ requestContext = {
171
+ retries: 0,
172
+ ...requestContext,
173
+ };
174
+ const requestStartTime = performance.now();
175
+
176
+ const reqMeta = {
177
+ requestId,
178
+ url: requestUrl.href,
179
+ args,
180
+ ctx: args.ctx,
181
+ };
77
182
  // keep urllib createCallbackResponse style
78
- const resHeaders: Record<string, string> = {};
79
- const res = {
183
+ const resHeaders: IncomingHttpHeaders = {};
184
+ const res: HttpClientResponseMeta = {
80
185
  status: -1,
81
186
  statusCode: -1,
82
- statusMessage: '',
83
187
  headers: resHeaders,
84
188
  size: 0,
85
189
  aborted: false,
86
190
  rt: 0,
87
191
  keepAliveSocket: true,
88
- requestUrls: [ url.toString() ],
192
+ requestUrls: [],
89
193
  timing: {
194
+ waiting: 0,
90
195
  contentDownload: 0,
91
196
  },
92
- // remoteAddress: remoteAddress,
93
- // remotePort: remotePort,
94
- // socketHandledRequests: socketHandledRequests,
95
- // socketHandledResponses: socketHandledResponses,
96
197
  };
97
198
 
98
- let requestTimeout = 5000;
199
+ let headersTimeout = 5000;
200
+ let bodyTimeout = 5000;
99
201
  if (args.timeout) {
100
202
  if (Array.isArray(args.timeout)) {
101
- requestTimeout = args.timeout[args.timeout.length - 1] ?? requestTimeout;
203
+ headersTimeout = args.timeout[0] ?? headersTimeout;
204
+ bodyTimeout = args.timeout[1] ?? bodyTimeout;
102
205
  } else {
103
- requestTimeout = args.timeout;
206
+ headersTimeout = bodyTimeout = args.timeout;
104
207
  }
105
208
  }
106
209
 
107
- const requestTimeoutController = new AbortController();
108
- const requestTimerId = setTimeout(() => requestTimeoutController.abort(), requestTimeout);
109
- const method = (args.method ?? 'GET').toUpperCase();
110
-
111
- try {
112
- const headers = new Headers(args.headers ?? {});
113
- // don't set user-agent
114
- const disableUserAgent = args.headers &&
115
- (args.headers['User-Agent'] === null || args.headers['user-agent'] === null);
116
- if (!disableUserAgent && !headers.has('user-agent')) {
117
- // need to set user-agent
118
- headers.set('user-agent', HEADER_USER_AGENT);
119
- }
120
- if (args.dataType === 'json' && !headers.has('accept')) {
121
- headers.set('accept', 'application/json');
210
+ const method = (args.method ?? 'GET').toUpperCase() as HttpMethod;
211
+ const headers: IncomingHttpHeaders = {};
212
+ if (args.headers) {
213
+ // convert headers to lower-case
214
+ for (const name in args.headers) {
215
+ headers[name.toLowerCase()] = args.headers[name];
122
216
  }
217
+ }
218
+ // hidden user-agent
219
+ const hiddenUserAgent = 'user-agent' in headers && !headers['user-agent'];
220
+ if (hiddenUserAgent) {
221
+ delete headers['user-agent'];
222
+ } else if (!headers['user-agent']) {
223
+ // need to set user-agent
224
+ headers['user-agent'] = HEADER_USER_AGENT;
225
+ }
226
+ // Alias to dataType = 'stream'
227
+ if (args.streaming || args.customResponse) {
228
+ args.dataType = 'stream';
229
+ }
230
+ if (args.dataType === 'json' && !headers.accept) {
231
+ headers.accept = 'application/json';
232
+ }
233
+ // gzip alias to compressed
234
+ if (args.gzip && args.compressed !== false) {
235
+ args.compressed = true;
236
+ }
237
+ if (args.compressed && !headers['accept-encoding']) {
238
+ headers['accept-encoding'] = 'gzip, br';
239
+ }
240
+ if (requestContext.retries > 0) {
241
+ headers['x-urllib-retry'] = `${requestContext.retries}/${args.retry}`;
242
+ }
243
+ if (args.auth && !headers.authorization) {
244
+ headers.authorization = `Basic ${Buffer.from(args.auth).toString('base64')}`;
245
+ }
123
246
 
124
- const requestOptions: RequestInit = {
247
+ let opaque = args.opaque;
248
+ try {
249
+ const requestOptions: UndiciRquestOptions = {
125
250
  method,
126
251
  keepalive: true,
127
- signal: requestTimeoutController.signal,
252
+ maxRedirections: args.maxRedirects ?? 10,
253
+ headersTimeout,
254
+ bodyTimeout,
255
+ opaque,
256
+ dispatcher: this.#dispatcher,
128
257
  };
129
258
  if (args.followRedirect === false) {
130
- requestOptions.redirect = 'manual';
259
+ requestOptions.maxRedirections = 0;
131
260
  }
132
261
 
133
262
  const isGETOrHEAD = requestOptions.method === 'GET' || requestOptions.method === 'HEAD';
263
+ // alias to args.content
264
+ if (args.stream && !args.content) {
265
+ args.content = args.stream;
266
+ }
134
267
 
135
268
  if (args.files) {
136
269
  if (isGETOrHEAD) {
@@ -164,8 +297,8 @@ export class HttpClient extends EventEmitter {
164
297
  // const fileName = encodeURIComponent(basename(file));
165
298
  // formData.append(field, await fileFromPath(file, `utf-8''${fileName}`, { type: mime.lookup(fileName) || '' }));
166
299
  const fileName = basename(file);
167
- const fileReader = createReadStream(file);
168
- formData.append(field, new BlobFromStream(fileReader, mime.lookup(fileName) || ''), fileName);
300
+ const fileReadable = createReadStream(file);
301
+ formData.append(field, new BlobFromStream(fileReadable, mime.lookup(fileName) || ''), fileName);
169
302
  } else if (Buffer.isBuffer(file)) {
170
303
  formData.append(field, new Blob([ file ]), `bufferfile${index}`);
171
304
  } else if (file instanceof Readable || isReadable(file as any)) {
@@ -173,17 +306,27 @@ export class HttpClient extends EventEmitter {
173
306
  formData.append(field, new BlobFromStream(file, mime.lookup(fileName) || ''), fileName);
174
307
  }
175
308
  }
176
- requestOptions.body = formData;
309
+
310
+ if (FormDataNative) {
311
+ requestOptions.body = formData;
312
+ } else {
313
+ // Node.js 14 does not support spec-compliant FormData
314
+ // https://github.com/octet-stream/form-data#usage
315
+ const encoder = new FormDataEncoder(formData as any);
316
+ Object.assign(headers, encoder.headers);
317
+ // fix "Content-Length":"NaN"
318
+ delete headers['Content-Length'];
319
+ requestOptions.body = Readable.from(encoder);
320
+ }
177
321
  } else if (args.content) {
178
322
  if (!isGETOrHEAD) {
179
- if (isReadable(args.content as Readable)) {
180
- // disable keepalive
181
- requestOptions.keepalive = false;
182
- }
183
323
  // handle content
184
324
  requestOptions.body = args.content;
185
325
  if (args.contentType) {
186
- headers.set('content-type', args.contentType);
326
+ headers['content-type'] = args.contentType;
327
+ }
328
+ if (typeof args.content === 'string' && !headers['content-type']) {
329
+ headers['content-type'] = 'text/plain;charset=UTF-8';
187
330
  }
188
331
  }
189
332
  } else if (args.data) {
@@ -198,77 +341,174 @@ export class HttpClient extends EventEmitter {
198
341
  }
199
342
  } else {
200
343
  if (isStringOrBufferOrReadable) {
201
- if (isReadable(args.data as Readable)) {
202
- // disable keepalive
203
- requestOptions.keepalive = false;
204
- }
205
344
  requestOptions.body = args.data;
206
345
  } else {
207
346
  if (args.contentType === 'json'
208
347
  || args.contentType === 'application/json'
209
- || headers.get('content-type')?.startsWith('application/json')) {
348
+ || headers['content-type']?.startsWith('application/json')) {
210
349
  requestOptions.body = JSON.stringify(args.data);
211
- if (!headers.has('content-type')) {
212
- headers.set('content-type', 'application/json');
350
+ if (!headers['content-type']) {
351
+ headers['content-type'] = 'application/json';
213
352
  }
214
353
  } else {
215
- requestOptions.body = new URLSearchParams(args.data);
354
+ headers['content-type'] = 'application/x-www-form-urlencoded;charset=UTF-8';
355
+ requestOptions.body = new URLSearchParams(args.data).toString();
216
356
  }
217
357
  }
218
358
  }
219
359
  }
220
360
 
221
- debug('%s %s, headers: %j, timeout: %s', requestOptions.method, url, headers, requestTimeout);
361
+ debug('Request#%d %s %s, headers: %j, headersTimeout: %s, bodyTimeout: %s',
362
+ requestId, requestOptions.method, requestUrl.href, headers, headersTimeout, bodyTimeout);
222
363
  requestOptions.headers = headers;
364
+ if (this.listenerCount('request') > 0) {
365
+ this.emit('request', reqMeta);
366
+ }
223
367
 
224
- const response = await fetch(requestUrl, requestOptions);
225
- for (const [ name, value ] of response.headers) {
226
- res.headers[name] = value;
368
+ const response = await undiciRequest(requestUrl, requestOptions);
369
+ opaque = response.opaque;
370
+ if (args.timing) {
371
+ res.timing.waiting = performanceTime(requestStartTime);
227
372
  }
228
- res.status = res.statusCode = response.status;
229
- res.statusMessage = response.statusText;
230
- if (response.redirected) {
231
- res.requestUrls.push(response.url);
373
+
374
+ const context = response.context as { history: URL[] };
375
+ let lastUrl = '';
376
+ if (context?.history) {
377
+ for (const urlObject of context?.history) {
378
+ res.requestUrls.push(urlObject.href);
379
+ lastUrl = urlObject.href;
380
+ }
381
+ } else {
382
+ res.requestUrls.push(requestUrl.href);
383
+ lastUrl = requestUrl.href;
232
384
  }
385
+ const contentEncoding = response.headers['content-encoding'];
386
+ const isCompressedContent = contentEncoding === 'gzip' || contentEncoding === 'br';
387
+
388
+ res.headers = response.headers;
389
+ res.status = res.statusCode = response.statusCode;
233
390
  if (res.headers['content-length']) {
234
391
  res.size = parseInt(res.headers['content-length']);
235
392
  }
236
393
 
237
- let data: any;
238
- if (args.streaming || args.dataType === 'stream') {
239
- data = response.body;
240
- } else if (args.dataType === 'text') {
241
- data = await response.text();
242
- } else if (args.dataType === 'json') {
243
- if (requestOptions.method === 'HEAD') {
244
- data = {};
394
+ let data: any = null;
395
+ let responseBodyStream: ReadableWithMeta | undefined;
396
+ if (args.dataType === 'stream') {
397
+ // streaming mode will disable retry
398
+ args.retry = 0;
399
+ const meta = {
400
+ status: res.status,
401
+ statusCode: res.statusCode,
402
+ headers: res.headers,
403
+ };
404
+ if (isCompressedContent) {
405
+ // gzip or br
406
+ const decoder = contentEncoding === 'gzip' ? createGunzip() : createBrotliDecompress();
407
+ responseBodyStream = Object.assign(pipeline(response.body, decoder, noop), meta);
245
408
  } else {
246
- data = await response.json();
409
+ responseBodyStream = Object.assign(response.body, meta);
410
+ }
411
+ } else if (args.writeStream) {
412
+ // streaming mode will disable retry
413
+ args.retry = 0;
414
+ if (isCompressedContent) {
415
+ const decoder = contentEncoding === 'gzip' ? createGunzip() : createBrotliDecompress();
416
+ await pipelinePromise(response.body, decoder, args.writeStream);
417
+ } else {
418
+ await pipelinePromise(response.body, args.writeStream);
247
419
  }
248
420
  } else {
249
421
  // buffer
250
- data = Buffer.from(await response.arrayBuffer());
422
+ data = Buffer.from(await response.body.arrayBuffer());
423
+ if (isCompressedContent) {
424
+ try {
425
+ data = contentEncoding === 'gzip' ? gunzipSync(data) : brotliDecompressSync(data);
426
+ } catch (err: any) {
427
+ if (err.name === 'Error') {
428
+ err.name = 'UnzipError';
429
+ }
430
+ throw err;
431
+ }
432
+ }
433
+ if (args.dataType === 'text') {
434
+ data = data.toString();
435
+ } else if (args.dataType === 'json') {
436
+ if (data.length === 0) {
437
+ data = null;
438
+ } else {
439
+ data = parseJSON(data.toString(), args.fixJSONCtlChars);
440
+ }
441
+ }
442
+ }
443
+ res.rt = performanceTime(requestStartTime);
444
+ if (args.timing) {
445
+ res.timing.contentDownload = res.rt;
251
446
  }
252
- res.rt = res.timing.contentDownload = Date.now() - requestStartTime;
253
447
 
254
- return {
255
- status: res.status,
448
+ const clientResponse: HttpClientResponse = {
449
+ opaque,
256
450
  data,
451
+ status: res.status,
257
452
  headers: res.headers,
258
- url: response.url,
259
- redirected: response.redirected,
260
- res,
453
+ url: lastUrl,
454
+ redirected: res.requestUrls.length > 1,
455
+ requestUrls: res.requestUrls,
456
+ res: responseBodyStream ?? res,
261
457
  };
458
+
459
+ if (args.retry > 0 && requestContext.retries < args.retry) {
460
+ const isRetry = args.isRetry ?? defaultIsRetry;
461
+ if (isRetry(clientResponse)) {
462
+ if (args.retryDelay) {
463
+ await sleep(args.retryDelay);
464
+ }
465
+ requestContext.retries++;
466
+ return await this.#requestInternal(url, options, requestContext);
467
+ }
468
+ }
469
+
470
+ if (this.listenerCount('response') > 0) {
471
+ this.emit('response', {
472
+ requestId,
473
+ error: null,
474
+ ctx: args.ctx,
475
+ req: reqMeta,
476
+ res,
477
+ });
478
+ }
479
+
480
+ return clientResponse;
262
481
  } catch (e: any) {
482
+ debug('Request#%d throw error: %s', requestId, e);
263
483
  let err = e;
264
- if (requestTimeoutController.signal.aborted) {
265
- err = new HttpClientRequestTimeoutError(requestTimeout, { cause: e });
484
+ if (err.name === 'HeadersTimeoutError') {
485
+ err = new HttpClientRequestTimeoutError(headersTimeout, { cause: e });
486
+ } else if (err.name === 'BodyTimeoutError') {
487
+ err = new HttpClientRequestTimeoutError(bodyTimeout, { cause: e });
266
488
  }
489
+ err.opaque = opaque;
490
+ err.status = res.status;
491
+ err.headers = res.headers;
267
492
  err.res = res;
268
- // console.error(err);
493
+ // make sure requestUrls not empty
494
+ if (res.requestUrls.length === 0) {
495
+ res.requestUrls.push(requestUrl.href);
496
+ }
497
+ res.rt = performanceTime(requestStartTime);
498
+ if (args.timing) {
499
+ res.timing.contentDownload = res.rt;
500
+ }
501
+
502
+ if (this.listenerCount('response') > 0) {
503
+ this.emit('response', {
504
+ requestId,
505
+ error: err,
506
+ ctx: args.ctx,
507
+ req: reqMeta,
508
+ res,
509
+ });
510
+ }
269
511
  throw err;
270
- } finally {
271
- clearTimeout(requestTimerId);
272
512
  }
273
513
  }
274
514
  }