net-http 0.1.1 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,20 +1,136 @@
1
- # frozen_string_literal: false
2
- # HTTP response class.
1
+ # frozen_string_literal: true
2
+
3
+ # This class is the base class for \Net::HTTP response classes.
4
+ #
5
+ # == About the Examples
6
+ #
7
+ # :include: doc/net-http/examples.rdoc
8
+ #
9
+ # == Returned Responses
10
+ #
11
+ # \Method Net::HTTP.get_response returns
12
+ # an instance of one of the subclasses of \Net::HTTPResponse:
13
+ #
14
+ # Net::HTTP.get_response(uri)
15
+ # # => #<Net::HTTPOK 200 OK readbody=true>
16
+ # Net::HTTP.get_response(hostname, '/nosuch')
17
+ # # => #<Net::HTTPNotFound 404 Not Found readbody=true>
18
+ #
19
+ # As does method Net::HTTP#request:
20
+ #
21
+ # req = Net::HTTP::Get.new(uri)
22
+ # Net::HTTP.start(hostname) do |http|
23
+ # http.request(req)
24
+ # end # => #<Net::HTTPOK 200 OK readbody=true>
25
+ #
26
+ # \Class \Net::HTTPResponse includes module Net::HTTPHeader,
27
+ # which provides access to response header values via (among others):
28
+ #
29
+ # - \Hash-like method <tt>[]</tt>.
30
+ # - Specific reader methods, such as +content_type+.
31
+ #
32
+ # Examples:
33
+ #
34
+ # res = Net::HTTP.get_response(uri) # => #<Net::HTTPOK 200 OK readbody=true>
35
+ # res['Content-Type'] # => "text/html; charset=UTF-8"
36
+ # res.content_type # => "text/html"
37
+ #
38
+ # == Response Subclasses
39
+ #
40
+ # \Class \Net::HTTPResponse has a subclass for each
41
+ # {HTTP status code}[https://en.wikipedia.org/wiki/List_of_HTTP_status_codes].
42
+ # You can look up the response class for a given code:
43
+ #
44
+ # Net::HTTPResponse::CODE_TO_OBJ['200'] # => Net::HTTPOK
45
+ # Net::HTTPResponse::CODE_TO_OBJ['400'] # => Net::HTTPBadRequest
46
+ # Net::HTTPResponse::CODE_TO_OBJ['404'] # => Net::HTTPNotFound
47
+ #
48
+ # And you can retrieve the status code for a response object:
49
+ #
50
+ # Net::HTTP.get_response(uri).code # => "200"
51
+ # Net::HTTP.get_response(hostname, '/nosuch').code # => "404"
52
+ #
53
+ # The response subclasses (indentation shows class hierarchy):
54
+ #
55
+ # - Net::HTTPUnknownResponse (for unhandled \HTTP extensions).
3
56
  #
4
- # This class wraps together the response header and the response body (the
5
- # entity requested).
57
+ # - Net::HTTPInformation:
6
58
  #
7
- # It mixes in the HTTPHeader module, which provides access to response
8
- # header values both via hash-like methods and via individual readers.
59
+ # - Net::HTTPContinue (100)
60
+ # - Net::HTTPSwitchProtocol (101)
61
+ # - Net::HTTPProcessing (102)
62
+ # - Net::HTTPEarlyHints (103)
9
63
  #
10
- # Note that each possible HTTP response code defines its own
11
- # HTTPResponse subclass. All classes are defined under the Net module.
12
- # Indentation indicates inheritance. For a list of the classes see Net::HTTP.
64
+ # - Net::HTTPSuccess:
13
65
  #
14
- # Correspondence <code>HTTP code => class</code> is stored in CODE_TO_OBJ
15
- # constant:
66
+ # - Net::HTTPOK (200)
67
+ # - Net::HTTPCreated (201)
68
+ # - Net::HTTPAccepted (202)
69
+ # - Net::HTTPNonAuthoritativeInformation (203)
70
+ # - Net::HTTPNoContent (204)
71
+ # - Net::HTTPResetContent (205)
72
+ # - Net::HTTPPartialContent (206)
73
+ # - Net::HTTPMultiStatus (207)
74
+ # - Net::HTTPAlreadyReported (208)
75
+ # - Net::HTTPIMUsed (226)
16
76
  #
17
- # Net::HTTPResponse::CODE_TO_OBJ['404'] #=> Net::HTTPNotFound
77
+ # - Net::HTTPRedirection:
78
+ #
79
+ # - Net::HTTPMultipleChoices (300)
80
+ # - Net::HTTPMovedPermanently (301)
81
+ # - Net::HTTPFound (302)
82
+ # - Net::HTTPSeeOther (303)
83
+ # - Net::HTTPNotModified (304)
84
+ # - Net::HTTPUseProxy (305)
85
+ # - Net::HTTPTemporaryRedirect (307)
86
+ # - Net::HTTPPermanentRedirect (308)
87
+ #
88
+ # - Net::HTTPClientError:
89
+ #
90
+ # - Net::HTTPBadRequest (400)
91
+ # - Net::HTTPUnauthorized (401)
92
+ # - Net::HTTPPaymentRequired (402)
93
+ # - Net::HTTPForbidden (403)
94
+ # - Net::HTTPNotFound (404)
95
+ # - Net::HTTPMethodNotAllowed (405)
96
+ # - Net::HTTPNotAcceptable (406)
97
+ # - Net::HTTPProxyAuthenticationRequired (407)
98
+ # - Net::HTTPRequestTimeOut (408)
99
+ # - Net::HTTPConflict (409)
100
+ # - Net::HTTPGone (410)
101
+ # - Net::HTTPLengthRequired (411)
102
+ # - Net::HTTPPreconditionFailed (412)
103
+ # - Net::HTTPRequestEntityTooLarge (413)
104
+ # - Net::HTTPRequestURITooLong (414)
105
+ # - Net::HTTPUnsupportedMediaType (415)
106
+ # - Net::HTTPRequestedRangeNotSatisfiable (416)
107
+ # - Net::HTTPExpectationFailed (417)
108
+ # - Net::HTTPMisdirectedRequest (421)
109
+ # - Net::HTTPUnprocessableEntity (422)
110
+ # - Net::HTTPLocked (423)
111
+ # - Net::HTTPFailedDependency (424)
112
+ # - Net::HTTPUpgradeRequired (426)
113
+ # - Net::HTTPPreconditionRequired (428)
114
+ # - Net::HTTPTooManyRequests (429)
115
+ # - Net::HTTPRequestHeaderFieldsTooLarge (431)
116
+ # - Net::HTTPUnavailableForLegalReasons (451)
117
+ #
118
+ # - Net::HTTPServerError:
119
+ #
120
+ # - Net::HTTPInternalServerError (500)
121
+ # - Net::HTTPNotImplemented (501)
122
+ # - Net::HTTPBadGateway (502)
123
+ # - Net::HTTPServiceUnavailable (503)
124
+ # - Net::HTTPGatewayTimeOut (504)
125
+ # - Net::HTTPVersionNotSupported (505)
126
+ # - Net::HTTPVariantAlsoNegotiates (506)
127
+ # - Net::HTTPInsufficientStorage (507)
128
+ # - Net::HTTPLoopDetected (508)
129
+ # - Net::HTTPNotExtended (510)
130
+ # - Net::HTTPNetworkAuthenticationRequired (511)
131
+ #
132
+ # There is also the Net::HTTPBadResponse exception which is raised when
133
+ # there is a protocol error.
18
134
  #
19
135
  class Net::HTTPResponse
20
136
  class << self
@@ -84,6 +200,8 @@ class Net::HTTPResponse
84
200
  @read = false
85
201
  @uri = nil
86
202
  @decode_content = false
203
+ @body_encoding = false
204
+ @ignore_eof = true
87
205
  end
88
206
 
89
207
  # The HTTP version supported by the server.
@@ -106,6 +224,41 @@ class Net::HTTPResponse
106
224
  # Accept-Encoding header from the user.
107
225
  attr_accessor :decode_content
108
226
 
227
+ # Returns the value set by body_encoding=, or +false+ if none;
228
+ # see #body_encoding=.
229
+ attr_reader :body_encoding
230
+
231
+ # Sets the encoding that should be used when reading the body:
232
+ #
233
+ # - If the given value is an Encoding object, that encoding will be used.
234
+ # - Otherwise if the value is a string, the value of
235
+ # {Encoding.find(value)}[https://docs.ruby-lang.org/en/master/Encoding.html#method-c-find]
236
+ # will be used.
237
+ # - Otherwise, unless the value is +nil+ or +false+ (no encoding is forced), an encoding will be deduced from the body itself.
238
+ #
239
+ # Examples:
240
+ #
241
+ # http = Net::HTTP.new(hostname)
242
+ # req = Net::HTTP::Get.new('/')
243
+ #
244
+ # http.request(req) do |res|
245
+ # p res.body.encoding # => #<Encoding:ASCII-8BIT>
246
+ # end
247
+ #
248
+ # http.request(req) do |res|
249
+ # res.body_encoding = "UTF-8"
250
+ # p res.body.encoding # => #<Encoding:UTF-8>
251
+ # end
252
+ #
# Stores the encoding policy used when the body is read.
#
# A String is resolved to an Encoding object through Encoding.find (which
# raises ArgumentError for an unknown name); any other value is stored
# exactly as given.
def body_encoding=(value)
  @body_encoding =
    case value
    when String then Encoding.find(value)
    else value
    end
end
257
+
258
+ # Whether to ignore EOF when reading bodies with a specified Content-Length
259
+ # header.
260
+ attr_accessor :ignore_eof
261
+
109
262
  def inspect
110
263
  "#<#{self.class} #{@code} #{@message} readbody=#{@read}>"
111
264
  end
@@ -120,7 +273,7 @@ class Net::HTTPResponse
120
273
 
121
274
  def error! #:nodoc:
122
275
  message = @code
123
- message += ' ' + @message.dump if @message
276
+ message = "#{message} #{@message.dump}" if @message
124
277
  raise error_type().new(message, self)
125
278
  end
126
279
 
@@ -213,30 +366,42 @@ class Net::HTTPResponse
213
366
  @body = nil
214
367
  end
215
368
  @read = true
369
+ return if @body.nil?
370
+
371
+ case enc = @body_encoding
372
+ when Encoding, false, nil
373
+ # Encoding: force given encoding
374
+ # false/nil: do not force encoding
375
+ else
376
+ # other value: detect encoding from body
377
+ enc = detect_encoding(@body)
378
+ end
379
+
380
+ @body.force_encoding(enc) if enc
216
381
 
217
382
  @body
218
383
  end
219
384
 
220
- # Returns the full entity body.
385
+ # Returns the string response body;
386
+ # note that repeated calls for the unmodified body return a cached string:
221
387
  #
222
- # Calling this method a second or subsequent time will return the
223
- # string already read.
388
+ # path = '/todos/1'
389
+ # Net::HTTP.start(hostname) do |http|
390
+ # res = http.get(path)
391
+ # p res.body
392
+ # p http.head(path).body # No body.
393
+ # end
224
394
  #
225
- # http.request_get('/index.html') {|res|
226
- # puts res.body
227
- # }
395
+ # Output:
228
396
  #
229
- # http.request_get('/index.html') {|res|
230
- # p res.body.object_id # 538149362
231
- # p res.body.object_id # 538149362
232
- # }
397
+ # "{\n \"userId\": 1,\n \"id\": 1,\n \"title\": \"delectus aut autem\",\n \"completed\": false\n}"
398
+ # nil
233
399
  #
234
400
  def body
235
401
  read_body()
236
402
  end
237
403
 
238
- # Because it may be necessary to modify the body, Eg, decompression
239
- # this method facilitates that.
404
+ # Sets the body of the response to the given value.
240
405
  def body=(value)
241
406
  @body = value
242
407
  end
@@ -245,6 +410,141 @@ class Net::HTTPResponse
245
410
 
246
411
  private
247
412
 
# :nodoc:
# Chooses the encoding to apply to the body +str+.
#
# Candidates are tried in this order and the first hit wins:
#
# 1. the +encoding+ argument, when given;
# 2. <tt>type_params['charset']</tt> (presumably the charset parameter of
#    the Content-Type header; +type_params+ is defined outside this method);
# 3. a byte order mark at the start of +str+ (see +check_bom+);
# 4. for XML media types, the +encoding+ pseudo-attribute of a leading
#    XML declaration, falling back to UTF-8;
# 5. for <tt>text/html</tt>, whatever +sniff_encoding+ reports.
#
# Returns an Encoding, an encoding name String, or +nil+ when none of the
# above applies.
def detect_encoding(str, encoding=nil)
  return encoding if encoding

  encoding = type_params['charset'] || check_bom(str)
  return encoding if encoding

  case content_type&.downcase
  when %r{text/x(?:ht)?ml|application/(?:[^+]+\+)?xml}
    # An XML declaration opens with "<?xml"; anchoring on "<xml" (without
    # the question mark) can never match a real declaration, which would
    # make every XML body fall through to the UTF-8 default.
    xml_decl = /\A<\?xml[ \t\r\n]+
      version[ \t\r\n]*=[ \t\r\n]*(?:"[0-9.]+"|'[0-9.]*')[ \t\r\n]+
      encoding[ \t\r\n]*=[ \t\r\n]*
      (?:"([A-Za-z][\-A-Za-z0-9._]*)"|'([A-Za-z][\-A-Za-z0-9._]*)')/x
    decl = xml_decl.match(str)
    (decl && (decl[1] || decl[2])) || Encoding::UTF_8
  when %r{text/html.*}
    sniff_encoding(str)
  end
end
432
+
# :nodoc:
# Guesses the encoding of an HTML body, loosely following the HTML5
# "determining the character encoding" steps:
# http://www.w3.org/TR/html5/parsing.html#determining-the-character-encoding
#
# Only some of those steps are implemented here: a <meta> declaration
# found by +scanning_meta+, then a pure-ASCII check, then a check that the
# bytes are valid UTF-8. The "last visited page" (6), "frequency" (7) and
# "implementation-defined or user-specified" (8) steps are not attempted,
# so the result is +nil+ when nothing matches.
#
# The +encoding+ parameter is accepted but not used.
def sniff_encoding(str, encoding=nil)
  declared = scanning_meta(str)
  return declared if declared
  return Encoding::US_ASCII if str.ascii_only?

  # Validate on a copy so the caller's string keeps its encoding.
  utf8_view = str.dup.force_encoding(Encoding::UTF_8)
  Encoding::UTF_8 if utf8_view.valid_encoding?
end
448
+
# :nodoc:
# Returns the encoding announced by a byte order mark at the start of
# +str+ (UTF-16BE, UTF-16LE or UTF-8), or +nil+ when there is none.
#
# The comparison is done on byte values rather than with String#==.
# A literal such as "\xEF\xBB\xBF" carries the source file's encoding, and
# Ruby treats two non-ASCII strings with different encodings as unequal
# even when their bytes are identical, so a binary (ASCII-8BIT) body would
# never match a string literal.
def check_bom(str)
  head = str.byteslice(0, 3).bytes

  case head.first(2)
  when [0xFE, 0xFF] then return Encoding::UTF_16BE
  when [0xFF, 0xFE] then return Encoding::UTF_16LE
  end

  head == [0xEF, 0xBB, 0xBF] ? Encoding::UTF_8 : nil
end
462
+
# :nodoc:
# Looks for a character-encoding declaration in the <meta> tags of +str+.
#
# Every "<meta" occurrence is inspected in document order and the first
# one that yields a known encoding wins; a tag without a usable
# declaration (for example <meta name="viewport" ...>) is skipped instead
# of ending the search.
#
# Returns an Encoding, or +nil+ when no tag declares a known encoding.
def scanning_meta(str)
  require 'strscan' # loaded lazily: only needed when sniffing HTML bodies
  ss = StringScanner.new(str)
  while ss.scan_until(/<meta[\t\n\f\r ]*/)
    charset = charset_from_meta_tag(ss)
    return charset if charset # tentative
  end
  nil
end

# Reads the attributes of the <meta> tag the scanner +ss+ is positioned in
# (via +get_attribute+) and returns the Encoding the tag declares, or +nil+.
def charset_from_meta_tag(ss)
  seen = {} # attribute names already handled; repeats are ignored
  got_pragma = false
  need_pragma = nil
  charset = nil

  # step: Attributes
  while (attr = get_attribute(ss))
    name, value = attr
    next if seen[name]
    seen[name] = true
    case name
    when 'http-equiv'
      got_pragma = true if value == 'content-type'
    when 'content'
      from_content = extracting_encodings_from_meta_elements(value)
      charset ||= from_content
      need_pragma = true
    when 'charset'
      need_pragma = false
      charset = value
    end
  end

  # step: Processing
  return nil if need_pragma.nil?           # neither content= nor charset= seen
  return nil if need_pragma && !got_pragma # content= without http-equiv=content-type
  return nil unless charset.is_a?(String)

  found =
    begin
      Encoding.find(charset)
    rescue ArgumentError # unknown or malformed encoding name
      nil
    end
  # A <meta> tag readable as ASCII cannot really be UTF-16; use UTF-8.
  found == Encoding::UTF_16 ? Encoding::UTF_8 : found
end
504
+
# Reads the next attribute of the tag the StringScanner +ss+ is inside.
#
# Returns <tt>[name, value]</tt> with both strings downcased; +value+ is
# an empty string for an attribute without a value. Returns +nil+ when the
# tag ends (the closing ">" is consumed) or when no attribute name can be
# read, e.g. on truncated input.
#
# Malformed markup never raises: an empty name ends the attribute list,
# and an empty or unterminated value becomes an empty/partial string.
def get_attribute(ss)
  ss.skip(/[\t\n\f\r \/]*/)
  if ss.peek(1) == '>'
    ss.getch
    return nil
  end

  name = ss.scan(/[^=\t\n\f\r \/>]*/).to_s.downcase
  return nil if name.empty?

  ss.skip(/[\t\n\f\r ]*/)
  # Only an "=" is consumed here; any other character belongs to the next
  # attribute (or is the closing ">") and must stay in the stream.
  return [name, ''] unless ss.skip(/=/)

  ss.skip(/[\t\n\f\r ]*/)
  value =
    case (quote = ss.peek(1))
    when '"', "'"
      ss.getch # opening quote
      quoted = ss.scan(quote == '"' ? /[^"]*/ : /[^']*/)
      ss.getch # closing quote (nil at end of input)
      quoted
    when '>'
      ''
    else
      ss.scan(/[^\t\n\f\r >]*/)
    end
  [name, value.to_s.downcase]
end
539
+
# Extracts the charset name from the +content+ attribute value of a
# <meta> element, e.g. "text/html; charset=utf-8" gives "utf-8".
#
# The name may be double-quoted, single-quoted or bare. Returns +nil+
# when +value+ has no "charset=" part, or when that part is followed by
# an unterminated quote or by nothing at all.
#
# Algorithm reference:
# http://dev.w3.org/html5/spec/fetching-resources.html#algorithm-for-extracting-an-encoding-from-a-meta-element
def extracting_encodings_from_meta_elements(value)
  found = /charset[\t\n\f\r ]*=(?:"([^"]*)"|'([^']*)'|["']|\z|([^\t\n\f\r ;]+))/i.match(value)
  return nil unless found

  found[1] || found[2] || found[3]
end
547
+
248
548
  ##
249
549
  # Checks for a supported Content-Encoding header and yields an Inflate
250
550
  # wrapper for this response's socket when zlib is present. If the
@@ -272,6 +572,9 @@ class Net::HTTPResponse
272
572
  ensure
273
573
  begin
274
574
  inflate_body_io.finish
575
+ if self['content-length']
576
+ self['content-length'] = inflate_body_io.bytes_inflated.to_s
577
+ end
275
578
  rescue => err
276
579
  # Ignore #finish's error if there is an exception from yield
277
580
  raise err if success
@@ -297,7 +600,7 @@ class Net::HTTPResponse
297
600
 
298
601
  clen = content_length()
299
602
  if clen
300
- @socket.read clen, dest, true # ignore EOF
603
+ @socket.read clen, dest, @ignore_eof
301
604
  return
302
605
  end
303
606
  clen = range_length()
@@ -337,7 +640,7 @@ class Net::HTTPResponse
337
640
  end
338
641
 
339
642
  def stream_check
340
- raise IOError, 'attempt to read body out of block' if @socket.closed?
643
+ raise IOError, 'attempt to read body out of block' if @socket.nil? || @socket.closed?
341
644
  end
342
645
 
343
646
  def procdest(dest, block)
@@ -346,7 +649,7 @@ class Net::HTTPResponse
346
649
  if block
347
650
  Net::ReadAdapter.new(block)
348
651
  else
349
- dest || ''
652
+ dest || +''
350
653
  end
351
654
  end
352
655
 
@@ -373,6 +676,14 @@ class Net::HTTPResponse
373
676
  @inflate.finish
374
677
  end
375
678
 
##
# Reports <tt>@inflate.total_out</tt>, i.e. how many bytes the wrapped
# inflater has produced so far. The response uses this figure to rewrite
# its Content-Length header once the body has been inflated.

def bytes_inflated
  @inflate.total_out
end
686
+
376
687
  ##
377
688
  # Returns a Net::ReadAdapter that inflates each read chunk into +dest+.
378
689
  #