net-http 0.1.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,136 @@
1
1
  # frozen_string_literal: false
2
- # HTTP response class.
2
+
3
+ # This class is the base class for \Net::HTTP response classes.
4
+ #
5
+ # == About the Examples
6
+ #
7
+ # :include: doc/net-http/examples.rdoc
8
+ #
9
+ # == Returned Responses
10
+ #
11
+ # \Method Net::HTTP.get_response returns
12
+ # an instance of one of the subclasses of \Net::HTTPResponse:
13
+ #
14
+ # Net::HTTP.get_response(uri)
15
+ # # => #<Net::HTTPOK 200 OK readbody=true>
16
+ # Net::HTTP.get_response(hostname, '/nosuch')
17
+ # # => #<Net::HTTPNotFound 404 Not Found readbody=true>
18
+ #
19
+ # As does method Net::HTTP#request:
20
+ #
21
+ # req = Net::HTTP::Get.new(uri)
22
+ # Net::HTTP.start(hostname) do |http|
23
+ # http.request(req)
24
+ # end # => #<Net::HTTPOK 200 OK readbody=true>
25
+ #
26
+ # \Class \Net::HTTPResponse includes module Net::HTTPHeader,
27
+ # which provides access to response header values via (among others):
28
+ #
29
+ # - \Hash-like method <tt>[]</tt>.
30
+ # - Specific reader methods, such as +content_type+.
31
+ #
32
+ # Examples:
33
+ #
34
+ # res = Net::HTTP.get_response(uri) # => #<Net::HTTPOK 200 OK readbody=true>
35
+ # res['Content-Type'] # => "text/html; charset=UTF-8"
36
+ # res.content_type # => "text/html"
37
+ #
38
+ # == Response Subclasses
39
+ #
40
+ # \Class \Net::HTTPResponse has a subclass for each
41
+ # {HTTP status code}[https://en.wikipedia.org/wiki/List_of_HTTP_status_codes].
42
+ # You can look up the response class for a given code:
43
+ #
44
+ # Net::HTTPResponse::CODE_TO_OBJ['200'] # => Net::HTTPOK
45
+ # Net::HTTPResponse::CODE_TO_OBJ['400'] # => Net::HTTPBadRequest
46
+ # Net::HTTPResponse::CODE_TO_OBJ['404'] # => Net::HTTPNotFound
47
+ #
48
+ # And you can retrieve the status code for a response object:
49
+ #
50
+ # Net::HTTP.get_response(uri).code # => "200"
51
+ # Net::HTTP.get_response(hostname, '/nosuch').code # => "404"
3
52
  #
4
- # This class wraps together the response header and the response body (the
5
- # entity requested).
53
+ # The response subclasses (indentation shows class hierarchy):
6
54
  #
7
- # It mixes in the HTTPHeader module, which provides access to response
8
- # header values both via hash-like methods and via individual readers.
55
+ # - Net::HTTPUnknownResponse (for unhandled \HTTP extensions).
9
56
  #
10
- # Note that each possible HTTP response code defines its own
11
- # HTTPResponse subclass. All classes are defined under the Net module.
12
- # Indentation indicates inheritance. For a list of the classes see Net::HTTP.
57
+ # - Net::HTTPInformation:
13
58
  #
14
- # Correspondence <code>HTTP code => class</code> is stored in CODE_TO_OBJ
15
- # constant:
59
+ # - Net::HTTPContinue (100)
60
+ # - Net::HTTPSwitchProtocol (101)
61
+ # - Net::HTTPProcessing (102)
62
+ # - Net::HTTPEarlyHints (103)
16
63
  #
17
- # Net::HTTPResponse::CODE_TO_OBJ['404'] #=> Net::HTTPNotFound
64
+ # - Net::HTTPSuccess:
65
+ #
66
+ # - Net::HTTPOK (200)
67
+ # - Net::HTTPCreated (201)
68
+ # - Net::HTTPAccepted (202)
69
+ # - Net::HTTPNonAuthoritativeInformation (203)
70
+ # - Net::HTTPNoContent (204)
71
+ # - Net::HTTPResetContent (205)
72
+ # - Net::HTTPPartialContent (206)
73
+ # - Net::HTTPMultiStatus (207)
74
+ # - Net::HTTPAlreadyReported (208)
75
+ # - Net::HTTPIMUsed (226)
76
+ #
77
+ # - Net::HTTPRedirection:
78
+ #
79
+ # - Net::HTTPMultipleChoices (300)
80
+ # - Net::HTTPMovedPermanently (301)
81
+ # - Net::HTTPFound (302)
82
+ # - Net::HTTPSeeOther (303)
83
+ # - Net::HTTPNotModified (304)
84
+ # - Net::HTTPUseProxy (305)
85
+ # - Net::HTTPTemporaryRedirect (307)
86
+ # - Net::HTTPPermanentRedirect (308)
87
+ #
88
+ # - Net::HTTPClientError:
89
+ #
90
+ # - Net::HTTPBadRequest (400)
91
+ # - Net::HTTPUnauthorized (401)
92
+ # - Net::HTTPPaymentRequired (402)
93
+ # - Net::HTTPForbidden (403)
94
+ # - Net::HTTPNotFound (404)
95
+ # - Net::HTTPMethodNotAllowed (405)
96
+ # - Net::HTTPNotAcceptable (406)
97
+ # - Net::HTTPProxyAuthenticationRequired (407)
98
+ # - Net::HTTPRequestTimeOut (408)
99
+ # - Net::HTTPConflict (409)
100
+ # - Net::HTTPGone (410)
101
+ # - Net::HTTPLengthRequired (411)
102
+ # - Net::HTTPPreconditionFailed (412)
103
+ # - Net::HTTPRequestEntityTooLarge (413)
104
+ # - Net::HTTPRequestURITooLong (414)
105
+ # - Net::HTTPUnsupportedMediaType (415)
106
+ # - Net::HTTPRequestedRangeNotSatisfiable (416)
107
+ # - Net::HTTPExpectationFailed (417)
108
+ # - Net::HTTPMisdirectedRequest (421)
109
+ # - Net::HTTPUnprocessableEntity (422)
110
+ # - Net::HTTPLocked (423)
111
+ # - Net::HTTPFailedDependency (424)
112
+ # - Net::HTTPUpgradeRequired (426)
113
+ # - Net::HTTPPreconditionRequired (428)
114
+ # - Net::HTTPTooManyRequests (429)
115
+ # - Net::HTTPRequestHeaderFieldsTooLarge (431)
116
+ # - Net::HTTPUnavailableForLegalReasons (451)
117
+ #
118
+ # - Net::HTTPServerError:
119
+ #
120
+ # - Net::HTTPInternalServerError (500)
121
+ # - Net::HTTPNotImplemented (501)
122
+ # - Net::HTTPBadGateway (502)
123
+ # - Net::HTTPServiceUnavailable (503)
124
+ # - Net::HTTPGatewayTimeOut (504)
125
+ # - Net::HTTPVersionNotSupported (505)
126
+ # - Net::HTTPVariantAlsoNegotiates (506)
127
+ # - Net::HTTPInsufficientStorage (507)
128
+ # - Net::HTTPLoopDetected (508)
129
+ # - Net::HTTPNotExtended (510)
130
+ # - Net::HTTPNetworkAuthenticationRequired (511)
131
+ #
132
+ # There is also the Net::HTTPBadResponse exception which is raised when
133
+ # there is a protocol error.
18
134
  #
19
135
  class Net::HTTPResponse
20
136
  class << self
@@ -84,6 +200,8 @@ class Net::HTTPResponse
84
200
  @read = false
85
201
  @uri = nil
86
202
  @decode_content = false
203
+ @body_encoding = false
204
+ @ignore_eof = true
87
205
  end
88
206
 
89
207
  # The HTTP version supported by the server.
@@ -106,6 +224,22 @@ class Net::HTTPResponse
106
224
  # Accept-Encoding header from the user.
107
225
  attr_accessor :decode_content
108
226
 
227
+ # The encoding to use for the response body. If Encoding, use that encoding.
228
+ # If other true value, attempt to detect the appropriate encoding, and use
229
+ # that.
230
+ attr_reader :body_encoding
231
+
232
+ # Set the encoding to use for the response body. If given a String, find
233
+ # the related Encoding.
234
# Stores the encoding setting used for the response body.
#
# A String is resolved to the matching Encoding through Encoding.find
# (which raises ArgumentError for an unknown name); any other value is
# stored unchanged.
def body_encoding=(value)
  @body_encoding = value.is_a?(String) ? Encoding.find(value) : value
end
238
+
239
+ # Whether to ignore EOF when reading bodies with a specified Content-Length
240
+ # header.
241
+ attr_accessor :ignore_eof
242
+
109
243
  def inspect
110
244
  "#<#{self.class} #{@code} #{@message} readbody=#{@read}>"
111
245
  end
@@ -214,6 +348,17 @@ class Net::HTTPResponse
214
348
  end
215
349
  @read = true
216
350
 
351
+ case enc = @body_encoding
352
+ when Encoding, false, nil
353
+ # Encoding: force given encoding
354
+ # false/nil: do not force encoding
355
+ else
356
+ # other value: detect encoding from body
357
+ enc = detect_encoding(@body)
358
+ end
359
+
360
+ @body.force_encoding(enc) if enc
361
+
217
362
  @body
218
363
  end
219
364
 
@@ -245,6 +390,141 @@ class Net::HTTPResponse
245
390
 
246
391
  private
247
392
 
393
+ # :nodoc:
394
# Works out which encoding should be applied to the body +str+.
#
# Sources are consulted in this order, the first hit wins:
# 1. the +encoding+ argument, when given;
# 2. the +charset+ parameter returned by type_params;
# 3. a byte order mark (check_bom);
# 4. content-type specific detection:
#    - XML types: the +encoding+ pseudo-attribute of a leading XML
#      declaration, falling back to UTF-8;
#    - text/html: sniff_encoding.
#
# Returns an Encoding, an encoding name String, or nil when nothing
# could be determined.
def detect_encoding(str, encoding=nil)
  return encoding if encoding

  encoding = type_params['charset'] || check_bom(str)
  return encoding if encoding

  case content_type&.downcase
  when %r{text/x(?:ht)?ml|application/(?:[^+]+\+)?xml}
    # An XML declaration starts with "<?xml"; without the "?" the pattern
    # can never match and the declared encoding would always be ignored.
    decl = /\A<\?xml[ \t\r\n]+
      version[ \t\r\n]*=[ \t\r\n]*(?:"[0-9.]+"|'[0-9.]*')[ \t\r\n]+
      encoding[ \t\r\n]*=[ \t\r\n]*
      (?:"([A-Za-z][\-A-Za-z0-9._]*)"|'([A-Za-z][\-A-Za-z0-9._]*)')/x.match(str)
    (decl && (decl[1] || decl[2])) || Encoding::UTF_8
  when %r{text/html.*}
    sniff_encoding(str)
  end
end
412
+
413
+ # :nodoc:
414
# Guesses the encoding of an HTML body, loosely following the HTML5
# encoding sniffing algorithm
# (http://www.w3.org/TR/html5/parsing.html#determining-the-character-encoding).
#
# The result of scanning_meta is preferred; otherwise US-ASCII is returned
# for an all-ASCII body, UTF-8 when the bytes are valid UTF-8, and nil
# when none of these apply. The +encoding+ argument is not used.
def sniff_encoding(str, encoding=nil)
  from_meta = scanning_meta(str)
  return from_meta if from_meta
  return Encoding::US_ASCII if str.ascii_only?

  # Validate on a copy so the caller's string keeps its encoding.
  utf8_view = str.dup.force_encoding(Encoding::UTF_8)
  Encoding::UTF_8 if utf8_view.valid_encoding?
end
428
+
429
+ # :nodoc:
430
# Returns the encoding announced by a byte order mark at the start of
# +str+ (UTF-8, UTF-16BE or UTF-16LE), or nil when there is none.
#
# The comparison is done on binary copies: String#== reports strings with
# different encodings and non-ASCII bytes as unequal even when their bytes
# are identical, so comparing a binary-tagged body against plain
# "\xFE\xFF"-style literals would never match.
def check_bom(str)
  head = str.byteslice(0, 3).b
  return Encoding::UTF_8 if head == "\xEF\xBB\xBF".b

  case head.byteslice(0, 2)
  when "\xFE\xFF".b then Encoding::UTF_16BE
  when "\xFF\xFE".b then Encoding::UTF_16LE
  end
end
442
+
443
+ # :nodoc:
444
# Looks at the first "<meta" tag in +str+ and returns the Encoding it
# declares, or nil.
#
# Attributes are read with get_attribute; a repeated attribute name is
# ignored. A +charset+ attribute is used directly. A +content+ attribute
# is only honoured when the tag also has http-equiv="content-type", and
# its charset is extracted with extracting_encodings_from_meta_elements.
# Names that Encoding.find cannot resolve yield nil, and UTF-16 is
# replaced by UTF-8.
def scanning_meta(str)
  require 'strscan'
  scanner = StringScanner.new(str)
  return nil unless scanner.scan_until(/<meta[\t\n\f\r ]*/)

  seen = {} # attribute names already processed
  pragma_found = false
  pragma_needed = nil
  label = nil

  # step: Attributes
  while (pair = get_attribute(scanner))
    attr_name, attr_value = pair
    next if seen[attr_name]
    seen[attr_name] = true

    if attr_name == 'http-equiv'
      pragma_found = true if attr_value == 'content-type'
    elsif attr_name == 'content'
      extracted = extracting_encodings_from_meta_elements(attr_value)
      label = extracted unless label
      pragma_needed = true
    elsif attr_name == 'charset'
      pragma_needed = false
      label = attr_value
    end
  end

  # step: Processing
  return nil if pragma_needed.nil? || (pragma_needed && !pragma_found)

  resolved =
    begin
      Encoding.find(label)
    rescue StandardError
      nil
    end
  return nil unless resolved

  # tentative result
  resolved == Encoding::UTF_16 ? Encoding::UTF_8 : resolved
end
484
+
485
# Reads the next attribute of the tag that the StringScanner +ss+ is
# positioned in.
#
# Returns a two-element array [name, value], both downcased; the value is
# an empty string when the attribute has none. Returns nil when the end of
# the tag (">", which is consumed) is reached or when no attribute name
# can be read (for example at the end of the input), so that callers
# looping on the result terminate instead of failing on truncated or
# malformed markup.
def get_attribute(ss)
  ss.skip(/[\t\n\f\r \/]*/)
  if ss.peek(1) == '>'
    ss.getch
    return nil
  end

  name = ss.scan(/[^=\t\n\f\r \/>]+/)
  return nil unless name
  name.downcase!

  ss.skip(/[\t\n\f\r ]*/)
  # Only step over the next character when it really is "="; otherwise it
  # belongs to the next attribute or is the closing ">".
  return [name, ''] unless ss.skip(/=/)

  ss.skip(/[\t\n\f\r ]*/)
  value =
    case ss.peek(1)
    when '"'
      ss.getch
      quoted = ss.scan(/[^"]*/) # "*" so that an empty value yields ""
      ss.getch                  # closing quote (nil at end of input)
      quoted
    when "'"
      ss.getch
      quoted = ss.scan(/[^']*/)
      ss.getch
      quoted
    when '>'
      ''
    else
      ss.scan(/[^\t\n\f\r >]*/)
    end
  [name, value.downcase]
end
519
+
520
# Pulls the charset label out of the value of a meta +content+ attribute,
# e.g. "text/html; charset=utf-8". The label may be double-quoted,
# single-quoted or bare. Returns the label, or nil when there is no
# usable charset.
# http://dev.w3.org/html5/spec/fetching-resources.html#algorithm-for-extracting-an-encoding-from-a-meta-element
def extracting_encodings_from_meta_elements(value)
  found = /charset[\t\n\f\r ]*=(?:"([^"]*)"|'([^']*)'|["']|\z|([^\t\n\f\r ;]+))/i.match(value)
  found && (found[1] || found[2] || found[3])
end
527
+
248
528
  ##
249
529
  # Checks for a supported Content-Encoding header and yields an Inflate
250
530
  # wrapper for this response's socket when zlib is present. If the
@@ -272,6 +552,9 @@ class Net::HTTPResponse
272
552
  ensure
273
553
  begin
274
554
  inflate_body_io.finish
555
+ if self['content-length']
556
+ self['content-length'] = inflate_body_io.bytes_inflated.to_s
557
+ end
275
558
  rescue => err
276
559
  # Ignore #finish's error if there is an exception from yield
277
560
  raise err if success
@@ -297,7 +580,7 @@ class Net::HTTPResponse
297
580
 
298
581
  clen = content_length()
299
582
  if clen
300
- @socket.read clen, dest, true # ignore EOF
583
+ @socket.read clen, dest, @ignore_eof
301
584
  return
302
585
  end
303
586
  clen = range_length()
@@ -373,6 +656,14 @@ class Net::HTTPResponse
373
656
  @inflate.finish
374
657
  end
375
658
 
659
+ ##
660
+ # The number of bytes inflated, used to update the Content-Length of
661
+ # the response.
662
+
663
# Reports the value of +total_out+ from the wrapped inflater.
def bytes_inflated
  inflater = @inflate
  inflater.total_out
end
666
+
376
667
  ##
377
668
  # Returns a Net::ReadAdapter that inflates each read chunk into +dest+.
378
669
  #