net-http 0.1.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,20 +1,136 @@
1
1
  # frozen_string_literal: false
2
- # HTTP response class.
2
+
3
+ # This class is the base class for \Net::HTTP response classes.
4
+ #
5
+ # == About the Examples
6
+ #
7
+ # :include: doc/net-http/examples.rdoc
8
+ #
9
+ # == Returned Responses
10
+ #
11
+ # \Method Net::HTTP.get_response returns
12
+ # an instance of one of the subclasses of \Net::HTTPResponse:
13
+ #
14
+ # Net::HTTP.get_response(uri)
15
+ # # => #<Net::HTTPOK 200 OK readbody=true>
16
+ # Net::HTTP.get_response(hostname, '/nosuch')
17
+ # # => #<Net::HTTPNotFound 404 Not Found readbody=true>
18
+ #
19
+ # As does method Net::HTTP#request:
20
+ #
21
+ # req = Net::HTTP::Get.new(uri)
22
+ # Net::HTTP.start(hostname) do |http|
23
+ # http.request(req)
24
+ # end # => #<Net::HTTPOK 200 OK readbody=true>
25
+ #
26
+ # \Class \Net::HTTPResponse includes module Net::HTTPHeader,
27
+ # which provides access to response header values via (among others):
28
+ #
29
+ # - \Hash-like method <tt>[]</tt>.
30
+ # - Specific reader methods, such as +content_type+.
31
+ #
32
+ # Examples:
33
+ #
34
+ # res = Net::HTTP.get_response(uri) # => #<Net::HTTPOK 200 OK readbody=true>
35
+ # res['Content-Type'] # => "text/html; charset=UTF-8"
36
+ # res.content_type # => "text/html"
37
+ #
38
+ # == Response Subclasses
39
+ #
40
+ # \Class \Net::HTTPResponse has a subclass for each
41
+ # {HTTP status code}[https://en.wikipedia.org/wiki/List_of_HTTP_status_codes].
42
+ # You can look up the response class for a given code:
43
+ #
44
+ # Net::HTTPResponse::CODE_TO_OBJ['200'] # => Net::HTTPOK
45
+ # Net::HTTPResponse::CODE_TO_OBJ['400'] # => Net::HTTPBadRequest
46
+ # Net::HTTPResponse::CODE_TO_OBJ['404'] # => Net::HTTPNotFound
47
+ #
48
+ # And you can retrieve the status code for a response object:
49
+ #
50
+ # Net::HTTP.get_response(uri).code # => "200"
51
+ # Net::HTTP.get_response(hostname, '/nosuch').code # => "404"
3
52
  #
4
- # This class wraps together the response header and the response body (the
5
- # entity requested).
53
+ # The response subclasses (indentation shows class hierarchy):
6
54
  #
7
- # It mixes in the HTTPHeader module, which provides access to response
8
- # header values both via hash-like methods and via individual readers.
55
+ # - Net::HTTPUnknownResponse (for unhandled \HTTP extensions).
9
56
  #
10
- # Note that each possible HTTP response code defines its own
11
- # HTTPResponse subclass. All classes are defined under the Net module.
12
- # Indentation indicates inheritance. For a list of the classes see Net::HTTP.
57
+ # - Net::HTTPInformation:
13
58
  #
14
- # Correspondence <code>HTTP code => class</code> is stored in CODE_TO_OBJ
15
- # constant:
59
+ # - Net::HTTPContinue (100)
60
+ # - Net::HTTPSwitchProtocol (101)
61
+ # - Net::HTTPProcessing (102)
62
+ # - Net::HTTPEarlyHints (103)
16
63
  #
17
- # Net::HTTPResponse::CODE_TO_OBJ['404'] #=> Net::HTTPNotFound
64
+ # - Net::HTTPSuccess:
65
+ #
66
+ # - Net::HTTPOK (200)
67
+ # - Net::HTTPCreated (201)
68
+ # - Net::HTTPAccepted (202)
69
+ # - Net::HTTPNonAuthoritativeInformation (203)
70
+ # - Net::HTTPNoContent (204)
71
+ # - Net::HTTPResetContent (205)
72
+ # - Net::HTTPPartialContent (206)
73
+ # - Net::HTTPMultiStatus (207)
74
+ # - Net::HTTPAlreadyReported (208)
75
+ # - Net::HTTPIMUsed (226)
76
+ #
77
+ # - Net::HTTPRedirection:
78
+ #
79
+ # - Net::HTTPMultipleChoices (300)
80
+ # - Net::HTTPMovedPermanently (301)
81
+ # - Net::HTTPFound (302)
82
+ # - Net::HTTPSeeOther (303)
83
+ # - Net::HTTPNotModified (304)
84
+ # - Net::HTTPUseProxy (305)
85
+ # - Net::HTTPTemporaryRedirect (307)
86
+ # - Net::HTTPPermanentRedirect (308)
87
+ #
88
+ # - Net::HTTPClientError:
89
+ #
90
+ # - Net::HTTPBadRequest (400)
91
+ # - Net::HTTPUnauthorized (401)
92
+ # - Net::HTTPPaymentRequired (402)
93
+ # - Net::HTTPForbidden (403)
94
+ # - Net::HTTPNotFound (404)
95
+ # - Net::HTTPMethodNotAllowed (405)
96
+ # - Net::HTTPNotAcceptable (406)
97
+ # - Net::HTTPProxyAuthenticationRequired (407)
98
+ # - Net::HTTPRequestTimeOut (408)
99
+ # - Net::HTTPConflict (409)
100
+ # - Net::HTTPGone (410)
101
+ # - Net::HTTPLengthRequired (411)
102
+ # - Net::HTTPPreconditionFailed (412)
103
+ # - Net::HTTPRequestEntityTooLarge (413)
104
+ # - Net::HTTPRequestURITooLong (414)
105
+ # - Net::HTTPUnsupportedMediaType (415)
106
+ # - Net::HTTPRequestedRangeNotSatisfiable (416)
107
+ # - Net::HTTPExpectationFailed (417)
108
+ # - Net::HTTPMisdirectedRequest (421)
109
+ # - Net::HTTPUnprocessableEntity (422)
110
+ # - Net::HTTPLocked (423)
111
+ # - Net::HTTPFailedDependency (424)
112
+ # - Net::HTTPUpgradeRequired (426)
113
+ # - Net::HTTPPreconditionRequired (428)
114
+ # - Net::HTTPTooManyRequests (429)
115
+ # - Net::HTTPRequestHeaderFieldsTooLarge (431)
116
+ # - Net::HTTPUnavailableForLegalReasons (451)
117
+ #
118
+ # - Net::HTTPServerError:
119
+ #
120
+ # - Net::HTTPInternalServerError (500)
121
+ # - Net::HTTPNotImplemented (501)
122
+ # - Net::HTTPBadGateway (502)
123
+ # - Net::HTTPServiceUnavailable (503)
124
+ # - Net::HTTPGatewayTimeOut (504)
125
+ # - Net::HTTPVersionNotSupported (505)
126
+ # - Net::HTTPVariantAlsoNegotiates (506)
127
+ # - Net::HTTPInsufficientStorage (507)
128
+ # - Net::HTTPLoopDetected (508)
129
+ # - Net::HTTPNotExtended (510)
130
+ # - Net::HTTPNetworkAuthenticationRequired (511)
131
+ #
132
+ # There is also the Net::HTTPBadResponse exception which is raised when
133
+ # there is a protocol error.
18
134
  #
19
135
  class Net::HTTPResponse
20
136
  class << self
@@ -84,6 +200,8 @@ class Net::HTTPResponse
84
200
  @read = false
85
201
  @uri = nil
86
202
  @decode_content = false
203
+ @body_encoding = false
204
+ @ignore_eof = true
87
205
  end
88
206
 
89
207
  # The HTTP version supported by the server.
@@ -106,6 +224,22 @@ class Net::HTTPResponse
106
224
  # Accept-Encoding header from the user.
107
225
  attr_accessor :decode_content
108
226
 
227
+ # The encoding to use for the response body. If an Encoding, use that encoding.
228
+ # If any other truthy value, attempt to detect the appropriate encoding and use
229
+ # that. If +false+ or +nil+, the body's encoding is left unchanged.
230
+ attr_reader :body_encoding
231
+
232
+ # Set the encoding to use for the response body. If given a String, find
233
+ # the related Encoding.
234
def body_encoding=(value)
  # A String is taken to be an encoding name and is resolved through
  # Encoding.find (which raises for names it does not know); every other
  # value (an Encoding, true, false, nil, ...) is stored untouched.
  @body_encoding = value.is_a?(String) ? Encoding.find(value) : value
end
238
+
239
+ # Whether to ignore EOF when reading bodies with a specified Content-Length
240
+ # header.
241
+ attr_accessor :ignore_eof
242
+
109
243
  def inspect
110
244
  "#<#{self.class} #{@code} #{@message} readbody=#{@read}>"
111
245
  end
@@ -214,6 +348,17 @@ class Net::HTTPResponse
214
348
  end
215
349
  @read = true
216
350
 
351
+ case enc = @body_encoding
352
+ when Encoding, false, nil
353
+ # Encoding: force given encoding
354
+ # false/nil: do not force encoding
355
+ else
356
+ # other value: detect encoding from body
357
+ enc = detect_encoding(@body)
358
+ end
359
+
360
+ @body.force_encoding(enc) if enc
361
+
217
362
  @body
218
363
  end
219
364
 
@@ -245,6 +390,141 @@ class Net::HTTPResponse
245
390
 
246
391
  private
247
392
 
393
+ # :nodoc:
394
# Works out which encoding should be applied to the body +str+.
#
# Candidates are tried in this order and the first hit wins:
#
# 1. the +encoding+ argument, when given;
# 2. <tt>type_params['charset']</tt>;
# 3. a byte order mark at the start of +str+ (see check_bom);
# 4. for XML content types, the +encoding+ pseudo-attribute of a leading
#    XML declaration (<tt><?xml version="..." encoding="..."?></tt>),
#    falling back to UTF-8 when there is none;
# 5. for text/html, whatever sniff_encoding reports.
#
# Returns an Encoding, an encoding name String, or +nil+ when none of the
# above applies.
def detect_encoding(str, encoding=nil)
  return encoding if encoding

  declared = type_params['charset']
  return declared if declared

  from_bom = check_bom(str)
  return from_bom if from_bom

  case content_type&.downcase
  when %r{text/x(?:ht)?ml|application/(?:[^+]+\+)?xml}
    # An XML declaration starts with "<?xml" (not "<xml"), so the "?" has
    # to be part of the pattern for a declared encoding to be picked up.
    xml_declaration = /\A<\?xml[ \t\r\n]+
                       version[ \t\r\n]*=[ \t\r\n]*(?:"[0-9.]+"|'[0-9.]*')[ \t\r\n]+
                       encoding[ \t\r\n]*=[ \t\r\n]*
                       (?:"([A-Za-z][\-A-Za-z0-9._]*)"|'([A-Za-z][\-A-Za-z0-9._]*)')/x
    found = xml_declaration.match(str)
    (found && (found[1] || found[2])) || Encoding::UTF_8
  when %r{text/html.*}
    sniff_encoding(str)
  end
end
412
+
413
+ # :nodoc:
414
def sniff_encoding(str, encoding=nil)
  # Loosely follows the HTML encoding sniffing algorithm:
  # http://www.w3.org/TR/html5/parsing.html#determining-the-character-encoding
  # The "last visited page" (6), "frequency analysis" (7) and
  # "implementation-defined default" (8) steps are not implemented, so the
  # result is nil when none of the checks below succeeds.
  from_meta = scanning_meta(str)
  return from_meta if from_meta

  return Encoding::US_ASCII if str.ascii_only?

  # Check validity on a copy so the caller's string keeps its encoding.
  Encoding::UTF_8 if str.dup.force_encoding(Encoding::UTF_8).valid_encoding?
end
428
+
429
+ # :nodoc:
430
# Looks for a byte order mark at the very start of +str+.
#
# Returns Encoding::UTF_8, Encoding::UTF_16BE or Encoding::UTF_16LE for the
# corresponding BOM, or +nil+ when +str+ does not start with one.
#
# The comparison is done on binary copies of both sides: String#== treats
# strings with different encodings and non-ASCII bytes as unequal, so
# comparing a binary body against a UTF-8 tagged literal would never match
# even when the bytes are identical.
def check_bom(str)
  head = str.byteslice(0, 3).b
  return Encoding::UTF_8 if head == "\xEF\xBB\xBF".b

  case head.byteslice(0, 2)
  when "\xFE\xFF".b then Encoding::UTF_16BE
  when "\xFF\xFE".b then Encoding::UTF_16LE
  end
end
442
+
443
+ # :nodoc:
444
# Scans +str+ for <tt><meta ...></tt> tags that declare a character
# encoding, in the spirit of the HTML "prescan a byte stream to determine
# its encoding" algorithm.
#
# Every <tt><meta</tt> tag is examined in document order (matched
# case-insensitively and only when followed by whitespace or "/"); a tag
# that yields no usable encoding is skipped and scanning continues with the
# next one.
#
# Returns the Encoding declared by the first usable tag (UTF-16 is reported
# as UTF-8), or +nil+ when no tag declares a known encoding.
def scanning_meta(str)
  require 'strscan' # loaded lazily; only needed when sniffing HTML
  ss = StringScanner.new(str)

  while ss.scan_until(/<meta[\t\n\f\r \/]/i)
    seen = {} # attribute names already handled for this tag
    got_pragma = false
    need_pragma = nil
    charset = nil

    # step: Attributes
    while (attr = get_attribute(ss))
      name, value = attr
      next if seen[name] # only the first occurrence of a name counts
      seen[name] = true
      case name
      when 'http-equiv'
        got_pragma = true if value == 'content-type'
      when 'content'
        extracted = extracting_encodings_from_meta_elements(value)
        # A content attribute only matters when it carries a charset and
        # no charset has been found yet; it must not override an explicit
        # charset attribute or demand a pragma on its own.
        if extracted && charset.nil?
          charset = extracted
          need_pragma = true
        end
      when 'charset'
        need_pragma = false
        charset = value
      end
    end

    # step: Processing
    next if need_pragma.nil?             # tag said nothing about encodings
    next if need_pragma && !got_pragma   # content= without http-equiv

    found =
      begin
        Encoding.find(charset)
      rescue ArgumentError
        nil # unknown encoding name: try the next tag
      end
    next unless found

    return found == Encoding::UTF_16 ? Encoding::UTF_8 : found # tentative
  end
  nil
end

# Reads one attribute from the StringScanner +ss+, which is expected to be
# positioned inside a tag (after the tag name or a previous attribute).
#
# Returns <tt>[name, value]</tt>, both lowercased; +value+ is <tt>''</tt>
# when the attribute has no value or an empty one. Returns +nil+ when
# there is no further attribute: either the closing <tt>></tt> was reached
# (it is consumed) or no attribute name could be read (end of input, or a
# stray "=").
def get_attribute(ss)
  ss.skip(/[\t\n\f\r \/]*/)
  return nil if ss.skip(/>/)

  name = ss.scan(/[^=\t\n\f\r \/>]+/)
  return nil unless name
  name = name.downcase

  ss.skip(/[\t\n\f\r ]*/)
  # Consume the next character only when it really is "="; anything else
  # belongs to the next attribute or is the closing ">".
  return [name, ''] unless ss.skip(/=/)

  ss.skip(/[\t\n\f\r ]*/)
  value =
    case ss.peek(1)
    when '"'
      ss.getch
      quoted = ss.scan(/[^"]*/) # may legitimately be empty
      ss.getch                  # closing quote (nil at end of input)
      quoted
    when "'"
      ss.getch
      quoted = ss.scan(/[^']*/)
      ss.getch
      quoted
    when '>'
      ''
    else
      # nil here means the input ended right after "="
      ss.scan(/[^\t\n\f\r >]+/) || ''
    end
  [name, value.downcase]
end
519
+
520
def extracting_encodings_from_meta_elements(value)
  # http://dev.w3.org/html5/spec/fetching-resources.html#algorithm-for-extracting-an-encoding-from-a-meta-element
  # Captures, in order: double-quoted, single-quoted and unquoted charset
  # values. A dangling quote or a bare "charset=" at the end of the string
  # matches without any capture, which yields nil.
  found = /charset[\t\n\f\r ]*=(?:"([^"]*)"|'([^']*)'|["']|\z|([^\t\n\f\r ;]+))/i.match(value)
  return nil unless found

  found[1] || found[2] || found[3]
end
527
+
248
528
  ##
249
529
  # Checks for a supported Content-Encoding header and yields an Inflate
250
530
  # wrapper for this response's socket when zlib is present. If the
@@ -272,6 +552,9 @@ class Net::HTTPResponse
272
552
  ensure
273
553
  begin
274
554
  inflate_body_io.finish
555
+ if self['content-length']
556
+ self['content-length'] = inflate_body_io.bytes_inflated.to_s
557
+ end
275
558
  rescue => err
276
559
  # Ignore #finish's error if there is an exception from yield
277
560
  raise err if success
@@ -297,7 +580,7 @@ class Net::HTTPResponse
297
580
 
298
581
  clen = content_length()
299
582
  if clen
300
- @socket.read clen, dest, true # ignore EOF
583
+ @socket.read clen, dest, @ignore_eof
301
584
  return
302
585
  end
303
586
  clen = range_length()
@@ -373,6 +656,14 @@ class Net::HTTPResponse
373
656
  @inflate.finish
374
657
  end
375
658
 
659
+ ##
660
+ # The number of bytes inflated, used to update the Content-Length of
661
+ # the response.
662
+
663
def bytes_inflated
  # Delegates to the wrapped inflater held in @inflate.
  inflater = @inflate
  inflater.total_out
end
666
+
376
667
  ##
377
668
  # Returns a Net::ReadAdapter that inflates each read chunk into +dest+.
378
669
  #