em-http-request 0.2.9 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,188 +9,25 @@
9
9
 
10
10
  module EventMachine
11
11
 
12
- # A simple hash is returned for each request made by HttpClient with the
13
- # headers that were given by the server for that request.
14
- class HttpResponseHeader < Hash
15
- # The reason returned in the http response ("OK","File not found",etc.)
16
- attr_accessor :http_reason
17
-
18
- # The HTTP version returned.
19
- attr_accessor :http_version
20
-
21
- # The status code (as a string!)
22
- attr_accessor :http_status
23
-
24
- # E-Tag
25
- def etag
26
- self["ETag"]
27
- end
28
-
29
- def last_modified
30
- time = self["Last-Modified"]
31
- Time.parse(time) if time
32
- end
33
-
34
- # HTTP response status as an integer
35
- def status
36
- Integer(http_status) rescue nil
37
- end
38
-
39
- # Length of content as an integer, or nil if chunked/unspecified
40
- def content_length
41
- @content_length ||= ((s = self[HttpClient::CONTENT_LENGTH]) &&
42
- (s =~ /^(\d+)$/)) ? $1.to_i : nil
43
- end
44
-
45
- # Cookie header from the server
46
- def cookie
47
- self[HttpClient::SET_COOKIE]
48
- end
49
-
50
- # Is the transfer encoding chunked?
51
- def chunked_encoding?
52
- /chunked/i === self[HttpClient::TRANSFER_ENCODING]
53
- end
54
-
55
- def keep_alive?
56
- /keep-alive/i === self[HttpClient::KEEP_ALIVE]
57
- end
58
-
59
- def compressed?
60
- /gzip|compressed|deflate/i === self[HttpClient::CONTENT_ENCODING]
61
- end
62
-
63
- def location
64
- self[HttpClient::LOCATION]
65
- end
66
- end
67
-
68
- class HttpChunkHeader < Hash
69
- # When parsing chunked encodings this is set
70
- attr_accessor :http_chunk_size
71
-
72
- # Size of the chunk as an integer
73
- def chunk_size
74
- return @chunk_size unless @chunk_size.nil?
75
- @chunk_size = @http_chunk_size ? @http_chunk_size.to_i(base=16) : 0
76
- end
77
- end
78
-
79
- # Methods for building HTTP requests
80
- module HttpEncoding
81
- HTTP_REQUEST_HEADER="%s %s HTTP/1.1\r\n"
82
- FIELD_ENCODING = "%s: %s\r\n"
83
-
84
- # Escapes a URI.
85
- def escape(s)
86
- s.to_s.gsub(/([^ a-zA-Z0-9_.-]+)/n) {
87
- '%'+$1.unpack('H2'*$1.bytesize).join('%').upcase
88
- }.tr(' ', '+')
89
- end
90
-
91
- # Unescapes a URI escaped string.
92
- def unescape(s)
93
- s.tr('+', ' ').gsub(/((?:%[0-9a-fA-F]{2})+)/n){
94
- [$1.delete('%')].pack('H*')
95
- }
96
- end
97
-
98
- # Map all header keys to a downcased string version
99
- def munge_header_keys(head)
100
- head.inject({}) { |h, (k, v)| h[k.to_s.downcase] = v; h }
101
- end
102
-
103
- # HTTP is kind of retarded that you have to specify a Host header, but if
104
- # you include port 80 then further redirects will tack on the :80 which is
105
- # annoying.
106
- def encode_host
107
- if @uri.port == 80 || @uri.port == 443
108
- return @uri.host
109
- else
110
- @uri.host + ":#{@uri.port}"
111
- end
112
- end
113
-
114
- def encode_request(method, path, query, uri_query)
115
- HTTP_REQUEST_HEADER % [method.to_s.upcase, encode_query(path, query, uri_query)]
116
- end
117
-
118
- def encode_query(path, query, uri_query)
119
- encoded_query = if query.kind_of?(Hash)
120
- query.map { |k, v| encode_param(k, v) }.join('&')
121
- else
122
- query.to_s
123
- end
124
- if !uri_query.to_s.empty?
125
- encoded_query = [encoded_query, uri_query].reject {|part| part.empty?}.join("&")
126
- end
127
- return path if encoded_query.to_s.empty?
128
- "#{path}?#{encoded_query}"
129
- end
130
-
131
- # URL encodes query parameters:
132
- # single k=v, or a URL encoded array, if v is an array of values
133
- def encode_param(k, v)
134
- if v.is_a?(Array)
135
- v.map { |e| escape(k) + "[]=" + escape(e) }.join("&")
136
- else
137
- escape(k) + "=" + escape(v)
138
- end
139
- end
140
-
141
- # Encode a field in an HTTP header
142
- def encode_field(k, v)
143
- FIELD_ENCODING % [k, v]
144
- end
145
-
146
- # Encode basic auth in an HTTP header
147
- # In: Array ([user, pass]) - for basic auth
148
- # String - custom auth string (OAuth, etc)
149
- def encode_auth(k,v)
150
- if v.is_a? Array
151
- FIELD_ENCODING % [k, ["Basic", Base64.encode64(v.join(":")).chomp].join(" ")]
152
- else
153
- encode_field(k,v)
154
- end
155
- end
156
-
157
- def encode_headers(head)
158
- head.inject('') do |result, (key, value)|
159
- # Munge keys from foo-bar-baz to Foo-Bar-Baz
160
- key = key.split('-').map { |k| k.to_s.capitalize }.join('-')
161
- result << case key
162
- when 'Authorization', 'Proxy-authorization'
163
- encode_auth(key, value)
164
- else
165
- encode_field(key, value)
166
- end
167
- end
168
- end
169
-
170
- def encode_cookie(cookie)
171
- if cookie.is_a? Hash
172
- cookie.inject('') { |result, (k, v)| result << encode_param(k, v) + ";" }
173
- else
174
- cookie
175
- end
176
- end
177
- end
178
-
179
12
  class HttpClient < Connection
180
13
  include EventMachine::Deferrable
181
- include HttpEncoding
14
+ include EventMachine::HttpEncoding
182
15
 
183
16
  TRANSFER_ENCODING="TRANSFER_ENCODING"
184
17
  CONTENT_ENCODING="CONTENT_ENCODING"
185
18
  CONTENT_LENGTH="CONTENT_LENGTH"
19
+ CONTENT_TYPE="CONTENT_TYPE"
20
+ LAST_MODIFIED="LAST_MODIFIED"
186
21
  KEEP_ALIVE="CONNECTION"
187
22
  SET_COOKIE="SET_COOKIE"
188
23
  LOCATION="LOCATION"
189
24
  HOST="HOST"
25
+ ETAG="ETAG"
26
+
190
27
  CRLF="\r\n"
191
28
 
192
29
  attr_accessor :method, :options, :uri
193
- attr_reader :response, :response_header, :error, :redirects, :last_effective_url
30
+ attr_reader :response, :response_header, :error, :redirects, :last_effective_url, :content_charset
194
31
 
195
32
  def post_init
196
33
  @parser = HttpClientParser.new
@@ -201,24 +38,35 @@ module EventMachine
201
38
  @redirects = 0
202
39
  @response = ''
203
40
  @error = ''
41
+ @headers = nil
204
42
  @last_effective_url = nil
205
43
  @content_decoder = nil
44
+ @content_charset = nil
206
45
  @stream = nil
207
46
  @disconnect = nil
208
47
  @state = :response_header
48
+ @socks_state = nil
209
49
  end
210
50
 
211
51
  # start HTTP request once we establish connection to host
212
52
  def connection_completed
213
- # if connecting to proxy, then first negotiate the connection
214
- # to intermediate server and wait for 200 response
215
- if @options[:proxy] and @state == :response_header
216
- @state = :response_proxy
53
+ # if a socks proxy is specified, then a connection request
54
+ # has to be made to the socks server and we need to wait
55
+ # for a response code
56
+ if socks_proxy? and @state == :response_header
57
+ @state = :connect_socks_proxy
58
+ send_socks_handshake
59
+
60
+ # if we need to negotiate the proxy connection first, then
61
+ # issue a CONNECT query and wait for 200 response
62
+ elsif connect_proxy? and @state == :response_header
63
+ @state = :connect_http_proxy
217
64
  send_request_header
218
65
 
219
66
  # if connecting via proxy, then state will be :proxy_connected,
220
67
  # indicating successful tunnel. from here, initiate normal http
221
68
  # exchange
69
+
222
70
  else
223
71
  @state = :response_header
224
72
  ssl = @options[:tls] || @options[:ssl] || {}
@@ -247,6 +95,7 @@ module EventMachine
247
95
  # fail the connection directly
248
96
  dns_error == true ? fail(self) : unbind
249
97
  end
98
+ alias :close :on_error
250
99
 
251
100
  # assign a stream processing block
252
101
  def stream(&blk)
@@ -258,6 +107,11 @@ module EventMachine
258
107
  @disconnect = blk
259
108
  end
260
109
 
110
+ # assign a headers parse callback
111
+ def headers(&blk)
112
+ @headers = blk
113
+ end
114
+
261
115
  # raw data push from the client (WebSocket) should
262
116
  # only be invoked after handshake, otherwise it will
263
117
  # inject data into the header exchange
@@ -275,31 +129,75 @@ module EventMachine
275
129
  def normalize_body
276
130
  @normalized_body ||= begin
277
131
  if @options[:body].is_a? Hash
278
- @options[:body].to_params
132
+ form_encode_body(@options[:body])
279
133
  else
280
134
  @options[:body]
281
135
  end
282
136
  end
283
137
  end
284
138
 
139
+ # determines if there is enough data in the buffer
140
+ def has_bytes?(num)
141
+ @data.size >= num
142
+ end
143
+
285
144
  def websocket?; @uri.scheme == 'ws'; end
145
+ def proxy?; !@options[:proxy].nil?; end
146
+
147
+ # determines if a proxy should be used that uses
148
+ # http-headers as proxy-mechanism
149
+ #
150
+ # this is the default proxy type if none is specified
151
+ def http_proxy?; proxy? && [nil, :http].include?(@options[:proxy][:type]); end
152
+
153
+ # determines if a http-proxy should be used with
154
+ # the CONNECT verb
155
+ def connect_proxy?; http_proxy? && (@options[:proxy][:use_connect] == true); end
156
+
157
+ # determines if a SOCKS5 proxy should be used
158
+ def socks_proxy?; proxy? && (@options[:proxy][:type] == :socks); end
159
+
160
+ def socks_methods
161
+ methods = []
162
+ methods << 2 if !options[:proxy][:authorization].nil? # 2 => Username/Password Authentication
163
+ methods << 0 # 0 => No Authentication Required
164
+
165
+ methods
166
+ end
167
+
168
+ def send_socks_handshake
169
+ # Method Negotiation as described on
170
+ # http://www.faqs.org/rfcs/rfc1928.html Section 3
171
+
172
+ @socks_state = :method_negotiation
173
+
174
+ methods = socks_methods
175
+ send_data [5, methods.size].pack('CC') + methods.pack('C*')
176
+ end
286
177
 
287
178
  def send_request_header
288
179
  query = @options[:query]
289
180
  head = @options[:head] ? munge_header_keys(@options[:head]) : {}
290
181
  file = @options[:file]
182
+ proxy = @options[:proxy]
291
183
  body = normalize_body
292
- request_header = nil
293
184
 
294
- if @state == :response_proxy
295
- proxy = @options[:proxy]
185
+ request_header = nil
296
186
 
297
- # initialize headers to establish the HTTP tunnel
187
+ if http_proxy?
188
+ # initialize headers for the http proxy
298
189
  head = proxy[:head] ? munge_header_keys(proxy[:head]) : {}
299
190
  head['proxy-authorization'] = proxy[:authorization] if proxy[:authorization]
300
- request_header = HTTP_REQUEST_HEADER % ['CONNECT', "#{@uri.host}:#{@uri.port}"]
301
191
 
302
- elsif websocket?
192
+ # if we need to negotiate the tunnel connection first, then
193
+ # issue a CONNECT query to the proxy first. This is an optional
194
+ # flag, by default we will provide full URIs to the proxy
195
+ if @state == :connect_http_proxy
196
+ request_header = HTTP_REQUEST_HEADER % ['CONNECT', "#{@uri.host}:#{@uri.port}"]
197
+ end
198
+ end
199
+
200
+ if websocket?
303
201
  head['upgrade'] = 'WebSocket'
304
202
  head['connection'] = 'Upgrade'
305
203
  head['origin'] = @options[:origin] || @uri.host
@@ -318,7 +216,12 @@ module EventMachine
318
216
 
319
217
  # Set content-type header if missing and body is a Ruby hash
320
218
  if not head['content-type'] and options[:body].is_a? Hash
321
- head['content-type'] = "application/x-www-form-urlencoded"
219
+ head['content-type'] = 'application/x-www-form-urlencoded'
220
+ end
221
+
222
+ # Set connection close unless keepalive
223
+ unless options[:keepalive]
224
+ head['connection'] = 'close'
322
225
  end
323
226
  end
324
227
 
@@ -332,7 +235,7 @@ module EventMachine
332
235
  @last_effective_url = @uri
333
236
 
334
237
  # Build the request headers
335
- request_header ||= encode_request(@method, @uri.path, query, @uri.query)
238
+ request_header ||= encode_request(@method, @uri, query, proxy)
336
239
  request_header << encode_headers(head)
337
240
  request_header << CRLF
338
241
  send_data request_header
@@ -367,6 +270,7 @@ module EventMachine
367
270
  end
368
271
 
369
272
  def on_decoded_body_data(data)
273
+ data.force_encoding @content_charset if @content_charset
370
274
  if @stream
371
275
  @stream.call(data)
372
276
  else
@@ -374,23 +278,32 @@ module EventMachine
374
278
  end
375
279
  end
376
280
 
281
+ def finished?
282
+ @state == :finished || (@state == :body && @bytes_remaining.nil?)
283
+ end
284
+
377
285
  def unbind
378
- if @last_effective_url != @uri and @redirects < @options[:redirects]
379
- # update uri to redirect location if we're allowed to traverse deeper
380
- @uri = @last_effective_url
286
+ if finished? && (@last_effective_url != @uri) && (@redirects < @options[:redirects])
287
+ begin
288
+ # update uri to redirect location if we're allowed to traverse deeper
289
+ @uri = @last_effective_url
381
290
 
382
- # keep track of the depth of requests we made in this session
383
- @redirects += 1
291
+ # keep track of the depth of requests we made in this session
292
+ @redirects += 1
384
293
 
385
- # swap current connection and reassign current handler
386
- req = HttpOptions.new(@method, @uri, @options)
387
- reconnect(req.host, req.port)
294
+ # swap current connection and reassign current handler
295
+ req = HttpOptions.new(@method, @uri, @options)
296
+ reconnect(req.host, req.port)
388
297
 
389
- @response_header = HttpResponseHeader.new
390
- @state = :response_header
391
- @data.clear
298
+ @response_header = HttpResponseHeader.new
299
+ @state = :response_header
300
+ @response = ''
301
+ @data.clear
302
+ rescue EventMachine::ConnectionError => e
303
+ on_error(e.message, true)
304
+ end
392
305
  else
393
- if @state == :finished || (@state == :body && @bytes_remaining.nil?)
306
+ if finished?
394
307
  succeed(self)
395
308
  else
396
309
  @disconnect.call(self) if @state == :websocket and @disconnect
@@ -405,7 +318,9 @@ module EventMachine
405
318
 
406
319
  def dispatch
407
320
  while case @state
408
- when :response_proxy
321
+ when :connect_socks_proxy
322
+ parse_socks_response
323
+ when :connect_http_proxy
409
324
  parse_response_header
410
325
  when :response_header
411
326
  parse_response_header
@@ -451,13 +366,17 @@ module EventMachine
451
366
  def parse_response_header
452
367
  return false unless parse_header(@response_header)
453
368
 
369
+ # invoke headers callback after full parse if one
370
+ # is specified by the user
371
+ @headers.call(@response_header) if @headers
372
+
454
373
  unless @response_header.http_status and @response_header.http_reason
455
374
  @state = :invalid
456
375
  on_error "no HTTP response"
457
376
  return false
458
377
  end
459
378
 
460
- if @state == :response_proxy
379
+ if @state == :connect_http_proxy
461
380
  # when a successful tunnel is established, the proxy responds with a
462
381
  # 200 response code. from here, the tunnel is transparent.
463
382
  if @response_header.http_status.to_i == 200
@@ -475,9 +394,13 @@ module EventMachine
475
394
  if @response_header.location
476
395
  begin
477
396
  location = Addressable::URI.parse(@response_header.location)
397
+
478
398
  if location.relative?
479
399
  location = @uri.join(location)
480
400
  @response_header[LOCATION] = location.to_s
401
+ else
402
+ # if redirect is to an absolute url, check for correct URI structure
403
+ raise if location.host.nil?
481
404
  end
482
405
 
483
406
  # store last url on any sign of redirect
@@ -489,10 +412,13 @@ module EventMachine
489
412
  end
490
413
  end
491
414
 
492
- # shortcircuit on HEAD requests
415
+ # Fire callbacks immediately after receiving header requests
416
+ # if the request method is HEAD. In case of a redirect, terminate
417
+ # current connection and reinitialize the process.
493
418
  if @method == "HEAD"
494
419
  @state = :finished
495
- unbind
420
+ close_connection
421
+ return false
496
422
  end
497
423
 
498
424
  if websocket?
@@ -521,6 +447,118 @@ module EventMachine
521
447
  end
522
448
  end
523
449
 
450
+ if ''.respond_to?(:force_encoding) && /;\s*charset=\s*(.+?)\s*(;|$)/.match(response_header[CONTENT_TYPE])
451
+ @content_charset = Encoding.find($1.gsub(/^\"|\"$/, '')) rescue Encoding.default_external
452
+ end
453
+
454
+ true
455
+ end
456
+
457
+ def send_socks_connect_request
458
+ # TO-DO: Implement address types for IPv6 and Domain
459
+ begin
460
+ ip_address = Socket.gethostbyname(@uri.host).last
461
+ send_data [5, 1, 0, 1, ip_address, @uri.port].flatten.pack('CCCCA4n')
462
+
463
+ rescue
464
+ @state = :invalid
465
+ on_error "could not resolve host", true
466
+ return false
467
+ end
468
+
469
+ true
470
+ end
471
+
472
+ # parses socks 5 server responses as specified
473
+ # on http://www.faqs.org/rfcs/rfc1928.html
474
+ def parse_socks_response
475
+ if @socks_state == :method_negotiation
476
+ return false unless has_bytes? 2
477
+
478
+ _, method = @data.read(2).unpack('CC')
479
+
480
+ if socks_methods.include?(method)
481
+ if method == 0
482
+ @socks_state = :connecting
483
+
484
+ return send_socks_connect_request
485
+
486
+ elsif method == 2
487
+ @socks_state = :authenticating
488
+
489
+ credentials = @options[:proxy][:authorization]
490
+ if credentials.size < 2
491
+ @state = :invalid
492
+ on_error "username and password are not supplied"
493
+ return false
494
+ end
495
+
496
+ username, password = credentials
497
+
498
+ send_data [5, username.length, username, password.length, password].pack('CCA*CA*')
499
+ end
500
+
501
+ else
502
+ @state = :invalid
503
+ on_error "proxy did not accept method"
504
+ return false
505
+ end
506
+
507
+ elsif @socks_state == :authenticating
508
+ return false unless has_bytes? 2
509
+
510
+ _, status_code = @data.read(2).unpack('CC')
511
+
512
+ if status_code == 0
513
+ # success
514
+ @socks_state = :connecting
515
+
516
+ return send_socks_connect_request
517
+
518
+ else
519
+ # error
520
+ @state = :invalid
521
+ on_error "access denied by proxy"
522
+ return false
523
+ end
524
+
525
+ elsif @socks_state == :connecting
526
+ return false unless has_bytes? 10
527
+
528
+ _, response_code, _, address_type, _, _ = @data.read(10).unpack('CCCCNn')
529
+
530
+ if response_code == 0
531
+ # success
532
+ @socks_state = :connected
533
+ @state = :proxy_connected
534
+
535
+ @response_header = HttpResponseHeader.new
536
+
537
+ # connection_completed will invoke actions to
538
+ # start sending all http data transparently
539
+ # over the socks connection
540
+ connection_completed
541
+
542
+ else
543
+ # error
544
+ @state = :invalid
545
+
546
+ error_messages = {
547
+ 1 => "general socks server failure",
548
+ 2 => "connection not allowed by ruleset",
549
+ 3 => "network unreachable",
550
+ 4 => "host unreachable",
551
+ 5 => "connection refused",
552
+ 6 => "TTL expired",
553
+ 7 => "command not supported",
554
+ 8 => "address type not supported"
555
+ }
556
+ error_message = error_messages[response_code] || "unknown error (code: #{response_code})"
557
+ on_error "socks5 connect error: #{error_message}"
558
+ return false
559
+ end
560
+ end
561
+
524
562
  true
525
563
  end
526
564
 
@@ -610,13 +648,10 @@ module EventMachine
610
648
  on_request_complete
611
649
 
612
650
  else
613
- if @data.empty?
614
- @state = :finished
615
- on_request_complete
616
- else
617
- @state = :invalid
618
- on_error "garbage at end of body"
619
- end
651
+
652
+ @data.clear
653
+ @state = :finished
654
+ on_request_complete
620
655
  end
621
656
 
622
657
  false
@@ -628,8 +663,8 @@ module EventMachine
628
663
  # slice the message out of the buffer and pass in
629
664
  # for processing, and buffer data otherwise
630
665
  buffer = @data.read
631
- while msg = buffer.slice!(/\000([^\377]*)\377/)
632
- msg.gsub!(/^\x00|\xff$/, '')
666
+ while msg = buffer.slice!(/\000([^\377]*)\377/n)
667
+ msg.gsub!(/\A\x00|\xff\z/n, '')
633
668
  @stream.call(msg)
634
669
  end
635
670