knjrbfw 0.0.59 → 0.0.62

Sign up to get free protection for your applications and to get access to all the features.
data/lib/knj/http2.rb DELETED
@@ -1,662 +0,0 @@
1
- #This class tries to emulate a browser in Ruby without any visual stuff. Remember cookies, keep sessions alive, reset connections according to keep-alive rules and more.
2
- #===Examples
3
- # Knj::Http2.new(:host => "www.somedomain.com", :port => 80, :ssl => false, :debug => false) do |http|
4
- # res = http.get("index.rhtml?show=some_page")
5
- # html = res.body
6
- # print html
7
- #
8
- # res = http.post("index.rhtml?choice=login", {"username" => "John Doe", "password" => 123})
9
- # print res.body
10
- # print "#{res.headers}"
11
- # end
12
- class Knj::Http2
13
- attr_reader :cookies, :args
14
-
15
#Sets up the client from the given arguments and opens the connection right away.
#
#'args' is either a String (used as the host) or a Hash. Keys read here: :host (required), :port (defaults to 443
#when :ssl is set, otherwise to 80), :ssl, :debug, :nl (line terminator, defaults to "\r\n") and :user_agent.
#The default port is stored in the given hash. Raises a RuntimeError if 'args' is neither a String nor a Hash, or
#if no host was given.
#
#If a block is given, the object is yielded to it and destroyed when the block returns or raises.
#===Examples
# Knj::Http2.new(:host => "www.somedomain.com") do |http|
#   print http.get("index.rhtml").body
# end
def initialize(args = {})
  require "#{$knjpath}web"

  args = {:host => args} if args.is_a?(String)
  raise "Arguments wasnt a hash." unless args.is_a?(Hash)

  @args = args
  @cookies = {}
  @debug = @args[:debug]

  require "monitor"
  @mutex = Monitor.new

  @args[:port] = (@args[:ssl] ? 443 : 80) unless @args[:port]
  @nl = @args[:nl] || "\r\n"
  @uagent = @args[:user_agent] || "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)"

  raise "No host was given." unless @args[:host]
  self.reconnect

  return unless block_given?

  begin
    yield(self)
  ensure
    self.destroy
  end
end
59
-
60
#Tells whether the current socket can be used for another request.
#
#Returns false when there is no socket, when the socket is closed, or when a keep-alive timeout is known and at
#least that many seconds have passed since the last request was written. Returns true otherwise.
#===Examples
# print "Socket is working." if http.socket_working?
def socket_working?
  return false unless @sock
  return false if @sock.closed?
  return true unless @keepalive_timeout and @request_last

  idle_seconds = Time.now.to_i - @request_last.to_i
  return true if idle_seconds < @keepalive_timeout

  print "Http2: We are over the keepalive-wait - returning false for socket_working?.\n" if @debug
  return false
end
76
-
77
#Clears the state kept on the object and closes every socket that is still open.
#===Examples
# http.destroy
def destroy
  @args = @cookies = @debug = @mutex = @uagent = nil
  @keepalive_timeout = @request_last = nil

  #Same order as always: the active socket first, then the plain one, then the SSL one.
  [:@sock, :@sock_plain, :@sock_ssl].each do |ivar|
    socket = instance_variable_get(ivar)
    socket.close if socket and !socket.closed?
    instance_variable_set(ivar, nil)
  end

  return nil
end
98
-
99
#Reconnects to the host.
#
#Closes the sockets left over from the previous connection, forgets the keep-alive and content state learned from
#it and opens a new TCP connection - through the proxy in args[:proxy] (with a CONNECT request) when one is
#configured, otherwise directly to args[:host]. When args[:ssl] is set the connection is wrapped in SSL.
#
#Raises a RuntimeError if the proxy does not answer the CONNECT request with a 2xx status, or closes the
#connection before its response is complete.
def reconnect
  require "socket"
  print "Http2: Reconnect.\n" if @debug

  #Close what is left of the old connection, so that reconnecting does not leak sockets.
  [@sock_ssl, @sock_plain, @sock].compact.uniq.each do |old_sock|
    begin
      old_sock.close unless old_sock.closed?
    rescue StandardError
      #The old connection is being thrown away - a failing close must not prevent the reconnect.
    end
  end
  @sock = @sock_plain = @sock_ssl = nil

  #Reset variables.
  @keepalive_max = nil
  @keepalive_timeout = nil
  @connection = nil
  @contenttype = nil
  @charset = nil

  #Open connection.
  if @args[:proxy]
    print "Http2: Initializing proxy stuff.\n" if @debug
    @sock_plain = TCPSocket.new(@args[:proxy][:host], @args[:proxy][:port])
    @sock = @sock_plain

    @sock.write("CONNECT #{@args[:host]}:#{@args[:port]} HTTP/1.0#{@nl}")
    @sock.write("User-Agent: #{@uagent}#{@nl}")

    if @args[:proxy][:user] and @args[:proxy][:passwd]
      credential = ["#{@args[:proxy][:user]}:#{@args[:proxy][:passwd]}"].pack("m")
      credential.delete!("\r\n")
      @sock.write("Proxy-Authorization: Basic #{credential}#{@nl}")
    end

    @sock.write(@nl)

    #Any 2xx status means the tunnel is up - proxies differ in HTTP version and reason phrase.
    status_line = @sock.gets.to_s
    if status_line !~ /\AHTTP\/\d+(?:\.\d+)?\s+2\d\d(?:\s|\z)/i
      @sock_plain.close unless @sock_plain.closed?
      @sock = @sock_plain = nil
      raise "Proxy refused the CONNECT request: '#{status_line.strip}'."
    end

    #The proxy may send headers of its own - skip everything up to the blank line that ends its response.
    loop do
      header_line = @sock.gets

      if header_line.nil?
        @sock_plain.close unless @sock_plain.closed?
        @sock = @sock_plain = nil
        raise "Proxy closed the connection before finishing its CONNECT response."
      end

      break if header_line.strip.empty?
    end
  else
    print "Http2: Opening socket connection to '#{@args[:host]}:#{@args[:port]}'.\n" if @debug
    @sock_plain = TCPSocket.new(@args[:host], @args[:port].to_i)
  end

  if @args[:ssl]
    print "Http2: Initializing SSL.\n" if @debug
    require "openssl"

    #NOTE(review): no verify_mode is set on this context, so whether the certificate is checked depends on the
    #OpenSSL defaults - confirm that this is acceptable before sending anything sensitive.
    ssl_context = OpenSSL::SSL::SSLContext.new

    @sock_ssl = OpenSSL::SSL::SSLSocket.new(@sock_plain, ssl_context)
    @sock_ssl.sync_close = true

    #Announce the host name (SNI) so that servers hosting several sites pick the right certificate. This only
    #applies to names, not to IP-addresses.
    host = @args[:host].to_s
    host_is_ip = (host =~ /\A\d{1,3}(\.\d{1,3}){3}\z/ or host.include?(":"))
    @sock_ssl.hostname = host if !host_is_ip and @sock_ssl.respond_to?(:hostname=)

    @sock_ssl.connect

    @sock = @sock_ssl
  else
    @sock = @sock_plain
  end
end
154
-
155
#Sends a GET request for the given address and returns the result-object read from the response.
#
#'addr' is the path relative to the root of the host; a leading slash is accepted. 'args' is handed to the
#header and response methods together with the address (as :addr). The hash given by the caller is left untouched.
#===Examples
# res = http.get("somepage.html")
# print res.body #=> <String>-object containing the HTML gotten.
def get(addr, args = {})
  @mutex.synchronize do
    #Work on a copy, so the hash passed in by the caller is not modified.
    args = args.merge(:addr => addr)

    #Avoid "GET //page" when the address already starts with a slash (paths from URLs do).
    path = addr.to_s.sub(/\A\//, "")

    request = "GET /#{path} HTTP/1.1#{@nl}"
    request << self.header_str(self.default_headers(args), args)
    request << "#{@nl}"

    print "Http2: Writing headers.\n" if @debug
    print "Header str: #{request}\n" if @debug
    self.write(request)

    print "Http2: Reading response.\n" if @debug
    resp = self.read_response(args)

    print "Http2: Done with get request.\n" if @debug
    return resp
  end
end
177
-
178
#Writes the given string to the socket exactly as it is.
#
#Resets the content-length and encoding remembered from the previous response, reconnects first if the socket is
#not usable, and reconnects and retries once if the write fails with Errno::EPIPE. The time of the write is
#remembered for the keep-alive check.
def write(str)
  #Reset variables.
  @length = nil
  @encoding = nil
  self.reconnect if !self.socket_working?

  begin
    raise Errno::EPIPE, "The socket is closed." if !@sock or @sock.closed?

    #'write' and not 'puts': 'puts' appends a newline to strings not ending in one, which put a stray byte after
    #the body of every POST request.
    @sock.write(str)
  rescue Errno::EPIPE #this can also be thrown by the write itself.
    self.reconnect
    @sock.write(str)
  end

  @request_last = Time.now
end
195
-
196
#Returns the default headers for a request as a hash.
#
#If args[:default_headers] is given, that hash is returned as it is. Otherwise the hash holds Host, Connection and
#User-Agent, plus Accept-Encoding (unless the object was created with :encoding_gzip => false) and Authorization
#(when the object was created with :basic_auth => {:user => ..., :passwd => ...}).
#===Examples
# headers_hash = http.default_headers
# print "#{headers_hash}"
def default_headers(args = {})
  return args[:default_headers] if args[:default_headers]

  headers = {
    "Host" => @args[:host],
    "Connection" => "Keep-Alive",
    "User-Agent" => @uagent
  }

  #Gzip is asked for unless it was explicitly turned off.
  headers["Accept-Encoding"] = "gzip" if !@args.key?(:encoding_gzip) or @args[:encoding_gzip]

  if @args[:basic_auth]
    #The line-breaks that the base64-encoding adds must be removed - a header value cannot contain them.
    credential = ["#{@args[:basic_auth][:user]}:#{@args[:basic_auth][:passwd]}"].pack("m").delete("\r\n")
    headers["Authorization"] = "Basic #{credential}"
  end

  return headers
end
221
-
222
#Converts a hash or an array to URL-encoded post-data, recursively.
#
#Nested hashes and arrays are written with bracket-keys ("key[sub]=val", "key[0]=val"); arrays use the index of
#each element as key. The pairs are joined with "&". Anything that is neither a Hash nor an Array is returned as
#its string representation. 'args' is used by the recursion to pass the parent key in :orig_key.
#===Examples
# Knj::Http2.post_convert_data("a" => 1, "list" => ["x", "y"]) #=> "a=1&list%5B0%5D=x&list%5B1%5D=y" given standard URL-encoding.
def self.post_convert_data(pdata, args = nil)
  return pdata.to_s unless pdata.is_a?(Hash) or pdata.is_a?(Array)

  parent_key = (args and args[:orig_key])

  if pdata.is_a?(Hash)
    pairs = pdata.map { |key, val| [key, val] }
  else
    pairs = pdata.each_with_index.map { |val, index| [index, val] }
  end

  parts = pairs.map do |key, val|
    key = "#{parent_key}[#{key}]" if parent_key

    if val.is_a?(Hash) or val.is_a?(Array)
      self.post_convert_data(val, {:orig_key => key})
    else
      "#{Knj::Web.urlenc(key)}=#{Knj::Web.urlenc(val.to_s)}"
    end
  end

  #Empty nested collections produce nothing and must not leave a dangling "&" behind.
  return parts.reject { |part| part.empty? }.join("&")
end
263
-
264
#Posts form-data (application/x-www-form-urlencoded) to the given address and returns the result-object read
#from the response.
#
#'addr' is the path relative to the root of the host; a leading slash is accepted. 'pdata' is converted with
#Knj::Http2.post_convert_data. 'args' is handed to the header and response methods together with the address
#(as :addr).
#===Examples
# res = http.post("login.php", {"username" => "John Doe", "password" => 123})
def post(addr, pdata = {}, args = {})
  @mutex.synchronize do
    print "Doing post.\n" if @debug

    #Pass the address on like 'get' does - it is used in the message of the error raised on a 500-response.
    args = args.merge(:addr => addr)
    praw = Knj::Http2.post_convert_data(pdata)

    #Avoid "POST //page" when the address already starts with a slash.
    path = addr.to_s.sub(/\A\//, "")

    request = "POST /#{path} HTTP/1.1#{@nl}"
    request << self.header_str(
      self.default_headers(args).merge(
        "Content-Type" => "application/x-www-form-urlencoded",
        #Content-Length counts bytes, not characters.
        "Content-Length" => praw.bytesize
      ),
      args
    )
    request << "#{@nl}"
    request << praw

    print "Header str: #{request}\n" if @debug

    self.write(request)
    return self.read_response(args)
  end
end
284
-
285
#Posts to the given address as multipart/form-data and returns the result-object read from the response.
#
#Every entry of 'pdata' becomes one part named after its key. The value may be:
# * a Tempfile responding to 'original_filename' - sent as a file with the content found at its path.
# * a StringIO - what can be read from it is sent.
# * a Hash with :content or :fpath (path of a file to send), and optionally :filename.
# * anything else - its string representation is sent.
#A Hash with :filename but neither :content nor :fpath raises a RuntimeError.
#
#The body is assembled in a temporary file under Knj::Os.tmpdir, which is removed again before returning.
#===Examples
# res = http.post_multipart("upload.php", {"normal_value" => 123, "file" => {:filename => "a.txt", :fpath => "/tmp/a.txt"}})
def post_multipart(addr, pdata, args = {})
  require "digest"
  require "stringio"

  @mutex.synchronize do
    boundary = Digest::MD5.hexdigest(Time.now.to_f.to_s)

    #The body is written to a file, so big uploads do not have to be kept in memory.
    tmp_path = "#{Knj::Os.tmpdir}/knj_http2_post_multiepart_tmp_#{boundary}"

    begin
      File.open(tmp_path, "wb") do |praw|
        pdata.each do |key, val|
          #Find out where the content of the part comes from: a string ('content') or a file ('fpath').
          filename = nil
          content = nil
          fpath = nil

          if val.class.name == "Tempfile" and val.respond_to?("original_filename")
            filename = val.original_filename
            val.flush if !val.closed? #make sure everything written to it is on disk before reading the path.
            fpath = val.path
          elsif val.is_a?(StringIO)
            content = val.read.to_s
          elsif val.is_a?(Hash) and val[:content]
            content = val[:content].to_s
          elsif val.is_a?(Hash) and val[:fpath]
            fpath = val[:fpath]
          elsif val.is_a?(Hash) and val[:filename]
            raise "Could not figure out where to get content from."
          else
            content = val.to_s
          end

          filename = val[:filename] if val.is_a?(Hash) and val[:filename]

          #The length is taken from what is actually sent.
          length = content ? content.bytesize : File.size(fpath)

          disposition = "Content-Disposition: form-data; name=\"#{key}\""
          disposition << "; filename=\"#{filename}\"" if filename

          praw << "--#{boundary}#{@nl}"
          praw << "#{disposition};#{@nl}"
          praw << "Content-Length: #{length}#{@nl}"
          praw << "Content-Type: text/plain#{@nl}"
          praw << @nl

          if content
            praw << content
          else
            File.open(fpath, "rb") do |fp|
              while chunk = fp.read(4096)
                praw << chunk
              end
            end
          end

          praw << @nl
        end

        praw << "--#{boundary}--"
      end

      #Generate the request-line and headers. The body is sent from the temporary file afterwards.
      path = addr.to_s.sub(/\A\//, "") #avoid "POST //page" when the address starts with a slash.
      request = "POST /#{path} HTTP/1.1#{@nl}"
      request << self.header_str(self.default_headers(args).merge(
        "Content-Type" => "multipart/form-data; boundary=#{boundary}",
        "Content-Length" => File.size(tmp_path)
      ), args)
      request << @nl

      #Debug.
      print "Headerstr: #{request}\n" if @debug

      #Write and return.
      self.write(request)
      File.open(tmp_path, "rb") do |fp|
        while chunk = fp.read(4096)
          @sock.write(chunk)
        end
      end

      return self.read_response(args)
    ensure
      #'File.exists?' is gone from current Ruby versions - 'File.exist?' works on all of them.
      File.unlink(tmp_path) if File.exist?(tmp_path)
    end
  end
end
379
-
380
#Returns the header-lines for a request as one string, each line ended by the newline-sequence of the object.
#
#'headers_hash' holds the headers to write and is not modified. The cookies remembered by the object are added as
#a Cookie-header, unless there are none or 'args' was given with :cookies set to false or nil.
def header_str(headers_hash, args = {})
  #Work on a copy - the hash may belong to the caller (for instance when given as :default_headers).
  headers = headers_hash.dup

  if @cookies.length > 0 and (!args.key?(:cookies) or args[:cookies])
    pairs = @cookies.map do |cookie_name, cookie_data|
      if cookie_data.is_a?(Hash)
        "#{Knj::Web.urlenc(cookie_data["name"])}=#{Knj::Web.urlenc(cookie_data["value"])}"
      else
        "#{Knj::Web.urlenc(cookie_name)}=#{Knj::Web.urlenc(cookie_data)}"
      end
    end

    headers["Cookie"] = pairs.join("; ")
  end

  return headers.map { |key, val| "#{key}: #{val}#{@nl}" }.join
end
407
-
408
#Hands a piece of the response to the callable given as args[:on_content]. Does nothing and returns nil when
#'args' has no :on_content key.
def on_content_call(args, line)
  return unless args.key?(:on_content)
  args[:on_content].call(line)
end
411
-
412
#Reads the response from the socket after a request has been written and returns the result-object.
#
#Header-lines are handed to 'parse_header' and body-data to 'parse_body'. Afterwards the socket is closed if the
#server asked for it, a gzip-encoded body is decoded, and the status-code is acted upon:
# * 302 with a Location-header: the location is fetched with a GET request and that result is returned (unless
#   the object was created with :follow_redirects => false). The current connection is only reused when host,
#   port and SSL-setting of the location match it.
# * 500: raises a RuntimeError holding the body.
# * 403: raises Knj::Errors::NoAccess.
#Raises a RuntimeError if no status-line was received.
#===Examples
# res = http.read_response
def read_response(args = {})
  @mode = "headers"
  @resp = Knj::Http2::Response.new

  loop do
    begin
      if @length and @length > 0 and @mode == "body"
        line = @sock.read(@length)
      else
        line = @sock.gets
      end

      print "<#{@mode}>: '#{line}'\n" if @debug
    rescue Errno::ECONNRESET
      print "Http2: The connection was reset while reading - breaking gently...\n" if @debug
      @sock = nil
      break
    end

    break if line.to_s == ""

    if @mode == "headers" and line == @nl
      print "Changing mode to body!\n" if @debug
      break if @length == 0
      @mode = "body"
      next
    end

    if @mode == "headers"
      self.parse_header(line, args)
    elsif @mode == "body"
      self.on_content_call(args, "\r\n")
      stat = self.parse_body(line, args)
      break if stat == "break"
      next if stat == "next"
    end
  end

  #Check if we should reconnect based on keep-alive-max.
  if @keepalive_max == 1 or @connection == "close"
    #The socket is already gone if the connection was reset while reading.
    @sock.close if @sock and !@sock.closed?
    @sock = nil
  end

  #Check if the content is gzip-encoded - if so: decode it! An empty body has nothing to decode.
  if @encoding == "gzip" and @resp.args[:body].to_s.length > 0
    require "zlib"
    require "stringio"
    untrusted_str = Zlib::GzipReader.new(StringIO.new(@resp.args[:body])).read.to_s

    #The decoded body is treated as UTF-8 with invalid bytes removed.
    if untrusted_str.respond_to?(:scrub)
      @resp.args[:body] = untrusted_str.force_encoding("UTF-8").scrub("")
    else
      #Ruby versions without String#scrub have 'iconv' for this.
      require "iconv"
      ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
      @resp.args[:body] = ic.iconv(untrusted_str + " ")[0..-2]
    end
  end

  #Release variables.
  resp = @resp
  @resp = nil
  @mode = nil

  raise "No status-code was received from the server.\n\nHeaders:\n#{Knj::Php.print_r(resp.headers, true)}\n\nBody:\n#{resp.args[:body]}" if !resp.args[:code]

  code = resp.args[:code].to_s

  if code == "302" and resp.header?("location") and (!@args.key?(:follow_redirects) or @args[:follow_redirects])
    require "uri"
    uri = URI.parse(resp.header("location"))

    #'get' adds the leading slash itself.
    url = uri.path.to_s.sub(/\A\//, "")
    url << "?#{uri.query}" if uri.query.to_s.length > 0

    redirect_args = {:host => uri.host}
    redirect_args[:ssl] = true if uri.scheme == "https"
    redirect_args[:port] = uri.port if uri.port

    print "Redirecting from location-header to '#{url}'.\n" if @debug

    #The current connection can only serve the redirect if it goes to the same host, port and protocol -
    #otherwise a redirect from http to https on the same host would be requested over http again and again.
    same_connection = true
    if uri.host
      same_connection = (uri.host == @args[:host] and
        uri.port.to_i == @args[:port].to_i and
        (uri.scheme == "https") == (@args[:ssl] ? true : false))
    end

    if same_connection
      return self.get(url)
    else
      http = Knj::Http2.new(redirect_args)

      begin
        return http.get(url)
      ensure
        #The body has been read completely by now - close the extra connection again.
        http.destroy
      end
    end
  elsif code == "500"
    raise "500 - Internal server error: '#{args[:addr]}':\n\n#{resp.body}"
  elsif code == "403"
    raise Knj::Errors::NoAccess
  else
    return resp
  end
end
507
-
508
#Parses one line from the header-section of a response and saves what it holds on the object.
#
#A status-line sets the code and HTTP-version of the current result (the reason-phrase is optional). A header-line
#is stored on the result under its lower-cased name; Set-Cookie, Keep-Alive, Connection, Content-Encoding,
#Content-Length and Content-Type also update the state of the object. Header-lines other than Transfer-Encoding,
#Content-Length, Connection and Keep-Alive are passed on to the :on_content callback. Header-values may be empty.
#
#Raises a RuntimeError if the line is neither a status-line nor a header-line.
#===Examples
# http.parse_header("Content-Type: text/html\r\n")
def parse_header(line, args = {})
  if match = line.match(/^HTTP\/([\d\.]+)\s+(\d+)(?:\s+(.*))?$/)
    @resp.args[:code] = match[2]
    @resp.args[:http_version] = match[1]
  elsif match = line.match(/^(.+?):\s*(.*)#{Regexp.escape(@nl)}$/)
    key = match[1].to_s.downcase
    value = match[2].to_s

    if key == "set-cookie"
      Knj::Web.parse_set_cookies(value).each do |cookie_data|
        @cookies[cookie_data["name"]] = cookie_data
      end
    elsif key == "keep-alive"
      if ka_max = value.match(/max=(\d+)/)
        @keepalive_max = ka_max[1].to_i
        print "Http2: Keepalive-max set to: '#{@keepalive_max}'.\n" if @debug
      end

      if ka_timeout = value.match(/timeout=(\d+)/)
        @keepalive_timeout = ka_timeout[1].to_i
        print "Http2: Keepalive-timeout set to: '#{@keepalive_timeout}'.\n" if @debug
      end
    elsif key == "connection"
      @connection = value.downcase
    elsif key == "content-encoding"
      @encoding = value.downcase
    elsif key == "content-length"
      @length = value.to_i
    elsif key == "content-type"
      #The charset is split off and kept separately from the content-type.
      ctype = value.dup
      if match_charset = ctype.match(/\s*;\s*charset=(.+)/i)
        @charset = match_charset[1].downcase
        @resp.args[:charset] = @charset
        ctype.gsub!(match_charset[0], "")
      end

      @ctype = ctype
      @resp.args[:contenttype] = @ctype
    end

    if key != "transfer-encoding" and key != "content-length" and key != "connection" and key != "keep-alive"
      self.on_content_call(args, line)
    end

    @resp.headers[key] = [] if !@resp.headers.key?(key)
    @resp.headers[key] << value
  else
    #Up to 409600 more bytes are read from the socket to show what followed the line.
    raise "Could not understand header string: '#{line}'.\n\n#{@sock.read(409600)}"
  end
end
560
-
561
#Parses a piece of the body based on the headers received and adds it to the result-object.
#
#With 'Transfer-Encoding: chunked', 'line' is the line holding the size of the next chunk; the chunk itself is
#read from the socket. Otherwise 'line' is body-data. Every piece added is passed on to the :on_content callback.
#
#Returns "break" when the body is complete: a content-length of zero, the last chunk (lines following it are
#skipped up to the blank line), the end of the stream, or as many bytes read as the Content-Length header says.
#Returns nil when more is expected. Raises a RuntimeError if the HTTP-version of the response is not 1.0 or 1.1,
#or if a chunk is not followed by a newline.
#===Examples
# http.parse_body(str, args)
def parse_body(line, args)
  #The version is compared, not assigned - the version parsed from the status-line must stay intact. 1.0 is
  #accepted too, since such responses have always been read the same way.
  version = @resp.args[:http_version].to_s
  raise "Dont know how to read HTTP version: '#{@resp.args[:http_version]}'." unless version == "1.0" or version == "1.1"

  return "break" if @length == 0

  if @resp.header("transfer-encoding").to_s.downcase == "chunked"
    len = line.strip.hex

    if len > 0
      read = @sock.read(len)
      return "break" if read.nil? or read == "" or read == @nl
      @resp.args[:body] << read
      self.on_content_call(args, read)
    end

    nl = @sock.gets

    if len == 0
      #The last chunk may be followed by more lines - skip them up to the blank line ending the body.
      while nl and nl != @nl
        nl = @sock.gets
      end

      return "break"
    end

    raise "Should have read newline but didnt: '#{nl}'." if nl != @nl
  else
    @resp.args[:body] << line.to_s
    self.on_content_call(args, line)

    #Content-Length counts bytes, not characters.
    return "break" if @resp.header?("content-length") and @resp.args[:body].bytesize >= @resp.header("content-length").to_i
  end

  return nil
end
596
- end
597
-
598
#Holds the result of a request: status-code, HTTP-version, headers, body, charset and content-type.
#
#Everything is kept in the 'args'-hash, which is filled out while the response is read.
class Knj::Http2::Response
  attr_reader :args

  #'args' may hold values to start out with. :headers defaults to an empty hash and :body to an empty string.
  def initialize(args = {})
    @args = args
    @args[:headers] = {} if !@args.key?(:headers)
    @args[:body] = "" if !@args.key?(:body)
  end

  #Returns the headers of the result as a hash of header-name => array of values.
  #===Examples
  # headers_hash = res.headers
  def headers
    return @args[:headers]
  end

  #Returns the first value of the header with the given name, or false if not found. The name is looked up as
  #given first and lower-cased after that, so "Content-Type" finds a header stored as "content-type".
  #===Examples
  # val = res.header("content-type")
  def header(key)
    values = header_values(key)
    return false if !values
    return values.first.to_s
  end

  #Returns true if a header with the given name exists and its first value is not empty. The name is looked up
  #the same way as in 'header'.
  #===Examples
  # print "No content-type was given." if !res.header?("content-type")
  def header?(key)
    values = header_values(key)
    return true if values and values.first.to_s.length > 0
    return false
  end

  #Returns the code of the result (200, 404, 500 etc).
  #===Examples
  # print "An internal error occurred." if res.code.to_i == 500
  def code
    return @args[:code]
  end

  #Returns the HTTP-version of the result.
  #===Examples
  # print "We are using HTTP 1.1 and should support keep-alive." if res.http_version.to_s == "1.1"
  def http_version
    return @args[:http_version]
  end

  #Returns the complete body of the result as a string.
  #===Examples
  # print "Looks like we caught the end of it as well?" if res.body.to_s.downcase.index("</html>") != nil
  def body
    return @args[:body]
  end

  #Returns the charset of the result.
  def charset
    return @args[:charset]
  end

  #Returns the content-type of the result as a string.
  #===Examples
  # print "This body can be printed - its just plain text!" if res.contenttype == "text/plain"
  def contenttype
    return @args[:contenttype]
  end

  private

  #Returns the values stored for the given header-name, trying the name as given before its lower-cased form.
  #Returns nil if neither is found.
  def header_values(key)
    stored = @args[:headers]
    return stored[key] if stored.key?(key)
    return stored[key.to_s.downcase]
  end
end