knjrbfw 0.0.59 → 0.0.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/knj/http2.rb DELETED
@@ -1,662 +0,0 @@
1
#This class tries to emulate a browser in Ruby without any visual stuff. It remembers cookies, keeps
#the connection alive between requests, reconnects according to the keep-alive rules announced by the
#server and follows 302-redirects (unless :follow_redirects is given as false).
#
#Recognized arguments (all read from the hash given to 'new'): :host (required), :port, :ssl, :debug,
#:nl, :user_agent, :proxy (hash with :host, :port, :user, :passwd), :basic_auth (hash with :user,
#:passwd), :encoding_gzip and :follow_redirects.
#===Examples
# Knj::Http2.new(:host => "www.somedomain.com", :port => 80, :ssl => false, :debug => false) do |http|
#   res = http.get("index.rhtml?show=some_page")
#   html = res.body
#   print html
#
#   res = http.post("index.rhtml?choice=login", {"username" => "John Doe", "password" => 123})
#   print res.body
#   print "#{res.headers}"
# end
class Knj::Http2
  attr_reader :cookies, :args

  #Connects to the given host. 'args' may be a hash of options or just a host-string.
  #If a block is given, the object is yielded to it and destroyed afterwards.
  #Raises a RuntimeError if the arguments are not a hash or no host was given.
  def initialize(args = {})
    require "#{$knjpath}web"
    require "monitor"

    args = {:host => args} if args.is_a?(String)
    raise "Arguments wasnt a hash." unless args.is_a?(Hash)

    @args = args
    @cookies = {}
    @debug = @args[:debug]
    @mutex = Monitor.new

    #Fill in defaults for everything that was not given.
    @args[:port] ||= (@args[:ssl] ? 443 : 80)
    @nl = @args[:nl] || "\r\n"
    @uagent = @args[:user_agent] || "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)"

    raise "No host was given." unless @args[:host]
    self.reconnect

    if block_given?
      begin
        yield(self)
      ensure
        self.destroy
      end
    end
  end

  #Returns boolean based on the if the object is connected and the socket is working.
  #===Examples
  # print "Socket is working." if http.socket_working?
  def socket_working?
    return false if !@sock || @sock.closed?

    if @keepalive_timeout && @request_last
      idle = Time.now.to_i - @request_last.to_i
      if idle >= @keepalive_timeout
        print "Http2: We are over the keepalive-wait - returning false for socket_working?.\n" if @debug
        return false
      end
    end

    return true
  end

  #Destroys the object unsetting all variables and closing all sockets.
  #===Examples
  # http.destroy
  def destroy
    @args = nil
    @cookies = nil
    @debug = nil
    @mutex = nil
    @uagent = nil
    @keepalive_timeout = nil
    @request_last = nil

    close_sockets
  end

  #Reconnects to the host. Sockets left over from a previous connection are closed first.
  def reconnect
    require "socket"
    print "Http2: Reconnect.\n" if @debug

    #Closing a stale socket is best effort - a failure here must not prevent the new connection.
    begin
      close_sockets
    rescue StandardError => e
      print "Http2: Ignoring error while closing old socket: #{e.message}\n" if @debug
      @sock = nil
      @sock_plain = nil
      @sock_ssl = nil
    end

    #Reset variables.
    @keepalive_max = nil
    @keepalive_timeout = nil
    @connection = nil
    @contenttype = nil
    @charset = nil

    #Open connection.
    if @args[:proxy]
      connect_through_proxy
    else
      print "Http2: Opening socket connection to '#{@args[:host]}:#{@args[:port]}'.\n" if @debug
      @sock_plain = TCPSocket.new(@args[:host], @args[:port].to_i)
    end

    if @args[:ssl]
      print "Http2: Initializing SSL.\n" if @debug
      require "openssl"

      #NOTE(review): certificate verification is NOT enabled here (as in the original code).
      ssl_context = OpenSSL::SSL::SSLContext.new
      #ssl_context.verify_mode = OpenSSL::SSL::VERIFY_PEER

      @sock_ssl = OpenSSL::SSL::SSLSocket.new(@sock_plain, ssl_context)
      @sock_ssl.sync_close = true
      #Send the host name (SNI) so name-based virtual hosts can pick the right certificate.
      @sock_ssl.hostname = @args[:host].to_s if @sock_ssl.respond_to?(:hostname=)
      @sock_ssl.connect

      @sock = @sock_ssl
    else
      @sock = @sock_plain
    end
  end

  #Returns a result-object based on the arguments.
  #===Examples
  # res = http.get("somepage.html")
  # print res.body #=> <String>-object containing the HTML gotten.
  def get(addr, args = {})
    @mutex.synchronize do
      #Work on a copy so the hash given by the caller is not modified.
      args = args.merge(:addr => addr)

      request = "GET /#{addr} HTTP/1.1#{@nl}"
      request << header_str(self.default_headers(args), args)
      request << "#{@nl}"

      print "Http2: Writing headers.\n" if @debug
      print "Header str: #{request}\n" if @debug
      self.write(request)

      print "Http2: Reading response.\n" if @debug
      resp = self.read_response(args)

      print "Http2: Done with get request.\n" if @debug
      return resp
    end
  end

  #Tries to write a string to the socket. If it fails it reconnects and tries again.
  #The string is written exactly as given ('puts' would append a newline after a request body).
  def write(str)
    #Reset variables.
    @length = nil
    @encoding = nil
    self.reconnect unless self.socket_working?

    begin
      raise Errno::EPIPE, "The socket is closed." if !@sock || @sock.closed?
      @sock.write(str)
    rescue Errno::EPIPE #this can also be thrown by write.
      self.reconnect
      @sock.write(str)
    end

    @request_last = Time.now
  end

  #Returns the default headers for a request. If args[:default_headers] is given it is returned as is.
  #===Examples
  # headers_hash = http.default_headers
  # print "#{headers_hash}"
  def default_headers(args = {})
    return args[:default_headers] if args[:default_headers]

    headers = {
      "Host" => @args[:host],
      "Connection" => "Keep-Alive",
      "User-Agent" => @uagent
    }

    #Gzip is requested unless it was explicitly turned off.
    headers["Accept-Encoding"] = "gzip" if !@args.key?(:encoding_gzip) || @args[:encoding_gzip]

    if @args[:basic_auth]
      auth = @args[:basic_auth]
      headers["Authorization"] = "Basic #{basic_credential(auth[:user], auth[:passwd])}"
    end

    return headers
  end

  #This is used to convert a hash to valid post-data recursivly.
  #Hashes and arrays become "key=value" pairs joined by "&" (nested keys are written as
  #"outer[inner]"). Anything else is returned as 'to_s'.
  def self.post_convert_data(pdata, args = nil)
    prefix = args ? args[:orig_key] : nil

    if pdata.is_a?(Hash)
      pairs = pdata.map { |key, val| [key, val] }
    elsif pdata.is_a?(Array)
      pairs = pdata.each_with_index.map { |val, count| [count, val] }
    else
      return pdata.to_s
    end

    parts = pairs.map do |key, val|
      key = "#{prefix}[#{key}]" if prefix

      if val.is_a?(Hash) || val.is_a?(Array)
        self.post_convert_data(val, {:orig_key => key})
      else
        "#{Knj::Web.urlenc(key)}=#{Knj::Web.urlenc(val.to_s)}"
      end
    end

    #Empty nested structures produce nothing and must not leave a dangling "&".
    return parts.reject { |part| part.empty? }.join("&")
  end

  #Posts to a certain page.
  #===Examples
  # res = http.post("login.php", {"username" => "John Doe", "password" => 123})
  def post(addr, pdata = {}, args = {})
    @mutex.synchronize do
      print "Doing post.\n" if @debug

      #Remember the address so error messages can show it (like 'get' does).
      args = args.merge(:addr => addr)
      praw = Knj::Http2.post_convert_data(pdata)

      request = "POST /#{addr} HTTP/1.1#{@nl}"
      request << header_str(self.default_headers(args).merge(
        "Content-Type" => "application/x-www-form-urlencoded",
        "Content-Length" => praw.bytesize #Content-Length counts bytes, not characters.
      ), args)
      request << "#{@nl}"
      request << praw

      print "Header str: #{request}\n" if @debug

      self.write(request)
      return self.read_response(args)
    end
  end

  #Posts to a certain page using the multipart-method.
  #Values may be plain values, StringIO-objects, uploaded Tempfile-objects (responding to
  #'original_filename') or hashes with :filename and either :content or :fpath.
  #===Examples
  # res = http.post_multipart("upload.php", {"normal_value" => 123, "file" => Tempfile.new(?)})
  def post_multipart(addr, pdata, args = {})
    require "digest"

    @mutex.synchronize do
      args = args.merge(:addr => addr)
      boundary = Digest::MD5.hexdigest(Time.now.to_f.to_s)

      #The post-content is generated in a temp-file so big uploads are not kept in memory.
      tmp_path = "#{Knj::Os.tmpdir}/knj_http2_post_multiepart_tmp_#{boundary}"

      begin
        #Binary mode - uploaded content must not be newline-translated.
        File.open(tmp_path, "wb") do |praw|
          pdata.each do |key, val|
            write_multipart_part(praw, boundary, key, val)
          end

          praw << "--#{boundary}--"
        end

        #Generate header-string for the generated content.
        request = "POST /#{addr} HTTP/1.1#{@nl}"
        request << header_str(self.default_headers(args).merge(
          "Content-Type" => "multipart/form-data; boundary=#{boundary}",
          "Content-Length" => File.size(tmp_path)
        ), args)
        request << @nl

        #Debug.
        print "Headerstr: #{request}\n" if @debug

        #Write and return.
        self.write(request)
        File.open(tmp_path, "rb") do |fp|
          while (data = fp.read(4096))
            @sock.write(data)
          end
        end

        return self.read_response(args)
      ensure
        File.unlink(tmp_path) if File.exist?(tmp_path)
      end
    end
  end

  #Returns a header-string which normally would be used for a request in the given state.
  #The given hash is not modified - the "Cookie"-header is added to a copy.
  def header_str(headers_hash, args = {})
    headers = headers_hash.dup

    if @cookies.length > 0 && (!args.key?(:cookies) || args[:cookies])
      pairs = @cookies.map do |cookie_name, cookie_data|
        if cookie_data.is_a?(Hash)
          "#{Knj::Web.urlenc(cookie_data["name"])}=#{Knj::Web.urlenc(cookie_data["value"])}"
        else
          "#{Knj::Web.urlenc(cookie_name)}=#{Knj::Web.urlenc(cookie_data)}"
        end
      end

      headers["Cookie"] = pairs.join("; ")
    end

    return headers.map { |key, val| "#{key}: #{val}#{@nl}" }.join
  end

  #Calls the :on_content callback with the given data, if such a callback was given.
  def on_content_call(args, line)
    args[:on_content].call(line) if args.key?(:on_content)
  end

  #Reads the response after posting headers and data.
  #Raises a RuntimeError if no status-code was received or the server answered 500, and
  #Knj::Errors::NoAccess on 403. A 302 with a location-header is followed.
  #===Examples
  # res = http.read_response
  def read_response(args = {})
    @mode = "headers"
    @resp = Knj::Http2::Response.new

    loop do
      begin
        if @length && @length > 0 && @mode == "body"
          line = @sock.read(@length)
        else
          line = @sock.gets
        end

        print "<#{@mode}>: '#{line}'\n" if @debug
      rescue Errno::ECONNRESET
        print "Http2: The connection was reset while reading - breaking gently...\n" if @debug
        @sock = nil
        break
      end

      break if line.to_s == ""

      if @mode == "headers" && line == @nl
        print "Changing mode to body!\n" if @debug
        #204 and 304 never carry a body - waiting for one would block on a kept-alive socket.
        break if @length == 0 || bodyless_status?
        @mode = "body"
        next
      end

      if @mode == "headers"
        self.parse_header(line, args)
      elsif @mode == "body"
        #NOTE(review): the callback receives "\r\n" before every body part - kept from the original.
        self.on_content_call(args, "\r\n")
        stat = self.parse_body(line, args)
        break if stat == "break"
        next if stat == "next"
      end
    end

    #Check if we should reconnect based on keep-alive-max.
    if @keepalive_max == 1 || @connection == "close"
      #The socket is nil here if the connection was reset while reading.
      @sock.close if @sock && !@sock.closed?
      @sock = nil
    end

    #Check if the content is gzip-encoded - if so: decode it!
    if @encoding == "gzip" && !@resp.args[:body].to_s.empty?
      require "zlib"
      require "stringio"

      gz = Zlib::GzipReader.new(StringIO.new(@resp.args[:body]))
      begin
        @resp.args[:body] = utf8_clean(gz.read)
      ensure
        gz.close
      end
    end

    #Release variables.
    resp = @resp
    @resp = nil
    @mode = nil

    raise "No status-code was received from the server.\n\nHeaders:\n#{Knj::Php.print_r(resp.headers, true)}\n\nBody:\n#{resp.args[:body]}" if !resp.args[:code]

    code = resp.args[:code].to_s
    follow = !@args.key?(:follow_redirects) || @args[:follow_redirects]

    if code == "302" && resp.header?("location") && follow
      return follow_redirect(resp.header("location"))
    elsif code == "500"
      raise "500 - Internal server error: '#{args[:addr]}':\n\n#{resp.body}"
    elsif code == "403"
      raise Knj::Errors::NoAccess
    else
      return resp
    end
  end

  #Parse a header-line and saves it on the object.
  #Raises a RuntimeError if the line is neither a status-line nor a "Name: value"-line.
  #===Examples
  # http.parse_header("Content-Type: text/html\r\n")
  def parse_header(line, args = {})
    #Status-line first - the reason-phrase is optional and may contain a colon.
    if (status = line.match(/^HTTP\/([\d\.]+)\s+(\d+)(?:\s+(.*))?$/))
      @resp.args[:code] = status[2]
      @resp.args[:http_version] = status[1]
      return nil
    end

    #The value may be empty ("X-Something:").
    header = line.match(/^(.+?):\s*(.*)#{Regexp.escape(@nl)}$/)

    #Nothing more is read from the socket here - that could block forever on a kept-alive connection.
    raise "Could not understand header string: '#{line}'." unless header

    key = header[1].to_s.downcase
    value = header[2]

    case key
    when "set-cookie"
      Knj::Web.parse_set_cookies(value).each do |cookie_data|
        @cookies[cookie_data["name"]] = cookie_data
      end
    when "keep-alive"
      if (ka_max = value.to_s.match(/max=(\d+)/))
        @keepalive_max = ka_max[1].to_i
        print "Http2: Keepalive-max set to: '#{@keepalive_max}'.\n" if @debug
      end

      if (ka_timeout = value.to_s.match(/timeout=(\d+)/))
        @keepalive_timeout = ka_timeout[1].to_i
        print "Http2: Keepalive-timeout set to: '#{@keepalive_timeout}'.\n" if @debug
      end
    when "connection"
      @connection = value.to_s.downcase
    when "content-encoding"
      @encoding = value.to_s.downcase
    when "content-length"
      @length = value.to_i
    when "content-type"
      ctype = value.to_s

      if (match_charset = ctype.match(/\s*;\s*charset=(.+)/i))
        @charset = match_charset[1].downcase
        @resp.args[:charset] = @charset
        #Non-destructive, so the header stored below keeps the full original value.
        ctype = ctype.sub(match_charset[0], "")
      end

      @ctype = ctype
      @resp.args[:contenttype] = @ctype
    end

    #Headers describing the connection itself are not passed on to the content-callback.
    unless ["transfer-encoding", "content-length", "connection", "keep-alive"].include?(key)
      self.on_content_call(args, line)
    end

    @resp.headers[key] = [] unless @resp.headers.key?(key)
    @resp.headers[key] << value
    return nil
  end

  #Parses the body based on given headers and saves it to the result-object.
  #Returns "break" when the body has been read completely.
  # http.parse_body(str)
  def parse_body(line, args)
    #Compare (not assign!) the version - HTTP 1.0 and 1.1 bodies are read the same way here.
    version = @resp.args[:http_version].to_s
    if !version.empty? && version != "1.0" && version != "1.1"
      raise "Dont know how to read HTTP version: '#{@resp.args[:http_version]}'."
    end

    return "break" if @length == 0

    if @resp.header("transfer-encoding").to_s.downcase == "chunked"
      #'line' holds the chunk-size in hex.
      len = line.strip.hex

      if len > 0
        read = @sock.read(len)
        return "break" if read.nil? || read == "" #connection ended in the middle of the body.
        @resp.args[:body] << read
        self.on_content_call(args, read)

        nl = @sock.gets
        raise "Should have read newline but didnt: '#{nl}'." if nl != @nl
        return nil
      end

      #Last chunk: skip optional trailer-headers up to and including the final empty line.
      loop do
        trailer = @sock.gets
        break if trailer.nil? || trailer == @nl
      end

      return "break"
    end

    @resp.args[:body] << line.to_s
    self.on_content_call(args, line)
    return "break" if @resp.header?("content-length") && @resp.args[:body].length >= @resp.header("content-length").to_i
    return nil
  end

  private

  #Closes every open socket of this object and forgets them.
  def close_sockets
    [@sock, @sock_ssl, @sock_plain].each do |sock|
      sock.close if sock && !sock.closed?
    end

    @sock = nil
    @sock_plain = nil
    @sock_ssl = nil
  end

  #Returns "user:passwd" base64-encoded on a single line (usable inside a header).
  def basic_credential(user, passwd)
    return ["#{user}:#{passwd}"].pack("m").delete("\r\n")
  end

  #Opens the plain socket through the proxy given in the arguments by using CONNECT.
  #Raises a RuntimeError if the proxy does not answer with status 200.
  def connect_through_proxy
    proxy = @args[:proxy]

    print "Http2: Initializing proxy stuff.\n" if @debug
    @sock_plain = TCPSocket.new(proxy[:host], proxy[:port])
    @sock = @sock_plain

    request = "CONNECT #{@args[:host]}:#{@args[:port]} HTTP/1.0#{@nl}"
    request << "User-Agent: #{@uagent}#{@nl}"

    if proxy[:user] && proxy[:passwd]
      request << "Proxy-Authorization: Basic #{basic_credential(proxy[:user], proxy[:passwd])}#{@nl}"
    end

    request << @nl
    @sock.write(request)

    #Any HTTP/1.x 200-answer means the tunnel is open - the reason-phrase differs between proxies.
    status = @sock.gets
    unless status.to_s =~ /\AHTTP\/1\.[01]\s+200(\s|\z)/i
      raise "Proxy did not accept the connection: '#{status.to_s.strip}'."
    end

    #Skip the headers the proxy may send until the empty line ending its answer.
    loop do
      line = @sock.gets
      raise "Proxy closed the connection while answering." if line.nil?
      break if line.strip.empty?
    end
  end

  #Writes one part of a multipart-body (boundary, part-headers and content) to the given IO.
  #Raises a RuntimeError for a hash with :filename but neither :content nor :fpath.
  def write_multipart_part(io, boundary, key, val)
    require "stringio"

    filename = nil
    content = nil
    fpath = nil

    if val.class.name == "Tempfile" && val.respond_to?("original_filename")
      #Uploaded file: send what is in the file - not the inspect-string of the object.
      filename = val.original_filename
      val.rewind
      content = val.read.to_s
    elsif val.is_a?(StringIO)
      content = val.read.to_s
    elsif val.is_a?(Hash)
      filename = val[:filename]

      if val[:content]
        content = val[:content].to_s
      elsif val[:fpath]
        fpath = val[:fpath]
      elsif filename
        raise "Could not figure out where to get content from."
      else
        content = val.to_s
      end
    else
      content = val.to_s
    end

    #The length is taken from what is actually written below.
    length = fpath ? File.size(fpath) : content.bytesize

    io << "--#{boundary}#{@nl}"

    if filename
      io << "Content-Disposition: form-data; name=\"#{key}\"; filename=\"#{filename}\";#{@nl}"
    else
      io << "Content-Disposition: form-data; name=\"#{key}\";#{@nl}"
    end

    io << "Content-Length: #{length}#{@nl}"
    io << "Content-Type: text/plain#{@nl}"
    io << @nl

    if fpath
      File.open(fpath, "rb") do |fp|
        while (data = fp.read(4096))
          io << data
        end
      end
    else
      io << content
    end

    io << @nl
  end

  #Returns true if the status-code received so far is one that never has a body.
  def bodyless_status?
    return ["204", "304"].include?(@resp.args[:code].to_s)
  end

  #Returns the string as UTF-8 with invalid byte-sequences removed.
  def utf8_clean(str)
    if str.respond_to?(:scrub)
      return str.dup.force_encoding("UTF-8").scrub("")
    end

    #Older Rubies without String#scrub still ship iconv.
    require "iconv"
    return Iconv.new("UTF-8//IGNORE", "UTF-8").iconv(str + " ")[0..-2]
  end

  #Requests the page a location-header points to and returns its result.
  def follow_redirect(location)
    require "uri"

    uri = URI.parse(location)

    #'get' adds the leading slash itself.
    url = uri.path.to_s.sub(/\A\//, "")
    url << "?#{uri.query}" if uri.query.to_s.length > 0

    target = {:host => uri.host}
    target[:ssl] = true if uri.scheme == "https"
    target[:port] = uri.port if uri.port

    print "Redirecting from location-header to '#{url}'.\n" if @debug

    #The current connection can only be reused if host, port and scheme are all the same.
    same_server = !target[:host] || (
      target[:host] == @args[:host] &&
      target[:port].to_i == @args[:port].to_i &&
      (target[:ssl] ? true : false) == (@args[:ssl] ? true : false)
    )

    return self.get(url) if same_server

    #The result is read completely by 'get', so the temporary connection can be closed again.
    http = Knj::Http2.new(target)
    begin
      return http.get(url)
    ensure
      http.destroy
    end
  end
end
597
-
598
#Holds the result of a request: status-code, HTTP-version, headers, body, charset and content-type.
#Everything is stored in the 'args'-hash, which the reader of the response fills in.
class Knj::Http2::Response
  attr_reader :args

  #'args' may contain already known values. :headers defaults to an empty hash and :body to an
  #empty string.
  def initialize(args = {})
    @args = args
    @args[:headers] = {} unless @args.key?(:headers)
    @args[:body] = "" unless @args.key?(:body)
  end

  #Returns headers given from the host for the result.
  #===Examples
  # headers_hash = res.headers
  def headers
    return @args[:headers]
  end

  #Returns a certain header by name or false if not found. If the header was given several
  #times, the first value is returned. The name is matched exactly first and then lower-cased,
  #since header-names are case-insensitive.
  #===Examples
  # val = res.header("content-type")
  def header(key)
    values = header_values(key)
    return false unless values
    return values.first.to_s
  end

  #Returns true if a header of the given string exists (and is not empty).
  #===Examples
  # print "No content-type was given." if !http.header?("content-type")
  def header?(key)
    values = header_values(key)
    return false unless values
    return values.first.to_s.length > 0
  end

  #Returns the code of the result (200, 404, 500 etc).
  #===Examples
  # print "An internal error occurred." if res.code.to_i == 500
  def code
    return @args[:code]
  end

  #Returns the HTTP-version of the result.
  #===Examples
  # print "We are using HTTP 1.1 and should support keep-alive." if res.http_version.to_s == "1.1"
  def http_version
    return @args[:http_version]
  end

  #Returns the complete body of the result as a string.
  #===Examples
  # print "Looks like we caught the end of it as well?" if res.body.to_s.downcase.index("</html>") != nil
  def body
    return @args[:body]
  end

  #Returns the charset of the result.
  def charset
    return @args[:charset]
  end

  #Returns the content-type of the result as a string.
  #===Examples
  # print "This body can be printed - its just plain text!" if http.contenttype == "text/plain"
  def contenttype
    return @args[:contenttype]
  end

  private

  #Returns the stored values for the given header-name or nil if the header is unknown.
  def header_values(key)
    stored = @args[:headers]
    return stored[key] if stored.key?(key)

    lowered = key.to_s.downcase
    return stored[lowered] if stored.key?(lowered)
    return nil
  end
end