romanbsd-curb-openuri 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 0
3
+ :minor: 1
4
+ :major: 0
@@ -0,0 +1,32 @@
1
+ require 'curl_agent'
2
+
3
module Kernel
  private

  alias open_uri_original_open open # :nodoc:

  # Opens various resources, including URLs.
  #
  # Dispatch order:
  #
  # 1. If +name+ responds to +open+, that method is called with the
  #    remaining arguments and the block.
  # 2. If +name+ is string-like (responds to +to_str+) and begins with
  #    http://, https:// or ftp://, the request is handed to
  #    CurlAgent.open together with the remaining arguments and the block.
  # 3. Otherwise the original Kernel#open (kept under the alias
  #    open_uri_original_open) is called.
  #
  # The string check uses +to_str+ rather than +to_s+: every object
  # responds to +to_s+, so that test could never reject anything and
  # non-string arguments ended up being fed to the regexp match instead
  # of falling through to the original open.
  def open(name, *rest, &block) # :doc:
    if name.respond_to?(:open)
      name.open(*rest, &block)
    elsif name.respond_to?(:to_str) && %r{\A(ftp|https?)://} =~ name
      CurlAgent.open(name, *rest, &block)
    else
      open_uri_original_open(name, *rest, &block)
    end
  end
  module_function :open
end
data/lib/curl_agent.rb ADDED
@@ -0,0 +1,94 @@
1
+ require 'rubygems'
2
+ require 'stringio'
3
+ require 'curb'
4
+
5
# Wraps a Curl::Easy handle, installs a set of defaults on it and offers an
# open-uri style entry point (CurlAgent.open). Methods the agent does not
# define itself are forwarded to the wrapped Curl::Easy instance.
class CurlAgent
  # Access modes accepted by CurlAgent.open (the resource is read only).
  READ_ONLY_MODES = [nil, 'r', 'rb', File::RDONLY].freeze

  # Creates the underlying Curl::Easy handle for +url+ and applies the
  # defaults: a browser User-Agent header, redirect following (at most 2),
  # cookies, a 5 second connect timeout and a 30 second total timeout.
  def initialize(url)
    @curl = Curl::Easy.new(url)
    # Defaults
    @curl.headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6'
    @curl.follow_location = true
    @curl.max_redirects = 2
    @curl.enable_cookies = true
    @curl.connect_timeout = 5
    @curl.timeout = 30
    @performed = false
  end

  # Does the actual fetch, after which it's possible to call body_str.
  def perform!
    @curl.perform
    @performed = true
  end

  # Returns the lower-cased charset of the page, or '' when none is found.
  # The fetch is performed first if it has not happened yet. The
  # Content-Type reported by curl is consulted first; failing that, the
  # first 1000 characters of the body are searched for a
  # <meta http-equiv="Content-Type"> declaration.
  def charset
    perform! unless @performed

    content_type = @curl.content_type || ''
    from_header = content_type.match(/charset\s*=\s*([a-zA-Z0-9-]+)/ni)
    return from_header[1].downcase if from_header

    body = body_str
    return '' if body.nil?

    meta_tag = body.slice(0, 1000).match(%r{<meta.*http-equiv\s*=\s*['"]?Content-Type['"]?.*?>}mi)
    from_meta = meta_tag && meta_tag[0].match(%r{content=['"]text/html.*?charset=(.*?)['"]}mi)
    from_meta ? from_meta[1].downcase : ''
  end

  # True when either the agent itself or the wrapped Curl::Easy instance
  # responds to +symbol+. The agent's own methods (charset, perform!) are
  # reported as well, and the optional second argument of
  # Object#respond_to? is accepted.
  def respond_to?(symbol, include_private = false)
    super || @curl.respond_to?(symbol, include_private)
  end

  # Proxies all unknown calls to the Curl::Easy instance. The block is
  # forwarded too, so block-taking methods of the handle work through
  # the agent.
  def method_missing(symbol, *args, &block)
    @curl.send(symbol, *args, &block)
  end

  # This method opens the URL and returns an IO object (a StringIO over the
  # fetched body). If a block is provided, it's called with that object and
  # the block's value is returned instead.
  # You can override defaults and provide configuration directives
  # to Curl::Easy with symbol hash keys, for example:
  #   open('http://www.example.com/', :timeout => 10)
  # all the other keys will be passed as headers, for example:
  #   open('http://www.example.com/', :timeout => 10, 'User-Agent'=>'curl')
  #
  # The options hash passed by the caller is not modified.
  #
  # Raises ArgumentError for unexpected extra arguments or for an access
  # mode other than nil, 'r', 'rb' or File::RDONLY.
  def self.open(name, *rest, &block)
    mode, _perm, rest = scan_open_optional_arguments(*rest)
    options = rest.shift if !rest.empty? && Hash === rest.first
    raise ArgumentError, "extra arguments" unless rest.empty?

    unless READ_ONLY_MODES.include?(mode)
      raise ArgumentError, "invalid access mode #{mode} (resource is read only.)"
    end

    agent = CurlAgent.new(name)

    # Symbol keys are setters on the agent; everything else is a header, as
    # in the original open-uri. Headers are collected separately and merged
    # after all setters ran, without touching the caller's hash.
    headers = {}
    (options || {}).each do |key, value|
      if key.is_a?(Symbol)
        agent.send("#{key}=", value)
      else
        headers[key] = value
      end
    end
    agent.headers.merge!(headers)

    agent.perform!
    # A response without a body is exposed as an empty stream.
    io = StringIO.new(agent.body_str || '')
    block ? block.call(io) : io
  end

  # Splits the optional leading mode (String or Integer) and permission
  # (Integer) arguments off +rest+. Returns [mode, perm, remaining args];
  # mode and perm are nil when absent.
  def self.scan_open_optional_arguments(*rest) # :nodoc:
    mode = perm = nil
    if String === rest.first || Integer === rest.first
      mode = rest.shift
      perm = rest.shift if Integer === rest.first
    end
    [mode, perm, rest]
  end
end
@@ -0,0 +1,22 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Checks which arguments the overridden Kernel#open hands to CurlAgent.
describe "CurbOpenuri" do
  describe "shall override Kernel::open" do
    # One example per URL scheme that is expected to go through curl.
    %w[http https ftp].each do |scheme|
      it "shall use curl for #{scheme}" do
        CurlAgent.should_receive(:open).and_return('')
        open("#{scheme}://www.example.com/")
      end
    end

    it 'shall not use curl for other protocols' do
      CurlAgent.should_not_receive(:open)
      # Treated as a plain (non-existent) path by the original open.
      opening = lambda { open('file:///dev/null') }
      opening.should raise_error(Errno::ENOENT)
    end

    it 'shall not use curl for files' do
      CurlAgent.should_not_receive(:open)
      open('/dev/null') do |_file|
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Specs for CurlAgent: defaults, charset detection and the open entry point.
describe "CurlAgent" do

  describe 'new method' do
    it 'shall permit to override user-agent' do
      agent = CurlAgent.new('http://www.example.com/')
      agent.headers['User-Agent'].should_not be_nil
      agent.headers['User-Agent'] = 'curl'
      agent.headers['User-Agent'].should == 'curl'
    end
  end

  describe 'when used alone' do
    # Replaces Curl::Easy with a mock that accepts every default set by
    # CurlAgent#initialize and expects exactly one perform call.
    before(:each) do
      @mock = mock('curl_easy')
      @headers = {'User-Agent' => 'foo'}
      @mock.stub!(:headers).and_return(@headers)
      [:'follow_location=', :'max_redirects=', :'enable_cookies=',
       :'connect_timeout=', :'timeout='].each do |setter|
        @mock.stub!(setter)
      end
      @mock.should_receive(:perform)
      Curl::Easy.should_receive(:new).and_return(@mock)
    end

    it 'should recognize charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=utf-8')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'utf-8'
    end

    it 'should recognize upper case charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=Windows-1251')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'windows-1251'
    end

    it 'should return empty str for empty charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      @mock.should_receive(:body_str).once
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == ''
    end

    it 'should attempt to find charset in html' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      # Page whose only charset declaration is the http-equiv meta tag.
      page = <<EOF
<html>
<head>
<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"/>
</head>
<body></body>
</html>
EOF
      @mock.stub!(:body_str).and_return(page)
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'iso-8859-1'
    end
  end

  describe 'when used with open' do
    # CurlAgent.new is replaced so that CurlAgent.open works on a mock agent.
    before(:each) do
      @headers = {'User-Agent'=>'foo'}
      @curl = mock('curl')
      @curl.stub!(:headers).and_return(@headers)
      @curl.stub!(:perform!)
      @curl.stub!(:body_str).and_return('')
      CurlAgent.should_receive(:new).and_return(@curl)
    end

    it 'shall permit to specify user-agent' do
      @curl.headers['User-Agent'].should_not == 'curl'
      CurlAgent.open('http://www.example.com/', 'User-Agent'=>'curl')
      @curl.headers['User-Agent'].should == 'curl'
    end

    it 'shall permit to override timeout' do
      @curl.should_receive(:'timeout=').once.with(10)
      CurlAgent.open('http://www.example.com/', :timeout => 10)
    end

  end
end
@@ -0,0 +1,9 @@
1
+ require 'spec'
2
+
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+
5
+ require 'curb_openuri'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: romanbsd-curb-openuri
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Roman Shterenzon
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-02-18 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.1.4
24
+ version:
25
+ description: open-uri drop-in replacement that uses curb
26
+ email: romanbsd@yahoo.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - VERSION.yml
35
+ - lib/curb_openuri.rb
36
+ - lib/curl_agent.rb
37
+ - spec/curb_openuri_spec.rb
38
+ - spec/spec_helper.rb
39
+ - spec/curl_agent_spec.rb
40
+ has_rdoc: true
41
+ homepage: http://github.com/romanbsd/curb-openuri
42
+ post_install_message:
43
+ rdoc_options:
44
+ - --inline-source
45
+ - --charset=UTF-8
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "0"
59
+ version:
60
+ requirements: []
61
+
62
+ rubyforge_project:
63
+ rubygems_version: 1.2.0
64
+ signing_key:
65
+ specification_version: 2
66
+ summary: open-uri drop-in replacement that uses curb
67
+ test_files: []
68
+