changa-curb-openuri 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 0
3
+ :minor: 1
4
+ :major: 0
@@ -0,0 +1,32 @@
1
+ require 'curl_agent'
2
+
3
module Kernel
  private

  alias curb_open_uri_original_open open # :nodoc:

  # Opens various kinds of resources, including remote URIs.
  #
  # Dispatch order:
  #
  # 1. If +name+ responds to +open+, that method is called with the
  #    remaining arguments and the block.
  # 2. If +name+ is string-like (responds to +to_str+) and begins with
  #    ftp://, http:// or https://, it is handed to CurlAgent.open together
  #    with the remaining arguments and the block.
  # 3. Otherwise the original Kernel#open is called unchanged.
  #
  # The string-like test deliberately uses +to_str+ (as open-uri does) and
  # not +to_s+: every object responds to +to_s+, so testing it lets
  # non-string arguments such as objects that only implement +to_path+
  # reach the regexp match, which raises TypeError instead of falling
  # through to the original open.
  def open(name, *rest, &block) # :doc:
    if name.respond_to?(:open)
      name.open(*rest, &block)
    elsif name.respond_to?(:to_str) && %r{\A(ftp|https?)://} =~ name.to_str
      CurlAgent.open(name, *rest, &block)
    else
      curb_open_uri_original_open(name, *rest, &block)
    end
  end
  module_function :open
end
data/lib/curl_agent.rb ADDED
@@ -0,0 +1,96 @@
1
+ require 'rubygems'
2
+ require 'stringio'
3
+ require 'curb'
4
+
5
# Thin wrapper around Curl::Easy that applies browser-like defaults and
# offers an open-uri style entry point, CurlAgent.open.
#
# Public methods of the wrapped Curl::Easy instance that CurlAgent does not
# define itself are forwarded to it.
class CurlAgent
  # User-Agent header sent unless the caller supplies another one.
  DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6'

  # Access modes accepted by CurlAgent.open (the resource is read only).
  READ_ONLY_MODES = [nil, 'r', 'rb', File::RDONLY].freeze

  # Charset parameter inside a Content-Type header value.
  HEADER_CHARSET = /charset\s*=\s*([a-zA-Z0-9-]+)/i

  # A <meta http-equiv="Content-Type" ...> tag in an HTML document.
  META_CONTENT_TYPE = %r{<meta.*http-equiv\s*=\s*['"]?Content-Type['"]?.*?>}mi

  # Charset declared in the content attribute of such a <meta> tag.
  META_CHARSET = %r{content=['"]text/html.*?charset=(.*?)['"]}mi

  # Creates an agent for +url+.
  #
  # +options+ may be nil or a Hash. Entries with Symbol keys are applied to
  # the Curl::Easy instance through the matching setter (:timeout => 10
  # calls timeout=(10)); every other entry is merged into the request
  # headers, as in the original open-uri. The hash passed in is not
  # modified.
  #
  # See CurlAgent.open for examples.
  def initialize(url, options = {})
    @curl = Curl::Easy.new(url)
    # Defaults
    @curl.headers['User-Agent'] = DEFAULT_USER_AGENT
    @curl.follow_location = true
    @curl.max_redirects = 2
    @curl.enable_cookies = true
    @curl.connect_timeout = 5
    @curl.timeout = 30
    @performed = false

    # Collect headers separately instead of deleting from +options+ while
    # iterating, which altered the caller's hash.
    extra_headers = {}
    (options || {}).each do |key, value|
      if key.is_a?(Symbol)
        @curl.send("#{key}=", value)
      else
        extra_headers[key] = value
      end
    end
    @curl.headers.merge!(extra_headers)
  end

  # Does the actual fetch, after which the response (for example body_str)
  # can be read. Returns true.
  def perform!
    @curl.perform
    @performed = true
  end

  # Returns the downcased charset of the page, or '' when none is found.
  #
  # Performs the request first if that has not happened yet. The charset is
  # taken from the Content-Type reported by Curl::Easy; failing that, from a
  # <meta http-equiv="Content-Type"> tag within the first 1000 characters
  # of the body.
  def charset
    perform! unless @performed

    from_header = (@curl.content_type || '').match(HEADER_CHARSET)
    return from_header[1].downcase if from_header

    # Read the body once; it may be nil.
    body = @curl.body_str
    return '' if body.nil?

    meta_tag = body.slice(0, 1000).match(META_CONTENT_TYPE)
    declared = meta_tag && meta_tag[0].match(META_CHARSET)
    declared ? declared[1].downcase : ''
  end

  # True for the agent's own methods as well as for everything the wrapped
  # Curl::Easy instance responds to.
  def respond_to?(symbol, include_private = false)
    super || @curl.respond_to?(symbol, include_private)
  end

  # Keeps Object#respond_to? and Object#method consistent with the
  # forwarding done in method_missing on Rubies that consult this hook.
  def respond_to_missing?(symbol, include_private = false)
    @curl.respond_to?(symbol, include_private) || super
  end

  # Forwards calls (including a block, if given) to the Curl::Easy
  # instance. Only methods it publicly responds to are forwarded, so that
  # private Kernel methods cannot be triggered through the agent; anything
  # else raises NoMethodError as usual.
  def method_missing(symbol, *args, &block)
    return super unless @curl.respond_to?(symbol)

    @curl.send(symbol, *args, &block)
  end

  # Opens the URL +name+ and returns a StringIO holding the response body.
  # If a block is provided, it is called with that object and its result is
  # returned instead.
  #
  # An optional access mode (and permission) may follow +name+ for
  # compatibility with Kernel#open; only read modes are accepted.
  #
  # You can override defaults and provide configuration directives
  # to Curl::Easy with symbol hash keys, for example:
  #   open('http://www.example.com/', :timeout => 10)
  # all the remaining keys are passed as headers, for example:
  #   open('http://www.example.com/', :timeout => 10, 'User-Agent'=>'curl')
  #
  # Raises ArgumentError for a non-read mode or for extra arguments.
  def self.open(name, *rest, &block)
    mode, _perm, rest = scan_open_optional_arguments(*rest)
    options = rest.shift if Hash === rest.first
    raise ArgumentError, "extra arguments" unless rest.empty?

    unless READ_ONLY_MODES.include?(mode)
      raise ArgumentError, "invalid access mode #{mode} (resource is read only.)"
    end

    agent = new(name, options)
    agent.perform!
    # A nil body (presumably an empty response) becomes an empty stream
    # rather than an error from StringIO.new.
    io = StringIO.new(agent.body_str || '')
    block ? block.call(io) : io
  end

  # Splits a leading access mode (String or Integer) and an optional
  # Integer permission off +rest+. Returns [mode, perm, remaining]; mode
  # and perm are nil when absent.
  def self.scan_open_optional_arguments(*rest) # :nodoc:
    mode = perm = nil
    if String === rest.first || Integer === rest.first
      mode = rest.shift
      perm = rest.shift if Integer === rest.first
    end
    [mode, perm, rest]
  end
end
@@ -0,0 +1,30 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Specs for the Kernel#open override: ftp/http/https URLs go to CurlAgent,
# everything else keeps its original behaviour.
describe "CurbOpenuri" do
  describe "shall override Kernel::open" do
    %w[http https ftp].each do |scheme|
      it "shall use curl for #{scheme}" do
        CurlAgent.should_receive(:open).and_return('')
        open("#{scheme}://www.example.com/")
      end
    end

    it 'shall not use curl for other protocols' do
      CurlAgent.should_not_receive(:open)
      # A file:// URL is treated as a plain (non-existent) path.
      opening = lambda { open('file:///dev/null') }
      opening.should raise_error(Errno::ENOENT)
    end

    it 'shall not use curl for files' do
      CurlAgent.should_not_receive(:open)
      open('/dev/null') do |f|
      end
    end

    it 'shall still call open where the object supports it' do
      openable = mock('openable')
      openable.stub!(:respond_to?).with(:open).and_return(true)
      openable.should_receive(:open)
      CurlAgent.should_not_receive(:open)
      open(openable)
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Specs for CurlAgent: construction, charset detection and the open-uri
# style CurlAgent.open entry point. Curl::Easy is replaced by a mock in
# every group except the first.
describe "CurlAgent" do
  # Setters that CurlAgent#initialize calls while applying its defaults.
  default_setters = %w[follow_location= max_redirects= enable_cookies= connect_timeout= timeout=]

  describe 'new method' do
    it 'shall permit to override user-agent later' do
      agent = CurlAgent.new('http://www.example.com/')
      agent.headers['User-Agent'].should_not be_nil
      agent.headers['User-Agent'] = 'curl'
      agent.headers['User-Agent'].should == 'curl'
    end
  end

  describe 'when used alone' do
    before(:each) do
      @headers = {'User-Agent' => 'foo'}
      @mock = mock('curl_easy')
      @mock.stub!(:headers).and_return(@headers)
      default_setters.each { |setter| @mock.stub!(setter.to_sym) }
      @mock.should_receive(:perform)
      Curl::Easy.should_receive(:new).and_return(@mock)
    end

    it 'should recognize charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=utf-8')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'utf-8'
    end

    it 'should recognize upper case charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=Windows-1251')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'windows-1251'
    end

    it 'should return empty str for empty charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      @mock.should_receive(:body_str).once
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == ''
    end

    it 'should attempt to find charset in html' do
      # No charset in the header, so the <meta> tag in the body is used.
      html = <<EOF
<html>
<head>
<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"/>
</head>
<body></body>
</html>
EOF
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      @mock.stub!(:body_str).and_return(html)
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'iso-8859-1'
    end
  end

  describe 'when used with open' do
    before(:each) do
      @headers = {'User-Agent'=>'foo'}
      @curl_easy = mock('curl_easy')
      Curl::Easy.should_receive(:new).and_return(@curl_easy)
      @curl_easy.stub!(:headers).and_return(@headers)
      default_setters.each { |setter| @curl_easy.stub!(setter.to_sym) }
      @curl_easy.stub!(:perform)
      @curl_easy.stub!(:body_str).and_return('test')
    end

    it 'shall permit to specify user-agent' do
      @curl_easy.headers['User-Agent'].should_not == 'curl'
      CurlAgent.open('http://www.example.com/', 'User-Agent'=>'curl')
      @curl_easy.headers['User-Agent'].should == 'curl'
    end

    it 'shall permit to override timeout' do
      @curl_easy.should_receive(:timeout=).once.with(10)
      CurlAgent.open('http://www.example.com/', :timeout => 10)
    end

    it 'shall use block when provided' do
      content = CurlAgent.open('http://www.example.com/') { |io| io.read }
      content.should == 'test'
    end
  end

  describe 'when parsing parameters to open' do
    it 'shall recognize wrong mode' do
      CurlAgent.should_not_receive(:new)
      opening = lambda do
        CurlAgent.open('http://www.example.com/', 'w', 0600, :timeout=>10)
      end
      opening.should raise_error(ArgumentError)
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'spec'
2
+
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+
5
+ require 'curb_openuri'
6
+
7
# No shared runner settings are needed; the empty block is kept as the
# place where they would go.
Spec::Runner.configure { |config| }
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: changa-curb-openuri
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Roman Shterenzon
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-02-18 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.1.4
24
+ version:
25
+ description: open-uri drop-in replacement that uses curb
26
+ email:
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - VERSION.yml
35
+ - lib/curb_openuri.rb
36
+ - lib/curl_agent.rb
37
+ - spec/curb_openuri_spec.rb
38
+ - spec/spec_helper.rb
39
+ - spec/curl_agent_spec.rb
40
+ has_rdoc: true
41
+ homepage: http://github.com/changa/curb-openuri
42
+ licenses: []
43
+
44
+ post_install_message:
45
+ rdoc_options:
46
+ - --inline-source
47
+ - --charset=UTF-8
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ requirements: []
63
+
64
+ rubyforge_project:
65
+ rubygems_version: 1.3.5
66
+ signing_key:
67
+ specification_version: 2
68
+ summary: open-uri drop-in replacement that uses curb
69
+ test_files: []
70
+