curb-openuri 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Roman Shterenzon
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,11 @@
1
+ curb-openuri
2
+ ============
3
+
4
+ This gem is a drop-in replacement for the stock 'open-uri' library.
5
+ It overrides Kernel#open and uses curb (the Ruby binding for libcurl)
6
+ to do the actual fetching of pages.
7
+
8
+ COPYRIGHT
9
+ =========
10
+
11
+ Copyright (c) 2008 Roman Shterenzon. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,39 @@
1
+ require 'rake'
2
+
3
+ begin
4
+ require 'jeweler'
5
+ Jeweler::Tasks.new do |s|
6
+ s.name = "curb-openuri"
7
+ s.summary = %Q{open-uri drop-in replacement that uses curb}
8
+ s.email = 'romanbsd@yahoo.com'
9
+ s.homepage = "http://github.com/romanbsd/curb-openuri"
10
+ s.description = s.summary
11
+ s.authors = ["Roman Shterenzon"]
12
+ s.add_dependency('curb', '>=0.1.4')
13
+ end
14
+ rescue LoadError
15
+ puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
16
+ end
17
+
18
+ require 'rake/rdoctask'
19
+ Rake::RDocTask.new do |rdoc|
20
+ rdoc.rdoc_dir = 'rdoc'
21
+ rdoc.title = 'curb-openuri'
22
+ rdoc.options << '--line-numbers' << '--inline-source'
23
+ rdoc.rdoc_files.include('README*')
24
+ rdoc.rdoc_files.include('lib/**/*.rb')
25
+ end
26
+
27
+ require 'spec/rake/spectask'
28
+ Spec::Rake::SpecTask.new(:spec) do |t|
29
+ t.libs << 'lib' << 'spec'
30
+ t.spec_files = FileList['spec/**/*_spec.rb']
31
+ end
32
+
33
+ Spec::Rake::SpecTask.new(:rcov) do |t|
34
+ t.libs << 'lib' << 'spec'
35
+ t.spec_files = FileList['spec/**/*_spec.rb']
36
+ t.rcov = true
37
+ end
38
+
39
+ task :default => :spec
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 0
3
+ :minor: 1
4
+ :major: 0
@@ -0,0 +1,56 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{curb-openuri}
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Roman Shterenzon"]
12
+ s.date = %q{2009-11-29}
13
+ s.description = %q{open-uri drop-in replacement that uses curb}
14
+ s.email = %q{romanbsd@yahoo.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README"
18
+ ]
19
+ s.files = [
20
+ ".gitignore",
21
+ "LICENSE",
22
+ "README",
23
+ "Rakefile",
24
+ "VERSION.yml",
25
+ "curb-openuri.gemspec",
26
+ "lib/curb_openuri.rb",
27
+ "lib/curl_agent.rb",
28
+ "spec/curb_openuri_spec.rb",
29
+ "spec/curl_agent_spec.rb",
30
+ "spec/spec_helper.rb"
31
+ ]
32
+ s.homepage = %q{http://github.com/romanbsd/curb-openuri}
33
+ s.rdoc_options = ["--charset=UTF-8"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.5}
36
+ s.summary = %q{open-uri drop-in replacement that uses curb}
37
+ s.test_files = [
38
+ "spec/curb_openuri_spec.rb",
39
+ "spec/curl_agent_spec.rb",
40
+ "spec/spec_helper.rb"
41
+ ]
42
+
43
+ if s.respond_to? :specification_version then
44
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
45
+ s.specification_version = 3
46
+
47
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
48
+ s.add_runtime_dependency(%q<curb>, [">= 0.1.4"])
49
+ else
50
+ s.add_dependency(%q<curb>, [">= 0.1.4"])
51
+ end
52
+ else
53
+ s.add_dependency(%q<curb>, [">= 0.1.4"])
54
+ end
55
+ end
56
+
@@ -0,0 +1,32 @@
1
+ require 'curl_agent'
2
+
3
module Kernel
  private

  # Keep a handle on the previous Kernel#open exactly once. If the alias
  # already exists (this file was loaded twice, or an open-uri version that
  # installs the same alias was loaded first), re-aliasing would make
  # open_uri_original_open point at an override and non-URL opens would
  # recurse forever.
  unless private_method_defined?(:open_uri_original_open)
    alias_method :open_uri_original_open, :open # :nodoc:
  end

  # Opens files as usual, and http://, https:// and ftp:// URLs through
  # CurlAgent.
  #
  # Dispatch order:
  # 1. If +name+ responds to +open+, that method is called with the
  #    remaining arguments and the block.
  # 2. If +name+ is string-like (responds to +to_str+) and starts with
  #    http://, https:// or ftp://, the call is handed to CurlAgent.open.
  # 3. Otherwise the original Kernel#open is called.
  #
  # name::  a path, a URL string, or any object responding to +open+.
  # rest::  extra arguments, forwarded untouched to whichever branch is taken.
  # block:: optional block, forwarded untouched as well.
  #
  # Returns whatever the selected branch returns.
  def open(name, *rest, &block) # :doc:
    if name.respond_to?(:open)
      name.open(*rest, &block)
    elsif name.respond_to?(:to_str) && %r{\A(ftp|https?)://} =~ name.to_str
      # to_str (not to_s, which every object has) mirrors stock open-uri:
      # only real string-likes are ever treated as URLs.
      CurlAgent.open(name, *rest, &block)
    else
      open_uri_original_open(name, *rest, &block)
    end
  end
  module_function :open
end
data/lib/curl_agent.rb ADDED
@@ -0,0 +1,96 @@
1
+ require 'rubygems'
2
+ require 'stringio'
3
+ require 'curb'
4
+
5
# Wrapper around a Curl::Easy handle that applies a set of defaults and
# offers an open-uri style CurlAgent.open entry point. Any method not defined
# here is forwarded to the underlying Curl::Easy instance.
class CurlAgent
  # Matches the charset parameter of a Content-Type header value.
  HEADER_CHARSET_RE = /charset\s*=\s*([a-zA-Z0-9-]+)/ni
  # Matches a <meta http-equiv="Content-Type" ...> tag.
  META_TAG_RE = %r{<meta.*http-equiv\s*=\s*['"]?Content-Type['"]?.*?>}mi
  # Extracts the charset from the content attribute of such a meta tag.
  META_CHARSET_RE = %r{content=['"]text/html.*?charset=(.*?)['"]}mi

  # Builds the agent for +url+.
  #
  # options:: Hash (or nil). Symbol keys are applied as Curl::Easy setters
  #           (<tt>:timeout => 10</tt> calls <tt>timeout = 10</tt>); every
  #           other key is merged into the request headers, as in open-uri.
  #           The hash passed in is not modified.
  #
  # See CurlAgent::open for examples.
  def initialize(url, options = {})
    @curl = Curl::Easy.new(url)
    # Defaults
    @curl.headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6'
    @curl.follow_location = true
    @curl.max_redirects = 2
    @curl.enable_cookies = true
    @curl.connect_timeout = 5
    @curl.timeout = 30
    @performed = false

    # Split into setters and headers without touching the caller's hash.
    extra_headers = {}
    (options || {}).each do |key, value|
      if key.is_a?(Symbol)
        @curl.send("#{key}=", value)
      else
        extra_headers[key] = value
      end
    end

    # Applied after the defaults so that callers can override User-Agent.
    @curl.headers.merge!(extra_headers)
  end

  # Do the actual fetch, after which it's possible to call body_str method
  def perform!
    @curl.perform
    @performed = true
  end

  # Returns the lower-cased charset of the page, or '' when none is found.
  # Performs the request first if that has not happened yet. The
  # Content-Type header is consulted first; failing that, the first 1000
  # characters of the body are searched for a Content-Type meta tag.
  def charset
    perform! unless @performed

    from_header = (@curl.content_type || '').match(HEADER_CHARSET_RE)
    return from_header[1].downcase if from_header

    body = body_str
    return '' if body.nil?

    meta_tag = body.slice(0, 1000).match(META_TAG_RE)
    from_meta = meta_tag && meta_tag[0].match(META_CHARSET_RE)
    from_meta ? from_meta[1].downcase : ''
  end

  # Reports the proxied Curl::Easy methods in addition to the agent's own,
  # keeping respond_to? and method_missing in agreement.
  def respond_to_missing?(symbol, include_private = false)
    @curl.respond_to?(symbol, include_private) || super
  end

  # Proxies all unknown calls (including any block) to the Curl::Easy instance
  def method_missing(symbol, *args, &block)
    @curl.send(symbol, *args, &block)
  end

  # This method opens the URL and returns an IO object (a StringIO holding
  # the fetched body). If a block is provided, it's called with that object
  # and the block's value is returned instead.
  # You can override defaults and provide configuration directives
  # to Curl::Easy with symbol hash keys, for example:
  #   open('http://www.example.com/', :timeout => 10)
  # all the remaining keys will be passed as headers, for example:
  #   open('http://www.example.com/', :timeout => 10, 'User-Agent'=>'curl')
  #
  # An optional mode (and permission) may precede the options for
  # compatibility with Kernel#open; only read modes are accepted.
  #
  # Raises ArgumentError for a non-read mode or for unexpected extra
  # arguments.
  def self.open(name, *rest, &block)
    mode, _perm, rest = scan_open_optional_arguments(*rest)
    options = rest.shift if !rest.empty? && Hash === rest.first
    raise ArgumentError, 'extra arguments' unless rest.empty?

    unless mode.nil? || mode == 'r' || mode == 'rb' || mode == File::RDONLY
      raise ArgumentError, "invalid access mode #{mode} (resource is read only.)"
    end

    agent = new(name, options)
    agent.perform!
    # body_str may be nil (charset guards against that too); hand StringIO
    # an empty string in that case rather than nil.
    io = StringIO.new(agent.body_str || '')
    block ? block.call(io) : io
  end

  # Peels an optional leading mode (String or Integer) and, when a mode is
  # present, an optional Integer permission off +rest+.
  # Returns [mode, perm, remaining_arguments]; absent values are nil.
  def self.scan_open_optional_arguments(*rest) # :nodoc:
    mode = perm = nil
    if !rest.empty? && (String === rest.first || Integer === rest.first)
      mode = rest.shift
      perm = rest.shift if !rest.empty? && Integer === rest.first
    end
    [mode, perm, rest]
  end
end
@@ -0,0 +1,30 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe "CurbOpenuri" do
4
+ describe "shall override Kernel::open" do
5
+ ['http','https','ftp'].each do |p|
6
+ it "shall use curl for #{p}" do
7
+ CurlAgent.should_receive(:open).and_return('')
8
+ open("#{p}://www.example.com/")
9
+ end
10
+ end
11
+
12
+ it 'shall not use curl for other protocols' do
13
+ CurlAgent.should_not_receive(:open)
14
+ lambda {open('file:///dev/null')}.should raise_error(Errno::ENOENT)
15
+ end
16
+
17
+ it 'shall not use curl for files' do
18
+ CurlAgent.should_not_receive(:open)
19
+ open('/dev/null') {|f| }
20
+ end
21
+
22
+ it 'shall still call open where the object supports it' do
23
+ mock = mock('openable')
24
+ mock.stub!(:respond_to?).with(:open).and_return(true)
25
+ mock.should_receive(:open)
26
+ CurlAgent.should_not_receive(:open)
27
+ open(mock)
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,100 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
+ describe "CurlAgent" do
4
+
5
+ describe 'new method' do
6
+ it 'shall permit to override user-agent later' do
7
+ curl = CurlAgent.new('http://www.example.com/')
8
+ curl.headers['User-Agent'].should_not be_nil
9
+ curl.headers['User-Agent'] = 'curl'
10
+ curl.headers['User-Agent'].should == 'curl'
11
+ end
12
+ end
13
+
14
+ describe 'when used alone' do
15
+ before(:each) do
16
+ @mock = mock('curl_easy')
17
+ @headers = {'User-Agent' => 'foo'}
18
+ @mock.stub!(:headers).and_return(@headers)
19
+ @mock.stub!(:'follow_location=')
20
+ @mock.stub!(:'max_redirects=')
21
+ @mock.stub!(:'enable_cookies=')
22
+ @mock.stub!(:'connect_timeout=')
23
+ @mock.stub!(:'timeout=')
24
+ @mock.should_receive(:perform)
25
+ Curl::Easy.should_receive(:new).and_return(@mock)
26
+ end
27
+
28
+ it 'should recognize charset' do
29
+ @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=utf-8')
30
+ curl = CurlAgent.new('http://www.example.com/')
31
+ curl.charset.should == 'utf-8'
32
+ end
33
+
34
+ it 'should recognize upper case charset' do
35
+ @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=Windows-1251')
36
+ curl = CurlAgent.new('http://www.example.com/')
37
+ curl.charset.should == 'windows-1251'
38
+ end
39
+
40
+ it 'should return empty str for empty charset' do
41
+ @mock.stub!(:content_type).and_return('Content-Type: text/html')
42
+ @mock.should_receive(:body_str).once
43
+ curl = CurlAgent.new('http://www.example.com/')
44
+ curl.charset.should == ''
45
+ end
46
+
47
+ it 'should attempt to find charset in html' do
48
+ @mock.stub!(:content_type).and_return('Content-Type: text/html')
49
+ @mock.stub!(:body_str).and_return(<<EOF)
50
+ <html>
51
+ <head>
52
+ <meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"/>
53
+ </head>
54
+ <body></body>
55
+ </html>
56
+ EOF
57
+ curl = CurlAgent.new('http://www.example.com/')
58
+ curl.charset.should == 'iso-8859-1'
59
+ end
60
+ end
61
+
62
+ describe 'when used with open' do
63
+ before(:each) do
64
+ @headers = {'User-Agent'=>'foo'}
65
+ @curl_easy = mock('curl_easy')
66
+ Curl::Easy.should_receive(:new).and_return(@curl_easy)
67
+ @curl_easy.stub!(:headers).and_return(@headers)
68
+ @curl_easy.stub!(:follow_location=)
69
+ @curl_easy.stub!(:max_redirects=)
70
+ @curl_easy.stub!(:enable_cookies=)
71
+ @curl_easy.stub!(:connect_timeout=)
72
+ @curl_easy.stub!(:timeout=)
73
+ @curl_easy.stub!(:perform)
74
+ @curl_easy.stub!(:body_str).and_return('test')
75
+ end
76
+
77
+ it 'shall permit to specify user-agent' do
78
+ @curl_easy.headers['User-Agent'].should_not == 'curl'
79
+ CurlAgent.open('http://www.example.com/', 'User-Agent'=>'curl')
80
+ @curl_easy.headers['User-Agent'].should == 'curl'
81
+ end
82
+
83
+ it 'shall permit to override timeout' do
84
+ @curl_easy.should_receive(:'timeout=').once.with(10)
85
+ CurlAgent.open('http://www.example.com/', :timeout => 10)
86
+ end
87
+
88
+ it 'shall use block when provided' do
89
+ CurlAgent.open('http://www.example.com/') {|f| f.read}.should == 'test'
90
+ end
91
+
92
+ end
93
+
94
+ describe 'when parsing parameters to open' do
95
+ it 'shall recognize wrong mode' do
96
+ CurlAgent.should_not_receive(:new)
97
+ lambda {CurlAgent.open('http://www.example.com/', 'w', 0600, :timeout=>10)}.should raise_error(ArgumentError)
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec'
2
+
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+
5
+ require 'curb_openuri'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: curb-openuri
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Roman Shterenzon
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-29 00:00:00 +02:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.1.4
24
+ version:
25
+ description: open-uri drop-in replacement that uses curb
26
+ email: romanbsd@yahoo.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README
34
+ files:
35
+ - .gitignore
36
+ - LICENSE
37
+ - README
38
+ - Rakefile
39
+ - VERSION.yml
40
+ - curb-openuri.gemspec
41
+ - lib/curb_openuri.rb
42
+ - lib/curl_agent.rb
43
+ - spec/curb_openuri_spec.rb
44
+ - spec/curl_agent_spec.rb
45
+ - spec/spec_helper.rb
46
+ has_rdoc: true
47
+ homepage: http://github.com/romanbsd/curb-openuri
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --charset=UTF-8
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "0"
66
+ version:
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.3.5
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: open-uri drop-in replacement that uses curb
74
+ test_files:
75
+ - spec/curb_openuri_spec.rb
76
+ - spec/curl_agent_spec.rb
77
+ - spec/spec_helper.rb