curb-openuri 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Roman Shterenzon
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,11 @@
1
+ curb-openuri
2
+ ============
3
+
4
+ This gem is a drop-in replacement for the stock 'open-uri' library.
5
+ It overrides Kernel#open and uses curb (the Ruby binding for libcurl)
6
+ to do the actual fetching of pages.
7
+
8
+ COPYRIGHT
9
+ =========
10
+
11
+ Copyright (c) 2008 Roman Shterenzon. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,39 @@
1
require 'rake'

# Gem packaging tasks come from Jeweler; when it is not installed we only
# print a hint and carry on defining the remaining tasks.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "curb-openuri"
    gem.summary = %Q{open-uri drop-in replacement that uses curb}
    gem.email = 'romanbsd@yahoo.com'
    gem.homepage = "http://github.com/romanbsd/curb-openuri"
    # The description simply reuses the summary text.
    gem.description = gem.summary
    gem.authors = ["Roman Shterenzon"]
    gem.add_dependency('curb', '>=0.1.4')
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# API documentation, generated into the rdoc/ directory.
require 'rake/rdoctask'
Rake::RDocTask.new do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title = 'curb-openuri'
  doc.options << '--line-numbers' << '--inline-source'
  ['README*', 'lib/**/*.rb'].each { |pattern| doc.rdoc_files.include(pattern) }
end

require 'spec/rake/spectask'

# Settings shared by the plain spec run and the coverage run.
spec_setup = lambda do |task|
  task.libs << 'lib' << 'spec'
  task.spec_files = FileList['spec/**/*_spec.rb']
end

# Runs the whole spec suite.
Spec::Rake::SpecTask.new(:spec) { |task| spec_setup.call(task) }

# Runs the same suite with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |task|
  spec_setup.call(task)
  task.rcov = true
end

task :default => :spec
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 0
3
+ :minor: 1
4
+ :major: 0
@@ -0,0 +1,56 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for curb-openuri 0.1.0: package metadata, the list of
# shipped files and the single runtime dependency on curb.
Gem::Specification.new do |s|
  s.name = %q{curb-openuri}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Roman Shterenzon"]
  s.date = %q{2009-11-29}
  s.description = %q{open-uri drop-in replacement that uses curb}
  s.email = %q{romanbsd@yahoo.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README"
  ]
  s.files = [
    ".gitignore",
    "LICENSE",
    "README",
    "Rakefile",
    "VERSION.yml",
    "curb-openuri.gemspec",
    "lib/curb_openuri.rb",
    "lib/curl_agent.rb",
    "spec/curb_openuri_spec.rb",
    "spec/curl_agent_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/romanbsd/curb-openuri}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{open-uri drop-in replacement that uses curb}
  s.test_files = [
    "spec/curb_openuri_spec.rb",
    "spec/curl_agent_spec.rb",
    "spec/spec_helper.rb"
  ]

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::RubyGemsVersion is not defined by newer RubyGems releases, where
    # referencing it raises NameError while the gemspec is being loaded.
    # Prefer Gem::VERSION and fall back to the old constant only when
    # Gem::VERSION is unavailable.
    running_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(running_rubygems) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<curb>, [">= 0.1.4"])
    else
      s.add_dependency(%q<curb>, [">= 0.1.4"])
    end
  else
    s.add_dependency(%q<curb>, [">= 0.1.4"])
  end
end
56
+
@@ -0,0 +1,32 @@
1
+ require 'curl_agent'
2
+
3
module Kernel
  private

  alias open_uri_original_open open # :nodoc:

  # Opens a resource, routing remote URLs through CurlAgent.
  #
  # Dispatch order:
  #
  # 1. If +name+ responds to +open+, that method is called with the
  #    remaining arguments and the block.
  # 2. If +name+ is string-like (responds to +to_str+) and begins with
  #    http://, https:// or ftp://, it is handed to CurlAgent.open
  #    together with the remaining arguments and the block.
  # 3. Otherwise the original Kernel#open (kept under the name
  #    +open_uri_original_open+) is called.
  #
  # The string-like test uses +to_str+ rather than +to_s+ on purpose:
  # every object responds to +to_s+, and matching the regexp against an
  # object that cannot be implicitly converted to a String raises
  # TypeError before the original open gets a chance to handle it
  # (for example an object that only implements +to_path+).
  def open(name, *rest, &block) # :doc:
    if name.respond_to?(:open)
      name.open(*rest, &block)
    elsif name.respond_to?(:to_str) && %r{\A(ftp|https?)://} =~ name
      CurlAgent.open(name, *rest, &block)
    else
      open_uri_original_open(name, *rest, &block)
    end
  end
  # On Rubies that separate keyword arguments from positional ones, mark the
  # wrapper so that options such as :encoding collected in +rest+ are passed
  # on as keywords again. Older Rubies do not have ruby2_keywords and do not
  # need it.
  ruby2_keywords(:open) if respond_to?(:ruby2_keywords, true)
  module_function :open
end
data/lib/curl_agent.rb ADDED
@@ -0,0 +1,96 @@
1
+ require 'rubygems'
2
+ require 'stringio'
3
+ require 'curb'
4
+
5
# Thin wrapper around a Curl::Easy handle that applies browser-like defaults
# and offers an open-uri style CurlAgent.open entry point. Calls to methods
# this class does not define are forwarded to the wrapped handle.
class CurlAgent
  # Creates the Curl::Easy handle for +url+, applies the defaults below and
  # then the caller's +options+ (a Hash or nil).
  #
  # Symbol keys are treated as Curl::Easy settings and applied through the
  # matching writer (<tt>:timeout => 10</tt> calls <tt>timeout = 10</tt>).
  # All other keys are merged into the request headers, as in open-uri.
  # The +options+ hash itself is never modified.
  #
  # See CurlAgent::open for examples.
  def initialize(url, options = {})
    @curl = Curl::Easy.new(url)
    # Defaults
    @curl.headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6'
    @curl.follow_location = true
    @curl.max_redirects = 2
    @curl.enable_cookies = true
    @curl.connect_timeout = 5
    @curl.timeout = 30
    @performed = false

    # Split the pairs up front instead of deleting from the caller's hash
    # while iterating over it.
    settings, headers = (options || {}).partition { |key, _value| key.is_a?(Symbol) }
    settings.each { |key, value| @curl.send("#{key}=".intern, value) }

    # All that's left should be considered headers
    @curl.headers.merge!(Hash[headers])
  end

  # Do the actual fetch, after which it's possible to call body_str method.
  # Returns true.
  def perform!
    @curl.perform
    @performed = true
  end

  # Returns the lower-cased charset of the page, or an empty string when
  # none is found. The fetch is performed first if it has not happened yet.
  #
  # The charset is taken from the Content-Type reported by curl; failing
  # that, from a <meta http-equiv="Content-Type"> tag within the first 1000
  # characters of the body.
  def charset
    perform! unless @performed

    content_type = @curl.content_type || ''
    from_header = content_type.match(/charset\s*=\s*([a-zA-Z0-9-]+)/ni)
    return from_header[1].downcase if from_header

    body = body_str
    return '' if body.nil?

    meta_tag = body.slice(0, 1000).match(%r{<meta.*http-equiv\s*=\s*['"]?Content-Type['"]?.*?>}mi)
    from_meta = meta_tag && meta_tag[0].match(%r{content=['"]text/html.*?charset=(.*?)['"]}mi)
    from_meta ? from_meta[1].downcase : ''
  end

  # True when either this object or the wrapped Curl::Easy instance
  # responds to +symbol+. +include_private+ has the same meaning as in
  # Object#respond_to?.
  def respond_to?(symbol, include_private = false)
    super || @curl.respond_to?(symbol, include_private)
  end

  # Proxies all unknown calls, including any block, to the Curl::Easy instance
  def method_missing(symbol, *args, &block)
    @curl.send(symbol, *args, &block)
  end

  # This method opens the URL and returns an IO object (a StringIO over the
  # fetched body). If a block is provided, it's called with that object and
  # the block's value is returned instead.
  # You can override defaults and provide configuration directives
  # to Curl::Easy with symbol hash keys, for example:
  #  open('http://www.example.com/', :timeout => 10)
  # all the remaining keys will be passed as headers, for example:
  #  open('http://www.example.com/', :timeout => 10, 'User-Agent'=>'curl')
  #
  # An optional mode and permission may precede the options, as with
  # Kernel#open. Raises ArgumentError for a mode other than nil, 'r', 'rb'
  # or File::RDONLY, and for arguments left over after the options hash.
  def self.open(name, *rest, &block)
    mode, _perm, rest = scan_open_optional_arguments(*rest)
    options = rest.shift if rest.first.is_a?(Hash)
    raise ArgumentError, "extra arguments" unless rest.empty?

    unless [nil, 'r', 'rb', File::RDONLY].include?(mode)
      raise ArgumentError, "invalid access mode #{mode} (resource is read only.)"
    end

    agent = new(name, options)
    agent.perform!

    io = StringIO.new(agent.body_str)
    block ? block.call(io) : io
  end

  # Splits an optional leading mode (String or Integer) and an optional
  # Integer permission off +rest+. Returns [mode, perm, remaining]; mode and
  # perm are nil when absent.
  def self.scan_open_optional_arguments(*rest) # :nodoc:
    mode = perm = nil
    if !rest.empty? && (String === rest.first || Integer === rest.first)
      mode = rest.shift
      perm = rest.shift if !rest.empty? && Integer === rest.first
    end
    return mode, perm, rest
  end
end
@@ -0,0 +1,30 @@
1
require File.dirname(__FILE__) + '/spec_helper'

# Specs for the Kernel#open override: which arguments are routed to
# CurlAgent and which are left to other handlers.
describe "CurbOpenuri" do
  describe "shall override Kernel::open" do
    # Each remote scheme must be delegated to CurlAgent.open.
    %w[http https ftp].each do |scheme|
      it "shall use curl for #{scheme}" do
        CurlAgent.should_receive(:open).and_return('')
        open("#{scheme}://www.example.com/")
      end
    end

    it 'shall not use curl for other protocols' do
      CurlAgent.should_not_receive(:open)
      # The string is not a recognised URL, so it is treated as a file name.
      opening = lambda { open('file:///dev/null') }
      opening.should raise_error(Errno::ENOENT)
    end

    it 'shall not use curl for files' do
      CurlAgent.should_not_receive(:open)
      open('/dev/null') { |_file| }
    end

    it 'shall still call open where the object supports it' do
      openable = mock('openable')
      openable.stub!(:respond_to?).with(:open).and_return(true)
      openable.should_receive(:open)
      CurlAgent.should_not_receive(:open)
      open(openable)
    end
  end
end
@@ -0,0 +1,100 @@
1
require File.dirname(__FILE__) + '/spec_helper'

# Specs for CurlAgent: construction, charset detection and the open-uri
# style CurlAgent.open entry point.
describe "CurlAgent" do

  describe 'new method' do
    it 'shall permit to override user-agent later' do
      agent = CurlAgent.new('http://www.example.com/')
      agent.headers['User-Agent'].should_not be_nil
      agent.headers['User-Agent'] = 'curl'
      agent.headers['User-Agent'].should == 'curl'
    end
  end

  describe 'when used alone' do
    # Replaces Curl::Easy with a mock that accepts the default settings and
    # expects exactly one fetch.
    before(:each) do
      @headers = {'User-Agent' => 'foo'}
      @mock = mock('curl_easy')
      @mock.stub!(:headers).and_return(@headers)
      %w[follow_location= max_redirects= enable_cookies= connect_timeout= timeout=].each do |setter|
        @mock.stub!(setter.to_sym)
      end
      @mock.should_receive(:perform)
      Curl::Easy.should_receive(:new).and_return(@mock)
    end

    it 'should recognize charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=utf-8')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'utf-8'
    end

    it 'should recognize upper case charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=Windows-1251')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'windows-1251'
    end

    it 'should return empty str for empty charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      @mock.should_receive(:body_str).once
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == ''
    end

    it 'should attempt to find charset in html' do
      # The header carries no charset, so the meta tag in the body is used.
      html = <<EOF
<html>
<head>
<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"/>
</head>
<body></body>
</html>
EOF
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      @mock.stub!(:body_str).and_return(html)
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'iso-8859-1'
    end
  end

  describe 'when used with open' do
    # Replaces Curl::Easy with a mock whose body is always 'test'.
    before(:each) do
      @headers = {'User-Agent'=>'foo'}
      @curl_easy = mock('curl_easy')
      Curl::Easy.should_receive(:new).and_return(@curl_easy)
      @curl_easy.stub!(:headers).and_return(@headers)
      %w[follow_location= max_redirects= enable_cookies= connect_timeout= timeout=].each do |setter|
        @curl_easy.stub!(setter.to_sym)
      end
      @curl_easy.stub!(:perform)
      @curl_easy.stub!(:body_str).and_return('test')
    end

    it 'shall permit to specify user-agent' do
      @curl_easy.headers['User-Agent'].should_not == 'curl'
      CurlAgent.open('http://www.example.com/', 'User-Agent'=>'curl')
      @curl_easy.headers['User-Agent'].should == 'curl'
    end

    it 'shall permit to override timeout' do
      @curl_easy.should_receive(:'timeout=').once.with(10)
      CurlAgent.open('http://www.example.com/', :timeout => 10)
    end

    it 'shall use block when provided' do
      content = CurlAgent.open('http://www.example.com/') { |io| io.read }
      content.should == 'test'
    end

  end

  describe 'when parsing parameters to open' do
    it 'shall recognize wrong mode' do
      CurlAgent.should_not_receive(:new)
      write_open = lambda { CurlAgent.open('http://www.example.com/', 'w', 0600, :timeout=>10) }
      write_open.should raise_error(ArgumentError)
    end
  end
end
@@ -0,0 +1,9 @@
1
require 'spec'

# Put the spec directory itself on the load path before loading the library
# under test.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(spec_dir)

require 'curb_openuri'

# No runner customisation yet; the empty block is kept as a hook.
Spec::Runner.configure { |_config| }
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: curb-openuri
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Roman Shterenzon
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-29 00:00:00 +02:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: curb
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.1.4
24
+ version:
25
+ description: open-uri drop-in replacement that uses curb
26
+ email: romanbsd@yahoo.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README
34
+ files:
35
+ - .gitignore
36
+ - LICENSE
37
+ - README
38
+ - Rakefile
39
+ - VERSION.yml
40
+ - curb-openuri.gemspec
41
+ - lib/curb_openuri.rb
42
+ - lib/curl_agent.rb
43
+ - spec/curb_openuri_spec.rb
44
+ - spec/curl_agent_spec.rb
45
+ - spec/spec_helper.rb
46
+ has_rdoc: true
47
+ homepage: http://github.com/romanbsd/curb-openuri
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --charset=UTF-8
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "0"
66
+ version:
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.3.5
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: open-uri drop-in replacement that uses curb
74
+ test_files:
75
+ - spec/curb_openuri_spec.rb
76
+ - spec/curl_agent_spec.rb
77
+ - spec/spec_helper.rb