curb-openuri 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/LICENSE +20 -0
- data/README +11 -0
- data/Rakefile +39 -0
- data/VERSION.yml +4 -0
- data/curb-openuri.gemspec +56 -0
- data/lib/curb_openuri.rb +32 -0
- data/lib/curl_agent.rb +96 -0
- data/spec/curb_openuri_spec.rb +30 -0
- data/spec/curl_agent_spec.rb +100 -0
- data/spec/spec_helper.rb +9 -0
- metadata +77 -0
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 Roman Shterenzon
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
curb-openuri
|
2
|
+
============
|
3
|
+
|
4
|
+
This gem is a drop-in replacement for the stock 'open-uri' library.
|
5
|
+
It overrides Kernel#open and uses curb (the Ruby binding for libcurl)
|
6
|
+
to do the actual fetching of pages.
|
7
|
+
|
8
|
+
COPYRIGHT
|
9
|
+
=========
|
10
|
+
|
11
|
+
Copyright (c) 2008 Roman Shterenzon. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Packaging, documentation and spec tasks for the curb-openuri gem.
require 'rake'

# Gem packaging tasks come from Jeweler. If it cannot be loaded we only print
# an installation hint, so the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "curb-openuri"
    gem.summary = %Q{open-uri drop-in replacement that uses curb}
    gem.email = 'romanbsd@yahoo.com'
    gem.homepage = "http://github.com/romanbsd/curb-openuri"
    gem.description = gem.summary
    gem.authors = ["Roman Shterenzon"]
    gem.add_dependency('curb', '>=0.1.4')
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# RDoc generation over the README and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title = 'curb-openuri'
  doc.options << '--line-numbers' << '--inline-source'
  doc.rdoc_files.include('README*')
  doc.rdoc_files.include('lib/**/*.rb')
end

require 'spec/rake/spectask'

# Load path and file list shared by the :spec and :rcov tasks.
configure_specs = lambda do |task|
  task.libs << 'lib' << 'spec'
  task.spec_files = FileList['spec/**/*_spec.rb']
end

Spec::Rake::SpecTask.new(:spec) do |task|
  configure_specs.call(task)
end

# Same specs as :spec, with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |task|
  configure_specs.call(task)
  task.rcov = true
end

task :default => :spec
|
data/VERSION.yml
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
# -*- encoding: utf-8 -*-

# Gem specification for curb-openuri 0.1.0. The only dependency is curb
# (>= 0.1.4), declared as a runtime dependency when RubyGems supports it.
Gem::Specification.new do |s|
  s.name = %q{curb-openuri}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Roman Shterenzon"]
  s.date = %q{2009-11-29}
  s.description = %q{open-uri drop-in replacement that uses curb}
  s.email = %q{romanbsd@yahoo.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README"
  ]
  s.files = [
    ".gitignore",
    "LICENSE",
    "README",
    "Rakefile",
    "VERSION.yml",
    "curb-openuri.gemspec",
    "lib/curb_openuri.rb",
    "lib/curl_agent.rb",
    "spec/curb_openuri_spec.rb",
    "spec/curl_agent_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/romanbsd/curb-openuri}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{open-uri drop-in replacement that uses curb}
  s.test_files = [
    "spec/curb_openuri_spec.rb",
    "spec/curl_agent_spec.rb",
    "spec/spec_helper.rb"
  ]

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::RubyGemsVersion is the legacy name for the running RubyGems
    # version; use Gem::VERSION when it is defined and only fall back to the
    # old constant otherwise, so the file loads either way.
    running_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(running_rubygems) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<curb>, [">= 0.1.4"])
    else
      s.add_dependency(%q<curb>, [">= 0.1.4"])
    end
  else
    s.add_dependency(%q<curb>, [">= 0.1.4"])
  end
end
|
56
|
+
|
data/lib/curb_openuri.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'curl_agent'
|
2
|
+
|
3
|
+
module Kernel
  private

  alias open_uri_original_open open # :nodoc:

  # Opens +name+, routing HTTP, HTTPS and FTP URLs through CurlAgent.
  #
  # Dispatch order:
  #
  # 1. If +name+ responds to +open+, that method is called with the
  #    remaining arguments and the block.
  # 2. If +name+ is string-like (responds to +to_str+) and begins with
  #    http://, https:// or ftp://, it is handed to CurlAgent.open together
  #    with the remaining arguments and the block.
  # 3. Otherwise the original Kernel#open is called unchanged.
  #
  # The string test uses +to_str+ rather than +to_s+: every object responds
  # to +to_s+, so that check let non-strings (for example objects that only
  # implement +to_path+) reach the regexp match and raise TypeError instead
  # of falling through to the original open.
  def open(name, *rest, &block) # :doc:
    if name.respond_to?(:open)
      name.open(*rest, &block)
    elsif name.respond_to?(:to_str) && %r{\A(ftp|https?)://} =~ name.to_str
      CurlAgent.open(name, *rest, &block)
    else
      open_uri_original_open(name, *rest, &block)
    end
  end
  # Where the interpreter separates keyword arguments from positional ones,
  # keep options such as :encoding flowing through *rest as keywords to the
  # original open.
  ruby2_keywords :open if respond_to?(:ruby2_keywords, true)
  module_function :open
end
|
data/lib/curl_agent.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'stringio'
|
3
|
+
require 'curb'
|
4
|
+
|
5
|
+
# Wrapper around a Curl::Easy instance that applies browser-like defaults and
# offers an open-uri style CurlAgent.open entry point.
#
# Methods that CurlAgent does not define itself are forwarded to the wrapped
# Curl::Easy instance.
class CurlAgent
  # Builds the agent for +url+.
  #
  # +options+ may be nil or a Hash. Symbol keys are applied as Curl::Easy
  # setters (<tt>:timeout => 10</tt> calls <tt>timeout = 10</tt>); every
  # other key is merged into the request headers, as in the original
  # open-uri. The hash passed in is never modified.
  # See CurlAgent::open for further explanation about options.
  def initialize(url, options = {})
    @curl = Curl::Easy.new(url)
    # Defaults
    @curl.headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6'
    @curl.follow_location = true
    @curl.max_redirects = 2
    @curl.enable_cookies = true
    @curl.connect_timeout = 5
    @curl.timeout = 30
    @performed = false

    # Collect headers separately instead of deleting from +options+, so the
    # caller's hash stays intact and frozen hashes are accepted.
    headers = {}
    (options || {}).each do |key, value|
      if key.is_a?(Symbol)
        @curl.send(:"#{key}=", value)
      else
        # Strings will be passed as headers, as in original open-uri
        headers[key] = value
      end
    end
    @curl.headers.merge!(headers)
  end

  # Do the actual fetch, after which it's possible to call body_str method.
  def perform!
    @curl.perform
    @performed = true
  end

  # Returns the lower-cased charset of the page, or '' when none is found.
  #
  # Performs the request first if that has not happened yet. The charset is
  # taken from the Content-Type reported by curl; failing that, from a
  # <meta http-equiv="Content-Type"> tag within the first 1000 characters of
  # the body.
  def charset
    perform! unless @performed

    content_type = @curl.content_type || ''
    from_header = content_type.match(/charset\s*=\s*([a-zA-Z0-9-]+)/ni)
    return from_header[1].downcase if from_header

    body = @curl.body_str
    return '' if body.nil?

    meta_tag = body.slice(0, 1000).match(%r{<meta.*http-equiv\s*=\s*['"]?Content-Type['"]?.*?>}mi)
    from_meta = meta_tag && meta_tag[0].match(%r{content=['"]text/html.*?charset=(.*?)['"]}mi)
    from_meta ? from_meta[1].downcase : ''
  end

  # True when either the agent itself or the wrapped Curl::Easy instance
  # responds to +symbol+. Accepts the optional +include_private+ flag of
  # Object#respond_to?.
  def respond_to?(symbol, include_private = false)
    super || @curl.respond_to?(symbol, include_private)
  end

  # Proxies all unknown calls, including any block, to the Curl::Easy
  # instance.
  def method_missing(symbol, *args, &block)
    @curl.send(symbol, *args, &block)
  end

  # This method opens the URL and returns an IO object (a StringIO over the
  # response body). If a block is provided, it's called with that object and
  # the block's value is returned instead.
  # You can override defaults and provide configuration directives
  # to Curl::Easy with symbol hash keys, for example:
  #   open('http://www.example.com/', :timeout => 10)
  # all the remaining keys will be passed as headers, for example:
  #   open('http://www.example.com/', :timeout => 10, 'User-Agent'=>'curl')
  #
  # Raises ArgumentError for a non read-only mode or for extra arguments.
  def self.open(name, *rest, &block)
    mode, _perm, rest = scan_open_optional_arguments(*rest)
    options = rest.shift if Hash === rest.first
    raise ArgumentError.new("extra arguments") unless rest.empty?

    unless mode.nil? || mode == 'r' || mode == 'rb' || mode == File::RDONLY
      raise ArgumentError.new("invalid access mode #{mode} (resource is read only.)")
    end

    agent = new(name, options)
    agent.perform!

    # Fall back to an empty string so a nil body never reaches StringIO.new.
    io = StringIO.new(agent.body_str || '')
    block ? block.call(io) : io
  end

  # Splits the optional leading mode (String or Integer) and permission
  # (Integer) arguments of an open call from the remaining arguments.
  # Returns [mode, perm, remaining]; mode and perm are nil when absent.
  def self.scan_open_optional_arguments(*rest) # :nodoc:
    mode = perm = nil
    if String === rest.first || Integer === rest.first
      mode = rest.shift
      perm = rest.shift if Integer === rest.first
    end
    return mode, perm, rest
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Specs for the Kernel#open override: URLs go to CurlAgent, everything else
# keeps its normal behaviour.
describe "CurbOpenuri" do
  describe "shall override Kernel::open" do
    %w[http https ftp].each do |scheme|
      it "shall use curl for #{scheme}" do
        CurlAgent.should_receive(:open).and_return('')
        open("#{scheme}://www.example.com/")
      end
    end

    it 'shall not use curl for other protocols' do
      CurlAgent.should_not_receive(:open)
      # A file:// string is treated as a plain path by the original open.
      opening = lambda { open('file:///dev/null') }
      opening.should raise_error(Errno::ENOENT)
    end

    it 'shall not use curl for files' do
      CurlAgent.should_not_receive(:open)
      open('/dev/null') { |_file| }
    end

    it 'shall still call open where the object supports it' do
      openable = mock('openable')
      openable.stub!(:respond_to?).with(:open).and_return(true)
      openable.should_receive(:open)
      CurlAgent.should_not_receive(:open)
      open(openable)
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Specs for CurlAgent: construction defaults, charset detection and the
# open-uri style CurlAgent.open entry point.
describe "CurlAgent" do
  # Setters that CurlAgent#initialize calls on the Curl::Easy instance.
  default_setters = [:'follow_location=', :'max_redirects=', :'enable_cookies=',
                     :'connect_timeout=', :'timeout=']

  describe 'new method' do
    it 'shall permit to override user-agent later' do
      agent = CurlAgent.new('http://www.example.com/')
      agent.headers['User-Agent'].should_not be_nil
      agent.headers['User-Agent'] = 'curl'
      agent.headers['User-Agent'].should == 'curl'
    end
  end

  describe 'when used alone' do
    before(:each) do
      @mock = mock('curl_easy')
      @headers = {'User-Agent' => 'foo'}
      @mock.stub!(:headers).and_return(@headers)
      default_setters.each { |setter| @mock.stub!(setter) }
      @mock.should_receive(:perform)
      Curl::Easy.should_receive(:new).and_return(@mock)
    end

    it 'should recognize charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=utf-8')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'utf-8'
    end

    it 'should recognize upper case charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=Windows-1251')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'windows-1251'
    end

    it 'should return empty str for empty charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      @mock.should_receive(:body_str).once
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == ''
    end

    it 'should attempt to find charset in html' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      @mock.stub!(:body_str).and_return(<<EOF)
<html>
<head>
<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"/>
</head>
<body></body>
</html>
EOF
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'iso-8859-1'
    end
  end

  describe 'when used with open' do
    before(:each) do
      @headers = {'User-Agent'=>'foo'}
      @curl_easy = mock('curl_easy')
      Curl::Easy.should_receive(:new).and_return(@curl_easy)
      @curl_easy.stub!(:headers).and_return(@headers)
      default_setters.each { |setter| @curl_easy.stub!(setter) }
      @curl_easy.stub!(:perform)
      @curl_easy.stub!(:body_str).and_return('test')
    end

    it 'shall permit to specify user-agent' do
      @curl_easy.headers['User-Agent'].should_not == 'curl'
      CurlAgent.open('http://www.example.com/', 'User-Agent'=>'curl')
      @curl_easy.headers['User-Agent'].should == 'curl'
    end

    it 'shall permit to override timeout' do
      @curl_easy.should_receive(:'timeout=').once.with(10)
      CurlAgent.open('http://www.example.com/', :timeout => 10)
    end

    it 'shall use block when provided' do
      contents = CurlAgent.open('http://www.example.com/') { |io| io.read }
      contents.should == 'test'
    end
  end

  describe 'when parsing parameters to open' do
    it 'shall recognize wrong mode' do
      CurlAgent.should_not_receive(:new)
      writing = lambda { CurlAgent.open('http://www.example.com/', 'w', 0600, :timeout=>10) }
      writing.should raise_error(ArgumentError)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: curb-openuri
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Roman Shterenzon
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-29 00:00:00 +02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: curb
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.1.4
|
24
|
+
version:
|
25
|
+
description: open-uri drop-in replacement that uses curb
|
26
|
+
email: romanbsd@yahoo.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- LICENSE
|
33
|
+
- README
|
34
|
+
files:
|
35
|
+
- .gitignore
|
36
|
+
- LICENSE
|
37
|
+
- README
|
38
|
+
- Rakefile
|
39
|
+
- VERSION.yml
|
40
|
+
- curb-openuri.gemspec
|
41
|
+
- lib/curb_openuri.rb
|
42
|
+
- lib/curl_agent.rb
|
43
|
+
- spec/curb_openuri_spec.rb
|
44
|
+
- spec/curl_agent_spec.rb
|
45
|
+
- spec/spec_helper.rb
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: http://github.com/romanbsd/curb-openuri
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options:
|
52
|
+
- --charset=UTF-8
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: "0"
|
66
|
+
version:
|
67
|
+
requirements: []
|
68
|
+
|
69
|
+
rubyforge_project:
|
70
|
+
rubygems_version: 1.3.5
|
71
|
+
signing_key:
|
72
|
+
specification_version: 3
|
73
|
+
summary: open-uri drop-in replacement that uses curb
|
74
|
+
test_files:
|
75
|
+
- spec/curb_openuri_spec.rb
|
76
|
+
- spec/curl_agent_spec.rb
|
77
|
+
- spec/spec_helper.rb
|