curb-openuri 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/LICENSE +20 -0
- data/README +11 -0
- data/Rakefile +39 -0
- data/VERSION.yml +4 -0
- data/curb-openuri.gemspec +56 -0
- data/lib/curb_openuri.rb +32 -0
- data/lib/curl_agent.rb +96 -0
- data/spec/curb_openuri_spec.rb +30 -0
- data/spec/curl_agent_spec.rb +100 -0
- data/spec/spec_helper.rb +9 -0
- metadata +77 -0
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 Roman Shterenzon
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
curb-openuri
|
2
|
+
============
|
3
|
+
|
4
|
+
This gem is a drop-in replacement for the stock 'open-uri' library.
|
5
|
+
It overrides Kernel#open and uses curb (the Ruby bindings for libcurl)
|
6
|
+
to do the actual fetching of pages.
|
7
|
+
|
8
|
+
COPYRIGHT
|
9
|
+
=========
|
10
|
+
|
11
|
+
Copyright (c) 2008 Roman Shterenzon. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
begin
|
4
|
+
require 'jeweler'
|
5
|
+
Jeweler::Tasks.new do |s|
|
6
|
+
s.name = "curb-openuri"
|
7
|
+
s.summary = %Q{open-uri drop-in replacement that uses curb}
|
8
|
+
s.email = 'romanbsd@yahoo.com'
|
9
|
+
s.homepage = "http://github.com/romanbsd/curb-openuri"
|
10
|
+
s.description = s.summary
|
11
|
+
s.authors = ["Roman Shterenzon"]
|
12
|
+
s.add_dependency('curb', '>=0.1.4')
|
13
|
+
end
|
14
|
+
rescue LoadError
|
15
|
+
puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
|
16
|
+
end
|
17
|
+
|
18
|
+
require 'rake/rdoctask'
|
19
|
+
Rake::RDocTask.new do |rdoc|
|
20
|
+
rdoc.rdoc_dir = 'rdoc'
|
21
|
+
rdoc.title = 'curb-openuri'
|
22
|
+
rdoc.options << '--line-numbers' << '--inline-source'
|
23
|
+
rdoc.rdoc_files.include('README*')
|
24
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
25
|
+
end
|
26
|
+
|
27
|
+
require 'spec/rake/spectask'
|
28
|
+
Spec::Rake::SpecTask.new(:spec) do |t|
|
29
|
+
t.libs << 'lib' << 'spec'
|
30
|
+
t.spec_files = FileList['spec/**/*_spec.rb']
|
31
|
+
end
|
32
|
+
|
33
|
+
Spec::Rake::SpecTask.new(:rcov) do |t|
|
34
|
+
t.libs << 'lib' << 'spec'
|
35
|
+
t.spec_files = FileList['spec/**/*_spec.rb']
|
36
|
+
t.rcov = true
|
37
|
+
end
|
38
|
+
|
39
|
+
task :default => :spec
|
data/VERSION.yml
ADDED
data/curb-openuri.gemspec
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{curb-openuri}
|
8
|
+
s.version = "0.1.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Roman Shterenzon"]
|
12
|
+
s.date = %q{2009-11-29}
|
13
|
+
s.description = %q{open-uri drop-in replacement that uses curb}
|
14
|
+
s.email = %q{romanbsd@yahoo.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".gitignore",
|
21
|
+
"LICENSE",
|
22
|
+
"README",
|
23
|
+
"Rakefile",
|
24
|
+
"VERSION.yml",
|
25
|
+
"curb-openuri.gemspec",
|
26
|
+
"lib/curb_openuri.rb",
|
27
|
+
"lib/curl_agent.rb",
|
28
|
+
"spec/curb_openuri_spec.rb",
|
29
|
+
"spec/curl_agent_spec.rb",
|
30
|
+
"spec/spec_helper.rb"
|
31
|
+
]
|
32
|
+
s.homepage = %q{http://github.com/romanbsd/curb-openuri}
|
33
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
34
|
+
s.require_paths = ["lib"]
|
35
|
+
s.rubygems_version = %q{1.3.5}
|
36
|
+
s.summary = %q{open-uri drop-in replacement that uses curb}
|
37
|
+
s.test_files = [
|
38
|
+
"spec/curb_openuri_spec.rb",
|
39
|
+
"spec/curl_agent_spec.rb",
|
40
|
+
"spec/spec_helper.rb"
|
41
|
+
]
|
42
|
+
|
43
|
+
if s.respond_to? :specification_version then
|
44
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
45
|
+
s.specification_version = 3
|
46
|
+
|
47
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
48
|
+
s.add_runtime_dependency(%q<curb>, [">= 0.1.4"])
|
49
|
+
else
|
50
|
+
s.add_dependency(%q<curb>, [">= 0.1.4"])
|
51
|
+
end
|
52
|
+
else
|
53
|
+
s.add_dependency(%q<curb>, [">= 0.1.4"])
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
data/lib/curb_openuri.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'curl_agent'
|
2
|
+
|
3
|
+
module Kernel
  private

  # Keep a handle on whatever Kernel#open was before this file was loaded.
  #
  # The alias deliberately uses a name unique to this library: the stock
  # open-uri library keeps its own handle under +open_uri_original_open+, and
  # sharing that name makes the two overrides call each other forever when
  # both are loaded. The guard keeps a second load of this file from pointing
  # the alias at our own override.
  unless method_defined?(:curb_openuri_original_open) ||
         private_method_defined?(:curb_openuri_original_open)
    alias_method :curb_openuri_original_open, :open # :nodoc:
  end

  # Opens various resources, including URIs.
  #
  # * If +name+ responds to +open+, that method is called with the remaining
  #   arguments (and the block, if any).
  # * If +name+ is string-like (responds to +to_str+) and begins with
  #   http://, https:// or ftp://, the request is handed to CurlAgent.open.
  # * Otherwise the previous Kernel#open is called unchanged.
  #
  # Returns whatever the selected implementation returns.
  def open(name, *rest, &block) # :doc:
    if name.respond_to?(:open)
      name.open(*rest, &block)
    elsif name.respond_to?(:to_str) && %r{\A(ftp|https?)://} =~ name
      # Only string-like names are matched; +to_s+ would be true for every
      # object and is therefore not a meaningful test.
      CurlAgent.open(name, *rest, &block)
    else
      curb_openuri_original_open(name, *rest, &block)
    end
  end
  module_function :open
end
|
data/lib/curl_agent.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'stringio'
|
3
|
+
require 'curb'
|
4
|
+
|
5
|
+
# Thin wrapper around a Curl::Easy handle with browser-like defaults, a
# charset sniffing helper and an open-uri style +open+ class method.
# Any method not defined here is forwarded to the wrapped Curl::Easy.
class CurlAgent
  # Creates the agent for +url+.
  #
  # +options+ may be nil or a hash. Symbol keys are applied to the
  # Curl::Easy handle through the matching setter (<tt>:timeout => 10</tt>
  # calls <tt>timeout=(10)</tt>); every other key is treated as a request
  # header, as in the original open-uri. The hash passed in is never
  # modified. See CurlAgent::open for examples.
  def initialize(url, options = {})
    @curl = Curl::Easy.new(url)
    # Defaults
    @curl.headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6'
    @curl.follow_location = true
    @curl.max_redirects = 2
    @curl.enable_cookies = true
    @curl.connect_timeout = 5
    @curl.timeout = 30
    @performed = false

    # Split instead of deleting from the caller's hash while iterating it.
    settings, headers = (options || {}).partition { |key, _value| key.is_a?(Symbol) }
    settings.each { |key, value| @curl.send("#{key}=".intern, value) }
    # All that's left is considered headers
    headers.each { |name, value| @curl.headers[name] = value }
  end

  # Does the actual fetch, after which it's possible to call body_str.
  # Returns true.
  def perform!
    @curl.perform
    @performed = true
  end

  # Returns the lower-cased charset of the page, or '' when none is found.
  #
  # Performs the request first if that has not happened yet. The
  # Content-Type reported by curl is consulted first; failing that, the
  # first 1000 characters of the body are searched for a
  # <meta http-equiv="Content-Type"> declaration.
  def charset
    perform! unless @performed

    content_type = @curl.content_type || ''
    from_header = content_type.match(/charset\s*=\s*([a-zA-Z0-9-]+)/ni)
    return from_header[1].downcase if from_header

    body = body_str
    return '' if body.nil?

    meta = body.slice(0, 1000).match(%r{<meta.*http-equiv\s*=\s*['"]?Content-Type['"]?.*?>}mi)
    declared = meta && meta[0].match(%r{content=['"]text/html.*?charset=(.*?)['"]}mi)
    declared ? declared[1].downcase : ''
  end

  # True when either this wrapper or the wrapped Curl::Easy responds to
  # +symbol+. Accepts the optional second argument of Object#respond_to?.
  def respond_to?(symbol, include_private = false)
    super || @curl.respond_to?(symbol, include_private)
  end

  # Proxies all unknown calls (including their block) to the Curl::Easy
  # instance.
  def method_missing(symbol, *args, &block)
    @curl.send(symbol, *args, &block)
  end

  # Opens the URL and returns a StringIO over the response body.
  # If a block is provided, it's called with that object and the block's
  # value is returned instead.
  #
  # An optional access mode (and permission) may follow +name+ as with
  # Kernel#open; only read modes (nil, 'r', 'rb', File::RDONLY) are allowed.
  # You can override defaults and provide configuration directives
  # to Curl::Easy with symbol hash keys, for example:
  #   open('http://www.example.com/', :timeout => 10)
  # all the remaining keys will be passed as headers, for example:
  #   open('http://www.example.com/', :timeout => 10, 'User-Agent'=>'curl')
  #
  # Raises ArgumentError for a non-read mode or for extra arguments.
  def self.open(name, *rest, &block)
    mode, _perm, rest = scan_open_optional_arguments(*rest)
    options = rest.shift if !rest.empty? && Hash === rest.first
    raise ArgumentError, "extra arguments" unless rest.empty?

    unless mode.nil? || mode == 'r' || mode == 'rb' || mode == File::RDONLY
      raise ArgumentError, "invalid access mode #{mode} (resource is read only.)"
    end

    agent = CurlAgent.new(name, options)
    agent.perform!

    io = StringIO.new(agent.body_str)
    block ? block.call(io) : io
  end

  # Peels an optional leading mode (String or Integer) and an optional
  # Integer permission off +rest+. Returns [mode, perm, remaining_args];
  # mode and perm are nil when absent.
  def self.scan_open_optional_arguments(*rest) # :nodoc:
    mode = perm = nil
    if !rest.empty? && (String === rest.first || Integer === rest.first)
      mode = rest.shift
      perm = rest.shift if !rest.empty? && Integer === rest.first
    end
    return mode, perm, rest
  end
end
|
data/spec/curb_openuri_spec.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
describe "CurbOpenuri" do
|
4
|
+
describe "shall override Kernel::open" do
|
5
|
+
['http','https','ftp'].each do |p|
|
6
|
+
it "shall use curl for #{p}" do
|
7
|
+
CurlAgent.should_receive(:open).and_return('')
|
8
|
+
open("#{p}://www.example.com/")
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'shall not use curl for other protocols' do
|
13
|
+
CurlAgent.should_not_receive(:open)
|
14
|
+
lambda {open('file:///dev/null')}.should raise_error(Errno::ENOENT)
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'shall not use curl for files' do
|
18
|
+
CurlAgent.should_not_receive(:open)
|
19
|
+
open('/dev/null') {|f| }
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'shall still call open where the object supports it' do
|
23
|
+
mock = mock('openable')
|
24
|
+
mock.stub!(:respond_to?).with(:open).and_return(true)
|
25
|
+
mock.should_receive(:open)
|
26
|
+
CurlAgent.should_not_receive(:open)
|
27
|
+
open(mock)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/spec/curl_agent_spec.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
describe "CurlAgent" do
|
4
|
+
|
5
|
+
describe 'new method' do
|
6
|
+
it 'shall permit to override user-agent later' do
|
7
|
+
curl = CurlAgent.new('http://www.example.com/')
|
8
|
+
curl.headers['User-Agent'].should_not be_nil
|
9
|
+
curl.headers['User-Agent'] = 'curl'
|
10
|
+
curl.headers['User-Agent'].should == 'curl'
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
describe 'when used alone' do
|
15
|
+
before(:each) do
|
16
|
+
@mock = mock('curl_easy')
|
17
|
+
@headers = {'User-Agent' => 'foo'}
|
18
|
+
@mock.stub!(:headers).and_return(@headers)
|
19
|
+
@mock.stub!(:'follow_location=')
|
20
|
+
@mock.stub!(:'max_redirects=')
|
21
|
+
@mock.stub!(:'enable_cookies=')
|
22
|
+
@mock.stub!(:'connect_timeout=')
|
23
|
+
@mock.stub!(:'timeout=')
|
24
|
+
@mock.should_receive(:perform)
|
25
|
+
Curl::Easy.should_receive(:new).and_return(@mock)
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'should recognize charset' do
|
29
|
+
@mock.stub!(:content_type).and_return('Content-Type: text/html;charset=utf-8')
|
30
|
+
curl = CurlAgent.new('http://www.example.com/')
|
31
|
+
curl.charset.should == 'utf-8'
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'should recognize upper case charset' do
|
35
|
+
@mock.stub!(:content_type).and_return('Content-Type: text/html;charset=Windows-1251')
|
36
|
+
curl = CurlAgent.new('http://www.example.com/')
|
37
|
+
curl.charset.should == 'windows-1251'
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'should return empty str for empty charset' do
|
41
|
+
@mock.stub!(:content_type).and_return('Content-Type: text/html')
|
42
|
+
@mock.should_receive(:body_str).once
|
43
|
+
curl = CurlAgent.new('http://www.example.com/')
|
44
|
+
curl.charset.should == ''
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'should attempt to find charset in html' do
|
48
|
+
@mock.stub!(:content_type).and_return('Content-Type: text/html')
|
49
|
+
@mock.stub!(:body_str).and_return(<<EOF)
|
50
|
+
<html>
|
51
|
+
<head>
|
52
|
+
<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"/>
|
53
|
+
</head>
|
54
|
+
<body></body>
|
55
|
+
</html>
|
56
|
+
EOF
|
57
|
+
curl = CurlAgent.new('http://www.example.com/')
|
58
|
+
curl.charset.should == 'iso-8859-1'
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
describe 'when used with open' do
|
63
|
+
before(:each) do
|
64
|
+
@headers = {'User-Agent'=>'foo'}
|
65
|
+
@curl_easy = mock('curl_easy')
|
66
|
+
Curl::Easy.should_receive(:new).and_return(@curl_easy)
|
67
|
+
@curl_easy.stub!(:headers).and_return(@headers)
|
68
|
+
@curl_easy.stub!(:follow_location=)
|
69
|
+
@curl_easy.stub!(:max_redirects=)
|
70
|
+
@curl_easy.stub!(:enable_cookies=)
|
71
|
+
@curl_easy.stub!(:connect_timeout=)
|
72
|
+
@curl_easy.stub!(:timeout=)
|
73
|
+
@curl_easy.stub!(:perform)
|
74
|
+
@curl_easy.stub!(:body_str).and_return('test')
|
75
|
+
end
|
76
|
+
|
77
|
+
it 'shall permit to specify user-agent' do
|
78
|
+
@curl_easy.headers['User-Agent'].should_not == 'curl'
|
79
|
+
CurlAgent.open('http://www.example.com/', 'User-Agent'=>'curl')
|
80
|
+
@curl_easy.headers['User-Agent'].should == 'curl'
|
81
|
+
end
|
82
|
+
|
83
|
+
it 'shall permit to override timeout' do
|
84
|
+
@curl_easy.should_receive(:'timeout=').once.with(10)
|
85
|
+
CurlAgent.open('http://www.example.com/', :timeout => 10)
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'shall use block when provided' do
|
89
|
+
CurlAgent.open('http://www.example.com/') {|f| f.read}.should == 'test'
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
93
|
+
|
94
|
+
describe 'when parsing parameters to open' do
|
95
|
+
it 'shall recognize wrong mode' do
|
96
|
+
CurlAgent.should_not_receive(:new)
|
97
|
+
lambda {CurlAgent.open('http://www.example.com/', 'w', 0600, :timeout=>10)}.should raise_error(ArgumentError)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: curb-openuri
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Roman Shterenzon
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-29 00:00:00 +02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: curb
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.1.4
|
24
|
+
version:
|
25
|
+
description: open-uri drop-in replacement that uses curb
|
26
|
+
email: romanbsd@yahoo.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- LICENSE
|
33
|
+
- README
|
34
|
+
files:
|
35
|
+
- .gitignore
|
36
|
+
- LICENSE
|
37
|
+
- README
|
38
|
+
- Rakefile
|
39
|
+
- VERSION.yml
|
40
|
+
- curb-openuri.gemspec
|
41
|
+
- lib/curb_openuri.rb
|
42
|
+
- lib/curl_agent.rb
|
43
|
+
- spec/curb_openuri_spec.rb
|
44
|
+
- spec/curl_agent_spec.rb
|
45
|
+
- spec/spec_helper.rb
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: http://github.com/romanbsd/curb-openuri
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options:
|
52
|
+
- --charset=UTF-8
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: "0"
|
66
|
+
version:
|
67
|
+
requirements: []
|
68
|
+
|
69
|
+
rubyforge_project:
|
70
|
+
rubygems_version: 1.3.5
|
71
|
+
signing_key:
|
72
|
+
specification_version: 3
|
73
|
+
summary: open-uri drop-in replacement that uses curb
|
74
|
+
test_files:
|
75
|
+
- spec/curb_openuri_spec.rb
|
76
|
+
- spec/curl_agent_spec.rb
|
77
|
+
- spec/spec_helper.rb
|