changa-curb-openuri 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +4 -0
- data/lib/curb_openuri.rb +32 -0
- data/lib/curl_agent.rb +96 -0
- data/spec/curb_openuri_spec.rb +30 -0
- data/spec/curl_agent_spec.rb +100 -0
- data/spec/spec_helper.rb +9 -0
- metadata +70 -0
data/VERSION.yml
ADDED
data/lib/curb_openuri.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'curl_agent'
|
2
|
+
|
3
|
+
module Kernel
  private

  alias curb_open_uri_original_open open # :nodoc:

  # Opens various resources, including URIs.
  #
  # Dispatch order:
  #
  # 1. If +name+ responds to +open+, that method is called with the
  #    remaining arguments and the block.
  # 2. If +name+ is string-like (responds to +to_str+) and begins with
  #    ftp://, http:// or https://, the request is handed to CurlAgent.open.
  # 3. Otherwise the original Kernel#open is called.
  #
  # The string check uses +to_str+ rather than +to_s+: every object responds
  # to +to_s+, so testing it would let non-string arguments (for example an
  # object that only exposes +to_path+) reach the regexp match and raise
  # TypeError instead of falling through to the original open.
  def open(name, *rest, &block) # :doc:
    if name.respond_to?(:open)
      name.open(*rest, &block)
    elsif name.respond_to?(:to_str) && %r{\A(ftp|https?)://} =~ name
      CurlAgent.open(name, *rest, &block)
    else
      curb_open_uri_original_open(name, *rest, &block)
    end
  end
  # Keep keyword-style options (e.g. :mode => 'r') flowing through *rest as
  # keywords on Rubies that separate positional and keyword arguments.
  ruby2_keywords(:open) if respond_to?(:ruby2_keywords, true)
  module_function :open
end
|
data/lib/curl_agent.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'stringio'
|
3
|
+
require 'curb'
|
4
|
+
|
5
|
+
# Wraps a Curl::Easy handle, applies a set of default request settings and
# forwards every method it does not define itself to that handle.
class CurlAgent
  # Builds the underlying Curl::Easy handle for +url+ and applies defaults.
  #
  # +options+ may be nil or a Hash. Entries with Symbol keys are applied as
  # setters on the Curl::Easy handle (:timeout => 10 calls +timeout=+ with 10);
  # every other entry is merged into the request headers. The caller's Hash
  # is not modified.
  #
  # See CurlAgent::open for explanation about options
  def initialize(url, options = {})
    @curl = Curl::Easy.new(url)
    # Defaults
    @curl.headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6'
    @curl.follow_location = true
    @curl.max_redirects = 2
    @curl.enable_cookies = true
    @curl.connect_timeout = 5
    @curl.timeout = 30
    @performed = false

    # Split instead of deleting from +options+ while iterating it, so the
    # Hash handed in by the caller is left untouched.
    settings, headers = (options || {}).partition { |key, _value| key.is_a?(Symbol) }

    # Symbols are configuration directives for Curl::Easy
    settings.each { |key, value| @curl.public_send("#{key}=".intern, value) }

    # Strings will be passed as headers, as in original open-uri
    headers.each { |key, value| @curl.headers[key] = value }
  end

  # Do the actual fetch, after which it's possible to call body_str method
  def perform!
    @curl.perform
    @performed = true
  end

  # Returns the lower-cased charset of the page, or an empty string when none
  # is found. Performs the fetch first if it has not happened yet.
  #
  # The Content-Type reported by the handle is consulted first; if it carries
  # no charset, the first 1000 characters of the body are searched for a
  # <meta http-equiv="Content-Type"> tag.
  def charset
    perform! unless @performed
    content_type = @curl.content_type || ''
    from_header = content_type.match(/charset\s*=\s*([a-zA-Z0-9-]+)/ni)
    return from_header[1].downcase if from_header

    body = @curl.body_str
    return '' if body.nil?

    meta = body.slice(0, 1000).match(%r{<meta.*http-equiv\s*=\s*['"]?Content-Type['"]?.*?>}mi)
    declared = meta && meta[0].match(%r{content=['"]text/html.*?charset=(.*?)['"]}mi)
    declared ? declared[1].downcase : ''
  end

  # Reports methods of the wrapped Curl::Easy instance as supported, in
  # addition to the methods CurlAgent defines itself.
  def respond_to_missing?(symbol, include_private = false)
    @curl.respond_to?(symbol, include_private) || super
  end

  # Proxies all calls to Curl::Easy instance (including any block given)
  def method_missing(symbol, *args, &block)
    @curl.send(symbol, *args, &block)
  end

  # This method opens the URL and returns an IO object.
  # If a block is provided, it's called with that object and the block's
  # result is returned instead.
  # You can override defaults and provide configuration directives
  # to Curl::Easy with symbol hash keys, for example:
  # open('http://www.example.com/', :timeout => 10)
  # all the rest keys will be passed as headers, for example:
  # open('http://www.example.com/', :timeout => 10, 'User-Agent'=>'curl')
  #
  # An optional mode (and permission) may precede the options Hash, as with
  # Kernel#open, but only read modes are accepted.
  #
  # Raises ArgumentError for a non-read mode or for unexpected extra arguments.
  def self.open(name, *rest, &block)
    mode, _perm, rest = scan_open_optional_arguments(*rest)
    options = rest.shift if Hash === rest.first
    raise ArgumentError, "extra arguments" unless rest.empty?

    unless [nil, 'r', 'rb', File::RDONLY].include?(mode)
      raise ArgumentError, "invalid access mode #{mode} (resource is read only.)"
    end

    agent = CurlAgent.new(name, options)
    agent.perform!

    # A missing body is exposed as an empty stream rather than handing nil
    # to StringIO.
    io = StringIO.new(agent.body_str || '')
    block ? block.call(io) : io
  end

  # Splits a leading mode (String or Integer) and an optional Integer
  # permission off +rest+. Returns [mode, perm, remaining_arguments]; mode and
  # perm are nil when absent.
  def self.scan_open_optional_arguments(*rest) # :nodoc:
    mode = perm = nil
    if String === rest.first || Integer === rest.first
      mode = rest.shift
      perm = rest.shift if Integer === rest.first
    end
    [mode, perm, rest]
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Specs for the Kernel#open override: which arguments are routed to CurlAgent
# and which are left to other open implementations.
describe "CurbOpenuri" do
  describe "shall override Kernel::open" do
    # One example per scheme that is expected to be handed to CurlAgent.
    %w[http https ftp].each do |scheme|
      it "shall use curl for #{scheme}" do
        CurlAgent.should_receive(:open).and_return('')
        open("#{scheme}://www.example.com/")
      end
    end

    it 'shall not use curl for other protocols' do
      CurlAgent.should_not_receive(:open)
      # The file:// string is expected to surface as a missing file.
      opening = lambda { open('file:///dev/null') }
      opening.should raise_error(Errno::ENOENT)
    end

    it 'shall not use curl for files' do
      CurlAgent.should_not_receive(:open)
      open('/dev/null') { |_file| }
    end

    it 'shall still call open where the object supports it' do
      openable = mock('openable')
      openable.stub!(:respond_to?).with(:open).and_return(true)
      openable.should_receive(:open)
      CurlAgent.should_not_receive(:open)
      open(openable)
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Specs for CurlAgent: header handling, charset detection and the
# open-style class method, all run against a mocked Curl::Easy.
describe "CurlAgent" do
  # Setters that are stubbed on every Curl::Easy double used below.
  default_setters = [
    :'follow_location=',
    :'max_redirects=',
    :'enable_cookies=',
    :'connect_timeout=',
    :'timeout='
  ]

  describe 'new method' do
    it 'shall permit to override user-agent later' do
      agent = CurlAgent.new('http://www.example.com/')
      agent.headers['User-Agent'].should_not be_nil
      agent.headers['User-Agent'] = 'curl'
      agent.headers['User-Agent'].should == 'curl'
    end
  end

  describe 'when used alone' do
    before(:each) do
      @mock = mock('curl_easy')
      @headers = {'User-Agent' => 'foo'}
      @mock.stub!(:headers).and_return(@headers)
      default_setters.each { |setter| @mock.stub!(setter) }
      # Every example in this group asks for the charset, which must
      # trigger a fetch on the handle.
      @mock.should_receive(:perform)
      Curl::Easy.should_receive(:new).and_return(@mock)
    end

    it 'should recognize charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=utf-8')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'utf-8'
    end

    it 'should recognize upper case charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=Windows-1251')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'windows-1251'
    end

    it 'should return empty str for empty charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      @mock.should_receive(:body_str).once
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == ''
    end

    it 'should attempt to find charset in html' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      # Body whose only charset declaration lives in a meta tag.
      page = <<EOF
<html>
<head>
<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"/>
</head>
<body></body>
</html>
EOF
      @mock.stub!(:body_str).and_return(page)
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'iso-8859-1'
    end
  end

  describe 'when used with open' do
    before(:each) do
      @headers = {'User-Agent'=>'foo'}
      @curl_easy = mock('curl_easy')
      Curl::Easy.should_receive(:new).and_return(@curl_easy)
      @curl_easy.stub!(:headers).and_return(@headers)
      default_setters.each { |setter| @curl_easy.stub!(setter) }
      @curl_easy.stub!(:perform)
      @curl_easy.stub!(:body_str).and_return('test')
    end

    it 'shall permit to specify user-agent' do
      @curl_easy.headers['User-Agent'].should_not == 'curl'
      CurlAgent.open('http://www.example.com/', 'User-Agent'=>'curl')
      @curl_easy.headers['User-Agent'].should == 'curl'
    end

    it 'shall permit to override timeout' do
      @curl_easy.should_receive(:'timeout=').once.with(10)
      CurlAgent.open('http://www.example.com/', :timeout => 10)
    end

    it 'shall use block when provided' do
      content = CurlAgent.open('http://www.example.com/') { |io| io.read }
      content.should == 'test'
    end

  end

  describe 'when parsing parameters to open' do
    it 'shall recognize wrong mode' do
      CurlAgent.should_not_receive(:new)
      write_attempt = lambda { CurlAgent.open('http://www.example.com/', 'w', 0600, :timeout=>10) }
      write_attempt.should raise_error(ArgumentError)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: changa-curb-openuri
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Roman Shterenzon
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-02-18 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: curb
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.1.4
|
24
|
+
version:
|
25
|
+
description: open-uri drop-in replacement that uses curb
|
26
|
+
email:
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files: []
|
32
|
+
|
33
|
+
files:
|
34
|
+
- VERSION.yml
|
35
|
+
- lib/curb_openuri.rb
|
36
|
+
- lib/curl_agent.rb
|
37
|
+
- spec/curb_openuri_spec.rb
|
38
|
+
- spec/spec_helper.rb
|
39
|
+
- spec/curl_agent_spec.rb
|
40
|
+
has_rdoc: true
|
41
|
+
homepage: http://github.com/changa/curb-openuri
|
42
|
+
licenses: []
|
43
|
+
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options:
|
46
|
+
- --inline-source
|
47
|
+
- --charset=UTF-8
|
48
|
+
require_paths:
|
49
|
+
- lib
|
50
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: "0"
|
61
|
+
version:
|
62
|
+
requirements: []
|
63
|
+
|
64
|
+
rubyforge_project:
|
65
|
+
rubygems_version: 1.3.5
|
66
|
+
signing_key:
|
67
|
+
specification_version: 2
|
68
|
+
summary: open-uri drop-in replacement that uses curb
|
69
|
+
test_files: []
|
70
|
+
|