romanbsd-curb-openuri 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.yml +4 -0
- data/lib/curb_openuri.rb +32 -0
- data/lib/curl_agent.rb +94 -0
- data/spec/curb_openuri_spec.rb +22 -0
- data/spec/curl_agent_spec.rb +84 -0
- data/spec/spec_helper.rb +9 -0
- metadata +68 -0
data/VERSION.yml
ADDED
data/lib/curb_openuri.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'curl_agent'
|
2
|
+
|
3
|
+
module Kernel
  private

  alias open_uri_original_open open # :nodoc:

  # Makes it possible to open various resources, including URIs.
  #
  # If the first argument responds to the `open' method, that method is
  # called with the remaining arguments.
  #
  # If the first argument is string-like (responds to +to_str+) and begins
  # with http://, https:// or ftp://, CurlAgent.open is called with the
  # name and the remaining arguments.
  #
  # Otherwise the original open is called.
  def open(name, *rest, &block) # :doc:
    if name.respond_to?(:open)
      name.open(*rest, &block)
    # Every object responds to +to_s+, so test for +to_str+ instead: only
    # string-like names can be matched against the URL pattern, anything
    # else (e.g. an object exposing only +to_path+) goes to the original open.
    elsif name.respond_to?(:to_str) && %r{\A(ftp|https?)://} =~ name
      CurlAgent.open(name, *rest, &block)
    else
      open_uri_original_open(name, *rest, &block)
    end
  end
  # On Rubies that separate keyword and positional arguments, keep keywords
  # such as <tt>:mode => 'r'</tt> flowing through +*rest+ to the delegated call.
  ruby2_keywords(:open) if respond_to?(:ruby2_keywords, true)
  module_function :open
end
|
data/lib/curl_agent.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'stringio'
|
3
|
+
require 'curb'
|
4
|
+
|
5
|
+
# Thin wrapper around a Curl::Easy handle that applies browser-like defaults
# and offers an open-uri style CurlAgent.open entry point.
class CurlAgent
  # Creates the underlying Curl::Easy handle for +url+ and applies defaults.
  def initialize(url)
    @curl = Curl::Easy.new(url)
    # Defaults
    @curl.headers['User-Agent'] = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6'
    @curl.follow_location = true
    @curl.max_redirects = 2
    @curl.enable_cookies = true
    @curl.connect_timeout = 5
    @curl.timeout = 30
    @performed = false
  end

  # Do the actual fetch, after which it's possible to call body_str method
  def perform!
    @curl.perform
    @performed = true
  end

  # Returns the charset of the page as a lower-case string, or '' when none
  # is declared. The Content-Type header is consulted first; if it carries no
  # charset, the first 1000 characters of the body are searched for a
  # <meta http-equiv="Content-Type"> declaration. Performs the fetch first if
  # it has not happened yet.
  def charset
    perform! unless @performed

    content_type = @curl.content_type || ''
    from_header = content_type.match(/charset\s*=\s*([a-zA-Z0-9-]+)/i)
    return from_header[1].downcase if from_header

    # Read the body once, and only when the header did not settle the matter.
    html = body_str
    return '' if html.nil?

    meta = html.slice(0, 1000).match(%r{<meta.*http-equiv\s*=\s*['"]?Content-Type['"]?.*?>}mi)
    declared = meta && meta[0].match(%r{content=['"]text/html.*?charset=(.*?)['"]}mi)
    declared ? declared[1].downcase : ''
  end

  # Reports the public methods of the wrapped Curl::Easy instance as supported,
  # in addition to the agent's own methods (handled by the default respond_to?).
  def respond_to_missing?(symbol, include_private = false)
    @curl.respond_to?(symbol) || super
  end

  # Proxies all unknown calls (arguments and block included) to the public
  # interface of the Curl::Easy instance.
  def method_missing(symbol, *args, &block)
    @curl.public_send(symbol, *args, &block)
  end

  # This method opens the URL and returns an IO object.
  # If a block is provided, it's called with that object and the block's
  # value is returned.
  # You can override defaults and provide configuration directives
  # to Curl::Easy with symbol hash keys, for example:
  # open('http://www.example.com/', :timeout => 10)
  # all the remaining keys will be passed as headers, for example:
  # open('http://www.example.com/', :timeout => 10, 'User-Agent'=>'curl')
  #
  # The options hash passed by the caller is left untouched.
  # Raises ArgumentError for a non read-only mode or for extra arguments.
  def self.open(name, *rest, &block)
    mode, _perm, rest = scan_open_optional_arguments(*rest)
    options = rest.first.is_a?(Hash) ? rest.shift : {}
    raise ArgumentError, 'extra arguments' unless rest.empty?

    unless mode.nil? || mode == 'r' || mode == 'rb' || mode == File::RDONLY
      raise ArgumentError, "invalid access mode #{mode} (resource is read only.)"
    end

    agent = CurlAgent.new(name)

    # Symbol keys are configuration directives; everything else is a header,
    # as in the original open-uri. Partitioning avoids mutating the caller's hash.
    directives, headers = options.partition { |key, _value| key.is_a?(Symbol) }
    directives.each { |key, value| agent.public_send("#{key}=", value) }
    headers.each { |key, value| agent.headers[key] = value }

    agent.perform!
    # An empty response may leave the body unset; still hand back a usable IO.
    io = StringIO.new(agent.body_str || String.new)
    block ? block.call(io) : io
  end

  # Splits the optional leading mode (String or Integer) and permission
  # (Integer) arguments from +rest+; returns [mode, perm, remaining arguments].
  def self.scan_open_optional_arguments(*rest) # :nodoc:
    mode = perm = nil
    if String === rest.first || Integer === rest.first
      mode = rest.shift
      perm = rest.shift if Integer === rest.first
    end
    [mode, perm, rest]
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Checks that the Kernel#open override hands http/https/ftp URLs to CurlAgent
# and leaves every other name to the original open.
describe "CurbOpenuri" do
  describe "shall override Kernel::open" do
    %w[http https ftp].each do |scheme|
      it "shall use curl for #{scheme}" do
        CurlAgent.should_receive(:open).and_return('')
        open("#{scheme}://www.example.com/")
      end
    end

    it 'shall not use curl for other protocols' do
      CurlAgent.should_not_receive(:open)
      # A file:// URL is treated as a plain (non-existent) path by the original open.
      opening = lambda { open('file:///dev/null') }
      opening.should raise_error(Errno::ENOENT)
    end

    it 'shall not use curl for files' do
      CurlAgent.should_not_receive(:open)
      open('/dev/null') { |_file| }
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
|
3
|
+
# Exercises CurlAgent directly (against a mocked Curl::Easy) and through
# CurlAgent.open (against a mocked agent).
describe "CurlAgent" do

  describe 'new method' do
    it 'shall permit to override user-agent' do
      agent = CurlAgent.new('http://www.example.com/')
      agent.headers['User-Agent'].should_not be_nil
      agent.headers['User-Agent'] = 'curl'
      agent.headers['User-Agent'].should == 'curl'
    end
  end

  describe 'when used alone' do
    before(:each) do
      @headers = {'User-Agent' => 'foo'}
      @mock = mock('curl_easy')
      @mock.stub!(:headers).and_return(@headers)
      # Setters invoked by CurlAgent#initialize when applying its defaults.
      [:'follow_location=', :'max_redirects=', :'enable_cookies=',
       :'connect_timeout=', :'timeout='].each do |setter|
        @mock.stub!(setter)
      end
      @mock.should_receive(:perform)
      Curl::Easy.should_receive(:new).and_return(@mock)
    end

    it 'should recognize charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=utf-8')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'utf-8'
    end

    it 'should recognize upper case charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html;charset=Windows-1251')
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'windows-1251'
    end

    it 'should return empty str for empty charset' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      # With no charset in the header the body is consulted, exactly once.
      @mock.should_receive(:body_str).once
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == ''
    end

    it 'should attempt to find charset in html' do
      @mock.stub!(:content_type).and_return('Content-Type: text/html')
      @mock.stub!(:body_str).and_return(<<EOF)
<html>
<head>
<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"/>
</head>
<body></body>
</html>
EOF
      agent = CurlAgent.new('http://www.example.com/')
      agent.charset.should == 'iso-8859-1'
    end
  end

  describe 'when used with open' do
    before(:each) do
      @headers = {'User-Agent'=>'foo'}
      @curl = mock('curl')
      @curl.stub!(:headers).and_return(@headers)
      @curl.stub!(:perform!)
      @curl.stub!(:body_str).and_return('')
      CurlAgent.should_receive(:new).and_return(@curl)
    end

    it 'shall permit to specify user-agent' do
      @curl.headers['User-Agent'].should_not == 'curl'
      CurlAgent.open('http://www.example.com/', 'User-Agent'=>'curl')
      @curl.headers['User-Agent'].should == 'curl'
    end

    it 'shall permit to override timeout' do
      @curl.should_receive(:'timeout=').once.with(10)
      CurlAgent.open('http://www.example.com/', :timeout => 10)
    end

  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: romanbsd-curb-openuri
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Roman Shterenzon
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-02-18 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: curb
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.1.4
|
24
|
+
version:
|
25
|
+
description: open-uri drop-in replacement that uses curb
|
26
|
+
email: romanbsd@yahoo.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files: []
|
32
|
+
|
33
|
+
files:
|
34
|
+
- VERSION.yml
|
35
|
+
- lib/curb_openuri.rb
|
36
|
+
- lib/curl_agent.rb
|
37
|
+
- spec/curb_openuri_spec.rb
|
38
|
+
- spec/spec_helper.rb
|
39
|
+
- spec/curl_agent_spec.rb
|
40
|
+
has_rdoc: true
|
41
|
+
homepage: http://github.com/romanbsd/curb-openuri
|
42
|
+
post_install_message:
|
43
|
+
rdoc_options:
|
44
|
+
- --inline-source
|
45
|
+
- --charset=UTF-8
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: "0"
|
53
|
+
version:
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: "0"
|
59
|
+
version:
|
60
|
+
requirements: []
|
61
|
+
|
62
|
+
rubyforge_project:
|
63
|
+
rubygems_version: 1.2.0
|
64
|
+
signing_key:
|
65
|
+
specification_version: 2
|
66
|
+
summary: open-uri drop-in replacement that uses curb
|
67
|
+
test_files: []
|
68
|
+
|