pioneer 0.0.7 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +9 -1
- data/lib/pioneer/base.rb +18 -1
- data/lib/pioneer/request.rb +3 -2
- data/lib/pioneer/version.rb +1 -1
- data/spec/pioneer/base_spec.rb +22 -0
- metadata +10 -10
data/CHANGELOG
CHANGED
@@ -28,4 +28,12 @@
|
|
28
28
|
## v0.0.7
|
29
29
|
|
30
30
|
* Rescuing of the Retry exception has been moved to the Base class, so it is triggered in the context of the main loop (it will be executed with the global sleep timeout)
|
31
|
-
* Added `headers` callback support
|
31
|
+
* Added `headers` callback support
|
32
|
+
|
33
|
+
## v0.0.8
|
34
|
+
|
35
|
+
* em-http-request options support added
|
36
|
+
|
37
|
+
## v0.0.9
|
38
|
+
|
39
|
+
* pushing `request_opts` to init
|
data/lib/pioneer/base.rb
CHANGED
@@ -11,7 +11,7 @@ module Pioneer
|
|
11
11
|
class HttpSkipRequest < StandardError; end
|
12
12
|
|
13
13
|
class Base
|
14
|
-
attr_reader :name, :concurrency, :sleep, :log_level, :redirect
|
14
|
+
attr_reader :name, :concurrency, :sleep, :log_level, :redirect, :request_opts
|
15
15
|
|
16
16
|
def initialize(opts = {})
|
17
17
|
raise UndefinedLocations, "you should specify `locations` method in your `self.class`" unless self.methods.include? :locations
|
@@ -26,10 +26,13 @@ module Pioneer
|
|
26
26
|
@header = opts[:header] || nil
|
27
27
|
@redirects = opts[:redirects] || nil
|
28
28
|
@headers = opts[:headers] #|| nil
|
29
|
+
@request_opts = opts[:request_opts] #|| nil
|
29
30
|
end
|
30
31
|
|
31
32
|
#
|
32
33
|
# Main method: starting crawling through locations
|
34
|
+
# If we catch Pioneer::HttpRetryRequest then we are retrying request
|
35
|
+
# And if we catch Pioneer::HttpSkipRequest we just return nothing?
|
33
36
|
#
|
34
37
|
def start
|
35
38
|
result = []
|
@@ -88,6 +91,20 @@ module Pioneer
|
|
88
91
|
opts
|
89
92
|
end
|
90
93
|
|
94
|
+
#
|
95
|
+
# EmHttpRequest options
|
96
|
+
#
|
97
|
+
def request_opts
|
98
|
+
opts = {}
|
99
|
+
opts = case @request_opts
|
100
|
+
when Proc
|
101
|
+
@request_opts.call
|
102
|
+
else
|
103
|
+
@request_opts
|
104
|
+
end if @request_opts
|
105
|
+
opts
|
106
|
+
end
|
107
|
+
|
91
108
|
#
|
92
109
|
# Generate random header for request
|
93
110
|
#
|
data/lib/pioneer/request.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
module Pioneer
|
3
3
|
class Request
|
4
|
-
attr_reader :pioneer, :url, :result, :response, :error, :counter
|
4
|
+
attr_reader :pioneer, :url, :result, :response, :error, :counter, :request_opts
|
5
5
|
|
6
6
|
def initialize(url, pioneer, counter=0)
|
7
7
|
@pioneer = pioneer
|
8
8
|
@url = parse_url(url)
|
9
9
|
@counter = counter
|
10
|
+
@request_opts = @pioneer.request_opts
|
10
11
|
end
|
11
12
|
|
12
13
|
#
|
@@ -24,7 +25,7 @@ module Pioneer
|
|
24
25
|
#
|
25
26
|
def handle_request_error_or_return_result
|
26
27
|
begin
|
27
|
-
req = EM::HttpRequest.new(url).aget pioneer.http_opts
|
28
|
+
req = EM::HttpRequest.new(url, @request_opts).aget pioneer.http_opts
|
28
29
|
if pioneer.headers
|
29
30
|
req.headers{
|
30
31
|
pioneer.headers.call(req)
|
data/lib/pioneer/version.rb
CHANGED
data/spec/pioneer/base_spec.rb
CHANGED
@@ -2,4 +2,26 @@
|
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
4
|
describe Pioneer::Base do
|
5
|
+
before do
|
6
|
+
end
|
7
|
+
|
8
|
+
it "should pass request_opts as a proc" do
|
9
|
+
request_opts = proc do
|
10
|
+
{
|
11
|
+
bind: {host: "192.168.1.1", port: '0'}
|
12
|
+
}
|
13
|
+
end
|
14
|
+
@pioneer = Pioneer::Crawler.new(request_opts: request_opts)
|
15
|
+
@pioneer.request_opts[:bind][:host].must_equal "192.168.1.1"
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should pass request_opts as a Hash" do
|
19
|
+
request_opts = begin
|
20
|
+
{
|
21
|
+
bind: {host: "192.168.1.1", port: '0'}
|
22
|
+
}
|
23
|
+
end
|
24
|
+
@pioneer = Pioneer::Crawler.new(request_opts: request_opts)
|
25
|
+
@pioneer.request_opts[:bind][:host].must_equal "192.168.1.1"
|
26
|
+
end
|
5
27
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pioneer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-04-11 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yajl-ruby
|
16
|
-
requirement: &
|
16
|
+
requirement: &76605720 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *76605720
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: nokogiri
|
27
|
-
requirement: &
|
27
|
+
requirement: &76605500 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *76605500
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: em-synchrony
|
38
|
-
requirement: &
|
38
|
+
requirement: &76605290 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *76605290
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: em-http-request
|
49
|
-
requirement: &
|
49
|
+
requirement: &76605010 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *76605010
|
58
58
|
description: Simple async HTTP crawler based on em-synchrony
|
59
59
|
email:
|
60
60
|
- pedro.yanoviches@gmail.com
|