pioneer 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -1
- data/lib/pioneer/base.rb +18 -1
- data/lib/pioneer/request.rb +3 -2
- data/lib/pioneer/version.rb +1 -1
- data/spec/pioneer/base_spec.rb +22 -0
- metadata +10 -10
data/CHANGELOG
CHANGED
@@ -28,4 +28,12 @@
|
|
28
28
|
## v0.0.7
|
29
29
|
|
30
30
|
* Rescuing of the Retry exception is moved to the Base class, so it will be triggered in the context of the main loop (it will be executed with the global sleep timeout)
|
31
|
-
* Added `headers` callback support
|
31
|
+
* Added `headers` callback support
|
32
|
+
|
33
|
+
## v0.0.8
|
34
|
+
|
35
|
+
* em-http-request options support added
|
36
|
+
|
37
|
+
## v0.0.9
|
38
|
+
|
39
|
+
* pushing request_opt to init
|
data/lib/pioneer/base.rb
CHANGED
@@ -11,7 +11,7 @@ module Pioneer
|
|
11
11
|
class HttpSkipRequest < StandardError; end
|
12
12
|
|
13
13
|
class Base
|
14
|
-
attr_reader :name, :concurrency, :sleep, :log_level, :redirect
|
14
|
+
attr_reader :name, :concurrency, :sleep, :log_level, :redirect, :request_opts
|
15
15
|
|
16
16
|
def initialize(opts = {})
|
17
17
|
raise UndefinedLocations, "you should specify `locations` method in your `self.class`" unless self.methods.include? :locations
|
@@ -26,10 +26,13 @@ module Pioneer
|
|
26
26
|
@header = opts[:header] || nil
|
27
27
|
@redirects = opts[:redirects] || nil
|
28
28
|
@headers = opts[:headers] #|| nil
|
29
|
+
@request_opts = opts[:request_opts] #|| nil
|
29
30
|
end
|
30
31
|
|
31
32
|
#
|
32
33
|
# Main method: starting crawling through locations
|
34
|
+
# If we catch Pioneer::HttpRetryRequest then we are retrying request
|
35
|
+
# And if we catch Pioneer::HttpSkipRequest we just return nothing?
|
33
36
|
#
|
34
37
|
def start
|
35
38
|
result = []
|
@@ -88,6 +91,20 @@ module Pioneer
|
|
88
91
|
opts
|
89
92
|
end
|
90
93
|
|
94
|
+
#
|
95
|
+
# EmHttpRequest options
|
96
|
+
#
|
97
|
+
def request_opts
|
98
|
+
opts = {}
|
99
|
+
opts = case @request_opts
|
100
|
+
when Proc
|
101
|
+
@request_opts.call
|
102
|
+
else
|
103
|
+
@request_opts
|
104
|
+
end if @request_opts
|
105
|
+
opts
|
106
|
+
end
|
107
|
+
|
91
108
|
#
|
92
109
|
# Generate random header for request
|
93
110
|
#
|
data/lib/pioneer/request.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
module Pioneer
|
3
3
|
class Request
|
4
|
-
attr_reader :pioneer, :url, :result, :response, :error, :counter
|
4
|
+
attr_reader :pioneer, :url, :result, :response, :error, :counter, :request_opts
|
5
5
|
|
6
6
|
def initialize(url, pioneer, counter=0)
|
7
7
|
@pioneer = pioneer
|
8
8
|
@url = parse_url(url)
|
9
9
|
@counter = counter
|
10
|
+
@request_opts = @pioneer.request_opts
|
10
11
|
end
|
11
12
|
|
12
13
|
#
|
@@ -24,7 +25,7 @@ module Pioneer
|
|
24
25
|
#
|
25
26
|
def handle_request_error_or_return_result
|
26
27
|
begin
|
27
|
-
req = EM::HttpRequest.new(url).aget pioneer.http_opts
|
28
|
+
req = EM::HttpRequest.new(url, @request_opts).aget pioneer.http_opts
|
28
29
|
if pioneer.headers
|
29
30
|
req.headers{
|
30
31
|
pioneer.headers.call(req)
|
data/lib/pioneer/version.rb
CHANGED
data/spec/pioneer/base_spec.rb
CHANGED
@@ -2,4 +2,26 @@
|
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
4
|
describe Pioneer::Base do
|
5
|
+
before do
|
6
|
+
end
|
7
|
+
|
8
|
+
it "should pass request_opts as a proc" do
|
9
|
+
request_opts = proc do
|
10
|
+
{
|
11
|
+
bind: {host: "192.168.1.1", port: '0'}
|
12
|
+
}
|
13
|
+
end
|
14
|
+
@pioneer = Pioneer::Crawler.new(request_opts: request_opts)
|
15
|
+
@pioneer.request_opts[:bind][:host].must_equal "192.168.1.1"
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should pass request_opts as a Hash" do
|
19
|
+
request_opts = begin
|
20
|
+
{
|
21
|
+
bind: {host: "192.168.1.1", port: '0'}
|
22
|
+
}
|
23
|
+
end
|
24
|
+
@pioneer = Pioneer::Crawler.new(request_opts: request_opts)
|
25
|
+
@pioneer.request_opts[:bind][:host].must_equal "192.168.1.1"
|
26
|
+
end
|
5
27
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pioneer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-04-11 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yajl-ruby
|
16
|
-
requirement: &
|
16
|
+
requirement: &76605720 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *76605720
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: nokogiri
|
27
|
-
requirement: &
|
27
|
+
requirement: &76605500 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *76605500
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: em-synchrony
|
38
|
-
requirement: &
|
38
|
+
requirement: &76605290 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *76605290
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: em-http-request
|
49
|
-
requirement: &
|
49
|
+
requirement: &76605010 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *76605010
|
58
58
|
description: Simple async HTTP crawler based on em-synchrony
|
59
59
|
email:
|
60
60
|
- pedro.yanoviches@gmail.com
|