pioneer 0.0.7 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -28,4 +28,12 @@
28
28
  ## v0.0.7
29
29
 
30
30
  * Rescuing of the Retry exception is moved to the Base class, so it is triggered in the context of the main loop (it will be executed with the global sleep timeout)
31
- * Added `headers` callback support
31
+ * Added `headers` callback support
32
+
33
+ ## v0.0.8
34
+
35
+ * em-http-request options support added
36
+
37
+ ## v0.0.9
38
+
39
+ * pushing `request_opts` to init
@@ -11,7 +11,7 @@ module Pioneer
11
11
  class HttpSkipRequest < StandardError; end
12
12
 
13
13
  class Base
14
- attr_reader :name, :concurrency, :sleep, :log_level, :redirect
14
+ attr_reader :name, :concurrency, :sleep, :log_level, :redirect, :request_opts
15
15
 
16
16
  def initialize(opts = {})
17
17
  raise UndefinedLocations, "you should specify `locations` method in your `self.class`" unless self.methods.include? :locations
@@ -26,10 +26,13 @@ module Pioneer
26
26
  @header = opts[:header] || nil
27
27
  @redirects = opts[:redirects] || nil
28
28
  @headers = opts[:headers] #|| nil
29
+ @request_opts = opts[:request_opts] #|| nil
29
30
  end
30
31
 
31
32
  #
32
33
  # Main method: starting crawling through locations
34
+ # If we catch Pioneer::HttpRetryRequest then we are retrying request
35
+ # And if we catch Pioneer::HttpSkipRequest we just return nothing?
33
36
  #
34
37
  def start
35
38
  result = []
@@ -88,6 +91,20 @@ module Pioneer
88
91
  opts
89
92
  end
90
93
 
94
+ #
95
+ # EmHttpRequest options
96
+ #
97
+ def request_opts
98
+ opts = {}
99
+ opts = case @request_opts
100
+ when Proc
101
+ @request_opts.call
102
+ else
103
+ @request_opts
104
+ end if @request_opts
105
+ opts
106
+ end
107
+
91
108
  #
92
109
  # Generate random header for request
93
110
  #
@@ -1,12 +1,13 @@
1
1
  # encoding: utf-8
2
2
  module Pioneer
3
3
  class Request
4
- attr_reader :pioneer, :url, :result, :response, :error, :counter
4
+ attr_reader :pioneer, :url, :result, :response, :error, :counter, :request_opts
5
5
 
6
6
  def initialize(url, pioneer, counter=0)
7
7
  @pioneer = pioneer
8
8
  @url = parse_url(url)
9
9
  @counter = counter
10
+ @request_opts = @pioneer.request_opts
10
11
  end
11
12
 
12
13
  #
@@ -24,7 +25,7 @@ module Pioneer
24
25
  #
25
26
  def handle_request_error_or_return_result
26
27
  begin
27
- req = EM::HttpRequest.new(url).aget pioneer.http_opts
28
+ req = EM::HttpRequest.new(url, @request_opts).aget pioneer.http_opts
28
29
  if pioneer.headers
29
30
  req.headers{
30
31
  pioneer.headers.call(req)
@@ -1,3 +1,3 @@
1
1
  module Pioneer
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.9"
3
3
  end
@@ -2,4 +2,26 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe Pioneer::Base do
5
+ before do
6
+ end
7
+
8
+ it "should pass request_opts as a proc" do
9
+ request_opts = proc do
10
+ {
11
+ bind: {host: "192.168.1.1", port: '0'}
12
+ }
13
+ end
14
+ @pioneer = Pioneer::Crawler.new(request_opts: request_opts)
15
+ @pioneer.request_opts[:bind][:host].must_equal "192.168.1.1"
16
+ end
17
+
18
+ it "should pass request_opts as a Hash" do
19
+ request_opts = begin
20
+ {
21
+ bind: {host: "192.168.1.1", port: '0'}
22
+ }
23
+ end
24
+ @pioneer = Pioneer::Crawler.new(request_opts: request_opts)
25
+ @pioneer.request_opts[:bind][:host].must_equal "192.168.1.1"
26
+ end
5
27
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pioneer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.9
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-21 00:00:00.000000000Z
12
+ date: 2012-04-11 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yajl-ruby
16
- requirement: &77891380 !ruby/object:Gem::Requirement
16
+ requirement: &76605720 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *77891380
24
+ version_requirements: *76605720
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: nokogiri
27
- requirement: &77891160 !ruby/object:Gem::Requirement
27
+ requirement: &76605500 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *77891160
35
+ version_requirements: *76605500
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: em-synchrony
38
- requirement: &77890920 !ruby/object:Gem::Requirement
38
+ requirement: &76605290 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *77890920
46
+ version_requirements: *76605290
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: em-http-request
49
- requirement: &77890690 !ruby/object:Gem::Requirement
49
+ requirement: &76605010 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *77890690
57
+ version_requirements: *76605010
58
58
  description: Simple async HTTP crawler based on em-synchrony
59
59
  email:
60
60
  - pedro.yanoviches@gmail.com