pioneer 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -28,4 +28,12 @@
28
28
  ## v0.0.7
29
29
 
30
30
  * Rescuing of the Retry exception is moved to the Base class, so it is triggered in the context of the main loop (it will be executed with the global sleep timeout)
31
- * Added `headers` callback support
31
+ * Added `headers` callback support
32
+
33
+ ## v0.0.8
34
+
35
+ * em-http-request options support added
36
+
37
+ ## v0.0.9
38
+
39
+ * pushing `request_opts` to init
@@ -11,7 +11,7 @@ module Pioneer
11
11
  class HttpSkipRequest < StandardError; end
12
12
 
13
13
  class Base
14
- attr_reader :name, :concurrency, :sleep, :log_level, :redirect
14
+ attr_reader :name, :concurrency, :sleep, :log_level, :redirect, :request_opts
15
15
 
16
16
  def initialize(opts = {})
17
17
  raise UndefinedLocations, "you should specify `locations` method in your `self.class`" unless self.methods.include? :locations
@@ -26,10 +26,13 @@ module Pioneer
26
26
  @header = opts[:header] || nil
27
27
  @redirects = opts[:redirects] || nil
28
28
  @headers = opts[:headers] #|| nil
29
+ @request_opts = opts[:request_opts] #|| nil
29
30
  end
30
31
 
31
32
  #
32
33
  # Main method: starting crawling through locations
34
+ # If we catch Pioneer::HttpRetryRequest, we retry the request.
35
+ # If we catch Pioneer::HttpSkipRequest, we apparently just return nothing (TODO: confirm).
33
36
  #
34
37
  def start
35
38
  result = []
@@ -88,6 +91,20 @@ module Pioneer
88
91
  opts
89
92
  end
90
93
 
94
+ #
95
+ # EmHttpRequest options
96
+ #
97
+ def request_opts
98
+ opts = {}
99
+ opts = case @request_opts
100
+ when Proc
101
+ @request_opts.call
102
+ else
103
+ @request_opts
104
+ end if @request_opts
105
+ opts
106
+ end
107
+
91
108
  #
92
109
  # Generate random header for request
93
110
  #
@@ -1,12 +1,13 @@
1
1
  # encoding: utf-8
2
2
  module Pioneer
3
3
  class Request
4
- attr_reader :pioneer, :url, :result, :response, :error, :counter
4
+ attr_reader :pioneer, :url, :result, :response, :error, :counter, :request_opts
5
5
 
6
6
  def initialize(url, pioneer, counter=0)
7
7
  @pioneer = pioneer
8
8
  @url = parse_url(url)
9
9
  @counter = counter
10
+ @request_opts = @pioneer.request_opts
10
11
  end
11
12
 
12
13
  #
@@ -24,7 +25,7 @@ module Pioneer
24
25
  #
25
26
  def handle_request_error_or_return_result
26
27
  begin
27
- req = EM::HttpRequest.new(url).aget pioneer.http_opts
28
+ req = EM::HttpRequest.new(url, @request_opts).aget pioneer.http_opts
28
29
  if pioneer.headers
29
30
  req.headers{
30
31
  pioneer.headers.call(req)
@@ -1,3 +1,3 @@
1
1
  module Pioneer
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.9"
3
3
  end
@@ -2,4 +2,26 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe Pioneer::Base do
5
+ before do
6
+ end
7
+
8
+ it "should pass request_opts as a proc" do
9
+ request_opts = proc do
10
+ {
11
+ bind: {host: "192.168.1.1", port: '0'}
12
+ }
13
+ end
14
+ @pioneer = Pioneer::Crawler.new(request_opts: request_opts)
15
+ @pioneer.request_opts[:bind][:host].must_equal "192.168.1.1"
16
+ end
17
+
18
+ it "should pass request_opts as a Hash" do
19
+ request_opts = begin
20
+ {
21
+ bind: {host: "192.168.1.1", port: '0'}
22
+ }
23
+ end
24
+ @pioneer = Pioneer::Crawler.new(request_opts: request_opts)
25
+ @pioneer.request_opts[:bind][:host].must_equal "192.168.1.1"
26
+ end
5
27
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pioneer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.9
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-21 00:00:00.000000000Z
12
+ date: 2012-04-11 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yajl-ruby
16
- requirement: &77891380 !ruby/object:Gem::Requirement
16
+ requirement: &76605720 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *77891380
24
+ version_requirements: *76605720
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: nokogiri
27
- requirement: &77891160 !ruby/object:Gem::Requirement
27
+ requirement: &76605500 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *77891160
35
+ version_requirements: *76605500
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: em-synchrony
38
- requirement: &77890920 !ruby/object:Gem::Requirement
38
+ requirement: &76605290 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *77890920
46
+ version_requirements: *76605290
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: em-http-request
49
- requirement: &77890690 !ruby/object:Gem::Requirement
49
+ requirement: &76605010 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *77890690
57
+ version_requirements: *76605010
58
58
  description: Simple async HTTP crawler based on em-synchrony
59
59
  email:
60
60
  - pedro.yanoviches@gmail.com