crabfarm 0.0.13 → 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8c7539be24f102d8e0e7eb4961b2bb10678cfda2
4
- data.tar.gz: 7c5fc40e8a2fa4a5b2474b1b8b34bc5287b88d2f
3
+ metadata.gz: 5ffa69cbe5f984ef8f3d348bcec410f1676f8409
4
+ data.tar.gz: 9f496ef1a4bd75caa56fac8c26ef24b69a284acc
5
5
  SHA512:
6
- metadata.gz: bc1848de3ad1f401834f17d3c7691e6cfa90f17939ec5cde588fc1fc933f25e4ecd6af3b1be43a26d42182bb4f008fbd6d42b68ffe23fcd195b396540bfb72db
7
- data.tar.gz: 6b5a780eddbf1ca4cdf53b9d5ababfcd10825badda27f170e51f1e07ea22a12ff5aabea661f35166cf4f153955b9f13a589bbafe556efa845630a77cd2ce7ca1
6
+ metadata.gz: 976296a79fa1281b7415ec823b9e4c1e8c01d8f9280d13f382c507cba69f64a26c508f0bd9fccae7eb4d377101066cc66b33673939eb04e6133d33591658d468
7
+ data.tar.gz: 6d0cca01d8dc9bef52d888c72f6af1a3e623f14df75913b16c946db9fa86d67f9a3d883fec7c85eef30ff80f10e529e1a20f819f4bad76ba3383db99cd47bdff
@@ -22,6 +22,7 @@ module Crabfarm
22
22
  @pool = _pool
23
23
  @store = _store
24
24
  @params = _params
25
+ @events = []
25
26
 
26
27
  @dsl = Strategies.load(:browser_dsl, class_browser_dsl || Crabfarm.config.browser_dsl)
27
28
  @builder = Strategies.load(:output_builder, class_output_builder || Crabfarm.config.output_builder)
@@ -43,6 +44,18 @@ module Crabfarm
43
44
  raise NotImplementedError.new
44
45
  end
45
46
 
47
+ def event(_type, _msg)
48
+ @events << { created_at: Time.current, type: _type, msg: _msg }
49
+ end
50
+
51
+ def alert(_msg)
52
+ event(:alert, _msg)
53
+ end
54
+
55
+ def info(_msg)
56
+ event(:info, _msg)
57
+ end
58
+
46
59
  def fork_each(_enumerator, &_block)
47
60
  session_id = 0
48
61
  mutex = Mutex.new
@@ -12,11 +12,9 @@ module Crabfarm
12
12
  end
13
13
 
14
14
  def load
15
- unless @loaded
16
- init_phantom_if_required
17
- @pool = DriverBucketPool.new build_driver_factory
18
- @loaded = true
19
- end
15
+ init_phantom_if_required
16
+ init_driver_pool
17
+ @loaded = true
20
18
  end
21
19
 
22
20
  def run_state(_name, _params={})
@@ -27,28 +25,44 @@ module Crabfarm
27
25
  end
28
26
 
29
27
  def reset
30
- load
31
28
  @store.reset
32
- @pool.reset
29
+ @pool.reset unless @pool.nil?
33
30
  end
34
31
 
35
32
  def release
36
- if @loaded
37
- @pool.release
38
- @phantom.stop unless @phantom.nil?
39
- @loaded = false
40
- end
33
+ release_driver_pool
34
+ release_phantom
35
+ @loaded = false
41
36
  end
42
37
 
43
38
  private
44
39
 
40
+ def init_driver_pool
41
+ @pool = DriverBucketPool.new build_driver_factory if @pool.nil?
42
+ end
43
+
44
+ def release_driver_pool
45
+ @pool.release unless @pool.nil?
46
+ @pool = nil
47
+ end
48
+
45
49
  def init_phantom_if_required
46
- if config.phantom_mode_enabled?
47
- @phantom = PhantomRunner.new phantom_config
48
- @phantom.start
50
+ if config.phantom_mode_enabled? and @phantom.nil?
51
+ @phantom = load_and_start_phantom
49
52
  end
50
53
  end
51
54
 
55
+ def load_and_start_phantom
56
+ new_phantom = PhantomRunner.new phantom_config
57
+ new_phantom.start
58
+ return new_phantom
59
+ end
60
+
61
+ def release_phantom
62
+ @phantom.stop unless @phantom.nil?
63
+ @phantom = nil
64
+ end
65
+
52
66
  def build_driver_factory
53
67
  if @phantom
54
68
  PhantomDriverFactory.new @phantom, driver_config
@@ -8,6 +8,7 @@ module Crabfarm
8
8
  def initialize
9
9
  @running = true
10
10
  @working = false
11
+ @fatal = nil
11
12
  @lock = Mutex.new
12
13
  @thread = Thread.new { crawl_loop }
13
14
  end
@@ -19,7 +20,9 @@ module Crabfarm
19
20
 
20
21
  def change_state(_name, _params={}, _wait=nil)
21
22
  @lock.synchronize {
22
- if @working
23
+ if @fatal
24
+ raise CrawlerError.new @fatal
25
+ elsif @working
23
26
  raise StillWorkingError.new unless matches_next_state? _name, _params
24
27
  wait_and_load_struct _wait
25
28
  elsif matches_current_state? _name, _params
@@ -77,6 +80,7 @@ module Crabfarm
77
80
  end
78
81
 
79
82
  def state_as_struct
83
+ raise CrawlerError.new @fatal if @fatal
80
84
  raise CrawlerError.new @error if @error
81
85
 
82
86
  OpenStruct.new({
@@ -118,6 +122,11 @@ module Crabfarm
118
122
  rescue Exception => e
119
123
  logger.fatal "StateLoop: unhandled exception!"
120
124
  logger.fatal e
125
+
126
+ @lock.synchronize {
127
+ @working = false
128
+ @fatal = e
129
+ }
121
130
  ensure
122
131
  context.release
123
132
  end
@@ -1,8 +1,11 @@
1
1
  require 'net/http'
2
+ require 'timeout'
2
3
 
3
4
  module Crabfarm
4
5
  class PhantomRunner
5
6
 
7
+ PHANTOM_START_TM = 5 # seconds
8
+
6
9
  attr_reader :port
7
10
 
8
11
  def initialize(_config={})
@@ -13,8 +16,7 @@ module Crabfarm
13
16
  def start
14
17
  find_available_port
15
18
  Crabfarm.logger.info "Starting phantomjs in port #{@port}"
16
- @pid = Process.spawn({}, phantomjs_cmd)
17
- wait_for_server
19
+ @pid = spawn_phantomjs
18
20
  Crabfarm.logger.info "Phantomjs started (PID: #{@pid})"
19
21
  end
20
22
 
@@ -30,6 +32,18 @@ module Crabfarm
30
32
 
31
33
  private
32
34
 
35
+ def spawn_phantomjs
36
+ pid = Process.spawn({}, phantomjs_cmd)
37
+ begin
38
+ Timeout::timeout(PHANTOM_START_TM) { wait_for_server }
39
+ rescue Timeout::Error
40
+ Process.kill "INT", pid
41
+ Process.wait pid
42
+ raise
43
+ end
44
+ return pid
45
+ end
46
+
33
47
  def phantomjs_cmd
34
48
  cmd = [@config[:bin_path]]
35
49
  cmd << '--load-images=false' unless @config[:load_images]
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.0.13"
2
+ VERSION = "0.0.14"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-27 00:00:00.000000000 Z
11
+ date: 2015-03-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jbuilder