crabfarm 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8c7539be24f102d8e0e7eb4961b2bb10678cfda2
4
- data.tar.gz: 7c5fc40e8a2fa4a5b2474b1b8b34bc5287b88d2f
3
+ metadata.gz: 5ffa69cbe5f984ef8f3d348bcec410f1676f8409
4
+ data.tar.gz: 9f496ef1a4bd75caa56fac8c26ef24b69a284acc
5
5
  SHA512:
6
- metadata.gz: bc1848de3ad1f401834f17d3c7691e6cfa90f17939ec5cde588fc1fc933f25e4ecd6af3b1be43a26d42182bb4f008fbd6d42b68ffe23fcd195b396540bfb72db
7
- data.tar.gz: 6b5a780eddbf1ca4cdf53b9d5ababfcd10825badda27f170e51f1e07ea22a12ff5aabea661f35166cf4f153955b9f13a589bbafe556efa845630a77cd2ce7ca1
6
+ metadata.gz: 976296a79fa1281b7415ec823b9e4c1e8c01d8f9280d13f382c507cba69f64a26c508f0bd9fccae7eb4d377101066cc66b33673939eb04e6133d33591658d468
7
+ data.tar.gz: 6d0cca01d8dc9bef52d888c72f6af1a3e623f14df75913b16c946db9fa86d67f9a3d883fec7c85eef30ff80f10e529e1a20f819f4bad76ba3383db99cd47bdff
@@ -22,6 +22,7 @@ module Crabfarm
22
22
  @pool = _pool
23
23
  @store = _store
24
24
  @params = _params
25
+ @events = []
25
26
 
26
27
  @dsl = Strategies.load(:browser_dsl, class_browser_dsl || Crabfarm.config.browser_dsl)
27
28
  @builder = Strategies.load(:output_builder, class_output_builder || Crabfarm.config.output_builder)
@@ -43,6 +44,18 @@ module Crabfarm
43
44
  raise NotImplementedError.new
44
45
  end
45
46
 
47
+ def event(_type, _msg)
48
+ @events << { created_at: Time.current, type: _type, msg: _msg }
49
+ end
50
+
51
+ def alert(_msg)
52
+ event(:alert, _msg)
53
+ end
54
+
55
+ def info(_msg)
56
+ event(:info, _msg)
57
+ end
58
+
46
59
  def fork_each(_enumerator, &_block)
47
60
  session_id = 0
48
61
  mutex = Mutex.new
@@ -12,11 +12,9 @@ module Crabfarm
12
12
  end
13
13
 
14
14
  def load
15
- unless @loaded
16
- init_phantom_if_required
17
- @pool = DriverBucketPool.new build_driver_factory
18
- @loaded = true
19
- end
15
+ init_phantom_if_required
16
+ init_driver_pool
17
+ @loaded = true
20
18
  end
21
19
 
22
20
  def run_state(_name, _params={})
@@ -27,28 +25,44 @@ module Crabfarm
27
25
  end
28
26
 
29
27
  def reset
30
- load
31
28
  @store.reset
32
- @pool.reset
29
+ @pool.reset unless @pool.nil?
33
30
  end
34
31
 
35
32
  def release
36
- if @loaded
37
- @pool.release
38
- @phantom.stop unless @phantom.nil?
39
- @loaded = false
40
- end
33
+ release_driver_pool
34
+ release_phantom
35
+ @loaded = false
41
36
  end
42
37
 
43
38
  private
44
39
 
40
+ def init_driver_pool
41
+ @pool = DriverBucketPool.new build_driver_factory if @pool.nil?
42
+ end
43
+
44
+ def release_driver_pool
45
+ @pool.release unless @pool.nil?
46
+ @pool = nil
47
+ end
48
+
45
49
  def init_phantom_if_required
46
- if config.phantom_mode_enabled?
47
- @phantom = PhantomRunner.new phantom_config
48
- @phantom.start
50
+ if config.phantom_mode_enabled? and @phantom.nil?
51
+ @phantom = load_and_start_phantom
49
52
  end
50
53
  end
51
54
 
55
+ def load_and_start_phantom
56
+ new_phantom = PhantomRunner.new phantom_config
57
+ new_phantom.start
58
+ return new_phantom
59
+ end
60
+
61
+ def release_phantom
62
+ @phantom.stop unless @phantom.nil?
63
+ @phantom = nil
64
+ end
65
+
52
66
  def build_driver_factory
53
67
  if @phantom
54
68
  PhantomDriverFactory.new @phantom, driver_config
@@ -8,6 +8,7 @@ module Crabfarm
8
8
  def initialize
9
9
  @running = true
10
10
  @working = false
11
+ @fatal = nil
11
12
  @lock = Mutex.new
12
13
  @thread = Thread.new { crawl_loop }
13
14
  end
@@ -19,7 +20,9 @@ module Crabfarm
19
20
 
20
21
  def change_state(_name, _params={}, _wait=nil)
21
22
  @lock.synchronize {
22
- if @working
23
+ if @fatal
24
+ raise CrawlerError.new @fatal
25
+ elsif @working
23
26
  raise StillWorkingError.new unless matches_next_state? _name, _params
24
27
  wait_and_load_struct _wait
25
28
  elsif matches_current_state? _name, _params
@@ -77,6 +80,7 @@ module Crabfarm
77
80
  end
78
81
 
79
82
  def state_as_struct
83
+ raise CrawlerError.new @fatal if @fatal
80
84
  raise CrawlerError.new @error if @error
81
85
 
82
86
  OpenStruct.new({
@@ -118,6 +122,11 @@ module Crabfarm
118
122
  rescue Exception => e
119
123
  logger.fatal "StateLoop: unhandled exception!"
120
124
  logger.fatal e
125
+
126
+ @lock.synchronize {
127
+ @working = false
128
+ @fatal = e
129
+ }
121
130
  ensure
122
131
  context.release
123
132
  end
@@ -1,8 +1,11 @@
1
1
  require 'net/http'
2
+ require 'timeout'
2
3
 
3
4
  module Crabfarm
4
5
  class PhantomRunner
5
6
 
7
+ PHANTOM_START_TM = 5 # seconds
8
+
6
9
  attr_reader :port
7
10
 
8
11
  def initialize(_config={})
@@ -13,8 +16,7 @@ module Crabfarm
13
16
  def start
14
17
  find_available_port
15
18
  Crabfarm.logger.info "Starting phantomjs in port #{@port}"
16
- @pid = Process.spawn({}, phantomjs_cmd)
17
- wait_for_server
19
+ @pid = spawn_phantomjs
18
20
  Crabfarm.logger.info "Phantomjs started (PID: #{@pid})"
19
21
  end
20
22
 
@@ -30,6 +32,18 @@ module Crabfarm
30
32
 
31
33
  private
32
34
 
35
+ def spawn_phantomjs
36
+ pid = Process.spawn({}, phantomjs_cmd)
37
+ begin
38
+ Timeout::timeout(PHANTOM_START_TM) { wait_for_server }
39
+ rescue Timeout::Error
40
+ Process.kill "INT", pid
41
+ Process.wait pid
42
+ raise
43
+ end
44
+ return pid
45
+ end
46
+
33
47
  def phantomjs_cmd
34
48
  cmd = [@config[:bin_path]]
35
49
  cmd << '--load-images=false' unless @config[:load_images]
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.0.13"
2
+ VERSION = "0.0.14"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-27 00:00:00.000000000 Z
11
+ date: 2015-03-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jbuilder