crabfarm 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/crabfarm/base_state.rb +13 -0
- data/lib/crabfarm/context.rb +29 -15
- data/lib/crabfarm/engines/safe_state_loop.rb +10 -1
- data/lib/crabfarm/phantom_runner.rb +16 -2
- data/lib/crabfarm/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ffa69cbe5f984ef8f3d348bcec410f1676f8409
|
4
|
+
data.tar.gz: 9f496ef1a4bd75caa56fac8c26ef24b69a284acc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 976296a79fa1281b7415ec823b9e4c1e8c01d8f9280d13f382c507cba69f64a26c508f0bd9fccae7eb4d377101066cc66b33673939eb04e6133d33591658d468
|
7
|
+
data.tar.gz: 6d0cca01d8dc9bef52d888c72f6af1a3e623f14df75913b16c946db9fa86d67f9a3d883fec7c85eef30ff80f10e529e1a20f819f4bad76ba3383db99cd47bdff
|
data/lib/crabfarm/base_state.rb
CHANGED
@@ -22,6 +22,7 @@ module Crabfarm
|
|
22
22
|
@pool = _pool
|
23
23
|
@store = _store
|
24
24
|
@params = _params
|
25
|
+
@events = []
|
25
26
|
|
26
27
|
@dsl = Strategies.load(:browser_dsl, class_browser_dsl || Crabfarm.config.browser_dsl)
|
27
28
|
@builder = Strategies.load(:output_builder, class_output_builder || Crabfarm.config.output_builder)
|
@@ -43,6 +44,18 @@ module Crabfarm
|
|
43
44
|
raise NotImplementedError.new
|
44
45
|
end
|
45
46
|
|
47
|
+
def event(_type, _msg)
|
48
|
+
@events << { created_at: Time.current, type: _type, msg: _msg }
|
49
|
+
end
|
50
|
+
|
51
|
+
def alert(_msg)
|
52
|
+
event(:alert, _msg)
|
53
|
+
end
|
54
|
+
|
55
|
+
def info(_msg)
|
56
|
+
event(:info, _msg)
|
57
|
+
end
|
58
|
+
|
46
59
|
def fork_each(_enumerator, &_block)
|
47
60
|
session_id = 0
|
48
61
|
mutex = Mutex.new
|
data/lib/crabfarm/context.rb
CHANGED
@@ -12,11 +12,9 @@ module Crabfarm
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def load
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
@loaded = true
|
19
|
-
end
|
15
|
+
init_phantom_if_required
|
16
|
+
init_driver_pool
|
17
|
+
@loaded = true
|
20
18
|
end
|
21
19
|
|
22
20
|
def run_state(_name, _params={})
|
@@ -27,28 +25,44 @@ module Crabfarm
|
|
27
25
|
end
|
28
26
|
|
29
27
|
def reset
|
30
|
-
load
|
31
28
|
@store.reset
|
32
|
-
@pool.reset
|
29
|
+
@pool.reset unless @pool.nil?
|
33
30
|
end
|
34
31
|
|
35
32
|
def release
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
@loaded = false
|
40
|
-
end
|
33
|
+
release_driver_pool
|
34
|
+
release_phantom
|
35
|
+
@loaded = false
|
41
36
|
end
|
42
37
|
|
43
38
|
private
|
44
39
|
|
40
|
+
def init_driver_pool
|
41
|
+
@pool = DriverBucketPool.new build_driver_factory if @pool.nil?
|
42
|
+
end
|
43
|
+
|
44
|
+
def release_driver_pool
|
45
|
+
@pool.release unless @pool.nil?
|
46
|
+
@pool = nil
|
47
|
+
end
|
48
|
+
|
45
49
|
def init_phantom_if_required
|
46
|
-
if config.phantom_mode_enabled?
|
47
|
-
@phantom = PhantomRunner.new phantom_config
|
48
|
-
@phantom.start
|
50
|
+
if config.phantom_mode_enabled? and @phantom.nil?
|
51
|
+
@phantom = load_and_start_phantom
|
49
52
|
end
|
50
53
|
end
|
51
54
|
|
55
|
+
def load_and_start_phantom
|
56
|
+
new_phantom = PhantomRunner.new phantom_config
|
57
|
+
new_phantom.start
|
58
|
+
return new_phantom
|
59
|
+
end
|
60
|
+
|
61
|
+
def release_phantom
|
62
|
+
@phantom.stop unless @phantom.nil?
|
63
|
+
@phantom = nil
|
64
|
+
end
|
65
|
+
|
52
66
|
def build_driver_factory
|
53
67
|
if @phantom
|
54
68
|
PhantomDriverFactory.new @phantom, driver_config
|
@@ -8,6 +8,7 @@ module Crabfarm
|
|
8
8
|
def initialize
|
9
9
|
@running = true
|
10
10
|
@working = false
|
11
|
+
@fatal = nil
|
11
12
|
@lock = Mutex.new
|
12
13
|
@thread = Thread.new { crawl_loop }
|
13
14
|
end
|
@@ -19,7 +20,9 @@ module Crabfarm
|
|
19
20
|
|
20
21
|
def change_state(_name, _params={}, _wait=nil)
|
21
22
|
@lock.synchronize {
|
22
|
-
if @working
|
23
|
+
if @fatal
|
24
|
+
raise CrawlerError.new @fatal
|
25
|
+
elsif @working
|
23
26
|
raise StillWorkingError.new unless matches_next_state? _name, _params
|
24
27
|
wait_and_load_struct _wait
|
25
28
|
elsif matches_current_state? _name, _params
|
@@ -77,6 +80,7 @@ module Crabfarm
|
|
77
80
|
end
|
78
81
|
|
79
82
|
def state_as_struct
|
83
|
+
raise CrawlerError.new @fatal if @fatal
|
80
84
|
raise CrawlerError.new @error if @error
|
81
85
|
|
82
86
|
OpenStruct.new({
|
@@ -118,6 +122,11 @@ module Crabfarm
|
|
118
122
|
rescue Exception => e
|
119
123
|
logger.fatal "StateLoop: unhandled exception!"
|
120
124
|
logger.fatal e
|
125
|
+
|
126
|
+
@lock.synchronize {
|
127
|
+
@working = false
|
128
|
+
@fatal = e
|
129
|
+
}
|
121
130
|
ensure
|
122
131
|
context.release
|
123
132
|
end
|
@@ -1,8 +1,11 @@
|
|
1
1
|
require 'net/http'
|
2
|
+
require 'timeout'
|
2
3
|
|
3
4
|
module Crabfarm
|
4
5
|
class PhantomRunner
|
5
6
|
|
7
|
+
PHANTOM_START_TM = 5 # seconds
|
8
|
+
|
6
9
|
attr_reader :port
|
7
10
|
|
8
11
|
def initialize(_config={})
|
@@ -13,8 +16,7 @@ module Crabfarm
|
|
13
16
|
def start
|
14
17
|
find_available_port
|
15
18
|
Crabfarm.logger.info "Starting phantomjs in port #{@port}"
|
16
|
-
@pid = Process.spawn({}, phantomjs_cmd)
|
17
|
-
wait_for_server
|
19
|
+
@pid = spawn_phantomjs
|
18
20
|
Crabfarm.logger.info "Phantomjs started (PID: #{@pid})"
|
19
21
|
end
|
20
22
|
|
@@ -30,6 +32,18 @@ module Crabfarm
|
|
30
32
|
|
31
33
|
private
|
32
34
|
|
35
|
+
def spawn_phantomjs
|
36
|
+
pid = Process.spawn({}, phantomjs_cmd)
|
37
|
+
begin
|
38
|
+
Timeout::timeout(PHANTOM_START_TM) { wait_for_server }
|
39
|
+
rescue Timeout::Error
|
40
|
+
Process.kill "INT", pid
|
41
|
+
Process.wait pid
|
42
|
+
raise
|
43
|
+
end
|
44
|
+
return pid
|
45
|
+
end
|
46
|
+
|
33
47
|
def phantomjs_cmd
|
34
48
|
cmd = [@config[:bin_path]]
|
35
49
|
cmd << '--load-images=false' unless @config[:load_images]
|
data/lib/crabfarm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crabfarm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.13
|
4
|
+
version: 0.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ignacio Baixas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jbuilder
|