crabfarm 0.0.13 → 0.0.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/crabfarm/base_state.rb +13 -0
- data/lib/crabfarm/context.rb +29 -15
- data/lib/crabfarm/engines/safe_state_loop.rb +10 -1
- data/lib/crabfarm/phantom_runner.rb +16 -2
- data/lib/crabfarm/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ffa69cbe5f984ef8f3d348bcec410f1676f8409
|
4
|
+
data.tar.gz: 9f496ef1a4bd75caa56fac8c26ef24b69a284acc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 976296a79fa1281b7415ec823b9e4c1e8c01d8f9280d13f382c507cba69f64a26c508f0bd9fccae7eb4d377101066cc66b33673939eb04e6133d33591658d468
|
7
|
+
data.tar.gz: 6d0cca01d8dc9bef52d888c72f6af1a3e623f14df75913b16c946db9fa86d67f9a3d883fec7c85eef30ff80f10e529e1a20f819f4bad76ba3383db99cd47bdff
|
data/lib/crabfarm/base_state.rb
CHANGED
@@ -22,6 +22,7 @@ module Crabfarm
|
|
22
22
|
@pool = _pool
|
23
23
|
@store = _store
|
24
24
|
@params = _params
|
25
|
+
@events = []
|
25
26
|
|
26
27
|
@dsl = Strategies.load(:browser_dsl, class_browser_dsl || Crabfarm.config.browser_dsl)
|
27
28
|
@builder = Strategies.load(:output_builder, class_output_builder || Crabfarm.config.output_builder)
|
@@ -43,6 +44,18 @@ module Crabfarm
|
|
43
44
|
raise NotImplementedError.new
|
44
45
|
end
|
45
46
|
|
47
|
+
def event(_type, _msg)
|
48
|
+
@events << { created_at: Time.current, type: _type, msg: _msg }
|
49
|
+
end
|
50
|
+
|
51
|
+
def alert(_msg)
|
52
|
+
event(:alert, _msg)
|
53
|
+
end
|
54
|
+
|
55
|
+
def info(_msg)
|
56
|
+
event(:info, _msg)
|
57
|
+
end
|
58
|
+
|
46
59
|
def fork_each(_enumerator, &_block)
|
47
60
|
session_id = 0
|
48
61
|
mutex = Mutex.new
|
data/lib/crabfarm/context.rb
CHANGED
@@ -12,11 +12,9 @@ module Crabfarm
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def load
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
@loaded = true
|
19
|
-
end
|
15
|
+
init_phantom_if_required
|
16
|
+
init_driver_pool
|
17
|
+
@loaded = true
|
20
18
|
end
|
21
19
|
|
22
20
|
def run_state(_name, _params={})
|
@@ -27,28 +25,44 @@ module Crabfarm
|
|
27
25
|
end
|
28
26
|
|
29
27
|
def reset
|
30
|
-
load
|
31
28
|
@store.reset
|
32
|
-
@pool.reset
|
29
|
+
@pool.reset unless @pool.nil?
|
33
30
|
end
|
34
31
|
|
35
32
|
def release
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
@loaded = false
|
40
|
-
end
|
33
|
+
release_driver_pool
|
34
|
+
release_phantom
|
35
|
+
@loaded = false
|
41
36
|
end
|
42
37
|
|
43
38
|
private
|
44
39
|
|
40
|
+
def init_driver_pool
|
41
|
+
@pool = DriverBucketPool.new build_driver_factory if @pool.nil?
|
42
|
+
end
|
43
|
+
|
44
|
+
def release_driver_pool
|
45
|
+
@pool.release unless @pool.nil?
|
46
|
+
@pool = nil
|
47
|
+
end
|
48
|
+
|
45
49
|
def init_phantom_if_required
|
46
|
-
if config.phantom_mode_enabled?
|
47
|
-
@phantom =
|
48
|
-
@phantom.start
|
50
|
+
if config.phantom_mode_enabled? and @phantom.nil?
|
51
|
+
@phantom = load_and_start_phantom
|
49
52
|
end
|
50
53
|
end
|
51
54
|
|
55
|
+
def load_and_start_phantom
|
56
|
+
new_phantom = PhantomRunner.new phantom_config
|
57
|
+
new_phantom.start
|
58
|
+
return new_phantom
|
59
|
+
end
|
60
|
+
|
61
|
+
def release_phantom
|
62
|
+
@phantom.stop unless @phantom.nil?
|
63
|
+
@phantom = nil
|
64
|
+
end
|
65
|
+
|
52
66
|
def build_driver_factory
|
53
67
|
if @phantom
|
54
68
|
PhantomDriverFactory.new @phantom, driver_config
|
data/lib/crabfarm/engines/safe_state_loop.rb
CHANGED
@@ -8,6 +8,7 @@ module Crabfarm
|
|
8
8
|
def initialize
|
9
9
|
@running = true
|
10
10
|
@working = false
|
11
|
+
@fatal = nil
|
11
12
|
@lock = Mutex.new
|
12
13
|
@thread = Thread.new { crawl_loop }
|
13
14
|
end
|
@@ -19,7 +20,9 @@ module Crabfarm
|
|
19
20
|
|
20
21
|
def change_state(_name, _params={}, _wait=nil)
|
21
22
|
@lock.synchronize {
|
22
|
-
if @working
|
23
|
+
if @fatal
|
24
|
+
raise CrawlerError.new @fatal
|
25
|
+
elsif @working
|
23
26
|
raise StillWorkingError.new unless matches_next_state? _name, _params
|
24
27
|
wait_and_load_struct _wait
|
25
28
|
elsif matches_current_state? _name, _params
|
@@ -77,6 +80,7 @@ module Crabfarm
|
|
77
80
|
end
|
78
81
|
|
79
82
|
def state_as_struct
|
83
|
+
raise CrawlerError.new @fatal if @fatal
|
80
84
|
raise CrawlerError.new @error if @error
|
81
85
|
|
82
86
|
OpenStruct.new({
|
@@ -118,6 +122,11 @@ module Crabfarm
|
|
118
122
|
rescue Exception => e
|
119
123
|
logger.fatal "StateLoop: unhandled exception!"
|
120
124
|
logger.fatal e
|
125
|
+
|
126
|
+
@lock.synchronize {
|
127
|
+
@working = false
|
128
|
+
@fatal = e
|
129
|
+
}
|
121
130
|
ensure
|
122
131
|
context.release
|
123
132
|
end
|
data/lib/crabfarm/phantom_runner.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
require 'net/http'
|
2
|
+
require 'timeout'
|
2
3
|
|
3
4
|
module Crabfarm
|
4
5
|
class PhantomRunner
|
5
6
|
|
7
|
+
PHANTOM_START_TM = 5 # seconds
|
8
|
+
|
6
9
|
attr_reader :port
|
7
10
|
|
8
11
|
def initialize(_config={})
|
@@ -13,8 +16,7 @@ module Crabfarm
|
|
13
16
|
def start
|
14
17
|
find_available_port
|
15
18
|
Crabfarm.logger.info "Starting phantomjs in port #{@port}"
|
16
|
-
@pid =
|
17
|
-
wait_for_server
|
19
|
+
@pid = spawn_phantomjs
|
18
20
|
Crabfarm.logger.info "Phantomjs started (PID: #{@pid})"
|
19
21
|
end
|
20
22
|
|
@@ -30,6 +32,18 @@ module Crabfarm
|
|
30
32
|
|
31
33
|
private
|
32
34
|
|
35
|
+
def spawn_phantomjs
|
36
|
+
pid = Process.spawn({}, phantomjs_cmd)
|
37
|
+
begin
|
38
|
+
Timeout::timeout(PHANTOM_START_TM) { wait_for_server }
|
39
|
+
rescue Timeout::Error
|
40
|
+
Process.kill "INT", pid
|
41
|
+
Process.wait pid
|
42
|
+
raise
|
43
|
+
end
|
44
|
+
return pid
|
45
|
+
end
|
46
|
+
|
33
47
|
def phantomjs_cmd
|
34
48
|
cmd = [@config[:bin_path]]
|
35
49
|
cmd << '--load-images=false' unless @config[:load_images]
|
data/lib/crabfarm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crabfarm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.13
|
4
|
+
version: 0.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ignacio Baixas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: jbuilder
|