crabfarm 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/crabfarm +9 -0
- data/lib/crabfarm.rb +24 -0
- data/lib/crabfarm/adapters.rb +23 -0
- data/lib/crabfarm/adapters/capybara_adapter.rb +7 -0
- data/lib/crabfarm/adapters/surfer_adapter.rb +7 -0
- data/lib/crabfarm/adapters/watir_adapter.rb +7 -0
- data/lib/crabfarm/base_parser.rb +26 -0
- data/lib/crabfarm/base_state.rb +41 -0
- data/lib/crabfarm/cli.rb +79 -0
- data/lib/crabfarm/configuration.rb +83 -0
- data/lib/crabfarm/context.rb +32 -0
- data/lib/crabfarm/default_driver_factory.rb +37 -0
- data/lib/crabfarm/driver_bucket.rb +50 -0
- data/lib/crabfarm/driver_bucket_pool.rb +48 -0
- data/lib/crabfarm/dsl/surfer.rb +22 -0
- data/lib/crabfarm/dsl/surfer/search_context.rb +134 -0
- data/lib/crabfarm/dsl/surfer/surf_context.rb +58 -0
- data/lib/crabfarm/engines/safe_state_loop.rb +96 -0
- data/lib/crabfarm/errors.rb +50 -0
- data/lib/crabfarm/loader.rb +83 -0
- data/lib/crabfarm/modes/console.rb +86 -0
- data/lib/crabfarm/modes/generator.rb +120 -0
- data/lib/crabfarm/modes/server.rb +78 -0
- data/lib/crabfarm/module_helper.rb +35 -0
- data/lib/crabfarm/phantom_driver_factory.rb +33 -0
- data/lib/crabfarm/phantom_runner.rb +74 -0
- data/lib/crabfarm/rspec.rb +39 -0
- data/lib/crabfarm/state_store.rb +24 -0
- data/lib/crabfarm/support/custom_puma.rb +64 -0
- data/lib/crabfarm/templates/Crabfile.erb +3 -0
- data/lib/crabfarm/templates/Gemfile.erb +7 -0
- data/lib/crabfarm/templates/boot.rb.erb +13 -0
- data/lib/crabfarm/templates/crabfarm_bin.erb +3 -0
- data/lib/crabfarm/templates/dot_gitignore.erb +1 -0
- data/lib/crabfarm/templates/dot_gitkeep.erb +0 -0
- data/lib/crabfarm/templates/dot_rspec.erb +4 -0
- data/lib/crabfarm/templates/parser.rb.erb +8 -0
- data/lib/crabfarm/templates/parser_spec.rb.erb +7 -0
- data/lib/crabfarm/templates/spec_helper.rb.erb +22 -0
- data/lib/crabfarm/templates/state.rb.erb +8 -0
- data/lib/crabfarm/templates/state_spec.rb.erb +7 -0
- data/lib/crabfarm/version.rb +3 -0
- metadata +359 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
module Crabfarm
  # Keeps one DriverBucket per session id. When the module settings report
  # phantom as enabled, the pool also starts a PhantomRunner on creation and
  # stops it on #release.
  class DriverBucketPool

    # _module - object that responds to #settings; the settings are queried for
    #           phantom_enabled?, phantom_config, driver_config and
    #           driver_factory.
    def initialize(_module)
      @module = _module
      @buckets = {}
      @phantom = nil

      init_phantom_if_required
    end

    # Returns the bucket registered for the given session id, creating it on
    # first use.
    #
    # _session_id - symbol, string or any object with a meaningful #to_s
    #               (e.g. an integer id). Defaults to :default_driver. Ids are
    #               normalised with to_s.to_sym, so "a", :a map to the same
    #               bucket.
    def driver(_session_id=nil)
      _session_id ||= :default_driver
      # to_s first so ids without #to_sym (integers, for instance) are accepted.
      key = _session_id.to_s.to_sym
      @buckets[key] ||= DriverBucket.new(@module, _session_id, build_driver_factory)
    end

    # Resets every bucket and forgets them all.
    def reset
      @buckets.each_value(&:reset)
      @buckets = {}
    end

    # Resets the pool and stops the phantom runner, if one was started.
    def release
      reset
      @phantom.stop if @phantom
    end

    private

    # Creates and starts the phantom runner only when settings enable it.
    def init_phantom_if_required
      return unless @module.settings.phantom_enabled?

      @phantom = PhantomRunner.new @module.settings.phantom_config
      @phantom.start
    end

    # Picks the factory handed to new buckets: phantom backed when phantom is
    # enabled, otherwise the factory configured in settings, otherwise a
    # DefaultDriverFactory built from driver_config.
    def build_driver_factory
      settings = @module.settings
      if settings.phantom_enabled?
        PhantomDriverFactory.new @phantom, settings.driver_config
      else
        settings.driver_factory || DefaultDriverFactory.new(settings.driver_config)
      end
    end

  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'crabfarm/dsl/surfer/search_context'
|
2
|
+
require 'crabfarm/dsl/surfer/surf_context'
|
3
|
+
|
4
|
+
module Crabfarm
  module Dsl
    module Surfer

      # Base class for surfer DSL errors. Besides the message it keeps the
      # context that raised it and a snapshot of the page source taken at
      # construction time.
      class Error < StandardError
        # Page source captured when the error was built, or nil if it could
        # not be read.
        attr_reader :source

        # _message - error message (anything StandardError accepts).
        # _ctx     - context the error originated from; its root context is
        #            asked for page_source.
        def initialize(_message, _ctx)
          super(_message)
          @ctx = _ctx
          @source = snapshot_source(_ctx) # cache page source for future reference
        end

        private

        # Best effort: any StandardError while reading the source yields nil.
        def snapshot_source(_ctx)
          _ctx.root_context.page_source
        rescue StandardError
          nil
        end
      end

      # Raised when an operation needs at least one element and the set is empty.
      class EmptySetError < Error; end

      # Wraps errors coming from the underlying webdriver.
      class WebdriverError < Error; end
    end
  end
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
module Crabfarm
  module Dsl
    module Surfer
      # A set of elements plus a link to the context that produced it.
      # Read-only collection methods are delegated to the element list, and
      # unknown methods are forwarded to the elements themselves (see
      # #method_missing).
      class SearchContext
        include Enumerable
        extend Forwardable

        TIMEOUT = 10.0 # Default timeout for waiting operations

        # _elements - list of elements held by this context.
        # _parent   - context this one was derived from, nil for a root.
        def initialize(_elements, _parent)
          @elements = _elements
          @parent = _parent
        end

        # return the context's root context
        def root_context
          return @parent.root_context if @parent
          self
        end

        # return the context's parent context
        def parent_context
          @parent
        end

        # forward read-only array methods to context
        def_delegators :context, :each, :[], :length, :count, :empty?, :first, :last

        # yield individual SearchContext for each element contained in this result
        # (returns an enumerator when no block is given)
        def explode(&_block)
          return enum_for(__method__) if _block.nil?
          context.each do |el|
            _block.call SearchContext.new([el], self)
          end
        end

        # searches for elements that match a given selector
        #
        # _selector - optional css selector, stored under the :css option.
        # _options  - options handed to each element's find_elements, minus:
        #             :wait    - one of :present, :visible, :enabled,
        #                        :not_present, :not_visible; when given the
        #                        search is repeated until the condition holds.
        #             :timeout - timeout for the wait (TIMEOUT when absent);
        #                        only consumed when :wait is given.
        #
        # Returns a new SearchContext whose parent is this context. The
        # caller's options hash is left untouched.
        def search(_selector=nil, _options={})
          options = _options.dup # never modify the hash owned by the caller
          options[:css] = _selector if _selector

          wait_mode = options.delete :wait
          return SearchContext.new(search_elements(options), self) unless wait_mode

          # retrieve timeout
          timeout = options.delete :timeout
          timeout = TIMEOUT if timeout.nil?

          # use a selenium timeout
          wait = Selenium::WebDriver::Wait.new(timeout: timeout)
          wait.until do
            new_elements = search_elements options

            # test wait condition
            ok = case wait_mode
            when :present then !new_elements.empty?
            when :visible then !new_elements.empty? && new_elements.first.displayed?
            when :enabled then !new_elements.empty? && new_elements.first.displayed? && new_elements.first.enabled?
            when :not_present then new_elements.empty?
            when :not_visible then new_elements.none? { |e| e.displayed? }
            else
              # NOTE(review): SetupError is not defined in the code visible
              # here; confirm it exists, otherwise this raises NameError.
              raise SetupError.new "Invalid wait mode '#{wait_mode}'"
            end

            # a nil result tells the wait to keep polling
            SearchContext.new new_elements, self if ok
          end
        end

        # clears and sends_keys to this context main element
        def fill(_value)
          raise EmptySetError.new('Cannot call \'fill\' on an empty set', self) if empty?
          wrap_errors do
            context.first.clear
            context.first.send_keys _value
          end
        end

        # Any methods missing are forwarded to the main element (first).
        # A method named "<name>_all" is instead sent as <name> to every
        # element and the results are returned as an array ([] when empty).
        def method_missing(_method, *_args, &_block)
          wrap_errors do
            m = /^(.*)_all$/.match _method.to_s
            if m then
              return [] if empty?
              context.map { |e| e.send(m[1], *_args, &_block) }
            else
              raise EmptySetError.new("Cannot call '#{_method}' on an empty set", self) if empty?
              context.first.send(_method, *_args, &_block)
            end
          end
        end

        # Mirrors #method_missing: besides the methods defined here, reports
        # what the first element responds to ("<name>_all" is checked as
        # <name>). An empty set reports true for every name.
        def respond_to?(_method, _include_all=false)
          return true if super
          return true if empty?

          # the capture group is required: without it m[1] is nil and the
          # element's respond_to? raises TypeError.
          m = /^(.*)_all$/.match _method.to_s
          target = m ? m[1] : _method
          context.first.respond_to? target, _include_all
        end

        private

        # wrap every selenium errors that happen inside block.
        def wrap_errors
          yield
        rescue Selenium::WebDriver::Error::WebDriverError => e
          raise WebdriverError.new e, self
        end

        # base filtering method, expands current context
        # (concatenates find_elements results of every element)
        def search_elements(_options)
          wrap_errors do
            context.inject([]) do |r, element|
              r + element.find_elements(_options)
            end
          end
        end

        # returns the current context
        def context
          @elements
        end

      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Crabfarm
  module Dsl
    module Surfer
      # Root search context bound to a driver bucket: its single context
      # element is the bucket's driver, so searches start from the driver and
      # unknown methods are forwarded to it.
      class SurfContext < SearchContext
        extend Forwardable

        # Retries used by #goto when the driver does not provide its own
        # max_retries. NOTE(review): 3 is an assumed default, the original
        # code never defined a value; confirm the intended limit.
        DEFAULT_MAX_RETRIES = 3

        def_delegators :@bucket, :parse, :setup
        def_delegators 'driver.navigate', :back, :forward, :refresh

        # _bucket - driver bucket; must respond to original, reset, parse and
        #           setup.
        def initialize(_bucket)
          super nil, nil
          @bucket = _bucket
        end

        # The driver held by the bucket.
        def driver
          @bucket.original
        end

        # Resets the bucket.
        def quit
          @bucket.reset
        end

        # Current driver url parsed as a URI.
        def current_uri
          URI.parse driver.current_url
        end

        # All cookies known to the driver.
        def cookies
          driver.manage.all_cookies
        end

        # Navigates the driver to _url.
        #
        # _url    - target url.
        # _params - optional object responding to to_query; the resulting
        #           query is appended with "?" or, when the url already has a
        #           query string, with "&".
        #
        # On Timeout::Error the bucket is reset and the request retried after
        # a one second pause, up to max_retries times; the last timeout is
        # re-raised.
        def goto(_url, _params=nil)
          if _params
            separator = _url.include?('?') ? '&' : '?'
            _url += "#{separator}#{_params.to_query}"
          end
          retries = 0

          loop do
            begin
              @bucket.reset if retries > 0
              driver.get(_url)
              break
            rescue Timeout::Error #, Selenium::WebDriver::Error::UnknownError
              # TODO: log this
              raise if retries >= max_retries
              retries += 1
              sleep 1.0
            end
          end
        end

        private

        # Previously this name was undefined here and fell through
        # method_missing to the driver. A driver that does answer max_retries
        # is still honoured; otherwise the default applies.
        def max_retries
          return driver.max_retries if driver.respond_to?(:max_retries)
          DEFAULT_MAX_RETRIES
        end

        # The driver is the only element of this context.
        def context
          [driver]
        end

      end
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module Crabfarm
  module Engines
    # Runs crawler states one at a time on a background thread. Callers
    # schedule a state with #change_state and read the outcome of the last
    # finished state with #wait_for_state.
    class SafeStateLoop

      # _loader - object responding to load_context; the returned context
      #           runs the states and is released by #release.
      def initialize(_loader)
        @context = _loader.load_context
        @running = true
        @working = false
        @lock = Mutex.new
        @thread = Thread.new { crawl_loop }
      end

      # Stops the loop, waits for the worker thread to finish and releases
      # the context.
      def release
        @running = false
        @thread.join
        @context.release
      end

      # Schedules a state and returns the resulting state struct.
      #
      # _name, _params - handed to the context's run_state.
      # _wait          - time to wait for the state to finish; nil does not
      #                  wait at all.
      #
      # Raises StillWorkingError when a state is already running, and
      # TimeoutError when the state is still running once the wait is over.
      def change_state(_name, _params={}, _wait=nil)
        @lock.synchronize do
          raise StillWorkingError.new if @working
          @next_state_name = _name
          @next_state_params = _params
          @working = true

          wait_and_load_struct _wait
        end
      end

      # Returns the last state struct, optionally waiting up to _wait for the
      # running state to finish. Raises TimeoutError if it is still running.
      def wait_for_state(_wait=nil)
        @lock.synchronize do
          wait_and_load_struct _wait
        end
      end

      # Kills the worker thread if a state is running and starts a fresh one.
      def cancel
        @lock.synchronize do
          next unless @working

          @thread.terminate
          @thread.join
          @thread = Thread.new { crawl_loop }
          @working = false
        end
      end

      private

      # need to use this method because mutex are not reentrant and monitors are slow.
      # Must be called with @lock held.
      def wait_and_load_struct(_wait)
        wait_while_working _wait unless _wait.nil?
        raise TimeoutError.new if @working
        state_as_struct
      end

      # Polls @working, releasing @lock while sleeping, until the state is
      # done or _wait has elapsed.
      def wait_while_working(_wait)
        # TODO: use condition variables instead of wait loops
        start = Time.now
        while @working && Time.now - start < _wait.seconds
          @lock.sleep 0.25
        end
      end

      # Builds the struct describing the last finished state; raises
      # CrawlerError when that state failed.
      def state_as_struct
        raise CrawlerError.new @error if @error

        OpenStruct.new(name: @state_name, params: @state_params, doc: @doc)
      end

      # Worker thread body: idles until a state is scheduled, runs it and
      # publishes the result.
      def crawl_loop
        while @running
          unless @working
            sleep 0.2
            next
          end

          run_scheduled_state

          @state_name = @next_state_name
          @state_params = @next_state_params
          @working = false
        end
      end

      # Runs the scheduled state, storing either its output or the error.
      def run_scheduled_state
        last_state = @context.run_state @next_state_name, @next_state_params
        @doc = last_state.output.attributes!
        @error = nil
      rescue Exception => e
        # NOTE(review): rescuing Exception looks intentional, so no failure
        # can end the worker thread; confirm before narrowing.
        @doc = nil
        @error = e
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Crabfarm

  # Root of every error raised by this library.
  class Error < StandardError; end

  class ConfigurationError < Error; end

  # Raised when a named entity playing a given role cannot be found.
  class EntityNotFoundError < Error
    attr_accessor :role, :name

    # _role - kind of entity that was looked up (used in the message).
    # _name - name that was looked up.
    def initialize(_role, _name)
      super("The required #{_role} was not found (#{_name})")
      @role = _role
      @name = _name
    end
  end

  # Errors that carry a numeric code and a hash payload.
  class ApiError < Error
    # Numeric code for this error, 500 unless a subclass overrides it.
    def code; 500 end

    # Payload describing the error. Note it returns a Hash, not a JSON string.
    def to_json; {} end
  end

  class StillWorkingError < ApiError
    def code; 409 end
  end

  class TimeoutError < ApiError
    def code; 408 end
  end

  # Error carrying the message and backtrace of a failure.
  class CrawlerBaseError < ApiError
    # _msg   - failure message; also becomes this exception's #message.
    # _trace - backtrace of the failure.
    def initialize(_msg, _trace)
      # Without this call #message would just be the class name.
      super(_msg)
      @exc = _msg
      @trace = _trace
    end

    # Hash with the failure message (:exception) and its :backtrace.
    def to_json
      {
        exception: @exc,
        backtrace: @trace
      }
    end
  end

  # CrawlerBaseError built directly from a rescued exception.
  class CrawlerError < CrawlerBaseError
    # _exc - the rescued exception.
    def initialize(_exc)
      super _exc.to_s, _exc.backtrace
    end
  end

end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
|
3
|
+
module Crabfarm

  # Loads a crawler project from a directory: evaluates its Crabfile into a
  # Configuration, requires every ruby file under "app" and exposes the
  # resulting module (a named module, or Object when no name is given).
  class Loader

    # Module the project was loaded into, nil while not loaded.
    attr_reader :module

    # _base_path    - project directory holding the Crabfile and app folder.
    # _module_name  - optional name of the module that wraps the project.
    # _config_block - optional block called with the Configuration after the
    #                 Crabfile has been evaluated.
    def initialize(_base_path, _module_name=nil, &_config_block)
      @path = _base_path
      @name = _module_name
      @config_block = _config_block
      # pick up a wrapper module that already exists, if any
      @module = if is_wrapped? then
        "::#{@name}".constantize rescue nil
      else nil end
    end

    # true when a wrapper module name was given.
    def is_wrapped?
      @name.present?
    end

    # true when a module is currently attached to this loader.
    def is_loaded?
      not @module.nil?
    end

    # Evaluates the Crabfile, requires the app files and stores the
    # configuration in the module's CF_CONFIG constant (replacing a previous
    # value).
    def load
      crabfile = load_crabfile(@path)
      @module = load_module(@name, File.join(@path, 'app'))
      @module.send(:remove_const, :CF_CONFIG) rescue nil
      @module.const_set :CF_CONFIG, crabfile
    end

    # Loads the project if required and returns a new Context for its module.
    def load_context
      load unless is_loaded?
      Context.new @module
    end

    # Removes the wrapper module constant (when wrapped) and detaches it.
    def unload
      Object.send(:remove_const, @name) if is_wrapped?
      @module = nil
    end

    private

    # Builds a Configuration by instance-evaluating the Crabfile found in
    # _path, then applies the config block if one was given.
    def load_crabfile(_path)
      crabfile = File.read(File.join(_path, 'Crabfile'))
      config = Configuration.new
      config.instance_eval crabfile
      @config_block.call(config) unless @config_block.nil?
      return config
    end

    # Requires every file under _path and returns the module they belong to.
    def load_module(_name, _path)
      require_all_as(_name, _path)
      if is_wrapped? then "::#{_name}".constantize else Object end
    end

    # Requires every .rb file below _src_path. Files that fail with a
    # NameError are retried on the next pass; when a full pass makes no
    # progress the first pending file is required again so its error
    # surfaces.
    #
    # The code is evaluated as a string so it can be wrapped in
    # "module ::<name>" when a name is given. _name is inserted verbatim and
    # must be a trusted constant name.
    def require_all_as(_name, _src_path)
      # inspect yields an escaped ruby literal, so quotes (or any other
      # character) in the path cannot break out of the generated code.
      pattern_literal = File.join(_src_path, '**/*').inspect

      loader_code = "
        pending = Dir.glob(#{pattern_literal}).select { |f| f.end_with? '.rb' }.map { |f| f[0...-3] }

        while pending.size > 0
          new_pending = []
          pending.each do |file|
            begin
              require file
            rescue NameError
              new_pending << file
            end
          end

          require new_pending.first if new_pending.size == pending.size
          pending = new_pending
        end
      "

      loader_code = "module ::#{_name}; #{loader_code}; end" if _name.present?
      Object.instance_eval loader_code
    end

  end

end
|