crabfarm 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/bin/crabfarm +9 -0
  3. data/lib/crabfarm.rb +24 -0
  4. data/lib/crabfarm/adapters.rb +23 -0
  5. data/lib/crabfarm/adapters/capybara_adapter.rb +7 -0
  6. data/lib/crabfarm/adapters/surfer_adapter.rb +7 -0
  7. data/lib/crabfarm/adapters/watir_adapter.rb +7 -0
  8. data/lib/crabfarm/base_parser.rb +26 -0
  9. data/lib/crabfarm/base_state.rb +41 -0
  10. data/lib/crabfarm/cli.rb +79 -0
  11. data/lib/crabfarm/configuration.rb +83 -0
  12. data/lib/crabfarm/context.rb +32 -0
  13. data/lib/crabfarm/default_driver_factory.rb +37 -0
  14. data/lib/crabfarm/driver_bucket.rb +50 -0
  15. data/lib/crabfarm/driver_bucket_pool.rb +48 -0
  16. data/lib/crabfarm/dsl/surfer.rb +22 -0
  17. data/lib/crabfarm/dsl/surfer/search_context.rb +134 -0
  18. data/lib/crabfarm/dsl/surfer/surf_context.rb +58 -0
  19. data/lib/crabfarm/engines/safe_state_loop.rb +96 -0
  20. data/lib/crabfarm/errors.rb +50 -0
  21. data/lib/crabfarm/loader.rb +83 -0
  22. data/lib/crabfarm/modes/console.rb +86 -0
  23. data/lib/crabfarm/modes/generator.rb +120 -0
  24. data/lib/crabfarm/modes/server.rb +78 -0
  25. data/lib/crabfarm/module_helper.rb +35 -0
  26. data/lib/crabfarm/phantom_driver_factory.rb +33 -0
  27. data/lib/crabfarm/phantom_runner.rb +74 -0
  28. data/lib/crabfarm/rspec.rb +39 -0
  29. data/lib/crabfarm/state_store.rb +24 -0
  30. data/lib/crabfarm/support/custom_puma.rb +64 -0
  31. data/lib/crabfarm/templates/Crabfile.erb +3 -0
  32. data/lib/crabfarm/templates/Gemfile.erb +7 -0
  33. data/lib/crabfarm/templates/boot.rb.erb +13 -0
  34. data/lib/crabfarm/templates/crabfarm_bin.erb +3 -0
  35. data/lib/crabfarm/templates/dot_gitignore.erb +1 -0
  36. data/lib/crabfarm/templates/dot_gitkeep.erb +0 -0
  37. data/lib/crabfarm/templates/dot_rspec.erb +4 -0
  38. data/lib/crabfarm/templates/parser.rb.erb +8 -0
  39. data/lib/crabfarm/templates/parser_spec.rb.erb +7 -0
  40. data/lib/crabfarm/templates/spec_helper.rb.erb +22 -0
  41. data/lib/crabfarm/templates/state.rb.erb +8 -0
  42. data/lib/crabfarm/templates/state_spec.rb.erb +7 -0
  43. data/lib/crabfarm/version.rb +3 -0
  44. metadata +359 -0
@@ -0,0 +1,48 @@
1
module Crabfarm
  # Pool of DriverBucket instances keyed by session id. When the module
  # settings report phantom as enabled, the pool also starts a PhantomRunner
  # on creation and stops it on release.
  class DriverBucketPool

    # _module - object whose `settings` respond to phantom_enabled?,
    #           phantom_config, driver_config and driver_factory.
    def initialize(_module)
      @module = _module
      @buckets = {}
      @phantom = nil

      init_phantom_if_required
    end

    # Returns the bucket registered for the given session id, building it on
    # first use. A nil id maps to :default_driver; buckets are indexed by the
    # symbolized id, but the bucket itself receives the id as given.
    def driver(_session_id=nil)
      _session_id ||= :default_driver
      @buckets[_session_id.to_sym] ||= DriverBucket.new(@module, _session_id, build_driver_factory)
    end

    # Resets every bucket and then forgets all of them.
    def reset
      @buckets.each_value(&:reset)
      @buckets = {}
    end

    # Resets the buckets and stops the phantom runner, if one was started.
    # The runner is stopped even when a bucket fails to reset, so it does not
    # outlive the pool; the bucket error still propagates to the caller.
    def release
      reset
    ensure
      @phantom.stop unless @phantom.nil?
    end

    private

    # Starts a PhantomRunner only when the settings enable phantom.
    def init_phantom_if_required
      return unless @module.settings.phantom_enabled?

      @phantom = PhantomRunner.new @module.settings.phantom_config
      @phantom.start
    end

    # Chooses the driver factory: phantom-backed when phantom is enabled,
    # otherwise the factory configured in the settings, otherwise the default.
    def build_driver_factory
      settings = @module.settings
      return PhantomDriverFactory.new(@phantom, settings.driver_config) if settings.phantom_enabled?

      settings.driver_factory || DefaultDriverFactory.new(settings.driver_config)
    end

  end
end
@@ -0,0 +1,22 @@
1
+ require 'crabfarm/dsl/surfer/search_context'
2
+ require 'crabfarm/dsl/surfer/surf_context'
3
+
4
module Crabfarm
  module Dsl
    module Surfer

      # Base error for the surfer DSL. Keeps the context that raised it and a
      # snapshot of the root context's page source taken at construction time.
      class Error < StandardError
        attr_reader :source

        # _message - error message.
        # _ctx     - context in which the error happened.
        def initialize(_message, _ctx)
          super(_message)
          @ctx = _ctx
          # Best effort: cache the page source for future reference, leaving
          # it nil when the source cannot be retrieved.
          @source = begin
            _ctx.root_context.page_source
          rescue StandardError
            nil
          end
        end
      end

      class EmptySetError < Error; end
      class WebdriverError < Error; end
    end
  end
end
@@ -0,0 +1,134 @@
1
module Crabfarm
  module Dsl
    module Surfer
      # A set of elements plus the context that produced it. Searches run
      # against every element of the set; unknown methods are forwarded to the
      # first element, or to all of them when suffixed with `_all`.
      class SearchContext
        include Enumerable
        extend Forwardable

        TIMEOUT = 10.0 # Default timeout for waiting operations

        # _elements - elements held by this context.
        # _parent   - context this one was derived from (nil for a root).
        def initialize(_elements, _parent)
          @elements = _elements
          @parent = _parent
        end

        # return the context's root context
        def root_context
          return @parent.root_context if @parent
          self
        end

        # return the context's parent context
        def parent_context
          @parent
        end

        # forward read-only array methods to context
        def_delegators :context, :each, :[], :length, :count, :empty?, :first, :last

        # yield individual SearchContext for each element contained in this result
        def explode(&_block)
          return enum_for(__method__) if _block.nil?
          context.each do |el|
            _block.call SearchContext.new([el], self)
          end
        end

        # Searches for elements that match a given selector.
        #
        # _selector - css selector, stored under the :css option when given.
        # _options  - options handed to each element's find_elements, plus:
        #             :wait    - wait mode (:present, :visible, :enabled,
        #                        :not_present or :not_visible); when set the
        #                        search is retried until the condition holds.
        #             :timeout - seconds to wait (defaults to TIMEOUT); only
        #                        consumed when :wait is given.
        #
        # Returns a new SearchContext whose parent is this context. The
        # caller's options hash is left untouched.
        def search(_selector=nil, _options={})
          options = _options.dup
          options[:css] = _selector if _selector

          wait_mode = options.delete :wait
          return SearchContext.new(search_elements(options), self) unless wait_mode

          timeout = options.delete :timeout
          timeout = TIMEOUT if timeout.nil?

          # use a selenium timeout
          wait = Selenium::WebDriver::Wait.new(timeout: timeout)
          wait.until do
            new_elements = search_elements options
            SearchContext.new new_elements, self if wait_condition_met?(wait_mode, new_elements)
          end
        end

        # clears and sends_keys to this context main element
        def fill(_value)
          raise EmptySetError.new('Cannot call \'fill\' on an empty set', self) if empty?
          wrap_errors do
            context.first.clear
            context.first.send_keys _value
          end
        end

        # Any methods missing are forwarded to the main element (first).
        # A `<name>_all` call maps `<name>` over every element instead and
        # returns the collected results ([] for an empty set).
        def method_missing(_method, *_args, &_block)
          wrap_errors do
            m = /\A(.*)_all\z/.match _method.to_s
            if m
              return [] if empty?
              context.map { |e| e.send(m[1], *_args, &_block) }
            else
              raise EmptySetError.new("Cannot call '#{_method}' on an empty set", self) if empty?
              context.first.send(_method, *_args, &_block)
            end
          end
        end

        # Mirrors method_missing: an empty set claims to respond to anything,
        # otherwise the first element is asked (with the `_all` suffix
        # stripped when present).
        def respond_to_missing?(_method, _include_all=false)
          return true if empty?
          m = /\A(.*)_all\z/.match _method.to_s
          context.first.respond_to?(m ? m[1] : _method, _include_all)
        end

        private

        # Tells whether the given elements satisfy the requested wait mode.
        def wait_condition_met?(_mode, _elements)
          case _mode
          when :present then !_elements.empty?
          when :visible then !_elements.empty? && _elements.first.displayed?
          when :enabled then !_elements.empty? && _elements.first.displayed? && _elements.first.enabled?
          when :not_present then _elements.empty?
          when :not_visible then _elements.none? { |e| e.displayed? }
          else
            # NOTE(review): SetupError is not declared in this file; confirm
            # it is defined elsewhere in the gem.
            raise SetupError.new "Invalid wait mode '#{_mode}'"
          end
        end

        # wrap every selenium errors that happen inside block.
        def wrap_errors
          yield
        rescue Selenium::WebDriver::Error::WebDriverError => e
          raise WebdriverError.new e, self
        end

        # base filtering method, expands current context
        def search_elements(_options)
          wrap_errors do
            context.flat_map { |element| element.find_elements(_options) }
          end
        end

        # returns the current context
        def context
          @elements
        end

      end
    end
  end
end
@@ -0,0 +1,58 @@
1
module Crabfarm
  module Dsl
    module Surfer
      # Root search context for a driver bucket: its only element is the
      # bucket's driver, so searches start from the whole page and unknown
      # methods are forwarded to the driver.
      class SurfContext < SearchContext
        extend Forwardable

        # Retries attempted by goto when the driver does not expose its own
        # max_retries.
        MAX_RETRIES = 3

        def_delegators :@bucket, :parse, :setup
        def_delegators 'driver.navigate', :back, :forward, :refresh

        # _bucket - driver bucket; must respond to original, reset, parse
        #           and setup.
        def initialize(_bucket)
          super nil, nil
          @bucket = _bucket
        end

        # Returns the driver held by the bucket.
        def driver
          @bucket.original
        end

        # Resets the underlying bucket.
        def quit
          @bucket.reset
        end

        # Returns the driver's current url parsed as an URI.
        def current_uri
          URI.parse driver.current_url
        end

        # Returns every cookie known to the driver.
        def cookies
          driver.manage.all_cookies
        end

        # Navigates to the given url.
        #
        # _url    - target url.
        # _params - optional object responding to to_query; its query string
        #           is appended with '?' or, when the url already carries a
        #           query, with '&'.
        #
        # On Timeout::Error the bucket is reset and the request retried after
        # one second, up to max_retries times; the last timeout is re-raised.
        # Returns nil.
        def goto(_url, _params=nil)
          if _params
            separator = _url.include?('?') ? '&' : '?'
            _url += "#{separator}#{_params.to_query}"
          end

          retries = 0
          begin
            @bucket.reset if retries > 0
            driver.get(_url)
          rescue Timeout::Error #, Selenium::WebDriver::Error::UnknownError
            # TODO: log this
            raise if retries >= max_retries
            retries += 1
            sleep 1.0
            retry
          end

          nil
        end

        private

        def context
          [driver]
        end

        # Retry limit for goto. Without this method the call fell through
        # method_missing to the driver, so a driver that does provide
        # max_retries is still honoured.
        def max_retries
          driver.respond_to?(:max_retries) ? driver.max_retries : MAX_RETRIES
        end

      end
    end
  end
end
@@ -0,0 +1,96 @@
1
+ require 'ostruct'
2
+
3
module Crabfarm
  module Engines
    # Runs crawler states on a background thread, one at a time, and exposes
    # the outcome of the last executed state as an OpenStruct with name,
    # params and doc.
    class SafeStateLoop

      # _loader - object responding to load_context; the returned context
      #           must respond to run_state and release.
      def initialize(_loader)
        @context = _loader.load_context
        @running = true
        @working = false
        @lock = Mutex.new
        @thread = Thread.new { crawl_loop }
      end

      # Stops the crawl loop, waits for its thread to finish and releases the
      # context.
      def release
        @running = false
        @thread.join
        @context.release
      end

      # Schedules a state for execution.
      #
      # _name   - state name.
      # _params - state parameters.
      # _wait   - seconds to wait for the state to finish (nil: do not wait).
      #
      # Returns the resulting state struct. Raises StillWorkingError if a
      # state is already running, TimeoutError if the state has not finished
      # once the wait is over and CrawlerError if the state failed.
      def change_state(_name, _params={}, _wait=nil)
        @lock.synchronize do
          raise StillWorkingError.new if @working
          @next_state_name = _name
          @next_state_params = _params
          @working = true

          wait_and_load_struct _wait
        end
      end

      # Waits up to _wait seconds for the running state (if any) and returns
      # the last state struct. Raises like change_state, except for
      # StillWorkingError.
      def wait_for_state(_wait=nil)
        @lock.synchronize do
          wait_and_load_struct _wait
        end
      end

      # Aborts the running state, if any, by replacing the worker thread.
      def cancel
        @lock.synchronize do
          if @working
            @thread.terminate
            @thread.join
            @thread = Thread.new { crawl_loop }
            @working = false
          end
        end
      end

      private

      def wait_and_load_struct(_wait)
        # need to use this method because mutex are not reentrant and monitors are slow.
        wait_while_working _wait unless _wait.nil?
        raise TimeoutError.new if @working
        state_as_struct
      end

      # Polls until the worker is idle or _wait seconds have elapsed. Must be
      # called with @lock held: Mutex#sleep releases it while sleeping.
      def wait_while_working(_wait)
        # TODO: use condition variables instead of wait loops
        # Monotonic clock and plain to_f: immune to wall-clock adjustments and
        # independent of ActiveSupport's Numeric#seconds.
        deadline = monotonic_now + _wait.to_f
        @lock.sleep 0.25 while @working && monotonic_now < deadline
      end

      def monotonic_now
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      end

      def state_as_struct
        raise CrawlerError.new @error if @error

        OpenStruct.new({
          name: @state_name,
          params: @state_params,
          doc: @doc
        })
      end

      def crawl_loop
        while @running
          if @working
            begin
              last_state = @context.run_state @next_state_name, @next_state_params
              @doc = last_state.output.attributes!
              @error = nil
            rescue Exception => e
              # Deliberately broad: any failure is stored and reported through
              # state_as_struct, so the worker keeps running and @working is
              # always cleared below.
              @doc = nil
              @error = e
            end

            @state_name = @next_state_name
            @state_params = @next_state_params
            @working = false
          else
            sleep 0.2
          end
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
module Crabfarm

  # Base class for every error raised by the gem.
  class Error < StandardError; end

  class ConfigurationError < Error; end

  # Raised when a required entity cannot be found.
  class EntityNotFoundError < Error
    attr_accessor :role, :name

    # _role - kind of entity that was required.
    # _name - name of the missing entity.
    def initialize(_role, _name)
      super("The required #{_role} was not found (#{_name})")
      @role = _role
      @name = _name
    end
  end

  # Error that carries a numeric code and a hash payload.
  class ApiError < Error
    def code; 500 end

    # Returns the payload as a Hash (not as a JSON string).
    def to_json; {} end
  end

  class StillWorkingError < ApiError
    def code; 409 end
  end

  class TimeoutError < ApiError
    def code; 408 end
  end

  # Error describing a failure by its message and backtrace.
  class CrawlerBaseError < ApiError
    # _msg   - failure description; also used as this error's message.
    # _trace - backtrace of the failure.
    def initialize(_msg, _trace)
      # Without this call #message would just return the class name.
      super(_msg)
      @exc = _msg
      @trace = _trace
    end

    def to_json
      {
        exception: @exc,
        backtrace: @trace
      }
    end
  end

  # Wraps an arbitrary exception, keeping its text and backtrace.
  class CrawlerError < CrawlerBaseError
    def initialize(_exc)
      super _exc.to_s, _exc.backtrace
    end
  end

end
@@ -0,0 +1,83 @@
1
+ require 'active_support'
2
+
3
module Crabfarm

  # Loads a crawler application from a directory: evaluates its Crabfile into
  # a Configuration, requires every ruby file under app/ and stores the
  # configuration in the CF_CONFIG constant of the application module.
  class Loader

    attr_reader :module

    # _base_path    - directory holding the Crabfile and the app folder.
    # _module_name  - optional name of the module that represents the
    #                 application; Object is used when it is not given.
    # _config_block - optional block, called with the configuration after
    #                 the Crabfile has been evaluated.
    def initialize(_base_path, _module_name=nil, &_config_block)
      @path = _base_path
      @name = _module_name
      @config_block = _config_block
      # Pick up the module if it already exists.
      @module = is_wrapped? ? find_module(@name) : nil
    end

    def is_wrapped?
      @name.present?
    end

    def is_loaded?
      not @module.nil?
    end

    # Loads the configuration and the application code, then (re)defines
    # CF_CONFIG in the application module.
    def load
      crabfile = load_crabfile(@path)
      @module = load_module(@name, File.join(@path, 'app'))
      @module.send(:remove_const, :CF_CONFIG) if @module.const_defined?(:CF_CONFIG, false)
      @module.const_set :CF_CONFIG, crabfile
    end

    # Returns a new Context for the application module, loading it first when
    # required.
    def load_context
      load unless is_loaded?
      Context.new @module
    end

    # Removes the application module constant (wrapped mode only) and marks
    # the loader as not loaded.
    def unload
      Object.send(:remove_const, @name) if is_wrapped?
      @module = nil
    end

    private

    # Returns the named top level constant or nil when it is not defined.
    def find_module(_name)
      "::#{_name}".constantize
    rescue NameError
      nil
    end

    def load_crabfile(_path)
      crabfile_path = File.join(_path, 'Crabfile')
      config = Configuration.new
      # The file name is passed along so errors raised by the Crabfile point
      # at it.
      config.instance_eval File.read(crabfile_path), crabfile_path
      @config_block.call(config) unless @config_block.nil?
      config
    end

    def load_module(_name, _path)
      require_all_as(_name, _path)
      is_wrapped? ? "::#{_name}".constantize : Object
    end

    # Makes sure the named module exists and requires every .rb file under
    # _src_path. Files that fail with a NameError (presumably because they
    # reference a constant defined by a file not yet loaded) are retried once
    # the remaining files have been required.
    #
    # NOTE(review): Kernel#require evaluates files at the top level, so the
    # required files are not namespaced by the module opened here.
    def require_all_as(_name, _src_path)
      # Only the module opening is eval'd; paths never end up inside
      # generated code, so any character is safe in _src_path.
      Object.instance_eval "module ::#{_name}; end" if _name.present?

      pending = Dir.glob(File.join(_src_path, '**/*')).select { |f| f.end_with? '.rb' }.map { |f| f[0...-3] }

      until pending.empty?
        deferred = pending.select do |file|
          begin
            require file
            false
          rescue NameError
            true
          end
        end

        # No progress was made: require again so the NameError surfaces.
        require deferred.first if deferred.size == pending.size
        pending = deferred
      end
    end

  end

end