crabfarm 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/bin/crabfarm +9 -0
  3. data/lib/crabfarm.rb +24 -0
  4. data/lib/crabfarm/adapters.rb +23 -0
  5. data/lib/crabfarm/adapters/capybara_adapter.rb +7 -0
  6. data/lib/crabfarm/adapters/surfer_adapter.rb +7 -0
  7. data/lib/crabfarm/adapters/watir_adapter.rb +7 -0
  8. data/lib/crabfarm/base_parser.rb +26 -0
  9. data/lib/crabfarm/base_state.rb +41 -0
  10. data/lib/crabfarm/cli.rb +79 -0
  11. data/lib/crabfarm/configuration.rb +83 -0
  12. data/lib/crabfarm/context.rb +32 -0
  13. data/lib/crabfarm/default_driver_factory.rb +37 -0
  14. data/lib/crabfarm/driver_bucket.rb +50 -0
  15. data/lib/crabfarm/driver_bucket_pool.rb +48 -0
  16. data/lib/crabfarm/dsl/surfer.rb +22 -0
  17. data/lib/crabfarm/dsl/surfer/search_context.rb +134 -0
  18. data/lib/crabfarm/dsl/surfer/surf_context.rb +58 -0
  19. data/lib/crabfarm/engines/safe_state_loop.rb +96 -0
  20. data/lib/crabfarm/errors.rb +50 -0
  21. data/lib/crabfarm/loader.rb +83 -0
  22. data/lib/crabfarm/modes/console.rb +86 -0
  23. data/lib/crabfarm/modes/generator.rb +120 -0
  24. data/lib/crabfarm/modes/server.rb +78 -0
  25. data/lib/crabfarm/module_helper.rb +35 -0
  26. data/lib/crabfarm/phantom_driver_factory.rb +33 -0
  27. data/lib/crabfarm/phantom_runner.rb +74 -0
  28. data/lib/crabfarm/rspec.rb +39 -0
  29. data/lib/crabfarm/state_store.rb +24 -0
  30. data/lib/crabfarm/support/custom_puma.rb +64 -0
  31. data/lib/crabfarm/templates/Crabfile.erb +3 -0
  32. data/lib/crabfarm/templates/Gemfile.erb +7 -0
  33. data/lib/crabfarm/templates/boot.rb.erb +13 -0
  34. data/lib/crabfarm/templates/crabfarm_bin.erb +3 -0
  35. data/lib/crabfarm/templates/dot_gitignore.erb +1 -0
  36. data/lib/crabfarm/templates/dot_gitkeep.erb +0 -0
  37. data/lib/crabfarm/templates/dot_rspec.erb +4 -0
  38. data/lib/crabfarm/templates/parser.rb.erb +8 -0
  39. data/lib/crabfarm/templates/parser_spec.rb.erb +7 -0
  40. data/lib/crabfarm/templates/spec_helper.rb.erb +22 -0
  41. data/lib/crabfarm/templates/state.rb.erb +8 -0
  42. data/lib/crabfarm/templates/state_spec.rb.erb +7 -0
  43. data/lib/crabfarm/version.rb +3 -0
  44. metadata +359 -0
@@ -0,0 +1,86 @@
1
+ require 'readline'
2
+ require 'rainbow'
3
+ require 'rainbow/ext/string'
4
+ require 'json'
5
+
6
+ module Crabfarm
7
+ module Modes
8
+ class Console
9
+
10
# Command surface of the interactive console. The console loop evaluates each
# typed line against an instance of this class, so every public method here
# can be used as a console command.
class ConsoleDsl

  attr_reader :context

  # Stores the loader and builds the first crawling context through it.
  def initialize(_loader)
    @loader = _loader
    reload!
  end

  # Replaces the current context with a freshly loaded one. When a context
  # already exists it is released and the loader is asked to unload first.
  def reload!
    previous = @context
    unless previous.nil?
      puts "Reloading crawler source".color(:green)
      previous.release
      @loader.unload
    end

    @context = @loader.load_context
  end

  # Runs the state called _name with _params on the current context and
  # prints the state's output attributes as pretty JSON. Failures are printed
  # instead of raised; anything other than EntityNotFoundError also prints
  # its backtrace.
  def transition(_name=nil, _params={})
    return puts("Must provide a state name".color(:red)) if _name.nil?

    begin
      result = @context.run_state(_name, _params)
      puts JSON.pretty_generate(result.output.attributes!).color(:green)
    rescue EntityNotFoundError => not_found
      puts not_found.to_s.color(:red)
    rescue => failure
      puts failure.to_s.color(:red)
      puts failure.backtrace
    end
  end

  # Placeholder help command.
  def help
    puts "Ejem..."
  end

  # Resets the current crawling context.
  def reset
    puts "Resetting crawling context".color(:green)
    @context.reset
  end

  # Short command names for the console prompt.
  alias t transition
  alias r reset
end
58
+
59
# Runs the interactive console: reads lines with Readline and evaluates each
# one against a ConsoleDsl instance until the user exits (exit, Ctrl-C or
# end of input), then releases the crawling context.
#
# Only works when CF_LOADER is defined; otherwise an error message is printed
# and nothing else happens.
def self.console_loop
  unless defined? CF_LOADER
    puts "This command can only be run inside a crabfarm application".color(:red)
    return
  end

  # TODO: generated app should load itself
  dsl = ConsoleDsl.new(CF_LOADER)

  loop do
    begin
      line = Readline.readline("> ", true)
      # Readline returns nil on end of input (Ctrl-D). Evaluating nil would
      # raise a TypeError on every iteration and the loop would never end.
      break if line.nil?

      dsl.instance_eval line
    rescue SyntaxError => se
      puts "Syntax error: #{se.message}".color(:red)
    rescue SystemExit, Interrupt
      break
    rescue
      puts "Unknown command".color(:red)
    end
  end

  puts "Releasing crawling context".color(:green)
  dsl.context.release
end
83
+
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,120 @@
1
+ require 'rainbow'
2
+ require 'rainbow/ext/string'
3
+ require 'active_support'
4
+ require 'erb'
5
+ require 'ostruct'
6
+
7
+ module Crabfarm
8
+ module Modes
9
# Scaffolds crabfarm applications, states and parsers by rendering ERB
# templates into a target directory. Existing directories and files are never
# overwritten; they are reported as skipped.
#
# Paths are built with a small chained API: `path(...)` selects a location
# relative to the current base path, then `ensure` (directory) or `render`
# (file from template) materializes it.
class Generator

  # Creates the skeleton of a new application called _name inside _target.
  def generate_app(_name, _target)
    with_external_path _target do
      locals = {
        name: _name,
        version: Crabfarm::VERSION
      }

      path(_name).ensure
      path(_name, '.gitignore').render('dot_gitignore')
      path(_name, 'Gemfile').render('Gemfile', locals)
      path(_name, 'Crabfile').render('Crabfile', locals)
      path(_name, '.rspec').render('dot_rspec', locals)
      path(_name, 'boot.rb').render('boot.rb', locals)
      path(_name, 'bin', 'crabfarm').render('crabfarm_bin', locals, 0755)
      %w(parsers states helpers).each do |folder|
        path(_name, 'app', folder, '.gitkeep').render('dot_gitkeep')
      end
      path(_name, 'spec', 'spec_helper.rb').render('spec_helper.rb', locals)
      path(_name, 'spec', 'snapshots', '.gitkeep').render('dot_gitkeep')
    end
  end

  # Renders a state class and its spec inside the current application.
  def generate_state(_name)
    with_crawler_path do
      locals = { state_class: _name.camelize }
      file_name = _name.parameterize
      path('app', 'states', file_name + '.rb').render('state.rb', locals)
      path('spec', 'states', file_name + '_spec.rb').render('state_spec.rb', locals)
    end
  end

  # Renders a parser class and its spec inside the current application.
  def generate_parser(_name)
    with_crawler_path do
      locals = { parser_class: _name.camelize }
      file_name = _name.parameterize
      path('app', 'parsers', file_name + '.rb').render('parser.rb', locals)
      path('spec', 'parsers', file_name + '_spec.rb').render('parser_spec.rb', locals)
    end
  end

  # Uses _target as base path for everything generated inside the block.
  def with_external_path(_target)
    @base_path = _target
    yield
  end

  # Uses CF_PATH as base path for the block; when CF_PATH is not defined the
  # block is not run and a message is printed instead.
  def with_crawler_path
    unless defined? CF_PATH
      puts "This command can only be run inside a crabfarm application"
      return
    end

    @base_path = CF_PATH
    yield
  end

  # Selects the location (relative to the base path) the next `ensure` or
  # `render` call will act on. Returns self for chaining.
  def path(*_args)
    @path = _args
    self
  end

  # Creates the selected location as a directory, reporting when it already
  # exists. Returns self for chaining.
  def ensure
    generate_dir([@base_path, *@path], false)
    self
  end

  # Renders _template into the selected location using _binding as template
  # variables, creating the parent directory when needed. _mod, when given,
  # is applied to the new file with chmod. Returns self for chaining.
  def render(_template, _binding={}, _mod=nil)
    segments = [@base_path, *@path]
    generate_dir(segments[0...-1], true)
    render_template(_template, _binding, segments, _mod)
    self
  end

  private

  # Creates the directory described by the _path segments unless it exists.
  # _silent suppresses the "Skipping" notice for existing directories.
  def generate_dir(_path, _silent)
    location = File.join(*_path)
    dir = Pathname.new(location)
    if dir.exist?
      puts "Skipping #{location}".color(:yellow) unless _silent
    else
      puts "Generating #{location}".color(:green)
      dir.mkpath
    end
  end

  # Writes the rendered template to the file described by the _path segments,
  # unless that file already exists.
  def render_template(_template, _binding, _path, _mod)
    source = File.join(template_dir, _template) + '.erb'
    target = File.join(*_path)

    if Pathname.new(target).exist?
      puts "Skipping #{target}, already exists".color(:yellow)
      return
    end

    puts "Rendering #{target}".color(:green)
    File.open(target, "w") do |file|
      file.write eval_template_with_hash(source, _binding)
      file.chmod(_mod) unless _mod.nil?
    end
  end

  # Evaluates the ERB file at _path with the keys of _hash exposed as
  # methods (through an OpenStruct used as the template's self).
  def eval_template_with_hash(_path, _hash)
    erb = ERB.new(File.read(_path))
    erb.result(OpenStruct.new(_hash).instance_eval { binding })
  end

  # Directory holding the bundled .erb templates, relative to this file.
  def template_dir
    File.expand_path('../../templates', __FILE__)
  end
end
119
+ end
120
+ end
@@ -0,0 +1,78 @@
1
+ require 'grape'
2
+ require 'crabfarm/support/custom_puma'
3
+ require 'crabfarm/engines/safe_state_loop'
4
+
5
+ module Crabfarm
6
+ module Modes
7
# Server mode: exposes the crawler state machine over HTTP. `start` creates
# the shared state loop from CF_LOADER and serves the API through the custom
# Puma runner until it returns.
class Server

  # JSON API mounted under the `api` prefix with two endpoints on `state`:
  # GET reads the current state, PUT requests a transition.
  class API < Grape::API

    # Upper bound applied to the client supplied `wait` parameter; also the
    # value used when the parameter is absent.
    # NOTE(review): presumably seconds - confirm against SafeStateLoop.
    MAX_WAIT = 60.0 * 5

    format :json
    prefix :api

    rescue_from Grape::Exceptions::ValidationErrors do |e|
      rack_response({ errors: e.as_json }.to_json, 400)
    end

    rescue_from Crabfarm::ApiError do |e|
      rack_response(e.to_json.to_json, e.code)
    end

    helpers do
      # State loop shared by every request.
      def evaluator
        Server.evaluator
      end

      # Requested wait, capped at MAX_WAIT.
      def wait
        return MAX_WAIT unless params.has_key? :wait

        [params[:wait].to_f, MAX_WAIT].min
      end

      # Response body describing _state.
      def print_state(_state)
        { name: _state.name, params: _state.params, doc: _state.doc }
      end
    end

    desc "Return the current crawler status."
    params do
      optional :wait, type: Float
    end
    get :state do
      print_state(evaluator.wait_for_state(wait))
    end

    desc "Change the crawler state"
    params do
      requires :name, type: String, desc: "Crawler state name"
      optional :wait, type: Float
    end
    put :state do
      print_state(evaluator.change_state(params[:name], params[:params], wait))
    end
  end

  # State loop created by `start`; only available once `start` has run.
  def self.evaluator
    @@evaluator
  end

  # Builds the state loop, serves the API with _options and releases the
  # loop when the server stops, whether it stopped normally or by an error.
  def self.start(_options)
    @@evaluator = Engines::SafeStateLoop.new(CF_LOADER)
    begin
      Support::CustomPuma.run(API, _options)
    ensure
      @@evaluator.release
    end
  end

end
76
+ end
77
+ end
78
+
@@ -0,0 +1,35 @@
1
+ require 'active_support'
2
+
3
+ module Crabfarm
4
# Looks up crawler entities (states and parsers) and settings inside the
# module given at construction time.
class ModuleHelper

  attr_reader :dsl

  # _module is the namespace constants are resolved against.
  def initialize(_module)
    @module = _module
  end

  # The CF_CONFIG constant of the wrapped module.
  def settings
    @module::CF_CONFIG
  end

  # Returns the state class for _name.
  # Raises EntityNotFoundError when no BaseState subclass matches.
  def load_state(_name)
    load_entity _name, 'state', BaseState
  end

  # Returns the parser class for _name.
  # Raises EntityNotFoundError when no BaseParser subclass matches.
  def load_parser(_name)
    load_entity _name, 'parser', BaseParser
  end

  private

  # Resolves _name to a constant of the wrapped module and checks that it is
  # a strict descendant of _type. _name is normalized first: every run of
  # characters other than letters, digits and ':' becomes '_' and the result
  # is camelized.
  def load_entity(_name, _role, _type)
    name = _name.to_s.gsub(/[^A-Z0-9:]+/i, '_').camelize

    entity = begin
      @module.const_get(name)
    rescue NameError
      # Unknown or malformed constant name. Other errors (for instance a bug
      # in the entity's own source) are left to propagate instead of being
      # reported as "not found".
      nil
    end

    # Constants that are not classes/modules (numbers, hashes...) cannot be
    # compared with `<`, so they are rejected here as well.
    unless entity.is_a?(Module) && entity < _type
      raise EntityNotFoundError.new(_role, name)
    end

    entity
  end

end
35
+ end
@@ -0,0 +1,33 @@
1
+ module Crabfarm
2
# Builds Selenium remote drivers that talk to a running phantom instance.
class PhantomDriverFactory

  # _phantom must respond to `port`; _config supplies :remote_timeout,
  # :capabilities, :window_width and :window_height.
  def initialize(_phantom, _config={})
    @phantom = _phantom
    @config = _config
  end

  # Returns a new remote driver connected to the phantom instance, with the
  # configured window size already applied. _session_id is not used here.
  def build_driver(_session_id)
    # setup a custom client to use longer timeouts
    http_client = Selenium::WebDriver::Remote::Http::Default.new
    http_client.timeout = @config[:remote_timeout]

    options = {
      :url => phantom_url,
      :http_client => http_client,
      :desired_capabilities => @config[:capabilities]
    }

    Selenium::WebDriver.for(:remote, options).tap do |driver|
      # `bridge` is reached through send, so it is presumably not public.
      driver.send(:bridge).setWindowSize(@config[:window_width], @config[:window_height])
    end
  end

  private

  # Local URL of the phantom webdriver endpoint.
  def phantom_url
    "http://localhost:#{@phantom.port}"
  end

end
33
+ end
@@ -0,0 +1,74 @@
1
+ require 'net/http'
2
+
3
+ module Crabfarm
4
# Starts and stops a phantomjs process running in webdriver mode on a free
# local port.
class PhantomRunner

  # Pause between two readiness probes while phantomjs boots.
  POLL_INTERVAL = 0.05

  # Port picked by the last call to `start` (nil before that).
  attr_reader :port

  # _config keys read by this class: :bin_path, :load_images, :proxy, :ssl
  # and :lock_file.
  def initialize(_config={})
    @config = _config
    @pid = nil
  end

  # Picks a free port, spawns phantomjs on it and blocks until the process
  # answers HTTP requests.
  # Raises RuntimeError when the process exits before becoming ready.
  def start
    find_available_port
    @pid = Process.spawn({}, phantomjs_cmd)
    wait_for_server
  end

  # Terminates the spawned process, if any, and waits for it to exit.
  # A process that is already gone is not an error.
  def stop
    return if @pid.nil?

    begin
      Process.kill("TERM", @pid)
      Process.wait @pid
    rescue Errno::ESRCH, Errno::ECHILD
      # The process died (or was reaped) on its own: nothing left to stop.
    ensure
      @pid = nil
    end

    nil
  end

  private

  # Command line used to launch phantomjs.
  # NOTE(review): the parts are joined into one string, so a :bin_path or
  # :proxy value containing spaces or shell metacharacters will not survive
  # as a single argument.
  def phantomjs_cmd
    cmd = [@config[:bin_path]]
    cmd << '--load-images=false' unless @config[:load_images]
    cmd << "--proxy=#{@config[:proxy]}" unless @config[:proxy].nil?
    cmd << "--webdriver=#{@port}"
    cmd << "--ssl-protocol=#{@config[:ssl]}" unless @config[:ssl].nil?
    cmd << "--ignore-ssl-errors=true"
    cmd << "--webdriver-loglevel=NONE" # TODO: remove when log path is choosen
    # cmd << "--webdriver-logfile=/path/to/log/phantom.log"
    cmd.join(' ')
  end

  # Asks the OS for a free port by binding port 0 and remembers it in @port.
  # The socket is closed right away, so the port is only reserved while the
  # optional lock is held.
  def find_available_port
    with_lock do
      server = TCPServer.new('127.0.0.1', 0)
      @port = server.addr[1]
      server.close
    end
  end

  # Polls the webdriver port until a request gets any HTTP response.
  # Between attempts it sleeps briefly and checks that the child process is
  # still running, so a phantomjs that failed to start is reported instead
  # of being polled forever.
  def wait_for_server
    probe = URI.parse("http://127.0.0.1:#{@port}")

    loop do
      begin
        # TODO: generate a valid request to prevent warnings
        Net::HTTP.get_response(probe)
        break
      rescue StandardError
        if child_exited?
          @pid = nil
          raise "phantomjs exited before accepting connections on port #{@port}"
        end
        sleep POLL_INTERVAL
      end
    end
  end

  # True when the spawned process has already terminated. Reaps it as a side
  # effect, which is why callers clear @pid afterwards.
  def child_exited?
    !Process.waitpid(@pid, Process::WNOHANG).nil?
  rescue Errno::ECHILD
    true
  end

  # Runs the block while holding an exclusive flock on the configured
  # :lock_file, or directly when no lock file is configured. Returns the
  # block's value.
  def with_lock
    return yield if @config[:lock_file].nil?

    File.open(@config[:lock_file], 'a+') do |file|
      begin
        file.flock File::LOCK_EX
        return yield
      ensure
        file.flock File::LOCK_UN
      end
    end
  end

end
74
+ end
@@ -0,0 +1,39 @@
1
# Spec bootstrap: load the crawler through CF_LOADER. CF_LOADER is not defined
# in this file, so it has to exist before this file is loaded.
+ CF_LOADER.load
2
+
3
# Context built once here; the after(:suite) hook further down releases it.
+ CF_TEST_CONTEXT = CF_LOADER.load_context
4
# Driver of that context; the `parse` helper below calls `get` and `parse` on it.
+ CF_TEST_BUCKET = CF_TEST_CONTEXT.driver
5
+
6
module Crabfarm
  # Helpers mixed into the examples of a crawler's parser specs.
  module RSpec

    # Loads _snap_or_url into the shared test driver and parses it with the
    # class under test (`described_class`), passing _options along.
    #
    # When the SNAPSHOT_DIR environment variable is set and it contains a
    # file named _snap_or_url, that file is loaded through a file:// URL;
    # otherwise _snap_or_url is loaded as given.
    def parse(_snap_or_url, _options={})
      # SNAPSHOT_DIR may be unset: File.join would raise a TypeError on nil,
      # so the snapshot lookup is skipped and the value is used as a URL.
      snapshot_dir = ENV['SNAPSHOT_DIR']
      fixture = Pathname.new(File.join(snapshot_dir, _snap_or_url)) unless snapshot_dir.nil?

      if fixture && fixture.exist?
        CF_TEST_BUCKET.get("file://#{fixture.realpath}")
      else
        CF_TEST_BUCKET.get(_snap_or_url)
      end

      CF_TEST_BUCKET.parse(described_class, _options)
    end

    # Result stored in @parser by the before(:example) hook for examples
    # tagged with :parsing; nil otherwise.
    def parser
      @parser
    end

  end
end
26
+
27
# Hooks the crabfarm helpers into RSpec.
RSpec.configure do |rspec|
  rspec.include Crabfarm::RSpec

  # Examples tagged with :parsing get their snapshot (or URL) parsed before
  # they run; the optional :using tag supplies the parser options. The
  # result is stored in @parser.
  rspec.before(:example) do |example|
    source = example.metadata[:parsing]
    @parser = parse(source, example.metadata[:using] || {}) if source
  end

  # Release the shared crawling context once the whole suite has finished.
  rspec.after(:suite) do
    CF_TEST_CONTEXT.release
  end
end