crabfarm 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/bin/crabfarm +9 -0
  3. data/lib/crabfarm.rb +24 -0
  4. data/lib/crabfarm/adapters.rb +23 -0
  5. data/lib/crabfarm/adapters/capybara_adapter.rb +7 -0
  6. data/lib/crabfarm/adapters/surfer_adapter.rb +7 -0
  7. data/lib/crabfarm/adapters/watir_adapter.rb +7 -0
  8. data/lib/crabfarm/base_parser.rb +26 -0
  9. data/lib/crabfarm/base_state.rb +41 -0
  10. data/lib/crabfarm/cli.rb +79 -0
  11. data/lib/crabfarm/configuration.rb +83 -0
  12. data/lib/crabfarm/context.rb +32 -0
  13. data/lib/crabfarm/default_driver_factory.rb +37 -0
  14. data/lib/crabfarm/driver_bucket.rb +50 -0
  15. data/lib/crabfarm/driver_bucket_pool.rb +48 -0
  16. data/lib/crabfarm/dsl/surfer.rb +22 -0
  17. data/lib/crabfarm/dsl/surfer/search_context.rb +134 -0
  18. data/lib/crabfarm/dsl/surfer/surf_context.rb +58 -0
  19. data/lib/crabfarm/engines/safe_state_loop.rb +96 -0
  20. data/lib/crabfarm/errors.rb +50 -0
  21. data/lib/crabfarm/loader.rb +83 -0
  22. data/lib/crabfarm/modes/console.rb +86 -0
  23. data/lib/crabfarm/modes/generator.rb +120 -0
  24. data/lib/crabfarm/modes/server.rb +78 -0
  25. data/lib/crabfarm/module_helper.rb +35 -0
  26. data/lib/crabfarm/phantom_driver_factory.rb +33 -0
  27. data/lib/crabfarm/phantom_runner.rb +74 -0
  28. data/lib/crabfarm/rspec.rb +39 -0
  29. data/lib/crabfarm/state_store.rb +24 -0
  30. data/lib/crabfarm/support/custom_puma.rb +64 -0
  31. data/lib/crabfarm/templates/Crabfile.erb +3 -0
  32. data/lib/crabfarm/templates/Gemfile.erb +7 -0
  33. data/lib/crabfarm/templates/boot.rb.erb +13 -0
  34. data/lib/crabfarm/templates/crabfarm_bin.erb +3 -0
  35. data/lib/crabfarm/templates/dot_gitignore.erb +1 -0
  36. data/lib/crabfarm/templates/dot_gitkeep.erb +0 -0
  37. data/lib/crabfarm/templates/dot_rspec.erb +4 -0
  38. data/lib/crabfarm/templates/parser.rb.erb +8 -0
  39. data/lib/crabfarm/templates/parser_spec.rb.erb +7 -0
  40. data/lib/crabfarm/templates/spec_helper.rb.erb +22 -0
  41. data/lib/crabfarm/templates/state.rb.erb +8 -0
  42. data/lib/crabfarm/templates/state_spec.rb.erb +7 -0
  43. data/lib/crabfarm/version.rb +3 -0
  44. metadata +359 -0
@@ -0,0 +1,86 @@
1
+ require 'readline'
2
+ require 'rainbow'
3
+ require 'rainbow/ext/string'
4
+ require 'json'
5
+
module Crabfarm
  module Modes
    # Interactive console mode: reads lines from the terminal and evaluates
    # them against a ConsoleDsl instance, so the DSL's public methods act as
    # the console commands.
    class Console

      # Receiver for every line typed in the console.
      #
      # Commands exposed: reload!, transition (alias t), reset (alias r), help.
      class ConsoleDsl

        attr_reader :context

        # _loader - object used to load/unload the crawler; only
        #           #load_context and #unload are called on it here.
        def initialize(_loader)
          @loader = _loader
          reload!
        end

        # Releases the current context (if any), unloads the crawler source
        # and loads a fresh context from the loader.
        def reload!
          unless @context.nil?
            puts "Reloading crawler source".color(:green)
            @context.release
            @loader.unload
          end

          @context = @loader.load_context
        end

        # Runs the state called _name with _params and pretty prints the
        # resulting output as JSON. Errors are reported on the console
        # instead of being raised.
        def transition(_name=nil, _params={})
          if _name.nil?
            puts "Must provide a state name".color(:red)
            return
          end

          begin
            state = @context.run_state _name, _params
            puts JSON.pretty_generate(state.output.attributes!).color(:green)
          rescue EntityNotFoundError => e
            puts e.to_s.color(:red)
          rescue => e
            # unexpected failure: also show where it came from
            puts e.to_s.color(:red)
            puts e.backtrace
          end
        end

        def help
          puts "Ejem..."
        end

        # Resets the current crawling context.
        def reset
          puts "Resetting crawling context".color(:green)
          @context.reset
        end

        alias :t :transition
        alias :r :reset
      end

      # Runs the read-eval loop until the user exits (SystemExit, Ctrl-C or
      # end of input), then releases the crawling context.
      #
      # Requires the CF_LOADER constant to be defined; otherwise only an
      # error message is printed.
      def self.console_loop
        unless defined? CF_LOADER
          puts "This command can only be run inside a crabfarm application".color(:red)
          return
        end

        # TODO: generated app should load itself
        dsl = ConsoleDsl.new(CF_LOADER)

        begin
          loop do
            begin
              line = Readline.readline("> ", true)
              # readline returns nil on end of input (Ctrl-D); without this
              # check the loop would spin forever printing "Unknown command".
              break if line.nil?

              dsl.instance_eval line
            rescue SyntaxError => se
              puts "Syntax error: #{se.message}".color(:red)
            rescue SystemExit, Interrupt
              break
            rescue
              puts "Unknown command".color(:red)
            end
          end
        ensure
          # always give the context back, even if the loop is aborted by an
          # exception that is not handled above (e.g. LoadError).
          puts "Releasing crawling context".color(:green)
          dsl.context.release
        end
      end

    end
  end
end
@@ -0,0 +1,120 @@
1
+ require 'rainbow'
2
+ require 'rainbow/ext/string'
3
+ require 'active_support'
4
+ require 'erb'
5
+ require 'ostruct'
6
+
# FileUtils is required here explicitly so directory creation does not depend
# on another library having loaded it first.
require 'fileutils'

module Crabfarm
  module Modes
    # Renders the ERB templates shipped with the gem into a new application
    # skeleton, or into new state/parser files of an existing application.
    #
    # Paths are built with a small fluent interface: `path(...)` selects the
    # target (relative to the current base path) and `ensure` / `render` act
    # on it. Existing files and directories are never overwritten.
    class Generator

      # Generates a new application named _name inside the _target directory.
      def generate_app(_name, _target)
        with_external_path _target do
          locals = {
            name: _name,
            version: Crabfarm::VERSION
          }

          path(_name).ensure
          path(_name, '.gitignore').render('dot_gitignore')
          path(_name, 'Gemfile').render('Gemfile', locals)
          path(_name, 'Crabfile').render('Crabfile', locals)
          path(_name, '.rspec').render('dot_rspec', locals)
          path(_name, 'boot.rb').render('boot.rb', locals)
          path(_name, 'bin', 'crabfarm').render('crabfarm_bin', locals, 0755)
          path(_name, 'app', 'parsers', '.gitkeep').render('dot_gitkeep')
          path(_name, 'app', 'states', '.gitkeep').render('dot_gitkeep')
          path(_name, 'app', 'helpers', '.gitkeep').render('dot_gitkeep')
          path(_name, 'spec', 'spec_helper.rb').render('spec_helper.rb', locals)
          path(_name, 'spec', 'snapshots', '.gitkeep').render('dot_gitkeep')
        end
      end

      # Generates a state class and its spec inside the current application.
      def generate_state(_name)
        with_crawler_path do
          locals = { state_class: _name.camelize }
          path('app', 'states', _name.parameterize + '.rb').render('state.rb', locals)
          path('spec', 'states', _name.parameterize + '_spec.rb').render('state_spec.rb', locals)
        end
      end

      # Generates a parser class and its spec inside the current application.
      def generate_parser(_name)
        with_crawler_path do
          locals = { parser_class: _name.camelize }
          path('app', 'parsers', _name.parameterize + '.rb').render('parser.rb', locals)
          path('spec', 'parsers', _name.parameterize + '_spec.rb').render('parser_spec.rb', locals)
        end
      end

      # Runs the block using _target as the base path for generated files.
      def with_external_path(_target)
        @base_path = _target
        yield
      end

      # Runs the block using CF_PATH as the base path; prints an error and
      # does nothing when CF_PATH is not defined.
      def with_crawler_path
        if defined? CF_PATH
          @base_path = CF_PATH
          yield
        else
          puts "This command can only be run inside a crabfarm application"
        end
      end

      # Selects the path (given as segments, relative to the base path) the
      # next `ensure` or `render` call operates on. Returns self for chaining.
      def path(*_args)
        @path = _args
        self
      end

      # Creates the selected path as a directory, reporting when it already
      # exists. Returns self.
      def ensure
        generate_dir([@base_path] + @path, false)
        self
      end

      # Renders _template into the selected path.
      #
      # _template - template name, without the '.erb' extension.
      # _binding  - hash of values exposed to the template.
      # _mod      - optional file mode applied to the generated file.
      #
      # Returns self.
      def render(_template, _binding={}, _mod=nil)
        target = [@base_path] + @path
        # make sure the containing directory exists (quietly if it does)
        generate_dir(target[0..-2], true)
        render_template(_template, _binding, target, _mod)
        self
      end

      private

      # Creates the directory given as path segments unless something already
      # exists there; an existing path is reported unless _silent is set.
      def generate_dir(_path, _silent)
        dir = File.join(*_path)
        if File.exist?(dir)
          puts "Skipping #{dir}".color(:yellow) unless _silent
        else
          puts "Generating #{dir}".color(:green)
          FileUtils.mkdir_p dir
        end
      end

      # Writes the evaluated template to _path (given as segments) unless the
      # file already exists.
      def render_template(_template, _binding, _path, _mod)
        template = File.join(template_dir, _template) + '.erb'
        output = File.join(*_path)

        if File.exist?(output)
          puts "Skipping #{output}, already exists".color(:yellow)
        else
          puts "Rendering #{output}".color(:green)
          # evaluate before opening the output, so a failing template does not
          # leave an empty file behind that later runs would skip.
          contents = eval_template_with_hash(template, _binding)
          File.open(output, "w") do |f|
            f.write contents
            f.chmod(_mod) unless _mod.nil?
          end
        end
      end

      # Evaluates the ERB file at _path exposing every key of _hash as a
      # method available to the template.
      def eval_template_with_hash(_path, _hash)
        erb = ERB.new(File.read(_path))
        erb.result(OpenStruct.new(_hash).instance_eval { binding })
      end

      def template_dir
        File.expand_path('../../templates', __FILE__)
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require 'grape'
2
+ require 'crabfarm/support/custom_puma'
3
+ require 'crabfarm/engines/safe_state_loop'
4
+
module Crabfarm
  module Modes
    # HTTP server mode: exposes the crawler state machine through a small
    # Grape API served by the custom puma runner.
    class Server

      # JSON API mounted under the /api prefix.
      class API < Grape::API

        # Upper bound for the `wait` request parameter
        # (presumably seconds -- confirm against SafeStateLoop).
        MAX_WAIT = 5 * 60.0

        format :json
        prefix :api

        # Parameter validation failures are answered with a 400.
        rescue_from(Grape::Exceptions::ValidationErrors) do |error|
          rack_response({ errors: error.as_json }.to_json, 400)
        end

        # Crabfarm API errors carry their own response code.
        rescue_from(Crabfarm::ApiError) do |error|
          rack_response(error.to_json.to_json, error.code)
        end

        helpers do
          # Evaluator shared by every request.
          def evaluator
            Server.evaluator
          end

          # Requested wait, never above MAX_WAIT; MAX_WAIT when not given.
          def wait
            return MAX_WAIT unless params.has_key? :wait

            [params[:wait].to_f, MAX_WAIT].min
          end

          # Hash representation of a state, used as the response body.
          def print_state(_state)
            { name: _state.name, params: _state.params, doc: _state.doc }
          end
        end

        desc "Return the current crawler status."
        params do
          optional :wait, type: Float
        end
        get :state do
          current = evaluator.wait_for_state wait
          print_state current
        end

        desc "Change the crawler state"
        params do
          requires :name, type: String, desc: "Crawler state name"
          optional :wait, type: Float
        end
        put :state do
          changed = evaluator.change_state params[:name], params[:params], wait
          print_state changed
        end
      end

      # Evaluator created by .start.
      def self.evaluator
        @@evaluator
      end

      # Builds the evaluator from CF_LOADER and serves the API with the given
      # options; the evaluator is released once the server returns.
      def self.start(_options)
        @@evaluator = Engines::SafeStateLoop.new CF_LOADER
        begin
          Support::CustomPuma.run API, _options
        ensure
          @@evaluator.release
        end
      end

    end
  end
end
78
+
@@ -0,0 +1,35 @@
1
+ require 'active_support'
2
+
module Crabfarm
  # Wraps the module that holds a crawler's classes and resolves states and
  # parsers from it by name.
  class ModuleHelper

    attr_reader :dsl

    # _module - module the crawler's constants are looked up in.
    def initialize(_module)
      @module = _module
    end

    # Returns the CF_CONFIG constant of the wrapped module.
    def settings
      @module::CF_CONFIG
    end

    # Returns the BaseState subclass matching _name.
    #
    # Raises EntityNotFoundError if there is no such state.
    def load_state(_name)
      load_entity _name, 'state', BaseState
    end

    # Returns the BaseParser subclass matching _name.
    #
    # Raises EntityNotFoundError if there is no such parser.
    def load_parser(_name)
      load_entity _name, 'parser', BaseParser
    end

    private

    # Resolves _name to a constant of the wrapped module and checks that it
    # is a subclass of _type.
    #
    # _name - string or symbol; anything other than letters, digits and ':'
    #         is replaced by '_' before camelizing.
    # _role - label ('state', 'parser') passed on to the error.
    # _type - required superclass.
    def load_entity(_name, _role, _type)
      name = _name.to_s.gsub(/[^A-Z0-9:]+/i, '_').camelize

      entity = begin
        @module.const_get(name)
      rescue NameError
        # missing constant or a name that is not a valid constant name
        nil
      end

      # Also reject constants that are not classes at all: comparing them
      # with `<` would raise instead of reporting a missing entity.
      unless entity.is_a?(Class) && entity < _type
        raise EntityNotFoundError.new(_role, name)
      end

      entity
    end

  end
end
@@ -0,0 +1,33 @@
module Crabfarm
  # Builds remote Selenium drivers connected to a local phantomjs instance.
  class PhantomDriverFactory

    # _phantom - object exposing the #port phantomjs listens on.
    # _config  - hash; the keys read are :remote_timeout, :capabilities,
    #            :window_width and :window_height.
    def initialize(_phantom, _config={})
      @phantom = _phantom
      @config = _config
    end

    # Returns a new remote driver with the configured window size applied.
    # The _session_id argument is not used by this factory.
    def build_driver(_session_id)
      # a dedicated http client is used so the timeout can be customized
      http_client = Selenium::WebDriver::Remote::Http::Default.new
      http_client.timeout = @config[:remote_timeout]

      options = {
        :url => phantom_url,
        :http_client => http_client,
        :desired_capabilities => @config[:capabilities]
      }

      Selenium::WebDriver.for(:remote, options).tap do |driver|
        # the window size is set through the driver's (non public) bridge
        driver.send(:bridge).setWindowSize(@config[:window_width], @config[:window_height])
      end
    end

    private

    def phantom_url
      "http://localhost:#{@phantom.port}"
    end

  end
end
@@ -0,0 +1,74 @@
1
+ require 'net/http'
2
+
module Crabfarm
  # Starts and stops a phantomjs process running in webdriver mode on a free
  # local port.
  class PhantomRunner

    # Seconds to sleep between two connection attempts while phantomjs boots.
    POLL_INTERVAL = 0.05

    # Port phantomjs was asked to listen on (nil until #start is called).
    attr_reader :port

    # _config - hash; the keys read are :bin_path, :load_images, :proxy,
    #           :ssl and :lock_file.
    def initialize(_config={})
      @config = _config
      @pid = nil
    end

    # Picks a free port, spawns phantomjs on it and blocks until the
    # webdriver endpoint answers.
    #
    # When a :lock_file is configured the lock is held until phantomjs is
    # listening: the port is only reserved once phantomjs has bound it, so
    # releasing the lock earlier would let another process pick the same one.
    #
    # Raises RuntimeError if phantomjs exits before accepting connections.
    def start
      with_lock do
        find_available_port
        @pid = Process.spawn({}, phantomjs_cmd)
        wait_for_server
      end
    end

    # Terminates the phantomjs process, if one was started. It is safe to
    # call this more than once or after the process already died.
    def stop
      return if @pid.nil?

      begin
        Process.kill("TERM", @pid)
        Process.wait @pid
      rescue Errno::ESRCH, Errno::ECHILD
        # the process is already gone, nothing left to clean up
      ensure
        @pid = nil
      end

      nil
    end

    private

    # Builds the phantomjs command line from the configuration.
    def phantomjs_cmd
      cmd = [@config[:bin_path]]
      cmd << '--load-images=false' unless @config[:load_images]
      cmd << "--proxy=#{@config[:proxy]}" unless @config[:proxy].nil?
      cmd << "--webdriver=#{@port}"
      cmd << "--ssl-protocol=#{@config[:ssl]}" unless @config[:ssl].nil?
      cmd << "--ignore-ssl-errors=true"
      cmd << "--webdriver-loglevel=NONE" # TODO: remove when log path is chosen
      # cmd << "--webdriver-logfile=/path/to/log/phantom.log"
      cmd.join(' ')
    end

    # Asks the OS for an unused port by binding port 0 and stores it in @port.
    def find_available_port
      server = TCPServer.new('127.0.0.1', 0)
      @port = server.addr[1]
    ensure
      server.close if server
    end

    # Polls the webdriver port until a request succeeds.
    def wait_for_server
      loop do
        begin
          # TODO: generate a valid request to prevent warnings
          Net::HTTP.get_response(URI.parse("http://127.0.0.1:#{@port}"))
          break
        rescue StandardError
          # not accepting connections yet: give up if phantomjs died,
          # otherwise retry after a short pause instead of spinning.
          ensure_phantom_alive
          sleep POLL_INTERVAL
        end
      end
    end

    # Raises if the spawned process has already exited.
    def ensure_phantom_alive
      exited = begin
        Process.waitpid(@pid, Process::WNOHANG)
      rescue Errno::ECHILD
        @pid
      end
      return if exited.nil?

      # the child has been reaped, so #stop must not signal it again
      @pid = nil
      raise "phantomjs exited before accepting connections on port #{@port}"
    end

    # Runs the block while holding an exclusive lock on the configured
    # :lock_file; runs it directly when no lock file is configured.
    def with_lock
      return yield if @config[:lock_file].nil?

      File.open(@config[:lock_file], 'a+') do |file|
        begin
          file.flock File::LOCK_EX
          yield
        ensure
          file.flock File::LOCK_UN
        end
      end
    end

  end
end
@@ -0,0 +1,39 @@
# Load the crawler through the application loader before any spec runs.
CF_LOADER.load

# A single context, and the driver it exposes, is shared by every example.
CF_TEST_CONTEXT = CF_LOADER.load_context
CF_TEST_BUCKET = CF_TEST_CONTEXT.driver
5
+
module Crabfarm
  # Helpers mixed into every example to run a parser against a snapshot file
  # or a live url using the shared test driver (CF_TEST_BUCKET).
  module RSpec

    # Loads _snap_or_url in the shared driver and runs the described parser
    # class on it.
    #
    # _snap_or_url - name of a file inside the directory given by the
    #                SNAPSHOT_DIR environment variable; when no such file
    #                exists (or the variable is not set) the value is passed
    #                to the driver as is.
    # _options     - options passed on to the parser.
    #
    # Returns whatever the driver's #parse returns.
    def parse(_snap_or_url, _options={})
      snapshot_dir = ENV['SNAPSHOT_DIR']
      # without a snapshot directory there is nothing to look up; File.join
      # would raise a TypeError on nil.
      fixture = snapshot_dir && File.join(snapshot_dir, _snap_or_url)

      if fixture && File.exist?(fixture)
        CF_TEST_BUCKET.get("file://#{File.realpath(fixture)}")
      else
        CF_TEST_BUCKET.get(_snap_or_url)
      end

      CF_TEST_BUCKET.parse(described_class, _options)
    end

    # Value stored in @parser for the current example (nil when not set).
    def parser
      @parser
    end

  end
end
26
+
RSpec.configure do |config|
  config.include Crabfarm::RSpec

  # Examples tagged with `parsing:` get the parse result in @parser before
  # they run; `using:` provides the options handed to the parser.
  config.before(:example) do |example|
    target = example.metadata[:parsing]
    @parser = parse(target, example.metadata[:using] || {}) if target
  end

  # Release the shared context once the whole suite has finished.
  config.after(:suite) { CF_TEST_CONTEXT.release }
end