crabfarm 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/crabfarm +9 -0
- data/lib/crabfarm.rb +24 -0
- data/lib/crabfarm/adapters.rb +23 -0
- data/lib/crabfarm/adapters/capybara_adapter.rb +7 -0
- data/lib/crabfarm/adapters/surfer_adapter.rb +7 -0
- data/lib/crabfarm/adapters/watir_adapter.rb +7 -0
- data/lib/crabfarm/base_parser.rb +26 -0
- data/lib/crabfarm/base_state.rb +41 -0
- data/lib/crabfarm/cli.rb +79 -0
- data/lib/crabfarm/configuration.rb +83 -0
- data/lib/crabfarm/context.rb +32 -0
- data/lib/crabfarm/default_driver_factory.rb +37 -0
- data/lib/crabfarm/driver_bucket.rb +50 -0
- data/lib/crabfarm/driver_bucket_pool.rb +48 -0
- data/lib/crabfarm/dsl/surfer.rb +22 -0
- data/lib/crabfarm/dsl/surfer/search_context.rb +134 -0
- data/lib/crabfarm/dsl/surfer/surf_context.rb +58 -0
- data/lib/crabfarm/engines/safe_state_loop.rb +96 -0
- data/lib/crabfarm/errors.rb +50 -0
- data/lib/crabfarm/loader.rb +83 -0
- data/lib/crabfarm/modes/console.rb +86 -0
- data/lib/crabfarm/modes/generator.rb +120 -0
- data/lib/crabfarm/modes/server.rb +78 -0
- data/lib/crabfarm/module_helper.rb +35 -0
- data/lib/crabfarm/phantom_driver_factory.rb +33 -0
- data/lib/crabfarm/phantom_runner.rb +74 -0
- data/lib/crabfarm/rspec.rb +39 -0
- data/lib/crabfarm/state_store.rb +24 -0
- data/lib/crabfarm/support/custom_puma.rb +64 -0
- data/lib/crabfarm/templates/Crabfile.erb +3 -0
- data/lib/crabfarm/templates/Gemfile.erb +7 -0
- data/lib/crabfarm/templates/boot.rb.erb +13 -0
- data/lib/crabfarm/templates/crabfarm_bin.erb +3 -0
- data/lib/crabfarm/templates/dot_gitignore.erb +1 -0
- data/lib/crabfarm/templates/dot_gitkeep.erb +0 -0
- data/lib/crabfarm/templates/dot_rspec.erb +4 -0
- data/lib/crabfarm/templates/parser.rb.erb +8 -0
- data/lib/crabfarm/templates/parser_spec.rb.erb +7 -0
- data/lib/crabfarm/templates/spec_helper.rb.erb +22 -0
- data/lib/crabfarm/templates/state.rb.erb +8 -0
- data/lib/crabfarm/templates/state_spec.rb.erb +7 -0
- data/lib/crabfarm/version.rb +3 -0
- metadata +359 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'readline'
|
2
|
+
require 'rainbow'
|
3
|
+
require 'rainbow/ext/string'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
module Crabfarm
  module Modes
    # Interactive console mode: a small read-eval-print loop that evaluates
    # every input line against a ConsoleDsl instance.
    class Console

      # Receiver of the console commands. Each line typed in the console is
      # instance_eval'ed on an instance of this class, so its public methods
      # (and their aliases +t+ and +r+) are the available commands.
      class ConsoleDsl

        # The context currently obtained from the loader.
        attr_reader :context

        # @param _loader object responding to +load_context+ and +unload+
        def initialize(_loader)
          @loader = _loader
          reload!
        end

        # Releases the current context (if any), asks the loader to unload
        # and then requests a fresh context from the loader.
        def reload!
          unless @context.nil?
            puts "Reloading crawler source".color(:green)
            @context.release
            @loader.unload
          end

          @context = @loader.load_context
        end

        # Runs the state +_name+ on the current context and prints the
        # state output as pretty JSON. Errors are printed, never raised.
        #
        # @param _name state name; a message is printed when omitted
        # @param _params parameters handed over to +run_state+
        def transition(_name=nil, _params={})
          if _name.nil?
            puts "Must provide a state name".color(:red)
            return
          end

          begin
            state = @context.run_state _name, _params
            puts JSON.pretty_generate(state.output.attributes!).color(:green)
          rescue EntityNotFoundError => e
            # unknown entity: the message alone is enough
            puts e.to_s.color(:red)
          rescue => e
            # anything else: also show where it came from
            puts e.to_s.color(:red)
            puts e.backtrace
          end
        end

        def help
          puts "Ejem..."
        end

        # Calls +reset+ on the current context.
        def reset
          puts "Resetting crawling context".color(:green)
          @context.reset
        end

        alias :t :transition
        alias :r :reset
      end

      # Runs the console until the user exits (+exit+, Ctrl-C or end of
      # input) and then releases the context. Requires CF_LOADER to be
      # defined, otherwise only an error message is printed.
      def self.console_loop
        unless defined? CF_LOADER
          puts "This command can only be run inside a crabfarm application".color(:red)
          return
        end

        # TODO: generated app should load itself
        dsl = ConsoleDsl.new(CF_LOADER)

        loop do
          begin
            line = Readline.readline("> ", true)
            # Readline returns nil on end of input (Ctrl-D); evaluating nil
            # would raise on every iteration and the loop would never end.
            break if line.nil?

            # NOTE: the line is evaluated as Ruby code on purpose, this is an
            # interactive developer console.
            dsl.instance_eval line
          rescue SyntaxError => se
            puts "Syntax error: #{se.message}".color(:red)
          rescue SystemExit, Interrupt
            break
          rescue
            puts "Unknown command".color(:red)
          end
        end

        puts "Releasing crawling context".color(:green)
        dsl.context.release
      end

    end
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'rainbow'
require 'rainbow/ext/string'
require 'active_support'
require 'erb'
require 'ostruct'
require 'pathname'
|
6
|
+
|
7
|
+
module Crabfarm
  module Modes
    # Renders the ERB templates found in the gem's template directory into
    # files of a new or existing application. Output paths are built with a
    # small fluent API: +path(...)+ selects a location relative to the base
    # path, +ensure+ creates it as a directory and +render+ writes a template
    # into it. Existing files and directories are never overwritten.
    class Generator

      # Generates the skeleton of an application called +_name+ inside the
      # +_target+ directory.
      def generate_app(_name, _target)
        with_external_path _target do
          locals = {
            name: _name,
            version: Crabfarm::VERSION
          }

          path(_name).ensure
          path(_name, '.gitignore').render('dot_gitignore')
          path(_name, 'Gemfile').render('Gemfile', locals)
          path(_name, 'Crabfile').render('Crabfile', locals)
          path(_name, '.rspec').render('dot_rspec', locals)
          path(_name, 'boot.rb').render('boot.rb', locals)
          path(_name, 'bin', 'crabfarm').render('crabfarm_bin', locals, 0755)
          path(_name, 'app', 'parsers', '.gitkeep').render('dot_gitkeep')
          path(_name, 'app', 'states', '.gitkeep').render('dot_gitkeep')
          path(_name, 'app', 'helpers', '.gitkeep').render('dot_gitkeep')
          path(_name, 'spec', 'spec_helper.rb').render('spec_helper.rb', locals)
          path(_name, 'spec', 'snapshots', '.gitkeep').render('dot_gitkeep')
        end
      end

      # Generates a state class and its spec inside the current application.
      # Relies on String#camelize and String#parameterize being available.
      def generate_state(_name)
        with_crawler_path do
          locals = { state_class: _name.camelize }
          path('app', 'states', _name.parameterize + '.rb').render('state.rb', locals)
          path('spec', 'states', _name.parameterize + '_spec.rb').render('state_spec.rb', locals)
        end
      end

      # Generates a parser class and its spec inside the current application.
      # Relies on String#camelize and String#parameterize being available.
      def generate_parser(_name)
        with_crawler_path do
          locals = { parser_class: _name.camelize }
          path('app', 'parsers', _name.parameterize + '.rb').render('parser.rb', locals)
          path('spec', 'parsers', _name.parameterize + '_spec.rb').render('parser_spec.rb', locals)
        end
      end

      # Uses +_target+ as base path and yields.
      def with_external_path(_target)
        @base_path = _target
        yield
      end

      # Uses CF_PATH as base path and yields; when CF_PATH is not defined a
      # message is printed and the block is not executed.
      def with_crawler_path
        if defined? CF_PATH
          @base_path = CF_PATH
          yield
        else
          puts "This command can only be run inside a crabfarm application".color(:red)
        end
      end

      # Selects the path (relative to the base path) the next +ensure+ or
      # +render+ call operates on.
      #
      # @return [Generator] self, to allow chaining
      def path(*_args)
        @path = _args
        self
      end

      # Creates the selected path as a directory, reporting when it already
      # exists.
      #
      # @return [Generator] self
      def ensure
        generate_dir([@base_path] + @path, false)
        self
      end

      # Renders +_template+ into the selected path, creating the parent
      # directory when needed.
      #
      # @param _template [String] template name, without the +.erb+ suffix
      # @param _binding [Hash] values exposed to the template
      # @param _mod [Integer, nil] file mode applied to the output, if given
      # @return [Generator] self
      def render(_template, _binding={}, _mod=nil)
        full_path = [@base_path] + @path
        generate_dir(full_path[0..-2], true)
        render_template(_template, _binding, full_path, _mod)
        self
      end

    private

      # Creates the directory +_path+ (given as path segments) unless it
      # exists; an existing directory is reported unless +_silent+ is set.
      def generate_dir(_path, _silent)
        path = File.join(*_path)
        dir = Pathname.new path
        if dir.exist?
          puts "Skipping #{path}".color(:yellow) unless _silent
        else
          puts "Generating #{path}".color(:green)
          dir.mkpath
        end
      end

      # Writes the evaluated template into +_path+ (given as path segments)
      # unless the output file already exists.
      def render_template(_template, _binding, _path, _mod)
        template = File.join(template_dir, _template) + '.erb'
        output = File.join(*_path)

        if Pathname.new(output).exist?
          puts "Skipping #{output}, already exists".color(:yellow)
          return
        end

        puts "Rendering #{output}".color(:green)
        # Evaluate before opening the output: a template that fails must not
        # leave an empty file behind, since existing files are skipped on
        # every later run.
        content = eval_template_with_hash(template, _binding)
        File.open(output, "w") do |f|
          f.write content
          f.chmod(_mod) unless _mod.nil?
        end
      end

      # Evaluates the ERB file at +_path+ exposing the keys of +_hash+ as
      # methods (through an OpenStruct) to the template.
      def eval_template_with_hash(_path, _hash)
        erb = ERB.new(File.read _path)
        erb.result(OpenStruct.new(_hash).instance_eval { binding })
      end

      # Directory holding the templates, resolved relative to this file.
      def template_dir
        File.expand_path('../../templates', __FILE__)
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'grape'
|
2
|
+
require 'crabfarm/support/custom_puma'
|
3
|
+
require 'crabfarm/engines/safe_state_loop'
|
4
|
+
|
5
|
+
module Crabfarm
  module Modes
    # Server mode: exposes the state evaluator through a JSON HTTP API.
    class Server

      # Grape API mounted under the +api+ prefix with two endpoints:
      # GET /api/state and PUT /api/state.
      class API < Grape::API

        # Upper bound for the +wait+ request parameter (presumably seconds,
        # the value is forwarded untouched to the evaluator).
        MAX_WAIT = 60.0 * 5

        format :json
        prefix :api

        # Parameter validation failures are answered with a 400.
        rescue_from Grape::Exceptions::ValidationErrors do |e|
          rack_response({ errors: e.as_json }.to_json, 400)
        end

        # API errors carry their own status code.
        rescue_from Crabfarm::ApiError do |e|
          rack_response(e.to_json.to_json, e.code)
        end

        helpers do
          # The evaluator shared by the whole server.
          def evaluator
            Server.evaluator
          end

          # Requested wait capped at MAX_WAIT; MAX_WAIT when not given.
          def wait
            return MAX_WAIT unless params.has_key? :wait

            [params[:wait].to_f, MAX_WAIT].min
          end

          # Hash representation of a state used as response body.
          def print_state(_state)
            { name: _state.name, params: _state.params, doc: _state.doc }
          end
        end

        desc "Return the current crawler status."
        params do
          optional :wait, type: Float
        end
        get :state do
          print_state(evaluator.wait_for_state(wait))
        end

        desc "Change the crawler state"
        params do
          requires :name, type: String, desc: "Crawler state name"
          optional :wait, type: Float
        end
        put :state do
          print_state(evaluator.change_state(params[:name], params[:params], wait))
        end
      end

      # Evaluator created by +start+.
      def self.evaluator
        @@evaluator
      end

      # Builds the evaluator from CF_LOADER, runs the API on the custom puma
      # launcher and releases the evaluator when the launcher returns or
      # fails.
      def self.start(_options)
        @@evaluator = Engines::SafeStateLoop.new(CF_LOADER)
        begin
          Support::CustomPuma.run(API, _options)
        ensure
          @@evaluator.release
        end
      end

    end
  end
end
|
78
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
|
3
|
+
module Crabfarm
  # Thin wrapper around a module giving access to its settings and to the
  # state and parser classes defined in it.
  class ModuleHelper

    attr_reader :dsl

    # @param _module [Module] module where entities are looked up
    def initialize(_module)
      @module = _module
    end

    # The CF_CONFIG constant of the wrapped module.
    def settings
      @module::CF_CONFIG
    end

    # Resolves +_name+ to a subclass of BaseState.
    #
    # @raise [EntityNotFoundError] when no such state class is found
    def load_state(_name)
      load_entity _name, 'state', BaseState
    end

    # Resolves +_name+ to a subclass of BaseParser.
    #
    # @raise [EntityNotFoundError] when no such parser class is found
    def load_parser(_name)
      load_entity _name, 'parser', BaseParser
    end

  private

    # Turns +_name+ into a constant name (non alphanumeric runs become
    # underscores, then String#camelize is applied) and looks it up through
    # the wrapped module.
    #
    # @param _role [String] entity kind, only used in the error
    # @param _type [Class] class the result must be a strict subclass of
    # @return [Class]
    # @raise [EntityNotFoundError] when the constant is missing, is not a
    #   class or does not inherit from +_type+
    def load_entity(_name, _role, _type)
      name = _name.to_s.gsub(/[^A-Z0-9:]+/i, '_').camelize

      mod = begin
        @module.const_get(name)
      rescue NameError
        # Only a missing or invalid constant means "not found"; any other
        # error raised during the lookup is a real failure and must surface.
        nil
      end

      # The is_a? guard keeps non-class constants from blowing up in `<`.
      raise EntityNotFoundError.new(_role, name) unless mod.is_a?(Class) && mod < _type

      mod
    end

  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Crabfarm
  # Builds remote selenium drivers that talk to a phantom instance
  # listening on localhost.
  class PhantomDriverFactory

    # @param _phantom object exposing the +port+ the phantom instance uses
    # @param _config [Hash] reads :remote_timeout, :capabilities,
    #   :window_width and :window_height
    def initialize(_phantom, _config={})
      @phantom = _phantom
      @config = _config
    end

    # Creates a remote driver and sets its window size from the
    # configuration. +_session_id+ is not used by this factory.
    #
    # @return the configured selenium driver
    def build_driver(_session_id)
      # a dedicated http client is needed to use a custom (longer) timeout
      http_client = Selenium::WebDriver::Remote::Http::Default.new
      http_client.timeout = @config[:remote_timeout]

      options = {
        url: phantom_url,
        http_client: http_client,
        desired_capabilities: @config[:capabilities]
      }

      Selenium::WebDriver.for(:remote, options).tap do |driver|
        driver.send(:bridge).setWindowSize(@config[:window_width], @config[:window_height])
      end
    end

  private

    # Address of the phantom instance on localhost.
    def phantom_url
      "http://localhost:#{@phantom.port}"
    end

  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
module Crabfarm
  # Spawns a phantomjs process in webdriver mode on a free local port and
  # stops it on demand.
  class PhantomRunner

    # Pause between two readiness probes, in seconds.
    POLL_INTERVAL = 0.05

    # Port chosen for the webdriver server (nil until +start+ is called).
    attr_reader :port

    # @param _config [Hash] reads :bin_path, :load_images, :proxy, :ssl and
    #   :lock_file
    def initialize(_config={})
      @config = _config
      @pid = nil
    end

    # Picks a free port, spawns phantomjs and blocks until it answers HTTP.
    #
    # @raise [RuntimeError] if the process exits before accepting connections
    def start
      find_available_port
      @pid = Process.spawn({}, phantomjs_cmd)
      wait_for_server
    end

    # Terminates the spawned process, if any, and waits for it to exit.
    # A process that is already gone is not an error.
    def stop
      return if @pid.nil?

      begin
        Process.kill("TERM", @pid)
        Process.wait @pid
      rescue Errno::ESRCH, Errno::ECHILD
        # the process already exited or was already reaped
      ensure
        # never keep a stale pid around, even if the calls above failed
        @pid = nil
      end

      nil
    end

  private

    # Command line used to launch phantomjs, as a single string.
    def phantomjs_cmd
      cmd = [@config[:bin_path]]
      cmd << '--load-images=false' unless @config[:load_images]
      cmd << "--proxy=#{@config[:proxy]}" unless @config[:proxy].nil?
      cmd << "--webdriver=#{@port}"
      cmd << "--ssl-protocol=#{@config[:ssl]}" unless @config[:ssl].nil?
      cmd << "--ignore-ssl-errors=true"
      cmd << "--webdriver-loglevel=NONE" # TODO: remove when log path is chosen
      # cmd << "--webdriver-logfile=/path/to/log/phantom.log"
      cmd.join(' ')
    end

    # Asks the OS for a free port by binding to port 0 and stores it in
    # @port. The socket is closed right away, so the port is only reserved
    # while the optional lock is held.
    def find_available_port
      with_lock do
        server = TCPServer.new('127.0.0.1', 0)
        @port = server.addr[1]
        server.close
      end
    end

    # Polls the webdriver port until an HTTP response is received.
    #
    # @raise [RuntimeError] if the spawned process exits in the meantime
    def wait_for_server
      loop do
        begin
          # TODO: generate a valid request to prevent warnings
          Net::HTTP.get_response(URI.parse("http://127.0.0.1:#{@port}"))
          break
        rescue StandardError
          # server is not accepting connections yet
        end

        # Without this check a phantomjs that failed to boot would keep us
        # polling forever. WNOHANG makes wait return nil while it is alive.
        if Process.wait(@pid, Process::WNOHANG)
          @pid = nil
          raise "phantomjs exited before accepting connections on port #{@port}"
        end

        sleep POLL_INTERVAL
      end
    end

    # Yields while holding an exclusive lock on the configured :lock_file;
    # yields directly when no lock file is configured.
    #
    # @return the value of the block
    def with_lock
      return yield if @config[:lock_file].nil?

      File.open(@config[:lock_file], 'a+') do |file|
        begin
          file.flock File::LOCK_EX
          return yield
        ensure
          file.flock File::LOCK_UN
        end
      end
    end

  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Spec bootstrap: CF_LOADER must already be defined when this file is loaded.
# NOTE(review): +load+ presumably loads the crawler source - confirm in loader.
CF_LOADER.load

# Context shared by the whole spec run (released in the after(:suite) hook)
# and the driver object obtained from it, used by the parsing helpers.
CF_TEST_CONTEXT = CF_LOADER.load_context
CF_TEST_BUCKET = CF_TEST_CONTEXT.driver
|
5
|
+
|
6
|
+
module Crabfarm
  # Helpers mixed into the example groups to exercise parsers.
  module RSpec

    # Result of the last +parse+ triggered through example metadata.
    attr_reader :parser

    # Points the test driver to a snapshot or url and parses the page with
    # the described class.
    #
    # @param _snap_or_url [String] file name inside ENV['SNAPSHOT_DIR'], or
    #   a url that is visited as is when no such snapshot exists
    # @param _options [Hash] options handed over to the driver's +parse+
    # @return whatever the driver's +parse+ returns
    def parse(_snap_or_url, _options={})
      # SNAPSHOT_DIR may be unset; File.join would raise on nil, so only
      # look for a snapshot when a directory is configured.
      snapshot_dir = ENV['SNAPSHOT_DIR']
      fixture = snapshot_dir && File.join(snapshot_dir, _snap_or_url)

      if fixture && File.exist?(fixture)
        CF_TEST_BUCKET.get("file://#{File.realpath(fixture)}")
      else
        CF_TEST_BUCKET.get(_snap_or_url)
      end

      CF_TEST_BUCKET.parse(described_class, _options)
    end

  end
end
|
26
|
+
|
27
|
+
# Wires the helpers into rspec: examples tagged with +parsing:+ get their
# snapshot (or url) parsed before running, optionally with the options given
# in +using:+, and the shared test context is released after the suite.
RSpec.configure do |config|
  config.include Crabfarm::RSpec

  config.before(:example) do |example|
    target = example.metadata[:parsing]
    @parser = parse(target, example.metadata[:using] || {}) if target
  end

  config.after(:suite) { CF_TEST_CONTEXT.release }
end
|