crabfarm 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/crabfarm +9 -0
- data/lib/crabfarm.rb +24 -0
- data/lib/crabfarm/adapters.rb +23 -0
- data/lib/crabfarm/adapters/capybara_adapter.rb +7 -0
- data/lib/crabfarm/adapters/surfer_adapter.rb +7 -0
- data/lib/crabfarm/adapters/watir_adapter.rb +7 -0
- data/lib/crabfarm/base_parser.rb +26 -0
- data/lib/crabfarm/base_state.rb +41 -0
- data/lib/crabfarm/cli.rb +79 -0
- data/lib/crabfarm/configuration.rb +83 -0
- data/lib/crabfarm/context.rb +32 -0
- data/lib/crabfarm/default_driver_factory.rb +37 -0
- data/lib/crabfarm/driver_bucket.rb +50 -0
- data/lib/crabfarm/driver_bucket_pool.rb +48 -0
- data/lib/crabfarm/dsl/surfer.rb +22 -0
- data/lib/crabfarm/dsl/surfer/search_context.rb +134 -0
- data/lib/crabfarm/dsl/surfer/surf_context.rb +58 -0
- data/lib/crabfarm/engines/safe_state_loop.rb +96 -0
- data/lib/crabfarm/errors.rb +50 -0
- data/lib/crabfarm/loader.rb +83 -0
- data/lib/crabfarm/modes/console.rb +86 -0
- data/lib/crabfarm/modes/generator.rb +120 -0
- data/lib/crabfarm/modes/server.rb +78 -0
- data/lib/crabfarm/module_helper.rb +35 -0
- data/lib/crabfarm/phantom_driver_factory.rb +33 -0
- data/lib/crabfarm/phantom_runner.rb +74 -0
- data/lib/crabfarm/rspec.rb +39 -0
- data/lib/crabfarm/state_store.rb +24 -0
- data/lib/crabfarm/support/custom_puma.rb +64 -0
- data/lib/crabfarm/templates/Crabfile.erb +3 -0
- data/lib/crabfarm/templates/Gemfile.erb +7 -0
- data/lib/crabfarm/templates/boot.rb.erb +13 -0
- data/lib/crabfarm/templates/crabfarm_bin.erb +3 -0
- data/lib/crabfarm/templates/dot_gitignore.erb +1 -0
- data/lib/crabfarm/templates/dot_gitkeep.erb +0 -0
- data/lib/crabfarm/templates/dot_rspec.erb +4 -0
- data/lib/crabfarm/templates/parser.rb.erb +8 -0
- data/lib/crabfarm/templates/parser_spec.rb.erb +7 -0
- data/lib/crabfarm/templates/spec_helper.rb.erb +22 -0
- data/lib/crabfarm/templates/state.rb.erb +8 -0
- data/lib/crabfarm/templates/state_spec.rb.erb +7 -0
- data/lib/crabfarm/version.rb +3 -0
- metadata +359 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'readline'
|
2
|
+
require 'rainbow'
|
3
|
+
require 'rainbow/ext/string'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
module Crabfarm
|
7
|
+
module Modes
|
8
|
+
class Console

  # Receiver for console input: every line typed by the user is instance_eval'd
  # against an instance of this class, so its public methods are the commands.
  class ConsoleDsl

    attr_reader :context

    # _loader - object that responds to load_context and unload.
    def initialize(_loader)
      @loader = _loader
      reload!
    end

    # Replaces the current context with a freshly loaded one. When a context
    # already exists it is released and the loader is unloaded first.
    def reload!
      unless @context.nil?
        puts "Reloading crawler source".color(:green)
        @context.release
        @loader.unload
      end

      @context = @loader.load_context
    end

    # Runs the state called _name with _params on the current context and
    # prints the state output attributes as pretty JSON. Errors are printed
    # instead of raised; anything other than EntityNotFoundError also prints
    # its backtrace.
    def transition(_name=nil, _params={})
      if _name.nil?
        puts "Must provide a state name".color(:red)
        return
      end

      begin
        state = @context.run_state _name, _params
        puts JSON.pretty_generate(state.output.attributes!).color(:green)
      rescue EntityNotFoundError => e
        puts e.to_s.color(:red)
      rescue => e
        puts e.to_s.color(:red)
        puts e.backtrace
      end
    end

    def help
      puts "Ejem..."
    end

    # Resets the current context without reloading the crawler source.
    def reset
      puts "Resetting crawling context".color(:green)
      @context.reset
    end

    alias :t :transition
    alias :r :reset
  end

  # Interactive read-eval loop. Requires the CF_LOADER constant to be defined;
  # otherwise an error message is printed and nothing else happens.
  #
  # The loop ends on end-of-input (Readline returns nil, e.g. Ctrl-D), on
  # Interrupt or on SystemExit. The current context is always released on the
  # way out, even if an unexpected exception escapes the loop.
  def self.console_loop
    unless defined? CF_LOADER
      puts "This command can only be run inside a crabfarm application".color(:red)
      return
    end

    # TODO: generated app should load itself
    dsl = ConsoleDsl.new(CF_LOADER)

    begin
      loop do
        begin
          line = Readline.readline("> ", true)
          # nil means EOF; evaluating it would raise on every iteration and
          # the loop would never terminate.
          break if line.nil?

          dsl.instance_eval line
        rescue SyntaxError => se
          puts "Syntax error: #{se.message}".color(:red)
        rescue SystemExit, Interrupt
          break
        rescue
          puts "Unknown command".color(:red)
        end
      end
    ensure
      # dsl.context is read here (not cached) because reload! replaces it.
      puts "Releasing crawling context".color(:green)
      dsl.context.release
    end
  end

end
|
85
|
+
end
|
86
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'rainbow'
|
2
|
+
require 'rainbow/ext/string'
|
3
|
+
require 'active_support'
|
4
|
+
require 'erb'
|
5
|
+
require 'ostruct'
|
6
|
+
|
7
|
+
module Crabfarm
|
8
|
+
module Modes
|
9
|
+
class Generator

  # Scaffolds a new crawler application called _name under the _target
  # directory. Directories and files that already exist are reported and
  # left untouched.
  def generate_app(_name, _target)
    with_external_path _target do
      # values exposed to the ERB templates
      locals = {
        name: _name,
        version: Crabfarm::VERSION
      }

      path(_name).ensure
      path(_name, '.gitignore').render('dot_gitignore')
      path(_name, 'Gemfile').render('Gemfile', locals)
      path(_name, 'Crabfile').render('Crabfile', locals)
      path(_name, '.rspec').render('dot_rspec', locals)
      path(_name, 'boot.rb').render('boot.rb', locals)
      path(_name, 'bin', 'crabfarm').render('crabfarm_bin', locals, 0755)
      path(_name, 'app', 'parsers', '.gitkeep').render('dot_gitkeep')
      path(_name, 'app', 'states', '.gitkeep').render('dot_gitkeep')
      path(_name, 'app', 'helpers', '.gitkeep').render('dot_gitkeep')
      path(_name, 'spec', 'spec_helper.rb').render('spec_helper.rb', locals)
      path(_name, 'spec', 'snapshots', '.gitkeep').render('dot_gitkeep')
    end
  end

  # Renders a state class and its spec inside the current application (CF_PATH).
  def generate_state(_name)
    with_crawler_path do
      locals = { state_class: _name.camelize }
      file_name = _name.parameterize
      path('app', 'states', "#{file_name}.rb").render('state.rb', locals)
      path('spec', 'states', "#{file_name}_spec.rb").render('state_spec.rb', locals)
    end
  end

  # Renders a parser class and its spec inside the current application (CF_PATH).
  def generate_parser(_name)
    with_crawler_path do
      locals = { parser_class: _name.camelize }
      file_name = _name.parameterize
      path('app', 'parsers', "#{file_name}.rb").render('parser.rb', locals)
      path('spec', 'parsers', "#{file_name}_spec.rb").render('parser_spec.rb', locals)
    end
  end

  # Uses _target as the base directory for the path/ensure/render calls made
  # inside the block.
  def with_external_path(_target)
    @base_path = _target
    yield
  end

  # Uses CF_PATH as the base directory for the block. When CF_PATH is not
  # defined the block is not run and an error message is printed instead.
  def with_crawler_path
    if defined? CF_PATH
      @base_path = CF_PATH
      yield
    else
      # coloured like the same message printed by the console mode
      puts "This command can only be run inside a crabfarm application".color(:red)
    end
  end

  # Selects the path (relative to the base directory) that the next ensure or
  # render call operates on. Returns self so calls can be chained.
  def path(*_args)
    @path = _args
    self
  end

  # Creates the selected path as a directory, reporting when it already exists.
  def ensure
    generate_dir([@base_path] + @path, false)
    self
  end

  # Renders _template (a name inside the templates directory, without the
  # .erb extension) into the selected path, creating parent directories as
  # needed. _binding is a hash of values visible to the template and _mod an
  # optional file mode.
  def render(_template, _binding={}, _mod=nil)
    target = [@base_path] + @path
    generate_dir(target[0..-2], true)
    render_template(_template, _binding, target, _mod)
    self
  end

  private

  # Creates the directory described by the _path segments unless it exists.
  # _silent suppresses the "Skipping" message for existing directories.
  def generate_dir(_path, _silent)
    full_path = File.join(*_path)
    dir = Pathname.new full_path

    if dir.exist?
      puts "Skipping #{full_path}".color(:yellow) unless _silent
    else
      puts "Generating #{full_path}".color(:green)
      dir.mkpath
    end
  end

  # Writes the evaluated template to the file described by the _path segments.
  # An existing file is never overwritten.
  def render_template(_template, _binding, _path, _mod)
    template = File.join(template_dir, _template) + '.erb'
    output = File.join(*_path)

    if File.exist?(output)
      puts "Skipping #{output}, already exists".color(:yellow)
    else
      puts "Rendering #{output}".color(:green)
      File.open(output, "w") do |f|
        f.write eval_template_with_hash(template, _binding)
        f.chmod(_mod) unless _mod.nil?
      end
    end
  end

  # Evaluates the ERB file at _path; the keys of _hash are available inside
  # the template as bare names (they are exposed through an OpenStruct).
  def eval_template_with_hash(_path, _hash)
    erb = ERB.new(File.read(_path))
    erb.result(OpenStruct.new(_hash).instance_eval { binding })
  end

  def template_dir
    File.expand_path('../../templates', __FILE__)
  end
end
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'grape'
|
2
|
+
require 'crabfarm/support/custom_puma'
|
3
|
+
require 'crabfarm/engines/safe_state_loop'
|
4
|
+
|
5
|
+
module Crabfarm
|
6
|
+
module Modes
|
7
|
+
class Server

  # JSON API served under the /api prefix; exposes the crawler state held by
  # the shared evaluator.
  class API < Grape::API

    # Upper bound applied to the :wait request parameter.
    # NOTE(review): presumably seconds (5 minutes) - confirm against the evaluator.
    MAX_WAIT = 60.0 * 5

    format :json
    prefix :api

    rescue_from Grape::Exceptions::ValidationErrors do |error|
      rack_response({ errors: error.as_json }.to_json, 400)
    end

    # NOTE(review): to_json is applied twice; this assumes ApiError#to_json
    # returns a plain structure and not a String - confirm.
    rescue_from Crabfarm::ApiError do |error|
      rack_response(error.to_json.to_json, error.code)
    end

    helpers do
      def evaluator
        Server.evaluator
      end

      # Requested wait capped at MAX_WAIT; MAX_WAIT when none was given.
      def wait
        return MAX_WAIT unless params.has_key? :wait

        requested = params[:wait].to_f
        [requested, MAX_WAIT].min
      end

      # Hash representation of a state returned to API clients.
      def print_state(_state)
        { name: _state.name, params: _state.params, doc: _state.doc }
      end
    end

    desc "Return the current crawler status."
    params do
      optional :wait, type: Float
    end
    get :state do
      current = evaluator.wait_for_state(wait)
      print_state(current)
    end

    desc "Change the crawler state"
    params do
      requires :name, type: String, desc: "Crawler state name"
      optional :wait, type: Float
    end
    put :state do
      # params[:params] is not declared in the params block above, so it may be nil.
      changed = evaluator.change_state(params[:name], params[:params], wait)
      print_state(changed)
    end
  end

  # Evaluator created by Server.start.
  def self.evaluator
    @@evaluator
  end

  # Builds the evaluator from CF_LOADER and runs the API on the custom Puma
  # wrapper; the evaluator is released once the server returns or raises.
  def self.start(_options)
    @@evaluator = Engines::SafeStateLoop.new(CF_LOADER)

    begin
      Support::CustomPuma.run(API, _options)
    ensure
      @@evaluator.release
    end
  end

end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
|
3
|
+
module Crabfarm
|
4
|
+
class ModuleHelper

  # NOTE: @dsl is never assigned inside this class, so this reader returns
  # nil unless something else sets the instance variable.
  attr_reader :dsl

  # _module - module that holds the crawler constants (CF_CONFIG, states, parsers).
  def initialize(_module)
    @module = _module
  end

  def settings
    @module::CF_CONFIG
  end

  # Returns the BaseState subclass registered under _name.
  # Raises EntityNotFoundError when no such state can be resolved.
  def load_state(_name)
    load_entity _name, 'state', BaseState
  end

  # Returns the BaseParser subclass registered under _name.
  # Raises EntityNotFoundError when no such parser can be resolved.
  def load_parser(_name)
    load_entity _name, 'parser', BaseParser
  end

  private

  # Resolves _name to a constant of the wrapped module. Runs of characters
  # other than letters, digits and ':' are turned into '_' before the name is
  # camelized. The constant must be a class or module that is a strict
  # descendant of _type; anything else raises EntityNotFoundError.
  def load_entity(_name, _role, _type)
    name = _name.to_s.gsub(/[^A-Z0-9:]+/i, '_').camelize

    entity = begin
      @module.const_get(name)
    rescue NameError
      # missing constant or a string that is not a valid constant name
      nil
    end

    # The Module check keeps unrelated constants (numbers, hashes, ...) from
    # blowing up in the `<` comparison with an unrelated exception.
    found = entity.is_a?(Module) && entity < _type
    raise EntityNotFoundError.new(_role, name) unless found

    entity
  end

end
|
35
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Crabfarm
|
2
|
+
class PhantomDriverFactory

  # _phantom - object exposing the port of a running phantomjs webdriver.
  # _config  - hash read for :remote_timeout, :capabilities, :window_width
  #            and :window_height.
  def initialize(_phantom, _config={})
    @phantom = _phantom
    @config = _config
  end

  # Builds a remote Selenium driver connected to the phantomjs instance and
  # resizes its window to the configured dimensions. _session_id is accepted
  # but not used by this factory.
  #
  # If sizing the window fails the freshly created session is quit before the
  # error is re-raised, so the remote session is not left behind.
  def build_driver(_session_id)

    # setup a custom client to use longer timeouts
    client = Selenium::WebDriver::Remote::Http::Default.new
    client.timeout = @config[:remote_timeout]

    driver = Selenium::WebDriver.for :remote, {
      :url => phantom_url,
      :http_client => client,
      :desired_capabilities => @config[:capabilities]
    }

    begin
      # public window API instead of reaching into the private bridge
      driver.manage.window.resize_to(@config[:window_width], @config[:window_height])
    rescue StandardError => e
      quit_quietly(driver)
      raise e
    end

    driver
  end

  private

  def phantom_url
    "http://localhost:#{@phantom.port}"
  end

  # Best-effort cleanup: a failure while quitting must not hide the original error.
  def quit_quietly(_driver)
    _driver.quit
  rescue StandardError
    nil
  end

end
|
33
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
module Crabfarm
|
4
|
+
class PhantomRunner

  # Pause between two connection attempts while phantomjs is starting up.
  POLL_INTERVAL = 0.05

  # Port chosen for the phantomjs webdriver (set by start).
  attr_reader :port

  # _config - hash read for :bin_path, :load_images, :proxy, :ssl and :lock_file.
  def initialize(_config={})
    @config = _config
    @pid = nil
  end

  # Picks a free local port, spawns phantomjs in webdriver mode on it and
  # blocks until the server answers HTTP requests.
  #
  # Raises RuntimeError if the phantomjs process exits before it starts
  # accepting connections.
  def start
    find_available_port
    @pid = Process.spawn({}, phantomjs_cmd)
    wait_for_server
  end

  # Terminates the spawned process, if any, and waits for it to exit.
  # A process that is already gone is not treated as an error.
  def stop
    return if @pid.nil?

    begin
      Process.kill("TERM", @pid)
      Process.wait @pid
    rescue Errno::ESRCH, Errno::ECHILD
      # the process already exited or was already reaped
    ensure
      @pid = nil
    end

    nil
  end

  private

  # Builds the phantomjs command line. The parts are joined with single
  # spaces and no quoting, so configuration values containing spaces are not
  # supported.
  def phantomjs_cmd
    cmd = [@config[:bin_path]]
    cmd << '--load-images=false' unless @config[:load_images]
    cmd << "--proxy=#{@config[:proxy]}" unless @config[:proxy].nil?
    cmd << "--webdriver=#{@port}"
    cmd << "--ssl-protocol=#{@config[:ssl]}" unless @config[:ssl].nil?
    cmd << "--ignore-ssl-errors=true"
    cmd << "--webdriver-loglevel=NONE" # TODO: remove when log path is choosen
    # cmd << "--webdriver-logfile=/path/to/log/phantom.log"
    cmd.join(' ')
  end

  # Asks the OS for a free port by binding to port 0, remembers it and closes
  # the socket again. Runs under the configured lock file, when there is one.
  def find_available_port
    with_lock do
      server = TCPServer.new('127.0.0.1', 0)
      @port = server.addr[1]
      server.close
    end
  end

  # Polls the webdriver port until an HTTP request succeeds. Between attempts
  # it sleeps briefly and checks that the child process is still alive, so a
  # phantomjs that failed to start is reported instead of waiting forever.
  def wait_for_server
    loop do
      if Process.waitpid(@pid, Process::WNOHANG)
        # the child has been reaped here, stop must not signal that pid again
        @pid = nil
        raise "phantomjs exited before accepting connections on port #{@port}"
      end

      begin
        # TODO: generate a valid request to prevent warnings
        Net::HTTP.get_response(URI.parse("http://127.0.0.1:#{@port}"))
        break
      rescue StandardError
        sleep POLL_INTERVAL
      end
    end
  end

  # Runs the block while holding an exclusive flock on the configured
  # :lock_file; without a lock file the block just runs.
  def with_lock
    return yield if @config[:lock_file].nil?

    File.open(@config[:lock_file], 'a+') do |file|
      begin
        file.flock File::LOCK_EX
        return yield
      ensure
        file.flock File::LOCK_UN
      end
    end
  end

end
|
74
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Spec bootstrap: CF_LOADER must already be defined when this file is required.
CF_LOADER.load

# One context (and its driver) is created here and kept in constants for the
# whole spec run.
CF_TEST_CONTEXT = CF_LOADER.load_context
CF_TEST_BUCKET = CF_TEST_CONTEXT.driver
|
5
|
+
|
6
|
+
module Crabfarm
  # Helpers mixed into the examples of a crawler spec suite.
  module RSpec

    # Navigates the shared test driver and parses the page with the described class.
    #
    # _snap_or_url - name of a snapshot file inside the directory named by the
    #                SNAPSHOT_DIR environment variable, or a URL. When
    #                SNAPSHOT_DIR is unset or the file does not exist the value
    #                is handed to the driver unchanged.
    # _options     - options forwarded to the parse call.
    #
    # Returns whatever CF_TEST_BUCKET.parse returns.
    def parse(_snap_or_url, _options={})
      snapshot_dir = ENV['SNAPSHOT_DIR']
      # File.join raises on nil, so only build the fixture path when a
      # snapshot directory is configured.
      fixture = snapshot_dir && Pathname.new(File.join(snapshot_dir, _snap_or_url))

      if fixture && fixture.exist?
        CF_TEST_BUCKET.get("file://#{fixture.realpath}")
      else
        CF_TEST_BUCKET.get(_snap_or_url)
      end

      CF_TEST_BUCKET.parse(described_class, _options)
    end

    # Parser stored in @parser by the spec hooks (nil when none was set).
    def parser
      @parser
    end

  end
end
|
26
|
+
|
27
|
+
RSpec.configure do |config|
  config.include Crabfarm::RSpec

  # Examples tagged with :parsing get @parser assigned before they run; the
  # optional :using metadata is passed along as parse options.
  config.before(:example) do |example|
    snapshot = example.metadata[:parsing]
    @parser = parse(snapshot, example.metadata[:using] || {}) if snapshot
  end

  # The shared test context is released once the whole suite has finished.
  config.after(:suite) { CF_TEST_CONTEXT.release }
end
|