crabfarm 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/crabfarm +9 -0
- data/lib/crabfarm.rb +24 -0
- data/lib/crabfarm/adapters.rb +23 -0
- data/lib/crabfarm/adapters/capybara_adapter.rb +7 -0
- data/lib/crabfarm/adapters/surfer_adapter.rb +7 -0
- data/lib/crabfarm/adapters/watir_adapter.rb +7 -0
- data/lib/crabfarm/base_parser.rb +26 -0
- data/lib/crabfarm/base_state.rb +41 -0
- data/lib/crabfarm/cli.rb +79 -0
- data/lib/crabfarm/configuration.rb +83 -0
- data/lib/crabfarm/context.rb +32 -0
- data/lib/crabfarm/default_driver_factory.rb +37 -0
- data/lib/crabfarm/driver_bucket.rb +50 -0
- data/lib/crabfarm/driver_bucket_pool.rb +48 -0
- data/lib/crabfarm/dsl/surfer.rb +22 -0
- data/lib/crabfarm/dsl/surfer/search_context.rb +134 -0
- data/lib/crabfarm/dsl/surfer/surf_context.rb +58 -0
- data/lib/crabfarm/engines/safe_state_loop.rb +96 -0
- data/lib/crabfarm/errors.rb +50 -0
- data/lib/crabfarm/loader.rb +83 -0
- data/lib/crabfarm/modes/console.rb +86 -0
- data/lib/crabfarm/modes/generator.rb +120 -0
- data/lib/crabfarm/modes/server.rb +78 -0
- data/lib/crabfarm/module_helper.rb +35 -0
- data/lib/crabfarm/phantom_driver_factory.rb +33 -0
- data/lib/crabfarm/phantom_runner.rb +74 -0
- data/lib/crabfarm/rspec.rb +39 -0
- data/lib/crabfarm/state_store.rb +24 -0
- data/lib/crabfarm/support/custom_puma.rb +64 -0
- data/lib/crabfarm/templates/Crabfile.erb +3 -0
- data/lib/crabfarm/templates/Gemfile.erb +7 -0
- data/lib/crabfarm/templates/boot.rb.erb +13 -0
- data/lib/crabfarm/templates/crabfarm_bin.erb +3 -0
- data/lib/crabfarm/templates/dot_gitignore.erb +1 -0
- data/lib/crabfarm/templates/dot_gitkeep.erb +0 -0
- data/lib/crabfarm/templates/dot_rspec.erb +4 -0
- data/lib/crabfarm/templates/parser.rb.erb +8 -0
- data/lib/crabfarm/templates/parser_spec.rb.erb +7 -0
- data/lib/crabfarm/templates/spec_helper.rb.erb +22 -0
- data/lib/crabfarm/templates/state.rb.erb +8 -0
- data/lib/crabfarm/templates/state_spec.rb.erb +7 -0
- data/lib/crabfarm/version.rb +3 -0
- metadata +359 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7eb935482cb5663082aae4a3d9e24a722c1aea5d
|
4
|
+
data.tar.gz: 05b8038d530eb0d5f4be9325621405c90056b7be
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ef524a63574fa86249a0f31f08af390c07be7cb738f6a1ac8063bc59ba1a426074bc5681867c234e341905b150c73f3e2f2bd056f1e71dd39b15b9eff8ea5e3c
|
7
|
+
data.tar.gz: 23496fa635c7baca3606c04693939a6b600a8898b6acbc2c55231985f0b5d8b9420316e52a54c743e26b31ced3e805cc7e938b1e14b21d234b171f4723d653f1
|
data/bin/crabfarm
ADDED
data/lib/crabfarm.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "forwardable"
|
2
|
+
require "jbuilder"
|
3
|
+
require "selenium-webdriver"
|
4
|
+
|
5
|
+
require "crabfarm/version"
|
6
|
+
require "crabfarm/errors"
|
7
|
+
require "crabfarm/configuration"
|
8
|
+
require "crabfarm/module_helper"
|
9
|
+
require "crabfarm/driver_bucket"
|
10
|
+
require "crabfarm/driver_bucket_pool"
|
11
|
+
require "crabfarm/default_driver_factory"
|
12
|
+
require "crabfarm/phantom_driver_factory"
|
13
|
+
require "crabfarm/phantom_runner"
|
14
|
+
require "crabfarm/state_store"
|
15
|
+
require "crabfarm/context"
|
16
|
+
require "crabfarm/base_state"
|
17
|
+
require "crabfarm/base_parser"
|
18
|
+
require 'crabfarm/dsl/surfer'
|
19
|
+
require "crabfarm/adapters"
|
20
|
+
require "crabfarm/loader"
|
21
|
+
|
22
|
+
module Crabfarm
|
23
|
+
# Top-level namespace for the crabfarm gem; its components are defined in the files required above.
|
24
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'crabfarm/adapters/capybara_adapter'
|
2
|
+
require 'crabfarm/adapters/surfer_adapter'
|
3
|
+
require 'crabfarm/adapters/watir_adapter'
|
4
|
+
|
5
|
+
module Crabfarm
  # Registry that maps browser DSL names to the adapter objects registered
  # for them.
  module Adapters
    # Registry storage (name symbol => adapter), kept on the module itself.
    @adapters = {}

    # Registers an adapter under a DSL name.
    #
    # @param _name [#to_sym] name the adapter is looked up by
    # @param _adapter [Object] adapter stored under that name
    # @return [Object] the adapter that was registered
    def self.register_dsl(_name, _adapter)
      @adapters[_name.to_sym] = _adapter
    end

    # Returns the adapter registered under a DSL name.
    #
    # @param _name [#to_sym] name to look up
    # @return [Object] the registered adapter
    # @raise [ConfigurationError] when the name is unknown, or cannot be
    #   turned into a symbol at all (e.g. nil)
    def self.load_from_dsl_name(_name)
      # nil (or anything else without #to_sym) is reported as an invalid
      # name instead of surfacing as a NoMethodError.
      key = _name.respond_to?(:to_sym) ? _name.to_sym : nil
      raise ConfigurationError, "Invalid dsl name #{_name}" unless @adapters.key?(key)

      @adapters[key]
    end

    # bundled adapters
    register_dsl :watir, WatirAdapter
    register_dsl :capybara, CapybaraAdapter
    register_dsl :surfer, SurferAdapter
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Crabfarm
  # Base class for parsers. On construction the given driver is wrapped with
  # the browser DSL adapter selected for the parser class (or the module's
  # default DSL); subclasses implement #parse.
  class BaseParser

    attr_reader :browser, :params

    # Selects the browser DSL used by this parser class and, unless they
    # choose their own, by its subclasses.
    #
    # @param _dsl [Symbol, String] name passed to Adapters.load_from_dsl_name
    def self.browser_dsl(_dsl)
      @dsl = _dsl
    end

    # @param _module [#settings] object whose settings expose default_dsl
    # @param _driver [Object] driver handed to the adapter's wrap method
    # @param _params [Object] parameters exposed through #params
    def initialize(_module, _driver, _params)
      dsl_class = Adapters.load_from_dsl_name(class_dsl || _module.settings.default_dsl)
      @browser = dsl_class.wrap _driver
      @params = _params
    end

    # Extension point; must be overridden by subclasses.
    #
    # @raise [NotImplementedError] always, in the base class
    def parse
      raise NotImplementedError, "#{self.class.name}#parse is not implemented"
    end

    private

    # Finds the DSL configured through browser_dsl, starting at the concrete
    # class and walking up its superclasses so the setting is inherited.
    #
    # @return [Object, nil] the configured DSL name, or nil when none was set
    def class_dsl
      klass = self.class
      while klass
        return klass.instance_variable_get(:@dsl) if klass.instance_variable_defined?(:@dsl)
        klass = klass.superclass
      end
      nil
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Crabfarm
  # Base class for crawler states. A state gets a driver pool, a state store
  # and request parameters, exposes a Jbuilder output object, and implements
  # its work in #crawl.
  class BaseState
    extend Forwardable

    attr_reader :params

    # driver comes from the pool; get / fetch come from the store.
    def_delegators :@pool, :driver
    def_delegators :@store, :get, :fetch

    # Selects the browser DSL used by this state class and, unless they
    # choose their own, by its subclasses.
    #
    # @param _dsl [Symbol, String] name passed to Adapters.load_from_dsl_name
    def self.browser_dsl(_dsl)
      @class_dsl = _dsl
    end

    # @param _module [#settings] object whose settings expose default_dsl
    # @param _pool [#driver] source of drivers
    # @param _store [#get, #fetch] state store
    # @param _params [Object] parameters exposed through #params
    def initialize(_module, _pool, _store, _params)
      @module = _module
      @pool = _pool
      @store = _store
      @params = _params
      @output = Jbuilder.new
      @dsl = Adapters.load_from_dsl_name(class_dsl || @module.settings.default_dsl)
    end

    # Returns the pool's driver wrapped by the state's DSL adapter.
    #
    # @param _name [Object, nil] driver name forwarded to the pool
    def browser(_name=nil)
      @dsl.wrap driver(_name)
    end

    # @return [Jbuilder] the state's output builder (created on demand if a
    #   subclass skipped the base initializer)
    def output
      @output ||= Jbuilder.new
    end

    # Extension point; must be overridden by subclasses.
    #
    # @raise [NotImplementedError] always, in the base class
    def crawl
      raise NotImplementedError, "#{self.class.name}#crawl is not implemented"
    end

    private

    # Finds the DSL configured through browser_dsl, starting at the concrete
    # class and walking up its superclasses so the setting is inherited.
    #
    # @return [Object, nil] the configured DSL name, or nil when none was set
    def class_dsl
      klass = self.class
      while klass
        return klass.instance_variable_get(:@class_dsl) if klass.instance_variable_defined?(:@class_dsl)
        klass = klass.superclass
      end
      nil
    end
  end
end
|
data/lib/crabfarm/cli.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'gli'
|
2
|
+
|
3
|
+
module Crabfarm
  # Command line entry point built on GLI. Defining the class declares the
  # console / server / generate / publish commands and then immediately runs
  # the program against ARGV, exiting with the resulting status.
  class CLI
    extend GLI::App

    program_desc 'Crabfarm toolbelt'

    pre do |global_options,command,options,args|
      # Things to do before
      true
    end

    desc "Starts the crawler in console mode"
    command [:console, :c] do |c|
      c.action do |global_options,options,args|
        # mode code is required only when the command actually runs
        require "crabfarm/modes/console"
        Crabfarm::Modes::Console.console_loop
      end
    end

    desc "Starts the crawler in server mode"
    command [:server, :s] do |c|
      c.desc "Set the server host, defaults to 0.0.0.0"
      c.flag [:h,:host]

      c.desc "Set the server port, defaults to 3100"
      c.flag [:p,:port]

      c.desc "Set the server min and max threads, defaults to 0:16"
      c.flag [:t,:threads]

      c.action do |global_options,options,args|
        require "crabfarm/modes/server"

        # Host and Threads are only passed on when given; Port always is,
        # falling back to 3100.
        server_options = {}
        server_options[:Host] = options[:host] unless options[:host].nil?
        server_options[:Port] = options[:port] || 3100
        server_options[:Threads] = options[:threads] unless options[:threads].nil?
        Crabfarm::Modes::Server.start server_options
      end
    end

    desc "Generates crabfarm scaffolding"
    command [:generate, :g] do |c|

      c.desc "Generates a new crabfarm application"
      c.command :app do |app|
        app.action do |global_options,options,args|
          require "crabfarm/modes/generator"
          Crabfarm::Modes::Generator.new.generate_app(args[0], Dir.pwd)
        end
      end

      c.desc "Generates a new crabfarm parser and parser spec"
      c.command :parser do |parser|
        parser.action do |global_options,options,args|
          require "crabfarm/modes/generator"
          Crabfarm::Modes::Generator.new.generate_parser(args[0])
        end
      end

      # description previously said "parser spec" (copy/paste from the
      # parser command above)
      c.desc "Generates a new crabfarm state and state spec"
      c.command :state do |state|
        state.action do |global_options,options,args|
          require "crabfarm/modes/generator"
          Crabfarm::Modes::Generator.new.generate_state(args[0])
        end
      end
    end

    # Declared but empty: the action does nothing.
    command :publish do |c|
      c.action do |global_options,options,args|

      end
    end

    exit run(ARGV)
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Crabfarm

  # Settings holder for a crabfarm module: the default browser DSL, the
  # driver factory, default driver parameters and phantom launcher options.
  class Configuration

    # TODO: improve DSL, it sucks

    attr_accessor :default_dsl
    attr_accessor :driver_factory

    # Default driver configuration parameters
    attr_accessor :driver_name, :driver_host, :driver_port,
                  :driver_capabilities, :driver_remote_timeout,
                  :driver_window_width, :driver_window_height

    # Phantom launcher configuration
    attr_accessor :phantom_enabled, :phantom_load_images, :phantom_proxy,
                  :phantom_ssl, :phantom_bin_path, :phantom_lock_file

    # Populates every setting with its default value.
    def initialize
      @default_dsl = :surfer
      @driver_factory = nil

      @driver_name = :chrome
      @driver_capabilities = Selenium::WebDriver::Remote::Capabilities.firefox
      @driver_host = 'localhost'
      @driver_port = '8080'
      @driver_remote_timeout = 120
      @driver_window_width = 1280
      @driver_window_height = 800

      @phantom_enabled = false
      @phantom_load_images = false
      @phantom_proxy = nil
      @phantom_ssl = 'any'
      @phantom_bin_path = 'phantomjs'
      @phantom_lock_file = nil
    end

    # @return [Hash] driver settings keyed by :name, :capabilities,
    #   :remote_host, :remote_timeout, :window_width and :window_height
    def driver_config
      {
        name: @driver_name,
        capabilities: @driver_capabilities,
        remote_host: driver_remote_host,
        remote_timeout: @driver_remote_timeout,
        window_width: @driver_window_width,
        window_height: @driver_window_height
      }
    end

    # @return [Object] the phantom_enabled setting (false by default)
    def phantom_enabled?
      @phantom_enabled
    end

    # @return [Hash] phantom settings keyed by :load_images, :proxy, :ssl,
    #   :bin_path and :lock_file
    def phantom_config
      {
        load_images: @phantom_load_images,
        proxy: @phantom_proxy,
        ssl: @phantom_ssl,
        bin_path: @phantom_bin_path,
        lock_file: @phantom_lock_file
      }
    end

  private

    # Builds the remote driver URL from driver_host / driver_port.
    # A blank host means "no remote host" (nil); a blank port is left out of
    # the URL so that an empty string never produces "http://host:".
    #
    # @return [String, nil]
    def driver_remote_host
      return nil if blank_setting?(@driver_host)
      return "http://#{@driver_host}" if blank_setting?(@driver_port)

      "http://#{@driver_host}:#{@driver_port}"
    end

    # True for nil and for values whose string form is empty or whitespace.
    def blank_setting?(_value)
      _value.nil? || _value.to_s.strip.empty?
    end

  end

end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
|
3
|
+
module Crabfarm

  # Execution context for one crabfarm module: owns the module helper, the
  # driver pool and the state store, and runs states against them.
  class Context
    extend Forwardable

    # driver lookups go straight to the pool
    def_delegator :@pool, :driver

    # @param _module [Object] raw module, wrapped in a ModuleHelper
    def initialize(_module)
      helper = ModuleHelper.new(_module)
      @module = helper
      @pool = DriverBucketPool.new(helper)
      @store = StateStore.new(helper)
    end

    # Instantiates the named state, crawls it and hands it back.
    #
    # @param _name [Object] state name resolved through the module helper
    # @param _params [Hash] parameters passed to the state
    # @return [Object] the state instance, after crawl has run
    def run_state(_name, _params={})
      state_class = @module.load_state(_name)
      state_class.new(@module, @pool, @store, _params).tap do |state|
        state.crawl
      end
    end

    # Resets the store first, then the pool.
    def reset
      @store.reset
      @pool.reset
    end

    # Releases the driver pool.
    def release
      @pool.release
    end

  end

end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Crabfarm
  # Builds Selenium WebDriver instances from a driver configuration hash.
  class DefaultDriverFactory

    # @param _config [Hash] driver settings; the keys read by this class are
    #   :name, :remote_host, :remote_timeout, :capabilities, :window_width
    #   and :window_height
    def initialize(_config={})
      @config = _config
    end

    # Builds a new driver of the configured type.
    #
    # @param _session_id [Object] accepted for the factory interface; not
    #   used by this factory
    # @return [Object] the driver returned by Selenium::WebDriver.for
    # @raise [ConfigurationError] when no driver name is configured
    def build_driver(_session_id)
      driver_name = @config[:name]
      raise ConfigurationError, 'must provide a webdriver type' if driver_name.nil?

      # Normalize before branching: names may be given as strings, and a
      # 'remote' string must take the remote branch as well.
      driver_name = driver_name.to_sym

      if driver_name == :remote
        build_remote_driver
      else
        build_local_driver(driver_name)
      end
    end

  private

    # Connects to the configured remote host using a custom http client so
    # the configured timeout applies, then sizes the window through the bridge.
    def build_remote_driver
      client = Selenium::WebDriver::Remote::Http::Default.new
      client.timeout = @config[:remote_timeout]

      driver = Selenium::WebDriver.for :remote, {
        :url => @config[:remote_host],
        :http_client => client,
        :desired_capabilities => @config[:capabilities]
      }

      driver.send(:bridge).setWindowSize(@config[:window_width], @config[:window_height])
      driver
    end

    # Starts a local driver and applies the configured window size.
    def build_local_driver(_name)
      driver = Selenium::WebDriver.for _name

      begin
        driver.manage.window.resize_to(@config[:window_width], @config[:window_height])
      rescue StandardError
        # resizing is best effort: a failure here must not prevent the
        # driver from being returned
      end

      driver
    end

  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Crabfarm
  # Lazily builds and holds one driver for a session. The driver is created
  # by the factory on first use, and any method the bucket does not define
  # itself is forwarded to it.
  class DriverBucket

    attr_reader :session_id

    # @param _module [#load_parser] used to resolve parsers given by name
    # @param _session_id [Object] id passed to the factory when building
    # @param _factory [#build_driver] creates the underlying driver
    def initialize(_module, _session_id, _factory)
      @module = _module
      @session_id = _session_id
      @factory = _factory
      @driver = nil
    end

    # Drops the current driver (if any) and switches to another factory.
    def setup(_factory)
      reset
      @factory = _factory
    end

    # Runs a parser against this bucket.
    #
    # @param _parser_class [Class, String, Symbol] parser class, or a name
    #   resolved through the module's load_parser
    # @param _options [Hash] passed to the parser as its params
    # @return [Object] the parser instance, after parse has run
    def parse(_parser_class, _options={})
      if _parser_class.is_a?(String) || _parser_class.is_a?(Symbol)
        _parser_class = @module.load_parser(_parser_class)
      end

      parser = _parser_class.new @module, self, _options
      parser.parse
      parser
    end

    # @return [Object] the underlying driver, built on first access
    def original
      @driver ||= @factory.build_driver(@session_id)
    end

    # Quits and forgets the current driver; the next use builds a new one.
    #
    # @return [DriverBucket] self
    def reset
      if @driver
        begin
          @driver.quit
        rescue StandardError
          # quitting is best effort: the driver is discarded either way
        end
        @driver = nil
      end
      self
    end

  private

    # forward every missing method to actual driver

    # Consulted by respond_to? only for methods the bucket does not define
    # itself, so the bucket's own API is reported correctly and checking it
    # does not force a driver to be built.
    def respond_to_missing?(symbol, include_priv=false)
      original.respond_to?(symbol, include_priv) || super
    end

    def method_missing(method, *args, &block)
      original.__send__(method, *args, &block)
    end

  end
end
|