crabfarm 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/crabfarm +9 -0
- data/lib/crabfarm.rb +24 -0
- data/lib/crabfarm/adapters.rb +23 -0
- data/lib/crabfarm/adapters/capybara_adapter.rb +7 -0
- data/lib/crabfarm/adapters/surfer_adapter.rb +7 -0
- data/lib/crabfarm/adapters/watir_adapter.rb +7 -0
- data/lib/crabfarm/base_parser.rb +26 -0
- data/lib/crabfarm/base_state.rb +41 -0
- data/lib/crabfarm/cli.rb +79 -0
- data/lib/crabfarm/configuration.rb +83 -0
- data/lib/crabfarm/context.rb +32 -0
- data/lib/crabfarm/default_driver_factory.rb +37 -0
- data/lib/crabfarm/driver_bucket.rb +50 -0
- data/lib/crabfarm/driver_bucket_pool.rb +48 -0
- data/lib/crabfarm/dsl/surfer.rb +22 -0
- data/lib/crabfarm/dsl/surfer/search_context.rb +134 -0
- data/lib/crabfarm/dsl/surfer/surf_context.rb +58 -0
- data/lib/crabfarm/engines/safe_state_loop.rb +96 -0
- data/lib/crabfarm/errors.rb +50 -0
- data/lib/crabfarm/loader.rb +83 -0
- data/lib/crabfarm/modes/console.rb +86 -0
- data/lib/crabfarm/modes/generator.rb +120 -0
- data/lib/crabfarm/modes/server.rb +78 -0
- data/lib/crabfarm/module_helper.rb +35 -0
- data/lib/crabfarm/phantom_driver_factory.rb +33 -0
- data/lib/crabfarm/phantom_runner.rb +74 -0
- data/lib/crabfarm/rspec.rb +39 -0
- data/lib/crabfarm/state_store.rb +24 -0
- data/lib/crabfarm/support/custom_puma.rb +64 -0
- data/lib/crabfarm/templates/Crabfile.erb +3 -0
- data/lib/crabfarm/templates/Gemfile.erb +7 -0
- data/lib/crabfarm/templates/boot.rb.erb +13 -0
- data/lib/crabfarm/templates/crabfarm_bin.erb +3 -0
- data/lib/crabfarm/templates/dot_gitignore.erb +1 -0
- data/lib/crabfarm/templates/dot_gitkeep.erb +0 -0
- data/lib/crabfarm/templates/dot_rspec.erb +4 -0
- data/lib/crabfarm/templates/parser.rb.erb +8 -0
- data/lib/crabfarm/templates/parser_spec.rb.erb +7 -0
- data/lib/crabfarm/templates/spec_helper.rb.erb +22 -0
- data/lib/crabfarm/templates/state.rb.erb +8 -0
- data/lib/crabfarm/templates/state_spec.rb.erb +7 -0
- data/lib/crabfarm/version.rb +3 -0
- metadata +359 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7eb935482cb5663082aae4a3d9e24a722c1aea5d
|
4
|
+
data.tar.gz: 05b8038d530eb0d5f4be9325621405c90056b7be
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ef524a63574fa86249a0f31f08af390c07be7cb738f6a1ac8063bc59ba1a426074bc5681867c234e341905b150c73f3e2f2bd056f1e71dd39b15b9eff8ea5e3c
|
7
|
+
data.tar.gz: 23496fa635c7baca3606c04693939a6b600a8898b6acbc2c55231985f0b5d8b9420316e52a54c743e26b31ced3e805cc7e938b1e14b21d234b171f4723d653f1
|
data/bin/crabfarm
ADDED
data/lib/crabfarm.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "forwardable"
|
2
|
+
require "jbuilder"
|
3
|
+
require "selenium-webdriver"
|
4
|
+
|
5
|
+
require "crabfarm/version"
|
6
|
+
require "crabfarm/errors"
|
7
|
+
require "crabfarm/configuration"
|
8
|
+
require "crabfarm/module_helper"
|
9
|
+
require "crabfarm/driver_bucket"
|
10
|
+
require "crabfarm/driver_bucket_pool"
|
11
|
+
require "crabfarm/default_driver_factory"
|
12
|
+
require "crabfarm/phantom_driver_factory"
|
13
|
+
require "crabfarm/phantom_runner"
|
14
|
+
require "crabfarm/state_store"
|
15
|
+
require "crabfarm/context"
|
16
|
+
require "crabfarm/base_state"
|
17
|
+
require "crabfarm/base_parser"
|
18
|
+
require 'crabfarm/dsl/surfer'
|
19
|
+
require "crabfarm/adapters"
|
20
|
+
require "crabfarm/loader"
|
21
|
+
|
22
|
+
module Crabfarm
|
23
|
+
# Top-level namespace of the crabfarm gem; its components are loaded by the requires above.
|
24
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'crabfarm/adapters/capybara_adapter'
|
2
|
+
require 'crabfarm/adapters/surfer_adapter'
|
3
|
+
require 'crabfarm/adapters/watir_adapter'
|
4
|
+
|
5
|
+
module Crabfarm
  # Registry that maps a browser DSL name to the adapter object implementing it.
  module Adapters
    @@adapters = {}

    # Stores +_adapter+ under +_name+ (the name is converted to a symbol).
    def self.register_dsl(_name, _adapter)
      key = _name.to_sym
      @@adapters[key] = _adapter
    end

    # Returns the adapter registered under +_name+.
    # Raises ConfigurationError when no adapter was registered with that name.
    def self.load_from_dsl_name(_name)
      @@adapters.fetch(_name.to_sym) do
        raise ConfigurationError.new "Invalid dsl name #{_name}"
      end
    end

    # bundled adapters
    {
      :watir => WatirAdapter,
      :capybara => CapybaraAdapter,
      :surfer => SurferAdapter
    }.each do |dsl_name, adapter|
      register_dsl dsl_name, adapter
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Crabfarm
  # Base class for parsers. On construction the given driver is wrapped with
  # the adapter selected by the class-level DSL (or, when none was declared,
  # by the module settings' default DSL) and exposed through #browser.
  class BaseParser

    attr_reader :browser, :params

    class << self
      # Declares the DSL name used to wrap the driver for instances of this class.
      def browser_dsl(_dsl)
        @dsl = _dsl
      end
    end

    # _module must respond to settings.default_dsl; _driver is handed to the
    # adapter's wrap method; _params is stored untouched.
    def initialize(_module, _driver, _params)
      dsl_name = class_dsl || _module.settings.default_dsl
      adapter = Adapters.load_from_dsl_name(dsl_name)
      @browser = adapter.wrap(_driver)
      @params = _params
    end

    # To be overridden by subclasses; the base implementation always raises.
    def parse
      raise NotImplementedError
    end

    private

    # DSL name declared on this exact class through browser_dsl, if any.
    def class_dsl
      self.class.instance_variable_get(:@dsl)
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Crabfarm
  # Base class for crawler states. Holds the driver pool, the state store, the
  # call parameters and a Jbuilder output object; subclasses implement #crawl.
  class BaseState
    extend Forwardable

    attr_reader :params

    # driver comes from the pool, get/fetch come from the store
    def_delegator :@pool, :driver
    def_delegator :@store, :get
    def_delegator :@store, :fetch

    class << self
      # Declares the DSL name used by #browser for instances of this class.
      def browser_dsl(_dsl)
        @class_dsl = _dsl
      end
    end

    # _module must respond to settings.default_dsl, which is used when the
    # class did not declare its own DSL.
    def initialize(_module, _pool, _store, _params)
      @module, @pool, @store, @params = _module, _pool, _store, _params
      @output = Jbuilder.new
      dsl_name = class_dsl || @module.settings.default_dsl
      @dsl = Adapters.load_from_dsl_name(dsl_name)
    end

    # Returns the pool driver identified by _name wrapped by the selected adapter.
    def browser(_name=nil)
      raw_driver = driver(_name)
      @dsl.wrap(raw_driver)
    end

    # Jbuilder object where the state writes its result.
    def output
      @output ||= Jbuilder.new
    end

    # To be overridden by subclasses; the base implementation always raises.
    def crawl
      raise NotImplementedError
    end

    private

    # DSL name declared on this exact class through browser_dsl, if any.
    def class_dsl
      self.class.instance_variable_get(:@class_dsl)
    end
  end
end
|
data/lib/crabfarm/cli.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'gli'
|
2
|
+
|
3
|
+
module Crabfarm
  # Command line interface of the crabfarm toolbelt, declared with the GLI DSL.
  #
  # NOTE: evaluating this class body runs the CLI against ARGV and exits the
  # process with the resulting status (see the last statement of the class).
  class CLI
    extend GLI::App

    program_desc 'Crabfarm toolbelt'

    pre do |global_options,command,options,args|
      # Things to do before
      true
    end

    desc "Starts the crawler in console mode"
    command [:console, :c] do |c|
      c.action do |global_options,options,args|
        # mode files are required lazily, only when their command is invoked
        require "crabfarm/modes/console"
        Crabfarm::Modes::Console.console_loop
      end
    end

    desc "Starts the crawler in server mode"
    command [:server, :s] do |c|
      c.desc "Set the server host, defaults to 0.0.0.0"
      c.flag [:h,:host]

      c.desc "Set the server port, defaults to 3100"
      c.flag [:p,:port]

      c.desc "Set the server min and max threads, defaults to 0:16"
      c.flag [:t,:threads]

      c.action do |global_options,options,args|
        require "crabfarm/modes/server"
        # Host and Threads are only passed on when given; Port always is,
        # falling back to 3100.
        server_options = {}
        server_options[:Host] = options[:host] unless options[:host].nil?
        server_options[:Port] = options[:port] || 3100
        server_options[:Threads] = options[:threads] unless options[:threads].nil?
        Crabfarm::Modes::Server.start server_options
      end
    end

    desc "Generates crabfarm scaffolding"
    command [:generate, :g] do |c|

      c.desc "Generates a new crabfarm application"
      c.command :app do |app|
        app.action do |global_options,options,args|
          require "crabfarm/modes/generator"
          # first argument is forwarded together with the current directory
          Crabfarm::Modes::Generator.new.generate_app(args[0], Dir.pwd)
        end
      end

      c.desc "Generates a new crabfarm parser and parser spec"
      c.command :parser do |parser|
        parser.action do |global_options,options,args|
          require "crabfarm/modes/generator"
          Crabfarm::Modes::Generator.new.generate_parser(args[0])
        end
      end

      # Description previously repeated the parser wording ("parser spec").
      c.desc "Generates a new crabfarm state and state spec"
      c.command :state do |state|
        state.action do |global_options,options,args|
          require "crabfarm/modes/generator"
          Crabfarm::Modes::Generator.new.generate_state(args[0])
        end
      end
    end

    # Declared but not implemented yet: the action body is empty.
    command :publish do |c|
      c.action do |global_options,options,args|

      end
    end

    exit run(ARGV)
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Crabfarm

  # Plain settings holder: every option is a read/write attribute that starts
  # with the default assigned in #initialize.
  class Configuration

    # TODO: improve DSL, it sucks

    attr_accessor :default_dsl, :driver_factory

    # Default driver configuration parameters
    attr_accessor :driver_name, :driver_host, :driver_port,
                  :driver_capabilities, :driver_remote_timeout,
                  :driver_window_width, :driver_window_height

    # Phantom launcher configuration
    attr_accessor :phantom_enabled, :phantom_load_images, :phantom_proxy,
                  :phantom_ssl, :phantom_bin_path, :phantom_lock_file

    # Assigns the default value of every option.
    def initialize
      @default_dsl = :surfer
      @driver_factory = nil

      @driver_name = :chrome
      @driver_capabilities = Selenium::WebDriver::Remote::Capabilities.firefox
      @driver_host = 'localhost'
      @driver_port = '8080'
      @driver_remote_timeout = 120
      @driver_window_width = 1280
      @driver_window_height = 800

      @phantom_enabled = false
      @phantom_load_images = false
      @phantom_proxy = nil
      @phantom_ssl = 'any'
      @phantom_bin_path = 'phantomjs'
      @phantom_lock_file = nil
    end

    # Driver related options gathered in a single hash; host and port are
    # merged into the :remote_host url.
    def driver_config
      {
        :name => @driver_name,
        :capabilities => @driver_capabilities,
        :remote_host => driver_remote_host,
        :remote_timeout => @driver_remote_timeout,
        :window_width => @driver_window_width,
        :window_height => @driver_window_height
      }
    end

    # Current value of the phantom_enabled option.
    def phantom_enabled?
      @phantom_enabled
    end

    # Phantom launcher options gathered in a single hash.
    def phantom_config
      {
        :load_images => @phantom_load_images,
        :proxy => @phantom_proxy,
        :ssl => @phantom_ssl,
        :bin_path => @phantom_bin_path,
        :lock_file => @phantom_lock_file
      }
    end

    private

    # Builds the remote url: nil without a host, host only without a port.
    def driver_remote_host
      return nil if @driver_host.nil?
      return "http://#{@driver_host}" if @driver_port.nil?
      "http://#{@driver_host}:#{@driver_port}"
    end

  end

end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
|
3
|
+
module Crabfarm
  # Ties together a module helper, a driver pool and a state store, and runs
  # states against them.
  class Context
    extend Forwardable

    def_delegator :@pool, :driver

    # Wraps _module in a ModuleHelper and builds the pool and store from it.
    def initialize(_module)
      helper = ModuleHelper.new(_module)
      @module = helper
      @pool = DriverBucketPool.new(helper)
      @store = StateStore.new(helper)
    end

    # Loads the state class called _name, instantiates it, crawls it and
    # returns the state instance (not the value returned by crawl).
    def run_state(_name, _params={})
      state_class = @module.load_state(_name)
      instance = state_class.new(@module, @pool, @store, _params)
      instance.crawl
      instance
    end

    # Resets the store first and the pool afterwards.
    def reset
      @store.reset
      @pool.reset
    end

    # Releases the driver pool.
    def release
      @pool.release
    end

  end

end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Crabfarm
  # Builds selenium webdriver instances from a configuration hash. Keys read
  # from the hash: :name, :remote_host, :remote_timeout, :capabilities,
  # :window_width and :window_height.
  class DefaultDriverFactory

    def initialize(_config={})
      @config = _config
    end

    # Builds a new driver of the kind named by the :name config entry.
    #
    # The name may be a symbol or a string; it is normalized before choosing
    # the branch so that 'remote' given as a string is also built with the
    # configured url, http client and capabilities.
    #
    # _session_id is not used by this factory.
    #
    # Raises ConfigurationError when no driver name was configured.
    def build_driver(_session_id)
      driver_name = @config[:name]
      raise ConfigurationError.new 'must provide a webdriver type' if driver_name.nil?

      driver_name = driver_name.to_sym
      if driver_name == :remote
        build_remote_driver
      else
        build_local_driver driver_name
      end
    end

    private

    # Connects to the configured remote host and sets the window size.
    def build_remote_driver
      # setup a custom client to use longer timeouts
      client = Selenium::WebDriver::Remote::Http::Default.new
      client.timeout = @config[:remote_timeout]

      driver = Selenium::WebDriver.for :remote, {
        :url => @config[:remote_host],
        :http_client => client,
        :desired_capabilities => @config[:capabilities]
      }

      # the size is set through the driver's non-public bridge object
      driver.send(:bridge).setWindowSize(@config[:window_width], @config[:window_height])
      driver
    end

    # Starts a local browser driver and tries to apply the window size.
    def build_local_driver(_name)
      driver = Selenium::WebDriver.for _name

      # apply browser configuration to new driver; resizing is best effort,
      # a failure here is deliberately ignored
      begin
        driver.manage.window.resize_to(@config[:window_width], @config[:window_height])
      rescue StandardError
        nil
      end

      driver
    end

  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Crabfarm
  # Lazily builds a driver through a factory and forwards every method it
  # does not define itself to that driver.
  class DriverBucket

    attr_reader :session_id

    def initialize(_module, _session_id, _factory)
      @module = _module
      @session_id = _session_id
      @factory = _factory
      @driver = nil
    end

    # Discards the current driver (if any) and switches to another factory.
    def setup(_factory)
      reset
      @factory = _factory
    end

    # Runs a parser over this bucket and returns the parser instance.
    #
    # _parser_class may be a class, or a String/Symbol that is resolved
    # through the module's load_parser. The parser is built with the module,
    # this bucket and _options, and its parse method is called before returning.
    def parse(_parser_class, _options={})
      if _parser_class.is_a?(String) || _parser_class.is_a?(Symbol)
        _parser_class = @module.load_parser(_parser_class)
      end

      parser = _parser_class.new @module, self, _options
      parser.parse
      parser
    end

    # The wrapped driver, built on first use with the current factory.
    def original
      @driver ||= @factory.build_driver(@session_id)
    end

    # Quits and forgets the current driver; a new one is built on next use.
    # Returns self.
    def reset
      if @driver
        # quitting is best effort, errors raised by the driver are ignored
        begin
          @driver.quit
        rescue StandardError
          nil
        end
        @driver = nil
      end
      self
    end

    private

    # forward every missing method to actual driver

    # Hooking respond_to_missing? (instead of overriding respond_to?) keeps the
    # bucket's own methods visible to respond_to? and only consults, and
    # therefore only builds, the driver for methods the bucket does not define.
    def respond_to_missing?(symbol, include_priv=false)
      original.respond_to?(symbol, include_priv) || super
    end

    def method_missing(method, *args, &block)
      original.__send__(method, *args, &block)
    end

  end
end
|