crabfarm 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +7 -0
  2. data/bin/crabfarm +9 -0
  3. data/lib/crabfarm.rb +24 -0
  4. data/lib/crabfarm/adapters.rb +23 -0
  5. data/lib/crabfarm/adapters/capybara_adapter.rb +7 -0
  6. data/lib/crabfarm/adapters/surfer_adapter.rb +7 -0
  7. data/lib/crabfarm/adapters/watir_adapter.rb +7 -0
  8. data/lib/crabfarm/base_parser.rb +26 -0
  9. data/lib/crabfarm/base_state.rb +41 -0
  10. data/lib/crabfarm/cli.rb +79 -0
  11. data/lib/crabfarm/configuration.rb +83 -0
  12. data/lib/crabfarm/context.rb +32 -0
  13. data/lib/crabfarm/default_driver_factory.rb +37 -0
  14. data/lib/crabfarm/driver_bucket.rb +50 -0
  15. data/lib/crabfarm/driver_bucket_pool.rb +48 -0
  16. data/lib/crabfarm/dsl/surfer.rb +22 -0
  17. data/lib/crabfarm/dsl/surfer/search_context.rb +134 -0
  18. data/lib/crabfarm/dsl/surfer/surf_context.rb +58 -0
  19. data/lib/crabfarm/engines/safe_state_loop.rb +96 -0
  20. data/lib/crabfarm/errors.rb +50 -0
  21. data/lib/crabfarm/loader.rb +83 -0
  22. data/lib/crabfarm/modes/console.rb +86 -0
  23. data/lib/crabfarm/modes/generator.rb +120 -0
  24. data/lib/crabfarm/modes/server.rb +78 -0
  25. data/lib/crabfarm/module_helper.rb +35 -0
  26. data/lib/crabfarm/phantom_driver_factory.rb +33 -0
  27. data/lib/crabfarm/phantom_runner.rb +74 -0
  28. data/lib/crabfarm/rspec.rb +39 -0
  29. data/lib/crabfarm/state_store.rb +24 -0
  30. data/lib/crabfarm/support/custom_puma.rb +64 -0
  31. data/lib/crabfarm/templates/Crabfile.erb +3 -0
  32. data/lib/crabfarm/templates/Gemfile.erb +7 -0
  33. data/lib/crabfarm/templates/boot.rb.erb +13 -0
  34. data/lib/crabfarm/templates/crabfarm_bin.erb +3 -0
  35. data/lib/crabfarm/templates/dot_gitignore.erb +1 -0
  36. data/lib/crabfarm/templates/dot_gitkeep.erb +0 -0
  37. data/lib/crabfarm/templates/dot_rspec.erb +4 -0
  38. data/lib/crabfarm/templates/parser.rb.erb +8 -0
  39. data/lib/crabfarm/templates/parser_spec.rb.erb +7 -0
  40. data/lib/crabfarm/templates/spec_helper.rb.erb +22 -0
  41. data/lib/crabfarm/templates/state.rb.erb +8 -0
  42. data/lib/crabfarm/templates/state_spec.rb.erb +7 -0
  43. data/lib/crabfarm/version.rb +3 -0
  44. metadata +359 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7eb935482cb5663082aae4a3d9e24a722c1aea5d
4
+ data.tar.gz: 05b8038d530eb0d5f4be9325621405c90056b7be
5
+ SHA512:
6
+ metadata.gz: ef524a63574fa86249a0f31f08af390c07be7cb738f6a1ac8063bc59ba1a426074bc5681867c234e341905b150c73f3e2f2bd056f1e71dd39b15b9eff8ea5e3c
7
+ data.tar.gz: 23496fa635c7baca3606c04693939a6b600a8898b6acbc2c55231985f0b5d8b9420316e52a54c743e26b31ced3e805cc7e938b1e14b21d234b171f4723d653f1
@@ -0,0 +1,9 @@
#!/usr/bin/env ruby

# Entry point of the crabfarm command line tool.
#
# When run from a directory that has a ./boot file, that file is loaded;
# otherwise the plain crabfarm gem is loaded.
begin
  require './boot'
rescue LoadError => e
  # Only fall back when ./boot itself could not be found. A LoadError raised
  # from inside an existing boot file (for instance a missing gem) must not be
  # silently swallowed, so it is re-raised.
  raise unless e.path == './boot'

  require 'crabfarm'
end

require 'crabfarm/cli'
@@ -0,0 +1,24 @@
1
+ require "forwardable"
2
+ require "jbuilder"
3
+ require "selenium-webdriver"
4
+
5
+ require "crabfarm/version"
6
+ require "crabfarm/errors"
7
+ require "crabfarm/configuration"
8
+ require "crabfarm/module_helper"
9
+ require "crabfarm/driver_bucket"
10
+ require "crabfarm/driver_bucket_pool"
11
+ require "crabfarm/default_driver_factory"
12
+ require "crabfarm/phantom_driver_factory"
13
+ require "crabfarm/phantom_runner"
14
+ require "crabfarm/state_store"
15
+ require "crabfarm/context"
16
+ require "crabfarm/base_state"
17
+ require "crabfarm/base_parser"
18
+ require 'crabfarm/dsl/surfer'
19
+ require "crabfarm/adapters"
20
+ require "crabfarm/loader"
21
+
22
+ module Crabfarm
23
+ # Root namespace of the crabfarm gem; its classes are defined by the files required above.
24
+ end
@@ -0,0 +1,23 @@
1
+ require 'crabfarm/adapters/capybara_adapter'
2
+ require 'crabfarm/adapters/surfer_adapter'
3
+ require 'crabfarm/adapters/watir_adapter'
4
+
module Crabfarm
  # Registry that maps browser dsl names to the adapter used for that dsl.
  module Adapters
    # Held in a module-level instance variable: only the two singleton methods
    # below read or write the registry.
    @adapters = {}

    # Registers an adapter under a dsl name.
    #
    # _name    - dsl name; converted to a Symbol before being stored.
    # _adapter - the adapter to return for that name.
    #
    # Returns the registered adapter.
    def self.register_dsl(_name, _adapter)
      @adapters[_name.to_sym] = _adapter
    end

    # Looks up the adapter registered for a dsl name.
    #
    # _name - dsl name as a Symbol or String.
    #
    # Returns the registered adapter.
    # Raises ConfigurationError if no adapter is registered under that name,
    # including when the name is nil or cannot be converted to a Symbol.
    def self.load_from_dsl_name(_name)
      key = _name.respond_to?(:to_sym) ? _name.to_sym : _name
      @adapters.fetch(key) { raise ConfigurationError, "Invalid dsl name #{_name}" }
    end

    # bundled adapters
    register_dsl :watir, WatirAdapter
    register_dsl :capybara, CapybaraAdapter
    register_dsl :surfer, SurferAdapter
  end
end
@@ -0,0 +1,7 @@
module Crabfarm
  module Adapters
    # Placeholder adapter for the capybara dsl; it is not implemented yet.
    class CapybaraAdapter
      # Always fails, since there is no capybara support yet.
      #
      # _bucket - ignored.
      #
      # Raises RuntimeError on every call.
      def self.wrap(_bucket)
        raise "Capybara adapter is incomplete"
      end
    end
  end
end
@@ -0,0 +1,7 @@
module Crabfarm
  module Adapters
    # Adapter for the surfer dsl.
    class SurferAdapter
      class << self
        # Builds a new Crabfarm::Dsl::Surfer::SurfContext around the given bucket.
        #
        # _bucket - passed as the only argument to SurfContext.new.
        #
        # Returns the new SurfContext.
        def wrap(_bucket)
          Crabfarm::Dsl::Surfer::SurfContext.new(_bucket)
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
module Crabfarm
  module Adapters
    # Adapter for the watir dsl.
    class WatirAdapter
      class << self
        # Builds a Watir::Browser on top of the bucket's underlying driver.
        #
        # _bucket - object whose `original` result is handed to Watir::Browser.new.
        #
        # Returns the new Watir::Browser.
        def wrap(_bucket)
          Watir::Browser.new(_bucket.original)
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
module Crabfarm
  # Base class for parsers. A parser gets a browser (the driver wrapped by the
  # selected dsl adapter) plus a set of params, and implements `parse`.
  class BaseParser

    attr_reader :browser, :params

    # Declares the browser dsl used by this parser class and its subclasses.
    #
    # _dsl - dsl name handed to Adapters.load_from_dsl_name.
    def self.browser_dsl(_dsl)
      @dsl = _dsl
    end

    # _module - object whose `settings.default_dsl` is used when no dsl was
    #           declared with `browser_dsl`.
    # _driver - driver passed to the adapter's `wrap`.
    # _params - stored as is and exposed through `params`.
    def initialize(_module, _driver, _params)
      dsl_class = Adapters.load_from_dsl_name(class_dsl || _module.settings.default_dsl)
      @browser = dsl_class.wrap _driver
      @params = _params
    end

    # Must be implemented by subclasses.
    #
    # Raises NotImplementedError.
    def parse
      raise NotImplementedError
    end

    private

    # Returns the dsl declared with `browser_dsl` on this class or on its
    # closest ancestor that declared one, nil when none did.
    #
    # The declaration is stored per class, so the superclass chain is walked
    # explicitly; otherwise a subclass of a configured parser would silently
    # fall back to the default dsl.
    def class_dsl
      klass = self.class
      while klass
        return klass.instance_variable_get(:@dsl) if klass.instance_variable_defined?(:@dsl)
        klass = klass.superclass
      end
      nil
    end
  end
end
@@ -0,0 +1,41 @@
module Crabfarm
  # Base class for states. A state has access to the driver pool, the state
  # store, its params and a Jbuilder output, and implements `crawl`.
  class BaseState
    extend Forwardable

    attr_reader :params

    # `driver` is answered by the pool, `get` and `fetch` by the store.
    def_delegators :@pool, :driver
    def_delegators :@store, :get, :fetch

    # Declares the browser dsl used by this state class and its subclasses.
    #
    # _dsl - dsl name handed to Adapters.load_from_dsl_name.
    def self.browser_dsl(_dsl)
      @class_dsl = _dsl
    end

    # _module - object whose `settings.default_dsl` is used when no dsl was
    #           declared with `browser_dsl`.
    # _pool   - receiver of the delegated `driver` calls.
    # _store  - receiver of the delegated `get` and `fetch` calls.
    # _params - stored as is and exposed through `params`.
    def initialize(_module, _pool, _store, _params)
      @module = _module
      @pool = _pool
      @store = _store
      @params = _params
      @output = Jbuilder.new
      @dsl = Adapters.load_from_dsl_name(class_dsl || @module.settings.default_dsl)
    end

    # Returns the driver named _name (as provided by the pool) wrapped by the
    # dsl adapter selected for this state.
    def browser(_name=nil)
      @dsl.wrap driver(_name)
    end

    # Returns the Jbuilder instance of this state, creating one if it is unset.
    def output
      @output ||= Jbuilder.new
    end

    # Must be implemented by subclasses.
    #
    # Raises NotImplementedError.
    def crawl
      raise NotImplementedError
    end

    private

    # Returns the dsl declared with `browser_dsl` on this class or on its
    # closest ancestor that declared one, nil when none did.
    #
    # The declaration is stored per class, so the superclass chain is walked
    # explicitly; otherwise a subclass of a configured state would silently
    # fall back to the default dsl.
    def class_dsl
      klass = self.class
      while klass
        return klass.instance_variable_get(:@class_dsl) if klass.instance_variable_defined?(:@class_dsl)
        klass = klass.superclass
      end
      nil
    end
  end
end
@@ -0,0 +1,79 @@
1
+ require 'gli'
2
+
module Crabfarm
  # Command line interface of crabfarm, declared with the GLI dsl.
  #
  # Loading this file runs the program: the last statement of the class body
  # dispatches ARGV and exits with the resulting status.
  class CLI
    extend GLI::App

    program_desc 'Crabfarm toolbelt'

    pre do |global_options,command,options,args|
      # Things to do before
      true
    end

    desc "Starts the crawler in console mode"
    command [:console, :c] do |c|
      c.action do |global_options,options,args|
        # mode files are required on demand, only for the selected command
        require "crabfarm/modes/console"
        Crabfarm::Modes::Console.console_loop
      end
    end

    desc "Starts the crawler in server mode"
    command [:server, :s] do |c|
      c.desc "Set the server host, defaults to 0.0.0.0"
      c.flag [:h,:host]

      c.desc "Set the server port, defaults to 3100"
      c.flag [:p,:port]

      c.desc "Set the server min and max threads, defaults to 0:16"
      c.flag [:t,:threads]

      c.action do |global_options,options,args|
        require "crabfarm/modes/server"
        # :Host and :Threads are only passed when given on the command line,
        # :Port is always passed (3100 unless overridden).
        server_options = {}
        server_options[:Host] = options[:host] unless options[:host].nil?
        server_options[:Port] = options[:port] || 3100
        server_options[:Threads] = options[:threads] unless options[:threads].nil?
        Crabfarm::Modes::Server.start server_options
      end
    end

    desc "Generates crabfarm scaffolding"
    command [:generate, :g] do |c|

      c.desc "Generates a new crabfarm application"
      c.command :app do |app|
        app.action do |global_options,options,args|
          require "crabfarm/modes/generator"
          # first argument is the application name, generated under the current directory
          Crabfarm::Modes::Generator.new.generate_app(args[0], Dir.pwd)
        end
      end

      c.desc "Generates a new crabfarm parser and parser spec"
      c.command :parser do |parser|
        parser.action do |global_options,options,args|
          require "crabfarm/modes/generator"
          Crabfarm::Modes::Generator.new.generate_parser(args[0])
        end
      end

      c.desc "Generates a new crabfarm state and state spec"
      c.command :state do |state|
        state.action do |global_options,options,args|
          require "crabfarm/modes/generator"
          Crabfarm::Modes::Generator.new.generate_state(args[0])
        end
      end
    end

    # Placeholder command: its action is empty.
    command :publish do |c|
      c.action do |global_options,options,args|

      end
    end

    exit run(ARGV)
  end
end
@@ -0,0 +1,83 @@
module Crabfarm

  # Holds every crabfarm setting together with its default value.
  class Configuration

    # TODO: improve DSL, it sucks

    attr_accessor :default_dsl, :driver_factory

    # Default driver configuration parameters
    attr_accessor :driver_name, :driver_host, :driver_port,
                  :driver_capabilities, :driver_remote_timeout,
                  :driver_window_width, :driver_window_height

    # Phantom launcher configuration
    attr_accessor :phantom_enabled, :phantom_load_images, :phantom_proxy,
                  :phantom_ssl, :phantom_bin_path, :phantom_lock_file

    # Loads the default value of every setting.
    def initialize
      @default_dsl = :surfer
      @driver_factory = nil

      # NOTE(review): the default driver is :chrome while the default
      # capabilities are the firefox ones - confirm this is intended.
      @driver_name = :chrome
      @driver_capabilities = Selenium::WebDriver::Remote::Capabilities.firefox
      @driver_host = 'localhost'
      @driver_port = '8080'
      @driver_remote_timeout = 120
      @driver_window_width = 1280
      @driver_window_height = 800

      @phantom_enabled = false
      @phantom_load_images = false
      @phantom_proxy = nil
      @phantom_ssl = 'any'
      @phantom_bin_path = 'phantomjs'
      @phantom_lock_file = nil
    end

    # Returns the driver settings as a Hash; :remote_host is the url built
    # from the configured host and port.
    def driver_config
      settings = {}
      settings[:name] = @driver_name
      settings[:capabilities] = @driver_capabilities
      settings[:remote_host] = driver_remote_host
      settings[:remote_timeout] = @driver_remote_timeout
      settings[:window_width] = @driver_window_width
      settings[:window_height] = @driver_window_height
      settings
    end

    # Returns the current value of the phantom_enabled setting.
    def phantom_enabled?
      @phantom_enabled
    end

    # Returns the phantom launcher settings as a Hash.
    def phantom_config
      settings = {}
      settings[:load_images] = @phantom_load_images
      settings[:proxy] = @phantom_proxy
      settings[:ssl] = @phantom_ssl
      settings[:bin_path] = @phantom_bin_path
      settings[:lock_file] = @phantom_lock_file
      settings
    end

    private

    # Returns the http url of the remote driver: nil without a host, without
    # the port part when no port is set.
    def driver_remote_host
      return nil if @driver_host.nil?
      return "http://#{@driver_host}" if @driver_port.nil?

      "http://#{@driver_host}:#{@driver_port}"
    end

  end

end
@@ -0,0 +1,32 @@
1
+ require 'active_support'
2
+
module Crabfarm
  # Ties together the module helper, the driver pool and the state store used
  # to run states.
  class Context
    extend Forwardable

    # `driver` is answered by the driver pool.
    def_delegators :@pool, :driver

    # _module - wrapped in a ModuleHelper, which is then shared with the
    #           driver pool and the state store.
    def initialize(_module)
      @module = ModuleHelper.new(_module)
      @pool = DriverBucketPool.new(@module)
      @store = StateStore.new(@module)
    end

    # Instantiates the state class loaded under _name and crawls it.
    #
    # _name   - name given to the module helper's `load_state`.
    # _params - params handed to the state.
    #
    # Returns the state instance, after `crawl` has been called on it.
    def run_state(_name, _params={})
      state_class = @module.load_state(_name)
      state_class.new(@module, @pool, @store, _params).tap { |state| state.crawl }
    end

    # Resets the state store first, then the driver pool.
    def reset
      @store.reset
      @pool.reset
    end

    # Releases the driver pool.
    def release
      @pool.release
    end

  end

end
@@ -0,0 +1,37 @@
module Crabfarm
  # Builds selenium webdriver instances from a driver configuration hash.
  class DefaultDriverFactory

    # _config - Hash read through the keys :name, :remote_host,
    #           :remote_timeout, :capabilities, :window_width and
    #           :window_height.
    def initialize(_config={})
      @config = _config
    end

    # Builds a new driver according to the configuration.
    #
    # _session_id - not used by this factory.
    #
    # Returns the new driver.
    # Raises ConfigurationError if no driver name is configured.
    def build_driver(_session_id)
      driver_name = @config[:name]
      raise ConfigurationError, 'must provide a webdriver type' if driver_name.nil?

      # Normalize before branching so that 'remote' given as a String takes
      # the remote path too, instead of reaching the local branch and building
      # a remote driver without url, client and capabilities.
      driver_name = driver_name.to_sym
      driver_name == :remote ? build_remote_driver : build_local_driver(driver_name)
    end

    private

    # Builds a remote driver using the configured url, timeout and capabilities.
    def build_remote_driver
      # setup a custom client to use longer timeouts
      client = Selenium::WebDriver::Remote::Http::Default.new
      client.timeout = @config[:remote_timeout]

      driver = Selenium::WebDriver.for :remote, {
        :url => @config[:remote_host],
        :http_client => client,
        :desired_capabilities => @config[:capabilities]
      }

      # the window size is set through the driver's non-public bridge
      driver.send(:bridge).setWindowSize(@config[:window_width], @config[:window_height])
      driver
    end

    # Builds a driver for the locally available browser named _name.
    def build_local_driver(_name)
      driver = Selenium::WebDriver.for _name

      # apply browser configuration to new driver
      begin
        driver.manage.window.resize_to(@config[:window_width], @config[:window_height])
      rescue StandardError
        # resizing is best effort: a failure here is deliberately ignored
        nil
      end

      driver
    end

  end
end
@@ -0,0 +1,50 @@
module Crabfarm
  # Holds the driver of one session. The driver is built by the factory on
  # first use, and every method the bucket does not define itself is forwarded
  # to that driver.
  class DriverBucket

    attr_reader :session_id

    # _module     - object used to resolve parsers given by name (`load_parser`).
    # _session_id - passed to the factory when the driver is built.
    # _factory    - object whose `build_driver` creates the driver.
    def initialize(_module, _session_id, _factory)
      @module = _module
      @session_id = _session_id
      @factory = _factory
      @driver = nil
    end

    # Discards the current driver, if any, and switches to another factory.
    def setup(_factory)
      reset
      @factory = _factory
    end

    # Runs a parser against this bucket.
    #
    # _parser_class - parser class, or its name as a String or Symbol (resolved
    #                 with the module's `load_parser`).
    # _options      - passed to the parser as its params.
    #
    # Returns the parser instance, after `parse` has been called on it.
    def parse(_parser_class, _options={})
      _parser_class = @module.load_parser(_parser_class) if _parser_class.is_a?(String) || _parser_class.is_a?(Symbol)
      parser = _parser_class.new @module, self, _options
      parser.parse
      parser
    end

    # Returns the underlying driver, building it on first access.
    def original
      @driver ||= @factory.build_driver(@session_id)
    end

    # Quits and forgets the current driver, if any. Errors raised while
    # quitting are deliberately ignored.
    #
    # Returns self.
    def reset
      if @driver
        begin
          @driver.quit
        rescue StandardError
          nil
        end
        @driver = nil
      end
      self
    end

    private

    # forward every missing method to actual driver

    # Reports driver methods as supported. Being the `respond_to?` fallback,
    # it is only consulted for methods the bucket does not define itself, so
    # the bucket's own methods are reported correctly and without building a
    # driver.
    def respond_to_missing?(symbol, include_priv=false)
      original.respond_to?(symbol, include_priv) || super
    end

    def method_missing(method, *args, &block)
      original.__send__(method, *args, &block)
    end
    # keep keyword arguments intact when forwarding, on rubies that separate
    # them from positional arguments
    ruby2_keywords(:method_missing) if respond_to?(:ruby2_keywords, true)

  end
end