crabfarm 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/bin/crabfarm +9 -0
  3. data/lib/crabfarm.rb +24 -0
  4. data/lib/crabfarm/adapters.rb +23 -0
  5. data/lib/crabfarm/adapters/capybara_adapter.rb +7 -0
  6. data/lib/crabfarm/adapters/surfer_adapter.rb +7 -0
  7. data/lib/crabfarm/adapters/watir_adapter.rb +7 -0
  8. data/lib/crabfarm/base_parser.rb +26 -0
  9. data/lib/crabfarm/base_state.rb +41 -0
  10. data/lib/crabfarm/cli.rb +79 -0
  11. data/lib/crabfarm/configuration.rb +83 -0
  12. data/lib/crabfarm/context.rb +32 -0
  13. data/lib/crabfarm/default_driver_factory.rb +37 -0
  14. data/lib/crabfarm/driver_bucket.rb +50 -0
  15. data/lib/crabfarm/driver_bucket_pool.rb +48 -0
  16. data/lib/crabfarm/dsl/surfer.rb +22 -0
  17. data/lib/crabfarm/dsl/surfer/search_context.rb +134 -0
  18. data/lib/crabfarm/dsl/surfer/surf_context.rb +58 -0
  19. data/lib/crabfarm/engines/safe_state_loop.rb +96 -0
  20. data/lib/crabfarm/errors.rb +50 -0
  21. data/lib/crabfarm/loader.rb +83 -0
  22. data/lib/crabfarm/modes/console.rb +86 -0
  23. data/lib/crabfarm/modes/generator.rb +120 -0
  24. data/lib/crabfarm/modes/server.rb +78 -0
  25. data/lib/crabfarm/module_helper.rb +35 -0
  26. data/lib/crabfarm/phantom_driver_factory.rb +33 -0
  27. data/lib/crabfarm/phantom_runner.rb +74 -0
  28. data/lib/crabfarm/rspec.rb +39 -0
  29. data/lib/crabfarm/state_store.rb +24 -0
  30. data/lib/crabfarm/support/custom_puma.rb +64 -0
  31. data/lib/crabfarm/templates/Crabfile.erb +3 -0
  32. data/lib/crabfarm/templates/Gemfile.erb +7 -0
  33. data/lib/crabfarm/templates/boot.rb.erb +13 -0
  34. data/lib/crabfarm/templates/crabfarm_bin.erb +3 -0
  35. data/lib/crabfarm/templates/dot_gitignore.erb +1 -0
  36. data/lib/crabfarm/templates/dot_gitkeep.erb +0 -0
  37. data/lib/crabfarm/templates/dot_rspec.erb +4 -0
  38. data/lib/crabfarm/templates/parser.rb.erb +8 -0
  39. data/lib/crabfarm/templates/parser_spec.rb.erb +7 -0
  40. data/lib/crabfarm/templates/spec_helper.rb.erb +22 -0
  41. data/lib/crabfarm/templates/state.rb.erb +8 -0
  42. data/lib/crabfarm/templates/state_spec.rb.erb +7 -0
  43. data/lib/crabfarm/version.rb +3 -0
  44. metadata +359 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7eb935482cb5663082aae4a3d9e24a722c1aea5d
4
+ data.tar.gz: 05b8038d530eb0d5f4be9325621405c90056b7be
5
+ SHA512:
6
+ metadata.gz: ef524a63574fa86249a0f31f08af390c07be7cb738f6a1ac8063bc59ba1a426074bc5681867c234e341905b150c73f3e2f2bd056f1e71dd39b15b9eff8ea5e3c
7
+ data.tar.gz: 23496fa635c7baca3606c04693939a6b600a8898b6acbc2c55231985f0b5d8b9420316e52a54c743e26b31ced3e805cc7e938b1e14b21d234b171f4723d653f1
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ begin
4
+ require './boot'
5
+ rescue LoadError
6
+ require 'crabfarm'
7
+ end
8
+
9
+ require 'crabfarm/cli'
@@ -0,0 +1,24 @@
1
+ require "forwardable"
2
+ require "jbuilder"
3
+ require "selenium-webdriver"
4
+
5
+ require "crabfarm/version"
6
+ require "crabfarm/errors"
7
+ require "crabfarm/configuration"
8
+ require "crabfarm/module_helper"
9
+ require "crabfarm/driver_bucket"
10
+ require "crabfarm/driver_bucket_pool"
11
+ require "crabfarm/default_driver_factory"
12
+ require "crabfarm/phantom_driver_factory"
13
+ require "crabfarm/phantom_runner"
14
+ require "crabfarm/state_store"
15
+ require "crabfarm/context"
16
+ require "crabfarm/base_state"
17
+ require "crabfarm/base_parser"
18
+ require 'crabfarm/dsl/surfer'
19
+ require "crabfarm/adapters"
20
+ require "crabfarm/loader"
21
+
22
+ module Crabfarm
23
+ # Your code goes here...
24
+ end
@@ -0,0 +1,23 @@
1
+ require 'crabfarm/adapters/capybara_adapter'
2
+ require 'crabfarm/adapters/surfer_adapter'
3
+ require 'crabfarm/adapters/watir_adapter'
4
+
5
+ module Crabfarm
6
+ module Adapters
7
+ @@adapters = {}
8
+
9
+ def self.register_dsl(_name, _adapter)
10
+ @@adapters[_name.to_sym] = _adapter
11
+ end
12
+
13
+ def self.load_from_dsl_name _name
14
+ raise ConfigurationError.new "Invalid dsl name #{_name}" unless @@adapters.has_key? _name.to_sym
15
+ @@adapters[_name.to_sym]
16
+ end
17
+
18
+ # bundled adapters
19
+ register_dsl :watir, WatirAdapter
20
+ register_dsl :capybara, CapybaraAdapter
21
+ register_dsl :surfer, SurferAdapter
22
+ end
23
+ end
@@ -0,0 +1,7 @@
1
+ module Crabfarm::Adapters
2
+ class CapybaraAdapter
3
+ def self.wrap(_bucket)
4
+ raise "Capybara adapter is incompleted"
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,7 @@
1
+ module Crabfarm::Adapters
2
+ class SurferAdapter
3
+ def self.wrap(_bucket)
4
+ Crabfarm::Dsl::Surfer::SurfContext.new _bucket
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,7 @@
1
+ module Crabfarm::Adapters
2
+ class WatirAdapter
3
+ def self.wrap(_bucket)
4
+ Watir::Browser.new _bucket.original
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,26 @@
1
+ module Crabfarm
2
+ class BaseParser
3
+
4
+ attr_reader :browser, :params
5
+
6
+ def self.browser_dsl(_dsl)
7
+ @dsl = _dsl
8
+ end
9
+
10
+ def initialize(_module, _driver, _params)
11
+ dsl_class = Adapters.load_from_dsl_name(class_dsl || _module.settings.default_dsl)
12
+ @browser = dsl_class.wrap _driver
13
+ @params = _params
14
+ end
15
+
16
+ def parse
17
+ raise NotImplementedError.new
18
+ end
19
+
20
+ private
21
+
22
+ def class_dsl
23
+ self.class.instance_variable_get :@dsl
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,41 @@
1
+ module Crabfarm
2
+ class BaseState
3
+ extend Forwardable
4
+
5
+ attr_reader :params
6
+
7
+ def_delegators :@pool, :driver
8
+ def_delegators :@store, :get, :fetch
9
+
10
+ def self.browser_dsl(_dsl)
11
+ @class_dsl = _dsl
12
+ end
13
+
14
+ def initialize(_module, _pool, _store, _params)
15
+ @module = _module
16
+ @pool = _pool
17
+ @store = _store
18
+ @params = _params
19
+ @output = Jbuilder.new
20
+ @dsl = Adapters.load_from_dsl_name(class_dsl || @module.settings.default_dsl)
21
+ end
22
+
23
+ def browser(_name=nil)
24
+ @dsl.wrap driver(_name)
25
+ end
26
+
27
+ def output
28
+ @output ||= Jbuilder.new
29
+ end
30
+
31
+ def crawl
32
+ raise NotImplementedError.new
33
+ end
34
+
35
+ private
36
+
37
+ def class_dsl
38
+ self.class.instance_variable_get :@class_dsl
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,79 @@
1
+ require 'gli'
2
+
3
+ module Crabfarm
4
+ class CLI
5
+ extend GLI::App
6
+
7
+ program_desc 'Crabfarm toolbelt'
8
+
9
+ pre do |global_options,command,options,args|
10
+ # Things to do before
11
+ true
12
+ end
13
+
14
+ desc "Starts the crawler in console mode"
15
+ command [:console, :c] do |c|
16
+ c.action do |global_options,options,args|
17
+ require "crabfarm/modes/console"
18
+ Crabfarm::Modes::Console.console_loop
19
+ end
20
+ end
21
+
22
+ desc "Starts the crawler in server mode"
23
+ command [:server, :s] do |c|
24
+ c.desc "Set the server host, defaults to 0.0.0.0"
25
+ c.flag [:h,:host]
26
+
27
+ c.desc "Set the server port, defaults to 3100"
28
+ c.flag [:p,:port]
29
+
30
+ c.desc "Set the server min and max threads, defaults to 0:16"
31
+ c.flag [:t,:threads]
32
+
33
+ c.action do |global_options,options,args|
34
+ require "crabfarm/modes/server"
35
+ server_options = {}
36
+ server_options[:Host] = options[:host] unless options[:host].nil?
37
+ server_options[:Port] = options[:port] || 3100
38
+ server_options[:Threads] = options[:threads] unless options[:threads].nil?
39
+ Crabfarm::Modes::Server.start server_options
40
+ end
41
+ end
42
+
43
+ desc "Generates crabfarm scaffolding"
44
+ command [:generate, :g] do |c|
45
+
46
+ c.desc "Generates a new crabfarm application"
47
+ c.command :app do |app|
48
+ app.action do |global_options,options,args|
49
+ require "crabfarm/modes/generator"
50
+ Crabfarm::Modes::Generator.new.generate_app(args[0], Dir.pwd)
51
+ end
52
+ end
53
+
54
+ c.desc "Generates a new crabfarm parser and parser spec"
55
+ c.command :parser do |parser|
56
+ parser.action do |global_options,options,args|
57
+ require "crabfarm/modes/generator"
58
+ Crabfarm::Modes::Generator.new.generate_parser(args[0])
59
+ end
60
+ end
61
+
62
+ c.desc "Generates a new crabfarm state and parser spec"
63
+ c.command :state do |parser|
64
+ parser.action do |global_options,options,args|
65
+ require "crabfarm/modes/generator"
66
+ Crabfarm::Modes::Generator.new.generate_state(args[0])
67
+ end
68
+ end
69
+ end
70
+
71
+ command :publish do |c|
72
+ c.action do |global_options,options,args|
73
+
74
+ end
75
+ end
76
+
77
+ exit run(ARGV)
78
+ end
79
+ end
@@ -0,0 +1,83 @@
1
+ module Crabfarm
2
+
3
+ class Configuration
4
+
5
+ # TODO: improve DSL, it sucks
6
+
7
+ attr_accessor :default_dsl
8
+ attr_accessor :driver_factory
9
+
10
+ # Default driver configuration parameters
11
+ attr_accessor :driver_name
12
+ attr_accessor :driver_host
13
+ attr_accessor :driver_port
14
+ attr_accessor :driver_capabilities
15
+ attr_accessor :driver_remote_timeout
16
+ attr_accessor :driver_window_width
17
+ attr_accessor :driver_window_height
18
+
19
+ # Phantom launcher configuration
20
+ attr_accessor :phantom_enabled
21
+ attr_accessor :phantom_load_images
22
+ attr_accessor :phantom_proxy
23
+ attr_accessor :phantom_ssl
24
+ attr_accessor :phantom_bin_path
25
+ attr_accessor :phantom_lock_file
26
+
27
+ def driver_config
28
+ {
29
+ name: @driver_name,
30
+ capabilities: @driver_capabilities,
31
+ remote_host: driver_remote_host,
32
+ remote_timeout: @driver_remote_timeout,
33
+ window_width: @driver_window_width,
34
+ window_height: @driver_window_height
35
+ }
36
+ end
37
+
38
+ def phantom_enabled?
39
+ @phantom_enabled
40
+ end
41
+
42
+ def phantom_config
43
+ {
44
+ load_images: @phantom_load_images,
45
+ proxy: @phantom_proxy,
46
+ ssl: @phantom_ssl,
47
+ bin_path: @phantom_bin_path,
48
+ lock_file: @phantom_lock_file
49
+ }
50
+ end
51
+
52
+ def initialize
53
+ @default_dsl = :surfer
54
+ @driver_factory = nil
55
+
56
+ @driver_name = :chrome
57
+ @driver_capabilities = Selenium::WebDriver::Remote::Capabilities.firefox
58
+ @driver_host = 'localhost'
59
+ @driver_port = '8080'
60
+ @driver_remote_timeout = 120
61
+ @driver_window_width = 1280
62
+ @driver_window_height = 800
63
+
64
+ @phantom_enabled = false
65
+ @phantom_load_images = false
66
+ @phantom_proxy = nil
67
+ @phantom_ssl = 'any'
68
+ @phantom_bin_path = 'phantomjs'
69
+ @phantom_lock_file = nil
70
+ end
71
+
72
+ private
73
+
74
+ def driver_remote_host
75
+ if @driver_host.nil? then nil
76
+ elsif @driver_port.nil? then "http://#{@driver_host}"
77
+ else "http://#{@driver_host}:#{@driver_port}"
78
+ end
79
+ end
80
+
81
+ end
82
+
83
+ end
@@ -0,0 +1,32 @@
1
+ require 'active_support'
2
+
3
+ module Crabfarm
4
+ class Context
5
+ extend Forwardable
6
+
7
+ def_delegators :@pool, :driver
8
+
9
+ def initialize(_module)
10
+ @module = ModuleHelper.new _module
11
+ @pool = DriverBucketPool.new @module
12
+ @store = StateStore.new @module
13
+ end
14
+
15
+ def run_state(_name, _params={})
16
+ state = @module.load_state(_name).new @module, @pool, @store, _params
17
+ state.crawl
18
+ state
19
+ end
20
+
21
+ def reset
22
+ @store.reset
23
+ @pool.reset
24
+ end
25
+
26
+ def release
27
+ @pool.release
28
+ end
29
+
30
+ end
31
+
32
+ end
@@ -0,0 +1,37 @@
1
+ module Crabfarm
2
+ class DefaultDriverFactory
3
+
4
+ def initialize(_config={})
5
+ @config = _config
6
+ end
7
+
8
+ def build_driver(_session_id)
9
+
10
+ driver_name = @config[:name]
11
+ raise ConfigurationError.new 'must provide a webdriver type' if driver_name.nil?
12
+
13
+ case driver_name
14
+ when :remote
15
+ # setup a custom client to use longer timeouts
16
+ client = Selenium::WebDriver::Remote::Http::Default.new
17
+ client.timeout = @config[:remote_timeout]
18
+
19
+ driver = Selenium::WebDriver.for :remote, {
20
+ :url => @config[:remote_host],
21
+ :http_client => client,
22
+ :desired_capabilities => @config[:capabilities]
23
+ }
24
+
25
+ driver.send(:bridge).setWindowSize(@config[:window_width], @config[:window_height])
26
+ else
27
+ driver = Selenium::WebDriver.for driver_name.to_sym
28
+
29
+ # apply browser configuration to new driver
30
+ driver.manage.window.resize_to(@config[:window_width], @config[:window_height]) rescue nil
31
+ end
32
+
33
+ return driver
34
+ end
35
+
36
+ end
37
+ end
@@ -0,0 +1,50 @@
1
+ module Crabfarm
2
+ class DriverBucket
3
+
4
+ attr_reader :session_id
5
+
6
+ def initialize(_module, _session_id, _factory)
7
+ @module = _module
8
+ @session_id = _session_id
9
+ @factory = _factory
10
+ @driver = nil
11
+ end
12
+
13
+ def setup(_factory)
14
+ reset
15
+ @factory = _factory
16
+ end
17
+
18
+ def parse(_parser_class, _options={})
19
+ _parser_class = @module.load_parser(_parser_class) if _parser_class.is_a? String or _parser_class.is_a? Symbol
20
+ parser = _parser_class.new @module, self, _options
21
+ parser.parse
22
+ return parser
23
+ end
24
+
25
+ def original
26
+ @driver ||= @factory.build_driver(@session_id)
27
+ end
28
+
29
+ def reset
30
+ if @driver
31
+ @driver.quit rescue nil
32
+ @driver = nil
33
+ end
34
+ self
35
+ end
36
+
37
+ # forward every missing method to actual driver
38
+
39
+ def respond_to?(symbol, include_priv=false)
40
+ original.respond_to?(symbol, include_priv)
41
+ end
42
+
43
+ private
44
+
45
+ def method_missing(method, *args, &block)
46
+ original.__send__(method, *args, &block)
47
+ end
48
+
49
+ end
50
+ end