crabfarm 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7eb935482cb5663082aae4a3d9e24a722c1aea5d
4
- data.tar.gz: 05b8038d530eb0d5f4be9325621405c90056b7be
3
+ metadata.gz: cc45d8d0e358193e3729c8ca8d38715680470c78
4
+ data.tar.gz: 30c698146ec3b2d4032cddf4d4b3011af4edaa75
5
5
  SHA512:
6
- metadata.gz: ef524a63574fa86249a0f31f08af390c07be7cb738f6a1ac8063bc59ba1a426074bc5681867c234e341905b150c73f3e2f2bd056f1e71dd39b15b9eff8ea5e3c
7
- data.tar.gz: 23496fa635c7baca3606c04693939a6b600a8898b6acbc2c55231985f0b5d8b9420316e52a54c743e26b31ced3e805cc7e938b1e14b21d234b171f4723d653f1
6
+ metadata.gz: 85c7b65ee73ea096635260495077002c74d4891ecab632814de81a474eed2cfc9b9eebbfe4f60a200d927a60e7cbbf00e9910b6988fc9cc73abbee8368bb7163
7
+ data.tar.gz: 9607b619d00a96ae83a20238492d47897976498bf48e63400e5845765e5585c2bf872d31b2d9803479ee4900ca23433ee6b58c391c05d8441298566716d2e1f4
data/lib/crabfarm/cli.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'gli'
2
+ require 'crabfarm/support/gli'
2
3
 
3
4
  module Crabfarm
4
5
  class CLI
@@ -6,16 +7,17 @@ module Crabfarm
6
7
 
7
8
  program_desc 'Crabfarm toolbelt'
8
9
 
9
- pre do |global_options,command,options,args|
10
- # Things to do before
11
- true
12
- end
13
-
14
10
  desc "Starts the crawler in console mode"
15
11
  command [:console, :c] do |c|
12
+
13
+ Support::GLI.generate_options c
14
+
16
15
  c.action do |global_options,options,args|
16
+ next puts "This command can only be run inside a crabfarm application" unless defined? CF_LOADER
17
+
17
18
  require "crabfarm/modes/console"
18
- Crabfarm::Modes::Console.console_loop
19
+ CF_LOADER.load Support::GLI.parse_options options
20
+ Crabfarm::Modes::Console.start CF_LOADER
19
21
  end
20
22
  end
21
23
 
@@ -30,13 +32,19 @@ module Crabfarm
30
32
  c.desc "Set the server min and max threads, defaults to 0:16"
31
33
  c.flag [:t,:threads]
32
34
 
35
+ Support::GLI.generate_options c
36
+
33
37
  c.action do |global_options,options,args|
38
+ next puts "This command can only be run inside a crabfarm application" unless defined? CF_LOADER
39
+
34
40
  require "crabfarm/modes/server"
35
41
  server_options = {}
36
42
  server_options[:Host] = options[:host] unless options[:host].nil?
37
43
  server_options[:Port] = options[:port] || 3100
38
44
  server_options[:Threads] = options[:threads] unless options[:threads].nil?
39
- Crabfarm::Modes::Server.start server_options
45
+
46
+ CF_LOADER.load Support::GLI.parse_options options
47
+ Crabfarm::Modes::Server.start CF_LOADER, server_options
40
48
  end
41
49
  end
42
50
 
@@ -2,81 +2,103 @@ module Crabfarm
2
2
 
3
3
  class Configuration
4
4
 
5
- # TODO: improve DSL, it sucks
6
-
7
- attr_accessor :default_dsl
8
- attr_accessor :driver_factory
9
-
10
- # Default driver configuration parameters
11
- attr_accessor :driver_name
12
- attr_accessor :driver_host
13
- attr_accessor :driver_port
14
- attr_accessor :driver_capabilities
15
- attr_accessor :driver_remote_timeout
16
- attr_accessor :driver_window_width
17
- attr_accessor :driver_window_height
18
-
19
- # Phantom launcher configuration
20
- attr_accessor :phantom_enabled
21
- attr_accessor :phantom_load_images
22
- attr_accessor :phantom_proxy
23
- attr_accessor :phantom_ssl
24
- attr_accessor :phantom_bin_path
25
- attr_accessor :phantom_lock_file
5
+ class Option < Struct.new(:name, :type, :text); end
6
+
7
+ OPTIONS = [
8
+ [:default_dsl, :string, 'Default dsl used by parsers and states'],
9
+ [:driver_factory, :mixed, 'Driver factory, disabled if phantom_mode is used'],
10
+ [:log_path, :string, 'Path where logs should be stored'],
11
+
12
+ # Default driver configuration parameters
13
+ [:driver, ['chrome', 'firefox', 'phantomjs', 'remote'], 'Webdriver to be user, common options: chrome, firefox, phantomjs, remote.'],
14
+ [:driver_host, :string, 'Remote host, only available in driver: remote'],
15
+ [:driver_port, :integer, 'Remote port, only available in driver: remote'],
16
+ [:driver_capabilities, :mixed, 'Driver capabilities, depends on selected driver.'],
17
+ [:driver_remote_timeout, :float, 'Request timeout in seconds, only available for remote or phatomjs driver.'],
18
+ [:driver_window_width, :integer, 'Initial browser window width.'],
19
+ [:driver_window_height, :integer, 'Initial browser window height.'],
20
+
21
+ # Phantom launcher configuration
22
+ [:phantom_load_images, :boolean, 'Phantomjs image loading, only for phantomjs driver.'],
23
+ [:phantom_proxy, :string, 'Phantonjs proxy address, only for phantomjs driver.'],
24
+ [:phantom_ssl, ['sslv3', 'sslv2', 'tlsv1', 'any'], 'Phantomjs ssl mode: sslv3, sslv2, tlsv1 or any, only for phantomjs driver.'],
25
+ [:phantom_bin_path, :string, 'Phantomjs binary path, only for phantomjs driver.'],
26
+ [:phantom_lock_file, :string, 'Phantomjs lock file path, only for phantomjs driver.']
27
+ ]
28
+ .map { |o| Option.new *o }
29
+
30
+ OPTIONS.each do |var|
31
+ define_method "set_#{var.name}" do |val|
32
+ @values[var.name] = val
33
+ end
34
+
35
+ define_method "#{var.name}" do
36
+ @values[var.name]
37
+ end
38
+ end
39
+
40
+ def initialize
41
+ @values = {
42
+ default_dsl: :surfer,
43
+ driver_factory: nil,
44
+ log_path: 'logs',
45
+
46
+ driver: 'phantomjs',
47
+ driver_capabilities: Selenium::WebDriver::Remote::Capabilities.firefox,
48
+ driver_host: 'localhost',
49
+ driver_port: '8080',
50
+ driver_remote_timeout: 120,
51
+ driver_window_width: 1280,
52
+ driver_window_height: 800,
53
+
54
+ phantom_load_images: false,
55
+ phantom_proxy: nil,
56
+ phantom_ssl: 'any',
57
+ phantom_bin_path: 'phantomjs',
58
+ phantom_lock_file: nil
59
+ }
60
+ end
61
+
62
+ def set(_options)
63
+ @values.merge! _options
64
+ end
65
+
66
+ def driver_remote_host
67
+ if driver_host then nil
68
+ elsif driver_port then "http://#{driver_host}"
69
+ else "http://#{driver_host}:#{driver_port}"
70
+ end
71
+ end
26
72
 
27
73
  def driver_config
28
74
  {
29
- name: @driver_name,
30
- capabilities: @driver_capabilities,
75
+ name: driver,
76
+ capabilities: driver_capabilities,
31
77
  remote_host: driver_remote_host,
32
- remote_timeout: @driver_remote_timeout,
33
- window_width: @driver_window_width,
34
- window_height: @driver_window_height
78
+ remote_timeout: driver_remote_timeout,
79
+ window_width: driver_window_width,
80
+ window_height: driver_window_height
35
81
  }
36
82
  end
37
83
 
38
- def phantom_enabled?
39
- @phantom_enabled
84
+ def phantom_mode_enabled?
85
+ driver.to_s == 'phantomjs'
40
86
  end
41
87
 
42
88
  def phantom_config
43
89
  {
44
- load_images: @phantom_load_images,
45
- proxy: @phantom_proxy,
46
- ssl: @phantom_ssl,
47
- bin_path: @phantom_bin_path,
48
- lock_file: @phantom_lock_file
90
+ load_images: phantom_load_images,
91
+ proxy: phantom_proxy,
92
+ ssl: phantom_ssl,
93
+ bin_path: phantom_bin_path,
94
+ lock_file: phantom_lock_file,
95
+ log_file: log_path ? File.join(log_path, 'phantom.log') : nil
49
96
  }
50
97
  end
51
98
 
52
- def initialize
53
- @default_dsl = :surfer
54
- @driver_factory = nil
55
-
56
- @driver_name = :chrome
57
- @driver_capabilities = Selenium::WebDriver::Remote::Capabilities.firefox
58
- @driver_host = 'localhost'
59
- @driver_port = '8080'
60
- @driver_remote_timeout = 120
61
- @driver_window_width = 1280
62
- @driver_window_height = 800
63
-
64
- @phantom_enabled = false
65
- @phantom_load_images = false
66
- @phantom_proxy = nil
67
- @phantom_ssl = 'any'
68
- @phantom_bin_path = 'phantomjs'
69
- @phantom_lock_file = nil
70
- end
71
-
72
- private
73
-
74
- def driver_remote_host
75
- if @driver_host.nil? then nil
76
- elsif @driver_port.nil? then "http://#{@driver_host}"
77
- else "http://#{@driver_host}:#{@driver_port}"
78
- end
79
- end
99
+ # Add enviroment support (like a Gemfile)
100
+ # group :test { set_driver :phantom }
101
+ # set_driver :phantom, group: :test
80
102
 
81
103
  end
82
104
 
@@ -7,7 +7,7 @@ module Crabfarm
7
7
  def_delegators :@pool, :driver
8
8
 
9
9
  def initialize(_module)
10
- @module = ModuleHelper.new _module
10
+ @module = _module
11
11
  @pool = DriverBucketPool.new @module
12
12
  @store = StateStore.new @module
13
13
  end
@@ -29,14 +29,14 @@ module Crabfarm
29
29
  private
30
30
 
31
31
  def init_phantom_if_required
32
- if @module.settings.phantom_enabled?
32
+ if @module.settings.phantom_mode_enabled?
33
33
  @phantom = PhantomRunner.new @module.settings.phantom_config
34
34
  @phantom.start
35
35
  end
36
36
  end
37
37
 
38
38
  def build_driver_factory
39
- if @module.settings.phantom_enabled?
39
+ if @module.settings.phantom_mode_enabled?
40
40
  PhantomDriverFactory.new @phantom, @module.settings.driver_config
41
41
  else
42
42
  return @module.settings.driver_factory if @module.settings.driver_factory
@@ -6,58 +6,75 @@ module Crabfarm
6
6
 
7
7
  attr_reader :module
8
8
 
9
- def initialize(_base_path, _module_name=nil, &_config_block)
9
+ def initialize(_base_path, _module=Object)
10
10
  @path = _base_path
11
- @name = _module_name
12
- @config_block = _config_block
13
- @module = if is_wrapped? then
14
- "::#{@name}".constantize rescue nil
15
- else nil end
11
+ @module = _module
12
+ @config = nil
13
+ @source_loaded = false
16
14
  end
17
15
 
18
- def is_wrapped?
19
- @name.present?
16
+ def load(_overrides=nil)
17
+ load_config _overrides
18
+ load_source
20
19
  end
21
20
 
22
- def is_loaded?
23
- not @module.nil?
21
+ def load_config(_overrides=nil)
22
+ raise ConfigurationError.new 'Source already loaded, call unload_source first' if @source_loaded
23
+ raise ConfigurationError.new 'Crabfile not found' unless File.exists? crafile_path
24
+
25
+ @config = read_crabfile crafile_path
26
+ @config.set _overrides unless _overrides.nil?
27
+ end
28
+
29
+ def load_source
30
+ raise ConfigurationError.new 'Crabfile must be loaded first' if @config.nil?
31
+ raise ConfigurationError.new 'Source already loaded, call reload_source instead' if @source_loaded
32
+
33
+ require_from_path source_path
34
+ @source_loaded = true
35
+ end
36
+
37
+ def unload_source
38
+ # TODO: unload every class in a list
39
+ @source_loaded = false
24
40
  end
25
41
 
26
- def load
27
- crabfile = load_crabfile(@path)
28
- @module = load_module(@name, File.join(@path, 'app'))
29
- @module.send(:remove_const, :CF_CONFIG) rescue nil
30
- @module.const_set :CF_CONFIG, crabfile
42
+ def reload_source
43
+ unload_source if @source_loaded
44
+ load_source
31
45
  end
32
46
 
33
- def load_context
34
- load unless is_loaded?
35
- Context.new @module
47
+ def is_loaded?
48
+ @source_loaded
36
49
  end
37
50
 
38
- def unload
39
- Object.send(:remove_const, @name) if is_wrapped?
40
- @module = nil
51
+ def load_context(_overrides={})
52
+ raise ConfigurationError.new 'Must load source first' unless @source_loaded
53
+ Context.new ModuleHelper.new @module, @config
41
54
  end
42
55
 
43
56
  private
44
57
 
45
- def load_crabfile(_path)
46
- crabfile = File.read(File.join(_path, 'Crabfile'))
58
+ def crafile_path
59
+ File.join @path, 'Crabfile'
60
+ end
61
+
62
+ def source_path
63
+ File.join @path, 'app'
64
+ end
65
+
66
+ def read_crabfile(_path)
47
67
  config = Configuration.new
48
- config.instance_eval crabfile
49
- @config_block.call(config) unless @config_block.nil?
68
+ config.instance_eval File.read crafile_path
50
69
  return config
51
70
  end
52
71
 
53
- def load_module(_name, _path)
54
- require_all_as(_name, _path)
55
- if is_wrapped? then "::#{_name}".constantize else Object end
56
- end
72
+ def require_from_path(_src_path)
73
+ @module.module_eval do
74
+ # TODO: put every loaded class in a list, store last update or hash so next time is required
75
+ # it can be reloaded automatically.
57
76
 
58
- def require_all_as(_name, _src_path)
59
- loader_code = "
60
- pending = Dir.glob('#{File.join(_src_path, '**/*')}').select { |f| f.end_with? '.rb' }.map { |f| f[0...-3] }
77
+ pending = Dir.glob(File.join(_src_path, '**/*')).select { |f| f.end_with? '.rb' }.map { |f| f[0...-3] }
61
78
 
62
79
  while pending.size > 0
63
80
  new_pending = []
@@ -72,10 +89,7 @@ module Crabfarm
72
89
  require new_pending.first if new_pending.size == pending.size
73
90
  pending = new_pending
74
91
  end
75
- "
76
-
77
- loader_code = "module ::#{_name}; #{loader_code}; end" if _name.present?
78
- Object.instance_eval loader_code
92
+ end
79
93
  end
80
94
 
81
95
  end
@@ -20,7 +20,7 @@ module Crabfarm
20
20
  unless @context.nil?
21
21
  puts "Reloading crawler source".color(:green)
22
22
  @context.release
23
- @loader.unload
23
+ @loader.reload_source
24
24
  end
25
25
 
26
26
  @context = @loader.load_context
@@ -56,29 +56,23 @@ module Crabfarm
56
56
  alias :r :reset
57
57
  end
58
58
 
59
- def self.console_loop
59
+ def self.start(_loader)
60
+ dsl = ConsoleDsl.new(_loader)
60
61
 
61
- if defined? CF_LOADER
62
- # TODO: generated app should load itself
63
- dsl = ConsoleDsl.new(CF_LOADER)
64
-
65
- loop do
66
- begin
67
- dsl.instance_eval Readline.readline("> ", true)
68
- rescue SyntaxError => se
69
- puts "Syntax error: #{se.message}".color(:red)
70
- rescue SystemExit, Interrupt
71
- break
72
- rescue => e
73
- puts "Unknown command".color(:red)
74
- end
62
+ loop do
63
+ begin
64
+ dsl.instance_eval Readline.readline("> ", true)
65
+ rescue SyntaxError => se
66
+ puts "Syntax error: #{se.message}".color(:red)
67
+ rescue SystemExit, Interrupt
68
+ break
69
+ rescue => e
70
+ puts "Unknown command".color(:red)
75
71
  end
76
-
77
- puts "Releasing crawling context".color(:green)
78
- dsl.context.release
79
- else
80
- puts "This command can only be run inside a crabfarm application".color(:red)
81
72
  end
73
+
74
+ puts "Releasing crawling context".color(:green)
75
+ dsl.context.release
82
76
  end
83
77
 
84
78
  end
@@ -8,7 +8,7 @@ module Crabfarm
8
8
 
9
9
  class API < Grape::API
10
10
 
11
- MAX_WAIT = 60.0 * 5
11
+ DEFAULT_WAIT = 60.0 * 5
12
12
 
13
13
  format :json
14
14
  prefix :api
@@ -27,9 +27,7 @@ module Crabfarm
27
27
  end
28
28
 
29
29
  def wait
30
- if params.has_key? :wait
31
- [params[:wait].to_f, MAX_WAIT].min
32
- else MAX_WAIT end
30
+ params.fetch(:wait, DEFAULT_WAIT)
33
31
  end
34
32
 
35
33
  def print_state(_state)
@@ -63,8 +61,8 @@ module Crabfarm
63
61
  @@evaluator
64
62
  end
65
63
 
66
- def self.start(_options)
67
- @@evaluator = Engines::SafeStateLoop.new CF_LOADER
64
+ def self.start(_loader, _options)
65
+ @@evaluator = Engines::SafeStateLoop.new _loader
68
66
  begin
69
67
  Support::CustomPuma.run API, _options
70
68
  ensure
@@ -5,12 +5,13 @@ module Crabfarm
5
5
 
6
6
  attr_reader :dsl
7
7
 
8
- def initialize(_module)
8
+ def initialize(_module, _config=nil)
9
9
  @module = _module
10
+ @config = _config || @module::CF_CONFIG
10
11
  end
11
12
 
12
13
  def settings
13
- @module::CF_CONFIG
14
+ @config
14
15
  end
15
16
 
16
17
  def load_state(_name)
@@ -33,24 +33,24 @@ module Crabfarm
33
33
  cmd << "--webdriver=#{@port}"
34
34
  cmd << "--ssl-protocol=#{@config[:ssl]}" unless @config[:ssl].nil?
35
35
  cmd << "--ignore-ssl-errors=true"
36
- cmd << "--webdriver-loglevel=NONE" # TODO: remove when log path is choosen
37
- # cmd << "--webdriver-logfile=/path/to/log/phantom.log"
36
+ cmd << "--webdriver-loglevel=WARN"
37
+ cmd << "--webdriver-logfile=#{@config[:log_file]}" unless @config[:log_file].nil?
38
38
  cmd.join(' ')
39
39
  end
40
40
 
41
41
  def find_available_port
42
42
  with_lock do
43
- server = TCPServer.new('127.0.0.1', 0)
44
- @port = server.addr[1]
45
- server.close
43
+ socket = Socket.new(:INET, :STREAM, 0)
44
+ socket.bind(Addrinfo.tcp("127.0.0.1", 0))
45
+ @port = socket.local_address.ip_port
46
+ socket.close
46
47
  end
47
48
  end
48
49
 
49
50
  def wait_for_server
50
51
  loop do
51
52
  begin
52
- # TODO: generate a valid request to prevent warnings
53
- Net::HTTP.get_response(URI.parse("http://127.0.0.1:#{@port}"))
53
+ Net::HTTP.get_response(URI.parse("http://127.0.0.1:#{@port}/status"))
54
54
  break
55
55
  rescue
56
56
  end
@@ -0,0 +1,34 @@
1
+ module Crabfarm
2
+ module Support
3
+ module GLI
4
+ def self.generate_options(_cmd)
5
+ Configuration::OPTIONS.each do |opt|
6
+ if opt.type != :mixed
7
+ _cmd.desc opt.text
8
+ _cmd.flag "cf-#{opt.name}"
9
+ end
10
+ end
11
+ end
12
+
13
+ def self.parse_options(_options)
14
+ config_overrides = {}
15
+ Configuration::OPTIONS.each do |opt|
16
+ value = _options["cf-#{opt.name}"]
17
+ next if value.nil?
18
+
19
+ value = if opt.type.is_a? Array
20
+ opt.type.find { |t| t.to_s == value }
21
+ elsif opt.type == :integer then value.to_i
22
+ elsif opt.type == :float then value.to_f
23
+ elsif opt.type == :boolean then [true, false].find { |t| t.to_s == value }
24
+ elsif opt.type == :string then value
25
+ else nil end
26
+ next if value.nil?
27
+
28
+ config_overrides[opt.name] = value
29
+ end
30
+ config_overrides
31
+ end
32
+ end
33
+ end
34
+ end
@@ -1,3 +1,2 @@
1
- self.default_dsl = :surfer
2
- self.driver_name = :firefox
3
- # self.phantom_enabled = true
1
+ set_default_dsl :surfer
2
+ set_driver :phantomjs
@@ -1,13 +1,11 @@
1
- # Load gems listed in the Gemfile.
2
-
3
1
  require 'bundler'
4
2
 
5
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __FILE__)
6
- require 'bundler/setup' if File.exist?(ENV['BUNDLE_GEMFILE'])
3
+ # Load gems listed in the Gemfile.
7
4
 
8
- Bundler.require
5
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __FILE__)
6
+ Bundler.require :default
9
7
 
10
8
  # Run code loader
11
9
 
12
10
  CF_PATH = File.expand_path('../', __FILE__)
13
- CF_LOADER = Crabfarm::Loader.new CF_PATH, ENV['CF_NAMESPACE']
11
+ CF_LOADER = Crabfarm::Loader.new CF_PATH
@@ -1 +1,2 @@
1
1
  Gemfile.lock
2
+ logs/*.*
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-30 00:00:00.000000000 Z
11
+ date: 2015-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jbuilder
@@ -317,6 +317,7 @@ files:
317
317
  - lib/crabfarm/rspec.rb
318
318
  - lib/crabfarm/state_store.rb
319
319
  - lib/crabfarm/support/custom_puma.rb
320
+ - lib/crabfarm/support/gli.rb
320
321
  - lib/crabfarm/templates/boot.rb.erb
321
322
  - lib/crabfarm/templates/crabfarm_bin.erb
322
323
  - lib/crabfarm/templates/Crabfile.erb