crabfarm 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7eb935482cb5663082aae4a3d9e24a722c1aea5d
4
- data.tar.gz: 05b8038d530eb0d5f4be9325621405c90056b7be
3
+ metadata.gz: cc45d8d0e358193e3729c8ca8d38715680470c78
4
+ data.tar.gz: 30c698146ec3b2d4032cddf4d4b3011af4edaa75
5
5
  SHA512:
6
- metadata.gz: ef524a63574fa86249a0f31f08af390c07be7cb738f6a1ac8063bc59ba1a426074bc5681867c234e341905b150c73f3e2f2bd056f1e71dd39b15b9eff8ea5e3c
7
- data.tar.gz: 23496fa635c7baca3606c04693939a6b600a8898b6acbc2c55231985f0b5d8b9420316e52a54c743e26b31ced3e805cc7e938b1e14b21d234b171f4723d653f1
6
+ metadata.gz: 85c7b65ee73ea096635260495077002c74d4891ecab632814de81a474eed2cfc9b9eebbfe4f60a200d927a60e7cbbf00e9910b6988fc9cc73abbee8368bb7163
7
+ data.tar.gz: 9607b619d00a96ae83a20238492d47897976498bf48e63400e5845765e5585c2bf872d31b2d9803479ee4900ca23433ee6b58c391c05d8441298566716d2e1f4
data/lib/crabfarm/cli.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'gli'
2
+ require 'crabfarm/support/gli'
2
3
 
3
4
  module Crabfarm
4
5
  class CLI
@@ -6,16 +7,17 @@ module Crabfarm
6
7
 
7
8
  program_desc 'Crabfarm toolbelt'
8
9
 
9
- pre do |global_options,command,options,args|
10
- # Things to do before
11
- true
12
- end
13
-
14
10
  desc "Starts the crawler in console mode"
15
11
  command [:console, :c] do |c|
12
+
13
+ Support::GLI.generate_options c
14
+
16
15
  c.action do |global_options,options,args|
16
+ next puts "This command can only be run inside a crabfarm application" unless defined? CF_LOADER
17
+
17
18
  require "crabfarm/modes/console"
18
- Crabfarm::Modes::Console.console_loop
19
+ CF_LOADER.load Support::GLI.parse_options options
20
+ Crabfarm::Modes::Console.start CF_LOADER
19
21
  end
20
22
  end
21
23
 
@@ -30,13 +32,19 @@ module Crabfarm
30
32
  c.desc "Set the server min and max threads, defaults to 0:16"
31
33
  c.flag [:t,:threads]
32
34
 
35
+ Support::GLI.generate_options c
36
+
33
37
  c.action do |global_options,options,args|
38
+ next puts "This command can only be run inside a crabfarm application" unless defined? CF_LOADER
39
+
34
40
  require "crabfarm/modes/server"
35
41
  server_options = {}
36
42
  server_options[:Host] = options[:host] unless options[:host].nil?
37
43
  server_options[:Port] = options[:port] || 3100
38
44
  server_options[:Threads] = options[:threads] unless options[:threads].nil?
39
- Crabfarm::Modes::Server.start server_options
45
+
46
+ CF_LOADER.load Support::GLI.parse_options options
47
+ Crabfarm::Modes::Server.start CF_LOADER, server_options
40
48
  end
41
49
  end
42
50
 
@@ -2,81 +2,103 @@ module Crabfarm
2
2
 
3
3
  class Configuration
4
4
 
5
- # TODO: improve DSL, it sucks
6
-
7
- attr_accessor :default_dsl
8
- attr_accessor :driver_factory
9
-
10
- # Default driver configuration parameters
11
- attr_accessor :driver_name
12
- attr_accessor :driver_host
13
- attr_accessor :driver_port
14
- attr_accessor :driver_capabilities
15
- attr_accessor :driver_remote_timeout
16
- attr_accessor :driver_window_width
17
- attr_accessor :driver_window_height
18
-
19
- # Phantom launcher configuration
20
- attr_accessor :phantom_enabled
21
- attr_accessor :phantom_load_images
22
- attr_accessor :phantom_proxy
23
- attr_accessor :phantom_ssl
24
- attr_accessor :phantom_bin_path
25
- attr_accessor :phantom_lock_file
5
+ class Option < Struct.new(:name, :type, :text); end
6
+
7
+ OPTIONS = [
8
+ [:default_dsl, :string, 'Default dsl used by parsers and states'],
9
+ [:driver_factory, :mixed, 'Driver factory, disabled if phantom_mode is used'],
10
+ [:log_path, :string, 'Path where logs should be stored'],
11
+
12
+ # Default driver configuration parameters
13
+ [:driver, ['chrome', 'firefox', 'phantomjs', 'remote'], 'Webdriver to be user, common options: chrome, firefox, phantomjs, remote.'],
14
+ [:driver_host, :string, 'Remote host, only available in driver: remote'],
15
+ [:driver_port, :integer, 'Remote port, only available in driver: remote'],
16
+ [:driver_capabilities, :mixed, 'Driver capabilities, depends on selected driver.'],
17
+ [:driver_remote_timeout, :float, 'Request timeout in seconds, only available for remote or phatomjs driver.'],
18
+ [:driver_window_width, :integer, 'Initial browser window width.'],
19
+ [:driver_window_height, :integer, 'Initial browser window height.'],
20
+
21
+ # Phantom launcher configuration
22
+ [:phantom_load_images, :boolean, 'Phantomjs image loading, only for phantomjs driver.'],
23
+ [:phantom_proxy, :string, 'Phantonjs proxy address, only for phantomjs driver.'],
24
+ [:phantom_ssl, ['sslv3', 'sslv2', 'tlsv1', 'any'], 'Phantomjs ssl mode: sslv3, sslv2, tlsv1 or any, only for phantomjs driver.'],
25
+ [:phantom_bin_path, :string, 'Phantomjs binary path, only for phantomjs driver.'],
26
+ [:phantom_lock_file, :string, 'Phantomjs lock file path, only for phantomjs driver.']
27
+ ]
28
+ .map { |o| Option.new *o }
29
+
30
+ OPTIONS.each do |var|
31
+ define_method "set_#{var.name}" do |val|
32
+ @values[var.name] = val
33
+ end
34
+
35
+ define_method "#{var.name}" do
36
+ @values[var.name]
37
+ end
38
+ end
39
+
40
+ def initialize
41
+ @values = {
42
+ default_dsl: :surfer,
43
+ driver_factory: nil,
44
+ log_path: 'logs',
45
+
46
+ driver: 'phantomjs',
47
+ driver_capabilities: Selenium::WebDriver::Remote::Capabilities.firefox,
48
+ driver_host: 'localhost',
49
+ driver_port: '8080',
50
+ driver_remote_timeout: 120,
51
+ driver_window_width: 1280,
52
+ driver_window_height: 800,
53
+
54
+ phantom_load_images: false,
55
+ phantom_proxy: nil,
56
+ phantom_ssl: 'any',
57
+ phantom_bin_path: 'phantomjs',
58
+ phantom_lock_file: nil
59
+ }
60
+ end
61
+
62
+ def set(_options)
63
+ @values.merge! _options
64
+ end
65
+
66
+ def driver_remote_host
67
+ if driver_host then nil
68
+ elsif driver_port then "http://#{driver_host}"
69
+ else "http://#{driver_host}:#{driver_port}"
70
+ end
71
+ end
26
72
 
27
73
  def driver_config
28
74
  {
29
- name: @driver_name,
30
- capabilities: @driver_capabilities,
75
+ name: driver,
76
+ capabilities: driver_capabilities,
31
77
  remote_host: driver_remote_host,
32
- remote_timeout: @driver_remote_timeout,
33
- window_width: @driver_window_width,
34
- window_height: @driver_window_height
78
+ remote_timeout: driver_remote_timeout,
79
+ window_width: driver_window_width,
80
+ window_height: driver_window_height
35
81
  }
36
82
  end
37
83
 
38
- def phantom_enabled?
39
- @phantom_enabled
84
+ def phantom_mode_enabled?
85
+ driver.to_s == 'phantomjs'
40
86
  end
41
87
 
42
88
  def phantom_config
43
89
  {
44
- load_images: @phantom_load_images,
45
- proxy: @phantom_proxy,
46
- ssl: @phantom_ssl,
47
- bin_path: @phantom_bin_path,
48
- lock_file: @phantom_lock_file
90
+ load_images: phantom_load_images,
91
+ proxy: phantom_proxy,
92
+ ssl: phantom_ssl,
93
+ bin_path: phantom_bin_path,
94
+ lock_file: phantom_lock_file,
95
+ log_file: log_path ? File.join(log_path, 'phantom.log') : nil
49
96
  }
50
97
  end
51
98
 
52
- def initialize
53
- @default_dsl = :surfer
54
- @driver_factory = nil
55
-
56
- @driver_name = :chrome
57
- @driver_capabilities = Selenium::WebDriver::Remote::Capabilities.firefox
58
- @driver_host = 'localhost'
59
- @driver_port = '8080'
60
- @driver_remote_timeout = 120
61
- @driver_window_width = 1280
62
- @driver_window_height = 800
63
-
64
- @phantom_enabled = false
65
- @phantom_load_images = false
66
- @phantom_proxy = nil
67
- @phantom_ssl = 'any'
68
- @phantom_bin_path = 'phantomjs'
69
- @phantom_lock_file = nil
70
- end
71
-
72
- private
73
-
74
- def driver_remote_host
75
- if @driver_host.nil? then nil
76
- elsif @driver_port.nil? then "http://#{@driver_host}"
77
- else "http://#{@driver_host}:#{@driver_port}"
78
- end
79
- end
99
+ # Add environment support (like a Gemfile)
100
+ # group :test { set_driver :phantom }
101
+ # set_driver :phantom, group: :test
80
102
 
81
103
  end
82
104
 
@@ -7,7 +7,7 @@ module Crabfarm
7
7
  def_delegators :@pool, :driver
8
8
 
9
9
  def initialize(_module)
10
- @module = ModuleHelper.new _module
10
+ @module = _module
11
11
  @pool = DriverBucketPool.new @module
12
12
  @store = StateStore.new @module
13
13
  end
@@ -29,14 +29,14 @@ module Crabfarm
29
29
  private
30
30
 
31
31
  def init_phantom_if_required
32
- if @module.settings.phantom_enabled?
32
+ if @module.settings.phantom_mode_enabled?
33
33
  @phantom = PhantomRunner.new @module.settings.phantom_config
34
34
  @phantom.start
35
35
  end
36
36
  end
37
37
 
38
38
  def build_driver_factory
39
- if @module.settings.phantom_enabled?
39
+ if @module.settings.phantom_mode_enabled?
40
40
  PhantomDriverFactory.new @phantom, @module.settings.driver_config
41
41
  else
42
42
  return @module.settings.driver_factory if @module.settings.driver_factory
@@ -6,58 +6,75 @@ module Crabfarm
6
6
 
7
7
  attr_reader :module
8
8
 
9
- def initialize(_base_path, _module_name=nil, &_config_block)
9
+ def initialize(_base_path, _module=Object)
10
10
  @path = _base_path
11
- @name = _module_name
12
- @config_block = _config_block
13
- @module = if is_wrapped? then
14
- "::#{@name}".constantize rescue nil
15
- else nil end
11
+ @module = _module
12
+ @config = nil
13
+ @source_loaded = false
16
14
  end
17
15
 
18
- def is_wrapped?
19
- @name.present?
16
+ def load(_overrides=nil)
17
+ load_config _overrides
18
+ load_source
20
19
  end
21
20
 
22
- def is_loaded?
23
- not @module.nil?
21
+ def load_config(_overrides=nil)
22
+ raise ConfigurationError.new 'Source already loaded, call unload_source first' if @source_loaded
23
+ raise ConfigurationError.new 'Crabfile not found' unless File.exists? crafile_path
24
+
25
+ @config = read_crabfile crafile_path
26
+ @config.set _overrides unless _overrides.nil?
27
+ end
28
+
29
+ def load_source
30
+ raise ConfigurationError.new 'Crabfile must be loaded first' if @config.nil?
31
+ raise ConfigurationError.new 'Source already loaded, call reload_source instead' if @source_loaded
32
+
33
+ require_from_path source_path
34
+ @source_loaded = true
35
+ end
36
+
37
+ def unload_source
38
+ # TODO: unload every class in a list
39
+ @source_loaded = false
24
40
  end
25
41
 
26
- def load
27
- crabfile = load_crabfile(@path)
28
- @module = load_module(@name, File.join(@path, 'app'))
29
- @module.send(:remove_const, :CF_CONFIG) rescue nil
30
- @module.const_set :CF_CONFIG, crabfile
42
+ def reload_source
43
+ unload_source if @source_loaded
44
+ load_source
31
45
  end
32
46
 
33
- def load_context
34
- load unless is_loaded?
35
- Context.new @module
47
+ def is_loaded?
48
+ @source_loaded
36
49
  end
37
50
 
38
- def unload
39
- Object.send(:remove_const, @name) if is_wrapped?
40
- @module = nil
51
+ def load_context(_overrides={})
52
+ raise ConfigurationError.new 'Must load source first' unless @source_loaded
53
+ Context.new ModuleHelper.new @module, @config
41
54
  end
42
55
 
43
56
  private
44
57
 
45
- def load_crabfile(_path)
46
- crabfile = File.read(File.join(_path, 'Crabfile'))
58
+ def crafile_path
59
+ File.join @path, 'Crabfile'
60
+ end
61
+
62
+ def source_path
63
+ File.join @path, 'app'
64
+ end
65
+
66
+ def read_crabfile(_path)
47
67
  config = Configuration.new
48
- config.instance_eval crabfile
49
- @config_block.call(config) unless @config_block.nil?
68
+ config.instance_eval File.read crafile_path
50
69
  return config
51
70
  end
52
71
 
53
- def load_module(_name, _path)
54
- require_all_as(_name, _path)
55
- if is_wrapped? then "::#{_name}".constantize else Object end
56
- end
72
+ def require_from_path(_src_path)
73
+ @module.module_eval do
74
+ # TODO: put every loaded class in a list, store last update or hash so the next time it is required
75
+ # it can be reloaded automatically.
57
76
 
58
- def require_all_as(_name, _src_path)
59
- loader_code = "
60
- pending = Dir.glob('#{File.join(_src_path, '**/*')}').select { |f| f.end_with? '.rb' }.map { |f| f[0...-3] }
77
+ pending = Dir.glob(File.join(_src_path, '**/*')).select { |f| f.end_with? '.rb' }.map { |f| f[0...-3] }
61
78
 
62
79
  while pending.size > 0
63
80
  new_pending = []
@@ -72,10 +89,7 @@ module Crabfarm
72
89
  require new_pending.first if new_pending.size == pending.size
73
90
  pending = new_pending
74
91
  end
75
- "
76
-
77
- loader_code = "module ::#{_name}; #{loader_code}; end" if _name.present?
78
- Object.instance_eval loader_code
92
+ end
79
93
  end
80
94
 
81
95
  end
@@ -20,7 +20,7 @@ module Crabfarm
20
20
  unless @context.nil?
21
21
  puts "Reloading crawler source".color(:green)
22
22
  @context.release
23
- @loader.unload
23
+ @loader.reload_source
24
24
  end
25
25
 
26
26
  @context = @loader.load_context
@@ -56,29 +56,23 @@ module Crabfarm
56
56
  alias :r :reset
57
57
  end
58
58
 
59
- def self.console_loop
59
+ def self.start(_loader)
60
+ dsl = ConsoleDsl.new(_loader)
60
61
 
61
- if defined? CF_LOADER
62
- # TODO: generated app should load itself
63
- dsl = ConsoleDsl.new(CF_LOADER)
64
-
65
- loop do
66
- begin
67
- dsl.instance_eval Readline.readline("> ", true)
68
- rescue SyntaxError => se
69
- puts "Syntax error: #{se.message}".color(:red)
70
- rescue SystemExit, Interrupt
71
- break
72
- rescue => e
73
- puts "Unknown command".color(:red)
74
- end
62
+ loop do
63
+ begin
64
+ dsl.instance_eval Readline.readline("> ", true)
65
+ rescue SyntaxError => se
66
+ puts "Syntax error: #{se.message}".color(:red)
67
+ rescue SystemExit, Interrupt
68
+ break
69
+ rescue => e
70
+ puts "Unknown command".color(:red)
75
71
  end
76
-
77
- puts "Releasing crawling context".color(:green)
78
- dsl.context.release
79
- else
80
- puts "This command can only be run inside a crabfarm application".color(:red)
81
72
  end
73
+
74
+ puts "Releasing crawling context".color(:green)
75
+ dsl.context.release
82
76
  end
83
77
 
84
78
  end
@@ -8,7 +8,7 @@ module Crabfarm
8
8
 
9
9
  class API < Grape::API
10
10
 
11
- MAX_WAIT = 60.0 * 5
11
+ DEFAULT_WAIT = 60.0 * 5
12
12
 
13
13
  format :json
14
14
  prefix :api
@@ -27,9 +27,7 @@ module Crabfarm
27
27
  end
28
28
 
29
29
  def wait
30
- if params.has_key? :wait
31
- [params[:wait].to_f, MAX_WAIT].min
32
- else MAX_WAIT end
30
+ params.fetch(:wait, DEFAULT_WAIT)
33
31
  end
34
32
 
35
33
  def print_state(_state)
@@ -63,8 +61,8 @@ module Crabfarm
63
61
  @@evaluator
64
62
  end
65
63
 
66
- def self.start(_options)
67
- @@evaluator = Engines::SafeStateLoop.new CF_LOADER
64
+ def self.start(_loader, _options)
65
+ @@evaluator = Engines::SafeStateLoop.new _loader
68
66
  begin
69
67
  Support::CustomPuma.run API, _options
70
68
  ensure
@@ -5,12 +5,13 @@ module Crabfarm
5
5
 
6
6
  attr_reader :dsl
7
7
 
8
- def initialize(_module)
8
+ def initialize(_module, _config=nil)
9
9
  @module = _module
10
+ @config = _config || @module::CF_CONFIG
10
11
  end
11
12
 
12
13
  def settings
13
- @module::CF_CONFIG
14
+ @config
14
15
  end
15
16
 
16
17
  def load_state(_name)
@@ -33,24 +33,24 @@ module Crabfarm
33
33
  cmd << "--webdriver=#{@port}"
34
34
  cmd << "--ssl-protocol=#{@config[:ssl]}" unless @config[:ssl].nil?
35
35
  cmd << "--ignore-ssl-errors=true"
36
- cmd << "--webdriver-loglevel=NONE" # TODO: remove when log path is choosen
37
- # cmd << "--webdriver-logfile=/path/to/log/phantom.log"
36
+ cmd << "--webdriver-loglevel=WARN"
37
+ cmd << "--webdriver-logfile=#{@config[:log_file]}" unless @config[:log_file].nil?
38
38
  cmd.join(' ')
39
39
  end
40
40
 
41
41
  def find_available_port
42
42
  with_lock do
43
- server = TCPServer.new('127.0.0.1', 0)
44
- @port = server.addr[1]
45
- server.close
43
+ socket = Socket.new(:INET, :STREAM, 0)
44
+ socket.bind(Addrinfo.tcp("127.0.0.1", 0))
45
+ @port = socket.local_address.ip_port
46
+ socket.close
46
47
  end
47
48
  end
48
49
 
49
50
  def wait_for_server
50
51
  loop do
51
52
  begin
52
- # TODO: generate a valid request to prevent warnings
53
- Net::HTTP.get_response(URI.parse("http://127.0.0.1:#{@port}"))
53
+ Net::HTTP.get_response(URI.parse("http://127.0.0.1:#{@port}/status"))
54
54
  break
55
55
  rescue
56
56
  end
@@ -0,0 +1,34 @@
1
+ module Crabfarm
2
+ module Support
3
+ module GLI
4
+ def self.generate_options(_cmd)
5
+ Configuration::OPTIONS.each do |opt|
6
+ if opt.type != :mixed
7
+ _cmd.desc opt.text
8
+ _cmd.flag "cf-#{opt.name}"
9
+ end
10
+ end
11
+ end
12
+
13
+ def self.parse_options(_options)
14
+ config_overrides = {}
15
+ Configuration::OPTIONS.each do |opt|
16
+ value = _options["cf-#{opt.name}"]
17
+ next if value.nil?
18
+
19
+ value = if opt.type.is_a? Array
20
+ opt.type.find { |t| t.to_s == value }
21
+ elsif opt.type == :integer then value.to_i
22
+ elsif opt.type == :float then value.to_f
23
+ elsif opt.type == :boolean then [true, false].find { |t| t.to_s == value }
24
+ elsif opt.type == :string then value
25
+ else nil end
26
+ next if value.nil?
27
+
28
+ config_overrides[opt.name] = value
29
+ end
30
+ config_overrides
31
+ end
32
+ end
33
+ end
34
+ end
@@ -1,3 +1,2 @@
1
- self.default_dsl = :surfer
2
- self.driver_name = :firefox
3
- # self.phantom_enabled = true
1
+ set_default_dsl :surfer
2
+ set_driver :phantomjs
@@ -1,13 +1,11 @@
1
- # Load gems listed in the Gemfile.
2
-
3
1
  require 'bundler'
4
2
 
5
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __FILE__)
6
- require 'bundler/setup' if File.exist?(ENV['BUNDLE_GEMFILE'])
3
+ # Load gems listed in the Gemfile.
7
4
 
8
- Bundler.require
5
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __FILE__)
6
+ Bundler.require :default
9
7
 
10
8
  # Run code loader
11
9
 
12
10
  CF_PATH = File.expand_path('../', __FILE__)
13
- CF_LOADER = Crabfarm::Loader.new CF_PATH, ENV['CF_NAMESPACE']
11
+ CF_LOADER = Crabfarm::Loader.new CF_PATH
@@ -1 +1,2 @@
1
1
  Gemfile.lock
2
+ logs/*.*
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-30 00:00:00.000000000 Z
11
+ date: 2015-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jbuilder
@@ -317,6 +317,7 @@ files:
317
317
  - lib/crabfarm/rspec.rb
318
318
  - lib/crabfarm/state_store.rb
319
319
  - lib/crabfarm/support/custom_puma.rb
320
+ - lib/crabfarm/support/gli.rb
320
321
  - lib/crabfarm/templates/boot.rb.erb
321
322
  - lib/crabfarm/templates/crabfarm_bin.erb
322
323
  - lib/crabfarm/templates/Crabfile.erb