crabfarm 0.0.16 → 0.0.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 01f53897c32d27e6253c74660a0842b9cb7c3c97
4
- data.tar.gz: 7993ac4152d6128ce8b69950458e48dfe042089a
3
+ metadata.gz: 62fc780d5c9db277ef5147d564c74cd684dbd39b
4
+ data.tar.gz: 9028aeb0ce71914ab549644948c0ceb83e1efdda
5
5
  SHA512:
6
- metadata.gz: af73913311cb5404d95724470ce07bb37e3ed686abb02a39e20b6a85b0ad028a31ef4524c8ffd09381ce20157e107da7cbe20ddea22c08f3908997d81c1e37b1
7
- data.tar.gz: 3345f29499e15089b3fd6feccf984f9121187500e9859a2016ccfaf14947d363ca335b0212025722b9eac273996171a6f133e2c37f9d0fa5f2bd649952333742
6
+ metadata.gz: f003f40fbb5727dee2f831fede26ff6bfd559028c5d339f56aa63aabe3cc0fe7f03519d73df03ebee5121a8478dc0a082f8124272b194dc2a5023402c5147cbd
7
+ data.tar.gz: a19ccb92b10a0b1d316992cbde562987ac1a3928b4ae32811e56e570f93073f839b4422ae2a0e06bdcbebff3557bf5e0f1ea197bdbff442052c7b878273ca297
@@ -1,7 +1,7 @@
1
1
  module Crabfarm
2
2
  class CapybaraBrowserDsl
3
3
  def self.wrap(_bucket)
4
- raise "Capybara adapter is incompleted"
4
+ raise NotImplementedError.new "Capybara adapter is incompleted"
5
5
  end
6
6
  end
7
7
  end
@@ -1,21 +1,18 @@
1
- require 'watir-webdriver'
2
-
3
1
  class Watir::Browser
4
- def parse(_parser_class, _options={})
5
- Crabfarm::ParserService.parse _parser_class, html, _options
2
+ def to_html
3
+ html
6
4
  end
7
5
  end
8
6
 
9
7
  class Watir::Element
10
- def parse(_parser_class, _options={})
11
- Crabfarm::ParserService.parse _parser_class, html, _options
8
+ def to_html
9
+ html
12
10
  end
13
11
  end
14
12
 
15
13
  class Watir::ElementCollection
16
- def parse(_parser_class, _options={})
17
- full_html = self.map(&:html).join
18
- Crabfarm::ParserService.parse _parser_class, full_html, _options
14
+ def to_html
15
+ self.map(&:html).join
19
16
  end
20
17
  end
21
18
 
@@ -1,5 +1,3 @@
1
- require 'jbuilder'
2
-
3
1
  module Crabfarm
4
2
  class JbuilderOutputBuilder
5
3
  def self.prepare
@@ -1,9 +1,13 @@
1
1
  require 'nokogiri'
2
2
 
3
3
  module Crabfarm
4
- class NokogiriDsl
5
- def self.parse(_html)
6
- Nokogiri::HTML _html
4
+ class NokogiriAdapter
5
+ def self.parse(_element)
6
+ if _element.respond_to? :to_html
7
+ Nokogiri::HTML _element.to_html
8
+ else
9
+ Nokogiri::HTML _element
10
+ end
7
11
  end
8
12
  end
9
13
  end
@@ -0,0 +1,9 @@
1
+ require "pdf-reader"
2
+
3
+ module Crabfarm
4
+ class PdfReaderAdapter
5
+ def self.parse(_pdf_data)
6
+ PDF::Reader.new StringIO.new _pdf_data
7
+ end
8
+ end
9
+ end
@@ -1,18 +1,18 @@
1
1
  module Crabfarm
2
2
  class BaseParser < Delegator
3
3
 
4
- attr_reader :params, :root
4
+ attr_reader :params, :document
5
5
 
6
- def self.parser_dsl(_dsl)
7
- @parser_dsl = _dsl
6
+ def self.engine(_engine)
7
+ @engine = _engine
8
8
  end
9
9
 
10
- def initialize(_html, _params)
11
- dsl_class = Strategies.load(:parser_dsl, class_parser_dsl || Crabfarm.config.parser_dsl)
12
- @root = dsl_class.parse _html
10
+ def initialize(_target, _params)
11
+ engine_class = Strategies.load(:parser_engine, class_engine || Crabfarm.config.parser_engine)
12
+ @document = engine_class.parse _target
13
13
  @params = _params
14
14
 
15
- super @root
15
+ super @document
16
16
  end
17
17
 
18
18
  def parse
@@ -20,17 +20,17 @@ module Crabfarm
20
20
  end
21
21
 
22
22
  def __getobj__
23
- @root
23
+ @document
24
24
  end
25
25
 
26
26
  def __setobj__(obj)
27
- @root = obj
27
+ @document = obj
28
28
  end
29
29
 
30
30
  private
31
31
 
32
- def class_parser_dsl
33
- self.class.instance_variable_get :@parser_dsl
32
+ def class_engine
33
+ self.class.instance_variable_get :@engine
34
34
  end
35
35
  end
36
36
  end
@@ -5,10 +5,13 @@ module Crabfarm
5
5
  class BaseState
6
6
  extend Forwardable
7
7
 
8
+ PARSE_METHOD_RX = /^parse_(.*)$/
9
+
8
10
  attr_reader :params, :output
9
11
 
10
- def_delegators :@pool, :driver
11
- def_delegators :@store, :get, :fetch
12
+ def_delegators '@context', :http
13
+ def_delegators '@context.pool', :driver
14
+ def_delegators '@context.store', :get, :fetch
12
15
 
13
16
  def self.browser_dsl(_dsl)
14
17
  @class_browser_dsl = _dsl
@@ -18,11 +21,9 @@ module Crabfarm
18
21
  @class_output_builder = _builder
19
22
  end
20
23
 
21
- def initialize(_pool, _store, _params)
22
- @pool = _pool
23
- @store = _store
24
+ def initialize(_context, _params)
25
+ @context = _context
24
26
  @params = _params
25
- @events = []
26
27
 
27
28
  @dsl = Strategies.load(:browser_dsl, class_browser_dsl || Crabfarm.config.browser_dsl)
28
29
  @builder = Strategies.load(:output_builder, class_output_builder || Crabfarm.config.output_builder)
@@ -32,6 +33,10 @@ module Crabfarm
32
33
  @dsl.wrap driver(_name)
33
34
  end
34
35
 
36
+ def download(_url)
37
+ @context.http.get(_url).body
38
+ end
39
+
35
40
  def output
36
41
  @output ||= @builder.prepare
37
42
  end
@@ -44,16 +49,16 @@ module Crabfarm
44
49
  raise NotImplementedError.new
45
50
  end
46
51
 
47
- def event(_type, _msg)
48
- @events << { created_at: Time.current, type: _type, msg: _msg }
49
- end
52
+ def parse(_target=nil, _options={})
53
+ parser_class = _options.delete :using
50
54
 
51
- def alert(_msg)
52
- event(:alert, _msg)
53
- end
55
+ if parser_class.nil?
56
+ parser_class = (self.class.name + 'Parser').constantize
57
+ end
54
58
 
55
- def info(_msg)
56
- event(:info, _msg)
59
+ parser = parser_class.new _target, @params.merge(_options)
60
+ parser.parse
61
+ return parser
57
62
  end
58
63
 
59
64
  def fork_each(_enumerator, &_block)
@@ -66,6 +71,20 @@ module Crabfarm
66
71
  ThreadsWait.all_waits(*ths)
67
72
  end
68
73
 
74
+ def method_missing(_method, *_args, &_block)
75
+ m = PARSE_METHOD_RX.match(_method)
76
+ if m
77
+ options = _args[1] || {}
78
+ options[:using] = (m[1].camelize + 'Parser').constantize
79
+ parse _args[0], options
80
+ else super end
81
+ end
82
+
83
+ def respond_to?(_method, _include_all=false)
84
+ return true if PARSE_METHOD_RX === _method
85
+ super
86
+ end
87
+
69
88
  private
70
89
 
71
90
  def class_browser_dsl
@@ -6,7 +6,7 @@ module Crabfarm
6
6
 
7
7
  OPTIONS = [
8
8
  [:browser_dsl, :string, 'Default browser dsl used by states'],
9
- [:parser_dsl, :string, 'Default parser dsl used by parsers'],
9
+ [:parser_engine, :string, 'Default parser engine used by parsers'],
10
10
  [:output_builder, :string, 'Default json output builder used by states'],
11
11
  [:driver_factory, :mixed, 'Driver factory, disabled if phantom_mode is used'],
12
12
  [:log_path, :string, 'Path where logs should be stored'],
@@ -51,7 +51,7 @@ module Crabfarm
51
51
  def reset
52
52
  @values = {
53
53
  browser_dsl: :surfer,
54
- parser_dsl: :nokogiri,
54
+ parser_engine: :nokogiri,
55
55
  output_builder: :hash,
56
56
  driver_factory: nil,
57
57
  log_path: nil,
@@ -4,7 +4,7 @@ module Crabfarm
4
4
  class Context
5
5
  extend Forwardable
6
6
 
7
- def_delegators :@pool, :driver
7
+ attr_accessor :pool, :store, :http
8
8
 
9
9
  def initialize
10
10
  @store = StateStore.new
@@ -14,16 +14,10 @@ module Crabfarm
14
14
  def load
15
15
  init_phantom_if_required
16
16
  init_driver_pool
17
+ init_http_client
17
18
  @loaded = true
18
19
  end
19
20
 
20
- def run_state(_name, _params={})
21
- load
22
- state = LoaderService.load_state(_name).new @pool, @store, _params
23
- state.crawl
24
- state
25
- end
26
-
27
21
  def reset
28
22
  @store.reset
29
23
  @pool.reset unless @pool.nil?
@@ -63,6 +57,14 @@ module Crabfarm
63
57
  @phantom = nil
64
58
  end
65
59
 
60
+ def init_http_client
61
+ @http = build_http_client if @http.nil?
62
+ end
63
+
64
+ def release_http_client
65
+ @http = nil
66
+ end
67
+
66
68
  def build_driver_factory
67
69
  if @phantom
68
70
  PhantomDriverFactory.new @phantom, driver_config
@@ -72,6 +74,10 @@ module Crabfarm
72
74
  end
73
75
  end
74
76
 
77
+ def build_http_client
78
+ HttpClient.new config.proxy
79
+ end
80
+
75
81
  def config
76
82
  Crabfarm.config
77
83
  end
@@ -28,6 +28,10 @@ module Crabfarm
28
28
 
29
29
  private
30
30
 
31
+ def build_http_client
32
+ HttpClient.new proxy_address
33
+ end
34
+
31
35
  def restart_with_options(_options)
32
36
  stop_daemon
33
37
  @runner = CrabtrapRunner.new Crabfarm.config.crabtrap_config.merge(_options)
@@ -1,5 +1,3 @@
1
- require 'net/http'
2
-
3
1
  module Crabfarm
4
2
  class CrabtrapRunner
5
3
 
@@ -91,10 +91,6 @@ module Crabfarm
91
91
  end
92
92
  end
93
93
 
94
- def parse(_parser_class, _params={})
95
- ParserService.parse _parser_class, to_html, _params
96
- end
97
-
98
94
  def to_html
99
95
  elements.map { |e| e['outerHTML'] }.join
100
96
  end
@@ -109,7 +109,7 @@ module Crabfarm
109
109
  begin
110
110
  ActiveSupport::Dependencies.clear
111
111
  logger.info "StateLoop: loading state: #{@next_state_name}"
112
- @doc = context.run_state(@next_state_name, @next_state_params).output_as_json
112
+ @doc = TransitionService.apply_state(context, @next_state_name, @next_state_params).output_as_json
113
113
  logger.info "StateLoop: state loaded successfully: #{@next_state_name}"
114
114
  @error = nil
115
115
  rescue Exception => e
@@ -1,13 +1,12 @@
1
1
  module Crabfarm
2
- class ForkedState
3
- extend Forwardable
4
-
5
- def_delegators :@state, :params, :get, :fetch, :event, :alert, :info
2
+ class ForkedState < Delegator
6
3
 
7
4
  def initialize(_state, _name, _mutex)
8
5
  @state = _state
9
6
  @name = _name
10
7
  @mutex = _mutex
8
+
9
+ super @state
11
10
  end
12
11
 
13
12
  def driver
@@ -18,10 +17,22 @@ module Crabfarm
18
17
  @browser ||= @state.browser(@name)
19
18
  end
20
19
 
20
+ def output
21
+ raise ScriptError.new 'Use lock_output to access output in forked states'
22
+ end
23
+
21
24
  def lock_output
22
25
  @mutex.synchronize {
23
26
  yield @state.output
24
27
  }
25
28
  end
29
+
30
+ def __getobj__
31
+ @state
32
+ end
33
+
34
+ def __setobj__(obj)
35
+ @state = obj
36
+ end
26
37
  end
27
38
  end
@@ -0,0 +1,97 @@
1
+ require "uri"
2
+
3
+ module Crabfarm
4
+ class HttpClient
5
+
6
+ class HttpRequestError < StandardError
7
+ extend Forwardable
8
+
9
+ def_delegators :@response, :code, :body
10
+
11
+ attr_reader :response
12
+
13
+ def initialize(_response)
14
+ @response = _response
15
+ super _response.message
16
+ end
17
+ end
18
+
19
+ class MaximumRedirectsError < StandardError
20
+ def initialize
21
+ super 'Redirection loop detected!'
22
+ end
23
+ end
24
+
25
+ attr_reader :proxy_addr, :proxy_port
26
+
27
+ def initialize(_proxy=nil)
28
+ if _proxy.nil?
29
+ @proxy_addr = nil
30
+ @proxy_port = nil
31
+ else
32
+ @proxy_addr, @proxy_port = _proxy.split ':'
33
+ end
34
+ end
35
+
36
+ def get(_url, _query={}, _headers={})
37
+ uri = URI _url
38
+ perform_request Net::HTTP::Get, uri, _headers
39
+ end
40
+
41
+ def post(_url, _data, _headers={})
42
+ perform_request Net::HTTP::Post, URI(_url), _headers do |req|
43
+ req.body = prepare_data(_data)
44
+ end
45
+ end
46
+
47
+ def put(_url, _data, _headers={})
48
+ perform_request Net::HTTP::Put, URI(_url), _headers do |req|
49
+ req.body = prepare_data(_data)
50
+ end
51
+ end
52
+
53
+ def delete(_url)
54
+ perform_request Net::HTTP::Delete, URI(_url), _headers
55
+ end
56
+
57
+ private
58
+
59
+ def perform_request(_req_type, _uri, _headers, _limit=10)
60
+
61
+ raise MaximumRedirectsError.new if _limit == 0
62
+
63
+ request = _req_type.new(_uri.path.empty? ? '/' : _uri.path)
64
+ _headers.keys.each { |k| request[k] = _headers[k] }
65
+ yield request if block_given?
66
+
67
+ response = build_client(_uri).request request
68
+
69
+ case response
70
+ when Net::HTTPSuccess then
71
+ response
72
+ when Net::HTTPRedirection then
73
+ location = response['location']
74
+ perform_request(_req_type, URI.parse(location), _headers, _limit - 1)
75
+ else
76
+ handle_error_response response
77
+ end
78
+ end
79
+
80
+ def build_client(uri)
81
+ client = Net::HTTP.new uri.host, uri.port || 80, proxy_addr, proxy_port
82
+ client.use_ssl = true if uri.scheme == 'https'
83
+ client.verify_mode = OpenSSL::SSL::VERIFY_NONE
84
+ client
85
+ end
86
+
87
+ def handle_error_response(_response)
88
+ raise HttpRequestError.new _response
89
+ end
90
+
91
+ def prepare_data(_data)
92
+ if _data.is_a? Hash
93
+ _data.keys.map { |k| "#{k}=#{_data[k]}" }.join '&'
94
+ else _data end
95
+ end
96
+ end
97
+ end
@@ -34,7 +34,7 @@ module Crabfarm
34
34
  begin
35
35
  elapsed = Benchmark.measure do
36
36
  puts "Transitioning to #{_name.to_s.camelize} state"
37
- doc = @context.run_state(_name, _params).output_as_json
37
+ doc = TransitionService.apply_state(@context, _name, _params).output_as_json
38
38
 
39
39
  puts "State changed, generated document:"
40
40
  puts JSON.pretty_generate(doc).color(:green).gsub(/(^|\\n)/, ' ')
@@ -35,18 +35,20 @@ module Crabfarm
35
35
  end
36
36
 
37
37
  def generate_state(_name)
38
+ class_name = _name.camelize
38
39
  with_crawler_path do
39
- binding = { state_class: _name.camelize }
40
- path('app', 'states', _name.underscore + '.rb').render('state.rb', binding)
41
- path('spec', 'states', _name.underscore + '_spec.rb').render('state_spec.rb', binding)
40
+ binding = { state_class: class_name.camelize }
41
+ path('app', 'states', class_name.underscore + '.rb').render('state.rb', binding)
42
+ path('spec', 'states', class_name.underscore + '_spec.rb').render('state_spec.rb', binding)
42
43
  end
43
44
  end
44
45
 
45
46
  def generate_parser(_name)
47
+ class_name = _name.camelize + 'Parser'
46
48
  with_crawler_path do
47
- binding = { parser_class: _name.camelize }
48
- path('app', 'parsers', _name.underscore + '.rb').render('parser.rb', binding)
49
- path('spec', 'parsers', _name.underscore + '_spec.rb').render('parser_spec.rb', binding)
49
+ binding = { parser_class: class_name }
50
+ path('app', 'parsers', class_name.underscore + '.rb').render('parser.rb', binding)
51
+ path('spec', 'parsers', class_name.underscore + '_spec.rb').render('parser_spec.rb', binding)
50
52
  end
51
53
  end
52
54
 
@@ -1,4 +1,3 @@
1
- require 'net/http'
2
1
  require 'timeout'
3
2
 
4
3
  module Crabfarm
@@ -3,7 +3,6 @@ require 'net/http'
3
3
 
4
4
  CF_TEST_CONTEXT = Crabfarm::CrabtrapContext::new
5
5
  CF_TEST_CONTEXT.load
6
- CF_TEST_BUCKET = CF_TEST_CONTEXT.driver
7
6
 
8
7
  module Crabfarm
9
8
  module RSpec
@@ -16,7 +15,9 @@ module Crabfarm
16
15
  Net::HTTP.get(URI.parse _snap_or_url)
17
16
  end
18
17
 
19
- ParserService.parse described_class, html, _options
18
+ parser = described_class.new html, _options
19
+ parser.parse
20
+ parser
20
21
  end
21
22
 
22
23
  def crawl(_state=nil, _params={})
@@ -27,9 +28,9 @@ module Crabfarm
27
28
 
28
29
  if _state.nil?
29
30
  return nil unless described_class < BaseState # TODO: maybe raise an error here.
30
- @state = @last_state = CF_TEST_CONTEXT.run_state(described_class, _params)
31
+ @state = @last_state = TransitionService.apply_state CF_TEST_CONTEXT, described_class, _params
31
32
  else
32
- @last_state = CF_TEST_CONTEXT.run_state(_state, _params)
33
+ @last_state = TransitionService.apply_state CF_TEST_CONTEXT, _state, _params
33
34
  end
34
35
  end
35
36
 
@@ -2,25 +2,42 @@ module Crabfarm
2
2
  module Strategies
3
3
 
4
4
  class Loader
5
- def initialize(_klass, _pkg=nil)
5
+ def initialize(_name, _klass, _pkg, _deps)
6
+ @name = _name
6
7
  @klass = _klass
7
8
  @pkg = _pkg
9
+ @deps = _deps
8
10
  end
9
11
 
10
12
  def load
13
+ load_dependencies
11
14
  require @pkg if @pkg
12
15
  if @klass.is_a? String then Object.const_get @klass else @klass end
13
16
  end
17
+
18
+ private
19
+
20
+ def load_dependencies
21
+ @deps.each do |dep|
22
+ begin
23
+ require dep
24
+ # TODO: check dependency version!
25
+ rescue LoadError
26
+ raise ConfigurationError.new "Missing #{@name} dependency, please add `gem \"#{dep}\"` to the crawler's Gemfile"
27
+ end
28
+ end
29
+ end
14
30
  end
15
31
 
16
32
  @@register = {}
17
33
 
18
- def self.register(_cat, _name, _klass, _pkg=nil)
19
- @@register[_cat.to_s + _name.to_s] = Loader.new(_klass, _pkg)
34
+ def self.register(_cat, _name, _klass, _pkg=nil, _deps=[])
35
+ full_name = _cat.to_s + ':' + _name.to_s
36
+ @@register[full_name] = Loader.new(full_name, _klass, _pkg, _deps)
20
37
  end
21
38
 
22
39
  def self.load(_cat, _name)
23
- full_name = _cat.to_s + _name.to_s
40
+ full_name = _cat.to_s + ':' + _name.to_s
24
41
  raise ConfigurationError.new "Invalid #{_cat} name #{_name}" unless @@register.has_key? full_name
25
42
  @@register[full_name].load
26
43
  end
@@ -1,10 +1,14 @@
1
1
 
2
2
  # The default crawling dsl to use in states and parsers; can be overridden in each component using the `browser_dsl :dsl` modifier
3
- # Available options are :surfer, :watir and :capybara
3
+ # Available options are :surfer, :watir and :capybara. Both watir and capybara require an additional gem to be added to Gemfile
4
4
  set_browser_dsl :surfer
5
5
 
6
+ # The default parser engine for parsers that do not specify one.
7
+ # Available options are :nokogiri and :pdf_reader. :pdf_reader requires an additional gem to be added to Gemfile
8
+ # set_parser_engine :nokogiri
9
+
6
10
  # Change the default output builder used in a state to generate the output document.
7
- # Available options are :hash, :ostruct, :jbuilder
11
+ # Available options are :hash, :ostruct, :jbuilder. :jbuilder requires an additional gem to be added to Gemfile
8
12
  # set_output_builder :hash
9
13
 
10
14
  # The path where every crawler log is stored.
@@ -0,0 +1,23 @@
1
+ module Crabfarm
2
+ class TransitionService
3
+
4
+ def self.apply_state(_context, _name, _params={})
5
+ state_class = if _name.is_a? String or _name.is_a? Symbol
6
+ load_by_name _name
7
+ else _name end
8
+
9
+ _context.load
10
+ state = state_class.new _context, _params
11
+ state.crawl
12
+ state
13
+ end
14
+
15
+ private
16
+
17
+ def self.load_by_name(_name)
18
+ name = _name.to_s.gsub(/[^A-Z0-9:]+/i, '_').camelize
19
+ name.constantize
20
+ end
21
+
22
+ end
23
+ end
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.0.16"
2
+ VERSION = "0.0.17"
3
3
  end
data/lib/crabfarm.rb CHANGED
@@ -1,15 +1,16 @@
1
- require 'logger'
1
+ require "logger"
2
2
  require "forwardable"
3
+ require "net/http"
3
4
  require "active_support/inflector"
4
5
  require "selenium-webdriver"
5
6
 
6
7
  require "crabfarm/version"
7
8
  require "crabfarm/errors"
8
9
  require "crabfarm/configuration"
9
- require "crabfarm/loader_service"
10
- require "crabfarm/parser_service"
10
+ require "crabfarm/transition_service"
11
11
  require "crabfarm/driver_bucket"
12
12
  require "crabfarm/driver_bucket_pool"
13
+ require "crabfarm/http_client"
13
14
  require "crabfarm/default_driver_factory"
14
15
  require "crabfarm/phantom_driver_factory"
15
16
  require "crabfarm/phantom_runner"
@@ -43,15 +44,16 @@ module Crabfarm
43
44
  module Strategies
44
45
  # bundled browser dsl adapters
45
46
  register :browser_dsl, :surfer, 'Crabfarm::SurferBrowserDsl', 'crabfarm/adapters/browser/surfer'
46
- register :browser_dsl, :watir, 'Crabfarm::WatirBrowserDsl', 'crabfarm/adapters/browser/watir'
47
- register :browser_dsl, :capybara, 'Crabfarm::CapybaraBrowserDsl', 'crabfarm/adapters/browser/capybara'
47
+ register :browser_dsl, :watir, 'Crabfarm::WatirBrowserDsl', 'crabfarm/adapters/browser/watir', ['watir-webdriver']
48
+ register :browser_dsl, :capybara, 'Crabfarm::CapybaraBrowserDsl', 'crabfarm/adapters/browser/capybara', ['capybara']
48
49
 
49
50
  # bundled parsers dsl adapters
50
- register :parser_dsl, :nokogiri, 'Crabfarm::NokogiriDsl', 'crabfarm/adapters/parser/nokogiri'
51
+ register :parser_engine, :nokogiri, 'Crabfarm::NokogiriAdapter', 'crabfarm/adapters/parser/nokogiri'
52
+ register :parser_engine, :pdf_reader, 'Crabfarm::PdfReaderAdapter', 'crabfarm/adapters/parser/pdf_reader', ['pdf-reader']
51
53
 
52
54
  # bundled state output builders
53
55
  register :output_builder, :hash, 'Crabfarm::HashOutputBuilder', 'crabfarm/adapters/output/hash'
54
56
  register :output_builder, :ostruct, 'Crabfarm::OStructOutputBuilder', 'crabfarm/adapters/output/ostruct'
55
- register :output_builder, :jbuilder, 'Crabfarm::JbuilderOutputBuilder', 'crabfarm/adapters/output/jbuilder'
57
+ register :output_builder, :jbuilder, 'Crabfarm::JbuilderOutputBuilder', 'crabfarm/adapters/output/jbuilder', ['jbuilder']
56
58
  end
57
59
  end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.16
4
+ version: 0.0.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-11 00:00:00.000000000 Z
11
+ date: 2015-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: jbuilder
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: 2.2.0
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ~>
25
- - !ruby/object:Gem::Version
26
- version: 2.2.0
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: selenium-webdriver
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -38,34 +24,6 @@ dependencies:
38
24
  - - ~>
39
25
  - !ruby/object:Gem::Version
40
26
  version: '2.33'
41
- - !ruby/object:Gem::Dependency
42
- name: capybara
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - '>='
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - '>='
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: watir-webdriver
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :runtime
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - '>='
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
27
  - !ruby/object:Gem::Dependency
70
28
  name: nokogiri
71
29
  requirement: !ruby/object:Gem::Requirement
@@ -170,20 +128,6 @@ dependencies:
170
128
  - - '>='
171
129
  - !ruby/object:Gem::Version
172
130
  version: '0'
173
- - !ruby/object:Gem::Dependency
174
- name: multipart-post
175
- requirement: !ruby/object:Gem::Requirement
176
- requirements:
177
- - - '>='
178
- - !ruby/object:Gem::Version
179
- version: '0'
180
- type: :runtime
181
- prerelease: false
182
- version_requirements: !ruby/object:Gem::Requirement
183
- requirements:
184
- - - '>='
185
- - !ruby/object:Gem::Version
186
- version: '0'
187
131
  - !ruby/object:Gem::Dependency
188
132
  name: bundler
189
133
  requirement: !ruby/object:Gem::Requirement
@@ -324,6 +268,62 @@ dependencies:
324
268
  - - '>='
325
269
  - !ruby/object:Gem::Version
326
270
  version: '0'
271
+ - !ruby/object:Gem::Dependency
272
+ name: pdf-reader
273
+ requirement: !ruby/object:Gem::Requirement
274
+ requirements:
275
+ - - ~>
276
+ - !ruby/object:Gem::Version
277
+ version: 1.3.3
278
+ type: :development
279
+ prerelease: false
280
+ version_requirements: !ruby/object:Gem::Requirement
281
+ requirements:
282
+ - - ~>
283
+ - !ruby/object:Gem::Version
284
+ version: 1.3.3
285
+ - !ruby/object:Gem::Dependency
286
+ name: capybara
287
+ requirement: !ruby/object:Gem::Requirement
288
+ requirements:
289
+ - - '>='
290
+ - !ruby/object:Gem::Version
291
+ version: '0'
292
+ type: :development
293
+ prerelease: false
294
+ version_requirements: !ruby/object:Gem::Requirement
295
+ requirements:
296
+ - - '>='
297
+ - !ruby/object:Gem::Version
298
+ version: '0'
299
+ - !ruby/object:Gem::Dependency
300
+ name: watir-webdriver
301
+ requirement: !ruby/object:Gem::Requirement
302
+ requirements:
303
+ - - '>='
304
+ - !ruby/object:Gem::Version
305
+ version: '0'
306
+ type: :development
307
+ prerelease: false
308
+ version_requirements: !ruby/object:Gem::Requirement
309
+ requirements:
310
+ - - '>='
311
+ - !ruby/object:Gem::Version
312
+ version: '0'
313
+ - !ruby/object:Gem::Dependency
314
+ name: jbuilder
315
+ requirement: !ruby/object:Gem::Requirement
316
+ requirements:
317
+ - - ~>
318
+ - !ruby/object:Gem::Version
319
+ version: 2.2.0
320
+ type: :development
321
+ prerelease: false
322
+ version_requirements: !ruby/object:Gem::Requirement
323
+ requirements:
324
+ - - ~>
325
+ - !ruby/object:Gem::Version
326
+ version: 2.2.0
327
327
  description:
328
328
  email:
329
329
  - ignacio@platan.us
@@ -339,6 +339,7 @@ files:
339
339
  - lib/crabfarm/adapters/output/jbuilder.rb
340
340
  - lib/crabfarm/adapters/output/ostruct.rb
341
341
  - lib/crabfarm/adapters/parser/nokogiri.rb
342
+ - lib/crabfarm/adapters/parser/pdf_reader.rb
342
343
  - lib/crabfarm/base_parser.rb
343
344
  - lib/crabfarm/base_state.rb
344
345
  - lib/crabfarm/cli.rb
@@ -355,14 +356,13 @@ files:
355
356
  - lib/crabfarm/engines/safe_state_loop.rb
356
357
  - lib/crabfarm/errors.rb
357
358
  - lib/crabfarm/forked_state.rb
358
- - lib/crabfarm/loader_service.rb
359
+ - lib/crabfarm/http_client.rb
359
360
  - lib/crabfarm/mocks/noop_driver.rb
360
361
  - lib/crabfarm/modes/console.rb
361
362
  - lib/crabfarm/modes/generator.rb
362
363
  - lib/crabfarm/modes/publisher.rb
363
364
  - lib/crabfarm/modes/recorder.rb
364
365
  - lib/crabfarm/modes/server.rb
365
- - lib/crabfarm/parser_service.rb
366
366
  - lib/crabfarm/phantom_driver_factory.rb
367
367
  - lib/crabfarm/phantom_runner.rb
368
368
  - lib/crabfarm/rspec.rb
@@ -383,6 +383,7 @@ files:
383
383
  - lib/crabfarm/templates/spec_helper.rb.erb
384
384
  - lib/crabfarm/templates/state.rb.erb
385
385
  - lib/crabfarm/templates/state_spec.rb.erb
386
+ - lib/crabfarm/transition_service.rb
386
387
  - lib/crabfarm/version.rb
387
388
  - lib/crabfarm.rb
388
389
  - bin/crabfarm
@@ -1,29 +0,0 @@
1
- module Crabfarm
2
- class LoaderService
3
-
4
- def self.load_state(_name)
5
- load_entity _name, 'state', BaseState
6
- end
7
-
8
- def self.load_parser(_name)
9
- load_entity _name, 'parser', BaseParser
10
- end
11
-
12
- private
13
-
14
- def self.load_entity(_name, _role, _type)
15
-
16
- if _name.is_a? String or _name.is_a? Symbol
17
- name = _name.to_s.gsub(/[^A-Z0-9:]+/i, '_').camelize
18
- mod = name.constantize rescue nil
19
- else
20
- mod = _name
21
- end
22
-
23
- raise EntityNotFoundError.new _role, name if mod.nil?
24
- raise EntityNotFoundError.new _role, name unless mod < _type
25
- mod
26
- end
27
-
28
- end
29
- end
@@ -1,12 +0,0 @@
1
- module Crabfarm
2
- class ParserService
3
-
4
- def self.parse(_parser_class, _html, _options={})
5
- _parser_class = LoaderService.load_parser(_parser_class) if _parser_class.is_a? String or _parser_class.is_a? Symbol
6
- parser = _parser_class.new _html, _options
7
- parser.parse
8
- parser
9
- end
10
-
11
- end
12
- end