crabfarm 0.0.16 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 01f53897c32d27e6253c74660a0842b9cb7c3c97
4
- data.tar.gz: 7993ac4152d6128ce8b69950458e48dfe042089a
3
+ metadata.gz: 62fc780d5c9db277ef5147d564c74cd684dbd39b
4
+ data.tar.gz: 9028aeb0ce71914ab549644948c0ceb83e1efdda
5
5
  SHA512:
6
- metadata.gz: af73913311cb5404d95724470ce07bb37e3ed686abb02a39e20b6a85b0ad028a31ef4524c8ffd09381ce20157e107da7cbe20ddea22c08f3908997d81c1e37b1
7
- data.tar.gz: 3345f29499e15089b3fd6feccf984f9121187500e9859a2016ccfaf14947d363ca335b0212025722b9eac273996171a6f133e2c37f9d0fa5f2bd649952333742
6
+ metadata.gz: f003f40fbb5727dee2f831fede26ff6bfd559028c5d339f56aa63aabe3cc0fe7f03519d73df03ebee5121a8478dc0a082f8124272b194dc2a5023402c5147cbd
7
+ data.tar.gz: a19ccb92b10a0b1d316992cbde562987ac1a3928b4ae32811e56e570f93073f839b4422ae2a0e06bdcbebff3557bf5e0f1ea197bdbff442052c7b878273ca297
@@ -1,7 +1,7 @@
1
1
  module Crabfarm
2
2
  class CapybaraBrowserDsl
3
3
  def self.wrap(_bucket)
4
- raise "Capybara adapter is incompleted"
4
+ raise NotImplementedError.new "Capybara adapter is incompleted"
5
5
  end
6
6
  end
7
7
  end
@@ -1,21 +1,18 @@
1
- require 'watir-webdriver'
2
-
3
1
  class Watir::Browser
4
- def parse(_parser_class, _options={})
5
- Crabfarm::ParserService.parse _parser_class, html, _options
2
+ def to_html
3
+ html
6
4
  end
7
5
  end
8
6
 
9
7
  class Watir::Element
10
- def parse(_parser_class, _options={})
11
- Crabfarm::ParserService.parse _parser_class, html, _options
8
+ def to_html
9
+ html
12
10
  end
13
11
  end
14
12
 
15
13
  class Watir::ElementCollection
16
- def parse(_parser_class, _options={})
17
- full_html = self.map(&:html).join
18
- Crabfarm::ParserService.parse _parser_class, full_html, _options
14
+ def to_html
15
+ self.map(&:html).join
19
16
  end
20
17
  end
21
18
 
@@ -1,5 +1,3 @@
1
- require 'jbuilder'
2
-
3
1
  module Crabfarm
4
2
  class JbuilderOutputBuilder
5
3
  def self.prepare
@@ -1,9 +1,13 @@
1
1
  require 'nokogiri'
2
2
 
3
3
  module Crabfarm
4
- class NokogiriDsl
5
- def self.parse(_html)
6
- Nokogiri::HTML _html
4
+ class NokogiriAdapter
5
+ def self.parse(_element)
6
+ if _element.respond_to? :to_html
7
+ Nokogiri::HTML _element.to_html
8
+ else
9
+ Nokogiri::HTML _element
10
+ end
7
11
  end
8
12
  end
9
13
  end
@@ -0,0 +1,9 @@
1
+ require "pdf-reader"
2
+
3
+ module Crabfarm
4
+ class PdfReaderAdapter
5
+ def self.parse(_pdf_data)
6
+ PDF::Reader.new StringIO.new _pdf_data
7
+ end
8
+ end
9
+ end
@@ -1,18 +1,18 @@
1
1
  module Crabfarm
2
2
  class BaseParser < Delegator
3
3
 
4
- attr_reader :params, :root
4
+ attr_reader :params, :document
5
5
 
6
- def self.parser_dsl(_dsl)
7
- @parser_dsl = _dsl
6
+ def self.engine(_engine)
7
+ @engine = _engine
8
8
  end
9
9
 
10
- def initialize(_html, _params)
11
- dsl_class = Strategies.load(:parser_dsl, class_parser_dsl || Crabfarm.config.parser_dsl)
12
- @root = dsl_class.parse _html
10
+ def initialize(_target, _params)
11
+ engine_class = Strategies.load(:parser_engine, class_engine || Crabfarm.config.parser_engine)
12
+ @document = engine_class.parse _target
13
13
  @params = _params
14
14
 
15
- super @root
15
+ super @document
16
16
  end
17
17
 
18
18
  def parse
@@ -20,17 +20,17 @@ module Crabfarm
20
20
  end
21
21
 
22
22
  def __getobj__
23
- @root
23
+ @document
24
24
  end
25
25
 
26
26
  def __setobj__(obj)
27
- @root = obj
27
+ @document = obj
28
28
  end
29
29
 
30
30
  private
31
31
 
32
- def class_parser_dsl
33
- self.class.instance_variable_get :@parser_dsl
32
+ def class_engine
33
+ self.class.instance_variable_get :@engine
34
34
  end
35
35
  end
36
36
  end
@@ -5,10 +5,13 @@ module Crabfarm
5
5
  class BaseState
6
6
  extend Forwardable
7
7
 
8
+ PARSE_METHOD_RX = /^parse_(.*)$/
9
+
8
10
  attr_reader :params, :output
9
11
 
10
- def_delegators :@pool, :driver
11
- def_delegators :@store, :get, :fetch
12
+ def_delegators '@context', :http
13
+ def_delegators '@context.pool', :driver
14
+ def_delegators '@context.store', :get, :fetch
12
15
 
13
16
  def self.browser_dsl(_dsl)
14
17
  @class_browser_dsl = _dsl
@@ -18,11 +21,9 @@ module Crabfarm
18
21
  @class_output_builder = _builder
19
22
  end
20
23
 
21
- def initialize(_pool, _store, _params)
22
- @pool = _pool
23
- @store = _store
24
+ def initialize(_context, _params)
25
+ @context = _context
24
26
  @params = _params
25
- @events = []
26
27
 
27
28
  @dsl = Strategies.load(:browser_dsl, class_browser_dsl || Crabfarm.config.browser_dsl)
28
29
  @builder = Strategies.load(:output_builder, class_output_builder || Crabfarm.config.output_builder)
@@ -32,6 +33,10 @@ module Crabfarm
32
33
  @dsl.wrap driver(_name)
33
34
  end
34
35
 
36
+ def download(_url)
37
+ @context.http.get(_url).body
38
+ end
39
+
35
40
  def output
36
41
  @output ||= @builder.prepare
37
42
  end
@@ -44,16 +49,16 @@ module Crabfarm
44
49
  raise NotImplementedError.new
45
50
  end
46
51
 
47
- def event(_type, _msg)
48
- @events << { created_at: Time.current, type: _type, msg: _msg }
49
- end
52
+ def parse(_target=nil, _options={})
53
+ parser_class = _options.delete :using
50
54
 
51
- def alert(_msg)
52
- event(:alert, _msg)
53
- end
55
+ if parser_class.nil?
56
+ parser_class = (self.class.name + 'Parser').constantize
57
+ end
54
58
 
55
- def info(_msg)
56
- event(:info, _msg)
59
+ parser = parser_class.new _target, @params.merge(_options)
60
+ parser.parse
61
+ return parser
57
62
  end
58
63
 
59
64
  def fork_each(_enumerator, &_block)
@@ -66,6 +71,20 @@ module Crabfarm
66
71
  ThreadsWait.all_waits(*ths)
67
72
  end
68
73
 
74
+ def method_missing(_method, *_args, &_block)
75
+ m = PARSE_METHOD_RX.match(_method)
76
+ if m
77
+ options = _args[1] || {}
78
+ options[:using] = (m[1].camelize + 'Parser').constantize
79
+ parse _args[0], options
80
+ else super end
81
+ end
82
+
83
+ def respond_to?(_method, _include_all=false)
84
+ return true if PARSE_METHOD_RX === _method
85
+ super
86
+ end
87
+
69
88
  private
70
89
 
71
90
  def class_browser_dsl
@@ -6,7 +6,7 @@ module Crabfarm
6
6
 
7
7
  OPTIONS = [
8
8
  [:browser_dsl, :string, 'Default browser dsl used by states'],
9
- [:parser_dsl, :string, 'Default parser dsl used by parsers'],
9
+ [:parser_engine, :string, 'Default parser engine used by parsers'],
10
10
  [:output_builder, :string, 'Default json output builder used by states'],
11
11
  [:driver_factory, :mixed, 'Driver factory, disabled if phantom_mode is used'],
12
12
  [:log_path, :string, 'Path where logs should be stored'],
@@ -51,7 +51,7 @@ module Crabfarm
51
51
  def reset
52
52
  @values = {
53
53
  browser_dsl: :surfer,
54
- parser_dsl: :nokogiri,
54
+ parser_engine: :nokogiri,
55
55
  output_builder: :hash,
56
56
  driver_factory: nil,
57
57
  log_path: nil,
@@ -4,7 +4,7 @@ module Crabfarm
4
4
  class Context
5
5
  extend Forwardable
6
6
 
7
- def_delegators :@pool, :driver
7
+ attr_accessor :pool, :store, :http
8
8
 
9
9
  def initialize
10
10
  @store = StateStore.new
@@ -14,16 +14,10 @@ module Crabfarm
14
14
  def load
15
15
  init_phantom_if_required
16
16
  init_driver_pool
17
+ init_http_client
17
18
  @loaded = true
18
19
  end
19
20
 
20
- def run_state(_name, _params={})
21
- load
22
- state = LoaderService.load_state(_name).new @pool, @store, _params
23
- state.crawl
24
- state
25
- end
26
-
27
21
  def reset
28
22
  @store.reset
29
23
  @pool.reset unless @pool.nil?
@@ -63,6 +57,14 @@ module Crabfarm
63
57
  @phantom = nil
64
58
  end
65
59
 
60
+ def init_http_client
61
+ @http = build_http_client if @http.nil?
62
+ end
63
+
64
+ def release_http_client
65
+ @http = nil
66
+ end
67
+
66
68
  def build_driver_factory
67
69
  if @phantom
68
70
  PhantomDriverFactory.new @phantom, driver_config
@@ -72,6 +74,10 @@ module Crabfarm
72
74
  end
73
75
  end
74
76
 
77
+ def build_http_client
78
+ HttpClient.new config.proxy
79
+ end
80
+
75
81
  def config
76
82
  Crabfarm.config
77
83
  end
@@ -28,6 +28,10 @@ module Crabfarm
28
28
 
29
29
  private
30
30
 
31
+ def build_http_client
32
+ HttpClient.new proxy_address
33
+ end
34
+
31
35
  def restart_with_options(_options)
32
36
  stop_daemon
33
37
  @runner = CrabtrapRunner.new Crabfarm.config.crabtrap_config.merge(_options)
@@ -1,5 +1,3 @@
1
- require 'net/http'
2
-
3
1
  module Crabfarm
4
2
  class CrabtrapRunner
5
3
 
@@ -91,10 +91,6 @@ module Crabfarm
91
91
  end
92
92
  end
93
93
 
94
- def parse(_parser_class, _params={})
95
- ParserService.parse _parser_class, to_html, _params
96
- end
97
-
98
94
  def to_html
99
95
  elements.map { |e| e['outerHTML'] }.join
100
96
  end
@@ -109,7 +109,7 @@ module Crabfarm
109
109
  begin
110
110
  ActiveSupport::Dependencies.clear
111
111
  logger.info "StateLoop: loading state: #{@next_state_name}"
112
- @doc = context.run_state(@next_state_name, @next_state_params).output_as_json
112
+ @doc = TransitionService.apply_state(context, @next_state_name, @next_state_params).output_as_json
113
113
  logger.info "StateLoop: state loaded successfully: #{@next_state_name}"
114
114
  @error = nil
115
115
  rescue Exception => e
@@ -1,13 +1,12 @@
1
1
  module Crabfarm
2
- class ForkedState
3
- extend Forwardable
4
-
5
- def_delegators :@state, :params, :get, :fetch, :event, :alert, :info
2
+ class ForkedState < Delegator
6
3
 
7
4
  def initialize(_state, _name, _mutex)
8
5
  @state = _state
9
6
  @name = _name
10
7
  @mutex = _mutex
8
+
9
+ super @state
11
10
  end
12
11
 
13
12
  def driver
@@ -18,10 +17,22 @@ module Crabfarm
18
17
  @browser ||= @state.browser(@name)
19
18
  end
20
19
 
20
+ def output
21
+ raise ScriptError.new 'Use lock_output to access output in forked states'
22
+ end
23
+
21
24
  def lock_output
22
25
  @mutex.synchronize {
23
26
  yield @state.output
24
27
  }
25
28
  end
29
+
30
+ def __getobj__
31
+ @state
32
+ end
33
+
34
+ def __setobj__(obj)
35
+ @state = obj
36
+ end
26
37
  end
27
38
  end
@@ -0,0 +1,97 @@
1
+ require "uri"
2
+
3
+ module Crabfarm
4
+ class HttpClient
5
+
6
+ class HttpRequestError < StandardError
7
+ extend Forwardable
8
+
9
+ def_delegators :@response, :code, :body
10
+
11
+ attr_reader :response
12
+
13
+ def initialize(_response)
14
+ @response = _response
15
+ super _response.message
16
+ end
17
+ end
18
+
19
+ class MaximumRedirectsError < StandardError
20
+ def initialize
21
+ super 'Redirection loop detected!'
22
+ end
23
+ end
24
+
25
+ attr_reader :proxy_addr, :proxy_port
26
+
27
+ def initialize(_proxy=nil)
28
+ if _proxy.nil?
29
+ @proxy_addr = nil
30
+ @proxy_port = nil
31
+ else
32
+ @proxy_addr, @proxy_port = _proxy.split ':'
33
+ end
34
+ end
35
+
36
+ def get(_url, _query={}, _headers={})
37
+ uri = URI _url
38
+ perform_request Net::HTTP::Get, uri, _headers
39
+ end
40
+
41
+ def post(_url, _data, _headers={})
42
+ perform_request Net::HTTP::Post, URI(_url), _headers do |req|
43
+ req.body = prepare_data(_data)
44
+ end
45
+ end
46
+
47
+ def put(_url, _data, _headers={})
48
+ perform_request Net::HTTP::Put, URI(_url), _headers do |req|
49
+ req.body = prepare_data(_data)
50
+ end
51
+ end
52
+
53
+ def delete(_url)
54
+ perform_request Net::HTTP::Delete, URI(_url), _headers
55
+ end
56
+
57
+ private
58
+
59
+ def perform_request(_req_type, _uri, _headers, _limit=10)
60
+
61
+ raise MaximumRedirectsError.new if _limit == 0
62
+
63
+ request = _req_type.new(_uri.path.empty? ? '/' : _uri.path)
64
+ _headers.keys.each { |k| request[k] = _headers[k] }
65
+ yield request if block_given?
66
+
67
+ response = build_client(_uri).request request
68
+
69
+ case response
70
+ when Net::HTTPSuccess then
71
+ response
72
+ when Net::HTTPRedirection then
73
+ location = response['location']
74
+ perform_request(_req_type, URI.parse(location), _headers, _limit - 1)
75
+ else
76
+ handle_error_response response
77
+ end
78
+ end
79
+
80
+ def build_client(uri)
81
+ client = Net::HTTP.new uri.host, uri.port || 80, proxy_addr, proxy_port
82
+ client.use_ssl = true if uri.scheme == 'https'
83
+ client.verify_mode = OpenSSL::SSL::VERIFY_NONE
84
+ client
85
+ end
86
+
87
+ def handle_error_response(_response)
88
+ raise HttpRequestError.new _response
89
+ end
90
+
91
+ def prepare_data(_data)
92
+ if _data.is_a? Hash
93
+ _data.keys.map { |k| "#{k}=#{_data[k]}" }.join '&'
94
+ else _data end
95
+ end
96
+ end
97
+ end
@@ -34,7 +34,7 @@ module Crabfarm
34
34
  begin
35
35
  elapsed = Benchmark.measure do
36
36
  puts "Transitioning to #{_name.to_s.camelize} state"
37
- doc = @context.run_state(_name, _params).output_as_json
37
+ doc = TransitionService.apply_state(@context, _name, _params).output_as_json
38
38
 
39
39
  puts "State changed, generated document:"
40
40
  puts JSON.pretty_generate(doc).color(:green).gsub(/(^|\\n)/, ' ')
@@ -35,18 +35,20 @@ module Crabfarm
35
35
  end
36
36
 
37
37
  def generate_state(_name)
38
+ class_name = _name.camelize
38
39
  with_crawler_path do
39
- binding = { state_class: _name.camelize }
40
- path('app', 'states', _name.underscore + '.rb').render('state.rb', binding)
41
- path('spec', 'states', _name.underscore + '_spec.rb').render('state_spec.rb', binding)
40
+ binding = { state_class: class_name.camelize }
41
+ path('app', 'states', class_name.underscore + '.rb').render('state.rb', binding)
42
+ path('spec', 'states', class_name.underscore + '_spec.rb').render('state_spec.rb', binding)
42
43
  end
43
44
  end
44
45
 
45
46
  def generate_parser(_name)
47
+ class_name = _name.camelize + 'Parser'
46
48
  with_crawler_path do
47
- binding = { parser_class: _name.camelize }
48
- path('app', 'parsers', _name.underscore + '.rb').render('parser.rb', binding)
49
- path('spec', 'parsers', _name.underscore + '_spec.rb').render('parser_spec.rb', binding)
49
+ binding = { parser_class: class_name }
50
+ path('app', 'parsers', class_name.underscore + '.rb').render('parser.rb', binding)
51
+ path('spec', 'parsers', class_name.underscore + '_spec.rb').render('parser_spec.rb', binding)
50
52
  end
51
53
  end
52
54
 
@@ -1,4 +1,3 @@
1
- require 'net/http'
2
1
  require 'timeout'
3
2
 
4
3
  module Crabfarm
@@ -3,7 +3,6 @@ require 'net/http'
3
3
 
4
4
  CF_TEST_CONTEXT = Crabfarm::CrabtrapContext::new
5
5
  CF_TEST_CONTEXT.load
6
- CF_TEST_BUCKET = CF_TEST_CONTEXT.driver
7
6
 
8
7
  module Crabfarm
9
8
  module RSpec
@@ -16,7 +15,9 @@ module Crabfarm
16
15
  Net::HTTP.get(URI.parse _snap_or_url)
17
16
  end
18
17
 
19
- ParserService.parse described_class, html, _options
18
+ parser = described_class.new html, _options
19
+ parser.parse
20
+ parser
20
21
  end
21
22
 
22
23
  def crawl(_state=nil, _params={})
@@ -27,9 +28,9 @@ module Crabfarm
27
28
 
28
29
  if _state.nil?
29
30
  return nil unless described_class < BaseState # TODO: maybe raise an error here.
30
- @state = @last_state = CF_TEST_CONTEXT.run_state(described_class, _params)
31
+ @state = @last_state = TransitionService.apply_state CF_TEST_CONTEXT, described_class, _params
31
32
  else
32
- @last_state = CF_TEST_CONTEXT.run_state(_state, _params)
33
+ @last_state = TransitionService.apply_state CF_TEST_CONTEXT, _state, _params
33
34
  end
34
35
  end
35
36
 
@@ -2,25 +2,42 @@ module Crabfarm
2
2
  module Strategies
3
3
 
4
4
  class Loader
5
- def initialize(_klass, _pkg=nil)
5
+ def initialize(_name, _klass, _pkg, _deps)
6
+ @name = _name
6
7
  @klass = _klass
7
8
  @pkg = _pkg
9
+ @deps = _deps
8
10
  end
9
11
 
10
12
  def load
13
+ load_dependencies
11
14
  require @pkg if @pkg
12
15
  if @klass.is_a? String then Object.const_get @klass else @klass end
13
16
  end
17
+
18
+ private
19
+
20
+ def load_dependencies
21
+ @deps.each do |dep|
22
+ begin
23
+ require dep
24
+ # TODO: check dependency version!
25
+ rescue LoadError
26
+ raise ConfigurationError.new "Missing #{@name} dependency, please add `gem \"#{dep}\"` to the crawler's Gemfile"
27
+ end
28
+ end
29
+ end
14
30
  end
15
31
 
16
32
  @@register = {}
17
33
 
18
- def self.register(_cat, _name, _klass, _pkg=nil)
19
- @@register[_cat.to_s + _name.to_s] = Loader.new(_klass, _pkg)
34
+ def self.register(_cat, _name, _klass, _pkg=nil, _deps=[])
35
+ full_name = _cat.to_s + ':' + _name.to_s
36
+ @@register[full_name] = Loader.new(full_name, _klass, _pkg, _deps)
20
37
  end
21
38
 
22
39
  def self.load(_cat, _name)
23
- full_name = _cat.to_s + _name.to_s
40
+ full_name = _cat.to_s + ':' + _name.to_s
24
41
  raise ConfigurationError.new "Invalid #{_cat} name #{_name}" unless @@register.has_key? full_name
25
42
  @@register[full_name].load
26
43
  end
@@ -1,10 +1,14 @@
1
1
 
2
2
  # The default crawling dsl to use in states and parsers, can be overriden in each component using the `browser_dsl :dsl` modifier
3
- # Available options are :surfer, :watir and :capybara
3
+ # Available options are :surfer, :watir and :capybara. Both watir and capybara require an additional gem to be added to Gemfile
4
4
  set_browser_dsl :surfer
5
5
 
6
+ # The default parser engine for parsers that do not specify one.
7
+ # Available options are :nokogiri and :pdf_parser. :pdf_parser requires an additional gem to be added to Gemfile
8
+ # set_parser_engine :nokogiri
9
+
6
10
  # Change the defaut output builder used in a state to generate the output document.
7
- # Available options are :hash, :ostruct, :jbuilder
11
+ # Available options are :hash, :ostruct, :jbuilder. :jbuilder requires an additional gem to be added to Gemfile
8
12
  # set_output_builder :hash
9
13
 
10
14
  # The path where every crawler log is stored.
@@ -0,0 +1,23 @@
1
+ module Crabfarm
2
+ class TransitionService
3
+
4
+ def self.apply_state(_context, _name, _params={})
5
+ state_class = if _name.is_a? String or _name.is_a? Symbol
6
+ load_by_name _name
7
+ else _name end
8
+
9
+ _context.load
10
+ state = state_class.new _context, _params
11
+ state.crawl
12
+ state
13
+ end
14
+
15
+ private
16
+
17
+ def self.load_by_name(_name)
18
+ name = _name.to_s.gsub(/[^A-Z0-9:]+/i, '_').camelize
19
+ name.constantize
20
+ end
21
+
22
+ end
23
+ end
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.0.16"
2
+ VERSION = "0.0.17"
3
3
  end
data/lib/crabfarm.rb CHANGED
@@ -1,15 +1,16 @@
1
- require 'logger'
1
+ require "logger"
2
2
  require "forwardable"
3
+ require "net/http"
3
4
  require "active_support/inflector"
4
5
  require "selenium-webdriver"
5
6
 
6
7
  require "crabfarm/version"
7
8
  require "crabfarm/errors"
8
9
  require "crabfarm/configuration"
9
- require "crabfarm/loader_service"
10
- require "crabfarm/parser_service"
10
+ require "crabfarm/transition_service"
11
11
  require "crabfarm/driver_bucket"
12
12
  require "crabfarm/driver_bucket_pool"
13
+ require "crabfarm/http_client"
13
14
  require "crabfarm/default_driver_factory"
14
15
  require "crabfarm/phantom_driver_factory"
15
16
  require "crabfarm/phantom_runner"
@@ -43,15 +44,16 @@ module Crabfarm
43
44
  module Strategies
44
45
  # bundled browser dsl adapters
45
46
  register :browser_dsl, :surfer, 'Crabfarm::SurferBrowserDsl', 'crabfarm/adapters/browser/surfer'
46
- register :browser_dsl, :watir, 'Crabfarm::WatirBrowserDsl', 'crabfarm/adapters/browser/watir'
47
- register :browser_dsl, :capybara, 'Crabfarm::CapybaraBrowserDsl', 'crabfarm/adapters/browser/capybara'
47
+ register :browser_dsl, :watir, 'Crabfarm::WatirBrowserDsl', 'crabfarm/adapters/browser/watir', ['watir-webdriver']
48
+ register :browser_dsl, :capybara, 'Crabfarm::CapybaraBrowserDsl', 'crabfarm/adapters/browser/capybara', ['capybara']
48
49
 
49
50
  # bundled parsers dsl adapters
50
- register :parser_dsl, :nokogiri, 'Crabfarm::NokogiriDsl', 'crabfarm/adapters/parser/nokogiri'
51
+ register :parser_engine, :nokogiri, 'Crabfarm::NokogiriAdapter', 'crabfarm/adapters/parser/nokogiri'
52
+ register :parser_engine, :pdf_reader, 'Crabfarm::PdfReaderAdapter', 'crabfarm/adapters/parser/pdf_reader', ['pdf-reader']
51
53
 
52
54
  # bundled state output builders
53
55
  register :output_builder, :hash, 'Crabfarm::HashOutputBuilder', 'crabfarm/adapters/output/hash'
54
56
  register :output_builder, :ostruct, 'Crabfarm::OStructOutputBuilder', 'crabfarm/adapters/output/ostruct'
55
- register :output_builder, :jbuilder, 'Crabfarm::JbuilderOutputBuilder', 'crabfarm/adapters/output/jbuilder'
57
+ register :output_builder, :jbuilder, 'Crabfarm::JbuilderOutputBuilder', 'crabfarm/adapters/output/jbuilder', ['jbuilder']
56
58
  end
57
59
  end
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.16
4
+ version: 0.0.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-11 00:00:00.000000000 Z
11
+ date: 2015-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: jbuilder
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: 2.2.0
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ~>
25
- - !ruby/object:Gem::Version
26
- version: 2.2.0
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: selenium-webdriver
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -38,34 +24,6 @@ dependencies:
38
24
  - - ~>
39
25
  - !ruby/object:Gem::Version
40
26
  version: '2.33'
41
- - !ruby/object:Gem::Dependency
42
- name: capybara
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - '>='
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - '>='
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: watir-webdriver
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :runtime
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - '>='
67
- - !ruby/object:Gem::Version
68
- version: '0'
69
27
  - !ruby/object:Gem::Dependency
70
28
  name: nokogiri
71
29
  requirement: !ruby/object:Gem::Requirement
@@ -170,20 +128,6 @@ dependencies:
170
128
  - - '>='
171
129
  - !ruby/object:Gem::Version
172
130
  version: '0'
173
- - !ruby/object:Gem::Dependency
174
- name: multipart-post
175
- requirement: !ruby/object:Gem::Requirement
176
- requirements:
177
- - - '>='
178
- - !ruby/object:Gem::Version
179
- version: '0'
180
- type: :runtime
181
- prerelease: false
182
- version_requirements: !ruby/object:Gem::Requirement
183
- requirements:
184
- - - '>='
185
- - !ruby/object:Gem::Version
186
- version: '0'
187
131
  - !ruby/object:Gem::Dependency
188
132
  name: bundler
189
133
  requirement: !ruby/object:Gem::Requirement
@@ -324,6 +268,62 @@ dependencies:
324
268
  - - '>='
325
269
  - !ruby/object:Gem::Version
326
270
  version: '0'
271
+ - !ruby/object:Gem::Dependency
272
+ name: pdf-reader
273
+ requirement: !ruby/object:Gem::Requirement
274
+ requirements:
275
+ - - ~>
276
+ - !ruby/object:Gem::Version
277
+ version: 1.3.3
278
+ type: :development
279
+ prerelease: false
280
+ version_requirements: !ruby/object:Gem::Requirement
281
+ requirements:
282
+ - - ~>
283
+ - !ruby/object:Gem::Version
284
+ version: 1.3.3
285
+ - !ruby/object:Gem::Dependency
286
+ name: capybara
287
+ requirement: !ruby/object:Gem::Requirement
288
+ requirements:
289
+ - - '>='
290
+ - !ruby/object:Gem::Version
291
+ version: '0'
292
+ type: :development
293
+ prerelease: false
294
+ version_requirements: !ruby/object:Gem::Requirement
295
+ requirements:
296
+ - - '>='
297
+ - !ruby/object:Gem::Version
298
+ version: '0'
299
+ - !ruby/object:Gem::Dependency
300
+ name: watir-webdriver
301
+ requirement: !ruby/object:Gem::Requirement
302
+ requirements:
303
+ - - '>='
304
+ - !ruby/object:Gem::Version
305
+ version: '0'
306
+ type: :development
307
+ prerelease: false
308
+ version_requirements: !ruby/object:Gem::Requirement
309
+ requirements:
310
+ - - '>='
311
+ - !ruby/object:Gem::Version
312
+ version: '0'
313
+ - !ruby/object:Gem::Dependency
314
+ name: jbuilder
315
+ requirement: !ruby/object:Gem::Requirement
316
+ requirements:
317
+ - - ~>
318
+ - !ruby/object:Gem::Version
319
+ version: 2.2.0
320
+ type: :development
321
+ prerelease: false
322
+ version_requirements: !ruby/object:Gem::Requirement
323
+ requirements:
324
+ - - ~>
325
+ - !ruby/object:Gem::Version
326
+ version: 2.2.0
327
327
  description:
328
328
  email:
329
329
  - ignacio@platan.us
@@ -339,6 +339,7 @@ files:
339
339
  - lib/crabfarm/adapters/output/jbuilder.rb
340
340
  - lib/crabfarm/adapters/output/ostruct.rb
341
341
  - lib/crabfarm/adapters/parser/nokogiri.rb
342
+ - lib/crabfarm/adapters/parser/pdf_reader.rb
342
343
  - lib/crabfarm/base_parser.rb
343
344
  - lib/crabfarm/base_state.rb
344
345
  - lib/crabfarm/cli.rb
@@ -355,14 +356,13 @@ files:
355
356
  - lib/crabfarm/engines/safe_state_loop.rb
356
357
  - lib/crabfarm/errors.rb
357
358
  - lib/crabfarm/forked_state.rb
358
- - lib/crabfarm/loader_service.rb
359
+ - lib/crabfarm/http_client.rb
359
360
  - lib/crabfarm/mocks/noop_driver.rb
360
361
  - lib/crabfarm/modes/console.rb
361
362
  - lib/crabfarm/modes/generator.rb
362
363
  - lib/crabfarm/modes/publisher.rb
363
364
  - lib/crabfarm/modes/recorder.rb
364
365
  - lib/crabfarm/modes/server.rb
365
- - lib/crabfarm/parser_service.rb
366
366
  - lib/crabfarm/phantom_driver_factory.rb
367
367
  - lib/crabfarm/phantom_runner.rb
368
368
  - lib/crabfarm/rspec.rb
@@ -383,6 +383,7 @@ files:
383
383
  - lib/crabfarm/templates/spec_helper.rb.erb
384
384
  - lib/crabfarm/templates/state.rb.erb
385
385
  - lib/crabfarm/templates/state_spec.rb.erb
386
+ - lib/crabfarm/transition_service.rb
386
387
  - lib/crabfarm/version.rb
387
388
  - lib/crabfarm.rb
388
389
  - bin/crabfarm
@@ -1,29 +0,0 @@
1
- module Crabfarm
2
- class LoaderService
3
-
4
- def self.load_state(_name)
5
- load_entity _name, 'state', BaseState
6
- end
7
-
8
- def self.load_parser(_name)
9
- load_entity _name, 'parser', BaseParser
10
- end
11
-
12
- private
13
-
14
- def self.load_entity(_name, _role, _type)
15
-
16
- if _name.is_a? String or _name.is_a? Symbol
17
- name = _name.to_s.gsub(/[^A-Z0-9:]+/i, '_').camelize
18
- mod = name.constantize rescue nil
19
- else
20
- mod = _name
21
- end
22
-
23
- raise EntityNotFoundError.new _role, name if mod.nil?
24
- raise EntityNotFoundError.new _role, name unless mod < _type
25
- mod
26
- end
27
-
28
- end
29
- end
@@ -1,12 +0,0 @@
1
- module Crabfarm
2
- class ParserService
3
-
4
- def self.parse(_parser_class, _html, _options={})
5
- _parser_class = LoaderService.load_parser(_parser_class) if _parser_class.is_a? String or _parser_class.is_a? Symbol
6
- parser = _parser_class.new _html, _options
7
- parser.parse
8
- parser
9
- end
10
-
11
- end
12
- end