crabfarm 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/lib/crabfarm.rb +17 -18
  3. data/lib/crabfarm/adapters/browser/abstract_webdriver.rb +60 -0
  4. data/lib/crabfarm/adapters/browser/chrome.rb +24 -0
  5. data/lib/crabfarm/adapters/browser/firefox.rb +26 -0
  6. data/lib/crabfarm/adapters/browser/noop.rb +25 -0
  7. data/lib/crabfarm/adapters/browser/phantom_js.rb +59 -0
  8. data/lib/crabfarm/adapters/browser/remote_webdriver.rb +31 -0
  9. data/lib/crabfarm/adapters/driver_wrapper/capybara.rb +11 -0
  10. data/lib/crabfarm/adapters/driver_wrapper/surfer.rb +13 -0
  11. data/lib/crabfarm/adapters/{browser → driver_wrapper}/watir.rb +7 -3
  12. data/lib/crabfarm/adapters/parser/nokogiri.rb +17 -15
  13. data/lib/crabfarm/adapters/parser/pdf_reader.rb +14 -12
  14. data/lib/crabfarm/assertion/fields.rb +85 -0
  15. data/lib/crabfarm/base_navigator.rb +78 -0
  16. data/lib/crabfarm/base_reducer.rb +68 -0
  17. data/lib/crabfarm/base_struct.rb +17 -0
  18. data/lib/crabfarm/cli.rb +18 -8
  19. data/lib/crabfarm/configuration.rb +24 -51
  20. data/lib/crabfarm/context.rb +19 -43
  21. data/lib/crabfarm/crabtrap_context.rb +4 -11
  22. data/lib/crabfarm/driver_pool.rb +32 -0
  23. data/lib/crabfarm/dsl/surfer/surf_context.rb +5 -25
  24. data/lib/crabfarm/engines/async_state_manager.rb +1 -1
  25. data/lib/crabfarm/engines/sync_state_manager.rb +1 -1
  26. data/lib/crabfarm/forked_navigator.rb +31 -0
  27. data/lib/crabfarm/modes/console.rb +4 -4
  28. data/lib/crabfarm/modes/generator.rb +24 -11
  29. data/lib/crabfarm/rspec.rb +26 -24
  30. data/lib/crabfarm/strategies.rb +15 -9
  31. data/lib/crabfarm/templates/Crabfile.erb +21 -26
  32. data/lib/crabfarm/templates/Gemfile.erb +6 -0
  33. data/lib/crabfarm/templates/navigator.rb.erb +20 -0
  34. data/lib/crabfarm/templates/{state_spec.rb.erb → navigator_spec.rb.erb} +1 -1
  35. data/lib/crabfarm/templates/{parser.rb.erb → reducer.rb.erb} +4 -4
  36. data/lib/crabfarm/templates/{parser_spec.rb.erb → reducer_spec.rb.erb} +1 -1
  37. data/lib/crabfarm/templates/struct.rb.erb +12 -0
  38. data/lib/crabfarm/transition_service.rb +20 -7
  39. data/lib/crabfarm/version.rb +1 -1
  40. metadata +50 -48
  41. data/lib/crabfarm/adapters/browser/capybara.rb +0 -7
  42. data/lib/crabfarm/adapters/browser/surfer.rb +0 -9
  43. data/lib/crabfarm/adapters/output/hash.rb +0 -11
  44. data/lib/crabfarm/adapters/output/jbuilder.rb +0 -11
  45. data/lib/crabfarm/adapters/output/ostruct.rb +0 -14
  46. data/lib/crabfarm/base_parser.rb +0 -59
  47. data/lib/crabfarm/base_state.rb +0 -112
  48. data/lib/crabfarm/default_driver_factory.rb +0 -86
  49. data/lib/crabfarm/driver_bucket.rb +0 -42
  50. data/lib/crabfarm/driver_bucket_pool.rb +0 -26
  51. data/lib/crabfarm/forked_state.rb +0 -38
  52. data/lib/crabfarm/mocks/noop_driver.rb +0 -6
  53. data/lib/crabfarm/phantom_driver_factory.rb +0 -33
  54. data/lib/crabfarm/templates/state.rb.erb +0 -8
@@ -2,6 +2,12 @@ source 'https://rubygems.org'
2
2
 
3
3
  gem "crabfarm", '<%= version %>'
4
4
 
5
+ # Comment this out if not using the default nokogiri HTML parser
6
+ gem 'nokogiri', "~> 1.6.6"
7
+
8
+ # Comment this out if not using a selenium webdriver based driver
9
+ gem "selenium-webdriver", "~> 2.45"
10
+
5
11
  group :test do
6
12
  gem "rspec-nc"
7
13
  end
@@ -0,0 +1,20 @@
1
+ class <%= navigator_class %> < Crabfarm::BaseNavigator
2
+
3
+ def run
4
+ <% if navigator_url.nil? %>
5
+ # replace the following by your navigation code:
6
+ raise NotImplementedError.new 'You must provide some navigation code for <%= navigator_class %>'
7
+ <% else %>
8
+ browser.goto '<%= navigator_url %>'
9
+ <% end %>
10
+
11
+ # Call the homonymous reducer over the entire document and output it by default
12
+ # You can call other reducers by using the `reduce` method, like this:
13
+ #
14
+ # reduce browser.search('td').first, using: MyOtherReducer
15
+ #
16
+ reduce_with_defaults
17
+ end
18
+
19
+ end
20
+
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe <%= state_class %> do
3
+ describe <%= navigator_class %> do
4
4
 
5
5
  pending "should ensure output has the right structure"
6
6
 
@@ -1,9 +1,9 @@
1
- class <%= parser_class %> < Crabfarm::BaseParser
1
+ class <%= reducer_class %> < Crabfarm::BaseReducer
2
2
 
3
- def parse
4
- # You can replace the following line after running the owner state specs once.
3
+ def run
4
+ # You can replace the following line after running the owner navigator specs once.
5
5
  # Take a look at the 'Testing' section of the README.md for more information!
6
- take_snapshot
6
+ take_snapshot_and_fail
7
7
  end
8
8
 
9
9
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe <%= parser_class %> do
3
+ describe <%= reducer_class %> do
4
4
 
5
5
  pending "should extract values from snapshot"
6
6
 
@@ -0,0 +1,12 @@
1
+ class <%= struct_class %> < Crabfarm::BaseStruct
2
+
3
+ # add some fields, some examples:
4
+ #
5
+ # has_string :a_string
6
+ # has_integer :an_integer_greater_than_8, greater_than: 8
7
+ # has_array :an_array
8
+ # has_field :misc_field
9
+ #
10
+
11
+ end
12
+
@@ -1,20 +1,33 @@
1
1
  module Crabfarm
2
2
  class TransitionService
3
3
 
4
- def self.apply_state(_context, _name, _params={})
5
- state_class = if _name.is_a? String or _name.is_a? Symbol
4
+ def self.transition(_context, _name, _params={})
5
+ self.new(_context).transition(_name, _params)
6
+ end
7
+
8
+ attr_reader :document, :navigator
9
+
10
+ def initialize(_context)
11
+ @context = _context
12
+ end
13
+
14
+ def transition(_name, _params={})
15
+ navigator_class = if _name.is_a? String or _name.is_a? Symbol
6
16
  load_class_from_uri _name
7
17
  else _name end
8
18
 
9
- _context.prepare
10
- state = state_class.new _context, _params
11
- state.crawl
12
- state
19
+ @context.prepare
20
+ @navigator = navigator_class.new @context, _params
21
+
22
+ @document = @navigator.run
23
+ @document = @document.as_json if @document.respond_to? :as_json
24
+
25
+ self
13
26
  end
14
27
 
15
28
  private
16
29
 
17
- def self.load_class_from_uri(_uri)
30
+ def load_class_from_uri(_uri)
18
31
  class_name = Utils::Naming.decode_crabfarm_uri _uri
19
32
  class_name.constantize
20
33
  end
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.2.5"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,43 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-27 00:00:00.000000000 Z
11
+ date: 2015-04-15 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: selenium-webdriver
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '2.45'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ~>
25
- - !ruby/object:Gem::Version
26
- version: '2.45'
27
- - !ruby/object:Gem::Dependency
28
- name: nokogiri
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ~>
32
- - !ruby/object:Gem::Version
33
- version: 1.6.6
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ~>
39
- - !ruby/object:Gem::Version
40
- version: 1.6.6
41
13
  - !ruby/object:Gem::Dependency
42
14
  name: activesupport
43
15
  requirement: !ruby/object:Gem::Requirement
@@ -156,6 +128,34 @@ dependencies:
156
128
  - - ~>
157
129
  - !ruby/object:Gem::Version
158
130
  version: 0.5.5
131
+ - !ruby/object:Gem::Dependency
132
+ name: selenium-webdriver
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ~>
136
+ - !ruby/object:Gem::Version
137
+ version: '2.45'
138
+ type: :development
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ~>
143
+ - !ruby/object:Gem::Version
144
+ version: '2.45'
145
+ - !ruby/object:Gem::Dependency
146
+ name: nokogiri
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - ~>
150
+ - !ruby/object:Gem::Version
151
+ version: 1.6.6
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ~>
157
+ - !ruby/object:Gem::Version
158
+ version: 1.6.6
159
159
  - !ruby/object:Gem::Dependency
160
160
  name: bundler
161
161
  requirement: !ruby/object:Gem::Requirement
@@ -374,45 +374,46 @@ executables:
374
374
  extensions: []
375
375
  extra_rdoc_files: []
376
376
  files:
377
- - lib/crabfarm/adapters/browser/capybara.rb
378
- - lib/crabfarm/adapters/browser/surfer.rb
379
- - lib/crabfarm/adapters/browser/watir.rb
380
- - lib/crabfarm/adapters/output/hash.rb
381
- - lib/crabfarm/adapters/output/jbuilder.rb
382
- - lib/crabfarm/adapters/output/ostruct.rb
377
+ - lib/crabfarm/adapters/browser/abstract_webdriver.rb
378
+ - lib/crabfarm/adapters/browser/chrome.rb
379
+ - lib/crabfarm/adapters/browser/firefox.rb
380
+ - lib/crabfarm/adapters/browser/noop.rb
381
+ - lib/crabfarm/adapters/browser/phantom_js.rb
382
+ - lib/crabfarm/adapters/browser/remote_webdriver.rb
383
+ - lib/crabfarm/adapters/driver_wrapper/capybara.rb
384
+ - lib/crabfarm/adapters/driver_wrapper/surfer.rb
385
+ - lib/crabfarm/adapters/driver_wrapper/watir.rb
383
386
  - lib/crabfarm/adapters/parser/nokogiri.rb
384
387
  - lib/crabfarm/adapters/parser/pdf_reader.rb
385
388
  - lib/crabfarm/assertion/context.rb
389
+ - lib/crabfarm/assertion/fields.rb
386
390
  - lib/crabfarm/assertion/parsers.rb
387
391
  - lib/crabfarm/assertion/validations.rb
388
392
  - lib/crabfarm/assertion/wrapper.rb
389
- - lib/crabfarm/base_parser.rb
390
- - lib/crabfarm/base_state.rb
393
+ - lib/crabfarm/base_navigator.rb
394
+ - lib/crabfarm/base_reducer.rb
395
+ - lib/crabfarm/base_struct.rb
391
396
  - lib/crabfarm/cli.rb
392
397
  - lib/crabfarm/configuration.rb
393
398
  - lib/crabfarm/context.rb
394
399
  - lib/crabfarm/context_factory.rb
395
400
  - lib/crabfarm/crabtrap_context.rb
396
401
  - lib/crabfarm/crabtrap_runner.rb
397
- - lib/crabfarm/default_driver_factory.rb
398
- - lib/crabfarm/driver_bucket.rb
399
- - lib/crabfarm/driver_bucket_pool.rb
402
+ - lib/crabfarm/driver_pool.rb
400
403
  - lib/crabfarm/dsl/surfer/search_context.rb
401
404
  - lib/crabfarm/dsl/surfer/surf_context.rb
402
405
  - lib/crabfarm/dsl/surfer.rb
403
406
  - lib/crabfarm/engines/async_state_manager.rb
404
407
  - lib/crabfarm/engines/sync_state_manager.rb
405
408
  - lib/crabfarm/errors.rb
406
- - lib/crabfarm/forked_state.rb
409
+ - lib/crabfarm/forked_navigator.rb
407
410
  - lib/crabfarm/global_state.rb
408
411
  - lib/crabfarm/http_client.rb
409
- - lib/crabfarm/mocks/noop_driver.rb
410
412
  - lib/crabfarm/modes/console.rb
411
413
  - lib/crabfarm/modes/generator.rb
412
414
  - lib/crabfarm/modes/publisher.rb
413
415
  - lib/crabfarm/modes/recorder.rb
414
416
  - lib/crabfarm/modes/server.rb
415
- - lib/crabfarm/phantom_driver_factory.rb
416
417
  - lib/crabfarm/phantom_runner.rb
417
418
  - lib/crabfarm/rspec.rb
418
419
  - lib/crabfarm/state_store.rb
@@ -427,11 +428,12 @@ files:
427
428
  - lib/crabfarm/templates/dot_gitkeep.erb
428
429
  - lib/crabfarm/templates/dot_rspec.erb
429
430
  - lib/crabfarm/templates/Gemfile.erb
430
- - lib/crabfarm/templates/parser.rb.erb
431
- - lib/crabfarm/templates/parser_spec.rb.erb
431
+ - lib/crabfarm/templates/navigator.rb.erb
432
+ - lib/crabfarm/templates/navigator_spec.rb.erb
433
+ - lib/crabfarm/templates/reducer.rb.erb
434
+ - lib/crabfarm/templates/reducer_spec.rb.erb
432
435
  - lib/crabfarm/templates/spec_helper.rb.erb
433
- - lib/crabfarm/templates/state.rb.erb
434
- - lib/crabfarm/templates/state_spec.rb.erb
436
+ - lib/crabfarm/templates/struct.rb.erb
435
437
  - lib/crabfarm/transition_service.rb
436
438
  - lib/crabfarm/utils/naming.rb
437
439
  - lib/crabfarm/utils/port_discovery.rb
@@ -1,7 +0,0 @@
1
- module Crabfarm
2
- class CapybaraBrowserDsl
3
- def self.wrap(_bucket)
4
- raise NotImplementedError.new "Capybara adapter is incompleted"
5
- end
6
- end
7
- end
@@ -1,9 +0,0 @@
1
- require 'crabfarm/dsl/surfer'
2
-
3
- module Crabfarm
4
- class SurferBrowserDsl
5
- def self.wrap(_bucket)
6
- Crabfarm::Dsl::Surfer::SurfContext.new _bucket
7
- end
8
- end
9
- end
@@ -1,11 +0,0 @@
1
- module Crabfarm
2
- class HashOutputBuilder
3
- def self.prepare
4
- Hash.new
5
- end
6
-
7
- def self.serialize(_output)
8
- _output
9
- end
10
- end
11
- end
@@ -1,11 +0,0 @@
1
- module Crabfarm
2
- class JbuilderOutputBuilder
3
- def self.prepare
4
- Jbuilder.new
5
- end
6
-
7
- def self.serialize(_output)
8
- _output.attributes!
9
- end
10
- end
11
- end
@@ -1,14 +0,0 @@
1
- require 'ostruct'
2
-
3
- module Crabfarm
4
- class OStructOutputBuilder
5
- def self.prepare
6
- # TODO: maybe wrap open struct in a class that automatically generate other openstruct when nested properties are accessed
7
- OpenStruct.new
8
- end
9
-
10
- def self.serialize(_output)
11
- _output.to_h
12
- end
13
- end
14
- end
@@ -1,59 +0,0 @@
1
- require "crabfarm/assertion/context"
2
-
3
- module Crabfarm
4
- class BaseParser < Delegator
5
- include Assertion::Context
6
-
7
- attr_reader :params, :document
8
-
9
- def self.parser_engine(_engine=nil)
10
- @engine_name = _engine
11
- end
12
-
13
- def self.engine
14
- @engine ||= Strategies.load(:parser_engine, @engine_name || Crabfarm.config.parser_engine)
15
- end
16
-
17
- def self.snapshot_path(_name=nil)
18
- _name = self.to_s.underscore if _name.nil?
19
- File.join(GlobalState.snapshots_path, _name + '.' + engine.format)
20
- end
21
-
22
- def engine
23
- self.class.engine
24
- end
25
-
26
- def initialize(_target, _params)
27
- @parsed_data = engine.preprocess_parsing_target _target
28
- @document = engine.parse @parsed_data
29
- @params = _params
30
-
31
- super @document
32
- end
33
-
34
- def parse
35
- raise NotImplementedError.new
36
- end
37
-
38
- def take_snapshot(_name=nil)
39
- file_path = self.class.snapshot_path _name
40
-
41
- raise ArgumentError.new "Snapshot already exists '#{file_path}', make sure to implement the #{self.class.to_s} parse method." if File.exist? file_path
42
-
43
- dir_path = file_path.split(File::SEPARATOR)[0...-1]
44
- FileUtils.mkpath dir_path.join(File::SEPARATOR) if dir_path.length > 0
45
-
46
- File.write file_path, @parsed_data
47
- nil
48
- end
49
-
50
- def __getobj__
51
- @document
52
- end
53
-
54
- def __setobj__(obj)
55
- @document = obj
56
- end
57
-
58
- end
59
- end
@@ -1,112 +0,0 @@
1
- require 'thwait'
2
- require 'crabfarm/forked_state'
3
- require "crabfarm/assertion/context"
4
-
5
- module Crabfarm
6
- class BaseState
7
- include Assertion::Context
8
- extend Forwardable
9
-
10
- PARSE_METHOD_RX = /^parse_(.*)$/
11
-
12
- attr_reader :params, :output
13
-
14
- def_delegators '@context', :http
15
- def_delegators '@context.pool', :driver
16
- def_delegators '@context.store', :get, :fetch
17
-
18
- def self.browser_dsl(_dsl)
19
- @class_browser_dsl = _dsl
20
- end
21
-
22
- def self.output_builder(_builder)
23
- @class_output_builder = _builder
24
- end
25
-
26
- def initialize(_context, _params)
27
- @context = _context
28
- @params = _params
29
-
30
- @dsl = Strategies.load(:browser_dsl, class_browser_dsl || Crabfarm.config.browser_dsl)
31
- @builder = Strategies.load(:output_builder, class_output_builder || Crabfarm.config.output_builder)
32
- @output = @builder.prepare
33
- end
34
-
35
- def browser(_name=nil)
36
- @dsl.wrap driver(_name)
37
- end
38
-
39
- def download(_url)
40
- @context.http.get(_url).body
41
- end
42
-
43
- def output
44
- @output
45
- end
46
-
47
- def output_as_json
48
- @builder.serialize @output
49
- end
50
-
51
- def crawl
52
- raise NotImplementedError.new
53
- end
54
-
55
- def parse(_target=nil, _options={})
56
- parser_class = _options.delete :using
57
-
58
- if parser_class.nil?
59
- parser_class = (self.class.name + 'Parser').constantize
60
- end
61
-
62
- parser = parser_class.new _target, @params.merge(_options)
63
- parser.parse
64
- return parser
65
- end
66
-
67
- def fork_each(_enumerator, &_block)
68
- session_id = 0
69
- mutex = Mutex.new
70
- ths = _enumerator.map do |value|
71
- session_id += 1
72
- start_forked_state("th_session_#{session_id}", value, _block, mutex)
73
- end
74
- ThreadsWait.all_waits(*ths)
75
- end
76
-
77
- def method_missing(_method, *_args, &_block)
78
- m = PARSE_METHOD_RX.match(_method)
79
- if m
80
- options = _args[1] || {}
81
- options[:using] = (m[1].camelize + 'Parser').constantize
82
- parse _args[0], options
83
- else super end
84
- end
85
-
86
- def respond_to?(_method, _include_all=false)
87
- return true if PARSE_METHOD_RX === _method
88
- super
89
- end
90
-
91
- private
92
-
93
- def class_browser_dsl
94
- self.class.instance_variable_get :@class_browser_dsl
95
- end
96
-
97
- def class_output_builder
98
- self.class.instance_variable_get :@class_output_builder
99
- end
100
-
101
- def start_forked_state(_name, _value, _block, _mutex)
102
- Thread.new {
103
- sub_state = ForkedState.new self, _name, _mutex
104
- begin
105
- sub_state.instance_exec _value, &_block
106
- ensure
107
- sub_state.driver.reset
108
- end
109
- }
110
- end
111
- end
112
- end