crabfarm 0.2.5 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/lib/crabfarm.rb +17 -18
  3. data/lib/crabfarm/adapters/browser/abstract_webdriver.rb +60 -0
  4. data/lib/crabfarm/adapters/browser/chrome.rb +24 -0
  5. data/lib/crabfarm/adapters/browser/firefox.rb +26 -0
  6. data/lib/crabfarm/adapters/browser/noop.rb +25 -0
  7. data/lib/crabfarm/adapters/browser/phantom_js.rb +59 -0
  8. data/lib/crabfarm/adapters/browser/remote_webdriver.rb +31 -0
  9. data/lib/crabfarm/adapters/driver_wrapper/capybara.rb +11 -0
  10. data/lib/crabfarm/adapters/driver_wrapper/surfer.rb +13 -0
  11. data/lib/crabfarm/adapters/{browser → driver_wrapper}/watir.rb +7 -3
  12. data/lib/crabfarm/adapters/parser/nokogiri.rb +17 -15
  13. data/lib/crabfarm/adapters/parser/pdf_reader.rb +14 -12
  14. data/lib/crabfarm/assertion/fields.rb +85 -0
  15. data/lib/crabfarm/base_navigator.rb +78 -0
  16. data/lib/crabfarm/base_reducer.rb +68 -0
  17. data/lib/crabfarm/base_struct.rb +17 -0
  18. data/lib/crabfarm/cli.rb +18 -8
  19. data/lib/crabfarm/configuration.rb +24 -51
  20. data/lib/crabfarm/context.rb +19 -43
  21. data/lib/crabfarm/crabtrap_context.rb +4 -11
  22. data/lib/crabfarm/driver_pool.rb +32 -0
  23. data/lib/crabfarm/dsl/surfer/surf_context.rb +5 -25
  24. data/lib/crabfarm/engines/async_state_manager.rb +1 -1
  25. data/lib/crabfarm/engines/sync_state_manager.rb +1 -1
  26. data/lib/crabfarm/forked_navigator.rb +31 -0
  27. data/lib/crabfarm/modes/console.rb +4 -4
  28. data/lib/crabfarm/modes/generator.rb +24 -11
  29. data/lib/crabfarm/rspec.rb +26 -24
  30. data/lib/crabfarm/strategies.rb +15 -9
  31. data/lib/crabfarm/templates/Crabfile.erb +21 -26
  32. data/lib/crabfarm/templates/Gemfile.erb +6 -0
  33. data/lib/crabfarm/templates/navigator.rb.erb +20 -0
  34. data/lib/crabfarm/templates/{state_spec.rb.erb → navigator_spec.rb.erb} +1 -1
  35. data/lib/crabfarm/templates/{parser.rb.erb → reducer.rb.erb} +4 -4
  36. data/lib/crabfarm/templates/{parser_spec.rb.erb → reducer_spec.rb.erb} +1 -1
  37. data/lib/crabfarm/templates/struct.rb.erb +12 -0
  38. data/lib/crabfarm/transition_service.rb +20 -7
  39. data/lib/crabfarm/version.rb +1 -1
  40. metadata +50 -48
  41. data/lib/crabfarm/adapters/browser/capybara.rb +0 -7
  42. data/lib/crabfarm/adapters/browser/surfer.rb +0 -9
  43. data/lib/crabfarm/adapters/output/hash.rb +0 -11
  44. data/lib/crabfarm/adapters/output/jbuilder.rb +0 -11
  45. data/lib/crabfarm/adapters/output/ostruct.rb +0 -14
  46. data/lib/crabfarm/base_parser.rb +0 -59
  47. data/lib/crabfarm/base_state.rb +0 -112
  48. data/lib/crabfarm/default_driver_factory.rb +0 -86
  49. data/lib/crabfarm/driver_bucket.rb +0 -42
  50. data/lib/crabfarm/driver_bucket_pool.rb +0 -26
  51. data/lib/crabfarm/forked_state.rb +0 -38
  52. data/lib/crabfarm/mocks/noop_driver.rb +0 -6
  53. data/lib/crabfarm/phantom_driver_factory.rb +0 -33
  54. data/lib/crabfarm/templates/state.rb.erb +0 -8
@@ -2,6 +2,12 @@ source 'https://rubygems.org'
2
2
 
3
3
  gem "crabfarm", '<%= version %>'
4
4
 
5
+ # Comment this out if not using the default nokogiri HTML parser
6
+ gem 'nokogiri', "~> 1.6.6"
7
+
8
+ # Comment this out if not using a selenium-webdriver based driver
9
+ gem "selenium-webdriver", "~> 2.45"
10
+
5
11
  group :test do
6
12
  gem "rspec-nc"
7
13
  end
@@ -0,0 +1,20 @@
1
+ class <%= navigator_class %> < Crabfarm::BaseNavigator
2
+
3
+ def run
4
+ <% if navigator_url.nil? %>
5
+ # replace the following by your navigation code:
6
+ raise NotImplementedError.new 'You must provide some navigation code for <%= navigator_class %>'
7
+ <% else %>
8
+ browser.goto '<%= navigator_url %>'
9
+ <% end %>
10
+
11
+ # By default, call the reducer named after this navigator over the entire document and output its result
12
+ # You can call other reducers by using the `reduce` method, like this:
13
+ #
14
+ # reduce browser.search('td').first, using: MyOtherReducer
15
+ #
16
+ reduce_with_defaults
17
+ end
18
+
19
+ end
20
+
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe <%= state_class %> do
3
+ describe <%= navigator_class %> do
4
4
 
5
5
  pending "should ensure output has the right structure"
6
6
 
@@ -1,9 +1,9 @@
1
- class <%= parser_class %> < Crabfarm::BaseParser
1
+ class <%= reducer_class %> < Crabfarm::BaseReducer
2
2
 
3
- def parse
4
- # You can replace the following line after running the owner state specs once.
3
+ def run
4
+ # You can replace the following line after running the owner navigator specs once.
5
5
  # Take a look at the 'Testing' section of the README.md for more information!
6
- take_snapshot
6
+ take_snapshot_and_fail
7
7
  end
8
8
 
9
9
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe <%= parser_class %> do
3
+ describe <%= reducer_class %> do
4
4
 
5
5
  pending "should extract values from snapshot"
6
6
 
@@ -0,0 +1,12 @@
1
+ class <%= struct_class %> < Crabfarm::BaseStruct
2
+
3
+ # add some fields, some examples:
4
+ #
5
+ # has_string :a_string
6
+ # has_integer :an_integer_greater_than_8, greater_than: 8
7
+ # has_array :an_array
8
+ # has_field :misc_field
9
+ #
10
+
11
+ end
12
+
@@ -1,20 +1,33 @@
1
1
  module Crabfarm
2
2
  class TransitionService
3
3
 
4
- def self.apply_state(_context, _name, _params={})
5
- state_class = if _name.is_a? String or _name.is_a? Symbol
4
+ def self.transition(_context, _name, _params={})
5
+ self.new(_context).transition(_name, _params)
6
+ end
7
+
8
+ attr_reader :document, :navigator
9
+
10
+ def initialize(_context)
11
+ @context = _context
12
+ end
13
+
14
+ def transition(_name, _params={})
15
+ navigator_class = if _name.is_a? String or _name.is_a? Symbol
6
16
  load_class_from_uri _name
7
17
  else _name end
8
18
 
9
- _context.prepare
10
- state = state_class.new _context, _params
11
- state.crawl
12
- state
19
+ @context.prepare
20
+ @navigator = navigator_class.new @context, _params
21
+
22
+ @document = @navigator.run
23
+ @document = @document.as_json if @document.respond_to? :as_json
24
+
25
+ self
13
26
  end
14
27
 
15
28
  private
16
29
 
17
- def self.load_class_from_uri(_uri)
30
+ def load_class_from_uri(_uri)
18
31
  class_name = Utils::Naming.decode_crabfarm_uri _uri
19
32
  class_name.constantize
20
33
  end
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.2.5"
2
+ VERSION = "0.3.0"
3
3
  end
metadata CHANGED
@@ -1,43 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-27 00:00:00.000000000 Z
11
+ date: 2015-04-15 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: selenium-webdriver
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '2.45'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ~>
25
- - !ruby/object:Gem::Version
26
- version: '2.45'
27
- - !ruby/object:Gem::Dependency
28
- name: nokogiri
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ~>
32
- - !ruby/object:Gem::Version
33
- version: 1.6.6
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ~>
39
- - !ruby/object:Gem::Version
40
- version: 1.6.6
41
13
  - !ruby/object:Gem::Dependency
42
14
  name: activesupport
43
15
  requirement: !ruby/object:Gem::Requirement
@@ -156,6 +128,34 @@ dependencies:
156
128
  - - ~>
157
129
  - !ruby/object:Gem::Version
158
130
  version: 0.5.5
131
+ - !ruby/object:Gem::Dependency
132
+ name: selenium-webdriver
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ~>
136
+ - !ruby/object:Gem::Version
137
+ version: '2.45'
138
+ type: :development
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ~>
143
+ - !ruby/object:Gem::Version
144
+ version: '2.45'
145
+ - !ruby/object:Gem::Dependency
146
+ name: nokogiri
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - ~>
150
+ - !ruby/object:Gem::Version
151
+ version: 1.6.6
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - ~>
157
+ - !ruby/object:Gem::Version
158
+ version: 1.6.6
159
159
  - !ruby/object:Gem::Dependency
160
160
  name: bundler
161
161
  requirement: !ruby/object:Gem::Requirement
@@ -374,45 +374,46 @@ executables:
374
374
  extensions: []
375
375
  extra_rdoc_files: []
376
376
  files:
377
- - lib/crabfarm/adapters/browser/capybara.rb
378
- - lib/crabfarm/adapters/browser/surfer.rb
379
- - lib/crabfarm/adapters/browser/watir.rb
380
- - lib/crabfarm/adapters/output/hash.rb
381
- - lib/crabfarm/adapters/output/jbuilder.rb
382
- - lib/crabfarm/adapters/output/ostruct.rb
377
+ - lib/crabfarm/adapters/browser/abstract_webdriver.rb
378
+ - lib/crabfarm/adapters/browser/chrome.rb
379
+ - lib/crabfarm/adapters/browser/firefox.rb
380
+ - lib/crabfarm/adapters/browser/noop.rb
381
+ - lib/crabfarm/adapters/browser/phantom_js.rb
382
+ - lib/crabfarm/adapters/browser/remote_webdriver.rb
383
+ - lib/crabfarm/adapters/driver_wrapper/capybara.rb
384
+ - lib/crabfarm/adapters/driver_wrapper/surfer.rb
385
+ - lib/crabfarm/adapters/driver_wrapper/watir.rb
383
386
  - lib/crabfarm/adapters/parser/nokogiri.rb
384
387
  - lib/crabfarm/adapters/parser/pdf_reader.rb
385
388
  - lib/crabfarm/assertion/context.rb
389
+ - lib/crabfarm/assertion/fields.rb
386
390
  - lib/crabfarm/assertion/parsers.rb
387
391
  - lib/crabfarm/assertion/validations.rb
388
392
  - lib/crabfarm/assertion/wrapper.rb
389
- - lib/crabfarm/base_parser.rb
390
- - lib/crabfarm/base_state.rb
393
+ - lib/crabfarm/base_navigator.rb
394
+ - lib/crabfarm/base_reducer.rb
395
+ - lib/crabfarm/base_struct.rb
391
396
  - lib/crabfarm/cli.rb
392
397
  - lib/crabfarm/configuration.rb
393
398
  - lib/crabfarm/context.rb
394
399
  - lib/crabfarm/context_factory.rb
395
400
  - lib/crabfarm/crabtrap_context.rb
396
401
  - lib/crabfarm/crabtrap_runner.rb
397
- - lib/crabfarm/default_driver_factory.rb
398
- - lib/crabfarm/driver_bucket.rb
399
- - lib/crabfarm/driver_bucket_pool.rb
402
+ - lib/crabfarm/driver_pool.rb
400
403
  - lib/crabfarm/dsl/surfer/search_context.rb
401
404
  - lib/crabfarm/dsl/surfer/surf_context.rb
402
405
  - lib/crabfarm/dsl/surfer.rb
403
406
  - lib/crabfarm/engines/async_state_manager.rb
404
407
  - lib/crabfarm/engines/sync_state_manager.rb
405
408
  - lib/crabfarm/errors.rb
406
- - lib/crabfarm/forked_state.rb
409
+ - lib/crabfarm/forked_navigator.rb
407
410
  - lib/crabfarm/global_state.rb
408
411
  - lib/crabfarm/http_client.rb
409
- - lib/crabfarm/mocks/noop_driver.rb
410
412
  - lib/crabfarm/modes/console.rb
411
413
  - lib/crabfarm/modes/generator.rb
412
414
  - lib/crabfarm/modes/publisher.rb
413
415
  - lib/crabfarm/modes/recorder.rb
414
416
  - lib/crabfarm/modes/server.rb
415
- - lib/crabfarm/phantom_driver_factory.rb
416
417
  - lib/crabfarm/phantom_runner.rb
417
418
  - lib/crabfarm/rspec.rb
418
419
  - lib/crabfarm/state_store.rb
@@ -427,11 +428,12 @@ files:
427
428
  - lib/crabfarm/templates/dot_gitkeep.erb
428
429
  - lib/crabfarm/templates/dot_rspec.erb
429
430
  - lib/crabfarm/templates/Gemfile.erb
430
- - lib/crabfarm/templates/parser.rb.erb
431
- - lib/crabfarm/templates/parser_spec.rb.erb
431
+ - lib/crabfarm/templates/navigator.rb.erb
432
+ - lib/crabfarm/templates/navigator_spec.rb.erb
433
+ - lib/crabfarm/templates/reducer.rb.erb
434
+ - lib/crabfarm/templates/reducer_spec.rb.erb
432
435
  - lib/crabfarm/templates/spec_helper.rb.erb
433
- - lib/crabfarm/templates/state.rb.erb
434
- - lib/crabfarm/templates/state_spec.rb.erb
436
+ - lib/crabfarm/templates/struct.rb.erb
435
437
  - lib/crabfarm/transition_service.rb
436
438
  - lib/crabfarm/utils/naming.rb
437
439
  - lib/crabfarm/utils/port_discovery.rb
@@ -1,7 +0,0 @@
1
- module Crabfarm
2
- class CapybaraBrowserDsl
3
- def self.wrap(_bucket)
4
- raise NotImplementedError.new "Capybara adapter is incompleted"
5
- end
6
- end
7
- end
@@ -1,9 +0,0 @@
1
- require 'crabfarm/dsl/surfer'
2
-
3
- module Crabfarm
4
- class SurferBrowserDsl
5
- def self.wrap(_bucket)
6
- Crabfarm::Dsl::Surfer::SurfContext.new _bucket
7
- end
8
- end
9
- end
@@ -1,11 +0,0 @@
1
- module Crabfarm
2
- class HashOutputBuilder
3
- def self.prepare
4
- Hash.new
5
- end
6
-
7
- def self.serialize(_output)
8
- _output
9
- end
10
- end
11
- end
@@ -1,11 +0,0 @@
1
- module Crabfarm
2
- class JbuilderOutputBuilder
3
- def self.prepare
4
- Jbuilder.new
5
- end
6
-
7
- def self.serialize(_output)
8
- _output.attributes!
9
- end
10
- end
11
- end
@@ -1,14 +0,0 @@
1
- require 'ostruct'
2
-
3
- module Crabfarm
4
- class OStructOutputBuilder
5
- def self.prepare
6
- # TODO: maybe wrap OpenStruct in a class that automatically generates nested OpenStructs when nested properties are accessed
7
- OpenStruct.new
8
- end
9
-
10
- def self.serialize(_output)
11
- _output.to_h
12
- end
13
- end
14
- end
@@ -1,59 +0,0 @@
1
- require "crabfarm/assertion/context"
2
-
3
- module Crabfarm
4
- class BaseParser < Delegator
5
- include Assertion::Context
6
-
7
- attr_reader :params, :document
8
-
9
- def self.parser_engine(_engine=nil)
10
- @engine_name = _engine
11
- end
12
-
13
- def self.engine
14
- @engine ||= Strategies.load(:parser_engine, @engine_name || Crabfarm.config.parser_engine)
15
- end
16
-
17
- def self.snapshot_path(_name=nil)
18
- _name = self.to_s.underscore if _name.nil?
19
- File.join(GlobalState.snapshots_path, _name + '.' + engine.format)
20
- end
21
-
22
- def engine
23
- self.class.engine
24
- end
25
-
26
- def initialize(_target, _params)
27
- @parsed_data = engine.preprocess_parsing_target _target
28
- @document = engine.parse @parsed_data
29
- @params = _params
30
-
31
- super @document
32
- end
33
-
34
- def parse
35
- raise NotImplementedError.new
36
- end
37
-
38
- def take_snapshot(_name=nil)
39
- file_path = self.class.snapshot_path _name
40
-
41
- raise ArgumentError.new "Snapshot already exists '#{file_path}', make sure to implement the #{self.class.to_s} parse method." if File.exist? file_path
42
-
43
- dir_path = file_path.split(File::SEPARATOR)[0...-1]
44
- FileUtils.mkpath dir_path.join(File::SEPARATOR) if dir_path.length > 0
45
-
46
- File.write file_path, @parsed_data
47
- nil
48
- end
49
-
50
- def __getobj__
51
- @document
52
- end
53
-
54
- def __setobj__(obj)
55
- @document = obj
56
- end
57
-
58
- end
59
- end
@@ -1,112 +0,0 @@
1
- require 'thwait'
2
- require 'crabfarm/forked_state'
3
- require "crabfarm/assertion/context"
4
-
5
- module Crabfarm
6
- class BaseState
7
- include Assertion::Context
8
- extend Forwardable
9
-
10
- PARSE_METHOD_RX = /^parse_(.*)$/
11
-
12
- attr_reader :params, :output
13
-
14
- def_delegators '@context', :http
15
- def_delegators '@context.pool', :driver
16
- def_delegators '@context.store', :get, :fetch
17
-
18
- def self.browser_dsl(_dsl)
19
- @class_browser_dsl = _dsl
20
- end
21
-
22
- def self.output_builder(_builder)
23
- @class_output_builder = _builder
24
- end
25
-
26
- def initialize(_context, _params)
27
- @context = _context
28
- @params = _params
29
-
30
- @dsl = Strategies.load(:browser_dsl, class_browser_dsl || Crabfarm.config.browser_dsl)
31
- @builder = Strategies.load(:output_builder, class_output_builder || Crabfarm.config.output_builder)
32
- @output = @builder.prepare
33
- end
34
-
35
- def browser(_name=nil)
36
- @dsl.wrap driver(_name)
37
- end
38
-
39
- def download(_url)
40
- @context.http.get(_url).body
41
- end
42
-
43
- def output
44
- @output
45
- end
46
-
47
- def output_as_json
48
- @builder.serialize @output
49
- end
50
-
51
- def crawl
52
- raise NotImplementedError.new
53
- end
54
-
55
- def parse(_target=nil, _options={})
56
- parser_class = _options.delete :using
57
-
58
- if parser_class.nil?
59
- parser_class = (self.class.name + 'Parser').constantize
60
- end
61
-
62
- parser = parser_class.new _target, @params.merge(_options)
63
- parser.parse
64
- return parser
65
- end
66
-
67
- def fork_each(_enumerator, &_block)
68
- session_id = 0
69
- mutex = Mutex.new
70
- ths = _enumerator.map do |value|
71
- session_id += 1
72
- start_forked_state("th_session_#{session_id}", value, _block, mutex)
73
- end
74
- ThreadsWait.all_waits(*ths)
75
- end
76
-
77
- def method_missing(_method, *_args, &_block)
78
- m = PARSE_METHOD_RX.match(_method)
79
- if m
80
- options = _args[1] || {}
81
- options[:using] = (m[1].camelize + 'Parser').constantize
82
- parse _args[0], options
83
- else super end
84
- end
85
-
86
- def respond_to?(_method, _include_all=false)
87
- return true if PARSE_METHOD_RX === _method
88
- super
89
- end
90
-
91
- private
92
-
93
- def class_browser_dsl
94
- self.class.instance_variable_get :@class_browser_dsl
95
- end
96
-
97
- def class_output_builder
98
- self.class.instance_variable_get :@class_output_builder
99
- end
100
-
101
- def start_forked_state(_name, _value, _block, _mutex)
102
- Thread.new {
103
- sub_state = ForkedState.new self, _name, _mutex
104
- begin
105
- sub_state.instance_exec _value, &_block
106
- ensure
107
- sub_state.driver.reset
108
- end
109
- }
110
- end
111
- end
112
- end