crabfarm 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/crabfarm.rb +17 -18
- data/lib/crabfarm/adapters/browser/abstract_webdriver.rb +60 -0
- data/lib/crabfarm/adapters/browser/chrome.rb +24 -0
- data/lib/crabfarm/adapters/browser/firefox.rb +26 -0
- data/lib/crabfarm/adapters/browser/noop.rb +25 -0
- data/lib/crabfarm/adapters/browser/phantom_js.rb +59 -0
- data/lib/crabfarm/adapters/browser/remote_webdriver.rb +31 -0
- data/lib/crabfarm/adapters/driver_wrapper/capybara.rb +11 -0
- data/lib/crabfarm/adapters/driver_wrapper/surfer.rb +13 -0
- data/lib/crabfarm/adapters/{browser → driver_wrapper}/watir.rb +7 -3
- data/lib/crabfarm/adapters/parser/nokogiri.rb +17 -15
- data/lib/crabfarm/adapters/parser/pdf_reader.rb +14 -12
- data/lib/crabfarm/assertion/fields.rb +85 -0
- data/lib/crabfarm/base_navigator.rb +78 -0
- data/lib/crabfarm/base_reducer.rb +68 -0
- data/lib/crabfarm/base_struct.rb +17 -0
- data/lib/crabfarm/cli.rb +18 -8
- data/lib/crabfarm/configuration.rb +24 -51
- data/lib/crabfarm/context.rb +19 -43
- data/lib/crabfarm/crabtrap_context.rb +4 -11
- data/lib/crabfarm/driver_pool.rb +32 -0
- data/lib/crabfarm/dsl/surfer/surf_context.rb +5 -25
- data/lib/crabfarm/engines/async_state_manager.rb +1 -1
- data/lib/crabfarm/engines/sync_state_manager.rb +1 -1
- data/lib/crabfarm/forked_navigator.rb +31 -0
- data/lib/crabfarm/modes/console.rb +4 -4
- data/lib/crabfarm/modes/generator.rb +24 -11
- data/lib/crabfarm/rspec.rb +26 -24
- data/lib/crabfarm/strategies.rb +15 -9
- data/lib/crabfarm/templates/Crabfile.erb +21 -26
- data/lib/crabfarm/templates/Gemfile.erb +6 -0
- data/lib/crabfarm/templates/navigator.rb.erb +20 -0
- data/lib/crabfarm/templates/{state_spec.rb.erb → navigator_spec.rb.erb} +1 -1
- data/lib/crabfarm/templates/{parser.rb.erb → reducer.rb.erb} +4 -4
- data/lib/crabfarm/templates/{parser_spec.rb.erb → reducer_spec.rb.erb} +1 -1
- data/lib/crabfarm/templates/struct.rb.erb +12 -0
- data/lib/crabfarm/transition_service.rb +20 -7
- data/lib/crabfarm/version.rb +1 -1
- metadata +50 -48
- data/lib/crabfarm/adapters/browser/capybara.rb +0 -7
- data/lib/crabfarm/adapters/browser/surfer.rb +0 -9
- data/lib/crabfarm/adapters/output/hash.rb +0 -11
- data/lib/crabfarm/adapters/output/jbuilder.rb +0 -11
- data/lib/crabfarm/adapters/output/ostruct.rb +0 -14
- data/lib/crabfarm/base_parser.rb +0 -59
- data/lib/crabfarm/base_state.rb +0 -112
- data/lib/crabfarm/default_driver_factory.rb +0 -86
- data/lib/crabfarm/driver_bucket.rb +0 -42
- data/lib/crabfarm/driver_bucket_pool.rb +0 -26
- data/lib/crabfarm/forked_state.rb +0 -38
- data/lib/crabfarm/mocks/noop_driver.rb +0 -6
- data/lib/crabfarm/phantom_driver_factory.rb +0 -33
- data/lib/crabfarm/templates/state.rb.erb +0 -8
@@ -2,6 +2,12 @@ source 'https://rubygems.org'
|
|
2
2
|
|
3
3
|
gem "crabfarm", '<%= version %>'
|
4
4
|
|
5
|
+
# Comment this out if not using the nokogiri default HTML parser
|
6
|
+
gem 'nokogiri', "~> 1.6.6"
|
7
|
+
|
8
|
+
# Comment this out if not using a selenium webdriver based driver
|
9
|
+
gem "selenium-webdriver", "~> 2.45"
|
10
|
+
|
5
11
|
group :test do
|
6
12
|
gem "rspec-nc"
|
7
13
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class <%= navigator_class %> < Crabfarm::BaseNavigator
|
2
|
+
|
3
|
+
def run
|
4
|
+
<% if navigator_url.nil? %>
|
5
|
+
# replace the following by your navigation code:
|
6
|
+
raise NotImplementedError.new 'You must provide some navigation code for <%= navigator_class %>'
|
7
|
+
<% else %>
|
8
|
+
browser.goto '<%= navigator_url %>'
|
9
|
+
<% end %>
|
10
|
+
|
11
|
+
# Call the homonymous reducer over the entire document and output it by default
|
12
|
+
# You can call other reducers by using the `reduce` method, like this:
|
13
|
+
#
|
14
|
+
# reduce browser.search('td').first, using: MyOtherReducer
|
15
|
+
#
|
16
|
+
reduce_with_defaults
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
@@ -1,9 +1,9 @@
|
|
1
|
-
class <%=
|
1
|
+
class <%= reducer_class %> < Crabfarm::BaseReducer
|
2
2
|
|
3
|
-
def
|
4
|
-
# You can replace the following line after running the owner
|
3
|
+
def run
|
4
|
+
# You can replace the following line after running the owner navigator specs once.
|
5
5
|
# Take a look at the 'Testing' section of the README.md for more information!
|
6
|
-
|
6
|
+
take_snapshot_and_fail
|
7
7
|
end
|
8
8
|
|
9
9
|
end
|
@@ -1,20 +1,33 @@
|
|
1
1
|
module Crabfarm
|
2
2
|
class TransitionService
|
3
3
|
|
4
|
-
def self.
|
5
|
-
|
4
|
+
def self.transition(_context, _name, _params={})
|
5
|
+
self.new(_context).transition(_name, _params)
|
6
|
+
end
|
7
|
+
|
8
|
+
attr_reader :document, :navigator
|
9
|
+
|
10
|
+
def initialize(_context)
|
11
|
+
@context = _context
|
12
|
+
end
|
13
|
+
|
14
|
+
def transition(_name, _params={})
|
15
|
+
navigator_class = if _name.is_a? String or _name.is_a? Symbol
|
6
16
|
load_class_from_uri _name
|
7
17
|
else _name end
|
8
18
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
19
|
+
@context.prepare
|
20
|
+
@navigator = navigator_class.new @context, _params
|
21
|
+
|
22
|
+
@document = @navigator.run
|
23
|
+
@document = @document.as_json if @document.respond_to? :as_json
|
24
|
+
|
25
|
+
self
|
13
26
|
end
|
14
27
|
|
15
28
|
private
|
16
29
|
|
17
|
-
def
|
30
|
+
def load_class_from_uri(_uri)
|
18
31
|
class_name = Utils::Naming.decode_crabfarm_uri _uri
|
19
32
|
class_name.constantize
|
20
33
|
end
|
data/lib/crabfarm/version.rb
CHANGED
metadata
CHANGED
@@ -1,43 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crabfarm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ignacio Baixas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: selenium-webdriver
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '2.45'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ~>
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '2.45'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: nokogiri
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ~>
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 1.6.6
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ~>
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: 1.6.6
|
41
13
|
- !ruby/object:Gem::Dependency
|
42
14
|
name: activesupport
|
43
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -156,6 +128,34 @@ dependencies:
|
|
156
128
|
- - ~>
|
157
129
|
- !ruby/object:Gem::Version
|
158
130
|
version: 0.5.5
|
131
|
+
- !ruby/object:Gem::Dependency
|
132
|
+
name: selenium-webdriver
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - ~>
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '2.45'
|
138
|
+
type: :development
|
139
|
+
prerelease: false
|
140
|
+
version_requirements: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - ~>
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '2.45'
|
145
|
+
- !ruby/object:Gem::Dependency
|
146
|
+
name: nokogiri
|
147
|
+
requirement: !ruby/object:Gem::Requirement
|
148
|
+
requirements:
|
149
|
+
- - ~>
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: 1.6.6
|
152
|
+
type: :development
|
153
|
+
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ~>
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: 1.6.6
|
159
159
|
- !ruby/object:Gem::Dependency
|
160
160
|
name: bundler
|
161
161
|
requirement: !ruby/object:Gem::Requirement
|
@@ -374,45 +374,46 @@ executables:
|
|
374
374
|
extensions: []
|
375
375
|
extra_rdoc_files: []
|
376
376
|
files:
|
377
|
-
- lib/crabfarm/adapters/browser/
|
378
|
-
- lib/crabfarm/adapters/browser/
|
379
|
-
- lib/crabfarm/adapters/browser/
|
380
|
-
- lib/crabfarm/adapters/
|
381
|
-
- lib/crabfarm/adapters/
|
382
|
-
- lib/crabfarm/adapters/
|
377
|
+
- lib/crabfarm/adapters/browser/abstract_webdriver.rb
|
378
|
+
- lib/crabfarm/adapters/browser/chrome.rb
|
379
|
+
- lib/crabfarm/adapters/browser/firefox.rb
|
380
|
+
- lib/crabfarm/adapters/browser/noop.rb
|
381
|
+
- lib/crabfarm/adapters/browser/phantom_js.rb
|
382
|
+
- lib/crabfarm/adapters/browser/remote_webdriver.rb
|
383
|
+
- lib/crabfarm/adapters/driver_wrapper/capybara.rb
|
384
|
+
- lib/crabfarm/adapters/driver_wrapper/surfer.rb
|
385
|
+
- lib/crabfarm/adapters/driver_wrapper/watir.rb
|
383
386
|
- lib/crabfarm/adapters/parser/nokogiri.rb
|
384
387
|
- lib/crabfarm/adapters/parser/pdf_reader.rb
|
385
388
|
- lib/crabfarm/assertion/context.rb
|
389
|
+
- lib/crabfarm/assertion/fields.rb
|
386
390
|
- lib/crabfarm/assertion/parsers.rb
|
387
391
|
- lib/crabfarm/assertion/validations.rb
|
388
392
|
- lib/crabfarm/assertion/wrapper.rb
|
389
|
-
- lib/crabfarm/
|
390
|
-
- lib/crabfarm/
|
393
|
+
- lib/crabfarm/base_navigator.rb
|
394
|
+
- lib/crabfarm/base_reducer.rb
|
395
|
+
- lib/crabfarm/base_struct.rb
|
391
396
|
- lib/crabfarm/cli.rb
|
392
397
|
- lib/crabfarm/configuration.rb
|
393
398
|
- lib/crabfarm/context.rb
|
394
399
|
- lib/crabfarm/context_factory.rb
|
395
400
|
- lib/crabfarm/crabtrap_context.rb
|
396
401
|
- lib/crabfarm/crabtrap_runner.rb
|
397
|
-
- lib/crabfarm/
|
398
|
-
- lib/crabfarm/driver_bucket.rb
|
399
|
-
- lib/crabfarm/driver_bucket_pool.rb
|
402
|
+
- lib/crabfarm/driver_pool.rb
|
400
403
|
- lib/crabfarm/dsl/surfer/search_context.rb
|
401
404
|
- lib/crabfarm/dsl/surfer/surf_context.rb
|
402
405
|
- lib/crabfarm/dsl/surfer.rb
|
403
406
|
- lib/crabfarm/engines/async_state_manager.rb
|
404
407
|
- lib/crabfarm/engines/sync_state_manager.rb
|
405
408
|
- lib/crabfarm/errors.rb
|
406
|
-
- lib/crabfarm/
|
409
|
+
- lib/crabfarm/forked_navigator.rb
|
407
410
|
- lib/crabfarm/global_state.rb
|
408
411
|
- lib/crabfarm/http_client.rb
|
409
|
-
- lib/crabfarm/mocks/noop_driver.rb
|
410
412
|
- lib/crabfarm/modes/console.rb
|
411
413
|
- lib/crabfarm/modes/generator.rb
|
412
414
|
- lib/crabfarm/modes/publisher.rb
|
413
415
|
- lib/crabfarm/modes/recorder.rb
|
414
416
|
- lib/crabfarm/modes/server.rb
|
415
|
-
- lib/crabfarm/phantom_driver_factory.rb
|
416
417
|
- lib/crabfarm/phantom_runner.rb
|
417
418
|
- lib/crabfarm/rspec.rb
|
418
419
|
- lib/crabfarm/state_store.rb
|
@@ -427,11 +428,12 @@ files:
|
|
427
428
|
- lib/crabfarm/templates/dot_gitkeep.erb
|
428
429
|
- lib/crabfarm/templates/dot_rspec.erb
|
429
430
|
- lib/crabfarm/templates/Gemfile.erb
|
430
|
-
- lib/crabfarm/templates/
|
431
|
-
- lib/crabfarm/templates/
|
431
|
+
- lib/crabfarm/templates/navigator.rb.erb
|
432
|
+
- lib/crabfarm/templates/navigator_spec.rb.erb
|
433
|
+
- lib/crabfarm/templates/reducer.rb.erb
|
434
|
+
- lib/crabfarm/templates/reducer_spec.rb.erb
|
432
435
|
- lib/crabfarm/templates/spec_helper.rb.erb
|
433
|
-
- lib/crabfarm/templates/
|
434
|
-
- lib/crabfarm/templates/state_spec.rb.erb
|
436
|
+
- lib/crabfarm/templates/struct.rb.erb
|
435
437
|
- lib/crabfarm/transition_service.rb
|
436
438
|
- lib/crabfarm/utils/naming.rb
|
437
439
|
- lib/crabfarm/utils/port_discovery.rb
|
@@ -1,14 +0,0 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
|
3
|
-
module Crabfarm
|
4
|
-
class OStructOutputBuilder
|
5
|
-
def self.prepare
|
6
|
-
# TODO: maybe wrap OpenStruct in a class that automatically generates nested OpenStructs when nested properties are accessed
|
7
|
-
OpenStruct.new
|
8
|
-
end
|
9
|
-
|
10
|
-
def self.serialize(_output)
|
11
|
-
_output.to_h
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
data/lib/crabfarm/base_parser.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
require "crabfarm/assertion/context"
|
2
|
-
|
3
|
-
module Crabfarm
|
4
|
-
class BaseParser < Delegator
|
5
|
-
include Assertion::Context
|
6
|
-
|
7
|
-
attr_reader :params, :document
|
8
|
-
|
9
|
-
def self.parser_engine(_engine=nil)
|
10
|
-
@engine_name = _engine
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.engine
|
14
|
-
@engine ||= Strategies.load(:parser_engine, @engine_name || Crabfarm.config.parser_engine)
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.snapshot_path(_name=nil)
|
18
|
-
_name = self.to_s.underscore if _name.nil?
|
19
|
-
File.join(GlobalState.snapshots_path, _name + '.' + engine.format)
|
20
|
-
end
|
21
|
-
|
22
|
-
def engine
|
23
|
-
self.class.engine
|
24
|
-
end
|
25
|
-
|
26
|
-
def initialize(_target, _params)
|
27
|
-
@parsed_data = engine.preprocess_parsing_target _target
|
28
|
-
@document = engine.parse @parsed_data
|
29
|
-
@params = _params
|
30
|
-
|
31
|
-
super @document
|
32
|
-
end
|
33
|
-
|
34
|
-
def parse
|
35
|
-
raise NotImplementedError.new
|
36
|
-
end
|
37
|
-
|
38
|
-
def take_snapshot(_name=nil)
|
39
|
-
file_path = self.class.snapshot_path _name
|
40
|
-
|
41
|
-
raise ArgumentError.new "Snapshot already exists '#{file_path}', make sure to implement the #{self.class.to_s} parse method." if File.exist? file_path
|
42
|
-
|
43
|
-
dir_path = file_path.split(File::SEPARATOR)[0...-1]
|
44
|
-
FileUtils.mkpath dir_path.join(File::SEPARATOR) if dir_path.length > 0
|
45
|
-
|
46
|
-
File.write file_path, @parsed_data
|
47
|
-
nil
|
48
|
-
end
|
49
|
-
|
50
|
-
def __getobj__
|
51
|
-
@document
|
52
|
-
end
|
53
|
-
|
54
|
-
def __setobj__(obj)
|
55
|
-
@document = obj
|
56
|
-
end
|
57
|
-
|
58
|
-
end
|
59
|
-
end
|
data/lib/crabfarm/base_state.rb
DELETED
@@ -1,112 +0,0 @@
|
|
1
|
-
require 'thwait'
|
2
|
-
require 'crabfarm/forked_state'
|
3
|
-
require "crabfarm/assertion/context"
|
4
|
-
|
5
|
-
module Crabfarm
|
6
|
-
class BaseState
|
7
|
-
include Assertion::Context
|
8
|
-
extend Forwardable
|
9
|
-
|
10
|
-
PARSE_METHOD_RX = /^parse_(.*)$/
|
11
|
-
|
12
|
-
attr_reader :params, :output
|
13
|
-
|
14
|
-
def_delegators '@context', :http
|
15
|
-
def_delegators '@context.pool', :driver
|
16
|
-
def_delegators '@context.store', :get, :fetch
|
17
|
-
|
18
|
-
def self.browser_dsl(_dsl)
|
19
|
-
@class_browser_dsl = _dsl
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.output_builder(_builder)
|
23
|
-
@class_output_builder = _builder
|
24
|
-
end
|
25
|
-
|
26
|
-
def initialize(_context, _params)
|
27
|
-
@context = _context
|
28
|
-
@params = _params
|
29
|
-
|
30
|
-
@dsl = Strategies.load(:browser_dsl, class_browser_dsl || Crabfarm.config.browser_dsl)
|
31
|
-
@builder = Strategies.load(:output_builder, class_output_builder || Crabfarm.config.output_builder)
|
32
|
-
@output = @builder.prepare
|
33
|
-
end
|
34
|
-
|
35
|
-
def browser(_name=nil)
|
36
|
-
@dsl.wrap driver(_name)
|
37
|
-
end
|
38
|
-
|
39
|
-
def download(_url)
|
40
|
-
@context.http.get(_url).body
|
41
|
-
end
|
42
|
-
|
43
|
-
def output
|
44
|
-
@output
|
45
|
-
end
|
46
|
-
|
47
|
-
def output_as_json
|
48
|
-
@builder.serialize @output
|
49
|
-
end
|
50
|
-
|
51
|
-
def crawl
|
52
|
-
raise NotImplementedError.new
|
53
|
-
end
|
54
|
-
|
55
|
-
def parse(_target=nil, _options={})
|
56
|
-
parser_class = _options.delete :using
|
57
|
-
|
58
|
-
if parser_class.nil?
|
59
|
-
parser_class = (self.class.name + 'Parser').constantize
|
60
|
-
end
|
61
|
-
|
62
|
-
parser = parser_class.new _target, @params.merge(_options)
|
63
|
-
parser.parse
|
64
|
-
return parser
|
65
|
-
end
|
66
|
-
|
67
|
-
def fork_each(_enumerator, &_block)
|
68
|
-
session_id = 0
|
69
|
-
mutex = Mutex.new
|
70
|
-
ths = _enumerator.map do |value|
|
71
|
-
session_id += 1
|
72
|
-
start_forked_state("th_session_#{session_id}", value, _block, mutex)
|
73
|
-
end
|
74
|
-
ThreadsWait.all_waits(*ths)
|
75
|
-
end
|
76
|
-
|
77
|
-
def method_missing(_method, *_args, &_block)
|
78
|
-
m = PARSE_METHOD_RX.match(_method)
|
79
|
-
if m
|
80
|
-
options = _args[1] || {}
|
81
|
-
options[:using] = (m[1].camelize + 'Parser').constantize
|
82
|
-
parse _args[0], options
|
83
|
-
else super end
|
84
|
-
end
|
85
|
-
|
86
|
-
def respond_to?(_method, _include_all=false)
|
87
|
-
return true if PARSE_METHOD_RX === _method
|
88
|
-
super
|
89
|
-
end
|
90
|
-
|
91
|
-
private
|
92
|
-
|
93
|
-
def class_browser_dsl
|
94
|
-
self.class.instance_variable_get :@class_browser_dsl
|
95
|
-
end
|
96
|
-
|
97
|
-
def class_output_builder
|
98
|
-
self.class.instance_variable_get :@class_output_builder
|
99
|
-
end
|
100
|
-
|
101
|
-
def start_forked_state(_name, _value, _block, _mutex)
|
102
|
-
Thread.new {
|
103
|
-
sub_state = ForkedState.new self, _name, _mutex
|
104
|
-
begin
|
105
|
-
sub_state.instance_exec _value, &_block
|
106
|
-
ensure
|
107
|
-
sub_state.driver.reset
|
108
|
-
end
|
109
|
-
}
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|