crabfarm 0.2.5 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/crabfarm.rb +17 -18
- data/lib/crabfarm/adapters/browser/abstract_webdriver.rb +60 -0
- data/lib/crabfarm/adapters/browser/chrome.rb +24 -0
- data/lib/crabfarm/adapters/browser/firefox.rb +26 -0
- data/lib/crabfarm/adapters/browser/noop.rb +25 -0
- data/lib/crabfarm/adapters/browser/phantom_js.rb +59 -0
- data/lib/crabfarm/adapters/browser/remote_webdriver.rb +31 -0
- data/lib/crabfarm/adapters/driver_wrapper/capybara.rb +11 -0
- data/lib/crabfarm/adapters/driver_wrapper/surfer.rb +13 -0
- data/lib/crabfarm/adapters/{browser → driver_wrapper}/watir.rb +7 -3
- data/lib/crabfarm/adapters/parser/nokogiri.rb +17 -15
- data/lib/crabfarm/adapters/parser/pdf_reader.rb +14 -12
- data/lib/crabfarm/assertion/fields.rb +85 -0
- data/lib/crabfarm/base_navigator.rb +78 -0
- data/lib/crabfarm/base_reducer.rb +68 -0
- data/lib/crabfarm/base_struct.rb +17 -0
- data/lib/crabfarm/cli.rb +18 -8
- data/lib/crabfarm/configuration.rb +24 -51
- data/lib/crabfarm/context.rb +19 -43
- data/lib/crabfarm/crabtrap_context.rb +4 -11
- data/lib/crabfarm/driver_pool.rb +32 -0
- data/lib/crabfarm/dsl/surfer/surf_context.rb +5 -25
- data/lib/crabfarm/engines/async_state_manager.rb +1 -1
- data/lib/crabfarm/engines/sync_state_manager.rb +1 -1
- data/lib/crabfarm/forked_navigator.rb +31 -0
- data/lib/crabfarm/modes/console.rb +4 -4
- data/lib/crabfarm/modes/generator.rb +24 -11
- data/lib/crabfarm/rspec.rb +26 -24
- data/lib/crabfarm/strategies.rb +15 -9
- data/lib/crabfarm/templates/Crabfile.erb +21 -26
- data/lib/crabfarm/templates/Gemfile.erb +6 -0
- data/lib/crabfarm/templates/navigator.rb.erb +20 -0
- data/lib/crabfarm/templates/{state_spec.rb.erb → navigator_spec.rb.erb} +1 -1
- data/lib/crabfarm/templates/{parser.rb.erb → reducer.rb.erb} +4 -4
- data/lib/crabfarm/templates/{parser_spec.rb.erb → reducer_spec.rb.erb} +1 -1
- data/lib/crabfarm/templates/struct.rb.erb +12 -0
- data/lib/crabfarm/transition_service.rb +20 -7
- data/lib/crabfarm/version.rb +1 -1
- metadata +50 -48
- data/lib/crabfarm/adapters/browser/capybara.rb +0 -7
- data/lib/crabfarm/adapters/browser/surfer.rb +0 -9
- data/lib/crabfarm/adapters/output/hash.rb +0 -11
- data/lib/crabfarm/adapters/output/jbuilder.rb +0 -11
- data/lib/crabfarm/adapters/output/ostruct.rb +0 -14
- data/lib/crabfarm/base_parser.rb +0 -59
- data/lib/crabfarm/base_state.rb +0 -112
- data/lib/crabfarm/default_driver_factory.rb +0 -86
- data/lib/crabfarm/driver_bucket.rb +0 -42
- data/lib/crabfarm/driver_bucket_pool.rb +0 -26
- data/lib/crabfarm/forked_state.rb +0 -38
- data/lib/crabfarm/mocks/noop_driver.rb +0 -6
- data/lib/crabfarm/phantom_driver_factory.rb +0 -33
- data/lib/crabfarm/templates/state.rb.erb +0 -8
@@ -2,6 +2,12 @@ source 'https://rubygems.org'
|
|
2
2
|
|
3
3
|
gem "crabfarm", '<%= version %>'
|
4
4
|
|
5
|
+
# Comment this if not using the nokogiri default HTML parser
|
6
|
+
gem 'nokogiri', "~> 1.6.6"
|
7
|
+
|
8
|
+
# Comment this if not using a selenium webdriver based driver
|
9
|
+
gem "selenium-webdriver", "~> 2.45"
|
10
|
+
|
5
11
|
group :test do
|
6
12
|
gem "rspec-nc"
|
7
13
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class <%= navigator_class %> < Crabfarm::BaseNavigator
|
2
|
+
|
3
|
+
def run
|
4
|
+
<% if navigator_url.nil? %>
|
5
|
+
# replace the following by your navigation code:
|
6
|
+
raise NotImplementedError.new 'You must provide some navigation code for <%= navigator_class %>'
|
7
|
+
<% else %>
|
8
|
+
browser.goto '<%= navigator_url %>'
|
9
|
+
<% end %>
|
10
|
+
|
11
|
+
# Call the homonymous reducer over the entire document and output it by default
|
12
|
+
# You can call other reducers by using the `reduce` method, like this:
|
13
|
+
#
|
14
|
+
# reduce browser.search('td').first, using: MyOtherReducer
|
15
|
+
#
|
16
|
+
reduce_with_defaults
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
@@ -1,9 +1,9 @@
|
|
1
|
-
class <%=
|
1
|
+
class <%= reducer_class %> < Crabfarm::BaseReducer
|
2
2
|
|
3
|
-
def
|
4
|
-
# You can replace the following line after running the owner
|
3
|
+
def run
|
4
|
+
# You can replace the following line after running the owner navigator specs once.
|
5
5
|
# Take a look at the 'Testing' section of the README.md for more information!
|
6
|
-
|
6
|
+
take_snapshot_and_fail
|
7
7
|
end
|
8
8
|
|
9
9
|
end
|
@@ -1,20 +1,33 @@
|
|
1
1
|
module Crabfarm
|
2
2
|
class TransitionService
|
3
3
|
|
4
|
-
def self.
|
5
|
-
|
4
|
+
def self.transition(_context, _name, _params={})
|
5
|
+
self.new(_context).transition(_name, _params)
|
6
|
+
end
|
7
|
+
|
8
|
+
attr_reader :document, :navigator
|
9
|
+
|
10
|
+
def initialize(_context)
|
11
|
+
@context = _context
|
12
|
+
end
|
13
|
+
|
14
|
+
def transition(_name, _params={})
|
15
|
+
navigator_class = if _name.is_a? String or _name.is_a? Symbol
|
6
16
|
load_class_from_uri _name
|
7
17
|
else _name end
|
8
18
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
19
|
+
@context.prepare
|
20
|
+
@navigator = navigator_class.new @context, _params
|
21
|
+
|
22
|
+
@document = @navigator.run
|
23
|
+
@document = @document.as_json if @document.respond_to? :as_json
|
24
|
+
|
25
|
+
self
|
13
26
|
end
|
14
27
|
|
15
28
|
private
|
16
29
|
|
17
|
-
def
|
30
|
+
def load_class_from_uri(_uri)
|
18
31
|
class_name = Utils::Naming.decode_crabfarm_uri _uri
|
19
32
|
class_name.constantize
|
20
33
|
end
|
data/lib/crabfarm/version.rb
CHANGED
metadata
CHANGED
@@ -1,43 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crabfarm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ignacio Baixas
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: selenium-webdriver
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '2.45'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ~>
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '2.45'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: nokogiri
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ~>
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 1.6.6
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ~>
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: 1.6.6
|
41
13
|
- !ruby/object:Gem::Dependency
|
42
14
|
name: activesupport
|
43
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -156,6 +128,34 @@ dependencies:
|
|
156
128
|
- - ~>
|
157
129
|
- !ruby/object:Gem::Version
|
158
130
|
version: 0.5.5
|
131
|
+
- !ruby/object:Gem::Dependency
|
132
|
+
name: selenium-webdriver
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - ~>
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '2.45'
|
138
|
+
type: :development
|
139
|
+
prerelease: false
|
140
|
+
version_requirements: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - ~>
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '2.45'
|
145
|
+
- !ruby/object:Gem::Dependency
|
146
|
+
name: nokogiri
|
147
|
+
requirement: !ruby/object:Gem::Requirement
|
148
|
+
requirements:
|
149
|
+
- - ~>
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: 1.6.6
|
152
|
+
type: :development
|
153
|
+
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ~>
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: 1.6.6
|
159
159
|
- !ruby/object:Gem::Dependency
|
160
160
|
name: bundler
|
161
161
|
requirement: !ruby/object:Gem::Requirement
|
@@ -374,45 +374,46 @@ executables:
|
|
374
374
|
extensions: []
|
375
375
|
extra_rdoc_files: []
|
376
376
|
files:
|
377
|
-
- lib/crabfarm/adapters/browser/
|
378
|
-
- lib/crabfarm/adapters/browser/
|
379
|
-
- lib/crabfarm/adapters/browser/
|
380
|
-
- lib/crabfarm/adapters/
|
381
|
-
- lib/crabfarm/adapters/
|
382
|
-
- lib/crabfarm/adapters/
|
377
|
+
- lib/crabfarm/adapters/browser/abstract_webdriver.rb
|
378
|
+
- lib/crabfarm/adapters/browser/chrome.rb
|
379
|
+
- lib/crabfarm/adapters/browser/firefox.rb
|
380
|
+
- lib/crabfarm/adapters/browser/noop.rb
|
381
|
+
- lib/crabfarm/adapters/browser/phantom_js.rb
|
382
|
+
- lib/crabfarm/adapters/browser/remote_webdriver.rb
|
383
|
+
- lib/crabfarm/adapters/driver_wrapper/capybara.rb
|
384
|
+
- lib/crabfarm/adapters/driver_wrapper/surfer.rb
|
385
|
+
- lib/crabfarm/adapters/driver_wrapper/watir.rb
|
383
386
|
- lib/crabfarm/adapters/parser/nokogiri.rb
|
384
387
|
- lib/crabfarm/adapters/parser/pdf_reader.rb
|
385
388
|
- lib/crabfarm/assertion/context.rb
|
389
|
+
- lib/crabfarm/assertion/fields.rb
|
386
390
|
- lib/crabfarm/assertion/parsers.rb
|
387
391
|
- lib/crabfarm/assertion/validations.rb
|
388
392
|
- lib/crabfarm/assertion/wrapper.rb
|
389
|
-
- lib/crabfarm/
|
390
|
-
- lib/crabfarm/
|
393
|
+
- lib/crabfarm/base_navigator.rb
|
394
|
+
- lib/crabfarm/base_reducer.rb
|
395
|
+
- lib/crabfarm/base_struct.rb
|
391
396
|
- lib/crabfarm/cli.rb
|
392
397
|
- lib/crabfarm/configuration.rb
|
393
398
|
- lib/crabfarm/context.rb
|
394
399
|
- lib/crabfarm/context_factory.rb
|
395
400
|
- lib/crabfarm/crabtrap_context.rb
|
396
401
|
- lib/crabfarm/crabtrap_runner.rb
|
397
|
-
- lib/crabfarm/
|
398
|
-
- lib/crabfarm/driver_bucket.rb
|
399
|
-
- lib/crabfarm/driver_bucket_pool.rb
|
402
|
+
- lib/crabfarm/driver_pool.rb
|
400
403
|
- lib/crabfarm/dsl/surfer/search_context.rb
|
401
404
|
- lib/crabfarm/dsl/surfer/surf_context.rb
|
402
405
|
- lib/crabfarm/dsl/surfer.rb
|
403
406
|
- lib/crabfarm/engines/async_state_manager.rb
|
404
407
|
- lib/crabfarm/engines/sync_state_manager.rb
|
405
408
|
- lib/crabfarm/errors.rb
|
406
|
-
- lib/crabfarm/
|
409
|
+
- lib/crabfarm/forked_navigator.rb
|
407
410
|
- lib/crabfarm/global_state.rb
|
408
411
|
- lib/crabfarm/http_client.rb
|
409
|
-
- lib/crabfarm/mocks/noop_driver.rb
|
410
412
|
- lib/crabfarm/modes/console.rb
|
411
413
|
- lib/crabfarm/modes/generator.rb
|
412
414
|
- lib/crabfarm/modes/publisher.rb
|
413
415
|
- lib/crabfarm/modes/recorder.rb
|
414
416
|
- lib/crabfarm/modes/server.rb
|
415
|
-
- lib/crabfarm/phantom_driver_factory.rb
|
416
417
|
- lib/crabfarm/phantom_runner.rb
|
417
418
|
- lib/crabfarm/rspec.rb
|
418
419
|
- lib/crabfarm/state_store.rb
|
@@ -427,11 +428,12 @@ files:
|
|
427
428
|
- lib/crabfarm/templates/dot_gitkeep.erb
|
428
429
|
- lib/crabfarm/templates/dot_rspec.erb
|
429
430
|
- lib/crabfarm/templates/Gemfile.erb
|
430
|
-
- lib/crabfarm/templates/
|
431
|
-
- lib/crabfarm/templates/
|
431
|
+
- lib/crabfarm/templates/navigator.rb.erb
|
432
|
+
- lib/crabfarm/templates/navigator_spec.rb.erb
|
433
|
+
- lib/crabfarm/templates/reducer.rb.erb
|
434
|
+
- lib/crabfarm/templates/reducer_spec.rb.erb
|
432
435
|
- lib/crabfarm/templates/spec_helper.rb.erb
|
433
|
-
- lib/crabfarm/templates/
|
434
|
-
- lib/crabfarm/templates/state_spec.rb.erb
|
436
|
+
- lib/crabfarm/templates/struct.rb.erb
|
435
437
|
- lib/crabfarm/transition_service.rb
|
436
438
|
- lib/crabfarm/utils/naming.rb
|
437
439
|
- lib/crabfarm/utils/port_discovery.rb
|
@@ -1,14 +0,0 @@
|
|
1
|
-
require 'ostruct'
|
2
|
-
|
3
|
-
module Crabfarm
|
4
|
-
class OStructOutputBuilder
|
5
|
-
def self.prepare
|
6
|
-
# TODO: maybe wrap OpenStruct in a class that automatically generates other OpenStructs when nested properties are accessed
|
7
|
-
OpenStruct.new
|
8
|
-
end
|
9
|
-
|
10
|
-
def self.serialize(_output)
|
11
|
-
_output.to_h
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
data/lib/crabfarm/base_parser.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
require "crabfarm/assertion/context"
|
2
|
-
|
3
|
-
module Crabfarm
|
4
|
-
class BaseParser < Delegator
|
5
|
-
include Assertion::Context
|
6
|
-
|
7
|
-
attr_reader :params, :document
|
8
|
-
|
9
|
-
def self.parser_engine(_engine=nil)
|
10
|
-
@engine_name = _engine
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.engine
|
14
|
-
@engine ||= Strategies.load(:parser_engine, @engine_name || Crabfarm.config.parser_engine)
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.snapshot_path(_name=nil)
|
18
|
-
_name = self.to_s.underscore if _name.nil?
|
19
|
-
File.join(GlobalState.snapshots_path, _name + '.' + engine.format)
|
20
|
-
end
|
21
|
-
|
22
|
-
def engine
|
23
|
-
self.class.engine
|
24
|
-
end
|
25
|
-
|
26
|
-
def initialize(_target, _params)
|
27
|
-
@parsed_data = engine.preprocess_parsing_target _target
|
28
|
-
@document = engine.parse @parsed_data
|
29
|
-
@params = _params
|
30
|
-
|
31
|
-
super @document
|
32
|
-
end
|
33
|
-
|
34
|
-
def parse
|
35
|
-
raise NotImplementedError.new
|
36
|
-
end
|
37
|
-
|
38
|
-
def take_snapshot(_name=nil)
|
39
|
-
file_path = self.class.snapshot_path _name
|
40
|
-
|
41
|
-
raise ArgumentError.new "Snapshot already exists '#{file_path}', make sure to implement the #{self.class.to_s} parse method." if File.exist? file_path
|
42
|
-
|
43
|
-
dir_path = file_path.split(File::SEPARATOR)[0...-1]
|
44
|
-
FileUtils.mkpath dir_path.join(File::SEPARATOR) if dir_path.length > 0
|
45
|
-
|
46
|
-
File.write file_path, @parsed_data
|
47
|
-
nil
|
48
|
-
end
|
49
|
-
|
50
|
-
def __getobj__
|
51
|
-
@document
|
52
|
-
end
|
53
|
-
|
54
|
-
def __setobj__(obj)
|
55
|
-
@document = obj
|
56
|
-
end
|
57
|
-
|
58
|
-
end
|
59
|
-
end
|
data/lib/crabfarm/base_state.rb
DELETED
@@ -1,112 +0,0 @@
|
|
1
|
-
require 'thwait'
|
2
|
-
require 'crabfarm/forked_state'
|
3
|
-
require "crabfarm/assertion/context"
|
4
|
-
|
5
|
-
module Crabfarm
|
6
|
-
class BaseState
|
7
|
-
include Assertion::Context
|
8
|
-
extend Forwardable
|
9
|
-
|
10
|
-
PARSE_METHOD_RX = /^parse_(.*)$/
|
11
|
-
|
12
|
-
attr_reader :params, :output
|
13
|
-
|
14
|
-
def_delegators '@context', :http
|
15
|
-
def_delegators '@context.pool', :driver
|
16
|
-
def_delegators '@context.store', :get, :fetch
|
17
|
-
|
18
|
-
def self.browser_dsl(_dsl)
|
19
|
-
@class_browser_dsl = _dsl
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.output_builder(_builder)
|
23
|
-
@class_output_builder = _builder
|
24
|
-
end
|
25
|
-
|
26
|
-
def initialize(_context, _params)
|
27
|
-
@context = _context
|
28
|
-
@params = _params
|
29
|
-
|
30
|
-
@dsl = Strategies.load(:browser_dsl, class_browser_dsl || Crabfarm.config.browser_dsl)
|
31
|
-
@builder = Strategies.load(:output_builder, class_output_builder || Crabfarm.config.output_builder)
|
32
|
-
@output = @builder.prepare
|
33
|
-
end
|
34
|
-
|
35
|
-
def browser(_name=nil)
|
36
|
-
@dsl.wrap driver(_name)
|
37
|
-
end
|
38
|
-
|
39
|
-
def download(_url)
|
40
|
-
@context.http.get(_url).body
|
41
|
-
end
|
42
|
-
|
43
|
-
def output
|
44
|
-
@output
|
45
|
-
end
|
46
|
-
|
47
|
-
def output_as_json
|
48
|
-
@builder.serialize @output
|
49
|
-
end
|
50
|
-
|
51
|
-
def crawl
|
52
|
-
raise NotImplementedError.new
|
53
|
-
end
|
54
|
-
|
55
|
-
def parse(_target=nil, _options={})
|
56
|
-
parser_class = _options.delete :using
|
57
|
-
|
58
|
-
if parser_class.nil?
|
59
|
-
parser_class = (self.class.name + 'Parser').constantize
|
60
|
-
end
|
61
|
-
|
62
|
-
parser = parser_class.new _target, @params.merge(_options)
|
63
|
-
parser.parse
|
64
|
-
return parser
|
65
|
-
end
|
66
|
-
|
67
|
-
def fork_each(_enumerator, &_block)
|
68
|
-
session_id = 0
|
69
|
-
mutex = Mutex.new
|
70
|
-
ths = _enumerator.map do |value|
|
71
|
-
session_id += 1
|
72
|
-
start_forked_state("th_session_#{session_id}", value, _block, mutex)
|
73
|
-
end
|
74
|
-
ThreadsWait.all_waits(*ths)
|
75
|
-
end
|
76
|
-
|
77
|
-
def method_missing(_method, *_args, &_block)
|
78
|
-
m = PARSE_METHOD_RX.match(_method)
|
79
|
-
if m
|
80
|
-
options = _args[1] || {}
|
81
|
-
options[:using] = (m[1].camelize + 'Parser').constantize
|
82
|
-
parse _args[0], options
|
83
|
-
else super end
|
84
|
-
end
|
85
|
-
|
86
|
-
def respond_to?(_method, _include_all=false)
|
87
|
-
return true if PARSE_METHOD_RX === _method
|
88
|
-
super
|
89
|
-
end
|
90
|
-
|
91
|
-
private
|
92
|
-
|
93
|
-
def class_browser_dsl
|
94
|
-
self.class.instance_variable_get :@class_browser_dsl
|
95
|
-
end
|
96
|
-
|
97
|
-
def class_output_builder
|
98
|
-
self.class.instance_variable_get :@class_output_builder
|
99
|
-
end
|
100
|
-
|
101
|
-
def start_forked_state(_name, _value, _block, _mutex)
|
102
|
-
Thread.new {
|
103
|
-
sub_state = ForkedState.new self, _name, _mutex
|
104
|
-
begin
|
105
|
-
sub_state.instance_exec _value, &_block
|
106
|
-
ensure
|
107
|
-
sub_state.driver.reset
|
108
|
-
end
|
109
|
-
}
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|