crabfarm 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 89af60fe404e36fbc112ab5325ad3bbc618e25f1
4
- data.tar.gz: 2930de41a6c3c9725533d1a9f84fc6d0b89b0ab9
3
+ metadata.gz: 425cdb9cbc7e43b16e7ab9a7d8ef5f187f95d07d
4
+ data.tar.gz: a8f7360685f9febdb8e9076586ab1e856f6c0323
5
5
  SHA512:
6
- metadata.gz: 31c2d828b6c7351f2efa1a7b79ba0762a21770b12f66560cc310a7ce1e0c91a574adfc1a3c8193937fed2d63a7f7d35d891c4b4c419e9b87a8e73d192c61d167
7
- data.tar.gz: de65bef60361d0c7fa3365d8c1e05a82ed702bfbce0224c54f2ea5d94fd4a32d572e775d9321b388b0b46460fbba5c691a7c472f19ca213f45d64f1b4cf4f8ff
6
+ metadata.gz: 203b29582a08881d693923620e7cb601433be93edf3551f8daebc414fff2aca12e3bfcc34b3e81b0d045921cfbb5ad74a47e7d3858925c31bf300e995107b33d
7
+ data.tar.gz: 8b2215150d4976599385b2da16b1b5cb89f169ef6b0ba41b3e719cd56fc8d68b2c061d015cece7523ad72ec95378c9c49ad0faa292259a5f7645c17cc4af273d
@@ -0,0 +1,17 @@
1
+ class Pincers::Core::SearchContext
2
+ def webdriver_elements
3
+ elements
4
+ end
5
+ end
6
+
7
+ module Crabfarm
8
+ module Adapters
9
+ module DriverWrapper
10
+ class Pincers
11
+ def self.wrap(_driver)
12
+ ::Pincers.for_webdriver _driver
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,23 @@
1
+ module Crabfarm
2
+ module Adapters
3
+ module Parser
4
+ class Pincers
5
+ def self.format
6
+ 'html'
7
+ end
8
+
9
+ def self.parse(_raw)
10
+ ::Pincers.for_nokogiri ::Nokogiri::HTML _raw
11
+ end
12
+
13
+ def self.preprocess_parsing_target(_target)
14
+ if _target.respond_to? :to_html
15
+ _target.to_html
16
+ else
17
+ _target
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -21,6 +21,12 @@ module Crabfarm
21
21
  @params = _params
22
22
  end
23
23
 
24
+ def navigate(_name, _params={})
25
+ TransitionService.transition(@context, _name, params.merge(_params)).navigator
26
+ end
27
+
28
+ alias :nav :navigate
29
+
24
30
  def browser(_name=nil)
25
31
  @context.pool.driver(_name)
26
32
  end
@@ -15,10 +15,10 @@ module Crabfarm
15
15
  [:webdriver_host, :string, 'Remote host, only available in driver: remote'],
16
16
  [:webdriver_port, :integer, 'Remote port, only available in driver: remote'],
17
17
  [:webdriver_capabilities, :mixed, 'Driver capabilities, depends on selected driver.'],
18
- [:webdriver_remote_timeout, :float, 'Request timeout in seconds, only available for remote or phatomjs driver.'],
18
+ [:webdriver_remote_timeout, :float, 'Request timeout in seconds, only available for remote or phantomjs driver.'],
19
19
  [:webdriver_window_width, :integer, 'Initial browser window width.'],
20
20
  [:webdriver_window_height, :integer, 'Initial browser window height.'],
21
- [:webdriver_dsl, :string, 'Webdriver wrapper to use, built in options are watir and surfer'],
21
+ [:webdriver_dsl, :string, 'Webdriver wrapper to use, built in options are pincers and watir'],
22
22
 
23
23
  # Phantom launcher configuration
24
24
  [:phantom_load_images, :boolean, 'Phantomjs image loading, only for phantomjs driver.'],
@@ -53,7 +53,7 @@ module Crabfarm
53
53
  def reset
54
54
  @values = {
55
55
  browser: 'phantomjs',
56
- parser: :nokogiri,
56
+ parser: :pincers,
57
57
  driver_factory: nil,
58
58
  log_path: nil,
59
59
  proxy: nil,
@@ -63,7 +63,7 @@ module Crabfarm
63
63
  webdriver_remote_timeout: 120,
64
64
  webdriver_window_width: 1280,
65
65
  webdriver_window_height: 800,
66
- webdriver_dsl: :watir,
66
+ webdriver_dsl: :pincers,
67
67
  phantom_load_images: false,
68
68
  phantom_ssl: 'any',
69
69
  phantom_bin_path: 'phantomjs',
@@ -88,7 +88,7 @@ module Crabfarm
88
88
  end
89
89
 
90
90
  def validate_remote(_url)
91
- return true if /^\w+\/\w+$/i === _url
91
+ return true if /^[\w\-]+\/[\w\-]+$/i === _url
92
92
  puts "Invalid remote syntax: #{_url}".color :red
93
93
  return false
94
94
  end
@@ -3,8 +3,8 @@
3
3
  set_browser :phantomjs
4
4
 
5
5
  # The default parser engine for reducers that do not specify one.
6
- # Available options are :nokogiri and :pdf_parser. :pdf_parser requires an additional gem to be added to Gemfile
7
- set_parser :nokogiri
6
+ # Available options are :pincers, :nokogiri and :pdf_parser. :pdf_parser requires an additional gem to be added to Gemfile
7
+ set_parser :pincers
8
8
 
9
9
  # The path where every crawler log is stored.
10
10
  set_log_path 'logs'
@@ -17,9 +17,9 @@ set_log_path 'logs'
17
17
 
18
18
  # The following parameters only apply if using a webdriver based driver
19
19
 
20
- # Selects the webdriver wrapper library to be used, options are :surfer, :watir and :capybara.
20
+ # Selects the webdriver wrapper library to be used, options are :pincers, :watir and :capybara.
21
21
  # Both watir and capybara require an additional gem to be added to Gemfile
22
- set_webdriver_dsl :watir
22
+ set_webdriver_dsl :pincers
23
23
 
24
24
  # Set the selected webdriver capabilities (check the driver documentation for more details)
25
25
  # set_webdriver_capabilities
@@ -1,16 +1,14 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
3
  gem "crabfarm", '<%= version %>'
4
+ gem "pincers", '~> 0.2.0'
4
5
 
5
- # Comment this is not using the nokogiri default HTML parser
6
+ # Comment this if not using a nokogiri based parser or browser
6
7
  gem 'nokogiri', "~> 1.6.6"
7
8
 
8
- # Comment this is not using a selenium webdriver based driver
9
+ # Comment this if not using a selenium webdriver based driver
9
10
  gem "selenium-webdriver", "~> 2.45"
10
11
 
11
- # Comment this if using other webdriver dsl
12
- gem "watir-webdriver"
13
-
14
12
  group :test do
15
13
  gem "rspec", "~> 3.2.0"
16
14
  gem "rspec-nc"
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.5.3"
2
+ VERSION = "0.6.0"
3
3
  end
data/lib/crabfarm.rb CHANGED
@@ -101,11 +101,12 @@ module Crabfarm
101
101
  register :browser, :noop, 'Crabfarm::Adapters::Browser::Noop'
102
102
 
103
103
  # bundled webdriver dsl adapters
104
- register :webdriver_dsl, :surfer, 'Crabfarm::Adapters::DriverWrapper::Surfer'
104
+ register :webdriver_dsl, :pincers, 'Crabfarm::Adapters::DriverWrapper::Pincers', dependencies: ['pincers']
105
105
  register :webdriver_dsl, :watir, 'Crabfarm::Adapters::DriverWrapper::Watir', dependencies: ['watir-webdriver']
106
106
  register :webdriver_dsl, :capybara, 'Crabfarm::Adapters::DriverWrapper::Capybara', dependencies: ['capybara']
107
107
 
108
108
  # bundled parsers dsl adapters
109
+ register :parser, :pincers, 'Crabfarm::Adapters::Parser::Pincers', dependencies: ['pincers', 'nokogiri']
109
110
  register :parser, :nokogiri, 'Crabfarm::Adapters::Parser::Nokogiri', dependencies: ['nokogiri']
110
111
  register :parser, :pdf_reader, 'Crabfarm::Adapters::Parser::PdfReader', dependencies: ['pdf-reader']
111
112
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-08 00:00:00.000000000 Z
11
+ date: 2015-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -184,6 +184,20 @@ dependencies:
184
184
  - - ~>
185
185
  - !ruby/object:Gem::Version
186
186
  version: 1.6.6
187
+ - !ruby/object:Gem::Dependency
188
+ name: pincers
189
+ requirement: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - ~>
192
+ - !ruby/object:Gem::Version
193
+ version: 0.2.0
194
+ type: :development
195
+ prerelease: false
196
+ version_requirements: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - ~>
199
+ - !ruby/object:Gem::Version
200
+ version: 0.2.0
187
201
  - !ruby/object:Gem::Dependency
188
202
  name: bundler
189
203
  requirement: !ruby/object:Gem::Requirement
@@ -409,10 +423,11 @@ files:
409
423
  - lib/crabfarm/adapters/browser/phantom_js.rb
410
424
  - lib/crabfarm/adapters/browser/remote_webdriver.rb
411
425
  - lib/crabfarm/adapters/driver_wrapper/capybara.rb
412
- - lib/crabfarm/adapters/driver_wrapper/surfer.rb
426
+ - lib/crabfarm/adapters/driver_wrapper/pincers.rb
413
427
  - lib/crabfarm/adapters/driver_wrapper/watir.rb
414
428
  - lib/crabfarm/adapters/parser/nokogiri.rb
415
429
  - lib/crabfarm/adapters/parser/pdf_reader.rb
430
+ - lib/crabfarm/adapters/parser/pincers.rb
416
431
  - lib/crabfarm/assertion/context.rb
417
432
  - lib/crabfarm/assertion/fields.rb
418
433
  - lib/crabfarm/assertion/parsers.rb
@@ -428,9 +443,6 @@ files:
428
443
  - lib/crabfarm/crabtrap_context.rb
429
444
  - lib/crabfarm/crabtrap_runner.rb
430
445
  - lib/crabfarm/driver_pool.rb
431
- - lib/crabfarm/dsl/surfer/search_context.rb
432
- - lib/crabfarm/dsl/surfer/surf_context.rb
433
- - lib/crabfarm/dsl/surfer.rb
434
446
  - lib/crabfarm/engines/async_state_manager.rb
435
447
  - lib/crabfarm/engines/sync_state_manager.rb
436
448
  - lib/crabfarm/errors.rb
@@ -528,3 +540,4 @@ signing_key:
528
540
  specification_version: 4
529
541
  summary: Crabfarm crawler creation framework
530
542
  test_files: []
543
+ has_rdoc:
@@ -1,13 +0,0 @@
1
- require 'crabfarm/dsl/surfer'
2
-
3
- module Crabfarm
4
- module Adapters
5
- module DriverWrapper
6
- class Surfer
7
- def self.wrap(_driver)
8
- Crabfarm::Dsl::Surfer::SurfContext.new _driver
9
- end
10
- end
11
- end
12
- end
13
- end
@@ -1,152 +0,0 @@
1
- module Crabfarm
2
- module Dsl
3
- module Surfer
4
- class SearchContext
5
- include Enumerable
6
- extend Forwardable
7
-
8
- TIMEOUT = 10.0 # Default timeout for waiting operations
9
-
10
- attr_accessor :elements, :parent
11
-
12
- def_delegators :elements, :length, :count, :empty?
13
-
14
- def initialize(_elements, _parent)
15
- @elements = _elements
16
- @parent = _parent
17
- end
18
-
19
- def webdriver_elements
20
- @elements
21
- end
22
-
23
- def root
24
- @parent.root
25
- end
26
-
27
- def each
28
- elements.each { |el| yield child_context [el] }
29
- end
30
-
31
- def [](*args)
32
- if args[0].is_a? String or args[0].is_a? Symbol
33
- attribute args[0]
34
- else
35
- child_context Array(elements.send(:[],*args))
36
- end
37
- end
38
-
39
- def first
40
- if elements.first.nil? then nil else child_context [elements.first] end
41
- end
42
-
43
- def last
44
- if elements.last.nil? then nil else child_context [elements.last] end
45
- end
46
-
47
- def element!
48
- raise EmptySetError.new("This set is empty", self) if empty?
49
- elements.first
50
- end
51
-
52
- def classes
53
- wrap_errors { (element!['class'] || '').split(' ') }
54
- end
55
-
56
- def search(_selector=nil, _options={})
57
- _options[:css] = _selector if _selector
58
-
59
- wait_mode = _options.delete :wait
60
- if wait_mode
61
-
62
- # retrieve timeout
63
- timeout = _options.delete :timeout
64
- timeout = TIMEOUT if timeout.nil?
65
-
66
- # use a selenium timeout
67
- wrap_errors do
68
- wait = Selenium::WebDriver::Wait.new(timeout: timeout)
69
- wait.until do
70
- new_elements = search_elements _options
71
-
72
- # test wait condition
73
- ok = case wait_mode
74
- when :present then (new_elements.length > 0)
75
- when :visible then (new_elements.length > 0 and new_elements.first.displayed?)
76
- when :enabled then (new_elements.length > 0 and new_elements.first.displayed? and new_elements.first.enabled?)
77
- when :not_present then (new_elements.length == 0)
78
- when :not_visible then (not new_elements.any? { |e| e.displayed? })
79
- else
80
- raise SetupError.new "Invalid wait mode '#{wait_mode}'"
81
- end
82
-
83
- child_context new_elements if ok
84
- end
85
- end
86
- else
87
- child_context search_elements(_options)
88
- end
89
- end
90
-
91
- def fill(_value)
92
- wrap_errors do
93
- element!.clear
94
- element!.send_keys _value
95
- end
96
- end
97
-
98
- def to_html
99
- elements.map { |e| e['outerHTML'] }.join
100
- end
101
-
102
- # Any methods missing are forwarded to the main element (first).
103
- def method_missing(_method, *_args, &_block)
104
- wrap_errors do
105
- m = /^(.*)_all$/.match _method.to_s
106
- if m then
107
- return [] if empty?
108
- elements.map { |e| e.send(m[1], *_args, &_block) }
109
- else
110
- element!.send(_method, *_args, &_block)
111
- end
112
- end
113
- end
114
-
115
- def respond_to?(_method, _include_all=false)
116
- return true if super
117
- m = /^.*_all$/.match _method.to_s
118
- if m then
119
- return true if empty?
120
- elements.first.respond_to? m[1], _include_all
121
- else
122
- return true if empty?
123
- elements.first.respond_to? _method, _include_all
124
- end
125
- end
126
-
127
- private
128
-
129
- def child_context(_elements)
130
- SearchContext.new _elements, self
131
- end
132
-
133
- def wrap_errors
134
- begin
135
- yield
136
- rescue Selenium::WebDriver::Error::WebDriverError => e
137
- raise WebdriverError.new e, self
138
- end
139
- end
140
-
141
- def search_elements(_options)
142
- wrap_errors do
143
- elements.inject([]) do |r, element|
144
- r + element.find_elements(_options)
145
- end
146
- end
147
- end
148
-
149
- end
150
- end
151
- end
152
- end
@@ -1,42 +0,0 @@
1
- module Crabfarm
2
- module Dsl
3
- module Surfer
4
- class SurfContext < SearchContext
5
-
6
- attr_reader :driver
7
-
8
- def_delegators 'driver.navigate', :back, :forward, :refresh
9
-
10
- def initialize(_driver)
11
- super nil, self
12
- @driver = _driver
13
- end
14
-
15
- def root
16
- self
17
- end
18
-
19
- def elements
20
- [driver]
21
- end
22
-
23
- def to_html
24
- driver.page_source
25
- end
26
-
27
- def current_uri
28
- URI.parse driver.current_url
29
- end
30
-
31
- def cookies
32
- driver.manage.all_cookies
33
- end
34
-
35
- def goto(_url, _params=nil)
36
- _url += "?#{_params.to_query}" if _params
37
- driver.get(_url)
38
- end
39
- end
40
- end
41
- end
42
- end
@@ -1,22 +0,0 @@
1
- require 'crabfarm/dsl/surfer/search_context'
2
- require 'crabfarm/dsl/surfer/surf_context'
3
-
4
- module Crabfarm
5
- module Dsl
6
- module Surfer
7
-
8
- class Error < StandardError
9
- attr_reader :source
10
-
11
- def initialize(_message, _ctx)
12
- super _message
13
- @ctx = _ctx
14
- @source = _ctx.root.page_source rescue nil # cache page source for future reference
15
- end
16
- end
17
-
18
- class EmptySetError < Error; end
19
- class WebdriverError < Error; end
20
- end
21
- end
22
- end