crabfarm 0.5.3 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 89af60fe404e36fbc112ab5325ad3bbc618e25f1
4
- data.tar.gz: 2930de41a6c3c9725533d1a9f84fc6d0b89b0ab9
3
+ metadata.gz: 425cdb9cbc7e43b16e7ab9a7d8ef5f187f95d07d
4
+ data.tar.gz: a8f7360685f9febdb8e9076586ab1e856f6c0323
5
5
  SHA512:
6
- metadata.gz: 31c2d828b6c7351f2efa1a7b79ba0762a21770b12f66560cc310a7ce1e0c91a574adfc1a3c8193937fed2d63a7f7d35d891c4b4c419e9b87a8e73d192c61d167
7
- data.tar.gz: de65bef60361d0c7fa3365d8c1e05a82ed702bfbce0224c54f2ea5d94fd4a32d572e775d9321b388b0b46460fbba5c691a7c472f19ca213f45d64f1b4cf4f8ff
6
+ metadata.gz: 203b29582a08881d693923620e7cb601433be93edf3551f8daebc414fff2aca12e3bfcc34b3e81b0d045921cfbb5ad74a47e7d3858925c31bf300e995107b33d
7
+ data.tar.gz: 8b2215150d4976599385b2da16b1b5cb89f169ef6b0ba41b3e719cd56fc8d68b2c061d015cece7523ad72ec95378c9c49ad0faa292259a5f7645c17cc4af273d
@@ -0,0 +1,17 @@
1
+ class Pincers::Core::SearchContext
2
+ def webdriver_elements
3
+ elements
4
+ end
5
+ end
6
+
7
+ module Crabfarm
8
+ module Adapters
9
+ module DriverWrapper
10
+ class Pincers
11
+ def self.wrap(_driver)
12
+ ::Pincers.for_webdriver _driver
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,23 @@
1
+ module Crabfarm
2
+ module Adapters
3
+ module Parser
4
+ class Pincers
5
+ def self.format
6
+ 'html'
7
+ end
8
+
9
+ def self.parse(_raw)
10
+ ::Pincers.for_nokogiri ::Nokogiri::HTML _raw
11
+ end
12
+
13
+ def self.preprocess_parsing_target(_target)
14
+ if _target.respond_to? :to_html
15
+ _target.to_html
16
+ else
17
+ _target
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -21,6 +21,12 @@ module Crabfarm
21
21
  @params = _params
22
22
  end
23
23
 
24
+ def navigate(_name, _params={})
25
+ TransitionService.transition(@context, _name, params.merge(_params)).navigator
26
+ end
27
+
28
+ alias :nav :navigate
29
+
24
30
  def browser(_name=nil)
25
31
  @context.pool.driver(_name)
26
32
  end
@@ -15,10 +15,10 @@ module Crabfarm
15
15
  [:webdriver_host, :string, 'Remote host, only available in driver: remote'],
16
16
  [:webdriver_port, :integer, 'Remote port, only available in driver: remote'],
17
17
  [:webdriver_capabilities, :mixed, 'Driver capabilities, depends on selected driver.'],
18
- [:webdriver_remote_timeout, :float, 'Request timeout in seconds, only available for remote or phatomjs driver.'],
18
+ [:webdriver_remote_timeout, :float, 'Request timeout in seconds, only available for remote or phantomjs driver.'],
19
19
  [:webdriver_window_width, :integer, 'Initial browser window width.'],
20
20
  [:webdriver_window_height, :integer, 'Initial browser window height.'],
21
- [:webdriver_dsl, :string, 'Webdriver wrapper to use, built in options are watir and surfer'],
21
+ [:webdriver_dsl, :string, 'Webdriver wrapper to use, built in options are pincers and watir'],
22
22
 
23
23
  # Phantom launcher configuration
24
24
  [:phantom_load_images, :boolean, 'Phantomjs image loading, only for phantomjs driver.'],
@@ -53,7 +53,7 @@ module Crabfarm
53
53
  def reset
54
54
  @values = {
55
55
  browser: 'phantomjs',
56
- parser: :nokogiri,
56
+ parser: :pincers,
57
57
  driver_factory: nil,
58
58
  log_path: nil,
59
59
  proxy: nil,
@@ -63,7 +63,7 @@ module Crabfarm
63
63
  webdriver_remote_timeout: 120,
64
64
  webdriver_window_width: 1280,
65
65
  webdriver_window_height: 800,
66
- webdriver_dsl: :watir,
66
+ webdriver_dsl: :pincers,
67
67
  phantom_load_images: false,
68
68
  phantom_ssl: 'any',
69
69
  phantom_bin_path: 'phantomjs',
@@ -88,7 +88,7 @@ module Crabfarm
88
88
  end
89
89
 
90
90
  def validate_remote(_url)
91
- return true if /^\w+\/\w+$/i === _url
91
+ return true if /^[\w\-]+\/[\w\-]+$/i === _url
92
92
  puts "Invalid remote syntax: #{_url}".color :red
93
93
  return false
94
94
  end
@@ -3,8 +3,8 @@
3
3
  set_browser :phantomjs
4
4
 
5
5
  # The default parser engine for reducers that do not specify one.
6
- # Available options are :nokogiri and :pdf_parser. :pdf_parser requires an additional gem to be added to Gemfile
7
- set_parser :nokogiri
6
+ # Available options are :pincers, :nokogiri and :pdf_reader. :pdf_reader requires an additional gem to be added to Gemfile
7
+ set_parser :pincers
8
8
 
9
9
  # The path where every crawler log is stored.
10
10
  set_log_path 'logs'
@@ -17,9 +17,9 @@ set_log_path 'logs'
17
17
 
18
18
  # The following parameters only apply if using a webdriver based driver
19
19
 
20
- # Selects the webdriver wrapper library to be used, options are :surfer, :watir and :capybara.
20
+ # Selects the webdriver wrapper library to be used, options are :pincers, :watir and :capybara.
21
21
  # Both watir and capybara require an additional gem to be added to Gemfile
22
- set_webdriver_dsl :watir
22
+ set_webdriver_dsl :pincers
23
23
 
24
24
  # Set the selected webdriver capabilities (check the driver documentation for more details)
25
25
  # set_webdriver_capabilities
@@ -1,16 +1,14 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
3
  gem "crabfarm", '<%= version %>'
4
+ gem "pincers", '~> 0.2.0'
4
5
 
5
- # Comment this is not using the nokogiri default HTML parser
6
+ # Comment this out if not using a nokogiri-based parser or browser
6
7
  gem 'nokogiri', "~> 1.6.6"
7
8
 
8
- # Comment this is not using a selenium webdriver based driver
9
+ # Comment this out if not using a selenium-webdriver-based driver
9
10
  gem "selenium-webdriver", "~> 2.45"
10
11
 
11
- # Comment this if using other webdriver dsl
12
- gem "watir-webdriver"
13
-
14
12
  group :test do
15
13
  gem "rspec", "~> 3.2.0"
16
14
  gem "rspec-nc"
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.5.3"
2
+ VERSION = "0.6.0"
3
3
  end
data/lib/crabfarm.rb CHANGED
@@ -101,11 +101,12 @@ module Crabfarm
101
101
  register :browser, :noop, 'Crabfarm::Adapters::Browser::Noop'
102
102
 
103
103
  # bundled webdriver dsl adapters
104
- register :webdriver_dsl, :surfer, 'Crabfarm::Adapters::DriverWrapper::Surfer'
104
+ register :webdriver_dsl, :pincers, 'Crabfarm::Adapters::DriverWrapper::Pincers', dependencies: ['pincers']
105
105
  register :webdriver_dsl, :watir, 'Crabfarm::Adapters::DriverWrapper::Watir', dependencies: ['watir-webdriver']
106
106
  register :webdriver_dsl, :capybara, 'Crabfarm::Adapters::DriverWrapper::Capybara', dependencies: ['capybara']
107
107
 
108
108
  # bundled parsers dsl adapters
109
+ register :parser, :pincers, 'Crabfarm::Adapters::Parser::Pincers', dependencies: ['pincers', 'nokogiri']
109
110
  register :parser, :nokogiri, 'Crabfarm::Adapters::Parser::Nokogiri', dependencies: ['nokogiri']
110
111
  register :parser, :pdf_reader, 'Crabfarm::Adapters::Parser::PdfReader', dependencies: ['pdf-reader']
111
112
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-06-08 00:00:00.000000000 Z
11
+ date: 2015-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -184,6 +184,20 @@ dependencies:
184
184
  - - ~>
185
185
  - !ruby/object:Gem::Version
186
186
  version: 1.6.6
187
+ - !ruby/object:Gem::Dependency
188
+ name: pincers
189
+ requirement: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - ~>
192
+ - !ruby/object:Gem::Version
193
+ version: 0.2.0
194
+ type: :development
195
+ prerelease: false
196
+ version_requirements: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - ~>
199
+ - !ruby/object:Gem::Version
200
+ version: 0.2.0
187
201
  - !ruby/object:Gem::Dependency
188
202
  name: bundler
189
203
  requirement: !ruby/object:Gem::Requirement
@@ -409,10 +423,11 @@ files:
409
423
  - lib/crabfarm/adapters/browser/phantom_js.rb
410
424
  - lib/crabfarm/adapters/browser/remote_webdriver.rb
411
425
  - lib/crabfarm/adapters/driver_wrapper/capybara.rb
412
- - lib/crabfarm/adapters/driver_wrapper/surfer.rb
426
+ - lib/crabfarm/adapters/driver_wrapper/pincers.rb
413
427
  - lib/crabfarm/adapters/driver_wrapper/watir.rb
414
428
  - lib/crabfarm/adapters/parser/nokogiri.rb
415
429
  - lib/crabfarm/adapters/parser/pdf_reader.rb
430
+ - lib/crabfarm/adapters/parser/pincers.rb
416
431
  - lib/crabfarm/assertion/context.rb
417
432
  - lib/crabfarm/assertion/fields.rb
418
433
  - lib/crabfarm/assertion/parsers.rb
@@ -428,9 +443,6 @@ files:
428
443
  - lib/crabfarm/crabtrap_context.rb
429
444
  - lib/crabfarm/crabtrap_runner.rb
430
445
  - lib/crabfarm/driver_pool.rb
431
- - lib/crabfarm/dsl/surfer/search_context.rb
432
- - lib/crabfarm/dsl/surfer/surf_context.rb
433
- - lib/crabfarm/dsl/surfer.rb
434
446
  - lib/crabfarm/engines/async_state_manager.rb
435
447
  - lib/crabfarm/engines/sync_state_manager.rb
436
448
  - lib/crabfarm/errors.rb
@@ -528,3 +540,4 @@ signing_key:
528
540
  specification_version: 4
529
541
  summary: Crabfarm crawler creation framework
530
542
  test_files: []
543
+ has_rdoc:
@@ -1,13 +0,0 @@
1
- require 'crabfarm/dsl/surfer'
2
-
3
- module Crabfarm
4
- module Adapters
5
- module DriverWrapper
6
- class Surfer
7
- def self.wrap(_driver)
8
- Crabfarm::Dsl::Surfer::SurfContext.new _driver
9
- end
10
- end
11
- end
12
- end
13
- end
@@ -1,152 +0,0 @@
1
- module Crabfarm
2
- module Dsl
3
- module Surfer
4
- class SearchContext
5
- include Enumerable
6
- extend Forwardable
7
-
8
- TIMEOUT = 10.0 # Default timeout for waiting operations
9
-
10
- attr_accessor :elements, :parent
11
-
12
- def_delegators :elements, :length, :count, :empty?
13
-
14
- def initialize(_elements, _parent)
15
- @elements = _elements
16
- @parent = _parent
17
- end
18
-
19
- def webdriver_elements
20
- @elements
21
- end
22
-
23
- def root
24
- @parent.root
25
- end
26
-
27
- def each
28
- elements.each { |el| yield child_context [el] }
29
- end
30
-
31
- def [](*args)
32
- if args[0].is_a? String or args[0].is_a? Symbol
33
- attribute args[0]
34
- else
35
- child_context Array(elements.send(:[],*args))
36
- end
37
- end
38
-
39
- def first
40
- if elements.first.nil? then nil else child_context [elements.first] end
41
- end
42
-
43
- def last
44
- if elements.last.nil? then nil else child_context [elements.last] end
45
- end
46
-
47
- def element!
48
- raise EmptySetError.new("This set is empty", self) if empty?
49
- elements.first
50
- end
51
-
52
- def classes
53
- wrap_errors { (element!['class'] || '').split(' ') }
54
- end
55
-
56
- def search(_selector=nil, _options={})
57
- _options[:css] = _selector if _selector
58
-
59
- wait_mode = _options.delete :wait
60
- if wait_mode
61
-
62
- # retrieve timeout
63
- timeout = _options.delete :timeout
64
- timeout = TIMEOUT if timeout.nil?
65
-
66
- # use a selenium timeout
67
- wrap_errors do
68
- wait = Selenium::WebDriver::Wait.new(timeout: timeout)
69
- wait.until do
70
- new_elements = search_elements _options
71
-
72
- # test wait condition
73
- ok = case wait_mode
74
- when :present then (new_elements.length > 0)
75
- when :visible then (new_elements.length > 0 and new_elements.first.displayed?)
76
- when :enabled then (new_elements.length > 0 and new_elements.first.displayed? and new_elements.first.enabled?)
77
- when :not_present then (new_elements.length == 0)
78
- when :not_visible then (not new_elements.any? { |e| e.displayed? })
79
- else
80
- raise SetupError.new "Invalid wait mode '#{wait_mode}'"
81
- end
82
-
83
- child_context new_elements if ok
84
- end
85
- end
86
- else
87
- child_context search_elements(_options)
88
- end
89
- end
90
-
91
- def fill(_value)
92
- wrap_errors do
93
- element!.clear
94
- element!.send_keys _value
95
- end
96
- end
97
-
98
- def to_html
99
- elements.map { |e| e['outerHTML'] }.join
100
- end
101
-
102
- # Any methods missing are forwarded to the main element (first).
103
- def method_missing(_method, *_args, &_block)
104
- wrap_errors do
105
- m = /^(.*)_all$/.match _method.to_s
106
- if m then
107
- return [] if empty?
108
- elements.map { |e| e.send(m[1], *_args, &_block) }
109
- else
110
- element!.send(_method, *_args, &_block)
111
- end
112
- end
113
- end
114
-
115
- def respond_to?(_method, _include_all=false)
116
- return true if super
117
- m = /^.*_all$/.match _method.to_s
118
- if m then
119
- return true if empty?
120
- elements.first.respond_to? m[1], _include_all
121
- else
122
- return true if empty?
123
- elements.first.respond_to? _method, _include_all
124
- end
125
- end
126
-
127
- private
128
-
129
- def child_context(_elements)
130
- SearchContext.new _elements, self
131
- end
132
-
133
- def wrap_errors
134
- begin
135
- yield
136
- rescue Selenium::WebDriver::Error::WebDriverError => e
137
- raise WebdriverError.new e, self
138
- end
139
- end
140
-
141
- def search_elements(_options)
142
- wrap_errors do
143
- elements.inject([]) do |r, element|
144
- r + element.find_elements(_options)
145
- end
146
- end
147
- end
148
-
149
- end
150
- end
151
- end
152
- end
@@ -1,42 +0,0 @@
1
- module Crabfarm
2
- module Dsl
3
- module Surfer
4
- class SurfContext < SearchContext
5
-
6
- attr_reader :driver
7
-
8
- def_delegators 'driver.navigate', :back, :forward, :refresh
9
-
10
- def initialize(_driver)
11
- super nil, self
12
- @driver = _driver
13
- end
14
-
15
- def root
16
- self
17
- end
18
-
19
- def elements
20
- [driver]
21
- end
22
-
23
- def to_html
24
- driver.page_source
25
- end
26
-
27
- def current_uri
28
- URI.parse driver.current_url
29
- end
30
-
31
- def cookies
32
- driver.manage.all_cookies
33
- end
34
-
35
- def goto(_url, _params=nil)
36
- _url += "?#{_params.to_query}" if _params
37
- driver.get(_url)
38
- end
39
- end
40
- end
41
- end
42
- end
@@ -1,22 +0,0 @@
1
- require 'crabfarm/dsl/surfer/search_context'
2
- require 'crabfarm/dsl/surfer/surf_context'
3
-
4
- module Crabfarm
5
- module Dsl
6
- module Surfer
7
-
8
- class Error < StandardError
9
- attr_reader :source
10
-
11
- def initialize(_message, _ctx)
12
- super _message
13
- @ctx = _ctx
14
- @source = _ctx.root.page_source rescue nil # cache page source for future reference
15
- end
16
- end
17
-
18
- class EmptySetError < Error; end
19
- class WebdriverError < Error; end
20
- end
21
- end
22
- end