crabfarm 0.7.9 → 0.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8a3f375503cc8b1d8107bfdb7df79ec9ce923852
4
- data.tar.gz: 7a0d201d17f5983f37426285e88f72f4538dee70
3
+ metadata.gz: 61b08ac4fbd03cce9915e677accfd155a32df06d
4
+ data.tar.gz: c6dfcfde7084e9a169c3b841b0db323f91e618f0
5
5
  SHA512:
6
- metadata.gz: a6b929e335feddffdeef3438a216978d0ae0b887b58d56f4c4cb031ddd70d680af26c04e2f08731b7a4b4372450cefd753ef29f32a57da7bea3f892931342600
7
- data.tar.gz: 758d3b2c5f11ed4f94cd4fa2b5d587c71d21035a0ed87870cafcd89d48e50ed2443e770c7386bd8d5140d4e4f014f46a51af1585c3ed2ff10e03ebaae7eb4e7e
6
+ metadata.gz: 1a6e29253f64e0c783b7ad257cbcd66f247b50310f275b3ac1384ced3c5fb8fba1942dfc64da72833c40c414e2251405d39f01ee40d1b1de2f0cd10dad873b86
7
+ data.tar.gz: 84ac0447cfdacf3bb6e04ea73b937a157a13954ba5a5325a10b60d61d8261b9a46315acdd38fd522daa7e592dc6375cfedece54fb6779ae2569f41b26b6198cc
@@ -7,9 +7,10 @@ module Crabfarm
7
7
 
8
8
  attr_accessor :config
9
9
 
10
- def initialize(_proxy=nil)
10
+ def initialize(_proxy = nil, _proxy_auth = nil)
11
11
  @config = load_driver_config
12
12
  @config[:proxy] = _proxy
13
+ @config[:proxy_auth] = _proxy_auth
13
14
  end
14
15
 
15
16
  def build_driver(_session_id)
@@ -4,10 +4,10 @@ module Crabfarm
4
4
  module Adapters
5
5
  module Browser
6
6
  class Chenso < Base
7
-
8
- def initialize(_proxy=nil)
7
+ def initialize(_proxy = nil, _proxy_auth = nil)
9
8
  @config = load_chenso_config
10
9
  @config[:proxy] = _proxy
10
+ @config[:proxy_auth] = _proxy_auth
11
11
  end
12
12
 
13
13
  def build_driver(_session_id)
@@ -22,10 +22,9 @@ module Crabfarm
22
22
 
23
23
  def load_chenso_config
24
24
  {
25
- # nothing for now
25
+ user_agent: Crabfarm.config.user_agent
26
26
  }
27
27
  end
28
-
29
28
  end
30
29
  end
31
30
  end
@@ -4,14 +4,12 @@ module Crabfarm
4
4
  module Adapters
5
5
  module Browser
6
6
  class Noop < Base
7
-
8
- def initialize(_proxy=nil)
7
+ def initialize(_proxy = nil, _proxy_user = nil)
9
8
  end
10
9
 
11
10
  def build_driver(_session_id)
12
11
  _session_id || :noop
13
12
  end
14
-
15
13
  end
16
14
  end
17
15
  end
@@ -10,6 +10,8 @@ module Crabfarm
10
10
  [:parser, :string, 'Default parser engine used by reducers'],
11
11
  [:log_path, :string, 'Path where logs should be stored'],
12
12
  [:proxy, :string, 'If given, a proxy is used to connect to the internet if driver supports it'],
13
+ [:proxy_auth, :string, 'Proxy authentication parameters as user:password'],
14
+ [:user_agent, :string, 'Allows overriding default driver user agent, only available in chenso'],
13
15
 
14
16
  # Webdriver configuration parameters
15
17
  [:webdriver_host, :string, 'Remote host, only available in driver: remote'],
@@ -56,6 +58,8 @@ module Crabfarm
56
58
  driver_factory: nil,
57
59
  log_path: nil,
58
60
  proxy: nil,
61
+ proxy_auth: nil,
62
+ user_agent: nil,
59
63
  webdriver_capabilities: nil,
60
64
  webdriver_host: 'localhost',
61
65
  webdriver_port: '8080',
@@ -87,8 +91,7 @@ module Crabfarm
87
91
  def crabtrap_config
88
92
  {
89
93
  bin_path: crabtrap_bin_path,
90
- log_level: crabtrap_log_level,
91
- proxy: proxy
94
+ log_level: crabtrap_log_level
92
95
  }
93
96
  end
94
97
 
@@ -48,7 +48,7 @@ module Crabfarm
48
48
 
49
49
  def init_browser_adapter
50
50
  if @browser_adapter.nil?
51
- @browser_adapter = build_browser_adapter proxy
51
+ @browser_adapter = build_browser_adapter proxy, proxy_auth
52
52
  @browser_adapter.prepare_driver_services
53
53
  end
54
54
  end
@@ -67,14 +67,18 @@ module Crabfarm
67
67
  @pool = nil
68
68
  end
69
69
 
70
- def build_browser_adapter(_proxy)
71
- Strategies.load(:browser, config.browser).new _proxy
70
+ def build_browser_adapter(_proxy, _proxy_auth)
71
+ Strategies.load(:browser, config.browser).new(_proxy, _proxy_auth)
72
72
  end
73
73
 
74
74
  def proxy
75
75
  config.proxy
76
76
  end
77
77
 
78
+ def proxy_auth
79
+ config.proxy_auth
80
+ end
81
+
78
82
  def config
79
83
  Crabfarm.config
80
84
  end
@@ -2,7 +2,6 @@ require 'crabfarm/crabtrap_runner'
2
2
 
3
3
  module Crabfarm
4
4
  class CrabtrapContext < Context
5
-
6
5
  attr_accessor :mode
7
6
 
8
7
  def initialize(_mode=:pass, _path=nil)
@@ -78,13 +77,15 @@ module Crabfarm
78
77
  end
79
78
 
80
79
  def proxy
81
- # just step over configuration proxy
82
80
  proxy_address
83
81
  end
84
82
 
83
+ def proxy_auth
84
+ nil
85
+ end
86
+
85
87
  def proxy_address
86
88
  "127.0.0.1:#{@port}"
87
89
  end
88
-
89
90
  end
90
91
  end
@@ -15,9 +15,13 @@ module Crabfarm
15
15
  "127.0.0.1:#{@manager.proxy_port}"
16
16
  end
17
17
 
18
+ def proxy_auth
19
+ nil
20
+ end
21
+
18
22
  private
19
23
 
20
- def build_browser_adapter(_proxy)
24
+ def build_browser_adapter(_proxy, _proxy_auth)
21
25
  # use a special browser adapter to override primary driver
22
26
  return BrowserAdapter.new @manager
23
27
  end
@@ -32,6 +32,7 @@ module Crabfarm
32
32
  path(_name, 'spec', 'mementos', '.gitkeep').render('dot_gitkeep')
33
33
  path(_name, 'spec', 'integration', '.gitkeep').render('dot_gitkeep')
34
34
  path(_name, 'logs', '.gitkeep').render('dot_gitkeep')
35
+ path(_name, 'README.md').render('README.md', binding)
35
36
  end
36
37
  end
37
38
 
@@ -1,61 +1,50 @@
1
1
  require 'selenium-webdriver'
2
2
 
3
3
  module Crabfarm
4
- module Support
5
- module WebdriverFactory
6
- extend self
4
+ module Support
5
+ module WebdriverFactory
6
+ extend self
7
7
 
8
- def build_chrome_driver(_options={})
9
- capabilities = Selenium::WebDriver::Remote::Capabilities.chrome
8
+ def build_chrome_driver(_options={})
9
+ capabilities = Selenium::WebDriver::Remote::Capabilities.chrome
10
+ capabilities.proxy = build_proxy(_options) if _options[:proxy].present?
10
11
 
11
- if _options[:proxy].present?
12
- capabilities.proxy = Selenium::WebDriver::Proxy.new({
13
- :http => _options[:proxy],
14
- :ssl => _options[:proxy]
15
- })
12
+ setup_webdriver Selenium::WebDriver.for(:chrome, detach: false, desired_capabilities: capabilities), _options
16
13
  end
17
14
 
18
- common_setup Selenium::WebDriver.for(:chrome, detach: false, desired_capabilities: capabilities), _options
19
- end
20
-
21
- def build_firefox_driver(_options={})
22
- capabilities = Selenium::WebDriver::Remote::Capabilities.firefox
15
+ def build_firefox_driver(_options={})
16
+ capabilities = Selenium::WebDriver::Remote::Capabilities.firefox
17
+ capabilities.proxy = build_proxy(_options) if _options[:proxy].present?
23
18
 
24
- if _options[:proxy].present?
25
- capabilities.proxy = Selenium::WebDriver::Proxy.new({
26
- :http => _options[:proxy],
27
- :ssl => _options[:proxy]
28
- })
19
+ setup_webdriver Selenium::WebDriver.for(:firefox, desired_capabilities: capabilities), _options
29
20
  end
30
21
 
31
- common_setup Selenium::WebDriver.for(:firefox, desired_capabilities: capabilities), _options
32
- end
22
+ def build_remote_driver(_options={})
23
+ client = Selenium::WebDriver::Remote::Http::Default.new
24
+ client.timeout = _options[:remote_timeout]
25
+ client.proxy = build_proxy(_options) if _options[:proxy].present?
26
+
27
+ setup_webdriver(Selenium::WebDriver.for(:remote, {
28
+ :url => _options[:remote_host],
29
+ :http_client => client,
30
+ :desired_capabilities => _options[:capabilities] || Selenium::WebDriver::Remote::Capabilities.firefox
31
+ }), _options)
32
+ end
33
33
 
34
- def build_remote_driver(_options={})
35
- client = Selenium::WebDriver::Remote::Http::Default.new
36
- client.timeout = _options[:remote_timeout]
34
+ private
37
35
 
38
- if _options[:proxy].present?
39
- client.proxy = Selenium::WebDriver::Proxy.new({
36
+ def build_proxy(_options)
37
+ # TODO: support authentication
38
+ Selenium::WebDriver::Proxy.new({
40
39
  :http => _options[:proxy],
41
40
  :ssl => _options[:proxy]
42
41
  })
43
42
  end
44
43
 
45
- common_setup(Selenium::WebDriver.for(:remote, {
46
- :url => _options[:remote_host],
47
- :http_client => client,
48
- :desired_capabilities => _options[:capabilities] || Selenium::WebDriver::Remote::Capabilities.firefox
49
- }), _options)
50
- end
51
-
52
- private
53
-
54
- def common_setup(_driver, _options)
55
- _driver.manage.window.resize_to(_options[:window_width], _options[:window_height]) rescue nil
56
- return _driver
44
+ def setup_webdriver(_driver, _options)
45
+ _driver.manage.window.resize_to(_options[:window_width], _options[:window_height]) rescue nil
46
+ return _driver
47
+ end
57
48
  end
58
-
59
49
  end
60
- end
61
50
  end
@@ -12,6 +12,12 @@ set_log_path 'logs'
12
12
  # Set crawler proxy; this setting is overridden when running the crawler in crabfarm.io
13
13
  # set_proxy 'the.proxy.address'
14
14
 
15
+ # Set crawler proxy authentication; this setting is overridden when running the crawler in crabfarm.io
16
+ # set_proxy_auth 'user:password'
17
+
18
+ # Set the crawler's user agent string
19
+ # set_user_agent 'MyCrawler'
20
+
15
21
  # General webdriver configuration
16
22
  ########################################
17
23
 
@@ -1,7 +1,7 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
3
  gem "crabfarm", '<%= version %>'
4
- gem "pincers", '~> 0.7'
4
+ gem "pincers", '~> 0.7', ">= 0.7.11"
5
5
 
6
6
  # Comment this if not using a nokogiri based parser or browser
7
7
  gem 'nokogiri', "~> 1.6"
@@ -0,0 +1,44 @@
1
+ ![Crabfarm](http://crabfarm.io/img/teaser-bg-3.png)
2
+
3
+ ## <%= name %>
4
+
5
+ This is a crawler created using the [Crabfarm framework](http://crabfarm.io/#/teaser). It is composed of navigators which allow you to access different sections of a website to extract information or perform actions.
6
+
7
+ To learn more about how this crawler was put together read the [documentation](http://github.com/platanus/crabfarm-gem).
8
+
9
+ ### Deploy
10
+
11
+ To deploy <%= name %> first you must have an active account for the [Crabfarm Grid](https://grid.crabfarm.io).
12
+
13
+ If you already have an account, simply do:
14
+ ```shell
15
+ crabfarm p
16
+ ```
17
+
18
+ This will ask for your credentials if you have not provided them yet, and then upload the crawler to the grid.
19
+
20
+ ### Use
21
+
22
+ To use the crawler from Ruby you can install cangrejo-gem:
23
+
24
+ ```
25
+ gem install 'cangrejo'
26
+ ```
27
+
28
+ and then
29
+
30
+ ```ruby
31
+ require 'cangrejo'
32
+ Cangrejo.connect 'org/repo' do |session|
33
+ session.navigate(:navigator_name, parameter_name: 'hello')
34
+ end
35
+ ```
36
+
37
+ For more information, visit the [cangrejo-gem repository](https://github.com/platanus/cangrejo-gem).
38
+
39
+ If you prefer Node.js instead, there is [Camaron](https://github.com/platanus/camaron).
40
+
41
+
42
+ ### Navigators
43
+
44
+ Take a look at the [usage examples](tree/master/spec/navigators)
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.7.9"
2
+ VERSION = "0.7.11"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.9
4
+ version: 0.7.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-05 00:00:00.000000000 Z
11
+ date: 2016-05-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -223,6 +223,9 @@ dependencies:
223
223
  - - ~>
224
224
  - !ruby/object:Gem::Version
225
225
  version: '0.7'
226
+ - - '>='
227
+ - !ruby/object:Gem::Version
228
+ version: 0.7.11
226
229
  type: :development
227
230
  prerelease: false
228
231
  version_requirements: !ruby/object:Gem::Requirement
@@ -230,6 +233,9 @@ dependencies:
230
233
  - - ~>
231
234
  - !ruby/object:Gem::Version
232
235
  version: '0.7'
236
+ - - '>='
237
+ - !ruby/object:Gem::Version
238
+ version: 0.7.11
233
239
  - !ruby/object:Gem::Dependency
234
240
  name: bundler
235
241
  requirement: !ruby/object:Gem::Requirement
@@ -535,6 +541,7 @@ files:
535
541
  - lib/crabfarm/support/webdriver_factory.rb
536
542
  - lib/crabfarm/templates/Crabfile.erb
537
543
  - lib/crabfarm/templates/Gemfile.erb
544
+ - lib/crabfarm/templates/README.md.erb
538
545
  - lib/crabfarm/templates/boot.rb.erb
539
546
  - lib/crabfarm/templates/crabfarm_bin.erb
540
547
  - lib/crabfarm/templates/dot_crabfarm.erb
@@ -576,7 +583,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
576
583
  version: '0'
577
584
  requirements: []
578
585
  rubyforge_project:
579
- rubygems_version: 2.4.8
586
+ rubygems_version: 2.6.1
580
587
  signing_key:
581
588
  specification_version: 4
582
589
  summary: Crabfarm is a TDD oriented web scraping framework