crabfarm 0.7.9 → 0.7.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8a3f375503cc8b1d8107bfdb7df79ec9ce923852
4
- data.tar.gz: 7a0d201d17f5983f37426285e88f72f4538dee70
3
+ metadata.gz: 61b08ac4fbd03cce9915e677accfd155a32df06d
4
+ data.tar.gz: c6dfcfde7084e9a169c3b841b0db323f91e618f0
5
5
  SHA512:
6
- metadata.gz: a6b929e335feddffdeef3438a216978d0ae0b887b58d56f4c4cb031ddd70d680af26c04e2f08731b7a4b4372450cefd753ef29f32a57da7bea3f892931342600
7
- data.tar.gz: 758d3b2c5f11ed4f94cd4fa2b5d587c71d21035a0ed87870cafcd89d48e50ed2443e770c7386bd8d5140d4e4f014f46a51af1585c3ed2ff10e03ebaae7eb4e7e
6
+ metadata.gz: 1a6e29253f64e0c783b7ad257cbcd66f247b50310f275b3ac1384ced3c5fb8fba1942dfc64da72833c40c414e2251405d39f01ee40d1b1de2f0cd10dad873b86
7
+ data.tar.gz: 84ac0447cfdacf3bb6e04ea73b937a157a13954ba5a5325a10b60d61d8261b9a46315acdd38fd522daa7e592dc6375cfedece54fb6779ae2569f41b26b6198cc
@@ -7,9 +7,10 @@ module Crabfarm
7
7
 
8
8
  attr_accessor :config
9
9
 
10
- def initialize(_proxy=nil)
10
+ def initialize(_proxy = nil, _proxy_auth = nil)
11
11
  @config = load_driver_config
12
12
  @config[:proxy] = _proxy
13
+ @config[:proxy_auth] = _proxy_auth
13
14
  end
14
15
 
15
16
  def build_driver(_session_id)
@@ -4,10 +4,10 @@ module Crabfarm
4
4
  module Adapters
5
5
  module Browser
6
6
  class Chenso < Base
7
-
8
- def initialize(_proxy=nil)
7
+ def initialize(_proxy = nil, _proxy_auth = nil)
9
8
  @config = load_chenso_config
10
9
  @config[:proxy] = _proxy
10
+ @config[:proxy_auth] = _proxy_auth
11
11
  end
12
12
 
13
13
  def build_driver(_session_id)
@@ -22,10 +22,9 @@ module Crabfarm
22
22
 
23
23
  def load_chenso_config
24
24
  {
25
- # nothing for now
25
+ user_agent: Crabfarm.config.user_agent
26
26
  }
27
27
  end
28
-
29
28
  end
30
29
  end
31
30
  end
@@ -4,14 +4,12 @@ module Crabfarm
4
4
  module Adapters
5
5
  module Browser
6
6
  class Noop < Base
7
-
8
- def initialize(_proxy=nil)
7
+ def initialize(_proxy = nil, _proxy_user = nil)
9
8
  end
10
9
 
11
10
  def build_driver(_session_id)
12
11
  _session_id || :noop
13
12
  end
14
-
15
13
  end
16
14
  end
17
15
  end
@@ -10,6 +10,8 @@ module Crabfarm
10
10
  [:parser, :string, 'Default parser engine used by reducers'],
11
11
  [:log_path, :string, 'Path where logs should be stored'],
12
12
  [:proxy, :string, 'If given, a proxy is used to connect to the internet if driver supports it'],
13
+ [:proxy_auth, :string, 'Proxy authentication parameters as user:password'],
14
+ [:user_agent, :string, 'Allows overriding default driver user agent, only available in chenso'],
13
15
 
14
16
  # Webdriver configuration parameters
15
17
  [:webdriver_host, :string, 'Remote host, only available in driver: remote'],
@@ -56,6 +58,8 @@ module Crabfarm
56
58
  driver_factory: nil,
57
59
  log_path: nil,
58
60
  proxy: nil,
61
+ proxy_auth: nil,
62
+ user_agent: nil,
59
63
  webdriver_capabilities: nil,
60
64
  webdriver_host: 'localhost',
61
65
  webdriver_port: '8080',
@@ -87,8 +91,7 @@ module Crabfarm
87
91
  def crabtrap_config
88
92
  {
89
93
  bin_path: crabtrap_bin_path,
90
- log_level: crabtrap_log_level,
91
- proxy: proxy
94
+ log_level: crabtrap_log_level
92
95
  }
93
96
  end
94
97
 
@@ -48,7 +48,7 @@ module Crabfarm
48
48
 
49
49
  def init_browser_adapter
50
50
  if @browser_adapter.nil?
51
- @browser_adapter = build_browser_adapter proxy
51
+ @browser_adapter = build_browser_adapter proxy, proxy_auth
52
52
  @browser_adapter.prepare_driver_services
53
53
  end
54
54
  end
@@ -67,14 +67,18 @@ module Crabfarm
67
67
  @pool = nil
68
68
  end
69
69
 
70
- def build_browser_adapter(_proxy)
71
- Strategies.load(:browser, config.browser).new _proxy
70
+ def build_browser_adapter(_proxy, _proxy_auth)
71
+ Strategies.load(:browser, config.browser).new(_proxy, _proxy_auth)
72
72
  end
73
73
 
74
74
  def proxy
75
75
  config.proxy
76
76
  end
77
77
 
78
+ def proxy_auth
79
+ config.proxy_auth
80
+ end
81
+
78
82
  def config
79
83
  Crabfarm.config
80
84
  end
@@ -2,7 +2,6 @@ require 'crabfarm/crabtrap_runner'
2
2
 
3
3
  module Crabfarm
4
4
  class CrabtrapContext < Context
5
-
6
5
  attr_accessor :mode
7
6
 
8
7
  def initialize(_mode=:pass, _path=nil)
@@ -78,13 +77,15 @@ module Crabfarm
78
77
  end
79
78
 
80
79
  def proxy
81
- # just step over configuration proxy
82
80
  proxy_address
83
81
  end
84
82
 
83
+ def proxy_auth
84
+ nil
85
+ end
86
+
85
87
  def proxy_address
86
88
  "127.0.0.1:#{@port}"
87
89
  end
88
-
89
90
  end
90
91
  end
@@ -15,9 +15,13 @@ module Crabfarm
15
15
  "127.0.0.1:#{@manager.proxy_port}"
16
16
  end
17
17
 
18
+ def proxy_auth
19
+ nil
20
+ end
21
+
18
22
  private
19
23
 
20
- def build_browser_adapter(_proxy)
24
+ def build_browser_adapter(_proxy, _proxy_auth)
21
25
  # use a special browser adapter to override primary driver
22
26
  return BrowserAdapter.new @manager
23
27
  end
@@ -32,6 +32,7 @@ module Crabfarm
32
32
  path(_name, 'spec', 'mementos', '.gitkeep').render('dot_gitkeep')
33
33
  path(_name, 'spec', 'integration', '.gitkeep').render('dot_gitkeep')
34
34
  path(_name, 'logs', '.gitkeep').render('dot_gitkeep')
35
+ path(_name, 'README.md').render('README.md', binding)
35
36
  end
36
37
  end
37
38
 
@@ -1,61 +1,50 @@
1
1
  require 'selenium-webdriver'
2
2
 
3
3
  module Crabfarm
4
- module Support
5
- module WebdriverFactory
6
- extend self
4
+ module Support
5
+ module WebdriverFactory
6
+ extend self
7
7
 
8
- def build_chrome_driver(_options={})
9
- capabilities = Selenium::WebDriver::Remote::Capabilities.chrome
8
+ def build_chrome_driver(_options={})
9
+ capabilities = Selenium::WebDriver::Remote::Capabilities.chrome
10
+ capabilities.proxy = build_proxy(_options) if _options[:proxy].present?
10
11
 
11
- if _options[:proxy].present?
12
- capabilities.proxy = Selenium::WebDriver::Proxy.new({
13
- :http => _options[:proxy],
14
- :ssl => _options[:proxy]
15
- })
12
+ setup_webdriver Selenium::WebDriver.for(:chrome, detach: false, desired_capabilities: capabilities), _options
16
13
  end
17
14
 
18
- common_setup Selenium::WebDriver.for(:chrome, detach: false, desired_capabilities: capabilities), _options
19
- end
20
-
21
- def build_firefox_driver(_options={})
22
- capabilities = Selenium::WebDriver::Remote::Capabilities.firefox
15
+ def build_firefox_driver(_options={})
16
+ capabilities = Selenium::WebDriver::Remote::Capabilities.firefox
17
+ capabilities.proxy = build_proxy(_options) if _options[:proxy].present?
23
18
 
24
- if _options[:proxy].present?
25
- capabilities.proxy = Selenium::WebDriver::Proxy.new({
26
- :http => _options[:proxy],
27
- :ssl => _options[:proxy]
28
- })
19
+ setup_webdriver Selenium::WebDriver.for(:firefox, desired_capabilities: capabilities), _options
29
20
  end
30
21
 
31
- common_setup Selenium::WebDriver.for(:firefox, desired_capabilities: capabilities), _options
32
- end
22
+ def build_remote_driver(_options={})
23
+ client = Selenium::WebDriver::Remote::Http::Default.new
24
+ client.timeout = _options[:remote_timeout]
25
+ client.proxy = build_proxy(_options) if _options[:proxy].present?
26
+
27
+ setup_webdriver(Selenium::WebDriver.for(:remote, {
28
+ :url => _options[:remote_host],
29
+ :http_client => client,
30
+ :desired_capabilities => _options[:capabilities] || Selenium::WebDriver::Remote::Capabilities.firefox
31
+ }), _options)
32
+ end
33
33
 
34
- def build_remote_driver(_options={})
35
- client = Selenium::WebDriver::Remote::Http::Default.new
36
- client.timeout = _options[:remote_timeout]
34
+ private
37
35
 
38
- if _options[:proxy].present?
39
- client.proxy = Selenium::WebDriver::Proxy.new({
36
+ def build_proxy(_options)
37
+ # TODO: support authentication
38
+ Selenium::WebDriver::Proxy.new({
40
39
  :http => _options[:proxy],
41
40
  :ssl => _options[:proxy]
42
41
  })
43
42
  end
44
43
 
45
- common_setup(Selenium::WebDriver.for(:remote, {
46
- :url => _options[:remote_host],
47
- :http_client => client,
48
- :desired_capabilities => _options[:capabilities] || Selenium::WebDriver::Remote::Capabilities.firefox
49
- }), _options)
50
- end
51
-
52
- private
53
-
54
- def common_setup(_driver, _options)
55
- _driver.manage.window.resize_to(_options[:window_width], _options[:window_height]) rescue nil
56
- return _driver
44
+ def setup_webdriver(_driver, _options)
45
+ _driver.manage.window.resize_to(_options[:window_width], _options[:window_height]) rescue nil
46
+ return _driver
47
+ end
57
48
  end
58
-
59
49
  end
60
- end
61
50
  end
@@ -12,6 +12,12 @@ set_log_path 'logs'
12
12
  # Set crawler proxy, this setting is overridden when running the crawler in crabfarm.io
13
13
  # set_proxy 'the.proxy.address'
14
14
 
15
+ # Set crawler proxy authentication, this setting is overridden when running the crawler in crabfarm.io
16
+ # set_proxy_auth 'user:password'
17
+
18
+ # Set the crawler's user agent string
19
+ # set_user_agent 'MyCrawler'
20
+
15
21
  # General webdriver configuration
16
22
  ########################################
17
23
 
@@ -1,7 +1,7 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
3
  gem "crabfarm", '<%= version %>'
4
- gem "pincers", '~> 0.7'
4
+ gem "pincers", '~> 0.7', ">= 0.7.11"
5
5
 
6
6
  # Comment this if not using a nokogiri based parser or browser
7
7
  gem 'nokogiri', "~> 1.6"
@@ -0,0 +1,44 @@
1
+ ![Crabfarm](http://crabfarm.io/img/teaser-bg-3.png)
2
+
3
+ ## <%= name %>
4
+
5
+ This is a crawler created using the [Crabfarm framework](http://crabfarm.io/#/teaser). It is composed of navigators which allow you to access different sections of a website to extract information or perform actions.
6
+
7
+ To learn more about how this crawler was put together read the [documentation](http://github.com/platanus/crabfarm-gem).
8
+
9
+ ### Deploy
10
+
11
+ To deploy <%= name %> first you must have an active account for the [Crabfarm Grid](https://grid.crabfarm.io).
12
+
13
+ If you already have an account, simply do:
14
+ ```shell
15
+ crabfarm p
16
+ ```
17
+
18
+ This will ask you for your credentials if you haven't provided them yet, and then upload the crawler to the grid.
19
+
20
+ ### Use
21
+
22
+ To use the crawler from Ruby you can install cangrejo-gem:
23
+
24
+ ```
25
+ gem install 'cangrejo'
26
+ ```
27
+
28
+ and then
29
+
30
+ ```ruby
31
+ require 'cangrejo'
32
+ Cangrejo.connect 'org/repo' do |session|
33
+ session.navigate(:navigator_name, parameter_name: 'hello')
34
+ end
35
+ ```
36
+
37
+ For more information, visit the [cangrejo-gem repository](https://github.com/platanus/cangrejo-gem).
38
+
39
+ If you prefer NodeJS instead, there is [Camaron](https://github.com/platanus/camaron).
40
+
41
+
42
+ ### Navigators
43
+
44
+ Take a look at the [usage examples](tree/master/spec/navigators)
@@ -1,3 +1,3 @@
1
1
  module Crabfarm
2
- VERSION = "0.7.9"
2
+ VERSION = "0.7.11"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crabfarm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.9
4
+ version: 0.7.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-05 00:00:00.000000000 Z
11
+ date: 2016-05-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -223,6 +223,9 @@ dependencies:
223
223
  - - ~>
224
224
  - !ruby/object:Gem::Version
225
225
  version: '0.7'
226
+ - - '>='
227
+ - !ruby/object:Gem::Version
228
+ version: 0.7.11
226
229
  type: :development
227
230
  prerelease: false
228
231
  version_requirements: !ruby/object:Gem::Requirement
@@ -230,6 +233,9 @@ dependencies:
230
233
  - - ~>
231
234
  - !ruby/object:Gem::Version
232
235
  version: '0.7'
236
+ - - '>='
237
+ - !ruby/object:Gem::Version
238
+ version: 0.7.11
233
239
  - !ruby/object:Gem::Dependency
234
240
  name: bundler
235
241
  requirement: !ruby/object:Gem::Requirement
@@ -535,6 +541,7 @@ files:
535
541
  - lib/crabfarm/support/webdriver_factory.rb
536
542
  - lib/crabfarm/templates/Crabfile.erb
537
543
  - lib/crabfarm/templates/Gemfile.erb
544
+ - lib/crabfarm/templates/README.md.erb
538
545
  - lib/crabfarm/templates/boot.rb.erb
539
546
  - lib/crabfarm/templates/crabfarm_bin.erb
540
547
  - lib/crabfarm/templates/dot_crabfarm.erb
@@ -576,7 +583,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
576
583
  version: '0'
577
584
  requirements: []
578
585
  rubyforge_project:
579
- rubygems_version: 2.4.8
586
+ rubygems_version: 2.6.1
580
587
  signing_key:
581
588
  specification_version: 4
582
589
  summary: Crabfarm is a TDD oriented web scrapping framework