capybara_crawler 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/README.md +53 -0
- data/Rakefile +18 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/capybara_crawler.gemspec +32 -0
- data/doc/capybara-cheatsheet.md +204 -0
- data/examples/custom_user_agent.rb +15 -0
- data/examples/google_search.rb +41 -0
- data/lib/capybara_crawler.rb +6 -0
- data/lib/capybara_crawler/crawler.rb +123 -0
- data/lib/capybara_crawler/version.rb +3 -0
- metadata +199 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 12bca6bf174ac78ecc5a66200ea7c282a084ce8a
|
4
|
+
data.tar.gz: 583e3743cbbce4ea35b9ed8a0d8005e73851915b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f07c9b1cc4062a8ef90a6c927f9b24f6a82fea50fe7d50c5d473db3bff231f84a9fb9c05d1a960438c8efd176e4590e0d59e3d2c863085900a703079873b303b
|
7
|
+
data.tar.gz: cf53828a6354c39edb2695e0f01052b478e6f7e6216cdcb8c8d7517f1f03f74c1fcd7c81d412c16b744ccfb02898dc67c89520761b1e5834f6d466fe47ab28a0
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# CapybaraCrawler
|
2
|
+
|
3
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/capybara_crawler`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
+
|
5
|
+
TODO: Delete this and the text above, and describe your gem
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'capybara_crawler'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install capybara_crawler
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
See the `examples` directory and `spec/capybara_crawler_spec.rb` for usage examples.
|
26
|
+
|
27
|
+
## More
|
28
|
+
|
29
|
+
### Capybara cheatsheet
|
30
|
+
|
31
|
+
Read `doc/capybara-cheatsheet.md`.
|
32
|
+
|
33
|
+
## Dependencies
|
34
|
+
|
35
|
+
### Capybara Driver: poltergeist
|
36
|
+
|
37
|
+
Poltergeist is a driver for Capybara that allows you to run your tests on a headless WebKit browser, provided by PhantomJS.
|
38
|
+
https://github.com/teampoltergeist/poltergeist
|
39
|
+
|
40
|
+
### PhantomJS
|
41
|
+
PhantomJS is a headless WebKit scriptable with a JavaScript API.
|
42
|
+
http://phantomjs.org/
|
43
|
+
|
44
|
+
## Development
|
45
|
+
|
46
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
47
|
+
|
48
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
49
|
+
|
50
|
+
## Contributing
|
51
|
+
|
52
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/rainchen/capybara_crawler.
|
53
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# `rake spec` runs the RSpec suite; it is also what a bare `rake` runs.
RSpec::Core::RakeTask.new(:spec)
task default: :spec

namespace :tmp do
  namespace :capybara do
    # desc "Clears all files and directories in tmp/capybara"
    # removes every non-hidden entry directly under tmp/capybara
    task :clear do
      rm_rf Dir["tmp/capybara/[^.]*"], verbose: false
    end
  end

  # umbrella task: delegates to tmp:capybara:clear
  desc "Clear capybara files from tmp/"
  task clear: ["tmp:capybara:clear"]
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "capybara_crawler"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/capybara_crawler.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for capybara_crawler.

# make lib/ loadable so the version constant can be required below
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'capybara_crawler/version'

Gem::Specification.new do |spec|
  spec.name          = "capybara_crawler"
  spec.version       = CapybaraCrawler::VERSION
  spec.authors       = ["RainChen"]
  spec.email         = ["hirainchen@gmail.com"]

  spec.summary       = "a simple web crawler using DSL of capybara"
  spec.description   = "a simple web crawler using DSL of capybara"
  spec.homepage      = "https://github.com/rainchen/capybara_crawler"

  # package every git-tracked file except the test suites
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # runtime dependencies (no version constraints)
  %w[
    capybara
    poltergeist
    poltergeist-suppressor
    capybara-user_agent
    capybara-mechanize
  ].each { |gem_name| spec.add_dependency gem_name }

  # development dependencies, pinned ones first
  {
    "bundler" => "~> 1.12",
    "rake"    => "~> 10.0",
    "rspec"   => "~> 3.0"
  }.each { |gem_name, requirement| spec.add_development_dependency gem_name, requirement }
  %w[byebug launchy].each { |gem_name| spec.add_development_dependency gem_name }
end
|
data/doc/capybara-cheatsheet.md
ADDED
@@ -0,0 +1,204 @@
|
|
1
|
+
# Capybara cheatsheet
|
2
|
+
|
3
|
+
Based on https://gist.github.com/tomas-stefano/6652111
|
4
|
+
|
5
|
+
## Capybara Actions
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
# Anchor
|
9
|
+
click_link 'Save'
|
10
|
+
|
11
|
+
# Button
|
12
|
+
click_button 'awesome'
|
13
|
+
|
14
|
+
# Both above
|
15
|
+
click_link_or_button 'Save'
|
16
|
+
|
17
|
+
# `click_on` alias for click_link_or_button, shorter yet unambiguous
|
18
|
+
click_on('Link Text') # clicks on either links or buttons
|
19
|
+
click_on('Button Value')
|
20
|
+
|
21
|
+
# Text (area) field
|
22
|
+
fill_in 'Name or id', with: 'Content'
|
23
|
+
|
24
|
+
# Checkbox
|
25
|
+
check 'Content'
|
26
|
+
uncheck 'Content'
|
27
|
+
|
28
|
+
# Radio button
|
29
|
+
choose 'Content'
|
30
|
+
|
31
|
+
# Select option from select tag
|
32
|
+
select 'Option', from: 'Label'
|
33
|
+
|
34
|
+
# File input
|
35
|
+
attach_file Rails.root.join('spec/fixture/some_file.png')
|
36
|
+
```
|
37
|
+
|
38
|
+
## Capybara Finders
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
page.all(:xpath, '//a')
|
42
|
+
|
43
|
+
page.first(:xpath, '//a')
|
44
|
+
|
45
|
+
page.find('//textarea[@id="additional_newline"]')
|
46
|
+
|
47
|
+
page.find(:xpath, "//input[@id='form_pets_dog']")['checked']
|
48
|
+
# => true
|
49
|
+
|
50
|
+
page.find(:css, '#with_focus_event').trigger(:focus)
|
51
|
+
|
52
|
+
page.find(:css,'.wrapper').hover
|
53
|
+
|
54
|
+
page.find_field("test_field").value
|
55
|
+
# => 'blah'
|
56
|
+
|
57
|
+
page.find_by_id('red').tag_name
|
58
|
+
# => 'a'
|
59
|
+
|
60
|
+
# finds invisible elements when false
|
61
|
+
page.find_by_id("hidden_via_ancestor", visible: false)
|
62
|
+
|
63
|
+
page.find_button('What an Awesome')[:value]
|
64
|
+
# => 'awesome'
|
65
|
+
|
66
|
+
page.find_link('abo').text
|
67
|
+
# => 'labore'
|
68
|
+
|
69
|
+
page.find_link('other')[:href]
|
70
|
+
# => '/some_uri'
|
71
|
+
```
|
72
|
+
|
73
|
+
**Note:** `find` will wait for an element to appear on the page, as explained in the Ajax section. If the element does not appear it will raise an error.
|
74
|
+
|
75
|
+
**Note:** In XPath the expression `//` means something very specific, and it might not be what you think. Contrary to common belief, `//` means "anywhere in the document" not "anywhere in the current context".
|
76
|
+
|
77
|
+
|
78
|
+
## Capybara Scoped Finder `within`
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
within(search_form) do
|
82
|
+
fill_in 'Name', with: 'iOS 7'
|
83
|
+
click_button 'Search'
|
84
|
+
end
|
85
|
+
|
86
|
+
def search_form
|
87
|
+
'.search_form'
|
88
|
+
end
|
89
|
+
|
90
|
+
within_fieldset("villain_fieldset") do
|
91
|
+
# ...
|
92
|
+
end
|
93
|
+
|
94
|
+
within_table("some_table") do
|
95
|
+
# ...
|
96
|
+
end
|
97
|
+
|
98
|
+
# Execute the given block within the given iframe using given frame name or index.
|
99
|
+
#
|
100
|
+
within_frame('some_frame') do
|
101
|
+
end
|
102
|
+
|
103
|
+
save_page
|
104
|
+
|
105
|
+
# You need to install launchy gem.
|
106
|
+
save_and_open_page
|
107
|
+
```
|
108
|
+
|
109
|
+
## Capybara Common
|
110
|
+
|
111
|
+
```ruby
|
112
|
+
visit("http://google.com")
|
113
|
+
|
114
|
+
page.current_url
|
115
|
+
|
116
|
+
# Execute the given script, not returning a result. This is useful for scripts that return
|
117
|
+
# complex objects, such as jQuery statements. +execute_script+ should be used over
|
118
|
+
# +evaluate_script+ whenever possible.
|
119
|
+
#
|
120
|
+
page.execute_script("$('#change').text('Funky Doodle')")
|
121
|
+
|
122
|
+
# Evaluate the given JavaScript and return the result. Be careful when using this with
|
123
|
+
# scripts that return complex objects, such as jQuery statements. +execute_script+ might
|
124
|
+
# be a better alternative.
|
125
|
+
#
|
126
|
+
page.evaluate_script("1+3")
|
127
|
+
# => 4
|
128
|
+
|
129
|
+
using_wait_time 6 do
|
130
|
+
# ... Capybara.default_max_wait_time is 6 seconds within this block scope.
|
131
|
+
end
|
132
|
+
```
|
133
|
+
|
134
|
+
## Capybara Matchers
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
expect(page).to have_content("Some Content")
|
138
|
+
expect(page).to have_no_content("Some Content")
|
139
|
+
|
140
|
+
# True if there is an anchor tag with text matching regex
|
141
|
+
expect(page).to have_xpath("//a")
|
142
|
+
expect(page).to have_xpath("//a",:href => "google.com")
|
143
|
+
expect(page).to have_xpath("//a[@href='google.com']")
|
144
|
+
expect(page).to have_xpath("//a[contains(.,'some string')]")
|
145
|
+
expect(page).to have_xpath("//p//a", :text => /re[dab]i/i, :count => 1)
|
146
|
+
|
147
|
+
# can take both xpath and css as input and can take arguments similar to both have_css and have_xpath
|
148
|
+
expect(page).to have_selector(:xpath, "//p/h1")
|
149
|
+
expect(page).to have_selector(:css, "p a#post_edit_path")
|
150
|
+
|
151
|
+
expect(page).to have_css("input#post_title")
|
152
|
+
expect(page).to have_css("input#post_title", :value => "Capybara cheatsheet")
|
153
|
+
|
154
|
+
# True if there are 3 input tags in response
|
155
|
+
expect(page).to have_css("input", :count => 3)
|
156
|
+
|
157
|
+
# True if there are 3 or fewer input tags
|
158
|
+
expect(page).to have_css("input", :maximum => 3)
|
159
|
+
|
160
|
+
# True if there are minimum of 3 input tags
|
161
|
+
expect(page).to have_css("input", :minimum => 3)
|
162
|
+
|
163
|
+
# True if there are 1 to 3 input tags
|
164
|
+
expect(page).to have_css("input", :between => 1..3)
|
165
|
+
|
166
|
+
# True if there is an anchor tag with text hello
|
167
|
+
expect(page).to have_css("p a", :text => "hello")
|
168
|
+
expect(page).to have_css("p a", :text => /[hH]ello(.+)/i)
|
169
|
+
|
170
|
+
# For making capybara to take css as default selector
|
171
|
+
Capybara.default_selector = :css
|
172
|
+
|
173
|
+
# checks for the presence of the input tag
|
174
|
+
expect(page).to have_selector("input")
|
175
|
+
|
176
|
+
# checks for input tag with value
|
177
|
+
expect(page).to have_selector("input", :value =>"Post Title")
|
178
|
+
|
179
|
+
expect(page).to have_no_selector("input")
|
180
|
+
|
181
|
+
# For making capybara take xpath as the default selector
|
182
|
+
Capybara.default_selector = :xpath
|
183
|
+
# checks for the presence of the input tag
|
184
|
+
expect(page).to have_selector("//input")
|
185
|
+
|
186
|
+
# checks for input tag with value
|
187
|
+
expect(page).to have_selector("//input", :value =>"Post Title")
|
188
|
+
|
189
|
+
# checks for presence of a input field named FirstName in a form
|
190
|
+
expect(page).to have_field("FirstName")
|
191
|
+
|
192
|
+
expect(page).to have_field("FirstName", :value => "Rambo")
|
193
|
+
expect(page).to have_field("FirstName", :with => "Rambo")
|
194
|
+
|
195
|
+
expect(page).to have_link("Foo")
|
196
|
+
expect(page).to have_link("Foo", :href=>"google.com")
|
197
|
+
expect(page).to have_no_link("Foo", :href=>"google.com")
|
198
|
+
|
199
|
+
|
200
|
+
# check page content
|
201
|
+
# have_title matcher now supports :wait option
|
202
|
+
expect(page).to have_title("changed title")
|
203
|
+
expect(page).to have_text("text in page.text")
|
204
|
+
```
|
data/examples/custom_user_agent.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# an example of how to customize the user agent for capybara_crawler
class CustomUserAgent < CapybaraCrawler::Crawler
  # visits useragentstring.com and returns the value of its "uas" field
  def check_user_agent
    visit("http://www.useragentstring.com/") # or using "http://www.whatsmyua.info"
    uas_field = page.find_field("uas")
    uas_field.value
  end

  # switches to the :iphone user agent
  def as_iphone
    set_user_agent(:iphone)
  end

  # switches to a hand-written user agent string
  def custom_user_agent
    set_custom_user_agent("CapybaraCrawler/1.0")
  end
end
|
data/examples/google_search.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# an example of how to use capybara_crawler to run Google searches
class GoogleSearch < CapybaraCrawler::Crawler
  GOOGLE_SEARCH_URL = "https://www.google.com/ncr"
  GOOGLE_ADVANCED_SEARCH_URL = "https://www.google.com/advanced_search?hl=en&fg=1"

  # search using Google Instant: type the query and press return in the "q" field
  # returns the page
  def instant_search(query)
    visit(GOOGLE_SEARCH_URL)
    fill_in("q", with: query)
    press_return_on("q")
    page
  end

  # search without js driver: same query, submitted through the "Google Search" button
  # returns the page
  def quick_search(query)
    disable_js do
      visit(GOOGLE_SEARCH_URL)
      fill_in("q", with: query)
      click_button("Google Search")
      page
    end
  end

  # search using Google advanced search
  # options could be :exact, :any, :none, :site
  # select language as "English"
  # returns the page
  def advanced_search(query, options = {})
    visit(GOOGLE_ADVANCED_SEARCH_URL)
    fill_in("as_q", with: query) # all these words

    # optional filters, filled in this order and only when a value was given
    {
      exact: "as_epq",        # exact
      any:   "as_oq",         # any
      none:  "as_eq",         # none
      site:  "as_sitesearch"  # site or domain:
    }.each do |option, field|
      value = options[option]
      fill_in(field, with: value) if value
    end

    # select language as "English"
    find('.goog-select', text: "any language").click
    within("#lr_menu") do
      find('li.goog-menuitem', text: "English").click # select "English"
    end
    click_button("Advanced Search")
    page
  end
end
|
41
|
+
|
data/lib/capybara_crawler/crawler.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require "capybara/dsl"
|
2
|
+
require 'capybara/poltergeist'
|
3
|
+
require 'poltergeist/suppressor'
|
4
|
+
require 'capybara/user_agent'
|
5
|
+
require 'capybara/mechanize'
|
6
|
+
|
7
|
+
module CapybaraCrawler
|
8
|
+
# Crawler is a class that can use the Capybara DSL
|
9
|
+
class Crawler
|
10
|
+
include Capybara::DSL
|
11
|
+
include Capybara::UserAgent::DSL
|
12
|
+
|
13
|
+
class << self
|
14
|
+
# registers both crawler drivers, then applies the global Capybara settings
def init_driver
  register_driver_poltergeist_crawler
  register_driver_mechanize_crawler

  Capybara.default_max_wait_time = 3 # wait at most 3 seconds
  Capybara.run_server = false
end
|
20
|
+
|
21
|
+
# registers the :poltergeist_crawler driver with Capybara
def register_driver_poltergeist_crawler
  Capybara.register_driver(:poltergeist_crawler) do |app|
    poltergeist_options = {
      # when false, JavaScript errors do not get re-raised in Ruby.
      js_errors: false,
      # when false, remote debugging will be disabled
      inspector: false,
      # output log messages like {"id":"68d74fdd-adab-4331-ab8e-48153b9a3176","name":"set_js_errors","args":[false]}
      debug: false,
      # silences noisy phantomjs warnings such as 'CoreText performance note...'.
      phantomjs_logger: Capybara::Poltergeist::Suppressor.new
    }
    Capybara::Poltergeist::Driver.new(app, poltergeist_options)
  end
end
|
31
|
+
|
32
|
+
# registers the :mechanize_crawler driver with Capybara
# the driver logs to tmp/mechanize_crawler.log (relative to the working directory)
def register_driver_mechanize_crawler
  Capybara.register_driver :mechanize_crawler do |app|
    driver = Capybara::Mechanize::Driver.new(app || {})
    driver.configure do |agent|
      # Configure other Mechanize options here.
      log_path = "tmp/mechanize_crawler.log"
      # Logger.new raises Errno::ENOENT when the directory is missing,
      # so make sure tmp/ exists before opening the log file
      log_dir = File.dirname(log_path)
      Dir.mkdir(log_dir) unless Dir.exist?(log_dir)
      agent.log = Logger.new log_path
    end
    driver
  end
end
|
42
|
+
|
43
|
+
# adds the named user agent strings this crawler can switch between
def config_users_agents
  crawler_version = CapybaraCrawler::VERSION
  Capybara::UserAgent.add_user_agents(
    phantomjs: "Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1",
    capybara_crawler: "Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/538.1 (KHTML, like Gecko) CapybaraCrawler/#{crawler_version} Safari/538.1",
    windows_firefox: "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1",
    mac_safari: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/601.5.17 (KHTML, like Gecko) Version/9.1 Safari/537.86.5",
    default: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/538.1 (KHTML, like Gecko) Version/9.1 Safari/538.1"
  )
end
|
52
|
+
|
53
|
+
# runs the driver and user agent setup on the first call only,
# and selects :poltergeist_crawler as the default driver on every call
def init
  unless @initialized # make sure init once
    init_driver
    config_users_agents
    @initialized = true
  end

  Capybara.default_driver = :poltergeist_crawler
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# hook called when a class inherits from Crawler:
# runs the one-time setup and mixes the extra DSL methods into the subclass
def self.inherited(subclass)
  super # keep the inheritance hook chain intact
  init
  subclass.include MoreDSL
end
|
68
|
+
|
69
|
+
# add more DSL methods
|
70
|
+
module MoreDSL
|
71
|
+
# send a key press to a field
# usage:
#   press_key :return, on: 'field-id'
# http://www.rubydoc.info/github/jnicklas/capybara/Capybara%2FNode%2FElement%3Asend_keys
def press_key(key, on:)
  find_field(on).send_keys(key)
end
|
79
|
+
|
80
|
+
# shortcut for press_key with the "return" key
def press_return_on(field_name_or_id)
  press_key(:return, on: field_name_or_id)
end
|
84
|
+
|
85
|
+
# disable running javascript to crawl faster
# runs the given block with the :mechanize_crawler driver and returns the block's result
# the default driver is restored afterwards, even when the block raises
def disable_js(&block)
  Capybara.current_driver = :mechanize_crawler
  yield
ensure
  Capybara.use_default_driver # switch back to default driver
end
|
92
|
+
|
93
|
+
# override Capybara::UserAgent::DSL#set_custom_user_agent
# a Mechanize driver gets the user agent set on its agent directly;
# any other driver is handled by the overridden implementation
def set_custom_user_agent(user_agent)
  current = Capybara.current_session.driver
  return super unless current.is_a?(Capybara::Mechanize::Driver)

  current.configure { |agent| agent.user_agent = user_agent }
end
|
104
|
+
end
|
105
|
+
|
106
|
+
# every new crawler starts out with its user agent configured
def initialize
  config_user_agent
end
|
109
|
+
|
110
|
+
# returns the user agent string registered under :default
# it is close to :mac_safari, so the target page is easy to check on a Mac using Safari,
# but it uses "Safari/538.1", which is extracted from the default user agent of PhantomJS/2.1.1
def default_user_agent
  registered = Capybara::UserAgent.user_agents
  registered[:default]
end
|
115
|
+
|
116
|
+
protected
|
117
|
+
# switch to the user agent registered under :default
# more agent: http://whatsmyuseragent.com/
def config_user_agent
  set_user_agent(:default)
end
|
122
|
+
end
|
123
|
+
end
|
metadata
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: capybara_crawler
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- RainChen
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-03-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: capybara
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: poltergeist
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: poltergeist-suppressor
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: capybara-user_agent
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: capybara-mechanize
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: bundler
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.12'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.12'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rake
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '10.0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '10.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rspec
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '3.0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '3.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: byebug
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: launchy
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
description: a simple web crawler using DSL of capybara
|
154
|
+
email:
|
155
|
+
- hirainchen@gmail.com
|
156
|
+
executables: []
|
157
|
+
extensions: []
|
158
|
+
extra_rdoc_files: []
|
159
|
+
files:
|
160
|
+
- ".gitignore"
|
161
|
+
- ".rspec"
|
162
|
+
- ".travis.yml"
|
163
|
+
- Gemfile
|
164
|
+
- README.md
|
165
|
+
- Rakefile
|
166
|
+
- bin/console
|
167
|
+
- bin/setup
|
168
|
+
- capybara_crawler.gemspec
|
169
|
+
- doc/capybara-cheatsheet.md
|
170
|
+
- examples/custom_user_agent.rb
|
171
|
+
- examples/google_search.rb
|
172
|
+
- lib/capybara_crawler.rb
|
173
|
+
- lib/capybara_crawler/crawler.rb
|
174
|
+
- lib/capybara_crawler/version.rb
|
175
|
+
homepage: https://github.com/rainchen/capybara_crawler
|
176
|
+
licenses: []
|
177
|
+
metadata: {}
|
178
|
+
post_install_message:
|
179
|
+
rdoc_options: []
|
180
|
+
require_paths:
|
181
|
+
- lib
|
182
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
183
|
+
requirements:
|
184
|
+
- - ">="
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: '0'
|
187
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
188
|
+
requirements:
|
189
|
+
- - ">="
|
190
|
+
- !ruby/object:Gem::Version
|
191
|
+
version: '0'
|
192
|
+
requirements: []
|
193
|
+
rubyforge_project:
|
194
|
+
rubygems_version: 2.5.2
|
195
|
+
signing_key:
|
196
|
+
specification_version: 4
|
197
|
+
summary: a simple web crawler using DSL of capybara
|
198
|
+
test_files: []
|
199
|
+
has_rdoc:
|