capybara_crawler 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/README.md +53 -0
- data/Rakefile +18 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/capybara_crawler.gemspec +32 -0
- data/doc/capybara-cheatsheet.md +204 -0
- data/examples/custom_user_agent.rb +15 -0
- data/examples/google_search.rb +41 -0
- data/lib/capybara_crawler.rb +6 -0
- data/lib/capybara_crawler/crawler.rb +123 -0
- data/lib/capybara_crawler/version.rb +3 -0
- metadata +199 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 12bca6bf174ac78ecc5a66200ea7c282a084ce8a
|
4
|
+
data.tar.gz: 583e3743cbbce4ea35b9ed8a0d8005e73851915b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f07c9b1cc4062a8ef90a6c927f9b24f6a82fea50fe7d50c5d473db3bff231f84a9fb9c05d1a960438c8efd176e4590e0d59e3d2c863085900a703079873b303b
|
7
|
+
data.tar.gz: cf53828a6354c39edb2695e0f01052b478e6f7e6216cdcb8c8d7517f1f03f74c1fcd7c81d412c16b744ccfb02898dc67c89520761b1e5834f6d466fe47ab28a0
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# CapybaraCrawler
|
2
|
+
|
3
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/capybara_crawler`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
+
|
5
|
+
TODO: Delete this and the text above, and describe your gem
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'capybara_crawler'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install capybara_crawler
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
see `examples` and `spec/capybara_crawler_spec.rb`
|
26
|
+
|
27
|
+
## More
|
28
|
+
|
29
|
+
### Capybara cheatsheet
|
30
|
+
|
31
|
+
read `doc/capybara-cheatsheet.md`
|
32
|
+
|
33
|
+
## Dependencies
|
34
|
+
|
35
|
+
### Capybara Driver: poltergeist
|
36
|
+
|
37
|
+
Poltergeist is a driver for Capybara that allows you to run your tests on a headless WebKit browser, provided by PhantomJS.
|
38
|
+
https://github.com/teampoltergeist/poltergeist
|
39
|
+
|
40
|
+
### PhantomJS
|
41
|
+
PhantomJS is a headless WebKit scriptable with a JavaScript API.
|
42
|
+
http://phantomjs.org/
|
43
|
+
|
44
|
+
## Development
|
45
|
+
|
46
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
47
|
+
|
48
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
49
|
+
|
50
|
+
## Contributing
|
51
|
+
|
52
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/rainchen/capybara_crawler.
|
53
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rspec/core/rake_task"
|
3
|
+
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
5
|
+
|
6
|
+
task :default => :spec
|
7
|
+
|
8
|
+
namespace :tmp do
|
9
|
+
desc "Clear capybara files from tmp/"
|
10
|
+
task clear: ["tmp:capybara:clear"]
|
11
|
+
|
12
|
+
namespace :capybara do
|
13
|
+
# desc "Clears all files and directories in tmp/capybara"
|
14
|
+
task :clear do
|
15
|
+
rm_rf Dir["tmp/capybara/[^.]*"], verbose: false
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "capybara_crawler"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'capybara_crawler/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "capybara_crawler"
|
8
|
+
spec.version = CapybaraCrawler::VERSION
|
9
|
+
spec.authors = ["RainChen"]
|
10
|
+
spec.email = ["hirainchen@gmail.com"]
|
11
|
+
|
12
|
+
spec.summary = %q{a simple web crawler using DSL of capybara}
|
13
|
+
spec.description = %q{a simple web crawler using DSL of capybara}
|
14
|
+
spec.homepage = "https://github.com/rainchen/capybara_crawler"
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
|
+
spec.bindir = "exe"
|
18
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_dependency "capybara"
|
22
|
+
spec.add_dependency "poltergeist"
|
23
|
+
spec.add_dependency "poltergeist-suppressor"
|
24
|
+
spec.add_dependency "capybara-user_agent"
|
25
|
+
spec.add_dependency "capybara-mechanize"
|
26
|
+
|
27
|
+
spec.add_development_dependency "bundler", "~> 1.12"
|
28
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
29
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
30
|
+
spec.add_development_dependency "byebug"
|
31
|
+
spec.add_development_dependency "launchy"
|
32
|
+
end
|
@@ -0,0 +1,204 @@
|
|
1
|
+
# Capybara cheatsheet
|
2
|
+
|
3
|
+
Based on https://gist.github.com/tomas-stefano/6652111
|
4
|
+
|
5
|
+
## Capybara Actions
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
# Anchor
|
9
|
+
click_link 'Save'
|
10
|
+
|
11
|
+
# Button
|
12
|
+
click_button 'awesome'
|
13
|
+
|
14
|
+
# Both above
|
15
|
+
click_link_or_button 'Save'
|
16
|
+
|
17
|
+
# `click_on` alias for click_link_or_button, shorter yet unambiguous
|
18
|
+
click_on('Link Text') # clicks on either links or buttons
|
19
|
+
click_on('Button Value')
|
20
|
+
|
21
|
+
# Text (area) field
|
22
|
+
fill_in 'Name or id', with: 'Content'
|
23
|
+
|
24
|
+
# Checkbox
|
25
|
+
check 'Content'
|
26
|
+
uncheck 'Content'
|
27
|
+
|
28
|
+
# Radio button
|
29
|
+
choose 'Content'
|
30
|
+
|
31
|
+
# Select option from select tag
|
32
|
+
select 'Option', from: 'Label'
|
33
|
+
|
34
|
+
# File input
|
35
|
+
attach_file Rails.root.join('spec/fixture/some_file.png')
|
36
|
+
```
|
37
|
+
|
38
|
+
## Capybara Finders
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
page.all(:xpath, '//a')
|
42
|
+
|
43
|
+
page.first(:xpath, '//a')
|
44
|
+
|
45
|
+
page.find('//textarea[@id="additional_newline"]')
|
46
|
+
|
47
|
+
page.find(:xpath, "//input[@id='form_pets_dog']")['checked']
|
48
|
+
# => true
|
49
|
+
|
50
|
+
page.find(:css, '#with_focus_event').trigger(:focus)
|
51
|
+
|
52
|
+
page.find(:css,'.wrapper').hover
|
53
|
+
|
54
|
+
page.find_field("test_field").value
|
55
|
+
# => 'blah'
|
56
|
+
|
57
|
+
page.find_by_id('red').tag_name
|
58
|
+
# => 'a'
|
59
|
+
|
60
|
+
# finds invisible elements when false
|
61
|
+
page.find_by_id("hidden_via_ancestor", visible: false)
|
62
|
+
|
63
|
+
page.find_button('What an Awesome')[:value]
|
64
|
+
# => 'awesome'
|
65
|
+
|
66
|
+
page.find_link('abo').text
|
67
|
+
# => 'labore'
|
68
|
+
|
69
|
+
page.find_link('other')[:href]
|
70
|
+
# => '/some_uri'
|
71
|
+
```
|
72
|
+
|
73
|
+
**Note:** `find` will wait for an element to appear on the page, as explained in the Ajax section. If the element does not appear it will raise an error.
|
74
|
+
|
75
|
+
**Note:** In XPath the expression `//` means something very specific, and it might not be what you think. Contrary to common belief, `//` means "anywhere in the document" not "anywhere in the current context".
|
76
|
+
|
77
|
+
|
78
|
+
## Capybara Scoped Finder `within`
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
within(search_form) do
|
82
|
+
fill_in 'Name', with: 'iOS 7'
|
83
|
+
click_button 'Search'
|
84
|
+
end
|
85
|
+
|
86
|
+
def search_form
|
87
|
+
'.search_form'
|
88
|
+
end
|
89
|
+
|
90
|
+
within_fieldset("villain_fieldset") do
|
91
|
+
# ...
|
92
|
+
end
|
93
|
+
|
94
|
+
within_table("some_table") do
|
95
|
+
# ...
|
96
|
+
end
|
97
|
+
|
98
|
+
# Execute the given block within the given iframe using given frame name or index.
|
99
|
+
#
|
100
|
+
within_frame('some_frame') do
|
101
|
+
end
|
102
|
+
|
103
|
+
save_page
|
104
|
+
|
105
|
+
# You need to install launchy gem.
|
106
|
+
save_and_open_page
|
107
|
+
```
|
108
|
+
|
109
|
+
## Capybara Common
|
110
|
+
|
111
|
+
```ruby
|
112
|
+
visit("http://google.com")
|
113
|
+
|
114
|
+
page.current_url
|
115
|
+
|
116
|
+
# Execute the given script, not returning a result. This is useful for scripts that return
|
117
|
+
# complex objects, such as jQuery statements. +execute_script+ should be used over
|
118
|
+
# +evaluate_script+ whenever possible.
|
119
|
+
#
|
120
|
+
page.execute_script("$('#change').text('Funky Doodle')")
|
121
|
+
|
122
|
+
# Evaluate the given JavaScript and return the result. Be careful when using this with
|
123
|
+
# scripts that return complex objects, such as jQuery statements. +execute_script+ might
|
124
|
+
# be a better alternative.
|
125
|
+
#
|
126
|
+
page.evaluate_script("1+3")
|
127
|
+
# => 4
|
128
|
+
|
129
|
+
using_wait_time 6 do
|
130
|
+
# ... Changed Capybara.default_wait_time in this block scope.
|
131
|
+
end
|
132
|
+
```
|
133
|
+
|
134
|
+
## Capybara Matchers
|
135
|
+
|
136
|
+
```ruby
|
137
|
+
expect(page).to have_content("Some Content")
|
138
|
+
expect(page).to have_no_content("Some Content")
|
139
|
+
|
140
|
+
# True if there is an anchor tag with text matching regex
|
141
|
+
expect(page).to have_xpath("//a")
|
142
|
+
expect(page).to have_xpath("//a",:href => "google.com")
|
143
|
+
expect(page).to have_xpath("//a[@href='google.com']")
|
144
|
+
expect(page).to have_xpath("//a[contains(.,'some string')]")
|
145
|
+
expect(page).to have_xpath("//p//a", :text => /re[dab]i/i, :count => 1)
|
146
|
+
|
147
|
+
# can take both xpath and css as input and can take arguments similar to both have_css and have_xpath
|
148
|
+
expect(page).to have_selector(:xpath, "//p/h1")
|
149
|
+
expect(page).to have_selector(:css, "p a#post_edit_path")
|
150
|
+
|
151
|
+
expect(page).to have_css("input#post_title")
|
152
|
+
expect(page).to have_css("input#post_title", :value => "Capybara cheatsheet")
|
153
|
+
|
154
|
+
# True if there are 3 input tags in response
|
155
|
+
expect(page).to have_css("input", :count => 3)
|
156
|
+
|
157
|
+
# True if there are 3 or fewer input tags
|
158
|
+
expect(page).to have_css("input", :maximum => 3)
|
159
|
+
|
160
|
+
# True if there are at least 3 input tags
|
161
|
+
expect(page).to have_css("input", :minimum => 3)
|
162
|
+
|
163
|
+
# True if there are between 1 and 3 input tags
|
164
|
+
expect(page).to have_css("input", :between => 1..3)
|
165
|
+
|
166
|
+
# True if there is an anchor tag with text hello
|
167
|
+
expect(page).to have_css("p a", :text => "hello")
|
168
|
+
expect(page).to have_css("p a", :text => /[hH]ello(.+)/i)
|
169
|
+
|
170
|
+
# For making capybara to take css as default selector
|
171
|
+
Capybara.default_selector = :css
|
172
|
+
|
173
|
+
# checks for the presence of the input tag
|
174
|
+
expect(page).to have_selector("input")
|
175
|
+
|
176
|
+
# checks for input tag with value
|
177
|
+
expect(page).to have_selector("input", :value =>"Post Title")
|
178
|
+
|
179
|
+
expect(page).to have_no_selector("input")
|
180
|
+
|
181
|
+
# For making capybara take xpath as the default selector
|
182
|
+
Capybara.default_selector = :xpath
|
183
|
+
# checks for the presence of the input tag
|
184
|
+
expect(page).to have_selector("//input")
|
185
|
+
|
186
|
+
# checks for input tag with value
|
187
|
+
expect(page).to have_selector("//input", :value =>"Post Title")
|
188
|
+
|
189
|
+
# checks for presence of an input field named FirstName in a form
|
190
|
+
expect(page).to have_field("FirstName")
|
191
|
+
|
192
|
+
expect(page).to have_field("FirstName", :value => "Rambo")
|
193
|
+
expect(page).to have_field("FirstName", :with => "Rambo")
|
194
|
+
|
195
|
+
expect(page).to have_link("Foo")
|
196
|
+
expect(page).to have_link("Foo", :href=>"googl.com")
|
197
|
+
expect(page).to have_no_link("Foo", :href=>"google.com")
|
198
|
+
|
199
|
+
|
200
|
+
# check page content
|
201
|
+
# have_title matcher now supports :wait option
|
202
|
+
expect(page).to have_title("changed title")
|
203
|
+
expect(page).to have_text("text in page.text")
|
204
|
+
```
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# An example of how to customize the user agent for capybara_crawler
|
2
|
+
class CustomUserAgent < CapybaraCrawler::Crawler
|
3
|
+
def check_user_agent
|
4
|
+
visit "http://www.useragentstring.com/" # or using "http://www.whatsmyua.info"
|
5
|
+
page.find_field("uas").value
|
6
|
+
end
|
7
|
+
|
8
|
+
def as_iphone
|
9
|
+
set_user_agent :iphone
|
10
|
+
end
|
11
|
+
|
12
|
+
def custom_user_agent
|
13
|
+
set_custom_user_agent "CapybaraCrawler/1.0"
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# An example of how to use capybara_crawler to run a Google search
|
2
|
+
class GoogleSearch < CapybaraCrawler::Crawler
|
3
|
+
GOOGLE_SEARCH_URL = "https://www.google.com/ncr"
|
4
|
+
GOOGLE_ADVANCED_SEARCH_URL = "https://www.google.com/advanced_search?hl=en&fg=1"
|
5
|
+
|
6
|
+
# search using Google Instant
|
7
|
+
def instant_search(query)
|
8
|
+
visit GOOGLE_SEARCH_URL
|
9
|
+
fill_in "q", with: query
|
10
|
+
press_return_on "q"
|
11
|
+
page
|
12
|
+
end
|
13
|
+
|
14
|
+
# search without js driver
|
15
|
+
def quick_search(query)
|
16
|
+
disable_js do
|
17
|
+
visit GOOGLE_SEARCH_URL
|
18
|
+
fill_in "q", with: query
|
19
|
+
click_button "Google Search"
|
20
|
+
page
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# search using Google advanced search
|
25
|
+
# options could be :exact, :any, :none, :site
|
26
|
+
# select language as "English"
|
27
|
+
def advanced_search(query, options = {})
|
28
|
+
visit GOOGLE_ADVANCED_SEARCH_URL
|
29
|
+
fill_in "as_q" , with: query # all these words
|
30
|
+
fill_in "as_epq" , with: options[:exact] if options[:exact] # exact
|
31
|
+
fill_in "as_oq" , with: options[:any] if options[:any] # any
|
32
|
+
fill_in "as_eq" , with: options[:none] if options[:none] # none
|
33
|
+
fill_in "as_sitesearch" , with: options[:site] if options[:site] # site or domain:
|
34
|
+
# select language as "English"
|
35
|
+
find('.goog-select', text: "any language").click
|
36
|
+
within("#lr_menu") { find('li.goog-menuitem', :text => "English").click } # select "English"
|
37
|
+
click_button "Advanced Search"
|
38
|
+
page
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
@@ -0,0 +1,123 @@
|
|
1
|
+
require "capybara/dsl"
|
2
|
+
require 'capybara/poltergeist'
|
3
|
+
require 'poltergeist/suppressor'
|
4
|
+
require 'capybara/user_agent'
|
5
|
+
require 'capybara/mechanize'
|
6
|
+
|
7
|
+
module CapybaraCrawler
|
8
|
+
# Crawler is a base class that can use the Capybara DSL
|
9
|
+
class Crawler
|
10
|
+
include Capybara::DSL
|
11
|
+
include Capybara::UserAgent::DSL
|
12
|
+
|
13
|
+
class << self
|
14
|
+
def init_driver
|
15
|
+
register_driver_poltergeist_crawler
|
16
|
+
register_driver_mechanize_crawler
|
17
|
+
Capybara.default_max_wait_time = 3
|
18
|
+
Capybara.run_server = false
|
19
|
+
end
|
20
|
+
|
21
|
+
def register_driver_poltergeist_crawler
|
22
|
+
Capybara.register_driver :poltergeist_crawler do |app|
|
23
|
+
Capybara::Poltergeist::Driver.new(app, {
|
24
|
+
js_errors: false, # when false, JavaScript errors do not get re-raised in Ruby.
|
25
|
+
inspector: false, # when false, remote debugging will be disabled
|
26
|
+
debug: false, # output log messages like {"id":"68d74fdd-adab-4331-ab8e-48153b9a3176","name":"set_js_errors","args":[false]}
|
27
|
+
phantomjs_logger: Capybara::Poltergeist::Suppressor.new # silences noisy phantomjs warnings such as 'CoreText performance note...'.
|
28
|
+
})
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Registers the :mechanize_crawler Capybara driver.
#
# The driver is a Capybara::Mechanize::Driver whose agent logs to
# tmp/mechanize_crawler.log, resolved against the current working directory.
def register_driver_mechanize_crawler
  Capybara.register_driver :mechanize_crawler do |app|
    driver = Capybara::Mechanize::Driver.new(app || {})
    driver.configure do |agent|
      # Configure other Mechanize options here.
      # Logger.new fails with Errno::ENOENT when the log directory is missing,
      # so make sure it exists before opening the log file.
      Dir.mkdir("tmp") unless Dir.exist?("tmp")
      agent.log = Logger.new "tmp/mechanize_crawler.log"
    end
    driver
  end
end
|
42
|
+
|
43
|
+
def config_users_agents
|
44
|
+
Capybara::UserAgent.add_user_agents(
|
45
|
+
:phantomjs => "Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1",
|
46
|
+
:capybara_crawler => "Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/538.1 (KHTML, like Gecko) CapybaraCrawler/#{CapybaraCrawler::VERSION} Safari/538.1",
|
47
|
+
:windows_firefox => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1",
|
48
|
+
:mac_safari => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/601.5.17 (KHTML, like Gecko) Version/9.1 Safari/537.86.5",
|
49
|
+
:default => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/538.1 (KHTML, like Gecko) Version/9.1 Safari/538.1",
|
50
|
+
)
|
51
|
+
end
|
52
|
+
|
53
|
+
def init
|
54
|
+
if !@initialized # make sure init once
|
55
|
+
init_driver
|
56
|
+
config_users_agents
|
57
|
+
@initialized = true
|
58
|
+
end
|
59
|
+
Capybara.default_driver = :poltergeist_crawler
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# called when a new class inherits from Crawler
|
64
|
+
def self.inherited(subclass)
|
65
|
+
init
|
66
|
+
subclass.send :include, MoreDSL
|
67
|
+
end
|
68
|
+
|
69
|
+
# add more DSL methods
|
70
|
+
module MoreDSL
|
71
|
+
# press a key on a field
|
72
|
+
# usage:
|
73
|
+
# press_key :return, on: 'field-id'
|
74
|
+
# http://www.rubydoc.info/github/jnicklas/capybara/Capybara%2FNode%2FElement%3Asend_keys
|
75
|
+
def press_key(key, on:)
|
76
|
+
field = find_field(on)
|
77
|
+
field.send_keys key
|
78
|
+
end
|
79
|
+
|
80
|
+
# press "return" key on a field
|
81
|
+
def press_return_on(field_name_or_id)
|
82
|
+
press_key :return, on: field_name_or_id
|
83
|
+
end
|
84
|
+
|
85
|
+
# Runs the given block with the :mechanize_crawler driver, i.e. without
# running JavaScript, to crawl faster.
#
# The default driver is restored afterwards even when the block raises, so a
# failed crawl cannot leave Capybara stuck on the mechanize driver.
#
# Returns the value of the block.
def disable_js(&block)
  Capybara.current_driver = :mechanize_crawler
  yield
ensure
  Capybara.use_default_driver # switch back to default driver
end
|
92
|
+
|
93
|
+
# override Capybara::UserAgent::DSL#set_custom_user_agent
|
94
|
+
def set_custom_user_agent(user_agent)
|
95
|
+
driver = Capybara.current_session.driver
|
96
|
+
if driver.is_a?(Capybara::Mechanize::Driver)
|
97
|
+
driver.configure do |agent|
|
98
|
+
agent.user_agent = user_agent
|
99
|
+
end
|
100
|
+
else
|
101
|
+
super
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def initialize
|
107
|
+
config_user_agent
|
108
|
+
end
|
109
|
+
|
110
|
+
# use a user agent which is similar to :mac_safari to make it easy to check the target page on a Mac using Safari by default
|
111
|
+
# but using "Safari/538.1" which is extracted from default user agent of PhantomJS/2.1.1
|
112
|
+
def default_user_agent
|
113
|
+
Capybara::UserAgent.user_agents[:default]
|
114
|
+
end
|
115
|
+
|
116
|
+
protected
|
117
|
+
# set the user agent to the :default entry (a Safari-like string registered in config_users_agents)
|
118
|
+
# more agent: http://whatsmyuseragent.com/
|
119
|
+
def config_user_agent
|
120
|
+
set_user_agent :default
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
metadata
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: capybara_crawler
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- RainChen
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-03-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: capybara
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: poltergeist
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: poltergeist-suppressor
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: capybara-user_agent
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: capybara-mechanize
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: bundler
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.12'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.12'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rake
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '10.0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '10.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: rspec
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '3.0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '3.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: byebug
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: launchy
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
description: a simple web crawler using DSL of capybara
|
154
|
+
email:
|
155
|
+
- hirainchen@gmail.com
|
156
|
+
executables: []
|
157
|
+
extensions: []
|
158
|
+
extra_rdoc_files: []
|
159
|
+
files:
|
160
|
+
- ".gitignore"
|
161
|
+
- ".rspec"
|
162
|
+
- ".travis.yml"
|
163
|
+
- Gemfile
|
164
|
+
- README.md
|
165
|
+
- Rakefile
|
166
|
+
- bin/console
|
167
|
+
- bin/setup
|
168
|
+
- capybara_crawler.gemspec
|
169
|
+
- doc/capybara-cheatsheet.md
|
170
|
+
- examples/custom_user_agent.rb
|
171
|
+
- examples/google_search.rb
|
172
|
+
- lib/capybara_crawler.rb
|
173
|
+
- lib/capybara_crawler/crawler.rb
|
174
|
+
- lib/capybara_crawler/version.rb
|
175
|
+
homepage: https://github.com/rainchen/capybara_crawler
|
176
|
+
licenses: []
|
177
|
+
metadata: {}
|
178
|
+
post_install_message:
|
179
|
+
rdoc_options: []
|
180
|
+
require_paths:
|
181
|
+
- lib
|
182
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
183
|
+
requirements:
|
184
|
+
- - ">="
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: '0'
|
187
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
188
|
+
requirements:
|
189
|
+
- - ">="
|
190
|
+
- !ruby/object:Gem::Version
|
191
|
+
version: '0'
|
192
|
+
requirements: []
|
193
|
+
rubyforge_project:
|
194
|
+
rubygems_version: 2.5.2
|
195
|
+
signing_key:
|
196
|
+
specification_version: 4
|
197
|
+
summary: a simple web crawler using DSL of capybara
|
198
|
+
test_files: []
|
199
|
+
has_rdoc:
|