gscraper 0.2.4 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +8 -0
- data/.specopts +1 -0
- data/.yardopts +1 -0
- data/ChangeLog.md +122 -0
- data/Gemfile +25 -0
- data/{README.txt → README.md} +25 -24
- data/Rakefile +32 -10
- data/gscraper.gemspec +112 -0
- data/lib/gscraper.rb +0 -2
- data/lib/gscraper/extensions.rb +0 -2
- data/lib/gscraper/extensions/uri.rb +0 -2
- data/lib/gscraper/extensions/uri/http.rb +0 -2
- data/lib/gscraper/extensions/uri/query_params.rb +18 -5
- data/lib/gscraper/gscraper.rb +61 -70
- data/lib/gscraper/has_pages.rb +76 -20
- data/lib/gscraper/licenses.rb +0 -2
- data/lib/gscraper/page.rb +45 -16
- data/lib/gscraper/search.rb +0 -2
- data/lib/gscraper/search/ajax_query.rb +75 -22
- data/lib/gscraper/search/page.rb +328 -122
- data/lib/gscraper/search/query.rb +100 -7
- data/lib/gscraper/search/result.rb +27 -6
- data/lib/gscraper/search/search.rb +59 -9
- data/lib/gscraper/search/web_query.rb +120 -37
- data/lib/gscraper/sponsored_ad.rb +19 -6
- data/lib/gscraper/sponsored_links.rb +260 -92
- data/lib/gscraper/version.rb +2 -3
- data/spec/extensions/uri/query_params_spec.rb +8 -0
- data/spec/gscraper_spec.rb +9 -4
- data/spec/has_pages_examples.rb +0 -2
- data/spec/has_sponsored_links_examples.rb +2 -1
- data/spec/helpers/query.rb +3 -1
- data/spec/helpers/uri.rb +6 -4
- data/spec/page_has_results_examples.rb +0 -2
- data/spec/search/ajax_query_spec.rb +6 -11
- data/spec/search/page_has_results_examples.rb +0 -2
- data/spec/search/web_query_spec.rb +6 -11
- data/spec/spec_helper.rb +10 -4
- metadata +147 -54
- data/History.txt +0 -101
- data/Manifest.txt +0 -38
- data/tasks/spec.rb +0 -9
data/lib/gscraper/licenses.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
#
|
2
|
-
#--
|
3
2
|
# GScraper - A web-scraping interface to various Google Services.
|
4
3
|
#
|
5
4
|
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
@@ -17,7 +16,6 @@
|
|
17
16
|
# You should have received a copy of the GNU General Public License
|
18
17
|
# along with this program; if not, write to the Free Software
|
19
18
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
20
|
-
#++
|
21
19
|
#
|
22
20
|
|
23
21
|
module GScraper
|
data/lib/gscraper/page.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
#
|
2
|
-
#--
|
3
2
|
# GScraper - A web-scraping interface to various Google Services.
|
4
3
|
#
|
5
4
|
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
@@ -17,47 +16,77 @@
|
|
17
16
|
# You should have received a copy of the GNU General Public License
|
18
17
|
# along with this program; if not, write to the Free Software
|
19
18
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
20
|
-
#++
|
21
19
|
#
|
22
20
|
|
21
|
+
require 'enumerator'
|
22
|
+
|
23
23
|
module GScraper
|
24
24
|
class Page < Array
|
25
25
|
|
26
26
|
#
|
27
|
-
# Creates a new Page object
|
28
|
-
#
|
27
|
+
# Creates a new Page object.
|
28
|
+
#
|
29
|
+
# @param [Array] elements
|
30
|
+
# The elements to populate the page with.
|
31
|
+
#
|
32
|
+
# @yield [page]
|
33
|
+
# If a block is given, it will be passed the newly created page.
|
34
|
+
#
|
35
|
+
# @yieldparam [Page] page
|
36
|
+
# The newly created page.
|
29
37
|
#
|
30
|
-
def initialize(elements=[]
|
38
|
+
def initialize(elements=[])
|
31
39
|
super(elements)
|
32
40
|
|
33
|
-
|
41
|
+
yield self if block_given?
|
34
42
|
end
|
35
43
|
|
36
44
|
#
|
37
|
-
#
|
38
|
-
# given _block_. If the _block_ is not given, the page will be
|
39
|
-
# returned.
|
45
|
+
# Maps the elements within the page.
|
40
46
|
#
|
41
|
-
#
|
47
|
+
# @yield [element]
|
48
|
+
# The given block will be passed each element in the page.
|
42
49
|
#
|
43
|
-
#
|
50
|
+
# @return [Array, Enumerator]
|
51
|
+
# The mapped result. If no block was given, an Enumerator object will
|
52
|
+
# be returned.
|
44
53
|
#
|
45
|
-
|
46
|
-
|
54
|
+
# @example
|
55
|
+
# page.map
|
56
|
+
# # => Enumerator
|
57
|
+
#
|
58
|
+
# @example
|
59
|
+
# page.map { |element| element.field }
|
60
|
+
# # => [...]
|
61
|
+
#
|
62
|
+
def map
|
63
|
+
return enum_for(:map) unless block_given?
|
47
64
|
|
48
65
|
mapped = []
|
49
66
|
|
50
|
-
each { |element| mapped <<
|
67
|
+
each { |element| mapped << yield(element) }
|
51
68
|
return mapped
|
52
69
|
end
|
53
70
|
|
54
71
|
#
|
55
|
-
# Selects the elements within the
|
72
|
+
# Selects the elements within the page.
|
73
|
+
#
|
74
|
+
# @yield [element]
|
75
|
+
# The given block will be passed each element in the page.
|
76
|
+
#
|
77
|
+
# @return [Array, Enumerator]
|
78
|
+
# The selected elements. If no block was given, an Enumerator object
|
79
|
+
# is returned.
|
56
80
|
#
|
81
|
+
# @example
|
57
82
|
# page.select { |element| element.field =~ /ruby/i }
|
58
83
|
#
|
59
84
|
def select(&block)
|
60
|
-
|
85
|
+
unless block
|
86
|
+
enum_for(:select)
|
87
|
+
else
|
88
|
+
self.class.new(super(&block))
|
89
|
+
end
|
61
90
|
end
|
62
91
|
|
63
92
|
end
|
data/lib/gscraper/search.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
#
|
2
|
-
#--
|
3
2
|
# GScraper - A web-scraping interface to various Google Services.
|
4
3
|
#
|
5
4
|
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
@@ -17,7 +16,6 @@
|
|
17
16
|
# You should have received a copy of the GNU General Public License
|
18
17
|
# along with this program; if not, write to the Free Software
|
19
18
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
20
|
-
#++
|
21
19
|
#
|
22
20
|
|
23
21
|
require 'gscraper/search/web_query'
|
@@ -1,5 +1,4 @@
|
|
1
1
|
#
|
2
|
-
#--
|
3
2
|
# GScraper - A web-scraping interface to various Google Services.
|
4
3
|
#
|
5
4
|
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
@@ -17,7 +16,6 @@
|
|
17
16
|
# You should have received a copy of the GNU General Public License
|
18
17
|
# along with this program; if not, write to the Free Software
|
19
18
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
20
|
-
#++
|
21
19
|
#
|
22
20
|
|
23
21
|
require 'gscraper/search/result'
|
@@ -32,6 +30,9 @@ require 'nokogiri'
|
|
32
30
|
|
33
31
|
module GScraper
|
34
32
|
module Search
|
33
|
+
#
|
34
|
+
# Represents a Query through the Google AJAX search API.
|
35
|
+
#
|
35
36
|
class AJAXQuery < Query
|
36
37
|
|
37
38
|
include HasPages
|
@@ -70,16 +71,28 @@ module GScraper
|
|
70
71
|
attr_accessor :version
|
71
72
|
|
72
73
|
#
|
73
|
-
# Creates a new
|
74
|
-
# given it will be passed the newly created AJAXQuery object.
|
74
|
+
# Creates a new AJAX query.
|
75
75
|
#
|
76
|
-
#
|
77
|
-
#
|
78
|
-
#
|
79
|
-
#
|
80
|
-
#
|
81
|
-
#
|
82
|
-
#
|
76
|
+
# @param [Hash] options
|
77
|
+
# Query options.
|
78
|
+
#
|
79
|
+
# @option options [Symbol] :language (:en)
|
80
|
+
# The search language.
|
81
|
+
#
|
82
|
+
# @option options [String] :sig ('582c1116317355adf613a6a843f19ece')
|
83
|
+
# The search signature.
|
84
|
+
#
|
85
|
+
# @option options [Symbol] :key (:notsupplied)
|
86
|
+
# The search key.
|
87
|
+
#
|
88
|
+
# @option options [Float] :version (1.0)
|
89
|
+
# The desired API version.
|
90
|
+
#
|
91
|
+
# @yield [query]
|
92
|
+
# If a block is given, the new AJAX query will be passed to it.
|
93
|
+
#
|
94
|
+
# @yieldparam [AJAXQuery] query
|
95
|
+
# The new AJAX query.
|
83
96
|
#
|
84
97
|
def initialize(options={},&block)
|
85
98
|
@agent = GScraper.web_agent(options)
|
@@ -94,10 +107,26 @@ module GScraper
|
|
94
107
|
end
|
95
108
|
|
96
109
|
#
|
97
|
-
# Creates a new
|
98
|
-
#
|
110
|
+
# Creates a new AJAX query from the specified URL.
|
111
|
+
#
|
112
|
+
# @param [URI::HTTP, String] url
|
113
|
+
# The URL to create the query from.
|
114
|
+
#
|
115
|
+
# @param [Hash] options
|
116
|
+
# Additional query options.
|
117
|
+
#
|
118
|
+
# @yield [query]
|
119
|
+
# If a block is given, it will be passed the new AJAX query.
|
120
|
+
#
|
121
|
+
# @yieldparam [AJAXQuery] query
|
122
|
+
# The new AJAX query.
|
123
|
+
#
|
124
|
+
# @return [AJAXQuery]
|
125
|
+
# The new AJAX query.
|
99
126
|
#
|
100
|
-
|
127
|
+
# @see AJAXQuery.new
|
128
|
+
#
|
129
|
+
def AJAXQuery.from_url(url,options={},&block)
|
101
130
|
url = URI(url.to_s)
|
102
131
|
|
103
132
|
options[:language] = url.query_params['hl']
|
@@ -111,14 +140,22 @@ module GScraper
|
|
111
140
|
end
|
112
141
|
|
113
142
|
#
|
114
|
-
#
|
143
|
+
# The results per page.
|
144
|
+
#
|
145
|
+
# @return [Integer]
|
146
|
+
# The number of results per page.
|
147
|
+
#
|
148
|
+
# @see RESULTS_PER_PAGE
|
115
149
|
#
|
116
150
|
def results_per_page
|
117
151
|
RESULTS_PER_PAGE
|
118
152
|
end
|
119
153
|
|
120
154
|
#
|
121
|
-
#
|
155
|
+
# The URL that represents the query.
|
156
|
+
#
|
157
|
+
# @return [URI::HTTP]
|
158
|
+
# The URL for the query.
|
122
159
|
#
|
123
160
|
def search_url
|
124
161
|
search_url = URI(API_URL)
|
@@ -134,8 +171,13 @@ module GScraper
|
|
134
171
|
end
|
135
172
|
|
136
173
|
#
|
137
|
-
#
|
138
|
-
#
|
174
|
+
# The URL that represents the query at a specific page index.
|
175
|
+
#
|
176
|
+
# @param [Integer] page_index
|
177
|
+
# The page index to create the URL for.
|
178
|
+
#
|
179
|
+
# @return [URI::HTTP]
|
180
|
+
# The query URL for the given page index.
|
139
181
|
#
|
140
182
|
def page_url(page_index)
|
141
183
|
url = search_url
|
@@ -148,8 +190,13 @@ module GScraper
|
|
148
190
|
end
|
149
191
|
|
150
192
|
#
|
151
|
-
#
|
152
|
-
#
|
193
|
+
# A page containing results at the specified page index.
|
194
|
+
#
|
195
|
+
# @param [Integer] page_index
|
196
|
+
# The index of the page.
|
197
|
+
#
|
198
|
+
# @return [Page<Result>]
|
199
|
+
# A page object.
|
153
200
|
#
|
154
201
|
def page(page_index)
|
155
202
|
Page.new do |new_page|
|
@@ -162,8 +209,14 @@ module GScraper
|
|
162
209
|
hash['results'].each_with_index do |result,index|
|
163
210
|
rank = rank_offset + (index + 1)
|
164
211
|
title = Nokogiri::HTML(result['title']).inner_text
|
165
|
-
url = URI(result['unescapedUrl'])
|
166
|
-
|
212
|
+
url = URI(URI.escape(result['unescapedUrl']))
|
213
|
+
|
214
|
+
unless result['content'].empty?
|
215
|
+
summary = Nokogiri::HTML(result['content']).inner_text
|
216
|
+
else
|
217
|
+
summary = ''
|
218
|
+
end
|
219
|
+
|
167
220
|
cached_url = URI(result['cacheUrl'])
|
168
221
|
|
169
222
|
new_page << Result.new(rank,title,url,summary,cached_url)
|
data/lib/gscraper/search/page.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
#
|
2
|
-
#--
|
3
2
|
# GScraper - A web-scraping interface to various Google Services.
|
4
3
|
#
|
5
4
|
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
@@ -17,7 +16,6 @@
|
|
17
16
|
# You should have received a copy of the GNU General Public License
|
18
17
|
# along with this program; if not, write to the Free Software
|
19
18
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
20
|
-
#++
|
21
19
|
#
|
22
20
|
|
23
21
|
require 'gscraper/search/result'
|
@@ -27,224 +25,376 @@ module GScraper
|
|
27
25
|
module Search
|
28
26
|
class Page < GScraper::Page
|
29
27
|
|
28
|
+
alias results_with select
|
29
|
+
|
30
30
|
#
|
31
|
-
# Selects the results
|
31
|
+
# Selects the results with the matching title.
|
32
32
|
#
|
33
|
-
#
|
33
|
+
# @param [String, Regexp] title
|
34
|
+
# The title to search for.
|
34
35
|
#
|
35
|
-
|
36
|
-
|
37
|
-
end
|
38
|
-
|
36
|
+
# @yield [result]
|
37
|
+
# The given block will be passed each matching result.
|
39
38
|
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
# result will be passed to the _block_.
|
39
|
+
# @yieldparam [Result] result
|
40
|
+
# A result with the matching title.
|
43
41
|
#
|
42
|
+
# @return [Array<Result>]
|
43
|
+
# The results with the matching title.
|
44
|
+
#
|
45
|
+
# @example
|
44
46
|
# page.results_with_title('hackety org') #=> Enumerator
|
45
47
|
#
|
48
|
+
# @example
|
46
49
|
# page.results_with_title(/awesome/) do |result|
|
47
50
|
# puts result.url
|
48
51
|
# end
|
49
52
|
#
|
50
|
-
def results_with_title(title
|
51
|
-
|
52
|
-
|
53
|
+
def results_with_title(title)
|
54
|
+
unless block_given?
|
55
|
+
enum_for(:results_with_title,title)
|
53
56
|
else
|
54
|
-
|
55
|
-
|
57
|
+
results_with do |result|
|
58
|
+
if result.title.match(title)
|
59
|
+
yield result
|
56
60
|
|
57
|
-
|
58
|
-
|
61
|
+
true
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
59
65
|
end
|
60
66
|
|
61
67
|
#
|
62
|
-
# Selects the results with the matching
|
63
|
-
# either a String or a Regexp. If _block_ is given, each matching
|
64
|
-
# result will be passed to the _block_.
|
68
|
+
# Selects the results with the matching URL.
|
65
69
|
#
|
70
|
+
# @param [String, Regexp] url
|
71
|
+
# The URL to search for.
|
72
|
+
#
|
73
|
+
# @yield [result]
|
74
|
+
# The given block will be passed each matching result.
|
75
|
+
#
|
76
|
+
# @yieldparam [Result] result
|
77
|
+
# A result with the matching URL.
|
78
|
+
#
|
79
|
+
# @return [Array<Result>]
|
80
|
+
# The results with the matching URL.
|
81
|
+
#
|
82
|
+
# @example
|
66
83
|
# page.results_with_url(/\.com/) # => Enumerator
|
67
84
|
#
|
85
|
+
# @example
|
68
86
|
# page.results_with_url(/^https:\/\//) do |result|
|
69
87
|
# puts result.title
|
70
88
|
# end
|
71
89
|
#
|
72
|
-
def results_with_url(url
|
73
|
-
|
74
|
-
|
90
|
+
def results_with_url(url)
|
91
|
+
unless block_given?
|
92
|
+
enum_for(:results_with_url,url)
|
75
93
|
else
|
76
|
-
|
77
|
-
|
94
|
+
results_with do |result|
|
95
|
+
if result.url.match(url)
|
96
|
+
yield result
|
78
97
|
|
79
|
-
|
80
|
-
|
98
|
+
true
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
81
102
|
end
|
82
103
|
|
83
104
|
#
|
84
|
-
# Selects the results with the matching
|
85
|
-
# be either a String or a Regexp. If _block_ is given, each matching
|
86
|
-
# result will be passed to the _block_.
|
105
|
+
# Selects the results with the matching summary.
|
87
106
|
#
|
107
|
+
# @param [String, Regexp] summary
|
108
|
+
# The summary to search for.
|
109
|
+
#
|
110
|
+
# @yield [result]
|
111
|
+
# The given block will be passed each matching result.
|
112
|
+
#
|
113
|
+
# @yieldparam [Result] result
|
114
|
+
# A result with the matching summary.
|
115
|
+
#
|
116
|
+
# @return [Array<Result>]
|
117
|
+
# The results with the matching summary.
|
118
|
+
#
|
119
|
+
# @example
|
88
120
|
# page.results_with_summary(/cheese cake/) # => Enumerator
|
89
121
|
#
|
122
|
+
# @example
|
90
123
|
# page.results_with_summary(/Scientifically/) do |result|
|
91
124
|
# puts result.url
|
92
125
|
# end
|
93
126
|
#
|
94
|
-
def results_with_summary(summary
|
95
|
-
|
96
|
-
|
127
|
+
def results_with_summary(summary)
|
128
|
+
unless block_given?
|
129
|
+
enum_for(:results_with_summary,summary)
|
97
130
|
else
|
98
|
-
|
99
|
-
|
131
|
+
results_with do |result|
|
132
|
+
if result.summary.match(summary)
|
133
|
+
yield result
|
100
134
|
|
101
|
-
|
102
|
-
|
135
|
+
true
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
103
139
|
end
|
104
140
|
|
105
141
|
#
|
106
|
-
#
|
107
|
-
# Page.
|
142
|
+
# Iterates over each result's rank within the page.
|
108
143
|
#
|
109
|
-
#
|
144
|
+
# @yield [rank]
|
145
|
+
# The given block will be passed the rank of each result in
|
146
|
+
# the page.
|
110
147
|
#
|
111
|
-
|
112
|
-
|
148
|
+
# @yieldparam [Integer] rank
|
149
|
+
# The rank of a result in the page.
|
150
|
+
#
|
151
|
+
# @return [Enumerator]
|
152
|
+
# If no block is given, an Enumerator object will be returned.
|
153
|
+
#
|
154
|
+
# @example
|
155
|
+
# each_rank { |rank| puts rank }
|
156
|
+
#
|
157
|
+
def each_rank
|
158
|
+
unless block_given?
|
159
|
+
enum_for(:each_rank)
|
160
|
+
else
|
161
|
+
each { |result| yield result.rank }
|
162
|
+
end
|
113
163
|
end
|
114
164
|
|
115
165
|
#
|
116
|
-
#
|
117
|
-
# Page.
|
166
|
+
# Iterates over each result's title within the page.
|
118
167
|
#
|
119
|
-
#
|
168
|
+
# @yield [title]
|
169
|
+
# The given block will be passed the title of each result in
|
170
|
+
# the page.
|
171
|
+
#
|
172
|
+
# @yieldparam [String] title
|
173
|
+
# The title of a result in the page.
|
120
174
|
#
|
121
|
-
|
122
|
-
|
175
|
+
# @return [Enumerator]
|
176
|
+
# If no block is given, an Enumerator object will be returned.
|
177
|
+
#
|
178
|
+
# @example
|
179
|
+
# each_title { |title| puts title }
|
180
|
+
#
|
181
|
+
def each_title
|
182
|
+
unless block_given?
|
183
|
+
enum_for(:each_title)
|
184
|
+
else
|
185
|
+
each { |result| yield result.title }
|
186
|
+
end
|
123
187
|
end
|
124
188
|
|
125
189
|
#
|
126
|
-
#
|
127
|
-
# Page.
|
190
|
+
# Iterates over each result's url within the page.
|
128
191
|
#
|
129
|
-
#
|
192
|
+
# @yield [url]
|
193
|
+
# The given block will be passed the URL of each result in
|
194
|
+
# the page.
|
195
|
+
#
|
196
|
+
# @yieldparam [URI::HTTP] url
|
197
|
+
# The URL of a result in the page.
|
130
198
|
#
|
131
|
-
|
132
|
-
|
199
|
+
# @return [Enumerator]
|
200
|
+
# If no block is given, an Enumerator object will be returned.
|
201
|
+
#
|
202
|
+
# @example
|
203
|
+
# each_url { |url| puts url }
|
204
|
+
#
|
205
|
+
def each_url
|
206
|
+
unless block_given?
|
207
|
+
enum_for(:each_url)
|
208
|
+
else
|
209
|
+
each { |result| yield result.url }
|
210
|
+
end
|
133
211
|
end
|
134
212
|
|
135
213
|
#
|
136
|
-
#
|
137
|
-
# Page.
|
214
|
+
# Iterates over each result's summary within the page.
|
138
215
|
#
|
139
|
-
#
|
216
|
+
# @yield [summary]
|
217
|
+
# The given block will be passed the summary of each result in
|
218
|
+
# the page.
|
219
|
+
#
|
220
|
+
# @yieldparam [String] summary
|
221
|
+
# The summary of a result in the page.
|
140
222
|
#
|
141
|
-
|
142
|
-
|
223
|
+
# @return [Enumerator]
|
224
|
+
# If no block is given, an Enumerator object will be returned.
|
225
|
+
#
|
226
|
+
# @example
|
227
|
+
# each_summary { |summary| puts summary }
|
228
|
+
#
|
229
|
+
def each_summary
|
230
|
+
unless block_given?
|
231
|
+
enum_for(:each_summary)
|
232
|
+
else
|
233
|
+
each { |result| yield result.summary }
|
234
|
+
end
|
143
235
|
end
|
144
236
|
|
145
237
|
#
|
146
|
-
#
|
147
|
-
# the Page.
|
238
|
+
# Iterates over each result's cached URLs within the page.
|
148
239
|
#
|
149
|
-
#
|
240
|
+
# @yield [cached_url]
|
241
|
+
# The given block will be passed the Cached URL of each result in
|
242
|
+
# the page.
|
243
|
+
#
|
244
|
+
# @yieldparam [URI::HTTP] cached_url
|
245
|
+
# The Cached URL of a result in the page.
|
150
246
|
#
|
151
|
-
|
152
|
-
|
247
|
+
# @return [Enumerator]
|
248
|
+
# If no block is given, an Enumerator object will be returned.
|
249
|
+
#
|
250
|
+
# @example
|
251
|
+
# each_cached_url { |cached_url| puts cached_url }
|
252
|
+
#
|
253
|
+
def each_cached_url
|
254
|
+
unless block_given?
|
255
|
+
enum_for(:each_cached_url)
|
256
|
+
else
|
257
|
+
each do |result|
|
258
|
+
yield result.cached_url if result.cached_url
|
259
|
+
end
|
260
|
+
end
|
153
261
|
end
|
154
262
|
|
155
263
|
#
|
156
|
-
#
|
157
|
-
# the Page.
|
264
|
+
# Iterates over each result's cached pages within the page.
|
158
265
|
#
|
159
|
-
#
|
266
|
+
# @yield [cached_page]
|
267
|
+
# The given block will be passed the Cached Page of each result in
|
268
|
+
# the page.
|
269
|
+
#
|
270
|
+
# @yieldparam [Mechanize::Page] cached_page
|
271
|
+
# The Cached Page of a result in the page.
|
160
272
|
#
|
161
|
-
|
162
|
-
|
273
|
+
# @return [Enumerator]
|
274
|
+
# If no block is given, an Enumerator object will be returned.
|
275
|
+
#
|
276
|
+
# @example
|
277
|
+
# each_cached_page { |page| puts page.readlines }
|
278
|
+
#
|
279
|
+
def each_cached_page
|
280
|
+
unless block_given?
|
281
|
+
enum_for(:each_cached_page)
|
282
|
+
else
|
283
|
+
each do |result|
|
284
|
+
yield result.cached_page if result.cached_page
|
285
|
+
end
|
286
|
+
end
|
163
287
|
end
|
164
288
|
|
165
289
|
#
|
166
|
-
#
|
167
|
-
# within the Page.
|
290
|
+
# Iterates over each result's similar Query URLs within the page.
|
168
291
|
#
|
169
|
-
#
|
292
|
+
# @yield [similar_url]
|
293
|
+
# The given block will be passed the Similar Query URL of each
|
294
|
+
# result in the page.
|
295
|
+
#
|
296
|
+
# @yieldparam [URI::HTTP] similar_url
|
297
|
+
# The Similar Query URL of a result in the page.
|
170
298
|
#
|
171
|
-
|
172
|
-
|
299
|
+
# @return [Enumerator]
|
300
|
+
# If no block is given, an Enumerator object will be returned.
|
301
|
+
#
|
302
|
+
# @example
|
303
|
+
# each_similar_url { |similar_url| puts similar_url }
|
304
|
+
#
|
305
|
+
def each_similar_url
|
306
|
+
unless block_given?
|
307
|
+
enum_for(:each_similar_url)
|
308
|
+
else
|
309
|
+
each do |result|
|
310
|
+
yield result.similar_url if result.similar_url
|
311
|
+
end
|
312
|
+
end
|
173
313
|
end
|
174
314
|
|
175
315
|
#
|
176
|
-
#
|
177
|
-
# the given _block_.
|
316
|
+
# Returns the ranks of the results in the page.
|
178
317
|
#
|
179
|
-
#
|
318
|
+
# @return [Array<Integer>]
|
319
|
+
# The ranks of the results.
|
180
320
|
#
|
181
|
-
def
|
182
|
-
|
321
|
+
def ranks
|
322
|
+
each_rank.to_a
|
183
323
|
end
|
184
324
|
|
185
325
|
#
|
186
|
-
#
|
187
|
-
# the given _block_.
|
326
|
+
# Returns the titles of the results in the page.
|
188
327
|
#
|
189
|
-
#
|
328
|
+
# @return [Array<String>]
|
329
|
+
# The titles of the results.
|
190
330
|
#
|
191
|
-
def
|
192
|
-
|
331
|
+
def titles
|
332
|
+
each_title.to_a
|
193
333
|
end
|
194
334
|
|
195
335
|
#
|
196
|
-
#
|
197
|
-
# the given _block_.
|
336
|
+
# Returns the URLs of the results in the page.
|
198
337
|
#
|
199
|
-
#
|
338
|
+
# @return [Array<URI::HTTP>]
|
339
|
+
# The URLs of the results.
|
200
340
|
#
|
201
|
-
def
|
202
|
-
|
341
|
+
def urls
|
342
|
+
each_url.to_a
|
203
343
|
end
|
204
344
|
|
205
345
|
#
|
206
|
-
#
|
207
|
-
# to the given _block_.
|
346
|
+
# Returns the summaries of the results in the page.
|
208
347
|
#
|
209
|
-
#
|
348
|
+
# @return [Array<String>]
|
349
|
+
# The summaries of the results.
|
210
350
|
#
|
211
|
-
def
|
212
|
-
|
351
|
+
def summaries
|
352
|
+
each_summary.to_a
|
213
353
|
end
|
214
354
|
|
215
355
|
#
|
216
|
-
#
|
217
|
-
# each to the given _block_.
|
356
|
+
# Returns the Cached URLs of the results in the page.
|
218
357
|
#
|
219
|
-
#
|
358
|
+
# @return [Array<URI::HTTP>]
|
359
|
+
# The Cached URLs of the results.
|
220
360
|
#
|
221
|
-
def
|
222
|
-
|
361
|
+
def cached_urls
|
362
|
+
each_cached_url.to_a
|
223
363
|
end
|
224
364
|
|
225
365
|
#
|
226
|
-
#
|
227
|
-
# each to the given _block_.
|
366
|
+
# Returns the Cached Pages of the results in the page.
|
228
367
|
#
|
229
|
-
#
|
368
|
+
# @return [Array<Mechanize::Page>]
|
369
|
+
# The Cached Pages of the results.
|
230
370
|
#
|
231
|
-
def
|
232
|
-
|
371
|
+
def cached_pages
|
372
|
+
each_cached_page.to_a
|
233
373
|
end
|
234
374
|
|
235
375
|
#
|
236
|
-
#
|
237
|
-
# passing each to the given _block_.
|
376
|
+
# Returns the Similar Query URLs of the results in the page.
|
238
377
|
#
|
239
|
-
#
|
378
|
+
# @return [Array<URI::HTTP>]
|
379
|
+
# The Similar Query URLs of the results.
|
240
380
|
#
|
241
|
-
def
|
242
|
-
|
381
|
+
def similar_urls
|
382
|
+
each_similar_url.to_a
|
243
383
|
end
|
244
384
|
|
245
385
|
#
|
246
|
-
# Returns the ranks of the results that match the
|
386
|
+
# Returns the ranks of the results that match the given block.
|
387
|
+
#
|
388
|
+
# @yield [result]
|
389
|
+
# The given block will be used to filter the results in the page.
|
247
390
|
#
|
391
|
+
# @yieldparam [Result] result
|
392
|
+
# A result in the page.
|
393
|
+
#
|
394
|
+
# @return [Array<Integer>]
|
395
|
+
# The ranks of the results which match the given block.
|
396
|
+
#
|
397
|
+
# @example
|
248
398
|
# page.ranks_of { |result| result.title =~ /awesome/ }
|
249
399
|
#
|
250
400
|
def ranks_of(&block)
|
@@ -252,8 +402,18 @@ module GScraper
|
|
252
402
|
end
|
253
403
|
|
254
404
|
#
|
255
|
-
# Returns the titles of the results that match the
|
405
|
+
# Returns the titles of the results that match the given block.
|
406
|
+
#
|
407
|
+
# @yield [result]
|
408
|
+
# The given block will be used to filter the results in the page.
|
256
409
|
#
|
410
|
+
# @yieldparam [Result] result
|
411
|
+
# A result in the page.
|
412
|
+
#
|
413
|
+
# @return [Array<String>]
|
414
|
+
# The titles of the results which match the given block.
|
415
|
+
#
|
416
|
+
# @example
|
257
417
|
# page.titles_of { |result| result.url.include?('www') }
|
258
418
|
#
|
259
419
|
def titles_of(&block)
|
@@ -261,8 +421,18 @@ module GScraper
|
|
261
421
|
end
|
262
422
|
|
263
423
|
#
|
264
|
-
# Returns the urls of the results that match the
|
424
|
+
# Returns the urls of the results that match the given block.
|
425
|
+
#
|
426
|
+
# @yield [result]
|
427
|
+
# The given block will be used to filter the results in the page.
|
265
428
|
#
|
429
|
+
# @yieldparam [Result] result
|
430
|
+
# A result in the page.
|
431
|
+
#
|
432
|
+
# @return [Array<URI::HTTP>]
|
433
|
+
# The URLs of the results which match the given block.
|
434
|
+
#
|
435
|
+
# @example
|
266
436
|
# page.urls_of { |result| result.summary =~ /awesome pants/ }
|
267
437
|
#
|
268
438
|
def urls_of(&block)
|
@@ -270,9 +440,18 @@ module GScraper
|
|
270
440
|
end
|
271
441
|
|
272
442
|
#
|
273
|
-
# Returns the summaries of the results that match the
|
274
|
-
#
|
443
|
+
# Returns the summaries of the results that match the given block.
|
444
|
+
#
|
445
|
+
# @yield [result]
|
446
|
+
# The given block will be used to filter the results in the page.
|
275
447
|
#
|
448
|
+
# @yieldparam [Result] result
|
449
|
+
# A result in the page.
|
450
|
+
#
|
451
|
+
# @return [Array<String>]
|
452
|
+
# The summaries of the results which match the given block.
|
453
|
+
#
|
454
|
+
# @example
|
276
455
|
# page.summaries_of { |result| result.title =~ /what if/ }
|
277
456
|
#
|
278
457
|
def summaries_of(&block)
|
@@ -280,9 +459,18 @@ module GScraper
|
|
280
459
|
end
|
281
460
|
|
282
461
|
#
|
283
|
-
# Returns the
|
284
|
-
#
|
462
|
+
# Returns the Cached URLs of the results that match the given block.
|
463
|
+
#
|
464
|
+
# @yield [result]
|
465
|
+
# The given block will be used to filter the results in the page.
|
285
466
|
#
|
467
|
+
# @yieldparam [Result] result
|
468
|
+
# A result in the page.
|
469
|
+
#
|
470
|
+
# @return [Array<URI::HTTP>]
|
471
|
+
# The Cached URLs of the results which match the given block.
|
472
|
+
#
|
473
|
+
# @example
|
286
474
|
# page.cached_urls_of { |result| result.title =~ /howdy/ }
|
287
475
|
#
|
288
476
|
def cached_urls_of(&block)
|
@@ -290,20 +478,38 @@ module GScraper
|
|
290
478
|
end
|
291
479
|
|
292
480
|
#
|
293
|
-
# Returns the cached pages of the results that match the
|
294
|
-
#
|
295
|
-
#
|
481
|
+
# Returns the cached pages of the results that match the given block.
|
482
|
+
#
|
483
|
+
# @yield [result]
|
484
|
+
# The given block will be used to filter the results in the page.
|
296
485
|
#
|
486
|
+
# @yieldparam [Result] result
|
487
|
+
# A result in the page.
|
488
|
+
#
|
489
|
+
# @return [Array<Mechanize::Page>]
|
490
|
+
# The Cached Pages of the results which match the given block.
|
491
|
+
#
|
492
|
+
# @example
|
297
493
|
# page.cached_pages_of { |result| result.title =~ /dude/ }
|
298
494
|
#
|
299
|
-
def cached_pages_of(
|
300
|
-
results_with(&block).cached_pages
|
495
|
+
def cached_pages_of(&block)
|
496
|
+
results_with(&block).cached_pages
|
301
497
|
end
|
302
498
|
|
303
499
|
#
|
304
|
-
# Returns the
|
305
|
-
#
|
500
|
+
# Returns the Similar Query URLs of the results that match the given
|
501
|
+
# block.
|
502
|
+
#
|
503
|
+
# @yield [result]
|
504
|
+
# The given block will be used to filter the results in the page.
|
505
|
+
#
|
506
|
+
# @yieldparam [Result] result
|
507
|
+
# A result in the page.
|
508
|
+
#
|
509
|
+
# @return [Array<URI::HTTP>]
|
510
|
+
# The Similar Query URLs of the results which match the given block.
|
306
511
|
#
|
512
|
+
# @example
|
307
513
|
# page.similar_urls_of { |result| result.title =~ /what if/ }
|
308
514
|
#
|
309
515
|
def similar_urls_of(&block)
|