gscraper 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. data/.gitignore +8 -0
  2. data/.specopts +1 -0
  3. data/.yardopts +1 -0
  4. data/ChangeLog.md +122 -0
  5. data/Gemfile +25 -0
  6. data/{README.txt → README.md} +25 -24
  7. data/Rakefile +32 -10
  8. data/gscraper.gemspec +112 -0
  9. data/lib/gscraper.rb +0 -2
  10. data/lib/gscraper/extensions.rb +0 -2
  11. data/lib/gscraper/extensions/uri.rb +0 -2
  12. data/lib/gscraper/extensions/uri/http.rb +0 -2
  13. data/lib/gscraper/extensions/uri/query_params.rb +18 -5
  14. data/lib/gscraper/gscraper.rb +61 -70
  15. data/lib/gscraper/has_pages.rb +76 -20
  16. data/lib/gscraper/licenses.rb +0 -2
  17. data/lib/gscraper/page.rb +45 -16
  18. data/lib/gscraper/search.rb +0 -2
  19. data/lib/gscraper/search/ajax_query.rb +75 -22
  20. data/lib/gscraper/search/page.rb +328 -122
  21. data/lib/gscraper/search/query.rb +100 -7
  22. data/lib/gscraper/search/result.rb +27 -6
  23. data/lib/gscraper/search/search.rb +59 -9
  24. data/lib/gscraper/search/web_query.rb +120 -37
  25. data/lib/gscraper/sponsored_ad.rb +19 -6
  26. data/lib/gscraper/sponsored_links.rb +260 -92
  27. data/lib/gscraper/version.rb +2 -3
  28. data/spec/extensions/uri/query_params_spec.rb +8 -0
  29. data/spec/gscraper_spec.rb +9 -4
  30. data/spec/has_pages_examples.rb +0 -2
  31. data/spec/has_sponsored_links_examples.rb +2 -1
  32. data/spec/helpers/query.rb +3 -1
  33. data/spec/helpers/uri.rb +6 -4
  34. data/spec/page_has_results_examples.rb +0 -2
  35. data/spec/search/ajax_query_spec.rb +6 -11
  36. data/spec/search/page_has_results_examples.rb +0 -2
  37. data/spec/search/web_query_spec.rb +6 -11
  38. data/spec/spec_helper.rb +10 -4
  39. metadata +147 -54
  40. data/History.txt +0 -101
  41. data/Manifest.txt +0 -38
  42. data/tasks/spec.rb +0 -9
@@ -1,5 +1,4 @@
1
1
  #
2
- #--
3
2
  # GScraper - A web-scraping interface to various Google Services.
4
3
  #
5
4
  # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
@@ -17,7 +16,6 @@
17
16
  # You should have received a copy of the GNU General Public License
18
17
  # along with this program; if not, write to the Free Software
19
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
- #++
21
19
  #
22
20
 
23
21
  module GScraper
@@ -1,5 +1,4 @@
1
1
  #
2
- #--
3
2
  # GScraper - A web-scraping interface to various Google Services.
4
3
  #
5
4
  # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
@@ -17,47 +16,77 @@
17
16
  # You should have received a copy of the GNU General Public License
18
17
  # along with this program; if not, write to the Free Software
19
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
- #++
21
19
  #
22
20
 
21
+ require 'enumerator'
22
+
23
23
  module GScraper
24
24
  class Page < Array
25
25
 
26
26
  #
27
- # Creates a new Page object with the given _elements_. If a _block_
28
- # is given, it will be passed the newly created Page object.
27
+ # Creates a new Page object.
28
+ #
29
+ # @param [Array] elements
30
+ # The elements to populate the page with.
31
+ #
32
+ # @yield [page]
33
+ # If a block is given, it will be passed the newly created page.
34
+ #
35
+ # @yieldparam [Page] page
36
+ # The newly created page.
29
37
  #
30
- def initialize(elements=[],&block)
38
+ def initialize(elements=[])
31
39
  super(elements)
32
40
 
33
- block.call(self) if block
41
+ yield self if block_given?
34
42
  end
35
43
 
36
44
  #
37
- # Returns a mapped Array of the elements within the Page using the
38
- # given _block_. If the _block_ is not given, the page will be
39
- # returned.
45
+ # Maps the elements within the page.
40
46
  #
41
- # page.map # => Page
47
+ # @yield [element]
48
+ # The given block will be passed each element in the page.
42
49
  #
43
- # page.map { |element| element.field } # => [...]
50
+ # @return [Array, Enumerator]
51
+ # The mapped result. If no block was given, an Enumerator object will
52
+ # be returned.
44
53
  #
45
- def map(&block)
46
- return self unless block
54
+ # @example
55
+ # page.map
56
+ # # => Enumerator
57
+ #
58
+ # @example
59
+ # page.map { |element| element.field }
60
+ # # => [...]
61
+ #
62
+ def map
63
+ return enum_for(:map) unless block_given?
47
64
 
48
65
  mapped = []
49
66
 
50
- each { |element| mapped << block.call(element) }
67
+ each { |element| mapped << yield(element) }
51
68
  return mapped
52
69
  end
53
70
 
54
71
  #
55
- # Selects the elements within the Page which match the given _block_.
72
+ # Selects the elements within the page.
73
+ #
74
+ # @yield [element]
75
+ # The given block will be passed each element in the page.
76
+ #
77
+ # @return [Array, Enumerator]
78
+ # The selected elements. If no block was given, an Enumerator object
79
+ # is returned.
56
80
  #
81
+ # @example
57
82
  # page.select { |element| element.field =~ /ruby/i }
58
83
  #
59
84
  def select(&block)
60
- self.class.new(super(&block))
85
+ unless block
86
+ enum_for(:select)
87
+ else
88
+ self.class.new(super(&block))
89
+ end
61
90
  end
62
91
 
63
92
  end
@@ -1,5 +1,4 @@
1
1
  #
2
- #--
3
2
  # GScraper - A web-scraping interface to various Google Services.
4
3
  #
5
4
  # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
@@ -17,7 +16,6 @@
17
16
  # You should have received a copy of the GNU General Public License
18
17
  # along with this program; if not, write to the Free Software
19
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
- #++
21
19
  #
22
20
 
23
21
  require 'gscraper/search/web_query'
@@ -1,5 +1,4 @@
1
1
  #
2
- #--
3
2
  # GScraper - A web-scraping interface to various Google Services.
4
3
  #
5
4
  # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
@@ -17,7 +16,6 @@
17
16
  # You should have received a copy of the GNU General Public License
18
17
  # along with this program; if not, write to the Free Software
19
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
- #++
21
19
  #
22
20
 
23
21
  require 'gscraper/search/result'
@@ -32,6 +30,9 @@ require 'nokogiri'
32
30
 
33
31
  module GScraper
34
32
  module Search
33
+ #
34
+ # Represents a Query through the Google AJAX search API.
35
+ #
35
36
  class AJAXQuery < Query
36
37
 
37
38
  include HasPages
@@ -70,16 +71,28 @@ module GScraper
70
71
  attr_accessor :version
71
72
 
72
73
  #
73
- # Creates a new AJAXQuery with the given _options_. If a _block_ is
74
- # given it will be passed the newly created AJAXQuery object.
74
+ # Creates a new AJAX query.
75
75
  #
76
- # _options_ may contain the following keys:
77
- # <tt>:language</tt>:: The search language. Defaults to <tt>:en</tt>.
78
- # <tt>:sig</tt>:: The search signature. Defaults to
79
- # +582c1116317355adf613a6a843f19ece+.
80
- # <tt>:key</tt>:: The search key. Defaults to <tt>:notsupplied</tt>.
81
- # <tt>:version</tt>:: The desired API version. Defaults to
82
- # <tt>1.0</tt>.
76
+ # @param [Hash] options
77
+ # Query options.
78
+ #
79
+ # @option options [Symbol] :language (:en)
80
+ # The search language.
81
+ #
82
+ # @option options [String] :sig ('582c1116317355adf613a6a843f19ece')
83
+ # The search signature.
84
+ #
85
+ # @option options [Symbol] :key (:notsupplied)
86
+ # The search key.
87
+ #
88
+ # @option options [Float] :version (1.0)
89
+ # The desired API version.
90
+ #
91
+ # @yield [query]
92
+ # If a block is given, the new AJAX query will be passed to it.
93
+ #
94
+ # @yieldparam [AJAXQuery] query
95
+ # The new AJAX query.
83
96
  #
84
97
  def initialize(options={},&block)
85
98
  @agent = GScraper.web_agent(options)
@@ -94,10 +107,26 @@ module GScraper
94
107
  end
95
108
 
96
109
  #
97
- # Creates a new AJAXQuery object from the specified URL. If a block is
98
- # given, it will be passed the newly created AJAXQuery object.
110
+ # Creates a new AJAX query from the specified URL.
111
+ #
112
+ # @param [URI::HTTP, String] url
113
+ # The URL to create the query from.
114
+ #
115
+ # @param [Hash] options
116
+ # Additional query options.
117
+ #
118
+ # @yield [query]
119
+ # If a block is given, it will be passed the new AJAX query.
120
+ #
121
+ # @yieldparam [AJAXQuery] query
122
+ # The new AJAX query.
123
+ #
124
+ # @return [AJAXQuery]
125
+ # The new AJAX query.
99
126
  #
100
- def self.from_url(url,options={},&block)
127
+ # @see AJAXQuery.new
128
+ #
129
+ def AJAXQuery.from_url(url,options={},&block)
101
130
  url = URI(url.to_s)
102
131
 
103
132
  options[:language] = url.query_params['hl']
@@ -111,14 +140,22 @@ module GScraper
111
140
  end
112
141
 
113
142
  #
114
- # Returns +RESULTS_PER_PAGE+.
143
+ # The results per page.
144
+ #
145
+ # @return [Integer]
146
+ # The number of results per page.
147
+ #
148
+ # @see RESULTS_PER_PAGE
115
149
  #
116
150
  def results_per_page
117
151
  RESULTS_PER_PAGE
118
152
  end
119
153
 
120
154
  #
121
- # Returns the URL that represents the query.
155
+ # The URL that represents the query.
156
+ #
157
+ # @return [URI::HTTP]
158
+ # The URL for the query.
122
159
  #
123
160
  def search_url
124
161
  search_url = URI(API_URL)
@@ -134,8 +171,13 @@ module GScraper
134
171
  end
135
172
 
136
173
  #
137
- # Returns the URL that represents the query of a specific
138
- # _page_index_.
174
+ # The URL that represents the query at a specific page index.
175
+ #
176
+ # @param [Integer] page_index
177
+ # The page index to create the URL for.
178
+ #
179
+ # @return [URI::HTTP]
180
+ # The query URL for the given page index.
139
181
  #
140
182
  def page_url(page_index)
141
183
  url = search_url
@@ -148,8 +190,13 @@ module GScraper
148
190
  end
149
191
 
150
192
  #
151
- # Returns a Page object containing Result objects at the specified
152
- # _page_index_.
193
+ # A page containing results at the specified page index.
194
+ #
195
+ # @param [Integer] page_index
196
+ # The index of the page.
197
+ #
198
+ # @return [Page<Result>]
199
+ # A page object.
153
200
  #
154
201
  def page(page_index)
155
202
  Page.new do |new_page|
@@ -162,8 +209,14 @@ module GScraper
162
209
  hash['results'].each_with_index do |result,index|
163
210
  rank = rank_offset + (index + 1)
164
211
  title = Nokogiri::HTML(result['title']).inner_text
165
- url = URI(result['unescapedUrl'])
166
- summary = Nokogiri::HTML(result['content']).inner_text
212
+ url = URI(URI.escape(result['unescapedUrl']))
213
+
214
+ unless result['content'].empty?
215
+ summary = Nokogiri::HTML(result['content']).inner_text
216
+ else
217
+ summary = ''
218
+ end
219
+
167
220
  cached_url = URI(result['cacheUrl'])
168
221
 
169
222
  new_page << Result.new(rank,title,url,summary,cached_url)
@@ -1,5 +1,4 @@
1
1
  #
2
- #--
3
2
  # GScraper - A web-scraping interface to various Google Services.
4
3
  #
5
4
  # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
@@ -17,7 +16,6 @@
17
16
  # You should have received a copy of the GNU General Public License
18
17
  # along with this program; if not, write to the Free Software
19
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
- #++
21
19
  #
22
20
 
23
21
  require 'gscraper/search/result'
@@ -27,224 +25,376 @@ module GScraper
27
25
  module Search
28
26
  class Page < GScraper::Page
29
27
 
28
+ alias results_with select
29
+
30
30
  #
31
- # Selects the results using the specified _block_.
31
+ # Selects the results with the matching title.
32
32
  #
33
- # page.results_with { |result| result.title =~ /blog/ }
33
+ # @param [String, Regexp] title
34
+ # The title to search for.
34
35
  #
35
- def results_with(&block)
36
- select(&block)
37
- end
38
-
36
+ # @yield [result]
37
+ # The given block will be passed each matching result.
39
38
  #
40
- # Selects the results with the matching _title_. The _title_ may be
41
- # either a String or a Regexp. If _block_ is given, each matching
42
- # result will be passed to the _block_.
39
+ # @yieldparam [Result] result
40
+ # A result with the matching title.
43
41
  #
42
+ # @return [Array<Result>]
43
+ # The results with the matching title.
44
+ #
45
+ # @example
44
46
  # page.results_with_title('hackety org') #=> Enumerator
45
47
  #
48
+ # @example
46
49
  # page.results_with_title(/awesome/) do |result|
47
50
  # puts result.url
48
51
  # end
49
52
  #
50
- def results_with_title(title,&block)
51
- if title.kind_of?(Regexp)
52
- results = results_with { |result| result.title =~ title }
53
+ def results_with_title(title)
54
+ unless block_given?
55
+ enum_for(:results_with_title,title)
53
56
  else
54
- results = results_with { |result| result.title == title }
55
- end
57
+ results_with do |result|
58
+ if result.title.match(title)
59
+ yield result
56
60
 
57
- results.each(&block) if block
58
- return results
61
+ true
62
+ end
63
+ end
64
+ end
59
65
  end
60
66
 
61
67
  #
62
- # Selects the results with the matching _url_. The _url_ may be
63
- # either a String or a Regexp. If _block_ is given, each matching
64
- # result will be passed to the _block_.
68
+ # Selects the results with the matching URL.
65
69
  #
70
+ # @param [String, Regexp] url
71
+ # The URL to search for.
72
+ #
73
+ # @yield [result]
74
+ # The given block will be passed each matching result.
75
+ #
76
+ # @yieldparam [Result] result
77
+ # A result with the matching URL.
78
+ #
79
+ # @return [Array<Result>]
80
+ # The results with the matching URL.
81
+ #
82
+ # @example
66
83
  # page.results_with_url(/\.com/) # => Enumerator
67
84
  #
85
+ # @example
68
86
  # page.results_with_url(/^https:\/\//) do |result|
69
87
  # puts result.title
70
88
  # end
71
89
  #
72
- def results_with_url(url,&block)
73
- if url.kind_of?(Regexp)
74
- results = results_with { |result| result.url =~ url }
90
+ def results_with_url(url)
91
+ unless block_given?
92
+ enum_for(:results_with_url,url)
75
93
  else
76
- results = results_with { |result| result.url == url }
77
- end
94
+ results_with do |result|
95
+ if result.url.match(url)
96
+ yield result
78
97
 
79
- results.each(&block) if block
80
- return results
98
+ true
99
+ end
100
+ end
101
+ end
81
102
  end
82
103
 
83
104
  #
84
- # Selects the results with the matching _summary_. The _summary_ may
85
- # be either a String or a Regexp. If _block_ is given, each matching
86
- # result will be passed to the _block_.
105
+ # Selects the results with the matching summary.
87
106
  #
107
+ # @param [String, Regexp] summary
108
+ # The summary to search for.
109
+ #
110
+ # @yield [result]
111
+ # The given block will be passed each matching result.
112
+ #
113
+ # @yieldparam [Result] result
114
+ # A result with the matching summary.
115
+ #
116
+ # @return [Array<Result>]
117
+ # The results with the matching summary.
118
+ #
119
+ # @example
88
120
  # page.results_with_summary(/cheese cake/) # => Enumerator
89
121
  #
122
+ # @example
90
123
  # page.results_with_summary(/Scientifically/) do |result|
91
124
  # puts result.url
92
125
  # end
93
126
  #
94
- def results_with_summary(summary,&block)
95
- if summary.kind_of?(Regexp)
96
- results = results_with { |result| result.summary =~ summary }
127
+ def results_with_summary(summary)
128
+ unless block_given?
129
+ enum_for(:results_with_summary,summary)
97
130
  else
98
- results = results_with { |result| result.summary == summary }
99
- end
131
+ results_with do |result|
132
+ if result.summary.match(summary)
133
+ yield result
100
134
 
101
- results.each(&block) if block
102
- return results
135
+ true
136
+ end
137
+ end
138
+ end
103
139
  end
104
140
 
105
141
  #
106
- # Returns an Array containing the ranks of the results within the
107
- # Page.
142
+ # Iterates over each result's rank within the page.
108
143
  #
109
- # page.ranks # => [...]
144
+ # @yield [rank]
145
+ # The given block will be passed the ranks of each result in
146
+ # the page.
110
147
  #
111
- def ranks
112
- map { |result| result.rank }
148
+ # @yieldparam [Integer] rank
149
+ # The rank of a result in the page.
150
+ #
151
+ # @return [Enumerator]
152
+ # If no block is given, an Enumerator object will be returned.
153
+ #
154
+ # @example
155
+ # each_rank { |rank| puts rank }
156
+ #
157
+ def each_rank
158
+ unless block_given?
159
+ enum_for(:each_rank)
160
+ else
161
+ each { |result| yield result.rank }
162
+ end
113
163
  end
114
164
 
115
165
  #
116
- # Returns an Array containing the titles of the results within the
117
- # Page.
166
+ # Iterates over each result's title within the page.
118
167
  #
119
- # page.titles # => [...]
168
+ # @yield [title]
169
+ # The given block will be passed the title of each result in
170
+ # the page.
171
+ #
172
+ # @yieldparam [String] title
173
+ # The title of a result in the page.
120
174
  #
121
- def titles
122
- map { |result| result.title }
175
+ # @return [Enumerator]
176
+ # If no block is given, an Enumerator object will be returned.
177
+ #
178
+ # @example
179
+ # each_title { |title| puts title }
180
+ #
181
+ def each_title
182
+ unless block_given?
183
+ enum_for(:each_title)
184
+ else
185
+ each { |result| yield result.title }
186
+ end
123
187
  end
124
188
 
125
189
  #
126
- # Returns an Array containing the URLs of the results within the
127
- # Page.
190
+ # Iterates over each result's url within the page.
128
191
  #
129
- # page.urls # => [...]
192
+ # @yield [url]
193
+ # The given block will be passed the URL of each result in
194
+ # the page.
195
+ #
196
+ # @yieldparam [URI::HTTP] url
197
+ # The URL of a result in the page.
130
198
  #
131
- def urls
132
- map { |result| result.url }
199
+ # @return [Enumerator]
200
+ # If no block is given, an Enumerator object will be returned.
201
+ #
202
+ # @example
203
+ # each_url { |url| puts url }
204
+ #
205
+ def each_url
206
+ unless block_given?
207
+ enum_for(:each_url)
208
+ else
209
+ each { |result| yield result.url }
210
+ end
133
211
  end
134
212
 
135
213
  #
136
- # Returns an Array containing the summaries of the results within the
137
- # Page.
214
+ # Iterates over each result's summary within the page.
138
215
  #
139
- # page.summaries # => [...]
216
+ # @yield [summary]
217
+ # The given block will be passed the summary of each result in
218
+ # the page.
219
+ #
220
+ # @yieldparam [String] summary
221
+ # The summary of a result in the page.
140
222
  #
141
- def summaries
142
- map { |result| result.summary }
223
+ # @return [Enumerator]
224
+ # If no block is given, an Enumerator object will be returned.
225
+ #
226
+ # @example
227
+ # each_summary { |summary| puts summary }
228
+ #
229
+ def each_summary
230
+ unless block_given?
231
+ enum_for(:each_summary)
232
+ else
233
+ each { |result| yield result.summary }
234
+ end
143
235
  end
144
236
 
145
237
  #
146
- # Returns an Array containing the cached URLs of the results within
147
- # the Page.
238
+ # Iterates over each result's cached URLs within the page.
148
239
  #
149
- # page.cached_urls # => [...]
240
+ # @yield [cached_url]
241
+ # The given block will be passed the Cached URL of each result in
242
+ # the page.
243
+ #
244
+ # @yieldparam [URI::HTTP] cached_url
245
+ # The Cached URL of a result in the page.
150
246
  #
151
- def cached_urls
152
- map { |result| result.cached_url }.compact
247
+ # @return [Enumerator]
248
+ # If no block is given, an Enumerator object will be returned.
249
+ #
250
+ # @example
251
+ # each_cached_url { |cached_url| puts cached_url }
252
+ #
253
+ def each_cached_url
254
+ unless block_given?
255
+ enum_for(:each_cached_url)
256
+ else
257
+ each do |result|
258
+ yield result.cached_url if result.cached_url
259
+ end
260
+ end
153
261
  end
154
262
 
155
263
  #
156
- # Returns an Array containing the cached pages of the results within
157
- # the Page.
264
+ # Iterates over each result's cached pages within the page.
158
265
  #
159
- # page.cached_pages # => [...]
266
+ # @yield [cached_page]
267
+ # The given block will be passed the Cached Page of each result in
268
+ # the page.
269
+ #
270
+ # @yieldparam [Mechanize::Page] cached_page
271
+ # The Cached Page of a result in the page.
160
272
  #
161
- def cached_pages
162
- map { |result| result.cached_page }.compact
273
+ # @return [Enumerator]
274
+ # If no block is given, an Enumerator object will be returned.
275
+ #
276
+ # @example
277
+ # each_cached_page { |page| puts page.readlines }
278
+ #
279
+ def each_cached_page
280
+ unless block_given?
281
+ enum_for(:each_cached_page)
282
+ else
283
+ each do |result|
284
+ yield result.cached_page if result.cached_page
285
+ end
286
+ end
163
287
  end
164
288
 
165
289
  #
166
- # Returns an Array containing the similar Query URLs of the results
167
- # within the Page.
290
+ # Iterates over each result's similar Query URLs within the page.
168
291
  #
169
- # page.similar_urls # => [...]
292
+ # @yield [similar_url]
293
+ # The given block will be passed the Similar Query URL of each
294
+ # result in the page.
295
+ #
296
+ # @yieldparam [URI::HTTP] similar_url
297
+ # The Similar Query URL of a result in the page.
170
298
  #
171
- def similar_urls
172
- map { |result| result.similar_url }.compact
299
+ # @return [Enumerator]
300
+ # If no block is given, an Enumerator object will be returned.
301
+ #
302
+ # @example
303
+ # each_similar_url { |similar_url| puts similar_url }
304
+ #
305
+ def each_similar_url
306
+ unless block_given?
307
+ enum_for(:each_similar_url)
308
+ else
309
+ each do |result|
310
+ yield result.similar_url if result.similar_url
311
+ end
312
+ end
173
313
  end
174
314
 
175
315
  #
176
- # Iterates over each result's rank within the Page, passing each to
177
- # the given _block_.
316
+ # Returns the ranks of the results in the page.
178
317
  #
179
- # each_rank { |rank| puts rank }
318
+ # @return [Array<Integer>]
319
+ # The ranks of the results.
180
320
  #
181
- def each_rank(&block)
182
- ranks.each(&block)
321
+ def ranks
322
+ each_rank.to_a
183
323
  end
184
324
 
185
325
  #
186
- # Iterates over each result's title within the Page, passing each to
187
- # the given _block_.
326
+ # Returns the titles of the results in the page.
188
327
  #
189
- # each_title { |title| puts title }
328
+ # @return [Array<String>]
329
+ # The titles of the results.
190
330
  #
191
- def each_title(&block)
192
- titles.each(&block)
331
+ def titles
332
+ each_title.to_a
193
333
  end
194
334
 
195
335
  #
196
- # Iterates over each result's url within the Page, passing each to
197
- # the given _block_.
336
+ # Returns the URLs of the results in the page.
198
337
  #
199
- # each_url { |url| puts url }
338
+ # @return [Array<URI::HTTP>]
339
+ # The URLs of the results.
200
340
  #
201
- def each_url(&block)
202
- urls.each(&block)
341
+ def urls
342
+ each_url.to_a
203
343
  end
204
344
 
205
345
  #
206
- # Iterates over each result's summary within the Page, passing each
207
- # to the given _block_.
346
+ # Returns the summaries of the results in the page.
208
347
  #
209
- # each_summary { |summary| puts summary }
348
+ # @return [Array<String>]
349
+ # The summaries of the results.
210
350
  #
211
- def each_summary(&block)
212
- summaries.each(&block)
351
+ def summaries
352
+ each_summary.to_a
213
353
  end
214
354
 
215
355
  #
216
- # Iterates over each result's cached URLs within the Page, passing
217
- # each to the given _block_.
356
+ # Returns the Cached URLs of the results in the page.
218
357
  #
219
- # each_cached_url { |url| puts url }
358
+ # @return [Array<URI::HTTP>]
359
+ # The Cached URLs of the results.
220
360
  #
221
- def each_cached_url(&block)
222
- cached_urls.each(&block)
361
+ def cached_urls
362
+ each_cached_url.to_a
223
363
  end
224
364
 
225
365
  #
226
- # Iterates over each result's cached pages within the Page, passing
227
- # each to the given _block_.
366
+ # Returns the Cached Pages of the results in the page.
228
367
  #
229
- # each_cached_page { |page| puts page.readlines }
368
+ # @return [Array<Mechanize::Page>]
369
+ # The Cached Pages of the results.
230
370
  #
231
- def each_cached_page(&block)
232
- cached_pages.each(&block)
371
+ def cached_pages
372
+ each_cached_page.to_a
233
373
  end
234
374
 
235
375
  #
236
- # Iterates over each result's similar Query URLs within the Page,
237
- # passing each to the given _block_.
376
+ # Returns the Similar Query URLs of the results in the page.
238
377
  #
239
- # each_similar_url { |url| puts url }
378
+ # @return [Array<URI::HTTP>]
379
+ # The Similar Query URLs of the results.
240
380
  #
241
- def each_similar_url(&block)
242
- similar_urls.each(&block)
381
+ def similar_urls
382
+ each_similar_url.to_a
243
383
  end
244
384
 
245
385
  #
246
- # Returns the ranks of the results that match the specified _block_.
386
+ # Returns the ranks of the results that match the given block.
387
+ #
388
+ # @yield [result]
389
+ # The given block will be used to filter the results in the page.
247
390
  #
391
+ # @yieldparam [Result] result
392
+ # A result in the page.
393
+ #
394
+ # @return [Array<Integer>]
395
+ # The ranks of the results which match the given block.
396
+ #
397
+ # @example
248
398
  # page.ranks_of { |result| result.title =~ /awesome/ }
249
399
  #
250
400
  def ranks_of(&block)
@@ -252,8 +402,18 @@ module GScraper
252
402
  end
253
403
 
254
404
  #
255
- # Returns the titles of the results that match the specified _block_.
405
+ # Returns the titles of the results that match the given block.
406
+ #
407
+ # @yield [result]
408
+ # The given block will be used to filter the results in the page.
256
409
  #
410
+ # @yieldparam [Result] result
411
+ # A result in the page.
412
+ #
413
+ # @return [Array<String>]
414
+ # The titles of the results which match the given block.
415
+ #
416
+ # @example
257
417
  # page.titles_of { |result| result.url.include?('www') }
258
418
  #
259
419
  def titles_of(&block)
@@ -261,8 +421,18 @@ module GScraper
261
421
  end
262
422
 
263
423
  #
264
- # Returns the urls of the results that match the specified _block_.
424
+ # Returns the urls of the results that match the given block.
425
+ #
426
+ # @yield [result]
427
+ # The given block will be used to filter the results in the page.
265
428
  #
429
+ # @yieldparam [Result] result
430
+ # A result in the page.
431
+ #
432
+ # @return [Array<URI::HTTP>]
433
+ # The URLs of the results which match the given block.
434
+ #
435
+ # @example
266
436
  # page.urls_of { |result| result.summary =~ /awesome pants/ }
267
437
  #
268
438
  def urls_of(&block)
@@ -270,9 +440,18 @@ module GScraper
270
440
  end
271
441
 
272
442
  #
273
- # Returns the summaries of the results that match the specified
274
- # _block_.
443
+ # Returns the summaries of the results that match the given block.
444
+ #
445
+ # @yield [result]
446
+ # The given block will be used to filter the results in the page.
275
447
  #
448
+ # @yieldparam [Result] result
449
+ # A result in the page.
450
+ #
451
+ # @return [Array<String>]
452
+ # The summaries of the results which match the given block.
453
+ #
454
+ # @example
276
455
  # page.summaries_of { |result| result.title =~ /what if/ }
277
456
  #
278
457
  def summaries_of(&block)
@@ -280,9 +459,18 @@ module GScraper
280
459
  end
281
460
 
282
461
  #
283
- # Returns the cached URLs of the results that match the specified
284
- # _block_.
462
+ # Returns the Cached URLs of the results that match the given block.
463
+ #
464
+ # @yield [result]
465
+ # The given block will be used to filter the results in the page.
285
466
  #
467
+ # @yieldparam [Result] result
468
+ # A result in the page.
469
+ #
470
+ # @return [Array<URI::HTTP>]
471
+ # The Cached URLs of the results which match the given block.
472
+ #
473
+ # @example
286
474
  # page.cached_urls_of { |result| result.title =~ /howdy/ }
287
475
  #
288
476
  def cached_urls_of(&block)
@@ -290,20 +478,38 @@ module GScraper
290
478
  end
291
479
 
292
480
  #
293
- # Returns the cached pages of the results that match the specified
294
- # _block_. If _options_ are given, they will be used in accessing
295
- # the cached pages.
481
+ # Returns the cached pages of the results that match the given block.
482
+ #
483
+ # @yield [result]
484
+ # The given block will be used to filter the results in the page.
296
485
  #
486
+ # @yieldparam [Result] result
487
+ # A result in the page.
488
+ #
489
+ # @return [Array<Mechanize::Page>]
490
+ # The Cached Page of the results which match the given block.
491
+ #
492
+ # @example
297
493
  # page.cached_pages_of { |result| result.title =~ /dude/ }
298
494
  #
299
- def cached_pages_of(options={},&block)
300
- results_with(&block).cached_pages(options)
495
+ def cached_pages_of(&block)
496
+ results_with(&block).cached_pages
301
497
  end
302
498
 
303
499
  #
304
- # Returns the similar query URLs of the results that match the
305
- # specified _block_.
500
+ # Returns the Similar Query URLs of the results that match the given
501
+ # block.
502
+ #
503
+ # @yield [result]
504
+ # The given block will be used to filter the results in the page.
505
+ #
506
+ # @yieldparam [Result] result
507
+ # A result in the page.
508
+ #
509
+ # @return [Array<URI::HTTP>]
510
+ # The Similar Query URLs of the results which match the given block.
306
511
  #
512
+ # @example
307
513
  # page.similar_urls_of { |result| result.title =~ /what if/ }
308
514
  #
309
515
  def similar_urls_of(&block)