gscraper 0.2.4 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/.gitignore +8 -0
  2. data/.specopts +1 -0
  3. data/.yardopts +1 -0
  4. data/ChangeLog.md +122 -0
  5. data/Gemfile +25 -0
  6. data/{README.txt → README.md} +25 -24
  7. data/Rakefile +32 -10
  8. data/gscraper.gemspec +112 -0
  9. data/lib/gscraper.rb +0 -2
  10. data/lib/gscraper/extensions.rb +0 -2
  11. data/lib/gscraper/extensions/uri.rb +0 -2
  12. data/lib/gscraper/extensions/uri/http.rb +0 -2
  13. data/lib/gscraper/extensions/uri/query_params.rb +18 -5
  14. data/lib/gscraper/gscraper.rb +61 -70
  15. data/lib/gscraper/has_pages.rb +76 -20
  16. data/lib/gscraper/licenses.rb +0 -2
  17. data/lib/gscraper/page.rb +45 -16
  18. data/lib/gscraper/search.rb +0 -2
  19. data/lib/gscraper/search/ajax_query.rb +75 -22
  20. data/lib/gscraper/search/page.rb +328 -122
  21. data/lib/gscraper/search/query.rb +100 -7
  22. data/lib/gscraper/search/result.rb +27 -6
  23. data/lib/gscraper/search/search.rb +59 -9
  24. data/lib/gscraper/search/web_query.rb +120 -37
  25. data/lib/gscraper/sponsored_ad.rb +19 -6
  26. data/lib/gscraper/sponsored_links.rb +260 -92
  27. data/lib/gscraper/version.rb +2 -3
  28. data/spec/extensions/uri/query_params_spec.rb +8 -0
  29. data/spec/gscraper_spec.rb +9 -4
  30. data/spec/has_pages_examples.rb +0 -2
  31. data/spec/has_sponsored_links_examples.rb +2 -1
  32. data/spec/helpers/query.rb +3 -1
  33. data/spec/helpers/uri.rb +6 -4
  34. data/spec/page_has_results_examples.rb +0 -2
  35. data/spec/search/ajax_query_spec.rb +6 -11
  36. data/spec/search/page_has_results_examples.rb +0 -2
  37. data/spec/search/web_query_spec.rb +6 -11
  38. data/spec/spec_helper.rb +10 -4
  39. metadata +147 -54
  40. data/History.txt +0 -101
  41. data/Manifest.txt +0 -38
  42. data/tasks/spec.rb +0 -9
@@ -1,5 +1,4 @@
1
1
  #
2
- #--
3
2
  # GScraper - A web-scraping interface to various Google Services.
4
3
  #
5
4
  # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
@@ -17,7 +16,6 @@
17
16
  # You should have received a copy of the GNU General Public License
18
17
  # along with this program; if not, write to the Free Software
19
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
- #++
21
19
  #
22
20
 
23
21
  require 'gscraper/extensions/uri'
@@ -32,7 +30,13 @@ module GScraper
32
30
  attr_reader :url
33
31
 
34
32
  #
35
- # Creates a new SponsoredAd with the specified _title_ and _url_.
33
+ # Creates a new SponsoredAd.
34
+ #
35
+ # @param [String] title
36
+ # The title of the ad.
37
+ #
38
+ # @param [URI::HTTP] url
39
+ # The URL of the ad.
36
40
  #
37
41
  def initialize(title,url)
38
42
  @title = title
@@ -40,21 +44,30 @@ module GScraper
40
44
  end
41
45
 
42
46
  #
43
- # Returns the direct link of the ad.
47
+ # The direct link of the ad.
48
+ #
49
+ # @return [String]
50
+ # The direct link.
44
51
  #
45
52
  def direct_link
46
53
  @url.query_params['adurl'] || @url.query_params['q']
47
54
  end
48
55
 
49
56
  #
50
- # Returns the direct URL of the ad.
57
+ # The direct URI of the ad.
58
+ #
59
+ # @return [URI::HTTP]
60
+ # The direct URI.
51
61
  #
52
62
  def direct_url
53
63
  URI(URI.escape(direct_link))
54
64
  end
55
65
 
56
66
  #
57
- # Returns the title of the ad.
67
+ # The title of the ad.
68
+ #
69
+ # @return [String]
70
+ # The title.
58
71
  #
59
72
  def to_s
60
73
  @title.to_s
@@ -1,5 +1,4 @@
1
1
  #
2
- #--
3
2
  # GScraper - A web-scraping interface to various Google Services.
4
3
  #
5
4
  # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
@@ -17,181 +16,330 @@
17
16
  # You should have received a copy of the GNU General Public License
18
17
  # along with this program; if not, write to the Free Software
19
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
- #++
21
19
  #
22
20
 
23
21
  require 'gscraper/sponsored_ad'
24
22
 
23
+ require 'enumerator'
24
+
25
25
  module GScraper
26
26
  class SponsoredLinks < Array
27
+
28
+ #
29
+ # Creates a new SponsoredLinks object.
27
30
  #
28
- # Creates a new SponsoredLinks object with the given _ads_. If a
29
- # _block_ is given, it will be passed the newly created SponsoredLinks
30
- # object.
31
+ # @param [Array] ads
32
+ # The ads to populate the sponsored links object with.
31
33
  #
32
- def initialize(ads=[],&block)
34
+ # @yield [links]
35
+ # If a block is given, it will be passed the new sponsored links
36
+ # object.
37
+ #
38
+ # @yieldparam [SponsoredLinks] links
39
+ # The new sponsored links object.
40
+ #
41
+ def initialize(ads=[])
33
42
  super(ads)
34
43
 
35
- block.call(self) if block
44
+ yield self if block_given?
36
45
  end
37
46
 
38
47
  #
39
- # Returns a mapped Array of the ads within the SponsoredLinks
40
- # using the given _block_. If the _block_ is not given, the
41
- # SponsoredLinks will be returned.
48
+ # Maps the sponsored ads.
49
+ #
50
+ # @yield [ad]
51
+ # The given block will be passed each ad.
42
52
  #
43
- # sponsored.map # => SponsoredLinks
53
+ # @yieldparam [SponsoredAd] ad
54
+ # The sponsored ad.
44
55
  #
45
- # sponsored.map { |ad| ad.url } # => [...]
56
+ # @return [Array, Enumerator]
57
+ # The mapped result. If no block was given, an Enumerator object will
58
+ # be returned.
46
59
  #
47
- def map(&block)
48
- return self unless block
60
+ # @example
61
+ # sponsored.map
62
+ # # => Enumerator
63
+ #
64
+ # @example
65
+ # sponsored.map { |ad| ad.url }
66
+ # # => [...]
67
+ #
68
+ def map
69
+ return enum_for(:map) unless block_given?
49
70
 
50
71
  mapped = []
51
72
 
52
- each { |ad| mapped << block.call(ad) }
73
+ each { |ad| mapped << yield(ad) }
53
74
  return mapped
54
75
  end
55
76
 
56
77
  #
57
- # Selects the ads within the SponsoredLinks which match the given _block_.
78
+ # Selects the ads within the sponsored links.
79
+ #
80
+ # @yield [ad]
81
+ # The given block will determine which ads to select.
82
+ #
83
+ # @yieldparam [SponsoredAd] ad
84
+ # A sponsored ad.
85
+ #
86
+ # @return [Array, Enumerator]
87
+ # The selected ads. If no block is given, an Enumerator object will
88
+ # be returned.
58
89
  #
90
+ # @example
59
91
  # sponsored.select { |ad| ad.title =~ /consume/i }
60
92
  #
61
93
  def select(&block)
62
- SponsoredLinks.new(super(&block))
94
+ unless block
95
+ enum_for(:select)
96
+ else
97
+ SponsoredLinks.new(super(&block))
98
+ end
63
99
  end
64
100
 
101
+ alias ads_with select
102
+
65
103
  #
66
- # Selects the ads using the specified _block_.
104
+ # Selects the ads with the matching title.
67
105
  #
68
- # sponsored.ads_with { |ad| ad.title =~ /status symbol/ }
106
+ # @param [String, Regexp] title
107
+ # The title to search for.
69
108
  #
70
- def ads_with(&block)
71
- select(&block)
72
- end
73
-
109
+ # @yield [ad]
110
+ # Each matching ad will be passed to the given block.
111
+ #
112
+ # @yieldparam [SponsoredAd] ad
113
+ # A sponsored ad with the matching title.
74
114
  #
75
- # Selects the ads with the matching _title_. The _title_ may be
76
- # either a String or a Regexp. If _block_ is given, each matching
77
- # ad will be passed to the _block_.
115
+ # @return [Array, Enumerator]
116
+ # The sponsored ads with the matching title. If no block is given,
117
+ # an Enumerator object will be returned.
78
118
  #
79
- # sponsored.ads_with_title('be attractive') #=> SponsoredLinks
119
+ # @example
120
+ # sponsored.ads_with_title('be attractive')
121
+ # # => Enumerator
80
122
  #
123
+ # @example
81
124
  # sponsored.ads_with_title(/buy me/) do |ad|
82
125
  # puts ad.url
83
126
  # end
84
127
  #
85
- def ads_with_title(title,&block)
86
- if title.kind_of?(Regexp)
87
- ads = ads_with { |ad| ad.title =~ title }
88
- else
89
- ads = ads_with { |ad| ad.title == title }
90
- end
128
+ def ads_with_title(title)
129
+ return enum_for(:ads_with_title,title) unless block_given?
130
+
131
+ comparitor = if title.kind_of?(Regexp)
132
+ lambda { |ad| ad.title =~ title }
133
+ else
134
+ lambda { |ad| ad.title == title }
135
+ end
91
136
 
92
- ads.each(&block) if block
93
- return ads
137
+ return ads_with do |ad|
138
+ if comparitor.call(ad)
139
+ yield ad
140
+
141
+ true
142
+ end
143
+ end
94
144
  end
95
145
 
96
146
  #
97
- # Selects the ads with the matching _url_. The _url_ may be
98
- # either a String or a Regexp. If _block_ is given, each matching
99
- # ad will be passed to the _block_.
147
+ # Selects the ads with the matching URL.
100
148
  #
101
- # sponsored.ads_with_url(/\.com/) # => SponsoredLinks
149
+ # @param [String, Regexp] url
150
+ # The URL to search for.
102
151
  #
103
- def ads_with_url(url,&block)
104
- if url.kind_of?(Regexp)
105
- ads = ads_with { |ad| ad.url =~ url }
106
- else
107
- ads = ads_with { |ad| ad.url == url }
108
- end
152
+ # @yield [ad]
153
+ # Each matching ad will be passed to the given block.
154
+ #
155
+ # @yieldparam [SponsoredAd] ad
156
+ # A sponsored ad with the matching URL.
157
+ #
158
+ # @return [Array, Enumerator]
159
+ # The sponsored ads with the matching URL. If no block is given,
160
+ # an Enumerator object will be returned.
161
+ #
162
+ # @example
163
+ # sponsored.ads_with_url(/\.com/)
164
+ # # => Enumerator
165
+ #
166
+ def ads_with_url(url)
167
+ return enum_for(:ads_with_url,url) unless block_given?
109
168
 
110
- ads.each(&block) if block
111
- return ads
169
+ comparitor = if url.kind_of?(Regexp)
170
+ lambda { |ad| ad.url =~ url }
171
+ else
172
+ lambda { |ad| ad.url == url }
173
+ end
174
+
175
+ return ads_with do |ad|
176
+ if comparitor.call(ad)
177
+ yield ad
178
+
179
+ true
180
+ end
181
+ end
112
182
  end
113
183
 
114
184
  #
115
- # Selects the ads with the matching _direct_url_. The _direct_url_ may
116
- # be either a String or a Regexp. If _block_ is given, each matching
117
- # ad will be passed to the _block_.
185
+ # Selects the ads with the matching direct URL.
118
186
  #
119
- # sponsored.ads_with_direct_url(/\.com/) # => SponsoredLinks
187
+ # @param [String, Regexp] direct_url
188
+ # The direct URL to search for.
120
189
  #
121
- def ads_with_direct_url(direct_url,&block)
122
- if direct_url.kind_of?(Regexp)
123
- ads = ads_with { |ad| ad.direct_url =~ direct_url }
124
- else
125
- ads = ads_with { |ad| ad.direct_url == direct_url }
126
- end
190
+ # @yield [ad]
191
+ # Each matching ad will be passed to the given block.
192
+ #
193
+ # @yieldparam [SponsoredAd] ad
194
+ # A sponsored ad with the matching direct URL.
195
+ #
196
+ # @return [Array, Enumerator]
197
+ # The sponsored ads with the matching direct URL. If no block is given,
198
+ # an Enumerator object will be returned.
199
+ #
200
+ # @example
201
+ # sponsored.ads_with_direct_url(/\.com/)
202
+ # # => Enumerator
203
+ #
204
+ def ads_with_direct_url(direct_url)
205
+ return enum_for(:ads_with_direct_url,direct_url) unless block_given?
206
+
207
+ comparitor = if direct_url.kind_of?(Regexp)
208
+ lambda { |ad| ad.direct_url =~ direct_url }
209
+ else
210
+ lambda { |ad| ad.direct_url == direct_url }
211
+ end
127
212
 
128
- ads.each(&block) if block
129
- return ads
213
+ return ads_with do |ad|
214
+ if comparitor.call(ad)
215
+ yield ad
216
+
217
+ true
218
+ end
219
+ end
130
220
  end
131
221
 
132
222
  #
133
- # Returns an Array containing the titles of the ads within the
134
- # SponsoredLinks.
223
+ # Iterates over the titles of each ad.
135
224
  #
136
- # sponsored.titles # => [...]
225
+ # @yield [title]
226
+ # The given block will be passed each title.
137
227
  #
138
- def titles
139
- map { |ad| ad.title }
228
+ # @yieldparam [String] title
229
+ # A title of an ad.
230
+ #
231
+ # @return [Enumerator]
232
+ # If no block is given, an Enumerator object will be returned.
233
+ #
234
+ # @example
235
+ # each_title { |title| puts title }
236
+ #
237
+ def each_title
238
+ unless block_given?
239
+ enum_for(:each_title)
240
+ else
241
+ each { |ad| yield ad.title }
242
+ end
140
243
  end
141
244
 
142
245
  #
143
- # Returns an Array containing the URLs of the ads within the
144
- # SponsoredLinks.
246
+ # Iterates over the URLs of each ad.
145
247
  #
146
- # sponsored.urls # => [...]
248
+ # @yield [url]
249
+ # The given block will be passed each URL.
147
250
  #
148
- def urls
149
- map { |ad| ad.url }
251
+ # @yieldparam [URI::HTTP] url
252
+ # A URL of an ad.
253
+ #
254
+ # @return [Enumerator]
255
+ # If no block is given, an Enumerator object will be returned.
256
+ #
257
+ # @example
258
+ # each_url { |url| puts url }
259
+ #
260
+ def each_url
261
+ unless block_given?
262
+ enum_for(:each_url)
263
+ else
264
+ each { |ad| yield ad.url }
265
+ end
150
266
  end
151
267
 
152
268
  #
153
- # Returns an Array containing the direct URLs of the ads within the
154
- # SponsoredLinks.
269
+ # Iterates over the direct URLs of each ad.
155
270
  #
156
- # sponsored.direct_urls # => [...]
271
+ # @yield [direct_url]
272
+ # The given block will be passed each direct URL.
157
273
  #
158
- def direct_urls
159
- map { |ad| ad.direct_url }
274
+ # @yieldparam [URI::HTTP] direct_url
275
+ # A direct URL of an ad.
276
+ #
277
+ # @return [Enumerator]
278
+ # If no block is given, an Enumerator object will be returned.
279
+ #
280
+ # @example
281
+ # each_direct_url { |url| puts url }
282
+ #
283
+ def each_direct_url
284
+ unless block_given?
285
+ enum_for(:each_direct_url)
286
+ else
287
+ each { |ad| yield ad.direct_url }
288
+ end
160
289
  end
161
290
 
162
291
  #
163
- # Iterates over each ad's title within the SponsoredLinks, passing each to
164
- # the given _block_.
292
+ # The titles for the ads.
165
293
  #
166
- # each_title { |title| puts title }
294
+ # @return [Array<String>]
295
+ # The titles for the ads.
296
+ #
297
+ # @example
298
+ # sponsored.titles # => [...]
167
299
  #
168
- def each_title(&block)
169
- titles.each(&block)
300
+ def titles
301
+ each_title.to_a
170
302
  end
171
303
 
172
304
  #
173
- # Iterates over each ad's URL within the SponsoredLinks, passing each to
174
- # the given _block_.
305
+ # The URLs for the ads.
175
306
  #
176
- # each_url { |url| puts url }
307
+ # @return [Array<URI::HTTP>]
308
+ # The URLs for the ads.
177
309
  #
178
- def each_url(&block)
179
- urls.each(&block)
310
+ # @example
311
+ # sponsored.urls # => [...]
312
+ #
313
+ def urls
314
+ each_url.to_a
180
315
  end
181
316
 
182
317
  #
183
- # Iterates over each ad's direct URL within the SponsoredLinks, passing
184
- # each to the given _block_.
318
+ # The direct URLs for the ads.
185
319
  #
186
- # each_direct_url { |url| puts url }
320
+ # @return [Array<URI::HTTP>]
321
+ # The direct URLs for the ads.
322
+ #
323
+ # @example
324
+ # sponsored.direct_urls # => [...]
187
325
  #
188
- def each_direct_url(&block)
189
- direct_urls.each(&block)
326
+ def direct_urls
327
+ each_direct_url.to_a
190
328
  end
191
329
 
192
330
  #
193
- # Returns the titles of the ads that match the specified _block_.
331
+ # The titles of the selected ads.
332
+ #
333
+ # @yield [ad]
334
+ # The given block will be passed each ad to be selected.
335
+ #
336
+ # @yieldparam [SponsoredAd] ad
337
+ # An ad to be selected.
194
338
  #
339
+ # @return [Array<String>]
340
+ # The titles of the selected ads.
341
+ #
342
+ # @example
195
343
  # sponsored.titles_of { |ad| ad.url.include?('www') }
196
344
  #
197
345
  def titles_of(&block)
@@ -199,8 +347,18 @@ module GScraper
199
347
  end
200
348
 
201
349
  #
202
- # Returns the URLs of the ads that match the specified _block_.
350
+ # The URLs of the selected ads.
351
+ #
352
+ # @yield [ad]
353
+ # The given block will be passed each ad to be selected.
203
354
  #
355
+ # @yieldparam [SponsoredAd] ad
356
+ # An ad to be selected.
357
+ #
358
+ # @return [Array<String>]
359
+ # The URLs of the selected ads.
360
+ #
361
+ # @example
204
362
  # sponsored.urls_of { |ad| ad.title =~ /buy these pants/ }
205
363
  #
206
364
  def urls_of(&block)
@@ -208,8 +366,18 @@ module GScraper
208
366
  end
209
367
 
210
368
  #
211
- # Returns the direct URLs of the ads that match the specified _block_.
369
+ # The direct URLs of the selected ads.
370
+ #
371
+ # @yield [ad]
372
+ # The given block will be passed each ad to be selected.
373
+ #
374
+ # @yieldparam [SponsoredAd] ad
375
+ # An ad to be selected.
376
+ #
377
+ # @return [Array<String>]
378
+ # The direct URLs of the selected ads.
212
379
  #
380
+ # @example
213
381
  # sponsored.direct_urls_of { |ad| ad.title =~ /buy these pants/ }
214
382
  #
215
383
  def direct_urls_of(&block)