gscraper 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,7 +1,22 @@
1
+ == 0.1.4 / 2007-12-23
2
+
3
+ * Added Search::Query#result_at for easier access of a single result at
4
+ a given index.
5
+ * Added scraping of the "Cached" and "Similar Pages" URLs of Search
6
+ Results.
7
+ * Added methods to Search::Page for accessing cached URLs, cached pages,
8
+ similar query URLs and similar Queries in mass.
9
+ * Search::Query#page and Search::Query#first_page now can receive blocks.
10
+ * Improved the formatting of URL query parameters.
11
+ * Added more unit-tests.
12
+ * Fixed scraping of Search Result summaries.
13
+ * Fixed various bugs in Search::Query uncovered during unit-testing.
14
+ * Fixed typos in Search::Page's documentation.
15
+
1
16
  == 0.1.3 / 2007-12-22
2
17
 
3
- * Added the Page class, which contains many of convenance methods for
4
- searching through the results within a Page.
18
+ * Added the Search::Page class, which contains many convenience methods
19
+ for searching through the results within a Page.
5
20
 
6
21
  == 0.1.2 / 2007-12-22
7
22
 
data/Manifest.txt CHANGED
@@ -17,3 +17,6 @@ lib/gscraper/search/search.rb
17
17
  lib/gscraper/search.rb
18
18
  test/test_gscraper.rb
19
19
  test/search/query_from_url.rb
20
+ test/search/query_result.rb
21
+ test/search/query_pages.rb
22
+ test/search/page_results.rb
data/Rakefile CHANGED
@@ -12,6 +12,7 @@ Hoe.new('gscraper', GScraper::VERSION) do |p|
12
12
  p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
13
13
  p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
14
14
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
15
+ p.extra_deps = ['hpricot', 'mechanize']
15
16
  end
16
17
 
17
18
  # vim: syntax=Ruby
@@ -4,12 +4,14 @@ module URI
4
4
  # Query parameters
5
5
  attr_reader :query_params
6
6
 
7
+ alias_method :old_initialize, :initialize
8
+
7
9
  #
8
10
  # Creates a new URI::HTTP object and initializes query_params as a
9
11
  # new Hash.
10
12
  #
11
13
  def initialize(*args)
12
- super(*args)
14
+ old_initialize(*args)
13
15
 
14
16
  @query_params = {}
15
17
  parse_query_params
@@ -57,7 +59,11 @@ module URI
57
59
  if value==true
58
60
  "#{name}=active"
59
61
  elsif value
60
- "#{name}=#{URI.encode(value.to_s)}"
62
+ if value.kind_of?(Array)
63
+ "#{name}=#{URI.encode(value.join(' '))}"
64
+ else
65
+ "#{name}=#{URI.encode(value.to_s)}"
66
+ end
61
67
  else
62
68
  "#{name}="
63
69
  end
@@ -13,7 +13,7 @@ module GScraper
13
13
  # Returns the GScraper User-Agent
14
14
  #
15
15
  def GScraper.user_agent
16
- @user_agent
16
+ @user_agent ||= nil
17
17
  end
18
18
 
19
19
  #
@@ -24,10 +24,11 @@ module GScraper
24
24
  end
25
25
 
26
26
  #
27
- # Opens the _uri_ with the given _opts_. The contents of the _uri_ will
28
- # be returned.
27
+ # Opens the _uri_ with the given _opts_. The contents of the _uri_ will be
28
+ # returned.
29
29
  #
30
30
  # GScraper.open('http://www.hackety.org/')
31
+ #
31
32
  # GScraper.open('http://tenderlovemaking.com/',
32
33
  # :user_agent_alias => 'Linux Mozilla')
33
34
  # GScraper.open('http://www.wired.com/', :user_agent => 'the future')
@@ -115,77 +115,174 @@ module GScraper
115
115
 
116
116
  #
117
117
  # Returns an Array containing the ranks of the results within the
118
- # Page. If _block_ is given, each rank will be passed to the _block_.
118
+ # Page.
119
119
  #
120
120
  # page.ranks # => [...]
121
121
  #
122
- # page.ranks do |rank|
123
- # puts ranks
124
- # end
125
- #
126
- def ranks(&block)
127
- mapped = map { |result| result.rank }
128
-
129
- mapped.each(&block) if block
130
- return mapped
122
+ def ranks
123
+ map { |result| result.rank }
131
124
  end
132
125
 
133
126
  #
134
127
  # Returns an Array containing the titles of the results within the
135
- # Page. If _block_ is given, each title will be passed to the _block_.
128
+ # Page.
136
129
  #
137
130
  # page.titles # => [...]
138
131
  #
139
- # page.titles do |title|
140
- # puts title
141
- # end
142
- #
143
- def titles(&block)
144
- mapped = map { |result| result.title }
145
-
146
- mapped.each(&block) if block
147
- return mapped
132
+ def titles
133
+ map { |result| result.title }
148
134
  end
149
135
 
150
136
  #
151
137
  # Returns an Array containing the URLs of the results within the
152
- # Page. If _block_ is given, each URL will be passed to the _block_.
138
+ # Page.
153
139
  #
154
140
  # page.urls # => [...]
155
141
  #
156
- # page.urls do |url|
157
- # puts url
158
- # end
159
- #
160
- def urls(&block)
161
- mapped = map { |result| result.url }
162
-
163
- mapped.each(&block) if block
164
- return mapped
142
+ def urls
143
+ map { |result| result.url }
165
144
  end
166
145
 
167
146
  #
168
147
  # Returns an Array containing the summaries of the results within the
169
- # Page. If _block_ is given, each summary will be passed to the
170
- # _block_.
148
+ # Page.
171
149
  #
172
150
  # page.summaries # => [...]
173
151
  #
174
- # page.summaries do |summary|
175
- # puts summary
176
- # end
152
+ def summaries
153
+ map { |result| result.summary }
154
+ end
155
+
156
+ #
157
+ # Returns an Array containing the cached URLs of the results within
158
+ # the Page.
159
+ #
160
+ # page.cached_urls # => [...]
177
161
  #
178
- def summaries(&block)
179
- mapped = map { |result| result.summaries }
162
+ def cached_urls
163
+ map { |result| result.cached_url }
164
+ end
180
165
 
181
- mapped.each(&block) if block
182
- return mapped
166
+ #
167
+ # Returns an Array containing the cached pages of the results within
168
+ # the Page. If _opts_ are given, they will be used in accessing the
169
+ # cached page.
170
+ #
171
+ # page.cached_pages # => [...]
172
+ #
173
+ def cached_pages(opts={})
174
+ map { |result| result.cached_page(opts) }
175
+ end
176
+
177
+ #
178
+ # Returns an Array containing the similar Query URLs of the results
179
+ # within the Page.
180
+ #
181
+ # page.similar_urls # => [...]
182
+ #
183
+ def similar_urls
184
+ map { |result| result.similar_url }
185
+ end
186
+
187
+ #
188
+ # Returns an Array containing the similar Queries of the results
189
+ # within the Page.
190
+ #
191
+ # page.similar_queries # => [...]
192
+ #
193
+ def similar_queries
194
+ map { |result| result.similar_query }
195
+ end
196
+
197
+ #
198
+ # Iterates over each result's rank within the Page, passing each to
199
+ # the given _block_.
200
+ #
201
+ # each_rank { |rank| puts rank }
202
+ #
203
+ def each_rank(&block)
204
+ ranks.each(&block)
205
+ end
206
+
207
+ #
208
+ # Iterates over each result's title within the Page, passing each to
209
+ # the given _block_.
210
+ #
211
+ # each_title { |title| puts title }
212
+ #
213
+ def each_title(&block)
214
+ titles.each(&block)
215
+ end
216
+
217
+ #
218
+ # Iterates over each result's url within the Page, passing each to
219
+ # the given _block_.
220
+ #
221
+ # each_url { |url| puts url }
222
+ #
223
+ def each_url(&block)
224
+ urls.each(&block)
225
+ end
226
+
227
+ #
228
+ # Iterates over each result's summary within the Page, passing each
229
+ # to the given _block_.
230
+ #
231
+ # each_summary { |summary| puts summary }
232
+ #
233
+ def each_summary(&block)
234
+ summaries.each(&block)
235
+ end
236
+
237
+ #
238
+ # Iterates over each result's cached URL within the Page, passing
239
+ # each to the given _block_.
240
+ #
241
+ # each_cached_url { |url| puts url }
242
+ #
243
+ def each_cached_url(&block)
244
+ cached_urls.each(&block)
245
+ end
246
+
247
+ #
248
+ # Iterates over each result's cached page within the Page, passing
249
+ # each to the given _block_. If _opts_ are given, they will be used
250
+ # in accessing the cached pages.
251
+ #
252
+ # each_cached_page { |page| puts page.readlines }
253
+ #
254
+ def each_cached_page(opts={},&block)
255
+ cached_pages(opts).each(&block)
256
+ end
257
+
258
+ #
259
+ # Iterates over each result's similar Query URL within the Page,
260
+ # passing each to the given _block_.
261
+ #
262
+ # each_similar_url { |url| puts url }
263
+ #
264
+ def each_similar_url(&block)
265
+ similar_urls.each(&block)
266
+ end
267
+
268
+ #
269
+ # Iterates over each result's similar Query within the Page, passing
270
+ # each to the given _block_.
271
+ #
272
+ # each_similar_query do |q|
273
+ # q.first_page do |page|
274
+ # puts page.urls.join("\n")
275
+ # end
276
+ # end
277
+ #
278
+ def each_similar_query(&block)
279
+ similar_queries.each(&block)
183
280
  end
184
281
 
185
282
  #
186
283
  # Returns the ranks of the results that match the specified _block_.
187
284
  #
188
- # page.ranks_of { |result result.title =~ /awesome/ }
285
+ # page.ranks_of { |result| result.title =~ /awesome/ }
189
286
  #
190
287
  def ranks_of(&block)
191
288
  results_with(&block).ranks
@@ -194,7 +291,7 @@ module GScraper
194
291
  #
195
292
  # Returns the titles of the results that match the specified _block_.
196
293
  #
197
- # page.titles_of { |result result.url.include?('www') }
294
+ # page.titles_of { |result| result.url.include?('www') }
198
295
  #
199
296
  def titles_of(&block)
200
297
  results_with(&block).titles
@@ -203,7 +300,7 @@ module GScraper
203
300
  #
204
301
  # Returns the urls of the results that match the specified _block_.
205
302
  #
206
- # page.urls_of { |result result.summary =~ /awesome pants/ }
303
+ # page.urls_of { |result| result.summary =~ /awesome pants/ }
207
304
  #
208
305
  def urls_of(&block)
209
306
  results_with(&block).urls
@@ -213,12 +310,53 @@ module GScraper
213
310
  # Returns the summaries of the results that match the specified
214
311
  # _block_.
215
312
  #
216
- # page.summaries_of { |result result.title =~ /what if/ }
313
+ # page.summaries_of { |result| result.title =~ /what if/ }
217
314
  #
218
315
  def summaries_of(&block)
219
316
  results_with(&block).summaries
220
317
  end
221
318
 
319
+ #
320
+ # Returns the cached URLs of the results that match the specified
321
+ # _block_.
322
+ #
323
+ # page.cached_urls_of { |result| result.title =~ /howdy/ }
324
+ #
325
+ def cached_urls_of(&block)
326
+ results_with(&block).cached_urls
327
+ end
328
+
329
+ #
330
+ # Returns the cached pages of the results that match the specified
331
+ # _block_. If _opts_ are given, they will be used in accessing
332
+ # the cached pages.
333
+ #
334
+ # page.cached_pages_of { |result| result.title =~ /dude/ }
335
+ #
336
+ def cached_pages_of(opts={},&block)
337
+ results_with(&block).cached_pages(opts)
338
+ end
339
+
340
+ #
341
+ # Returns the similar query URLs of the results that match the
342
+ # specified _block_.
343
+ #
344
+ # page.similar_urls_of { |result| result.title =~ /what if/ }
345
+ #
346
+ def similar_urls_of(&block)
347
+ results_with(&block).similar_urls
348
+ end
349
+
350
+ #
351
+ # Returns the similar Queries of the results that match the
352
+ # specified _block_.
353
+ #
354
+ # page.similar_queries_of { |result| result.title =~ /hackety/ }
355
+ #
356
+ def similar_queries_of(&block)
357
+ results_with(&block).similar_queries
358
+ end
359
+
222
360
  end
223
361
  end
224
362
  end
@@ -10,7 +10,8 @@ module GScraper
10
10
  module Search
11
11
  class Query
12
12
 
13
- SEARCH_URL = 'http://www.google.com/search'
13
+ SEARCH_HOST = 'www.google.com'
14
+ SEARCH_URL = "http://#{SEARCH_HOST}/search"
14
15
 
15
16
  RESULTS_PER_PAGE = 10
16
17
 
@@ -90,7 +91,7 @@ module GScraper
90
91
  def initialize(opts={},&block)
91
92
  super()
92
93
 
93
- @results_per_page = opts[:results_per_page] || RESULTS_PER_PAGE
94
+ @results_per_page = (opts[:results_per_page] || RESULTS_PER_PAGE)
94
95
 
95
96
  @query = opts[:query]
96
97
  @exact_phrase = opts[:exact_phrase]
@@ -104,12 +105,29 @@ module GScraper
104
105
 
105
106
  if opts[:within_past_day]
106
107
  @within_past_day = opts[:within_past_day]
108
+ @within_past_week = false
109
+ @within_past_months = false
110
+ @within_past_year = false
107
111
  elsif opts[:within_past_week]
112
+ @within_past_day = false
108
113
  @within_past_week = opts[:within_past_week]
114
+ @within_past_months = false
115
+ @within_past_year = false
109
116
  elsif opts[:within_past_months]
117
+ @within_past_day = false
118
+ @within_past_week = false
110
119
  @within_past_months = opts[:within_past_months]
120
+ @within_past_year = false
111
121
  elsif opts[:within_past_year]
122
+ @within_past_day = false
123
+ @within_past_week = false
124
+ @within_past_months = false
112
125
  @within_past_year = opts[:within_past_year]
126
+ else
127
+ @within_past_day = false
128
+ @within_past_week = false
129
+ @within_past_months = false
130
+ @within_past_year = false
113
131
  end
114
132
 
115
133
  @numeric_range = opts[:numeric_range]
@@ -318,7 +336,7 @@ module GScraper
318
336
  def page_url(page_index)
319
337
  url = search_url
320
338
 
321
- url.query_params['start'] = page_index_offset(page_index)
339
+ url.query_params['start'] = page_result_offset(page_index)
322
340
  url.query_params['sa'] = 'N'
323
341
 
324
342
  return url
@@ -327,33 +345,67 @@ module GScraper
327
345
  #
328
346
  # Returns a Page object containing Result objects at the specified
329
347
  # _page_index_. If _opts_ are given, they will be used in accessing
330
- # the SEARCH_URL.
348
+ # the SEARCH_URL. If a _block_ is given, it will be passed the newly
349
+ # created Page.
331
350
  #
332
- def page(page_index,opts={})
351
+ def page(page_index,opts={},&block)
333
352
  doc = Hpricot(GScraper.open(page_url(page_index),opts))
353
+
334
354
  new_page = Page.new
355
+ results = doc.search('//div.g')[0...@results_per_page.to_i]
356
+
357
+ results.each_with_index do |result,index|
358
+ rank = page_result_offset(page_index) + (index + 1)
359
+ title = result.at('//h2.r').inner_text
360
+ url = result.at('//h2.r/a').get_attribute('href')
361
+
362
+ summary = result.at('//td.j//font').children[0...-3].inject('') do |accum,elem|
363
+ accum + elem.inner_text
364
+ end
365
+
366
+ cached_url = nil
367
+ similar_url = nil
335
368
 
336
- doc.search('//div.g').each_with_index do |result,index|
337
- rank = page_index_offset(page_index) + (index + 1)
338
- title = result.search('//h2.r').first.inner_text
339
- url = result.search('//h2.r/a').first.get_attribute('href')
340
- # TODO: exclude URL and Links from summary text
341
- summary = result.search('//td.j').first.inner_text
369
+ if (cached_link = result.at('//td.j//font/nobr/a:first'))
370
+ cached_url = cached_link.get_attribute('href')
371
+ end
342
372
 
343
- # TODO: scrape Cached and Similar links
373
+ if (similar_link = result.at('//td.j//font/nobr/a:last'))
374
+ similar_url = "http://#{SEARCH_HOST}" + similar_link.get_attribute('href')
375
+ end
344
376
 
345
- new_page << Result.new(rank,title,url,summary)
377
+ new_page << Result.new(rank,title,url,summary,cached_url,similar_url)
346
378
  end
347
379
 
380
+ block.call(new_page) if block
348
381
  return new_page
349
382
  end
350
383
 
351
384
  #
352
- # Returns the results on the first page. If _opts_ are given, they
353
- # will be used in accessing the SEARCH_URL.
385
+ # Returns the Results on the first page. If _opts_ are given, they
386
+ # will be used in accessing the SEARCH_URL. If a _block_ is given
387
+ # it will be passed the newly created Page.
388
+ #
389
+ def first_page(opts={},&block)
390
+ page(1,opts,&block)
391
+ end
392
+
393
+ #
394
+ # Returns the Result at the specified _index_. If _opts_ are given,
395
+ # they will be used in accessing the Page containing the requested
396
+ # Result.
354
397
  #
355
- def first_page(opts={})
356
- page(1,opts)
398
+ def result_at(index,opts={})
399
+ page(result_page_index(index),opts)[page_result_index(index)]
400
+ end
401
+
402
+ #
403
+ # Returns the first Result of the Query. If _opts_ are
404
+ # given, they will be used in accessing the Page containing the
405
+ # requested Result.
406
+ #
407
+ def first_result(opts={})
408
+ result_at(1,opts)
357
409
  end
358
410
 
359
411
  #
@@ -387,8 +439,22 @@ module GScraper
387
439
  #
388
440
  # Returns the rank offset for the specified _page_index_.
389
441
  #
390
- def page_index_offset(page_index)
391
- (page_index.to_i - 1) * @result_per_page.to_i
442
+ def page_result_offset(page_index)
443
+ (page_index.to_i - 1) * @results_per_page.to_i
444
+ end
445
+
446
+ #
447
+ # Returns the in-Page index of the _result_index_.
448
+ #
449
+ def page_result_index(result_index)
450
+ (result_index.to_i - 1) % @results_per_page.to_i
451
+ end
452
+
453
+ #
454
+ # Returns the page index for the specified _result_index_.
455
+ #
456
+ def result_page_index(result_index)
457
+ ((result_index.to_i - 1) / @results_per_page.to_i) + 1
392
458
  end
393
459
 
394
460
  end
@@ -14,15 +14,54 @@ module GScraper
14
14
  # Summary from the result page
15
15
  attr_reader :summary
16
16
 
17
+ # URL of the cached result page
18
+ attr_reader :cached_url
19
+
20
+ # URL of the similar results Query
21
+ attr_reader :similar_url
22
+
17
23
  #
18
24
  # Creates a new Result object with the given _rank_, _title_
19
- # _summary_, _url_ and _size_.
25
+ # _url_, _summary_, _cached_url_ and _similar_url_.
20
26
  #
21
- def initialize(rank,title,url,summary)
27
+ def initialize(rank,title,url,summary,cached_url=nil,similar_url=nil)
22
28
  @rank = rank
23
29
  @title = title
24
30
  @url = url
25
31
  @summary = summary
32
+ @cached_url = cached_url
33
+ @similar_url = similar_url
34
+ end
35
+
36
+ #
37
+ # Opens the URL of the cached page for the Result. If _opts_ are
38
+ # given, they will be used in accessing the cached page URL.
39
+ #
40
+ # result.cached_page # => File
41
+ #
42
+ def cached_page(opts={})
43
+ if @cached_url
44
+ return GScraper.open(@cached_url,opts)
45
+ end
46
+ end
47
+
48
+ #
49
+ # Create a new Query for results that are similar to the Result. If
50
+ # a _block_ is given, it will be passed the newly created Query
51
+ # object.
52
+ #
53
+ # result.similar_query # => Query
54
+ #
55
+ # result.similar_query do |q|
56
+ # q.first_page.each_url do |url|
57
+ # puts url
58
+ # end
59
+ # end
60
+ #
61
+ def similar_query(&block)
62
+ if @similar_url
63
+ return Query.from_url(@similar_url,&block)
64
+ end
26
65
  end
27
66
 
28
67
  #
@@ -1,3 +1,3 @@
1
1
  module GScraper
2
- VERSION = '0.1.3'
2
+ VERSION = '0.1.4'
3
3
  end
@@ -0,0 +1,103 @@
1
+ require 'test/unit'
2
+ require 'gscraper/search/page'
3
+ require 'gscraper/search/query'
4
+
5
+ class PageResults < Test::Unit::TestCase
6
+
7
+ include GScraper
8
+
9
+ def setup
10
+ @query = Search::Query.new(:query => 'ruby')
11
+ @page = @query.first_page
12
+ end
13
+
14
+ def test_results_per_page
15
+ assert_equal @page.length, @query.results_per_page
16
+ end
17
+
18
+ def test_first_result
19
+ assert_not_nil @page[0], "First Page for Query 'ruby' does not have a first Result"
20
+ end
21
+
22
+ def test_last_result
23
+ assert_not_nil @page[-1], "First Page for Query 'ruby' does not have a last Result"
24
+ end
25
+
26
+ def test_ranks
27
+ ranks = @page.ranks
28
+
29
+ assert_not_nil ranks, "First Page for Query 'ruby' does not have any ranks"
30
+
31
+ assert_equal ranks.class, Array, "The ranks of a Page must be an Array"
32
+
33
+ assert_equal ranks.empty?, false, "The ranks of the First Page are empty"
34
+
35
+ assert_equal ranks.length, @page.length
36
+ end
37
+
38
+ def test_titles
39
+ titles = @page.titles
40
+
41
+ assert_not_nil titles, "First Page for Query 'ruby' does not have any titles"
42
+
43
+ assert_equal titles.class, Array, "The titles of a Page must be an Array"
44
+
45
+ assert_equal titles.empty?, false, "The titles of the First Page are empty"
46
+
47
+ assert_equal titles.length, @page.length
48
+ end
49
+
50
+ def test_urls
51
+ urls = @page.urls
52
+
53
+ assert_not_nil urls, "First Page for Query 'ruby' does not have any urls"
54
+
55
+ assert_equal urls.class, Array, "The urls of a Page must be an Array"
56
+
57
+ assert_equal urls.empty?, false, "The urls of the First Page are empty"
58
+
59
+ assert_equal urls.length, @page.length
60
+ end
61
+
62
+ def test_summaries
63
+ summaries = @page.summaries
64
+
65
+ assert_not_nil summaries, "First Page for Query 'ruby' does not have any summaries"
66
+
67
+ assert_equal summaries.class, Array, "The summaries of a Page must be an Array"
68
+
69
+ assert_equal summaries.empty?, false, "The summaries of the First Page are empty"
70
+
71
+ assert_equal summaries.length, @page.length
72
+ end
73
+
74
+ def test_cached_urls
75
+ cached_urls = @page.cached_urls
76
+
77
+ assert_not_nil cached_urls, "First Page for Query 'ruby' does not have any cached_urls"
78
+
79
+ assert_equal cached_urls.class, Array, "The cached_urls of a Page must be an Array"
80
+
81
+ assert_equal cached_urls.empty?, false, "The cached_urls of the First Page are empty"
82
+
83
+ assert_equal cached_urls.length, @page.length
84
+ end
85
+
86
+ def test_similar_urls
87
+ similar_urls = @page.similar_urls
88
+
89
+ assert_not_nil similar_urls, "First Page for Query 'ruby' does not have any similar URLs"
90
+
91
+ assert_equal similar_urls.class, Array, "The similar URLs of a Page must be an Array"
92
+
93
+ assert_equal similar_urls.empty?, false, "The similar URLs of the First Page are empty"
94
+
95
+ assert_equal similar_urls.length, @page.length
96
+ end
97
+
98
+ def teardown
99
+ @page = nil
100
+ @query = nil
101
+ end
102
+
103
+ end
@@ -11,10 +11,6 @@ class QueryFromURL < Test::Unit::TestCase
11
11
  @query = Search::Query.from_url(QUERY_URL)
12
12
  end
13
13
 
14
- def teardown
15
- @query = nil
16
- end
17
-
18
14
  def test_query
19
15
  assert_equal @query.query, 'test'
20
16
  end
@@ -47,4 +43,8 @@ class QueryFromURL < Test::Unit::TestCase
47
43
  assert_nil @query.links_to
48
44
  end
49
45
 
46
+ def teardown
47
+ @query = nil
48
+ end
49
+
50
50
  end
@@ -0,0 +1,32 @@
1
+ require 'test/unit'
2
+ require 'gscraper/search/query'
3
+
4
+ class QueryPages < Test::Unit::TestCase
5
+
6
+ include GScraper
7
+
8
+ def setup
9
+ @query = Search::Query.new(:query => 'ruby')
10
+ end
11
+
12
+ def test_first_page
13
+ page = @query.first_page
14
+
15
+ assert_not_nil page
16
+ assert_equal page.empty?, false, "Query of 'ruby' has zero results"
17
+ assert_equal page.length, @query.results_per_page
18
+ end
19
+
20
+ def test_second_page
21
+ page = @query.page(2)
22
+
23
+ assert_not_nil page
24
+ assert_equal page.empty?, false, "Query of 'ruby' has zero results"
25
+ assert_equal page.length, @query.results_per_page
26
+ end
27
+
28
+ def teardown
29
+ @query = nil
30
+ end
31
+
32
+ end
@@ -0,0 +1,30 @@
1
+ require 'test/unit'
2
+ require 'gscraper/search/query'
3
+
4
+ class QueryResult < Test::Unit::TestCase
5
+
6
+ include GScraper
7
+
8
+ def setup
9
+ @query = Search::Query.new(:query => 'ruby')
10
+ end
11
+
12
+ def test_first_result
13
+ result = @query.first_result
14
+
15
+ assert_not_nil result, "The Query for 'ruby' has no first-result"
16
+ assert_equal result.rank, 1, "The first result for the Query 'ruby' does not have the rank of 1"
17
+ end
18
+
19
+ def test_second_result
20
+ result = @query.result_at(2)
21
+
22
+ assert_not_nil result, "The Query for 'ruby' has no second-result"
23
+ assert_equal result.rank, 2, "The second result for the Query 'ruby' does not have the rank of 2"
24
+ end
25
+
26
+ def teardown
27
+ @query = nil
28
+ end
29
+
30
+ end
@@ -2,3 +2,6 @@ $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__),'..','lib')
2
2
 
3
3
  require 'test/unit'
4
4
  require 'search/query_from_url'
5
+ require 'search/query_result'
6
+ require 'search/query_pages'
7
+ require 'search/page_results'
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: gscraper
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.3
7
- date: 2007-12-21 00:00:00 -08:00
6
+ version: 0.1.4
7
+ date: 2007-12-22 00:00:00 -08:00
8
8
  summary: A ruby web-scraping interface to various Google Services
9
9
  require_paths:
10
10
  - lib
@@ -48,6 +48,9 @@ files:
48
48
  - lib/gscraper/search.rb
49
49
  - test/test_gscraper.rb
50
50
  - test/search/query_from_url.rb
51
+ - test/search/query_result.rb
52
+ - test/search/query_pages.rb
53
+ - test/search/page_results.rb
51
54
  test_files:
52
55
  - test/test_gscraper.rb
53
56
  rdoc_options:
@@ -65,6 +68,24 @@ extensions: []
65
68
  requirements: []
66
69
 
67
70
  dependencies:
71
+ - !ruby/object:Gem::Dependency
72
+ name: hpricot
73
+ version_requirement:
74
+ version_requirements: !ruby/object:Gem::Version::Requirement
75
+ requirements:
76
+ - - ">"
77
+ - !ruby/object:Gem::Version
78
+ version: 0.0.0
79
+ version:
80
+ - !ruby/object:Gem::Dependency
81
+ name: mechanize
82
+ version_requirement:
83
+ version_requirements: !ruby/object:Gem::Version::Requirement
84
+ requirements:
85
+ - - ">"
86
+ - !ruby/object:Gem::Version
87
+ version: 0.0.0
88
+ version:
68
89
  - !ruby/object:Gem::Dependency
69
90
  name: hoe
70
91
  version_requirement: