gscraper 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,7 +1,22 @@
1
+ == 0.1.4 / 2007-12-23
2
+
3
+ * Added Search::Query#result_at for easier access of a single result at
4
+ a given index.
5
+ * Added scraping of the "Cached" and "Similar Pages" URLs of Search
6
+ Results.
7
+ * Added methods to Search::Page for accessing cached URLs, cached pages,
8
+ similar query URLs and similar Queries en masse.
9
+ * Search::Query#page and Search::Query#first_page now can receive blocks.
10
+ * Improved the formatting of URL query parameters.
11
+ * Added more unit-tests.
12
+ * Fixed scraping of Search Result summaries.
13
+ * Fixed various bugs in Search::Query uncovered during unit-testing.
14
+ * Fixed typos in Search::Page's documentation.
15
+
1
16
  == 0.1.3 / 2007-12-22
2
17
 
3
- * Added the Page class, which contains many of convenance methods for
4
- searching through the results within a Page.
18
+ * Added the Search::Page class, which contains many convenience methods
19
+ for searching through the results within a Page.
5
20
 
6
21
  == 0.1.2 / 2007-12-22
7
22
 
data/Manifest.txt CHANGED
@@ -17,3 +17,6 @@ lib/gscraper/search/search.rb
17
17
  lib/gscraper/search.rb
18
18
  test/test_gscraper.rb
19
19
  test/search/query_from_url.rb
20
+ test/search/query_result.rb
21
+ test/search/query_pages.rb
22
+ test/search/page_results.rb
data/Rakefile CHANGED
@@ -12,6 +12,7 @@ Hoe.new('gscraper', GScraper::VERSION) do |p|
12
12
  p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
13
13
  p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
14
14
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
15
+ p.extra_deps = ['hpricot', 'mechanize']
15
16
  end
16
17
 
17
18
  # vim: syntax=Ruby
@@ -4,12 +4,14 @@ module URI
4
4
  # Query parameters
5
5
  attr_reader :query_params
6
6
 
7
+ alias_method :old_initialize, :initialize
8
+
7
9
  #
8
10
  # Creates a new URI::HTTP object and initializes query_params as a
9
11
  # new Hash.
10
12
  #
11
13
  def initialize(*args)
12
- super(*args)
14
+ old_initialize(*args)
13
15
 
14
16
  @query_params = {}
15
17
  parse_query_params
@@ -57,7 +59,11 @@ module URI
57
59
  if value==true
58
60
  "#{name}=active"
59
61
  elsif value
60
- "#{name}=#{URI.encode(value.to_s)}"
62
+ if value.kind_of?(Array)
63
+ "#{name}=#{URI.encode(value.join(' '))}"
64
+ else
65
+ "#{name}=#{URI.encode(value.to_s)}"
66
+ end
61
67
  else
62
68
  "#{name}="
63
69
  end
@@ -13,7 +13,7 @@ module GScraper
13
13
  # Returns the GScraper User-Agent
14
14
  #
15
15
  def GScraper.user_agent
16
- @user_agent
16
+ @user_agent ||= nil
17
17
  end
18
18
 
19
19
  #
@@ -24,10 +24,11 @@ module GScraper
24
24
  end
25
25
 
26
26
  #
27
- # Opens the _uri_ with the given _opts_. The contents of the _uri_ will
28
- # be returned.
27
+ # Opens the _uri_ with the given _opts_. The contents of the _uri_ will be
28
+ # returned.
29
29
  #
30
30
  # GScraper.open('http://www.hackety.org/')
31
+ #
31
32
  # GScraper.open('http://tenderlovemaking.com/',
32
33
  # :user_agent_alias => 'Linux Mozilla')
33
34
  # GScraper.open('http://www.wired.com/', :user_agent => 'the future')
@@ -115,77 +115,174 @@ module GScraper
115
115
 
116
116
  #
117
117
  # Returns an Array containing the ranks of the results within the
118
- # Page. If _block_ is given, each rank will be passed to the _block_.
118
+ # Page.
119
119
  #
120
120
  # page.ranks # => [...]
121
121
  #
122
- # page.ranks do |rank|
123
- # puts ranks
124
- # end
125
- #
126
- def ranks(&block)
127
- mapped = map { |result| result.rank }
128
-
129
- mapped.each(&block) if block
130
- return mapped
122
+ def ranks
123
+ map { |result| result.rank }
131
124
  end
132
125
 
133
126
  #
134
127
  # Returns an Array containing the titles of the results within the
135
- # Page. If _block_ is given, each title will be passed to the _block_.
128
+ # Page.
136
129
  #
137
130
  # page.titles # => [...]
138
131
  #
139
- # page.titles do |title|
140
- # puts title
141
- # end
142
- #
143
- def titles(&block)
144
- mapped = map { |result| result.title }
145
-
146
- mapped.each(&block) if block
147
- return mapped
132
+ def titles
133
+ map { |result| result.title }
148
134
  end
149
135
 
150
136
  #
151
137
  # Returns an Array containing the URLs of the results within the
152
- # Page. If _block_ is given, each URL will be passed to the _block_.
138
+ # Page.
153
139
  #
154
140
  # page.urls # => [...]
155
141
  #
156
- # page.urls do |url|
157
- # puts url
158
- # end
159
- #
160
- def urls(&block)
161
- mapped = map { |result| result.url }
162
-
163
- mapped.each(&block) if block
164
- return mapped
142
+ def urls
143
+ map { |result| result.url }
165
144
  end
166
145
 
167
146
  #
168
147
  # Returns an Array containing the summaries of the results within the
169
- # Page. If _block_ is given, each summary will be passed to the
170
- # _block_.
148
+ # Page.
171
149
  #
172
150
  # page.summaries # => [...]
173
151
  #
174
- # page.summaries do |summary|
175
- # puts summary
176
- # end
152
+ def summaries
153
+ map { |result| result.summary }
154
+ end
155
+
156
+ #
157
+ # Returns an Array containing the cached URLs of the results within
158
+ # the Page.
159
+ #
160
+ # page.cached_urls # => [...]
177
161
  #
178
- def summaries(&block)
179
- mapped = map { |result| result.summaries }
162
+ def cached_urls
163
+ map { |result| result.cached_url }
164
+ end
180
165
 
181
- mapped.each(&block) if block
182
- return mapped
166
+ #
167
+ # Returns an Array containing the cached pages of the results within
168
+ # the Page. If _opts_ are given, they will be used in accessing the
169
+ # cached page.
170
+ #
171
+ # page.cached_pages # => [...]
172
+ #
173
+ def cached_pages(opts={})
174
+ map { |result| result.cached_page(opts) }
175
+ end
176
+
177
+ #
178
+ # Returns an Array containing the similar Query URLs of the results
179
+ # within the Page.
180
+ #
181
+ # page.similar_urls # => [...]
182
+ #
183
+ def similar_urls
184
+ map { |result| result.similar_url }
185
+ end
186
+
187
+ #
188
+ # Returns an Array containing the similar Queries of the results
189
+ # within the Page.
190
+ #
191
+ # page.similar_queries # => [...]
192
+ #
193
+ def similar_queries
194
+ map { |result| result.similar_query }
195
+ end
196
+
197
+ #
198
+ # Iterates over each result's rank within the Page, passing each to
199
+ # the given _block_.
200
+ #
201
+ # each_rank { |rank| puts rank }
202
+ #
203
+ def each_rank(&block)
204
+ ranks.each(&block)
205
+ end
206
+
207
+ #
208
+ # Iterates over each result's title within the Page, passing each to
209
+ # the given _block_.
210
+ #
211
+ # each_title { |title| puts title }
212
+ #
213
+ def each_title(&block)
214
+ titles.each(&block)
215
+ end
216
+
217
+ #
218
+ # Iterates over each result's url within the Page, passing each to
219
+ # the given _block_.
220
+ #
221
+ # each_url { |url| puts url }
222
+ #
223
+ def each_url(&block)
224
+ urls.each(&block)
225
+ end
226
+
227
+ #
228
+ # Iterates over each result's summary within the Page, passing each
229
+ # to the given _block_.
230
+ #
231
+ # each_summary { |summary| puts summary }
232
+ #
233
+ def each_summary(&block)
234
+ summaries.each(&block)
235
+ end
236
+
237
+ #
238
+ # Iterates over each result's cached URLs within the Page, passing
239
+ # each to the given _block_.
240
+ #
241
+ # each_cached_url { |url| puts url }
242
+ #
243
+ def each_cached_url(&block)
244
+ cached_urls.each(&block)
245
+ end
246
+
247
+ #
248
+ # Iterates over each result's cached pages within the Page, passing
249
+ # each to the given _block_. If _opts_ are given, they will be used
250
+ # in accessing the cached pages.
251
+ #
252
+ # each_cached_page { |page| puts page.readlines }
253
+ #
254
+ def each_cached_page(opts={},&block)
255
+ cached_pages(opts).each(&block)
256
+ end
257
+
258
+ #
259
+ # Iterates over each result's similar Query URLs within the Page,
260
+ # passing each to the given _block_.
261
+ #
262
+ # each_similar_url { |url| puts url }
263
+ #
264
+ def each_similar_url(&block)
265
+ similar_urls.each(&block)
266
+ end
267
+
268
+ #
269
+ # Iterates over each result's similar Query within the Page, passing
270
+ # each to the given _block_.
271
+ #
272
+ # each_similar_query do |q|
273
+ # q.first_page do |page|
274
+ # puts page.urls.join("\n")
275
+ # end
276
+ # end
277
+ #
278
+ def each_similar_query(&block)
279
+ similar_queries.each(&block)
183
280
  end
184
281
 
185
282
  #
186
283
  # Returns the ranks of the results that match the specified _block_.
187
284
  #
188
- # page.ranks_of { |result result.title =~ /awesome/ }
285
+ # page.ranks_of { |result| result.title =~ /awesome/ }
189
286
  #
190
287
  def ranks_of(&block)
191
288
  results_with(&block).ranks
@@ -194,7 +291,7 @@ module GScraper
194
291
  #
195
292
  # Returns the titles of the results that match the specified _block_.
196
293
  #
197
- # page.titles_of { |result result.url.include?('www') }
294
+ # page.titles_of { |result| result.url.include?('www') }
198
295
  #
199
296
  def titles_of(&block)
200
297
  results_with(&block).titles
@@ -203,7 +300,7 @@ module GScraper
203
300
  #
204
301
  # Returns the urls of the results that match the specified _block_.
205
302
  #
206
- # page.urls_of { |result result.summary =~ /awesome pants/ }
303
+ # page.urls_of { |result| result.summary =~ /awesome pants/ }
207
304
  #
208
305
  def urls_of(&block)
209
306
  results_with(&block).urls
@@ -213,12 +310,53 @@ module GScraper
213
310
  # Returns the summaries of the results that match the specified
214
311
  # _block_.
215
312
  #
216
- # page.summaries_of { |result result.title =~ /what if/ }
313
+ # page.summaries_of { |result| result.title =~ /what if/ }
217
314
  #
218
315
  def summaries_of(&block)
219
316
  results_with(&block).summaries
220
317
  end
221
318
 
319
+ #
320
+ # Returns the cached URLs of the results that match the specified
321
+ # _block_.
322
+ #
323
+ # page.cached_urls_of { |result| result.title =~ /howdy/ }
324
+ #
325
+ def cached_urls_of(&block)
326
+ results_with(&block).cached_urls
327
+ end
328
+
329
+ #
330
+ # Returns the cached pages of the results that match the specified
331
+ # _block_. If _opts_ are given, they will be used in accessing
332
+ # the cached pages.
333
+ #
334
+ # page.cached_pages_of { |result| result.title =~ /dude/ }
335
+ #
336
+ def cached_pages_of(opts={},&block)
337
+ results_with(&block).cached_pages(opts)
338
+ end
339
+
340
+ #
341
+ # Returns the similar query URLs of the results that match the
342
+ # specified _block_.
343
+ #
344
+ # page.similar_urls_of { |result| result.title =~ /what if/ }
345
+ #
346
+ def similar_urls_of(&block)
347
+ results_with(&block).similar_urls
348
+ end
349
+
350
+ #
351
+ # Returns the similar Queries of the results that match the
352
+ # specified _block_.
353
+ #
354
+ # page.similar_queries_of { |result| result.title =~ /hackety/ }
355
+ #
356
+ def similar_queries_of(&block)
357
+ results_with(&block).similar_queries
358
+ end
359
+
222
360
  end
223
361
  end
224
362
  end
@@ -10,7 +10,8 @@ module GScraper
10
10
  module Search
11
11
  class Query
12
12
 
13
- SEARCH_URL = 'http://www.google.com/search'
13
+ SEARCH_HOST = 'www.google.com'
14
+ SEARCH_URL = "http://#{SEARCH_HOST}/search"
14
15
 
15
16
  RESULTS_PER_PAGE = 10
16
17
 
@@ -90,7 +91,7 @@ module GScraper
90
91
  def initialize(opts={},&block)
91
92
  super()
92
93
 
93
- @results_per_page = opts[:results_per_page] || RESULTS_PER_PAGE
94
+ @results_per_page = (opts[:results_per_page] || RESULTS_PER_PAGE)
94
95
 
95
96
  @query = opts[:query]
96
97
  @exact_phrase = opts[:exact_phrase]
@@ -104,12 +105,29 @@ module GScraper
104
105
 
105
106
  if opts[:within_past_day]
106
107
  @within_past_day = opts[:within_past_day]
108
+ @within_past_week = false
109
+ @within_past_months = false
110
+ @within_past_year = false
107
111
  elsif opts[:within_past_week]
112
+ @within_past_day = false
108
113
  @within_past_week = opts[:within_past_week]
114
+ @within_past_months = false
115
+ @within_past_year = false
109
116
  elsif opts[:within_past_months]
117
+ @within_past_day = false
118
+ @within_past_week = false
110
119
  @within_past_months = opts[:within_past_months]
120
+ @within_past_year = false
111
121
  elsif opts[:within_past_year]
122
+ @within_past_day = false
123
+ @within_past_week = false
124
+ @within_past_months = false
112
125
  @within_past_year = opts[:within_past_year]
126
+ else
127
+ @within_past_day = false
128
+ @within_past_week = false
129
+ @within_past_months = false
130
+ @within_past_year = false
113
131
  end
114
132
 
115
133
  @numeric_range = opts[:numeric_range]
@@ -318,7 +336,7 @@ module GScraper
318
336
  def page_url(page_index)
319
337
  url = search_url
320
338
 
321
- url.query_params['start'] = page_index_offset(page_index)
339
+ url.query_params['start'] = page_result_offset(page_index)
322
340
  url.query_params['sa'] = 'N'
323
341
 
324
342
  return url
@@ -327,33 +345,67 @@ module GScraper
327
345
  #
328
346
  # Returns a Page object containing Result objects at the specified
329
347
  # _page_index_. If _opts_ are given, they will be used in accessing
330
- # the SEARCH_URL.
348
+ # the SEARCH_URL. If a _block_ is given, it will be passed the newly
349
+ # created Page.
331
350
  #
332
- def page(page_index,opts={})
351
+ def page(page_index,opts={},&block)
333
352
  doc = Hpricot(GScraper.open(page_url(page_index),opts))
353
+
334
354
  new_page = Page.new
355
+ results = doc.search('//div.g')[0...@results_per_page.to_i]
356
+
357
+ results.each_with_index do |result,index|
358
+ rank = page_result_offset(page_index) + (index + 1)
359
+ title = result.at('//h2.r').inner_text
360
+ url = result.at('//h2.r/a').get_attribute('href')
361
+
362
+ summary = result.at('//td.j//font').children[0...-3].inject('') do |accum,elem|
363
+ accum + elem.inner_text
364
+ end
365
+
366
+ cached_url = nil
367
+ similar_url = nil
335
368
 
336
- doc.search('//div.g').each_with_index do |result,index|
337
- rank = page_index_offset(page_index) + (index + 1)
338
- title = result.search('//h2.r').first.inner_text
339
- url = result.search('//h2.r/a').first.get_attribute('href')
340
- # TODO: exclude URL and Links from summary text
341
- summary = result.search('//td.j').first.inner_text
369
+ if (cached_link = result.at('//td.j//font/nobr/a:first'))
370
+ cached_url = cached_link.get_attribute('href')
371
+ end
342
372
 
343
- # TODO: scrape Cached and Similar links
373
+ if (similar_link = result.at('//td.j//font/nobr/a:last'))
374
+ similar_url = "http://#{SEARCH_HOST}" + similar_link.get_attribute('href')
375
+ end
344
376
 
345
- new_page << Result.new(rank,title,url,summary)
377
+ new_page << Result.new(rank,title,url,summary,cached_url,similar_url)
346
378
  end
347
379
 
380
+ block.call(new_page) if block
348
381
  return new_page
349
382
  end
350
383
 
351
384
  #
352
- # Returns the results on the first page. If _opts_ are given, they
353
- # will be used in accessing the SEARCH_URL.
385
+ # Returns the Results on the first page. If _opts_ are given, they
386
+ # will be used in accessing the SEARCH_URL. If a _block_ is given
387
+ # it will be passed the newly created Page.
388
+ #
389
+ def first_page(opts={},&block)
390
+ page(1,opts,&block)
391
+ end
392
+
393
+ #
394
+ # Returns the Result at the specified _index_. If _opts_ are given,
395
+ # they will be used in accessing the Page containing the requested
396
+ # Result.
354
397
  #
355
- def first_page(opts={})
356
- page(1,opts)
398
+ def result_at(index,opts={})
399
+ page(result_page_index(index),opts)[page_result_index(index)]
400
+ end
401
+
402
+ #
403
+ # Returns the first Result of the Query. If _opts_ are
404
+ # given, they will be used in accessing the Page containing the
405
+ # requested Result.
406
+ #
407
+ def first_result(opts={})
408
+ result_at(1,opts)
357
409
  end
358
410
 
359
411
  #
@@ -387,8 +439,22 @@ module GScraper
387
439
  #
388
440
  # Returns the rank offset for the specified _page_index_.
389
441
  #
390
- def page_index_offset(page_index)
391
- (page_index.to_i - 1) * @result_per_page.to_i
442
+ def page_result_offset(page_index)
443
+ (page_index.to_i - 1) * @results_per_page.to_i
444
+ end
445
+
446
+ #
447
+ # Returns the in-Page index of the _result_index_.
448
+ #
449
+ def page_result_index(result_index)
450
+ (result_index.to_i - 1) % @results_per_page.to_i
451
+ end
452
+
453
+ #
454
+ # Returns the page index for the specified _result_index_
455
+ #
456
+ def result_page_index(result_index)
457
+ ((result_index.to_i - 1) / @results_per_page.to_i) + 1
392
458
  end
393
459
 
394
460
  end
@@ -14,15 +14,54 @@ module GScraper
14
14
  # Summary from the result page
15
15
  attr_reader :summary
16
16
 
17
+ # URL of the cached result page
18
+ attr_reader :cached_url
19
+
20
+ # URL of the similar results Query
21
+ attr_reader :similar_url
22
+
17
23
  #
18
24
  # Creates a new Result object with the given _rank_, _title_
19
- # _summary_, _url_ and _size_.
25
+ # _url_, _summary_, _cached_url_ and _similar_url_.
20
26
  #
21
- def initialize(rank,title,url,summary)
27
+ def initialize(rank,title,url,summary,cached_url=nil,similar_url=nil)
22
28
  @rank = rank
23
29
  @title = title
24
30
  @url = url
25
31
  @summary = summary
32
+ @cached_url = cached_url
33
+ @similar_url = similar_url
34
+ end
35
+
36
+ #
37
+ # Opens the URL of the cached page for the Result. If _opts_ are
38
+ # given, they will be used in accessing the cached page URL.
39
+ #
40
+ # result.cached_page # => File
41
+ #
42
+ def cached_page(opts={})
43
+ if @cached_url
44
+ return GScraper.open(@cached_url,opts)
45
+ end
46
+ end
47
+
48
+ #
49
+ # Create a new Query for results that are similar to the Result. If
50
+ # a _block_ is given, it will be passed the newly created Query
51
+ # object.
52
+ #
53
+ # result.similar_query # => Query
54
+ #
55
+ # result.similar_query do |q|
56
+ # q.first_page.each_url do |url|
57
+ # puts url
58
+ # end
59
+ # end
60
+ #
61
+ def similar_query(&block)
62
+ if @similar_url
63
+ return Query.from_url(@similar_url,&block)
64
+ end
26
65
  end
27
66
 
28
67
  #
@@ -1,3 +1,3 @@
1
1
  module GScraper
2
- VERSION = '0.1.3'
2
+ VERSION = '0.1.4'
3
3
  end
@@ -0,0 +1,103 @@
1
+ require 'test/unit'
2
+ require 'gscraper/search/page'
3
+ require 'gscraper/search/query'
4
+
5
+ class PageResults < Test::Unit::TestCase
6
+
7
+ include GScraper
8
+
9
+ def setup
10
+ @query = Search::Query.new(:query => 'ruby')
11
+ @page = @query.first_page
12
+ end
13
+
14
+ def test_results_per_page
15
+ assert_equal @page.length, @query.results_per_page
16
+ end
17
+
18
+ def test_first_result
19
+ assert_not_nil @page[0], "First Page for Query 'ruby' does not have a first Result"
20
+ end
21
+
22
+ def test_last_result
23
+ assert_not_nil @page[-1], "First Page for Query 'ruby' does not have a last Result"
24
+ end
25
+
26
+ def test_ranks
27
+ ranks = @page.ranks
28
+
29
+ assert_not_nil ranks, "First Page for Query 'ruby' does not have any ranks"
30
+
31
+ assert_equal ranks.class, Array, "The ranks of a Page must be an Array"
32
+
33
+ assert_equal ranks.empty?, false, "The ranks of the First Page are empty"
34
+
35
+ assert_equal ranks.length, @page.length
36
+ end
37
+
38
+ def test_titles
39
+ titles = @page.titles
40
+
41
+ assert_not_nil titles, "First Page for Query 'ruby' does not have any titles"
42
+
43
+ assert_equal titles.class, Array, "The titles of a Page must be an Array"
44
+
45
+ assert_equal titles.empty?, false, "The titles of the First Page are empty"
46
+
47
+ assert_equal titles.length, @page.length
48
+ end
49
+
50
+ def test_urls
51
+ urls = @page.urls
52
+
53
+ assert_not_nil urls, "First Page for Query 'ruby' does not have any urls"
54
+
55
+ assert_equal urls.class, Array, "The urls of a Page must be an Array"
56
+
57
+ assert_equal urls.empty?, false, "The urls of the First Page are empty"
58
+
59
+ assert_equal urls.length, @page.length
60
+ end
61
+
62
+ def test_summaries
63
+ summaries = @page.summaries
64
+
65
+ assert_not_nil summaries, "First Page for Query 'ruby' does not have any summaries"
66
+
67
+ assert_equal summaries.class, Array, "The summaries of a Page must be an Array"
68
+
69
+ assert_equal summaries.empty?, false, "The summaries of the First Page are empty"
70
+
71
+ assert_equal summaries.length, @page.length
72
+ end
73
+
74
+ def test_cached_urls
75
+ cached_urls = @page.cached_urls
76
+
77
+ assert_not_nil cached_urls, "First Page for Query 'ruby' does not have any cached_urls"
78
+
79
+ assert_equal cached_urls.class, Array, "The cached_urls of a Page must be an Array"
80
+
81
+ assert_equal cached_urls.empty?, false, "The cached_urls of the First Page are empty"
82
+
83
+ assert_equal cached_urls.length, @page.length
84
+ end
85
+
86
+ def test_similar_urls
87
+ similar_urls = @page.similar_urls
88
+
89
+ assert_not_nil similar_urls, "First Page for Query 'ruby' does not have any similar URLs"
90
+
91
+ assert_equal similar_urls.class, Array, "The similar URLs of a Page must be an Array"
92
+
93
+ assert_equal similar_urls.empty?, false, "The similar URLs of the First Page are empty"
94
+
95
+ assert_equal similar_urls.length, @page.length
96
+ end
97
+
98
+ def teardown
99
+ @page = nil
100
+ @query = nil
101
+ end
102
+
103
+ end
@@ -11,10 +11,6 @@ class QueryFromURL < Test::Unit::TestCase
11
11
  @query = Search::Query.from_url(QUERY_URL)
12
12
  end
13
13
 
14
- def teardown
15
- @query = nil
16
- end
17
-
18
14
  def test_query
19
15
  assert_equal @query.query, 'test'
20
16
  end
@@ -47,4 +43,8 @@ class QueryFromURL < Test::Unit::TestCase
47
43
  assert_nil @query.links_to
48
44
  end
49
45
 
46
+ def teardown
47
+ @query = nil
48
+ end
49
+
50
50
  end
@@ -0,0 +1,32 @@
1
+ require 'test/unit'
2
+ require 'gscraper/search/query'
3
+
4
+ class QueryPages < Test::Unit::TestCase
5
+
6
+ include GScraper
7
+
8
+ def setup
9
+ @query = Search::Query.new(:query => 'ruby')
10
+ end
11
+
12
+ def test_first_page
13
+ page = @query.first_page
14
+
15
+ assert_not_nil page
16
+ assert_equal page.empty?, false, "Query of 'ruby' has zero results"
17
+ assert_equal page.length, @query.results_per_page
18
+ end
19
+
20
+ def test_second_page
21
+ page = @query.page(2)
22
+
23
+ assert_not_nil page
24
+ assert_equal page.empty?, false, "Query of 'ruby' has zero results"
25
+ assert_equal page.length, @query.results_per_page
26
+ end
27
+
28
+ def teardown
29
+ @query = nil
30
+ end
31
+
32
+ end
@@ -0,0 +1,30 @@
1
+ require 'test/unit'
2
+ require 'gscraper/search/query'
3
+
4
+ class QueryResult < Test::Unit::TestCase
5
+
6
+ include GScraper
7
+
8
+ def setup
9
+ @query = Search::Query.new(:query => 'ruby')
10
+ end
11
+
12
+ def test_first_result
13
+ result = @query.first_result
14
+
15
+ assert_not_nil result, "The Query for 'ruby' has no first-result"
16
+ assert_equal result.rank, 1, "The first result for the Query 'ruby' does not have the rank of 1"
17
+ end
18
+
19
+ def test_second_result
20
+ result = @query.result_at(2)
21
+
22
+ assert_not_nil result, "The Query for 'ruby' has no second-result"
23
+ assert_equal result.rank, 2, "The second result for the Query 'ruby' does not have the rank of 2"
24
+ end
25
+
26
+ def teardown
27
+ @query = nil
28
+ end
29
+
30
+ end
@@ -2,3 +2,6 @@ $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__),'..','lib')
2
2
 
3
3
  require 'test/unit'
4
4
  require 'search/query_from_url'
5
+ require 'search/query_result'
6
+ require 'search/query_pages'
7
+ require 'search/page_results'
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: gscraper
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.3
7
- date: 2007-12-21 00:00:00 -08:00
6
+ version: 0.1.4
7
+ date: 2007-12-22 00:00:00 -08:00
8
8
  summary: A ruby web-scraping interface to various Google Services
9
9
  require_paths:
10
10
  - lib
@@ -48,6 +48,9 @@ files:
48
48
  - lib/gscraper/search.rb
49
49
  - test/test_gscraper.rb
50
50
  - test/search/query_from_url.rb
51
+ - test/search/query_result.rb
52
+ - test/search/query_pages.rb
53
+ - test/search/page_results.rb
51
54
  test_files:
52
55
  - test/test_gscraper.rb
53
56
  rdoc_options:
@@ -65,6 +68,24 @@ extensions: []
65
68
  requirements: []
66
69
 
67
70
  dependencies:
71
+ - !ruby/object:Gem::Dependency
72
+ name: hpricot
73
+ version_requirement:
74
+ version_requirements: !ruby/object:Gem::Version::Requirement
75
+ requirements:
76
+ - - ">"
77
+ - !ruby/object:Gem::Version
78
+ version: 0.0.0
79
+ version:
80
+ - !ruby/object:Gem::Dependency
81
+ name: mechanize
82
+ version_requirement:
83
+ version_requirements: !ruby/object:Gem::Version::Requirement
84
+ requirements:
85
+ - - ">"
86
+ - !ruby/object:Gem::Version
87
+ version: 0.0.0
88
+ version:
68
89
  - !ruby/object:Gem::Dependency
69
90
  name: hoe
70
91
  version_requirement: