gscraper 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,11 @@
1
+ == 0.1.7 / 2008-04-28
2
+
3
+ * Added support for specifying Search modifiers.
4
+
5
+ Search.query(:filetype => :xls)
6
+
7
+ * Added the Search::Result#page method.
8
+
1
9
  == 0.1.6 / 2008-03-15
2
10
 
3
11
  * Renamed GScraper.http_agent to GScraper.web_agent.
data/README.txt CHANGED
@@ -66,7 +66,7 @@ GScraper is a web-scraping interface to various Google Services.
66
66
  page = q.page(2)
67
67
 
68
68
  page.urls # => [...]
69
- pagesummaries # => [...]
69
+ page.summaries # => [...]
70
70
  page.ranks_of { |result| result.url =~ /^https/ } # => [...]
71
71
  page.titles_of { |result| result.summary =~ /password/ } # => [...]
72
72
  page.cached_pages # => [...]
@@ -127,7 +127,7 @@ GScraper is a web-scraping interface to various Google Services.
127
127
 
128
128
  The MIT License
129
129
 
130
- Copyright (c) 2007 Hal Brodigan
130
+ Copyright (c) 2007-2008 Hal Brodigan
131
131
 
132
132
  Permission is hereby granted, free of charge, to any person obtaining
133
133
  a copy of this software and associated documentation files (the
@@ -29,6 +29,39 @@ module GScraper
29
29
  # Search query
30
30
  attr_accessor :query
31
31
 
32
+ # Search 'link' modifier
33
+ attr_accessor :link
34
+
35
+ # Search 'related' modifier
36
+ attr_accessor :related
37
+
38
+ # Search 'info' modifier
39
+ attr_accessor :info
40
+
41
+ # Search 'site' modifier
42
+ attr_accessor :site
43
+
44
+ # Search 'filetype' modifier
45
+ attr_accessor :filetype
46
+
47
+ # Search 'allintitle' modifier
48
+ attr_accessor :allintitle
49
+
50
+ # Search 'intitle' modifier
51
+ attr_accessor :intitle
52
+
53
+ # Search 'allinurl' modifier
54
+ attr_accessor :allinurl
55
+
56
+ # Search 'inurl' modifier
57
+ attr_accessor :inurl
58
+
59
+ # Search 'allintext' modifier
60
+ attr_accessor :allintext
61
+
62
+ # Search 'intext' modifier
63
+ attr_accessor :intext
64
+
32
65
  # Search for results containing the exact phrase
33
66
  attr_accessor :exact_phrase
34
67
 
@@ -100,6 +133,20 @@ module GScraper
100
133
  @results_per_page = (options[:results_per_page] || RESULTS_PER_PAGE)
101
134
 
102
135
  @query = options[:query]
136
+
137
+ @link = options[:link]
138
+ @related = options[:related]
139
+ @info = options[:info]
140
+ @site = options[:site]
141
+ @filetype = options[:filetype]
142
+
143
+ @allintitle = options[:allintitle]
144
+ @intitle = options[:intitle]
145
+ @allinurl = options[:allinurl]
146
+ @inurl = options[:inurl]
147
+ @allintext = options[:allintext]
148
+ @intext = options[:intext]
149
+
103
150
  @exact_phrase = options[:exact_phrase]
104
151
  @with_words = options[:with_words]
105
152
  @without_words = options[:without_words]
@@ -248,18 +295,55 @@ module GScraper
248
295
  #
249
296
  def search_url
250
297
  url = URI(SEARCH_URL)
298
+ query_expr = []
299
+
300
+ set_param = lambda { |param,value|
301
+ url.query_params[param.to_s] = value if value
302
+ }
303
+
304
+ append_modifier = lambda { |name|
305
+ modifier = instance_variable_get("@#{name}")
306
+
307
+ query_expr << "#{name}:#{modifier}" if modifier
308
+ }
309
+
310
+ join_ops = lambda { |name|
311
+ ops = instance_variable_get("@#{name}")
312
+
313
+ if ops.kind_of?(Array)
314
+ query_expr << "#{name}:#{ops.join(' ')}"
315
+ elsif ops
316
+ query_expr << "#{name}:#{ops}"
317
+ end
318
+ }
319
+
320
+ set_param.call('num',@results_per_page)
321
+
322
+ query_expr << @query if @query
323
+
324
+ append_modifier.call(:link)
325
+ append_modifier.call(:related)
326
+ append_modifier.call(:info)
327
+ append_modifier.call(:site)
328
+ append_modifier.call(:filetype)
329
+
330
+ join_ops.call(:allintitle)
331
+ append_modifier.call(:intitle)
332
+ join_ops.call(:allinurl)
333
+ append_modifier.call(:inurl)
334
+ join_ops.call(:allintext)
335
+ append_modifier.call(:intext)
251
336
 
252
- if @results_per_page
253
- url.query_params['num'] = @results_per_page
337
+ unless query_expr.empty?
338
+ url.query_params['as_q'] = query_expr.join(' ')
254
339
  end
255
340
 
256
- url.query_params['as_q'] = @query if @query
257
- url.query_params['as_epq'] = @exact_phrase if @exact_phrase
258
- url.query_params['as_oq'] = @with_words if @with_words
259
- url.query_params['as_eq'] = @without_words if @without_words
341
+ set_param.call('as_epq',@exact_phrase)
342
+ set_param.call('as_oq',@with_words)
343
+ set_param.call('as_eq',@without_words)
260
344
 
261
- url.query_params['lr'] = @language if @language
262
- url.query_params['cr'] = @region if @region
345
+ set_param.call('lr',@language)
346
+ set_param.call('cr',@region)
263
347
 
264
348
  if @in_format
265
349
  url.query_params['as_ft'] = 'i'
@@ -38,6 +38,14 @@ module GScraper
38
38
  @similar_url = similar_url
39
39
  end
40
40
 
41
+ #
42
+ # Fetches the page of the result. If a _block_ is given it will be
43
+ # passed the page.
44
+ #
45
+ def page(&block)
46
+ get_page(@url,&block)
47
+ end
48
+
41
49
  #
42
50
  # Create a new Query for results that are similar to the Result. If
43
51
  # a _block_ is given, it will be passed the newly created Query
@@ -1,3 +1,3 @@
1
1
  module GScraper
2
- VERSION = '0.1.6'
2
+ VERSION = '0.1.7'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Postmodern Modulus III
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-03-15 00:00:00 -07:00
12
+ date: 2008-04-28 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
99
  requirements: []
100
100
 
101
101
  rubyforge_project: gscraper
102
- rubygems_version: 1.0.1
102
+ rubygems_version: 1.1.1
103
103
  signing_key:
104
104
  specification_version: 2
105
105
  summary: A ruby web-scraping interface to various Google Services