gscraper 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,11 @@
1
+ == 0.1.7 / 2008-04-28
2
+
3
+ * Added support for specifying Search modifiers.
4
+
5
+ Search.query(:filetype => :xls)
6
+
7
+ * Added the Search::Result#page method.
8
+
1
9
  == 0.1.6 / 2008-03-15
2
10
 
3
11
  * Renamed GScraper.http_agent to GScraper.web_agent.
data/README.txt CHANGED
@@ -66,7 +66,7 @@ GScraper is a web-scraping interface to various Google Services.
66
66
  page = q.page(2)
67
67
 
68
68
  page.urls # => [...]
69
- pagesummaries # => [...]
69
+ page.summaries # => [...]
70
70
  page.ranks_of { |result| result.url =~ /^https/ } # => [...]
71
71
  page.titles_of { |result| result.summary =~ /password/ } # => [...]
72
72
  page.cached_pages # => [...]
@@ -127,7 +127,7 @@ GScraper is a web-scraping interface to various Google Services.
127
127
 
128
128
  The MIT License
129
129
 
130
- Copyright (c) 2007 Hal Brodigan
130
+ Copyright (c) 2007-2008 Hal Brodigan
131
131
 
132
132
  Permission is hereby granted, free of charge, to any person obtaining
133
133
  a copy of this software and associated documentation files (the
@@ -29,6 +29,39 @@ module GScraper
29
29
  # Search query
30
30
  attr_accessor :query
31
31
 
32
+ # Search 'link' modifier
33
+ attr_accessor :link
34
+
35
+ # Search 'related' modifier
36
+ attr_accessor :related
37
+
38
+ # Search 'info' modifier
39
+ attr_accessor :info
40
+
41
+ # Search 'site' modifier
42
+ attr_accessor :site
43
+
44
+ # Search 'filetype' modifier
45
+ attr_accessor :filetype
46
+
47
+ # Search 'allintitle' modifier
48
+ attr_accessor :allintitle
49
+
50
+ # Search 'intitle' modifier
51
+ attr_accessor :intitle
52
+
53
+ # Search 'allinurl' modifier
54
+ attr_accessor :allinurl
55
+
56
+ # Search 'inurl' modifier
57
+ attr_accessor :inurl
58
+
59
+ # Search 'allintext' modifier
60
+ attr_accessor :allintext
61
+
62
+ # Search 'intext' modifier
63
+ attr_accessor :intext
64
+
32
65
  # Search for results containing the exact phrase
33
66
  attr_accessor :exact_phrase
34
67
 
@@ -100,6 +133,20 @@ module GScraper
100
133
  @results_per_page = (options[:results_per_page] || RESULTS_PER_PAGE)
101
134
 
102
135
  @query = options[:query]
136
+
137
+ @link = options[:link]
138
+ @related = options[:related]
139
+ @info = options[:info]
140
+ @site = options[:site]
141
+ @filetype = options[:filetype]
142
+
143
+ @allintitle = options[:allintitle]
144
+ @intitle = options[:intitle]
145
+ @allinurl = options[:allinurl]
146
+ @inurl = options[:inurl]
147
+ @allintext = options[:allintext]
148
+ @intext = options[:intext]
149
+
103
150
  @exact_phrase = options[:exact_phrase]
104
151
  @with_words = options[:with_words]
105
152
  @without_words = options[:without_words]
@@ -248,18 +295,55 @@ module GScraper
248
295
  #
249
296
  def search_url
250
297
  url = URI(SEARCH_URL)
298
+ query_expr = []
299
+
300
+ set_param = lambda { |param,value|
301
+ url.query_params[param.to_s] = value if value
302
+ }
303
+
304
+ append_modifier = lambda { |name|
305
+ modifier = instance_variable_get("@#{name}")
306
+
307
+ query_expr << "#{name}:#{modifier}" if modifier
308
+ }
309
+
310
+ join_ops = lambda { |name|
311
+ ops = instance_variable_get("@#{name}")
312
+
313
+ if ops.kind_of?(Array)
314
+ query_expr << "#{name}:#{ops.join(' ')}"
315
+ elsif ops
316
+ query_expr << "#{name}:#{ops}"
317
+ end
318
+ }
319
+
320
+ set_param.call('num',@results_per_page)
321
+
322
+ query_expr << @query if @query
323
+
324
+ append_modifier.call(:link)
325
+ append_modifier.call(:related)
326
+ append_modifier.call(:info)
327
+ append_modifier.call(:site)
328
+ append_modifier.call(:filetype)
329
+
330
+ join_ops.call(:allintitle)
331
+ append_modifier.call(:intitle)
332
+ join_ops.call(:allinurl)
333
+ append_modifier.call(:inurl)
334
+ join_ops.call(:allintext)
335
+ append_modifier.call(:intext)
251
336
 
252
- if @results_per_page
253
- url.query_params['num'] = @results_per_page
337
+ unless query_expr.empty?
338
+ url.query_params['as_q'] = query_expr.join(' ')
254
339
  end
255
340
 
256
- url.query_params['as_q'] = @query if @query
257
- url.query_params['as_epq'] = @exact_phrase if @exact_phrase
258
- url.query_params['as_oq'] = @with_words if @with_words
259
- url.query_params['as_eq'] = @without_words if @without_words
341
+ set_param.call('as_epq',@exact_phrase)
342
+ set_param.call('as_oq',@with_words)
343
+ set_param.call('as_eq',@without_words)
260
344
 
261
- url.query_params['lr'] = @language if @language
262
- url.query_params['cr'] = @region if @region
345
+ set_param.call('lr',@language)
346
+ set_param.call('cr',@region)
263
347
 
264
348
  if @in_format
265
349
  url.query_params['as_ft'] = 'i'
@@ -38,6 +38,14 @@ module GScraper
38
38
  @similar_url = similar_url
39
39
  end
40
40
 
41
+ #
42
+ # Fetches the page of the result. If a _block_ is given it will be
43
+ # passed the page.
44
+ #
45
+ def page(&block)
46
+ get_page(@url,&block)
47
+ end
48
+
41
49
  #
42
50
  # Create a new Query for results that are similar to the Result. If
43
51
  # a _block_ is given, it will be passed the newly created Query
@@ -1,3 +1,3 @@
1
1
  module GScraper
2
- VERSION = '0.1.6'
2
+ VERSION = '0.1.7'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Postmodern Modulus III
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-03-15 00:00:00 -07:00
12
+ date: 2008-04-28 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
99
  requirements: []
100
100
 
101
101
  rubyforge_project: gscraper
102
- rubygems_version: 1.0.1
102
+ rubygems_version: 1.1.1
103
103
  signing_key:
104
104
  specification_version: 2
105
105
  summary: A ruby web-scraping interface to various Google Services