gscraper 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +8 -0
- data/README.txt +2 -2
- data/lib/gscraper/search/query.rb +92 -8
- data/lib/gscraper/search/result.rb +8 -0
- data/lib/gscraper/version.rb +1 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.txt
CHANGED
@@ -66,7 +66,7 @@ GScraper is a web-scraping interface to various Google Services.
|
|
66
66
|
page = q.page(2)
|
67
67
|
|
68
68
|
page.urls # => [...]
|
69
|
-
|
69
|
+
page.summaries # => [...]
|
70
70
|
page.ranks_of { |result| result.url =~ /^https/ } # => [...]
|
71
71
|
page.titles_of { |result| result.summary =~ /password/ } # => [...]
|
72
72
|
page.cached_pages # => [...]
|
@@ -127,7 +127,7 @@ GScraper is a web-scraping interface to various Google Services.
|
|
127
127
|
|
128
128
|
The MIT License
|
129
129
|
|
130
|
-
Copyright (c) 2007 Hal Brodigan
|
130
|
+
Copyright (c) 2007-2008 Hal Brodigan
|
131
131
|
|
132
132
|
Permission is hereby granted, free of charge, to any person obtaining
|
133
133
|
a copy of this software and associated documentation files (the
|
@@ -29,6 +29,39 @@ module GScraper
|
|
29
29
|
# Search query
|
30
30
|
attr_accessor :query
|
31
31
|
|
32
|
+
# Search 'link' modifier
|
33
|
+
attr_accessor :link
|
34
|
+
|
35
|
+
# Search 'related' modifier
|
36
|
+
attr_accessor :related
|
37
|
+
|
38
|
+
# Search 'info' modifier
|
39
|
+
attr_accessor :info
|
40
|
+
|
41
|
+
# Search 'site' modifier
|
42
|
+
attr_accessor :site
|
43
|
+
|
44
|
+
# Search 'filetype' modifier
|
45
|
+
attr_accessor :filetype
|
46
|
+
|
47
|
+
# Search 'allintitle' modifier
|
48
|
+
attr_accessor :allintitle
|
49
|
+
|
50
|
+
# Search 'intitle' modifier
|
51
|
+
attr_accessor :intitle
|
52
|
+
|
53
|
+
# Search 'allinurl' modifier
|
54
|
+
attr_accessor :allinurl
|
55
|
+
|
56
|
+
# Search 'inurl' modifier
|
57
|
+
attr_accessor :inurl
|
58
|
+
|
59
|
+
# Search 'allintext' modifier
|
60
|
+
attr_accessor :allintext
|
61
|
+
|
62
|
+
# Search 'intext' modifier
|
63
|
+
attr_accessor :intext
|
64
|
+
|
32
65
|
# Search for results containing the exact phrase
|
33
66
|
attr_accessor :exact_phrase
|
34
67
|
|
@@ -100,6 +133,20 @@ module GScraper
|
|
100
133
|
@results_per_page = (options[:results_per_page] || RESULTS_PER_PAGE)
|
101
134
|
|
102
135
|
@query = options[:query]
|
136
|
+
|
137
|
+
@link = options[:link]
|
138
|
+
@related = options[:related]
|
139
|
+
@info = options[:info]
|
140
|
+
@site = options[:site]
|
141
|
+
@filetype = options[:filetype]
|
142
|
+
|
143
|
+
@allintitle = options[:allintitle]
|
144
|
+
@intitle = options[:intitle]
|
145
|
+
@allinurl = options[:allinurl]
|
146
|
+
@inurl = options[:inurl]
|
147
|
+
@allintext = options[:allintext]
|
148
|
+
@intext = options[:intext]
|
149
|
+
|
103
150
|
@exact_phrase = options[:exact_phrase]
|
104
151
|
@with_words = options[:with_words]
|
105
152
|
@without_words = options[:without_words]
|
@@ -248,18 +295,55 @@ module GScraper
|
|
248
295
|
#
|
249
296
|
def search_url
|
250
297
|
url = URI(SEARCH_URL)
|
298
|
+
query_expr = []
|
299
|
+
|
300
|
+
set_param = lambda { |param,value|
|
301
|
+
url.query_params[param.to_s] = value if value
|
302
|
+
}
|
303
|
+
|
304
|
+
append_modifier = lambda { |name|
|
305
|
+
modifier = instance_variable_get("@#{name}")
|
306
|
+
|
307
|
+
query_expr << "#{name}:#{modifier}" if modifier
|
308
|
+
}
|
309
|
+
|
310
|
+
join_ops = lambda { |name|
|
311
|
+
ops = instance_variable_get("@#{name}")
|
312
|
+
|
313
|
+
if ops.kind_of?(Array)
|
314
|
+
query_expr << "#{name}:#{ops.join(' ')}"
|
315
|
+
elsif ops
|
316
|
+
query_expr << "#{name}:#{ops}"
|
317
|
+
end
|
318
|
+
}
|
319
|
+
|
320
|
+
set_param.call('num',@results_per_page)
|
321
|
+
|
322
|
+
query_expr << @query if @query
|
323
|
+
|
324
|
+
append_modifier.call(:link)
|
325
|
+
append_modifier.call(:related)
|
326
|
+
append_modifier.call(:info)
|
327
|
+
append_modifier.call(:site)
|
328
|
+
append_modifier.call(:filetype)
|
329
|
+
|
330
|
+
join_ops.call(:allintitle)
|
331
|
+
append_modifier.call(:intitle)
|
332
|
+
join_ops.call(:allinurl)
|
333
|
+
append_modifier.call(:inurl)
|
334
|
+
join_ops.call(:allintext)
|
335
|
+
append_modifier.call(:intext)
|
251
336
|
|
252
|
-
|
253
|
-
url.query_params['
|
337
|
+
unless query_expr.empty?
|
338
|
+
url.query_params['as_q'] = query_expr.join(' ')
|
254
339
|
end
|
255
340
|
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
url.query_params['as_eq'] = @without_words if @without_words
|
341
|
+
set_param.call('as_epq',@exact_phrase)
|
342
|
+
set_param.call('as_oq',@with_words)
|
343
|
+
set_param.call('as_eq',@without_words)
|
260
344
|
|
261
|
-
|
262
|
-
|
345
|
+
set_param.call('lr',@language)
|
346
|
+
set_param.call('cr',@region)
|
263
347
|
|
264
348
|
if @in_format
|
265
349
|
url.query_params['as_ft'] = 'i'
|
@@ -38,6 +38,14 @@ module GScraper
|
|
38
38
|
@similar_url = similar_url
|
39
39
|
end
|
40
40
|
|
41
|
+
#
|
42
|
+
# Fetches the page of the result. If a _block_ is given it will be
|
43
|
+
# passed the page.
|
44
|
+
#
|
45
|
+
def page(&block)
|
46
|
+
get_page(@url,&block)
|
47
|
+
end
|
48
|
+
|
41
49
|
#
|
42
50
|
# Create a new Query for results that are similar to the Result. If
|
43
51
|
# a _block_ is given, it will be passed the newly created Query
|
data/lib/gscraper/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gscraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Postmodern Modulus III
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-04-28 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
99
|
requirements: []
|
100
100
|
|
101
101
|
rubyforge_project: gscraper
|
102
|
-
rubygems_version: 1.
|
102
|
+
rubygems_version: 1.1.1
|
103
103
|
signing_key:
|
104
104
|
specification_version: 2
|
105
105
|
summary: A ruby web-scraping interface to various Google Services
|