gscraper 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +8 -0
- data/README.txt +2 -2
- data/lib/gscraper/search/query.rb +92 -8
- data/lib/gscraper/search/result.rb +8 -0
- data/lib/gscraper/version.rb +1 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.txt
CHANGED
@@ -66,7 +66,7 @@ GScraper is a web-scraping interface to various Google Services.
|
|
66
66
|
page = q.page(2)
|
67
67
|
|
68
68
|
page.urls # => [...]
|
69
|
-
|
69
|
+
page.summaries # => [...]
|
70
70
|
page.ranks_of { |result| result.url =~ /^https/ } # => [...]
|
71
71
|
page.titles_of { |result| result.summary =~ /password/ } # => [...]
|
72
72
|
page.cached_pages # => [...]
|
@@ -127,7 +127,7 @@ GScraper is a web-scraping interface to various Google Services.
|
|
127
127
|
|
128
128
|
The MIT License
|
129
129
|
|
130
|
-
Copyright (c) 2007 Hal Brodigan
|
130
|
+
Copyright (c) 2007-2008 Hal Brodigan
|
131
131
|
|
132
132
|
Permission is hereby granted, free of charge, to any person obtaining
|
133
133
|
a copy of this software and associated documentation files (the
|
@@ -29,6 +29,39 @@ module GScraper
|
|
29
29
|
# Search query
|
30
30
|
attr_accessor :query
|
31
31
|
|
32
|
+
# Search 'link' modifier
|
33
|
+
attr_accessor :link
|
34
|
+
|
35
|
+
# Search 'related' modifier
|
36
|
+
attr_accessor :related
|
37
|
+
|
38
|
+
# Search 'info' modifier
|
39
|
+
attr_accessor :info
|
40
|
+
|
41
|
+
# Search 'site' modifier
|
42
|
+
attr_accessor :site
|
43
|
+
|
44
|
+
# Search 'filetype' modifier
|
45
|
+
attr_accessor :filetype
|
46
|
+
|
47
|
+
# Search 'allintitle' modifier
|
48
|
+
attr_accessor :allintitle
|
49
|
+
|
50
|
+
# Search 'intitle' modifier
|
51
|
+
attr_accessor :intitle
|
52
|
+
|
53
|
+
# Search 'allinurl' modifier
|
54
|
+
attr_accessor :allinurl
|
55
|
+
|
56
|
+
# Search 'inurl' modifier
|
57
|
+
attr_accessor :inurl
|
58
|
+
|
59
|
+
# Search 'allintext' modifier
|
60
|
+
attr_accessor :allintext
|
61
|
+
|
62
|
+
# Search 'intext' modifier
|
63
|
+
attr_accessor :intext
|
64
|
+
|
32
65
|
# Search for results containing the exact phrase
|
33
66
|
attr_accessor :exact_phrase
|
34
67
|
|
@@ -100,6 +133,20 @@ module GScraper
|
|
100
133
|
@results_per_page = (options[:results_per_page] || RESULTS_PER_PAGE)
|
101
134
|
|
102
135
|
@query = options[:query]
|
136
|
+
|
137
|
+
@link = options[:link]
|
138
|
+
@related = options[:related]
|
139
|
+
@info = options[:info]
|
140
|
+
@site = options[:site]
|
141
|
+
@filetype = options[:filetype]
|
142
|
+
|
143
|
+
@allintitle = options[:allintitle]
|
144
|
+
@intitle = options[:intitle]
|
145
|
+
@allinurl = options[:allinurl]
|
146
|
+
@inurl = options[:inurl]
|
147
|
+
@allintext = options[:allintext]
|
148
|
+
@intext = options[:intext]
|
149
|
+
|
103
150
|
@exact_phrase = options[:exact_phrase]
|
104
151
|
@with_words = options[:with_words]
|
105
152
|
@without_words = options[:without_words]
|
@@ -248,18 +295,55 @@ module GScraper
|
|
248
295
|
#
|
249
296
|
def search_url
|
250
297
|
url = URI(SEARCH_URL)
|
298
|
+
query_expr = []
|
299
|
+
|
300
|
+
set_param = lambda { |param,value|
|
301
|
+
url.query_params[param.to_s] = value if value
|
302
|
+
}
|
303
|
+
|
304
|
+
append_modifier = lambda { |name|
|
305
|
+
modifier = instance_variable_get("@#{name}")
|
306
|
+
|
307
|
+
query_expr << "#{name}:#{modifier}" if modifier
|
308
|
+
}
|
309
|
+
|
310
|
+
join_ops = lambda { |name|
|
311
|
+
ops = instance_variable_get("@#{name}")
|
312
|
+
|
313
|
+
if ops.kind_of?(Array)
|
314
|
+
query_expr << "#{name}:#{ops.join(' ')}"
|
315
|
+
elsif ops
|
316
|
+
query_expr << "#{name}:#{ops}"
|
317
|
+
end
|
318
|
+
}
|
319
|
+
|
320
|
+
set_param.call('num',@results_per_page)
|
321
|
+
|
322
|
+
query_expr << @query if @query
|
323
|
+
|
324
|
+
append_modifier.call(:link)
|
325
|
+
append_modifier.call(:related)
|
326
|
+
append_modifier.call(:info)
|
327
|
+
append_modifier.call(:site)
|
328
|
+
append_modifier.call(:filetype)
|
329
|
+
|
330
|
+
join_ops.call(:allintitle)
|
331
|
+
append_modifier.call(:intitle)
|
332
|
+
join_ops.call(:allinurl)
|
333
|
+
append_modifier.call(:inurl)
|
334
|
+
join_ops.call(:allintext)
|
335
|
+
append_modifier.call(:intext)
|
251
336
|
|
252
|
-
|
253
|
-
url.query_params['
|
337
|
+
unless query_expr.empty?
|
338
|
+
url.query_params['as_q'] = query_expr.join(' ')
|
254
339
|
end
|
255
340
|
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
url.query_params['as_eq'] = @without_words if @without_words
|
341
|
+
set_param.call('as_epq',@exact_phrase)
|
342
|
+
set_param.call('as_oq',@with_words)
|
343
|
+
set_param.call('as_eq',@without_words)
|
260
344
|
|
261
|
-
|
262
|
-
|
345
|
+
set_param.call('lr',@language)
|
346
|
+
set_param.call('cr',@region)
|
263
347
|
|
264
348
|
if @in_format
|
265
349
|
url.query_params['as_ft'] = 'i'
|
@@ -38,6 +38,14 @@ module GScraper
|
|
38
38
|
@similar_url = similar_url
|
39
39
|
end
|
40
40
|
|
41
|
+
#
|
42
|
+
# Fetches the page of the result. If a _block_ is given it will be
|
43
|
+
# passed the page.
|
44
|
+
#
|
45
|
+
def page(&block)
|
46
|
+
get_page(@url,&block)
|
47
|
+
end
|
48
|
+
|
41
49
|
#
|
42
50
|
# Create a new Query for results that are similar to the Result. If
|
43
51
|
# a _block_ is given, it will be passed the newly created Query
|
data/lib/gscraper/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gscraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Postmodern Modulus III
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-04-28 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -99,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
99
|
requirements: []
|
100
100
|
|
101
101
|
rubyforge_project: gscraper
|
102
|
-
rubygems_version: 1.
|
102
|
+
rubygems_version: 1.1.1
|
103
103
|
signing_key:
|
104
104
|
specification_version: 2
|
105
105
|
summary: A ruby web-scraping interface to various Google Services
|