goog 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/goog +13 -9
- data/lib/goog.rb +3 -0
- metadata +2 -1
data/bin/goog
CHANGED
@@ -6,6 +6,7 @@ require 'yaml'
|
|
6
6
|
require 'uri'
|
7
7
|
|
8
8
|
if ARGV[0] =~ /^-h|--help/
|
9
|
+
require 'goog'
|
9
10
|
puts <<END
|
10
11
|
Usage: goog [-n pages] [query]
|
11
12
|
|
@@ -13,7 +14,7 @@ Synopsis:
|
|
13
14
|
-n [pages] Return n pages of results
|
14
15
|
-h, --help Show this message
|
15
16
|
|
16
|
-
goog
|
17
|
+
goog #{Goog::VERSION}
|
17
18
|
http://github.com/danchoi/goog
|
18
19
|
Author: Daniel Choi <dhchoi@gmail.com>
|
19
20
|
END
|
@@ -26,20 +27,21 @@ query = ARGV.join(' ')
|
|
26
27
|
unless query
|
27
28
|
abort "Please provide a search query"
|
28
29
|
end
|
29
|
-
|
30
|
+
|
30
31
|
unless `which tidy` =~ /tidy/
|
31
32
|
abort "No tidy found. Please install tidy."
|
32
33
|
end
|
33
34
|
if RUBY_VERSION !~ /^1.9/
|
34
35
|
abort "Requires Ruby 1.9"
|
35
36
|
end
|
36
|
-
query = CGI.escape query
|
37
|
-
|
37
|
+
query = "/search?q=#{CGI.escape query}"
|
38
38
|
(1..pages).each do |page|
|
39
|
-
|
40
|
-
|
39
|
+
if query.nil?
|
40
|
+
exit
|
41
|
+
end
|
42
|
+
curl = "curl -s -A Mozilla 'http://www.google.com#{query}' | tidy --wrap 0 -indent -ashtml --merge-divs yes 2>/dev/null"
|
41
43
|
resp = %x{#{curl}}
|
42
|
-
doc = Nokogiri::HTML resp
|
44
|
+
doc = Nokogiri::HTML resp, nil, 'iso-8859-1'
|
43
45
|
doc.search('ol li.g').each_with_index {|li, index|
|
44
46
|
next unless li.at('h3 a')
|
45
47
|
link = li.at('h3 a')['href'].sub(/^\/url\?q=/, '')
|
@@ -52,8 +54,6 @@ query = CGI.escape query
|
|
52
54
|
description.search('span').remove
|
53
55
|
excerpt = begin
|
54
56
|
s = description.inner_text.strip
|
55
|
-
s.force_encoding('iso-8859-1')
|
56
|
-
s.encode!('utf-8', undef: :replace, invalid: :replace)
|
57
57
|
s.gsub(/\s{2,}/, ' ')
|
58
58
|
rescue
|
59
59
|
puts "ERROR"
|
@@ -70,6 +70,10 @@ query = CGI.escape query
|
|
70
70
|
puts res.compact
|
71
71
|
puts
|
72
72
|
}
|
73
|
+
# find next page link
|
74
|
+
# <a href="/search?q=why+the+lucky+stiff&hl=en&ie=UTF-8&prmd=ivns&ei=K6akT9bRBeaM6QHjifmwBA&start=10&sa=N" style="text-align:left"><span style="display:block;margin-left:53px">Next</span></a>
|
75
|
+
next_page_href = (nextspan = doc.at("//span[contains(child::text(),'Next')]")) && nextspan.parent[:href]
|
76
|
+
query = next_page_href
|
73
77
|
end
|
74
78
|
|
75
79
|
|
data/lib/goog.rb
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: goog
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -36,6 +36,7 @@ extensions: []
|
|
36
36
|
extra_rdoc_files: []
|
37
37
|
files:
|
38
38
|
- bin/goog
|
39
|
+
- lib/goog.rb
|
39
40
|
homepage: https://github.com/danchoi/goog
|
40
41
|
licenses: []
|
41
42
|
post_install_message:
|