goog 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/goog +13 -9
- data/lib/goog.rb +3 -0
- metadata +2 -1
data/bin/goog
CHANGED
@@ -6,6 +6,7 @@ require 'yaml'
|
|
6
6
|
require 'uri'
|
7
7
|
|
8
8
|
if ARGV[0] =~ /^-h|--help/
|
9
|
+
require 'goog'
|
9
10
|
puts <<END
|
10
11
|
Usage: goog [-n pages] [query]
|
11
12
|
|
@@ -13,7 +14,7 @@ Synopsis:
|
|
13
14
|
-n [pages] Return n pages of results
|
14
15
|
-h, --help Show this message
|
15
16
|
|
16
|
-
goog
|
17
|
+
goog #{Goog::VERSION}
|
17
18
|
http://github.com/danchoi/goog
|
18
19
|
Author: Daniel Choi <dhchoi@gmail.com>
|
19
20
|
END
|
@@ -26,20 +27,21 @@ query = ARGV.join(' ')
|
|
26
27
|
unless query
|
27
28
|
abort "Please provide a search query"
|
28
29
|
end
|
29
|
-
|
30
|
+
|
30
31
|
unless `which tidy` =~ /tidy/
|
31
32
|
abort "No tidy found. Please install tidy."
|
32
33
|
end
|
33
34
|
if RUBY_VERSION !~ /^1.9/
|
34
35
|
abort "Requires Ruby 1.9"
|
35
36
|
end
|
36
|
-
query = CGI.escape query
|
37
|
-
|
37
|
+
query = "/search?q=#{CGI.escape query}"
|
38
38
|
(1..pages).each do |page|
|
39
|
-
|
40
|
-
|
39
|
+
if query.nil?
|
40
|
+
exit
|
41
|
+
end
|
42
|
+
curl = "curl -s -A Mozilla 'http://www.google.com#{query}' | tidy --wrap 0 -indent -ashtml --merge-divs yes 2>/dev/null"
|
41
43
|
resp = %x{#{curl}}
|
42
|
-
doc = Nokogiri::HTML resp
|
44
|
+
doc = Nokogiri::HTML resp, nil, 'iso-8859-1'
|
43
45
|
doc.search('ol li.g').each_with_index {|li, index|
|
44
46
|
next unless li.at('h3 a')
|
45
47
|
link = li.at('h3 a')['href'].sub(/^\/url\?q=/, '')
|
@@ -52,8 +54,6 @@ query = CGI.escape query
|
|
52
54
|
description.search('span').remove
|
53
55
|
excerpt = begin
|
54
56
|
s = description.inner_text.strip
|
55
|
-
s.force_encoding('iso-8859-1')
|
56
|
-
s.encode!('utf-8', undef: :replace, invalid: :replace)
|
57
57
|
s.gsub(/\s{2,}/, ' ')
|
58
58
|
rescue
|
59
59
|
puts "ERROR"
|
@@ -70,6 +70,10 @@ query = CGI.escape query
|
|
70
70
|
puts res.compact
|
71
71
|
puts
|
72
72
|
}
|
73
|
+
# find next page link
|
74
|
+
# <a href="/search?q=why+the+lucky+stiff&hl=en&ie=UTF-8&prmd=ivns&ei=K6akT9bRBeaM6QHjifmwBA&start=10&sa=N" style="text-align:left"><span style="display:block;margin-left:53px">Next</span></a>
|
75
|
+
next_page_href = (nextspan = doc.at("//span[contains(child::text(),'Next')]")) && nextspan.parent[:href]
|
76
|
+
query = next_page_href
|
73
77
|
end
|
74
78
|
|
75
79
|
|
data/lib/goog.rb
ADDED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: goog
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -36,6 +36,7 @@ extensions: []
|
|
36
36
|
extra_rdoc_files: []
|
37
37
|
files:
|
38
38
|
- bin/goog
|
39
|
+
- lib/goog.rb
|
39
40
|
homepage: https://github.com/danchoi/goog
|
40
41
|
licenses: []
|
41
42
|
post_install_message:
|