goog 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/bin/goog +13 -9
  2. data/lib/goog.rb +3 -0
  3. metadata +2 -1
data/bin/goog CHANGED
@@ -6,6 +6,7 @@ require 'yaml'
6
6
  require 'uri'
7
7
 
8
8
  if ARGV[0] =~ /^-h|--help/
9
+ require 'goog'
9
10
  puts <<END
10
11
  Usage: goog [-n pages] [query]
11
12
 
@@ -13,7 +14,7 @@ Synopsis:
13
14
  -n [pages] Return n pages of results
14
15
  -h, --help Show this message
15
16
 
16
- goog 0.0.2
17
+ goog #{Goog::VERSION}
17
18
  http://github.com/danchoi/goog
18
19
  Author: Daniel Choi <dhchoi@gmail.com>
19
20
  END
@@ -26,20 +27,21 @@ query = ARGV.join(' ')
26
27
  unless query
27
28
  abort "Please provide a search query"
28
29
  end
29
- CACHE = "#{ENV['HOME']}/.goog.cache.yml"
30
+
30
31
  unless `which tidy` =~ /tidy/
31
32
  abort "No tidy found. Please install tidy."
32
33
  end
33
34
  if RUBY_VERSION !~ /^1.9/
34
35
  abort "Requires Ruby 1.9"
35
36
  end
36
- query = CGI.escape query
37
-
37
+ query = "/search?q=#{CGI.escape query}"
38
38
  (1..pages).each do |page|
39
- q = page == 1 ? query : "#{query}&start=#{page * 10}"
40
- curl = "curl -s -A Mozilla http://www.google.com/search?q=#{q} | tidy --wrap 0 -indent -ashtml --merge-divs yes 2>/dev/null"
39
+ if query.nil?
40
+ exit
41
+ end
42
+ curl = "curl -s -A Mozilla 'http://www.google.com#{query}' | tidy --wrap 0 -indent -ashtml --merge-divs yes 2>/dev/null"
41
43
  resp = %x{#{curl}}
42
- doc = Nokogiri::HTML resp
44
+ doc = Nokogiri::HTML resp, nil, 'iso-8859-1'
43
45
  doc.search('ol li.g').each_with_index {|li, index|
44
46
  next unless li.at('h3 a')
45
47
  link = li.at('h3 a')['href'].sub(/^\/url\?q=/, '')
@@ -52,8 +54,6 @@ query = CGI.escape query
52
54
  description.search('span').remove
53
55
  excerpt = begin
54
56
  s = description.inner_text.strip
55
- s.force_encoding('iso-8859-1')
56
- s.encode!('utf-8', undef: :replace, invalid: :replace)
57
57
  s.gsub(/\s{2,}/, ' ')
58
58
  rescue
59
59
  puts "ERROR"
@@ -70,6 +70,10 @@ query = CGI.escape query
70
70
  puts res.compact
71
71
  puts
72
72
  }
73
+ # find next page link
74
+ # <a href="/search?q=why+the+lucky+stiff&amp;hl=en&amp;ie=UTF-8&amp;prmd=ivns&amp;ei=K6akT9bRBeaM6QHjifmwBA&amp;start=10&amp;sa=N" style="text-align:left"><span style="display:block;margin-left:53px">Next</span></a>
75
+ next_page_href = (nextspan = doc.at("//span[contains(child::text(),'Next')]")) && nextspan.parent[:href]
76
+ query = next_page_href
73
77
  end
74
78
 
75
79
 
@@ -0,0 +1,3 @@
1
+ module Goog
2
+ VERSION = '0.0.3'
3
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: goog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -36,6 +36,7 @@ extensions: []
36
36
  extra_rdoc_files: []
37
37
  files:
38
38
  - bin/goog
39
+ - lib/goog.rb
39
40
  homepage: https://github.com/danchoi/goog
40
41
  licenses: []
41
42
  post_install_message: