goog 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. data/bin/goog +13 -9
  2. data/lib/goog.rb +3 -0
  3. metadata +2 -1
data/bin/goog CHANGED
@@ -6,6 +6,7 @@ require 'yaml'
6
6
  require 'uri'
7
7
 
8
8
  if ARGV[0] =~ /^-h|--help/
9
+ require 'goog'
9
10
  puts <<END
10
11
  Usage: goog [-n pages] [query]
11
12
 
@@ -13,7 +14,7 @@ Synopsis:
13
14
  -n [pages] Return n pages of results
14
15
  -h, --help Show this message
15
16
 
16
- goog 0.0.2
17
+ goog #{Goog::VERSION}
17
18
  http://github.com/danchoi/goog
18
19
  Author: Daniel Choi <dhchoi@gmail.com>
19
20
  END
@@ -26,20 +27,21 @@ query = ARGV.join(' ')
26
27
  unless query
27
28
  abort "Please provide a search query"
28
29
  end
29
- CACHE = "#{ENV['HOME']}/.goog.cache.yml"
30
+
30
31
  unless `which tidy` =~ /tidy/
31
32
  abort "No tidy found. Please install tidy."
32
33
  end
33
34
  if RUBY_VERSION !~ /^1.9/
34
35
  abort "Requires Ruby 1.9"
35
36
  end
36
- query = CGI.escape query
37
-
37
+ query = "/search?q=#{CGI.escape query}"
38
38
  (1..pages).each do |page|
39
- q = page == 1 ? query : "#{query}&start=#{page * 10}"
40
- curl = "curl -s -A Mozilla http://www.google.com/search?q=#{q} | tidy --wrap 0 -indent -ashtml --merge-divs yes 2>/dev/null"
39
+ if query.nil?
40
+ exit
41
+ end
42
+ curl = "curl -s -A Mozilla 'http://www.google.com#{query}' | tidy --wrap 0 -indent -ashtml --merge-divs yes 2>/dev/null"
41
43
  resp = %x{#{curl}}
42
- doc = Nokogiri::HTML resp
44
+ doc = Nokogiri::HTML resp, nil, 'iso-8859-1'
43
45
  doc.search('ol li.g').each_with_index {|li, index|
44
46
  next unless li.at('h3 a')
45
47
  link = li.at('h3 a')['href'].sub(/^\/url\?q=/, '')
@@ -52,8 +54,6 @@ query = CGI.escape query
52
54
  description.search('span').remove
53
55
  excerpt = begin
54
56
  s = description.inner_text.strip
55
- s.force_encoding('iso-8859-1')
56
- s.encode!('utf-8', undef: :replace, invalid: :replace)
57
57
  s.gsub(/\s{2,}/, ' ')
58
58
  rescue
59
59
  puts "ERROR"
@@ -70,6 +70,10 @@ query = CGI.escape query
70
70
  puts res.compact
71
71
  puts
72
72
  }
73
+ # find next page link
74
+ # <a href="/search?q=why+the+lucky+stiff&amp;hl=en&amp;ie=UTF-8&amp;prmd=ivns&amp;ei=K6akT9bRBeaM6QHjifmwBA&amp;start=10&amp;sa=N" style="text-align:left"><span style="display:block;margin-left:53px">Next</span></a>
75
+ next_page_href = (nextspan = doc.at("//span[contains(child::text(),'Next')]")) && nextspan.parent[:href]
76
+ query = next_page_href
73
77
  end
74
78
 
75
79
 
@@ -0,0 +1,3 @@
1
+ module Goog
2
+ VERSION = '0.0.3'
3
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: goog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -36,6 +36,7 @@ extensions: []
36
36
  extra_rdoc_files: []
37
37
  files:
38
38
  - bin/goog
39
+ - lib/goog.rb
39
40
  homepage: https://github.com/danchoi/goog
40
41
  licenses: []
41
42
  post_install_message: