google-browse 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+ pkg/
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source :rubygems
2
+
3
+ gem 'slop'
4
+ gem 'launchy'
5
+ gem 'mechanize'
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Bil Bas (Spooner)
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,132 @@
1
+ Google-Browse
2
+ =============
3
+
4
+ This is a very simple text browser which aids in searching and navigating on Google.com. It shows results as a simple list, any of which may be opened in a full
5
+ browser.
6
+
7
+ It is not really intended for real use, since it is only a toy.
8
+
9
+ WARNING: Used excessively, this tool may cause Google to lock you out, thinking that you are an evil scraper bot! Be careful!
10
+
11
+ Installation
12
+ ------------
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ gem 'google-browse'
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install google-browse
25
+
26
+ Usage
27
+ -----
28
+
29
+ The gem provides a single command which starts the text search browser:
30
+
31
+ $ google-browse
32
+
33
+ $ google-browse "spooner github"
34
+
35
+ Example output
36
+ --------------
37
+
38
+ $ ruby bin/google-browse fish
39
+
40
+ Google Browse v0.1.1
41
+
42
+
43
+ Page 1, showing results 1 to 5 for: fish
44
+ ________________________________________
45
+
46
+ 1: Fish - Wikipedia, the free encyclopedia
47
+ A fish is any member of a paraphyletic group of organisms that consist of ...
48
+ http://en.wikipedia.org/wiki/Fish
49
+
50
+ 2: Official Fish site
51
+ Official site for Fish: writer, actor and vocalist. Extensive information ...
52
+ http://www.fish-thecompany.com/
53
+
54
+ 3: Clever Bird Goes Fishing - YouTube
55
+ Sign in with your YouTube Account (YouTube, Google+, Gmail, Orkut, Picasa,...
56
+ http://www.youtube.co.uk/watch?v=uBuPiC3ArL8
57
+
58
+ 4: Robot Fish - YouTube
59
+ For more cool animal videos http://ow.ly/7v79B ...
60
+ http://www.youtube.com/watch?v=eO9oseiCTdk
61
+
62
+ 5: Fish! | Fish Kitchen! | Jarvis » Fish Kitchen Group
63
+ Celebrity restaurateur Tony Allan started the fish! empire when he opened ...
64
+ http://www.fishkitchen.com/
65
+
66
+
67
+ Enter number of link to browse or [N/h/s/q]: n
68
+
69
+ Page 2, showing results 6 to 10 for: fish
70
+ _________________________________________
71
+
72
+ 6: Fish | Life and style | The Guardian
73
+ Latest news and comment on Fish from guardian.co.uk.
74
+ http://www.guardian.co.uk/lifeandstyle/fish
75
+
76
+ 7: Pet Fish Supplies for Sale at Pets At Home: Fish Pond Supplies, Fish ...
77
+ Buy fish products from Pets at Home, the UK's largest pet shop, with fast...
78
+ http://www.petsathome.com/shop/fish/
79
+
80
+ 8: Fish recipes | Salmon recipes, fish stew & more | Jamie Oliver recipes
81
+ Good fresh fish smells of the sea and is packed full of good stuff. Check...
82
+ http://www.jamieoliver.com/recipes/fish-recipes
83
+
84
+ 9: Hugh's Fish Fight - Half of all fish caught in the North Sea is thrown ...
85
+ Half of all fish caught in the North Sea is thrown back overboard dead. B...
86
+ http://www.fishfight.net/
87
+
88
+ 10: Grafixation web design and business services - Welcome to our site
89
+ Grafixation: specialists in web design, graphics, site renovation, promot...
90
+ http://www.the-company.com/
91
+
92
+
93
+ Enter number of link to browse or [N/p/h/s/q]: s
94
+
95
+ Enter search string: frog
96
+
97
+ Page 1, showing results 1 to 5 for: frog
98
+ ________________________________________
99
+
100
+ 1: Frog - Wikipedia, the free encyclopedia
101
+ Frogs are a diverse and largely carnivorous group of short-bodied, taille...
102
+ http://en.wikipedia.org/wiki/Frog
103
+
104
+ 2: Frog Learning Platform | The UKs most advanced Learning Platform
105
+ 'Frog is like a Lego® kit that allows users to build most things that can ...
106
+ http://www.frogtrade.com/
107
+
108
+ 3: frog
109
+ We are a global innovation firm. We help create and bring to market meanin...
110
+ http://www.frogdesign.com/
111
+
112
+ 4: Frog Song - YouTube
113
+ ... to add danielinvt's video to your playlist. Sign in. Statistics Report...
114
+ http://www.youtube.com/watch?v=lfFGXG2-6kg
115
+
116
+ 5: Frog VLE Nonsuch
117
+ To log in to the Nonsuch Learning Environment enter your username and pass...
118
+ http://www.nonsuch.sutton.sch.uk/
119
+
120
+
121
+ Enter number of link to browse or [N/h/s/q]: 4
122
+
123
+ ((Opens Frog Song Youtube video in default browser))
124
+
125
+
126
+ ## Contributing
127
+
128
+ 1. Fork it
129
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
130
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
131
+ 4. Push to the branch (`git push origin my-new-feature`)
132
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/bin/google-browse ADDED
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Google search CLI browser that can open links into a browser.
4
+
5
+ require 'bundler/setup'
6
+ require 'slop'
7
+
8
+ require_relative '../lib/google-browse'
9
+
10
+ # Manage CLI options.
11
+ results_per_page = GoogleBrowse::Browser::DEFAULT_RESULTS_PER_PAGE
12
+ opts = Slop.parse help: true do
13
+ banner "Usage: #{File.basename $0} [options] ['QUERY-STRING']"
14
+
15
+ on 'n=', 'number=',
16
+ "Number of results per page (default: #{results_per_page})",
17
+ as: Integer, default: results_per_page
18
+ end
19
+
20
+ exit 0 if opts.help?
21
+
22
+ def cli_error(opts, message)
23
+ puts "ERROR: #{message}"
24
+ puts
25
+ puts opts
26
+ exit 0
27
+ end
28
+
29
+ cli_error opts, 'Must have 1 or more results per page!' unless opts[:number] >= 1
30
+
31
+ # BUG: No idea why the -n option STAYS in argv ;(
32
+ query = ARGV.empty? ? nil : ARGV.join(" ")
33
+
34
+ puts
35
+ puts "Google Browse v#{GoogleBrowse::VERSION}"
36
+
37
+ GoogleBrowse.search query: query, results_per_page: opts[:number]
38
+
39
+
40
+
41
+
42
+
@@ -0,0 +1,18 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/google_browse/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Bil Bas (Spooner)"]
6
+ gem.email = ["bil.bagpuss@gmail.com"]
7
+ gem.description = %q{This is a very simple text browser which aids in searching and navigating on Google.com. Shows results as a simple list, any of which may be opened in a full
8
+ browser. Not really intended for real use, since it is only really a toy.}
9
+ gem.summary = %q{Simple text-browser for Google.com}
10
+ gem.homepage = ""
11
+
12
+ gem.files = `git ls-files`.split($\)
13
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
14
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
15
+ gem.name = "google-browse"
16
+ gem.require_paths = ["lib"]
17
+ gem.version = GoogleBrowse::VERSION
18
+ end
@@ -0,0 +1,11 @@
1
+ # Google search CLI browser that can open links into a browser.
2
+ require 'bundler/setup'
3
+ require 'mechanize'
4
+ require 'launchy'
5
+
6
+ require 'uri'
7
+ require 'ostruct'
8
+
9
+ require_relative "google_browse/scraper"
10
+ require_relative "google_browse/browser"
11
+ require_relative "google_browse/version"
@@ -0,0 +1,208 @@
1
+ # Google search CLI browser that can open links into a browser.
2
+ module GoogleBrowse
3
+ class << self
4
+ def search(*args); Browser.new *args; end
5
+ end
6
+
7
+ class Browser
8
+ DEFAULT_RESULTS_PER_PAGE = 5
9
+ MIN_RESULTS_PER_PAGE = 1
10
+ MAX_RESULTS_PER_PAGE = 20 # Avoid being rude to Google.
11
+ NUM_COLUMNS = 79
12
+
13
+ class << self
14
+ def search(*args); new *args; end
15
+ end
16
+
17
+ # @option :results_per_page [Integer] (5) Number of results to show per page.
18
+ # @option :query [String] Initial search string.
19
+ def initialize(options = {})
20
+ options = {
21
+ results_per_page: DEFAULT_RESULTS_PER_PAGE,
22
+ }.merge! options
23
+
24
+ @results_per_page = [
25
+ [MIN_RESULTS_PER_PAGE, options[:results_per_page]].max,
26
+ MAX_RESULTS_PER_PAGE
27
+ ].min
28
+
29
+ @quit = false
30
+
31
+ if options[:query]
32
+ @scraper = Scraper.new options[:query]
33
+ else
34
+ @scraper = nil
35
+ input_new_search
36
+ end
37
+
38
+ @page_number = 0
39
+
40
+ puts
41
+ list_links
42
+ navigate until quit?
43
+ end
44
+
45
+ protected
46
+ def quit?; @quit end
47
+ # Index, in @links, of the first link to show.
48
+ def first_link_index; @page_number * @results_per_page end
49
+ def link_range; first_link_index..last_link_index end
50
+
51
+ protected
52
+ # Index, in @links, of the last link to show.
53
+ def last_link_index
54
+ if @scraper.more_pages?
55
+ first_link_index + @results_per_page - 1
56
+ else
57
+ @scraper.num_links - 1
58
+ end
59
+ end
60
+
61
+ protected
62
+ # Are we showing the last page?
63
+ def last_page?
64
+ !@scraper.more_pages? && last_link_index == (@scraper.num_links - 1)
65
+ end
66
+
67
+ protected
68
+ def underline(title)
69
+ puts title
70
+ puts '_' * title.length
71
+ end
72
+
73
+ protected
74
+ def list_links
75
+ limit_page_number
76
+
77
+ # Ensure we have enough links downloaded to display them.
78
+ first, last = first_link_index + 1, last_link_index + 1
79
+
80
+ # Force scraper to read as high as it can, then limit the page number.
81
+ @scraper[last_link_index]
82
+ @page_number = [@page_number, @scraper.num_links.div(@results_per_page)].min
83
+
84
+ if @scraper.num_links > 0
85
+ num_columns = last.to_s.length
86
+
87
+ puts
88
+ underline "Page #{@page_number + 1}, showing results #{first} to #{last} for: #{@scraper.query}"
89
+
90
+ @scraper[link_range].each.with_index(first) do |link, i|
91
+ indent = ' ' * (num_columns + 2)
92
+ puts
93
+ max_width = NUM_COLUMNS - indent.size
94
+ puts "#{i.to_s.rjust num_columns}: #{limit_text link.title, max_width}"
95
+ puts "#{indent}#{limit_text link.body, max_width}"
96
+ puts "#{indent}#{link.url}"
97
+ end
98
+ else
99
+ # No joy. Let's try a new search...
100
+ puts "No results for #{@scraper.query}!"
101
+ input_new_search
102
+ list_links
103
+ end
104
+ end
105
+
106
+ protected
107
+ def limit_text(text, length)
108
+ if text.size < length
109
+ text
110
+ else
111
+ text[0, length - 3] + '...'
112
+ end
113
+ end
114
+
115
+ protected
116
+ def limit_page_number
117
+ unless @scraper.more_pages?
118
+ @page_number = @scraper.num_links.div @results_per_page
119
+ end
120
+ end
121
+
122
+ protected
123
+ def navigate
124
+ # Ask the user for instructions.
125
+ puts
126
+
127
+ next_ = last_page? ? '' : 'N/'
128
+ previous = @page_number.zero? ? '' : 'p/'
129
+ print "Enter number of link to browse or [#{next_}#{previous}h/s/q]: "
130
+ input = $stdin.gets.strip
131
+
132
+ case input.upcase
133
+ when 'N', '' # Next page.
134
+ unless last_page?
135
+ @page_number += 1
136
+ list_links
137
+ end
138
+
139
+ when 'P' # Previous page.
140
+ if @page_number > 0
141
+ @page_number -= 1
142
+ list_links
143
+ end
144
+
145
+ when 'H', '?'
146
+ puts <<-END_OF_TEXT
147
+
148
+ Browser help
149
+ ------------
150
+
151
+ N(next) - Next page (default action).
152
+ P(revious) - Previous page.
153
+ H(elp) - This help message.
154
+ S(earch) - Enter a new query string.
155
+ Q(uit) - Quit the browser.
156
+
157
+ Enter a link number to open it in your default browser for viewing.
158
+ END_OF_TEXT
159
+
160
+ when 'S' # Search
161
+ input_new_search
162
+
163
+ list_links
164
+
165
+ when 'Q' # Quit.
166
+ @quit = true
167
+
168
+ else # Follow link to page.
169
+ link_index = input.to_i - 1
170
+ if link_index.between? first_link_index, last_link_index
171
+ link = @scraper[link_index]
172
+ puts
173
+ puts "Navigating to #{link_index}: #{link.title} (#{link.url})"
174
+ puts
175
+ Launchy.open link.url
176
+ puts
177
+ else
178
+ puts "Bad input: #{input}"
179
+ end
180
+ end
181
+
182
+ puts
183
+ end
184
+
185
+ protected
186
+ def input_new_search
187
+ input = ''
188
+ while input.empty?
189
+ puts
190
+ print "Enter search string: "
191
+ input = $stdin.gets.strip
192
+ end
193
+
194
+ if @scraper
195
+ @scraper.query = input
196
+ else
197
+ @scraper = Scraper.new input
198
+ end
199
+
200
+ @page_number = 0
201
+ end
202
+ end
203
+ end
204
+
205
+
206
+
207
+
208
+
@@ -0,0 +1,129 @@
1
+ # Google search CLI browser that can open links into a browser.
2
+ module GoogleBrowse
3
+ class Scraper
4
+ BASE_PAGE = 'http://google.com'
5
+ RESULTS_PER_REQUEST = 100 # TODO: Use this! &num=100?
6
+
7
+ attr_reader :query
8
+ def num_links; @links.size; end
9
+ def more_pages?; @more_pages end
10
+
11
+ # @option :query [String] Initial search string.
12
+ def initialize(query)
13
+ @links = [] # All the links retrieved are cached here.
14
+ @agent = Mechanize.new do |agent|
15
+ agent.max_history = 1 # We cache the important data ourselves.
16
+ agent.user_agent = 'Safari' # And why not?
17
+ agent.user_agent_alias = 'Mac Safari' # And why not?
18
+ agent.keep_alive = false
19
+ end
20
+
21
+ self.query = query
22
+ end
23
+
24
+ # Set the search query string.
25
+ def query=(text)
26
+ retrieve_initial_page text
27
+ end
28
+
29
+ # @param index [Integer, Range]
30
+ def [](index)
31
+ case index
32
+ when Integer
33
+ retrieve_next_page while more_pages? and index > @links.size
34
+ @links[index]
35
+
36
+ when Range
37
+ retrieve_next_page while more_pages? and index.max > @links.size
38
+ @links[index]
39
+
40
+ else
41
+ raise TypeError, "Expected Integer or Range"
42
+ end
43
+ end
44
+
45
+ protected
46
+ def retrieve_initial_page(query)
47
+ @query = query
48
+ @links.clear
49
+
50
+ # Go to Google home page and create an initial query.
51
+ get BASE_PAGE
52
+ query_form = @agent.page.form_with name: /f/
53
+
54
+ # Make the search.
55
+ query_form.q = @query
56
+
57
+ query_form.submit query_form.button_with(name: 'btnK')
58
+
59
+ @page_number = 0
60
+ @more_pages = true
61
+
62
+ parse_links
63
+ end
64
+
65
+ protected
66
+ def get(page)
67
+ @agent.get page
68
+ end
69
+
70
+ protected
71
+ def next_page_link
72
+ link = @agent.page.search('table#nav td a').last
73
+ if link
74
+ link[:href]
75
+ else
76
+ :no_more_pages
77
+ end
78
+ end
79
+
80
+ protected
81
+ def retrieve_next_page
82
+ link = next_page_link
83
+
84
+ if link == :no_more_pages
85
+ @more_pages = false
86
+ else
87
+ get link
88
+ parse_links
89
+ end
90
+ end
91
+
92
+ protected
93
+ # Parse all the links found on the current page.
94
+ def parse_links
95
+ results = @agent.page.search 'li.g'
96
+ results.each do |result|
97
+ # May be youtube or google images/video links, so ignore these.
98
+ link = result.search('h3.r a').first
99
+ next unless link
100
+
101
+ body = result.search('span.st').first || OpenStruct.new(text: '')
102
+
103
+ # Extract the proper URL from the link, disregarding any that aren't full uris
104
+ # (e.g. google image/video links)
105
+ uri = URI.extract(link[:href]).first
106
+
107
+ if uri
108
+ url = uri[/[^\&]*/] # Trim off the trailing crap.
109
+ @links << OpenStruct.new(title: link.text, url: url, body: body.text)
110
+ end
111
+ end
112
+ end
113
+
114
+ protected
115
+ # Index, in @links, of the last link to show.
116
+ def last_link_index
117
+ if more_pages?
118
+ first_link_index + @results_per_page - 1
119
+ else
120
+ @links.size - 1
121
+ end
122
+ end
123
+ end
124
+ end
125
+
126
+
127
+
128
+
129
+
@@ -0,0 +1,3 @@
1
+ module GoogleBrowse
2
+ VERSION = "0.1.2" # Gem version; read by the gemspec and printed in the CLI banner.
3
+ end
@@ -0,0 +1,3 @@
1
+ module GoogleBrowse
2
+ VERSION = "0.1.0" # NOTE(review): older than the packaged 0.1.2; presumably the editor backup lib/google_browse/version.rb~ - confirm and drop it from the gem.
3
+ end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google-browse
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Bil Bas (Spooner)
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-12 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ! 'This is a very simple text browser which aids in searching and navigating
15
+ on Google.com. Shows results as a simple list, any of which may be opened in a full
16
+
17
+ browser. Not really intended for real use, since it is only really a toy.'
18
+ email:
19
+ - bil.bagpuss@gmail.com
20
+ executables:
21
+ - google-browse
22
+ extensions: []
23
+ extra_rdoc_files: []
24
+ files:
25
+ - .gitignore
26
+ - Gemfile
27
+ - LICENSE
28
+ - README.md
29
+ - Rakefile
30
+ - bin/google-browse
31
+ - google-browse.gemspec
32
+ - lib/google-browse.rb
33
+ - lib/google_browse/browser.rb
34
+ - lib/google_browse/scraper.rb
35
+ - lib/google_browse/version.rb
36
+ - lib/google_browse/version.rb~
37
+ homepage: ''
38
+ licenses: []
39
+ post_install_message:
40
+ rdoc_options: []
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ none: false
45
+ requirements:
46
+ - - ! '>='
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubyforge_project:
57
+ rubygems_version: 1.8.24
58
+ signing_key:
59
+ specification_version: 3
60
+ summary: Simple text-browser for Google.com
61
+ test_files: []
62
+ has_rdoc: