google-site-search 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +9 -0
- data/Guardfile +24 -0
- data/LICENSE +22 -0
- data/README.rdoc +145 -0
- data/Rakefile +2 -0
- data/google-site-search.gemspec +19 -0
- data/lib/google-site-search.rb +79 -0
- data/lib/google-site-search/result.rb +22 -0
- data/lib/google-site-search/search.rb +80 -0
- data/lib/google-site-search/url_builder.rb +42 -0
- data/lib/google-site-search/version.rb +3 -0
- data/test/test_google_site_search.rb +37 -0
- data/test/test_helper.rb +5 -0
- data/test/test_search.rb +7 -0
- data/test/test_url_builder.rb +13 -0
- metadata +89 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Guardfile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# A sample Guardfile
# More info at https://github.com/guard/guard#readme

# Re-runs the minitest suite when test or library files change.
guard 'minitest', :notify => false do
  # with Minitest::Unit

  # A changed test file is re-run directly.
  watch(%r|^test/(.*)\/?test_(.*)\.rb|)

  # A changed library file maps to the test file of the same name.
  # m[1] is the optional directory part (with its trailing slash) and m[2] is
  # the file's base name. The directory group must end on a slash; a greedy
  # "(.*)" would swallow all but the last character of the base name.
  watch(%r|^lib/(.*/)?([^/]+)\.rb|) { |m| "test/#{m[1]}test_#{m[2]}.rb" }

  # A changed helper re-runs the whole test directory.
  watch(%r|^test/test_helper\.rb|) { "test" }

  # with Minitest::Spec
  # watch(%r|^spec/(.*)_spec\.rb|)
  # watch(%r|^lib/(.*/)?([^/]+)\.rb|) { |m| "spec/#{m[1]}#{m[2]}_spec.rb" }
  # watch(%r|^spec/spec_helper\.rb|) { "spec" }

  # Rails 3.2
  # watch(%r|^app/controllers/(.*)\.rb|) { |m| "test/controllers/#{m[1]}_test.rb" }
  # watch(%r|^app/helpers/(.*)\.rb|) { |m| "test/helpers/#{m[1]}_test.rb" }
  # watch(%r|^app/models/(.*)\.rb|) { |m| "test/unit/#{m[1]}_test.rb" }

  # Rails
  # watch(%r|^app/controllers/(.*)\.rb|) { |m| "test/functional/#{m[1]}_test.rb" }
  # watch(%r|^app/helpers/(.*)\.rb|) { |m| "test/helpers/#{m[1]}_test.rb" }
  # watch(%r|^app/models/(.*)\.rb|) { |m| "test/unit/#{m[1]}_test.rb" }
end
|
data/LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Copyright (c) 2012 David Vallance
|
|
2
|
+
|
|
3
|
+
MIT License
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
6
|
+
a copy of this software and associated documentation files (the
|
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
11
|
+
the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be
|
|
14
|
+
included in all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
= Gem: google-site-search
|
|
2
|
+
|
|
3
|
+
== Description
|
|
4
|
+
|
|
5
|
+
This gem was created to aid in the querying and parsing of the {Google Site Search}[http://www.google.com/sitesearch] api.
|
|
6
|
+
|
|
7
|
+
In the simplest use case it will query your google site search for a term and supply you with an object containing the results. However I've built this gem with the intention that you will want to explicitly handle _what_ and _how_ the specific results are stored.
|
|
8
|
+
|
|
9
|
+
== Installation
|
|
10
|
+
|
|
11
|
+
Add the following to your project's Gemfile.
|
|
12
|
+
|
|
13
|
+
gem 'google-site-search', :git => "git@github.com:dvallance/google-site-search.git"
|
|
14
|
+
|
|
15
|
+
Require the code if necessary (_note:_ some frameworks like rails are set to auto-require gems for you by default)
|
|
16
|
+
|
|
17
|
+
require 'google-site-search'
|
|
18
|
+
|
|
19
|
+
== Usage
|
|
20
|
+
|
|
21
|
+
The simplest way to use the gem is by providing just a *search* *query* *term* and your *search* *engine* *unique* *id* code (_e.g._ looks like this +00255077836266642015+:+u-scht7a-8i+ and is located in your google site search control panel)
|
|
22
|
+
|
|
23
|
+
#just assign the query to an object
|
|
24
|
+
search = GoogleSiteSearch.query(GoogleSiteSearch::UrlBuilder.new("microsoft", "00255077836266642015:u-scht7a-8i"))
|
|
25
|
+
|
|
26
|
+
#object has search attributes like
|
|
27
|
+
puts search.next_results_url
|
|
28
|
+
puts search.previous_results_url
|
|
29
|
+
puts search.xml
|
|
30
|
+
puts search.spelling
|
|
31
|
+
puts search.spelling_url
|
|
32
|
+
|
|
33
|
+
#object has an array of each specific result that contains title, description and its link by default
|
|
34
|
+
search.results.each do |result|
|
|
35
|
+
puts result.title
|
|
36
|
+
puts result.description
|
|
37
|
+
puts result.link
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
The _query_ method expects a valid url so if you wanted to supply your own you can! However I have created a builder class to help with proper url creation and to help do some of the work for you.
|
|
41
|
+
|
|
42
|
+
== Advanced Usage
|
|
43
|
+
|
|
44
|
+
An important requirement for this gem was to be able to use {structured data}[https://developers.google.com/custom-search/docs/structured_data] for:
|
|
45
|
+
* querying the search api itself (_i.e._ filtering and sorting )
|
|
46
|
+
* displaying specific information in views (_i.e._ display a specific field like 'author', or 'product_type')
|
|
47
|
+
|
|
48
|
+
Therefore I allow the developer to supply his own "*Results*" class to the query and allow them to parse each result xml element explicitly.
|
|
49
|
+
|
|
50
|
+
The default Result class is as follows:
|
|
51
|
+
|
|
52
|
+
class Result
|
|
53
|
+
attr_reader :title, :link, :description
|
|
54
|
+
|
|
55
|
+
def initialize(node)
|
|
56
|
+
@title = node.find_first("T").content
|
|
57
|
+
@link = node.find_first("UE").content
|
|
58
|
+
@description = node.find_first("S").content
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
As you can see it is very simple. Your class simply needs an initialize method that will receive an xml node, which it can then do with as it pleases. After it is initialized it is added to the _search.results_ array as shown previously.
|
|
63
|
+
|
|
64
|
+
See
|
|
65
|
+
* {libxml-ruby}[http://libxml.rubyforge.org/rdoc/] for help on xml parsing.
|
|
66
|
+
* {Google's Site Search XML API reference}[https://developers.google.com/custom-search/docs/xml_results?hl=en] for the <R> tag which encapsulates the details of an individual search result.
|
|
67
|
+
|
|
68
|
+
== Pagination
|
|
69
|
+
|
|
70
|
+
The google search api actually does the work of pagination for us, supplying the next and previous urls. The urls are relative to \http://www.google.com so I added a _paginate_ method to simplify the call.
|
|
71
|
+
|
|
72
|
+
search2 = GoogleSiteSearch.query(GoogleSiteSearch.paginate(search1.next_results_url))
|
|
73
|
+
|
|
74
|
+
== Pagination Simple Example
|
|
75
|
+
|
|
76
|
+
This works and is fairly straight forward.
|
|
77
|
+
|
|
78
|
+
In your controller:
|
|
79
|
+
|
|
80
|
+
if params[:move]
|
|
81
|
+
@search = GoogleSiteSearch.query(GoogleSiteSearch.paginate(params[:move]))
|
|
82
|
+
else
|
|
83
|
+
@search = GoogleSiteSearch.query(GoogleSiteSearch::UrlBuilder.new("microsoft", "00255077836266642015:u-scht7a-8i", :num => 5))
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
In your view:
|
|
87
|
+
|
|
88
|
+
<% if @search.previous_results_url %>
|
|
89
|
+
<%= link_to "Previous", search_url(:move => @search.previous_results_url) %>
|
|
90
|
+
<% end %>
|
|
91
|
+
<% if @search.next_results_url %>
|
|
92
|
+
<%= link_to "More", search_url(:move => @search.next_results_url) %>
|
|
93
|
+
<% end %>
|
|
94
|
+
|
|
95
|
+
== Escaping
|
|
96
|
+
|
|
97
|
+
If you start passing the URLs around in parameters you may run into issues if you don't escape/unescape them. If so try...
|
|
98
|
+
|
|
99
|
+
View adds escape:
|
|
100
|
+
|
|
101
|
+
<%= link_to "Previous", search_url(:move => CGI::escape(@search.previous_results_url)) %>
|
|
102
|
+
|
|
103
|
+
Controller unescapes:
|
|
104
|
+
|
|
105
|
+
@search = GoogleSiteSearch.query(GoogleSiteSearch.paginate(CGI::unescape(params[:move])))
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
== Filtering and Sorting
|
|
110
|
+
|
|
111
|
+
See {Filtering and sorting search results.}[https://developers.google.com/custom-search/docs/structured_search]
|
|
112
|
+
|
|
113
|
+
=== Filtering
|
|
114
|
+
|
|
115
|
+
Google expects filtering to be on the "search query" itself. However I feel my end users won't and shouldn't be aware of all the possible filtering options (most of my filtering will be based off of dataobject values I supply myself). So I try and keep the filters and actual "search term" separate.
|
|
116
|
+
|
|
117
|
+
From the google reference link above an example filter search query is <b>halloween more:pagemap:document-author:lisamorton</b>
|
|
118
|
+
|
|
119
|
+
#using the example above would look like this.
|
|
120
|
+
search = GoogleSiteSearch.query(GoogleSiteSearch::UrlBuilder.new("halloween", "00255077836266642015:u-scht7a-8i", :filter => "more:pagemap:document-author:lisamorton"))
|
|
121
|
+
|
|
122
|
+
=== Separate Search Term From Filters
|
|
123
|
+
|
|
124
|
+
The full "search query" is returned by google's api and stored in the Search object in a few spots (i.e. the <i>@search.search_query</i> and <i>@search.spelling_q</i> methods).
|
|
125
|
+
|
|
126
|
+
To separate the search term from the filter use:
|
|
127
|
+
|
|
128
|
+
search_term, filters = GoogleSiteSearch.separate_search_term_from_filters(@search.search_query)
|
|
129
|
+
|
|
130
|
+
=== Sorting
|
|
131
|
+
|
|
132
|
+
Sorting would also be done by specifying a *sort* option.
|
|
133
|
+
|
|
134
|
+
search = GoogleSiteSearch.query(GoogleSiteSearch::UrlBuilder.new("halloween", "00255077836266642015:u-scht7a-8i", :filter => "more:pagemap:document-author:lisamorton", :sort => "date-sdate"))
|
|
135
|
+
|
|
136
|
+
== Other Params
|
|
137
|
+
|
|
138
|
+
Any <b>[param=value]</b> query string additions you want can be supplied in the same way as the sort option above. For example, limiting the number of search results returned to 5 would look like...
|
|
139
|
+
|
|
140
|
+
#get only 5 search results with the filtering and sorting from above still applied.
|
|
141
|
+
search = GoogleSiteSearch.query(GoogleSiteSearch::UrlBuilder.new("halloween more:pagemap:document-author:lisamorton", "00255077836266642015:u-scht7a-8i", :sort => "date-sdate", :num => "5" ))
|
|
142
|
+
|
|
143
|
+
== Author
|
|
144
|
+
|
|
145
|
+
David Vallance
|
data/Rakefile
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for google-site-search.
require File.expand_path('../lib/google-site-search/version', __FILE__)

Gem::Specification.new do |gem|
  gem.name          = "google-site-search"
  gem.version       = GoogleSiteSearch::VERSION
  gem.authors       = ["David Vallance"]
  gem.email         = ["davevallance@gmail.com"]
  gem.description   = %q{A gem to aid in the consumption of the google site search service; querys the service, populates a result object and has some related helper methods.}
  gem.summary       = gem.description
  gem.homepage      = "https://github.com/dvallance/google-site-search"
  # The bundled LICENSE file is the MIT license.
  gem.license       = "MIT"

  # Split on newlines explicitly: `$\` is nil by default, which makes
  # String#split break on any whitespace and mangles file names with spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency("activesupport")
  gem.add_dependency("libxml-ruby")
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
require "active_support/core_ext/object/to_query"
|
|
2
|
+
require "active_support/core_ext/hash/indifferent_access"
|
|
3
|
+
require "active_support/core_ext/object/try"
|
|
4
|
+
require "active_support/core_ext/object/blank"
|
|
5
|
+
require "google-site-search/version"
|
|
6
|
+
require "google-site-search/url_builder"
|
|
7
|
+
require "google-site-search/search"
|
|
8
|
+
require "google-site-search/result"
|
|
9
|
+
require "timeout"
|
|
10
|
+
require "net/http"
|
|
11
|
+
require "uri"
|
|
12
|
+
require "xml"
|
|
13
|
+
|
|
14
|
+
##
# A module to help query and parse the google site search api.
#
module GoogleSiteSearch

  # Host that every search and pagination request is issued against.
  GOOGLE_SEARCH_URL = "http://www.google.com".freeze

  # Query parameters merged into every URL produced by UrlBuilder#url.
  DEFAULT_PARAMS = {
    :client => "google-csbe",
    :output => "xml_no_dtd",
  }.freeze

  class << self

    # Turns a relative pagination URL into an absolute one.
    #
    # Expects the URL returned by Search#next_results_url or
    # Search#previous_results_url.
    def paginate url
      GOOGLE_SEARCH_URL + url.to_s
    end

    # See Search - This is a convenience method for creating and querying.
    #
    # * +url+ - anything whose +to_s+ yields the API url (e.g. a UrlBuilder).
    # * +result_class+ - class used to wrap each individual result node.
    #
    # Returns the queried Search object.
    def query url, result_class = Result
      Search.new(url, result_class).query
    end

    # Runs up to +times+ consecutive queries, starting at +url+ and then
    # following each response's next_results_url.
    #
    # Stops early once a response offers no next page; otherwise the follow-up
    # request would be built from a nil url and hit the bare search host.
    #
    # Returns an array of Search objects, one per request made.
    def query_multiple url, result_class = Result, times
      searchs = []
      while times > 0
        times -= 1
        unless searchs.empty?
          next_url = searchs.last.next_results_url.to_s
          break if next_url.empty?
          url = paginate(next_url)
        end
        searchs << Search.new(url, result_class).query
      end
      searchs
    end

    # Makes a request to the google search api and returns the xml response as a string.
    #
    # The request is abandoned after 2 seconds. Any failure is reported on
    # standard error (not standard output, which belongs to the host
    # application) and results in +nil+. +nil+ is also returned when the
    # response code is anything other than "200".
    def request_xml url
      response = nil
      begin
        ::Timeout::timeout(2) do
          response = Net::HTTP.get_response(URI.parse(url.to_s))
        end
      rescue Errno::ECONNREFUSED
        warn "Connection refused [#{url}]"
      rescue URI::InvalidURIError
        warn "URI Error [#{url}]"
      rescue => err
        warn "ERROR #{err.class}"
      end
      response.body if response && response.code == "200"
    end

    # Google returns a result link as an absolute but you may
    # want a relative version.
    #
    # A relative +path+ is returned untouched. For an absolute URL the path
    # and query string are returned; a URL with an empty path (for example
    # "http://www.somesite.com") yields the root path "/".
    #
    # Raises URI::InvalidURIError when +path+ cannot be parsed.
    def relative_path path
      uri = URI.parse(path)
      return path if uri.relative?

      uri_path = uri.path
      uri_path = "/" if uri_path == ""
      [uri_path, uri.query].compact.join("?")
    end

    # Google's api will give back a full query which has the filter options on it. I like to deal with them separately so this method breaks them up.
    #
    # Returns a two element array: the search term, and the filter part
    # (everything from the first whitespace followed by "more:p" onwards, up
    # to the end of that line), or +nil+ when no filter is present.
    def separate_search_term_from_filters(string)
      match = /\smore:p.*/.match(string)
      return [string, nil] if match.nil?
      return [match.pre_match, match[0]]
    end

  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module GoogleSiteSearch

  # A default class that parses a result element from
  # Google's search API.
  #
  # See {LibXML Ruby's Node}[http://libxml.rubyforge.org/rdoc/classes/LibXML/XML/Node.html] when writing your own Result class.
  class Result
    attr_reader :title, :link, :description

    # ==== Attributes
    #
    # * +node+ - LibXML::XML::Node.
    #
    # Each attribute is +nil+ when its element is not found under +node+,
    # rather than raising and aborting the parsing of the whole response.
    def initialize(node)
      @title = content_of(node, "T")

      # Fully qualified URL to the result.
      @link = content_of(node, "UE")

      @description = content_of(node, "S")
    end

    private

    # Content of the first element matching +xpath+ under +node+, or +nil+
    # when there is no such element.
    def content_of(node, xpath)
      element = node.find_first(xpath)
      element && element.content
    end
  end
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
module GoogleSiteSearch

  # Search is responsible for parsing the returned xml from
  # google's API.
  #
  # XML parsing is done using {LibXML Ruby}[http://libxml.rubyforge.org/rdoc/]
  class Search
    # Google Site Search API url.
    attr_reader :url
    # Spelling suggestion in HTML format.
    attr_reader :spelling
    # Spelling suggestion URL escaped.
    attr_reader :spelling_q
    # Pulled from the XML as the estimated total number of results.
    # *Note* Google themselves say this may not be accurate.
    attr_reader :estimated_results_total
    # Relative URL to get the next set of results (if any).
    attr_reader :next_results_url
    # Relative URL to get the previous set of results (if any).
    attr_reader :previous_results_url
    # String of the xml returned by Google.
    attr_reader :xml
    # Class supplied which is responsible for parsing each
    # individual result from the API XML.
    attr_reader :result_class
    # The full search term + filters query google interpreted from
    # the url supplied.
    attr_reader :search_query


    # ==== Attributes
    #
    # * +url+ - Expects a fully qualified url to Google's search API
    #   (can be a string or from an object's to_s method).
    # * +result_class+ - A class whose initialize method is expected
    #   to handle the parsing of an individual result entry.
    def initialize url, result_class
      @url = url.to_s
      @results = []
      @result_class = result_class
    end

    # Queries Google's API, stores the xml and parses values into itself.
    #
    # Results from a previous call are discarded first, so calling this more
    # than once does not accumulate duplicates. Returns +self+.
    def query
      @results = []
      @xml = GoogleSiteSearch::request_xml(url)
      parse_xml unless @xml.nil?
      self
    end

    # Array of *result_class* objects (empty until a query has succeeded).
    def results
      @results || []
    end


    private

    # Populates the results and the search level attributes from @xml.
    #
    # Any error raised while parsing (including one raised by result_class)
    # is reported on standard error and stops the parse; whatever was
    # collected up to that point is kept.
    def parse_xml
      begin
        doc = ::XML::Parser.string(@xml).parse
        doc.find("//GSP/RES/R").each do |result_node|
          @results << result_class.new(result_node)
        end

        spelling_node = doc.find_first("Spelling/Suggestion")
        @spelling = spelling_node.try(:content)
        @spelling_q = spelling_node.try(:attributes).try(:[],:q)
        @estimated_results_total = doc.find_first("RES/M").try(:content)
        @next_results_url = doc.find_first("RES/NB/NU").try(:content)
        @previous_results_url = doc.find_first("RES/NB/PU").try(:content)
        @search_query = doc.find_first("Q").try(:content)

      rescue => err
        warn "parse_xml error #{err.message}"
      end
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
module GoogleSiteSearch

  # UrlBuilder is responsible for generating a valid url for querying
  # the google search API.
  class UrlBuilder
    attr_accessor :search_term, :filter, :custom_params, :search_engine_id, :sort

    # ==== Attributes
    #
    # * +search_term+ - should be a simple search with *no* *filters* added.
    #
    #   Even though Google's search API uses filters on the search term
    #   itself, I wanted to separate out that functionality.
    #   Filters should be added separately as a param.
    #
    # * +search_engine_id+ - your unique id found in your Google Site Search
    #   control panel.
    #
    # * +params+ - supply a hash that is converted to query params. See
    #   {Request Params}[https://developers.google.com/custom-search/docs/xml_results?hl=en#wsRequestParameters].
    #   The +filter+ and +sort+ entries are pulled out into their own
    #   attributes; everything else is kept in +custom_params+.
    #
    # Raises ArgumentError when +search_term+ or +search_engine_id+ is blank.
    def initialize search_term, search_engine_id, params = {}
      raise ArgumentError, "search_term must not be blank" if search_term.blank?
      raise ArgumentError, "search_engine_id must not be blank" if search_engine_id.blank?
      @search_term = search_term
      @search_engine_id = search_engine_id
      @custom_params = params.with_indifferent_access
      @filter = @custom_params.delete :filter
      @sort = @custom_params.delete :sort
    end

    # Joins the search term and the filters, to get the full search query that google expects.
    #
    # Blank parts are skipped so an empty filter does not leave a trailing
    # space on the query.
    def query
      [@search_term, @filter].reject { |part| part.blank? }.join(" ")
    end

    # Returns a fully qualified URL for the Google search API.
    #
    # DEFAULT_PARAMS are merged over the custom params, so they win over
    # caller-supplied entries of the same name. Parameters whose value is nil
    # (e.g. an unset sort) are left out of the query string.
    def url
      params = @custom_params.merge(DEFAULT_PARAMS).merge(:q => query, :cx => @search_engine_id, :sort => @sort)
      params.delete_if { |k, v| v.nil? }
      "#{GOOGLE_SEARCH_URL}/cse?#{params.to_query}"
    end
    alias :to_s :url

  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
require_relative 'test_helper'

# Specs for the module level helper GoogleSiteSearch.relative_path.
describe GoogleSiteSearch do

  describe '#.relative_path' do

    it 'raises URI::InvalidURIError if a nil is given' do
      -> {GoogleSiteSearch.relative_path(nil)}.must_raise URI::InvalidURIError
    end

    describe 'given a relative path' do

      it 'returns the path given' do
        GoogleSiteSearch.relative_path("/somepath").must_equal "/somepath"
      end

    end

    describe 'given an absolute url' do

      it 'with just the domain a root path will be returned' do
        GoogleSiteSearch.relative_path("http://www.somesite.com/").must_equal "/"
      end

      it 'with a domain and path given the path will be returned' do
        GoogleSiteSearch.relative_path("http://www.somesite.com/my-test").must_equal "/my-test"
      end

      it 'with a query string the path and query will be returned' do
        GoogleSiteSearch.relative_path("http://www.somesite.com/my-test?something=value").must_equal "/my-test?something=value"
      end

    end
  end
end
|
data/test/test_helper.rb
ADDED
data/test/test_search.rb
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
require_relative 'test_helper'

# Specs for GoogleSiteSearch::UrlBuilder argument validation.
#
# The class is referenced by its fully qualified name so the spec resolves
# it whether or not test_helper mixes GoogleSiteSearch into the top level.
describe GoogleSiteSearch::UrlBuilder do
  it "#initialize expects valid arguments" do
    -> {GoogleSiteSearch::UrlBuilder.new(nil, nil)}.must_raise ArgumentError
    -> {GoogleSiteSearch::UrlBuilder.new("","")}.must_raise ArgumentError
    -> {GoogleSiteSearch::UrlBuilder.new("string", nil)}.must_raise ArgumentError
    -> {GoogleSiteSearch::UrlBuilder.new(nil, "string")}.must_raise ArgumentError
    GoogleSiteSearch::UrlBuilder.new("string", "string").must_be_instance_of GoogleSiteSearch::UrlBuilder
  end

end
|
|
13
|
+
|
metadata
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: google-site-search
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.2
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- David Vallance
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2012-10-25 00:00:00.000000000 Z
|
|
13
|
+
dependencies:
|
|
14
|
+
- !ruby/object:Gem::Dependency
|
|
15
|
+
name: activesupport
|
|
16
|
+
requirement: &7621180 !ruby/object:Gem::Requirement
|
|
17
|
+
none: false
|
|
18
|
+
requirements:
|
|
19
|
+
- - ! '>='
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '0'
|
|
22
|
+
type: :runtime
|
|
23
|
+
prerelease: false
|
|
24
|
+
version_requirements: *7621180
|
|
25
|
+
- !ruby/object:Gem::Dependency
|
|
26
|
+
name: libxml-ruby
|
|
27
|
+
requirement: &7620760 !ruby/object:Gem::Requirement
|
|
28
|
+
none: false
|
|
29
|
+
requirements:
|
|
30
|
+
- - ! '>='
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: *7620760
|
|
36
|
+
description: A gem to aid in the consumption of the google site search service; querys
|
|
37
|
+
the service, populates a result object and has some related helper methods.
|
|
38
|
+
email:
|
|
39
|
+
- davevallance@gmail.com
|
|
40
|
+
executables: []
|
|
41
|
+
extensions: []
|
|
42
|
+
extra_rdoc_files: []
|
|
43
|
+
files:
|
|
44
|
+
- .gitignore
|
|
45
|
+
- Gemfile
|
|
46
|
+
- Guardfile
|
|
47
|
+
- LICENSE
|
|
48
|
+
- README.rdoc
|
|
49
|
+
- Rakefile
|
|
50
|
+
- google-site-search.gemspec
|
|
51
|
+
- lib/google-site-search.rb
|
|
52
|
+
- lib/google-site-search/result.rb
|
|
53
|
+
- lib/google-site-search/search.rb
|
|
54
|
+
- lib/google-site-search/url_builder.rb
|
|
55
|
+
- lib/google-site-search/version.rb
|
|
56
|
+
- test/test_google_site_search.rb
|
|
57
|
+
- test/test_helper.rb
|
|
58
|
+
- test/test_search.rb
|
|
59
|
+
- test/test_url_builder.rb
|
|
60
|
+
homepage: https://github.com/dvallance/google-site-search
|
|
61
|
+
licenses: []
|
|
62
|
+
post_install_message:
|
|
63
|
+
rdoc_options: []
|
|
64
|
+
require_paths:
|
|
65
|
+
- lib
|
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
67
|
+
none: false
|
|
68
|
+
requirements:
|
|
69
|
+
- - ! '>='
|
|
70
|
+
- !ruby/object:Gem::Version
|
|
71
|
+
version: '0'
|
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
73
|
+
none: false
|
|
74
|
+
requirements:
|
|
75
|
+
- - ! '>='
|
|
76
|
+
- !ruby/object:Gem::Version
|
|
77
|
+
version: '0'
|
|
78
|
+
requirements: []
|
|
79
|
+
rubyforge_project:
|
|
80
|
+
rubygems_version: 1.8.10
|
|
81
|
+
signing_key:
|
|
82
|
+
specification_version: 3
|
|
83
|
+
summary: A gem to aid in the consumption of the google site search service; querys
|
|
84
|
+
the service, populates a result object and has some related helper methods.
|
|
85
|
+
test_files:
|
|
86
|
+
- test/test_google_site_search.rb
|
|
87
|
+
- test/test_helper.rb
|
|
88
|
+
- test/test_search.rb
|
|
89
|
+
- test/test_url_builder.rb
|