google_rest 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/README +22 -0
  2. data/init.rb +3 -0
  3. data/lib/google_rest.rb +231 -0
  4. metadata +82 -0
data/README ADDED
@@ -0,0 +1,22 @@
1
+ GoogleRest
2
+ ==========
3
+
4
+ A plugin for working with some of the Google REST APIs. For now it partially supports:
5
+ * Google AJAX Feed API (find a feed, retrieve articles from a feed)
6
+ * Google Search API (inbound links, number of indexed pages)
7
+
8
+ Setup
9
+ =====
10
+
11
+ You must add a config/google_rest.yml file containing your API key and referer (required by the Google Terms of Use).
12
+ Example config/google_rest.yml:
13
+ ---
14
+ api_key: XXXXX
15
+ referer: http://www.mydomain.com
16
+
17
+
18
+ Example
19
+ =======
20
+
21
+
22
+ Copyright (c) 2008 Olivier Ruffin, http://www.veilleperso.com , released under the MIT license
data/init.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'httparty'
2
+ require 'json'
3
+ require "google_rest"
@@ -0,0 +1,231 @@
1
+ require 'json/add/rails'
2
+
3
+ module GoogleRest
4
+ PER_PAGE = 8
5
+
6
# Wraps the decoded "responseData" payload of a Google REST response and
# exposes its result list together with the pagination cursor.
#
# An instance built from nil (what the request layer hands over on a timeout
# or an empty response) behaves as an empty result set: every reader below
# tolerates a missing cursor instead of raising NoMethodError.
class Results

  attr_accessor :raw, :results, :cursor

  # raw - the decoded "responseData" Hash, or nil when nothing was received.
  def initialize(raw)
    self.raw = raw
    data = raw || {}
    # cursor is stored exactly as received (nil when absent) so callers that
    # test it with blank?/nil? keep working.
    self.cursor = data["cursor"]
    self.results = data["results"] || []
  end

  # True when no payload was received.
  def empty?
    raw.blank?
  end

  # Yields each result; returns an Enumerator when no block is given.
  def each(&block)
    results.each(&block)
  end

  # Maps over the results; returns an Enumerator when no block is given.
  def map(&block)
    results.map(&block)
  end

  # Current page as reported by the cursor's "currentPageIndex", or 1 when
  # the cursor or that field is missing.
  # NOTE(review): Google documents currentPageIndex as zero-based while the
  # fallback here is 1 -- confirm which base callers expect.
  def page
    index = cursor_data["currentPageIndex"]
    index.blank? ? 1 : index
  end

  # The cursor's page descriptors ([] when there is no cursor).
  def pages
    cursor_data["pages"] || []
  end

  # Number of page descriptors in the cursor.
  def total_pages
    pages.length
  end

  # Entry count derived from the last page descriptor: its "start" offset
  # plus PER_PAGE minus one; 0 when there are no pages.
  def total_entries
    last_page = pages.last
    return 0 unless last_page
    last_page["start"].to_i + GoogleRest::PER_PAGE - 1
  end

  # Page size used for pagination.
  def per_page
    GoogleRest::PER_PAGE
  end

  # Paginates the result array via results.paginate (presumably
  # will_paginate's Array#paginate -- confirm). current_page overrides the
  # page taken from the cursor.
  def paginated(current_page = nil)
    results.paginate(:page => current_page || page, :per_page => per_page, :total_entries => total_entries)
  end

  private

  # The cursor Hash, or an empty Hash when the response carried none.
  def cursor_data
    cursor || {}
  end

end
58
+
59
# Client for the Google AJAX REST services (feed lookup/load and the search
# endpoints listed in API_URL). The API key and referer are read from
# config/google_rest.yml when an instance is created.
class Request
  if defined?(HTTParty)
    include HTTParty
    base_uri "http://ajax.googleapis.com/ajax/services"
    format :json
    # Decode with Crack first; fall back to ActiveSupport's decoder for the
    # payloads Crack fails to parse.
    parser Proc.new { |data, _format|
      begin
        Crack::JSON.parse(data)
      rescue Crack::ParseError
        ActiveSupport::JSON.decode(data)
      end
    }
  end

  attr_accessor :api_key
  attr_accessor :referer
  attr_accessor :userip

  @@ascii_available = "".respond_to?(:to_ascii)

  API_VERSION = "1.0"
  API_URL = {
    :feed_lookup => "/feed/lookup",
    :feed_load => "/feed/load",
    :web => "/search/web",
    :blog => "/search/blogs",
    :news => "/search/news",
    :image => "/search/images",
    :local => "/search/local",
    :video => "/search/video"
  }

  # userip - optional end-user IP sent along with every request.
  #
  # Raises StandardError when config/google_rest.yml is missing. The file may
  # either hold the keys directly or nest them under the Rails environment
  # name.
  def initialize(userip = nil)
    path = Rails.root.join("config/google_rest.yml")
    # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
    raise StandardError, "Missing config file: #{path}" unless File.exist?(path)

    config = YAML.load_file(path) || {}
    data = config[Rails.env.to_s] || config
    self.api_key = data['api_key']
    self.referer = data['referer']
    self.userip = userip
  end

  # Returns the feed URL Google associates with +website+, or nil when the
  # argument is blank or nothing was found.
  def feed_lookup(website)
    return nil if website.blank?
    res = google_request(:feed_lookup, {:q => strip_scheme(website)})
    res.empty? ? nil : res.raw["url"]
  end

  # Loads +feed_url+ and returns the "feed" part of the response, or nil.
  # With count_entries == false the request uses :num => -1; otherwise it
  # asks for count_entries entries with :scoring => 'h'.
  def feed_load(feed_url, count_entries = false)
    return nil if feed_url.blank?
    params =
      if count_entries == false
        # we retrieve the current feed entries
        {:q => feed_url, :num => -1}
      else
        {:q => feed_url, :num => count_entries, :scoring => 'h'}
      end
    res = google_request(:feed_load, params)
    res.empty? ? nil : res.raw["feed"]
  end

  # The *_search methods return a GoogleRest::Results; +options+ override the
  # defaults shown and may carry :lang (see common_search).
  def blog_search(query, options = {})
    common_search(:blog, {:scoring => 'd', :rsz => 'large', :q => query}.merge(options))
  end

  def web_search(query, options = {})
    common_search(:web, {:rsz => 'large', :q => query}.merge(options))
  end

  def news_search(query, options = {})
    common_search(:news, {:rsz => 'large', :scoring => 'd', :q => query}.merge(options))
  end

  def image_search(query, options = {})
    common_search(:image, {:q => query}.merge(options))
  end

  def video_search(query, options = {})
    common_search(:video, {:q => query}.merge(options))
  end

  def local_search(query, options = {})
    common_search(:local, {:q => query}.merge(options))
  end

  # Estimated number of results for a "link:" query on +url+ (0 when the url
  # is blank or no cursor came back).
  def inbound_links(url)
    estimated_count("link", url)
  end

  # Estimated number of results for a "site:" query on +url+ (0 when the url
  # is blank or no cursor came back).
  def indexed_pages(url)
    estimated_count("site", url)
  end

  def api_key?
    !self.api_key.blank?
  end

  def userip?
    !self.userip.blank?
  end

  private

  # Removes a leading http:// or https:// (any letter case) from +url+.
  def strip_scheme(url)
    url.to_s.sub(/\Ahttps?:\/\//i, '')
  end

  # Runs a web search for "<operator>:<url without scheme>" and returns the
  # cursor's estimatedResultCount as an Integer.
  def estimated_count(operator, url)
    return 0 if url.blank?
    res = common_search(:web, {:q => "#{operator}:#{strip_scheme(url)}"})
    res.cursor.blank? ? 0 : res.cursor["estimatedResultCount"].to_i
  end

  # Translates the optional :lang entry into Google's language parameters
  # (:hl plus :ned for news, :hl plus :lr otherwise) without overriding
  # values already present, drops blank parameters and issues the request.
  def common_search(type, query = {})
    lang = query.delete(:lang)
    unless lang.blank?
      # Work on a downcased copy: the caller's value must not be mutated and
      # may be a Symbol or a frozen String.
      lang = lang.to_s.downcase
      if type == :news
        query.reverse_merge!(:hl => lang, :ned => lang)
      else
        query.reverse_merge!(:hl => lang, :lr => "lang_#{lang}")
      end
    end
    query.delete_if { |_k, v| v.blank? }
    google_request(type, query)
  end

  # Performs the GET for +type+ and wraps "responseData" in a
  # GoogleRest::Results. Returns an empty Results when the response has no
  # data or the call takes longer than 15 seconds.
  #
  # HTTParty now uses Crack instead of ActiveSupport::JSON for decoding, but
  # Crack does not seem to handle some Google results properly, hence the
  # custom parser declared at the top of the class. String values are
  # unescaped here unless the :no_escape option is set.
  def google_request(type, query = {})
    Timeout::timeout(15) do
      no_escape = query.delete(:no_escape)
      query[:v] = API_VERSION
      query[:key] = api_key if api_key?
      query[:userip] = userip if userip?
      self.class.headers({'Referer' => self.referer})
      res = self.class.get(API_URL[type], :query => query)
      if !res["responseData"].blank?
        GoogleRest::Results.new(no_escape ? res["responseData"] : Util.json_recursive_unescape(res["responseData"]))
      else
        GoogleRest::Results.new(nil)
      end
    end
  rescue Timeout::Error
    GoogleRest::Results.new(nil)
  end

  module Util
    # Literal character => its six-character \uXXXX spelling (single-quoted,
    # so the backslash is part of the string).
    JSON_ESCAPE = { '&' => '\u0026', '>' => '\u003E', '<' => '\u003C', '=' => '\u003D' }

    # Replaces the \uXXXX spellings listed in JSON_ESCAPE (hex digits in any
    # case) with the literal characters. Always returns a new String; the
    # argument is never modified, so frozen input is fine.
    #   puts json_unescape('is a \u003E 0 \u0026 a \u003C 10?')
    #   # => is a > 0 & a < 10?
    def json_unescape(s)
      JSON_ESCAPE.inject(s.to_s) do |str, (char, code)|
        str.gsub(/#{Regexp.escape(code)}/i, char)
      end
    end

    # Escapes &, > and < as \uXXXX sequences.
    #   puts json_escape("is a > 0 & a < 10?")
    #   # => is a \u003E 0 \u0026 a \u003C 10?
    # A double quote is matched by the pattern but has no table entry; it is
    # kept as-is (it used to be silently deleted).
    def json_escape(s)
      s.to_s.gsub(/[&"><]/) { |special| JSON_ESCAPE.fetch(special, special) }
    end

    # Applies json_unescape to every String inside a nested Hash/Array
    # structure, returning new containers.
    # NOTE(review): any other value (numbers, booleans, nil) matches no branch
    # and comes back as nil, exactly as before. Results#page currently falls
    # back to 1 because of this, so it is left unchanged here -- revisit both
    # together.
    def json_recursive_unescape(data)
      case data
      when String then json_unescape(data)
      when Hash then data.inject({}) { |hsh, (k, v)| hsh[k] = json_recursive_unescape(v); hsh }
      when Array then data.map { |v| json_recursive_unescape(v) }
      end
    end

    module_function :json_escape
    module_function :json_unescape
    module_function :json_recursive_unescape
  end
end
231
+ end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: google_rest
3
+ version: !ruby/object:Gem::Version
4
+ hash: 23
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 0
10
+ version: 1.0.0
11
+ platform: ruby
12
+ authors:
13
+ - Olivier Ruffin
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-17 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: httparty
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Wrapper around google rest api
36
+ email: olivier@muweb.fr
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files: []
42
+
43
+ files:
44
+ - README
45
+ - init.rb
46
+ - lib/google_rest.rb
47
+ has_rdoc: true
48
+ homepage: https://github.com/veilleperso/google_rest
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options: []
53
+
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ hash: 3
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ requirements: []
75
+
76
+ rubyforge_project:
77
+ rubygems_version: 1.4.2
78
+ signing_key:
79
+ specification_version: 3
80
+ summary: Google REST API
81
+ test_files: []
82
+