crown 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/ChangeLog +4 -0
  2. data/README.rdoc +14 -11
  3. data/VERSION +1 -1
  4. data/crown.gemspec +15 -5
  5. data/example/entrylist.rb +69 -0
  6. data/example/fbcount.rb +1 -1
  7. data/example/hbentry.rb +1 -1
  8. data/example/rtcount.rb +1 -0
  9. data/example/twcount.rb +1 -1
  10. data/example/{annual.rb → urilist.rb} +21 -26
  11. data/lib/crown.rb +1 -1
  12. data/{example/hbtrace.rb → lib/crown/amazon.rb} +8 -23
  13. data/lib/crown/amazon/crawler.rb +159 -0
  14. data/lib/crown/amazon/ecs.rb +385 -0
  15. data/lib/crown/amazon/entrylist.rb +171 -0
  16. data/lib/crown/backtype.rb +2 -2
  17. data/lib/crown/buzzurl.rb +2 -2
  18. data/lib/crown/cgm.rb +8 -0
  19. data/lib/crown/cgm/countable.rb +1 -1
  20. data/lib/crown/cgm/summarizable.rb +1 -1
  21. data/lib/crown/delicious.rb +2 -2
  22. data/lib/crown/facebook.rb +4 -4
  23. data/lib/crown/facebook/entry.rb +5 -3
  24. data/lib/crown/google.rb +38 -0
  25. data/lib/crown/google/plusone.rb +65 -0
  26. data/lib/crown/google/plusone/counter.rb +102 -0
  27. data/lib/crown/hatena/bookmark.rb +7 -7
  28. data/lib/crown/hatena/bookmark/entry.rb +70 -68
  29. data/lib/crown/hatena/bookmark/entrylist.rb +98 -0
  30. data/lib/crown/hatena/bookmark/urilist.rb +349 -0
  31. data/lib/crown/http-wrapper.rb +0 -1
  32. data/lib/crown/linkedin.rb +60 -0
  33. data/lib/crown/linkedin/counter.rb +81 -0
  34. data/lib/crown/livedoor/clip.rb +2 -2
  35. data/lib/crown/livedoor/clip/counter.rb +1 -1
  36. data/lib/crown/livedoor/reader.rb +2 -2
  37. data/lib/crown/topsy.rb +2 -3
  38. data/lib/crown/tweetmeme.rb +2 -2
  39. data/lib/crown/twitter.rb +1 -1
  40. data/lib/crown/twitter/uri.rb +2 -2
  41. data/lib/crown/twitter/user.rb +4 -4
  42. data/lib/crown/twitter/user/entry.rb +26 -6
  43. data/lib/crown/yahoo/bookmark.rb +3 -7
  44. data/test/crown-test.rb +34 -12
  45. metadata +17 -7
  46. data/lib/crown/hatena/bookmark/linktrace.rb +0 -135
@@ -0,0 +1,385 @@
1
+ #--
2
+ # Copyright (c) 2010 Herryanto Siatono
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ require 'net/http'
25
+ require 'nokogiri'
26
+ require 'cgi'
27
+ require 'hmac-sha2'
28
+ require 'base64'
29
+ require 'openssl'
30
+
31
+ module Amazon
32
+ class RequestError < StandardError; end
33
+
34
+ class Ecs
35
# ECS REST service endpoints, keyed by the country symbol accepted via the
# :country request option.
SERVICE_URLS = {
  :us => 'http://ecs.amazonaws.com/onca/xml',
  :uk => 'http://ecs.amazonaws.co.uk/onca/xml',
  :ca => 'http://ecs.amazonaws.ca/onca/xml',
  :de => 'http://ecs.amazonaws.de/onca/xml',
  :jp => 'http://ecs.amazonaws.jp/onca/xml',
  :fr => 'http://ecs.amazonaws.fr/onca/xml',
  :it => 'http://webservices.amazon.it/onca/xml',
  :cn => 'http://webservices.amazon.cn/onca/xml'
}

# True when the OpenSSL binding exposes SHA256. Both a String and a Symbol are
# checked because the element type of Module#constants differs between Ruby
# versions.
OPENSSL_DIGEST_SUPPORT = OpenSSL::Digest.constants.include?( 'SHA256' ) ||
                         OpenSSL::Digest.constants.include?( :SHA256 )

# Digest used for request signing. OpenSSL::Digest::SHA256 is exactly the
# constant the support check above proves to exist; the previously used
# OpenSSL::Digest::Digest wrapper is a deprecated alias.
OPENSSL_DIGEST = OpenSSL::Digest::SHA256.new if OPENSSL_DIGEST_SUPPORT
50
+
51
+ @@options = { # class-wide defaults; send_request merges per-call options over these
51
+ :version => "2011-08-01",
52
+ :service => "AWSECommerceService"
53
+ }
54
+
55
+ @@debug = false # when truthy, log emits its messages
56
+
57
+ # Return the hash of default request options (the live object, not a copy).
58
+ def self.options
59
+ @@options
60
+ end
61
+
62
+ # Replace the default request options hash wholesale with opts.
63
+ def self.options=(opts)
64
+ @@options = opts
65
+ end
66
+
67
+ # Return the current debug flag.
68
+ def self.debug
69
+ @@debug
70
+ end
71
+
72
+ # Set the debug flag to true or false.
73
+ def self.debug=(dbg)
74
+ @@debug = dbg
75
+ end
76
+
77
+ def self.configure(&proc) # yields the default options hash so the caller can edit it in place
78
+ raise ArgumentError, "Block is required." unless block_given?
79
+ yield @@options
80
+ end
82
+
83
# Search Amazon items with search terms. The default search index is 'Books'.
# For a search type other than keywords, pass :type => [search type param name];
# the terms are then sent under that parameter instead of :keywords.
#
# The caller's +opts+ hash is copied before any key is added or removed, so
# it can be reused for further calls unchanged.
#
# Returns whatever send_request returns for the assembled options.
def self.item_search(terms, opts = {})
  opts = opts.dup
  opts[:operation] = 'ItemSearch'
  opts[:search_index] ||= 'Books'

  # :type is a local convenience option, not a request parameter: remove it
  # and use its value as the name of the parameter carrying the terms.
  type = opts.delete(:type)
  opts[type ? type.to_sym : :keywords] = terms

  self.send_request(opts)
end
98
+
99
# Look up an item by ASIN no.
#
# The caller's +opts+ hash is copied first so that adding :operation and
# :item_id does not leak back into it.
#
# Returns whatever send_request returns for the assembled options.
def self.item_lookup(item_id, opts = {})
  opts = opts.dup
  opts[:operation] = 'ItemLookup'
  opts[:item_id] = item_id

  self.send_request(opts)
end
106
+
107
# Look up a browse node by BrowseNodeId.
#
# The caller's +opts+ hash is copied first so that adding :operation and
# :browse_node_id does not leak back into it.
#
# Returns whatever send_request returns for the assembled options.
def self.browse_node_lookup(browse_node_id, opts = {})
  opts = opts.dup
  opts[:operation] = 'BrowseNodeLookup'
  opts[:browse_node_id] = browse_node_id

  self.send_request(opts)
end
114
+
115
# Generic request to the ECS REST service. The :operation parameter must be
# supplied by the caller.
#
# The class-level default options (when set) are merged underneath +opts+, a
# UTC :timestamp is added, and the URL produced by prepare_url is fetched
# with an HTTP GET.
#
# Returns a Response wrapping the body on an HTTP success status.
# Raises Amazon::RequestError carrying the status code and message otherwise.
def self.send_request(opts)
  opts = self.options.merge(opts) if self.options

  # Include other required options
  opts[:timestamp] = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")

  url = prepare_url(opts)
  log "Request URL: #{url}"

  reply = Net::HTTP.get_response(URI::parse(url))
  return Response.new(reply.body) if reply.kind_of?(Net::HTTPSuccess)

  raise Amazon::RequestError, "HTTP Response: #{reply.code} #{reply.message}"
end
131
+
132
# Check that a request carries an associate tag.
#
# The previous condition was inverted: it raised, with an empty message,
# exactly when :associate_tag WAS supplied. Nothing in the visible code calls
# this method yet.
#
# Returns nil when opts[:associate_tag] is present.
# Raises Amazon::RequestError when it is missing.
def self.validate_request(opts)
  raise Amazon::RequestError, "Missing required option :associate_tag." unless opts[:associate_tag]
end
135
+
136
# Response object returned after a REST call to the Amazon service. It wraps
# the parsed XML body and offers readers for the commonly used fields.
class Response
  # The parsed Nokogiri::XML::Document.
  attr_reader :doc

  # Parse +xml+ (the response body as a String) and drop all namespaces so
  # the XPath lookups below can use bare element names.
  def initialize(xml)
    @doc = Nokogiri::XML(xml, 'nul', 'UTF-8')
    @doc.remove_namespaces!
    # Kept for reference, intentionally disabled:
    # @doc.xpath("//*").each { |elem| elem.name = elem.name.downcase }
    # @doc.xpath("//@*").each { |att| att.name = att.name.downcase }
  end

  # True when the response's IsValid element reads "True".
  def is_valid_request?
    flag = Element.get(@doc, "//IsValid")
    flag == "True"
  end

  # True when the response carries a non-empty error message.
  def has_error?
    message = error
    !message.nil? && !message.empty?
  end

  # The error message text, or nil when there is none.
  def error
    Element.get(@doc, "//Error/Message")
  end

  # The error code text, or nil when there is none.
  def error_code
    Element.get(@doc, "//Error/Code")
  end

  # Array of Amazon::Element objects, one per Item node (built once, then cached).
  def items
    @items ||= (@doc/"Item").map { |node| Element.new(node) }
  end

  # The first item (Amazon::Element), or nil when there are no items.
  def first_item
    items.first
  end

  # Value of the ItemPage element as an Integer (0 when absent).
  def item_page
    @item_page ||= Element.get(@doc, "//ItemPage").to_i
  end

  # Value of the TotalResults element as an Integer (0 when absent).
  def total_results
    @total_results ||= Element.get(@doc, "//TotalResults").to_i
  end

  # Value of the TotalPages element as an Integer (0 when absent).
  def total_pages
    @total_pages ||= Element.get(@doc, "//TotalPages").to_i
  end

  # Marshal support: serialize as the document's XML text.
  def marshal_dump
    @doc.to_s
  end

  # Marshal support: rebuild the object by re-parsing the dumped XML.
  def marshal_load(xml)
    initialize(xml)
  end
end
205
+
206
+ protected
207
# Emit the debug message +s+, but only while the debug flag is on.
# The destination is the first of RAILS_DEFAULT_LOGGER, LOGGER (both written
# to at error level) that is defined; otherwise standard output.
def self.log(s)
  return unless self.debug

  return RAILS_DEFAULT_LOGGER.error(s) if defined? RAILS_DEFAULT_LOGGER
  return LOGGER.error(s) if defined? LOGGER

  puts s
end
217
+
218
+ private
219
# Build the complete request URL for +opts+.
#
# Recognised control options (removed before the query string is built):
#   :country        - key into SERVICE_URLS; defaults to 'us'
#   :AWS_secret_key - when present, a Signature parameter is appended
#
# Every remaining option becomes a query parameter: the key is camelized, the
# parameters are sorted by name, and values are URL-encoded. Options whose
# value is nil are left out and Array values are joined with commas. The
# original code stringified every value before making those two checks, so
# neither could ever take effect.
#
# The caller's hash is copied first and is not modified.
#
# Returns the URL as a String.
# Raises Amazon::RequestError when :country has no entry in SERVICE_URLS.
def self.prepare_url(opts)
  opts = opts.dup

  country = opts.delete(:country)
  country = 'us' if country.nil?
  request_url = SERVICE_URLS[country.to_sym]
  raise Amazon::RequestError, "Invalid country '#{country}'" unless request_url

  secret_key = opts.delete(:AWS_secret_key)
  request_host = URI.parse(request_url).host

  # Keep the raw values here so nil and Array can still be recognised below.
  pairs = opts.collect { |name, value| [camelize(name.to_s), value] }
  pairs = pairs.sort { |left, right| left[0].to_s <=> right[0].to_s }

  params = []
  pairs.each do |name, value|
    next if value.nil?
    value = value.join(',') if value.is_a? Array
    log "Adding #{name}=#{value}"
    params << "#{name}=#{self.url_encode(value.to_s)}"
  end
  qs = params.join('&')

  signature = ''
  unless secret_key.nil?
    # The signed text is: method, host, path and the sorted query string.
    request_to_sign = "GET\n#{request_host}\n/onca/xml\n#{qs}"
    signature = "&Signature=#{sign_request(request_to_sign, secret_key)}"
  end

  "#{request_url}?#{qs}#{signature}"
end
256
+
257
# Percent-encode +string+: every byte of each run of characters outside
# a-z, A-Z, 0-9, '_', '.', '~' and '-' becomes %XX with upper-case hex digits.
def self.url_encode(string)
  string.gsub( /([^a-zA-Z0-9_.~-]+)/ ) do |run|
    hex_bytes = run.unpack( 'H2' * run.bytesize )
    hex_bytes.map { |pair| '%' + pair.upcase }.join
  end
end
262
+
263
# Convert an underscored name to CamelCase (e.g. "item_id" -> "ItemId").
# A "/" followed by a character is turned into "::" plus that character
# upper-cased before the underscore pass runs.
def self.camelize(s)
  namespaced = s.to_s.gsub(/\/(.?)/) { "::" + $1.upcase }
  namespaced.gsub(/(^|_)(.)/) { $2.upcase }
end
266
+
267
# Compute the request signature: the Base64 form of the HMAC-SHA256 of +url+
# (the text to sign) under +key+, with '+' and '=' percent-encoded so the
# value can be appended to a query string.
#
# OpenSSL is used when it supports SHA256; otherwise the HMAC::SHA256 class
# from the 'hmac-sha2' library is used.
#
# Returns the encoded signature String, or nil when +key+ is nil.
def self.sign_request(url, key)
  return nil if key.nil?

  if (OPENSSL_DIGEST_SUPPORT)
    signature = OpenSSL::HMAC.digest(OPENSSL_DIGEST, key, url)
    signature = [signature].pack('m').chomp
  else
    signature = Base64.encode64( HMAC::SHA256.digest(key, url) ).strip
  end

  # Same output as the former URI.escape(signature, /[+=]/), which no longer
  # exists on Ruby 3.0 and later: only '+' and '=' are encoded.
  signature.gsub(/[+=]/) { |char| char == '+' ? '%2B' : '%3D' }
end
279
+ end
280
+
281
# Internal wrapper class offering convenient access to the values of a
# Nokogiri element. The class-level helpers work on a raw element; the
# instance methods apply the same helpers to the wrapped element.
class Element
  # Return the inner HTML of the first node matching +path+.
  # Returns nil when +element+ is nil or nothing matches.
  def self.get(element, path='.')
    return unless element
    node = element.at_xpath(path)
    node ? node.inner_html : node
  end

  # Like get, but with HTML entities unescaped.
  def self.get_unescaped(element, path='.')
    text = get(element, path)
    text ? CGI::unescapeHTML(text) : nil
  end

  # Return an array with the text value of every node matching +path+.
  # A result that is not a node set or an array is wrapped in a one-element array.
  def self.get_array(element, path='.')
    return unless element

    found = element/path
    is_list = (found.is_a? Nokogiri::XML::NodeSet) || (found.is_a? Array)
    return [get(found)] unless is_list

    found.map { |node| get(node) }
  end

  # Return a hash mapping each child's name to its inner HTML for the first
  # node matching +path+; nil when +element+ is nil or nothing matches.
  def self.get_hash(element, path='.')
    return unless element

    parent = element.at_xpath(path)
    return nil unless parent

    parent.children.inject({}) do |values, child|
      values[child.name] = child.inner_html
      values
    end
  end

  # Pass a Nokogiri::XML::Element object.
  def initialize(element)
    @element = element
  end

  # Returns the wrapped Nokogiri::XML::Element object.
  def elem
    @element
  end

  # Returns the elements matching the given path, or nil when none match.
  # Example: element/"author".
  def /(path)
    matches = @element/path
    matches.size == 0 ? nil : matches
  end

  # Return an array of Amazon::Element for the given path, or nil when none match.
  def get_elements(path)
    matches = self./(path)
    return unless matches
    matches.map { |node| Element.new(node) }
  end

  # Like get_elements, but returns only the first match (or nil).
  def get_element(path)
    wrapped = get_elements(path)
    wrapped ? wrapped[0] : nil
  end

  # Get the text value at the given path; omit the path for the current element.
  def get(path='.')
    Element.get(@element, path)
  end

  # Get the unescaped HTML text at the given path.
  def get_unescaped(path='.')
    Element.get_unescaped(@element, path)
  end

  # Get the array of values at the given path.
  def get_array(path='.')
    Element.get_array(@element, path)
  end

  # Get the child element text values as a hash keyed by element name.
  def get_hash(path='.')
    Element.get_hash(@element, path)
  end

  # Attributes of the wrapped element, or nil when nothing is wrapped.
  def attributes
    wrapped = self.elem
    return unless wrapped
    self.elem.attributes
  end

  # String form of the wrapped element, or nil when nothing is wrapped.
  def to_s
    elem ? elem.to_s : nil
  end
end
385
+ end
@@ -0,0 +1,171 @@
1
+ # -*- coding: utf-8 -*-
2
+ # --------------------------------------------------------------------------- #
3
+ #
4
+ # amazon/entrylist.rb
5
+ #
6
+ # Copyright (c) 2008 - 2012, clown.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions
10
+ # are met:
11
+ #
12
+ # - Redistributions of source code must retain the above copyright
13
+ # notice, this list of conditions and the following disclaimer.
14
+ # - Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ # - No names of its contributors may be used to endorse or promote
18
+ # products derived from this software without specific prior written
19
+ # permission.
20
+ #
21
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ #
33
+ # --------------------------------------------------------------------------- #
34
+ module Crown
35
+ module Amazon
36
+ # ------------------------------------------------------------------- #
37
+ #
38
+ # EntryList
39
+ #
40
+ # Class that extracts what appear to be Amazon ASINs from the href
41
+ # attributes of the <a> tags at a given URI and fetches the Amazon product information for them.
42
+ #
43
+ # ------------------------------------------------------------------- #
44
+ class EntryList
45
+ require 'uri'
46
+ require 'crown/http-wrapper'
47
+ require 'crown/amazon/ecs'
48
+
49
+ # --------------------------------------------------------------- #
50
+ # accessors: interval is the value passed to sleep after each item lookup in #get
51
+ # --------------------------------------------------------------- #
52
+ attr_accessor :interval
53
+
54
+ # --------------------------------------------------------------- #
55
+ # initialize: sets the default interval to 2
56
+ # --------------------------------------------------------------- #
57
+ def initialize()
58
+ @interval = 2
59
+ end
60
+
61
+ # --------------------------------------------------------------- #
62
+ # EntryList.get: class-level shortcut that builds a new EntryList and forwards uri, options and the block to its #get
63
+ # --------------------------------------------------------------- #
64
+ def EntryList.get(uri, options = {}, &block)
65
+ return Crown::Amazon::EntryList.new.get(uri, options, &block)
66
+ end
67
+
68
# --------------------------------------------------------------- #
#
#  asin
#
#  Fetch the page at +uri+ and collect the ASIN candidates found in
#  the href attribute of its <a> tags. Only links whose host is an
#  Amazon domain (optionally prefixed with "www.") are considered,
#  and each candidate is reported once.
#
#  options may carry :proxy_address and :proxy_port, which are
#  handed to Crown::HTTPWrapper.start.
#
#  Yields each new ASIN when a block is given. Returns the array of
#  collected ASINs, or [] when the page cannot be fetched with a
#  200 status. (The array used to be built but never returned, so
#  the return value was whatever the HTTP wrapper block produced.)
#
# --------------------------------------------------------------- #
def asin(uri, options = {})
  target = URI.parse(uri.strip)
  path = target.path
  path += '?' + target.query if (target.query != nil)

  proxy_addr = nil
  proxy_port = nil
  if (options.class == Hash)
    proxy_addr = options[:proxy_address] if (options.has_key?(:proxy_address))
    proxy_port = options[:proxy_port] if (options.has_key?(:proxy_port))
  end

  result = Array.new
  Crown::HTTPWrapper.start(target.host, target.port, proxy_addr, proxy_port) { |session|
    response = session.get(path)
    return [] if (response == nil || response.code.to_i != 200)

    html = Nokogiri::HTML.parse(response.body)
    html.search('a').each { |node|
      href = node['href']
      next if (href == nil)

      # A separate variable is used for the link so the page URI parsed
      # above is not overwritten from inside the block.
      begin
        link = URI.parse(href.strip)
      rescue URI::InvalidURIError
        # Retry once with the href percent-encoded.
        # NOTE(review): URI.encode was removed in Ruby 3.0, so this
        # fallback only works on older Rubies.
        link = URI.parse(URI.encode(href.strip))
      end
      next if (link == nil || link.host == nil || link.path == nil)

      if (link.host.match(/^(?:www\.)?amazon\.(?:com|ca|co\.uk|de|co\.jp|jp|fr|cn)$/) != nil)
        code = guess_asin(link.path, link.query)
        if (code != nil && !result.include?(code))
          yield code if (block_given?)
          result.push(code)
        end
      end
    }
  }

  return result
end
109
+
110
# --------------------------------------------------------------- #
#
#  get
#
#  Run an Amazon item lookup for every ASIN that #asin finds on the
#  page at +uri+, sleeping for @interval after each lookup.
#  options is passed both to #asin and to the item lookup.
#
#  Yields the first item of each lookup that returned at least one
#  item. Returns the array of those lookup responses.
#
# --------------------------------------------------------------- #
def get(uri, options = {})
  responses = []

  asin(uri, options) do |code|
    response = ::Amazon::Ecs.item_lookup(code, options)
    unless response == nil || response.items.length <= 0
      yield response.first_item if block_given?
      responses.push(response)
    end
    sleep(@interval)
  end

  responses
end
126
+
127
+ private
128
# --------------------------------------------------------------- #
#
#  guess_asin
#
#  Extract an ASIN from a path and a query string. The guess is made
#  by looking for text matching /[B0123489][A-Z0-9]{9}/.
#
#  Path candidates are tried first, in order: one starting with 'B'
#  is accepted as is, any other must pass check_digit. If the path
#  gives nothing, the first match in the query is returned without a
#  check-digit test. Returns nil when nothing is found.
#
# --------------------------------------------------------------- #
def guess_asin(path, query)
  unless path.nil?
    path.scan(/[B0123489][A-Z0-9]{9}/).each do |candidate|
      return candidate if candidate[0].chr == 'B' || check_digit(candidate)
    end
  end

  unless query.nil?
    found = query.match(/[B0123489][A-Z0-9]{9}/)
    return found[0] unless found.nil?
  end

  nil
end
151
+
152
# --------------------------------------------------------------- #
#
#  check_digit
#
#  Compute the ISBN-10 check digit and decide whether +asin+ is a
#  valid ISBN-10 value.
#
#  The value must be exactly nine digits followed by a digit or
#  'X'. Letters used to be counted as 0 by to_i, which let strings
#  such as "0AAAAAAAA0" pass as valid. Returns true or false.
#
# --------------------------------------------------------------- #
def check_digit(asin)
  return false unless asin.to_s =~ /\A\d{9}[\dX]\z/

  # Weighted sum of the first nine digits, with weights 10 down to 2.
  sum = 0
  (0..8).each { |i|
    sum += (10 - i) * asin[i, 1].to_i
  }

  # 11 - (sum mod 11), where 11 wraps to 0 and 10 is written as 'X'.
  check = (11 - (sum % 11)) % 11
  expected = (check == 10) ? 'X' : check.to_s
  return expected == asin[9, 1]
end
169
+ end # EntryList
170
+ end # Amazon
171
+ end # Crown