crown 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/ChangeLog +4 -0
  2. data/README.rdoc +14 -11
  3. data/VERSION +1 -1
  4. data/crown.gemspec +15 -5
  5. data/example/entrylist.rb +69 -0
  6. data/example/fbcount.rb +1 -1
  7. data/example/hbentry.rb +1 -1
  8. data/example/rtcount.rb +1 -0
  9. data/example/twcount.rb +1 -1
  10. data/example/{annual.rb → urilist.rb} +21 -26
  11. data/lib/crown.rb +1 -1
  12. data/{example/hbtrace.rb → lib/crown/amazon.rb} +8 -23
  13. data/lib/crown/amazon/crawler.rb +159 -0
  14. data/lib/crown/amazon/ecs.rb +385 -0
  15. data/lib/crown/amazon/entrylist.rb +171 -0
  16. data/lib/crown/backtype.rb +2 -2
  17. data/lib/crown/buzzurl.rb +2 -2
  18. data/lib/crown/cgm.rb +8 -0
  19. data/lib/crown/cgm/countable.rb +1 -1
  20. data/lib/crown/cgm/summarizable.rb +1 -1
  21. data/lib/crown/delicious.rb +2 -2
  22. data/lib/crown/facebook.rb +4 -4
  23. data/lib/crown/facebook/entry.rb +5 -3
  24. data/lib/crown/google.rb +38 -0
  25. data/lib/crown/google/plusone.rb +65 -0
  26. data/lib/crown/google/plusone/counter.rb +102 -0
  27. data/lib/crown/hatena/bookmark.rb +7 -7
  28. data/lib/crown/hatena/bookmark/entry.rb +70 -68
  29. data/lib/crown/hatena/bookmark/entrylist.rb +98 -0
  30. data/lib/crown/hatena/bookmark/urilist.rb +349 -0
  31. data/lib/crown/http-wrapper.rb +0 -1
  32. data/lib/crown/linkedin.rb +60 -0
  33. data/lib/crown/linkedin/counter.rb +81 -0
  34. data/lib/crown/livedoor/clip.rb +2 -2
  35. data/lib/crown/livedoor/clip/counter.rb +1 -1
  36. data/lib/crown/livedoor/reader.rb +2 -2
  37. data/lib/crown/topsy.rb +2 -3
  38. data/lib/crown/tweetmeme.rb +2 -2
  39. data/lib/crown/twitter.rb +1 -1
  40. data/lib/crown/twitter/uri.rb +2 -2
  41. data/lib/crown/twitter/user.rb +4 -4
  42. data/lib/crown/twitter/user/entry.rb +26 -6
  43. data/lib/crown/yahoo/bookmark.rb +3 -7
  44. data/test/crown-test.rb +34 -12
  45. metadata +17 -7
  46. data/lib/crown/hatena/bookmark/linktrace.rb +0 -135
@@ -0,0 +1,385 @@
1
+ #--
2
+ # Copyright (c) 2010 Herryanto Siatono
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ require 'net/http'
25
+ require 'nokogiri'
26
+ require 'cgi'
27
+ require 'hmac-sha2'
28
+ require 'base64'
29
+ require 'openssl'
30
+
31
+ module Amazon
32
# Error raised when a request to the Amazon service cannot be built
# (for example an unknown country) or the HTTP call does not succeed.
class RequestError < StandardError
end
33
+
34
# Client for the Amazon E-Commerce Service (Product Advertising) REST API.
#
# All entry points are class methods. Default request parameters are kept in
# a class-level options hash (see Ecs.options / Ecs.configure) and merged
# into every request. When the :AWS_secret_key option is present the request
# is signed with HMAC-SHA256.
class Ecs
  # REST endpoint for each supported :country option.
  SERVICE_URLS = {
    :us => 'http://ecs.amazonaws.com/onca/xml',
    :uk => 'http://ecs.amazonaws.co.uk/onca/xml',
    :ca => 'http://ecs.amazonaws.ca/onca/xml',
    :de => 'http://ecs.amazonaws.de/onca/xml',
    :jp => 'http://ecs.amazonaws.jp/onca/xml',
    :fr => 'http://ecs.amazonaws.fr/onca/xml',
    :it => 'http://webservices.amazon.it/onca/xml',
    :cn => 'http://webservices.amazon.cn/onca/xml'
  }

  # True when the linked OpenSSL exposes SHA256 (constant names are Strings
  # on old Rubies and Symbols on newer ones, hence both checks).
  OPENSSL_DIGEST_SUPPORT = OpenSSL::Digest.constants.include?('SHA256') ||
                           OpenSSL::Digest.constants.include?(:SHA256)

  # Digest::SHA256 is used instead of the deprecated OpenSSL::Digest::Digest alias.
  OPENSSL_DIGEST = OpenSSL::Digest::SHA256.new if OPENSSL_DIGEST_SUPPORT

  @@options = {
    :version => "2011-08-01",
    :service => "AWSECommerceService"
  }

  @@debug = false

  # Default request options merged into every request.
  def self.options
    @@options
  end

  # Replace the default request options.
  def self.options=(opts)
    @@options = opts
  end

  # Get debug flag.
  def self.debug
    @@debug
  end

  # Set debug flag to true or false.
  def self.debug=(dbg)
    @@debug = dbg
  end

  # Yields the default options hash so it can be modified in place.
  # Raises ArgumentError when no block is given.
  def self.configure(&proc)
    raise ArgumentError, "Block is required." unless block_given?
    yield @@options
  end

  # Search amazon items with search terms. Default search index option is 'Books'.
  # For a search type other than keywords, specify :type => [search type param name].
  # The caller's opts hash is not modified.
  def self.item_search(terms, opts = {})
    opts = opts.dup
    opts[:operation] = 'ItemSearch'
    opts[:search_index] ||= 'Books'

    type = opts.delete(:type)
    opts[type ? type.to_sym : :keywords] = terms

    self.send_request(opts)
  end

  # Look up an item by ASIN. The caller's opts hash is not modified.
  def self.item_lookup(item_id, opts = {})
    opts = opts.dup
    opts[:operation] = 'ItemLookup'
    opts[:item_id] = item_id

    self.send_request(opts)
  end

  # Look up a browse node by BrowseNodeId. The caller's opts hash is not modified.
  def self.browse_node_lookup(browse_node_id, opts = {})
    opts = opts.dup
    opts[:operation] = 'BrowseNodeLookup'
    opts[:browse_node_id] = browse_node_id

    self.send_request(opts)
  end

  # Generic send request to ECS REST service. You have to specify the :operation parameter.
  #
  # Returns an Ecs::Response wrapping the response body.
  # Raises Amazon::RequestError when the HTTP response is not a success.
  def self.send_request(opts)
    # merge always builds a new hash, so the caller's hash is never touched,
    # even when the defaults have been set to nil.
    opts = (self.options || {}).merge(opts)

    # Include other required options
    opts[:timestamp] = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")

    request_url = prepare_url(opts)
    log "Request URL: #{request_url}"

    res = Net::HTTP.get_response(URI::parse(request_url))
    unless res.kind_of? Net::HTTPSuccess
      raise Amazon::RequestError, "HTTP Response: #{res.code} #{res.message}"
    end
    Response.new(res.body)
  end

  # Raises Amazon::RequestError (with an empty message) when :associate_tag
  # is present in opts.
  # NOTE(review): nothing in this class calls this method and the condition
  # looks inverted; behavior is kept as-is until the intent is confirmed.
  def self.validate_request(opts)
    raise Amazon::RequestError, "" if opts[:associate_tag]
  end

  # Response object returned after a REST call to Amazon service.
  class Response

    # XML input is in string format
    def initialize(xml)
      # Second argument is the document URL; there is none for an in-memory string.
      @doc = Nokogiri::XML(xml, nil, 'UTF-8')
      @doc.remove_namespaces!
    end

    # Return Nokogiri::XML::Document object.
    def doc
      @doc
    end

    # Return true if request is valid.
    def is_valid_request?
      Element.get(@doc, "//IsValid") == "True"
    end

    # Return true if response has an error.
    def has_error?
      !(error.nil? || error.empty?)
    end

    # Return error message.
    def error
      Element.get(@doc, "//Error/Message")
    end

    # Return error code
    def error_code
      Element.get(@doc, "//Error/Code")
    end

    # Return an array of Amazon::Element item objects.
    def items
      @items ||= (@doc/"Item").collect { |item| Element.new(item) }
    end

    # Return the first item (Amazon::Element)
    def first_item
      items.first
    end

    # Return the ItemPage value of the response as an integer
    # (0 when the element is absent).
    def item_page
      @item_page ||= Element.get(@doc, "//ItemPage").to_i
    end

    # Return total results.
    def total_results
      @total_results ||= Element.get(@doc, "//TotalResults").to_i
    end

    # Return total pages.
    def total_pages
      @total_pages ||= Element.get(@doc, "//TotalPages").to_i
    end

    # Marshal support: the response is serialized as its XML text.
    def marshal_dump
      @doc.to_s
    end

    def marshal_load(xml)
      initialize(xml)
    end
  end

  # The helpers below were preceded by `protected` / `private` keywords, which
  # have no effect on `def self.` methods. They therefore have always been
  # publicly callable and are kept public for backward compatibility.

  # Writes s to the Rails logger, a global LOGGER, or stdout when debug is on.
  def self.log(s)
    return unless self.debug
    if defined? RAILS_DEFAULT_LOGGER
      RAILS_DEFAULT_LOGGER.error(s)
    elsif defined? LOGGER
      LOGGER.error(s)
    else
      puts s
    end
  end

  # Builds the request URL for the given options.
  #
  # :country selects the endpoint (default 'us'); :AWS_secret_key, when
  # present, is used to sign the request and is never sent. Every other
  # option becomes a CamelCased query parameter, sorted by name. Array values
  # are joined with ',' and nil values are omitted.
  #
  # Raises Amazon::RequestError for an unknown country.
  def self.prepare_url(opts)
    opts = opts.dup # the deletes below must not leak into the caller's hash

    country = opts.delete(:country)
    country = 'us' if country.nil?
    request_url = SERVICE_URLS[country.to_sym]
    raise Amazon::RequestError, "Invalid country '#{country}'" unless request_url

    secret_key = opts.delete(:AWS_secret_key)
    request_host = URI.parse(request_url).host

    # Keep the raw values here: stringifying them at this point would make the
    # Array and nil handling below unreachable.
    params = opts.collect { |name, value| [camelize(name.to_s), value] }
    params = params.sort_by { |pair| pair[0] }

    pieces = []
    params.each do |name, value|
      log "Adding #{name}=#{value}"
      next if value.nil?
      value = value.join(',') if value.is_a? Array
      pieces << "#{name}=#{self.url_encode(value.to_s)}"
    end
    qs = pieces.join('&')

    signature = ''
    unless secret_key.nil?
      request_to_sign = "GET\n#{request_host}\n/onca/xml\n#{qs}"
      signature = "&Signature=#{sign_request(request_to_sign, secret_key)}"
    end

    "#{request_url}?#{qs}#{signature}"
  end

  # Percent-encodes every byte that is not an RFC 3986 unreserved character.
  def self.url_encode(string)
    string.gsub( /([^a-zA-Z0-9_.~-]+)/ ) do
      '%' + $1.unpack( 'H2' * $1.bytesize ).join( '%' ).upcase
    end
  end

  # Converts snake_case to CamelCase ("item_id" => "ItemId").
  def self.camelize(s)
    s.to_s.gsub(/\/(.?)/) { "::" + $1.upcase }.gsub(/(^|_)(.)/) { $2.upcase }
  end

  # Returns the Base64 HMAC-SHA256 signature of url under key, with '+' and
  # '=' percent-encoded so it can be appended to a query string.
  # Returns nil when key is nil.
  def self.sign_request(url, key)
    return nil if key.nil?

    if (OPENSSL_DIGEST_SUPPORT)
      signature = OpenSSL::HMAC.digest(OPENSSL_DIGEST, key, url)
      signature = [signature].pack('m').chomp
    else
      signature = Base64.encode64( HMAC::SHA256.digest(key, url) ).strip
    end
    # Same output as the former URI.escape(signature, /[+=]/), which no longer
    # exists on Ruby 3.0 and later.
    signature.gsub('+', '%2B').gsub('=', '%3D')
  end
end
280
+
281
+ # Internal wrapper class to provide convenient method to access Nokogiri element value.
282
# Convenience wrapper around a Nokogiri node: the class-level helpers read
# values out of any node, the instance methods do the same for the wrapped one.
class Element
  class << self
    # Inner HTML of the first node matching path, or nil when the element
    # is nil or nothing matches.
    def get(element, path='.')
      return unless element
      node = element.at_xpath(path)
      node ? node.inner_html : node
    end

    # Same as get, with HTML entities unescaped.
    def get_unescaped(element, path='.')
      text = self.get(element, path)
      text ? CGI::unescapeHTML(text) : nil
    end

    # Inner HTML of every node matching path, as an array.
    def get_array(element, path='.')
      return unless element

      found = element/path
      is_list = (found.is_a? Nokogiri::XML::NodeSet) || (found.is_a? Array)
      return [self.get(found)] unless is_list
      found.collect { |node| self.get(node) }
    end

    # Hash of child-node name => inner HTML for the first node matching
    # path; nil when the element is nil or nothing matches.
    def get_hash(element, path='.')
      return unless element

      parent = element.at_xpath(path)
      return nil unless parent
      parent.children.inject({}) do |values, child|
        values[child.name] = child.inner_html
        values
      end
    end
  end

  # Wraps a Nokogiri::XML::Element.
  def initialize(element)
    @element = element
  end

  # The wrapped Nokogiri::XML::Element.
  def elem
    @element
  end

  # Nodes matching the given path, or nil when there are none.
  # Example: element/"author".
  def /(path)
    matches = @element/path
    matches.size == 0 ? nil : matches
  end

  # Array of Amazon::Element wrapping every node matching path; nil when
  # nothing matches.
  def get_elements(path)
    matches = self./(path)
    return unless matches
    matches.map { |node| Element.new(node) }
  end

  # Like get_elements, but returns only the first match (or nil).
  def get_element(path)
    wrapped = get_elements(path)
    wrapped ? wrapped[0] : nil
  end

  # Text value at path; defaults to the wrapped element itself.
  def get(path='.')
    Element.get(@element, path)
  end

  # Unescaped HTML text at path.
  def get_unescaped(path='.')
    Element.get_unescaped(@element, path)
  end

  # Array of text values at path.
  def get_array(path='.')
    Element.get_array(@element, path)
  end

  # Child text values at path as a hash keyed by element name.
  def get_hash(path='.')
    Element.get_hash(@element, path)
  end

  # Attributes of the wrapped element, or nil when nothing is wrapped.
  def attributes
    return unless self.elem
    self.elem.attributes
  end

  # String form of the wrapped element, or nil when nothing is wrapped.
  def to_s
    return nil unless elem
    elem.to_s
  end
end
385
+ end
@@ -0,0 +1,171 @@
1
+ # -*- coding: utf-8 -*-
2
+ # --------------------------------------------------------------------------- #
3
+ #
4
+ # amazon/entrylist.rb
5
+ #
6
+ # Copyright (c) 2008 - 2012, clown.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions
10
+ # are met:
11
+ #
12
+ # - Redistributions of source code must retain the above copyright
13
+ # notice, this list of conditions and the following disclaimer.
14
+ # - Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ # - No names of its contributors may be used to endorse or promote
18
+ # products derived from this software without specific prior written
19
+ # permission.
20
+ #
21
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ #
33
+ # --------------------------------------------------------------------------- #
34
+ module Crown
35
+ module Amazon
36
+ # ------------------------------------------------------------------- #
37
+ #
38
+ # EntryList
39
+ #
40
+ # Extracts strings that look like Amazon ASINs from the href attributes
41
+ # of the <a> tags at the given URI and fetches the matching Amazon product data.
42
+ #
43
+ # ------------------------------------------------------------------- #
44
+ class EntryList
45
+ require 'uri'
46
+ require 'crown/http-wrapper'
47
+ require 'crown/amazon/ecs'
48
+
49
+ # --------------------------------------------------------------- #
50
+ # accessors
51
+ # --------------------------------------------------------------- #
52
+ attr_accessor :interval
53
+
54
+ # --------------------------------------------------------------- #
55
+ # initialize
56
+ # --------------------------------------------------------------- #
57
# Creates a list fetcher whose interval (the value passed to sleep between
# successive product lookups) starts at 2.
def initialize
  @interval = 2
end
60
+
61
+ # --------------------------------------------------------------- #
62
+ # EntryList.get
63
+ # --------------------------------------------------------------- #
64
# Class-level shortcut: builds a fresh EntryList and delegates to its
# instance-level get with the same arguments and block.
def EntryList.get(uri, options = {}, &block)
  list = Crown::Amazon::EntryList.new
  return list.get(uri, options, &block)
end
67
+
68
+ # --------------------------------------------------------------- #
69
+ # asin
70
+ # --------------------------------------------------------------- #
71
# Fetches the page at uri and collects the distinct ASINs guessed from the
# links that point at an Amazon host.
#
# options may carry :proxy_address and :proxy_port for the HTTP session.
# Each newly found ASIN is yielded to the block, if one is given.
#
# Returns the array of ASINs in discovery order, or [] when the page
# cannot be retrieved (nil response or a status other than 200).
def asin(uri, options = {})
  target = URI.parse(uri.strip)
  path = target.path
  path = '/' if path.nil? || path.empty? # a bare host still needs a request path
  path += '?' + target.query unless target.query.nil?

  proxy_addr = nil
  proxy_port = nil
  if options.class == Hash
    proxy_addr = options[:proxy_address] if options.has_key?(:proxy_address)
    proxy_port = options[:proxy_port] if options.has_key?(:proxy_port)
  end

  result = []
  Crown::HTTPWrapper.start(target.host, target.port, proxy_addr, proxy_port) { |session|
    response = session.get(path)
    return [] if response.nil? || response.code.to_i != 200

    html = Nokogiri::HTML.parse(response.body)
    html.search('a').each { |node|
      href = node['href']
      next if href.nil?
      href = href.strip

      begin
        link = URI.parse(href)
      rescue URI::InvalidURIError
        begin
          # URI.encode no longer exists on Ruby 3.0+; use the parser's escape
          # when it is available and fall back for very old Rubies.
          escaped = defined?(URI::DEFAULT_PARSER) ? URI::DEFAULT_PARSER.escape(href) : URI.encode(href)
          link = URI.parse(escaped)
        rescue URI::InvalidURIError
          next # one unparsable link must not abort the whole scan
        end
      end
      next if link.nil? || link.host.nil? || link.path.nil?

      if link.host.match(/^(?:www\.)?amazon\.(?:com|ca|co\.uk|de|co\.jp|jp|fr|cn)$/)
        candidate = guess_asin(link.path, link.query)
        if !candidate.nil? && !result.include?(candidate)
          yield candidate if block_given?
          result.push(candidate)
        end
      end
    }
  }

  # Return the collected ASINs explicitly rather than whatever the HTTP
  # wrapper's start call happens to return.
  return result
end
109
+
110
+ # --------------------------------------------------------------- #
111
+ # get
112
+ # --------------------------------------------------------------- #
113
# Looks up the Amazon product data for every ASIN found on the page at uri.
#
# options is used for the page fetch (including :proxy_address and
# :proxy_port) and, minus those two proxy keys, as the options of each
# Amazon::Ecs.item_lookup call. The proxy keys are withheld because every
# lookup option is turned into a request parameter.
#
# Yields the first item of each non-empty lookup response when a block is
# given, sleeps for the configured interval after every lookup, and returns
# the array of non-empty lookup responses.
def get(uri, options = {})
  result = []

  lookup_options = options
  if options.is_a?(Hash)
    lookup_options = options.reject { |key, _value| key == :proxy_address || key == :proxy_port }
  end

  asin(uri, options) { |asin|
    # Hand each lookup its own copy so it can add request keys freely.
    per_call = lookup_options.is_a?(Hash) ? lookup_options.dup : lookup_options
    entry = ::Amazon::Ecs.item_lookup(asin, per_call)
    if !entry.nil? && entry.items.length > 0
      yield entry.first_item if block_given?
      result.push(entry)
    end
    sleep(@interval)
  }

  return result
end
126
+
127
+ private
128
# --------------------------------------------------------------- #
#
#  guess_asin
#
#  Extracts an ASIN from the path and the query. The guess is made
#  by looking for a string that matches /[B0123489][A-Z0-9]{9}/.
#  A path match is accepted when it starts with 'B' or passes
#  check_digit; otherwise the first match in the query is returned
#  as-is. Returns nil when nothing matches.
#
# --------------------------------------------------------------- #
def guess_asin(path, query)
  unless path.nil?
    accepted = path.scan(/[B0123489][A-Z0-9]{9}/).find { |candidate|
      candidate[0].chr == 'B' || check_digit(candidate)
    }
    return accepted if accepted
  end

  unless query.nil?
    found = query.match(/[B0123489][A-Z0-9]{9}/)
    return found[0] unless found.nil?
  end

  nil
end
151
+
152
# --------------------------------------------------------------- #
#
#  check_digit
#
#  Computes the ISBN-10 check digit from the first nine characters
#  and reports whether it equals the tenth, i.e. whether the value
#  is a valid ISBN-10.
#
# --------------------------------------------------------------- #
def check_digit(asin)
  # Weighted sum with weights 10 down to 2 over the first nine characters.
  weighted = (0..8).inject(0) { |total, pos|
    total + (10 - pos) * asin[pos].chr.to_i
  }

  remainder = 11 - (weighted % 11)
  if remainder < 10
    expected = remainder.to_s
  elsif remainder == 10
    expected = 'X'
  else
    expected = '0'
  end

  expected == asin[9].chr
end
169
+ end # EntryList
170
+ end # Amazon
171
+ end # Crown