crown 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +4 -0
- data/README.rdoc +14 -11
- data/VERSION +1 -1
- data/crown.gemspec +15 -5
- data/example/entrylist.rb +69 -0
- data/example/fbcount.rb +1 -1
- data/example/hbentry.rb +1 -1
- data/example/rtcount.rb +1 -0
- data/example/twcount.rb +1 -1
- data/example/{annual.rb → urilist.rb} +21 -26
- data/lib/crown.rb +1 -1
- data/{example/hbtrace.rb → lib/crown/amazon.rb} +8 -23
- data/lib/crown/amazon/crawler.rb +159 -0
- data/lib/crown/amazon/ecs.rb +385 -0
- data/lib/crown/amazon/entrylist.rb +171 -0
- data/lib/crown/backtype.rb +2 -2
- data/lib/crown/buzzurl.rb +2 -2
- data/lib/crown/cgm.rb +8 -0
- data/lib/crown/cgm/countable.rb +1 -1
- data/lib/crown/cgm/summarizable.rb +1 -1
- data/lib/crown/delicious.rb +2 -2
- data/lib/crown/facebook.rb +4 -4
- data/lib/crown/facebook/entry.rb +5 -3
- data/lib/crown/google.rb +38 -0
- data/lib/crown/google/plusone.rb +65 -0
- data/lib/crown/google/plusone/counter.rb +102 -0
- data/lib/crown/hatena/bookmark.rb +7 -7
- data/lib/crown/hatena/bookmark/entry.rb +70 -68
- data/lib/crown/hatena/bookmark/entrylist.rb +98 -0
- data/lib/crown/hatena/bookmark/urilist.rb +349 -0
- data/lib/crown/http-wrapper.rb +0 -1
- data/lib/crown/linkedin.rb +60 -0
- data/lib/crown/linkedin/counter.rb +81 -0
- data/lib/crown/livedoor/clip.rb +2 -2
- data/lib/crown/livedoor/clip/counter.rb +1 -1
- data/lib/crown/livedoor/reader.rb +2 -2
- data/lib/crown/topsy.rb +2 -3
- data/lib/crown/tweetmeme.rb +2 -2
- data/lib/crown/twitter.rb +1 -1
- data/lib/crown/twitter/uri.rb +2 -2
- data/lib/crown/twitter/user.rb +4 -4
- data/lib/crown/twitter/user/entry.rb +26 -6
- data/lib/crown/yahoo/bookmark.rb +3 -7
- data/test/crown-test.rb +34 -12
- metadata +17 -7
- data/lib/crown/hatena/bookmark/linktrace.rb +0 -135
@@ -0,0 +1,385 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2010 Herryanto Siatono
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'net/http'
|
25
|
+
require 'nokogiri'
|
26
|
+
require 'cgi'
|
27
|
+
require 'hmac-sha2'
|
28
|
+
require 'base64'
|
29
|
+
require 'openssl'
|
30
|
+
|
31
|
+
module Amazon
|
32
|
+
# Error raised by this library when a request cannot be built or when the
# service answers with a non-success HTTP status.
class RequestError < StandardError
end
|
33
|
+
|
34
|
+
class Ecs
|
35
|
+
# REST endpoint used for each supported country code (see prepare_url, which
# looks the :country option up in this table).
SERVICE_URLS = {
  :us => 'http://ecs.amazonaws.com/onca/xml',
  :uk => 'http://ecs.amazonaws.co.uk/onca/xml',
  :ca => 'http://ecs.amazonaws.ca/onca/xml',
  :de => 'http://ecs.amazonaws.de/onca/xml',
  :jp => 'http://ecs.amazonaws.jp/onca/xml',
  :fr => 'http://ecs.amazonaws.fr/onca/xml',
  :it => 'http://webservices.amazon.it/onca/xml',
  :cn => 'http://webservices.amazon.cn/onca/xml'
}
|
45
|
+
|
46
|
+
# True when the OpenSSL binding provides a SHA256 digest class. Constant
# names are reported as Strings by some Ruby versions and as Symbols by
# others, so both spellings are checked.
OPENSSL_DIGEST_SUPPORT = OpenSSL::Digest.constants.include?('SHA256') ||
                         OpenSSL::Digest.constants.include?(:SHA256)

# Digest object passed to OpenSSL::HMAC when signing requests.
# OpenSSL::Digest::Digest is no longer available in current openssl
# releases, so the SHA256 class (the very constant tested above) is
# instantiated directly instead.
OPENSSL_DIGEST = OpenSSL::Digest::SHA256.new if OPENSSL_DIGEST_SUPPORT
|
50
|
+
|
51
|
+
# Default request options; send_request merges the per-call options over
# these before building the URL.
@@options = { :version => "2011-08-01", :service => "AWSECommerceService" }

# Debug flag consulted by Ecs.log; logging is off by default.
@@debug = false
|
57
|
+
|
58
|
+
# Returns the Hash of default request options shared by all requests.
def self.options
  return @@options
end
|
62
|
+
|
63
|
+
# Replaces the Hash of default request options with +opts+.
def self.options=(opts)
  @@options = opts
end
|
67
|
+
|
68
|
+
# Returns the current debug flag.
def self.debug
  return @@debug
end
|
72
|
+
|
73
|
+
# Sets the debug flag; pass true to enable log output, false to silence it.
def self.debug=(dbg)
  @@debug = dbg
end
|
77
|
+
|
78
|
+
# Yields the shared default-options Hash so the caller can populate it
# in place.
#
# Raises ArgumentError when no block is given.
def self.configure(&proc)
  unless block_given?
    raise ArgumentError, "Block is required."
  end

  yield @@options
end
|
82
|
+
|
83
|
+
# Searches Amazon items by search terms.
#
# terms - value of the search parameter.
# opts  - request options. :search_index defaults to 'Books'. :type names
#         the search parameter that should carry +terms+; when it is absent
#         the terms are sent as :keywords.
#
# The options are copied before being modified, so the caller's Hash is
# left untouched and can be reused for another search (previously a reused
# Hash lost its :type and kept the stale typed parameter).
#
# Returns the value of send_request.
def self.item_search(terms, opts = {})
  params = opts.dup
  params[:operation] = 'ItemSearch'
  params[:search_index] ||= 'Books'

  search_param = params.delete(:type) || :keywords
  params[search_param.to_sym] = terms

  self.send_request(params)
end
|
98
|
+
|
99
|
+
# Looks up a single item by its ASIN.
#
# item_id - the ASIN to look up.
# opts    - additional request options.
#
# The options are merged into a new Hash, so the caller's Hash is not
# modified (it used to gain :operation and :item_id entries).
#
# Returns the value of send_request.
def self.item_lookup(item_id, opts = {})
  params = opts.merge(:operation => 'ItemLookup', :item_id => item_id)
  self.send_request(params)
end
|
106
|
+
|
107
|
+
# Looks up a browse node by its BrowseNodeId.
#
# browse_node_id - the identifier of the browse node.
# opts           - additional request options.
#
# The options are merged into a new Hash, so the caller's Hash is not
# modified (it used to gain :operation and :browse_node_id entries).
#
# Returns the value of send_request.
def self.browse_node_lookup(browse_node_id, opts = {})
  params = opts.merge(:operation => 'BrowseNodeLookup', :browse_node_id => browse_node_id)
  self.send_request(params)
end
|
114
|
+
|
115
|
+
# Sends a request to the ECS REST service and wraps the answer.
#
# opts - request options; :operation must be supplied by the caller. They
#        are merged over the default options (when defaults are set) and a
#        UTC :timestamp is added before the URL is built.
#
# Returns a Response built from the HTTP body.
# Raises Amazon::RequestError when the HTTP response is not a success.
def self.send_request(opts)
  defaults = self.options
  opts = defaults.merge(opts) if defaults

  # Include other required options
  opts[:timestamp] = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")

  request_url = prepare_url(opts)
  log "Request URL: #{request_url}"

  res = Net::HTTP.get_response(URI::parse(request_url))
  return Response.new(res.body) if res.kind_of?(Net::HTTPSuccess)

  raise Amazon::RequestError, "HTTP Response: #{res.code} #{res.message}"
end
|
131
|
+
|
132
|
+
# Raises Amazon::RequestError (with an empty message) when +opts+ contains
# a truthy :associate_tag; otherwise returns nil.
#
# NOTE(review): nothing in this file calls this method, and raising when
# the tag IS present looks inverted - confirm the intended check before
# relying on it.
def self.validate_request(opts)
  return unless opts[:associate_tag]

  raise Amazon::RequestError, ""
end
|
135
|
+
|
136
|
+
# Wraps the XML body returned by a REST call to the Amazon service and
# exposes the commonly used parts of it.
class Response
  # Parses +xml+ (a String) and strips the namespaces from the document, so
  # the XPath lookups below can use plain element names.
  #
  # NOTE(review): the second argument to Nokogiri::XML is the document URL;
  # the literal 'nul' may have been intended to be nil - confirm.
  def initialize(xml)
    @doc = Nokogiri::XML(xml, 'nul', 'UTF-8')
    @doc.remove_namespaces!
  end

  # Returns the parsed Nokogiri::XML::Document.
  def doc
    return @doc
  end

  # Returns true when the document's IsValid element reads "True".
  def is_valid_request?
    validity = Element.get(@doc, "//IsValid")
    validity == "True"
  end

  # Returns true when the document carries a non-empty error message.
  def has_error?
    message = error
    !message.nil? && !message.empty?
  end

  # Returns the text of the first Error/Message element, or nil.
  def error
    Element.get(@doc, "//Error/Message")
  end

  # Returns the text of the first Error/Code element, or nil.
  def error_code
    Element.get(@doc, "//Error/Code")
  end

  # Returns an Array of Element objects, one per Item node (memoized).
  def items
    @items ||= (@doc/"Item").map { |node| Element.new(node) }
  end

  # Returns the first item (an Element), or nil when there is none.
  def first_item
    items.first
  end

  # Returns the ItemPage value of the response as an Integer (memoized).
  def item_page
    @item_page ||= Element.get(@doc, "//ItemPage").to_i
  end

  # Returns the TotalResults value as an Integer (memoized).
  def total_results
    @total_results ||= Element.get(@doc, "//TotalResults").to_i
  end

  # Returns the TotalPages value as an Integer (memoized).
  def total_pages
    @total_pages ||= Element.get(@doc, "//TotalPages").to_i
  end

  # Marshal support: the response is serialized as its XML text ...
  def marshal_dump
    @doc.to_s
  end

  # ... and restored by parsing that text again.
  def marshal_load(xml)
    initialize(xml)
  end
end
|
205
|
+
|
206
|
+
protected
|
207
|
+
# Writes the debug message +s+ when the debug flag is set; does nothing
# otherwise. The message goes to RAILS_DEFAULT_LOGGER when that constant is
# defined, else to LOGGER when defined, else to standard output.
#
# Note: visibility keywords such as protected/private do not apply to
# methods defined with "def self.", so this method stays publicly callable.
def self.log(s)
  return unless self.debug
  return RAILS_DEFAULT_LOGGER.error(s) if defined? RAILS_DEFAULT_LOGGER
  return LOGGER.error(s) if defined? LOGGER

  puts s
end
|
217
|
+
|
218
|
+
private
|
219
|
+
# Builds the request URL for the given options.
#
# opts - request options. :country selects the endpoint from SERVICE_URLS
#        (defaults to 'us'); :AWS_secret_key, when present, is used to sign
#        the request and is never sent. Every other entry becomes a query
#        parameter whose name is the camelized key.
#
# Parameters are sorted by name before being joined, and the signature is
# computed over that exact query string.
#
# Fixes over the previous version: values used to be converted to Strings
# before the nil and Array checks ran, so those checks could never fire.
# Now nil values are skipped, Array values are joined with commas, and the
# caller's Hash is no longer modified.
#
# Returns the URL as a String.
# Raises Amazon::RequestError for a country without a known endpoint.
def self.prepare_url(opts)
  opts = opts.dup
  country = opts.delete(:country)
  country = 'us' if country.nil?
  request_url = SERVICE_URLS[country.to_sym]
  raise Amazon::RequestError, "Invalid country '#{country}'" unless request_url

  secret_key = opts.delete(:AWS_secret_key)
  request_host = URI.parse(request_url).host

  params = []
  opts.each do |name, value|
    next if value.nil?
    value = value.join(',') if value.is_a?(Array)
    params << [camelize(name.to_s), value.to_s]
  end

  pairs = params.sort { |a, b| a[0] <=> b[0] }.map do |name, value|
    log "Adding #{name}=#{value}"
    "#{name}=#{self.url_encode(value)}"
  end
  qs = pairs.join('&')

  signature = ''
  unless secret_key.nil?
    request_to_sign = "GET\n#{request_host}\n/onca/xml\n#{qs}"
    signature = "&Signature=#{sign_request(request_to_sign, secret_key)}"
  end

  "#{request_url}?#{qs}#{signature}"
end
|
256
|
+
|
257
|
+
# Percent-encodes every byte of +string+ that is not an ASCII letter, a
# digit or one of "_", ".", "~", "-". Hex digits are upper case.
#
# Returns the encoded String.
def self.url_encode(string)
  string.gsub(/([^a-zA-Z0-9_.~-]+)/) do |unsafe|
    unsafe.unpack('C*').map { |byte| '%%%02X' % byte }.join
  end
end
|
262
|
+
|
263
|
+
# Converts an underscored name into CamelCase: each "/x" becomes "::X",
# then the first character and every character following an underscore are
# upper-cased (the underscores themselves are removed).
#
# Returns the converted String.
def self.camelize(s)
  namespaced = s.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }
  namespaced.gsub(/(^|_)(.)/) { $2.upcase }
end
|
266
|
+
|
267
|
+
# Computes the request signature: the Base64-encoded HMAC-SHA256 of +url+
# (the string to sign) keyed with +key+, with "+" and "=" percent-encoded
# so the result can be appended to a query string.
#
# OpenSSL is used when it provides SHA256; otherwise the HMAC::SHA256
# implementation is used as a fallback.
#
# The final escaping no longer relies on URI.escape, which was removed in
# Ruby 3.0; the two characters are encoded directly instead.
#
# Returns the signature String, or nil when +key+ is nil.
def self.sign_request(url, key)
  return nil if key.nil?

  if OPENSSL_DIGEST_SUPPORT
    raw = OpenSSL::HMAC.digest(OPENSSL_DIGEST, key, url)
    signature = [raw].pack('m').chomp
  else
    signature = Base64.encode64(HMAC::SHA256.digest(key, url)).strip
  end

  # "+" -> "%2B", "=" -> "%3D"
  signature.gsub(/[+=]/) { |ch| '%%%02X' % ch.unpack('C')[0] }
end
|
279
|
+
end
|
280
|
+
|
281
|
+
# Internal wrapper that offers convenient access to the values of a
# Nokogiri element. The class-level helpers take the element explicitly;
# the instance methods apply the same helpers to the wrapped element.
class Element
  class << self
    # Returns the inner HTML of the first node matching +path+ under
    # +element+, or nil when +element+ is nil or nothing matches.
    def get(element, path='.')
      return unless element

      node = element.at_xpath(path)
      node ? node.inner_html : node
    end

    # Same as get, with HTML entities unescaped. Returns nil when get
    # finds nothing.
    def get_unescaped(element, path='.')
      text = self.get(element, path)
      text ? CGI::unescapeHTML(text) : nil
    end

    # Returns an Array with the value of every node matching +path+, or
    # nil when +element+ is nil.
    def get_array(element, path='.')
      return unless element

      found = element/path
      unless found.is_a?(Nokogiri::XML::NodeSet) || found.is_a?(Array)
        return [self.get(found)]
      end

      found.map { |item| self.get(item) }
    end

    # Returns a Hash mapping the name of each child of the first node
    # matching +path+ to that child's inner HTML; nil when +element+ is nil
    # or nothing matches.
    def get_hash(element, path='.')
      return unless element

      parent = element.at_xpath(path)
      return unless parent

      parent.children.inject({}) do |hash, child|
        hash[child.name] = child.inner_html
        hash
      end
    end
  end

  # Wraps +element+ (a Nokogiri::XML::Element).
  def initialize(element)
    @element = element
  end

  # Returns the wrapped Nokogiri::XML::Element.
  def elem
    @element
  end

  # Returns the nodes matching +path+ (example: element/"author"), or nil
  # when nothing matches.
  def /(path)
    found = @element/path
    found.size == 0 ? nil : found
  end

  # Returns an Array of Element objects for the nodes matching +path+, or
  # nil when nothing matches.
  def get_elements(path)
    found = self./(path)
    return unless found

    found.map { |node| Element.new(node) }
  end

  # Like get_elements, but returns only the first match (or nil).
  def get_element(path)
    found = get_elements(path)
    found ? found[0] : nil
  end

  # Returns the text value at +path+; the default path yields the value of
  # the wrapped element itself.
  def get(path='.')
    Element.get(@element, path)
  end

  # Returns the unescaped HTML text at +path+.
  def get_unescaped(path='.')
    Element.get_unescaped(@element, path)
  end

  # Returns the values at +path+ as an Array.
  def get_array(path='.')
    Element.get_array(@element, path)
  end

  # Returns the child values at +path+ as a Hash keyed by element name.
  def get_hash(path='.')
    Element.get_hash(@element, path)
  end

  # Returns the attributes of the wrapped element, or nil when no element
  # is wrapped.
  def attributes
    wrapped = self.elem
    wrapped ? wrapped.attributes : nil
  end

  # Returns the wrapped element rendered as a String, or nil when no
  # element is wrapped.
  def to_s
    elem ? elem.to_s : nil
  end
end
|
385
|
+
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# --------------------------------------------------------------------------- #
|
3
|
+
#
|
4
|
+
# amazon/entrylist.rb
|
5
|
+
#
|
6
|
+
# Copyright (c) 2008 - 2012, clown.
|
7
|
+
#
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
9
|
+
# modification, are permitted provided that the following conditions
|
10
|
+
# are met:
|
11
|
+
#
|
12
|
+
# - Redistributions of source code must retain the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer.
|
14
|
+
# - Redistributions in binary form must reproduce the above copyright
|
15
|
+
# notice, this list of conditions and the following disclaimer in the
|
16
|
+
# documentation and/or other materials provided with the distribution.
|
17
|
+
# - No names of its contributors may be used to endorse or promote
|
18
|
+
# products derived from this software without specific prior written
|
19
|
+
# permission.
|
20
|
+
#
|
21
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
22
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
23
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
24
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
25
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
26
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
27
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
28
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
29
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
30
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
31
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
32
|
+
#
|
33
|
+
# --------------------------------------------------------------------------- #
|
34
|
+
module Crown
|
35
|
+
module Amazon
|
36
|
+
# ------------------------------------------------------------------- #
|
37
|
+
#
|
38
|
+
# EntryList
|
39
|
+
#
|
40
|
+
# Class that extracts what appear to be Amazon ASINs from the href attributes
|
41
|
+
# of the a tags found at a given URI and fetches the Amazon product information.
|
42
|
+
#
|
43
|
+
# ------------------------------------------------------------------- #
|
44
|
+
class EntryList
|
45
|
+
require 'uri'
|
46
|
+
require 'crown/http-wrapper'
|
47
|
+
require 'crown/amazon/ecs'
|
48
|
+
|
49
|
+
# --------------------------------------------------------------- #
|
50
|
+
# accessors
|
51
|
+
# --------------------------------------------------------------- #
|
52
|
+
attr_accessor :interval
|
53
|
+
|
54
|
+
# --------------------------------------------------------------- #
#
#  initialize
#
#  Sets the interval to 2; #get passes this value to sleep after
#  every lookup.
#
# --------------------------------------------------------------- #
def initialize
  @interval = 2
end
|
60
|
+
|
61
|
+
# --------------------------------------------------------------- #
#
#  EntryList.get
#
#  Convenience form of #get: builds a new EntryList and forwards
#  the URI, the options and the block to its #get method.
#
# --------------------------------------------------------------- #
def EntryList.get(uri, options = {}, &block)
  crawler = Crown::Amazon::EntryList.new
  return crawler.get(uri, options, &block)
end
|
67
|
+
|
68
|
+
# --------------------------------------------------------------- #
#
#  asin
#
#  Fetches the page at +uri+ and collects the ASINs guessed from
#  the href of every a tag that points at an Amazon host. Each new
#  ASIN is yielded to the block (when given) as soon as it is
#  found; duplicates are reported only once.
#
#  options may carry :proxy_address and :proxy_port for the page
#  request.
#
#  Returns the Array of ASINs, or [] when the page could not be
#  fetched with status 200. (The collected Array used to be built
#  but never returned.)
#
#  A href that cannot be parsed is retried once after percent-
#  encoding the characters outside the RFC 2396 reserved and
#  unreserved sets - the job formerly done by URI.encode, which
#  was removed in Ruby 3.0 - and is skipped when it is still
#  invalid, instead of aborting the whole scan.
#
# --------------------------------------------------------------- #
def asin(uri, options = {})
  target = URI.parse(uri.strip)
  path = target.path
  path += '?' + target.query unless target.query.nil?

  proxy_addr = nil
  proxy_port = nil
  if options.is_a?(Hash)
    proxy_addr = options[:proxy_address]
    proxy_port = options[:proxy_port]
  end

  result = []
  Crown::HTTPWrapper.start(target.host, target.port, proxy_addr, proxy_port) do |session|
    response = session.get(path)
    return [] if response.nil? || response.code.to_i != 200

    html = Nokogiri::HTML.parse(response.body)
    html.search('a').each do |node|
      href = node['href']
      next if href.nil?
      href = href.strip

      link = begin
        URI.parse(href)
      rescue URI::InvalidURIError
        escaped = href.gsub(/[^\-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]/) do |ch|
          ch.unpack('C*').map { |byte| '%%%02X' % byte }.join
        end
        begin
          URI.parse(escaped)
        rescue URI::InvalidURIError
          nil
        end
      end
      next if link.nil? || link.host.nil? || link.path.nil?
      next unless link.host =~ /\A(?:www\.)?amazon\.(?:com|ca|co\.uk|de|co\.jp|jp|fr|cn)\z/

      code = guess_asin(link.path, link.query)
      next if code.nil? || result.include?(code)

      yield code if block_given?
      result.push(code)
    end
  end

  result
end
|
109
|
+
|
110
|
+
# --------------------------------------------------------------- #
#
#  get
#
#  Looks up, through Amazon::Ecs.item_lookup, every ASIN that
#  #asin finds on the page at +uri+. For each lookup that returns
#  at least one item, the first item is yielded to the block (when
#  given) and the response is collected. The method sleeps for
#  @interval after every lookup.
#
#  :proxy_address and :proxy_port only concern the page request,
#  so they are removed from the options handed to the lookup
#  (every lookup option is sent to the service as a query
#  parameter). Each lookup gets its own copy of the options, which
#  keeps the caller's Hash unchanged.
#
#  Returns the Array of collected responses.
#
# --------------------------------------------------------------- #
def get(uri, options = {})
  result = []
  asin(uri, options) do |code|
    lookup_options = options
    if options.is_a?(Hash)
      lookup_options = options.reject { |key, _| key == :proxy_address || key == :proxy_port }
    end

    entry = ::Amazon::Ecs.item_lookup(code, lookup_options)
    if entry && entry.items.length > 0
      yield entry.first_item if block_given?
      result.push(entry)
    end
    sleep(@interval)
  end

  result
end
|
126
|
+
|
127
|
+
private
|
128
|
+
# --------------------------------------------------------------- #
#
#  guess_asin
#
#  Extracts an ASIN from the path and the query of a URI. The
#  guess is made by looking for strings that match
#  /[B0123489][A-Z0-9]{9}/; a candidate is accepted when it starts
#  with "B" or when it is a valid ISBN-10 according to
#  check_digit. The path is searched first, then the query.
#
#  Candidates found in the query are validated in the same way as
#  those found in the path; previously the first match in the
#  query was returned unchecked, so any ten-digit parameter value
#  was reported as an ASIN.
#
#  Returns the ASIN, or nil when no candidate is accepted.
#
# --------------------------------------------------------------- #
def guess_asin(path, query)
  [path, query].each do |text|
    next if text.nil?

    text.scan(/[B0123489][A-Z0-9]{9}/) do |candidate|
      return candidate if candidate[0, 1] == 'B' || check_digit(candidate)
    end
  end

  return nil
end
|
151
|
+
|
152
|
+
# --------------------------------------------------------------- #
#
#  check_digit
#
#  Computes the ISBN-10 check digit of +asin+ and reports whether
#  the value is a valid ISBN-10.
#
#  The value must consist of nine digits followed by a digit or
#  "X". Letters used to be counted as 0, which let strings such as
#  "0ABCDEFGH0" pass as valid.
#
#  Returns true or false.
#
# --------------------------------------------------------------- #
def check_digit(asin)
  return false unless asin =~ /\A\d{9}[\dX]\z/

  # Weighted sum of the first nine digits, with weights 10 down to 2.
  sum = (0..8).inject(0) { |total, i| total + (10 - i) * asin[i, 1].to_i }

  check = 11 - (sum % 11)
  expected = if check < 10
               check.to_s
             elsif check == 10
               'X'
             else
               '0'
             end

  return expected == asin[9, 1]
end
|
169
|
+
end # EntryList
|
170
|
+
end # Amazon
|
171
|
+
end # Crown
|