crown 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/README.rdoc +14 -11
- data/VERSION +1 -1
- data/crown.gemspec +15 -5
- data/example/entrylist.rb +69 -0
- data/example/fbcount.rb +1 -1
- data/example/hbentry.rb +1 -1
- data/example/rtcount.rb +1 -0
- data/example/twcount.rb +1 -1
- data/example/{annual.rb → urilist.rb} +21 -26
- data/lib/crown.rb +1 -1
- data/{example/hbtrace.rb → lib/crown/amazon.rb} +8 -23
- data/lib/crown/amazon/crawler.rb +159 -0
- data/lib/crown/amazon/ecs.rb +385 -0
- data/lib/crown/amazon/entrylist.rb +171 -0
- data/lib/crown/backtype.rb +2 -2
- data/lib/crown/buzzurl.rb +2 -2
- data/lib/crown/cgm.rb +8 -0
- data/lib/crown/cgm/countable.rb +1 -1
- data/lib/crown/cgm/summarizable.rb +1 -1
- data/lib/crown/delicious.rb +2 -2
- data/lib/crown/facebook.rb +4 -4
- data/lib/crown/facebook/entry.rb +5 -3
- data/lib/crown/google.rb +38 -0
- data/lib/crown/google/plusone.rb +65 -0
- data/lib/crown/google/plusone/counter.rb +102 -0
- data/lib/crown/hatena/bookmark.rb +7 -7
- data/lib/crown/hatena/bookmark/entry.rb +70 -68
- data/lib/crown/hatena/bookmark/entrylist.rb +98 -0
- data/lib/crown/hatena/bookmark/urilist.rb +349 -0
- data/lib/crown/http-wrapper.rb +0 -1
- data/lib/crown/linkedin.rb +60 -0
- data/lib/crown/linkedin/counter.rb +81 -0
- data/lib/crown/livedoor/clip.rb +2 -2
- data/lib/crown/livedoor/clip/counter.rb +1 -1
- data/lib/crown/livedoor/reader.rb +2 -2
- data/lib/crown/topsy.rb +2 -3
- data/lib/crown/tweetmeme.rb +2 -2
- data/lib/crown/twitter.rb +1 -1
- data/lib/crown/twitter/uri.rb +2 -2
- data/lib/crown/twitter/user.rb +4 -4
- data/lib/crown/twitter/user/entry.rb +26 -6
- data/lib/crown/yahoo/bookmark.rb +3 -7
- data/test/crown-test.rb +34 -12
- metadata +17 -7
- data/lib/crown/hatena/bookmark/linktrace.rb +0 -135
@@ -0,0 +1,385 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2010 Herryanto Siatono
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
18
|
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
19
|
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
20
|
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
21
|
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'net/http'
|
25
|
+
require 'nokogiri'
|
26
|
+
require 'cgi'
|
27
|
+
require 'hmac-sha2'
|
28
|
+
require 'base64'
|
29
|
+
require 'openssl'
|
30
|
+
|
31
|
+
module Amazon
|
32
|
+
# Error raised by Amazon::Ecs when a request cannot be built (for example an
# unknown :country) or when the service replies with a non-success HTTP status.
class RequestError < StandardError; end
|
33
|
+
|
34
|
+
class Ecs
  # Service endpoint for each supported locale, keyed by the symbol the
  # :country request option is converted to.
  SERVICE_URLS = {
    :us => 'http://ecs.amazonaws.com/onca/xml',
    :uk => 'http://ecs.amazonaws.co.uk/onca/xml',
    :ca => 'http://ecs.amazonaws.ca/onca/xml',
    :de => 'http://ecs.amazonaws.de/onca/xml',
    :jp => 'http://ecs.amazonaws.jp/onca/xml',
    :fr => 'http://ecs.amazonaws.fr/onca/xml',
    :it => 'http://webservices.amazon.it/onca/xml',
    :cn => 'http://webservices.amazon.cn/onca/xml'
  }

  # True when the OpenSSL binding provides SHA256 (constant names are Strings
  # on Ruby 1.8 and Symbols on later versions, hence the two checks).
  OPENSSL_DIGEST_SUPPORT = OpenSSL::Digest.constants.include?('SHA256') ||
                           OpenSSL::Digest.constants.include?(:SHA256)

  # OpenSSL::Digest.new is used instead of the deprecated
  # OpenSSL::Digest::Digest alias, which current openssl releases no longer define.
  OPENSSL_DIGEST = OpenSSL::Digest.new('sha256') if OPENSSL_DIGEST_SUPPORT

  @@options = {
    :version => "2011-08-01",
    :service => "AWSECommerceService"
  }

  @@debug = false

  # Default request options merged into every request.
  def self.options
    @@options
  end

  # Replace the default request options.
  def self.options=(opts)
    @@options = opts
  end

  # Get debug flag.
  def self.debug
    @@debug
  end

  # Set debug flag to true or false.
  def self.debug=(dbg)
    @@debug = dbg
  end

  # Yields the default options hash so it can be modified in place.
  # Raises ArgumentError when no block is given.
  def self.configure(&proc)
    raise ArgumentError, "Block is required." unless block_given?
    yield @@options
  end

  # Search items by search terms. The search index defaults to 'Books'.
  # To search by something other than keywords, pass
  # :type => [search type param name]; the terms are then sent under that name.
  # The caller's +opts+ hash is not modified.
  def self.item_search(terms, opts = {})
    opts = opts.dup
    opts[:operation] = 'ItemSearch'
    opts[:search_index] ||= 'Books'

    type = opts.delete(:type)
    opts[(type || :keywords).to_sym] = terms

    send_request(opts)
  end

  # Look up an item by its id (ASIN). The caller's +opts+ hash is not modified.
  def self.item_lookup(item_id, opts = {})
    send_request(opts.merge(:operation => 'ItemLookup', :item_id => item_id))
  end

  # Look up a browse node by BrowseNodeId. The caller's +opts+ hash is not modified.
  def self.browse_node_lookup(browse_node_id, opts = {})
    send_request(opts.merge(:operation => 'BrowseNodeLookup', :browse_node_id => browse_node_id))
  end

  # Generic request to the ECS REST service. The :operation option must be
  # supplied by the caller. Returns a Response wrapping the reply body.
  # Raises Amazon::RequestError when the HTTP status is not a success.
  def self.send_request(opts)
    # Always work on a merged copy so the caller's hash never receives
    # the :timestamp added below, even when the defaults were set to nil.
    opts = (self.options || {}).merge(opts)

    # Include other required options
    opts[:timestamp] = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")

    request_url = prepare_url(opts)
    log "Request URL: #{request_url}"

    res = Net::HTTP.get_response(URI::parse(request_url))
    unless res.kind_of? Net::HTTPSuccess
      raise Amazon::RequestError, "HTTP Response: #{res.code} #{res.message}"
    end
    Response.new(res.body)
  end

  # Raises Amazon::RequestError (with an empty message) when :associate_tag is set.
  # NOTE(review): nothing in this file calls this method, and the condition
  # looks inverted (one would expect a missing tag to be the error) -- confirm
  # the intent before relying on it.
  def self.validate_request(opts)
    raise Amazon::RequestError, "" if opts[:associate_tag]
  end

  # Response object returned after a REST call to Amazon service.
  class Response

    # Parses the reply; +xml+ is the document as a String.
    def initialize(xml)
      @doc = Nokogiri::XML(xml, 'nul', 'UTF-8')
      # Namespaces are dropped so the plain XPath expressions below match.
      @doc.remove_namespaces!
      # @doc.xpath("//*").each { |elem| elem.name = elem.name.downcase }
      # @doc.xpath("//@*").each { |att| att.name = att.name.downcase }
    end

    # Return Nokogiri::XML::Document object.
    def doc
      @doc
    end

    # Return true if the reply's IsValid element is "True".
    def is_valid_request?
      Element.get(@doc, "//IsValid") == "True"
    end

    # Return true if the reply carries a non-empty error message.
    def has_error?
      !(error.nil? || error.empty?)
    end

    # Return error message.
    def error
      Element.get(@doc, "//Error/Message")
    end

    # Return error code
    def error_code
      Element.get(@doc, "//Error/Code")
    end

    # Return an array of Amazon::Element item objects (memoized).
    def items
      @items ||= (@doc/"Item").collect { |item| Element.new(item) }
    end

    # Return the first item (Amazon::Element)
    def first_item
      items.first
    end

    # Return the ItemPage value of the reply as an Integer (memoized).
    def item_page
      @item_page ||= Element.get(@doc, "//ItemPage").to_i
    end

    # Return total results.
    def total_results
      @total_results ||= Element.get(@doc, "//TotalResults").to_i
    end

    # Return total pages.
    def total_pages
      @total_pages ||= Element.get(@doc, "//TotalPages").to_i
    end

    # Marshal support: the document is serialized as its XML text ...
    def marshal_dump
      @doc.to_s
    end

    # ... and re-parsed from that text on load.
    def marshal_load(xml)
      initialize(xml)
    end
  end

  # NOTE: `protected` / `private` only affect instance methods defined after
  # them; the `def self.` methods below therefore remain publicly callable.
  protected

  # Writes +s+ to RAILS_DEFAULT_LOGGER, LOGGER or stdout (first one defined),
  # but only while the debug flag is set.
  def self.log(s)
    return unless self.debug
    if defined? RAILS_DEFAULT_LOGGER
      RAILS_DEFAULT_LOGGER.error(s)
    elsif defined? LOGGER
      LOGGER.error(s)
    else
      puts s
    end
  end

  private

  # Builds the full request URL from +opts+.
  #
  # :country selects the endpoint (default 'us') and :AWS_secret_key, when
  # present, is used to append a Signature parameter; neither is sent as a
  # query parameter. Remaining keys are camelized, sorted by name, and their
  # values percent-encoded. nil values are skipped and Array values are joined
  # with ','. The caller's hash is not modified.
  # Raises Amazon::RequestError for an unknown country.
  def self.prepare_url(opts)
    opts = opts.dup

    country = opts.delete(:country)
    country = 'us' if country.nil?
    request_url = SERVICE_URLS[country.to_sym]
    raise Amazon::RequestError, "Invalid country '#{country}'" unless request_url

    secret_key = opts.delete(:AWS_secret_key)
    request_host = URI.parse(request_url).host

    # Keep the raw values here: stringifying them at this point would hide
    # nil and Array values from the checks in the loop below.
    pairs = opts.collect { |name, value| [camelize(name.to_s), value] }
    pairs = pairs.sort { |left, right| left[0].to_s <=> right[0].to_s }

    params = []
    pairs.each do |name, value|
      log "Adding #{name}=#{value}"
      next if value.nil?
      value = value.join(',') if value.is_a? Array
      params << "#{name}=#{url_encode(value.to_s)}"
    end
    qs = params.join('&')

    signature = ''
    unless secret_key.nil?
      # The signed text is the HTTP verb, host, path and sorted query string.
      request_to_sign = "GET\n#{request_host}\n/onca/xml\n#{qs}"
      signature = "&Signature=#{sign_request(request_to_sign, secret_key)}"
    end

    "#{request_url}?#{qs}#{signature}"
  end

  # Percent-encodes every byte of +string+ that is not a letter, digit,
  # '_', '.', '~' or '-', using upper-case hex digits.
  def self.url_encode(string)
    string.gsub(/([^a-zA-Z0-9_.~-]+)/) do
      run = $1
      '%' + run.unpack('H2' * run.bytesize).join('%').upcase
    end
  end

  # Converts an underscored name to CamelCase ("item_id" -> "ItemId");
  # a '/' becomes '::' followed by the upcased next character.
  def self.camelize(s)
    s.to_s.gsub(/\/(.?)/) { "::" + $1.upcase }.gsub(/(^|_)(.)/) { $2.upcase }
  end

  # Returns the Base64 HMAC-SHA256 of +url+ (the text to sign) under +key+,
  # with '+' and '=' percent-encoded; returns nil when +key+ is nil.
  def self.sign_request(url, key)
    return nil if key.nil?

    if (OPENSSL_DIGEST_SUPPORT)
      signature = OpenSSL::HMAC.digest(OPENSSL_DIGEST, key, url)
      signature = [signature].pack('m').chomp
    else
      signature = Base64.encode64( HMAC::SHA256.digest(key, url) ).strip
    end
    # Same escaping the removed URI.escape(signature, /[+=]/) produced.
    signature.gsub(/[+=]/, '+' => '%2B', '=' => '%3D')
  end
end
|
280
|
+
|
281
|
+
# Thin wrapper around a Nokogiri node offering shortcuts for reading text
# values, arrays and hashes out of it.
class Element
  class << self
    # Text (inner HTML) of the first node matching +path+ under +element+;
    # nil when +element+ is missing or nothing matches.
    def get(element, path='.')
      return unless element
      node = element.at_xpath(path)
      node && node.inner_html
    end

    # Same as ::get, with HTML entities unescaped.
    def get_unescaped(element, path='.')
      text = get(element, path)
      text ? CGI::unescapeHTML(text) : nil
    end

    # Text values of every node matching +path+, as an Array.
    def get_array(element, path='.')
      return unless element

      found = element / path
      case found
      when Nokogiri::XML::NodeSet, Array
        found.map { |node| get(node) }
      else
        [get(found)]
      end
    end

    # Hash of child-node name => inner HTML for the first node matching
    # +path+; nil when nothing matches.
    def get_hash(element, path='.')
      return unless element

      node = element.at_xpath(path)
      return unless node

      node.children.each_with_object({}) do |child, values|
        values[child.name] = child.inner_html
      end
    end
  end

  # +element+ is the node to wrap (a Nokogiri::XML::Element).
  def initialize(element)
    @element = element
  end

  # The wrapped node.
  def elem
    @element
  end

  # Nodes matching +path+ below the wrapped node, or nil when there are none.
  # Example: element/"author".
  def /(path)
    found = @element / path
    found.size == 0 ? nil : found
  end

  # Array of Amazon::Element for the nodes matching +path+, or nil when
  # nothing matches.
  def get_elements(path)
    nodes = self / path
    return unless nodes
    nodes.map { |node| Element.new(node) }
  end

  # First Amazon::Element matching +path+, or nil when nothing matches.
  def get_element(path)
    wrapped = get_elements(path)
    wrapped ? wrapped[0] : nil
  end

  # Text value at +path+; with no argument, the value of the wrapped node.
  def get(path='.')
    Element.get(@element, path)
  end

  # Unescaped HTML text at +path+.
  def get_unescaped(path='.')
    Element.get_unescaped(@element, path)
  end

  # Array of text values at +path+.
  def get_array(path='.')
    Element.get_array(@element, path)
  end

  # Child element text values at +path+, keyed by element name.
  def get_hash(path='.')
    Element.get_hash(@element, path)
  end

  # Attributes of the wrapped node; nil when no node is wrapped.
  def attributes
    elem ? elem.attributes : nil
  end

  # String form of the wrapped node; nil when no node is wrapped.
  def to_s
    elem ? elem.to_s : nil
  end
end
|
385
|
+
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# --------------------------------------------------------------------------- #
|
3
|
+
#
|
4
|
+
# amazon/entrylist.rb
|
5
|
+
#
|
6
|
+
# Copyright (c) 2008 - 2012, clown.
|
7
|
+
#
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
9
|
+
# modification, are permitted provided that the following conditions
|
10
|
+
# are met:
|
11
|
+
#
|
12
|
+
# - Redistributions of source code must retain the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer.
|
14
|
+
# - Redistributions in binary form must reproduce the above copyright
|
15
|
+
# notice, this list of conditions and the following disclaimer in the
|
16
|
+
# documentation and/or other materials provided with the distribution.
|
17
|
+
# - No names of its contributors may be used to endorse or promote
|
18
|
+
# products derived from this software without specific prior written
|
19
|
+
# permission.
|
20
|
+
#
|
21
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
22
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
23
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
24
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
25
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
26
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
27
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
28
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
29
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
30
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
31
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
32
|
+
#
|
33
|
+
# --------------------------------------------------------------------------- #
|
34
|
+
module Crown
|
35
|
+
module Amazon
|
36
|
+
# ------------------------------------------------------------------- #
|
37
|
+
#
|
38
|
+
# EntryList
|
39
|
+
#
|
40
|
+
# 指定した URI の a タグ href 属性から Amazon の ASIN と思われる
|
41
|
+
# 情報を抽出し,Amazon の商品情報を取得するクラス.
|
42
|
+
#
|
43
|
+
# ------------------------------------------------------------------- #
|
44
|
+
class EntryList
|
45
|
+
require 'uri'
|
46
|
+
require 'crown/http-wrapper'
|
47
|
+
require 'crown/amazon/ecs'
|
48
|
+
|
49
|
+
# --------------------------------------------------------------- #
|
50
|
+
# accessors
|
51
|
+
# --------------------------------------------------------------- #
|
52
|
+
attr_accessor :interval
|
53
|
+
|
54
|
+
# --------------------------------------------------------------- #
|
55
|
+
# initialize
|
56
|
+
# --------------------------------------------------------------- #
|
57
|
+
# Sets the default interval (2), the value #get passes to sleep after
# each item lookup.
def initialize
  @interval = 2
end
|
60
|
+
|
61
|
+
# --------------------------------------------------------------- #
|
62
|
+
# EntryList.get
|
63
|
+
# --------------------------------------------------------------- #
|
64
|
+
# Class-level shortcut: builds a fresh EntryList and forwards +uri+,
# +options+ and the optional block to its #get.
def EntryList.get(uri, options = {}, &block)
  fetcher = Crown::Amazon::EntryList.new
  fetcher.get(uri, options, &block)
end
|
67
|
+
|
68
|
+
# --------------------------------------------------------------- #
|
69
|
+
# asin
|
70
|
+
# --------------------------------------------------------------- #
|
71
|
+
# Fetches the page at +uri+ and collects the distinct ASIN-like ids found in
# the href of its <a> tags that point at an Amazon host.
#
# +options+ may carry :proxy_address and :proxy_port, which are handed to
# Crown::HTTPWrapper.start. Each newly found id is yielded when a block is
# given. Returns the Array of ids, or [] when the page cannot be fetched
# with a 200 status.
def asin(uri, options = {})
  target = URI.parse(uri.strip)
  # A URI without a path would otherwise produce an empty request path.
  path = target.path.to_s
  path = '/' if path.empty?
  path += '?' + target.query if target.query

  proxy_addr = nil
  proxy_port = nil
  if options.is_a?(Hash)
    proxy_addr = options[:proxy_address]
    proxy_port = options[:proxy_port]
  end

  result = []
  Crown::HTTPWrapper.start(target.host, target.port, proxy_addr, proxy_port) { |session|
    response = session.get(path)
    return [] if response.nil? || response.code.to_i != 200

    html = Nokogiri::HTML.parse(response.body)
    html.search('a').each { |node|
      href = node['href']
      next if href.nil?
      href = href.strip

      begin
        link = URI.parse(href)
      rescue URI::InvalidURIError
        # Retry once with unsafe characters percent-encoded (URI.encode was
        # removed in Ruby 3.0); a link that still fails is skipped so one bad
        # href does not abort the whole scan.
        begin
          link = URI.parse(URI::DEFAULT_PARSER.escape(href))
        rescue URI::InvalidURIError
          next
        end
      end
      next if link.host.nil? || link.path.nil?
      next unless link.host =~ /\A(?:www\.)?amazon\.(?:com|ca|co\.uk|de|co\.jp|jp|fr|cn)\z/

      candidate = guess_asin(link.path, link.query)
      next if candidate.nil? || result.include?(candidate)

      yield candidate if block_given?
      result.push(candidate)
    }
  }

  # Return the collected ids rather than whatever HTTPWrapper.start returns.
  result
end
|
109
|
+
|
110
|
+
# --------------------------------------------------------------- #
|
111
|
+
# get
|
112
|
+
# --------------------------------------------------------------- #
|
113
|
+
# Looks up, through ::Amazon::Ecs.item_lookup, every ASIN that #asin finds
# on the page at +uri+.
#
# For each lookup that returns at least one item, the first item is yielded
# when a block is given and the whole response is appended to the result.
# Sleeps @interval after every lookup. Returns the Array of responses.
def get(uri, options = {})
  result = []
  asin(uri, options) { |code|
    # Pass a copy: item_lookup may write request keys into the hash it
    # receives, and the caller's options must stay untouched across lookups.
    # NOTE(review): proxy keys in options are forwarded to the lookup as
    # well -- confirm whether that is intended.
    entry = ::Amazon::Ecs.item_lookup(code, options.dup)
    if entry && entry.items.length > 0
      yield entry.first_item if block_given?
      result.push(entry)
    end
    sleep(@interval)
  }

  result
end
|
126
|
+
|
127
|
+
private
|
128
|
+
# --------------------------------------------------------------- #
|
129
|
+
#
|
130
|
+
# guess_asin
|
131
|
+
#
|
132
|
+
# パスおよびクエリーから ASIN を抽出する.推測方法は,
|
133
|
+
# /[B0123489][A-Z0-9]{9}/ にマッチする文字列を探すと言う方法を
|
134
|
+
# 採用している.
|
135
|
+
#
|
136
|
+
# --------------------------------------------------------------- #
|
137
|
+
# Extracts an ASIN candidate from a URL +path+ and +query+.
#
# Candidates are substrings matching /[B0123489][A-Z0-9]{9}/. In the path,
# the first candidate that starts with 'B' or passes check_digit wins; if
# none does, the first candidate in the query is returned unchecked.
# Returns nil when nothing matches.
def guess_asin(path, query)
  pattern = /[B0123489][A-Z0-9]{9}/

  unless path.nil?
    hit = path.scan(pattern).find { |candidate|
      candidate[0].chr == 'B' || check_digit(candidate)
    }
    return hit if hit
  end

  return nil if query.nil?

  found = query.match(pattern)
  found.nil? ? nil : found[0]
end
|
151
|
+
|
152
|
+
# --------------------------------------------------------------- #
|
153
|
+
#
|
154
|
+
# check_digit
|
155
|
+
#
|
156
|
+
# ISBN-10 のチェックディジット計算して,有効な ISBN-10 の値か
|
157
|
+
# どうか判定する.
|
158
|
+
#
|
159
|
+
# --------------------------------------------------------------- #
|
160
|
+
# Returns true when +asin+ is a well-formed ISBN-10 whose final character
# equals the check digit computed from its first nine digits.
#
# The first nine characters are weighted 10 down to 2 and summed; the check
# value is 11 - (sum % 11), written as 'X' for 10 and '0' for 11.
def check_digit(asin)
  # Letters would be read as 0 by to_i and could pass the checksum by
  # accident, so anything that is not nine digits plus a digit or 'X' is
  # rejected up front (this also covers nil and short strings).
  return false unless asin.to_s =~ /\A\d{9}[\dX]\z/

  sum = (0..8).inject(0) { |total, i| total + (10 - i) * asin[i, 1].to_i }
  remainder = (11 - (sum % 11)) % 11
  expected = (remainder == 10) ? 'X' : remainder.to_s
  expected == asin[9, 1]
end
|
169
|
+
end # EntryList
|
170
|
+
end # Amazon
|
171
|
+
end # Crown
|