crown 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/ChangeLog +4 -0
  2. data/README.rdoc +14 -11
  3. data/VERSION +1 -1
  4. data/crown.gemspec +15 -5
  5. data/example/entrylist.rb +69 -0
  6. data/example/fbcount.rb +1 -1
  7. data/example/hbentry.rb +1 -1
  8. data/example/rtcount.rb +1 -0
  9. data/example/twcount.rb +1 -1
  10. data/example/{annual.rb → urilist.rb} +21 -26
  11. data/lib/crown.rb +1 -1
  12. data/{example/hbtrace.rb → lib/crown/amazon.rb} +8 -23
  13. data/lib/crown/amazon/crawler.rb +159 -0
  14. data/lib/crown/amazon/ecs.rb +385 -0
  15. data/lib/crown/amazon/entrylist.rb +171 -0
  16. data/lib/crown/backtype.rb +2 -2
  17. data/lib/crown/buzzurl.rb +2 -2
  18. data/lib/crown/cgm.rb +8 -0
  19. data/lib/crown/cgm/countable.rb +1 -1
  20. data/lib/crown/cgm/summarizable.rb +1 -1
  21. data/lib/crown/delicious.rb +2 -2
  22. data/lib/crown/facebook.rb +4 -4
  23. data/lib/crown/facebook/entry.rb +5 -3
  24. data/lib/crown/google.rb +38 -0
  25. data/lib/crown/google/plusone.rb +65 -0
  26. data/lib/crown/google/plusone/counter.rb +102 -0
  27. data/lib/crown/hatena/bookmark.rb +7 -7
  28. data/lib/crown/hatena/bookmark/entry.rb +70 -68
  29. data/lib/crown/hatena/bookmark/entrylist.rb +98 -0
  30. data/lib/crown/hatena/bookmark/urilist.rb +349 -0
  31. data/lib/crown/http-wrapper.rb +0 -1
  32. data/lib/crown/linkedin.rb +60 -0
  33. data/lib/crown/linkedin/counter.rb +81 -0
  34. data/lib/crown/livedoor/clip.rb +2 -2
  35. data/lib/crown/livedoor/clip/counter.rb +1 -1
  36. data/lib/crown/livedoor/reader.rb +2 -2
  37. data/lib/crown/topsy.rb +2 -3
  38. data/lib/crown/tweetmeme.rb +2 -2
  39. data/lib/crown/twitter.rb +1 -1
  40. data/lib/crown/twitter/uri.rb +2 -2
  41. data/lib/crown/twitter/user.rb +4 -4
  42. data/lib/crown/twitter/user/entry.rb +26 -6
  43. data/lib/crown/yahoo/bookmark.rb +3 -7
  44. data/test/crown-test.rb +34 -12
  45. metadata +17 -7
  46. data/lib/crown/hatena/bookmark/linktrace.rb +0 -135
@@ -0,0 +1,98 @@
1
+ # -*- coding: utf-8 -*-
2
+ # --------------------------------------------------------------------------- #
3
+ #
4
+ # hatena/bookmark/entrylist.rb
5
+ #
6
+ # Copyright (c) 2008 - 2012, clown.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions
10
+ # are met:
11
+ #
12
+ # - Redistributions of source code must retain the above copyright
13
+ # notice, this list of conditions and the following disclaimer.
14
+ # - Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ # - No names of its contributors may be used to endorse or promote
18
+ # products derived from this software without specific prior written
19
+ # permission.
20
+ #
21
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ #
33
+ # --------------------------------------------------------------------------- #
34
+ module Crown
35
+ module Hatena
36
+ class Bookmark
37
+ require 'crown/hatena/bookmark/urilist'
38
+ require 'crown/hatena/bookmark/entry'
39
+
40
+ # --------------------------------------------------------------- #
41
+ #
42
+ # EntryList
43
+ #
44
+ # はてなブックマークの新着エントリー,または人気エントリー一覧
45
+ # の各エントリー情報を取得するクラス.
46
+ #
47
+ # --------------------------------------------------------------- #
48
+ class EntryList < URIList
49
+ attr_accessor :interval
50
+
51
+ # ----------------------------------------------------------- #
52
+ # initialize
53
+ # ----------------------------------------------------------- #
54
# Creates the list by handing +options+ unchanged to the parent
# constructor, then sets the pause that get / get_summary pass to
# sleep after each entry look-up. The pause defaults to 30 and can be
# changed afterwards through the +interval+ accessor.
def initialize(options = {})
  super(options)
  @interval = 30
end
58
+
59
+ # ----------------------------------------------------------- #
60
+ # EntryList.start
61
+ # ----------------------------------------------------------- #
62
# Convenience entry point: builds an EntryList from +options+ and calls
# #start on it, forwarding the optional block. Returns whatever #start
# returns.
def EntryList.start(options = {}, &block)
  list = Crown::Hatena::Bookmark::EntryList.new(options)
  list.start(&block)
end
65
+
66
+ # ----------------------------------------------------------- #
67
+ # get
68
+ # ----------------------------------------------------------- #
69
# Fetches entry summaries for the URLs produced by the parent class.
#
# Without a block: fetches one page of URLs from the parent and returns
# the array built by get_summary.
#
# With a block: keeps fetching pages while more? is true, looks up the
# summary of every URL, yields each non-nil summary to the block and
# sleeps @interval after every look-up.
def get()
  return get_summary(super()) unless block_given?

  while more?
    # super(&nil) is required here: a plain super() still forwards the
    # caller's block, so the parent would consume the block itself
    # (yielding raw URL records) and return nil instead of a page.
    super(&nil).each do |entry|
      summary = Crown::Hatena::Bookmark::Entry.new.summary(entry.uri,
        { :proxy_address => proxy_address(), :proxy_port => proxy_port() })
      yield summary unless summary.nil?
      sleep(@interval)
    end
  end
end
80
+
81
+ private
82
+ # ----------------------------------------------------------- #
83
+ # get_summary
84
+ # ----------------------------------------------------------- #
85
# Looks up the bookmark summary of every element of +entries+ (each
# element must respond to #uri) and returns the non-nil summaries as an
# array, in input order. Sleeps @interval after every look-up.
def get_summary(entries)
  summaries = []
  entries.each do |entry|
    summary = Crown::Hatena::Bookmark::Entry.new.summary(entry.uri,
      { :proxy_address => proxy_address(), :proxy_port => proxy_port() })
    # Array has no #add; the original call raised NoMethodError on the
    # first successful look-up.
    summaries.push(summary) unless summary.nil?
    sleep(@interval)
  end
  summaries
end
95
+ end # EntryList
96
+ end # Bookmark
97
+ end # Hatena
98
+ end # Crown
@@ -0,0 +1,349 @@
1
+ # -*- coding: utf-8 -*-
2
+ # --------------------------------------------------------------------------- #
3
+ #
4
+ # hatena/bookmark/urilist.rb
5
+ #
6
+ # Copyright (c) 2008 - 2012, clown.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions
10
+ # are met:
11
+ #
12
+ # - Redistributions of source code must retain the above copyright
13
+ # notice, this list of conditions and the following disclaimer.
14
+ # - Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ # - No names of its contributors may be used to endorse or promote
18
+ # products derived from this software without specific prior written
19
+ # permission.
20
+ #
21
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ #
33
+ # --------------------------------------------------------------------------- #
34
+ module Crown
35
+ module Hatena
36
+ class Bookmark
37
+ # --------------------------------------------------------------- #
38
+ #
39
+ # URIList
40
+ #
41
+ # 指定した Web サイトのはてなブックマーク一覧から順に URL を
42
+ # 取得するクラス.2 ページ目以降の結果は,html のヘッダ情報の
43
+ # next 属性からページの URL を推測して取得する.
44
+ #
45
+ # --------------------------------------------------------------- #
46
+ class URIList
47
+ require 'cgi'
48
+ require 'uri'
49
+ require 'date'
50
+ require 'crown/http-wrapper'
51
+ require 'rubygems'
52
+ require 'nokogiri'
53
+
54
+ # ----------------------------------------------------------- #
55
+ # structures
56
+ # ----------------------------------------------------------- #
57
+ Response = Struct.new(:uri, :title, :date)
58
+
59
+ # ----------------------------------------------------------- #
60
+ # variables
61
+ # ----------------------------------------------------------- #
62
# Accepted values of options[:type]; basename falls back to 'entrylist'
# for anything else.
+ @@valid_types = [ :hotentry, :entrylist, :video, :asin ]
63
# Accepted values of options[:category], checked by basename.
+ @@valid_categories = [ :general, :social, :economics, :life, :entertainment, :knowledge, :it, :game, :fun, :news ]
64
# NOTE(review): not referenced by any method visible in this file; the
# option keys are matched one by one inside basename instead.
+ @@valid_options = [ :uri, :sort, :threshold, :offset, :src ]
65
# Accepted values of options[:sort], checked by basename.
+ @@valid_sorts = [ :eid, :hot, :count ]
66
# Accepted values of options[:src], checked by basename.
+ @@valid_sources = [ :youtube, :nicovideo, :ugomemo ]
67
+
68
+ # ----------------------------------------------------------- #
69
+ # initialize
70
+ # ----------------------------------------------------------- #
71
# Prepares (but does not start) an HTTP session to b.hatena.ne.jp:80
# and computes the first request path from +options+.
#
# When +options+ is exactly a Hash, :proxy_address and :proxy_port are
# read from it and handed to the HTTP wrapper; otherwise both are nil.
# The remaining keys are interpreted by basename.
def initialize(options = {})
  address, port = nil, nil
  if options.instance_of?(Hash)
    # fetch with an explicit nil fallback mirrors a has_key? guard.
    address = options.fetch(:proxy_address, nil)
    port = options.fetch(:proxy_port, nil)
  end

  @session = Crown::HTTPWrapper.new('b.hatena.ne.jp', 80, address, port)
  @basename = basename(options)
  @path = String.new(@basename)
end
82
+
83
+ # ----------------------------------------------------------- #
84
+ # URIList.start
85
+ # ----------------------------------------------------------- #
86
# Convenience entry point: builds a URIList from +options+ and calls
# #start on it, forwarding the optional block. Returns whatever #start
# returns.
def URIList.start(options = {}, &block)
  list = URIList.new(options)
  list.start(&block)
end
89
+
90
+ # ----------------------------------------------------------- #
91
+ # start
92
+ # ----------------------------------------------------------- #
93
# Starts the HTTP session unless it is already active.
#
# With a block, yields self and finishes the session afterwards. The
# session is finished in an ensure clause so it is also released when
# the block raises (the exception still propagates to the caller).
# Without a block the session is left open; call #finish to close it.
#
# Always returns self.
def start()
  @session.start unless @session.active?
  if block_given?
    begin
      yield self
    ensure
      @session.finish if @session.active?
    end
  end
  self
end
101
+
102
+ # ----------------------------------------------------------- #
103
+ # finish
104
+ # ----------------------------------------------------------- #
105
# Closes the HTTP session if it is currently active; does nothing (and
# returns nil) otherwise.
def finish()
  return unless @session.active?
  @session.finish
end
108
+
109
+ # ----------------------------------------------------------- #
110
+ # reset
111
+ # ----------------------------------------------------------- #
112
# Rewinds the list: the current path becomes a fresh copy of the
# initial path, and an active HTTP session is closed. Returns self.
def reset()
  @path = String.new(@basename)
  if @session.active?
    @session.finish
  end
  self
end
117
+
118
+ # ----------------------------------------------------------- #
119
+ #
120
+ # more?
121
+ #
122
+ # まだ取得できる URL が存在するかどうかを判定する.more?()
123
+ # は html の next 属性から次のページが推測できているか
124
+ # どうかで判定している.
125
+ #
126
+ # ----------------------------------------------------------- #
127
# True while a next page is known, i.e. while the current path is not
# nil. get_block clears the path when no next link was found or a page
# produced no entries.
def more?()
  !@path.nil?
end
130
+
131
+ # ----------------------------------------------------------- #
132
+ # get
133
+ # ----------------------------------------------------------- #
134
# Without a block: returns the records of the next page (one call to
# get_block).
#
# With a block: keeps fetching pages while more? is true and yields
# every record to the block; returns nil.
def get()
  unless block_given?
    return get_block()
  end

  while more?
    get_block().each do |record|
      yield record
    end
  end
end
142
+
143
+ # ----------------------------------------------------------- #
144
+ # proxy_address
145
+ # ----------------------------------------------------------- #
146
# Proxy host as reported by the underlying HTTP session.
def proxy_address
  @session.proxy_address
end
149
+
150
+ # ----------------------------------------------------------- #
151
+ # proxy_port
152
+ # ----------------------------------------------------------- #
153
# Proxy port as reported by the underlying HTTP session.
def proxy_port
  @session.proxy_port
end
156
+
157
+ private
158
+ # ----------------------------------------------------------- #
159
+ #
160
+ # get_block
161
+ #
162
+ # はてなブックマークの新着エントリーに掲載された URL の
163
+ # 各種情報(URL, タイトル,日付)を取得する.
164
+ #
165
+ # ----------------------------------------------------------- #
166
# Downloads the page at the current path and returns its entries as an
# array of Response records (uri, title, date).
#
# Returns an empty array when there is no current path, when the
# request yields no response, or when the status code is not 200.
# After a successful request the current path is advanced to the next
# page as computed by path().
def get_block()
  entries = []
  return entries if @path.nil?

  response = @session.get(@path)
  return entries if response.nil? || response.code.to_i != 200
  @path = path(@path, response.body)

  document = Nokogiri::HTML(response.body)

  # Pages listing Amazon products use a different layout and are
  # handled by a dedicated method.
  products = document.xpath('//ul[@class="hotentry hotasin show_detail"]/li')
  return get_asin_block(products) unless products.empty?

  items = document.xpath('//ul[@class="hotentry"]/li')
  if items.empty?
    items = document.xpath('//ul[@class="videolist"]/li')
  end

  items.each do |item|
    record = Response.new

    item.search('h3/a').each do |anchor|
      record.uri = anchor['href']
      record.title = anchor['title']
    end

    item.css('ul.entry-info li.timestamp').each do |stamp|
      record.date = Date.strptime(stamp.content, "%Y/%m/%d")
    end

    entries.push(record)
  end

  # A page without a single entry ends the traversal; otherwise the
  # caller's while-more? loop could spin forever.
  @path = nil if entries.empty?

  entries
end
205
+
206
+ # ----------------------------------------------------------- #
207
+ #
208
+ # get_asin_block
209
+ #
210
+ # はてなブックマークの新着エントリーに掲載された Amazon
211
+ # 商品の各種情報(URL, タイトル,日付)を取得する.
212
+ #
213
+ # ----------------------------------------------------------- #
214
# Builds Response records (uri, title, date) from the list items of an
# Amazon-product bookmark page.
#
# For every item the first detail link that points to an Amazon host
# and contains an ASIN supplies the record's uri (host + '/gp/product/'
# + ASIN, without a scheme) and title. Items without such a link are
# dropped. The date is taken from the release-date text of the item
# when present.
#
# Links that cannot be used are skipped instead of aborting the whole
# page: a missing or unparsable href, a URI without a host (relative
# link), or an Amazon URL in which no ASIN can be found.
def get_asin_block(contents)
  result = []
  contents.each do |node|
    entry = Response.new

    node.xpath('ul[@class="asin_detail"]/li/a').each do |link|
      href = link['href']
      next if href.nil?

      begin
        uri = URI.parse(href)
      rescue URI::InvalidURIError
        next
      end

      # Relative links have no host at all.
      next if uri.host.nil?
      next if uri.host.match(/^(?:www\.)?amazon\.(?:com|ca|co\.uk|de|co\.jp|jp|fr|cn)$/).nil?

      # asin returns nil when nothing ASIN-like is found; concatenating
      # nil would raise TypeError.
      code = asin(uri)
      next if code.nil?

      entry.uri = uri.host + '/gp/product/' + code
      entry.title = link.content
      break
    end
    next if entry.uri.nil?

    node.css('li.asin-info-sub').each do |info|
      info.content.scan(/発売日: ([0-9]+\/[0-9]+\/[0-9]+)/) do |date|
        entry.date = Date.strptime(date[0], "%Y/%m/%d")
      end
    end

    result.push(entry)
  end
  result
end
239
+
240
+ # ----------------------------------------------------------- #
241
+ #
242
+ # asin
243
+ #
244
+ # パスおよびクエリーから ASIN を抽出する.推測方法は,
245
+ # /[B0123489][A-Z0-9]{9}/ にマッチする文字列を探すと言う
246
+ # 方法を採用している.
247
+ #
248
+ # ----------------------------------------------------------- #
249
# Extracts an ASIN from +uri+ (an object responding to #path and
# #query), or returns nil when none is found.
#
# Candidates are ten-character tokens matching the pattern below. In
# the path a candidate is accepted when it starts with 'B' or passes
# check_digit; if the path yields nothing, the first candidate in the
# query string is returned without further validation.
def asin(uri)
  pattern = /[B0123489][A-Z0-9]{9}/

  unless uri.path.nil?
    from_path = uri.path.scan(pattern).find do |candidate|
      candidate[0].chr == 'B' || check_digit(candidate)
    end
    return from_path unless from_path.nil?
  end

  unless uri.query.nil?
    from_query = uri.query.match(pattern)
    return from_query[0] unless from_query.nil?
  end

  nil
end
263
+
264
+ # ----------------------------------------------------------- #
265
+ #
266
+ # check_digit
267
+ #
268
+ # ISBN-10 のチェックディジット計算して,有効な ISBN-10 の
269
+ # 値かどうか判定する.
270
+ #
271
+ # ----------------------------------------------------------- #
272
# Returns true when the tenth character of +asin+ equals the ISBN-10
# check character computed from its first nine characters.
#
# The first nine characters are weighted 10 down to 2 (non-digits
# count as 0 because of String#to_i); 11 minus the weighted sum modulo
# 11 gives the check value, written as 'X' for 10 and '0' for 11.
def check_digit(asin)
  weighted = (0..8).inject(0) do |total, index|
    total + (10 - index) * asin[index].chr.to_i
  end

  value = 11 - (weighted % 11)
  expected =
    if value == 10
      'X'
    elsif value > 10
      '0'
    else
      value.to_s
    end

  expected == asin[9].chr
end
281
+
282
+ # ----------------------------------------------------------- #
283
+ # basename
284
+ # ----------------------------------------------------------- #
285
# Builds the initial request path (including the query string) from
# +options+. Returns '/entrylist' when +options+ is not exactly a Hash.
#
# Recognised keys:
#   :type      - one of @@valid_types; anything else selects 'entrylist'.
#                :asin additionally appends '?show_detail=1'.
#   :category  - one of @@valid_categories; appended as a path segment,
#                except for :hotentry + :general which becomes
#                '?mode=general'.
#   :uri       - appended as 'url=' with the value CGI-escaped.
#   :sort      - appended when the value is in @@valid_sorts.
#   :threshold - appended as given.
#   :offset    - appended as 'of=<value>'.
#   :src       - appended when the value is in @@valid_sources.
# Unknown keys and rejected values are ignored.
def basename(options = {})
  return '/entrylist' if options.class != Hash

  dest = '/'
  delimiter = '?'
  if options.has_key?(:type) && @@valid_types.include?(options[:type])
    dest << options[:type].to_s
    if options[:type] == :asin
      dest << '?show_detail=1'
      delimiter = '&'
    end
  else
    dest << 'entrylist'
  end

  if options.has_key?(:category) && @@valid_categories.include?(options[:category])
    if options[:type] == :hotentry && options[:category] == :general
      dest << '?mode=' << options[:category].to_s
      delimiter = '&'
    else
      dest << '/' << options[:category].to_s
    end
  end

  options.each do |key, value|
    pair =
      case key
      when :uri
        'url=' + CGI.escape(value)
      when :sort
        @@valid_sorts.include?(value) ? "sort=#{value}" : nil
      when :threshold
        "threshold=#{value}"
      when :offset
        # Previously this branch only switched the delimiter to '&'
        # without emitting anything, which dropped the option and could
        # produce paths such as '/entrylist&sort=hot'.
        # NOTE(review): 'of' is assumed to be the site's paging
        # parameter - confirm against a live "next" link.
        "of=#{value}"
      when :src
        @@valid_sources.include?(value) ? "src=#{value}" : nil
      end
    next if pair.nil?

    dest << delimiter << pair
    delimiter = '&'
  end

  dest
end
330
+
331
+ # ----------------------------------------------------------- #
332
+ #
333
+ # path
334
+ #
335
+ # 取得した html の next 属性から次のページへのパスを探す.
336
+ #
337
+ # ----------------------------------------------------------- #
338
# Derives the path of the next page from the first
# <link rel="next" href="..."> element found in +body+.
#
# Returns nil when there is no such element. Otherwise the href is
# resolved as follows:
#   starts with '/' - used as is;
#   starts with '?' - replaces the query string of +prev+;
#   anything else   - appended to the initial path (@basename).
#
# The previous implementation depended on Ruby 1.8 semantics
# (Array#to_s joining the elements, String#[] returning a character
# code). On Ruby 1.9 and later it never returned nil and produced
# paths such as '/entrylist[]', so traversal could not terminate
# through this method.
def path(prev, body)
  found = body.match(/<link rel="next" href="(.+?)".*?>/m)
  return nil if found.nil?

  link = found[1]
  # link[0, 1] yields a one-character string on every Ruby version.
  case link[0, 1]
  when '/' then link
  when '?' then prev.gsub(/\?.*$/, "").concat(link)
  else @basename + link
  end
end
346
+ end # URIList
347
+ end # Bookmark
348
+ end # Hatena
349
+ end # Crown
@@ -97,7 +97,6 @@ module Crown
97
97
  end
98
98
  return response
99
99
  rescue Exception => e
100
- puts(e.message)
101
100
  @http.finish if (@http.active?)
102
101
  if (n < @retry_limit)
103
102
  sleep(@retry_interval)