crown 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/ChangeLog +4 -0
  2. data/README.rdoc +14 -11
  3. data/VERSION +1 -1
  4. data/crown.gemspec +15 -5
  5. data/example/entrylist.rb +69 -0
  6. data/example/fbcount.rb +1 -1
  7. data/example/hbentry.rb +1 -1
  8. data/example/rtcount.rb +1 -0
  9. data/example/twcount.rb +1 -1
  10. data/example/{annual.rb → urilist.rb} +21 -26
  11. data/lib/crown.rb +1 -1
  12. data/{example/hbtrace.rb → lib/crown/amazon.rb} +8 -23
  13. data/lib/crown/amazon/crawler.rb +159 -0
  14. data/lib/crown/amazon/ecs.rb +385 -0
  15. data/lib/crown/amazon/entrylist.rb +171 -0
  16. data/lib/crown/backtype.rb +2 -2
  17. data/lib/crown/buzzurl.rb +2 -2
  18. data/lib/crown/cgm.rb +8 -0
  19. data/lib/crown/cgm/countable.rb +1 -1
  20. data/lib/crown/cgm/summarizable.rb +1 -1
  21. data/lib/crown/delicious.rb +2 -2
  22. data/lib/crown/facebook.rb +4 -4
  23. data/lib/crown/facebook/entry.rb +5 -3
  24. data/lib/crown/google.rb +38 -0
  25. data/lib/crown/google/plusone.rb +65 -0
  26. data/lib/crown/google/plusone/counter.rb +102 -0
  27. data/lib/crown/hatena/bookmark.rb +7 -7
  28. data/lib/crown/hatena/bookmark/entry.rb +70 -68
  29. data/lib/crown/hatena/bookmark/entrylist.rb +98 -0
  30. data/lib/crown/hatena/bookmark/urilist.rb +349 -0
  31. data/lib/crown/http-wrapper.rb +0 -1
  32. data/lib/crown/linkedin.rb +60 -0
  33. data/lib/crown/linkedin/counter.rb +81 -0
  34. data/lib/crown/livedoor/clip.rb +2 -2
  35. data/lib/crown/livedoor/clip/counter.rb +1 -1
  36. data/lib/crown/livedoor/reader.rb +2 -2
  37. data/lib/crown/topsy.rb +2 -3
  38. data/lib/crown/tweetmeme.rb +2 -2
  39. data/lib/crown/twitter.rb +1 -1
  40. data/lib/crown/twitter/uri.rb +2 -2
  41. data/lib/crown/twitter/user.rb +4 -4
  42. data/lib/crown/twitter/user/entry.rb +26 -6
  43. data/lib/crown/yahoo/bookmark.rb +3 -7
  44. data/test/crown-test.rb +34 -12
  45. metadata +17 -7
  46. data/lib/crown/hatena/bookmark/linktrace.rb +0 -135
@@ -0,0 +1,98 @@
1
+ # -*- coding: utf-8 -*-
2
+ # --------------------------------------------------------------------------- #
3
+ #
4
+ # hatena/bookmark/entrylist.rb
5
+ #
6
+ # Copyright (c) 2008 - 2012, clown.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions
10
+ # are met:
11
+ #
12
+ # - Redistributions of source code must retain the above copyright
13
+ # notice, this list of conditions and the following disclaimer.
14
+ # - Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ # - No names of its contributors may be used to endorse or promote
18
+ # products derived from this software without specific prior written
19
+ # permission.
20
+ #
21
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ #
33
+ # --------------------------------------------------------------------------- #
34
+ module Crown
35
+ module Hatena
36
+ class Bookmark
37
+ require 'crown/hatena/bookmark/urilist'
38
+ require 'crown/hatena/bookmark/entry'
39
+
40
+ # --------------------------------------------------------------- #
41
+ #
42
+ # EntryList
43
+ #
44
+ # はてなブックマークの新着エントリー,または人気エントリー一覧
45
+ # の各エントリー情報を取得するクラス.
46
+ #
47
+ # --------------------------------------------------------------- #
48
+ class EntryList < URIList
49
+ attr_accessor :interval
50
+
51
+ # ----------------------------------------------------------- #
52
+ # initialize
53
+ # ----------------------------------------------------------- #
54
# Sets up the crawler through the superclass initializer, then assigns
# the default value (30) of @interval, which get / get_summary hand to
# sleep after every summary request.
#
# options - passed unchanged to the superclass initializer.
def initialize(options = {})
  super(options)
  @interval = 30
end
58
+
59
+ # ----------------------------------------------------------- #
60
+ # EntryList.start
61
+ # ----------------------------------------------------------- #
62
# Convenience constructor: builds an EntryList from options and
# immediately calls #start on it, forwarding the optional block.
#
# Returns whatever the instance-level #start returns.
def EntryList.start(options = {}, &block)
  crawler = Crown::Hatena::Bookmark::EntryList.new(options)
  crawler.start(&block)
end
65
+
66
+ # ----------------------------------------------------------- #
67
+ # get
68
+ # ----------------------------------------------------------- #
69
# Fetches bookmark summaries for the entries listed by the superclass #get.
#
# Without a block: fetches one page of entries and returns an Array of
# their summaries (see get_summary).
# With a block: keeps fetching pages while more? is true and yields every
# non-nil summary; sleep(@interval) runs after each entry. Returns nil.
def get()
  # super(&nil) is deliberate. A plain super() passes no arguments but
  # still forwards this method's block, which would make the superclass
  # yield raw entries to the caller and return nil instead of the page.
  return get_summary(super(&nil)) unless block_given?

  while more?
    super(&nil).each do |entry|
      result = Crown::Hatena::Bookmark::Entry.new.summary(entry.uri,
        { :proxy_address => proxy_address(), :proxy_port => proxy_port() })
      yield result unless result.nil?
      sleep(@interval)
    end
  end
end
80
+
81
+ private
82
+ # ----------------------------------------------------------- #
83
+ # get_summary
84
+ # ----------------------------------------------------------- #
85
# Looks up the bookmark summary of every entry and collects the results.
#
# entries - objects responding to #uri.
#
# Returns an Array holding each non-nil summary, in input order.
# sleep(@interval) runs after every lookup.
def get_summary(entries)
  summaries = []
  entries.each do |entry|
    result = Crown::Hatena::Bookmark::Entry.new.summary(entry.uri,
      { :proxy_address => proxy_address(), :proxy_port => proxy_port() })
    # Array has no #add method; the original call raised NoMethodError.
    summaries.push(result) unless result.nil?
    sleep(@interval)
  end
  summaries
end
95
+ end # EntryList
96
+ end # Bookmark
97
+ end # Hatena
98
+ end # Crown
@@ -0,0 +1,349 @@
1
+ # -*- coding: utf-8 -*-
2
+ # --------------------------------------------------------------------------- #
3
+ #
4
+ # hatena/bookmark/urilist.rb
5
+ #
6
+ # Copyright (c) 2008 - 2012, clown.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions
10
+ # are met:
11
+ #
12
+ # - Redistributions of source code must retain the above copyright
13
+ # notice, this list of conditions and the following disclaimer.
14
+ # - Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ # - No names of its contributors may be used to endorse or promote
18
+ # products derived from this software without specific prior written
19
+ # permission.
20
+ #
21
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ #
33
+ # --------------------------------------------------------------------------- #
34
+ module Crown
35
+ module Hatena
36
+ class Bookmark
37
+ # --------------------------------------------------------------- #
38
+ #
39
+ # URIList
40
+ #
41
+ # 指定した Web サイトのはてなブックマーク一覧から順に URL を
42
+ # 取得するクラス.2 ページ目以降の結果は,html のヘッダ情報の
43
+ # next 属性からページの URL を推測して取得する.
44
+ #
45
+ # --------------------------------------------------------------- #
46
+ class URIList
47
+ require 'cgi'
48
+ require 'uri'
49
+ require 'date'
50
+ require 'crown/http-wrapper'
51
+ require 'rubygems'
52
+ require 'nokogiri'
53
+
54
+ # ----------------------------------------------------------- #
55
+ # structures
56
+ # ----------------------------------------------------------- #
57
+ Response = Struct.new(:uri, :title, :date)
58
+
59
+ # ----------------------------------------------------------- #
60
+ # variables
61
+ # ----------------------------------------------------------- #
62
+ @@valid_types = [ :hotentry, :entrylist, :video, :asin ]
63
+ @@valid_categories = [ :general, :social, :economics, :life, :entertainment, :knowledge, :it, :game, :fun, :news ]
64
+ @@valid_options = [ :uri, :sort, :threshold, :offset, :src ]
65
+ @@valid_sorts = [ :eid, :hot, :count ]
66
+ @@valid_sources = [ :youtube, :nicovideo, :ugomemo ]
67
+
68
+ # ----------------------------------------------------------- #
69
+ # initialize
70
+ # ----------------------------------------------------------- #
71
# Creates the HTTP session object for b.hatena.ne.jp (port 80) and works
# out the first path to request.
#
# options - when it is a Hash, :proxy_address and :proxy_port are read
#           from it (nil when absent); the whole value is also handed to
#           basename to build the request path.
def initialize(options = {})
  proxy_addr, proxy_port =
    if options.class == Hash
      [options.fetch(:proxy_address, nil), options.fetch(:proxy_port, nil)]
    else
      [nil, nil]
    end

  @session = Crown::HTTPWrapper.new('b.hatena.ne.jp', 80, proxy_addr, proxy_port)
  @basename = basename(options)
  @path = String.new(@basename)
end
82
+
83
+ # ----------------------------------------------------------- #
84
+ # URIList.start
85
+ # ----------------------------------------------------------- #
86
# Convenience constructor: builds an instance from options and calls
# #start on it, forwarding the optional block.
#
# The instance is created with `new` on the receiving class rather than a
# hard-coded URIList, so a subclass that does not override this method
# gets an instance of itself.
#
# Returns whatever the instance-level #start returns.
def URIList.start(options = {}, &block)
  return new(options).start(&block)
end
89
+
90
+ # ----------------------------------------------------------- #
91
+ # start
92
+ # ----------------------------------------------------------- #
93
# Starts the HTTP session if it is not already active.
#
# With a block, yields self and finishes the session afterwards; the
# ensure clause makes sure the session is also finished when the block
# raises. Without a block the session is left active and the caller is
# expected to call #finish.
#
# Returns self.
def start()
  @session.start unless @session.active?
  return self unless block_given?

  begin
    yield self
  ensure
    @session.finish if @session.active?
  end
  self
end
101
+
102
+ # ----------------------------------------------------------- #
103
+ # finish
104
+ # ----------------------------------------------------------- #
105
# Finishes the HTTP session, but only when it is currently active.
def finish()
  return nil unless @session.active?

  @session.finish
end
108
+
109
+ # ----------------------------------------------------------- #
110
+ # reset
111
+ # ----------------------------------------------------------- #
112
# Rewinds the crawler: the next request path becomes a fresh copy of the
# base path, and the HTTP session is finished if it is active.
#
# Returns self.
def reset()
  @path = String.new(@basename)
  session = @session
  session.finish if session.active?
  self
end
117
+
118
+ # ----------------------------------------------------------- #
119
+ #
120
+ # more?
121
+ #
122
+ # まだ取得できる URL が存在するかどうかを判定する.more?()
123
+ # は html の next 属性から次のページが推測できているか
124
+ # どうかで判定している.
125
+ #
126
+ # ----------------------------------------------------------- #
127
# Reports whether another page can still be requested, i.e. whether a
# next path is currently known (@path is not nil).
def more?()
  !@path.nil?
end
130
+
131
+ # ----------------------------------------------------------- #
132
+ # get
133
+ # ----------------------------------------------------------- #
134
# Retrieves listed entries.
#
# Without a block: returns the result of a single get_block call.
# With a block: calls get_block repeatedly while more? is true and yields
# every element; returns nil.
def get()
  return get_block() unless block_given?

  get_block().each { |item| yield item } while more?
end
142
+
143
+ # ----------------------------------------------------------- #
144
+ # proxy_address
145
+ # ----------------------------------------------------------- #
146
# Returns the proxy address reported by the underlying HTTP session.
def proxy_address
  @session.proxy_address
end
149
+
150
+ # ----------------------------------------------------------- #
151
+ # proxy_port
152
+ # ----------------------------------------------------------- #
153
# Returns the proxy port reported by the underlying HTTP session.
def proxy_port
  @session.proxy_port
end
156
+
157
+ private
158
+ # ----------------------------------------------------------- #
159
+ #
160
+ # get_block
161
+ #
162
+ # はてなブックマークの新着エントリーに掲載された URL の
163
+ # 各種情報(URL, タイトル,日付)を取得する.
164
+ #
165
+ # ----------------------------------------------------------- #
166
# Requests the page at @path and extracts one Response (uri, title, date)
# per listed entry.
#
# Returns an empty Array when no path is left, when the request yields no
# response, or when the status code is not 200. As a side effect @path is
# advanced to the next page (see path), or set to nil once a page
# produces no entries.
def get_block()
  return [] if @path.nil?

  response = @session.get(@path)
  return [] if response.nil? || response.code.to_i != 200

  @path = path(@path, response.body)
  html = Nokogiri::HTML(response.body)

  # Amazon product listings use a different page structure, so they are
  # handled by a dedicated method.
  asin_items = html.xpath('//ul[@class="hotentry hotasin show_detail"]/li')
  return get_asin_block(asin_items) unless asin_items.empty?

  items = html.xpath('//ul[@class="hotentry"]/li')
  items = html.xpath('//ul[@class="videolist"]/li') if items.empty?

  entries = items.map do |node|
    entry = Response.new

    node.search('h3/a').each do |anchor|
      entry.uri = anchor['href']
      entry.title = anchor['title']
    end

    node.css('ul.entry-info li.timestamp').each do |stamp|
      entry.date = Date.strptime(stamp.content, "%Y/%m/%d")
    end

    entry
  end

  # Paging can loop forever otherwise: stop as soon as the current page
  # yields no entries at all.
  @path = nil if entries.empty?

  entries
end
205
+
206
+ # ----------------------------------------------------------- #
207
+ #
208
+ # get_asin_block
209
+ #
210
+ # はてなブックマークの新着エントリーに掲載された Amazon
211
+ # 商品の各種情報(URL, タイトル,日付)を取得する.
212
+ #
213
+ # ----------------------------------------------------------- #
214
# Extracts one Response (uri, title, date) per Amazon product node.
#
# contents - list nodes; each must respond to #xpath and #css as used
#            below (Nokogiri nodes in get_block).
#
# For every node the first link whose host is an Amazon domain and from
# which an ASIN can be extracted becomes the entry; nodes without such a
# link are skipped. The date is taken from the "発売日: YYYY/MM/DD" text
# (release date) when present.
#
# Links that cannot be parsed, that have no host (relative links) or that
# carry no recognisable ASIN are skipped instead of raising.
#
# Returns an Array of Response.
def get_asin_block(contents)
  result = []
  contents.each do |node|
    entry = Response.new

    node.xpath('ul[@class="asin_detail"]/li/a').each do |link|
      begin
        uri = URI.parse(link['href'])
      rescue URI::InvalidURIError
        next
      end

      host = uri.host
      next if host.nil?
      next if host.match(/^(?:www\.)?amazon\.(?:com|ca|co\.uk|de|co\.jp|jp|fr|cn)$/).nil?

      code = asin(uri)
      next if code.nil?

      entry.uri = host + '/gp/product/' + code
      entry.title = link.content
      break
    end
    next if entry.uri.nil?

    node.css('li.asin-info-sub').each do |info|
      info.content.scan(/発売日: ([0-9]+\/[0-9]+\/[0-9]+)/) do |date|
        entry.date = Date.strptime(date[0], "%Y/%m/%d")
      end
    end

    result.push(entry)
  end
  result
end
239
+
240
+ # ----------------------------------------------------------- #
241
+ #
242
+ # asin
243
+ #
244
+ # パスおよびクエリーから ASIN を抽出する.推測方法は,
245
+ # /[B0123489][A-Z0-9]{9}/ にマッチする文字列を探すと言う
246
+ # 方法を採用している.
247
+ #
248
+ # ----------------------------------------------------------- #
249
# Guesses the ASIN contained in a URI by looking for a substring that
# matches /[B0123489][A-Z0-9]{9}/.
#
# The path is searched first: a candidate is accepted when it starts with
# 'B' or when check_digit accepts it. If the path yields nothing, the
# first match in the query string is returned without further checks.
#
# uri - object responding to #path and #query (e.g. a parsed URI).
#
# Returns the matching String, or nil when nothing matches.
def asin(uri)
  pattern = /[B0123489][A-Z0-9]{9}/

  unless uri.path.nil?
    uri.path.scan(pattern) do |candidate|
      return candidate if candidate[0].chr == 'B' || check_digit(candidate)
    end
  end

  unless uri.query.nil?
    found = uri.query.match(pattern)
    return found[0] unless found.nil?
  end

  nil
end
263
+
264
+ # ----------------------------------------------------------- #
265
+ #
266
+ # check_digit
267
+ #
268
+ # ISBN-10 のチェックディジット計算して,有効な ISBN-10 の
269
+ # 値かどうか判定する.
270
+ #
271
+ # ----------------------------------------------------------- #
272
# Decides whether the given string is a valid ISBN-10 by recomputing its
# check digit.
#
# The value must consist of nine decimal digits followed by a digit or
# 'X'. Anything else is rejected up front: letters in the first nine
# positions would otherwise be read as 0 by to_i and could validate by
# accident (e.g. "0AAAAAAAA0").
#
# asin - candidate String.
#
# Returns true or false.
def check_digit(asin)
  code = asin.to_s
  return false if (code =~ /\A[0-9]{9}[0-9X]\z/).nil?

  sum = 0
  (0..8).each { |i| sum += (10 - i) * code[i, 1].to_i }

  # Weighted sum modulo 11: a remainder value of 10 is written 'X',
  # and 11 wraps around to '0'.
  check = 11 - (sum % 11)
  expected =
    case check
    when 10 then 'X'
    when 11 then '0'
    else check.to_s
    end

  expected == code[9, 1]
end
281
+
282
+ # ----------------------------------------------------------- #
283
+ # basename
284
+ # ----------------------------------------------------------- #
285
# Builds the request path (including query string) for the first page.
#
# options - Hash; recognised keys:
#           :type      - one of @@valid_types (default: entrylist)
#           :category  - one of @@valid_categories
#           :uri       - emitted as url=<CGI-escaped value>
#           :sort      - one of @@valid_sorts
#           :threshold - emitted as threshold=<value>
#           :offset    - emitted as of=<value>
#           :src       - one of @@valid_sources
#           Any non-Hash value yields '/entrylist'.
#
# Returns the path String.
def basename(options = {})
  return '/entrylist' if options.class != Hash

  dest = String.new('/')
  delimiter = '?'

  # Appends one name=value pair with the correct separator.
  append = lambda do |name, value|
    dest << delimiter << name << '=' << value
    delimiter = '&'
  end

  if options.has_key?(:type) && @@valid_types.include?(options[:type])
    dest << options[:type].to_s
    if options[:type] == :asin
      dest << '?show_detail=1'
      delimiter = '&'
    end
  else
    dest << 'entrylist'
  end

  if options.has_key?(:category) && @@valid_categories.include?(options[:category])
    if options[:type] == :hotentry && options[:category] == :general
      dest << '?mode=' << options[:category].to_s
      delimiter = '&'
    else
      # NOTE(review): combined with :type => :asin this places the category
      # after the '?show_detail=1' query string - confirm whether that
      # combination is meant to be supported.
      dest << '/' << options[:category].to_s
    end
  end

  options.each do |key, value|
    case key
    when :uri
      append.call('url', CGI.escape(value))
    when :sort
      append.call(key.to_s, value.to_s) if @@valid_sorts.include?(value)
    when :threshold
      append.call(key.to_s, value.to_s)
    when :offset
      # Previously this branch only switched the delimiter to '&' and
      # emitted nothing, which dropped the offset and corrupted the next
      # parameter ("/entrylist&sort=hot").
      # NOTE(review): 'of' is assumed to be the offset parameter name used
      # by Hatena Bookmark paging links - verify against a live page.
      append.call('of', value.to_s)
    when :src
      append.call(key.to_s, value.to_s) if @@valid_sources.include?(value)
    end
  end

  dest
end
330
+
331
+ # ----------------------------------------------------------- #
332
+ #
333
+ # path
334
+ #
335
+ # 取得した html の next 属性から次のページへのパスを探す.
336
+ #
337
+ # ----------------------------------------------------------- #
338
# Derives the path of the next page from the <link rel="next" href="...">
# element of the fetched HTML.
#
# prev - path that was used to fetch body.
# body - HTML source of the fetched page.
#
# Returns
#   nil                          when no next link is present,
#   the href itself              when it starts with '/',
#   prev with its query replaced when the href starts with '?',
#   @basename + href             otherwise.
#
# The href is taken from the regexp capture group and compared as a
# String. The earlier form (Array#to_s plus comparing link[0] with the
# character codes 47 and 63) only behaved as intended on Ruby 1.8.
def path(prev, body)
  link = body[/<link rel="next" href="(.+?)".*?>/m, 1]
  return nil if link.nil? || link.empty?

  if link.start_with?('/')
    link
  elsif link.start_with?('?')
    prev.gsub(/\?.*$/, "").concat(link)
  else
    @basename + link
  end
end
346
+ end # URIList
347
+ end # Bookmark
348
+ end # Hatena
349
+ end # Crown
@@ -97,7 +97,6 @@ module Crown
97
97
  end
98
98
  return response
99
99
  rescue Exception => e
100
- puts(e.message)
101
100
  @http.finish if (@http.active?)
102
101
  if (n < @retry_limit)
103
102
  sleep(@retry_interval)