nicoscraper 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -3
- data/Gemfile.lock +11 -0
- data/README.md +39 -27
- data/VERSION +1 -1
- data/lib/classes/connector.rb +35 -37
- data/lib/classes/movie.rb +1 -2
- data/lib/classes/mylist.rb +2 -4
- data/lib/classes/parser.rb +6 -6
- data/lib/classes/searcher.rb +12 -11
- data/nicoscraper.gemspec +6 -2
- data/test/mylist_spec.rb +35 -0
- data/test/searcher_spec.rb +153 -75
- metadata +25 -13
data/Gemfile
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
|
-
|
3
|
-
# Example:
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
2
|
+
|
5
3
|
gem "damerau-levenshtein", ">= 0.5.3"
|
6
4
|
gem "libxml-ruby", ">= 2.2.2"
|
5
|
+
gem "mechanize", ">=2.0.0"
|
7
6
|
|
8
7
|
# Add dependencies to develop your gem here.
|
9
8
|
# Include everything needed to run rake, tests, features, etc.
|
data/Gemfile.lock
CHANGED
@@ -8,9 +8,19 @@ GEM
|
|
8
8
|
git (>= 1.2.5)
|
9
9
|
rake
|
10
10
|
libxml-ruby (2.2.2)
|
11
|
+
mechanize (2.0.1)
|
12
|
+
net-http-digest_auth (>= 1.1.1, ~> 1.1)
|
13
|
+
net-http-persistent (~> 1.8)
|
14
|
+
nokogiri (~> 1.4)
|
15
|
+
webrobots (>= 0.0.9, ~> 0.0)
|
16
|
+
net-http-digest_auth (1.1.1)
|
17
|
+
net-http-persistent (1.9)
|
18
|
+
nokogiri (1.5.0)
|
11
19
|
rake (0.8.7)
|
12
20
|
rcov (0.9.10)
|
13
21
|
shoulda (2.11.3)
|
22
|
+
webrobots (0.0.11)
|
23
|
+
nokogiri (>= 1.4.4)
|
14
24
|
|
15
25
|
PLATFORMS
|
16
26
|
ruby
|
@@ -20,6 +30,7 @@ DEPENDENCIES
|
|
20
30
|
damerau-levenshtein (>= 0.5.3)
|
21
31
|
jeweler (~> 1.6.4)
|
22
32
|
libxml-ruby (>= 2.2.2)
|
33
|
+
mechanize (>= 2.0.0)
|
23
34
|
rake (= 0.8.7)
|
24
35
|
rcov
|
25
36
|
shoulda
|
data/README.md
CHANGED
@@ -9,7 +9,7 @@ NicoScraper
|
|
9
9
|
**Author:** Masami Yonehara
|
10
10
|
**Copyright:** 2011
|
11
11
|
**License:** MIT License
|
12
|
-
**Latest Version:** 0.2.7
|
12
|
+
**Latest Version:** 0.2.7
|
13
13
|
**Release Date:** Sep 25th 2011
|
14
14
|
|
15
15
|
|
@@ -18,17 +18,24 @@ NicoScraper
|
|
18
18
|
|
19
19
|
ニコニコ動画の動画ページ、検索ページ、あるいはそのAtomフィードから情報を取得し、その情報に対して各種操作を行えます。タグやマイリスト検索結果からの抽出、および抽出結果に対する反復処理を行うメソッドも備え、ランキングサイト等の制作を支援します。
|
20
20
|
|
21
|
-
|
21
|
+
導入
|
22
22
|
------
|
23
|
-
|
23
|
+
|
24
|
+
1. インストール
|
25
|
+
|
26
|
+
動作にはRuby 1.9.2 が必要です。インストールには、
|
24
27
|
|
25
28
|
$ gem install nicoscraper
|
26
29
|
|
27
|
-
|
30
|
+
として下さい。依存関係としてMechanizeとlibxml-rubyがインストールされます。
|
31
|
+
|
32
|
+
もし`libxml2`や`libxslt`が足りない等のエラーが出た場合には、それらを導入して下さい。yumならば、
|
33
|
+
|
34
|
+
yum install -y libxml2 libxml2-devel libxslt libxslt-devel
|
28
35
|
|
29
|
-
|
36
|
+
とすれば、だいたい解決すると思います。
|
30
37
|
|
31
|
-
|
38
|
+
2. 使用法
|
32
39
|
|
33
40
|
require 'nicoscraper'
|
34
41
|
|
@@ -49,9 +56,9 @@ NicoScraper
|
|
49
56
|
movie = Nicos::Movie::new("sm1097445")
|
50
57
|
movie.getInfo
|
51
58
|
|
52
|
-
|
59
|
+
上の例のように、Movieクラスのインスタンス(以下「動画インスタンス」)を動画IDを与えて生成した後、`getInfo`メソッドを利用します。その結果、
|
53
60
|
|
54
|
-
|
61
|
+
p movie
|
55
62
|
|
56
63
|
<Nicos::Movie:0x00000002537aa8
|
57
64
|
@video_id="sm1097445",
|
@@ -85,11 +92,10 @@ NicoScraper
|
|
85
92
|
|
86
93
|
mylist = Nicos::Mylist::new("15196568")
|
87
94
|
mylist.getInfo
|
95
|
+
このように実行すると、
|
88
96
|
|
89
97
|
p mylist
|
90
98
|
|
91
|
-
このように実行すると、
|
92
|
-
|
93
99
|
<Nicos::Mylist:0x00000002884670
|
94
100
|
@mylist_id=15196568,
|
95
101
|
@movies=[
|
@@ -97,12 +103,12 @@ NicoScraper
|
|
97
103
|
@video_id="sm8481759",
|
98
104
|
@available=true,
|
99
105
|
@title="【Oblivion】おっさんの大冒険1(ゆっくり実況)",
|
100
|
-
...
|
106
|
+
... ,
|
101
107
|
#<Nicos::Movie:0x0000000251a6b0
|
102
108
|
@video_id="sm8506034",
|
103
109
|
@available=true,
|
104
110
|
@title="【Oblivion】おっさんの大冒険2(ゆっくり実況)",
|
105
|
-
...
|
111
|
+
... ,
|
106
112
|
],
|
107
113
|
@available=true,
|
108
114
|
@title="【Oblivion】おっさんの大冒険",
|
@@ -119,16 +125,16 @@ NicoScraper
|
|
119
125
|
|
120
126
|
t = Time.now
|
121
127
|
tda = Date::new(t.year, t.month, t.day) - 3
|
122
|
-
threeDaysAgo = Time.local(
|
128
|
+
threeDaysAgo = Time.local(tda.year, tda.month, tda.day, 0, 0, 0).to_i
|
123
129
|
|
124
130
|
searchByTag = Nicos::Searcher::ByTag.new()
|
125
131
|
searchByTag.execute(
|
126
132
|
'VOCALOID',
|
127
133
|
'post_new'
|
128
|
-
) { |result,
|
134
|
+
) { |result, status|
|
129
135
|
terminate = false
|
130
136
|
|
131
|
-
result.each { |movie|
|
137
|
+
result.each { |movie| # first_retrieve == 投稿日
|
132
138
|
terminate = ( movie.first_retrieve <= threeDaysAgo )
|
133
139
|
|
134
140
|
puts movie.title +
|
@@ -139,13 +145,13 @@ NicoScraper
|
|
139
145
|
if terminate
|
140
146
|
puts "loop terminated."
|
141
147
|
else
|
142
|
-
"continue"
|
148
|
+
"continue" # "continue"を返すと検索を継続
|
143
149
|
end
|
144
150
|
}
|
145
151
|
|
146
152
|
この例では、`VOCALOID`というタグの付く動画を、`post_new`=投稿日時が新しい順からさかのぼって取得し、取得した動画の日付が3日前の0時0分を超えるまでそれを続けます。
|
147
153
|
|
148
|
-
ブロック内の第1引数には取得結果に基づく動画インスタンスが与えられるのですが、これは32個分の配列です。なぜ32個のセットなのかと言うと、ご存知のようにニコニコ動画の検索画面はページで区切られており、Searcherモジュールの各メソッドはページ毎に情報を取得し、ページ単位でブロックをコールするからです。HTMLから取得するにしろAtomフィードから取得するにしろ、1ページに32
|
154
|
+
ブロック内の第1引数には取得結果に基づく動画インスタンスが与えられるのですが、これは32個分の配列です。なぜ32個のセットなのかと言うと、ご存知のようにニコニコ動画の検索画面はページで区切られており、Searcherモジュールの各メソッドはページ毎に情報を取得し、ページ単位でブロックをコールするからです。HTMLから取得するにしろAtomフィードから取得するにしろ、1ページに32個の動画情報が含まれています。第2引数にはそれまでのアクセス成否情報等が渡されます。
|
149
155
|
|
150
156
|
そして、**ブロック内で`"continue"`の文字列を返すことによりスクレイプが継続します。**つまり、`"continue"`文字列を返し続けるロジックを組み込まないと、1ページ目を読んだ時点で処理が終了します。これは意図せざる過剰アクセスを防ぐための措置です。
|
151
157
|
|
@@ -307,8 +313,24 @@ Mylistクラスのインスタンス
|
|
307
313
|
動画に与えられるもう一つの一意なIDであり、投稿日時と同じか非常に近いUNIX時間になっている。例えば、"【初音ミク】みくみくにしてあげる♪【してやんよ】"の動画IDは`sm1097445`であり、アイテムIDは`1190218917`である。このアイテムIDを日時に直すと、日本時間における2007年9月20日 1:21:57となるが、動画に投稿日時として表示されるのは、2007年9月20日 1:22:02である。
|
308
314
|
|
309
315
|
|
316
|
+
###要望、バグ報告について
|
317
|
+
以下のどちらかにお願いします。
|
318
|
+
|
319
|
+
+ zeitdiebe@gmail.com
|
320
|
+
|
321
|
+
+ http://twitter.com/h_demon
|
322
|
+
|
323
|
+
GitHubを経由して下さってもいいのですが、まだ慣れていないので対応が遅れるかもしれません。
|
324
|
+
|
325
|
+
|
310
326
|
###更新履歴
|
311
327
|
|
328
|
+
**v 0.2.8**
|
329
|
+
|
330
|
+
+ MylistAtomパーサが再生数等を認識しない問題を修正。
|
331
|
+
|
332
|
+
+ Searcherメソッドのブロック内の第2仮引数を、それまでのアクセス成否情報を含むハッシュオブジェクトに変更。
|
333
|
+
|
312
334
|
**v 0.2.7**
|
313
335
|
|
314
336
|
+ ドキュメントのSearcherモジュールの例の誤り、その他細かい点を訂正。
|
@@ -347,13 +369,3 @@ Mylistクラスのインスタンス
|
|
347
369
|
+ シリーズ性判定の強化。説明文中にある「次 sm***」等の表記を解析し、マイリストに頼らずにシリーズ性を判定するようにする。
|
348
370
|
|
349
371
|
+ コミュニティ動画、限定公開動画・マイリストへの対応。
|
350
|
-
|
351
|
-
|
352
|
-
###要望、バグ報告について
|
353
|
-
以下のどちらかにお願いします。
|
354
|
-
|
355
|
-
+ zeitdiebe@gmail.com
|
356
|
-
|
357
|
-
+ http://twitter.com/h_demon
|
358
|
-
|
359
|
-
GitHubを経由して下さってもいいのですが、まだ慣れていないので対応が遅れるかもしれません。
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.7
|
1
|
+
0.2.8
|
data/lib/classes/connector.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$:.unshift File.dirname(__FILE__)
|
3
3
|
|
4
4
|
require 'rubygems'
|
5
|
-
require 'ruby-debug'
|
6
5
|
require 'net/http'
|
7
6
|
|
8
7
|
module Nicos
|
@@ -11,37 +10,44 @@ module Nicos
|
|
11
10
|
def initialize
|
12
11
|
# デフォルトのウェイト設定
|
13
12
|
@seqTime = 0
|
14
|
-
@result = {
|
13
|
+
@result = {
|
14
|
+
"notPublic" => [],
|
15
|
+
"limInCommunity" => [],
|
16
|
+
"notFound" => [],
|
17
|
+
"deleted" => [],
|
18
|
+
"succeededNum" => 0
|
19
|
+
}
|
15
20
|
@waitConfig = @@waitConfig
|
16
21
|
end
|
17
22
|
attr_accessor :waitConfig
|
23
|
+
attr_accessor :result
|
18
24
|
|
19
25
|
private
|
20
26
|
|
21
27
|
def notPublic
|
22
28
|
# マイリスト非公開のときに403になる。後で専用の処理を入れるべき。
|
23
29
|
puts "This movie/mylist is not public."
|
24
|
-
@result
|
25
|
-
return { "order" => "
|
30
|
+
@result["notPublic"].push(@nowAccess)
|
31
|
+
return { "order" => "skip" }
|
26
32
|
end
|
27
33
|
|
28
34
|
def limInCommunity
|
29
35
|
puts "This movie/mylist is limited in comunity members."
|
30
36
|
# ex. item_id -> 1294702905
|
31
|
-
@result
|
32
|
-
return { "order" => "
|
37
|
+
@result["limInCommunity"].push(@nowAccess)
|
38
|
+
return { "order" => "skip" }
|
33
39
|
end
|
34
40
|
|
35
41
|
def notFound
|
36
42
|
puts "This movie/mylist is not found."
|
37
|
-
@result
|
38
|
-
return { "order" => "
|
43
|
+
@result["notFound"].push(@nowAccess)
|
44
|
+
return { "order" => "skip" }
|
39
45
|
end
|
40
46
|
|
41
47
|
def deleted
|
42
48
|
puts "This movie/mylist is deleted."
|
43
|
-
@result
|
44
|
-
return { "order" => "
|
49
|
+
@result["deleted"].push(@nowAccess)
|
50
|
+
return { "order" => "skip" }
|
45
51
|
end
|
46
52
|
|
47
53
|
def deniedSeqReq
|
@@ -72,7 +78,14 @@ module Nicos
|
|
72
78
|
return { "order" => "retry" }
|
73
79
|
end
|
74
80
|
|
75
|
-
def
|
81
|
+
def reachedLast
|
82
|
+
puts "Reached the last page."
|
83
|
+
@result = "reachedLast"
|
84
|
+
return { "order" => "terminate" }
|
85
|
+
end
|
86
|
+
|
87
|
+
def succeeded(resBody)
|
88
|
+
@result["succeededNum"] += 1
|
76
89
|
sleep @waitConfig["each"]
|
77
90
|
@seqTime += 1
|
78
91
|
|
@@ -80,7 +93,7 @@ module Nicos
|
|
80
93
|
sleep @waitConfig["afterSeq"]
|
81
94
|
@seqTime = 0
|
82
95
|
end
|
83
|
-
return { "order" => "
|
96
|
+
return { "order" => "afterTheSuccess", "body" => resBody }
|
84
97
|
end
|
85
98
|
|
86
99
|
def wait(status)
|
@@ -100,6 +113,7 @@ module Nicos
|
|
100
113
|
Net::HTTP.start(host, 80) { |http|
|
101
114
|
response = http.get(entity, HEADER)
|
102
115
|
}
|
116
|
+
@nowAccess = host + entity
|
103
117
|
|
104
118
|
rescue => e
|
105
119
|
puts e
|
@@ -122,8 +136,7 @@ module Nicos
|
|
122
136
|
else
|
123
137
|
unknownError
|
124
138
|
end
|
125
|
-
end until res["order"]
|
126
|
-
res["order"] == "terminate"
|
139
|
+
end until res["order"] != "retry"
|
127
140
|
|
128
141
|
res
|
129
142
|
end
|
@@ -138,35 +151,20 @@ module Nicos
|
|
138
151
|
end
|
139
152
|
|
140
153
|
def reviewRes(resBody)
|
154
|
+
resBody = resBody.force_encoding("UTF-8")
|
141
155
|
if # アクセス集中時
|
142
|
-
/大変ご迷惑をおかけいたしますが、しばらく時間をあけてから再度検索いただくようご協力をお願いいたします。/ =~
|
143
|
-
|
144
|
-
then
|
156
|
+
/大変ご迷惑をおかけいたしますが、しばらく時間をあけてから再度検索いただくようご協力をお願いいたします。/ =~
|
157
|
+
resBody then
|
145
158
|
serverIsBusy
|
159
|
+
elsif /\<entry\>/ =~ resBody && /\<\/entry\>/ =~ resBody
|
160
|
+
succeeded(resBody)
|
146
161
|
else
|
147
|
-
|
162
|
+
reachedLast
|
148
163
|
end
|
149
164
|
end
|
150
165
|
end
|
151
166
|
|
152
|
-
class TagAtom <
|
153
|
-
private
|
154
|
-
|
155
|
-
def forbidden
|
156
|
-
# マイリストが非公開の場合、html/Atomのどちらへのリクエストであっても、403が返ってくる。
|
157
|
-
notPublic
|
158
|
-
end
|
159
|
-
|
160
|
-
def reviewRes(resBody)
|
161
|
-
if # アクセス集中時
|
162
|
-
/大変ご迷惑をおかけいたしますが、しばらく時間をあけてから再度検索いただくようご協力をお願いいたします。/ =~
|
163
|
-
resBody.force_encoding("UTF-8")
|
164
|
-
then
|
165
|
-
serverIsBusy
|
166
|
-
else
|
167
|
-
success(resBody)
|
168
|
-
end
|
169
|
-
end
|
167
|
+
class TagAtom < MylistAtom
|
170
168
|
end
|
171
169
|
|
172
170
|
class GetThumbInfo < Xml
|
@@ -187,7 +185,7 @@ module Nicos
|
|
187
185
|
serverIsBusy
|
188
186
|
end
|
189
187
|
else
|
190
|
-
|
188
|
+
succeeded(resBody)
|
191
189
|
end
|
192
190
|
end
|
193
191
|
end
|
data/lib/classes/movie.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$:.unshift File.dirname(__FILE__)
|
3
3
|
|
4
4
|
require 'rubygems'
|
5
|
-
require 'ruby-debug'
|
6
5
|
require 'damerau-levenshtein'
|
7
6
|
require 'kconv'
|
8
7
|
|
@@ -126,7 +125,7 @@ module Nicos
|
|
126
125
|
result = con.get(host, entity)
|
127
126
|
|
128
127
|
if
|
129
|
-
result["order"] == "
|
128
|
+
result["order"] == "afterTheSuccess"
|
130
129
|
then
|
131
130
|
parsed = Nicos::Parser::getThumbInfo(result["body"])
|
132
131
|
set(parsed)
|
data/lib/classes/mylist.rb
CHANGED
@@ -153,10 +153,8 @@ module Nicos
|
|
153
153
|
puts @mylist_id
|
154
154
|
entity = '/mylist/' + @mylist_id.to_s + '?rss=atom&numbers=1'
|
155
155
|
result = con.get(host, entity)
|
156
|
-
|
157
|
-
if
|
158
|
-
result["order"] == "success"
|
159
|
-
then
|
156
|
+
|
157
|
+
if result["order"] == "afterTheSuccess"
|
160
158
|
parsed = Nicos::Parser::mylistAtom(result["body"])
|
161
159
|
|
162
160
|
parsed["entry"].each { |e|
|
data/lib/classes/parser.rb
CHANGED
@@ -204,16 +204,16 @@ module Nicos
|
|
204
204
|
/(<p\sclass\=\"nico-info-length\"\>)([^\<]{1,})/ =~ html
|
205
205
|
length = $2
|
206
206
|
|
207
|
-
/(<
|
208
|
-
first_retrieve = $2
|
207
|
+
/(<strong\sclass\=\"nico-info-date\"\>)([^\<]{1,})/ =~ html
|
208
|
+
first_retrieve = Nicos::Converter.japToUnix($2)
|
209
209
|
|
210
|
-
/(<
|
210
|
+
/(<strong\sclass\=\"nico-numbers-view\"\>)([^\<]{1,})/ =~ html
|
211
211
|
view = $2
|
212
212
|
|
213
|
-
/(<
|
213
|
+
/(<strong\sclass\=\"nico-numbers-res\"\>)([^\<]{1,})/ =~ html
|
214
214
|
res = $2
|
215
215
|
|
216
|
-
/(<
|
216
|
+
/(<strong\sclass\=\"nico-numbers-mylist\"\>)([^\<]{1,})/ =~ html
|
217
217
|
mylist = $2
|
218
218
|
|
219
219
|
parsed["entry"][n]["memo"] = memo
|
@@ -227,7 +227,7 @@ module Nicos
|
|
227
227
|
end
|
228
228
|
end
|
229
229
|
end
|
230
|
-
|
230
|
+
|
231
231
|
doc.close
|
232
232
|
parsed
|
233
233
|
end
|
data/lib/classes/searcher.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
$:.unshift File.dirname(__FILE__)
|
3
3
|
|
4
4
|
require 'rubygems'
|
5
|
-
require 'ruby-debug'
|
6
5
|
require 'time'
|
7
6
|
require 'mechanize'
|
8
7
|
require 'kconv'
|
@@ -15,7 +14,7 @@ module Nicos
|
|
15
14
|
class ByTagSuper < Nicos::Connector::Config
|
16
15
|
private
|
17
16
|
|
18
|
-
def get(tag, sort,
|
17
|
+
def get(tag, sort, method)
|
19
18
|
paramAry = []
|
20
19
|
|
21
20
|
case sort
|
@@ -45,9 +44,9 @@ module Nicos
|
|
45
44
|
sortStr = 'sort=l&order=a'
|
46
45
|
end
|
47
46
|
|
48
|
-
paramAry.push("page=#{page}") if page != 1
|
47
|
+
paramAry.push("page=#{@page}") if @page != 1
|
49
48
|
paramAry.push(sortStr)
|
50
|
-
|
49
|
+
paramAry.push("rss=atom&numbers=1") if method == "atom"
|
51
50
|
param = tag + "?" + paramAry.join('&')
|
52
51
|
|
53
52
|
host = 'www.nicovideo.jp'
|
@@ -57,20 +56,18 @@ module Nicos
|
|
57
56
|
end
|
58
57
|
|
59
58
|
def loop(tag, sort, method, &block)
|
60
|
-
|
61
|
-
page = 1
|
59
|
+
@page = 1
|
62
60
|
movieObjAry = []
|
63
|
-
order
|
61
|
+
order = ""
|
64
62
|
|
65
63
|
begin
|
66
64
|
response = get(
|
67
65
|
tag,
|
68
66
|
sort,
|
69
|
-
page,
|
70
67
|
method
|
71
68
|
)
|
72
69
|
|
73
|
-
if response["order"] == "
|
70
|
+
if response["order"] == "afterTheSuccess"
|
74
71
|
result = parse(response["body"])
|
75
72
|
result.each { |each|
|
76
73
|
movie = Nicos::Movie.new(each["video_id"])
|
@@ -78,10 +75,14 @@ module Nicos
|
|
78
75
|
movie.set(each)
|
79
76
|
movieObjAry.push(movie)
|
80
77
|
}
|
78
|
+
elsif response["order"] == "terminate"
|
79
|
+
puts "Request loop terminated."
|
80
|
+
break
|
81
81
|
end
|
82
82
|
|
83
|
-
|
84
|
-
|
83
|
+
status = {"page" => @page, "results" => @connector.result}
|
84
|
+
order = block.call(movieObjAry, status)
|
85
|
+
@page += 1
|
85
86
|
end until order != "continue"
|
86
87
|
end
|
87
88
|
|
data/nicoscraper.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{nicoscraper}
|
8
|
-
s.version = "0.2.7"
|
8
|
+
s.version = "0.2.8"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = [%q{Masami Yonehara}]
|
12
|
-
s.date = %q{2011-09-
|
12
|
+
s.date = %q{2011-09-29}
|
13
13
|
s.description = %q{It scrape movies and mylists of Niconico douga.
|
14
14
|
}
|
15
15
|
s.email = %q{zeitdiebe@gmail.com}
|
@@ -38,6 +38,7 @@ Gem::Specification.new do |s|
|
|
38
38
|
"lib/nicoscraper.rb",
|
39
39
|
"nicoscraper.gemspec",
|
40
40
|
"test/movie_spec.rb",
|
41
|
+
"test/mylist_spec.rb",
|
41
42
|
"test/searcher_spec.rb"
|
42
43
|
]
|
43
44
|
s.homepage = %q{http://github.com/hdemon/nicoscraper}
|
@@ -52,6 +53,7 @@ Gem::Specification.new do |s|
|
|
52
53
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
53
54
|
s.add_runtime_dependency(%q<damerau-levenshtein>, [">= 0.5.3"])
|
54
55
|
s.add_runtime_dependency(%q<libxml-ruby>, [">= 2.2.2"])
|
56
|
+
s.add_runtime_dependency(%q<mechanize>, [">= 2.0.0"])
|
55
57
|
s.add_development_dependency(%q<rake>, ["= 0.8.7"])
|
56
58
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
57
59
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
@@ -60,6 +62,7 @@ Gem::Specification.new do |s|
|
|
60
62
|
else
|
61
63
|
s.add_dependency(%q<damerau-levenshtein>, [">= 0.5.3"])
|
62
64
|
s.add_dependency(%q<libxml-ruby>, [">= 2.2.2"])
|
65
|
+
s.add_dependency(%q<mechanize>, [">= 2.0.0"])
|
63
66
|
s.add_dependency(%q<rake>, ["= 0.8.7"])
|
64
67
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
65
68
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
@@ -69,6 +72,7 @@ Gem::Specification.new do |s|
|
|
69
72
|
else
|
70
73
|
s.add_dependency(%q<damerau-levenshtein>, [">= 0.5.3"])
|
71
74
|
s.add_dependency(%q<libxml-ruby>, [">= 2.2.2"])
|
75
|
+
s.add_dependency(%q<mechanize>, [">= 2.0.0"])
|
72
76
|
s.add_dependency(%q<rake>, ["= 0.8.7"])
|
73
77
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
74
78
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
data/test/mylist_spec.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
#$:.unshift File.dirname(__FILE__) + "/../lib"
|
3
|
+
|
4
|
+
require '../lib/nicoscraper.rb'
|
5
|
+
|
6
|
+
describe Nicos::Movie, "After executiton of 'getInfo' method" do
|
7
|
+
before(:all) do
|
8
|
+
@mylist = Nicos::Mylist.new(15196568)
|
9
|
+
@mylist.getInfo
|
10
|
+
p @mylist
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should have following values" do
|
14
|
+
@mylist.available.should be_true
|
15
|
+
|
16
|
+
@mylist.mylist_id.should === 15196568
|
17
|
+
@mylist.movies .should be_instance_of(Array)
|
18
|
+
|
19
|
+
movieObj = @mylist.movies[0]
|
20
|
+
movieObj.available .should be_true
|
21
|
+
|
22
|
+
movieObj.video_id .should_not be_nil
|
23
|
+
movieObj.title .should_not be_nil
|
24
|
+
movieObj.first_retrieve .should_not be_nil
|
25
|
+
#movieObj.memo .should_not be_nil
|
26
|
+
movieObj.description .should_not be_nil
|
27
|
+
movieObj.thumbnail_url.should_not be_nil
|
28
|
+
movieObj.length .should_not be_nil
|
29
|
+
|
30
|
+
movieObj.view_counter .should_not be_nil
|
31
|
+
movieObj.comment_num .should_not be_nil
|
32
|
+
movieObj.mylist_counter.should_not be_nil
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
data/test/searcher_spec.rb
CHANGED
@@ -3,80 +3,6 @@
|
|
3
3
|
|
4
4
|
require '../lib/nicoscraper.rb'
|
5
5
|
|
6
|
-
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
|
7
|
-
"and return a string except \"continue\" in this block" do
|
8
|
-
before(:all) do
|
9
|
-
searcher = Nicos::Searcher::ByTag.new()
|
10
|
-
@count = 0
|
11
|
-
|
12
|
-
searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
|
13
|
-
@count += 1
|
14
|
-
"not continue"
|
15
|
-
}
|
16
|
-
end
|
17
|
-
|
18
|
-
it "should end only one access." do
|
19
|
-
@count.should == 1
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
|
24
|
-
"and return a string except \"continue\" in this block" do
|
25
|
-
before(:all) do
|
26
|
-
searcher = Nicos::Searcher::ByTag.new()
|
27
|
-
@count = 0
|
28
|
-
|
29
|
-
searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
|
30
|
-
@count += 1
|
31
|
-
nil
|
32
|
-
}
|
33
|
-
end
|
34
|
-
|
35
|
-
it "should end only one access." do
|
36
|
-
@count.should == 1
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
|
41
|
-
"passing following argument" do
|
42
|
-
before(:all) do
|
43
|
-
searcher = Nicos::Searcher::ByTag.new()
|
44
|
-
count = 0
|
45
|
-
|
46
|
-
searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
|
47
|
-
@result = result
|
48
|
-
|
49
|
-
count += 1
|
50
|
-
puts count
|
51
|
-
"continue" unless count >= 3
|
52
|
-
}
|
53
|
-
puts "end"
|
54
|
-
end
|
55
|
-
|
56
|
-
it "should have Array of movie objects." do
|
57
|
-
@result .should be_kind_of(Array)
|
58
|
-
@result[0].should be_instance_of(Nicos::Movie)
|
59
|
-
end
|
60
|
-
|
61
|
-
it "should contains movie objects that have following structure." do
|
62
|
-
@result[0].available .should be_true
|
63
|
-
|
64
|
-
@result[0].video_id .should_not be_nil
|
65
|
-
@result[0].title .should_not be_nil
|
66
|
-
@result[0].create_time .should_not be_nil
|
67
|
-
@result[0].update_time .should_not be_nil
|
68
|
-
#@result[0].memo .should_not be_nil
|
69
|
-
@result[0].description .should_not be_nil
|
70
|
-
@result[0].thumbnail_url.should_not be_nil
|
71
|
-
@result[0].create_time .should_not be_nil
|
72
|
-
@result[0].update_time .should_not be_nil
|
73
|
-
@result[0].length .should_not be_nil
|
74
|
-
|
75
|
-
@result[0].view_counter .should_not be_nil
|
76
|
-
@result[0].comment_num .should_not be_nil
|
77
|
-
@result[0].mylist_counter.should_not be_nil
|
78
|
-
end
|
79
|
-
end
|
80
6
|
|
81
7
|
describe "When execute 'Nicos::Connector::setWait" do
|
82
8
|
before(:all) do
|
@@ -204,4 +130,156 @@ describe "When execute 'Nicos::Connector::setWait" do
|
|
204
130
|
@c2.waitConfig["timedOut"]["wait"]
|
205
131
|
.should == 10
|
206
132
|
end
|
207
|
-
end
|
133
|
+
end
|
134
|
+
|
135
|
+
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
|
136
|
+
"and return a string except \"continue\" in this block" do
|
137
|
+
before(:all) do
|
138
|
+
wait = {
|
139
|
+
'seqAccLimit' => 0, # 連続してリクエストする回数
|
140
|
+
'afterSeq' => 0, # 連続リクエスト後のウェイト(以下全て単位は秒)
|
141
|
+
'each' => 0, # 連続リクエスト時の、1リクエスト毎のウェイト
|
142
|
+
|
143
|
+
'increment' => 0, # アクセス拒絶時の、次回以降の1リクエスト毎のウェイトの増加量
|
144
|
+
|
145
|
+
'deniedSeqReq'=> { # 連続アクセス拒絶時
|
146
|
+
'retryLimit' => 3, # 再試行回数の上限
|
147
|
+
'wait' => 120 # 再試行までのウェイト
|
148
|
+
},
|
149
|
+
|
150
|
+
'serverIsBusy'=> { # サーバ混雑時
|
151
|
+
'retryLimit' => 3,
|
152
|
+
'wait' => 120
|
153
|
+
},
|
154
|
+
|
155
|
+
'serviceUnavailable' => { # 503時
|
156
|
+
'retryLimit' => 3,
|
157
|
+
'wait' => 120
|
158
|
+
},
|
159
|
+
|
160
|
+
'timedOut' => { # タイムアウト時
|
161
|
+
'retryLimit' => 3,
|
162
|
+
'wait' => 10
|
163
|
+
}
|
164
|
+
}
|
165
|
+
Nicos::Connector::Config::setWait(wait)
|
166
|
+
|
167
|
+
searcher = Nicos::Searcher::ByTag.new()
|
168
|
+
@count = 0
|
169
|
+
|
170
|
+
searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
|
171
|
+
@count += 1
|
172
|
+
"not continue"
|
173
|
+
}
|
174
|
+
end
|
175
|
+
|
176
|
+
it "should end only one access." do
|
177
|
+
@count.should == 1
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
|
182
|
+
"and return a string except \"continue\" in this block" do
|
183
|
+
before(:all) do
|
184
|
+
searcher = Nicos::Searcher::ByTag.new()
|
185
|
+
@count = 0
|
186
|
+
|
187
|
+
searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
|
188
|
+
@count += 1
|
189
|
+
nil
|
190
|
+
}
|
191
|
+
end
|
192
|
+
|
193
|
+
it "should end only one access." do
|
194
|
+
@count.should == 1
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
|
199
|
+
"passing following argument" do
|
200
|
+
before(:all) do
|
201
|
+
searcher = Nicos::Searcher::ByTag.new()
|
202
|
+
count = 0
|
203
|
+
|
204
|
+
searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result, status|
|
205
|
+
@result = result
|
206
|
+
@status = status
|
207
|
+
p @status
|
208
|
+
count += 1
|
209
|
+
"continue" unless count >= 3
|
210
|
+
}
|
211
|
+
puts "end"
|
212
|
+
end
|
213
|
+
|
214
|
+
it "should have Array of movie objects." do
|
215
|
+
@result .should be_kind_of(Array)
|
216
|
+
@result[0].should be_instance_of(Nicos::Movie)
|
217
|
+
end
|
218
|
+
|
219
|
+
it "should contains movie objects that have following structure." do
|
220
|
+
@result[0].available .should be_true
|
221
|
+
|
222
|
+
@result[0].video_id .should_not be_nil
|
223
|
+
@result[0].title .should_not be_nil
|
224
|
+
@result[0].create_time .should_not be_nil
|
225
|
+
@result[0].update_time .should_not be_nil
|
226
|
+
#@result[0].memo .should_not be_nil
|
227
|
+
@result[0].description .should_not be_nil
|
228
|
+
@result[0].thumbnail_url.should_not be_nil
|
229
|
+
@result[0].create_time .should_not be_nil
|
230
|
+
@result[0].update_time .should_not be_nil
|
231
|
+
@result[0].length .should_not be_nil
|
232
|
+
|
233
|
+
@result[0].view_counter .should_not be_nil
|
234
|
+
@result[0].comment_num .should_not be_nil
|
235
|
+
@result[0].mylist_counter.should_not be_nil
|
236
|
+
end
|
237
|
+
|
238
|
+
it "should contains movie objects that have following structure." do
|
239
|
+
@status["results"]["notPublic"] .should be_instance_of(Array)
|
240
|
+
@status["results"]["limInCommunity"].should be_instance_of(Array)
|
241
|
+
@status["results"]["notFound"] .should be_instance_of(Array)
|
242
|
+
@status["results"]["deleted"] .should be_instance_of(Array)
|
243
|
+
@status["results"]["succeededNum"] .should >= 0
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
|
248
|
+
"and searcher reaches the last page." do
|
249
|
+
before(:all) do
|
250
|
+
searcher = Nicos::Searcher::ByTag.new()
|
251
|
+
count = 0
|
252
|
+
|
253
|
+
searcher.execute("アヤックス", "post_old") { |result, status|
|
254
|
+
@result = result
|
255
|
+
|
256
|
+
count += 1
|
257
|
+
puts count
|
258
|
+
"continue"
|
259
|
+
}
|
260
|
+
end
|
261
|
+
|
262
|
+
it "should have Array of movie objects." do
|
263
|
+
@result .should be_kind_of(Array)
|
264
|
+
@result[0].should be_instance_of(Nicos::Movie)
|
265
|
+
end
|
266
|
+
|
267
|
+
it "should contains movie objects that have following structure." do
|
268
|
+
@result[0].available .should be_true
|
269
|
+
|
270
|
+
@result[0].video_id .should_not be_nil
|
271
|
+
@result[0].title .should_not be_nil
|
272
|
+
@result[0].create_time .should_not be_nil
|
273
|
+
@result[0].update_time .should_not be_nil
|
274
|
+
#@result[0].memo .should_not be_nil
|
275
|
+
@result[0].description .should_not be_nil
|
276
|
+
@result[0].thumbnail_url.should_not be_nil
|
277
|
+
@result[0].create_time .should_not be_nil
|
278
|
+
@result[0].update_time .should_not be_nil
|
279
|
+
@result[0].length .should_not be_nil
|
280
|
+
|
281
|
+
@result[0].view_counter .should_not be_nil
|
282
|
+
@result[0].comment_num .should_not be_nil
|
283
|
+
@result[0].mylist_counter.should_not be_nil
|
284
|
+
end
|
285
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: nicoscraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.2.7
|
5
|
+
version: 0.2.8
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Masami Yonehara
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-09-
|
13
|
+
date: 2011-09-29 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: damerau-levenshtein
|
@@ -35,8 +35,19 @@ dependencies:
|
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: *id002
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
|
-
name: rake
|
38
|
+
name: mechanize
|
39
39
|
requirement: &id003 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: 2.0.0
|
45
|
+
type: :runtime
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *id003
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: rake
|
50
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
40
51
|
none: false
|
41
52
|
requirements:
|
42
53
|
- - "="
|
@@ -44,10 +55,10 @@ dependencies:
|
|
44
55
|
version: 0.8.7
|
45
56
|
type: :development
|
46
57
|
prerelease: false
|
47
|
-
version_requirements: *id003
|
58
|
+
version_requirements: *id004
|
48
59
|
- !ruby/object:Gem::Dependency
|
49
60
|
name: shoulda
|
50
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
61
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
51
62
|
none: false
|
52
63
|
requirements:
|
53
64
|
- - ">="
|
@@ -55,10 +66,10 @@ dependencies:
|
|
55
66
|
version: "0"
|
56
67
|
type: :development
|
57
68
|
prerelease: false
|
58
|
-
version_requirements: *id004
|
69
|
+
version_requirements: *id005
|
59
70
|
- !ruby/object:Gem::Dependency
|
60
71
|
name: bundler
|
61
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
72
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
62
73
|
none: false
|
63
74
|
requirements:
|
64
75
|
- - ~>
|
@@ -66,10 +77,10 @@ dependencies:
|
|
66
77
|
version: 1.0.0
|
67
78
|
type: :development
|
68
79
|
prerelease: false
|
69
|
-
version_requirements: *id005
|
80
|
+
version_requirements: *id006
|
70
81
|
- !ruby/object:Gem::Dependency
|
71
82
|
name: jeweler
|
72
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
83
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
73
84
|
none: false
|
74
85
|
requirements:
|
75
86
|
- - ~>
|
@@ -77,10 +88,10 @@ dependencies:
|
|
77
88
|
version: 1.6.4
|
78
89
|
type: :development
|
79
90
|
prerelease: false
|
80
|
-
version_requirements: *id006
|
91
|
+
version_requirements: *id007
|
81
92
|
- !ruby/object:Gem::Dependency
|
82
93
|
name: rcov
|
83
|
-
requirement: &id007 !ruby/object:Gem::Requirement
|
94
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
84
95
|
none: false
|
85
96
|
requirements:
|
86
97
|
- - ">="
|
@@ -88,7 +99,7 @@ dependencies:
|
|
88
99
|
version: "0"
|
89
100
|
type: :development
|
90
101
|
prerelease: false
|
91
|
-
version_requirements: *id007
|
102
|
+
version_requirements: *id008
|
92
103
|
description: "It scrape movies and mylists of Niconico douga.\n "
|
93
104
|
email: zeitdiebe@gmail.com
|
94
105
|
executables: []
|
@@ -119,6 +130,7 @@ files:
|
|
119
130
|
- lib/nicoscraper.rb
|
120
131
|
- nicoscraper.gemspec
|
121
132
|
- test/movie_spec.rb
|
133
|
+
- test/mylist_spec.rb
|
122
134
|
- test/searcher_spec.rb
|
123
135
|
homepage: http://github.com/hdemon/nicoscraper
|
124
136
|
licenses:
|
@@ -133,7 +145,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
133
145
|
requirements:
|
134
146
|
- - ">="
|
135
147
|
- !ruby/object:Gem::Version
|
136
|
-
hash:
|
148
|
+
hash: -2925432120660025319
|
137
149
|
segments:
|
138
150
|
- 0
|
139
151
|
version: "0"
|