nicoscraper 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,248 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.unshift File.dirname(__FILE__)
3
+
4
+ require 'rubygems'
5
+ require 'ruby-debug'
6
+ require 'time'
7
+ require 'mechanize'
8
+ require 'kconv'
9
+
10
+ require 'parser.rb'
11
+
12
+ module Nicos
13
+ module Searcher
14
# Shared implementation for the tag searchers: builds the tag-search request
# and drives the page-by-page search loop. Subclasses are expected to set
# @connector and to provide a private #parse method.
class ByTagSuper < Nicos::Connector::Config
  # Query-string fragment for each supported sort key. An empty fragment
  # means no sort parameter is sent for that key.
  SORT_PARAMS = {
    'comment_new'  => '',
    'comment_old'  => 'order=a',
    'view_many'    => 'sort=v',
    'view_few'     => 'sort=v&order=a',
    'comment_many' => 'sort=r',
    'comment_few'  => 'sort=r&order=a',
    'mylist_many'  => 'sort=m',
    'mylist_few'   => 'sort=m&order=a',
    'post_new'     => 'sort=f',
    'post_old'     => 'sort=f&order=a',
    'length_long'  => 'sort=l',
    'length_short' => 'sort=l&order=a'
  }.freeze

  private

  # Requests one page of tag-search results through @connector.
  #
  # @param [String] tag tag to search for
  # @param [String] sort one of the SORT_PARAMS keys; an unknown key adds no sort parameter
  # @param [Integer] page page number (page 1 adds no page parameter)
  # @param [String] method "atom" appends the Atom feed parameters
  # @return whatever @connector.get returns
  def get(tag, sort, page, method)
    paramAry = []
    paramAry.push("page=#{page}") if page != 1

    # Skip empty or unknown sort fragments so the query never contains a stray '&'.
    sortStr = SORT_PARAMS[sort]
    paramAry.push(sortStr) unless sortStr.nil? || sortStr.empty?

    paramAry.push("rss=atom&numbers=1") if method == "atom"

    host = 'www.nicovideo.jp'
    entity = '/tag/' + tag + "?" + paramAry.join('&')

    @connector.get(host, entity)
  end

  # Fetches result pages one after another, starting at page 1, and calls the
  # block after every request with (movies, page). The loop goes on only
  # while the block returns the string "continue".
  #
  # NOTE(review): movieObjAry is never cleared, so the block receives every
  # movie collected so far, not just the current page - confirm this is intended.
  #
  # @param [String] tag
  # @param [String] sort
  # @param [String] method
  def loop(tag, sort, method, &block)
    page = 1
    movieObjAry = []
    order = "continue"

    begin
      response = get(tag, sort, page, method)

      # Only a successful response is parsed; the block is called either way.
      if response["order"] == "success"
        parse(response["body"]).each { |each|
          movie = Nicos::Movie.new(each["video_id"])
          each["available"] = true
          movie.set(each)
          movieObjAry.push(movie)
        }
      end

      order = block.call(movieObjAry, page)
      page += 1
    end until order != "continue"
  end

  public

  include Nicos::Connector::SetWait
end
91
+
92
# Tag searcher that uses the 'mech' connector and reads values out of the
# result page with the XPath expressions set in #initialize.
class ByTagHtml < ByTagSuper
  # NOTE(review): Nicos::Connector is declared as a module in the config
  # file; confirm that Nicos::Connector.new is defined elsewhere.
  def initialize
    @numOfSearched = 32
    @incrAmt = 0.2

    @connector = Nicos::Connector.new('mech')

    # XPath expressions locating each value in the HTML.
    @videoIdXP = "//div[@class='uad_thumbfrm']/table/tr/td/p/a"
    @lengthXP  = "//div[@class='uad_thumbfrm']/table/tr/td/p[2]/span"
    @viewXP    = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[1]/strong"
    @resXP     = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[2]/strong"
    @mylistXP  = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[3]/a/strong"
    @adXP      = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[4]/a/strong"
    @waitConfig = @@waitConfig
  end
  attr_accessor :waitConfig

  private

  # Reads the values of one movie from the page currently held by the
  # connector and returns them as a one-element array of hashes.
  #
  # NOTE(review): ByTagSuper#loop calls parse(response["body"]), whereas this
  # method uses its argument as an index into the XPath matches - confirm
  # which contract is intended.
  #
  # @param [Integer] movieNum index of the movie within the XPath matches
  # @return [Array<Hash>]
  def parse(movieNum)
    result = []

    href = @connector.mech.page.search(@videoIdXP)[movieNum]['href']
    video_id = /(sm|nm)[0-9]{1,}/.match(href)[0]

    # The length text is split on ':' and converted to seconds.
    lengthStr = @connector.mech.page.search(@lengthXP)[movieNum].text.split(/\:/)
    length = lengthStr[0].to_i * 60 + lengthStr[1].to_i

    result.push({
      "video_id" => video_id,
      "length"   => length,
      "view"     => countAt(@viewXP, movieNum),
      "res"      => countAt(@resXP, movieNum),
      "mylist"   => countAt(@mylistXP, movieNum),
      "ad"       => countAt(@adXP, movieNum)
    })
  end

  # Returns the text of the movieNum-th match of xpath as an Integer, with
  # thousands separators removed.
  def countAt(xpath, movieNum)
    @connector.mech.page.search(xpath)[movieNum].text.gsub(/\,/, '').to_i
  end

  public

  # Runs the search. The block is called with (movies, page) after each
  # request and must return "continue" to fetch the next page.
  #
  # @param [String] tag
  # @param [String] sortMethod
  def execute(tag, sortMethod, &block)
    # Fixed: this used to pass the undefined name `sort`.
    loop(tag, sortMethod, "mech") { |result, page|
      block.call(result, page)
    }
  end
end
148
+
149
# Tag searcher that requests the Atom feed of a tag search and parses it
# with Nicos::Parser.tagAtom.
class ByTag < ByTagSuper
  def initialize
    @connector = Nicos::Connector::TagAtom.new
    @waitConfig = @@waitConfig
    @numOfSearched = 32
    @incrAmt = 0.2
  end
  attr_accessor :waitConfig

  private

  # Turns the Atom XML of one result page into parsed entries.
  def parse(xml)
    Nicos::Parser.tagAtom(xml)
  end

  public

  # Runs the search.
  #
  # The block is called with (movies, page) after each request. Return the
  # string "continue" from it to fetch the next page; any other value ends
  # the search.
  #
  # @param [String] tag tag string to search for
  # @param [String] sortMethod sort order
  #==sortMethod: sort order
  # *comment_new*
  #  most recent comment first
  #
  # *comment_old*
  #  oldest comment first
  #
  # *view_many*
  #  most views first
  #
  # *view_few*
  #  fewest views first
  #
  # *comment_many*
  #  most comments first
  #
  # *comment_few*
  #  fewest comments first
  #
  # *mylist_many*
  #  most mylist registrations first
  #
  # *mylist_few*
  #  fewest mylist registrations first
  #
  # *post_new*
  #  newest upload first
  #
  # *post_old*
  #  oldest upload first
  #
  # *length_long*
  #  longest playing time first
  #
  # *length_short*
  #  shortest playing time first
  #
  #==waitConfig: wait settings
  # <b>Before changing the wait settings, read the cautions and the
  # disclaimer in the README.</b>
  #
  # Wait settings are not an argument of this method. Change them for one
  # searcher with #setWait, or for every searcher with
  # Nicos::Connector::Config.setWait, passing a hash in the format below.
  # The values shown are the defaults. You do not have to supply every key:
  # build a hash that holds only the keys and values you want to change.
  #
  #    @waitConfig = {
  #      'seqAccLimit' => 10,  # number of requests sent in a row
  #      'afterSeq'    => 10,  # wait after a run of requests (all waits are in seconds)
  #      'each'        => 1,   # wait per request within a run
  #      'increment'   => 1,   # added to the per-request wait after access is refused
  #
  #      'deniedSeqReq'=> {    # when consecutive access is refused (same keys below)
  #        'retryLimit'  => 3,   # maximum number of retries
  #        'wait'        => 120  # wait before the next access
  #      },
  #
  #      'serverIsBusy'=> {    # when the server is busy
  #        'retryLimit'  => 3,
  #        'wait'        => 120
  #      },
  #
  #      'serviceUnavailable' => { # when a 503 is returned
  #        'retryLimit'  => 3,
  #        'wait'        => 120
  #      },
  #
  #      'timedOut' => {       # on timeout
  #        'retryLimit'  => 3,
  #        'wait'        => 10
  #      }
  #    }
  def execute(tag, sortMethod, &block)
    loop(tag, sortMethod, "atom", &block)
  end
end
247
+ end
248
+ end
@@ -0,0 +1,15 @@
1
+ require "ruby-debug"
2
+
3
# Non-destructive mixin: returns targetObj with the entries of overWriteObj
# merged in recursively. Neither argument is modified.
#
# When targetObj is exactly a Hash, the result is a deep copy of it in which
# every key of overWriteObj is replaced by the recursive merge of the two
# values. When targetObj is anything else, overWriteObj itself is returned.
#
# @param targetObj base value (a Hash at the top level)
# @param overWriteObj overriding value (must respond to each_key and []
#   whenever targetObj is a Hash)
# @return the merged value
def mixinND(targetObj, overWriteObj)
  return overWriteObj unless targetObj.instance_of?(Hash)

  # Deep copy, so that nested hashes of the target are not shared with the result.
  output = Marshal.load(Marshal.dump(targetObj))
  overWriteObj.each_key { |key|
    # Write only into the copy; the caller's override hash stays untouched.
    output[key] = mixinND(targetObj[key], overWriteObj[key])
  }
  output
end
@@ -0,0 +1,63 @@
1
module Nicos
  module Connector
    # Holds the wait/retry settings shared through class variables: the
    # defaults (@@waitConfigDefault) and the current settings (@@waitConfig).
    class Config
      @@waitConfigDefault = {
        'seqAccLimit' => 10,  # number of requests sent in a row
        'afterSeq'    => 10,  # wait after a run of requests (all waits are in seconds)
        'each'        => 1,   # wait per request within a run

        'increment'   => 1,   # added to the per-request wait after access is refused

        'deniedSeqReq'=> {    # when consecutive access is refused
          'retryLimit'  => 3,   # maximum number of retries
          'wait'        => 120  # wait before retrying
        },

        'serverIsBusy'=> {    # when the server is busy
          'retryLimit'  => 3,
          'wait'        => 120
        },

        'serviceUnavailable' => { # on a 503
          'retryLimit'  => 3,
          'wait'        => 120
        },

        'timedOut' => {       # on timeout
          'retryLimit'  => 3,
          'wait'        => 10
        }
      }

      # The current settings start as a deep copy, so that changing them in
      # place can never alter the defaults.
      @@waitConfig = Marshal.load(Marshal.dump(@@waitConfigDefault))

      # Merges waitConfig into the default settings.
      #
      # @param [Hash] waitConfig keys and values to override
      def Config.setWaitDefault(waitConfig)
        @@waitConfigDefault = mixinND(
          @@waitConfigDefault,
          waitConfig
        )
      end

      # Changes the current settings.
      #
      # @param waitConfig the string "default" restores the defaults, nil
      #   does nothing, anything else is merged into the current settings
      def Config.setWait(waitConfig)
        case waitConfig
        when "default"
          # Copy rather than alias, for the same reason as above.
          @@waitConfig = Marshal.load(Marshal.dump(@@waitConfigDefault))
        when nil
        else
          @@waitConfig = mixinND(
            @@waitConfig,
            waitConfig
          )
        end
      end
    end

    # Mixin giving an object a per-instance way to change its @waitConfig.
    module SetWait
      # Merges waitConfig into this object's @waitConfig; nil is ignored.
      #
      # @param [Hash, nil] waitConfig keys and values to override
      def setWait(waitConfig)
        @waitConfig = mixinND(
          @waitConfig,
          waitConfig
        ) if waitConfig != nil
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
# Gem metadata and the directories from which the library files are loaded.
module Nicos
  VERSION    = "0.2.5"
  REPOSITORY = "http://github.com/hdemon/nicoscraper/"
  AUTHOR     = "Masami Yonehara"

  # Absolute path of the directory that contains this file.
  ROOT = File.expand_path(File.dirname(__FILE__))

  # List any additional files to load here.
  # ADDON = File.join(ROOT, '', '')

  CONFIG_DIR = File.join(ROOT, 'config')
  CLASSES    = File.join(ROOT, 'classes')
end

# Load every .rb file of the config directory, then of the classes directory.
# The glob result is sorted so that the load order does not depend on the
# order in which the filesystem happens to list the files.
[
  Nicos::CONFIG_DIR,
  Nicos::CLASSES
].each do |path|
  Dir.glob(File.join(path, '*.rb')).sort.each do |file|
    require file
  end
end
data/test/movie_spec.rb CHANGED
@@ -94,7 +94,7 @@ describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
94
94
  searcher = Nicos::Searcher::ByTag.new()
95
95
  @count = 0
96
96
 
97
- searcher.execute("ゆっくり実況プレイpart1リンク", "post_old", nil) { |result|
97
+ searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
98
98
  @count += 1
99
99
  "not continue"
100
100
  }
@@ -111,7 +111,7 @@ describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
111
111
  searcher = Nicos::Searcher::ByTag.new()
112
112
  @count = 0
113
113
 
114
- searcher.execute("ゆっくり実況プレイpart1リンク", "post_old", nil) { |result|
114
+ searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
115
115
  @count += 1
116
116
  nil
117
117
  }
@@ -128,7 +128,7 @@ describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
128
128
  searcher = Nicos::Searcher::ByTag.new()
129
129
  count = 0
130
130
 
131
- searcher.execute("ゆっくり実況プレイpart1リンク", "post_old", nil) { |result|
131
+ searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
132
132
  @result = result
133
133
 
134
134
  count += 1
@@ -161,4 +161,52 @@ describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
161
161
  @result[0].comment_num .should_not be_nil
162
162
  @result[0].mylist_counter.should_not be_nil
163
163
  end
164
+ end
165
+
166
+ describe "When execute 'Nicos::Connector::setWait" do
167
+ before(:all) do
168
+ wait = {
169
+ 'seqAccLimit' => 100,
170
+
171
+ 'deniedSeqReq'=> {
172
+ 'retryLimit' => 30,
173
+ 'wait' => 1200
174
+ },
175
+
176
+ 'serverIsBusy'=> {
177
+ 'retryLimit' => 10
178
+ }
179
+ }
180
+
181
+ Nicos::Connector::Config::setWait(wait)
182
+ end
183
+
184
+ it "should have following values." do
185
+ c = Nicos::Searcher::ByTag.new()
186
+ c.waitConfig .should_not be_nil
187
+ c.waitConfig["seqAccLimit"]
188
+ .should == 100
189
+ c.waitConfig["afterSeq"]
190
+ .should == 10
191
+ c.waitConfig["each"]
192
+ .should == 1
193
+ c.waitConfig["increment"]
194
+ .should == 1
195
+ c.waitConfig["deniedSeqReq"]["retryLimit"]
196
+ .should == 30
197
+ c.waitConfig["deniedSeqReq"]["wait"]
198
+ .should == 1200
199
+ c.waitConfig["serverIsBusy"]["retryLimit"]
200
+ .should == 10
201
+ c.waitConfig["serverIsBusy"]["wait"]
202
+ .should == 120
203
+ c.waitConfig["serviceUnavailable"]["retryLimit"]
204
+ .should == 3
205
+ c.waitConfig["serviceUnavailable"]["wait"]
206
+ .should == 120
207
+ c.waitConfig["timedOut"]["retryLimit"]
208
+ .should == 3
209
+ c.waitConfig["timedOut"]["wait"]
210
+ .should == 10
211
+ end
164
212
  end
@@ -0,0 +1,207 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.unshift File.dirname(__FILE__) + "/../lib"
3
+
4
+ require 'nicoscraper'
5
+
6
# The search loop goes on only while the block returns "continue", so a
# block returning any other string must be called exactly once.
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
  "and return a string except \"continue\" in this block" do
  before(:all) do
    @count = 0
    tagSearcher = Nicos::Searcher::ByTag.new

    tagSearcher.execute("ゆっくり実況プレイpart1リンク", "post_old") do |_movies|
      @count += 1
      "not continue"
    end
  end

  it "should end only one access." do
    @count.should == 1
  end
end
22
+
23
# A block returning nil (anything other than "continue") must stop the
# search after the first request. The description used to be a copy of the
# previous example group's; it now says what this group actually does.
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
  "and return nil in this block" do
  before(:all) do
    searcher = Nicos::Searcher::ByTag.new()
    @count = 0

    searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
      @count += 1
      nil
    }
  end

  it "should end only one access." do
    @count.should == 1
  end
end
39
+
40
# Runs the search for three requests and checks the shape of what the block
# receives. Leftover debug output and duplicated expectations were removed.
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
  "passing following argument" do
  before(:all) do
    searcher = Nicos::Searcher::ByTag.new()
    count = 0

    searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
      @result = result

      count += 1
      # Returning nil on the third call ends the search.
      "continue" unless count >= 3
    }
  end

  it "should have Array of movie objects." do
    @result   .should be_kind_of(Array)
    @result[0].should be_instance_of(Nicos::Movie)
  end

  it "should contains movie objects that have following structure." do
    @result[0].available     .should be_true

    @result[0].video_id      .should_not be_nil
    @result[0].title         .should_not be_nil
    @result[0].create_time   .should_not be_nil
    @result[0].update_time   .should_not be_nil
    #@result[0].memo         .should_not be_nil
    @result[0].description   .should_not be_nil
    @result[0].thumbnail_url .should_not be_nil
    @result[0].length        .should_not be_nil

    @result[0].view_counter  .should_not be_nil
    @result[0].comment_num   .should_not be_nil
    @result[0].mylist_counter.should_not be_nil
  end
end
80
+
81
# Class-level setWait: the override is merged into the shared settings, so a
# searcher created afterwards sees the overridden values plus the defaults
# for every key that was not overridden.
describe "When execute 'Nicos::Connector::Config.setWait'" do
  before(:all) do
    wait = {
      'seqAccLimit' => 100,

      'deniedSeqReq'=> {
        'retryLimit'  => 30,
        'wait'        => 1200
      },

      'serverIsBusy'=> {
        'retryLimit'  => 10
      }
    }

    Nicos::Connector::Config.setWait(wait)
  end

  it "should have following values." do
    c = Nicos::Searcher::ByTag.new()
    c.waitConfig.should_not be_nil

    # Overridden values.
    c.waitConfig["seqAccLimit"].should == 100
    c.waitConfig["deniedSeqReq"]["retryLimit"].should == 30
    c.waitConfig["deniedSeqReq"]["wait"].should == 1200
    c.waitConfig["serverIsBusy"]["retryLimit"].should == 10

    # Values left at their defaults.
    c.waitConfig["afterSeq"].should == 10
    c.waitConfig["each"].should == 1
    c.waitConfig["increment"].should == 1
    c.waitConfig["serverIsBusy"]["wait"].should == 120
    c.waitConfig["serviceUnavailable"]["retryLimit"].should == 3
    c.waitConfig["serviceUnavailable"]["wait"].should == 120
    c.waitConfig["timedOut"]["retryLimit"].should == 3
    c.waitConfig["timedOut"]["wait"].should == 10
  end

  # Restore the shared settings so that later example groups see the defaults.
  after(:all) do
    Nicos::Connector::Config.setWait("default")
  end
end
132
+
133
# Instance-level setWait: only the searcher it is called on changes; a second
# searcher created afterwards still has the default values.
describe "When execute 'setWait' on a Nicos::Searcher::ByTag instance" do
  before(:all) do
    wait = {
      'seqAccLimit' => 100,

      'deniedSeqReq'=> {
        'retryLimit'  => 30,
        'wait'        => 1200
      },

      'serverIsBusy'=> {
        'retryLimit'  => 10
      }
    }

    @c1 = Nicos::Searcher::ByTag.new()
    @c1.setWait(wait)

    @c2 = Nicos::Searcher::ByTag.new()
  end

  it "should have following values." do
    # @c1: overridden values plus defaults for the untouched keys.
    @c1.waitConfig.should_not be_nil
    @c1.waitConfig["seqAccLimit"].should == 100
    @c1.waitConfig["afterSeq"].should == 10
    @c1.waitConfig["each"].should == 1
    @c1.waitConfig["increment"].should == 1
    @c1.waitConfig["deniedSeqReq"]["retryLimit"].should == 30
    @c1.waitConfig["deniedSeqReq"]["wait"].should == 1200
    @c1.waitConfig["serverIsBusy"]["retryLimit"].should == 10
    @c1.waitConfig["serverIsBusy"]["wait"].should == 120
    @c1.waitConfig["serviceUnavailable"]["retryLimit"].should == 3
    @c1.waitConfig["serviceUnavailable"]["wait"].should == 120
    @c1.waitConfig["timedOut"]["retryLimit"].should == 3
    @c1.waitConfig["timedOut"]["wait"].should == 10

    # @c2: defaults only.
    @c2.waitConfig.should_not be_nil
    @c2.waitConfig["seqAccLimit"].should == 10
    @c2.waitConfig["afterSeq"].should == 10
    @c2.waitConfig["each"].should == 1
    @c2.waitConfig["increment"].should == 1
    @c2.waitConfig["deniedSeqReq"]["retryLimit"].should == 3
    @c2.waitConfig["deniedSeqReq"]["wait"].should == 120
    @c2.waitConfig["serverIsBusy"]["retryLimit"].should == 3
    @c2.waitConfig["serverIsBusy"]["wait"].should == 120
    @c2.waitConfig["serviceUnavailable"]["retryLimit"].should == 3
    @c2.waitConfig["serviceUnavailable"]["wait"].should == 120
    @c2.waitConfig["timedOut"]["retryLimit"].should == 3
    @c2.waitConfig["timedOut"]["wait"].should == 10
  end
end