nicoscraper 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,248 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.unshift File.dirname(__FILE__)
3
+
4
+ require 'rubygems'
5
+ require 'ruby-debug'
6
+ require 'time'
7
+ require 'mechanize'
8
+ require 'kconv'
9
+
10
+ require 'parser.rb'
11
+
12
+ module Nicos
13
+ module Searcher
14
+ class ByTagSuper < Nicos::Connector::Config
15
+ private
16
+
17
# Requests one page of tag-search results from www.nicovideo.jp.
#
# @param tag [String] tag to search for; appended verbatim after "/tag/"
# @param sort [String] sort key such as 'view_many' or 'post_old';
#   'comment_new' and unrecognised keys add no sort parameter
# @param page [Integer] result page; "page=N" is only sent when N is not 1
# @param method [String] "atom" adds the Atom feed parameters, any other
#   value adds nothing
# @return whatever @connector.get returns for the built host and path
def get(tag, sort, page, method)
  sort_queries = {
    'comment_new'  => '',
    'comment_old'  => 'order=a',
    'view_many'    => 'sort=v',
    'view_few'     => 'sort=v&order=a',
    'comment_many' => 'sort=r',
    'comment_few'  => 'sort=r&order=a',
    'mylist_many'  => 'sort=m',
    'mylist_few'   => 'sort=m&order=a',
    'post_new'     => 'sort=f',
    'post_old'     => 'sort=f&order=a',
    'length_long'  => 'sort=l',
    'length_short' => 'sort=l&order=a'
  }

  params = []
  params.push("page=#{page}") if page != 1
  params.push(sort_queries[sort])
  params.push("rss=atom&numbers=1") if method == "atom"

  # Drop blank parts (default ordering or an unknown sort key) so the query
  # string never contains an empty parameter such as "?&rss=atom".
  params = params.reject { |part| part.nil? || part.empty? }

  # NOTE(review): the tag is not percent-encoded here; presumably the
  # connector takes care of that -- confirm.
  param = tag + "?" + params.join('&')

  host = 'www.nicovideo.jp'
  entity = '/tag/' + param

  @connector.get(host, entity)
end
57
+
58
# Requests result pages one after another, starting at page 1, and reports
# to the caller's block after every request.
#
# Movies from successful responses are appended to a single array that is
# kept for the whole run, so the block receives everything collected so far.
# The block is called after every request, including ones whose response
# "order" is not "success".
#
# @param tag [String] tag passed through to #get
# @param sort [String] sort key passed through to #get
# @param method [String] fetch method passed through to #get
# @yield [movies, page] collected movie objects and the page just requested
# @yieldreturn "continue" to request the next page; any other value stops
# @return [nil]
def loop(tag, sort, method, &block)
  movies = []
  page_no = 1
  verdict = "continue"

  # `verdict` starts as "continue", so the first page is always requested.
  # (Kernel#loop cannot be used here: this method shadows it.)
  while verdict == "continue"
    response = get(tag, sort, page_no, method)

    if response["order"] == "success"
      parse(response["body"]).each do |attrs|
        movie = Nicos::Movie.new(attrs["video_id"])
        attrs["available"] = true
        movie.set(attrs)
        movies.push(movie)
      end
    end

    verdict = block.call(movies, page_no)
    page_no += 1
  end
end
86
+
87
+ public
88
+
89
+ include Nicos::Connector::SetWait
90
+ end
91
+
92
+ class ByTagHtml < ByTagSuper
93
# Prepares the HTML-scraping searcher: counters, connector, the XPath
# expressions used by #parse, and this instance's wait settings.
def initialize
  @numOfSearched = 32
  @incrAmt = 0.2

  # NOTE(review): Nicos::Connector is declared with `module` in this package,
  # so `.new` looks like it cannot succeed -- confirm the intended class.
  @connector = Nicos::Connector.new('mech')

  # XPath expressions locating each parameter inside the result HTML
  @videoIdXP = "//div[@class='uad_thumbfrm']/table/tr/td/p/a"
  @lengthXP = "//div[@class='uad_thumbfrm']/table/tr/td/p[2]/span"
  @viewXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[1]/strong"
  @resXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[2]/strong"
  @mylistXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[3]/a/strong"
  @adXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[4]/a/strong"

  # Take a private deep copy: waitConfig is exposed through an accessor, and
  # handing out the class-wide hash itself would let an in-place change on
  # one searcher alter the settings shared by every other searcher.
  @waitConfig = Marshal.load(Marshal.dump(@@waitConfig))
end
108
+ attr_accessor :waitConfig
109
+
110
+ private
111
+
112
# Reads the attributes of one movie from the page currently held by the
# connector's mechanize agent.
#
# NOTE(review): #loop in the superclass calls parse with the response body,
# while this implementation uses its argument as an index -- confirm which
# contract is intended.
#
# @param movieNum index of the movie among the nodes matched by each XPath
# @return [Array<Hash>] one-element array holding "video_id", "length"
#   (seconds), "view", "res", "mylist" and "ad"
def parse(movieNum)
  # Node at position movieNum among the matches of the given XPath.
  node_at = lambda { |xpath| @connector.mech.page.search(xpath)[movieNum] }
  # Integer value of that node's text with thousands separators removed.
  count_at = lambda { |xpath| node_at.call(xpath).text.gsub(/\,/, '').to_i }

  video_id = /(sm|nm)[0-9]{1,}/.match(node_at.call(@videoIdXP)['href'])[0]

  # The length text is split on ":"; the first two fields are read as
  # minutes and seconds.
  minutes, seconds = node_at.call(@lengthXP).text.split(/\:/)
  length = minutes.to_i * 60 + seconds.to_i

  view = count_at.call(@viewXP)
  res = count_at.call(@resXP)
  mylist = count_at.call(@mylistXP)
  ad = count_at.call(@adXP)

  [
    {
      "video_id" => video_id,
      "length" => length,
      "view" => view,
      "res" => res,
      "mylist" => mylist,
      "ad" => ad
    }
  ]
end
136
+
137
+ public
138
+
139
# Runs the tag search through the HTML ("mech") fetch method.
#
# @param [String] tag tag string to search for
# @param [String] sortMethod sort key, e.g. "view_many" or "post_old"
# @yield [result, page] called after every page request with the movies
#   collected so far and the page number
# @yieldreturn "continue" to request the next page; any other value stops
def execute(tag, sortMethod, &block)
  # Pass the sortMethod parameter on; the previous code referenced `sort`,
  # which is not defined in this scope and raised NameError on every call.
  loop(tag, sortMethod, "mech") { |result, page|
    block.call(result, page)
  }
end
147
+ end
148
+
149
+ class ByTag < ByTagSuper
150
# Prepares the Atom-feed searcher: counters, connector and this instance's
# wait settings.
def initialize
  @numOfSearched = 32
  @incrAmt = 0.2
  @connector = Nicos::Connector::TagAtom.new()

  # Take a private deep copy: waitConfig is exposed through an accessor, and
  # handing out the class-wide hash itself would let an in-place change on
  # one searcher alter the settings shared by every other searcher.
  @waitConfig = Marshal.load(Marshal.dump(@@waitConfig))
end
156
+ attr_accessor :waitConfig
157
+
158
+ private
159
+
160
# Hands the fetched Atom document to Nicos::Parser.tagAtom.
#
# @param xml the response body received from the connector
# @return whatever Nicos::Parser.tagAtom returns; #loop iterates over it
#   and reads "video_id" from each element
def parse(xml)
  Nicos::Parser.tagAtom(xml)
end
163
+
164
+ public
165
+
166
# Runs the tag search against the Atom feed.
#
# @param [String] tag tag string to search for
# @param [String] sortMethod sort order, one of the keys listed below
# @yield [result, page] called after every page request with the movies
#   collected so far and the page number
# @yieldreturn "continue" to request the next page; any other value stops
#
#==sortMethod: sort order
# *comment_new*
# newest comment first
#
# *comment_old*
# oldest comment first
#
# *view_many*
# most views first
#
# *view_few*
# fewest views first
#
# *comment_many*
# most comments first
#
# *comment_few*
# fewest comments first
#
# *mylist_many*
# most mylist registrations first
#
# *mylist_few*
# fewest mylist registrations first
#
# *post_new*
# newest upload first
#
# *post_old*
# oldest upload first
#
# *length_long*
# longest playing time first
#
# *length_short*
# shortest playing time first
#
#==waitConfig: wait settings
# <b>Read the notes and the disclaimer in the README before changing the waits.</b>
#
# Wait settings are not an argument of this method. Change them with
# #setWait on the searcher or with Nicos::Connector::Config.setWait.
#
# Pass a hash in the format below; the values shown are the defaults.
# The hash does not need every key: include only the keys and values
# you want to change.
#
#   @waitConfig = {
#     'seqAccLimit' => 10,  # number of consecutive requests
#     'afterSeq'    => 10,  # wait after a run of consecutive requests (all waits are in seconds)
#     'each'        => 1,   # wait after each request within a run
#     'increment'   => 1,   # amount added to the per-request wait after access is denied
#
#     'deniedSeqReq'=> {    # when consecutive access is denied (same keys below)
#       'retryLimit'  => 3,   # maximum number of retries
#       'wait'        => 120  # wait before the next access
#     },
#
#     'serverIsBusy'=> {    # when the server is busy
#       'retryLimit'  => 3,
#       'wait'        => 120
#     },
#
#     'serviceUnavailable' => { # when a 503 is returned
#       'retryLimit'  => 3,
#       'wait'        => 120
#     },
#
#     'timedOut' => {       # on timeout
#       'retryLimit'  => 3,
#       'wait'        => 10
#     }
#   }
def execute(tag, sortMethod, &block)
  # The caller's block is handed straight to #loop, which invokes it with
  # (result, page).
  loop(tag, sortMethod, "atom", &block)
end
246
+ end
247
+ end
248
+ end
@@ -0,0 +1,15 @@
1
+ require "ruby-debug"
2
+
3
# mixin non destructive
#
# Recursively merges overWriteObj into a deep copy of targetObj and returns
# the result. Neither argument is modified, and hashes or arrays taken from
# overWriteObj are copied so the result shares no containers with it.
#
# @param targetObj base value; merged key by key when it is exactly a Hash
# @param overWriteObj overriding value; its keys win over targetObj's
# @return the merged Hash, or (a copy of) overWriteObj when the two values
#   cannot be merged key by key
def mixinND(targetObj, overWriteObj)
  # Only a Hash target combined with a hash-like override is merged per key.
  # In every other case the override simply replaces the target.
  unless targetObj.instance_of?(Hash) && overWriteObj.respond_to?(:each_key)
    container = overWriteObj.is_a?(Hash) || overWriteObj.is_a?(Array)
    return container ? Marshal.load(Marshal.dump(overWriteObj)) : overWriteObj
  end

  output = Marshal.load(Marshal.dump(targetObj))
  overWriteObj.each_key { |key|
    # Write the merged value into the copy only; the previous code also
    # stored it back into overWriteObj, mutating the caller's hash.
    output[key] = mixinND(targetObj[key], overWriteObj[key])
  }
  output
end
@@ -0,0 +1,63 @@
1
+ module Nicos
2
+ module Connector
3
# Holds the wait/retry settings shared by the connectors: a default set and
# the currently active set, both stored in class variables.
class Config
  @@waitConfigDefault = {
    'seqAccLimit' => 10,  # number of consecutive requests
    'afterSeq'    => 10,  # wait after a run of consecutive requests (all waits are in seconds)
    'each'        => 1,   # wait after each request within a run

    'increment'   => 1,   # amount added to the per-request wait after access is denied

    'deniedSeqReq'=> {    # when consecutive access is denied
      'retryLimit'  => 3,   # maximum number of retries
      'wait'        => 120  # wait before retrying
    },

    'serverIsBusy'=> {    # when the server is busy
      'retryLimit'  => 3,
      'wait'        => 120
    },

    'serviceUnavailable' => { # on 503
      'retryLimit'  => 3,
      'wait'        => 120
    },

    'timedOut' => {       # on timeout
      'retryLimit'  => 3,
      'wait'        => 10
    }
  }

  # The active settings start as a deep copy of the defaults. They used to be
  # the very same Hash object, so any in-place change to the active settings
  # silently rewrote the defaults as well.
  @@waitConfig = Marshal.load(Marshal.dump(@@waitConfigDefault))

  # Merges the given hash into the default settings.
  #
  # @param waitConfig [Hash] keys and values to override in the defaults
  # @return the new default settings as returned by mixinND
  def Config.setWaitDefault(waitConfig)
    @@waitConfigDefault = mixinND(
      @@waitConfigDefault,
      waitConfig
    )
  end

  # Changes the active settings.
  #
  # @param waitConfig [Hash, String, nil] "default" restores the defaults,
  #   nil leaves everything unchanged, anything else is merged into the
  #   active settings with mixinND
  # @return the new active settings, or nil when waitConfig is nil
  def Config.setWait(waitConfig)
    case waitConfig
    when "default"
      # Copy rather than alias, so the defaults stay intact afterwards.
      @@waitConfig = Marshal.load(Marshal.dump(@@waitConfigDefault))
    when nil
    else
      @@waitConfig = mixinND(
        @@waitConfig,
        waitConfig
      )
    end
  end
end
53
+
54
# Mixin giving an object a per-instance way to override its wait settings.
module SetWait
  # Merges the given hash into this object's @waitConfig using mixinND.
  #
  # @param waitConfig [Hash, nil] keys and values to override; nil leaves
  #   the current settings untouched
  # @return the new settings, or nil when waitConfig is nil
  def setWait(waitConfig)
    return if waitConfig.nil?

    @waitConfig = mixinND(@waitConfig, waitConfig)
  end
end
62
+ end
63
+ end
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
# Package metadata and the directories that are loaded at start-up.
module Nicos
  VERSION    = "0.2.5"
  REPOSITORY = "http://github.com/hdemon/nicoscraper/"
  AUTHOR     = "Masami Yonehara"

  # Absolute path of the directory that contains this file.
  ROOT = File.dirname(File.expand_path(__FILE__))

  # List any additional files to load here.
  # ADDON = File.join(ROOT, '', '')

  # Directories whose *.rb files are required when the package is loaded.
  CONFIG_DIR = File.join(ROOT, 'config')
  CLASSES    = File.join(ROOT, 'classes')
end
18
+
19
+ # puts Nicos::ROOT
20
+ # puts Nicos::CONFIG_DIR
21
+
22
+ # Load files.
23
# Require every *.rb file found directly inside the config directory and
# then the classes directory, in that order.
[Nicos::CONFIG_DIR, Nicos::CLASSES].each do |dir|
  pattern = File.join(dir, '*.rb')
  Dir.glob(pattern).each { |source| require source }
end
data/test/movie_spec.rb CHANGED
@@ -94,7 +94,7 @@ describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
94
94
  searcher = Nicos::Searcher::ByTag.new()
95
95
  @count = 0
96
96
 
97
- searcher.execute("ゆっくり実況プレイpart1リンク", "post_old", nil) { |result|
97
+ searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
98
98
  @count += 1
99
99
  "not continue"
100
100
  }
@@ -111,7 +111,7 @@ describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
111
111
  searcher = Nicos::Searcher::ByTag.new()
112
112
  @count = 0
113
113
 
114
- searcher.execute("ゆっくり実況プレイpart1リンク", "post_old", nil) { |result|
114
+ searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
115
115
  @count += 1
116
116
  nil
117
117
  }
@@ -128,7 +128,7 @@ describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
128
128
  searcher = Nicos::Searcher::ByTag.new()
129
129
  count = 0
130
130
 
131
- searcher.execute("ゆっくり実況プレイpart1リンク", "post_old", nil) { |result|
131
+ searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
132
132
  @result = result
133
133
 
134
134
  count += 1
@@ -161,4 +161,52 @@ describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
161
161
  @result[0].comment_num .should_not be_nil
162
162
  @result[0].mylist_counter.should_not be_nil
163
163
  end
164
+ end
165
+
166
# Checks that a partial hash given to Config.setWait overrides only the keys
# it names and that new searchers pick the merged settings up.
describe "When execute 'Nicos::Connector::setWait" do
  before(:all) do
    wait = {
      'seqAccLimit' => 100,

      'deniedSeqReq'=> {
        'retryLimit'  => 30,
        'wait'        => 1200
      },

      'serverIsBusy'=> {
        'retryLimit'  => 10
      }
    }

    Nicos::Connector::Config::setWait(wait)
  end

  # Config.setWait changes class-wide state; put the defaults back so the
  # overrides above do not leak into examples that run afterwards.
  after(:all) do
    Nicos::Connector::Config::setWait("default")
  end

  it "should have following values." do
    c = Nicos::Searcher::ByTag.new()
    c.waitConfig.should_not be_nil
    c.waitConfig["seqAccLimit"].should == 100
    c.waitConfig["afterSeq"].should == 10
    c.waitConfig["each"].should == 1
    c.waitConfig["increment"].should == 1
    c.waitConfig["deniedSeqReq"]["retryLimit"].should == 30
    c.waitConfig["deniedSeqReq"]["wait"].should == 1200
    c.waitConfig["serverIsBusy"]["retryLimit"].should == 10
    c.waitConfig["serverIsBusy"]["wait"].should == 120
    c.waitConfig["serviceUnavailable"]["retryLimit"].should == 3
    c.waitConfig["serviceUnavailable"]["wait"].should == 120
    c.waitConfig["timedOut"]["retryLimit"].should == 3
    c.waitConfig["timedOut"]["wait"].should == 10
  end
end
@@ -0,0 +1,207 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.unshift File.dirname(__FILE__) + "/../lib"
3
+
4
+ require 'nicoscraper'
5
+
6
# The search must stop after the first page when the block returns a string
# other than "continue".
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
  "and return a string except \"continue\" in this block" do
  before(:all) do
    @count = 0

    Nicos::Searcher::ByTag.new().execute("ゆっくり実況プレイpart1リンク", "post_old") do |result|
      @count += 1
      "not continue"
    end
  end

  it "should end only one access." do
    @count.should == 1
  end
end
22
+
23
# The search must also stop after the first page when the block returns nil.
# The description used to be a copy of the previous group's ("a string
# except "continue"") although this block returns nil.
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
  "and return nil in this block" do
  before(:all) do
    searcher = Nicos::Searcher::ByTag.new()
    @count = 0

    searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
      @count += 1
      nil
    }
  end

  it "should end only one access." do
    @count.should == 1
  end
end
39
+
40
# Fetches three pages and checks the shape of what the block receives.
describe "When execute 'Nicos::Searcher::ByTag.execute' method " +
  "passing following argument" do
  before(:all) do
    searcher = Nicos::Searcher::ByTag.new()
    count = 0

    searcher.execute("ゆっくり実況プレイpart1リンク", "post_old") { |result|
      @result = result

      count += 1
      # Keep going until three pages have been requested; the `unless`
      # yields nil on the third call, which stops the search.
      "continue" unless count >= 3
    }
  end

  it "should have Array of movie objects." do
    @result   .should be_kind_of(Array)
    @result[0].should be_instance_of(Nicos::Movie)
  end

  it "should contains movie objects that have following structure." do
    @result[0].available    .should be_true

    @result[0].video_id     .should_not be_nil
    @result[0].title        .should_not be_nil
    @result[0].create_time  .should_not be_nil
    @result[0].update_time  .should_not be_nil
    #@result[0].memo         .should_not be_nil
    @result[0].description  .should_not be_nil
    @result[0].thumbnail_url.should_not be_nil
    @result[0].length       .should_not be_nil

    @result[0].view_counter .should_not be_nil
    @result[0].comment_num  .should_not be_nil
    @result[0].mylist_counter.should_not be_nil
  end
end
80
+
81
# Checks that a partial hash given to Config.setWait overrides only the keys
# it names, then restores the defaults.
describe "When execute 'Nicos::Connector::setWait" do
  before(:all) do
    Nicos::Connector::Config::setWait(
      'seqAccLimit' => 100,
      'deniedSeqReq' => { 'retryLimit' => 30, 'wait' => 1200 },
      'serverIsBusy' => { 'retryLimit' => 10 }
    )
  end

  it "should have following values." do
    c = Nicos::Searcher::ByTag.new()
    c.waitConfig.should_not be_nil

    # Top-level settings: only seqAccLimit was overridden.
    [
      ["seqAccLimit", 100],
      ["afterSeq", 10],
      ["each", 1],
      ["increment", 1]
    ].each do |key, value|
      c.waitConfig[key].should == value
    end

    # Nested settings, checked as [section, key, expected value].
    [
      ["deniedSeqReq", "retryLimit", 30],
      ["deniedSeqReq", "wait", 1200],
      ["serverIsBusy", "retryLimit", 10],
      ["serverIsBusy", "wait", 120],
      ["serviceUnavailable", "retryLimit", 3],
      ["serviceUnavailable", "wait", 120],
      ["timedOut", "retryLimit", 3],
      ["timedOut", "wait", 10]
    ].each do |section, key, value|
      c.waitConfig[section][key].should == value
    end
  end

  after(:all) do
    Nicos::Connector::Config::setWait("default")
  end
end
132
+
133
# Checks that setWait on one searcher changes only that searcher: @c1 gets
# the overrides, @c2 (created afterwards) still has the default values.
describe "When execute 'Nicos::Connector::setWait" do
  before(:all) do
    @c1 = Nicos::Searcher::ByTag.new()
    @c1.setWait(
      'seqAccLimit' => 100,
      'deniedSeqReq' => { 'retryLimit' => 30, 'wait' => 1200 },
      'serverIsBusy' => { 'retryLimit' => 10 }
    )

    @c2 = Nicos::Searcher::ByTag.new()
  end

  it "should have following values." do
    # Expected values per searcher: top-level settings as [key, value],
    # nested settings as [section, key, value].
    [
      [
        @c1,
        [["seqAccLimit", 100], ["afterSeq", 10], ["each", 1], ["increment", 1]],
        [
          ["deniedSeqReq", "retryLimit", 30],
          ["deniedSeqReq", "wait", 1200],
          ["serverIsBusy", "retryLimit", 10],
          ["serverIsBusy", "wait", 120],
          ["serviceUnavailable", "retryLimit", 3],
          ["serviceUnavailable", "wait", 120],
          ["timedOut", "retryLimit", 3],
          ["timedOut", "wait", 10]
        ]
      ],
      [
        @c2,
        [["seqAccLimit", 10], ["afterSeq", 10], ["each", 1], ["increment", 1]],
        [
          ["deniedSeqReq", "retryLimit", 3],
          ["deniedSeqReq", "wait", 120],
          ["serverIsBusy", "retryLimit", 3],
          ["serverIsBusy", "wait", 120],
          ["serviceUnavailable", "retryLimit", 3],
          ["serviceUnavailable", "wait", 120],
          ["timedOut", "retryLimit", 3],
          ["timedOut", "wait", 10]
        ]
      ]
    ].each do |searcher, flat, nested|
      searcher.waitConfig.should_not be_nil

      flat.each do |key, value|
        searcher.waitConfig[key].should == value
      end

      nested.each do |section, key, value|
        searcher.waitConfig[section][key].should == value
      end
    end
  end
end