nicoscraper 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -1
- data/Gemfile.lock +3 -1
- data/README.md +152 -28
- data/VERSION +1 -1
- data/lib/classes/connector.rb +330 -0
- data/lib/classes/converter.rb +73 -0
- data/lib/classes/header.rb +9 -0
- data/lib/classes/movie.rb +521 -0
- data/lib/classes/mylist.rb +318 -0
- data/lib/classes/parser.rb +235 -0
- data/lib/classes/searcher.rb +248 -0
- data/lib/classes/tools.rb +15 -0
- data/lib/config/wait.rb +63 -0
- data/lib/nicoscraper.rb +31 -0
- data/test/movie_spec.rb +51 -3
- data/test/searcher_spec.rb +207 -0
- metadata +36 -22
- data/lib/connector.rb +0 -364
- data/lib/converter.rb +0 -72
- data/lib/movie.rb +0 -518
- data/lib/mylist.rb +0 -317
- data/lib/namespace.rb +0 -3
- data/lib/parser.rb +0 -234
- data/lib/searcher.rb +0 -243
- data/nicoscraper.gemspec +0 -72
data/lib/searcher.rb
DELETED
@@ -1,243 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
$:.unshift File.dirname(__FILE__)
|
3
|
-
|
4
|
-
require 'rubygems'
|
5
|
-
require 'ruby-debug'
|
6
|
-
|
7
|
-
require 'time'
|
8
|
-
require 'mechanize'
|
9
|
-
require 'kconv'
|
10
|
-
|
11
|
-
require 'namespace.rb'
|
12
|
-
require 'parser.rb'
|
13
|
-
|
14
|
-
module Nicos::Searcher
|
15
|
-
# :nodocs:
|
16
|
-
class ByTagSuper
  # Query-string fragment sent for each supported sort key.
  # 'comment_new' maps to an empty fragment, i.e. no sort parameter is sent.
  SORT_PARAMS = {
    'comment_new'  => '',
    'comment_old'  => 'order=a',
    'view_many'    => 'sort=v',
    'view_few'     => 'sort=v&order=a',
    'comment_many' => 'sort=r',
    'comment_few'  => 'sort=r&order=a',
    'mylist_many'  => 'sort=m',
    'mylist_few'   => 'sort=m&order=a',
    'post_new'     => 'sort=f',
    'post_old'     => 'sort=f&order=a',
    'length_long'  => 'sort=l',
    'length_short' => 'sort=l&order=a'
  }.freeze

  private

  # Requests one page of tag-search results through @connector.
  #
  # @param [String] tag      tag to search for; it is concatenated into the
  #   path as-is.
  #   NOTE(review): no URL escaping happens here -- confirm the connector or
  #   the caller escapes non-ASCII tags.
  # @param [String] sort     one of the SORT_PARAMS keys; an unknown key is
  #   treated like 'comment_new' (no sort parameter), as before
  # @param [Integer] page    page number; "page=N" is only sent when N != 1
  # @param [String] method   "atom" adds "rss=atom&numbers=1" to the query
  # @param [Hash] waitObj    wait settings, handed to @connector.setWait
  # @return whatever @connector.get returns
  def get(tag, sort, page, method, waitObj)
    params = []
    params << "page=#{page}" if page != 1

    # Skip empty fragments so the URL never ends up with a dangling "?" or
    # "&" (previously e.g. "/tag/foo?page=2&" or "/tag/foo?&rss=atom...").
    sortParam = SORT_PARAMS[sort].to_s
    params << sortParam unless sortParam.empty?

    params << 'rss=atom&numbers=1' if method == 'atom'

    entity = '/tag/' + tag
    entity += '?' + params.join('&') unless params.empty?

    @connector.setWait(waitObj)
    @connector.get('www.nicovideo.jp', entity)
  end

  # Fetches result pages one after another, starting at page 1, and hands
  # the movies collected so far to the block after every page.
  #
  # The block is called with (movies, page). `movies` is a single array that
  # keeps growing across pages (it is not reset per page). Paging goes on
  # only while the block returns the string "continue".
  #
  # A response is parsed only when response["order"] == "success"; the block
  # is still called after an unsuccessful request. Parsing is delegated to
  # the subclass's #parse, which must return an enumerable of hashes that
  # contain at least "video_id".
  #
  # Note: this method shadows Kernel#loop inside the class, so a bare
  # `loop do ... end` cannot be used in here.
  def loop(tag, sort, method, waitObj, &block)
    page = 1
    movieObjAry = []

    # do-while: at least one page is always requested.
    begin
      response = get(tag, sort, page, method, waitObj)

      if response["order"] == "success"
        parse(response["body"]).each { |attrs|
          movie = Nicos::Movie.new(attrs["video_id"])
          attrs["available"] = true
          movie.set(attrs)
          movieObjAry.push(movie)
        }
      end

      order = block.call(movieObjAry, page)
      page += 1
    end until order != "continue"
  end
end
|
91
|
-
|
92
|
-
# Tag searcher that scrapes the HTML result page through a Mechanize-backed
# connector (Nicos::Connector.new('mech')).
class ByTagHtml < ByTagSuper
  # Common XPath prefix of one search-result entry in the HTML page.
  ENTRY_XP = "//div[@class='uad_thumbfrm']/table/tr"

  def initialize
    @numOfSearched = 32
    @incrAmt = 0.2

    @connector = Nicos::Connector.new('mech')

    # XPaths locating each attribute inside the HTML result page.
    @videoIdXP = "#{ENTRY_XP}/td/p/a"
    @lengthXP  = "#{ENTRY_XP}/td/p[2]/span"
    @viewXP    = "#{ENTRY_XP}/td[2]/div/nobr[1]/strong"
    @resXP     = "#{ENTRY_XP}/td[2]/div/nobr[2]/strong"
    @mylistXP  = "#{ENTRY_XP}/td[2]/div/nobr[3]/a/strong"
    @adXP      = "#{ENTRY_XP}/td[2]/div/nobr[4]/a/strong"
  end

  # Runs the search and pages through the HTML results.
  #
  # @param [String] tag         tag to search for
  # @param [String] sortMethod  sort key understood by ByTagSuper#get
  # @param [Hash] waitConfig    wait settings forwarded to the connector
  # @yield [result, page] the movies collected so far and the page number;
  #   paging continues only while the block returns "continue"
  def execute(tag, sortMethod, waitConfig, &block)
    # The original body passed the undefined locals `sort` and `waitObj`
    # here, which raised NameError on every call.
    loop(tag, sortMethod, "mech", waitConfig) { |result, page|
      block.call(result, page)
    }
  end

  private

  # Extracts movie attributes from the page currently held by the Mechanize
  # agent (@connector.mech.page).
  #
  # @param movieNum [Integer, Object] an Integer selects the single entry at
  #   that position (original behaviour). Any other value -- ByTagSuper#loop
  #   passes the response body -- extracts every entry on the page.
  #   NOTE(review): the parse-all path assumes every XPath yields exactly one
  #   node per entry, which is the same assumption the per-index lookup
  #   already made -- confirm against the live page markup.
  # @return [Array<Hash>] one hash per entry with the keys "video_id",
  #   "length", "view", "res", "mylist" and "ad"
  def parse(movieNum)
    page = @connector.mech.page

    indexes =
      if movieNum.is_a?(Integer)
        [movieNum]
      else
        (0...page.search(@videoIdXP).size).to_a
      end

    indexes.map { |index| parseEntry(page, index) }
  end

  # Builds the attribute hash for the entry at `index`.
  def parseEntry(page, index)
    href = page.search(@videoIdXP)[index]['href']
    # Text is split on ":"; first part * 60 + second part.
    lengthStr = page.search(@lengthXP)[index].text.split(/\:/)

    {
      "video_id" => /(sm|nm)[0-9]{1,}/.match(href)[0],
      "length"   => lengthStr[0].to_i * 60 + lengthStr[1].to_i,
      "view"     => countAt(page, @viewXP, index),
      "res"      => countAt(page, @resXP, index),
      "mylist"   => countAt(page, @mylistXP, index),
      "ad"       => countAt(page, @adXP, index)
    }
  end

  # Reads the node at `index` for `xpath` and converts text such as "1,234"
  # to the integer 1234.
  def countAt(page, xpath, index)
    page.search(xpath)[index].text.gsub(/\,/, '').to_i
  end
end
|
146
|
-
|
147
|
-
# Tag searcher backed by the Atom feed connector (Nicos::Connector::TagAtom).
class ByTag < ByTagSuper
  def initialize
    @incrAmt = 0.2
    @numOfSearched = 32
    @connector = Nicos::Connector::TagAtom.new()
  end

  # Run the search.
  #
  # @param [String] tag         tag string to search for
  # @param [String] sortMethod  sort order
  # @param [HashObj] waitConfig wait settings
  #
  #==sortMethod: sort order
  # *comment_new*::  most recent comment first
  # *comment_old*::  oldest comment first
  # *view_many*::    most views first
  # *view_few*::     fewest views first
  # *comment_many*:: most comments first
  # *comment_few*::  fewest comments first
  # *mylist_many*::  most mylist registrations first
  # *mylist_few*::   fewest mylist registrations first
  # *post_new*::     newest upload first
  # *post_old*::     oldest upload first
  # *length_long*::  longest playing time first
  # *length_short*:: shortest playing time first
  #
  #==waitConfig: wait settings
  # <b>Before changing any wait, read the notes and the disclaimer in the README.</b>
  #
  # Pass a hash in the format below; the values shown are the defaults.
  # The hash does not need every key: supply only the keys and values you
  # want to change.
  #
  #    @waitConfig = {
  #      'seqAccLimit' => 10,  # number of consecutive requests
  #      'afterSeq'    => 10,  # wait after a run of consecutive requests (all values below are in seconds)
  #      'each'        => 1,   # wait per request within a run of consecutive requests
  #      'increment'   => 1,   # how much the per-request wait grows after access is denied
  #
  #      'deniedSeqReq'=> {    # when consecutive access is denied (same keys in the groups below)
  #        'retryLimit'  => 3,   # maximum number of retries
  #        'wait'        => 120  # wait before the next access
  #      },
  #
  #      'serverIsBusy'=> {    # when the server is busy
  #        'retryLimit'  => 3,
  #        'wait'        => 120
  #      },
  #
  #      'serviceUnavailable' => { # when a 503 is returned
  #        'retryLimit'  => 3,
  #        'wait'        => 120
  #      },
  #
  #      'timedOut' => {       # on timeout
  #        'retryLimit'  => 3,
  #        'wait'        => 10
  #      }
  #    }
  def execute(tag, sortMethod, waitConfig, &block)
    loop(tag, sortMethod, "atom", waitConfig) do |movies, pageNum|
      block.call(movies, pageNum)
    end
  end

  private

  # Delegates parsing of the Atom response body to Nicos::Parser.tagAtom.
  def parse(xml)
    Nicos::Parser.tagAtom(xml)
  end
end
|
243
|
-
end
|
data/nicoscraper.gemspec
DELETED
@@ -1,72 +0,0 @@
|
|
1
|
-
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
-
# -*- encoding: utf-8 -*-
|
5
|
-
|
6
|
-
Gem::Specification.new do |s|
  # Identity
  s.name = "nicoscraper"
  s.version = "0.2.4"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Masami Yonehara"]
  s.date = "2011-09-23"
  s.description = "It scrape movies and mylists of Niconico douga.\n"
  s.email = "zeitdiebe@gmail.com"
  s.extra_rdoc_files = %w[LICENSE.txt README.md]

  # Packaged files
  s.files = %w[
    .document
    Gemfile
    Gemfile.lock
    LICENSE.txt
    README.md
    Rakefile
    VERSION
    index.html
    lib/connector.rb
    lib/converter.rb
    lib/movie.rb
    lib/mylist.rb
    lib/namespace.rb
    lib/parser.rb
    lib/searcher.rb
    nicoscraper.gemspec
    test/movie_spec.rb
  ]
  s.homepage = "http://github.com/hdemon/nicoscraper"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.8"
  s.summary = "The scraper for Niconico douga."

  # Dependencies as [name, requirements], in declaration order.
  runtimeDeps = [
    ["damerau-levenshtein", [">= 0"]]
  ]
  developmentDeps = [
    ["rake",    ["= 0.8.7"]],
    ["shoulda", [">= 0"]],
    ["bundler", ["~> 1.0.0"]],
    ["jeweler", ["~> 1.6.4"]],
    ["rcov",    [">= 0"]]
  ]

  # Runtime/development dependency types are used only when the spec
  # responds to specification_version AND RubyGems is at least 1.2.0.
  typedDeps = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    typedDeps = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  if typedDeps
    runtimeDeps.each { |name, reqs| s.add_runtime_dependency(name, reqs) }
    developmentDeps.each { |name, reqs| s.add_development_dependency(name, reqs) }
  else
    # Otherwise every dependency is declared with plain add_dependency.
    (runtimeDeps + developmentDeps).each { |name, reqs| s.add_dependency(name, reqs) }
  end
end
|
72
|
-
|