nicoscraper 0.2.12 → 0.2.13
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +0 -2
- data/Gemfile.lock +0 -13
- data/README.md +37 -27
- data/VERSION +1 -1
- data/lib/classes/connector.rb +43 -161
- data/lib/classes/movie.rb +46 -45
- data/lib/classes/mylist.rb +111 -141
- data/lib/classes/parser.rb +343 -264
- data/lib/classes/searcher.rb +4 -4
- data/lib/classes/tools.rb +11 -0
- data/lib/config/mylist.rb +5 -0
- data/lib/nicoscraper.rb +1 -1
- data/nicoscraper.gemspec +4 -8
- data/test/mylist_getmoreinfo_spec.rb +122 -0
- data/test/mylist_spec.rb +7 -3
- metadata +17 -37
data/lib/classes/mylist.rb
CHANGED
@@ -17,184 +17,154 @@ module Nicos
|
|
17
17
|
@available = false
|
18
18
|
end
|
19
19
|
|
20
|
+
private
|
21
|
+
|
22
|
+
def ngram(data, n)
|
23
|
+
ret = []
|
24
|
+
data.split(//u).each_cons(n) do |a|
|
25
|
+
ret << a.join
|
26
|
+
end
|
27
|
+
ret
|
28
|
+
end
|
29
|
+
|
30
|
+
def sim(a, b, n)
|
31
|
+
agram = ngram(a, n)
|
32
|
+
bgram = ngram(b, n)
|
33
|
+
|
34
|
+
all = (agram | bgram).size.to_f
|
35
|
+
same = (agram & bgram).size.to_f
|
36
|
+
|
37
|
+
(same / all).nan? ? 0 : (same / all)
|
38
|
+
end
|
39
|
+
|
40
|
+
def connect(connector, type)
|
41
|
+
host = 'www.nicovideo.jp'
|
42
|
+
entity = "/mylist/#{@mylist_id.to_s}"
|
43
|
+
param = (type == :atom ? '?rss=atom&numbers=1' : '')
|
44
|
+
|
45
|
+
result = connector.get(host, entity, param)
|
46
|
+
status = connector.getStatus
|
47
|
+
|
48
|
+
{ :result => result, :status => status }
|
49
|
+
end
|
50
|
+
|
51
|
+
def parse(result, &block)
|
52
|
+
if result[:order] == :afterTheSuccess
|
53
|
+
parsed = block.call(result)
|
54
|
+
|
55
|
+
parsed[:entry].each do |e|
|
56
|
+
movie = Nicos::Movie.new(e[:video_id])
|
57
|
+
e[:available] = true
|
58
|
+
movie.set(e)
|
59
|
+
@movies.push(movie)
|
60
|
+
end if parsed[:entry] != nil
|
61
|
+
|
62
|
+
set(parsed[:mylist])
|
63
|
+
@available = true
|
64
|
+
parsed
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
public
|
69
|
+
|
20
70
|
# 自分に含まれている動画のタイトルをすべての組み合わせにおいて比較し、
|
21
71
|
# 類似度の平均を返す。
|
22
72
|
#
|
23
73
|
# @return [Fixnum] 編集距離に基づく類似度。上限は1、下限はなし。
|
24
74
|
def getSimilarity
|
25
75
|
l = @movies.length - 1
|
26
|
-
|
27
|
-
|
28
|
-
dlAry = []
|
76
|
+
sim = 0.0
|
77
|
+
simAry = []
|
29
78
|
count_o = 0
|
30
79
|
count_i = 0
|
31
80
|
|
32
|
-
|
33
|
-
|
34
|
-
while count_i <= l do
|
35
|
-
dl = dlc.distance(
|
36
|
-
@movies[count_i].title,
|
37
|
-
@movies[count_o].title
|
38
|
-
)
|
39
|
-
|
40
|
-
dl = 1.0 - dl.fdiv( @movies[count_i].title.length)
|
41
|
-
dlAry.push(dl)
|
42
|
-
|
43
|
-
count_i += 1
|
44
|
-
end
|
45
|
-
count_o += 1
|
81
|
+
@movies.each do |movie|
|
82
|
+
puts "\s" + movie.title
|
46
83
|
end
|
84
|
+
|
85
|
+
if @movies.length >= 2
|
86
|
+
while count_o <= l do
|
87
|
+
count_i = count_o + 1
|
88
|
+
while count_i <= l do
|
89
|
+
simAry.push(
|
90
|
+
sim(
|
91
|
+
@movies[count_i].title,
|
92
|
+
@movies[count_o].title,
|
93
|
+
3
|
94
|
+
)
|
95
|
+
)
|
96
|
+
count_i += 1
|
97
|
+
end
|
98
|
+
count_o += 1
|
99
|
+
end
|
47
100
|
|
48
|
-
if l != 0 && dlAry.length > 0
|
49
101
|
t = 0
|
50
|
-
|
51
|
-
similarity = t /
|
52
|
-
elsif
|
53
|
-
similarity = 0
|
54
|
-
else
|
102
|
+
simAry.each { |_sim| t += _sim }
|
103
|
+
similarity = t / simAry.length
|
104
|
+
elsif @movies.length == 1
|
55
105
|
similarity = 1
|
106
|
+
else
|
107
|
+
similarity = 0
|
56
108
|
end
|
57
|
-
|
58
|
-
|
109
|
+
p similarity
|
110
|
+
( similarity * 100 ).round / 100.0
|
59
111
|
end
|
60
112
|
|
61
|
-
=begin
|
62
|
-
# 自分に含まれている動画のタイトルをすべての組み合わせにおいて比較し、
|
63
|
-
def getInfoHtml
|
64
|
-
con = Nicos::Connector::Html.new('mech')
|
65
|
-
reqUrl = 'http://www.nicovideo.jp' +
|
66
|
-
'/mylist/' + @mylist_id.to_s
|
67
|
-
mechPage = con.mechGet(reqUrl)
|
68
|
-
result = []
|
69
|
-
|
70
|
-
# Mylist自身の情報を取得
|
71
|
-
jsonStr = mechPage.search(
|
72
|
-
"/html/body/div[2]" +
|
73
|
-
"/div/div[2]/script[7]"
|
74
|
-
).to_html
|
75
|
-
|
76
|
-
reg = /MylistGroup\.preloadSingle.{1,}?Mylist\.preload\(/m
|
77
|
-
mlJson = jsonStr.scan(reg)[0]
|
78
|
-
|
79
|
-
id = mlJson.scan(/\sid:[^\n]{1,}/)[0]
|
80
|
-
.scan(/[0-9]{1,}/)[0]
|
81
|
-
user_id = mlJson.scan(/\suser_id:[^\n]{1,}/)[0]
|
82
|
-
.scan(/[0-9]{1,}/)[0]
|
83
|
-
name = mlJson.scan(/\sname:[^\n]{1,}/)[0]
|
84
|
-
name = name.slice(
|
85
|
-
" name: \"".length,
|
86
|
-
name.length - " name: \"".length - "\",\n".length
|
87
|
-
)
|
88
|
-
desc = mlJson.scan(/\sdescription:.{1,}/)[0]
|
89
|
-
desc = desc.slice(
|
90
|
-
" description: \"".length,
|
91
|
-
desc.length - " description: \"".length - "\",\npublic".length
|
92
|
-
)
|
93
|
-
public = mlJson.scan(/\spublic:[^,]{1,}/)[0]
|
94
|
-
.scan(/[0-9]{1,}/)[0]
|
95
|
-
default_sort = mlJson.scan(/\sdefault_sort:[^\n]{1,}/)[0]
|
96
|
-
.scan(/[0-9]{1,}/)[0]
|
97
|
-
create_time = mlJson.scan(/\screate_time:[^\n]{1,}/)[0]
|
98
|
-
.scan(/[0-9]{1,}/)[0]
|
99
|
-
update_time = mlJson.scan(/\supdate_time:[^\n]{1,}/)[0]
|
100
|
-
.scan(/[0-9]{1,}/)[0]
|
101
|
-
icon_id = mlJson.scan(/\sicon_id:[^\n]{1,}/)[0]
|
102
|
-
.scan(/[0-9]{1,}/)[0]
|
103
|
-
|
104
|
-
# mlJson = mlJson.scan(/[^\r\n ]{1,}/).join('')
|
105
|
-
#mlJson = mlJson.scan(/{.+/)[0].split(',')
|
106
|
-
|
107
|
-
# 説明文が空欄だった時の措置。
|
108
|
-
desc = mlJson[3].scan(/\".+\"/)[0]
|
109
|
-
if desc != nil then desc = desc.scan(/[^\"]{1,}/)[0] end
|
110
|
-
|
111
|
-
paramObj = {
|
112
|
-
:id => id,
|
113
|
-
:user_id => user_id,
|
114
|
-
:name => name,
|
115
|
-
:description => description,
|
116
|
-
:public => public,
|
117
|
-
:default_sort => default_sort,
|
118
|
-
:create_time => create_time,
|
119
|
-
:update_time => update_time,
|
120
|
-
:icon_id => icon_id
|
121
|
-
# "sort_order" => ,
|
122
|
-
}
|
123
|
-
set(paramObj)
|
124
|
-
|
125
|
-
# 自分に含まれる動画の情報を取得
|
126
|
-
jsonStr = mechPage.search(
|
127
|
-
"/html/body/div[2]" +
|
128
|
-
"/div/div[2]/script[7]"
|
129
|
-
).to_html
|
130
|
-
|
131
|
-
mvJson = jsonStr.scan(/Mylist.preload.+/)[0]
|
132
|
-
mvJson = mvJson.scan(/\".{1,}/)[0]
|
133
|
-
mvJson = mvJson.slice(0, mvJson.length - 5)
|
134
|
-
#mvJson = mvJson.split('},{')
|
135
|
-
mvJson = Nicos::Unicode.unescape(mvJson).split('},{')
|
136
|
-
|
137
|
-
mvJson.each { |e|
|
138
|
-
e = "{" + e + "}"
|
139
|
-
param = JSON.parse(e)
|
140
|
-
movie = Nicos::Movie.new(param['item_data']['video_id'])
|
141
|
-
movie.set(param)
|
142
|
-
|
143
|
-
@movies.push(movie)
|
144
|
-
}
|
145
|
-
end
|
146
|
-
=end
|
147
113
|
|
148
114
|
# マイリストのAtomフィードから、マイリストとそれに含まれる動画の情報を取得する。
|
149
115
|
#
|
150
|
-
# @return [Fixnum]
|
116
|
+
# @return [Fixnum] Trigram法による、
|
151
117
|
def getInfo
|
152
118
|
parsed = nil
|
153
119
|
@available = false
|
154
120
|
|
155
|
-
|
156
|
-
|
157
|
-
|
121
|
+
res = connect(
|
122
|
+
Nicos::Connector::MylistAtom.new(),
|
123
|
+
:atom )
|
124
|
+
parse = parse(res[:result]) do |result|
|
125
|
+
Nicos::Parser::Xml::mylistAtom(result[:body])
|
126
|
+
end
|
158
127
|
|
159
|
-
|
160
|
-
|
128
|
+
{
|
129
|
+
:parsed => parse,
|
130
|
+
:status => res[:status][:status],
|
131
|
+
:retry => res[:status][:retry]
|
132
|
+
}
|
133
|
+
end
|
161
134
|
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
parsed[:entry].each { |e|
|
166
|
-
movie = Nicos::Movie.new(e[:video_id])
|
167
|
-
e[:available] = true
|
168
|
-
movie.set(e)
|
169
|
-
@movies.push(movie)
|
170
|
-
}
|
135
|
+
def getMoreInfo
|
136
|
+
parsed = nil
|
137
|
+
@available = false
|
171
138
|
|
172
|
-
|
173
|
-
|
174
|
-
|
139
|
+
res = connect(
|
140
|
+
Nicos::Connector::MylistHtml.new(),
|
141
|
+
:html )
|
142
|
+
parse = parse(res[:result]) do |result|
|
143
|
+
Nicos::Parser::Html::mylist(result[:body])
|
144
|
+
end
|
175
145
|
|
176
146
|
{
|
177
|
-
:parsed =>
|
178
|
-
:status => status[:status],
|
179
|
-
:retry => status[:retry]
|
147
|
+
:parsed => parse,
|
148
|
+
:status => res[:status][:status],
|
149
|
+
:retry => res[:status][:retry]
|
180
150
|
}
|
181
|
-
end
|
151
|
+
end
|
182
152
|
|
183
153
|
# {Movie#set} を参照。
|
184
154
|
def set(paramObj)
|
185
155
|
paramObj.each_key do |key|
|
186
156
|
param = paramObj[key]
|
187
157
|
case key
|
188
|
-
when "mylist_id", :mylist_id then @mylist_id = param
|
189
|
-
when "user_id", :user_id then @user_id = param
|
158
|
+
when "mylist_id", :mylist_id then @mylist_id = param.to_i
|
159
|
+
when "user_id", :user_id then @user_id = param.to_i
|
190
160
|
when "title", :title then @title = param
|
191
161
|
when "description",:description then @description = param
|
192
|
-
when "public", :public then @public = param
|
193
|
-
when "default_sort",:default_sort then @default_sort = param
|
194
|
-
when "create_time",:create_time then @create_time = param
|
195
|
-
when "update_time",:
|
196
|
-
when "icon_id", :icon_id then @icon_id = param
|
197
|
-
when "sort_order", :sort_order then @sort_order = param
|
162
|
+
when "public", :public then @public = param.to_i
|
163
|
+
when "default_sort",:default_sort then @default_sort = param.to_i
|
164
|
+
when "create_time",:create_time then @create_time = param.to_i
|
165
|
+
when "update_time",:update_time then @update_time = param.to_i
|
166
|
+
when "icon_id", :icon_id then @icon_id = param.to_i
|
167
|
+
when "sort_order", :sort_order then @sort_order = param.to_i
|
198
168
|
when "movies", :movies then @movies = param
|
199
169
|
when "updated", :updated then @update_time = param
|
200
170
|
when "author", :author then @author = param
|