nicoscraper 0.2.12 → 0.2.13

Sign up to get free protection for your applications and to get access to all the features.
@@ -17,184 +17,154 @@ module Nicos
17
17
  @available = false
18
18
  end
19
19
 
20
+ private
21
+
22
# Splits +data+ into its overlapping character n-grams.
#
# @param data [String] text to split; it is processed character by character
# @param n [Integer] number of characters in each gram
# @return [Array<String>] every run of +n+ consecutive characters, in order
#   of appearance; empty when +data+ has fewer than +n+ characters
def ngram(data, n)
  data.chars.each_cons(n).map(&:join)
end
29
+
30
# Similarity of two strings measured on their character n-grams: the number
# of distinct grams both strings share divided by the number of distinct
# grams found in either string.
#
# @param a [String] first string
# @param b [String] second string
# @param n [Integer] gram length handed to ngram
# @return [Float] value between 0.0 and 1.0; 0.0 when neither string yields
#   a gram (both shorter than +n+)
def sim(a, b, n)
  agram = ngram(a, n)
  bgram = ngram(b, n)

  total = (agram | bgram).size
  # No grams at all: answer 0.0 directly instead of dividing 0 by 0 and
  # testing the result for NaN.
  return 0.0 if total.zero?

  (agram & bgram).size.fdiv(total)
end
39
+
40
# Requests this mylist's page from www.nicovideo.jp through +connector+.
#
# @param connector [#get, #getStatus] object that performs the request
# @param type [Symbol] :atom appends the Atom-feed query string; any other
#   value sends an empty query
# @return [Hash] :result => return value of connector.get,
#   :status => return value of connector.getStatus
def connect(connector, type)
  query = if type == :atom
            '?rss=atom&numbers=1'
          else
            ''
          end
  path = '/mylist/' + @mylist_id.to_s

  # Keep the order: issue get first, then read getStatus.
  response = connector.get('www.nicovideo.jp', path, query)
  {
    :result => response,
    :status => connector.getStatus
  }
end
50
+
51
# Turns a successful connector result into Movie objects and mylist fields.
#
# When result[:order] is :afterTheSuccess the block is called with +result+
# and must return a Hash; its :entry list (may be nil) is converted into
# Nicos::Movie objects appended to @movies, its :mylist value is passed to
# set, and @available becomes true. For any other order nothing is changed.
#
# @param result [Hash] connector result; :order is inspected here
# @yieldparam result [Hash] the same +result+, handed to the parser block
# @yieldreturn [Hash] parsed data with :entry and :mylist
# @return [Hash, nil] the block's return value, or nil when the request did
#   not report :afterTheSuccess
def parse(result, &block)
  return nil unless result[:order] == :afterTheSuccess

  parsed = block.call(result)

  entries = parsed[:entry]
  unless entries.nil?
    entries.each do |entry|
      movie = Nicos::Movie.new(entry[:video_id])
      # Every entry that reached this point is flagged as available before
      # being copied into the movie.
      entry[:available] = true
      movie.set(entry)
      @movies.push(movie)
    end
  end

  set(parsed[:mylist])
  @available = true
  parsed
end
67
+
68
+ public
69
+
20
70
  # 自分に含まれている動画のタイトルをすべての組み合わせにおいて比較し、
21
71
  # 類似度の平均を返す。
22
72
  #
23
73
  # @return [Fixnum] 編集距離に基づく類似度。上限は1、下限はなし。
24
74
  def getSimilarity
25
75
  l = @movies.length - 1
26
- dlc = DamerauLevenshtein
27
- dl = 0.0
28
- dlAry = []
76
+ sim = 0.0
77
+ simAry = []
29
78
  count_o = 0
30
79
  count_i = 0
31
80
 
32
- while count_o <= l do
33
- count_i = count_o + 1
34
- while count_i <= l do
35
- dl = dlc.distance(
36
- @movies[count_i].title,
37
- @movies[count_o].title
38
- )
39
-
40
- dl = 1.0 - dl.fdiv( @movies[count_i].title.length)
41
- dlAry.push(dl)
42
-
43
- count_i += 1
44
- end
45
- count_o += 1
81
+ @movies.each do |movie|
82
+ puts "\s" + movie.title
46
83
  end
84
+
85
+ if @movies.length >= 2
86
+ while count_o <= l do
87
+ count_i = count_o + 1
88
+ while count_i <= l do
89
+ simAry.push(
90
+ sim(
91
+ @movies[count_i].title,
92
+ @movies[count_o].title,
93
+ 3
94
+ )
95
+ )
96
+ count_i += 1
97
+ end
98
+ count_o += 1
99
+ end
47
100
 
48
- if l != 0 && dlAry.length > 0
49
101
  t = 0
50
- dlAry.each { |_dl| t += _dl }
51
- similarity = t / dlAry.length
52
- elsif dlAry.length == 0
53
- similarity = 0
54
- else
102
+ simAry.each { |_sim| t += _sim }
103
+ similarity = t / simAry.length
104
+ elsif @movies.length == 1
55
105
  similarity = 1
106
+ else
107
+ similarity = 0
56
108
  end
57
-
58
- return similarity
109
+ p similarity
110
+ ( similarity * 100 ).round / 100.0
59
111
  end
60
112
 
61
- =begin
62
- # 自分に含まれている動画のタイトルをすべての組み合わせにおいて比較し、
63
- def getInfoHtml
64
- con = Nicos::Connector::Html.new('mech')
65
- reqUrl = 'http://www.nicovideo.jp' +
66
- '/mylist/' + @mylist_id.to_s
67
- mechPage = con.mechGet(reqUrl)
68
- result = []
69
-
70
- # Mylist自身の情報を取得
71
- jsonStr = mechPage.search(
72
- "/html/body/div[2]" +
73
- "/div/div[2]/script[7]"
74
- ).to_html
75
-
76
- reg = /MylistGroup\.preloadSingle.{1,}?Mylist\.preload\(/m
77
- mlJson = jsonStr.scan(reg)[0]
78
-
79
- id = mlJson.scan(/\sid:[^\n]{1,}/)[0]
80
- .scan(/[0-9]{1,}/)[0]
81
- user_id = mlJson.scan(/\suser_id:[^\n]{1,}/)[0]
82
- .scan(/[0-9]{1,}/)[0]
83
- name = mlJson.scan(/\sname:[^\n]{1,}/)[0]
84
- name = name.slice(
85
- " name: \"".length,
86
- name.length - " name: \"".length - "\",\n".length
87
- )
88
- desc = mlJson.scan(/\sdescription:.{1,}/)[0]
89
- desc = desc.slice(
90
- " description: \"".length,
91
- desc.length - " description: \"".length - "\",\npublic".length
92
- )
93
- public = mlJson.scan(/\spublic:[^,]{1,}/)[0]
94
- .scan(/[0-9]{1,}/)[0]
95
- default_sort = mlJson.scan(/\sdefault_sort:[^\n]{1,}/)[0]
96
- .scan(/[0-9]{1,}/)[0]
97
- create_time = mlJson.scan(/\screate_time:[^\n]{1,}/)[0]
98
- .scan(/[0-9]{1,}/)[0]
99
- update_time = mlJson.scan(/\supdate_time:[^\n]{1,}/)[0]
100
- .scan(/[0-9]{1,}/)[0]
101
- icon_id = mlJson.scan(/\sicon_id:[^\n]{1,}/)[0]
102
- .scan(/[0-9]{1,}/)[0]
103
-
104
- # mlJson = mlJson.scan(/[^\r\n ]{1,}/).join('')
105
- #mlJson = mlJson.scan(/{.+/)[0].split(',')
106
-
107
- # 説明文が空欄だった時の措置。
108
- desc = mlJson[3].scan(/\".+\"/)[0]
109
- if desc != nil then desc = desc.scan(/[^\"]{1,}/)[0] end
110
-
111
- paramObj = {
112
- :id => id,
113
- :user_id => user_id,
114
- :name => name,
115
- :description => description,
116
- :public => public,
117
- :default_sort => default_sort,
118
- :create_time => create_time,
119
- :update_time => update_time,
120
- :icon_id => icon_id
121
- # "sort_order" => ,
122
- }
123
- set(paramObj)
124
-
125
- # 自分に含まれる動画の情報を取得
126
- jsonStr = mechPage.search(
127
- "/html/body/div[2]" +
128
- "/div/div[2]/script[7]"
129
- ).to_html
130
-
131
- mvJson = jsonStr.scan(/Mylist.preload.+/)[0]
132
- mvJson = mvJson.scan(/\".{1,}/)[0]
133
- mvJson = mvJson.slice(0, mvJson.length - 5)
134
- #mvJson = mvJson.split('},{')
135
- mvJson = Nicos::Unicode.unescape(mvJson).split('},{')
136
-
137
- mvJson.each { |e|
138
- e = "{" + e + "}"
139
- param = JSON.parse(e)
140
- movie = Nicos::Movie.new(param['item_data']['video_id'])
141
- movie.set(param)
142
-
143
- @movies.push(movie)
144
- }
145
- end
146
- =end
147
113
 
148
114
  # マイリストのAtomフィードから、マイリストとそれに含まれる動画の情報を取得する。
149
115
  #
150
- # @return [Fixnum] 編集距離に基づく類似度。上限は1、下限はなし。
116
+ # @return [Fixnum] Trigram法による、
151
117
  def getInfo
152
118
  parsed = nil
153
119
  @available = false
154
120
 
155
- con = Nicos::Connector::MylistAtom.new()
156
- host = 'www.nicovideo.jp'
157
- entity = '/mylist/' + @mylist_id.to_s + '?rss=atom&numbers=1'
121
+ res = connect(
122
+ Nicos::Connector::MylistAtom.new(),
123
+ :atom )
124
+ parse = parse(res[:result]) do |result|
125
+ Nicos::Parser::Xml::mylistAtom(result[:body])
126
+ end
158
127
 
159
- result = con.get(host, entity)
160
- status = con.getStatus
128
+ {
129
+ :parsed => parse,
130
+ :status => res[:status][:status],
131
+ :retry => res[:status][:retry]
132
+ }
133
+ end
161
134
 
162
- if result[:order] == :afterTheSuccess
163
- parsed = Nicos::Parser::mylistAtom(result[:body])
164
-
165
- parsed[:entry].each { |e|
166
- movie = Nicos::Movie.new(e[:video_id])
167
- e[:available] = true
168
- movie.set(e)
169
- @movies.push(movie)
170
- }
135
# Fetches information about this mylist and the movies it contains from the
# mylist's HTML page, using Nicos::Connector::MylistHtml for the request and
# Nicos::Parser::Html.mylist for the response body.
#
# @available is reset to false before the request; parse sets it to true
# again when the request succeeds.
#
# @return [Hash] :parsed => value returned by parse (nil unless the request
#   reported :afterTheSuccess), :status and :retry => the entries of the same
#   name taken from the connector status
def getMoreInfo
  @available = false

  res = connect(Nicos::Connector::MylistHtml.new, :html)
  parsed = parse(res[:result]) do |result|
    Nicos::Parser::Html.mylist(result[:body])
  end

  {
    :parsed => parsed,
    :status => res[:status][:status],
    :retry => res[:status][:retry]
  }
end
182
152
 
183
153
  # {Movie#set} を参照。
184
154
  def set(paramObj)
185
155
  paramObj.each_key do |key|
186
156
  param = paramObj[key]
187
157
  case key
188
- when "mylist_id", :mylist_id then @mylist_id = param
189
- when "user_id", :user_id then @user_id = param
158
+ when "mylist_id", :mylist_id then @mylist_id = param.to_i
159
+ when "user_id", :user_id then @user_id = param.to_i
190
160
  when "title", :title then @title = param
191
161
  when "description",:description then @description = param
192
- when "public", :public then @public = param
193
- when "default_sort",:default_sort then @default_sort = param
194
- when "create_time",:create_time then @create_time = param
195
- when "update_time",:updated_time then @update_time = param
196
- when "icon_id", :icon_id then @icon_id = param
197
- when "sort_order", :sort_order then @sort_order = param
162
+ when "public", :public then @public = param.to_i
163
+ when "default_sort",:default_sort then @default_sort = param.to_i
164
+ when "create_time",:create_time then @create_time = param.to_i
165
+ when "update_time",:update_time then @update_time = param.to_i
166
+ when "icon_id", :icon_id then @icon_id = param.to_i
167
+ when "sort_order", :sort_order then @sort_order = param.to_i
198
168
  when "movies", :movies then @movies = param
199
169
  when "updated", :updated then @update_time = param
200
170
  when "author", :author then @author = param