nicoscraper 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/mylist.rb DELETED
@@ -1,317 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- $:.unshift File.dirname(__FILE__)
3
-
4
- require 'rubygems'
5
- require 'ruby-debug'
6
- require 'kconv'
7
-
8
- require 'namespace.rb'
9
- require 'parser.rb'
10
- require 'movie.rb'
11
- require 'connector.rb'
12
-
13
-
14
module Nicos
  # A Nico Nico Douga mylist together with the movies it contains.
  #
  # A freshly built instance only knows its id. Call {#getInfo} (Atom feed)
  # or {#getInfoHtml} (HTML page) to fill in the remaining attributes and
  # the +movies+ array.
  class Mylist
    # Keys accepted by {#set}, mapped to the instance variable each one fills.
    # "id" is an alias of "mylist_id" and "updated" an alias of "update_time".
    ATTRIBUTE_IVARS = {
      "mylist_id"    => :@mylist_id,
      "id"           => :@mylist_id,
      "user_id"      => :@user_id,
      "title"        => :@title,
      "description"  => :@description,
      "public"       => :@public,
      "default_sort" => :@default_sort,
      "create_time"  => :@create_time,
      "update_time"  => :@update_time,
      "icon_id"      => :@icon_id,
      "sort_order"   => :@sort_order,
      "movies"       => :@movies,
      "updated"      => :@update_time,
      "author"       => :@author
    }.freeze

    # Spans the script text from the mylist metadata call up to the start of
    # the call that carries the movie list.
    PRELOAD_BLOCK = /MylistGroup\.preloadSingle.{1,}?Mylist\.preload\(/m

    # Numeric metadata fields read from the preload block by {#getInfoHtml}.
    PRELOAD_NUMBER_KEYS = %w[
      id user_id public default_sort create_time update_time icon_id
    ].freeze

    # @param mylist_id [Object] id of the mylist; converted with +to_s+ when
    #   request paths are built.
    def initialize(mylist_id)
      @mylist_id = mylist_id
      @movies = []
      @available = false
    end

    # Compares the titles of every pair of contained movies and returns the
    # average similarity.
    #
    # The similarity of one pair is
    # <tt>1 - distance / length of the later movie's title</tt>, where
    # distance is the Damerau-Levenshtein edit distance.
    #
    # @return [Float, Integer] edit-distance based similarity; at most 1, with
    #   no lower bound. 0 when the mylist holds fewer than two movies.
    def getSimilarity
      # combination(2) walks the pairs in the same (earlier, later) order as a
      # nested index loop, so the floating point sum is accumulated identically.
      scores = @movies.combination(2).map do |earlier, later|
        distance = DamerauLevenshtein.distance(later.title, earlier.title)
        1.0 - distance.fdiv(later.title.length)
      end
      return 0 if scores.empty?

      scores.inject(0) { |total, score| total + score } / scores.length
    end

    # Fetches the mylist's HTML page and fills in the mylist attributes and
    # the contained movies from the JavaScript embedded in that page.
    #
    # Metadata values are stored as the raw strings found in the page; fields
    # that cannot be found are left untouched.
    #
    # @return [Array<String>] the raw JSON fragments the movies were built
    #   from; empty when the page does not contain the expected script.
    def getInfoHtml
      con = Nicos::Connector::Html.new('mech')
      con.setWait(nil)
      mechPage = con.mechGet('http://www.nicovideo.jp' + '/mylist/' + @mylist_id.to_s)

      # Both the mylist metadata and the movie list live in this one script.
      script = mechPage.search(
        "/html/body/div[2]" +
        "/div/div[2]/script[7]"
      ).to_html

      # Information about the mylist itself.
      block = script[PRELOAD_BLOCK]
      return [] unless block

      info = {}
      PRELOAD_NUMBER_KEYS.each { |key| info[key] = preload_number(block, key) }
      # The page calls the field "name"; #set stores it under "title".
      info["title"] = preload_string(block, "name")
      info["description"] = preload_string(block, "description")
      info.delete_if { |_key, value| value.nil? }
      set(info)

      # Information about the movies contained in the mylist.
      line = script[/Mylist.preload.+/]
      items = line && line[/\".{1,}/]
      return [] unless items

      # Drops the 5 trailing characters; presumably the "}]);" terminator plus
      # a line-ending character - TODO confirm against a live page.
      items = items.slice(0, items.length - 5)
      fragments = Nicos::Unicode.unescape(items).split('},{')

      fragments.each do |fragment|
        # Splitting removed the braces around every object; put them back.
        param = JSON.parse("{" + fragment + "}")
        movie = Nicos::Movie.new(param['item_data']['video_id'])
        movie.set(param)
        @movies.push(movie)
      end
    end

    # Fetches the mylist's Atom feed and fills in the mylist attributes and
    # the contained movies from it.
    #
    # @return [Nicos::Mylist, false] +self+ when the feed was retrieved,
    #   +false+ otherwise. +available+ is updated to match.
    def getInfo
      con = Nicos::Connector::MylistAtom.new
      con.setWait(nil)
      result = con.get(
        'www.nicovideo.jp',
        '/mylist/' + @mylist_id.to_s + '?rss=atom&numbers=1'
      )

      unless result["order"] == "success"
        @available = false
        return false
      end

      parsed = Nicos::Parser.mylistAtom(result["body"])
      parsed["entry"].each do |entry|
        movie = Nicos::Movie.new(entry["video_id"])
        entry["available"] = true
        movie.set(entry)
        @movies.push(movie)
      end

      @available = true
      set(parsed["mylist"])
      self
    end

    # Copies the recognised keys of +paramObj+ into the matching attributes;
    # unknown keys are ignored. See {ATTRIBUTE_IVARS} for the accepted keys.
    #
    # @param paramObj [Hash{String => Object}]
    # @return [Hash] +paramObj+ itself
    def set(paramObj)
      paramObj.each do |key, value|
        ivar = ATTRIBUTE_IVARS[key]
        instance_variable_set(ivar, value) if ivar
      end
    end

    # True once {#getInfo} has retrieved the feed successfully; false after
    # construction and after a failed {#getInfo}.
    #
    # @return [Boolean]
    attr_accessor :available

    # Mylist id.
    #
    # Given to the constructor; overwritten by {#getInfo} and {#getInfoHtml}.
    attr_accessor :mylist_id

    # Id of the user owning the mylist.
    #
    # Filled by {#getInfoHtml}.
    attr_accessor :user_id

    # Title of the mylist.
    #
    # Filled by {#getInfo} and {#getInfoHtml}.
    attr_accessor :title

    # Description text of the mylist.
    #
    # Filled by {#getInfo} and {#getInfoHtml}.
    attr_accessor :description

    # Visibility setting (meaning of the values still under investigation).
    #
    # Filled by {#getInfoHtml}.
    attr_accessor :public

    # Configured default sort order.
    #
    # Filled by {#getInfoHtml}.
    attr_accessor :default_sort

    # Creation time of the mylist.
    #
    # Filled by {#getInfoHtml}.
    attr_accessor :create_time

    # Last update time of the mylist.
    #
    # Filled by {#getInfo} (from the "updated" key) and {#getInfoHtml}.
    attr_accessor :update_time

    # Icon colour (unconfirmed).
    #
    # Filled by {#getInfoHtml}.
    attr_accessor :icon_id

    # Current sort order.
    #
    # Only filled when passed to {#set} explicitly.
    attr_accessor :sort_order

    # Name of the mylist's author.
    #
    # Filled by {#getInfo}.
    attr_accessor :author

    # Movie instances contained in the mylist.
    #
    # {#getInfo} and {#getInfoHtml} build one instance per listed movie and
    # append it to this array.
    #
    # @return [Array<Movie>]
    attr_accessor :movies

    private

    # Returns the first run of digits on the "<key>: ..." line of +block+,
    # or nil when the line is missing.
    def preload_number(block, key)
      line = block[/\s#{Regexp.escape(key)}:[^\n]{1,}/]
      line && line[/[0-9]{1,}/]
    end

    # Returns the text between the outermost double quotes on the
    # "<key>: ..." line of +block+, or nil when the line is missing.
    # Works for any line ending and yields "" for an empty value.
    def preload_string(block, key)
      block[/\s#{Regexp.escape(key)}:\s*"(.*)"/, 1]
    end
  end
end
data/lib/namespace.rb DELETED
@@ -1,3 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
# Root namespace shared by every nicoscraper component.
module Nicos
end
data/lib/parser.rb DELETED
@@ -1,234 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- $:.unshift File.dirname(__FILE__)
3
-
4
- require 'rubygems'
5
- require 'xml'
6
- require 'time'
7
-
8
- require 'namespace.rb'
9
- require 'converter.rb'
10
-
11
module Nicos
  # Parsers that turn the XML documents returned by Nico Nico Douga into
  # plain hashes and arrays. All methods are module functions.
  module Parser
    # Patterns applied by {mylistAtom} to the HTML inside an entry's
    # <content> element. The second capture group of each pattern becomes the
    # value stored under the given key.
    MYLIST_CONTENT_PATTERNS = {
      "memo"           => /(<p\sclass=\"nico-memo\"\>)([^\<]{1,})/,
      "thumbnail_url"  => /(<p\sclass=\"nico-thumbnail\">.+src=\")(http:\/\/[^\"]{1,})/,
      "description"    => /(<p\sclass\=\"nico-description\"\>)([^\<]{1,})/,
      "length"         => /(<p\sclass\=\"nico-info-length\"\>)([^\<]{1,})/,
      "first_retrieve" => /(<p\sclass\=\"nico-info-date\"\>)([^\<]{1,})/,
      "view"           => /(<p\sclass\=\"nico-numbers-view\"\>)([^\<]{1,})/,
      "res"            => /(<p\sclass\=\"nico-numbers-res\"\>)([^\<]{1,})/,
      "mylist"         => /(<p\sclass\=\"nico-numbers-mylist\"\>)([^\<]{1,})/
    }.freeze

    # Parses the XML returned by getThumbInfo into a hash.
    #
    # Text fields are stored as strings, counters as integers,
    # "first_retrieve" as converted by Nicos::Converter.iso8601ToUnix and
    # "length" as a number of seconds. Tags are collected per domain under
    # "tags_<domain>".
    #
    # @param xml [String] XML document
    # @return [Hash{String => Object}]
    def getThumbInfo(xml)
      doc = XML::Reader.string(
        xml,
        :options => XML::Parser::Options::NOBLANKS |
          XML::Parser::Options::NOENT
      )

      parsed = {}
      category = ""

      begin
        while doc.read
          next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

          case doc.name
          when "video_id", "title", "description", "thumbnail_url",
               "movie_type", "last_res_body", "watch_url", "thumb_type"
            label = doc.name
            doc.read
            parsed[label] = doc.value
          when "size_high", "size_low", "view_counter", "comment_num",
               "mylist_counter", "embeddable", "no_live_play",
               "user_id"
            label = doc.name
            doc.read
            parsed[label] = doc.value.to_i
          when "first_retrieve"
            label = doc.name
            doc.read
            parsed[label] = Nicos::Converter.iso8601ToUnix(doc.value)
          when "length"
            doc.read
            # "m:ss" -> seconds
            lengthStr = doc.value.split(/\:/)
            parsed["length"] = lengthStr[0].to_i * 60 + lengthStr[1].to_i
          when "tags"
            doc.move_to_attribute("domain")
            category = doc.value
            # Only create the list the first time a domain is seen, so a
            # repeated <tags> element does not discard earlier tags.
            parsed["tags_" + category] ||= []
          when "tag"
            doc.read
            (parsed["tags_" + category] ||= []).push(doc.value)
          end
        end
      ensure
        doc.close
      end

      parsed
    end

    # Parses the Atom feed of a tag search into an array with one hash per
    # entry.
    #
    # NOTE(review): elements seen before the first <entry> are written to a
    # placeholder hash that the first entry then replaces; for a feed without
    # entries that placeholder is returned as the only element.
    #
    # @param xml [String] XML document
    # @return [Array<Hash{String => Object}>]
    def tagAtom(xml)
      doc = XML::Reader.string(
        xml,
        :options => XML::Parser::Options::NOBLANKS |
          XML::Parser::Options::NOENT
      )

      n = -1
      parsed = [{}]

      begin
        while doc.read
          next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

          case doc.name
          when "entry"
            n += 1
            parsed[n] = {}
          when "title"
            doc.read
            parsed[n]["title"] = doc.value
          when "link"
            doc.move_to_attribute("href")
            parsed[n]["video_id"] = doc.value.split('/')[4]
          when "published", "updated"
            label = doc.name
            doc.read
            parsed[n][label] = Nicos::Converter.iso8601ToUnix(doc.value)
          when "p"
            doc.move_to_attribute("class")
            case doc.value
            when "nico-thumbnail"
              doc.read
              doc.move_to_attribute("src")
              parsed[n]["thumbnail_url"] = doc.value
            when "nico-description"
              doc.read
              parsed[n]["description"] = doc.value
            end
          when "strong"
            doc.move_to_attribute("class")
            case doc.value
            when "nico-info-length"
              doc.read
              # "m:ss" -> seconds
              lengthStr = doc.value.split(/\:/)
              parsed[n]["length"] = lengthStr[0].to_i * 60 + lengthStr[1].to_i
            when "nico-numbers-view", "nico-numbers-res",
                 "nico-numbers-mylist"
              label = doc.value
              doc.read
              # Key is the class name without its "nico-numbers-" prefix.
              parsed[n][label.slice(13, 99)] = doc.value.to_i
            end
          end
        end
      ensure
        doc.close
      end

      parsed
    end

    # Parses the Atom feed of a mylist.
    #
    # The result has two keys: "mylist" holds the feed-level values (title,
    # description, mylist_id, updated, author) and "entry" holds one hash per
    # <entry> element.
    #
    # NOTE(review): "entry" starts out as [{}], so a feed without entries
    # yields a single empty hash rather than an empty array.
    #
    # @param xml [String] XML document
    # @return [Hash{String => Object}]
    def mylistAtom(xml)
      doc = XML::Reader.string(
        xml,
        :options => XML::Parser::Options::NOBLANKS |
          XML::Parser::Options::NOENT
      )

      # Index of the entry being read; -1 while still in the feed header.
      n = -1
      parsed = { "mylist" => {}, "entry" => [{}] }

      begin
        while doc.read
          next if doc.node_type == XML::Reader::TYPE_END_ELEMENT

          case doc.name
          # <title>, <id> and <updated> are marked up both on the mylist and
          # on each entry, so the value is routed according to where the
          # node is located.
          when "title"
            doc.read
            if n == -1
              # Strip the 6 leading and 7 trailing characters around the
              # mylist name; presumably the site's fixed prefix and suffix -
              # TODO confirm. The full remainder is kept, however long.
              body = doc.value.slice(6..-1)
              parsed["mylist"]["title"] = body.slice(0, body.length - 7)
            else
              parsed["entry"][n]["title"] = doc.value
            end
          when "link"
            if n != -1
              doc.move_to_attribute("href")
              parsed["entry"][n]["video_id"] =
                Nicos::Extractor.videoId(doc.value)
            end
          when "subtitle"
            doc.read
            parsed["mylist"]["description"] = doc.value
          when "id"
            doc.read
            if n == -1
              parsed["mylist"]["mylist_id"] =
                Nicos::Extractor.mylistId(doc.value)
            else
              parsed["entry"][n]["item_id"] =
                Nicos::Extractor.itemId(doc.value)
            end
          when "updated"
            doc.read
            # Only the feed-level timestamp describes the mylist; an entry's
            # <updated> must not overwrite it.
            if n == -1
              parsed["mylist"]["updated"] =
                Nicos::Converter.iso8601ToUnix(doc.value)
            end
          when "name"
            doc.read
            parsed["mylist"]["author"] = doc.value
          when "entry"
            n += 1
            parsed["entry"][n] = {}
          when "content"
            doc.read
            html = doc.value.to_s
            MYLIST_CONTENT_PATTERNS.each do |key, pattern|
              found = pattern.match(html)
              # nil when the pattern is absent from the entry's HTML
              parsed["entry"][n][key] = found && found[2]
            end
          end
        end
      ensure
        doc.close
      end

      parsed
    end

    module_function :tagAtom
    module_function :mylistAtom
    module_function :getThumbInfo
  end
end