nicoscraper 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +112 -0
- data/VERSION +1 -1
- data/lib/connector.rb +305 -304
- data/lib/converter.rb +3 -3
- data/lib/movie.rb +7 -7
- data/lib/mylist.rb +11 -11
- data/lib/nicos.rb +9 -0
- data/lib/parser.rb +11 -13
- data/lib/searcher.rb +130 -129
- data/nicoscraper.gemspec +5 -4
- data/test/movie_spec.rb +9 -11
- metadata +5 -4
- data/test/helper.rb +0 -18
- data/test/test_nicoscraper.rb +0 -7
data/lib/converter.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
require 'xml'
|
3
3
|
require 'time'
|
4
4
|
|
5
|
-
module Convert
|
5
|
+
module Nicos::Converter
|
6
6
|
def iso8601ToUnix(str)
|
7
7
|
Time.strptime(str, "%Y-%m-%dT%H:%M:%S").to_i
|
8
8
|
end
|
@@ -30,7 +30,7 @@ module Convert
|
|
30
30
|
module_function :commaRemover
|
31
31
|
end
|
32
32
|
|
33
|
-
module Extract
|
33
|
+
module Nicos::Extractor
|
34
34
|
def mylistId(str)
|
35
35
|
/(mylist\/)([0-9]{1,})/ =~ str
|
36
36
|
$2.to_i
|
@@ -50,7 +50,7 @@ module Extract
|
|
50
50
|
module_function :videoId
|
51
51
|
end
|
52
52
|
|
53
|
-
module Unicode
|
53
|
+
module Nicos::Unicode
|
54
54
|
def escape(str)
|
55
55
|
ary = str.unpack("U*").map!{|i| "\\u#{i.to_s(16)}"}
|
56
56
|
ary.join
|
data/lib/movie.rb
CHANGED
@@ -9,8 +9,8 @@ require 'kconv'
|
|
9
9
|
require 'parser.rb'
|
10
10
|
require 'mylist.rb'
|
11
11
|
require 'connector.rb'
|
12
|
-
|
13
|
-
class Movie
|
12
|
+
|
13
|
+
class Nicos::Movie
|
14
14
|
def initialize(video_id)
|
15
15
|
@video_id = video_id
|
16
16
|
@available = false
|
@@ -23,7 +23,7 @@ class Movie
|
|
23
23
|
# 指定されたマイリストに自分が入っていれば、真を返す。
|
24
24
|
def isBelongsTo (mylistId, &block)
|
25
25
|
isBelongs = false
|
26
|
-
thisMl = Mylist.new(mylistId)
|
26
|
+
thisMl = Nicos::Mylist.new(mylistId)
|
27
27
|
thisMl.getInfoLt
|
28
28
|
|
29
29
|
thisMl.movies.each { |movie|
|
@@ -76,8 +76,7 @@ class Movie
|
|
76
76
|
}
|
77
77
|
|
78
78
|
sMylistIdAry.each { |mylistId|
|
79
|
-
|
80
|
-
mlObjAry.push( Mylist.new(mylistId) )
|
79
|
+
mlObjAry.push( Nicos::Mylist.new(mylistId) )
|
81
80
|
}
|
82
81
|
|
83
82
|
puts "\sDiscern logic terminated."
|
@@ -105,7 +104,7 @@ class Movie
|
|
105
104
|
end
|
106
105
|
|
107
106
|
def getInfo
|
108
|
-
con =
|
107
|
+
con = Nicos::Connector::GetThumbInfo.new()
|
109
108
|
host = 'ext.nicovideo.jp'
|
110
109
|
entity = '/api/getthumbinfo/' + @video_id
|
111
110
|
con.setWait(nil)
|
@@ -114,8 +113,9 @@ class Movie
|
|
114
113
|
if
|
115
114
|
result["order"] == "success"
|
116
115
|
then
|
117
|
-
parsed =
|
116
|
+
parsed = Nicos::Parser::getThumbInfo(result["body"])
|
118
117
|
set(parsed)
|
118
|
+
p self
|
119
119
|
@available = true
|
120
120
|
else
|
121
121
|
@available = false
|
data/lib/mylist.rb
CHANGED
@@ -9,8 +9,7 @@ require 'parser.rb'
|
|
9
9
|
require 'movie.rb'
|
10
10
|
require 'connector.rb'
|
11
11
|
|
12
|
-
|
13
|
-
class Mylist
|
12
|
+
class Nicos::Mylist
|
14
13
|
def initialize (mylist_id)
|
15
14
|
@mylist_id = mylist_id
|
16
15
|
@movies = []
|
@@ -57,7 +56,7 @@ class Mylist
|
|
57
56
|
end
|
58
57
|
|
59
58
|
def getInfo
|
60
|
-
con = Connector.new('mech')
|
59
|
+
con = Nicos::Connector::Html.new('mech')
|
61
60
|
reqUrl = 'http://www.nicovideo.jp' +
|
62
61
|
'/mylist/' + @mylist_id.to_s
|
63
62
|
con.setWait(nil)
|
@@ -129,12 +128,12 @@ class Mylist
|
|
129
128
|
mvJson = mvJson.scan(/\".{1,}/)[0]
|
130
129
|
mvJson = mvJson.slice(0, mvJson.length - 5)
|
131
130
|
#mvJson = mvJson.split('},{')
|
132
|
-
mvJson = Unicode.unescape(mvJson).split('},{')
|
131
|
+
mvJson = Nicos::Unicode.unescape(mvJson).split('},{')
|
133
132
|
|
134
133
|
mvJson.each { |e|
|
135
134
|
e = "{" + e + "}"
|
136
135
|
param = JSON.parse(e)
|
137
|
-
movie = Movie.new(param['item_data']['video_id'])
|
136
|
+
movie = Nicos::Movie.new(param['item_data']['video_id'])
|
138
137
|
movie.set(param)
|
139
138
|
|
140
139
|
@movies.push(movie)
|
@@ -142,7 +141,7 @@ class Mylist
|
|
142
141
|
end
|
143
142
|
|
144
143
|
def getInfoLt
|
145
|
-
con =
|
144
|
+
con = Nicos::Connector::MylistAtom.new()
|
146
145
|
host = 'www.nicovideo.jp'
|
147
146
|
puts @mylist_id
|
148
147
|
entity = '/mylist/' + @mylist_id.to_s + '?rss=atom&numbers=1'
|
@@ -152,17 +151,18 @@ class Mylist
|
|
152
151
|
if
|
153
152
|
result["order"] == "success"
|
154
153
|
then
|
155
|
-
parsed =
|
154
|
+
parsed = Nicos::Parser::mylistAtom(result["body"])
|
156
155
|
|
157
156
|
parsed["entry"].each { |e|
|
158
|
-
movie = Movie.new(e["video_id"])
|
157
|
+
movie = Nicos::Movie.new(e["video_id"])
|
159
158
|
e["available"] = true
|
160
159
|
movie.set(e)
|
161
160
|
@movies.push(movie)
|
162
161
|
}
|
163
|
-
|
164
|
-
set(parsed["mylist"])
|
162
|
+
|
165
163
|
@available = true
|
164
|
+
set(parsed["mylist"])
|
165
|
+
p self
|
166
166
|
else
|
167
167
|
@available = false
|
168
168
|
end
|
@@ -220,4 +220,4 @@ class Mylist
|
|
220
220
|
attr_accessor :author
|
221
221
|
|
222
222
|
attr_accessor :movies
|
223
|
-
end
|
223
|
+
end
|
data/lib/nicos.rb
ADDED
data/lib/parser.rb
CHANGED
@@ -6,9 +6,7 @@ require 'xml'
|
|
6
6
|
require 'time'
|
7
7
|
require 'converter.rb'
|
8
8
|
|
9
|
-
module NicoParser
|
10
|
-
public
|
11
|
-
|
9
|
+
module Nicos::Parser
|
12
10
|
def getThumbInfo(xml)
|
13
11
|
doc = XML::Reader.string(
|
14
12
|
xml,
|
@@ -37,7 +35,7 @@ module NicoParser
|
|
37
35
|
when "first_retrieve"
|
38
36
|
label = doc.name
|
39
37
|
doc.read
|
40
|
-
parsed[label] =
|
38
|
+
parsed[label] = Nicos::Converter.iso8601ToUnix(doc.value)
|
41
39
|
when "length"
|
42
40
|
doc.read
|
43
41
|
lengthStr = doc.value.split(/\:/)
|
@@ -60,7 +58,7 @@ module NicoParser
|
|
60
58
|
parsed
|
61
59
|
end
|
62
60
|
|
63
|
-
def
|
61
|
+
def tagAtom(xml)
|
64
62
|
doc = XML::Reader.string(
|
65
63
|
xml,
|
66
64
|
:options => XML::Parser::Options::NOBLANKS |
|
@@ -85,7 +83,7 @@ module NicoParser
|
|
85
83
|
when "published", "updated"
|
86
84
|
label = doc.name
|
87
85
|
doc.read
|
88
|
-
parsed[n][label] =
|
86
|
+
parsed[n][label] = Nicos::Converter.iso8601ToUnix(doc.value)
|
89
87
|
when "p"
|
90
88
|
doc.move_to_attribute("class")
|
91
89
|
case doc.value
|
@@ -119,7 +117,7 @@ module NicoParser
|
|
119
117
|
parsed
|
120
118
|
end
|
121
119
|
|
122
|
-
def
|
120
|
+
def mylistAtom(xml)
|
123
121
|
doc = XML::Reader.string(
|
124
122
|
xml,
|
125
123
|
:options => XML::Parser::Options::NOBLANKS |
|
@@ -150,7 +148,7 @@ module NicoParser
|
|
150
148
|
if n != -1
|
151
149
|
doc.move_to_attribute("href")
|
152
150
|
parsed["entry"][n]["video_id"] =
|
153
|
-
|
151
|
+
Nicos::Extractor.videoId(doc.value)
|
154
152
|
end
|
155
153
|
when "subtitle"
|
156
154
|
doc.read
|
@@ -159,16 +157,16 @@ module NicoParser
|
|
159
157
|
if n == -1
|
160
158
|
doc.read
|
161
159
|
parsed["mylist"]["mylist_id"] =
|
162
|
-
|
160
|
+
Nicos::Extractor.mylistId(doc.value)
|
163
161
|
else
|
164
162
|
doc.read
|
165
163
|
parsed["entry"][n]["item_id"] =
|
166
|
-
|
164
|
+
Nicos::Extractor.itemId(doc.value)
|
167
165
|
end
|
168
166
|
when "updated"
|
169
167
|
doc.read
|
170
168
|
parsed["mylist"]["updated"] =
|
171
|
-
|
169
|
+
Nicos::Converter.iso8601ToUnix(doc.value)
|
172
170
|
when "name"
|
173
171
|
doc.read
|
174
172
|
parsed["mylist"]["author"] = doc.value
|
@@ -220,7 +218,7 @@ module NicoParser
|
|
220
218
|
parsed
|
221
219
|
end
|
222
220
|
|
223
|
-
module_function :
|
224
|
-
module_function :
|
221
|
+
module_function :tagAtom
|
222
|
+
module_function :mylistAtom
|
225
223
|
module_function :getThumbInfo
|
226
224
|
end
|
data/lib/searcher.rb
CHANGED
@@ -10,147 +10,148 @@ require 'kconv'
|
|
10
10
|
|
11
11
|
require 'parser.rb'
|
12
12
|
|
13
|
+
module Nicos::Searcher
|
14
|
+
class ByTagSuper
|
15
|
+
private
|
13
16
|
|
14
|
-
|
15
|
-
|
17
|
+
def get(tag, sort, page, method, waitObj)
|
18
|
+
paramAry = []
|
19
|
+
|
20
|
+
case sort
|
21
|
+
when 'comment_new'
|
22
|
+
sortStr = ''
|
23
|
+
when 'comment_old'
|
24
|
+
sortStr = 'order=a'
|
25
|
+
when 'view_many'
|
26
|
+
sortStr = 'sort=v'
|
27
|
+
when 'view_few'
|
28
|
+
sortStr = 'sort=v&order=a'
|
29
|
+
when 'comment_many'
|
30
|
+
sortStr = 'sort=r'
|
31
|
+
when 'comment_few'
|
32
|
+
sortStr = 'sort=r&order=a'
|
33
|
+
when 'mylist_many'
|
34
|
+
sortStr = 'sort=m'
|
35
|
+
when 'mylist_few'
|
36
|
+
sortStr = 'sort=m&order=a'
|
37
|
+
when 'post_new'
|
38
|
+
sortStr = 'sort=f'
|
39
|
+
when 'post_old'
|
40
|
+
sortStr = 'sort=f&order=a'
|
41
|
+
when 'length_long'
|
42
|
+
sortStr = 'sort=l'
|
43
|
+
when 'length_short'
|
44
|
+
sortStr = 'sort=l&order=a'
|
45
|
+
end
|
46
|
+
|
47
|
+
paramAry.push("page=#{page}") if page != 1
|
48
|
+
paramAry.push(sortStr)
|
49
|
+
if method == "atom" then paramAry.push("rss=atom&numbers=1") end
|
50
|
+
param = tag + "?" + paramAry.join('&')
|
51
|
+
|
52
|
+
host = 'www.nicovideo.jp'
|
53
|
+
entity = '/tag/' + param
|
16
54
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
case sort
|
21
|
-
when 'comment_new'
|
22
|
-
sortStr = ''
|
23
|
-
when 'comment_old'
|
24
|
-
sortStr = 'order=a'
|
25
|
-
when 'view_many'
|
26
|
-
sortStr = 'sort=v'
|
27
|
-
when 'view_few'
|
28
|
-
sortStr = 'sort=v&order=a'
|
29
|
-
when 'comment_many'
|
30
|
-
sortStr = 'sort=r'
|
31
|
-
when 'comment_few'
|
32
|
-
sortStr = 'sort=r&order=a'
|
33
|
-
when 'mylist_many'
|
34
|
-
sortStr = 'sort=m'
|
35
|
-
when 'mylist_few'
|
36
|
-
sortStr = 'sort=m&order=a'
|
37
|
-
when 'post_new'
|
38
|
-
sortStr = 'sort=f'
|
39
|
-
when 'post_old'
|
40
|
-
sortStr = 'sort=f&order=a'
|
41
|
-
when 'length_long'
|
42
|
-
sortStr = 'sort=l'
|
43
|
-
when 'length_short'
|
44
|
-
sortStr = 'sort=l&order=a'
|
55
|
+
@connector.setWait(waitObj)
|
56
|
+
@connector.get(host, entity)
|
45
57
|
end
|
46
|
-
|
47
|
-
paramAry.push("page=#{page}") if page != 1
|
48
|
-
paramAry.push(sortStr)
|
49
|
-
if method == "atom" then paramAry.push("rss=atom&numbers=1") end
|
50
|
-
param = tag + "?" + paramAry.join('&')
|
51
|
-
|
52
|
-
host = 'www.nicovideo.jp'
|
53
|
-
entity = '/tag/' + param
|
54
58
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
page,
|
69
|
-
method,
|
70
|
-
waitObj
|
71
|
-
)
|
59
|
+
def loop(tag, sort, method, waitObj, &block)
|
60
|
+
termFlag = false
|
61
|
+
page = 1
|
62
|
+
movieObjAry = []
|
63
|
+
|
64
|
+
begin
|
65
|
+
response = get(
|
66
|
+
tag,
|
67
|
+
sort,
|
68
|
+
page,
|
69
|
+
method,
|
70
|
+
waitObj
|
71
|
+
)
|
72
72
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
73
|
+
if response["order"] == "success"
|
74
|
+
result = parse(response["body"])
|
75
|
+
result.each { |each|
|
76
|
+
movie = Nicos::Movie.new(each["video_id"])
|
77
|
+
each["available"] = true
|
78
|
+
movie.set(each)
|
79
|
+
movieObjAry.push(movie)
|
80
|
+
}
|
81
81
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
82
|
+
termFlag = block.call(movieObjAry, page)
|
83
|
+
else
|
84
|
+
termFlag = true
|
85
|
+
end
|
86
|
+
|
87
|
+
page += 1
|
88
|
+
end until termFlag
|
89
|
+
end
|
89
90
|
end
|
90
|
-
end
|
91
91
|
|
92
|
-
class
|
93
|
-
|
94
|
-
|
95
|
-
|
92
|
+
class ByTag < ByTagSuper
|
93
|
+
def initialize
|
94
|
+
@numOfSearched = 32
|
95
|
+
@incrAmt = 0.2
|
96
96
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
97
|
+
@connector = Nicos::Connector.new('mech')
|
98
|
+
|
99
|
+
# HTML中の各パラメータの所在を示すXPath
|
100
|
+
@videoIdXP = "//div[@class='uad_thumbfrm']/table/tr/td/p/a"
|
101
|
+
@lengthXP = "//div[@class='uad_thumbfrm']/table/tr/td/p[2]/span"
|
102
|
+
@viewXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[1]/strong"
|
103
|
+
@resXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[2]/strong"
|
104
|
+
@mylistXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[3]/a/strong"
|
105
|
+
@adXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[4]/a/strong"
|
106
|
+
end
|
107
|
+
|
108
|
+
def parse(movieNum)
|
109
|
+
result = []
|
110
|
+
|
111
|
+
video_id = /(sm|nm)[0-9]{1,}/.match(@connector.mech.page.search(@videoIdXP)[movieNum]['href'])[0]
|
112
|
+
lengthStr = @connector.mech.page.search(@lengthXP)[movieNum].text.split(/\:/)
|
113
|
+
length = lengthStr[0].to_i * 60 + lengthStr[1].to_i
|
114
|
+
view = @connector.mech.page.search(@viewXP)[movieNum]
|
115
|
+
.text.gsub(/\,/, '').to_i
|
116
|
+
res = @connector.mech.page.search(@resXP)[movieNum]
|
117
|
+
.text.gsub(/\,/, '').to_i
|
118
|
+
mylist = @connector.mech.page.search(@mylistXP)[movieNum]
|
119
|
+
.text.gsub(/\,/, '').to_i
|
120
|
+
ad = @connector.mech.page.search(@adXP)[movieNum]
|
121
|
+
.text.gsub(/\,/, '').to_i
|
122
122
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
123
|
+
result.push({
|
124
|
+
"video_id" => video_id,
|
125
|
+
"length" => length,
|
126
|
+
"view" => view,
|
127
|
+
"res" => res,
|
128
|
+
"mylist" => mylist,
|
129
|
+
"ad" => ad
|
130
|
+
})
|
131
|
+
end
|
132
|
+
|
133
|
+
def execute(tag, sort, waitObj, &block)
|
134
|
+
loop(tag, sort, "mech", waitObj) { |result, page|
|
135
|
+
block.call(result, page)
|
136
|
+
}
|
137
|
+
end
|
137
138
|
end
|
138
|
-
end
|
139
139
|
|
140
|
-
class
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
140
|
+
class ByTagLt < ByTagSuper
|
141
|
+
def initialize
|
142
|
+
@numOfSearched = 32
|
143
|
+
@incrAmt = 0.2
|
144
|
+
@connector = Nicos::Connector::TagAtom.new()
|
145
|
+
end
|
146
146
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
147
|
+
def parse(xml)
|
148
|
+
Nicos::Parser.tagAtom(xml)
|
149
|
+
end
|
150
|
+
|
151
|
+
def execute(tag, sort, waitObj, &block)
|
152
|
+
loop(tag, sort, "atom", waitObj) { |result, page|
|
153
|
+
block.call(result, page)
|
154
|
+
}
|
155
|
+
end
|
155
156
|
end
|
156
157
|
end
|