nicoscraper 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +112 -0
- data/VERSION +1 -1
- data/lib/connector.rb +305 -304
- data/lib/converter.rb +3 -3
- data/lib/movie.rb +7 -7
- data/lib/mylist.rb +11 -11
- data/lib/nicos.rb +9 -0
- data/lib/parser.rb +11 -13
- data/lib/searcher.rb +130 -129
- data/nicoscraper.gemspec +5 -4
- data/test/movie_spec.rb +9 -11
- metadata +5 -4
- data/test/helper.rb +0 -18
- data/test/test_nicoscraper.rb +0 -7
data/lib/converter.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
require 'xml'
|
3
3
|
require 'time'
|
4
4
|
|
5
|
-
module
|
5
|
+
module Nicos::Converter
|
6
6
|
def iso8601ToUnix(str)
|
7
7
|
Time.strptime(str, "%Y-%m-%dT%H:%M:%S").to_i
|
8
8
|
end
|
@@ -30,7 +30,7 @@ module Convert
|
|
30
30
|
module_function :commaRemover
|
31
31
|
end
|
32
32
|
|
33
|
-
module
|
33
|
+
module Nicos::Extractor
|
34
34
|
def mylistId(str)
|
35
35
|
/(mylist\/)([0-9]{1,})/ =~ str
|
36
36
|
$2.to_i
|
@@ -50,7 +50,7 @@ module Extract
|
|
50
50
|
module_function :videoId
|
51
51
|
end
|
52
52
|
|
53
|
-
module Unicode
|
53
|
+
module Nicos::Unicode
|
54
54
|
def escape(str)
|
55
55
|
ary = str.unpack("U*").map!{|i| "\\u#{i.to_s(16)}"}
|
56
56
|
ary.join
|
data/lib/movie.rb
CHANGED
@@ -9,8 +9,8 @@ require 'kconv'
|
|
9
9
|
require 'parser.rb'
|
10
10
|
require 'mylist.rb'
|
11
11
|
require 'connector.rb'
|
12
|
-
|
13
|
-
class Movie
|
12
|
+
|
13
|
+
class Nicos::Movie
|
14
14
|
def initialize(video_id)
|
15
15
|
@video_id = video_id
|
16
16
|
@available = false
|
@@ -23,7 +23,7 @@ class Movie
|
|
23
23
|
# 指定されたマイリストに自分が入っていれば、真を返す。
|
24
24
|
def isBelongsTo (mylistId, &block)
|
25
25
|
isBelongs = false
|
26
|
-
thisMl = Mylist.new(mylistId)
|
26
|
+
thisMl = Nicos::Mylist.new(mylistId)
|
27
27
|
thisMl.getInfoLt
|
28
28
|
|
29
29
|
thisMl.movies.each { |movie|
|
@@ -76,8 +76,7 @@ class Movie
|
|
76
76
|
}
|
77
77
|
|
78
78
|
sMylistIdAry.each { |mylistId|
|
79
|
-
|
80
|
-
mlObjAry.push( Mylist.new(mylistId) )
|
79
|
+
mlObjAry.push( Nicos::Mylist.new(mylistId) )
|
81
80
|
}
|
82
81
|
|
83
82
|
puts "\sDiscern logic terminated."
|
@@ -105,7 +104,7 @@ class Movie
|
|
105
104
|
end
|
106
105
|
|
107
106
|
def getInfo
|
108
|
-
con =
|
107
|
+
con = Nicos::Connector::GetThumbInfo.new()
|
109
108
|
host = 'ext.nicovideo.jp'
|
110
109
|
entity = '/api/getthumbinfo/' + @video_id
|
111
110
|
con.setWait(nil)
|
@@ -114,8 +113,9 @@ class Movie
|
|
114
113
|
if
|
115
114
|
result["order"] == "success"
|
116
115
|
then
|
117
|
-
parsed =
|
116
|
+
parsed = Nicos::Parser::getThumbInfo(result["body"])
|
118
117
|
set(parsed)
|
118
|
+
p self
|
119
119
|
@available = true
|
120
120
|
else
|
121
121
|
@available = false
|
data/lib/mylist.rb
CHANGED
@@ -9,8 +9,7 @@ require 'parser.rb'
|
|
9
9
|
require 'movie.rb'
|
10
10
|
require 'connector.rb'
|
11
11
|
|
12
|
-
|
13
|
-
class Mylist
|
12
|
+
class Nicos::Mylist
|
14
13
|
def initialize (mylist_id)
|
15
14
|
@mylist_id = mylist_id
|
16
15
|
@movies = []
|
@@ -57,7 +56,7 @@ class Mylist
|
|
57
56
|
end
|
58
57
|
|
59
58
|
def getInfo
|
60
|
-
con = Connector.new('mech')
|
59
|
+
con = Nicos::Connector::Html.new('mech')
|
61
60
|
reqUrl = 'http://www.nicovideo.jp' +
|
62
61
|
'/mylist/' + @mylist_id.to_s
|
63
62
|
con.setWait(nil)
|
@@ -129,12 +128,12 @@ class Mylist
|
|
129
128
|
mvJson = mvJson.scan(/\".{1,}/)[0]
|
130
129
|
mvJson = mvJson.slice(0, mvJson.length - 5)
|
131
130
|
#mvJson = mvJson.split('},{')
|
132
|
-
mvJson = Unicode.unescape(mvJson).split('},{')
|
131
|
+
mvJson = Nicos::Unicode.unescape(mvJson).split('},{')
|
133
132
|
|
134
133
|
mvJson.each { |e|
|
135
134
|
e = "{" + e + "}"
|
136
135
|
param = JSON.parse(e)
|
137
|
-
movie = Movie.new(param['item_data']['video_id'])
|
136
|
+
movie = Nicos::Movie.new(param['item_data']['video_id'])
|
138
137
|
movie.set(param)
|
139
138
|
|
140
139
|
@movies.push(movie)
|
@@ -142,7 +141,7 @@ class Mylist
|
|
142
141
|
end
|
143
142
|
|
144
143
|
def getInfoLt
|
145
|
-
con =
|
144
|
+
con = Nicos::Connector::MylistAtom.new()
|
146
145
|
host = 'www.nicovideo.jp'
|
147
146
|
puts @mylist_id
|
148
147
|
entity = '/mylist/' + @mylist_id.to_s + '?rss=atom&numbers=1'
|
@@ -152,17 +151,18 @@ class Mylist
|
|
152
151
|
if
|
153
152
|
result["order"] == "success"
|
154
153
|
then
|
155
|
-
parsed =
|
154
|
+
parsed = Nicos::Parser::mylistAtom(result["body"])
|
156
155
|
|
157
156
|
parsed["entry"].each { |e|
|
158
|
-
movie = Movie.new(e["video_id"])
|
157
|
+
movie = Nicos::Movie.new(e["video_id"])
|
159
158
|
e["available"] = true
|
160
159
|
movie.set(e)
|
161
160
|
@movies.push(movie)
|
162
161
|
}
|
163
|
-
|
164
|
-
set(parsed["mylist"])
|
162
|
+
|
165
163
|
@available = true
|
164
|
+
set(parsed["mylist"])
|
165
|
+
p self
|
166
166
|
else
|
167
167
|
@available = false
|
168
168
|
end
|
@@ -220,4 +220,4 @@ class Mylist
|
|
220
220
|
attr_accessor :author
|
221
221
|
|
222
222
|
attr_accessor :movies
|
223
|
-
end
|
223
|
+
end
|
data/lib/nicos.rb
ADDED
data/lib/parser.rb
CHANGED
@@ -6,9 +6,7 @@ require 'xml'
|
|
6
6
|
require 'time'
|
7
7
|
require 'converter.rb'
|
8
8
|
|
9
|
-
module
|
10
|
-
public
|
11
|
-
|
9
|
+
module Nicos::Parser
|
12
10
|
def getThumbInfo(xml)
|
13
11
|
doc = XML::Reader.string(
|
14
12
|
xml,
|
@@ -37,7 +35,7 @@ module NicoParser
|
|
37
35
|
when "first_retrieve"
|
38
36
|
label = doc.name
|
39
37
|
doc.read
|
40
|
-
parsed[label] =
|
38
|
+
parsed[label] = Nicos::Converter.iso8601ToUnix(doc.value)
|
41
39
|
when "length"
|
42
40
|
doc.read
|
43
41
|
lengthStr = doc.value.split(/\:/)
|
@@ -60,7 +58,7 @@ module NicoParser
|
|
60
58
|
parsed
|
61
59
|
end
|
62
60
|
|
63
|
-
def
|
61
|
+
def tagAtom(xml)
|
64
62
|
doc = XML::Reader.string(
|
65
63
|
xml,
|
66
64
|
:options => XML::Parser::Options::NOBLANKS |
|
@@ -85,7 +83,7 @@ module NicoParser
|
|
85
83
|
when "published", "updated"
|
86
84
|
label = doc.name
|
87
85
|
doc.read
|
88
|
-
parsed[n][label] =
|
86
|
+
parsed[n][label] = Nicos::Converter.iso8601ToUnix(doc.value)
|
89
87
|
when "p"
|
90
88
|
doc.move_to_attribute("class")
|
91
89
|
case doc.value
|
@@ -119,7 +117,7 @@ module NicoParser
|
|
119
117
|
parsed
|
120
118
|
end
|
121
119
|
|
122
|
-
def
|
120
|
+
def mylistAtom(xml)
|
123
121
|
doc = XML::Reader.string(
|
124
122
|
xml,
|
125
123
|
:options => XML::Parser::Options::NOBLANKS |
|
@@ -150,7 +148,7 @@ module NicoParser
|
|
150
148
|
if n != -1
|
151
149
|
doc.move_to_attribute("href")
|
152
150
|
parsed["entry"][n]["video_id"] =
|
153
|
-
|
151
|
+
Nicos::Extractor.videoId(doc.value)
|
154
152
|
end
|
155
153
|
when "subtitle"
|
156
154
|
doc.read
|
@@ -159,16 +157,16 @@ module NicoParser
|
|
159
157
|
if n == -1
|
160
158
|
doc.read
|
161
159
|
parsed["mylist"]["mylist_id"] =
|
162
|
-
|
160
|
+
Nicos::Extractor.mylistId(doc.value)
|
163
161
|
else
|
164
162
|
doc.read
|
165
163
|
parsed["entry"][n]["item_id"] =
|
166
|
-
|
164
|
+
Nicos::Extractor.itemId(doc.value)
|
167
165
|
end
|
168
166
|
when "updated"
|
169
167
|
doc.read
|
170
168
|
parsed["mylist"]["updated"] =
|
171
|
-
|
169
|
+
Nicos::Converter.iso8601ToUnix(doc.value)
|
172
170
|
when "name"
|
173
171
|
doc.read
|
174
172
|
parsed["mylist"]["author"] = doc.value
|
@@ -220,7 +218,7 @@ module NicoParser
|
|
220
218
|
parsed
|
221
219
|
end
|
222
220
|
|
223
|
-
module_function :
|
224
|
-
module_function :
|
221
|
+
module_function :tagAtom
|
222
|
+
module_function :mylistAtom
|
225
223
|
module_function :getThumbInfo
|
226
224
|
end
|
data/lib/searcher.rb
CHANGED
@@ -10,147 +10,148 @@ require 'kconv'
|
|
10
10
|
|
11
11
|
require 'parser.rb'
|
12
12
|
|
13
|
+
module Nicos::Searcher
|
14
|
+
class ByTagSuper
|
15
|
+
private
|
13
16
|
|
14
|
-
|
15
|
-
|
17
|
+
def get(tag, sort, page, method, waitObj)
|
18
|
+
paramAry = []
|
19
|
+
|
20
|
+
case sort
|
21
|
+
when 'comment_new'
|
22
|
+
sortStr = ''
|
23
|
+
when 'comment_old'
|
24
|
+
sortStr = 'order=a'
|
25
|
+
when 'view_many'
|
26
|
+
sortStr = 'sort=v'
|
27
|
+
when 'view_few'
|
28
|
+
sortStr = 'sort=v&order=a'
|
29
|
+
when 'comment_many'
|
30
|
+
sortStr = 'sort=r'
|
31
|
+
when 'comment_few'
|
32
|
+
sortStr = 'sort=r&order=a'
|
33
|
+
when 'mylist_many'
|
34
|
+
sortStr = 'sort=m'
|
35
|
+
when 'mylist_few'
|
36
|
+
sortStr = 'sort=m&order=a'
|
37
|
+
when 'post_new'
|
38
|
+
sortStr = 'sort=f'
|
39
|
+
when 'post_old'
|
40
|
+
sortStr = 'sort=f&order=a'
|
41
|
+
when 'length_long'
|
42
|
+
sortStr = 'sort=l'
|
43
|
+
when 'length_short'
|
44
|
+
sortStr = 'sort=l&order=a'
|
45
|
+
end
|
46
|
+
|
47
|
+
paramAry.push("page=#{page}") if page != 1
|
48
|
+
paramAry.push(sortStr)
|
49
|
+
if method == "atom" then paramAry.push("rss=atom&numbers=1") end
|
50
|
+
param = tag + "?" + paramAry.join('&')
|
51
|
+
|
52
|
+
host = 'www.nicovideo.jp'
|
53
|
+
entity = '/tag/' + param
|
16
54
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
case sort
|
21
|
-
when 'comment_new'
|
22
|
-
sortStr = ''
|
23
|
-
when 'comment_old'
|
24
|
-
sortStr = 'order=a'
|
25
|
-
when 'view_many'
|
26
|
-
sortStr = 'sort=v'
|
27
|
-
when 'view_few'
|
28
|
-
sortStr = 'sort=v&order=a'
|
29
|
-
when 'comment_many'
|
30
|
-
sortStr = 'sort=r'
|
31
|
-
when 'comment_few'
|
32
|
-
sortStr = 'sort=r&order=a'
|
33
|
-
when 'mylist_many'
|
34
|
-
sortStr = 'sort=m'
|
35
|
-
when 'mylist_few'
|
36
|
-
sortStr = 'sort=m&order=a'
|
37
|
-
when 'post_new'
|
38
|
-
sortStr = 'sort=f'
|
39
|
-
when 'post_old'
|
40
|
-
sortStr = 'sort=f&order=a'
|
41
|
-
when 'length_long'
|
42
|
-
sortStr = 'sort=l'
|
43
|
-
when 'length_short'
|
44
|
-
sortStr = 'sort=l&order=a'
|
55
|
+
@connector.setWait(waitObj)
|
56
|
+
@connector.get(host, entity)
|
45
57
|
end
|
46
|
-
|
47
|
-
paramAry.push("page=#{page}") if page != 1
|
48
|
-
paramAry.push(sortStr)
|
49
|
-
if method == "atom" then paramAry.push("rss=atom&numbers=1") end
|
50
|
-
param = tag + "?" + paramAry.join('&')
|
51
|
-
|
52
|
-
host = 'www.nicovideo.jp'
|
53
|
-
entity = '/tag/' + param
|
54
58
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
page,
|
69
|
-
method,
|
70
|
-
waitObj
|
71
|
-
)
|
59
|
+
def loop(tag, sort, method, waitObj, &block)
|
60
|
+
termFlag = false
|
61
|
+
page = 1
|
62
|
+
movieObjAry = []
|
63
|
+
|
64
|
+
begin
|
65
|
+
response = get(
|
66
|
+
tag,
|
67
|
+
sort,
|
68
|
+
page,
|
69
|
+
method,
|
70
|
+
waitObj
|
71
|
+
)
|
72
72
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
73
|
+
if response["order"] == "success"
|
74
|
+
result = parse(response["body"])
|
75
|
+
result.each { |each|
|
76
|
+
movie = Nicos::Movie.new(each["video_id"])
|
77
|
+
each["available"] = true
|
78
|
+
movie.set(each)
|
79
|
+
movieObjAry.push(movie)
|
80
|
+
}
|
81
81
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
82
|
+
termFlag = block.call(movieObjAry, page)
|
83
|
+
else
|
84
|
+
termFlag = true
|
85
|
+
end
|
86
|
+
|
87
|
+
page += 1
|
88
|
+
end until termFlag
|
89
|
+
end
|
89
90
|
end
|
90
|
-
end
|
91
91
|
|
92
|
-
class
|
93
|
-
|
94
|
-
|
95
|
-
|
92
|
+
class ByTag < ByTagSuper
|
93
|
+
def initialize
|
94
|
+
@numOfSearched = 32
|
95
|
+
@incrAmt = 0.2
|
96
96
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
97
|
+
@connector = Nicos::Connector.new('mech')
|
98
|
+
|
99
|
+
# HTML中の各パラメータの所在を示すXPath
|
100
|
+
@videoIdXP = "//div[@class='uad_thumbfrm']/table/tr/td/p/a"
|
101
|
+
@lengthXP = "//div[@class='uad_thumbfrm']/table/tr/td/p[2]/span"
|
102
|
+
@viewXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[1]/strong"
|
103
|
+
@resXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[2]/strong"
|
104
|
+
@mylistXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[3]/a/strong"
|
105
|
+
@adXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[4]/a/strong"
|
106
|
+
end
|
107
|
+
|
108
|
+
def parse(movieNum)
|
109
|
+
result = []
|
110
|
+
|
111
|
+
video_id = /(sm|nm)[0-9]{1,}/.match(@connector.mech.page.search(@videoIdXP)[movieNum]['href'])[0]
|
112
|
+
lengthStr = @connector.mech.page.search(@lengthXP)[movieNum].text.split(/\:/)
|
113
|
+
length = lengthStr[0].to_i * 60 + lengthStr[1].to_i
|
114
|
+
view = @connector.mech.page.search(@viewXP)[movieNum]
|
115
|
+
.text.gsub(/\,/, '').to_i
|
116
|
+
res = @connector.mech.page.search(@resXP)[movieNum]
|
117
|
+
.text.gsub(/\,/, '').to_i
|
118
|
+
mylist = @connector.mech.page.search(@mylistXP)[movieNum]
|
119
|
+
.text.gsub(/\,/, '').to_i
|
120
|
+
ad = @connector.mech.page.search(@adXP)[movieNum]
|
121
|
+
.text.gsub(/\,/, '').to_i
|
122
122
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
123
|
+
result.push({
|
124
|
+
"video_id" => video_id,
|
125
|
+
"length" => length,
|
126
|
+
"view" => view,
|
127
|
+
"res" => res,
|
128
|
+
"mylist" => mylist,
|
129
|
+
"ad" => ad
|
130
|
+
})
|
131
|
+
end
|
132
|
+
|
133
|
+
def execute(tag, sort, waitObj, &block)
|
134
|
+
loop(tag, sort, "mech", waitObj) { |result, page|
|
135
|
+
block.call(result, page)
|
136
|
+
}
|
137
|
+
end
|
137
138
|
end
|
138
|
-
end
|
139
139
|
|
140
|
-
class
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
140
|
+
class ByTagLt < ByTagSuper
|
141
|
+
def initialize
|
142
|
+
@numOfSearched = 32
|
143
|
+
@incrAmt = 0.2
|
144
|
+
@connector = Nicos::Connector::TagAtom.new()
|
145
|
+
end
|
146
146
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
147
|
+
def parse(xml)
|
148
|
+
Nicos::Parser.tagAtom(xml)
|
149
|
+
end
|
150
|
+
|
151
|
+
def execute(tag, sort, waitObj, &block)
|
152
|
+
loop(tag, sort, "atom", waitObj) { |result, page|
|
153
|
+
block.call(result, page)
|
154
|
+
}
|
155
|
+
end
|
155
156
|
end
|
156
157
|
end
|