nicoscraper 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,7 +2,7 @@
2
2
  require 'xml'
3
3
  require 'time'
4
4
 
5
- module Convert
5
+ module Nicos::Converter
6
6
  def iso8601ToUnix(str)
7
7
  Time.strptime(str, "%Y-%m-%dT%H:%M:%S").to_i
8
8
  end
@@ -30,7 +30,7 @@ module Convert
30
30
  module_function :commaRemover
31
31
  end
32
32
 
33
- module Extract
33
+ module Nicos::Extractor
34
34
  def mylistId(str)
35
35
  /(mylist\/)([0-9]{1,})/ =~ str
36
36
  $2.to_i
@@ -50,7 +50,7 @@ module Extract
50
50
  module_function :videoId
51
51
  end
52
52
 
53
- module Unicode
53
+ module Nicos::Unicode
54
54
  def escape(str)
55
55
  ary = str.unpack("U*").map!{|i| "\\u#{i.to_s(16)}"}
56
56
  ary.join
@@ -9,8 +9,8 @@ require 'kconv'
9
9
  require 'parser.rb'
10
10
  require 'mylist.rb'
11
11
  require 'connector.rb'
12
-
13
- class Movie
12
+
13
+ class Nicos::Movie
14
14
  def initialize(video_id)
15
15
  @video_id = video_id
16
16
  @available = false
@@ -23,7 +23,7 @@ class Movie
23
23
  # 指定されたマイリストに自分が入っていれば、真を返す。
24
24
  def isBelongsTo (mylistId, &block)
25
25
  isBelongs = false
26
- thisMl = Mylist.new(mylistId)
26
+ thisMl = Nicos::Mylist.new(mylistId)
27
27
  thisMl.getInfoLt
28
28
 
29
29
  thisMl.movies.each { |movie|
@@ -76,8 +76,7 @@ class Movie
76
76
  }
77
77
 
78
78
  sMylistIdAry.each { |mylistId|
79
- puts mylistId
80
- mlObjAry.push( Mylist.new(mylistId) )
79
+ mlObjAry.push( Nicos::Mylist.new(mylistId) )
81
80
  }
82
81
 
83
82
  puts "\sDiscern logic terminated."
@@ -105,7 +104,7 @@ class Movie
105
104
  end
106
105
 
107
106
  def getInfo
108
- con = GetThumbInfoConnector.new()
107
+ con = Nicos::Connector::GetThumbInfo.new()
109
108
  host = 'ext.nicovideo.jp'
110
109
  entity = '/api/getthumbinfo/' + @video_id
111
110
  con.setWait(nil)
@@ -114,8 +113,9 @@ class Movie
114
113
  if
115
114
  result["order"] == "success"
116
115
  then
117
- parsed = NicoParser.getThumbInfo(result["body"])
116
+ parsed = Nicos::Parser::getThumbInfo(result["body"])
118
117
  set(parsed)
118
+ p self
119
119
  @available = true
120
120
  else
121
121
  @available = false
@@ -9,8 +9,7 @@ require 'parser.rb'
9
9
  require 'movie.rb'
10
10
  require 'connector.rb'
11
11
 
12
-
13
- class Mylist
12
+ class Nicos::Mylist
14
13
  def initialize (mylist_id)
15
14
  @mylist_id = mylist_id
16
15
  @movies = []
@@ -57,7 +56,7 @@ class Mylist
57
56
  end
58
57
 
59
58
  def getInfo
60
- con = Connector.new('mech')
59
+ con = Nicos::Connector::Html.new('mech')
61
60
  reqUrl = 'http://www.nicovideo.jp' +
62
61
  '/mylist/' + @mylist_id.to_s
63
62
  con.setWait(nil)
@@ -129,12 +128,12 @@ class Mylist
129
128
  mvJson = mvJson.scan(/\".{1,}/)[0]
130
129
  mvJson = mvJson.slice(0, mvJson.length - 5)
131
130
  #mvJson = mvJson.split('},{')
132
- mvJson = Unicode.unescape(mvJson).split('},{')
131
+ mvJson = Nicos::Unicode.unescape(mvJson).split('},{')
133
132
 
134
133
  mvJson.each { |e|
135
134
  e = "{" + e + "}"
136
135
  param = JSON.parse(e)
137
- movie = Movie.new(param['item_data']['video_id'])
136
+ movie = Nicos::Movie.new(param['item_data']['video_id'])
138
137
  movie.set(param)
139
138
 
140
139
  @movies.push(movie)
@@ -142,7 +141,7 @@ class Mylist
142
141
  end
143
142
 
144
143
  def getInfoLt
145
- con = MylistAtomConnector.new()
144
+ con = Nicos::Connector::MylistAtom.new()
146
145
  host = 'www.nicovideo.jp'
147
146
  puts @mylist_id
148
147
  entity = '/mylist/' + @mylist_id.to_s + '?rss=atom&numbers=1'
@@ -152,17 +151,18 @@ class Mylist
152
151
  if
153
152
  result["order"] == "success"
154
153
  then
155
- parsed = NicoParser.mylistRss(result["body"])
154
+ parsed = Nicos::Parser::mylistAtom(result["body"])
156
155
 
157
156
  parsed["entry"].each { |e|
158
- movie = Movie.new(e["video_id"])
157
+ movie = Nicos::Movie.new(e["video_id"])
159
158
  e["available"] = true
160
159
  movie.set(e)
161
160
  @movies.push(movie)
162
161
  }
163
-
164
- set(parsed["mylist"])
162
+
165
163
  @available = true
164
+ set(parsed["mylist"])
165
+ p self
166
166
  else
167
167
  @available = false
168
168
  end
@@ -220,4 +220,4 @@ class Mylist
220
220
  attr_accessor :author
221
221
 
222
222
  attr_accessor :movies
223
- end
223
+ end
@@ -0,0 +1,9 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.unshift File.dirname(__FILE__) + "/../lib"
3
+
4
+ module Nicos
5
+ end
6
+
7
+ require 'movie.rb'
8
+ require 'mylist.rb'
9
+ require 'searcher.rb'
@@ -6,9 +6,7 @@ require 'xml'
6
6
  require 'time'
7
7
  require 'converter.rb'
8
8
 
9
- module NicoParser
10
- public
11
-
9
+ module Nicos::Parser
12
10
  def getThumbInfo(xml)
13
11
  doc = XML::Reader.string(
14
12
  xml,
@@ -37,7 +35,7 @@ module NicoParser
37
35
  when "first_retrieve"
38
36
  label = doc.name
39
37
  doc.read
40
- parsed[label] = Convert.iso8601ToUnix(doc.value)
38
+ parsed[label] = Nicos::Converter.iso8601ToUnix(doc.value)
41
39
  when "length"
42
40
  doc.read
43
41
  lengthStr = doc.value.split(/\:/)
@@ -60,7 +58,7 @@ module NicoParser
60
58
  parsed
61
59
  end
62
60
 
63
- def tagRss(xml)
61
+ def tagAtom(xml)
64
62
  doc = XML::Reader.string(
65
63
  xml,
66
64
  :options => XML::Parser::Options::NOBLANKS |
@@ -85,7 +83,7 @@ module NicoParser
85
83
  when "published", "updated"
86
84
  label = doc.name
87
85
  doc.read
88
- parsed[n][label] = Convert.iso8601ToUnix(doc.value)
86
+ parsed[n][label] = Nicos::Converter.iso8601ToUnix(doc.value)
89
87
  when "p"
90
88
  doc.move_to_attribute("class")
91
89
  case doc.value
@@ -119,7 +117,7 @@ module NicoParser
119
117
  parsed
120
118
  end
121
119
 
122
- def mylistRss(xml)
120
+ def mylistAtom(xml)
123
121
  doc = XML::Reader.string(
124
122
  xml,
125
123
  :options => XML::Parser::Options::NOBLANKS |
@@ -150,7 +148,7 @@ module NicoParser
150
148
  if n != -1
151
149
  doc.move_to_attribute("href")
152
150
  parsed["entry"][n]["video_id"] =
153
- Extract.videoId(doc.value)
151
+ Nicos::Extractor.videoId(doc.value)
154
152
  end
155
153
  when "subtitle"
156
154
  doc.read
@@ -159,16 +157,16 @@ module NicoParser
159
157
  if n == -1
160
158
  doc.read
161
159
  parsed["mylist"]["mylist_id"] =
162
- Extract.mylistId(doc.value)
160
+ Nicos::Extractor.mylistId(doc.value)
163
161
  else
164
162
  doc.read
165
163
  parsed["entry"][n]["item_id"] =
166
- Extract.itemId(doc.value)
164
+ Nicos::Extractor.itemId(doc.value)
167
165
  end
168
166
  when "updated"
169
167
  doc.read
170
168
  parsed["mylist"]["updated"] =
171
- Convert.iso8601ToUnix(doc.value)
169
+ Nicos::Converter.iso8601ToUnix(doc.value)
172
170
  when "name"
173
171
  doc.read
174
172
  parsed["mylist"]["author"] = doc.value
@@ -220,7 +218,7 @@ module NicoParser
220
218
  parsed
221
219
  end
222
220
 
223
- module_function :tagRss
224
- module_function :mylistRss
221
+ module_function :tagAtom
222
+ module_function :mylistAtom
225
223
  module_function :getThumbInfo
226
224
  end
@@ -10,147 +10,148 @@ require 'kconv'
10
10
 
11
11
  require 'parser.rb'
12
12
 
13
+ module Nicos::Searcher
14
+ class ByTagSuper
15
+ private
13
16
 
14
- class SearchByTagSuper
15
- private
17
+ def get(tag, sort, page, method, waitObj)
18
+ paramAry = []
19
+
20
+ case sort
21
+ when 'comment_new'
22
+ sortStr = ''
23
+ when 'comment_old'
24
+ sortStr = 'order=a'
25
+ when 'view_many'
26
+ sortStr = 'sort=v'
27
+ when 'view_few'
28
+ sortStr = 'sort=v&order=a'
29
+ when 'comment_many'
30
+ sortStr = 'sort=r'
31
+ when 'comment_few'
32
+ sortStr = 'sort=r&order=a'
33
+ when 'mylist_many'
34
+ sortStr = 'sort=m'
35
+ when 'mylist_few'
36
+ sortStr = 'sort=m&order=a'
37
+ when 'post_new'
38
+ sortStr = 'sort=f'
39
+ when 'post_old'
40
+ sortStr = 'sort=f&order=a'
41
+ when 'length_long'
42
+ sortStr = 'sort=l'
43
+ when 'length_short'
44
+ sortStr = 'sort=l&order=a'
45
+ end
46
+
47
+ paramAry.push("page=#{page}") if page != 1
48
+ paramAry.push(sortStr)
49
+ if method == "atom" then paramAry.push("rss=atom&numbers=1") end
50
+ param = tag + "?" + paramAry.join('&')
51
+
52
+ host = 'www.nicovideo.jp'
53
+ entity = '/tag/' + param
16
54
 
17
- def get(tag, sort, page, method, waitObj)
18
- paramAry = []
19
-
20
- case sort
21
- when 'comment_new'
22
- sortStr = ''
23
- when 'comment_old'
24
- sortStr = 'order=a'
25
- when 'view_many'
26
- sortStr = 'sort=v'
27
- when 'view_few'
28
- sortStr = 'sort=v&order=a'
29
- when 'comment_many'
30
- sortStr = 'sort=r'
31
- when 'comment_few'
32
- sortStr = 'sort=r&order=a'
33
- when 'mylist_many'
34
- sortStr = 'sort=m'
35
- when 'mylist_few'
36
- sortStr = 'sort=m&order=a'
37
- when 'post_new'
38
- sortStr = 'sort=f'
39
- when 'post_old'
40
- sortStr = 'sort=f&order=a'
41
- when 'length_long'
42
- sortStr = 'sort=l'
43
- when 'length_short'
44
- sortStr = 'sort=l&order=a'
55
+ @connector.setWait(waitObj)
56
+ @connector.get(host, entity)
45
57
  end
46
-
47
- paramAry.push("page=#{page}") if page != 1
48
- paramAry.push(sortStr)
49
- if method == "atom" then paramAry.push("rss=atom&numbers=1") end
50
- param = tag + "?" + paramAry.join('&')
51
-
52
- host = 'www.nicovideo.jp'
53
- entity = '/tag/' + param
54
58
 
55
- @connector.setWait(waitObj)
56
- @connector.get(host, entity)
57
- end
58
-
59
- def loop(tag, sort, method, waitObj, &block)
60
- termFlag = false
61
- page = 1
62
- movieObjAry = []
63
-
64
- begin
65
- response = get(
66
- tag,
67
- sort,
68
- page,
69
- method,
70
- waitObj
71
- )
59
+ def loop(tag, sort, method, waitObj, &block)
60
+ termFlag = false
61
+ page = 1
62
+ movieObjAry = []
63
+
64
+ begin
65
+ response = get(
66
+ tag,
67
+ sort,
68
+ page,
69
+ method,
70
+ waitObj
71
+ )
72
72
 
73
- if response["order"] == "success"
74
- result = parse(response["body"])
75
- result.each { |each|
76
- movie = Movie.new(each["video_id"])
77
- each["available"] = true
78
- movie.set(each)
79
- movieObjAry.push(movie)
80
- }
73
+ if response["order"] == "success"
74
+ result = parse(response["body"])
75
+ result.each { |each|
76
+ movie = Nicos::Movie.new(each["video_id"])
77
+ each["available"] = true
78
+ movie.set(each)
79
+ movieObjAry.push(movie)
80
+ }
81
81
 
82
- termFlag = block.call(movieObjAry, page)
83
- else
84
- termFlag = true
85
- end
86
-
87
- page += 1
88
- end until termFlag
82
+ termFlag = block.call(movieObjAry, page)
83
+ else
84
+ termFlag = true
85
+ end
86
+
87
+ page += 1
88
+ end until termFlag
89
+ end
89
90
  end
90
- end
91
91
 
92
- class SearchByTag < SearchByTagSuper
93
- def initialize
94
- @numOfSearched = 32
95
- @incrAmt = 0.2
92
+ class ByTag < ByTagSuper
93
+ def initialize
94
+ @numOfSearched = 32
95
+ @incrAmt = 0.2
96
96
 
97
- @connector = Connector.new('mech')
98
-
99
- # HTML中の各パラメータの所在を示すXPath
100
- @videoIdXP = "//div[@class='uad_thumbfrm']/table/tr/td/p/a"
101
- @lengthXP = "//div[@class='uad_thumbfrm']/table/tr/td/p[2]/span"
102
- @viewXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[1]/strong"
103
- @resXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[2]/strong"
104
- @mylistXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[3]/a/strong"
105
- @adXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[4]/a/strong"
106
- end
107
-
108
- def parse(movieNum)
109
- result = []
110
-
111
- video_id = /(sm|nm)[0-9]{1,}/.match(@connector.mech.page.search(@videoIdXP)[movieNum]['href'])[0]
112
- lengthStr = @connector.mech.page.search(@lengthXP)[movieNum].text.split(/\:/)
113
- length = lengthStr[0].to_i * 60 + lengthStr[1].to_i
114
- view = @connector.mech.page.search(@viewXP)[movieNum]
115
- .text.gsub(/\,/, '').to_i
116
- res = @connector.mech.page.search(@resXP)[movieNum]
117
- .text.gsub(/\,/, '').to_i
118
- mylist = @connector.mech.page.search(@mylistXP)[movieNum]
119
- .text.gsub(/\,/, '').to_i
120
- ad = @connector.mech.page.search(@adXP)[movieNum]
121
- .text.gsub(/\,/, '').to_i
97
+ @connector = Nicos::Connector.new('mech')
98
+
99
+ # HTML中の各パラメータの所在を示すXPath
100
+ @videoIdXP = "//div[@class='uad_thumbfrm']/table/tr/td/p/a"
101
+ @lengthXP = "//div[@class='uad_thumbfrm']/table/tr/td/p[2]/span"
102
+ @viewXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[1]/strong"
103
+ @resXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[2]/strong"
104
+ @mylistXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[3]/a/strong"
105
+ @adXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[4]/a/strong"
106
+ end
107
+
108
+ def parse(movieNum)
109
+ result = []
110
+
111
+ video_id = /(sm|nm)[0-9]{1,}/.match(@connector.mech.page.search(@videoIdXP)[movieNum]['href'])[0]
112
+ lengthStr = @connector.mech.page.search(@lengthXP)[movieNum].text.split(/\:/)
113
+ length = lengthStr[0].to_i * 60 + lengthStr[1].to_i
114
+ view = @connector.mech.page.search(@viewXP)[movieNum]
115
+ .text.gsub(/\,/, '').to_i
116
+ res = @connector.mech.page.search(@resXP)[movieNum]
117
+ .text.gsub(/\,/, '').to_i
118
+ mylist = @connector.mech.page.search(@mylistXP)[movieNum]
119
+ .text.gsub(/\,/, '').to_i
120
+ ad = @connector.mech.page.search(@adXP)[movieNum]
121
+ .text.gsub(/\,/, '').to_i
122
122
 
123
- result.push({
124
- "video_id" => video_id,
125
- "length" => length,
126
- "view" => view,
127
- "res" => res,
128
- "mylist" => mylist,
129
- "ad" => ad
130
- })
131
- end
132
-
133
- def execute(tag, sort, waitObj, &block)
134
- loop(tag, sort, "mech", waitObj) { |result, page|
135
- block.call(result, page)
136
- }
123
+ result.push({
124
+ "video_id" => video_id,
125
+ "length" => length,
126
+ "view" => view,
127
+ "res" => res,
128
+ "mylist" => mylist,
129
+ "ad" => ad
130
+ })
131
+ end
132
+
133
+ def execute(tag, sort, waitObj, &block)
134
+ loop(tag, sort, "mech", waitObj) { |result, page|
135
+ block.call(result, page)
136
+ }
137
+ end
137
138
  end
138
- end
139
139
 
140
- class SearchByTagLt < SearchByTagSuper
141
- def initialize
142
- @numOfSearched = 32
143
- @incrAmt = 0.2
144
- @connector = SearchByTagAtomConnector.new()
145
- end
140
+ class ByTagLt < ByTagSuper
141
+ def initialize
142
+ @numOfSearched = 32
143
+ @incrAmt = 0.2
144
+ @connector = Nicos::Connector::TagAtom.new()
145
+ end
146
146
 
147
- def parse(xml)
148
- NicoParser.tagRss(xml)
149
- end
150
-
151
- def execute(tag, sort, waitObj, &block)
152
- loop(tag, sort, "atom", waitObj) { |result, page|
153
- block.call(result, page)
154
- }
147
+ def parse(xml)
148
+ Nicos::Parser.tagAtom(xml)
149
+ end
150
+
151
+ def execute(tag, sort, waitObj, &block)
152
+ loop(tag, sort, "atom", waitObj) { |result, page|
153
+ block.call(result, page)
154
+ }
155
+ end
155
156
  end
156
157
  end