nicoscraper 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  require 'xml'
3
3
  require 'time'
4
4
 
5
- module Convert
5
+ module Nicos::Converter
6
6
  def iso8601ToUnix(str)
7
7
  Time.strptime(str, "%Y-%m-%dT%H:%M:%S").to_i
8
8
  end
@@ -30,7 +30,7 @@ module Convert
30
30
  module_function :commaRemover
31
31
  end
32
32
 
33
- module Extract
33
+ module Nicos::Extractor
34
34
  def mylistId(str)
35
35
  /(mylist\/)([0-9]{1,})/ =~ str
36
36
  $2.to_i
@@ -50,7 +50,7 @@ module Extract
50
50
  module_function :videoId
51
51
  end
52
52
 
53
- module Unicode
53
+ module Nicos::Unicode
54
54
  def escape(str)
55
55
  ary = str.unpack("U*").map!{|i| "\\u#{i.to_s(16)}"}
56
56
  ary.join
@@ -9,8 +9,8 @@ require 'kconv'
9
9
  require 'parser.rb'
10
10
  require 'mylist.rb'
11
11
  require 'connector.rb'
12
-
13
- class Movie
12
+
13
+ class Nicos::Movie
14
14
  def initialize(video_id)
15
15
  @video_id = video_id
16
16
  @available = false
@@ -23,7 +23,7 @@ class Movie
23
23
  # 指定されたマイリストに自分が入っていれば、真を返す。
24
24
  def isBelongsTo (mylistId, &block)
25
25
  isBelongs = false
26
- thisMl = Mylist.new(mylistId)
26
+ thisMl = Nicos::Mylist.new(mylistId)
27
27
  thisMl.getInfoLt
28
28
 
29
29
  thisMl.movies.each { |movie|
@@ -76,8 +76,7 @@ class Movie
76
76
  }
77
77
 
78
78
  sMylistIdAry.each { |mylistId|
79
- puts mylistId
80
- mlObjAry.push( Mylist.new(mylistId) )
79
+ mlObjAry.push( Nicos::Mylist.new(mylistId) )
81
80
  }
82
81
 
83
82
  puts "\sDiscern logic terminated."
@@ -105,7 +104,7 @@ class Movie
105
104
  end
106
105
 
107
106
  def getInfo
108
- con = GetThumbInfoConnector.new()
107
+ con = Nicos::Connector::GetThumbInfo.new()
109
108
  host = 'ext.nicovideo.jp'
110
109
  entity = '/api/getthumbinfo/' + @video_id
111
110
  con.setWait(nil)
@@ -114,8 +113,9 @@ class Movie
114
113
  if
115
114
  result["order"] == "success"
116
115
  then
117
- parsed = NicoParser.getThumbInfo(result["body"])
116
+ parsed = Nicos::Parser::getThumbInfo(result["body"])
118
117
  set(parsed)
118
+ p self
119
119
  @available = true
120
120
  else
121
121
  @available = false
@@ -9,8 +9,7 @@ require 'parser.rb'
9
9
  require 'movie.rb'
10
10
  require 'connector.rb'
11
11
 
12
-
13
- class Mylist
12
+ class Nicos::Mylist
14
13
  def initialize (mylist_id)
15
14
  @mylist_id = mylist_id
16
15
  @movies = []
@@ -57,7 +56,7 @@ class Mylist
57
56
  end
58
57
 
59
58
  def getInfo
60
- con = Connector.new('mech')
59
+ con = Nicos::Connector::Html.new('mech')
61
60
  reqUrl = 'http://www.nicovideo.jp' +
62
61
  '/mylist/' + @mylist_id.to_s
63
62
  con.setWait(nil)
@@ -129,12 +128,12 @@ class Mylist
129
128
  mvJson = mvJson.scan(/\".{1,}/)[0]
130
129
  mvJson = mvJson.slice(0, mvJson.length - 5)
131
130
  #mvJson = mvJson.split('},{')
132
- mvJson = Unicode.unescape(mvJson).split('},{')
131
+ mvJson = Nicos::Unicode.unescape(mvJson).split('},{')
133
132
 
134
133
  mvJson.each { |e|
135
134
  e = "{" + e + "}"
136
135
  param = JSON.parse(e)
137
- movie = Movie.new(param['item_data']['video_id'])
136
+ movie = Nicos::Movie.new(param['item_data']['video_id'])
138
137
  movie.set(param)
139
138
 
140
139
  @movies.push(movie)
@@ -142,7 +141,7 @@ class Mylist
142
141
  end
143
142
 
144
143
  def getInfoLt
145
- con = MylistAtomConnector.new()
144
+ con = Nicos::Connector::MylistAtom.new()
146
145
  host = 'www.nicovideo.jp'
147
146
  puts @mylist_id
148
147
  entity = '/mylist/' + @mylist_id.to_s + '?rss=atom&numbers=1'
@@ -152,17 +151,18 @@ class Mylist
152
151
  if
153
152
  result["order"] == "success"
154
153
  then
155
- parsed = NicoParser.mylistRss(result["body"])
154
+ parsed = Nicos::Parser::mylistAtom(result["body"])
156
155
 
157
156
  parsed["entry"].each { |e|
158
- movie = Movie.new(e["video_id"])
157
+ movie = Nicos::Movie.new(e["video_id"])
159
158
  e["available"] = true
160
159
  movie.set(e)
161
160
  @movies.push(movie)
162
161
  }
163
-
164
- set(parsed["mylist"])
162
+
165
163
  @available = true
164
+ set(parsed["mylist"])
165
+ p self
166
166
  else
167
167
  @available = false
168
168
  end
@@ -220,4 +220,4 @@ class Mylist
220
220
  attr_accessor :author
221
221
 
222
222
  attr_accessor :movies
223
- end
223
+ end
@@ -0,0 +1,9 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.unshift File.dirname(__FILE__) + "/../lib"
3
+
4
+ module Nicos
5
+ end
6
+
7
+ require 'movie.rb'
8
+ require 'mylist.rb'
9
+ require 'searcher.rb'
@@ -6,9 +6,7 @@ require 'xml'
6
6
  require 'time'
7
7
  require 'converter.rb'
8
8
 
9
- module NicoParser
10
- public
11
-
9
+ module Nicos::Parser
12
10
  def getThumbInfo(xml)
13
11
  doc = XML::Reader.string(
14
12
  xml,
@@ -37,7 +35,7 @@ module NicoParser
37
35
  when "first_retrieve"
38
36
  label = doc.name
39
37
  doc.read
40
- parsed[label] = Convert.iso8601ToUnix(doc.value)
38
+ parsed[label] = Nicos::Converter.iso8601ToUnix(doc.value)
41
39
  when "length"
42
40
  doc.read
43
41
  lengthStr = doc.value.split(/\:/)
@@ -60,7 +58,7 @@ module NicoParser
60
58
  parsed
61
59
  end
62
60
 
63
- def tagRss(xml)
61
+ def tagAtom(xml)
64
62
  doc = XML::Reader.string(
65
63
  xml,
66
64
  :options => XML::Parser::Options::NOBLANKS |
@@ -85,7 +83,7 @@ module NicoParser
85
83
  when "published", "updated"
86
84
  label = doc.name
87
85
  doc.read
88
- parsed[n][label] = Convert.iso8601ToUnix(doc.value)
86
+ parsed[n][label] = Nicos::Converter.iso8601ToUnix(doc.value)
89
87
  when "p"
90
88
  doc.move_to_attribute("class")
91
89
  case doc.value
@@ -119,7 +117,7 @@ module NicoParser
119
117
  parsed
120
118
  end
121
119
 
122
- def mylistRss(xml)
120
+ def mylistAtom(xml)
123
121
  doc = XML::Reader.string(
124
122
  xml,
125
123
  :options => XML::Parser::Options::NOBLANKS |
@@ -150,7 +148,7 @@ module NicoParser
150
148
  if n != -1
151
149
  doc.move_to_attribute("href")
152
150
  parsed["entry"][n]["video_id"] =
153
- Extract.videoId(doc.value)
151
+ Nicos::Extractor.videoId(doc.value)
154
152
  end
155
153
  when "subtitle"
156
154
  doc.read
@@ -159,16 +157,16 @@ module NicoParser
159
157
  if n == -1
160
158
  doc.read
161
159
  parsed["mylist"]["mylist_id"] =
162
- Extract.mylistId(doc.value)
160
+ Nicos::Extractor.mylistId(doc.value)
163
161
  else
164
162
  doc.read
165
163
  parsed["entry"][n]["item_id"] =
166
- Extract.itemId(doc.value)
164
+ Nicos::Extractor.itemId(doc.value)
167
165
  end
168
166
  when "updated"
169
167
  doc.read
170
168
  parsed["mylist"]["updated"] =
171
- Convert.iso8601ToUnix(doc.value)
169
+ Nicos::Converter.iso8601ToUnix(doc.value)
172
170
  when "name"
173
171
  doc.read
174
172
  parsed["mylist"]["author"] = doc.value
@@ -220,7 +218,7 @@ module NicoParser
220
218
  parsed
221
219
  end
222
220
 
223
- module_function :tagRss
224
- module_function :mylistRss
221
+ module_function :tagAtom
222
+ module_function :mylistAtom
225
223
  module_function :getThumbInfo
226
224
  end
@@ -10,147 +10,148 @@ require 'kconv'
10
10
 
11
11
  require 'parser.rb'
12
12
 
13
+ module Nicos::Searcher
14
+ class ByTagSuper
15
+ private
13
16
 
14
- class SearchByTagSuper
15
- private
17
+ def get(tag, sort, page, method, waitObj)
18
+ paramAry = []
19
+
20
+ case sort
21
+ when 'comment_new'
22
+ sortStr = ''
23
+ when 'comment_old'
24
+ sortStr = 'order=a'
25
+ when 'view_many'
26
+ sortStr = 'sort=v'
27
+ when 'view_few'
28
+ sortStr = 'sort=v&order=a'
29
+ when 'comment_many'
30
+ sortStr = 'sort=r'
31
+ when 'comment_few'
32
+ sortStr = 'sort=r&order=a'
33
+ when 'mylist_many'
34
+ sortStr = 'sort=m'
35
+ when 'mylist_few'
36
+ sortStr = 'sort=m&order=a'
37
+ when 'post_new'
38
+ sortStr = 'sort=f'
39
+ when 'post_old'
40
+ sortStr = 'sort=f&order=a'
41
+ when 'length_long'
42
+ sortStr = 'sort=l'
43
+ when 'length_short'
44
+ sortStr = 'sort=l&order=a'
45
+ end
46
+
47
+ paramAry.push("page=#{page}") if page != 1
48
+ paramAry.push(sortStr)
49
+ if method == "atom" then paramAry.push("rss=atom&numbers=1") end
50
+ param = tag + "?" + paramAry.join('&')
51
+
52
+ host = 'www.nicovideo.jp'
53
+ entity = '/tag/' + param
16
54
 
17
- def get(tag, sort, page, method, waitObj)
18
- paramAry = []
19
-
20
- case sort
21
- when 'comment_new'
22
- sortStr = ''
23
- when 'comment_old'
24
- sortStr = 'order=a'
25
- when 'view_many'
26
- sortStr = 'sort=v'
27
- when 'view_few'
28
- sortStr = 'sort=v&order=a'
29
- when 'comment_many'
30
- sortStr = 'sort=r'
31
- when 'comment_few'
32
- sortStr = 'sort=r&order=a'
33
- when 'mylist_many'
34
- sortStr = 'sort=m'
35
- when 'mylist_few'
36
- sortStr = 'sort=m&order=a'
37
- when 'post_new'
38
- sortStr = 'sort=f'
39
- when 'post_old'
40
- sortStr = 'sort=f&order=a'
41
- when 'length_long'
42
- sortStr = 'sort=l'
43
- when 'length_short'
44
- sortStr = 'sort=l&order=a'
55
+ @connector.setWait(waitObj)
56
+ @connector.get(host, entity)
45
57
  end
46
-
47
- paramAry.push("page=#{page}") if page != 1
48
- paramAry.push(sortStr)
49
- if method == "atom" then paramAry.push("rss=atom&numbers=1") end
50
- param = tag + "?" + paramAry.join('&')
51
-
52
- host = 'www.nicovideo.jp'
53
- entity = '/tag/' + param
54
58
 
55
- @connector.setWait(waitObj)
56
- @connector.get(host, entity)
57
- end
58
-
59
- def loop(tag, sort, method, waitObj, &block)
60
- termFlag = false
61
- page = 1
62
- movieObjAry = []
63
-
64
- begin
65
- response = get(
66
- tag,
67
- sort,
68
- page,
69
- method,
70
- waitObj
71
- )
59
+ def loop(tag, sort, method, waitObj, &block)
60
+ termFlag = false
61
+ page = 1
62
+ movieObjAry = []
63
+
64
+ begin
65
+ response = get(
66
+ tag,
67
+ sort,
68
+ page,
69
+ method,
70
+ waitObj
71
+ )
72
72
 
73
- if response["order"] == "success"
74
- result = parse(response["body"])
75
- result.each { |each|
76
- movie = Movie.new(each["video_id"])
77
- each["available"] = true
78
- movie.set(each)
79
- movieObjAry.push(movie)
80
- }
73
+ if response["order"] == "success"
74
+ result = parse(response["body"])
75
+ result.each { |each|
76
+ movie = Nicos::Movie.new(each["video_id"])
77
+ each["available"] = true
78
+ movie.set(each)
79
+ movieObjAry.push(movie)
80
+ }
81
81
 
82
- termFlag = block.call(movieObjAry, page)
83
- else
84
- termFlag = true
85
- end
86
-
87
- page += 1
88
- end until termFlag
82
+ termFlag = block.call(movieObjAry, page)
83
+ else
84
+ termFlag = true
85
+ end
86
+
87
+ page += 1
88
+ end until termFlag
89
+ end
89
90
  end
90
- end
91
91
 
92
- class SearchByTag < SearchByTagSuper
93
- def initialize
94
- @numOfSearched = 32
95
- @incrAmt = 0.2
92
+ class ByTag < ByTagSuper
93
+ def initialize
94
+ @numOfSearched = 32
95
+ @incrAmt = 0.2
96
96
 
97
- @connector = Connector.new('mech')
98
-
99
- # HTML中の各パラメータの所在を示すXPath
100
- @videoIdXP = "//div[@class='uad_thumbfrm']/table/tr/td/p/a"
101
- @lengthXP = "//div[@class='uad_thumbfrm']/table/tr/td/p[2]/span"
102
- @viewXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[1]/strong"
103
- @resXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[2]/strong"
104
- @mylistXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[3]/a/strong"
105
- @adXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[4]/a/strong"
106
- end
107
-
108
- def parse(movieNum)
109
- result = []
110
-
111
- video_id = /(sm|nm)[0-9]{1,}/.match(@connector.mech.page.search(@videoIdXP)[movieNum]['href'])[0]
112
- lengthStr = @connector.mech.page.search(@lengthXP)[movieNum].text.split(/\:/)
113
- length = lengthStr[0].to_i * 60 + lengthStr[1].to_i
114
- view = @connector.mech.page.search(@viewXP)[movieNum]
115
- .text.gsub(/\,/, '').to_i
116
- res = @connector.mech.page.search(@resXP)[movieNum]
117
- .text.gsub(/\,/, '').to_i
118
- mylist = @connector.mech.page.search(@mylistXP)[movieNum]
119
- .text.gsub(/\,/, '').to_i
120
- ad = @connector.mech.page.search(@adXP)[movieNum]
121
- .text.gsub(/\,/, '').to_i
97
+ @connector = Nicos::Connector.new('mech')
98
+
99
+ # HTML中の各パラメータの所在を示すXPath
100
+ @videoIdXP = "//div[@class='uad_thumbfrm']/table/tr/td/p/a"
101
+ @lengthXP = "//div[@class='uad_thumbfrm']/table/tr/td/p[2]/span"
102
+ @viewXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[1]/strong"
103
+ @resXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[2]/strong"
104
+ @mylistXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[3]/a/strong"
105
+ @adXP = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[4]/a/strong"
106
+ end
107
+
108
+ def parse(movieNum)
109
+ result = []
110
+
111
+ video_id = /(sm|nm)[0-9]{1,}/.match(@connector.mech.page.search(@videoIdXP)[movieNum]['href'])[0]
112
+ lengthStr = @connector.mech.page.search(@lengthXP)[movieNum].text.split(/\:/)
113
+ length = lengthStr[0].to_i * 60 + lengthStr[1].to_i
114
+ view = @connector.mech.page.search(@viewXP)[movieNum]
115
+ .text.gsub(/\,/, '').to_i
116
+ res = @connector.mech.page.search(@resXP)[movieNum]
117
+ .text.gsub(/\,/, '').to_i
118
+ mylist = @connector.mech.page.search(@mylistXP)[movieNum]
119
+ .text.gsub(/\,/, '').to_i
120
+ ad = @connector.mech.page.search(@adXP)[movieNum]
121
+ .text.gsub(/\,/, '').to_i
122
122
 
123
- result.push({
124
- "video_id" => video_id,
125
- "length" => length,
126
- "view" => view,
127
- "res" => res,
128
- "mylist" => mylist,
129
- "ad" => ad
130
- })
131
- end
132
-
133
- def execute(tag, sort, waitObj, &block)
134
- loop(tag, sort, "mech", waitObj) { |result, page|
135
- block.call(result, page)
136
- }
123
+ result.push({
124
+ "video_id" => video_id,
125
+ "length" => length,
126
+ "view" => view,
127
+ "res" => res,
128
+ "mylist" => mylist,
129
+ "ad" => ad
130
+ })
131
+ end
132
+
133
+ def execute(tag, sort, waitObj, &block)
134
+ loop(tag, sort, "mech", waitObj) { |result, page|
135
+ block.call(result, page)
136
+ }
137
+ end
137
138
  end
138
- end
139
139
 
140
- class SearchByTagLt < SearchByTagSuper
141
- def initialize
142
- @numOfSearched = 32
143
- @incrAmt = 0.2
144
- @connector = SearchByTagAtomConnector.new()
145
- end
140
+ class ByTagLt < ByTagSuper
141
+ def initialize
142
+ @numOfSearched = 32
143
+ @incrAmt = 0.2
144
+ @connector = Nicos::Connector::TagAtom.new()
145
+ end
146
146
 
147
- def parse(xml)
148
- NicoParser.tagRss(xml)
149
- end
150
-
151
- def execute(tag, sort, waitObj, &block)
152
- loop(tag, sort, "atom", waitObj) { |result, page|
153
- block.call(result, page)
154
- }
147
+ def parse(xml)
148
+ Nicos::Parser.tagAtom(xml)
149
+ end
150
+
151
+ def execute(tag, sort, waitObj, &block)
152
+ loop(tag, sort, "atom", waitObj) { |result, page|
153
+ block.call(result, page)
154
+ }
155
+ end
155
156
  end
156
157
  end