nicoscraper 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/searcher.rb DELETED
@@ -1,243 +0,0 @@
1
- # -*- encoding: utf-8 -*-# -*- encoding: utf-8 -*-
2
- $:.unshift File.dirname(__FILE__)
3
-
4
- require 'rubygems'
5
- require 'ruby-debug'
6
-
7
- require 'time'
8
- require 'mechanize'
9
- require 'kconv'
10
-
11
- require 'namespace.rb'
12
- require 'parser.rb'
13
-
14
- module Nicos::Searcher
15
- # :nodocs:
16
# Shared plumbing for the tag searchers: builds the tag-search request and
# drives the page-by-page fetch loop.
#
# Subclasses are expected to assign @connector (an object responding to
# #setWait and #get) and to define a private #parse(body) that returns an
# Array of Hashes, each containing at least a "video_id" key.
class ByTagSuper
  # Query-string fragment for every supported sort key. 'comment_new' is the
  # site's default ordering and therefore needs no parameter at all.
  SORT_PARAMS = {
    'comment_new'  => '',
    'comment_old'  => 'order=a',
    'view_many'    => 'sort=v',
    'view_few'     => 'sort=v&order=a',
    'comment_many' => 'sort=r',
    'comment_few'  => 'sort=r&order=a',
    'mylist_many'  => 'sort=m',
    'mylist_few'   => 'sort=m&order=a',
    'post_new'     => 'sort=f',
    'post_old'     => 'sort=f&order=a',
    'length_long'  => 'sort=l',
    'length_short' => 'sort=l&order=a'
  }.freeze

  private

  # Requests one page of tag-search results through @connector.
  #
  # @param [String]  tag     tag to search for; inserted into the path as-is
  #                          (NOTE(review): no URL-encoding happens here —
  #                          confirm the connector or the caller handles it)
  # @param [String]  sort    one of the SORT_PARAMS keys; any other value adds
  #                          no sort parameter
  # @param [Integer] page    page number; "page=N" is sent for every page but 1
  # @param [String]  method  "atom" appends "rss=atom&numbers=1"
  # @param waitObj           handed to @connector.setWait unchanged
  # @return whatever @connector.get returns
  def get(tag, sort, page, method, waitObj)
    params = []
    params.push("page=#{page}") if page != 1

    # Skip empty/unknown sort fragments so the query never contains a
    # dangling "&" or a bare trailing "?".
    sort_param = SORT_PARAMS[sort]
    params.push(sort_param) if sort_param && !sort_param.empty?

    params.push('rss=atom&numbers=1') if method == 'atom'

    entity = "/tag/#{tag}"
    entity += "?#{params.join('&')}" unless params.empty?

    @connector.setWait(waitObj)
    @connector.get('www.nicovideo.jp', entity)
  end

  # Fetches result pages starting at page 1 and yields after every request
  # until the block returns something other than "continue".
  #
  # The array handed to the block is created once and keeps growing: on page N
  # it holds the movies from pages 1..N. The block is also called when a
  # request did not report "success"; in that case nothing was appended.
  #
  # NOTE: this method shadows Kernel#loop inside the class hierarchy.
  #
  # @param [String] tag     see #get
  # @param [String] sort    see #get
  # @param [String] method  see #get
  # @param waitObj          see #get
  # @yield [movies, page] movies collected so far and the page just requested
  # @yieldreturn [String] "continue" to request the next page
  # @return [nil]
  def loop(tag, sort, method, waitObj, &block)
    page   = 1
    movies = []
    order  = "continue"

    # Post-condition loop: the first page is always requested.
    begin
      response = get(tag, sort, page, method, waitObj)

      if response["order"] == "success"
        parse(response["body"]).each do |attrs|
          movie = Nicos::Movie.new(attrs["video_id"])
          attrs["available"] = true
          movie.set(attrs)
          movies.push(movie)
        end
      end

      order = block.call(movies, page)
      page += 1
    end until order != "continue"
  end
end
91
-
92
# Tag searcher that reads the HTML result page through a connector created
# with the 'mech' backend and pulls each field out with XPath.
class ByTagHtml < ByTagSuper
  def initialize
    @numOfSearched = 32
    @incrAmt       = 0.2

    @connector = Nicos::Connector.new('mech')

    # XPath expressions locating each field inside the result-page HTML.
    row = "//div[@class='uad_thumbfrm']/table/tr"
    @videoIdXP = "#{row}/td/p/a"
    @lengthXP  = "#{row}/td/p[2]/span"
    @viewXP    = "#{row}/td[2]/div/nobr[1]/strong"
    @resXP     = "#{row}/td[2]/div/nobr[2]/strong"
    @mylistXP  = "#{row}/td[2]/div/nobr[3]/a/strong"
    @adXP      = "#{row}/td[2]/div/nobr[4]/a/strong"
  end

  # Runs the search, yielding after every fetched page.
  #
  # @param [String] tag
  # @param [String] sortMethod
  # @param [HashObj] waitConfig
  # @yield [result, page] forwarded unchanged from ByTagSuper#loop
  def execute(tag, sortMethod, waitConfig, &block)
    # The original passed the undefined locals `sort` and `waitObj` here,
    # which raised NameError on every call.
    loop(tag, sortMethod, "mech", waitConfig, &block)
  end

  private

  # Extracts movie attributes from the page currently held by
  # @connector.mech (the response body itself is not read).
  #
  # ByTagSuper#loop calls this with the response body rather than an entry
  # index, so any non-Integer argument is treated as "every entry on the
  # page"; an Integer still selects that single entry.
  # NOTE(review): assumes @connector.mech.page is the page that was just
  # requested, as the single-entry code already did — confirm in Connector.
  #
  # Raises NoMethodError when an entry's href contains no sm/nm video id or
  # when one of the XPath lookups has no node at the requested index.
  #
  # @param [Integer, Object] movieNum entry index, or anything else for all
  # @return [Array<Hash>] one Hash per entry with the keys "video_id",
  #   "length" (minutes * 60 + seconds), "view", "res", "mylist" and "ad"
  def parse(movieNum)
    page    = @connector.mech.page
    ids     = page.search(@videoIdXP)
    lengths = page.search(@lengthXP)
    views   = page.search(@viewXP)
    reses   = page.search(@resXP)
    mylists = page.search(@mylistXP)
    ads     = page.search(@adXP)

    indexes = movieNum.is_a?(Integer) ? [movieNum] : (0...ids.length).to_a

    indexes.map do |i|
      video_id = /(sm|nm)[0-9]{1,}/.match(ids[i]['href'])[0]
      minutes, seconds = lengths[i].text.split(/\:/)

      {
        "video_id" => video_id,
        "length"   => minutes.to_i * 60 + seconds.to_i,
        # Counters are rendered with thousands separators ("1,234").
        "view"     => views[i].text.gsub(/\,/, '').to_i,
        "res"      => reses[i].text.gsub(/\,/, '').to_i,
        "mylist"   => mylists[i].text.gsub(/\,/, '').to_i,
        "ad"       => ads[i].text.gsub(/\,/, '').to_i
      }
    end
  end
end
146
-
147
# Tag searcher backed by the Atom feed of the tag-search results.
class ByTag < ByTagSuper
  def initialize
    @numOfSearched = 32
    @incrAmt       = 0.2
    @connector     = Nicos::Connector::TagAtom.new
  end

  # Runs the search.
  #
  # @param [String] tag tag string to search for
  # @param [String] sortMethod sort order
  #==sortMethod: sort order
  # *comment_new*
  # most recent comment first
  #
  # *comment_old*
  # oldest comment first
  #
  # *view_many*
  # most views first
  #
  # *view_few*
  # fewest views first
  #
  # *comment_many*
  # most comments first
  #
  # *comment_few*
  # fewest comments first
  #
  # *mylist_many*
  # most mylist registrations first
  #
  # *mylist_few*
  # fewest mylist registrations first
  #
  # *post_new*
  # most recently posted first
  #
  # *post_old*
  # oldest post first
  #
  # *length_long*
  # longest playing time first
  #
  # *length_short*
  # shortest playing time first
  #
  # @param [HashObj] waitConfig wait settings
  #==waitConfig: wait settings
  # <b>Before changing the wait settings, read the notes and the disclaimer in the README.</b>
  #
  # Pass a hash in the format below; the values shown are the defaults.
  # The hash does not need to contain every key: build one holding only
  # the keys and values you want to change.
  #
  #    @waitConfig = {
  #      'seqAccLimit' => 10,  # number of consecutive requests
  #      'afterSeq'    => 10,  # wait after a run of consecutive requests (all values below are in seconds)
  #      'each'        => 1,   # wait per request within a run of consecutive requests
  #      'increment'   => 1,   # amount added to the per-request wait once access has been refused
  #
  #      'deniedSeqReq'=> {    # settings used when consecutive access is denied (same layout below)
  #        'retryLimit'  => 3,   # maximum number of retries
  #        'wait'        => 120  # wait before the next access
  #      },
  #
  #      'serverIsBusy'=> {    # when the server is busy
  #        'retryLimit'  => 3,
  #        'wait'        => 120
  #      },
  #
  #      'serviceUnavailable' => { # when a 503 is returned
  #        'retryLimit'  => 3,
  #        'wait'        => 120
  #      },
  #
  #      'timedOut' => {       # on timeout
  #        'retryLimit'  => 3,
  #        'wait'        => 10
  #      }
  #    }
  #
  # @yield [result, page] forwarded unchanged from ByTagSuper#loop
  def execute(tag, sortMethod, waitConfig, &block)
    loop(tag, sortMethod, "atom", waitConfig, &block)
  end

  private

  # Hands the Atom document to the shared parser.
  def parse(xml)
    Nicos::Parser.tagAtom(xml)
  end
end
243
- end
data/nicoscraper.gemspec DELETED
@@ -1,72 +0,0 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
5
-
6
# Gem specification for nicoscraper 0.2.4.
Gem::Specification.new do |s|
  s.name = 'nicoscraper'
  s.version = '0.2.4'

  s.required_rubygems_version = Gem::Requirement.new('>= 0') if s.respond_to? :required_rubygems_version=
  s.authors = ['Masami Yonehara']
  s.date = '2011-09-23'
  s.description = "It scrape movies and mylists of Niconico douga.\n"
  s.email = 'zeitdiebe@gmail.com'
  s.extra_rdoc_files = [
    'LICENSE.txt',
    'README.md'
  ]
  s.files = [
    '.document',
    'Gemfile',
    'Gemfile.lock',
    'LICENSE.txt',
    'README.md',
    'Rakefile',
    'VERSION',
    'index.html',
    'lib/connector.rb',
    'lib/converter.rb',
    'lib/movie.rb',
    'lib/mylist.rb',
    'lib/namespace.rb',
    'lib/parser.rb',
    'lib/searcher.rb',
    'nicoscraper.gemspec',
    'test/movie_spec.rb'
  ]
  s.homepage = 'http://github.com/hdemon/nicoscraper'
  s.licenses = ['MIT']
  s.require_paths = ['lib']
  s.rubygems_version = '1.8.8'
  s.summary = 'The scraper for Niconico douga.'

  # Each dependency is declared once here and applied below, instead of
  # repeating the whole list in every compatibility branch.
  runtime_deps = [
    ['damerau-levenshtein', '>= 0']
  ]
  development_deps = [
    ['rake',    '= 0.8.7'],
    ['shoulda', '>= 0'],
    ['bundler', '~> 1.0.0'],
    ['jeweler', '~> 1.6.4'],
    ['rcov',    '>= 0']
  ]

  # Typed (runtime/development) dependencies are only used when the
  # specification supports specification_version and RubyGems is >= 1.2.0;
  # otherwise everything is registered through plain add_dependency.
  typed_dependencies = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    typed_dependencies = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  if typed_dependencies
    runtime_deps.each { |name, req| s.add_runtime_dependency(name, [req]) }
    development_deps.each { |name, req| s.add_development_dependency(name, [req]) }
  else
    (runtime_deps + development_deps).each { |name, req| s.add_dependency(name, [req]) }
  end
end
72
-