nicoscraper 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
data/lib/searcher.rb DELETED
@@ -1,243 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- $:.unshift File.dirname(__FILE__)
3
-
4
- require 'rubygems'
5
- require 'ruby-debug'
6
-
7
- require 'time'
8
- require 'mechanize'
9
- require 'kconv'
10
-
11
- require 'namespace.rb'
12
- require 'parser.rb'
13
-
14
- module Nicos::Searcher
15
- # :nodocs:
16
# Shared implementation for the tag searchers: builds the tag-search request
# and drives the page-by-page fetch loop.
#
# Subclasses are expected to assign @connector (an object responding to
# +setWait+ and +get+) and to define a +parse+ method that turns a response
# body into an array of hashes, each carrying a "video_id" key.
class ByTagSuper
  # Query-string fragment for every supported sort key. 'comment_new' maps to
  # an empty fragment, i.e. no sort parameter is sent for it.
  SORT_PARAMS = {
    'comment_new'  => '',
    'comment_old'  => 'order=a',
    'view_many'    => 'sort=v',
    'view_few'     => 'sort=v&order=a',
    'comment_many' => 'sort=r',
    'comment_few'  => 'sort=r&order=a',
    'mylist_many'  => 'sort=m',
    'mylist_few'   => 'sort=m&order=a',
    'post_new'     => 'sort=f',
    'post_old'     => 'sort=f&order=a',
    'length_long'  => 'sort=l',
    'length_short' => 'sort=l&order=a'
  }.freeze

  private

  # Requests one page of tag-search results through the connector.
  #
  # @param [String]  tag     tag to search for; appended to the path as-is
  # @param [String]  sort    one of the SORT_PARAMS keys; an unknown key adds
  #                          no sort parameter
  # @param [Integer] page    page number; the page parameter is omitted for 1
  # @param [String]  method  "atom" adds the Atom-feed parameters, any other
  #                          value adds nothing
  # @param [Object]  waitObj wait settings, handed to the connector untouched
  # @return whatever @connector.get returns
  def get(tag, sort, page, method, waitObj)
    params = []
    params << "page=#{page}" if page != 1
    params << SORT_PARAMS[sort]
    params << "rss=atom&numbers=1" if method == "atom"

    # Drop the nil/empty sort fragment so the query never contains "&&" or a
    # dangling "?" / "&".
    params = params.compact.reject(&:empty?)

    entity = "/tag/#{tag}"
    entity += "?#{params.join('&')}" unless params.empty?

    @connector.setWait(waitObj)
    @connector.get('www.nicovideo.jp', entity)
  end

  # Fetches result pages one after another, starting at page 1, and hands
  # the movies collected so far to the block after every page.
  #
  # The movie array is shared across iterations, so on page N the block sees
  # the movies of pages 1..N. The block is called even when a page's request
  # did not succeed. Fetching continues for as long as the block returns
  # the string "continue".
  #
  # NOTE: this method shadows Kernel#loop for this class and its subclasses,
  # which is why the body uses +while+ instead of +loop do+.
  #
  # @param [String] tag
  # @param [String] sort
  # @param [String] method
  # @param [Object] waitObj
  # @yield [movies, page] array of Nicos::Movie objects and the page number
  # @raise [ArgumentError] when no block is given
  # @return [nil]
  def loop(tag, sort, method, waitObj, &block)
    # Fail before any request is issued rather than after the first one.
    raise ArgumentError, "a block is required" unless block

    page = 1
    movies = []
    order = "continue"

    while order == "continue"
      response = get(tag, sort, page, method, waitObj)

      if response["order"] == "success"
        parse(response["body"]).each do |attrs|
          movie = Nicos::Movie.new(attrs["video_id"])
          attrs["available"] = true
          movie.set(attrs)
          movies.push(movie)
        end
      end

      order = block.call(movies, page)
      page += 1
    end
  end
end
91
-
92
# Tag searcher that reads the HTML search-result page through the
# Mechanize-backed connector (Nicos::Connector.new('mech')).
class ByTagHtml < ByTagSuper
  def initialize
    @numOfSearched = 32
    @incrAmt = 0.2

    @connector = Nicos::Connector.new('mech')

    # XPath expressions locating each per-movie value inside the result HTML.
    @videoIdXP = "//div[@class='uad_thumbfrm']/table/tr/td/p/a"
    @lengthXP  = "//div[@class='uad_thumbfrm']/table/tr/td/p[2]/span"
    @viewXP    = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[1]/strong"
    @resXP     = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[2]/strong"
    @mylistXP  = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[3]/a/strong"
    @adXP      = "//div[@class='uad_thumbfrm']/table/tr/td[2]/div/nobr[4]/a/strong"
  end

  # Runs the search and yields the collected movies after every page.
  #
  # The previous body passed the undefined names +sort+ and +waitObj+ to
  # +loop+, so every call raised NameError; the method's own parameters are
  # forwarded instead.
  #
  # @param [String] tag
  # @param [String] sortMethod
  # @param [HashObj] waitConfig
  # @yield [result, page] movies collected so far and the current page number
  def execute(tag, sortMethod, waitConfig, &block)
    loop(tag, sortMethod, "mech", waitConfig) { |result, page|
      block.call(result, page)
    }
  end

  private

  # Extracts the values of one movie from the page currently held by the
  # connector's Mechanize agent.
  #
  # NOTE(review): ByTagSuper#loop calls parse(response["body"]), whereas this
  # method uses its argument as an index into the matched nodes. Confirm how
  # the two are meant to fit together before relying on this class.
  #
  # @param movieNum index of the movie among the nodes matched by each XPath
  # @return [Array<Hash>] one-element array with the keys "video_id",
  #   "length", "view", "res", "mylist" and "ad"
  def parse(movieNum)
    page = @connector.mech.page

    href = page.search(@videoIdXP)[movieNum]['href']
    video_id = /(sm|nm)[0-9]{1,}/.match(href)[0]

    # The length text is split on ":"; the first part is weighted by 60 and
    # the second part is added to it.
    lengthStr = page.search(@lengthXP)[movieNum].text.split(/\:/)
    length = lengthStr[0].to_i * 60 + lengthStr[1].to_i

    [{
      "video_id" => video_id,
      "length"   => length,
      "view"     => count_at(page, @viewXP, movieNum),
      "res"      => count_at(page, @resXP, movieNum),
      "mylist"   => count_at(page, @mylistXP, movieNum),
      "ad"       => count_at(page, @adXP, movieNum)
    }]
  end

  # Reads the text of the index-th node matched by xpath, strips the comma
  # separators and converts the rest to an integer.
  def count_at(page, xpath, index)
    page.search(xpath)[index].text.gsub(/\,/, '').to_i
  end
end
146
-
147
# Tag searcher that goes through the Nicos::Connector::TagAtom connector and
# parses the responses with Nicos::Parser.tagAtom.
class ByTag < ByTagSuper
  def initialize
    @connector = Nicos::Connector::TagAtom.new()
    @numOfSearched = 32
    @incrAmt = 0.2
  end

  # Runs the search.
  #
  # @param [String] tag tag string to search for
  # @param [String] sortMethod sort order
  #==sortMethod: sort order
  # *comment_new*
  # most recent comment first
  #
  # *comment_old*
  # oldest comment first
  #
  # *view_many*
  # most views first
  #
  # *view_few*
  # fewest views first
  #
  # *comment_many*
  # most comments first
  #
  # *comment_few*
  # fewest comments first
  #
  # *mylist_many*
  # most mylist registrations first
  #
  # *mylist_few*
  # fewest mylist registrations first
  #
  # *post_new*
  # most recently posted first
  #
  # *post_old*
  # oldest post first
  #
  # *length_long*
  # longest playback time first
  #
  # *length_short*
  # shortest playback time first
  #
  # @param [HashObj] waitConfig wait settings
  #==waitConfig: wait settings
  # <b>Before changing the waits, read the notes and the disclaimer in the README.</b>
  #
  # Pass a hash in the format below; the values shown are the defaults.
  # The hash does not need to contain every key: build one holding only the
  # keys and values you want to change.
  #
  #    @waitConfig = {
  #      'seqAccLimit' => 10,  # number of consecutive requests
  #      'afterSeq'    => 10,  # wait after a run of consecutive requests (all units below are seconds)
  #      'each'        => 1,   # wait per request during consecutive requests
  #      'increment'   => 1,   # amount added to the per-request wait after access is refused
  #
  #      'deniedSeqReq'=> {    # settings for when consecutive access is denied (same keys below)
  #        'retryLimit'  => 3,   # maximum number of retries
  #        'wait'        => 120  # wait before the next access
  #      },
  #
  #      'serverIsBusy'=> {    # when the server is busy
  #        'retryLimit'  => 3,
  #        'wait'        => 120
  #      },
  #
  #      'serviceUnavailable' => { # when a 503 is returned
  #        'retryLimit'  => 3,
  #        'wait'        => 120
  #      },
  #
  #      'timedOut' => {       # on timeout
  #        'retryLimit'  => 3,
  #        'wait'        => 10
  #      }
  #    }
  #
  # @yield [result, page] handed on to the inherited +loop+ together with the
  #   "atom" method name
  def execute(tag, sortMethod, waitConfig, &block)
    loop(tag, sortMethod, "atom", waitConfig, &block)
  end

  private

  # Converts an Atom response body into the parsed result by delegating to
  # Nicos::Parser.tagAtom.
  def parse(xml)
    Nicos::Parser.tagAtom(xml)
  end
end
243
- end
data/nicoscraper.gemspec DELETED
@@ -1,72 +0,0 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
- # -*- encoding: utf-8 -*-
5
-
6
# Gem specification for nicoscraper 0.2.4.
#
# NOTE: the surrounding file is marked as generated by jeweler; a regenerated
# gemspec will overwrite hand edits made here.
Gem::Specification.new do |s|
  s.name = "nicoscraper"
  s.version = "0.2.4"

  if s.respond_to? :required_rubygems_version=
    s.required_rubygems_version = Gem::Requirement.new(">= 0")
  end

  s.authors = ["Masami Yonehara"]
  s.date = "2011-09-23"
  s.description = "It scrape movies and mylists of Niconico douga.\n"
  s.email = "zeitdiebe@gmail.com"
  s.extra_rdoc_files = ["LICENSE.txt", "README.md"]
  s.files = [
    ".document",
    "Gemfile",
    "Gemfile.lock",
    "LICENSE.txt",
    "README.md",
    "Rakefile",
    "VERSION",
    "index.html",
    "lib/connector.rb",
    "lib/converter.rb",
    "lib/movie.rb",
    "lib/mylist.rb",
    "lib/namespace.rb",
    "lib/parser.rb",
    "lib/searcher.rb",
    "nicoscraper.gemspec",
    "test/movie_spec.rb"
  ]
  s.homepage = "http://github.com/hdemon/nicoscraper"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.8"
  s.summary = "The scraper for Niconico douga."

  # Dependency tables as [name, requirement], kept in the declaration order.
  runtime_deps = [
    ["damerau-levenshtein", ">= 0"]
  ]
  development_deps = [
    ["rake", "= 0.8.7"],
    ["shoulda", ">= 0"],
    ["bundler", "~> 1.0.0"],
    ["jeweler", "~> 1.6.4"],
    ["rcov", ">= 0"]
  ]

  # Typed (runtime/development) dependencies are declared only when the spec
  # object responds to specification_version and the RubyGems version is at
  # least 1.2.0; otherwise everything goes through add_dependency.
  typed = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    typed = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  if typed
    runtime_deps.each { |name, req| s.add_runtime_dependency(name, [req]) }
    development_deps.each { |name, req| s.add_development_dependency(name, [req]) }
  else
    (runtime_deps + development_deps).each { |name, req| s.add_dependency(name, [req]) }
  end
end
72
-