list_spider 2.3.0 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/setup CHANGED
@@ -1,8 +1,8 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -1,3 +1,3 @@
1
- #!/bin/sh
2
-
1
+ #!/bin/sh
2
+
3
3
  rubocop -a -D -f simple -o rubocopresult
@@ -1,72 +1,72 @@
1
-
2
- class FileFilter
3
- # 4033
4
- # 920
5
- def initialize(dir_pattern, size_threshold: 1000,
6
- cust_judge: nil, process_block: nil)
7
- @dir_pattern = dir_pattern
8
- @size_threshold = size_threshold
9
- @cust_judge = cust_judge ? cust_judge : method(:default_judge)
10
- @total = 0
11
- @process_block = process_block
12
- end
13
-
14
- def default_judge(f)
15
- File.size(f) <= @size_threshold
16
- end
17
-
18
- def filter_file(f)
19
- if @cust_judge.call(f)
20
- @total += 1
21
- @process_block.call(f)
22
- end
23
- end
24
-
25
- def start
26
- Dir.glob(@dir_pattern) do |f|
27
- filter_file(f)
28
- end
29
- puts "total:#{@total}"
30
- end
31
-
32
- def self.delete(dir_pattern, size_threshold: 1000, cust_judge: nil)
33
- FileFilter.new(
34
- dir_pattern,
35
- size_threshold: size_threshold,
36
- cust_judge: cust_judge,
37
- process_block:
38
- proc do |f|
39
- puts "deleted file: #{f}"
40
- File.delete(f)
41
- end
42
- ).start
43
- end
44
-
45
- def self.check(dir_pattern, size_threshold: 1000, cust_judge: nil)
46
- FileFilter.new(
47
- dir_pattern,
48
- size_threshold: size_threshold,
49
- cust_judge: cust_judge,
50
- process_block:
51
- proc do |f|
52
- puts "filterd file: #{f}"
53
- end
54
- ).start
55
- end
56
-
57
- def self.check_save_result(dir_pattern, save_file_name: 'filtered_file.txt',
58
- size_threshold: 1000, cust_judge: nil)
59
- result_file = File.open(save_file_name, 'wt')
60
- FileFilter.new(
61
- dir_pattern,
62
- size_threshold: size_threshold,
63
- cust_judge: cust_judge,
64
- process_block:
65
- proc do |f|
66
- puts "filterd file: #{f}"
67
- result_file << f << "\n"
68
- end
69
- ).start
70
- result_file.close
71
- end
72
- end
1
+
2
+ class FileFilter
3
+ # 4033
4
+ # 920
5
+ def initialize(dir_pattern, size_threshold: 1000,
6
+ cust_judge: nil, process_block: nil)
7
+ @dir_pattern = dir_pattern
8
+ @size_threshold = size_threshold
9
+ @cust_judge = cust_judge ? cust_judge : method(:default_judge)
10
+ @total = 0
11
+ @process_block = process_block
12
+ end
13
+
14
+ def default_judge(f)
15
+ File.size(f) <= @size_threshold
16
+ end
17
+
18
+ def filter_file(f)
19
+ if @cust_judge.call(f)
20
+ @total += 1
21
+ @process_block.call(f)
22
+ end
23
+ end
24
+
25
+ def start
26
+ Dir.glob(@dir_pattern) do |f|
27
+ filter_file(f)
28
+ end
29
+ puts "total:#{@total}"
30
+ end
31
+
32
+ def self.delete(dir_pattern, size_threshold: 1000, cust_judge: nil)
33
+ FileFilter.new(
34
+ dir_pattern,
35
+ size_threshold: size_threshold,
36
+ cust_judge: cust_judge,
37
+ process_block:
38
+ proc do |f|
39
+ puts "deleted file: #{f}"
40
+ File.delete(f)
41
+ end
42
+ ).start
43
+ end
44
+
45
+ def self.check(dir_pattern, size_threshold: 1000, cust_judge: nil)
46
+ FileFilter.new(
47
+ dir_pattern,
48
+ size_threshold: size_threshold,
49
+ cust_judge: cust_judge,
50
+ process_block:
51
+ proc do |f|
52
+ puts "filterd file: #{f}"
53
+ end
54
+ ).start
55
+ end
56
+
57
+ def self.check_save_result(dir_pattern, save_file_name: 'filtered_file.txt',
58
+ size_threshold: 1000, cust_judge: nil)
59
+ result_file = File.open(save_file_name, 'wt')
60
+ FileFilter.new(
61
+ dir_pattern,
62
+ size_threshold: size_threshold,
63
+ cust_judge: cust_judge,
64
+ process_block:
65
+ proc do |f|
66
+ puts "filterd file: #{f}"
67
+ result_file << f << "\n"
68
+ end
69
+ ).start
70
+ result_file.close
71
+ end
72
+ end
@@ -1,297 +1,308 @@
1
- require 'list_spider/version'
2
- require 'em-http-request'
3
- require 'nokogiri'
4
- require 'fileutils'
5
- require 'set'
6
- require 'addressable/uri'
7
- require File.expand_path('spider_helper', __dir__)
8
- require File.expand_path('file_filter', __dir__)
9
-
10
- # 爬取任务类
11
- class TaskStruct
12
- # * href 请求链接
13
- # * local_path 保存数据的本地路径(此路径作为去重标准)
14
- # * http_method http方法,取值::get, :head, :delete, :put, :post, :patch, :options
15
- # * custom_data 自定义数据
16
- # * parse_method 解析保存文件的回调,参数是TaskStruct对象本身
17
- def initialize(href, # 请求链接
18
- local_path, # 保存数据的本地路径(此路径作为去重标准)
19
- # http方法,取值::get, :head, :delete, :put, :post, :patch, :options
20
- http_method: :get,
21
- custom_data: nil, # 自定义数据
22
- parse_method: nil, # 解析保存文件的回调,参数是TaskStruct对象本身
23
- # 请求成功后的回调,此时可能没有保存文件,比如301,404
24
- # 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
25
- # http_req.response_header.status 状态码
26
- # http_req.response_header 返回头
27
- # http_req.response 返回体
28
- callback: nil,
29
- # 请求失败后的回调
30
- # 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
31
- errback: nil,
32
- stream_callback: nil, # 流数据处理回调
33
- convert_to_utf8: false, # 是否转换为utf8编码
34
- overwrite_exist: false, # 是否覆盖现有文件
35
- # 请求设置
36
- redirects: 3, # 重定向次数
37
- keepalive: nil, # (暂不支持复用)
38
- file: nil, # 要上传的文件路径
39
- path: nil, # 请求路径,在流水线方式请求时有用(暂不支持)
40
- query: nil, # 查询字符串,可以是string或hash类型
41
- body: nil, # 请求体,可以是string或hash类型
42
- head: nil, # 请求头
43
- # 连接设置
44
- connect_timeout: 60, # 连接超时时间
45
- inactivity_timeout: nil, # 连接后超时时间
46
- # ssl设置
47
- # ssl: {
48
- # :private_key_file => '/tmp/server.key',
49
- # :cert_chain_file => '/tmp/server.crt',
50
- # :verify_peer => false
51
- # }
52
- ssl: nil,
53
- # bind: {
54
- # :host => '123.123.123.123', # use a specific interface for outbound request
55
- # :port => '123'
56
- # }
57
- bind: nil,
58
- # 代理设置
59
- # proxy: {
60
- # :host => '127.0.0.1', # proxy address
61
- # :port => 9000, # proxy port
62
- # :type => :socks5 # default proxy mode is HTTP proxy, change to :socks5 if required
63
-
64
- # :authorization => ['user', 'pass'] # proxy authorization header
65
- # }
66
- proxy: nil)
67
- @href = href
68
- @local_path = local_path
69
- @http_method = http_method
70
- @custom_data = custom_data
71
- @parse_method = parse_method
72
- @callback = callback
73
- @errback = errback
74
- @stream_callback = stream_callback
75
- @convert_to_utf8 = convert_to_utf8
76
- @overwrite_exist = overwrite_exist
77
-
78
- @request_options = {
79
- redirects: redirects,
80
- keepalive: keepalive,
81
- file: file,
82
- path: path,
83
- query: query,
84
- body: body,
85
- head: head
86
- }.compact
87
-
88
- @connection_options = {
89
- connect_timeout: connect_timeout,
90
- inactivity_timeout: inactivity_timeout,
91
- ssl: ssl,
92
- bind: bind,
93
- proxy: proxy
94
- }.compact
95
- end
96
-
97
- attr_accessor :href, :local_path,
98
- :http_method,
99
- :custom_data,
100
- :request_object,
101
- :parse_method,
102
- :callback,
103
- :errback,
104
- :stream_callback,
105
- :convert_to_utf8,
106
- :overwrite_exist,
107
- :request_options,
108
- :connection_options
109
- end
110
-
111
- module ListSpider
112
- RANDOM_TIME = -1
113
- NO_LIMIT_CONCURRENT = -1
114
- DEFAULT_CONCURRNET_MAX = 50
115
- DEFAULT_INTERVAL = 0
116
-
117
- @random_time_range = 3..10
118
- @local_path_set = Set.new
119
-
120
- class << self
121
- def get_list(down_list, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
122
- if interval.is_a? Range
123
- @random_time_range = interval
124
- interval = RANDOM_TIME
125
- end
126
-
127
- @down_list = filter_list(down_list)
128
- @interval = interval
129
- @max = max
130
- @max = @down_list.size if @max == NO_LIMIT_CONCURRENT
131
- @succeed_size = 0
132
- @failed_size = 0
133
-
134
- puts "total size:#{@down_list.size}"
135
- event_machine_start_list(next_task, method(:complete))
136
- end
137
-
138
- def get_one(task, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
139
- get_list([task], interval: interval, max: max)
140
- end
141
-
142
- def add_task(task)
143
- if task.is_a? Array
144
- need_down_list = filter_list(task)
145
- @down_list += need_down_list
146
- elsif task.is_a?TaskStruct
147
- need_down_list = filter_list([task])
148
- @down_list += need_down_list
149
- else
150
- puts "error task type:#{task.class}"
151
- end
152
- end
153
-
154
- private
155
-
156
- def event_machine_down(link_struct_list, callback = nil)
157
- failed_list = []
158
- succeed_list = []
159
- multi = EventMachine::MultiRequest.new
160
- begin_time = Time.now
161
-
162
- for_each_proc =
163
- proc do |task_struct|
164
- http_req = EventMachine::HttpRequest.new(task_struct.href, task_struct.connection_options).public_send(task_struct.http_method, task_struct.request_options)
165
- http_req.stream { |chunk| stream_callback.call(chunk) } if task_struct.stream_callback
166
- task_struct.request_object = http_req
167
-
168
- http_req.callback do
169
- s = http_req.response_header.status
170
- puts "#{Time.now}, http status code: #{s}"
171
-
172
- if s == 200
173
- local_dir = File.dirname(task_struct.local_path)
174
- FileUtils.mkdir_p(local_dir) unless Dir.exist?(local_dir)
175
- begin
176
- File.open(task_struct.local_path, 'wb') do |f|
177
- f << if @convert_to_utf8 == true
178
- SpiderHelper.to_utf8(http_req.response)
179
- else
180
- http_req.response
181
- end
182
- end
183
- call_parse_method(task_struct)
184
- succeed_list << task_struct
185
- rescue StandardError => exception
186
- puts exception
187
- end
188
- end
189
- task_struct.callback.call(task_struct, http_req) if task_struct.callback
190
- end
191
-
192
- http_req.errback do
193
- puts "#{Time.now}, #{task_struct.href}, error: #{http_req.error}"
194
-
195
- task_struct.errback.call(task_struct, http_req) if task_struct.errback
196
- end
197
-
198
- begin
199
- multi.add task_struct.local_path, http_req
200
- rescue StandardError => exception
201
- puts exception
202
- puts task_struct.href
203
- puts task_struct.local_path
204
- stop_machine
205
- end
206
- end
207
-
208
- cb =
209
- proc do
210
- end_time = Time.now
211
- puts "use time:#{end_time - begin_time} seconds"
212
- if callback.nil?
213
- stop_machine
214
- else
215
- callback.call(multi, succeed_list, failed_list)
216
- end
217
- end
218
- link_struct_list.each(&for_each_proc)
219
- multi.callback(&cb)
220
- end
221
-
222
- def stop_machine
223
- puts "success size:#{@succeed_size}"
224
- puts "failed size:#{@failed_size}"
225
- @end_time = Time.now
226
- puts "total use time:#{@end_time - @begin_time} seconds"
227
- EventMachine.stop
228
- @local_path_set.clear
229
- end
230
-
231
- def next_task
232
- @down_list.shift(@max)
233
- end
234
-
235
- def call_parse_method(task_struct)
236
- task_struct.parse_method.call(task_struct) if task_struct.parse_method
237
- end
238
-
239
- def complete(_multi, success_list, failed_list)
240
- @succeed_size += success_list.size
241
- @failed_size += failed_list.size
242
- @succeed_list.concat(success_list)
243
- @failed_list.concat(failed_list)
244
-
245
- todo = next_task
246
-
247
- if todo.empty?
248
- stop_machine
249
- else
250
- if @interval != 0
251
- if !success_list.empty? || !failed_list.empty?
252
- if @interval == RANDOM_TIME
253
- sleep(rand(@random_time_range))
254
- else
255
- sleep(@interval)
256
- end
257
- end
258
- end
259
- event_machine_down(todo, method(:complete))
260
- end
261
- end
262
-
263
- def event_machine_start_list(down_list, callback = nil)
264
- EventMachine.run do
265
- @succeed_list = []
266
- @failed_list = []
267
- @begin_time = Time.now
268
- if down_list.empty?
269
- if callback
270
- callback.call(nil, [], [])
271
- else
272
- stop_machine
273
- end
274
- else
275
- event_machine_down(down_list, callback)
276
- end
277
- end
278
- end
279
-
280
- def filter_list(down_list)
281
- need_down_list = []
282
- down_list.each do |ts|
283
- if !ts.overwrite_exist && File.exist?(ts.local_path)
284
- call_parse_method(ts)
285
- elsif @local_path_set.add?(ts.local_path)
286
- need_down_list << ts
287
- end
288
- end
289
- need_down_list
290
- end
291
- end
292
-
293
- Signal.trap('INT') do
294
- ListSpider.stop_machine
295
- exit!
296
- end
297
- end
1
+ require 'list_spider/version'
2
+ require 'em-http-request'
3
+ require 'nokogiri'
4
+ require 'fileutils'
5
+ require 'set'
6
+ require 'securerandom'
7
+ require 'addressable/uri'
8
+ require File.expand_path('spider_helper', __dir__)
9
+ require File.expand_path('file_filter', __dir__)
10
+
11
+ # 爬取任务类
12
+ class TaskStruct
13
+ # * href 请求链接
14
+ # * local_path 保存数据的本地路径(保存文件的情况下此路径作为去重标准)
15
+ # * http_method http方法,取值::get, :head, :delete, :put, :post, :patch, :options
16
+ # * custom_data 自定义数据
17
+ # * parse_method 解析保存文件的回调,参数是TaskStruct对象本身
18
+ def initialize(href, # 请求链接
19
+ local_path = :nil, # 保存数据的本地路径(保存文件的情况下此路径作为去重标准)
20
+ # http方法,取值::get, :head, :delete, :put, :post, :patch, :options
21
+ http_method: :get,
22
+ custom_data: nil, # 自定义数据
23
+ parse_method: nil, # 解析保存文件的回调,参数是TaskStruct对象本身
24
+ # 请求成功后的回调,此时可能没有保存文件,比如301,404
25
+ # 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
26
+ # http_req.response_header.status 状态码
27
+ # http_req.response_header 返回头
28
+ # http_req.response 返回体
29
+ callback: nil,
30
+ # 请求失败后的回调
31
+ # 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
32
+ errback: nil,
33
+ stream_callback: nil, # 流数据处理回调
34
+ convert_to_utf8: false, # 是否转换为utf8编码
35
+ overwrite_exist: false, # 是否覆盖现有文件
36
+ # 请求设置
37
+ redirects: 3, # 重定向次数
38
+ keepalive: nil, # (暂不支持复用)
39
+ file: nil, # 要上传的文件路径
40
+ path: nil, # 请求路径,在流水线方式请求时有用(暂不支持)
41
+ query: nil, # 查询字符串,可以是string或hash类型
42
+ body: nil, # 请求体,可以是string或hash类型
43
+ head: nil, # 请求头
44
+ # 连接设置
45
+ connect_timeout: 60, # 连接超时时间
46
+ inactivity_timeout: nil, # 连接后超时时间
47
+ # ssl设置
48
+ # ssl: {
49
+ # :private_key_file => '/tmp/server.key',
50
+ # :cert_chain_file => '/tmp/server.crt',
51
+ # :verify_peer => false
52
+ # }
53
+ ssl: nil,
54
+ # bind: {
55
+ # :host => '123.123.123.123', # use a specific interface for outbound request
56
+ # :port => '123'
57
+ # }
58
+ bind: nil,
59
+ # 代理设置
60
+ # proxy: {
61
+ # :host => '127.0.0.1', # proxy address
62
+ # :port => 9000, # proxy port
63
+ # :type => :socks5 # default proxy mode is HTTP proxy, change to :socks5 if required
64
+
65
+ # :authorization => ['user', 'pass'] # proxy authorization header
66
+ # }
67
+ proxy: nil)
68
+ @href = href
69
+ @local_path = local_path
70
+ @http_method = http_method
71
+ @custom_data = custom_data
72
+ @parse_method = parse_method
73
+ @callback = callback
74
+ @errback = errback
75
+ @stream_callback = stream_callback
76
+ @convert_to_utf8 = convert_to_utf8
77
+ @overwrite_exist = overwrite_exist
78
+
79
+ @request_options = {
80
+ redirects: redirects,
81
+ keepalive: keepalive,
82
+ file: file,
83
+ path: path,
84
+ query: query,
85
+ body: body,
86
+ head: head
87
+ }.compact
88
+
89
+ @connection_options = {
90
+ connect_timeout: connect_timeout,
91
+ inactivity_timeout: inactivity_timeout,
92
+ ssl: ssl,
93
+ bind: bind,
94
+ proxy: proxy
95
+ }.compact
96
+ end
97
+
98
+ attr_accessor :href, :local_path,
99
+ :http_method,
100
+ :custom_data,
101
+ :request_object,
102
+ :parse_method,
103
+ :callback,
104
+ :errback,
105
+ :stream_callback,
106
+ :convert_to_utf8,
107
+ :overwrite_exist,
108
+ :request_options,
109
+ :connection_options
110
+ end
111
+
112
+ module ListSpider
113
+ RANDOM_TIME = -1
114
+ NO_LIMIT_CONCURRENT = -1
115
+ DEFAULT_CONCURRNET_MAX = 50
116
+ DEFAULT_INTERVAL = 0
117
+
118
+ @random_time_range = 3..10
119
+ @local_path_set = Set.new
120
+ @down_list = []
121
+ @save_file = true
122
+
123
+ class << self
124
+ attr_accessor :save_file
125
+
126
+ def get_list(down_list, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
127
+ if interval.is_a? Range
128
+ @random_time_range = interval
129
+ interval = RANDOM_TIME
130
+ end
131
+
132
+ filter_list(down_list)
133
+ @interval = interval
134
+ @max = max
135
+ @max = @down_list.size if @max == NO_LIMIT_CONCURRENT
136
+ @succeed_size = 0
137
+ @failed_size = 0
138
+
139
+ puts "total size:#{@down_list.size}"
140
+ event_machine_start_list(next_task, method(:complete))
141
+ end
142
+
143
+ def get_one(task, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
144
+ get_list([task], interval: interval, max: max)
145
+ end
146
+
147
+ def add_task(task)
148
+ if task.is_a? Array
149
+ filter_list(task)
150
+ elsif task.is_a?TaskStruct
151
+ filter_list([task])
152
+ else
153
+ puts "error task type:#{task.class}"
154
+ end
155
+ end
156
+
157
+ def stop
158
+ stop_machine
159
+ end
160
+
161
+ private
162
+
163
+ def event_machine_down(link_struct_list, callback = nil)
164
+ failed_list = []
165
+ succeed_list = []
166
+ multi = EventMachine::MultiRequest.new
167
+ begin_time = Time.now
168
+
169
+ for_each_proc =
170
+ proc do |task_struct|
171
+ http_req = EventMachine::HttpRequest.new(task_struct.href, task_struct.connection_options).public_send(task_struct.http_method, task_struct.request_options)
172
+ http_req.stream { |chunk| stream_callback.call(chunk) } if task_struct.stream_callback
173
+ task_struct.request_object = http_req
174
+
175
+ http_req.callback do
176
+ s = http_req.response_header.status
177
+ puts "#{Time.now}, http status code: #{s}"
178
+
179
+ if s == 200 && @save_file
180
+ local_dir = File.dirname(task_struct.local_path)
181
+ FileUtils.mkdir_p(local_dir) unless Dir.exist?(local_dir)
182
+ begin
183
+ File.open(task_struct.local_path, 'wb') do |f|
184
+ f << if @convert_to_utf8 == true
185
+ SpiderHelper.to_utf8(http_req.response)
186
+ else
187
+ http_req.response
188
+ end
189
+ end
190
+ call_parse_method(task_struct)
191
+ succeed_list << task_struct
192
+ rescue StandardError => exception
193
+ puts exception
194
+ end
195
+ end
196
+ task_struct.callback.call(task_struct, http_req) if task_struct.callback
197
+ end
198
+
199
+ http_req.errback do
200
+ puts "#{Time.now}, #{task_struct.href}, error: #{http_req.error}"
201
+
202
+ task_struct.errback.call(task_struct, http_req) if task_struct.errback
203
+ end
204
+
205
+ begin
206
+ if @save_file
207
+ multi.add task_struct.local_path, http_req
208
+ else
209
+ multi.add SecureRandom.uuid, http_req
210
+ end
211
+ rescue StandardError => exception
212
+ puts exception
213
+ puts task_struct.href
214
+ puts task_struct.local_path
215
+ stop_machine
216
+ end
217
+ end
218
+
219
+ cb =
220
+ proc do
221
+ end_time = Time.now
222
+ puts "use time:#{end_time - begin_time} seconds"
223
+ if callback.nil?
224
+ stop_machine
225
+ else
226
+ callback.call(multi, succeed_list, failed_list)
227
+ end
228
+ end
229
+ link_struct_list.each(&for_each_proc)
230
+ multi.callback(&cb)
231
+ end
232
+
233
+ def stop_machine
234
+ puts "success size:#{@succeed_size}"
235
+ puts "failed size:#{@failed_size}"
236
+ @end_time = Time.now
237
+ puts "total use time:#{@end_time - @begin_time} seconds"
238
+ EventMachine.stop
239
+ @local_path_set.clear
240
+ end
241
+
242
+ def next_task
243
+ @down_list.shift(@max)
244
+ end
245
+
246
+ def call_parse_method(task_struct)
247
+ task_struct.parse_method.call(task_struct) if task_struct.parse_method
248
+ end
249
+
250
+ def complete(_multi, success_list, failed_list)
251
+ @succeed_size += success_list.size
252
+ @failed_size += failed_list.size
253
+ @succeed_list.concat(success_list)
254
+ @failed_list.concat(failed_list)
255
+
256
+ todo = next_task
257
+
258
+ if todo.empty?
259
+ stop_machine
260
+ else
261
+ if @interval != 0
262
+ if !success_list.empty? || !failed_list.empty?
263
+ if @interval == RANDOM_TIME
264
+ sleep(rand(@random_time_range))
265
+ else
266
+ sleep(@interval)
267
+ end
268
+ end
269
+ end
270
+ event_machine_down(todo, method(:complete))
271
+ end
272
+ end
273
+
274
+ def event_machine_start_list(down_list, callback = nil)
275
+ EventMachine.run do
276
+ @succeed_list = []
277
+ @failed_list = []
278
+ @begin_time = Time.now
279
+ if down_list.empty?
280
+ if callback
281
+ callback.call(nil, [], [])
282
+ else
283
+ stop_machine
284
+ end
285
+ else
286
+ event_machine_down(down_list, callback)
287
+ end
288
+ end
289
+ end
290
+
291
+ def filter_list(down_list)
292
+ return unless @save_file
293
+
294
+ down_list.each do |ts|
295
+ if !ts.overwrite_exist && File.exist?(ts.local_path)
296
+ call_parse_method(ts)
297
+ elsif @local_path_set.add?(ts.local_path)
298
+ @down_list << ts
299
+ end
300
+ end
301
+ end
302
+ end
303
+
304
+ Signal.trap('INT') do
305
+ ListSpider.stop_machine
306
+ exit!
307
+ end
308
+ end