list_spider 2.2.0 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/setup CHANGED
@@ -1,8 +1,8 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here
1
#!/usr/bin/env bash
# Abort on errors (-e), unset variables (-u), and failed pipeline stages.
set -euo pipefail
# Split words only on newlines and tabs, never on spaces.
IFS=$'\n\t'
# Echo every script line (-v) and every expanded command (-x) while running.
set -vx

bundle install

# Do any other automated setup that you need to do here
@@ -1,3 +1,3 @@
1
- #!/bin/sh
2
-
1
#!/bin/sh

# Run RuboCop with auto-correct (-a), cop names shown (-D), the simple
# formatter, writing the report to the 'rubocopresult' file.
rubocop -a -D -f simple -o rubocopresult
@@ -1,72 +1,72 @@
1
-
2
- class FileFilter
3
- # 4033
4
- # 920
5
- def initialize(dir_pattern, size_threshold: 1000,
6
- cust_judge: nil, process_block: nil)
7
- @dir_pattern = dir_pattern
8
- @size_threshold = size_threshold
9
- @cust_judge = cust_judge ? cust_judge : method(:default_judge)
10
- @total = 0
11
- @process_block = process_block
12
- end
13
-
14
- def default_judge(f)
15
- File.size(f) <= @size_threshold
16
- end
17
-
18
- def filter_file(f)
19
- if @cust_judge.call(f)
20
- @total += 1
21
- @process_block.call(f)
22
- end
23
- end
24
-
25
- def start
26
- Dir.glob(@dir_pattern) do |f|
27
- filter_file(f)
28
- end
29
- puts "total:#{@total}"
30
- end
31
-
32
- def self.delete(dir_pattern, size_threshold: 1000, cust_judge: nil)
33
- FileFilter.new(
34
- dir_pattern,
35
- size_threshold: size_threshold,
36
- cust_judge: cust_judge,
37
- process_block:
38
- proc do |f|
39
- puts "deleted file: #{f}"
40
- File.delete(f)
41
- end
42
- ).start
43
- end
44
-
45
- def self.check(dir_pattern, size_threshold: 1000, cust_judge: nil)
46
- FileFilter.new(
47
- dir_pattern,
48
- size_threshold: size_threshold,
49
- cust_judge: cust_judge,
50
- process_block:
51
- proc do |f|
52
- puts "filterd file: #{f}"
53
- end
54
- ).start
55
- end
56
-
57
- def self.check_save_result(dir_pattern, save_file_name: 'filtered_file.txt',
58
- size_threshold: 1000, cust_judge: nil)
59
- result_file = File.open(save_file_name, 'wt')
60
- FileFilter.new(
61
- dir_pattern,
62
- size_threshold: size_threshold,
63
- cust_judge: cust_judge,
64
- process_block:
65
- proc do |f|
66
- puts "filterd file: #{f}"
67
- result_file << f << "\n"
68
- end
69
- ).start
70
- result_file.close
71
- end
72
- end
1
+
2
# Filters files matched by a Dir.glob pattern and hands every accepted file
# to a processing block. Acceptance defaults to a size threshold but can be
# replaced with a custom judge callable.
class FileFilter
  # dir_pattern    - Dir.glob pattern selecting candidate files.
  # size_threshold - files of at most this many bytes pass the default judge.
  # cust_judge     - optional callable(path) -> boolean replacing the default.
  # process_block  - callable(path) invoked for each accepted file.
  def initialize(dir_pattern, size_threshold: 1000,
                 cust_judge: nil, process_block: nil)
    @dir_pattern = dir_pattern
    @size_threshold = size_threshold
    @cust_judge = cust_judge || method(:default_judge) # was a redundant ternary
    @total = 0
    @process_block = process_block
  end

  # Default acceptance test: the file's size is within the threshold.
  def default_judge(f)
    File.size(f) <= @size_threshold
  end

  # Judges one file; counts and processes it when accepted.
  def filter_file(f)
    if @cust_judge.call(f)
      @total += 1
      # Guard: process_block may legitimately be nil (count-only usage).
      @process_block&.call(f)
    end
  end

  # Walks the glob pattern, filtering every match, then reports the total.
  def start
    Dir.glob(@dir_pattern) do |f|
      filter_file(f)
    end
    puts "total:#{@total}"
  end

  # Deletes every accepted file, logging each deletion.
  def self.delete(dir_pattern, size_threshold: 1000, cust_judge: nil)
    FileFilter.new(
      dir_pattern,
      size_threshold: size_threshold,
      cust_judge: cust_judge,
      process_block:
        proc do |f|
          puts "deleted file: #{f}"
          File.delete(f)
        end
    ).start
  end

  # Dry run: prints every accepted file without touching it.
  def self.check(dir_pattern, size_threshold: 1000, cust_judge: nil)
    FileFilter.new(
      dir_pattern,
      size_threshold: size_threshold,
      cust_judge: cust_judge,
      process_block:
        proc do |f|
          # Typo fix: message previously read "filterd file".
          puts "filtered file: #{f}"
        end
    ).start
  end

  # Like check, but also records accepted paths (one per line) in
  # save_file_name.
  def self.check_save_result(dir_pattern, save_file_name: 'filtered_file.txt',
                             size_threshold: 1000, cust_judge: nil)
    # Block form guarantees the result file is closed even if filtering raises
    # (the old explicit open/close leaked the handle on error).
    File.open(save_file_name, 'wt') do |result_file|
      FileFilter.new(
        dir_pattern,
        size_threshold: size_threshold,
        cust_judge: cust_judge,
        process_block:
          proc do |f|
            puts "filtered file: #{f}"
            result_file << f << "\n"
          end
      ).start
    end
  end
end
@@ -1,297 +1,297 @@
1
- require 'list_spider/version'
2
- require 'em-http-request'
3
- require 'nokogiri'
4
- require 'fileutils'
5
- require 'set'
6
- require 'addressable/uri'
7
- require File.expand_path('spider_helper', __dir__)
8
- require File.expand_path('file_filter', __dir__)
9
-
10
- # 爬取任务类
11
- class TaskStruct
12
- # * href 请求链接
13
- # * local_path 保存数据的本地路径(此路径作为去重标准)
14
- # * http_method http方法,取值::get, :head, :delete, :put, :post, :patch, :options
15
- # * custom_data 自定义数据
16
- # * parse_method 解析保存文件的回调,参数是TaskStruct对象本身
17
- def initialize(href, # 请求链接
18
- local_path, # 保存数据的本地路径(此路径作为去重标准)
19
- # http方法,取值::get, :head, :delete, :put, :post, :patch, :options
20
- http_method: :get,
21
- custom_data: nil, # 自定义数据
22
- parse_method: nil, # 解析保存文件的回调,参数是TaskStruct对象本身
23
- # 请求成功后的回调,此时可能没有保存文件,比如301,404
24
- # 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
25
- # http_req.response_header.status 状态码
26
- # http_req.response_header 返回头
27
- # http_req.response 返回体
28
- callback: nil,
29
- # 请求失败后的回调
30
- # 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
31
- errback: nil,
32
- stream_callback: nil, # 流数据处理回调
33
- convert_to_utf8: false, # 是否转换为utf8编码
34
- overwrite_exist: false, # 是否覆盖现有文件
35
- # 请求设置
36
- redirects: 3, # 重定向次数
37
- keepalive: nil, # (暂不支持复用)
38
- file: nil, # 要上传的文件路径
39
- path: nil, # 请求路径,在流水线方式请求时有用(暂不支持)
40
- query: nil, # 查询字符串,可以是string或hash类型
41
- body: nil, # 请求体,可以是string或hash类型
42
- head: nil, # 请求头
43
- # 连接设置
44
- connect_timeout: 60, # 连接超时时间
45
- inactivity_timeout: nil, # 连接后超时时间
46
- # ssl设置
47
- # ssl: {
48
- # :private_key_file => '/tmp/server.key',
49
- # :cert_chain_file => '/tmp/server.crt',
50
- # :verify_peer => false
51
- # }
52
- ssl: nil,
53
- # bind: {
54
- # :host => '123.123.123.123', # use a specific interface for outbound request
55
- # :port => '123'
56
- # }
57
- bind: nil,
58
- # 代理设置
59
- # proxy: {
60
- # :host => '127.0.0.1', # proxy address
61
- # :port => 9000, # proxy port
62
- # :type => :socks5 # default proxy mode is HTTP proxy, change to :socks5 if required
63
-
64
- # :authorization => ['user', 'pass'] # proxy authorization header
65
- # }
66
- proxy: nil)
67
- @href = href
68
- @local_path = local_path
69
- @http_method = http_method
70
- @custom_data = custom_data
71
- @parse_method = parse_method
72
- @callback = callback
73
- @errback = errback
74
- @stream_callback = stream_callback
75
- @convert_to_utf8 = convert_to_utf8
76
- @overwrite_exist = overwrite_exist
77
-
78
- @request_options = {
79
- redirects: redirects,
80
- keepalive: keepalive,
81
- file: file,
82
- path: path,
83
- query: query,
84
- body: body,
85
- head: head
86
- }.compact
87
-
88
- @connection_options = {
89
- connect_timeout: connect_timeout,
90
- inactivity_timeout: inactivity_timeout,
91
- ssl: ssl,
92
- bind: bind,
93
- proxy: proxy
94
- }.compact
95
- end
96
-
97
- attr_accessor :href, :local_path,
98
- :http_method,
99
- :custom_data,
100
- :request_object,
101
- :parse_method,
102
- :callback,
103
- :errback,
104
- :stream_callback,
105
- :convert_to_utf8,
106
- :overwrite_exist,
107
- :request_options,
108
- :connection_options
109
- end
110
-
111
- module ListSpider
112
- RANDOM_TIME = -1
113
- NO_LIMIT_CONCURRENT = -1
114
- DEFAULT_CONCURRNET_MAX = 50
115
- DEFAULT_INTERVAL = 0
116
-
117
- @random_time_range = 3..10
118
- @local_path_set = Set.new
119
-
120
- class << self
121
- def get_list(down_list, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
122
- if interval.is_a? Range
123
- @random_time_range = interval
124
- interval = RANDOM_TIME
125
- end
126
-
127
- @down_list = filter_list(down_list)
128
- @interval = interval
129
- @max = max
130
- @max = @down_list.size if @max == NO_LIMIT_CONCURRENT
131
- @succeed_size = 0
132
- @failed_size = 0
133
-
134
- puts "total size:#{@down_list.size}"
135
- event_machine_start_list(next_task, method(:complete))
136
- end
137
-
138
- def get_one(task, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
139
- get_list([task], interval: interval, max: max)
140
- end
141
-
142
- def add_task(task)
143
- if task.is_a? Array
144
- need_down_list = filter_list(task)
145
- @down_list += need_down_list
146
- elsif task.is_a?TaskStruct
147
- need_down_list = filter_list([task])
148
- @down_list += need_down_list
149
- else
150
- puts "error task type:#{task.class}"
151
- end
152
- end
153
-
154
- private
155
-
156
- def event_machine_down(link_struct_list, callback = nil)
157
- failed_list = []
158
- succeed_list = []
159
- multi = EventMachine::MultiRequest.new
160
- begin_time = Time.now
161
-
162
- for_each_proc =
163
- proc do |task_struct|
164
- http_req = EventMachine::HttpRequest.new(task_struct.href, task_struct.connection_options).public_send(task_struct.http_method, task_struct.request_options)
165
- http_req.stream { |chunk| stream_callback.call(chunk) } if task_struct.stream_callback
166
- task_struct.request_object = http_req
167
-
168
- http_req.callback do
169
- s = http_req.response_header.status
170
- puts "#{Time.now}, http status code: #{s}"
171
-
172
- if s == 200
173
- local_dir = File.dirname(task_struct.local_path)
174
- FileUtils.mkdir_p(local_dir) unless Dir.exist?(local_dir)
175
- begin
176
- File.open(task_struct.local_path, 'wb') do |f|
177
- f << if @convert_to_utf8 == true
178
- SpiderHelper.to_utf8(http_req.response)
179
- else
180
- http_req.response
181
- end
182
- end
183
- call_parse_method(task_struct)
184
- succeed_list << task_struct
185
- rescue StandardError => exception
186
- puts exception
187
- end
188
- end
189
- task_struct.callback.call(task_struct, http_req) if task_struct.callback
190
- end
191
-
192
- http_req.errback do
193
- puts "#{Time.now}, #{task_struct.href}, error: #{http_req.error}"
194
-
195
- task_struct.errback.call(task_struct, http_req) if task_struct.errback
196
- end
197
-
198
- begin
199
- multi.add task_struct.local_path, http_req
200
- rescue StandardError => exception
201
- puts exception
202
- puts task_struct.href
203
- puts task_struct.local_path
204
- stop_machine
205
- end
206
- end
207
-
208
- cb =
209
- proc do
210
- end_time = Time.now
211
- puts "use time:#{end_time - begin_time} seconds"
212
- if callback.nil?
213
- stop_machine
214
- else
215
- callback.call(multi, succeed_list, failed_list)
216
- end
217
- end
218
- link_struct_list.each(&for_each_proc)
219
- multi.callback(&cb)
220
- end
221
-
222
- def stop_machine
223
- puts "success size:#{@succeed_size}"
224
- puts "failed size:#{@failed_size}"
225
- @end_time = Time.now
226
- puts "total use time:#{@end_time - @begin_time} seconds"
227
- EventMachine.stop
228
- @local_path_set.clear
229
- end
230
-
231
- def next_task
232
- @down_list.shift(@max)
233
- end
234
-
235
- def call_parse_method(task_struct)
236
- task_struct.parse_method.call(task_struct) if task_struct.parse_method
237
- end
238
-
239
- def complete(_multi, success_list, failed_list)
240
- @succeed_size += success_list.size
241
- @failed_size += failed_list.size
242
- @succeed_list.concat(success_list)
243
- @failed_list.concat(failed_list)
244
-
245
- todo = next_task
246
-
247
- if todo.empty?
248
- stop_machine
249
- else
250
- if @interval != 0
251
- if !success_list.empty? || !failed_list.empty?
252
- if @interval == RANDOM_TIME
253
- sleep(rand(@random_time_range))
254
- else
255
- sleep(@interval)
256
- end
257
- end
258
- end
259
- event_machine_down(todo, method(:complete))
260
- end
261
- end
262
-
263
- def event_machine_start_list(down_list, callback = nil)
264
- EventMachine.run do
265
- @succeed_list = []
266
- @failed_list = []
267
- @begin_time = Time.now
268
- if down_list.empty?
269
- if callback
270
- callback.call(nil, [], [])
271
- else
272
- stop_machine
273
- end
274
- else
275
- event_machine_down(down_list, callback)
276
- end
277
- end
278
- end
279
-
280
- def filter_list(down_list)
281
- need_down_list = []
282
- down_list.each do |ts|
283
- if !ts.overwrite_exist && File.exist?(ts.local_path)
284
- call_parse_method(ts)
285
- elsif @local_path_set.add?(ts.local_path)
286
- need_down_list << ts
287
- end
288
- end
289
- need_down_list
290
- end
291
- end
292
-
293
- Signal.trap('INT') do
294
- ListSpider.stop_machine
295
- exit!
296
- end
297
- end
1
+ require 'list_spider/version'
2
+ require 'em-http-request'
3
+ require 'nokogiri'
4
+ require 'fileutils'
5
+ require 'set'
6
+ require 'addressable/uri'
7
+ require File.expand_path('spider_helper', __dir__)
8
+ require File.expand_path('file_filter', __dir__)
9
+
10
# Describes one crawl task: the URL to request and the local file that the
# response body is written to.
class TaskStruct
  # * href         request URL
  # * local_path   local save path (also used as the de-duplication key)
  # * http_method  one of :get, :head, :delete, :put, :post, :patch, :options
  # * custom_data  arbitrary caller data carried along with the task
  # * parse_method callback run after the file is saved; receives this TaskStruct
  def initialize(href, # request URL
                 local_path, # local save path (de-duplication key)
                 # HTTP verb: :get, :head, :delete, :put, :post, :patch, :options
                 http_method: :get,
                 custom_data: nil, # arbitrary caller data
                 parse_method: nil, # post-save callback, receives this TaskStruct
                 # Fired when the request completes, even when no file was
                 # saved (e.g. 301, 404). Receives this TaskStruct and the
                 # matching EventMachine::HttpRequest:
                 #   http_req.response_header.status  status code
                 #   http_req.response_header         response headers
                 #   http_req.response                response body
                 callback: nil,
                 # Fired when the request fails; receives this TaskStruct and
                 # the matching EventMachine::HttpRequest.
                 errback: nil,
                 stream_callback: nil, # handler for streamed response chunks
                 convert_to_utf8: false, # transcode the saved body to UTF-8?
                 overwrite_exist: false, # overwrite an existing local file?
                 # request options
                 redirects: 3, # maximum number of redirects to follow
                 keepalive: nil, # (connection reuse is not supported yet)
                 file: nil, # path of a file to upload
                 path: nil, # request path, used with pipelining (unsupported)
                 query: nil, # query string, a String or a Hash
                 body: nil, # request body, a String or a Hash
                 head: nil, # request headers
                 # connection options
                 connect_timeout: 60, # seconds allowed for connecting
                 inactivity_timeout: nil, # idle timeout after connecting
                 # SSL options, e.g.
                 # ssl: {
                 #   :private_key_file => '/tmp/server.key',
                 #   :cert_chain_file => '/tmp/server.crt',
                 #   :verify_peer => false
                 # }
                 ssl: nil,
                 # bind: {
                 #   :host => '123.123.123.123', # use a specific interface for outbound request
                 #   :port => '123'
                 # }
                 bind: nil,
                 # proxy options, e.g.
                 # proxy: {
                 #   :host => '127.0.0.1', # proxy address
                 #   :port => 9000, # proxy port
                 #   :type => :socks5 # default proxy mode is HTTP proxy, change to :socks5 if required

                 #   :authorization => ['user', 'pass'] # proxy authorization header
                 # }
                 proxy: nil)
    @href = href
    @local_path = local_path
    @http_method = http_method
    @custom_data = custom_data
    @parse_method = parse_method
    @callback = callback
    @errback = errback
    @stream_callback = stream_callback
    @convert_to_utf8 = convert_to_utf8
    @overwrite_exist = overwrite_exist

    # Options forwarded to the HTTP request itself; nil entries are dropped
    # so em-http only sees what the caller actually set.
    @request_options = {
      redirects: redirects,
      keepalive: keepalive,
      file: file,
      path: path,
      query: query,
      body: body,
      head: head
    }.compact

    # Options forwarded to the underlying connection; nil entries are dropped.
    @connection_options = {
      connect_timeout: connect_timeout,
      inactivity_timeout: inactivity_timeout,
      ssl: ssl,
      bind: bind,
      proxy: proxy
    }.compact
  end

  attr_accessor :href, :local_path,
                :http_method,
                :custom_data,
                :request_object,
                :parse_method,
                :callback,
                :errback,
                :stream_callback,
                :convert_to_utf8,
                :overwrite_exist,
                :request_options,
                :connection_options
end
110
+
111
# Drives batches of TaskStruct downloads through EventMachine, sleeping an
# optional interval between batches and de-duplicating tasks on local_path.
module ListSpider
  RANDOM_TIME = -1
  NO_LIMIT_CONCURRENT = -1
  DEFAULT_CONCURRNET_MAX = 50 # (sic) name kept for backward compatibility
  DEFAULT_INTERVAL = 0

  @random_time_range = 3..10
  @local_path_set = Set.new

  class << self
    # Downloads every task in down_list.
    # interval - seconds to sleep between batches; a Range means a random
    #            sleep drawn from that range; 0 means no sleep.
    # max      - batch size; NO_LIMIT_CONCURRENT issues everything at once.
    def get_list(down_list, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
      if interval.is_a? Range
        @random_time_range = interval
        interval = RANDOM_TIME
      end

      @down_list = filter_list(down_list)
      @interval = interval
      @max = max
      @max = @down_list.size if @max == NO_LIMIT_CONCURRENT
      @succeed_size = 0
      @failed_size = 0

      puts "total size:#{@down_list.size}"
      event_machine_start_list(next_task, method(:complete))
    end

    # Convenience wrapper: download a single task.
    def get_one(task, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
      get_list([task], interval: interval, max: max)
    end

    # Queues extra work while a crawl is running. Accepts a TaskStruct or an
    # Array of them; anything else is reported and ignored.
    def add_task(task)
      if task.is_a? Array
        need_down_list = filter_list(task)
        @down_list += need_down_list
      elsif task.is_a?(TaskStruct) # was `is_a?TaskStruct` (missing parens)
        need_down_list = filter_list([task])
        @down_list += need_down_list
      else
        puts "error task type:#{task.class}"
      end
    end

    private

    # Issues one batch of requests and wires per-request callbacks. When the
    # whole batch finishes, `callback` (or stop_machine) is invoked with the
    # MultiRequest plus the succeeded and failed task lists.
    def event_machine_down(link_struct_list, callback = nil)
      failed_list = []
      succeed_list = []
      multi = EventMachine::MultiRequest.new
      begin_time = Time.now

      for_each_proc =
        proc do |task_struct|
          http_req = EventMachine::HttpRequest.new(task_struct.href, task_struct.connection_options).public_send(task_struct.http_method, task_struct.request_options)
          # BUG FIX: previously called a nonexistent local `stream_callback`;
          # the handler lives on the task itself.
          http_req.stream { |chunk| task_struct.stream_callback.call(chunk) } if task_struct.stream_callback
          task_struct.request_object = http_req

          http_req.callback do
            s = http_req.response_header.status
            puts "#{Time.now}, http status code: #{s}"

            if s == 200
              local_dir = File.dirname(task_struct.local_path)
              FileUtils.mkdir_p(local_dir) unless Dir.exist?(local_dir)
              begin
                File.open(task_struct.local_path, 'wb') do |f|
                  # BUG FIX: previously read @convert_to_utf8 (a module ivar
                  # that is never set), so conversion never ran; the flag is
                  # carried on the task.
                  f << if task_struct.convert_to_utf8
                         SpiderHelper.to_utf8(http_req.response)
                       else
                         http_req.response
                       end
                end
                call_parse_method(task_struct)
                succeed_list << task_struct
              rescue StandardError => exception
                puts exception
              end
            end
            task_struct.callback.call(task_struct, http_req) if task_struct.callback
          end

          http_req.errback do
            puts "#{Time.now}, #{task_struct.href}, error: #{http_req.error}"

            # BUG FIX: failed tasks were never recorded, so the reported
            # "failed size" was always 0.
            failed_list << task_struct
            task_struct.errback.call(task_struct, http_req) if task_struct.errback
          end

          begin
            multi.add task_struct.local_path, http_req
          rescue StandardError => exception
            puts exception
            puts task_struct.href
            puts task_struct.local_path
            stop_machine
          end
        end

      cb =
        proc do
          end_time = Time.now
          puts "use time:#{end_time - begin_time} seconds"
          if callback.nil?
            stop_machine
          else
            callback.call(multi, succeed_list, failed_list)
          end
        end
      link_struct_list.each(&for_each_proc)
      multi.callback(&cb)
    end

    # Prints totals, stops the reactor, and clears de-duplication state.
    def stop_machine
      puts "success size:#{@succeed_size}"
      puts "failed size:#{@failed_size}"
      @end_time = Time.now
      # Guard: the INT trap can fire before any crawl has begun, in which
      # case @begin_time was never set.
      puts "total use time:#{@end_time - @begin_time} seconds" if @begin_time
      EventMachine.stop
      @local_path_set.clear
    end

    # Pops the next batch (up to @max tasks) off the work list.
    def next_task
      @down_list.shift(@max)
    end

    def call_parse_method(task_struct)
      task_struct.parse_method.call(task_struct) if task_struct.parse_method
    end

    # Batch-completion callback: tallies results, optionally sleeps the
    # configured interval, then issues the next batch (or shuts down).
    def complete(_multi, success_list, failed_list)
      @succeed_size += success_list.size
      @failed_size += failed_list.size
      @succeed_list.concat(success_list)
      @failed_list.concat(failed_list)

      todo = next_task

      if todo.empty?
        stop_machine
      else
        if @interval != 0
          if !success_list.empty? || !failed_list.empty?
            if @interval == RANDOM_TIME
              sleep(rand(@random_time_range))
            else
              sleep(@interval)
            end
          end
        end
        event_machine_down(todo, method(:complete))
      end
    end

    # Boots the reactor and kicks off the first batch; an empty list goes
    # straight to the completion callback (or shuts down).
    def event_machine_start_list(down_list, callback = nil)
      EventMachine.run do
        @succeed_list = []
        @failed_list = []
        @begin_time = Time.now
        if down_list.empty?
          if callback
            callback.call(nil, [], [])
          else
            stop_machine
          end
        else
          event_machine_down(down_list, callback)
        end
      end
    end

    # Drops tasks whose file already exists (running their parse callback
    # instead) and de-duplicates the rest on local_path.
    def filter_list(down_list)
      need_down_list = []
      down_list.each do |ts|
        if !ts.overwrite_exist && File.exist?(ts.local_path)
          call_parse_method(ts)
        elsif @local_path_set.add?(ts.local_path)
          need_down_list << ts
        end
      end
      need_down_list
    end
  end

  Signal.trap('INT') do
    # BUG FIX: stop_machine is private on the singleton, so the old
    # `ListSpider.stop_machine` raised NoMethodError on Ctrl-C.
    ListSpider.send(:stop_machine)
    exit!
  end
end