list_spider 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/setup CHANGED
@@ -1,8 +1,8 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
- IFS=$'\n\t'
4
- set -vx
5
-
6
- bundle install
7
-
8
- # Do any other automated setup that you need to do here
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -1,3 +1,3 @@
1
- #!/bin/sh
2
-
1
+ #!/bin/sh
2
+
3
3
  rubocop -a -D -f simple -o rubocopresult
@@ -1,72 +1,72 @@
1
-
2
- class FileFilter
3
- # 4033
4
- # 920
5
- def initialize(dir_pattern, size_threshold: 1000,
6
- cust_judge: nil, process_block: nil)
7
- @dir_pattern = dir_pattern
8
- @size_threshold = size_threshold
9
- @cust_judge = cust_judge ? cust_judge : method(:default_judge)
10
- @total = 0
11
- @process_block = process_block
12
- end
13
-
14
- def default_judge(f)
15
- File.size(f) <= @size_threshold
16
- end
17
-
18
- def filter_file(f)
19
- if @cust_judge.call(f)
20
- @total += 1
21
- @process_block.call(f)
22
- end
23
- end
24
-
25
- def start
26
- Dir.glob(@dir_pattern) do |f|
27
- filter_file(f)
28
- end
29
- puts "total:#{@total}"
30
- end
31
-
32
- def self.delete(dir_pattern, size_threshold: 1000, cust_judge: nil)
33
- FileFilter.new(
34
- dir_pattern,
35
- size_threshold: size_threshold,
36
- cust_judge: cust_judge,
37
- process_block:
38
- proc do |f|
39
- puts "deleted file: #{f}"
40
- File.delete(f)
41
- end
42
- ).start
43
- end
44
-
45
- def self.check(dir_pattern, size_threshold: 1000, cust_judge: nil)
46
- FileFilter.new(
47
- dir_pattern,
48
- size_threshold: size_threshold,
49
- cust_judge: cust_judge,
50
- process_block:
51
- proc do |f|
52
- puts "filterd file: #{f}"
53
- end
54
- ).start
55
- end
56
-
57
- def self.check_save_result(dir_pattern, save_file_name: 'filtered_file.txt',
58
- size_threshold: 1000, cust_judge: nil)
59
- result_file = File.open(save_file_name, 'wt')
60
- FileFilter.new(
61
- dir_pattern,
62
- size_threshold: size_threshold,
63
- cust_judge: cust_judge,
64
- process_block:
65
- proc do |f|
66
- puts "filterd file: #{f}"
67
- result_file << f << "\n"
68
- end
69
- ).start
70
- result_file.close
71
- end
72
- end
1
+
2
# Walks every path matched by a glob pattern, selects paths with a "judge"
# callable and hands each selected path to a processing callable.
#
# The class-level helpers (+delete+, +check+, +check_save_result+) wire up
# ready-made processing callables.
class FileFilter
  # Number of paths that have passed the judge so far.
  attr_reader :total

  # @param dir_pattern [String] glob pattern handed to Dir.glob
  # @param size_threshold [Integer] inclusive upper size bound (in bytes)
  #   used by the default judge
  # @param cust_judge [#call, nil] callable that receives a path and returns
  #   truthy to select it; when nil, #default_judge is used
  # @param process_block [#call, nil] callable invoked with every selected
  #   path; when nil, selected paths are only counted
  def initialize(dir_pattern, size_threshold: 1000,
                 cust_judge: nil, process_block: nil)
    @dir_pattern = dir_pattern
    @size_threshold = size_threshold
    @cust_judge = cust_judge || method(:default_judge)
    @total = 0
    @process_block = process_block
  end

  # Default selection rule: a regular file no larger than the threshold.
  # Dir.glob also yields directories; they are rejected here because their
  # "size" is meaningless and File.delete cannot remove them.
  def default_judge(f)
    File.file?(f) && File.size(f) <= @size_threshold
  end

  # Applies the judge to one path; counts and processes it when selected.
  def filter_file(f)
    return unless @cust_judge.call(f)

    @total += 1
    # process_block is optional in the constructor, so guard against nil.
    @process_block.call(f) if @process_block
  end

  # Runs the filter over every path matching the pattern and prints the
  # number of selected paths.
  def start
    Dir.glob(@dir_pattern) { |f| filter_file(f) }
    puts "total:#{@total}"
  end

  # Deletes every selected file, logging each deletion.
  def self.delete(dir_pattern, size_threshold: 1000, cust_judge: nil)
    remover = proc do |f|
      puts "deleted file: #{f}"
      File.delete(f)
    end
    new(dir_pattern,
        size_threshold: size_threshold,
        cust_judge: cust_judge,
        process_block: remover).start
  end

  # Prints every selected file without touching it.
  def self.check(dir_pattern, size_threshold: 1000, cust_judge: nil)
    reporter = proc { |f| puts "filterd file: #{f}" }
    new(dir_pattern,
        size_threshold: size_threshold,
        cust_judge: cust_judge,
        process_block: reporter).start
  end

  # Prints every selected file and also writes the paths, one per line, to
  # +save_file_name+.
  def self.check_save_result(dir_pattern, save_file_name: 'filtered_file.txt',
                             size_threshold: 1000, cust_judge: nil)
    # Block form guarantees the result file is closed even if the judge or
    # the directory walk raises.
    File.open(save_file_name, 'wt') do |result_file|
      recorder = proc do |f|
        puts "filterd file: #{f}"
        result_file << f << "\n"
      end
      new(dir_pattern,
          size_threshold: size_threshold,
          cust_judge: cust_judge,
          process_block: recorder).start
    end
  end
end
@@ -1,297 +1,297 @@
1
- require 'list_spider/version'
2
- require 'em-http-request'
3
- require 'nokogiri'
4
- require 'fileutils'
5
- require 'set'
6
- require 'addressable/uri'
7
- require File.expand_path('spider_helper', __dir__)
8
- require File.expand_path('file_filter', __dir__)
9
-
10
- # 爬取任务类
11
- class TaskStruct
12
- # * href 请求链接
13
- # * local_path 保存数据的本地路径(此路径作为去重标准)
14
- # * http_method http方法,取值::get, :head, :delete, :put, :post, :patch, :options
15
- # * custom_data 自定义数据
16
- # * parse_method 解析保存文件的回调,参数是TaskStruct对象本身
17
- def initialize(href, # 请求链接
18
- local_path, # 保存数据的本地路径(此路径作为去重标准)
19
- # http方法,取值::get, :head, :delete, :put, :post, :patch, :options
20
- http_method: :get,
21
- custom_data: nil, # 自定义数据
22
- parse_method: nil, # 解析保存文件的回调,参数是TaskStruct对象本身
23
- # 请求成功后的回调,此时可能没有保存文件,比如301,404
24
- # 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
25
- # http_req.response_header.status 状态码
26
- # http_req.response_header 返回头
27
- # http_req.response 返回体
28
- callback: nil,
29
- # 请求失败后的回调
30
- # 参数是TaskStruct对象本身和对应的EventMachine::HttpRequest对象
31
- errback: nil,
32
- stream_callback: nil, # 流数据处理回调
33
- convert_to_utf8: false, # 是否转换为utf8编码
34
- overwrite_exist: false, # 是否覆盖现有文件
35
- # 请求设置
36
- redirects: 3, # 重定向次数
37
- keepalive: nil, # (暂不支持复用)
38
- file: nil, # 要上传的文件路径
39
- path: nil, # 请求路径,在流水线方式请求时有用(暂不支持)
40
- query: nil, # 查询字符串,可以是string或hash类型
41
- body: nil, # 请求体,可以是string或hash类型
42
- head: nil, # 请求头
43
- # 连接设置
44
- connect_timeout: 60, # 连接超时时间
45
- inactivity_timeout: nil, # 连接后超时时间
46
- # ssl设置
47
- # ssl: {
48
- # :private_key_file => '/tmp/server.key',
49
- # :cert_chain_file => '/tmp/server.crt',
50
- # :verify_peer => false
51
- # }
52
- ssl: nil,
53
- # bind: {
54
- # :host => '123.123.123.123', # use a specific interface for outbound request
55
- # :port => '123'
56
- # }
57
- bind: nil,
58
- # 代理设置
59
- # proxy: {
60
- # :host => '127.0.0.1', # proxy address
61
- # :port => 9000, # proxy port
62
- # :type => :socks5 # default proxy mode is HTTP proxy, change to :socks5 if required
63
-
64
- # :authorization => ['user', 'pass'] # proxy authorization header
65
- # }
66
- proxy: nil)
67
- @href = href
68
- @local_path = local_path
69
- @http_method = http_method
70
- @custom_data = custom_data
71
- @parse_method = parse_method
72
- @callback = callback
73
- @errback = errback
74
- @stream_callback = stream_callback
75
- @convert_to_utf8 = convert_to_utf8
76
- @overwrite_exist = overwrite_exist
77
-
78
- @request_options = {
79
- redirects: redirects,
80
- keepalive: keepalive,
81
- file: file,
82
- path: path,
83
- query: query,
84
- body: body,
85
- head: head
86
- }.compact
87
-
88
- @connection_options = {
89
- connect_timeout: connect_timeout,
90
- inactivity_timeout: inactivity_timeout,
91
- ssl: ssl,
92
- bind: bind,
93
- proxy: proxy
94
- }.compact
95
- end
96
-
97
- attr_accessor :href, :local_path,
98
- :http_method,
99
- :custom_data,
100
- :request_object,
101
- :parse_method,
102
- :callback,
103
- :errback,
104
- :stream_callback,
105
- :convert_to_utf8,
106
- :overwrite_exist,
107
- :request_options,
108
- :connection_options
109
- end
110
-
111
- module ListSpider
112
- RANDOM_TIME = -1
113
- NO_LIMIT_CONCURRENT = -1
114
- DEFAULT_CONCURRNET_MAX = 50
115
- DEFAULT_INTERVAL = 0
116
-
117
- @random_time_range = 3..10
118
- @local_path_set = Set.new
119
-
120
- class << self
121
- def get_list(down_list, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
122
- if interval.is_a? Range
123
- @random_time_range = interval
124
- interval = RANDOM_TIME
125
- end
126
-
127
- @down_list = filter_list(down_list)
128
- @interval = interval
129
- @max = max
130
- @max = @down_list.size if @max == NO_LIMIT_CONCURRENT
131
- @succeed_size = 0
132
- @failed_size = 0
133
-
134
- puts "total size:#{@down_list.size}"
135
- event_machine_start_list(next_task, method(:complete))
136
- end
137
-
138
- def get_one(task, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
139
- get_list([task], interval: interval, max: max)
140
- end
141
-
142
- def add_task(task)
143
- if task.is_a? Array
144
- need_down_list = filter_list(task)
145
- @down_list += need_down_list
146
- elsif task.is_a?TaskStruct
147
- need_down_list = filter_list([task])
148
- @down_list += need_down_list
149
- else
150
- puts "error task type:#{task.class}"
151
- end
152
- end
153
-
154
- private
155
-
156
- def event_machine_down(link_struct_list, callback = nil)
157
- failed_list = []
158
- succeed_list = []
159
- multi = EventMachine::MultiRequest.new
160
- begin_time = Time.now
161
-
162
- for_each_proc =
163
- proc do |task_struct|
164
- http_req = EventMachine::HttpRequest.new(task_struct.href, task_struct.connection_options).public_send(task_struct.http_method, task_struct.request_options)
165
- http_req.stream { |chunk| stream_callback.call(chunk) } if task_struct.stream_callback
166
- task_struct.request_object = http_req
167
-
168
- http_req.callback do
169
- s = http_req.response_header.status
170
- puts "#{Time.now}, http status code: #{s}"
171
-
172
- if s == 200
173
- local_dir = File.dirname(task_struct.local_path)
174
- FileUtils.mkdir_p(local_dir) unless Dir.exist?(local_dir)
175
- begin
176
- File.open(task_struct.local_path, 'wb') do |f|
177
- f << if @convert_to_utf8 == true
178
- SpiderHelper.to_utf8(http_req.response)
179
- else
180
- http_req.response
181
- end
182
- end
183
- call_parse_method(task_struct)
184
- succeed_list << task_struct
185
- rescue StandardError => exception
186
- puts exception
187
- end
188
- end
189
- task_struct.callback.call(task_struct, http_req) if task_struct.callback
190
- end
191
-
192
- http_req.errback do
193
- puts "#{Time.now}, #{task_struct.href}, error: #{http_req.error}"
194
-
195
- task_struct.errback.call(task_struct, http_req) if task_struct.errback
196
- end
197
-
198
- begin
199
- multi.add task_struct.local_path, http_req
200
- rescue StandardError => exception
201
- puts exception
202
- puts task_struct.href
203
- puts task_struct.local_path
204
- stop_machine
205
- end
206
- end
207
-
208
- cb =
209
- proc do
210
- end_time = Time.now
211
- puts "use time:#{end_time - begin_time} seconds"
212
- if callback.nil?
213
- stop_machine
214
- else
215
- callback.call(multi, succeed_list, failed_list)
216
- end
217
- end
218
- link_struct_list.each(&for_each_proc)
219
- multi.callback(&cb)
220
- end
221
-
222
- def stop_machine
223
- puts "success size:#{@succeed_size}"
224
- puts "failed size:#{@failed_size}"
225
- @end_time = Time.now
226
- puts "total use time:#{@end_time - @begin_time} seconds"
227
- EventMachine.stop
228
- @local_path_set.clear
229
- end
230
-
231
- def next_task
232
- @down_list.shift(@max)
233
- end
234
-
235
- def call_parse_method(task_struct)
236
- task_struct.parse_method.call(task_struct) if task_struct.parse_method
237
- end
238
-
239
- def complete(_multi, success_list, failed_list)
240
- @succeed_size += success_list.size
241
- @failed_size += failed_list.size
242
- @succeed_list.concat(success_list)
243
- @failed_list.concat(failed_list)
244
-
245
- todo = next_task
246
-
247
- if todo.empty?
248
- stop_machine
249
- else
250
- if @interval != 0
251
- if !success_list.empty? || !failed_list.empty?
252
- if @interval == RANDOM_TIME
253
- sleep(rand(@random_time_range))
254
- else
255
- sleep(@interval)
256
- end
257
- end
258
- end
259
- event_machine_down(todo, method(:complete))
260
- end
261
- end
262
-
263
- def event_machine_start_list(down_list, callback = nil)
264
- EventMachine.run do
265
- @succeed_list = []
266
- @failed_list = []
267
- @begin_time = Time.now
268
- if down_list.empty?
269
- if callback
270
- callback.call(nil, [], [])
271
- else
272
- stop_machine
273
- end
274
- else
275
- event_machine_down(down_list, callback)
276
- end
277
- end
278
- end
279
-
280
- def filter_list(down_list)
281
- need_down_list = []
282
- down_list.each do |ts|
283
- if !ts.overwrite_exist && File.exist?(ts.local_path)
284
- call_parse_method(ts)
285
- elsif @local_path_set.add?(ts.local_path)
286
- need_down_list << ts
287
- end
288
- end
289
- need_down_list
290
- end
291
- end
292
-
293
- Signal.trap('INT') do
294
- ListSpider.stop_machine
295
- exit!
296
- end
297
- end
1
+ require 'list_spider/version'
2
+ require 'em-http-request'
3
+ require 'nokogiri'
4
+ require 'fileutils'
5
+ require 'set'
6
+ require 'addressable/uri'
7
+ require File.expand_path('spider_helper', __dir__)
8
+ require File.expand_path('file_filter', __dir__)
9
+
10
# A single crawl task: what to request, where to store the response and
# which callbacks to run.
class TaskStruct
  attr_accessor :href, :local_path,
                :http_method,
                :custom_data,
                :request_object,
                :parse_method,
                :callback,
                :errback,
                :stream_callback,
                :convert_to_utf8,
                :overwrite_exist,
                :request_options,
                :connection_options

  # General settings
  # * href            request URL
  # * local_path      local path the data is saved to (also used as the
  #                   de-duplication key)
  # * http_method     one of :get, :head, :delete, :put, :post, :patch, :options
  # * custom_data     user-defined data
  # * parse_method    callback that parses the saved file; receives this
  #                   TaskStruct
  # * callback        called after a successful request; the file may not
  #                   have been saved at that point (e.g. 301, 404).
  #                   Receives this TaskStruct and the matching
  #                   EventMachine::HttpRequest object:
  #                     http_req.response_header.status  status code
  #                     http_req.response_header         response headers
  #                     http_req.response                response body
  # * errback         called after a failed request; same arguments as callback
  # * stream_callback callback for streamed data
  # * convert_to_utf8 whether to convert the data to UTF-8
  # * overwrite_exist whether to overwrite an existing file
  #
  # Request settings
  # * redirects       number of redirects
  # * keepalive       (connection reuse is not supported yet)
  # * file            path of a file to upload
  # * path            request path, useful for pipelined requests (not
  #                   supported yet)
  # * query           query string, String or Hash
  # * body            request body, String or Hash
  # * head            request headers
  #
  # Connection settings
  # * connect_timeout    connection timeout
  # * inactivity_timeout timeout after the connection is established
  # * ssl             e.g.
  #     {
  #       :private_key_file => '/tmp/server.key',
  #       :cert_chain_file => '/tmp/server.crt',
  #       :verify_peer => false
  #     }
  # * bind            e.g.
  #     {
  #       :host => '123.123.123.123', # use a specific interface for outbound request
  #       :port => '123'
  #     }
  # * proxy           e.g.
  #     {
  #       :host => '127.0.0.1', # proxy address
  #       :port => 9000, # proxy port
  #       :type => :socks5 # default proxy mode is HTTP proxy, change to :socks5 if required
  #       :authorization => ['user', 'pass'] # proxy authorization header
  #     }
  def initialize(href,
                 local_path,
                 http_method: :get,
                 custom_data: nil,
                 parse_method: nil,
                 callback: nil,
                 errback: nil,
                 stream_callback: nil,
                 convert_to_utf8: false,
                 overwrite_exist: false,
                 redirects: 3,
                 keepalive: nil,
                 file: nil,
                 path: nil,
                 query: nil,
                 body: nil,
                 head: nil,
                 connect_timeout: 60,
                 inactivity_timeout: nil,
                 ssl: nil,
                 bind: nil,
                 proxy: nil)
    @href = href
    @local_path = local_path
    @http_method = http_method
    @custom_data = custom_data
    @parse_method = parse_method
    @callback = callback
    @errback = errback
    @stream_callback = stream_callback
    @convert_to_utf8 = convert_to_utf8
    @overwrite_exist = overwrite_exist

    # Only settings that were actually supplied (non-nil) are kept.
    request_settings = {
      redirects: redirects,
      keepalive: keepalive,
      file: file,
      path: path,
      query: query,
      body: body,
      head: head
    }
    @request_options = request_settings.reject { |_name, value| value.nil? }

    connection_settings = {
      connect_timeout: connect_timeout,
      inactivity_timeout: inactivity_timeout,
      ssl: ssl,
      bind: bind,
      proxy: proxy
    }
    @connection_options = connection_settings.reject { |_name, value| value.nil? }
  end
end
110
+
111
# Downloads a list of TaskStruct tasks in batches on top of EventMachine
# and em-http-request. Responses with status 200 are written to each task's
# local_path and then handed to the task's parse_method.
module ListSpider
  # Internal marker stored in @interval when the caller passed a Range.
  RANDOM_TIME = -1
  # Pass as +max+ to run every queued task in a single batch.
  NO_LIMIT_CONCURRENT = -1
  DEFAULT_CONCURRNET_MAX = 50
  DEFAULT_INTERVAL = 0

  @random_time_range = 3..10
  # local_path values already queued; used to drop duplicate tasks.
  @local_path_set = Set.new

  class << self
    # Starts the reactor and downloads +down_list+ in batches; returns once
    # the reactor has been stopped.
    #
    # @param down_list [Array<TaskStruct>] tasks to run
    # @param interval [Numeric, Range] seconds to sleep between batches; a
    #   Range means "a random value from this range"
    # @param max [Integer] batch size, or NO_LIMIT_CONCURRENT
    def get_list(down_list, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
      if interval.is_a?(Range)
        @random_time_range = interval
        interval = RANDOM_TIME
      end

      # Tasks queued through add_task before the crawl started are kept
      # (the queue is empty after a finished crawl, see stop_machine).
      @down_list = (@down_list || []) + filter_list(down_list)
      @interval = interval
      @max = max == NO_LIMIT_CONCURRENT ? @down_list.size : max
      @succeed_size = 0
      @failed_size = 0

      puts "total size:#{@down_list.size}"
      event_machine_start_list(next_task, method(:complete))
    end

    # Convenience wrapper around get_list for a single task.
    def get_one(task, interval: DEFAULT_INTERVAL, max: DEFAULT_CONCURRNET_MAX)
      get_list([task], interval: interval, max: max)
    end

    # Appends one TaskStruct or an Array of them to the pending queue.
    # Anything else is reported on stdout and ignored.
    def add_task(task)
      tasks =
        case task
        when Array then task
        when TaskStruct then [task]
        end

      if tasks.nil?
        puts "error task type:#{task.class}"
        return
      end

      # The queue does not exist until get_list has run once.
      @down_list ||= []
      @down_list.concat(filter_list(tasks))
    end

    private

    # Issues one batch of requests. When every request of the batch has
    # finished, +callback+ is called with (multi, succeed_list, failed_list);
    # without a callback the reactor is stopped instead.
    def event_machine_down(link_struct_list, callback = nil)
      failed_list = []
      succeed_list = []
      multi = EventMachine::MultiRequest.new
      begin_time = Time.now

      link_struct_list.each do |task_struct|
        http_req = start_request(task_struct)
        task_struct.request_object = http_req

        http_req.callback do
          handle_response(task_struct, http_req, succeed_list, failed_list)
        end

        http_req.errback do
          puts "#{Time.now}, #{task_struct.href}, error: #{http_req.error}"
          failed_list << task_struct
          task_struct.errback.call(task_struct, http_req) if task_struct.errback
        end

        begin
          multi.add task_struct.local_path, http_req
        rescue StandardError => e
          puts e
          puts task_struct.href
          puts task_struct.local_path
          stop_machine
        end
      end

      multi.callback do
        puts "use time:#{Time.now - begin_time} seconds"
        if callback.nil?
          stop_machine
        else
          callback.call(multi, succeed_list, failed_list)
        end
      end
    end

    # Creates and fires the HTTP request described by +task_struct+ and
    # hooks up the task's stream callback, if any.
    def start_request(task_struct)
      connection = EventMachine::HttpRequest.new(task_struct.href, task_struct.connection_options)
      http_req = connection.public_send(task_struct.http_method, task_struct.request_options)
      if task_struct.stream_callback
        # The callback lives on the task; there is no local of that name.
        http_req.stream { |chunk| task_struct.stream_callback.call(chunk) }
      end
      http_req
    end

    # Handles a completed request: on status 200 the body is saved and
    # parsed. The task's own callback runs for every status.
    def handle_response(task_struct, http_req, succeed_list, failed_list)
      status = http_req.response_header.status
      puts "#{Time.now}, http status code: #{status}"

      if status == 200
        begin
          save_response(task_struct, http_req)
          call_parse_method(task_struct)
          succeed_list << task_struct
        rescue StandardError => e
          puts e
          # Saving or parsing failed, so the task did not succeed.
          failed_list << task_struct
        end
      end
      task_struct.callback.call(task_struct, http_req) if task_struct.callback
    end

    # Writes the response body to the task's local_path, creating the
    # directory when needed and converting to UTF-8 when the task asks for it.
    def save_response(task_struct, http_req)
      local_dir = File.dirname(task_struct.local_path)
      FileUtils.mkdir_p(local_dir) unless Dir.exist?(local_dir)

      content = http_req.response
      # convert_to_utf8 is a per-task option, not module state.
      content = SpiderHelper.to_utf8(content) if task_struct.convert_to_utf8
      File.open(task_struct.local_path, 'wb') { |f| f << content }
    end

    # Prints the totals, stops the reactor if it is running and resets the
    # queue state. Safe to call when no crawl has been started (e.g. from
    # the INT handler).
    def stop_machine
      puts "success size:#{@succeed_size}"
      puts "failed size:#{@failed_size}"
      @end_time = Time.now
      puts "total use time:#{@end_time - @begin_time} seconds" if @begin_time
      EventMachine.stop if EventMachine.reactor_running?
      @local_path_set.clear
      @down_list = []
    end

    # Removes and returns the next batch (at most @max tasks).
    def next_task
      @down_list.shift(@max)
    end

    def call_parse_method(task_struct)
      task_struct.parse_method.call(task_struct) if task_struct.parse_method
    end

    # Batch-finished callback: accumulates results, then either stops the
    # reactor (queue empty) or waits the configured interval and starts the
    # next batch.
    def complete(_multi, success_list, failed_list)
      @succeed_size += success_list.size
      @failed_size += failed_list.size
      @succeed_list.concat(success_list)
      @failed_list.concat(failed_list)

      todo = next_task
      return stop_machine if todo.empty?

      # No pause is needed when the finished batch did no work at all.
      wait_for_interval unless success_list.empty? && failed_list.empty?
      event_machine_down(todo, method(:complete))
    end

    # Sleeps for the configured interval. This blocks the reactor thread; it
    # is only called between batches, when no request of the finished batch
    # is still in flight.
    def wait_for_interval
      return if @interval == 0

      sleep(@interval == RANDOM_TIME ? rand(@random_time_range) : @interval)
    end

    # Runs the reactor and kicks off the first batch.
    def event_machine_start_list(down_list, callback = nil)
      EventMachine.run do
        @succeed_list = []
        @failed_list = []
        @begin_time = Time.now
        if down_list.empty?
          if callback
            callback.call(nil, [], [])
          else
            stop_machine
          end
        else
          event_machine_down(down_list, callback)
        end
      end
    end

    # Returns the tasks that still need downloading. A task whose file
    # already exists (and must not be overwritten) is parsed right away and
    # left out; tasks with an already-queued local_path are left out too.
    def filter_list(down_list)
      down_list.each_with_object([]) do |ts, need_down_list|
        if !ts.overwrite_exist && File.exist?(ts.local_path)
          call_parse_method(ts)
        elsif @local_path_set.add?(ts.local_path)
          need_down_list << ts
        end
      end
    end
  end

  Signal.trap('INT') do
    # stop_machine is private, so it cannot be called with an explicit
    # receiver; go through send.
    ListSpider.send(:stop_machine)
    exit!
  end
end