list_spider 0.1.9 → 0.1.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/list_spider.rb +27 -27
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4b43422e1a52997518a53a38c021f68700ec6d3f
4
- data.tar.gz: cefa5535e5870a221b04a47d6b1f6ec177392caa
3
+ metadata.gz: 540f6b583b38b40fe7c6096d372a43b93dd20002
4
+ data.tar.gz: 99b187103019165b5c87dd094c748d0b889d306b
5
5
  SHA512:
6
- metadata.gz: 2c561ce1828e592a538e1747dccc1da39c1946f182ffc19212fd8284b90b0c1f43e7e584ff3bb1f416b984bc04328901d887bf4fd294f20448ad98a7d7b0c1dd
7
- data.tar.gz: 55d6a8f01bb1a325d365d42f33123289881f37e985768ef75538a6f1a1f044b248cf42f5685f6974591f86f0ba140e78a693cac4af4981c6b564cd3a131568ef
6
+ metadata.gz: 6bd249021510b182d67912573feb5f9204c82f201340d7515f6dcb0d024fd9920abd1acb7724c6670967455a1a0b92426f40cf3d7550da40f00decfdd8e7f903
7
+ data.tar.gz: 6a28ca654fbb5f906dacfcccc121db52c09b6efa70c1e3724a1db3937e7be15922286c476ff390c48c227a7b492866c39ef1c3187e9636a0fd3ec9ac391d88fc
data/lib/list_spider.rb CHANGED
@@ -40,11 +40,11 @@ module ListSpider
40
40
  @connection_opts = {connect_timeout: 60}
41
41
  @overwrite_exist = false
42
42
  @max_redirects = 10
43
- @@local_path_set = Set.new
43
+ @local_path_set = Set.new
44
44
 
45
45
  class << self
46
46
 
47
- attr_accessor :random_time_range, :conver_to_utf8, :overwrite_exist, :max_redirects
47
+ attr_accessor :conver_to_utf8, :overwrite_exist, :max_redirects
48
48
 
49
49
  def set_proxy(proxy_addr, proxy_port, username: nil, password: nil)
50
50
  @connection_opts = {
@@ -61,7 +61,7 @@ module ListSpider
61
61
  end
62
62
 
63
63
  def set_header_option(header_option)
64
- @@header_option = header_option
64
+ @header_option = header_option
65
65
  end
66
66
 
67
67
  def event_machine_down(link_struct_list, callback = nil)
@@ -73,7 +73,7 @@ module ListSpider
73
73
  for_each_proc = proc do |e|
74
74
  opt = {}
75
75
  opt = {:redirects => @max_redirects}
76
- opt[:head] = @@header_option if defined? @@header_option
76
+ opt[:head] = @header_option if defined? @header_option
77
77
  if e.http_method == :post
78
78
  opt[:body] = e.params unless e.params.empty?
79
79
  if @connection_opts
@@ -149,16 +149,16 @@ module ListSpider
149
149
  end
150
150
 
151
151
  def stop_machine
152
- puts "success size:#{@@succeed_size}"
153
- puts "failed size:#{@@failed_size}"
154
- @@end_time = Time.now
155
- puts "total use time:#{@@end_time-@@begin_time} seconds"
152
+ puts "success size:#{@succeed_size}"
153
+ puts "failed size:#{@failed_size}"
154
+ @end_time = Time.now
155
+ puts "total use time:#{@end_time-@begin_time} seconds"
156
156
  EventMachine.stop
157
- @@local_path_set.clear
157
+ @local_path_set.clear
158
158
  end
159
159
 
160
160
  def get_next_task
161
- return @@down_list.shift(@@max)
161
+ return @down_list.shift(@max)
162
162
  end
163
163
 
164
164
  def call_parse_method(e)
@@ -188,8 +188,8 @@ module ListSpider
188
188
  end
189
189
 
190
190
  def complete(multi, success_list, failed_list)
191
- @@succeed_size += success_list.size
192
- @@failed_size += failed_list.size
191
+ @succeed_size += success_list.size
192
+ @failed_size += failed_list.size
193
193
  success_list.each do |e|
194
194
  call_parse_method(e)
195
195
  end
@@ -199,12 +199,12 @@ module ListSpider
199
199
  if todo.empty?
200
200
  stop_machine
201
201
  else
202
- if @@interval != 0
202
+ if @interval != 0
203
203
  if success_list.size != 0 || failed_list.size != 0
204
- if @@interval == RANDOM_TIME
204
+ if @interval == RANDOM_TIME
205
205
  sleep(rand(@random_time_range))
206
206
  else
207
- sleep(@@interval)
207
+ sleep(@interval)
208
208
  end
209
209
  end
210
210
  end
@@ -214,7 +214,7 @@ module ListSpider
214
214
 
215
215
  def event_machine_start_list(down_list, callback = nil)
216
216
  EventMachine.run {
217
- @@begin_time = Time.now
217
+ @begin_time = Time.now
218
218
  if down_list.empty?
219
219
  if callback
220
220
  callback.call(nil, [], [])
@@ -232,7 +232,7 @@ module ListSpider
232
232
  down_list.each do |ts|
233
233
  if !@overwrite_exist && File.exist?(ts.local_path)
234
234
  call_parse_method(ts)
235
- elsif @@local_path_set.add?(ts.local_path)
235
+ elsif @local_path_set.add?(ts.local_path)
236
236
  need_down_list << ts
237
237
  end
238
238
  end
@@ -245,18 +245,18 @@ module ListSpider
245
245
  interval = RANDOM_TIME
246
246
  end
247
247
 
248
- @@down_list = []
248
+ @down_list = []
249
249
 
250
250
  need_down_list = filter_list(down_list)
251
251
 
252
- @@down_list = @@down_list + need_down_list
253
- @@interval = interval
254
- @@max = max
255
- @@max = @@down_list.size if @@max == NO_LIMIT_CONCURRENT
256
- @@succeed_size = 0
257
- @@failed_size = 0
252
+ @down_list = @down_list + need_down_list
253
+ @interval = interval
254
+ @max = max
255
+ @max = @down_list.size if @max == NO_LIMIT_CONCURRENT
256
+ @succeed_size = 0
257
+ @failed_size = 0
258
258
 
259
- puts "total size:#{@@down_list.size}"
259
+ puts "total size:#{@down_list.size}"
260
260
  event_machine_start_list(get_next_task, method(:complete))
261
261
  end
262
262
 
@@ -267,10 +267,10 @@ module ListSpider
267
267
  def add_task(task)
268
268
  if task.is_a?Array
269
269
  need_down_list = filter_list(task)
270
- @@down_list = @@down_list + need_down_list
270
+ @down_list = @down_list + need_down_list
271
271
  elsif task.is_a?TaskStruct
272
272
  need_down_list = filter_list([task])
273
- @@down_list = @@down_list + need_down_list
273
+ @down_list = @down_list + need_down_list
274
274
  else
275
275
  puts "error task type:#{task.class}"
276
276
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: list_spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Charles Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-12 00:00:00.000000000 Z
11
+ date: 2016-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: em-http-request