list_spider 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/list_spider.rb +27 -27
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4b43422e1a52997518a53a38c021f68700ec6d3f
4
- data.tar.gz: cefa5535e5870a221b04a47d6b1f6ec177392caa
3
+ metadata.gz: 540f6b583b38b40fe7c6096d372a43b93dd20002
4
+ data.tar.gz: 99b187103019165b5c87dd094c748d0b889d306b
5
5
  SHA512:
6
- metadata.gz: 2c561ce1828e592a538e1747dccc1da39c1946f182ffc19212fd8284b90b0c1f43e7e584ff3bb1f416b984bc04328901d887bf4fd294f20448ad98a7d7b0c1dd
7
- data.tar.gz: 55d6a8f01bb1a325d365d42f33123289881f37e985768ef75538a6f1a1f044b248cf42f5685f6974591f86f0ba140e78a693cac4af4981c6b564cd3a131568ef
6
+ metadata.gz: 6bd249021510b182d67912573feb5f9204c82f201340d7515f6dcb0d024fd9920abd1acb7724c6670967455a1a0b92426f40cf3d7550da40f00decfdd8e7f903
7
+ data.tar.gz: 6a28ca654fbb5f906dacfcccc121db52c09b6efa70c1e3724a1db3937e7be15922286c476ff390c48c227a7b492866c39ef1c3187e9636a0fd3ec9ac391d88fc
data/lib/list_spider.rb CHANGED
@@ -40,11 +40,11 @@ module ListSpider
40
40
  @connection_opts = {connect_timeout: 60}
41
41
  @overwrite_exist = false
42
42
  @max_redirects = 10
43
- @@local_path_set = Set.new
43
+ @local_path_set = Set.new
44
44
 
45
45
  class << self
46
46
 
47
- attr_accessor :random_time_range, :conver_to_utf8, :overwrite_exist, :max_redirects
47
+ attr_accessor :conver_to_utf8, :overwrite_exist, :max_redirects
48
48
 
49
49
  def set_proxy(proxy_addr, proxy_port, username: nil, password: nil)
50
50
  @connection_opts = {
@@ -61,7 +61,7 @@ module ListSpider
61
61
  end
62
62
 
63
63
  def set_header_option(header_option)
64
- @@header_option = header_option
64
+ @header_option = header_option
65
65
  end
66
66
 
67
67
  def event_machine_down(link_struct_list, callback = nil)
@@ -73,7 +73,7 @@ module ListSpider
73
73
  for_each_proc = proc do |e|
74
74
  opt = {}
75
75
  opt = {:redirects => @max_redirects}
76
- opt[:head] = @@header_option if defined? @@header_option
76
+ opt[:head] = @header_option if defined? @header_option
77
77
  if e.http_method == :post
78
78
  opt[:body] = e.params unless e.params.empty?
79
79
  if @connection_opts
@@ -149,16 +149,16 @@ module ListSpider
149
149
  end
150
150
 
151
151
  def stop_machine
152
- puts "success size:#{@@succeed_size}"
153
- puts "failed size:#{@@failed_size}"
154
- @@end_time = Time.now
155
- puts "total use time:#{@@end_time-@@begin_time} seconds"
152
+ puts "success size:#{@succeed_size}"
153
+ puts "failed size:#{@failed_size}"
154
+ @end_time = Time.now
155
+ puts "total use time:#{@end_time-@begin_time} seconds"
156
156
  EventMachine.stop
157
- @@local_path_set.clear
157
+ @local_path_set.clear
158
158
  end
159
159
 
160
160
  def get_next_task
161
- return @@down_list.shift(@@max)
161
+ return @down_list.shift(@max)
162
162
  end
163
163
 
164
164
  def call_parse_method(e)
@@ -188,8 +188,8 @@ module ListSpider
188
188
  end
189
189
 
190
190
  def complete(multi, success_list, failed_list)
191
- @@succeed_size += success_list.size
192
- @@failed_size += failed_list.size
191
+ @succeed_size += success_list.size
192
+ @failed_size += failed_list.size
193
193
  success_list.each do |e|
194
194
  call_parse_method(e)
195
195
  end
@@ -199,12 +199,12 @@ module ListSpider
199
199
  if todo.empty?
200
200
  stop_machine
201
201
  else
202
- if @@interval != 0
202
+ if @interval != 0
203
203
  if success_list.size != 0 || failed_list.size != 0
204
- if @@interval == RANDOM_TIME
204
+ if @interval == RANDOM_TIME
205
205
  sleep(rand(@random_time_range))
206
206
  else
207
- sleep(@@interval)
207
+ sleep(@interval)
208
208
  end
209
209
  end
210
210
  end
@@ -214,7 +214,7 @@ module ListSpider
214
214
 
215
215
  def event_machine_start_list(down_list, callback = nil)
216
216
  EventMachine.run {
217
- @@begin_time = Time.now
217
+ @begin_time = Time.now
218
218
  if down_list.empty?
219
219
  if callback
220
220
  callback.call(nil, [], [])
@@ -232,7 +232,7 @@ module ListSpider
232
232
  down_list.each do |ts|
233
233
  if !@overwrite_exist && File.exist?(ts.local_path)
234
234
  call_parse_method(ts)
235
- elsif @@local_path_set.add?(ts.local_path)
235
+ elsif @local_path_set.add?(ts.local_path)
236
236
  need_down_list << ts
237
237
  end
238
238
  end
@@ -245,18 +245,18 @@ module ListSpider
245
245
  interval = RANDOM_TIME
246
246
  end
247
247
 
248
- @@down_list = []
248
+ @down_list = []
249
249
 
250
250
  need_down_list = filter_list(down_list)
251
251
 
252
- @@down_list = @@down_list + need_down_list
253
- @@interval = interval
254
- @@max = max
255
- @@max = @@down_list.size if @@max == NO_LIMIT_CONCURRENT
256
- @@succeed_size = 0
257
- @@failed_size = 0
252
+ @down_list = @down_list + need_down_list
253
+ @interval = interval
254
+ @max = max
255
+ @max = @down_list.size if @max == NO_LIMIT_CONCURRENT
256
+ @succeed_size = 0
257
+ @failed_size = 0
258
258
 
259
- puts "total size:#{@@down_list.size}"
259
+ puts "total size:#{@down_list.size}"
260
260
  event_machine_start_list(get_next_task, method(:complete))
261
261
  end
262
262
 
@@ -267,10 +267,10 @@ module ListSpider
267
267
  def add_task(task)
268
268
  if task.is_a?Array
269
269
  need_down_list = filter_list(task)
270
- @@down_list = @@down_list + need_down_list
270
+ @down_list = @down_list + need_down_list
271
271
  elsif task.is_a?TaskStruct
272
272
  need_down_list = filter_list([task])
273
- @@down_list = @@down_list + need_down_list
273
+ @down_list = @down_list + need_down_list
274
274
  else
275
275
  puts "error task type:#{task.class}"
276
276
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: list_spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Charles Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-12 00:00:00.000000000 Z
11
+ date: 2016-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: em-http-request