list_spider 0.1.3 → 0.1.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/list_spider.rb +16 -16
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: facd4c45a64f0cca934e70de9cfda4f05cf1a1d9
4
- data.tar.gz: b0e5940faa51be59c13a0efcfc114b43981082b7
3
+ metadata.gz: 624a5d306b193b1ece24867cc1ff8b8031931d90
4
+ data.tar.gz: 00b861e2d021dfd668275cf33a87b275fafa0def
5
5
  SHA512:
6
- metadata.gz: a21daf8996b6aa27714a6407511a094f6381f739a65066077ca89bb0f4783ba0e6576b48080dbdaacee73d1a2ea4e6c77195ba832ee8f0bc28bde295f8e50b8c
7
- data.tar.gz: e5bb44f131f82f480d9b5ebd146587c6473f6d3a7433fea32dd46bb74ac2c76adebc362048ee1c6228ca563ae4aa3d1b69bcc6d8735a4ee683e5c848f60944a4
6
+ metadata.gz: 1e739921dc0b12d47a267cb5ed01ff626055d4e5ff5589c9ec9330906ab4db051079631a23671772af51a27d5dca9c03652b8f163e0028e4ea04f4dd382c6f29
7
+ data.tar.gz: dd6badf6fae1780525c5c4be335021624b3ecc943d7db12c60e9dd4c4bb21cfb731c30b24148c436858eb3617c1fd997c27ad01fea59e7dc5743e1b1dcdc94d2
data/lib/list_spider.rb CHANGED
@@ -33,11 +33,11 @@ module ListSpider
33
33
  RANDOM_TIME = -1
34
34
  NO_LIMIT_CONCURRENT = -1
35
35
 
36
- @@random_time_range = 3..10
37
- @@conver_to_utf8 = false
38
- @@connection_opts = {:connect_timeout => 2*60}
39
- @@overwrite_exist = false
40
- @@max_redirects = 10
36
+ @random_time_range = 3..10
37
+ @conver_to_utf8 = false
38
+ @connection_opts = {connect_timeout: 2*60}
39
+ @overwrite_exist = false
40
+ @max_redirects = 10
41
41
  @@url_set = Set.new
42
42
 
43
43
  class << self
@@ -45,17 +45,17 @@ module ListSpider
45
45
  attr_accessor :random_time_range, :conver_to_utf8, :overwrite_exist, :max_redirects
46
46
 
47
47
  def set_proxy(proxy_addr, proxy_port, username: nil, password: nil)
48
- @@connection_opts = {
48
+ @connection_opts = {
49
49
  :proxy => {
50
50
  :host => proxy_addr,
51
51
  :port => proxy_port
52
52
  }
53
53
  }
54
- @@connection_opts[:proxy][:authorization] = [username, password] if username && password
54
+ @connection_opts[:proxy][:authorization] = [username, password] if username && password
55
55
  end
56
56
 
57
57
  def connect_timeout(max_connect_time)
58
- @@connection_opts[:connect_timeout] = max_connect_time
58
+ @connection_opts[:connect_timeout] = max_connect_time
59
59
  end
60
60
 
61
61
  def set_header_option(header_option)
@@ -70,19 +70,19 @@ module ListSpider
70
70
 
71
71
  for_each_proc = proc do |e|
72
72
  opt = {}
73
- opt = {:redirects => @@max_redirects}
73
+ opt = {:redirects => @max_redirects}
74
74
  opt[:head] = @@header_option if defined? @@header_option
75
75
  if e.http_method == :post
76
76
  opt[:body] = e.params unless e.params.empty?
77
- if @@connection_opts
78
- w = EventMachine::HttpRequest.new(e.href, @@connection_opts).post opt
77
+ if @connection_opts
78
+ w = EventMachine::HttpRequest.new(e.href, @connection_opts).post opt
79
79
  else
80
80
  w = EventMachine::HttpRequest.new(e.href).post opt
81
81
  end
82
82
  else
83
- if @@connection_opts
83
+ if @connection_opts
84
84
  opt[:query] = e.params unless e.params.empty?
85
- w = EventMachine::HttpRequest.new(e.href, @@connection_opts).get opt
85
+ w = EventMachine::HttpRequest.new(e.href, @connection_opts).get opt
86
86
  else
87
87
  w = EventMachine::HttpRequest.new(e.href).get opt
88
88
  end
@@ -96,7 +96,7 @@ module ListSpider
96
96
  FileUtils.mkdir_p(local_dir) unless Dir.exist?(local_dir)
97
97
  begin
98
98
  File.open(e.local_path, "w") do |f|
99
- if @@conver_to_utf8 == true
99
+ if @conver_to_utf8 == true
100
100
  f << SpiderHelper.to_utf8( w.response)
101
101
  else
102
102
  f << w.response
@@ -173,7 +173,7 @@ module ListSpider
173
173
  if @@inter_val != 0
174
174
  if success_list.size != 0 || failed_list.size != 0
175
175
  if @@inter_val == RANDOM_TIME
176
- sleep(rand(@@random_time_range))
176
+ sleep(rand(@random_time_range))
177
177
  else
178
178
  sleep(@@inter_val)
179
179
  end
@@ -201,7 +201,7 @@ module ListSpider
201
201
  def filter_list(down_list)
202
202
  need_down_list = []
203
203
  down_list.each do |ts|
204
- if !@@overwrite_exist && File.exist?(ts.local_path)
204
+ if !@overwrite_exist && File.exist?(ts.local_path)
205
205
  ts.parse_method.call(ts.local_path, ts.extra_data) if ts.parse_method
206
206
  else
207
207
  need_down_list << ts
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: list_spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Charles Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-05 00:00:00.000000000 Z
11
+ date: 2016-05-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: em-http-request