list_spider 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/list_spider.rb +40 -16
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ffb9c26b0fc7db4fd6a2a3b831832964afbaa949
4
- data.tar.gz: 2c829fb860ad3c0c8b3825c1575988cadfccaa15
3
+ metadata.gz: aa975b345606cc3995e08310737afd1338ad4c80
4
+ data.tar.gz: a5d15c9dcf5f1f78395f75911370617c9481b1ea
5
5
  SHA512:
6
- metadata.gz: 1673f2f5daed52295ee567c2d1c6b672f1bdf532fd31a334a704cd9ac5fbdcef9aea0f27e9828cd816769f74bfcbd8498294f1d190e73defb0fb6c15fb990e04
7
- data.tar.gz: 31247ae66dfac70a69671c336dcc942812d26fd9572c2e527dbd511bf40ed680304fcb24d7f1fac01e51b52d666061ffacaaae49c960a51aecff61f07ddcf9be
6
+ metadata.gz: 8fd86f3186bf640d2d895028d1373984046183e941619bf01b0c2d611deb7a62d4a52098da23e3299d20949212b776a6b368a4923c37e152d2e3e3038b433a1a
7
+ data.tar.gz: 722ee573500abe11549e679b3def10def9830ef58ae6f4bf24e0e98d0fc872524d97c964ce17df3dc9718eedfb3e66067f09fdeda89bce6d39f0c2fa1bbca750
@@ -24,7 +24,7 @@ class TaskStruct
24
24
  o.class == self.class && o.href == href && o.local_path == local_path && o.http_method == http_method && o.params == params && o.extra_data == extra_data
25
25
  end
26
26
 
27
- attr_accessor :origin_href , :href, :local_path, :http_method, :params, :extra_data, :parse_method
27
+ attr_accessor :origin_href , :href, :local_path, :http_method, :params, :extra_data, :parse_method, :request_object
28
28
 
29
29
  end
30
30
 
@@ -37,10 +37,10 @@ module ListSpider
37
37
 
38
38
  @random_time_range = 3..10
39
39
  @conver_to_utf8 = false
40
- @connection_opts = {connect_timeout: 2*60}
40
+ @connection_opts = {connect_timeout: 60}
41
41
  @overwrite_exist = false
42
42
  @max_redirects = 10
43
- @@url_set = Set.new
43
+ @@local_path_set = Set.new
44
44
 
45
45
  class << self
46
46
 
@@ -61,7 +61,7 @@ module ListSpider
61
61
  end
62
62
 
63
63
  def set_header_option(header_option)
64
- @@header_option = optHash
64
+ @@header_option = header_option
65
65
  end
66
66
 
67
67
  def event_machine_down(link_struct_list, callback = nil)
@@ -90,6 +90,8 @@ module ListSpider
90
90
  end
91
91
  end
92
92
 
93
+ e.request_object = w
94
+
93
95
  w.callback {
94
96
  s = w.response_header.status
95
97
  puts s
@@ -122,7 +124,15 @@ module ListSpider
122
124
  SpiderHelper.direct_http_post(e.href, e.local_path, e.params)
123
125
  end
124
126
  }
125
- multi.add e.local_path, w
127
+
128
+ begin
129
+ multi.add e.local_path, w
130
+ rescue Exception => exception
131
+ puts exception
132
+ puts e.href
133
+ puts e.local_path
134
+ stop_machine
135
+ end
126
136
  end
127
137
 
128
138
  cb = Proc.new do
@@ -144,27 +154,36 @@ module ListSpider
144
154
  @@end_time = Time.now
145
155
  puts "total use time:#{@@end_time-@@begin_time} seconds"
146
156
  EventMachine.stop
147
- @@url_set.clear
157
+ @@local_path_set.clear
148
158
  end
149
159
 
150
160
  def get_next_task
151
- todo = []
161
+ return @@down_list.shift(@@max)
162
+ end
152
163
 
153
- until todo.size >= @@max || @@down_list.empty? do
154
- e = @@down_list.shift
155
- if @@url_set.add?(e.href)
156
- todo << e
164
+ def call_parse_method(e)
165
+ pm = e.parse_method
166
+ if pm
167
+ case pm.arity
168
+ when 1
169
+ pm.call(e.local_path)
170
+ when 2
171
+ pm.call(e.local_path, e.extra_data)
172
+ when 3
173
+ res_header = nil
174
+ res_header = e.request_object.response_header if e.request_object
175
+ pm.call(e.local_path, e.extra_data, res_header)
176
+ else
177
+ puts "Error! The number of arguments is:#{pm.arity}. While expected number is 1, 2, 3"
157
178
  end
158
179
  end
159
-
160
- return todo
161
180
  end
162
181
 
163
182
  def complete(multi, success_list, failed_list)
164
183
  @@succeed_size += success_list.size
165
184
  @@failed_size += failed_list.size
166
185
  success_list.each do |e|
167
- e.parse_method.call(e.local_path, e.extra_data) if e.parse_method
186
+ call_parse_method(e)
168
187
  end
169
188
 
170
189
  todo = get_next_task
@@ -204,8 +223,8 @@ module ListSpider
204
223
  need_down_list = []
205
224
  down_list.each do |ts|
206
225
  if !@overwrite_exist && File.exist?(ts.local_path)
207
- ts.parse_method.call(ts.local_path, ts.extra_data) if ts.parse_method
208
- else
226
+ call_parse_method(ts)
227
+ elsif @@local_path_set.add?(ts.local_path)
209
228
  need_down_list << ts
210
229
  end
211
230
  end
@@ -243,6 +262,11 @@ module ListSpider
243
262
  puts "error task type:#{task.class}"
244
263
  end
245
264
  end
265
+ end
246
266
 
267
+ Signal.trap("INT") do
268
+ ListSpider.stop_machine
269
+ exit!
247
270
  end
271
+
248
272
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: list_spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Charles Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-08 00:00:00.000000000 Z
11
+ date: 2016-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: em-http-request