micro_spider 0.1.22 → 0.1.23
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/micro_spider.rb +24 -7
- data/lib/spider_core/version.rb +1 -1
- metadata +2 -2
data/lib/micro_spider.rb
CHANGED
@@ -201,7 +201,7 @@ class MicroSpider
|
|
201
201
|
end
|
202
202
|
|
203
203
|
if path.nil?
|
204
|
-
|
204
|
+
complete
|
205
205
|
return excretion
|
206
206
|
end
|
207
207
|
|
@@ -220,6 +220,7 @@ class MicroSpider
|
|
220
220
|
@broken_paths << path
|
221
221
|
logger.fatal("Caught exception when visit `#{path}`")
|
222
222
|
logger.fatal(err)
|
223
|
+
logger.fatal(err.backtrace.join("\n"))
|
223
224
|
else
|
224
225
|
@visited_paths << path
|
225
226
|
execute_actions
|
@@ -313,13 +314,29 @@ class MicroSpider
|
|
313
314
|
@page ||= Capybara::Session.new(Capybara.mode, Capybara.app)
|
314
315
|
end
|
315
316
|
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
317
|
+
# Because we don't share the page, the connection may or may not be killed; if it is left alive it will eat too much memory.
|
318
|
+
# Make this spider instance suicide.
|
319
|
+
# For now, specially for `capybara-webkit`
|
320
|
+
def suicide
|
321
|
+
if Capybara.mode.to_s == 'webkit'
|
322
|
+
@page.driver.browser.instance_variable_get(:@connection).send :kill_process
|
322
323
|
end
|
324
|
+
@page = nil
|
323
325
|
end
|
324
326
|
|
327
|
+
protected
|
328
|
+
|
329
|
+
def sleep_or_not
|
330
|
+
if delay && delay > 0
|
331
|
+
logger.info "Nedd sleep #{delay} sec."
|
332
|
+
sleep(delay)
|
333
|
+
logger.info 'Wakeup'
|
334
|
+
end
|
335
|
+
end
|
336
|
+
|
337
|
+
def complete
|
338
|
+
excretion[:status] = 'completed'
|
339
|
+
suicide
|
340
|
+
end
|
341
|
+
|
325
342
|
end
|
data/lib/spider_core/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: micro_spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.22
|
4
|
+
version: 0.1.23
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-03-
|
12
|
+
date: 2014-03-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: capybara
|