micro_spider 0.1.18 → 0.1.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/micro_spider.rb +19 -3
- data/lib/spider_core/exceptions.rb +3 -0
- data/lib/spider_core/version.rb +1 -1
- data/lib/spider_core.rb +1 -0
- metadata +4 -3
data/lib/micro_spider.rb
CHANGED
@@ -1,13 +1,23 @@
|
|
1
1
|
require 'capybara'
|
2
|
-
require 'capybara-webkit'
|
3
2
|
require 'capybara/dsl'
|
3
|
+
require 'capybara/mechanize'
|
4
4
|
|
5
|
-
Capybara.
|
5
|
+
Capybara.default_driver = :mechanize
|
6
|
+
Capybara.current_driver = :mechanize
|
7
|
+
Capybara.app = proc { |env| [200, {'Content-Type' => 'text/html'}, 'You need to use MicroSpider#site method to set app host.'] }
|
6
8
|
Capybara.configure do |config|
|
7
9
|
config.ignore_hidden_elements = false
|
8
10
|
config.run_server = false
|
9
11
|
end
|
10
12
|
|
13
|
+
# If has capybara-webkit, first priority
|
14
|
+
begin
|
15
|
+
require 'capybara-webkit'
|
16
|
+
Capybara.current_driver = :webkit
|
17
|
+
rescue Exception => e
|
18
|
+
# Nothing to do.
|
19
|
+
end
|
20
|
+
|
11
21
|
require 'logger'
|
12
22
|
require 'set'
|
13
23
|
require 'timeout'
|
@@ -52,6 +62,7 @@ class MicroSpider
|
|
52
62
|
# spider.visit('http://google.com')
|
53
63
|
#
|
54
64
|
def visit(path)
|
65
|
+
raise ArgumentError, "Path can't be nil or empty" if path.nil? || path.empty?
|
55
66
|
sleep_or_not
|
56
67
|
logger.info "Begin to visit #{path}."
|
57
68
|
super(path)
|
@@ -64,7 +75,8 @@ class MicroSpider
|
|
64
75
|
#
|
65
76
|
def click(locator, opts = {}, &block)
|
66
77
|
actions << lambda {
|
67
|
-
path = find_link(locator, opts)[:href]
|
78
|
+
path = find_link(locator, opts)[:href] rescue nil
|
79
|
+
raise SpiderCore::ClickPathNotFound, "#{locator} not found" if path.nil?
|
68
80
|
if block_given?
|
69
81
|
spider = self.spawn
|
70
82
|
spider.entrance(path)
|
@@ -223,6 +235,10 @@ class MicroSpider
|
|
223
235
|
logger.fatal(err.message)
|
224
236
|
logger.fatal(err.backtrace.inspect)
|
225
237
|
break
|
238
|
+
rescue SpiderCore::ClickPathNotFound => err
|
239
|
+
logger.fatal(err.message)
|
240
|
+
logger.fatal(err.backtrace.inspect)
|
241
|
+
break
|
226
242
|
end
|
227
243
|
}
|
228
244
|
end
|
data/lib/spider_core/version.rb
CHANGED
data/lib/spider_core.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: micro_spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.18
|
4
|
+
version: 0.1.19
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-09-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: capybara
|
@@ -28,7 +28,7 @@ dependencies:
|
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
|
-
name: capybara-webkit
|
31
|
+
name: capybara-mechanize
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
none: false
|
34
34
|
requirements:
|
@@ -148,6 +148,7 @@ extra_rdoc_files: []
|
|
148
148
|
files:
|
149
149
|
- lib/micro_spider.rb
|
150
150
|
- lib/spider_core/behavior.rb
|
151
|
+
- lib/spider_core/exceptions.rb
|
151
152
|
- lib/spider_core/field_dsl.rb
|
152
153
|
- lib/spider_core/follow_dsl.rb
|
153
154
|
- lib/spider_core/pagination_dsl.rb
|