spider 0.4.2 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +3 -0
- data/README +7 -1
- data/lib/spider/spider_instance.rb +11 -9
- data/spider.gemspec +1 -1
- metadata +2 -2
data/CHANGES
CHANGED
data/README
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
|
1
2
|
Spider, a Web spidering library for Ruby. It handles the robots.txt,
|
2
3
|
scraping, collecting, and looping so that you can just handle the data.
|
3
4
|
|
@@ -132,9 +133,14 @@ scraping, collecting, and looping so that you can just handle the data.
|
|
132
133
|
== Author
|
133
134
|
|
134
135
|
John Nagro john.nagro@gmail.com
|
136
|
+
|
135
137
|
Mike Burns http://mike-burns.com mike@mike-burns.com (original author)
|
136
138
|
|
137
|
-
|
139
|
+
Many thanks to:
|
140
|
+
Matt Horan
|
141
|
+
Henri Cook
|
142
|
+
Sander van der Vliet
|
143
|
+
John Buckley
|
138
144
|
|
139
145
|
With `robot_rules' from James Edward Gray II via
|
140
146
|
http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/177589
|
@@ -295,15 +295,17 @@ class SpiderInstance
|
|
295
295
|
def construct_complete_url(base_url, additional_url, parsed_additional_url = nil) #:nodoc:
|
296
296
|
parsed_additional_url ||= URI.parse(additional_url)
|
297
297
|
case parsed_additional_url.scheme
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
298
|
+
when nil
|
299
|
+
u = base_url.is_a?(URI) ? base_url : URI.parse(base_url)
|
300
|
+
if additional_url[0].chr == '/'
|
301
|
+
"#{u.scheme}://#{u.host}#{additional_url}"
|
302
|
+
elsif u.path.nil? || u.path == ''
|
303
|
+
"#{u.scheme}://#{u.host}/#{additional_url}"
|
304
|
+
elsif u.path[0].chr == '/'
|
305
|
+
"#{u.scheme}://#{u.host}#{u.path}/#{additional_url}"
|
306
|
+
else
|
307
|
+
"#{u.scheme}://#{u.host}/#{u.path}/#{additional_url}"
|
308
|
+
end
|
307
309
|
else
|
308
310
|
additional_url
|
309
311
|
end
|
data/spider.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Nagro
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-10-09 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|