spidr 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/README.txt +1 -1
- data/Rakefile +1 -1
- data/lib/spidr/agent.rb +0 -1
- data/lib/spidr/page.rb +11 -6
- data/lib/spidr/version.rb +1 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.txt
CHANGED
data/Rakefile
CHANGED
data/lib/spidr/agent.rb
CHANGED
data/lib/spidr/page.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'uri'
|
2
|
-
require '
|
2
|
+
require 'nokogiri'
|
3
3
|
|
4
4
|
module Spidr
|
5
5
|
class Page
|
@@ -185,12 +185,17 @@ module Spidr
|
|
185
185
|
end
|
186
186
|
|
187
187
|
#
|
188
|
-
#
|
189
|
-
#
|
188
|
+
# If the page has a <tt>text/html</tt> content-type, a
|
189
|
+
# Nokogiri::HTML::Document object will be returned. If the page has a
|
190
|
+
# <tt>text/xml</tt> content-type, a Nokogiri::XML::Document object
|
191
|
+
# will be returned. Other content-types will cause +nil+ to be
|
192
|
+
# returned.
|
190
193
|
#
|
191
194
|
def doc
|
192
195
|
if html?
|
193
|
-
return @doc ||=
|
196
|
+
return @doc ||= Nokogiri::HTML(body)
|
197
|
+
elsif xml?
|
198
|
+
return @doc ||= Nokogiri::XML(body)
|
194
199
|
end
|
195
200
|
end
|
196
201
|
|
@@ -201,8 +206,8 @@ module Spidr
|
|
201
206
|
urls = []
|
202
207
|
|
203
208
|
if html?
|
204
|
-
doc.search('a[@href]') do |a|
|
205
|
-
url = a.
|
209
|
+
self.doc.search('a[@href]').each do |a|
|
210
|
+
url = a.get_attribute('href')
|
206
211
|
|
207
212
|
urls << url unless url.empty?
|
208
213
|
end
|
data/lib/spidr/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Postmodern
|
@@ -9,11 +9,11 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-01-
|
12
|
+
date: 2009-01-15 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
16
|
+
name: nokogiri
|
17
17
|
type: :runtime
|
18
18
|
version_requirement:
|
19
19
|
version_requirements: !ruby/object:Gem::Requirement
|