spidr 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/README.txt +1 -1
- data/Rakefile +1 -1
- data/lib/spidr/agent.rb +0 -1
- data/lib/spidr/page.rb +11 -6
- data/lib/spidr/version.rb +1 -1
- metadata +3 -3
data/History.txt
CHANGED
data/README.txt
CHANGED
data/Rakefile
CHANGED
data/lib/spidr/agent.rb
CHANGED
data/lib/spidr/page.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'uri'
|
2
|
-
require '
|
2
|
+
require 'nokogiri'
|
3
3
|
|
4
4
|
module Spidr
|
5
5
|
class Page
|
@@ -185,12 +185,17 @@ module Spidr
|
|
185
185
|
end
|
186
186
|
|
187
187
|
#
|
188
|
-
#
|
189
|
-
#
|
188
|
+
# If the page has a <tt>text/html</tt> content-type, a
|
189
|
+
# Nokogiri::HTML::Document object will be returned. If the page has a
|
190
|
+
# <tt>text/xml</tt> content-type, a Nokogiri::XML::Document object
|
191
|
+
# will be returned. Other content-types will cause +nil+ to be
|
192
|
+
# returned.
|
190
193
|
#
|
191
194
|
def doc
|
192
195
|
if html?
|
193
|
-
return @doc ||=
|
196
|
+
return @doc ||= Nokogiri::HTML(body)
|
197
|
+
elsif xml?
|
198
|
+
return @doc ||= Nokogiri::XML(body)
|
194
199
|
end
|
195
200
|
end
|
196
201
|
|
@@ -201,8 +206,8 @@ module Spidr
|
|
201
206
|
urls = []
|
202
207
|
|
203
208
|
if html?
|
204
|
-
doc.search('a[@href]') do |a|
|
205
|
-
url = a.
|
209
|
+
self.doc.search('a[@href]').each do |a|
|
210
|
+
url = a.get_attribute('href')
|
206
211
|
|
207
212
|
urls << url unless url.empty?
|
208
213
|
end
|
data/lib/spidr/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Postmodern
|
@@ -9,11 +9,11 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-01-
|
12
|
+
date: 2009-01-15 00:00:00 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
16
|
+
name: nokogiri
|
17
17
|
type: :runtime
|
18
18
|
version_requirement:
|
19
19
|
version_requirements: !ruby/object:Gem::Requirement
|