simplecrawler 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/simplecrawler.rb +2 -2
- data/tests/simplecrawler_test.rb +6 -0
- metadata +2 -2
data/lib/simplecrawler.rb
CHANGED
@@ -23,7 +23,7 @@ module SimpleCrawler
|
|
23
23
|
require File.dirname(__FILE__) + '/document'
|
24
24
|
|
25
25
|
MARKUP_MIME_TYPES = ["text/html", "text/xml", "application/xml", "application/xhtml+xml"]
|
26
|
-
VERSION = "0.1.3"
|
26
|
+
VERSION = "0.1.4"
|
27
27
|
|
28
28
|
class Crawler
|
29
29
|
|
@@ -103,7 +103,7 @@ module SimpleCrawler
|
|
103
103
|
doc = Document.new
|
104
104
|
begin
|
105
105
|
uri = @site_uri.clone
|
106
|
-
uri.path =
|
106
|
+
uri.path = path if path != "/"
|
107
107
|
doc.uri = uri
|
108
108
|
|
109
109
|
log("Trying #{uri}")
|
data/tests/simplecrawler_test.rb
CHANGED
@@ -12,8 +12,14 @@ class SimpleCrawlerTest < Test::Unit::TestCase
|
|
12
12
|
def test_initialize_crawler
|
13
13
|
@crawler = SimpleCrawler::Crawler.new("http://www.example.com/")
|
14
14
|
assert @crawler.queue.length == 1
|
15
|
+
assert_equal "/", @crawler.queue[0]
|
15
16
|
end
|
16
17
|
|
18
|
+
def test_initialize_crawler_with_path
|
19
|
+
@crawler = SimpleCrawler::Crawler.new("http://www.example.com/deep/down/in/hierarchy/")
|
20
|
+
assert @crawler.queue.length == 1
|
21
|
+
assert_equal "/deep/down/in/hierarchy/", @crawler.queue[0]
|
22
|
+
end
|
17
23
|
|
18
24
|
def test_initialize_crawler_without_uri_path
|
19
25
|
@crawler = SimpleCrawler::Crawler.new("http://www.example.com")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simplecrawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Krantz
|
@@ -9,7 +9,7 @@ autorequire: simplecrawler
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-09-17 00:00:00 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|