dmoz_sax_doc 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/lib/dmoz_sax/alias.rb +0 -8
- data/lib/dmoz_sax/path.rb +18 -14
- data/lib/dmoz_sax/version.rb +1 -1
- data/spec/alias_spec.rb +5 -2
- data/spec/content_document_spec.rb +0 -5
- data/spec/path_spec.rb +6 -6
- data/spec/structure_document_spec.rb +5 -3
- metadata +2 -2
data/Gemfile.lock
CHANGED
data/lib/dmoz_sax/alias.rb
CHANGED
data/lib/dmoz_sax/path.rb
CHANGED
@@ -9,30 +9,34 @@ module DmozSax
|
|
9
9
|
resource = str.gsub('_', ' ').split(':')
|
10
10
|
|
11
11
|
@name = resource.first if resource.length == 2
|
12
|
+
@level = level.to_i
|
12
13
|
|
13
14
|
unless resource.empty?
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
@path = []
|
15
|
+
super(resource.last.split('/'))
|
16
|
+
else
|
17
|
+
super([])
|
18
18
|
end
|
19
|
-
@level = level.to_i
|
20
|
-
super(@path.freeze)
|
21
19
|
end
|
22
20
|
|
23
|
-
def
|
24
|
-
|
21
|
+
def to_s
|
22
|
+
DmozSax::Path.path_str(DmozSax::Path.normalize(self))
|
25
23
|
end
|
26
24
|
|
27
|
-
def
|
28
|
-
|
25
|
+
def parent_to_s
|
26
|
+
DmozSax::Path.path_str(DmozSax::Path.normalize(self[0...-1]))
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.path_str arr
|
30
|
+
arr.empty? ? '/' : "/#{ arr.join('/') }"
|
29
31
|
end
|
30
32
|
|
31
|
-
def
|
32
|
-
if
|
33
|
-
|
33
|
+
def self.normalize arr
|
34
|
+
if arr.nil? or arr.length == 0
|
35
|
+
[]
|
34
36
|
else
|
35
|
-
|
37
|
+
dup = arr.dup.reject {|a| a =~ /^[A-Z0-9]$/}
|
38
|
+
dup.shift if arr.first == 'Top'
|
39
|
+
dup
|
36
40
|
end
|
37
41
|
end
|
38
42
|
end
|
data/lib/dmoz_sax/version.rb
CHANGED
data/spec/alias_spec.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe DmozSax::Alias do
|
4
|
-
|
5
|
-
|
4
|
+
it "has a name and path parsed from a string" do
|
5
|
+
a = DmozSax::Alias.new("Publishing:Top/Arts/Business/Publishing")
|
6
|
+
a.path.name.should == 'Publishing'
|
7
|
+
a.path.to_s.should == '/Arts/Business/Publishing'
|
8
|
+
end
|
6
9
|
end
|
data/spec/content_document_spec.rb
CHANGED
@@ -1,11 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe DmozSax::ContentDocument do
|
4
|
-
#it 'can parse a real content.rdf.u8 document' do
|
5
|
-
# parser = Nokogiri::XML::SAX::Parser.new(DmozSax::ContentDocument.new)
|
6
|
-
# parser.parse(File.open('/opt/data/DMOZ/content.rdf.u8'))
|
7
|
-
#end
|
8
|
-
|
9
4
|
it 'can parse a sample content.rdf.u8 document' do
|
10
5
|
|
11
6
|
topics = []
|
data/spec/path_spec.rb
CHANGED
@@ -10,14 +10,14 @@ describe DmozSax::Path do
|
|
10
10
|
|
11
11
|
(('A'..'Z').to_a + (0..9).to_a).each do |char|
|
12
12
|
path = DmozSax::Path.new("Top/This/Topic/#{ char }/Path")
|
13
|
-
path.
|
13
|
+
path.to_s.should == "/This/Topic/Path"
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
17
|
it "may optionally be preceeded by a name or identifier" do
|
18
18
|
path = DmozSax::Path.new("Sample_Directory:Top/This/Topic/Path")
|
19
19
|
path.name.should == 'Sample Directory'
|
20
|
-
path.
|
20
|
+
path.to_s.should == '/This/Topic/Path'
|
21
21
|
end
|
22
22
|
|
23
23
|
context "as an immutable array" do
|
@@ -41,20 +41,20 @@ describe DmozSax::Path do
|
|
41
41
|
context "getting parent path" do
|
42
42
|
it "returns parent path" do
|
43
43
|
path = DmozSax::Path.new 'This/Topic/Path'
|
44
|
-
path.
|
44
|
+
path.parent_to_s.should == '/This/Topic'
|
45
45
|
end
|
46
46
|
|
47
47
|
it "has not parent path if top level path" do
|
48
48
|
path = DmozSax::Path.new ''
|
49
|
-
path.
|
49
|
+
path.parent_to_s.should == '/'
|
50
50
|
|
51
51
|
path = DmozSax::Path.new 'Top'
|
52
|
-
path.
|
52
|
+
path.parent_to_s.should == '/'
|
53
53
|
end
|
54
54
|
|
55
55
|
it "follows the same rules removing index categories" do
|
56
56
|
path = DmozSax::Path.new 'Top/Topic/A/Path'
|
57
|
-
path.
|
57
|
+
path.parent_to_s.should == '/Topic'
|
58
58
|
end
|
59
59
|
end
|
60
60
|
end
|
data/spec/structure_document_spec.rb
CHANGED
@@ -20,12 +20,14 @@ describe DmozSax::StructureDocument do
|
|
20
20
|
parser.parse(File.open('spec/samples/structure_sample.rdf.u8'))
|
21
21
|
topics.count.should == 2
|
22
22
|
topics[1].title.should == 'Arts'
|
23
|
-
topics[1].path.should == ['Arts']
|
23
|
+
topics[1].path.should == ['Top', 'Arts']
|
24
24
|
topics[1].description.should include 'aesthetic objects'
|
25
25
|
topics[1].cid.should == 381773
|
26
26
|
|
27
27
|
aliases.count.should == 2
|
28
|
-
aliases[0].
|
29
|
-
aliases[0].path.should == ['Business','Publishing and Printing','Publishing','Books','Arts']
|
28
|
+
aliases[0].path.name.should == 'Publishers'
|
29
|
+
aliases[0].path.should == ['Top','Business','Publishing and Printing','Publishing','Books','Arts']
|
30
|
+
aliases[0].path.to_s.should == '/Business/Publishing and Printing/Publishing/Books/Arts'
|
31
|
+
aliases[0].path.parent_to_s.should == '/Business/Publishing and Printing/Publishing/Books'
|
30
32
|
end
|
31
33
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmoz_sax_doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|