dmoz_sax_doc 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/lib/dmoz_sax/alias.rb +0 -8
- data/lib/dmoz_sax/path.rb +18 -14
- data/lib/dmoz_sax/version.rb +1 -1
- data/spec/alias_spec.rb +5 -2
- data/spec/content_document_spec.rb +0 -5
- data/spec/path_spec.rb +6 -6
- data/spec/structure_document_spec.rb +5 -3
- metadata +2 -2
data/Gemfile.lock
CHANGED
data/lib/dmoz_sax/alias.rb
CHANGED
data/lib/dmoz_sax/path.rb
CHANGED
@@ -9,30 +9,34 @@ module DmozSax
|
|
9
9
|
resource = str.gsub('_', ' ').split(':')
|
10
10
|
|
11
11
|
@name = resource.first if resource.length == 2
|
12
|
+
@level = level.to_i
|
12
13
|
|
13
14
|
unless resource.empty?
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
@path = []
|
15
|
+
super(resource.last.split('/'))
|
16
|
+
else
|
17
|
+
super([])
|
18
18
|
end
|
19
|
-
@level = level.to_i
|
20
|
-
super(@path.freeze)
|
21
19
|
end
|
22
20
|
|
23
|
-
def
|
24
|
-
|
21
|
+
def to_s
|
22
|
+
DmozSax::Path.path_str(DmozSax::Path.normalize(self))
|
25
23
|
end
|
26
24
|
|
27
|
-
def
|
28
|
-
|
25
|
+
def parent_to_s
|
26
|
+
DmozSax::Path.path_str(DmozSax::Path.normalize(self[0...-1]))
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.path_str arr
|
30
|
+
arr.empty? ? '/' : "/#{ arr.join('/') }"
|
29
31
|
end
|
30
32
|
|
31
|
-
def
|
32
|
-
if
|
33
|
-
|
33
|
+
def self.normalize arr
|
34
|
+
if arr.nil? or arr.length == 0
|
35
|
+
[]
|
34
36
|
else
|
35
|
-
|
37
|
+
dup = arr.dup.reject {|a| a =~ /^[A-Z0-9]$/}
|
38
|
+
dup.shift if arr.first == 'Top'
|
39
|
+
dup
|
36
40
|
end
|
37
41
|
end
|
38
42
|
end
|
data/lib/dmoz_sax/version.rb
CHANGED
data/spec/alias_spec.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe DmozSax::Alias do
|
4
|
-
|
5
|
-
|
4
|
+
it "has a name and path parsed from a string" do
|
5
|
+
a = DmozSax::Alias.new("Publishing:Top/Arts/Business/Publishing")
|
6
|
+
a.path.name.should == 'Publishing'
|
7
|
+
a.path.to_s.should == '/Arts/Business/Publishing'
|
8
|
+
end
|
6
9
|
end
|
data/spec/content_document_spec.rb
CHANGED
@@ -1,11 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe DmozSax::ContentDocument do
|
4
|
-
#it 'can parse a real content.rdf.u8 document' do
|
5
|
-
# parser = Nokogiri::XML::SAX::Parser.new(DmozSax::ContentDocument.new)
|
6
|
-
# parser.parse(File.open('/opt/data/DMOZ/content.rdf.u8'))
|
7
|
-
#end
|
8
|
-
|
9
4
|
it 'can parse a sample content.rdf.u8 document' do
|
10
5
|
|
11
6
|
topics = []
|
data/spec/path_spec.rb
CHANGED
@@ -10,14 +10,14 @@ describe DmozSax::Path do
|
|
10
10
|
|
11
11
|
(('A'..'Z').to_a + (0..9).to_a).each do |char|
|
12
12
|
path = DmozSax::Path.new("Top/This/Topic/#{ char }/Path")
|
13
|
-
path.
|
13
|
+
path.to_s.should == "/This/Topic/Path"
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
17
|
it "may optionally be preceeded by a name or identifier" do
|
18
18
|
path = DmozSax::Path.new("Sample_Directory:Top/This/Topic/Path")
|
19
19
|
path.name.should == 'Sample Directory'
|
20
|
-
path.
|
20
|
+
path.to_s.should == '/This/Topic/Path'
|
21
21
|
end
|
22
22
|
|
23
23
|
context "as an immutable array" do
|
@@ -41,20 +41,20 @@ describe DmozSax::Path do
|
|
41
41
|
context "getting parent path" do
|
42
42
|
it "returns parent path" do
|
43
43
|
path = DmozSax::Path.new 'This/Topic/Path'
|
44
|
-
path.
|
44
|
+
path.parent_to_s.should == '/This/Topic'
|
45
45
|
end
|
46
46
|
|
47
47
|
it "has not parent path if top level path" do
|
48
48
|
path = DmozSax::Path.new ''
|
49
|
-
path.
|
49
|
+
path.parent_to_s.should == '/'
|
50
50
|
|
51
51
|
path = DmozSax::Path.new 'Top'
|
52
|
-
path.
|
52
|
+
path.parent_to_s.should == '/'
|
53
53
|
end
|
54
54
|
|
55
55
|
it "follows the same rules removing index categories" do
|
56
56
|
path = DmozSax::Path.new 'Top/Topic/A/Path'
|
57
|
-
path.
|
57
|
+
path.parent_to_s.should == '/Topic'
|
58
58
|
end
|
59
59
|
end
|
60
60
|
end
|
data/spec/structure_document_spec.rb
CHANGED
@@ -20,12 +20,14 @@ describe DmozSax::StructureDocument do
|
|
20
20
|
parser.parse(File.open('spec/samples/structure_sample.rdf.u8'))
|
21
21
|
topics.count.should == 2
|
22
22
|
topics[1].title.should == 'Arts'
|
23
|
-
topics[1].path.should == ['Arts']
|
23
|
+
topics[1].path.should == ['Top', 'Arts']
|
24
24
|
topics[1].description.should include 'aesthetic objects'
|
25
25
|
topics[1].cid.should == 381773
|
26
26
|
|
27
27
|
aliases.count.should == 2
|
28
|
-
aliases[0].
|
29
|
-
aliases[0].path.should == ['Business','Publishing and Printing','Publishing','Books','Arts']
|
28
|
+
aliases[0].path.name.should == 'Publishers'
|
29
|
+
aliases[0].path.should == ['Top','Business','Publishing and Printing','Publishing','Books','Arts']
|
30
|
+
aliases[0].path.to_s.should == '/Business/Publishing and Printing/Publishing/Books/Arts'
|
31
|
+
aliases[0].path.parent_to_s.should == '/Business/Publishing and Printing/Publishing/Books'
|
30
32
|
end
|
31
33
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmoz_sax_doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|