llt-tei_handler 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/lib/llt/tei_handler/pre_processor.rb +23 -2
- data/lib/llt/tei_handler/version.rb +1 -1
- data/spec/lib/llt/tei_handler/pre_processor_spec.rb +22 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e59ea02658a4ad35e4ad4621532d8af69be04fda
|
4
|
+
data.tar.gz: 531f99e7c1b0e1c91ba7eb85e289f16392f8e22b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de353304255b5b1a3da61cc27538716b01929688ac90eb98f5eb70267c0a17d2c1d222d6047ce82107534fdc1abb16292e310f3b6b7d85a3305eed9812a1d65d
|
7
|
+
data.tar.gz: c3c184ce65d6b735b98d18c3886ca9c3e17d0919756dd268ddc75c698488aa2bb93cbdd9a737110d76e01817be84d8f0bd5649ae3e0078f4e2bd9d2d9c2bd20e
|
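data/Gemfile
CHANGED
[hunk not captured in this extract; summary above lists +1 -1]
data/lib/llt/tei_handler/pre_processor.rb
CHANGED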
@@ -5,7 +5,7 @@ module LLT
|
|
5
5
|
|
6
6
|
def initialize(document)
|
7
7
|
@document = Nokogiri::XML(document)
|
8
|
-
|
8
|
+
try_to_find_tei_root unless is_tei?
|
9
9
|
end
|
10
10
|
|
11
11
|
def to_xml
|
@@ -13,7 +13,7 @@ module LLT
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def is_tei?
|
16
|
-
@document.root.name
|
16
|
+
@document.root.name =~ /^TEI/
|
17
17
|
end
|
18
18
|
|
19
19
|
def ignore_nodes(*nodes)
|
@@ -33,6 +33,27 @@ module LLT
|
|
33
33
|
doc
|
34
34
|
end
|
35
35
|
end
|
36
|
+
|
37
|
+
def try_to_find_tei_root
|
38
|
+
tei = @document.xpath('//*[name() = "TEI" or name() = "TEI.2"]').first
|
39
|
+
if tei
|
40
|
+
if RUBY_ENGINE == "jruby"
|
41
|
+
# Pretty unnecessarily reparses the document fragment, but
|
42
|
+
# nokogiri-java seems to have problems with handling namespaces
|
43
|
+
# when assigning the root node of a document by hand.
|
44
|
+
#
|
45
|
+
# The issue has been reported on the nokogiri-talk Google group
|
46
|
+
# and is described in detail there (currently awaiting approval)
|
47
|
+
@document = Nokogiri::XML(tei.to_s)
|
48
|
+
else
|
49
|
+
@document = Nokogiri::XML::Document.new
|
50
|
+
@document.root = tei
|
51
|
+
end
|
52
|
+
@document.encoding = "UTF-8"
|
53
|
+
else
|
54
|
+
raise ArgumentError.new('Document is no TEI XML')
|
55
|
+
end
|
56
|
+
end
|
36
57
|
end
|
37
58
|
end
|
38
59
|
end
|
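data/lib/llt/tei_handler/version.rb
CHANGED
[hunk not captured in this extract; summary above lists +1 -1]
data/spec/lib/llt/tei_handler/pre_processor_spec.rb
CHANGED
@@ -17,15 +17,36 @@ describe LLT::TeiHandler::PreProcessor do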
|
|
17
17
|
expect { pre_processor.new(doc) }.not_to raise_error
|
18
18
|
end
|
19
19
|
|
20
|
+
it "tries to find the TEI root element" do
|
21
|
+
doc = <<-EOF
|
22
|
+
<?xml version="1.0" encoding="utf-8"?>
|
23
|
+
<reply>
|
24
|
+
<TEI xmlns="http://www.tei-c.org/ns/1.0">
|
25
|
+
<teiHeader></teiHeader>
|
26
|
+
</TEI>
|
27
|
+
</reply>
|
28
|
+
EOF
|
29
|
+
expect { pre_processor.new(doc) }.not_to raise_error
|
30
|
+
end
|
31
|
+
|
20
32
|
it "throws an error when the document is NOT TEI" do
|
21
33
|
doc = <<-EOF
|
22
34
|
<?xml version="1.0" encoding="utf-8"?>
|
23
35
|
<doc>
|
24
36
|
</doc>
|
25
37
|
EOF
|
26
|
-
|
27
38
|
expect { pre_processor.new(doc) }.to raise_error ArgumentError
|
28
39
|
end
|
40
|
+
|
41
|
+
it "allows different TEI versions" do
|
42
|
+
doc = <<-EOF
|
43
|
+
<?xml version="1.0" encoding="utf-8"?>
|
44
|
+
<TEI.2 xmlns="http://www.tei-c.org/ns/1.0">
|
45
|
+
</TEI.2>
|
46
|
+
EOF
|
47
|
+
expect { pre_processor.new(doc) }.not_to raise_error
|
48
|
+
|
49
|
+
end
|
29
50
|
end
|
30
51
|
|
31
52
|
let(:tei_doc) do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llt-tei_handler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- LFDM
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|