llt-tei_handler 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/lib/llt/tei_handler/pre_processor.rb +23 -2
- data/lib/llt/tei_handler/version.rb +1 -1
- data/spec/lib/llt/tei_handler/pre_processor_spec.rb +22 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e59ea02658a4ad35e4ad4621532d8af69be04fda
|
4
|
+
data.tar.gz: 531f99e7c1b0e1c91ba7eb85e289f16392f8e22b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de353304255b5b1a3da61cc27538716b01929688ac90eb98f5eb70267c0a17d2c1d222d6047ce82107534fdc1abb16292e310f3b6b7d85a3305eed9812a1d65d
|
7
|
+
data.tar.gz: c3c184ce65d6b735b98d18c3886ca9c3e17d0919756dd268ddc75c698488aa2bb93cbdd9a737110d76e01817be84d8f0bd5649ae3e0078f4e2bd9d2d9c2bd20e
|
data/Gemfile
CHANGED
data/lib/llt/tei_handler/pre_processor.rb
CHANGED
@@ -5,7 +5,7 @@ module LLT
|
|
5
5
|
|
6
6
|
def initialize(document)
|
7
7
|
@document = Nokogiri::XML(document)
|
8
|
-
|
8
|
+
try_to_find_tei_root unless is_tei?
|
9
9
|
end
|
10
10
|
|
11
11
|
def to_xml
|
@@ -13,7 +13,7 @@ module LLT
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def is_tei?
|
16
|
-
@document.root.name
|
16
|
+
@document.root.name =~ /^TEI/
|
17
17
|
end
|
18
18
|
|
19
19
|
def ignore_nodes(*nodes)
|
@@ -33,6 +33,27 @@ module LLT
|
|
33
33
|
doc
|
34
34
|
end
|
35
35
|
end
|
36
|
+
|
37
|
+
def try_to_find_tei_root
|
38
|
+
tei = @document.xpath('//*[name() = "TEI" or name() = "TEI.2"]').first
|
39
|
+
if tei
|
40
|
+
if RUBY_ENGINE == "jruby"
|
41
|
+
# Pretty unnecessarily reparses the document fragment, but
|
42
|
+
# nokogiri-java seems to have problems with handling namespaces
|
43
|
+
# when assigning the root node of a document by hand.
|
44
|
+
#
|
45
|
+
# The issue has been reported on the nokogiri-talk Google group
|
46
|
+
# and is described in detail there (currently awaiting approval)
|
47
|
+
@document = Nokogiri::XML(tei.to_s)
|
48
|
+
else
|
49
|
+
@document = Nokogiri::XML::Document.new
|
50
|
+
@document.root = tei
|
51
|
+
end
|
52
|
+
@document.encoding = "UTF-8"
|
53
|
+
else
|
54
|
+
raise ArgumentError.new('Document is no TEI XML')
|
55
|
+
end
|
56
|
+
end
|
36
57
|
end
|
37
58
|
end
|
38
59
|
end
|
data/spec/lib/llt/tei_handler/pre_processor_spec.rb
CHANGED
@@ -17,15 +17,36 @@ describe LLT::TeiHandler::PreProcessor do
|
|
17
17
|
expect { pre_processor.new(doc) }.not_to raise_error
|
18
18
|
end
|
19
19
|
|
20
|
+
it "tries to find the TEI root element" do
|
21
|
+
doc = <<-EOF
|
22
|
+
<?xml version="1.0" encoding="utf-8"?>
|
23
|
+
<reply>
|
24
|
+
<TEI xmlns="http://www.tei-c.org/ns/1.0">
|
25
|
+
<teiHeader></teiHeader>
|
26
|
+
</TEI>
|
27
|
+
</reply>
|
28
|
+
EOF
|
29
|
+
expect { pre_processor.new(doc) }.not_to raise_error
|
30
|
+
end
|
31
|
+
|
20
32
|
it "throws an error when the document is NOT TEI" do
|
21
33
|
doc = <<-EOF
|
22
34
|
<?xml version="1.0" encoding="utf-8"?>
|
23
35
|
<doc>
|
24
36
|
</doc>
|
25
37
|
EOF
|
26
|
-
|
27
38
|
expect { pre_processor.new(doc) }.to raise_error ArgumentError
|
28
39
|
end
|
40
|
+
|
41
|
+
it "allows different TEI versions" do
|
42
|
+
doc = <<-EOF
|
43
|
+
<?xml version="1.0" encoding="utf-8"?>
|
44
|
+
<TEI.2 xmlns="http://www.tei-c.org/ns/1.0">
|
45
|
+
</TEI.2>
|
46
|
+
EOF
|
47
|
+
expect { pre_processor.new(doc) }.not_to raise_error
|
48
|
+
|
49
|
+
end
|
29
50
|
end
|
30
51
|
|
31
52
|
let(:tei_doc) do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llt-tei_handler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- LFDM
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|