llt-segmenter 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 201cbc98ca192041429641ef0e3c0fcf128a3654
4
- data.tar.gz: 1f61cb110d27d4b60427f16b1c7eb58576c67eb0
3
+ metadata.gz: f6c90686915f9e02706f88b650695a4b40aa6867
4
+ data.tar.gz: 67ed2ade8bb50c0419a82f18b40eeef031c7a243
5
5
  SHA512:
6
- metadata.gz: 76017f1fc143d0d6d190b341218c3272f71ad230a1c81496ab9813a94a285262f3c15e73ef73570fc8a5b5d07ceb726b7e2ad9e046f9c54a8d61576017c0aac8
7
- data.tar.gz: 0915e76fac1f68d23b9454c010fc5820b021457ff4b53cf8bb0bf98a05b73cffc21b888a0169d14d331a571812c5027e26e7dd56a07dc6936be7538160dff099
6
+ metadata.gz: e38adead709637f4520233ead966d000c332b8b356f4e2d827ae40b3ce31d6e2a0e6969762b57393bcb02c9a9b647e1e617a5bd81ab20712663ac525de7a5062
7
+ data.tar.gz: 50fedc9e52883f8458a437a6f6ff6252bbe6e1f6d78cdfb59d5b78873a27da3dfc40481a1008cf7278493196041cc42a5beaa40be2faef9071e5f36aad946769
data/Gemfile CHANGED
@@ -6,9 +6,9 @@ gem 'pry'
6
6
 
7
7
  gem 'coveralls', require: false
8
8
 
9
- gem 'llt-core', git: 'https://github.com/latin-language-toolkit/llt-core.git'
10
- gem 'llt-constants', git: 'https://github.com/latin-language-toolkit/llt-constants.git'
11
- gem 'llt-logger', git: 'https://github.com/latin-language-toolkit/llt-logger.git'
9
+ gem 'llt-core', git: 'git://github.com/latin-language-toolkit/llt-core.git'
10
+ gem 'llt-constants', git: 'git://github.com/latin-language-toolkit/llt-constants.git'
11
+ gem 'llt-logger', git: 'git://github.com/latin-language-toolkit/llt-logger.git'
12
12
 
13
13
  platform :jruby do
14
14
  gem 'jruby-httpclient'
@@ -5,6 +5,7 @@ require 'llt/core/api'
5
5
 
6
6
  class Api < Sinatra::Base
7
7
  register Sinatra::RespondWith
8
+ register LLT::Core::Api::VersionRoutes
8
9
  helpers LLT::Core::Api::Helpers
9
10
 
10
11
  get '/segment' do
@@ -17,4 +18,6 @@ class Api < Sinatra::Base
17
18
  f.xml { to_xml(sentences, params) }
18
19
  end
19
20
  end
21
+
22
+ add_version_route_for('/segment', dependencies: %i{ Core Segmenter })
20
23
  end
@@ -1,5 +1,5 @@
1
1
  module LLT
2
2
  class Segmenter
3
- VERSION = "0.0.4"
3
+ VERSION = "0.0.5"
4
4
  end
5
5
  end
@@ -0,0 +1,7 @@
1
+ module LLT
2
+ class Segmenter
3
+ class VersionInfo
4
+ include Core::Versioner
5
+ end
6
+ end
7
+ end
data/lib/llt/segmenter.rb CHANGED
@@ -2,6 +2,8 @@ require "llt/constants"
2
2
  require "llt/core"
3
3
  require "llt/logger"
4
4
  require "llt/sentence"
5
+ require "llt/segmenter/version"
6
+ require "llt/segmenter/version_info"
5
7
 
6
8
  module LLT
7
9
  class Segmenter
@@ -30,7 +32,7 @@ module LLT
30
32
  AWB = ALL_ABBRS_PIPED.split('|').map { |abbr| "(?<=\\s|^|>)#{abbr}" }.join('|')
31
33
  # the xml escaped characters cannot be refactored to something along
32
34
  # &(?:amp|quot); - it's an invalid pattern in the look-behind
33
- SENTENCE_CLOSER = /(?<!#{AWB})\.(?!\.)|[\?!:]|((?<!&amp|&quot|&apos|&lt|&gt);)/
35
+ SENTENCE_CLOSER = /(?<!#{AWB})\.(?!\.)|[\?!:·]|((?<!&amp|&quot|&apos|&lt|&gt);)/
34
36
  DIRECT_SPEECH_DELIMITER = /['"”]|&(?:apos|quot);/
35
37
  TRAILERS = /\)|\s*<\/.*?>/
36
38
 
@@ -51,8 +53,14 @@ module LLT
51
53
  @indexing = parse_option(:indexing, options)
52
54
  @id = 0 if @indexing
53
55
 
56
+ # newline_boundary is only active when we aren't working with xml!
54
57
  nl_boundary = parse_option(:newline_boundary, options)
55
- @sentence_closer = Regexp.union(SENTENCE_CLOSER, /\n{#{nl_boundary}}/)
58
+
59
+ @sentence_closer = build_sentence_closer_regexp(nl_boundary)
60
+ end
61
+
62
+ def build_sentence_closer_regexp(nl_boundary)
63
+ @xml ? SENTENCE_CLOSER : Regexp.union(SENTENCE_CLOSER, /\n{#{nl_boundary}}/)
56
64
  end
57
65
 
58
66
  # Used to normalize wonky whitespace in front of or behind direct speech
@@ -115,9 +123,16 @@ module LLT
115
123
 
116
124
  def scan_through_string(scanner, sentences = [])
117
125
  while scanner.rest?
126
+ loop_guard = scanner.pos
127
+
118
128
  sentence = scan_until_next_sentence(scanner, sentences)
119
129
 
120
- rebuild_xml_tags(scanner, sentence, sentences) if @xml
130
+ raise if scanner.pos == loop_guard
131
+
132
+ if @xml
133
+ rebuild_xml_tags(scanner, sentence, sentences)
134
+ take_all_closing_tags(scanner, sentence)
135
+ end
121
136
  sentence << trailing_delimiters(scanner)
122
137
 
123
138
  sentence.strip!
@@ -130,6 +145,10 @@ module LLT
130
145
  sentences
131
146
  end
132
147
 
148
+ def scan_to_first_real_text(scanner)
149
+ scanner.scan_until(/<.*?>\s*(?=\w)/)
150
+ end
151
+
133
152
  def scan_until_next_sentence(scanner, sentences)
134
153
  scanner.scan_until(@sentence_closer) ||
135
154
  rescue_no_delimiters(sentences, scanner)
@@ -160,6 +179,18 @@ module LLT
160
179
  ! sentence.match(/#{@sentence_closer}\s*<.*?>$/)
161
180
  end
162
181
 
182
+ def take_all_closing_tags(scanner, sentence)
183
+ if closing_tags_only?(scanner.rest)
184
+ sentence << scanner.rest
185
+ scanner.terminate
186
+ end
187
+ end
188
+
189
+ def closing_tags_only?(str)
190
+ str.match(/\A(\s*<\/.*?>\s*|\s*<.*?\/>\s*)+\z/)
191
+ end
192
+
193
+
163
194
  def rescue_no_delimiters(sentences, scanner)
164
195
  if sentences.any?
165
196
  # broken off texts
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
22
  spec.add_development_dependency "rake"
23
- spec.add_development_dependency "rspec"
23
+ spec.add_development_dependency "rspec", "~> 2.14"
24
24
  spec.add_development_dependency "simplecov", "~> 0.7"
25
25
  spec.add_dependency "llt-core"
26
26
  spec.add_dependency "llt-constants"
@@ -0,0 +1,203 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-model href="http://www.stoa.org/epidoc/schema/latest/tei-epidoc.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
3
+ <TEI xmlns="http://www.tei-c.org/ns/1.0">
4
+ <text xml:lang="lat">
5
+ <body>
6
+ <div type="edition" subtype="poesis-elegia" n="urn:cts:croala:petrov02.eleg01.lat1">
7
+ <lg met="elegiacum">
8
+ <l n="1">Conjugis ut carae patrio mens debita caelo</l>
9
+
10
+ <l n="2"> Pars melior, miseram laeta reliquit humum:</l>
11
+
12
+ <l n="3">Postquam, illa rapta, simul omnis rapta voluptas</l>
13
+
14
+ <l n="4"> Est mihi (jam ex illo tempore mensis abit)</l>
15
+
16
+ <l n="5">Non nisi perpetuos fundunt mea lumina fletus;</l>
17
+
18
+ <l n="6">Lux sequitur noctem tristis, et umbra diem.</l>
19
+
20
+ <l n="7">Pabula sunt <sic>lacrymae</sic>, <sic>lacrymae</sic> sunt pocula, sed quae</l>
21
+
22
+ <l n="8"> Plus, Medea, tuo gramine fellis habent.</l>
23
+
24
+ <l n="9">Sic me qui pascit, simul enecat humor, et hospes</l>
25
+
26
+ <l n="10"> Ipse mei cordis, cor mihi luctus edit.</l>
27
+
28
+ <l n="11">Ac veluti turtur, sociam cui barbarus auceps</l>
29
+
30
+ <l n="12"> Exceptam structis perdidit insidiis,</l>
31
+
32
+ <l n="13">Si quam forte videt sine vite, et frondibus ulmum,</l>
33
+
34
+ <l n="14"> Aequalem sorti, consimilemque suae,</l>
35
+
36
+ <l n="15">Flectit iter, ramoque sedens miserabilis ales</l>
37
+
38
+ <l n="16"> Gutture subrauco nil nisi triste gemit.</l>
39
+
40
+ <l n="17">Non illum exhilarat facies pulcherrima Veris,</l>
41
+
42
+ <l n="18"> Nulla sibi in notis pabula quaerit agris:</l>
43
+
44
+ <l n="19">Non sociae possunt volucres abducere ramo,</l>
45
+
46
+ <l n="20"> Ad prope labentes non sitis urget aquas.</l>
47
+
48
+ <l n="21">Sic ego. Sic vitam sine te, dulcissima conjux,</l>
49
+
50
+ <l n="22"> Si vitae haec nomen vita meretur, ago.</l>
51
+
52
+ <l n="23">Sola queri misero, sola est mihi flere voluptas,</l>
53
+
54
+ <l n="24"> Sola loci facies maesta, silensque placet.</l>
55
+
56
+ <l n="25">Non aures cantus, non fila loquacia mulcent;</l>
57
+
58
+ <l n="26"> Non oculos formae gratia, flosque rapit.</l>
59
+
60
+ <l n="27">Unam te in sylvis, unam in florentibus hortis,</l>
61
+
62
+ <l n="28">Per juga, per valles quaero, nec invenio.</l>
63
+
64
+ <l n="29">Nec magis Eurydice est Vati quaesita marito,</l>
65
+
66
+ <l n="30"> Tartareum quamvis viderit ille canem:</l>
67
+
68
+ <l n="31">Nec magis est Cephalo Procris defleta, videnti</l>
69
+
70
+ <l n="32"> Deceptae errorem, flagitiumque manus;</l>
71
+
72
+ <l n="33">Quam totas ego te noctes, mea vita, diesque</l>
73
+
74
+ <l n="34"> Quaero, nec inventam maestus abesse queror.</l>
75
+
76
+ <l n="35">Et tamen ante oculos errat tua semper imago,</l>
77
+
78
+ <l n="36"> (Quid non fingit amans?) et tua verba sonant.</l>
79
+
80
+ <l n="37">Si qua avis in densis, Siren innoxia, lucis</l>
81
+
82
+ <l n="38"> Est audita mihi fundere dulce melos,</l>
83
+
84
+ <l n="39">Sisto gradum, et similis deceptus imagine vocis</l>
85
+
86
+ <l n="40"> Est, inquam, est cantus conjugis ille meae.</l>
87
+
88
+ <l n="41">Si quando ad fontes, aut ad vernantia prata,</l>
89
+
90
+ <l n="42"> Aut maris ad placidas me tulit error aquas,</l>
91
+
92
+ <l n="43">Hic locus est, dico, quem visere saepe solebat.</l>
93
+
94
+ <l n="44"> Quae mora (jam sol est ortus) abesse facit?</l>
95
+
96
+ <l n="45">Sed jam jam veniet; latet illa forte sub umbra,</l>
97
+
98
+ <l n="46"> Aut illi pietas est sua causa morae.</l>
99
+
100
+ <l n="47">Causa morae est certe pietas: nisi fallimur, haec est,</l>
101
+
102
+ <l n="48"> Fundentem ad superos quae videt hora preces.</l>
103
+
104
+ <l n="49">Mox sat ut illusum me liquit amabilis error,</l>
105
+
106
+ <l n="50"> Protinus ex oculis bina fluenta cadunt.</l>
107
+
108
+ <l n="51">Bina fluenta cadunt, quorum hinc dolor elicit unum,</l>
109
+
110
+ <l n="52"> Inde aliud, tanti causa doloris, amor.</l>
111
+
112
+ <l n="53">Meque ipsum incuso, quod sim tam stultus, et amens,</l>
113
+
114
+ <l n="54"> Et pascam aerumnas crudelitate meas:</l>
115
+
116
+ <l n="55">Rursus in errores tamen hos delabor, et hujus</l>
117
+
118
+ <l n="56"> Erroris rursus paenitet esse reum.</l>
119
+
120
+ <l n="57">Sic pugnant mea vota meis contraria votis,</l>
121
+
122
+ <l n="58"> Nec placet, heu! misero quod modo dulce fuit.</l>
123
+
124
+ <l n="59">Nec quod sim discors, angit modo; saevius angit</l>
125
+
126
+ <l n="60"> Vivere me longos te sine posse dies.</l>
127
+
128
+ <l n="61">Ah! ubi sunt voces illae, et mea fortia verba?</l>
129
+
130
+ <l n="62"> Ah! ubi, quae verbis debet inesse, fides?</l>
131
+
132
+ <l n="63">Me quoque rapturam subito, quae te hora tulisset,</l>
133
+
134
+ <l n="64"> Et pariter praedam mortis utrumque fore?</l>
135
+
136
+ <l n="65">Ecce tamen vivo, nec post nova cornua Phoebes</l>
137
+
138
+ <l n="66"> Vis me maeroris perdere longa potest.</l>
139
+
140
+ <l n="67">Heu! quae dura silex, quod inexsuperabile robur,</l>
141
+
142
+ <l n="68"> Quod ferrum, et triplex aes mihi pectus obit?</l>
143
+
144
+ <l n="69">Vivo equidem, vivo, sed morte est tristior ipsa,</l>
145
+
146
+ <l n="70"> Quae sine te, conjux, vita relicta mihi est.</l>
147
+
148
+ <l n="71">At tu nunc choreis Natorum immixta tuorum,</l>
149
+
150
+ <l n="72"> Qui (prona) facili ad Superos te praeiere (via) gradu,</l>
151
+
152
+ <l n="73">Plena Deo frueris, nec, quae tibi parta, bonorum</l>
153
+
154
+ <l n="74"> Amittendorum te timor ullus habet.</l>
155
+
156
+ <l n="75">Nam tua non tristes pietas te duxit in oras:</l>
157
+
158
+ <l n="76"> Debetur sedes non nisi laeta piis.</l>
159
+
160
+ <l n="77">Te plaga (credo equidem) summi plaga lucida caeli,</l>
161
+
162
+ <l n="78"> Te laeta aeterno vere vireta tenent.</l>
163
+
164
+ <l n="79">Ipsum ipsum Auctorem rerum, quem qui videt, ultra</l>
165
+
166
+ <l n="80"> Nil habet optandum, jam sine nube vides.</l>
167
+
168
+ <l n="81">Usque et ubique vides, at non saturata videndo</l>
169
+
170
+ <l n="82"> Illo oculos pascis; pressa sed usque fame es.</l>
171
+
172
+ <l n="83">Te vis implet opum, sed non (licet impleat) explet;</l>
173
+
174
+ <l n="84"> Excipit unum aliud, subsequiturque bonum.</l>
175
+
176
+ <l n="85">Non te humiles curae, non te mortalia tangunt;</l>
177
+
178
+ <l n="86"> Prae caelo, et stellis quam tibi sordet humus!</l>
179
+
180
+ <l n="87">Sordet humus certe. non sic tamen, ut tua nunquam</l>
181
+
182
+ <l n="88"> Ad miserum flectas lumina blanda virum;</l>
183
+
184
+ <l n="89">Audire aut flentem fugias, et saucia flentis,</l>
185
+
186
+ <l n="90"> Qua licet, admota corda fovere manu;</l>
187
+
188
+ <l n="91">Iactatumque diu ventisque undisque vocare</l>
189
+
190
+ <l n="92"> Ad laeta Eridani littora stelliferi.</l>
191
+
192
+ <l n="93">Quam tua sors felix, quam nostra simillima morti est,</l>
193
+
194
+ <l n="94"> Felle ego, tu Divum vesceris ambrosia.</l>
195
+
196
+ <l n="95">Non tamen invideo tua gaudia, sed miser opto,</l>
197
+
198
+ <l n="96"> Laetitiae consors quam prius esse tuae.</l>
199
+ </lg>
200
+ </div>
201
+ </body>
202
+ </text>
203
+ </TEI>
@@ -0,0 +1,204 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-model href="http://www.stoa.org/epidoc/schema/latest/tei-epidoc.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
3
+ <TEI xmlns="http://www.tei-c.org/ns/1.0">
4
+ <text xml:lang="lat">
5
+ <body>
6
+ <div type="edition" subtype="poesis-elegia" n="urn:cts:croala:petrov02.eleg01.lat1">
7
+ <lg met="elegiacum">
8
+ <l n="1">Conjugis ut carae patrio mens debita caelo</l>
9
+
10
+ <l n="2"> Pars melior, miseram laeta reliquit humum:</l>
11
+
12
+ <l n="3">Postquam, illa rapta, simul omnis rapta voluptas</l>
13
+
14
+ <l n="4"> Est mihi (jam ex illo tempore mensis abit)</l>
15
+
16
+ <l n="5">Non nisi perpetuos fundunt mea lumina fletus;</l>
17
+
18
+ <l n="6">Lux sequitur noctem tristis, et umbra diem.</l>
19
+
20
+ <l n="7">Pabula sunt <sic>lacrymae</sic>, <sic>lacrymae</sic> sunt pocula, sed
21
+ quae</l>
22
+
23
+ <l n="8"> Plus, Medea, tuo gramine fellis habent.</l>
24
+
25
+ <l n="9">Sic me qui pascit, simul enecat humor, et hospes</l>
26
+
27
+ <l n="10"> Ipse mei cordis, cor mihi luctus edit.</l>
28
+
29
+ <l n="11">Ac veluti turtur, sociam cui barbarus auceps</l>
30
+
31
+ <l n="12"> Exceptam structis perdidit insidiis,</l>
32
+
33
+ <l n="13">Si quam forte videt sine vite, et frondibus ulmum,</l>
34
+
35
+ <l n="14"> Aequalem sorti, consimilemque suae,</l>
36
+
37
+ <l n="15">Flectit iter, ramoque sedens miserabilis ales</l>
38
+
39
+ <l n="16"> Gutture subrauco nil nisi triste gemit.</l>
40
+
41
+ <l n="17">Non illum exhilarat facies pulcherrima Veris,</l>
42
+
43
+ <l n="18"> Nulla sibi in notis pabula quaerit agris:</l>
44
+
45
+ <l n="19">Non sociae possunt volucres abducere ramo,</l>
46
+
47
+ <l n="20"> Ad prope labentes non sitis urget aquas.</l>
48
+
49
+ <l n="21">Sic ego. Sic vitam sine te, dulcissima conjux,</l>
50
+
51
+ <l n="22"> Si vitae haec nomen vita meretur, ago.</l>
52
+
53
+ <l n="23">Sola queri misero, sola est mihi flere voluptas,</l>
54
+
55
+ <l n="24"> Sola loci facies maesta, silensque placet.</l>
56
+
57
+ <l n="25">Non aures cantus, non fila loquacia mulcent;</l>
58
+
59
+ <l n="26"> Non oculos formae gratia, flosque rapit.</l>
60
+
61
+ <l n="27">Unam te in sylvis, unam in florentibus hortis,</l>
62
+
63
+ <l n="28">Per juga, per valles quaero, nec invenio.</l>
64
+
65
+ <l n="29">Nec magis Eurydice est Vati quaesita marito,</l>
66
+
67
+ <l n="30"> Tartareum quamvis viderit ille canem:</l>
68
+
69
+ <l n="31">Nec magis est Cephalo Procris defleta, videnti</l>
70
+
71
+ <l n="32"> Deceptae errorem, flagitiumque manus;</l>
72
+
73
+ <l n="33">Quam totas ego te noctes, mea vita, diesque</l>
74
+
75
+ <l n="34"> Quaero, nec inventam maestus abesse queror.</l>
76
+
77
+ <l n="35">Et tamen ante oculos errat tua semper imago,</l>
78
+
79
+ <l n="36"> (Quid non fingit amans?) et tua verba sonant.</l>
80
+
81
+ <l n="37">Si qua avis in densis, Siren innoxia, lucis</l>
82
+
83
+ <l n="38"> Est audita mihi fundere dulce melos,</l>
84
+
85
+ <l n="39">Sisto gradum, et similis deceptus imagine vocis</l>
86
+
87
+ <l n="40"> Est, inquam, est cantus conjugis ille meae.</l>
88
+
89
+ <l n="41">Si quando ad fontes, aut ad vernantia prata,</l>
90
+
91
+ <l n="42"> Aut maris ad placidas me tulit error aquas,</l>
92
+
93
+ <l n="43">Hic locus est, dico, quem visere saepe solebat.</l>
94
+
95
+ <l n="44"> Quae mora (jam sol est ortus) abesse facit?</l>
96
+
97
+ <l n="45">Sed jam jam veniet; latet illa forte sub umbra,</l>
98
+
99
+ <l n="46"> Aut illi pietas est sua causa morae.</l>
100
+
101
+ <l n="47">Causa morae est certe pietas: nisi fallimur, haec est,</l>
102
+
103
+ <l n="48"> Fundentem ad superos quae videt hora preces.</l>
104
+
105
+ <l n="49">Mox sat ut illusum me liquit amabilis error,</l>
106
+
107
+ <l n="50"> Protinus ex oculis bina fluenta cadunt.</l>
108
+
109
+ <l n="51">Bina fluenta cadunt, quorum hinc dolor elicit unum,</l>
110
+
111
+ <l n="52"> Inde aliud, tanti causa doloris, amor.</l>
112
+
113
+ <l n="53">Meque ipsum incuso, quod sim tam stultus, et amens,</l>
114
+
115
+ <l n="54"> Et pascam aerumnas crudelitate meas:</l>
116
+
117
+ <l n="55">Rursus in errores tamen hos delabor, et hujus</l>
118
+
119
+ <l n="56"> Erroris rursus paenitet esse reum.</l>
120
+
121
+ <l n="57">Sic pugnant mea vota meis contraria votis,</l>
122
+
123
+ <l n="58"> Nec placet, heu! misero quod modo dulce fuit.</l>
124
+
125
+ <l n="59">Nec quod sim discors, angit modo; saevius angit</l>
126
+
127
+ <l n="60"> Vivere me longos te sine posse dies.</l>
128
+
129
+ <l n="61">Ah! ubi sunt voces illae, et mea fortia verba?</l>
130
+
131
+ <l n="62"> Ah! ubi, quae verbis debet inesse, fides?</l>
132
+
133
+ <l n="63">Me quoque rapturam subito, quae te hora tulisset,</l>
134
+
135
+ <l n="64"> Et pariter praedam mortis utrumque fore?</l>
136
+
137
+ <l n="65">Ecce tamen vivo, nec post nova cornua Phoebes</l>
138
+
139
+ <l n="66"> Vis me maeroris perdere longa potest.</l>
140
+
141
+ <l n="67">Heu! quae dura silex, quod inexsuperabile robur,</l>
142
+
143
+ <l n="68"> Quod ferrum, et triplex aes mihi pectus obit?</l>
144
+
145
+ <l n="69">Vivo equidem, vivo, sed morte est tristior ipsa,</l>
146
+
147
+ <l n="70"> Quae sine te, conjux, vita relicta mihi est.</l>
148
+
149
+ <l n="71">At tu nunc choreis Natorum immixta tuorum,</l>
150
+
151
+ <l n="72"> Qui (prona) facili ad Superos te praeiere (via) gradu,</l>
152
+
153
+ <l n="73">Plena Deo frueris, nec, quae tibi parta, bonorum</l>
154
+
155
+ <l n="74"> Amittendorum te timor ullus habet.</l>
156
+
157
+ <l n="75">Nam tua non tristes pietas te duxit in oras:</l>
158
+
159
+ <l n="76"> Debetur sedes non nisi laeta piis.</l>
160
+
161
+ <l n="77">Te plaga (credo equidem) summi plaga lucida caeli,</l>
162
+
163
+ <l n="78"> Te laeta aeterno vere vireta tenent.</l>
164
+
165
+ <l n="79">Ipsum ipsum Auctorem rerum, quem qui videt, ultra</l>
166
+
167
+ <l n="80"> Nil habet optandum, jam sine nube vides.</l>
168
+
169
+ <l n="81">Usque et ubique vides, at non saturata videndo</l>
170
+
171
+ <l n="82"> Illo oculos pascis; pressa sed usque fame es.</l>
172
+
173
+ <l n="83">Te vis implet opum, sed non (licet impleat) explet;</l>
174
+
175
+ <l n="84"> Excipit unum aliud, subsequiturque bonum.</l>
176
+
177
+ <l n="85">Non te humiles curae, non te mortalia tangunt;</l>
178
+
179
+ <l n="86"> Prae caelo, et stellis quam tibi sordet humus!</l>
180
+
181
+ <l n="87">Sordet humus certe. non sic tamen, ut tua nunquam</l>
182
+
183
+ <l n="88"> Ad miserum flectas lumina blanda virum;</l>
184
+
185
+ <l n="89">Audire aut flentem fugias, et saucia flentis,</l>
186
+
187
+ <l n="90"> Qua licet, admota corda fovere manu;</l>
188
+
189
+ <l n="91">Iactatumque diu ventisque undisque vocare</l>
190
+
191
+ <l n="92"> Ad laeta Eridani littora stelliferi.</l>
192
+
193
+ <l n="93">Quam tua sors felix, quam nostra simillima morti est,</l>
194
+
195
+ <l n="94"> Felle ego, tu Divum vesceris ambrosia.</l>
196
+
197
+ <l n="95">Non tamen invideo tua gaudia, sed miser opto,</l>
198
+
199
+ <l n="96"> Laetitiae consors quam prius esse tuae.</l>
200
+ </lg>
201
+ </div>
202
+ </body>
203
+ </text>
204
+ </TEI>
@@ -0,0 +1,202 @@
1
+ <TEI xmlns="http://www.tei-c.org/ns/1.0">
2
+ <text xml:lang="lat">
3
+ <body>
4
+ <div type="edition" subtype="poesis-elegia" n="urn:cts:croala:petrov02.eleg01.lat1">
5
+ <lg met="elegiacum">
6
+ <l n="1">Conjugis ut carae patrio mens debita caelo</l>
7
+
8
+ <l n="2"> Pars melior, miseram laeta reliquit humum:</l>
9
+
10
+ <l n="3">Postquam, illa rapta, simul omnis rapta voluptas</l>
11
+
12
+ <l n="4"> Est mihi (jam ex illo tempore mensis abit)</l>
13
+
14
+ <l n="5">Non nisi perpetuos fundunt mea lumina fletus;</l>
15
+
16
+ <l n="6">Lux sequitur noctem tristis, et umbra diem.</l>
17
+
18
+ <l n="7">Pabula sunt <sic>lacrymae</sic>, <sic>lacrymae</sic> sunt pocula, sed
19
+ quae</l>
20
+
21
+ <l n="8"> Plus, Medea, tuo gramine fellis habent.</l>
22
+
23
+ <l n="9">Sic me qui pascit, simul enecat humor, et hospes</l>
24
+
25
+ <l n="10"> Ipse mei cordis, cor mihi luctus edit.</l>
26
+
27
+ <l n="11">Ac veluti turtur, sociam cui barbarus auceps</l>
28
+
29
+ <l n="12"> Exceptam structis perdidit insidiis,</l>
30
+
31
+ <l n="13">Si quam forte videt sine vite, et frondibus ulmum,</l>
32
+
33
+ <l n="14"> Aequalem sorti, consimilemque suae,</l>
34
+
35
+ <l n="15">Flectit iter, ramoque sedens miserabilis ales</l>
36
+
37
+ <l n="16"> Gutture subrauco nil nisi triste gemit.</l>
38
+
39
+ <l n="17">Non illum exhilarat facies pulcherrima Veris,</l>
40
+
41
+ <l n="18"> Nulla sibi in notis pabula quaerit agris:</l>
42
+
43
+ <l n="19">Non sociae possunt volucres abducere ramo,</l>
44
+
45
+ <l n="20"> Ad prope labentes non sitis urget aquas.</l>
46
+
47
+ <l n="21">Sic ego. Sic vitam sine te, dulcissima conjux,</l>
48
+
49
+ <l n="22"> Si vitae haec nomen vita meretur, ago.</l>
50
+
51
+ <l n="23">Sola queri misero, sola est mihi flere voluptas,</l>
52
+
53
+ <l n="24"> Sola loci facies maesta, silensque placet.</l>
54
+
55
+ <l n="25">Non aures cantus, non fila loquacia mulcent;</l>
56
+
57
+ <l n="26"> Non oculos formae gratia, flosque rapit.</l>
58
+
59
+ <l n="27">Unam te in sylvis, unam in florentibus hortis,</l>
60
+
61
+ <l n="28">Per juga, per valles quaero, nec invenio.</l>
62
+
63
+ <l n="29">Nec magis Eurydice est Vati quaesita marito,</l>
64
+
65
+ <l n="30"> Tartareum quamvis viderit ille canem:</l>
66
+
67
+ <l n="31">Nec magis est Cephalo Procris defleta, videnti</l>
68
+
69
+ <l n="32"> Deceptae errorem, flagitiumque manus;</l>
70
+
71
+ <l n="33">Quam totas ego te noctes, mea vita, diesque</l>
72
+
73
+ <l n="34"> Quaero, nec inventam maestus abesse queror.</l>
74
+
75
+ <l n="35">Et tamen ante oculos errat tua semper imago,</l>
76
+
77
+ <l n="36"> (Quid non fingit amans?) et tua verba sonant.</l>
78
+
79
+ <l n="37">Si qua avis in densis, Siren innoxia, lucis</l>
80
+
81
+ <l n="38"> Est audita mihi fundere dulce melos,</l>
82
+
83
+ <l n="39">Sisto gradum, et similis deceptus imagine vocis</l>
84
+
85
+ <l n="40"> Est, inquam, est cantus conjugis ille meae.</l>
86
+
87
+ <l n="41">Si quando ad fontes, aut ad vernantia prata,</l>
88
+
89
+ <l n="42"> Aut maris ad placidas me tulit error aquas,</l>
90
+
91
+ <l n="43">Hic locus est, dico, quem visere saepe solebat.</l>
92
+
93
+ <l n="44"> Quae mora (jam sol est ortus) abesse facit?</l>
94
+
95
+ <l n="45">Sed jam jam veniet; latet illa forte sub umbra,</l>
96
+
97
+ <l n="46"> Aut illi pietas est sua causa morae.</l>
98
+
99
+ <l n="47">Causa morae est certe pietas: nisi fallimur, haec est,</l>
100
+
101
+ <l n="48"> Fundentem ad superos quae videt hora preces.</l>
102
+
103
+ <l n="49">Mox sat ut illusum me liquit amabilis error,</l>
104
+
105
+ <l n="50"> Protinus ex oculis bina fluenta cadunt.</l>
106
+
107
+ <l n="51">Bina fluenta cadunt, quorum hinc dolor elicit unum,</l>
108
+
109
+ <l n="52"> Inde aliud, tanti causa doloris, amor.</l>
110
+
111
+ <l n="53">Meque ipsum incuso, quod sim tam stultus, et amens,</l>
112
+
113
+ <l n="54"> Et pascam aerumnas crudelitate meas:</l>
114
+
115
+ <l n="55">Rursus in errores tamen hos delabor, et hujus</l>
116
+
117
+ <l n="56"> Erroris rursus paenitet esse reum.</l>
118
+
119
+ <l n="57">Sic pugnant mea vota meis contraria votis,</l>
120
+
121
+ <l n="58"> Nec placet, heu! misero quod modo dulce fuit.</l>
122
+
123
+ <l n="59">Nec quod sim discors, angit modo; saevius angit</l>
124
+
125
+ <l n="60"> Vivere me longos te sine posse dies.</l>
126
+
127
+ <l n="61">Ah! ubi sunt voces illae, et mea fortia verba?</l>
128
+
129
+ <l n="62"> Ah! ubi, quae verbis debet inesse, fides?</l>
130
+
131
+ <l n="63">Me quoque rapturam subito, quae te hora tulisset,</l>
132
+
133
+ <l n="64"> Et pariter praedam mortis utrumque fore?</l>
134
+
135
+ <l n="65">Ecce tamen vivo, nec post nova cornua Phoebes</l>
136
+
137
+ <l n="66"> Vis me maeroris perdere longa potest.</l>
138
+
139
+ <l n="67">Heu! quae dura silex, quod inexsuperabile robur,</l>
140
+
141
+ <l n="68"> Quod ferrum, et triplex aes mihi pectus obit?</l>
142
+
143
+ <l n="69">Vivo equidem, vivo, sed morte est tristior ipsa,</l>
144
+
145
+ <l n="70"> Quae sine te, conjux, vita relicta mihi est.</l>
146
+
147
+ <l n="71">At tu nunc choreis Natorum immixta tuorum,</l>
148
+
149
+ <l n="72"> Qui (prona) facili ad Superos te praeiere (via) gradu,</l>
150
+
151
+ <l n="73">Plena Deo frueris, nec, quae tibi parta, bonorum</l>
152
+
153
+ <l n="74"> Amittendorum te timor ullus habet.</l>
154
+
155
+ <l n="75">Nam tua non tristes pietas te duxit in oras:</l>
156
+
157
+ <l n="76"> Debetur sedes non nisi laeta piis.</l>
158
+
159
+ <l n="77">Te plaga (credo equidem) summi plaga lucida caeli,</l>
160
+
161
+ <l n="78"> Te laeta aeterno vere vireta tenent.</l>
162
+
163
+ <l n="79">Ipsum ipsum Auctorem rerum, quem qui videt, ultra</l>
164
+
165
+ <l n="80"> Nil habet optandum, jam sine nube vides.</l>
166
+
167
+ <l n="81">Usque et ubique vides, at non saturata videndo</l>
168
+
169
+ <l n="82"> Illo oculos pascis; pressa sed usque fame es.</l>
170
+
171
+ <l n="83">Te vis implet opum, sed non (licet impleat) explet;</l>
172
+
173
+ <l n="84"> Excipit unum aliud, subsequiturque bonum.</l>
174
+
175
+ <l n="85">Non te humiles curae, non te mortalia tangunt;</l>
176
+
177
+ <l n="86"> Prae caelo, et stellis quam tibi sordet humus!</l>
178
+
179
+ <l n="87">Sordet humus certe. non sic tamen, ut tua nunquam</l>
180
+
181
+ <l n="88"> Ad miserum flectas lumina blanda virum;</l>
182
+
183
+ <l n="89">Audire aut flentem fugias, et saucia flentis,</l>
184
+
185
+ <l n="90"> Qua licet, admota corda fovere manu;</l>
186
+
187
+ <l n="91">Iactatumque diu ventisque undisque vocare</l>
188
+
189
+ <l n="92"> Ad laeta Eridani littora stelliferi.</l>
190
+
191
+ <l n="93">Quam tua sors felix, quam nostra simillima morti est,</l>
192
+
193
+ <l n="94"> Felle ego, tu Divum vesceris ambrosia.</l>
194
+
195
+ <l n="95">Non tamen invideo tua gaudia, sed miser opto,</l>
196
+
197
+ <l n="96"> Laetitiae consors quam prius esse tuae.</l>
198
+ </lg>
199
+ </div>
200
+ </body>
201
+ </text>
202
+ </TEI>
@@ -0,0 +1,255 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-model href="http://www.stoa.org/epidoc/schema/latest/tei-epidoc.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
3
+ <TEI xmlns="http://www.tei-c.org/ns/1.0">
4
+ <text xml:lang="lat">
5
+ <body>
6
+ <div type="edition" subtype="poesis-elegia" n="urn:cts:croala:petrov02.eleg02.lat1">
7
+ <lg>
8
+ <l n="1">Siccine dividimur, vita mihi carior Uxor,</l>
9
+
10
+ <l n="2">Morte tua nostrum dissociante thorum?</l>
11
+
12
+ <l n="3">Siccine me miserum, nunquam relictura, relinquis</l>
13
+
14
+ <l n="4">Me miserum, et Natos, pignora cara, tuos?</l>
15
+
16
+ <l n="5">Non te Bactra tenent, non me nunc ultima Thule,</l>
17
+
18
+ <l n="6">Longius est spatium, nos quod abesse facit.</l>
19
+
20
+ <l n="7">Nunc mihi, nunc cantum ventosque, amnesque morantem,</l>
21
+
22
+ <l n="8">Nunc cuperem auditas Manibus esse fides.</l>
23
+
24
+ <l n="9">Non summo ut faciam quercus descendere ab Aemo;</l>
25
+
26
+ <l n="10">Aut stare immotas voce canentis aves.</l>
27
+
28
+ <l n="11">Sed carae ut repetam fugientem conjugis Umbram</l>
29
+
30
+ <l n="12">Nunc prece, nunc <sic>cythara</sic> consociante preces</l>
31
+
32
+ <l n="13">Irem audax (amor ipse facem praeferret eunti)</l>
33
+
34
+ <l n="14">Plena tenebrarum per loca, plena metus.</l>
35
+
36
+ <l n="15">Atque aliquis miserans maesta de gente silentum,</l>
37
+
38
+ <l n="16">Quos olim simili vulnere stravit amor,</l>
39
+
40
+ <l n="17">Ostendens procul Elysium, loca laeta piorum,</l>
41
+
42
+ <l n="18">„Quam petis, ille tenet“, diceret, „ille locus“.</l>
43
+
44
+ <l n="19">Non me, non novies Styx interfusa teneret</l>
45
+
46
+ <l n="20">Viminea et novies trajicienda rate.</l>
47
+
48
+ <l n="21">Non fera vipereo terreret crine Megaera,</l>
49
+
50
+ <l n="22">Majestas Stygii non truculenta Jovis.</l>
51
+
52
+ <l n="23">Forsan et ille pius <sic>lacrymas</sic> ad verba precantis</l>
53
+
54
+ <l n="24">Funderet, et reditum Conjugis annueret.</l>
55
+
56
+ <l n="25">Quid loquor ah! demens? non est revocabilis ultra,</l>
57
+
58
+ <l n="26">Transvecta est Stygios quae semel Umbra lacus.</l>
59
+
60
+ <l n="27">Non redit ad fontem, quae fontem deserit unda.</l>
61
+
62
+ <l n="28">Non reflorescunt, quae cecidere, rosae.</l>
63
+
64
+ <l n="29">Ergo, quod est miseris reliquum solumque levamen,</l>
65
+
66
+ <l n="30">Quo lever, heu! superest nil nisi flere mihi,</l>
67
+
68
+ <l n="31">Nil nisi flere meos, saevissima vulnera, casus,</l>
69
+
70
+ <l n="32">Dum cineri uxoris jungar et ipse cinis.</l>
71
+
72
+ <l n="33">Vix afflicta Domus geminati insignia luctus</l>
73
+
74
+ <l n="34">Exuerat, madidas non bene sicca genas:</l>
75
+
76
+ <l n="35">Ecce cadit saevae jam tertia victima Morti</l>
77
+
78
+ <l n="36">Uxor, vulneribus victima caesa tribus.</l>
79
+
80
+ <l n="37">Non fuit illa tibi communi lege necanda,</l>
81
+
82
+ <l n="38">Impia Mors, plagis percutienda fuit.</l>
83
+
84
+ <l n="39">Cuncta tui in miseram <sic>consumpsti</sic> tela furoris,</l>
85
+
86
+ <l n="40">In sola haeserunt sed tua tela cute.</l>
87
+
88
+ <l n="41">Membra malis cessere, animus fuit altior illis,</l>
89
+
90
+ <l n="42">Tu fragilis victrix corporis, ille tui.</l>
91
+
92
+ <l n="43">Saepe opus igne fuit, ferroque; ullumne dolentis</l>
93
+
94
+ <l n="44">Inter eas poenas edidit illa sonum?</l>
95
+
96
+ <l n="45">Num viso extimuit ferro, ingemuitve recepto?</l>
97
+
98
+ <l n="46">Non patiens gemitum, sed miser ipse dabam.</l>
99
+
100
+ <l n="47">Ah! chalybe immiti languentis caesa rigabat</l>
101
+
102
+ <l n="48">Membra cruor: <sic>lacrymis</sic> non maduere genae.</l>
103
+
104
+ <l n="49">Num, quando monita est, momenta novissima vitae</l>
105
+
106
+ <l n="50">Instare, et celeri jam properare gradu?</l>
107
+
108
+ <l n="51">Scilicet hos animos illi mens inscia noxae,</l>
109
+
110
+ <l n="52">Inque Deum pietas, et proba vita dabat,</l>
111
+
112
+ <l n="53">Hos amor in miseros, miserisque intenta juvandis,</l>
113
+
114
+ <l n="54">Clausa sibi, reliquis semper aperta manus:</l>
115
+
116
+ <l n="55">Et vigil in Natos cura, et studium acre tuendae</l>
117
+
118
+ <l n="56">In nivea morum simplicitate domus:</l>
119
+
120
+ <l n="57">Et tormenta per haec candentis atrocia ferri,</l>
121
+
122
+ <l n="58">Quae toties visa est sustinuisse libens,</l>
123
+
124
+ <l n="59">Posthuma vitandi tormenta, piantia Manes,</l>
125
+
126
+ <l n="60">Spes prope certa, sibi si qua pianda forent.</l>
127
+
128
+ <l n="61">Hos animos etiam illa dabant, quae pectore forti</l>
129
+
130
+ <l n="62">Ex gemina amissa vulnera prole tulit.</l>
131
+
132
+ <l n="63">O Superi, quantum vidit maeroris amari,</l>
133
+
134
+ <l n="64">Et quantum vidit roboris illa dies!</l>
135
+
136
+ <l n="65">Quae caruere genae <sic>lacrymis</sic>, et questibus ora?</l>
137
+
138
+ <l n="66">Fundere nec <sic>lacrymas</sic> visa, nec illa queri.</l>
139
+
140
+ <l n="67">Exuerat Matrem coram, Matrem intus agebat,</l>
141
+
142
+ <l n="68">Non se, sed miserum commiserata Virum.</l>
143
+
144
+ <l n="69">Dotibus his tantis quid nunc bona corporis addam?</l>
145
+
146
+ <l n="70">Quid cultum ingenuis artibus ingenium?</l>
147
+
148
+ <l n="71">Quid quam dulce melos manabat ab ore loquentis,</l>
149
+
150
+ <l n="72">In quo opifex mellis fecerat ipsa favos?</l>
151
+
152
+ <l n="73">Talem fata mihi rapuere, immitia fata,</l>
153
+
154
+ <l n="74">Consortem thalami, subsidiumque mei.</l>
155
+
156
+ <l n="75">Post duo dilectae crudelia funera prolis,</l>
157
+
158
+ <l n="76">Praesto illa, auxilium quae mihi ferret, erat.</l>
159
+
160
+ <l n="77">Tota videbatur gens esse superstes in illa</l>
161
+
162
+ <l n="78">Nullaque, ea salva, membra recisa Domus.</l>
163
+
164
+ <l n="79">Nec me illa omnino miserum solante putabam,</l>
165
+
166
+ <l n="80">Illa levamen erat levantis, et illa comes.</l>
167
+
168
+ <l n="81">Decrescit siquidem fidas diffusus in aures</l>
169
+
170
+ <l n="82">Luctus, ut in rivos secta fit unda minor.</l>
171
+
172
+ <l n="83">Adde alios casus, et quae mala plurima passus,</l>
173
+
174
+ <l n="84">Insontem invidiae vi superante, fui:</l>
175
+
176
+ <l n="85">Nunc mihi fraude mala jus libertatis ademptum,</l>
177
+
178
+ <l n="86">Nunc raptae, parcus quas dabat usus, opes.</l>
179
+
180
+ <l n="87">Dum tu aderas, uxor, felix dicebar, eramque,</l>
181
+
182
+ <l n="88">Nil nocuit telis sors mihi saeva suis.</l>
183
+
184
+ <l n="89">Vim mala perdebant in dulci conjugis ore,</l>
185
+
186
+ <l n="90">Et quam tu, poterant aspera fata minus.</l>
187
+
188
+ <l n="91">Cui querar infelix? cui nunc mea tristia dicam?</l>
189
+
190
+ <l n="92">Quae <sic>lacrymas</sic> posthac terget amica manus?</l>
191
+
192
+ <l n="93">Cuncta, illa rapta, rapuit manus invida Parcae,</l>
193
+
194
+ <l n="94">Unde afflicta malis mens capiebat opem.</l>
195
+
196
+ <l n="95">Nec, quia ter denos mecum una exegerit annos</l>
197
+
198
+ <l n="96">Est mihi, ceu sero rapta, dolenda minus.</l>
199
+
200
+ <l n="97">Ah! magis illa ligant, quorum est diuturnior usus,</l>
201
+
202
+ <l n="98">Et plus, quae plus sunt nexa, dirempta dolent.</l>
203
+
204
+ <l n="99">Sic tener, ac mollis ramo si ramus adhaesit,</l>
205
+
206
+ <l n="100">Disjungas facili vincla novella manu.</l>
207
+
208
+ <l n="101">Frangitur annosus, vel vix victusque gemensque</l>
209
+
210
+ <l n="102">Alter ab amplexu solvitur alterius.</l>
211
+
212
+ <l n="103">Nullaque sat longa est, quae desinit esse, voluptas,</l>
213
+
214
+ <l n="104">Et nulla est magno longa in amore mora.</l>
215
+
216
+ <l n="105">Longa mora est tali mihi nunc uxore carenti,</l>
217
+
218
+ <l n="106">Omnis mense dies longior, hora die est.</l>
219
+
220
+ <l n="107">Quid moror ergo miser? cur non invisa relinquo</l>
221
+
222
+ <l n="108">Lumina? cur te non, o mea vita, sequor?</l>
223
+
224
+ <l n="109">Tu mihi dicebas, sine me nil dulce futurum,</l>
225
+
226
+ <l n="110">Non si ipsum flueret nectar in ora, tibi.</l>
227
+
228
+ <l n="111">Quo sola ergo fugis, comitem aspernata maritum?</l>
229
+
230
+ <l n="112">In rapidos abeunt cur tua dicta Notos?</l>
231
+
232
+ <l n="113">Sed non sola fugis: me tecum, ubicumque moraris,</l>
233
+
234
+ <l n="114">Qui bene nos junxit, vivere cogit amor.</l>
235
+
236
+ <l n="115">Una mei pars tecum abiit, minor altera mecum est,</l>
237
+
238
+ <l n="116">Pars abiit melior, plena dolore manet.</l>
239
+
240
+ <l n="117">Ast haec ipsa brevi tua post vestigia curret,</l>
241
+
242
+ <l n="118">A majore nequit pars procul esse minor.</l>
243
+
244
+ <l n="119">O volet illa dies, quae partes uniat ambas,</l>
245
+
246
+ <l n="120">Meque addat comitem tempus in omne tibi!</l>
247
+
248
+ <l n="121">Mors tua divisit, mea nos conjungat, et urna</l>
249
+
250
+ <l n="122">Consociet, thalamus quos sociare nequit.</l>
251
+ </lg>
252
+ </div>
253
+ </body>
254
+ </text>
255
+ </TEI>
@@ -1,6 +1,10 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe LLT::Segmenter do
4
+ def load_fixture(filename)
5
+ File.read(File.expand_path("../../../fixtures/#{filename}", __FILE__))
6
+ end
7
+
4
8
  let(:segmenter) { LLT::Segmenter.new }
5
9
  describe "#segment" do
6
10
  it "returns an array of LLT::Sentence elements" do
@@ -102,6 +106,7 @@ describe LLT::Segmenter do
102
106
  it "doesn't break with punctuation in element names II" do
103
107
  txt = '<grc.test>text.</grc.test> text 2.'
104
108
  sentences = segmenter.segment(txt, xml: true)
109
+ puts sentences
105
110
  sentences.should have(2).items
106
111
  sentences[0].to_s.should == '<grc.test>text.</grc.test>'
107
112
  sentences[1].to_s.should == 'text 2.'
@@ -140,6 +145,19 @@ describe LLT::Segmenter do
140
145
  sentences = segmenter.segment(txt, xml: true)
141
146
  sentences.should have(1).item
142
147
  end
148
+
149
+ it "doesn't fall with multiple closing tags at the end" do
150
+ txt = '<div type="div1" xml:id="c097"> <l>Numen inest vati, vatum mens consona caelo est, </l> <l n="100">Nec certus scit fallere Apollo. </l> </div>'
151
+ sentences = segmenter.segment(txt, xml: true)
152
+ puts sentences
153
+ sentences.should have(1).item
154
+ end
155
+
156
+ it "doesn't fall with empty tags" do
157
+ txt = '<div type="div1" xml:id="c097"> <l>Numen inest vati, vatum mens consona caelo est, </l> <l n="100">Nec certus scit fallere Apollo. </l> <milestone unit="page" n="210"/> </div>'
158
+ sentences = segmenter.segment(txt, xml: true)
159
+ sentences.should have(1).item
160
+ end
143
161
  end
144
162
 
145
163
  context "with xml escaped characters" do
@@ -163,6 +181,16 @@ describe LLT::Segmenter do
163
181
  sentences.should have(2).item
164
182
  sentences[1].to_s.should == 'success.'
165
183
  end
184
+
185
+ describe "when CGI.unescaping HTML characters" do
186
+ it "acknowledges &apos; as potentially trailing delimiter" do
187
+ txt = '&apos;text.&apos; success.'
188
+ unescaped = CGI.unescapeHTML(txt)
189
+ sentences = segmenter.segment(unescaped)
190
+ sentences.should have(2).item
191
+ sentences[1].to_s.should == 'success.'
192
+ end
193
+ end
166
194
  end
167
195
 
168
196
  context "newline (\\n) handling" do
@@ -319,6 +347,36 @@ describe LLT::Segmenter do
319
347
  end
320
348
  end
321
349
 
350
+ context "with full TEI files" do
351
+ it "doesn't go into an endless loop when something is wrong" do
352
+ txt = load_fixture('petrov_eleg01_with_endless_loop.xml')
353
+ sentences = segmenter.segment(txt, xml: true)
354
+ sentences.should_not be_empty
355
+ sentences.should have(60).items
356
+ end
357
+
358
+ it "example II" do
359
+ txt = load_fixture('petrov_eleg01_with_endless_loop_no_xml_header.xml')
360
+ sentences = segmenter.segment(txt, xml: true)
361
+ sentences.should_not be_empty
362
+ sentences.should have(60).items
363
+ end
364
+
365
+ it "example III" do
366
+ txt = load_fixture('petrov_eleg01_cleaned.xml')
367
+ sentences = segmenter.segment(txt, xml: true)
368
+ sentences.should_not be_empty
369
+ sentences.should have(60).items
370
+ end
371
+
372
+ it "example IV" do
373
+ txt = load_fixture('petrov_eleg02_with_internal_error.xml')
374
+ sentences = segmenter.segment(txt, xml: true)
375
+ sentences.should_not be_empty
376
+ sentences.should have(74).items
377
+ end
378
+ end
379
+
322
380
  describe "takes an optional keyword argument add_to" do
323
381
  class ParagraphDummy
324
382
  attr_reader :sentences
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llt-segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gernot Höflechner, Robert Lichstensteiner, Christof Sirk
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-11 00:00:00.000000000 Z
11
+ date: 2014-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -42,16 +42,16 @@ dependencies:
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '2.14'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '2.14'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: simplecov
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -127,8 +127,13 @@ files:
127
127
  - lib/llt/segmenter.rb
128
128
  - lib/llt/segmenter/api.rb
129
129
  - lib/llt/segmenter/version.rb
130
+ - lib/llt/segmenter/version_info.rb
130
131
  - lib/llt/sentence.rb
131
132
  - llt-segmenter.gemspec
133
+ - spec/fixtures/petrov_eleg01_cleaned.xml
134
+ - spec/fixtures/petrov_eleg01_with_endless_loop.xml
135
+ - spec/fixtures/petrov_eleg01_with_endless_loop_no_xml_header.xml
136
+ - spec/fixtures/petrov_eleg02_with_internal_error.xml
132
137
  - spec/lib/llt/segmenter/api_spec.rb
133
138
  - spec/lib/llt/segmenter_spec.rb
134
139
  - spec/spec_helper.rb
@@ -157,6 +162,10 @@ signing_key:
157
162
  specification_version: 4
158
163
  summary: Segments text into sentences
159
164
  test_files:
165
+ - spec/fixtures/petrov_eleg01_cleaned.xml
166
+ - spec/fixtures/petrov_eleg01_with_endless_loop.xml
167
+ - spec/fixtures/petrov_eleg01_with_endless_loop_no_xml_header.xml
168
+ - spec/fixtures/petrov_eleg02_with_internal_error.xml
160
169
  - spec/lib/llt/segmenter/api_spec.rb
161
170
  - spec/lib/llt/segmenter_spec.rb
162
171
  - spec/spec_helper.rb