llt-segmenter 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 201cbc98ca192041429641ef0e3c0fcf128a3654
4
- data.tar.gz: 1f61cb110d27d4b60427f16b1c7eb58576c67eb0
3
+ metadata.gz: f6c90686915f9e02706f88b650695a4b40aa6867
4
+ data.tar.gz: 67ed2ade8bb50c0419a82f18b40eeef031c7a243
5
5
  SHA512:
6
- metadata.gz: 76017f1fc143d0d6d190b341218c3272f71ad230a1c81496ab9813a94a285262f3c15e73ef73570fc8a5b5d07ceb726b7e2ad9e046f9c54a8d61576017c0aac8
7
- data.tar.gz: 0915e76fac1f68d23b9454c010fc5820b021457ff4b53cf8bb0bf98a05b73cffc21b888a0169d14d331a571812c5027e26e7dd56a07dc6936be7538160dff099
6
+ metadata.gz: e38adead709637f4520233ead966d000c332b8b356f4e2d827ae40b3ce31d6e2a0e6969762b57393bcb02c9a9b647e1e617a5bd81ab20712663ac525de7a5062
7
+ data.tar.gz: 50fedc9e52883f8458a437a6f6ff6252bbe6e1f6d78cdfb59d5b78873a27da3dfc40481a1008cf7278493196041cc42a5beaa40be2faef9071e5f36aad946769
data/Gemfile CHANGED
@@ -6,9 +6,9 @@ gem 'pry'
6
6
 
7
7
  gem 'coveralls', require: false
8
8
 
9
- gem 'llt-core', git: 'https://github.com/latin-language-toolkit/llt-core.git'
10
- gem 'llt-constants', git: 'https://github.com/latin-language-toolkit/llt-constants.git'
11
- gem 'llt-logger', git: 'https://github.com/latin-language-toolkit/llt-logger.git'
9
+ gem 'llt-core', git: 'git://github.com/latin-language-toolkit/llt-core.git'
10
+ gem 'llt-constants', git: 'git://github.com/latin-language-toolkit/llt-constants.git'
11
+ gem 'llt-logger', git: 'git://github.com/latin-language-toolkit/llt-logger.git'
12
12
 
13
13
  platform :jruby do
14
14
  gem 'jruby-httpclient'
@@ -5,6 +5,7 @@ require 'llt/core/api'
5
5
 
6
6
  class Api < Sinatra::Base
7
7
  register Sinatra::RespondWith
8
+ register LLT::Core::Api::VersionRoutes
8
9
  helpers LLT::Core::Api::Helpers
9
10
 
10
11
  get '/segment' do
@@ -17,4 +18,6 @@ class Api < Sinatra::Base
17
18
  f.xml { to_xml(sentences, params) }
18
19
  end
19
20
  end
21
+
22
+ add_version_route_for('/segment', dependencies: %i{ Core Segmenter })
20
23
  end
@@ -1,5 +1,5 @@
1
1
  module LLT
2
2
  class Segmenter
3
- VERSION = "0.0.4"
3
+ VERSION = "0.0.5"
4
4
  end
5
5
  end
@@ -0,0 +1,7 @@
1
+ module LLT
2
+ class Segmenter
3
+ class VersionInfo
4
+ include Core::Versioner
5
+ end
6
+ end
7
+ end
data/lib/llt/segmenter.rb CHANGED
@@ -2,6 +2,8 @@ require "llt/constants"
2
2
  require "llt/core"
3
3
  require "llt/logger"
4
4
  require "llt/sentence"
5
+ require "llt/segmenter/version"
6
+ require "llt/segmenter/version_info"
5
7
 
6
8
  module LLT
7
9
  class Segmenter
@@ -30,7 +32,7 @@ module LLT
30
32
  AWB = ALL_ABBRS_PIPED.split('|').map { |abbr| "(?<=\\s|^|>)#{abbr}" }.join('|')
31
33
  # the xml escaped characters cannot be refactored to something along
32
34
  # &(?:amp|quot); - it's an invalid pattern in the look-behind
33
- SENTENCE_CLOSER = /(?<!#{AWB})\.(?!\.)|[\?!:]|((?<!&amp|&quot|&apos|&lt|&gt);)/
35
+ SENTENCE_CLOSER = /(?<!#{AWB})\.(?!\.)|[\?!:·]|((?<!&amp|&quot|&apos|&lt|&gt);)/
34
36
  DIRECT_SPEECH_DELIMITER = /['"”]|&(?:apos|quot);/
35
37
  TRAILERS = /\)|\s*<\/.*?>/
36
38
 
@@ -51,8 +53,14 @@ module LLT
51
53
  @indexing = parse_option(:indexing, options)
52
54
  @id = 0 if @indexing
53
55
 
56
+ # newline_boundary is only active when we aren't working with xml!
54
57
  nl_boundary = parse_option(:newline_boundary, options)
55
- @sentence_closer = Regexp.union(SENTENCE_CLOSER, /\n{#{nl_boundary}}/)
58
+
59
+ @sentence_closer = build_sentence_closer_regexp(nl_boundary)
60
+ end
61
+
62
+ def build_sentence_closer_regexp(nl_boundary)
63
+ @xml ? SENTENCE_CLOSER : Regexp.union(SENTENCE_CLOSER, /\n{#{nl_boundary}}/)
56
64
  end
57
65
 
58
66
  # Used to normalize wonky whitespace in front of or behind direct speech
@@ -115,9 +123,16 @@ module LLT
115
123
 
116
124
  def scan_through_string(scanner, sentences = [])
117
125
  while scanner.rest?
126
+ loop_guard = scanner.pos
127
+
118
128
  sentence = scan_until_next_sentence(scanner, sentences)
119
129
 
120
- rebuild_xml_tags(scanner, sentence, sentences) if @xml
130
+ raise if scanner.pos == loop_guard
131
+
132
+ if @xml
133
+ rebuild_xml_tags(scanner, sentence, sentences)
134
+ take_all_closing_tags(scanner, sentence)
135
+ end
121
136
  sentence << trailing_delimiters(scanner)
122
137
 
123
138
  sentence.strip!
@@ -130,6 +145,10 @@ module LLT
130
145
  sentences
131
146
  end
132
147
 
148
+ def scan_to_first_real_text(scanner)
149
+ scanner.scan_until(/<.*?>\s*(?=\w)/)
150
+ end
151
+
133
152
  def scan_until_next_sentence(scanner, sentences)
134
153
  scanner.scan_until(@sentence_closer) ||
135
154
  rescue_no_delimiters(sentences, scanner)
@@ -160,6 +179,18 @@ module LLT
160
179
  ! sentence.match(/#{@sentence_closer}\s*<.*?>$/)
161
180
  end
162
181
 
182
+ def take_all_closing_tags(scanner, sentence)
183
+ if closing_tags_only?(scanner.rest)
184
+ sentence << scanner.rest
185
+ scanner.terminate
186
+ end
187
+ end
188
+
189
+ def closing_tags_only?(str)
190
+ str.match(/\A(\s*<\/.*?>\s*|\s*<.*?\/>\s*)+\z/)
191
+ end
192
+
193
+
163
194
  def rescue_no_delimiters(sentences, scanner)
164
195
  if sentences.any?
165
196
  # broken off texts
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
22
  spec.add_development_dependency "rake"
23
- spec.add_development_dependency "rspec"
23
+ spec.add_development_dependency "rspec", "~> 2.14"
24
24
  spec.add_development_dependency "simplecov", "~> 0.7"
25
25
  spec.add_dependency "llt-core"
26
26
  spec.add_dependency "llt-constants"
@@ -0,0 +1,203 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-model href="http://www.stoa.org/epidoc/schema/latest/tei-epidoc.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
3
+ <TEI xmlns="http://www.tei-c.org/ns/1.0">
4
+ <text xml:lang="lat">
5
+ <body>
6
+ <div type="edition" subtype="poesis-elegia" n="urn:cts:croala:petrov02.eleg01.lat1">
7
+ <lg met="elegiacum">
8
+ <l n="1">Conjugis ut carae patrio mens debita caelo</l>
9
+
10
+ <l n="2"> Pars melior, miseram laeta reliquit humum:</l>
11
+
12
+ <l n="3">Postquam, illa rapta, simul omnis rapta voluptas</l>
13
+
14
+ <l n="4"> Est mihi (jam ex illo tempore mensis abit)</l>
15
+
16
+ <l n="5">Non nisi perpetuos fundunt mea lumina fletus;</l>
17
+
18
+ <l n="6">Lux sequitur noctem tristis, et umbra diem.</l>
19
+
20
+ <l n="7">Pabula sunt <sic>lacrymae</sic>, <sic>lacrymae</sic> sunt pocula, sed quae</l>
21
+
22
+ <l n="8"> Plus, Medea, tuo gramine fellis habent.</l>
23
+
24
+ <l n="9">Sic me qui pascit, simul enecat humor, et hospes</l>
25
+
26
+ <l n="10"> Ipse mei cordis, cor mihi luctus edit.</l>
27
+
28
+ <l n="11">Ac veluti turtur, sociam cui barbarus auceps</l>
29
+
30
+ <l n="12"> Exceptam structis perdidit insidiis,</l>
31
+
32
+ <l n="13">Si quam forte videt sine vite, et frondibus ulmum,</l>
33
+
34
+ <l n="14"> Aequalem sorti, consimilemque suae,</l>
35
+
36
+ <l n="15">Flectit iter, ramoque sedens miserabilis ales</l>
37
+
38
+ <l n="16"> Gutture subrauco nil nisi triste gemit.</l>
39
+
40
+ <l n="17">Non illum exhilarat facies pulcherrima Veris,</l>
41
+
42
+ <l n="18"> Nulla sibi in notis pabula quaerit agris:</l>
43
+
44
+ <l n="19">Non sociae possunt volucres abducere ramo,</l>
45
+
46
+ <l n="20"> Ad prope labentes non sitis urget aquas.</l>
47
+
48
+ <l n="21">Sic ego. Sic vitam sine te, dulcissima conjux,</l>
49
+
50
+ <l n="22"> Si vitae haec nomen vita meretur, ago.</l>
51
+
52
+ <l n="23">Sola queri misero, sola est mihi flere voluptas,</l>
53
+
54
+ <l n="24"> Sola loci facies maesta, silensque placet.</l>
55
+
56
+ <l n="25">Non aures cantus, non fila loquacia mulcent;</l>
57
+
58
+ <l n="26"> Non oculos formae gratia, flosque rapit.</l>
59
+
60
+ <l n="27">Unam te in sylvis, unam in florentibus hortis,</l>
61
+
62
+ <l n="28">Per juga, per valles quaero, nec invenio.</l>
63
+
64
+ <l n="29">Nec magis Eurydice est Vati quaesita marito,</l>
65
+
66
+ <l n="30"> Tartareum quamvis viderit ille canem:</l>
67
+
68
+ <l n="31">Nec magis est Cephalo Procris defleta, videnti</l>
69
+
70
+ <l n="32"> Deceptae errorem, flagitiumque manus;</l>
71
+
72
+ <l n="33">Quam totas ego te noctes, mea vita, diesque</l>
73
+
74
+ <l n="34"> Quaero, nec inventam maestus abesse queror.</l>
75
+
76
+ <l n="35">Et tamen ante oculos errat tua semper imago,</l>
77
+
78
+ <l n="36"> (Quid non fingit amans?) et tua verba sonant.</l>
79
+
80
+ <l n="37">Si qua avis in densis, Siren innoxia, lucis</l>
81
+
82
+ <l n="38"> Est audita mihi fundere dulce melos,</l>
83
+
84
+ <l n="39">Sisto gradum, et similis deceptus imagine vocis</l>
85
+
86
+ <l n="40"> Est, inquam, est cantus conjugis ille meae.</l>
87
+
88
+ <l n="41">Si quando ad fontes, aut ad vernantia prata,</l>
89
+
90
+ <l n="42"> Aut maris ad placidas me tulit error aquas,</l>
91
+
92
+ <l n="43">Hic locus est, dico, quem visere saepe solebat.</l>
93
+
94
+ <l n="44"> Quae mora (jam sol est ortus) abesse facit?</l>
95
+
96
+ <l n="45">Sed jam jam veniet; latet illa forte sub umbra,</l>
97
+
98
+ <l n="46"> Aut illi pietas est sua causa morae.</l>
99
+
100
+ <l n="47">Causa morae est certe pietas: nisi fallimur, haec est,</l>
101
+
102
+ <l n="48"> Fundentem ad superos quae videt hora preces.</l>
103
+
104
+ <l n="49">Mox sat ut illusum me liquit amabilis error,</l>
105
+
106
+ <l n="50"> Protinus ex oculis bina fluenta cadunt.</l>
107
+
108
+ <l n="51">Bina fluenta cadunt, quorum hinc dolor elicit unum,</l>
109
+
110
+ <l n="52"> Inde aliud, tanti causa doloris, amor.</l>
111
+
112
+ <l n="53">Meque ipsum incuso, quod sim tam stultus, et amens,</l>
113
+
114
+ <l n="54"> Et pascam aerumnas crudelitate meas:</l>
115
+
116
+ <l n="55">Rursus in errores tamen hos delabor, et hujus</l>
117
+
118
+ <l n="56"> Erroris rursus paenitet esse reum.</l>
119
+
120
+ <l n="57">Sic pugnant mea vota meis contraria votis,</l>
121
+
122
+ <l n="58"> Nec placet, heu! misero quod modo dulce fuit.</l>
123
+
124
+ <l n="59">Nec quod sim discors, angit modo; saevius angit</l>
125
+
126
+ <l n="60"> Vivere me longos te sine posse dies.</l>
127
+
128
+ <l n="61">Ah! ubi sunt voces illae, et mea fortia verba?</l>
129
+
130
+ <l n="62"> Ah! ubi, quae verbis debet inesse, fides?</l>
131
+
132
+ <l n="63">Me quoque rapturam subito, quae te hora tulisset,</l>
133
+
134
+ <l n="64"> Et pariter praedam mortis utrumque fore?</l>
135
+
136
+ <l n="65">Ecce tamen vivo, nec post nova cornua Phoebes</l>
137
+
138
+ <l n="66"> Vis me maeroris perdere longa potest.</l>
139
+
140
+ <l n="67">Heu! quae dura silex, quod inexsuperabile robur,</l>
141
+
142
+ <l n="68"> Quod ferrum, et triplex aes mihi pectus obit?</l>
143
+
144
+ <l n="69">Vivo equidem, vivo, sed morte est tristior ipsa,</l>
145
+
146
+ <l n="70"> Quae sine te, conjux, vita relicta mihi est.</l>
147
+
148
+ <l n="71">At tu nunc choreis Natorum immixta tuorum,</l>
149
+
150
+ <l n="72"> Qui (prona) facili ad Superos te praeiere (via) gradu,</l>
151
+
152
+ <l n="73">Plena Deo frueris, nec, quae tibi parta, bonorum</l>
153
+
154
+ <l n="74"> Amittendorum te timor ullus habet.</l>
155
+
156
+ <l n="75">Nam tua non tristes pietas te duxit in oras:</l>
157
+
158
+ <l n="76"> Debetur sedes non nisi laeta piis.</l>
159
+
160
+ <l n="77">Te plaga (credo equidem) summi plaga lucida caeli,</l>
161
+
162
+ <l n="78"> Te laeta aeterno vere vireta tenent.</l>
163
+
164
+ <l n="79">Ipsum ipsum Auctorem rerum, quem qui videt, ultra</l>
165
+
166
+ <l n="80"> Nil habet optandum, jam sine nube vides.</l>
167
+
168
+ <l n="81">Usque et ubique vides, at non saturata videndo</l>
169
+
170
+ <l n="82"> Illo oculos pascis; pressa sed usque fame es.</l>
171
+
172
+ <l n="83">Te vis implet opum, sed non (licet impleat) explet;</l>
173
+
174
+ <l n="84"> Excipit unum aliud, subsequiturque bonum.</l>
175
+
176
+ <l n="85">Non te humiles curae, non te mortalia tangunt;</l>
177
+
178
+ <l n="86"> Prae caelo, et stellis quam tibi sordet humus!</l>
179
+
180
+ <l n="87">Sordet humus certe. non sic tamen, ut tua nunquam</l>
181
+
182
+ <l n="88"> Ad miserum flectas lumina blanda virum;</l>
183
+
184
+ <l n="89">Audire aut flentem fugias, et saucia flentis,</l>
185
+
186
+ <l n="90"> Qua licet, admota corda fovere manu;</l>
187
+
188
+ <l n="91">Iactatumque diu ventisque undisque vocare</l>
189
+
190
+ <l n="92"> Ad laeta Eridani littora stelliferi.</l>
191
+
192
+ <l n="93">Quam tua sors felix, quam nostra simillima morti est,</l>
193
+
194
+ <l n="94"> Felle ego, tu Divum vesceris ambrosia.</l>
195
+
196
+ <l n="95">Non tamen invideo tua gaudia, sed miser opto,</l>
197
+
198
+ <l n="96"> Laetitiae consors quam prius esse tuae.</l>
199
+ </lg>
200
+ </div>
201
+ </body>
202
+ </text>
203
+ </TEI>
@@ -0,0 +1,204 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-model href="http://www.stoa.org/epidoc/schema/latest/tei-epidoc.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
3
+ <TEI xmlns="http://www.tei-c.org/ns/1.0">
4
+ <text xml:lang="lat">
5
+ <body>
6
+ <div type="edition" subtype="poesis-elegia" n="urn:cts:croala:petrov02.eleg01.lat1">
7
+ <lg met="elegiacum">
8
+ <l n="1">Conjugis ut carae patrio mens debita caelo</l>
9
+
10
+ <l n="2"> Pars melior, miseram laeta reliquit humum:</l>
11
+
12
+ <l n="3">Postquam, illa rapta, simul omnis rapta voluptas</l>
13
+
14
+ <l n="4"> Est mihi (jam ex illo tempore mensis abit)</l>
15
+
16
+ <l n="5">Non nisi perpetuos fundunt mea lumina fletus;</l>
17
+
18
+ <l n="6">Lux sequitur noctem tristis, et umbra diem.</l>
19
+
20
+ <l n="7">Pabula sunt <sic>lacrymae</sic>, <sic>lacrymae</sic> sunt pocula, sed
21
+ quae</l>
22
+
23
+ <l n="8"> Plus, Medea, tuo gramine fellis habent.</l>
24
+
25
+ <l n="9">Sic me qui pascit, simul enecat humor, et hospes</l>
26
+
27
+ <l n="10"> Ipse mei cordis, cor mihi luctus edit.</l>
28
+
29
+ <l n="11">Ac veluti turtur, sociam cui barbarus auceps</l>
30
+
31
+ <l n="12"> Exceptam structis perdidit insidiis,</l>
32
+
33
+ <l n="13">Si quam forte videt sine vite, et frondibus ulmum,</l>
34
+
35
+ <l n="14"> Aequalem sorti, consimilemque suae,</l>
36
+
37
+ <l n="15">Flectit iter, ramoque sedens miserabilis ales</l>
38
+
39
+ <l n="16"> Gutture subrauco nil nisi triste gemit.</l>
40
+
41
+ <l n="17">Non illum exhilarat facies pulcherrima Veris,</l>
42
+
43
+ <l n="18"> Nulla sibi in notis pabula quaerit agris:</l>
44
+
45
+ <l n="19">Non sociae possunt volucres abducere ramo,</l>
46
+
47
+ <l n="20"> Ad prope labentes non sitis urget aquas.</l>
48
+
49
+ <l n="21">Sic ego. Sic vitam sine te, dulcissima conjux,</l>
50
+
51
+ <l n="22"> Si vitae haec nomen vita meretur, ago.</l>
52
+
53
+ <l n="23">Sola queri misero, sola est mihi flere voluptas,</l>
54
+
55
+ <l n="24"> Sola loci facies maesta, silensque placet.</l>
56
+
57
+ <l n="25">Non aures cantus, non fila loquacia mulcent;</l>
58
+
59
+ <l n="26"> Non oculos formae gratia, flosque rapit.</l>
60
+
61
+ <l n="27">Unam te in sylvis, unam in florentibus hortis,</l>
62
+
63
+ <l n="28">Per juga, per valles quaero, nec invenio.</l>
64
+
65
+ <l n="29">Nec magis Eurydice est Vati quaesita marito,</l>
66
+
67
+ <l n="30"> Tartareum quamvis viderit ille canem:</l>
68
+
69
+ <l n="31">Nec magis est Cephalo Procris defleta, videnti</l>
70
+
71
+ <l n="32"> Deceptae errorem, flagitiumque manus;</l>
72
+
73
+ <l n="33">Quam totas ego te noctes, mea vita, diesque</l>
74
+
75
+ <l n="34"> Quaero, nec inventam maestus abesse queror.</l>
76
+
77
+ <l n="35">Et tamen ante oculos errat tua semper imago,</l>
78
+
79
+ <l n="36"> (Quid non fingit amans?) et tua verba sonant.</l>
80
+
81
+ <l n="37">Si qua avis in densis, Siren innoxia, lucis</l>
82
+
83
+ <l n="38"> Est audita mihi fundere dulce melos,</l>
84
+
85
+ <l n="39">Sisto gradum, et similis deceptus imagine vocis</l>
86
+
87
+ <l n="40"> Est, inquam, est cantus conjugis ille meae.</l>
88
+
89
+ <l n="41">Si quando ad fontes, aut ad vernantia prata,</l>
90
+
91
+ <l n="42"> Aut maris ad placidas me tulit error aquas,</l>
92
+
93
+ <l n="43">Hic locus est, dico, quem visere saepe solebat.</l>
94
+
95
+ <l n="44"> Quae mora (jam sol est ortus) abesse facit?</l>
96
+
97
+ <l n="45">Sed jam jam veniet; latet illa forte sub umbra,</l>
98
+
99
+ <l n="46"> Aut illi pietas est sua causa morae.</l>
100
+
101
+ <l n="47">Causa morae est certe pietas: nisi fallimur, haec est,</l>
102
+
103
+ <l n="48"> Fundentem ad superos quae videt hora preces.</l>
104
+
105
+ <l n="49">Mox sat ut illusum me liquit amabilis error,</l>
106
+
107
+ <l n="50"> Protinus ex oculis bina fluenta cadunt.</l>
108
+
109
+ <l n="51">Bina fluenta cadunt, quorum hinc dolor elicit unum,</l>
110
+
111
+ <l n="52"> Inde aliud, tanti causa doloris, amor.</l>
112
+
113
+ <l n="53">Meque ipsum incuso, quod sim tam stultus, et amens,</l>
114
+
115
+ <l n="54"> Et pascam aerumnas crudelitate meas:</l>
116
+
117
+ <l n="55">Rursus in errores tamen hos delabor, et hujus</l>
118
+
119
+ <l n="56"> Erroris rursus paenitet esse reum.</l>
120
+
121
+ <l n="57">Sic pugnant mea vota meis contraria votis,</l>
122
+
123
+ <l n="58"> Nec placet, heu! misero quod modo dulce fuit.</l>
124
+
125
+ <l n="59">Nec quod sim discors, angit modo; saevius angit</l>
126
+
127
+ <l n="60"> Vivere me longos te sine posse dies.</l>
128
+
129
+ <l n="61">Ah! ubi sunt voces illae, et mea fortia verba?</l>
130
+
131
+ <l n="62"> Ah! ubi, quae verbis debet inesse, fides?</l>
132
+
133
+ <l n="63">Me quoque rapturam subito, quae te hora tulisset,</l>
134
+
135
+ <l n="64"> Et pariter praedam mortis utrumque fore?</l>
136
+
137
+ <l n="65">Ecce tamen vivo, nec post nova cornua Phoebes</l>
138
+
139
+ <l n="66"> Vis me maeroris perdere longa potest.</l>
140
+
141
+ <l n="67">Heu! quae dura silex, quod inexsuperabile robur,</l>
142
+
143
+ <l n="68"> Quod ferrum, et triplex aes mihi pectus obit?</l>
144
+
145
+ <l n="69">Vivo equidem, vivo, sed morte est tristior ipsa,</l>
146
+
147
+ <l n="70"> Quae sine te, conjux, vita relicta mihi est.</l>
148
+
149
+ <l n="71">At tu nunc choreis Natorum immixta tuorum,</l>
150
+
151
+ <l n="72"> Qui (prona) facili ad Superos te praeiere (via) gradu,</l>
152
+
153
+ <l n="73">Plena Deo frueris, nec, quae tibi parta, bonorum</l>
154
+
155
+ <l n="74"> Amittendorum te timor ullus habet.</l>
156
+
157
+ <l n="75">Nam tua non tristes pietas te duxit in oras:</l>
158
+
159
+ <l n="76"> Debetur sedes non nisi laeta piis.</l>
160
+
161
+ <l n="77">Te plaga (credo equidem) summi plaga lucida caeli,</l>
162
+
163
+ <l n="78"> Te laeta aeterno vere vireta tenent.</l>
164
+
165
+ <l n="79">Ipsum ipsum Auctorem rerum, quem qui videt, ultra</l>
166
+
167
+ <l n="80"> Nil habet optandum, jam sine nube vides.</l>
168
+
169
+ <l n="81">Usque et ubique vides, at non saturata videndo</l>
170
+
171
+ <l n="82"> Illo oculos pascis; pressa sed usque fame es.</l>
172
+
173
+ <l n="83">Te vis implet opum, sed non (licet impleat) explet;</l>
174
+
175
+ <l n="84"> Excipit unum aliud, subsequiturque bonum.</l>
176
+
177
+ <l n="85">Non te humiles curae, non te mortalia tangunt;</l>
178
+
179
+ <l n="86"> Prae caelo, et stellis quam tibi sordet humus!</l>
180
+
181
+ <l n="87">Sordet humus certe. non sic tamen, ut tua nunquam</l>
182
+
183
+ <l n="88"> Ad miserum flectas lumina blanda virum;</l>
184
+
185
+ <l n="89">Audire aut flentem fugias, et saucia flentis,</l>
186
+
187
+ <l n="90"> Qua licet, admota corda fovere manu;</l>
188
+
189
+ <l n="91">Iactatumque diu ventisque undisque vocare</l>
190
+
191
+ <l n="92"> Ad laeta Eridani littora stelliferi.</l>
192
+
193
+ <l n="93">Quam tua sors felix, quam nostra simillima morti est,</l>
194
+
195
+ <l n="94"> Felle ego, tu Divum vesceris ambrosia.</l>
196
+
197
+ <l n="95">Non tamen invideo tua gaudia, sed miser opto,</l>
198
+
199
+ <l n="96"> Laetitiae consors quam prius esse tuae.</l>
200
+ </lg>
201
+ </div>
202
+ </body>
203
+ </text>
204
+ </TEI>
@@ -0,0 +1,202 @@
1
+ <TEI xmlns="http://www.tei-c.org/ns/1.0">
2
+ <text xml:lang="lat">
3
+ <body>
4
+ <div type="edition" subtype="poesis-elegia" n="urn:cts:croala:petrov02.eleg01.lat1">
5
+ <lg met="elegiacum">
6
+ <l n="1">Conjugis ut carae patrio mens debita caelo</l>
7
+
8
+ <l n="2"> Pars melior, miseram laeta reliquit humum:</l>
9
+
10
+ <l n="3">Postquam, illa rapta, simul omnis rapta voluptas</l>
11
+
12
+ <l n="4"> Est mihi (jam ex illo tempore mensis abit)</l>
13
+
14
+ <l n="5">Non nisi perpetuos fundunt mea lumina fletus;</l>
15
+
16
+ <l n="6">Lux sequitur noctem tristis, et umbra diem.</l>
17
+
18
+ <l n="7">Pabula sunt <sic>lacrymae</sic>, <sic>lacrymae</sic> sunt pocula, sed
19
+ quae</l>
20
+
21
+ <l n="8"> Plus, Medea, tuo gramine fellis habent.</l>
22
+
23
+ <l n="9">Sic me qui pascit, simul enecat humor, et hospes</l>
24
+
25
+ <l n="10"> Ipse mei cordis, cor mihi luctus edit.</l>
26
+
27
+ <l n="11">Ac veluti turtur, sociam cui barbarus auceps</l>
28
+
29
+ <l n="12"> Exceptam structis perdidit insidiis,</l>
30
+
31
+ <l n="13">Si quam forte videt sine vite, et frondibus ulmum,</l>
32
+
33
+ <l n="14"> Aequalem sorti, consimilemque suae,</l>
34
+
35
+ <l n="15">Flectit iter, ramoque sedens miserabilis ales</l>
36
+
37
+ <l n="16"> Gutture subrauco nil nisi triste gemit.</l>
38
+
39
+ <l n="17">Non illum exhilarat facies pulcherrima Veris,</l>
40
+
41
+ <l n="18"> Nulla sibi in notis pabula quaerit agris:</l>
42
+
43
+ <l n="19">Non sociae possunt volucres abducere ramo,</l>
44
+
45
+ <l n="20"> Ad prope labentes non sitis urget aquas.</l>
46
+
47
+ <l n="21">Sic ego. Sic vitam sine te, dulcissima conjux,</l>
48
+
49
+ <l n="22"> Si vitae haec nomen vita meretur, ago.</l>
50
+
51
+ <l n="23">Sola queri misero, sola est mihi flere voluptas,</l>
52
+
53
+ <l n="24"> Sola loci facies maesta, silensque placet.</l>
54
+
55
+ <l n="25">Non aures cantus, non fila loquacia mulcent;</l>
56
+
57
+ <l n="26"> Non oculos formae gratia, flosque rapit.</l>
58
+
59
+ <l n="27">Unam te in sylvis, unam in florentibus hortis,</l>
60
+
61
+ <l n="28">Per juga, per valles quaero, nec invenio.</l>
62
+
63
+ <l n="29">Nec magis Eurydice est Vati quaesita marito,</l>
64
+
65
+ <l n="30"> Tartareum quamvis viderit ille canem:</l>
66
+
67
+ <l n="31">Nec magis est Cephalo Procris defleta, videnti</l>
68
+
69
+ <l n="32"> Deceptae errorem, flagitiumque manus;</l>
70
+
71
+ <l n="33">Quam totas ego te noctes, mea vita, diesque</l>
72
+
73
+ <l n="34"> Quaero, nec inventam maestus abesse queror.</l>
74
+
75
+ <l n="35">Et tamen ante oculos errat tua semper imago,</l>
76
+
77
+ <l n="36"> (Quid non fingit amans?) et tua verba sonant.</l>
78
+
79
+ <l n="37">Si qua avis in densis, Siren innoxia, lucis</l>
80
+
81
+ <l n="38"> Est audita mihi fundere dulce melos,</l>
82
+
83
+ <l n="39">Sisto gradum, et similis deceptus imagine vocis</l>
84
+
85
+ <l n="40"> Est, inquam, est cantus conjugis ille meae.</l>
86
+
87
+ <l n="41">Si quando ad fontes, aut ad vernantia prata,</l>
88
+
89
+ <l n="42"> Aut maris ad placidas me tulit error aquas,</l>
90
+
91
+ <l n="43">Hic locus est, dico, quem visere saepe solebat.</l>
92
+
93
+ <l n="44"> Quae mora (jam sol est ortus) abesse facit?</l>
94
+
95
+ <l n="45">Sed jam jam veniet; latet illa forte sub umbra,</l>
96
+
97
+ <l n="46"> Aut illi pietas est sua causa morae.</l>
98
+
99
+ <l n="47">Causa morae est certe pietas: nisi fallimur, haec est,</l>
100
+
101
+ <l n="48"> Fundentem ad superos quae videt hora preces.</l>
102
+
103
+ <l n="49">Mox sat ut illusum me liquit amabilis error,</l>
104
+
105
+ <l n="50"> Protinus ex oculis bina fluenta cadunt.</l>
106
+
107
+ <l n="51">Bina fluenta cadunt, quorum hinc dolor elicit unum,</l>
108
+
109
+ <l n="52"> Inde aliud, tanti causa doloris, amor.</l>
110
+
111
+ <l n="53">Meque ipsum incuso, quod sim tam stultus, et amens,</l>
112
+
113
+ <l n="54"> Et pascam aerumnas crudelitate meas:</l>
114
+
115
+ <l n="55">Rursus in errores tamen hos delabor, et hujus</l>
116
+
117
+ <l n="56"> Erroris rursus paenitet esse reum.</l>
118
+
119
+ <l n="57">Sic pugnant mea vota meis contraria votis,</l>
120
+
121
+ <l n="58"> Nec placet, heu! misero quod modo dulce fuit.</l>
122
+
123
+ <l n="59">Nec quod sim discors, angit modo; saevius angit</l>
124
+
125
+ <l n="60"> Vivere me longos te sine posse dies.</l>
126
+
127
+ <l n="61">Ah! ubi sunt voces illae, et mea fortia verba?</l>
128
+
129
+ <l n="62"> Ah! ubi, quae verbis debet inesse, fides?</l>
130
+
131
+ <l n="63">Me quoque rapturam subito, quae te hora tulisset,</l>
132
+
133
+ <l n="64"> Et pariter praedam mortis utrumque fore?</l>
134
+
135
+ <l n="65">Ecce tamen vivo, nec post nova cornua Phoebes</l>
136
+
137
+ <l n="66"> Vis me maeroris perdere longa potest.</l>
138
+
139
+ <l n="67">Heu! quae dura silex, quod inexsuperabile robur,</l>
140
+
141
+ <l n="68"> Quod ferrum, et triplex aes mihi pectus obit?</l>
142
+
143
+ <l n="69">Vivo equidem, vivo, sed morte est tristior ipsa,</l>
144
+
145
+ <l n="70"> Quae sine te, conjux, vita relicta mihi est.</l>
146
+
147
+ <l n="71">At tu nunc choreis Natorum immixta tuorum,</l>
148
+
149
+ <l n="72"> Qui (prona) facili ad Superos te praeiere (via) gradu,</l>
150
+
151
+ <l n="73">Plena Deo frueris, nec, quae tibi parta, bonorum</l>
152
+
153
+ <l n="74"> Amittendorum te timor ullus habet.</l>
154
+
155
+ <l n="75">Nam tua non tristes pietas te duxit in oras:</l>
156
+
157
+ <l n="76"> Debetur sedes non nisi laeta piis.</l>
158
+
159
+ <l n="77">Te plaga (credo equidem) summi plaga lucida caeli,</l>
160
+
161
+ <l n="78"> Te laeta aeterno vere vireta tenent.</l>
162
+
163
+ <l n="79">Ipsum ipsum Auctorem rerum, quem qui videt, ultra</l>
164
+
165
+ <l n="80"> Nil habet optandum, jam sine nube vides.</l>
166
+
167
+ <l n="81">Usque et ubique vides, at non saturata videndo</l>
168
+
169
+ <l n="82"> Illo oculos pascis; pressa sed usque fame es.</l>
170
+
171
+ <l n="83">Te vis implet opum, sed non (licet impleat) explet;</l>
172
+
173
+ <l n="84"> Excipit unum aliud, subsequiturque bonum.</l>
174
+
175
+ <l n="85">Non te humiles curae, non te mortalia tangunt;</l>
176
+
177
+ <l n="86"> Prae caelo, et stellis quam tibi sordet humus!</l>
178
+
179
+ <l n="87">Sordet humus certe. non sic tamen, ut tua nunquam</l>
180
+
181
+ <l n="88"> Ad miserum flectas lumina blanda virum;</l>
182
+
183
+ <l n="89">Audire aut flentem fugias, et saucia flentis,</l>
184
+
185
+ <l n="90"> Qua licet, admota corda fovere manu;</l>
186
+
187
+ <l n="91">Iactatumque diu ventisque undisque vocare</l>
188
+
189
+ <l n="92"> Ad laeta Eridani littora stelliferi.</l>
190
+
191
+ <l n="93">Quam tua sors felix, quam nostra simillima morti est,</l>
192
+
193
+ <l n="94"> Felle ego, tu Divum vesceris ambrosia.</l>
194
+
195
+ <l n="95">Non tamen invideo tua gaudia, sed miser opto,</l>
196
+
197
+ <l n="96"> Laetitiae consors quam prius esse tuae.</l>
198
+ </lg>
199
+ </div>
200
+ </body>
201
+ </text>
202
+ </TEI>
@@ -0,0 +1,255 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-model href="http://www.stoa.org/epidoc/schema/latest/tei-epidoc.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
3
+ <TEI xmlns="http://www.tei-c.org/ns/1.0">
4
+ <text xml:lang="lat">
5
+ <body>
6
+ <div type="edition" subtype="poesis-elegia" n="urn:cts:croala:petrov02.eleg02.lat1">
7
+ <lg>
8
+ <l n="1">Siccine dividimur, vita mihi carior Uxor,</l>
9
+
10
+ <l n="2">Morte tua nostrum dissociante thorum?</l>
11
+
12
+ <l n="3">Siccine me miserum, nunquam relictura, relinquis</l>
13
+
14
+ <l n="4">Me miserum, et Natos, pignora cara, tuos?</l>
15
+
16
+ <l n="5">Non te Bactra tenent, non me nunc ultima Thule,</l>
17
+
18
+ <l n="6">Longius est spatium, nos quod abesse facit.</l>
19
+
20
+ <l n="7">Nunc mihi, nunc cantum ventosque, amnesque morantem,</l>
21
+
22
+ <l n="8">Nunc cuperem auditas Manibus esse fides.</l>
23
+
24
+ <l n="9">Non summo ut faciam quercus descendere ab Aemo;</l>
25
+
26
+ <l n="10">Aut stare immotas voce canentis aves.</l>
27
+
28
+ <l n="11">Sed carae ut repetam fugientem conjugis Umbram</l>
29
+
30
+ <l n="12">Nunc prece, nunc <sic>cythara</sic> consociante preces</l>
31
+
32
+ <l n="13">Irem audax (amor ipse facem praeferret eunti)</l>
33
+
34
+ <l n="14">Plena tenebrarum per loca, plena metus.</l>
35
+
36
+ <l n="15">Atque aliquis miserans maesta de gente silentum,</l>
37
+
38
+ <l n="16">Quos olim simili vulnere stravit amor,</l>
39
+
40
+ <l n="17">Ostendens procul Elysium, loca laeta piorum,</l>
41
+
42
+ <l n="18">„Quam petis, ille tenet“, diceret, „ille locus“.</l>
43
+
44
+ <l n="19">Non me, non novies Styx interfusa teneret</l>
45
+
46
+ <l n="20">Viminea et novies trajicienda rate.</l>
47
+
48
+ <l n="21">Non fera vipereo terreret crine Megaera,</l>
49
+
50
+ <l n="22">Majestas Stygii non truculenta Jovis.</l>
51
+
52
+ <l n="23">Forsan et ille pius <sic>lacrymas</sic> ad verba precantis</l>
53
+
54
+ <l n="24">Funderet, et reditum Conjugis annueret.</l>
55
+
56
+ <l n="25">Quid loquor ah! demens? non est revocabilis ultra,</l>
57
+
58
+ <l n="26">Transvecta est Stygios quae semel Umbra lacus.</l>
59
+
60
+ <l n="27">Non redit ad fontem, quae fontem deserit unda.</l>
61
+
62
+ <l n="28">Non reflorescunt, quae cecidere, rosae.</l>
63
+
64
+ <l n="29">Ergo, quod est miseris reliquum solumque levamen,</l>
65
+
66
+ <l n="30">Quo lever, heu! superest nil nisi flere mihi,</l>
67
+
68
+ <l n="31">Nil nisi flere meos, saevissima vulnera, casus,</l>
69
+
70
+ <l n="32">Dum cineri uxoris jungar et ipse cinis.</l>
71
+
72
+ <l n="33">Vix afflicta Domus geminati insignia luctus</l>
73
+
74
+ <l n="34">Exuerat, madidas non bene sicca genas:</l>
75
+
76
+ <l n="35">Ecce cadit saevae jam tertia victima Morti</l>
77
+
78
+ <l n="36">Uxor, vulneribus victima caesa tribus.</l>
79
+
80
+ <l n="37">Non fuit illa tibi communi lege necanda,</l>
81
+
82
+ <l n="38">Impia Mors, plagis percutienda fuit.</l>
83
+
84
+ <l n="39">Cuncta tui in miseram <sic>consumpsti</sic> tela furoris,</l>
85
+
86
+ <l n="40">In sola haeserunt sed tua tela cute.</l>
87
+
88
+ <l n="41">Membra malis cessere, animus fuit altior illis,</l>
89
+
90
+ <l n="42">Tu fragilis victrix corporis, ille tui.</l>
91
+
92
+ <l n="43">Saepe opus igne fuit, ferroque; ullumne dolentis</l>
93
+
94
+ <l n="44">Inter eas poenas edidit illa sonum?</l>
95
+
96
+ <l n="45">Num viso extimuit ferro, ingemuitve recepto?</l>
97
+
98
+ <l n="46">Non patiens gemitum, sed miser ipse dabam.</l>
99
+
100
+ <l n="47">Ah! chalybe immiti languentis caesa rigabat</l>
101
+
102
+ <l n="48">Membra cruor: <sic>lacrymis</sic> non maduere genae.</l>
103
+
104
+ <l n="49">Num, quando monita est, momenta novissima vitae</l>
105
+
106
+ <l n="50">Instare, et celeri jam properare gradu?</l>
107
+
108
+ <l n="51">Scilicet hos animos illi mens inscia noxae,</l>
109
+
110
+ <l n="52">Inque Deum pietas, et proba vita dabat,</l>
111
+
112
+ <l n="53">Hos amor in miseros, miserisque intenta juvandis,</l>
113
+
114
+ <l n="54">Clausa sibi, reliquis semper aperta manus:</l>
115
+
116
+ <l n="55">Et vigil in Natos cura, et studium acre tuendae</l>
117
+
118
+ <l n="56">In nivea morum simplicitate domus:</l>
119
+
120
+ <l n="57">Et tormenta per haec candentis atrocia ferri,</l>
121
+
122
+ <l n="58">Quae toties visa est sustinuisse libens,</l>
123
+
124
+ <l n="59">Posthuma vitandi tormenta, piantia Manes,</l>
125
+
126
+ <l n="60">Spes prope certa, sibi si qua pianda forent.</l>
127
+
128
+ <l n="61">Hos animos etiam illa dabant, quae pectore forti</l>
129
+
130
+ <l n="62">Ex gemina amissa vulnera prole tulit.</l>
131
+
132
+ <l n="63">O Superi, quantum vidit maeroris amari,</l>
133
+
134
+ <l n="64">Et quantum vidit roboris illa dies!</l>
135
+
136
+ <l n="65">Quae caruere genae <sic>lacrymis</sic>, et questibus ora?</l>
137
+
138
+ <l n="66">Fundere nec <sic>lacrymas</sic> visa, nec illa queri.</l>
139
+
140
+ <l n="67">Exuerat Matrem coram, Matrem intus agebat,</l>
141
+
142
+ <l n="68">Non se, sed miserum commiserata Virum.</l>
143
+
144
+ <l n="69">Dotibus his tantis quid nunc bona corporis addam?</l>
145
+
146
+ <l n="70">Quid cultum ingenuis artibus ingenium?</l>
147
+
148
+ <l n="71">Quid quam dulce melos manabat ab ore loquentis,</l>
149
+
150
+ <l n="72">In quo opifex mellis fecerat ipsa favos?</l>
151
+
152
+ <l n="73">Talem fata mihi rapuere, immitia fata,</l>
153
+
154
+ <l n="74">Consortem thalami, subsidiumque mei.</l>
155
+
156
+ <l n="75">Post duo dilectae crudelia funera prolis,</l>
157
+
158
+ <l n="76">Praesto illa, auxilium quae mihi ferret, erat.</l>
159
+
160
+ <l n="77">Tota videbatur gens esse superstes in illa</l>
161
+
162
+ <l n="78">Nullaque, ea salva, membra recisa Domus.</l>
163
+
164
+ <l n="79">Nec me illa omnino miserum solante putabam,</l>
165
+
166
+ <l n="80">Illa levamen erat levantis, et illa comes.</l>
167
+
168
+ <l n="81">Decrescit siquidem fidas diffusus in aures</l>
169
+
170
+ <l n="82">Luctus, ut in rivos secta fit unda minor.</l>
171
+
172
+ <l n="83">Adde alios casus, et quae mala plurima passus,</l>
173
+
174
+ <l n="84">Insontem invidiae vi superante, fui:</l>
175
+
176
+ <l n="85">Nunc mihi fraude mala jus libertatis ademptum,</l>
177
+
178
+ <l n="86">Nunc raptae, parcus quas dabat usus, opes.</l>
179
+
180
+ <l n="87">Dum tu aderas, uxor, felix dicebar, eramque,</l>
181
+
182
+ <l n="88">Nil nocuit telis sors mihi saeva suis.</l>
183
+
184
+ <l n="89">Vim mala perdebant in dulci conjugis ore,</l>
185
+
186
+ <l n="90">Et quam tu, poterant aspera fata minus.</l>
187
+
188
+ <l n="91">Cui querar infelix? cui nunc mea tristia dicam?</l>
189
+
190
+ <l n="92">Quae <sic>lacrymas</sic> posthac terget amica manus?</l>
191
+
192
+ <l n="93">Cuncta, illa rapta, rapuit manus invida Parcae,</l>
193
+
194
+ <l n="94">Unde afflicta malis mens capiebat opem.</l>
195
+
196
+ <l n="95">Nec, quia ter denos mecum una exegerit annos</l>
197
+
198
+ <l n="96">Est mihi, ceu sero rapta, dolenda minus.</l>
199
+
200
+ <l n="97">Ah! magis illa ligant, quorum est diuturnior usus,</l>
201
+
202
+ <l n="98">Et plus, quae plus sunt nexa, dirempta dolent.</l>
203
+
204
+ <l n="99">Sic tener, ac mollis ramo si ramus adhaesit,</l>
205
+
206
+ <l n="100">Disjungas facili vincla novella manu.</l>
207
+
208
+ <l n="101">Frangitur annosus, vel vix victusque gemensque</l>
209
+
210
+ <l n="102">Alter ab amplexu solvitur alterius.</l>
211
+
212
+ <l n="103">Nullaque sat longa est, quae desinit esse, voluptas,</l>
213
+
214
+ <l n="104">Et nulla est magno longa in amore mora.</l>
215
+
216
+ <l n="105">Longa mora est tali mihi nunc uxore carenti,</l>
217
+
218
+ <l n="106">Omnis mense dies longior, hora die est.</l>
219
+
220
+ <l n="107">Quid moror ergo miser? cur non invisa relinquo</l>
221
+
222
+ <l n="108">Lumina? cur te non, o mea vita, sequor?</l>
223
+
224
+ <l n="109">Tu mihi dicebas, sine me nil dulce futurum,</l>
225
+
226
+ <l n="110">Non si ipsum flueret nectar in ora, tibi.</l>
227
+
228
+ <l n="111">Quo sola ergo fugis, comitem aspernata maritum?</l>
229
+
230
+ <l n="112">In rapidos abeunt cur tua dicta Notos?</l>
231
+
232
+ <l n="113">Sed non sola fugis: me tecum, ubicumque moraris,</l>
233
+
234
+ <l n="114">Qui bene nos junxit, vivere cogit amor.</l>
235
+
236
+ <l n="115">Una mei pars tecum abiit, minor altera mecum est,</l>
237
+
238
+ <l n="116">Pars abiit melior, plena dolore manet.</l>
239
+
240
+ <l n="117">Ast haec ipsa brevi tua post vestigia curret,</l>
241
+
242
+ <l n="118">A majore nequit pars procul esse minor.</l>
243
+
244
+ <l n="119">O volet illa dies, quae partes uniat ambas,</l>
245
+
246
+ <l n="120">Meque addat comitem tempus in omne tibi!</l>
247
+
248
+ <l n="121">Mors tua divisit, mea nos conjungat, et urna</l>
249
+
250
+ <l n="122">Consociet, thalamus quos sociare nequit.</l>
251
+ </lg>
252
+ </div>
253
+ </body>
254
+ </text>
255
+ </TEI>
@@ -1,6 +1,10 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe LLT::Segmenter do
4
+ def load_fixture(filename)
5
+ File.read(File.expand_path("../../../fixtures/#{filename}", __FILE__))
6
+ end
7
+
4
8
  let(:segmenter) { LLT::Segmenter.new }
5
9
  describe "#segment" do
6
10
  it "returns an array of LLT::Sentence elements" do
@@ -102,6 +106,7 @@ describe LLT::Segmenter do
102
106
  it "doesn't break with punctuation in element names II" do
103
107
  txt = '<grc.test>text.</grc.test> text 2.'
104
108
  sentences = segmenter.segment(txt, xml: true)
109
+ puts sentences
105
110
  sentences.should have(2).items
106
111
  sentences[0].to_s.should == '<grc.test>text.</grc.test>'
107
112
  sentences[1].to_s.should == 'text 2.'
@@ -140,6 +145,19 @@ describe LLT::Segmenter do
140
145
  sentences = segmenter.segment(txt, xml: true)
141
146
  sentences.should have(1).item
142
147
  end
148
+
149
+ it "doesn't fall with multiple closing tags at the end" do
150
+ txt = '<div type="div1" xml:id="c097"> <l>Numen inest vati, vatum mens consona caelo est, </l> <l n="100">Nec certus scit fallere Apollo. </l> </div>'
151
+ sentences = segmenter.segment(txt, xml: true)
152
+ puts sentences
153
+ sentences.should have(1).item
154
+ end
155
+
156
+ it "doesn't fall with empty tags" do
157
+ txt = '<div type="div1" xml:id="c097"> <l>Numen inest vati, vatum mens consona caelo est, </l> <l n="100">Nec certus scit fallere Apollo. </l> <milestone unit="page" n="210"/> </div>'
158
+ sentences = segmenter.segment(txt, xml: true)
159
+ sentences.should have(1).item
160
+ end
143
161
  end
144
162
 
145
163
  context "with xml escaped characters" do
@@ -163,6 +181,16 @@ describe LLT::Segmenter do
163
181
  sentences.should have(2).item
164
182
  sentences[1].to_s.should == 'success.'
165
183
  end
184
+
185
+ describe "when CGI.unescaping HTML characters" do
186
+ it "acknowledges &apos; as potentially trailing delimiter" do
187
+ txt = '&apos;text.&apos; success.'
188
+ unescaped = CGI.unescapeHTML(txt)
189
+ sentences = segmenter.segment(unescaped)
190
+ sentences.should have(2).item
191
+ sentences[1].to_s.should == 'success.'
192
+ end
193
+ end
166
194
  end
167
195
 
168
196
  context "newline (\\n) handling" do
@@ -319,6 +347,36 @@ describe LLT::Segmenter do
319
347
  end
320
348
  end
321
349
 
350
+ context "with full TEI files" do
351
+ it "doesn't go into an endless loop when something is wrong" do
352
+ txt = load_fixture('petrov_eleg01_with_endless_loop.xml')
353
+ sentences = segmenter.segment(txt, xml: true)
354
+ sentences.should_not be_empty
355
+ sentences.should have(60).items
356
+ end
357
+
358
+ it "example II" do
359
+ txt = load_fixture('petrov_eleg01_with_endless_loop_no_xml_header.xml')
360
+ sentences = segmenter.segment(txt, xml: true)
361
+ sentences.should_not be_empty
362
+ sentences.should have(60).items
363
+ end
364
+
365
+ it "example III" do
366
+ txt = load_fixture('petrov_eleg01_cleaned.xml')
367
+ sentences = segmenter.segment(txt, xml: true)
368
+ sentences.should_not be_empty
369
+ sentences.should have(60).items
370
+ end
371
+
372
+ it "example IV" do
373
+ txt = load_fixture('petrov_eleg02_with_internal_error.xml')
374
+ sentences = segmenter.segment(txt, xml: true)
375
+ sentences.should_not be_empty
376
+ sentences.should have(74).items
377
+ end
378
+ end
379
+
322
380
  describe "takes an optional keyword argument add_to" do
323
381
  class ParagraphDummy
324
382
  attr_reader :sentences
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llt-segmenter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gernot Höflechner, Robert Lichstensteiner, Christof Sirk
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-11 00:00:00.000000000 Z
11
+ date: 2014-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -42,16 +42,16 @@ dependencies:
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '2.14'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '2.14'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: simplecov
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -127,8 +127,13 @@ files:
127
127
  - lib/llt/segmenter.rb
128
128
  - lib/llt/segmenter/api.rb
129
129
  - lib/llt/segmenter/version.rb
130
+ - lib/llt/segmenter/version_info.rb
130
131
  - lib/llt/sentence.rb
131
132
  - llt-segmenter.gemspec
133
+ - spec/fixtures/petrov_eleg01_cleaned.xml
134
+ - spec/fixtures/petrov_eleg01_with_endless_loop.xml
135
+ - spec/fixtures/petrov_eleg01_with_endless_loop_no_xml_header.xml
136
+ - spec/fixtures/petrov_eleg02_with_internal_error.xml
132
137
  - spec/lib/llt/segmenter/api_spec.rb
133
138
  - spec/lib/llt/segmenter_spec.rb
134
139
  - spec/spec_helper.rb
@@ -157,6 +162,10 @@ signing_key:
157
162
  specification_version: 4
158
163
  summary: Segments text into sentences
159
164
  test_files:
165
+ - spec/fixtures/petrov_eleg01_cleaned.xml
166
+ - spec/fixtures/petrov_eleg01_with_endless_loop.xml
167
+ - spec/fixtures/petrov_eleg01_with_endless_loop_no_xml_header.xml
168
+ - spec/fixtures/petrov_eleg02_with_internal_error.xml
160
169
  - spec/lib/llt/segmenter/api_spec.rb
161
170
  - spec/lib/llt/segmenter_spec.rb
162
171
  - spec/spec_helper.rb