llt-tokenizer 0.0.6 → 0.0.7

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9a9abfc5e79b148f497749053c8ccfa7ac9653af
4
- data.tar.gz: 1c9fe20eb2824eccc1840602beae6552415eb5d2
3
+ metadata.gz: cd490c0611dc9cd5ed2f2aad95631b500e7e7d35
4
+ data.tar.gz: fe8162ded4cecfda383f3919576ec19ca14f5a38
5
5
  SHA512:
6
- metadata.gz: 3cd367d754d75f895240c709aed9697140c8359490bc634e56f118b77cc015c2a08c80d7fa4fa74448084844beec4749a7b01b1789c0805a3a5a8fa8d465d5e9
7
- data.tar.gz: 21c50a75955cab805fb81bc1435963e047171936c015981121de1405378fb4af9c21a69153c0c043d3a504986e1022437690cedb60b88e0b8246ca6fce20565b
6
+ metadata.gz: a8a90a133ccd0c27fbbd20e64df0a8e560ad183010ae55716362e0909c737a6d5359c51a647da387ddade71a3bd50545381c899cd3fcea095001ef0e27c42483
7
+ data.tar.gz: 2d81412a66b473206d23aec3128d3321ff736f3a9d5ab8fca0349bcc347e8d692b3ee33d0fd2c67f87f031335113c1a54caef80b5aa0a4eaca4c5bd2320d6778
@@ -1,5 +1,5 @@
1
1
  module LLT
2
2
  class Tokenizer
3
- VERSION = "0.0.6"
3
+ VERSION = "0.0.7"
4
4
  end
5
5
  end
data/lib/llt/tokenizer.rb CHANGED
@@ -169,7 +169,7 @@ module LLT
169
169
  ENCLITICS = %w{ que ne ve c }
170
170
  def split_enklitika_and_change_their_position
171
171
  split_with_force
172
- split_nec
172
+ split_nec_and_oute
173
173
  make_frequent_corrections
174
174
  end
175
175
 
@@ -202,16 +202,22 @@ module LLT
202
202
  "#{@enclitics_marker}#{val}"
203
203
  end
204
204
 
205
- def split_nec
206
- indices = []
205
+ def split_nec_and_oute
206
+ nec_indices = []
207
+ oute_indices = []
207
208
  @worker.each_with_index do |token, i|
208
- if token =~ /^nec$/i
209
+ case token
210
+ when /^nec$/i
209
211
  token.slice!(-1)
210
- indices << (i + indices.size + @shift_range)
212
+ nec_indices << (i + nec_indices.size + @shift_range)
213
+ when /^οὐτε$/i
214
+ token.slice!(-2, 2)
215
+ oute_indices << (i + oute_indices.size + @shift_range)
211
216
  end
212
217
  end
213
218
 
214
- indices.each { |i| @worker.insert(i, enclitic('c')) }
219
+ nec_indices.each { |i| @worker.insert(i, enclitic('c')) }
220
+ oute_indices.each { |i| @worker.insert(i, enclitic('τε')) }
215
221
  end
216
222
 
217
223
  def make_frequent_corrections
@@ -260,6 +260,18 @@ describe LLT::Tokenizer do
260
260
  end
261
261
  end
262
262
  end
263
+
264
+ context "when confronted with -τε" do
265
+ examples = {
266
+ 'οὐτε' => '-τε οὐ'
267
+ }
268
+
269
+ examples.each do |example, expected|
270
+ it "transforms #{example} to #{expected}" do
271
+ enklitika_test(example).should be_transformed_to expected
272
+ end
273
+ end
274
+ end
263
275
  end
264
276
 
265
277
  describe "#merge_what_needs_merging" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llt-tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - LFDM
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-11 00:00:00.000000000 Z
11
+ date: 2014-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -190,7 +190,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
190
190
  version: '0'
191
191
  requirements: []
192
192
  rubyforge_project:
193
- rubygems_version: 2.2.0
193
+ rubygems_version: 2.2.2
194
194
  signing_key:
195
195
  specification_version: 4
196
196
  summary: Breaks latin sentences into tokens