llt-tokenizer 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9a9abfc5e79b148f497749053c8ccfa7ac9653af
4
- data.tar.gz: 1c9fe20eb2824eccc1840602beae6552415eb5d2
3
+ metadata.gz: cd490c0611dc9cd5ed2f2aad95631b500e7e7d35
4
+ data.tar.gz: fe8162ded4cecfda383f3919576ec19ca14f5a38
5
5
  SHA512:
6
- metadata.gz: 3cd367d754d75f895240c709aed9697140c8359490bc634e56f118b77cc015c2a08c80d7fa4fa74448084844beec4749a7b01b1789c0805a3a5a8fa8d465d5e9
7
- data.tar.gz: 21c50a75955cab805fb81bc1435963e047171936c015981121de1405378fb4af9c21a69153c0c043d3a504986e1022437690cedb60b88e0b8246ca6fce20565b
6
+ metadata.gz: a8a90a133ccd0c27fbbd20e64df0a8e560ad183010ae55716362e0909c737a6d5359c51a647da387ddade71a3bd50545381c899cd3fcea095001ef0e27c42483
7
+ data.tar.gz: 2d81412a66b473206d23aec3128d3321ff736f3a9d5ab8fca0349bcc347e8d692b3ee33d0fd2c67f87f031335113c1a54caef80b5aa0a4eaca4c5bd2320d6778
@@ -1,5 +1,5 @@
1
1
  module LLT
2
2
  class Tokenizer
3
- VERSION = "0.0.6"
3
+ VERSION = "0.0.7"
4
4
  end
5
5
  end
data/lib/llt/tokenizer.rb CHANGED
@@ -169,7 +169,7 @@ module LLT
169
169
  ENCLITICS = %w{ que ne ve c }
170
170
  def split_enklitika_and_change_their_position
171
171
  split_with_force
172
- split_nec
172
+ split_nec_and_oute
173
173
  make_frequent_corrections
174
174
  end
175
175
 
@@ -202,16 +202,22 @@ module LLT
202
202
  "#{@enclitics_marker}#{val}"
203
203
  end
204
204
 
205
- def split_nec
206
- indices = []
205
+ def split_nec_and_oute
206
+ nec_indices = []
207
+ oute_indices = []
207
208
  @worker.each_with_index do |token, i|
208
- if token =~ /^nec$/i
209
+ case token
210
+ when /^nec$/i
209
211
  token.slice!(-1)
210
- indices << (i + indices.size + @shift_range)
212
+ nec_indices << (i + nec_indices.size + @shift_range)
213
+ when /^οὐτε$/i
214
+ token.slice!(-2, 2)
215
+ oute_indices << (i + oute_indices.size + @shift_range)
211
216
  end
212
217
  end
213
218
 
214
- indices.each { |i| @worker.insert(i, enclitic('c')) }
219
+ nec_indices.each { |i| @worker.insert(i, enclitic('c')) }
220
+ oute_indices.each { |i| @worker.insert(i, enclitic('τε')) }
215
221
  end
216
222
 
217
223
  def make_frequent_corrections
@@ -260,6 +260,18 @@ describe LLT::Tokenizer do
260
260
  end
261
261
  end
262
262
  end
263
+
264
+ context "when confronted with -τε" do
265
+ examples = {
266
+ 'οὐτε' => '-τε οὐ'
267
+ }
268
+
269
+ examples.each do |example, expected|
270
+ it "transforms #{example} to #{expected}" do
271
+ enklitika_test(example).should be_transformed_to expected
272
+ end
273
+ end
274
+ end
263
275
  end
264
276
 
265
277
  describe "#merge_what_needs_merging" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llt-tokenizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - LFDM
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-11 00:00:00.000000000 Z
11
+ date: 2014-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -190,7 +190,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
190
190
  version: '0'
191
191
  requirements: []
192
192
  rubyforge_project:
193
- rubygems_version: 2.2.0
193
+ rubygems_version: 2.2.2
194
194
  signing_key:
195
195
  specification_version: 4
196
196
  summary: Breaks latin sentences into tokens