lingo 1.8.5 → 1.8.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (109)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +25 -0
  3. data/README +7 -5
  4. data/Rakefile +58 -55
  5. data/{lingo-call.cfg → config/lingo-call.cfg} +1 -1
  6. data/{lingo.cfg → config/lingo.cfg} +10 -2
  7. data/{lir.cfg → config/lir.cfg} +10 -2
  8. data/{de → dict/de}/lingo-abk.txt +0 -0
  9. data/{de → dict/de}/lingo-dic.txt +0 -0
  10. data/{de → dict/de}/lingo-mul.txt +0 -0
  11. data/{de → dict/de}/lingo-syn.txt +0 -0
  12. data/{de → dict/de}/test_dic.txt +0 -0
  13. data/{de → dict/de}/test_gen.txt +0 -0
  14. data/{de → dict/de}/test_mu2.txt +0 -0
  15. data/{de → dict/de}/test_mul.txt +0 -0
  16. data/{de → dict/de}/test_sgw.txt +0 -0
  17. data/{de → dict/de}/test_syn.txt +0 -0
  18. data/{de → dict/de}/user-dic.txt +0 -0
  19. data/{en → dict/en}/lingo-dic.txt +0 -0
  20. data/{en → dict/en}/lingo-irr.txt +0 -0
  21. data/{en → dict/en}/lingo-mul.txt +0 -0
  22. data/{en → dict/en}/lingo-syn.txt +0 -0
  23. data/{en → dict/en}/lingo-wdn.txt +0 -0
  24. data/{en → dict/en}/user-dic.txt +0 -0
  25. data/{ru → dict/ru}/lingo-dic.txt +0 -0
  26. data/{ru → dict/ru}/lingo-mul.txt +0 -0
  27. data/{ru → dict/ru}/lingo-syn.txt +0 -0
  28. data/{ru → dict/ru}/user-dic.txt +0 -0
  29. data/{de.lang → lang/de.lang} +1 -1
  30. data/{en.lang → lang/en.lang} +0 -0
  31. data/{ru.lang → lang/ru.lang} +0 -0
  32. data/lib/lingo.rb +14 -15
  33. data/lib/lingo/app.rb +4 -2
  34. data/lib/lingo/attendee.rb +23 -43
  35. data/lib/lingo/attendee/abbreviator.rb +5 -5
  36. data/lib/lingo/attendee/debugger.rb +39 -12
  37. data/lib/lingo/attendee/decomposer.rb +3 -4
  38. data/lib/lingo/attendee/dehyphenizer.rb +4 -4
  39. data/lib/lingo/attendee/formatter.rb +1 -3
  40. data/lib/lingo/attendee/multi_worder.rb +3 -4
  41. data/lib/lingo/attendee/noneword_filter.rb +8 -12
  42. data/lib/lingo/attendee/object_filter.rb +6 -3
  43. data/lib/lingo/attendee/sequencer.rb +5 -5
  44. data/lib/lingo/attendee/stemmer.rb +3 -2
  45. data/lib/lingo/attendee/synonymer.rb +3 -4
  46. data/lib/lingo/attendee/text_reader.rb +39 -38
  47. data/lib/lingo/attendee/text_writer.rb +10 -10
  48. data/lib/lingo/attendee/tokenizer.rb +63 -33
  49. data/lib/lingo/attendee/variator.rb +3 -7
  50. data/lib/lingo/attendee/vector_filter.rb +132 -65
  51. data/lib/lingo/attendee/word_searcher.rb +5 -3
  52. data/lib/lingo/buffered_attendee.rb +1 -3
  53. data/lib/lingo/call.rb +4 -3
  54. data/lib/lingo/cli.rb +5 -1
  55. data/lib/lingo/config.rb +11 -5
  56. data/lib/lingo/ctl.rb +3 -3
  57. data/lib/lingo/database.rb +3 -1
  58. data/lib/lingo/database/crypter.rb +1 -3
  59. data/lib/lingo/database/source.rb +3 -1
  60. data/lib/lingo/database/source/key_value.rb +3 -1
  61. data/lib/lingo/database/source/multi_key.rb +3 -1
  62. data/lib/lingo/database/source/multi_value.rb +3 -1
  63. data/lib/lingo/database/source/single_word.rb +3 -1
  64. data/lib/lingo/database/source/word_class.rb +3 -1
  65. data/lib/lingo/debug.rb +5 -5
  66. data/lib/lingo/{agenda_item.rb → deferred_attendee.rb} +21 -12
  67. data/lib/lingo/error.rb +1 -1
  68. data/lib/lingo/language.rb +1 -9
  69. data/lib/lingo/language/dictionary.rb +2 -17
  70. data/lib/lingo/language/grammar.rb +10 -10
  71. data/lib/lingo/language/lexical.rb +2 -0
  72. data/lib/lingo/language/lexical_hash.rb +2 -0
  73. data/lib/lingo/language/token.rb +17 -3
  74. data/lib/lingo/language/word.rb +13 -5
  75. data/lib/lingo/language/word_form.rb +5 -3
  76. data/lib/lingo/progress.rb +2 -2
  77. data/lib/lingo/srv.rb +1 -1
  78. data/lib/lingo/srv/lingosrv.cfg +1 -1
  79. data/lib/lingo/version.rb +1 -1
  80. data/lib/lingo/web.rb +1 -1
  81. data/lib/lingo/web/lingoweb.cfg +1 -1
  82. data/test/attendee/ts_abbreviator.rb +4 -2
  83. data/test/attendee/ts_multi_worder.rb +81 -88
  84. data/test/attendee/ts_noneword_filter.rb +2 -2
  85. data/test/attendee/ts_object_filter.rb +2 -2
  86. data/test/attendee/ts_sequencer.rb +40 -20
  87. data/test/attendee/ts_stemmer.rb +52 -26
  88. data/test/attendee/ts_text_reader.rb +75 -56
  89. data/test/attendee/ts_text_writer.rb +6 -4
  90. data/test/attendee/ts_tokenizer.rb +304 -193
  91. data/test/attendee/ts_vector_filter.rb +242 -9
  92. data/test/ref/artikel.non +3 -0
  93. data/test/ref/artikel.vec +1 -4
  94. data/test/ref/artikel.vef +940 -0
  95. data/test/ref/artikel.ven +0 -3
  96. data/test/ref/artikel.ver +0 -3
  97. data/test/ref/artikel.vet +2580 -0
  98. data/test/ref/lir.non +34 -31
  99. data/test/ref/lir.seq +14 -15
  100. data/test/ref/lir.vec +37 -37
  101. data/test/ref/lir.vef +329 -0
  102. data/test/ref/lir.ven +329 -0
  103. data/test/ref/lir.ver +329 -0
  104. data/test/ref/lir.vet +329 -0
  105. data/test/test_helper.rb +29 -16
  106. data/test/ts_language.rb +6 -47
  107. metadata +74 -87
  108. data/lingo.rb +0 -29
  109. data/spec/spec_helper.rb +0 -5
@@ -3,7 +3,7 @@
3
3
  require 'test/unit'
4
4
  require 'lingo'
5
5
 
6
- class LingoTestCase < Test::Unit::TestCase
6
+ class LingoTestCase < Test::Unit::TestCase
7
7
 
8
8
  unless const_defined?(:TEST_FILE)
9
9
  TEST_FILE = 'test/de/test.txt'
@@ -20,12 +20,18 @@ class LingoTestCase < Test::Unit::TestCase
20
20
  [a || '', b || '', *c]
21
21
  end
22
22
 
23
+ def li(t, o)
24
+ ["#{t}\r\n", o]
25
+ end
26
+
23
27
  def ai(t)
24
- Lingo::AgendaItem.new(*split(t))
28
+ i = t.split('|')
29
+ i.unshift(i.shift.to_sym)
25
30
  end
26
31
 
27
32
  def tk(t)
28
- Lingo::Language::Token.new(*split(t, /\|(?=[A-Z])/))
33
+ a, b, *c = split(t, /\|(?=[A-Z\d])/)
34
+ Lingo::Language::Token.new(a, b, *c.map(&:to_i))
29
35
  end
30
36
 
31
37
  def lx(t)
@@ -55,8 +61,15 @@ class AttendeeTestCase < LingoTestCase
55
61
  @lingo.reset
56
62
 
57
63
  list = [{ @attendee => cfg }]
58
- list.unshift 'TestSpooler' => { 'out' => 'input', 'input' => input } if input
59
- list.push 'TestDumper' => { 'in' => 'output', 'output' => output = [] } if expect
64
+
65
+ list.unshift('TestSpooler' => {
66
+ 'out' => 'input',
67
+ 'input' => input,
68
+ 'pos' => @attendee == 'Tokenizer' }) if input
69
+
70
+ list.push('TestDumper' => {
71
+ 'in' => 'output',
72
+ 'output' => output = [] }) if expect
60
73
 
61
74
  @lingo.invite(list)
62
75
  @lingo.start
@@ -74,32 +87,31 @@ class Lingo
74
87
 
75
88
  class TestSpooler < self
76
89
 
77
- protected
78
-
79
90
  def init
80
- @input = get_key('input')
91
+ @input, @pos = get_key('input'), get_key('pos', nil) && 0
81
92
  end
82
93
 
83
- def control(cmd, param)
84
- @input.each { |i| forward(i) } if cmd == STR_CMD_TALK
94
+ def control(cmd)
95
+ if cmd == :TALK
96
+ @input.each { |i| i.is_a?(Array) ? command(*i) :
97
+ @pos ? forward(i, @pos += i.bytesize) : forward(i) }
98
+ end
85
99
  end
86
100
 
87
101
  end
88
102
 
89
103
  class TestDumper < self
90
104
 
91
- protected
92
-
93
105
  def init
94
106
  @output = get_key('output')
95
107
  end
96
108
 
97
- def control(cmd, param)
98
- @output << AgendaItem.new(cmd, param)
109
+ def control(*args)
110
+ @output.push(args)
99
111
  end
100
112
 
101
- def process(obj)
102
- @output << obj
113
+ def process(obj, *rest)
114
+ @output.push(rest.empty? ? obj : rest.unshift(obj))
103
115
  end
104
116
 
105
117
  end
@@ -115,4 +127,5 @@ class Lingo
115
127
  end
116
128
 
117
129
  end
130
+
118
131
  end
@@ -23,8 +23,6 @@ class TestLexicalHash < LingoTestCase
23
23
  $stderr = old_stderr
24
24
  end
25
25
 
26
- # TODO: Crypt testen...
27
-
28
26
  def test_cache
29
27
  lh('sys-dic') { |ds|
30
28
  assert_equal([lx('regen|s|m'), lx('regen|s|n'), lx('regen|v'), lx('rege|a')], ds['regen'])
@@ -38,11 +36,9 @@ class TestLexicalHash < LingoTestCase
38
36
 
39
37
  lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
40
38
 
41
- # Keine Store-Datei vorhanden, nur Text vorhanden
42
39
  File.delete(*Dir["#{Lingo.find(:store, txt_file)}.*"])
43
40
  lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
44
41
 
45
- # Store vorhanden, aber Text ist neuer
46
42
  lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
47
43
  end
48
44
 
@@ -59,8 +55,8 @@ class TestLexicalHash < LingoTestCase
59
55
  assert_equal([lx('abelscher ring ohne nullteiler|m')], ds['abelscher ring ohne nullteiler'])
60
56
  assert_equal([4], ds['abelscher ring ohne'])
61
57
  assert_equal([lx('alleinreisende frau|m')], ds['alleinreisend frau'])
62
- assert_equal([lx('abschaltbarer leistungshalbleiter|m')], ds['abschaltbar leistungshalbleiter'])
63
- assert_equal(nil, ds['abschaltbarer leistungshalbleiter'])
58
+ assert_equal([lx('abschaltbarer leistungshalbleiter|m')], ds['abschaltbarer leistungshalbleiter'])
59
+ assert_equal(nil, ds['abschaltbar leistungshalbleiter'])
64
60
  }
65
61
  end
66
62
 
@@ -113,12 +109,6 @@ class TestDictionary < LingoTestCase
113
109
  end
114
110
 
115
111
  def test_params
116
- # Keine Sprach-Konfiguration angegeben
117
- #assert_raise(RuntimeError) {
118
- # Lingo::Language::Dictionary.new({ 'source' => %w[sys-dic] }, @lingo)
119
- #}
120
-
121
- # Falsche Parameter angegeben (Pflichtparameter ohne Defaultwert)
122
112
  assert_raise(ArgumentError) {
123
113
  Lingo::Language::Dictionary.new({ 'course' => %w[sys-dic] }, @lingo)
124
114
  }
@@ -142,39 +132,27 @@ class TestDictionary < LingoTestCase
142
132
 
143
133
  def test_select_two_sources_mode_first
144
134
  ld('source' => %w[sys-dic tst-dic], 'mode' => 'first') { |dic|
145
- # in keiner Quelle vorhanden
146
135
  assert_equal([], dic.select('hasennasen'))
147
- # nur in erster Quelle vorhanden
148
136
  assert_equal([lx('knaller|s')], dic.select('knaller'))
149
- # nur in zweiter Quelle vorhanden
150
137
  assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
151
- # in beiden Quellen vorhanden
152
138
  assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n')], dic.select('a-dur'))
153
139
  }
154
140
  end
155
141
 
156
142
  def test_select_two_sources_mode_first_flipped
157
143
  ld('source' => %w[tst-dic sys-dic], 'mode' => 'first') { |dic|
158
- # in keiner Quelle vorhanden
159
144
  assert_equal([], dic.select('hasennasen'))
160
- # nur in erster Quelle vorhanden
161
145
  assert_equal([lx('knaller|s')], dic.select('knaller'))
162
- # nur in zweiter Quelle vorhanden
163
146
  assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
164
- # in beiden Quellen vorhanden
165
147
  assert_equal([lx('b-dur|s')], dic.select('a-dur'))
166
148
  }
167
149
  end
168
150
 
169
151
  def test_select_two_sources_mode_all
170
152
  ld('source' => %w[sys-dic tst-dic], 'mode' => 'all') { |dic|
171
- # in keiner Quelle vorhanden
172
153
  assert_equal([], dic.select('hasennasen'))
173
- # nur in erster Quelle vorhanden
174
154
  assert_equal([lx('knaller|s')], dic.select('knaller'))
175
- # nur in zweiter Quelle vorhanden
176
155
  assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
177
- # in beiden Quellen vorhanden
178
156
  assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n'), lx('b-dur|s')], dic.select('a-dur'))
179
157
  assert_equal([lx('aas|s|n'), lx('aas|s')], dic.select('aas'))
180
158
  }
@@ -182,15 +160,11 @@ class TestDictionary < LingoTestCase
182
160
 
183
161
  def test_select_two_sources_mode_default
184
162
  ld('source' => %w[sys-dic tst-dic]) { |dic|
185
- # in keiner Quelle vorhanden
186
163
  assert_equal([], dic.select('hasennasen'))
187
- # nur in erster Quelle vorhanden
188
164
  assert_equal([lx('knaller|s')], dic.select('knaller'))
189
- # nur in zweiter Quelle vorhanden
190
165
  assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
191
166
  assert_equal([lx('wirkungsort|s'), lx('wirkung|s+'), lx('ort|s+')], dic.select('wirkungsort'))
192
167
  assert_equal([lx('zettelkatalog|k'), lx('zettel|s+'), lx('katalog|s+')], dic.select('zettelkatalog'))
193
- # in beiden Quellen vorhanden
194
168
  assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n'), lx('b-dur|s')], dic.select('a-dur'))
195
169
  assert_equal([lx('aas|s|n'), lx('aas|s')], dic.select('aas'))
196
170
  }
@@ -207,7 +181,7 @@ class TestDictionary < LingoTestCase
207
181
 
208
182
  def test_infix_lexicals
209
183
  ld('source' => %w[sys-dic]) { |dic|
210
- assert_equal( [lx('information|f'), lx('informationsen|f')], ax(dic, 'informations', :infix))
184
+ assert_equal([lx('information|f')], ax(dic, 'informations', :infix))
211
185
  }
212
186
  end
213
187
 
@@ -222,7 +196,7 @@ class TestDictionary < LingoTestCase
222
196
 
223
197
  def test_select_with_infix
224
198
  ld('source' => %w[sys-dic]) { |dic|
225
- assert_equal( [lx('information|f'), lx('informationsen|f')], ax(dic, 'informations', :infix))
199
+ assert_equal([lx('information|f')], ax(dic, 'informations', :infix))
226
200
  }
227
201
  end
228
202
 
@@ -250,10 +224,6 @@ class TestGrammar < LingoTestCase
250
224
  @lingo = Lingo.new
251
225
  end
252
226
 
253
- def test_params
254
- # Die gleichen Fälle wie bei Dictionary, daher nicht notwendig
255
- end
256
-
257
227
  def test_cache
258
228
  lg { |gra|
259
229
  assert_equal(
@@ -294,7 +264,7 @@ class TestGrammar < LingoTestCase
294
264
  )
295
265
 
296
266
  assert_equal(
297
- wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzen|v+', 'erforschung|s+'),
267
+ wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzer|s+', 'forschung|s+'),
298
268
  gra.find_compound('benutzerforschung')
299
269
  )
300
270
 
@@ -318,64 +288,53 @@ class TestGrammar < LingoTestCase
318
288
  gra.find_compound('titelbestandsbestände')
319
289
  )
320
290
 
321
- # hinterer Teil ist ein Wort mit Suffix
322
291
  assert_equal(
323
292
  wd('hasenbraten|KOM', 'hasenbraten|k', 'hase|s+', 'braten|v+'),
324
293
  gra.find_compound('hasenbraten')
325
294
  )
326
295
 
327
- # hinterer Teil ist ein Wort mit Infix ohne Schwanz
328
296
  assert_equal(
329
297
  wd('nasenlaufen|KOM', 'nasenlaufen|k', 'nase|s+', 'laufen|v+'),
330
298
  gra.find_compound('nasenlaufen')
331
299
  )
332
300
 
333
- # hinterer Teil ist ein Wort mit Infix mit Schwanz
334
301
  assert_equal(
335
302
  wd('nasenlaufens|KOM', 'nasenlaufen|k', 'nase|s+', 'laufen|v+'),
336
303
  gra.find_compound('nasenlaufens')
337
304
  )
338
305
 
339
- # hinterer Teil ist ein Kompositum nach Bindestrich
340
306
  assert_equal(
341
307
  wd('arrafat-nachfolgebedarf|KOM', 'arrafat-nachfolgebedarf|k', 'arrafat|x+', 'nachfolge|s+', 'bedarf|s+'),
342
308
  gra.find_compound('arrafat-nachfolgebedarf')
343
309
  )
344
310
 
345
- # hinterer Teil ist ein TakeItAsIs nach Bindestrich
346
311
  assert_equal(
347
312
  wd('nachfolge-arrafat|KOM', 'nachfolge-arrafat|k', 'nachfolge|s+', 'arrafat|x+'),
348
313
  gra.find_compound('nachfolge-arrafat')
349
314
  )
350
315
 
351
- # vorderer Teil ist ein Wort mit Suffix => siehe Hasenbraten
352
- # vorderer Teil ist ein Kompositum
353
316
  assert_equal(
354
317
  wd('morgenonkelmantel|KOM', 'morgenonkelmantel|k', 'morgen|w+', 'morgen|s+', 'onkel|s+', 'mantel|s+'),
355
318
  gra.find_compound('morgenonkelmantel')
356
319
  )
357
320
 
358
- # vorderer Teil ist ein TakeItAsIs vor Bindestrich / bindestrichversion
359
321
  assert_equal(
360
322
  wd('arrafat-nachfolger|KOM', 'arrafat-nachfolger|k', 'arrafat|x+', 'nachfolger|s+'),
361
323
  gra.find_compound('arrafat-nachfolger')
362
324
  )
363
325
 
364
- # bindestrichversion zwei-teilig
365
326
  assert_equal(
366
327
  wd('cd-rom-technologie|KOM', 'cd-rom-technologie|k', 'cd-rom|s+|f', 'cd-rom|s+|m', 'technologie|s+|f'),
367
328
  gra.find_compound('cd-rom-technologie')
368
329
  )
369
330
 
370
- # bindestrichversion drei-teilig
371
331
  assert_equal(
372
332
  wd('albert-ludwigs-universität|KOM', 'albert-ludwigs-universität|k', 'albert|e+', 'ludwig|e+', 'universität|s+'),
373
333
  gra.find_compound('albert-ludwigs-universität')
374
334
  )
375
335
 
376
- # normal mit suggestion
377
336
  assert_equal(
378
- wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzen|v+', 'erforschung|s+'),
337
+ wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzer|s+', 'forschung|s+'),
379
338
  gra.find_compound('benutzerforschung')
380
339
  )
381
340
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lingo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.5
4
+ version: 1.8.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Vorhauer
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-10-02 00:00:00.000000000 Z
12
+ date: 2015-02-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: cyclops
@@ -17,34 +17,28 @@ dependencies:
17
17
  requirements:
18
18
  - - "~>"
19
19
  - !ruby/object:Gem::Version
20
- version: '0.0'
21
- - - ">="
22
- - !ruby/object:Gem::Version
23
- version: 0.0.4
20
+ version: '0.1'
24
21
  type: :runtime
25
22
  prerelease: false
26
23
  version_requirements: !ruby/object:Gem::Requirement
27
24
  requirements:
28
25
  - - "~>"
29
26
  - !ruby/object:Gem::Version
30
- version: '0.0'
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: 0.0.4
27
+ version: '0.1'
34
28
  - !ruby/object:Gem::Dependency
35
29
  name: nuggets
36
30
  requirement: !ruby/object:Gem::Requirement
37
31
  requirements:
38
32
  - - "~>"
39
33
  - !ruby/object:Gem::Version
40
- version: '1.0'
34
+ version: '1.1'
41
35
  type: :runtime
42
36
  prerelease: false
43
37
  version_requirements: !ruby/object:Gem::Requirement
44
38
  requirements:
45
39
  - - "~>"
46
40
  - !ruby/object:Gem::Version
47
- version: '1.0'
41
+ version: '1.1'
48
42
  - !ruby/object:Gem::Dependency
49
43
  name: rubyzip
50
44
  requirement: !ruby/object:Gem::Requirement
@@ -119,16 +113,22 @@ dependencies:
119
113
  name: hen
120
114
  requirement: !ruby/object:Gem::Requirement
121
115
  requirements:
116
+ - - "~>"
117
+ - !ruby/object:Gem::Version
118
+ version: '0.8'
122
119
  - - ">="
123
120
  - !ruby/object:Gem::Version
124
- version: '0'
121
+ version: 0.8.1
125
122
  type: :development
126
123
  prerelease: false
127
124
  version_requirements: !ruby/object:Gem::Requirement
128
125
  requirements:
126
+ - - "~>"
127
+ - !ruby/object:Gem::Version
128
+ version: '0.8'
129
129
  - - ">="
130
130
  - !ruby/object:Gem::Version
131
- version: '0'
131
+ version: 0.8.1
132
132
  - !ruby/object:Gem::Dependency
133
133
  name: rake
134
134
  requirement: !ruby/object:Gem::Requirement
@@ -144,7 +144,7 @@ dependencies:
144
144
  - !ruby/object:Gem::Version
145
145
  version: '0'
146
146
  - !ruby/object:Gem::Dependency
147
- name: rspec
147
+ name: test-unit
148
148
  requirement: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - ">="
@@ -189,27 +189,34 @@ files:
189
189
  - bin/lingoctl
190
190
  - bin/lingosrv
191
191
  - bin/lingoweb
192
- - de.lang
193
- - de/lingo-abk.txt
194
- - de/lingo-dic.txt
195
- - de/lingo-mul.txt
196
- - de/lingo-syn.txt
197
- - de/test_dic.txt
198
- - de/test_gen.txt
199
- - de/test_mu2.txt
200
- - de/test_mul.txt
201
- - de/test_sgw.txt
202
- - de/test_syn.txt
203
- - de/user-dic.txt
204
- - en.lang
205
- - en/lingo-dic.txt
206
- - en/lingo-irr.txt
207
- - en/lingo-mul.txt
208
- - en/lingo-syn.txt
209
- - en/lingo-wdn.txt
210
- - en/user-dic.txt
192
+ - config/lingo-call.cfg
193
+ - config/lingo.cfg
194
+ - config/lir.cfg
195
+ - dict/de/lingo-abk.txt
196
+ - dict/de/lingo-dic.txt
197
+ - dict/de/lingo-mul.txt
198
+ - dict/de/lingo-syn.txt
199
+ - dict/de/test_dic.txt
200
+ - dict/de/test_gen.txt
201
+ - dict/de/test_mu2.txt
202
+ - dict/de/test_mul.txt
203
+ - dict/de/test_sgw.txt
204
+ - dict/de/test_syn.txt
205
+ - dict/de/user-dic.txt
206
+ - dict/en/lingo-dic.txt
207
+ - dict/en/lingo-irr.txt
208
+ - dict/en/lingo-mul.txt
209
+ - dict/en/lingo-syn.txt
210
+ - dict/en/lingo-wdn.txt
211
+ - dict/en/user-dic.txt
212
+ - dict/ru/lingo-dic.txt
213
+ - dict/ru/lingo-mul.txt
214
+ - dict/ru/lingo-syn.txt
215
+ - dict/ru/user-dic.txt
216
+ - lang/de.lang
217
+ - lang/en.lang
218
+ - lang/ru.lang
211
219
  - lib/lingo.rb
212
- - lib/lingo/agenda_item.rb
213
220
  - lib/lingo/app.rb
214
221
  - lib/lingo/attendee.rb
215
222
  - lib/lingo/attendee/abbreviator.rb
@@ -249,6 +256,7 @@ files:
249
256
  - lib/lingo/database/source/single_word.rb
250
257
  - lib/lingo/database/source/word_class.rb
251
258
  - lib/lingo/debug.rb
259
+ - lib/lingo/deferred_attendee.rb
252
260
  - lib/lingo/error.rb
253
261
  - lib/lingo/language.rb
254
262
  - lib/lingo/language/char.rb
@@ -271,16 +279,6 @@ files:
271
279
  - lib/lingo/web/public/lingo.png
272
280
  - lib/lingo/web/public/lingoweb.css
273
281
  - lib/lingo/web/views/index.erb
274
- - lingo-call.cfg
275
- - lingo.cfg
276
- - lingo.rb
277
- - lir.cfg
278
- - ru.lang
279
- - ru/lingo-dic.txt
280
- - ru/lingo-mul.txt
281
- - ru/lingo-syn.txt
282
- - ru/user-dic.txt
283
- - spec/spec_helper.rb
284
282
  - test/attendee/ts_abbreviator.rb
285
283
  - test/attendee/ts_decomposer.rb
286
284
  - test/attendee/ts_multi_worder.rb
@@ -304,13 +302,19 @@ files:
304
302
  - test/ref/artikel.seq
305
303
  - test/ref/artikel.syn
306
304
  - test/ref/artikel.vec
305
+ - test/ref/artikel.vef
307
306
  - test/ref/artikel.ven
308
307
  - test/ref/artikel.ver
308
+ - test/ref/artikel.vet
309
309
  - test/ref/lir.mul
310
310
  - test/ref/lir.non
311
311
  - test/ref/lir.seq
312
312
  - test/ref/lir.syn
313
313
  - test/ref/lir.vec
314
+ - test/ref/lir.vef
315
+ - test/ref/lir.ven
316
+ - test/ref/lir.ver
317
+ - test/ref/lir.vet
314
318
  - test/test_helper.rb
315
319
  - test/ts_database.rb
316
320
  - test/ts_language.rb
@@ -324,51 +328,34 @@ licenses:
324
328
  metadata: {}
325
329
  post_install_message: |2+
326
330
 
327
- lingo-1.8.5 [2014-10-02]:
331
+ lingo-1.8.6 [2015-02-09]:
328
332
 
329
- * Dictionary values (projections) are no longer sorted; hence, order of
330
- definition affects processing.
331
- * Lexicals in Lingo::Language::Word are no longer sorted; in particular,
332
- compound parts keep their original order.
333
- * Lexicals in Lingo::Language::Word are no longer cleaned from duplicates.
334
- * Compiled dictionaries are updated whenever the Lingo version or their
335
- configuration changes, not only when the source file's size or modification
336
- time changes.
337
- * Lingo::Attendee::Synonymer learned <tt>compound-parts</tt> option to also
338
- generate synonyms for compound parts when set to +true+.
339
- * Lingo::Attendee::TextReader learned better PDF-to-text conversion using the
340
- +pdftotext+ command; specify <tt>filter: pdftotext</tt> in the config.
341
- * Lingo::Attendee::VectorFilter learned +dict+ option to print words in
342
- dictionary format (viz. Lingo::Database::Source::WordClass).
343
- * Lingo::Attendee::VectorFilter learned +preamble+ option to print current
344
- configuration to the beginning of the log file (<tt>debug: 'true'</tt>);
345
- set <tt>preamble: false</tt> to disable.
346
- * Multiword dictionaries compiled from base forms can now generate inflected
347
- adjectives based on the gender of the head noun; set <tt>inflect: true</tt>
348
- in the dictionary config.
349
- * Lingo::Database::Source::WordClass supports gender information being encoded
350
- in the dictionary as well as shorthand notation for multiple word
351
- classes/genders.
352
- * Lingo::Database::Source::WordClass supports compounds being encoded in the
353
- dictionary (appending <tt>+</tt> to their parts' word classes is
354
- recommended).
355
- * Lingo::Database::Source removes leading and trailing whitespace from
356
- dictionary lines.
357
- * Lingo::Database::Crypter uses OpenSSL to encrypt/decrypt dictionaries.
358
- Note: Can't decrypt dictionaries encrypted with the old scheme anymore.
359
- * Lingo::Attendee::Tokenizer learned subset of MediaWiki syntax.
360
- * Eliminated pathological behaviour of the +URLS+ rule in
361
- Lingo::Attendee::Tokenizer.
362
- * Fixed regression introduced in 1.8.2 where <tt>combine: all</tt> would no
363
- longer work in Lingo::Attendee::MultiWorder.
364
- * Updated and extended Russian dictionaries. (Yulia Dorokhova, Thomas Müller)
365
- * +lingoctl+ no longer overwrites existing files without confirmation.
366
- * +lingoctl+ learned +archive+ command.
367
- * Dictionary cleanup.
333
+ * Lingo::Attendee::VectorFilter learned +pos+ option to print position and
334
+ byte offset with each word.
335
+ * Lingo::Attendee::VectorFilter learned +tfidf+ option to sort results based
336
+ on their tf–idf[https://en.wikipedia.org/wiki/Tf–idf] score; the document
337
+ frequencies are calculated over the "corpus" of all files processed during
338
+ a single program invocation.
339
+ * Lingo::Attendee::VectorFilter learned +tokens+ option to filter on
340
+ Lingo::Language::Token in addition to Lingo::Language::Word.
341
+ * Lingo::Attendee::VectorFilter no longer supports +debug+ (as well as
342
+ +prompt+ and +preamble+); use Lingo::Attendee::DebugFilter instead.
343
+ * Lingo::Attendee::TextReader no longer removes line endings; option +chomp+
344
+ is obsolete.
345
+ * Lingo::Attendee::TextReader passes byte offset to the following attendee.
346
+ * Lingo::Attendee::Tokenizer records token's byte offset.
347
+ * Lingo::Attendee::Tokenizer records token's sequence position.
348
+ * Lingo::Attendee::Tokenizer learned <tt>skip-tags</tt> option to skip over
349
+ specified tags' contents.
350
+ * Lingo::Attendee subclasses warn when invalid or obsolete options or names
351
+ are used.
352
+ * Changed German infix substitution +/en+ to +ch/chen+ in order to prevent
353
+ overly aggressive identifications.
354
+ * Internal refactoring and API changes.
368
355
 
369
356
  rdoc_options:
370
357
  - "--title"
371
- - lingo Application documentation (v1.8.5)
358
+ - lingo Application documentation (v1.8.6)
372
359
  - "--charset"
373
360
  - UTF-8
374
361
  - "--line-numbers"
@@ -389,7 +376,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
389
376
  version: '0'
390
377
  requirements: []
391
378
  rubyforge_project:
392
- rubygems_version: 2.4.2
379
+ rubygems_version: 2.4.5
393
380
  signing_key:
394
381
  specification_version: 4
395
382
  summary: The full-featured automatic indexing system