lingo 1.8.5 → 1.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. checksums.yaml +4 -4
  2. data/ChangeLog +25 -0
  3. data/README +7 -5
  4. data/Rakefile +58 -55
  5. data/{lingo-call.cfg → config/lingo-call.cfg} +1 -1
  6. data/{lingo.cfg → config/lingo.cfg} +10 -2
  7. data/{lir.cfg → config/lir.cfg} +10 -2
  8. data/{de → dict/de}/lingo-abk.txt +0 -0
  9. data/{de → dict/de}/lingo-dic.txt +0 -0
  10. data/{de → dict/de}/lingo-mul.txt +0 -0
  11. data/{de → dict/de}/lingo-syn.txt +0 -0
  12. data/{de → dict/de}/test_dic.txt +0 -0
  13. data/{de → dict/de}/test_gen.txt +0 -0
  14. data/{de → dict/de}/test_mu2.txt +0 -0
  15. data/{de → dict/de}/test_mul.txt +0 -0
  16. data/{de → dict/de}/test_sgw.txt +0 -0
  17. data/{de → dict/de}/test_syn.txt +0 -0
  18. data/{de → dict/de}/user-dic.txt +0 -0
  19. data/{en → dict/en}/lingo-dic.txt +0 -0
  20. data/{en → dict/en}/lingo-irr.txt +0 -0
  21. data/{en → dict/en}/lingo-mul.txt +0 -0
  22. data/{en → dict/en}/lingo-syn.txt +0 -0
  23. data/{en → dict/en}/lingo-wdn.txt +0 -0
  24. data/{en → dict/en}/user-dic.txt +0 -0
  25. data/{ru → dict/ru}/lingo-dic.txt +0 -0
  26. data/{ru → dict/ru}/lingo-mul.txt +0 -0
  27. data/{ru → dict/ru}/lingo-syn.txt +0 -0
  28. data/{ru → dict/ru}/user-dic.txt +0 -0
  29. data/{de.lang → lang/de.lang} +1 -1
  30. data/{en.lang → lang/en.lang} +0 -0
  31. data/{ru.lang → lang/ru.lang} +0 -0
  32. data/lib/lingo.rb +14 -15
  33. data/lib/lingo/app.rb +4 -2
  34. data/lib/lingo/attendee.rb +23 -43
  35. data/lib/lingo/attendee/abbreviator.rb +5 -5
  36. data/lib/lingo/attendee/debugger.rb +39 -12
  37. data/lib/lingo/attendee/decomposer.rb +3 -4
  38. data/lib/lingo/attendee/dehyphenizer.rb +4 -4
  39. data/lib/lingo/attendee/formatter.rb +1 -3
  40. data/lib/lingo/attendee/multi_worder.rb +3 -4
  41. data/lib/lingo/attendee/noneword_filter.rb +8 -12
  42. data/lib/lingo/attendee/object_filter.rb +6 -3
  43. data/lib/lingo/attendee/sequencer.rb +5 -5
  44. data/lib/lingo/attendee/stemmer.rb +3 -2
  45. data/lib/lingo/attendee/synonymer.rb +3 -4
  46. data/lib/lingo/attendee/text_reader.rb +39 -38
  47. data/lib/lingo/attendee/text_writer.rb +10 -10
  48. data/lib/lingo/attendee/tokenizer.rb +63 -33
  49. data/lib/lingo/attendee/variator.rb +3 -7
  50. data/lib/lingo/attendee/vector_filter.rb +132 -65
  51. data/lib/lingo/attendee/word_searcher.rb +5 -3
  52. data/lib/lingo/buffered_attendee.rb +1 -3
  53. data/lib/lingo/call.rb +4 -3
  54. data/lib/lingo/cli.rb +5 -1
  55. data/lib/lingo/config.rb +11 -5
  56. data/lib/lingo/ctl.rb +3 -3
  57. data/lib/lingo/database.rb +3 -1
  58. data/lib/lingo/database/crypter.rb +1 -3
  59. data/lib/lingo/database/source.rb +3 -1
  60. data/lib/lingo/database/source/key_value.rb +3 -1
  61. data/lib/lingo/database/source/multi_key.rb +3 -1
  62. data/lib/lingo/database/source/multi_value.rb +3 -1
  63. data/lib/lingo/database/source/single_word.rb +3 -1
  64. data/lib/lingo/database/source/word_class.rb +3 -1
  65. data/lib/lingo/debug.rb +5 -5
  66. data/lib/lingo/{agenda_item.rb → deferred_attendee.rb} +21 -12
  67. data/lib/lingo/error.rb +1 -1
  68. data/lib/lingo/language.rb +1 -9
  69. data/lib/lingo/language/dictionary.rb +2 -17
  70. data/lib/lingo/language/grammar.rb +10 -10
  71. data/lib/lingo/language/lexical.rb +2 -0
  72. data/lib/lingo/language/lexical_hash.rb +2 -0
  73. data/lib/lingo/language/token.rb +17 -3
  74. data/lib/lingo/language/word.rb +13 -5
  75. data/lib/lingo/language/word_form.rb +5 -3
  76. data/lib/lingo/progress.rb +2 -2
  77. data/lib/lingo/srv.rb +1 -1
  78. data/lib/lingo/srv/lingosrv.cfg +1 -1
  79. data/lib/lingo/version.rb +1 -1
  80. data/lib/lingo/web.rb +1 -1
  81. data/lib/lingo/web/lingoweb.cfg +1 -1
  82. data/test/attendee/ts_abbreviator.rb +4 -2
  83. data/test/attendee/ts_multi_worder.rb +81 -88
  84. data/test/attendee/ts_noneword_filter.rb +2 -2
  85. data/test/attendee/ts_object_filter.rb +2 -2
  86. data/test/attendee/ts_sequencer.rb +40 -20
  87. data/test/attendee/ts_stemmer.rb +52 -26
  88. data/test/attendee/ts_text_reader.rb +75 -56
  89. data/test/attendee/ts_text_writer.rb +6 -4
  90. data/test/attendee/ts_tokenizer.rb +304 -193
  91. data/test/attendee/ts_vector_filter.rb +242 -9
  92. data/test/ref/artikel.non +3 -0
  93. data/test/ref/artikel.vec +1 -4
  94. data/test/ref/artikel.vef +940 -0
  95. data/test/ref/artikel.ven +0 -3
  96. data/test/ref/artikel.ver +0 -3
  97. data/test/ref/artikel.vet +2580 -0
  98. data/test/ref/lir.non +34 -31
  99. data/test/ref/lir.seq +14 -15
  100. data/test/ref/lir.vec +37 -37
  101. data/test/ref/lir.vef +329 -0
  102. data/test/ref/lir.ven +329 -0
  103. data/test/ref/lir.ver +329 -0
  104. data/test/ref/lir.vet +329 -0
  105. data/test/test_helper.rb +29 -16
  106. data/test/ts_language.rb +6 -47
  107. metadata +74 -87
  108. data/lingo.rb +0 -29
  109. data/spec/spec_helper.rb +0 -5
@@ -3,7 +3,7 @@
3
3
  require 'test/unit'
4
4
  require 'lingo'
5
5
 
6
- class LingoTestCase < Test::Unit::TestCase
6
+ class LingoTestCase < Test::Unit::TestCase
7
7
 
8
8
  unless const_defined?(:TEST_FILE)
9
9
  TEST_FILE = 'test/de/test.txt'
@@ -20,12 +20,18 @@ class LingoTestCase < Test::Unit::TestCase
20
20
  [a || '', b || '', *c]
21
21
  end
22
22
 
23
+ def li(t, o)
24
+ ["#{t}\r\n", o]
25
+ end
26
+
23
27
  def ai(t)
24
- Lingo::AgendaItem.new(*split(t))
28
+ i = t.split('|')
29
+ i.unshift(i.shift.to_sym)
25
30
  end
26
31
 
27
32
  def tk(t)
28
- Lingo::Language::Token.new(*split(t, /\|(?=[A-Z])/))
33
+ a, b, *c = split(t, /\|(?=[A-Z\d])/)
34
+ Lingo::Language::Token.new(a, b, *c.map(&:to_i))
29
35
  end
30
36
 
31
37
  def lx(t)
@@ -55,8 +61,15 @@ class AttendeeTestCase < LingoTestCase
55
61
  @lingo.reset
56
62
 
57
63
  list = [{ @attendee => cfg }]
58
- list.unshift 'TestSpooler' => { 'out' => 'input', 'input' => input } if input
59
- list.push 'TestDumper' => { 'in' => 'output', 'output' => output = [] } if expect
64
+
65
+ list.unshift('TestSpooler' => {
66
+ 'out' => 'input',
67
+ 'input' => input,
68
+ 'pos' => @attendee == 'Tokenizer' }) if input
69
+
70
+ list.push('TestDumper' => {
71
+ 'in' => 'output',
72
+ 'output' => output = [] }) if expect
60
73
 
61
74
  @lingo.invite(list)
62
75
  @lingo.start
@@ -74,32 +87,31 @@ class Lingo
74
87
 
75
88
  class TestSpooler < self
76
89
 
77
- protected
78
-
79
90
  def init
80
- @input = get_key('input')
91
+ @input, @pos = get_key('input'), get_key('pos', nil) && 0
81
92
  end
82
93
 
83
- def control(cmd, param)
84
- @input.each { |i| forward(i) } if cmd == STR_CMD_TALK
94
+ def control(cmd)
95
+ if cmd == :TALK
96
+ @input.each { |i| i.is_a?(Array) ? command(*i) :
97
+ @pos ? forward(i, @pos += i.bytesize) : forward(i) }
98
+ end
85
99
  end
86
100
 
87
101
  end
88
102
 
89
103
  class TestDumper < self
90
104
 
91
- protected
92
-
93
105
  def init
94
106
  @output = get_key('output')
95
107
  end
96
108
 
97
- def control(cmd, param)
98
- @output << AgendaItem.new(cmd, param)
109
+ def control(*args)
110
+ @output.push(args)
99
111
  end
100
112
 
101
- def process(obj)
102
- @output << obj
113
+ def process(obj, *rest)
114
+ @output.push(rest.empty? ? obj : rest.unshift(obj))
103
115
  end
104
116
 
105
117
  end
@@ -115,4 +127,5 @@ class Lingo
115
127
  end
116
128
 
117
129
  end
130
+
118
131
  end
@@ -23,8 +23,6 @@ class TestLexicalHash < LingoTestCase
23
23
  $stderr = old_stderr
24
24
  end
25
25
 
26
- # TODO: Crypt testen...
27
-
28
26
  def test_cache
29
27
  lh('sys-dic') { |ds|
30
28
  assert_equal([lx('regen|s|m'), lx('regen|s|n'), lx('regen|v'), lx('rege|a')], ds['regen'])
@@ -38,11 +36,9 @@ class TestLexicalHash < LingoTestCase
38
36
 
39
37
  lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
40
38
 
41
- # Keine Store-Datei vorhanden, nur Text vorhanden
42
39
  File.delete(*Dir["#{Lingo.find(:store, txt_file)}.*"])
43
40
  lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
44
41
 
45
- # Store vorhanden, aber Text ist neuer
46
42
  lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
47
43
  end
48
44
 
@@ -59,8 +55,8 @@ class TestLexicalHash < LingoTestCase
59
55
  assert_equal([lx('abelscher ring ohne nullteiler|m')], ds['abelscher ring ohne nullteiler'])
60
56
  assert_equal([4], ds['abelscher ring ohne'])
61
57
  assert_equal([lx('alleinreisende frau|m')], ds['alleinreisend frau'])
62
- assert_equal([lx('abschaltbarer leistungshalbleiter|m')], ds['abschaltbar leistungshalbleiter'])
63
- assert_equal(nil, ds['abschaltbarer leistungshalbleiter'])
58
+ assert_equal([lx('abschaltbarer leistungshalbleiter|m')], ds['abschaltbarer leistungshalbleiter'])
59
+ assert_equal(nil, ds['abschaltbar leistungshalbleiter'])
64
60
  }
65
61
  end
66
62
 
@@ -113,12 +109,6 @@ class TestDictionary < LingoTestCase
113
109
  end
114
110
 
115
111
  def test_params
116
- # Keine Sprach-Konfiguration angegeben
117
- #assert_raise(RuntimeError) {
118
- # Lingo::Language::Dictionary.new({ 'source' => %w[sys-dic] }, @lingo)
119
- #}
120
-
121
- # Falsche Parameter angegeben (Pflichtparameter ohne Defaultwert)
122
112
  assert_raise(ArgumentError) {
123
113
  Lingo::Language::Dictionary.new({ 'course' => %w[sys-dic] }, @lingo)
124
114
  }
@@ -142,39 +132,27 @@ class TestDictionary < LingoTestCase
142
132
 
143
133
  def test_select_two_sources_mode_first
144
134
  ld('source' => %w[sys-dic tst-dic], 'mode' => 'first') { |dic|
145
- # in keiner Quelle vorhanden
146
135
  assert_equal([], dic.select('hasennasen'))
147
- # nur in erster Quelle vorhanden
148
136
  assert_equal([lx('knaller|s')], dic.select('knaller'))
149
- # nur in zweiter Quelle vorhanden
150
137
  assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
151
- # in beiden Quellen vorhanden
152
138
  assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n')], dic.select('a-dur'))
153
139
  }
154
140
  end
155
141
 
156
142
  def test_select_two_sources_mode_first_flipped
157
143
  ld('source' => %w[tst-dic sys-dic], 'mode' => 'first') { |dic|
158
- # in keiner Quelle vorhanden
159
144
  assert_equal([], dic.select('hasennasen'))
160
- # nur in erster Quelle vorhanden
161
145
  assert_equal([lx('knaller|s')], dic.select('knaller'))
162
- # nur in zweiter Quelle vorhanden
163
146
  assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
164
- # in beiden Quellen vorhanden
165
147
  assert_equal([lx('b-dur|s')], dic.select('a-dur'))
166
148
  }
167
149
  end
168
150
 
169
151
  def test_select_two_sources_mode_all
170
152
  ld('source' => %w[sys-dic tst-dic], 'mode' => 'all') { |dic|
171
- # in keiner Quelle vorhanden
172
153
  assert_equal([], dic.select('hasennasen'))
173
- # nur in erster Quelle vorhanden
174
154
  assert_equal([lx('knaller|s')], dic.select('knaller'))
175
- # nur in zweiter Quelle vorhanden
176
155
  assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
177
- # in beiden Quellen vorhanden
178
156
  assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n'), lx('b-dur|s')], dic.select('a-dur'))
179
157
  assert_equal([lx('aas|s|n'), lx('aas|s')], dic.select('aas'))
180
158
  }
@@ -182,15 +160,11 @@ class TestDictionary < LingoTestCase
182
160
 
183
161
  def test_select_two_sources_mode_default
184
162
  ld('source' => %w[sys-dic tst-dic]) { |dic|
185
- # in keiner Quelle vorhanden
186
163
  assert_equal([], dic.select('hasennasen'))
187
- # nur in erster Quelle vorhanden
188
164
  assert_equal([lx('knaller|s')], dic.select('knaller'))
189
- # nur in zweiter Quelle vorhanden
190
165
  assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
191
166
  assert_equal([lx('wirkungsort|s'), lx('wirkung|s+'), lx('ort|s+')], dic.select('wirkungsort'))
192
167
  assert_equal([lx('zettelkatalog|k'), lx('zettel|s+'), lx('katalog|s+')], dic.select('zettelkatalog'))
193
- # in beiden Quellen vorhanden
194
168
  assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n'), lx('b-dur|s')], dic.select('a-dur'))
195
169
  assert_equal([lx('aas|s|n'), lx('aas|s')], dic.select('aas'))
196
170
  }
@@ -207,7 +181,7 @@ class TestDictionary < LingoTestCase
207
181
 
208
182
  def test_infix_lexicals
209
183
  ld('source' => %w[sys-dic]) { |dic|
210
- assert_equal( [lx('information|f'), lx('informationsen|f')], ax(dic, 'informations', :infix))
184
+ assert_equal([lx('information|f')], ax(dic, 'informations', :infix))
211
185
  }
212
186
  end
213
187
 
@@ -222,7 +196,7 @@ class TestDictionary < LingoTestCase
222
196
 
223
197
  def test_select_with_infix
224
198
  ld('source' => %w[sys-dic]) { |dic|
225
- assert_equal( [lx('information|f'), lx('informationsen|f')], ax(dic, 'informations', :infix))
199
+ assert_equal([lx('information|f')], ax(dic, 'informations', :infix))
226
200
  }
227
201
  end
228
202
 
@@ -250,10 +224,6 @@ class TestGrammar < LingoTestCase
250
224
  @lingo = Lingo.new
251
225
  end
252
226
 
253
- def test_params
254
- # Die gleichen Fälle wie bei Dictionary, daher nicht notwendig
255
- end
256
-
257
227
  def test_cache
258
228
  lg { |gra|
259
229
  assert_equal(
@@ -294,7 +264,7 @@ class TestGrammar < LingoTestCase
294
264
  )
295
265
 
296
266
  assert_equal(
297
- wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzen|v+', 'erforschung|s+'),
267
+ wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzer|s+', 'forschung|s+'),
298
268
  gra.find_compound('benutzerforschung')
299
269
  )
300
270
 
@@ -318,64 +288,53 @@ class TestGrammar < LingoTestCase
318
288
  gra.find_compound('titelbestandsbestände')
319
289
  )
320
290
 
321
- # hinterer Teil ist ein Wort mit Suffix
322
291
  assert_equal(
323
292
  wd('hasenbraten|KOM', 'hasenbraten|k', 'hase|s+', 'braten|v+'),
324
293
  gra.find_compound('hasenbraten')
325
294
  )
326
295
 
327
- # hinterer Teil ist ein Wort mit Infix ohne Schwanz
328
296
  assert_equal(
329
297
  wd('nasenlaufen|KOM', 'nasenlaufen|k', 'nase|s+', 'laufen|v+'),
330
298
  gra.find_compound('nasenlaufen')
331
299
  )
332
300
 
333
- # hinterer Teil ist ein Wort mit Infix mit Schwanz
334
301
  assert_equal(
335
302
  wd('nasenlaufens|KOM', 'nasenlaufen|k', 'nase|s+', 'laufen|v+'),
336
303
  gra.find_compound('nasenlaufens')
337
304
  )
338
305
 
339
- # hinterer Teil ist ein Kompositum nach Bindestrich
340
306
  assert_equal(
341
307
  wd('arrafat-nachfolgebedarf|KOM', 'arrafat-nachfolgebedarf|k', 'arrafat|x+', 'nachfolge|s+', 'bedarf|s+'),
342
308
  gra.find_compound('arrafat-nachfolgebedarf')
343
309
  )
344
310
 
345
- # hinterer Teil ist ein TakeItAsIs nach Bindestrich
346
311
  assert_equal(
347
312
  wd('nachfolge-arrafat|KOM', 'nachfolge-arrafat|k', 'nachfolge|s+', 'arrafat|x+'),
348
313
  gra.find_compound('nachfolge-arrafat')
349
314
  )
350
315
 
351
- # vorderer Teil ist ein Wort mit Suffix => siehe Hasenbraten
352
- # vorderer Teil ist ein Kompositum
353
316
  assert_equal(
354
317
  wd('morgenonkelmantel|KOM', 'morgenonkelmantel|k', 'morgen|w+', 'morgen|s+', 'onkel|s+', 'mantel|s+'),
355
318
  gra.find_compound('morgenonkelmantel')
356
319
  )
357
320
 
358
- # vorderer Teil ist ein TakeItAsIs vor Bindestrich / bindestrichversion
359
321
  assert_equal(
360
322
  wd('arrafat-nachfolger|KOM', 'arrafat-nachfolger|k', 'arrafat|x+', 'nachfolger|s+'),
361
323
  gra.find_compound('arrafat-nachfolger')
362
324
  )
363
325
 
364
- # bindestrichversion zwei-teilig
365
326
  assert_equal(
366
327
  wd('cd-rom-technologie|KOM', 'cd-rom-technologie|k', 'cd-rom|s+|f', 'cd-rom|s+|m', 'technologie|s+|f'),
367
328
  gra.find_compound('cd-rom-technologie')
368
329
  )
369
330
 
370
- # bindestrichversion drei-teilig
371
331
  assert_equal(
372
332
  wd('albert-ludwigs-universität|KOM', 'albert-ludwigs-universität|k', 'albert|e+', 'ludwig|e+', 'universität|s+'),
373
333
  gra.find_compound('albert-ludwigs-universität')
374
334
  )
375
335
 
376
- # normal mit suggestion
377
336
  assert_equal(
378
- wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzen|v+', 'erforschung|s+'),
337
+ wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzer|s+', 'forschung|s+'),
379
338
  gra.find_compound('benutzerforschung')
380
339
  )
381
340
  }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lingo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.5
4
+ version: 1.8.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Vorhauer
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-10-02 00:00:00.000000000 Z
12
+ date: 2015-02-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: cyclops
@@ -17,34 +17,28 @@ dependencies:
17
17
  requirements:
18
18
  - - "~>"
19
19
  - !ruby/object:Gem::Version
20
- version: '0.0'
21
- - - ">="
22
- - !ruby/object:Gem::Version
23
- version: 0.0.4
20
+ version: '0.1'
24
21
  type: :runtime
25
22
  prerelease: false
26
23
  version_requirements: !ruby/object:Gem::Requirement
27
24
  requirements:
28
25
  - - "~>"
29
26
  - !ruby/object:Gem::Version
30
- version: '0.0'
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: 0.0.4
27
+ version: '0.1'
34
28
  - !ruby/object:Gem::Dependency
35
29
  name: nuggets
36
30
  requirement: !ruby/object:Gem::Requirement
37
31
  requirements:
38
32
  - - "~>"
39
33
  - !ruby/object:Gem::Version
40
- version: '1.0'
34
+ version: '1.1'
41
35
  type: :runtime
42
36
  prerelease: false
43
37
  version_requirements: !ruby/object:Gem::Requirement
44
38
  requirements:
45
39
  - - "~>"
46
40
  - !ruby/object:Gem::Version
47
- version: '1.0'
41
+ version: '1.1'
48
42
  - !ruby/object:Gem::Dependency
49
43
  name: rubyzip
50
44
  requirement: !ruby/object:Gem::Requirement
@@ -119,16 +113,22 @@ dependencies:
119
113
  name: hen
120
114
  requirement: !ruby/object:Gem::Requirement
121
115
  requirements:
116
+ - - "~>"
117
+ - !ruby/object:Gem::Version
118
+ version: '0.8'
122
119
  - - ">="
123
120
  - !ruby/object:Gem::Version
124
- version: '0'
121
+ version: 0.8.1
125
122
  type: :development
126
123
  prerelease: false
127
124
  version_requirements: !ruby/object:Gem::Requirement
128
125
  requirements:
126
+ - - "~>"
127
+ - !ruby/object:Gem::Version
128
+ version: '0.8'
129
129
  - - ">="
130
130
  - !ruby/object:Gem::Version
131
- version: '0'
131
+ version: 0.8.1
132
132
  - !ruby/object:Gem::Dependency
133
133
  name: rake
134
134
  requirement: !ruby/object:Gem::Requirement
@@ -144,7 +144,7 @@ dependencies:
144
144
  - !ruby/object:Gem::Version
145
145
  version: '0'
146
146
  - !ruby/object:Gem::Dependency
147
- name: rspec
147
+ name: test-unit
148
148
  requirement: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - ">="
@@ -189,27 +189,34 @@ files:
189
189
  - bin/lingoctl
190
190
  - bin/lingosrv
191
191
  - bin/lingoweb
192
- - de.lang
193
- - de/lingo-abk.txt
194
- - de/lingo-dic.txt
195
- - de/lingo-mul.txt
196
- - de/lingo-syn.txt
197
- - de/test_dic.txt
198
- - de/test_gen.txt
199
- - de/test_mu2.txt
200
- - de/test_mul.txt
201
- - de/test_sgw.txt
202
- - de/test_syn.txt
203
- - de/user-dic.txt
204
- - en.lang
205
- - en/lingo-dic.txt
206
- - en/lingo-irr.txt
207
- - en/lingo-mul.txt
208
- - en/lingo-syn.txt
209
- - en/lingo-wdn.txt
210
- - en/user-dic.txt
192
+ - config/lingo-call.cfg
193
+ - config/lingo.cfg
194
+ - config/lir.cfg
195
+ - dict/de/lingo-abk.txt
196
+ - dict/de/lingo-dic.txt
197
+ - dict/de/lingo-mul.txt
198
+ - dict/de/lingo-syn.txt
199
+ - dict/de/test_dic.txt
200
+ - dict/de/test_gen.txt
201
+ - dict/de/test_mu2.txt
202
+ - dict/de/test_mul.txt
203
+ - dict/de/test_sgw.txt
204
+ - dict/de/test_syn.txt
205
+ - dict/de/user-dic.txt
206
+ - dict/en/lingo-dic.txt
207
+ - dict/en/lingo-irr.txt
208
+ - dict/en/lingo-mul.txt
209
+ - dict/en/lingo-syn.txt
210
+ - dict/en/lingo-wdn.txt
211
+ - dict/en/user-dic.txt
212
+ - dict/ru/lingo-dic.txt
213
+ - dict/ru/lingo-mul.txt
214
+ - dict/ru/lingo-syn.txt
215
+ - dict/ru/user-dic.txt
216
+ - lang/de.lang
217
+ - lang/en.lang
218
+ - lang/ru.lang
211
219
  - lib/lingo.rb
212
- - lib/lingo/agenda_item.rb
213
220
  - lib/lingo/app.rb
214
221
  - lib/lingo/attendee.rb
215
222
  - lib/lingo/attendee/abbreviator.rb
@@ -249,6 +256,7 @@ files:
249
256
  - lib/lingo/database/source/single_word.rb
250
257
  - lib/lingo/database/source/word_class.rb
251
258
  - lib/lingo/debug.rb
259
+ - lib/lingo/deferred_attendee.rb
252
260
  - lib/lingo/error.rb
253
261
  - lib/lingo/language.rb
254
262
  - lib/lingo/language/char.rb
@@ -271,16 +279,6 @@ files:
271
279
  - lib/lingo/web/public/lingo.png
272
280
  - lib/lingo/web/public/lingoweb.css
273
281
  - lib/lingo/web/views/index.erb
274
- - lingo-call.cfg
275
- - lingo.cfg
276
- - lingo.rb
277
- - lir.cfg
278
- - ru.lang
279
- - ru/lingo-dic.txt
280
- - ru/lingo-mul.txt
281
- - ru/lingo-syn.txt
282
- - ru/user-dic.txt
283
- - spec/spec_helper.rb
284
282
  - test/attendee/ts_abbreviator.rb
285
283
  - test/attendee/ts_decomposer.rb
286
284
  - test/attendee/ts_multi_worder.rb
@@ -304,13 +302,19 @@ files:
304
302
  - test/ref/artikel.seq
305
303
  - test/ref/artikel.syn
306
304
  - test/ref/artikel.vec
305
+ - test/ref/artikel.vef
307
306
  - test/ref/artikel.ven
308
307
  - test/ref/artikel.ver
308
+ - test/ref/artikel.vet
309
309
  - test/ref/lir.mul
310
310
  - test/ref/lir.non
311
311
  - test/ref/lir.seq
312
312
  - test/ref/lir.syn
313
313
  - test/ref/lir.vec
314
+ - test/ref/lir.vef
315
+ - test/ref/lir.ven
316
+ - test/ref/lir.ver
317
+ - test/ref/lir.vet
314
318
  - test/test_helper.rb
315
319
  - test/ts_database.rb
316
320
  - test/ts_language.rb
@@ -324,51 +328,34 @@ licenses:
324
328
  metadata: {}
325
329
  post_install_message: |2+
326
330
 
327
- lingo-1.8.5 [2014-10-02]:
331
+ lingo-1.8.6 [2015-02-09]:
328
332
 
329
- * Dictionary values (projections) are no longer sorted; hence, order of
330
- definition affects processing.
331
- * Lexicals in Lingo::Language::Word are no longer sorted; in particular,
332
- compound parts keep their original order.
333
- * Lexicals in Lingo::Language::Word are no longer cleaned from duplicates.
334
- * Compiled dictionaries are updated whenever the Lingo version or their
335
- configuration changes, not only when the source file's size or modification
336
- time changes.
337
- * Lingo::Attendee::Synonymer learned <tt>compound-parts</tt> option to also
338
- generate synonyms for compound parts when set to +true+.
339
- * Lingo::Attendee::TextReader learned better PDF-to-text conversion using the
340
- +pdftotext+ command; specify <tt>filter: pdftotext</tt> in the config.
341
- * Lingo::Attendee::VectorFilter learned +dict+ option to print words in
342
- dictionary format (viz. Lingo::Database::Source::WordClass).
343
- * Lingo::Attendee::VectorFilter learned +preamble+ option to print current
344
- configuration to the beginning of the log file (<tt>debug: 'true'</tt>);
345
- set <tt>preamble: false</tt> to disable.
346
- * Multiword dictionaries compiled from base forms can now generate inflected
347
- adjectives based on the gender of the head noun; set <tt>inflect: true</tt>
348
- in the dictionary config.
349
- * Lingo::Database::Source::WordClass supports gender information being encoded
350
- in the dictionary as well as shorthand notation for multiple word
351
- classes/genders.
352
- * Lingo::Database::Source::WordClass supports compounds being encoded in the
353
- dictionary (appending <tt>+</tt> to their parts' word classes is
354
- recommended).
355
- * Lingo::Database::Source removes leading and trailing whitespace from
356
- dictionary lines.
357
- * Lingo::Database::Crypter uses OpenSSL to encrypt/decrypt dictionaries.
358
- Note: Can't decrypt dictionaries encrypted with the old scheme anymore.
359
- * Lingo::Attendee::Tokenizer learned subset of MediaWiki syntax.
360
- * Eliminated pathological behaviour of the +URLS+ rule in
361
- Lingo::Attendee::Tokenizer.
362
- * Fixed regression introduced in 1.8.2 where <tt>combine: all</tt> would no
363
- longer work in Lingo::Attendee::MultiWorder.
364
- * Updated and extended Russian dictionaries. (Yulia Dorokhova, Thomas Müller)
365
- * +lingoctl+ no longer overwrites existing files without confirmation.
366
- * +lingoctl+ learned +archive+ command.
367
- * Dictionary cleanup.
333
+ * Lingo::Attendee::VectorFilter learned +pos+ option to print position and
334
+ byte offset with each word.
335
+ * Lingo::Attendee::VectorFilter learned +tfidf+ option to sort results based
336
+ on their tf–idf[https://en.wikipedia.org/wiki/Tf–idf] score; the document
337
+ frequencies are calculated over the "corpus" of all files processed during
338
+ a single program invocation.
339
+ * Lingo::Attendee::VectorFilter learned +tokens+ option to filter on
340
+ Lingo::Language::Token in addition to Lingo::Language::Word.
341
+ * Lingo::Attendee::VectorFilter no longer supports +debug+ (as well as
342
+ +prompt+ and +preamble+); use Lingo::Attendee::DebugFilter instead.
343
+ * Lingo::Attendee::TextReader no longer removes line endings; option +chomp+
344
+ is obsolete.
345
+ * Lingo::Attendee::TextReader passes byte offset to the following attendee.
346
+ * Lingo::Attendee::Tokenizer records token's byte offset.
347
+ * Lingo::Attendee::Tokenizer records token's sequence position.
348
+ * Lingo::Attendee::Tokenizer learned <tt>skip-tags</tt> option to skip over
349
+ specified tags' contents.
350
+ * Lingo::Attendee subclasses warn when invalid or obsolete options or names
351
+ are used.
352
+ * Changed German infix substitution +/en+ to +ch/chen+ in order to prevent
353
+ overly aggressive identifications.
354
+ * Internal refactoring and API changes.
368
355
 
369
356
  rdoc_options:
370
357
  - "--title"
371
- - lingo Application documentation (v1.8.5)
358
+ - lingo Application documentation (v1.8.6)
372
359
  - "--charset"
373
360
  - UTF-8
374
361
  - "--line-numbers"
@@ -389,7 +376,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
389
376
  version: '0'
390
377
  requirements: []
391
378
  rubyforge_project:
392
- rubygems_version: 2.4.2
379
+ rubygems_version: 2.4.5
393
380
  signing_key:
394
381
  specification_version: 4
395
382
  summary: The full-featured automatic indexing system