lingo 1.8.1 → 1.8.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. data/ChangeLog +23 -5
  2. data/README +1 -1
  3. data/Rakefile +5 -7
  4. data/TODO +2 -0
  5. data/bin/lingo +5 -1
  6. data/de.lang +1 -1
  7. data/en/lingo-syn.txt +0 -0
  8. data/en.lang +2 -1
  9. data/lib/lingo/attendee/abbreviator.rb +8 -9
  10. data/lib/lingo/attendee/debugger.rb +5 -4
  11. data/lib/lingo/attendee/decomposer.rb +8 -3
  12. data/lib/lingo/attendee/dehyphenizer.rb +19 -63
  13. data/lib/lingo/attendee/formatter.rb +1 -1
  14. data/lib/lingo/attendee/multi_worder.rb +67 -155
  15. data/lib/lingo/attendee/noneword_filter.rb +16 -9
  16. data/lib/lingo/attendee/object_filter.rb +1 -1
  17. data/lib/lingo/attendee/sequencer.rb +32 -63
  18. data/lib/lingo/attendee/stemmer/porter.rb +343 -0
  19. data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
  20. data/lib/lingo/attendee/synonymer.rb +10 -9
  21. data/lib/lingo/attendee/text_reader.rb +102 -76
  22. data/lib/lingo/attendee/text_writer.rb +23 -26
  23. data/lib/lingo/attendee/tokenizer.rb +13 -27
  24. data/lib/lingo/attendee/variator.rb +26 -66
  25. data/lib/lingo/attendee/vector_filter.rb +42 -43
  26. data/lib/lingo/attendee/word_searcher.rb +6 -7
  27. data/lib/lingo/attendee.rb +25 -7
  28. data/lib/lingo/buffered_attendee.rb +36 -10
  29. data/lib/lingo/cachable.rb +8 -8
  30. data/lib/lingo/config.rb +5 -6
  31. data/lib/lingo/ctl.rb +2 -3
  32. data/lib/lingo/database/crypter.rb +9 -26
  33. data/lib/lingo/database/gdbm_store.rb +3 -5
  34. data/lib/lingo/database/libcdb_store.rb +4 -6
  35. data/lib/lingo/database/sdbm_store.rb +11 -6
  36. data/lib/lingo/database/show_progress.rb +3 -43
  37. data/lib/lingo/database/source/key_value.rb +2 -6
  38. data/lib/lingo/database/source/multi_key.rb +3 -5
  39. data/lib/lingo/database/source/multi_value.rb +2 -6
  40. data/lib/lingo/database/source/single_word.rb +4 -6
  41. data/lib/lingo/database/source/word_class.rb +4 -10
  42. data/lib/lingo/database/source.rb +20 -18
  43. data/lib/lingo/database.rb +84 -59
  44. data/lib/lingo/error.rb +57 -1
  45. data/lib/lingo/language/dictionary.rb +21 -18
  46. data/lib/lingo/language/grammar.rb +40 -49
  47. data/lib/lingo/language/lexical.rb +6 -6
  48. data/lib/lingo/language/lexical_hash.rb +6 -0
  49. data/lib/lingo/language/word.rb +32 -15
  50. data/lib/lingo/language/word_form.rb +1 -1
  51. data/lib/lingo/language.rb +14 -25
  52. data/lib/lingo/reportable.rb +12 -10
  53. data/lib/lingo/show_progress.rb +81 -0
  54. data/lib/lingo/version.rb +1 -1
  55. data/lib/lingo.rb +63 -24
  56. data/lingo-call.cfg +6 -10
  57. data/lingo.cfg +60 -44
  58. data/lir.cfg +42 -41
  59. data/test/attendee/ts_abbreviator.rb +3 -5
  60. data/test/attendee/ts_decomposer.rb +3 -5
  61. data/test/attendee/ts_multi_worder.rb +87 -145
  62. data/test/attendee/ts_noneword_filter.rb +5 -3
  63. data/test/attendee/ts_object_filter.rb +5 -3
  64. data/test/attendee/ts_sequencer.rb +3 -5
  65. data/test/attendee/ts_stemmer.rb +309 -0
  66. data/test/attendee/ts_synonymer.rb +15 -11
  67. data/test/attendee/ts_text_reader.rb +12 -15
  68. data/test/attendee/ts_text_writer.rb +24 -29
  69. data/test/attendee/ts_tokenizer.rb +9 -7
  70. data/test/attendee/ts_variator.rb +4 -4
  71. data/test/attendee/ts_vector_filter.rb +24 -16
  72. data/test/attendee/ts_word_searcher.rb +20 -36
  73. data/test/{lir.csv → lir.vec} +0 -0
  74. data/test/ref/artikel.vec +943 -943
  75. data/test/ref/artikel.ven +943 -943
  76. data/test/ref/lir.non +201 -201
  77. data/test/ref/lir.seq +178 -178
  78. data/test/ref/lir.syn +49 -49
  79. data/test/ref/lir.vec +329 -0
  80. data/test/test_helper.rb +20 -36
  81. data/test/ts_database.rb +10 -10
  82. data/test/ts_language.rb +279 -319
  83. metadata +93 -104
  84. data/info/Objekte.png +0 -0
  85. data/info/Typen.png +0 -0
  86. data/info/database.png +0 -0
  87. data/info/db_small.png +0 -0
  88. data/info/download.png +0 -0
  89. data/info/kerze.png +0 -0
  90. data/info/language.png +0 -0
  91. data/info/lingo.png +0 -0
  92. data/info/logo.png +0 -0
  93. data/info/meeting.png +0 -0
  94. data/info/types.png +0 -0
  95. data/lingo-all.cfg +0 -89
  96. data/porter/stem.cfg +0 -311
  97. data/porter/stem.rb +0 -150
  98. data/test/ref/lir.csv +0 -329
  99. data/test.cfg +0 -79
@@ -0,0 +1,309 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative '../test_helper'
4
+
5
+ class TestAttendeeStemmer < AttendeeTestCase
6
+
7
+ def test_type
8
+ assert_raise(Lingo::NameNotFoundError) { meet({ 'type' => 'bla' }, []) }
9
+ end
10
+
11
+ def test_basic
12
+ meet({}, [
13
+ wd('bla|IDF'),
14
+ wd('blub|?'),
15
+ wd('blubs|?'),
16
+ ai('EOF|')
17
+ ], [
18
+ wd('bla|IDF'),
19
+ wd('blub|?'),
20
+ wd('blubs|?', 'blub|z'),
21
+ ai('EOF|')
22
+ ])
23
+ end
24
+
25
+ def test_wc
26
+ meet({ 'wordclass' => 'w' }, [
27
+ wd('bla|IDF'),
28
+ wd('blub|?'),
29
+ wd('blubs|?'),
30
+ ai('EOF|')
31
+ ], [
32
+ wd('bla|IDF'),
33
+ wd('blub|?'),
34
+ wd('blubs|?', 'blub|w'),
35
+ ai('EOF|')
36
+ ])
37
+ end
38
+
39
+ def test_mode
40
+ meet({ 'mode' => '' }, [
41
+ wd('bla|IDF'),
42
+ wd('a|?'),
43
+ wd('yet|?'),
44
+ wd('blubs|?'),
45
+ ai('EOF|')
46
+ ], [
47
+ wd('bla|IDF'),
48
+ wd('a|?'),
49
+ wd('yet|?'),
50
+ wd('blubs|?', 'blub|z'),
51
+ ai('EOF|')
52
+ ])
53
+
54
+ meet({ 'mode' => 'all' }, [
55
+ wd('bla|IDF'),
56
+ wd('a|?'),
57
+ wd('yet|?'),
58
+ wd('blubs|?'),
59
+ ai('EOF|')
60
+ ], [
61
+ wd('bla|IDF'),
62
+ wd('a|?', 'a|z'),
63
+ wd('yet|?', 'yet|z'),
64
+ wd('blubs|?', 'blub|z'),
65
+ ai('EOF|')
66
+ ])
67
+ end
68
+
69
+ def test_examples_100
70
+ meet({}, [
71
+ wd('S100|IDF'),
72
+ wd('caresses|?'),
73
+ wd('ponies|?'),
74
+ wd('ties|?'),
75
+ wd('caress|?'),
76
+ wd('cats|?'),
77
+ ai('EOF|')
78
+ ], [
79
+ wd('S100|IDF'),
80
+ wd('caresses|?', 'caress|z'),
81
+ wd('ponies|?', 'poni|z'),
82
+ wd('ties|?', 'ti|z'), # snowball: tie
83
+ wd('caress|?', 'caress|z'),
84
+ wd('cats|?', 'cat|z'),
85
+ ai('EOF|')
86
+ ])
87
+ end
88
+
89
+ def test_examples_110
90
+ meet({ 'mode' => 'all' }, [
91
+ wd('S110|IDF'),
92
+ wd('agreed|?'),
93
+ wd('feed|?'),
94
+ wd('plastered|?'),
95
+ wd('bled|?'),
96
+ wd('motoring|?'),
97
+ wd('sing|?'),
98
+ ai('EOF|')
99
+ ], [
100
+ wd('S110|IDF'),
101
+ wd('agreed|?', 'agre|z'),
102
+ wd('feed|?', 'fe|z'), # snowball: feed
103
+ wd('plastered|?', 'plaster|z'),
104
+ wd('bled|?', 'bled|z'),
105
+ wd('motoring|?', 'motor|z'),
106
+ wd('sing|?', 'sing|z'),
107
+ ai('EOF|')
108
+ ])
109
+ end
110
+
111
+ def test_examples_111
112
+ meet({}, [
113
+ wd('S111|IDF'),
114
+ wd('conflated|?'),
115
+ wd('troubled|?'),
116
+ wd('sized|?'),
117
+ wd('hopping|?'),
118
+ wd('tanned|?'),
119
+ wd('falling|?'),
120
+ wd('hissing|?'),
121
+ wd('fizzed|?'),
122
+ wd('failing|?'),
123
+ wd('filing|?'),
124
+ ai('EOF|')
125
+ ], [
126
+ wd('S111|IDF'),
127
+ wd('conflated|?', 'conflat|z'),
128
+ wd('troubled|?', 'troubl|z'),
129
+ wd('sized|?', 'size|z'),
130
+ wd('hopping|?', 'hop|z'),
131
+ wd('tanned|?', 'tan|z'),
132
+ wd('falling|?', 'fall|z'),
133
+ wd('hissing|?', 'hiss|z'),
134
+ wd('fizzed|?', 'fizz|z'),
135
+ wd('failing|?', 'fail|z'),
136
+ wd('filing|?', 'file|z'),
137
+ ai('EOF|')
138
+ ])
139
+ end
140
+
141
+ def test_examples_120
142
+ meet({ 'mode' => 'all' }, [
143
+ wd('S120|IDF'),
144
+ wd('happy|?'),
145
+ wd('sky|?'),
146
+ ai('EOF|')
147
+ ], [
148
+ wd('S120|IDF'),
149
+ wd('happy|?', 'happi|z'),
150
+ wd('sky|?', 'sky|z'),
151
+ ai('EOF|')
152
+ ])
153
+ end
154
+
155
+ def test_examples_200
156
+ meet({}, [
157
+ wd('S200|IDF'),
158
+ wd('relational|?'),
159
+ wd('conditional|?'),
160
+ wd('rational|?'),
161
+ wd('valency|?'),
162
+ wd('hesitancy|?'),
163
+ wd('digitizer|?'),
164
+ wd('conformably|?'),
165
+ wd('radically|?'),
166
+ wd('differently|?'),
167
+ wd('vilely|?'),
168
+ wd('analogously|?'),
169
+ wd('vietnamization|?'),
170
+ wd('predication|?'),
171
+ wd('operator|?'),
172
+ wd('feudalism|?'),
173
+ wd('decisiveness|?'),
174
+ wd('hopefulness|?'),
175
+ wd('callousness|?'),
176
+ wd('formality|?'),
177
+ wd('sensitivity|?'),
178
+ wd('sensibility|?'),
179
+ ai('EOF|')
180
+ ], [
181
+ wd('S200|IDF'),
182
+ wd('relational|?', 'relat|z'),
183
+ wd('conditional|?', 'condit|z'),
184
+ wd('rational|?', 'ration|z'),
185
+ wd('valency|?', 'valenc|z'),
186
+ wd('hesitancy|?', 'hesit|z'),
187
+ wd('digitizer|?', 'digit|z'),
188
+ wd('conformably|?', 'conform|z'),
189
+ wd('radically|?', 'radic|z'),
190
+ wd('differently|?', 'differ|z'),
191
+ wd('vilely|?', 'vile|z'),
192
+ wd('analogously|?', 'analog|z'),
193
+ wd('vietnamization|?', 'vietnam|z'),
194
+ wd('predication|?', 'predic|z'),
195
+ wd('operator|?', 'oper|z'),
196
+ wd('feudalism|?', 'feudal|z'),
197
+ wd('decisiveness|?', 'decis|z'),
198
+ wd('hopefulness|?', 'hope|z'),
199
+ wd('callousness|?', 'callous|z'),
200
+ wd('formality|?', 'formal|z'),
201
+ wd('sensitivity|?', 'sensit|z'),
202
+ wd('sensibility|?', 'sensibl|z'),
203
+ ai('EOF|')
204
+ ])
205
+ end
206
+
207
+ def test_examples_300
208
+ meet({}, [
209
+ wd('S300|IDF'),
210
+ wd('triplicate|?'),
211
+ wd('formative|?'),
212
+ wd('formalize|?'),
213
+ wd('electricity|?'),
214
+ wd('electrical|?'),
215
+ wd('hopeful|?'),
216
+ wd('goodness|?'),
217
+ ai('EOF|')
218
+ ], [
219
+ wd('S300|IDF'),
220
+ wd('triplicate|?', 'triplic|z'),
221
+ wd('formative|?', 'form|z'), # snowball: format
222
+ wd('formalize|?', 'formal|z'),
223
+ wd('electricity|?', 'electr|z'),
224
+ wd('electrical|?', 'electr|z'),
225
+ wd('hopeful|?', 'hope|z'),
226
+ wd('goodness|?', 'good|z'),
227
+ ai('EOF|')
228
+ ])
229
+ end
230
+
231
+ def test_examples_400
232
+ meet({}, [
233
+ wd('S400|IDF'),
234
+ wd('revival|?'),
235
+ wd('allowance|?'),
236
+ wd('inference|?'),
237
+ wd('airliner|?'),
238
+ wd('gyroscopic|?'),
239
+ wd('adjustable|?'),
240
+ wd('defensible|?'),
241
+ wd('irritant|?'),
242
+ wd('replacement|?'),
243
+ wd('adjustment|?'),
244
+ wd('dependent|?'),
245
+ wd('adoption|?'),
246
+ wd('homologou|?'),
247
+ wd('communism|?'),
248
+ wd('activate|?'),
249
+ wd('angularity|?'),
250
+ wd('homologous|?'),
251
+ wd('effective|?'),
252
+ wd('bowdlerize|?'),
253
+ ai('EOF|')
254
+ ], [
255
+ wd('S400|IDF'),
256
+ wd('revival|?', 'reviv|z'),
257
+ wd('allowance|?', 'allow|z'),
258
+ wd('inference|?', 'infer|z'),
259
+ wd('airliner|?', 'airlin|z'),
260
+ wd('gyroscopic|?', 'gyroscop|z'),
261
+ wd('adjustable|?', 'adjust|z'),
262
+ wd('defensible|?', 'defens|z'),
263
+ wd('irritant|?', 'irrit|z'),
264
+ wd('replacement|?', 'replac|z'),
265
+ wd('adjustment|?', 'adjust|z'),
266
+ wd('dependent|?', 'depend|z'),
267
+ wd('adoption|?', 'adopt|z'),
268
+ wd('homologou|?', 'homolog|z'), # snowball: homologou
269
+ wd('communism|?', 'commun|z'), # snowball: communism
270
+ wd('activate|?', 'activ|z'),
271
+ wd('angularity|?', 'angular|z'),
272
+ wd('homologous|?', 'homolog|z'),
273
+ wd('effective|?', 'effect|z'),
274
+ wd('bowdlerize|?', 'bowdler|z'),
275
+ ai('EOF|')
276
+ ])
277
+ end
278
+
279
+ def test_examples_500
280
+ meet({ 'mode' => 'all' }, [
281
+ wd('S500|IDF'),
282
+ wd('probate|?'),
283
+ wd('rate|?'),
284
+ wd('cease|?'),
285
+ ai('EOF|')
286
+ ], [
287
+ wd('S500|IDF'),
288
+ wd('probate|?', 'probat|z'),
289
+ wd('rate|?', 'rate|z'),
290
+ wd('cease|?', 'ceas|z'),
291
+ ai('EOF|')
292
+ ])
293
+ end
294
+
295
+ def test_examples_510
296
+ meet({ 'mode' => 'all' }, [
297
+ wd('S510|IDF'),
298
+ wd('controll|?'),
299
+ wd('roll|?'),
300
+ ai('EOF|')
301
+ ], [
302
+ wd('S510|IDF'),
303
+ wd('controll|?', 'control|z'),
304
+ wd('roll|?', 'roll|z'),
305
+ ai('EOF|')
306
+ ])
307
+ end
308
+
309
+ end
@@ -5,23 +5,27 @@ require_relative '../test_helper'
5
5
  class TestAttendeeSynonymer < AttendeeTestCase
6
6
 
7
7
  def test_basic
8
- @input = [wd('abtastzeiten|IDF', 'abtastzeit|s')]
9
- @expect = [wd('abtastzeiten|IDF', 'abtastzeit|s', 'abtastfrequenz|y', 'abtastperiode|y')]
10
- meet({'source'=>'sys-syn', 'check'=>'-,MUL'})
11
- # @expect.each_index {|i| assert_equal(@expect[i], @output[i]) }
8
+ meet({ 'source' => 'sys-syn', 'check' => '-,MUL' }, [
9
+ wd('abtastzeiten|IDF', 'abtastzeit|s')
10
+ ], [
11
+ wd('abtastzeiten|IDF', 'abtastzeit|s', 'abtastfrequenz|y', 'abtastperiode|y')
12
+ ])
12
13
  end
13
14
 
14
15
  def test_first
15
- @input = [wd('Aktienanleihe|IDF', 'aktienanleihe|s')]
16
- @expect = [wd('Aktienanleihe|IDF', 'aktienanleihe|s', 'aktien-anleihe|y',
17
- 'reverse convertible bond|y', 'reverse convertibles|y')]
18
- meet({'source'=>'sys-syn,tst-syn', 'check'=>'-,MUL', 'mode'=>'first'})
16
+ meet({ 'source' => 'sys-syn,tst-syn', 'check' => '-,MUL', 'mode' => 'first' }, [
17
+ wd('Aktienanleihe|IDF', 'aktienanleihe|s')
18
+ ], [
19
+ wd('Aktienanleihe|IDF', 'aktienanleihe|s', 'aktien-anleihe|y', 'reverse convertible bond|y', 'reverse convertibles|y')
20
+ ])
19
21
  end
20
22
 
21
23
  def test_all
22
- @input = [wd('Kerlchen|IDF', 'kerlchen|s')]
23
- @expect = [wd('Kerlchen|IDF', 'kerlchen|s', 'kerlchen|y', 'zwerg-nase|y')]
24
- meet({'source'=>'sys-syn,tst-syn', 'check'=>'-,MUL', 'mode'=>'all'})
24
+ meet({ 'source' => 'sys-syn,tst-syn', 'check' => '-,MUL', 'mode' => 'all' }, [
25
+ wd('Kerlchen|IDF', 'kerlchen|s')
26
+ ], [
27
+ wd('Kerlchen|IDF', 'kerlchen|s', 'kerlchen|y', 'zwerg-nase|y')
28
+ ])
25
29
  end
26
30
 
27
31
  end
@@ -5,8 +5,8 @@ require_relative '../test_helper'
5
5
  class TestAttendeeTextReader < AttendeeTestCase
6
6
 
7
7
  def test_lir_file
8
- @expect = [
9
- ai('LIR-FORMAT|'), ai('FILE|test/lir.txt'),
8
+ meet({ 'files' => 'test/lir.txt', 'records' => true }, nil, [
9
+ ai('LIR-FORMAT|'), ai("FILE|#{path = File.expand_path('test/lir.txt')}"),
10
10
  ai('RECORD|00237'),
11
11
  '020: GERHARD.',
12
12
  '025: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
@@ -17,14 +17,13 @@ class TestAttendeeTextReader < AttendeeTestCase
17
17
  ai('RECORD|00239'),
18
18
  '020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
19
19
  '056: "Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.',
20
- ai('EOF|test/lir.txt')
21
- ]
22
- meet({'files'=>'test/lir.txt', 'lir-record-pattern'=>'^\[(\d+)\.\]'})
20
+ ai("EOF|#{path}")
21
+ ])
23
22
  end
24
23
 
25
24
  def test_lir_file_another_pattern
26
- @expect = [
27
- ai('LIR-FORMAT|'), ai('FILE|test/lir2.txt'),
25
+ meet({ 'files' => 'test/lir2.txt', 'records' => '^\021(\d+)\022' }, nil, [
26
+ ai('LIR-FORMAT|'), ai("FILE|#{path = File.expand_path('test/lir2.txt')}"),
28
27
  ai('RECORD|00237'),
29
28
  '020: GERHARD.',
30
29
  '025: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
@@ -35,18 +34,16 @@ class TestAttendeeTextReader < AttendeeTestCase
35
34
  ai('RECORD|00239'),
36
35
  '020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
37
36
  '056: "Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.',
38
- ai('EOF|test/lir2.txt')
39
- ]
40
- meet({'files'=>'test/lir2.txt', 'lir-record-pattern'=>'^\021(\d+)\022'})
37
+ ai("EOF|#{path}")
38
+ ])
41
39
  end
42
40
 
43
41
  def test_normal_file
44
- @expect = [
45
- ai('FILE|test/mul.txt'),
42
+ meet({ 'files' => 'test/mul.txt' }, nil, [
43
+ ai("FILE|#{path = File.expand_path('test/mul.txt')}"),
46
44
  'Die abstrakte Kunst ist schön.',
47
- ai('EOF|test/mul.txt')
48
- ]
49
- meet({'files'=>'test/mul.txt'})
45
+ ai("EOF|#{path}")
46
+ ])
50
47
  end
51
48
 
52
49
  end
@@ -5,7 +5,7 @@ require_relative '../test_helper'
5
5
  class TestAttendeeTextWriter < AttendeeTestCase
6
6
 
7
7
  def setup
8
- @data = [
8
+ @input = [
9
9
  ai('FILE|test/test.txt'),
10
10
  wd('Dies|IDF'),
11
11
  wd('ist|IDF'),
@@ -25,34 +25,31 @@ class TestAttendeeTextWriter < AttendeeTestCase
25
25
  end
26
26
 
27
27
  def test_basic
28
- @input = @data
29
- @expect = [ "Dies,ist,eine,Zeile,.\n", "Dies,ist,eine,zweite,Zeile,.\n" ]
30
- meet({'ext'=>'tst', 'sep'=>','}, false)
28
+ meet({ 'ext' => 'tst', 'sep' => ',' }, @input)
31
29
 
32
- @output = File.readlines('test/test.tst', encoding: Lingo::ENC)
33
- assert_equal(@expect, @output)
30
+ assert_equal([
31
+ "Dies,ist,eine,Zeile,.\n", "Dies,ist,eine,zweite,Zeile,.\n"
32
+ ], File.readlines('test/test.tst', encoding: Lingo::ENC))
34
33
  end
35
34
 
36
35
  def test_complex
37
- @input = @data
38
- @expect = [ "Dies-ist-eine-Zeile-.\n", "Dies-ist-eine-zweite-Zeile-.\n" ]
39
- meet({'ext'=>'yip', 'sep'=>'-'}, false)
36
+ meet({ 'ext' => 'yip', 'sep' => '-' }, @input)
40
37
 
41
- @output = File.readlines('test/test.yip', encoding: Lingo::ENC)
42
- assert_equal(@expect, @output)
38
+ assert_equal([
39
+ "Dies-ist-eine-Zeile-.\n", "Dies-ist-eine-zweite-Zeile-.\n"
40
+ ], File.readlines('test/test.yip', encoding: Lingo::ENC))
43
41
  end
44
42
 
45
43
  def test_crlf
46
- @input = @data
47
- @expect = [ "Dies\n", "ist\n", "eine\n", "Zeile\n", ".\n", "Dies\n", "ist\n", "eine\n", "zweite\n", "Zeile\n", ".\n" ]
48
- meet({'sep'=>"\n"}, false)
44
+ meet({ 'sep' => "\n" }, @input)
49
45
 
50
- @output = File.readlines('test/test.txt2', encoding: Lingo::ENC)
51
- assert_equal(@expect, @output)
46
+ assert_equal([
47
+ "Dies\n", "ist\n", "eine\n", "Zeile\n", ".\n", "Dies\n", "ist\n", "eine\n", "zweite\n", "Zeile\n", ".\n"
48
+ ], File.readlines('test/test.txt2', encoding: Lingo::ENC))
52
49
  end
53
50
 
54
51
  def test_lir_file
55
- @input = [
52
+ meet({ 'ext' => 'vec', 'lir-format' => nil }, [
56
53
  ai('LIR-FORMAT|'), ai('FILE|test/lir.txt'),
57
54
  ai('RECORD|00237'),
58
55
  '020: GERHARD.',
@@ -65,28 +62,26 @@ class TestAttendeeTextWriter < AttendeeTestCase
65
62
  '020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
66
63
  "056: \"Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.\r",
67
64
  ai('EOF|test/lir.txt')
68
- ]
69
- @expect = [
65
+ ])
66
+
67
+ assert_equal([
70
68
  "00237*020: GERHARD. 025: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressour\
71
69
  cen. 056: Die intellektuelle Erschließung des Internet befindet sich in einer Krise. GERHARD ist derzeit weltweit der einzige.\r\n",
72
70
  "00238*020: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen. 025: das D\
73
71
  FG-Projekt GERHARD.\r\n",
74
72
  "00239*020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter. 056: \"Das Buch ist ein praxisbezogenes VADEMECUM\
75
73
  für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.\r\n"
76
- ]
77
- meet({'ext'=>'csv', 'lir-format'=>nil}, false)
78
-
79
- @output = File.readlines('test/lir.csv', encoding: Lingo::ENC)
80
- assert_equal(@expect, @output)
74
+ ], File.readlines('test/lir.vec', encoding: Lingo::ENC))
81
75
  end
82
76
 
83
77
  def test_nonewords
84
- @input = [ai('FILE|test/text.txt'), 'Nonwörter', 'Nonsense', ai('EOF|test/text.txt')]
85
- @expect = [ "Nonwörter\n", "Nonsense" ]
86
- meet({'ext'=>'non', 'sep'=>"\n"}, false)
78
+ meet({ 'ext' => 'non', 'sep' => "\n" }, [
79
+ ai('FILE|test/text.txt'), 'Nonwörter', 'Nonsense', ai('EOF|test/text.txt')
80
+ ])
87
81
 
88
- @output = File.readlines('test/text.non', encoding: Lingo::ENC)
89
- assert_equal(@expect, @output)
82
+ assert_equal([
83
+ "Nonwörter\n", "Nonsense"
84
+ ], File.readlines('test/text.non', encoding: Lingo::ENC))
90
85
  end
91
86
 
92
87
  end
@@ -3,14 +3,17 @@
3
3
  class TestAttendeeTokenizer < AttendeeTestCase
4
4
 
5
5
  def test_basic
6
- @input = ["Dies ist ein Test."]
7
- @expect = [tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC')]
8
- meet({})
6
+ meet({}, [
7
+ "Dies ist ein Test."
8
+ ], [
9
+ tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC')
10
+ ])
9
11
  end
10
12
 
11
13
  def test_complex
12
- @input = ["1964 www.vorhauer.de bzw. nasenbär, ()"]
13
- @expect = [
14
+ meet({}, [
15
+ "1964 www.vorhauer.de bzw. nasenbär, ()"
16
+ ], [
14
17
  tk('1964|NUMS'),
15
18
  tk('www.vorhauer.de|URLS'),
16
19
  tk('bzw|WORD'),
@@ -19,8 +22,7 @@ class TestAttendeeTokenizer < AttendeeTestCase
19
22
  tk(',|PUNC'),
20
23
  tk('(|OTHR'),
21
24
  tk(')|OTHR')
22
- ]
23
- meet({})
25
+ ])
24
26
  end
25
27
 
26
28
  end
@@ -5,14 +5,14 @@ require_relative '../test_helper'
5
5
  class TestAttendeeVariator < AttendeeTestCase
6
6
 
7
7
  def test_basic
8
- @input = [wd('fchwarz|?'), wd('fchilling|?'), wd('iehwarzfchilling|?'), wd('fchiiiirg|?')]
9
- @expect = [
8
+ meet({ 'source' => 'sys-dic' }, [
9
+ wd('fchwarz|?'), wd('fchilling|?'), wd('iehwarzfchilling|?'), wd('fchiiiirg|?')
10
+ ], [
10
11
  wd('*schwarz|IDF', 'schwarz|s', 'schwarz|a'),
11
12
  wd('*schilling|IDF', 'schilling|s'),
12
13
  wd('*schwarzschilling|KOM', 'schwarzschilling|k', 'schwarz|a+', 'schilling|s+', 'schwarz|s+'),
13
14
  wd('fchiiiirg|?')
14
- ]
15
- meet({'source'=>'sys-dic'})
15
+ ])
16
16
  end
17
17
 
18
18
  end
@@ -14,43 +14,51 @@ class TestAttendeeVectorFilter < AttendeeTestCase
14
14
  end
15
15
 
16
16
  def test_basic
17
- @expect = [ai('FILE|test'), 'substantiv', ai('EOF|test')]
18
- meet({})
17
+ meet({}, @input, [
18
+ ai('FILE|test'), 'substantiv', ai('EOF|test')
19
+ ])
19
20
  end
20
21
 
21
22
  def test_lexicals
22
- @expect = [ai('FILE|test'), 'adjektiv', 'eigenname', 'substantiv', 'verb', ai('EOF|test')]
23
- meet({'lexicals'=>'[save]'})
23
+ meet({ 'lexicals' => '[save]' }, @input, [
24
+ ai('FILE|test'), 'adjektiv', 'eigenname', 'substantiv', 'verb', ai('EOF|test')
25
+ ])
24
26
  end
25
27
 
26
28
  def test_sort_term_abs
27
- @expect = [ai('FILE|test'), '1 adjektiv', '1 eigenname', '1 substantiv', '1 verb', ai('EOF|test')]
28
- meet({'lexicals'=>'[save]', 'sort'=>'term_abs'})
29
+ meet({ 'lexicals' => '[save]', 'sort' => 'term_abs' }, @input, [
30
+ ai('FILE|test'), '1 adjektiv', '1 eigenname', '1 substantiv', '1 verb', ai('EOF|test')
31
+ ])
29
32
  end
30
33
 
31
34
  def test_sort_term_rel
32
- @expect = [ai('FILE|test'), '0.50000 adjektiv', '0.50000 eigenname', '0.50000 substantiv', '0.50000 verb', ai('EOF|test')]
33
- meet({'lexicals'=>'[save]', 'sort'=>'term_rel'})
35
+ meet({ 'lexicals' => '[save]', 'sort' => 'term_rel' }, @input, [
36
+ ai('FILE|test'), '0.50000 adjektiv', '0.50000 eigenname', '0.50000 substantiv', '0.50000 verb', ai('EOF|test')
37
+ ])
34
38
  end
35
39
 
36
40
  def test_sort_sto_abs
37
- @expect = [ai('FILE|test'), 'adjektiv {1}', 'eigenname {1}', 'substantiv {1}', 'verb {1}', ai('EOF|test')]
38
- meet({'lexicals'=>'[save]', 'sort'=>'sto_abs'})
41
+ meet({ 'lexicals' => '[save]', 'sort' => 'sto_abs' }, @input, [
42
+ ai('FILE|test'), 'adjektiv {1}', 'eigenname {1}', 'substantiv {1}', 'verb {1}', ai('EOF|test')
43
+ ])
39
44
  end
40
45
 
41
46
  def test_sort_sto_rel
42
- @expect = [ai('FILE|test'), 'adjektiv {0.50000}', 'eigenname {0.50000}', 'substantiv {0.50000}', 'verb {0.50000}', ai('EOF|test')]
43
- meet({'lexicals'=>'[save]', 'sort'=>'sto_rel'})
47
+ meet({ 'lexicals' => '[save]', 'sort' => 'sto_rel' }, @input, [
48
+ ai('FILE|test'), 'adjektiv {0.50000}', 'eigenname {0.50000}', 'substantiv {0.50000}', 'verb {0.50000}', ai('EOF|test')
49
+ ])
44
50
  end
45
51
 
46
52
  def test_nonword
47
- @expect = [ai('FILE|test'), 'unknown', ai('EOF|test')]
48
- meet({'lexicals'=>'\?'})
53
+ meet({ 'lexicals' => '\?' }, @input, [
54
+ ai('FILE|test'), 'unknown', ai('EOF|test')
55
+ ])
49
56
  end
50
57
 
51
58
  def test_nonword_sort_term_abs
52
- @expect = [ai('FILE|test'), '1 unknown', ai('EOF|test')]
53
- meet({'lexicals'=>'\?', 'sort'=>'term_abs'})
59
+ meet({ 'lexicals' => '\?', 'sort' => 'term_abs' }, @input, [
60
+ ai('FILE|test'), '1 unknown', ai('EOF|test')
61
+ ])
54
62
  end
55
63
 
56
64
  end