lingo 1.8.1 → 1.8.2

Files changed (99)
  1. data/ChangeLog +23 -5
  2. data/README +1 -1
  3. data/Rakefile +5 -7
  4. data/TODO +2 -0
  5. data/bin/lingo +5 -1
  6. data/de.lang +1 -1
  7. data/en/lingo-syn.txt +0 -0
  8. data/en.lang +2 -1
  9. data/lib/lingo/attendee/abbreviator.rb +8 -9
  10. data/lib/lingo/attendee/debugger.rb +5 -4
  11. data/lib/lingo/attendee/decomposer.rb +8 -3
  12. data/lib/lingo/attendee/dehyphenizer.rb +19 -63
  13. data/lib/lingo/attendee/formatter.rb +1 -1
  14. data/lib/lingo/attendee/multi_worder.rb +67 -155
  15. data/lib/lingo/attendee/noneword_filter.rb +16 -9
  16. data/lib/lingo/attendee/object_filter.rb +1 -1
  17. data/lib/lingo/attendee/sequencer.rb +32 -63
  18. data/lib/lingo/attendee/stemmer/porter.rb +343 -0
  19. data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
  20. data/lib/lingo/attendee/synonymer.rb +10 -9
  21. data/lib/lingo/attendee/text_reader.rb +102 -76
  22. data/lib/lingo/attendee/text_writer.rb +23 -26
  23. data/lib/lingo/attendee/tokenizer.rb +13 -27
  24. data/lib/lingo/attendee/variator.rb +26 -66
  25. data/lib/lingo/attendee/vector_filter.rb +42 -43
  26. data/lib/lingo/attendee/word_searcher.rb +6 -7
  27. data/lib/lingo/attendee.rb +25 -7
  28. data/lib/lingo/buffered_attendee.rb +36 -10
  29. data/lib/lingo/cachable.rb +8 -8
  30. data/lib/lingo/config.rb +5 -6
  31. data/lib/lingo/ctl.rb +2 -3
  32. data/lib/lingo/database/crypter.rb +9 -26
  33. data/lib/lingo/database/gdbm_store.rb +3 -5
  34. data/lib/lingo/database/libcdb_store.rb +4 -6
  35. data/lib/lingo/database/sdbm_store.rb +11 -6
  36. data/lib/lingo/database/show_progress.rb +3 -43
  37. data/lib/lingo/database/source/key_value.rb +2 -6
  38. data/lib/lingo/database/source/multi_key.rb +3 -5
  39. data/lib/lingo/database/source/multi_value.rb +2 -6
  40. data/lib/lingo/database/source/single_word.rb +4 -6
  41. data/lib/lingo/database/source/word_class.rb +4 -10
  42. data/lib/lingo/database/source.rb +20 -18
  43. data/lib/lingo/database.rb +84 -59
  44. data/lib/lingo/error.rb +57 -1
  45. data/lib/lingo/language/dictionary.rb +21 -18
  46. data/lib/lingo/language/grammar.rb +40 -49
  47. data/lib/lingo/language/lexical.rb +6 -6
  48. data/lib/lingo/language/lexical_hash.rb +6 -0
  49. data/lib/lingo/language/word.rb +32 -15
  50. data/lib/lingo/language/word_form.rb +1 -1
  51. data/lib/lingo/language.rb +14 -25
  52. data/lib/lingo/reportable.rb +12 -10
  53. data/lib/lingo/show_progress.rb +81 -0
  54. data/lib/lingo/version.rb +1 -1
  55. data/lib/lingo.rb +63 -24
  56. data/lingo-call.cfg +6 -10
  57. data/lingo.cfg +60 -44
  58. data/lir.cfg +42 -41
  59. data/test/attendee/ts_abbreviator.rb +3 -5
  60. data/test/attendee/ts_decomposer.rb +3 -5
  61. data/test/attendee/ts_multi_worder.rb +87 -145
  62. data/test/attendee/ts_noneword_filter.rb +5 -3
  63. data/test/attendee/ts_object_filter.rb +5 -3
  64. data/test/attendee/ts_sequencer.rb +3 -5
  65. data/test/attendee/ts_stemmer.rb +309 -0
  66. data/test/attendee/ts_synonymer.rb +15 -11
  67. data/test/attendee/ts_text_reader.rb +12 -15
  68. data/test/attendee/ts_text_writer.rb +24 -29
  69. data/test/attendee/ts_tokenizer.rb +9 -7
  70. data/test/attendee/ts_variator.rb +4 -4
  71. data/test/attendee/ts_vector_filter.rb +24 -16
  72. data/test/attendee/ts_word_searcher.rb +20 -36
  73. data/test/{lir.csv → lir.vec} +0 -0
  74. data/test/ref/artikel.vec +943 -943
  75. data/test/ref/artikel.ven +943 -943
  76. data/test/ref/lir.non +201 -201
  77. data/test/ref/lir.seq +178 -178
  78. data/test/ref/lir.syn +49 -49
  79. data/test/ref/lir.vec +329 -0
  80. data/test/test_helper.rb +20 -36
  81. data/test/ts_database.rb +10 -10
  82. data/test/ts_language.rb +279 -319
  83. metadata +93 -104
  84. data/info/Objekte.png +0 -0
  85. data/info/Typen.png +0 -0
  86. data/info/database.png +0 -0
  87. data/info/db_small.png +0 -0
  88. data/info/download.png +0 -0
  89. data/info/kerze.png +0 -0
  90. data/info/language.png +0 -0
  91. data/info/lingo.png +0 -0
  92. data/info/logo.png +0 -0
  93. data/info/meeting.png +0 -0
  94. data/info/types.png +0 -0
  95. data/lingo-all.cfg +0 -89
  96. data/porter/stem.cfg +0 -311
  97. data/porter/stem.rb +0 -150
  98. data/test/ref/lir.csv +0 -329
  99. data/test.cfg +0 -79

data/test/attendee/ts_stemmer.rb
@@ -0,0 +1,309 @@
+# encoding: utf-8
+
+require_relative '../test_helper'
+
+class TestAttendeeStemmer < AttendeeTestCase
+
+  def test_type
+    assert_raise(Lingo::NameNotFoundError) { meet({ 'type' => 'bla' }, []) }
+  end
+
+  def test_basic
+    meet({}, [
+      wd('bla|IDF'),
+      wd('blub|?'),
+      wd('blubs|?'),
+      ai('EOF|')
+    ], [
+      wd('bla|IDF'),
+      wd('blub|?'),
+      wd('blubs|?', 'blub|z'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_wc
+    meet({ 'wordclass' => 'w' }, [
+      wd('bla|IDF'),
+      wd('blub|?'),
+      wd('blubs|?'),
+      ai('EOF|')
+    ], [
+      wd('bla|IDF'),
+      wd('blub|?'),
+      wd('blubs|?', 'blub|w'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_mode
+    meet({ 'mode' => '' }, [
+      wd('bla|IDF'),
+      wd('a|?'),
+      wd('yet|?'),
+      wd('blubs|?'),
+      ai('EOF|')
+    ], [
+      wd('bla|IDF'),
+      wd('a|?'),
+      wd('yet|?'),
+      wd('blubs|?', 'blub|z'),
+      ai('EOF|')
+    ])
+
+    meet({ 'mode' => 'all' }, [
+      wd('bla|IDF'),
+      wd('a|?'),
+      wd('yet|?'),
+      wd('blubs|?'),
+      ai('EOF|')
+    ], [
+      wd('bla|IDF'),
+      wd('a|?', 'a|z'),
+      wd('yet|?', 'yet|z'),
+      wd('blubs|?', 'blub|z'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_examples_100
+    meet({}, [
+      wd('S100|IDF'),
+      wd('caresses|?'),
+      wd('ponies|?'),
+      wd('ties|?'),
+      wd('caress|?'),
+      wd('cats|?'),
+      ai('EOF|')
+    ], [
+      wd('S100|IDF'),
+      wd('caresses|?', 'caress|z'),
+      wd('ponies|?', 'poni|z'),
+      wd('ties|?', 'ti|z'), # snowball: tie
+      wd('caress|?', 'caress|z'),
+      wd('cats|?', 'cat|z'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_examples_110
+    meet({ 'mode' => 'all' }, [
+      wd('S110|IDF'),
+      wd('agreed|?'),
+      wd('feed|?'),
+      wd('plastered|?'),
+      wd('bled|?'),
+      wd('motoring|?'),
+      wd('sing|?'),
+      ai('EOF|')
+    ], [
+      wd('S110|IDF'),
+      wd('agreed|?', 'agre|z'),
+      wd('feed|?', 'fe|z'), # snowball: feed
+      wd('plastered|?', 'plaster|z'),
+      wd('bled|?', 'bled|z'),
+      wd('motoring|?', 'motor|z'),
+      wd('sing|?', 'sing|z'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_examples_111
+    meet({}, [
+      wd('S111|IDF'),
+      wd('conflated|?'),
+      wd('troubled|?'),
+      wd('sized|?'),
+      wd('hopping|?'),
+      wd('tanned|?'),
+      wd('falling|?'),
+      wd('hissing|?'),
+      wd('fizzed|?'),
+      wd('failing|?'),
+      wd('filing|?'),
+      ai('EOF|')
+    ], [
+      wd('S111|IDF'),
+      wd('conflated|?', 'conflat|z'),
+      wd('troubled|?', 'troubl|z'),
+      wd('sized|?', 'size|z'),
+      wd('hopping|?', 'hop|z'),
+      wd('tanned|?', 'tan|z'),
+      wd('falling|?', 'fall|z'),
+      wd('hissing|?', 'hiss|z'),
+      wd('fizzed|?', 'fizz|z'),
+      wd('failing|?', 'fail|z'),
+      wd('filing|?', 'file|z'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_examples_120
+    meet({ 'mode' => 'all' }, [
+      wd('S120|IDF'),
+      wd('happy|?'),
+      wd('sky|?'),
+      ai('EOF|')
+    ], [
+      wd('S120|IDF'),
+      wd('happy|?', 'happi|z'),
+      wd('sky|?', 'sky|z'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_examples_200
+    meet({}, [
+      wd('S200|IDF'),
+      wd('relational|?'),
+      wd('conditional|?'),
+      wd('rational|?'),
+      wd('valency|?'),
+      wd('hesitancy|?'),
+      wd('digitizer|?'),
+      wd('conformably|?'),
+      wd('radically|?'),
+      wd('differently|?'),
+      wd('vilely|?'),
+      wd('analogously|?'),
+      wd('vietnamization|?'),
+      wd('predication|?'),
+      wd('operator|?'),
+      wd('feudalism|?'),
+      wd('decisiveness|?'),
+      wd('hopefulness|?'),
+      wd('callousness|?'),
+      wd('formality|?'),
+      wd('sensitivity|?'),
+      wd('sensibility|?'),
+      ai('EOF|')
+    ], [
+      wd('S200|IDF'),
+      wd('relational|?', 'relat|z'),
+      wd('conditional|?', 'condit|z'),
+      wd('rational|?', 'ration|z'),
+      wd('valency|?', 'valenc|z'),
+      wd('hesitancy|?', 'hesit|z'),
+      wd('digitizer|?', 'digit|z'),
+      wd('conformably|?', 'conform|z'),
+      wd('radically|?', 'radic|z'),
+      wd('differently|?', 'differ|z'),
+      wd('vilely|?', 'vile|z'),
+      wd('analogously|?', 'analog|z'),
+      wd('vietnamization|?', 'vietnam|z'),
+      wd('predication|?', 'predic|z'),
+      wd('operator|?', 'oper|z'),
+      wd('feudalism|?', 'feudal|z'),
+      wd('decisiveness|?', 'decis|z'),
+      wd('hopefulness|?', 'hope|z'),
+      wd('callousness|?', 'callous|z'),
+      wd('formality|?', 'formal|z'),
+      wd('sensitivity|?', 'sensit|z'),
+      wd('sensibility|?', 'sensibl|z'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_examples_300
+    meet({}, [
+      wd('S300|IDF'),
+      wd('triplicate|?'),
+      wd('formative|?'),
+      wd('formalize|?'),
+      wd('electricity|?'),
+      wd('electrical|?'),
+      wd('hopeful|?'),
+      wd('goodness|?'),
+      ai('EOF|')
+    ], [
+      wd('S300|IDF'),
+      wd('triplicate|?', 'triplic|z'),
+      wd('formative|?', 'form|z'), # snowball: format
+      wd('formalize|?', 'formal|z'),
+      wd('electricity|?', 'electr|z'),
+      wd('electrical|?', 'electr|z'),
+      wd('hopeful|?', 'hope|z'),
+      wd('goodness|?', 'good|z'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_examples_400
+    meet({}, [
+      wd('S400|IDF'),
+      wd('revival|?'),
+      wd('allowance|?'),
+      wd('inference|?'),
+      wd('airliner|?'),
+      wd('gyroscopic|?'),
+      wd('adjustable|?'),
+      wd('defensible|?'),
+      wd('irritant|?'),
+      wd('replacement|?'),
+      wd('adjustment|?'),
+      wd('dependent|?'),
+      wd('adoption|?'),
+      wd('homologou|?'),
+      wd('communism|?'),
+      wd('activate|?'),
+      wd('angularity|?'),
+      wd('homologous|?'),
+      wd('effective|?'),
+      wd('bowdlerize|?'),
+      ai('EOF|')
+    ], [
+      wd('S400|IDF'),
+      wd('revival|?', 'reviv|z'),
+      wd('allowance|?', 'allow|z'),
+      wd('inference|?', 'infer|z'),
+      wd('airliner|?', 'airlin|z'),
+      wd('gyroscopic|?', 'gyroscop|z'),
+      wd('adjustable|?', 'adjust|z'),
+      wd('defensible|?', 'defens|z'),
+      wd('irritant|?', 'irrit|z'),
+      wd('replacement|?', 'replac|z'),
+      wd('adjustment|?', 'adjust|z'),
+      wd('dependent|?', 'depend|z'),
+      wd('adoption|?', 'adopt|z'),
+      wd('homologou|?', 'homolog|z'), # snowball: homologou
+      wd('communism|?', 'commun|z'), # snowball: communism
+      wd('activate|?', 'activ|z'),
+      wd('angularity|?', 'angular|z'),
+      wd('homologous|?', 'homolog|z'),
+      wd('effective|?', 'effect|z'),
+      wd('bowdlerize|?', 'bowdler|z'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_examples_500
+    meet({ 'mode' => 'all' }, [
+      wd('S500|IDF'),
+      wd('probate|?'),
+      wd('rate|?'),
+      wd('cease|?'),
+      ai('EOF|')
+    ], [
+      wd('S500|IDF'),
+      wd('probate|?', 'probat|z'),
+      wd('rate|?', 'rate|z'),
+      wd('cease|?', 'ceas|z'),
+      ai('EOF|')
+    ])
+  end
+
+  def test_examples_510
+    meet({ 'mode' => 'all' }, [
+      wd('S510|IDF'),
+      wd('controll|?'),
+      wd('roll|?'),
+      ai('EOF|')
+    ], [
+      wd('S510|IDF'),
+      wd('controll|?', 'control|z'),
+      wd('roll|?', 'roll|z'),
+      ai('EOF|')
+    ])
+  end
+
+end
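A note on the new stemmer tests above: the expected stems follow the original Porter algorithm (hence 'poni', 'ti' or 'fe' where the comments note the differing Snowball result), and the stem is attached as an additional lexical whose word class defaults to 'z' (see test_wc for the 'wordclass' override, and test_mode for 'mode' => 'all' also stemming short words like 'a' and 'yet'). As a rough, self-contained illustration of the kind of suffix stripping involved — generic Porter step 1a, not the actual code in data/lib/lingo/attendee/stemmer/porter.rb — consider:

  # Generic Porter step 1a (plural endings); illustrative only.
  def porter_step_1a(word)
    case word
    when /sses\z/ then word.sub(/sses\z/, 'ss')  # caresses -> caress
    when /ies\z/  then word.sub(/ies\z/, 'i')    # ponies -> poni, ties -> ti
    when /ss\z/   then word                      # caress -> caress
    when /s\z/    then word.chomp('s')           # cats -> cat
    else word
    end
  end

  %w[caresses ponies ties caress cats].map { |w| porter_step_1a(w) }
  #=> ["caress", "poni", "ti", "caress", "cat"]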

data/test/attendee/ts_synonymer.rb
@@ -5,23 +5,27 @@ require_relative '../test_helper'
 class TestAttendeeSynonymer < AttendeeTestCase
 
   def test_basic
-    @input = [wd('abtastzeiten|IDF', 'abtastzeit|s')]
-    @expect = [wd('abtastzeiten|IDF', 'abtastzeit|s', 'abtastfrequenz|y', 'abtastperiode|y')]
-    meet({'source'=>'sys-syn', 'check'=>'-,MUL'})
-    # @expect.each_index {|i| assert_equal(@expect[i], @output[i]) }
+    meet({ 'source' => 'sys-syn', 'check' => '-,MUL' }, [
+      wd('abtastzeiten|IDF', 'abtastzeit|s')
+    ], [
+      wd('abtastzeiten|IDF', 'abtastzeit|s', 'abtastfrequenz|y', 'abtastperiode|y')
+    ])
   end
 
   def test_first
-    @input = [wd('Aktienanleihe|IDF', 'aktienanleihe|s')]
-    @expect = [wd('Aktienanleihe|IDF', 'aktienanleihe|s', 'aktien-anleihe|y',
-      'reverse convertible bond|y', 'reverse convertibles|y')]
-    meet({'source'=>'sys-syn,tst-syn', 'check'=>'-,MUL', 'mode'=>'first'})
+    meet({ 'source' => 'sys-syn,tst-syn', 'check' => '-,MUL', 'mode' => 'first' }, [
+      wd('Aktienanleihe|IDF', 'aktienanleihe|s')
+    ], [
+      wd('Aktienanleihe|IDF', 'aktienanleihe|s', 'aktien-anleihe|y', 'reverse convertible bond|y', 'reverse convertibles|y')
+    ])
   end
 
   def test_all
-    @input = [wd('Kerlchen|IDF', 'kerlchen|s')]
-    @expect = [wd('Kerlchen|IDF', 'kerlchen|s', 'kerlchen|y', 'zwerg-nase|y')]
-    meet({'source'=>'sys-syn,tst-syn', 'check'=>'-,MUL', 'mode'=>'all'})
+    meet({ 'source' => 'sys-syn,tst-syn', 'check' => '-,MUL', 'mode' => 'all' }, [
+      wd('Kerlchen|IDF', 'kerlchen|s')
+    ], [
+      wd('Kerlchen|IDF', 'kerlchen|s', 'kerlchen|y', 'zwerg-nase|y')
+    ])
  end
 
 end
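This diff, and the test diffs that follow, show the refactored test style of this release: instead of assigning @input and @expect and then calling meet(options), a test now passes options, input and expected output to a single meet(options, input, expected) call. The real helper lives in data/test/test_helper.rb (+20 -36 in the file list above); purely as a toy illustration of the pattern, with a stand-in "attendee" that just upcases strings rather than anything from Lingo:

  require 'minitest/autorun'

  class ToyAttendeeTest < Minitest::Test
    # Stand-in attendee: upcases every string, passes other objects through.
    def process(options, input)
      input.map { |obj| obj.is_a?(String) ? obj.upcase : obj }
    end

    # Options, input and expected output in one call, mirroring the style above.
    def meet(options, input, expected)
      assert_equal expected, process(options, input)
    end

    def test_upcase
      meet({}, ['bla', :EOF], ['BLA', :EOF])
    end
  end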

data/test/attendee/ts_text_reader.rb
@@ -5,8 +5,8 @@ require_relative '../test_helper'
 class TestAttendeeTextReader < AttendeeTestCase
 
   def test_lir_file
-    @expect = [
-      ai('LIR-FORMAT|'), ai('FILE|test/lir.txt'),
+    meet({ 'files' => 'test/lir.txt', 'records' => true }, nil, [
+      ai('LIR-FORMAT|'), ai("FILE|#{path = File.expand_path('test/lir.txt')}"),
       ai('RECORD|00237'),
       '020: GERHARD.',
       '025: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
@@ -17,14 +17,13 @@ class TestAttendeeTextReader < AttendeeTestCase
       ai('RECORD|00239'),
       '020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
       '056: "Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.',
-      ai('EOF|test/lir.txt')
-    ]
-    meet({'files'=>'test/lir.txt', 'lir-record-pattern'=>'^\[(\d+)\.\]'})
+      ai("EOF|#{path}")
+    ])
   end
 
   def test_lir_file_another_pattern
-    @expect = [
-      ai('LIR-FORMAT|'), ai('FILE|test/lir2.txt'),
+    meet({ 'files' => 'test/lir2.txt', 'records' => '^\021(\d+)\022' }, nil, [
+      ai('LIR-FORMAT|'), ai("FILE|#{path = File.expand_path('test/lir2.txt')}"),
       ai('RECORD|00237'),
       '020: GERHARD.',
       '025: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
@@ -35,18 +34,16 @@ class TestAttendeeTextReader < AttendeeTestCase
       ai('RECORD|00239'),
       '020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
       '056: "Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.',
-      ai('EOF|test/lir2.txt')
-    ]
-    meet({'files'=>'test/lir2.txt', 'lir-record-pattern'=>'^\021(\d+)\022'})
+      ai("EOF|#{path}")
+    ])
   end
 
   def test_normal_file
-    @expect = [
-      ai('FILE|test/mul.txt'),
+    meet({ 'files' => 'test/mul.txt' }, nil, [
+      ai("FILE|#{path = File.expand_path('test/mul.txt')}"),
       'Die abstrakte Kunst ist schön.',
-      ai('EOF|test/mul.txt')
-    ]
-    meet({'files'=>'test/mul.txt'})
+      ai("EOF|#{path}")
+    ])
   end
 
 end
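Two changes run through the text_reader tests: the record-boundary option is now called 'records' (true selects the default pattern) instead of 'lir-record-pattern', and the FILE/EOF agenda items now carry absolute paths via File.expand_path. The pattern itself still works as before, with the first capture group supplying the record id; in plain Ruby, and assuming a fixture line shaped the way the old default pattern expects (test/lir.txt itself is not shown in this diff):

  line = '[00237.] GERHARD'   # hypothetical record header line
  line[/^\[(\d+)\.\]/, 1]     #=> "00237"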

data/test/attendee/ts_text_writer.rb
@@ -5,7 +5,7 @@ require_relative '../test_helper'
 class TestAttendeeTextWriter < AttendeeTestCase
 
   def setup
-    @data = [
+    @input = [
       ai('FILE|test/test.txt'),
       wd('Dies|IDF'),
       wd('ist|IDF'),
@@ -25,34 +25,31 @@ class TestAttendeeTextWriter < AttendeeTestCase
   end
 
   def test_basic
-    @input = @data
-    @expect = [ "Dies,ist,eine,Zeile,.\n", "Dies,ist,eine,zweite,Zeile,.\n" ]
-    meet({'ext'=>'tst', 'sep'=>','}, false)
+    meet({ 'ext' => 'tst', 'sep' => ',' }, @input)
 
-    @output = File.readlines('test/test.tst', encoding: Lingo::ENC)
-    assert_equal(@expect, @output)
+    assert_equal([
+      "Dies,ist,eine,Zeile,.\n", "Dies,ist,eine,zweite,Zeile,.\n"
+    ], File.readlines('test/test.tst', encoding: Lingo::ENC))
   end
 
   def test_complex
-    @input = @data
-    @expect = [ "Dies-ist-eine-Zeile-.\n", "Dies-ist-eine-zweite-Zeile-.\n" ]
-    meet({'ext'=>'yip', 'sep'=>'-'}, false)
+    meet({ 'ext' => 'yip', 'sep' => '-' }, @input)
 
-    @output = File.readlines('test/test.yip', encoding: Lingo::ENC)
-    assert_equal(@expect, @output)
+    assert_equal([
+      "Dies-ist-eine-Zeile-.\n", "Dies-ist-eine-zweite-Zeile-.\n"
+    ], File.readlines('test/test.yip', encoding: Lingo::ENC))
   end
 
   def test_crlf
-    @input = @data
-    @expect = [ "Dies\n", "ist\n", "eine\n", "Zeile\n", ".\n", "Dies\n", "ist\n", "eine\n", "zweite\n", "Zeile\n", ".\n" ]
-    meet({'sep'=>"\n"}, false)
+    meet({ 'sep' => "\n" }, @input)
 
-    @output = File.readlines('test/test.txt2', encoding: Lingo::ENC)
-    assert_equal(@expect, @output)
+    assert_equal([
+      "Dies\n", "ist\n", "eine\n", "Zeile\n", ".\n", "Dies\n", "ist\n", "eine\n", "zweite\n", "Zeile\n", ".\n"
+    ], File.readlines('test/test.txt2', encoding: Lingo::ENC))
   end
 
   def test_lir_file
-    @input = [
+    meet({ 'ext' => 'vec', 'lir-format' => nil }, [
       ai('LIR-FORMAT|'), ai('FILE|test/lir.txt'),
       ai('RECORD|00237'),
       '020: GERHARD.',
@@ -65,28 +62,26 @@ class TestAttendeeTextWriter < AttendeeTestCase
       '020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
       "056: \"Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.\r",
       ai('EOF|test/lir.txt')
-    ]
-    @expect = [
+    ])
+
+    assert_equal([
       "00237*020: GERHARD. 025: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressour\
 cen. 056: Die intellektuelle Erschließung des Internet befindet sich in einer Krise. GERHARD ist derzeit weltweit der einzige.\r\n",
       "00238*020: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen. 025: das D\
 FG-Projekt GERHARD.\r\n",
       "00239*020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter. 056: \"Das Buch ist ein praxisbezogenes VADEMECUM\
 für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.\r\n"
-    ]
-    meet({'ext'=>'csv', 'lir-format'=>nil}, false)
-
-    @output = File.readlines('test/lir.csv', encoding: Lingo::ENC)
-    assert_equal(@expect, @output)
+    ], File.readlines('test/lir.vec', encoding: Lingo::ENC))
   end
 
   def test_nonewords
-    @input = [ai('FILE|test/text.txt'), 'Nonwörter', 'Nonsense', ai('EOF|test/text.txt')]
-    @expect = [ "Nonwörter\n", "Nonsense" ]
-    meet({'ext'=>'non', 'sep'=>"\n"}, false)
+    meet({ 'ext' => 'non', 'sep' => "\n" }, [
+      ai('FILE|test/text.txt'), 'Nonwörter', 'Nonsense', ai('EOF|test/text.txt')
+    ])
 
-    @output = File.readlines('test/text.non', encoding: Lingo::ENC)
-    assert_equal(@expect, @output)
+    assert_equal([
+      "Nonwörter\n", "Nonsense"
+    ], File.readlines('test/text.non', encoding: Lingo::ENC))
   end
 
 end
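The text_writer expectations spell out what 'sep' and 'ext' do: the tokens of each sentence are joined with the separator and written to a file named after the input with the new extension, and the LIR output now ends up in a .vec file instead of .csv (matching the test/ref/lir.csv -> lir.vec change in the file list). The first expectation of test_basic is literally just:

  %w[Dies ist eine Zeile .].join(',') << "\n"
  #=> "Dies,ist,eine,Zeile,.\n"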

data/test/attendee/ts_tokenizer.rb
@@ -3,14 +3,17 @@
 class TestAttendeeTokenizer < AttendeeTestCase
 
   def test_basic
-    @input = ["Dies ist ein Test."]
-    @expect = [tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC')]
-    meet({})
+    meet({}, [
+      "Dies ist ein Test."
+    ], [
+      tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC')
+    ])
   end
 
   def test_complex
-    @input = ["1964 www.vorhauer.de bzw. nasenbär, ()"]
-    @expect = [
+    meet({}, [
+      "1964 www.vorhauer.de bzw. nasenbär, ()"
+    ], [
       tk('1964|NUMS'),
       tk('www.vorhauer.de|URLS'),
       tk('bzw|WORD'),
@@ -19,8 +22,7 @@ class TestAttendeeTokenizer < AttendeeTestCase
       tk(',|PUNC'),
      tk('(|OTHR'),
       tk(')|OTHR')
-    ]
-    meet({})
+    ])
   end
 
 end
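The tokenizer expectations show the tag set in play (WORD, NUMS, URLS, PUNC, OTHR), assigned by ordered pattern rules where the first match wins. A stand-alone sketch of that idea — the expressions below are made up for illustration, not Lingo's actual rule table — looks like this:

  # First matching rule wins; patterns are illustrative only.
  RULES = [
    [:URLS, /\A[\w.-]+\.[a-z]{2,}\z/i],
    [:NUMS, /\A\d+\z/],
    [:WORD, /\A[[:alpha:]]+\z/],
    [:PUNC, /\A[.,;:!?]\z/],
    [:OTHR, /./]
  ]

  def classify(token)
    RULES.find { |_, pattern| token =~ pattern }.first
  end

  %w[1964 www.vorhauer.de bzw nasenbär , ( )].map { |t| classify(t) }
  #=> [:NUMS, :URLS, :WORD, :WORD, :PUNC, :OTHR, :OTHR]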

data/test/attendee/ts_variator.rb
@@ -5,14 +5,14 @@ require_relative '../test_helper'
 class TestAttendeeVariator < AttendeeTestCase
 
   def test_basic
-    @input = [wd('fchwarz|?'), wd('fchilling|?'), wd('iehwarzfchilling|?'), wd('fchiiiirg|?')]
-    @expect = [
+    meet({ 'source' => 'sys-dic' }, [
+      wd('fchwarz|?'), wd('fchilling|?'), wd('iehwarzfchilling|?'), wd('fchiiiirg|?')
+    ], [
       wd('*schwarz|IDF', 'schwarz|s', 'schwarz|a'),
       wd('*schilling|IDF', 'schilling|s'),
       wd('*schwarzschilling|KOM', 'schwarzschilling|k', 'schwarz|a+', 'schilling|s+', 'schwarz|s+'),
       wd('fchiiiirg|?')
-    ]
-    meet({'source'=>'sys-dic'})
+    ])
   end
 
 end
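The variator test suggests the strategy: words the dictionary does not recognize (fchwarz, fchilling, iehwarzfchilling — 'f' read for 's' is the classic long-s OCR confusion) are retried with character substitutions against the 'sys-dic' source, and a variant that resolves is marked with a leading '*', while fchiiiirg stays unknown. A toy sketch of that idea, with a made-up substitution table and dictionary rather than Lingo's:

  SUBSTITUTIONS = { 'f' => 's', 'ieh' => 'sch' }   # made up for illustration
  DICTIONARY    = %w[schwarz schilling schwarzschilling]

  # Build all substitution variants of a word, including the word itself.
  def variations(word)
    SUBSTITUTIONS.reduce([word]) do |variants, (from, to)|
      variants.flat_map { |v| [v, v.gsub(from, to)] }.uniq
    end
  end

  # Return the first variant found in the dictionary, marked with '*'.
  def variate(word)
    hit = variations(word).find { |v| DICTIONARY.include?(v) }
    hit ? "*#{hit}" : word
  end

  %w[fchwarz fchilling iehwarzfchilling fchiiiirg].map { |w| variate(w) }
  #=> ["*schwarz", "*schilling", "*schwarzschilling", "fchiiiirg"]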

data/test/attendee/ts_vector_filter.rb
@@ -14,43 +14,51 @@ class TestAttendeeVectorFilter < AttendeeTestCase
   end
 
   def test_basic
-    @expect = [ai('FILE|test'), 'substantiv', ai('EOF|test')]
-    meet({})
+    meet({}, @input, [
+      ai('FILE|test'), 'substantiv', ai('EOF|test')
+    ])
   end
 
   def test_lexicals
-    @expect = [ai('FILE|test'), 'adjektiv', 'eigenname', 'substantiv', 'verb', ai('EOF|test')]
-    meet({'lexicals'=>'[save]'})
+    meet({ 'lexicals' => '[save]' }, @input, [
+      ai('FILE|test'), 'adjektiv', 'eigenname', 'substantiv', 'verb', ai('EOF|test')
+    ])
   end
 
   def test_sort_term_abs
-    @expect = [ai('FILE|test'), '1 adjektiv', '1 eigenname', '1 substantiv', '1 verb', ai('EOF|test')]
-    meet({'lexicals'=>'[save]', 'sort'=>'term_abs'})
+    meet({ 'lexicals' => '[save]', 'sort' => 'term_abs' }, @input, [
+      ai('FILE|test'), '1 adjektiv', '1 eigenname', '1 substantiv', '1 verb', ai('EOF|test')
+    ])
   end
 
   def test_sort_term_rel
-    @expect = [ai('FILE|test'), '0.50000 adjektiv', '0.50000 eigenname', '0.50000 substantiv', '0.50000 verb', ai('EOF|test')]
-    meet({'lexicals'=>'[save]', 'sort'=>'term_rel'})
+    meet({ 'lexicals' => '[save]', 'sort' => 'term_rel' }, @input, [
+      ai('FILE|test'), '0.50000 adjektiv', '0.50000 eigenname', '0.50000 substantiv', '0.50000 verb', ai('EOF|test')
+    ])
  end
 
   def test_sort_sto_abs
-    @expect = [ai('FILE|test'), 'adjektiv {1}', 'eigenname {1}', 'substantiv {1}', 'verb {1}', ai('EOF|test')]
-    meet({'lexicals'=>'[save]', 'sort'=>'sto_abs'})
+    meet({ 'lexicals' => '[save]', 'sort' => 'sto_abs' }, @input, [
+      ai('FILE|test'), 'adjektiv {1}', 'eigenname {1}', 'substantiv {1}', 'verb {1}', ai('EOF|test')
+    ])
  end
 
   def test_sort_sto_rel
-    @expect = [ai('FILE|test'), 'adjektiv {0.50000}', 'eigenname {0.50000}', 'substantiv {0.50000}', 'verb {0.50000}', ai('EOF|test')]
-    meet({'lexicals'=>'[save]', 'sort'=>'sto_rel'})
+    meet({ 'lexicals' => '[save]', 'sort' => 'sto_rel' }, @input, [
+      ai('FILE|test'), 'adjektiv {0.50000}', 'eigenname {0.50000}', 'substantiv {0.50000}', 'verb {0.50000}', ai('EOF|test')
+    ])
  end
 
   def test_nonword
-    @expect = [ai('FILE|test'), 'unknown', ai('EOF|test')]
-    meet({'lexicals'=>'\?'})
+    meet({ 'lexicals' => '\?' }, @input, [
+      ai('FILE|test'), 'unknown', ai('EOF|test')
+    ])
  end
 
   def test_nonword_sort_term_abs
-    @expect = [ai('FILE|test'), '1 unknown', ai('EOF|test')]
-    meet({'lexicals'=>'\?', 'sort'=>'term_abs'})
+    meet({ 'lexicals' => '\?', 'sort' => 'term_abs' }, @input, [
+      ai('FILE|test'), '1 unknown', ai('EOF|test')
+    ])
  end
 
 end
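Only the rendering differs between the four sort modes above: term_* prints the frequency before the term, sto_* appends it in braces, and relative values are shown with five decimal places. Reproduced for a term with absolute count 1 and relative frequency 0.5 (the actual 0.5 depends on the fixture built in setup, which this diff does not show):

  term, abs, rel = 'adjektiv', 1, 0.5

  format('%d %s', abs, term)      #=> "1 adjektiv"          (term_abs)
  format('%.5f %s', rel, term)    #=> "0.50000 adjektiv"    (term_rel)
  format('%s {%d}', term, abs)    #=> "adjektiv {1}"        (sto_abs)
  format('%s {%.5f}', term, rel)  #=> "adjektiv {0.50000}"  (sto_rel)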