lingo 1.8.1 → 1.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +23 -5
- data/README +1 -1
- data/Rakefile +5 -7
- data/TODO +2 -0
- data/bin/lingo +5 -1
- data/de.lang +1 -1
- data/en/lingo-syn.txt +0 -0
- data/en.lang +2 -1
- data/lib/lingo/attendee/abbreviator.rb +8 -9
- data/lib/lingo/attendee/debugger.rb +5 -4
- data/lib/lingo/attendee/decomposer.rb +8 -3
- data/lib/lingo/attendee/dehyphenizer.rb +19 -63
- data/lib/lingo/attendee/formatter.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +67 -155
- data/lib/lingo/attendee/noneword_filter.rb +16 -9
- data/lib/lingo/attendee/object_filter.rb +1 -1
- data/lib/lingo/attendee/sequencer.rb +32 -63
- data/lib/lingo/attendee/stemmer/porter.rb +343 -0
- data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
- data/lib/lingo/attendee/synonymer.rb +10 -9
- data/lib/lingo/attendee/text_reader.rb +102 -76
- data/lib/lingo/attendee/text_writer.rb +23 -26
- data/lib/lingo/attendee/tokenizer.rb +13 -27
- data/lib/lingo/attendee/variator.rb +26 -66
- data/lib/lingo/attendee/vector_filter.rb +42 -43
- data/lib/lingo/attendee/word_searcher.rb +6 -7
- data/lib/lingo/attendee.rb +25 -7
- data/lib/lingo/buffered_attendee.rb +36 -10
- data/lib/lingo/cachable.rb +8 -8
- data/lib/lingo/config.rb +5 -6
- data/lib/lingo/ctl.rb +2 -3
- data/lib/lingo/database/crypter.rb +9 -26
- data/lib/lingo/database/gdbm_store.rb +3 -5
- data/lib/lingo/database/libcdb_store.rb +4 -6
- data/lib/lingo/database/sdbm_store.rb +11 -6
- data/lib/lingo/database/show_progress.rb +3 -43
- data/lib/lingo/database/source/key_value.rb +2 -6
- data/lib/lingo/database/source/multi_key.rb +3 -5
- data/lib/lingo/database/source/multi_value.rb +2 -6
- data/lib/lingo/database/source/single_word.rb +4 -6
- data/lib/lingo/database/source/word_class.rb +4 -10
- data/lib/lingo/database/source.rb +20 -18
- data/lib/lingo/database.rb +84 -59
- data/lib/lingo/error.rb +57 -1
- data/lib/lingo/language/dictionary.rb +21 -18
- data/lib/lingo/language/grammar.rb +40 -49
- data/lib/lingo/language/lexical.rb +6 -6
- data/lib/lingo/language/lexical_hash.rb +6 -0
- data/lib/lingo/language/word.rb +32 -15
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/language.rb +14 -25
- data/lib/lingo/reportable.rb +12 -10
- data/lib/lingo/show_progress.rb +81 -0
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo.rb +63 -24
- data/lingo-call.cfg +6 -10
- data/lingo.cfg +60 -44
- data/lir.cfg +42 -41
- data/test/attendee/ts_abbreviator.rb +3 -5
- data/test/attendee/ts_decomposer.rb +3 -5
- data/test/attendee/ts_multi_worder.rb +87 -145
- data/test/attendee/ts_noneword_filter.rb +5 -3
- data/test/attendee/ts_object_filter.rb +5 -3
- data/test/attendee/ts_sequencer.rb +3 -5
- data/test/attendee/ts_stemmer.rb +309 -0
- data/test/attendee/ts_synonymer.rb +15 -11
- data/test/attendee/ts_text_reader.rb +12 -15
- data/test/attendee/ts_text_writer.rb +24 -29
- data/test/attendee/ts_tokenizer.rb +9 -7
- data/test/attendee/ts_variator.rb +4 -4
- data/test/attendee/ts_vector_filter.rb +24 -16
- data/test/attendee/ts_word_searcher.rb +20 -36
- data/test/{lir.csv → lir.vec} +0 -0
- data/test/ref/artikel.vec +943 -943
- data/test/ref/artikel.ven +943 -943
- data/test/ref/lir.non +201 -201
- data/test/ref/lir.seq +178 -178
- data/test/ref/lir.syn +49 -49
- data/test/ref/lir.vec +329 -0
- data/test/test_helper.rb +20 -36
- data/test/ts_database.rb +10 -10
- data/test/ts_language.rb +279 -319
- metadata +93 -104
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lingo-all.cfg +0 -89
- data/porter/stem.cfg +0 -311
- data/porter/stem.rb +0 -150
- data/test/ref/lir.csv +0 -329
- data/test.cfg +0 -79
data/test/ts_language.rb
CHANGED
@@ -4,8 +4,6 @@ require_relative 'test_helper'
|
|
4
4
|
|
5
5
|
class TestLexicalHash < LingoTestCase
|
6
6
|
|
7
|
-
LH = Lingo::Language::LexicalHash
|
8
|
-
|
9
7
|
def setup
|
10
8
|
@lingo = Lingo.new
|
11
9
|
@database_config = @lingo.config['language/dictionary/databases']
|
@@ -18,8 +16,9 @@ class TestLexicalHash < LingoTestCase
|
|
18
16
|
def test_params
|
19
17
|
old_stderr, $stderr = $stderr, StringIO.new('')
|
20
18
|
|
21
|
-
|
22
|
-
|
19
|
+
assert_raise(Lingo::NoDatabaseConfigError) {
|
20
|
+
Lingo::Language::LexicalHash.new('nonsens', @lingo)
|
21
|
+
}
|
23
22
|
ensure
|
24
23
|
$stderr = old_stderr
|
25
24
|
end
|
@@ -27,240 +26,239 @@ class TestLexicalHash < LingoTestCase
|
|
27
26
|
# TODO: Crypt testen...
|
28
27
|
|
29
28
|
def test_cache
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
lh('sys-dic') { |ds|
|
30
|
+
assert_equal([lx('regen|s'), lx('regen|v'), lx('rege|a')], ds['regen'])
|
31
|
+
assert_equal([lx('regen|s'), lx('regen|v'), lx('rege|a')], ds['regen'])
|
32
|
+
assert_equal([lx('regen|s'), lx('regen|v'), lx('rege|a')], ds['regen'])
|
33
|
+
}
|
35
34
|
end
|
36
35
|
|
37
36
|
def test_report
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
37
|
+
lh('tst-syn') { |ds|
|
38
|
+
ds['abwickeln'] # source read
|
39
|
+
ds['abwickeln'] # cache hit
|
40
|
+
ds['regen'] # source read
|
41
|
+
ds['nonesens'] # source read, nothing found
|
42
|
+
|
43
|
+
assert_equal({
|
44
|
+
'tst-syn: cache hits' => 1,
|
45
|
+
'tst-syn: total requests' => 4,
|
46
|
+
'tst-syn: source reads' => 3,
|
47
|
+
'tst-syn: data found' => 2
|
48
|
+
}, ds.report)
|
49
49
|
}
|
50
|
-
|
51
|
-
assert_equal(expect, ds.report)
|
52
|
-
ds.close
|
53
50
|
end
|
54
51
|
|
55
52
|
def test_auto_create
|
56
|
-
txt_file = @database_config['tst-sgw']['name']
|
53
|
+
txt_file = @database_config[id = 'tst-sgw']['name']
|
57
54
|
|
58
|
-
ds
|
59
|
-
assert_equal([lx('substantiv|s')], ds['substantiv'])
|
60
|
-
ds.close
|
55
|
+
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
61
56
|
|
62
57
|
# Keine Store-Datei vorhanden, nur Text vorhanden
|
63
58
|
File.delete(*Dir["#{Lingo.find(:store, txt_file)}.*"])
|
64
|
-
ds
|
65
|
-
assert_equal([lx('substantiv|s')], ds['substantiv'])
|
66
|
-
ds.close
|
59
|
+
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
67
60
|
|
68
61
|
# Store vorhanden, aber Text ist neuer
|
69
|
-
ds
|
70
|
-
assert_equal([lx('substantiv|s')], ds['substantiv'])
|
71
|
-
ds.close
|
62
|
+
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
72
63
|
end
|
73
64
|
|
74
65
|
def test_singleword
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
66
|
+
lh('tst-sgw') { |ds|
|
67
|
+
assert_equal([lx('substantiv|s')], ds['substantiv'])
|
68
|
+
assert_equal([lx('mehr wort gruppe|s')], ds['mehr wort gruppe'])
|
69
|
+
assert_equal(nil, ds['nicht vorhanden'])
|
70
|
+
}
|
80
71
|
end
|
81
72
|
|
82
73
|
def test_keyvalue
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
74
|
+
lh('sys-mul') { |ds|
|
75
|
+
assert_equal([lx('abelscher ring ohne nullteiler|m')], ds['abelscher ring ohne nullteiler'])
|
76
|
+
assert_equal(['*4'], ds['abelscher ring ohne'])
|
77
|
+
assert_equal([lx('alleinreisende frau|m')], ds['alleinreisend frau'])
|
78
|
+
assert_equal([lx('abschaltbarer leistungshalbleiter|m')], ds['abschaltbar leistungshalbleiter'])
|
79
|
+
assert_equal(nil, ds['abschaltbarer leistungshalbleiter'])
|
80
|
+
}
|
90
81
|
end
|
91
82
|
|
92
83
|
def test_wordclass
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
84
|
+
lh('sys-dic') { |ds|
|
85
|
+
assert_equal([lx('a-dur|s')], ds['a-dur'])
|
86
|
+
assert_equal([lx('aalen|v'), lx('aalen|e')], ds['aalen'])
|
87
|
+
assert_equal([lx('abarbeitend|a')], ds['abarbeitend'])
|
88
|
+
}
|
98
89
|
end
|
99
90
|
|
100
91
|
def test_case
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
92
|
+
lh('sys-dic') { |ds|
|
93
|
+
assert_equal([lx('abänderung|s')], ds['abänderung'])
|
94
|
+
assert_equal([lx('abänderung|s')], ds['Abänderung'])
|
95
|
+
assert_equal([lx('abänderung|s')], ds['ABÄNDERUNG'])
|
96
|
+
}
|
106
97
|
end
|
107
98
|
|
108
99
|
def test_multivalue
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
100
|
+
lh('sys-syn') { |ds|
|
101
|
+
assert_equal([lx('abbau <chemie>|y'), lx('chemische abbaureaktion|y'), lx('chemischer abbau|y'), lx('photochemischer abbau|y')], ds['abbaureaktion'])
|
102
|
+
assert_equal([lx('dependenz|y'), lx('unselbstständigkeit|y'), lx('unselbständigkeit|y')], ds['abhängigkeit'])
|
103
|
+
}
|
104
|
+
end
|
105
|
+
|
106
|
+
def lh(id, &block)
|
107
|
+
Lingo::Language::LexicalHash.open(id, @lingo, &block)
|
113
108
|
end
|
114
109
|
|
115
110
|
end
|
116
111
|
|
117
112
|
class TestDictionary < LingoTestCase
|
118
113
|
|
119
|
-
LD = Lingo::Language::Dictionary
|
120
|
-
|
121
114
|
def setup
|
122
115
|
@lingo = Lingo.new
|
123
116
|
end
|
124
117
|
|
125
118
|
def test_params
|
126
119
|
# Keine Sprach-Konfiguration angegeben
|
127
|
-
|
120
|
+
#assert_raise(RuntimeError) {
|
121
|
+
# Lingo::Language::Dictionary.new({ 'source' => %w[sys-dic] }, @lingo)
|
122
|
+
#}
|
123
|
+
|
128
124
|
# Falsche Parameter angegeben (Pflichtparameter ohne Defaultwert)
|
129
|
-
assert_raise(ArgumentError) {
|
125
|
+
assert_raise(ArgumentError) {
|
126
|
+
Lingo::Language::Dictionary.new({ 'course' => %w[sys-dic] }, @lingo)
|
127
|
+
}
|
130
128
|
end
|
131
129
|
|
132
130
|
def test_cache
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
131
|
+
ld('source' => %w[sys-dic]) { |dic|
|
132
|
+
assert_equal([lx('nase|s')], dic.select('nase'))
|
133
|
+
assert_equal([lx('nase|s')], dic.select('nase'))
|
134
|
+
assert_equal([lx('nase|s')], dic.select('nase'))
|
135
|
+
}
|
138
136
|
end
|
139
137
|
|
140
138
|
def test_report
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
139
|
+
ld('source' => %w[sys-dic]) { |dic|
|
140
|
+
dic.select('abwickeln') # source read
|
141
|
+
dic.select('abwickeln') # cache hit
|
142
|
+
dic.select('regen') # source read
|
143
|
+
dic.select('nonesens') # source read, nothing found
|
144
|
+
|
145
|
+
assert_equal({
|
146
|
+
'sys-dic: total requests' => 4,
|
147
|
+
'sys-dic: data found' => 2,
|
148
|
+
'sys-dic: cache hits' => 1,
|
149
|
+
'sys-dic: source reads' => 3
|
150
|
+
}, dic.report)
|
152
151
|
}
|
153
|
-
|
154
|
-
assert_equal(expect, dic.report)
|
155
|
-
dic.close
|
156
152
|
end
|
157
153
|
|
158
154
|
def test_select_one_source
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
155
|
+
ld('source' => %w[sys-dic]) { |dic|
|
156
|
+
assert_equal([lx('nase|s')], dic.select('nase'))
|
157
|
+
assert_equal([lx('nase|s')], dic.select('NASE'))
|
158
|
+
assert_equal([], dic.select('hasennasen'))
|
159
|
+
}
|
164
160
|
end
|
165
161
|
|
166
162
|
def test_select_two_sources_mode_first
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
163
|
+
ld('source' => %w[sys-dic tst-dic], 'mode' => 'first') { |dic|
|
164
|
+
# in keiner Quelle vorhanden
|
165
|
+
assert_equal([], dic.select('hasennasen'))
|
166
|
+
# nur in erster Quelle vorhanden
|
167
|
+
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
168
|
+
# nur in zweiter Quelle vorhanden
|
169
|
+
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
170
|
+
# in beiden Quellen vorhanden
|
171
|
+
assert_equal([lx('a-dur|s')], dic.select('a-dur'))
|
172
|
+
}
|
177
173
|
end
|
178
174
|
|
179
175
|
def test_select_two_sources_mode_first_flipped
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
176
|
+
ld('source' => %w[tst-dic sys-dic], 'mode' => 'first') { |dic|
|
177
|
+
# in keiner Quelle vorhanden
|
178
|
+
assert_equal([], dic.select('hasennasen'))
|
179
|
+
# nur in erster Quelle vorhanden
|
180
|
+
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
181
|
+
# nur in zweiter Quelle vorhanden
|
182
|
+
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
183
|
+
# in beiden Quellen vorhanden
|
184
|
+
assert_equal([lx('b-dur|s')], dic.select('a-dur'))
|
185
|
+
}
|
190
186
|
end
|
191
187
|
|
192
188
|
def test_select_two_sources_mode_all
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
189
|
+
ld('source' => %w[sys-dic tst-dic], 'mode' => 'all') { |dic|
|
190
|
+
# in keiner Quelle vorhanden
|
191
|
+
assert_equal([], dic.select('hasennasen'))
|
192
|
+
# nur in erster Quelle vorhanden
|
193
|
+
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
194
|
+
# nur in zweiter Quelle vorhanden
|
195
|
+
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
196
|
+
# in beiden Quellen vorhanden
|
197
|
+
assert_equal([lx('a-dur|s'), lx('b-dur|s')], dic.select('a-dur'))
|
198
|
+
assert_equal([lx('aas|s')], dic.select('aas'))
|
199
|
+
}
|
204
200
|
end
|
205
201
|
|
206
202
|
def test_select_two_sources_mode_default
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
203
|
+
ld('source' => %w[sys-dic tst-dic]) { |dic|
|
204
|
+
# in keiner Quelle vorhanden
|
205
|
+
assert_equal([], dic.select('hasennasen'))
|
206
|
+
# nur in erster Quelle vorhanden
|
207
|
+
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
208
|
+
# nur in zweiter Quelle vorhanden
|
209
|
+
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
210
|
+
# in beiden Quellen vorhanden
|
211
|
+
assert_equal([lx('a-dur|s'), lx('b-dur|s')], dic.select('a-dur'))
|
212
|
+
assert_equal([lx('aas|s')], dic.select('aas'))
|
213
|
+
}
|
218
214
|
end
|
219
215
|
|
220
216
|
def test_suffix_lexicals
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
217
|
+
ld('source' => %w[sys-dic]) { |dic|
|
218
|
+
assert_equal([lx('mau|s'), lx('mauer|s')], dic.suffix_lexicals('mauern'))
|
219
|
+
assert_equal([lx('hasen|s'), lx('hasen|v'), lx('hasen|e')], dic.suffix_lexicals('hasens'))
|
220
|
+
assert_equal([lx('schönst|s'), lx('schön|a'), lx('schönst|a')], dic.suffix_lexicals('schönster'))
|
221
|
+
assert_equal([lx('segnen|v'), lx('segneen|v')], dic.suffix_lexicals('segnet'))
|
222
|
+
}
|
227
223
|
end
|
228
224
|
|
229
225
|
def test_infix_lexicals
|
230
|
-
|
231
|
-
|
232
|
-
|
226
|
+
ld('source' => %w[sys-dic]) { |dic|
|
227
|
+
assert_equal( [lx('information|s'), lx('information|v'), lx('information|e')], dic.suffix_lexicals('informations'))
|
228
|
+
}
|
233
229
|
end
|
234
230
|
|
235
231
|
def test_select_with_suffix
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
232
|
+
ld('source' => %w[sys-dic]) { |dic|
|
233
|
+
assert_equal([lx('mauern|v')], dic.select_with_suffix('mauern'))
|
234
|
+
assert_equal([lx('hase|s')], dic.select_with_suffix('hasen'))
|
235
|
+
assert_equal([lx('schön|a')], dic.select_with_suffix('schönster'))
|
236
|
+
assert_equal([lx('segnen|v')], dic.select_with_suffix('segnet'))
|
237
|
+
}
|
242
238
|
end
|
243
239
|
|
244
240
|
def test_select_with_infix
|
245
|
-
|
246
|
-
|
247
|
-
|
241
|
+
ld('source' => %w[sys-dic]) { |dic|
|
242
|
+
assert_equal( [lx('information|s'), lx('information|v'), lx('information|e')], dic.suffix_lexicals('informations'))
|
243
|
+
}
|
248
244
|
end
|
249
245
|
|
250
246
|
def test_find_word
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
247
|
+
ld('source' => %w[sys-dic]) { |dic|
|
248
|
+
assert_equal(wd('hasennasen|?'), dic.find_word('hasennasen'))
|
249
|
+
assert_equal(wd('hase|IDF', 'hase|s'), dic.find_word('hase'))
|
250
|
+
assert_equal(wd('haseses|IDF', 'hase|s'), dic.find_word('haseses'))
|
251
|
+
}
|
252
|
+
end
|
253
|
+
|
254
|
+
def ld(cfg, &block)
|
255
|
+
Lingo::Language::Dictionary.open(cfg, @lingo, &block)
|
256
256
|
end
|
257
257
|
|
258
258
|
end
|
259
259
|
|
260
260
|
class TestGrammar < LingoTestCase
|
261
261
|
|
262
|
-
LG = Lingo::Language::Grammar
|
263
|
-
|
264
262
|
def setup
|
265
263
|
@lingo = Lingo.new
|
266
264
|
end
|
@@ -270,177 +268,139 @@ class TestGrammar < LingoTestCase
|
|
270
268
|
end
|
271
269
|
|
272
270
|
def test_cache
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
end
|
288
|
-
|
289
|
-
def
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
[
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
[lx('benutzerforschung|k'), lx('erforschung|s'), lx('benutzen|v')],
|
392
|
-
[6, 11],
|
393
|
-
'vs'
|
394
|
-
],
|
395
|
-
gra.permute_compositum('benutzerforschung', 1, false)
|
396
|
-
)
|
397
|
-
|
398
|
-
gra.close
|
399
|
-
end
|
400
|
-
|
401
|
-
def test_find_compositum
|
402
|
-
gra = LG.new({'source'=>['sys-dic']}, @lingo)
|
403
|
-
assert_equal(
|
404
|
-
wd('informationswissenschaften|KOM', 'informationswissenschaft|k', 'information|s+', 'wissenschaft|s+'),
|
405
|
-
gra.find_compositum('informationswissenschaften')
|
406
|
-
)
|
407
|
-
assert_equal(
|
408
|
-
wd('cd-rom-technologie|KOM', 'cd-rom-technologie|k', 'cd-rom|s+', 'technologie|s+'),
|
409
|
-
gra.find_compositum('cd-rom-technologie')
|
410
|
-
)
|
411
|
-
assert_equal(
|
412
|
-
wd('albert-ludwigs-universität|KOM', 'albert-ludwigs-universität|k', 'albert|e+', 'ludwig|e+', 'universität|s+'),
|
413
|
-
gra.find_compositum('albert-ludwigs-universität')
|
414
|
-
)
|
415
|
-
assert_equal(
|
416
|
-
wd('client-server-system|KOM', 'client-server-system|k', 'client|s+', 'server|s+', 'system|s+'),
|
417
|
-
gra.find_compositum('client-server-system')
|
418
|
-
)
|
419
|
-
assert_equal(
|
420
|
-
wd('benutzerforschung|KOM', 'benutzerforschung|k', 'erforschung|s+', 'benutzen|v+'),
|
421
|
-
gra.find_compositum('benutzerforschung')
|
422
|
-
)
|
423
|
-
assert_equal(
|
424
|
-
wd('clustersuche|KOM', 'clustersuche|k', 'cluster|s+', 'suche|s+', 'suchen|v+'),
|
425
|
-
gra.find_compositum('clustersuche')
|
426
|
-
)
|
427
|
-
gra.close
|
271
|
+
lg { |gra|
|
272
|
+
assert_equal(
|
273
|
+
wd('informationswissenschaften|KOM', 'informationswissenschaft|k', 'information|s+', 'wissenschaft|s+'),
|
274
|
+
gra.find_compound('informationswissenschaften')
|
275
|
+
)
|
276
|
+
assert_equal(
|
277
|
+
wd('informationswissenschaften|KOM', 'informationswissenschaft|k', 'information|s+', 'wissenschaft|s+'),
|
278
|
+
gra.find_compound('informationswissenschaften')
|
279
|
+
)
|
280
|
+
assert_equal(
|
281
|
+
wd('informationswissenschaften|KOM', 'informationswissenschaft|k', 'information|s+', 'wissenschaft|s+'),
|
282
|
+
gra.find_compound('informationswissenschaften')
|
283
|
+
)
|
284
|
+
}
|
285
|
+
end
|
286
|
+
|
287
|
+
def test_test_compound
|
288
|
+
lg { |gra|
|
289
|
+
# hinterer Teil ist ein Wort mit Suffix
|
290
|
+
assert_equal([
|
291
|
+
[lx('hasenbraten|k'), lx('hase|s'), lx('braten|v')],
|
292
|
+
[5, 6], 'sv'], gra.test_compound('hasen', '', 'braten')
|
293
|
+
)
|
294
|
+
|
295
|
+
# hinterer Teil ist ein Wort mit Infix ohne Schwanz
|
296
|
+
assert_equal([
|
297
|
+
[lx('nasenlaufen|k'), lx('nase|s'), lx('laufen|v')],
|
298
|
+
[5, 7], 'sv'], gra.test_compound('nasen', '', 'laufens')
|
299
|
+
)
|
300
|
+
|
301
|
+
# hinterer Teil ist ein Wort mit Infix mit Schwanz
|
302
|
+
assert_equal([
|
303
|
+
[lx('nasenlaufens|k'), lx('nase|s'), lx('laufen|v')],
|
304
|
+
[5, 7], 'sv'], gra.test_compound('nasen', '', 'laufens', 1, true)
|
305
|
+
)
|
306
|
+
|
307
|
+
# hinterer Teil ist ein Kompositum nach Bindestrich
|
308
|
+
assert_equal([
|
309
|
+
[lx('arrafat-nachfolgebedarf|k'), lx('bedarf|s'), lx('nachfolge|s'), lx('arrafat|x')],
|
310
|
+
[7, 9, 6], 'xss'], gra.test_compound('arrafat', '-', 'nachfolgebedarf')
|
311
|
+
)
|
312
|
+
|
313
|
+
# hinterer Teil ist ein TakeItAsIs nach Bindestrich
|
314
|
+
assert_equal([
|
315
|
+
[lx('nachfolge-arrafat|k'), lx('nachfolge|s'), lx('arrafat|x')],
|
316
|
+
[9, 7], 'sx'], gra.test_compound('nachfolge', '-', 'arrafat')
|
317
|
+
)
|
318
|
+
|
319
|
+
# vorderer Teil ist ein Wort mit Suffix => siehe Hasenbraten
|
320
|
+
# vorderer Teil ist ein Kompositum
|
321
|
+
assert_equal([
|
322
|
+
[lx('morgenonkelmantel|k'), lx('mantel|s'), lx('morgen|s'), lx('onkel|s'), lx('morgen|w')],
|
323
|
+
[6, 5, 6], 'sss'], gra.test_compound('morgenonkel', '', 'mantel')
|
324
|
+
)
|
325
|
+
|
326
|
+
# vorderer Teil ist ein TakeItAsIs vor Bindestrich
|
327
|
+
assert_equal([
|
328
|
+
[lx('arrafat-nachfolger|k'), lx('nachfolger|s'), lx('arrafat|x')],
|
329
|
+
[7, 10], 'xs'], gra.test_compound('arrafat', '-', 'nachfolger')
|
330
|
+
)
|
331
|
+
}
|
332
|
+
end
|
333
|
+
|
334
|
+
def test_permute_compound
|
335
|
+
lg { |gra|
|
336
|
+
# bindestrichversion
|
337
|
+
assert_equal([
|
338
|
+
[lx('arrafat-nachfolger|k'), lx('nachfolger|s'), lx('arrafat|x')],
|
339
|
+
[7, 10], 'xs'], gra.permute_compound('arrafat-nachfolger')
|
340
|
+
)
|
341
|
+
|
342
|
+
# bindestrichversion zwei-teilig
|
343
|
+
assert_equal([
|
344
|
+
[lx('cd-rom-technologie|k'), lx('cd-rom|s'), lx('technologie|s')],
|
345
|
+
[6, 11], 'ss'], gra.permute_compound('cd-rom-technologie')
|
346
|
+
)
|
347
|
+
|
348
|
+
# bindestrichversion drei-teilig
|
349
|
+
assert_equal([
|
350
|
+
[lx('albert-ludwigs-universität|k'), lx('universität|s'), lx('albert|e'), lx('ludwig|e')],
|
351
|
+
[6, 7, 11], 'ees'], gra.permute_compound('albert-ludwigs-universität')
|
352
|
+
)
|
353
|
+
|
354
|
+
# normal mit suggestion
|
355
|
+
assert_equal([
|
356
|
+
[lx('benutzerforschung|k'), lx('erforschung|s'), lx('benutzen|v')],
|
357
|
+
[6, 11], 'vs'], gra.permute_compound('benutzerforschung')
|
358
|
+
)
|
359
|
+
}
|
360
|
+
end
|
361
|
+
|
362
|
+
def test_find_compound
|
363
|
+
lg { |gra|
|
364
|
+
assert_equal(
|
365
|
+
wd('informationswissenschaften|KOM', 'informationswissenschaft|k', 'information|s+', 'wissenschaft|s+'),
|
366
|
+
gra.find_compound('informationswissenschaften')
|
367
|
+
)
|
368
|
+
assert_equal(
|
369
|
+
wd('cd-rom-technologie|KOM', 'cd-rom-technologie|k', 'cd-rom|s+', 'technologie|s+'),
|
370
|
+
gra.find_compound('cd-rom-technologie')
|
371
|
+
)
|
372
|
+
assert_equal(
|
373
|
+
wd('albert-ludwigs-universität|KOM', 'albert-ludwigs-universität|k', 'albert|e+', 'ludwig|e+', 'universität|s+'),
|
374
|
+
gra.find_compound('albert-ludwigs-universität')
|
375
|
+
)
|
376
|
+
assert_equal(
|
377
|
+
wd('client-server-system|KOM', 'client-server-system|k', 'client|s+', 'server|s+', 'system|s+'),
|
378
|
+
gra.find_compound('client-server-system')
|
379
|
+
)
|
380
|
+
assert_equal(
|
381
|
+
wd('benutzerforschung|KOM', 'benutzerforschung|k', 'erforschung|s+', 'benutzen|v+'),
|
382
|
+
gra.find_compound('benutzerforschung')
|
383
|
+
)
|
384
|
+
assert_equal(
|
385
|
+
wd('clustersuche|KOM', 'clustersuche|k', 'cluster|s+', 'suche|s+', 'suchen|v+'),
|
386
|
+
gra.find_compound('clustersuche')
|
387
|
+
)
|
388
|
+
}
|
428
389
|
end
|
429
390
|
|
430
391
|
def test_min_word_size
|
431
|
-
gra
|
432
|
-
assert_equal( wd('undsund|?'), gra.find_compositum('undsund'))
|
433
|
-
gra.close
|
392
|
+
lg { |gra| assert_equal( wd('undsund|?'), gra.find_compound('undsund')) }
|
434
393
|
end
|
435
394
|
|
436
395
|
def test_max_parts
|
437
|
-
|
438
|
-
|
439
|
-
wd('
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
396
|
+
lg { |gra|
|
397
|
+
assert_equal(wd('baumsbaumsbaum|KOM', 'baumsbaumsbaum|k', 'baum|s+'), gra.find_compound('baumsbaumsbaum'))
|
398
|
+
assert_equal(wd('baumsbaumsbaumsbaumsbaumsbaum|?'), gra.find_compound('baumsbaumsbaumsbaumsbaumsbaum'))
|
399
|
+
}
|
400
|
+
end
|
401
|
+
|
402
|
+
def lg(&block)
|
403
|
+
Lingo::Language::Grammar.open({ 'source' => %w[sys-dic] }, @lingo, &block)
|
444
404
|
end
|
445
405
|
|
446
406
|
end
|