lingo 1.8.1 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +23 -5
- data/README +1 -1
- data/Rakefile +5 -7
- data/TODO +2 -0
- data/bin/lingo +5 -1
- data/de.lang +1 -1
- data/en/lingo-syn.txt +0 -0
- data/en.lang +2 -1
- data/lib/lingo/attendee/abbreviator.rb +8 -9
- data/lib/lingo/attendee/debugger.rb +5 -4
- data/lib/lingo/attendee/decomposer.rb +8 -3
- data/lib/lingo/attendee/dehyphenizer.rb +19 -63
- data/lib/lingo/attendee/formatter.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +67 -155
- data/lib/lingo/attendee/noneword_filter.rb +16 -9
- data/lib/lingo/attendee/object_filter.rb +1 -1
- data/lib/lingo/attendee/sequencer.rb +32 -63
- data/lib/lingo/attendee/stemmer/porter.rb +343 -0
- data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
- data/lib/lingo/attendee/synonymer.rb +10 -9
- data/lib/lingo/attendee/text_reader.rb +102 -76
- data/lib/lingo/attendee/text_writer.rb +23 -26
- data/lib/lingo/attendee/tokenizer.rb +13 -27
- data/lib/lingo/attendee/variator.rb +26 -66
- data/lib/lingo/attendee/vector_filter.rb +42 -43
- data/lib/lingo/attendee/word_searcher.rb +6 -7
- data/lib/lingo/attendee.rb +25 -7
- data/lib/lingo/buffered_attendee.rb +36 -10
- data/lib/lingo/cachable.rb +8 -8
- data/lib/lingo/config.rb +5 -6
- data/lib/lingo/ctl.rb +2 -3
- data/lib/lingo/database/crypter.rb +9 -26
- data/lib/lingo/database/gdbm_store.rb +3 -5
- data/lib/lingo/database/libcdb_store.rb +4 -6
- data/lib/lingo/database/sdbm_store.rb +11 -6
- data/lib/lingo/database/show_progress.rb +3 -43
- data/lib/lingo/database/source/key_value.rb +2 -6
- data/lib/lingo/database/source/multi_key.rb +3 -5
- data/lib/lingo/database/source/multi_value.rb +2 -6
- data/lib/lingo/database/source/single_word.rb +4 -6
- data/lib/lingo/database/source/word_class.rb +4 -10
- data/lib/lingo/database/source.rb +20 -18
- data/lib/lingo/database.rb +84 -59
- data/lib/lingo/error.rb +57 -1
- data/lib/lingo/language/dictionary.rb +21 -18
- data/lib/lingo/language/grammar.rb +40 -49
- data/lib/lingo/language/lexical.rb +6 -6
- data/lib/lingo/language/lexical_hash.rb +6 -0
- data/lib/lingo/language/word.rb +32 -15
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/language.rb +14 -25
- data/lib/lingo/reportable.rb +12 -10
- data/lib/lingo/show_progress.rb +81 -0
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo.rb +63 -24
- data/lingo-call.cfg +6 -10
- data/lingo.cfg +60 -44
- data/lir.cfg +42 -41
- data/test/attendee/ts_abbreviator.rb +3 -5
- data/test/attendee/ts_decomposer.rb +3 -5
- data/test/attendee/ts_multi_worder.rb +87 -145
- data/test/attendee/ts_noneword_filter.rb +5 -3
- data/test/attendee/ts_object_filter.rb +5 -3
- data/test/attendee/ts_sequencer.rb +3 -5
- data/test/attendee/ts_stemmer.rb +309 -0
- data/test/attendee/ts_synonymer.rb +15 -11
- data/test/attendee/ts_text_reader.rb +12 -15
- data/test/attendee/ts_text_writer.rb +24 -29
- data/test/attendee/ts_tokenizer.rb +9 -7
- data/test/attendee/ts_variator.rb +4 -4
- data/test/attendee/ts_vector_filter.rb +24 -16
- data/test/attendee/ts_word_searcher.rb +20 -36
- data/test/{lir.csv → lir.vec} +0 -0
- data/test/ref/artikel.vec +943 -943
- data/test/ref/artikel.ven +943 -943
- data/test/ref/lir.non +201 -201
- data/test/ref/lir.seq +178 -178
- data/test/ref/lir.syn +49 -49
- data/test/ref/lir.vec +329 -0
- data/test/test_helper.rb +20 -36
- data/test/ts_database.rb +10 -10
- data/test/ts_language.rb +279 -319
- metadata +93 -104
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lingo-all.cfg +0 -89
- data/porter/stem.cfg +0 -311
- data/porter/stem.rb +0 -150
- data/test/ref/lir.csv +0 -329
- data/test.cfg +0 -79
data/test/ts_language.rb
CHANGED
@@ -4,8 +4,6 @@ require_relative 'test_helper'
|
|
4
4
|
|
5
5
|
class TestLexicalHash < LingoTestCase
|
6
6
|
|
7
|
-
LH = Lingo::Language::LexicalHash
|
8
|
-
|
9
7
|
def setup
|
10
8
|
@lingo = Lingo.new
|
11
9
|
@database_config = @lingo.config['language/dictionary/databases']
|
@@ -18,8 +16,9 @@ class TestLexicalHash < LingoTestCase
|
|
18
16
|
def test_params
|
19
17
|
old_stderr, $stderr = $stderr, StringIO.new('')
|
20
18
|
|
21
|
-
|
22
|
-
|
19
|
+
assert_raise(Lingo::NoDatabaseConfigError) {
|
20
|
+
Lingo::Language::LexicalHash.new('nonsens', @lingo)
|
21
|
+
}
|
23
22
|
ensure
|
24
23
|
$stderr = old_stderr
|
25
24
|
end
|
@@ -27,240 +26,239 @@ class TestLexicalHash < LingoTestCase
|
|
27
26
|
# TODO: Crypt testen...
|
28
27
|
|
29
28
|
def test_cache
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
lh('sys-dic') { |ds|
|
30
|
+
assert_equal([lx('regen|s'), lx('regen|v'), lx('rege|a')], ds['regen'])
|
31
|
+
assert_equal([lx('regen|s'), lx('regen|v'), lx('rege|a')], ds['regen'])
|
32
|
+
assert_equal([lx('regen|s'), lx('regen|v'), lx('rege|a')], ds['regen'])
|
33
|
+
}
|
35
34
|
end
|
36
35
|
|
37
36
|
def test_report
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
37
|
+
lh('tst-syn') { |ds|
|
38
|
+
ds['abwickeln'] # source read
|
39
|
+
ds['abwickeln'] # cache hit
|
40
|
+
ds['regen'] # source read
|
41
|
+
ds['nonesens'] # source read, nothing found
|
42
|
+
|
43
|
+
assert_equal({
|
44
|
+
'tst-syn: cache hits' => 1,
|
45
|
+
'tst-syn: total requests' => 4,
|
46
|
+
'tst-syn: source reads' => 3,
|
47
|
+
'tst-syn: data found' => 2
|
48
|
+
}, ds.report)
|
49
49
|
}
|
50
|
-
|
51
|
-
assert_equal(expect, ds.report)
|
52
|
-
ds.close
|
53
50
|
end
|
54
51
|
|
55
52
|
def test_auto_create
|
56
|
-
txt_file = @database_config['tst-sgw']['name']
|
53
|
+
txt_file = @database_config[id = 'tst-sgw']['name']
|
57
54
|
|
58
|
-
ds
|
59
|
-
assert_equal([lx('substantiv|s')], ds['substantiv'])
|
60
|
-
ds.close
|
55
|
+
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
61
56
|
|
62
57
|
# Keine Store-Datei vorhanden, nur Text vorhanden
|
63
58
|
File.delete(*Dir["#{Lingo.find(:store, txt_file)}.*"])
|
64
|
-
ds
|
65
|
-
assert_equal([lx('substantiv|s')], ds['substantiv'])
|
66
|
-
ds.close
|
59
|
+
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
67
60
|
|
68
61
|
# Store vorhanden, aber Text ist neuer
|
69
|
-
ds
|
70
|
-
assert_equal([lx('substantiv|s')], ds['substantiv'])
|
71
|
-
ds.close
|
62
|
+
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
72
63
|
end
|
73
64
|
|
74
65
|
def test_singleword
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
66
|
+
lh('tst-sgw') { |ds|
|
67
|
+
assert_equal([lx('substantiv|s')], ds['substantiv'])
|
68
|
+
assert_equal([lx('mehr wort gruppe|s')], ds['mehr wort gruppe'])
|
69
|
+
assert_equal(nil, ds['nicht vorhanden'])
|
70
|
+
}
|
80
71
|
end
|
81
72
|
|
82
73
|
def test_keyvalue
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
74
|
+
lh('sys-mul') { |ds|
|
75
|
+
assert_equal([lx('abelscher ring ohne nullteiler|m')], ds['abelscher ring ohne nullteiler'])
|
76
|
+
assert_equal(['*4'], ds['abelscher ring ohne'])
|
77
|
+
assert_equal([lx('alleinreisende frau|m')], ds['alleinreisend frau'])
|
78
|
+
assert_equal([lx('abschaltbarer leistungshalbleiter|m')], ds['abschaltbar leistungshalbleiter'])
|
79
|
+
assert_equal(nil, ds['abschaltbarer leistungshalbleiter'])
|
80
|
+
}
|
90
81
|
end
|
91
82
|
|
92
83
|
def test_wordclass
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
84
|
+
lh('sys-dic') { |ds|
|
85
|
+
assert_equal([lx('a-dur|s')], ds['a-dur'])
|
86
|
+
assert_equal([lx('aalen|v'), lx('aalen|e')], ds['aalen'])
|
87
|
+
assert_equal([lx('abarbeitend|a')], ds['abarbeitend'])
|
88
|
+
}
|
98
89
|
end
|
99
90
|
|
100
91
|
def test_case
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
92
|
+
lh('sys-dic') { |ds|
|
93
|
+
assert_equal([lx('abänderung|s')], ds['abänderung'])
|
94
|
+
assert_equal([lx('abänderung|s')], ds['Abänderung'])
|
95
|
+
assert_equal([lx('abänderung|s')], ds['ABÄNDERUNG'])
|
96
|
+
}
|
106
97
|
end
|
107
98
|
|
108
99
|
def test_multivalue
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
100
|
+
lh('sys-syn') { |ds|
|
101
|
+
assert_equal([lx('abbau <chemie>|y'), lx('chemische abbaureaktion|y'), lx('chemischer abbau|y'), lx('photochemischer abbau|y')], ds['abbaureaktion'])
|
102
|
+
assert_equal([lx('dependenz|y'), lx('unselbstständigkeit|y'), lx('unselbständigkeit|y')], ds['abhängigkeit'])
|
103
|
+
}
|
104
|
+
end
|
105
|
+
|
106
|
+
def lh(id, &block)
|
107
|
+
Lingo::Language::LexicalHash.open(id, @lingo, &block)
|
113
108
|
end
|
114
109
|
|
115
110
|
end
|
116
111
|
|
117
112
|
class TestDictionary < LingoTestCase
|
118
113
|
|
119
|
-
LD = Lingo::Language::Dictionary
|
120
|
-
|
121
114
|
def setup
|
122
115
|
@lingo = Lingo.new
|
123
116
|
end
|
124
117
|
|
125
118
|
def test_params
|
126
119
|
# Keine Sprach-Konfiguration angegeben
|
127
|
-
|
120
|
+
#assert_raise(RuntimeError) {
|
121
|
+
# Lingo::Language::Dictionary.new({ 'source' => %w[sys-dic] }, @lingo)
|
122
|
+
#}
|
123
|
+
|
128
124
|
# Falsche Parameter angegeben (Pflichtparameter ohne Defaultwert)
|
129
|
-
assert_raise(ArgumentError) {
|
125
|
+
assert_raise(ArgumentError) {
|
126
|
+
Lingo::Language::Dictionary.new({ 'course' => %w[sys-dic] }, @lingo)
|
127
|
+
}
|
130
128
|
end
|
131
129
|
|
132
130
|
def test_cache
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
131
|
+
ld('source' => %w[sys-dic]) { |dic|
|
132
|
+
assert_equal([lx('nase|s')], dic.select('nase'))
|
133
|
+
assert_equal([lx('nase|s')], dic.select('nase'))
|
134
|
+
assert_equal([lx('nase|s')], dic.select('nase'))
|
135
|
+
}
|
138
136
|
end
|
139
137
|
|
140
138
|
def test_report
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
139
|
+
ld('source' => %w[sys-dic]) { |dic|
|
140
|
+
dic.select('abwickeln') # source read
|
141
|
+
dic.select('abwickeln') # cache hit
|
142
|
+
dic.select('regen') # source read
|
143
|
+
dic.select('nonesens') # source read, nothing found
|
144
|
+
|
145
|
+
assert_equal({
|
146
|
+
'sys-dic: total requests' => 4,
|
147
|
+
'sys-dic: data found' => 2,
|
148
|
+
'sys-dic: cache hits' => 1,
|
149
|
+
'sys-dic: source reads' => 3
|
150
|
+
}, dic.report)
|
152
151
|
}
|
153
|
-
|
154
|
-
assert_equal(expect, dic.report)
|
155
|
-
dic.close
|
156
152
|
end
|
157
153
|
|
158
154
|
def test_select_one_source
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
155
|
+
ld('source' => %w[sys-dic]) { |dic|
|
156
|
+
assert_equal([lx('nase|s')], dic.select('nase'))
|
157
|
+
assert_equal([lx('nase|s')], dic.select('NASE'))
|
158
|
+
assert_equal([], dic.select('hasennasen'))
|
159
|
+
}
|
164
160
|
end
|
165
161
|
|
166
162
|
def test_select_two_sources_mode_first
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
163
|
+
ld('source' => %w[sys-dic tst-dic], 'mode' => 'first') { |dic|
|
164
|
+
# in keiner Quelle vorhanden
|
165
|
+
assert_equal([], dic.select('hasennasen'))
|
166
|
+
# nur in erster Quelle vorhanden
|
167
|
+
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
168
|
+
# nur in zweiter Quelle vorhanden
|
169
|
+
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
170
|
+
# in beiden Quellen vorhanden
|
171
|
+
assert_equal([lx('a-dur|s')], dic.select('a-dur'))
|
172
|
+
}
|
177
173
|
end
|
178
174
|
|
179
175
|
def test_select_two_sources_mode_first_flipped
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
176
|
+
ld('source' => %w[tst-dic sys-dic], 'mode' => 'first') { |dic|
|
177
|
+
# in keiner Quelle vorhanden
|
178
|
+
assert_equal([], dic.select('hasennasen'))
|
179
|
+
# nur in erster Quelle vorhanden
|
180
|
+
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
181
|
+
# nur in zweiter Quelle vorhanden
|
182
|
+
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
183
|
+
# in beiden Quellen vorhanden
|
184
|
+
assert_equal([lx('b-dur|s')], dic.select('a-dur'))
|
185
|
+
}
|
190
186
|
end
|
191
187
|
|
192
188
|
def test_select_two_sources_mode_all
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
189
|
+
ld('source' => %w[sys-dic tst-dic], 'mode' => 'all') { |dic|
|
190
|
+
# in keiner Quelle vorhanden
|
191
|
+
assert_equal([], dic.select('hasennasen'))
|
192
|
+
# nur in erster Quelle vorhanden
|
193
|
+
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
194
|
+
# nur in zweiter Quelle vorhanden
|
195
|
+
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
196
|
+
# in beiden Quellen vorhanden
|
197
|
+
assert_equal([lx('a-dur|s'), lx('b-dur|s')], dic.select('a-dur'))
|
198
|
+
assert_equal([lx('aas|s')], dic.select('aas'))
|
199
|
+
}
|
204
200
|
end
|
205
201
|
|
206
202
|
def test_select_two_sources_mode_default
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
203
|
+
ld('source' => %w[sys-dic tst-dic]) { |dic|
|
204
|
+
# in keiner Quelle vorhanden
|
205
|
+
assert_equal([], dic.select('hasennasen'))
|
206
|
+
# nur in erster Quelle vorhanden
|
207
|
+
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
208
|
+
# nur in zweiter Quelle vorhanden
|
209
|
+
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
210
|
+
# in beiden Quellen vorhanden
|
211
|
+
assert_equal([lx('a-dur|s'), lx('b-dur|s')], dic.select('a-dur'))
|
212
|
+
assert_equal([lx('aas|s')], dic.select('aas'))
|
213
|
+
}
|
218
214
|
end
|
219
215
|
|
220
216
|
def test_suffix_lexicals
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
217
|
+
ld('source' => %w[sys-dic]) { |dic|
|
218
|
+
assert_equal([lx('mau|s'), lx('mauer|s')], dic.suffix_lexicals('mauern'))
|
219
|
+
assert_equal([lx('hasen|s'), lx('hasen|v'), lx('hasen|e')], dic.suffix_lexicals('hasens'))
|
220
|
+
assert_equal([lx('schönst|s'), lx('schön|a'), lx('schönst|a')], dic.suffix_lexicals('schönster'))
|
221
|
+
assert_equal([lx('segnen|v'), lx('segneen|v')], dic.suffix_lexicals('segnet'))
|
222
|
+
}
|
227
223
|
end
|
228
224
|
|
229
225
|
def test_infix_lexicals
|
230
|
-
|
231
|
-
|
232
|
-
|
226
|
+
ld('source' => %w[sys-dic]) { |dic|
|
227
|
+
assert_equal( [lx('information|s'), lx('information|v'), lx('information|e')], dic.suffix_lexicals('informations'))
|
228
|
+
}
|
233
229
|
end
|
234
230
|
|
235
231
|
def test_select_with_suffix
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
232
|
+
ld('source' => %w[sys-dic]) { |dic|
|
233
|
+
assert_equal([lx('mauern|v')], dic.select_with_suffix('mauern'))
|
234
|
+
assert_equal([lx('hase|s')], dic.select_with_suffix('hasen'))
|
235
|
+
assert_equal([lx('schön|a')], dic.select_with_suffix('schönster'))
|
236
|
+
assert_equal([lx('segnen|v')], dic.select_with_suffix('segnet'))
|
237
|
+
}
|
242
238
|
end
|
243
239
|
|
244
240
|
def test_select_with_infix
|
245
|
-
|
246
|
-
|
247
|
-
|
241
|
+
ld('source' => %w[sys-dic]) { |dic|
|
242
|
+
assert_equal( [lx('information|s'), lx('information|v'), lx('information|e')], dic.suffix_lexicals('informations'))
|
243
|
+
}
|
248
244
|
end
|
249
245
|
|
250
246
|
def test_find_word
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
247
|
+
ld('source' => %w[sys-dic]) { |dic|
|
248
|
+
assert_equal(wd('hasennasen|?'), dic.find_word('hasennasen'))
|
249
|
+
assert_equal(wd('hase|IDF', 'hase|s'), dic.find_word('hase'))
|
250
|
+
assert_equal(wd('haseses|IDF', 'hase|s'), dic.find_word('haseses'))
|
251
|
+
}
|
252
|
+
end
|
253
|
+
|
254
|
+
def ld(cfg, &block)
|
255
|
+
Lingo::Language::Dictionary.open(cfg, @lingo, &block)
|
256
256
|
end
|
257
257
|
|
258
258
|
end
|
259
259
|
|
260
260
|
class TestGrammar < LingoTestCase
|
261
261
|
|
262
|
-
LG = Lingo::Language::Grammar
|
263
|
-
|
264
262
|
def setup
|
265
263
|
@lingo = Lingo.new
|
266
264
|
end
|
@@ -270,177 +268,139 @@ class TestGrammar < LingoTestCase
|
|
270
268
|
end
|
271
269
|
|
272
270
|
def test_cache
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
end
|
288
|
-
|
289
|
-
def
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
[
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
[lx('benutzerforschung|k'), lx('erforschung|s'), lx('benutzen|v')],
|
392
|
-
[6, 11],
|
393
|
-
'vs'
|
394
|
-
],
|
395
|
-
gra.permute_compositum('benutzerforschung', 1, false)
|
396
|
-
)
|
397
|
-
|
398
|
-
gra.close
|
399
|
-
end
|
400
|
-
|
401
|
-
def test_find_compositum
|
402
|
-
gra = LG.new({'source'=>['sys-dic']}, @lingo)
|
403
|
-
assert_equal(
|
404
|
-
wd('informationswissenschaften|KOM', 'informationswissenschaft|k', 'information|s+', 'wissenschaft|s+'),
|
405
|
-
gra.find_compositum('informationswissenschaften')
|
406
|
-
)
|
407
|
-
assert_equal(
|
408
|
-
wd('cd-rom-technologie|KOM', 'cd-rom-technologie|k', 'cd-rom|s+', 'technologie|s+'),
|
409
|
-
gra.find_compositum('cd-rom-technologie')
|
410
|
-
)
|
411
|
-
assert_equal(
|
412
|
-
wd('albert-ludwigs-universität|KOM', 'albert-ludwigs-universität|k', 'albert|e+', 'ludwig|e+', 'universität|s+'),
|
413
|
-
gra.find_compositum('albert-ludwigs-universität')
|
414
|
-
)
|
415
|
-
assert_equal(
|
416
|
-
wd('client-server-system|KOM', 'client-server-system|k', 'client|s+', 'server|s+', 'system|s+'),
|
417
|
-
gra.find_compositum('client-server-system')
|
418
|
-
)
|
419
|
-
assert_equal(
|
420
|
-
wd('benutzerforschung|KOM', 'benutzerforschung|k', 'erforschung|s+', 'benutzen|v+'),
|
421
|
-
gra.find_compositum('benutzerforschung')
|
422
|
-
)
|
423
|
-
assert_equal(
|
424
|
-
wd('clustersuche|KOM', 'clustersuche|k', 'cluster|s+', 'suche|s+', 'suchen|v+'),
|
425
|
-
gra.find_compositum('clustersuche')
|
426
|
-
)
|
427
|
-
gra.close
|
271
|
+
lg { |gra|
|
272
|
+
assert_equal(
|
273
|
+
wd('informationswissenschaften|KOM', 'informationswissenschaft|k', 'information|s+', 'wissenschaft|s+'),
|
274
|
+
gra.find_compound('informationswissenschaften')
|
275
|
+
)
|
276
|
+
assert_equal(
|
277
|
+
wd('informationswissenschaften|KOM', 'informationswissenschaft|k', 'information|s+', 'wissenschaft|s+'),
|
278
|
+
gra.find_compound('informationswissenschaften')
|
279
|
+
)
|
280
|
+
assert_equal(
|
281
|
+
wd('informationswissenschaften|KOM', 'informationswissenschaft|k', 'information|s+', 'wissenschaft|s+'),
|
282
|
+
gra.find_compound('informationswissenschaften')
|
283
|
+
)
|
284
|
+
}
|
285
|
+
end
|
286
|
+
|
287
|
+
def test_test_compound
|
288
|
+
lg { |gra|
|
289
|
+
# hinterer Teil ist ein Wort mit Suffix
|
290
|
+
assert_equal([
|
291
|
+
[lx('hasenbraten|k'), lx('hase|s'), lx('braten|v')],
|
292
|
+
[5, 6], 'sv'], gra.test_compound('hasen', '', 'braten')
|
293
|
+
)
|
294
|
+
|
295
|
+
# hinterer Teil ist ein Wort mit Infix ohne Schwanz
|
296
|
+
assert_equal([
|
297
|
+
[lx('nasenlaufen|k'), lx('nase|s'), lx('laufen|v')],
|
298
|
+
[5, 7], 'sv'], gra.test_compound('nasen', '', 'laufens')
|
299
|
+
)
|
300
|
+
|
301
|
+
# hinterer Teil ist ein Wort mit Infix mit Schwanz
|
302
|
+
assert_equal([
|
303
|
+
[lx('nasenlaufens|k'), lx('nase|s'), lx('laufen|v')],
|
304
|
+
[5, 7], 'sv'], gra.test_compound('nasen', '', 'laufens', 1, true)
|
305
|
+
)
|
306
|
+
|
307
|
+
# hinterer Teil ist ein Kompositum nach Bindestrich
|
308
|
+
assert_equal([
|
309
|
+
[lx('arrafat-nachfolgebedarf|k'), lx('bedarf|s'), lx('nachfolge|s'), lx('arrafat|x')],
|
310
|
+
[7, 9, 6], 'xss'], gra.test_compound('arrafat', '-', 'nachfolgebedarf')
|
311
|
+
)
|
312
|
+
|
313
|
+
# hinterer Teil ist ein TakeItAsIs nach Bindestrich
|
314
|
+
assert_equal([
|
315
|
+
[lx('nachfolge-arrafat|k'), lx('nachfolge|s'), lx('arrafat|x')],
|
316
|
+
[9, 7], 'sx'], gra.test_compound('nachfolge', '-', 'arrafat')
|
317
|
+
)
|
318
|
+
|
319
|
+
# vorderer Teil ist ein Wort mit Suffix => siehe Hasenbraten
|
320
|
+
# vorderer Teil ist ein Kompositum
|
321
|
+
assert_equal([
|
322
|
+
[lx('morgenonkelmantel|k'), lx('mantel|s'), lx('morgen|s'), lx('onkel|s'), lx('morgen|w')],
|
323
|
+
[6, 5, 6], 'sss'], gra.test_compound('morgenonkel', '', 'mantel')
|
324
|
+
)
|
325
|
+
|
326
|
+
# vorderer Teil ist ein TakeItAsIs vor Bindestrich
|
327
|
+
assert_equal([
|
328
|
+
[lx('arrafat-nachfolger|k'), lx('nachfolger|s'), lx('arrafat|x')],
|
329
|
+
[7, 10], 'xs'], gra.test_compound('arrafat', '-', 'nachfolger')
|
330
|
+
)
|
331
|
+
}
|
332
|
+
end
|
333
|
+
|
334
|
+
def test_permute_compound
|
335
|
+
lg { |gra|
|
336
|
+
# bindestrichversion
|
337
|
+
assert_equal([
|
338
|
+
[lx('arrafat-nachfolger|k'), lx('nachfolger|s'), lx('arrafat|x')],
|
339
|
+
[7, 10], 'xs'], gra.permute_compound('arrafat-nachfolger')
|
340
|
+
)
|
341
|
+
|
342
|
+
# bindestrichversion zwei-teilig
|
343
|
+
assert_equal([
|
344
|
+
[lx('cd-rom-technologie|k'), lx('cd-rom|s'), lx('technologie|s')],
|
345
|
+
[6, 11], 'ss'], gra.permute_compound('cd-rom-technologie')
|
346
|
+
)
|
347
|
+
|
348
|
+
# bindestrichversion drei-teilig
|
349
|
+
assert_equal([
|
350
|
+
[lx('albert-ludwigs-universität|k'), lx('universität|s'), lx('albert|e'), lx('ludwig|e')],
|
351
|
+
[6, 7, 11], 'ees'], gra.permute_compound('albert-ludwigs-universität')
|
352
|
+
)
|
353
|
+
|
354
|
+
# normal mit suggestion
|
355
|
+
assert_equal([
|
356
|
+
[lx('benutzerforschung|k'), lx('erforschung|s'), lx('benutzen|v')],
|
357
|
+
[6, 11], 'vs'], gra.permute_compound('benutzerforschung')
|
358
|
+
)
|
359
|
+
}
|
360
|
+
end
|
361
|
+
|
362
|
+
def test_find_compound
|
363
|
+
lg { |gra|
|
364
|
+
assert_equal(
|
365
|
+
wd('informationswissenschaften|KOM', 'informationswissenschaft|k', 'information|s+', 'wissenschaft|s+'),
|
366
|
+
gra.find_compound('informationswissenschaften')
|
367
|
+
)
|
368
|
+
assert_equal(
|
369
|
+
wd('cd-rom-technologie|KOM', 'cd-rom-technologie|k', 'cd-rom|s+', 'technologie|s+'),
|
370
|
+
gra.find_compound('cd-rom-technologie')
|
371
|
+
)
|
372
|
+
assert_equal(
|
373
|
+
wd('albert-ludwigs-universität|KOM', 'albert-ludwigs-universität|k', 'albert|e+', 'ludwig|e+', 'universität|s+'),
|
374
|
+
gra.find_compound('albert-ludwigs-universität')
|
375
|
+
)
|
376
|
+
assert_equal(
|
377
|
+
wd('client-server-system|KOM', 'client-server-system|k', 'client|s+', 'server|s+', 'system|s+'),
|
378
|
+
gra.find_compound('client-server-system')
|
379
|
+
)
|
380
|
+
assert_equal(
|
381
|
+
wd('benutzerforschung|KOM', 'benutzerforschung|k', 'erforschung|s+', 'benutzen|v+'),
|
382
|
+
gra.find_compound('benutzerforschung')
|
383
|
+
)
|
384
|
+
assert_equal(
|
385
|
+
wd('clustersuche|KOM', 'clustersuche|k', 'cluster|s+', 'suche|s+', 'suchen|v+'),
|
386
|
+
gra.find_compound('clustersuche')
|
387
|
+
)
|
388
|
+
}
|
428
389
|
end
|
429
390
|
|
430
391
|
def test_min_word_size
|
431
|
-
gra
|
432
|
-
assert_equal( wd('undsund|?'), gra.find_compositum('undsund'))
|
433
|
-
gra.close
|
392
|
+
lg { |gra| assert_equal( wd('undsund|?'), gra.find_compound('undsund')) }
|
434
393
|
end
|
435
394
|
|
436
395
|
def test_max_parts
|
437
|
-
|
438
|
-
|
439
|
-
wd('
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
396
|
+
lg { |gra|
|
397
|
+
assert_equal(wd('baumsbaumsbaum|KOM', 'baumsbaumsbaum|k', 'baum|s+'), gra.find_compound('baumsbaumsbaum'))
|
398
|
+
assert_equal(wd('baumsbaumsbaumsbaumsbaumsbaum|?'), gra.find_compound('baumsbaumsbaumsbaumsbaumsbaum'))
|
399
|
+
}
|
400
|
+
end
|
401
|
+
|
402
|
+
def lg(&block)
|
403
|
+
Lingo::Language::Grammar.open({ 'source' => %w[sys-dic] }, @lingo, &block)
|
444
404
|
end
|
445
405
|
|
446
406
|
end
|