lingo 1.8.3 → 1.8.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ChangeLog +24 -2
- data/README +16 -10
- data/Rakefile +15 -6
- data/en/lingo-irr.txt +60 -60
- data/lib/lingo.rb +14 -6
- data/lib/lingo/app.rb +3 -0
- data/lib/lingo/attendee.rb +6 -0
- data/lib/lingo/attendee/abbreviator.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +1 -1
- data/lib/lingo/attendee/noneword_filter.rb +14 -5
- data/lib/lingo/attendee/sequencer.rb +63 -37
- data/lib/lingo/attendee/text_reader.rb +14 -15
- data/lib/lingo/attendee/text_writer.rb +3 -3
- data/lib/lingo/attendee/vector_filter.rb +5 -5
- data/lib/lingo/call.rb +1 -1
- data/lib/lingo/cli.rb +2 -2
- data/lib/lingo/ctl.rb +3 -1
- data/lib/lingo/database.rb +1 -1
- data/lib/lingo/database/show_progress.rb +15 -2
- data/lib/lingo/database/source.rb +6 -1
- data/lib/lingo/error.rb +28 -4
- data/lib/lingo/language/grammar.rb +7 -7
- data/lib/lingo/language/word.rb +6 -2
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/show_progress.rb +3 -2
- data/lib/lingo/srv.rb +15 -6
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web.rb +40 -10
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/lib/lingo/web/public/lingoweb.css +7 -4
- data/lib/lingo/web/views/index.erb +97 -39
- data/lingo.cfg +1 -1
- data/lir.cfg +1 -1
- data/test/attendee/ts_abbreviator.rb +22 -0
- data/test/attendee/ts_sequencer.rb +278 -1
- data/test/attendee/ts_text_reader.rb +34 -0
- data/test/attendee/ts_text_writer.rb +1 -1
- metadata +139 -133
data/lingo.cfg
CHANGED
@@ -43,7 +43,7 @@ meeting:
|
|
43
43
|
- multi_worder: { source: sys-mul }
|
44
44
|
|
45
45
|
# Wortsequenzen anhand von Regeln identifizieren
|
46
|
-
- sequencer: { stopper: PUNC,OTHR }
|
46
|
+
- sequencer: { stopper: 'PUNC,OTHR' }
|
47
47
|
|
48
48
|
# Relationierungen einfügen
|
49
49
|
- synonymer: { skip: '?,t', source: sys-syn, out: syn }
|
data/lir.cfg
CHANGED
@@ -48,7 +48,7 @@ meeting:
|
|
48
48
|
- multi_worder: { source: sys-mul }
|
49
49
|
|
50
50
|
# Wortsequenzen anhand von Regeln identifizieren
|
51
|
-
- sequencer: { stopper: PUNC,OTHR }
|
51
|
+
- sequencer: { stopper: 'PUNC,OTHR' }
|
52
52
|
|
53
53
|
# Relationierungen einfügen
|
54
54
|
- synonymer: { skip: '?,t', source: sys-syn, out: syn }
|
@@ -24,4 +24,26 @@ class TestAttendeeAbbreviator < AttendeeTestCase
|
|
24
24
|
])
|
25
25
|
end
|
26
26
|
|
27
|
+
def test_sentence
|
28
|
+
meet({ 'source' => 'sys-abk' }, [
|
29
|
+
tk('Dieser|WORD'),
|
30
|
+
tk('Satz|WORD'),
|
31
|
+
tk('hat|WORD'),
|
32
|
+
tk('keinen|WORD'),
|
33
|
+
tk('Punkt|WORD'),
|
34
|
+
tk('am|WORD'),
|
35
|
+
tk('Ende|WORD'),
|
36
|
+
ai('EOF|')
|
37
|
+
], [
|
38
|
+
tk('Dieser|WORD'),
|
39
|
+
tk('Satz|WORD'),
|
40
|
+
tk('hat|WORD'),
|
41
|
+
tk('keinen|WORD'),
|
42
|
+
tk('Punkt|WORD'),
|
43
|
+
tk('am|WORD'),
|
44
|
+
tk('Ende|WORD'),
|
45
|
+
ai('EOF|')
|
46
|
+
])
|
47
|
+
end
|
48
|
+
|
27
49
|
end
|
@@ -5,7 +5,7 @@ require_relative '../test_helper'
|
|
5
5
|
class TestAttendeeSequencer < AttendeeTestCase
|
6
6
|
|
7
7
|
def test_basic
|
8
|
-
meet({
|
8
|
+
meet({}, [
|
9
9
|
# AS
|
10
10
|
wd('Die|IDF', 'die|w'),
|
11
11
|
wd('helle|IDF', 'hell|a'),
|
@@ -32,4 +32,281 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
32
32
|
])
|
33
33
|
end
|
34
34
|
|
35
|
+
def test_param
|
36
|
+
meet({ 'sequences' => [['SS', '1 2'], ['SSS', '1 2 3']] }, [
|
37
|
+
# (AS)
|
38
|
+
wd('Die|IDF', 'die|w'),
|
39
|
+
wd('helle|IDF', 'hell|a'),
|
40
|
+
wd('Sonne|IDF', 'sonne|s'),
|
41
|
+
tk('.|PUNC'),
|
42
|
+
# SS + SS + SSS
|
43
|
+
wd('Der|IDF', 'der|w'),
|
44
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
45
|
+
wd('Abbild|IDF', 'abbild|s'),
|
46
|
+
wd('Gottes|IDF', 'gott|s'),
|
47
|
+
wd('Turm|IDF', 'turm|s'),
|
48
|
+
tk('.|PUNC'),
|
49
|
+
# SS
|
50
|
+
wd('Der|IDF', 'der|w'),
|
51
|
+
wd('Sonne|IDF', 'sonne|s'),
|
52
|
+
wd('Untergang|IDF', 'untergang|s'),
|
53
|
+
ai('EOF|')
|
54
|
+
], [
|
55
|
+
# (AS)
|
56
|
+
wd('Die|IDF', 'die|w'),
|
57
|
+
wd('helle|IDF', 'hell|a'),
|
58
|
+
wd('Sonne|IDF', 'sonne|s'),
|
59
|
+
tk('.|PUNC'),
|
60
|
+
# SS + SS + SSS
|
61
|
+
wd('Der|IDF', 'der|w'),
|
62
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
63
|
+
wd('Abbild|IDF', 'abbild|s'),
|
64
|
+
wd('Gottes|IDF', 'gott|s'),
|
65
|
+
wd('Turm|IDF', 'turm|s'),
|
66
|
+
tk('.|PUNC'),
|
67
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
68
|
+
wd('gott turm|SEQ', 'gott turm|q'),
|
69
|
+
wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
|
70
|
+
# SS
|
71
|
+
wd('Der|IDF', 'der|w'),
|
72
|
+
wd('Sonne|IDF', 'sonne|s'),
|
73
|
+
wd('Untergang|IDF', 'untergang|s'),
|
74
|
+
wd('sonne untergang|SEQ', 'sonne untergang|q'),
|
75
|
+
ai('EOF|')
|
76
|
+
])
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_multi
|
80
|
+
meet({ 'sequences' => [['MS', '1 2']] }, [
|
81
|
+
# MS
|
82
|
+
wd('Der|IDF', 'der|w'),
|
83
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
84
|
+
wd('Abbild|IDF', 'abbild|s'),
|
85
|
+
wd('Gottes|IDF', 'gott|s'),
|
86
|
+
wd('Turm|IDF', 'turm|s'),
|
87
|
+
tk('.|PUNC'),
|
88
|
+
ai('EOF|')
|
89
|
+
], [
|
90
|
+
# MS
|
91
|
+
wd('Der|IDF', 'der|w'),
|
92
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
93
|
+
wd('Abbild|IDF', 'abbild|s'),
|
94
|
+
wd('Gottes|IDF', 'gott|s'),
|
95
|
+
wd('Turm|IDF', 'turm|s'),
|
96
|
+
tk('.|PUNC'),
|
97
|
+
wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
|
98
|
+
ai('EOF|')
|
99
|
+
])
|
100
|
+
meet({ 'sequences' => [['MS', '1 2'], ['SS', '1 2'], ['SSS', '1 2 3']] }, [
|
101
|
+
# MS + SS + SS + SSS
|
102
|
+
wd('Der|IDF', 'der|w'),
|
103
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
104
|
+
wd('Abbild|IDF', 'abbild|s'),
|
105
|
+
wd('Gottes|IDF', 'gott|s'),
|
106
|
+
wd('Turm|IDF', 'turm|s'),
|
107
|
+
tk('.|PUNC'),
|
108
|
+
# SS
|
109
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
110
|
+
wd('Abbild|IDF', 'abbild|s'),
|
111
|
+
wd('Gottes|IDF', 'gott|s'),
|
112
|
+
ai('EOF|')
|
113
|
+
], [
|
114
|
+
# MS + SS + SS + SSS
|
115
|
+
wd('Der|IDF', 'der|w'),
|
116
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
117
|
+
wd('Abbild|IDF', 'abbild|s'),
|
118
|
+
wd('Gottes|IDF', 'gott|s'),
|
119
|
+
wd('Turm|IDF', 'turm|s'),
|
120
|
+
tk('.|PUNC'),
|
121
|
+
wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
|
122
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
123
|
+
wd('gott turm|SEQ', 'gott turm|q'),
|
124
|
+
wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
|
125
|
+
# SS
|
126
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
127
|
+
wd('Abbild|IDF', 'abbild|s'),
|
128
|
+
wd('Gottes|IDF', 'gott|s'),
|
129
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
130
|
+
ai('EOF|')
|
131
|
+
])
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_regex
|
135
|
+
meet({ 'sequences' => [['[MS]S', '1 2']] }, [
|
136
|
+
# MS + SS + SS
|
137
|
+
wd('Der|IDF', 'der|w'),
|
138
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
139
|
+
wd('Abbild|IDF', 'abbild|s'),
|
140
|
+
wd('Gottes|IDF', 'gott|s'),
|
141
|
+
wd('Turm|IDF', 'turm|s'),
|
142
|
+
tk('.|PUNC'),
|
143
|
+
# SS
|
144
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
145
|
+
wd('Abbild|IDF', 'abbild|s'),
|
146
|
+
wd('Gottes|IDF', 'gott|s'),
|
147
|
+
ai('EOF|')
|
148
|
+
], [
|
149
|
+
# MS + SS + SS
|
150
|
+
wd('Der|IDF', 'der|w'),
|
151
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
152
|
+
wd('Abbild|IDF', 'abbild|s'),
|
153
|
+
wd('Gottes|IDF', 'gott|s'),
|
154
|
+
wd('Turm|IDF', 'turm|s'),
|
155
|
+
tk('.|PUNC'),
|
156
|
+
wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
|
157
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
158
|
+
wd('gott turm|SEQ', 'gott turm|q'),
|
159
|
+
# SS
|
160
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
161
|
+
wd('Abbild|IDF', 'abbild|s'),
|
162
|
+
wd('Gottes|IDF', 'gott|s'),
|
163
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
164
|
+
ai('EOF|')
|
165
|
+
])
|
166
|
+
end
|
167
|
+
|
168
|
+
def test_regex_none
|
169
|
+
meet({ 'sequences' => ['..'] }, [
|
170
|
+
# (MS + SS + SS)
|
171
|
+
wd('Der|IDF', 'der|w'),
|
172
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
173
|
+
wd('Abbild|IDF', 'abbild|s'),
|
174
|
+
wd('Gottes|IDF', 'gott|s'),
|
175
|
+
wd('Turm|IDF', 'turm|s'),
|
176
|
+
tk('.|PUNC'),
|
177
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
178
|
+
wd('Abbild|IDF', 'abbild|s'),
|
179
|
+
wd('Gottes|IDF', 'gott|s'),
|
180
|
+
ai('EOF|')
|
181
|
+
], [
|
182
|
+
# (MS + SS + SS)
|
183
|
+
wd('Der|IDF', 'der|w'),
|
184
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
185
|
+
wd('Abbild|IDF', 'abbild|s'),
|
186
|
+
wd('Gottes|IDF', 'gott|s'),
|
187
|
+
wd('Turm|IDF', 'turm|s'),
|
188
|
+
tk('.|PUNC'),
|
189
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
190
|
+
wd('Abbild|IDF', 'abbild|s'),
|
191
|
+
wd('Gottes|IDF', 'gott|s'),
|
192
|
+
ai('EOF|')
|
193
|
+
])
|
194
|
+
end
|
195
|
+
|
196
|
+
def test_regex_comm
|
197
|
+
meet({ 'sequences' => ['(?#MS)..'] }, [ # = [MS][MS]
|
198
|
+
# MS + SS + SS
|
199
|
+
wd('Der|IDF', 'der|w'),
|
200
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
201
|
+
wd('Abbild|IDF', 'abbild|s'),
|
202
|
+
wd('Gottes|IDF', 'gott|s'),
|
203
|
+
wd('Turm|IDF', 'turm|s'),
|
204
|
+
tk('.|PUNC'),
|
205
|
+
# SS
|
206
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
207
|
+
wd('Abbild|IDF', 'abbild|s'),
|
208
|
+
wd('Gottes|IDF', 'gott|s'),
|
209
|
+
ai('EOF|')
|
210
|
+
], [
|
211
|
+
# MS + SS + SS
|
212
|
+
wd('Der|IDF', 'der|w'),
|
213
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
214
|
+
wd('Abbild|IDF', 'abbild|s'),
|
215
|
+
wd('Gottes|IDF', 'gott|s'),
|
216
|
+
wd('Turm|IDF', 'turm|s'),
|
217
|
+
tk('.|PUNC'),
|
218
|
+
wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
|
219
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
220
|
+
wd('gott turm|SEQ', 'gott turm|q'),
|
221
|
+
# SS
|
222
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
223
|
+
wd('Abbild|IDF', 'abbild|s'),
|
224
|
+
wd('Gottes|IDF', 'gott|s'),
|
225
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
226
|
+
ai('EOF|')
|
227
|
+
])
|
228
|
+
end
|
229
|
+
|
230
|
+
def test_regex_quan
|
231
|
+
meet({ 'sequences' => ['[MS]S+'] }, [
|
232
|
+
# MS + SSS + (SS) + SS
|
233
|
+
wd('Der|IDF', 'der|w'),
|
234
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
235
|
+
wd('Abbild|IDF', 'abbild|s'),
|
236
|
+
wd('Gottes|IDF', 'gott|s'),
|
237
|
+
wd('Turm|IDF', 'turm|s'),
|
238
|
+
tk('.|PUNC'),
|
239
|
+
ai('EOF|')
|
240
|
+
], [
|
241
|
+
# MS + SSS + (SS) + SS
|
242
|
+
wd('Der|IDF', 'der|w'),
|
243
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
244
|
+
wd('Abbild|IDF', 'abbild|s'),
|
245
|
+
wd('Gottes|IDF', 'gott|s'),
|
246
|
+
wd('Turm|IDF', 'turm|s'),
|
247
|
+
tk('.|PUNC'),
|
248
|
+
wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
|
249
|
+
wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
|
250
|
+
#wd('abbild gott|SEQ', 'abbild gott|q'), # FIXME
|
251
|
+
wd('gott turm|SEQ', 'gott turm|q'),
|
252
|
+
ai('EOF|')
|
253
|
+
])
|
254
|
+
end
|
255
|
+
|
256
|
+
def test_regex_form
|
257
|
+
meet({ 'sequences' => [['[MS]S+', '^']] }, [
|
258
|
+
# MS + SSS + (SS) + SS
|
259
|
+
wd('Der|IDF', 'der|w'),
|
260
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
261
|
+
wd('Abbild|IDF', 'abbild|s'),
|
262
|
+
wd('Gottes|IDF', 'gott|s'),
|
263
|
+
wd('Turm|IDF', 'turm|s'),
|
264
|
+
tk('.|PUNC'),
|
265
|
+
ai('EOF|')
|
266
|
+
], [
|
267
|
+
# MS + SSS + (SS) + SS
|
268
|
+
wd('Der|IDF', 'der|w'),
|
269
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
270
|
+
wd('Abbild|IDF', 'abbild|s'),
|
271
|
+
wd('Gottes|IDF', 'gott|s'),
|
272
|
+
wd('Turm|IDF', 'turm|s'),
|
273
|
+
tk('.|PUNC'),
|
274
|
+
wd('ms:abbild gottes^turm|SEQ', 'ms:abbild gottes^turm|q'),
|
275
|
+
wd('sss:abbild^gott^turm|SEQ', 'sss:abbild^gott^turm|q'),
|
276
|
+
#wd('ss:abbild^gott|SEQ', 'ss:abbild^gott|q'), # FIXME
|
277
|
+
wd('ss:gott^turm|SEQ', 'ss:gott^turm|q'),
|
278
|
+
ai('EOF|')
|
279
|
+
])
|
280
|
+
end
|
281
|
+
|
282
|
+
def test_match
|
283
|
+
meet({ 'sequences' => [['WA', '1 2 (0)'], ['A[SK]', '0: 2, 1']] }, [
|
284
|
+
# WA + AS
|
285
|
+
wd('Die|IDF', 'die|w'),
|
286
|
+
wd('helle|IDF', 'hell|a'),
|
287
|
+
wd('Sonne|IDF', 'sonne|s'),
|
288
|
+
tk('.|PUNC'),
|
289
|
+
# WA + AK
|
290
|
+
wd('Der|IDF', 'der|w'),
|
291
|
+
wd('schöne|IDF', 'schön|a'),
|
292
|
+
wd('Sonnenuntergang|KOM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
|
293
|
+
ai('EOF|')
|
294
|
+
], [
|
295
|
+
# WA + AS
|
296
|
+
wd('Die|IDF', 'die|w'),
|
297
|
+
wd('helle|IDF', 'hell|a'),
|
298
|
+
wd('Sonne|IDF', 'sonne|s'),
|
299
|
+
tk('.|PUNC'),
|
300
|
+
wd('die hell (wa)|SEQ', 'die hell (wa)|q'),
|
301
|
+
wd('as: sonne, hell|SEQ', 'as: sonne, hell|q'),
|
302
|
+
# WA + AK
|
303
|
+
wd('Der|IDF', 'der|w'),
|
304
|
+
wd('schöne|IDF', 'schön|a'),
|
305
|
+
wd('Sonnenuntergang|KOM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
|
306
|
+
wd('der schön (wa)|SEQ', 'der schön (wa)|q'),
|
307
|
+
wd('ak: sonnenuntergang, schön|SEQ', 'ak: sonnenuntergang, schön|q'),
|
308
|
+
ai('EOF|')
|
309
|
+
])
|
310
|
+
end
|
311
|
+
|
35
312
|
end
|
@@ -38,6 +38,23 @@ class TestAttendeeTextReader < AttendeeTestCase
|
|
38
38
|
])
|
39
39
|
end
|
40
40
|
|
41
|
+
def test_lir_file_no_capture
|
42
|
+
meet({ 'files' => 'test/lir.txt', 'records' => '^\[\d+\.\]', 'fields' => false }, nil, [
|
43
|
+
ai('LIR-FORMAT|'), ai("FILE|#{path = File.expand_path('test/lir.txt')}"),
|
44
|
+
ai('RECORD|[00237.]'),
|
45
|
+
'020: GERHARD.',
|
46
|
+
'025: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
|
47
|
+
'056: Die intellektuelle Erschließung des Internet befindet sich in einer Krise. GERHARD ist derzeit weltweit der einzige.',
|
48
|
+
ai('RECORD|[00238.]'),
|
49
|
+
'020: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
|
50
|
+
'025: das DFG-Projekt GERHARD.',
|
51
|
+
ai('RECORD|[00239.]'),
|
52
|
+
'020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
|
53
|
+
'056: "Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.',
|
54
|
+
ai("EOF|#{path}")
|
55
|
+
])
|
56
|
+
end
|
57
|
+
|
41
58
|
def test_lir_file_fields
|
42
59
|
meet({ 'files' => 'test/lir.txt', 'records' => true }, nil, [
|
43
60
|
ai('LIR-FORMAT|'), ai("FILE|#{path = File.expand_path('test/lir.txt')}"),
|
@@ -72,6 +89,23 @@ class TestAttendeeTextReader < AttendeeTestCase
|
|
72
89
|
])
|
73
90
|
end
|
74
91
|
|
92
|
+
def test_lir_file_fields_no_capture
|
93
|
+
meet({ 'files' => 'test/lir.txt', 'records' => '^\[\d+\.\]' }, nil, [
|
94
|
+
ai('LIR-FORMAT|'), ai("FILE|#{path = File.expand_path('test/lir.txt')}"),
|
95
|
+
ai('RECORD|[00237.]'),
|
96
|
+
'GERHARD.',
|
97
|
+
'Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
|
98
|
+
'Die intellektuelle Erschließung des Internet befindet sich in einer Krise. GERHARD ist derzeit weltweit der einzige.',
|
99
|
+
ai('RECORD|[00238.]'),
|
100
|
+
'Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
|
101
|
+
'das DFG-Projekt GERHARD.',
|
102
|
+
ai('RECORD|[00239.]'),
|
103
|
+
'Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
|
104
|
+
'"Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.',
|
105
|
+
ai("EOF|#{path}")
|
106
|
+
])
|
107
|
+
end
|
108
|
+
|
75
109
|
def test_normal_file
|
76
110
|
meet({ 'files' => 'test/mul.txt' }, nil, [
|
77
111
|
ai("FILE|#{path = File.expand_path('test/mul.txt')}"),
|
@@ -49,7 +49,7 @@ class TestAttendeeTextWriter < AttendeeTestCase
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def test_lir_file
|
52
|
-
meet({ 'ext' => 'vec', 'lir-format' =>
|
52
|
+
meet({ 'ext' => 'vec', 'lir-format' => false }, [
|
53
53
|
ai('LIR-FORMAT|'), ai('FILE|test/lir.txt'),
|
54
54
|
ai('RECORD|00237'),
|
55
55
|
'020: GERHARD.',
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lingo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.3
|
5
|
-
prerelease:
|
4
|
+
version: 1.8.4
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- John Vorhauer
|
@@ -10,116 +9,123 @@ authors:
|
|
10
9
|
autorequire:
|
11
10
|
bindir: bin
|
12
11
|
cert_chain: []
|
13
|
-
date:
|
12
|
+
date: 2013-10-18 00:00:00.000000000 Z
|
14
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: highline
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
15
28
|
- !ruby/object:Gem::Dependency
|
16
29
|
name: ruby-nuggets
|
17
30
|
requirement: !ruby/object:Gem::Requirement
|
18
|
-
none: false
|
19
31
|
requirements:
|
20
|
-
- -
|
32
|
+
- - ">="
|
21
33
|
- !ruby/object:Gem::Version
|
22
|
-
version: 0.
|
34
|
+
version: 0.9.2
|
23
35
|
type: :runtime
|
24
36
|
prerelease: false
|
25
37
|
version_requirements: !ruby/object:Gem::Requirement
|
26
|
-
none: false
|
27
38
|
requirements:
|
28
|
-
- -
|
39
|
+
- - ">="
|
29
40
|
- !ruby/object:Gem::Version
|
30
|
-
version: 0.
|
41
|
+
version: 0.9.2
|
31
42
|
- !ruby/object:Gem::Dependency
|
32
|
-
name:
|
43
|
+
name: sinatra
|
33
44
|
requirement: !ruby/object:Gem::Requirement
|
34
|
-
none: false
|
35
45
|
requirements:
|
36
|
-
- -
|
46
|
+
- - ">="
|
37
47
|
- !ruby/object:Gem::Version
|
38
48
|
version: '0'
|
39
49
|
type: :runtime
|
40
50
|
prerelease: false
|
41
51
|
version_requirements: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
52
|
requirements:
|
44
|
-
- -
|
53
|
+
- - ">="
|
45
54
|
- !ruby/object:Gem::Version
|
46
55
|
version: '0'
|
47
56
|
- !ruby/object:Gem::Dependency
|
48
|
-
name:
|
57
|
+
name: sinatra-contrib
|
49
58
|
requirement: !ruby/object:Gem::Requirement
|
50
|
-
none: false
|
51
59
|
requirements:
|
52
|
-
- -
|
60
|
+
- - ">="
|
53
61
|
- !ruby/object:Gem::Version
|
54
62
|
version: '0'
|
55
63
|
type: :runtime
|
56
64
|
prerelease: false
|
57
65
|
version_requirements: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
66
|
requirements:
|
60
|
-
- -
|
67
|
+
- - ">="
|
61
68
|
- !ruby/object:Gem::Version
|
62
69
|
version: '0'
|
63
70
|
- !ruby/object:Gem::Dependency
|
64
|
-
name:
|
71
|
+
name: unicode
|
65
72
|
requirement: !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
73
|
requirements:
|
68
|
-
- -
|
74
|
+
- - ">="
|
69
75
|
- !ruby/object:Gem::Version
|
70
76
|
version: '0'
|
71
77
|
type: :runtime
|
72
78
|
prerelease: false
|
73
79
|
version_requirements: !ruby/object:Gem::Requirement
|
74
|
-
none: false
|
75
80
|
requirements:
|
76
|
-
- -
|
81
|
+
- - ">="
|
77
82
|
- !ruby/object:Gem::Version
|
78
83
|
version: '0'
|
79
84
|
- !ruby/object:Gem::Dependency
|
80
85
|
name: diff-lcs
|
81
86
|
requirement: !ruby/object:Gem::Requirement
|
82
|
-
none: false
|
83
87
|
requirements:
|
84
|
-
- -
|
88
|
+
- - ">="
|
85
89
|
- !ruby/object:Gem::Version
|
86
90
|
version: 1.1.3
|
87
91
|
type: :development
|
88
92
|
prerelease: false
|
89
93
|
version_requirements: !ruby/object:Gem::Requirement
|
90
|
-
none: false
|
91
94
|
requirements:
|
92
|
-
- -
|
95
|
+
- - ">="
|
93
96
|
- !ruby/object:Gem::Version
|
94
97
|
version: 1.1.3
|
95
98
|
- !ruby/object:Gem::Dependency
|
96
99
|
name: open4
|
97
100
|
requirement: !ruby/object:Gem::Requirement
|
98
|
-
none: false
|
99
101
|
requirements:
|
100
|
-
- -
|
102
|
+
- - ">="
|
101
103
|
- !ruby/object:Gem::Version
|
102
104
|
version: '0'
|
103
105
|
type: :development
|
104
106
|
prerelease: false
|
105
107
|
version_requirements: !ruby/object:Gem::Requirement
|
106
|
-
none: false
|
107
108
|
requirements:
|
108
|
-
- -
|
109
|
+
- - ">="
|
109
110
|
- !ruby/object:Gem::Version
|
110
111
|
version: '0'
|
111
|
-
description:
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
of
|
112
|
+
description: |
|
113
|
+
Lingo is an open source indexing system for research and teachings.
|
114
|
+
The main functions of Lingo are:
|
115
|
+
|
116
|
+
* identification of (i.e. reduction to) basic word form by means of
|
117
|
+
dictionaries and suffix lists
|
118
|
+
* algorithmic decomposition
|
119
|
+
* dictionary-based synonymisation and identification of phrases
|
120
|
+
* generic identification of phrases/word sequences based on patterns
|
121
|
+
of word classes
|
116
122
|
email:
|
117
123
|
- lingo@vorhauer.de
|
118
|
-
- jens.wille@
|
124
|
+
- jens.wille@gmail.com
|
119
125
|
executables:
|
120
|
-
- lingosrv
|
121
126
|
- lingo
|
122
127
|
- lingoctl
|
128
|
+
- lingosrv
|
123
129
|
- lingoweb
|
124
130
|
extensions: []
|
125
131
|
extra_rdoc_files:
|
@@ -127,172 +133,172 @@ extra_rdoc_files:
|
|
127
133
|
- COPYING
|
128
134
|
- ChangeLog
|
129
135
|
files:
|
130
|
-
- lib/lingo
|
131
|
-
- lib/lingo/version.rb
|
132
|
-
- lib/lingo/cli.rb
|
133
|
-
- lib/lingo/srv.rb
|
134
|
-
- lib/lingo/web.rb
|
136
|
+
- lib/lingo.rb
|
135
137
|
- lib/lingo/agenda_item.rb
|
136
|
-
- lib/lingo/
|
137
|
-
- lib/lingo/
|
138
|
-
- lib/lingo/language/word.rb
|
139
|
-
- lib/lingo/language/char.rb
|
140
|
-
- lib/lingo/language/lexical.rb
|
141
|
-
- lib/lingo/language/token.rb
|
142
|
-
- lib/lingo/language/grammar.rb
|
143
|
-
- lib/lingo/language/dictionary.rb
|
144
|
-
- lib/lingo/language/word_form.rb
|
145
|
-
- lib/lingo/error.rb
|
146
|
-
- lib/lingo/database/source.rb
|
147
|
-
- lib/lingo/database/source/key_value.rb
|
148
|
-
- lib/lingo/database/source/multi_value.rb
|
149
|
-
- lib/lingo/database/source/single_word.rb
|
150
|
-
- lib/lingo/database/source/word_class.rb
|
151
|
-
- lib/lingo/database/source/multi_key.rb
|
152
|
-
- lib/lingo/database/crypter.rb
|
153
|
-
- lib/lingo/database/sdbm_store.rb
|
154
|
-
- lib/lingo/database/libcdb_store.rb
|
155
|
-
- lib/lingo/database/hash_store.rb
|
156
|
-
- lib/lingo/database/show_progress.rb
|
157
|
-
- lib/lingo/database/gdbm_store.rb
|
158
|
-
- lib/lingo/call.rb
|
138
|
+
- lib/lingo/app.rb
|
139
|
+
- lib/lingo/attendee.rb
|
159
140
|
- lib/lingo/attendee/abbreviator.rb
|
160
|
-
- lib/lingo/attendee/text_writer.rb
|
161
141
|
- lib/lingo/attendee/debugger.rb
|
162
|
-
- lib/lingo/attendee/dehyphenizer.rb
|
163
|
-
- lib/lingo/attendee/stemmer/porter.rb
|
164
142
|
- lib/lingo/attendee/decomposer.rb
|
143
|
+
- lib/lingo/attendee/dehyphenizer.rb
|
144
|
+
- lib/lingo/attendee/formatter.rb
|
165
145
|
- lib/lingo/attendee/multi_worder.rb
|
166
|
-
- lib/lingo/attendee/tokenizer.rb
|
167
|
-
- lib/lingo/attendee/word_searcher.rb
|
168
|
-
- lib/lingo/attendee/variator.rb
|
169
146
|
- lib/lingo/attendee/noneword_filter.rb
|
170
|
-
- lib/lingo/attendee/sequencer.rb
|
171
147
|
- lib/lingo/attendee/object_filter.rb
|
148
|
+
- lib/lingo/attendee/sequencer.rb
|
172
149
|
- lib/lingo/attendee/stemmer.rb
|
150
|
+
- lib/lingo/attendee/stemmer/porter.rb
|
173
151
|
- lib/lingo/attendee/synonymer.rb
|
174
152
|
- lib/lingo/attendee/text_reader.rb
|
175
|
-
- lib/lingo/attendee/
|
153
|
+
- lib/lingo/attendee/text_writer.rb
|
154
|
+
- lib/lingo/attendee/tokenizer.rb
|
155
|
+
- lib/lingo/attendee/variator.rb
|
176
156
|
- lib/lingo/attendee/vector_filter.rb
|
177
|
-
- lib/lingo/
|
157
|
+
- lib/lingo/attendee/word_searcher.rb
|
158
|
+
- lib/lingo/buffered_attendee.rb
|
159
|
+
- lib/lingo/call.rb
|
160
|
+
- lib/lingo/cli.rb
|
161
|
+
- lib/lingo/config.rb
|
162
|
+
- lib/lingo/ctl.rb
|
178
163
|
- lib/lingo/database.rb
|
179
|
-
- lib/lingo/
|
180
|
-
- lib/lingo/
|
164
|
+
- lib/lingo/database/crypter.rb
|
165
|
+
- lib/lingo/database/gdbm_store.rb
|
166
|
+
- lib/lingo/database/hash_store.rb
|
167
|
+
- lib/lingo/database/libcdb_store.rb
|
168
|
+
- lib/lingo/database/sdbm_store.rb
|
169
|
+
- lib/lingo/database/show_progress.rb
|
170
|
+
- lib/lingo/database/source.rb
|
171
|
+
- lib/lingo/database/source/key_value.rb
|
172
|
+
- lib/lingo/database/source/multi_key.rb
|
173
|
+
- lib/lingo/database/source/multi_value.rb
|
174
|
+
- lib/lingo/database/source/single_word.rb
|
175
|
+
- lib/lingo/database/source/word_class.rb
|
181
176
|
- lib/lingo/debug.rb
|
182
|
-
- lib/lingo/
|
183
|
-
- lib/lingo/
|
184
|
-
- lib/lingo.rb
|
185
|
-
-
|
177
|
+
- lib/lingo/error.rb
|
178
|
+
- lib/lingo/language.rb
|
179
|
+
- lib/lingo/language/char.rb
|
180
|
+
- lib/lingo/language/dictionary.rb
|
181
|
+
- lib/lingo/language/grammar.rb
|
182
|
+
- lib/lingo/language/lexical.rb
|
183
|
+
- lib/lingo/language/lexical_hash.rb
|
184
|
+
- lib/lingo/language/token.rb
|
185
|
+
- lib/lingo/language/word.rb
|
186
|
+
- lib/lingo/language/word_form.rb
|
187
|
+
- lib/lingo/show_progress.rb
|
188
|
+
- lib/lingo/srv.rb
|
189
|
+
- lib/lingo/version.rb
|
190
|
+
- lib/lingo/web.rb
|
186
191
|
- bin/lingo
|
187
192
|
- bin/lingoctl
|
193
|
+
- bin/lingosrv
|
188
194
|
- bin/lingoweb
|
189
195
|
- lingo.rb
|
190
|
-
- lingo.cfg
|
191
196
|
- lingo-call.cfg
|
197
|
+
- lingo.cfg
|
192
198
|
- lir.cfg
|
193
199
|
- de.lang
|
194
200
|
- en.lang
|
195
201
|
- ru.lang
|
196
|
-
- de/lingo-dic.txt
|
197
202
|
- de/lingo-abk.txt
|
203
|
+
- de/lingo-dic.txt
|
198
204
|
- de/lingo-mul.txt
|
199
205
|
- de/lingo-syn.txt
|
200
|
-
- de/user-dic.txt
|
201
|
-
- de/test_syn2.txt
|
202
|
-
- de/test_mul2.txt
|
203
|
-
- de/test_mul.txt
|
204
|
-
- de/test_syn.txt
|
205
206
|
- de/test_dic.txt
|
207
|
+
- de/test_mul.txt
|
208
|
+
- de/test_mul2.txt
|
206
209
|
- de/test_singleword.txt
|
210
|
+
- de/test_syn.txt
|
211
|
+
- de/test_syn2.txt
|
212
|
+
- de/user-dic.txt
|
207
213
|
- en/lingo-dic.txt
|
214
|
+
- en/lingo-irr.txt
|
208
215
|
- en/lingo-mul.txt
|
209
|
-
- en/lingo-wdn.txt
|
210
216
|
- en/lingo-syn.txt
|
211
|
-
- en/lingo-
|
217
|
+
- en/lingo-wdn.txt
|
212
218
|
- en/user-dic.txt
|
213
219
|
- ru/lingo-dic.txt
|
214
220
|
- ru/lingo-mul.txt
|
215
221
|
- ru/lingo-syn.txt
|
216
|
-
- txt/artikel.txt
|
217
222
|
- txt/artikel-en.txt
|
218
223
|
- txt/artikel-ru.txt
|
224
|
+
- txt/artikel.txt
|
219
225
|
- txt/lir.txt
|
220
|
-
- lib/lingo/srv/lingosrv.cfg
|
221
226
|
- lib/lingo/srv/config.ru
|
227
|
+
- lib/lingo/srv/lingosrv.cfg
|
228
|
+
- lib/lingo/web/config.ru
|
222
229
|
- lib/lingo/web/lingoweb.cfg
|
223
230
|
- lib/lingo/web/public/lingo.png
|
224
231
|
- lib/lingo/web/public/lingoweb.css
|
225
232
|
- lib/lingo/web/views/index.erb
|
226
|
-
- lib/lingo/web/config.ru
|
227
233
|
- COPYING
|
228
234
|
- ChangeLog
|
229
|
-
- Rakefile
|
230
235
|
- README
|
236
|
+
- Rakefile
|
231
237
|
- spec/spec_helper.rb
|
232
|
-
- .rspec
|
233
|
-
- test/
|
234
|
-
- test/ref/lir.mul
|
235
|
-
- test/ref/lir.seq
|
236
|
-
- test/ref/artikel.mul
|
237
|
-
- test/ref/lir.syn
|
238
|
-
- test/ref/artikel.ver
|
239
|
-
- test/ref/artikel.seq
|
240
|
-
- test/ref/artikel.non
|
241
|
-
- test/ref/lir.non
|
242
|
-
- test/ref/artikel.vec
|
243
|
-
- test/ref/lir.vec
|
244
|
-
- test/ref/artikel.syn
|
245
|
-
- test/lir2.txt
|
246
|
-
- test/ts_database.rb
|
247
|
-
- test/test_helper.rb
|
248
|
-
- test/attendee/ts_object_filter.rb
|
249
|
-
- test/attendee/ts_vector_filter.rb
|
250
|
-
- test/attendee/ts_synonymer.rb
|
238
|
+
- ".rspec"
|
239
|
+
- test/attendee/ts_abbreviator.rb
|
251
240
|
- test/attendee/ts_decomposer.rb
|
252
|
-
- test/attendee/
|
241
|
+
- test/attendee/ts_multi_worder.rb
|
253
242
|
- test/attendee/ts_noneword_filter.rb
|
254
|
-
- test/attendee/
|
243
|
+
- test/attendee/ts_object_filter.rb
|
255
244
|
- test/attendee/ts_sequencer.rb
|
256
|
-
- test/attendee/
|
245
|
+
- test/attendee/ts_stemmer.rb
|
246
|
+
- test/attendee/ts_synonymer.rb
|
257
247
|
- test/attendee/ts_text_reader.rb
|
258
|
-
- test/attendee/
|
248
|
+
- test/attendee/ts_text_writer.rb
|
249
|
+
- test/attendee/ts_tokenizer.rb
|
259
250
|
- test/attendee/ts_variator.rb
|
260
|
-
- test/attendee/
|
251
|
+
- test/attendee/ts_vector_filter.rb
|
261
252
|
- test/attendee/ts_word_searcher.rb
|
253
|
+
- test/lir.txt
|
262
254
|
- test/lir.vec
|
255
|
+
- test/lir2.txt
|
263
256
|
- test/mul.txt
|
264
|
-
- test/
|
257
|
+
- test/ref/artikel.mul
|
258
|
+
- test/ref/artikel.non
|
259
|
+
- test/ref/artikel.seq
|
260
|
+
- test/ref/artikel.syn
|
261
|
+
- test/ref/artikel.vec
|
262
|
+
- test/ref/artikel.ven
|
263
|
+
- test/ref/artikel.ver
|
264
|
+
- test/ref/lir.mul
|
265
|
+
- test/ref/lir.non
|
266
|
+
- test/ref/lir.seq
|
267
|
+
- test/ref/lir.syn
|
268
|
+
- test/ref/lir.vec
|
269
|
+
- test/test_helper.rb
|
270
|
+
- test/ts_database.rb
|
265
271
|
- test/ts_language.rb
|
266
272
|
homepage: http://lex-lingo.de
|
267
|
-
licenses:
|
273
|
+
licenses:
|
274
|
+
- AGPL
|
275
|
+
metadata: {}
|
268
276
|
post_install_message:
|
269
277
|
rdoc_options:
|
270
|
-
- --charset
|
278
|
+
- "--charset"
|
271
279
|
- UTF-8
|
272
|
-
- --line-numbers
|
273
|
-
- --all
|
274
|
-
- --title
|
275
|
-
- lingo Application documentation (v1.8.3)
|
276
|
-
- --main
|
280
|
+
- "--line-numbers"
|
281
|
+
- "--all"
|
282
|
+
- "--title"
|
283
|
+
- lingo Application documentation (v1.8.4)
|
284
|
+
- "--main"
|
277
285
|
- README
|
278
286
|
require_paths:
|
279
287
|
- lib
|
280
288
|
required_ruby_version: !ruby/object:Gem::Requirement
|
281
|
-
none: false
|
282
289
|
requirements:
|
283
|
-
- -
|
290
|
+
- - ">="
|
284
291
|
- !ruby/object:Gem::Version
|
285
292
|
version: 1.9.2
|
286
293
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
287
|
-
none: false
|
288
294
|
requirements:
|
289
|
-
- -
|
295
|
+
- - ">="
|
290
296
|
- !ruby/object:Gem::Version
|
291
297
|
version: '0'
|
292
298
|
requirements: []
|
293
299
|
rubyforge_project:
|
294
|
-
rubygems_version: 1.
|
300
|
+
rubygems_version: 2.1.9
|
295
301
|
signing_key:
|
296
|
-
specification_version:
|
302
|
+
specification_version: 4
|
297
303
|
summary: The full-featured automatic indexing system
|
298
304
|
test_files: []
|