lingo 1.8.3 → 1.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ChangeLog +24 -2
- data/README +16 -10
- data/Rakefile +15 -6
- data/en/lingo-irr.txt +60 -60
- data/lib/lingo.rb +14 -6
- data/lib/lingo/app.rb +3 -0
- data/lib/lingo/attendee.rb +6 -0
- data/lib/lingo/attendee/abbreviator.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +1 -1
- data/lib/lingo/attendee/noneword_filter.rb +14 -5
- data/lib/lingo/attendee/sequencer.rb +63 -37
- data/lib/lingo/attendee/text_reader.rb +14 -15
- data/lib/lingo/attendee/text_writer.rb +3 -3
- data/lib/lingo/attendee/vector_filter.rb +5 -5
- data/lib/lingo/call.rb +1 -1
- data/lib/lingo/cli.rb +2 -2
- data/lib/lingo/ctl.rb +3 -1
- data/lib/lingo/database.rb +1 -1
- data/lib/lingo/database/show_progress.rb +15 -2
- data/lib/lingo/database/source.rb +6 -1
- data/lib/lingo/error.rb +28 -4
- data/lib/lingo/language/grammar.rb +7 -7
- data/lib/lingo/language/word.rb +6 -2
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/show_progress.rb +3 -2
- data/lib/lingo/srv.rb +15 -6
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web.rb +40 -10
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/lib/lingo/web/public/lingoweb.css +7 -4
- data/lib/lingo/web/views/index.erb +97 -39
- data/lingo.cfg +1 -1
- data/lir.cfg +1 -1
- data/test/attendee/ts_abbreviator.rb +22 -0
- data/test/attendee/ts_sequencer.rb +278 -1
- data/test/attendee/ts_text_reader.rb +34 -0
- data/test/attendee/ts_text_writer.rb +1 -1
- metadata +139 -133
data/lingo.cfg
CHANGED
@@ -43,7 +43,7 @@ meeting:
|
|
43
43
|
- multi_worder: { source: sys-mul }
|
44
44
|
|
45
45
|
# Wortsequenzen anhand von Regeln identifizieren
|
46
|
-
- sequencer: { stopper: PUNC,OTHR }
|
46
|
+
- sequencer: { stopper: 'PUNC,OTHR' }
|
47
47
|
|
48
48
|
# Relationierungen einfügen
|
49
49
|
- synonymer: { skip: '?,t', source: sys-syn, out: syn }
|
data/lir.cfg
CHANGED
@@ -48,7 +48,7 @@ meeting:
|
|
48
48
|
- multi_worder: { source: sys-mul }
|
49
49
|
|
50
50
|
# Wortsequenzen anhand von Regeln identifizieren
|
51
|
-
- sequencer: { stopper: PUNC,OTHR }
|
51
|
+
- sequencer: { stopper: 'PUNC,OTHR' }
|
52
52
|
|
53
53
|
# Relationierungen einfügen
|
54
54
|
- synonymer: { skip: '?,t', source: sys-syn, out: syn }
|
@@ -24,4 +24,26 @@ class TestAttendeeAbbreviator < AttendeeTestCase
|
|
24
24
|
])
|
25
25
|
end
|
26
26
|
|
27
|
+
def test_sentence
|
28
|
+
meet({ 'source' => 'sys-abk' }, [
|
29
|
+
tk('Dieser|WORD'),
|
30
|
+
tk('Satz|WORD'),
|
31
|
+
tk('hat|WORD'),
|
32
|
+
tk('keinen|WORD'),
|
33
|
+
tk('Punkt|WORD'),
|
34
|
+
tk('am|WORD'),
|
35
|
+
tk('Ende|WORD'),
|
36
|
+
ai('EOF|')
|
37
|
+
], [
|
38
|
+
tk('Dieser|WORD'),
|
39
|
+
tk('Satz|WORD'),
|
40
|
+
tk('hat|WORD'),
|
41
|
+
tk('keinen|WORD'),
|
42
|
+
tk('Punkt|WORD'),
|
43
|
+
tk('am|WORD'),
|
44
|
+
tk('Ende|WORD'),
|
45
|
+
ai('EOF|')
|
46
|
+
])
|
47
|
+
end
|
48
|
+
|
27
49
|
end
|
@@ -5,7 +5,7 @@ require_relative '../test_helper'
|
|
5
5
|
class TestAttendeeSequencer < AttendeeTestCase
|
6
6
|
|
7
7
|
def test_basic
|
8
|
-
meet({
|
8
|
+
meet({}, [
|
9
9
|
# AS
|
10
10
|
wd('Die|IDF', 'die|w'),
|
11
11
|
wd('helle|IDF', 'hell|a'),
|
@@ -32,4 +32,281 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
32
32
|
])
|
33
33
|
end
|
34
34
|
|
35
|
+
def test_param
|
36
|
+
meet({ 'sequences' => [['SS', '1 2'], ['SSS', '1 2 3']] }, [
|
37
|
+
# (AS)
|
38
|
+
wd('Die|IDF', 'die|w'),
|
39
|
+
wd('helle|IDF', 'hell|a'),
|
40
|
+
wd('Sonne|IDF', 'sonne|s'),
|
41
|
+
tk('.|PUNC'),
|
42
|
+
# SS + SS + SSS
|
43
|
+
wd('Der|IDF', 'der|w'),
|
44
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
45
|
+
wd('Abbild|IDF', 'abbild|s'),
|
46
|
+
wd('Gottes|IDF', 'gott|s'),
|
47
|
+
wd('Turm|IDF', 'turm|s'),
|
48
|
+
tk('.|PUNC'),
|
49
|
+
# SS
|
50
|
+
wd('Der|IDF', 'der|w'),
|
51
|
+
wd('Sonne|IDF', 'sonne|s'),
|
52
|
+
wd('Untergang|IDF', 'untergang|s'),
|
53
|
+
ai('EOF|')
|
54
|
+
], [
|
55
|
+
# (AS)
|
56
|
+
wd('Die|IDF', 'die|w'),
|
57
|
+
wd('helle|IDF', 'hell|a'),
|
58
|
+
wd('Sonne|IDF', 'sonne|s'),
|
59
|
+
tk('.|PUNC'),
|
60
|
+
# SS + SS + SSS
|
61
|
+
wd('Der|IDF', 'der|w'),
|
62
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
63
|
+
wd('Abbild|IDF', 'abbild|s'),
|
64
|
+
wd('Gottes|IDF', 'gott|s'),
|
65
|
+
wd('Turm|IDF', 'turm|s'),
|
66
|
+
tk('.|PUNC'),
|
67
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
68
|
+
wd('gott turm|SEQ', 'gott turm|q'),
|
69
|
+
wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
|
70
|
+
# SS
|
71
|
+
wd('Der|IDF', 'der|w'),
|
72
|
+
wd('Sonne|IDF', 'sonne|s'),
|
73
|
+
wd('Untergang|IDF', 'untergang|s'),
|
74
|
+
wd('sonne untergang|SEQ', 'sonne untergang|q'),
|
75
|
+
ai('EOF|')
|
76
|
+
])
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_multi
|
80
|
+
meet({ 'sequences' => [['MS', '1 2']] }, [
|
81
|
+
# MS
|
82
|
+
wd('Der|IDF', 'der|w'),
|
83
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
84
|
+
wd('Abbild|IDF', 'abbild|s'),
|
85
|
+
wd('Gottes|IDF', 'gott|s'),
|
86
|
+
wd('Turm|IDF', 'turm|s'),
|
87
|
+
tk('.|PUNC'),
|
88
|
+
ai('EOF|')
|
89
|
+
], [
|
90
|
+
# MS
|
91
|
+
wd('Der|IDF', 'der|w'),
|
92
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
93
|
+
wd('Abbild|IDF', 'abbild|s'),
|
94
|
+
wd('Gottes|IDF', 'gott|s'),
|
95
|
+
wd('Turm|IDF', 'turm|s'),
|
96
|
+
tk('.|PUNC'),
|
97
|
+
wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
|
98
|
+
ai('EOF|')
|
99
|
+
])
|
100
|
+
meet({ 'sequences' => [['MS', '1 2'], ['SS', '1 2'], ['SSS', '1 2 3']] }, [
|
101
|
+
# MS + SS + SS + SSS
|
102
|
+
wd('Der|IDF', 'der|w'),
|
103
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
104
|
+
wd('Abbild|IDF', 'abbild|s'),
|
105
|
+
wd('Gottes|IDF', 'gott|s'),
|
106
|
+
wd('Turm|IDF', 'turm|s'),
|
107
|
+
tk('.|PUNC'),
|
108
|
+
# SS
|
109
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
110
|
+
wd('Abbild|IDF', 'abbild|s'),
|
111
|
+
wd('Gottes|IDF', 'gott|s'),
|
112
|
+
ai('EOF|')
|
113
|
+
], [
|
114
|
+
# MS + SS + SS + SSS
|
115
|
+
wd('Der|IDF', 'der|w'),
|
116
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
117
|
+
wd('Abbild|IDF', 'abbild|s'),
|
118
|
+
wd('Gottes|IDF', 'gott|s'),
|
119
|
+
wd('Turm|IDF', 'turm|s'),
|
120
|
+
tk('.|PUNC'),
|
121
|
+
wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
|
122
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
123
|
+
wd('gott turm|SEQ', 'gott turm|q'),
|
124
|
+
wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
|
125
|
+
# SS
|
126
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
127
|
+
wd('Abbild|IDF', 'abbild|s'),
|
128
|
+
wd('Gottes|IDF', 'gott|s'),
|
129
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
130
|
+
ai('EOF|')
|
131
|
+
])
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_regex
|
135
|
+
meet({ 'sequences' => [['[MS]S', '1 2']] }, [
|
136
|
+
# MS + SS + SS
|
137
|
+
wd('Der|IDF', 'der|w'),
|
138
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
139
|
+
wd('Abbild|IDF', 'abbild|s'),
|
140
|
+
wd('Gottes|IDF', 'gott|s'),
|
141
|
+
wd('Turm|IDF', 'turm|s'),
|
142
|
+
tk('.|PUNC'),
|
143
|
+
# SS
|
144
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
145
|
+
wd('Abbild|IDF', 'abbild|s'),
|
146
|
+
wd('Gottes|IDF', 'gott|s'),
|
147
|
+
ai('EOF|')
|
148
|
+
], [
|
149
|
+
# MS + SS + SS
|
150
|
+
wd('Der|IDF', 'der|w'),
|
151
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
152
|
+
wd('Abbild|IDF', 'abbild|s'),
|
153
|
+
wd('Gottes|IDF', 'gott|s'),
|
154
|
+
wd('Turm|IDF', 'turm|s'),
|
155
|
+
tk('.|PUNC'),
|
156
|
+
wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
|
157
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
158
|
+
wd('gott turm|SEQ', 'gott turm|q'),
|
159
|
+
# SS
|
160
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
161
|
+
wd('Abbild|IDF', 'abbild|s'),
|
162
|
+
wd('Gottes|IDF', 'gott|s'),
|
163
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
164
|
+
ai('EOF|')
|
165
|
+
])
|
166
|
+
end
|
167
|
+
|
168
|
+
def test_regex_none
|
169
|
+
meet({ 'sequences' => ['..'] }, [
|
170
|
+
# (MS + SS + SS)
|
171
|
+
wd('Der|IDF', 'der|w'),
|
172
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
173
|
+
wd('Abbild|IDF', 'abbild|s'),
|
174
|
+
wd('Gottes|IDF', 'gott|s'),
|
175
|
+
wd('Turm|IDF', 'turm|s'),
|
176
|
+
tk('.|PUNC'),
|
177
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
178
|
+
wd('Abbild|IDF', 'abbild|s'),
|
179
|
+
wd('Gottes|IDF', 'gott|s'),
|
180
|
+
ai('EOF|')
|
181
|
+
], [
|
182
|
+
# (MS + SS + SS)
|
183
|
+
wd('Der|IDF', 'der|w'),
|
184
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
185
|
+
wd('Abbild|IDF', 'abbild|s'),
|
186
|
+
wd('Gottes|IDF', 'gott|s'),
|
187
|
+
wd('Turm|IDF', 'turm|s'),
|
188
|
+
tk('.|PUNC'),
|
189
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
190
|
+
wd('Abbild|IDF', 'abbild|s'),
|
191
|
+
wd('Gottes|IDF', 'gott|s'),
|
192
|
+
ai('EOF|')
|
193
|
+
])
|
194
|
+
end
|
195
|
+
|
196
|
+
def test_regex_comm
|
197
|
+
meet({ 'sequences' => ['(?#MS)..'] }, [ # = [MS][MS]
|
198
|
+
# MS + SS + SS
|
199
|
+
wd('Der|IDF', 'der|w'),
|
200
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
201
|
+
wd('Abbild|IDF', 'abbild|s'),
|
202
|
+
wd('Gottes|IDF', 'gott|s'),
|
203
|
+
wd('Turm|IDF', 'turm|s'),
|
204
|
+
tk('.|PUNC'),
|
205
|
+
# SS
|
206
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
207
|
+
wd('Abbild|IDF', 'abbild|s'),
|
208
|
+
wd('Gottes|IDF', 'gott|s'),
|
209
|
+
ai('EOF|')
|
210
|
+
], [
|
211
|
+
# MS + SS + SS
|
212
|
+
wd('Der|IDF', 'der|w'),
|
213
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
214
|
+
wd('Abbild|IDF', 'abbild|s'),
|
215
|
+
wd('Gottes|IDF', 'gott|s'),
|
216
|
+
wd('Turm|IDF', 'turm|s'),
|
217
|
+
tk('.|PUNC'),
|
218
|
+
wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
|
219
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
220
|
+
wd('gott turm|SEQ', 'gott turm|q'),
|
221
|
+
# SS
|
222
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
223
|
+
wd('Abbild|IDF', 'abbild|s'),
|
224
|
+
wd('Gottes|IDF', 'gott|s'),
|
225
|
+
wd('abbild gott|SEQ', 'abbild gott|q'),
|
226
|
+
ai('EOF|')
|
227
|
+
])
|
228
|
+
end
|
229
|
+
|
230
|
+
def test_regex_quan
|
231
|
+
meet({ 'sequences' => ['[MS]S+'] }, [
|
232
|
+
# MS + SSS + (SS) + SS
|
233
|
+
wd('Der|IDF', 'der|w'),
|
234
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
235
|
+
wd('Abbild|IDF', 'abbild|s'),
|
236
|
+
wd('Gottes|IDF', 'gott|s'),
|
237
|
+
wd('Turm|IDF', 'turm|s'),
|
238
|
+
tk('.|PUNC'),
|
239
|
+
ai('EOF|')
|
240
|
+
], [
|
241
|
+
# MS + SSS + (SS) + SS
|
242
|
+
wd('Der|IDF', 'der|w'),
|
243
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
244
|
+
wd('Abbild|IDF', 'abbild|s'),
|
245
|
+
wd('Gottes|IDF', 'gott|s'),
|
246
|
+
wd('Turm|IDF', 'turm|s'),
|
247
|
+
tk('.|PUNC'),
|
248
|
+
wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
|
249
|
+
wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
|
250
|
+
#wd('abbild gott|SEQ', 'abbild gott|q'), # FIXME
|
251
|
+
wd('gott turm|SEQ', 'gott turm|q'),
|
252
|
+
ai('EOF|')
|
253
|
+
])
|
254
|
+
end
|
255
|
+
|
256
|
+
def test_regex_form
|
257
|
+
meet({ 'sequences' => [['[MS]S+', '^']] }, [
|
258
|
+
# MS + SSS + (SS) + SS
|
259
|
+
wd('Der|IDF', 'der|w'),
|
260
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
261
|
+
wd('Abbild|IDF', 'abbild|s'),
|
262
|
+
wd('Gottes|IDF', 'gott|s'),
|
263
|
+
wd('Turm|IDF', 'turm|s'),
|
264
|
+
tk('.|PUNC'),
|
265
|
+
ai('EOF|')
|
266
|
+
], [
|
267
|
+
# MS + SSS + (SS) + SS
|
268
|
+
wd('Der|IDF', 'der|w'),
|
269
|
+
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
270
|
+
wd('Abbild|IDF', 'abbild|s'),
|
271
|
+
wd('Gottes|IDF', 'gott|s'),
|
272
|
+
wd('Turm|IDF', 'turm|s'),
|
273
|
+
tk('.|PUNC'),
|
274
|
+
wd('ms:abbild gottes^turm|SEQ', 'ms:abbild gottes^turm|q'),
|
275
|
+
wd('sss:abbild^gott^turm|SEQ', 'sss:abbild^gott^turm|q'),
|
276
|
+
#wd('ss:abbild^gott|SEQ', 'ss:abbild^gott|q'), # FIXME
|
277
|
+
wd('ss:gott^turm|SEQ', 'ss:gott^turm|q'),
|
278
|
+
ai('EOF|')
|
279
|
+
])
|
280
|
+
end
|
281
|
+
|
282
|
+
def test_match
|
283
|
+
meet({ 'sequences' => [['WA', '1 2 (0)'], ['A[SK]', '0: 2, 1']] }, [
|
284
|
+
# WA + AS
|
285
|
+
wd('Die|IDF', 'die|w'),
|
286
|
+
wd('helle|IDF', 'hell|a'),
|
287
|
+
wd('Sonne|IDF', 'sonne|s'),
|
288
|
+
tk('.|PUNC'),
|
289
|
+
# WA + AK
|
290
|
+
wd('Der|IDF', 'der|w'),
|
291
|
+
wd('schöne|IDF', 'schön|a'),
|
292
|
+
wd('Sonnenuntergang|KOM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
|
293
|
+
ai('EOF|')
|
294
|
+
], [
|
295
|
+
# WA + AS
|
296
|
+
wd('Die|IDF', 'die|w'),
|
297
|
+
wd('helle|IDF', 'hell|a'),
|
298
|
+
wd('Sonne|IDF', 'sonne|s'),
|
299
|
+
tk('.|PUNC'),
|
300
|
+
wd('die hell (wa)|SEQ', 'die hell (wa)|q'),
|
301
|
+
wd('as: sonne, hell|SEQ', 'as: sonne, hell|q'),
|
302
|
+
# WA + AK
|
303
|
+
wd('Der|IDF', 'der|w'),
|
304
|
+
wd('schöne|IDF', 'schön|a'),
|
305
|
+
wd('Sonnenuntergang|KOM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
|
306
|
+
wd('der schön (wa)|SEQ', 'der schön (wa)|q'),
|
307
|
+
wd('ak: sonnenuntergang, schön|SEQ', 'ak: sonnenuntergang, schön|q'),
|
308
|
+
ai('EOF|')
|
309
|
+
])
|
310
|
+
end
|
311
|
+
|
35
312
|
end
|
@@ -38,6 +38,23 @@ class TestAttendeeTextReader < AttendeeTestCase
|
|
38
38
|
])
|
39
39
|
end
|
40
40
|
|
41
|
+
def test_lir_file_no_capture
|
42
|
+
meet({ 'files' => 'test/lir.txt', 'records' => '^\[\d+\.\]', 'fields' => false }, nil, [
|
43
|
+
ai('LIR-FORMAT|'), ai("FILE|#{path = File.expand_path('test/lir.txt')}"),
|
44
|
+
ai('RECORD|[00237.]'),
|
45
|
+
'020: GERHARD.',
|
46
|
+
'025: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
|
47
|
+
'056: Die intellektuelle Erschließung des Internet befindet sich in einer Krise. GERHARD ist derzeit weltweit der einzige.',
|
48
|
+
ai('RECORD|[00238.]'),
|
49
|
+
'020: Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
|
50
|
+
'025: das DFG-Projekt GERHARD.',
|
51
|
+
ai('RECORD|[00239.]'),
|
52
|
+
'020: Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
|
53
|
+
'056: "Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.',
|
54
|
+
ai("EOF|#{path}")
|
55
|
+
])
|
56
|
+
end
|
57
|
+
|
41
58
|
def test_lir_file_fields
|
42
59
|
meet({ 'files' => 'test/lir.txt', 'records' => true }, nil, [
|
43
60
|
ai('LIR-FORMAT|'), ai("FILE|#{path = File.expand_path('test/lir.txt')}"),
|
@@ -72,6 +89,23 @@ class TestAttendeeTextReader < AttendeeTestCase
|
|
72
89
|
])
|
73
90
|
end
|
74
91
|
|
92
|
+
def test_lir_file_fields_no_capture
|
93
|
+
meet({ 'files' => 'test/lir.txt', 'records' => '^\[\d+\.\]' }, nil, [
|
94
|
+
ai('LIR-FORMAT|'), ai("FILE|#{path = File.expand_path('test/lir.txt')}"),
|
95
|
+
ai('RECORD|[00237.]'),
|
96
|
+
'GERHARD.',
|
97
|
+
'Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
|
98
|
+
'Die intellektuelle Erschließung des Internet befindet sich in einer Krise. GERHARD ist derzeit weltweit der einzige.',
|
99
|
+
ai('RECORD|[00238.]'),
|
100
|
+
'Automatisches Sammeln, Klassifizieren und Indexieren von wissenschaftlich relevanten Informationsressourcen.',
|
101
|
+
'das DFG-Projekt GERHARD.',
|
102
|
+
ai('RECORD|[00239.]'),
|
103
|
+
'Information Retrieval und Dokumentmanagement im Multimedia-Zeitalter.',
|
104
|
+
'"Das Buch ist ein praxisbezogenes VADEMECUM für alle, die in einer Welt der Datennetze Wissen/Informationen sammeln.',
|
105
|
+
ai("EOF|#{path}")
|
106
|
+
])
|
107
|
+
end
|
108
|
+
|
75
109
|
def test_normal_file
|
76
110
|
meet({ 'files' => 'test/mul.txt' }, nil, [
|
77
111
|
ai("FILE|#{path = File.expand_path('test/mul.txt')}"),
|
@@ -49,7 +49,7 @@ class TestAttendeeTextWriter < AttendeeTestCase
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def test_lir_file
|
52
|
-
meet({ 'ext' => 'vec', 'lir-format' =>
|
52
|
+
meet({ 'ext' => 'vec', 'lir-format' => false }, [
|
53
53
|
ai('LIR-FORMAT|'), ai('FILE|test/lir.txt'),
|
54
54
|
ai('RECORD|00237'),
|
55
55
|
'020: GERHARD.',
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lingo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.3
|
5
|
-
prerelease:
|
4
|
+
version: 1.8.4
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- John Vorhauer
|
@@ -10,116 +9,123 @@ authors:
|
|
10
9
|
autorequire:
|
11
10
|
bindir: bin
|
12
11
|
cert_chain: []
|
13
|
-
date:
|
12
|
+
date: 2013-10-18 00:00:00.000000000 Z
|
14
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: highline
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
15
28
|
- !ruby/object:Gem::Dependency
|
16
29
|
name: ruby-nuggets
|
17
30
|
requirement: !ruby/object:Gem::Requirement
|
18
|
-
none: false
|
19
31
|
requirements:
|
20
|
-
- -
|
32
|
+
- - ">="
|
21
33
|
- !ruby/object:Gem::Version
|
22
|
-
version: 0.
|
34
|
+
version: 0.9.2
|
23
35
|
type: :runtime
|
24
36
|
prerelease: false
|
25
37
|
version_requirements: !ruby/object:Gem::Requirement
|
26
|
-
none: false
|
27
38
|
requirements:
|
28
|
-
- -
|
39
|
+
- - ">="
|
29
40
|
- !ruby/object:Gem::Version
|
30
|
-
version: 0.
|
41
|
+
version: 0.9.2
|
31
42
|
- !ruby/object:Gem::Dependency
|
32
|
-
name:
|
43
|
+
name: sinatra
|
33
44
|
requirement: !ruby/object:Gem::Requirement
|
34
|
-
none: false
|
35
45
|
requirements:
|
36
|
-
- -
|
46
|
+
- - ">="
|
37
47
|
- !ruby/object:Gem::Version
|
38
48
|
version: '0'
|
39
49
|
type: :runtime
|
40
50
|
prerelease: false
|
41
51
|
version_requirements: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
52
|
requirements:
|
44
|
-
- -
|
53
|
+
- - ">="
|
45
54
|
- !ruby/object:Gem::Version
|
46
55
|
version: '0'
|
47
56
|
- !ruby/object:Gem::Dependency
|
48
|
-
name:
|
57
|
+
name: sinatra-contrib
|
49
58
|
requirement: !ruby/object:Gem::Requirement
|
50
|
-
none: false
|
51
59
|
requirements:
|
52
|
-
- -
|
60
|
+
- - ">="
|
53
61
|
- !ruby/object:Gem::Version
|
54
62
|
version: '0'
|
55
63
|
type: :runtime
|
56
64
|
prerelease: false
|
57
65
|
version_requirements: !ruby/object:Gem::Requirement
|
58
|
-
none: false
|
59
66
|
requirements:
|
60
|
-
- -
|
67
|
+
- - ">="
|
61
68
|
- !ruby/object:Gem::Version
|
62
69
|
version: '0'
|
63
70
|
- !ruby/object:Gem::Dependency
|
64
|
-
name:
|
71
|
+
name: unicode
|
65
72
|
requirement: !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
73
|
requirements:
|
68
|
-
- -
|
74
|
+
- - ">="
|
69
75
|
- !ruby/object:Gem::Version
|
70
76
|
version: '0'
|
71
77
|
type: :runtime
|
72
78
|
prerelease: false
|
73
79
|
version_requirements: !ruby/object:Gem::Requirement
|
74
|
-
none: false
|
75
80
|
requirements:
|
76
|
-
- -
|
81
|
+
- - ">="
|
77
82
|
- !ruby/object:Gem::Version
|
78
83
|
version: '0'
|
79
84
|
- !ruby/object:Gem::Dependency
|
80
85
|
name: diff-lcs
|
81
86
|
requirement: !ruby/object:Gem::Requirement
|
82
|
-
none: false
|
83
87
|
requirements:
|
84
|
-
- -
|
88
|
+
- - ">="
|
85
89
|
- !ruby/object:Gem::Version
|
86
90
|
version: 1.1.3
|
87
91
|
type: :development
|
88
92
|
prerelease: false
|
89
93
|
version_requirements: !ruby/object:Gem::Requirement
|
90
|
-
none: false
|
91
94
|
requirements:
|
92
|
-
- -
|
95
|
+
- - ">="
|
93
96
|
- !ruby/object:Gem::Version
|
94
97
|
version: 1.1.3
|
95
98
|
- !ruby/object:Gem::Dependency
|
96
99
|
name: open4
|
97
100
|
requirement: !ruby/object:Gem::Requirement
|
98
|
-
none: false
|
99
101
|
requirements:
|
100
|
-
- -
|
102
|
+
- - ">="
|
101
103
|
- !ruby/object:Gem::Version
|
102
104
|
version: '0'
|
103
105
|
type: :development
|
104
106
|
prerelease: false
|
105
107
|
version_requirements: !ruby/object:Gem::Requirement
|
106
|
-
none: false
|
107
108
|
requirements:
|
108
|
-
- -
|
109
|
+
- - ">="
|
109
110
|
- !ruby/object:Gem::Version
|
110
111
|
version: '0'
|
111
|
-
description:
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
of
|
112
|
+
description: |
|
113
|
+
Lingo is an open source indexing system for research and teachings.
|
114
|
+
The main functions of Lingo are:
|
115
|
+
|
116
|
+
* identification of (i.e. reduction to) basic word form by means of
|
117
|
+
dictionaries and suffix lists
|
118
|
+
* algorithmic decomposition
|
119
|
+
* dictionary-based synonymisation and identification of phrases
|
120
|
+
* generic identification of phrases/word sequences based on patterns
|
121
|
+
of word classes
|
116
122
|
email:
|
117
123
|
- lingo@vorhauer.de
|
118
|
-
- jens.wille@
|
124
|
+
- jens.wille@gmail.com
|
119
125
|
executables:
|
120
|
-
- lingosrv
|
121
126
|
- lingo
|
122
127
|
- lingoctl
|
128
|
+
- lingosrv
|
123
129
|
- lingoweb
|
124
130
|
extensions: []
|
125
131
|
extra_rdoc_files:
|
@@ -127,172 +133,172 @@ extra_rdoc_files:
|
|
127
133
|
- COPYING
|
128
134
|
- ChangeLog
|
129
135
|
files:
|
130
|
-
- lib/lingo
|
131
|
-
- lib/lingo/version.rb
|
132
|
-
- lib/lingo/cli.rb
|
133
|
-
- lib/lingo/srv.rb
|
134
|
-
- lib/lingo/web.rb
|
136
|
+
- lib/lingo.rb
|
135
137
|
- lib/lingo/agenda_item.rb
|
136
|
-
- lib/lingo/
|
137
|
-
- lib/lingo/
|
138
|
-
- lib/lingo/language/word.rb
|
139
|
-
- lib/lingo/language/char.rb
|
140
|
-
- lib/lingo/language/lexical.rb
|
141
|
-
- lib/lingo/language/token.rb
|
142
|
-
- lib/lingo/language/grammar.rb
|
143
|
-
- lib/lingo/language/dictionary.rb
|
144
|
-
- lib/lingo/language/word_form.rb
|
145
|
-
- lib/lingo/error.rb
|
146
|
-
- lib/lingo/database/source.rb
|
147
|
-
- lib/lingo/database/source/key_value.rb
|
148
|
-
- lib/lingo/database/source/multi_value.rb
|
149
|
-
- lib/lingo/database/source/single_word.rb
|
150
|
-
- lib/lingo/database/source/word_class.rb
|
151
|
-
- lib/lingo/database/source/multi_key.rb
|
152
|
-
- lib/lingo/database/crypter.rb
|
153
|
-
- lib/lingo/database/sdbm_store.rb
|
154
|
-
- lib/lingo/database/libcdb_store.rb
|
155
|
-
- lib/lingo/database/hash_store.rb
|
156
|
-
- lib/lingo/database/show_progress.rb
|
157
|
-
- lib/lingo/database/gdbm_store.rb
|
158
|
-
- lib/lingo/call.rb
|
138
|
+
- lib/lingo/app.rb
|
139
|
+
- lib/lingo/attendee.rb
|
159
140
|
- lib/lingo/attendee/abbreviator.rb
|
160
|
-
- lib/lingo/attendee/text_writer.rb
|
161
141
|
- lib/lingo/attendee/debugger.rb
|
162
|
-
- lib/lingo/attendee/dehyphenizer.rb
|
163
|
-
- lib/lingo/attendee/stemmer/porter.rb
|
164
142
|
- lib/lingo/attendee/decomposer.rb
|
143
|
+
- lib/lingo/attendee/dehyphenizer.rb
|
144
|
+
- lib/lingo/attendee/formatter.rb
|
165
145
|
- lib/lingo/attendee/multi_worder.rb
|
166
|
-
- lib/lingo/attendee/tokenizer.rb
|
167
|
-
- lib/lingo/attendee/word_searcher.rb
|
168
|
-
- lib/lingo/attendee/variator.rb
|
169
146
|
- lib/lingo/attendee/noneword_filter.rb
|
170
|
-
- lib/lingo/attendee/sequencer.rb
|
171
147
|
- lib/lingo/attendee/object_filter.rb
|
148
|
+
- lib/lingo/attendee/sequencer.rb
|
172
149
|
- lib/lingo/attendee/stemmer.rb
|
150
|
+
- lib/lingo/attendee/stemmer/porter.rb
|
173
151
|
- lib/lingo/attendee/synonymer.rb
|
174
152
|
- lib/lingo/attendee/text_reader.rb
|
175
|
-
- lib/lingo/attendee/
|
153
|
+
- lib/lingo/attendee/text_writer.rb
|
154
|
+
- lib/lingo/attendee/tokenizer.rb
|
155
|
+
- lib/lingo/attendee/variator.rb
|
176
156
|
- lib/lingo/attendee/vector_filter.rb
|
177
|
-
- lib/lingo/
|
157
|
+
- lib/lingo/attendee/word_searcher.rb
|
158
|
+
- lib/lingo/buffered_attendee.rb
|
159
|
+
- lib/lingo/call.rb
|
160
|
+
- lib/lingo/cli.rb
|
161
|
+
- lib/lingo/config.rb
|
162
|
+
- lib/lingo/ctl.rb
|
178
163
|
- lib/lingo/database.rb
|
179
|
-
- lib/lingo/
|
180
|
-
- lib/lingo/
|
164
|
+
- lib/lingo/database/crypter.rb
|
165
|
+
- lib/lingo/database/gdbm_store.rb
|
166
|
+
- lib/lingo/database/hash_store.rb
|
167
|
+
- lib/lingo/database/libcdb_store.rb
|
168
|
+
- lib/lingo/database/sdbm_store.rb
|
169
|
+
- lib/lingo/database/show_progress.rb
|
170
|
+
- lib/lingo/database/source.rb
|
171
|
+
- lib/lingo/database/source/key_value.rb
|
172
|
+
- lib/lingo/database/source/multi_key.rb
|
173
|
+
- lib/lingo/database/source/multi_value.rb
|
174
|
+
- lib/lingo/database/source/single_word.rb
|
175
|
+
- lib/lingo/database/source/word_class.rb
|
181
176
|
- lib/lingo/debug.rb
|
182
|
-
- lib/lingo/
|
183
|
-
- lib/lingo/
|
184
|
-
- lib/lingo.rb
|
185
|
-
-
|
177
|
+
- lib/lingo/error.rb
|
178
|
+
- lib/lingo/language.rb
|
179
|
+
- lib/lingo/language/char.rb
|
180
|
+
- lib/lingo/language/dictionary.rb
|
181
|
+
- lib/lingo/language/grammar.rb
|
182
|
+
- lib/lingo/language/lexical.rb
|
183
|
+
- lib/lingo/language/lexical_hash.rb
|
184
|
+
- lib/lingo/language/token.rb
|
185
|
+
- lib/lingo/language/word.rb
|
186
|
+
- lib/lingo/language/word_form.rb
|
187
|
+
- lib/lingo/show_progress.rb
|
188
|
+
- lib/lingo/srv.rb
|
189
|
+
- lib/lingo/version.rb
|
190
|
+
- lib/lingo/web.rb
|
186
191
|
- bin/lingo
|
187
192
|
- bin/lingoctl
|
193
|
+
- bin/lingosrv
|
188
194
|
- bin/lingoweb
|
189
195
|
- lingo.rb
|
190
|
-
- lingo.cfg
|
191
196
|
- lingo-call.cfg
|
197
|
+
- lingo.cfg
|
192
198
|
- lir.cfg
|
193
199
|
- de.lang
|
194
200
|
- en.lang
|
195
201
|
- ru.lang
|
196
|
-
- de/lingo-dic.txt
|
197
202
|
- de/lingo-abk.txt
|
203
|
+
- de/lingo-dic.txt
|
198
204
|
- de/lingo-mul.txt
|
199
205
|
- de/lingo-syn.txt
|
200
|
-
- de/user-dic.txt
|
201
|
-
- de/test_syn2.txt
|
202
|
-
- de/test_mul2.txt
|
203
|
-
- de/test_mul.txt
|
204
|
-
- de/test_syn.txt
|
205
206
|
- de/test_dic.txt
|
207
|
+
- de/test_mul.txt
|
208
|
+
- de/test_mul2.txt
|
206
209
|
- de/test_singleword.txt
|
210
|
+
- de/test_syn.txt
|
211
|
+
- de/test_syn2.txt
|
212
|
+
- de/user-dic.txt
|
207
213
|
- en/lingo-dic.txt
|
214
|
+
- en/lingo-irr.txt
|
208
215
|
- en/lingo-mul.txt
|
209
|
-
- en/lingo-wdn.txt
|
210
216
|
- en/lingo-syn.txt
|
211
|
-
- en/lingo-
|
217
|
+
- en/lingo-wdn.txt
|
212
218
|
- en/user-dic.txt
|
213
219
|
- ru/lingo-dic.txt
|
214
220
|
- ru/lingo-mul.txt
|
215
221
|
- ru/lingo-syn.txt
|
216
|
-
- txt/artikel.txt
|
217
222
|
- txt/artikel-en.txt
|
218
223
|
- txt/artikel-ru.txt
|
224
|
+
- txt/artikel.txt
|
219
225
|
- txt/lir.txt
|
220
|
-
- lib/lingo/srv/lingosrv.cfg
|
221
226
|
- lib/lingo/srv/config.ru
|
227
|
+
- lib/lingo/srv/lingosrv.cfg
|
228
|
+
- lib/lingo/web/config.ru
|
222
229
|
- lib/lingo/web/lingoweb.cfg
|
223
230
|
- lib/lingo/web/public/lingo.png
|
224
231
|
- lib/lingo/web/public/lingoweb.css
|
225
232
|
- lib/lingo/web/views/index.erb
|
226
|
-
- lib/lingo/web/config.ru
|
227
233
|
- COPYING
|
228
234
|
- ChangeLog
|
229
|
-
- Rakefile
|
230
235
|
- README
|
236
|
+
- Rakefile
|
231
237
|
- spec/spec_helper.rb
|
232
|
-
- .rspec
|
233
|
-
- test/
|
234
|
-
- test/ref/lir.mul
|
235
|
-
- test/ref/lir.seq
|
236
|
-
- test/ref/artikel.mul
|
237
|
-
- test/ref/lir.syn
|
238
|
-
- test/ref/artikel.ver
|
239
|
-
- test/ref/artikel.seq
|
240
|
-
- test/ref/artikel.non
|
241
|
-
- test/ref/lir.non
|
242
|
-
- test/ref/artikel.vec
|
243
|
-
- test/ref/lir.vec
|
244
|
-
- test/ref/artikel.syn
|
245
|
-
- test/lir2.txt
|
246
|
-
- test/ts_database.rb
|
247
|
-
- test/test_helper.rb
|
248
|
-
- test/attendee/ts_object_filter.rb
|
249
|
-
- test/attendee/ts_vector_filter.rb
|
250
|
-
- test/attendee/ts_synonymer.rb
|
238
|
+
- ".rspec"
|
239
|
+
- test/attendee/ts_abbreviator.rb
|
251
240
|
- test/attendee/ts_decomposer.rb
|
252
|
-
- test/attendee/
|
241
|
+
- test/attendee/ts_multi_worder.rb
|
253
242
|
- test/attendee/ts_noneword_filter.rb
|
254
|
-
- test/attendee/
|
243
|
+
- test/attendee/ts_object_filter.rb
|
255
244
|
- test/attendee/ts_sequencer.rb
|
256
|
-
- test/attendee/
|
245
|
+
- test/attendee/ts_stemmer.rb
|
246
|
+
- test/attendee/ts_synonymer.rb
|
257
247
|
- test/attendee/ts_text_reader.rb
|
258
|
-
- test/attendee/
|
248
|
+
- test/attendee/ts_text_writer.rb
|
249
|
+
- test/attendee/ts_tokenizer.rb
|
259
250
|
- test/attendee/ts_variator.rb
|
260
|
-
- test/attendee/
|
251
|
+
- test/attendee/ts_vector_filter.rb
|
261
252
|
- test/attendee/ts_word_searcher.rb
|
253
|
+
- test/lir.txt
|
262
254
|
- test/lir.vec
|
255
|
+
- test/lir2.txt
|
263
256
|
- test/mul.txt
|
264
|
-
- test/
|
257
|
+
- test/ref/artikel.mul
|
258
|
+
- test/ref/artikel.non
|
259
|
+
- test/ref/artikel.seq
|
260
|
+
- test/ref/artikel.syn
|
261
|
+
- test/ref/artikel.vec
|
262
|
+
- test/ref/artikel.ven
|
263
|
+
- test/ref/artikel.ver
|
264
|
+
- test/ref/lir.mul
|
265
|
+
- test/ref/lir.non
|
266
|
+
- test/ref/lir.seq
|
267
|
+
- test/ref/lir.syn
|
268
|
+
- test/ref/lir.vec
|
269
|
+
- test/test_helper.rb
|
270
|
+
- test/ts_database.rb
|
265
271
|
- test/ts_language.rb
|
266
272
|
homepage: http://lex-lingo.de
|
267
|
-
licenses:
|
273
|
+
licenses:
|
274
|
+
- AGPL
|
275
|
+
metadata: {}
|
268
276
|
post_install_message:
|
269
277
|
rdoc_options:
|
270
|
-
- --charset
|
278
|
+
- "--charset"
|
271
279
|
- UTF-8
|
272
|
-
- --line-numbers
|
273
|
-
- --all
|
274
|
-
- --title
|
275
|
-
- lingo Application documentation (v1.8.3)
|
276
|
-
- --main
|
280
|
+
- "--line-numbers"
|
281
|
+
- "--all"
|
282
|
+
- "--title"
|
283
|
+
- lingo Application documentation (v1.8.4)
|
284
|
+
- "--main"
|
277
285
|
- README
|
278
286
|
require_paths:
|
279
287
|
- lib
|
280
288
|
required_ruby_version: !ruby/object:Gem::Requirement
|
281
|
-
none: false
|
282
289
|
requirements:
|
283
|
-
- -
|
290
|
+
- - ">="
|
284
291
|
- !ruby/object:Gem::Version
|
285
292
|
version: 1.9.2
|
286
293
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
287
|
-
none: false
|
288
294
|
requirements:
|
289
|
-
- -
|
295
|
+
- - ">="
|
290
296
|
- !ruby/object:Gem::Version
|
291
297
|
version: '0'
|
292
298
|
requirements: []
|
293
299
|
rubyforge_project:
|
294
|
-
rubygems_version: 1.
|
300
|
+
rubygems_version: 2.1.9
|
295
301
|
signing_key:
|
296
|
-
specification_version:
|
302
|
+
specification_version: 4
|
297
303
|
summary: The full-featured automatic indexing system
|
298
304
|
test_files: []
|