lingo 1.8.5 → 1.8.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +25 -0
- data/README +7 -5
- data/Rakefile +58 -55
- data/{lingo-call.cfg → config/lingo-call.cfg} +1 -1
- data/{lingo.cfg → config/lingo.cfg} +10 -2
- data/{lir.cfg → config/lir.cfg} +10 -2
- data/{de → dict/de}/lingo-abk.txt +0 -0
- data/{de → dict/de}/lingo-dic.txt +0 -0
- data/{de → dict/de}/lingo-mul.txt +0 -0
- data/{de → dict/de}/lingo-syn.txt +0 -0
- data/{de → dict/de}/test_dic.txt +0 -0
- data/{de → dict/de}/test_gen.txt +0 -0
- data/{de → dict/de}/test_mu2.txt +0 -0
- data/{de → dict/de}/test_mul.txt +0 -0
- data/{de → dict/de}/test_sgw.txt +0 -0
- data/{de → dict/de}/test_syn.txt +0 -0
- data/{de → dict/de}/user-dic.txt +0 -0
- data/{en → dict/en}/lingo-dic.txt +0 -0
- data/{en → dict/en}/lingo-irr.txt +0 -0
- data/{en → dict/en}/lingo-mul.txt +0 -0
- data/{en → dict/en}/lingo-syn.txt +0 -0
- data/{en → dict/en}/lingo-wdn.txt +0 -0
- data/{en → dict/en}/user-dic.txt +0 -0
- data/{ru → dict/ru}/lingo-dic.txt +0 -0
- data/{ru → dict/ru}/lingo-mul.txt +0 -0
- data/{ru → dict/ru}/lingo-syn.txt +0 -0
- data/{ru → dict/ru}/user-dic.txt +0 -0
- data/{de.lang → lang/de.lang} +1 -1
- data/{en.lang → lang/en.lang} +0 -0
- data/{ru.lang → lang/ru.lang} +0 -0
- data/lib/lingo.rb +14 -15
- data/lib/lingo/app.rb +4 -2
- data/lib/lingo/attendee.rb +23 -43
- data/lib/lingo/attendee/abbreviator.rb +5 -5
- data/lib/lingo/attendee/debugger.rb +39 -12
- data/lib/lingo/attendee/decomposer.rb +3 -4
- data/lib/lingo/attendee/dehyphenizer.rb +4 -4
- data/lib/lingo/attendee/formatter.rb +1 -3
- data/lib/lingo/attendee/multi_worder.rb +3 -4
- data/lib/lingo/attendee/noneword_filter.rb +8 -12
- data/lib/lingo/attendee/object_filter.rb +6 -3
- data/lib/lingo/attendee/sequencer.rb +5 -5
- data/lib/lingo/attendee/stemmer.rb +3 -2
- data/lib/lingo/attendee/synonymer.rb +3 -4
- data/lib/lingo/attendee/text_reader.rb +39 -38
- data/lib/lingo/attendee/text_writer.rb +10 -10
- data/lib/lingo/attendee/tokenizer.rb +63 -33
- data/lib/lingo/attendee/variator.rb +3 -7
- data/lib/lingo/attendee/vector_filter.rb +132 -65
- data/lib/lingo/attendee/word_searcher.rb +5 -3
- data/lib/lingo/buffered_attendee.rb +1 -3
- data/lib/lingo/call.rb +4 -3
- data/lib/lingo/cli.rb +5 -1
- data/lib/lingo/config.rb +11 -5
- data/lib/lingo/ctl.rb +3 -3
- data/lib/lingo/database.rb +3 -1
- data/lib/lingo/database/crypter.rb +1 -3
- data/lib/lingo/database/source.rb +3 -1
- data/lib/lingo/database/source/key_value.rb +3 -1
- data/lib/lingo/database/source/multi_key.rb +3 -1
- data/lib/lingo/database/source/multi_value.rb +3 -1
- data/lib/lingo/database/source/single_word.rb +3 -1
- data/lib/lingo/database/source/word_class.rb +3 -1
- data/lib/lingo/debug.rb +5 -5
- data/lib/lingo/{agenda_item.rb → deferred_attendee.rb} +21 -12
- data/lib/lingo/error.rb +1 -1
- data/lib/lingo/language.rb +1 -9
- data/lib/lingo/language/dictionary.rb +2 -17
- data/lib/lingo/language/grammar.rb +10 -10
- data/lib/lingo/language/lexical.rb +2 -0
- data/lib/lingo/language/lexical_hash.rb +2 -0
- data/lib/lingo/language/token.rb +17 -3
- data/lib/lingo/language/word.rb +13 -5
- data/lib/lingo/language/word_form.rb +5 -3
- data/lib/lingo/progress.rb +2 -2
- data/lib/lingo/srv.rb +1 -1
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web.rb +1 -1
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/test/attendee/ts_abbreviator.rb +4 -2
- data/test/attendee/ts_multi_worder.rb +81 -88
- data/test/attendee/ts_noneword_filter.rb +2 -2
- data/test/attendee/ts_object_filter.rb +2 -2
- data/test/attendee/ts_sequencer.rb +40 -20
- data/test/attendee/ts_stemmer.rb +52 -26
- data/test/attendee/ts_text_reader.rb +75 -56
- data/test/attendee/ts_text_writer.rb +6 -4
- data/test/attendee/ts_tokenizer.rb +304 -193
- data/test/attendee/ts_vector_filter.rb +242 -9
- data/test/ref/artikel.non +3 -0
- data/test/ref/artikel.vec +1 -4
- data/test/ref/artikel.vef +940 -0
- data/test/ref/artikel.ven +0 -3
- data/test/ref/artikel.ver +0 -3
- data/test/ref/artikel.vet +2580 -0
- data/test/ref/lir.non +34 -31
- data/test/ref/lir.seq +14 -15
- data/test/ref/lir.vec +37 -37
- data/test/ref/lir.vef +329 -0
- data/test/ref/lir.ven +329 -0
- data/test/ref/lir.ver +329 -0
- data/test/ref/lir.vet +329 -0
- data/test/test_helper.rb +29 -16
- data/test/ts_language.rb +6 -47
- metadata +74 -87
- data/lingo.rb +0 -29
- data/spec/spec_helper.rb +0 -5
data/test/test_helper.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'test/unit'
|
4
4
|
require 'lingo'
|
5
5
|
|
6
|
-
class LingoTestCase <
|
6
|
+
class LingoTestCase < Test::Unit::TestCase
|
7
7
|
|
8
8
|
unless const_defined?(:TEST_FILE)
|
9
9
|
TEST_FILE = 'test/de/test.txt'
|
@@ -20,12 +20,18 @@ class LingoTestCase < Test::Unit::TestCase
|
|
20
20
|
[a || '', b || '', *c]
|
21
21
|
end
|
22
22
|
|
23
|
+
def li(t, o)
|
24
|
+
["#{t}\r\n", o]
|
25
|
+
end
|
26
|
+
|
23
27
|
def ai(t)
|
24
|
-
|
28
|
+
i = t.split('|')
|
29
|
+
i.unshift(i.shift.to_sym)
|
25
30
|
end
|
26
31
|
|
27
32
|
def tk(t)
|
28
|
-
|
33
|
+
a, b, *c = split(t, /\|(?=[A-Z\d])/)
|
34
|
+
Lingo::Language::Token.new(a, b, *c.map(&:to_i))
|
29
35
|
end
|
30
36
|
|
31
37
|
def lx(t)
|
@@ -55,8 +61,15 @@ class AttendeeTestCase < LingoTestCase
|
|
55
61
|
@lingo.reset
|
56
62
|
|
57
63
|
list = [{ @attendee => cfg }]
|
58
|
-
|
59
|
-
list.
|
64
|
+
|
65
|
+
list.unshift('TestSpooler' => {
|
66
|
+
'out' => 'input',
|
67
|
+
'input' => input,
|
68
|
+
'pos' => @attendee == 'Tokenizer' }) if input
|
69
|
+
|
70
|
+
list.push('TestDumper' => {
|
71
|
+
'in' => 'output',
|
72
|
+
'output' => output = [] }) if expect
|
60
73
|
|
61
74
|
@lingo.invite(list)
|
62
75
|
@lingo.start
|
@@ -74,32 +87,31 @@ class Lingo
|
|
74
87
|
|
75
88
|
class TestSpooler < self
|
76
89
|
|
77
|
-
protected
|
78
|
-
|
79
90
|
def init
|
80
|
-
@input = get_key('input')
|
91
|
+
@input, @pos = get_key('input'), get_key('pos', nil) && 0
|
81
92
|
end
|
82
93
|
|
83
|
-
def control(cmd
|
84
|
-
|
94
|
+
def control(cmd)
|
95
|
+
if cmd == :TALK
|
96
|
+
@input.each { |i| i.is_a?(Array) ? command(*i) :
|
97
|
+
@pos ? forward(i, @pos += i.bytesize) : forward(i) }
|
98
|
+
end
|
85
99
|
end
|
86
100
|
|
87
101
|
end
|
88
102
|
|
89
103
|
class TestDumper < self
|
90
104
|
|
91
|
-
protected
|
92
|
-
|
93
105
|
def init
|
94
106
|
@output = get_key('output')
|
95
107
|
end
|
96
108
|
|
97
|
-
def control(
|
98
|
-
@output
|
109
|
+
def control(*args)
|
110
|
+
@output.push(args)
|
99
111
|
end
|
100
112
|
|
101
|
-
def process(obj)
|
102
|
-
@output
|
113
|
+
def process(obj, *rest)
|
114
|
+
@output.push(rest.empty? ? obj : rest.unshift(obj))
|
103
115
|
end
|
104
116
|
|
105
117
|
end
|
@@ -115,4 +127,5 @@ class Lingo
|
|
115
127
|
end
|
116
128
|
|
117
129
|
end
|
130
|
+
|
118
131
|
end
|
data/test/ts_language.rb
CHANGED
@@ -23,8 +23,6 @@ class TestLexicalHash < LingoTestCase
|
|
23
23
|
$stderr = old_stderr
|
24
24
|
end
|
25
25
|
|
26
|
-
# TODO: Crypt testen...
|
27
|
-
|
28
26
|
def test_cache
|
29
27
|
lh('sys-dic') { |ds|
|
30
28
|
assert_equal([lx('regen|s|m'), lx('regen|s|n'), lx('regen|v'), lx('rege|a')], ds['regen'])
|
@@ -38,11 +36,9 @@ class TestLexicalHash < LingoTestCase
|
|
38
36
|
|
39
37
|
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
40
38
|
|
41
|
-
# Keine Store-Datei vorhanden, nur Text vorhanden
|
42
39
|
File.delete(*Dir["#{Lingo.find(:store, txt_file)}.*"])
|
43
40
|
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
44
41
|
|
45
|
-
# Store vorhanden, aber Text ist neuer
|
46
42
|
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
47
43
|
end
|
48
44
|
|
@@ -59,8 +55,8 @@ class TestLexicalHash < LingoTestCase
|
|
59
55
|
assert_equal([lx('abelscher ring ohne nullteiler|m')], ds['abelscher ring ohne nullteiler'])
|
60
56
|
assert_equal([4], ds['abelscher ring ohne'])
|
61
57
|
assert_equal([lx('alleinreisende frau|m')], ds['alleinreisend frau'])
|
62
|
-
assert_equal([lx('abschaltbarer leistungshalbleiter|m')], ds['
|
63
|
-
assert_equal(nil, ds['
|
58
|
+
assert_equal([lx('abschaltbarer leistungshalbleiter|m')], ds['abschaltbarer leistungshalbleiter'])
|
59
|
+
assert_equal(nil, ds['abschaltbar leistungshalbleiter'])
|
64
60
|
}
|
65
61
|
end
|
66
62
|
|
@@ -113,12 +109,6 @@ class TestDictionary < LingoTestCase
|
|
113
109
|
end
|
114
110
|
|
115
111
|
def test_params
|
116
|
-
# Keine Sprach-Konfiguration angegeben
|
117
|
-
#assert_raise(RuntimeError) {
|
118
|
-
# Lingo::Language::Dictionary.new({ 'source' => %w[sys-dic] }, @lingo)
|
119
|
-
#}
|
120
|
-
|
121
|
-
# Falsche Parameter angegeben (Pflichtparameter ohne Defaultwert)
|
122
112
|
assert_raise(ArgumentError) {
|
123
113
|
Lingo::Language::Dictionary.new({ 'course' => %w[sys-dic] }, @lingo)
|
124
114
|
}
|
@@ -142,39 +132,27 @@ class TestDictionary < LingoTestCase
|
|
142
132
|
|
143
133
|
def test_select_two_sources_mode_first
|
144
134
|
ld('source' => %w[sys-dic tst-dic], 'mode' => 'first') { |dic|
|
145
|
-
# in keiner Quelle vorhanden
|
146
135
|
assert_equal([], dic.select('hasennasen'))
|
147
|
-
# nur in erster Quelle vorhanden
|
148
136
|
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
149
|
-
# nur in zweiter Quelle vorhanden
|
150
137
|
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
151
|
-
# in beiden Quellen vorhanden
|
152
138
|
assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n')], dic.select('a-dur'))
|
153
139
|
}
|
154
140
|
end
|
155
141
|
|
156
142
|
def test_select_two_sources_mode_first_flipped
|
157
143
|
ld('source' => %w[tst-dic sys-dic], 'mode' => 'first') { |dic|
|
158
|
-
# in keiner Quelle vorhanden
|
159
144
|
assert_equal([], dic.select('hasennasen'))
|
160
|
-
# nur in erster Quelle vorhanden
|
161
145
|
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
162
|
-
# nur in zweiter Quelle vorhanden
|
163
146
|
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
164
|
-
# in beiden Quellen vorhanden
|
165
147
|
assert_equal([lx('b-dur|s')], dic.select('a-dur'))
|
166
148
|
}
|
167
149
|
end
|
168
150
|
|
169
151
|
def test_select_two_sources_mode_all
|
170
152
|
ld('source' => %w[sys-dic tst-dic], 'mode' => 'all') { |dic|
|
171
|
-
# in keiner Quelle vorhanden
|
172
153
|
assert_equal([], dic.select('hasennasen'))
|
173
|
-
# nur in erster Quelle vorhanden
|
174
154
|
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
175
|
-
# nur in zweiter Quelle vorhanden
|
176
155
|
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
177
|
-
# in beiden Quellen vorhanden
|
178
156
|
assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n'), lx('b-dur|s')], dic.select('a-dur'))
|
179
157
|
assert_equal([lx('aas|s|n'), lx('aas|s')], dic.select('aas'))
|
180
158
|
}
|
@@ -182,15 +160,11 @@ class TestDictionary < LingoTestCase
|
|
182
160
|
|
183
161
|
def test_select_two_sources_mode_default
|
184
162
|
ld('source' => %w[sys-dic tst-dic]) { |dic|
|
185
|
-
# in keiner Quelle vorhanden
|
186
163
|
assert_equal([], dic.select('hasennasen'))
|
187
|
-
# nur in erster Quelle vorhanden
|
188
164
|
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
189
|
-
# nur in zweiter Quelle vorhanden
|
190
165
|
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
191
166
|
assert_equal([lx('wirkungsort|s'), lx('wirkung|s+'), lx('ort|s+')], dic.select('wirkungsort'))
|
192
167
|
assert_equal([lx('zettelkatalog|k'), lx('zettel|s+'), lx('katalog|s+')], dic.select('zettelkatalog'))
|
193
|
-
# in beiden Quellen vorhanden
|
194
168
|
assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n'), lx('b-dur|s')], dic.select('a-dur'))
|
195
169
|
assert_equal([lx('aas|s|n'), lx('aas|s')], dic.select('aas'))
|
196
170
|
}
|
@@ -207,7 +181,7 @@ class TestDictionary < LingoTestCase
|
|
207
181
|
|
208
182
|
def test_infix_lexicals
|
209
183
|
ld('source' => %w[sys-dic]) { |dic|
|
210
|
-
assert_equal(
|
184
|
+
assert_equal([lx('information|f')], ax(dic, 'informations', :infix))
|
211
185
|
}
|
212
186
|
end
|
213
187
|
|
@@ -222,7 +196,7 @@ class TestDictionary < LingoTestCase
|
|
222
196
|
|
223
197
|
def test_select_with_infix
|
224
198
|
ld('source' => %w[sys-dic]) { |dic|
|
225
|
-
assert_equal(
|
199
|
+
assert_equal([lx('information|f')], ax(dic, 'informations', :infix))
|
226
200
|
}
|
227
201
|
end
|
228
202
|
|
@@ -250,10 +224,6 @@ class TestGrammar < LingoTestCase
|
|
250
224
|
@lingo = Lingo.new
|
251
225
|
end
|
252
226
|
|
253
|
-
def test_params
|
254
|
-
# Die gleichen Fälle wie bei Dictionary, daher nicht notwendig
|
255
|
-
end
|
256
|
-
|
257
227
|
def test_cache
|
258
228
|
lg { |gra|
|
259
229
|
assert_equal(
|
@@ -294,7 +264,7 @@ class TestGrammar < LingoTestCase
|
|
294
264
|
)
|
295
265
|
|
296
266
|
assert_equal(
|
297
|
-
wd('benutzerforschung|KOM', 'benutzerforschung|k', '
|
267
|
+
wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzer|s+', 'forschung|s+'),
|
298
268
|
gra.find_compound('benutzerforschung')
|
299
269
|
)
|
300
270
|
|
@@ -318,64 +288,53 @@ class TestGrammar < LingoTestCase
|
|
318
288
|
gra.find_compound('titelbestandsbestände')
|
319
289
|
)
|
320
290
|
|
321
|
-
# hinterer Teil ist ein Wort mit Suffix
|
322
291
|
assert_equal(
|
323
292
|
wd('hasenbraten|KOM', 'hasenbraten|k', 'hase|s+', 'braten|v+'),
|
324
293
|
gra.find_compound('hasenbraten')
|
325
294
|
)
|
326
295
|
|
327
|
-
# hinterer Teil ist ein Wort mit Infix ohne Schwanz
|
328
296
|
assert_equal(
|
329
297
|
wd('nasenlaufen|KOM', 'nasenlaufen|k', 'nase|s+', 'laufen|v+'),
|
330
298
|
gra.find_compound('nasenlaufen')
|
331
299
|
)
|
332
300
|
|
333
|
-
# hinterer Teil ist ein Wort mit Infix mit Schwanz
|
334
301
|
assert_equal(
|
335
302
|
wd('nasenlaufens|KOM', 'nasenlaufen|k', 'nase|s+', 'laufen|v+'),
|
336
303
|
gra.find_compound('nasenlaufens')
|
337
304
|
)
|
338
305
|
|
339
|
-
# hinterer Teil ist ein Kompositum nach Bindestrich
|
340
306
|
assert_equal(
|
341
307
|
wd('arrafat-nachfolgebedarf|KOM', 'arrafat-nachfolgebedarf|k', 'arrafat|x+', 'nachfolge|s+', 'bedarf|s+'),
|
342
308
|
gra.find_compound('arrafat-nachfolgebedarf')
|
343
309
|
)
|
344
310
|
|
345
|
-
# hinterer Teil ist ein TakeItAsIs nach Bindestrich
|
346
311
|
assert_equal(
|
347
312
|
wd('nachfolge-arrafat|KOM', 'nachfolge-arrafat|k', 'nachfolge|s+', 'arrafat|x+'),
|
348
313
|
gra.find_compound('nachfolge-arrafat')
|
349
314
|
)
|
350
315
|
|
351
|
-
# vorderer Teil ist ein Wort mit Suffix => siehe Hasenbraten
|
352
|
-
# vorderer Teil ist ein Kompositum
|
353
316
|
assert_equal(
|
354
317
|
wd('morgenonkelmantel|KOM', 'morgenonkelmantel|k', 'morgen|w+', 'morgen|s+', 'onkel|s+', 'mantel|s+'),
|
355
318
|
gra.find_compound('morgenonkelmantel')
|
356
319
|
)
|
357
320
|
|
358
|
-
# vorderer Teil ist ein TakeItAsIs vor Bindestrich / bindestrichversion
|
359
321
|
assert_equal(
|
360
322
|
wd('arrafat-nachfolger|KOM', 'arrafat-nachfolger|k', 'arrafat|x+', 'nachfolger|s+'),
|
361
323
|
gra.find_compound('arrafat-nachfolger')
|
362
324
|
)
|
363
325
|
|
364
|
-
# bindestrichversion zwei-teilig
|
365
326
|
assert_equal(
|
366
327
|
wd('cd-rom-technologie|KOM', 'cd-rom-technologie|k', 'cd-rom|s+|f', 'cd-rom|s+|m', 'technologie|s+|f'),
|
367
328
|
gra.find_compound('cd-rom-technologie')
|
368
329
|
)
|
369
330
|
|
370
|
-
# bindestrichversion drei-teilig
|
371
331
|
assert_equal(
|
372
332
|
wd('albert-ludwigs-universität|KOM', 'albert-ludwigs-universität|k', 'albert|e+', 'ludwig|e+', 'universität|s+'),
|
373
333
|
gra.find_compound('albert-ludwigs-universität')
|
374
334
|
)
|
375
335
|
|
376
|
-
# normal mit suggestion
|
377
336
|
assert_equal(
|
378
|
-
wd('benutzerforschung|KOM', 'benutzerforschung|k', '
|
337
|
+
wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzer|s+', 'forschung|s+'),
|
379
338
|
gra.find_compound('benutzerforschung')
|
380
339
|
)
|
381
340
|
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lingo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.
|
4
|
+
version: 1.8.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Vorhauer
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2015-02-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: cyclops
|
@@ -17,34 +17,28 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: '0.
|
21
|
-
- - ">="
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.4
|
20
|
+
version: '0.1'
|
24
21
|
type: :runtime
|
25
22
|
prerelease: false
|
26
23
|
version_requirements: !ruby/object:Gem::Requirement
|
27
24
|
requirements:
|
28
25
|
- - "~>"
|
29
26
|
- !ruby/object:Gem::Version
|
30
|
-
version: '0.
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 0.0.4
|
27
|
+
version: '0.1'
|
34
28
|
- !ruby/object:Gem::Dependency
|
35
29
|
name: nuggets
|
36
30
|
requirement: !ruby/object:Gem::Requirement
|
37
31
|
requirements:
|
38
32
|
- - "~>"
|
39
33
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.
|
34
|
+
version: '1.1'
|
41
35
|
type: :runtime
|
42
36
|
prerelease: false
|
43
37
|
version_requirements: !ruby/object:Gem::Requirement
|
44
38
|
requirements:
|
45
39
|
- - "~>"
|
46
40
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1.
|
41
|
+
version: '1.1'
|
48
42
|
- !ruby/object:Gem::Dependency
|
49
43
|
name: rubyzip
|
50
44
|
requirement: !ruby/object:Gem::Requirement
|
@@ -119,16 +113,22 @@ dependencies:
|
|
119
113
|
name: hen
|
120
114
|
requirement: !ruby/object:Gem::Requirement
|
121
115
|
requirements:
|
116
|
+
- - "~>"
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0.8'
|
122
119
|
- - ">="
|
123
120
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
121
|
+
version: 0.8.1
|
125
122
|
type: :development
|
126
123
|
prerelease: false
|
127
124
|
version_requirements: !ruby/object:Gem::Requirement
|
128
125
|
requirements:
|
126
|
+
- - "~>"
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0.8'
|
129
129
|
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version:
|
131
|
+
version: 0.8.1
|
132
132
|
- !ruby/object:Gem::Dependency
|
133
133
|
name: rake
|
134
134
|
requirement: !ruby/object:Gem::Requirement
|
@@ -144,7 +144,7 @@ dependencies:
|
|
144
144
|
- !ruby/object:Gem::Version
|
145
145
|
version: '0'
|
146
146
|
- !ruby/object:Gem::Dependency
|
147
|
-
name:
|
147
|
+
name: test-unit
|
148
148
|
requirement: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
150
|
- - ">="
|
@@ -189,27 +189,34 @@ files:
|
|
189
189
|
- bin/lingoctl
|
190
190
|
- bin/lingosrv
|
191
191
|
- bin/lingoweb
|
192
|
-
-
|
193
|
-
-
|
194
|
-
-
|
195
|
-
- de/lingo-
|
196
|
-
- de/lingo-
|
197
|
-
- de/
|
198
|
-
- de/
|
199
|
-
- de/
|
200
|
-
- de/
|
201
|
-
- de/
|
202
|
-
- de/
|
203
|
-
- de/
|
204
|
-
-
|
205
|
-
-
|
206
|
-
- en/lingo-
|
207
|
-
- en/lingo-
|
208
|
-
- en/lingo-
|
209
|
-
- en/lingo-
|
210
|
-
- en/
|
192
|
+
- config/lingo-call.cfg
|
193
|
+
- config/lingo.cfg
|
194
|
+
- config/lir.cfg
|
195
|
+
- dict/de/lingo-abk.txt
|
196
|
+
- dict/de/lingo-dic.txt
|
197
|
+
- dict/de/lingo-mul.txt
|
198
|
+
- dict/de/lingo-syn.txt
|
199
|
+
- dict/de/test_dic.txt
|
200
|
+
- dict/de/test_gen.txt
|
201
|
+
- dict/de/test_mu2.txt
|
202
|
+
- dict/de/test_mul.txt
|
203
|
+
- dict/de/test_sgw.txt
|
204
|
+
- dict/de/test_syn.txt
|
205
|
+
- dict/de/user-dic.txt
|
206
|
+
- dict/en/lingo-dic.txt
|
207
|
+
- dict/en/lingo-irr.txt
|
208
|
+
- dict/en/lingo-mul.txt
|
209
|
+
- dict/en/lingo-syn.txt
|
210
|
+
- dict/en/lingo-wdn.txt
|
211
|
+
- dict/en/user-dic.txt
|
212
|
+
- dict/ru/lingo-dic.txt
|
213
|
+
- dict/ru/lingo-mul.txt
|
214
|
+
- dict/ru/lingo-syn.txt
|
215
|
+
- dict/ru/user-dic.txt
|
216
|
+
- lang/de.lang
|
217
|
+
- lang/en.lang
|
218
|
+
- lang/ru.lang
|
211
219
|
- lib/lingo.rb
|
212
|
-
- lib/lingo/agenda_item.rb
|
213
220
|
- lib/lingo/app.rb
|
214
221
|
- lib/lingo/attendee.rb
|
215
222
|
- lib/lingo/attendee/abbreviator.rb
|
@@ -249,6 +256,7 @@ files:
|
|
249
256
|
- lib/lingo/database/source/single_word.rb
|
250
257
|
- lib/lingo/database/source/word_class.rb
|
251
258
|
- lib/lingo/debug.rb
|
259
|
+
- lib/lingo/deferred_attendee.rb
|
252
260
|
- lib/lingo/error.rb
|
253
261
|
- lib/lingo/language.rb
|
254
262
|
- lib/lingo/language/char.rb
|
@@ -271,16 +279,6 @@ files:
|
|
271
279
|
- lib/lingo/web/public/lingo.png
|
272
280
|
- lib/lingo/web/public/lingoweb.css
|
273
281
|
- lib/lingo/web/views/index.erb
|
274
|
-
- lingo-call.cfg
|
275
|
-
- lingo.cfg
|
276
|
-
- lingo.rb
|
277
|
-
- lir.cfg
|
278
|
-
- ru.lang
|
279
|
-
- ru/lingo-dic.txt
|
280
|
-
- ru/lingo-mul.txt
|
281
|
-
- ru/lingo-syn.txt
|
282
|
-
- ru/user-dic.txt
|
283
|
-
- spec/spec_helper.rb
|
284
282
|
- test/attendee/ts_abbreviator.rb
|
285
283
|
- test/attendee/ts_decomposer.rb
|
286
284
|
- test/attendee/ts_multi_worder.rb
|
@@ -304,13 +302,19 @@ files:
|
|
304
302
|
- test/ref/artikel.seq
|
305
303
|
- test/ref/artikel.syn
|
306
304
|
- test/ref/artikel.vec
|
305
|
+
- test/ref/artikel.vef
|
307
306
|
- test/ref/artikel.ven
|
308
307
|
- test/ref/artikel.ver
|
308
|
+
- test/ref/artikel.vet
|
309
309
|
- test/ref/lir.mul
|
310
310
|
- test/ref/lir.non
|
311
311
|
- test/ref/lir.seq
|
312
312
|
- test/ref/lir.syn
|
313
313
|
- test/ref/lir.vec
|
314
|
+
- test/ref/lir.vef
|
315
|
+
- test/ref/lir.ven
|
316
|
+
- test/ref/lir.ver
|
317
|
+
- test/ref/lir.vet
|
314
318
|
- test/test_helper.rb
|
315
319
|
- test/ts_database.rb
|
316
320
|
- test/ts_language.rb
|
@@ -324,51 +328,34 @@ licenses:
|
|
324
328
|
metadata: {}
|
325
329
|
post_install_message: |2+
|
326
330
|
|
327
|
-
lingo-1.8.
|
331
|
+
lingo-1.8.6 [2015-02-09]:
|
328
332
|
|
329
|
-
*
|
330
|
-
|
331
|
-
*
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
* Lingo::Attendee::
|
338
|
-
|
339
|
-
* Lingo::Attendee::TextReader
|
340
|
-
|
341
|
-
* Lingo::Attendee::
|
342
|
-
|
343
|
-
* Lingo::Attendee::
|
344
|
-
|
345
|
-
|
346
|
-
*
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
classes/genders.
|
352
|
-
* Lingo::Database::Source::WordClass supports compounds being encoded in the
|
353
|
-
dictionary (appending <tt>+</tt> to their parts' word classes is
|
354
|
-
recommended).
|
355
|
-
* Lingo::Database::Source removes leading and trailing whitespace from
|
356
|
-
dictionary lines.
|
357
|
-
* Lingo::Database::Crypter uses OpenSSL to encrypt/decrypt dictionaries.
|
358
|
-
Note: Can't decrypt dictionaries encrypted with the old scheme anymore.
|
359
|
-
* Lingo::Attendee::Tokenizer learned subset of MediaWiki syntax.
|
360
|
-
* Eliminated pathological behaviour of the +URLS+ rule in
|
361
|
-
Lingo::Attendee::Tokenizer.
|
362
|
-
* Fixed regression introduced in 1.8.2 where <tt>combine: all</tt> would no
|
363
|
-
longer work in Lingo::Attendee::MultiWorder.
|
364
|
-
* Updated and extended Russian dictionaries. (Yulia Dorokhova, Thomas Müller)
|
365
|
-
* +lingoctl+ no longer overwrites existing files without confirmation.
|
366
|
-
* +lingoctl+ learned +archive+ command.
|
367
|
-
* Dictionary cleanup.
|
333
|
+
* Lingo::Attendee::VectorFilter learned +pos+ option to print position and
|
334
|
+
byte offset with each word.
|
335
|
+
* Lingo::Attendee::VectorFilter learned +tfidf+ option to sort results based
|
336
|
+
on their tf–idf[https://en.wikipedia.org/wiki/Tf–idf] score; the document
|
337
|
+
frequencies are calculated over the "corpus" of all files processed during
|
338
|
+
a single program invocation.
|
339
|
+
* Lingo::Attendee::VectorFilter learned +tokens+ option to filter on
|
340
|
+
Lingo::Language::Token in addition to Lingo::Language::Word.
|
341
|
+
* Lingo::Attendee::VectorFilter no longer supports +debug+ (as well as
|
342
|
+
+prompt+ and +preamble+); use Lingo::Attendee::DebugFilter instead.
|
343
|
+
* Lingo::Attendee::TextReader no longer removes line endings; option +chomp+
|
344
|
+
is obsolete.
|
345
|
+
* Lingo::Attendee::TextReader passes byte offset to the following attendee.
|
346
|
+
* Lingo::Attendee::Tokenizer records token's byte offset.
|
347
|
+
* Lingo::Attendee::Tokenizer records token's sequence position.
|
348
|
+
* Lingo::Attendee::Tokenizer learned <tt>skip-tags</tt> option to skip over
|
349
|
+
specified tags' contents.
|
350
|
+
* Lingo::Attendee subclasses warn when invalid or obsolete options or names
|
351
|
+
are used.
|
352
|
+
* Changed German infix substitution +/en+ to +ch/chen+ in order to prevent
|
353
|
+
overly aggressive identifications.
|
354
|
+
* Internal refactoring and API changes.
|
368
355
|
|
369
356
|
rdoc_options:
|
370
357
|
- "--title"
|
371
|
-
- lingo Application documentation (v1.8.
|
358
|
+
- lingo Application documentation (v1.8.6)
|
372
359
|
- "--charset"
|
373
360
|
- UTF-8
|
374
361
|
- "--line-numbers"
|
@@ -389,7 +376,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
389
376
|
version: '0'
|
390
377
|
requirements: []
|
391
378
|
rubyforge_project:
|
392
|
-
rubygems_version: 2.4.
|
379
|
+
rubygems_version: 2.4.5
|
393
380
|
signing_key:
|
394
381
|
specification_version: 4
|
395
382
|
summary: The full-featured automatic indexing system
|