lingo 1.8.5 → 1.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +25 -0
- data/README +7 -5
- data/Rakefile +58 -55
- data/{lingo-call.cfg → config/lingo-call.cfg} +1 -1
- data/{lingo.cfg → config/lingo.cfg} +10 -2
- data/{lir.cfg → config/lir.cfg} +10 -2
- data/{de → dict/de}/lingo-abk.txt +0 -0
- data/{de → dict/de}/lingo-dic.txt +0 -0
- data/{de → dict/de}/lingo-mul.txt +0 -0
- data/{de → dict/de}/lingo-syn.txt +0 -0
- data/{de → dict/de}/test_dic.txt +0 -0
- data/{de → dict/de}/test_gen.txt +0 -0
- data/{de → dict/de}/test_mu2.txt +0 -0
- data/{de → dict/de}/test_mul.txt +0 -0
- data/{de → dict/de}/test_sgw.txt +0 -0
- data/{de → dict/de}/test_syn.txt +0 -0
- data/{de → dict/de}/user-dic.txt +0 -0
- data/{en → dict/en}/lingo-dic.txt +0 -0
- data/{en → dict/en}/lingo-irr.txt +0 -0
- data/{en → dict/en}/lingo-mul.txt +0 -0
- data/{en → dict/en}/lingo-syn.txt +0 -0
- data/{en → dict/en}/lingo-wdn.txt +0 -0
- data/{en → dict/en}/user-dic.txt +0 -0
- data/{ru → dict/ru}/lingo-dic.txt +0 -0
- data/{ru → dict/ru}/lingo-mul.txt +0 -0
- data/{ru → dict/ru}/lingo-syn.txt +0 -0
- data/{ru → dict/ru}/user-dic.txt +0 -0
- data/{de.lang → lang/de.lang} +1 -1
- data/{en.lang → lang/en.lang} +0 -0
- data/{ru.lang → lang/ru.lang} +0 -0
- data/lib/lingo.rb +14 -15
- data/lib/lingo/app.rb +4 -2
- data/lib/lingo/attendee.rb +23 -43
- data/lib/lingo/attendee/abbreviator.rb +5 -5
- data/lib/lingo/attendee/debugger.rb +39 -12
- data/lib/lingo/attendee/decomposer.rb +3 -4
- data/lib/lingo/attendee/dehyphenizer.rb +4 -4
- data/lib/lingo/attendee/formatter.rb +1 -3
- data/lib/lingo/attendee/multi_worder.rb +3 -4
- data/lib/lingo/attendee/noneword_filter.rb +8 -12
- data/lib/lingo/attendee/object_filter.rb +6 -3
- data/lib/lingo/attendee/sequencer.rb +5 -5
- data/lib/lingo/attendee/stemmer.rb +3 -2
- data/lib/lingo/attendee/synonymer.rb +3 -4
- data/lib/lingo/attendee/text_reader.rb +39 -38
- data/lib/lingo/attendee/text_writer.rb +10 -10
- data/lib/lingo/attendee/tokenizer.rb +63 -33
- data/lib/lingo/attendee/variator.rb +3 -7
- data/lib/lingo/attendee/vector_filter.rb +132 -65
- data/lib/lingo/attendee/word_searcher.rb +5 -3
- data/lib/lingo/buffered_attendee.rb +1 -3
- data/lib/lingo/call.rb +4 -3
- data/lib/lingo/cli.rb +5 -1
- data/lib/lingo/config.rb +11 -5
- data/lib/lingo/ctl.rb +3 -3
- data/lib/lingo/database.rb +3 -1
- data/lib/lingo/database/crypter.rb +1 -3
- data/lib/lingo/database/source.rb +3 -1
- data/lib/lingo/database/source/key_value.rb +3 -1
- data/lib/lingo/database/source/multi_key.rb +3 -1
- data/lib/lingo/database/source/multi_value.rb +3 -1
- data/lib/lingo/database/source/single_word.rb +3 -1
- data/lib/lingo/database/source/word_class.rb +3 -1
- data/lib/lingo/debug.rb +5 -5
- data/lib/lingo/{agenda_item.rb → deferred_attendee.rb} +21 -12
- data/lib/lingo/error.rb +1 -1
- data/lib/lingo/language.rb +1 -9
- data/lib/lingo/language/dictionary.rb +2 -17
- data/lib/lingo/language/grammar.rb +10 -10
- data/lib/lingo/language/lexical.rb +2 -0
- data/lib/lingo/language/lexical_hash.rb +2 -0
- data/lib/lingo/language/token.rb +17 -3
- data/lib/lingo/language/word.rb +13 -5
- data/lib/lingo/language/word_form.rb +5 -3
- data/lib/lingo/progress.rb +2 -2
- data/lib/lingo/srv.rb +1 -1
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web.rb +1 -1
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/test/attendee/ts_abbreviator.rb +4 -2
- data/test/attendee/ts_multi_worder.rb +81 -88
- data/test/attendee/ts_noneword_filter.rb +2 -2
- data/test/attendee/ts_object_filter.rb +2 -2
- data/test/attendee/ts_sequencer.rb +40 -20
- data/test/attendee/ts_stemmer.rb +52 -26
- data/test/attendee/ts_text_reader.rb +75 -56
- data/test/attendee/ts_text_writer.rb +6 -4
- data/test/attendee/ts_tokenizer.rb +304 -193
- data/test/attendee/ts_vector_filter.rb +242 -9
- data/test/ref/artikel.non +3 -0
- data/test/ref/artikel.vec +1 -4
- data/test/ref/artikel.vef +940 -0
- data/test/ref/artikel.ven +0 -3
- data/test/ref/artikel.ver +0 -3
- data/test/ref/artikel.vet +2580 -0
- data/test/ref/lir.non +34 -31
- data/test/ref/lir.seq +14 -15
- data/test/ref/lir.vec +37 -37
- data/test/ref/lir.vef +329 -0
- data/test/ref/lir.ven +329 -0
- data/test/ref/lir.ver +329 -0
- data/test/ref/lir.vet +329 -0
- data/test/test_helper.rb +29 -16
- data/test/ts_language.rb +6 -47
- metadata +74 -87
- data/lingo.rb +0 -29
- data/spec/spec_helper.rb +0 -5
data/test/test_helper.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'test/unit'
|
4
4
|
require 'lingo'
|
5
5
|
|
6
|
-
class LingoTestCase <
|
6
|
+
class LingoTestCase < Test::Unit::TestCase
|
7
7
|
|
8
8
|
unless const_defined?(:TEST_FILE)
|
9
9
|
TEST_FILE = 'test/de/test.txt'
|
@@ -20,12 +20,18 @@ class LingoTestCase < Test::Unit::TestCase
|
|
20
20
|
[a || '', b || '', *c]
|
21
21
|
end
|
22
22
|
|
23
|
+
def li(t, o)
|
24
|
+
["#{t}\r\n", o]
|
25
|
+
end
|
26
|
+
|
23
27
|
def ai(t)
|
24
|
-
|
28
|
+
i = t.split('|')
|
29
|
+
i.unshift(i.shift.to_sym)
|
25
30
|
end
|
26
31
|
|
27
32
|
def tk(t)
|
28
|
-
|
33
|
+
a, b, *c = split(t, /\|(?=[A-Z\d])/)
|
34
|
+
Lingo::Language::Token.new(a, b, *c.map(&:to_i))
|
29
35
|
end
|
30
36
|
|
31
37
|
def lx(t)
|
@@ -55,8 +61,15 @@ class AttendeeTestCase < LingoTestCase
|
|
55
61
|
@lingo.reset
|
56
62
|
|
57
63
|
list = [{ @attendee => cfg }]
|
58
|
-
|
59
|
-
list.
|
64
|
+
|
65
|
+
list.unshift('TestSpooler' => {
|
66
|
+
'out' => 'input',
|
67
|
+
'input' => input,
|
68
|
+
'pos' => @attendee == 'Tokenizer' }) if input
|
69
|
+
|
70
|
+
list.push('TestDumper' => {
|
71
|
+
'in' => 'output',
|
72
|
+
'output' => output = [] }) if expect
|
60
73
|
|
61
74
|
@lingo.invite(list)
|
62
75
|
@lingo.start
|
@@ -74,32 +87,31 @@ class Lingo
|
|
74
87
|
|
75
88
|
class TestSpooler < self
|
76
89
|
|
77
|
-
protected
|
78
|
-
|
79
90
|
def init
|
80
|
-
@input = get_key('input')
|
91
|
+
@input, @pos = get_key('input'), get_key('pos', nil) && 0
|
81
92
|
end
|
82
93
|
|
83
|
-
def control(cmd
|
84
|
-
|
94
|
+
def control(cmd)
|
95
|
+
if cmd == :TALK
|
96
|
+
@input.each { |i| i.is_a?(Array) ? command(*i) :
|
97
|
+
@pos ? forward(i, @pos += i.bytesize) : forward(i) }
|
98
|
+
end
|
85
99
|
end
|
86
100
|
|
87
101
|
end
|
88
102
|
|
89
103
|
class TestDumper < self
|
90
104
|
|
91
|
-
protected
|
92
|
-
|
93
105
|
def init
|
94
106
|
@output = get_key('output')
|
95
107
|
end
|
96
108
|
|
97
|
-
def control(
|
98
|
-
@output
|
109
|
+
def control(*args)
|
110
|
+
@output.push(args)
|
99
111
|
end
|
100
112
|
|
101
|
-
def process(obj)
|
102
|
-
@output
|
113
|
+
def process(obj, *rest)
|
114
|
+
@output.push(rest.empty? ? obj : rest.unshift(obj))
|
103
115
|
end
|
104
116
|
|
105
117
|
end
|
@@ -115,4 +127,5 @@ class Lingo
|
|
115
127
|
end
|
116
128
|
|
117
129
|
end
|
130
|
+
|
118
131
|
end
|
data/test/ts_language.rb
CHANGED
@@ -23,8 +23,6 @@ class TestLexicalHash < LingoTestCase
|
|
23
23
|
$stderr = old_stderr
|
24
24
|
end
|
25
25
|
|
26
|
-
# TODO: Crypt testen...
|
27
|
-
|
28
26
|
def test_cache
|
29
27
|
lh('sys-dic') { |ds|
|
30
28
|
assert_equal([lx('regen|s|m'), lx('regen|s|n'), lx('regen|v'), lx('rege|a')], ds['regen'])
|
@@ -38,11 +36,9 @@ class TestLexicalHash < LingoTestCase
|
|
38
36
|
|
39
37
|
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
40
38
|
|
41
|
-
# Keine Store-Datei vorhanden, nur Text vorhanden
|
42
39
|
File.delete(*Dir["#{Lingo.find(:store, txt_file)}.*"])
|
43
40
|
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
44
41
|
|
45
|
-
# Store vorhanden, aber Text ist neuer
|
46
42
|
lh(id) { |ds| assert_equal([lx('substantiv|s')], ds['substantiv']) }
|
47
43
|
end
|
48
44
|
|
@@ -59,8 +55,8 @@ class TestLexicalHash < LingoTestCase
|
|
59
55
|
assert_equal([lx('abelscher ring ohne nullteiler|m')], ds['abelscher ring ohne nullteiler'])
|
60
56
|
assert_equal([4], ds['abelscher ring ohne'])
|
61
57
|
assert_equal([lx('alleinreisende frau|m')], ds['alleinreisend frau'])
|
62
|
-
assert_equal([lx('abschaltbarer leistungshalbleiter|m')], ds['
|
63
|
-
assert_equal(nil, ds['
|
58
|
+
assert_equal([lx('abschaltbarer leistungshalbleiter|m')], ds['abschaltbarer leistungshalbleiter'])
|
59
|
+
assert_equal(nil, ds['abschaltbar leistungshalbleiter'])
|
64
60
|
}
|
65
61
|
end
|
66
62
|
|
@@ -113,12 +109,6 @@ class TestDictionary < LingoTestCase
|
|
113
109
|
end
|
114
110
|
|
115
111
|
def test_params
|
116
|
-
# Keine Sprach-Konfiguration angegeben
|
117
|
-
#assert_raise(RuntimeError) {
|
118
|
-
# Lingo::Language::Dictionary.new({ 'source' => %w[sys-dic] }, @lingo)
|
119
|
-
#}
|
120
|
-
|
121
|
-
# Falsche Parameter angegeben (Pflichtparameter ohne Defaultwert)
|
122
112
|
assert_raise(ArgumentError) {
|
123
113
|
Lingo::Language::Dictionary.new({ 'course' => %w[sys-dic] }, @lingo)
|
124
114
|
}
|
@@ -142,39 +132,27 @@ class TestDictionary < LingoTestCase
|
|
142
132
|
|
143
133
|
def test_select_two_sources_mode_first
|
144
134
|
ld('source' => %w[sys-dic tst-dic], 'mode' => 'first') { |dic|
|
145
|
-
# in keiner Quelle vorhanden
|
146
135
|
assert_equal([], dic.select('hasennasen'))
|
147
|
-
# nur in erster Quelle vorhanden
|
148
136
|
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
149
|
-
# nur in zweiter Quelle vorhanden
|
150
137
|
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
151
|
-
# in beiden Quellen vorhanden
|
152
138
|
assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n')], dic.select('a-dur'))
|
153
139
|
}
|
154
140
|
end
|
155
141
|
|
156
142
|
def test_select_two_sources_mode_first_flipped
|
157
143
|
ld('source' => %w[tst-dic sys-dic], 'mode' => 'first') { |dic|
|
158
|
-
# in keiner Quelle vorhanden
|
159
144
|
assert_equal([], dic.select('hasennasen'))
|
160
|
-
# nur in erster Quelle vorhanden
|
161
145
|
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
162
|
-
# nur in zweiter Quelle vorhanden
|
163
146
|
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
164
|
-
# in beiden Quellen vorhanden
|
165
147
|
assert_equal([lx('b-dur|s')], dic.select('a-dur'))
|
166
148
|
}
|
167
149
|
end
|
168
150
|
|
169
151
|
def test_select_two_sources_mode_all
|
170
152
|
ld('source' => %w[sys-dic tst-dic], 'mode' => 'all') { |dic|
|
171
|
-
# in keiner Quelle vorhanden
|
172
153
|
assert_equal([], dic.select('hasennasen'))
|
173
|
-
# nur in erster Quelle vorhanden
|
174
154
|
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
175
|
-
# nur in zweiter Quelle vorhanden
|
176
155
|
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
177
|
-
# in beiden Quellen vorhanden
|
178
156
|
assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n'), lx('b-dur|s')], dic.select('a-dur'))
|
179
157
|
assert_equal([lx('aas|s|n'), lx('aas|s')], dic.select('aas'))
|
180
158
|
}
|
@@ -182,15 +160,11 @@ class TestDictionary < LingoTestCase
|
|
182
160
|
|
183
161
|
def test_select_two_sources_mode_default
|
184
162
|
ld('source' => %w[sys-dic tst-dic]) { |dic|
|
185
|
-
# in keiner Quelle vorhanden
|
186
163
|
assert_equal([], dic.select('hasennasen'))
|
187
|
-
# nur in erster Quelle vorhanden
|
188
164
|
assert_equal([lx('knaller|s')], dic.select('knaller'))
|
189
|
-
# nur in zweiter Quelle vorhanden
|
190
165
|
assert_equal([lx('super indexierungssystem|m')], dic.select('lex-lingo'))
|
191
166
|
assert_equal([lx('wirkungsort|s'), lx('wirkung|s+'), lx('ort|s+')], dic.select('wirkungsort'))
|
192
167
|
assert_equal([lx('zettelkatalog|k'), lx('zettel|s+'), lx('katalog|s+')], dic.select('zettelkatalog'))
|
193
|
-
# in beiden Quellen vorhanden
|
194
168
|
assert_equal([lx('a-dur|s|m'), lx('a-dur|s|n'), lx('b-dur|s')], dic.select('a-dur'))
|
195
169
|
assert_equal([lx('aas|s|n'), lx('aas|s')], dic.select('aas'))
|
196
170
|
}
|
@@ -207,7 +181,7 @@ class TestDictionary < LingoTestCase
|
|
207
181
|
|
208
182
|
def test_infix_lexicals
|
209
183
|
ld('source' => %w[sys-dic]) { |dic|
|
210
|
-
assert_equal(
|
184
|
+
assert_equal([lx('information|f')], ax(dic, 'informations', :infix))
|
211
185
|
}
|
212
186
|
end
|
213
187
|
|
@@ -222,7 +196,7 @@ class TestDictionary < LingoTestCase
|
|
222
196
|
|
223
197
|
def test_select_with_infix
|
224
198
|
ld('source' => %w[sys-dic]) { |dic|
|
225
|
-
assert_equal(
|
199
|
+
assert_equal([lx('information|f')], ax(dic, 'informations', :infix))
|
226
200
|
}
|
227
201
|
end
|
228
202
|
|
@@ -250,10 +224,6 @@ class TestGrammar < LingoTestCase
|
|
250
224
|
@lingo = Lingo.new
|
251
225
|
end
|
252
226
|
|
253
|
-
def test_params
|
254
|
-
# Die gleichen Fälle wie bei Dictionary, daher nicht notwendig
|
255
|
-
end
|
256
|
-
|
257
227
|
def test_cache
|
258
228
|
lg { |gra|
|
259
229
|
assert_equal(
|
@@ -294,7 +264,7 @@ class TestGrammar < LingoTestCase
|
|
294
264
|
)
|
295
265
|
|
296
266
|
assert_equal(
|
297
|
-
wd('benutzerforschung|KOM', 'benutzerforschung|k', '
|
267
|
+
wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzer|s+', 'forschung|s+'),
|
298
268
|
gra.find_compound('benutzerforschung')
|
299
269
|
)
|
300
270
|
|
@@ -318,64 +288,53 @@ class TestGrammar < LingoTestCase
|
|
318
288
|
gra.find_compound('titelbestandsbestände')
|
319
289
|
)
|
320
290
|
|
321
|
-
# hinterer Teil ist ein Wort mit Suffix
|
322
291
|
assert_equal(
|
323
292
|
wd('hasenbraten|KOM', 'hasenbraten|k', 'hase|s+', 'braten|v+'),
|
324
293
|
gra.find_compound('hasenbraten')
|
325
294
|
)
|
326
295
|
|
327
|
-
# hinterer Teil ist ein Wort mit Infix ohne Schwanz
|
328
296
|
assert_equal(
|
329
297
|
wd('nasenlaufen|KOM', 'nasenlaufen|k', 'nase|s+', 'laufen|v+'),
|
330
298
|
gra.find_compound('nasenlaufen')
|
331
299
|
)
|
332
300
|
|
333
|
-
# hinterer Teil ist ein Wort mit Infix mit Schwanz
|
334
301
|
assert_equal(
|
335
302
|
wd('nasenlaufens|KOM', 'nasenlaufen|k', 'nase|s+', 'laufen|v+'),
|
336
303
|
gra.find_compound('nasenlaufens')
|
337
304
|
)
|
338
305
|
|
339
|
-
# hinterer Teil ist ein Kompositum nach Bindestrich
|
340
306
|
assert_equal(
|
341
307
|
wd('arrafat-nachfolgebedarf|KOM', 'arrafat-nachfolgebedarf|k', 'arrafat|x+', 'nachfolge|s+', 'bedarf|s+'),
|
342
308
|
gra.find_compound('arrafat-nachfolgebedarf')
|
343
309
|
)
|
344
310
|
|
345
|
-
# hinterer Teil ist ein TakeItAsIs nach Bindestrich
|
346
311
|
assert_equal(
|
347
312
|
wd('nachfolge-arrafat|KOM', 'nachfolge-arrafat|k', 'nachfolge|s+', 'arrafat|x+'),
|
348
313
|
gra.find_compound('nachfolge-arrafat')
|
349
314
|
)
|
350
315
|
|
351
|
-
# vorderer Teil ist ein Wort mit Suffix => siehe Hasenbraten
|
352
|
-
# vorderer Teil ist ein Kompositum
|
353
316
|
assert_equal(
|
354
317
|
wd('morgenonkelmantel|KOM', 'morgenonkelmantel|k', 'morgen|w+', 'morgen|s+', 'onkel|s+', 'mantel|s+'),
|
355
318
|
gra.find_compound('morgenonkelmantel')
|
356
319
|
)
|
357
320
|
|
358
|
-
# vorderer Teil ist ein TakeItAsIs vor Bindestrich / bindestrichversion
|
359
321
|
assert_equal(
|
360
322
|
wd('arrafat-nachfolger|KOM', 'arrafat-nachfolger|k', 'arrafat|x+', 'nachfolger|s+'),
|
361
323
|
gra.find_compound('arrafat-nachfolger')
|
362
324
|
)
|
363
325
|
|
364
|
-
# bindestrichversion zwei-teilig
|
365
326
|
assert_equal(
|
366
327
|
wd('cd-rom-technologie|KOM', 'cd-rom-technologie|k', 'cd-rom|s+|f', 'cd-rom|s+|m', 'technologie|s+|f'),
|
367
328
|
gra.find_compound('cd-rom-technologie')
|
368
329
|
)
|
369
330
|
|
370
|
-
# bindestrichversion drei-teilig
|
371
331
|
assert_equal(
|
372
332
|
wd('albert-ludwigs-universität|KOM', 'albert-ludwigs-universität|k', 'albert|e+', 'ludwig|e+', 'universität|s+'),
|
373
333
|
gra.find_compound('albert-ludwigs-universität')
|
374
334
|
)
|
375
335
|
|
376
|
-
# normal mit suggestion
|
377
336
|
assert_equal(
|
378
|
-
wd('benutzerforschung|KOM', 'benutzerforschung|k', '
|
337
|
+
wd('benutzerforschung|KOM', 'benutzerforschung|k', 'benutzer|s+', 'forschung|s+'),
|
379
338
|
gra.find_compound('benutzerforschung')
|
380
339
|
)
|
381
340
|
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lingo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.
|
4
|
+
version: 1.8.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Vorhauer
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2015-02-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: cyclops
|
@@ -17,34 +17,28 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: '0.
|
21
|
-
- - ">="
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.4
|
20
|
+
version: '0.1'
|
24
21
|
type: :runtime
|
25
22
|
prerelease: false
|
26
23
|
version_requirements: !ruby/object:Gem::Requirement
|
27
24
|
requirements:
|
28
25
|
- - "~>"
|
29
26
|
- !ruby/object:Gem::Version
|
30
|
-
version: '0.
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 0.0.4
|
27
|
+
version: '0.1'
|
34
28
|
- !ruby/object:Gem::Dependency
|
35
29
|
name: nuggets
|
36
30
|
requirement: !ruby/object:Gem::Requirement
|
37
31
|
requirements:
|
38
32
|
- - "~>"
|
39
33
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.
|
34
|
+
version: '1.1'
|
41
35
|
type: :runtime
|
42
36
|
prerelease: false
|
43
37
|
version_requirements: !ruby/object:Gem::Requirement
|
44
38
|
requirements:
|
45
39
|
- - "~>"
|
46
40
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1.
|
41
|
+
version: '1.1'
|
48
42
|
- !ruby/object:Gem::Dependency
|
49
43
|
name: rubyzip
|
50
44
|
requirement: !ruby/object:Gem::Requirement
|
@@ -119,16 +113,22 @@ dependencies:
|
|
119
113
|
name: hen
|
120
114
|
requirement: !ruby/object:Gem::Requirement
|
121
115
|
requirements:
|
116
|
+
- - "~>"
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0.8'
|
122
119
|
- - ">="
|
123
120
|
- !ruby/object:Gem::Version
|
124
|
-
version:
|
121
|
+
version: 0.8.1
|
125
122
|
type: :development
|
126
123
|
prerelease: false
|
127
124
|
version_requirements: !ruby/object:Gem::Requirement
|
128
125
|
requirements:
|
126
|
+
- - "~>"
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0.8'
|
129
129
|
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version:
|
131
|
+
version: 0.8.1
|
132
132
|
- !ruby/object:Gem::Dependency
|
133
133
|
name: rake
|
134
134
|
requirement: !ruby/object:Gem::Requirement
|
@@ -144,7 +144,7 @@ dependencies:
|
|
144
144
|
- !ruby/object:Gem::Version
|
145
145
|
version: '0'
|
146
146
|
- !ruby/object:Gem::Dependency
|
147
|
-
name:
|
147
|
+
name: test-unit
|
148
148
|
requirement: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
150
|
- - ">="
|
@@ -189,27 +189,34 @@ files:
|
|
189
189
|
- bin/lingoctl
|
190
190
|
- bin/lingosrv
|
191
191
|
- bin/lingoweb
|
192
|
-
-
|
193
|
-
-
|
194
|
-
-
|
195
|
-
- de/lingo-
|
196
|
-
- de/lingo-
|
197
|
-
- de/
|
198
|
-
- de/
|
199
|
-
- de/
|
200
|
-
- de/
|
201
|
-
- de/
|
202
|
-
- de/
|
203
|
-
- de/
|
204
|
-
-
|
205
|
-
-
|
206
|
-
- en/lingo-
|
207
|
-
- en/lingo-
|
208
|
-
- en/lingo-
|
209
|
-
- en/lingo-
|
210
|
-
- en/
|
192
|
+
- config/lingo-call.cfg
|
193
|
+
- config/lingo.cfg
|
194
|
+
- config/lir.cfg
|
195
|
+
- dict/de/lingo-abk.txt
|
196
|
+
- dict/de/lingo-dic.txt
|
197
|
+
- dict/de/lingo-mul.txt
|
198
|
+
- dict/de/lingo-syn.txt
|
199
|
+
- dict/de/test_dic.txt
|
200
|
+
- dict/de/test_gen.txt
|
201
|
+
- dict/de/test_mu2.txt
|
202
|
+
- dict/de/test_mul.txt
|
203
|
+
- dict/de/test_sgw.txt
|
204
|
+
- dict/de/test_syn.txt
|
205
|
+
- dict/de/user-dic.txt
|
206
|
+
- dict/en/lingo-dic.txt
|
207
|
+
- dict/en/lingo-irr.txt
|
208
|
+
- dict/en/lingo-mul.txt
|
209
|
+
- dict/en/lingo-syn.txt
|
210
|
+
- dict/en/lingo-wdn.txt
|
211
|
+
- dict/en/user-dic.txt
|
212
|
+
- dict/ru/lingo-dic.txt
|
213
|
+
- dict/ru/lingo-mul.txt
|
214
|
+
- dict/ru/lingo-syn.txt
|
215
|
+
- dict/ru/user-dic.txt
|
216
|
+
- lang/de.lang
|
217
|
+
- lang/en.lang
|
218
|
+
- lang/ru.lang
|
211
219
|
- lib/lingo.rb
|
212
|
-
- lib/lingo/agenda_item.rb
|
213
220
|
- lib/lingo/app.rb
|
214
221
|
- lib/lingo/attendee.rb
|
215
222
|
- lib/lingo/attendee/abbreviator.rb
|
@@ -249,6 +256,7 @@ files:
|
|
249
256
|
- lib/lingo/database/source/single_word.rb
|
250
257
|
- lib/lingo/database/source/word_class.rb
|
251
258
|
- lib/lingo/debug.rb
|
259
|
+
- lib/lingo/deferred_attendee.rb
|
252
260
|
- lib/lingo/error.rb
|
253
261
|
- lib/lingo/language.rb
|
254
262
|
- lib/lingo/language/char.rb
|
@@ -271,16 +279,6 @@ files:
|
|
271
279
|
- lib/lingo/web/public/lingo.png
|
272
280
|
- lib/lingo/web/public/lingoweb.css
|
273
281
|
- lib/lingo/web/views/index.erb
|
274
|
-
- lingo-call.cfg
|
275
|
-
- lingo.cfg
|
276
|
-
- lingo.rb
|
277
|
-
- lir.cfg
|
278
|
-
- ru.lang
|
279
|
-
- ru/lingo-dic.txt
|
280
|
-
- ru/lingo-mul.txt
|
281
|
-
- ru/lingo-syn.txt
|
282
|
-
- ru/user-dic.txt
|
283
|
-
- spec/spec_helper.rb
|
284
282
|
- test/attendee/ts_abbreviator.rb
|
285
283
|
- test/attendee/ts_decomposer.rb
|
286
284
|
- test/attendee/ts_multi_worder.rb
|
@@ -304,13 +302,19 @@ files:
|
|
304
302
|
- test/ref/artikel.seq
|
305
303
|
- test/ref/artikel.syn
|
306
304
|
- test/ref/artikel.vec
|
305
|
+
- test/ref/artikel.vef
|
307
306
|
- test/ref/artikel.ven
|
308
307
|
- test/ref/artikel.ver
|
308
|
+
- test/ref/artikel.vet
|
309
309
|
- test/ref/lir.mul
|
310
310
|
- test/ref/lir.non
|
311
311
|
- test/ref/lir.seq
|
312
312
|
- test/ref/lir.syn
|
313
313
|
- test/ref/lir.vec
|
314
|
+
- test/ref/lir.vef
|
315
|
+
- test/ref/lir.ven
|
316
|
+
- test/ref/lir.ver
|
317
|
+
- test/ref/lir.vet
|
314
318
|
- test/test_helper.rb
|
315
319
|
- test/ts_database.rb
|
316
320
|
- test/ts_language.rb
|
@@ -324,51 +328,34 @@ licenses:
|
|
324
328
|
metadata: {}
|
325
329
|
post_install_message: |2+
|
326
330
|
|
327
|
-
lingo-1.8.
|
331
|
+
lingo-1.8.6 [2015-02-09]:
|
328
332
|
|
329
|
-
*
|
330
|
-
|
331
|
-
*
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
* Lingo::Attendee::
|
338
|
-
|
339
|
-
* Lingo::Attendee::TextReader
|
340
|
-
|
341
|
-
* Lingo::Attendee::
|
342
|
-
|
343
|
-
* Lingo::Attendee::
|
344
|
-
|
345
|
-
|
346
|
-
*
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
classes/genders.
|
352
|
-
* Lingo::Database::Source::WordClass supports compounds being encoded in the
|
353
|
-
dictionary (appending <tt>+</tt> to their parts' word classes is
|
354
|
-
recommended).
|
355
|
-
* Lingo::Database::Source removes leading and trailing whitespace from
|
356
|
-
dictionary lines.
|
357
|
-
* Lingo::Database::Crypter uses OpenSSL to encrypt/decrypt dictionaries.
|
358
|
-
Note: Can't decrypt dictionaries encrypted with the old scheme anymore.
|
359
|
-
* Lingo::Attendee::Tokenizer learned subset of MediaWiki syntax.
|
360
|
-
* Eliminated pathological behaviour of the +URLS+ rule in
|
361
|
-
Lingo::Attendee::Tokenizer.
|
362
|
-
* Fixed regression introduced in 1.8.2 where <tt>combine: all</tt> would no
|
363
|
-
longer work in Lingo::Attendee::MultiWorder.
|
364
|
-
* Updated and extended Russian dictionaries. (Yulia Dorokhova, Thomas Müller)
|
365
|
-
* +lingoctl+ no longer overwrites existing files without confirmation.
|
366
|
-
* +lingoctl+ learned +archive+ command.
|
367
|
-
* Dictionary cleanup.
|
333
|
+
* Lingo::Attendee::VectorFilter learned +pos+ option to print position and
|
334
|
+
byte offset with each word.
|
335
|
+
* Lingo::Attendee::VectorFilter learned +tfidf+ option to sort results based
|
336
|
+
on their tf–idf[https://en.wikipedia.org/wiki/Tf–idf] score; the document
|
337
|
+
frequencies are calculated over the "corpus" of all files processed during
|
338
|
+
a single program invocation.
|
339
|
+
* Lingo::Attendee::VectorFilter learned +tokens+ option to filter on
|
340
|
+
Lingo::Language::Token in addition to Lingo::Language::Word.
|
341
|
+
* Lingo::Attendee::VectorFilter no longer supports +debug+ (as well as
|
342
|
+
+prompt+ and +preamble+); use Lingo::Attendee::DebugFilter instead.
|
343
|
+
* Lingo::Attendee::TextReader no longer removes line endings; option +chomp+
|
344
|
+
is obsolete.
|
345
|
+
* Lingo::Attendee::TextReader passes byte offset to the following attendee.
|
346
|
+
* Lingo::Attendee::Tokenizer records token's byte offset.
|
347
|
+
* Lingo::Attendee::Tokenizer records token's sequence position.
|
348
|
+
* Lingo::Attendee::Tokenizer learned <tt>skip-tags</tt> option to skip over
|
349
|
+
specified tags' contents.
|
350
|
+
* Lingo::Attendee subclasses warn when invalid or obsolete options or names
|
351
|
+
are used.
|
352
|
+
* Changed German infix substitution +/en+ to +ch/chen+ in order to prevent
|
353
|
+
overly aggressive identifications.
|
354
|
+
* Internal refactoring and API changes.
|
368
355
|
|
369
356
|
rdoc_options:
|
370
357
|
- "--title"
|
371
|
-
- lingo Application documentation (v1.8.
|
358
|
+
- lingo Application documentation (v1.8.6)
|
372
359
|
- "--charset"
|
373
360
|
- UTF-8
|
374
361
|
- "--line-numbers"
|
@@ -389,7 +376,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
389
376
|
version: '0'
|
390
377
|
requirements: []
|
391
378
|
rubyforge_project:
|
392
|
-
rubygems_version: 2.4.
|
379
|
+
rubygems_version: 2.4.5
|
393
380
|
signing_key:
|
394
381
|
specification_version: 4
|
395
382
|
summary: The full-featured automatic indexing system
|