lingo 1.8.1 → 1.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +23 -5
- data/README +1 -1
- data/Rakefile +5 -7
- data/TODO +2 -0
- data/bin/lingo +5 -1
- data/de.lang +1 -1
- data/en/lingo-syn.txt +0 -0
- data/en.lang +2 -1
- data/lib/lingo/attendee/abbreviator.rb +8 -9
- data/lib/lingo/attendee/debugger.rb +5 -4
- data/lib/lingo/attendee/decomposer.rb +8 -3
- data/lib/lingo/attendee/dehyphenizer.rb +19 -63
- data/lib/lingo/attendee/formatter.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +67 -155
- data/lib/lingo/attendee/noneword_filter.rb +16 -9
- data/lib/lingo/attendee/object_filter.rb +1 -1
- data/lib/lingo/attendee/sequencer.rb +32 -63
- data/lib/lingo/attendee/stemmer/porter.rb +343 -0
- data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
- data/lib/lingo/attendee/synonymer.rb +10 -9
- data/lib/lingo/attendee/text_reader.rb +102 -76
- data/lib/lingo/attendee/text_writer.rb +23 -26
- data/lib/lingo/attendee/tokenizer.rb +13 -27
- data/lib/lingo/attendee/variator.rb +26 -66
- data/lib/lingo/attendee/vector_filter.rb +42 -43
- data/lib/lingo/attendee/word_searcher.rb +6 -7
- data/lib/lingo/attendee.rb +25 -7
- data/lib/lingo/buffered_attendee.rb +36 -10
- data/lib/lingo/cachable.rb +8 -8
- data/lib/lingo/config.rb +5 -6
- data/lib/lingo/ctl.rb +2 -3
- data/lib/lingo/database/crypter.rb +9 -26
- data/lib/lingo/database/gdbm_store.rb +3 -5
- data/lib/lingo/database/libcdb_store.rb +4 -6
- data/lib/lingo/database/sdbm_store.rb +11 -6
- data/lib/lingo/database/show_progress.rb +3 -43
- data/lib/lingo/database/source/key_value.rb +2 -6
- data/lib/lingo/database/source/multi_key.rb +3 -5
- data/lib/lingo/database/source/multi_value.rb +2 -6
- data/lib/lingo/database/source/single_word.rb +4 -6
- data/lib/lingo/database/source/word_class.rb +4 -10
- data/lib/lingo/database/source.rb +20 -18
- data/lib/lingo/database.rb +84 -59
- data/lib/lingo/error.rb +57 -1
- data/lib/lingo/language/dictionary.rb +21 -18
- data/lib/lingo/language/grammar.rb +40 -49
- data/lib/lingo/language/lexical.rb +6 -6
- data/lib/lingo/language/lexical_hash.rb +6 -0
- data/lib/lingo/language/word.rb +32 -15
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/language.rb +14 -25
- data/lib/lingo/reportable.rb +12 -10
- data/lib/lingo/show_progress.rb +81 -0
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo.rb +63 -24
- data/lingo-call.cfg +6 -10
- data/lingo.cfg +60 -44
- data/lir.cfg +42 -41
- data/test/attendee/ts_abbreviator.rb +3 -5
- data/test/attendee/ts_decomposer.rb +3 -5
- data/test/attendee/ts_multi_worder.rb +87 -145
- data/test/attendee/ts_noneword_filter.rb +5 -3
- data/test/attendee/ts_object_filter.rb +5 -3
- data/test/attendee/ts_sequencer.rb +3 -5
- data/test/attendee/ts_stemmer.rb +309 -0
- data/test/attendee/ts_synonymer.rb +15 -11
- data/test/attendee/ts_text_reader.rb +12 -15
- data/test/attendee/ts_text_writer.rb +24 -29
- data/test/attendee/ts_tokenizer.rb +9 -7
- data/test/attendee/ts_variator.rb +4 -4
- data/test/attendee/ts_vector_filter.rb +24 -16
- data/test/attendee/ts_word_searcher.rb +20 -36
- data/test/{lir.csv → lir.vec} +0 -0
- data/test/ref/artikel.vec +943 -943
- data/test/ref/artikel.ven +943 -943
- data/test/ref/lir.non +201 -201
- data/test/ref/lir.seq +178 -178
- data/test/ref/lir.syn +49 -49
- data/test/ref/lir.vec +329 -0
- data/test/test_helper.rb +20 -36
- data/test/ts_database.rb +10 -10
- data/test/ts_language.rb +279 -319
- metadata +93 -104
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lingo-all.cfg +0 -89
- data/porter/stem.cfg +0 -311
- data/porter/stem.rb +0 -150
- data/test/ref/lir.csv +0 -329
- data/test.cfg +0 -79
@@ -4,110 +4,94 @@ require_relative '../test_helper'
|
|
4
4
|
|
5
5
|
class TestAttendeeWordSearcher < AttendeeTestCase
|
6
6
|
|
7
|
-
def setup
|
8
|
-
@test_synonyms = [
|
9
|
-
lx('experiment|y'), lx('kontrolle|y'), lx('probelauf|y'),
|
10
|
-
lx('prüfung|y'), lx('test|y'), lx('testlauf|y'),
|
11
|
-
lx('testversuch|y'), lx('trockentest|y'), lx('versuch|y')
|
12
|
-
]
|
13
|
-
end
|
14
|
-
|
15
7
|
def test_basic
|
16
|
-
|
17
|
-
|
8
|
+
meet({ 'source' => 'sys-dic,sys-syn,sys-mul' }, [
|
9
|
+
tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC'), ai('EOL|')
|
10
|
+
], [
|
18
11
|
wd('Dies|IDF', 'dies|w'),
|
19
12
|
wd('ist|IDF', 'sein|v'),
|
20
13
|
wd('ein|IDF', 'einen|v', 'ein|w'),
|
21
14
|
wd('Test|IDF', 'test|s', 'testen|v'),
|
22
15
|
tk('.|PUNC'),
|
23
16
|
ai('EOL|')
|
24
|
-
]
|
25
|
-
meet({'source'=>'sys-dic,sys-syn,sys-mul'})
|
17
|
+
])
|
26
18
|
end
|
27
19
|
|
28
20
|
def test_mode
|
29
|
-
|
30
|
-
|
21
|
+
meet({ 'source' => 'sys-syn,sys-dic', 'mode' => 'first' }, [
|
22
|
+
tk('Dies|WORD'), tk('ist|WORD'), tk('ein|WORD'), tk('Test|WORD'), tk('.|PUNC'), ai('EOL|')
|
23
|
+
], [
|
31
24
|
wd('Dies|IDF', 'dies|w'),
|
32
25
|
wd('ist|IDF', 'sein|v'),
|
33
26
|
wd('ein|IDF', 'einen|v', 'ein|w'),
|
34
27
|
wd('Test|IDF', 'test|s', 'testen|v'),
|
35
28
|
tk('.|PUNC'),
|
36
29
|
ai('EOL|')
|
37
|
-
]
|
38
|
-
meet({'source'=>'sys-syn,sys-dic', 'mode'=>'first'})
|
30
|
+
])
|
39
31
|
end
|
40
32
|
|
41
33
|
def test_two_sources_mode_first
|
42
|
-
|
34
|
+
meet({ 'source' => 'sys-dic,tst-dic', 'mode' => 'first' }, [
|
43
35
|
tk('Hasennasen|WORD'),
|
44
36
|
tk('Knaller|WORD'),
|
45
37
|
tk('Lex-Lingo|WORD'),
|
46
38
|
tk('A-Dur|WORD'),
|
47
39
|
ai('EOL|')
|
48
|
-
]
|
49
|
-
@expect = [
|
40
|
+
], [
|
50
41
|
wd('Hasennasen|?'),
|
51
42
|
wd('Knaller|IDF', 'knaller|s'),
|
52
43
|
wd('Lex-Lingo|IDF', 'super indexierungssystem|m'),
|
53
44
|
wd('A-Dur|IDF', 'a-dur|s'),
|
54
45
|
ai('EOL|')
|
55
|
-
]
|
56
|
-
meet({'source'=>'sys-dic,tst-dic', 'mode'=>'first'})
|
46
|
+
])
|
57
47
|
end
|
58
48
|
|
59
49
|
def test_two_sources_mode_first_flipped
|
60
|
-
|
50
|
+
meet({ 'source' => 'tst-dic,sys-dic', 'mode' => 'first' }, [
|
61
51
|
tk('Hasennasen|WORD'),
|
62
52
|
tk('Knaller|WORD'),
|
63
53
|
tk('Lex-Lingo|WORD'),
|
64
54
|
tk('A-Dur|WORD'),
|
65
55
|
ai('EOL|')
|
66
|
-
]
|
67
|
-
@expect = [
|
56
|
+
], [
|
68
57
|
wd('Hasennasen|?'),
|
69
58
|
wd('Knaller|IDF', 'knaller|s'),
|
70
59
|
wd('Lex-Lingo|IDF', 'super indexierungssystem|m'),
|
71
60
|
wd('A-Dur|IDF', 'b-dur|s'),
|
72
61
|
ai('EOL|')
|
73
|
-
]
|
74
|
-
meet({'source'=>'tst-dic,sys-dic', 'mode'=>'first'})
|
62
|
+
])
|
75
63
|
end
|
76
64
|
|
77
65
|
def test_select_two_sources_mode_all
|
78
|
-
|
66
|
+
meet({ 'source' => 'sys-dic,tst-dic', 'mode' => 'all' }, [
|
79
67
|
tk('Hasennasen|WORD'),
|
80
68
|
tk('Knaller|WORD'),
|
81
69
|
tk('Lex-Lingo|WORD'),
|
82
70
|
tk('A-Dur|WORD'),
|
83
71
|
ai('EOL|')
|
84
|
-
]
|
85
|
-
@expect = [
|
72
|
+
], [
|
86
73
|
wd('Hasennasen|?'),
|
87
74
|
wd('Knaller|IDF', 'knaller|s'),
|
88
75
|
wd('Lex-Lingo|IDF', 'super indexierungssystem|m'),
|
89
76
|
wd('A-Dur|IDF', 'a-dur|s', 'b-dur|s'),
|
90
77
|
ai('EOL|')
|
91
|
-
]
|
92
|
-
meet({'source'=>'sys-dic,tst-dic', 'mode'=>'all'})
|
78
|
+
])
|
93
79
|
end
|
94
80
|
|
95
81
|
def test_select_two_sources_mode_def
|
96
|
-
|
82
|
+
meet({ 'source' => 'sys-dic,tst-dic' }, [
|
97
83
|
tk('Hasennasen|WORD'),
|
98
84
|
tk('Knaller|WORD'),
|
99
85
|
tk('Lex-Lingo|WORD'),
|
100
86
|
tk('A-Dur|WORD'),
|
101
87
|
ai('EOL|')
|
102
|
-
]
|
103
|
-
@expect = [
|
88
|
+
], [
|
104
89
|
wd('Hasennasen|?'),
|
105
90
|
wd('Knaller|IDF', 'knaller|s'),
|
106
91
|
wd('Lex-Lingo|IDF', 'super indexierungssystem|m'),
|
107
92
|
wd('A-Dur|IDF', 'a-dur|s', 'b-dur|s'),
|
108
93
|
ai('EOL|')
|
109
|
-
]
|
110
|
-
meet({'source'=>'sys-dic,tst-dic'})
|
94
|
+
])
|
111
95
|
end
|
112
96
|
|
113
97
|
end
|
data/test/{lir.csv → lir.vec}
RENAMED
File without changes
|