lingo 1.8.4.2 → 1.8.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +413 -325
- data/README +380 -131
- data/Rakefile +19 -21
- data/de/lingo-abk.txt +15 -17
- data/de/lingo-dic.txt +20210 -20659
- data/de/lingo-mul.txt +5 -13
- data/de/lingo-syn.txt +5 -8
- data/de/test_dic.txt +2 -0
- data/de/test_gen.txt +8 -0
- data/de/{test_mul2.txt → test_mu2.txt} +0 -0
- data/de/{test_singleword.txt → test_sgw.txt} +0 -0
- data/de/user-dic.txt +5 -7
- data/de.lang +64 -49
- data/en/lingo-dic.txt +6398 -6404
- data/en/lingo-irr.txt +2 -3
- data/en/lingo-mul.txt +6 -7
- data/en/lingo-wdn.txt +881 -1762
- data/en/user-dic.txt +2 -5
- data/en.lang +39 -39
- data/lib/lingo/app.rb +10 -6
- data/lib/lingo/attendee/abbreviator.rb +1 -0
- data/lib/lingo/attendee/decomposer.rb +2 -1
- data/lib/lingo/attendee/multi_worder.rb +5 -6
- data/lib/lingo/attendee/stemmer.rb +1 -1
- data/lib/lingo/attendee/synonymer.rb +4 -2
- data/lib/lingo/attendee/text_reader.rb +77 -57
- data/lib/lingo/attendee/text_writer.rb +1 -1
- data/lib/lingo/attendee/tokenizer.rb +101 -50
- data/lib/lingo/attendee/variator.rb +2 -1
- data/lib/lingo/attendee/vector_filter.rb +28 -6
- data/lib/lingo/attendee/word_searcher.rb +2 -1
- data/lib/lingo/attendee.rb +8 -4
- data/lib/lingo/call.rb +7 -3
- data/lib/lingo/cli.rb +8 -16
- data/lib/lingo/config.rb +11 -6
- data/lib/lingo/ctl.rb +54 -3
- data/lib/lingo/database/crypter.rb +8 -14
- data/lib/lingo/database/hash_store.rb +1 -1
- data/lib/lingo/database/{show_progress.rb → progress.rb} +7 -8
- data/lib/lingo/database/source/key_value.rb +6 -5
- data/lib/lingo/database/source/multi_key.rb +5 -2
- data/lib/lingo/database/source/multi_value.rb +6 -4
- data/lib/lingo/database/source/single_word.rb +2 -3
- data/lib/lingo/database/source/word_class.rb +24 -5
- data/lib/lingo/database/source.rb +5 -3
- data/lib/lingo/database.rb +102 -41
- data/lib/lingo/error.rb +24 -2
- data/lib/lingo/language/dictionary.rb +26 -54
- data/lib/lingo/language/grammar.rb +19 -23
- data/lib/lingo/language/lexical.rb +5 -1
- data/lib/lingo/language/lexical_hash.rb +7 -12
- data/lib/lingo/language/token.rb +10 -1
- data/lib/lingo/language/word.rb +35 -23
- data/lib/lingo/language/word_form.rb +5 -4
- data/lib/lingo/{show_progress.rb → progress.rb} +43 -30
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/srv/public/.gitkeep +0 -0
- data/lib/lingo/srv.rb +11 -6
- data/lib/lingo/version.rb +2 -2
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/lib/lingo/web/views/index.erb +4 -4
- data/lib/lingo/web.rb +4 -6
- data/lib/lingo.rb +4 -12
- data/lingo.cfg +1 -1
- data/lir.cfg +1 -1
- data/ru/lingo-dic.txt +33473 -2113
- data/ru/lingo-mul.txt +8430 -1913
- data/ru/lingo-syn.txt +1634 -0
- data/ru/user-dic.txt +6 -0
- data/ru.lang +49 -47
- data/spec/spec_helper.rb +4 -0
- data/test/attendee/ts_decomposer.rb +2 -2
- data/test/attendee/ts_synonymer.rb +3 -3
- data/test/attendee/ts_tokenizer.rb +215 -2
- data/test/attendee/ts_variator.rb +2 -2
- data/test/attendee/ts_word_searcher.rb +10 -6
- data/test/ref/artikel.seq +2 -2
- data/test/ref/artikel.vec +5 -5
- data/test/ref/artikel.ven +11 -11
- data/test/ref/artikel.ver +11 -11
- data/test/ref/lir.seq +13 -13
- data/test/ref/lir.vec +31 -31
- data/test/test_helper.rb +19 -5
- data/test/ts_database.rb +206 -77
- data/test/ts_language.rb +86 -26
- metadata +93 -49
- data/.rspec +0 -1
- data/de/test_syn2.txt +0 -1
data/de/lingo-mul.txt
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
+
# Mehrwort-Wörterbuch lingo-mul.txt
|
2
|
+
# enthält als Basis für eigene Erweiterungen
|
3
|
+
# exemplarisch Mehrwortbegriffe, die mit "a" beginnen
|
1
4
|
#
|
2
|
-
#
|
3
|
-
|
4
|
-
# exemplarisch Mehrwortbegriffe, die mit "a" beginnen
|
5
|
-
#
|
6
|
-
# Stand: 18.10.05 / Klaus Lepsky
|
7
|
-
#
|
8
|
-
#
|
5
|
+
# Stand: 18.10.05 / Klaus Lepsky
|
6
|
+
|
9
7
|
albert einstein
|
10
8
|
albert ainshtain
|
11
9
|
einstein, albert
|
@@ -13,12 +11,6 @@ a data query language
|
|
13
11
|
a fresco
|
14
12
|
a part
|
15
13
|
a priori
|
16
|
-
abelsche transformation
|
17
|
-
abelsche umformung
|
18
|
-
a data query language
|
19
|
-
a fresco
|
20
|
-
a part
|
21
|
-
a priori
|
22
14
|
a programming language
|
23
15
|
a- 3-struktur
|
24
16
|
a- 4-struktur
|
data/de/lingo-syn.txt
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
+
# Synonym-Wörterbuch lingo-syn.txt
|
2
|
+
# enthält als Basis für eigene Erweiterungen
|
3
|
+
# exemplarisch Synonyme, die mit "a" beginnen
|
1
4
|
#
|
2
|
-
#
|
3
|
-
|
4
|
-
# exemplarisch Synonyme, die mit "a" beginnen
|
5
|
-
#
|
6
|
-
# Stand: 16.03.05 / Klaus Lepsky
|
7
|
-
#
|
8
|
-
#
|
5
|
+
# Stand: 16.03.05 / Klaus Lepsky
|
6
|
+
|
9
7
|
aachener kongress=aachen / kongress <1818>
|
10
8
|
aal=flussaal
|
11
9
|
aalartige fische=aalfische
|
@@ -5405,7 +5403,6 @@ alter=betagter
|
|
5405
5403
|
alter <100 jahre>=hundertjähriger
|
5406
5404
|
alter druck=altes buch
|
5407
5405
|
alter ego=alter ego
|
5408
|
-
alter ego=alter ego
|
5409
5406
|
alter herr=philister <studentenverbindung>
|
5410
5407
|
alter mensch=alter
|
5411
5408
|
alter stil / klassisches chinesisch=guwen
|
data/de/test_dic.txt
CHANGED
data/de/test_gen.txt
ADDED
File without changes
|
File without changes
|
data/de/user-dic.txt
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
4
|
-
#
|
5
|
-
#
|
1
|
+
# Musterwörterbuch user-dic.txt als Vorlage
|
2
|
+
# für eigene Benutzerwörterbücher
|
3
|
+
|
4
|
+
bibliografie=bibliografie #s
|
5
|
+
bibliographie=bibliografie #s
|
6
6
|
klassifikation=klassifikation #s
|
7
7
|
thesaurus=thesaurus #s
|
8
8
|
verschlagwortung=verschlagwortung #s
|
9
|
-
bibliografie=bibliografie #s
|
10
|
-
bibliographie=bibliografie #s
|
data/de.lang
CHANGED
@@ -40,71 +40,86 @@
|
|
40
40
|
# definiert sein, da es sonst noch nicht existiert!
|
41
41
|
#
|
42
42
|
|
43
|
-
|
44
|
-
---
|
43
|
+
---
|
45
44
|
language:
|
46
45
|
name: 'Deutsch'
|
47
46
|
|
48
47
|
dictionary:
|
49
48
|
databases:
|
50
|
-
#
|
49
|
+
# System dictionaries
|
51
50
|
sys-dic: { name: de/lingo-dic.txt, txt-format: WordClass, separator: '=' }
|
52
51
|
sys-abk: { name: de/lingo-abk.txt, txt-format: WordClass, separator: '=' }
|
53
|
-
sys-syn: { name: de/lingo-syn.txt, txt-format: KeyValue,
|
54
|
-
sys-mul: { name: de/lingo-mul.txt, txt-format: SingleWord, use-lex:
|
55
|
-
# Benutzerwörterbücher
|
56
|
-
usr-dic: { name: de/user-dic.txt, txt-format: WordClass, separator: '=' }
|
52
|
+
sys-syn: { name: de/lingo-syn.txt, txt-format: KeyValue, separator: '=', def-wc: y }
|
53
|
+
sys-mul: { name: de/lingo-mul.txt, txt-format: SingleWord, use-lex: sys-dic, def-wc: m }
|
57
54
|
|
58
|
-
#
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
tst-
|
63
|
-
tst-
|
64
|
-
tst-
|
65
|
-
tst-
|
66
|
-
tst-sgw: { name: de/
|
55
|
+
# User dictionaries
|
56
|
+
usr-dic: { name: de/user-dic.txt, txt-format: WordClass, separator: '=' }
|
57
|
+
|
58
|
+
# Test dictionaries
|
59
|
+
tst-dic: { name: de/test_dic.txt, txt-format: WordClass } # TEST: Lesen von zwei Quellen
|
60
|
+
tst-syn: { name: de/test_syn.txt, txt-format: MultiValue, def-wc: y } # TEST: Mehrere Datenquellen
|
61
|
+
tst-mul: { name: de/test_mul.txt, use-lex: sys-dic, def-wc: m } # TEST: Mehrere Multiwörterbücher
|
62
|
+
tst-mu2: { name: de/test_mu2.txt, use-lex: sys-dic, def-wc: m } # TEST: Mehrere Multiwörterbücher
|
63
|
+
tst-sgw: { name: de/test_sgw.txt, txt-format: SingleWord } # TEST: SingleWord-Format
|
64
|
+
tst-gen: { name: de/test_gen.txt, txt-format: WordClass } # TEST: Genus
|
67
65
|
|
68
66
|
compound:
|
69
|
-
min-word-size:
|
70
|
-
min-part-size:
|
71
|
-
max-parts:
|
72
|
-
min-avg-part-size:
|
73
|
-
append-wordclass:
|
67
|
+
min-word-size: '7'
|
68
|
+
min-part-size: '3'
|
69
|
+
max-parts: '5'
|
70
|
+
min-avg-part-size: '4'
|
71
|
+
append-wordclass: '+'
|
74
72
|
skip-sequences: [ xx ]
|
75
73
|
|
76
74
|
suffix:
|
77
|
-
#
|
78
|
-
#
|
79
|
-
#
|
80
|
-
- [s,
|
81
|
-
- [a,
|
82
|
-
- [v,
|
83
|
-
- [e,
|
84
|
-
- [f,
|
75
|
+
# Suffixliste, Stand: 30-06-2005
|
76
|
+
# Suffixklasse: s = Substantiv, a = Adjektiv, v = Verb, e = Eigenwort, f = Fugung
|
77
|
+
# Suffixe je Klasse: "<suffix>['/'<ersetzung>][ <suffix>['/'<ersetzung>]]"
|
78
|
+
- [s, 'e en er ern es n s se sen ses']
|
79
|
+
- [a, 'este ste ster sten stes ester estes esten e em en er ere eren erer eres es erem']
|
80
|
+
- [v, 'e/en en/en est/en et/en st/en t/en te/en ten/en eten/en ete/en etest/en s']
|
81
|
+
- [e, 's']
|
82
|
+
- [f, 's n e en es er ch/che /en']
|
83
|
+
|
84
|
+
inflect:
|
85
|
+
a: # adjectives
|
86
|
+
f: e # feminine
|
87
|
+
m: er # masculine
|
88
|
+
n: es # neuter
|
89
|
+
p: e # plurale tantum
|
85
90
|
|
86
91
|
attendees:
|
87
92
|
variator:
|
88
93
|
variations:
|
89
|
-
- [
|
90
|
-
- [
|
91
|
-
- [
|
92
|
-
- [
|
93
|
-
- [
|
94
|
-
- [
|
95
|
-
- [
|
96
|
-
- [
|
97
|
-
- [
|
98
|
-
- [
|
99
|
-
- [
|
100
|
-
- [
|
101
|
-
- [
|
102
|
-
- [
|
103
|
-
- [
|
104
|
-
- [
|
105
|
-
- [
|
106
|
-
- [
|
107
|
-
- [
|
94
|
+
- [ieh, sch]
|
95
|
+
- [fec, see]
|
96
|
+
- [it, st ]
|
97
|
+
- [fch, sch]
|
98
|
+
- [fp, sp ]
|
99
|
+
- [f, s ]
|
100
|
+
- [c, e ]
|
101
|
+
- [ffc, sse]
|
102
|
+
- [ff, ss ]
|
103
|
+
- [e, c ]
|
104
|
+
- [ni, m ]
|
105
|
+
- [feh, sch]
|
106
|
+
- [lt, st ]
|
107
|
+
- [il, st ]
|
108
|
+
- [ftc, ste]
|
109
|
+
- [ft, st ]
|
110
|
+
- [fl, st ]
|
111
|
+
- [li, h ]
|
112
|
+
- [i, s ]
|
108
113
|
|
109
114
|
sequencer:
|
110
|
-
sequences: [ [AS,
|
115
|
+
sequences: [ [AS, '2, 1'], [AK, '2, 1'], [AAK, '3, 1 2'], [AAS, '3, 1 2'] ]
|
116
|
+
|
117
|
+
# tokenizer rules:
|
118
|
+
# SPAC = \s+
|
119
|
+
# NUMS = [+-]?(?:\d{4,}|\d{1,3}(?:\.\d{3,3})*)(?:\.|(?:,\d+)?%?)
|
120
|
+
# URLS = (?:www\.|mailto:|(?:news|https?|ftps?)://|\S+?[._]\S+?@\S+?\.)\S+
|
121
|
+
# ABRV = (?:(?:(?:#{CHAR})+\.)+)(?:#{CHAR})+
|
122
|
+
# WORD = (?:#{CHAR}|#{DIGIT}|-)+
|
123
|
+
# PUNC = [!,.:;?¡¿]
|
124
|
+
# OTHR = ["$#%&'()*+\-/<=>@\[\\\]^_{|}~¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾×÷]
|
125
|
+
# HELP = \S*
|