lingo 1.8.4.2 → 1.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +413 -325
- data/README +380 -131
- data/Rakefile +19 -21
- data/de/lingo-abk.txt +15 -17
- data/de/lingo-dic.txt +20210 -20659
- data/de/lingo-mul.txt +5 -13
- data/de/lingo-syn.txt +5 -8
- data/de/test_dic.txt +2 -0
- data/de/test_gen.txt +8 -0
- data/de/{test_mul2.txt → test_mu2.txt} +0 -0
- data/de/{test_singleword.txt → test_sgw.txt} +0 -0
- data/de/user-dic.txt +5 -7
- data/de.lang +64 -49
- data/en/lingo-dic.txt +6398 -6404
- data/en/lingo-irr.txt +2 -3
- data/en/lingo-mul.txt +6 -7
- data/en/lingo-wdn.txt +881 -1762
- data/en/user-dic.txt +2 -5
- data/en.lang +39 -39
- data/lib/lingo/app.rb +10 -6
- data/lib/lingo/attendee/abbreviator.rb +1 -0
- data/lib/lingo/attendee/decomposer.rb +2 -1
- data/lib/lingo/attendee/multi_worder.rb +5 -6
- data/lib/lingo/attendee/stemmer.rb +1 -1
- data/lib/lingo/attendee/synonymer.rb +4 -2
- data/lib/lingo/attendee/text_reader.rb +77 -57
- data/lib/lingo/attendee/text_writer.rb +1 -1
- data/lib/lingo/attendee/tokenizer.rb +101 -50
- data/lib/lingo/attendee/variator.rb +2 -1
- data/lib/lingo/attendee/vector_filter.rb +28 -6
- data/lib/lingo/attendee/word_searcher.rb +2 -1
- data/lib/lingo/attendee.rb +8 -4
- data/lib/lingo/call.rb +7 -3
- data/lib/lingo/cli.rb +8 -16
- data/lib/lingo/config.rb +11 -6
- data/lib/lingo/ctl.rb +54 -3
- data/lib/lingo/database/crypter.rb +8 -14
- data/lib/lingo/database/hash_store.rb +1 -1
- data/lib/lingo/database/{show_progress.rb → progress.rb} +7 -8
- data/lib/lingo/database/source/key_value.rb +6 -5
- data/lib/lingo/database/source/multi_key.rb +5 -2
- data/lib/lingo/database/source/multi_value.rb +6 -4
- data/lib/lingo/database/source/single_word.rb +2 -3
- data/lib/lingo/database/source/word_class.rb +24 -5
- data/lib/lingo/database/source.rb +5 -3
- data/lib/lingo/database.rb +102 -41
- data/lib/lingo/error.rb +24 -2
- data/lib/lingo/language/dictionary.rb +26 -54
- data/lib/lingo/language/grammar.rb +19 -23
- data/lib/lingo/language/lexical.rb +5 -1
- data/lib/lingo/language/lexical_hash.rb +7 -12
- data/lib/lingo/language/token.rb +10 -1
- data/lib/lingo/language/word.rb +35 -23
- data/lib/lingo/language/word_form.rb +5 -4
- data/lib/lingo/{show_progress.rb → progress.rb} +43 -30
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/srv/public/.gitkeep +0 -0
- data/lib/lingo/srv.rb +11 -6
- data/lib/lingo/version.rb +2 -2
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/lib/lingo/web/views/index.erb +4 -4
- data/lib/lingo/web.rb +4 -6
- data/lib/lingo.rb +4 -12
- data/lingo.cfg +1 -1
- data/lir.cfg +1 -1
- data/ru/lingo-dic.txt +33473 -2113
- data/ru/lingo-mul.txt +8430 -1913
- data/ru/lingo-syn.txt +1634 -0
- data/ru/user-dic.txt +6 -0
- data/ru.lang +49 -47
- data/spec/spec_helper.rb +4 -0
- data/test/attendee/ts_decomposer.rb +2 -2
- data/test/attendee/ts_synonymer.rb +3 -3
- data/test/attendee/ts_tokenizer.rb +215 -2
- data/test/attendee/ts_variator.rb +2 -2
- data/test/attendee/ts_word_searcher.rb +10 -6
- data/test/ref/artikel.seq +2 -2
- data/test/ref/artikel.vec +5 -5
- data/test/ref/artikel.ven +11 -11
- data/test/ref/artikel.ver +11 -11
- data/test/ref/lir.seq +13 -13
- data/test/ref/lir.vec +31 -31
- data/test/test_helper.rb +19 -5
- data/test/ts_database.rb +206 -77
- data/test/ts_language.rb +86 -26
- metadata +93 -49
- data/.rspec +0 -1
- data/de/test_syn2.txt +0 -1
data/de/lingo-mul.txt
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
+
# Mehrwort-Wörterbuch lingo-mul.txt
|
2
|
+
# enthält als Basis für eigene Erweiterungen
|
3
|
+
# exemplarisch Mehrwortbegriffe, die mit "a" beginnen
|
1
4
|
#
|
2
|
-
#
|
3
|
-
|
4
|
-
# exemplarisch Mehrwortbegriffe, die mit "a" beginnen
|
5
|
-
#
|
6
|
-
# Stand: 18.10.05 / Klaus Lepsky
|
7
|
-
#
|
8
|
-
#
|
5
|
+
# Stand: 18.10.05 / Klaus Lepsky
|
6
|
+
|
9
7
|
albert einstein
|
10
8
|
albert ainshtain
|
11
9
|
einstein, albert
|
@@ -13,12 +11,6 @@ a data query language
|
|
13
11
|
a fresco
|
14
12
|
a part
|
15
13
|
a priori
|
16
|
-
abelsche transformation
|
17
|
-
abelsche umformung
|
18
|
-
a data query language
|
19
|
-
a fresco
|
20
|
-
a part
|
21
|
-
a priori
|
22
14
|
a programming language
|
23
15
|
a- 3-struktur
|
24
16
|
a- 4-struktur
|
data/de/lingo-syn.txt
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
+
# Synonym-Wörterbuch lingo-syn.txt
|
2
|
+
# enthält als Basis für eigene Erweiterungen
|
3
|
+
# exemplarisch Synonyme, die mit "a" beginnen
|
1
4
|
#
|
2
|
-
#
|
3
|
-
|
4
|
-
# exemplarisch Synonyme, die mit "a" beginnen
|
5
|
-
#
|
6
|
-
# Stand: 16.03.05 / Klaus Lepsky
|
7
|
-
#
|
8
|
-
#
|
5
|
+
# Stand: 16.03.05 / Klaus Lepsky
|
6
|
+
|
9
7
|
aachener kongress=aachen / kongress <1818>
|
10
8
|
aal=flussaal
|
11
9
|
aalartige fische=aalfische
|
@@ -5405,7 +5403,6 @@ alter=betagter
|
|
5405
5403
|
alter <100 jahre>=hundertjähriger
|
5406
5404
|
alter druck=altes buch
|
5407
5405
|
alter ego=alter ego
|
5408
|
-
alter ego=alter ego
|
5409
5406
|
alter herr=philister <studentenverbindung>
|
5410
5407
|
alter mensch=alter
|
5411
5408
|
alter stil / klassisches chinesisch=guwen
|
data/de/test_dic.txt
CHANGED
data/de/test_gen.txt
ADDED
File without changes
|
File without changes
|
data/de/user-dic.txt
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
4
|
-
#
|
5
|
-
#
|
1
|
+
# Musterwörterbuch user-dic.txt als Vorlage
|
2
|
+
# für eigene Benutzerwörterbücher
|
3
|
+
|
4
|
+
bibliografie=bibliografie #s
|
5
|
+
bibliographie=bibliografie #s
|
6
6
|
klassifikation=klassifikation #s
|
7
7
|
thesaurus=thesaurus #s
|
8
8
|
verschlagwortung=verschlagwortung #s
|
9
|
-
bibliografie=bibliografie #s
|
10
|
-
bibliographie=bibliografie #s
|
data/de.lang
CHANGED
@@ -40,71 +40,86 @@
|
|
40
40
|
# definiert sein, da es sonst noch nicht existiert!
|
41
41
|
#
|
42
42
|
|
43
|
-
|
44
|
-
---
|
43
|
+
---
|
45
44
|
language:
|
46
45
|
name: 'Deutsch'
|
47
46
|
|
48
47
|
dictionary:
|
49
48
|
databases:
|
50
|
-
#
|
49
|
+
# System dictionaries
|
51
50
|
sys-dic: { name: de/lingo-dic.txt, txt-format: WordClass, separator: '=' }
|
52
51
|
sys-abk: { name: de/lingo-abk.txt, txt-format: WordClass, separator: '=' }
|
53
|
-
sys-syn: { name: de/lingo-syn.txt, txt-format: KeyValue,
|
54
|
-
sys-mul: { name: de/lingo-mul.txt, txt-format: SingleWord, use-lex:
|
55
|
-
# Benutzerwörterbücher
|
56
|
-
usr-dic: { name: de/user-dic.txt, txt-format: WordClass, separator: '=' }
|
52
|
+
sys-syn: { name: de/lingo-syn.txt, txt-format: KeyValue, separator: '=', def-wc: y }
|
53
|
+
sys-mul: { name: de/lingo-mul.txt, txt-format: SingleWord, use-lex: sys-dic, def-wc: m }
|
57
54
|
|
58
|
-
#
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
tst-
|
63
|
-
tst-
|
64
|
-
tst-
|
65
|
-
tst-
|
66
|
-
tst-sgw: { name: de/
|
55
|
+
# User dictionaries
|
56
|
+
usr-dic: { name: de/user-dic.txt, txt-format: WordClass, separator: '=' }
|
57
|
+
|
58
|
+
# Test dictionaries
|
59
|
+
tst-dic: { name: de/test_dic.txt, txt-format: WordClass } # TEST: Lesen von zwei Quellen
|
60
|
+
tst-syn: { name: de/test_syn.txt, txt-format: MultiValue, def-wc: y } # TEST: Mehrere Datenquellen
|
61
|
+
tst-mul: { name: de/test_mul.txt, use-lex: sys-dic, def-wc: m } # TEST: Mehrere Multiwörterbücher
|
62
|
+
tst-mu2: { name: de/test_mu2.txt, use-lex: sys-dic, def-wc: m } # TEST: Mehrere Multiwörterbücher
|
63
|
+
tst-sgw: { name: de/test_sgw.txt, txt-format: SingleWord } # TEST: SingleWord-Format
|
64
|
+
tst-gen: { name: de/test_gen.txt, txt-format: WordClass } # TEST: Genus
|
67
65
|
|
68
66
|
compound:
|
69
|
-
min-word-size:
|
70
|
-
min-part-size:
|
71
|
-
max-parts:
|
72
|
-
min-avg-part-size:
|
73
|
-
append-wordclass:
|
67
|
+
min-word-size: '7'
|
68
|
+
min-part-size: '3'
|
69
|
+
max-parts: '5'
|
70
|
+
min-avg-part-size: '4'
|
71
|
+
append-wordclass: '+'
|
74
72
|
skip-sequences: [ xx ]
|
75
73
|
|
76
74
|
suffix:
|
77
|
-
#
|
78
|
-
#
|
79
|
-
#
|
80
|
-
- [s,
|
81
|
-
- [a,
|
82
|
-
- [v,
|
83
|
-
- [e,
|
84
|
-
- [f,
|
75
|
+
# Suffixliste, Stand: 30-06-2005
|
76
|
+
# Suffixklasse: s = Substantiv, a = Adjektiv, v = Verb, e = Eigenwort, f = Fugung
|
77
|
+
# Suffixe je Klasse: "<suffix>['/'<ersetzung>][ <suffix>['/'<ersetzung>]]"
|
78
|
+
- [s, 'e en er ern es n s se sen ses']
|
79
|
+
- [a, 'este ste ster sten stes ester estes esten e em en er ere eren erer eres es erem']
|
80
|
+
- [v, 'e/en en/en est/en et/en st/en t/en te/en ten/en eten/en ete/en etest/en s']
|
81
|
+
- [e, 's']
|
82
|
+
- [f, 's n e en es er ch/che /en']
|
83
|
+
|
84
|
+
inflect:
|
85
|
+
a: # adjectives
|
86
|
+
f: e # feminine
|
87
|
+
m: er # masculine
|
88
|
+
n: es # neuter
|
89
|
+
p: e # plurale tantum
|
85
90
|
|
86
91
|
attendees:
|
87
92
|
variator:
|
88
93
|
variations:
|
89
|
-
- [
|
90
|
-
- [
|
91
|
-
- [
|
92
|
-
- [
|
93
|
-
- [
|
94
|
-
- [
|
95
|
-
- [
|
96
|
-
- [
|
97
|
-
- [
|
98
|
-
- [
|
99
|
-
- [
|
100
|
-
- [
|
101
|
-
- [
|
102
|
-
- [
|
103
|
-
- [
|
104
|
-
- [
|
105
|
-
- [
|
106
|
-
- [
|
107
|
-
- [
|
94
|
+
- [ieh, sch]
|
95
|
+
- [fec, see]
|
96
|
+
- [it, st ]
|
97
|
+
- [fch, sch]
|
98
|
+
- [fp, sp ]
|
99
|
+
- [f, s ]
|
100
|
+
- [c, e ]
|
101
|
+
- [ffc, sse]
|
102
|
+
- [ff, ss ]
|
103
|
+
- [e, c ]
|
104
|
+
- [ni, m ]
|
105
|
+
- [feh, sch]
|
106
|
+
- [lt, st ]
|
107
|
+
- [il, st ]
|
108
|
+
- [ftc, ste]
|
109
|
+
- [ft, st ]
|
110
|
+
- [fl, st ]
|
111
|
+
- [li, h ]
|
112
|
+
- [i, s ]
|
108
113
|
|
109
114
|
sequencer:
|
110
|
-
sequences: [ [AS,
|
115
|
+
sequences: [ [AS, '2, 1'], [AK, '2, 1'], [AAK, '3, 1 2'], [AAS, '3, 1 2'] ]
|
116
|
+
|
117
|
+
# tokenizer rules:
|
118
|
+
# SPAC = \s+
|
119
|
+
# NUMS = [+-]?(?:\d{4,}|\d{1,3}(?:\.\d{3,3})*)(?:\.|(?:,\d+)?%?)
|
120
|
+
# URLS = (?:www\.|mailto:|(?:news|https?|ftps?)://|\S+?[._]\S+?@\S+?\.)\S+
|
121
|
+
# ABRV = (?:(?:(?:#{CHAR})+\.)+)(?:#{CHAR})+
|
122
|
+
# WORD = (?:#{CHAR}|#{DIGIT}|-)+
|
123
|
+
# PUNC = [!,.:;?¡¿]
|
124
|
+
# OTHR = ["$#%&'()*+\-/<=>@\[\\\]^_{|}~¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾×÷]
|
125
|
+
# HELP = \S*
|