lingo 1.8.0 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +13 -0
- data/README +49 -29
- data/Rakefile +28 -4
- data/TODO +2 -9
- data/bin/lingo +24 -0
- data/bin/lingoctl +24 -0
- data/de/lingo-dic.txt +559 -74
- data/info/gpl-hdr.txt +21 -24
- data/lib/lingo.rb +83 -112
- data/lib/lingo/agenda_item.rb +53 -0
- data/lib/lingo/attendee.rb +261 -0
- data/lib/lingo/attendee/abbreviator.rb +95 -97
- data/lib/lingo/attendee/debugger.rb +94 -93
- data/lib/lingo/attendee/decomposer.rb +76 -83
- data/lib/lingo/attendee/dehyphenizer.rb +141 -144
- data/lib/lingo/attendee/formatter.rb +65 -0
- data/lib/lingo/attendee/multi_worder.rb +302 -0
- data/lib/lingo/attendee/noneword_filter.rb +89 -84
- data/lib/lingo/attendee/object_filter.rb +91 -0
- data/lib/lingo/attendee/sequencer.rb +159 -158
- data/lib/lingo/attendee/synonymer.rb +81 -84
- data/lib/lingo/attendee/text_reader.rb +242 -0
- data/lib/lingo/attendee/text_writer.rb +169 -0
- data/lib/lingo/attendee/tokenizer.rb +192 -191
- data/lib/lingo/attendee/variator.rb +152 -156
- data/lib/lingo/attendee/vector_filter.rb +140 -135
- data/lib/lingo/attendee/word_searcher.rb +98 -0
- data/lib/lingo/buffered_attendee.rb +69 -0
- data/lib/lingo/cachable.rb +58 -0
- data/lib/lingo/call.rb +72 -0
- data/lib/lingo/cli.rb +26 -0
- data/lib/lingo/config.rb +23 -26
- data/lib/lingo/core_ext.rb +42 -0
- data/lib/lingo/ctl.rb +239 -173
- data/lib/lingo/database.rb +148 -496
- data/lib/lingo/database/crypter.rb +85 -0
- data/lib/lingo/database/gdbm_store.rb +49 -0
- data/lib/lingo/database/hash_store.rb +67 -0
- data/lib/lingo/database/libcdb_store.rb +58 -0
- data/lib/lingo/database/sdbm_store.rb +64 -0
- data/lib/lingo/database/show_progress.rb +81 -0
- data/lib/lingo/database/source.rb +134 -0
- data/lib/lingo/database/source/key_value.rb +62 -0
- data/lib/lingo/database/source/multi_key.rb +65 -0
- data/lib/lingo/database/source/multi_value.rb +65 -0
- data/lib/lingo/database/source/single_word.rb +60 -0
- data/lib/lingo/database/source/word_class.rb +64 -0
- data/lib/lingo/error.rb +122 -0
- data/lib/lingo/language.rb +78 -518
- data/lib/lingo/language/dictionary.rb +173 -0
- data/lib/lingo/language/grammar.rb +211 -0
- data/lib/lingo/language/lexical.rb +66 -0
- data/lib/lingo/language/lexical_hash.rb +88 -0
- data/lib/lingo/language/token.rb +48 -0
- data/lib/lingo/language/word.rb +130 -0
- data/lib/lingo/language/word_form.rb +83 -0
- data/lib/lingo/reportable.rb +59 -0
- data/lib/lingo/version.rb +1 -1
- data/lingo-all.cfg +14 -10
- data/lingo-call.cfg +5 -5
- data/lingo.cfg +14 -12
- data/lingo.rb +26 -0
- data/lir.cfg +13 -9
- data/spec/spec_helper.rb +1 -0
- data/test.cfg +11 -11
- data/test/attendee/ts_abbreviator.rb +0 -6
- data/test/attendee/ts_decomposer.rb +0 -6
- data/test/attendee/{ts_multiworder.rb → ts_multi_worder.rb} +1 -7
- data/test/attendee/ts_noneword_filter.rb +1 -7
- data/test/attendee/{ts_objectfilter.rb → ts_object_filter.rb} +1 -7
- data/test/attendee/ts_sequencer.rb +0 -6
- data/test/attendee/ts_synonymer.rb +0 -6
- data/test/attendee/{ts_textreader.rb → ts_text_reader.rb} +1 -7
- data/test/attendee/{ts_textwriter.rb → ts_text_writer.rb} +1 -7
- data/test/attendee/ts_tokenizer.rb +0 -6
- data/test/attendee/ts_variator.rb +0 -6
- data/test/attendee/ts_vector_filter.rb +1 -7
- data/test/attendee/{ts_wordsearcher.rb → ts_word_searcher.rb} +1 -7
- data/test/ref/artikel.non +2 -29
- data/test/ref/artikel.seq +13 -8
- data/test/ref/artikel.vec +30 -15
- data/test/ref/artikel.ven +29 -14
- data/test/ref/artikel.ver +58 -43
- data/test/ref/lir.csv +146 -145
- data/test/ref/lir.non +186 -210
- data/test/ref/lir.seq +54 -50
- data/test/test_helper.rb +41 -36
- data/test/ts_database.rb +12 -11
- data/test/ts_language.rb +118 -68
- metadata +67 -29
- data/lib/lingo/attendee/multiworder.rb +0 -301
- data/lib/lingo/attendee/objectfilter.rb +0 -86
- data/lib/lingo/attendee/textreader.rb +0 -237
- data/lib/lingo/attendee/textwriter.rb +0 -196
- data/lib/lingo/attendee/wordsearcher.rb +0 -96
- data/lib/lingo/attendees.rb +0 -289
- data/lib/lingo/const.rb +0 -131
- data/lib/lingo/modules.rb +0 -98
- data/lib/lingo/types.rb +0 -285
- data/lib/lingo/utilities.rb +0 -40
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
#--
|
|
4
|
+
###############################################################################
|
|
5
|
+
# #
|
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
|
7
|
+
# #
|
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
|
10
|
+
# #
|
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
|
14
|
+
# any later version. #
|
|
15
|
+
# #
|
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
|
19
|
+
# more details. #
|
|
20
|
+
# #
|
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
|
23
|
+
# #
|
|
24
|
+
###############################################################################
|
|
25
|
+
#++
|
|
26
|
+
|
|
27
|
+
class Lingo
|
|
28
|
+
|
|
29
|
+
module Language
|
|
30
|
+
|
|
31
|
+
# Die Klasse LexicalHash ermöglicht den Zugriff auf die Lingodatenbanken. Im Gegensatz zur
|
|
32
|
+
# Klasse Database, welche nur Strings als Ergebnis zurückgibt, wird hier als Ergebnis ein
|
|
33
|
+
# Array von Lexical-Objekten zurückgegeben.
|
|
34
|
+
|
|
35
|
+
class LexicalHash
|
|
36
|
+
|
|
37
|
+
include Cachable
|
|
38
|
+
include Reportable
|
|
39
|
+
|
|
40
|
+
def initialize(id, lingo)
|
|
41
|
+
init_cachable
|
|
42
|
+
init_reportable(id)
|
|
43
|
+
|
|
44
|
+
@wc = lingo.database_config(id).fetch('def-wc', LA_UNKNOWN)
|
|
45
|
+
@src = Database.open(id, lingo)
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def close
|
|
49
|
+
@src.close
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def [](key)
|
|
53
|
+
inc('total requests')
|
|
54
|
+
key = key.downcase
|
|
55
|
+
|
|
56
|
+
if hit?(key)
|
|
57
|
+
inc('cache hits')
|
|
58
|
+
return retrieve(key)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
inc('source reads')
|
|
62
|
+
|
|
63
|
+
if record = @src[key]
|
|
64
|
+
record = record.map { |str|
|
|
65
|
+
case str
|
|
66
|
+
when /^\*\d+$/ then str
|
|
67
|
+
when /^#(.)$/ then Lexical.new(key, $1)
|
|
68
|
+
when /^([^#]+?)\s*#(.)$/ then Lexical.new($1, $2)
|
|
69
|
+
when /^([^#]+)$/ then Lexical.new($1, @wc)
|
|
70
|
+
else str
|
|
71
|
+
end
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
record.compact!
|
|
75
|
+
record.sort!
|
|
76
|
+
record.uniq!
|
|
77
|
+
|
|
78
|
+
inc('data found')
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
store(key, record)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
#--
|
|
4
|
+
###############################################################################
|
|
5
|
+
# #
|
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
|
7
|
+
# #
|
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
|
10
|
+
# #
|
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
|
14
|
+
# any later version. #
|
|
15
|
+
# #
|
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
|
19
|
+
# more details. #
|
|
20
|
+
# #
|
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
|
23
|
+
# #
|
|
24
|
+
###############################################################################
|
|
25
|
+
#++
|
|
26
|
+
|
|
27
|
+
class Lingo
|
|
28
|
+
|
|
29
|
+
module Language
|
|
30
|
+
|
|
31
|
+
# Die Klasse Token, abgeleitet von der Klasse WordForm, stellt den Container
|
|
32
|
+
# für ein einzelnes Wort eines Textes dar. Das Wort wird mit einem Attribut versehen,
|
|
33
|
+
# welches der Regel entspricht, die dieses Wort identifiziert hat.
|
|
34
|
+
#
|
|
35
|
+
# Steht z.B. in ruby.cfg eine Regel zur Erkennung einer Zahl, die mit NUM bezeichnet wird,
|
|
36
|
+
# so wird dies dem Token angeheftet, z.B. Token.new('100', 'NUM') -> :100/NUM:
|
|
37
|
+
|
|
38
|
+
class Token < WordForm
|
|
39
|
+
|
|
40
|
+
def to_s
|
|
41
|
+
":#{super}:"
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
#--
|
|
4
|
+
###############################################################################
|
|
5
|
+
# #
|
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
|
7
|
+
# #
|
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
|
10
|
+
# #
|
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
|
14
|
+
# any later version. #
|
|
15
|
+
# #
|
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
|
19
|
+
# more details. #
|
|
20
|
+
# #
|
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
|
23
|
+
# #
|
|
24
|
+
###############################################################################
|
|
25
|
+
#++
|
|
26
|
+
|
|
27
|
+
class Lingo
|
|
28
|
+
|
|
29
|
+
module Language
|
|
30
|
+
|
|
31
|
+
# Die Klasse Word bündelt spezifische Eigenschaften eines Wortes mit den
|
|
32
|
+
# dazu notwendigen Methoden.
|
|
33
|
+
|
|
34
|
+
class Word < WordForm
|
|
35
|
+
|
|
36
|
+
def self.new_lexical(form, attr, lex_attr)
|
|
37
|
+
new(form, attr) << Lexical.new(form, lex_attr)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Exakte Repräsentation der originären Zeichenkette, so wie sie im Satz
|
|
41
|
+
# gefunden wurde, z.B. <tt>form = "RubyLing"</tt>
|
|
42
|
+
#
|
|
43
|
+
# Ergebnis der Wörterbuch-Suche. Sie stellt die Grundform des Wortes dar.
|
|
44
|
+
# Dabei kann es mehrere mögliche Grundformen geben, z.B. kann +abgeschoben+
|
|
45
|
+
# als Grundform das _Adjektiv_ +abgeschoben+ sein, oder aber das _Verb_
|
|
46
|
+
# +abschieben+.
|
|
47
|
+
#
|
|
48
|
+
# <tt>lemma = [['abgeschoben', '#a'], ['abschieben', '#v']]</tt>.
|
|
49
|
+
#
|
|
50
|
+
# <b>Achtung: Lemma wird nicht durch die Word-Klasse bestückt, sondern extern
|
|
51
|
+
# durch die Klasse Dictionary</b>
|
|
52
|
+
|
|
53
|
+
def initialize(form, attr = WA_UNSET)
|
|
54
|
+
super
|
|
55
|
+
@lexicals = []
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def lexicals(compound_parts = true)
|
|
59
|
+
if !compound_parts && attr == WA_KOMPOSITUM
|
|
60
|
+
@lexicals.select { |lex| lex.attr == LA_KOMPOSITUM }
|
|
61
|
+
else
|
|
62
|
+
@lexicals
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def lexicals=(lexis)
|
|
67
|
+
if lexis.is_a?(Array)
|
|
68
|
+
@lexicals = lexis.sort.uniq
|
|
69
|
+
else
|
|
70
|
+
raise TypeError, "wrong argument type #{lexis.class} (expected Array)"
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def attrs(compound_parts = true)
|
|
75
|
+
lexicals(compound_parts).map { |lex| lex.attr }
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def parts
|
|
79
|
+
1
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def min_part_size
|
|
83
|
+
form.length
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# Gibt genau die Grundform der Wortklasse zurück, die der RegExp des Übergabe-Parameters
|
|
87
|
+
# entspricht, z.B. <tt>word.get_class(/a/) = ['abgeschoben', '#a']</tt>
|
|
88
|
+
def get_class(wc_re)
|
|
89
|
+
wc_re = Regexp.new(wc_re) unless wc_re.is_a?(Regexp)
|
|
90
|
+
|
|
91
|
+
unless lexicals.empty?
|
|
92
|
+
lexicals.select { |lex| lex.attr =~ wc_re }
|
|
93
|
+
else
|
|
94
|
+
attr =~ wc_re ? [self] : []
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def norm
|
|
99
|
+
identified? ? lexicals.first.form : form
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def compo_form
|
|
103
|
+
if attr == WA_KOMPOSITUM
|
|
104
|
+
get_class(LA_KOMPOSITUM).first
|
|
105
|
+
else
|
|
106
|
+
nil
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def <<(*other)
|
|
111
|
+
lexicals.concat(other.flatten)
|
|
112
|
+
self
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def <=>(other)
|
|
116
|
+
other.nil? ? 1 : to_a.push(lexicals) <=> other.to_a.push(other.lexicals)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def to_s
|
|
120
|
+
s = "<#{form}"
|
|
121
|
+
s << "|#{attr}" unless identified?
|
|
122
|
+
s << " = #{lexicals.inspect}" unless lexicals.empty?
|
|
123
|
+
s << '>'
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
#--
|
|
4
|
+
###############################################################################
|
|
5
|
+
# #
|
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
|
7
|
+
# #
|
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
|
10
|
+
# #
|
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
|
14
|
+
# any later version. #
|
|
15
|
+
# #
|
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
|
19
|
+
# more details. #
|
|
20
|
+
# #
|
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
|
23
|
+
# #
|
|
24
|
+
###############################################################################
|
|
25
|
+
#++
|
|
26
|
+
|
|
27
|
+
class Lingo
|
|
28
|
+
|
|
29
|
+
module Language
|
|
30
|
+
|
|
31
|
+
# Die Klasse WordForm ist die Basisklasse für weitere Klassen, die im Rahmen der
|
|
32
|
+
# Objektstruktur eines Wortes benötigt werden. Die Klasse stellt eine Zeichenkette bereit,
|
|
33
|
+
# die mit einem Attribut versehen werden kann.
|
|
34
|
+
|
|
35
|
+
class WordForm
|
|
36
|
+
|
|
37
|
+
include Comparable
|
|
38
|
+
|
|
39
|
+
attr_accessor :form, :attr
|
|
40
|
+
|
|
41
|
+
def initialize(form, attr = '-')
|
|
42
|
+
@form, @attr = form || '', attr || ''
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def unknown?
|
|
46
|
+
[WA_UNKNOWN, WA_UNKMULPART].include?(attr)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def identified?
|
|
50
|
+
attr == WA_IDENTIFIED
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def <=>(other)
|
|
54
|
+
other.nil? ? 1 : to_a <=> other.to_a
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def to_a
|
|
58
|
+
[form, attr]
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def to_s
|
|
62
|
+
to_a.join('/')
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def inspect
|
|
66
|
+
to_s
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def hash
|
|
70
|
+
to_s.hash
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def eql?(other)
|
|
74
|
+
self.class.equal?(other.class) && to_s == other.to_s
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
alias_method :==, :eql?
|
|
78
|
+
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
#--
|
|
4
|
+
###############################################################################
|
|
5
|
+
# #
|
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
|
7
|
+
# #
|
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
|
10
|
+
# #
|
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
|
14
|
+
# any later version. #
|
|
15
|
+
# #
|
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
|
19
|
+
# more details. #
|
|
20
|
+
# #
|
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
|
23
|
+
# #
|
|
24
|
+
###############################################################################
|
|
25
|
+
#++
|
|
26
|
+
|
|
27
|
+
class Lingo
|
|
28
|
+
|
|
29
|
+
# Provides counters.
|
|
30
|
+
|
|
31
|
+
module Reportable
|
|
32
|
+
|
|
33
|
+
def init_reportable(prefix = nil)
|
|
34
|
+
@counters, @prefix = Hash.new(0), prefix ? "#{prefix}: " : ''
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def inc(counter)
|
|
38
|
+
@counters[counter] += 1
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def add(counter, value)
|
|
42
|
+
@counters[counter] += value
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def set(counter, value)
|
|
46
|
+
@counters[counter] = value
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def get(counter)
|
|
50
|
+
@counters[counter]
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def report
|
|
54
|
+
@counters.each_with_object({}) { |(k, v), r| r["#{@prefix}#{k}"] = v }
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
end
|
data/lib/lingo/version.rb
CHANGED
data/lingo-all.cfg
CHANGED
|
@@ -11,7 +11,7 @@ meeting:
|
|
|
11
11
|
#
|
|
12
12
|
|
|
13
13
|
# Angegebene Datei zeilenweise einlesen und verarbeiten
|
|
14
|
-
-
|
|
14
|
+
- text_reader: { files: '$(files)' }
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
########################################
|
|
@@ -25,7 +25,7 @@ meeting:
|
|
|
25
25
|
# - abbreviator: { source: 'sys-abk' }
|
|
26
26
|
|
|
27
27
|
# Verbleibende Token im Wörterbuch suchen
|
|
28
|
-
-
|
|
28
|
+
- word_searcher: { source: 'sys-dic', mode: 'first' }
|
|
29
29
|
|
|
30
30
|
# Schreibweisen variieren und erneut suchen
|
|
31
31
|
# - variator: { source: 'sys-dic' }
|
|
@@ -37,7 +37,7 @@ meeting:
|
|
|
37
37
|
# - decomposer: { source: 'sys-dic' }
|
|
38
38
|
|
|
39
39
|
# Mehrwortgruppen im Strom erkennen
|
|
40
|
-
# -
|
|
40
|
+
# - multi_worder: { stopper: 'PUNC,OTHR', source: 'sys-mul' }
|
|
41
41
|
|
|
42
42
|
# Wortsequenzen anhand von Regeln identifizieren
|
|
43
43
|
# - sequencer: { stopper: 'PUNC,OTHR' }
|
|
@@ -56,30 +56,34 @@ meeting:
|
|
|
56
56
|
# Ergebnisse ausgeben
|
|
57
57
|
#
|
|
58
58
|
|
|
59
|
+
# Erstelle Datei mit Endung .log für Datenstrom
|
|
60
|
+
# - vector_filter: { in: syn, debug: 'true', prompt: 'lex:) ' }
|
|
61
|
+
# - text_writer: { ext: log, sep: "\n" }
|
|
62
|
+
|
|
59
63
|
# Erstelle Datei mit Endung .non für nicht erkannte Wörter
|
|
60
64
|
# - noneword_filter: { in: syn }
|
|
61
|
-
# -
|
|
65
|
+
# - text_writer: { ext: non, sep: "\n" }
|
|
62
66
|
|
|
63
67
|
# Erstelle Datei mit Endung .vec für erkannte Indexterme
|
|
64
68
|
# - vector_filter: { in: syn, lexicals: '^[ksavem]$' }
|
|
65
|
-
# -
|
|
69
|
+
# - text_writer: { ext: vec, sep: "\n" }
|
|
66
70
|
|
|
67
71
|
# Erstelle Datei mit Endung .ven für erkannte Indexterme mit absoluter Häufigkeit
|
|
68
72
|
# - vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: 'term_abs' }
|
|
69
|
-
# -
|
|
73
|
+
# - text_writer: { ext: ven, sep: "\n" }
|
|
70
74
|
|
|
71
75
|
# Erstelle Datei mit Endung .ver für erkannte Indexterme mit relativer Häufigkeit
|
|
72
76
|
# - vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: 'term_rel' }
|
|
73
|
-
# -
|
|
77
|
+
# - text_writer: { ext: ver, sep: "\n" }
|
|
74
78
|
|
|
75
79
|
# Erstelle Datei mit Endung .mul für erkannte Mehrwortgruppen
|
|
76
80
|
# - vector_filter: { in: syn, lexicals: m }
|
|
77
|
-
# -
|
|
81
|
+
# - text_writer: { ext: mul, sep: "\n" }
|
|
78
82
|
|
|
79
83
|
# Erstelle Datei mit Endung .seq für erkannte Wortsequenzen
|
|
80
84
|
# - vector_filter: { in: syn, lexicals: q, sort: 'term_abs' }
|
|
81
|
-
# -
|
|
85
|
+
# - text_writer: { ext: seq, sep: "\n" }
|
|
82
86
|
|
|
83
87
|
# Erstelle Datei mit Endung .syn für erkannte Synonyme
|
|
84
88
|
# - vector_filter: { in: syn, lexicals: y, sort: 'term_abs' }
|
|
85
|
-
# -
|
|
89
|
+
# - text_writer: { ext: syn, sep: "\n" }
|