lingo 1.8.0 → 1.8.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +13 -0
- data/README +49 -29
- data/Rakefile +28 -4
- data/TODO +2 -9
- data/bin/lingo +24 -0
- data/bin/lingoctl +24 -0
- data/de/lingo-dic.txt +559 -74
- data/info/gpl-hdr.txt +21 -24
- data/lib/lingo.rb +83 -112
- data/lib/lingo/agenda_item.rb +53 -0
- data/lib/lingo/attendee.rb +261 -0
- data/lib/lingo/attendee/abbreviator.rb +95 -97
- data/lib/lingo/attendee/debugger.rb +94 -93
- data/lib/lingo/attendee/decomposer.rb +76 -83
- data/lib/lingo/attendee/dehyphenizer.rb +141 -144
- data/lib/lingo/attendee/formatter.rb +65 -0
- data/lib/lingo/attendee/multi_worder.rb +302 -0
- data/lib/lingo/attendee/noneword_filter.rb +89 -84
- data/lib/lingo/attendee/object_filter.rb +91 -0
- data/lib/lingo/attendee/sequencer.rb +159 -158
- data/lib/lingo/attendee/synonymer.rb +81 -84
- data/lib/lingo/attendee/text_reader.rb +242 -0
- data/lib/lingo/attendee/text_writer.rb +169 -0
- data/lib/lingo/attendee/tokenizer.rb +192 -191
- data/lib/lingo/attendee/variator.rb +152 -156
- data/lib/lingo/attendee/vector_filter.rb +140 -135
- data/lib/lingo/attendee/word_searcher.rb +98 -0
- data/lib/lingo/buffered_attendee.rb +69 -0
- data/lib/lingo/cachable.rb +58 -0
- data/lib/lingo/call.rb +72 -0
- data/lib/lingo/cli.rb +26 -0
- data/lib/lingo/config.rb +23 -26
- data/lib/lingo/core_ext.rb +42 -0
- data/lib/lingo/ctl.rb +239 -173
- data/lib/lingo/database.rb +148 -496
- data/lib/lingo/database/crypter.rb +85 -0
- data/lib/lingo/database/gdbm_store.rb +49 -0
- data/lib/lingo/database/hash_store.rb +67 -0
- data/lib/lingo/database/libcdb_store.rb +58 -0
- data/lib/lingo/database/sdbm_store.rb +64 -0
- data/lib/lingo/database/show_progress.rb +81 -0
- data/lib/lingo/database/source.rb +134 -0
- data/lib/lingo/database/source/key_value.rb +62 -0
- data/lib/lingo/database/source/multi_key.rb +65 -0
- data/lib/lingo/database/source/multi_value.rb +65 -0
- data/lib/lingo/database/source/single_word.rb +60 -0
- data/lib/lingo/database/source/word_class.rb +64 -0
- data/lib/lingo/error.rb +122 -0
- data/lib/lingo/language.rb +78 -518
- data/lib/lingo/language/dictionary.rb +173 -0
- data/lib/lingo/language/grammar.rb +211 -0
- data/lib/lingo/language/lexical.rb +66 -0
- data/lib/lingo/language/lexical_hash.rb +88 -0
- data/lib/lingo/language/token.rb +48 -0
- data/lib/lingo/language/word.rb +130 -0
- data/lib/lingo/language/word_form.rb +83 -0
- data/lib/lingo/reportable.rb +59 -0
- data/lib/lingo/version.rb +1 -1
- data/lingo-all.cfg +14 -10
- data/lingo-call.cfg +5 -5
- data/lingo.cfg +14 -12
- data/lingo.rb +26 -0
- data/lir.cfg +13 -9
- data/spec/spec_helper.rb +1 -0
- data/test.cfg +11 -11
- data/test/attendee/ts_abbreviator.rb +0 -6
- data/test/attendee/ts_decomposer.rb +0 -6
- data/test/attendee/{ts_multiworder.rb → ts_multi_worder.rb} +1 -7
- data/test/attendee/ts_noneword_filter.rb +1 -7
- data/test/attendee/{ts_objectfilter.rb → ts_object_filter.rb} +1 -7
- data/test/attendee/ts_sequencer.rb +0 -6
- data/test/attendee/ts_synonymer.rb +0 -6
- data/test/attendee/{ts_textreader.rb → ts_text_reader.rb} +1 -7
- data/test/attendee/{ts_textwriter.rb → ts_text_writer.rb} +1 -7
- data/test/attendee/ts_tokenizer.rb +0 -6
- data/test/attendee/ts_variator.rb +0 -6
- data/test/attendee/ts_vector_filter.rb +1 -7
- data/test/attendee/{ts_wordsearcher.rb → ts_word_searcher.rb} +1 -7
- data/test/ref/artikel.non +2 -29
- data/test/ref/artikel.seq +13 -8
- data/test/ref/artikel.vec +30 -15
- data/test/ref/artikel.ven +29 -14
- data/test/ref/artikel.ver +58 -43
- data/test/ref/lir.csv +146 -145
- data/test/ref/lir.non +186 -210
- data/test/ref/lir.seq +54 -50
- data/test/test_helper.rb +41 -36
- data/test/ts_database.rb +12 -11
- data/test/ts_language.rb +118 -68
- metadata +67 -29
- data/lib/lingo/attendee/multiworder.rb +0 -301
- data/lib/lingo/attendee/objectfilter.rb +0 -86
- data/lib/lingo/attendee/textreader.rb +0 -237
- data/lib/lingo/attendee/textwriter.rb +0 -196
- data/lib/lingo/attendee/wordsearcher.rb +0 -96
- data/lib/lingo/attendees.rb +0 -289
- data/lib/lingo/const.rb +0 -131
- data/lib/lingo/modules.rb +0 -98
- data/lib/lingo/types.rb +0 -285
- data/lib/lingo/utilities.rb +0 -40
@@ -0,0 +1,88 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
module Language
|
30
|
+
|
31
|
+
# LexicalHash provides access to the Lingo databases. Unlike Database, whose
# lookups only yield strings, a lookup here yields an array of Lexical
# objects.
class LexicalHash

  include Cachable
  include Reportable

  # Prepares cache and counters, reads the default word class for the
  # database +id+ from the +lingo+ configuration (key 'def-wc', falling back
  # to LA_UNKNOWN) and opens the underlying database.
  def initialize(id, lingo)
    init_cachable
    init_reportable(id)

    @wc  = lingo.database_config(id).fetch('def-wc', LA_UNKNOWN)
    @src = Database.open(id, lingo)
  end

  # Closes the underlying database.
  def close
    @src.close
  end

  # Looks up the downcased +key+ and hands back whatever the cache holds or,
  # on a cache miss, whatever +store+ returns for the freshly read record
  # (presumably the record itself -- +store+/+retrieve+/+hit?+ are not defined
  # here; confirm against Cachable).
  #
  # Every entry of a record read from the source is converted as follows:
  #
  # <tt>*123</tt>::   kept as a plain string
  # <tt>#x</tt>::     Lexical built from +key+ and the attribute +x+
  # <tt>form#x</tt>:: Lexical built from +form+ and the attribute +x+
  # <tt>form</tt>::   Lexical built from +form+ and the default word class
  #
  # Anything else is passed through unchanged. The converted record is then
  # compacted, sorted and de-duplicated.
  def [](key)
    inc('total requests')
    key = key.downcase

    if hit?(key)
      inc('cache hits')
      retrieve(key)
    else
      inc('source reads')

      record = @src[key]

      if record
        record = record.map do |str|
          case str
          when /^\*\d+$/
            str
          when /^#(.)$/
            Lexical.new(key, Regexp.last_match(1))
          when /^([^#]+?)\s*#(.)$/
            Lexical.new(Regexp.last_match(1), Regexp.last_match(2))
          when /^([^#]+)$/
            Lexical.new(Regexp.last_match(1), @wc)
          else
            str
          end
        end

        record.compact!
        record.sort!
        record.uniq!

        inc('data found')
      end

      # A missing record is stored as well, so the miss itself is cached.
      store(key, record)
    end
  end

end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
module Language
|
30
|
+
|
31
|
+
# Token, derived from WordForm, is the container for a single word of a
# text. The word is tagged with an attribute that corresponds to the rule
# which identified it.
#
# If, for example, ruby.cfg contains a rule for recognizing numbers that is
# labelled NUM, this label is attached to the token, e.g.
# Token.new('100', 'NUM') -> :100/NUM:
class Token < WordForm

  # Returns the inherited string representation enclosed in colons.
  def to_s
    format(':%s:', super)
  end

end
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
module Language
|
30
|
+
|
31
|
+
# Word bundles the specific properties of a word together with the methods
# needed to work with them.
class Word < WordForm

  # Builds a Word from +form+ and +attr+ that already carries one Lexical
  # made of +form+ and +lex_attr+.
  def self.new_lexical(form, attr, lex_attr)
    new(form, attr) << Lexical.new(form, lex_attr)
  end

  # +form+ is the exact representation of the original string as it was found
  # in the sentence, e.g. <tt>form = "RubyLing"</tt>.
  #
  # The lexicals are the result of the dictionary lookup and represent the
  # base form of the word. There may be several possible base forms, e.g.
  # +abgeschoben+ may have the _adjective_ +abgeschoben+ as its base form, or
  # the _verb_ +abschieben+:
  #
  # <tt>lemma = [['abgeschoben', '#a'], ['abschieben', '#v']]</tt>.
  #
  # <b>Note: the lexicals are not filled in by the Word class itself, but
  # externally by the Dictionary class.</b>
  def initialize(form, attr = WA_UNSET)
    super
    @lexicals = []
  end

  # Returns the lexicals of this word. With +compound_parts+ set to false, a
  # compound word (attr == WA_KOMPOSITUM) only yields the lexicals whose
  # attribute is LA_KOMPOSITUM; otherwise all lexicals are returned.
  def lexicals(compound_parts = true)
    if !compound_parts && attr == WA_KOMPOSITUM
      @lexicals.select { |lex| lex.attr == LA_KOMPOSITUM }
    else
      @lexicals
    end
  end

  # Replaces the lexicals with the sorted, de-duplicated contents of +lexis+.
  # Raises TypeError unless +lexis+ is an Array.
  def lexicals=(lexis)
    unless lexis.is_a?(Array)
      raise TypeError, "wrong argument type #{lexis.class} (expected Array)"
    end

    @lexicals = lexis.sort.uniq
  end

  # Returns the attributes of the lexicals selected by +compound_parts+
  # (see #lexicals).
  def attrs(compound_parts = true)
    lexicals(compound_parts).map { |lex| lex.attr }
  end

  # A plain word always consists of a single part.
  def parts
    1
  end

  # The smallest part of a plain word is the word itself.
  def min_part_size
    form.length
  end

  # Returns exactly those base forms whose word class matches +wc_re+ (a
  # Regexp or a pattern string), e.g.
  # <tt>word.get_class(/a/) = ['abgeschoben', '#a']</tt>.
  #
  # If the word has no lexicals at all, its own attribute is matched instead
  # and the result is <tt>[self]</tt> or an empty array.
  def get_class(wc_re)
    wc_re = Regexp.new(wc_re) unless wc_re.is_a?(Regexp)

    if lexicals.empty?
      attr =~ wc_re ? [self] : []
    else
      lexicals.select { |lex| lex.attr =~ wc_re }
    end
  end

  # Returns the form of the first lexical for an identified word, the word's
  # own form otherwise. Also falls back to the own form when the word is
  # flagged as identified but no lexical has been attached (yet), instead of
  # failing on +nil+.
  def norm
    lexical = lexicals.first if identified?
    lexical ? lexical.form : form
  end

  # Returns the first lexical with attribute LA_KOMPOSITUM for a compound
  # word, +nil+ for any other word.
  def compo_form
    get_class(LA_KOMPOSITUM).first if attr == WA_KOMPOSITUM
  end

  # Appends the given lexical(s) -- single objects or (nested) arrays -- and
  # returns the word itself, so calls can be chained.
  def <<(*other)
    lexicals.concat(other.flatten)
    self
  end

  # Orders words by form, attribute and lexicals. Operands that carry no
  # lexicals (+nil+, plain word forms, ...) are handed to the inherited
  # comparison instead of failing on the missing +lexicals+ method.
  def <=>(other)
    return super unless other.respond_to?(:lexicals)

    (to_a + [lexicals]) <=> (other.to_a + [other.lexicals])
  end

  # Renders the word as <tt><form|attr = [lexicals]></tt>; the attribute is
  # left out for identified words, the lexicals when there are none.
  def to_s
    out = "<#{form}"
    out << "|#{attr}" unless identified?
    out << " = #{lexicals.inspect}" unless lexicals.empty?
    out << '>'
  end

end
|
127
|
+
|
128
|
+
end
|
129
|
+
|
130
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
module Language
|
30
|
+
|
31
|
+
# WordForm is the base class for the other classes that are needed within the
# object structure of a word. It provides a string (+form+) that can be
# tagged with an attribute (+attr+).
class WordForm

  include Comparable

  attr_accessor :form, :attr

  # Stores +form+ and +attr+; +nil+ is replaced by an empty string in both
  # cases. When +attr+ is omitted it defaults to '-'.
  def initialize(form, attr = '-')
    @form, @attr = form || '', attr || ''
  end

  # True if the attribute marks the word form as unknown (WA_UNKNOWN or
  # WA_UNKMULPART).
  def unknown?
    [WA_UNKNOWN, WA_UNKMULPART].include?(attr)
  end

  # True if the attribute marks the word form as identified (WA_IDENTIFIED).
  def identified?
    attr == WA_IDENTIFIED
  end

  # Orders word forms by form, then attribute. +nil+ always sorts first.
  # Returns +nil+ -- as Comparable expects for incomparable operands --
  # when +other+ cannot be converted with +to_a+, instead of raising
  # NoMethodError.
  def <=>(other)
    return 1 if other.nil?
    return nil unless other.respond_to?(:to_a)

    to_a <=> other.to_a
  end

  # Returns form and attribute as a two-element array.
  def to_a
    [form, attr]
  end

  # Returns form and attribute joined by a slash, e.g. "100/NUM".
  def to_s
    to_a.join('/')
  end

  # Same as #to_s.
  def inspect
    to_s
  end

  # Hash value derived from the string representation, consistent with #eql?.
  def hash
    to_s.hash
  end

  # Two word forms are equal if they are of exactly the same class and have
  # the same string representation.
  def eql?(other)
    self.class.equal?(other.class) && to_s == other.to_s
  end

  # Overrides the <=>-based == that Comparable would provide.
  alias_method :==, :eql?

end
|
80
|
+
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
# Mixin that equips the including object with a set of named counters.
# #init_reportable has to be called before any of the other methods.
module Reportable

  # Resets all counters. When +prefix+ is given, every counter name in the
  # #report is prepended with "<prefix>: ".
  def init_reportable(prefix = nil)
    label = prefix ? "#{prefix}: " : ''
    @counters, @prefix = Hash.new(0), label
  end

  # Increases +counter+ by one and returns the new value.
  def inc(counter)
    @counters[counter] = @counters[counter] + 1
  end

  # Increases +counter+ by +value+ and returns the new value.
  def add(counter, value)
    @counters[counter] = @counters[counter] + value
  end

  # Sets +counter+ to +value+.
  def set(counter, value)
    @counters.store(counter, value)
  end

  # Returns the current value of +counter+ (0 if it was never touched).
  def get(counter)
    @counters[counter]
  end

  # Returns a new hash that maps the prefixed counter names to their values.
  def report
    prefixed = {}
    @counters.each { |name, count| prefixed["#{@prefix}#{name}"] = count }
    prefixed
  end

end
|
58
|
+
|
59
|
+
end
|
data/lib/lingo/version.rb
CHANGED
data/lingo-all.cfg
CHANGED
@@ -11,7 +11,7 @@ meeting:
|
|
11
11
|
#
|
12
12
|
|
13
13
|
# Angegebene Datei zeilenweise einlesen und verarbeiten
|
14
|
-
-
|
14
|
+
- text_reader: { files: '$(files)' }
|
15
15
|
|
16
16
|
|
17
17
|
########################################
|
@@ -25,7 +25,7 @@ meeting:
|
|
25
25
|
# - abbreviator: { source: 'sys-abk' }
|
26
26
|
|
27
27
|
# Verbleibende Token im Wörterbuch suchen
|
28
|
-
-
|
28
|
+
- word_searcher: { source: 'sys-dic', mode: 'first' }
|
29
29
|
|
30
30
|
# Schreibweisen variieren und erneut suchen
|
31
31
|
# - variator: { source: 'sys-dic' }
|
@@ -37,7 +37,7 @@ meeting:
|
|
37
37
|
# - decomposer: { source: 'sys-dic' }
|
38
38
|
|
39
39
|
# Mehrwortgruppen im Strom erkennen
|
40
|
-
# -
|
40
|
+
# - multi_worder: { stopper: 'PUNC,OTHR', source: 'sys-mul' }
|
41
41
|
|
42
42
|
# Wortsequenzen anhand von Regeln identifizieren
|
43
43
|
# - sequencer: { stopper: 'PUNC,OTHR' }
|
@@ -56,30 +56,34 @@ meeting:
|
|
56
56
|
# Ergebnisse ausgeben
|
57
57
|
#
|
58
58
|
|
59
|
+
# Erstelle Datei mit Endung .log für Datenstrom
|
60
|
+
# - vector_filter: { in: syn, debug: 'true', prompt: 'lex:) ' }
|
61
|
+
# - text_writer: { ext: log, sep: "\n" }
|
62
|
+
|
59
63
|
# Erstelle Datei mit Endung .non für nicht erkannte Wörter
|
60
64
|
# - noneword_filter: { in: syn }
|
61
|
-
# -
|
65
|
+
# - text_writer: { ext: non, sep: "\n" }
|
62
66
|
|
63
67
|
# Erstelle Datei mit Endung .vec für erkannte Indexterme
|
64
68
|
# - vector_filter: { in: syn, lexicals: '^[ksavem]$' }
|
65
|
-
# -
|
69
|
+
# - text_writer: { ext: vec, sep: "\n" }
|
66
70
|
|
67
71
|
# Erstelle Datei mit Endung .ven für erkannte Indexterme mit absoluter Häufigkeit
|
68
72
|
# - vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: 'term_abs' }
|
69
|
-
# -
|
73
|
+
# - text_writer: { ext: ven, sep: "\n" }
|
70
74
|
|
71
75
|
# Erstelle Datei mit Endung .ver für erkannte Indexterme mit relativer Häufigkeit
|
72
76
|
# - vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: 'term_rel' }
|
73
|
-
# -
|
77
|
+
# - text_writer: { ext: ver, sep: "\n" }
|
74
78
|
|
75
79
|
# Erstelle Datei mit Endung .mul für erkannte Mehrwortgruppen
|
76
80
|
# - vector_filter: { in: syn, lexicals: m }
|
77
|
-
# -
|
81
|
+
# - text_writer: { ext: mul, sep: "\n" }
|
78
82
|
|
79
83
|
# Erstelle Datei mit Endung .seq für erkannte Wortsequenzen
|
80
84
|
# - vector_filter: { in: syn, lexicals: q, sort: 'term_abs' }
|
81
|
-
# -
|
85
|
+
# - text_writer: { ext: seq, sep: "\n" }
|
82
86
|
|
83
87
|
# Erstelle Datei mit Endung .syn für erkannte Synonyme
|
84
88
|
# - vector_filter: { in: syn, lexicals: y, sort: 'term_abs' }
|
85
|
-
# -
|
89
|
+
# - text_writer: { ext: syn, sep: "\n" }
|