lingo 1.8.5 → 1.8.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +25 -0
- data/README +7 -5
- data/Rakefile +58 -55
- data/{lingo-call.cfg → config/lingo-call.cfg} +1 -1
- data/{lingo.cfg → config/lingo.cfg} +10 -2
- data/{lir.cfg → config/lir.cfg} +10 -2
- data/{de → dict/de}/lingo-abk.txt +0 -0
- data/{de → dict/de}/lingo-dic.txt +0 -0
- data/{de → dict/de}/lingo-mul.txt +0 -0
- data/{de → dict/de}/lingo-syn.txt +0 -0
- data/{de → dict/de}/test_dic.txt +0 -0
- data/{de → dict/de}/test_gen.txt +0 -0
- data/{de → dict/de}/test_mu2.txt +0 -0
- data/{de → dict/de}/test_mul.txt +0 -0
- data/{de → dict/de}/test_sgw.txt +0 -0
- data/{de → dict/de}/test_syn.txt +0 -0
- data/{de → dict/de}/user-dic.txt +0 -0
- data/{en → dict/en}/lingo-dic.txt +0 -0
- data/{en → dict/en}/lingo-irr.txt +0 -0
- data/{en → dict/en}/lingo-mul.txt +0 -0
- data/{en → dict/en}/lingo-syn.txt +0 -0
- data/{en → dict/en}/lingo-wdn.txt +0 -0
- data/{en → dict/en}/user-dic.txt +0 -0
- data/{ru → dict/ru}/lingo-dic.txt +0 -0
- data/{ru → dict/ru}/lingo-mul.txt +0 -0
- data/{ru → dict/ru}/lingo-syn.txt +0 -0
- data/{ru → dict/ru}/user-dic.txt +0 -0
- data/{de.lang → lang/de.lang} +1 -1
- data/{en.lang → lang/en.lang} +0 -0
- data/{ru.lang → lang/ru.lang} +0 -0
- data/lib/lingo.rb +14 -15
- data/lib/lingo/app.rb +4 -2
- data/lib/lingo/attendee.rb +23 -43
- data/lib/lingo/attendee/abbreviator.rb +5 -5
- data/lib/lingo/attendee/debugger.rb +39 -12
- data/lib/lingo/attendee/decomposer.rb +3 -4
- data/lib/lingo/attendee/dehyphenizer.rb +4 -4
- data/lib/lingo/attendee/formatter.rb +1 -3
- data/lib/lingo/attendee/multi_worder.rb +3 -4
- data/lib/lingo/attendee/noneword_filter.rb +8 -12
- data/lib/lingo/attendee/object_filter.rb +6 -3
- data/lib/lingo/attendee/sequencer.rb +5 -5
- data/lib/lingo/attendee/stemmer.rb +3 -2
- data/lib/lingo/attendee/synonymer.rb +3 -4
- data/lib/lingo/attendee/text_reader.rb +39 -38
- data/lib/lingo/attendee/text_writer.rb +10 -10
- data/lib/lingo/attendee/tokenizer.rb +63 -33
- data/lib/lingo/attendee/variator.rb +3 -7
- data/lib/lingo/attendee/vector_filter.rb +132 -65
- data/lib/lingo/attendee/word_searcher.rb +5 -3
- data/lib/lingo/buffered_attendee.rb +1 -3
- data/lib/lingo/call.rb +4 -3
- data/lib/lingo/cli.rb +5 -1
- data/lib/lingo/config.rb +11 -5
- data/lib/lingo/ctl.rb +3 -3
- data/lib/lingo/database.rb +3 -1
- data/lib/lingo/database/crypter.rb +1 -3
- data/lib/lingo/database/source.rb +3 -1
- data/lib/lingo/database/source/key_value.rb +3 -1
- data/lib/lingo/database/source/multi_key.rb +3 -1
- data/lib/lingo/database/source/multi_value.rb +3 -1
- data/lib/lingo/database/source/single_word.rb +3 -1
- data/lib/lingo/database/source/word_class.rb +3 -1
- data/lib/lingo/debug.rb +5 -5
- data/lib/lingo/{agenda_item.rb → deferred_attendee.rb} +21 -12
- data/lib/lingo/error.rb +1 -1
- data/lib/lingo/language.rb +1 -9
- data/lib/lingo/language/dictionary.rb +2 -17
- data/lib/lingo/language/grammar.rb +10 -10
- data/lib/lingo/language/lexical.rb +2 -0
- data/lib/lingo/language/lexical_hash.rb +2 -0
- data/lib/lingo/language/token.rb +17 -3
- data/lib/lingo/language/word.rb +13 -5
- data/lib/lingo/language/word_form.rb +5 -3
- data/lib/lingo/progress.rb +2 -2
- data/lib/lingo/srv.rb +1 -1
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web.rb +1 -1
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/test/attendee/ts_abbreviator.rb +4 -2
- data/test/attendee/ts_multi_worder.rb +81 -88
- data/test/attendee/ts_noneword_filter.rb +2 -2
- data/test/attendee/ts_object_filter.rb +2 -2
- data/test/attendee/ts_sequencer.rb +40 -20
- data/test/attendee/ts_stemmer.rb +52 -26
- data/test/attendee/ts_text_reader.rb +75 -56
- data/test/attendee/ts_text_writer.rb +6 -4
- data/test/attendee/ts_tokenizer.rb +304 -193
- data/test/attendee/ts_vector_filter.rb +242 -9
- data/test/ref/artikel.non +3 -0
- data/test/ref/artikel.vec +1 -4
- data/test/ref/artikel.vef +940 -0
- data/test/ref/artikel.ven +0 -3
- data/test/ref/artikel.ver +0 -3
- data/test/ref/artikel.vet +2580 -0
- data/test/ref/lir.non +34 -31
- data/test/ref/lir.seq +14 -15
- data/test/ref/lir.vec +37 -37
- data/test/ref/lir.vef +329 -0
- data/test/ref/lir.ven +329 -0
- data/test/ref/lir.ver +329 -0
- data/test/ref/lir.vet +329 -0
- data/test/test_helper.rb +29 -16
- data/test/ts_language.rb +6 -47
- metadata +74 -87
- data/lingo.rb +0 -29
- data/spec/spec_helper.rb +0 -5
data/lib/lingo/app.rb
CHANGED
@@ -55,7 +55,7 @@ class Lingo
|
|
55
55
|
ARGV.unshift(*lingo_options) if lingo_options.is_a?(Array)
|
56
56
|
end
|
57
57
|
|
58
|
-
OptionParser.new(banner,
|
58
|
+
OptionParser.new(banner, 12) { |o|
|
59
59
|
o.on('-p port', 'set the port (default is 4567)') { |v| set :port, Integer(v) }
|
60
60
|
o.on('-o addr', 'set the host (default is 0.0.0.0)') { |v| set :bind, v }
|
61
61
|
o.on('-e env', 'set the environment (default is development)') { |v| set :environment, v.to_sym }
|
@@ -63,7 +63,9 @@ class Lingo
|
|
63
63
|
o.on('-x', 'turn on the mutex lock (default is off)') { set :lock, true }
|
64
64
|
}.parse!(argv)
|
65
65
|
|
66
|
-
|
66
|
+
argv.pop if File.basename($0) == 'rackup' # rackup config
|
67
|
+
|
68
|
+
abort "Unrecognized arguments: #{argv}\n#{banner}" unless argv.empty?
|
67
69
|
|
68
70
|
ARGV.unshift(*yield) if block_given?
|
69
71
|
rescue OptionParser::ParseError => err
|
data/lib/lingo/attendee.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -28,6 +28,7 @@ require 'nuggets/string/evaluate'
|
|
28
28
|
|
29
29
|
class Lingo
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Lingo ist als universelles Indexierungssystem entworfen worden. Seine Stärke liegt in der einfachen Konfigurierbarkeit für
|
32
33
|
# spezifische Aufgaben und in der schnelle Entwicklung weiterer Funktionen durch systematischen Kapselung der Komplexität auf
|
33
34
|
# kleine Verarbeitungseinheiten. Die kleinste Verarbeitungseinheit wird Attendee genannt. Um ein gewünschtes Verarbeitungsergebnis
|
@@ -63,54 +64,43 @@ class Lingo
|
|
63
64
|
# - verarbeitet und/oder transformiert datenobjekte
|
64
65
|
# - wird gesteuert durch kommandos
|
65
66
|
# - schreibt verarbeitungsstatistiken
|
67
|
+
#++
|
66
68
|
|
67
69
|
class Attendee
|
68
70
|
|
69
71
|
include Language
|
70
72
|
|
71
|
-
STR_CMD_TALK = 'TALK'
|
72
|
-
STR_CMD_LIR = 'LIR-FORMAT'
|
73
|
-
STR_CMD_FILE = 'FILE'
|
74
|
-
STR_CMD_EOL = 'EOL'
|
75
|
-
STR_CMD_RECORD = 'RECORD'
|
76
|
-
STR_CMD_EOF = 'EOF'
|
77
|
-
|
78
73
|
DEFAULT_SKIP = [TA_PUNCTUATION, TA_OTHER].join(',')
|
79
74
|
|
80
75
|
def initialize(config, lingo)
|
81
|
-
@lingo, @config, @
|
76
|
+
@lingo, @config, @subscribers = lingo, config, []
|
82
77
|
|
83
78
|
# Make sure config exists
|
84
79
|
lingo.dictionary_config
|
85
80
|
|
86
|
-
@dic
|
87
|
-
|
88
|
-
init if self.class.method_defined?(:init)
|
81
|
+
@dic, @gra, @valid_keys = nil, nil, %w[name in out]
|
89
82
|
|
90
|
-
|
91
|
-
@can_process = self.class.method_defined?(:process)
|
83
|
+
init
|
92
84
|
|
93
|
-
|
85
|
+
unless (invalid_keys = config.keys - @valid_keys).empty?
|
86
|
+
warn(
|
87
|
+
"CONFIGURATION NOTICE: #{self.class.name.sub(/\ALingo::/, '')}" <<
|
88
|
+
" options invalid or obsolete: #{invalid_keys.sort.join(', ')}" <<
|
89
|
+
" (in #{lingo.config.config_file})"
|
90
|
+
)
|
91
|
+
end
|
94
92
|
end
|
95
93
|
|
96
|
-
attr_reader :lingo
|
94
|
+
attr_reader :lingo, :subscribers
|
97
95
|
|
98
|
-
def
|
99
|
-
|
100
|
-
end
|
101
|
-
|
102
|
-
def listen(obj)
|
103
|
-
if obj.is_a?(AgendaItem)
|
104
|
-
args = obj.to_a
|
105
|
-
control(*args) if @can_control
|
106
|
-
forward(*args) unless obj.cmd == STR_CMD_TALK || skip_command!
|
107
|
-
else
|
108
|
-
@can_process ? process(obj) : forward(obj)
|
109
|
-
end
|
96
|
+
def forward(*args)
|
97
|
+
subscribers.each { |sub| sub.process(*args) }
|
110
98
|
end
|
111
99
|
|
112
|
-
def
|
113
|
-
|
100
|
+
def command(*args)
|
101
|
+
subscribers.each { |sub|
|
102
|
+
sub.command(*args) unless sub.control(*args) == :skip_command
|
103
|
+
}
|
114
104
|
end
|
115
105
|
|
116
106
|
private
|
@@ -120,27 +110,16 @@ class Lingo
|
|
120
110
|
g && (block_given? ? !yield(w) : w.unknown?) ? g.find_compound(f) : w
|
121
111
|
end
|
122
112
|
|
123
|
-
def skip_command
|
124
|
-
@skip_command = true
|
125
|
-
end
|
126
|
-
|
127
|
-
def skip_command!
|
128
|
-
@skip_command.tap { @skip_command &&= false }
|
129
|
-
end
|
130
|
-
|
131
|
-
def forward(obj, param = nil)
|
132
|
-
talk(param ? AgendaItem.new(obj, param) : obj)
|
133
|
-
end
|
134
|
-
|
135
113
|
def flush(buffer)
|
136
114
|
buffer.each { |i| forward(i) }.clear
|
137
115
|
end
|
138
116
|
|
139
117
|
def has_key?(key)
|
140
|
-
@config
|
118
|
+
@config.key?(key)
|
141
119
|
end
|
142
120
|
|
143
121
|
def get_key(key, default = nodefault = true)
|
122
|
+
@valid_keys << key
|
144
123
|
raise MissingConfigError.new(key) if nodefault && !has_key?(key)
|
145
124
|
@config.fetch(key, default)
|
146
125
|
end
|
@@ -188,6 +167,7 @@ class Lingo
|
|
188
167
|
end
|
189
168
|
|
190
169
|
require_relative 'buffered_attendee'
|
170
|
+
require_relative 'deferred_attendee'
|
191
171
|
|
192
172
|
require_relative 'attendee/abbreviator'
|
193
173
|
require_relative 'attendee/debugger'
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Die Erkennung von Abkürzungen kann auf vielfältige Weise erfolgen. In jedem Fall
|
32
33
|
# sollte eine sichere Unterscheidung von einem Satzende-Punkt möglich sein.
|
33
34
|
# Der in Lingo gewählte Ansatz befreit den Tokenizer von dieser Arbeit und konzentriert
|
@@ -67,18 +68,17 @@ class Lingo
|
|
67
68
|
# out> :./PUNC:
|
68
69
|
# out> *EOL('test.txt')
|
69
70
|
# out> *EOF('test.txt')
|
71
|
+
#++
|
70
72
|
|
71
73
|
class Abbreviator < self
|
72
74
|
|
73
|
-
protected
|
74
|
-
|
75
75
|
def init
|
76
76
|
set_dic
|
77
77
|
@abbr = nil
|
78
78
|
end
|
79
79
|
|
80
|
-
def control(cmd,
|
81
|
-
send_abbr(@abbr) if [
|
80
|
+
def control(cmd, *)
|
81
|
+
send_abbr(@abbr) if [:RECORD, :EOF].include?(cmd)
|
82
82
|
end
|
83
83
|
|
84
84
|
def process(obj)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -24,10 +24,13 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'yaml'
|
28
|
+
|
27
29
|
class Lingo
|
28
30
|
|
29
31
|
class Attendee
|
30
32
|
|
33
|
+
#--
|
31
34
|
# Die Attendees von Lingo übergeben Daten über ihre Kommunikationskanäle und entweder kommt bei
|
32
35
|
# einer komplexen Konfiguration hinten das gewünschte Ergebnis raus oder aber auch nicht. Für den
|
33
36
|
# letzeren Fall ist der Debugger primär gedacht. Er kann an beliebige Stelle in den Datenstrom
|
@@ -84,30 +87,54 @@ class Lingo
|
|
84
87
|
# TOKEN:) *EOL('test.txt')
|
85
88
|
# LINES:) *EOF('test.txt')
|
86
89
|
# TOKEN:) *EOF('test.txt')
|
90
|
+
#++
|
87
91
|
|
88
92
|
class Debugger < self
|
89
93
|
|
90
|
-
|
94
|
+
def init(default_prompt = 'lex:) ')
|
95
|
+
@prompt = get_key('prompt', default_prompt)
|
91
96
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
97
|
+
unless @filter ||= get_key('filter', false)
|
98
|
+
@cmd_eval = get_key('ceval', 'true')
|
99
|
+
else
|
100
|
+
@cmd_eval = nil
|
101
|
+
end
|
102
|
+
|
103
|
+
@obj_eval = get_key('eval', 'true')
|
104
|
+
@preamble = get_key('preamble', true)
|
96
105
|
end
|
97
106
|
|
98
|
-
def control(cmd, param)
|
99
|
-
|
107
|
+
def control(cmd, param = nil, *)
|
108
|
+
if @cmd_eval
|
109
|
+
debug(eval(@cmd_eval)) { "*#{cmd}('#{param}')" }
|
110
|
+
elsif cmd == :EOL
|
111
|
+
:skip_command
|
112
|
+
end
|
100
113
|
end
|
101
114
|
|
102
115
|
def process(obj)
|
103
|
-
debug(
|
104
|
-
forward(obj)
|
116
|
+
debug(eval(@obj_eval)) { obj.inspect }
|
117
|
+
forward(obj) unless @filter
|
105
118
|
end
|
106
119
|
|
107
120
|
private
|
108
121
|
|
109
|
-
def debug(
|
110
|
-
|
122
|
+
def debug(condition)
|
123
|
+
send_msg((@preamble = nil; @lingo.config.to_h.to_yaml)) if @preamble
|
124
|
+
send_msg(@prompt + yield) if condition
|
125
|
+
end
|
126
|
+
|
127
|
+
def send_msg(msg)
|
128
|
+
@filter ? forward(msg) : warn(msg)
|
129
|
+
end
|
130
|
+
|
131
|
+
end
|
132
|
+
|
133
|
+
class DebugFilter < Debugger
|
134
|
+
|
135
|
+
def init
|
136
|
+
@filter = true
|
137
|
+
super('')
|
111
138
|
end
|
112
139
|
|
113
140
|
end
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Komposita, also zusammengesetzte Wörter, sind eine Spezialität der deutschen Sprache
|
32
33
|
# (z.B. Indexierungssystem oder Kompositumerkennung).
|
33
34
|
# Könnte man alle Kombinationen in den Wörterbüchern hinterlegen, dann würde der
|
@@ -70,17 +71,15 @@ class Lingo
|
|
70
71
|
# out> :./PUNC:
|
71
72
|
# out> *EOL('test.txt')
|
72
73
|
# out> *EOF('test.txt')
|
74
|
+
#++
|
73
75
|
|
74
76
|
class Decomposer < self
|
75
77
|
|
76
|
-
protected
|
77
|
-
|
78
78
|
def init
|
79
79
|
set_gra
|
80
80
|
end
|
81
81
|
|
82
|
-
def control(
|
83
|
-
# can control
|
82
|
+
def control(*)
|
84
83
|
end
|
85
84
|
|
86
85
|
def process(obj)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Der Dehyphenizer ... muss noch dokumentiert werden
|
32
33
|
#
|
33
34
|
# === Mögliche Verlinkung
|
@@ -65,11 +66,10 @@ class Lingo
|
|
65
66
|
# out> :./PUNC:
|
66
67
|
# out> *EOL('test.txt')
|
67
68
|
# out> *EOF('test.txt')
|
69
|
+
#++
|
68
70
|
|
69
71
|
class Dehyphenizer < BufferedAttendee
|
70
72
|
|
71
|
-
protected
|
72
|
-
|
73
73
|
def init
|
74
74
|
set_dic
|
75
75
|
set_gra
|
@@ -79,7 +79,7 @@ class Lingo
|
|
79
79
|
@expected_tokens_in_buffer, @eof_handling = 2, false
|
80
80
|
end
|
81
81
|
|
82
|
-
def control(cmd,
|
82
|
+
def control(cmd, *)
|
83
83
|
control_multi(cmd)
|
84
84
|
end
|
85
85
|
|
@@ -30,14 +30,12 @@ class Lingo
|
|
30
30
|
|
31
31
|
class Formatter < TextWriter
|
32
32
|
|
33
|
-
protected
|
34
|
-
|
35
33
|
def init
|
36
34
|
super
|
37
35
|
|
38
36
|
@ext = get_key('ext', '-')
|
39
37
|
@format = get_key('format', '%s')
|
40
|
-
@map = get_key('map', Hash.
|
38
|
+
@map = get_key('map', Hash.nest { |k| k })
|
41
39
|
|
42
40
|
@no_puts = true
|
43
41
|
end
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Mit der bisher beschriebenen Vorgehensweise werden die durch den Tokenizer erkannten
|
32
33
|
# Token aufgelöst und in Words verwandelt und über den Abbreviator und Decomposer auch
|
33
34
|
# Spezialfälle behandelt, die einzelne Wörter betreffen.
|
@@ -72,11 +73,10 @@ class Lingo
|
|
72
73
|
# out> :./PUNC:
|
73
74
|
# out> *EOL('test.txt')
|
74
75
|
# out> *EOF('test.txt')
|
76
|
+
#++
|
75
77
|
|
76
78
|
class MultiWorder < BufferedAttendee
|
77
79
|
|
78
|
-
protected
|
79
|
-
|
80
80
|
def init
|
81
81
|
# combine lexical variants?
|
82
82
|
#
|
@@ -112,7 +112,7 @@ class Lingo
|
|
112
112
|
@expected_tokens_in_buffer, @eof_handling = 3, false
|
113
113
|
end
|
114
114
|
|
115
|
-
def control(cmd,
|
115
|
+
def control(cmd, *)
|
116
116
|
control_multi(cmd)
|
117
117
|
end
|
118
118
|
|
@@ -172,7 +172,6 @@ class Lingo
|
|
172
172
|
WA_MULTIWORD, lex.select { |l| l.is_a?(Lexical) }))
|
173
173
|
end
|
174
174
|
|
175
|
-
# Prüft einen definiert langen Schlüssel ab Position 0 im Buffer
|
176
175
|
def check_multiword_key(len)
|
177
176
|
return [] if valid_tokens_in_buffer < len
|
178
177
|
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Der NonewordFilter ermöglicht es, alle nicht erkannten Wörter aus dem Datenstrom zu
|
32
33
|
# selektieren und weiterzuleiten. Im Prinzip werden alle erkannten Wörter gefiltert.
|
33
34
|
# Bei einem Indexierungslauf können so alle nicht durch den Wordsearcher erkannten Wörter,
|
@@ -65,11 +66,10 @@ class Lingo
|
|
65
66
|
# out> *FILE('test.txt')
|
66
67
|
# out> "lingo"
|
67
68
|
# out> *EOF('test.txt')
|
69
|
+
#++
|
68
70
|
|
69
71
|
class NonewordFilter < self
|
70
72
|
|
71
|
-
protected
|
72
|
-
|
73
73
|
def init
|
74
74
|
@sort = get_key('sort', !ENV['LINGO_NO_SORT'])
|
75
75
|
@dict = get_key('dict', false)
|
@@ -78,16 +78,12 @@ class Lingo
|
|
78
78
|
@nonewords = []
|
79
79
|
end
|
80
80
|
|
81
|
-
def control(cmd,
|
81
|
+
def control(cmd, *)
|
82
82
|
case cmd
|
83
|
-
when
|
84
|
-
|
85
|
-
when
|
86
|
-
|
87
|
-
when STR_CMD_RECORD
|
88
|
-
send_nonewords unless @dict
|
89
|
-
when STR_CMD_EOF
|
90
|
-
send_nonewords
|
83
|
+
when :FILE then @nonewords.clear
|
84
|
+
when :EOL then :skip_command
|
85
|
+
when :RECORD then send_nonewords unless @dict
|
86
|
+
when :EOF then send_nonewords
|
91
87
|
end
|
92
88
|
end
|
93
89
|
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Der ObjectFilter ermöglicht es, beliebige Objekte aus dem Datenstrom herauszufiltern.
|
32
33
|
# Um die gewünschten Objekte zu identifizieren, sind ein paar Ruby-Kenntnisse und das Wissen
|
33
34
|
# um die Lingo Klassen notwendig. Hier sollen kurz die häufigsten Fälle angesprochen werden:
|
@@ -67,15 +68,17 @@ class Lingo
|
|
67
68
|
# out> <Indexierung = [(indexierung/s)]>
|
68
69
|
# out> *EOL('test.txt')
|
69
70
|
# out> *EOF('test.txt')
|
71
|
+
#++
|
70
72
|
|
71
73
|
class ObjectFilter < self
|
72
74
|
|
73
|
-
protected
|
74
|
-
|
75
75
|
def init
|
76
76
|
@obj_eval = get_key('objects', 'true')
|
77
77
|
end
|
78
78
|
|
79
|
+
def control(*)
|
80
|
+
end
|
81
|
+
|
79
82
|
def process(obj)
|
80
83
|
forward(obj) if eval(@obj_eval)
|
81
84
|
end
|