lingo 1.8.5 → 1.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +25 -0
- data/README +7 -5
- data/Rakefile +58 -55
- data/{lingo-call.cfg → config/lingo-call.cfg} +1 -1
- data/{lingo.cfg → config/lingo.cfg} +10 -2
- data/{lir.cfg → config/lir.cfg} +10 -2
- data/{de → dict/de}/lingo-abk.txt +0 -0
- data/{de → dict/de}/lingo-dic.txt +0 -0
- data/{de → dict/de}/lingo-mul.txt +0 -0
- data/{de → dict/de}/lingo-syn.txt +0 -0
- data/{de → dict/de}/test_dic.txt +0 -0
- data/{de → dict/de}/test_gen.txt +0 -0
- data/{de → dict/de}/test_mu2.txt +0 -0
- data/{de → dict/de}/test_mul.txt +0 -0
- data/{de → dict/de}/test_sgw.txt +0 -0
- data/{de → dict/de}/test_syn.txt +0 -0
- data/{de → dict/de}/user-dic.txt +0 -0
- data/{en → dict/en}/lingo-dic.txt +0 -0
- data/{en → dict/en}/lingo-irr.txt +0 -0
- data/{en → dict/en}/lingo-mul.txt +0 -0
- data/{en → dict/en}/lingo-syn.txt +0 -0
- data/{en → dict/en}/lingo-wdn.txt +0 -0
- data/{en → dict/en}/user-dic.txt +0 -0
- data/{ru → dict/ru}/lingo-dic.txt +0 -0
- data/{ru → dict/ru}/lingo-mul.txt +0 -0
- data/{ru → dict/ru}/lingo-syn.txt +0 -0
- data/{ru → dict/ru}/user-dic.txt +0 -0
- data/{de.lang → lang/de.lang} +1 -1
- data/{en.lang → lang/en.lang} +0 -0
- data/{ru.lang → lang/ru.lang} +0 -0
- data/lib/lingo.rb +14 -15
- data/lib/lingo/app.rb +4 -2
- data/lib/lingo/attendee.rb +23 -43
- data/lib/lingo/attendee/abbreviator.rb +5 -5
- data/lib/lingo/attendee/debugger.rb +39 -12
- data/lib/lingo/attendee/decomposer.rb +3 -4
- data/lib/lingo/attendee/dehyphenizer.rb +4 -4
- data/lib/lingo/attendee/formatter.rb +1 -3
- data/lib/lingo/attendee/multi_worder.rb +3 -4
- data/lib/lingo/attendee/noneword_filter.rb +8 -12
- data/lib/lingo/attendee/object_filter.rb +6 -3
- data/lib/lingo/attendee/sequencer.rb +5 -5
- data/lib/lingo/attendee/stemmer.rb +3 -2
- data/lib/lingo/attendee/synonymer.rb +3 -4
- data/lib/lingo/attendee/text_reader.rb +39 -38
- data/lib/lingo/attendee/text_writer.rb +10 -10
- data/lib/lingo/attendee/tokenizer.rb +63 -33
- data/lib/lingo/attendee/variator.rb +3 -7
- data/lib/lingo/attendee/vector_filter.rb +132 -65
- data/lib/lingo/attendee/word_searcher.rb +5 -3
- data/lib/lingo/buffered_attendee.rb +1 -3
- data/lib/lingo/call.rb +4 -3
- data/lib/lingo/cli.rb +5 -1
- data/lib/lingo/config.rb +11 -5
- data/lib/lingo/ctl.rb +3 -3
- data/lib/lingo/database.rb +3 -1
- data/lib/lingo/database/crypter.rb +1 -3
- data/lib/lingo/database/source.rb +3 -1
- data/lib/lingo/database/source/key_value.rb +3 -1
- data/lib/lingo/database/source/multi_key.rb +3 -1
- data/lib/lingo/database/source/multi_value.rb +3 -1
- data/lib/lingo/database/source/single_word.rb +3 -1
- data/lib/lingo/database/source/word_class.rb +3 -1
- data/lib/lingo/debug.rb +5 -5
- data/lib/lingo/{agenda_item.rb → deferred_attendee.rb} +21 -12
- data/lib/lingo/error.rb +1 -1
- data/lib/lingo/language.rb +1 -9
- data/lib/lingo/language/dictionary.rb +2 -17
- data/lib/lingo/language/grammar.rb +10 -10
- data/lib/lingo/language/lexical.rb +2 -0
- data/lib/lingo/language/lexical_hash.rb +2 -0
- data/lib/lingo/language/token.rb +17 -3
- data/lib/lingo/language/word.rb +13 -5
- data/lib/lingo/language/word_form.rb +5 -3
- data/lib/lingo/progress.rb +2 -2
- data/lib/lingo/srv.rb +1 -1
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web.rb +1 -1
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/test/attendee/ts_abbreviator.rb +4 -2
- data/test/attendee/ts_multi_worder.rb +81 -88
- data/test/attendee/ts_noneword_filter.rb +2 -2
- data/test/attendee/ts_object_filter.rb +2 -2
- data/test/attendee/ts_sequencer.rb +40 -20
- data/test/attendee/ts_stemmer.rb +52 -26
- data/test/attendee/ts_text_reader.rb +75 -56
- data/test/attendee/ts_text_writer.rb +6 -4
- data/test/attendee/ts_tokenizer.rb +304 -193
- data/test/attendee/ts_vector_filter.rb +242 -9
- data/test/ref/artikel.non +3 -0
- data/test/ref/artikel.vec +1 -4
- data/test/ref/artikel.vef +940 -0
- data/test/ref/artikel.ven +0 -3
- data/test/ref/artikel.ver +0 -3
- data/test/ref/artikel.vet +2580 -0
- data/test/ref/lir.non +34 -31
- data/test/ref/lir.seq +14 -15
- data/test/ref/lir.vec +37 -37
- data/test/ref/lir.vef +329 -0
- data/test/ref/lir.ven +329 -0
- data/test/ref/lir.ver +329 -0
- data/test/ref/lir.vet +329 -0
- data/test/test_helper.rb +29 -16
- data/test/ts_language.rb +6 -47
- metadata +74 -87
- data/lingo.rb +0 -29
- data/spec/spec_helper.rb +0 -5
data/lib/lingo/app.rb
CHANGED
@@ -55,7 +55,7 @@ class Lingo
|
|
55
55
|
ARGV.unshift(*lingo_options) if lingo_options.is_a?(Array)
|
56
56
|
end
|
57
57
|
|
58
|
-
OptionParser.new(banner,
|
58
|
+
OptionParser.new(banner, 12) { |o|
|
59
59
|
o.on('-p port', 'set the port (default is 4567)') { |v| set :port, Integer(v) }
|
60
60
|
o.on('-o addr', 'set the host (default is 0.0.0.0)') { |v| set :bind, v }
|
61
61
|
o.on('-e env', 'set the environment (default is development)') { |v| set :environment, v.to_sym }
|
@@ -63,7 +63,9 @@ class Lingo
|
|
63
63
|
o.on('-x', 'turn on the mutex lock (default is off)') { set :lock, true }
|
64
64
|
}.parse!(argv)
|
65
65
|
|
66
|
-
|
66
|
+
argv.pop if File.basename($0) == 'rackup' # rackup config
|
67
|
+
|
68
|
+
abort "Unrecognized arguments: #{argv}\n#{banner}" unless argv.empty?
|
67
69
|
|
68
70
|
ARGV.unshift(*yield) if block_given?
|
69
71
|
rescue OptionParser::ParseError => err
|
data/lib/lingo/attendee.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -28,6 +28,7 @@ require 'nuggets/string/evaluate'
|
|
28
28
|
|
29
29
|
class Lingo
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Lingo ist als universelles Indexierungssystem entworfen worden. Seine Stärke liegt in der einfachen Konfigurierbarkeit für
|
32
33
|
# spezifische Aufgaben und in der schnellen Entwicklung weiterer Funktionen durch systematische Kapselung der Komplexität auf
|
33
34
|
# kleine Verarbeitungseinheiten. Die kleinste Verarbeitungseinheit wird Attendee genannt. Um ein gewünschtes Verarbeitungsergebnis
|
@@ -63,54 +64,43 @@ class Lingo
|
|
63
64
|
# - verarbeitet und/oder transformiert datenobjekte
|
64
65
|
# - wird gesteuert durch kommandos
|
65
66
|
# - schreibt verarbeitungsstatistiken
|
67
|
+
#++
|
66
68
|
|
67
69
|
class Attendee
|
68
70
|
|
69
71
|
include Language
|
70
72
|
|
71
|
-
STR_CMD_TALK = 'TALK'
|
72
|
-
STR_CMD_LIR = 'LIR-FORMAT'
|
73
|
-
STR_CMD_FILE = 'FILE'
|
74
|
-
STR_CMD_EOL = 'EOL'
|
75
|
-
STR_CMD_RECORD = 'RECORD'
|
76
|
-
STR_CMD_EOF = 'EOF'
|
77
|
-
|
78
73
|
DEFAULT_SKIP = [TA_PUNCTUATION, TA_OTHER].join(',')
|
79
74
|
|
80
75
|
def initialize(config, lingo)
|
81
|
-
@lingo, @config, @
|
76
|
+
@lingo, @config, @subscribers = lingo, config, []
|
82
77
|
|
83
78
|
# Make sure config exists
|
84
79
|
lingo.dictionary_config
|
85
80
|
|
86
|
-
@dic
|
87
|
-
|
88
|
-
init if self.class.method_defined?(:init)
|
81
|
+
@dic, @gra, @valid_keys = nil, nil, %w[name in out]
|
89
82
|
|
90
|
-
|
91
|
-
@can_process = self.class.method_defined?(:process)
|
83
|
+
init
|
92
84
|
|
93
|
-
|
85
|
+
unless (invalid_keys = config.keys - @valid_keys).empty?
|
86
|
+
warn(
|
87
|
+
"CONFIGURATION NOTICE: #{self.class.name.sub(/\ALingo::/, '')}" <<
|
88
|
+
" options invalid or obsolete: #{invalid_keys.sort.join(', ')}" <<
|
89
|
+
" (in #{lingo.config.config_file})"
|
90
|
+
)
|
91
|
+
end
|
94
92
|
end
|
95
93
|
|
96
|
-
attr_reader :lingo
|
94
|
+
attr_reader :lingo, :subscribers
|
97
95
|
|
98
|
-
def
|
99
|
-
|
100
|
-
end
|
101
|
-
|
102
|
-
def listen(obj)
|
103
|
-
if obj.is_a?(AgendaItem)
|
104
|
-
args = obj.to_a
|
105
|
-
control(*args) if @can_control
|
106
|
-
forward(*args) unless obj.cmd == STR_CMD_TALK || skip_command!
|
107
|
-
else
|
108
|
-
@can_process ? process(obj) : forward(obj)
|
109
|
-
end
|
96
|
+
def forward(*args)
|
97
|
+
subscribers.each { |sub| sub.process(*args) }
|
110
98
|
end
|
111
99
|
|
112
|
-
def
|
113
|
-
|
100
|
+
def command(*args)
|
101
|
+
subscribers.each { |sub|
|
102
|
+
sub.command(*args) unless sub.control(*args) == :skip_command
|
103
|
+
}
|
114
104
|
end
|
115
105
|
|
116
106
|
private
|
@@ -120,27 +110,16 @@ class Lingo
|
|
120
110
|
g && (block_given? ? !yield(w) : w.unknown?) ? g.find_compound(f) : w
|
121
111
|
end
|
122
112
|
|
123
|
-
def skip_command
|
124
|
-
@skip_command = true
|
125
|
-
end
|
126
|
-
|
127
|
-
def skip_command!
|
128
|
-
@skip_command.tap { @skip_command &&= false }
|
129
|
-
end
|
130
|
-
|
131
|
-
def forward(obj, param = nil)
|
132
|
-
talk(param ? AgendaItem.new(obj, param) : obj)
|
133
|
-
end
|
134
|
-
|
135
113
|
def flush(buffer)
|
136
114
|
buffer.each { |i| forward(i) }.clear
|
137
115
|
end
|
138
116
|
|
139
117
|
def has_key?(key)
|
140
|
-
@config
|
118
|
+
@config.key?(key)
|
141
119
|
end
|
142
120
|
|
143
121
|
def get_key(key, default = nodefault = true)
|
122
|
+
@valid_keys << key
|
144
123
|
raise MissingConfigError.new(key) if nodefault && !has_key?(key)
|
145
124
|
@config.fetch(key, default)
|
146
125
|
end
|
@@ -188,6 +167,7 @@ class Lingo
|
|
188
167
|
end
|
189
168
|
|
190
169
|
require_relative 'buffered_attendee'
|
170
|
+
require_relative 'deferred_attendee'
|
191
171
|
|
192
172
|
require_relative 'attendee/abbreviator'
|
193
173
|
require_relative 'attendee/debugger'
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Die Erkennung von Abkürzungen kann auf vielfältige Weise erfolgen. In jedem Fall
|
32
33
|
# sollte eine sichere Unterscheidung von einem Satzende-Punkt möglich sein.
|
33
34
|
# Der in Lingo gewählte Ansatz befreit den Tokenizer von dieser Arbeit und konzentriert
|
@@ -67,18 +68,17 @@ class Lingo
|
|
67
68
|
# out> :./PUNC:
|
68
69
|
# out> *EOL('test.txt')
|
69
70
|
# out> *EOF('test.txt')
|
71
|
+
#++
|
70
72
|
|
71
73
|
class Abbreviator < self
|
72
74
|
|
73
|
-
protected
|
74
|
-
|
75
75
|
def init
|
76
76
|
set_dic
|
77
77
|
@abbr = nil
|
78
78
|
end
|
79
79
|
|
80
|
-
def control(cmd,
|
81
|
-
send_abbr(@abbr) if [
|
80
|
+
def control(cmd, *)
|
81
|
+
send_abbr(@abbr) if [:RECORD, :EOF].include?(cmd)
|
82
82
|
end
|
83
83
|
|
84
84
|
def process(obj)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -24,10 +24,13 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'yaml'
|
28
|
+
|
27
29
|
class Lingo
|
28
30
|
|
29
31
|
class Attendee
|
30
32
|
|
33
|
+
#--
|
31
34
|
# Die Attendees von Lingo übergeben Daten über ihre Kommunikationskanäle und entweder kommt bei
|
32
35
|
# einer komplexen Konfiguration hinten das gewünschte Ergebnis raus oder aber auch nicht. Für den
|
33
36
|
# letzteren Fall ist der Debugger primär gedacht. Er kann an beliebige Stelle in den Datenstrom
|
@@ -84,30 +87,54 @@ class Lingo
|
|
84
87
|
# TOKEN:) *EOL('test.txt')
|
85
88
|
# LINES:) *EOF('test.txt')
|
86
89
|
# TOKEN:) *EOF('test.txt')
|
90
|
+
#++
|
87
91
|
|
88
92
|
class Debugger < self
|
89
93
|
|
90
|
-
|
94
|
+
def init(default_prompt = 'lex:) ')
|
95
|
+
@prompt = get_key('prompt', default_prompt)
|
91
96
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
97
|
+
unless @filter ||= get_key('filter', false)
|
98
|
+
@cmd_eval = get_key('ceval', 'true')
|
99
|
+
else
|
100
|
+
@cmd_eval = nil
|
101
|
+
end
|
102
|
+
|
103
|
+
@obj_eval = get_key('eval', 'true')
|
104
|
+
@preamble = get_key('preamble', true)
|
96
105
|
end
|
97
106
|
|
98
|
-
def control(cmd, param)
|
99
|
-
|
107
|
+
def control(cmd, param = nil, *)
|
108
|
+
if @cmd_eval
|
109
|
+
debug(eval(@cmd_eval)) { "*#{cmd}('#{param}')" }
|
110
|
+
elsif cmd == :EOL
|
111
|
+
:skip_command
|
112
|
+
end
|
100
113
|
end
|
101
114
|
|
102
115
|
def process(obj)
|
103
|
-
debug(
|
104
|
-
forward(obj)
|
116
|
+
debug(eval(@obj_eval)) { obj.inspect }
|
117
|
+
forward(obj) unless @filter
|
105
118
|
end
|
106
119
|
|
107
120
|
private
|
108
121
|
|
109
|
-
def debug(
|
110
|
-
|
122
|
+
def debug(condition)
|
123
|
+
send_msg((@preamble = nil; @lingo.config.to_h.to_yaml)) if @preamble
|
124
|
+
send_msg(@prompt + yield) if condition
|
125
|
+
end
|
126
|
+
|
127
|
+
def send_msg(msg)
|
128
|
+
@filter ? forward(msg) : warn(msg)
|
129
|
+
end
|
130
|
+
|
131
|
+
end
|
132
|
+
|
133
|
+
class DebugFilter < Debugger
|
134
|
+
|
135
|
+
def init
|
136
|
+
@filter = true
|
137
|
+
super('')
|
111
138
|
end
|
112
139
|
|
113
140
|
end
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Komposita, also zusammengesetzte Wörter, sind eine Spezialität der deutschen Sprache
|
32
33
|
# (z.B. Indexierungssystem oder Kompositumerkennung).
|
33
34
|
# Könnte man alle Kombinationen in den Wörterbüchern hinterlegen, dann würde der
|
@@ -70,17 +71,15 @@ class Lingo
|
|
70
71
|
# out> :./PUNC:
|
71
72
|
# out> *EOL('test.txt')
|
72
73
|
# out> *EOF('test.txt')
|
74
|
+
#++
|
73
75
|
|
74
76
|
class Decomposer < self
|
75
77
|
|
76
|
-
protected
|
77
|
-
|
78
78
|
def init
|
79
79
|
set_gra
|
80
80
|
end
|
81
81
|
|
82
|
-
def control(
|
83
|
-
# can control
|
82
|
+
def control(*)
|
84
83
|
end
|
85
84
|
|
86
85
|
def process(obj)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Der Dehyphenizer ... muss noch dokumentiert werden
|
32
33
|
#
|
33
34
|
# === Mögliche Verlinkung
|
@@ -65,11 +66,10 @@ class Lingo
|
|
65
66
|
# out> :./PUNC:
|
66
67
|
# out> *EOL('test.txt')
|
67
68
|
# out> *EOF('test.txt')
|
69
|
+
#++
|
68
70
|
|
69
71
|
class Dehyphenizer < BufferedAttendee
|
70
72
|
|
71
|
-
protected
|
72
|
-
|
73
73
|
def init
|
74
74
|
set_dic
|
75
75
|
set_gra
|
@@ -79,7 +79,7 @@ class Lingo
|
|
79
79
|
@expected_tokens_in_buffer, @eof_handling = 2, false
|
80
80
|
end
|
81
81
|
|
82
|
-
def control(cmd,
|
82
|
+
def control(cmd, *)
|
83
83
|
control_multi(cmd)
|
84
84
|
end
|
85
85
|
|
@@ -30,14 +30,12 @@ class Lingo
|
|
30
30
|
|
31
31
|
class Formatter < TextWriter
|
32
32
|
|
33
|
-
protected
|
34
|
-
|
35
33
|
def init
|
36
34
|
super
|
37
35
|
|
38
36
|
@ext = get_key('ext', '-')
|
39
37
|
@format = get_key('format', '%s')
|
40
|
-
@map = get_key('map', Hash.
|
38
|
+
@map = get_key('map', Hash.nest { |k| k })
|
41
39
|
|
42
40
|
@no_puts = true
|
43
41
|
end
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Mit der bisher beschriebenen Vorgehensweise werden die durch den Tokenizer erkannten
|
32
33
|
# Token aufgelöst und in Words verwandelt und über den Abbreviator und Decomposer auch
|
33
34
|
# Spezialfälle behandelt, die einzelne Wörter betreffen.
|
@@ -72,11 +73,10 @@ class Lingo
|
|
72
73
|
# out> :./PUNC:
|
73
74
|
# out> *EOL('test.txt')
|
74
75
|
# out> *EOF('test.txt')
|
76
|
+
#++
|
75
77
|
|
76
78
|
class MultiWorder < BufferedAttendee
|
77
79
|
|
78
|
-
protected
|
79
|
-
|
80
80
|
def init
|
81
81
|
# combine lexical variants?
|
82
82
|
#
|
@@ -112,7 +112,7 @@ class Lingo
|
|
112
112
|
@expected_tokens_in_buffer, @eof_handling = 3, false
|
113
113
|
end
|
114
114
|
|
115
|
-
def control(cmd,
|
115
|
+
def control(cmd, *)
|
116
116
|
control_multi(cmd)
|
117
117
|
end
|
118
118
|
|
@@ -172,7 +172,6 @@ class Lingo
|
|
172
172
|
WA_MULTIWORD, lex.select { |l| l.is_a?(Lexical) }))
|
173
173
|
end
|
174
174
|
|
175
|
-
# Prüft einen definiert langen Schlüssel ab Position 0 im Buffer
|
176
175
|
def check_multiword_key(len)
|
177
176
|
return [] if valid_tokens_in_buffer < len
|
178
177
|
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Der NonewordFilter ermöglicht es, alle nicht erkannten Wörter aus dem Datenstrom zu
|
32
33
|
# selektieren und weiterzuleiten. Im Prinzip werden alle erkannten Wörter gefiltert.
|
33
34
|
# Bei einem Indexierungslauf können so alle nicht durch den Wordsearcher erkannten Wörter,
|
@@ -65,11 +66,10 @@ class Lingo
|
|
65
66
|
# out> *FILE('test.txt')
|
66
67
|
# out> "lingo"
|
67
68
|
# out> *EOF('test.txt')
|
69
|
+
#++
|
68
70
|
|
69
71
|
class NonewordFilter < self
|
70
72
|
|
71
|
-
protected
|
72
|
-
|
73
73
|
def init
|
74
74
|
@sort = get_key('sort', !ENV['LINGO_NO_SORT'])
|
75
75
|
@dict = get_key('dict', false)
|
@@ -78,16 +78,12 @@ class Lingo
|
|
78
78
|
@nonewords = []
|
79
79
|
end
|
80
80
|
|
81
|
-
def control(cmd,
|
81
|
+
def control(cmd, *)
|
82
82
|
case cmd
|
83
|
-
when
|
84
|
-
|
85
|
-
when
|
86
|
-
|
87
|
-
when STR_CMD_RECORD
|
88
|
-
send_nonewords unless @dict
|
89
|
-
when STR_CMD_EOF
|
90
|
-
send_nonewords
|
83
|
+
when :FILE then @nonewords.clear
|
84
|
+
when :EOL then :skip_command
|
85
|
+
when :RECORD then send_nonewords unless @dict
|
86
|
+
when :EOF then send_nonewords
|
91
87
|
end
|
92
88
|
end
|
93
89
|
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -28,6 +28,7 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Attendee
|
30
30
|
|
31
|
+
#--
|
31
32
|
# Der ObjectFilter ermöglicht es, beliebige Objekte aus dem Datenstrom herauszufiltern.
|
32
33
|
# Um die gewünschten Objekte zu identifizieren, sind ein paar Ruby-Kenntnisse und das Wissen
|
33
34
|
# um die Lingo Klassen notwendig. Hier sollen kurz die häufigsten Fälle angesprochen werden:
|
@@ -67,15 +68,17 @@ class Lingo
|
|
67
68
|
# out> <Indexierung = [(indexierung/s)]>
|
68
69
|
# out> *EOL('test.txt')
|
69
70
|
# out> *EOF('test.txt')
|
71
|
+
#++
|
70
72
|
|
71
73
|
class ObjectFilter < self
|
72
74
|
|
73
|
-
protected
|
74
|
-
|
75
75
|
def init
|
76
76
|
@obj_eval = get_key('objects', 'true')
|
77
77
|
end
|
78
78
|
|
79
|
+
def control(*)
|
80
|
+
end
|
81
|
+
|
79
82
|
def process(obj)
|
80
83
|
forward(obj) if eval(@obj_eval)
|
81
84
|
end
|