lingo 1.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +1 -0
- data/COPYING +663 -0
- data/ChangeLog +754 -0
- data/README +322 -0
- data/Rakefile +100 -0
- data/TODO +28 -0
- data/bin/lingo +5 -0
- data/bin/lingoctl +6 -0
- data/de.lang +121 -0
- data/de/lingo-abk.txt +74 -0
- data/de/lingo-dic.txt +56822 -0
- data/de/lingo-mul.txt +3209 -0
- data/de/lingo-syn.txt +14841 -0
- data/de/test_dic.txt +24 -0
- data/de/test_mul.txt +17 -0
- data/de/test_mul2.txt +2 -0
- data/de/test_singleword.txt +2 -0
- data/de/test_syn.txt +4 -0
- data/de/test_syn2.txt +1 -0
- data/de/user-dic.txt +10 -0
- data/en.lang +113 -0
- data/en/lingo-dic.txt +55434 -0
- data/en/lingo-mul.txt +456 -0
- data/en/user-dic.txt +5 -0
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/gpl-hdr.txt +27 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lib/lingo.rb +321 -0
- data/lib/lingo/attendee/abbreviator.rb +119 -0
- data/lib/lingo/attendee/debugger.rb +111 -0
- data/lib/lingo/attendee/decomposer.rb +101 -0
- data/lib/lingo/attendee/dehyphenizer.rb +167 -0
- data/lib/lingo/attendee/multiworder.rb +301 -0
- data/lib/lingo/attendee/noneword_filter.rb +103 -0
- data/lib/lingo/attendee/objectfilter.rb +86 -0
- data/lib/lingo/attendee/sequencer.rb +190 -0
- data/lib/lingo/attendee/synonymer.rb +105 -0
- data/lib/lingo/attendee/textreader.rb +237 -0
- data/lib/lingo/attendee/textwriter.rb +196 -0
- data/lib/lingo/attendee/tokenizer.rb +218 -0
- data/lib/lingo/attendee/variator.rb +185 -0
- data/lib/lingo/attendee/vector_filter.rb +158 -0
- data/lib/lingo/attendee/wordsearcher.rb +96 -0
- data/lib/lingo/attendees.rb +289 -0
- data/lib/lingo/cli.rb +62 -0
- data/lib/lingo/config.rb +104 -0
- data/lib/lingo/const.rb +131 -0
- data/lib/lingo/ctl.rb +173 -0
- data/lib/lingo/database.rb +587 -0
- data/lib/lingo/language.rb +530 -0
- data/lib/lingo/modules.rb +98 -0
- data/lib/lingo/types.rb +285 -0
- data/lib/lingo/utilities.rb +40 -0
- data/lib/lingo/version.rb +27 -0
- data/lingo-all.cfg +85 -0
- data/lingo-call.cfg +15 -0
- data/lingo.cfg +78 -0
- data/lingo.rb +3 -0
- data/lir.cfg +72 -0
- data/porter/stem.cfg +311 -0
- data/porter/stem.rb +150 -0
- data/spec/spec_helper.rb +0 -0
- data/test.cfg +79 -0
- data/test/attendee/ts_abbreviator.rb +35 -0
- data/test/attendee/ts_decomposer.rb +31 -0
- data/test/attendee/ts_multiworder.rb +390 -0
- data/test/attendee/ts_noneword_filter.rb +19 -0
- data/test/attendee/ts_objectfilter.rb +19 -0
- data/test/attendee/ts_sequencer.rb +43 -0
- data/test/attendee/ts_synonymer.rb +33 -0
- data/test/attendee/ts_textreader.rb +58 -0
- data/test/attendee/ts_textwriter.rb +98 -0
- data/test/attendee/ts_tokenizer.rb +32 -0
- data/test/attendee/ts_variator.rb +24 -0
- data/test/attendee/ts_vector_filter.rb +62 -0
- data/test/attendee/ts_wordsearcher.rb +119 -0
- data/test/lir.csv +3 -0
- data/test/lir.txt +12 -0
- data/test/lir2.txt +12 -0
- data/test/mul.txt +1 -0
- data/test/ref/artikel.mul +1 -0
- data/test/ref/artikel.non +159 -0
- data/test/ref/artikel.seq +270 -0
- data/test/ref/artikel.syn +16 -0
- data/test/ref/artikel.vec +928 -0
- data/test/ref/artikel.ven +928 -0
- data/test/ref/artikel.ver +928 -0
- data/test/ref/lir.csv +328 -0
- data/test/ref/lir.mul +1 -0
- data/test/ref/lir.non +274 -0
- data/test/ref/lir.seq +249 -0
- data/test/ref/lir.syn +94 -0
- data/test/test_helper.rb +113 -0
- data/test/ts_database.rb +269 -0
- data/test/ts_language.rb +396 -0
- data/txt/artikel-en.txt +157 -0
- data/txt/artikel.txt +170 -0
- data/txt/lir.txt +1317 -0
- metadata +211 -0
data/en/user-dic.txt
ADDED
data/info/Objekte.png
ADDED
Binary file
|
data/info/Typen.png
ADDED
Binary file
|
data/info/database.png
ADDED
Binary file
|
data/info/db_small.png
ADDED
Binary file
|
data/info/download.png
ADDED
Binary file
|
data/info/gpl-hdr.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#--
|
2
|
+
# LINGO ist ein Indexierungssystem mit Grundformreduktion, Kompositumzerlegung,
|
3
|
+
# Mehrworterkennung und Relationierung.
|
4
|
+
#
|
5
|
+
# Copyright (C) 2005-2007 John Vorhauer
|
6
|
+
# Copyright (C) 2007-2011 John Vorhauer, Jens Wille
|
7
|
+
#
|
8
|
+
# This program is free software; you can redistribute it and/or modify it under
|
9
|
+
# the terms of the GNU Affero General Public License as published by the Free
|
10
|
+
# Software Foundation; either version 3 of the License, or (at your option)
|
11
|
+
# any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
14
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
15
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
16
|
+
# details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU Affero General Public License along
|
19
|
+
# with this program; if not, write to the Free Software Foundation, Inc.,
|
20
|
+
# 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
|
21
|
+
#
|
22
|
+
# For more information visit http://www.lex-lingo.de or contact me at
|
23
|
+
# welcomeATlex-lingoDOTde near 50°55'N+6°55'E.
|
24
|
+
#
|
25
|
+
# Lex Lingo rules from here on
|
26
|
+
#++
|
27
|
+
|
data/info/kerze.png
ADDED
Binary file
|
data/info/language.png
ADDED
Binary file
|
data/info/lingo.png
ADDED
Binary file
|
data/info/logo.png
ADDED
Binary file
|
data/info/meeting.png
ADDED
Binary file
|
data/info/types.png
ADDED
Binary file
|
data/lib/lingo.rb
ADDED
@@ -0,0 +1,321 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
# LINGO ist ein Indexierungssystem mit Grundformreduktion, Kompositumzerlegung,
|
5
|
+
# Mehrworterkennung und Relationierung.
|
6
|
+
#
|
7
|
+
# Copyright (C) 2005-2007 John Vorhauer
|
8
|
+
# Copyright (C) 2007-2011 John Vorhauer, Jens Wille
|
9
|
+
#
|
10
|
+
# This program is free software; you can redistribute it and/or modify it under
|
11
|
+
# the terms of the GNU Affero General Public License as published by the Free
|
12
|
+
# Software Foundation; either version 3 of the License, or (at your option)
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
16
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
17
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
18
|
+
# details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License along
|
21
|
+
# with this program; if not, write to the Free Software Foundation, Inc.,
|
22
|
+
# 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
|
23
|
+
#
|
24
|
+
# For more information visit http://www.lex-lingo.de or contact me at
|
25
|
+
# welcomeATlex-lingoDOTde near 50°55'N+6°55'E.
|
26
|
+
#
|
27
|
+
# Lex Lingo rules from here on
|
28
|
+
#++
|
29
|
+
|
30
|
+
require 'stringio'
|
31
|
+
require 'benchmark'
|
32
|
+
require 'nuggets/env/user_home'
|
33
|
+
require 'nuggets/numeric/duration'
|
34
|
+
|
35
|
+
require_relative 'lingo/config'
|
36
|
+
require_relative 'lingo/attendees'
|
37
|
+
require_relative 'lingo/attendee/abbreviator'
|
38
|
+
require_relative 'lingo/attendee/debugger'
|
39
|
+
require_relative 'lingo/attendee/decomposer'
|
40
|
+
require_relative 'lingo/attendee/dehyphenizer'
|
41
|
+
require_relative 'lingo/attendee/multiworder'
|
42
|
+
require_relative 'lingo/attendee/noneword_filter'
|
43
|
+
require_relative 'lingo/attendee/objectfilter'
|
44
|
+
require_relative 'lingo/attendee/variator'
|
45
|
+
require_relative 'lingo/attendee/sequencer'
|
46
|
+
require_relative 'lingo/attendee/synonymer'
|
47
|
+
require_relative 'lingo/attendee/textreader'
|
48
|
+
require_relative 'lingo/attendee/textwriter'
|
49
|
+
require_relative 'lingo/attendee/tokenizer'
|
50
|
+
require_relative 'lingo/attendee/vector_filter'
|
51
|
+
require_relative 'lingo/attendee/wordsearcher'
|
52
|
+
require_relative 'lingo/version'
|
53
|
+
|
54
|
+
class Lingo
|
55
|
+
|
56
|
+
# The system-wide Lingo directory (+LINGO_BASE+).
|
57
|
+
BASE = ENV['LINGO_BASE'] || File.expand_path('../..', __FILE__)
|
58
|
+
|
59
|
+
# The user's personal Lingo directory (+LINGO_HOME+).
|
60
|
+
HOME = ENV['LINGO_HOME'] || File.join(ENV.user_home, '.lingo')
|
61
|
+
|
62
|
+
# The local Lingo directory (+LINGO_CURR+).
|
63
|
+
CURR = ENV['LINGO_CURR'] || '.'
|
64
|
+
|
65
|
+
# The search path for Lingo dictionary and configuration files.
|
66
|
+
PATH = ENV['LINGO_PATH'] || [CURR, HOME, BASE].join(File::PATH_SEPARATOR)
|
67
|
+
|
68
|
+
FIND_OPTIONS = {
|
69
|
+
config: { dir: 'config', ext: 'cfg' },
|
70
|
+
dict: { dir: 'dict', ext: 'txt' },
|
71
|
+
lang: { dir: 'lang', ext: 'lang' },
|
72
|
+
store: { dir: 'store', ext: nil }
|
73
|
+
}
|
74
|
+
|
75
|
+
class << self
|
76
|
+
|
77
|
+
def talk(*args)
|
78
|
+
new(*args).talk
|
79
|
+
end
|
80
|
+
|
81
|
+
def call(cfg = find(:config, 'lingo-call'), args = [], &block)
|
82
|
+
Call.new(['-c', cfg, *args]).call(&block)
|
83
|
+
end
|
84
|
+
|
85
|
+
def error(msg)
|
86
|
+
abort(msg)
|
87
|
+
end
|
88
|
+
|
89
|
+
def list(type, options = {})
|
90
|
+
options = options_for(type, options)
|
91
|
+
path = path_for(options)
|
92
|
+
|
93
|
+
glob = file_with_ext('*', options)
|
94
|
+
glob = File.join('??', glob) if type == :dict
|
95
|
+
|
96
|
+
[].tap { |list| walk(path, options) { |dir|
|
97
|
+
Dir[File.join(dir, glob)].sort.each { |file|
|
98
|
+
pn = Pathname.new(file)
|
99
|
+
list << realpath_for(pn, path) if pn.file?
|
100
|
+
}
|
101
|
+
} }
|
102
|
+
end
|
103
|
+
|
104
|
+
def find(type, file, options = {})
|
105
|
+
if options.is_a?(Array)
|
106
|
+
path = options
|
107
|
+
options = options_for(type)
|
108
|
+
else
|
109
|
+
options = options_for(type, options)
|
110
|
+
path = path_for(options)
|
111
|
+
end
|
112
|
+
|
113
|
+
type = :file if type != :store
|
114
|
+
send("find_#{type}", file, path, options)
|
115
|
+
rescue RuntimeError, Errno::ENOENT => err
|
116
|
+
block_given? ? yield(err) : raise
|
117
|
+
end
|
118
|
+
|
119
|
+
def basename(type, file)
|
120
|
+
dir, name = File.split(file)
|
121
|
+
type != :dict ? name : File.join(File.basename(dir), name)
|
122
|
+
end
|
123
|
+
|
124
|
+
def basepath(type, file)
|
125
|
+
File.join(options_for(type)[:dir], basename(type, file))
|
126
|
+
end
|
127
|
+
|
128
|
+
private
|
129
|
+
|
130
|
+
def find_file(file, path, options)
|
131
|
+
pn = Pathname.new(file_with_ext(file, options)).cleanpath
|
132
|
+
|
133
|
+
walk(path, options) { |dir|
|
134
|
+
pn2 = pn.expand_path(dir)
|
135
|
+
pn = pn2 and break if pn2.exist?
|
136
|
+
} if pn.relative?
|
137
|
+
|
138
|
+
realpath_for(pn, path)
|
139
|
+
end
|
140
|
+
|
141
|
+
def find_store(file, path, options)
|
142
|
+
base = basename(:dict, find(:dict, file, path))
|
143
|
+
|
144
|
+
walk(path.reverse, options, false) { |dir|
|
145
|
+
Pathname.new(dir).ascend { |r|
|
146
|
+
break true if r.file?
|
147
|
+
|
148
|
+
return File.join(dir, base).tap { |s|
|
149
|
+
s.chomp!(File.extname(s))
|
150
|
+
} if r.writable?
|
151
|
+
|
152
|
+
break true if r.exist?
|
153
|
+
}
|
154
|
+
}
|
155
|
+
|
156
|
+
raise 'No writable store found in search path'
|
157
|
+
end
|
158
|
+
|
159
|
+
def options_for(type, options = {})
|
160
|
+
if find_options = FIND_OPTIONS[type]
|
161
|
+
options = find_options.merge(options)
|
162
|
+
else
|
163
|
+
raise ArgumentError, "Invalid type `#{type.inspect}'", caller(1)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def path_for(options)
|
168
|
+
options[:path] || PATH.split(File::PATH_SEPARATOR)
|
169
|
+
end
|
170
|
+
|
171
|
+
def file_with_ext(file, options)
|
172
|
+
ext = options[:ext]
|
173
|
+
ext && File.extname(file).empty? ? "#{file}.#{ext}" : file
|
174
|
+
end
|
175
|
+
|
176
|
+
def walk(path, options, legacy = true)
|
177
|
+
dirs = [options[:dir].to_s]
|
178
|
+
dirs << '' if legacy
|
179
|
+
dirs.uniq!
|
180
|
+
|
181
|
+
seen = Hash.new { |h, k| h[k] = true; false }
|
182
|
+
|
183
|
+
path.each { |d|
|
184
|
+
next if seen[d = File.expand_path(d)]
|
185
|
+
dirs.each { |i| yield File.join(d, i) } or break
|
186
|
+
}
|
187
|
+
end
|
188
|
+
|
189
|
+
def realpath_for(pn, path)
|
190
|
+
pn.realpath(path.first).to_s
|
191
|
+
end
|
192
|
+
|
193
|
+
end
|
194
|
+
|
195
|
+
attr_reader :dictionaries, :report_status, :report_time
|
196
|
+
|
197
|
+
def initialize(*args)
|
198
|
+
@config_args = args
|
199
|
+
reset(false)
|
200
|
+
end
|
201
|
+
|
202
|
+
def config
|
203
|
+
@config ||= Config.new(*@config_args)
|
204
|
+
end
|
205
|
+
|
206
|
+
def dictionary_config
|
207
|
+
@dictionary_config ||= config['language/dictionary']
|
208
|
+
end
|
209
|
+
|
210
|
+
def database_config(id)
|
211
|
+
dictionary_config['databases'][id]
|
212
|
+
end
|
213
|
+
|
214
|
+
def talk
|
215
|
+
invite
|
216
|
+
start
|
217
|
+
ensure
|
218
|
+
reset
|
219
|
+
end
|
220
|
+
|
221
|
+
def invite(list = config['meeting/attendees'])
|
222
|
+
supplier = Hash.new { |h, k| h[k] = [] }
|
223
|
+
subscriber = Hash.new { |h, k| h[k] = [] }
|
224
|
+
|
225
|
+
last_link, auto_link = '', 0
|
226
|
+
|
227
|
+
list.each { |hash|
|
228
|
+
# {'attendee' => {'name'=>'Attendee', 'in'=>'nase', 'out'=>'ohr', 'param'=>'hase'}}
|
229
|
+
cfg = hash.values.first.merge('name' => hash.keys.first.capitalize)
|
230
|
+
|
231
|
+
%w[in out].each { |key| (cfg[key] ||= '').downcase! }
|
232
|
+
|
233
|
+
cfg['in'] = last_link if cfg['in'].empty?
|
234
|
+
cfg['out'] = "auto_link_out_#{auto_link += 1}" if cfg['out'].empty?
|
235
|
+
last_link = cfg['out']
|
236
|
+
|
237
|
+
data = config["language/attendees/#{cfg['name'].downcase}"]
|
238
|
+
cfg.update(data) if data
|
239
|
+
|
240
|
+
attendee = Attendee.const_get(cfg['name']).new(cfg, self)
|
241
|
+
@attendees << attendee
|
242
|
+
|
243
|
+
cfg['in'].split(STRING_SEPERATOR_PATTERN).each { |interest|
|
244
|
+
subscriber[interest] << attendee
|
245
|
+
}
|
246
|
+
cfg['out'].split(STRING_SEPERATOR_PATTERN).each { |theme|
|
247
|
+
supplier[theme] << attendee
|
248
|
+
}
|
249
|
+
}
|
250
|
+
|
251
|
+
supplier.each { |channel, attendees| attendees.each { |att|
|
252
|
+
att.add_subscriber(subscriber[channel])
|
253
|
+
} }
|
254
|
+
end
|
255
|
+
|
256
|
+
def start(report_status = config['status'], report_time = config['perfmon'])
|
257
|
+
@report_status, @report_time = report_status, report_time
|
258
|
+
|
259
|
+
time = Benchmark.realtime {
|
260
|
+
@attendees.first.listen(AgendaItem.new(STR_CMD_TALK))
|
261
|
+
}
|
262
|
+
|
263
|
+
if report_status || report_time
|
264
|
+
config.stderr.puts "Require protocol...\n#{separator = '-' * 61}"
|
265
|
+
@attendees.first.listen(AgendaItem.new(STR_CMD_STATUS))
|
266
|
+
config.stderr.puts "#{separator}\nThe duration of the meeting was #{time.to_hms(2)}"
|
267
|
+
end
|
268
|
+
end
|
269
|
+
|
270
|
+
def reset(close = true)
|
271
|
+
dictionaries.each(&:close) if close
|
272
|
+
@dictionaries, @attendees = [], []
|
273
|
+
end
|
274
|
+
|
275
|
+
class Call < Lingo
|
276
|
+
|
277
|
+
def initialize(args = [])
|
278
|
+
super(args, StringIO.new, StringIO.new, StringIO.new)
|
279
|
+
end
|
280
|
+
|
281
|
+
def call
|
282
|
+
invite
|
283
|
+
|
284
|
+
if block_given?
|
285
|
+
begin
|
286
|
+
yield self
|
287
|
+
ensure
|
288
|
+
reset
|
289
|
+
end
|
290
|
+
else
|
291
|
+
self
|
292
|
+
end
|
293
|
+
end
|
294
|
+
|
295
|
+
# Runs the invited attendees on +str+ and returns what they printed.
#
# The config's stdin stream is reopened on +str+, the meeting is run via
# #start, and then the config's stdout and stderr streams (in that order)
# are rewound and read back line by line with trailing line endings
# removed. Each output stream is truncated and rewound after being read.
#
# str:: the input text to reopen the config's stdin stream on
#
# With a block, every collected line is replaced by the block's result and
# the original order is kept. Without a block, the collected lines are
# sorted and de-duplicated.
#
# Returns the resulting Array of lines.
def talk(str, &block)
  config.stdin.reopen(str)

  start

  lines = %w[stdout stderr].flat_map { |key|
    io = config.send(key).tap(&:rewind)
    io.readlines.each(&:chomp!).tap {
      io.truncate(0)
      io.rewind
    }
  }

  # The block is captured explicitly: the previous <tt>&Proc.new</tt> idiom
  # (Proc.new without a block) raises ArgumentError on Ruby 3.0 and later.
  if block
    lines.map!(&block)
  else
    lines.sort!
    lines.uniq!
  end

  lines
end
|
315
|
+
|
316
|
+
end
|
317
|
+
|
318
|
+
end
|
319
|
+
|
320
|
+
require 'nuggets/util/pluggable'
|
321
|
+
Util::Pluggable.load_plugins_for(Lingo)
|
@@ -0,0 +1,119 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
# LINGO ist ein Indexierungssystem mit Grundformreduktion, Kompositumzerlegung,
|
5
|
+
# Mehrworterkennung und Relationierung.
|
6
|
+
#
|
7
|
+
# Copyright (C) 2005-2007 John Vorhauer
|
8
|
+
# Copyright (C) 2007-2011 John Vorhauer, Jens Wille
|
9
|
+
#
|
10
|
+
# This program is free software; you can redistribute it and/or modify it under
|
11
|
+
# the terms of the GNU Affero General Public License as published by the Free
|
12
|
+
# Software Foundation; either version 3 of the License, or (at your option)
|
13
|
+
# any later version.
|
14
|
+
#
|
15
|
+
# This program is distributed in the hope that it will be useful, but WITHOUT
|
16
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
17
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
18
|
+
# details.
|
19
|
+
#
|
20
|
+
# You should have received a copy of the GNU Affero General Public License along
|
21
|
+
# with this program; if not, write to the Free Software Foundation, Inc.,
|
22
|
+
# 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
|
23
|
+
#
|
24
|
+
# For more information visit http://www.lex-lingo.de or contact me at
|
25
|
+
# welcomeATlex-lingoDOTde near 50°55'N+6°55'E.
|
26
|
+
#
|
27
|
+
# Lex Lingo rules from here on
|
28
|
+
#++
|
29
|
+
|
30
|
+
class Lingo
|
31
|
+
|
32
|
+
# Die Erkennung von Abkürzungen kann auf vielfältige Weise erfolgen. In jedem Fall
|
33
|
+
# sollte eine sichere Unterscheidung von einem Satzende-Punkt möglich sein.
|
34
|
+
# Der in Lingo gewählte Ansatz befreit den Tokenizer von dieser Arbeit und konzentriert
|
35
|
+
# die Erkennung in diesem Attendee.
|
36
|
+
# Sobald der Abbreviator im Datenstrom auf einen Punkt trifft (Token = <tt>:./PUNC:</tt>),
|
37
|
+
# prüft er das vorhergehende Token auf eine gültige Abkürzung im Abkürzungs-Wörterbuch.
|
38
|
+
# Wird es als Abkürzung erkannt, dann wird das Token in ein Word gewandelt und das
|
39
|
+
# Punkt-Token aus dem Zeichenstrom entfernt.
|
40
|
+
#
|
41
|
+
# === Mögliche Verlinkung
|
42
|
+
# Erwartet:: Daten des Typs *Token* z.B. von Tokenizer
|
43
|
+
# Erzeugt:: Leitet Token weiter und wandelt erkannte Abkürzungen in den Typ *Word* z.B. für Wordsearcher
|
44
|
+
#
|
45
|
+
# === Parameter
|
46
|
+
# Kursiv dargestellte Parameter sind optional (ggf. mit Angabe der Voreinstellung).
|
47
|
+
# Alle anderen Parameter müssen zwingend angegeben werden.
|
48
|
+
# <b>in</b>:: siehe allgemeine Beschreibung des Attendee
|
49
|
+
# <b>out</b>:: siehe allgemeine Beschreibung des Attendee
|
50
|
+
# <b>source</b>:: siehe allgemeine Beschreibung des Dictionary
|
51
|
+
# <b><i>mode</i></b>:: (Standard: all) siehe allgemeine Beschreibung des Dictionary
|
52
|
+
#
|
53
|
+
# === Beispiele
|
54
|
+
# Bei der Verarbeitung einer normalen Textdatei mit der Ablaufkonfiguration <tt>t1.cfg</tt>
|
55
|
+
# meeting:
|
56
|
+
# attendees:
|
57
|
+
# - textreader: { out: lines, files: '$(files)' }
|
58
|
+
# - tokenizer: { in: lines, out: token }
|
59
|
+
# - abbreviator: { in: token, out: abbrev, source: 'sys-abk' }
|
60
|
+
# - debugger: { in: abbrev, prompt: 'out>' }
|
61
|
+
# ergibt die Ausgabe über den Debugger: <tt>lingo -c t1 test.txt</tt>
|
62
|
+
# out> *FILE('test.txt')
|
63
|
+
# out> :Dies/WORD:
|
64
|
+
# out> :ist/WORD:
|
65
|
+
# out> <ggf. = [(gegebenenfalls/w)]>
|
66
|
+
# out> :eine/WORD:
|
67
|
+
# out> :Abkürzung/WORD:
|
68
|
+
# out> :./PUNC:
|
69
|
+
# out> *EOL('test.txt')
|
70
|
+
# out> *EOF('test.txt')
|
71
|
+
|
72
|
+
class Attendee::Abbreviator < BufferedAttendee
|
73
|
+
|
74
|
+
protected
|
75
|
+
|
76
|
+
def init
|
77
|
+
# Wörterbuch bereitstellen
|
78
|
+
src = get_array('source')
|
79
|
+
mod = get_key('mode', 'all')
|
80
|
+
@dic = Dictionary.new({'source'=>src, 'mode'=>mod}, @lingo)
|
81
|
+
end
|
82
|
+
|
83
|
+
def control(cmd, par)
|
84
|
+
@dic.report.each_pair { |key, value| set(key, value) } if cmd == STR_CMD_STATUS
|
85
|
+
|
86
|
+
# Jedes Control-Object ist auch Auslöser der Verarbeitung
|
87
|
+
process_buffer
|
88
|
+
end
|
89
|
+
|
90
|
+
private
|
91
|
+
|
92
|
+
def process_buffer?
|
93
|
+
@buffer[-1].kind_of?(Token) && @buffer[-1].form == CHAR_PUNCT
|
94
|
+
end
|
95
|
+
|
96
|
+
def process_buffer
|
97
|
+
if @buffer.size < 2
|
98
|
+
forward_buffer
|
99
|
+
return
|
100
|
+
end
|
101
|
+
|
102
|
+
# Wort vor dem Punkt im Abkürzungswörterbuch suchen
|
103
|
+
if @buffer[-2].kind_of?(Token)
|
104
|
+
inc('Anzahl gesuchter Abkürzungen')
|
105
|
+
abbr = @dic.find_word(@buffer[-2].form)
|
106
|
+
if abbr.attr == WA_IDENTIFIED
|
107
|
+
inc('Anzahl gefundener Abkürzungen')
|
108
|
+
abbr.form += CHAR_PUNCT
|
109
|
+
@buffer[-2] = abbr
|
110
|
+
@buffer.delete_at(-1)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
forward_buffer
|
115
|
+
end
|
116
|
+
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|