lingo 1.8.1 → 1.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +23 -5
- data/README +1 -1
- data/Rakefile +5 -7
- data/TODO +2 -0
- data/bin/lingo +5 -1
- data/de.lang +1 -1
- data/en/lingo-syn.txt +0 -0
- data/en.lang +2 -1
- data/lib/lingo/attendee/abbreviator.rb +8 -9
- data/lib/lingo/attendee/debugger.rb +5 -4
- data/lib/lingo/attendee/decomposer.rb +8 -3
- data/lib/lingo/attendee/dehyphenizer.rb +19 -63
- data/lib/lingo/attendee/formatter.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +67 -155
- data/lib/lingo/attendee/noneword_filter.rb +16 -9
- data/lib/lingo/attendee/object_filter.rb +1 -1
- data/lib/lingo/attendee/sequencer.rb +32 -63
- data/lib/lingo/attendee/stemmer/porter.rb +343 -0
- data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
- data/lib/lingo/attendee/synonymer.rb +10 -9
- data/lib/lingo/attendee/text_reader.rb +102 -76
- data/lib/lingo/attendee/text_writer.rb +23 -26
- data/lib/lingo/attendee/tokenizer.rb +13 -27
- data/lib/lingo/attendee/variator.rb +26 -66
- data/lib/lingo/attendee/vector_filter.rb +42 -43
- data/lib/lingo/attendee/word_searcher.rb +6 -7
- data/lib/lingo/attendee.rb +25 -7
- data/lib/lingo/buffered_attendee.rb +36 -10
- data/lib/lingo/cachable.rb +8 -8
- data/lib/lingo/config.rb +5 -6
- data/lib/lingo/ctl.rb +2 -3
- data/lib/lingo/database/crypter.rb +9 -26
- data/lib/lingo/database/gdbm_store.rb +3 -5
- data/lib/lingo/database/libcdb_store.rb +4 -6
- data/lib/lingo/database/sdbm_store.rb +11 -6
- data/lib/lingo/database/show_progress.rb +3 -43
- data/lib/lingo/database/source/key_value.rb +2 -6
- data/lib/lingo/database/source/multi_key.rb +3 -5
- data/lib/lingo/database/source/multi_value.rb +2 -6
- data/lib/lingo/database/source/single_word.rb +4 -6
- data/lib/lingo/database/source/word_class.rb +4 -10
- data/lib/lingo/database/source.rb +20 -18
- data/lib/lingo/database.rb +84 -59
- data/lib/lingo/error.rb +57 -1
- data/lib/lingo/language/dictionary.rb +21 -18
- data/lib/lingo/language/grammar.rb +40 -49
- data/lib/lingo/language/lexical.rb +6 -6
- data/lib/lingo/language/lexical_hash.rb +6 -0
- data/lib/lingo/language/word.rb +32 -15
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/language.rb +14 -25
- data/lib/lingo/reportable.rb +12 -10
- data/lib/lingo/show_progress.rb +81 -0
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo.rb +63 -24
- data/lingo-call.cfg +6 -10
- data/lingo.cfg +60 -44
- data/lir.cfg +42 -41
- data/test/attendee/ts_abbreviator.rb +3 -5
- data/test/attendee/ts_decomposer.rb +3 -5
- data/test/attendee/ts_multi_worder.rb +87 -145
- data/test/attendee/ts_noneword_filter.rb +5 -3
- data/test/attendee/ts_object_filter.rb +5 -3
- data/test/attendee/ts_sequencer.rb +3 -5
- data/test/attendee/ts_stemmer.rb +309 -0
- data/test/attendee/ts_synonymer.rb +15 -11
- data/test/attendee/ts_text_reader.rb +12 -15
- data/test/attendee/ts_text_writer.rb +24 -29
- data/test/attendee/ts_tokenizer.rb +9 -7
- data/test/attendee/ts_variator.rb +4 -4
- data/test/attendee/ts_vector_filter.rb +24 -16
- data/test/attendee/ts_word_searcher.rb +20 -36
- data/test/{lir.csv → lir.vec} +0 -0
- data/test/ref/artikel.vec +943 -943
- data/test/ref/artikel.ven +943 -943
- data/test/ref/lir.non +201 -201
- data/test/ref/lir.seq +178 -178
- data/test/ref/lir.syn +49 -49
- data/test/ref/lir.vec +329 -0
- data/test/test_helper.rb +20 -36
- data/test/ts_database.rb +10 -10
- data/test/ts_language.rb +279 -319
- metadata +93 -104
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lingo-all.cfg +0 -89
- data/porter/stem.cfg +0 -311
- data/porter/stem.rb +0 -150
- data/test/ref/lir.csv +0 -329
- data/test.cfg +0 -79
@@ -24,6 +24,8 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'find'
|
28
|
+
|
27
29
|
%w[filemagic mime/types hpricot pdf-reader].each { |lib|
|
28
30
|
begin
|
29
31
|
require lib
|
@@ -55,19 +57,19 @@ class Lingo
|
|
55
57
|
# Komma voneinander getrennt, z.B.
|
56
58
|
# files: 'readme.txt'
|
57
59
|
# files: 'readme.txt,lingo.cfg'
|
58
|
-
# <b><i>
|
59
|
-
#
|
60
|
-
#
|
61
|
-
#
|
62
|
-
#
|
63
|
-
#
|
60
|
+
# <b><i>records</i></b>:: Mit diesem Parameter wird angegeben, woran der Anfang
|
61
|
+
# eines neuen Records erkannt werden kann und wie die
|
62
|
+
# Record-Nummer identifiziert wird. Das Format einer
|
63
|
+
# LIR-Datei ist z.B.
|
64
|
+
# [00001.]
|
65
|
+
# 020: ¬Die Aufgabenteilung zwischen Wortschatz und Grammatik.
|
64
66
|
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
#
|
70
|
-
#
|
67
|
+
# [00002.]
|
68
|
+
# 020: Nicht-konventionelle Thesaurusrelationen als Orientierungshilfen.
|
69
|
+
# Mit der Angabe von
|
70
|
+
# records: "^\[(\d+)\.\]"
|
71
|
+
# werden die Record-Zeilen erkannt und jeweils die Record-Nummer +00001+,
|
72
|
+
# bzw. +00002+ erkannt.
|
71
73
|
#
|
72
74
|
# === Generierte Kommandos
|
73
75
|
# Damit der nachfolgende Datenstrom einwandfrei verarbeitet werden kann, generiert der TextReader
|
@@ -90,7 +92,7 @@ class Lingo
|
|
90
92
|
# Bei der Verarbeitung einer LIR-Datei mit der Ablaufkonfiguration <tt>t2.cfg</tt>
|
91
93
|
# meeting:
|
92
94
|
# attendees:
|
93
|
-
# - text_reader: { out: lines, files: '$(files)',
|
95
|
+
# - text_reader: { out: lines, files: '$(files)', records: "^\[(\d+)\.\]" }
|
94
96
|
# - debugger: { in: lines, prompt: 'out>'}
|
95
97
|
# ergibt die Ausgabe mit <tt>lingo -c t2 lir.txt</tt>
|
96
98
|
# out> *LIR-FORMAT('')
|
@@ -105,112 +107,136 @@ class Lingo
|
|
105
107
|
|
106
108
|
protected
|
107
109
|
|
108
|
-
# TODO: FILE und LIR-FILE
|
109
|
-
# TODO: lir-record-pattern abkürzen
|
110
|
-
# Interpretation der Parameter
|
110
|
+
# TODO: FILE und LIR-FILE (?)
|
111
111
|
def init
|
112
|
-
|
113
|
-
|
114
|
-
@
|
115
|
-
@
|
116
|
-
@
|
112
|
+
get_files
|
113
|
+
|
114
|
+
@chomp = get_key('chomp', true)
|
115
|
+
@filter = get_key('filter', false)
|
116
|
+
@progress = get_key('progress', false)
|
117
|
+
|
118
|
+
if @lir = get_key('records', get_key('lir-record-pattern', nil)) # DEPRECATE lir-record-pattern
|
119
|
+
@lir = @lir == true ? %r{^\[(\d+)\.\]} : Regexp.new(@lir)
|
120
|
+
end
|
117
121
|
end
|
118
122
|
|
119
123
|
def control(cmd, param)
|
120
|
-
if cmd==STR_CMD_TALK
|
121
|
-
forward(STR_CMD_LIR, '') if @
|
122
|
-
@files.each
|
124
|
+
if cmd == STR_CMD_TALK
|
125
|
+
forward(STR_CMD_LIR, '') if @lir
|
126
|
+
@files.each(&method(:spool))
|
123
127
|
end
|
124
128
|
end
|
125
129
|
|
126
130
|
private
|
127
131
|
|
128
132
|
# Gibt eine Datei zeilenweise in den Ausgabekanal
|
129
|
-
def spool(
|
130
|
-
unless stdin?(
|
131
|
-
raise FileNotFoundError.new(filename) unless File.exist?(filename)
|
132
|
-
|
133
|
+
def spool(path)
|
134
|
+
unless stdin = stdin?(path)
|
133
135
|
inc('Anzahl Dateien')
|
134
|
-
add('Anzahl Bytes', File.
|
136
|
+
add('Anzahl Bytes', size = File.size(path))
|
137
|
+
|
138
|
+
size = nil unless @progress
|
135
139
|
end
|
136
140
|
|
137
|
-
forward(STR_CMD_FILE,
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
141
|
+
forward(STR_CMD_FILE, path)
|
142
|
+
|
143
|
+
ShowProgress.new(self, size, path) { |progress|
|
144
|
+
filter(path, stdin) { |line, pos|
|
145
|
+
inc('Anzahl Zeilen')
|
146
|
+
progress[pos]
|
147
|
+
|
148
|
+
line.chomp! if @chomp
|
149
|
+
|
150
|
+
if line =~ @lir
|
151
|
+
forward(STR_CMD_RECORD, $1)
|
152
|
+
else
|
153
|
+
forward(line) unless line.empty?
|
154
|
+
end
|
155
|
+
}
|
149
156
|
}
|
150
157
|
|
151
|
-
forward(STR_CMD_EOF,
|
158
|
+
forward(STR_CMD_EOF, path)
|
152
159
|
end
|
153
160
|
|
154
|
-
def filter(
|
155
|
-
|
156
|
-
@lingo.config.stdin.set_encoding(ENC)
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
161
|
+
def filter(path, stdin = stdin?(path))
|
162
|
+
io, block = stdin ? [
|
163
|
+
@lingo.config.stdin.set_encoding(ENC),
|
164
|
+
lambda { |line| yield line, 0 }
|
165
|
+
] : [
|
166
|
+
File.open(path, 'rb', encoding: ENC),
|
167
|
+
lambda { |line| yield line, io.pos }
|
168
|
+
]
|
169
|
+
|
170
|
+
case @filter == true ? file_type(path, io) : @filter.to_s
|
171
|
+
when /html/i then io = filter_html(io)
|
172
|
+
when /xml/i then io = filter_html(io, true)
|
173
|
+
when /pdf/i then filter_pdf(io, &block); return
|
174
|
+
end
|
165
175
|
|
166
|
-
|
176
|
+
io.each_line(&block) if io
|
167
177
|
end
|
168
178
|
|
169
|
-
def filter_pdf(
|
179
|
+
def filter_pdf(io, &block)
|
170
180
|
if Object.const_defined?(:PDF) && PDF.const_defined?(:Reader)
|
171
|
-
PDFFilter.filter(
|
172
|
-
nil
|
181
|
+
PDFFilter.filter(io, &block)
|
173
182
|
else
|
174
183
|
warn "PDF filter not available. Please install `pdf-reader'."
|
175
|
-
file
|
176
184
|
end
|
177
185
|
end
|
178
186
|
|
179
|
-
def filter_html(
|
187
|
+
def filter_html(io, xml = false)
|
180
188
|
if Object.const_defined?(:Hpricot)
|
181
|
-
Hpricot(
|
189
|
+
Hpricot(io, xml: xml).inner_text
|
182
190
|
else
|
183
191
|
warn "#{xml ? 'X' : 'HT'}ML filter not available. Please install `hpricot'."
|
184
|
-
|
192
|
+
nil
|
185
193
|
end
|
186
194
|
end
|
187
195
|
|
188
|
-
def file_type(
|
189
|
-
if Object.const_defined?(:FileMagic) &&
|
190
|
-
|
191
|
-
|
192
|
-
|
196
|
+
def file_type(path, io)
|
197
|
+
if Object.const_defined?(:FileMagic) && io.respond_to?(:rewind)
|
198
|
+
FileMagic.fm(:mime, simplified: true).buffer(io.read(256)).tap {
|
199
|
+
io.rewind
|
200
|
+
}
|
193
201
|
elsif Object.const_defined?(:MIME) && MIME.const_defined?(:Types)
|
194
|
-
|
195
|
-
type.
|
196
|
-
|
197
|
-
warn 'Filters not available. File type could not be determined.'
|
198
|
-
nil
|
199
|
-
end
|
202
|
+
MIME::Types.of(path).first.tap { |type| type ? type.content_type :
|
203
|
+
warn('Filters not available. File type could not be determined.')
|
204
|
+
}
|
200
205
|
else
|
201
206
|
warn "Filters not available. Please install `ruby-filemagic' or `mime-types'."
|
202
207
|
nil
|
203
208
|
end
|
204
209
|
end
|
205
210
|
|
206
|
-
def stdin?(
|
207
|
-
%w[STDIN -].include?(
|
211
|
+
def stdin?(path)
|
212
|
+
%w[STDIN -].include?(path)
|
213
|
+
end
|
214
|
+
|
215
|
+
def get_files
|
216
|
+
args = [get_key('glob', '*.txt'), get_key('recursive', false)]
|
217
|
+
|
218
|
+
@files = []
|
219
|
+
|
220
|
+
Array(get_key('files', '-')).each { |path|
|
221
|
+
stdin?(path) ? @files << path : add_files(path, *args)
|
222
|
+
}
|
223
|
+
|
224
|
+
@files.map!(&File.method(:expand_path))
|
225
|
+
@files.uniq!
|
226
|
+
end
|
227
|
+
|
228
|
+
def add_files(path, glob, recursive = false)
|
229
|
+
Dir[path].sort!.each { |match|
|
230
|
+
File.directory?(match) ? recursive ? Find.find(match) { |entry|
|
231
|
+
@files << entry if File.file?(entry) && File.fnmatch?(glob, entry)
|
232
|
+
} : add_files(File.join(match, glob), glob) : @files << match
|
233
|
+
}.empty? and raise FileNotFoundError.new(path)
|
208
234
|
end
|
209
235
|
|
210
236
|
class PDFFilter
|
211
237
|
|
212
|
-
def self.filter(
|
213
|
-
PDF::Reader.new.parse(
|
238
|
+
def self.filter(io, &block)
|
239
|
+
PDF::Reader.new.parse(io, new(&block))
|
214
240
|
end
|
215
241
|
|
216
242
|
def initialize(&block)
|
@@ -82,11 +82,15 @@ class Lingo
|
|
82
82
|
def init
|
83
83
|
@ext = get_key('ext', 'txt2')
|
84
84
|
@lir = get_key('lir-format', false)
|
85
|
-
|
85
|
+
|
86
|
+
@sep = @config['sep'] unless @lir
|
87
|
+
@sep &&= @sep.evaluate
|
88
|
+
@sep ||= ' '
|
89
|
+
|
86
90
|
@no_sep, @no_puts = true, false
|
87
91
|
end
|
88
92
|
|
89
|
-
def control(cmd,
|
93
|
+
def control(cmd, param)
|
90
94
|
case cmd
|
91
95
|
when STR_CMD_LIR
|
92
96
|
@lir = true
|
@@ -94,49 +98,43 @@ class Lingo
|
|
94
98
|
@no_sep = true
|
95
99
|
|
96
100
|
if stdout?(@ext)
|
97
|
-
@filename = @ext
|
98
|
-
@file = @lingo.config.stdout
|
101
|
+
@filename, @file = @ext, @lingo.config.stdout
|
99
102
|
else
|
100
|
-
@filename = par.sub(/(\.[^.]+)?$/, '.'+@ext)
|
101
|
-
@file = File.new(@filename,'w')
|
102
103
|
inc('Anzahl Dateien')
|
104
|
+
@file = File.open(@filename = File.set_ext(param, ".#{@ext}"), 'w')
|
103
105
|
end
|
104
106
|
|
105
|
-
@lir_rec_no = ''
|
106
|
-
@lir_rec_buf = Array.new
|
107
|
+
@lir_rec_no, @lir_rec_buf = '', []
|
107
108
|
when STR_CMD_RECORD
|
108
109
|
@no_sep = true
|
110
|
+
|
109
111
|
if @lir
|
110
112
|
flush_lir_buffer
|
111
|
-
@lir_rec_no =
|
113
|
+
@lir_rec_no = param
|
112
114
|
end
|
113
115
|
when STR_CMD_EOL
|
114
116
|
@no_sep = true
|
117
|
+
|
115
118
|
unless @lir
|
116
|
-
@file.puts unless @no_puts # unless @sep=="\n"
|
117
119
|
inc('Anzahl Zeilen')
|
120
|
+
@file.puts unless @no_puts
|
118
121
|
end
|
119
122
|
when STR_CMD_EOF
|
120
123
|
flush_lir_buffer if @lir
|
121
124
|
|
122
125
|
unless stdout?(@filename)
|
126
|
+
add('Anzahl Bytes', @file.size)
|
123
127
|
@file.close
|
124
|
-
add('Anzahl Bytes', File.stat(@filename).size)
|
125
128
|
end
|
126
129
|
end
|
127
130
|
end
|
128
131
|
|
129
132
|
def process(obj)
|
130
|
-
if
|
131
|
-
|
132
|
-
|
133
|
-
@file.print
|
134
|
-
@
|
135
|
-
if obj.is_a?(Word) || obj.is_a?(Token)
|
136
|
-
@file.print obj.form
|
137
|
-
else
|
138
|
-
@file.print obj
|
139
|
-
end
|
133
|
+
obj = obj.form if obj.is_a?(WordForm)
|
134
|
+
|
135
|
+
@lir ? @lir_rec_buf << obj : begin
|
136
|
+
@no_sep ? @no_sep = false : @file.print(@sep)
|
137
|
+
@file.print(obj)
|
140
138
|
end
|
141
139
|
end
|
142
140
|
|
@@ -144,12 +142,11 @@ class Lingo
|
|
144
142
|
|
145
143
|
def flush_lir_buffer
|
146
144
|
unless @lir_rec_no.empty? || @lir_rec_buf.empty?
|
147
|
-
|
148
|
-
@
|
149
|
-
|
150
|
-
@file.print @lir_rec_no, '*', @lir_rec_buf.join(@sep), "\n"
|
151
|
-
end
|
145
|
+
@file.print(*[@lir_rec_no, @lir_rec_buf.join(@sep), "\n"].tap { |buf|
|
146
|
+
@sep =~ /\n/ ? buf.insert(1, "\n").unshift('*') : buf.insert(1, '*')
|
147
|
+
})
|
152
148
|
end
|
149
|
+
|
153
150
|
@lir_rec_no = ''
|
154
151
|
@lir_rec_buf.clear
|
155
152
|
end
|
@@ -101,10 +101,6 @@ class Lingo
|
|
101
101
|
protected
|
102
102
|
|
103
103
|
def init
|
104
|
-
# Regular Expressions für Token-Erkennung einlesen
|
105
|
-
regulars = get_key('regulars', '')
|
106
|
-
raise NoConfigKeyError.new(:regulars) unless regulars
|
107
|
-
|
108
104
|
@space = get_key('space', false)
|
109
105
|
@tags = get_key('tags', true)
|
110
106
|
@wiki = get_key('wiki', true)
|
@@ -115,30 +111,22 @@ class Lingo
|
|
115
111
|
@rules << ['WIKI', /^\[\[.+?\]\]/] unless @wiki
|
116
112
|
@rules.unshift(['WIKI', /^=+.+=+$/]) unless @wiki
|
117
113
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
regulars.each { |rule|
|
122
|
-
name = rule.keys[0]
|
123
|
-
expr = rule.values[0].gsub(/_(\w+?)_/) {
|
114
|
+
get_key('regulars', []).each_with_object({}) { |rule, macros|
|
115
|
+
expr = rule.values.first.gsub(/_(\w+?)_/) {
|
124
116
|
macros[$&] || begin
|
125
117
|
Database::Source.const_get("UTF8_#{$1.upcase}")
|
126
118
|
rescue NameError
|
127
119
|
end
|
128
120
|
}
|
129
121
|
|
130
|
-
if name =~ /^_\w+_$/
|
131
|
-
macros[name] = expr
|
122
|
+
if (name = rule.keys.first) =~ /^_\w+_$/
|
123
|
+
macros[name] = expr
|
132
124
|
else
|
133
|
-
@rules << [name,
|
125
|
+
@rules << [name, /^#{expr}/]
|
134
126
|
end
|
135
127
|
}
|
136
128
|
|
137
|
-
|
138
|
-
# nicht um die Verarbeitung einer LIR-Datei handelt. Im Falle einer normalen Datei
|
139
|
-
# wird der Dateiname gespeichert und als Kennzeichen für die Erzeugung von
|
140
|
-
# Zeilenende-Nachrichten herangezogen.
|
141
|
-
@filename = nil
|
129
|
+
@filename = @cont = nil
|
142
130
|
end
|
143
131
|
|
144
132
|
def control(cmd, param)
|
@@ -154,12 +142,10 @@ class Lingo
|
|
154
142
|
inc('Anzahl Zeilen')
|
155
143
|
|
156
144
|
tokenize(obj) { |form, attr|
|
157
|
-
|
158
|
-
|
159
|
-
inc('Anzahl Muster '+token.attr)
|
145
|
+
inc("Anzahl Muster #{attr}")
|
160
146
|
inc('Anzahl Token')
|
161
147
|
|
162
|
-
forward(
|
148
|
+
forward(Token.new(form, attr))
|
163
149
|
}
|
164
150
|
|
165
151
|
forward(STR_CMD_EOL, @filename) if @filename
|
@@ -175,7 +161,7 @@ class Lingo
|
|
175
161
|
case @cont
|
176
162
|
when 'HTML'
|
177
163
|
if textline =~ /^[^<>]*>/
|
178
|
-
yield
|
164
|
+
yield $&, @cont
|
179
165
|
textline, @cont = $', nil
|
180
166
|
else
|
181
167
|
yield textline, @cont
|
@@ -183,7 +169,7 @@ class Lingo
|
|
183
169
|
end
|
184
170
|
when 'WIKI'
|
185
171
|
if textline =~ /^[^\[\]]*\]\]/
|
186
|
-
yield
|
172
|
+
yield $&, @cont
|
187
173
|
textline, @cont = $', nil
|
188
174
|
else
|
189
175
|
yield textline, @cont
|
@@ -191,12 +177,12 @@ class Lingo
|
|
191
177
|
end
|
192
178
|
when nil
|
193
179
|
if !@tags && textline =~ /<[^<>]*$/
|
194
|
-
yield
|
180
|
+
yield $&, @cont = 'HTML'
|
195
181
|
textline = $`
|
196
182
|
end
|
197
183
|
|
198
184
|
if !@wiki && textline =~ /\[\[[^\[\]]*$/
|
199
|
-
yield
|
185
|
+
yield $&, @cont = 'WIKI'
|
200
186
|
textline = $`
|
201
187
|
end
|
202
188
|
end
|
@@ -204,7 +190,7 @@ class Lingo
|
|
204
190
|
until textline.empty?
|
205
191
|
@rules.each { |name, expr|
|
206
192
|
if textline =~ expr
|
207
|
-
yield
|
193
|
+
yield $&, name if name != 'SPAC' || @space
|
208
194
|
textline = $'
|
209
195
|
break
|
210
196
|
end
|
@@ -75,67 +75,39 @@ class Lingo
|
|
75
75
|
protected
|
76
76
|
|
77
77
|
def init
|
78
|
-
|
79
|
-
@
|
80
|
-
@
|
81
|
-
|
78
|
+
@marker = get_key('marker', '*')
|
79
|
+
@max = get_key('max-var', max = 10000).to_i
|
80
|
+
@max = max unless @max > 0
|
81
|
+
@var = get_key('variations')
|
82
82
|
|
83
|
-
|
84
|
-
@var_strings = get_key('variations')
|
85
|
-
raise MissingConfigError.new(:variations) if @var_strings.empty?
|
83
|
+
raise MissingConfigError.new(:variations) if @var.empty?
|
86
84
|
|
87
|
-
# Initialisierungen
|
88
85
|
@check = Hash.new(false)
|
89
|
-
|
86
|
+
get_array('check', WA_UNKNOWN).each { |s| @check[s.upcase] = true }
|
90
87
|
|
91
88
|
set_dic
|
92
89
|
set_gra
|
93
|
-
|
94
|
-
if @max_var.zero?
|
95
|
-
@max_var = 10000
|
96
|
-
@lingo.warn "#{self.class}: max-var is 0, setting to #{@max_var}"
|
97
|
-
end
|
98
90
|
end
|
99
91
|
|
100
|
-
def control(cmd,
|
101
|
-
|
102
|
-
if cmd == STR_CMD_STATUS
|
103
|
-
# Eigenen Status um Status von Dictionary und Grammer erweitern
|
104
|
-
@dic.report.each_pair { | k, v | set( k, v ) }
|
105
|
-
@gra.report.each_pair { | k, v | set( k, v ) }
|
106
|
-
end
|
92
|
+
def control(cmd, param)
|
93
|
+
report_on(cmd, @dic, @gra)
|
107
94
|
end
|
108
95
|
|
109
96
|
def process(obj)
|
110
|
-
# Zu prüfende Wörter filtern
|
111
97
|
if obj.is_a?(Word) && @check[obj.attr]
|
112
|
-
# Statistik für Report
|
113
98
|
inc('Anzahl gesuchter Wörter')
|
114
99
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
end
|
121
|
-
|
122
|
-
# Prüfe Variation auf bekanntes Wort
|
123
|
-
variations[0...@max_var].each do |var|
|
124
|
-
# Variiertes Wort im Wörterbuch suchen
|
125
|
-
word = @dic.find_word(var)
|
126
|
-
word = @gra.find_compositum(var) if word.unknown?
|
127
|
-
next if word.unknown? || (
|
128
|
-
word.attr == WA_KOMPOSITUM && word.lexicals.any? { |lex|
|
129
|
-
lex.attr[0..0] == LA_TAKEITASIS
|
100
|
+
@var.each_with_object([obj.form]) { |a, v| variate(v, *a) }.
|
101
|
+
tap { |v| v.slice!(@max..-1) }.each { |var|
|
102
|
+
next if (word = find_word(var)).unknown? || (
|
103
|
+
word.attr == WA_COMPOUND && word.lexicals.any? { |lex|
|
104
|
+
lex.attr.start_with?(LA_TAKEITASIS)
|
130
105
|
}
|
131
106
|
)
|
132
107
|
|
133
|
-
# Das erste erkannte Wort beendet die Suche
|
134
108
|
inc('Anzahl gefundener Wörter')
|
135
|
-
word.form = @marker + var
|
136
|
-
|
137
|
-
return
|
138
|
-
end
|
109
|
+
return forward(word.tap { word.form = @marker + var })
|
110
|
+
}
|
139
111
|
end
|
140
112
|
|
141
113
|
forward(obj)
|
@@ -146,32 +118,20 @@ class Lingo
|
|
146
118
|
# Variiere die Bestandteile eines Arrays gemäß den Austauschvorgaben.
|
147
119
|
#
|
148
120
|
# variate( 'Tiieh', 'ieh', 'sch' ) => ['Tiieh', 'Tisch']
|
149
|
-
def variate(
|
150
|
-
|
151
|
-
add_variations = []
|
152
|
-
from_re = Regexp.new(from)
|
153
|
-
|
154
|
-
# alle Wörter in der variation_list permutieren
|
155
|
-
variation_list.each do |wordform|
|
121
|
+
def variate(variations, from, to)
|
122
|
+
add, change, re = [], [from, to], Regexp.new(from)
|
156
123
|
|
157
|
-
|
158
|
-
|
159
|
-
n = wordpart.size - 1
|
124
|
+
variations.each { |form|
|
125
|
+
parts = " #{form} ".split(re)
|
160
126
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
# i[x] = Wert des x.ten Bit von Integer i
|
168
|
-
(1..n).each { |j| variation += change[i[j-1]] + wordpart[j] }
|
169
|
-
|
170
|
-
add_variations << variation.strip
|
171
|
-
end
|
172
|
-
end
|
127
|
+
1.upto(2 ** (n = parts.size - 1) - 1) { |i|
|
128
|
+
var = parts.first
|
129
|
+
1.upto(n) { |j| var += change[i[j - 1]] + parts[j] }
|
130
|
+
add << var.strip
|
131
|
+
}
|
132
|
+
}
|
173
133
|
|
174
|
-
|
134
|
+
variations.concat(add)
|
175
135
|
end
|
176
136
|
|
177
137
|
end
|