lingo 1.8.1 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +23 -5
- data/README +1 -1
- data/Rakefile +5 -7
- data/TODO +2 -0
- data/bin/lingo +5 -1
- data/de.lang +1 -1
- data/en/lingo-syn.txt +0 -0
- data/en.lang +2 -1
- data/lib/lingo/attendee/abbreviator.rb +8 -9
- data/lib/lingo/attendee/debugger.rb +5 -4
- data/lib/lingo/attendee/decomposer.rb +8 -3
- data/lib/lingo/attendee/dehyphenizer.rb +19 -63
- data/lib/lingo/attendee/formatter.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +67 -155
- data/lib/lingo/attendee/noneword_filter.rb +16 -9
- data/lib/lingo/attendee/object_filter.rb +1 -1
- data/lib/lingo/attendee/sequencer.rb +32 -63
- data/lib/lingo/attendee/stemmer/porter.rb +343 -0
- data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
- data/lib/lingo/attendee/synonymer.rb +10 -9
- data/lib/lingo/attendee/text_reader.rb +102 -76
- data/lib/lingo/attendee/text_writer.rb +23 -26
- data/lib/lingo/attendee/tokenizer.rb +13 -27
- data/lib/lingo/attendee/variator.rb +26 -66
- data/lib/lingo/attendee/vector_filter.rb +42 -43
- data/lib/lingo/attendee/word_searcher.rb +6 -7
- data/lib/lingo/attendee.rb +25 -7
- data/lib/lingo/buffered_attendee.rb +36 -10
- data/lib/lingo/cachable.rb +8 -8
- data/lib/lingo/config.rb +5 -6
- data/lib/lingo/ctl.rb +2 -3
- data/lib/lingo/database/crypter.rb +9 -26
- data/lib/lingo/database/gdbm_store.rb +3 -5
- data/lib/lingo/database/libcdb_store.rb +4 -6
- data/lib/lingo/database/sdbm_store.rb +11 -6
- data/lib/lingo/database/show_progress.rb +3 -43
- data/lib/lingo/database/source/key_value.rb +2 -6
- data/lib/lingo/database/source/multi_key.rb +3 -5
- data/lib/lingo/database/source/multi_value.rb +2 -6
- data/lib/lingo/database/source/single_word.rb +4 -6
- data/lib/lingo/database/source/word_class.rb +4 -10
- data/lib/lingo/database/source.rb +20 -18
- data/lib/lingo/database.rb +84 -59
- data/lib/lingo/error.rb +57 -1
- data/lib/lingo/language/dictionary.rb +21 -18
- data/lib/lingo/language/grammar.rb +40 -49
- data/lib/lingo/language/lexical.rb +6 -6
- data/lib/lingo/language/lexical_hash.rb +6 -0
- data/lib/lingo/language/word.rb +32 -15
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/language.rb +14 -25
- data/lib/lingo/reportable.rb +12 -10
- data/lib/lingo/show_progress.rb +81 -0
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo.rb +63 -24
- data/lingo-call.cfg +6 -10
- data/lingo.cfg +60 -44
- data/lir.cfg +42 -41
- data/test/attendee/ts_abbreviator.rb +3 -5
- data/test/attendee/ts_decomposer.rb +3 -5
- data/test/attendee/ts_multi_worder.rb +87 -145
- data/test/attendee/ts_noneword_filter.rb +5 -3
- data/test/attendee/ts_object_filter.rb +5 -3
- data/test/attendee/ts_sequencer.rb +3 -5
- data/test/attendee/ts_stemmer.rb +309 -0
- data/test/attendee/ts_synonymer.rb +15 -11
- data/test/attendee/ts_text_reader.rb +12 -15
- data/test/attendee/ts_text_writer.rb +24 -29
- data/test/attendee/ts_tokenizer.rb +9 -7
- data/test/attendee/ts_variator.rb +4 -4
- data/test/attendee/ts_vector_filter.rb +24 -16
- data/test/attendee/ts_word_searcher.rb +20 -36
- data/test/{lir.csv → lir.vec} +0 -0
- data/test/ref/artikel.vec +943 -943
- data/test/ref/artikel.ven +943 -943
- data/test/ref/lir.non +201 -201
- data/test/ref/lir.seq +178 -178
- data/test/ref/lir.syn +49 -49
- data/test/ref/lir.vec +329 -0
- data/test/test_helper.rb +20 -36
- data/test/ts_database.rb +10 -10
- data/test/ts_language.rb +279 -319
- metadata +93 -104
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lingo-all.cfg +0 -89
- data/porter/stem.cfg +0 -311
- data/porter/stem.rb +0 -150
- data/test/ref/lir.csv +0 -329
- data/test.cfg +0 -79
@@ -24,6 +24,8 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'find'
|
28
|
+
|
27
29
|
%w[filemagic mime/types hpricot pdf-reader].each { |lib|
|
28
30
|
begin
|
29
31
|
require lib
|
@@ -55,19 +57,19 @@ class Lingo
|
|
55
57
|
# Komma voneinander getrennt, z.B.
|
56
58
|
# files: 'readme.txt'
|
57
59
|
# files: 'readme.txt,lingo.cfg'
|
58
|
-
# <b><i>
|
59
|
-
#
|
60
|
-
#
|
61
|
-
#
|
62
|
-
#
|
63
|
-
#
|
60
|
+
# <b><i>records</i></b>:: Mit diesem Parameter wird angegeben, woran der Anfang
|
61
|
+
# eines neuen Records erkannt werden kann und wie die
|
62
|
+
# Record-Nummer identifiziert wird. Das Format einer
|
63
|
+
# LIR-Datei ist z.B.
|
64
|
+
# [00001.]
|
65
|
+
# 020: ¬Die Aufgabenteilung zwischen Wortschatz und Grammatik.
|
64
66
|
#
|
65
|
-
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
#
|
69
|
-
#
|
70
|
-
#
|
67
|
+
# [00002.]
|
68
|
+
# 020: Nicht-konventionelle Thesaurusrelationen als Orientierungshilfen.
|
69
|
+
# Mit der Angabe von
|
70
|
+
# records: "^\[(\d+)\.\]"
|
71
|
+
# werden die Record-Zeilen erkannt und jeweils die Record-Nummer +00001+,
|
72
|
+
# bzw. +00002+ erkannt.
|
71
73
|
#
|
72
74
|
# === Generierte Kommandos
|
73
75
|
# Damit der nachfolgende Datenstrom einwandfrei verarbeitet werden kann, generiert der TextReader
|
@@ -90,7 +92,7 @@ class Lingo
|
|
90
92
|
# Bei der Verarbeitung einer LIR-Datei mit der Ablaufkonfiguration <tt>t2.cfg</tt>
|
91
93
|
# meeting:
|
92
94
|
# attendees:
|
93
|
-
# - text_reader: { out: lines, files: '$(files)',
|
95
|
+
# - text_reader: { out: lines, files: '$(files)', records: "^\[(\d+)\.\]" }
|
94
96
|
# - debugger: { in: lines, prompt: 'out>'}
|
95
97
|
# ergibt die Ausgabe mit <tt>lingo -c t2 lir.txt</tt>
|
96
98
|
# out> *LIR-FORMAT('')
|
@@ -105,112 +107,136 @@ class Lingo
|
|
105
107
|
|
106
108
|
protected
|
107
109
|
|
108
|
-
# TODO: FILE und LIR-FILE
|
109
|
-
# TODO: lir-record-pattern abkürzen
|
110
|
-
# Interpretation der Parameter
|
110
|
+
# TODO: FILE und LIR-FILE (?)
|
111
111
|
def init
|
112
|
-
|
113
|
-
|
114
|
-
@
|
115
|
-
@
|
116
|
-
@
|
112
|
+
get_files
|
113
|
+
|
114
|
+
@chomp = get_key('chomp', true)
|
115
|
+
@filter = get_key('filter', false)
|
116
|
+
@progress = get_key('progress', false)
|
117
|
+
|
118
|
+
if @lir = get_key('records', get_key('lir-record-pattern', nil)) # DEPRECATE lir-record-pattern
|
119
|
+
@lir = @lir == true ? %r{^\[(\d+)\.\]} : Regexp.new(@lir)
|
120
|
+
end
|
117
121
|
end
|
118
122
|
|
119
123
|
def control(cmd, param)
|
120
|
-
if cmd==STR_CMD_TALK
|
121
|
-
forward(STR_CMD_LIR, '') if @
|
122
|
-
@files.each
|
124
|
+
if cmd == STR_CMD_TALK
|
125
|
+
forward(STR_CMD_LIR, '') if @lir
|
126
|
+
@files.each(&method(:spool))
|
123
127
|
end
|
124
128
|
end
|
125
129
|
|
126
130
|
private
|
127
131
|
|
128
132
|
# Gibt eine Datei zeilenweise in den Ausgabekanal
|
129
|
-
def spool(
|
130
|
-
unless stdin?(
|
131
|
-
raise FileNotFoundError.new(filename) unless File.exist?(filename)
|
132
|
-
|
133
|
+
def spool(path)
|
134
|
+
unless stdin = stdin?(path)
|
133
135
|
inc('Anzahl Dateien')
|
134
|
-
add('Anzahl Bytes', File.
|
136
|
+
add('Anzahl Bytes', size = File.size(path))
|
137
|
+
|
138
|
+
size = nil unless @progress
|
135
139
|
end
|
136
140
|
|
137
|
-
forward(STR_CMD_FILE,
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
141
|
+
forward(STR_CMD_FILE, path)
|
142
|
+
|
143
|
+
ShowProgress.new(self, size, path) { |progress|
|
144
|
+
filter(path, stdin) { |line, pos|
|
145
|
+
inc('Anzahl Zeilen')
|
146
|
+
progress[pos]
|
147
|
+
|
148
|
+
line.chomp! if @chomp
|
149
|
+
|
150
|
+
if line =~ @lir
|
151
|
+
forward(STR_CMD_RECORD, $1)
|
152
|
+
else
|
153
|
+
forward(line) unless line.empty?
|
154
|
+
end
|
155
|
+
}
|
149
156
|
}
|
150
157
|
|
151
|
-
forward(STR_CMD_EOF,
|
158
|
+
forward(STR_CMD_EOF, path)
|
152
159
|
end
|
153
160
|
|
154
|
-
def filter(
|
155
|
-
|
156
|
-
@lingo.config.stdin.set_encoding(ENC)
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
161
|
+
def filter(path, stdin = stdin?(path))
|
162
|
+
io, block = stdin ? [
|
163
|
+
@lingo.config.stdin.set_encoding(ENC),
|
164
|
+
lambda { |line| yield line, 0 }
|
165
|
+
] : [
|
166
|
+
File.open(path, 'rb', encoding: ENC),
|
167
|
+
lambda { |line| yield line, io.pos }
|
168
|
+
]
|
169
|
+
|
170
|
+
case @filter == true ? file_type(path, io) : @filter.to_s
|
171
|
+
when /html/i then io = filter_html(io)
|
172
|
+
when /xml/i then io = filter_html(io, true)
|
173
|
+
when /pdf/i then filter_pdf(io, &block); return
|
174
|
+
end
|
165
175
|
|
166
|
-
|
176
|
+
io.each_line(&block) if io
|
167
177
|
end
|
168
178
|
|
169
|
-
def filter_pdf(
|
179
|
+
def filter_pdf(io, &block)
|
170
180
|
if Object.const_defined?(:PDF) && PDF.const_defined?(:Reader)
|
171
|
-
PDFFilter.filter(
|
172
|
-
nil
|
181
|
+
PDFFilter.filter(io, &block)
|
173
182
|
else
|
174
183
|
warn "PDF filter not available. Please install `pdf-reader'."
|
175
|
-
file
|
176
184
|
end
|
177
185
|
end
|
178
186
|
|
179
|
-
def filter_html(
|
187
|
+
def filter_html(io, xml = false)
|
180
188
|
if Object.const_defined?(:Hpricot)
|
181
|
-
Hpricot(
|
189
|
+
Hpricot(io, xml: xml).inner_text
|
182
190
|
else
|
183
191
|
warn "#{xml ? 'X' : 'HT'}ML filter not available. Please install `hpricot'."
|
184
|
-
|
192
|
+
nil
|
185
193
|
end
|
186
194
|
end
|
187
195
|
|
188
|
-
def file_type(
|
189
|
-
if Object.const_defined?(:FileMagic) &&
|
190
|
-
|
191
|
-
|
192
|
-
|
196
|
+
def file_type(path, io)
|
197
|
+
if Object.const_defined?(:FileMagic) && io.respond_to?(:rewind)
|
198
|
+
FileMagic.fm(:mime, simplified: true).buffer(io.read(256)).tap {
|
199
|
+
io.rewind
|
200
|
+
}
|
193
201
|
elsif Object.const_defined?(:MIME) && MIME.const_defined?(:Types)
|
194
|
-
|
195
|
-
type.
|
196
|
-
|
197
|
-
warn 'Filters not available. File type could not be determined.'
|
198
|
-
nil
|
199
|
-
end
|
202
|
+
MIME::Types.of(path).first.tap { |type| type ? type.content_type :
|
203
|
+
warn('Filters not available. File type could not be determined.')
|
204
|
+
}
|
200
205
|
else
|
201
206
|
warn "Filters not available. Please install `ruby-filemagic' or `mime-types'."
|
202
207
|
nil
|
203
208
|
end
|
204
209
|
end
|
205
210
|
|
206
|
-
def stdin?(
|
207
|
-
%w[STDIN -].include?(
|
211
|
+
def stdin?(path)
|
212
|
+
%w[STDIN -].include?(path)
|
213
|
+
end
|
214
|
+
|
215
|
+
def get_files
|
216
|
+
args = [get_key('glob', '*.txt'), get_key('recursive', false)]
|
217
|
+
|
218
|
+
@files = []
|
219
|
+
|
220
|
+
Array(get_key('files', '-')).each { |path|
|
221
|
+
stdin?(path) ? @files << path : add_files(path, *args)
|
222
|
+
}
|
223
|
+
|
224
|
+
@files.map!(&File.method(:expand_path))
|
225
|
+
@files.uniq!
|
226
|
+
end
|
227
|
+
|
228
|
+
def add_files(path, glob, recursive = false)
|
229
|
+
Dir[path].sort!.each { |match|
|
230
|
+
File.directory?(match) ? recursive ? Find.find(match) { |entry|
|
231
|
+
@files << entry if File.file?(entry) && File.fnmatch?(glob, entry)
|
232
|
+
} : add_files(File.join(match, glob), glob) : @files << match
|
233
|
+
}.empty? and raise FileNotFoundError.new(path)
|
208
234
|
end
|
209
235
|
|
210
236
|
class PDFFilter
|
211
237
|
|
212
|
-
def self.filter(
|
213
|
-
PDF::Reader.new.parse(
|
238
|
+
def self.filter(io, &block)
|
239
|
+
PDF::Reader.new.parse(io, new(&block))
|
214
240
|
end
|
215
241
|
|
216
242
|
def initialize(&block)
|
@@ -82,11 +82,15 @@ class Lingo
|
|
82
82
|
def init
|
83
83
|
@ext = get_key('ext', 'txt2')
|
84
84
|
@lir = get_key('lir-format', false)
|
85
|
-
|
85
|
+
|
86
|
+
@sep = @config['sep'] unless @lir
|
87
|
+
@sep &&= @sep.evaluate
|
88
|
+
@sep ||= ' '
|
89
|
+
|
86
90
|
@no_sep, @no_puts = true, false
|
87
91
|
end
|
88
92
|
|
89
|
-
def control(cmd,
|
93
|
+
def control(cmd, param)
|
90
94
|
case cmd
|
91
95
|
when STR_CMD_LIR
|
92
96
|
@lir = true
|
@@ -94,49 +98,43 @@ class Lingo
|
|
94
98
|
@no_sep = true
|
95
99
|
|
96
100
|
if stdout?(@ext)
|
97
|
-
@filename = @ext
|
98
|
-
@file = @lingo.config.stdout
|
101
|
+
@filename, @file = @ext, @lingo.config.stdout
|
99
102
|
else
|
100
|
-
@filename = par.sub(/(\.[^.]+)?$/, '.'+@ext)
|
101
|
-
@file = File.new(@filename,'w')
|
102
103
|
inc('Anzahl Dateien')
|
104
|
+
@file = File.open(@filename = File.set_ext(param, ".#{@ext}"), 'w')
|
103
105
|
end
|
104
106
|
|
105
|
-
@lir_rec_no = ''
|
106
|
-
@lir_rec_buf = Array.new
|
107
|
+
@lir_rec_no, @lir_rec_buf = '', []
|
107
108
|
when STR_CMD_RECORD
|
108
109
|
@no_sep = true
|
110
|
+
|
109
111
|
if @lir
|
110
112
|
flush_lir_buffer
|
111
|
-
@lir_rec_no =
|
113
|
+
@lir_rec_no = param
|
112
114
|
end
|
113
115
|
when STR_CMD_EOL
|
114
116
|
@no_sep = true
|
117
|
+
|
115
118
|
unless @lir
|
116
|
-
@file.puts unless @no_puts # unless @sep=="\n"
|
117
119
|
inc('Anzahl Zeilen')
|
120
|
+
@file.puts unless @no_puts
|
118
121
|
end
|
119
122
|
when STR_CMD_EOF
|
120
123
|
flush_lir_buffer if @lir
|
121
124
|
|
122
125
|
unless stdout?(@filename)
|
126
|
+
add('Anzahl Bytes', @file.size)
|
123
127
|
@file.close
|
124
|
-
add('Anzahl Bytes', File.stat(@filename).size)
|
125
128
|
end
|
126
129
|
end
|
127
130
|
end
|
128
131
|
|
129
132
|
def process(obj)
|
130
|
-
if
|
131
|
-
|
132
|
-
|
133
|
-
@file.print
|
134
|
-
@
|
135
|
-
if obj.is_a?(Word) || obj.is_a?(Token)
|
136
|
-
@file.print obj.form
|
137
|
-
else
|
138
|
-
@file.print obj
|
139
|
-
end
|
133
|
+
obj = obj.form if obj.is_a?(WordForm)
|
134
|
+
|
135
|
+
@lir ? @lir_rec_buf << obj : begin
|
136
|
+
@no_sep ? @no_sep = false : @file.print(@sep)
|
137
|
+
@file.print(obj)
|
140
138
|
end
|
141
139
|
end
|
142
140
|
|
@@ -144,12 +142,11 @@ class Lingo
|
|
144
142
|
|
145
143
|
def flush_lir_buffer
|
146
144
|
unless @lir_rec_no.empty? || @lir_rec_buf.empty?
|
147
|
-
|
148
|
-
@
|
149
|
-
|
150
|
-
@file.print @lir_rec_no, '*', @lir_rec_buf.join(@sep), "\n"
|
151
|
-
end
|
145
|
+
@file.print(*[@lir_rec_no, @lir_rec_buf.join(@sep), "\n"].tap { |buf|
|
146
|
+
@sep =~ /\n/ ? buf.insert(1, "\n").unshift('*') : buf.insert(1, '*')
|
147
|
+
})
|
152
148
|
end
|
149
|
+
|
153
150
|
@lir_rec_no = ''
|
154
151
|
@lir_rec_buf.clear
|
155
152
|
end
|
@@ -101,10 +101,6 @@ class Lingo
|
|
101
101
|
protected
|
102
102
|
|
103
103
|
def init
|
104
|
-
# Regular Expressions für Token-Erkennung einlesen
|
105
|
-
regulars = get_key('regulars', '')
|
106
|
-
raise NoConfigKeyError.new(:regulars) unless regulars
|
107
|
-
|
108
104
|
@space = get_key('space', false)
|
109
105
|
@tags = get_key('tags', true)
|
110
106
|
@wiki = get_key('wiki', true)
|
@@ -115,30 +111,22 @@ class Lingo
|
|
115
111
|
@rules << ['WIKI', /^\[\[.+?\]\]/] unless @wiki
|
116
112
|
@rules.unshift(['WIKI', /^=+.+=+$/]) unless @wiki
|
117
113
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
regulars.each { |rule|
|
122
|
-
name = rule.keys[0]
|
123
|
-
expr = rule.values[0].gsub(/_(\w+?)_/) {
|
114
|
+
get_key('regulars', []).each_with_object({}) { |rule, macros|
|
115
|
+
expr = rule.values.first.gsub(/_(\w+?)_/) {
|
124
116
|
macros[$&] || begin
|
125
117
|
Database::Source.const_get("UTF8_#{$1.upcase}")
|
126
118
|
rescue NameError
|
127
119
|
end
|
128
120
|
}
|
129
121
|
|
130
|
-
if name =~ /^_\w+_$/
|
131
|
-
macros[name] = expr
|
122
|
+
if (name = rule.keys.first) =~ /^_\w+_$/
|
123
|
+
macros[name] = expr
|
132
124
|
else
|
133
|
-
@rules << [name,
|
125
|
+
@rules << [name, /^#{expr}/]
|
134
126
|
end
|
135
127
|
}
|
136
128
|
|
137
|
-
|
138
|
-
# nicht um die Verarbeitung einer LIR-Datei handelt. Im Falle einer normalen Datei
|
139
|
-
# wird der Dateiname gespeichert und als Kennzeichen für die Erzeugung von
|
140
|
-
# Zeilenende-Nachrichten herangezogen.
|
141
|
-
@filename = nil
|
129
|
+
@filename = @cont = nil
|
142
130
|
end
|
143
131
|
|
144
132
|
def control(cmd, param)
|
@@ -154,12 +142,10 @@ class Lingo
|
|
154
142
|
inc('Anzahl Zeilen')
|
155
143
|
|
156
144
|
tokenize(obj) { |form, attr|
|
157
|
-
|
158
|
-
|
159
|
-
inc('Anzahl Muster '+token.attr)
|
145
|
+
inc("Anzahl Muster #{attr}")
|
160
146
|
inc('Anzahl Token')
|
161
147
|
|
162
|
-
forward(
|
148
|
+
forward(Token.new(form, attr))
|
163
149
|
}
|
164
150
|
|
165
151
|
forward(STR_CMD_EOL, @filename) if @filename
|
@@ -175,7 +161,7 @@ class Lingo
|
|
175
161
|
case @cont
|
176
162
|
when 'HTML'
|
177
163
|
if textline =~ /^[^<>]*>/
|
178
|
-
yield
|
164
|
+
yield $&, @cont
|
179
165
|
textline, @cont = $', nil
|
180
166
|
else
|
181
167
|
yield textline, @cont
|
@@ -183,7 +169,7 @@ class Lingo
|
|
183
169
|
end
|
184
170
|
when 'WIKI'
|
185
171
|
if textline =~ /^[^\[\]]*\]\]/
|
186
|
-
yield
|
172
|
+
yield $&, @cont
|
187
173
|
textline, @cont = $', nil
|
188
174
|
else
|
189
175
|
yield textline, @cont
|
@@ -191,12 +177,12 @@ class Lingo
|
|
191
177
|
end
|
192
178
|
when nil
|
193
179
|
if !@tags && textline =~ /<[^<>]*$/
|
194
|
-
yield
|
180
|
+
yield $&, @cont = 'HTML'
|
195
181
|
textline = $`
|
196
182
|
end
|
197
183
|
|
198
184
|
if !@wiki && textline =~ /\[\[[^\[\]]*$/
|
199
|
-
yield
|
185
|
+
yield $&, @cont = 'WIKI'
|
200
186
|
textline = $`
|
201
187
|
end
|
202
188
|
end
|
@@ -204,7 +190,7 @@ class Lingo
|
|
204
190
|
until textline.empty?
|
205
191
|
@rules.each { |name, expr|
|
206
192
|
if textline =~ expr
|
207
|
-
yield
|
193
|
+
yield $&, name if name != 'SPAC' || @space
|
208
194
|
textline = $'
|
209
195
|
break
|
210
196
|
end
|
@@ -75,67 +75,39 @@ class Lingo
|
|
75
75
|
protected
|
76
76
|
|
77
77
|
def init
|
78
|
-
|
79
|
-
@
|
80
|
-
@
|
81
|
-
|
78
|
+
@marker = get_key('marker', '*')
|
79
|
+
@max = get_key('max-var', max = 10000).to_i
|
80
|
+
@max = max unless @max > 0
|
81
|
+
@var = get_key('variations')
|
82
82
|
|
83
|
-
|
84
|
-
@var_strings = get_key('variations')
|
85
|
-
raise MissingConfigError.new(:variations) if @var_strings.empty?
|
83
|
+
raise MissingConfigError.new(:variations) if @var.empty?
|
86
84
|
|
87
|
-
# Initialisierungen
|
88
85
|
@check = Hash.new(false)
|
89
|
-
|
86
|
+
get_array('check', WA_UNKNOWN).each { |s| @check[s.upcase] = true }
|
90
87
|
|
91
88
|
set_dic
|
92
89
|
set_gra
|
93
|
-
|
94
|
-
if @max_var.zero?
|
95
|
-
@max_var = 10000
|
96
|
-
@lingo.warn "#{self.class}: max-var is 0, setting to #{@max_var}"
|
97
|
-
end
|
98
90
|
end
|
99
91
|
|
100
|
-
def control(cmd,
|
101
|
-
|
102
|
-
if cmd == STR_CMD_STATUS
|
103
|
-
# Eigenen Status um Status von Dictionary und Grammer erweitern
|
104
|
-
@dic.report.each_pair { | k, v | set( k, v ) }
|
105
|
-
@gra.report.each_pair { | k, v | set( k, v ) }
|
106
|
-
end
|
92
|
+
def control(cmd, param)
|
93
|
+
report_on(cmd, @dic, @gra)
|
107
94
|
end
|
108
95
|
|
109
96
|
def process(obj)
|
110
|
-
# Zu prüfende Wörter filtern
|
111
97
|
if obj.is_a?(Word) && @check[obj.attr]
|
112
|
-
# Statistik für Report
|
113
98
|
inc('Anzahl gesuchter Wörter')
|
114
99
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
end
|
121
|
-
|
122
|
-
# Prüfe Variation auf bekanntes Wort
|
123
|
-
variations[0...@max_var].each do |var|
|
124
|
-
# Variiertes Wort im Wörterbuch suchen
|
125
|
-
word = @dic.find_word(var)
|
126
|
-
word = @gra.find_compositum(var) if word.unknown?
|
127
|
-
next if word.unknown? || (
|
128
|
-
word.attr == WA_KOMPOSITUM && word.lexicals.any? { |lex|
|
129
|
-
lex.attr[0..0] == LA_TAKEITASIS
|
100
|
+
@var.each_with_object([obj.form]) { |a, v| variate(v, *a) }.
|
101
|
+
tap { |v| v.slice!(@max..-1) }.each { |var|
|
102
|
+
next if (word = find_word(var)).unknown? || (
|
103
|
+
word.attr == WA_COMPOUND && word.lexicals.any? { |lex|
|
104
|
+
lex.attr.start_with?(LA_TAKEITASIS)
|
130
105
|
}
|
131
106
|
)
|
132
107
|
|
133
|
-
# Das erste erkannte Wort beendet die Suche
|
134
108
|
inc('Anzahl gefundener Wörter')
|
135
|
-
word.form = @marker + var
|
136
|
-
|
137
|
-
return
|
138
|
-
end
|
109
|
+
return forward(word.tap { word.form = @marker + var })
|
110
|
+
}
|
139
111
|
end
|
140
112
|
|
141
113
|
forward(obj)
|
@@ -146,32 +118,20 @@ class Lingo
|
|
146
118
|
# Variiere die Bestandteile eines Arrays gemäß den Austauschvorgaben.
|
147
119
|
#
|
148
120
|
# variate( 'Tiieh', 'ieh', 'sch' ) => ['Tiieh', 'Tisch']
|
149
|
-
def variate(
|
150
|
-
|
151
|
-
add_variations = []
|
152
|
-
from_re = Regexp.new(from)
|
153
|
-
|
154
|
-
# alle Wörter in der variation_list permutieren
|
155
|
-
variation_list.each do |wordform|
|
121
|
+
def variate(variations, from, to)
|
122
|
+
add, change, re = [], [from, to], Regexp.new(from)
|
156
123
|
|
157
|
-
|
158
|
-
|
159
|
-
n = wordpart.size - 1
|
124
|
+
variations.each { |form|
|
125
|
+
parts = " #{form} ".split(re)
|
160
126
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
# i[x] = Wert des x.ten Bit von Integer i
|
168
|
-
(1..n).each { |j| variation += change[i[j-1]] + wordpart[j] }
|
169
|
-
|
170
|
-
add_variations << variation.strip
|
171
|
-
end
|
172
|
-
end
|
127
|
+
1.upto(2 ** (n = parts.size - 1) - 1) { |i|
|
128
|
+
var = parts.first
|
129
|
+
1.upto(n) { |j| var += change[i[j - 1]] + parts[j] }
|
130
|
+
add << var.strip
|
131
|
+
}
|
132
|
+
}
|
173
133
|
|
174
|
-
|
134
|
+
variations.concat(add)
|
175
135
|
end
|
176
136
|
|
177
137
|
end
|