lingo 1.8.1 → 1.8.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. data/ChangeLog +23 -5
  2. data/README +1 -1
  3. data/Rakefile +5 -7
  4. data/TODO +2 -0
  5. data/bin/lingo +5 -1
  6. data/de.lang +1 -1
  7. data/en/lingo-syn.txt +0 -0
  8. data/en.lang +2 -1
  9. data/lib/lingo/attendee/abbreviator.rb +8 -9
  10. data/lib/lingo/attendee/debugger.rb +5 -4
  11. data/lib/lingo/attendee/decomposer.rb +8 -3
  12. data/lib/lingo/attendee/dehyphenizer.rb +19 -63
  13. data/lib/lingo/attendee/formatter.rb +1 -1
  14. data/lib/lingo/attendee/multi_worder.rb +67 -155
  15. data/lib/lingo/attendee/noneword_filter.rb +16 -9
  16. data/lib/lingo/attendee/object_filter.rb +1 -1
  17. data/lib/lingo/attendee/sequencer.rb +32 -63
  18. data/lib/lingo/attendee/stemmer/porter.rb +343 -0
  19. data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
  20. data/lib/lingo/attendee/synonymer.rb +10 -9
  21. data/lib/lingo/attendee/text_reader.rb +102 -76
  22. data/lib/lingo/attendee/text_writer.rb +23 -26
  23. data/lib/lingo/attendee/tokenizer.rb +13 -27
  24. data/lib/lingo/attendee/variator.rb +26 -66
  25. data/lib/lingo/attendee/vector_filter.rb +42 -43
  26. data/lib/lingo/attendee/word_searcher.rb +6 -7
  27. data/lib/lingo/attendee.rb +25 -7
  28. data/lib/lingo/buffered_attendee.rb +36 -10
  29. data/lib/lingo/cachable.rb +8 -8
  30. data/lib/lingo/config.rb +5 -6
  31. data/lib/lingo/ctl.rb +2 -3
  32. data/lib/lingo/database/crypter.rb +9 -26
  33. data/lib/lingo/database/gdbm_store.rb +3 -5
  34. data/lib/lingo/database/libcdb_store.rb +4 -6
  35. data/lib/lingo/database/sdbm_store.rb +11 -6
  36. data/lib/lingo/database/show_progress.rb +3 -43
  37. data/lib/lingo/database/source/key_value.rb +2 -6
  38. data/lib/lingo/database/source/multi_key.rb +3 -5
  39. data/lib/lingo/database/source/multi_value.rb +2 -6
  40. data/lib/lingo/database/source/single_word.rb +4 -6
  41. data/lib/lingo/database/source/word_class.rb +4 -10
  42. data/lib/lingo/database/source.rb +20 -18
  43. data/lib/lingo/database.rb +84 -59
  44. data/lib/lingo/error.rb +57 -1
  45. data/lib/lingo/language/dictionary.rb +21 -18
  46. data/lib/lingo/language/grammar.rb +40 -49
  47. data/lib/lingo/language/lexical.rb +6 -6
  48. data/lib/lingo/language/lexical_hash.rb +6 -0
  49. data/lib/lingo/language/word.rb +32 -15
  50. data/lib/lingo/language/word_form.rb +1 -1
  51. data/lib/lingo/language.rb +14 -25
  52. data/lib/lingo/reportable.rb +12 -10
  53. data/lib/lingo/show_progress.rb +81 -0
  54. data/lib/lingo/version.rb +1 -1
  55. data/lib/lingo.rb +63 -24
  56. data/lingo-call.cfg +6 -10
  57. data/lingo.cfg +60 -44
  58. data/lir.cfg +42 -41
  59. data/test/attendee/ts_abbreviator.rb +3 -5
  60. data/test/attendee/ts_decomposer.rb +3 -5
  61. data/test/attendee/ts_multi_worder.rb +87 -145
  62. data/test/attendee/ts_noneword_filter.rb +5 -3
  63. data/test/attendee/ts_object_filter.rb +5 -3
  64. data/test/attendee/ts_sequencer.rb +3 -5
  65. data/test/attendee/ts_stemmer.rb +309 -0
  66. data/test/attendee/ts_synonymer.rb +15 -11
  67. data/test/attendee/ts_text_reader.rb +12 -15
  68. data/test/attendee/ts_text_writer.rb +24 -29
  69. data/test/attendee/ts_tokenizer.rb +9 -7
  70. data/test/attendee/ts_variator.rb +4 -4
  71. data/test/attendee/ts_vector_filter.rb +24 -16
  72. data/test/attendee/ts_word_searcher.rb +20 -36
  73. data/test/{lir.csv → lir.vec} +0 -0
  74. data/test/ref/artikel.vec +943 -943
  75. data/test/ref/artikel.ven +943 -943
  76. data/test/ref/lir.non +201 -201
  77. data/test/ref/lir.seq +178 -178
  78. data/test/ref/lir.syn +49 -49
  79. data/test/ref/lir.vec +329 -0
  80. data/test/test_helper.rb +20 -36
  81. data/test/ts_database.rb +10 -10
  82. data/test/ts_language.rb +279 -319
  83. metadata +93 -104
  84. data/info/Objekte.png +0 -0
  85. data/info/Typen.png +0 -0
  86. data/info/database.png +0 -0
  87. data/info/db_small.png +0 -0
  88. data/info/download.png +0 -0
  89. data/info/kerze.png +0 -0
  90. data/info/language.png +0 -0
  91. data/info/lingo.png +0 -0
  92. data/info/logo.png +0 -0
  93. data/info/meeting.png +0 -0
  94. data/info/types.png +0 -0
  95. data/lingo-all.cfg +0 -89
  96. data/porter/stem.cfg +0 -311
  97. data/porter/stem.rb +0 -150
  98. data/test/ref/lir.csv +0 -329
  99. data/test.cfg +0 -79
@@ -24,6 +24,8 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
+ require 'find'
28
+
27
29
  %w[filemagic mime/types hpricot pdf-reader].each { |lib|
28
30
  begin
29
31
  require lib
@@ -55,19 +57,19 @@ class Lingo
55
57
  # Komma voneinander getrennt, z.B.
56
58
  # files: 'readme.txt'
57
59
  # files: 'readme.txt,lingo.cfg'
58
- # <b><i>lir-record-pattern</i></b>:: Mit diesem Parameter wird angegeben, woran der Anfang
59
- # eines neuen Records erkannt werden kann und wie die
60
- # Record-Nummer identifiziert wird. Das Format einer
61
- # LIR-Datei ist z.B.
62
- # [00001.]
63
- # 020: ¬Die Aufgabenteilung zwischen Wortschatz und Grammatik.
60
+ # <b><i>records</i></b>:: Mit diesem Parameter wird angegeben, woran der Anfang
61
+ # eines neuen Records erkannt werden kann und wie die
62
+ # Record-Nummer identifiziert wird. Das Format einer
63
+ # LIR-Datei ist z.B.
64
+ # [00001.]
65
+ # 020: ¬Die Aufgabenteilung zwischen Wortschatz und Grammatik.
64
66
  #
65
- # [00002.]
66
- # 020: Nicht-konventionelle Thesaurusrelationen als Orientierungshilfen.
67
- # Mit der Angabe von
68
- # lir-record-pattern: "^\[(\d+)\.\]"
69
- # werden die Record-Zeilen erkannt und jeweils die Record-Nummer +00001+,
70
- # bzw. +00002+ erkannt.
67
+ # [00002.]
68
+ # 020: Nicht-konventionelle Thesaurusrelationen als Orientierungshilfen.
69
+ # Mit der Angabe von
70
+ # records: "^\[(\d+)\.\]"
71
+ # werden die Record-Zeilen erkannt und jeweils die Record-Nummer +00001+,
72
+ # bzw. +00002+ erkannt.
71
73
  #
72
74
  # === Generierte Kommandos
73
75
  # Damit der nachfolgende Datenstrom einwandfrei verarbeitet werden kann, generiert der TextReader
@@ -90,7 +92,7 @@ class Lingo
90
92
  # Bei der Verarbeitung einer LIR-Datei mit der Ablaufkonfiguration <tt>t2.cfg</tt>
91
93
  # meeting:
92
94
  # attendees:
93
- # - text_reader: { out: lines, files: '$(files)', lir-record-pattern: "^\[(\d+)\.\]" }
95
+ # - text_reader: { out: lines, files: '$(files)', records: "^\[(\d+)\.\]" }
94
96
  # - debugger: { in: lines, prompt: 'out>'}
95
97
  # ergibt die Ausgabe mit <tt>lingo -c t2 lir.txt</tt>
96
98
  # out> *LIR-FORMAT('')
@@ -105,112 +107,136 @@ class Lingo
105
107
 
106
108
  protected
107
109
 
108
- # TODO: FILE und LIR-FILE
109
- # TODO: lir-record-pattern abkürzen
110
- # Interpretation der Parameter
110
+ # TODO: FILE und LIR-FILE (?)
111
111
  def init
112
- @files = Array(get_key('files', '-'))
113
- @rec_pat = Regexp.new(get_key('lir-record-pattern', ''))
114
- @is_LIR_file = has_key?('lir-record-pattern')
115
- @chomp = get_key('chomp', true)
116
- @filter = get_key('filter', false)
112
+ get_files
113
+
114
+ @chomp = get_key('chomp', true)
115
+ @filter = get_key('filter', false)
116
+ @progress = get_key('progress', false)
117
+
118
+ if @lir = get_key('records', get_key('lir-record-pattern', nil)) # DEPRECATE lir-record-pattern
119
+ @lir = @lir == true ? %r{^\[(\d+)\.\]} : Regexp.new(@lir)
120
+ end
117
121
  end
118
122
 
119
123
  def control(cmd, param)
120
- if cmd==STR_CMD_TALK
121
- forward(STR_CMD_LIR, '') if @is_LIR_file
122
- @files.each { |filename| spool(filename) }
124
+ if cmd == STR_CMD_TALK
125
+ forward(STR_CMD_LIR, '') if @lir
126
+ @files.each(&method(:spool))
123
127
  end
124
128
  end
125
129
 
126
130
  private
127
131
 
128
132
  # Gibt eine Datei zeilenweise in den Ausgabekanal
129
- def spool(filename)
130
- unless stdin?(filename)
131
- raise FileNotFoundError.new(filename) unless File.exist?(filename)
132
-
133
+ def spool(path)
134
+ unless stdin = stdin?(path)
133
135
  inc('Anzahl Dateien')
134
- add('Anzahl Bytes', File.stat(filename).size)
136
+ add('Anzahl Bytes', size = File.size(path))
137
+
138
+ size = nil unless @progress
135
139
  end
136
140
 
137
- forward(STR_CMD_FILE, filename)
138
-
139
- filter(filename) { |line|
140
- inc('Anzahl Zeilen')
141
- line.chomp! if @chomp
142
- line.gsub!(/\303\237/, "ß")
143
- ### HACK
144
- if @is_LIR_file && line =~ @rec_pat
145
- forward(STR_CMD_RECORD, $1)
146
- else
147
- forward(line) if line.size>0
148
- end
141
+ forward(STR_CMD_FILE, path)
142
+
143
+ ShowProgress.new(self, size, path) { |progress|
144
+ filter(path, stdin) { |line, pos|
145
+ inc('Anzahl Zeilen')
146
+ progress[pos]
147
+
148
+ line.chomp! if @chomp
149
+
150
+ if line =~ @lir
151
+ forward(STR_CMD_RECORD, $1)
152
+ else
153
+ forward(line) unless line.empty?
154
+ end
155
+ }
149
156
  }
150
157
 
151
- forward(STR_CMD_EOF, filename)
158
+ forward(STR_CMD_EOF, path)
152
159
  end
153
160
 
154
- def filter(filename, &block)
155
- file = stdin?(filename) ?
156
- @lingo.config.stdin.set_encoding(ENC) :
157
- File.open(filename, 'rb', encoding: ENC)
158
-
159
- file = case @filter == true ? file_type(filename, file) : @filter.to_s
160
- when /html/ then filter_html(file)
161
- when /xml/ then filter_html(file, true)
162
- when /pdf/ then filter_pdf(file, &block) or return
163
- else file
164
- end if @filter
161
+ def filter(path, stdin = stdin?(path))
162
+ io, block = stdin ? [
163
+ @lingo.config.stdin.set_encoding(ENC),
164
+ lambda { |line| yield line, 0 }
165
+ ] : [
166
+ File.open(path, 'rb', encoding: ENC),
167
+ lambda { |line| yield line, io.pos }
168
+ ]
169
+
170
+ case @filter == true ? file_type(path, io) : @filter.to_s
171
+ when /html/i then io = filter_html(io)
172
+ when /xml/i then io = filter_html(io, true)
173
+ when /pdf/i then filter_pdf(io, &block); return
174
+ end
165
175
 
166
- file.each_line(&block)
176
+ io.each_line(&block) if io
167
177
  end
168
178
 
169
- def filter_pdf(file, &block)
179
+ def filter_pdf(io, &block)
170
180
  if Object.const_defined?(:PDF) && PDF.const_defined?(:Reader)
171
- PDFFilter.filter(file, &block)
172
- nil
181
+ PDFFilter.filter(io, &block)
173
182
  else
174
183
  warn "PDF filter not available. Please install `pdf-reader'."
175
- file
176
184
  end
177
185
  end
178
186
 
179
- def filter_html(file, xml = false)
187
+ def filter_html(io, xml = false)
180
188
  if Object.const_defined?(:Hpricot)
181
- Hpricot(file, xml: xml).inner_text
189
+ Hpricot(io, xml: xml).inner_text
182
190
  else
183
191
  warn "#{xml ? 'X' : 'HT'}ML filter not available. Please install `hpricot'."
184
- file
192
+ nil
185
193
  end
186
194
  end
187
195
 
188
- def file_type(filename, file)
189
- if Object.const_defined?(:FileMagic) && file.respond_to?(:rewind)
190
- type = FileMagic.fm(:mime, simplified: true).buffer(file.read(256))
191
- file.rewind
192
- type
196
+ def file_type(path, io)
197
+ if Object.const_defined?(:FileMagic) && io.respond_to?(:rewind)
198
+ FileMagic.fm(:mime, simplified: true).buffer(io.read(256)).tap {
199
+ io.rewind
200
+ }
193
201
  elsif Object.const_defined?(:MIME) && MIME.const_defined?(:Types)
194
- if type = MIME::Types.of(filename).first
195
- type.content_type
196
- else
197
- warn 'Filters not available. File type could not be determined.'
198
- nil
199
- end
202
+ MIME::Types.of(path).first.tap { |type| type ? type.content_type :
203
+ warn('Filters not available. File type could not be determined.')
204
+ }
200
205
  else
201
206
  warn "Filters not available. Please install `ruby-filemagic' or `mime-types'."
202
207
  nil
203
208
  end
204
209
  end
205
210
 
206
- def stdin?(filename)
207
- %w[STDIN -].include?(filename)
211
+ def stdin?(path)
212
+ %w[STDIN -].include?(path)
213
+ end
214
+
215
+ def get_files
216
+ args = [get_key('glob', '*.txt'), get_key('recursive', false)]
217
+
218
+ @files = []
219
+
220
+ Array(get_key('files', '-')).each { |path|
221
+ stdin?(path) ? @files << path : add_files(path, *args)
222
+ }
223
+
224
+ @files.map!(&File.method(:expand_path))
225
+ @files.uniq!
226
+ end
227
+
228
+ def add_files(path, glob, recursive = false)
229
+ Dir[path].sort!.each { |match|
230
+ File.directory?(match) ? recursive ? Find.find(match) { |entry|
231
+ @files << entry if File.file?(entry) && File.fnmatch?(glob, entry)
232
+ } : add_files(File.join(match, glob), glob) : @files << match
233
+ }.empty? and raise FileNotFoundError.new(path)
208
234
  end
209
235
 
210
236
  class PDFFilter
211
237
 
212
- def self.filter(file, &block)
213
- PDF::Reader.new.parse(file, new(&block))
238
+ def self.filter(io, &block)
239
+ PDF::Reader.new.parse(io, new(&block))
214
240
  end
215
241
 
216
242
  def initialize(&block)
@@ -82,11 +82,15 @@ class Lingo
82
82
  def init
83
83
  @ext = get_key('ext', 'txt2')
84
84
  @lir = get_key('lir-format', false)
85
- @sep = @lir ? ' ' : eval("\"#{@config['sep'] || ' '}\"")
85
+
86
+ @sep = @config['sep'] unless @lir
87
+ @sep &&= @sep.evaluate
88
+ @sep ||= ' '
89
+
86
90
  @no_sep, @no_puts = true, false
87
91
  end
88
92
 
89
- def control(cmd, par)
93
+ def control(cmd, param)
90
94
  case cmd
91
95
  when STR_CMD_LIR
92
96
  @lir = true
@@ -94,49 +98,43 @@ class Lingo
94
98
  @no_sep = true
95
99
 
96
100
  if stdout?(@ext)
97
- @filename = @ext
98
- @file = @lingo.config.stdout
101
+ @filename, @file = @ext, @lingo.config.stdout
99
102
  else
100
- @filename = par.sub(/(\.[^.]+)?$/, '.'+@ext)
101
- @file = File.new(@filename,'w')
102
103
  inc('Anzahl Dateien')
104
+ @file = File.open(@filename = File.set_ext(param, ".#{@ext}"), 'w')
103
105
  end
104
106
 
105
- @lir_rec_no = ''
106
- @lir_rec_buf = Array.new
107
+ @lir_rec_no, @lir_rec_buf = '', []
107
108
  when STR_CMD_RECORD
108
109
  @no_sep = true
110
+
109
111
  if @lir
110
112
  flush_lir_buffer
111
- @lir_rec_no = par
113
+ @lir_rec_no = param
112
114
  end
113
115
  when STR_CMD_EOL
114
116
  @no_sep = true
117
+
115
118
  unless @lir
116
- @file.puts unless @no_puts # unless @sep=="\n"
117
119
  inc('Anzahl Zeilen')
120
+ @file.puts unless @no_puts
118
121
  end
119
122
  when STR_CMD_EOF
120
123
  flush_lir_buffer if @lir
121
124
 
122
125
  unless stdout?(@filename)
126
+ add('Anzahl Bytes', @file.size)
123
127
  @file.close
124
- add('Anzahl Bytes', File.stat(@filename).size)
125
128
  end
126
129
  end
127
130
  end
128
131
 
129
132
  def process(obj)
130
- if @lir
131
- @lir_rec_buf << (obj.kind_of?(Token) ? obj.form : obj.to_s)
132
- else
133
- @file.print @sep unless @no_sep
134
- @no_sep=false if @no_sep
135
- if obj.is_a?(Word) || obj.is_a?(Token)
136
- @file.print obj.form
137
- else
138
- @file.print obj
139
- end
133
+ obj = obj.form if obj.is_a?(WordForm)
134
+
135
+ @lir ? @lir_rec_buf << obj : begin
136
+ @no_sep ? @no_sep = false : @file.print(@sep)
137
+ @file.print(obj)
140
138
  end
141
139
  end
142
140
 
@@ -144,12 +142,11 @@ class Lingo
144
142
 
145
143
  def flush_lir_buffer
146
144
  unless @lir_rec_no.empty? || @lir_rec_buf.empty?
147
- if @sep =~ /\n/
148
- @file.print '*', @lir_rec_no, "\n", @lir_rec_buf.join(@sep), "\n"
149
- else
150
- @file.print @lir_rec_no, '*', @lir_rec_buf.join(@sep), "\n"
151
- end
145
+ @file.print(*[@lir_rec_no, @lir_rec_buf.join(@sep), "\n"].tap { |buf|
146
+ @sep =~ /\n/ ? buf.insert(1, "\n").unshift('*') : buf.insert(1, '*')
147
+ })
152
148
  end
149
+
153
150
  @lir_rec_no = ''
154
151
  @lir_rec_buf.clear
155
152
  end
@@ -101,10 +101,6 @@ class Lingo
101
101
  protected
102
102
 
103
103
  def init
104
- # Regular Expressions für Token-Erkennung einlesen
105
- regulars = get_key('regulars', '')
106
- raise NoConfigKeyError.new(:regulars) unless regulars
107
-
108
104
  @space = get_key('space', false)
109
105
  @tags = get_key('tags', true)
110
106
  @wiki = get_key('wiki', true)
@@ -115,30 +111,22 @@ class Lingo
115
111
  @rules << ['WIKI', /^\[\[.+?\]\]/] unless @wiki
116
112
  @rules.unshift(['WIKI', /^=+.+=+$/]) unless @wiki
117
113
 
118
- # Mit _xxx_ gekennzeichnete Makros anwenden und Expressions ergänzen und umwandeln
119
- macros = {}
120
-
121
- regulars.each { |rule|
122
- name = rule.keys[0]
123
- expr = rule.values[0].gsub(/_(\w+?)_/) {
114
+ get_key('regulars', []).each_with_object({}) { |rule, macros|
115
+ expr = rule.values.first.gsub(/_(\w+?)_/) {
124
116
  macros[$&] || begin
125
117
  Database::Source.const_get("UTF8_#{$1.upcase}")
126
118
  rescue NameError
127
119
  end
128
120
  }
129
121
 
130
- if name =~ /^_\w+_$/ # is a macro
131
- macros[name] = expr if name =~ /^_\w+_$/
122
+ if (name = rule.keys.first) =~ /^_\w+_$/
123
+ macros[name] = expr
132
124
  else
133
- @rules << [name, Regexp.new('^'+expr)]
125
+ @rules << [name, /^#{expr}/]
134
126
  end
135
127
  }
136
128
 
137
- # Der Tokenizer gibt jedes Zeilenende als Information weiter, sofern es sich
138
- # nicht um die Verarbeitung einer LIR-Datei handelt. Im Falle einer normalen Datei
139
- # wird der Dateiname gespeichert und als Kennzeichen für die Erzeugung von
140
- # Zeilenende-Nachrichten herangezogen.
141
- @filename = nil
129
+ @filename = @cont = nil
142
130
  end
143
131
 
144
132
  def control(cmd, param)
@@ -154,12 +142,10 @@ class Lingo
154
142
  inc('Anzahl Zeilen')
155
143
 
156
144
  tokenize(obj) { |form, attr|
157
- token = Token.new(form, attr)
158
-
159
- inc('Anzahl Muster '+token.attr)
145
+ inc("Anzahl Muster #{attr}")
160
146
  inc('Anzahl Token')
161
147
 
162
- forward(token)
148
+ forward(Token.new(form, attr))
163
149
  }
164
150
 
165
151
  forward(STR_CMD_EOL, @filename) if @filename
@@ -175,7 +161,7 @@ class Lingo
175
161
  case @cont
176
162
  when 'HTML'
177
163
  if textline =~ /^[^<>]*>/
178
- yield $~[0], @cont
164
+ yield $&, @cont
179
165
  textline, @cont = $', nil
180
166
  else
181
167
  yield textline, @cont
@@ -183,7 +169,7 @@ class Lingo
183
169
  end
184
170
  when 'WIKI'
185
171
  if textline =~ /^[^\[\]]*\]\]/
186
- yield $~[0], @cont
172
+ yield $&, @cont
187
173
  textline, @cont = $', nil
188
174
  else
189
175
  yield textline, @cont
@@ -191,12 +177,12 @@ class Lingo
191
177
  end
192
178
  when nil
193
179
  if !@tags && textline =~ /<[^<>]*$/
194
- yield $~[0], @cont = 'HTML'
180
+ yield $&, @cont = 'HTML'
195
181
  textline = $`
196
182
  end
197
183
 
198
184
  if !@wiki && textline =~ /\[\[[^\[\]]*$/
199
- yield $~[0], @cont = 'WIKI'
185
+ yield $&, @cont = 'WIKI'
200
186
  textline = $`
201
187
  end
202
188
  end
@@ -204,7 +190,7 @@ class Lingo
204
190
  until textline.empty?
205
191
  @rules.each { |name, expr|
206
192
  if textline =~ expr
207
- yield $~[0], name if name != 'SPAC' || @space
193
+ yield $&, name if name != 'SPAC' || @space
208
194
  textline = $'
209
195
  break
210
196
  end
@@ -75,67 +75,39 @@ class Lingo
75
75
  protected
76
76
 
77
77
  def init
78
- # Parameter verarbeiten
79
- @marker = get_key('marker', '*')
80
- @max_var = get_key('max-var', '10000').to_i
81
- filter = get_array('check', WA_UNKNOWN)
78
+ @marker = get_key('marker', '*')
79
+ @max = get_key('max-var', max = 10000).to_i
80
+ @max = max unless @max > 0
81
+ @var = get_key('variations')
82
82
 
83
- # Daten verarbeiten
84
- @var_strings = get_key('variations')
85
- raise MissingConfigError.new(:variations) if @var_strings.empty?
83
+ raise MissingConfigError.new(:variations) if @var.empty?
86
84
 
87
- # Initialisierungen
88
85
  @check = Hash.new(false)
89
- filter.each { |s| @check[s.upcase] = true }
86
+ get_array('check', WA_UNKNOWN).each { |s| @check[s.upcase] = true }
90
87
 
91
88
  set_dic
92
89
  set_gra
93
-
94
- if @max_var.zero?
95
- @max_var = 10000
96
- @lingo.warn "#{self.class}: max-var is 0, setting to #{@max_var}"
97
- end
98
90
  end
99
91
 
100
- def control(cmd, par)
101
- # Status wird abgefragt
102
- if cmd == STR_CMD_STATUS
103
- # Eigenen Status um Status von Dictionary und Grammer erweitern
104
- @dic.report.each_pair { | k, v | set( k, v ) }
105
- @gra.report.each_pair { | k, v | set( k, v ) }
106
- end
92
+ def control(cmd, param)
93
+ report_on(cmd, @dic, @gra)
107
94
  end
108
95
 
109
96
  def process(obj)
110
- # Zu prüfende Wörter filtern
111
97
  if obj.is_a?(Word) && @check[obj.attr]
112
- # Statistik für Report
113
98
  inc('Anzahl gesuchter Wörter')
114
99
 
115
- # Erzeuge Variationen einer Wortform
116
- variations = [obj.form]
117
- @var_strings.each do |switch|
118
- from, to = switch
119
- variations = variate(variations, from, to)
120
- end
121
-
122
- # Prüfe Variation auf bekanntes Wort
123
- variations[0...@max_var].each do |var|
124
- # Variiertes Wort im Wörterbuch suchen
125
- word = @dic.find_word(var)
126
- word = @gra.find_compositum(var) if word.unknown?
127
- next if word.unknown? || (
128
- word.attr == WA_KOMPOSITUM && word.lexicals.any? { |lex|
129
- lex.attr[0..0] == LA_TAKEITASIS
100
+ @var.each_with_object([obj.form]) { |a, v| variate(v, *a) }.
101
+ tap { |v| v.slice!(@max..-1) }.each { |var|
102
+ next if (word = find_word(var)).unknown? || (
103
+ word.attr == WA_COMPOUND && word.lexicals.any? { |lex|
104
+ lex.attr.start_with?(LA_TAKEITASIS)
130
105
  }
131
106
  )
132
107
 
133
- # Das erste erkannte Wort beendet die Suche
134
108
  inc('Anzahl gefundener Wörter')
135
- word.form = @marker + var
136
- forward(word)
137
- return
138
- end
109
+ return forward(word.tap { word.form = @marker + var })
110
+ }
139
111
  end
140
112
 
141
113
  forward(obj)
@@ -146,32 +118,20 @@ class Lingo
146
118
  # Variiere die Bestandteile eines Arrays gemäß den Austauschvorgaben.
147
119
  #
148
120
  # variate( 'Tiieh', 'ieh', 'sch' ) => ['Tiieh', 'Tisch']
149
- def variate(variation_list, from, to)
150
- # neue Varianten sammeln
151
- add_variations = []
152
- from_re = Regexp.new(from)
153
-
154
- # alle Wörter in der variation_list permutieren
155
- variation_list.each do |wordform|
121
+ def variate(variations, from, to)
122
+ add, change, re = [], [from, to], Regexp.new(from)
156
123
 
157
- # Wortform in Teile zerlegen und anschließend Dimension feststellen
158
- wordpart = " #{wordform} ".split( from_re )
159
- n = wordpart.size - 1
124
+ variations.each { |form|
125
+ parts = " #{form} ".split(re)
160
126
 
161
- # Austauschketten in Matrix hinterlegen
162
- change = [from, to]
163
-
164
- # Austauschketten auf alle Teile anwenden
165
- (1..(2**n-1)).each do |i|
166
- variation = wordpart[0]
167
- # i[x] = Wert des x.ten Bit von Integer i
168
- (1..n).each { |j| variation += change[i[j-1]] + wordpart[j] }
169
-
170
- add_variations << variation.strip
171
- end
172
- end
127
+ 1.upto(2 ** (n = parts.size - 1) - 1) { |i|
128
+ var = parts.first
129
+ 1.upto(n) { |j| var += change[i[j - 1]] + parts[j] }
130
+ add << var.strip
131
+ }
132
+ }
173
133
 
174
- variation_list + add_variations
134
+ variations.concat(add)
175
135
  end
176
136
 
177
137
  end