lingo 1.8.1 → 1.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. data/ChangeLog +23 -5
  2. data/README +1 -1
  3. data/Rakefile +5 -7
  4. data/TODO +2 -0
  5. data/bin/lingo +5 -1
  6. data/de.lang +1 -1
  7. data/en/lingo-syn.txt +0 -0
  8. data/en.lang +2 -1
  9. data/lib/lingo/attendee/abbreviator.rb +8 -9
  10. data/lib/lingo/attendee/debugger.rb +5 -4
  11. data/lib/lingo/attendee/decomposer.rb +8 -3
  12. data/lib/lingo/attendee/dehyphenizer.rb +19 -63
  13. data/lib/lingo/attendee/formatter.rb +1 -1
  14. data/lib/lingo/attendee/multi_worder.rb +67 -155
  15. data/lib/lingo/attendee/noneword_filter.rb +16 -9
  16. data/lib/lingo/attendee/object_filter.rb +1 -1
  17. data/lib/lingo/attendee/sequencer.rb +32 -63
  18. data/lib/lingo/attendee/stemmer/porter.rb +343 -0
  19. data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
  20. data/lib/lingo/attendee/synonymer.rb +10 -9
  21. data/lib/lingo/attendee/text_reader.rb +102 -76
  22. data/lib/lingo/attendee/text_writer.rb +23 -26
  23. data/lib/lingo/attendee/tokenizer.rb +13 -27
  24. data/lib/lingo/attendee/variator.rb +26 -66
  25. data/lib/lingo/attendee/vector_filter.rb +42 -43
  26. data/lib/lingo/attendee/word_searcher.rb +6 -7
  27. data/lib/lingo/attendee.rb +25 -7
  28. data/lib/lingo/buffered_attendee.rb +36 -10
  29. data/lib/lingo/cachable.rb +8 -8
  30. data/lib/lingo/config.rb +5 -6
  31. data/lib/lingo/ctl.rb +2 -3
  32. data/lib/lingo/database/crypter.rb +9 -26
  33. data/lib/lingo/database/gdbm_store.rb +3 -5
  34. data/lib/lingo/database/libcdb_store.rb +4 -6
  35. data/lib/lingo/database/sdbm_store.rb +11 -6
  36. data/lib/lingo/database/show_progress.rb +3 -43
  37. data/lib/lingo/database/source/key_value.rb +2 -6
  38. data/lib/lingo/database/source/multi_key.rb +3 -5
  39. data/lib/lingo/database/source/multi_value.rb +2 -6
  40. data/lib/lingo/database/source/single_word.rb +4 -6
  41. data/lib/lingo/database/source/word_class.rb +4 -10
  42. data/lib/lingo/database/source.rb +20 -18
  43. data/lib/lingo/database.rb +84 -59
  44. data/lib/lingo/error.rb +57 -1
  45. data/lib/lingo/language/dictionary.rb +21 -18
  46. data/lib/lingo/language/grammar.rb +40 -49
  47. data/lib/lingo/language/lexical.rb +6 -6
  48. data/lib/lingo/language/lexical_hash.rb +6 -0
  49. data/lib/lingo/language/word.rb +32 -15
  50. data/lib/lingo/language/word_form.rb +1 -1
  51. data/lib/lingo/language.rb +14 -25
  52. data/lib/lingo/reportable.rb +12 -10
  53. data/lib/lingo/show_progress.rb +81 -0
  54. data/lib/lingo/version.rb +1 -1
  55. data/lib/lingo.rb +63 -24
  56. data/lingo-call.cfg +6 -10
  57. data/lingo.cfg +60 -44
  58. data/lir.cfg +42 -41
  59. data/test/attendee/ts_abbreviator.rb +3 -5
  60. data/test/attendee/ts_decomposer.rb +3 -5
  61. data/test/attendee/ts_multi_worder.rb +87 -145
  62. data/test/attendee/ts_noneword_filter.rb +5 -3
  63. data/test/attendee/ts_object_filter.rb +5 -3
  64. data/test/attendee/ts_sequencer.rb +3 -5
  65. data/test/attendee/ts_stemmer.rb +309 -0
  66. data/test/attendee/ts_synonymer.rb +15 -11
  67. data/test/attendee/ts_text_reader.rb +12 -15
  68. data/test/attendee/ts_text_writer.rb +24 -29
  69. data/test/attendee/ts_tokenizer.rb +9 -7
  70. data/test/attendee/ts_variator.rb +4 -4
  71. data/test/attendee/ts_vector_filter.rb +24 -16
  72. data/test/attendee/ts_word_searcher.rb +20 -36
  73. data/test/{lir.csv → lir.vec} +0 -0
  74. data/test/ref/artikel.vec +943 -943
  75. data/test/ref/artikel.ven +943 -943
  76. data/test/ref/lir.non +201 -201
  77. data/test/ref/lir.seq +178 -178
  78. data/test/ref/lir.syn +49 -49
  79. data/test/ref/lir.vec +329 -0
  80. data/test/test_helper.rb +20 -36
  81. data/test/ts_database.rb +10 -10
  82. data/test/ts_language.rb +279 -319
  83. metadata +93 -104
  84. data/info/Objekte.png +0 -0
  85. data/info/Typen.png +0 -0
  86. data/info/database.png +0 -0
  87. data/info/db_small.png +0 -0
  88. data/info/download.png +0 -0
  89. data/info/kerze.png +0 -0
  90. data/info/language.png +0 -0
  91. data/info/lingo.png +0 -0
  92. data/info/logo.png +0 -0
  93. data/info/meeting.png +0 -0
  94. data/info/types.png +0 -0
  95. data/lingo-all.cfg +0 -89
  96. data/porter/stem.cfg +0 -311
  97. data/porter/stem.rb +0 -150
  98. data/test/ref/lir.csv +0 -329
  99. data/test.cfg +0 -79
@@ -24,6 +24,8 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
+ require 'find'
28
+
27
29
  %w[filemagic mime/types hpricot pdf-reader].each { |lib|
28
30
  begin
29
31
  require lib
@@ -55,19 +57,19 @@ class Lingo
55
57
  # Komma voneinander getrennt, z.B.
56
58
  # files: 'readme.txt'
57
59
  # files: 'readme.txt,lingo.cfg'
58
- # <b><i>lir-record-pattern</i></b>:: Mit diesem Parameter wird angegeben, woran der Anfang
59
- # eines neuen Records erkannt werden kann und wie die
60
- # Record-Nummer identifiziert wird. Das Format einer
61
- # LIR-Datei ist z.B.
62
- # [00001.]
63
- # 020: ¬Die Aufgabenteilung zwischen Wortschatz und Grammatik.
60
+ # <b><i>records</i></b>:: Mit diesem Parameter wird angegeben, woran der Anfang
61
+ # eines neuen Records erkannt werden kann und wie die
62
+ # Record-Nummer identifiziert wird. Das Format einer
63
+ # LIR-Datei ist z.B.
64
+ # [00001.]
65
+ # 020: ¬Die Aufgabenteilung zwischen Wortschatz und Grammatik.
64
66
  #
65
- # [00002.]
66
- # 020: Nicht-konventionelle Thesaurusrelationen als Orientierungshilfen.
67
- # Mit der Angabe von
68
- # lir-record-pattern: "^\[(\d+)\.\]"
69
- # werden die Record-Zeilen erkannt und jeweils die Record-Nummer +00001+,
70
- # bzw. +00002+ erkannt.
67
+ # [00002.]
68
+ # 020: Nicht-konventionelle Thesaurusrelationen als Orientierungshilfen.
69
+ # Mit der Angabe von
70
+ # records: "^\[(\d+)\.\]"
71
+ # werden die Record-Zeilen erkannt und jeweils die Record-Nummer +00001+,
72
+ # bzw. +00002+ erkannt.
71
73
  #
72
74
  # === Generierte Kommandos
73
75
  # Damit der nachfolgende Datenstrom einwandfrei verarbeitet werden kann, generiert der TextReader
@@ -90,7 +92,7 @@ class Lingo
90
92
  # Bei der Verarbeitung einer LIR-Datei mit der Ablaufkonfiguration <tt>t2.cfg</tt>
91
93
  # meeting:
92
94
  # attendees:
93
- # - text_reader: { out: lines, files: '$(files)', lir-record-pattern: "^\[(\d+)\.\]" }
95
+ # - text_reader: { out: lines, files: '$(files)', records: "^\[(\d+)\.\]" }
94
96
  # - debugger: { in: lines, prompt: 'out>'}
95
97
  # ergibt die Ausgabe mit <tt>lingo -c t2 lir.txt</tt>
96
98
  # out> *LIR-FORMAT('')
@@ -105,112 +107,136 @@ class Lingo
105
107
 
106
108
  protected
107
109
 
108
- # TODO: FILE und LIR-FILE
109
- # TODO: lir-record-pattern abkürzen
110
- # Interpretation der Parameter
110
+ # TODO: FILE und LIR-FILE (?)
111
111
  def init
112
- @files = Array(get_key('files', '-'))
113
- @rec_pat = Regexp.new(get_key('lir-record-pattern', ''))
114
- @is_LIR_file = has_key?('lir-record-pattern')
115
- @chomp = get_key('chomp', true)
116
- @filter = get_key('filter', false)
112
+ get_files
113
+
114
+ @chomp = get_key('chomp', true)
115
+ @filter = get_key('filter', false)
116
+ @progress = get_key('progress', false)
117
+
118
+ if @lir = get_key('records', get_key('lir-record-pattern', nil)) # DEPRECATE lir-record-pattern
119
+ @lir = @lir == true ? %r{^\[(\d+)\.\]} : Regexp.new(@lir)
120
+ end
117
121
  end
118
122
 
119
123
  def control(cmd, param)
120
- if cmd==STR_CMD_TALK
121
- forward(STR_CMD_LIR, '') if @is_LIR_file
122
- @files.each { |filename| spool(filename) }
124
+ if cmd == STR_CMD_TALK
125
+ forward(STR_CMD_LIR, '') if @lir
126
+ @files.each(&method(:spool))
123
127
  end
124
128
  end
125
129
 
126
130
  private
127
131
 
128
132
  # Gibt eine Datei zeilenweise in den Ausgabekanal
129
- def spool(filename)
130
- unless stdin?(filename)
131
- raise FileNotFoundError.new(filename) unless File.exist?(filename)
132
-
133
+ def spool(path)
134
+ unless stdin = stdin?(path)
133
135
  inc('Anzahl Dateien')
134
- add('Anzahl Bytes', File.stat(filename).size)
136
+ add('Anzahl Bytes', size = File.size(path))
137
+
138
+ size = nil unless @progress
135
139
  end
136
140
 
137
- forward(STR_CMD_FILE, filename)
138
-
139
- filter(filename) { |line|
140
- inc('Anzahl Zeilen')
141
- line.chomp! if @chomp
142
- line.gsub!(/\303\237/, "ß")
143
- ### HACK
144
- if @is_LIR_file && line =~ @rec_pat
145
- forward(STR_CMD_RECORD, $1)
146
- else
147
- forward(line) if line.size>0
148
- end
141
+ forward(STR_CMD_FILE, path)
142
+
143
+ ShowProgress.new(self, size, path) { |progress|
144
+ filter(path, stdin) { |line, pos|
145
+ inc('Anzahl Zeilen')
146
+ progress[pos]
147
+
148
+ line.chomp! if @chomp
149
+
150
+ if line =~ @lir
151
+ forward(STR_CMD_RECORD, $1)
152
+ else
153
+ forward(line) unless line.empty?
154
+ end
155
+ }
149
156
  }
150
157
 
151
- forward(STR_CMD_EOF, filename)
158
+ forward(STR_CMD_EOF, path)
152
159
  end
153
160
 
154
- def filter(filename, &block)
155
- file = stdin?(filename) ?
156
- @lingo.config.stdin.set_encoding(ENC) :
157
- File.open(filename, 'rb', encoding: ENC)
158
-
159
- file = case @filter == true ? file_type(filename, file) : @filter.to_s
160
- when /html/ then filter_html(file)
161
- when /xml/ then filter_html(file, true)
162
- when /pdf/ then filter_pdf(file, &block) or return
163
- else file
164
- end if @filter
161
+ def filter(path, stdin = stdin?(path))
162
+ io, block = stdin ? [
163
+ @lingo.config.stdin.set_encoding(ENC),
164
+ lambda { |line| yield line, 0 }
165
+ ] : [
166
+ File.open(path, 'rb', encoding: ENC),
167
+ lambda { |line| yield line, io.pos }
168
+ ]
169
+
170
+ case @filter == true ? file_type(path, io) : @filter.to_s
171
+ when /html/i then io = filter_html(io)
172
+ when /xml/i then io = filter_html(io, true)
173
+ when /pdf/i then filter_pdf(io, &block); return
174
+ end
165
175
 
166
- file.each_line(&block)
176
+ io.each_line(&block) if io
167
177
  end
168
178
 
169
- def filter_pdf(file, &block)
179
+ def filter_pdf(io, &block)
170
180
  if Object.const_defined?(:PDF) && PDF.const_defined?(:Reader)
171
- PDFFilter.filter(file, &block)
172
- nil
181
+ PDFFilter.filter(io, &block)
173
182
  else
174
183
  warn "PDF filter not available. Please install `pdf-reader'."
175
- file
176
184
  end
177
185
  end
178
186
 
179
- def filter_html(file, xml = false)
187
+ def filter_html(io, xml = false)
180
188
  if Object.const_defined?(:Hpricot)
181
- Hpricot(file, xml: xml).inner_text
189
+ Hpricot(io, xml: xml).inner_text
182
190
  else
183
191
  warn "#{xml ? 'X' : 'HT'}ML filter not available. Please install `hpricot'."
184
- file
192
+ nil
185
193
  end
186
194
  end
187
195
 
188
- def file_type(filename, file)
189
- if Object.const_defined?(:FileMagic) && file.respond_to?(:rewind)
190
- type = FileMagic.fm(:mime, simplified: true).buffer(file.read(256))
191
- file.rewind
192
- type
196
+ def file_type(path, io)
197
+ if Object.const_defined?(:FileMagic) && io.respond_to?(:rewind)
198
+ FileMagic.fm(:mime, simplified: true).buffer(io.read(256)).tap {
199
+ io.rewind
200
+ }
193
201
  elsif Object.const_defined?(:MIME) && MIME.const_defined?(:Types)
194
- if type = MIME::Types.of(filename).first
195
- type.content_type
196
- else
197
- warn 'Filters not available. File type could not be determined.'
198
- nil
199
- end
202
+ MIME::Types.of(path).first.tap { |type| type ? type.content_type :
203
+ warn('Filters not available. File type could not be determined.')
204
+ }
200
205
  else
201
206
  warn "Filters not available. Please install `ruby-filemagic' or `mime-types'."
202
207
  nil
203
208
  end
204
209
  end
205
210
 
206
- def stdin?(filename)
207
- %w[STDIN -].include?(filename)
211
+ def stdin?(path)
212
+ %w[STDIN -].include?(path)
213
+ end
214
+
215
+ def get_files
216
+ args = [get_key('glob', '*.txt'), get_key('recursive', false)]
217
+
218
+ @files = []
219
+
220
+ Array(get_key('files', '-')).each { |path|
221
+ stdin?(path) ? @files << path : add_files(path, *args)
222
+ }
223
+
224
+ @files.map!(&File.method(:expand_path))
225
+ @files.uniq!
226
+ end
227
+
228
+ def add_files(path, glob, recursive = false)
229
+ Dir[path].sort!.each { |match|
230
+ File.directory?(match) ? recursive ? Find.find(match) { |entry|
231
+ @files << entry if File.file?(entry) && File.fnmatch?(glob, entry)
232
+ } : add_files(File.join(match, glob), glob) : @files << match
233
+ }.empty? and raise FileNotFoundError.new(path)
208
234
  end
209
235
 
210
236
  class PDFFilter
211
237
 
212
- def self.filter(file, &block)
213
- PDF::Reader.new.parse(file, new(&block))
238
+ def self.filter(io, &block)
239
+ PDF::Reader.new.parse(io, new(&block))
214
240
  end
215
241
 
216
242
  def initialize(&block)
@@ -82,11 +82,15 @@ class Lingo
82
82
  def init
83
83
  @ext = get_key('ext', 'txt2')
84
84
  @lir = get_key('lir-format', false)
85
- @sep = @lir ? ' ' : eval("\"#{@config['sep'] || ' '}\"")
85
+
86
+ @sep = @config['sep'] unless @lir
87
+ @sep &&= @sep.evaluate
88
+ @sep ||= ' '
89
+
86
90
  @no_sep, @no_puts = true, false
87
91
  end
88
92
 
89
- def control(cmd, par)
93
+ def control(cmd, param)
90
94
  case cmd
91
95
  when STR_CMD_LIR
92
96
  @lir = true
@@ -94,49 +98,43 @@ class Lingo
94
98
  @no_sep = true
95
99
 
96
100
  if stdout?(@ext)
97
- @filename = @ext
98
- @file = @lingo.config.stdout
101
+ @filename, @file = @ext, @lingo.config.stdout
99
102
  else
100
- @filename = par.sub(/(\.[^.]+)?$/, '.'+@ext)
101
- @file = File.new(@filename,'w')
102
103
  inc('Anzahl Dateien')
104
+ @file = File.open(@filename = File.set_ext(param, ".#{@ext}"), 'w')
103
105
  end
104
106
 
105
- @lir_rec_no = ''
106
- @lir_rec_buf = Array.new
107
+ @lir_rec_no, @lir_rec_buf = '', []
107
108
  when STR_CMD_RECORD
108
109
  @no_sep = true
110
+
109
111
  if @lir
110
112
  flush_lir_buffer
111
- @lir_rec_no = par
113
+ @lir_rec_no = param
112
114
  end
113
115
  when STR_CMD_EOL
114
116
  @no_sep = true
117
+
115
118
  unless @lir
116
- @file.puts unless @no_puts # unless @sep=="\n"
117
119
  inc('Anzahl Zeilen')
120
+ @file.puts unless @no_puts
118
121
  end
119
122
  when STR_CMD_EOF
120
123
  flush_lir_buffer if @lir
121
124
 
122
125
  unless stdout?(@filename)
126
+ add('Anzahl Bytes', @file.size)
123
127
  @file.close
124
- add('Anzahl Bytes', File.stat(@filename).size)
125
128
  end
126
129
  end
127
130
  end
128
131
 
129
132
  def process(obj)
130
- if @lir
131
- @lir_rec_buf << (obj.kind_of?(Token) ? obj.form : obj.to_s)
132
- else
133
- @file.print @sep unless @no_sep
134
- @no_sep=false if @no_sep
135
- if obj.is_a?(Word) || obj.is_a?(Token)
136
- @file.print obj.form
137
- else
138
- @file.print obj
139
- end
133
+ obj = obj.form if obj.is_a?(WordForm)
134
+
135
+ @lir ? @lir_rec_buf << obj : begin
136
+ @no_sep ? @no_sep = false : @file.print(@sep)
137
+ @file.print(obj)
140
138
  end
141
139
  end
142
140
 
@@ -144,12 +142,11 @@ class Lingo
144
142
 
145
143
  def flush_lir_buffer
146
144
  unless @lir_rec_no.empty? || @lir_rec_buf.empty?
147
- if @sep =~ /\n/
148
- @file.print '*', @lir_rec_no, "\n", @lir_rec_buf.join(@sep), "\n"
149
- else
150
- @file.print @lir_rec_no, '*', @lir_rec_buf.join(@sep), "\n"
151
- end
145
+ @file.print(*[@lir_rec_no, @lir_rec_buf.join(@sep), "\n"].tap { |buf|
146
+ @sep =~ /\n/ ? buf.insert(1, "\n").unshift('*') : buf.insert(1, '*')
147
+ })
152
148
  end
149
+
153
150
  @lir_rec_no = ''
154
151
  @lir_rec_buf.clear
155
152
  end
@@ -101,10 +101,6 @@ class Lingo
101
101
  protected
102
102
 
103
103
  def init
104
- # Regular Expressions für Token-Erkennung einlesen
105
- regulars = get_key('regulars', '')
106
- raise NoConfigKeyError.new(:regulars) unless regulars
107
-
108
104
  @space = get_key('space', false)
109
105
  @tags = get_key('tags', true)
110
106
  @wiki = get_key('wiki', true)
@@ -115,30 +111,22 @@ class Lingo
115
111
  @rules << ['WIKI', /^\[\[.+?\]\]/] unless @wiki
116
112
  @rules.unshift(['WIKI', /^=+.+=+$/]) unless @wiki
117
113
 
118
- # Mit _xxx_ gekennzeichnete Makros anwenden und Expressions ergänzen und umwandeln
119
- macros = {}
120
-
121
- regulars.each { |rule|
122
- name = rule.keys[0]
123
- expr = rule.values[0].gsub(/_(\w+?)_/) {
114
+ get_key('regulars', []).each_with_object({}) { |rule, macros|
115
+ expr = rule.values.first.gsub(/_(\w+?)_/) {
124
116
  macros[$&] || begin
125
117
  Database::Source.const_get("UTF8_#{$1.upcase}")
126
118
  rescue NameError
127
119
  end
128
120
  }
129
121
 
130
- if name =~ /^_\w+_$/ # is a macro
131
- macros[name] = expr if name =~ /^_\w+_$/
122
+ if (name = rule.keys.first) =~ /^_\w+_$/
123
+ macros[name] = expr
132
124
  else
133
- @rules << [name, Regexp.new('^'+expr)]
125
+ @rules << [name, /^#{expr}/]
134
126
  end
135
127
  }
136
128
 
137
- # Der Tokenizer gibt jedes Zeilenende als Information weiter, sofern es sich
138
- # nicht um die Verarbeitung einer LIR-Datei handelt. Im Falle einer normalen Datei
139
- # wird der Dateiname gespeichert und als Kennzeichen für die Erzeugung von
140
- # Zeilenende-Nachrichten herangezogen.
141
- @filename = nil
129
+ @filename = @cont = nil
142
130
  end
143
131
 
144
132
  def control(cmd, param)
@@ -154,12 +142,10 @@ class Lingo
154
142
  inc('Anzahl Zeilen')
155
143
 
156
144
  tokenize(obj) { |form, attr|
157
- token = Token.new(form, attr)
158
-
159
- inc('Anzahl Muster '+token.attr)
145
+ inc("Anzahl Muster #{attr}")
160
146
  inc('Anzahl Token')
161
147
 
162
- forward(token)
148
+ forward(Token.new(form, attr))
163
149
  }
164
150
 
165
151
  forward(STR_CMD_EOL, @filename) if @filename
@@ -175,7 +161,7 @@ class Lingo
175
161
  case @cont
176
162
  when 'HTML'
177
163
  if textline =~ /^[^<>]*>/
178
- yield $~[0], @cont
164
+ yield $&, @cont
179
165
  textline, @cont = $', nil
180
166
  else
181
167
  yield textline, @cont
@@ -183,7 +169,7 @@ class Lingo
183
169
  end
184
170
  when 'WIKI'
185
171
  if textline =~ /^[^\[\]]*\]\]/
186
- yield $~[0], @cont
172
+ yield $&, @cont
187
173
  textline, @cont = $', nil
188
174
  else
189
175
  yield textline, @cont
@@ -191,12 +177,12 @@ class Lingo
191
177
  end
192
178
  when nil
193
179
  if !@tags && textline =~ /<[^<>]*$/
194
- yield $~[0], @cont = 'HTML'
180
+ yield $&, @cont = 'HTML'
195
181
  textline = $`
196
182
  end
197
183
 
198
184
  if !@wiki && textline =~ /\[\[[^\[\]]*$/
199
- yield $~[0], @cont = 'WIKI'
185
+ yield $&, @cont = 'WIKI'
200
186
  textline = $`
201
187
  end
202
188
  end
@@ -204,7 +190,7 @@ class Lingo
204
190
  until textline.empty?
205
191
  @rules.each { |name, expr|
206
192
  if textline =~ expr
207
- yield $~[0], name if name != 'SPAC' || @space
193
+ yield $&, name if name != 'SPAC' || @space
208
194
  textline = $'
209
195
  break
210
196
  end
@@ -75,67 +75,39 @@ class Lingo
75
75
  protected
76
76
 
77
77
  def init
78
- # Parameter verarbeiten
79
- @marker = get_key('marker', '*')
80
- @max_var = get_key('max-var', '10000').to_i
81
- filter = get_array('check', WA_UNKNOWN)
78
+ @marker = get_key('marker', '*')
79
+ @max = get_key('max-var', max = 10000).to_i
80
+ @max = max unless @max > 0
81
+ @var = get_key('variations')
82
82
 
83
- # Daten verarbeiten
84
- @var_strings = get_key('variations')
85
- raise MissingConfigError.new(:variations) if @var_strings.empty?
83
+ raise MissingConfigError.new(:variations) if @var.empty?
86
84
 
87
- # Initialisierungen
88
85
  @check = Hash.new(false)
89
- filter.each { |s| @check[s.upcase] = true }
86
+ get_array('check', WA_UNKNOWN).each { |s| @check[s.upcase] = true }
90
87
 
91
88
  set_dic
92
89
  set_gra
93
-
94
- if @max_var.zero?
95
- @max_var = 10000
96
- @lingo.warn "#{self.class}: max-var is 0, setting to #{@max_var}"
97
- end
98
90
  end
99
91
 
100
- def control(cmd, par)
101
- # Status wird abgefragt
102
- if cmd == STR_CMD_STATUS
103
- # Eigenen Status um Status von Dictionary und Grammer erweitern
104
- @dic.report.each_pair { | k, v | set( k, v ) }
105
- @gra.report.each_pair { | k, v | set( k, v ) }
106
- end
92
+ def control(cmd, param)
93
+ report_on(cmd, @dic, @gra)
107
94
  end
108
95
 
109
96
  def process(obj)
110
- # Zu prüfende Wörter filtern
111
97
  if obj.is_a?(Word) && @check[obj.attr]
112
- # Statistik für Report
113
98
  inc('Anzahl gesuchter Wörter')
114
99
 
115
- # Erzeuge Variationen einer Wortform
116
- variations = [obj.form]
117
- @var_strings.each do |switch|
118
- from, to = switch
119
- variations = variate(variations, from, to)
120
- end
121
-
122
- # Prüfe Variation auf bekanntes Wort
123
- variations[0...@max_var].each do |var|
124
- # Variiertes Wort im Wörterbuch suchen
125
- word = @dic.find_word(var)
126
- word = @gra.find_compositum(var) if word.unknown?
127
- next if word.unknown? || (
128
- word.attr == WA_KOMPOSITUM && word.lexicals.any? { |lex|
129
- lex.attr[0..0] == LA_TAKEITASIS
100
+ @var.each_with_object([obj.form]) { |a, v| variate(v, *a) }.
101
+ tap { |v| v.slice!(@max..-1) }.each { |var|
102
+ next if (word = find_word(var)).unknown? || (
103
+ word.attr == WA_COMPOUND && word.lexicals.any? { |lex|
104
+ lex.attr.start_with?(LA_TAKEITASIS)
130
105
  }
131
106
  )
132
107
 
133
- # Das erste erkannte Wort beendet die Suche
134
108
  inc('Anzahl gefundener Wörter')
135
- word.form = @marker + var
136
- forward(word)
137
- return
138
- end
109
+ return forward(word.tap { word.form = @marker + var })
110
+ }
139
111
  end
140
112
 
141
113
  forward(obj)
@@ -146,32 +118,20 @@ class Lingo
146
118
  # Variiere die Bestandteile eines Arrays gemäß den Austauschvorgaben.
147
119
  #
148
120
  # variate( 'Tiieh', 'ieh', 'sch' ) => ['Tiieh', 'Tisch']
149
- def variate(variation_list, from, to)
150
- # neue Varianten sammeln
151
- add_variations = []
152
- from_re = Regexp.new(from)
153
-
154
- # alle Wörter in der variation_list permutieren
155
- variation_list.each do |wordform|
121
+ def variate(variations, from, to)
122
+ add, change, re = [], [from, to], Regexp.new(from)
156
123
 
157
- # Wortform in Teile zerlegen und anschließend Dimension feststellen
158
- wordpart = " #{wordform} ".split( from_re )
159
- n = wordpart.size - 1
124
+ variations.each { |form|
125
+ parts = " #{form} ".split(re)
160
126
 
161
- # Austauschketten in Matrix hinterlegen
162
- change = [from, to]
163
-
164
- # Austauschketten auf alle Teile anwenden
165
- (1..(2**n-1)).each do |i|
166
- variation = wordpart[0]
167
- # i[x] = Wert des x.ten Bit von Integer i
168
- (1..n).each { |j| variation += change[i[j-1]] + wordpart[j] }
169
-
170
- add_variations << variation.strip
171
- end
172
- end
127
+ 1.upto(2 ** (n = parts.size - 1) - 1) { |i|
128
+ var = parts.first
129
+ 1.upto(n) { |j| var += change[i[j - 1]] + parts[j] }
130
+ add << var.strip
131
+ }
132
+ }
173
133
 
174
- variation_list + add_variations
134
+ variations.concat(add)
175
135
  end
176
136
 
177
137
  end