lingo 1.8.4.2 → 1.8.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +413 -325
  3. data/README +380 -131
  4. data/Rakefile +19 -21
  5. data/de/lingo-abk.txt +15 -17
  6. data/de/lingo-dic.txt +20210 -20659
  7. data/de/lingo-mul.txt +5 -13
  8. data/de/lingo-syn.txt +5 -8
  9. data/de/test_dic.txt +2 -0
  10. data/de/test_gen.txt +8 -0
  11. data/de/{test_mul2.txt → test_mu2.txt} +0 -0
  12. data/de/{test_singleword.txt → test_sgw.txt} +0 -0
  13. data/de/user-dic.txt +5 -7
  14. data/de.lang +64 -49
  15. data/en/lingo-dic.txt +6398 -6404
  16. data/en/lingo-irr.txt +2 -3
  17. data/en/lingo-mul.txt +6 -7
  18. data/en/lingo-wdn.txt +881 -1762
  19. data/en/user-dic.txt +2 -5
  20. data/en.lang +39 -39
  21. data/lib/lingo/app.rb +10 -6
  22. data/lib/lingo/attendee/abbreviator.rb +1 -0
  23. data/lib/lingo/attendee/decomposer.rb +2 -1
  24. data/lib/lingo/attendee/multi_worder.rb +5 -6
  25. data/lib/lingo/attendee/stemmer.rb +1 -1
  26. data/lib/lingo/attendee/synonymer.rb +4 -2
  27. data/lib/lingo/attendee/text_reader.rb +77 -57
  28. data/lib/lingo/attendee/text_writer.rb +1 -1
  29. data/lib/lingo/attendee/tokenizer.rb +101 -50
  30. data/lib/lingo/attendee/variator.rb +2 -1
  31. data/lib/lingo/attendee/vector_filter.rb +28 -6
  32. data/lib/lingo/attendee/word_searcher.rb +2 -1
  33. data/lib/lingo/attendee.rb +8 -4
  34. data/lib/lingo/call.rb +7 -3
  35. data/lib/lingo/cli.rb +8 -16
  36. data/lib/lingo/config.rb +11 -6
  37. data/lib/lingo/ctl.rb +54 -3
  38. data/lib/lingo/database/crypter.rb +8 -14
  39. data/lib/lingo/database/hash_store.rb +1 -1
  40. data/lib/lingo/database/{show_progress.rb → progress.rb} +7 -8
  41. data/lib/lingo/database/source/key_value.rb +6 -5
  42. data/lib/lingo/database/source/multi_key.rb +5 -2
  43. data/lib/lingo/database/source/multi_value.rb +6 -4
  44. data/lib/lingo/database/source/single_word.rb +2 -3
  45. data/lib/lingo/database/source/word_class.rb +24 -5
  46. data/lib/lingo/database/source.rb +5 -3
  47. data/lib/lingo/database.rb +102 -41
  48. data/lib/lingo/error.rb +24 -2
  49. data/lib/lingo/language/dictionary.rb +26 -54
  50. data/lib/lingo/language/grammar.rb +19 -23
  51. data/lib/lingo/language/lexical.rb +5 -1
  52. data/lib/lingo/language/lexical_hash.rb +7 -12
  53. data/lib/lingo/language/token.rb +10 -1
  54. data/lib/lingo/language/word.rb +35 -23
  55. data/lib/lingo/language/word_form.rb +5 -4
  56. data/lib/lingo/{show_progress.rb → progress.rb} +43 -30
  57. data/lib/lingo/srv/lingosrv.cfg +1 -1
  58. data/lib/lingo/srv/public/.gitkeep +0 -0
  59. data/lib/lingo/srv.rb +11 -6
  60. data/lib/lingo/version.rb +2 -2
  61. data/lib/lingo/web/lingoweb.cfg +1 -1
  62. data/lib/lingo/web/views/index.erb +4 -4
  63. data/lib/lingo/web.rb +4 -6
  64. data/lib/lingo.rb +4 -12
  65. data/lingo.cfg +1 -1
  66. data/lir.cfg +1 -1
  67. data/ru/lingo-dic.txt +33473 -2113
  68. data/ru/lingo-mul.txt +8430 -1913
  69. data/ru/lingo-syn.txt +1634 -0
  70. data/ru/user-dic.txt +6 -0
  71. data/ru.lang +49 -47
  72. data/spec/spec_helper.rb +4 -0
  73. data/test/attendee/ts_decomposer.rb +2 -2
  74. data/test/attendee/ts_synonymer.rb +3 -3
  75. data/test/attendee/ts_tokenizer.rb +215 -2
  76. data/test/attendee/ts_variator.rb +2 -2
  77. data/test/attendee/ts_word_searcher.rb +10 -6
  78. data/test/ref/artikel.seq +2 -2
  79. data/test/ref/artikel.vec +5 -5
  80. data/test/ref/artikel.ven +11 -11
  81. data/test/ref/artikel.ver +11 -11
  82. data/test/ref/lir.seq +13 -13
  83. data/test/ref/lir.vec +31 -31
  84. data/test/test_helper.rb +19 -5
  85. data/test/ts_database.rb +206 -77
  86. data/test/ts_language.rb +86 -26
  87. metadata +93 -49
  88. data/.rspec +0 -1
  89. data/de/test_syn2.txt +0 -1
data/lib/lingo/database/source/key_value.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2013 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -37,16 +37,17 @@ class Lingo
37
37
 
38
38
  class KeyValue < self
39
39
 
40
+ DEFAULT_SEPARATOR = '*'
41
+
40
42
  def initialize(id, lingo)
41
- super
42
- @pat = /^(#{@wrd})#{Regexp.escape(@sep ||= '*')}(#{@wrd})$/
43
+ super(id, lingo, Language::LA_UNKNOWN)
44
+ @pat = /^(#{@wrd})#{Regexp.escape(@sep ||= DEFAULT_SEPARATOR)}(#{@wrd})$/
43
45
  end
44
46
 
45
47
  private
46
48
 
47
49
  def convert_line(line, key, val)
48
- key, val = key.strip, val.strip
49
- [key, %W[#{val unless key == val}##{@def}]]
50
+ [key.strip, %W[#{val.strip}##{@def}]]
50
51
  end
51
52
 
52
53
  end
data/lib/lingo/database/source/multi_key.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2013 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -38,12 +38,15 @@ class Lingo
38
38
 
39
39
  class MultiKey < self
40
40
 
41
+ DEFAULT_SEPARATOR = ';'
42
+
41
43
  def initialize(id, lingo)
42
44
  super
43
- @pat = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/
45
+ @pat = /^#{@wrd}(?:#{Regexp.escape(@sep ||= DEFAULT_SEPARATOR)}#{@wrd})*$/
44
46
  end
45
47
 
46
48
  def set(db, key, val)
49
+ key += "##{@def}" if @def
47
50
  val.each { |v| db[v] = [key] }
48
51
  end
49
52
 
data/lib/lingo/database/source/multi_value.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2013 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -36,14 +36,16 @@ class Lingo
36
36
 
37
37
  class MultiValue < self
38
38
 
39
+ DEFAULT_SEPARATOR = ';'
40
+
39
41
  def initialize(id, lingo)
40
42
  super
41
- @pat, @idx = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/, -1
43
+ @pat = /^#{@wrd}(?:#{Regexp.escape(@sep ||= DEFAULT_SEPARATOR)}#{@wrd})*$/
42
44
  end
43
45
 
44
46
  def set(db, key, val)
45
- db[key = "#{IDX_REF}#{@idx += 1}"] = val
46
- val.each { |v| db[v] = [key] }
47
+ values = val.map { |v| @def ? "#{v}##{@def}" : v }
48
+ val.each { |v| db[v] = values }
47
49
  end
48
50
 
49
51
  private
data/lib/lingo/database/source/single_word.rb CHANGED
@@ -37,16 +37,15 @@ class Lingo
37
37
  class SingleWord < self
38
38
 
39
39
  def initialize(id, lingo)
40
- super
40
+ super(id, lingo, Language::LA_NOUN)
41
41
  @pat = /^(#{@wrd})$/
42
- @def = @config.fetch('def-wc', 's').downcase
43
42
  @mul = @config.fetch('def-mul-wc', @def).downcase
44
43
  end
45
44
 
46
45
  private
47
46
 
48
47
  def convert_line(line, key, val)
49
- [key = key.strip, %W[##{key =~ /\s/ ? @mul : @def}]]
48
+ [k = key.strip, %W[#{k}##{k.include?(' ') ? @mul : @def}]]
50
49
  end
51
50
 
52
51
  end
data/lib/lingo/database/source/word_class.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2013 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -36,17 +36,36 @@ class Lingo
36
36
 
37
37
  class WordClass < self
38
38
 
39
+ DEFAULT_SEPARATOR = ','
40
+
41
+ GENDER_SEPARATOR = '.'
42
+
39
43
  def initialize(id, lingo)
40
44
  super
41
- @pat = /^(#{@wrd})#{Regexp.escape(@sep ||= ',')}((?:#{@wrd}#\w)+)$/
45
+
46
+ gen = Regexp.escape(GENDER_SEPARATOR)
47
+ sep = Regexp.escape(@sep ||= DEFAULT_SEPARATOR)
48
+
49
+ w, a = '\w%1$s(?:\|\w%1$s)*', '[+]?'
50
+ wc = "##{w % a}(?:#{gen}#{w % ''})?"
51
+
52
+ @pat = /^(#{@wrd})#{sep}((?:#{@wrd}#{wc})+)$/
42
53
  end
43
54
 
44
55
  private
45
56
 
46
57
  def convert_line(line, key, val)
47
- [key = key.strip, val.strip.scan(/(\S.+?)\s*#(\w)/).map! { |v, c|
48
- "#{v unless key == v}##{c}"
49
- }]
58
+ values = []
59
+
60
+ val.strip.scan(/(\S.*?)\s*#(\S+)/) { |k, v|
61
+ v, f = v.split('.')
62
+
63
+ v.split('|').product(f ? f.split('|') : [nil]) { |w, g|
64
+ values << "#{k}##{w}##{g}"
65
+ }
66
+ }
67
+
68
+ [key.strip, values]
50
69
  end
51
70
 
52
71
  end
data/lib/lingo/database/source.rb CHANGED
@@ -53,7 +53,7 @@ class Lingo
53
53
 
54
54
  attr_reader :pos
55
55
 
56
- def initialize(id, lingo)
56
+ def initialize(id, lingo, def_wc_default = nil)
57
57
  @config = lingo.database_config(id)
58
58
 
59
59
  source_file = Lingo.find(:dict, name = @config['name'], relax: true)
@@ -68,7 +68,8 @@ class Lingo
68
68
 
69
69
  raise SourceFileNotFoundError.new(name, id) unless @src.exist?
70
70
 
71
- @def = @config.fetch('def-wc', Language::LA_UNKNOWN).downcase
71
+ @def = @config.fetch('def-wc', def_wc_default)
72
+ @def = @def.downcase if @def
72
73
  @sep = @config['separator']
73
74
 
74
75
  @wrd = "(?:#{Language::Char::ANY})+"
@@ -87,7 +88,8 @@ class Lingo
87
88
  @src.each_line($/, encoding: ENC) { |line|
88
89
  @pos += length = line.bytesize
89
90
 
90
- next if line =~ /\A\s*#/ || line.strip.empty?
91
+ line.strip!
92
+ next if line.empty? || line.start_with?('#')
91
93
 
92
94
  line.chomp!
93
95
  line.replace(Unicode.downcase(line))
data/lib/lingo/database.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -24,7 +24,7 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
- require_relative 'database/show_progress'
27
+ require_relative 'database/progress'
28
28
  require_relative 'database/crypter'
29
29
  require_relative 'database/source'
30
30
 
@@ -40,15 +40,9 @@ class Lingo
40
40
  class Database
41
41
 
42
42
  FLD_SEP = '|'
43
- IDX_REF = '^'
44
43
  KEY_REF = '*'
45
44
  SYS_KEY = '~'
46
45
 
47
- IDX_REF_ESC = Regexp.escape(IDX_REF)
48
- KEY_REF_ESC = Regexp.escape(KEY_REF)
49
-
50
- INDEX_PATTERN = %r{\A#{IDX_REF_ESC}\d+\z}
51
-
52
46
  BACKENDS = []
53
47
  BACKEND_BY_EXT = {}
54
48
 
@@ -68,13 +62,11 @@ class Lingo
68
62
 
69
63
  end
70
64
 
71
- attr_reader :backend
72
-
73
65
  def initialize(id, lingo)
74
66
  @id, @lingo, @config, @db = id, lingo, lingo.database_config(id), nil
75
67
 
76
- @srcfile = Lingo.find(:dict, @config['name'], relax: true)
77
- @crypter = @config.has_key?('crypt') && Crypter.new
68
+ @srcfile = Lingo.find(:dict, config['name'], relax: true)
69
+ @crypter = config.key?('crypt') && Crypter.new
78
70
 
79
71
  @val = Hash.new { |h, k| h[k] = [] }
80
72
 
@@ -93,6 +85,8 @@ class Lingo
93
85
  convert unless uptodate?
94
86
  end
95
87
 
88
+ attr_reader :lingo, :config, :backend
89
+
96
90
  def closed?
97
91
  !@db || _closed?
98
92
  end
@@ -125,18 +119,13 @@ class Lingo
125
119
 
126
120
  def [](key)
127
121
  val = _val(key) unless closed?
128
- return unless val
129
-
130
- # Äquvalenzklassen behandeln
131
- val.split(FLD_SEP).map { |v|
132
- v =~ INDEX_PATTERN ? _val(v) : v
133
- }.compact.join(FLD_SEP).split(FLD_SEP)
122
+ val.split(FLD_SEP) if val
134
123
  end
135
124
 
136
125
  def []=(key, val)
137
126
  return if closed?
138
127
 
139
- val = @val[key].concat(val).sort!
128
+ val = @val[key].concat(val)
140
129
  val.uniq!
141
130
 
142
131
  val = val.join(FLD_SEP)
@@ -144,7 +133,7 @@ class Lingo
144
133
  end
145
134
 
146
135
  def warn(*msg)
147
- @lingo.warn(*msg)
136
+ lingo.warn(*msg)
148
137
  end
149
138
 
150
139
  private
@@ -171,9 +160,24 @@ class Lingo
171
160
  get_backend(mod) or raise BackendNotAvailableError.new(mod, file)
172
161
  end
173
162
 
163
+ def config_hash
164
+ hashes = [config]
165
+
166
+ if use_lex = config['use-lex']
167
+ hashes.concat(lingo.
168
+ dictionary_config['databases'].
169
+ values_at(*use_lex.split(SEP_RE)))
170
+ end
171
+
172
+ Crypter.digest(hashes.inspect)
173
+ end
174
+
174
175
  def uptodate?(file = @stofile)
175
176
  src = Pathname.new(@srcfile)
176
- @source_key = lambda { [src.size, src.mtime].join(FLD_SEP) }
177
+
178
+ @source_key = lambda {
179
+ [src.size, src.mtime, VERSION, config_hash].join(FLD_SEP)
180
+ }
177
181
 
178
182
  sys_key = open { @db[SYS_KEY] } if File.exist?(file)
179
183
  sys_key && (!src.exist? || sys_key == @source_key.call)
@@ -217,7 +221,7 @@ class Lingo
217
221
  end
218
222
 
219
223
  def _val(key)
220
- if val = _get(@crypter ? @crypter.digest(key) : key)
224
+ if val = _get(@crypter ? Crypter.digest(key) : key)
221
225
  _encode!(val)
222
226
  @crypter ? @crypter.decode(key, val) : val
223
227
  end
@@ -227,33 +231,25 @@ class Lingo
227
231
  str.force_encoding(ENC)
228
232
  end
229
233
 
230
- def convert(verbose = @lingo.config.stderr.tty?)
231
- src = Source.get(@config.fetch('txt-format', 'key_value'), @id, @lingo)
232
-
233
- if lex = @config['use-lex']
234
- a = [{ 'source' => lex.split(SEP_RE), 'mode' => @config['lex-mode'] }, @lingo]
235
- d, g = Language::Dictionary.new(*a), Language::Grammar.new(*a); a = nil
234
+ def convert(verbose = lingo.config.stderr.tty?)
235
+ src = Source.get(config.fetch('txt-format', 'key_value'), @id, lingo)
236
236
 
237
- sep, block = ' ', lambda { |f|
238
- (r = d.find_word(f)).unknown? &&
239
- (c = (r = g.find_compound(f)).compo_form) ? c.form : r.norm
240
- }
241
- end
237
+ sep, key_map, val_map = prepare_lex
242
238
 
243
- ShowProgress.new(self, src, verbose) { |progress| create {
239
+ Progress.new(self, src, verbose) { |progress| create {
244
240
  src.each { |key, val|
245
- progress[src.pos]
241
+ progress << src.pos
246
242
 
247
243
  if key
248
244
  key.chomp!('.')
249
245
 
250
- if lex && key.include?(sep)
251
- k = key.split(sep).map!(&block).join(sep)
246
+ if sep && key.include?(sep)
247
+ key = key.split(sep).map!(&key_map).join(sep)
248
+ val = val.map { |v| val_map[v.split(sep)].join(sep) } if val_map
252
249
 
253
- c = k.count(sep) + 1
254
- self[k.split(sep)[0, 3].join(sep)] = ["#{KEY_REF}#{c}"] if c > 3
255
-
256
- key, val = k, val.map { |v| v.start_with?('#') ? key + v : v }
250
+ if (cnt = key.count(sep)) > 2
251
+ self[key.split(sep)[0, 3].join(sep)] = ["#{KEY_REF}#{cnt + 1}"]
252
+ end
257
253
  end
258
254
  end
259
255
 
@@ -264,6 +260,71 @@ class Lingo
264
260
  } }
265
261
  end
266
262
 
263
+ def prepare_lex
264
+ use_lex = config['use-lex'] or return
265
+
266
+ args = [{
267
+ 'source' => use_lex.split(SEP_RE),
268
+ 'mode' => config['lex-mode']
269
+ }, lingo]
270
+
271
+ dic = Language::Dictionary.new(*args)
272
+ gra = Language::Grammar.new(*args)
273
+
274
+ args = nil
275
+
276
+ if inflect = config['inflect']
277
+ inflect, wc = inflect == true ? %w[s e] : inflect.split(SEP_RE), 'a'
278
+
279
+ if cfg = lingo.dictionary_config['inflect'] and suffixes = cfg[wc]
280
+ wc, re = /#{wc}/, /\A[^#]+/
281
+ else
282
+ warn "#{self.class}: No suffixes to inflect ##{wc}: #{@id}"
283
+ inflect = false
284
+ end
285
+ end
286
+
287
+ [' ', lambda { |form|
288
+ word = dic.find_word(form)
289
+
290
+ if word.unknown?
291
+ compo = gra.find_compound(form)
292
+
293
+ if compo_form = compo.compo_form
294
+ compo_form.form
295
+ else
296
+ compo.norm
297
+ end
298
+ else
299
+ word.norm
300
+ end
301
+ }, inflect && lambda { |forms|
302
+ inflectables = []
303
+
304
+ forms.each { |form|
305
+ word = dic.find_word(word_form = form[re])
306
+
307
+ if word.identified? and lexical = word.get_class(wc).first
308
+ inflectables << form if form == lexical.form
309
+ else
310
+ unless inflectables.empty?
311
+ comp = gra.find_compound(word_form) if word.unknown?
312
+ word = comp.head || comp if comp && !comp.unknown?
313
+
314
+ if word.attr?(*inflect)
315
+ suffix = suffixes[word.genders.compact.first]
316
+ inflectables.each { |lex_form| lex_form << suffix } if suffix
317
+ end
318
+ end
319
+
320
+ break
321
+ end
322
+ }
323
+
324
+ forms
325
+ }]
326
+ end
327
+
267
328
  end
268
329
 
269
330
  end
data/lib/lingo/error.rb CHANGED
@@ -89,7 +89,7 @@ class Lingo
89
89
  end
90
90
 
91
91
  def to_s
92
- error("An error occured when trying to #{action} `#{file}'")
92
+ error("An error occured while trying to #{action} `#{file}'")
93
93
  end
94
94
 
95
95
  end
@@ -194,7 +194,29 @@ class Lingo
194
194
  end
195
195
 
196
196
  def to_s
197
- error("#{class_name}: An error occured while trying to load '#{lib}'")
197
+ error("#{class_name}: An error occured while trying to load `#{lib}'")
198
+ end
199
+
200
+ end
201
+
202
+ class TokenizeError < LingoError
203
+
204
+ attr_reader :line, :file, :num, :err
205
+
206
+ def initialize(line, file, num, err)
207
+ @line, @file, @num, @err = line, file, num, err
208
+ end
209
+
210
+ def to_s
211
+ line, file = self.line, self.file
212
+
213
+ if line.is_a?(String) && line.length > 48
214
+ line = line[0, 45] + '...'
215
+ end
216
+
217
+ file &&= "#{file}:#{num}: "
218
+
219
+ error("An error occured while trying to tokenize #{file}#{line.inspect}")
198
220
  end
199
221
 
200
222
  end
data/lib/lingo/language/dictionary.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -30,8 +30,6 @@ class Lingo
30
30
 
31
31
  class Dictionary
32
32
 
33
- KEY_REF_RE = %r{\A#{Database::KEY_REF_ESC}\d+}
34
-
35
33
  def self.open(*args)
36
34
  yield dictionary = new(*args)
37
35
  ensure
@@ -39,7 +37,7 @@ class Lingo
39
37
  end
40
38
 
41
39
  def initialize(config, lingo)
42
- unless config.has_key?('source')
40
+ unless config.key?('source')
43
41
  raise ArgumentError, "Required parameter `source' missing."
44
42
  end
45
43
 
@@ -70,22 +68,18 @@ class Lingo
70
68
  #
71
69
  # Erstellt aus dem String ein Wort und sucht nach diesem im Wörterbuch.
72
70
  def find_word(str)
73
- (@_word ||= {})[str] ||= Word.new(str, WA_UNKNOWN).tap { |w|
74
- unless (lexicals = select_with_suffix(str)).empty?
75
- w.lexicals = lexicals
76
- w.attr = WA_IDENTIFIED
77
- end
78
- }
71
+ (@_word ||= {})[str] ||=
72
+ Word.new(str, WA_UNKNOWN).identify(select_with_suffix(str))
79
73
  end
80
74
 
81
- def find_synonyms(obj, syn = [])
75
+ def find_synonyms(obj, syn = [], com = true)
82
76
  lex = obj.lexicals
83
77
  lex = [obj] if lex.empty? && obj.unknown?
84
78
 
85
- com, ref = obj.attr == WA_COMPOUND, KEY_REF_RE
79
+ com &&= obj.attr == WA_COMPOUND
86
80
 
87
81
  lex.each { |l|
88
- select(l.form, syn) { |i| i =~ ref } unless com &&
82
+ select(l.form, syn) unless com &&
89
83
  l.attr != LA_COMPOUND || l.attr == LA_SYNONYM
90
84
  }
91
85
 
@@ -97,14 +91,11 @@ class Lingo
97
91
  # Sucht alle Wörterbücher durch und gibt den ersten Treffer zurück (+mode = first+), oder alle Treffer (+mode = all+)
98
92
  def select(str, lex = [])
99
93
  @src.each { |src|
100
- l = src[str] or next
101
- lex.concat(block_given? ? l.delete_if { |i| yield i } : l)
94
+ lex.concat(src[str] || next)
102
95
  break unless @all
103
96
  }
104
97
 
105
- lex.sort!
106
- lex.uniq!
107
-
98
+ lex.empty? && block_given? ? yield(lex) : lex.uniq!
108
99
  lex
109
100
  end
110
101
 
@@ -113,7 +104,17 @@ class Lingo
113
104
  # Sucht alle Wörterbücher durch und gibt den ersten Treffer zurück (+mode = first+), oder alle Treffer (+mode = all+).
114
105
  # Sucht dabei auch Wörter, die um wortklassenspezifische Suffixe bereinigt wurden.
115
106
  def select_with_suffix(str)
116
- select_with_affix(:suffix, str)
107
+ select(str) { |lex|
108
+ each_affix(str) { |form, attr|
109
+ unless (selected = select(form)).empty?
110
+ if selected.first.attr == LA_COMPOUND
111
+ lex.concat(selected) if selected.last.attr?(attr)
112
+ else
113
+ selected.each { |l| lex << l if l.attr?(attr) }
114
+ end
115
+ end
116
+ }
117
+ }
117
118
  end
118
119
 
119
120
  # _dic_.select_with_infix( _aString_ ) -> _ArrayOfLexicals_
@@ -121,44 +122,15 @@ class Lingo
121
122
  # Sucht alle Wörterbücher durch und gibt den ersten Treffer zurück (+mode = first+), oder alle Treffer (+mode = all+).
122
123
  # Sucht dabei auch Wörter, die eine Fugung am Ende haben.
123
124
  def select_with_infix(str)
124
- select_with_affix(:infix, str)
125
- end
126
-
127
- # _dic_.suffix_lexicals( _aString_ ) -> _ArrayOfLexicals_
128
- #
129
- # Gibt alle möglichen Lexicals zurück, die von der Endung her auf den String anwendbar sind:
130
- #
131
- # dic.suffix_lexicals("Hasens") -> [(hasen/s), (hasen/e), (has/e)]
132
- def suffix_lexicals(str)
133
- affix_lexicals(:suffix, str)
134
- end
135
-
136
- # _dic_.gap_lexicals( _aString_ ) -> _ArrayOfLexicals_
137
- #
138
- # Gibt alle möglichen Lexicals zurück, die von der Endung her auf den String anwendbar sind:
139
- def infix_lexicals(str)
140
- affix_lexicals(:infix, str)
141
- end
142
-
143
- private
144
-
145
- def select_with_affix(affix, str)
146
- lex = select(str)
147
-
148
- affix_lexicals(affix, str).each { |a| select(a.form, lex) { |b|
149
- affix == :suffix && a.attr != b.attr
150
- } } if lex.empty?
151
-
152
- lex
125
+ select(str) { |lex|
126
+ each_affix(str, :infix) { |form, _| select(form, lex) }
127
+ }
153
128
  end
154
129
 
155
- def affix_lexicals(affix, str)
156
- lex = instance_variable_get("@#{affix}es").map { |r, e, t|
157
- Lexical.new("#{$`}#{e == '*' ? '' : e}#{$'}", t) if str =~ r
130
+ def each_affix(str, affix = :suffix)
131
+ instance_variable_get("@#{affix}es").each { |r, e, t|
132
+ yield "#{$`}#{e == '*' ? '' : e}#{$'}", t if str =~ r
158
133
  }
159
-
160
- lex.compact!
161
- lex
162
134
  end
163
135
 
164
136
  end