lingo 1.8.4.2 → 1.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +413 -325
  3. data/README +380 -131
  4. data/Rakefile +19 -21
  5. data/de/lingo-abk.txt +15 -17
  6. data/de/lingo-dic.txt +20210 -20659
  7. data/de/lingo-mul.txt +5 -13
  8. data/de/lingo-syn.txt +5 -8
  9. data/de/test_dic.txt +2 -0
  10. data/de/test_gen.txt +8 -0
  11. data/de/{test_mul2.txt → test_mu2.txt} +0 -0
  12. data/de/{test_singleword.txt → test_sgw.txt} +0 -0
  13. data/de/user-dic.txt +5 -7
  14. data/de.lang +64 -49
  15. data/en/lingo-dic.txt +6398 -6404
  16. data/en/lingo-irr.txt +2 -3
  17. data/en/lingo-mul.txt +6 -7
  18. data/en/lingo-wdn.txt +881 -1762
  19. data/en/user-dic.txt +2 -5
  20. data/en.lang +39 -39
  21. data/lib/lingo/app.rb +10 -6
  22. data/lib/lingo/attendee/abbreviator.rb +1 -0
  23. data/lib/lingo/attendee/decomposer.rb +2 -1
  24. data/lib/lingo/attendee/multi_worder.rb +5 -6
  25. data/lib/lingo/attendee/stemmer.rb +1 -1
  26. data/lib/lingo/attendee/synonymer.rb +4 -2
  27. data/lib/lingo/attendee/text_reader.rb +77 -57
  28. data/lib/lingo/attendee/text_writer.rb +1 -1
  29. data/lib/lingo/attendee/tokenizer.rb +101 -50
  30. data/lib/lingo/attendee/variator.rb +2 -1
  31. data/lib/lingo/attendee/vector_filter.rb +28 -6
  32. data/lib/lingo/attendee/word_searcher.rb +2 -1
  33. data/lib/lingo/attendee.rb +8 -4
  34. data/lib/lingo/call.rb +7 -3
  35. data/lib/lingo/cli.rb +8 -16
  36. data/lib/lingo/config.rb +11 -6
  37. data/lib/lingo/ctl.rb +54 -3
  38. data/lib/lingo/database/crypter.rb +8 -14
  39. data/lib/lingo/database/hash_store.rb +1 -1
  40. data/lib/lingo/database/{show_progress.rb → progress.rb} +7 -8
  41. data/lib/lingo/database/source/key_value.rb +6 -5
  42. data/lib/lingo/database/source/multi_key.rb +5 -2
  43. data/lib/lingo/database/source/multi_value.rb +6 -4
  44. data/lib/lingo/database/source/single_word.rb +2 -3
  45. data/lib/lingo/database/source/word_class.rb +24 -5
  46. data/lib/lingo/database/source.rb +5 -3
  47. data/lib/lingo/database.rb +102 -41
  48. data/lib/lingo/error.rb +24 -2
  49. data/lib/lingo/language/dictionary.rb +26 -54
  50. data/lib/lingo/language/grammar.rb +19 -23
  51. data/lib/lingo/language/lexical.rb +5 -1
  52. data/lib/lingo/language/lexical_hash.rb +7 -12
  53. data/lib/lingo/language/token.rb +10 -1
  54. data/lib/lingo/language/word.rb +35 -23
  55. data/lib/lingo/language/word_form.rb +5 -4
  56. data/lib/lingo/{show_progress.rb → progress.rb} +43 -30
  57. data/lib/lingo/srv/lingosrv.cfg +1 -1
  58. data/lib/lingo/srv/public/.gitkeep +0 -0
  59. data/lib/lingo/srv.rb +11 -6
  60. data/lib/lingo/version.rb +2 -2
  61. data/lib/lingo/web/lingoweb.cfg +1 -1
  62. data/lib/lingo/web/views/index.erb +4 -4
  63. data/lib/lingo/web.rb +4 -6
  64. data/lib/lingo.rb +4 -12
  65. data/lingo.cfg +1 -1
  66. data/lir.cfg +1 -1
  67. data/ru/lingo-dic.txt +33473 -2113
  68. data/ru/lingo-mul.txt +8430 -1913
  69. data/ru/lingo-syn.txt +1634 -0
  70. data/ru/user-dic.txt +6 -0
  71. data/ru.lang +49 -47
  72. data/spec/spec_helper.rb +4 -0
  73. data/test/attendee/ts_decomposer.rb +2 -2
  74. data/test/attendee/ts_synonymer.rb +3 -3
  75. data/test/attendee/ts_tokenizer.rb +215 -2
  76. data/test/attendee/ts_variator.rb +2 -2
  77. data/test/attendee/ts_word_searcher.rb +10 -6
  78. data/test/ref/artikel.seq +2 -2
  79. data/test/ref/artikel.vec +5 -5
  80. data/test/ref/artikel.ven +11 -11
  81. data/test/ref/artikel.ver +11 -11
  82. data/test/ref/lir.seq +13 -13
  83. data/test/ref/lir.vec +31 -31
  84. data/test/test_helper.rb +19 -5
  85. data/test/ts_database.rb +206 -77
  86. data/test/ts_language.rb +86 -26
  87. metadata +93 -49
  88. data/.rspec +0 -1
  89. data/de/test_syn2.txt +0 -1
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2013 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -37,16 +37,17 @@ class Lingo
37
37
 
38
38
  class KeyValue < self
39
39
 
40
+ DEFAULT_SEPARATOR = '*'
41
+
40
42
  def initialize(id, lingo)
41
- super
42
- @pat = /^(#{@wrd})#{Regexp.escape(@sep ||= '*')}(#{@wrd})$/
43
+ super(id, lingo, Language::LA_UNKNOWN)
44
+ @pat = /^(#{@wrd})#{Regexp.escape(@sep ||= DEFAULT_SEPARATOR)}(#{@wrd})$/
43
45
  end
44
46
 
45
47
  private
46
48
 
47
49
  def convert_line(line, key, val)
48
- key, val = key.strip, val.strip
49
- [key, %W[#{val unless key == val}##{@def}]]
50
+ [key.strip, %W[#{val.strip}##{@def}]]
50
51
  end
51
52
 
52
53
  end
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2013 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -38,12 +38,15 @@ class Lingo
38
38
 
39
39
  class MultiKey < self
40
40
 
41
+ DEFAULT_SEPARATOR = ';'
42
+
41
43
  def initialize(id, lingo)
42
44
  super
43
- @pat = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/
45
+ @pat = /^#{@wrd}(?:#{Regexp.escape(@sep ||= DEFAULT_SEPARATOR)}#{@wrd})*$/
44
46
  end
45
47
 
46
48
  def set(db, key, val)
49
+ key += "##{@def}" if @def
47
50
  val.each { |v| db[v] = [key] }
48
51
  end
49
52
 
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2013 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -36,14 +36,16 @@ class Lingo
36
36
 
37
37
  class MultiValue < self
38
38
 
39
+ DEFAULT_SEPARATOR = ';'
40
+
39
41
  def initialize(id, lingo)
40
42
  super
41
- @pat, @idx = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/, -1
43
+ @pat = /^#{@wrd}(?:#{Regexp.escape(@sep ||= DEFAULT_SEPARATOR)}#{@wrd})*$/
42
44
  end
43
45
 
44
46
  def set(db, key, val)
45
- db[key = "#{IDX_REF}#{@idx += 1}"] = val
46
- val.each { |v| db[v] = [key] }
47
+ values = val.map { |v| @def ? "#{v}##{@def}" : v }
48
+ val.each { |v| db[v] = values }
47
49
  end
48
50
 
49
51
  private
@@ -37,16 +37,15 @@ class Lingo
37
37
  class SingleWord < self
38
38
 
39
39
  def initialize(id, lingo)
40
- super
40
+ super(id, lingo, Language::LA_NOUN)
41
41
  @pat = /^(#{@wrd})$/
42
- @def = @config.fetch('def-wc', 's').downcase
43
42
  @mul = @config.fetch('def-mul-wc', @def).downcase
44
43
  end
45
44
 
46
45
  private
47
46
 
48
47
  def convert_line(line, key, val)
49
- [key = key.strip, %W[##{key =~ /\s/ ? @mul : @def}]]
48
+ [k = key.strip, %W[#{k}##{k.include?(' ') ? @mul : @def}]]
50
49
  end
51
50
 
52
51
  end
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2013 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -36,17 +36,36 @@ class Lingo
36
36
 
37
37
  class WordClass < self
38
38
 
39
+ DEFAULT_SEPARATOR = ','
40
+
41
+ GENDER_SEPARATOR = '.'
42
+
39
43
  def initialize(id, lingo)
40
44
  super
41
- @pat = /^(#{@wrd})#{Regexp.escape(@sep ||= ',')}((?:#{@wrd}#\w)+)$/
45
+
46
+ gen = Regexp.escape(GENDER_SEPARATOR)
47
+ sep = Regexp.escape(@sep ||= DEFAULT_SEPARATOR)
48
+
49
+ w, a = '\w%1$s(?:\|\w%1$s)*', '[+]?'
50
+ wc = "##{w % a}(?:#{gen}#{w % ''})?"
51
+
52
+ @pat = /^(#{@wrd})#{sep}((?:#{@wrd}#{wc})+)$/
42
53
  end
43
54
 
44
55
  private
45
56
 
46
57
  def convert_line(line, key, val)
47
- [key = key.strip, val.strip.scan(/(\S.+?)\s*#(\w)/).map! { |v, c|
48
- "#{v unless key == v}##{c}"
49
- }]
58
+ values = []
59
+
60
+ val.strip.scan(/(\S.*?)\s*#(\S+)/) { |k, v|
61
+ v, f = v.split('.')
62
+
63
+ v.split('|').product(f ? f.split('|') : [nil]) { |w, g|
64
+ values << "#{k}##{w}##{g}"
65
+ }
66
+ }
67
+
68
+ [key.strip, values]
50
69
  end
51
70
 
52
71
  end
@@ -53,7 +53,7 @@ class Lingo
53
53
 
54
54
  attr_reader :pos
55
55
 
56
- def initialize(id, lingo)
56
+ def initialize(id, lingo, def_wc_default = nil)
57
57
  @config = lingo.database_config(id)
58
58
 
59
59
  source_file = Lingo.find(:dict, name = @config['name'], relax: true)
@@ -68,7 +68,8 @@ class Lingo
68
68
 
69
69
  raise SourceFileNotFoundError.new(name, id) unless @src.exist?
70
70
 
71
- @def = @config.fetch('def-wc', Language::LA_UNKNOWN).downcase
71
+ @def = @config.fetch('def-wc', def_wc_default)
72
+ @def = @def.downcase if @def
72
73
  @sep = @config['separator']
73
74
 
74
75
  @wrd = "(?:#{Language::Char::ANY})+"
@@ -87,7 +88,8 @@ class Lingo
87
88
  @src.each_line($/, encoding: ENC) { |line|
88
89
  @pos += length = line.bytesize
89
90
 
90
- next if line =~ /\A\s*#/ || line.strip.empty?
91
+ line.strip!
92
+ next if line.empty? || line.start_with?('#')
91
93
 
92
94
  line.chomp!
93
95
  line.replace(Unicode.downcase(line))
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -24,7 +24,7 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
- require_relative 'database/show_progress'
27
+ require_relative 'database/progress'
28
28
  require_relative 'database/crypter'
29
29
  require_relative 'database/source'
30
30
 
@@ -40,15 +40,9 @@ class Lingo
40
40
  class Database
41
41
 
42
42
  FLD_SEP = '|'
43
- IDX_REF = '^'
44
43
  KEY_REF = '*'
45
44
  SYS_KEY = '~'
46
45
 
47
- IDX_REF_ESC = Regexp.escape(IDX_REF)
48
- KEY_REF_ESC = Regexp.escape(KEY_REF)
49
-
50
- INDEX_PATTERN = %r{\A#{IDX_REF_ESC}\d+\z}
51
-
52
46
  BACKENDS = []
53
47
  BACKEND_BY_EXT = {}
54
48
 
@@ -68,13 +62,11 @@ class Lingo
68
62
 
69
63
  end
70
64
 
71
- attr_reader :backend
72
-
73
65
  def initialize(id, lingo)
74
66
  @id, @lingo, @config, @db = id, lingo, lingo.database_config(id), nil
75
67
 
76
- @srcfile = Lingo.find(:dict, @config['name'], relax: true)
77
- @crypter = @config.has_key?('crypt') && Crypter.new
68
+ @srcfile = Lingo.find(:dict, config['name'], relax: true)
69
+ @crypter = config.key?('crypt') && Crypter.new
78
70
 
79
71
  @val = Hash.new { |h, k| h[k] = [] }
80
72
 
@@ -93,6 +85,8 @@ class Lingo
93
85
  convert unless uptodate?
94
86
  end
95
87
 
88
+ attr_reader :lingo, :config, :backend
89
+
96
90
  def closed?
97
91
  !@db || _closed?
98
92
  end
@@ -125,18 +119,13 @@ class Lingo
125
119
 
126
120
  def [](key)
127
121
  val = _val(key) unless closed?
128
- return unless val
129
-
130
- # Äquvalenzklassen behandeln
131
- val.split(FLD_SEP).map { |v|
132
- v =~ INDEX_PATTERN ? _val(v) : v
133
- }.compact.join(FLD_SEP).split(FLD_SEP)
122
+ val.split(FLD_SEP) if val
134
123
  end
135
124
 
136
125
  def []=(key, val)
137
126
  return if closed?
138
127
 
139
- val = @val[key].concat(val).sort!
128
+ val = @val[key].concat(val)
140
129
  val.uniq!
141
130
 
142
131
  val = val.join(FLD_SEP)
@@ -144,7 +133,7 @@ class Lingo
144
133
  end
145
134
 
146
135
  def warn(*msg)
147
- @lingo.warn(*msg)
136
+ lingo.warn(*msg)
148
137
  end
149
138
 
150
139
  private
@@ -171,9 +160,24 @@ class Lingo
171
160
  get_backend(mod) or raise BackendNotAvailableError.new(mod, file)
172
161
  end
173
162
 
163
+ def config_hash
164
+ hashes = [config]
165
+
166
+ if use_lex = config['use-lex']
167
+ hashes.concat(lingo.
168
+ dictionary_config['databases'].
169
+ values_at(*use_lex.split(SEP_RE)))
170
+ end
171
+
172
+ Crypter.digest(hashes.inspect)
173
+ end
174
+
174
175
  def uptodate?(file = @stofile)
175
176
  src = Pathname.new(@srcfile)
176
- @source_key = lambda { [src.size, src.mtime].join(FLD_SEP) }
177
+
178
+ @source_key = lambda {
179
+ [src.size, src.mtime, VERSION, config_hash].join(FLD_SEP)
180
+ }
177
181
 
178
182
  sys_key = open { @db[SYS_KEY] } if File.exist?(file)
179
183
  sys_key && (!src.exist? || sys_key == @source_key.call)
@@ -217,7 +221,7 @@ class Lingo
217
221
  end
218
222
 
219
223
  def _val(key)
220
- if val = _get(@crypter ? @crypter.digest(key) : key)
224
+ if val = _get(@crypter ? Crypter.digest(key) : key)
221
225
  _encode!(val)
222
226
  @crypter ? @crypter.decode(key, val) : val
223
227
  end
@@ -227,33 +231,25 @@ class Lingo
227
231
  str.force_encoding(ENC)
228
232
  end
229
233
 
230
- def convert(verbose = @lingo.config.stderr.tty?)
231
- src = Source.get(@config.fetch('txt-format', 'key_value'), @id, @lingo)
232
-
233
- if lex = @config['use-lex']
234
- a = [{ 'source' => lex.split(SEP_RE), 'mode' => @config['lex-mode'] }, @lingo]
235
- d, g = Language::Dictionary.new(*a), Language::Grammar.new(*a); a = nil
234
+ def convert(verbose = lingo.config.stderr.tty?)
235
+ src = Source.get(config.fetch('txt-format', 'key_value'), @id, lingo)
236
236
 
237
- sep, block = ' ', lambda { |f|
238
- (r = d.find_word(f)).unknown? &&
239
- (c = (r = g.find_compound(f)).compo_form) ? c.form : r.norm
240
- }
241
- end
237
+ sep, key_map, val_map = prepare_lex
242
238
 
243
- ShowProgress.new(self, src, verbose) { |progress| create {
239
+ Progress.new(self, src, verbose) { |progress| create {
244
240
  src.each { |key, val|
245
- progress[src.pos]
241
+ progress << src.pos
246
242
 
247
243
  if key
248
244
  key.chomp!('.')
249
245
 
250
- if lex && key.include?(sep)
251
- k = key.split(sep).map!(&block).join(sep)
246
+ if sep && key.include?(sep)
247
+ key = key.split(sep).map!(&key_map).join(sep)
248
+ val = val.map { |v| val_map[v.split(sep)].join(sep) } if val_map
252
249
 
253
- c = k.count(sep) + 1
254
- self[k.split(sep)[0, 3].join(sep)] = ["#{KEY_REF}#{c}"] if c > 3
255
-
256
- key, val = k, val.map { |v| v.start_with?('#') ? key + v : v }
250
+ if (cnt = key.count(sep)) > 2
251
+ self[key.split(sep)[0, 3].join(sep)] = ["#{KEY_REF}#{cnt + 1}"]
252
+ end
257
253
  end
258
254
  end
259
255
 
@@ -264,6 +260,71 @@ class Lingo
264
260
  } }
265
261
  end
266
262
 
263
+ def prepare_lex
264
+ use_lex = config['use-lex'] or return
265
+
266
+ args = [{
267
+ 'source' => use_lex.split(SEP_RE),
268
+ 'mode' => config['lex-mode']
269
+ }, lingo]
270
+
271
+ dic = Language::Dictionary.new(*args)
272
+ gra = Language::Grammar.new(*args)
273
+
274
+ args = nil
275
+
276
+ if inflect = config['inflect']
277
+ inflect, wc = inflect == true ? %w[s e] : inflect.split(SEP_RE), 'a'
278
+
279
+ if cfg = lingo.dictionary_config['inflect'] and suffixes = cfg[wc]
280
+ wc, re = /#{wc}/, /\A[^#]+/
281
+ else
282
+ warn "#{self.class}: No suffixes to inflect ##{wc}: #{@id}"
283
+ inflect = false
284
+ end
285
+ end
286
+
287
+ [' ', lambda { |form|
288
+ word = dic.find_word(form)
289
+
290
+ if word.unknown?
291
+ compo = gra.find_compound(form)
292
+
293
+ if compo_form = compo.compo_form
294
+ compo_form.form
295
+ else
296
+ compo.norm
297
+ end
298
+ else
299
+ word.norm
300
+ end
301
+ }, inflect && lambda { |forms|
302
+ inflectables = []
303
+
304
+ forms.each { |form|
305
+ word = dic.find_word(word_form = form[re])
306
+
307
+ if word.identified? and lexical = word.get_class(wc).first
308
+ inflectables << form if form == lexical.form
309
+ else
310
+ unless inflectables.empty?
311
+ comp = gra.find_compound(word_form) if word.unknown?
312
+ word = comp.head || comp if comp && !comp.unknown?
313
+
314
+ if word.attr?(*inflect)
315
+ suffix = suffixes[word.genders.compact.first]
316
+ inflectables.each { |lex_form| lex_form << suffix } if suffix
317
+ end
318
+ end
319
+
320
+ break
321
+ end
322
+ }
323
+
324
+ forms
325
+ }]
326
+ end
327
+
267
328
  end
268
329
 
269
330
  end
data/lib/lingo/error.rb CHANGED
@@ -89,7 +89,7 @@ class Lingo
89
89
  end
90
90
 
91
91
  def to_s
92
- error("An error occured when trying to #{action} `#{file}'")
92
+ error("An error occured while trying to #{action} `#{file}'")
93
93
  end
94
94
 
95
95
  end
@@ -194,7 +194,29 @@ class Lingo
194
194
  end
195
195
 
196
196
  def to_s
197
- error("#{class_name}: An error occured while trying to load '#{lib}'")
197
+ error("#{class_name}: An error occured while trying to load `#{lib}'")
198
+ end
199
+
200
+ end
201
+
202
+ class TokenizeError < LingoError
203
+
204
+ attr_reader :line, :file, :num, :err
205
+
206
+ def initialize(line, file, num, err)
207
+ @line, @file, @num, @err = line, file, num, err
208
+ end
209
+
210
+ def to_s
211
+ line, file = self.line, self.file
212
+
213
+ if line.is_a?(String) && line.length > 48
214
+ line = line[0, 45] + '...'
215
+ end
216
+
217
+ file &&= "#{file}:#{num}: "
218
+
219
+ error("An error occured while trying to tokenize #{file}#{line.inspect}")
198
220
  end
199
221
 
200
222
  end
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -30,8 +30,6 @@ class Lingo
30
30
 
31
31
  class Dictionary
32
32
 
33
- KEY_REF_RE = %r{\A#{Database::KEY_REF_ESC}\d+}
34
-
35
33
  def self.open(*args)
36
34
  yield dictionary = new(*args)
37
35
  ensure
@@ -39,7 +37,7 @@ class Lingo
39
37
  end
40
38
 
41
39
  def initialize(config, lingo)
42
- unless config.has_key?('source')
40
+ unless config.key?('source')
43
41
  raise ArgumentError, "Required parameter `source' missing."
44
42
  end
45
43
 
@@ -70,22 +68,18 @@ class Lingo
70
68
  #
71
69
  # Erstellt aus dem String ein Wort und sucht nach diesem im Wörterbuch.
72
70
  def find_word(str)
73
- (@_word ||= {})[str] ||= Word.new(str, WA_UNKNOWN).tap { |w|
74
- unless (lexicals = select_with_suffix(str)).empty?
75
- w.lexicals = lexicals
76
- w.attr = WA_IDENTIFIED
77
- end
78
- }
71
+ (@_word ||= {})[str] ||=
72
+ Word.new(str, WA_UNKNOWN).identify(select_with_suffix(str))
79
73
  end
80
74
 
81
- def find_synonyms(obj, syn = [])
75
+ def find_synonyms(obj, syn = [], com = true)
82
76
  lex = obj.lexicals
83
77
  lex = [obj] if lex.empty? && obj.unknown?
84
78
 
85
- com, ref = obj.attr == WA_COMPOUND, KEY_REF_RE
79
+ com &&= obj.attr == WA_COMPOUND
86
80
 
87
81
  lex.each { |l|
88
- select(l.form, syn) { |i| i =~ ref } unless com &&
82
+ select(l.form, syn) unless com &&
89
83
  l.attr != LA_COMPOUND || l.attr == LA_SYNONYM
90
84
  }
91
85
 
@@ -97,14 +91,11 @@ class Lingo
97
91
  # Sucht alle Wörterbücher durch und gibt den ersten Treffer zurück (+mode = first+), oder alle Treffer (+mode = all+)
98
92
  def select(str, lex = [])
99
93
  @src.each { |src|
100
- l = src[str] or next
101
- lex.concat(block_given? ? l.delete_if { |i| yield i } : l)
94
+ lex.concat(src[str] || next)
102
95
  break unless @all
103
96
  }
104
97
 
105
- lex.sort!
106
- lex.uniq!
107
-
98
+ lex.empty? && block_given? ? yield(lex) : lex.uniq!
108
99
  lex
109
100
  end
110
101
 
@@ -113,7 +104,17 @@ class Lingo
113
104
  # Sucht alle Wörterbücher durch und gibt den ersten Treffer zurück (+mode = first+), oder alle Treffer (+mode = all+).
114
105
  # Sucht dabei auch Wörter, die um wortklassenspezifische Suffixe bereinigt wurden.
115
106
  def select_with_suffix(str)
116
- select_with_affix(:suffix, str)
107
+ select(str) { |lex|
108
+ each_affix(str) { |form, attr|
109
+ unless (selected = select(form)).empty?
110
+ if selected.first.attr == LA_COMPOUND
111
+ lex.concat(selected) if selected.last.attr?(attr)
112
+ else
113
+ selected.each { |l| lex << l if l.attr?(attr) }
114
+ end
115
+ end
116
+ }
117
+ }
117
118
  end
118
119
 
119
120
  # _dic_.select_with_infix( _aString_ ) -> _ArrayOfLexicals_
@@ -121,44 +122,15 @@ class Lingo
121
122
  # Sucht alle Wörterbücher durch und gibt den ersten Treffer zurück (+mode = first+), oder alle Treffer (+mode = all+).
122
123
  # Sucht dabei auch Wörter, die eine Fugung am Ende haben.
123
124
  def select_with_infix(str)
124
- select_with_affix(:infix, str)
125
- end
126
-
127
- # _dic_.suffix_lexicals( _aString_ ) -> _ArrayOfLexicals_
128
- #
129
- # Gibt alle möglichen Lexicals zurück, die von der Endung her auf den String anwendbar sind:
130
- #
131
- # dic.suffix_lexicals("Hasens") -> [(hasen/s), (hasen/e), (has/e)]
132
- def suffix_lexicals(str)
133
- affix_lexicals(:suffix, str)
134
- end
135
-
136
- # _dic_.gap_lexicals( _aString_ ) -> _ArrayOfLexicals_
137
- #
138
- # Gibt alle möglichen Lexicals zurück, die von der Endung her auf den String anwendbar sind:
139
- def infix_lexicals(str)
140
- affix_lexicals(:infix, str)
141
- end
142
-
143
- private
144
-
145
- def select_with_affix(affix, str)
146
- lex = select(str)
147
-
148
- affix_lexicals(affix, str).each { |a| select(a.form, lex) { |b|
149
- affix == :suffix && a.attr != b.attr
150
- } } if lex.empty?
151
-
152
- lex
125
+ select(str) { |lex|
126
+ each_affix(str, :infix) { |form, _| select(form, lex) }
127
+ }
153
128
  end
154
129
 
155
- def affix_lexicals(affix, str)
156
- lex = instance_variable_get("@#{affix}es").map { |r, e, t|
157
- Lexical.new("#{$`}#{e == '*' ? '' : e}#{$'}", t) if str =~ r
130
+ def each_affix(str, affix = :suffix)
131
+ instance_variable_get("@#{affix}es").each { |r, e, t|
132
+ yield "#{$`}#{e == '*' ? '' : e}#{$'}", t if str =~ r
158
133
  }
159
-
160
- lex.compact!
161
- lex
162
134
  end
163
135
 
164
136
  end