lingo 1.8.1 → 1.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. data/ChangeLog +23 -5
  2. data/README +1 -1
  3. data/Rakefile +5 -7
  4. data/TODO +2 -0
  5. data/bin/lingo +5 -1
  6. data/de.lang +1 -1
  7. data/en/lingo-syn.txt +0 -0
  8. data/en.lang +2 -1
  9. data/lib/lingo/attendee/abbreviator.rb +8 -9
  10. data/lib/lingo/attendee/debugger.rb +5 -4
  11. data/lib/lingo/attendee/decomposer.rb +8 -3
  12. data/lib/lingo/attendee/dehyphenizer.rb +19 -63
  13. data/lib/lingo/attendee/formatter.rb +1 -1
  14. data/lib/lingo/attendee/multi_worder.rb +67 -155
  15. data/lib/lingo/attendee/noneword_filter.rb +16 -9
  16. data/lib/lingo/attendee/object_filter.rb +1 -1
  17. data/lib/lingo/attendee/sequencer.rb +32 -63
  18. data/lib/lingo/attendee/stemmer/porter.rb +343 -0
  19. data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
  20. data/lib/lingo/attendee/synonymer.rb +10 -9
  21. data/lib/lingo/attendee/text_reader.rb +102 -76
  22. data/lib/lingo/attendee/text_writer.rb +23 -26
  23. data/lib/lingo/attendee/tokenizer.rb +13 -27
  24. data/lib/lingo/attendee/variator.rb +26 -66
  25. data/lib/lingo/attendee/vector_filter.rb +42 -43
  26. data/lib/lingo/attendee/word_searcher.rb +6 -7
  27. data/lib/lingo/attendee.rb +25 -7
  28. data/lib/lingo/buffered_attendee.rb +36 -10
  29. data/lib/lingo/cachable.rb +8 -8
  30. data/lib/lingo/config.rb +5 -6
  31. data/lib/lingo/ctl.rb +2 -3
  32. data/lib/lingo/database/crypter.rb +9 -26
  33. data/lib/lingo/database/gdbm_store.rb +3 -5
  34. data/lib/lingo/database/libcdb_store.rb +4 -6
  35. data/lib/lingo/database/sdbm_store.rb +11 -6
  36. data/lib/lingo/database/show_progress.rb +3 -43
  37. data/lib/lingo/database/source/key_value.rb +2 -6
  38. data/lib/lingo/database/source/multi_key.rb +3 -5
  39. data/lib/lingo/database/source/multi_value.rb +2 -6
  40. data/lib/lingo/database/source/single_word.rb +4 -6
  41. data/lib/lingo/database/source/word_class.rb +4 -10
  42. data/lib/lingo/database/source.rb +20 -18
  43. data/lib/lingo/database.rb +84 -59
  44. data/lib/lingo/error.rb +57 -1
  45. data/lib/lingo/language/dictionary.rb +21 -18
  46. data/lib/lingo/language/grammar.rb +40 -49
  47. data/lib/lingo/language/lexical.rb +6 -6
  48. data/lib/lingo/language/lexical_hash.rb +6 -0
  49. data/lib/lingo/language/word.rb +32 -15
  50. data/lib/lingo/language/word_form.rb +1 -1
  51. data/lib/lingo/language.rb +14 -25
  52. data/lib/lingo/reportable.rb +12 -10
  53. data/lib/lingo/show_progress.rb +81 -0
  54. data/lib/lingo/version.rb +1 -1
  55. data/lib/lingo.rb +63 -24
  56. data/lingo-call.cfg +6 -10
  57. data/lingo.cfg +60 -44
  58. data/lir.cfg +42 -41
  59. data/test/attendee/ts_abbreviator.rb +3 -5
  60. data/test/attendee/ts_decomposer.rb +3 -5
  61. data/test/attendee/ts_multi_worder.rb +87 -145
  62. data/test/attendee/ts_noneword_filter.rb +5 -3
  63. data/test/attendee/ts_object_filter.rb +5 -3
  64. data/test/attendee/ts_sequencer.rb +3 -5
  65. data/test/attendee/ts_stemmer.rb +309 -0
  66. data/test/attendee/ts_synonymer.rb +15 -11
  67. data/test/attendee/ts_text_reader.rb +12 -15
  68. data/test/attendee/ts_text_writer.rb +24 -29
  69. data/test/attendee/ts_tokenizer.rb +9 -7
  70. data/test/attendee/ts_variator.rb +4 -4
  71. data/test/attendee/ts_vector_filter.rb +24 -16
  72. data/test/attendee/ts_word_searcher.rb +20 -36
  73. data/test/{lir.csv → lir.vec} +0 -0
  74. data/test/ref/artikel.vec +943 -943
  75. data/test/ref/artikel.ven +943 -943
  76. data/test/ref/lir.non +201 -201
  77. data/test/ref/lir.seq +178 -178
  78. data/test/ref/lir.syn +49 -49
  79. data/test/ref/lir.vec +329 -0
  80. data/test/test_helper.rb +20 -36
  81. data/test/ts_database.rb +10 -10
  82. data/test/ts_language.rb +279 -319
  83. metadata +93 -104
  84. data/info/Objekte.png +0 -0
  85. data/info/Typen.png +0 -0
  86. data/info/database.png +0 -0
  87. data/info/db_small.png +0 -0
  88. data/info/download.png +0 -0
  89. data/info/kerze.png +0 -0
  90. data/info/language.png +0 -0
  91. data/info/lingo.png +0 -0
  92. data/info/logo.png +0 -0
  93. data/info/meeting.png +0 -0
  94. data/info/types.png +0 -0
  95. data/lingo-all.cfg +0 -89
  96. data/porter/stem.cfg +0 -311
  97. data/porter/stem.rb +0 -150
  98. data/test/ref/lir.csv +0 -329
  99. data/test.cfg +0 -79
@@ -29,7 +29,7 @@ class Lingo
29
29
  module Language
30
30
 
31
31
  # Die Klasse Grammar beinhaltet grammatikalische Spezialitäten einer Sprache. Derzeit findet die
32
- # Kompositumerkennung hier ihren Platz, die mit der Methode find_compositum aufgerufen werden kann.
32
+ # Kompositumerkennung hier ihren Platz, die mit der Methode find_compound aufgerufen werden kann.
33
33
  # Die Klasse Grammar wird genau wie ein Dictionary initialisiert. Das bei der Initialisierung angegebene Wörterbuch ist Grundlage
34
34
  # für die Erkennung der Kompositumteile.
35
35
 
@@ -40,31 +40,26 @@ class Lingo
40
40
 
41
41
  HYPHEN_RE = %r{\A(.+)-([^-]+)\z}
42
42
 
43
- # initialize(config, dictionary_config) -> _Grammar_
44
- # config = Attendee-spezifische Parameter
45
- # dictionary_config = Datenbankkonfiguration aus de.lang
43
+ def self.open(*args)
44
+ yield grammar = new(*args)
45
+ ensure
46
+ grammar.close if grammar
47
+ end
48
+
46
49
  def initialize(config, lingo)
47
50
  init_cachable
48
51
  init_reportable
49
52
 
50
53
  @dic, @suggestions = Dictionary.new(config, lingo), []
51
54
 
52
- cfg = lingo.dictionary_config['compositum']
53
-
54
- # Ein Wort muss mindestens 8 Zeichen lang sein, damit
55
- # überhaupt eine Prüfung stattfindet.
56
- @min_word_size = (cfg['min-word-size'] || 8).to_i
57
-
58
- # Die durchschnittliche Länge der Kompositum-Wortteile
59
- # muss mindestens 4 Zeichen lang sein, sonst ist es kein
60
- # gültiges Kompositum.
61
- @min_avg_part_size = (cfg['min-avg-part-size'] || 4).to_i
62
-
63
- # Der kürzeste Kompositum-Wortteil muss mindestens 1 Zeichen lang sein
64
- @min_part_size = (cfg['min-part-size'] || 1).to_i
55
+ cfg = lingo.dictionary_config['compound'] ||
56
+ lingo.dictionary_config['compositum'] # DEPRECATE compositum
65
57
 
66
- # Ein Kompositum darf aus höchstens 4 Wortteilen bestehen
67
- @max_parts = (cfg['max-parts'] || 4).to_i
58
+ {
59
+ min_word_size: 8, min_avg_part_size: 4, min_part_size: 1, max_parts: 4
60
+ }.each { |k, v|
61
+ instance_variable_set("@#{k}", cfg.fetch(k.to_s.tr('_', '-'), v).to_i)
62
+ }
68
63
 
69
64
  # Die Wortklasse eines Kompositum-Wortteils kann separat gekennzeichnet
70
65
  # werden, um sie von Wortklassen normaler Wörter unterscheiden zu
@@ -75,7 +70,7 @@ class Lingo
75
70
  # Bestimmte Sequenzen können als ungültige Komposita erkannt werden,
76
71
  # z.B. ist ein Kompositum aus zwei Adjetiven kein Kompositum, also
77
72
  # skip-sequence = 'aa'
78
- @sequences = cfg.fetch('skip-sequences', []).map(&:downcase)
73
+ @sequences = cfg.fetch('skip-sequences', []).map!(&:downcase)
79
74
  end
80
75
 
81
76
  def close
@@ -86,12 +81,12 @@ class Lingo
86
81
  super.update(@dic.report)
87
82
  end
88
83
 
89
- # find_compositum(str) -> word wenn level=1
90
- # find_compositum(str) -> [lex, sta] wenn level!=1
84
+ # find_compound(str) -> word wenn level=1
85
+ # find_compound(str) -> [lex, sta] wenn level!=1
91
86
  #
92
- # find_compositum arbeitet in verschiedenen Leveln, da die Methode auch rekursiv aufgerufen wird. Ein Level größer 1
87
+ # find_compound arbeitet in verschiedenen Leveln, da die Methode auch rekursiv aufgerufen wird. Ein Level größer 1
93
88
  # entspricht daher einem rekursiven Aufruf
94
- def find_compositum(str, level = 1, tail = false)
89
+ def find_compound(str, level = 1, tail = false)
95
90
  key, top, empty = str.downcase, level == 1, [[], [], '']
96
91
 
97
92
  if top && hit?(key)
@@ -108,16 +103,21 @@ class Lingo
108
103
 
109
104
  inc('Komposita geprüft')
110
105
 
111
- res = permute_compositum(key, level, tail)
112
- val = !(lex = res.first).empty? && valid?(str, *res[1..-1])
106
+ lex, sta, seq = res = permute_compound(key, level, tail)
107
+
108
+ val = !lex.empty? &&
109
+ sta.size <= @max_parts &&
110
+ sta.min >= @min_part_size &&
111
+ str.length / sta.size >= @min_avg_part_size &&
112
+ (@sequences.empty? || !@sequences.include?(seq))
113
113
 
114
114
  if top
115
115
  if val
116
116
  inc('Komposita erkannt')
117
117
 
118
- com.attr = WA_KOMPOSITUM
118
+ com.attr = WA_COMPOUND
119
119
  com.lexicals = lex.map { |l|
120
- l.attr == LA_KOMPOSITUM ? l :
120
+ l.attr == LA_COMPOUND ? l :
121
121
  Lexical.new(l.form, l.attr + @append_wc)
122
122
  }
123
123
  end
@@ -128,14 +128,14 @@ class Lingo
128
128
  end
129
129
  end
130
130
 
131
- # permute_compositum( _aString_ ) -> [lex, sta, seq]
132
- def permute_compositum(str, level, tail)
133
- return test_compositum($1, '-', $2, level, tail) if str =~ HYPHEN_RE
131
+ # permute_compound( _aString_ ) -> [lex, sta, seq]
132
+ def permute_compound(str, level = 1, tail = false)
133
+ return test_compound($1, '-', $2, level, tail) if str =~ HYPHEN_RE
134
134
 
135
135
  sug, len = @suggestions[level] ||= [], str.length
136
136
 
137
137
  1.upto(len - 1) { |i|
138
- res = test_compositum(str[0, i], '', str[i, len], level, tail)
138
+ res = test_compound(str[0, i], '', str[i, len], level, tail)
139
139
 
140
140
  unless (lex = res.first).empty?
141
141
  return res unless lex.last.attr == LA_TAKEITASIS
@@ -146,10 +146,10 @@ class Lingo
146
146
  sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
147
147
  end
148
148
 
149
- # test_compositum() -> [lex, sta, seq]
149
+ # test_compound() -> [lex, sta, seq]
150
150
  #
151
151
  # Testet einen definiert zerlegten String auf Kompositum
152
- def test_compositum(fstr, infix, bstr, level, tail)
152
+ def test_compound(fstr, infix, bstr, level = 1, tail = false)
153
153
  sta, seq, empty = [fstr.length, bstr.length], %w[? ?], [[], [], '']
154
154
 
155
155
  if !(blex = @dic.select_with_suffix(bstr)).sort!.empty?
@@ -159,10 +159,10 @@ class Lingo
159
159
  # 2. Word w/ infix, unless tail part
160
160
  bform, seq[1] = bstr, blex.first.attr
161
161
  elsif infix == '-'
162
- blex, bsta, bseq = find_compositum(bstr, level + 1, tail)
162
+ blex, bsta, bseq = find_compound(bstr, level + 1, tail)
163
163
 
164
164
  if !blex.sort!.empty?
165
- # 3. Compositum
165
+ # 3. Compound
166
166
  bform, seq[1], sta[1..-1] = blex.first.form, bseq, bsta
167
167
  else
168
168
  # 4. Take it as is
@@ -176,10 +176,10 @@ class Lingo
176
176
  # 1. Word w/ infix
177
177
  fform, seq[0] = fstr, flex.first.attr
178
178
  else
179
- flex, fsta, fseq = find_compositum(fstr, level + 1, true)
179
+ flex, fsta, fseq = find_compound(fstr, level + 1, true)
180
180
 
181
181
  if !flex.sort!.empty?
182
- # 2. Compositum
182
+ # 2. Compound
183
183
  fform, seq[0], sta[0..0] = flex.first.form, fseq, fsta
184
184
  elsif infix == '-'
185
185
  # 3. Take it as is
@@ -189,21 +189,12 @@ class Lingo
189
189
  end
190
190
  end
191
191
 
192
- flex.concat(blex).delete_if { |l| l.attr == LA_KOMPOSITUM }.
193
- push(Lexical.new(fform + infix + bform, LA_KOMPOSITUM)).sort!
192
+ flex.concat(blex).delete_if { |l| l.attr == LA_COMPOUND }.
193
+ push(Lexical.new(fform + infix + bform, LA_COMPOUND)).sort!
194
194
 
195
195
  [flex, sta, seq.join]
196
196
  end
197
197
 
198
- private
199
-
200
- def valid?(str, sta, seq)
201
- sta.size <= @max_parts &&
202
- sta.sort.first >= @min_part_size &&
203
- str.length / sta.size >= @min_avg_part_size &&
204
- (@sequences.empty? || !@sequences.include?(seq))
205
- end
206
-
207
198
  end
208
199
 
209
200
  end
@@ -39,14 +39,14 @@ class Lingo
39
39
  def <=>(other)
40
40
  return 1 unless other.is_a?(self.class)
41
41
 
42
- if attr == other.attr
42
+ a1, a2 = attr, other.attr
43
+
44
+ if a1 == a2
43
45
  form <=> other.form
44
46
  else
45
- attr.empty? ? 1 : other.attr.empty? ? -1 : begin
46
- a = LA_SORTORDER.index(attr)
47
- b = LA_SORTORDER.index(other.attr)
48
-
49
- a ? b ? b <=> a : -1 : b ? 1 : attr <=> other.attr
47
+ a1.empty? ? 1 : a2.empty? ? -1 : begin
48
+ i1, i2 = [a1, a2].map(&LA_SORTORDER.method(:index))
49
+ i1 ? i2 ? i2 <=> i1 : -1 : i2 ? 1 : a1 <=> a2
50
50
  end
51
51
  end
52
52
  end
@@ -37,6 +37,12 @@ class Lingo
37
37
  include Cachable
38
38
  include Reportable
39
39
 
40
+ def self.open(*args)
41
+ yield lexical_hash = new(*args)
42
+ ensure
43
+ lexical_hash.close if lexical_hash
44
+ end
45
+
40
46
  def initialize(id, lingo)
41
47
  init_cachable
42
48
  init_reportable(id)
@@ -33,8 +33,16 @@ class Lingo
33
33
 
34
34
  class Word < WordForm
35
35
 
36
- def self.new_lexical(form, attr, lex_attr)
37
- new(form, attr) << Lexical.new(form, lex_attr)
36
+ class << self
37
+
38
+ def new_lexicals(form, attr, lex)
39
+ new(form, attr) << lex
40
+ end
41
+
42
+ def new_lexical(form, attr, lex_attr)
43
+ new_lexicals(form, attr, Lexical.new(form, lex_attr))
44
+ end
45
+
38
46
  end
39
47
 
40
48
  # Exakte Representation der originären Zeichenkette, so wie sie im Satz
@@ -56,23 +64,32 @@ class Lingo
56
64
  end
57
65
 
58
66
  def lexicals(compound_parts = true)
59
- if !compound_parts && attr == WA_KOMPOSITUM
60
- @lexicals.select { |lex| lex.attr == LA_KOMPOSITUM }
67
+ if !compound_parts && attr == WA_COMPOUND
68
+ @lexicals.select { |lex| lex.attr == LA_COMPOUND }
61
69
  else
62
70
  @lexicals
63
71
  end
64
72
  end
65
73
 
66
- def lexicals=(lexis)
67
- if lexis.is_a?(Array)
68
- @lexicals = lexis.sort.uniq
74
+ def lexicals=(lex)
75
+ if lex.is_a?(Array)
76
+ @lexicals = lex.sort.uniq
69
77
  else
70
- raise TypeError, "wrong argument type #{lexis.class} (expected Array)"
78
+ raise TypeError, "wrong argument type #{lex.class} (expected Array)"
71
79
  end
72
80
  end
73
81
 
82
+ def add_lexicals(lex)
83
+ @lexicals.concat(lex)
84
+
85
+ @lexicals.sort!
86
+ @lexicals.uniq!
87
+
88
+ self
89
+ end
90
+
74
91
  def attrs(compound_parts = true)
75
- lexicals(compound_parts).map { |lex| lex.attr }
92
+ lexicals(compound_parts).map(&:attr)
76
93
  end
77
94
 
78
95
  def parts
@@ -100,15 +117,15 @@ class Lingo
100
117
  end
101
118
 
102
119
  def compo_form
103
- if attr == WA_KOMPOSITUM
104
- get_class(LA_KOMPOSITUM).first
105
- else
106
- nil
107
- end
120
+ get_class(LA_COMPOUND).first if attr == WA_COMPOUND
121
+ end
122
+
123
+ def full_compound?
124
+ attr == WA_COMPOUND && get_class('x+').empty?
108
125
  end
109
126
 
110
127
  def <<(*other)
111
- lexicals.concat(other.flatten)
128
+ lexicals.concat(other.tap(&:flatten!))
112
129
  self
113
130
  end
114
131
 
@@ -71,7 +71,7 @@ class Lingo
71
71
  end
72
72
 
73
73
  def eql?(other)
74
- self.class.equal?(other.class) && to_s == other.to_s
74
+ self.class.equal?(other.class) && (self <=> other) == 0
75
75
  end
76
76
 
77
77
  alias_method :==, :eql?
@@ -50,7 +50,7 @@ class Lingo
50
50
  # Status, wenn das Word nicht gefunden werden konnte
51
51
  WA_UNKNOWN = '?'
52
52
  # Wort ist als Kompositum erkannt worden
53
- WA_KOMPOSITUM = 'KOM'
53
+ WA_COMPOUND = 'KOM'
54
54
  # Wort ist eine Mehrwortgruppe
55
55
  WA_MULTIWORD = 'MUL'
56
56
  # Wort ist eine Mehrwortgruppe
@@ -58,31 +58,20 @@ class Lingo
58
58
  # Word ist unbekannt, jedoch Teil einer Mehrwortgruppe
59
59
  WA_UNKMULPART = 'MU?'
60
60
 
61
- LA_SUBSTANTIV = 's'
62
- LA_ADJEKTIV = 'a'
63
- LA_VERB = 'v'
64
- LA_EIGENNAME = 'e'
65
- LA_KOMPOSITUM = 'k'
66
- LA_MULTIWORD = 'm'
67
- LA_SEQUENCE = 'q'
68
- LA_WORTFORM = 'w'
69
- LA_SYNONYM = 'y'
70
- LA_STOPWORD = 't'
71
- LA_TAKEITASIS = 'x'
72
- LA_UNKNOWN = '?'
73
-
74
61
  LA_SORTORDER = [
75
- LA_MULTIWORD,
76
- LA_KOMPOSITUM,
77
- LA_SUBSTANTIV,
78
- LA_VERB,
79
- LA_ADJEKTIV,
80
- LA_EIGENNAME,
81
- LA_WORTFORM,
82
- LA_STOPWORD,
83
- LA_TAKEITASIS,
84
- LA_SYNONYM,
85
- LA_UNKNOWN
62
+ LA_SEQUENCE = 'q',
63
+ LA_MULTIWORD = 'm',
64
+ LA_COMPOUND = 'k',
65
+ LA_NOUN = 's',
66
+ LA_VERB = 'v',
67
+ LA_ADJECTIVE = 'a',
68
+ LA_NAME = 'e',
69
+ LA_WORDFORM = 'w',
70
+ LA_STOPWORD = 't',
71
+ LA_TAKEITASIS = 'x',
72
+ LA_SYNONYM = 'y',
73
+ LA_STEM = 'z',
74
+ LA_UNKNOWN = '?'
86
75
  ].reverse.join
87
76
 
88
77
  end
@@ -31,27 +31,29 @@ class Lingo
31
31
  module Reportable
32
32
 
33
33
  def init_reportable(prefix = nil)
34
- @counters, @prefix = Hash.new(0), prefix ? "#{prefix}: " : ''
34
+ @reportable_hash = Hash.new(0)
35
+ @reportable_prefix = prefix ? "#{prefix}: " : ''
35
36
  end
36
37
 
37
- def inc(counter)
38
- @counters[counter] += 1
38
+ def inc(key)
39
+ @reportable_hash[key] += 1
39
40
  end
40
41
 
41
- def add(counter, value)
42
- @counters[counter] += value
42
+ def add(key, val)
43
+ @reportable_hash[key] += val
43
44
  end
44
45
 
45
- def set(counter, value)
46
- @counters[counter] = value
46
+ def set(key, val)
47
+ @reportable_hash[key] = val
47
48
  end
48
49
 
49
- def get(counter)
50
- @counters[counter]
50
+ def get(key)
51
+ @reportable_hash[key]
51
52
  end
52
53
 
53
54
  def report
54
- @counters.each_with_object({}) { |(k, v), r| r["#{@prefix}#{k}"] = v }
55
+ q = @reportable_prefix
56
+ @reportable_hash.each_with_object({}) { |(k, v), r| r["#{q}#{k}"] = v }
55
57
  end
56
58
 
57
59
  end
@@ -0,0 +1,81 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class Lingo
28
+
29
+ class ShowProgress
30
+
31
+ def initialize(obj, max, name = nil, doit = true, text = 'progress')
32
+ return yield self unless max && doit
33
+
34
+ @out = obj.instance_variable_get(:@lingo).config.stderr
35
+
36
+ # To get the length of the formatted string we have
37
+ # to actually substitute the placeholder.
38
+ fmt = ' [%3d%%]'
39
+ len = (fmt % 0).length
40
+
41
+ # Now we know how far to "go back" to
42
+ # overwrite the formatted string...
43
+ back = "\b" * len
44
+
45
+ @fmt = fmt + back
46
+ @clr = ' ' * len + back
47
+
48
+ print name, ': ' if name
49
+
50
+ @rat, @cnt, @next = max / 100.0, 0, 0
51
+ print text
52
+ step
53
+
54
+ yield self
55
+
56
+ print "#{@clr} done.\n"
57
+ end
58
+
59
+ def [](value)
60
+ if defined?(@cnt)
61
+ @cnt = value
62
+ step if @cnt >= @next
63
+ end
64
+ end
65
+
66
+ private
67
+
68
+ def step
69
+ percent = @cnt / @rat
70
+ @next = (percent + 1) * @rat
71
+
72
+ print @fmt % percent if percent.finite?
73
+ end
74
+
75
+ def print(*args)
76
+ @out.print(*args)
77
+ end
78
+
79
+ end
80
+
81
+ end
data/lib/lingo/version.rb CHANGED
@@ -4,7 +4,7 @@ class Lingo
4
4
 
5
5
  MAJOR = 1
6
6
  MINOR = 8
7
- TINY = 1
7
+ TINY = 2
8
8
 
9
9
  class << self
10
10
 
data/lib/lingo.rb CHANGED
@@ -25,6 +25,8 @@
25
25
  #++
26
26
 
27
27
  require 'stringio'
28
+ require 'pathname'
29
+ require 'fileutils'
28
30
  require 'benchmark'
29
31
  require 'nuggets/file/ext'
30
32
  require 'nuggets/env/user_home'
@@ -43,7 +45,8 @@ class Lingo
43
45
  CURR = ENV['LINGO_CURR'] || '.'
44
46
 
45
47
  # The search path for Lingo dictionary and configuration files.
46
- PATH = ENV['LINGO_PATH'] || [CURR, HOME, BASE].join(File::PATH_SEPARATOR)
48
+ PATH = ENV['LINGO_PATH'].nil? ? [CURR, HOME, BASE] :
49
+ ENV['LINGO_PATH'].split(File::PATH_SEPARATOR)
47
50
 
48
51
  ENV['LINGO_PLUGIN_PATH'] ||= File.join(HOME, 'plugins')
49
52
 
@@ -59,7 +62,7 @@ class Lingo
59
62
  # Default encoding
60
63
  ENC = 'UTF-8'.freeze
61
64
 
62
- STRING_SEPARATOR_RE = %r{[; ,|]}
65
+ SEP_RE = %r{[; ,|]}
63
66
 
64
67
  class << self
65
68
 
@@ -79,7 +82,7 @@ class Lingo
79
82
  glob = File.join('??', glob) if type == :dict
80
83
 
81
84
  [].tap { |list| walk(path, options) { |dir|
82
- Dir[File.join(dir, glob)].sort.each { |file|
85
+ Dir[File.join(dir, glob)].sort!.each { |file|
83
86
  pn = Pathname.new(file)
84
87
  list << realpath_for(pn, path) if pn.file?
85
88
  }
@@ -110,29 +113,69 @@ class Lingo
110
113
  File.join(options_for(type)[:dir], basename(type, file))
111
114
  end
112
115
 
116
+ def append_path(*path)
117
+ include_path(path)
118
+ end
119
+
120
+ def prepend_path(*path)
121
+ include_path(path, true)
122
+ end
123
+
124
+ def get_const(name, klass = self)
125
+ klass.const_get(name.camelcase)
126
+ rescue NameError
127
+ raise NameNotFoundError.new(klass, name)
128
+ end
129
+
113
130
  private
114
131
 
132
+ def include_path(path, pre = false)
133
+ PATH.insert(pre ? 0 : -1, *path.map!(&:to_s))
134
+ end
135
+
115
136
  def find_file(file, path, options)
116
- pn = Pathname.new(file_with_ext(file, options)).cleanpath
137
+ if glob = options[:glob]
138
+ file = File.chomp_ext(file)
139
+ options[:ext] ||= '*'
140
+ end
141
+
142
+ file = file_with_ext(file, options)
143
+ pn = Pathname.new(file).cleanpath
117
144
 
118
145
  if pn.relative?
119
146
  walk(path, options) { |dir|
120
147
  pn2 = pn.expand_path(dir)
121
- pn = pn2 and break if pn2.exist?
148
+ ex = pn2.exist?
149
+
150
+ pn2 = Pathname.glob(pn2).first if glob && !ex
151
+ pn = pn2 and break if glob ? pn2 : ex
122
152
  }
123
153
  end
124
154
 
125
155
  realpath_for(pn, path)
156
+ rescue Errno::ENOENT
157
+ raise unless relax = options[:relax]
158
+ relax.respond_to?(:[]) ? relax[file] : file
126
159
  end
127
160
 
128
161
  def find_store(file, path, options)
129
- base = basename(:dict, find(:dict, file, path))
162
+ base = basename(:dict, find(:dict, file, path) {
163
+ raise SourceFileNotFoundError.new(nil, find_file(file, path,
164
+ options.merge(glob: true, relax: lambda { |_file|
165
+ raise SourceFileNotFoundError.new(file, _file)
166
+ })
167
+ ))
168
+ })
130
169
 
131
170
  walk(path.reverse, options, false) { |dir|
132
171
  Pathname.new(dir).ascend { |i|
133
- break true if i.file?
134
- return File.chomp_ext(File.join(dir, base)) if i.writable?
135
- break true if i.exist?
172
+ begin
173
+ stat = i.stat
174
+
175
+ break true if stat.file? || !stat.writable?
176
+ return File.chomp_ext(File.join(dir, base))
177
+ rescue Errno::ENOENT
178
+ end
136
179
  }
137
180
  }
138
181
 
@@ -148,7 +191,7 @@ class Lingo
148
191
  end
149
192
 
150
193
  def path_for(options)
151
- options[:path] || PATH.split(File::PATH_SEPARATOR)
194
+ options[:path] || PATH
152
195
  end
153
196
 
154
197
  def file_with_ext(file, options)
@@ -223,30 +266,25 @@ class Lingo
223
266
 
224
267
  list.each { |hash|
225
268
  # {'attendee' => {'name'=>'Attendee', 'in'=>'nase', 'out'=>'ohr', 'param'=>'hase'}}
226
- cfg = hash.values.first.merge('name' => hash.keys.first.camelcase)
269
+ cfg = hash.values.first.merge('name' => name = hash.keys.first.camelcase)
227
270
 
228
271
  %w[in out].each { |key| (cfg[key] ||= '').downcase! }
229
272
 
230
- cfg['in'] = last_link if cfg['in'].empty?
231
- cfg['out'] = "auto_link_out_#{auto_link += 1}" if cfg['out'].empty?
273
+ cfg['in'] = last_link if cfg['in'].empty?
274
+ cfg['out'] = "auto_link-#{auto_link += 1}" if cfg['out'].empty?
232
275
  last_link = cfg['out']
233
276
 
234
- data = config["language/attendees/#{cfg['name'].downcase}"]
235
- cfg.update(data) if data
277
+ cfg.update(config["language/attendees/#{name.downcase}"] || {})
236
278
 
237
- attendee = Attendee.const_get(cfg['name']).new(cfg, self)
238
- @attendees << attendee
279
+ @attendees << attendee = Attendee.const_get(name).new(cfg, self)
239
280
 
240
- cfg['in'].split(STRING_SEPARATOR_RE).each { |interest|
241
- subscriber[interest] << attendee
242
- }
243
- cfg['out'].split(STRING_SEPARATOR_RE).each { |theme|
244
- supplier[theme] << attendee
281
+ { 'in' => subscriber, 'out' => supplier }.each { |key, target|
282
+ cfg[key].split(SEP_RE).each { |ch| target[ch] << attendee }
245
283
  }
246
284
  }
247
285
 
248
- supplier.each { |channel, attendees| attendees.each { |attendee|
249
- attendee.add_subscriber(subscriber[channel])
286
+ supplier.each { |ch, attendees| attendees.each { |att|
287
+ att.add_subscriber(subscriber[ch])
250
288
  } }
251
289
  end
252
290
 
@@ -283,6 +321,7 @@ require_relative 'lingo/core_ext'
283
321
  require_relative 'lingo/cachable'
284
322
  require_relative 'lingo/reportable'
285
323
  require_relative 'lingo/agenda_item'
324
+ require_relative 'lingo/show_progress'
286
325
  require_relative 'lingo/database'
287
326
  require_relative 'lingo/language'
288
327
  require_relative 'lingo/attendee'