lingo 1.8.1 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. data/ChangeLog +23 -5
  2. data/README +1 -1
  3. data/Rakefile +5 -7
  4. data/TODO +2 -0
  5. data/bin/lingo +5 -1
  6. data/de.lang +1 -1
  7. data/en/lingo-syn.txt +0 -0
  8. data/en.lang +2 -1
  9. data/lib/lingo/attendee/abbreviator.rb +8 -9
  10. data/lib/lingo/attendee/debugger.rb +5 -4
  11. data/lib/lingo/attendee/decomposer.rb +8 -3
  12. data/lib/lingo/attendee/dehyphenizer.rb +19 -63
  13. data/lib/lingo/attendee/formatter.rb +1 -1
  14. data/lib/lingo/attendee/multi_worder.rb +67 -155
  15. data/lib/lingo/attendee/noneword_filter.rb +16 -9
  16. data/lib/lingo/attendee/object_filter.rb +1 -1
  17. data/lib/lingo/attendee/sequencer.rb +32 -63
  18. data/lib/lingo/attendee/stemmer/porter.rb +343 -0
  19. data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
  20. data/lib/lingo/attendee/synonymer.rb +10 -9
  21. data/lib/lingo/attendee/text_reader.rb +102 -76
  22. data/lib/lingo/attendee/text_writer.rb +23 -26
  23. data/lib/lingo/attendee/tokenizer.rb +13 -27
  24. data/lib/lingo/attendee/variator.rb +26 -66
  25. data/lib/lingo/attendee/vector_filter.rb +42 -43
  26. data/lib/lingo/attendee/word_searcher.rb +6 -7
  27. data/lib/lingo/attendee.rb +25 -7
  28. data/lib/lingo/buffered_attendee.rb +36 -10
  29. data/lib/lingo/cachable.rb +8 -8
  30. data/lib/lingo/config.rb +5 -6
  31. data/lib/lingo/ctl.rb +2 -3
  32. data/lib/lingo/database/crypter.rb +9 -26
  33. data/lib/lingo/database/gdbm_store.rb +3 -5
  34. data/lib/lingo/database/libcdb_store.rb +4 -6
  35. data/lib/lingo/database/sdbm_store.rb +11 -6
  36. data/lib/lingo/database/show_progress.rb +3 -43
  37. data/lib/lingo/database/source/key_value.rb +2 -6
  38. data/lib/lingo/database/source/multi_key.rb +3 -5
  39. data/lib/lingo/database/source/multi_value.rb +2 -6
  40. data/lib/lingo/database/source/single_word.rb +4 -6
  41. data/lib/lingo/database/source/word_class.rb +4 -10
  42. data/lib/lingo/database/source.rb +20 -18
  43. data/lib/lingo/database.rb +84 -59
  44. data/lib/lingo/error.rb +57 -1
  45. data/lib/lingo/language/dictionary.rb +21 -18
  46. data/lib/lingo/language/grammar.rb +40 -49
  47. data/lib/lingo/language/lexical.rb +6 -6
  48. data/lib/lingo/language/lexical_hash.rb +6 -0
  49. data/lib/lingo/language/word.rb +32 -15
  50. data/lib/lingo/language/word_form.rb +1 -1
  51. data/lib/lingo/language.rb +14 -25
  52. data/lib/lingo/reportable.rb +12 -10
  53. data/lib/lingo/show_progress.rb +81 -0
  54. data/lib/lingo/version.rb +1 -1
  55. data/lib/lingo.rb +63 -24
  56. data/lingo-call.cfg +6 -10
  57. data/lingo.cfg +60 -44
  58. data/lir.cfg +42 -41
  59. data/test/attendee/ts_abbreviator.rb +3 -5
  60. data/test/attendee/ts_decomposer.rb +3 -5
  61. data/test/attendee/ts_multi_worder.rb +87 -145
  62. data/test/attendee/ts_noneword_filter.rb +5 -3
  63. data/test/attendee/ts_object_filter.rb +5 -3
  64. data/test/attendee/ts_sequencer.rb +3 -5
  65. data/test/attendee/ts_stemmer.rb +309 -0
  66. data/test/attendee/ts_synonymer.rb +15 -11
  67. data/test/attendee/ts_text_reader.rb +12 -15
  68. data/test/attendee/ts_text_writer.rb +24 -29
  69. data/test/attendee/ts_tokenizer.rb +9 -7
  70. data/test/attendee/ts_variator.rb +4 -4
  71. data/test/attendee/ts_vector_filter.rb +24 -16
  72. data/test/attendee/ts_word_searcher.rb +20 -36
  73. data/test/{lir.csv → lir.vec} +0 -0
  74. data/test/ref/artikel.vec +943 -943
  75. data/test/ref/artikel.ven +943 -943
  76. data/test/ref/lir.non +201 -201
  77. data/test/ref/lir.seq +178 -178
  78. data/test/ref/lir.syn +49 -49
  79. data/test/ref/lir.vec +329 -0
  80. data/test/test_helper.rb +20 -36
  81. data/test/ts_database.rb +10 -10
  82. data/test/ts_language.rb +279 -319
  83. metadata +93 -104
  84. data/info/Objekte.png +0 -0
  85. data/info/Typen.png +0 -0
  86. data/info/database.png +0 -0
  87. data/info/db_small.png +0 -0
  88. data/info/download.png +0 -0
  89. data/info/kerze.png +0 -0
  90. data/info/language.png +0 -0
  91. data/info/lingo.png +0 -0
  92. data/info/logo.png +0 -0
  93. data/info/meeting.png +0 -0
  94. data/info/types.png +0 -0
  95. data/lingo-all.cfg +0 -89
  96. data/porter/stem.cfg +0 -311
  97. data/porter/stem.rb +0 -150
  98. data/test/ref/lir.csv +0 -329
  99. data/test.cfg +0 -79
@@ -29,7 +29,7 @@ class Lingo
29
29
  module Language
30
30
 
31
31
  # Die Klasse Grammar beinhaltet grammatikalische Spezialitäten einer Sprache. Derzeit findet die
32
- # Kompositumerkennung hier ihren Platz, die mit der Methode find_compositum aufgerufen werden kann.
32
+ # Kompositumerkennung hier ihren Platz, die mit der Methode find_compound aufgerufen werden kann.
33
33
  # Die Klasse Grammar wird genau wie ein Dictionary initialisiert. Das bei der Initialisierung angegebene Wörterbuch ist Grundlage
34
34
  # für die Erkennung der Kompositumteile.
35
35
 
@@ -40,31 +40,26 @@ class Lingo
40
40
 
41
41
  HYPHEN_RE = %r{\A(.+)-([^-]+)\z}
42
42
 
43
- # initialize(config, dictionary_config) -> _Grammar_
44
- # config = Attendee-spezifische Parameter
45
- # dictionary_config = Datenbankkonfiguration aus de.lang
43
+ def self.open(*args)
44
+ yield grammar = new(*args)
45
+ ensure
46
+ grammar.close if grammar
47
+ end
48
+
46
49
  def initialize(config, lingo)
47
50
  init_cachable
48
51
  init_reportable
49
52
 
50
53
  @dic, @suggestions = Dictionary.new(config, lingo), []
51
54
 
52
- cfg = lingo.dictionary_config['compositum']
53
-
54
- # Ein Wort muss mindestens 8 Zeichen lang sein, damit
55
- # überhaupt eine Prüfung stattfindet.
56
- @min_word_size = (cfg['min-word-size'] || 8).to_i
57
-
58
- # Die durchschnittliche Länge der Kompositum-Wortteile
59
- # muss mindestens 4 Zeichen lang sein, sonst ist es kein
60
- # gültiges Kompositum.
61
- @min_avg_part_size = (cfg['min-avg-part-size'] || 4).to_i
62
-
63
- # Der kürzeste Kompositum-Wortteil muss mindestens 1 Zeichen lang sein
64
- @min_part_size = (cfg['min-part-size'] || 1).to_i
55
+ cfg = lingo.dictionary_config['compound'] ||
56
+ lingo.dictionary_config['compositum'] # DEPRECATE compositum
65
57
 
66
- # Ein Kompositum darf aus höchstens 4 Wortteilen bestehen
67
- @max_parts = (cfg['max-parts'] || 4).to_i
58
+ {
59
+ min_word_size: 8, min_avg_part_size: 4, min_part_size: 1, max_parts: 4
60
+ }.each { |k, v|
61
+ instance_variable_set("@#{k}", cfg.fetch(k.to_s.tr('_', '-'), v).to_i)
62
+ }
68
63
 
69
64
  # Die Wortklasse eines Kompositum-Wortteils kann separat gekennzeichnet
70
65
  # werden, um sie von Wortklassen normaler Wörter unterscheiden zu
@@ -75,7 +70,7 @@ class Lingo
75
70
  # Bestimmte Sequenzen können als ungültige Komposita erkannt werden,
76
71
  # z.B. ist ein Kompositum aus zwei Adjetiven kein Kompositum, also
77
72
  # skip-sequence = 'aa'
78
- @sequences = cfg.fetch('skip-sequences', []).map(&:downcase)
73
+ @sequences = cfg.fetch('skip-sequences', []).map!(&:downcase)
79
74
  end
80
75
 
81
76
  def close
@@ -86,12 +81,12 @@ class Lingo
86
81
  super.update(@dic.report)
87
82
  end
88
83
 
89
- # find_compositum(str) -> word wenn level=1
90
- # find_compositum(str) -> [lex, sta] wenn level!=1
84
+ # find_compound(str) -> word wenn level=1
85
+ # find_compound(str) -> [lex, sta] wenn level!=1
91
86
  #
92
- # find_compositum arbeitet in verschiedenen Leveln, da die Methode auch rekursiv aufgerufen wird. Ein Level größer 1
87
+ # find_compound arbeitet in verschiedenen Leveln, da die Methode auch rekursiv aufgerufen wird. Ein Level größer 1
93
88
  # entspricht daher einem rekursiven Aufruf
94
- def find_compositum(str, level = 1, tail = false)
89
+ def find_compound(str, level = 1, tail = false)
95
90
  key, top, empty = str.downcase, level == 1, [[], [], '']
96
91
 
97
92
  if top && hit?(key)
@@ -108,16 +103,21 @@ class Lingo
108
103
 
109
104
  inc('Komposita geprüft')
110
105
 
111
- res = permute_compositum(key, level, tail)
112
- val = !(lex = res.first).empty? && valid?(str, *res[1..-1])
106
+ lex, sta, seq = res = permute_compound(key, level, tail)
107
+
108
+ val = !lex.empty? &&
109
+ sta.size <= @max_parts &&
110
+ sta.min >= @min_part_size &&
111
+ str.length / sta.size >= @min_avg_part_size &&
112
+ (@sequences.empty? || !@sequences.include?(seq))
113
113
 
114
114
  if top
115
115
  if val
116
116
  inc('Komposita erkannt')
117
117
 
118
- com.attr = WA_KOMPOSITUM
118
+ com.attr = WA_COMPOUND
119
119
  com.lexicals = lex.map { |l|
120
- l.attr == LA_KOMPOSITUM ? l :
120
+ l.attr == LA_COMPOUND ? l :
121
121
  Lexical.new(l.form, l.attr + @append_wc)
122
122
  }
123
123
  end
@@ -128,14 +128,14 @@ class Lingo
128
128
  end
129
129
  end
130
130
 
131
- # permute_compositum( _aString_ ) -> [lex, sta, seq]
132
- def permute_compositum(str, level, tail)
133
- return test_compositum($1, '-', $2, level, tail) if str =~ HYPHEN_RE
131
+ # permute_compound( _aString_ ) -> [lex, sta, seq]
132
+ def permute_compound(str, level = 1, tail = false)
133
+ return test_compound($1, '-', $2, level, tail) if str =~ HYPHEN_RE
134
134
 
135
135
  sug, len = @suggestions[level] ||= [], str.length
136
136
 
137
137
  1.upto(len - 1) { |i|
138
- res = test_compositum(str[0, i], '', str[i, len], level, tail)
138
+ res = test_compound(str[0, i], '', str[i, len], level, tail)
139
139
 
140
140
  unless (lex = res.first).empty?
141
141
  return res unless lex.last.attr == LA_TAKEITASIS
@@ -146,10 +146,10 @@ class Lingo
146
146
  sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
147
147
  end
148
148
 
149
- # test_compositum() -> [lex, sta, seq]
149
+ # test_compound() -> [lex, sta, seq]
150
150
  #
151
151
  # Testet einen definiert zerlegten String auf Kompositum
152
- def test_compositum(fstr, infix, bstr, level, tail)
152
+ def test_compound(fstr, infix, bstr, level = 1, tail = false)
153
153
  sta, seq, empty = [fstr.length, bstr.length], %w[? ?], [[], [], '']
154
154
 
155
155
  if !(blex = @dic.select_with_suffix(bstr)).sort!.empty?
@@ -159,10 +159,10 @@ class Lingo
159
159
  # 2. Word w/ infix, unless tail part
160
160
  bform, seq[1] = bstr, blex.first.attr
161
161
  elsif infix == '-'
162
- blex, bsta, bseq = find_compositum(bstr, level + 1, tail)
162
+ blex, bsta, bseq = find_compound(bstr, level + 1, tail)
163
163
 
164
164
  if !blex.sort!.empty?
165
- # 3. Compositum
165
+ # 3. Compound
166
166
  bform, seq[1], sta[1..-1] = blex.first.form, bseq, bsta
167
167
  else
168
168
  # 4. Take it as is
@@ -176,10 +176,10 @@ class Lingo
176
176
  # 1. Word w/ infix
177
177
  fform, seq[0] = fstr, flex.first.attr
178
178
  else
179
- flex, fsta, fseq = find_compositum(fstr, level + 1, true)
179
+ flex, fsta, fseq = find_compound(fstr, level + 1, true)
180
180
 
181
181
  if !flex.sort!.empty?
182
- # 2. Compositum
182
+ # 2. Compound
183
183
  fform, seq[0], sta[0..0] = flex.first.form, fseq, fsta
184
184
  elsif infix == '-'
185
185
  # 3. Take it as is
@@ -189,21 +189,12 @@ class Lingo
189
189
  end
190
190
  end
191
191
 
192
- flex.concat(blex).delete_if { |l| l.attr == LA_KOMPOSITUM }.
193
- push(Lexical.new(fform + infix + bform, LA_KOMPOSITUM)).sort!
192
+ flex.concat(blex).delete_if { |l| l.attr == LA_COMPOUND }.
193
+ push(Lexical.new(fform + infix + bform, LA_COMPOUND)).sort!
194
194
 
195
195
  [flex, sta, seq.join]
196
196
  end
197
197
 
198
- private
199
-
200
- def valid?(str, sta, seq)
201
- sta.size <= @max_parts &&
202
- sta.sort.first >= @min_part_size &&
203
- str.length / sta.size >= @min_avg_part_size &&
204
- (@sequences.empty? || !@sequences.include?(seq))
205
- end
206
-
207
198
  end
208
199
 
209
200
  end
@@ -39,14 +39,14 @@ class Lingo
39
39
  def <=>(other)
40
40
  return 1 unless other.is_a?(self.class)
41
41
 
42
- if attr == other.attr
42
+ a1, a2 = attr, other.attr
43
+
44
+ if a1 == a2
43
45
  form <=> other.form
44
46
  else
45
- attr.empty? ? 1 : other.attr.empty? ? -1 : begin
46
- a = LA_SORTORDER.index(attr)
47
- b = LA_SORTORDER.index(other.attr)
48
-
49
- a ? b ? b <=> a : -1 : b ? 1 : attr <=> other.attr
47
+ a1.empty? ? 1 : a2.empty? ? -1 : begin
48
+ i1, i2 = [a1, a2].map(&LA_SORTORDER.method(:index))
49
+ i1 ? i2 ? i2 <=> i1 : -1 : i2 ? 1 : a1 <=> a2
50
50
  end
51
51
  end
52
52
  end
@@ -37,6 +37,12 @@ class Lingo
37
37
  include Cachable
38
38
  include Reportable
39
39
 
40
+ def self.open(*args)
41
+ yield lexical_hash = new(*args)
42
+ ensure
43
+ lexical_hash.close if lexical_hash
44
+ end
45
+
40
46
  def initialize(id, lingo)
41
47
  init_cachable
42
48
  init_reportable(id)
@@ -33,8 +33,16 @@ class Lingo
33
33
 
34
34
  class Word < WordForm
35
35
 
36
- def self.new_lexical(form, attr, lex_attr)
37
- new(form, attr) << Lexical.new(form, lex_attr)
36
+ class << self
37
+
38
+ def new_lexicals(form, attr, lex)
39
+ new(form, attr) << lex
40
+ end
41
+
42
+ def new_lexical(form, attr, lex_attr)
43
+ new_lexicals(form, attr, Lexical.new(form, lex_attr))
44
+ end
45
+
38
46
  end
39
47
 
40
48
  # Exakte Representation der originären Zeichenkette, so wie sie im Satz
@@ -56,23 +64,32 @@ class Lingo
56
64
  end
57
65
 
58
66
  def lexicals(compound_parts = true)
59
- if !compound_parts && attr == WA_KOMPOSITUM
60
- @lexicals.select { |lex| lex.attr == LA_KOMPOSITUM }
67
+ if !compound_parts && attr == WA_COMPOUND
68
+ @lexicals.select { |lex| lex.attr == LA_COMPOUND }
61
69
  else
62
70
  @lexicals
63
71
  end
64
72
  end
65
73
 
66
- def lexicals=(lexis)
67
- if lexis.is_a?(Array)
68
- @lexicals = lexis.sort.uniq
74
+ def lexicals=(lex)
75
+ if lex.is_a?(Array)
76
+ @lexicals = lex.sort.uniq
69
77
  else
70
- raise TypeError, "wrong argument type #{lexis.class} (expected Array)"
78
+ raise TypeError, "wrong argument type #{lex.class} (expected Array)"
71
79
  end
72
80
  end
73
81
 
82
+ def add_lexicals(lex)
83
+ @lexicals.concat(lex)
84
+
85
+ @lexicals.sort!
86
+ @lexicals.uniq!
87
+
88
+ self
89
+ end
90
+
74
91
  def attrs(compound_parts = true)
75
- lexicals(compound_parts).map { |lex| lex.attr }
92
+ lexicals(compound_parts).map(&:attr)
76
93
  end
77
94
 
78
95
  def parts
@@ -100,15 +117,15 @@ class Lingo
100
117
  end
101
118
 
102
119
  def compo_form
103
- if attr == WA_KOMPOSITUM
104
- get_class(LA_KOMPOSITUM).first
105
- else
106
- nil
107
- end
120
+ get_class(LA_COMPOUND).first if attr == WA_COMPOUND
121
+ end
122
+
123
+ def full_compound?
124
+ attr == WA_COMPOUND && get_class('x+').empty?
108
125
  end
109
126
 
110
127
  def <<(*other)
111
- lexicals.concat(other.flatten)
128
+ lexicals.concat(other.tap(&:flatten!))
112
129
  self
113
130
  end
114
131
 
@@ -71,7 +71,7 @@ class Lingo
71
71
  end
72
72
 
73
73
  def eql?(other)
74
- self.class.equal?(other.class) && to_s == other.to_s
74
+ self.class.equal?(other.class) && (self <=> other) == 0
75
75
  end
76
76
 
77
77
  alias_method :==, :eql?
@@ -50,7 +50,7 @@ class Lingo
50
50
  # Status, wenn das Word nicht gefunden werden konnte
51
51
  WA_UNKNOWN = '?'
52
52
  # Wort ist als Kompositum erkannt worden
53
- WA_KOMPOSITUM = 'KOM'
53
+ WA_COMPOUND = 'KOM'
54
54
  # Wort ist eine Mehrwortgruppe
55
55
  WA_MULTIWORD = 'MUL'
56
56
  # Wort ist eine Mehrwortgruppe
@@ -58,31 +58,20 @@ class Lingo
58
58
  # Word ist unbekannt, jedoch Teil einer Mehrwortgruppe
59
59
  WA_UNKMULPART = 'MU?'
60
60
 
61
- LA_SUBSTANTIV = 's'
62
- LA_ADJEKTIV = 'a'
63
- LA_VERB = 'v'
64
- LA_EIGENNAME = 'e'
65
- LA_KOMPOSITUM = 'k'
66
- LA_MULTIWORD = 'm'
67
- LA_SEQUENCE = 'q'
68
- LA_WORTFORM = 'w'
69
- LA_SYNONYM = 'y'
70
- LA_STOPWORD = 't'
71
- LA_TAKEITASIS = 'x'
72
- LA_UNKNOWN = '?'
73
-
74
61
  LA_SORTORDER = [
75
- LA_MULTIWORD,
76
- LA_KOMPOSITUM,
77
- LA_SUBSTANTIV,
78
- LA_VERB,
79
- LA_ADJEKTIV,
80
- LA_EIGENNAME,
81
- LA_WORTFORM,
82
- LA_STOPWORD,
83
- LA_TAKEITASIS,
84
- LA_SYNONYM,
85
- LA_UNKNOWN
62
+ LA_SEQUENCE = 'q',
63
+ LA_MULTIWORD = 'm',
64
+ LA_COMPOUND = 'k',
65
+ LA_NOUN = 's',
66
+ LA_VERB = 'v',
67
+ LA_ADJECTIVE = 'a',
68
+ LA_NAME = 'e',
69
+ LA_WORDFORM = 'w',
70
+ LA_STOPWORD = 't',
71
+ LA_TAKEITASIS = 'x',
72
+ LA_SYNONYM = 'y',
73
+ LA_STEM = 'z',
74
+ LA_UNKNOWN = '?'
86
75
  ].reverse.join
87
76
 
88
77
  end
@@ -31,27 +31,29 @@ class Lingo
31
31
  module Reportable
32
32
 
33
33
  def init_reportable(prefix = nil)
34
- @counters, @prefix = Hash.new(0), prefix ? "#{prefix}: " : ''
34
+ @reportable_hash = Hash.new(0)
35
+ @reportable_prefix = prefix ? "#{prefix}: " : ''
35
36
  end
36
37
 
37
- def inc(counter)
38
- @counters[counter] += 1
38
+ def inc(key)
39
+ @reportable_hash[key] += 1
39
40
  end
40
41
 
41
- def add(counter, value)
42
- @counters[counter] += value
42
+ def add(key, val)
43
+ @reportable_hash[key] += val
43
44
  end
44
45
 
45
- def set(counter, value)
46
- @counters[counter] = value
46
+ def set(key, val)
47
+ @reportable_hash[key] = val
47
48
  end
48
49
 
49
- def get(counter)
50
- @counters[counter]
50
+ def get(key)
51
+ @reportable_hash[key]
51
52
  end
52
53
 
53
54
  def report
54
- @counters.each_with_object({}) { |(k, v), r| r["#{@prefix}#{k}"] = v }
55
+ q = @reportable_prefix
56
+ @reportable_hash.each_with_object({}) { |(k, v), r| r["#{q}#{k}"] = v }
55
57
  end
56
58
 
57
59
  end
@@ -0,0 +1,81 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class Lingo
28
+
29
+ class ShowProgress
30
+
31
+ def initialize(obj, max, name = nil, doit = true, text = 'progress')
32
+ return yield self unless max && doit
33
+
34
+ @out = obj.instance_variable_get(:@lingo).config.stderr
35
+
36
+ # To get the length of the formatted string we have
37
+ # to actually substitute the placeholder.
38
+ fmt = ' [%3d%%]'
39
+ len = (fmt % 0).length
40
+
41
+ # Now we know how far to "go back" to
42
+ # overwrite the formatted string...
43
+ back = "\b" * len
44
+
45
+ @fmt = fmt + back
46
+ @clr = ' ' * len + back
47
+
48
+ print name, ': ' if name
49
+
50
+ @rat, @cnt, @next = max / 100.0, 0, 0
51
+ print text
52
+ step
53
+
54
+ yield self
55
+
56
+ print "#{@clr} done.\n"
57
+ end
58
+
59
+ def [](value)
60
+ if defined?(@cnt)
61
+ @cnt = value
62
+ step if @cnt >= @next
63
+ end
64
+ end
65
+
66
+ private
67
+
68
+ def step
69
+ percent = @cnt / @rat
70
+ @next = (percent + 1) * @rat
71
+
72
+ print @fmt % percent if percent.finite?
73
+ end
74
+
75
+ def print(*args)
76
+ @out.print(*args)
77
+ end
78
+
79
+ end
80
+
81
+ end
data/lib/lingo/version.rb CHANGED
@@ -4,7 +4,7 @@ class Lingo
4
4
 
5
5
  MAJOR = 1
6
6
  MINOR = 8
7
- TINY = 1
7
+ TINY = 2
8
8
 
9
9
  class << self
10
10
 
data/lib/lingo.rb CHANGED
@@ -25,6 +25,8 @@
25
25
  #++
26
26
 
27
27
  require 'stringio'
28
+ require 'pathname'
29
+ require 'fileutils'
28
30
  require 'benchmark'
29
31
  require 'nuggets/file/ext'
30
32
  require 'nuggets/env/user_home'
@@ -43,7 +45,8 @@ class Lingo
43
45
  CURR = ENV['LINGO_CURR'] || '.'
44
46
 
45
47
  # The search path for Lingo dictionary and configuration files.
46
- PATH = ENV['LINGO_PATH'] || [CURR, HOME, BASE].join(File::PATH_SEPARATOR)
48
+ PATH = ENV['LINGO_PATH'].nil? ? [CURR, HOME, BASE] :
49
+ ENV['LINGO_PATH'].split(File::PATH_SEPARATOR)
47
50
 
48
51
  ENV['LINGO_PLUGIN_PATH'] ||= File.join(HOME, 'plugins')
49
52
 
@@ -59,7 +62,7 @@ class Lingo
59
62
  # Default encoding
60
63
  ENC = 'UTF-8'.freeze
61
64
 
62
- STRING_SEPARATOR_RE = %r{[; ,|]}
65
+ SEP_RE = %r{[; ,|]}
63
66
 
64
67
  class << self
65
68
 
@@ -79,7 +82,7 @@ class Lingo
79
82
  glob = File.join('??', glob) if type == :dict
80
83
 
81
84
  [].tap { |list| walk(path, options) { |dir|
82
- Dir[File.join(dir, glob)].sort.each { |file|
85
+ Dir[File.join(dir, glob)].sort!.each { |file|
83
86
  pn = Pathname.new(file)
84
87
  list << realpath_for(pn, path) if pn.file?
85
88
  }
@@ -110,29 +113,69 @@ class Lingo
110
113
  File.join(options_for(type)[:dir], basename(type, file))
111
114
  end
112
115
 
116
+ def append_path(*path)
117
+ include_path(path)
118
+ end
119
+
120
+ def prepend_path(*path)
121
+ include_path(path, true)
122
+ end
123
+
124
+ def get_const(name, klass = self)
125
+ klass.const_get(name.camelcase)
126
+ rescue NameError
127
+ raise NameNotFoundError.new(klass, name)
128
+ end
129
+
113
130
  private
114
131
 
132
+ def include_path(path, pre = false)
133
+ PATH.insert(pre ? 0 : -1, *path.map!(&:to_s))
134
+ end
135
+
115
136
  def find_file(file, path, options)
116
- pn = Pathname.new(file_with_ext(file, options)).cleanpath
137
+ if glob = options[:glob]
138
+ file = File.chomp_ext(file)
139
+ options[:ext] ||= '*'
140
+ end
141
+
142
+ file = file_with_ext(file, options)
143
+ pn = Pathname.new(file).cleanpath
117
144
 
118
145
  if pn.relative?
119
146
  walk(path, options) { |dir|
120
147
  pn2 = pn.expand_path(dir)
121
- pn = pn2 and break if pn2.exist?
148
+ ex = pn2.exist?
149
+
150
+ pn2 = Pathname.glob(pn2).first if glob && !ex
151
+ pn = pn2 and break if glob ? pn2 : ex
122
152
  }
123
153
  end
124
154
 
125
155
  realpath_for(pn, path)
156
+ rescue Errno::ENOENT
157
+ raise unless relax = options[:relax]
158
+ relax.respond_to?(:[]) ? relax[file] : file
126
159
  end
127
160
 
128
161
  def find_store(file, path, options)
129
- base = basename(:dict, find(:dict, file, path))
162
+ base = basename(:dict, find(:dict, file, path) {
163
+ raise SourceFileNotFoundError.new(nil, find_file(file, path,
164
+ options.merge(glob: true, relax: lambda { |_file|
165
+ raise SourceFileNotFoundError.new(file, _file)
166
+ })
167
+ ))
168
+ })
130
169
 
131
170
  walk(path.reverse, options, false) { |dir|
132
171
  Pathname.new(dir).ascend { |i|
133
- break true if i.file?
134
- return File.chomp_ext(File.join(dir, base)) if i.writable?
135
- break true if i.exist?
172
+ begin
173
+ stat = i.stat
174
+
175
+ break true if stat.file? || !stat.writable?
176
+ return File.chomp_ext(File.join(dir, base))
177
+ rescue Errno::ENOENT
178
+ end
136
179
  }
137
180
  }
138
181
 
@@ -148,7 +191,7 @@ class Lingo
148
191
  end
149
192
 
150
193
  def path_for(options)
151
- options[:path] || PATH.split(File::PATH_SEPARATOR)
194
+ options[:path] || PATH
152
195
  end
153
196
 
154
197
  def file_with_ext(file, options)
@@ -223,30 +266,25 @@ class Lingo
223
266
 
224
267
  list.each { |hash|
225
268
  # {'attendee' => {'name'=>'Attendee', 'in'=>'nase', 'out'=>'ohr', 'param'=>'hase'}}
226
- cfg = hash.values.first.merge('name' => hash.keys.first.camelcase)
269
+ cfg = hash.values.first.merge('name' => name = hash.keys.first.camelcase)
227
270
 
228
271
  %w[in out].each { |key| (cfg[key] ||= '').downcase! }
229
272
 
230
- cfg['in'] = last_link if cfg['in'].empty?
231
- cfg['out'] = "auto_link_out_#{auto_link += 1}" if cfg['out'].empty?
273
+ cfg['in'] = last_link if cfg['in'].empty?
274
+ cfg['out'] = "auto_link-#{auto_link += 1}" if cfg['out'].empty?
232
275
  last_link = cfg['out']
233
276
 
234
- data = config["language/attendees/#{cfg['name'].downcase}"]
235
- cfg.update(data) if data
277
+ cfg.update(config["language/attendees/#{name.downcase}"] || {})
236
278
 
237
- attendee = Attendee.const_get(cfg['name']).new(cfg, self)
238
- @attendees << attendee
279
+ @attendees << attendee = Attendee.const_get(name).new(cfg, self)
239
280
 
240
- cfg['in'].split(STRING_SEPARATOR_RE).each { |interest|
241
- subscriber[interest] << attendee
242
- }
243
- cfg['out'].split(STRING_SEPARATOR_RE).each { |theme|
244
- supplier[theme] << attendee
281
+ { 'in' => subscriber, 'out' => supplier }.each { |key, target|
282
+ cfg[key].split(SEP_RE).each { |ch| target[ch] << attendee }
245
283
  }
246
284
  }
247
285
 
248
- supplier.each { |channel, attendees| attendees.each { |attendee|
249
- attendee.add_subscriber(subscriber[channel])
286
+ supplier.each { |ch, attendees| attendees.each { |att|
287
+ att.add_subscriber(subscriber[ch])
250
288
  } }
251
289
  end
252
290
 
@@ -283,6 +321,7 @@ require_relative 'lingo/core_ext'
283
321
  require_relative 'lingo/cachable'
284
322
  require_relative 'lingo/reportable'
285
323
  require_relative 'lingo/agenda_item'
324
+ require_relative 'lingo/show_progress'
286
325
  require_relative 'lingo/database'
287
326
  require_relative 'lingo/language'
288
327
  require_relative 'lingo/attendee'