lingo 1.8.1 → 1.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. data/ChangeLog +23 -5
  2. data/README +1 -1
  3. data/Rakefile +5 -7
  4. data/TODO +2 -0
  5. data/bin/lingo +5 -1
  6. data/de.lang +1 -1
  7. data/en/lingo-syn.txt +0 -0
  8. data/en.lang +2 -1
  9. data/lib/lingo/attendee/abbreviator.rb +8 -9
  10. data/lib/lingo/attendee/debugger.rb +5 -4
  11. data/lib/lingo/attendee/decomposer.rb +8 -3
  12. data/lib/lingo/attendee/dehyphenizer.rb +19 -63
  13. data/lib/lingo/attendee/formatter.rb +1 -1
  14. data/lib/lingo/attendee/multi_worder.rb +67 -155
  15. data/lib/lingo/attendee/noneword_filter.rb +16 -9
  16. data/lib/lingo/attendee/object_filter.rb +1 -1
  17. data/lib/lingo/attendee/sequencer.rb +32 -63
  18. data/lib/lingo/attendee/stemmer/porter.rb +343 -0
  19. data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
  20. data/lib/lingo/attendee/synonymer.rb +10 -9
  21. data/lib/lingo/attendee/text_reader.rb +102 -76
  22. data/lib/lingo/attendee/text_writer.rb +23 -26
  23. data/lib/lingo/attendee/tokenizer.rb +13 -27
  24. data/lib/lingo/attendee/variator.rb +26 -66
  25. data/lib/lingo/attendee/vector_filter.rb +42 -43
  26. data/lib/lingo/attendee/word_searcher.rb +6 -7
  27. data/lib/lingo/attendee.rb +25 -7
  28. data/lib/lingo/buffered_attendee.rb +36 -10
  29. data/lib/lingo/cachable.rb +8 -8
  30. data/lib/lingo/config.rb +5 -6
  31. data/lib/lingo/ctl.rb +2 -3
  32. data/lib/lingo/database/crypter.rb +9 -26
  33. data/lib/lingo/database/gdbm_store.rb +3 -5
  34. data/lib/lingo/database/libcdb_store.rb +4 -6
  35. data/lib/lingo/database/sdbm_store.rb +11 -6
  36. data/lib/lingo/database/show_progress.rb +3 -43
  37. data/lib/lingo/database/source/key_value.rb +2 -6
  38. data/lib/lingo/database/source/multi_key.rb +3 -5
  39. data/lib/lingo/database/source/multi_value.rb +2 -6
  40. data/lib/lingo/database/source/single_word.rb +4 -6
  41. data/lib/lingo/database/source/word_class.rb +4 -10
  42. data/lib/lingo/database/source.rb +20 -18
  43. data/lib/lingo/database.rb +84 -59
  44. data/lib/lingo/error.rb +57 -1
  45. data/lib/lingo/language/dictionary.rb +21 -18
  46. data/lib/lingo/language/grammar.rb +40 -49
  47. data/lib/lingo/language/lexical.rb +6 -6
  48. data/lib/lingo/language/lexical_hash.rb +6 -0
  49. data/lib/lingo/language/word.rb +32 -15
  50. data/lib/lingo/language/word_form.rb +1 -1
  51. data/lib/lingo/language.rb +14 -25
  52. data/lib/lingo/reportable.rb +12 -10
  53. data/lib/lingo/show_progress.rb +81 -0
  54. data/lib/lingo/version.rb +1 -1
  55. data/lib/lingo.rb +63 -24
  56. data/lingo-call.cfg +6 -10
  57. data/lingo.cfg +60 -44
  58. data/lir.cfg +42 -41
  59. data/test/attendee/ts_abbreviator.rb +3 -5
  60. data/test/attendee/ts_decomposer.rb +3 -5
  61. data/test/attendee/ts_multi_worder.rb +87 -145
  62. data/test/attendee/ts_noneword_filter.rb +5 -3
  63. data/test/attendee/ts_object_filter.rb +5 -3
  64. data/test/attendee/ts_sequencer.rb +3 -5
  65. data/test/attendee/ts_stemmer.rb +309 -0
  66. data/test/attendee/ts_synonymer.rb +15 -11
  67. data/test/attendee/ts_text_reader.rb +12 -15
  68. data/test/attendee/ts_text_writer.rb +24 -29
  69. data/test/attendee/ts_tokenizer.rb +9 -7
  70. data/test/attendee/ts_variator.rb +4 -4
  71. data/test/attendee/ts_vector_filter.rb +24 -16
  72. data/test/attendee/ts_word_searcher.rb +20 -36
  73. data/test/{lir.csv → lir.vec} +0 -0
  74. data/test/ref/artikel.vec +943 -943
  75. data/test/ref/artikel.ven +943 -943
  76. data/test/ref/lir.non +201 -201
  77. data/test/ref/lir.seq +178 -178
  78. data/test/ref/lir.syn +49 -49
  79. data/test/ref/lir.vec +329 -0
  80. data/test/test_helper.rb +20 -36
  81. data/test/ts_database.rb +10 -10
  82. data/test/ts_language.rb +279 -319
  83. metadata +93 -104
  84. data/info/Objekte.png +0 -0
  85. data/info/Typen.png +0 -0
  86. data/info/database.png +0 -0
  87. data/info/db_small.png +0 -0
  88. data/info/download.png +0 -0
  89. data/info/kerze.png +0 -0
  90. data/info/language.png +0 -0
  91. data/info/lingo.png +0 -0
  92. data/info/logo.png +0 -0
  93. data/info/meeting.png +0 -0
  94. data/info/types.png +0 -0
  95. data/lingo-all.cfg +0 -89
  96. data/porter/stem.cfg +0 -311
  97. data/porter/stem.rb +0 -150
  98. data/test/ref/lir.csv +0 -329
  99. data/test.cfg +0 -79
@@ -82,74 +82,73 @@ class Lingo
82
82
  protected
83
83
 
84
84
  def init
85
- @lexis = Regexp.new(get_key('lexicals', '[sy]').downcase)
86
- @sort = get_key('sort', 'normal').downcase
87
- @skip = get_array('skip', TA_PUNCTUATION+','+TA_OTHER).collect {|s| s.upcase }
88
- @vectors = Array.new
89
- @word_count = 0
90
-
91
85
  if @debug = get_key('debug', false)
92
86
  @prompt = get_key('prompt', 'lex:) ')
87
+ else
88
+ @lex = Regexp.new(get_key('lexicals', '[sy]').downcase)
89
+ @skip = get_array('skip', DEFAULT_SKIP, :upcase)
90
+
91
+ if sort = get_key('sort', 'normal')
92
+ @sort_format, @sort_method = sort.downcase.split('_', 2)
93
+ end
93
94
  end
95
+
96
+ @vectors, @word_count = [], 0.0
94
97
  end
95
98
 
96
- def control(cmd, par)
99
+ def control(cmd, param)
97
100
  case cmd
98
101
  when STR_CMD_EOL
99
102
  skip_command
100
103
  when STR_CMD_FILE, STR_CMD_RECORD, STR_CMD_EOF
101
- @debug ? @vectors.each(&method(:forward)) : sendVector
102
- @vectors.clear
104
+ send_vectors unless @vectors.empty?
103
105
  end
104
106
  end
105
107
 
106
108
  def process(obj)
107
109
  if @debug
108
- @vectors << "#{@prompt} #{obj.inspect}" if eval(@debug)
109
- elsif obj.is_a?(Word)
110
- @word_count += 1 if @skip.index(obj.attr).nil?
111
- unless obj.lexicals.nil?
112
- lexis = obj.get_class(@lexis) #lexicals.collect { |lex| (lex.attr =~ @lexis) ? lex : nil }.compact # get_class(@lexis)
113
- lexis.each { |lex| @vectors << lex.form.downcase }
114
- add('Anzahl von Vektor-Wörtern', lexis.size)
115
- end
110
+ forward("#{@prompt} #{obj.inspect}") if eval(@debug)
111
+ elsif obj.is_a?(Word) && !@skip.include?(obj.attr)
112
+ @word_count += 1
113
+
114
+ cnt = obj.get_class(@lex).each { |lex|
115
+ vec = lex.form.downcase
116
+ @sort_format ? @vectors << vec : forward(vec)
117
+ }.size
118
+
119
+ add('Anzahl von Vektor-Wörtern', cnt)
116
120
  end
117
121
  end
118
122
 
119
123
  private
120
124
 
121
- def sendVector
122
- return if @vectors.size==0
123
-
125
+ def send_vectors
124
126
  add('Objekte gefiltert', @vectors.size)
125
127
 
126
- # Array der Vector-Wörter zählen und nach Häufigkeit sortieren
127
- if @sort=='normal'
128
- @vectors = @vectors.compact.sort.uniq
128
+ if @sort_format == 'normal'
129
+ @vectors.sort!
130
+ @vectors.uniq!
131
+
132
+ @vectors.each(&method(:forward)).clear
129
133
  else
130
- cnt = Hash.new(0)
131
- @vectors.compact.each { |e| cnt[e]+=1 }
132
- @vectors = cnt.to_a.sort { |x,y|
133
- if (y[1]<=>x[1])==0
134
- x[0]<=>y[0]
135
- else
136
- y[1]<=>x[1]
137
- end
138
- }
139
- end
134
+ cnt, fmt = Hash.new(0), '%d'
135
+
136
+ @vectors.each { |v| cnt[v] += 1 }.clear
137
+ vec = cnt.sort_by { |v, c| [-c, v] }
140
138
 
141
- # Vectoren je nach Parameter formatiert weiterleiten
142
- @vectors.collect { |vec|
143
- case @sort
144
- when 'term_abs' then sprintf "%d %s", vec[1], vec[0]
145
- when 'term_rel' then sprintf "%6.5f %s", vec[1].to_f/@word_count, vec[0]
146
- when 'sto_abs' then sprintf "%s {%d}", vec[0], vec[1]
147
- when 'sto_rel' then sprintf "%s {%6.5f}", vec[0], vec[1].to_f/@word_count
148
- else sprintf "%s", vec
139
+ if @sort_method == 'rel'
140
+ vec.each { |v| v[1] /= @word_count }
141
+ fmt = '%6.5f'
149
142
  end
150
- }.each(&method(:forward))
151
143
 
152
- @word_count = 0 if @sort == 'sto_rel'
144
+ if @sort_format == 'sto'
145
+ fmt, @word_count = "%s {#{fmt}}", 0.0
146
+ else
147
+ fmt.insert(1, '2$') << ' %1$s'
148
+ end
149
+
150
+ vec.each { |v| forward(fmt % v) }
151
+ end
153
152
  end
154
153
 
155
154
  end
@@ -71,19 +71,18 @@ class Lingo
71
71
  set_dic
72
72
  end
73
73
 
74
- def control(cmd, par)
75
- @dic.report.each_pair { |key, value|
76
- set(key, value)
77
- } if cmd == STR_CMD_STATUS
74
+ def control(cmd, param)
75
+ report_on(cmd, @dic)
78
76
  end
79
77
 
80
78
  def process(obj)
81
79
  if obj.is_a?(Token) && obj.attr == TA_WORD
82
80
  inc('Anzahl gesuchter Wörter')
83
- word = @dic.find_word(obj.form)
84
- inc('Anzahl gefundener Wörter') unless word.unknown?
85
- obj = word
81
+
82
+ obj = @dic.find_word(obj.form)
83
+ inc('Anzahl gefundener Wörter') unless obj.unknown?
86
84
  end
85
+
87
86
  forward(obj)
88
87
  end
89
88
 
@@ -24,6 +24,8 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
+ require 'nuggets/string/evaluate'
28
+
27
29
  class Lingo
28
30
 
29
31
  # Lingo ist als universelles Indexierungssystem entworfen worden. Seine Stärke liegt in der einfachen Konfigurierbarkeit für
@@ -80,6 +82,8 @@ class Lingo
80
82
  STA_TIM_COMMANDS = 'Time to control '
81
83
  STA_TIM_OBJECTS = 'Time to process '
82
84
 
85
+ DEFAULT_SKIP = [TA_PUNCTUATION, TA_OTHER].join(',')
86
+
83
87
  def initialize(config, lingo)
84
88
  @lingo = lingo
85
89
 
@@ -129,6 +133,15 @@ class Lingo
129
133
 
130
134
  private
131
135
 
136
+ def find_word(f, d = @dic, g = @gra)
137
+ w = d.find_word(f)
138
+ g && (block_given? ? !yield(w) : w.unknown?) ? g.find_compound(f) : w
139
+ end
140
+
141
+ def report_on(cmd, *rep)
142
+ rep.each { |r| r.report.each { |q| set(*q) } } if cmd == STR_CMD_STATUS
143
+ end
144
+
132
145
  def sta_for(key)
133
146
  %w[NUM TIM].map { |i| self.class.const_get("STA_#{i}_#{key.upcase}") }
134
147
  end
@@ -139,9 +152,9 @@ class Lingo
139
152
 
140
153
  return yield unless @lingo.report_time
141
154
 
142
- @timer = Time.new
155
+ @timer = Time.now.to_i
143
156
  res = yield
144
- add(t, Time.new - @timer)
157
+ add(t, Time.now.to_i - @timer)
145
158
  res
146
159
  end
147
160
 
@@ -184,7 +197,7 @@ class Lingo
184
197
  })
185
198
  }
186
199
 
187
- @lingo.warn msg % arg
200
+ warn msg % arg
188
201
  end
189
202
 
190
203
  def report_status
@@ -192,8 +205,8 @@ class Lingo
192
205
 
193
206
  msg = "Attendee <%s> was connected from '%s' to '%s' reporting..."
194
207
 
195
- @lingo.warn msg % @config.values_at(*%w[name in out]),
196
- nil, report.sort.map { |k, v| " #{k} = #{v}" }, nil
208
+ warn msg % @config.values_at(*%w[name in out]), nil,
209
+ report.sort.map! { |k, v| " #{k} = #{v}" }, nil
197
210
  end
198
211
 
199
212
  def skip_command
@@ -217,8 +230,8 @@ class Lingo
217
230
  @config.fetch(key, default)
218
231
  end
219
232
 
220
- def get_array(key, default = nil)
221
- get_key(key, default).split(STRING_SEPARATOR_RE)
233
+ def get_array(key, default = nil, m = nil)
234
+ get_key(key, default).split(SEP_RE).tap { |ary| ary.map!(&m) if m }
222
235
  end
223
236
 
224
237
  def dictionary(src, mod)
@@ -237,6 +250,10 @@ class Lingo
237
250
  @gra = grammar(get_array('source'), get_key('mode', 'all'))
238
251
  end
239
252
 
253
+ def warn(*msg)
254
+ @lingo.warn(*msg)
255
+ end
256
+
240
257
  end
241
258
 
242
259
  end
@@ -252,6 +269,7 @@ require_relative 'attendee/noneword_filter'
252
269
  require_relative 'attendee/object_filter'
253
270
  require_relative 'attendee/variator'
254
271
  require_relative 'attendee/sequencer'
272
+ require_relative 'attendee/stemmer'
255
273
  require_relative 'attendee/synonymer'
256
274
  require_relative 'attendee/text_reader'
257
275
  require_relative 'attendee/text_writer'
@@ -28,8 +28,6 @@ class Lingo
28
28
 
29
29
  class BufferedAttendee < Attendee
30
30
 
31
- BufferInsert = Struct.new(:position, :object)
32
-
33
31
  def initialize(config, lingo)
34
32
  @buffer, @inserts = [], []
35
33
  super
@@ -38,30 +36,58 @@ class Lingo
38
36
  protected
39
37
 
40
38
  def process(obj)
41
- @buffer.push(obj)
39
+ @buffer << obj
42
40
  process_buffer if process_buffer?
43
41
  end
44
42
 
45
43
  private
46
44
 
47
- def forward_buffer
48
- @inserts.sort_by!(&:position).each { |i|
49
- @buffer.insert(i.position, i.object)
50
- }.clear
45
+ def form_at(index, klass = WordForm)
46
+ obj = @buffer[index]
47
+ obj.form if obj.is_a?(klass)
48
+ end
51
49
 
50
+ def forward_buffer
51
+ @inserts.sort_by!(&:first).each { |i| @buffer.insert(*i) }.clear
52
52
  @buffer.each(&method(:forward)).clear
53
53
  end
54
54
 
55
+ def forward_number_of_token(len = default = @buffer.size, punct = !default)
56
+ begin
57
+ unless @buffer.empty?
58
+ forward(item = @buffer.delete_at(0))
59
+ len -= 1 unless punct && item.form == CHAR_PUNCT
60
+ end
61
+ end while len > 0
62
+ end
63
+
64
+ def valid_tokens_in_buffer
65
+ @buffer.count { |item| item.form != CHAR_PUNCT }
66
+ end
67
+
55
68
  def process_buffer?
56
- true
69
+ !instance_variable_defined?(:@expected_tokens_in_buffer) ||
70
+ valid_tokens_in_buffer >= @expected_tokens_in_buffer
57
71
  end
58
72
 
59
73
  def process_buffer
60
74
  raise NotImplementedError
61
75
  end
62
76
 
63
- def deferred_insert(pos, obj)
64
- @inserts << BufferInsert.new(pos, obj)
77
+ def control_multi(cmd, dic = @dic)
78
+ report_on(cmd, dic)
79
+
80
+ if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
81
+ @eof_handling = true
82
+
83
+ while valid_tokens_in_buffer > 1
84
+ process_buffer
85
+ end
86
+
87
+ forward_number_of_token
88
+
89
+ @eof_handling = false
90
+ end
65
91
  end
66
92
 
67
93
  end
@@ -31,26 +31,26 @@ class Lingo
31
31
  module Cachable
32
32
 
33
33
  def init_cachable
34
- @cache = Hash.new(false)
34
+ @cachable_hash = Hash.new(false)
35
35
  end
36
36
 
37
37
  def hit?(key)
38
- @cache.has_key?(key)
38
+ @cachable_hash.has_key?(key)
39
39
  end
40
40
 
41
- def store(key, value)
42
- @cache[key] = cache_value(value)
43
- value
41
+ def store(key, val)
42
+ @cachable_hash[key] = cache_value(val)
43
+ val
44
44
  end
45
45
 
46
46
  def retrieve(key)
47
- cache_value(@cache[key])
47
+ cache_value(@cachable_hash[key])
48
48
  end
49
49
 
50
50
  private
51
51
 
52
- def cache_value(value)
53
- value.nil? ? nil : value.dup
52
+ def cache_value(val)
53
+ val.dup unless val.nil?
54
54
  end
55
55
 
56
56
  end
data/lib/lingo/config.rb CHANGED
@@ -41,13 +41,12 @@ class Lingo
41
41
  load_config('config')
42
42
 
43
43
  Array(self['meeting/attendees']).each { |a|
44
- r = a['text_reader'] || a['textreader'] or next
44
+ r = a['text_reader'] || a['textreader'] or next # DEPRECATE textreader
45
45
 
46
46
  f = @cli.files
47
47
 
48
48
  if i = r['files']
49
- r['files'] = i.strip == '$(files)' ?
50
- f : i.split(STRING_SEPARATOR_RE)
49
+ r['files'] = i.strip == '$(files)' ? f : i.split(SEP_RE)
51
50
  elsif !f.empty?
52
51
  r['files'] = f
53
52
  end
@@ -57,12 +56,12 @@ class Lingo
57
56
  end
58
57
 
59
58
  def [](key)
60
- key_to_nodes(key).inject(@opts) { |value, node| value[node] }
59
+ key_to_nodes(key).inject(@opts) { |hash, node| hash[node] }
61
60
  end
62
61
 
63
- def []=(key, value)
62
+ def []=(key, val)
64
63
  nodes = key_to_nodes(key); node = nodes.pop
65
- (self[nodes_to_key(nodes)] ||= {})[node] = value
64
+ (self[nodes_to_key(nodes)] ||= {})[node] = val
66
65
  end
67
66
 
68
67
  def stdin
data/lib/lingo/ctl.rb CHANGED
@@ -25,7 +25,6 @@
25
25
  #++
26
26
 
27
27
  require 'optparse'
28
- require 'fileutils'
29
28
 
30
29
  class Lingo
31
30
 
@@ -88,7 +87,7 @@ Usage: #{PROG} <command> [arguments] [options]
88
87
  #{PROG} [-h|--help] [--version]
89
88
  EOT
90
89
 
91
- def do
90
+ def ctl
92
91
  parse_options
93
92
  send("do_#{ALIASES[ARGV.shift]}")
94
93
  end
@@ -230,7 +229,7 @@ EOT
230
229
  end
231
230
 
232
231
  def self.ctl
233
- Ctl.do
232
+ Ctl.ctl
234
233
  rescue => err
235
234
  raise if $VERBOSE
236
235
  abort "#{err.backtrace.first}: #{err} (#{err.class})"
@@ -24,6 +24,8 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
+ require 'digest/sha1'
28
+
27
29
  class Lingo
28
30
 
29
31
  class Database
@@ -39,35 +41,16 @@ class Lingo
39
41
  end
40
42
 
41
43
  def encode(key, val)
42
- hex = ''
43
-
44
- crypt(key, val).each_byte { |byte|
45
- # To get a hex representation for a char we just utilize
46
- # the quotient and the remainder of division by base 16.
47
- q, r = byte.divmod(16)
48
- hex << HEX_CHARS[q] << HEX_CHARS[r]
49
- }
50
-
51
- [digest(key), hex]
44
+ [digest(key), crypt(key, val).each_byte.with_object('') { |b, s|
45
+ b.divmod(16).each { |i| s << HEX_CHARS[i] }
46
+ }]
52
47
  end
53
48
 
54
49
  def decode(key, val)
55
- str, q, first = '', 0, false
56
-
57
- val.each_byte { |byte|
58
- byte = byte.chr(ENC)
59
-
60
- # Our hex chars are 2 bytes wide, so we have to keep track
61
- # of whether it's the first or the second of the two.
62
- if first = !first
63
- q = HEX_CHARS.index(byte)
64
- else
65
- # Now we got both parts, so let's revert the divmod(16)
66
- str << q * 16 + HEX_CHARS.index(byte)
67
- end
68
- }
69
-
70
- crypt(key, str)
50
+ crypt(key, val.each_byte.each_slice(2).with_object('') { |b, s|
51
+ q, r = b.map { |i| HEX_CHARS.index(i.chr(ENC)) }
52
+ s << q * 16 + r
53
+ })
71
54
  end
72
55
 
73
56
  private
@@ -32,14 +32,12 @@ class Lingo
32
32
 
33
33
  module GDBMStore
34
34
 
35
- private
35
+ Database.register(self, 'db')
36
36
 
37
- def store_ext
38
- '.db'
39
- end
37
+ private
40
38
 
41
39
  def _open
42
- GDBM.open(@dbm_name)
40
+ GDBM.open(@stofile)
43
41
  end
44
42
 
45
43
  end
@@ -32,14 +32,12 @@ class Lingo
32
32
 
33
33
  module LibCDBStore
34
34
 
35
- private
35
+ Database.register(self, 'cdb')
36
36
 
37
- def store_ext
38
- '.cdb'
39
- end
37
+ private
40
38
 
41
39
  def create
42
- LibCDB::CDB.open(@dbm_name, 'w') { |db|
40
+ LibCDB::CDB.open(@stofile, 'w') { |db|
43
41
  @db = db
44
42
  yield
45
43
  }
@@ -48,7 +46,7 @@ class Lingo
48
46
  end
49
47
 
50
48
  def _open
51
- LibCDB::CDB.open(@dbm_name)
49
+ LibCDB::CDB.open(@stofile)
52
50
  end
53
51
 
54
52
  end
@@ -32,26 +32,31 @@ class Lingo
32
32
 
33
33
  module SDBMStore
34
34
 
35
+ Database.register(self, %w[dir pag], -1, false)
36
+
35
37
  private
36
38
 
37
39
  def uptodate?
38
- super(@dbm_name + '.pag')
40
+ super(@stofile + EXT.last)
39
41
  end
40
42
 
41
43
  def _clear
42
- File.delete(*Dir["#{@dbm_name}.{pag,dir}"])
44
+ File.delete(*Dir["#{@stofile}{#{EXT.join(',')}}"])
43
45
  end
44
46
 
45
47
  def _open
46
- SDBM.open(@dbm_name)
48
+ SDBM.open(@stofile)
49
+ end
50
+
51
+ def _get(key)
52
+ val = super
53
+ val && val.encode(ENC)
47
54
  end
48
55
 
49
56
  def _set(key, val)
50
57
  if val.length > 950
58
+ warn "Warning: Entry `#{key}' (#{@srcfile}) too long for SDBM. Truncating..."
51
59
  val = val[0, 950]
52
-
53
- @lingo.warn "Warning: Entry `#{key}' (#{@src_file})" <<
54
- 'too long for SDBM. Truncating...'
55
60
  end
56
61
 
57
62
  super
@@ -28,50 +28,10 @@ class Lingo
28
28
 
29
29
  class Database
30
30
 
31
- class ShowProgress
31
+ class ShowProgress < ShowProgress
32
32
 
33
- def initialize(src, max, act = true)
34
- @out, @act = src.instance_variable_get(:@lingo).config.stderr, act
35
-
36
- # To get the length of the formatted string we have
37
- # to actually substitute the placeholder.
38
- fmt = ' [%3d%%]'
39
- len = (fmt % 0).length
40
-
41
- # Now we know how far to "go back" to
42
- # overwrite the formatted string...
43
- back = "\b" * len
44
-
45
- @fmt = fmt + back
46
- @clr = ' ' * len + back
47
-
48
- print src.instance_variable_get(:@config)['name'], ': '
49
-
50
- @rat, @cnt, @next = max / 100.0, 0, 0
51
- print 'convert '
52
- step
53
-
54
- yield self
55
-
56
- print "#{@clr}ok\n"
57
- end
58
-
59
- def [](value)
60
- @cnt = value
61
- step if @cnt >= @next
62
- end
63
-
64
- private
65
-
66
- def step
67
- percent = @cnt / @rat
68
- @next = (percent + 1) * @rat
69
-
70
- print @fmt % percent
71
- end
72
-
73
- def print(*args)
74
- @out.print(*args) if @act
33
+ def initialize(obj, max, act = true)
34
+ super(obj, max, obj.instance_variable_get(:@config)['name'], act, 'convert')
75
35
  end
76
36
 
77
37
  end
@@ -39,18 +39,14 @@ class Lingo
39
39
 
40
40
  def initialize(id, lingo)
41
41
  super
42
-
43
- @separator = @config.fetch('separator', '*')
44
- @line_pattern = Regexp.new('^(' + @legal_word + ')' + Regexp.escape(@separator) + '(' + @legal_word + ')$')
42
+ @pat = /^(#{@wrd})#{Regexp.escape(@sep ||= '*')}(#{@wrd})$/
45
43
  end
46
44
 
47
45
  private
48
46
 
49
47
  def convert_line(line, key, val)
50
48
  key, val = key.strip, val.strip
51
- val = '' if key == val
52
- val = [val + '#' + @wordclass]
53
- [key, val]
49
+ [key, %W[#{val unless key == val}##{@def}]]
54
50
  end
55
51
 
56
52
  end
@@ -40,9 +40,7 @@ class Lingo
40
40
 
41
41
  def initialize(id, lingo)
42
42
  super
43
-
44
- @separator = @config.fetch('separator', ';')
45
- @line_pattern = Regexp.new('^' + @legal_word + '(?:' + Regexp.escape(@separator) + @legal_word + ')*$')
43
+ @pat = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/
46
44
  end
47
45
 
48
46
  def set(db, key, val)
@@ -52,8 +50,8 @@ class Lingo
52
50
  private
53
51
 
54
52
  def convert_line(line, key, val)
55
- values = line.split(@separator).map { |value| value.strip }
56
- [values[0], values[1..-1]]
53
+ values = line.split(@sep).each(&:strip!)
54
+ [values.shift, values]
57
55
  end
58
56
 
59
57
  end
@@ -38,11 +38,7 @@ class Lingo
38
38
 
39
39
  def initialize(id, lingo)
40
40
  super
41
-
42
- @separator = @config.fetch('separator', ';')
43
- @line_pattern = Regexp.new('^' + @legal_word + '(?:' + Regexp.escape(@separator) + @legal_word + ')*$')
44
-
45
- @idx = -1
41
+ @pat, @idx = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/, -1
46
42
  end
47
43
 
48
44
  def set(db, key, val)
@@ -53,7 +49,7 @@ class Lingo
53
49
  private
54
50
 
55
51
  def convert_line(line, key, val)
56
- [nil, line.split(@separator).map { |value| value.strip }]
52
+ [nil, line.split(@sep).each(&:strip!)]
57
53
  end
58
54
 
59
55
  end