lingo 1.8.1 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. data/ChangeLog +23 -5
  2. data/README +1 -1
  3. data/Rakefile +5 -7
  4. data/TODO +2 -0
  5. data/bin/lingo +5 -1
  6. data/de.lang +1 -1
  7. data/en/lingo-syn.txt +0 -0
  8. data/en.lang +2 -1
  9. data/lib/lingo/attendee/abbreviator.rb +8 -9
  10. data/lib/lingo/attendee/debugger.rb +5 -4
  11. data/lib/lingo/attendee/decomposer.rb +8 -3
  12. data/lib/lingo/attendee/dehyphenizer.rb +19 -63
  13. data/lib/lingo/attendee/formatter.rb +1 -1
  14. data/lib/lingo/attendee/multi_worder.rb +67 -155
  15. data/lib/lingo/attendee/noneword_filter.rb +16 -9
  16. data/lib/lingo/attendee/object_filter.rb +1 -1
  17. data/lib/lingo/attendee/sequencer.rb +32 -63
  18. data/lib/lingo/attendee/stemmer/porter.rb +343 -0
  19. data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
  20. data/lib/lingo/attendee/synonymer.rb +10 -9
  21. data/lib/lingo/attendee/text_reader.rb +102 -76
  22. data/lib/lingo/attendee/text_writer.rb +23 -26
  23. data/lib/lingo/attendee/tokenizer.rb +13 -27
  24. data/lib/lingo/attendee/variator.rb +26 -66
  25. data/lib/lingo/attendee/vector_filter.rb +42 -43
  26. data/lib/lingo/attendee/word_searcher.rb +6 -7
  27. data/lib/lingo/attendee.rb +25 -7
  28. data/lib/lingo/buffered_attendee.rb +36 -10
  29. data/lib/lingo/cachable.rb +8 -8
  30. data/lib/lingo/config.rb +5 -6
  31. data/lib/lingo/ctl.rb +2 -3
  32. data/lib/lingo/database/crypter.rb +9 -26
  33. data/lib/lingo/database/gdbm_store.rb +3 -5
  34. data/lib/lingo/database/libcdb_store.rb +4 -6
  35. data/lib/lingo/database/sdbm_store.rb +11 -6
  36. data/lib/lingo/database/show_progress.rb +3 -43
  37. data/lib/lingo/database/source/key_value.rb +2 -6
  38. data/lib/lingo/database/source/multi_key.rb +3 -5
  39. data/lib/lingo/database/source/multi_value.rb +2 -6
  40. data/lib/lingo/database/source/single_word.rb +4 -6
  41. data/lib/lingo/database/source/word_class.rb +4 -10
  42. data/lib/lingo/database/source.rb +20 -18
  43. data/lib/lingo/database.rb +84 -59
  44. data/lib/lingo/error.rb +57 -1
  45. data/lib/lingo/language/dictionary.rb +21 -18
  46. data/lib/lingo/language/grammar.rb +40 -49
  47. data/lib/lingo/language/lexical.rb +6 -6
  48. data/lib/lingo/language/lexical_hash.rb +6 -0
  49. data/lib/lingo/language/word.rb +32 -15
  50. data/lib/lingo/language/word_form.rb +1 -1
  51. data/lib/lingo/language.rb +14 -25
  52. data/lib/lingo/reportable.rb +12 -10
  53. data/lib/lingo/show_progress.rb +81 -0
  54. data/lib/lingo/version.rb +1 -1
  55. data/lib/lingo.rb +63 -24
  56. data/lingo-call.cfg +6 -10
  57. data/lingo.cfg +60 -44
  58. data/lir.cfg +42 -41
  59. data/test/attendee/ts_abbreviator.rb +3 -5
  60. data/test/attendee/ts_decomposer.rb +3 -5
  61. data/test/attendee/ts_multi_worder.rb +87 -145
  62. data/test/attendee/ts_noneword_filter.rb +5 -3
  63. data/test/attendee/ts_object_filter.rb +5 -3
  64. data/test/attendee/ts_sequencer.rb +3 -5
  65. data/test/attendee/ts_stemmer.rb +309 -0
  66. data/test/attendee/ts_synonymer.rb +15 -11
  67. data/test/attendee/ts_text_reader.rb +12 -15
  68. data/test/attendee/ts_text_writer.rb +24 -29
  69. data/test/attendee/ts_tokenizer.rb +9 -7
  70. data/test/attendee/ts_variator.rb +4 -4
  71. data/test/attendee/ts_vector_filter.rb +24 -16
  72. data/test/attendee/ts_word_searcher.rb +20 -36
  73. data/test/{lir.csv → lir.vec} +0 -0
  74. data/test/ref/artikel.vec +943 -943
  75. data/test/ref/artikel.ven +943 -943
  76. data/test/ref/lir.non +201 -201
  77. data/test/ref/lir.seq +178 -178
  78. data/test/ref/lir.syn +49 -49
  79. data/test/ref/lir.vec +329 -0
  80. data/test/test_helper.rb +20 -36
  81. data/test/ts_database.rb +10 -10
  82. data/test/ts_language.rb +279 -319
  83. metadata +93 -104
  84. data/info/Objekte.png +0 -0
  85. data/info/Typen.png +0 -0
  86. data/info/database.png +0 -0
  87. data/info/db_small.png +0 -0
  88. data/info/download.png +0 -0
  89. data/info/kerze.png +0 -0
  90. data/info/language.png +0 -0
  91. data/info/lingo.png +0 -0
  92. data/info/logo.png +0 -0
  93. data/info/meeting.png +0 -0
  94. data/info/types.png +0 -0
  95. data/lingo-all.cfg +0 -89
  96. data/porter/stem.cfg +0 -311
  97. data/porter/stem.rb +0 -150
  98. data/test/ref/lir.csv +0 -329
  99. data/test.cfg +0 -79
@@ -82,74 +82,73 @@ class Lingo
82
82
  protected
83
83
 
84
84
  def init
85
- @lexis = Regexp.new(get_key('lexicals', '[sy]').downcase)
86
- @sort = get_key('sort', 'normal').downcase
87
- @skip = get_array('skip', TA_PUNCTUATION+','+TA_OTHER).collect {|s| s.upcase }
88
- @vectors = Array.new
89
- @word_count = 0
90
-
91
85
  if @debug = get_key('debug', false)
92
86
  @prompt = get_key('prompt', 'lex:) ')
87
+ else
88
+ @lex = Regexp.new(get_key('lexicals', '[sy]').downcase)
89
+ @skip = get_array('skip', DEFAULT_SKIP, :upcase)
90
+
91
+ if sort = get_key('sort', 'normal')
92
+ @sort_format, @sort_method = sort.downcase.split('_', 2)
93
+ end
93
94
  end
95
+
96
+ @vectors, @word_count = [], 0.0
94
97
  end
95
98
 
96
- def control(cmd, par)
99
+ def control(cmd, param)
97
100
  case cmd
98
101
  when STR_CMD_EOL
99
102
  skip_command
100
103
  when STR_CMD_FILE, STR_CMD_RECORD, STR_CMD_EOF
101
- @debug ? @vectors.each(&method(:forward)) : sendVector
102
- @vectors.clear
104
+ send_vectors unless @vectors.empty?
103
105
  end
104
106
  end
105
107
 
106
108
  def process(obj)
107
109
  if @debug
108
- @vectors << "#{@prompt} #{obj.inspect}" if eval(@debug)
109
- elsif obj.is_a?(Word)
110
- @word_count += 1 if @skip.index(obj.attr).nil?
111
- unless obj.lexicals.nil?
112
- lexis = obj.get_class(@lexis) #lexicals.collect { |lex| (lex.attr =~ @lexis) ? lex : nil }.compact # get_class(@lexis)
113
- lexis.each { |lex| @vectors << lex.form.downcase }
114
- add('Anzahl von Vektor-Wörtern', lexis.size)
115
- end
110
+ forward("#{@prompt} #{obj.inspect}") if eval(@debug)
111
+ elsif obj.is_a?(Word) && !@skip.include?(obj.attr)
112
+ @word_count += 1
113
+
114
+ cnt = obj.get_class(@lex).each { |lex|
115
+ vec = lex.form.downcase
116
+ @sort_format ? @vectors << vec : forward(vec)
117
+ }.size
118
+
119
+ add('Anzahl von Vektor-Wörtern', cnt)
116
120
  end
117
121
  end
118
122
 
119
123
  private
120
124
 
121
- def sendVector
122
- return if @vectors.size==0
123
-
125
+ def send_vectors
124
126
  add('Objekte gefiltert', @vectors.size)
125
127
 
126
- # Array der Vector-Wörter zählen und nach Häufigkeit sortieren
127
- if @sort=='normal'
128
- @vectors = @vectors.compact.sort.uniq
128
+ if @sort_format == 'normal'
129
+ @vectors.sort!
130
+ @vectors.uniq!
131
+
132
+ @vectors.each(&method(:forward)).clear
129
133
  else
130
- cnt = Hash.new(0)
131
- @vectors.compact.each { |e| cnt[e]+=1 }
132
- @vectors = cnt.to_a.sort { |x,y|
133
- if (y[1]<=>x[1])==0
134
- x[0]<=>y[0]
135
- else
136
- y[1]<=>x[1]
137
- end
138
- }
139
- end
134
+ cnt, fmt = Hash.new(0), '%d'
135
+
136
+ @vectors.each { |v| cnt[v] += 1 }.clear
137
+ vec = cnt.sort_by { |v, c| [-c, v] }
140
138
 
141
- # Vectoren je nach Parameter formatiert weiterleiten
142
- @vectors.collect { |vec|
143
- case @sort
144
- when 'term_abs' then sprintf "%d %s", vec[1], vec[0]
145
- when 'term_rel' then sprintf "%6.5f %s", vec[1].to_f/@word_count, vec[0]
146
- when 'sto_abs' then sprintf "%s {%d}", vec[0], vec[1]
147
- when 'sto_rel' then sprintf "%s {%6.5f}", vec[0], vec[1].to_f/@word_count
148
- else sprintf "%s", vec
139
+ if @sort_method == 'rel'
140
+ vec.each { |v| v[1] /= @word_count }
141
+ fmt = '%6.5f'
149
142
  end
150
- }.each(&method(:forward))
151
143
 
152
- @word_count = 0 if @sort == 'sto_rel'
144
+ if @sort_format == 'sto'
145
+ fmt, @word_count = "%s {#{fmt}}", 0.0
146
+ else
147
+ fmt.insert(1, '2$') << ' %1$s'
148
+ end
149
+
150
+ vec.each { |v| forward(fmt % v) }
151
+ end
153
152
  end
154
153
 
155
154
  end
@@ -71,19 +71,18 @@ class Lingo
71
71
  set_dic
72
72
  end
73
73
 
74
- def control(cmd, par)
75
- @dic.report.each_pair { |key, value|
76
- set(key, value)
77
- } if cmd == STR_CMD_STATUS
74
+ def control(cmd, param)
75
+ report_on(cmd, @dic)
78
76
  end
79
77
 
80
78
  def process(obj)
81
79
  if obj.is_a?(Token) && obj.attr == TA_WORD
82
80
  inc('Anzahl gesuchter Wörter')
83
- word = @dic.find_word(obj.form)
84
- inc('Anzahl gefundener Wörter') unless word.unknown?
85
- obj = word
81
+
82
+ obj = @dic.find_word(obj.form)
83
+ inc('Anzahl gefundener Wörter') unless obj.unknown?
86
84
  end
85
+
87
86
  forward(obj)
88
87
  end
89
88
 
@@ -24,6 +24,8 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
+ require 'nuggets/string/evaluate'
28
+
27
29
  class Lingo
28
30
 
29
31
  # Lingo ist als universelles Indexierungssystem entworfen worden. Seine Stärke liegt in der einfachen Konfigurierbarkeit für
@@ -80,6 +82,8 @@ class Lingo
80
82
  STA_TIM_COMMANDS = 'Time to control '
81
83
  STA_TIM_OBJECTS = 'Time to process '
82
84
 
85
+ DEFAULT_SKIP = [TA_PUNCTUATION, TA_OTHER].join(',')
86
+
83
87
  def initialize(config, lingo)
84
88
  @lingo = lingo
85
89
 
@@ -129,6 +133,15 @@ class Lingo
129
133
 
130
134
  private
131
135
 
136
+ def find_word(f, d = @dic, g = @gra)
137
+ w = d.find_word(f)
138
+ g && (block_given? ? !yield(w) : w.unknown?) ? g.find_compound(f) : w
139
+ end
140
+
141
+ def report_on(cmd, *rep)
142
+ rep.each { |r| r.report.each { |q| set(*q) } } if cmd == STR_CMD_STATUS
143
+ end
144
+
132
145
  def sta_for(key)
133
146
  %w[NUM TIM].map { |i| self.class.const_get("STA_#{i}_#{key.upcase}") }
134
147
  end
@@ -139,9 +152,9 @@ class Lingo
139
152
 
140
153
  return yield unless @lingo.report_time
141
154
 
142
- @timer = Time.new
155
+ @timer = Time.now.to_i
143
156
  res = yield
144
- add(t, Time.new - @timer)
157
+ add(t, Time.now.to_i - @timer)
145
158
  res
146
159
  end
147
160
 
@@ -184,7 +197,7 @@ class Lingo
184
197
  })
185
198
  }
186
199
 
187
- @lingo.warn msg % arg
200
+ warn msg % arg
188
201
  end
189
202
 
190
203
  def report_status
@@ -192,8 +205,8 @@ class Lingo
192
205
 
193
206
  msg = "Attendee <%s> was connected from '%s' to '%s' reporting..."
194
207
 
195
- @lingo.warn msg % @config.values_at(*%w[name in out]),
196
- nil, report.sort.map { |k, v| " #{k} = #{v}" }, nil
208
+ warn msg % @config.values_at(*%w[name in out]), nil,
209
+ report.sort.map! { |k, v| " #{k} = #{v}" }, nil
197
210
  end
198
211
 
199
212
  def skip_command
@@ -217,8 +230,8 @@ class Lingo
217
230
  @config.fetch(key, default)
218
231
  end
219
232
 
220
- def get_array(key, default = nil)
221
- get_key(key, default).split(STRING_SEPARATOR_RE)
233
+ def get_array(key, default = nil, m = nil)
234
+ get_key(key, default).split(SEP_RE).tap { |ary| ary.map!(&m) if m }
222
235
  end
223
236
 
224
237
  def dictionary(src, mod)
@@ -237,6 +250,10 @@ class Lingo
237
250
  @gra = grammar(get_array('source'), get_key('mode', 'all'))
238
251
  end
239
252
 
253
+ def warn(*msg)
254
+ @lingo.warn(*msg)
255
+ end
256
+
240
257
  end
241
258
 
242
259
  end
@@ -252,6 +269,7 @@ require_relative 'attendee/noneword_filter'
252
269
  require_relative 'attendee/object_filter'
253
270
  require_relative 'attendee/variator'
254
271
  require_relative 'attendee/sequencer'
272
+ require_relative 'attendee/stemmer'
255
273
  require_relative 'attendee/synonymer'
256
274
  require_relative 'attendee/text_reader'
257
275
  require_relative 'attendee/text_writer'
@@ -28,8 +28,6 @@ class Lingo
28
28
 
29
29
  class BufferedAttendee < Attendee
30
30
 
31
- BufferInsert = Struct.new(:position, :object)
32
-
33
31
  def initialize(config, lingo)
34
32
  @buffer, @inserts = [], []
35
33
  super
@@ -38,30 +36,58 @@ class Lingo
38
36
  protected
39
37
 
40
38
  def process(obj)
41
- @buffer.push(obj)
39
+ @buffer << obj
42
40
  process_buffer if process_buffer?
43
41
  end
44
42
 
45
43
  private
46
44
 
47
- def forward_buffer
48
- @inserts.sort_by!(&:position).each { |i|
49
- @buffer.insert(i.position, i.object)
50
- }.clear
45
+ def form_at(index, klass = WordForm)
46
+ obj = @buffer[index]
47
+ obj.form if obj.is_a?(klass)
48
+ end
51
49
 
50
+ def forward_buffer
51
+ @inserts.sort_by!(&:first).each { |i| @buffer.insert(*i) }.clear
52
52
  @buffer.each(&method(:forward)).clear
53
53
  end
54
54
 
55
+ def forward_number_of_token(len = default = @buffer.size, punct = !default)
56
+ begin
57
+ unless @buffer.empty?
58
+ forward(item = @buffer.delete_at(0))
59
+ len -= 1 unless punct && item.form == CHAR_PUNCT
60
+ end
61
+ end while len > 0
62
+ end
63
+
64
+ def valid_tokens_in_buffer
65
+ @buffer.count { |item| item.form != CHAR_PUNCT }
66
+ end
67
+
55
68
  def process_buffer?
56
- true
69
+ !instance_variable_defined?(:@expected_tokens_in_buffer) ||
70
+ valid_tokens_in_buffer >= @expected_tokens_in_buffer
57
71
  end
58
72
 
59
73
  def process_buffer
60
74
  raise NotImplementedError
61
75
  end
62
76
 
63
- def deferred_insert(pos, obj)
64
- @inserts << BufferInsert.new(pos, obj)
77
+ def control_multi(cmd, dic = @dic)
78
+ report_on(cmd, dic)
79
+
80
+ if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
81
+ @eof_handling = true
82
+
83
+ while valid_tokens_in_buffer > 1
84
+ process_buffer
85
+ end
86
+
87
+ forward_number_of_token
88
+
89
+ @eof_handling = false
90
+ end
65
91
  end
66
92
 
67
93
  end
@@ -31,26 +31,26 @@ class Lingo
31
31
  module Cachable
32
32
 
33
33
  def init_cachable
34
- @cache = Hash.new(false)
34
+ @cachable_hash = Hash.new(false)
35
35
  end
36
36
 
37
37
  def hit?(key)
38
- @cache.has_key?(key)
38
+ @cachable_hash.has_key?(key)
39
39
  end
40
40
 
41
- def store(key, value)
42
- @cache[key] = cache_value(value)
43
- value
41
+ def store(key, val)
42
+ @cachable_hash[key] = cache_value(val)
43
+ val
44
44
  end
45
45
 
46
46
  def retrieve(key)
47
- cache_value(@cache[key])
47
+ cache_value(@cachable_hash[key])
48
48
  end
49
49
 
50
50
  private
51
51
 
52
- def cache_value(value)
53
- value.nil? ? nil : value.dup
52
+ def cache_value(val)
53
+ val.dup unless val.nil?
54
54
  end
55
55
 
56
56
  end
data/lib/lingo/config.rb CHANGED
@@ -41,13 +41,12 @@ class Lingo
41
41
  load_config('config')
42
42
 
43
43
  Array(self['meeting/attendees']).each { |a|
44
- r = a['text_reader'] || a['textreader'] or next
44
+ r = a['text_reader'] || a['textreader'] or next # DEPRECATE textreader
45
45
 
46
46
  f = @cli.files
47
47
 
48
48
  if i = r['files']
49
- r['files'] = i.strip == '$(files)' ?
50
- f : i.split(STRING_SEPARATOR_RE)
49
+ r['files'] = i.strip == '$(files)' ? f : i.split(SEP_RE)
51
50
  elsif !f.empty?
52
51
  r['files'] = f
53
52
  end
@@ -57,12 +56,12 @@ class Lingo
57
56
  end
58
57
 
59
58
  def [](key)
60
- key_to_nodes(key).inject(@opts) { |value, node| value[node] }
59
+ key_to_nodes(key).inject(@opts) { |hash, node| hash[node] }
61
60
  end
62
61
 
63
- def []=(key, value)
62
+ def []=(key, val)
64
63
  nodes = key_to_nodes(key); node = nodes.pop
65
- (self[nodes_to_key(nodes)] ||= {})[node] = value
64
+ (self[nodes_to_key(nodes)] ||= {})[node] = val
66
65
  end
67
66
 
68
67
  def stdin
data/lib/lingo/ctl.rb CHANGED
@@ -25,7 +25,6 @@
25
25
  #++
26
26
 
27
27
  require 'optparse'
28
- require 'fileutils'
29
28
 
30
29
  class Lingo
31
30
 
@@ -88,7 +87,7 @@ Usage: #{PROG} <command> [arguments] [options]
88
87
  #{PROG} [-h|--help] [--version]
89
88
  EOT
90
89
 
91
- def do
90
+ def ctl
92
91
  parse_options
93
92
  send("do_#{ALIASES[ARGV.shift]}")
94
93
  end
@@ -230,7 +229,7 @@ EOT
230
229
  end
231
230
 
232
231
  def self.ctl
233
- Ctl.do
232
+ Ctl.ctl
234
233
  rescue => err
235
234
  raise if $VERBOSE
236
235
  abort "#{err.backtrace.first}: #{err} (#{err.class})"
@@ -24,6 +24,8 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
+ require 'digest/sha1'
28
+
27
29
  class Lingo
28
30
 
29
31
  class Database
@@ -39,35 +41,16 @@ class Lingo
39
41
  end
40
42
 
41
43
  def encode(key, val)
42
- hex = ''
43
-
44
- crypt(key, val).each_byte { |byte|
45
- # To get a hex representation for a char we just utilize
46
- # the quotient and the remainder of division by base 16.
47
- q, r = byte.divmod(16)
48
- hex << HEX_CHARS[q] << HEX_CHARS[r]
49
- }
50
-
51
- [digest(key), hex]
44
+ [digest(key), crypt(key, val).each_byte.with_object('') { |b, s|
45
+ b.divmod(16).each { |i| s << HEX_CHARS[i] }
46
+ }]
52
47
  end
53
48
 
54
49
  def decode(key, val)
55
- str, q, first = '', 0, false
56
-
57
- val.each_byte { |byte|
58
- byte = byte.chr(ENC)
59
-
60
- # Our hex chars are 2 bytes wide, so we have to keep track
61
- # of whether it's the first or the second of the two.
62
- if first = !first
63
- q = HEX_CHARS.index(byte)
64
- else
65
- # Now we got both parts, so let's revert the divmod(16)
66
- str << q * 16 + HEX_CHARS.index(byte)
67
- end
68
- }
69
-
70
- crypt(key, str)
50
+ crypt(key, val.each_byte.each_slice(2).with_object('') { |b, s|
51
+ q, r = b.map { |i| HEX_CHARS.index(i.chr(ENC)) }
52
+ s << q * 16 + r
53
+ })
71
54
  end
72
55
 
73
56
  private
@@ -32,14 +32,12 @@ class Lingo
32
32
 
33
33
  module GDBMStore
34
34
 
35
- private
35
+ Database.register(self, 'db')
36
36
 
37
- def store_ext
38
- '.db'
39
- end
37
+ private
40
38
 
41
39
  def _open
42
- GDBM.open(@dbm_name)
40
+ GDBM.open(@stofile)
43
41
  end
44
42
 
45
43
  end
@@ -32,14 +32,12 @@ class Lingo
32
32
 
33
33
  module LibCDBStore
34
34
 
35
- private
35
+ Database.register(self, 'cdb')
36
36
 
37
- def store_ext
38
- '.cdb'
39
- end
37
+ private
40
38
 
41
39
  def create
42
- LibCDB::CDB.open(@dbm_name, 'w') { |db|
40
+ LibCDB::CDB.open(@stofile, 'w') { |db|
43
41
  @db = db
44
42
  yield
45
43
  }
@@ -48,7 +46,7 @@ class Lingo
48
46
  end
49
47
 
50
48
  def _open
51
- LibCDB::CDB.open(@dbm_name)
49
+ LibCDB::CDB.open(@stofile)
52
50
  end
53
51
 
54
52
  end
@@ -32,26 +32,31 @@ class Lingo
32
32
 
33
33
  module SDBMStore
34
34
 
35
+ Database.register(self, %w[dir pag], -1, false)
36
+
35
37
  private
36
38
 
37
39
  def uptodate?
38
- super(@dbm_name + '.pag')
40
+ super(@stofile + EXT.last)
39
41
  end
40
42
 
41
43
  def _clear
42
- File.delete(*Dir["#{@dbm_name}.{pag,dir}"])
44
+ File.delete(*Dir["#{@stofile}{#{EXT.join(',')}}"])
43
45
  end
44
46
 
45
47
  def _open
46
- SDBM.open(@dbm_name)
48
+ SDBM.open(@stofile)
49
+ end
50
+
51
+ def _get(key)
52
+ val = super
53
+ val && val.encode(ENC)
47
54
  end
48
55
 
49
56
  def _set(key, val)
50
57
  if val.length > 950
58
+ warn "Warning: Entry `#{key}' (#{@srcfile}) too long for SDBM. Truncating..."
51
59
  val = val[0, 950]
52
-
53
- @lingo.warn "Warning: Entry `#{key}' (#{@src_file})" <<
54
- 'too long for SDBM. Truncating...'
55
60
  end
56
61
 
57
62
  super
@@ -28,50 +28,10 @@ class Lingo
28
28
 
29
29
  class Database
30
30
 
31
- class ShowProgress
31
+ class ShowProgress < ShowProgress
32
32
 
33
- def initialize(src, max, act = true)
34
- @out, @act = src.instance_variable_get(:@lingo).config.stderr, act
35
-
36
- # To get the length of the formatted string we have
37
- # to actually substitute the placeholder.
38
- fmt = ' [%3d%%]'
39
- len = (fmt % 0).length
40
-
41
- # Now we know how far to "go back" to
42
- # overwrite the formatted string...
43
- back = "\b" * len
44
-
45
- @fmt = fmt + back
46
- @clr = ' ' * len + back
47
-
48
- print src.instance_variable_get(:@config)['name'], ': '
49
-
50
- @rat, @cnt, @next = max / 100.0, 0, 0
51
- print 'convert '
52
- step
53
-
54
- yield self
55
-
56
- print "#{@clr}ok\n"
57
- end
58
-
59
- def [](value)
60
- @cnt = value
61
- step if @cnt >= @next
62
- end
63
-
64
- private
65
-
66
- def step
67
- percent = @cnt / @rat
68
- @next = (percent + 1) * @rat
69
-
70
- print @fmt % percent
71
- end
72
-
73
- def print(*args)
74
- @out.print(*args) if @act
33
+ def initialize(obj, max, act = true)
34
+ super(obj, max, obj.instance_variable_get(:@config)['name'], act, 'convert')
75
35
  end
76
36
 
77
37
  end
@@ -39,18 +39,14 @@ class Lingo
39
39
 
40
40
  def initialize(id, lingo)
41
41
  super
42
-
43
- @separator = @config.fetch('separator', '*')
44
- @line_pattern = Regexp.new('^(' + @legal_word + ')' + Regexp.escape(@separator) + '(' + @legal_word + ')$')
42
+ @pat = /^(#{@wrd})#{Regexp.escape(@sep ||= '*')}(#{@wrd})$/
45
43
  end
46
44
 
47
45
  private
48
46
 
49
47
  def convert_line(line, key, val)
50
48
  key, val = key.strip, val.strip
51
- val = '' if key == val
52
- val = [val + '#' + @wordclass]
53
- [key, val]
49
+ [key, %W[#{val unless key == val}##{@def}]]
54
50
  end
55
51
 
56
52
  end
@@ -40,9 +40,7 @@ class Lingo
40
40
 
41
41
  def initialize(id, lingo)
42
42
  super
43
-
44
- @separator = @config.fetch('separator', ';')
45
- @line_pattern = Regexp.new('^' + @legal_word + '(?:' + Regexp.escape(@separator) + @legal_word + ')*$')
43
+ @pat = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/
46
44
  end
47
45
 
48
46
  def set(db, key, val)
@@ -52,8 +50,8 @@ class Lingo
52
50
  private
53
51
 
54
52
  def convert_line(line, key, val)
55
- values = line.split(@separator).map { |value| value.strip }
56
- [values[0], values[1..-1]]
53
+ values = line.split(@sep).each(&:strip!)
54
+ [values.shift, values]
57
55
  end
58
56
 
59
57
  end
@@ -38,11 +38,7 @@ class Lingo
38
38
 
39
39
  def initialize(id, lingo)
40
40
  super
41
-
42
- @separator = @config.fetch('separator', ';')
43
- @line_pattern = Regexp.new('^' + @legal_word + '(?:' + Regexp.escape(@separator) + @legal_word + ')*$')
44
-
45
- @idx = -1
41
+ @pat, @idx = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/, -1
46
42
  end
47
43
 
48
44
  def set(db, key, val)
@@ -53,7 +49,7 @@ class Lingo
53
49
  private
54
50
 
55
51
  def convert_line(line, key, val)
56
- [nil, line.split(@separator).map { |value| value.strip }]
52
+ [nil, line.split(@sep).each(&:strip!)]
57
53
  end
58
54
 
59
55
  end