lingo 1.8.1 → 1.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +23 -5
- data/README +1 -1
- data/Rakefile +5 -7
- data/TODO +2 -0
- data/bin/lingo +5 -1
- data/de.lang +1 -1
- data/en/lingo-syn.txt +0 -0
- data/en.lang +2 -1
- data/lib/lingo/attendee/abbreviator.rb +8 -9
- data/lib/lingo/attendee/debugger.rb +5 -4
- data/lib/lingo/attendee/decomposer.rb +8 -3
- data/lib/lingo/attendee/dehyphenizer.rb +19 -63
- data/lib/lingo/attendee/formatter.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +67 -155
- data/lib/lingo/attendee/noneword_filter.rb +16 -9
- data/lib/lingo/attendee/object_filter.rb +1 -1
- data/lib/lingo/attendee/sequencer.rb +32 -63
- data/lib/lingo/attendee/stemmer/porter.rb +343 -0
- data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
- data/lib/lingo/attendee/synonymer.rb +10 -9
- data/lib/lingo/attendee/text_reader.rb +102 -76
- data/lib/lingo/attendee/text_writer.rb +23 -26
- data/lib/lingo/attendee/tokenizer.rb +13 -27
- data/lib/lingo/attendee/variator.rb +26 -66
- data/lib/lingo/attendee/vector_filter.rb +42 -43
- data/lib/lingo/attendee/word_searcher.rb +6 -7
- data/lib/lingo/attendee.rb +25 -7
- data/lib/lingo/buffered_attendee.rb +36 -10
- data/lib/lingo/cachable.rb +8 -8
- data/lib/lingo/config.rb +5 -6
- data/lib/lingo/ctl.rb +2 -3
- data/lib/lingo/database/crypter.rb +9 -26
- data/lib/lingo/database/gdbm_store.rb +3 -5
- data/lib/lingo/database/libcdb_store.rb +4 -6
- data/lib/lingo/database/sdbm_store.rb +11 -6
- data/lib/lingo/database/show_progress.rb +3 -43
- data/lib/lingo/database/source/key_value.rb +2 -6
- data/lib/lingo/database/source/multi_key.rb +3 -5
- data/lib/lingo/database/source/multi_value.rb +2 -6
- data/lib/lingo/database/source/single_word.rb +4 -6
- data/lib/lingo/database/source/word_class.rb +4 -10
- data/lib/lingo/database/source.rb +20 -18
- data/lib/lingo/database.rb +84 -59
- data/lib/lingo/error.rb +57 -1
- data/lib/lingo/language/dictionary.rb +21 -18
- data/lib/lingo/language/grammar.rb +40 -49
- data/lib/lingo/language/lexical.rb +6 -6
- data/lib/lingo/language/lexical_hash.rb +6 -0
- data/lib/lingo/language/word.rb +32 -15
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/language.rb +14 -25
- data/lib/lingo/reportable.rb +12 -10
- data/lib/lingo/show_progress.rb +81 -0
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo.rb +63 -24
- data/lingo-call.cfg +6 -10
- data/lingo.cfg +60 -44
- data/lir.cfg +42 -41
- data/test/attendee/ts_abbreviator.rb +3 -5
- data/test/attendee/ts_decomposer.rb +3 -5
- data/test/attendee/ts_multi_worder.rb +87 -145
- data/test/attendee/ts_noneword_filter.rb +5 -3
- data/test/attendee/ts_object_filter.rb +5 -3
- data/test/attendee/ts_sequencer.rb +3 -5
- data/test/attendee/ts_stemmer.rb +309 -0
- data/test/attendee/ts_synonymer.rb +15 -11
- data/test/attendee/ts_text_reader.rb +12 -15
- data/test/attendee/ts_text_writer.rb +24 -29
- data/test/attendee/ts_tokenizer.rb +9 -7
- data/test/attendee/ts_variator.rb +4 -4
- data/test/attendee/ts_vector_filter.rb +24 -16
- data/test/attendee/ts_word_searcher.rb +20 -36
- data/test/{lir.csv → lir.vec} +0 -0
- data/test/ref/artikel.vec +943 -943
- data/test/ref/artikel.ven +943 -943
- data/test/ref/lir.non +201 -201
- data/test/ref/lir.seq +178 -178
- data/test/ref/lir.syn +49 -49
- data/test/ref/lir.vec +329 -0
- data/test/test_helper.rb +20 -36
- data/test/ts_database.rb +10 -10
- data/test/ts_language.rb +279 -319
- metadata +93 -104
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lingo-all.cfg +0 -89
- data/porter/stem.cfg +0 -311
- data/porter/stem.rb +0 -150
- data/test/ref/lir.csv +0 -329
- data/test.cfg +0 -79
@@ -82,74 +82,73 @@ class Lingo
|
|
82
82
|
protected
|
83
83
|
|
84
84
|
def init
|
85
|
-
@lexis = Regexp.new(get_key('lexicals', '[sy]').downcase)
|
86
|
-
@sort = get_key('sort', 'normal').downcase
|
87
|
-
@skip = get_array('skip', TA_PUNCTUATION+','+TA_OTHER).collect {|s| s.upcase }
|
88
|
-
@vectors = Array.new
|
89
|
-
@word_count = 0
|
90
|
-
|
91
85
|
if @debug = get_key('debug', false)
|
92
86
|
@prompt = get_key('prompt', 'lex:) ')
|
87
|
+
else
|
88
|
+
@lex = Regexp.new(get_key('lexicals', '[sy]').downcase)
|
89
|
+
@skip = get_array('skip', DEFAULT_SKIP, :upcase)
|
90
|
+
|
91
|
+
if sort = get_key('sort', 'normal')
|
92
|
+
@sort_format, @sort_method = sort.downcase.split('_', 2)
|
93
|
+
end
|
93
94
|
end
|
95
|
+
|
96
|
+
@vectors, @word_count = [], 0.0
|
94
97
|
end
|
95
98
|
|
96
|
-
def control(cmd,
|
99
|
+
def control(cmd, param)
|
97
100
|
case cmd
|
98
101
|
when STR_CMD_EOL
|
99
102
|
skip_command
|
100
103
|
when STR_CMD_FILE, STR_CMD_RECORD, STR_CMD_EOF
|
101
|
-
|
102
|
-
@vectors.clear
|
104
|
+
send_vectors unless @vectors.empty?
|
103
105
|
end
|
104
106
|
end
|
105
107
|
|
106
108
|
def process(obj)
|
107
109
|
if @debug
|
108
|
-
|
109
|
-
elsif obj.is_a?(Word)
|
110
|
-
@word_count += 1
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
110
|
+
forward("#{@prompt} #{obj.inspect}") if eval(@debug)
|
111
|
+
elsif obj.is_a?(Word) && !@skip.include?(obj.attr)
|
112
|
+
@word_count += 1
|
113
|
+
|
114
|
+
cnt = obj.get_class(@lex).each { |lex|
|
115
|
+
vec = lex.form.downcase
|
116
|
+
@sort_format ? @vectors << vec : forward(vec)
|
117
|
+
}.size
|
118
|
+
|
119
|
+
add('Anzahl von Vektor-Wörtern', cnt)
|
116
120
|
end
|
117
121
|
end
|
118
122
|
|
119
123
|
private
|
120
124
|
|
121
|
-
def
|
122
|
-
return if @vectors.size==0
|
123
|
-
|
125
|
+
def send_vectors
|
124
126
|
add('Objekte gefiltert', @vectors.size)
|
125
127
|
|
126
|
-
|
127
|
-
|
128
|
-
@vectors
|
128
|
+
if @sort_format == 'normal'
|
129
|
+
@vectors.sort!
|
130
|
+
@vectors.uniq!
|
131
|
+
|
132
|
+
@vectors.each(&method(:forward)).clear
|
129
133
|
else
|
130
|
-
cnt = Hash.new(0)
|
131
|
-
|
132
|
-
@vectors
|
133
|
-
|
134
|
-
x[0]<=>y[0]
|
135
|
-
else
|
136
|
-
y[1]<=>x[1]
|
137
|
-
end
|
138
|
-
}
|
139
|
-
end
|
134
|
+
cnt, fmt = Hash.new(0), '%d'
|
135
|
+
|
136
|
+
@vectors.each { |v| cnt[v] += 1 }.clear
|
137
|
+
vec = cnt.sort_by { |v, c| [-c, v] }
|
140
138
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
when 'term_abs' then sprintf "%d %s", vec[1], vec[0]
|
145
|
-
when 'term_rel' then sprintf "%6.5f %s", vec[1].to_f/@word_count, vec[0]
|
146
|
-
when 'sto_abs' then sprintf "%s {%d}", vec[0], vec[1]
|
147
|
-
when 'sto_rel' then sprintf "%s {%6.5f}", vec[0], vec[1].to_f/@word_count
|
148
|
-
else sprintf "%s", vec
|
139
|
+
if @sort_method == 'rel'
|
140
|
+
vec.each { |v| v[1] /= @word_count }
|
141
|
+
fmt = '%6.5f'
|
149
142
|
end
|
150
|
-
}.each(&method(:forward))
|
151
143
|
|
152
|
-
|
144
|
+
if @sort_format == 'sto'
|
145
|
+
fmt, @word_count = "%s {#{fmt}}", 0.0
|
146
|
+
else
|
147
|
+
fmt.insert(1, '2$') << ' %1$s'
|
148
|
+
end
|
149
|
+
|
150
|
+
vec.each { |v| forward(fmt % v) }
|
151
|
+
end
|
153
152
|
end
|
154
153
|
|
155
154
|
end
|
@@ -71,19 +71,18 @@ class Lingo
|
|
71
71
|
set_dic
|
72
72
|
end
|
73
73
|
|
74
|
-
def control(cmd,
|
75
|
-
@dic
|
76
|
-
set(key, value)
|
77
|
-
} if cmd == STR_CMD_STATUS
|
74
|
+
def control(cmd, param)
|
75
|
+
report_on(cmd, @dic)
|
78
76
|
end
|
79
77
|
|
80
78
|
def process(obj)
|
81
79
|
if obj.is_a?(Token) && obj.attr == TA_WORD
|
82
80
|
inc('Anzahl gesuchter Wörter')
|
83
|
-
|
84
|
-
|
85
|
-
|
81
|
+
|
82
|
+
obj = @dic.find_word(obj.form)
|
83
|
+
inc('Anzahl gefundener Wörter') unless obj.unknown?
|
86
84
|
end
|
85
|
+
|
87
86
|
forward(obj)
|
88
87
|
end
|
89
88
|
|
data/lib/lingo/attendee.rb
CHANGED
@@ -24,6 +24,8 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'nuggets/string/evaluate'
|
28
|
+
|
27
29
|
class Lingo
|
28
30
|
|
29
31
|
# Lingo ist als universelles Indexierungssystem entworfen worden. Seine Stärke liegt in der einfachen Konfigurierbarkeit für
|
@@ -80,6 +82,8 @@ class Lingo
|
|
80
82
|
STA_TIM_COMMANDS = 'Time to control '
|
81
83
|
STA_TIM_OBJECTS = 'Time to process '
|
82
84
|
|
85
|
+
DEFAULT_SKIP = [TA_PUNCTUATION, TA_OTHER].join(',')
|
86
|
+
|
83
87
|
def initialize(config, lingo)
|
84
88
|
@lingo = lingo
|
85
89
|
|
@@ -129,6 +133,15 @@ class Lingo
|
|
129
133
|
|
130
134
|
private
|
131
135
|
|
136
|
+
def find_word(f, d = @dic, g = @gra)
|
137
|
+
w = d.find_word(f)
|
138
|
+
g && (block_given? ? !yield(w) : w.unknown?) ? g.find_compound(f) : w
|
139
|
+
end
|
140
|
+
|
141
|
+
def report_on(cmd, *rep)
|
142
|
+
rep.each { |r| r.report.each { |q| set(*q) } } if cmd == STR_CMD_STATUS
|
143
|
+
end
|
144
|
+
|
132
145
|
def sta_for(key)
|
133
146
|
%w[NUM TIM].map { |i| self.class.const_get("STA_#{i}_#{key.upcase}") }
|
134
147
|
end
|
@@ -139,9 +152,9 @@ class Lingo
|
|
139
152
|
|
140
153
|
return yield unless @lingo.report_time
|
141
154
|
|
142
|
-
@timer = Time.
|
155
|
+
@timer = Time.now.to_i
|
143
156
|
res = yield
|
144
|
-
add(t, Time.
|
157
|
+
add(t, Time.now.to_i - @timer)
|
145
158
|
res
|
146
159
|
end
|
147
160
|
|
@@ -184,7 +197,7 @@ class Lingo
|
|
184
197
|
})
|
185
198
|
}
|
186
199
|
|
187
|
-
|
200
|
+
warn msg % arg
|
188
201
|
end
|
189
202
|
|
190
203
|
def report_status
|
@@ -192,8 +205,8 @@ class Lingo
|
|
192
205
|
|
193
206
|
msg = "Attendee <%s> was connected from '%s' to '%s' reporting..."
|
194
207
|
|
195
|
-
|
196
|
-
|
208
|
+
warn msg % @config.values_at(*%w[name in out]), nil,
|
209
|
+
report.sort.map! { |k, v| " #{k} = #{v}" }, nil
|
197
210
|
end
|
198
211
|
|
199
212
|
def skip_command
|
@@ -217,8 +230,8 @@ class Lingo
|
|
217
230
|
@config.fetch(key, default)
|
218
231
|
end
|
219
232
|
|
220
|
-
def get_array(key, default = nil)
|
221
|
-
get_key(key, default).split(
|
233
|
+
def get_array(key, default = nil, m = nil)
|
234
|
+
get_key(key, default).split(SEP_RE).tap { |ary| ary.map!(&m) if m }
|
222
235
|
end
|
223
236
|
|
224
237
|
def dictionary(src, mod)
|
@@ -237,6 +250,10 @@ class Lingo
|
|
237
250
|
@gra = grammar(get_array('source'), get_key('mode', 'all'))
|
238
251
|
end
|
239
252
|
|
253
|
+
def warn(*msg)
|
254
|
+
@lingo.warn(*msg)
|
255
|
+
end
|
256
|
+
|
240
257
|
end
|
241
258
|
|
242
259
|
end
|
@@ -252,6 +269,7 @@ require_relative 'attendee/noneword_filter'
|
|
252
269
|
require_relative 'attendee/object_filter'
|
253
270
|
require_relative 'attendee/variator'
|
254
271
|
require_relative 'attendee/sequencer'
|
272
|
+
require_relative 'attendee/stemmer'
|
255
273
|
require_relative 'attendee/synonymer'
|
256
274
|
require_relative 'attendee/text_reader'
|
257
275
|
require_relative 'attendee/text_writer'
|
@@ -28,8 +28,6 @@ class Lingo
|
|
28
28
|
|
29
29
|
class BufferedAttendee < Attendee
|
30
30
|
|
31
|
-
BufferInsert = Struct.new(:position, :object)
|
32
|
-
|
33
31
|
def initialize(config, lingo)
|
34
32
|
@buffer, @inserts = [], []
|
35
33
|
super
|
@@ -38,30 +36,58 @@ class Lingo
|
|
38
36
|
protected
|
39
37
|
|
40
38
|
def process(obj)
|
41
|
-
@buffer
|
39
|
+
@buffer << obj
|
42
40
|
process_buffer if process_buffer?
|
43
41
|
end
|
44
42
|
|
45
43
|
private
|
46
44
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
|
45
|
+
def form_at(index, klass = WordForm)
|
46
|
+
obj = @buffer[index]
|
47
|
+
obj.form if obj.is_a?(klass)
|
48
|
+
end
|
51
49
|
|
50
|
+
def forward_buffer
|
51
|
+
@inserts.sort_by!(&:first).each { |i| @buffer.insert(*i) }.clear
|
52
52
|
@buffer.each(&method(:forward)).clear
|
53
53
|
end
|
54
54
|
|
55
|
+
def forward_number_of_token(len = default = @buffer.size, punct = !default)
|
56
|
+
begin
|
57
|
+
unless @buffer.empty?
|
58
|
+
forward(item = @buffer.delete_at(0))
|
59
|
+
len -= 1 unless punct && item.form == CHAR_PUNCT
|
60
|
+
end
|
61
|
+
end while len > 0
|
62
|
+
end
|
63
|
+
|
64
|
+
def valid_tokens_in_buffer
|
65
|
+
@buffer.count { |item| item.form != CHAR_PUNCT }
|
66
|
+
end
|
67
|
+
|
55
68
|
def process_buffer?
|
56
|
-
|
69
|
+
!instance_variable_defined?(:@expected_tokens_in_buffer) ||
|
70
|
+
valid_tokens_in_buffer >= @expected_tokens_in_buffer
|
57
71
|
end
|
58
72
|
|
59
73
|
def process_buffer
|
60
74
|
raise NotImplementedError
|
61
75
|
end
|
62
76
|
|
63
|
-
def
|
64
|
-
|
77
|
+
def control_multi(cmd, dic = @dic)
|
78
|
+
report_on(cmd, dic)
|
79
|
+
|
80
|
+
if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
|
81
|
+
@eof_handling = true
|
82
|
+
|
83
|
+
while valid_tokens_in_buffer > 1
|
84
|
+
process_buffer
|
85
|
+
end
|
86
|
+
|
87
|
+
forward_number_of_token
|
88
|
+
|
89
|
+
@eof_handling = false
|
90
|
+
end
|
65
91
|
end
|
66
92
|
|
67
93
|
end
|
data/lib/lingo/cachable.rb
CHANGED
@@ -31,26 +31,26 @@ class Lingo
|
|
31
31
|
module Cachable
|
32
32
|
|
33
33
|
def init_cachable
|
34
|
-
@
|
34
|
+
@cachable_hash = Hash.new(false)
|
35
35
|
end
|
36
36
|
|
37
37
|
def hit?(key)
|
38
|
-
@
|
38
|
+
@cachable_hash.has_key?(key)
|
39
39
|
end
|
40
40
|
|
41
|
-
def store(key,
|
42
|
-
@
|
43
|
-
|
41
|
+
def store(key, val)
|
42
|
+
@cachable_hash[key] = cache_value(val)
|
43
|
+
val
|
44
44
|
end
|
45
45
|
|
46
46
|
def retrieve(key)
|
47
|
-
cache_value(@
|
47
|
+
cache_value(@cachable_hash[key])
|
48
48
|
end
|
49
49
|
|
50
50
|
private
|
51
51
|
|
52
|
-
def cache_value(
|
53
|
-
|
52
|
+
def cache_value(val)
|
53
|
+
val.dup unless val.nil?
|
54
54
|
end
|
55
55
|
|
56
56
|
end
|
data/lib/lingo/config.rb
CHANGED
@@ -41,13 +41,12 @@ class Lingo
|
|
41
41
|
load_config('config')
|
42
42
|
|
43
43
|
Array(self['meeting/attendees']).each { |a|
|
44
|
-
r = a['text_reader'] || a['textreader'] or next
|
44
|
+
r = a['text_reader'] || a['textreader'] or next # DEPRECATE textreader
|
45
45
|
|
46
46
|
f = @cli.files
|
47
47
|
|
48
48
|
if i = r['files']
|
49
|
-
r['files'] = i.strip == '$(files)' ?
|
50
|
-
f : i.split(STRING_SEPARATOR_RE)
|
49
|
+
r['files'] = i.strip == '$(files)' ? f : i.split(SEP_RE)
|
51
50
|
elsif !f.empty?
|
52
51
|
r['files'] = f
|
53
52
|
end
|
@@ -57,12 +56,12 @@ class Lingo
|
|
57
56
|
end
|
58
57
|
|
59
58
|
def [](key)
|
60
|
-
key_to_nodes(key).inject(@opts) { |
|
59
|
+
key_to_nodes(key).inject(@opts) { |hash, node| hash[node] }
|
61
60
|
end
|
62
61
|
|
63
|
-
def []=(key,
|
62
|
+
def []=(key, val)
|
64
63
|
nodes = key_to_nodes(key); node = nodes.pop
|
65
|
-
(self[nodes_to_key(nodes)] ||= {})[node] =
|
64
|
+
(self[nodes_to_key(nodes)] ||= {})[node] = val
|
66
65
|
end
|
67
66
|
|
68
67
|
def stdin
|
data/lib/lingo/ctl.rb
CHANGED
@@ -25,7 +25,6 @@
|
|
25
25
|
#++
|
26
26
|
|
27
27
|
require 'optparse'
|
28
|
-
require 'fileutils'
|
29
28
|
|
30
29
|
class Lingo
|
31
30
|
|
@@ -88,7 +87,7 @@ Usage: #{PROG} <command> [arguments] [options]
|
|
88
87
|
#{PROG} [-h|--help] [--version]
|
89
88
|
EOT
|
90
89
|
|
91
|
-
def
|
90
|
+
def ctl
|
92
91
|
parse_options
|
93
92
|
send("do_#{ALIASES[ARGV.shift]}")
|
94
93
|
end
|
@@ -230,7 +229,7 @@ EOT
|
|
230
229
|
end
|
231
230
|
|
232
231
|
def self.ctl
|
233
|
-
Ctl.
|
232
|
+
Ctl.ctl
|
234
233
|
rescue => err
|
235
234
|
raise if $VERBOSE
|
236
235
|
abort "#{err.backtrace.first}: #{err} (#{err.class})"
|
@@ -24,6 +24,8 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'digest/sha1'
|
28
|
+
|
27
29
|
class Lingo
|
28
30
|
|
29
31
|
class Database
|
@@ -39,35 +41,16 @@ class Lingo
|
|
39
41
|
end
|
40
42
|
|
41
43
|
def encode(key, val)
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
# To get a hex representation for a char we just utilize
|
46
|
-
# the quotient and the remainder of division by base 16.
|
47
|
-
q, r = byte.divmod(16)
|
48
|
-
hex << HEX_CHARS[q] << HEX_CHARS[r]
|
49
|
-
}
|
50
|
-
|
51
|
-
[digest(key), hex]
|
44
|
+
[digest(key), crypt(key, val).each_byte.with_object('') { |b, s|
|
45
|
+
b.divmod(16).each { |i| s << HEX_CHARS[i] }
|
46
|
+
}]
|
52
47
|
end
|
53
48
|
|
54
49
|
def decode(key, val)
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# Our hex chars are 2 bytes wide, so we have to keep track
|
61
|
-
# of whether it's the first or the second of the two.
|
62
|
-
if first = !first
|
63
|
-
q = HEX_CHARS.index(byte)
|
64
|
-
else
|
65
|
-
# Now we got both parts, so let's revert the divmod(16)
|
66
|
-
str << q * 16 + HEX_CHARS.index(byte)
|
67
|
-
end
|
68
|
-
}
|
69
|
-
|
70
|
-
crypt(key, str)
|
50
|
+
crypt(key, val.each_byte.each_slice(2).with_object('') { |b, s|
|
51
|
+
q, r = b.map { |i| HEX_CHARS.index(i.chr(ENC)) }
|
52
|
+
s << q * 16 + r
|
53
|
+
})
|
71
54
|
end
|
72
55
|
|
73
56
|
private
|
@@ -32,14 +32,12 @@ class Lingo
|
|
32
32
|
|
33
33
|
module LibCDBStore
|
34
34
|
|
35
|
-
|
35
|
+
Database.register(self, 'cdb')
|
36
36
|
|
37
|
-
|
38
|
-
'.cdb'
|
39
|
-
end
|
37
|
+
private
|
40
38
|
|
41
39
|
def create
|
42
|
-
LibCDB::CDB.open(@
|
40
|
+
LibCDB::CDB.open(@stofile, 'w') { |db|
|
43
41
|
@db = db
|
44
42
|
yield
|
45
43
|
}
|
@@ -48,7 +46,7 @@ class Lingo
|
|
48
46
|
end
|
49
47
|
|
50
48
|
def _open
|
51
|
-
LibCDB::CDB.open(@
|
49
|
+
LibCDB::CDB.open(@stofile)
|
52
50
|
end
|
53
51
|
|
54
52
|
end
|
@@ -32,26 +32,31 @@ class Lingo
|
|
32
32
|
|
33
33
|
module SDBMStore
|
34
34
|
|
35
|
+
Database.register(self, %w[dir pag], -1, false)
|
36
|
+
|
35
37
|
private
|
36
38
|
|
37
39
|
def uptodate?
|
38
|
-
super(@
|
40
|
+
super(@stofile + EXT.last)
|
39
41
|
end
|
40
42
|
|
41
43
|
def _clear
|
42
|
-
File.delete(*Dir["#{@
|
44
|
+
File.delete(*Dir["#{@stofile}{#{EXT.join(',')}}"])
|
43
45
|
end
|
44
46
|
|
45
47
|
def _open
|
46
|
-
SDBM.open(@
|
48
|
+
SDBM.open(@stofile)
|
49
|
+
end
|
50
|
+
|
51
|
+
def _get(key)
|
52
|
+
val = super
|
53
|
+
val && val.encode(ENC)
|
47
54
|
end
|
48
55
|
|
49
56
|
def _set(key, val)
|
50
57
|
if val.length > 950
|
58
|
+
warn "Warning: Entry `#{key}' (#{@srcfile}) too long for SDBM. Truncating..."
|
51
59
|
val = val[0, 950]
|
52
|
-
|
53
|
-
@lingo.warn "Warning: Entry `#{key}' (#{@src_file})" <<
|
54
|
-
'too long for SDBM. Truncating...'
|
55
60
|
end
|
56
61
|
|
57
62
|
super
|
@@ -28,50 +28,10 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Database
|
30
30
|
|
31
|
-
class ShowProgress
|
31
|
+
class ShowProgress < ShowProgress
|
32
32
|
|
33
|
-
def initialize(
|
34
|
-
|
35
|
-
|
36
|
-
# To get the length of the formatted string we have
|
37
|
-
# to actually substitute the placeholder.
|
38
|
-
fmt = ' [%3d%%]'
|
39
|
-
len = (fmt % 0).length
|
40
|
-
|
41
|
-
# Now we know how far to "go back" to
|
42
|
-
# overwrite the formatted string...
|
43
|
-
back = "\b" * len
|
44
|
-
|
45
|
-
@fmt = fmt + back
|
46
|
-
@clr = ' ' * len + back
|
47
|
-
|
48
|
-
print src.instance_variable_get(:@config)['name'], ': '
|
49
|
-
|
50
|
-
@rat, @cnt, @next = max / 100.0, 0, 0
|
51
|
-
print 'convert '
|
52
|
-
step
|
53
|
-
|
54
|
-
yield self
|
55
|
-
|
56
|
-
print "#{@clr}ok\n"
|
57
|
-
end
|
58
|
-
|
59
|
-
def [](value)
|
60
|
-
@cnt = value
|
61
|
-
step if @cnt >= @next
|
62
|
-
end
|
63
|
-
|
64
|
-
private
|
65
|
-
|
66
|
-
def step
|
67
|
-
percent = @cnt / @rat
|
68
|
-
@next = (percent + 1) * @rat
|
69
|
-
|
70
|
-
print @fmt % percent
|
71
|
-
end
|
72
|
-
|
73
|
-
def print(*args)
|
74
|
-
@out.print(*args) if @act
|
33
|
+
def initialize(obj, max, act = true)
|
34
|
+
super(obj, max, obj.instance_variable_get(:@config)['name'], act, 'convert')
|
75
35
|
end
|
76
36
|
|
77
37
|
end
|
@@ -39,18 +39,14 @@ class Lingo
|
|
39
39
|
|
40
40
|
def initialize(id, lingo)
|
41
41
|
super
|
42
|
-
|
43
|
-
@separator = @config.fetch('separator', '*')
|
44
|
-
@line_pattern = Regexp.new('^(' + @legal_word + ')' + Regexp.escape(@separator) + '(' + @legal_word + ')$')
|
42
|
+
@pat = /^(#{@wrd})#{Regexp.escape(@sep ||= '*')}(#{@wrd})$/
|
45
43
|
end
|
46
44
|
|
47
45
|
private
|
48
46
|
|
49
47
|
def convert_line(line, key, val)
|
50
48
|
key, val = key.strip, val.strip
|
51
|
-
val
|
52
|
-
val = [val + '#' + @wordclass]
|
53
|
-
[key, val]
|
49
|
+
[key, %W[#{val unless key == val}##{@def}]]
|
54
50
|
end
|
55
51
|
|
56
52
|
end
|
@@ -40,9 +40,7 @@ class Lingo
|
|
40
40
|
|
41
41
|
def initialize(id, lingo)
|
42
42
|
super
|
43
|
-
|
44
|
-
@separator = @config.fetch('separator', ';')
|
45
|
-
@line_pattern = Regexp.new('^' + @legal_word + '(?:' + Regexp.escape(@separator) + @legal_word + ')*$')
|
43
|
+
@pat = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/
|
46
44
|
end
|
47
45
|
|
48
46
|
def set(db, key, val)
|
@@ -52,8 +50,8 @@ class Lingo
|
|
52
50
|
private
|
53
51
|
|
54
52
|
def convert_line(line, key, val)
|
55
|
-
values = line.split(@
|
56
|
-
[values
|
53
|
+
values = line.split(@sep).each(&:strip!)
|
54
|
+
[values.shift, values]
|
57
55
|
end
|
58
56
|
|
59
57
|
end
|
@@ -38,11 +38,7 @@ class Lingo
|
|
38
38
|
|
39
39
|
def initialize(id, lingo)
|
40
40
|
super
|
41
|
-
|
42
|
-
@separator = @config.fetch('separator', ';')
|
43
|
-
@line_pattern = Regexp.new('^' + @legal_word + '(?:' + Regexp.escape(@separator) + @legal_word + ')*$')
|
44
|
-
|
45
|
-
@idx = -1
|
41
|
+
@pat, @idx = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/, -1
|
46
42
|
end
|
47
43
|
|
48
44
|
def set(db, key, val)
|
@@ -53,7 +49,7 @@ class Lingo
|
|
53
49
|
private
|
54
50
|
|
55
51
|
def convert_line(line, key, val)
|
56
|
-
[nil, line.split(@
|
52
|
+
[nil, line.split(@sep).each(&:strip!)]
|
57
53
|
end
|
58
54
|
|
59
55
|
end
|