lingo 1.8.1 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +23 -5
- data/README +1 -1
- data/Rakefile +5 -7
- data/TODO +2 -0
- data/bin/lingo +5 -1
- data/de.lang +1 -1
- data/en/lingo-syn.txt +0 -0
- data/en.lang +2 -1
- data/lib/lingo/attendee/abbreviator.rb +8 -9
- data/lib/lingo/attendee/debugger.rb +5 -4
- data/lib/lingo/attendee/decomposer.rb +8 -3
- data/lib/lingo/attendee/dehyphenizer.rb +19 -63
- data/lib/lingo/attendee/formatter.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +67 -155
- data/lib/lingo/attendee/noneword_filter.rb +16 -9
- data/lib/lingo/attendee/object_filter.rb +1 -1
- data/lib/lingo/attendee/sequencer.rb +32 -63
- data/lib/lingo/attendee/stemmer/porter.rb +343 -0
- data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
- data/lib/lingo/attendee/synonymer.rb +10 -9
- data/lib/lingo/attendee/text_reader.rb +102 -76
- data/lib/lingo/attendee/text_writer.rb +23 -26
- data/lib/lingo/attendee/tokenizer.rb +13 -27
- data/lib/lingo/attendee/variator.rb +26 -66
- data/lib/lingo/attendee/vector_filter.rb +42 -43
- data/lib/lingo/attendee/word_searcher.rb +6 -7
- data/lib/lingo/attendee.rb +25 -7
- data/lib/lingo/buffered_attendee.rb +36 -10
- data/lib/lingo/cachable.rb +8 -8
- data/lib/lingo/config.rb +5 -6
- data/lib/lingo/ctl.rb +2 -3
- data/lib/lingo/database/crypter.rb +9 -26
- data/lib/lingo/database/gdbm_store.rb +3 -5
- data/lib/lingo/database/libcdb_store.rb +4 -6
- data/lib/lingo/database/sdbm_store.rb +11 -6
- data/lib/lingo/database/show_progress.rb +3 -43
- data/lib/lingo/database/source/key_value.rb +2 -6
- data/lib/lingo/database/source/multi_key.rb +3 -5
- data/lib/lingo/database/source/multi_value.rb +2 -6
- data/lib/lingo/database/source/single_word.rb +4 -6
- data/lib/lingo/database/source/word_class.rb +4 -10
- data/lib/lingo/database/source.rb +20 -18
- data/lib/lingo/database.rb +84 -59
- data/lib/lingo/error.rb +57 -1
- data/lib/lingo/language/dictionary.rb +21 -18
- data/lib/lingo/language/grammar.rb +40 -49
- data/lib/lingo/language/lexical.rb +6 -6
- data/lib/lingo/language/lexical_hash.rb +6 -0
- data/lib/lingo/language/word.rb +32 -15
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/language.rb +14 -25
- data/lib/lingo/reportable.rb +12 -10
- data/lib/lingo/show_progress.rb +81 -0
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo.rb +63 -24
- data/lingo-call.cfg +6 -10
- data/lingo.cfg +60 -44
- data/lir.cfg +42 -41
- data/test/attendee/ts_abbreviator.rb +3 -5
- data/test/attendee/ts_decomposer.rb +3 -5
- data/test/attendee/ts_multi_worder.rb +87 -145
- data/test/attendee/ts_noneword_filter.rb +5 -3
- data/test/attendee/ts_object_filter.rb +5 -3
- data/test/attendee/ts_sequencer.rb +3 -5
- data/test/attendee/ts_stemmer.rb +309 -0
- data/test/attendee/ts_synonymer.rb +15 -11
- data/test/attendee/ts_text_reader.rb +12 -15
- data/test/attendee/ts_text_writer.rb +24 -29
- data/test/attendee/ts_tokenizer.rb +9 -7
- data/test/attendee/ts_variator.rb +4 -4
- data/test/attendee/ts_vector_filter.rb +24 -16
- data/test/attendee/ts_word_searcher.rb +20 -36
- data/test/{lir.csv → lir.vec} +0 -0
- data/test/ref/artikel.vec +943 -943
- data/test/ref/artikel.ven +943 -943
- data/test/ref/lir.non +201 -201
- data/test/ref/lir.seq +178 -178
- data/test/ref/lir.syn +49 -49
- data/test/ref/lir.vec +329 -0
- data/test/test_helper.rb +20 -36
- data/test/ts_database.rb +10 -10
- data/test/ts_language.rb +279 -319
- metadata +93 -104
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lingo-all.cfg +0 -89
- data/porter/stem.cfg +0 -311
- data/porter/stem.rb +0 -150
- data/test/ref/lir.csv +0 -329
- data/test.cfg +0 -79
@@ -82,74 +82,73 @@ class Lingo
|
|
82
82
|
protected
|
83
83
|
|
84
84
|
def init
|
85
|
-
@lexis = Regexp.new(get_key('lexicals', '[sy]').downcase)
|
86
|
-
@sort = get_key('sort', 'normal').downcase
|
87
|
-
@skip = get_array('skip', TA_PUNCTUATION+','+TA_OTHER).collect {|s| s.upcase }
|
88
|
-
@vectors = Array.new
|
89
|
-
@word_count = 0
|
90
|
-
|
91
85
|
if @debug = get_key('debug', false)
|
92
86
|
@prompt = get_key('prompt', 'lex:) ')
|
87
|
+
else
|
88
|
+
@lex = Regexp.new(get_key('lexicals', '[sy]').downcase)
|
89
|
+
@skip = get_array('skip', DEFAULT_SKIP, :upcase)
|
90
|
+
|
91
|
+
if sort = get_key('sort', 'normal')
|
92
|
+
@sort_format, @sort_method = sort.downcase.split('_', 2)
|
93
|
+
end
|
93
94
|
end
|
95
|
+
|
96
|
+
@vectors, @word_count = [], 0.0
|
94
97
|
end
|
95
98
|
|
96
|
-
def control(cmd,
|
99
|
+
def control(cmd, param)
|
97
100
|
case cmd
|
98
101
|
when STR_CMD_EOL
|
99
102
|
skip_command
|
100
103
|
when STR_CMD_FILE, STR_CMD_RECORD, STR_CMD_EOF
|
101
|
-
|
102
|
-
@vectors.clear
|
104
|
+
send_vectors unless @vectors.empty?
|
103
105
|
end
|
104
106
|
end
|
105
107
|
|
106
108
|
def process(obj)
|
107
109
|
if @debug
|
108
|
-
|
109
|
-
elsif obj.is_a?(Word)
|
110
|
-
@word_count += 1
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
110
|
+
forward("#{@prompt} #{obj.inspect}") if eval(@debug)
|
111
|
+
elsif obj.is_a?(Word) && !@skip.include?(obj.attr)
|
112
|
+
@word_count += 1
|
113
|
+
|
114
|
+
cnt = obj.get_class(@lex).each { |lex|
|
115
|
+
vec = lex.form.downcase
|
116
|
+
@sort_format ? @vectors << vec : forward(vec)
|
117
|
+
}.size
|
118
|
+
|
119
|
+
add('Anzahl von Vektor-Wörtern', cnt)
|
116
120
|
end
|
117
121
|
end
|
118
122
|
|
119
123
|
private
|
120
124
|
|
121
|
-
def
|
122
|
-
return if @vectors.size==0
|
123
|
-
|
125
|
+
def send_vectors
|
124
126
|
add('Objekte gefiltert', @vectors.size)
|
125
127
|
|
126
|
-
|
127
|
-
|
128
|
-
@vectors
|
128
|
+
if @sort_format == 'normal'
|
129
|
+
@vectors.sort!
|
130
|
+
@vectors.uniq!
|
131
|
+
|
132
|
+
@vectors.each(&method(:forward)).clear
|
129
133
|
else
|
130
|
-
cnt = Hash.new(0)
|
131
|
-
|
132
|
-
@vectors
|
133
|
-
|
134
|
-
x[0]<=>y[0]
|
135
|
-
else
|
136
|
-
y[1]<=>x[1]
|
137
|
-
end
|
138
|
-
}
|
139
|
-
end
|
134
|
+
cnt, fmt = Hash.new(0), '%d'
|
135
|
+
|
136
|
+
@vectors.each { |v| cnt[v] += 1 }.clear
|
137
|
+
vec = cnt.sort_by { |v, c| [-c, v] }
|
140
138
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
when 'term_abs' then sprintf "%d %s", vec[1], vec[0]
|
145
|
-
when 'term_rel' then sprintf "%6.5f %s", vec[1].to_f/@word_count, vec[0]
|
146
|
-
when 'sto_abs' then sprintf "%s {%d}", vec[0], vec[1]
|
147
|
-
when 'sto_rel' then sprintf "%s {%6.5f}", vec[0], vec[1].to_f/@word_count
|
148
|
-
else sprintf "%s", vec
|
139
|
+
if @sort_method == 'rel'
|
140
|
+
vec.each { |v| v[1] /= @word_count }
|
141
|
+
fmt = '%6.5f'
|
149
142
|
end
|
150
|
-
}.each(&method(:forward))
|
151
143
|
|
152
|
-
|
144
|
+
if @sort_format == 'sto'
|
145
|
+
fmt, @word_count = "%s {#{fmt}}", 0.0
|
146
|
+
else
|
147
|
+
fmt.insert(1, '2$') << ' %1$s'
|
148
|
+
end
|
149
|
+
|
150
|
+
vec.each { |v| forward(fmt % v) }
|
151
|
+
end
|
153
152
|
end
|
154
153
|
|
155
154
|
end
|
@@ -71,19 +71,18 @@ class Lingo
|
|
71
71
|
set_dic
|
72
72
|
end
|
73
73
|
|
74
|
-
def control(cmd,
|
75
|
-
@dic
|
76
|
-
set(key, value)
|
77
|
-
} if cmd == STR_CMD_STATUS
|
74
|
+
def control(cmd, param)
|
75
|
+
report_on(cmd, @dic)
|
78
76
|
end
|
79
77
|
|
80
78
|
def process(obj)
|
81
79
|
if obj.is_a?(Token) && obj.attr == TA_WORD
|
82
80
|
inc('Anzahl gesuchter Wörter')
|
83
|
-
|
84
|
-
|
85
|
-
|
81
|
+
|
82
|
+
obj = @dic.find_word(obj.form)
|
83
|
+
inc('Anzahl gefundener Wörter') unless obj.unknown?
|
86
84
|
end
|
85
|
+
|
87
86
|
forward(obj)
|
88
87
|
end
|
89
88
|
|
data/lib/lingo/attendee.rb
CHANGED
@@ -24,6 +24,8 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'nuggets/string/evaluate'
|
28
|
+
|
27
29
|
class Lingo
|
28
30
|
|
29
31
|
# Lingo ist als universelles Indexierungssystem entworfen worden. Seine Stärke liegt in der einfachen Konfigurierbarkeit für
|
@@ -80,6 +82,8 @@ class Lingo
|
|
80
82
|
STA_TIM_COMMANDS = 'Time to control '
|
81
83
|
STA_TIM_OBJECTS = 'Time to process '
|
82
84
|
|
85
|
+
DEFAULT_SKIP = [TA_PUNCTUATION, TA_OTHER].join(',')
|
86
|
+
|
83
87
|
def initialize(config, lingo)
|
84
88
|
@lingo = lingo
|
85
89
|
|
@@ -129,6 +133,15 @@ class Lingo
|
|
129
133
|
|
130
134
|
private
|
131
135
|
|
136
|
+
def find_word(f, d = @dic, g = @gra)
|
137
|
+
w = d.find_word(f)
|
138
|
+
g && (block_given? ? !yield(w) : w.unknown?) ? g.find_compound(f) : w
|
139
|
+
end
|
140
|
+
|
141
|
+
def report_on(cmd, *rep)
|
142
|
+
rep.each { |r| r.report.each { |q| set(*q) } } if cmd == STR_CMD_STATUS
|
143
|
+
end
|
144
|
+
|
132
145
|
def sta_for(key)
|
133
146
|
%w[NUM TIM].map { |i| self.class.const_get("STA_#{i}_#{key.upcase}") }
|
134
147
|
end
|
@@ -139,9 +152,9 @@ class Lingo
|
|
139
152
|
|
140
153
|
return yield unless @lingo.report_time
|
141
154
|
|
142
|
-
@timer = Time.
|
155
|
+
@timer = Time.now.to_i
|
143
156
|
res = yield
|
144
|
-
add(t, Time.
|
157
|
+
add(t, Time.now.to_i - @timer)
|
145
158
|
res
|
146
159
|
end
|
147
160
|
|
@@ -184,7 +197,7 @@ class Lingo
|
|
184
197
|
})
|
185
198
|
}
|
186
199
|
|
187
|
-
|
200
|
+
warn msg % arg
|
188
201
|
end
|
189
202
|
|
190
203
|
def report_status
|
@@ -192,8 +205,8 @@ class Lingo
|
|
192
205
|
|
193
206
|
msg = "Attendee <%s> was connected from '%s' to '%s' reporting..."
|
194
207
|
|
195
|
-
|
196
|
-
|
208
|
+
warn msg % @config.values_at(*%w[name in out]), nil,
|
209
|
+
report.sort.map! { |k, v| " #{k} = #{v}" }, nil
|
197
210
|
end
|
198
211
|
|
199
212
|
def skip_command
|
@@ -217,8 +230,8 @@ class Lingo
|
|
217
230
|
@config.fetch(key, default)
|
218
231
|
end
|
219
232
|
|
220
|
-
def get_array(key, default = nil)
|
221
|
-
get_key(key, default).split(
|
233
|
+
def get_array(key, default = nil, m = nil)
|
234
|
+
get_key(key, default).split(SEP_RE).tap { |ary| ary.map!(&m) if m }
|
222
235
|
end
|
223
236
|
|
224
237
|
def dictionary(src, mod)
|
@@ -237,6 +250,10 @@ class Lingo
|
|
237
250
|
@gra = grammar(get_array('source'), get_key('mode', 'all'))
|
238
251
|
end
|
239
252
|
|
253
|
+
def warn(*msg)
|
254
|
+
@lingo.warn(*msg)
|
255
|
+
end
|
256
|
+
|
240
257
|
end
|
241
258
|
|
242
259
|
end
|
@@ -252,6 +269,7 @@ require_relative 'attendee/noneword_filter'
|
|
252
269
|
require_relative 'attendee/object_filter'
|
253
270
|
require_relative 'attendee/variator'
|
254
271
|
require_relative 'attendee/sequencer'
|
272
|
+
require_relative 'attendee/stemmer'
|
255
273
|
require_relative 'attendee/synonymer'
|
256
274
|
require_relative 'attendee/text_reader'
|
257
275
|
require_relative 'attendee/text_writer'
|
@@ -28,8 +28,6 @@ class Lingo
|
|
28
28
|
|
29
29
|
class BufferedAttendee < Attendee
|
30
30
|
|
31
|
-
BufferInsert = Struct.new(:position, :object)
|
32
|
-
|
33
31
|
def initialize(config, lingo)
|
34
32
|
@buffer, @inserts = [], []
|
35
33
|
super
|
@@ -38,30 +36,58 @@ class Lingo
|
|
38
36
|
protected
|
39
37
|
|
40
38
|
def process(obj)
|
41
|
-
@buffer
|
39
|
+
@buffer << obj
|
42
40
|
process_buffer if process_buffer?
|
43
41
|
end
|
44
42
|
|
45
43
|
private
|
46
44
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
|
45
|
+
def form_at(index, klass = WordForm)
|
46
|
+
obj = @buffer[index]
|
47
|
+
obj.form if obj.is_a?(klass)
|
48
|
+
end
|
51
49
|
|
50
|
+
def forward_buffer
|
51
|
+
@inserts.sort_by!(&:first).each { |i| @buffer.insert(*i) }.clear
|
52
52
|
@buffer.each(&method(:forward)).clear
|
53
53
|
end
|
54
54
|
|
55
|
+
def forward_number_of_token(len = default = @buffer.size, punct = !default)
|
56
|
+
begin
|
57
|
+
unless @buffer.empty?
|
58
|
+
forward(item = @buffer.delete_at(0))
|
59
|
+
len -= 1 unless punct && item.form == CHAR_PUNCT
|
60
|
+
end
|
61
|
+
end while len > 0
|
62
|
+
end
|
63
|
+
|
64
|
+
def valid_tokens_in_buffer
|
65
|
+
@buffer.count { |item| item.form != CHAR_PUNCT }
|
66
|
+
end
|
67
|
+
|
55
68
|
def process_buffer?
|
56
|
-
|
69
|
+
!instance_variable_defined?(:@expected_tokens_in_buffer) ||
|
70
|
+
valid_tokens_in_buffer >= @expected_tokens_in_buffer
|
57
71
|
end
|
58
72
|
|
59
73
|
def process_buffer
|
60
74
|
raise NotImplementedError
|
61
75
|
end
|
62
76
|
|
63
|
-
def
|
64
|
-
|
77
|
+
def control_multi(cmd, dic = @dic)
|
78
|
+
report_on(cmd, dic)
|
79
|
+
|
80
|
+
if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
|
81
|
+
@eof_handling = true
|
82
|
+
|
83
|
+
while valid_tokens_in_buffer > 1
|
84
|
+
process_buffer
|
85
|
+
end
|
86
|
+
|
87
|
+
forward_number_of_token
|
88
|
+
|
89
|
+
@eof_handling = false
|
90
|
+
end
|
65
91
|
end
|
66
92
|
|
67
93
|
end
|
data/lib/lingo/cachable.rb
CHANGED
@@ -31,26 +31,26 @@ class Lingo
|
|
31
31
|
module Cachable
|
32
32
|
|
33
33
|
def init_cachable
|
34
|
-
@
|
34
|
+
@cachable_hash = Hash.new(false)
|
35
35
|
end
|
36
36
|
|
37
37
|
def hit?(key)
|
38
|
-
@
|
38
|
+
@cachable_hash.has_key?(key)
|
39
39
|
end
|
40
40
|
|
41
|
-
def store(key,
|
42
|
-
@
|
43
|
-
|
41
|
+
def store(key, val)
|
42
|
+
@cachable_hash[key] = cache_value(val)
|
43
|
+
val
|
44
44
|
end
|
45
45
|
|
46
46
|
def retrieve(key)
|
47
|
-
cache_value(@
|
47
|
+
cache_value(@cachable_hash[key])
|
48
48
|
end
|
49
49
|
|
50
50
|
private
|
51
51
|
|
52
|
-
def cache_value(
|
53
|
-
|
52
|
+
def cache_value(val)
|
53
|
+
val.dup unless val.nil?
|
54
54
|
end
|
55
55
|
|
56
56
|
end
|
data/lib/lingo/config.rb
CHANGED
@@ -41,13 +41,12 @@ class Lingo
|
|
41
41
|
load_config('config')
|
42
42
|
|
43
43
|
Array(self['meeting/attendees']).each { |a|
|
44
|
-
r = a['text_reader'] || a['textreader'] or next
|
44
|
+
r = a['text_reader'] || a['textreader'] or next # DEPRECATE textreader
|
45
45
|
|
46
46
|
f = @cli.files
|
47
47
|
|
48
48
|
if i = r['files']
|
49
|
-
r['files'] = i.strip == '$(files)' ?
|
50
|
-
f : i.split(STRING_SEPARATOR_RE)
|
49
|
+
r['files'] = i.strip == '$(files)' ? f : i.split(SEP_RE)
|
51
50
|
elsif !f.empty?
|
52
51
|
r['files'] = f
|
53
52
|
end
|
@@ -57,12 +56,12 @@ class Lingo
|
|
57
56
|
end
|
58
57
|
|
59
58
|
def [](key)
|
60
|
-
key_to_nodes(key).inject(@opts) { |
|
59
|
+
key_to_nodes(key).inject(@opts) { |hash, node| hash[node] }
|
61
60
|
end
|
62
61
|
|
63
|
-
def []=(key,
|
62
|
+
def []=(key, val)
|
64
63
|
nodes = key_to_nodes(key); node = nodes.pop
|
65
|
-
(self[nodes_to_key(nodes)] ||= {})[node] =
|
64
|
+
(self[nodes_to_key(nodes)] ||= {})[node] = val
|
66
65
|
end
|
67
66
|
|
68
67
|
def stdin
|
data/lib/lingo/ctl.rb
CHANGED
@@ -25,7 +25,6 @@
|
|
25
25
|
#++
|
26
26
|
|
27
27
|
require 'optparse'
|
28
|
-
require 'fileutils'
|
29
28
|
|
30
29
|
class Lingo
|
31
30
|
|
@@ -88,7 +87,7 @@ Usage: #{PROG} <command> [arguments] [options]
|
|
88
87
|
#{PROG} [-h|--help] [--version]
|
89
88
|
EOT
|
90
89
|
|
91
|
-
def
|
90
|
+
def ctl
|
92
91
|
parse_options
|
93
92
|
send("do_#{ALIASES[ARGV.shift]}")
|
94
93
|
end
|
@@ -230,7 +229,7 @@ EOT
|
|
230
229
|
end
|
231
230
|
|
232
231
|
def self.ctl
|
233
|
-
Ctl.
|
232
|
+
Ctl.ctl
|
234
233
|
rescue => err
|
235
234
|
raise if $VERBOSE
|
236
235
|
abort "#{err.backtrace.first}: #{err} (#{err.class})"
|
@@ -24,6 +24,8 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'digest/sha1'
|
28
|
+
|
27
29
|
class Lingo
|
28
30
|
|
29
31
|
class Database
|
@@ -39,35 +41,16 @@ class Lingo
|
|
39
41
|
end
|
40
42
|
|
41
43
|
def encode(key, val)
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
# To get a hex representation for a char we just utilize
|
46
|
-
# the quotient and the remainder of division by base 16.
|
47
|
-
q, r = byte.divmod(16)
|
48
|
-
hex << HEX_CHARS[q] << HEX_CHARS[r]
|
49
|
-
}
|
50
|
-
|
51
|
-
[digest(key), hex]
|
44
|
+
[digest(key), crypt(key, val).each_byte.with_object('') { |b, s|
|
45
|
+
b.divmod(16).each { |i| s << HEX_CHARS[i] }
|
46
|
+
}]
|
52
47
|
end
|
53
48
|
|
54
49
|
def decode(key, val)
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# Our hex chars are 2 bytes wide, so we have to keep track
|
61
|
-
# of whether it's the first or the second of the two.
|
62
|
-
if first = !first
|
63
|
-
q = HEX_CHARS.index(byte)
|
64
|
-
else
|
65
|
-
# Now we got both parts, so let's revert the divmod(16)
|
66
|
-
str << q * 16 + HEX_CHARS.index(byte)
|
67
|
-
end
|
68
|
-
}
|
69
|
-
|
70
|
-
crypt(key, str)
|
50
|
+
crypt(key, val.each_byte.each_slice(2).with_object('') { |b, s|
|
51
|
+
q, r = b.map { |i| HEX_CHARS.index(i.chr(ENC)) }
|
52
|
+
s << q * 16 + r
|
53
|
+
})
|
71
54
|
end
|
72
55
|
|
73
56
|
private
|
@@ -32,14 +32,12 @@ class Lingo
|
|
32
32
|
|
33
33
|
module LibCDBStore
|
34
34
|
|
35
|
-
|
35
|
+
Database.register(self, 'cdb')
|
36
36
|
|
37
|
-
|
38
|
-
'.cdb'
|
39
|
-
end
|
37
|
+
private
|
40
38
|
|
41
39
|
def create
|
42
|
-
LibCDB::CDB.open(@
|
40
|
+
LibCDB::CDB.open(@stofile, 'w') { |db|
|
43
41
|
@db = db
|
44
42
|
yield
|
45
43
|
}
|
@@ -48,7 +46,7 @@ class Lingo
|
|
48
46
|
end
|
49
47
|
|
50
48
|
def _open
|
51
|
-
LibCDB::CDB.open(@
|
49
|
+
LibCDB::CDB.open(@stofile)
|
52
50
|
end
|
53
51
|
|
54
52
|
end
|
@@ -32,26 +32,31 @@ class Lingo
|
|
32
32
|
|
33
33
|
module SDBMStore
|
34
34
|
|
35
|
+
Database.register(self, %w[dir pag], -1, false)
|
36
|
+
|
35
37
|
private
|
36
38
|
|
37
39
|
def uptodate?
|
38
|
-
super(@
|
40
|
+
super(@stofile + EXT.last)
|
39
41
|
end
|
40
42
|
|
41
43
|
def _clear
|
42
|
-
File.delete(*Dir["#{@
|
44
|
+
File.delete(*Dir["#{@stofile}{#{EXT.join(',')}}"])
|
43
45
|
end
|
44
46
|
|
45
47
|
def _open
|
46
|
-
SDBM.open(@
|
48
|
+
SDBM.open(@stofile)
|
49
|
+
end
|
50
|
+
|
51
|
+
def _get(key)
|
52
|
+
val = super
|
53
|
+
val && val.encode(ENC)
|
47
54
|
end
|
48
55
|
|
49
56
|
def _set(key, val)
|
50
57
|
if val.length > 950
|
58
|
+
warn "Warning: Entry `#{key}' (#{@srcfile}) too long for SDBM. Truncating..."
|
51
59
|
val = val[0, 950]
|
52
|
-
|
53
|
-
@lingo.warn "Warning: Entry `#{key}' (#{@src_file})" <<
|
54
|
-
'too long for SDBM. Truncating...'
|
55
60
|
end
|
56
61
|
|
57
62
|
super
|
@@ -28,50 +28,10 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Database
|
30
30
|
|
31
|
-
class ShowProgress
|
31
|
+
class ShowProgress < ShowProgress
|
32
32
|
|
33
|
-
def initialize(
|
34
|
-
|
35
|
-
|
36
|
-
# To get the length of the formatted string we have
|
37
|
-
# to actually substitute the placeholder.
|
38
|
-
fmt = ' [%3d%%]'
|
39
|
-
len = (fmt % 0).length
|
40
|
-
|
41
|
-
# Now we know how far to "go back" to
|
42
|
-
# overwrite the formatted string...
|
43
|
-
back = "\b" * len
|
44
|
-
|
45
|
-
@fmt = fmt + back
|
46
|
-
@clr = ' ' * len + back
|
47
|
-
|
48
|
-
print src.instance_variable_get(:@config)['name'], ': '
|
49
|
-
|
50
|
-
@rat, @cnt, @next = max / 100.0, 0, 0
|
51
|
-
print 'convert '
|
52
|
-
step
|
53
|
-
|
54
|
-
yield self
|
55
|
-
|
56
|
-
print "#{@clr}ok\n"
|
57
|
-
end
|
58
|
-
|
59
|
-
def [](value)
|
60
|
-
@cnt = value
|
61
|
-
step if @cnt >= @next
|
62
|
-
end
|
63
|
-
|
64
|
-
private
|
65
|
-
|
66
|
-
def step
|
67
|
-
percent = @cnt / @rat
|
68
|
-
@next = (percent + 1) * @rat
|
69
|
-
|
70
|
-
print @fmt % percent
|
71
|
-
end
|
72
|
-
|
73
|
-
def print(*args)
|
74
|
-
@out.print(*args) if @act
|
33
|
+
def initialize(obj, max, act = true)
|
34
|
+
super(obj, max, obj.instance_variable_get(:@config)['name'], act, 'convert')
|
75
35
|
end
|
76
36
|
|
77
37
|
end
|
@@ -39,18 +39,14 @@ class Lingo
|
|
39
39
|
|
40
40
|
def initialize(id, lingo)
|
41
41
|
super
|
42
|
-
|
43
|
-
@separator = @config.fetch('separator', '*')
|
44
|
-
@line_pattern = Regexp.new('^(' + @legal_word + ')' + Regexp.escape(@separator) + '(' + @legal_word + ')$')
|
42
|
+
@pat = /^(#{@wrd})#{Regexp.escape(@sep ||= '*')}(#{@wrd})$/
|
45
43
|
end
|
46
44
|
|
47
45
|
private
|
48
46
|
|
49
47
|
def convert_line(line, key, val)
|
50
48
|
key, val = key.strip, val.strip
|
51
|
-
val
|
52
|
-
val = [val + '#' + @wordclass]
|
53
|
-
[key, val]
|
49
|
+
[key, %W[#{val unless key == val}##{@def}]]
|
54
50
|
end
|
55
51
|
|
56
52
|
end
|
@@ -40,9 +40,7 @@ class Lingo
|
|
40
40
|
|
41
41
|
def initialize(id, lingo)
|
42
42
|
super
|
43
|
-
|
44
|
-
@separator = @config.fetch('separator', ';')
|
45
|
-
@line_pattern = Regexp.new('^' + @legal_word + '(?:' + Regexp.escape(@separator) + @legal_word + ')*$')
|
43
|
+
@pat = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/
|
46
44
|
end
|
47
45
|
|
48
46
|
def set(db, key, val)
|
@@ -52,8 +50,8 @@ class Lingo
|
|
52
50
|
private
|
53
51
|
|
54
52
|
def convert_line(line, key, val)
|
55
|
-
values = line.split(@
|
56
|
-
[values
|
53
|
+
values = line.split(@sep).each(&:strip!)
|
54
|
+
[values.shift, values]
|
57
55
|
end
|
58
56
|
|
59
57
|
end
|
@@ -38,11 +38,7 @@ class Lingo
|
|
38
38
|
|
39
39
|
def initialize(id, lingo)
|
40
40
|
super
|
41
|
-
|
42
|
-
@separator = @config.fetch('separator', ';')
|
43
|
-
@line_pattern = Regexp.new('^' + @legal_word + '(?:' + Regexp.escape(@separator) + @legal_word + ')*$')
|
44
|
-
|
45
|
-
@idx = -1
|
41
|
+
@pat, @idx = /^#{@wrd}(?:#{Regexp.escape(@sep ||= ';')}#{@wrd})*$/, -1
|
46
42
|
end
|
47
43
|
|
48
44
|
def set(db, key, val)
|
@@ -53,7 +49,7 @@ class Lingo
|
|
53
49
|
private
|
54
50
|
|
55
51
|
def convert_line(line, key, val)
|
56
|
-
[nil, line.split(@
|
52
|
+
[nil, line.split(@sep).each(&:strip!)]
|
57
53
|
end
|
58
54
|
|
59
55
|
end
|