lingo 1.8.1 → 1.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +23 -5
- data/README +1 -1
- data/Rakefile +5 -7
- data/TODO +2 -0
- data/bin/lingo +5 -1
- data/de.lang +1 -1
- data/en/lingo-syn.txt +0 -0
- data/en.lang +2 -1
- data/lib/lingo/attendee/abbreviator.rb +8 -9
- data/lib/lingo/attendee/debugger.rb +5 -4
- data/lib/lingo/attendee/decomposer.rb +8 -3
- data/lib/lingo/attendee/dehyphenizer.rb +19 -63
- data/lib/lingo/attendee/formatter.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +67 -155
- data/lib/lingo/attendee/noneword_filter.rb +16 -9
- data/lib/lingo/attendee/object_filter.rb +1 -1
- data/lib/lingo/attendee/sequencer.rb +32 -63
- data/lib/lingo/attendee/stemmer/porter.rb +343 -0
- data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
- data/lib/lingo/attendee/synonymer.rb +10 -9
- data/lib/lingo/attendee/text_reader.rb +102 -76
- data/lib/lingo/attendee/text_writer.rb +23 -26
- data/lib/lingo/attendee/tokenizer.rb +13 -27
- data/lib/lingo/attendee/variator.rb +26 -66
- data/lib/lingo/attendee/vector_filter.rb +42 -43
- data/lib/lingo/attendee/word_searcher.rb +6 -7
- data/lib/lingo/attendee.rb +25 -7
- data/lib/lingo/buffered_attendee.rb +36 -10
- data/lib/lingo/cachable.rb +8 -8
- data/lib/lingo/config.rb +5 -6
- data/lib/lingo/ctl.rb +2 -3
- data/lib/lingo/database/crypter.rb +9 -26
- data/lib/lingo/database/gdbm_store.rb +3 -5
- data/lib/lingo/database/libcdb_store.rb +4 -6
- data/lib/lingo/database/sdbm_store.rb +11 -6
- data/lib/lingo/database/show_progress.rb +3 -43
- data/lib/lingo/database/source/key_value.rb +2 -6
- data/lib/lingo/database/source/multi_key.rb +3 -5
- data/lib/lingo/database/source/multi_value.rb +2 -6
- data/lib/lingo/database/source/single_word.rb +4 -6
- data/lib/lingo/database/source/word_class.rb +4 -10
- data/lib/lingo/database/source.rb +20 -18
- data/lib/lingo/database.rb +84 -59
- data/lib/lingo/error.rb +57 -1
- data/lib/lingo/language/dictionary.rb +21 -18
- data/lib/lingo/language/grammar.rb +40 -49
- data/lib/lingo/language/lexical.rb +6 -6
- data/lib/lingo/language/lexical_hash.rb +6 -0
- data/lib/lingo/language/word.rb +32 -15
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/language.rb +14 -25
- data/lib/lingo/reportable.rb +12 -10
- data/lib/lingo/show_progress.rb +81 -0
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo.rb +63 -24
- data/lingo-call.cfg +6 -10
- data/lingo.cfg +60 -44
- data/lir.cfg +42 -41
- data/test/attendee/ts_abbreviator.rb +3 -5
- data/test/attendee/ts_decomposer.rb +3 -5
- data/test/attendee/ts_multi_worder.rb +87 -145
- data/test/attendee/ts_noneword_filter.rb +5 -3
- data/test/attendee/ts_object_filter.rb +5 -3
- data/test/attendee/ts_sequencer.rb +3 -5
- data/test/attendee/ts_stemmer.rb +309 -0
- data/test/attendee/ts_synonymer.rb +15 -11
- data/test/attendee/ts_text_reader.rb +12 -15
- data/test/attendee/ts_text_writer.rb +24 -29
- data/test/attendee/ts_tokenizer.rb +9 -7
- data/test/attendee/ts_variator.rb +4 -4
- data/test/attendee/ts_vector_filter.rb +24 -16
- data/test/attendee/ts_word_searcher.rb +20 -36
- data/test/{lir.csv → lir.vec} +0 -0
- data/test/ref/artikel.vec +943 -943
- data/test/ref/artikel.ven +943 -943
- data/test/ref/lir.non +201 -201
- data/test/ref/lir.seq +178 -178
- data/test/ref/lir.syn +49 -49
- data/test/ref/lir.vec +329 -0
- data/test/test_helper.rb +20 -36
- data/test/ts_database.rb +10 -10
- data/test/ts_language.rb +279 -319
- metadata +93 -104
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lingo-all.cfg +0 -89
- data/porter/stem.cfg +0 -311
- data/porter/stem.rb +0 -150
- data/test/ref/lir.csv +0 -329
- data/test.cfg +0 -79
@@ -29,7 +29,7 @@ class Lingo
|
|
29
29
|
module Language
|
30
30
|
|
31
31
|
# Die Klasse Grammar beinhaltet grammatikalische Spezialitäten einer Sprache. Derzeit findet die
|
32
|
-
# Kompositumerkennung hier ihren Platz, die mit der Methode
|
32
|
+
# Kompositumerkennung hier ihren Platz, die mit der Methode find_compound aufgerufen werden kann.
|
33
33
|
# Die Klasse Grammar wird genau wie ein Dictionary initialisiert. Das bei der Initialisierung angegebene Wörterbuch ist Grundlage
|
34
34
|
# für die Erkennung der Kompositumteile.
|
35
35
|
|
@@ -40,31 +40,26 @@ class Lingo
|
|
40
40
|
|
41
41
|
HYPHEN_RE = %r{\A(.+)-([^-]+)\z}
|
42
42
|
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
def self.open(*args)
|
44
|
+
yield grammar = new(*args)
|
45
|
+
ensure
|
46
|
+
grammar.close if grammar
|
47
|
+
end
|
48
|
+
|
46
49
|
def initialize(config, lingo)
|
47
50
|
init_cachable
|
48
51
|
init_reportable
|
49
52
|
|
50
53
|
@dic, @suggestions = Dictionary.new(config, lingo), []
|
51
54
|
|
52
|
-
cfg = lingo.dictionary_config['
|
53
|
-
|
54
|
-
# Ein Wort muss mindestens 8 Zeichen lang sein, damit
|
55
|
-
# überhaupt eine Prüfung stattfindet.
|
56
|
-
@min_word_size = (cfg['min-word-size'] || 8).to_i
|
57
|
-
|
58
|
-
# Die durchschnittliche Länge der Kompositum-Wortteile
|
59
|
-
# muss mindestens 4 Zeichen lang sein, sonst ist es kein
|
60
|
-
# gültiges Kompositum.
|
61
|
-
@min_avg_part_size = (cfg['min-avg-part-size'] || 4).to_i
|
62
|
-
|
63
|
-
# Der kürzeste Kompositum-Wortteil muss mindestens 1 Zeichen lang sein
|
64
|
-
@min_part_size = (cfg['min-part-size'] || 1).to_i
|
55
|
+
cfg = lingo.dictionary_config['compound'] ||
|
56
|
+
lingo.dictionary_config['compositum'] # DEPRECATE compositum
|
65
57
|
|
66
|
-
|
67
|
-
|
58
|
+
{
|
59
|
+
min_word_size: 8, min_avg_part_size: 4, min_part_size: 1, max_parts: 4
|
60
|
+
}.each { |k, v|
|
61
|
+
instance_variable_set("@#{k}", cfg.fetch(k.to_s.tr('_', '-'), v).to_i)
|
62
|
+
}
|
68
63
|
|
69
64
|
# Die Wortklasse eines Kompositum-Wortteils kann separat gekennzeichnet
|
70
65
|
# werden, um sie von Wortklassen normaler Wörter unterscheiden zu
|
@@ -75,7 +70,7 @@ class Lingo
|
|
75
70
|
# Bestimmte Sequenzen können als ungültige Komposita erkannt werden,
|
76
71
|
# z.B. ist ein Kompositum aus zwei Adjetiven kein Kompositum, also
|
77
72
|
# skip-sequence = 'aa'
|
78
|
-
@sequences = cfg.fetch('skip-sequences', []).map(&:downcase)
|
73
|
+
@sequences = cfg.fetch('skip-sequences', []).map!(&:downcase)
|
79
74
|
end
|
80
75
|
|
81
76
|
def close
|
@@ -86,12 +81,12 @@ class Lingo
|
|
86
81
|
super.update(@dic.report)
|
87
82
|
end
|
88
83
|
|
89
|
-
#
|
90
|
-
#
|
84
|
+
# find_compound(str) -> word wenn level=1
|
85
|
+
# find_compound(str) -> [lex, sta] wenn level!=1
|
91
86
|
#
|
92
|
-
#
|
87
|
+
# find_compound arbeitet in verschiedenen Leveln, da die Methode auch rekursiv aufgerufen wird. Ein Level größer 1
|
93
88
|
# entspricht daher einem rekursiven Aufruf
|
94
|
-
def
|
89
|
+
def find_compound(str, level = 1, tail = false)
|
95
90
|
key, top, empty = str.downcase, level == 1, [[], [], '']
|
96
91
|
|
97
92
|
if top && hit?(key)
|
@@ -108,16 +103,21 @@ class Lingo
|
|
108
103
|
|
109
104
|
inc('Komposita geprüft')
|
110
105
|
|
111
|
-
res =
|
112
|
-
|
106
|
+
lex, sta, seq = res = permute_compound(key, level, tail)
|
107
|
+
|
108
|
+
val = !lex.empty? &&
|
109
|
+
sta.size <= @max_parts &&
|
110
|
+
sta.min >= @min_part_size &&
|
111
|
+
str.length / sta.size >= @min_avg_part_size &&
|
112
|
+
(@sequences.empty? || !@sequences.include?(seq))
|
113
113
|
|
114
114
|
if top
|
115
115
|
if val
|
116
116
|
inc('Komposita erkannt')
|
117
117
|
|
118
|
-
com.attr =
|
118
|
+
com.attr = WA_COMPOUND
|
119
119
|
com.lexicals = lex.map { |l|
|
120
|
-
l.attr ==
|
120
|
+
l.attr == LA_COMPOUND ? l :
|
121
121
|
Lexical.new(l.form, l.attr + @append_wc)
|
122
122
|
}
|
123
123
|
end
|
@@ -128,14 +128,14 @@ class Lingo
|
|
128
128
|
end
|
129
129
|
end
|
130
130
|
|
131
|
-
#
|
132
|
-
def
|
133
|
-
return
|
131
|
+
# permute_compound( _aString_ ) -> [lex, sta, seq]
|
132
|
+
def permute_compound(str, level = 1, tail = false)
|
133
|
+
return test_compound($1, '-', $2, level, tail) if str =~ HYPHEN_RE
|
134
134
|
|
135
135
|
sug, len = @suggestions[level] ||= [], str.length
|
136
136
|
|
137
137
|
1.upto(len - 1) { |i|
|
138
|
-
res =
|
138
|
+
res = test_compound(str[0, i], '', str[i, len], level, tail)
|
139
139
|
|
140
140
|
unless (lex = res.first).empty?
|
141
141
|
return res unless lex.last.attr == LA_TAKEITASIS
|
@@ -146,10 +146,10 @@ class Lingo
|
|
146
146
|
sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
|
147
147
|
end
|
148
148
|
|
149
|
-
#
|
149
|
+
# test_compound() -> [lex, sta, seq]
|
150
150
|
#
|
151
151
|
# Testet einen definiert zerlegten String auf Kompositum
|
152
|
-
def
|
152
|
+
def test_compound(fstr, infix, bstr, level = 1, tail = false)
|
153
153
|
sta, seq, empty = [fstr.length, bstr.length], %w[? ?], [[], [], '']
|
154
154
|
|
155
155
|
if !(blex = @dic.select_with_suffix(bstr)).sort!.empty?
|
@@ -159,10 +159,10 @@ class Lingo
|
|
159
159
|
# 2. Word w/ infix, unless tail part
|
160
160
|
bform, seq[1] = bstr, blex.first.attr
|
161
161
|
elsif infix == '-'
|
162
|
-
blex, bsta, bseq =
|
162
|
+
blex, bsta, bseq = find_compound(bstr, level + 1, tail)
|
163
163
|
|
164
164
|
if !blex.sort!.empty?
|
165
|
-
# 3.
|
165
|
+
# 3. Compound
|
166
166
|
bform, seq[1], sta[1..-1] = blex.first.form, bseq, bsta
|
167
167
|
else
|
168
168
|
# 4. Take it as is
|
@@ -176,10 +176,10 @@ class Lingo
|
|
176
176
|
# 1. Word w/ infix
|
177
177
|
fform, seq[0] = fstr, flex.first.attr
|
178
178
|
else
|
179
|
-
flex, fsta, fseq =
|
179
|
+
flex, fsta, fseq = find_compound(fstr, level + 1, true)
|
180
180
|
|
181
181
|
if !flex.sort!.empty?
|
182
|
-
# 2.
|
182
|
+
# 2. Compound
|
183
183
|
fform, seq[0], sta[0..0] = flex.first.form, fseq, fsta
|
184
184
|
elsif infix == '-'
|
185
185
|
# 3. Take it as is
|
@@ -189,21 +189,12 @@ class Lingo
|
|
189
189
|
end
|
190
190
|
end
|
191
191
|
|
192
|
-
flex.concat(blex).delete_if { |l| l.attr ==
|
193
|
-
push(Lexical.new(fform + infix + bform,
|
192
|
+
flex.concat(blex).delete_if { |l| l.attr == LA_COMPOUND }.
|
193
|
+
push(Lexical.new(fform + infix + bform, LA_COMPOUND)).sort!
|
194
194
|
|
195
195
|
[flex, sta, seq.join]
|
196
196
|
end
|
197
197
|
|
198
|
-
private
|
199
|
-
|
200
|
-
def valid?(str, sta, seq)
|
201
|
-
sta.size <= @max_parts &&
|
202
|
-
sta.sort.first >= @min_part_size &&
|
203
|
-
str.length / sta.size >= @min_avg_part_size &&
|
204
|
-
(@sequences.empty? || !@sequences.include?(seq))
|
205
|
-
end
|
206
|
-
|
207
198
|
end
|
208
199
|
|
209
200
|
end
|
@@ -39,14 +39,14 @@ class Lingo
|
|
39
39
|
def <=>(other)
|
40
40
|
return 1 unless other.is_a?(self.class)
|
41
41
|
|
42
|
-
|
42
|
+
a1, a2 = attr, other.attr
|
43
|
+
|
44
|
+
if a1 == a2
|
43
45
|
form <=> other.form
|
44
46
|
else
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
a ? b ? b <=> a : -1 : b ? 1 : attr <=> other.attr
|
47
|
+
a1.empty? ? 1 : a2.empty? ? -1 : begin
|
48
|
+
i1, i2 = [a1, a2].map(&LA_SORTORDER.method(:index))
|
49
|
+
i1 ? i2 ? i2 <=> i1 : -1 : i2 ? 1 : a1 <=> a2
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
data/lib/lingo/language/word.rb
CHANGED
@@ -33,8 +33,16 @@ class Lingo
|
|
33
33
|
|
34
34
|
class Word < WordForm
|
35
35
|
|
36
|
-
|
37
|
-
|
36
|
+
class << self
|
37
|
+
|
38
|
+
def new_lexicals(form, attr, lex)
|
39
|
+
new(form, attr) << lex
|
40
|
+
end
|
41
|
+
|
42
|
+
def new_lexical(form, attr, lex_attr)
|
43
|
+
new_lexicals(form, attr, Lexical.new(form, lex_attr))
|
44
|
+
end
|
45
|
+
|
38
46
|
end
|
39
47
|
|
40
48
|
# Exakte Representation der originären Zeichenkette, so wie sie im Satz
|
@@ -56,23 +64,32 @@ class Lingo
|
|
56
64
|
end
|
57
65
|
|
58
66
|
def lexicals(compound_parts = true)
|
59
|
-
if !compound_parts && attr ==
|
60
|
-
@lexicals.select { |lex| lex.attr ==
|
67
|
+
if !compound_parts && attr == WA_COMPOUND
|
68
|
+
@lexicals.select { |lex| lex.attr == LA_COMPOUND }
|
61
69
|
else
|
62
70
|
@lexicals
|
63
71
|
end
|
64
72
|
end
|
65
73
|
|
66
|
-
def lexicals=(
|
67
|
-
if
|
68
|
-
@lexicals =
|
74
|
+
def lexicals=(lex)
|
75
|
+
if lex.is_a?(Array)
|
76
|
+
@lexicals = lex.sort.uniq
|
69
77
|
else
|
70
|
-
raise TypeError, "wrong argument type #{
|
78
|
+
raise TypeError, "wrong argument type #{lex.class} (expected Array)"
|
71
79
|
end
|
72
80
|
end
|
73
81
|
|
82
|
+
def add_lexicals(lex)
|
83
|
+
@lexicals.concat(lex)
|
84
|
+
|
85
|
+
@lexicals.sort!
|
86
|
+
@lexicals.uniq!
|
87
|
+
|
88
|
+
self
|
89
|
+
end
|
90
|
+
|
74
91
|
def attrs(compound_parts = true)
|
75
|
-
lexicals(compound_parts).map
|
92
|
+
lexicals(compound_parts).map(&:attr)
|
76
93
|
end
|
77
94
|
|
78
95
|
def parts
|
@@ -100,15 +117,15 @@ class Lingo
|
|
100
117
|
end
|
101
118
|
|
102
119
|
def compo_form
|
103
|
-
if attr ==
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
120
|
+
get_class(LA_COMPOUND).first if attr == WA_COMPOUND
|
121
|
+
end
|
122
|
+
|
123
|
+
def full_compound?
|
124
|
+
attr == WA_COMPOUND && get_class('x+').empty?
|
108
125
|
end
|
109
126
|
|
110
127
|
def <<(*other)
|
111
|
-
lexicals.concat(other.flatten)
|
128
|
+
lexicals.concat(other.tap(&:flatten!))
|
112
129
|
self
|
113
130
|
end
|
114
131
|
|
data/lib/lingo/language.rb
CHANGED
@@ -50,7 +50,7 @@ class Lingo
|
|
50
50
|
# Status, wenn das Word nicht gefunden werden konnte
|
51
51
|
WA_UNKNOWN = '?'
|
52
52
|
# Wort ist als Kompositum erkannt worden
|
53
|
-
|
53
|
+
WA_COMPOUND = 'KOM'
|
54
54
|
# Wort ist eine Mehrwortgruppe
|
55
55
|
WA_MULTIWORD = 'MUL'
|
56
56
|
# Wort ist eine Mehrwortgruppe
|
@@ -58,31 +58,20 @@ class Lingo
|
|
58
58
|
# Word ist unbekannt, jedoch Teil einer Mehrwortgruppe
|
59
59
|
WA_UNKMULPART = 'MU?'
|
60
60
|
|
61
|
-
LA_SUBSTANTIV = 's'
|
62
|
-
LA_ADJEKTIV = 'a'
|
63
|
-
LA_VERB = 'v'
|
64
|
-
LA_EIGENNAME = 'e'
|
65
|
-
LA_KOMPOSITUM = 'k'
|
66
|
-
LA_MULTIWORD = 'm'
|
67
|
-
LA_SEQUENCE = 'q'
|
68
|
-
LA_WORTFORM = 'w'
|
69
|
-
LA_SYNONYM = 'y'
|
70
|
-
LA_STOPWORD = 't'
|
71
|
-
LA_TAKEITASIS = 'x'
|
72
|
-
LA_UNKNOWN = '?'
|
73
|
-
|
74
61
|
LA_SORTORDER = [
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
62
|
+
LA_SEQUENCE = 'q',
|
63
|
+
LA_MULTIWORD = 'm',
|
64
|
+
LA_COMPOUND = 'k',
|
65
|
+
LA_NOUN = 's',
|
66
|
+
LA_VERB = 'v',
|
67
|
+
LA_ADJECTIVE = 'a',
|
68
|
+
LA_NAME = 'e',
|
69
|
+
LA_WORDFORM = 'w',
|
70
|
+
LA_STOPWORD = 't',
|
71
|
+
LA_TAKEITASIS = 'x',
|
72
|
+
LA_SYNONYM = 'y',
|
73
|
+
LA_STEM = 'z',
|
74
|
+
LA_UNKNOWN = '?'
|
86
75
|
].reverse.join
|
87
76
|
|
88
77
|
end
|
data/lib/lingo/reportable.rb
CHANGED
@@ -31,27 +31,29 @@ class Lingo
|
|
31
31
|
module Reportable
|
32
32
|
|
33
33
|
def init_reportable(prefix = nil)
|
34
|
-
@
|
34
|
+
@reportable_hash = Hash.new(0)
|
35
|
+
@reportable_prefix = prefix ? "#{prefix}: " : ''
|
35
36
|
end
|
36
37
|
|
37
|
-
def inc(
|
38
|
-
@
|
38
|
+
def inc(key)
|
39
|
+
@reportable_hash[key] += 1
|
39
40
|
end
|
40
41
|
|
41
|
-
def add(
|
42
|
-
@
|
42
|
+
def add(key, val)
|
43
|
+
@reportable_hash[key] += val
|
43
44
|
end
|
44
45
|
|
45
|
-
def set(
|
46
|
-
@
|
46
|
+
def set(key, val)
|
47
|
+
@reportable_hash[key] = val
|
47
48
|
end
|
48
49
|
|
49
|
-
def get(
|
50
|
-
@
|
50
|
+
def get(key)
|
51
|
+
@reportable_hash[key]
|
51
52
|
end
|
52
53
|
|
53
54
|
def report
|
54
|
-
|
55
|
+
q = @reportable_prefix
|
56
|
+
@reportable_hash.each_with_object({}) { |(k, v), r| r["#{q}#{k}"] = v }
|
55
57
|
end
|
56
58
|
|
57
59
|
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
class ShowProgress
|
30
|
+
|
31
|
+
def initialize(obj, max, name = nil, doit = true, text = 'progress')
|
32
|
+
return yield self unless max && doit
|
33
|
+
|
34
|
+
@out = obj.instance_variable_get(:@lingo).config.stderr
|
35
|
+
|
36
|
+
# To get the length of the formatted string we have
|
37
|
+
# to actually substitute the placeholder.
|
38
|
+
fmt = ' [%3d%%]'
|
39
|
+
len = (fmt % 0).length
|
40
|
+
|
41
|
+
# Now we know how far to "go back" to
|
42
|
+
# overwrite the formatted string...
|
43
|
+
back = "\b" * len
|
44
|
+
|
45
|
+
@fmt = fmt + back
|
46
|
+
@clr = ' ' * len + back
|
47
|
+
|
48
|
+
print name, ': ' if name
|
49
|
+
|
50
|
+
@rat, @cnt, @next = max / 100.0, 0, 0
|
51
|
+
print text
|
52
|
+
step
|
53
|
+
|
54
|
+
yield self
|
55
|
+
|
56
|
+
print "#{@clr} done.\n"
|
57
|
+
end
|
58
|
+
|
59
|
+
def [](value)
|
60
|
+
if defined?(@cnt)
|
61
|
+
@cnt = value
|
62
|
+
step if @cnt >= @next
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
def step
|
69
|
+
percent = @cnt / @rat
|
70
|
+
@next = (percent + 1) * @rat
|
71
|
+
|
72
|
+
print @fmt % percent if percent.finite?
|
73
|
+
end
|
74
|
+
|
75
|
+
def print(*args)
|
76
|
+
@out.print(*args)
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
data/lib/lingo/version.rb
CHANGED
data/lib/lingo.rb
CHANGED
@@ -25,6 +25,8 @@
|
|
25
25
|
#++
|
26
26
|
|
27
27
|
require 'stringio'
|
28
|
+
require 'pathname'
|
29
|
+
require 'fileutils'
|
28
30
|
require 'benchmark'
|
29
31
|
require 'nuggets/file/ext'
|
30
32
|
require 'nuggets/env/user_home'
|
@@ -43,7 +45,8 @@ class Lingo
|
|
43
45
|
CURR = ENV['LINGO_CURR'] || '.'
|
44
46
|
|
45
47
|
# The search path for Lingo dictionary and configuration files.
|
46
|
-
PATH = ENV['LINGO_PATH']
|
48
|
+
PATH = ENV['LINGO_PATH'].nil? ? [CURR, HOME, BASE] :
|
49
|
+
ENV['LINGO_PATH'].split(File::PATH_SEPARATOR)
|
47
50
|
|
48
51
|
ENV['LINGO_PLUGIN_PATH'] ||= File.join(HOME, 'plugins')
|
49
52
|
|
@@ -59,7 +62,7 @@ class Lingo
|
|
59
62
|
# Default encoding
|
60
63
|
ENC = 'UTF-8'.freeze
|
61
64
|
|
62
|
-
|
65
|
+
SEP_RE = %r{[; ,|]}
|
63
66
|
|
64
67
|
class << self
|
65
68
|
|
@@ -79,7 +82,7 @@ class Lingo
|
|
79
82
|
glob = File.join('??', glob) if type == :dict
|
80
83
|
|
81
84
|
[].tap { |list| walk(path, options) { |dir|
|
82
|
-
Dir[File.join(dir, glob)].sort
|
85
|
+
Dir[File.join(dir, glob)].sort!.each { |file|
|
83
86
|
pn = Pathname.new(file)
|
84
87
|
list << realpath_for(pn, path) if pn.file?
|
85
88
|
}
|
@@ -110,29 +113,69 @@ class Lingo
|
|
110
113
|
File.join(options_for(type)[:dir], basename(type, file))
|
111
114
|
end
|
112
115
|
|
116
|
+
def append_path(*path)
|
117
|
+
include_path(path)
|
118
|
+
end
|
119
|
+
|
120
|
+
def prepend_path(*path)
|
121
|
+
include_path(path, true)
|
122
|
+
end
|
123
|
+
|
124
|
+
def get_const(name, klass = self)
|
125
|
+
klass.const_get(name.camelcase)
|
126
|
+
rescue NameError
|
127
|
+
raise NameNotFoundError.new(klass, name)
|
128
|
+
end
|
129
|
+
|
113
130
|
private
|
114
131
|
|
132
|
+
def include_path(path, pre = false)
|
133
|
+
PATH.insert(pre ? 0 : -1, *path.map!(&:to_s))
|
134
|
+
end
|
135
|
+
|
115
136
|
def find_file(file, path, options)
|
116
|
-
|
137
|
+
if glob = options[:glob]
|
138
|
+
file = File.chomp_ext(file)
|
139
|
+
options[:ext] ||= '*'
|
140
|
+
end
|
141
|
+
|
142
|
+
file = file_with_ext(file, options)
|
143
|
+
pn = Pathname.new(file).cleanpath
|
117
144
|
|
118
145
|
if pn.relative?
|
119
146
|
walk(path, options) { |dir|
|
120
147
|
pn2 = pn.expand_path(dir)
|
121
|
-
|
148
|
+
ex = pn2.exist?
|
149
|
+
|
150
|
+
pn2 = Pathname.glob(pn2).first if glob && !ex
|
151
|
+
pn = pn2 and break if glob ? pn2 : ex
|
122
152
|
}
|
123
153
|
end
|
124
154
|
|
125
155
|
realpath_for(pn, path)
|
156
|
+
rescue Errno::ENOENT
|
157
|
+
raise unless relax = options[:relax]
|
158
|
+
relax.respond_to?(:[]) ? relax[file] : file
|
126
159
|
end
|
127
160
|
|
128
161
|
def find_store(file, path, options)
|
129
|
-
base = basename(:dict, find(:dict, file, path)
|
162
|
+
base = basename(:dict, find(:dict, file, path) {
|
163
|
+
raise SourceFileNotFoundError.new(nil, find_file(file, path,
|
164
|
+
options.merge(glob: true, relax: lambda { |_file|
|
165
|
+
raise SourceFileNotFoundError.new(file, _file)
|
166
|
+
})
|
167
|
+
))
|
168
|
+
})
|
130
169
|
|
131
170
|
walk(path.reverse, options, false) { |dir|
|
132
171
|
Pathname.new(dir).ascend { |i|
|
133
|
-
|
134
|
-
|
135
|
-
|
172
|
+
begin
|
173
|
+
stat = i.stat
|
174
|
+
|
175
|
+
break true if stat.file? || !stat.writable?
|
176
|
+
return File.chomp_ext(File.join(dir, base))
|
177
|
+
rescue Errno::ENOENT
|
178
|
+
end
|
136
179
|
}
|
137
180
|
}
|
138
181
|
|
@@ -148,7 +191,7 @@ class Lingo
|
|
148
191
|
end
|
149
192
|
|
150
193
|
def path_for(options)
|
151
|
-
options[:path] || PATH
|
194
|
+
options[:path] || PATH
|
152
195
|
end
|
153
196
|
|
154
197
|
def file_with_ext(file, options)
|
@@ -223,30 +266,25 @@ class Lingo
|
|
223
266
|
|
224
267
|
list.each { |hash|
|
225
268
|
# {'attendee' => {'name'=>'Attendee', 'in'=>'nase', 'out'=>'ohr', 'param'=>'hase'}}
|
226
|
-
cfg = hash.values.first.merge('name' => hash.keys.first.camelcase)
|
269
|
+
cfg = hash.values.first.merge('name' => name = hash.keys.first.camelcase)
|
227
270
|
|
228
271
|
%w[in out].each { |key| (cfg[key] ||= '').downcase! }
|
229
272
|
|
230
|
-
cfg['in'] = last_link
|
231
|
-
cfg['out'] = "
|
273
|
+
cfg['in'] = last_link if cfg['in'].empty?
|
274
|
+
cfg['out'] = "auto_link-#{auto_link += 1}" if cfg['out'].empty?
|
232
275
|
last_link = cfg['out']
|
233
276
|
|
234
|
-
|
235
|
-
cfg.update(data) if data
|
277
|
+
cfg.update(config["language/attendees/#{name.downcase}"] || {})
|
236
278
|
|
237
|
-
attendee = Attendee.const_get(
|
238
|
-
@attendees << attendee
|
279
|
+
@attendees << attendee = Attendee.const_get(name).new(cfg, self)
|
239
280
|
|
240
|
-
|
241
|
-
|
242
|
-
}
|
243
|
-
cfg['out'].split(STRING_SEPARATOR_RE).each { |theme|
|
244
|
-
supplier[theme] << attendee
|
281
|
+
{ 'in' => subscriber, 'out' => supplier }.each { |key, target|
|
282
|
+
cfg[key].split(SEP_RE).each { |ch| target[ch] << attendee }
|
245
283
|
}
|
246
284
|
}
|
247
285
|
|
248
|
-
supplier.each { |
|
249
|
-
|
286
|
+
supplier.each { |ch, attendees| attendees.each { |att|
|
287
|
+
att.add_subscriber(subscriber[ch])
|
250
288
|
} }
|
251
289
|
end
|
252
290
|
|
@@ -283,6 +321,7 @@ require_relative 'lingo/core_ext'
|
|
283
321
|
require_relative 'lingo/cachable'
|
284
322
|
require_relative 'lingo/reportable'
|
285
323
|
require_relative 'lingo/agenda_item'
|
324
|
+
require_relative 'lingo/show_progress'
|
286
325
|
require_relative 'lingo/database'
|
287
326
|
require_relative 'lingo/language'
|
288
327
|
require_relative 'lingo/attendee'
|