lingo 1.8.1 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +23 -5
- data/README +1 -1
- data/Rakefile +5 -7
- data/TODO +2 -0
- data/bin/lingo +5 -1
- data/de.lang +1 -1
- data/en/lingo-syn.txt +0 -0
- data/en.lang +2 -1
- data/lib/lingo/attendee/abbreviator.rb +8 -9
- data/lib/lingo/attendee/debugger.rb +5 -4
- data/lib/lingo/attendee/decomposer.rb +8 -3
- data/lib/lingo/attendee/dehyphenizer.rb +19 -63
- data/lib/lingo/attendee/formatter.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +67 -155
- data/lib/lingo/attendee/noneword_filter.rb +16 -9
- data/lib/lingo/attendee/object_filter.rb +1 -1
- data/lib/lingo/attendee/sequencer.rb +32 -63
- data/lib/lingo/attendee/stemmer/porter.rb +343 -0
- data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
- data/lib/lingo/attendee/synonymer.rb +10 -9
- data/lib/lingo/attendee/text_reader.rb +102 -76
- data/lib/lingo/attendee/text_writer.rb +23 -26
- data/lib/lingo/attendee/tokenizer.rb +13 -27
- data/lib/lingo/attendee/variator.rb +26 -66
- data/lib/lingo/attendee/vector_filter.rb +42 -43
- data/lib/lingo/attendee/word_searcher.rb +6 -7
- data/lib/lingo/attendee.rb +25 -7
- data/lib/lingo/buffered_attendee.rb +36 -10
- data/lib/lingo/cachable.rb +8 -8
- data/lib/lingo/config.rb +5 -6
- data/lib/lingo/ctl.rb +2 -3
- data/lib/lingo/database/crypter.rb +9 -26
- data/lib/lingo/database/gdbm_store.rb +3 -5
- data/lib/lingo/database/libcdb_store.rb +4 -6
- data/lib/lingo/database/sdbm_store.rb +11 -6
- data/lib/lingo/database/show_progress.rb +3 -43
- data/lib/lingo/database/source/key_value.rb +2 -6
- data/lib/lingo/database/source/multi_key.rb +3 -5
- data/lib/lingo/database/source/multi_value.rb +2 -6
- data/lib/lingo/database/source/single_word.rb +4 -6
- data/lib/lingo/database/source/word_class.rb +4 -10
- data/lib/lingo/database/source.rb +20 -18
- data/lib/lingo/database.rb +84 -59
- data/lib/lingo/error.rb +57 -1
- data/lib/lingo/language/dictionary.rb +21 -18
- data/lib/lingo/language/grammar.rb +40 -49
- data/lib/lingo/language/lexical.rb +6 -6
- data/lib/lingo/language/lexical_hash.rb +6 -0
- data/lib/lingo/language/word.rb +32 -15
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/language.rb +14 -25
- data/lib/lingo/reportable.rb +12 -10
- data/lib/lingo/show_progress.rb +81 -0
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo.rb +63 -24
- data/lingo-call.cfg +6 -10
- data/lingo.cfg +60 -44
- data/lir.cfg +42 -41
- data/test/attendee/ts_abbreviator.rb +3 -5
- data/test/attendee/ts_decomposer.rb +3 -5
- data/test/attendee/ts_multi_worder.rb +87 -145
- data/test/attendee/ts_noneword_filter.rb +5 -3
- data/test/attendee/ts_object_filter.rb +5 -3
- data/test/attendee/ts_sequencer.rb +3 -5
- data/test/attendee/ts_stemmer.rb +309 -0
- data/test/attendee/ts_synonymer.rb +15 -11
- data/test/attendee/ts_text_reader.rb +12 -15
- data/test/attendee/ts_text_writer.rb +24 -29
- data/test/attendee/ts_tokenizer.rb +9 -7
- data/test/attendee/ts_variator.rb +4 -4
- data/test/attendee/ts_vector_filter.rb +24 -16
- data/test/attendee/ts_word_searcher.rb +20 -36
- data/test/{lir.csv → lir.vec} +0 -0
- data/test/ref/artikel.vec +943 -943
- data/test/ref/artikel.ven +943 -943
- data/test/ref/lir.non +201 -201
- data/test/ref/lir.seq +178 -178
- data/test/ref/lir.syn +49 -49
- data/test/ref/lir.vec +329 -0
- data/test/test_helper.rb +20 -36
- data/test/ts_database.rb +10 -10
- data/test/ts_language.rb +279 -319
- metadata +93 -104
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lingo-all.cfg +0 -89
- data/porter/stem.cfg +0 -311
- data/porter/stem.rb +0 -150
- data/test/ref/lir.csv +0 -329
- data/test.cfg +0 -79
@@ -29,7 +29,7 @@ class Lingo
|
|
29
29
|
module Language
|
30
30
|
|
31
31
|
# Die Klasse Grammar beinhaltet grammatikalische Spezialitäten einer Sprache. Derzeit findet die
|
32
|
-
# Kompositumerkennung hier ihren Platz, die mit der Methode
|
32
|
+
# Kompositumerkennung hier ihren Platz, die mit der Methode find_compound aufgerufen werden kann.
|
33
33
|
# Die Klasse Grammar wird genau wie ein Dictionary initialisiert. Das bei der Initialisierung angegebene Wörterbuch ist Grundlage
|
34
34
|
# für die Erkennung der Kompositumteile.
|
35
35
|
|
@@ -40,31 +40,26 @@ class Lingo
|
|
40
40
|
|
41
41
|
HYPHEN_RE = %r{\A(.+)-([^-]+)\z}
|
42
42
|
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
def self.open(*args)
|
44
|
+
yield grammar = new(*args)
|
45
|
+
ensure
|
46
|
+
grammar.close if grammar
|
47
|
+
end
|
48
|
+
|
46
49
|
def initialize(config, lingo)
|
47
50
|
init_cachable
|
48
51
|
init_reportable
|
49
52
|
|
50
53
|
@dic, @suggestions = Dictionary.new(config, lingo), []
|
51
54
|
|
52
|
-
cfg = lingo.dictionary_config['
|
53
|
-
|
54
|
-
# Ein Wort muss mindestens 8 Zeichen lang sein, damit
|
55
|
-
# überhaupt eine Prüfung stattfindet.
|
56
|
-
@min_word_size = (cfg['min-word-size'] || 8).to_i
|
57
|
-
|
58
|
-
# Die durchschnittliche Länge der Kompositum-Wortteile
|
59
|
-
# muss mindestens 4 Zeichen lang sein, sonst ist es kein
|
60
|
-
# gültiges Kompositum.
|
61
|
-
@min_avg_part_size = (cfg['min-avg-part-size'] || 4).to_i
|
62
|
-
|
63
|
-
# Der kürzeste Kompositum-Wortteil muss mindestens 1 Zeichen lang sein
|
64
|
-
@min_part_size = (cfg['min-part-size'] || 1).to_i
|
55
|
+
cfg = lingo.dictionary_config['compound'] ||
|
56
|
+
lingo.dictionary_config['compositum'] # DEPRECATE compositum
|
65
57
|
|
66
|
-
|
67
|
-
|
58
|
+
{
|
59
|
+
min_word_size: 8, min_avg_part_size: 4, min_part_size: 1, max_parts: 4
|
60
|
+
}.each { |k, v|
|
61
|
+
instance_variable_set("@#{k}", cfg.fetch(k.to_s.tr('_', '-'), v).to_i)
|
62
|
+
}
|
68
63
|
|
69
64
|
# Die Wortklasse eines Kompositum-Wortteils kann separat gekennzeichnet
|
70
65
|
# werden, um sie von Wortklassen normaler Wörter unterscheiden zu
|
@@ -75,7 +70,7 @@ class Lingo
|
|
75
70
|
# Bestimmte Sequenzen können als ungültige Komposita erkannt werden,
|
76
71
|
# z.B. ist ein Kompositum aus zwei Adjetiven kein Kompositum, also
|
77
72
|
# skip-sequence = 'aa'
|
78
|
-
@sequences = cfg.fetch('skip-sequences', []).map(&:downcase)
|
73
|
+
@sequences = cfg.fetch('skip-sequences', []).map!(&:downcase)
|
79
74
|
end
|
80
75
|
|
81
76
|
def close
|
@@ -86,12 +81,12 @@ class Lingo
|
|
86
81
|
super.update(@dic.report)
|
87
82
|
end
|
88
83
|
|
89
|
-
#
|
90
|
-
#
|
84
|
+
# find_compound(str) -> word wenn level=1
|
85
|
+
# find_compound(str) -> [lex, sta] wenn level!=1
|
91
86
|
#
|
92
|
-
#
|
87
|
+
# find_compound arbeitet in verschiedenen Leveln, da die Methode auch rekursiv aufgerufen wird. Ein Level größer 1
|
93
88
|
# entspricht daher einem rekursiven Aufruf
|
94
|
-
def
|
89
|
+
def find_compound(str, level = 1, tail = false)
|
95
90
|
key, top, empty = str.downcase, level == 1, [[], [], '']
|
96
91
|
|
97
92
|
if top && hit?(key)
|
@@ -108,16 +103,21 @@ class Lingo
|
|
108
103
|
|
109
104
|
inc('Komposita geprüft')
|
110
105
|
|
111
|
-
res =
|
112
|
-
|
106
|
+
lex, sta, seq = res = permute_compound(key, level, tail)
|
107
|
+
|
108
|
+
val = !lex.empty? &&
|
109
|
+
sta.size <= @max_parts &&
|
110
|
+
sta.min >= @min_part_size &&
|
111
|
+
str.length / sta.size >= @min_avg_part_size &&
|
112
|
+
(@sequences.empty? || !@sequences.include?(seq))
|
113
113
|
|
114
114
|
if top
|
115
115
|
if val
|
116
116
|
inc('Komposita erkannt')
|
117
117
|
|
118
|
-
com.attr =
|
118
|
+
com.attr = WA_COMPOUND
|
119
119
|
com.lexicals = lex.map { |l|
|
120
|
-
l.attr ==
|
120
|
+
l.attr == LA_COMPOUND ? l :
|
121
121
|
Lexical.new(l.form, l.attr + @append_wc)
|
122
122
|
}
|
123
123
|
end
|
@@ -128,14 +128,14 @@ class Lingo
|
|
128
128
|
end
|
129
129
|
end
|
130
130
|
|
131
|
-
#
|
132
|
-
def
|
133
|
-
return
|
131
|
+
# permute_compound( _aString_ ) -> [lex, sta, seq]
|
132
|
+
def permute_compound(str, level = 1, tail = false)
|
133
|
+
return test_compound($1, '-', $2, level, tail) if str =~ HYPHEN_RE
|
134
134
|
|
135
135
|
sug, len = @suggestions[level] ||= [], str.length
|
136
136
|
|
137
137
|
1.upto(len - 1) { |i|
|
138
|
-
res =
|
138
|
+
res = test_compound(str[0, i], '', str[i, len], level, tail)
|
139
139
|
|
140
140
|
unless (lex = res.first).empty?
|
141
141
|
return res unless lex.last.attr == LA_TAKEITASIS
|
@@ -146,10 +146,10 @@ class Lingo
|
|
146
146
|
sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
|
147
147
|
end
|
148
148
|
|
149
|
-
#
|
149
|
+
# test_compound() -> [lex, sta, seq]
|
150
150
|
#
|
151
151
|
# Testet einen definiert zerlegten String auf Kompositum
|
152
|
-
def
|
152
|
+
def test_compound(fstr, infix, bstr, level = 1, tail = false)
|
153
153
|
sta, seq, empty = [fstr.length, bstr.length], %w[? ?], [[], [], '']
|
154
154
|
|
155
155
|
if !(blex = @dic.select_with_suffix(bstr)).sort!.empty?
|
@@ -159,10 +159,10 @@ class Lingo
|
|
159
159
|
# 2. Word w/ infix, unless tail part
|
160
160
|
bform, seq[1] = bstr, blex.first.attr
|
161
161
|
elsif infix == '-'
|
162
|
-
blex, bsta, bseq =
|
162
|
+
blex, bsta, bseq = find_compound(bstr, level + 1, tail)
|
163
163
|
|
164
164
|
if !blex.sort!.empty?
|
165
|
-
# 3.
|
165
|
+
# 3. Compound
|
166
166
|
bform, seq[1], sta[1..-1] = blex.first.form, bseq, bsta
|
167
167
|
else
|
168
168
|
# 4. Take it as is
|
@@ -176,10 +176,10 @@ class Lingo
|
|
176
176
|
# 1. Word w/ infix
|
177
177
|
fform, seq[0] = fstr, flex.first.attr
|
178
178
|
else
|
179
|
-
flex, fsta, fseq =
|
179
|
+
flex, fsta, fseq = find_compound(fstr, level + 1, true)
|
180
180
|
|
181
181
|
if !flex.sort!.empty?
|
182
|
-
# 2.
|
182
|
+
# 2. Compound
|
183
183
|
fform, seq[0], sta[0..0] = flex.first.form, fseq, fsta
|
184
184
|
elsif infix == '-'
|
185
185
|
# 3. Take it as is
|
@@ -189,21 +189,12 @@ class Lingo
|
|
189
189
|
end
|
190
190
|
end
|
191
191
|
|
192
|
-
flex.concat(blex).delete_if { |l| l.attr ==
|
193
|
-
push(Lexical.new(fform + infix + bform,
|
192
|
+
flex.concat(blex).delete_if { |l| l.attr == LA_COMPOUND }.
|
193
|
+
push(Lexical.new(fform + infix + bform, LA_COMPOUND)).sort!
|
194
194
|
|
195
195
|
[flex, sta, seq.join]
|
196
196
|
end
|
197
197
|
|
198
|
-
private
|
199
|
-
|
200
|
-
def valid?(str, sta, seq)
|
201
|
-
sta.size <= @max_parts &&
|
202
|
-
sta.sort.first >= @min_part_size &&
|
203
|
-
str.length / sta.size >= @min_avg_part_size &&
|
204
|
-
(@sequences.empty? || !@sequences.include?(seq))
|
205
|
-
end
|
206
|
-
|
207
198
|
end
|
208
199
|
|
209
200
|
end
|
@@ -39,14 +39,14 @@ class Lingo
|
|
39
39
|
def <=>(other)
|
40
40
|
return 1 unless other.is_a?(self.class)
|
41
41
|
|
42
|
-
|
42
|
+
a1, a2 = attr, other.attr
|
43
|
+
|
44
|
+
if a1 == a2
|
43
45
|
form <=> other.form
|
44
46
|
else
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
a ? b ? b <=> a : -1 : b ? 1 : attr <=> other.attr
|
47
|
+
a1.empty? ? 1 : a2.empty? ? -1 : begin
|
48
|
+
i1, i2 = [a1, a2].map(&LA_SORTORDER.method(:index))
|
49
|
+
i1 ? i2 ? i2 <=> i1 : -1 : i2 ? 1 : a1 <=> a2
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
data/lib/lingo/language/word.rb
CHANGED
@@ -33,8 +33,16 @@ class Lingo
|
|
33
33
|
|
34
34
|
class Word < WordForm
|
35
35
|
|
36
|
-
|
37
|
-
|
36
|
+
class << self
|
37
|
+
|
38
|
+
def new_lexicals(form, attr, lex)
|
39
|
+
new(form, attr) << lex
|
40
|
+
end
|
41
|
+
|
42
|
+
def new_lexical(form, attr, lex_attr)
|
43
|
+
new_lexicals(form, attr, Lexical.new(form, lex_attr))
|
44
|
+
end
|
45
|
+
|
38
46
|
end
|
39
47
|
|
40
48
|
# Exakte Representation der originären Zeichenkette, so wie sie im Satz
|
@@ -56,23 +64,32 @@ class Lingo
|
|
56
64
|
end
|
57
65
|
|
58
66
|
def lexicals(compound_parts = true)
|
59
|
-
if !compound_parts && attr ==
|
60
|
-
@lexicals.select { |lex| lex.attr ==
|
67
|
+
if !compound_parts && attr == WA_COMPOUND
|
68
|
+
@lexicals.select { |lex| lex.attr == LA_COMPOUND }
|
61
69
|
else
|
62
70
|
@lexicals
|
63
71
|
end
|
64
72
|
end
|
65
73
|
|
66
|
-
def lexicals=(
|
67
|
-
if
|
68
|
-
@lexicals =
|
74
|
+
def lexicals=(lex)
|
75
|
+
if lex.is_a?(Array)
|
76
|
+
@lexicals = lex.sort.uniq
|
69
77
|
else
|
70
|
-
raise TypeError, "wrong argument type #{
|
78
|
+
raise TypeError, "wrong argument type #{lex.class} (expected Array)"
|
71
79
|
end
|
72
80
|
end
|
73
81
|
|
82
|
+
def add_lexicals(lex)
|
83
|
+
@lexicals.concat(lex)
|
84
|
+
|
85
|
+
@lexicals.sort!
|
86
|
+
@lexicals.uniq!
|
87
|
+
|
88
|
+
self
|
89
|
+
end
|
90
|
+
|
74
91
|
def attrs(compound_parts = true)
|
75
|
-
lexicals(compound_parts).map
|
92
|
+
lexicals(compound_parts).map(&:attr)
|
76
93
|
end
|
77
94
|
|
78
95
|
def parts
|
@@ -100,15 +117,15 @@ class Lingo
|
|
100
117
|
end
|
101
118
|
|
102
119
|
def compo_form
|
103
|
-
if attr ==
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
120
|
+
get_class(LA_COMPOUND).first if attr == WA_COMPOUND
|
121
|
+
end
|
122
|
+
|
123
|
+
def full_compound?
|
124
|
+
attr == WA_COMPOUND && get_class('x+').empty?
|
108
125
|
end
|
109
126
|
|
110
127
|
def <<(*other)
|
111
|
-
lexicals.concat(other.flatten)
|
128
|
+
lexicals.concat(other.tap(&:flatten!))
|
112
129
|
self
|
113
130
|
end
|
114
131
|
|
data/lib/lingo/language.rb
CHANGED
@@ -50,7 +50,7 @@ class Lingo
|
|
50
50
|
# Status, wenn das Word nicht gefunden werden konnte
|
51
51
|
WA_UNKNOWN = '?'
|
52
52
|
# Wort ist als Kompositum erkannt worden
|
53
|
-
|
53
|
+
WA_COMPOUND = 'KOM'
|
54
54
|
# Wort ist eine Mehrwortgruppe
|
55
55
|
WA_MULTIWORD = 'MUL'
|
56
56
|
# Wort ist eine Mehrwortgruppe
|
@@ -58,31 +58,20 @@ class Lingo
|
|
58
58
|
# Word ist unbekannt, jedoch Teil einer Mehrwortgruppe
|
59
59
|
WA_UNKMULPART = 'MU?'
|
60
60
|
|
61
|
-
LA_SUBSTANTIV = 's'
|
62
|
-
LA_ADJEKTIV = 'a'
|
63
|
-
LA_VERB = 'v'
|
64
|
-
LA_EIGENNAME = 'e'
|
65
|
-
LA_KOMPOSITUM = 'k'
|
66
|
-
LA_MULTIWORD = 'm'
|
67
|
-
LA_SEQUENCE = 'q'
|
68
|
-
LA_WORTFORM = 'w'
|
69
|
-
LA_SYNONYM = 'y'
|
70
|
-
LA_STOPWORD = 't'
|
71
|
-
LA_TAKEITASIS = 'x'
|
72
|
-
LA_UNKNOWN = '?'
|
73
|
-
|
74
61
|
LA_SORTORDER = [
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
62
|
+
LA_SEQUENCE = 'q',
|
63
|
+
LA_MULTIWORD = 'm',
|
64
|
+
LA_COMPOUND = 'k',
|
65
|
+
LA_NOUN = 's',
|
66
|
+
LA_VERB = 'v',
|
67
|
+
LA_ADJECTIVE = 'a',
|
68
|
+
LA_NAME = 'e',
|
69
|
+
LA_WORDFORM = 'w',
|
70
|
+
LA_STOPWORD = 't',
|
71
|
+
LA_TAKEITASIS = 'x',
|
72
|
+
LA_SYNONYM = 'y',
|
73
|
+
LA_STEM = 'z',
|
74
|
+
LA_UNKNOWN = '?'
|
86
75
|
].reverse.join
|
87
76
|
|
88
77
|
end
|
data/lib/lingo/reportable.rb
CHANGED
@@ -31,27 +31,29 @@ class Lingo
|
|
31
31
|
module Reportable
|
32
32
|
|
33
33
|
def init_reportable(prefix = nil)
|
34
|
-
@
|
34
|
+
@reportable_hash = Hash.new(0)
|
35
|
+
@reportable_prefix = prefix ? "#{prefix}: " : ''
|
35
36
|
end
|
36
37
|
|
37
|
-
def inc(
|
38
|
-
@
|
38
|
+
def inc(key)
|
39
|
+
@reportable_hash[key] += 1
|
39
40
|
end
|
40
41
|
|
41
|
-
def add(
|
42
|
-
@
|
42
|
+
def add(key, val)
|
43
|
+
@reportable_hash[key] += val
|
43
44
|
end
|
44
45
|
|
45
|
-
def set(
|
46
|
-
@
|
46
|
+
def set(key, val)
|
47
|
+
@reportable_hash[key] = val
|
47
48
|
end
|
48
49
|
|
49
|
-
def get(
|
50
|
-
@
|
50
|
+
def get(key)
|
51
|
+
@reportable_hash[key]
|
51
52
|
end
|
52
53
|
|
53
54
|
def report
|
54
|
-
|
55
|
+
q = @reportable_prefix
|
56
|
+
@reportable_hash.each_with_object({}) { |(k, v), r| r["#{q}#{k}"] = v }
|
55
57
|
end
|
56
58
|
|
57
59
|
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
class ShowProgress
|
30
|
+
|
31
|
+
def initialize(obj, max, name = nil, doit = true, text = 'progress')
|
32
|
+
return yield self unless max && doit
|
33
|
+
|
34
|
+
@out = obj.instance_variable_get(:@lingo).config.stderr
|
35
|
+
|
36
|
+
# To get the length of the formatted string we have
|
37
|
+
# to actually substitute the placeholder.
|
38
|
+
fmt = ' [%3d%%]'
|
39
|
+
len = (fmt % 0).length
|
40
|
+
|
41
|
+
# Now we know how far to "go back" to
|
42
|
+
# overwrite the formatted string...
|
43
|
+
back = "\b" * len
|
44
|
+
|
45
|
+
@fmt = fmt + back
|
46
|
+
@clr = ' ' * len + back
|
47
|
+
|
48
|
+
print name, ': ' if name
|
49
|
+
|
50
|
+
@rat, @cnt, @next = max / 100.0, 0, 0
|
51
|
+
print text
|
52
|
+
step
|
53
|
+
|
54
|
+
yield self
|
55
|
+
|
56
|
+
print "#{@clr} done.\n"
|
57
|
+
end
|
58
|
+
|
59
|
+
def [](value)
|
60
|
+
if defined?(@cnt)
|
61
|
+
@cnt = value
|
62
|
+
step if @cnt >= @next
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
def step
|
69
|
+
percent = @cnt / @rat
|
70
|
+
@next = (percent + 1) * @rat
|
71
|
+
|
72
|
+
print @fmt % percent if percent.finite?
|
73
|
+
end
|
74
|
+
|
75
|
+
def print(*args)
|
76
|
+
@out.print(*args)
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
data/lib/lingo/version.rb
CHANGED
data/lib/lingo.rb
CHANGED
@@ -25,6 +25,8 @@
|
|
25
25
|
#++
|
26
26
|
|
27
27
|
require 'stringio'
|
28
|
+
require 'pathname'
|
29
|
+
require 'fileutils'
|
28
30
|
require 'benchmark'
|
29
31
|
require 'nuggets/file/ext'
|
30
32
|
require 'nuggets/env/user_home'
|
@@ -43,7 +45,8 @@ class Lingo
|
|
43
45
|
CURR = ENV['LINGO_CURR'] || '.'
|
44
46
|
|
45
47
|
# The search path for Lingo dictionary and configuration files.
|
46
|
-
PATH = ENV['LINGO_PATH']
|
48
|
+
PATH = ENV['LINGO_PATH'].nil? ? [CURR, HOME, BASE] :
|
49
|
+
ENV['LINGO_PATH'].split(File::PATH_SEPARATOR)
|
47
50
|
|
48
51
|
ENV['LINGO_PLUGIN_PATH'] ||= File.join(HOME, 'plugins')
|
49
52
|
|
@@ -59,7 +62,7 @@ class Lingo
|
|
59
62
|
# Default encoding
|
60
63
|
ENC = 'UTF-8'.freeze
|
61
64
|
|
62
|
-
|
65
|
+
SEP_RE = %r{[; ,|]}
|
63
66
|
|
64
67
|
class << self
|
65
68
|
|
@@ -79,7 +82,7 @@ class Lingo
|
|
79
82
|
glob = File.join('??', glob) if type == :dict
|
80
83
|
|
81
84
|
[].tap { |list| walk(path, options) { |dir|
|
82
|
-
Dir[File.join(dir, glob)].sort
|
85
|
+
Dir[File.join(dir, glob)].sort!.each { |file|
|
83
86
|
pn = Pathname.new(file)
|
84
87
|
list << realpath_for(pn, path) if pn.file?
|
85
88
|
}
|
@@ -110,29 +113,69 @@ class Lingo
|
|
110
113
|
File.join(options_for(type)[:dir], basename(type, file))
|
111
114
|
end
|
112
115
|
|
116
|
+
def append_path(*path)
|
117
|
+
include_path(path)
|
118
|
+
end
|
119
|
+
|
120
|
+
def prepend_path(*path)
|
121
|
+
include_path(path, true)
|
122
|
+
end
|
123
|
+
|
124
|
+
def get_const(name, klass = self)
|
125
|
+
klass.const_get(name.camelcase)
|
126
|
+
rescue NameError
|
127
|
+
raise NameNotFoundError.new(klass, name)
|
128
|
+
end
|
129
|
+
|
113
130
|
private
|
114
131
|
|
132
|
+
def include_path(path, pre = false)
|
133
|
+
PATH.insert(pre ? 0 : -1, *path.map!(&:to_s))
|
134
|
+
end
|
135
|
+
|
115
136
|
def find_file(file, path, options)
|
116
|
-
|
137
|
+
if glob = options[:glob]
|
138
|
+
file = File.chomp_ext(file)
|
139
|
+
options[:ext] ||= '*'
|
140
|
+
end
|
141
|
+
|
142
|
+
file = file_with_ext(file, options)
|
143
|
+
pn = Pathname.new(file).cleanpath
|
117
144
|
|
118
145
|
if pn.relative?
|
119
146
|
walk(path, options) { |dir|
|
120
147
|
pn2 = pn.expand_path(dir)
|
121
|
-
|
148
|
+
ex = pn2.exist?
|
149
|
+
|
150
|
+
pn2 = Pathname.glob(pn2).first if glob && !ex
|
151
|
+
pn = pn2 and break if glob ? pn2 : ex
|
122
152
|
}
|
123
153
|
end
|
124
154
|
|
125
155
|
realpath_for(pn, path)
|
156
|
+
rescue Errno::ENOENT
|
157
|
+
raise unless relax = options[:relax]
|
158
|
+
relax.respond_to?(:[]) ? relax[file] : file
|
126
159
|
end
|
127
160
|
|
128
161
|
def find_store(file, path, options)
|
129
|
-
base = basename(:dict, find(:dict, file, path)
|
162
|
+
base = basename(:dict, find(:dict, file, path) {
|
163
|
+
raise SourceFileNotFoundError.new(nil, find_file(file, path,
|
164
|
+
options.merge(glob: true, relax: lambda { |_file|
|
165
|
+
raise SourceFileNotFoundError.new(file, _file)
|
166
|
+
})
|
167
|
+
))
|
168
|
+
})
|
130
169
|
|
131
170
|
walk(path.reverse, options, false) { |dir|
|
132
171
|
Pathname.new(dir).ascend { |i|
|
133
|
-
|
134
|
-
|
135
|
-
|
172
|
+
begin
|
173
|
+
stat = i.stat
|
174
|
+
|
175
|
+
break true if stat.file? || !stat.writable?
|
176
|
+
return File.chomp_ext(File.join(dir, base))
|
177
|
+
rescue Errno::ENOENT
|
178
|
+
end
|
136
179
|
}
|
137
180
|
}
|
138
181
|
|
@@ -148,7 +191,7 @@ class Lingo
|
|
148
191
|
end
|
149
192
|
|
150
193
|
def path_for(options)
|
151
|
-
options[:path] || PATH
|
194
|
+
options[:path] || PATH
|
152
195
|
end
|
153
196
|
|
154
197
|
def file_with_ext(file, options)
|
@@ -223,30 +266,25 @@ class Lingo
|
|
223
266
|
|
224
267
|
list.each { |hash|
|
225
268
|
# {'attendee' => {'name'=>'Attendee', 'in'=>'nase', 'out'=>'ohr', 'param'=>'hase'}}
|
226
|
-
cfg = hash.values.first.merge('name' => hash.keys.first.camelcase)
|
269
|
+
cfg = hash.values.first.merge('name' => name = hash.keys.first.camelcase)
|
227
270
|
|
228
271
|
%w[in out].each { |key| (cfg[key] ||= '').downcase! }
|
229
272
|
|
230
|
-
cfg['in'] = last_link
|
231
|
-
cfg['out'] = "
|
273
|
+
cfg['in'] = last_link if cfg['in'].empty?
|
274
|
+
cfg['out'] = "auto_link-#{auto_link += 1}" if cfg['out'].empty?
|
232
275
|
last_link = cfg['out']
|
233
276
|
|
234
|
-
|
235
|
-
cfg.update(data) if data
|
277
|
+
cfg.update(config["language/attendees/#{name.downcase}"] || {})
|
236
278
|
|
237
|
-
attendee = Attendee.const_get(
|
238
|
-
@attendees << attendee
|
279
|
+
@attendees << attendee = Attendee.const_get(name).new(cfg, self)
|
239
280
|
|
240
|
-
|
241
|
-
|
242
|
-
}
|
243
|
-
cfg['out'].split(STRING_SEPARATOR_RE).each { |theme|
|
244
|
-
supplier[theme] << attendee
|
281
|
+
{ 'in' => subscriber, 'out' => supplier }.each { |key, target|
|
282
|
+
cfg[key].split(SEP_RE).each { |ch| target[ch] << attendee }
|
245
283
|
}
|
246
284
|
}
|
247
285
|
|
248
|
-
supplier.each { |
|
249
|
-
|
286
|
+
supplier.each { |ch, attendees| attendees.each { |att|
|
287
|
+
att.add_subscriber(subscriber[ch])
|
250
288
|
} }
|
251
289
|
end
|
252
290
|
|
@@ -283,6 +321,7 @@ require_relative 'lingo/core_ext'
|
|
283
321
|
require_relative 'lingo/cachable'
|
284
322
|
require_relative 'lingo/reportable'
|
285
323
|
require_relative 'lingo/agenda_item'
|
324
|
+
require_relative 'lingo/show_progress'
|
286
325
|
require_relative 'lingo/database'
|
287
326
|
require_relative 'lingo/language'
|
288
327
|
require_relative 'lingo/attendee'
|