lingo 1.8.4.2 → 1.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +413 -325
- data/README +380 -131
- data/Rakefile +19 -21
- data/de/lingo-abk.txt +15 -17
- data/de/lingo-dic.txt +20210 -20659
- data/de/lingo-mul.txt +5 -13
- data/de/lingo-syn.txt +5 -8
- data/de/test_dic.txt +2 -0
- data/de/test_gen.txt +8 -0
- data/de/{test_mul2.txt → test_mu2.txt} +0 -0
- data/de/{test_singleword.txt → test_sgw.txt} +0 -0
- data/de/user-dic.txt +5 -7
- data/de.lang +64 -49
- data/en/lingo-dic.txt +6398 -6404
- data/en/lingo-irr.txt +2 -3
- data/en/lingo-mul.txt +6 -7
- data/en/lingo-wdn.txt +881 -1762
- data/en/user-dic.txt +2 -5
- data/en.lang +39 -39
- data/lib/lingo/app.rb +10 -6
- data/lib/lingo/attendee/abbreviator.rb +1 -0
- data/lib/lingo/attendee/decomposer.rb +2 -1
- data/lib/lingo/attendee/multi_worder.rb +5 -6
- data/lib/lingo/attendee/stemmer.rb +1 -1
- data/lib/lingo/attendee/synonymer.rb +4 -2
- data/lib/lingo/attendee/text_reader.rb +77 -57
- data/lib/lingo/attendee/text_writer.rb +1 -1
- data/lib/lingo/attendee/tokenizer.rb +101 -50
- data/lib/lingo/attendee/variator.rb +2 -1
- data/lib/lingo/attendee/vector_filter.rb +28 -6
- data/lib/lingo/attendee/word_searcher.rb +2 -1
- data/lib/lingo/attendee.rb +8 -4
- data/lib/lingo/call.rb +7 -3
- data/lib/lingo/cli.rb +8 -16
- data/lib/lingo/config.rb +11 -6
- data/lib/lingo/ctl.rb +54 -3
- data/lib/lingo/database/crypter.rb +8 -14
- data/lib/lingo/database/hash_store.rb +1 -1
- data/lib/lingo/database/{show_progress.rb → progress.rb} +7 -8
- data/lib/lingo/database/source/key_value.rb +6 -5
- data/lib/lingo/database/source/multi_key.rb +5 -2
- data/lib/lingo/database/source/multi_value.rb +6 -4
- data/lib/lingo/database/source/single_word.rb +2 -3
- data/lib/lingo/database/source/word_class.rb +24 -5
- data/lib/lingo/database/source.rb +5 -3
- data/lib/lingo/database.rb +102 -41
- data/lib/lingo/error.rb +24 -2
- data/lib/lingo/language/dictionary.rb +26 -54
- data/lib/lingo/language/grammar.rb +19 -23
- data/lib/lingo/language/lexical.rb +5 -1
- data/lib/lingo/language/lexical_hash.rb +7 -12
- data/lib/lingo/language/token.rb +10 -1
- data/lib/lingo/language/word.rb +35 -23
- data/lib/lingo/language/word_form.rb +5 -4
- data/lib/lingo/{show_progress.rb → progress.rb} +43 -30
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/srv/public/.gitkeep +0 -0
- data/lib/lingo/srv.rb +11 -6
- data/lib/lingo/version.rb +2 -2
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/lib/lingo/web/views/index.erb +4 -4
- data/lib/lingo/web.rb +4 -6
- data/lib/lingo.rb +4 -12
- data/lingo.cfg +1 -1
- data/lir.cfg +1 -1
- data/ru/lingo-dic.txt +33473 -2113
- data/ru/lingo-mul.txt +8430 -1913
- data/ru/lingo-syn.txt +1634 -0
- data/ru/user-dic.txt +6 -0
- data/ru.lang +49 -47
- data/spec/spec_helper.rb +4 -0
- data/test/attendee/ts_decomposer.rb +2 -2
- data/test/attendee/ts_synonymer.rb +3 -3
- data/test/attendee/ts_tokenizer.rb +215 -2
- data/test/attendee/ts_variator.rb +2 -2
- data/test/attendee/ts_word_searcher.rb +10 -6
- data/test/ref/artikel.seq +2 -2
- data/test/ref/artikel.vec +5 -5
- data/test/ref/artikel.ven +11 -11
- data/test/ref/artikel.ver +11 -11
- data/test/ref/lir.seq +13 -13
- data/test/ref/lir.vec +31 -31
- data/test/test_helper.rb +19 -5
- data/test/ts_database.rb +206 -77
- data/test/ts_language.rb +86 -26
- metadata +93 -49
- data/.rspec +0 -1
- data/de/test_syn2.txt +0 -1
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -82,33 +82,47 @@ class Lingo
|
|
82
82
|
|
83
83
|
CHAR, DIGIT = Char::CHAR, Char::DIGIT
|
84
84
|
|
85
|
+
PROTO = '(?:news|https?|ftps?)://'
|
86
|
+
|
85
87
|
RULES = [
|
86
|
-
['WIKI', /^=+.+=+$/],
|
87
88
|
['SPAC', /^\s+/],
|
88
|
-
['
|
89
|
-
['WIKI', /^\[\[.+?\]\]/],
|
89
|
+
['WIKI', /^=+.+=+|^__[A-Z]+__/],
|
90
90
|
['NUMS', /^[+-]?(?:\d{4,}|\d{1,3}(?:\.\d{3,3})*)(?:\.|(?:,\d+)?%?)/],
|
91
|
-
['URLS', /^(?:
|
91
|
+
['URLS', /^(?:www\.|mailto:|#{PROTO}|\S+?[._]\S+?@\S+?\.)\S+/],
|
92
92
|
['ABRV', /^(?:(?:(?:#{CHAR})+\.)+)(?:#{CHAR})+/],
|
93
93
|
['WORD', /^(?:#{CHAR}|#{DIGIT}|-)+/],
|
94
|
-
['PUNC', /^[!,.:;?¡¿]
|
95
|
-
|
96
|
-
|
94
|
+
['PUNC', /^[!,.:;?¡¿]+/]
|
95
|
+
]
|
96
|
+
|
97
|
+
OTHER = [
|
98
|
+
['OTHR', /^["$#%&'()*+\/<=>@\[\\\]^_{|}~¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾×÷]/],
|
99
|
+
['HELP', /^\S+/]
|
97
100
|
]
|
98
101
|
|
102
|
+
NESTS = {
|
103
|
+
'HTML' => ['<', '>'],
|
104
|
+
'WIKI:VARIABLE' => ['{{{', '}}}'],
|
105
|
+
'WIKI:TEMPLATE' => ['{{', '}}'],
|
106
|
+
'WIKI:LINK_INT' => ['[[', ']]'],
|
107
|
+
'WIKI:LINK_EXT' => [/^\[\s*#{PROTO}/, ']']
|
108
|
+
}
|
109
|
+
|
99
110
|
class << self
|
100
111
|
|
101
112
|
def rule(name)
|
102
113
|
RULES.assoc(name)
|
103
114
|
end
|
104
115
|
|
116
|
+
def rules(name)
|
117
|
+
RULES.select { |rule,| rule == name }
|
118
|
+
end
|
119
|
+
|
105
120
|
def delete(*names)
|
106
|
-
names.
|
121
|
+
names.map { |name| rules(name).each { |rule| RULES.delete(rule) } }
|
107
122
|
end
|
108
123
|
|
109
|
-
def replace(name, expr)
|
110
|
-
rule = rule
|
111
|
-
rule[1] = block_given? ? yield(rule[1]) : expr
|
124
|
+
def replace(name, expr = nil)
|
125
|
+
rules(name).each { |rule| rule[1] = expr || yield(*rule) }
|
112
126
|
end
|
113
127
|
|
114
128
|
def insert(*rules)
|
@@ -152,22 +166,37 @@ class Lingo
|
|
152
166
|
skip << 'HTML' unless @tags
|
153
167
|
skip << 'WIKI' unless @wiki
|
154
168
|
|
155
|
-
@rules = RULES.
|
169
|
+
[@rules = RULES.dup, @nests = NESTS.dup].each { |hash|
|
170
|
+
hash.delete_if { |name, _| skip.include?(Token.clean(name)) }
|
171
|
+
}
|
172
|
+
|
173
|
+
@nest, nest_re = [], []
|
156
174
|
|
157
|
-
@
|
175
|
+
@nests.each { |name, re|
|
176
|
+
re.map!.with_index { |r, i| r.is_a?(Regexp) ?
|
177
|
+
r : /^#{'.*?' if i > 0}#{Regexp.escape(r)}/ }
|
178
|
+
|
179
|
+
nest_re << "(?<#{name}>#{Regexp.new(
|
180
|
+
re[0].source.sub(/^\^/, ''), re[0].options)})"
|
181
|
+
}
|
182
|
+
|
183
|
+
@nest_re = /^(?<_>.*?)(?:#{nest_re.join('|')})/
|
184
|
+
|
185
|
+
@filename = @linenum = nil
|
158
186
|
end
|
159
187
|
|
160
188
|
def control(cmd, param)
|
161
189
|
case cmd
|
162
|
-
when STR_CMD_FILE then @filename = param
|
163
|
-
when STR_CMD_LIR then @filename = nil
|
164
|
-
when
|
190
|
+
when STR_CMD_FILE then @filename, @linenum = param, 1
|
191
|
+
when STR_CMD_LIR then @filename, @linenum = nil, nil
|
192
|
+
when STR_CMD_EOL then @linenum += 1 if @linenum
|
193
|
+
when STR_CMD_EOF then @nest.clear
|
165
194
|
end
|
166
195
|
end
|
167
196
|
|
168
197
|
def process(obj)
|
169
198
|
if obj.is_a?(String)
|
170
|
-
tokenize(obj)
|
199
|
+
tokenize(obj)
|
171
200
|
forward(STR_CMD_EOL, @filename) if @filename
|
172
201
|
else
|
173
202
|
forward(obj)
|
@@ -178,44 +207,66 @@ class Lingo
|
|
178
207
|
|
179
208
|
# tokenize("Eine Zeile.") -> [:Eine/WORD:, :Zeile/WORD:, :./PUNC:]
|
180
209
|
def tokenize(line)
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
if line =~ /^[^\[\]]*\]\]/
|
192
|
-
yield $&, @cont
|
193
|
-
line, @cont = $', nil
|
194
|
-
else
|
195
|
-
yield line, @cont
|
196
|
-
return
|
197
|
-
end
|
198
|
-
when nil
|
199
|
-
if @tags && line =~ /<[^<>]*$/
|
200
|
-
yield $&, @cont = 'HTML'
|
201
|
-
line = $`
|
202
|
-
end
|
203
|
-
|
204
|
-
if @wiki && line =~ /\[\[[^\[\]]*$/
|
205
|
-
yield $&, @cont = 'WIKI'
|
206
|
-
line = $`
|
207
|
-
end
|
210
|
+
@nest.empty? ? tokenize_line(line) : tokenize_nest(line)
|
211
|
+
rescue => err
|
212
|
+
raise err if err.is_a?(TokenizeError)
|
213
|
+
raise TokenizeError.new(line, @filename, @linenum, err)
|
214
|
+
end
|
215
|
+
|
216
|
+
def tokenize_line(line)
|
217
|
+
while (length = line.length) > 0 && tokenize_rule(line) { |rest|
|
218
|
+
length == rest.length ? break : line = rest
|
219
|
+
}
|
208
220
|
end
|
209
221
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
222
|
+
tokenize_open(line) unless line.empty?
|
223
|
+
end
|
224
|
+
|
225
|
+
def tokenize_rule(line, rules = @rules)
|
226
|
+
rules.find { |name, expr|
|
227
|
+
next unless line =~ expr
|
228
|
+
forward_token($&, name) if name != 'SPAC' || @space
|
229
|
+
yield $'
|
215
230
|
}
|
231
|
+
end
|
232
|
+
|
233
|
+
def tokenize_nest(line)
|
234
|
+
mdo = @nest_re.match(line)
|
235
|
+
mdc = @nests[@nest.last].last.match(line)
|
236
|
+
|
237
|
+
if mdo && (!mdc || mdo[0].length < mdc[0].length)
|
238
|
+
forward_token(mdo[:_], @nest.last) unless mdo[:_].empty?
|
239
|
+
|
240
|
+
nest = @nests.keys.find { |name| mdo[name] }
|
241
|
+
forward_nest(mdo[nest], mdo.post_match, nest)
|
242
|
+
elsif mdc
|
243
|
+
forward_token(mdc[0], @nest.pop)
|
244
|
+
tokenize(mdc.post_match)
|
245
|
+
else
|
246
|
+
forward_token(line, @nest.last)
|
216
247
|
end
|
217
248
|
end
|
218
249
|
|
250
|
+
def tokenize_open(line)
|
251
|
+
@nests.each { |nest, (open_re, _)|
|
252
|
+
next unless line =~ open_re
|
253
|
+
return forward_nest($&, $', nest)
|
254
|
+
}
|
255
|
+
|
256
|
+
tokenize_rule(line, OTHER) { |rest| line = rest }
|
257
|
+
tokenize(line)
|
258
|
+
end
|
259
|
+
|
260
|
+
def forward_nest(match, rest, nest)
|
261
|
+
forward_token(match, nest)
|
262
|
+
@nest << nest
|
263
|
+
tokenize(rest)
|
264
|
+
end
|
265
|
+
|
266
|
+
def forward_token(*args)
|
267
|
+
forward(Token.new(*args))
|
268
|
+
end
|
269
|
+
|
219
270
|
end
|
220
271
|
|
221
272
|
end
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -90,6 +90,7 @@ class Lingo
|
|
90
90
|
end
|
91
91
|
|
92
92
|
def control(cmd, param)
|
93
|
+
# can control
|
93
94
|
end
|
94
95
|
|
95
96
|
def process(obj)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -86,10 +86,15 @@ class Lingo
|
|
86
86
|
def init
|
87
87
|
if @debug = get_key('debug', false)
|
88
88
|
@prompt = get_key('prompt', 'lex:) ')
|
89
|
+
@preamble = get_key('preamble', true)
|
89
90
|
else
|
90
91
|
@lex = get_re('lexicals', '[sy]')
|
91
92
|
@skip = get_array('skip', DEFAULT_SKIP, :upcase)
|
92
93
|
|
94
|
+
@dict = get_key('dict', false)
|
95
|
+
@norm = get_key('norm', false) if @dict
|
96
|
+
@dict = Database::Source::WordClass::DEFAULT_SEPARATOR if @dict == true
|
97
|
+
|
93
98
|
@src = get_key('src', false)
|
94
99
|
@src = DEFAULT_SRC_SEP if @src == true
|
95
100
|
|
@@ -112,15 +117,32 @@ class Lingo
|
|
112
117
|
|
113
118
|
def process(obj)
|
114
119
|
if @debug
|
120
|
+
forward((@preamble = nil; @lingo.config.to_h.to_yaml)) if @preamble
|
115
121
|
forward("#{@prompt} #{obj.inspect}") if eval(@debug)
|
116
122
|
elsif obj.is_a?(Word) && !@skip.include?(obj.attr)
|
117
123
|
@word_count += 1
|
118
124
|
|
119
|
-
|
120
|
-
vec =
|
121
|
-
|
122
|
-
@
|
123
|
-
|
125
|
+
if @dict
|
126
|
+
vec, sep = [], Database::Source::WordClass::GENDER_SEPARATOR
|
127
|
+
|
128
|
+
obj.get_class(@lex).each { |lex|
|
129
|
+
str = "#{lex.form} ##{lex.attr}"
|
130
|
+
str << sep << lex.gender if lex.gender
|
131
|
+
vec << str
|
132
|
+
}
|
133
|
+
|
134
|
+
unless vec.empty?
|
135
|
+
wrd = @norm ? obj.lexicals.first.form : obj.form
|
136
|
+
vec = Unicode.downcase("#{wrd}#{@dict}#{vec.join(' ')}")
|
137
|
+
@sort_format ? @vectors << vec : forward(vec)
|
138
|
+
end
|
139
|
+
else
|
140
|
+
obj.get_class(@lex).each { |lex|
|
141
|
+
vec = Unicode.downcase(lex.form)
|
142
|
+
vec << @src << lex.src if @src && lex.src
|
143
|
+
@sort_format ? @vectors << vec : forward(vec)
|
144
|
+
}
|
145
|
+
end
|
124
146
|
end
|
125
147
|
end
|
126
148
|
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -72,6 +72,7 @@ class Lingo
|
|
72
72
|
end
|
73
73
|
|
74
74
|
def control(cmd, param)
|
75
|
+
# can control
|
75
76
|
end
|
76
77
|
|
77
78
|
def process(obj)
|
data/lib/lingo/attendee.rb
CHANGED
@@ -83,6 +83,8 @@ class Lingo
|
|
83
83
|
# Make sure config exists
|
84
84
|
lingo.dictionary_config
|
85
85
|
|
86
|
+
@dic = @gra = nil
|
87
|
+
|
86
88
|
init if self.class.method_defined?(:init)
|
87
89
|
|
88
90
|
@can_control = self.class.method_defined?(:control)
|
@@ -91,6 +93,8 @@ class Lingo
|
|
91
93
|
@skip_command = false
|
92
94
|
end
|
93
95
|
|
96
|
+
attr_reader :lingo
|
97
|
+
|
94
98
|
def add_subscriber(subscriber)
|
95
99
|
@subscriber.concat(subscriber)
|
96
100
|
end
|
@@ -133,7 +137,7 @@ class Lingo
|
|
133
137
|
end
|
134
138
|
|
135
139
|
def has_key?(key)
|
136
|
-
@config && @config.
|
140
|
+
@config && @config.key?(key)
|
137
141
|
end
|
138
142
|
|
139
143
|
def get_key(key, default = nodefault = true)
|
@@ -154,11 +158,11 @@ class Lingo
|
|
154
158
|
end
|
155
159
|
|
156
160
|
def dictionary(src, mod)
|
157
|
-
Language::Dictionary.new({ 'source' => src, 'mode' => mod },
|
161
|
+
Language::Dictionary.new({ 'source' => src, 'mode' => mod }, lingo)
|
158
162
|
end
|
159
163
|
|
160
164
|
def grammar(src, mod)
|
161
|
-
Language::Grammar.new({ 'source' => src, 'mode' => mod },
|
165
|
+
Language::Grammar.new({ 'source' => src, 'mode' => mod }, lingo)
|
162
166
|
end
|
163
167
|
|
164
168
|
def set_dic
|
@@ -170,7 +174,7 @@ class Lingo
|
|
170
174
|
end
|
171
175
|
|
172
176
|
def warn(*msg)
|
173
|
-
|
177
|
+
lingo.warn(*msg)
|
174
178
|
end
|
175
179
|
|
176
180
|
def require_lib(lib)
|
data/lib/lingo/call.rb
CHANGED
@@ -48,7 +48,7 @@ class Lingo
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
def talk(str)
|
51
|
+
def talk(str, raw = false)
|
52
52
|
config.stdin.reopen(str)
|
53
53
|
|
54
54
|
start
|
@@ -57,7 +57,7 @@ class Lingo
|
|
57
57
|
io = config.send(key)
|
58
58
|
io.rewind
|
59
59
|
|
60
|
-
lines = io.readlines
|
60
|
+
lines = io.readlines
|
61
61
|
|
62
62
|
io.truncate(0)
|
63
63
|
io.rewind
|
@@ -65,8 +65,12 @@ class Lingo
|
|
65
65
|
lines
|
66
66
|
}
|
67
67
|
|
68
|
+
return res.join if raw
|
69
|
+
|
70
|
+
res.each { |i| i.chomp! }
|
71
|
+
|
68
72
|
block_given? ? res.map! { |i| yield i } : begin
|
69
|
-
|
73
|
+
res.sort! unless ENV['LINGO_NO_SORT']
|
70
74
|
res.uniq!
|
71
75
|
res
|
72
76
|
end
|
data/lib/lingo/cli.rb
CHANGED
@@ -24,11 +24,11 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
-
require '
|
27
|
+
require 'cyclops'
|
28
28
|
|
29
29
|
class Lingo
|
30
30
|
|
31
|
-
class CLI <
|
31
|
+
class CLI < Cyclops
|
32
32
|
|
33
33
|
class << self
|
34
34
|
|
@@ -55,26 +55,18 @@ class Lingo
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def opts(opts)
|
58
|
-
opts.
|
59
|
-
options[:config] = config
|
60
|
-
}
|
61
|
-
|
62
|
-
opts.separator ''
|
63
|
-
|
64
|
-
opts.on('-l', '--language LANG', "Language for processing [Default: #{defaults[:language]}]") { |language|
|
65
|
-
options[:language] = language
|
66
|
-
}
|
58
|
+
opts.option(:language__LANG, "Language for processing [Default: #{defaults[:language]}]")
|
67
59
|
|
68
|
-
opts.separator
|
60
|
+
opts.separator
|
69
61
|
|
70
|
-
opts.
|
62
|
+
opts.option(:log__FILE, :L, 'Log file to print debug information to') { |log|
|
71
63
|
options[:log] = stderr.reopen(log == '-' ? stdout : File.open(log, 'a+', encoding: ENC))
|
72
64
|
}
|
73
65
|
|
74
|
-
opts.separator
|
66
|
+
opts.separator
|
75
67
|
|
76
|
-
opts.
|
77
|
-
options[:profile] = profile == '-'
|
68
|
+
opts.option(:profile__PATH, :P, 'Print profiling results') { |profile|
|
69
|
+
options[:profile] = stdout if profile == '-'
|
78
70
|
}
|
79
71
|
end
|
80
72
|
|
data/lib/lingo/config.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -24,11 +24,9 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
-
require '
|
27
|
+
require 'safe_yaml/load'
|
28
28
|
require_relative 'cli'
|
29
29
|
|
30
|
-
YAML::ENGINE.yamler = 'psych'
|
31
|
-
|
32
30
|
class Lingo
|
33
31
|
|
34
32
|
class Config
|
@@ -58,8 +56,12 @@ class Lingo
|
|
58
56
|
end
|
59
57
|
end
|
60
58
|
|
59
|
+
def to_h
|
60
|
+
@opts
|
61
|
+
end
|
62
|
+
|
61
63
|
def [](key)
|
62
|
-
key_to_nodes(key).inject(
|
64
|
+
key_to_nodes(key).inject(to_h) { |hash, node| hash[node] }
|
63
65
|
end
|
64
66
|
|
65
67
|
def []=(key, val)
|
@@ -125,7 +127,10 @@ class Lingo
|
|
125
127
|
|
126
128
|
def load_config(key, type = key.to_sym)
|
127
129
|
file = Lingo.find(type, @opts[key]) { quit }
|
128
|
-
|
130
|
+
File.open(file, encoding: ENC) { |f| @opts.update(SafeYAML.load(f)) }
|
131
|
+
rescue Psych::SyntaxError => err
|
132
|
+
err.message << " (in #{file})"
|
133
|
+
raise
|
129
134
|
end
|
130
135
|
|
131
136
|
end
|
data/lib/lingo/ctl.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -25,6 +25,9 @@
|
|
25
25
|
#++
|
26
26
|
|
27
27
|
require 'optparse'
|
28
|
+
require 'zip'
|
29
|
+
|
30
|
+
Zip.unicode_names = true
|
28
31
|
|
29
32
|
class Lingo
|
30
33
|
|
@@ -36,7 +39,7 @@ class Lingo
|
|
36
39
|
PROGNAME, OPTIONS = File.basename(PROG), {}
|
37
40
|
|
38
41
|
COMMANDS, ALIASES = {}, Hash.new { |h, k|
|
39
|
-
h[k] = COMMANDS.
|
42
|
+
h[k] = COMMANDS.key?(k) ? k : 'usage'
|
40
43
|
}
|
41
44
|
|
42
45
|
USAGE = <<-EOT
|
@@ -79,6 +82,7 @@ Usage: #{PROG} <command> [arguments] [options]
|
|
79
82
|
}
|
80
83
|
|
81
84
|
{ demo: [:d, 'Initialize demo directory', '[path]', 'current directory'],
|
85
|
+
archive: [:a, 'Create archive of directory', '[path]', 'current directory'],
|
82
86
|
rackup: [:r, 'Print path to rackup file', 'name'],
|
83
87
|
path: [:p, 'Print search path for dictionaries and configurations'],
|
84
88
|
help: [:h, 'Print help for available commands'],
|
@@ -111,19 +115,47 @@ Usage: #{PROG} <command> [arguments] [options]
|
|
111
115
|
|
112
116
|
usage('Source and target are the same.') if source == target
|
113
117
|
|
118
|
+
return unless overwrite?(target)
|
119
|
+
|
114
120
|
FileUtils.mkdir_p(File.dirname(target))
|
115
121
|
FileUtils.cp(source, target, verbose: true)
|
116
122
|
end
|
117
123
|
|
124
|
+
def do_archive
|
125
|
+
OPTIONS.update(path: ARGV.shift, scope: :local)
|
126
|
+
no_args
|
127
|
+
|
128
|
+
source = File.expand_path(path_for_scope.first)
|
129
|
+
target = "#{source}.zip"
|
130
|
+
|
131
|
+
abort "No such directory: #{source}" unless Dir.exist?(source)
|
132
|
+
|
133
|
+
return unless overwrite?(target, true)
|
134
|
+
|
135
|
+
base, name = File.split(source)
|
136
|
+
|
137
|
+
Dir.chdir(base) {
|
138
|
+
Zip::File.open(target, Zip::File::CREATE) { |zipfile|
|
139
|
+
Dir[File.join(name, '**', '*')].each { |file|
|
140
|
+
zipfile.add(file, file)
|
141
|
+
}
|
142
|
+
}
|
143
|
+
}
|
144
|
+
|
145
|
+
puts "Directory successfully archived at `#{target}'."
|
146
|
+
end
|
147
|
+
|
118
148
|
def do_clearstore
|
119
149
|
store = Dir["#{find(:store, false)}.*"]
|
120
150
|
FileUtils.rm(store, verbose: true) unless store.empty?
|
121
151
|
end
|
122
152
|
|
123
153
|
def do_demo
|
124
|
-
OPTIONS.update(path:
|
154
|
+
OPTIONS.update(path: ARGV.shift, scope: :system)
|
125
155
|
no_args
|
126
156
|
|
157
|
+
path = path_for_scope(:local).first
|
158
|
+
|
127
159
|
copy_list(:config) { |i| !File.basename(i).start_with?('test') }
|
128
160
|
copy_list(:lang)
|
129
161
|
copy_list(:dict) { |i| File.basename(i).start_with?('user') }
|
@@ -240,6 +272,25 @@ Usage: #{PROG} <command> [arguments] [options]
|
|
240
272
|
files.each { |file| ARGV.replace([file]); copy(what) }
|
241
273
|
end
|
242
274
|
|
275
|
+
def overwrite?(target, unlink = false)
|
276
|
+
!File.exist?(target) || if agree?("#{target} already exists. Overwrite?")
|
277
|
+
File.unlink(target) if unlink
|
278
|
+
true
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
def agree?(msg)
|
283
|
+
print "#{msg} (y/n) [n] "
|
284
|
+
|
285
|
+
case answer = $stdin.gets.chomp
|
286
|
+
when /\Ano?\z/i, '' then nil
|
287
|
+
when /\Ay(?:es)?\z/i then true
|
288
|
+
else puts 'Please enter "yes" or "no".'; agree?(msg)
|
289
|
+
end
|
290
|
+
rescue Interrupt
|
291
|
+
abort ''
|
292
|
+
end
|
293
|
+
|
243
294
|
end
|
244
295
|
|
245
296
|
def self.ctl
|
@@ -24,6 +24,7 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'openssl'
|
27
28
|
require 'digest/sha1'
|
28
29
|
|
29
30
|
class Lingo
|
@@ -34,31 +35,24 @@ class Lingo
|
|
34
35
|
|
35
36
|
class Crypter
|
36
37
|
|
37
|
-
|
38
|
-
|
39
|
-
def digest(key)
|
38
|
+
def self.digest(key)
|
40
39
|
Digest::SHA1.hexdigest(key)
|
41
40
|
end
|
42
41
|
|
43
42
|
def encode(key, val)
|
44
|
-
[digest(key), crypt(key, val)
|
45
|
-
b.divmod(16).each { |i| s << HEX_CHARS[i] }
|
46
|
-
}]
|
43
|
+
[self.class.digest(key), crypt(:encrypt, key, val)]
|
47
44
|
end
|
48
45
|
|
49
46
|
def decode(key, val)
|
50
|
-
crypt(key, val
|
51
|
-
q, r = b.map { |i| HEX_CHARS.index(i.chr(ENC)) }
|
52
|
-
s << q * 16 + r
|
53
|
-
})
|
47
|
+
crypt(:decrypt, key, val).force_encoding(ENC)
|
54
48
|
end
|
55
49
|
|
56
50
|
private
|
57
51
|
|
58
|
-
def crypt(
|
59
|
-
|
60
|
-
|
61
|
-
|
52
|
+
def crypt(method, key, val)
|
53
|
+
cipher = OpenSSL::Cipher.new('aes-128-cbc').send(method)
|
54
|
+
cipher.iv = cipher.key = self.class.digest(key)
|
55
|
+
cipher.update(val) + cipher.final
|
62
56
|
end
|
63
57
|
|
64
58
|
end
|
@@ -28,18 +28,17 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Database
|
30
30
|
|
31
|
-
class
|
31
|
+
class Progress < Progress
|
32
32
|
|
33
33
|
def initialize(obj, src, doit = true)
|
34
|
-
|
35
|
-
super(obj, src.size, name, doit, 'convert', false)
|
34
|
+
super(obj, src.size, obj.config['name'], doit, 'convert', false)
|
36
35
|
|
37
|
-
if defined?(@
|
38
|
-
|
36
|
+
if defined?(@count)
|
37
|
+
count, rejected = src.rejected
|
39
38
|
|
40
|
-
if
|
41
|
-
print ' (',
|
42
|
-
print ': ',
|
39
|
+
if count > 0
|
40
|
+
print ' (', count, ' rejected'
|
41
|
+
print ': ', rejected if rejected
|
43
42
|
print ')'
|
44
43
|
end
|
45
44
|
|