lingo 1.8.4.2 → 1.8.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +413 -325
- data/README +380 -131
- data/Rakefile +19 -21
- data/de/lingo-abk.txt +15 -17
- data/de/lingo-dic.txt +20210 -20659
- data/de/lingo-mul.txt +5 -13
- data/de/lingo-syn.txt +5 -8
- data/de/test_dic.txt +2 -0
- data/de/test_gen.txt +8 -0
- data/de/{test_mul2.txt → test_mu2.txt} +0 -0
- data/de/{test_singleword.txt → test_sgw.txt} +0 -0
- data/de/user-dic.txt +5 -7
- data/de.lang +64 -49
- data/en/lingo-dic.txt +6398 -6404
- data/en/lingo-irr.txt +2 -3
- data/en/lingo-mul.txt +6 -7
- data/en/lingo-wdn.txt +881 -1762
- data/en/user-dic.txt +2 -5
- data/en.lang +39 -39
- data/lib/lingo/app.rb +10 -6
- data/lib/lingo/attendee/abbreviator.rb +1 -0
- data/lib/lingo/attendee/decomposer.rb +2 -1
- data/lib/lingo/attendee/multi_worder.rb +5 -6
- data/lib/lingo/attendee/stemmer.rb +1 -1
- data/lib/lingo/attendee/synonymer.rb +4 -2
- data/lib/lingo/attendee/text_reader.rb +77 -57
- data/lib/lingo/attendee/text_writer.rb +1 -1
- data/lib/lingo/attendee/tokenizer.rb +101 -50
- data/lib/lingo/attendee/variator.rb +2 -1
- data/lib/lingo/attendee/vector_filter.rb +28 -6
- data/lib/lingo/attendee/word_searcher.rb +2 -1
- data/lib/lingo/attendee.rb +8 -4
- data/lib/lingo/call.rb +7 -3
- data/lib/lingo/cli.rb +8 -16
- data/lib/lingo/config.rb +11 -6
- data/lib/lingo/ctl.rb +54 -3
- data/lib/lingo/database/crypter.rb +8 -14
- data/lib/lingo/database/hash_store.rb +1 -1
- data/lib/lingo/database/{show_progress.rb → progress.rb} +7 -8
- data/lib/lingo/database/source/key_value.rb +6 -5
- data/lib/lingo/database/source/multi_key.rb +5 -2
- data/lib/lingo/database/source/multi_value.rb +6 -4
- data/lib/lingo/database/source/single_word.rb +2 -3
- data/lib/lingo/database/source/word_class.rb +24 -5
- data/lib/lingo/database/source.rb +5 -3
- data/lib/lingo/database.rb +102 -41
- data/lib/lingo/error.rb +24 -2
- data/lib/lingo/language/dictionary.rb +26 -54
- data/lib/lingo/language/grammar.rb +19 -23
- data/lib/lingo/language/lexical.rb +5 -1
- data/lib/lingo/language/lexical_hash.rb +7 -12
- data/lib/lingo/language/token.rb +10 -1
- data/lib/lingo/language/word.rb +35 -23
- data/lib/lingo/language/word_form.rb +5 -4
- data/lib/lingo/{show_progress.rb → progress.rb} +43 -30
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/srv/public/.gitkeep +0 -0
- data/lib/lingo/srv.rb +11 -6
- data/lib/lingo/version.rb +2 -2
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/lib/lingo/web/views/index.erb +4 -4
- data/lib/lingo/web.rb +4 -6
- data/lib/lingo.rb +4 -12
- data/lingo.cfg +1 -1
- data/lir.cfg +1 -1
- data/ru/lingo-dic.txt +33473 -2113
- data/ru/lingo-mul.txt +8430 -1913
- data/ru/lingo-syn.txt +1634 -0
- data/ru/user-dic.txt +6 -0
- data/ru.lang +49 -47
- data/spec/spec_helper.rb +4 -0
- data/test/attendee/ts_decomposer.rb +2 -2
- data/test/attendee/ts_synonymer.rb +3 -3
- data/test/attendee/ts_tokenizer.rb +215 -2
- data/test/attendee/ts_variator.rb +2 -2
- data/test/attendee/ts_word_searcher.rb +10 -6
- data/test/ref/artikel.seq +2 -2
- data/test/ref/artikel.vec +5 -5
- data/test/ref/artikel.ven +11 -11
- data/test/ref/artikel.ver +11 -11
- data/test/ref/lir.seq +13 -13
- data/test/ref/lir.vec +31 -31
- data/test/test_helper.rb +19 -5
- data/test/ts_database.rb +206 -77
- data/test/ts_language.rb +86 -26
- metadata +93 -49
- data/.rspec +0 -1
- data/de/test_syn2.txt +0 -1
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -82,33 +82,47 @@ class Lingo
|
|
82
82
|
|
83
83
|
CHAR, DIGIT = Char::CHAR, Char::DIGIT
|
84
84
|
|
85
|
+
PROTO = '(?:news|https?|ftps?)://'
|
86
|
+
|
85
87
|
RULES = [
|
86
|
-
['WIKI', /^=+.+=+$/],
|
87
88
|
['SPAC', /^\s+/],
|
88
|
-
['
|
89
|
-
['WIKI', /^\[\[.+?\]\]/],
|
89
|
+
['WIKI', /^=+.+=+|^__[A-Z]+__/],
|
90
90
|
['NUMS', /^[+-]?(?:\d{4,}|\d{1,3}(?:\.\d{3,3})*)(?:\.|(?:,\d+)?%?)/],
|
91
|
-
['URLS', /^(?:
|
91
|
+
['URLS', /^(?:www\.|mailto:|#{PROTO}|\S+?[._]\S+?@\S+?\.)\S+/],
|
92
92
|
['ABRV', /^(?:(?:(?:#{CHAR})+\.)+)(?:#{CHAR})+/],
|
93
93
|
['WORD', /^(?:#{CHAR}|#{DIGIT}|-)+/],
|
94
|
-
['PUNC', /^[!,.:;?¡¿]
|
95
|
-
|
96
|
-
|
94
|
+
['PUNC', /^[!,.:;?¡¿]+/]
|
95
|
+
]
|
96
|
+
|
97
|
+
OTHER = [
|
98
|
+
['OTHR', /^["$#%&'()*+\/<=>@\[\\\]^_{|}~¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾×÷]/],
|
99
|
+
['HELP', /^\S+/]
|
97
100
|
]
|
98
101
|
|
102
|
+
NESTS = {
|
103
|
+
'HTML' => ['<', '>'],
|
104
|
+
'WIKI:VARIABLE' => ['{{{', '}}}'],
|
105
|
+
'WIKI:TEMPLATE' => ['{{', '}}'],
|
106
|
+
'WIKI:LINK_INT' => ['[[', ']]'],
|
107
|
+
'WIKI:LINK_EXT' => [/^\[\s*#{PROTO}/, ']']
|
108
|
+
}
|
109
|
+
|
99
110
|
class << self
|
100
111
|
|
101
112
|
def rule(name)
|
102
113
|
RULES.assoc(name)
|
103
114
|
end
|
104
115
|
|
116
|
+
def rules(name)
|
117
|
+
RULES.select { |rule,| rule == name }
|
118
|
+
end
|
119
|
+
|
105
120
|
def delete(*names)
|
106
|
-
names.
|
121
|
+
names.map { |name| rules(name).each { |rule| RULES.delete(rule) } }
|
107
122
|
end
|
108
123
|
|
109
|
-
def replace(name, expr)
|
110
|
-
rule = rule
|
111
|
-
rule[1] = block_given? ? yield(rule[1]) : expr
|
124
|
+
def replace(name, expr = nil)
|
125
|
+
rules(name).each { |rule| rule[1] = expr || yield(*rule) }
|
112
126
|
end
|
113
127
|
|
114
128
|
def insert(*rules)
|
@@ -152,22 +166,37 @@ class Lingo
|
|
152
166
|
skip << 'HTML' unless @tags
|
153
167
|
skip << 'WIKI' unless @wiki
|
154
168
|
|
155
|
-
@rules = RULES.
|
169
|
+
[@rules = RULES.dup, @nests = NESTS.dup].each { |hash|
|
170
|
+
hash.delete_if { |name, _| skip.include?(Token.clean(name)) }
|
171
|
+
}
|
172
|
+
|
173
|
+
@nest, nest_re = [], []
|
156
174
|
|
157
|
-
@
|
175
|
+
@nests.each { |name, re|
|
176
|
+
re.map!.with_index { |r, i| r.is_a?(Regexp) ?
|
177
|
+
r : /^#{'.*?' if i > 0}#{Regexp.escape(r)}/ }
|
178
|
+
|
179
|
+
nest_re << "(?<#{name}>#{Regexp.new(
|
180
|
+
re[0].source.sub(/^\^/, ''), re[0].options)})"
|
181
|
+
}
|
182
|
+
|
183
|
+
@nest_re = /^(?<_>.*?)(?:#{nest_re.join('|')})/
|
184
|
+
|
185
|
+
@filename = @linenum = nil
|
158
186
|
end
|
159
187
|
|
160
188
|
def control(cmd, param)
|
161
189
|
case cmd
|
162
|
-
when STR_CMD_FILE then @filename = param
|
163
|
-
when STR_CMD_LIR then @filename = nil
|
164
|
-
when
|
190
|
+
when STR_CMD_FILE then @filename, @linenum = param, 1
|
191
|
+
when STR_CMD_LIR then @filename, @linenum = nil, nil
|
192
|
+
when STR_CMD_EOL then @linenum += 1 if @linenum
|
193
|
+
when STR_CMD_EOF then @nest.clear
|
165
194
|
end
|
166
195
|
end
|
167
196
|
|
168
197
|
def process(obj)
|
169
198
|
if obj.is_a?(String)
|
170
|
-
tokenize(obj)
|
199
|
+
tokenize(obj)
|
171
200
|
forward(STR_CMD_EOL, @filename) if @filename
|
172
201
|
else
|
173
202
|
forward(obj)
|
@@ -178,44 +207,66 @@ class Lingo
|
|
178
207
|
|
179
208
|
# tokenize("Eine Zeile.") -> [:Eine/WORD:, :Zeile/WORD:, :./PUNC:]
|
180
209
|
def tokenize(line)
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
if line =~ /^[^\[\]]*\]\]/
|
192
|
-
yield $&, @cont
|
193
|
-
line, @cont = $', nil
|
194
|
-
else
|
195
|
-
yield line, @cont
|
196
|
-
return
|
197
|
-
end
|
198
|
-
when nil
|
199
|
-
if @tags && line =~ /<[^<>]*$/
|
200
|
-
yield $&, @cont = 'HTML'
|
201
|
-
line = $`
|
202
|
-
end
|
203
|
-
|
204
|
-
if @wiki && line =~ /\[\[[^\[\]]*$/
|
205
|
-
yield $&, @cont = 'WIKI'
|
206
|
-
line = $`
|
207
|
-
end
|
210
|
+
@nest.empty? ? tokenize_line(line) : tokenize_nest(line)
|
211
|
+
rescue => err
|
212
|
+
raise err if err.is_a?(TokenizeError)
|
213
|
+
raise TokenizeError.new(line, @filename, @linenum, err)
|
214
|
+
end
|
215
|
+
|
216
|
+
def tokenize_line(line)
|
217
|
+
while (length = line.length) > 0 && tokenize_rule(line) { |rest|
|
218
|
+
length == rest.length ? break : line = rest
|
219
|
+
}
|
208
220
|
end
|
209
221
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
222
|
+
tokenize_open(line) unless line.empty?
|
223
|
+
end
|
224
|
+
|
225
|
+
def tokenize_rule(line, rules = @rules)
|
226
|
+
rules.find { |name, expr|
|
227
|
+
next unless line =~ expr
|
228
|
+
forward_token($&, name) if name != 'SPAC' || @space
|
229
|
+
yield $'
|
215
230
|
}
|
231
|
+
end
|
232
|
+
|
233
|
+
def tokenize_nest(line)
|
234
|
+
mdo = @nest_re.match(line)
|
235
|
+
mdc = @nests[@nest.last].last.match(line)
|
236
|
+
|
237
|
+
if mdo && (!mdc || mdo[0].length < mdc[0].length)
|
238
|
+
forward_token(mdo[:_], @nest.last) unless mdo[:_].empty?
|
239
|
+
|
240
|
+
nest = @nests.keys.find { |name| mdo[name] }
|
241
|
+
forward_nest(mdo[nest], mdo.post_match, nest)
|
242
|
+
elsif mdc
|
243
|
+
forward_token(mdc[0], @nest.pop)
|
244
|
+
tokenize(mdc.post_match)
|
245
|
+
else
|
246
|
+
forward_token(line, @nest.last)
|
216
247
|
end
|
217
248
|
end
|
218
249
|
|
250
|
+
def tokenize_open(line)
|
251
|
+
@nests.each { |nest, (open_re, _)|
|
252
|
+
next unless line =~ open_re
|
253
|
+
return forward_nest($&, $', nest)
|
254
|
+
}
|
255
|
+
|
256
|
+
tokenize_rule(line, OTHER) { |rest| line = rest }
|
257
|
+
tokenize(line)
|
258
|
+
end
|
259
|
+
|
260
|
+
def forward_nest(match, rest, nest)
|
261
|
+
forward_token(match, nest)
|
262
|
+
@nest << nest
|
263
|
+
tokenize(rest)
|
264
|
+
end
|
265
|
+
|
266
|
+
def forward_token(*args)
|
267
|
+
forward(Token.new(*args))
|
268
|
+
end
|
269
|
+
|
219
270
|
end
|
220
271
|
|
221
272
|
end
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -90,6 +90,7 @@ class Lingo
|
|
90
90
|
end
|
91
91
|
|
92
92
|
def control(cmd, param)
|
93
|
+
# can control
|
93
94
|
end
|
94
95
|
|
95
96
|
def process(obj)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -86,10 +86,15 @@ class Lingo
|
|
86
86
|
def init
|
87
87
|
if @debug = get_key('debug', false)
|
88
88
|
@prompt = get_key('prompt', 'lex:) ')
|
89
|
+
@preamble = get_key('preamble', true)
|
89
90
|
else
|
90
91
|
@lex = get_re('lexicals', '[sy]')
|
91
92
|
@skip = get_array('skip', DEFAULT_SKIP, :upcase)
|
92
93
|
|
94
|
+
@dict = get_key('dict', false)
|
95
|
+
@norm = get_key('norm', false) if @dict
|
96
|
+
@dict = Database::Source::WordClass::DEFAULT_SEPARATOR if @dict == true
|
97
|
+
|
93
98
|
@src = get_key('src', false)
|
94
99
|
@src = DEFAULT_SRC_SEP if @src == true
|
95
100
|
|
@@ -112,15 +117,32 @@ class Lingo
|
|
112
117
|
|
113
118
|
def process(obj)
|
114
119
|
if @debug
|
120
|
+
forward((@preamble = nil; @lingo.config.to_h.to_yaml)) if @preamble
|
115
121
|
forward("#{@prompt} #{obj.inspect}") if eval(@debug)
|
116
122
|
elsif obj.is_a?(Word) && !@skip.include?(obj.attr)
|
117
123
|
@word_count += 1
|
118
124
|
|
119
|
-
|
120
|
-
vec =
|
121
|
-
|
122
|
-
@
|
123
|
-
|
125
|
+
if @dict
|
126
|
+
vec, sep = [], Database::Source::WordClass::GENDER_SEPARATOR
|
127
|
+
|
128
|
+
obj.get_class(@lex).each { |lex|
|
129
|
+
str = "#{lex.form} ##{lex.attr}"
|
130
|
+
str << sep << lex.gender if lex.gender
|
131
|
+
vec << str
|
132
|
+
}
|
133
|
+
|
134
|
+
unless vec.empty?
|
135
|
+
wrd = @norm ? obj.lexicals.first.form : obj.form
|
136
|
+
vec = Unicode.downcase("#{wrd}#{@dict}#{vec.join(' ')}")
|
137
|
+
@sort_format ? @vectors << vec : forward(vec)
|
138
|
+
end
|
139
|
+
else
|
140
|
+
obj.get_class(@lex).each { |lex|
|
141
|
+
vec = Unicode.downcase(lex.form)
|
142
|
+
vec << @src << lex.src if @src && lex.src
|
143
|
+
@sort_format ? @vectors << vec : forward(vec)
|
144
|
+
}
|
145
|
+
end
|
124
146
|
end
|
125
147
|
end
|
126
148
|
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -72,6 +72,7 @@ class Lingo
|
|
72
72
|
end
|
73
73
|
|
74
74
|
def control(cmd, param)
|
75
|
+
# can control
|
75
76
|
end
|
76
77
|
|
77
78
|
def process(obj)
|
data/lib/lingo/attendee.rb
CHANGED
@@ -83,6 +83,8 @@ class Lingo
|
|
83
83
|
# Make sure config exists
|
84
84
|
lingo.dictionary_config
|
85
85
|
|
86
|
+
@dic = @gra = nil
|
87
|
+
|
86
88
|
init if self.class.method_defined?(:init)
|
87
89
|
|
88
90
|
@can_control = self.class.method_defined?(:control)
|
@@ -91,6 +93,8 @@ class Lingo
|
|
91
93
|
@skip_command = false
|
92
94
|
end
|
93
95
|
|
96
|
+
attr_reader :lingo
|
97
|
+
|
94
98
|
def add_subscriber(subscriber)
|
95
99
|
@subscriber.concat(subscriber)
|
96
100
|
end
|
@@ -133,7 +137,7 @@ class Lingo
|
|
133
137
|
end
|
134
138
|
|
135
139
|
def has_key?(key)
|
136
|
-
@config && @config.
|
140
|
+
@config && @config.key?(key)
|
137
141
|
end
|
138
142
|
|
139
143
|
def get_key(key, default = nodefault = true)
|
@@ -154,11 +158,11 @@ class Lingo
|
|
154
158
|
end
|
155
159
|
|
156
160
|
def dictionary(src, mod)
|
157
|
-
Language::Dictionary.new({ 'source' => src, 'mode' => mod },
|
161
|
+
Language::Dictionary.new({ 'source' => src, 'mode' => mod }, lingo)
|
158
162
|
end
|
159
163
|
|
160
164
|
def grammar(src, mod)
|
161
|
-
Language::Grammar.new({ 'source' => src, 'mode' => mod },
|
165
|
+
Language::Grammar.new({ 'source' => src, 'mode' => mod }, lingo)
|
162
166
|
end
|
163
167
|
|
164
168
|
def set_dic
|
@@ -170,7 +174,7 @@ class Lingo
|
|
170
174
|
end
|
171
175
|
|
172
176
|
def warn(*msg)
|
173
|
-
|
177
|
+
lingo.warn(*msg)
|
174
178
|
end
|
175
179
|
|
176
180
|
def require_lib(lib)
|
data/lib/lingo/call.rb
CHANGED
@@ -48,7 +48,7 @@ class Lingo
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
def talk(str)
|
51
|
+
def talk(str, raw = false)
|
52
52
|
config.stdin.reopen(str)
|
53
53
|
|
54
54
|
start
|
@@ -57,7 +57,7 @@ class Lingo
|
|
57
57
|
io = config.send(key)
|
58
58
|
io.rewind
|
59
59
|
|
60
|
-
lines = io.readlines
|
60
|
+
lines = io.readlines
|
61
61
|
|
62
62
|
io.truncate(0)
|
63
63
|
io.rewind
|
@@ -65,8 +65,12 @@ class Lingo
|
|
65
65
|
lines
|
66
66
|
}
|
67
67
|
|
68
|
+
return res.join if raw
|
69
|
+
|
70
|
+
res.each { |i| i.chomp! }
|
71
|
+
|
68
72
|
block_given? ? res.map! { |i| yield i } : begin
|
69
|
-
|
73
|
+
res.sort! unless ENV['LINGO_NO_SORT']
|
70
74
|
res.uniq!
|
71
75
|
res
|
72
76
|
end
|
data/lib/lingo/cli.rb
CHANGED
@@ -24,11 +24,11 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
-
require '
|
27
|
+
require 'cyclops'
|
28
28
|
|
29
29
|
class Lingo
|
30
30
|
|
31
|
-
class CLI <
|
31
|
+
class CLI < Cyclops
|
32
32
|
|
33
33
|
class << self
|
34
34
|
|
@@ -55,26 +55,18 @@ class Lingo
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def opts(opts)
|
58
|
-
opts.
|
59
|
-
options[:config] = config
|
60
|
-
}
|
61
|
-
|
62
|
-
opts.separator ''
|
63
|
-
|
64
|
-
opts.on('-l', '--language LANG', "Language for processing [Default: #{defaults[:language]}]") { |language|
|
65
|
-
options[:language] = language
|
66
|
-
}
|
58
|
+
opts.option(:language__LANG, "Language for processing [Default: #{defaults[:language]}]")
|
67
59
|
|
68
|
-
opts.separator
|
60
|
+
opts.separator
|
69
61
|
|
70
|
-
opts.
|
62
|
+
opts.option(:log__FILE, :L, 'Log file to print debug information to') { |log|
|
71
63
|
options[:log] = stderr.reopen(log == '-' ? stdout : File.open(log, 'a+', encoding: ENC))
|
72
64
|
}
|
73
65
|
|
74
|
-
opts.separator
|
66
|
+
opts.separator
|
75
67
|
|
76
|
-
opts.
|
77
|
-
options[:profile] = profile == '-'
|
68
|
+
opts.option(:profile__PATH, :P, 'Print profiling results') { |profile|
|
69
|
+
options[:profile] = stdout if profile == '-'
|
78
70
|
}
|
79
71
|
end
|
80
72
|
|
data/lib/lingo/config.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -24,11 +24,9 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
-
require '
|
27
|
+
require 'safe_yaml/load'
|
28
28
|
require_relative 'cli'
|
29
29
|
|
30
|
-
YAML::ENGINE.yamler = 'psych'
|
31
|
-
|
32
30
|
class Lingo
|
33
31
|
|
34
32
|
class Config
|
@@ -58,8 +56,12 @@ class Lingo
|
|
58
56
|
end
|
59
57
|
end
|
60
58
|
|
59
|
+
def to_h
|
60
|
+
@opts
|
61
|
+
end
|
62
|
+
|
61
63
|
def [](key)
|
62
|
-
key_to_nodes(key).inject(
|
64
|
+
key_to_nodes(key).inject(to_h) { |hash, node| hash[node] }
|
63
65
|
end
|
64
66
|
|
65
67
|
def []=(key, val)
|
@@ -125,7 +127,10 @@ class Lingo
|
|
125
127
|
|
126
128
|
def load_config(key, type = key.to_sym)
|
127
129
|
file = Lingo.find(type, @opts[key]) { quit }
|
128
|
-
|
130
|
+
File.open(file, encoding: ENC) { |f| @opts.update(SafeYAML.load(f)) }
|
131
|
+
rescue Psych::SyntaxError => err
|
132
|
+
err.message << " (in #{file})"
|
133
|
+
raise
|
129
134
|
end
|
130
135
|
|
131
136
|
end
|
data/lib/lingo/ctl.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -25,6 +25,9 @@
|
|
25
25
|
#++
|
26
26
|
|
27
27
|
require 'optparse'
|
28
|
+
require 'zip'
|
29
|
+
|
30
|
+
Zip.unicode_names = true
|
28
31
|
|
29
32
|
class Lingo
|
30
33
|
|
@@ -36,7 +39,7 @@ class Lingo
|
|
36
39
|
PROGNAME, OPTIONS = File.basename(PROG), {}
|
37
40
|
|
38
41
|
COMMANDS, ALIASES = {}, Hash.new { |h, k|
|
39
|
-
h[k] = COMMANDS.
|
42
|
+
h[k] = COMMANDS.key?(k) ? k : 'usage'
|
40
43
|
}
|
41
44
|
|
42
45
|
USAGE = <<-EOT
|
@@ -79,6 +82,7 @@ Usage: #{PROG} <command> [arguments] [options]
|
|
79
82
|
}
|
80
83
|
|
81
84
|
{ demo: [:d, 'Initialize demo directory', '[path]', 'current directory'],
|
85
|
+
archive: [:a, 'Create archive of directory', '[path]', 'current directory'],
|
82
86
|
rackup: [:r, 'Print path to rackup file', 'name'],
|
83
87
|
path: [:p, 'Print search path for dictionaries and configurations'],
|
84
88
|
help: [:h, 'Print help for available commands'],
|
@@ -111,19 +115,47 @@ Usage: #{PROG} <command> [arguments] [options]
|
|
111
115
|
|
112
116
|
usage('Source and target are the same.') if source == target
|
113
117
|
|
118
|
+
return unless overwrite?(target)
|
119
|
+
|
114
120
|
FileUtils.mkdir_p(File.dirname(target))
|
115
121
|
FileUtils.cp(source, target, verbose: true)
|
116
122
|
end
|
117
123
|
|
124
|
+
def do_archive
|
125
|
+
OPTIONS.update(path: ARGV.shift, scope: :local)
|
126
|
+
no_args
|
127
|
+
|
128
|
+
source = File.expand_path(path_for_scope.first)
|
129
|
+
target = "#{source}.zip"
|
130
|
+
|
131
|
+
abort "No such directory: #{source}" unless Dir.exist?(source)
|
132
|
+
|
133
|
+
return unless overwrite?(target, true)
|
134
|
+
|
135
|
+
base, name = File.split(source)
|
136
|
+
|
137
|
+
Dir.chdir(base) {
|
138
|
+
Zip::File.open(target, Zip::File::CREATE) { |zipfile|
|
139
|
+
Dir[File.join(name, '**', '*')].each { |file|
|
140
|
+
zipfile.add(file, file)
|
141
|
+
}
|
142
|
+
}
|
143
|
+
}
|
144
|
+
|
145
|
+
puts "Directory successfully archived at `#{target}'."
|
146
|
+
end
|
147
|
+
|
118
148
|
def do_clearstore
|
119
149
|
store = Dir["#{find(:store, false)}.*"]
|
120
150
|
FileUtils.rm(store, verbose: true) unless store.empty?
|
121
151
|
end
|
122
152
|
|
123
153
|
def do_demo
|
124
|
-
OPTIONS.update(path:
|
154
|
+
OPTIONS.update(path: ARGV.shift, scope: :system)
|
125
155
|
no_args
|
126
156
|
|
157
|
+
path = path_for_scope(:local).first
|
158
|
+
|
127
159
|
copy_list(:config) { |i| !File.basename(i).start_with?('test') }
|
128
160
|
copy_list(:lang)
|
129
161
|
copy_list(:dict) { |i| File.basename(i).start_with?('user') }
|
@@ -240,6 +272,25 @@ Usage: #{PROG} <command> [arguments] [options]
|
|
240
272
|
files.each { |file| ARGV.replace([file]); copy(what) }
|
241
273
|
end
|
242
274
|
|
275
|
+
def overwrite?(target, unlink = false)
|
276
|
+
!File.exist?(target) || if agree?("#{target} already exists. Overwrite?")
|
277
|
+
File.unlink(target) if unlink
|
278
|
+
true
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
def agree?(msg)
|
283
|
+
print "#{msg} (y/n) [n] "
|
284
|
+
|
285
|
+
case answer = $stdin.gets.chomp
|
286
|
+
when /\Ano?\z/i, '' then nil
|
287
|
+
when /\Ay(?:es)?\z/i then true
|
288
|
+
else puts 'Please enter "yes" or "no".'; agree?(msg)
|
289
|
+
end
|
290
|
+
rescue Interrupt
|
291
|
+
abort ''
|
292
|
+
end
|
293
|
+
|
243
294
|
end
|
244
295
|
|
245
296
|
def self.ctl
|
@@ -24,6 +24,7 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'openssl'
|
27
28
|
require 'digest/sha1'
|
28
29
|
|
29
30
|
class Lingo
|
@@ -34,31 +35,24 @@ class Lingo
|
|
34
35
|
|
35
36
|
class Crypter
|
36
37
|
|
37
|
-
|
38
|
-
|
39
|
-
def digest(key)
|
38
|
+
def self.digest(key)
|
40
39
|
Digest::SHA1.hexdigest(key)
|
41
40
|
end
|
42
41
|
|
43
42
|
def encode(key, val)
|
44
|
-
[digest(key), crypt(key, val)
|
45
|
-
b.divmod(16).each { |i| s << HEX_CHARS[i] }
|
46
|
-
}]
|
43
|
+
[self.class.digest(key), crypt(:encrypt, key, val)]
|
47
44
|
end
|
48
45
|
|
49
46
|
def decode(key, val)
|
50
|
-
crypt(key, val
|
51
|
-
q, r = b.map { |i| HEX_CHARS.index(i.chr(ENC)) }
|
52
|
-
s << q * 16 + r
|
53
|
-
})
|
47
|
+
crypt(:decrypt, key, val).force_encoding(ENC)
|
54
48
|
end
|
55
49
|
|
56
50
|
private
|
57
51
|
|
58
|
-
def crypt(
|
59
|
-
|
60
|
-
|
61
|
-
|
52
|
+
def crypt(method, key, val)
|
53
|
+
cipher = OpenSSL::Cipher.new('aes-128-cbc').send(method)
|
54
|
+
cipher.iv = cipher.key = self.class.digest(key)
|
55
|
+
cipher.update(val) + cipher.final
|
62
56
|
end
|
63
57
|
|
64
58
|
end
|
@@ -28,18 +28,17 @@ class Lingo
|
|
28
28
|
|
29
29
|
class Database
|
30
30
|
|
31
|
-
class
|
31
|
+
class Progress < Progress
|
32
32
|
|
33
33
|
def initialize(obj, src, doit = true)
|
34
|
-
|
35
|
-
super(obj, src.size, name, doit, 'convert', false)
|
34
|
+
super(obj, src.size, obj.config['name'], doit, 'convert', false)
|
36
35
|
|
37
|
-
if defined?(@
|
38
|
-
|
36
|
+
if defined?(@count)
|
37
|
+
count, rejected = src.rejected
|
39
38
|
|
40
|
-
if
|
41
|
-
print ' (',
|
42
|
-
print ': ',
|
39
|
+
if count > 0
|
40
|
+
print ' (', count, ' rejected'
|
41
|
+
print ': ', rejected if rejected
|
43
42
|
print ')'
|
44
43
|
end
|
45
44
|
|