lingo 1.8.4.2 → 1.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +4 -4
  2. data/ChangeLog +413 -325
  3. data/README +380 -131
  4. data/Rakefile +19 -21
  5. data/de/lingo-abk.txt +15 -17
  6. data/de/lingo-dic.txt +20210 -20659
  7. data/de/lingo-mul.txt +5 -13
  8. data/de/lingo-syn.txt +5 -8
  9. data/de/test_dic.txt +2 -0
  10. data/de/test_gen.txt +8 -0
  11. data/de/{test_mul2.txt → test_mu2.txt} +0 -0
  12. data/de/{test_singleword.txt → test_sgw.txt} +0 -0
  13. data/de/user-dic.txt +5 -7
  14. data/de.lang +64 -49
  15. data/en/lingo-dic.txt +6398 -6404
  16. data/en/lingo-irr.txt +2 -3
  17. data/en/lingo-mul.txt +6 -7
  18. data/en/lingo-wdn.txt +881 -1762
  19. data/en/user-dic.txt +2 -5
  20. data/en.lang +39 -39
  21. data/lib/lingo/app.rb +10 -6
  22. data/lib/lingo/attendee/abbreviator.rb +1 -0
  23. data/lib/lingo/attendee/decomposer.rb +2 -1
  24. data/lib/lingo/attendee/multi_worder.rb +5 -6
  25. data/lib/lingo/attendee/stemmer.rb +1 -1
  26. data/lib/lingo/attendee/synonymer.rb +4 -2
  27. data/lib/lingo/attendee/text_reader.rb +77 -57
  28. data/lib/lingo/attendee/text_writer.rb +1 -1
  29. data/lib/lingo/attendee/tokenizer.rb +101 -50
  30. data/lib/lingo/attendee/variator.rb +2 -1
  31. data/lib/lingo/attendee/vector_filter.rb +28 -6
  32. data/lib/lingo/attendee/word_searcher.rb +2 -1
  33. data/lib/lingo/attendee.rb +8 -4
  34. data/lib/lingo/call.rb +7 -3
  35. data/lib/lingo/cli.rb +8 -16
  36. data/lib/lingo/config.rb +11 -6
  37. data/lib/lingo/ctl.rb +54 -3
  38. data/lib/lingo/database/crypter.rb +8 -14
  39. data/lib/lingo/database/hash_store.rb +1 -1
  40. data/lib/lingo/database/{show_progress.rb → progress.rb} +7 -8
  41. data/lib/lingo/database/source/key_value.rb +6 -5
  42. data/lib/lingo/database/source/multi_key.rb +5 -2
  43. data/lib/lingo/database/source/multi_value.rb +6 -4
  44. data/lib/lingo/database/source/single_word.rb +2 -3
  45. data/lib/lingo/database/source/word_class.rb +24 -5
  46. data/lib/lingo/database/source.rb +5 -3
  47. data/lib/lingo/database.rb +102 -41
  48. data/lib/lingo/error.rb +24 -2
  49. data/lib/lingo/language/dictionary.rb +26 -54
  50. data/lib/lingo/language/grammar.rb +19 -23
  51. data/lib/lingo/language/lexical.rb +5 -1
  52. data/lib/lingo/language/lexical_hash.rb +7 -12
  53. data/lib/lingo/language/token.rb +10 -1
  54. data/lib/lingo/language/word.rb +35 -23
  55. data/lib/lingo/language/word_form.rb +5 -4
  56. data/lib/lingo/{show_progress.rb → progress.rb} +43 -30
  57. data/lib/lingo/srv/lingosrv.cfg +1 -1
  58. data/lib/lingo/srv/public/.gitkeep +0 -0
  59. data/lib/lingo/srv.rb +11 -6
  60. data/lib/lingo/version.rb +2 -2
  61. data/lib/lingo/web/lingoweb.cfg +1 -1
  62. data/lib/lingo/web/views/index.erb +4 -4
  63. data/lib/lingo/web.rb +4 -6
  64. data/lib/lingo.rb +4 -12
  65. data/lingo.cfg +1 -1
  66. data/lir.cfg +1 -1
  67. data/ru/lingo-dic.txt +33473 -2113
  68. data/ru/lingo-mul.txt +8430 -1913
  69. data/ru/lingo-syn.txt +1634 -0
  70. data/ru/user-dic.txt +6 -0
  71. data/ru.lang +49 -47
  72. data/spec/spec_helper.rb +4 -0
  73. data/test/attendee/ts_decomposer.rb +2 -2
  74. data/test/attendee/ts_synonymer.rb +3 -3
  75. data/test/attendee/ts_tokenizer.rb +215 -2
  76. data/test/attendee/ts_variator.rb +2 -2
  77. data/test/attendee/ts_word_searcher.rb +10 -6
  78. data/test/ref/artikel.seq +2 -2
  79. data/test/ref/artikel.vec +5 -5
  80. data/test/ref/artikel.ven +11 -11
  81. data/test/ref/artikel.ver +11 -11
  82. data/test/ref/lir.seq +13 -13
  83. data/test/ref/lir.vec +31 -31
  84. data/test/test_helper.rb +19 -5
  85. data/test/ts_database.rb +206 -77
  86. data/test/ts_language.rb +86 -26
  87. metadata +93 -49
  88. data/.rspec +0 -1
  89. data/de/test_syn2.txt +0 -1
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -82,33 +82,47 @@ class Lingo
82
82
 
83
83
  CHAR, DIGIT = Char::CHAR, Char::DIGIT
84
84
 
85
+ PROTO = '(?:news|https?|ftps?)://'
86
+
85
87
  RULES = [
86
- ['WIKI', /^=+.+=+$/],
87
88
  ['SPAC', /^\s+/],
88
- ['HTML', /^<[^>]+>/],
89
- ['WIKI', /^\[\[.+?\]\]/],
89
+ ['WIKI', /^=+.+=+|^__[A-Z]+__/],
90
90
  ['NUMS', /^[+-]?(?:\d{4,}|\d{1,3}(?:\.\d{3,3})*)(?:\.|(?:,\d+)?%?)/],
91
- ['URLS', /^(?:(?:mailto:|(?:news|https?|ftps?):\/\/)\S+|^(?:www(?:\.\S+)+)|[^\s.]+(?:[\._]\S+)+@\S+(?:\.\S+)+)/],
91
+ ['URLS', /^(?:www\.|mailto:|#{PROTO}|\S+?[._]\S+?@\S+?\.)\S+/],
92
92
  ['ABRV', /^(?:(?:(?:#{CHAR})+\.)+)(?:#{CHAR})+/],
93
93
  ['WORD', /^(?:#{CHAR}|#{DIGIT}|-)+/],
94
- ['PUNC', /^[!,.:;?¡¿]/],
95
- ['OTHR', /^["$#%&'()*+\-\/<=>@\[\\\]^_{|}~¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾×÷]/],
96
- ['HELP', /^[^ ]*/]
94
+ ['PUNC', /^[!,.:;?¡¿]+/]
95
+ ]
96
+
97
+ OTHER = [
98
+ ['OTHR', /^["$#%&'()*+\/<=>@\[\\\]^_{|}~¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾×÷]/],
99
+ ['HELP', /^\S+/]
97
100
  ]
98
101
 
102
+ NESTS = {
103
+ 'HTML' => ['<', '>'],
104
+ 'WIKI:VARIABLE' => ['{{{', '}}}'],
105
+ 'WIKI:TEMPLATE' => ['{{', '}}'],
106
+ 'WIKI:LINK_INT' => ['[[', ']]'],
107
+ 'WIKI:LINK_EXT' => [/^\[\s*#{PROTO}/, ']']
108
+ }
109
+
99
110
  class << self
100
111
 
101
112
  def rule(name)
102
113
  RULES.assoc(name)
103
114
  end
104
115
 
116
+ def rules(name)
117
+ RULES.select { |rule,| rule == name }
118
+ end
119
+
105
120
  def delete(*names)
106
- names.each { |name| RULES.delete(rule(name)) }
121
+ names.map { |name| rules(name).each { |rule| RULES.delete(rule) } }
107
122
  end
108
123
 
109
- def replace(name, expr)
110
- rule = rule(name) or return
111
- rule[1] = block_given? ? yield(rule[1]) : expr
124
+ def replace(name, expr = nil)
125
+ rules(name).each { |rule| rule[1] = expr || yield(*rule) }
112
126
  end
113
127
 
114
128
  def insert(*rules)
@@ -152,22 +166,37 @@ class Lingo
152
166
  skip << 'HTML' unless @tags
153
167
  skip << 'WIKI' unless @wiki
154
168
 
155
- @rules = RULES.reject { |name, _| skip.include?(name) }
169
+ [@rules = RULES.dup, @nests = NESTS.dup].each { |hash|
170
+ hash.delete_if { |name, _| skip.include?(Token.clean(name)) }
171
+ }
172
+
173
+ @nest, nest_re = [], []
156
174
 
157
- @filename = @cont = nil
175
+ @nests.each { |name, re|
176
+ re.map!.with_index { |r, i| r.is_a?(Regexp) ?
177
+ r : /^#{'.*?' if i > 0}#{Regexp.escape(r)}/ }
178
+
179
+ nest_re << "(?<#{name}>#{Regexp.new(
180
+ re[0].source.sub(/^\^/, ''), re[0].options)})"
181
+ }
182
+
183
+ @nest_re = /^(?<_>.*?)(?:#{nest_re.join('|')})/
184
+
185
+ @filename = @linenum = nil
158
186
  end
159
187
 
160
188
  def control(cmd, param)
161
189
  case cmd
162
- when STR_CMD_FILE then @filename = param
163
- when STR_CMD_LIR then @filename = nil
164
- when STR_CMD_EOF then @cont = nil
190
+ when STR_CMD_FILE then @filename, @linenum = param, 1
191
+ when STR_CMD_LIR then @filename, @linenum = nil, nil
192
+ when STR_CMD_EOL then @linenum += 1 if @linenum
193
+ when STR_CMD_EOF then @nest.clear
165
194
  end
166
195
  end
167
196
 
168
197
  def process(obj)
169
198
  if obj.is_a?(String)
170
- tokenize(obj) { |*i| forward(Token.new(*i)) }
199
+ tokenize(obj)
171
200
  forward(STR_CMD_EOL, @filename) if @filename
172
201
  else
173
202
  forward(obj)
@@ -178,44 +207,66 @@ class Lingo
178
207
 
179
208
  # tokenize("Eine Zeile.") -> [:Eine/WORD:, :Zeile/WORD:, :./PUNC:]
180
209
  def tokenize(line)
181
- case @cont
182
- when 'HTML'
183
- if line =~ /^[^<>]*>/
184
- yield $&, @cont
185
- line, @cont = $', nil
186
- else
187
- yield line, @cont
188
- return
189
- end
190
- when 'WIKI'
191
- if line =~ /^[^\[\]]*\]\]/
192
- yield $&, @cont
193
- line, @cont = $', nil
194
- else
195
- yield line, @cont
196
- return
197
- end
198
- when nil
199
- if @tags && line =~ /<[^<>]*$/
200
- yield $&, @cont = 'HTML'
201
- line = $`
202
- end
203
-
204
- if @wiki && line =~ /\[\[[^\[\]]*$/
205
- yield $&, @cont = 'WIKI'
206
- line = $`
207
- end
210
+ @nest.empty? ? tokenize_line(line) : tokenize_nest(line)
211
+ rescue => err
212
+ raise err if err.is_a?(TokenizeError)
213
+ raise TokenizeError.new(line, @filename, @linenum, err)
214
+ end
215
+
216
+ def tokenize_line(line)
217
+ while (length = line.length) > 0 && tokenize_rule(line) { |rest|
218
+ length == rest.length ? break : line = rest
219
+ }
208
220
  end
209
221
 
210
- while (l = line.length) > 0 && @rules.find { |name, expr|
211
- if line =~ expr
212
- yield $&, name if name != 'SPAC' || @space
213
- l == $'.length ? break : line = $'
214
- end
222
+ tokenize_open(line) unless line.empty?
223
+ end
224
+
225
+ def tokenize_rule(line, rules = @rules)
226
+ rules.find { |name, expr|
227
+ next unless line =~ expr
228
+ forward_token($&, name) if name != 'SPAC' || @space
229
+ yield $'
215
230
  }
231
+ end
232
+
233
+ def tokenize_nest(line)
234
+ mdo = @nest_re.match(line)
235
+ mdc = @nests[@nest.last].last.match(line)
236
+
237
+ if mdo && (!mdc || mdo[0].length < mdc[0].length)
238
+ forward_token(mdo[:_], @nest.last) unless mdo[:_].empty?
239
+
240
+ nest = @nests.keys.find { |name| mdo[name] }
241
+ forward_nest(mdo[nest], mdo.post_match, nest)
242
+ elsif mdc
243
+ forward_token(mdc[0], @nest.pop)
244
+ tokenize(mdc.post_match)
245
+ else
246
+ forward_token(line, @nest.last)
216
247
  end
217
248
  end
218
249
 
250
+ def tokenize_open(line)
251
+ @nests.each { |nest, (open_re, _)|
252
+ next unless line =~ open_re
253
+ return forward_nest($&, $', nest)
254
+ }
255
+
256
+ tokenize_rule(line, OTHER) { |rest| line = rest }
257
+ tokenize(line)
258
+ end
259
+
260
+ def forward_nest(match, rest, nest)
261
+ forward_token(match, nest)
262
+ @nest << nest
263
+ tokenize(rest)
264
+ end
265
+
266
+ def forward_token(*args)
267
+ forward(Token.new(*args))
268
+ end
269
+
219
270
  end
220
271
 
221
272
  end
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -90,6 +90,7 @@ class Lingo
90
90
  end
91
91
 
92
92
  def control(cmd, param)
93
+ # can control
93
94
  end
94
95
 
95
96
  def process(obj)
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -86,10 +86,15 @@ class Lingo
86
86
  def init
87
87
  if @debug = get_key('debug', false)
88
88
  @prompt = get_key('prompt', 'lex:) ')
89
+ @preamble = get_key('preamble', true)
89
90
  else
90
91
  @lex = get_re('lexicals', '[sy]')
91
92
  @skip = get_array('skip', DEFAULT_SKIP, :upcase)
92
93
 
94
+ @dict = get_key('dict', false)
95
+ @norm = get_key('norm', false) if @dict
96
+ @dict = Database::Source::WordClass::DEFAULT_SEPARATOR if @dict == true
97
+
93
98
  @src = get_key('src', false)
94
99
  @src = DEFAULT_SRC_SEP if @src == true
95
100
 
@@ -112,15 +117,32 @@ class Lingo
112
117
 
113
118
  def process(obj)
114
119
  if @debug
120
+ forward((@preamble = nil; @lingo.config.to_h.to_yaml)) if @preamble
115
121
  forward("#{@prompt} #{obj.inspect}") if eval(@debug)
116
122
  elsif obj.is_a?(Word) && !@skip.include?(obj.attr)
117
123
  @word_count += 1
118
124
 
119
- obj.get_class(@lex).each { |lex|
120
- vec = Unicode.downcase(lex.form)
121
- vec << @src << lex.src if @src && lex.src
122
- @sort_format ? @vectors << vec : forward(vec)
123
- }
125
+ if @dict
126
+ vec, sep = [], Database::Source::WordClass::GENDER_SEPARATOR
127
+
128
+ obj.get_class(@lex).each { |lex|
129
+ str = "#{lex.form} ##{lex.attr}"
130
+ str << sep << lex.gender if lex.gender
131
+ vec << str
132
+ }
133
+
134
+ unless vec.empty?
135
+ wrd = @norm ? obj.lexicals.first.form : obj.form
136
+ vec = Unicode.downcase("#{wrd}#{@dict}#{vec.join(' ')}")
137
+ @sort_format ? @vectors << vec : forward(vec)
138
+ end
139
+ else
140
+ obj.get_class(@lex).each { |lex|
141
+ vec = Unicode.downcase(lex.form)
142
+ vec << @src << lex.src if @src && lex.src
143
+ @sort_format ? @vectors << vec : forward(vec)
144
+ }
145
+ end
124
146
  end
125
147
  end
126
148
 
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -72,6 +72,7 @@ class Lingo
72
72
  end
73
73
 
74
74
  def control(cmd, param)
75
+ # can control
75
76
  end
76
77
 
77
78
  def process(obj)
@@ -83,6 +83,8 @@ class Lingo
83
83
  # Make sure config exists
84
84
  lingo.dictionary_config
85
85
 
86
+ @dic = @gra = nil
87
+
86
88
  init if self.class.method_defined?(:init)
87
89
 
88
90
  @can_control = self.class.method_defined?(:control)
@@ -91,6 +93,8 @@ class Lingo
91
93
  @skip_command = false
92
94
  end
93
95
 
96
+ attr_reader :lingo
97
+
94
98
  def add_subscriber(subscriber)
95
99
  @subscriber.concat(subscriber)
96
100
  end
@@ -133,7 +137,7 @@ class Lingo
133
137
  end
134
138
 
135
139
  def has_key?(key)
136
- @config && @config.has_key?(key)
140
+ @config && @config.key?(key)
137
141
  end
138
142
 
139
143
  def get_key(key, default = nodefault = true)
@@ -154,11 +158,11 @@ class Lingo
154
158
  end
155
159
 
156
160
  def dictionary(src, mod)
157
- Language::Dictionary.new({ 'source' => src, 'mode' => mod }, @lingo)
161
+ Language::Dictionary.new({ 'source' => src, 'mode' => mod }, lingo)
158
162
  end
159
163
 
160
164
  def grammar(src, mod)
161
- Language::Grammar.new({ 'source' => src, 'mode' => mod }, @lingo)
165
+ Language::Grammar.new({ 'source' => src, 'mode' => mod }, lingo)
162
166
  end
163
167
 
164
168
  def set_dic
@@ -170,7 +174,7 @@ class Lingo
170
174
  end
171
175
 
172
176
  def warn(*msg)
173
- @lingo.warn(*msg)
177
+ lingo.warn(*msg)
174
178
  end
175
179
 
176
180
  def require_lib(lib)
data/lib/lingo/call.rb CHANGED
@@ -48,7 +48,7 @@ class Lingo
48
48
  end
49
49
  end
50
50
 
51
- def talk(str)
51
+ def talk(str, raw = false)
52
52
  config.stdin.reopen(str)
53
53
 
54
54
  start
@@ -57,7 +57,7 @@ class Lingo
57
57
  io = config.send(key)
58
58
  io.rewind
59
59
 
60
- lines = io.readlines.each { |i| i.chomp! }
60
+ lines = io.readlines
61
61
 
62
62
  io.truncate(0)
63
63
  io.rewind
@@ -65,8 +65,12 @@ class Lingo
65
65
  lines
66
66
  }
67
67
 
68
+ return res.join if raw
69
+
70
+ res.each { |i| i.chomp! }
71
+
68
72
  block_given? ? res.map! { |i| yield i } : begin
69
- Lingo.sort!(res)
73
+ res.sort! unless ENV['LINGO_NO_SORT']
70
74
  res.uniq!
71
75
  res
72
76
  end
data/lib/lingo/cli.rb CHANGED
@@ -24,11 +24,11 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
- require 'nuggets/cli'
27
+ require 'cyclops'
28
28
 
29
29
  class Lingo
30
30
 
31
- class CLI < ::Nuggets::CLI
31
+ class CLI < Cyclops
32
32
 
33
33
  class << self
34
34
 
@@ -55,26 +55,18 @@ class Lingo
55
55
  end
56
56
 
57
57
  def opts(opts)
58
- opts.on('-c', '--config YAML', "Config file [Default: #{defaults[:config]}#{' (currently not present)' unless File.readable?(defaults[:config])}]") { |config|
59
- options[:config] = config
60
- }
61
-
62
- opts.separator ''
63
-
64
- opts.on('-l', '--language LANG', "Language for processing [Default: #{defaults[:language]}]") { |language|
65
- options[:language] = language
66
- }
58
+ opts.option(:language__LANG, "Language for processing [Default: #{defaults[:language]}]")
67
59
 
68
- opts.separator ''
60
+ opts.separator
69
61
 
70
- opts.on('-L', '--log FILE', 'Log file to print debug information to') { |log|
62
+ opts.option(:log__FILE, :L, 'Log file to print debug information to') { |log|
71
63
  options[:log] = stderr.reopen(log == '-' ? stdout : File.open(log, 'a+', encoding: ENC))
72
64
  }
73
65
 
74
- opts.separator ''
66
+ opts.separator
75
67
 
76
- opts.on('-P', '--profile PATH', 'Print profiling results') { |profile|
77
- options[:profile] = profile == '-' ? stdout : profile
68
+ opts.option(:profile__PATH, :P, 'Print profiling results') { |profile|
69
+ options[:profile] = stdout if profile == '-'
78
70
  }
79
71
  end
80
72
 
data/lib/lingo/config.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -24,11 +24,9 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
- require 'yaml'
27
+ require 'safe_yaml/load'
28
28
  require_relative 'cli'
29
29
 
30
- YAML::ENGINE.yamler = 'psych'
31
-
32
30
  class Lingo
33
31
 
34
32
  class Config
@@ -58,8 +56,12 @@ class Lingo
58
56
  end
59
57
  end
60
58
 
59
+ def to_h
60
+ @opts
61
+ end
62
+
61
63
  def [](key)
62
- key_to_nodes(key).inject(@opts) { |hash, node| hash[node] }
64
+ key_to_nodes(key).inject(to_h) { |hash, node| hash[node] }
63
65
  end
64
66
 
65
67
  def []=(key, val)
@@ -125,7 +127,10 @@ class Lingo
125
127
 
126
128
  def load_config(key, type = key.to_sym)
127
129
  file = Lingo.find(type, @opts[key]) { quit }
128
- @opts.update(File.open(file, encoding: ENC) { |f| YAML.load(f) })
130
+ File.open(file, encoding: ENC) { |f| @opts.update(SafeYAML.load(f)) }
131
+ rescue Psych::SyntaxError => err
132
+ err.message << " (in #{file})"
133
+ raise
129
134
  end
130
135
 
131
136
  end
data/lib/lingo/ctl.rb CHANGED
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -25,6 +25,9 @@
25
25
  #++
26
26
 
27
27
  require 'optparse'
28
+ require 'zip'
29
+
30
+ Zip.unicode_names = true
28
31
 
29
32
  class Lingo
30
33
 
@@ -36,7 +39,7 @@ class Lingo
36
39
  PROGNAME, OPTIONS = File.basename(PROG), {}
37
40
 
38
41
  COMMANDS, ALIASES = {}, Hash.new { |h, k|
39
- h[k] = COMMANDS.has_key?(k) ? k : 'usage'
42
+ h[k] = COMMANDS.key?(k) ? k : 'usage'
40
43
  }
41
44
 
42
45
  USAGE = <<-EOT
@@ -79,6 +82,7 @@ Usage: #{PROG} <command> [arguments] [options]
79
82
  }
80
83
 
81
84
  { demo: [:d, 'Initialize demo directory', '[path]', 'current directory'],
85
+ archive: [:a, 'Create archive of directory', '[path]', 'current directory'],
82
86
  rackup: [:r, 'Print path to rackup file', 'name'],
83
87
  path: [:p, 'Print search path for dictionaries and configurations'],
84
88
  help: [:h, 'Print help for available commands'],
@@ -111,19 +115,47 @@ Usage: #{PROG} <command> [arguments] [options]
111
115
 
112
116
  usage('Source and target are the same.') if source == target
113
117
 
118
+ return unless overwrite?(target)
119
+
114
120
  FileUtils.mkdir_p(File.dirname(target))
115
121
  FileUtils.cp(source, target, verbose: true)
116
122
  end
117
123
 
124
+ def do_archive
125
+ OPTIONS.update(path: ARGV.shift, scope: :local)
126
+ no_args
127
+
128
+ source = File.expand_path(path_for_scope.first)
129
+ target = "#{source}.zip"
130
+
131
+ abort "No such directory: #{source}" unless Dir.exist?(source)
132
+
133
+ return unless overwrite?(target, true)
134
+
135
+ base, name = File.split(source)
136
+
137
+ Dir.chdir(base) {
138
+ Zip::File.open(target, Zip::File::CREATE) { |zipfile|
139
+ Dir[File.join(name, '**', '*')].each { |file|
140
+ zipfile.add(file, file)
141
+ }
142
+ }
143
+ }
144
+
145
+ puts "Directory successfully archived at `#{target}'."
146
+ end
147
+
118
148
  def do_clearstore
119
149
  store = Dir["#{find(:store, false)}.*"]
120
150
  FileUtils.rm(store, verbose: true) unless store.empty?
121
151
  end
122
152
 
123
153
  def do_demo
124
- OPTIONS.update(path: path = ARGV.shift, scope: :system)
154
+ OPTIONS.update(path: ARGV.shift, scope: :system)
125
155
  no_args
126
156
 
157
+ path = path_for_scope(:local).first
158
+
127
159
  copy_list(:config) { |i| !File.basename(i).start_with?('test') }
128
160
  copy_list(:lang)
129
161
  copy_list(:dict) { |i| File.basename(i).start_with?('user') }
@@ -240,6 +272,25 @@ Usage: #{PROG} <command> [arguments] [options]
240
272
  files.each { |file| ARGV.replace([file]); copy(what) }
241
273
  end
242
274
 
275
+ def overwrite?(target, unlink = false)
276
+ !File.exist?(target) || if agree?("#{target} already exists. Overwrite?")
277
+ File.unlink(target) if unlink
278
+ true
279
+ end
280
+ end
281
+
282
+ def agree?(msg)
283
+ print "#{msg} (y/n) [n] "
284
+
285
+ case answer = $stdin.gets.chomp
286
+ when /\Ano?\z/i, '' then nil
287
+ when /\Ay(?:es)?\z/i then true
288
+ else puts 'Please enter "yes" or "no".'; agree?(msg)
289
+ end
290
+ rescue Interrupt
291
+ abort ''
292
+ end
293
+
243
294
  end
244
295
 
245
296
  def self.ctl
@@ -24,6 +24,7 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
+ require 'openssl'
27
28
  require 'digest/sha1'
28
29
 
29
30
  class Lingo
@@ -34,31 +35,24 @@ class Lingo
34
35
 
35
36
  class Crypter
36
37
 
37
- HEX_CHARS = '0123456789abcdef'.freeze
38
-
39
- def digest(key)
38
+ def self.digest(key)
40
39
  Digest::SHA1.hexdigest(key)
41
40
  end
42
41
 
43
42
  def encode(key, val)
44
- [digest(key), crypt(key, val).each_byte.with_object('') { |b, s|
45
- b.divmod(16).each { |i| s << HEX_CHARS[i] }
46
- }]
43
+ [self.class.digest(key), crypt(:encrypt, key, val)]
47
44
  end
48
45
 
49
46
  def decode(key, val)
50
- crypt(key, val.each_byte.each_slice(2).with_object('') { |b, s|
51
- q, r = b.map { |i| HEX_CHARS.index(i.chr(ENC)) }
52
- s << q * 16 + r
53
- })
47
+ crypt(:decrypt, key, val).force_encoding(ENC)
54
48
  end
55
49
 
56
50
  private
57
51
 
58
- def crypt(k, v)
59
- c, y = '', k.codepoints.reverse_each.cycle
60
- v.each_codepoint { |x| c << (x ^ y.next).chr(ENC) }
61
- c
52
+ def crypt(method, key, val)
53
+ cipher = OpenSSL::Cipher.new('aes-128-cbc').send(method)
54
+ cipher.iv = cipher.key = self.class.digest(key)
55
+ cipher.update(val) + cipher.final
62
56
  end
63
57
 
64
58
  end
@@ -63,7 +63,7 @@ class Lingo
63
63
  # Dup key, because we're reusing everything.
64
64
  def _each
65
65
  @db.each { |key, val| yield key.dup, val }
66
- end
66
+ end
67
67
 
68
68
  end
69
69
 
@@ -28,18 +28,17 @@ class Lingo
28
28
 
29
29
  class Database
30
30
 
31
- class ShowProgress < ShowProgress
31
+ class Progress < Progress
32
32
 
33
33
  def initialize(obj, src, doit = true)
34
- name = obj.instance_variable_get(:@config)['name']
35
- super(obj, src.size, name, doit, 'convert', false)
34
+ super(obj, src.size, obj.config['name'], doit, 'convert', false)
36
35
 
37
- if defined?(@cnt)
38
- cnt, rej = src.rejected
36
+ if defined?(@count)
37
+ count, rejected = src.rejected
39
38
 
40
- if cnt > 0
41
- print ' (', cnt, ' rejected'
42
- print ': ', rej if rej
39
+ if count > 0
40
+ print ' (', count, ' rejected'
41
+ print ': ', rejected if rejected
43
42
  print ')'
44
43
  end
45
44