lingo 1.8.3 → 1.8.4

Sign up to get free protection for your applications and to get access to all the features.
data/lib/lingo/app.rb CHANGED
@@ -24,9 +24,11 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
+ require 'json'
27
28
  require 'optparse'
28
29
  require 'shellwords'
29
30
  require 'sinatra/base'
31
+ require 'sinatra/cookies'
30
32
 
31
33
  class Lingo
32
34
 
@@ -37,6 +39,7 @@ class Lingo
37
39
  def init_app(file, *args, &block)
38
40
  set :root, File.chomp_ext(file)
39
41
  parse_options(*args, &block)
42
+ helpers Sinatra::Cookies
40
43
  end
41
44
 
42
45
  def parse_options(lingo_options = false)
@@ -173,6 +173,12 @@ class Lingo
173
173
  @lingo.warn(*msg)
174
174
  end
175
175
 
176
+ def require_lib(lib)
177
+ require lib
178
+ rescue LoadError => err
179
+ raise LibraryLoadError.new(self.class, lib, err)
180
+ end
181
+
176
182
  end
177
183
 
178
184
  end
@@ -77,7 +77,7 @@ class Lingo
77
77
  end
78
78
 
79
79
  def control(cmd, param)
80
- send_abbr(nil) if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
80
+ send_abbr(@abbr) if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
81
81
  end
82
82
 
83
83
  def process(obj)
@@ -180,7 +180,7 @@ class Lingo
180
180
  seq = []
181
181
 
182
182
  @buffer.each { |obj|
183
- next [obj] unless obj.is_a?(WordForm)
183
+ next seq << [obj] unless obj.is_a?(WordForm)
184
184
  next if (form = obj.form) == CHAR_PUNCT
185
185
 
186
186
  w = find_word(form, @lex_dic, @lex_gra)
@@ -71,7 +71,11 @@ class Lingo
71
71
  protected
72
72
 
73
73
  def init
74
- @nonewords, @sort = [], get_key('sort', true)
74
+ @sort = get_key('sort', !ENV['LINGO_NO_SORT'])
75
+ @dict = get_key('dict', false)
76
+ @dict = '=' if @dict == true
77
+
78
+ @nonewords = []
75
79
  end
76
80
 
77
81
  def control(cmd, param)
@@ -80,14 +84,17 @@ class Lingo
80
84
  @nonewords.clear
81
85
  when STR_CMD_EOL
82
86
  skip_command
83
- when STR_CMD_RECORD, STR_CMD_EOF
84
- send_nonewords unless @nonewords.empty?
87
+ when STR_CMD_RECORD
88
+ send_nonewords unless @dict
89
+ when STR_CMD_EOF
90
+ send_nonewords
85
91
  end
86
92
  end
87
93
 
88
94
  def process(obj)
89
95
  if obj.is_a?(Word) && obj.unknown?
90
96
  non = Unicode.downcase(obj.form)
97
+ non = "#{non}#{@dict}#{non} #?" if @dict
91
98
  @sort ? @nonewords << non : forward(non)
92
99
  end
93
100
  end
@@ -95,8 +102,10 @@ class Lingo
95
102
  private
96
103
 
97
104
  def send_nonewords
98
- @nonewords.uniq!
99
- flush(@nonewords.sort!)
105
+ unless @nonewords.empty?
106
+ @nonewords.uniq!
107
+ flush(@nonewords.sort!)
108
+ end
100
109
  end
101
110
 
102
111
  end
@@ -96,15 +96,20 @@ class Lingo
96
96
  protected
97
97
 
98
98
  def init
99
- @stopper = get_array('stopper', DEFAULT_SKIP, :upcase)
100
- @classes = []
99
+ @stopper = get_array('stopper', DEFAULT_SKIP)
100
+ .push(WA_UNKNOWN, WA_UNKMULPART)
101
101
 
102
- @seq = get_key('sequences').map { |string, format|
103
- @classes.concat(classes = string.downcase!.chars.to_a)
104
- [string, classes, format]
102
+ @mwc = get_key('multiword', LA_MULTIWORD)
103
+ @cls = []
104
+
105
+ @seq = get_key('sequences').map { |str, fmt|
106
+ @cls.concat(cls = (str = str.downcase).scan(/[[:alpha:]]/))
107
+
108
+ (str =~ /\W/ ? [Regexp.new(str), nil] : [str, cls]).push(
109
+ fmt == true ? '|' : fmt ? fmt.gsub(/\d+/, '%\&$s') : nil)
105
110
  }
106
111
 
107
- @classes.uniq!
112
+ @cls.uniq!
108
113
 
109
114
  raise MissingConfigError.new(:sequences) if @seq.empty?
110
115
  end
@@ -114,67 +119,88 @@ class Lingo
114
119
  end
115
120
 
116
121
  def process_buffer?
117
- (obj = @buffer.last).is_a?(WordForm) && (obj.is_a?(Word) &&
118
- obj.unknown? || @stopper.include?(obj.attr.upcase))
122
+ (obj = @buffer.last).is_a?(WordForm) && @stopper.include?(obj.attr)
119
123
  end
120
124
 
121
125
  def process_buffer
122
- matches = []
126
+ flush(@buffer.size < 2 ? @buffer : begin
127
+ arg, cls, mwc, unk = [[], buf = [], map = [], @seq], @cls, @mwc, %w[#]
128
+
129
+ iter, skip, rewind = @buffer.each_with_index, 0, lambda {
130
+ iter.rewind; skip.times { iter.next }; skip = 0
131
+ }
123
132
 
124
- if @buffer.size > 1
125
- buf, map, seq, cls, unk = [], [], @seq, @classes, %w[#]
133
+ loop {
134
+ obj, idx = begin
135
+ iter.next
136
+ rescue StopIteration
137
+ raise unless skip > 0
138
+
139
+ buf.slice!(0, skip)
140
+ map.slice!(0, skip)
141
+
142
+ rewind.call
143
+ end
126
144
 
127
- @buffer.each { |obj|
128
145
  att = obj.is_a?(Word) && !obj.unknown? ? obj.attrs(false) : unk
129
146
 
130
- (att &= cls).empty? ? find_seq(buf, map, seq, matches) : begin
147
+ if (att &= cls).empty?
148
+ find_seq(*arg)
149
+ rewind.call if skip > 0
150
+ else
151
+ if n = obj.multiword_size(mwc)
152
+ n.times { iter.next }
153
+ skip = idx + 1
154
+ end
155
+
131
156
  buf << obj
132
157
  map << att
133
158
  end
134
159
  }
135
160
 
136
- find_seq(buf, map, seq, matches)
137
- end
138
-
139
- flush(@buffer.concat(matches))
161
+ @buffer.concat(find_seq(*arg))
162
+ end)
140
163
  end
141
164
 
142
165
  private
143
166
 
144
- def find_seq(buf, map, seq, matches)
145
- return if buf.empty?
167
+ def find_seq(mat, buf, map, seq)
168
+ return mat if buf.empty?
146
169
 
147
- match = Hash.new { |h, k| h[k] = [] }
170
+ forms, args = [], []
148
171
 
149
- map.replace(map.shift.product(*map))
150
- map.map! { |i| i.join }
151
- map.uniq!
172
+ map.replace(map.shift.product(*map)).map! { |i| i.join }.uniq!
152
173
 
153
174
  map.each { |q|
154
- seq.each { |string, classes, format|
155
- while pos = q.index(string, pos || 0)
156
- form = format.dup
175
+ seq.each { |str, cls, fmt|
176
+ _str, _cls = [str, cls]
177
+
178
+ while pos = q.index(str, pos || 0)
179
+ _str, _cls = [$&, $&.chars] unless cls
157
180
 
158
- classes.each_with_index { |wc, i|
181
+ args.clear
182
+
183
+ _cls.each_with_index { |wc, i|
159
184
  buf[pos + i].lexicals.find { |l|
160
- form.gsub!(i.succ.to_s, l.form) if l.attr == wc
185
+ args[i] = l.form if l.attr == wc
161
186
  } or break
162
187
  } or next
163
188
 
164
- match[pos += 1] << form
189
+ forms << (
190
+ fmt =~ /\d/ ? fmt.gsub('%0$s', _str) % args :
191
+ fmt ? "#{_str}:#{args.join(fmt)}" : args.join(' ')
192
+ )
193
+
194
+ pos += 1
165
195
  end
166
196
  }
167
- }
197
+ }.clear
168
198
 
169
- match.each_value { |forms|
170
- forms.uniq!
171
- forms.each { |form|
172
- matches << Word.new_lexical(form, WA_SEQUENCE, LA_SEQUENCE)
173
- }
174
- }
199
+ forms.uniq!
200
+ forms.each { |f| mat << Word.new_lexical(f, WA_SEQUENCE, LA_SEQUENCE) }
175
201
 
176
202
  buf.clear
177
- map.clear
203
+ mat
178
204
  end
179
205
 
180
206
  end
@@ -133,21 +133,24 @@ class Lingo
133
133
 
134
134
  # Gibt eine Datei zeilenweise in den Ausgabekanal
135
135
  def spool(path)
136
- unless stdin = stdin?(path)
137
- size = File.size(path) if @progress
138
- end
139
-
140
136
  forward(STR_CMD_FILE, path)
141
137
 
142
- ShowProgress.new(self, size, path) { |progress|
143
- filter(path, stdin) { |line, pos|
138
+ if stdin?(path)
139
+ io = @lingo.config.stdin.set_encoding(ENC)
140
+ io = StringIO.new(io.read) if @progress
141
+ else
142
+ io, name = File.open(path, 'rb', encoding: ENC), path
143
+ end
144
+
145
+ ShowProgress.new(self, @progress && io.size, name) { |progress|
146
+ filter(io) { |line, pos|
144
147
  progress[pos]
145
148
 
146
149
  line.chomp! if @chomp
147
150
  next if line =~ @skip
148
151
 
149
152
  if line =~ @lir
150
- forward(STR_CMD_RECORD, $1)
153
+ forward(STR_CMD_RECORD, $1 || $&)
151
154
  else
152
155
  line.sub!(@cut, '') if @cut
153
156
  forward(line) unless line.empty?
@@ -158,14 +161,10 @@ class Lingo
158
161
  forward(STR_CMD_EOF, path)
159
162
  end
160
163
 
161
- def filter(path, stdin = stdin?(path))
162
- io = stdin ?
163
- @lingo.config.stdin.set_encoding(ENC) :
164
- File.open(path, 'rb', encoding: ENC)
165
-
166
- block = stdin || !@progress ?
167
- lambda { |line| yield line, 0 } :
168
- lambda { |line| yield line, io.pos }
164
+ def filter(io)
165
+ block = @progress ?
166
+ lambda { |line| yield line, io.pos } :
167
+ lambda { |line| yield line, 0 }
169
168
 
170
169
  case @filter == true ? file_type(path, io) : @filter.to_s
171
170
  when /html/i then io = filter_html(io)
@@ -93,7 +93,7 @@ class Lingo
93
93
  def control(cmd, param)
94
94
  case cmd
95
95
  when STR_CMD_LIR
96
- @lir = true
96
+ @lir = true unless @lir.nil?
97
97
  when STR_CMD_FILE
98
98
  @no_sep = true
99
99
 
@@ -105,9 +105,9 @@ class Lingo
105
105
 
106
106
  @lir_rec_no, @lir_rec_buf = '', []
107
107
  when STR_CMD_RECORD
108
- @no_sep = true
109
-
110
108
  if @lir
109
+ @no_sep = true
110
+
111
111
  flush_lir_buffer
112
112
  @lir_rec_no = param
113
113
  end
@@ -93,12 +93,12 @@ class Lingo
93
93
  @src = get_key('src', false)
94
94
  @src = DEFAULT_SRC_SEP if @src == true
95
95
 
96
- if sort = get_key('sort', 'normal')
96
+ if sort = get_key('sort', ENV['LINGO_NO_SORT'] ? false : 'normal')
97
97
  @sort_format, @sort_method = sort.downcase.split('_', 2)
98
98
  end
99
99
  end
100
100
 
101
- @vectors, @word_count = [], 0.0
101
+ @vectors, @word_count = [], 0
102
102
  end
103
103
 
104
104
  def control(cmd, param)
@@ -137,12 +137,12 @@ class Lingo
137
137
  vec = cnt.sort_by { |v, c| [-c, v] }
138
138
 
139
139
  if @sort_method == 'rel'
140
- vec.each { |v| v[1] /= @word_count }
141
- fmt = '%6.5f'
140
+ fmt, wc = '%6.5f', @word_count.to_f
141
+ vec.each { |v| v[1] /= wc }
142
142
  end
143
143
 
144
144
  if @sort_format == 'sto'
145
- fmt, @word_count = "%s {#{fmt}}", 0.0
145
+ fmt, @word_count = "%s {#{fmt}}", 0
146
146
  else
147
147
  fmt.insert(1, '2$') << ' %1$s'
148
148
  end
data/lib/lingo/call.rb CHANGED
@@ -66,7 +66,7 @@ class Lingo
66
66
  }
67
67
 
68
68
  block_given? ? res.map! { |i| yield i } : begin
69
- res.sort!
69
+ Lingo.sort!(res)
70
70
  res.uniq!
71
71
  res
72
72
  end
data/lib/lingo/cli.rb CHANGED
@@ -24,11 +24,11 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
- require 'nuggets/util/cli'
27
+ require 'nuggets/cli'
28
28
 
29
29
  class Lingo
30
30
 
31
- class CLI < ::Util::CLI
31
+ class CLI < ::Nuggets::CLI
32
32
 
33
33
  class << self
34
34
 
data/lib/lingo/ctl.rb CHANGED
@@ -121,13 +121,15 @@ Usage: #{PROG} <command> [arguments] [options]
121
121
  end
122
122
 
123
123
  def do_demo
124
- OPTIONS.update(path: ARGV.shift, scope: :system)
124
+ OPTIONS.update(path: path = ARGV.shift, scope: :system)
125
125
  no_args
126
126
 
127
127
  copy_list(:config) { |i| !File.basename(i).start_with?('test') }
128
128
  copy_list(:lang)
129
129
  copy_list(:dict) { |i| File.basename(i).start_with?('user') }
130
130
  copy_list(:sample)
131
+
132
+ puts "Demo directory successfully initialized at `#{path}'."
131
133
  end
132
134
 
133
135
  def do_rackup(doit = true)
@@ -240,7 +240,7 @@ class Lingo
240
240
  }
241
241
  end
242
242
 
243
- ShowProgress.new(self, src.size, verbose) { |progress| create {
243
+ ShowProgress.new(self, src, verbose) { |progress| create {
244
244
  src.each { |key, val|
245
245
  progress[src.pos]
246
246
 
@@ -30,8 +30,21 @@ class Lingo
30
30
 
31
31
  class ShowProgress < ShowProgress
32
32
 
33
- def initialize(obj, max, act = true)
34
- super(obj, max, obj.instance_variable_get(:@config)['name'], act, 'convert')
33
+ def initialize(obj, src, doit = true)
34
+ name = obj.instance_variable_get(:@config)['name']
35
+ super(obj, src.size, name, doit, 'convert', false)
36
+
37
+ if defined?(@cnt)
38
+ cnt, rej = src.rejected
39
+
40
+ if cnt > 0
41
+ print ' (', cnt, ' rejected'
42
+ print ': ', rej if rej
43
+ print ')'
44
+ end
45
+
46
+ print "\n"
47
+ end
35
48
  end
36
49
 
37
50
  end
@@ -74,7 +74,7 @@ class Lingo
74
74
  @wrd = "(?:#{Language::Char::ANY})+"
75
75
  @pat = /^#{@wrd}$/
76
76
 
77
- @pos = 0
77
+ @pos = @rej_cnt = 0
78
78
  end
79
79
 
80
80
  def size
@@ -95,6 +95,7 @@ class Lingo
95
95
  if length < 4096 && line =~ @pat
96
96
  yield convert_line(line, $1, $2)
97
97
  else
98
+ @rej_cnt += 1
98
99
  reject_file.puts(line) if reject_file
99
100
  end
100
101
  }
@@ -111,6 +112,10 @@ class Lingo
111
112
  db[key] = val
112
113
  end
113
114
 
115
+ def rejected
116
+ [@rej_cnt, @rej]
117
+ end
118
+
114
119
  end
115
120
 
116
121
  end