lingo 1.8.3 → 1.8.4

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
data/lib/lingo/app.rb CHANGED
@@ -24,9 +24,11 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
+ require 'json'
27
28
  require 'optparse'
28
29
  require 'shellwords'
29
30
  require 'sinatra/base'
31
+ require 'sinatra/cookies'
30
32
 
31
33
  class Lingo
32
34
 
@@ -37,6 +39,7 @@ class Lingo
37
39
  def init_app(file, *args, &block)
38
40
  set :root, File.chomp_ext(file)
39
41
  parse_options(*args, &block)
42
+ helpers Sinatra::Cookies
40
43
  end
41
44
 
42
45
  def parse_options(lingo_options = false)
@@ -173,6 +173,12 @@ class Lingo
173
173
  @lingo.warn(*msg)
174
174
  end
175
175
 
176
+ def require_lib(lib)
177
+ require lib
178
+ rescue LoadError => err
179
+ raise LibraryLoadError.new(self.class, lib, err)
180
+ end
181
+
176
182
  end
177
183
 
178
184
  end
@@ -77,7 +77,7 @@ class Lingo
77
77
  end
78
78
 
79
79
  def control(cmd, param)
80
- send_abbr(nil) if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
80
+ send_abbr(@abbr) if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
81
81
  end
82
82
 
83
83
  def process(obj)
@@ -180,7 +180,7 @@ class Lingo
180
180
  seq = []
181
181
 
182
182
  @buffer.each { |obj|
183
- next [obj] unless obj.is_a?(WordForm)
183
+ next seq << [obj] unless obj.is_a?(WordForm)
184
184
  next if (form = obj.form) == CHAR_PUNCT
185
185
 
186
186
  w = find_word(form, @lex_dic, @lex_gra)
@@ -71,7 +71,11 @@ class Lingo
71
71
  protected
72
72
 
73
73
  def init
74
- @nonewords, @sort = [], get_key('sort', true)
74
+ @sort = get_key('sort', !ENV['LINGO_NO_SORT'])
75
+ @dict = get_key('dict', false)
76
+ @dict = '=' if @dict == true
77
+
78
+ @nonewords = []
75
79
  end
76
80
 
77
81
  def control(cmd, param)
@@ -80,14 +84,17 @@ class Lingo
80
84
  @nonewords.clear
81
85
  when STR_CMD_EOL
82
86
  skip_command
83
- when STR_CMD_RECORD, STR_CMD_EOF
84
- send_nonewords unless @nonewords.empty?
87
+ when STR_CMD_RECORD
88
+ send_nonewords unless @dict
89
+ when STR_CMD_EOF
90
+ send_nonewords
85
91
  end
86
92
  end
87
93
 
88
94
  def process(obj)
89
95
  if obj.is_a?(Word) && obj.unknown?
90
96
  non = Unicode.downcase(obj.form)
97
+ non = "#{non}#{@dict}#{non} #?" if @dict
91
98
  @sort ? @nonewords << non : forward(non)
92
99
  end
93
100
  end
@@ -95,8 +102,10 @@ class Lingo
95
102
  private
96
103
 
97
104
  def send_nonewords
98
- @nonewords.uniq!
99
- flush(@nonewords.sort!)
105
+ unless @nonewords.empty?
106
+ @nonewords.uniq!
107
+ flush(@nonewords.sort!)
108
+ end
100
109
  end
101
110
 
102
111
  end
@@ -96,15 +96,20 @@ class Lingo
96
96
  protected
97
97
 
98
98
  def init
99
- @stopper = get_array('stopper', DEFAULT_SKIP, :upcase)
100
- @classes = []
99
+ @stopper = get_array('stopper', DEFAULT_SKIP)
100
+ .push(WA_UNKNOWN, WA_UNKMULPART)
101
101
 
102
- @seq = get_key('sequences').map { |string, format|
103
- @classes.concat(classes = string.downcase!.chars.to_a)
104
- [string, classes, format]
102
+ @mwc = get_key('multiword', LA_MULTIWORD)
103
+ @cls = []
104
+
105
+ @seq = get_key('sequences').map { |str, fmt|
106
+ @cls.concat(cls = (str = str.downcase).scan(/[[:alpha:]]/))
107
+
108
+ (str =~ /\W/ ? [Regexp.new(str), nil] : [str, cls]).push(
109
+ fmt == true ? '|' : fmt ? fmt.gsub(/\d+/, '%\&$s') : nil)
105
110
  }
106
111
 
107
- @classes.uniq!
112
+ @cls.uniq!
108
113
 
109
114
  raise MissingConfigError.new(:sequences) if @seq.empty?
110
115
  end
@@ -114,67 +119,88 @@ class Lingo
114
119
  end
115
120
 
116
121
  def process_buffer?
117
- (obj = @buffer.last).is_a?(WordForm) && (obj.is_a?(Word) &&
118
- obj.unknown? || @stopper.include?(obj.attr.upcase))
122
+ (obj = @buffer.last).is_a?(WordForm) && @stopper.include?(obj.attr)
119
123
  end
120
124
 
121
125
  def process_buffer
122
- matches = []
126
+ flush(@buffer.size < 2 ? @buffer : begin
127
+ arg, cls, mwc, unk = [[], buf = [], map = [], @seq], @cls, @mwc, %w[#]
128
+
129
+ iter, skip, rewind = @buffer.each_with_index, 0, lambda {
130
+ iter.rewind; skip.times { iter.next }; skip = 0
131
+ }
123
132
 
124
- if @buffer.size > 1
125
- buf, map, seq, cls, unk = [], [], @seq, @classes, %w[#]
133
+ loop {
134
+ obj, idx = begin
135
+ iter.next
136
+ rescue StopIteration
137
+ raise unless skip > 0
138
+
139
+ buf.slice!(0, skip)
140
+ map.slice!(0, skip)
141
+
142
+ rewind.call
143
+ end
126
144
 
127
- @buffer.each { |obj|
128
145
  att = obj.is_a?(Word) && !obj.unknown? ? obj.attrs(false) : unk
129
146
 
130
- (att &= cls).empty? ? find_seq(buf, map, seq, matches) : begin
147
+ if (att &= cls).empty?
148
+ find_seq(*arg)
149
+ rewind.call if skip > 0
150
+ else
151
+ if n = obj.multiword_size(mwc)
152
+ n.times { iter.next }
153
+ skip = idx + 1
154
+ end
155
+
131
156
  buf << obj
132
157
  map << att
133
158
  end
134
159
  }
135
160
 
136
- find_seq(buf, map, seq, matches)
137
- end
138
-
139
- flush(@buffer.concat(matches))
161
+ @buffer.concat(find_seq(*arg))
162
+ end)
140
163
  end
141
164
 
142
165
  private
143
166
 
144
- def find_seq(buf, map, seq, matches)
145
- return if buf.empty?
167
+ def find_seq(mat, buf, map, seq)
168
+ return mat if buf.empty?
146
169
 
147
- match = Hash.new { |h, k| h[k] = [] }
170
+ forms, args = [], []
148
171
 
149
- map.replace(map.shift.product(*map))
150
- map.map! { |i| i.join }
151
- map.uniq!
172
+ map.replace(map.shift.product(*map)).map! { |i| i.join }.uniq!
152
173
 
153
174
  map.each { |q|
154
- seq.each { |string, classes, format|
155
- while pos = q.index(string, pos || 0)
156
- form = format.dup
175
+ seq.each { |str, cls, fmt|
176
+ _str, _cls = [str, cls]
177
+
178
+ while pos = q.index(str, pos || 0)
179
+ _str, _cls = [$&, $&.chars] unless cls
157
180
 
158
- classes.each_with_index { |wc, i|
181
+ args.clear
182
+
183
+ _cls.each_with_index { |wc, i|
159
184
  buf[pos + i].lexicals.find { |l|
160
- form.gsub!(i.succ.to_s, l.form) if l.attr == wc
185
+ args[i] = l.form if l.attr == wc
161
186
  } or break
162
187
  } or next
163
188
 
164
- match[pos += 1] << form
189
+ forms << (
190
+ fmt =~ /\d/ ? fmt.gsub('%0$s', _str) % args :
191
+ fmt ? "#{_str}:#{args.join(fmt)}" : args.join(' ')
192
+ )
193
+
194
+ pos += 1
165
195
  end
166
196
  }
167
- }
197
+ }.clear
168
198
 
169
- match.each_value { |forms|
170
- forms.uniq!
171
- forms.each { |form|
172
- matches << Word.new_lexical(form, WA_SEQUENCE, LA_SEQUENCE)
173
- }
174
- }
199
+ forms.uniq!
200
+ forms.each { |f| mat << Word.new_lexical(f, WA_SEQUENCE, LA_SEQUENCE) }
175
201
 
176
202
  buf.clear
177
- map.clear
203
+ mat
178
204
  end
179
205
 
180
206
  end
@@ -133,21 +133,24 @@ class Lingo
133
133
 
134
134
  # Gibt eine Datei zeilenweise in den Ausgabekanal
135
135
  def spool(path)
136
- unless stdin = stdin?(path)
137
- size = File.size(path) if @progress
138
- end
139
-
140
136
  forward(STR_CMD_FILE, path)
141
137
 
142
- ShowProgress.new(self, size, path) { |progress|
143
- filter(path, stdin) { |line, pos|
138
+ if stdin?(path)
139
+ io = @lingo.config.stdin.set_encoding(ENC)
140
+ io = StringIO.new(io.read) if @progress
141
+ else
142
+ io, name = File.open(path, 'rb', encoding: ENC), path
143
+ end
144
+
145
+ ShowProgress.new(self, @progress && io.size, name) { |progress|
146
+ filter(io) { |line, pos|
144
147
  progress[pos]
145
148
 
146
149
  line.chomp! if @chomp
147
150
  next if line =~ @skip
148
151
 
149
152
  if line =~ @lir
150
- forward(STR_CMD_RECORD, $1)
153
+ forward(STR_CMD_RECORD, $1 || $&)
151
154
  else
152
155
  line.sub!(@cut, '') if @cut
153
156
  forward(line) unless line.empty?
@@ -158,14 +161,10 @@ class Lingo
158
161
  forward(STR_CMD_EOF, path)
159
162
  end
160
163
 
161
- def filter(path, stdin = stdin?(path))
162
- io = stdin ?
163
- @lingo.config.stdin.set_encoding(ENC) :
164
- File.open(path, 'rb', encoding: ENC)
165
-
166
- block = stdin || !@progress ?
167
- lambda { |line| yield line, 0 } :
168
- lambda { |line| yield line, io.pos }
164
+ def filter(io)
165
+ block = @progress ?
166
+ lambda { |line| yield line, io.pos } :
167
+ lambda { |line| yield line, 0 }
169
168
 
170
169
  case @filter == true ? file_type(path, io) : @filter.to_s
171
170
  when /html/i then io = filter_html(io)
@@ -93,7 +93,7 @@ class Lingo
93
93
  def control(cmd, param)
94
94
  case cmd
95
95
  when STR_CMD_LIR
96
- @lir = true
96
+ @lir = true unless @lir.nil?
97
97
  when STR_CMD_FILE
98
98
  @no_sep = true
99
99
 
@@ -105,9 +105,9 @@ class Lingo
105
105
 
106
106
  @lir_rec_no, @lir_rec_buf = '', []
107
107
  when STR_CMD_RECORD
108
- @no_sep = true
109
-
110
108
  if @lir
109
+ @no_sep = true
110
+
111
111
  flush_lir_buffer
112
112
  @lir_rec_no = param
113
113
  end
@@ -93,12 +93,12 @@ class Lingo
93
93
  @src = get_key('src', false)
94
94
  @src = DEFAULT_SRC_SEP if @src == true
95
95
 
96
- if sort = get_key('sort', 'normal')
96
+ if sort = get_key('sort', ENV['LINGO_NO_SORT'] ? false : 'normal')
97
97
  @sort_format, @sort_method = sort.downcase.split('_', 2)
98
98
  end
99
99
  end
100
100
 
101
- @vectors, @word_count = [], 0.0
101
+ @vectors, @word_count = [], 0
102
102
  end
103
103
 
104
104
  def control(cmd, param)
@@ -137,12 +137,12 @@ class Lingo
137
137
  vec = cnt.sort_by { |v, c| [-c, v] }
138
138
 
139
139
  if @sort_method == 'rel'
140
- vec.each { |v| v[1] /= @word_count }
141
- fmt = '%6.5f'
140
+ fmt, wc = '%6.5f', @word_count.to_f
141
+ vec.each { |v| v[1] /= wc }
142
142
  end
143
143
 
144
144
  if @sort_format == 'sto'
145
- fmt, @word_count = "%s {#{fmt}}", 0.0
145
+ fmt, @word_count = "%s {#{fmt}}", 0
146
146
  else
147
147
  fmt.insert(1, '2$') << ' %1$s'
148
148
  end
data/lib/lingo/call.rb CHANGED
@@ -66,7 +66,7 @@ class Lingo
66
66
  }
67
67
 
68
68
  block_given? ? res.map! { |i| yield i } : begin
69
- res.sort!
69
+ Lingo.sort!(res)
70
70
  res.uniq!
71
71
  res
72
72
  end
data/lib/lingo/cli.rb CHANGED
@@ -24,11 +24,11 @@
24
24
  ###############################################################################
25
25
  #++
26
26
 
27
- require 'nuggets/util/cli'
27
+ require 'nuggets/cli'
28
28
 
29
29
  class Lingo
30
30
 
31
- class CLI < ::Util::CLI
31
+ class CLI < ::Nuggets::CLI
32
32
 
33
33
  class << self
34
34
 
data/lib/lingo/ctl.rb CHANGED
@@ -121,13 +121,15 @@ Usage: #{PROG} <command> [arguments] [options]
121
121
  end
122
122
 
123
123
  def do_demo
124
- OPTIONS.update(path: ARGV.shift, scope: :system)
124
+ OPTIONS.update(path: path = ARGV.shift, scope: :system)
125
125
  no_args
126
126
 
127
127
  copy_list(:config) { |i| !File.basename(i).start_with?('test') }
128
128
  copy_list(:lang)
129
129
  copy_list(:dict) { |i| File.basename(i).start_with?('user') }
130
130
  copy_list(:sample)
131
+
132
+ puts "Demo directory successfully initialized at `#{path}'."
131
133
  end
132
134
 
133
135
  def do_rackup(doit = true)
@@ -240,7 +240,7 @@ class Lingo
240
240
  }
241
241
  end
242
242
 
243
- ShowProgress.new(self, src.size, verbose) { |progress| create {
243
+ ShowProgress.new(self, src, verbose) { |progress| create {
244
244
  src.each { |key, val|
245
245
  progress[src.pos]
246
246
 
@@ -30,8 +30,21 @@ class Lingo
30
30
 
31
31
  class ShowProgress < ShowProgress
32
32
 
33
- def initialize(obj, max, act = true)
34
- super(obj, max, obj.instance_variable_get(:@config)['name'], act, 'convert')
33
+ def initialize(obj, src, doit = true)
34
+ name = obj.instance_variable_get(:@config)['name']
35
+ super(obj, src.size, name, doit, 'convert', false)
36
+
37
+ if defined?(@cnt)
38
+ cnt, rej = src.rejected
39
+
40
+ if cnt > 0
41
+ print ' (', cnt, ' rejected'
42
+ print ': ', rej if rej
43
+ print ')'
44
+ end
45
+
46
+ print "\n"
47
+ end
35
48
  end
36
49
 
37
50
  end
@@ -74,7 +74,7 @@ class Lingo
74
74
  @wrd = "(?:#{Language::Char::ANY})+"
75
75
  @pat = /^#{@wrd}$/
76
76
 
77
- @pos = 0
77
+ @pos = @rej_cnt = 0
78
78
  end
79
79
 
80
80
  def size
@@ -95,6 +95,7 @@ class Lingo
95
95
  if length < 4096 && line =~ @pat
96
96
  yield convert_line(line, $1, $2)
97
97
  else
98
+ @rej_cnt += 1
98
99
  reject_file.puts(line) if reject_file
99
100
  end
100
101
  }
@@ -111,6 +112,10 @@ class Lingo
111
112
  db[key] = val
112
113
  end
113
114
 
115
+ def rejected
116
+ [@rej_cnt, @rej]
117
+ end
118
+
114
119
  end
115
120
 
116
121
  end