lingo 1.8.3 → 1.8.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ChangeLog +24 -2
- data/README +16 -10
- data/Rakefile +15 -6
- data/en/lingo-irr.txt +60 -60
- data/lib/lingo.rb +14 -6
- data/lib/lingo/app.rb +3 -0
- data/lib/lingo/attendee.rb +6 -0
- data/lib/lingo/attendee/abbreviator.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +1 -1
- data/lib/lingo/attendee/noneword_filter.rb +14 -5
- data/lib/lingo/attendee/sequencer.rb +63 -37
- data/lib/lingo/attendee/text_reader.rb +14 -15
- data/lib/lingo/attendee/text_writer.rb +3 -3
- data/lib/lingo/attendee/vector_filter.rb +5 -5
- data/lib/lingo/call.rb +1 -1
- data/lib/lingo/cli.rb +2 -2
- data/lib/lingo/ctl.rb +3 -1
- data/lib/lingo/database.rb +1 -1
- data/lib/lingo/database/show_progress.rb +15 -2
- data/lib/lingo/database/source.rb +6 -1
- data/lib/lingo/error.rb +28 -4
- data/lib/lingo/language/grammar.rb +7 -7
- data/lib/lingo/language/word.rb +6 -2
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/show_progress.rb +3 -2
- data/lib/lingo/srv.rb +15 -6
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web.rb +40 -10
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/lib/lingo/web/public/lingoweb.css +7 -4
- data/lib/lingo/web/views/index.erb +97 -39
- data/lingo.cfg +1 -1
- data/lir.cfg +1 -1
- data/test/attendee/ts_abbreviator.rb +22 -0
- data/test/attendee/ts_sequencer.rb +278 -1
- data/test/attendee/ts_text_reader.rb +34 -0
- data/test/attendee/ts_text_writer.rb +1 -1
- metadata +139 -133
data/lib/lingo/app.rb
CHANGED
@@ -24,9 +24,11 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'json'
|
27
28
|
require 'optparse'
|
28
29
|
require 'shellwords'
|
29
30
|
require 'sinatra/base'
|
31
|
+
require 'sinatra/cookies'
|
30
32
|
|
31
33
|
class Lingo
|
32
34
|
|
@@ -37,6 +39,7 @@ class Lingo
|
|
37
39
|
def init_app(file, *args, &block)
|
38
40
|
set :root, File.chomp_ext(file)
|
39
41
|
parse_options(*args, &block)
|
42
|
+
helpers Sinatra::Cookies
|
40
43
|
end
|
41
44
|
|
42
45
|
def parse_options(lingo_options = false)
|
data/lib/lingo/attendee.rb
CHANGED
data/lib/lingo/attendee/noneword_filter.rb
CHANGED
@@ -71,7 +71,11 @@ class Lingo
|
|
71
71
|
protected
|
72
72
|
|
73
73
|
def init
|
74
|
-
@
|
74
|
+
@sort = get_key('sort', !ENV['LINGO_NO_SORT'])
|
75
|
+
@dict = get_key('dict', false)
|
76
|
+
@dict = '=' if @dict == true
|
77
|
+
|
78
|
+
@nonewords = []
|
75
79
|
end
|
76
80
|
|
77
81
|
def control(cmd, param)
|
@@ -80,14 +84,17 @@ class Lingo
|
|
80
84
|
@nonewords.clear
|
81
85
|
when STR_CMD_EOL
|
82
86
|
skip_command
|
83
|
-
when STR_CMD_RECORD
|
84
|
-
send_nonewords unless @
|
87
|
+
when STR_CMD_RECORD
|
88
|
+
send_nonewords unless @dict
|
89
|
+
when STR_CMD_EOF
|
90
|
+
send_nonewords
|
85
91
|
end
|
86
92
|
end
|
87
93
|
|
88
94
|
def process(obj)
|
89
95
|
if obj.is_a?(Word) && obj.unknown?
|
90
96
|
non = Unicode.downcase(obj.form)
|
97
|
+
non = "#{non}#{@dict}#{non} #?" if @dict
|
91
98
|
@sort ? @nonewords << non : forward(non)
|
92
99
|
end
|
93
100
|
end
|
@@ -95,8 +102,10 @@ class Lingo
|
|
95
102
|
private
|
96
103
|
|
97
104
|
def send_nonewords
|
98
|
-
@nonewords.
|
99
|
-
|
105
|
+
unless @nonewords.empty?
|
106
|
+
@nonewords.uniq!
|
107
|
+
flush(@nonewords.sort!)
|
108
|
+
end
|
100
109
|
end
|
101
110
|
|
102
111
|
end
|
data/lib/lingo/attendee/sequencer.rb
CHANGED
@@ -96,15 +96,20 @@ class Lingo
|
|
96
96
|
protected
|
97
97
|
|
98
98
|
def init
|
99
|
-
@stopper = get_array('stopper', DEFAULT_SKIP
|
100
|
-
|
99
|
+
@stopper = get_array('stopper', DEFAULT_SKIP)
|
100
|
+
.push(WA_UNKNOWN, WA_UNKMULPART)
|
101
101
|
|
102
|
-
@
|
103
|
-
|
104
|
-
|
102
|
+
@mwc = get_key('multiword', LA_MULTIWORD)
|
103
|
+
@cls = []
|
104
|
+
|
105
|
+
@seq = get_key('sequences').map { |str, fmt|
|
106
|
+
@cls.concat(cls = (str = str.downcase).scan(/[[:alpha:]]/))
|
107
|
+
|
108
|
+
(str =~ /\W/ ? [Regexp.new(str), nil] : [str, cls]).push(
|
109
|
+
fmt == true ? '|' : fmt ? fmt.gsub(/\d+/, '%\&$s') : nil)
|
105
110
|
}
|
106
111
|
|
107
|
-
@
|
112
|
+
@cls.uniq!
|
108
113
|
|
109
114
|
raise MissingConfigError.new(:sequences) if @seq.empty?
|
110
115
|
end
|
@@ -114,67 +119,88 @@ class Lingo
|
|
114
119
|
end
|
115
120
|
|
116
121
|
def process_buffer?
|
117
|
-
(obj = @buffer.last).is_a?(WordForm) && (obj.
|
118
|
-
obj.unknown? || @stopper.include?(obj.attr.upcase))
|
122
|
+
(obj = @buffer.last).is_a?(WordForm) && @stopper.include?(obj.attr)
|
119
123
|
end
|
120
124
|
|
121
125
|
def process_buffer
|
122
|
-
|
126
|
+
flush(@buffer.size < 2 ? @buffer : begin
|
127
|
+
arg, cls, mwc, unk = [[], buf = [], map = [], @seq], @cls, @mwc, %w[#]
|
128
|
+
|
129
|
+
iter, skip, rewind = @buffer.each_with_index, 0, lambda {
|
130
|
+
iter.rewind; skip.times { iter.next }; skip = 0
|
131
|
+
}
|
123
132
|
|
124
|
-
|
125
|
-
|
133
|
+
loop {
|
134
|
+
obj, idx = begin
|
135
|
+
iter.next
|
136
|
+
rescue StopIteration
|
137
|
+
raise unless skip > 0
|
138
|
+
|
139
|
+
buf.slice!(0, skip)
|
140
|
+
map.slice!(0, skip)
|
141
|
+
|
142
|
+
rewind.call
|
143
|
+
end
|
126
144
|
|
127
|
-
@buffer.each { |obj|
|
128
145
|
att = obj.is_a?(Word) && !obj.unknown? ? obj.attrs(false) : unk
|
129
146
|
|
130
|
-
(att &= cls).empty?
|
147
|
+
if (att &= cls).empty?
|
148
|
+
find_seq(*arg)
|
149
|
+
rewind.call if skip > 0
|
150
|
+
else
|
151
|
+
if n = obj.multiword_size(mwc)
|
152
|
+
n.times { iter.next }
|
153
|
+
skip = idx + 1
|
154
|
+
end
|
155
|
+
|
131
156
|
buf << obj
|
132
157
|
map << att
|
133
158
|
end
|
134
159
|
}
|
135
160
|
|
136
|
-
find_seq(
|
137
|
-
end
|
138
|
-
|
139
|
-
flush(@buffer.concat(matches))
|
161
|
+
@buffer.concat(find_seq(*arg))
|
162
|
+
end)
|
140
163
|
end
|
141
164
|
|
142
165
|
private
|
143
166
|
|
144
|
-
def find_seq(buf, map, seq
|
145
|
-
return if buf.empty?
|
167
|
+
def find_seq(mat, buf, map, seq)
|
168
|
+
return mat if buf.empty?
|
146
169
|
|
147
|
-
|
170
|
+
forms, args = [], []
|
148
171
|
|
149
|
-
map.replace(map.shift.product(*map))
|
150
|
-
map.map! { |i| i.join }
|
151
|
-
map.uniq!
|
172
|
+
map.replace(map.shift.product(*map)).map! { |i| i.join }.uniq!
|
152
173
|
|
153
174
|
map.each { |q|
|
154
|
-
seq.each { |
|
155
|
-
|
156
|
-
|
175
|
+
seq.each { |str, cls, fmt|
|
176
|
+
_str, _cls = [str, cls]
|
177
|
+
|
178
|
+
while pos = q.index(str, pos || 0)
|
179
|
+
_str, _cls = [$&, $&.chars] unless cls
|
157
180
|
|
158
|
-
|
181
|
+
args.clear
|
182
|
+
|
183
|
+
_cls.each_with_index { |wc, i|
|
159
184
|
buf[pos + i].lexicals.find { |l|
|
160
|
-
|
185
|
+
args[i] = l.form if l.attr == wc
|
161
186
|
} or break
|
162
187
|
} or next
|
163
188
|
|
164
|
-
|
189
|
+
forms << (
|
190
|
+
fmt =~ /\d/ ? fmt.gsub('%0$s', _str) % args :
|
191
|
+
fmt ? "#{_str}:#{args.join(fmt)}" : args.join(' ')
|
192
|
+
)
|
193
|
+
|
194
|
+
pos += 1
|
165
195
|
end
|
166
196
|
}
|
167
|
-
}
|
197
|
+
}.clear
|
168
198
|
|
169
|
-
|
170
|
-
|
171
|
-
forms.each { |form|
|
172
|
-
matches << Word.new_lexical(form, WA_SEQUENCE, LA_SEQUENCE)
|
173
|
-
}
|
174
|
-
}
|
199
|
+
forms.uniq!
|
200
|
+
forms.each { |f| mat << Word.new_lexical(f, WA_SEQUENCE, LA_SEQUENCE) }
|
175
201
|
|
176
202
|
buf.clear
|
177
|
-
|
203
|
+
mat
|
178
204
|
end
|
179
205
|
|
180
206
|
end
|
data/lib/lingo/attendee/text_reader.rb
CHANGED
@@ -133,21 +133,24 @@ class Lingo
|
|
133
133
|
|
134
134
|
# Gibt eine Datei zeilenweise in den Ausgabekanal
|
135
135
|
def spool(path)
|
136
|
-
unless stdin = stdin?(path)
|
137
|
-
size = File.size(path) if @progress
|
138
|
-
end
|
139
|
-
|
140
136
|
forward(STR_CMD_FILE, path)
|
141
137
|
|
142
|
-
|
143
|
-
|
138
|
+
if stdin?(path)
|
139
|
+
io = @lingo.config.stdin.set_encoding(ENC)
|
140
|
+
io = StringIO.new(io.read) if @progress
|
141
|
+
else
|
142
|
+
io, name = File.open(path, 'rb', encoding: ENC), path
|
143
|
+
end
|
144
|
+
|
145
|
+
ShowProgress.new(self, @progress && io.size, name) { |progress|
|
146
|
+
filter(io) { |line, pos|
|
144
147
|
progress[pos]
|
145
148
|
|
146
149
|
line.chomp! if @chomp
|
147
150
|
next if line =~ @skip
|
148
151
|
|
149
152
|
if line =~ @lir
|
150
|
-
forward(STR_CMD_RECORD, $1)
|
153
|
+
forward(STR_CMD_RECORD, $1 || $&)
|
151
154
|
else
|
152
155
|
line.sub!(@cut, '') if @cut
|
153
156
|
forward(line) unless line.empty?
|
@@ -158,14 +161,10 @@ class Lingo
|
|
158
161
|
forward(STR_CMD_EOF, path)
|
159
162
|
end
|
160
163
|
|
161
|
-
def filter(
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
block = stdin || !@progress ?
|
167
|
-
lambda { |line| yield line, 0 } :
|
168
|
-
lambda { |line| yield line, io.pos }
|
164
|
+
def filter(io)
|
165
|
+
block = @progress ?
|
166
|
+
lambda { |line| yield line, io.pos } :
|
167
|
+
lambda { |line| yield line, 0 }
|
169
168
|
|
170
169
|
case @filter == true ? file_type(path, io) : @filter.to_s
|
171
170
|
when /html/i then io = filter_html(io)
|
data/lib/lingo/attendee/text_writer.rb
CHANGED
@@ -93,7 +93,7 @@ class Lingo
|
|
93
93
|
def control(cmd, param)
|
94
94
|
case cmd
|
95
95
|
when STR_CMD_LIR
|
96
|
-
@lir = true
|
96
|
+
@lir = true unless @lir.nil?
|
97
97
|
when STR_CMD_FILE
|
98
98
|
@no_sep = true
|
99
99
|
|
@@ -105,9 +105,9 @@ class Lingo
|
|
105
105
|
|
106
106
|
@lir_rec_no, @lir_rec_buf = '', []
|
107
107
|
when STR_CMD_RECORD
|
108
|
-
@no_sep = true
|
109
|
-
|
110
108
|
if @lir
|
109
|
+
@no_sep = true
|
110
|
+
|
111
111
|
flush_lir_buffer
|
112
112
|
@lir_rec_no = param
|
113
113
|
end
|
data/lib/lingo/attendee/vector_filter.rb
CHANGED
@@ -93,12 +93,12 @@ class Lingo
|
|
93
93
|
@src = get_key('src', false)
|
94
94
|
@src = DEFAULT_SRC_SEP if @src == true
|
95
95
|
|
96
|
-
if sort = get_key('sort', 'normal')
|
96
|
+
if sort = get_key('sort', ENV['LINGO_NO_SORT'] ? false : 'normal')
|
97
97
|
@sort_format, @sort_method = sort.downcase.split('_', 2)
|
98
98
|
end
|
99
99
|
end
|
100
100
|
|
101
|
-
@vectors, @word_count = [], 0
|
101
|
+
@vectors, @word_count = [], 0
|
102
102
|
end
|
103
103
|
|
104
104
|
def control(cmd, param)
|
@@ -137,12 +137,12 @@ class Lingo
|
|
137
137
|
vec = cnt.sort_by { |v, c| [-c, v] }
|
138
138
|
|
139
139
|
if @sort_method == 'rel'
|
140
|
-
|
141
|
-
|
140
|
+
fmt, wc = '%6.5f', @word_count.to_f
|
141
|
+
vec.each { |v| v[1] /= wc }
|
142
142
|
end
|
143
143
|
|
144
144
|
if @sort_format == 'sto'
|
145
|
-
fmt, @word_count = "%s {#{fmt}}", 0
|
145
|
+
fmt, @word_count = "%s {#{fmt}}", 0
|
146
146
|
else
|
147
147
|
fmt.insert(1, '2$') << ' %1$s'
|
148
148
|
end
|
data/lib/lingo/call.rb
CHANGED
data/lib/lingo/cli.rb
CHANGED
@@ -24,11 +24,11 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
-
require 'nuggets/
|
27
|
+
require 'nuggets/cli'
|
28
28
|
|
29
29
|
class Lingo
|
30
30
|
|
31
|
-
class CLI < ::
|
31
|
+
class CLI < ::Nuggets::CLI
|
32
32
|
|
33
33
|
class << self
|
34
34
|
|
data/lib/lingo/ctl.rb
CHANGED
@@ -121,13 +121,15 @@ Usage: #{PROG} <command> [arguments] [options]
|
|
121
121
|
end
|
122
122
|
|
123
123
|
def do_demo
|
124
|
-
OPTIONS.update(path: ARGV.shift, scope: :system)
|
124
|
+
OPTIONS.update(path: path = ARGV.shift, scope: :system)
|
125
125
|
no_args
|
126
126
|
|
127
127
|
copy_list(:config) { |i| !File.basename(i).start_with?('test') }
|
128
128
|
copy_list(:lang)
|
129
129
|
copy_list(:dict) { |i| File.basename(i).start_with?('user') }
|
130
130
|
copy_list(:sample)
|
131
|
+
|
132
|
+
puts "Demo directory successfully initialized at `#{path}'."
|
131
133
|
end
|
132
134
|
|
133
135
|
def do_rackup(doit = true)
|
data/lib/lingo/database.rb
CHANGED
data/lib/lingo/database/show_progress.rb
CHANGED
@@ -30,8 +30,21 @@ class Lingo
|
|
30
30
|
|
31
31
|
class ShowProgress < ShowProgress
|
32
32
|
|
33
|
-
def initialize(obj,
|
34
|
-
|
33
|
+
def initialize(obj, src, doit = true)
|
34
|
+
name = obj.instance_variable_get(:@config)['name']
|
35
|
+
super(obj, src.size, name, doit, 'convert', false)
|
36
|
+
|
37
|
+
if defined?(@cnt)
|
38
|
+
cnt, rej = src.rejected
|
39
|
+
|
40
|
+
if cnt > 0
|
41
|
+
print ' (', cnt, ' rejected'
|
42
|
+
print ': ', rej if rej
|
43
|
+
print ')'
|
44
|
+
end
|
45
|
+
|
46
|
+
print "\n"
|
47
|
+
end
|
35
48
|
end
|
36
49
|
|
37
50
|
end
|
data/lib/lingo/database/source.rb
CHANGED
@@ -74,7 +74,7 @@ class Lingo
|
|
74
74
|
@wrd = "(?:#{Language::Char::ANY})+"
|
75
75
|
@pat = /^#{@wrd}$/
|
76
76
|
|
77
|
-
@pos = 0
|
77
|
+
@pos = @rej_cnt = 0
|
78
78
|
end
|
79
79
|
|
80
80
|
def size
|
@@ -95,6 +95,7 @@ class Lingo
|
|
95
95
|
if length < 4096 && line =~ @pat
|
96
96
|
yield convert_line(line, $1, $2)
|
97
97
|
else
|
98
|
+
@rej_cnt += 1
|
98
99
|
reject_file.puts(line) if reject_file
|
99
100
|
end
|
100
101
|
}
|
@@ -111,6 +112,10 @@ class Lingo
|
|
111
112
|
db[key] = val
|
112
113
|
end
|
113
114
|
|
115
|
+
def rejected
|
116
|
+
[@rej_cnt, @rej]
|
117
|
+
end
|
118
|
+
|
114
119
|
end
|
115
120
|
|
116
121
|
end
|