lingo 1.8.3 → 1.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ChangeLog +24 -2
- data/README +16 -10
- data/Rakefile +15 -6
- data/en/lingo-irr.txt +60 -60
- data/lib/lingo.rb +14 -6
- data/lib/lingo/app.rb +3 -0
- data/lib/lingo/attendee.rb +6 -0
- data/lib/lingo/attendee/abbreviator.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +1 -1
- data/lib/lingo/attendee/noneword_filter.rb +14 -5
- data/lib/lingo/attendee/sequencer.rb +63 -37
- data/lib/lingo/attendee/text_reader.rb +14 -15
- data/lib/lingo/attendee/text_writer.rb +3 -3
- data/lib/lingo/attendee/vector_filter.rb +5 -5
- data/lib/lingo/call.rb +1 -1
- data/lib/lingo/cli.rb +2 -2
- data/lib/lingo/ctl.rb +3 -1
- data/lib/lingo/database.rb +1 -1
- data/lib/lingo/database/show_progress.rb +15 -2
- data/lib/lingo/database/source.rb +6 -1
- data/lib/lingo/error.rb +28 -4
- data/lib/lingo/language/grammar.rb +7 -7
- data/lib/lingo/language/word.rb +6 -2
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/show_progress.rb +3 -2
- data/lib/lingo/srv.rb +15 -6
- data/lib/lingo/srv/lingosrv.cfg +1 -1
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web.rb +40 -10
- data/lib/lingo/web/lingoweb.cfg +1 -1
- data/lib/lingo/web/public/lingoweb.css +7 -4
- data/lib/lingo/web/views/index.erb +97 -39
- data/lingo.cfg +1 -1
- data/lir.cfg +1 -1
- data/test/attendee/ts_abbreviator.rb +22 -0
- data/test/attendee/ts_sequencer.rb +278 -1
- data/test/attendee/ts_text_reader.rb +34 -0
- data/test/attendee/ts_text_writer.rb +1 -1
- metadata +139 -133
data/lib/lingo/app.rb
CHANGED
@@ -24,9 +24,11 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
+
require 'json'
|
27
28
|
require 'optparse'
|
28
29
|
require 'shellwords'
|
29
30
|
require 'sinatra/base'
|
31
|
+
require 'sinatra/cookies'
|
30
32
|
|
31
33
|
class Lingo
|
32
34
|
|
@@ -37,6 +39,7 @@ class Lingo
|
|
37
39
|
def init_app(file, *args, &block)
|
38
40
|
set :root, File.chomp_ext(file)
|
39
41
|
parse_options(*args, &block)
|
42
|
+
helpers Sinatra::Cookies
|
40
43
|
end
|
41
44
|
|
42
45
|
def parse_options(lingo_options = false)
|
data/lib/lingo/attendee.rb
CHANGED
@@ -71,7 +71,11 @@ class Lingo
|
|
71
71
|
protected
|
72
72
|
|
73
73
|
def init
|
74
|
-
@
|
74
|
+
@sort = get_key('sort', !ENV['LINGO_NO_SORT'])
|
75
|
+
@dict = get_key('dict', false)
|
76
|
+
@dict = '=' if @dict == true
|
77
|
+
|
78
|
+
@nonewords = []
|
75
79
|
end
|
76
80
|
|
77
81
|
def control(cmd, param)
|
@@ -80,14 +84,17 @@ class Lingo
|
|
80
84
|
@nonewords.clear
|
81
85
|
when STR_CMD_EOL
|
82
86
|
skip_command
|
83
|
-
when STR_CMD_RECORD
|
84
|
-
send_nonewords unless @
|
87
|
+
when STR_CMD_RECORD
|
88
|
+
send_nonewords unless @dict
|
89
|
+
when STR_CMD_EOF
|
90
|
+
send_nonewords
|
85
91
|
end
|
86
92
|
end
|
87
93
|
|
88
94
|
def process(obj)
|
89
95
|
if obj.is_a?(Word) && obj.unknown?
|
90
96
|
non = Unicode.downcase(obj.form)
|
97
|
+
non = "#{non}#{@dict}#{non} #?" if @dict
|
91
98
|
@sort ? @nonewords << non : forward(non)
|
92
99
|
end
|
93
100
|
end
|
@@ -95,8 +102,10 @@ class Lingo
|
|
95
102
|
private
|
96
103
|
|
97
104
|
def send_nonewords
|
98
|
-
@nonewords.
|
99
|
-
|
105
|
+
unless @nonewords.empty?
|
106
|
+
@nonewords.uniq!
|
107
|
+
flush(@nonewords.sort!)
|
108
|
+
end
|
100
109
|
end
|
101
110
|
|
102
111
|
end
|
@@ -96,15 +96,20 @@ class Lingo
|
|
96
96
|
protected
|
97
97
|
|
98
98
|
def init
|
99
|
-
@stopper = get_array('stopper', DEFAULT_SKIP
|
100
|
-
|
99
|
+
@stopper = get_array('stopper', DEFAULT_SKIP)
|
100
|
+
.push(WA_UNKNOWN, WA_UNKMULPART)
|
101
101
|
|
102
|
-
@
|
103
|
-
|
104
|
-
|
102
|
+
@mwc = get_key('multiword', LA_MULTIWORD)
|
103
|
+
@cls = []
|
104
|
+
|
105
|
+
@seq = get_key('sequences').map { |str, fmt|
|
106
|
+
@cls.concat(cls = (str = str.downcase).scan(/[[:alpha:]]/))
|
107
|
+
|
108
|
+
(str =~ /\W/ ? [Regexp.new(str), nil] : [str, cls]).push(
|
109
|
+
fmt == true ? '|' : fmt ? fmt.gsub(/\d+/, '%\&$s') : nil)
|
105
110
|
}
|
106
111
|
|
107
|
-
@
|
112
|
+
@cls.uniq!
|
108
113
|
|
109
114
|
raise MissingConfigError.new(:sequences) if @seq.empty?
|
110
115
|
end
|
@@ -114,67 +119,88 @@ class Lingo
|
|
114
119
|
end
|
115
120
|
|
116
121
|
def process_buffer?
|
117
|
-
(obj = @buffer.last).is_a?(WordForm) && (obj.
|
118
|
-
obj.unknown? || @stopper.include?(obj.attr.upcase))
|
122
|
+
(obj = @buffer.last).is_a?(WordForm) && @stopper.include?(obj.attr)
|
119
123
|
end
|
120
124
|
|
121
125
|
def process_buffer
|
122
|
-
|
126
|
+
flush(@buffer.size < 2 ? @buffer : begin
|
127
|
+
arg, cls, mwc, unk = [[], buf = [], map = [], @seq], @cls, @mwc, %w[#]
|
128
|
+
|
129
|
+
iter, skip, rewind = @buffer.each_with_index, 0, lambda {
|
130
|
+
iter.rewind; skip.times { iter.next }; skip = 0
|
131
|
+
}
|
123
132
|
|
124
|
-
|
125
|
-
|
133
|
+
loop {
|
134
|
+
obj, idx = begin
|
135
|
+
iter.next
|
136
|
+
rescue StopIteration
|
137
|
+
raise unless skip > 0
|
138
|
+
|
139
|
+
buf.slice!(0, skip)
|
140
|
+
map.slice!(0, skip)
|
141
|
+
|
142
|
+
rewind.call
|
143
|
+
end
|
126
144
|
|
127
|
-
@buffer.each { |obj|
|
128
145
|
att = obj.is_a?(Word) && !obj.unknown? ? obj.attrs(false) : unk
|
129
146
|
|
130
|
-
(att &= cls).empty?
|
147
|
+
if (att &= cls).empty?
|
148
|
+
find_seq(*arg)
|
149
|
+
rewind.call if skip > 0
|
150
|
+
else
|
151
|
+
if n = obj.multiword_size(mwc)
|
152
|
+
n.times { iter.next }
|
153
|
+
skip = idx + 1
|
154
|
+
end
|
155
|
+
|
131
156
|
buf << obj
|
132
157
|
map << att
|
133
158
|
end
|
134
159
|
}
|
135
160
|
|
136
|
-
find_seq(
|
137
|
-
end
|
138
|
-
|
139
|
-
flush(@buffer.concat(matches))
|
161
|
+
@buffer.concat(find_seq(*arg))
|
162
|
+
end)
|
140
163
|
end
|
141
164
|
|
142
165
|
private
|
143
166
|
|
144
|
-
def find_seq(buf, map, seq
|
145
|
-
return if buf.empty?
|
167
|
+
def find_seq(mat, buf, map, seq)
|
168
|
+
return mat if buf.empty?
|
146
169
|
|
147
|
-
|
170
|
+
forms, args = [], []
|
148
171
|
|
149
|
-
map.replace(map.shift.product(*map))
|
150
|
-
map.map! { |i| i.join }
|
151
|
-
map.uniq!
|
172
|
+
map.replace(map.shift.product(*map)).map! { |i| i.join }.uniq!
|
152
173
|
|
153
174
|
map.each { |q|
|
154
|
-
seq.each { |
|
155
|
-
|
156
|
-
|
175
|
+
seq.each { |str, cls, fmt|
|
176
|
+
_str, _cls = [str, cls]
|
177
|
+
|
178
|
+
while pos = q.index(str, pos || 0)
|
179
|
+
_str, _cls = [$&, $&.chars] unless cls
|
157
180
|
|
158
|
-
|
181
|
+
args.clear
|
182
|
+
|
183
|
+
_cls.each_with_index { |wc, i|
|
159
184
|
buf[pos + i].lexicals.find { |l|
|
160
|
-
|
185
|
+
args[i] = l.form if l.attr == wc
|
161
186
|
} or break
|
162
187
|
} or next
|
163
188
|
|
164
|
-
|
189
|
+
forms << (
|
190
|
+
fmt =~ /\d/ ? fmt.gsub('%0$s', _str) % args :
|
191
|
+
fmt ? "#{_str}:#{args.join(fmt)}" : args.join(' ')
|
192
|
+
)
|
193
|
+
|
194
|
+
pos += 1
|
165
195
|
end
|
166
196
|
}
|
167
|
-
}
|
197
|
+
}.clear
|
168
198
|
|
169
|
-
|
170
|
-
|
171
|
-
forms.each { |form|
|
172
|
-
matches << Word.new_lexical(form, WA_SEQUENCE, LA_SEQUENCE)
|
173
|
-
}
|
174
|
-
}
|
199
|
+
forms.uniq!
|
200
|
+
forms.each { |f| mat << Word.new_lexical(f, WA_SEQUENCE, LA_SEQUENCE) }
|
175
201
|
|
176
202
|
buf.clear
|
177
|
-
|
203
|
+
mat
|
178
204
|
end
|
179
205
|
|
180
206
|
end
|
@@ -133,21 +133,24 @@ class Lingo
|
|
133
133
|
|
134
134
|
# Gibt eine Datei zeilenweise in den Ausgabekanal
|
135
135
|
def spool(path)
|
136
|
-
unless stdin = stdin?(path)
|
137
|
-
size = File.size(path) if @progress
|
138
|
-
end
|
139
|
-
|
140
136
|
forward(STR_CMD_FILE, path)
|
141
137
|
|
142
|
-
|
143
|
-
|
138
|
+
if stdin?(path)
|
139
|
+
io = @lingo.config.stdin.set_encoding(ENC)
|
140
|
+
io = StringIO.new(io.read) if @progress
|
141
|
+
else
|
142
|
+
io, name = File.open(path, 'rb', encoding: ENC), path
|
143
|
+
end
|
144
|
+
|
145
|
+
ShowProgress.new(self, @progress && io.size, name) { |progress|
|
146
|
+
filter(io) { |line, pos|
|
144
147
|
progress[pos]
|
145
148
|
|
146
149
|
line.chomp! if @chomp
|
147
150
|
next if line =~ @skip
|
148
151
|
|
149
152
|
if line =~ @lir
|
150
|
-
forward(STR_CMD_RECORD, $1)
|
153
|
+
forward(STR_CMD_RECORD, $1 || $&)
|
151
154
|
else
|
152
155
|
line.sub!(@cut, '') if @cut
|
153
156
|
forward(line) unless line.empty?
|
@@ -158,14 +161,10 @@ class Lingo
|
|
158
161
|
forward(STR_CMD_EOF, path)
|
159
162
|
end
|
160
163
|
|
161
|
-
def filter(
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
block = stdin || !@progress ?
|
167
|
-
lambda { |line| yield line, 0 } :
|
168
|
-
lambda { |line| yield line, io.pos }
|
164
|
+
def filter(io)
|
165
|
+
block = @progress ?
|
166
|
+
lambda { |line| yield line, io.pos } :
|
167
|
+
lambda { |line| yield line, 0 }
|
169
168
|
|
170
169
|
case @filter == true ? file_type(path, io) : @filter.to_s
|
171
170
|
when /html/i then io = filter_html(io)
|
@@ -93,7 +93,7 @@ class Lingo
|
|
93
93
|
def control(cmd, param)
|
94
94
|
case cmd
|
95
95
|
when STR_CMD_LIR
|
96
|
-
@lir = true
|
96
|
+
@lir = true unless @lir.nil?
|
97
97
|
when STR_CMD_FILE
|
98
98
|
@no_sep = true
|
99
99
|
|
@@ -105,9 +105,9 @@ class Lingo
|
|
105
105
|
|
106
106
|
@lir_rec_no, @lir_rec_buf = '', []
|
107
107
|
when STR_CMD_RECORD
|
108
|
-
@no_sep = true
|
109
|
-
|
110
108
|
if @lir
|
109
|
+
@no_sep = true
|
110
|
+
|
111
111
|
flush_lir_buffer
|
112
112
|
@lir_rec_no = param
|
113
113
|
end
|
@@ -93,12 +93,12 @@ class Lingo
|
|
93
93
|
@src = get_key('src', false)
|
94
94
|
@src = DEFAULT_SRC_SEP if @src == true
|
95
95
|
|
96
|
-
if sort = get_key('sort', 'normal')
|
96
|
+
if sort = get_key('sort', ENV['LINGO_NO_SORT'] ? false : 'normal')
|
97
97
|
@sort_format, @sort_method = sort.downcase.split('_', 2)
|
98
98
|
end
|
99
99
|
end
|
100
100
|
|
101
|
-
@vectors, @word_count = [], 0
|
101
|
+
@vectors, @word_count = [], 0
|
102
102
|
end
|
103
103
|
|
104
104
|
def control(cmd, param)
|
@@ -137,12 +137,12 @@ class Lingo
|
|
137
137
|
vec = cnt.sort_by { |v, c| [-c, v] }
|
138
138
|
|
139
139
|
if @sort_method == 'rel'
|
140
|
-
|
141
|
-
|
140
|
+
fmt, wc = '%6.5f', @word_count.to_f
|
141
|
+
vec.each { |v| v[1] /= wc }
|
142
142
|
end
|
143
143
|
|
144
144
|
if @sort_format == 'sto'
|
145
|
-
fmt, @word_count = "%s {#{fmt}}", 0
|
145
|
+
fmt, @word_count = "%s {#{fmt}}", 0
|
146
146
|
else
|
147
147
|
fmt.insert(1, '2$') << ' %1$s'
|
148
148
|
end
|
data/lib/lingo/call.rb
CHANGED
data/lib/lingo/cli.rb
CHANGED
@@ -24,11 +24,11 @@
|
|
24
24
|
###############################################################################
|
25
25
|
#++
|
26
26
|
|
27
|
-
require 'nuggets/
|
27
|
+
require 'nuggets/cli'
|
28
28
|
|
29
29
|
class Lingo
|
30
30
|
|
31
|
-
class CLI < ::
|
31
|
+
class CLI < ::Nuggets::CLI
|
32
32
|
|
33
33
|
class << self
|
34
34
|
|
data/lib/lingo/ctl.rb
CHANGED
@@ -121,13 +121,15 @@ Usage: #{PROG} <command> [arguments] [options]
|
|
121
121
|
end
|
122
122
|
|
123
123
|
def do_demo
|
124
|
-
OPTIONS.update(path: ARGV.shift, scope: :system)
|
124
|
+
OPTIONS.update(path: path = ARGV.shift, scope: :system)
|
125
125
|
no_args
|
126
126
|
|
127
127
|
copy_list(:config) { |i| !File.basename(i).start_with?('test') }
|
128
128
|
copy_list(:lang)
|
129
129
|
copy_list(:dict) { |i| File.basename(i).start_with?('user') }
|
130
130
|
copy_list(:sample)
|
131
|
+
|
132
|
+
puts "Demo directory successfully initialized at `#{path}'."
|
131
133
|
end
|
132
134
|
|
133
135
|
def do_rackup(doit = true)
|
data/lib/lingo/database.rb
CHANGED
@@ -30,8 +30,21 @@ class Lingo
|
|
30
30
|
|
31
31
|
class ShowProgress < ShowProgress
|
32
32
|
|
33
|
-
def initialize(obj,
|
34
|
-
|
33
|
+
def initialize(obj, src, doit = true)
|
34
|
+
name = obj.instance_variable_get(:@config)['name']
|
35
|
+
super(obj, src.size, name, doit, 'convert', false)
|
36
|
+
|
37
|
+
if defined?(@cnt)
|
38
|
+
cnt, rej = src.rejected
|
39
|
+
|
40
|
+
if cnt > 0
|
41
|
+
print ' (', cnt, ' rejected'
|
42
|
+
print ': ', rej if rej
|
43
|
+
print ')'
|
44
|
+
end
|
45
|
+
|
46
|
+
print "\n"
|
47
|
+
end
|
35
48
|
end
|
36
49
|
|
37
50
|
end
|
@@ -74,7 +74,7 @@ class Lingo
|
|
74
74
|
@wrd = "(?:#{Language::Char::ANY})+"
|
75
75
|
@pat = /^#{@wrd}$/
|
76
76
|
|
77
|
-
@pos = 0
|
77
|
+
@pos = @rej_cnt = 0
|
78
78
|
end
|
79
79
|
|
80
80
|
def size
|
@@ -95,6 +95,7 @@ class Lingo
|
|
95
95
|
if length < 4096 && line =~ @pat
|
96
96
|
yield convert_line(line, $1, $2)
|
97
97
|
else
|
98
|
+
@rej_cnt += 1
|
98
99
|
reject_file.puts(line) if reject_file
|
99
100
|
end
|
100
101
|
}
|
@@ -111,6 +112,10 @@ class Lingo
|
|
111
112
|
db[key] = val
|
112
113
|
end
|
113
114
|
|
115
|
+
def rejected
|
116
|
+
[@rej_cnt, @rej]
|
117
|
+
end
|
118
|
+
|
114
119
|
end
|
115
120
|
|
116
121
|
end
|