RubyGems - lingo - Versions diffs - 1.8.3 → 1.8.4 - Mend

lingo 1.8.3 → 1.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

checksums.yaml +7 -0
data/ChangeLog +24 -2
data/README +16 -10
data/Rakefile +15 -6
data/en/lingo-irr.txt +60 -60
data/lib/lingo.rb +14 -6
data/lib/lingo/app.rb +3 -0
data/lib/lingo/attendee.rb +6 -0
data/lib/lingo/attendee/abbreviator.rb +1 -1
data/lib/lingo/attendee/multi_worder.rb +1 -1
data/lib/lingo/attendee/noneword_filter.rb +14 -5
data/lib/lingo/attendee/sequencer.rb +63 -37
data/lib/lingo/attendee/text_reader.rb +14 -15
data/lib/lingo/attendee/text_writer.rb +3 -3
data/lib/lingo/attendee/vector_filter.rb +5 -5
data/lib/lingo/call.rb +1 -1
data/lib/lingo/cli.rb +2 -2
data/lib/lingo/ctl.rb +3 -1
data/lib/lingo/database.rb +1 -1
data/lib/lingo/database/show_progress.rb +15 -2
data/lib/lingo/database/source.rb +6 -1
data/lib/lingo/error.rb +28 -4
data/lib/lingo/language/grammar.rb +7 -7
data/lib/lingo/language/word.rb +6 -2
data/lib/lingo/language/word_form.rb +1 -1
data/lib/lingo/show_progress.rb +3 -2
data/lib/lingo/srv.rb +15 -6
data/lib/lingo/srv/lingosrv.cfg +1 -1
data/lib/lingo/version.rb +1 -1
data/lib/lingo/web.rb +40 -10
data/lib/lingo/web/lingoweb.cfg +1 -1
data/lib/lingo/web/public/lingoweb.css +7 -4
data/lib/lingo/web/views/index.erb +97 -39
data/lingo.cfg +1 -1
data/lir.cfg +1 -1
data/test/attendee/ts_abbreviator.rb +22 -0
data/test/attendee/ts_sequencer.rb +278 -1
data/test/attendee/ts_text_reader.rb +34 -0
data/test/attendee/ts_text_writer.rb +1 -1
metadata +139 -133

data/lib/lingo/app.rb CHANGED Viewed

@@ -24,9 +24,11 @@
 ###############################################################################
 #++
+require 'json'
 require 'optparse'
 require 'shellwords'
 require 'sinatra/base'
+require 'sinatra/cookies'
 class Lingo
@@ -37,6 +39,7 @@ class Lingo
       def init_app(file, *args, &block)
         set :root, File.chomp_ext(file)
         parse_options(*args, &block)
+        helpers Sinatra::Cookies
       end
       def parse_options(lingo_options = false)

data/lib/lingo/attendee.rb CHANGED Viewed

@@ -173,6 +173,12 @@ class Lingo
       @lingo.warn(*msg)
     end
+    def require_lib(lib)
+      require lib
+    rescue LoadError => err
+      raise LibraryLoadError.new(self.class, lib, err)
+    end
   end
 end

data/lib/lingo/attendee/abbreviator.rb CHANGED Viewed

@@ -77,7 +77,7 @@ class Lingo
       end
       def control(cmd, param)
-        send_abbr(nil) if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
+        send_abbr(@abbr) if [STR_CMD_RECORD, STR_CMD_EOF].include?(cmd)
       end
       def process(obj)

data/lib/lingo/attendee/multi_worder.rb CHANGED Viewed

@@ -180,7 +180,7 @@ class Lingo
         seq = []
         @buffer.each { |obj|
-          next [obj] unless obj.is_a?(WordForm)
+          next seq << [obj] unless obj.is_a?(WordForm)
           next if (form = obj.form) == CHAR_PUNCT
           w = find_word(form, @lex_dic, @lex_gra)

data/lib/lingo/attendee/noneword_filter.rb CHANGED Viewed

@@ -71,7 +71,11 @@ class Lingo
       protected
       def init
-        @nonewords, @sort = [], get_key('sort', true)
+        @sort = get_key('sort', !ENV['LINGO_NO_SORT'])
+        @dict = get_key('dict', false)
+        @dict = '=' if @dict == true
+        @nonewords = []
       end
       def control(cmd, param)
@@ -80,14 +84,17 @@ class Lingo
             @nonewords.clear
           when STR_CMD_EOL
             skip_command
-          when STR_CMD_RECORD, STR_CMD_EOF
-            send_nonewords unless @nonewords.empty?
+          when STR_CMD_RECORD
+            send_nonewords unless @dict
+          when STR_CMD_EOF
+            send_nonewords
         end
       end
       def process(obj)
         if obj.is_a?(Word) && obj.unknown?
           non = Unicode.downcase(obj.form)
+          non = "#{non}#{@dict}#{non} #?" if @dict
           @sort ? @nonewords << non : forward(non)
         end
       end
@@ -95,8 +102,10 @@ class Lingo
       private
       def send_nonewords
-        @nonewords.uniq!
-        flush(@nonewords.sort!)
+        unless @nonewords.empty?
+          @nonewords.uniq!
+          flush(@nonewords.sort!)
+        end
       end
     end

data/lib/lingo/attendee/sequencer.rb CHANGED Viewed

@@ -96,15 +96,20 @@ class Lingo
       protected
       def init
-        @stopper = get_array('stopper', DEFAULT_SKIP, :upcase)
-        @classes = []
+        @stopper = get_array('stopper', DEFAULT_SKIP)
+                     .push(WA_UNKNOWN, WA_UNKMULPART)
-        @seq = get_key('sequences').map { |string, format|
-          @classes.concat(classes = string.downcase!.chars.to_a)
-          [string, classes, format]
+        @mwc = get_key('multiword', LA_MULTIWORD)
+        @cls = []
+        @seq = get_key('sequences').map { |str, fmt|
+          @cls.concat(cls = (str = str.downcase).scan(/[[:alpha:]]/))
+          (str =~ /\W/ ? [Regexp.new(str), nil] : [str, cls]).push(
+            fmt == true ? '|' : fmt ? fmt.gsub(/\d+/, '%\&$s') : nil)
         }
-        @classes.uniq!
+        @cls.uniq!
         raise MissingConfigError.new(:sequences) if @seq.empty?
       end
@@ -114,67 +119,88 @@ class Lingo
       end
       def process_buffer?
-        (obj = @buffer.last).is_a?(WordForm) && (obj.is_a?(Word) &&
-          obj.unknown? || @stopper.include?(obj.attr.upcase))
+        (obj = @buffer.last).is_a?(WordForm) && @stopper.include?(obj.attr)
       end
       def process_buffer
-        matches = []
+        flush(@buffer.size < 2 ? @buffer : begin
+          arg, cls, mwc, unk = [[], buf = [], map = [], @seq], @cls, @mwc, %w[#]
+          iter, skip, rewind = @buffer.each_with_index, 0, lambda {
+            iter.rewind; skip.times { iter.next }; skip = 0
+          }
-        if @buffer.size > 1
-          buf, map, seq, cls, unk = [], [], @seq, @classes, %w[#]
+          loop {
+            obj, idx = begin
+              iter.next
+            rescue StopIteration
+              raise unless skip > 0
+              buf.slice!(0, skip)
+              map.slice!(0, skip)
+              rewind.call
+            end
-          @buffer.each { |obj|
             att = obj.is_a?(Word) && !obj.unknown? ? obj.attrs(false) : unk
-            (att &= cls).empty? ? find_seq(buf, map, seq, matches) : begin
+            if (att &= cls).empty?
+              find_seq(*arg)
+              rewind.call if skip > 0
+            else
+              if n = obj.multiword_size(mwc)
+                n.times { iter.next }
+                skip = idx + 1
+              end
               buf << obj
               map << att
             end
           }
-          find_seq(buf, map, seq, matches)
-        end
-        flush(@buffer.concat(matches))
+          @buffer.concat(find_seq(*arg))
+        end)
       end
       private
-      def find_seq(buf, map, seq, matches)
-        return if buf.empty?
+      def find_seq(mat, buf, map, seq)
+        return mat if buf.empty?
-        match = Hash.new { |h, k| h[k] = [] }
+        forms, args = [], []
-        map.replace(map.shift.product(*map))
-        map.map! { |i| i.join }
-        map.uniq!
+        map.replace(map.shift.product(*map)).map! { |i| i.join }.uniq!
         map.each { |q|
-          seq.each { |string, classes, format|
-            while pos = q.index(string, pos || 0)
-              form = format.dup
+          seq.each { |str, cls, fmt|
+            _str, _cls = [str, cls]
+            while pos = q.index(str, pos || 0)
+              _str, _cls = [$&, $&.chars] unless cls
-              classes.each_with_index { |wc, i|
+              args.clear
+              _cls.each_with_index { |wc, i|
                 buf[pos + i].lexicals.find { |l|
-                  form.gsub!(i.succ.to_s, l.form) if l.attr == wc
+                  args[i] = l.form if l.attr == wc
                 } or break
               } or next
-              match[pos += 1] << form
+              forms << (
+                fmt =~ /\d/ ? fmt.gsub('%0$s', _str) % args :
+                fmt ? "#{_str}:#{args.join(fmt)}" : args.join(' ')
+              )
+              pos += 1
             end
           }
-        }
+        }.clear
-        match.each_value { |forms|
-          forms.uniq!
-          forms.each { |form|
-            matches << Word.new_lexical(form, WA_SEQUENCE, LA_SEQUENCE)
-          }
-        }
+        forms.uniq!
+        forms.each { |f| mat << Word.new_lexical(f, WA_SEQUENCE, LA_SEQUENCE) }
         buf.clear
-        map.clear
+        mat
       end
     end

data/lib/lingo/attendee/text_reader.rb CHANGED Viewed

@@ -133,21 +133,24 @@ class Lingo
       # Gibt eine Datei zeilenweise in den Ausgabekanal
       def spool(path)
-        unless stdin = stdin?(path)
-          size = File.size(path) if @progress
-        end
         forward(STR_CMD_FILE, path)
-        ShowProgress.new(self, size, path) { |progress|
-          filter(path, stdin) { |line, pos|
+        if stdin?(path)
+          io = @lingo.config.stdin.set_encoding(ENC)
+          io = StringIO.new(io.read) if @progress
+        else
+          io, name = File.open(path, 'rb', encoding: ENC), path
+        end
+        ShowProgress.new(self, @progress && io.size, name) { |progress|
+          filter(io) { |line, pos|
             progress[pos]
             line.chomp! if @chomp
             next if line =~ @skip
             if line =~ @lir
-              forward(STR_CMD_RECORD, $1)
+              forward(STR_CMD_RECORD, $1 || $&)
             else
               line.sub!(@cut, '') if @cut
               forward(line) unless line.empty?
@@ -158,14 +161,10 @@ class Lingo
         forward(STR_CMD_EOF, path)
       end
-      def filter(path, stdin = stdin?(path))
-        io = stdin ?
-          @lingo.config.stdin.set_encoding(ENC) :
-          File.open(path, 'rb', encoding: ENC)
-        block = stdin || !@progress ?
-          lambda { |line| yield line, 0 } :
-          lambda { |line| yield line, io.pos }
+      def filter(io)
+        block = @progress ?
+          lambda { |line| yield line, io.pos } :
+          lambda { |line| yield line, 0 }
         case @filter == true ? file_type(path, io) : @filter.to_s
           when /html/i then io = filter_html(io)

data/lib/lingo/attendee/text_writer.rb CHANGED Viewed

@@ -93,7 +93,7 @@ class Lingo
       def control(cmd, param)
         case cmd
           when STR_CMD_LIR
-            @lir = true
+            @lir = true unless @lir.nil?
           when STR_CMD_FILE
             @no_sep = true
@@ -105,9 +105,9 @@ class Lingo
             @lir_rec_no, @lir_rec_buf = '', []
           when STR_CMD_RECORD
-            @no_sep = true
             if @lir
+              @no_sep = true
               flush_lir_buffer
               @lir_rec_no = param
             end

data/lib/lingo/attendee/vector_filter.rb CHANGED Viewed

@@ -93,12 +93,12 @@ class Lingo
           @src = get_key('src', false)
           @src = DEFAULT_SRC_SEP if @src == true
-          if sort = get_key('sort', 'normal')
+          if sort = get_key('sort', ENV['LINGO_NO_SORT'] ? false : 'normal')
             @sort_format, @sort_method = sort.downcase.split('_', 2)
           end
         end
-        @vectors, @word_count = [], 0.0
+        @vectors, @word_count = [], 0
       end
       def control(cmd, param)
@@ -137,12 +137,12 @@ class Lingo
           vec = cnt.sort_by { |v, c| [-c, v] }
           if @sort_method == 'rel'
-            vec.each { |v| v[1] /= @word_count }
-            fmt = '%6.5f'
+            fmt, wc = '%6.5f', @word_count.to_f
+            vec.each { |v| v[1] /= wc }
           end
           if @sort_format == 'sto'
-            fmt, @word_count = "%s {#{fmt}}", 0.0
+            fmt, @word_count = "%s {#{fmt}}", 0
           else
             fmt.insert(1, '2$') << ' %1$s'
           end

data/lib/lingo/call.rb CHANGED Viewed

@@ -66,7 +66,7 @@ class Lingo
       }
       block_given? ? res.map! { |i| yield i } : begin
-        res.sort!
+        Lingo.sort!(res)
         res.uniq!
         res
       end

data/lib/lingo/cli.rb CHANGED Viewed

@@ -24,11 +24,11 @@
 ###############################################################################
 #++
-require 'nuggets/util/cli'
+require 'nuggets/cli'
 class Lingo
-  class CLI < ::Util::CLI
+  class CLI < ::Nuggets::CLI
     class << self

data/lib/lingo/ctl.rb CHANGED Viewed

@@ -121,13 +121,15 @@ Usage: #{PROG} <command> [arguments] [options]
     end
     def do_demo
-      OPTIONS.update(path: ARGV.shift, scope: :system)
+      OPTIONS.update(path: path = ARGV.shift, scope: :system)
       no_args
       copy_list(:config) { |i| !File.basename(i).start_with?('test') }
       copy_list(:lang)
       copy_list(:dict)   { |i|  File.basename(i).start_with?('user') }
       copy_list(:sample)
+      puts "Demo directory successfully initialized at `#{path}'."
     end
     def do_rackup(doit = true)

data/lib/lingo/database.rb CHANGED Viewed

@@ -240,7 +240,7 @@ class Lingo
         }
       end
-      ShowProgress.new(self, src.size, verbose) { |progress| create {
+      ShowProgress.new(self, src, verbose) { |progress| create {
         src.each { |key, val|
           progress[src.pos]

data/lib/lingo/database/show_progress.rb CHANGED Viewed

@@ -30,8 +30,21 @@ class Lingo
     class ShowProgress < ShowProgress
-      def initialize(obj, max, act = true)
-        super(obj, max, obj.instance_variable_get(:@config)['name'], act, 'convert')
+      def initialize(obj, src, doit = true)
+        name = obj.instance_variable_get(:@config)['name']
+        super(obj, src.size, name, doit, 'convert', false)
+        if defined?(@cnt)
+          cnt, rej = src.rejected
+          if cnt > 0
+            print ' (', cnt, ' rejected'
+            print ': ', rej if rej
+            print ')'
+          end
+          print "\n"
+        end
       end
     end

data/lib/lingo/database/source.rb CHANGED Viewed

@@ -74,7 +74,7 @@ class Lingo
         @wrd = "(?:#{Language::Char::ANY})+"
         @pat = /^#{@wrd}$/
-        @pos = 0
+        @pos = @rej_cnt = 0
       end
       def size
@@ -95,6 +95,7 @@ class Lingo
           if length < 4096 && line =~ @pat
             yield convert_line(line, $1, $2)
           else
+            @rej_cnt += 1
             reject_file.puts(line) if reject_file
           end
         }
@@ -111,6 +112,10 @@ class Lingo
         db[key] = val
       end
+      def rejected
+        [@rej_cnt, @rej]
+      end
     end
   end