lingo 1.8.1 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +23 -5
- data/README +1 -1
- data/Rakefile +5 -7
- data/TODO +2 -0
- data/bin/lingo +5 -1
- data/de.lang +1 -1
- data/en/lingo-syn.txt +0 -0
- data/en.lang +2 -1
- data/lib/lingo/attendee/abbreviator.rb +8 -9
- data/lib/lingo/attendee/debugger.rb +5 -4
- data/lib/lingo/attendee/decomposer.rb +8 -3
- data/lib/lingo/attendee/dehyphenizer.rb +19 -63
- data/lib/lingo/attendee/formatter.rb +1 -1
- data/lib/lingo/attendee/multi_worder.rb +67 -155
- data/lib/lingo/attendee/noneword_filter.rb +16 -9
- data/lib/lingo/attendee/object_filter.rb +1 -1
- data/lib/lingo/attendee/sequencer.rb +32 -63
- data/lib/lingo/attendee/stemmer/porter.rb +343 -0
- data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
- data/lib/lingo/attendee/synonymer.rb +10 -9
- data/lib/lingo/attendee/text_reader.rb +102 -76
- data/lib/lingo/attendee/text_writer.rb +23 -26
- data/lib/lingo/attendee/tokenizer.rb +13 -27
- data/lib/lingo/attendee/variator.rb +26 -66
- data/lib/lingo/attendee/vector_filter.rb +42 -43
- data/lib/lingo/attendee/word_searcher.rb +6 -7
- data/lib/lingo/attendee.rb +25 -7
- data/lib/lingo/buffered_attendee.rb +36 -10
- data/lib/lingo/cachable.rb +8 -8
- data/lib/lingo/config.rb +5 -6
- data/lib/lingo/ctl.rb +2 -3
- data/lib/lingo/database/crypter.rb +9 -26
- data/lib/lingo/database/gdbm_store.rb +3 -5
- data/lib/lingo/database/libcdb_store.rb +4 -6
- data/lib/lingo/database/sdbm_store.rb +11 -6
- data/lib/lingo/database/show_progress.rb +3 -43
- data/lib/lingo/database/source/key_value.rb +2 -6
- data/lib/lingo/database/source/multi_key.rb +3 -5
- data/lib/lingo/database/source/multi_value.rb +2 -6
- data/lib/lingo/database/source/single_word.rb +4 -6
- data/lib/lingo/database/source/word_class.rb +4 -10
- data/lib/lingo/database/source.rb +20 -18
- data/lib/lingo/database.rb +84 -59
- data/lib/lingo/error.rb +57 -1
- data/lib/lingo/language/dictionary.rb +21 -18
- data/lib/lingo/language/grammar.rb +40 -49
- data/lib/lingo/language/lexical.rb +6 -6
- data/lib/lingo/language/lexical_hash.rb +6 -0
- data/lib/lingo/language/word.rb +32 -15
- data/lib/lingo/language/word_form.rb +1 -1
- data/lib/lingo/language.rb +14 -25
- data/lib/lingo/reportable.rb +12 -10
- data/lib/lingo/show_progress.rb +81 -0
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo.rb +63 -24
- data/lingo-call.cfg +6 -10
- data/lingo.cfg +60 -44
- data/lir.cfg +42 -41
- data/test/attendee/ts_abbreviator.rb +3 -5
- data/test/attendee/ts_decomposer.rb +3 -5
- data/test/attendee/ts_multi_worder.rb +87 -145
- data/test/attendee/ts_noneword_filter.rb +5 -3
- data/test/attendee/ts_object_filter.rb +5 -3
- data/test/attendee/ts_sequencer.rb +3 -5
- data/test/attendee/ts_stemmer.rb +309 -0
- data/test/attendee/ts_synonymer.rb +15 -11
- data/test/attendee/ts_text_reader.rb +12 -15
- data/test/attendee/ts_text_writer.rb +24 -29
- data/test/attendee/ts_tokenizer.rb +9 -7
- data/test/attendee/ts_variator.rb +4 -4
- data/test/attendee/ts_vector_filter.rb +24 -16
- data/test/attendee/ts_word_searcher.rb +20 -36
- data/test/{lir.csv → lir.vec} +0 -0
- data/test/ref/artikel.vec +943 -943
- data/test/ref/artikel.ven +943 -943
- data/test/ref/lir.non +201 -201
- data/test/ref/lir.seq +178 -178
- data/test/ref/lir.syn +49 -49
- data/test/ref/lir.vec +329 -0
- data/test/test_helper.rb +20 -36
- data/test/ts_database.rb +10 -10
- data/test/ts_language.rb +279 -319
- metadata +93 -104
- data/info/Objekte.png +0 -0
- data/info/Typen.png +0 -0
- data/info/database.png +0 -0
- data/info/db_small.png +0 -0
- data/info/download.png +0 -0
- data/info/kerze.png +0 -0
- data/info/language.png +0 -0
- data/info/lingo.png +0 -0
- data/info/logo.png +0 -0
- data/info/meeting.png +0 -0
- data/info/types.png +0 -0
- data/lingo-all.cfg +0 -89
- data/porter/stem.cfg +0 -311
- data/porter/stem.rb +0 -150
- data/test/ref/lir.csv +0 -329
- data/test.cfg +0 -79
| @@ -29,7 +29,7 @@ class Lingo | |
| 29 29 | 
             
              module Language
         | 
| 30 30 |  | 
| 31 31 | 
             
                # Die Klasse Grammar beinhaltet grammatikalische Spezialitäten einer Sprache. Derzeit findet die
         | 
| 32 | 
            -
                # Kompositumerkennung hier ihren Platz, die mit der Methode  | 
| 32 | 
            +
                # Kompositumerkennung hier ihren Platz, die mit der Methode find_compound aufgerufen werden kann.
         | 
| 33 33 | 
             
                # Die Klasse Grammar wird genau wie ein Dictionary initialisiert. Das bei der Initialisierung angegebene Wörterbuch ist Grundlage
         | 
| 34 34 | 
             
                # für die Erkennung der Kompositumteile.
         | 
| 35 35 |  | 
| @@ -40,31 +40,26 @@ class Lingo | |
| 40 40 |  | 
| 41 41 | 
             
                  HYPHEN_RE = %r{\A(.+)-([^-]+)\z}
         | 
| 42 42 |  | 
| 43 | 
            -
                   | 
| 44 | 
            -
             | 
| 45 | 
            -
                   | 
| 43 | 
            +
                  def self.open(*args)
         | 
| 44 | 
            +
                    yield grammar = new(*args)
         | 
| 45 | 
            +
                  ensure
         | 
| 46 | 
            +
                    grammar.close if grammar
         | 
| 47 | 
            +
                  end
         | 
| 48 | 
            +
             | 
| 46 49 | 
             
                  def initialize(config, lingo)
         | 
| 47 50 | 
             
                    init_cachable
         | 
| 48 51 | 
             
                    init_reportable
         | 
| 49 52 |  | 
| 50 53 | 
             
                    @dic, @suggestions = Dictionary.new(config, lingo), []
         | 
| 51 54 |  | 
| 52 | 
            -
                    cfg = lingo.dictionary_config[' | 
| 53 | 
            -
             | 
| 54 | 
            -
                    # Ein Wort muss mindestens 8 Zeichen lang sein, damit
         | 
| 55 | 
            -
                    # überhaupt eine Prüfung stattfindet.
         | 
| 56 | 
            -
                    @min_word_size = (cfg['min-word-size'] || 8).to_i
         | 
| 57 | 
            -
             | 
| 58 | 
            -
                    # Die durchschnittliche Länge der Kompositum-Wortteile
         | 
| 59 | 
            -
                    # muss mindestens 4 Zeichen lang sein, sonst ist es kein
         | 
| 60 | 
            -
                    # gültiges Kompositum.
         | 
| 61 | 
            -
                    @min_avg_part_size = (cfg['min-avg-part-size'] || 4).to_i
         | 
| 62 | 
            -
             | 
| 63 | 
            -
                    # Der kürzeste Kompositum-Wortteil muss mindestens 1 Zeichen lang sein
         | 
| 64 | 
            -
                    @min_part_size = (cfg['min-part-size'] || 1).to_i
         | 
| 55 | 
            +
                    cfg = lingo.dictionary_config['compound'] ||
         | 
| 56 | 
            +
                          lingo.dictionary_config['compositum']  # DEPRECATE compositum
         | 
| 65 57 |  | 
| 66 | 
            -
                     | 
| 67 | 
            -
             | 
| 58 | 
            +
                    {
         | 
| 59 | 
            +
                      min_word_size: 8, min_avg_part_size: 4, min_part_size: 1, max_parts: 4
         | 
| 60 | 
            +
                    }.each { |k, v|
         | 
| 61 | 
            +
                      instance_variable_set("@#{k}", cfg.fetch(k.to_s.tr('_', '-'), v).to_i)
         | 
| 62 | 
            +
                    }
         | 
| 68 63 |  | 
| 69 64 | 
             
                    # Die Wortklasse eines Kompositum-Wortteils kann separat gekennzeichnet
         | 
| 70 65 | 
             
                    # werden, um sie von Wortklassen normaler Wörter unterscheiden zu
         | 
| @@ -75,7 +70,7 @@ class Lingo | |
| 75 70 | 
             
                    # Bestimmte Sequenzen können als ungültige Komposita erkannt werden,
         | 
| 76 71 | 
             
                    # z.B. ist ein Kompositum aus zwei Adjetiven kein Kompositum, also
         | 
| 77 72 | 
             
                    # skip-sequence = 'aa'
         | 
| 78 | 
            -
                    @sequences = cfg.fetch('skip-sequences', []).map(&:downcase)
         | 
| 73 | 
            +
                    @sequences = cfg.fetch('skip-sequences', []).map!(&:downcase)
         | 
| 79 74 | 
             
                  end
         | 
| 80 75 |  | 
| 81 76 | 
             
                  def close
         | 
| @@ -86,12 +81,12 @@ class Lingo | |
| 86 81 | 
             
                    super.update(@dic.report)
         | 
| 87 82 | 
             
                  end
         | 
| 88 83 |  | 
| 89 | 
            -
                  #  | 
| 90 | 
            -
                  #  | 
| 84 | 
            +
                  # find_compound(str) -> word wenn level=1
         | 
| 85 | 
            +
                  # find_compound(str) -> [lex, sta] wenn level!=1
         | 
| 91 86 | 
             
                  #
         | 
| 92 | 
            -
                  #  | 
| 87 | 
            +
                  # find_compound arbeitet in verschiedenen Leveln, da die Methode auch rekursiv aufgerufen wird. Ein Level größer 1
         | 
| 93 88 | 
             
                  # entspricht daher einem rekursiven Aufruf
         | 
| 94 | 
            -
                  def  | 
| 89 | 
            +
                  def find_compound(str, level = 1, tail = false)
         | 
| 95 90 | 
             
                    key, top, empty = str.downcase, level == 1, [[], [], '']
         | 
| 96 91 |  | 
| 97 92 | 
             
                    if top && hit?(key)
         | 
| @@ -108,16 +103,21 @@ class Lingo | |
| 108 103 |  | 
| 109 104 | 
             
                    inc('Komposita geprüft')
         | 
| 110 105 |  | 
| 111 | 
            -
                    res =  | 
| 112 | 
            -
             | 
| 106 | 
            +
                    lex, sta, seq = res = permute_compound(key, level, tail)
         | 
| 107 | 
            +
             | 
| 108 | 
            +
                    val = !lex.empty? &&
         | 
| 109 | 
            +
                      sta.size              <= @max_parts         &&
         | 
| 110 | 
            +
                      sta.min               >= @min_part_size     &&
         | 
| 111 | 
            +
                      str.length / sta.size >= @min_avg_part_size &&
         | 
| 112 | 
            +
                      (@sequences.empty? || !@sequences.include?(seq))
         | 
| 113 113 |  | 
| 114 114 | 
             
                    if top
         | 
| 115 115 | 
             
                      if val
         | 
| 116 116 | 
             
                        inc('Komposita erkannt')
         | 
| 117 117 |  | 
| 118 | 
            -
                        com.attr =  | 
| 118 | 
            +
                        com.attr = WA_COMPOUND
         | 
| 119 119 | 
             
                        com.lexicals = lex.map { |l|
         | 
| 120 | 
            -
                          l.attr ==  | 
| 120 | 
            +
                          l.attr == LA_COMPOUND ? l :
         | 
| 121 121 | 
             
                            Lexical.new(l.form, l.attr + @append_wc)
         | 
| 122 122 | 
             
                        }
         | 
| 123 123 | 
             
                      end
         | 
| @@ -128,14 +128,14 @@ class Lingo | |
| 128 128 | 
             
                    end
         | 
| 129 129 | 
             
                  end
         | 
| 130 130 |  | 
| 131 | 
            -
                  #  | 
| 132 | 
            -
                  def  | 
| 133 | 
            -
                    return  | 
| 131 | 
            +
                  # permute_compound( _aString_ ) ->  [lex, sta, seq]
         | 
| 132 | 
            +
                  def permute_compound(str, level = 1, tail = false)
         | 
| 133 | 
            +
                    return test_compound($1, '-', $2, level, tail) if str =~ HYPHEN_RE
         | 
| 134 134 |  | 
| 135 135 | 
             
                    sug, len = @suggestions[level] ||= [], str.length
         | 
| 136 136 |  | 
| 137 137 | 
             
                    1.upto(len - 1) { |i|
         | 
| 138 | 
            -
                      res =  | 
| 138 | 
            +
                      res = test_compound(str[0, i], '', str[i, len], level, tail)
         | 
| 139 139 |  | 
| 140 140 | 
             
                      unless (lex = res.first).empty?
         | 
| 141 141 | 
             
                        return res unless lex.last.attr == LA_TAKEITASIS
         | 
| @@ -146,10 +146,10 @@ class Lingo | |
| 146 146 | 
             
                    sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
         | 
| 147 147 | 
             
                  end
         | 
| 148 148 |  | 
| 149 | 
            -
                  #  | 
| 149 | 
            +
                  # test_compound() ->  [lex, sta, seq]
         | 
| 150 150 | 
             
                  #
         | 
| 151 151 | 
             
                  # Testet einen definiert zerlegten String auf Kompositum
         | 
| 152 | 
            -
                  def  | 
| 152 | 
            +
                  def test_compound(fstr, infix, bstr, level = 1, tail = false)
         | 
| 153 153 | 
             
                    sta, seq, empty = [fstr.length, bstr.length], %w[? ?], [[], [], '']
         | 
| 154 154 |  | 
| 155 155 | 
             
                    if !(blex = @dic.select_with_suffix(bstr)).sort!.empty?
         | 
| @@ -159,10 +159,10 @@ class Lingo | |
| 159 159 | 
             
                      # 2. Word w/ infix, unless tail part
         | 
| 160 160 | 
             
                      bform, seq[1] = bstr, blex.first.attr
         | 
| 161 161 | 
             
                    elsif infix == '-'
         | 
| 162 | 
            -
                      blex, bsta, bseq =  | 
| 162 | 
            +
                      blex, bsta, bseq = find_compound(bstr, level + 1, tail)
         | 
| 163 163 |  | 
| 164 164 | 
             
                      if !blex.sort!.empty?
         | 
| 165 | 
            -
                        # 3.  | 
| 165 | 
            +
                        # 3. Compound
         | 
| 166 166 | 
             
                        bform, seq[1], sta[1..-1] = blex.first.form, bseq, bsta
         | 
| 167 167 | 
             
                      else
         | 
| 168 168 | 
             
                        # 4. Take it as is
         | 
| @@ -176,10 +176,10 @@ class Lingo | |
| 176 176 | 
             
                      # 1. Word w/ infix
         | 
| 177 177 | 
             
                      fform, seq[0] = fstr, flex.first.attr
         | 
| 178 178 | 
             
                    else
         | 
| 179 | 
            -
                      flex, fsta, fseq =  | 
| 179 | 
            +
                      flex, fsta, fseq = find_compound(fstr, level + 1, true)
         | 
| 180 180 |  | 
| 181 181 | 
             
                      if !flex.sort!.empty?
         | 
| 182 | 
            -
                        # 2.  | 
| 182 | 
            +
                        # 2. Compound
         | 
| 183 183 | 
             
                        fform, seq[0], sta[0..0] = flex.first.form, fseq, fsta
         | 
| 184 184 | 
             
                      elsif infix == '-'
         | 
| 185 185 | 
             
                        # 3. Take it as is
         | 
| @@ -189,21 +189,12 @@ class Lingo | |
| 189 189 | 
             
                      end
         | 
| 190 190 | 
             
                    end
         | 
| 191 191 |  | 
| 192 | 
            -
                    flex.concat(blex).delete_if { |l| l.attr ==  | 
| 193 | 
            -
                      push(Lexical.new(fform + infix + bform,  | 
| 192 | 
            +
                    flex.concat(blex).delete_if { |l| l.attr == LA_COMPOUND }.
         | 
| 193 | 
            +
                      push(Lexical.new(fform + infix + bform, LA_COMPOUND)).sort!
         | 
| 194 194 |  | 
| 195 195 | 
             
                    [flex, sta, seq.join]
         | 
| 196 196 | 
             
                  end
         | 
| 197 197 |  | 
| 198 | 
            -
                  private
         | 
| 199 | 
            -
             | 
| 200 | 
            -
                  def valid?(str, sta, seq)
         | 
| 201 | 
            -
                    sta.size               <= @max_parts         &&
         | 
| 202 | 
            -
                    sta.sort.first         >= @min_part_size     &&
         | 
| 203 | 
            -
                    str.length / sta.size  >= @min_avg_part_size &&
         | 
| 204 | 
            -
                    (@sequences.empty? || !@sequences.include?(seq))
         | 
| 205 | 
            -
                  end
         | 
| 206 | 
            -
             | 
| 207 198 | 
             
                end
         | 
| 208 199 |  | 
| 209 200 | 
             
              end
         | 
| @@ -39,14 +39,14 @@ class Lingo | |
| 39 39 | 
             
                  def <=>(other)
         | 
| 40 40 | 
             
                    return 1 unless other.is_a?(self.class)
         | 
| 41 41 |  | 
| 42 | 
            -
                     | 
| 42 | 
            +
                    a1, a2 = attr, other.attr
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                    if a1 == a2
         | 
| 43 45 | 
             
                      form <=> other.form
         | 
| 44 46 | 
             
                    else
         | 
| 45 | 
            -
                       | 
| 46 | 
            -
                         | 
| 47 | 
            -
                         | 
| 48 | 
            -
             | 
| 49 | 
            -
                        a ? b ? b <=> a : -1 : b ? 1 : attr <=> other.attr
         | 
| 47 | 
            +
                      a1.empty? ? 1 : a2.empty? ? -1 : begin
         | 
| 48 | 
            +
                        i1, i2 = [a1, a2].map(&LA_SORTORDER.method(:index))
         | 
| 49 | 
            +
                        i1 ? i2 ? i2 <=> i1 : -1 : i2 ? 1 : a1 <=> a2
         | 
| 50 50 | 
             
                      end
         | 
| 51 51 | 
             
                    end
         | 
| 52 52 | 
             
                  end
         | 
    
        data/lib/lingo/language/word.rb
    CHANGED
    
    | @@ -33,8 +33,16 @@ class Lingo | |
| 33 33 |  | 
| 34 34 | 
             
                class Word < WordForm
         | 
| 35 35 |  | 
| 36 | 
            -
                   | 
| 37 | 
            -
             | 
| 36 | 
            +
                  class << self
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                    def new_lexicals(form, attr, lex)
         | 
| 39 | 
            +
                      new(form, attr) << lex
         | 
| 40 | 
            +
                    end
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                    def new_lexical(form, attr, lex_attr)
         | 
| 43 | 
            +
                      new_lexicals(form, attr, Lexical.new(form, lex_attr))
         | 
| 44 | 
            +
                    end
         | 
| 45 | 
            +
             | 
| 38 46 | 
             
                  end
         | 
| 39 47 |  | 
| 40 48 | 
             
                  # Exakte Representation der originären Zeichenkette, so wie sie im Satz
         | 
| @@ -56,23 +64,32 @@ class Lingo | |
| 56 64 | 
             
                  end
         | 
| 57 65 |  | 
| 58 66 | 
             
                  def lexicals(compound_parts = true)
         | 
| 59 | 
            -
                    if !compound_parts && attr ==  | 
| 60 | 
            -
                      @lexicals.select { |lex| lex.attr ==  | 
| 67 | 
            +
                    if !compound_parts && attr == WA_COMPOUND
         | 
| 68 | 
            +
                      @lexicals.select { |lex| lex.attr == LA_COMPOUND }
         | 
| 61 69 | 
             
                    else
         | 
| 62 70 | 
             
                      @lexicals
         | 
| 63 71 | 
             
                    end
         | 
| 64 72 | 
             
                  end
         | 
| 65 73 |  | 
| 66 | 
            -
                  def lexicals=( | 
| 67 | 
            -
                    if  | 
| 68 | 
            -
                      @lexicals =  | 
| 74 | 
            +
                  def lexicals=(lex)
         | 
| 75 | 
            +
                    if lex.is_a?(Array)
         | 
| 76 | 
            +
                      @lexicals = lex.sort.uniq
         | 
| 69 77 | 
             
                    else
         | 
| 70 | 
            -
                      raise TypeError, "wrong argument type #{ | 
| 78 | 
            +
                      raise TypeError, "wrong argument type #{lex.class} (expected Array)"
         | 
| 71 79 | 
             
                    end
         | 
| 72 80 | 
             
                  end
         | 
| 73 81 |  | 
| 82 | 
            +
                  def add_lexicals(lex)
         | 
| 83 | 
            +
                    @lexicals.concat(lex)
         | 
| 84 | 
            +
             | 
| 85 | 
            +
                    @lexicals.sort!
         | 
| 86 | 
            +
                    @lexicals.uniq!
         | 
| 87 | 
            +
             | 
| 88 | 
            +
                    self
         | 
| 89 | 
            +
                  end
         | 
| 90 | 
            +
             | 
| 74 91 | 
             
                  def attrs(compound_parts = true)
         | 
| 75 | 
            -
                    lexicals(compound_parts).map | 
| 92 | 
            +
                    lexicals(compound_parts).map(&:attr)
         | 
| 76 93 | 
             
                  end
         | 
| 77 94 |  | 
| 78 95 | 
             
                  def parts
         | 
| @@ -100,15 +117,15 @@ class Lingo | |
| 100 117 | 
             
                  end
         | 
| 101 118 |  | 
| 102 119 | 
             
                  def compo_form
         | 
| 103 | 
            -
                    if attr ==  | 
| 104 | 
            -
             | 
| 105 | 
            -
             | 
| 106 | 
            -
             | 
| 107 | 
            -
                     | 
| 120 | 
            +
                    get_class(LA_COMPOUND).first if attr == WA_COMPOUND
         | 
| 121 | 
            +
                  end
         | 
| 122 | 
            +
             | 
| 123 | 
            +
                  def full_compound?
         | 
| 124 | 
            +
                    attr == WA_COMPOUND && get_class('x+').empty?
         | 
| 108 125 | 
             
                  end
         | 
| 109 126 |  | 
| 110 127 | 
             
                  def <<(*other)
         | 
| 111 | 
            -
                    lexicals.concat(other.flatten)
         | 
| 128 | 
            +
                    lexicals.concat(other.tap(&:flatten!))
         | 
| 112 129 | 
             
                    self
         | 
| 113 130 | 
             
                  end
         | 
| 114 131 |  | 
    
        data/lib/lingo/language.rb
    CHANGED
    
    | @@ -50,7 +50,7 @@ class Lingo | |
| 50 50 | 
             
                # Status, wenn das Word nicht gefunden werden konnte
         | 
| 51 51 | 
             
                WA_UNKNOWN    = '?'
         | 
| 52 52 | 
             
                # Wort ist als Kompositum erkannt worden
         | 
| 53 | 
            -
                 | 
| 53 | 
            +
                WA_COMPOUND   = 'KOM'
         | 
| 54 54 | 
             
                # Wort ist eine Mehrwortgruppe
         | 
| 55 55 | 
             
                WA_MULTIWORD  = 'MUL'
         | 
| 56 56 | 
             
                # Wort ist eine Mehrwortgruppe
         | 
| @@ -58,31 +58,20 @@ class Lingo | |
| 58 58 | 
             
                # Word ist unbekannt, jedoch Teil einer Mehrwortgruppe
         | 
| 59 59 | 
             
                WA_UNKMULPART = 'MU?'
         | 
| 60 60 |  | 
| 61 | 
            -
                LA_SUBSTANTIV = 's'
         | 
| 62 | 
            -
                LA_ADJEKTIV   = 'a'
         | 
| 63 | 
            -
                LA_VERB       = 'v'
         | 
| 64 | 
            -
                LA_EIGENNAME  = 'e'
         | 
| 65 | 
            -
                LA_KOMPOSITUM = 'k'
         | 
| 66 | 
            -
                LA_MULTIWORD  = 'm'
         | 
| 67 | 
            -
                LA_SEQUENCE   = 'q'
         | 
| 68 | 
            -
                LA_WORTFORM   = 'w'
         | 
| 69 | 
            -
                LA_SYNONYM    = 'y'
         | 
| 70 | 
            -
                LA_STOPWORD   = 't'
         | 
| 71 | 
            -
                LA_TAKEITASIS = 'x'
         | 
| 72 | 
            -
                LA_UNKNOWN    = '?'
         | 
| 73 | 
            -
             | 
| 74 61 | 
             
                LA_SORTORDER = [
         | 
| 75 | 
            -
                   | 
| 76 | 
            -
                   | 
| 77 | 
            -
                   | 
| 78 | 
            -
                   | 
| 79 | 
            -
                   | 
| 80 | 
            -
                   | 
| 81 | 
            -
                   | 
| 82 | 
            -
                   | 
| 83 | 
            -
                   | 
| 84 | 
            -
                   | 
| 85 | 
            -
                   | 
| 62 | 
            +
                  LA_SEQUENCE   = 'q',
         | 
| 63 | 
            +
                  LA_MULTIWORD  = 'm',
         | 
| 64 | 
            +
                  LA_COMPOUND   = 'k',
         | 
| 65 | 
            +
                  LA_NOUN       = 's',
         | 
| 66 | 
            +
                  LA_VERB       = 'v',
         | 
| 67 | 
            +
                  LA_ADJECTIVE  = 'a',
         | 
| 68 | 
            +
                  LA_NAME       = 'e',
         | 
| 69 | 
            +
                  LA_WORDFORM   = 'w',
         | 
| 70 | 
            +
                  LA_STOPWORD   = 't',
         | 
| 71 | 
            +
                  LA_TAKEITASIS = 'x',
         | 
| 72 | 
            +
                  LA_SYNONYM    = 'y',
         | 
| 73 | 
            +
                  LA_STEM       = 'z',
         | 
| 74 | 
            +
                  LA_UNKNOWN    = '?'
         | 
| 86 75 | 
             
                ].reverse.join
         | 
| 87 76 |  | 
| 88 77 | 
             
              end
         | 
    
        data/lib/lingo/reportable.rb
    CHANGED
    
    | @@ -31,27 +31,29 @@ class Lingo | |
| 31 31 | 
             
              module Reportable
         | 
| 32 32 |  | 
| 33 33 | 
             
                def init_reportable(prefix = nil)
         | 
| 34 | 
            -
                  @ | 
| 34 | 
            +
                  @reportable_hash   = Hash.new(0)
         | 
| 35 | 
            +
                  @reportable_prefix = prefix ? "#{prefix}: " : ''
         | 
| 35 36 | 
             
                end
         | 
| 36 37 |  | 
| 37 | 
            -
                def inc( | 
| 38 | 
            -
                  @ | 
| 38 | 
            +
                def inc(key)
         | 
| 39 | 
            +
                  @reportable_hash[key] += 1
         | 
| 39 40 | 
             
                end
         | 
| 40 41 |  | 
| 41 | 
            -
                def add( | 
| 42 | 
            -
                  @ | 
| 42 | 
            +
                def add(key, val)
         | 
| 43 | 
            +
                  @reportable_hash[key] += val
         | 
| 43 44 | 
             
                end
         | 
| 44 45 |  | 
| 45 | 
            -
                def set( | 
| 46 | 
            -
                  @ | 
| 46 | 
            +
                def set(key, val)
         | 
| 47 | 
            +
                  @reportable_hash[key] = val
         | 
| 47 48 | 
             
                end
         | 
| 48 49 |  | 
| 49 | 
            -
                def get( | 
| 50 | 
            -
                  @ | 
| 50 | 
            +
                def get(key)
         | 
| 51 | 
            +
                  @reportable_hash[key]
         | 
| 51 52 | 
             
                end
         | 
| 52 53 |  | 
| 53 54 | 
             
                def report
         | 
| 54 | 
            -
                   | 
| 55 | 
            +
                  q = @reportable_prefix
         | 
| 56 | 
            +
                  @reportable_hash.each_with_object({}) { |(k, v), r| r["#{q}#{k}"] = v }
         | 
| 55 57 | 
             
                end
         | 
| 56 58 |  | 
| 57 59 | 
             
              end
         | 
| @@ -0,0 +1,81 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            #--
         | 
| 4 | 
            +
            ###############################################################################
         | 
| 5 | 
            +
            #                                                                             #
         | 
| 6 | 
            +
            # Lingo -- A full-featured automatic indexing system                          #
         | 
| 7 | 
            +
            #                                                                             #
         | 
| 8 | 
            +
            # Copyright (C) 2005-2007 John Vorhauer                                       #
         | 
| 9 | 
            +
            # Copyright (C) 2007-2012 John Vorhauer, Jens Wille                           #
         | 
| 10 | 
            +
            #                                                                             #
         | 
| 11 | 
            +
            # Lingo is free software; you can redistribute it and/or modify it under the  #
         | 
| 12 | 
            +
            # terms of the GNU Affero General Public License as published by the Free     #
         | 
| 13 | 
            +
            # Software Foundation; either version 3 of the License, or (at your option)   #
         | 
| 14 | 
            +
            # any later version.                                                          #
         | 
| 15 | 
            +
            #                                                                             #
         | 
| 16 | 
            +
            # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY    #
         | 
| 17 | 
            +
            # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
         | 
| 18 | 
            +
            # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
         | 
| 19 | 
            +
            # more details.                                                               #
         | 
| 20 | 
            +
            #                                                                             #
         | 
| 21 | 
            +
            # You should have received a copy of the GNU Affero General Public License    #
         | 
| 22 | 
            +
            # along with Lingo. If not, see <http://www.gnu.org/licenses/>.               #
         | 
| 23 | 
            +
            #                                                                             #
         | 
| 24 | 
            +
            ###############################################################################
         | 
| 25 | 
            +
            #++
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            class Lingo
         | 
| 28 | 
            +
             | 
| 29 | 
            +
              class ShowProgress
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                def initialize(obj, max, name = nil, doit = true, text = 'progress')
         | 
| 32 | 
            +
                  return yield self unless max && doit
         | 
| 33 | 
            +
             | 
| 34 | 
            +
                  @out = obj.instance_variable_get(:@lingo).config.stderr
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                  # To get the length of the formatted string we have
         | 
| 37 | 
            +
                  # to actually substitute the placeholder.
         | 
| 38 | 
            +
                  fmt = ' [%3d%%]'
         | 
| 39 | 
            +
                  len = (fmt % 0).length
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                  # Now we know how far to "go back" to
         | 
| 42 | 
            +
                  # overwrite the formatted string...
         | 
| 43 | 
            +
                  back = "\b" * len
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                  @fmt = fmt       + back
         | 
| 46 | 
            +
                  @clr = ' ' * len + back
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                  print name, ': ' if name
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                  @rat, @cnt, @next = max / 100.0, 0, 0
         | 
| 51 | 
            +
                  print text
         | 
| 52 | 
            +
                  step
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                  yield self
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                  print "#{@clr} done.\n"
         | 
| 57 | 
            +
                end
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                def [](value)
         | 
| 60 | 
            +
                  if defined?(@cnt)
         | 
| 61 | 
            +
                    @cnt = value
         | 
| 62 | 
            +
                    step if @cnt >= @next
         | 
| 63 | 
            +
                  end
         | 
| 64 | 
            +
                end
         | 
| 65 | 
            +
             | 
| 66 | 
            +
                private
         | 
| 67 | 
            +
             | 
| 68 | 
            +
                def step
         | 
| 69 | 
            +
                  percent = @cnt / @rat
         | 
| 70 | 
            +
                  @next = (percent + 1) * @rat
         | 
| 71 | 
            +
             | 
| 72 | 
            +
                  print @fmt % percent if percent.finite?
         | 
| 73 | 
            +
                end
         | 
| 74 | 
            +
             | 
| 75 | 
            +
                def print(*args)
         | 
| 76 | 
            +
                  @out.print(*args)
         | 
| 77 | 
            +
                end
         | 
| 78 | 
            +
             | 
| 79 | 
            +
              end
         | 
| 80 | 
            +
             | 
| 81 | 
            +
            end
         | 
    
        data/lib/lingo/version.rb
    CHANGED
    
    
    
        data/lib/lingo.rb
    CHANGED
    
    | @@ -25,6 +25,8 @@ | |
| 25 25 | 
             
            #++
         | 
| 26 26 |  | 
| 27 27 | 
             
            require 'stringio'
         | 
| 28 | 
            +
            require 'pathname'
         | 
| 29 | 
            +
            require 'fileutils'
         | 
| 28 30 | 
             
            require 'benchmark'
         | 
| 29 31 | 
             
            require 'nuggets/file/ext'
         | 
| 30 32 | 
             
            require 'nuggets/env/user_home'
         | 
| @@ -43,7 +45,8 @@ class Lingo | |
| 43 45 | 
             
              CURR = ENV['LINGO_CURR'] || '.'
         | 
| 44 46 |  | 
| 45 47 | 
             
              # The search path for Lingo dictionary and configuration files.
         | 
| 46 | 
            -
              PATH = ENV['LINGO_PATH']  | 
| 48 | 
            +
              PATH = ENV['LINGO_PATH'].nil? ? [CURR, HOME, BASE] :
         | 
| 49 | 
            +
                     ENV['LINGO_PATH'].split(File::PATH_SEPARATOR)
         | 
| 47 50 |  | 
| 48 51 | 
             
              ENV['LINGO_PLUGIN_PATH'] ||= File.join(HOME, 'plugins')
         | 
| 49 52 |  | 
| @@ -59,7 +62,7 @@ class Lingo | |
| 59 62 | 
             
              # Default encoding
         | 
| 60 63 | 
             
              ENC = 'UTF-8'.freeze
         | 
| 61 64 |  | 
| 62 | 
            -
               | 
| 65 | 
            +
              SEP_RE = %r{[; ,|]}
         | 
| 63 66 |  | 
| 64 67 | 
             
              class << self
         | 
| 65 68 |  | 
| @@ -79,7 +82,7 @@ class Lingo | |
| 79 82 | 
             
                  glob = File.join('??', glob) if type == :dict
         | 
| 80 83 |  | 
| 81 84 | 
             
                  [].tap { |list| walk(path, options) { |dir|
         | 
| 82 | 
            -
                    Dir[File.join(dir, glob)].sort | 
| 85 | 
            +
                    Dir[File.join(dir, glob)].sort!.each { |file|
         | 
| 83 86 | 
             
                      pn = Pathname.new(file)
         | 
| 84 87 | 
             
                      list << realpath_for(pn, path) if pn.file?
         | 
| 85 88 | 
             
                    }
         | 
| @@ -110,29 +113,69 @@ class Lingo | |
| 110 113 | 
             
                  File.join(options_for(type)[:dir], basename(type, file))
         | 
| 111 114 | 
             
                end
         | 
| 112 115 |  | 
| 116 | 
            +
                # Adds the given entries to the end of the search path (PATH).
                def append_path(*path)
                  include_path(path, false)
                end
         | 
| 119 | 
            +
             | 
| 120 | 
            +
                # Adds the given entries to the front of the search path (PATH).
                def prepend_path(*path)
                  front = true
                  include_path(path, front)
                end
         | 
| 123 | 
            +
             | 
| 124 | 
            +
                # Looks up the constant whose name is the camel-cased form of
                # +name+ inside +klass+ (this object by default). A NameError
                # during the lookup is re-raised as NameNotFoundError, built
                # from +klass+ and the original +name+.
                def get_const(name, klass = self)
                  begin
                    const_name = name.camelcase
                    klass.const_get(const_name)
                  rescue NameError
                    raise NameNotFoundError.new(klass, name)
                  end
                end
         | 
| 129 | 
            +
             | 
| 113 130 | 
             
                private
         | 
| 114 131 |  | 
| 132 | 
            +
                # Inserts the entries of +path+ (converted to strings in place)
                # into PATH: at the front when +pre+ is true, at the end
                # otherwise.
                def include_path(path, pre = false)
                  position = pre ? 0 : -1
                  PATH.insert(position, *path.map!(&:to_s))
                end
         | 
| 135 | 
            +
             | 
| 115 136 | 
             
                def find_file(file, path, options)
         | 
| 116 | 
            -
                   | 
| 137 | 
            +
                  if glob = options[:glob]
         | 
| 138 | 
            +
                    file = File.chomp_ext(file)
         | 
| 139 | 
            +
                    options[:ext] ||= '*'
         | 
| 140 | 
            +
                  end
         | 
| 141 | 
            +
             | 
| 142 | 
            +
                  file = file_with_ext(file, options)
         | 
| 143 | 
            +
                  pn   = Pathname.new(file).cleanpath
         | 
| 117 144 |  | 
| 118 145 | 
             
                  if pn.relative?
         | 
| 119 146 | 
             
                    walk(path, options) { |dir|
         | 
| 120 147 | 
             
                      pn2 = pn.expand_path(dir)
         | 
| 121 | 
            -
                       | 
| 148 | 
            +
                      ex  = pn2.exist?
         | 
| 149 | 
            +
             | 
| 150 | 
            +
                      pn2 = Pathname.glob(pn2).first if glob && !ex
         | 
| 151 | 
            +
                      pn  = pn2 and break if glob ? pn2 : ex
         | 
| 122 152 | 
             
                    }
         | 
| 123 153 | 
             
                  end
         | 
| 124 154 |  | 
| 125 155 | 
             
                  realpath_for(pn, path)
         | 
| 156 | 
            +
                rescue Errno::ENOENT
         | 
| 157 | 
            +
                  raise unless relax = options[:relax]
         | 
| 158 | 
            +
                  relax.respond_to?(:[]) ? relax[file] : file
         | 
| 126 159 | 
             
                end
         | 
| 127 160 |  | 
| 128 161 | 
             
                def find_store(file, path, options)
         | 
| 129 | 
            -
                  base = basename(:dict, find(:dict, file, path) | 
| 162 | 
            +
                  base = basename(:dict, find(:dict, file, path) {
         | 
| 163 | 
            +
                    raise SourceFileNotFoundError.new(nil, find_file(file, path,
         | 
| 164 | 
            +
                      options.merge(glob: true, relax: lambda { |_file|
         | 
| 165 | 
            +
                        raise SourceFileNotFoundError.new(file, _file)
         | 
| 166 | 
            +
                      })
         | 
| 167 | 
            +
                    ))
         | 
| 168 | 
            +
                  })
         | 
| 130 169 |  | 
| 131 170 | 
             
                  walk(path.reverse, options, false) { |dir|
         | 
| 132 171 | 
             
                    Pathname.new(dir).ascend { |i|
         | 
| 133 | 
            -
                       | 
| 134 | 
            -
             | 
| 135 | 
            -
             | 
| 172 | 
            +
                      begin
         | 
| 173 | 
            +
                        stat = i.stat
         | 
| 174 | 
            +
             | 
| 175 | 
            +
                        break true if stat.file? || !stat.writable?
         | 
| 176 | 
            +
                        return File.chomp_ext(File.join(dir, base))
         | 
| 177 | 
            +
                      rescue Errno::ENOENT
         | 
| 178 | 
            +
                      end
         | 
| 136 179 | 
             
                    }
         | 
| 137 180 | 
             
                  }
         | 
| 138 181 |  | 
| @@ -148,7 +191,7 @@ class Lingo | |
| 148 191 | 
             
                end
         | 
| 149 192 |  | 
| 150 193 | 
             
                def path_for(options)
         | 
| 151 | 
            -
                  options[:path] || PATH | 
| 194 | 
            +
                  options[:path] || PATH
         | 
| 152 195 | 
             
                end
         | 
| 153 196 |  | 
| 154 197 | 
             
                def file_with_ext(file, options)
         | 
| @@ -223,30 +266,25 @@ class Lingo | |
| 223 266 |  | 
| 224 267 | 
             
                list.each { |hash|
         | 
| 225 268 | 
             
                  # {'attendee' => {'name'=>'Attendee', 'in'=>'nase', 'out'=>'ohr', 'param'=>'hase'}}
         | 
| 226 | 
            -
                  cfg = hash.values.first.merge('name' => hash.keys.first.camelcase)
         | 
| 269 | 
            +
                  cfg = hash.values.first.merge('name' => name = hash.keys.first.camelcase)
         | 
| 227 270 |  | 
| 228 271 | 
             
                  %w[in out].each { |key| (cfg[key] ||= '').downcase! }
         | 
| 229 272 |  | 
| 230 | 
            -
                  cfg['in']  = last_link | 
| 231 | 
            -
                  cfg['out'] = " | 
| 273 | 
            +
                  cfg['in']  = last_link                     if cfg['in'].empty?
         | 
| 274 | 
            +
                  cfg['out'] = "auto_link-#{auto_link += 1}" if cfg['out'].empty?
         | 
| 232 275 | 
             
                  last_link  = cfg['out']
         | 
| 233 276 |  | 
| 234 | 
            -
                   | 
| 235 | 
            -
                  cfg.update(data) if data
         | 
| 277 | 
            +
                  cfg.update(config["language/attendees/#{name.downcase}"] || {})
         | 
| 236 278 |  | 
| 237 | 
            -
                  attendee = Attendee.const_get( | 
| 238 | 
            -
                  @attendees << attendee
         | 
| 279 | 
            +
                  @attendees << attendee = Attendee.const_get(name).new(cfg, self)
         | 
| 239 280 |  | 
| 240 | 
            -
                   | 
| 241 | 
            -
                     | 
| 242 | 
            -
                  }
         | 
| 243 | 
            -
                  cfg['out'].split(STRING_SEPARATOR_RE).each { |theme|
         | 
| 244 | 
            -
                    supplier[theme] << attendee
         | 
| 281 | 
            +
                  { 'in' => subscriber, 'out' => supplier }.each { |key, target|
         | 
| 282 | 
            +
                    cfg[key].split(SEP_RE).each { |ch| target[ch] << attendee }
         | 
| 245 283 | 
             
                  }
         | 
| 246 284 | 
             
                }
         | 
| 247 285 |  | 
| 248 | 
            -
                supplier.each { | | 
| 249 | 
            -
                   | 
| 286 | 
            +
                supplier.each { |ch, attendees| attendees.each { |att|
         | 
| 287 | 
            +
                  att.add_subscriber(subscriber[ch])
         | 
| 250 288 | 
             
                } }
         | 
| 251 289 | 
             
              end
         | 
| 252 290 |  | 
| @@ -283,6 +321,7 @@ require_relative 'lingo/core_ext' | |
| 283 321 | 
             
            require_relative 'lingo/cachable'
         | 
| 284 322 | 
             
            require_relative 'lingo/reportable'
         | 
| 285 323 | 
             
            require_relative 'lingo/agenda_item'
         | 
| 324 | 
            +
            require_relative 'lingo/show_progress'
         | 
| 286 325 | 
             
            require_relative 'lingo/database'
         | 
| 287 326 | 
             
            require_relative 'lingo/language'
         | 
| 288 327 | 
             
            require_relative 'lingo/attendee'
         |