RubyGems - lingo - Versions diffs - 1.8.0 → 1.8.1 - Mend

lingo 1.8.0 → 1.8.1

Files changed (100) hide show

data/ChangeLog +13 -0
data/README +49 -29
data/Rakefile +28 -4
data/TODO +2 -9
data/bin/lingo +24 -0
data/bin/lingoctl +24 -0
data/de/lingo-dic.txt +559 -74
data/info/gpl-hdr.txt +21 -24
data/lib/lingo.rb +83 -112
data/lib/lingo/agenda_item.rb +53 -0
data/lib/lingo/attendee.rb +261 -0
data/lib/lingo/attendee/abbreviator.rb +95 -97
data/lib/lingo/attendee/debugger.rb +94 -93
data/lib/lingo/attendee/decomposer.rb +76 -83
data/lib/lingo/attendee/dehyphenizer.rb +141 -144
data/lib/lingo/attendee/formatter.rb +65 -0
data/lib/lingo/attendee/multi_worder.rb +302 -0
data/lib/lingo/attendee/noneword_filter.rb +89 -84
data/lib/lingo/attendee/object_filter.rb +91 -0
data/lib/lingo/attendee/sequencer.rb +159 -158
data/lib/lingo/attendee/synonymer.rb +81 -84
data/lib/lingo/attendee/text_reader.rb +242 -0
data/lib/lingo/attendee/text_writer.rb +169 -0
data/lib/lingo/attendee/tokenizer.rb +192 -191
data/lib/lingo/attendee/variator.rb +152 -156
data/lib/lingo/attendee/vector_filter.rb +140 -135
data/lib/lingo/attendee/word_searcher.rb +98 -0
data/lib/lingo/buffered_attendee.rb +69 -0
data/lib/lingo/cachable.rb +58 -0
data/lib/lingo/call.rb +72 -0
data/lib/lingo/cli.rb +26 -0
data/lib/lingo/config.rb +23 -26
data/lib/lingo/core_ext.rb +42 -0
data/lib/lingo/ctl.rb +239 -173
data/lib/lingo/database.rb +148 -496
data/lib/lingo/database/crypter.rb +85 -0
data/lib/lingo/database/gdbm_store.rb +49 -0
data/lib/lingo/database/hash_store.rb +67 -0
data/lib/lingo/database/libcdb_store.rb +58 -0
data/lib/lingo/database/sdbm_store.rb +64 -0
data/lib/lingo/database/show_progress.rb +81 -0
data/lib/lingo/database/source.rb +134 -0
data/lib/lingo/database/source/key_value.rb +62 -0
data/lib/lingo/database/source/multi_key.rb +65 -0
data/lib/lingo/database/source/multi_value.rb +65 -0
data/lib/lingo/database/source/single_word.rb +60 -0
data/lib/lingo/database/source/word_class.rb +64 -0
data/lib/lingo/error.rb +122 -0
data/lib/lingo/language.rb +78 -518
data/lib/lingo/language/dictionary.rb +173 -0
data/lib/lingo/language/grammar.rb +211 -0
data/lib/lingo/language/lexical.rb +66 -0
data/lib/lingo/language/lexical_hash.rb +88 -0
data/lib/lingo/language/token.rb +48 -0
data/lib/lingo/language/word.rb +130 -0
data/lib/lingo/language/word_form.rb +83 -0
data/lib/lingo/reportable.rb +59 -0
data/lib/lingo/version.rb +1 -1
data/lingo-all.cfg +14 -10
data/lingo-call.cfg +5 -5
data/lingo.cfg +14 -12
data/lingo.rb +26 -0
data/lir.cfg +13 -9
data/spec/spec_helper.rb +1 -0
data/test.cfg +11 -11
data/test/attendee/ts_abbreviator.rb +0 -6
data/test/attendee/ts_decomposer.rb +0 -6
data/test/attendee/{ts_multiworder.rb → ts_multi_worder.rb} +1 -7
data/test/attendee/ts_noneword_filter.rb +1 -7
data/test/attendee/{ts_objectfilter.rb → ts_object_filter.rb} +1 -7
data/test/attendee/ts_sequencer.rb +0 -6
data/test/attendee/ts_synonymer.rb +0 -6
data/test/attendee/{ts_textreader.rb → ts_text_reader.rb} +1 -7
data/test/attendee/{ts_textwriter.rb → ts_text_writer.rb} +1 -7
data/test/attendee/ts_tokenizer.rb +0 -6
data/test/attendee/ts_variator.rb +0 -6
data/test/attendee/ts_vector_filter.rb +1 -7
data/test/attendee/{ts_wordsearcher.rb → ts_word_searcher.rb} +1 -7
data/test/ref/artikel.non +2 -29
data/test/ref/artikel.seq +13 -8
data/test/ref/artikel.vec +30 -15
data/test/ref/artikel.ven +29 -14
data/test/ref/artikel.ver +58 -43
data/test/ref/lir.csv +146 -145
data/test/ref/lir.non +186 -210
data/test/ref/lir.seq +54 -50
data/test/test_helper.rb +41 -36
data/test/ts_database.rb +12 -11
data/test/ts_language.rb +118 -68
metadata +67 -29
data/lib/lingo/attendee/multiworder.rb +0 -301
data/lib/lingo/attendee/objectfilter.rb +0 -86
data/lib/lingo/attendee/textreader.rb +0 -237
data/lib/lingo/attendee/textwriter.rb +0 -196
data/lib/lingo/attendee/wordsearcher.rb +0 -96
data/lib/lingo/attendees.rb +0 -289
data/lib/lingo/const.rb +0 -131
data/lib/lingo/modules.rb +0 -98
data/lib/lingo/types.rb +0 -285
data/lib/lingo/utilities.rb +0 -40

data/lib/lingo/database/source/key_value.rb ADDED

@@ -0,0 +1,62 @@
+# encoding: utf-8
+#--
+###############################################################################
+#                                                                             #
+# Lingo -- A full-featured automatic indexing system                          #
+#                                                                             #
+# Copyright (C) 2005-2007 John Vorhauer                                       #
+# Copyright (C) 2007-2012 John Vorhauer, Jens Wille                           #
+#                                                                             #
+# Lingo is free software; you can redistribute it and/or modify it under the  #
+# terms of the GNU Affero General Public License as published by the Free     #
+# Software Foundation; either version 3 of the License, or (at your option)   #
+# any later version.                                                          #
+#                                                                             #
+# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY    #
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU Affero General Public License    #
+# along with Lingo. If not, see <http://www.gnu.org/licenses/>.               #
+#                                                                             #
+###############################################################################
+#++
+class Lingo
+  class Database
+    class Source
+      # Derived from Source, this class handles files in the <tt>KeyValue</tt> format.
+      # A line <tt>"Fachbegriff*Fachterminus\n"</tt> is converted to <tt>[ 'fachbegriff', ['fachterminus#s'] ]</tt>.
+      # The word class can be influenced via the <tt>def-wc</tt> parameter.
+      # The separator between key and projection can be changed via the <tt>separator</tt> parameter.
+      class KeyValue < self
+        def initialize(id, lingo)
+          super  # presumably sets @config, @legal_word and @wordclass (not visible here) -- TODO confirm
+          @separator = @config.fetch('separator', '*')
+          @line_pattern = Regexp.new('^(' + @legal_word + ')' + Regexp.escape(@separator) + '(' + @legal_word + ')$')
+        end
+        private
+        def convert_line(line, key, val)  # returns [key, [value + word class marker]]
+          key, val = key.strip, val.strip
+          val = '' if key == val  # a value identical to its key is stored as an empty string
+          val = [val + '#' + @wordclass]
+          [key, val]
+        end
+      end
+    end
+  end
+end

data/lib/lingo/database/source/multi_key.rb ADDED

@@ -0,0 +1,65 @@
+# encoding: utf-8
+#--
+###############################################################################
+#                                                                             #
+# Lingo -- A full-featured automatic indexing system                          #
+#                                                                             #
+# Copyright (C) 2005-2007 John Vorhauer                                       #
+# Copyright (C) 2007-2012 John Vorhauer, Jens Wille                           #
+#                                                                             #
+# Lingo is free software; you can redistribute it and/or modify it under the  #
+# terms of the GNU Affero General Public License as published by the Free     #
+# Software Foundation; either version 3 of the License, or (at your option)   #
+# any later version.                                                          #
+#                                                                             #
+# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY    #
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU Affero General Public License    #
+# along with Lingo. If not, see <http://www.gnu.org/licenses/>.               #
+#                                                                             #
+###############################################################################
+#++
+class Lingo
+  class Database
+    class Source
+      # Derived from Source, this class handles files in the <tt>MultiKey</tt> format.
+      # A line <tt>"Triumph;Sieg;Erfolg\n"</tt> is converted to <tt>[ 'triumph', ['sieg', 'erfolg'] ]</tt>.
+      # The special handling takes place in the method Database#convert, where key-value pairs of the form
+      # <tt>[ 'sieg', ['triumph'] ]</tt> and <tt>[ 'erfolg', ['triumph'] ]</tt> are created from it.
+      # The separator between key and projection can be changed via the <tt>separator</tt> parameter.
+      class MultiKey < self
+        def initialize(id, lingo)
+          super  # presumably sets @config and @legal_word (not visible here) -- TODO confirm
+          @separator = @config.fetch('separator', ';')
+          @line_pattern = Regexp.new('^' + @legal_word + '(?:' + Regexp.escape(@separator) + @legal_word + ')*$')
+        end
+        def set(db, key, val)  # writes one entry per value, each mapping back to [key]
+          val.each { |v| db[v] = [key] }
+        end
+        private
+        def convert_line(line, key, val)  # ignores key/val and re-splits the raw line instead
+          values = line.split(@separator).map { |value| value.strip }
+          [values[0], values[1..-1]]
+        end
+      end
+    end
+  end
+end

data/lib/lingo/database/source/multi_value.rb ADDED

@@ -0,0 +1,65 @@
+# encoding: utf-8
+#--
+###############################################################################
+#                                                                             #
+# Lingo -- A full-featured automatic indexing system                          #
+#                                                                             #
+# Copyright (C) 2005-2007 John Vorhauer                                       #
+# Copyright (C) 2007-2012 John Vorhauer, Jens Wille                           #
+#                                                                             #
+# Lingo is free software; you can redistribute it and/or modify it under the  #
+# terms of the GNU Affero General Public License as published by the Free     #
+# Software Foundation; either version 3 of the License, or (at your option)   #
+# any later version.                                                          #
+#                                                                             #
+# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY    #
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU Affero General Public License    #
+# along with Lingo. If not, see <http://www.gnu.org/licenses/>.               #
+#                                                                             #
+###############################################################################
+#++
+class Lingo
+  class Database
+    class Source
+      # Derived from Source, this class handles files in the <tt>MultiValue</tt> format.
+      # A line <tt>"Triumph;Sieg;Erfolg\n"</tt> is converted to <tt>[ nil, ['triumph', 'sieg', 'erfolg'] ]</tt>.
+      # The separator between key and projection can be changed via the <tt>separator</tt> parameter.
+      class MultiValue < self
+        def initialize(id, lingo)
+          super  # presumably sets @config and @legal_word (not visible here) -- TODO confirm
+          @separator = @config.fetch('separator', ';')
+          @line_pattern = Regexp.new('^' + @legal_word + '(?:' + Regexp.escape(@separator) + @legal_word + ')*$')
+          @idx = -1  # counter for generated keys; incremented before use, so the first one is 0
+        end
+        def set(db, key, val)  # stores val under a generated IDX_REF key, then points every value at that key
+          db[key = "#{IDX_REF}#{@idx += 1}"] = val
+          val.each { |v| db[v] = [key] }
+        end
+        private
+        def convert_line(line, key, val)  # the key is always nil; #set generates the real one
+          [nil, line.split(@separator).map { |value| value.strip }]
+        end
+      end
+    end
+  end
+end

data/lib/lingo/database/source/single_word.rb ADDED

@@ -0,0 +1,60 @@
+# encoding: utf-8
+#--
+###############################################################################
+#                                                                             #
+# Lingo -- A full-featured automatic indexing system                          #
+#                                                                             #
+# Copyright (C) 2005-2007 John Vorhauer                                       #
+# Copyright (C) 2007-2012 John Vorhauer, Jens Wille                           #
+#                                                                             #
+# Lingo is free software; you can redistribute it and/or modify it under the  #
+# terms of the GNU Affero General Public License as published by the Free     #
+# Software Foundation; either version 3 of the License, or (at your option)   #
+# any later version.                                                          #
+#                                                                             #
+# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY    #
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU Affero General Public License    #
+# along with Lingo. If not, see <http://www.gnu.org/licenses/>.               #
+#                                                                             #
+###############################################################################
+#++
+class Lingo
+  class Database
+    class Source
+      # Derived from Source, this class handles files in the <tt>SingleWord</tt> format.
+      # A line <tt>"Fachbegriff\n"</tt> is converted to <tt>[ 'fachbegriff', ['#s'] ]</tt>.
+      # The word class can be influenced via the <tt>def-wc</tt> parameter.
+      class SingleWord < self
+        def initialize(id, lingo)
+          super  # presumably sets @config and @legal_word (not visible here) -- TODO confirm
+          @wc     = @config.fetch('def-wc',     's').downcase
+          @mul_wc = @config.fetch('def-mul-wc', @wc).downcase
+          @line_pattern = %r{^(#{@legal_word})$}
+        end
+        private
+        def convert_line(line, key, val)  # keys containing whitespace get @mul_wc, all others get @wc
+          [key = key.strip, %W[##{key =~ /\s/ ? @mul_wc : @wc}]]
+        end
+      end
+    end
+  end
+end

data/lib/lingo/database/source/word_class.rb ADDED

@@ -0,0 +1,64 @@
+# encoding: utf-8
+#--
+###############################################################################
+#                                                                             #
+# Lingo -- A full-featured automatic indexing system                          #
+#                                                                             #
+# Copyright (C) 2005-2007 John Vorhauer                                       #
+# Copyright (C) 2007-2012 John Vorhauer, Jens Wille                           #
+#                                                                             #
+# Lingo is free software; you can redistribute it and/or modify it under the  #
+# terms of the GNU Affero General Public License as published by the Free     #
+# Software Foundation; either version 3 of the License, or (at your option)   #
+# any later version.                                                          #
+#                                                                             #
+# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY    #
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU Affero General Public License    #
+# along with Lingo. If not, see <http://www.gnu.org/licenses/>.               #
+#                                                                             #
+###############################################################################
+#++
+class Lingo
+  class Database
+    class Source
+      # Derived from Source, this class handles files in the <tt>WordClass</tt> format.
+      # A line <tt>"essen,essen #v essen #o esse #s\n"</tt> is converted to <tt>[ 'essen', ['esse#s', 'essen#v', 'essen#o'] ]</tt>.
+      # The separator between key and projection can be changed via the <tt>separator</tt> parameter.
+      class WordClass < self
+        def initialize(id, lingo)
+          super  # presumably sets @config and @legal_word (not visible here) -- TODO confirm
+          @separator = @config.fetch('separator', ',')
+          @line_pattern = Regexp.new('^(' + @legal_word + ')' + Regexp.escape(@separator) + '((?:' + @legal_word + '#\w)+)$')
+        end
+        private
+        def convert_line(line, key, val)  # returns [key, list of "form#wordclass" strings]
+          key, valstr = key.strip, val.strip
+          val = valstr.gsub(/\s+#/, '#').scan(/\S.+?\s*#\w/)
+          val = val.map do |str|
+            str =~ /^(.+)#(.)/
+            ($1 == key ? '' : $1) + '#' + $2
+          end
+          [key, val]
+        end
+      end
+    end
+  end
+end

data/lib/lingo/error.rb ADDED

@@ -0,0 +1,122 @@
+# encoding: utf-8
+#--
+###############################################################################
+#                                                                             #
+# Lingo -- A full-featured automatic indexing system                          #
+#                                                                             #
+# Copyright (C) 2005-2007 John Vorhauer                                       #
+# Copyright (C) 2007-2012 John Vorhauer, Jens Wille                           #
+#                                                                             #
+# Lingo is free software; you can redistribute it and/or modify it under the  #
+# terms of the GNU Affero General Public License as published by the Free     #
+# Software Foundation; either version 3 of the License, or (at your option)   #
+# any later version.                                                          #
+#                                                                             #
+# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY    #
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU Affero General Public License    #
+# along with Lingo. If not, see <http://www.gnu.org/licenses/>.               #
+#                                                                             #
+###############################################################################
+#++
+class Lingo
+  class LingoError < StandardError; end  # common ancestor of the error classes defined below
+  class NoWritableStoreError < LingoError  # carries the file and search path involved
+    attr_reader :file, :path
+    def initialize(file, path)
+      @file, @path = file, path
+    end
+    def to_s  # fixed message; file and path are only exposed through the readers
+      'No writable store found in search path'
+    end
+  end
+  class ConfigError < LingoError  # base for configuration errors; +id+ names the offending entry
+    attr_reader :id
+    def initialize(id)
+      @id = id
+    end
+  end
+  class ConfigLoadError < ConfigError  # wraps the underlying error raised while loading the config
+    attr_reader :err
+    def initialize(err)  # replaces ConfigError#initialize without calling super, so only @err is set
+      @err = err
+    end
+    def to_s
+      "Error loading config: #{err}"
+    end
+  end
+  class NoDatabaseConfigError < ConfigError
+    def to_s
+      "No such database `#{id}' defined."
+    end
+  end
+  class InvalidDatabaseConfigError < ConfigError
+    def to_s
+      "Invalid database configuration `#{id}'."
+    end
+  end
+  class MissingConfigError < ConfigError
+    def to_s
+      "Missing configuration for `#{id}'."
+    end
+  end
+  class FileNotFoundError < LingoError  # +name+ is the file that could not be found
+    attr_reader :name
+    def initialize(name)
+      @name = name
+    end
+    def to_s
+      "No such file `#{name}'."
+    end
+  end
+  class SourceFileNotFoundError < FileNotFoundError  # additionally records the +id+ the file was needed for
+    attr_reader :id
+    def initialize(name, id)
+      super(name)
+      @id = id
+    end
+    def to_s
+      "No such source file `#{name}' for `#{id}'."
+    end
+  end
+end

data/lib/lingo/language.rb CHANGED

@@ -1,529 +1,89 @@
 # encoding: utf-8
 #--
-# LINGO ist ein Indexierungssystem mit Grundformreduktion, Kompositumzerlegung,
-# Mehrworterkennung und Relationierung.
-#
-# Copyright (C) 2005-2007 John Vorhauer
-# Copyright (C) 2007-2011 John Vorhauer, Jens Wille
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU Affero General Public License as published by the Free
-# Software Foundation; either version 3 of the License, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
-# details.
-#
-# You should have received a copy of the GNU Affero General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
-#
-# For more information visit http://www.lex-lingo.de or contact me at
-# welcomeATlex-lingoDOTde near 50°55'N+6°55'E.
-#
-# Lex Lingo rules from here on
+###############################################################################
+#                                                                             #
+# Lingo -- A full-featured automatic indexing system                          #
+#                                                                             #
+# Copyright (C) 2005-2007 John Vorhauer                                       #
+# Copyright (C) 2007-2012 John Vorhauer, Jens Wille                           #
+#                                                                             #
+# Lingo is free software; you can redistribute it and/or modify it under the  #
+# terms of the GNU Affero General Public License as published by the Free     #
+# Software Foundation; either version 3 of the License, or (at your option)   #
+# any later version.                                                          #
+#                                                                             #
+# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY    #
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS   #
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for     #
+# more details.                                                               #
+#                                                                             #
+# You should have received a copy of the GNU Affero General Public License    #
+# along with Lingo. If not, see <http://www.gnu.org/licenses/>.               #
+#                                                                             #
+###############################################################################
 #++
-require_relative 'const'
-require_relative 'modules'
-require_relative 'database'
+require_relative 'language/lexical_hash'
+require_relative 'language/dictionary'
+require_relative 'language/grammar'
+require_relative 'language/word_form'
+require_relative 'language/token'
+require_relative 'language/lexical'
+require_relative 'language/word'
 class Lingo
-  # The class LexicalHash provides access to the Lingo databases. In contrast to the
-  # class DbmFile, which only returns strings as its result, the result returned here is an
-  # array of Lexical objects.
-  class LexicalHash
-    include Cachable
-    include Reportable
-    def initialize(id, lingo)
-      init_reportable
-      init_cachable
-      report_prefix( id )
-      # Read parameters from de.lang:language/dictionary/databases
-      config = lingo.config['language/dictionary/databases/' + id]
-      Lingo.error( "LexicalHash kann Datenquelle mit ID '#{id}' in de.lang:language/dictionary/databases' nicht finden" ) if config.nil?
-      @wordclass = config.fetch( 'def-wc', LA_UNKNOWN )
-      # Create the store
-      @source = DbmFile.new(id, lingo)
-      @source.open
-    end
-    def close
-      @source.close
-    end
-    def [](ikey)
-      # Normalize the key
-      inc('total requests')
-      key = ikey.downcase
-      # Query the cache
-      if hit?(key)
-        inc('cache hits')
-        return retrieve(key)
-      end
-      # Read the value from the database
-      inc('source reads')
-      record = @source[key]
-      # Convert the values into internal objects
-      record = record.collect do |str|
-        case str
-          when /^\*\d+$/
-            # Hint for the Multiworder that a multiword with a length of (\d) words is to be checked
-            str
-          when /^#(.)$/
-            # Only the word class is given => the result word equals the key
-            Lexical.new(key, $1)
-          when /^([^#]+?)\s*#(.)$/
-            # Result word and word class are given
-            Lexical.new($1, $2)
-          when /^([^#]+)$/
-            # Result word given without a word class
-            Lexical.new($1, @wordclass)
-          else
-            str
-        end
-      end.compact.sort.uniq unless record.nil?
-      # Return the result
-      inc('data found') unless record.nil?
-      store(key, record)
-    end
-  end
-  class Dictionary
-    include Cachable
-    include Reportable
-    def initialize(config, lingo)
-      init_reportable
-      init_cachable
-      dictionary_config = lingo.dictionary_config
-      # Check the parameters
-      raise "Keine Sprach-Konfiguration angegeben!" if dictionary_config.nil?
-      raise "Keine Parameter angegeben!" if config.nil?
-      raise "Keine Datenquellen angegeben!" unless config.has_key?('source')
-      # Read the parameters
-      @all_sources = (config['mode'].nil? || config['mode'].downcase=='all')
-      @sources = config['source'].map { |src| LexicalHash.new(src, lingo) }
-      lingo.dictionaries << self
-      # Read parameters from de.lang:language/dictionary
-      @suffixes = []
-      @infixes = []
-      dictionary_config['suffix'].each {|arr|
-        typ, sufli = arr
-        typ.downcase!
-        sufli.split.each {|suf|
-          su, ex = suf.split('/')
-          fix = [Regexp.new(su+'$', 'i'), ex.nil? ? '*' : ex, typ]
-          (typ=='f' ? @infixes : @suffixes) << fix
-        }
-      } if dictionary_config.has_key?( 'suffix' )
-    end
-    def close
-      @sources.each(&:close)
-    end
-    def report
-      super.tap { |rep| @sources.each { |src| rep.update(src.report) } }
-    end
-    # _dic_.find_word( _aString_ ) -> _aNewWord_
-    #
-    # Creates a word from the string and looks it up in the dictionary.
-    def find_word(string)
-      # Query the cache
-      key = string.downcase
-      if hit?(key)
-        inc('cache hits')
-        word = retrieve(key)
-        word.form = string
-        return word
-      end
-      word = Word.new(string, WA_UNKNOWN)
-      lexicals = select_with_suffix(string)
-      unless lexicals.empty?
-        word.lexicals = lexicals
-        word.attr = WA_IDENTIFIED
-      end
-      store(key, word)
-    end
-    def find_synonyms(obj)
-      # all lexicals of the word
-      lexis = obj.lexicals
-      lexis = [obj] if lexis.empty? && obj.attr==WA_UNKNOWN
-      # all synonyms found
-      synos = []
-      # multiworder optimization
-      key_ref = %r{\A#{Regexp.escape(KEY_REF)}\d+}o
-      lexis.each do |lex|
-        # Exclude synonyms for parts of a compound
-        next if obj.attr==WA_KOMPOSITUM && lex.attr!=LA_KOMPOSITUM
-        # Exclude synonyms of synonyms
-        next if lex.attr==LA_SYNONYM
-        select(lex.form).each do |syn|
-          synos << syn unless syn =~ key_ref
-        end
-      end
-      synos
-    end
-    # _dic_.select( _aString_ ) -> _ArrayOfLexicals_
-    #
-    # Searches all dictionaries and returns the first hit (+mode = first+), or all hits (+mode = all+)
-    def select(string)
-      lexicals = []
-      @sources.each { |src|
-        if lexis = src[string]
-          lexicals += lexis
-          break unless @all_sources
-        end
-      }
-      lexicals.sort.uniq
-    end
-    # _dic_.select_with_suffix( _aString_ ) -> _ArrayOfLexicals_
-    #
-    # Searches all dictionaries and returns the first hit (+mode = first+), or all hits (+mode = all+).
-    # Also looks up words that have been stripped of word-class-specific suffixes.
-    def select_with_suffix(string)
-      lexicals = select(string)
-      if lexicals.empty?
-        suffix_lexicals(string).each { |suflex|
-          select(suflex.form).each { |srclex|
-            lexicals << srclex if suflex.attr == srclex.attr
-          }
-        }
-      end
-      lexicals
-    end
-    # _dic_.select_with_infix( _aString_ ) -> _ArrayOfLexicals_
-    #
-    # Searches all dictionaries and returns the first hit (+mode = first+), or all hits (+mode = all+).
-    # Also looks up words that end in a linking element (infix).
-    def select_with_infix(string)
-      lexicals = select(string)
-      if lexicals.size == 0
-        infix_lexicals(string).each { |inlex|
-          select(inlex.form).each { |srclex|
-            lexicals << srclex
-          }
-        }
-      end
-      lexicals
-    end
-    # _dic_.suffix_lexicals( _aString_ ) -> _ArrayOfLexicals_
-    #
-    # Returns all possible lexicals that are applicable to the string based on its ending:
-    #
-    # dic.suffix_lexicals("Hasens") -> [(hasen/s), (hasen/e), (has/e)]
-    def suffix_lexicals(string)
-      lexicals = []
-      newform = regex = ext = type = nil
-      @suffixes.each { |suf|
-        regex, ext, type = suf
-        if string =~ regex
-          newform = $`+((ext=="*")?'':ext)+$'
-          lexicals << Lexical.new(newform, type)
-        end
-      }
-      lexicals
-    end
-    # _dic_.gap_lexicals( _aString_ ) -> _ArrayOfLexicals_
-    #
-    # Returns all possible lexicals that are applicable to the string based on its ending:
-    def infix_lexicals(string)
-      lexicals = []
-      newform = regex = ext = type = nil
-      @infixes.each { |suf|
-        regex, ext, type = suf
-        if string =~ regex
-          newform = $`+((ext=="*")?'':ext)+$'
-          lexicals << Lexical.new(newform, type)
-        end
-      }
-      lexicals
-    end
-  end
-  class Compositum  # empty class; no behaviour is defined here
-  end
-  # The class Grammar contains grammatical peculiarities of a language. Currently, compound
-  # recognition has its place here; it can be invoked with the method find_compositum.
-  # The class Grammar is initialised exactly like a Dictionary. The dictionary specified at initialisation is the basis
-  # for recognising the parts of a compound.
-  class Grammar
-    # Results of compound recognition are stored and reused when called again with the same search word
-    include Cachable
-    # Processing statistics are recorded and displayed with the -s option
-    include Reportable
-    # initialize(config, lingo) -> _Grammar_
-    # config = attendee-specific parameters (handed on to Dictionary.new)
-    # lingo = object whose +dictionary_config+ provides the 'compositum' settings (original note: database configuration from de.lang)
-    def initialize(config, lingo)
-      init_reportable
-      init_cachable
-      @dictionary = Dictionary.new(config, lingo)
-      # Load the language-specific settings for compound processing (the values below can be adjusted as desired
-      # in the configuration file de.lang)
-      comp = lingo.dictionary_config['compositum']
-      # A word must be at least 8 characters long (default) for a check to take place at all.
-      @comp_min_word_size = (comp['min-word-size'] || '8').to_i
-      # The average length of the compound parts must be at least 4 characters (default), otherwise it is not a
-      # valid compound.
-      @comp_min_avg_part_size = (comp['min-avg-part-size'] || '4').to_i
-      # The shortest compound part must be at least 1 character long (default)
-      @comp_min_part_size = (comp['min-part-size'] || '1').to_i
-      # A compound may consist of at most 4 parts (default)
-      @comp_max_parts = (comp['max-parts'] || '4').to_i
-      # The word class of a compound part can be marked separately to tell it apart from the word classes of normal words,
-      # e.g. Hausmeister => ['haus/s', 'meister/s'] or Hausmeister => ['haus/s+', 'meister/s+'] with
-      # append-wordclass = '+'
-      @append_wc = comp.fetch( 'append-wordclass', '' )
-      # Certain sequences can be rejected as invalid compounds, e.g. a compound made of two adjectives is not a
-      # compound, hence skip-sequences = 'aa'; the configured entries are downcased here
-      @sequences = comp.fetch( 'skip-sequences', [] ).collect { |sq| sq.downcase }
-      # List of suggestions for a decomposition
-      @suggestions = []
-    end
-    def close  # Closes the dictionary created in initialize by delegating to @dictionary.close.
-      @dictionary.close
-    end
-    alias_method :report_grammar, :report  # keeps the previously defined +report+ reachable (presumably the one from Reportable -- TODO confirm)
-    def report  # Returns the aliased report updated (via +update+) with the entries of @dictionary.report.
-      rep = report_grammar
-      rep.update(@dictionary.report)
-      rep
-    end
-    # find_compositum(string) -> Word if level=1
-    # find_compositum(string) -> [stats, lexicals, seqs] if level!=1 ([[],[],''] when nothing valid was found)
-    #
-    # find_compositum works on different levels because the method is also called recursively. A level greater than 1
-    # therefore corresponds to a recursive call
-    def find_compositum(string, level=1, has_tail=false)
-      # Check whether string has already been tested for being a compound. If so, return the result of the previous call (level 1 only).
-      key = string.downcase
-      if level == 1 && hit?(key)
-        inc('cache hits')
-        return retrieve(key)
-      end
-      # Preset the result
-      comp = Word.new(string, WA_UNKNOWN)
-      # Validity check: only test strings of a minimum length. NOTE(review): `<=` also skips strings whose size equals @comp_min_word_size.
-      if string.size <= @comp_min_word_size
-        inc('String zu kurz')
-        return (level==1) ? comp : [[],[],'']
-      end
-      # Start the compound recognition
-      inc('Komposita geprüft')
-      stats, lexis, seqs = permute_compositum(string.downcase, level, has_tail)
-      if level==1
-        # On level 1 return the compound Word; lexicals other than the compound itself get @append_wc appended to their attribute
-        if lexis.size > 0 && is_valid?( string, stats, lexis, seqs )
-          inc('Komposita erkannt')
-          comp.attr = WA_KOMPOSITUM
-          comp.lexicals = lexis.collect do |lex|
-            (lex.attr==LA_KOMPOSITUM) ? lex : Lexical.new(lex.form, lex.attr+@append_wc)
-          end
-        end
-        return store(key, comp)
-      end
-      # Validity check (recursive levels): hand back the raw triple or the empty triple
-      if lexis.size > 0 && is_valid?(string, stats, lexis, seqs)
-        [stats, lexis, seqs]
-      else
-        [[],[],'']
-      end
-    end
-    private
-    def is_valid?(string, stats, lexis, seqs)  # Result is truthy only if every check below passes; +lexis+ is not consulted here.
-      is_valid = true
-      is_valid &&= (stats.size <= @comp_max_parts)  # number of entries in stats must not exceed the maximum part count
-      is_valid &&= (stats.sort[0] >= @comp_min_part_size)  # smallest entry of stats must reach the minimum part size
-      is_valid &&= (string.size/stats.size) >= @comp_min_avg_part_size  # average part size; integer division when both sizes are Integers
-      is_valid &&= @sequences.index( seqs ).nil? unless @sequences.empty?  # seqs must not be one of the configured skip sequences
-      is_valid
-    end
-    # permute_compositum( string, level, has_tail ) ->  [stats, lexicals, seqs]
-    def permute_compositum(string, level, has_tail)
-      @suggestions[level] = [] if @suggestions[level].nil?
-      # Find the last hyphen in the word and split there
-      if string =~ /^(.+)-([^-]+)$/
-        test_compositum($1, '-', $2, level, has_tail)
-      else
-        length = string.length
-        # Try every split position
-        1.upto(length - 1) do |p|
-          # Split the string into front and back part and test the pair
-          fr_str, ba_str = string.slice(0...p), string.slice(p...length)
-          stats, lexis, seqs = test_compositum(fr_str, '', ba_str, level, has_tail)
-          unless lexis.empty?
-            if lexis[-1].attr==LA_TAKEITASIS
-              # => half a compound: remember it as a suggestion only
-              @suggestions[level] << [stats, lexis, seqs]
-            else
-              # => complete compound: return immediately. NOTE(review): @suggestions[level] is not cleared on this path.
-              return [stats, lexis, seqs]
-            end
-          end
-        end
-        # All split positions tried and still no definitive compound recognized: take the first stored suggestion, if any.
-        if @suggestions[level].empty?
-          [[],[],'']
-        else
-          stats, lexis, seqs = @suggestions[level][0]
-          @suggestions[level].clear
-          [stats, lexis, seqs]
-        end
-      end
-    end
-    # test_compositum(front_string, infix, back_string, level, has_tail) ->  [stats, lexicals, seq]
-    #
-    # Tests one specific split of a string (front part, infix, back part) for being a compound; returns [[],[],''] if either part cannot be resolved
-    def test_compositum(front_string, infix, back_string, level, has_tail)
-      # Remember the part sizes for the validity check
-      stats = [front_string.size, back_string.size]
-      seqs = ['?', '?']
-      # Resolve the back part first
-      # Option 1:  word with or without suffix
-      back_lexicals = @dictionary.select_with_suffix(back_string)
-      unless back_lexicals.empty?
-        back_form = has_tail ? back_string : back_lexicals.sort[0].form
-        seqs[1] = back_lexicals.sort[0].attr
-      end
-      # Option 2:  word with or without infix, if it is not the last part of the word
-      if back_lexicals.empty? && has_tail
-        back_lexicals = @dictionary.select_with_infix(back_string)
-        unless back_lexicals.empty?
-          back_form = back_string
-          seqs[1] = back_lexicals.sort[0].attr
-        end
-      end
-      # Option 3:  a compound itself (hyphen case only!)
-      if back_lexicals.empty? && infix=='-'
-        back_stats, back_lexicals, back_seqs = find_compositum(back_string, level+1, has_tail)
-        unless back_lexicals.empty?
-          back_form = back_lexicals.sort[0].form
-          seqs[1] = back_seqs
-          stats = stats[0..0] + back_stats
-        end
-      end
-      # Option 4:  take it as is (hyphen case only!)
-      if back_lexicals.empty? && infix=='-'
-        back_lexicals = [Lexical.new(back_string, LA_TAKEITASIS)]
-        back_form = back_string
-        seqs[1] = back_lexicals.sort[0].attr
-      end
-      # If still not recognized, return immediately
-      return [[],[],''] if back_lexicals.empty?
-      # Then resolve the front part
-      #
-      # Option 1:  word with or without infix
-      front_lexicals = @dictionary.select_with_infix(front_string)
-      unless front_lexicals.empty?
-        front_form = front_string
-        seqs[0] = front_lexicals.sort[0].attr
-      end
-      # Option 2:  a compound itself (recursive call with has_tail = true)
-      if front_lexicals.empty?
-        front_stats, front_lexicals, front_seqs = find_compositum(front_string, level+1, true)
-        unless front_lexicals.empty?
-          front_form = front_lexicals.sort[0].form
-          seqs[0] = front_seqs
-          stats = front_stats + stats[1..-1]
-        end
-      end
-      # Option 3:  take it as is (hyphen case only!)
-      if front_lexicals.empty? && infix=='-'
-        front_lexicals = [Lexical.new(front_string, LA_TAKEITASIS)]
-        seqs[0] = front_lexicals.sort[0].attr
-        front_form = front_string
-      end
-      # If still not recognized, return immediately
-      return [[],[],''] if front_lexicals.empty?
-      # Compound found: drop compound lexicals of the parts and append one compound lexical for the joined base form
-      lexis = (front_lexicals + back_lexicals).collect { |lex|
-        (lex.attr==LA_KOMPOSITUM) ? nil : lex
-      }.compact
-      lexis << Lexical.new(front_form + infix + back_form, LA_KOMPOSITUM)
-      [stats, lexis.sort, seqs.join ]
-    end
+  module Language
+    # String constants used in the data stream
+    CHAR_PUNCT = '.'
+    TA_WORD        = 'WORD'
+    TA_PUNCTUATION = 'PUNC'
+    TA_OTHER       = 'OTHR'
+    # Default attribute when a Word object is initialized
+    WA_UNSET      = '-'
+    # Status after the word has been found in the dictionary
+    WA_IDENTIFIED = 'IDF'
+    # Status when the word could not be found
+    WA_UNKNOWN    = '?'
+    # Word has been recognized as a compound
+    WA_KOMPOSITUM = 'KOM'
+    # Word is a multiword group
+    WA_MULTIWORD  = 'MUL'
+    # Word is a sequence (NOTE(review): the original comment repeated the multiword description -- confirm)
+    WA_SEQUENCE   = 'SEQ'
+    # Word is unknown but part of a multiword group
+    WA_UNKMULPART = 'MU?'
+    LA_SUBSTANTIV = 's'
+    LA_ADJEKTIV   = 'a'
+    LA_VERB       = 'v'
+    LA_EIGENNAME  = 'e'
+    LA_KOMPOSITUM = 'k'
+    LA_MULTIWORD  = 'm'
+    LA_SEQUENCE   = 'q'
+    LA_WORTFORM   = 'w'
+    LA_SYNONYM    = 'y'
+    LA_STOPWORD   = 't'
+    LA_TAKEITASIS = 'x'
+    LA_UNKNOWN    = '?'
+    LA_SORTORDER = [  # the codes listed below are reversed and joined into one String
+      LA_MULTIWORD,
+      LA_KOMPOSITUM,
+      LA_SUBSTANTIV,
+      LA_VERB,
+      LA_ADJEKTIV,
+      LA_EIGENNAME,
+      LA_WORTFORM,
+      LA_STOPWORD,
+      LA_TAKEITASIS,
+      LA_SYNONYM,
+      LA_UNKNOWN
+    ].reverse.join
  end