RubyGems - lingo - Versions diffs - 1.8.0 - Mend

lingo 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

data/.rspec +1 -0
data/COPYING +663 -0
data/ChangeLog +754 -0
data/README +322 -0
data/Rakefile +100 -0
data/TODO +28 -0
data/bin/lingo +5 -0
data/bin/lingoctl +6 -0
data/de.lang +121 -0
data/de/lingo-abk.txt +74 -0
data/de/lingo-dic.txt +56822 -0
data/de/lingo-mul.txt +3209 -0
data/de/lingo-syn.txt +14841 -0
data/de/test_dic.txt +24 -0
data/de/test_mul.txt +17 -0
data/de/test_mul2.txt +2 -0
data/de/test_singleword.txt +2 -0
data/de/test_syn.txt +4 -0
data/de/test_syn2.txt +1 -0
data/de/user-dic.txt +10 -0
data/en.lang +113 -0
data/en/lingo-dic.txt +55434 -0
data/en/lingo-mul.txt +456 -0
data/en/user-dic.txt +5 -0
data/info/Objekte.png +0 -0
data/info/Typen.png +0 -0
data/info/database.png +0 -0
data/info/db_small.png +0 -0
data/info/download.png +0 -0
data/info/gpl-hdr.txt +27 -0
data/info/kerze.png +0 -0
data/info/language.png +0 -0
data/info/lingo.png +0 -0
data/info/logo.png +0 -0
data/info/meeting.png +0 -0
data/info/types.png +0 -0
data/lib/lingo.rb +321 -0
data/lib/lingo/attendee/abbreviator.rb +119 -0
data/lib/lingo/attendee/debugger.rb +111 -0
data/lib/lingo/attendee/decomposer.rb +101 -0
data/lib/lingo/attendee/dehyphenizer.rb +167 -0
data/lib/lingo/attendee/multiworder.rb +301 -0
data/lib/lingo/attendee/noneword_filter.rb +103 -0
data/lib/lingo/attendee/objectfilter.rb +86 -0
data/lib/lingo/attendee/sequencer.rb +190 -0
data/lib/lingo/attendee/synonymer.rb +105 -0
data/lib/lingo/attendee/textreader.rb +237 -0
data/lib/lingo/attendee/textwriter.rb +196 -0
data/lib/lingo/attendee/tokenizer.rb +218 -0
data/lib/lingo/attendee/variator.rb +185 -0
data/lib/lingo/attendee/vector_filter.rb +158 -0
data/lib/lingo/attendee/wordsearcher.rb +96 -0
data/lib/lingo/attendees.rb +289 -0
data/lib/lingo/cli.rb +62 -0
data/lib/lingo/config.rb +104 -0
data/lib/lingo/const.rb +131 -0
data/lib/lingo/ctl.rb +173 -0
data/lib/lingo/database.rb +587 -0
data/lib/lingo/language.rb +530 -0
data/lib/lingo/modules.rb +98 -0
data/lib/lingo/types.rb +285 -0
data/lib/lingo/utilities.rb +40 -0
data/lib/lingo/version.rb +27 -0
data/lingo-all.cfg +85 -0
data/lingo-call.cfg +15 -0
data/lingo.cfg +78 -0
data/lingo.rb +3 -0
data/lir.cfg +72 -0
data/porter/stem.cfg +311 -0
data/porter/stem.rb +150 -0
data/spec/spec_helper.rb +0 -0
data/test.cfg +79 -0
data/test/attendee/ts_abbreviator.rb +35 -0
data/test/attendee/ts_decomposer.rb +31 -0
data/test/attendee/ts_multiworder.rb +390 -0
data/test/attendee/ts_noneword_filter.rb +19 -0
data/test/attendee/ts_objectfilter.rb +19 -0
data/test/attendee/ts_sequencer.rb +43 -0
data/test/attendee/ts_synonymer.rb +33 -0
data/test/attendee/ts_textreader.rb +58 -0
data/test/attendee/ts_textwriter.rb +98 -0
data/test/attendee/ts_tokenizer.rb +32 -0
data/test/attendee/ts_variator.rb +24 -0
data/test/attendee/ts_vector_filter.rb +62 -0
data/test/attendee/ts_wordsearcher.rb +119 -0
data/test/lir.csv +3 -0
data/test/lir.txt +12 -0
data/test/lir2.txt +12 -0
data/test/mul.txt +1 -0
data/test/ref/artikel.mul +1 -0
data/test/ref/artikel.non +159 -0
data/test/ref/artikel.seq +270 -0
data/test/ref/artikel.syn +16 -0
data/test/ref/artikel.vec +928 -0
data/test/ref/artikel.ven +928 -0
data/test/ref/artikel.ver +928 -0
data/test/ref/lir.csv +328 -0
data/test/ref/lir.mul +1 -0
data/test/ref/lir.non +274 -0
data/test/ref/lir.seq +249 -0
data/test/ref/lir.syn +94 -0
data/test/test_helper.rb +113 -0
data/test/ts_database.rb +269 -0
data/test/ts_language.rb +396 -0
data/txt/artikel-en.txt +157 -0
data/txt/artikel.txt +170 -0
data/txt/lir.txt +1317 -0
metadata +211 -0

data/lib/lingo/modules.rb ADDED Viewed

@@ -0,0 +1,98 @@
+# encoding: utf-8
+#--
+# LINGO ist ein Indexierungssystem mit Grundformreduktion, Kompositumzerlegung,
+# Mehrworterkennung und Relationierung.
+#
+# Copyright (C) 2005-2007 John Vorhauer
+# Copyright (C) 2007-2011 John Vorhauer, Jens Wille
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+#
+# For more information visit http://www.lex-lingo.de or contact me at
+# welcomeATlex-lingoDOTde near 50°55'N+6°55'E.
+#
+# Lex Lingo rules from here on
+#++
+class Lingo
+  # Das Modul Reportable ermöglicht das setzen und hochzählen von statistischen Werten.
+  module Reportable
+    def init_reportable
+      @counters = Hash.new(0)
+      @prefix = ''
+    end
+    def report_prefix(prefix)
+      @prefix = prefix
+    end
+    def inc(counter)
+      @counters[counter] += 1
+    end
+    def add(counter, value)
+      @counters[counter] += value
+    end
+    def set(counter, value)
+      @counters[counter] = value
+    end
+    def get(counter)
+      @counters[counter]
+    end
+    def report
+      rep = Hash.new
+      @counters.each_pair { |stat, value|
+        name = (@prefix=='') ? stat : @prefix+': '+stat
+        rep[name] = value
+      }
+      rep
+    end
+  end
+  # Das Modul Cachable ermöglicht das Verwerten von zwischengespeicherten Ergebnisse
+  # für einen schnelleren Zugriff.
+  module Cachable
+    def init_cachable
+      @cache = Hash.new(false)
+    end
+    def hit?(key)
+      @cache.has_key?(key)
+    end
+    def store(key, value)
+      res = value.nil? ? nil : value.dup
+      @cache[key] = res
+      value
+    end
+    def retrieve(key)
+      value = @cache[key]
+      value.nil? ? nil : value.dup
+    end
+  end
+end

data/lib/lingo/types.rb ADDED Viewed

@@ -0,0 +1,285 @@
+# encoding: utf-8
+#--
+# LINGO ist ein Indexierungssystem mit Grundformreduktion, Kompositumzerlegung,
+# Mehrworterkennung und Relationierung.
+#
+# Copyright (C) 2005-2007 John Vorhauer
+# Copyright (C) 2007-2011 John Vorhauer, Jens Wille
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+#
+# For more information visit http://www.lex-lingo.de or contact me at
+# welcomeATlex-lingoDOTde near 50°55'N+6°55'E.
+#
+# Lex Lingo rules from here on
+#++
+class Lingo
+  # Die Klasse StringA ist die Basisklasse für weitere Klassen, die im Rahmen der
+  # Objektstruktur eines Wortes benötigt werden. Die Klasse stellt eine Zeichenkette bereit,
+  # die mit einem Attribut versehen werden kann.
+  class StringA
+    include Comparable
+    attr_accessor :form, :attr
+    def initialize(form, attr='-')
+      @form = form || ''
+      @attr = attr || ''
+    end
+    def <=>(other)
+      return 1 if other.nil?
+      if @form==other.form
+        @attr<=>other.attr
+      else
+        @form<=>other.form
+      end
+    end
+    def to_s
+      @form + '/' + @attr
+    end
+    def inspect
+      to_s
+    end
+    def hash
+      to_s.hash
+    end
+    def eql?(other)
+      self.class.equal?(other.class) && to_s == other.to_s
+    end
+    alias_method :==, :eql?
+  end
+  # Die Klasse Token, abgeleitet von der Klasse StringA, stellt den Container
+  # für ein einzelnes Wort eines Textes dar. Das Wort wird mit einem Attribut versehen,
+  # welches der Regel entspricht, die dieses Wort identifiziert hat.
+  #
+  # Steht z.B. in ruby.cfg eine Regel zur Erkennung einer Zahl, die mit NUM bezeichnet wird,
+  # so wird dies dem Token angeheftet, z.B. Token.new('100', 'NUM') -> #100/NUM#
+  class Token < StringA
+    def to_s
+      ':' + super + ':'
+    end
+  end
+  # Die Klasse Lexical, abgeleitet von der Klasse StringA, stellt den Container
+  # für eine Grundform eines Wortes bereit, welches mit der Wortklasse versehen ist.
+  #
+  # Wird z.B. aus dem Wörterbuch eine Grundform gelesen, so wird dies in Form eines
+  # Lexical-Objektes zurückgegeben, z.B. Lexical.new('Rennen', 'S') -> (rennen/s)
+  class Lexical < StringA
+    def <=>(other)
+      #v TODO: v1.5.1
+      return 1 unless other.is_a?(Lexical)
+      #v
+      if self.attr==other.attr
+        # gleiche attribute
+        self.form<=>other.form
+      else
+        case  # leeres attribut unterliegt
+          when self.attr==''   then  1
+          when other.attr==''  then  -1
+          else  # vergleich der attribute
+            ss = LA_SORTORDER.index(self.attr) || -1 # ' -weavsk'
+            os = LA_SORTORDER.index(other.attr) || -1
+            case
+              when ss==-1 && os==-1  # beides unpriviligierte attribute (und nicht gleich)
+                self.attr<=>other.attr
+              when ss==-1 && os>-1  then  1
+              when ss>-1 && os==-1  then  -1
+              when ss>-1 && os>-1      # beides priviligierte attribute (und nicht gleich)
+                os<=>ss
+            end
+        end
+      end
+    end
+    #v TODO: v1.5.1
+    def to_a
+      [@form, @attr]
+    end
+    def to_str
+      @form + '#' + @attr
+    end
+    #v
+    def to_s
+      '(' + super + ')'
+    end
+  end
+  # Die Klasse Word bündelt spezifische Eigenschaften eines Wortes mit den
+  # dazu notwendigen Methoden.
+  class Word < StringA
+    def self.new_lexical(form, attr, lex_attr)
+      new(form, attr) << Lexical.new(form, lex_attr)
+    end
+    # Exakte Representation der originären Zeichenkette, so wie sie im Satz
+    # gefunden wurde, z.B. <tt>form = "RubyLing"</tt>
+    #
+    # Ergebnis der Wörterbuch-Suche. Sie stellt die Grundform des Wortes dar.
+    # Dabei kann es mehrere mögliche Grundformen geben, z.B. kann +abgeschoben+
+    # als Grundform das _Adjektiv_ +abgeschoben+ sein, oder aber das _Verb_
+    # +abschieben+.
+    #
+    # <tt>lemma = [['abgeschoben', '#a'], ['abschieben', '#v']]</tt>.
+    #
+    # <b>Achtung: Lemma wird nicht durch die Word-Klasse bestückt, sondern extern
+    # durch die Klasse Dictionary</b>
+    def initialize(form, attr=WA_UNSET)
+      super
+      @lexicals = Array.new
+    end
+    def lexicals(compound_parts = true)
+      if !compound_parts && attr == WA_KOMPOSITUM
+        @lexicals.select { |lex| lex.attr == LA_KOMPOSITUM }
+      else
+        @lexicals
+      end
+    end
+    def lexicals=(lexis)
+      if lexis.is_a?(Array)
+        @lexicals = lexis.sort.uniq
+      else
+        raise 'Falscher Typ bei Zuweisung'
+      end
+    end
+    def attrs(compound_parts = true)
+      lexicals(compound_parts).map { |lex| lex.attr }
+    end
+    # für Compositum
+    def parts
+      1
+    end
+    def min_part_size
+      @form.size
+    end
+    # Gibt genau die Grundform der Wortklasse zurück, die der RegExp des Übergabe-Parameters
+    # entspricht, z.B. <tt>word.get_wc(/a/) = ['abgeschoben', '#a']</tt>
+    def get_class(wc_re)
+      wc_re = Regexp.new(wc_re) unless wc_re.is_a?(Regexp)
+      unless @lexicals.empty?
+        @lexicals.select { |lex| lex.attr =~ wc_re }
+      else
+        attr =~ wc_re ? [self] : []
+      end
+    end
+    def norm
+      if @attr == WA_IDENTIFIED
+        lexicals[0].form
+      else
+        @form
+      end
+    end
+    def compo_form
+      if @attr==WA_KOMPOSITUM
+        get_class(LA_KOMPOSITUM)[0]
+      else
+        nil
+      end
+    end
+    def unknown?
+      [WA_UNKNOWN, WA_UNKMULPART].include?(attr)
+    end
+    def <<(other)
+      case other
+        when Lexical  then @lexicals << other
+        when Array    then @lexicals += other
+      end
+      self
+    end
+    def <=>(other)
+      return 1 if other.nil?
+      if @form==other.form
+        if @attr==other.attr
+          @lexicals<=>other.lexicals
+        else
+          @attr<=>other.attr
+        end
+      else
+        @form<=>other.form
+      end
+    end
+    def to_s
+      s = '<' + @form
+      s << '|' + @attr unless @attr==WA_IDENTIFIED
+      s << ' = ' + @lexicals.inspect unless @lexicals.empty?
+      s << '>'
+    end
+  end
+  class AgendaItem
+    include Comparable
+    attr_reader :cmd, :param
+    def initialize(cmd, param='')
+      @cmd = cmd || ''
+      @param = param || ''
+    end
+    def <=>(other)
+      return 1 unless other.is_a?(AgendaItem)
+      if self.cmd==other.cmd
+        self.param<=>other.param
+      else
+        self.cmd<=>other.cmd
+      end
+    end
+    def inspect
+      "*#{cmd.upcase}('#{param}')"
+    end
+  end
+end

data/lib/lingo/utilities.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# encoding: utf-8
+#--
+# LINGO ist ein Indexierungssystem mit Grundformreduktion, Kompositumzerlegung,
+# Mehrworterkennung und Relationierung.
+#
+# Copyright (C) 2005-2007 John Vorhauer
+# Copyright (C) 2007-2011 John Vorhauer, Jens Wille
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+#
+# For more information visit http://www.lex-lingo.de or contact me at
+# welcomeATlex-lingoDOTde near 50°55'N+6°55'E.
+#
+# Lex Lingo rules from here on
+#++
+require 'unicode'
+class String
+  alias_method :_lingo_original_downcase, :downcase
+  def downcase
+    Unicode.downcase(self)
+  end
+end

data/lib/lingo/version.rb ADDED Viewed

@@ -0,0 +1,27 @@
+class Lingo
+  module Version
+    MAJOR = 1
+    MINOR = 8
+    TINY  = 0
+    class << self
+      # Returns array representation.
+      def to_a
+        [MAJOR, MINOR, TINY]
+      end
+      # Short-cut for version string.
+      def to_s
+        to_a.join('.')
+      end
+    end
+  end
+  VERSION = Version.to_s
+end