lingo 1.8.0 → 1.8.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +13 -0
- data/README +49 -29
- data/Rakefile +28 -4
- data/TODO +2 -9
- data/bin/lingo +24 -0
- data/bin/lingoctl +24 -0
- data/de/lingo-dic.txt +559 -74
- data/info/gpl-hdr.txt +21 -24
- data/lib/lingo.rb +83 -112
- data/lib/lingo/agenda_item.rb +53 -0
- data/lib/lingo/attendee.rb +261 -0
- data/lib/lingo/attendee/abbreviator.rb +95 -97
- data/lib/lingo/attendee/debugger.rb +94 -93
- data/lib/lingo/attendee/decomposer.rb +76 -83
- data/lib/lingo/attendee/dehyphenizer.rb +141 -144
- data/lib/lingo/attendee/formatter.rb +65 -0
- data/lib/lingo/attendee/multi_worder.rb +302 -0
- data/lib/lingo/attendee/noneword_filter.rb +89 -84
- data/lib/lingo/attendee/object_filter.rb +91 -0
- data/lib/lingo/attendee/sequencer.rb +159 -158
- data/lib/lingo/attendee/synonymer.rb +81 -84
- data/lib/lingo/attendee/text_reader.rb +242 -0
- data/lib/lingo/attendee/text_writer.rb +169 -0
- data/lib/lingo/attendee/tokenizer.rb +192 -191
- data/lib/lingo/attendee/variator.rb +152 -156
- data/lib/lingo/attendee/vector_filter.rb +140 -135
- data/lib/lingo/attendee/word_searcher.rb +98 -0
- data/lib/lingo/buffered_attendee.rb +69 -0
- data/lib/lingo/cachable.rb +58 -0
- data/lib/lingo/call.rb +72 -0
- data/lib/lingo/cli.rb +26 -0
- data/lib/lingo/config.rb +23 -26
- data/lib/lingo/core_ext.rb +42 -0
- data/lib/lingo/ctl.rb +239 -173
- data/lib/lingo/database.rb +148 -496
- data/lib/lingo/database/crypter.rb +85 -0
- data/lib/lingo/database/gdbm_store.rb +49 -0
- data/lib/lingo/database/hash_store.rb +67 -0
- data/lib/lingo/database/libcdb_store.rb +58 -0
- data/lib/lingo/database/sdbm_store.rb +64 -0
- data/lib/lingo/database/show_progress.rb +81 -0
- data/lib/lingo/database/source.rb +134 -0
- data/lib/lingo/database/source/key_value.rb +62 -0
- data/lib/lingo/database/source/multi_key.rb +65 -0
- data/lib/lingo/database/source/multi_value.rb +65 -0
- data/lib/lingo/database/source/single_word.rb +60 -0
- data/lib/lingo/database/source/word_class.rb +64 -0
- data/lib/lingo/error.rb +122 -0
- data/lib/lingo/language.rb +78 -518
- data/lib/lingo/language/dictionary.rb +173 -0
- data/lib/lingo/language/grammar.rb +211 -0
- data/lib/lingo/language/lexical.rb +66 -0
- data/lib/lingo/language/lexical_hash.rb +88 -0
- data/lib/lingo/language/token.rb +48 -0
- data/lib/lingo/language/word.rb +130 -0
- data/lib/lingo/language/word_form.rb +83 -0
- data/lib/lingo/reportable.rb +59 -0
- data/lib/lingo/version.rb +1 -1
- data/lingo-all.cfg +14 -10
- data/lingo-call.cfg +5 -5
- data/lingo.cfg +14 -12
- data/lingo.rb +26 -0
- data/lir.cfg +13 -9
- data/spec/spec_helper.rb +1 -0
- data/test.cfg +11 -11
- data/test/attendee/ts_abbreviator.rb +0 -6
- data/test/attendee/ts_decomposer.rb +0 -6
- data/test/attendee/{ts_multiworder.rb → ts_multi_worder.rb} +1 -7
- data/test/attendee/ts_noneword_filter.rb +1 -7
- data/test/attendee/{ts_objectfilter.rb → ts_object_filter.rb} +1 -7
- data/test/attendee/ts_sequencer.rb +0 -6
- data/test/attendee/ts_synonymer.rb +0 -6
- data/test/attendee/{ts_textreader.rb → ts_text_reader.rb} +1 -7
- data/test/attendee/{ts_textwriter.rb → ts_text_writer.rb} +1 -7
- data/test/attendee/ts_tokenizer.rb +0 -6
- data/test/attendee/ts_variator.rb +0 -6
- data/test/attendee/ts_vector_filter.rb +1 -7
- data/test/attendee/{ts_wordsearcher.rb → ts_word_searcher.rb} +1 -7
- data/test/ref/artikel.non +2 -29
- data/test/ref/artikel.seq +13 -8
- data/test/ref/artikel.vec +30 -15
- data/test/ref/artikel.ven +29 -14
- data/test/ref/artikel.ver +58 -43
- data/test/ref/lir.csv +146 -145
- data/test/ref/lir.non +186 -210
- data/test/ref/lir.seq +54 -50
- data/test/test_helper.rb +41 -36
- data/test/ts_database.rb +12 -11
- data/test/ts_language.rb +118 -68
- metadata +67 -29
- data/lib/lingo/attendee/multiworder.rb +0 -301
- data/lib/lingo/attendee/objectfilter.rb +0 -86
- data/lib/lingo/attendee/textreader.rb +0 -237
- data/lib/lingo/attendee/textwriter.rb +0 -196
- data/lib/lingo/attendee/wordsearcher.rb +0 -96
- data/lib/lingo/attendees.rb +0 -289
- data/lib/lingo/const.rb +0 -131
- data/lib/lingo/modules.rb +0 -98
- data/lib/lingo/types.rb +0 -285
- data/lib/lingo/utilities.rb +0 -40
@@ -0,0 +1,173 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
module Language
|
30
|
+
|
31
|
+
class Dictionary
|
32
|
+
|
33
|
+
include Cachable
|
34
|
+
include Reportable
|
35
|
+
|
36
|
+
# Builds a dictionary over the lexical sources named in +config+ and
# registers it with +lingo+.
#
# config - attendee parameters; must contain the key 'source' (a list that
#          is mapped through lingo.lexical_hash). The optional key 'mode'
#          selects whether all sources are searched (missing or 'all',
#          case-insensitive) or only the first one with a hit.
# lingo  - object providing dictionary_config, lexical_hash(src) and
#          dictionaries.
#
# Raises ArgumentError when +config+ has no 'source' key.
def initialize(config, lingo)
  unless config.has_key?('source')
    raise ArgumentError, 'Required parameter `source\' missing.'
  end

  init_cachable
  init_reportable

  @suffixes, @infixes = [], []

  suffix_config = lingo.dictionary_config['suffix']

  if suffix_config
    suffix_config.each { |t, s|
      # Use a lower-cased copy: the word class belongs to the caller's
      # configuration and must not be modified in place (it may be frozen).
      wordclass = t.downcase

      # Word class 'f' marks infix rules, everything else is a suffix rule.
      rules = wordclass == 'f' ? @infixes : @suffixes

      s.split.each { |suf|
        # Each entry is "ending" or "ending/expansion".
        su, ex = suf.split('/')

        rules << [
          Regexp.new(su + '$', Regexp::IGNORECASE), ex || '*', wordclass
        ]
      }
    }
  end

  @sources = config['source'].map { |src| lingo.lexical_hash(src) }

  mode = config['mode']
  @all_sources = mode.nil? || mode.downcase == 'all'

  lingo.dictionaries << self
end
|
65
|
+
|
66
|
+
# Closes every source of this dictionary and returns the source list.
def close
  @sources.each { |source| source.close }
end
|
69
|
+
|
70
|
+
# Returns the report produced by +super+, updated with the report of
# every source.
def report
  combined = super
  @sources.each { |source| combined.update(source.report) }
  combined
end
|
73
|
+
|
74
|
+
# _dic_.find_word( _aString_ ) -> _aNewWord_
#
# Builds a word from the string and looks it up in the dictionary.
#
# Results are cached under the lower-cased string; on a cache hit the
# retrieved word gets +str+ assigned as its form before it is returned.
def find_word(str)
  key = str.downcase

  if hit?(key)
    inc('cache hits')

    cached = retrieve(key)
    cached.form = str
    return cached
  end

  word = Word.new(str, WA_UNKNOWN)
  lexicals = select_with_suffix(str)

  # Only words with at least one lexical count as identified.
  unless lexicals.empty?
    word.lexicals = lexicals
    word.attr = WA_IDENTIFIED
  end

  store(key, word)
end
|
92
|
+
|
93
|
+
# Collects the dictionary entries found for the forms of +obj+'s lexicals.
#
# If +obj+ has no lexicals and reports itself as unknown, +obj+ itself is
# used as the only candidate. Candidates that are synonyms themselves are
# skipped, and for a compound word only its compound lexicals are used.
# Hits matching the Database::KEY_REF reference pattern are left out.
def find_synonyms(obj)
  candidates = obj.lexicals
  candidates = [obj] if candidates.empty? && obj.unknown?

  # multiworder optimization
  ref = %r{\A#{Regexp.escape(Database::KEY_REF)}\d+}o

  synonyms = []

  candidates.each do |candidate|
    next if candidate.attr == LA_SYNONYM
    next if candidate.attr != LA_KOMPOSITUM && obj.attr == WA_KOMPOSITUM

    select(candidate.form).each do |hit|
      synonyms << hit unless hit =~ ref
    end
  end

  synonyms
end
|
107
|
+
|
108
|
+
# _dic_.select( _aString_ ) -> _ArrayOfLexicals_
#
# Searches the sources in order and returns the hits of the first source
# that has any (+mode = first+), or the hits of all sources (+mode = all+).
# The result is sorted and free of duplicates.
def select(str)
  found = []

  @sources.each do |source|
    hits = source[str]
    next unless hits

    found.concat(hits)
    break unless @all_sources
  end

  found.sort!
  found.uniq!
  found
end
|
118
|
+
|
119
|
+
# _dic_.select_with_suffix( _aString_ ) -> _ArrayOfLexicals_
#
# Searches the sources like #select (first hit for +mode = first+, all
# hits for +mode = all+). Additionally looks for words that have been
# stripped of word-class-specific suffixes.
def select_with_suffix(str)
  select_with_affix(:suffix, str)
end
|
126
|
+
|
127
|
+
# _dic_.select_with_infix( _aString_ ) -> _ArrayOfLexicals_
#
# Searches the sources like #select (first hit for +mode = first+, all
# hits for +mode = all+). Additionally looks for words that end in a
# linking element (infix).
def select_with_infix(str)
  select_with_affix(:infix, str)
end
|
134
|
+
|
135
|
+
# _dic_.suffix_lexicals( _aString_ ) -> _ArrayOfLexicals_
#
# Returns all lexicals that the suffix rules can derive from the ending
# of the string:
#
#   dic.suffix_lexicals("Hasens") -> [(hasen/s), (hasen/e), (has/e)]
def suffix_lexicals(str)
  affix_lexicals(:suffix, str)
end
|
143
|
+
|
144
|
+
# _dic_.infix_lexicals( _aString_ ) -> _ArrayOfLexicals_
#
# Returns all lexicals that the infix rules can derive from the ending
# of the string.
def infix_lexicals(str)
  affix_lexicals(:infix, str)
end
|
150
|
+
|
151
|
+
private
|
152
|
+
|
153
|
+
# Looks +str+ up directly; only if that yields nothing, retries with every
# form the +affix+ rules (:suffix or :infix) derive from +str+.
#
# For suffix rules a hit is accepted only when its word class equals the
# word class of the rule that produced the candidate; infix hits are
# accepted unconditionally.
def select_with_affix(affix, str)
  found = select(str)
  return found unless found.empty?

  affix_lexicals(affix, str).each do |candidate|
    select(candidate.form).each do |hit|
      found << hit if affix != :suffix || candidate.attr == hit.attr
    end
  end

  found
end
|
162
|
+
|
163
|
+
# Applies every rule of the given kind (read from @suffixes or @infixes,
# depending on +affix+) to +str+ and returns one Lexical per matching rule.
#
# A rule is [pattern, expansion, wordclass]; the matched part of +str+ is
# replaced by the expansion, where '*' stands for "nothing".
def affix_lexicals(affix, str)
  rules = instance_variable_get("@#{affix}es")

  rules.each_with_object([]) do |(pattern, expansion, wordclass), lexicals|
    match = pattern.match(str)
    next unless match

    replacement = expansion == '*' ? '' : expansion
    form = "#{match.pre_match}#{replacement}#{match.post_match}"

    lexicals << Lexical.new(form, wordclass)
  end
end
|
168
|
+
|
169
|
+
end
|
170
|
+
|
171
|
+
end
|
172
|
+
|
173
|
+
end
|
@@ -0,0 +1,211 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
module Language
|
30
|
+
|
31
|
+
# Die Klasse Grammar beinhaltet grammatikalische Spezialitäten einer Sprache. Derzeit findet die
|
32
|
+
# Kompositumerkennung hier ihren Platz, die mit der Methode find_compositum aufgerufen werden kann.
|
33
|
+
# Die Klasse Grammar wird genau wie ein Dictionary initialisiert. Das bei der Initialisierung angegebene Wörterbuch ist Grundlage
|
34
|
+
# für die Erkennung der Kompositumteile.
|
35
|
+
|
36
|
+
class Grammar
|
37
|
+
|
38
|
+
include Cachable
|
39
|
+
include Reportable
|
40
|
+
|
41
|
+
HYPHEN_RE = %r{\A(.+)-([^-]+)\z}
|
42
|
+
|
43
|
+
# initialize(config, lingo) -> _Grammar_
#
# config - attendee-specific parameters, handed on to Dictionary.new
# lingo  - object providing dictionary_config; its optional 'compositum'
#          section tunes the compound recognition
def initialize(config, lingo)
  init_cachable
  init_reportable

  @dic, @suggestions = Dictionary.new(config, lingo), []

  # Every setting below has a default, so a missing 'compositum' section
  # must not crash the setup.
  cfg = lingo.dictionary_config['compositum'] || {}

  # Word size threshold below which no compound check takes place
  # (default 8).
  @min_word_size = (cfg['min-word-size'] || 8).to_i

  # The average length of the compound parts must reach this value
  # (default 4), otherwise the word is not a valid compound.
  @min_avg_part_size = (cfg['min-avg-part-size'] || 4).to_i

  # Minimum length of the shortest compound part (default 1).
  @min_part_size = (cfg['min-part-size'] || 1).to_i

  # Maximum number of parts a compound may consist of (default 4).
  @max_parts = (cfg['max-parts'] || 4).to_i

  # The word class of a compound part can be marked separately to tell it
  # apart from the word classes of regular words, e.g. Hausmeister =>
  # ['haus/s', 'meister/s'] or, with append-wordclass = '+', Hausmeister
  # => ['haus/s+', 'meister/s+'].
  @append_wc = cfg.fetch('append-wordclass', '')

  # Certain word class sequences can be declared invalid compounds, e.g.
  # two adjectives do not form a compound: skip-sequences = ['aa'].
  @sequences = cfg.fetch('skip-sequences', []).map(&:downcase)
end
|
80
|
+
|
81
|
+
# Closes the dictionary this grammar is built on.
def close
  @dic.close
end
|
84
|
+
|
85
|
+
# Returns the report produced by +super+, updated with the report of the
# underlying dictionary.
def report
  combined = super
  combined.update(@dic.report)
end
|
88
|
+
|
89
|
+
# find_compositum(str) -> word wenn level=1
|
90
|
+
# find_compositum(str) -> [lex, sta] wenn level!=1
|
91
|
+
#
|
92
|
+
# find_compositum arbeitet in verschiedenen Leveln, da die Methode auch rekursiv aufgerufen wird. Ein Level größer 1
|
93
|
+
# entspricht daher einem rekursiven Aufruf
|
94
|
+
def find_compositum(str, level = 1, tail = false)
|
95
|
+
key, top, empty = str.downcase, level == 1, [[], [], '']
|
96
|
+
|
97
|
+
if top && hit?(key)
|
98
|
+
inc('cache hits')
|
99
|
+
return retrieve(key)
|
100
|
+
end
|
101
|
+
|
102
|
+
com = Word.new(str, WA_UNKNOWN)
|
103
|
+
|
104
|
+
unless str.length > @min_word_size
|
105
|
+
inc('String zu kurz')
|
106
|
+
return top ? com : empty
|
107
|
+
end
|
108
|
+
|
109
|
+
inc('Komposita geprüft')
|
110
|
+
|
111
|
+
res = permute_compositum(key, level, tail)
|
112
|
+
val = !(lex = res.first).empty? && valid?(str, *res[1..-1])
|
113
|
+
|
114
|
+
if top
|
115
|
+
if val
|
116
|
+
inc('Komposita erkannt')
|
117
|
+
|
118
|
+
com.attr = WA_KOMPOSITUM
|
119
|
+
com.lexicals = lex.map { |l|
|
120
|
+
l.attr == LA_KOMPOSITUM ? l :
|
121
|
+
Lexical.new(l.form, l.attr + @append_wc)
|
122
|
+
}
|
123
|
+
end
|
124
|
+
|
125
|
+
store(key, com)
|
126
|
+
else
|
127
|
+
val ? res : empty
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
# permute_compositum( _aString_, level, tail ) -> [lex, sta, seq]
#
# Finds a split of +str+ into a front and a back part that
# test_compositum accepts.
#
# A hyphenated string is split once, at its last hyphen. Any other string
# is split at every position from left to right; the first split whose
# highest-sorting lexical is not a "take it as is" entry wins immediately.
# If only "take it as is" splits were found, the first of them is
# returned; without any usable split the result is [[], [], ''].
def permute_compositum(str, level, tail)
  hyphenated = HYPHEN_RE.match(str)

  if hyphenated
    return test_compositum(hyphenated[1], '-', hyphenated[2], level, tail)
  end

  # The fallback is kept in a local variable on purpose: when it lived in
  # shared per-level state, an early return below left it behind and a
  # later call could hand out a suggestion that belonged to another word.
  suggestion = nil
  len = str.length

  1.upto(len - 1) { |i|
    res = test_compositum(str[0, i], '', str[i, len], level, tail)
    lex = res.first

    next if lex.empty?
    return res unless lex.last.attr == LA_TAKEITASIS

    suggestion ||= res
  }

  suggestion || [[], [], '']
end
|
148
|
+
|
149
|
+
# test_compositum(fstr, infix, bstr, level, tail) -> [lex, sta, seq]
#
# Tests one specific split of a string (front part, infix, back part) for
# being a compound. Returns [[], [], ''] when the split is not usable,
# otherwise the sorted lexicals (parts plus the new compound lexical), the
# part lengths and the joined word class sequence.
def test_compositum(fstr, infix, bstr, level, tail)
  sta = [fstr.length, bstr.length]
  seq = %w[? ?]
  hyphenated = infix == '-'

  # --- back part -----------------------------------------------------
  # 1. Word w/ suffix
  blex = @dic.select_with_suffix(bstr).sort!
  # 2. Word w/ infix, only tried when +tail+ is set
  blex = @dic.select_with_infix(bstr).sort! if blex.empty? && tail

  if !blex.empty?
    bform = tail ? bstr : blex.first.form
    seq[1] = blex.first.attr
  elsif hyphenated
    blex, bsta, bseq = find_compositum(bstr, level + 1, tail)

    if blex.sort!.empty?
      # 4. Take it as is
      bform = bstr
      seq[1] = LA_TAKEITASIS
      blex = [Lexical.new(bform, seq[1])]
    else
      # 3. Compositum
      bform = blex.first.form
      seq[1] = bseq
      sta[1..-1] = bsta
    end
  else
    return [[], [], '']
  end

  # --- front part ----------------------------------------------------
  flex = @dic.select_with_infix(fstr).sort!

  if flex.empty?
    flex, fsta, fseq = find_compositum(fstr, level + 1, true)

    if !flex.sort!.empty?
      # 2. Compositum
      fform = flex.first.form
      seq[0] = fseq
      sta[0..0] = fsta
    elsif hyphenated
      # 3. Take it as is
      fform = fstr
      seq[0] = LA_TAKEITASIS
      flex = [Lexical.new(fform, seq[0])]
    else
      return [[], [], '']
    end
  else
    # 1. Word w/ infix
    fform = fstr
    seq[0] = flex.first.attr
  end

  # Merge both parts, drop compound lexicals of nested compounds and add
  # the lexical of the compound as a whole.
  flex.concat(blex)
  flex.delete_if { |l| l.attr == LA_KOMPOSITUM }
  flex.push(Lexical.new(fform + infix + bform, LA_KOMPOSITUM))
  flex.sort!

  [flex, sta, seq.join]
end
|
197
|
+
|
198
|
+
private
|
199
|
+
|
200
|
+
# Checks a compound candidate against the configured limits.
#
# str - the string that was split
# sta - the lengths of its parts
# seq - the word class sequence of its parts
#
# Returns true when the number of parts, the shortest part and the
# average part length (integer division) are within the limits and +seq+
# is not one of the skipped sequences.
def valid?(str, sta, seq)
  parts = sta.size

  return false if parts > @max_parts
  return false if sta.min < @min_part_size
  return false if str.length / parts < @min_avg_part_size

  !@sequences.include?(seq)
end
|
206
|
+
|
207
|
+
end
|
208
|
+
|
209
|
+
end
|
210
|
+
|
211
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
module Language
|
30
|
+
|
31
|
+
# Die Klasse Lexical, abgeleitet von der Klasse WordForm, stellt den Container
|
32
|
+
# für eine Grundform eines Wortes bereit, welches mit der Wortklasse versehen ist.
|
33
|
+
#
|
34
|
+
# Wird z.B. aus dem Wörterbuch eine Grundform gelesen, so wird dies in Form eines
|
35
|
+
# Lexical-Objektes zurückgegeben, z.B. Lexical.new('Rennen', 'S') -> (rennen/s)
|
36
|
+
|
37
|
+
class Lexical < WordForm
|
38
|
+
|
39
|
+
# Orders lexicals by word class first and by form second.
#
# Anything that is not an instance of the receiver's class sorts before
# the receiver. With equal word classes the forms decide. An empty word
# class sorts last. Otherwise the position in LA_SORTORDER decides, a
# higher index sorting first; a word class listed there sorts before an
# unlisted one, and two unlisted word classes are compared directly.
def <=>(other)
  return 1 unless other.is_a?(self.class)

  mine, theirs = attr, other.attr

  return form <=> other.form if mine == theirs
  return 1 if mine.empty?
  return -1 if theirs.empty?

  my_rank = LA_SORTORDER.index(mine)
  their_rank = LA_SORTORDER.index(theirs)

  if my_rank && their_rank
    their_rank <=> my_rank
  elsif my_rank
    -1
  elsif their_rank
    1
  else
    mine <=> theirs
  end
end
|
53
|
+
|
54
|
+
# Implicit string conversion: the elements of +to_a+ joined by '#'.
def to_str
  to_a.join('#')
end
|
57
|
+
|
58
|
+
# Returns the string representation produced by +super+, wrapped in
# parentheses.
def to_s
  inner = super
  "(#{inner})"
end
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
end
|