lingo 1.8.2 → 1.8.3
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +33 -0
- data/README +6 -5
- data/Rakefile +6 -4
- data/{lib/lingo/cachable.rb → bin/lingosrv} +30 -58
- data/bin/lingoweb +30 -0
- data/de.lang +2 -13
- data/en/lingo-irr.txt +266 -0
- data/en/lingo-wdn.txt +37319 -0
- data/en.lang +2 -15
- data/lib/lingo/app.rb +82 -0
- data/lib/lingo/attendee/abbreviator.rb +22 -26
- data/lib/lingo/attendee/debugger.rb +8 -4
- data/lib/lingo/attendee/decomposer.rb +0 -1
- data/lib/lingo/attendee/dehyphenizer.rb +2 -2
- data/lib/lingo/attendee/multi_worder.rb +20 -13
- data/lib/lingo/attendee/noneword_filter.rb +2 -7
- data/lib/lingo/attendee/sequencer.rb +43 -19
- data/lib/lingo/attendee/stemmer/porter.rb +2 -2
- data/lib/lingo/attendee/stemmer.rb +1 -1
- data/lib/lingo/attendee/synonymer.rb +1 -9
- data/lib/lingo/attendee/text_reader.rb +42 -29
- data/lib/lingo/attendee/text_writer.rb +3 -6
- data/lib/lingo/attendee/tokenizer.rb +87 -69
- data/lib/lingo/attendee/variator.rb +7 -5
- data/lib/lingo/attendee/vector_filter.rb +11 -11
- data/lib/lingo/attendee/word_searcher.rb +1 -9
- data/lib/lingo/attendee.rb +24 -105
- data/lib/lingo/buffered_attendee.rb +2 -9
- data/lib/lingo/call.rb +18 -13
- data/lib/lingo/cli.rb +5 -10
- data/lib/lingo/config.rb +40 -7
- data/lib/lingo/ctl.rb +69 -57
- data/lib/lingo/database/hash_store.rb +9 -4
- data/lib/lingo/database/sdbm_store.rb +4 -7
- data/lib/lingo/database/source/multi_key.rb +1 -1
- data/lib/lingo/database/source/multi_value.rb +1 -1
- data/lib/lingo/database/source.rb +2 -20
- data/lib/lingo/database.rb +30 -19
- data/lib/lingo/debug.rb +79 -0
- data/lib/lingo/{core_ext.rb → language/char.rb} +43 -42
- data/lib/lingo/language/dictionary.rb +38 -46
- data/lib/lingo/language/grammar.rb +40 -57
- data/lib/lingo/language/lexical.rb +4 -7
- data/lib/lingo/language/lexical_hash.rb +17 -35
- data/lib/lingo/language/token.rb +4 -0
- data/lib/lingo/language/word.rb +7 -8
- data/lib/lingo/language/word_form.rb +4 -4
- data/lib/lingo/language.rb +2 -1
- data/lib/lingo/srv/config.ru +4 -0
- data/lib/lingo/srv/lingosrv.cfg +14 -0
- data/lib/lingo/{reportable.rb → srv.rb} +59 -61
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web/config.ru +4 -0
- data/lib/lingo/web/lingoweb.cfg +14 -0
- data/lib/lingo/web/public/lingo.png +0 -0
- data/lib/lingo/web/public/lingoweb.css +74 -0
- data/lib/lingo/web/views/index.erb +92 -0
- data/lib/lingo/web.rb +94 -0
- data/lib/lingo.rb +27 -29
- data/lingo.cfg +1 -1
- data/lir.cfg +24 -0
- data/ru/lingo-dic.txt +22342 -0
- data/ru/lingo-mul.txt +5151 -0
- data/ru/lingo-syn.txt +0 -0
- data/ru.lang +99 -0
- data/test/attendee/ts_sequencer.rb +2 -2
- data/test/attendee/ts_text_reader.rb +36 -2
- data/test/attendee/ts_text_writer.rb +6 -6
- data/test/lir.vec +3 -3
- data/test/test_helper.rb +104 -102
- data/test/ts_database.rb +1 -1
- data/test/ts_language.rb +55 -96
- data/txt/artikel-ru.txt +45 -0
- data/txt/lir.txt +1 -3
- metadata +143 -83
- data/TODO +0 -23
@@ -1,42 +1,43 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
#--
|
4
|
-
###############################################################################
|
5
|
-
# #
|
6
|
-
# Lingo -- A full-featured automatic indexing system #
|
7
|
-
# #
|
8
|
-
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
-
# #
|
11
|
-
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
-
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
-
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
-
# any later version. #
|
15
|
-
# #
|
16
|
-
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
-
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
-
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
-
# more details. #
|
20
|
-
# #
|
21
|
-
# You should have received a copy of the GNU Affero General Public License #
|
22
|
-
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
-
# #
|
24
|
-
###############################################################################
|
25
|
-
#++
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
#--
|
4
|
+
###############################################################################
|
5
|
+
# #
|
6
|
+
# Lingo -- A full-featured automatic indexing system #
|
7
|
+
# #
|
8
|
+
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
+
# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
|
10
|
+
# #
|
11
|
+
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Lingo
|
28
|
+
|
29
|
+
module Language
|
30
|
+
|
31
|
+
module Char
|
32
|
+
|
33
|
+
ANY = [
|
34
|
+
CHAR = '[[:alpha:]]',
|
35
|
+
DIGIT = '[[:digit:]]',
|
36
|
+
LEGAL = '[ /&()\[\].,\'<>-]'
|
37
|
+
].join('|')
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
@@ -30,8 +30,7 @@ class Lingo
|
|
30
30
|
|
31
31
|
class Dictionary
|
32
32
|
|
33
|
-
|
34
|
-
include Reportable
|
33
|
+
KEY_REF_RE = %r{\A#{Database::KEY_REF_ESC}\d+}
|
35
34
|
|
36
35
|
def self.open(*args)
|
37
36
|
yield dictionary = new(*args)
|
@@ -41,12 +40,9 @@ class Lingo
|
|
41
40
|
|
42
41
|
def initialize(config, lingo)
|
43
42
|
unless config.has_key?('source')
|
44
|
-
raise ArgumentError,
|
43
|
+
raise ArgumentError, "Required parameter `source' missing."
|
45
44
|
end
|
46
45
|
|
47
|
-
init_cachable
|
48
|
-
init_reportable
|
49
|
-
|
50
46
|
@suffixes, @infixes = [], []
|
51
47
|
|
52
48
|
Array(lingo.dictionary_config['suffix']).each { |t, s|
|
@@ -67,56 +63,49 @@ class Lingo
|
|
67
63
|
end
|
68
64
|
|
69
65
|
def close
|
70
|
-
@src.each
|
71
|
-
end
|
72
|
-
|
73
|
-
def report
|
74
|
-
super.tap { |rep| @src.each { |src| rep.update(src.report) } }
|
66
|
+
@src.each { |i| i.close }
|
75
67
|
end
|
76
68
|
|
77
69
|
# _dic_.find_word( _aString_ ) -> _aNewWord_
|
78
70
|
#
|
79
71
|
# Erstellt aus dem String ein Wort und sucht nach diesem im Wörterbuch.
|
80
72
|
def find_word(str)
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
unless (lexicals = select_with_suffix(str)).empty?
|
89
|
-
word.lexicals = lexicals
|
90
|
-
word.attr = WA_IDENTIFIED
|
91
|
-
end
|
92
|
-
|
93
|
-
store(key, word)
|
73
|
+
(@_word ||= {})[str] ||= Word.new(str, WA_UNKNOWN).tap { |w|
|
74
|
+
unless (lexicals = select_with_suffix(str)).empty?
|
75
|
+
w.lexicals = lexicals
|
76
|
+
w.attr = WA_IDENTIFIED
|
77
|
+
end
|
78
|
+
}
|
94
79
|
end
|
95
80
|
|
96
|
-
def find_synonyms(obj)
|
81
|
+
def find_synonyms(obj, syn = [])
|
97
82
|
lex = obj.lexicals
|
98
83
|
lex = [obj] if lex.empty? && obj.unknown?
|
99
84
|
|
100
|
-
|
101
|
-
ref = %r{\A#{Database::KEY_REF_ESC}\d+}
|
85
|
+
com, ref = obj.attr == WA_COMPOUND, KEY_REF_RE
|
102
86
|
|
103
|
-
lex.
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
select(l.form).each { |y| s << y unless y =~ ref }
|
87
|
+
lex.each { |l|
|
88
|
+
select(l.form, syn) { |i| i =~ ref } unless com &&
|
89
|
+
l.attr != LA_COMPOUND || l.attr == LA_SYNONYM
|
108
90
|
}
|
91
|
+
|
92
|
+
syn
|
109
93
|
end
|
110
94
|
|
111
95
|
# _dic_.select( _aString_ ) -> _ArrayOfLexicals_
|
112
96
|
#
|
113
97
|
# Sucht alle Wörterbücher durch und gibt den ersten Treffer zurück (+mode = first+), oder alle Treffer (+mode = all+)
|
114
|
-
def select(str)
|
115
|
-
@src.
|
98
|
+
def select(str, lex = [])
|
99
|
+
@src.each { |src|
|
116
100
|
l = src[str] or next
|
117
|
-
lex.concat(l)
|
118
|
-
break
|
119
|
-
}
|
101
|
+
lex.concat(block_given? ? l.delete_if { |i| yield i } : l)
|
102
|
+
break unless @all
|
103
|
+
}
|
104
|
+
|
105
|
+
lex.sort!
|
106
|
+
lex.uniq!
|
107
|
+
|
108
|
+
lex
|
120
109
|
end
|
121
110
|
|
122
111
|
# _dic_.select_with_suffix( _aString_ ) -> _ArrayOfLexicals_
|
@@ -154,19 +143,22 @@ class Lingo
|
|
154
143
|
private
|
155
144
|
|
156
145
|
def select_with_affix(affix, str)
|
157
|
-
select(str)
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
146
|
+
lex = select(str)
|
147
|
+
|
148
|
+
affix_lexicals(affix, str).each { |a| select(a.form, lex) { |b|
|
149
|
+
affix == :suffix && a.attr != b.attr
|
150
|
+
} } if lex.empty?
|
151
|
+
|
152
|
+
lex
|
164
153
|
end
|
165
154
|
|
166
155
|
def affix_lexicals(affix, str)
|
167
|
-
instance_variable_get("@#{affix}es").
|
168
|
-
|
156
|
+
lex = instance_variable_get("@#{affix}es").map { |r, e, t|
|
157
|
+
Lexical.new("#{$`}#{e == '*' ? '' : e}#{$'}", t) if str =~ r
|
169
158
|
}
|
159
|
+
|
160
|
+
lex.compact!
|
161
|
+
lex
|
170
162
|
end
|
171
163
|
|
172
164
|
end
|
@@ -35,9 +35,6 @@ class Lingo
|
|
35
35
|
|
36
36
|
class Grammar
|
37
37
|
|
38
|
-
include Cachable
|
39
|
-
include Reportable
|
40
|
-
|
41
38
|
HYPHEN_RE = %r{\A(.+)-([^-]+)\z}
|
42
39
|
|
43
40
|
def self.open(*args)
|
@@ -47,11 +44,10 @@ class Lingo
|
|
47
44
|
end
|
48
45
|
|
49
46
|
def initialize(config, lingo)
|
50
|
-
init_cachable
|
51
|
-
init_reportable
|
52
|
-
|
53
47
|
@dic, @suggestions = Dictionary.new(config, lingo), []
|
54
48
|
|
49
|
+
lingo.deprecate(:compositum, :compound, self) if lingo.dictionary_config.has_key?('compositum')
|
50
|
+
|
55
51
|
cfg = lingo.dictionary_config['compound'] ||
|
56
52
|
lingo.dictionary_config['compositum'] # DEPRECATE compositum
|
57
53
|
|
@@ -70,80 +66,63 @@ class Lingo
|
|
70
66
|
# Bestimmte Sequenzen können als ungültige Komposita erkannt werden,
|
71
67
|
# z.B. ist ein Kompositum aus zwei Adjetiven kein Kompositum, also
|
72
68
|
# skip-sequence = 'aa'
|
73
|
-
@sequences = cfg.fetch('skip-sequences', []).map!
|
69
|
+
@sequences = cfg.fetch('skip-sequences', []).map! { |i| i.downcase }
|
74
70
|
end
|
75
71
|
|
76
72
|
def close
|
77
73
|
@dic.close
|
78
74
|
end
|
79
75
|
|
80
|
-
def report
|
81
|
-
super.update(@dic.report)
|
82
|
-
end
|
83
|
-
|
84
76
|
# find_compound(str) -> word wenn level=1
|
85
77
|
# find_compound(str) -> [lex, sta] wenn level!=1
|
86
78
|
#
|
87
79
|
# find_compound arbeitet in verschiedenen Leveln, da die Methode auch rekursiv aufgerufen wird. Ein Level größer 1
|
88
80
|
# entspricht daher einem rekursiven Aufruf
|
89
81
|
def find_compound(str, level = 1, tail = false)
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
82
|
+
return permute_compound([[], [], ''], str, level, tail) if level != 1
|
83
|
+
|
84
|
+
(@_compound ||= {})[str] ||= permute_compound(
|
85
|
+
com = Word.new(str, WA_UNKNOWN), str, level, tail
|
86
|
+
) { |lex|
|
87
|
+
com.attr = WA_COMPOUND
|
88
|
+
com.lexicals = lex.each { |l|
|
89
|
+
l.attr += @append_wc unless l.attr == LA_COMPOUND
|
90
|
+
}
|
91
|
+
}
|
92
|
+
end
|
98
93
|
|
99
|
-
|
100
|
-
inc('String zu kurz')
|
101
|
-
return top ? com : empty
|
102
|
-
end
|
94
|
+
private
|
103
95
|
|
104
|
-
|
96
|
+
def permute_compound(ret, str, level, tail)
|
97
|
+
if (len = str.length) > @min_word_size
|
98
|
+
str = Unicode.downcase(str)
|
105
99
|
|
106
|
-
|
100
|
+
lex, sta, seq = res = if str =~ HYPHEN_RE
|
101
|
+
test_compound($1, '-', $2, level, tail)
|
102
|
+
else
|
103
|
+
sug = @suggestions[level] ||= []
|
107
104
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
str.length / sta.size >= @min_avg_part_size &&
|
112
|
-
(@sequences.empty? || !@sequences.include?(seq))
|
105
|
+
catch(:res) {
|
106
|
+
1.upto(len - 1) { |i|
|
107
|
+
tst = test_compound(str[0, i], '', str[i, len], level, tail)
|
113
108
|
|
114
|
-
|
115
|
-
|
116
|
-
|
109
|
+
unless (lex = tst.first).empty?
|
110
|
+
lex.last.attr == LA_TAKEITASIS ? sug << tst : throw(:res, tst)
|
111
|
+
end
|
112
|
+
}
|
117
113
|
|
118
|
-
|
119
|
-
com.lexicals = lex.map { |l|
|
120
|
-
l.attr == LA_COMPOUND ? l :
|
121
|
-
Lexical.new(l.form, l.attr + @append_wc)
|
114
|
+
sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
|
122
115
|
}
|
123
116
|
end
|
124
117
|
|
125
|
-
|
126
|
-
|
127
|
-
|
118
|
+
block_given? ? yield(lex) : ret = res if !lex.empty? &&
|
119
|
+
sta.size <= @max_parts &&
|
120
|
+
sta.min >= @min_part_size &&
|
121
|
+
str.length / sta.size >= @min_avg_part_size &&
|
122
|
+
(@sequences.empty? || !@sequences.include?(seq))
|
128
123
|
end
|
129
|
-
end
|
130
|
-
|
131
|
-
# permute_compound( _aString_ ) -> [lex, sta, seq]
|
132
|
-
def permute_compound(str, level = 1, tail = false)
|
133
|
-
return test_compound($1, '-', $2, level, tail) if str =~ HYPHEN_RE
|
134
|
-
|
135
|
-
sug, len = @suggestions[level] ||= [], str.length
|
136
|
-
|
137
|
-
1.upto(len - 1) { |i|
|
138
|
-
res = test_compound(str[0, i], '', str[i, len], level, tail)
|
139
124
|
|
140
|
-
|
141
|
-
return res unless lex.last.attr == LA_TAKEITASIS
|
142
|
-
sug << res
|
143
|
-
end
|
144
|
-
}
|
145
|
-
|
146
|
-
sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
|
125
|
+
ret
|
147
126
|
end
|
148
127
|
|
149
128
|
# test_compound() -> [lex, sta, seq]
|
@@ -189,6 +168,10 @@ class Lingo
|
|
189
168
|
end
|
190
169
|
end
|
191
170
|
|
171
|
+
{ flex => fform, blex => bform }.each { |a, f|
|
172
|
+
a.each { |l| l.src ||= f }
|
173
|
+
}
|
174
|
+
|
192
175
|
flex.concat(blex).delete_if { |l| l.attr == LA_COMPOUND }.
|
193
176
|
push(Lexical.new(fform + infix + bform, LA_COMPOUND)).sort!
|
194
177
|
|
@@ -40,14 +40,11 @@ class Lingo
|
|
40
40
|
return 1 unless other.is_a?(self.class)
|
41
41
|
|
42
42
|
a1, a2 = attr, other.attr
|
43
|
+
return form <=> other.form if a1 == a2
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
a1.empty? ? 1 : a2.empty? ? -1 : begin
|
48
|
-
i1, i2 = [a1, a2].map(&LA_SORTORDER.method(:index))
|
49
|
-
i1 ? i2 ? i2 <=> i1 : -1 : i2 ? 1 : a1 <=> a2
|
50
|
-
end
|
45
|
+
a1.empty? ? 1 : a2.empty? ? -1 : begin
|
46
|
+
i1, i2 = LA_SORTORDER.values_at(a1, a2)
|
47
|
+
i1 ? i2 ? i1 <=> i2 : -1 : i2 ? 1 : a1 <=> a2
|
51
48
|
end
|
52
49
|
end
|
53
50
|
|
@@ -34,9 +34,6 @@ class Lingo
|
|
34
34
|
|
35
35
|
class LexicalHash
|
36
36
|
|
37
|
-
include Cachable
|
38
|
-
include Reportable
|
39
|
-
|
40
37
|
def self.open(*args)
|
41
38
|
yield lexical_hash = new(*args)
|
42
39
|
ensure
|
@@ -44,9 +41,6 @@ class Lingo
|
|
44
41
|
end
|
45
42
|
|
46
43
|
def initialize(id, lingo)
|
47
|
-
init_cachable
|
48
|
-
init_reportable(id)
|
49
|
-
|
50
44
|
@wc = lingo.database_config(id).fetch('def-wc', LA_UNKNOWN)
|
51
45
|
@src = Database.open(id, lingo)
|
52
46
|
end
|
@@ -56,35 +50,23 @@ class Lingo
|
|
56
50
|
end
|
57
51
|
|
58
52
|
def [](key)
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
else str
|
77
|
-
end
|
78
|
-
}
|
79
|
-
|
80
|
-
record.compact!
|
81
|
-
record.sort!
|
82
|
-
record.uniq!
|
83
|
-
|
84
|
-
inc('data found')
|
85
|
-
end
|
86
|
-
|
87
|
-
store(key, record)
|
53
|
+
rec = @src[key = Unicode.downcase(key)] or return
|
54
|
+
|
55
|
+
res = rec.map { |str|
|
56
|
+
case str
|
57
|
+
when /^\*\d+$/ then str
|
58
|
+
when /^#(.)$/ then Lexical.new(key, $1)
|
59
|
+
when /^([^#]+?)\s*#(.)$/ then Lexical.new($1, $2)
|
60
|
+
when /^([^#]+)$/ then Lexical.new($1, @wc)
|
61
|
+
else str
|
62
|
+
end
|
63
|
+
}
|
64
|
+
|
65
|
+
res.compact!
|
66
|
+
res.sort!
|
67
|
+
res.uniq!
|
68
|
+
|
69
|
+
res
|
88
70
|
end
|
89
71
|
|
90
72
|
end
|
data/lib/lingo/language/token.rb
CHANGED
data/lib/lingo/language/word.rb
CHANGED
@@ -80,16 +80,14 @@ class Lingo
|
|
80
80
|
end
|
81
81
|
|
82
82
|
def add_lexicals(lex)
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
self
|
83
|
+
unless lex.empty?
|
84
|
+
@lexicals.concat(lex).uniq!
|
85
|
+
@lexicals.sort!
|
86
|
+
end
|
89
87
|
end
|
90
88
|
|
91
89
|
def attrs(compound_parts = true)
|
92
|
-
lexicals(compound_parts).map
|
90
|
+
lexicals(compound_parts).map { |i| i.attr }
|
93
91
|
end
|
94
92
|
|
95
93
|
def parts
|
@@ -125,7 +123,8 @@ class Lingo
|
|
125
123
|
end
|
126
124
|
|
127
125
|
def <<(*other)
|
128
|
-
|
126
|
+
other.flatten!
|
127
|
+
lexicals.concat(other)
|
129
128
|
self
|
130
129
|
end
|
131
130
|
|
@@ -36,10 +36,10 @@ class Lingo
|
|
36
36
|
|
37
37
|
include Comparable
|
38
38
|
|
39
|
-
attr_accessor :form, :attr
|
39
|
+
attr_accessor :form, :attr, :src
|
40
40
|
|
41
|
-
def initialize(form, attr = '-')
|
42
|
-
@form, @attr = form || '', attr || ''
|
41
|
+
def initialize(form, attr = '-', src = nil)
|
42
|
+
@form, @attr, @src = form || '', attr || '', src
|
43
43
|
end
|
44
44
|
|
45
45
|
def unknown?
|
@@ -67,7 +67,7 @@ class Lingo
|
|
67
67
|
end
|
68
68
|
|
69
69
|
def hash
|
70
|
-
|
70
|
+
to_a.hash
|
71
71
|
end
|
72
72
|
|
73
73
|
def eql?(other)
|
data/lib/lingo/language.rb
CHANGED
@@ -31,6 +31,7 @@ require_relative 'language/word_form'
|
|
31
31
|
require_relative 'language/token'
|
32
32
|
require_relative 'language/lexical'
|
33
33
|
require_relative 'language/word'
|
34
|
+
require_relative 'language/char'
|
34
35
|
|
35
36
|
class Lingo
|
36
37
|
|
@@ -72,7 +73,7 @@ class Lingo
|
|
72
73
|
LA_SYNONYM = 'y',
|
73
74
|
LA_STEM = 'z',
|
74
75
|
LA_UNKNOWN = '?'
|
75
|
-
].
|
76
|
+
].each_with_index.inject({}) { |h, (i, j)| h[i] = j; h }
|
76
77
|
|
77
78
|
end
|
78
79
|
|
@@ -0,0 +1,14 @@
|
|
1
|
+
---
|
2
|
+
meeting:
|
3
|
+
attendees:
|
4
|
+
- text_reader: { files: STDIN }
|
5
|
+
|
6
|
+
- tokenizer: { }
|
7
|
+
- word_searcher: { source: sys-dic, mode: first }
|
8
|
+
- decomposer: { source: sys-dic }
|
9
|
+
- multi_worder: { source: sys-mul }
|
10
|
+
- sequencer: { stopper: PUNC,OTHR }
|
11
|
+
- synonymer: { skip: '?,t', source: sys-syn }
|
12
|
+
|
13
|
+
- vector_filter: { debug: 'true', prompt: '' }
|
14
|
+
- text_writer: { ext: STDOUT, sep: "\n" }
|