lingo 1.8.2 → 1.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +33 -0
- data/README +6 -5
- data/Rakefile +6 -4
- data/{lib/lingo/cachable.rb → bin/lingosrv} +30 -58
- data/bin/lingoweb +30 -0
- data/de.lang +2 -13
- data/en/lingo-irr.txt +266 -0
- data/en/lingo-wdn.txt +37319 -0
- data/en.lang +2 -15
- data/lib/lingo/app.rb +82 -0
- data/lib/lingo/attendee/abbreviator.rb +22 -26
- data/lib/lingo/attendee/debugger.rb +8 -4
- data/lib/lingo/attendee/decomposer.rb +0 -1
- data/lib/lingo/attendee/dehyphenizer.rb +2 -2
- data/lib/lingo/attendee/multi_worder.rb +20 -13
- data/lib/lingo/attendee/noneword_filter.rb +2 -7
- data/lib/lingo/attendee/sequencer.rb +43 -19
- data/lib/lingo/attendee/stemmer/porter.rb +2 -2
- data/lib/lingo/attendee/stemmer.rb +1 -1
- data/lib/lingo/attendee/synonymer.rb +1 -9
- data/lib/lingo/attendee/text_reader.rb +42 -29
- data/lib/lingo/attendee/text_writer.rb +3 -6
- data/lib/lingo/attendee/tokenizer.rb +87 -69
- data/lib/lingo/attendee/variator.rb +7 -5
- data/lib/lingo/attendee/vector_filter.rb +11 -11
- data/lib/lingo/attendee/word_searcher.rb +1 -9
- data/lib/lingo/attendee.rb +24 -105
- data/lib/lingo/buffered_attendee.rb +2 -9
- data/lib/lingo/call.rb +18 -13
- data/lib/lingo/cli.rb +5 -10
- data/lib/lingo/config.rb +40 -7
- data/lib/lingo/ctl.rb +69 -57
- data/lib/lingo/database/hash_store.rb +9 -4
- data/lib/lingo/database/sdbm_store.rb +4 -7
- data/lib/lingo/database/source/multi_key.rb +1 -1
- data/lib/lingo/database/source/multi_value.rb +1 -1
- data/lib/lingo/database/source.rb +2 -20
- data/lib/lingo/database.rb +30 -19
- data/lib/lingo/debug.rb +79 -0
- data/lib/lingo/{core_ext.rb → language/char.rb} +43 -42
- data/lib/lingo/language/dictionary.rb +38 -46
- data/lib/lingo/language/grammar.rb +40 -57
- data/lib/lingo/language/lexical.rb +4 -7
- data/lib/lingo/language/lexical_hash.rb +17 -35
- data/lib/lingo/language/token.rb +4 -0
- data/lib/lingo/language/word.rb +7 -8
- data/lib/lingo/language/word_form.rb +4 -4
- data/lib/lingo/language.rb +2 -1
- data/lib/lingo/srv/config.ru +4 -0
- data/lib/lingo/srv/lingosrv.cfg +14 -0
- data/lib/lingo/{reportable.rb → srv.rb} +59 -61
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web/config.ru +4 -0
- data/lib/lingo/web/lingoweb.cfg +14 -0
- data/lib/lingo/web/public/lingo.png +0 -0
- data/lib/lingo/web/public/lingoweb.css +74 -0
- data/lib/lingo/web/views/index.erb +92 -0
- data/lib/lingo/web.rb +94 -0
- data/lib/lingo.rb +27 -29
- data/lingo.cfg +1 -1
- data/lir.cfg +24 -0
- data/ru/lingo-dic.txt +22342 -0
- data/ru/lingo-mul.txt +5151 -0
- data/ru/lingo-syn.txt +0 -0
- data/ru.lang +99 -0
- data/test/attendee/ts_sequencer.rb +2 -2
- data/test/attendee/ts_text_reader.rb +36 -2
- data/test/attendee/ts_text_writer.rb +6 -6
- data/test/lir.vec +3 -3
- data/test/test_helper.rb +104 -102
- data/test/ts_database.rb +1 -1
- data/test/ts_language.rb +55 -96
- data/txt/artikel-ru.txt +45 -0
- data/txt/lir.txt +1 -3
- metadata +143 -83
- data/TODO +0 -23
data/lib/lingo/{core_ext.rb → language/char.rb} RENAMED

@@ -1,42 +1,43 @@
-# encoding: utf-8
-
-#--
-###############################################################################
-# #
-# Lingo -- A full-featured automatic indexing system #
-# #
-# Copyright (C) 2005-2007 John Vorhauer #
-# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
-# #
-# Lingo is free software; you can redistribute it and/or modify it under the #
-# terms of the GNU Affero General Public License as published by the Free #
-# Software Foundation; either version 3 of the License, or (at your option) #
-# any later version. #
-# #
-# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
-# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
-# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
-# more details. #
-# #
-# You should have received a copy of the GNU Affero General Public License #
-# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
-# #
-###############################################################################
-#++
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+# encoding: utf-8
+
+#--
+###############################################################################
+# #
+# Lingo -- A full-featured automatic indexing system #
+# #
+# Copyright (C) 2005-2007 John Vorhauer #
+# Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
+# #
+# Lingo is free software; you can redistribute it and/or modify it under the #
+# terms of the GNU Affero General Public License as published by the Free #
+# Software Foundation; either version 3 of the License, or (at your option) #
+# any later version. #
+# #
+# Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
+# more details. #
+# #
+# You should have received a copy of the GNU Affero General Public License #
+# along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
+# #
+###############################################################################
+#++
+
+class Lingo
+
+  module Language
+
+    module Char
+
+      ANY = [
+        CHAR  = '[[:alpha:]]',
+        DIGIT = '[[:digit:]]',
+        LEGAL = '[ /&()\[\].,\'<>-]'
+      ].join('|')
+
+    end
+
+  end
+
+end
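The new Language::Char module only collects character-class fragments; what ANY amounts to is a single alternation of "permitted" characters. A minimal standalone sketch of that expansion and one possible use as a scanning pattern (the usage is illustrative, not the gem's tokenizer code):

  # encoding: utf-8
  # Rebuild the three fragments and join them the way Char::ANY does.
  CHAR  = '[[:alpha:]]'
  DIGIT = '[[:digit:]]'
  LEGAL = '[ /&()\[\].,\'<>-]'

  ANY = [CHAR, DIGIT, LEGAL].join('|')

  # A run of "any permitted character" -- purely illustrative usage.
  re = Regexp.new("(?:#{ANY})+")
  p 'Häuser-Nr. 12, <a>'.scan(re)   # => ["Häuser-Nr. 12, <a>"]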
data/lib/lingo/language/dictionary.rb CHANGED

@@ -30,8 +30,7 @@ class Lingo

     class Dictionary

-
-      include Reportable
+      KEY_REF_RE = %r{\A#{Database::KEY_REF_ESC}\d+}

       def self.open(*args)
         yield dictionary = new(*args)
@@ -41,12 +40,9 @@

       def initialize(config, lingo)
         unless config.has_key?('source')
-          raise ArgumentError,
+          raise ArgumentError, "Required parameter `source' missing."
         end

-        init_cachable
-        init_reportable
-
         @suffixes, @infixes = [], []

         Array(lingo.dictionary_config['suffix']).each { |t, s|
@@ -67,56 +63,49 @@
       end

       def close
-        @src.each
-      end
-
-      def report
-        super.tap { |rep| @src.each { |src| rep.update(src.report) } }
+        @src.each { |i| i.close }
       end

       # _dic_.find_word( _aString_ ) -> _aNewWord_
       #
       # Erstellt aus dem String ein Wort und sucht nach diesem im Wörterbuch.
       def find_word(str)
-
-
-
-
-
-
-
-        unless (lexicals = select_with_suffix(str)).empty?
-          word.lexicals = lexicals
-          word.attr = WA_IDENTIFIED
-        end
-
-        store(key, word)
+        (@_word ||= {})[str] ||= Word.new(str, WA_UNKNOWN).tap { |w|
+          unless (lexicals = select_with_suffix(str)).empty?
+            w.lexicals = lexicals
+            w.attr = WA_IDENTIFIED
+          end
+        }
       end

-      def find_synonyms(obj)
+      def find_synonyms(obj, syn = [])
         lex = obj.lexicals
         lex = [obj] if lex.empty? && obj.unknown?

-
-        ref = %r{\A#{Database::KEY_REF_ESC}\d+}
+        com, ref = obj.attr == WA_COMPOUND, KEY_REF_RE

-        lex.
-
-
-
-          select(l.form).each { |y| s << y unless y =~ ref }
+        lex.each { |l|
+          select(l.form, syn) { |i| i =~ ref } unless com &&
+            l.attr != LA_COMPOUND || l.attr == LA_SYNONYM
         }
+
+        syn
       end

       # _dic_.select( _aString_ ) -> _ArrayOfLexicals_
       #
       # Sucht alle Wörterbücher durch und gibt den ersten Treffer zurück (+mode = first+), oder alle Treffer (+mode = all+)
-      def select(str)
-        @src.
+      def select(str, lex = [])
+        @src.each { |src|
           l = src[str] or next
-          lex.concat(l)
-          break
-        }
+          lex.concat(block_given? ? l.delete_if { |i| yield i } : l)
+          break unless @all
+        }
+
+        lex.sort!
+        lex.uniq!
+
+        lex
       end

       # _dic_.select_with_suffix( _aString_ ) -> _ArrayOfLexicals_
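With the Cachable mixin gone, find_word now memoizes straight into an instance-level hash via `(@_word ||= {})[str] ||= ...`. A tiny standalone sketch of that caching idiom (TinyDict and its payload are made up; in the gem the payload is the Word built in the hunk above):

  class TinyDict
    def find_word(str)
      (@_word ||= {})[str] ||= begin
        puts "building entry for #{str.inspect}"   # only reached on the first lookup
        str.capitalize                             # stands in for Word.new(str, WA_UNKNOWN).tap { ... }
      end
    end
  end

  d = TinyDict.new
  d.find_word('haus')   # prints the message, builds and caches the value
  d.find_word('haus')   # served from @_word, nothing printed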
@@ -154,19 +143,22 @@
       private

       def select_with_affix(affix, str)
-        select(str)
-
-
-
-
-
-
+        lex = select(str)
+
+        affix_lexicals(affix, str).each { |a| select(a.form, lex) { |b|
+          affix == :suffix && a.attr != b.attr
+        } } if lex.empty?
+
+        lex
       end

       def affix_lexicals(affix, str)
-        instance_variable_get("@#{affix}es").
-
+        lex = instance_variable_get("@#{affix}es").map { |r, e, t|
+          Lexical.new("#{$`}#{e == '*' ? '' : e}#{$'}", t) if str =~ r
         }
+
+        lex.compact!
+        lex
       end

     end
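affix_lexicals now builds its candidates with a plain map over the stored [regexp, ending, word class] rules, where '*' means the matched affix is simply stripped. A toy run of that rule application (the two rules are invented, not taken from de.lang):

  rules = [
    [/en\z/, '*', 's'],
    [/es\z/, '*', 's']
  ]

  str = 'hauses'

  candidates = rules.map { |r, e, t|
    # $` and $' are the pre- and post-match parts of the last successful =~
    ["#{$`}#{e == '*' ? '' : e}#{$'}", t] if str =~ r
  }
  candidates.compact!

  p candidates   # => [["haus", "s"]] -- each candidate is then looked up again via select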
data/lib/lingo/language/grammar.rb CHANGED

@@ -35,9 +35,6 @@ class Lingo

     class Grammar

-      include Cachable
-      include Reportable
-
       HYPHEN_RE = %r{\A(.+)-([^-]+)\z}

       def self.open(*args)
@@ -47,11 +44,10 @@
       end

       def initialize(config, lingo)
-        init_cachable
-        init_reportable
-
         @dic, @suggestions = Dictionary.new(config, lingo), []

+        lingo.deprecate(:compositum, :compound, self) if lingo.dictionary_config.has_key?('compositum')
+
         cfg = lingo.dictionary_config['compound'] ||
               lingo.dictionary_config['compositum'] # DEPRECATE compositum

@@ -70,80 +66,63 @@
       # Bestimmte Sequenzen können als ungültige Komposita erkannt werden,
       # z.B. ist ein Kompositum aus zwei Adjetiven kein Kompositum, also
       # skip-sequence = 'aa'
-        @sequences = cfg.fetch('skip-sequences', []).map!
+        @sequences = cfg.fetch('skip-sequences', []).map! { |i| i.downcase }
       end

       def close
         @dic.close
       end

-      def report
-        super.update(@dic.report)
-      end
-
       # find_compound(str) -> word wenn level=1
       # find_compound(str) -> [lex, sta] wenn level!=1
       #
       # find_compound arbeitet in verschiedenen Leveln, da die Methode auch rekursiv aufgerufen wird. Ein Level größer 1
       # entspricht daher einem rekursiven Aufruf
       def find_compound(str, level = 1, tail = false)
-
-
-
-
-
-
-
-
+        return permute_compound([[], [], ''], str, level, tail) if level != 1
+
+        (@_compound ||= {})[str] ||= permute_compound(
+          com = Word.new(str, WA_UNKNOWN), str, level, tail
+        ) { |lex|
+          com.attr = WA_COMPOUND
+          com.lexicals = lex.each { |l|
+            l.attr += @append_wc unless l.attr == LA_COMPOUND
+          }
+        }
+      end

-
-        inc('String zu kurz')
-        return top ? com : empty
-      end
+      private

-
+      def permute_compound(ret, str, level, tail)
+        if (len = str.length) > @min_word_size
+          str = Unicode.downcase(str)

-
+          lex, sta, seq = res = if str =~ HYPHEN_RE
+            test_compound($1, '-', $2, level, tail)
+          else
+            sug = @suggestions[level] ||= []

-
-
-
-          str.length / sta.size >= @min_avg_part_size &&
-          (@sequences.empty? || !@sequences.include?(seq))
+            catch(:res) {
+              1.upto(len - 1) { |i|
+                tst = test_compound(str[0, i], '', str[i, len], level, tail)

-
-
-
+                unless (lex = tst.first).empty?
+                  lex.last.attr == LA_TAKEITASIS ? sug << tst : throw(:res, tst)
+                end
+              }

-
-          com.lexicals = lex.map { |l|
-            l.attr == LA_COMPOUND ? l :
-            Lexical.new(l.form, l.attr + @append_wc)
+              sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
             }
           end

-
-
-
+          block_given? ? yield(lex) : ret = res if !lex.empty? &&
+            sta.size <= @max_parts &&
+            sta.min >= @min_part_size &&
+            str.length / sta.size >= @min_avg_part_size &&
+            (@sequences.empty? || !@sequences.include?(seq))
         end
-      end
-
-      # permute_compound( _aString_ ) -> [lex, sta, seq]
-      def permute_compound(str, level = 1, tail = false)
-        return test_compound($1, '-', $2, level, tail) if str =~ HYPHEN_RE
-
-        sug, len = @suggestions[level] ||= [], str.length
-
-        1.upto(len - 1) { |i|
-          res = test_compound(str[0, i], '', str[i, len], level, tail)

-
-          return res unless lex.last.attr == LA_TAKEITASIS
-          sug << res
-          end
-        }
-
-        sug.empty? ? [[], [], ''] : sug.first.tap { sug.clear }
+        ret
       end

       # test_compound() -> [lex, sta, seq]
@@ -189,6 +168,10 @@
           end
         end

+        { flex => fform, blex => bform }.each { |a, f|
+          a.each { |l| l.src ||= f }
+        }
+
         flex.concat(blex).delete_if { |l| l.attr == LA_COMPOUND }.
           push(Lexical.new(fform + infix + bform, LA_COMPOUND)).sort!

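The rewritten permute_compound above walks every split position and uses catch/throw to bail out at the first split that yields a proper analysis, keeping "take it as is" splits only as a fallback. The control flow in isolation (the word list and the acceptance test are made up):

  PARTS = %w[staub sauger]   # toy dictionary of known compound parts

  def first_split(str)
    catch(:res) {
      1.upto(str.length - 1) { |i|
        head, tail = str[0, i], str[i..-1]
        throw(:res, [head, tail]) if PARTS.include?(head) && PARTS.include?(tail)
      }
      []   # nothing thrown: fall back to the empty analysis
    }
  end

  p first_split('staubsauger')   # => ["staub", "sauger"]
  p first_split('staubzzz')      # => []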
data/lib/lingo/language/lexical.rb CHANGED

@@ -40,14 +40,11 @@
         return 1 unless other.is_a?(self.class)

         a1, a2 = attr, other.attr
+        return form <=> other.form if a1 == a2

-
-
-
-        a1.empty? ? 1 : a2.empty? ? -1 : begin
-          i1, i2 = [a1, a2].map(&LA_SORTORDER.method(:index))
-          i1 ? i2 ? i2 <=> i1 : -1 : i2 ? 1 : a1 <=> a2
-        end
+        a1.empty? ? 1 : a2.empty? ? -1 : begin
+          i1, i2 = LA_SORTORDER.values_at(a1, a2)
+          i1 ? i2 ? i1 <=> i2 : -1 : i2 ? 1 : a1 <=> a2
         end
       end

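Lexical#<=> now short-circuits on equal attributes and looks both attributes up in one go: LA_SORTORDER has become a Hash (see the language.rb hunk further down), so values_at replaces the two Array#index calls. A condensed sketch, with the attribute set and its order chosen for illustration only:

  LA_SORTORDER = %w[k m y z ?].each_with_index.inject({}) { |h, (i, j)| h[i] = j; h }

  a1, a2 = 'y', 'k'
  i1, i2 = LA_SORTORDER.values_at(a1, a2)   # => [2, 0]

  cmp = i1 ? i2 ? i1 <=> i2 : -1 : i2 ? 1 : a1 <=> a2
  p cmp   # => 1, i.e. 'k' sorts ahead of 'y'; attributes missing from the hash fall back to string order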
data/lib/lingo/language/lexical_hash.rb CHANGED

@@ -34,9 +34,6 @@ class Lingo

     class LexicalHash

-      include Cachable
-      include Reportable
-
       def self.open(*args)
         yield lexical_hash = new(*args)
       ensure
@@ -44,9 +41,6 @@
       end

       def initialize(id, lingo)
-        init_cachable
-        init_reportable(id)
-
         @wc = lingo.database_config(id).fetch('def-wc', LA_UNKNOWN)
         @src = Database.open(id, lingo)
       end
@@ -56,35 +50,23 @@
       end

       def [](key)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        else str
-        end
-        }
-
-        record.compact!
-        record.sort!
-        record.uniq!
-
-        inc('data found')
-        end
-
-        store(key, record)
+        rec = @src[key = Unicode.downcase(key)] or return
+
+        res = rec.map { |str|
+          case str
+          when /^\*\d+$/           then str
+          when /^#(.)$/            then Lexical.new(key, $1)
+          when /^([^#]+?)\s*#(.)$/ then Lexical.new($1, $2)
+          when /^([^#]+)$/         then Lexical.new($1, @wc)
+          else str
+          end
+        }
+
+        res.compact!
+        res.sort!
+        res.uniq!
+
+        res
       end

     end
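LexicalHash#[] now maps the raw database record inline instead of going through the removed Cachable/Reportable plumbing. The case expression distinguishes key references ('*<n>'), bare word-class markers ('#x'), 'form #x' pairs and plain forms. A standalone sketch of those formats (the sample record is invented, and tagged arrays stand in for Lexical):

  record     = ['*42', '#s', 'gebäude #s', 'haus']   # invented raw entries
  wc_default = 's'                                   # stands in for the 'def-wc' setting

  parsed = record.map { |str|
    case str
    when /^\*\d+$/           then [:key_ref, str]             # left for find_synonyms to filter out
    when /^#(.)$/            then [:lexical, '<the key>', $1] # the key itself carries the word class
    when /^([^#]+?)\s*#(.)$/ then [:lexical, $1, $2]
    when /^([^#]+)$/         then [:lexical, $1, wc_default]
    else str
    end
  }

  p parsed
  # => [[:key_ref, "*42"], [:lexical, "<the key>", "s"],
  #     [:lexical, "gebäude", "s"], [:lexical, "haus", "s"]]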
data/lib/lingo/language/token.rb CHANGED
data/lib/lingo/language/word.rb CHANGED
@@ -80,16 +80,14 @@ class Lingo
       end

       def add_lexicals(lex)
-
-
-
-
-
-        self
+        unless lex.empty?
+          @lexicals.concat(lex).uniq!
+          @lexicals.sort!
+        end
       end

       def attrs(compound_parts = true)
-        lexicals(compound_parts).map
+        lexicals(compound_parts).map { |i| i.attr }
       end

       def parts
@@ -125,7 +123,8 @@
       end

       def <<(*other)
-
+        other.flatten!
+        lexicals.concat(other)
         self
       end

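In add_lexicals (first word.rb hunk above) the sort! deliberately stays on its own line: the bang methods return nil when they change nothing, so chaining it onto concat(...).uniq! would be unsafe. For example:

  a = [1, 2]
  p a.concat([3]).uniq!   # => nil -- no duplicates were removed
  a.sort!                 # safe, because it is called on the array itself, not on the nil above
  p a                     # => [1, 2, 3]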
data/lib/lingo/language/word_form.rb CHANGED

@@ -36,10 +36,10 @@ class Lingo

       include Comparable

-      attr_accessor :form, :attr
+      attr_accessor :form, :attr, :src

-      def initialize(form, attr = '-')
-        @form, @attr = form || '', attr || ''
+      def initialize(form, attr = '-', src = nil)
+        @form, @attr, @src = form || '', attr || '', src
       end

       def unknown?
@@ -67,7 +67,7 @@ class Lingo
       end

       def hash
-
+        to_a.hash
       end

       def eql?(other)
data/lib/lingo/language.rb CHANGED
@@ -31,6 +31,7 @@ require_relative 'language/word_form'
 require_relative 'language/token'
 require_relative 'language/lexical'
 require_relative 'language/word'
+require_relative 'language/char'

 class Lingo

@@ -72,7 +73,7 @@ class Lingo
         LA_SYNONYM = 'y',
         LA_STEM = 'z',
         LA_UNKNOWN = '?'
-      ].
+      ].each_with_index.inject({}) { |h, (i, j)| h[i] = j; h }

     end

@@ -0,0 +1,14 @@
+---
+meeting:
+  attendees:
+    - text_reader:   { files: STDIN }
+
+    - tokenizer:     { }
+    - word_searcher: { source: sys-dic, mode: first }
+    - decomposer:    { source: sys-dic }
+    - multi_worder:  { source: sys-mul }
+    - sequencer:     { stopper: PUNC,OTHR }
+    - synonymer:     { skip: '?,t', source: sys-syn }
+
+    - vector_filter: { debug: 'true', prompt: '' }
+    - text_writer:   { ext: STDOUT, sep: "\n" }