lingo 1.8.2 → 1.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +33 -0
- data/README +6 -5
- data/Rakefile +6 -4
- data/{lib/lingo/cachable.rb → bin/lingosrv} +30 -58
- data/bin/lingoweb +30 -0
- data/de.lang +2 -13
- data/en/lingo-irr.txt +266 -0
- data/en/lingo-wdn.txt +37319 -0
- data/en.lang +2 -15
- data/lib/lingo/app.rb +82 -0
- data/lib/lingo/attendee/abbreviator.rb +22 -26
- data/lib/lingo/attendee/debugger.rb +8 -4
- data/lib/lingo/attendee/decomposer.rb +0 -1
- data/lib/lingo/attendee/dehyphenizer.rb +2 -2
- data/lib/lingo/attendee/multi_worder.rb +20 -13
- data/lib/lingo/attendee/noneword_filter.rb +2 -7
- data/lib/lingo/attendee/sequencer.rb +43 -19
- data/lib/lingo/attendee/stemmer/porter.rb +2 -2
- data/lib/lingo/attendee/stemmer.rb +1 -1
- data/lib/lingo/attendee/synonymer.rb +1 -9
- data/lib/lingo/attendee/text_reader.rb +42 -29
- data/lib/lingo/attendee/text_writer.rb +3 -6
- data/lib/lingo/attendee/tokenizer.rb +87 -69
- data/lib/lingo/attendee/variator.rb +7 -5
- data/lib/lingo/attendee/vector_filter.rb +11 -11
- data/lib/lingo/attendee/word_searcher.rb +1 -9
- data/lib/lingo/attendee.rb +24 -105
- data/lib/lingo/buffered_attendee.rb +2 -9
- data/lib/lingo/call.rb +18 -13
- data/lib/lingo/cli.rb +5 -10
- data/lib/lingo/config.rb +40 -7
- data/lib/lingo/ctl.rb +69 -57
- data/lib/lingo/database/hash_store.rb +9 -4
- data/lib/lingo/database/sdbm_store.rb +4 -7
- data/lib/lingo/database/source/multi_key.rb +1 -1
- data/lib/lingo/database/source/multi_value.rb +1 -1
- data/lib/lingo/database/source.rb +2 -20
- data/lib/lingo/database.rb +30 -19
- data/lib/lingo/debug.rb +79 -0
- data/lib/lingo/{core_ext.rb → language/char.rb} +43 -42
- data/lib/lingo/language/dictionary.rb +38 -46
- data/lib/lingo/language/grammar.rb +40 -57
- data/lib/lingo/language/lexical.rb +4 -7
- data/lib/lingo/language/lexical_hash.rb +17 -35
- data/lib/lingo/language/token.rb +4 -0
- data/lib/lingo/language/word.rb +7 -8
- data/lib/lingo/language/word_form.rb +4 -4
- data/lib/lingo/language.rb +2 -1
- data/lib/lingo/srv/config.ru +4 -0
- data/lib/lingo/srv/lingosrv.cfg +14 -0
- data/lib/lingo/{reportable.rb → srv.rb} +59 -61
- data/lib/lingo/version.rb +1 -1
- data/lib/lingo/web/config.ru +4 -0
- data/lib/lingo/web/lingoweb.cfg +14 -0
- data/lib/lingo/web/public/lingo.png +0 -0
- data/lib/lingo/web/public/lingoweb.css +74 -0
- data/lib/lingo/web/views/index.erb +92 -0
- data/lib/lingo/web.rb +94 -0
- data/lib/lingo.rb +27 -29
- data/lingo.cfg +1 -1
- data/lir.cfg +24 -0
- data/ru/lingo-dic.txt +22342 -0
- data/ru/lingo-mul.txt +5151 -0
- data/ru/lingo-syn.txt +0 -0
- data/ru.lang +99 -0
- data/test/attendee/ts_sequencer.rb +2 -2
- data/test/attendee/ts_text_reader.rb +36 -2
- data/test/attendee/ts_text_writer.rb +6 -6
- data/test/lir.vec +3 -3
- data/test/test_helper.rb +104 -102
- data/test/ts_database.rb +1 -1
- data/test/ts_language.rb +55 -96
- data/txt/artikel-ru.txt +45 -0
- data/txt/lir.txt +1 -3
- metadata +143 -83
- data/TODO +0 -23
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lingo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.2
|
4
|
+
version: 1.8.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-09-20 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ruby-nuggets
|
17
|
-
requirement:
|
17
|
+
requirement: !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,15 @@ dependencies:
|
|
22
22
|
version: 0.8.5
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements:
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ! '>='
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: 0.8.5
|
26
31
|
- !ruby/object:Gem::Dependency
|
27
32
|
name: unicode
|
28
|
-
requirement:
|
33
|
+
requirement: !ruby/object:Gem::Requirement
|
29
34
|
none: false
|
30
35
|
requirements:
|
31
36
|
- - ! '>='
|
@@ -33,10 +38,31 @@ dependencies:
|
|
33
38
|
version: '0'
|
34
39
|
type: :runtime
|
35
40
|
prerelease: false
|
36
|
-
version_requirements:
|
41
|
+
version_requirements: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
37
47
|
- !ruby/object:Gem::Dependency
|
38
48
|
name: highline
|
39
|
-
requirement:
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: sinatra
|
65
|
+
requirement: !ruby/object:Gem::Requirement
|
40
66
|
none: false
|
41
67
|
requirements:
|
42
68
|
- - ! '>='
|
@@ -44,10 +70,15 @@ dependencies:
|
|
44
70
|
version: '0'
|
45
71
|
type: :runtime
|
46
72
|
prerelease: false
|
47
|
-
version_requirements:
|
73
|
+
version_requirements: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ! '>='
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
48
79
|
- !ruby/object:Gem::Dependency
|
49
80
|
name: diff-lcs
|
50
|
-
requirement:
|
81
|
+
requirement: !ruby/object:Gem::Requirement
|
51
82
|
none: false
|
52
83
|
requirements:
|
53
84
|
- - ! '>='
|
@@ -55,10 +86,15 @@ dependencies:
|
|
55
86
|
version: 1.1.3
|
56
87
|
type: :development
|
57
88
|
prerelease: false
|
58
|
-
version_requirements:
|
89
|
+
version_requirements: !ruby/object:Gem::Requirement
|
90
|
+
none: false
|
91
|
+
requirements:
|
92
|
+
- - ! '>='
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: 1.1.3
|
59
95
|
- !ruby/object:Gem::Dependency
|
60
96
|
name: open4
|
61
|
-
requirement:
|
97
|
+
requirement: !ruby/object:Gem::Requirement
|
62
98
|
none: false
|
63
99
|
requirements:
|
64
100
|
- - ! '>='
|
@@ -66,7 +102,12 @@ dependencies:
|
|
66
102
|
version: '0'
|
67
103
|
type: :development
|
68
104
|
prerelease: false
|
69
|
-
version_requirements:
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
none: false
|
107
|
+
requirements:
|
108
|
+
- - ! '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
70
111
|
description: ! "Lingo is an open source indexing system for research and teachings.\nThe
|
71
112
|
main functions of Lingo are:\n\n* identification of (i.e. reduction to) basic word
|
72
113
|
form by means of\n dictionaries and suffix lists\n* algorithmic decomposition\n*
|
@@ -76,133 +117,152 @@ email:
|
|
76
117
|
- lingo@vorhauer.de
|
77
118
|
- jens.wille@uni-koeln.de
|
78
119
|
executables:
|
79
|
-
-
|
120
|
+
- lingosrv
|
80
121
|
- lingo
|
122
|
+
- lingoctl
|
123
|
+
- lingoweb
|
81
124
|
extensions: []
|
82
125
|
extra_rdoc_files:
|
83
126
|
- README
|
84
127
|
- COPYING
|
85
128
|
- ChangeLog
|
86
129
|
files:
|
87
|
-
- lib/lingo.rb
|
88
|
-
- lib/lingo/show_progress.rb
|
89
130
|
- lib/lingo/config.rb
|
90
|
-
- lib/lingo/
|
91
|
-
- lib/lingo/
|
92
|
-
- lib/lingo/
|
93
|
-
- lib/lingo/
|
94
|
-
- lib/lingo/
|
131
|
+
- lib/lingo/version.rb
|
132
|
+
- lib/lingo/cli.rb
|
133
|
+
- lib/lingo/srv.rb
|
134
|
+
- lib/lingo/web.rb
|
135
|
+
- lib/lingo/agenda_item.rb
|
136
|
+
- lib/lingo/ctl.rb
|
95
137
|
- lib/lingo/language/lexical_hash.rb
|
96
|
-
- lib/lingo/language/token.rb
|
97
138
|
- lib/lingo/language/word.rb
|
98
|
-
- lib/lingo/
|
99
|
-
- lib/lingo/
|
100
|
-
- lib/lingo/
|
101
|
-
- lib/lingo/
|
102
|
-
- lib/lingo/
|
103
|
-
- lib/lingo/
|
104
|
-
- lib/lingo/
|
105
|
-
- lib/lingo/attendee/synonymer.rb
|
106
|
-
- lib/lingo/attendee/word_searcher.rb
|
107
|
-
- lib/lingo/attendee/dehyphenizer.rb
|
108
|
-
- lib/lingo/attendee/sequencer.rb
|
109
|
-
- lib/lingo/attendee/debugger.rb
|
110
|
-
- lib/lingo/attendee/text_writer.rb
|
111
|
-
- lib/lingo/attendee/stemmer.rb
|
112
|
-
- lib/lingo/attendee/tokenizer.rb
|
113
|
-
- lib/lingo/attendee/abbreviator.rb
|
114
|
-
- lib/lingo/attendee/decomposer.rb
|
115
|
-
- lib/lingo/attendee/formatter.rb
|
116
|
-
- lib/lingo/database/hash_store.rb
|
117
|
-
- lib/lingo/database/show_progress.rb
|
118
|
-
- lib/lingo/database/sdbm_store.rb
|
139
|
+
- lib/lingo/language/char.rb
|
140
|
+
- lib/lingo/language/lexical.rb
|
141
|
+
- lib/lingo/language/token.rb
|
142
|
+
- lib/lingo/language/grammar.rb
|
143
|
+
- lib/lingo/language/dictionary.rb
|
144
|
+
- lib/lingo/language/word_form.rb
|
145
|
+
- lib/lingo/error.rb
|
119
146
|
- lib/lingo/database/source.rb
|
120
|
-
- lib/lingo/database/
|
147
|
+
- lib/lingo/database/source/key_value.rb
|
121
148
|
- lib/lingo/database/source/multi_value.rb
|
149
|
+
- lib/lingo/database/source/single_word.rb
|
122
150
|
- lib/lingo/database/source/word_class.rb
|
123
|
-
- lib/lingo/database/source/key_value.rb
|
124
151
|
- lib/lingo/database/source/multi_key.rb
|
125
|
-
- lib/lingo/database/
|
126
|
-
- lib/lingo/database/
|
152
|
+
- lib/lingo/database/crypter.rb
|
153
|
+
- lib/lingo/database/sdbm_store.rb
|
127
154
|
- lib/lingo/database/libcdb_store.rb
|
155
|
+
- lib/lingo/database/hash_store.rb
|
156
|
+
- lib/lingo/database/show_progress.rb
|
157
|
+
- lib/lingo/database/gdbm_store.rb
|
128
158
|
- lib/lingo/call.rb
|
159
|
+
- lib/lingo/attendee/abbreviator.rb
|
160
|
+
- lib/lingo/attendee/text_writer.rb
|
161
|
+
- lib/lingo/attendee/debugger.rb
|
162
|
+
- lib/lingo/attendee/dehyphenizer.rb
|
163
|
+
- lib/lingo/attendee/stemmer/porter.rb
|
164
|
+
- lib/lingo/attendee/decomposer.rb
|
165
|
+
- lib/lingo/attendee/multi_worder.rb
|
166
|
+
- lib/lingo/attendee/tokenizer.rb
|
167
|
+
- lib/lingo/attendee/word_searcher.rb
|
168
|
+
- lib/lingo/attendee/variator.rb
|
169
|
+
- lib/lingo/attendee/noneword_filter.rb
|
170
|
+
- lib/lingo/attendee/sequencer.rb
|
171
|
+
- lib/lingo/attendee/object_filter.rb
|
172
|
+
- lib/lingo/attendee/stemmer.rb
|
173
|
+
- lib/lingo/attendee/synonymer.rb
|
174
|
+
- lib/lingo/attendee/text_reader.rb
|
175
|
+
- lib/lingo/attendee/formatter.rb
|
176
|
+
- lib/lingo/attendee/vector_filter.rb
|
177
|
+
- lib/lingo/show_progress.rb
|
178
|
+
- lib/lingo/database.rb
|
179
|
+
- lib/lingo/language.rb
|
129
180
|
- lib/lingo/attendee.rb
|
130
|
-
- lib/lingo/
|
131
|
-
- lib/lingo/
|
132
|
-
- lib/lingo/cli.rb
|
133
|
-
- lib/lingo/core_ext.rb
|
181
|
+
- lib/lingo/debug.rb
|
182
|
+
- lib/lingo/app.rb
|
134
183
|
- lib/lingo/buffered_attendee.rb
|
135
|
-
- lib/lingo
|
136
|
-
-
|
137
|
-
- lib/lingo/language.rb
|
138
|
-
- lib/lingo/error.rb
|
139
|
-
- lib/lingo/reportable.rb
|
140
|
-
- bin/lingoctl
|
184
|
+
- lib/lingo.rb
|
185
|
+
- bin/lingosrv
|
141
186
|
- bin/lingo
|
187
|
+
- bin/lingoctl
|
188
|
+
- bin/lingoweb
|
142
189
|
- lingo.rb
|
143
190
|
- lingo.cfg
|
144
191
|
- lingo-call.cfg
|
145
192
|
- lir.cfg
|
146
193
|
- de.lang
|
147
194
|
- en.lang
|
195
|
+
- ru.lang
|
148
196
|
- de/lingo-dic.txt
|
149
197
|
- de/lingo-abk.txt
|
150
|
-
- de/lingo-syn.txt
|
151
198
|
- de/lingo-mul.txt
|
199
|
+
- de/lingo-syn.txt
|
152
200
|
- de/user-dic.txt
|
201
|
+
- de/test_syn2.txt
|
202
|
+
- de/test_mul2.txt
|
203
|
+
- de/test_mul.txt
|
153
204
|
- de/test_syn.txt
|
154
205
|
- de/test_dic.txt
|
155
|
-
- de/test_syn2.txt
|
156
206
|
- de/test_singleword.txt
|
157
|
-
- de/test_mul.txt
|
158
|
-
- de/test_mul2.txt
|
159
207
|
- en/lingo-dic.txt
|
160
|
-
- en/lingo-syn.txt
|
161
208
|
- en/lingo-mul.txt
|
209
|
+
- en/lingo-wdn.txt
|
210
|
+
- en/lingo-syn.txt
|
211
|
+
- en/lingo-irr.txt
|
162
212
|
- en/user-dic.txt
|
213
|
+
- ru/lingo-dic.txt
|
214
|
+
- ru/lingo-mul.txt
|
215
|
+
- ru/lingo-syn.txt
|
163
216
|
- txt/artikel.txt
|
164
217
|
- txt/artikel-en.txt
|
218
|
+
- txt/artikel-ru.txt
|
165
219
|
- txt/lir.txt
|
166
|
-
-
|
220
|
+
- lib/lingo/srv/lingosrv.cfg
|
221
|
+
- lib/lingo/srv/config.ru
|
222
|
+
- lib/lingo/web/lingoweb.cfg
|
223
|
+
- lib/lingo/web/public/lingo.png
|
224
|
+
- lib/lingo/web/public/lingoweb.css
|
225
|
+
- lib/lingo/web/views/index.erb
|
226
|
+
- lib/lingo/web/config.ru
|
167
227
|
- COPYING
|
168
|
-
-
|
228
|
+
- ChangeLog
|
169
229
|
- Rakefile
|
170
|
-
-
|
230
|
+
- README
|
171
231
|
- spec/spec_helper.rb
|
172
232
|
- .rspec
|
173
233
|
- test/ref/artikel.ven
|
174
234
|
- test/ref/lir.mul
|
175
|
-
- test/ref/lir.
|
176
|
-
- test/ref/artikel.vec
|
177
|
-
- test/ref/lir.syn
|
235
|
+
- test/ref/lir.seq
|
178
236
|
- test/ref/artikel.mul
|
179
|
-
- test/ref/
|
237
|
+
- test/ref/lir.syn
|
238
|
+
- test/ref/artikel.ver
|
180
239
|
- test/ref/artikel.seq
|
181
240
|
- test/ref/artikel.non
|
182
241
|
- test/ref/lir.non
|
183
|
-
- test/ref/
|
184
|
-
- test/ref/
|
185
|
-
- test/
|
242
|
+
- test/ref/artikel.vec
|
243
|
+
- test/ref/lir.vec
|
244
|
+
- test/ref/artikel.syn
|
186
245
|
- test/lir2.txt
|
246
|
+
- test/ts_database.rb
|
247
|
+
- test/test_helper.rb
|
248
|
+
- test/attendee/ts_object_filter.rb
|
249
|
+
- test/attendee/ts_vector_filter.rb
|
250
|
+
- test/attendee/ts_synonymer.rb
|
251
|
+
- test/attendee/ts_decomposer.rb
|
252
|
+
- test/attendee/ts_stemmer.rb
|
187
253
|
- test/attendee/ts_noneword_filter.rb
|
188
254
|
- test/attendee/ts_text_writer.rb
|
189
255
|
- test/attendee/ts_sequencer.rb
|
190
|
-
- test/attendee/
|
256
|
+
- test/attendee/ts_tokenizer.rb
|
191
257
|
- test/attendee/ts_text_reader.rb
|
192
258
|
- test/attendee/ts_multi_worder.rb
|
193
259
|
- test/attendee/ts_variator.rb
|
194
|
-
- test/attendee/ts_decomposer.rb
|
195
260
|
- test/attendee/ts_abbreviator.rb
|
196
|
-
- test/attendee/ts_stemmer.rb
|
197
|
-
- test/attendee/ts_tokenizer.rb
|
198
|
-
- test/attendee/ts_vector_filter.rb
|
199
261
|
- test/attendee/ts_word_searcher.rb
|
200
|
-
- test/attendee/ts_synonymer.rb
|
201
262
|
- test/lir.vec
|
202
|
-
- test/test_helper.rb
|
203
|
-
- test/lir.txt
|
204
263
|
- test/mul.txt
|
205
|
-
- test/
|
264
|
+
- test/lir.txt
|
265
|
+
- test/ts_language.rb
|
206
266
|
homepage: http://lex-lingo.de
|
207
267
|
licenses: []
|
208
268
|
post_install_message:
|
@@ -212,7 +272,7 @@ rdoc_options:
|
|
212
272
|
- --line-numbers
|
213
273
|
- --all
|
214
274
|
- --title
|
215
|
-
- lingo Application documentation (v1.8.2)
|
275
|
+
- lingo Application documentation (v1.8.3)
|
216
276
|
- --main
|
217
277
|
- README
|
218
278
|
require_paths:
|
@@ -222,7 +282,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
222
282
|
requirements:
|
223
283
|
- - ! '>='
|
224
284
|
- !ruby/object:Gem::Version
|
225
|
-
version:
|
285
|
+
version: 1.9.2
|
226
286
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
227
287
|
none: false
|
228
288
|
requirements:
|
@@ -231,7 +291,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
231
291
|
version: '0'
|
232
292
|
requirements: []
|
233
293
|
rubyforge_project:
|
234
|
-
rubygems_version: 1.8.
|
294
|
+
rubygems_version: 1.8.24
|
235
295
|
signing_key:
|
236
296
|
specification_version: 3
|
237
297
|
summary: The full-featured automatic indexing system
|
data/TODO
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
= ToDo list for Lingo
|
2
|
-
|
3
|
-
* Configuration parameter validation.
|
4
|
-
* Replace regex-based tokenizer with a (Racc/Ragel/ANTLR-based?) lexer.
|
5
|
-
* Update and translate old documentation.
|
6
|
-
* Allow for handling of documents in various encodings, not just the one the
|
7
|
-
dictionaries are encoded in.
|
8
|
-
* Provide automatic encoding detection.
|
9
|
-
* Provide automatic language detection (as fine-grained as possible).
|
10
|
-
* Make lingo run faster!? (benchmark - profile - optimize)
|
11
|
-
* Provide an easy-to-use Lingo API -- just 'require "lingo"' and go for it!
|
12
|
-
* In addition to that, provide sensible string extensions: String#tokenize,
|
13
|
-
String#lemmatize, ...
|
14
|
-
* Provide a DSL for configuration -- in addition to, or instead of, the current
|
15
|
-
YAML format.
|
16
|
-
* Make sure the Crypter is sufficiently secure.
|
17
|
-
* Use RSpec for testing.
|
18
|
-
* Make Lingo capable to use multiple cores or even machines to boost performance
|
19
|
-
by connecting Attendees through sockets and use separate processes.
|
20
|
-
|
21
|
-
NOTE: New code *should* meet the guidelines outlined in the
|
22
|
-
RubyStyleGuide[https://github.com/bbatsov/ruby-style-guide],
|
23
|
-
existing code will be adjusted along the way.
|