lingo 1.8.2 → 1.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. data/ChangeLog +33 -0
  2. data/README +6 -5
  3. data/Rakefile +6 -4
  4. data/{lib/lingo/cachable.rb → bin/lingosrv} +30 -58
  5. data/bin/lingoweb +30 -0
  6. data/de.lang +2 -13
  7. data/en/lingo-irr.txt +266 -0
  8. data/en/lingo-wdn.txt +37319 -0
  9. data/en.lang +2 -15
  10. data/lib/lingo/app.rb +82 -0
  11. data/lib/lingo/attendee/abbreviator.rb +22 -26
  12. data/lib/lingo/attendee/debugger.rb +8 -4
  13. data/lib/lingo/attendee/decomposer.rb +0 -1
  14. data/lib/lingo/attendee/dehyphenizer.rb +2 -2
  15. data/lib/lingo/attendee/multi_worder.rb +20 -13
  16. data/lib/lingo/attendee/noneword_filter.rb +2 -7
  17. data/lib/lingo/attendee/sequencer.rb +43 -19
  18. data/lib/lingo/attendee/stemmer/porter.rb +2 -2
  19. data/lib/lingo/attendee/stemmer.rb +1 -1
  20. data/lib/lingo/attendee/synonymer.rb +1 -9
  21. data/lib/lingo/attendee/text_reader.rb +42 -29
  22. data/lib/lingo/attendee/text_writer.rb +3 -6
  23. data/lib/lingo/attendee/tokenizer.rb +87 -69
  24. data/lib/lingo/attendee/variator.rb +7 -5
  25. data/lib/lingo/attendee/vector_filter.rb +11 -11
  26. data/lib/lingo/attendee/word_searcher.rb +1 -9
  27. data/lib/lingo/attendee.rb +24 -105
  28. data/lib/lingo/buffered_attendee.rb +2 -9
  29. data/lib/lingo/call.rb +18 -13
  30. data/lib/lingo/cli.rb +5 -10
  31. data/lib/lingo/config.rb +40 -7
  32. data/lib/lingo/ctl.rb +69 -57
  33. data/lib/lingo/database/hash_store.rb +9 -4
  34. data/lib/lingo/database/sdbm_store.rb +4 -7
  35. data/lib/lingo/database/source/multi_key.rb +1 -1
  36. data/lib/lingo/database/source/multi_value.rb +1 -1
  37. data/lib/lingo/database/source.rb +2 -20
  38. data/lib/lingo/database.rb +30 -19
  39. data/lib/lingo/debug.rb +79 -0
  40. data/lib/lingo/{core_ext.rb → language/char.rb} +43 -42
  41. data/lib/lingo/language/dictionary.rb +38 -46
  42. data/lib/lingo/language/grammar.rb +40 -57
  43. data/lib/lingo/language/lexical.rb +4 -7
  44. data/lib/lingo/language/lexical_hash.rb +17 -35
  45. data/lib/lingo/language/token.rb +4 -0
  46. data/lib/lingo/language/word.rb +7 -8
  47. data/lib/lingo/language/word_form.rb +4 -4
  48. data/lib/lingo/language.rb +2 -1
  49. data/lib/lingo/srv/config.ru +4 -0
  50. data/lib/lingo/srv/lingosrv.cfg +14 -0
  51. data/lib/lingo/{reportable.rb → srv.rb} +59 -61
  52. data/lib/lingo/version.rb +1 -1
  53. data/lib/lingo/web/config.ru +4 -0
  54. data/lib/lingo/web/lingoweb.cfg +14 -0
  55. data/lib/lingo/web/public/lingo.png +0 -0
  56. data/lib/lingo/web/public/lingoweb.css +74 -0
  57. data/lib/lingo/web/views/index.erb +92 -0
  58. data/lib/lingo/web.rb +94 -0
  59. data/lib/lingo.rb +27 -29
  60. data/lingo.cfg +1 -1
  61. data/lir.cfg +24 -0
  62. data/ru/lingo-dic.txt +22342 -0
  63. data/ru/lingo-mul.txt +5151 -0
  64. data/ru/lingo-syn.txt +0 -0
  65. data/ru.lang +99 -0
  66. data/test/attendee/ts_sequencer.rb +2 -2
  67. data/test/attendee/ts_text_reader.rb +36 -2
  68. data/test/attendee/ts_text_writer.rb +6 -6
  69. data/test/lir.vec +3 -3
  70. data/test/test_helper.rb +104 -102
  71. data/test/ts_database.rb +1 -1
  72. data/test/ts_language.rb +55 -96
  73. data/txt/artikel-ru.txt +45 -0
  74. data/txt/lir.txt +1 -3
  75. metadata +143 -83
  76. data/TODO +0 -23
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lingo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.2
4
+ version: 1.8.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,11 +10,11 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-04-19 00:00:00.000000000 Z
13
+ date: 2012-09-20 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ruby-nuggets
17
- requirement: &12303700 !ruby/object:Gem::Requirement
17
+ requirement: !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,15 @@ dependencies:
22
22
  version: 0.8.5
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *12303700
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ! '>='
29
+ - !ruby/object:Gem::Version
30
+ version: 0.8.5
26
31
  - !ruby/object:Gem::Dependency
27
32
  name: unicode
28
- requirement: &12302600 !ruby/object:Gem::Requirement
33
+ requirement: !ruby/object:Gem::Requirement
29
34
  none: false
30
35
  requirements:
31
36
  - - ! '>='
@@ -33,10 +38,31 @@ dependencies:
33
38
  version: '0'
34
39
  type: :runtime
35
40
  prerelease: false
36
- version_requirements: *12302600
41
+ version_requirements: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
37
47
  - !ruby/object:Gem::Dependency
38
48
  name: highline
39
- requirement: &12317680 !ruby/object:Gem::Requirement
49
+ requirement: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ - !ruby/object:Gem::Dependency
64
+ name: sinatra
65
+ requirement: !ruby/object:Gem::Requirement
40
66
  none: false
41
67
  requirements:
42
68
  - - ! '>='
@@ -44,10 +70,15 @@ dependencies:
44
70
  version: '0'
45
71
  type: :runtime
46
72
  prerelease: false
47
- version_requirements: *12317680
73
+ version_requirements: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
48
79
  - !ruby/object:Gem::Dependency
49
80
  name: diff-lcs
50
- requirement: &12315360 !ruby/object:Gem::Requirement
81
+ requirement: !ruby/object:Gem::Requirement
51
82
  none: false
52
83
  requirements:
53
84
  - - ! '>='
@@ -55,10 +86,15 @@ dependencies:
55
86
  version: 1.1.3
56
87
  type: :development
57
88
  prerelease: false
58
- version_requirements: *12315360
89
+ version_requirements: !ruby/object:Gem::Requirement
90
+ none: false
91
+ requirements:
92
+ - - ! '>='
93
+ - !ruby/object:Gem::Version
94
+ version: 1.1.3
59
95
  - !ruby/object:Gem::Dependency
60
96
  name: open4
61
- requirement: &12314340 !ruby/object:Gem::Requirement
97
+ requirement: !ruby/object:Gem::Requirement
62
98
  none: false
63
99
  requirements:
64
100
  - - ! '>='
@@ -66,7 +102,12 @@ dependencies:
66
102
  version: '0'
67
103
  type: :development
68
104
  prerelease: false
69
- version_requirements: *12314340
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
70
111
  description: ! "Lingo is an open source indexing system for research and teachings.\nThe
71
112
  main functions of Lingo are:\n\n* identification of (i.e. reduction to) basic word
72
113
  form by means of\n dictionaries and suffix lists\n* algorithmic decomposition\n*
@@ -76,133 +117,152 @@ email:
76
117
  - lingo@vorhauer.de
77
118
  - jens.wille@uni-koeln.de
78
119
  executables:
79
- - lingoctl
120
+ - lingosrv
80
121
  - lingo
122
+ - lingoctl
123
+ - lingoweb
81
124
  extensions: []
82
125
  extra_rdoc_files:
83
126
  - README
84
127
  - COPYING
85
128
  - ChangeLog
86
129
  files:
87
- - lib/lingo.rb
88
- - lib/lingo/show_progress.rb
89
130
  - lib/lingo/config.rb
90
- - lib/lingo/database.rb
91
- - lib/lingo/language/dictionary.rb
92
- - lib/lingo/language/word_form.rb
93
- - lib/lingo/language/lexical.rb
94
- - lib/lingo/language/grammar.rb
131
+ - lib/lingo/version.rb
132
+ - lib/lingo/cli.rb
133
+ - lib/lingo/srv.rb
134
+ - lib/lingo/web.rb
135
+ - lib/lingo/agenda_item.rb
136
+ - lib/lingo/ctl.rb
95
137
  - lib/lingo/language/lexical_hash.rb
96
- - lib/lingo/language/token.rb
97
138
  - lib/lingo/language/word.rb
98
- - lib/lingo/attendee/stemmer/porter.rb
99
- - lib/lingo/attendee/vector_filter.rb
100
- - lib/lingo/attendee/noneword_filter.rb
101
- - lib/lingo/attendee/object_filter.rb
102
- - lib/lingo/attendee/variator.rb
103
- - lib/lingo/attendee/multi_worder.rb
104
- - lib/lingo/attendee/text_reader.rb
105
- - lib/lingo/attendee/synonymer.rb
106
- - lib/lingo/attendee/word_searcher.rb
107
- - lib/lingo/attendee/dehyphenizer.rb
108
- - lib/lingo/attendee/sequencer.rb
109
- - lib/lingo/attendee/debugger.rb
110
- - lib/lingo/attendee/text_writer.rb
111
- - lib/lingo/attendee/stemmer.rb
112
- - lib/lingo/attendee/tokenizer.rb
113
- - lib/lingo/attendee/abbreviator.rb
114
- - lib/lingo/attendee/decomposer.rb
115
- - lib/lingo/attendee/formatter.rb
116
- - lib/lingo/database/hash_store.rb
117
- - lib/lingo/database/show_progress.rb
118
- - lib/lingo/database/sdbm_store.rb
139
+ - lib/lingo/language/char.rb
140
+ - lib/lingo/language/lexical.rb
141
+ - lib/lingo/language/token.rb
142
+ - lib/lingo/language/grammar.rb
143
+ - lib/lingo/language/dictionary.rb
144
+ - lib/lingo/language/word_form.rb
145
+ - lib/lingo/error.rb
119
146
  - lib/lingo/database/source.rb
120
- - lib/lingo/database/crypter.rb
147
+ - lib/lingo/database/source/key_value.rb
121
148
  - lib/lingo/database/source/multi_value.rb
149
+ - lib/lingo/database/source/single_word.rb
122
150
  - lib/lingo/database/source/word_class.rb
123
- - lib/lingo/database/source/key_value.rb
124
151
  - lib/lingo/database/source/multi_key.rb
125
- - lib/lingo/database/source/single_word.rb
126
- - lib/lingo/database/gdbm_store.rb
152
+ - lib/lingo/database/crypter.rb
153
+ - lib/lingo/database/sdbm_store.rb
127
154
  - lib/lingo/database/libcdb_store.rb
155
+ - lib/lingo/database/hash_store.rb
156
+ - lib/lingo/database/show_progress.rb
157
+ - lib/lingo/database/gdbm_store.rb
128
158
  - lib/lingo/call.rb
159
+ - lib/lingo/attendee/abbreviator.rb
160
+ - lib/lingo/attendee/text_writer.rb
161
+ - lib/lingo/attendee/debugger.rb
162
+ - lib/lingo/attendee/dehyphenizer.rb
163
+ - lib/lingo/attendee/stemmer/porter.rb
164
+ - lib/lingo/attendee/decomposer.rb
165
+ - lib/lingo/attendee/multi_worder.rb
166
+ - lib/lingo/attendee/tokenizer.rb
167
+ - lib/lingo/attendee/word_searcher.rb
168
+ - lib/lingo/attendee/variator.rb
169
+ - lib/lingo/attendee/noneword_filter.rb
170
+ - lib/lingo/attendee/sequencer.rb
171
+ - lib/lingo/attendee/object_filter.rb
172
+ - lib/lingo/attendee/stemmer.rb
173
+ - lib/lingo/attendee/synonymer.rb
174
+ - lib/lingo/attendee/text_reader.rb
175
+ - lib/lingo/attendee/formatter.rb
176
+ - lib/lingo/attendee/vector_filter.rb
177
+ - lib/lingo/show_progress.rb
178
+ - lib/lingo/database.rb
179
+ - lib/lingo/language.rb
129
180
  - lib/lingo/attendee.rb
130
- - lib/lingo/version.rb
131
- - lib/lingo/ctl.rb
132
- - lib/lingo/cli.rb
133
- - lib/lingo/core_ext.rb
181
+ - lib/lingo/debug.rb
182
+ - lib/lingo/app.rb
134
183
  - lib/lingo/buffered_attendee.rb
135
- - lib/lingo/agenda_item.rb
136
- - lib/lingo/cachable.rb
137
- - lib/lingo/language.rb
138
- - lib/lingo/error.rb
139
- - lib/lingo/reportable.rb
140
- - bin/lingoctl
184
+ - lib/lingo.rb
185
+ - bin/lingosrv
141
186
  - bin/lingo
187
+ - bin/lingoctl
188
+ - bin/lingoweb
142
189
  - lingo.rb
143
190
  - lingo.cfg
144
191
  - lingo-call.cfg
145
192
  - lir.cfg
146
193
  - de.lang
147
194
  - en.lang
195
+ - ru.lang
148
196
  - de/lingo-dic.txt
149
197
  - de/lingo-abk.txt
150
- - de/lingo-syn.txt
151
198
  - de/lingo-mul.txt
199
+ - de/lingo-syn.txt
152
200
  - de/user-dic.txt
201
+ - de/test_syn2.txt
202
+ - de/test_mul2.txt
203
+ - de/test_mul.txt
153
204
  - de/test_syn.txt
154
205
  - de/test_dic.txt
155
- - de/test_syn2.txt
156
206
  - de/test_singleword.txt
157
- - de/test_mul.txt
158
- - de/test_mul2.txt
159
207
  - en/lingo-dic.txt
160
- - en/lingo-syn.txt
161
208
  - en/lingo-mul.txt
209
+ - en/lingo-wdn.txt
210
+ - en/lingo-syn.txt
211
+ - en/lingo-irr.txt
162
212
  - en/user-dic.txt
213
+ - ru/lingo-dic.txt
214
+ - ru/lingo-mul.txt
215
+ - ru/lingo-syn.txt
163
216
  - txt/artikel.txt
164
217
  - txt/artikel-en.txt
218
+ - txt/artikel-ru.txt
165
219
  - txt/lir.txt
166
- - ChangeLog
220
+ - lib/lingo/srv/lingosrv.cfg
221
+ - lib/lingo/srv/config.ru
222
+ - lib/lingo/web/lingoweb.cfg
223
+ - lib/lingo/web/public/lingo.png
224
+ - lib/lingo/web/public/lingoweb.css
225
+ - lib/lingo/web/views/index.erb
226
+ - lib/lingo/web/config.ru
167
227
  - COPYING
168
- - README
228
+ - ChangeLog
169
229
  - Rakefile
170
- - TODO
230
+ - README
171
231
  - spec/spec_helper.rb
172
232
  - .rspec
173
233
  - test/ref/artikel.ven
174
234
  - test/ref/lir.mul
175
- - test/ref/lir.vec
176
- - test/ref/artikel.vec
177
- - test/ref/lir.syn
235
+ - test/ref/lir.seq
178
236
  - test/ref/artikel.mul
179
- - test/ref/artikel.syn
237
+ - test/ref/lir.syn
238
+ - test/ref/artikel.ver
180
239
  - test/ref/artikel.seq
181
240
  - test/ref/artikel.non
182
241
  - test/ref/lir.non
183
- - test/ref/lir.seq
184
- - test/ref/artikel.ver
185
- - test/ts_language.rb
242
+ - test/ref/artikel.vec
243
+ - test/ref/lir.vec
244
+ - test/ref/artikel.syn
186
245
  - test/lir2.txt
246
+ - test/ts_database.rb
247
+ - test/test_helper.rb
248
+ - test/attendee/ts_object_filter.rb
249
+ - test/attendee/ts_vector_filter.rb
250
+ - test/attendee/ts_synonymer.rb
251
+ - test/attendee/ts_decomposer.rb
252
+ - test/attendee/ts_stemmer.rb
187
253
  - test/attendee/ts_noneword_filter.rb
188
254
  - test/attendee/ts_text_writer.rb
189
255
  - test/attendee/ts_sequencer.rb
190
- - test/attendee/ts_object_filter.rb
256
+ - test/attendee/ts_tokenizer.rb
191
257
  - test/attendee/ts_text_reader.rb
192
258
  - test/attendee/ts_multi_worder.rb
193
259
  - test/attendee/ts_variator.rb
194
- - test/attendee/ts_decomposer.rb
195
260
  - test/attendee/ts_abbreviator.rb
196
- - test/attendee/ts_stemmer.rb
197
- - test/attendee/ts_tokenizer.rb
198
- - test/attendee/ts_vector_filter.rb
199
261
  - test/attendee/ts_word_searcher.rb
200
- - test/attendee/ts_synonymer.rb
201
262
  - test/lir.vec
202
- - test/test_helper.rb
203
- - test/lir.txt
204
263
  - test/mul.txt
205
- - test/ts_database.rb
264
+ - test/lir.txt
265
+ - test/ts_language.rb
206
266
  homepage: http://lex-lingo.de
207
267
  licenses: []
208
268
  post_install_message:
@@ -212,7 +272,7 @@ rdoc_options:
212
272
  - --line-numbers
213
273
  - --all
214
274
  - --title
215
- - lingo Application documentation (v1.8.2)
275
+ - lingo Application documentation (v1.8.3)
216
276
  - --main
217
277
  - README
218
278
  require_paths:
@@ -222,7 +282,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
222
282
  requirements:
223
283
  - - ! '>='
224
284
  - !ruby/object:Gem::Version
225
- version: '1.9'
285
+ version: 1.9.2
226
286
  required_rubygems_version: !ruby/object:Gem::Requirement
227
287
  none: false
228
288
  requirements:
@@ -231,7 +291,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
231
291
  version: '0'
232
292
  requirements: []
233
293
  rubyforge_project:
234
- rubygems_version: 1.8.17
294
+ rubygems_version: 1.8.24
235
295
  signing_key:
236
296
  specification_version: 3
237
297
  summary: The full-featured automatic indexing system
data/TODO DELETED
@@ -1,23 +0,0 @@
1
- = ToDo list for Lingo
2
-
3
- * Configuration parameter validation.
4
- * Replace regex-based tokenizer with a (Racc/Ragel/ANTLR-based?) lexer.
5
- * Update and translate old documentation.
6
- * Allow for handling of documents in various encodings, not just the one the
7
- dictionaries are encoded in.
8
- * Provide automatic encoding detection.
9
- * Provide automatic language detection (as fine-grained as possible).
10
- * Make lingo run faster!? (benchmark - profile - optimize)
11
- * Provide an easy-to-use Lingo API -- just 'require "lingo"' and go for it!
12
- * In addition to that, provide sensible string extensions: String#tokenize,
13
- String#lemmatize, ...
14
- * Provide a DSL for configuration -- in addition to, or instead of, the current
15
- YAML format.
16
- * Make sure the Crypter is sufficiently secure.
17
- * Use RSpec for testing.
18
- * Make Lingo capable to use multiple cores or even machines to boost performance
19
- by connecting Attendees through sockets and use separate processes.
20
-
21
- NOTE: New code *should* meet the guidelines outlined in the
22
- RubyStyleGuide[https://github.com/bbatsov/ruby-style-guide],
23
- existing code will be adjusted along the way.