lingo 1.8.4.2 → 1.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +413 -325
  3. data/README +380 -131
  4. data/Rakefile +19 -21
  5. data/de/lingo-abk.txt +15 -17
  6. data/de/lingo-dic.txt +20210 -20659
  7. data/de/lingo-mul.txt +5 -13
  8. data/de/lingo-syn.txt +5 -8
  9. data/de/test_dic.txt +2 -0
  10. data/de/test_gen.txt +8 -0
  11. data/de/{test_mul2.txt → test_mu2.txt} +0 -0
  12. data/de/{test_singleword.txt → test_sgw.txt} +0 -0
  13. data/de/user-dic.txt +5 -7
  14. data/de.lang +64 -49
  15. data/en/lingo-dic.txt +6398 -6404
  16. data/en/lingo-irr.txt +2 -3
  17. data/en/lingo-mul.txt +6 -7
  18. data/en/lingo-wdn.txt +881 -1762
  19. data/en/user-dic.txt +2 -5
  20. data/en.lang +39 -39
  21. data/lib/lingo/app.rb +10 -6
  22. data/lib/lingo/attendee/abbreviator.rb +1 -0
  23. data/lib/lingo/attendee/decomposer.rb +2 -1
  24. data/lib/lingo/attendee/multi_worder.rb +5 -6
  25. data/lib/lingo/attendee/stemmer.rb +1 -1
  26. data/lib/lingo/attendee/synonymer.rb +4 -2
  27. data/lib/lingo/attendee/text_reader.rb +77 -57
  28. data/lib/lingo/attendee/text_writer.rb +1 -1
  29. data/lib/lingo/attendee/tokenizer.rb +101 -50
  30. data/lib/lingo/attendee/variator.rb +2 -1
  31. data/lib/lingo/attendee/vector_filter.rb +28 -6
  32. data/lib/lingo/attendee/word_searcher.rb +2 -1
  33. data/lib/lingo/attendee.rb +8 -4
  34. data/lib/lingo/call.rb +7 -3
  35. data/lib/lingo/cli.rb +8 -16
  36. data/lib/lingo/config.rb +11 -6
  37. data/lib/lingo/ctl.rb +54 -3
  38. data/lib/lingo/database/crypter.rb +8 -14
  39. data/lib/lingo/database/hash_store.rb +1 -1
  40. data/lib/lingo/database/{show_progress.rb → progress.rb} +7 -8
  41. data/lib/lingo/database/source/key_value.rb +6 -5
  42. data/lib/lingo/database/source/multi_key.rb +5 -2
  43. data/lib/lingo/database/source/multi_value.rb +6 -4
  44. data/lib/lingo/database/source/single_word.rb +2 -3
  45. data/lib/lingo/database/source/word_class.rb +24 -5
  46. data/lib/lingo/database/source.rb +5 -3
  47. data/lib/lingo/database.rb +102 -41
  48. data/lib/lingo/error.rb +24 -2
  49. data/lib/lingo/language/dictionary.rb +26 -54
  50. data/lib/lingo/language/grammar.rb +19 -23
  51. data/lib/lingo/language/lexical.rb +5 -1
  52. data/lib/lingo/language/lexical_hash.rb +7 -12
  53. data/lib/lingo/language/token.rb +10 -1
  54. data/lib/lingo/language/word.rb +35 -23
  55. data/lib/lingo/language/word_form.rb +5 -4
  56. data/lib/lingo/{show_progress.rb → progress.rb} +43 -30
  57. data/lib/lingo/srv/lingosrv.cfg +1 -1
  58. data/lib/lingo/srv/public/.gitkeep +0 -0
  59. data/lib/lingo/srv.rb +11 -6
  60. data/lib/lingo/version.rb +2 -2
  61. data/lib/lingo/web/lingoweb.cfg +1 -1
  62. data/lib/lingo/web/views/index.erb +4 -4
  63. data/lib/lingo/web.rb +4 -6
  64. data/lib/lingo.rb +4 -12
  65. data/lingo.cfg +1 -1
  66. data/lir.cfg +1 -1
  67. data/ru/lingo-dic.txt +33473 -2113
  68. data/ru/lingo-mul.txt +8430 -1913
  69. data/ru/lingo-syn.txt +1634 -0
  70. data/ru/user-dic.txt +6 -0
  71. data/ru.lang +49 -47
  72. data/spec/spec_helper.rb +4 -0
  73. data/test/attendee/ts_decomposer.rb +2 -2
  74. data/test/attendee/ts_synonymer.rb +3 -3
  75. data/test/attendee/ts_tokenizer.rb +215 -2
  76. data/test/attendee/ts_variator.rb +2 -2
  77. data/test/attendee/ts_word_searcher.rb +10 -6
  78. data/test/ref/artikel.seq +2 -2
  79. data/test/ref/artikel.vec +5 -5
  80. data/test/ref/artikel.ven +11 -11
  81. data/test/ref/artikel.ver +11 -11
  82. data/test/ref/lir.seq +13 -13
  83. data/test/ref/lir.vec +31 -31
  84. data/test/test_helper.rb +19 -5
  85. data/test/ts_database.rb +206 -77
  86. data/test/ts_language.rb +86 -26
  87. metadata +93 -49
  88. data/.rspec +0 -1
  89. data/de/test_syn2.txt +0 -1
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lingo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.4.2
4
+ version: 1.8.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Vorhauer
@@ -9,112 +9,112 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-06-18 00:00:00.000000000 Z
12
+ date: 2014-10-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: highline
15
+ name: cyclops
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '0.0'
18
21
  - - ">="
19
22
  - !ruby/object:Gem::Version
20
- version: '0'
23
+ version: 0.0.4
21
24
  type: :runtime
22
25
  prerelease: false
23
26
  version_requirements: !ruby/object:Gem::Requirement
24
27
  requirements:
28
+ - - "~>"
29
+ - !ruby/object:Gem::Version
30
+ version: '0.0'
25
31
  - - ">="
26
32
  - !ruby/object:Gem::Version
27
- version: '0'
33
+ version: 0.0.4
28
34
  - !ruby/object:Gem::Dependency
29
- name: ruby-nuggets
35
+ name: nuggets
30
36
  requirement: !ruby/object:Gem::Requirement
31
37
  requirements:
32
- - - ">="
33
- - !ruby/object:Gem::Version
34
- version: 0.9.2
35
- - - "<="
38
+ - - "~>"
36
39
  - !ruby/object:Gem::Version
37
- version: 0.9.7
40
+ version: '1.0'
38
41
  type: :runtime
39
42
  prerelease: false
40
43
  version_requirements: !ruby/object:Gem::Requirement
41
44
  requirements:
42
- - - ">="
43
- - !ruby/object:Gem::Version
44
- version: 0.9.2
45
- - - "<="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.9.7
47
+ version: '1.0'
48
48
  - !ruby/object:Gem::Dependency
49
- name: sinatra
49
+ name: rubyzip
50
50
  requirement: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '1.1'
55
55
  type: :runtime
56
56
  prerelease: false
57
57
  version_requirements: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: '1.1'
62
62
  - !ruby/object:Gem::Dependency
63
- name: sinatra-contrib
63
+ name: sinatra-bells
64
64
  requirement: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: '0.0'
69
69
  type: :runtime
70
70
  prerelease: false
71
71
  version_requirements: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: '0.0'
76
76
  - !ruby/object:Gem::Dependency
77
77
  name: unicode
78
78
  requirement: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: '0.4'
83
83
  type: :runtime
84
84
  prerelease: false
85
85
  version_requirements: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ">="
87
+ - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '0'
89
+ version: '0.4'
90
90
  - !ruby/object:Gem::Dependency
91
91
  name: diff-lcs
92
92
  requirement: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ">="
94
+ - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: 1.1.3
96
+ version: '1.2'
97
97
  type: :development
98
98
  prerelease: false
99
99
  version_requirements: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ">="
101
+ - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.1.3
103
+ version: '1.2'
104
104
  - !ruby/object:Gem::Dependency
105
105
  name: open4
106
106
  requirement: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ">="
108
+ - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '0'
110
+ version: '1.3'
111
111
  type: :development
112
112
  prerelease: false
113
113
  version_requirements: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - ">="
115
+ - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: '0'
117
+ version: '1.3'
118
118
  - !ruby/object:Gem::Dependency
119
119
  name: hen
120
120
  requirement: !ruby/object:Gem::Requirement
@@ -181,7 +181,6 @@ extra_rdoc_files:
181
181
  - COPYING
182
182
  - ChangeLog
183
183
  files:
184
- - ".rspec"
185
184
  - COPYING
186
185
  - ChangeLog
187
186
  - README
@@ -196,11 +195,11 @@ files:
196
195
  - de/lingo-mul.txt
197
196
  - de/lingo-syn.txt
198
197
  - de/test_dic.txt
198
+ - de/test_gen.txt
199
+ - de/test_mu2.txt
199
200
  - de/test_mul.txt
200
- - de/test_mul2.txt
201
- - de/test_singleword.txt
201
+ - de/test_sgw.txt
202
202
  - de/test_syn.txt
203
- - de/test_syn2.txt
204
203
  - de/user-dic.txt
205
204
  - en.lang
206
205
  - en/lingo-dic.txt
@@ -241,8 +240,8 @@ files:
241
240
  - lib/lingo/database/gdbm_store.rb
242
241
  - lib/lingo/database/hash_store.rb
243
242
  - lib/lingo/database/libcdb_store.rb
243
+ - lib/lingo/database/progress.rb
244
244
  - lib/lingo/database/sdbm_store.rb
245
- - lib/lingo/database/show_progress.rb
246
245
  - lib/lingo/database/source.rb
247
246
  - lib/lingo/database/source/key_value.rb
248
247
  - lib/lingo/database/source/multi_key.rb
@@ -260,10 +259,11 @@ files:
260
259
  - lib/lingo/language/token.rb
261
260
  - lib/lingo/language/word.rb
262
261
  - lib/lingo/language/word_form.rb
263
- - lib/lingo/show_progress.rb
262
+ - lib/lingo/progress.rb
264
263
  - lib/lingo/srv.rb
265
264
  - lib/lingo/srv/config.ru
266
265
  - lib/lingo/srv/lingosrv.cfg
266
+ - lib/lingo/srv/public/.gitkeep
267
267
  - lib/lingo/version.rb
268
268
  - lib/lingo/web.rb
269
269
  - lib/lingo/web/config.ru
@@ -279,6 +279,7 @@ files:
279
279
  - ru/lingo-dic.txt
280
280
  - ru/lingo-mul.txt
281
281
  - ru/lingo-syn.txt
282
+ - ru/user-dic.txt
282
283
  - spec/spec_helper.rb
283
284
  - test/attendee/ts_abbreviator.rb
284
285
  - test/attendee/ts_decomposer.rb
@@ -319,12 +320,55 @@ files:
319
320
  - txt/lir.txt
320
321
  homepage: http://lex-lingo.de
321
322
  licenses:
322
- - AGPL
323
+ - AGPL-3.0
323
324
  metadata: {}
324
- post_install_message:
325
+ post_install_message: |2+
326
+
327
+ lingo-1.8.5 [2014-10-02]:
328
+
329
+ * Dictionary values (projections) are no longer sorted; hence, order of
330
+ definition affects processing.
331
+ * Lexicals in Lingo::Language::Word are no longer sorted; in particular,
332
+ compound parts keep their original order.
333
+ * Lexicals in Lingo::Language::Word are no longer cleaned from duplicates.
334
+ * Compiled dictionaries are updated whenever the Lingo version or their
335
+ configuration changes, not only when the source file's size or modification
336
+ time changes.
337
+ * Lingo::Attendee::Synonymer learned <tt>compound-parts</tt> option to also
338
+ generate synonyms for compound parts when set to +true+.
339
+ * Lingo::Attendee::TextReader learned better PDF-to-text conversion using the
340
+ +pdftotext+ command; specify <tt>filter: pdftotext</tt> in the config.
341
+ * Lingo::Attendee::VectorFilter learned +dict+ option to print words in
342
+ dictionary format (viz. Lingo::Database::Source::WordClass).
343
+ * Lingo::Attendee::VectorFilter learned +preamble+ option to print current
344
+ configuration to the beginning of the log file (<tt>debug: 'true'</tt>);
345
+ set <tt>preamble: false</tt> to disable.
346
+ * Multiword dictionaries compiled from base forms can now generate inflected
347
+ adjectives based on the gender of the head noun; set <tt>inflect: true</tt>
348
+ in the dictionary config.
349
+ * Lingo::Database::Source::WordClass supports gender information being encoded
350
+ in the dictionary as well as shorthand notation for multiple word
351
+ classes/genders.
352
+ * Lingo::Database::Source::WordClass supports compounds being encoded in the
353
+ dictionary (appending <tt>+</tt> to their parts' word classes is
354
+ recommended).
355
+ * Lingo::Database::Source removes leading and trailing whitespace from
356
+ dictionary lines.
357
+ * Lingo::Database::Crypter uses OpenSSL to encrypt/decrypt dictionaries.
358
+ Note: Can't decrypt dictionaries encrypted with the old scheme anymore.
359
+ * Lingo::Attendee::Tokenizer learned subset of MediaWiki syntax.
360
+ * Eliminated pathological behaviour of the +URLS+ rule in
361
+ Lingo::Attendee::Tokenizer.
362
+ * Fixed regression introduced in 1.8.2 where <tt>combine: all</tt> would no
363
+ longer work in Lingo::Attendee::MultiWorder.
364
+ * Updated and extended Russian dictionaries. (Yulia Dorokhova, Thomas Müller)
365
+ * +lingoctl+ no longer overwrites existing files without confirmation.
366
+ * +lingoctl+ learned +archive+ command.
367
+ * Dictionary cleanup.
368
+
325
369
  rdoc_options:
326
370
  - "--title"
327
- - lingo Application documentation (v1.8.4.2)
371
+ - lingo Application documentation (v1.8.5)
328
372
  - "--charset"
329
373
  - UTF-8
330
374
  - "--line-numbers"
@@ -337,7 +381,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
337
381
  requirements:
338
382
  - - ">="
339
383
  - !ruby/object:Gem::Version
340
- version: 1.9.2
384
+ version: 1.9.3
341
385
  required_rubygems_version: !ruby/object:Gem::Requirement
342
386
  requirements:
343
387
  - - ">="
@@ -345,7 +389,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
345
389
  version: '0'
346
390
  requirements: []
347
391
  rubyforge_project:
348
- rubygems_version: 2.3.0
392
+ rubygems_version: 2.4.2
349
393
  signing_key:
350
394
  specification_version: 4
351
395
  summary: The full-featured automatic indexing system
data/.rspec DELETED
@@ -1 +0,0 @@
1
- --colour
data/de/test_syn2.txt DELETED
@@ -1 +0,0 @@
1
- Albert Einstein;Einstein, Albert;Albert Ainshtain;<Prof> Einstein