lingo 1.8.1 → 1.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. data/ChangeLog +23 -5
  2. data/README +1 -1
  3. data/Rakefile +5 -7
  4. data/TODO +2 -0
  5. data/bin/lingo +5 -1
  6. data/de.lang +1 -1
  7. data/en/lingo-syn.txt +0 -0
  8. data/en.lang +2 -1
  9. data/lib/lingo/attendee/abbreviator.rb +8 -9
  10. data/lib/lingo/attendee/debugger.rb +5 -4
  11. data/lib/lingo/attendee/decomposer.rb +8 -3
  12. data/lib/lingo/attendee/dehyphenizer.rb +19 -63
  13. data/lib/lingo/attendee/formatter.rb +1 -1
  14. data/lib/lingo/attendee/multi_worder.rb +67 -155
  15. data/lib/lingo/attendee/noneword_filter.rb +16 -9
  16. data/lib/lingo/attendee/object_filter.rb +1 -1
  17. data/lib/lingo/attendee/sequencer.rb +32 -63
  18. data/lib/lingo/attendee/stemmer/porter.rb +343 -0
  19. data/{info/gpl-hdr.txt → lib/lingo/attendee/stemmer.rb} +33 -0
  20. data/lib/lingo/attendee/synonymer.rb +10 -9
  21. data/lib/lingo/attendee/text_reader.rb +102 -76
  22. data/lib/lingo/attendee/text_writer.rb +23 -26
  23. data/lib/lingo/attendee/tokenizer.rb +13 -27
  24. data/lib/lingo/attendee/variator.rb +26 -66
  25. data/lib/lingo/attendee/vector_filter.rb +42 -43
  26. data/lib/lingo/attendee/word_searcher.rb +6 -7
  27. data/lib/lingo/attendee.rb +25 -7
  28. data/lib/lingo/buffered_attendee.rb +36 -10
  29. data/lib/lingo/cachable.rb +8 -8
  30. data/lib/lingo/config.rb +5 -6
  31. data/lib/lingo/ctl.rb +2 -3
  32. data/lib/lingo/database/crypter.rb +9 -26
  33. data/lib/lingo/database/gdbm_store.rb +3 -5
  34. data/lib/lingo/database/libcdb_store.rb +4 -6
  35. data/lib/lingo/database/sdbm_store.rb +11 -6
  36. data/lib/lingo/database/show_progress.rb +3 -43
  37. data/lib/lingo/database/source/key_value.rb +2 -6
  38. data/lib/lingo/database/source/multi_key.rb +3 -5
  39. data/lib/lingo/database/source/multi_value.rb +2 -6
  40. data/lib/lingo/database/source/single_word.rb +4 -6
  41. data/lib/lingo/database/source/word_class.rb +4 -10
  42. data/lib/lingo/database/source.rb +20 -18
  43. data/lib/lingo/database.rb +84 -59
  44. data/lib/lingo/error.rb +57 -1
  45. data/lib/lingo/language/dictionary.rb +21 -18
  46. data/lib/lingo/language/grammar.rb +40 -49
  47. data/lib/lingo/language/lexical.rb +6 -6
  48. data/lib/lingo/language/lexical_hash.rb +6 -0
  49. data/lib/lingo/language/word.rb +32 -15
  50. data/lib/lingo/language/word_form.rb +1 -1
  51. data/lib/lingo/language.rb +14 -25
  52. data/lib/lingo/reportable.rb +12 -10
  53. data/lib/lingo/show_progress.rb +81 -0
  54. data/lib/lingo/version.rb +1 -1
  55. data/lib/lingo.rb +63 -24
  56. data/lingo-call.cfg +6 -10
  57. data/lingo.cfg +60 -44
  58. data/lir.cfg +42 -41
  59. data/test/attendee/ts_abbreviator.rb +3 -5
  60. data/test/attendee/ts_decomposer.rb +3 -5
  61. data/test/attendee/ts_multi_worder.rb +87 -145
  62. data/test/attendee/ts_noneword_filter.rb +5 -3
  63. data/test/attendee/ts_object_filter.rb +5 -3
  64. data/test/attendee/ts_sequencer.rb +3 -5
  65. data/test/attendee/ts_stemmer.rb +309 -0
  66. data/test/attendee/ts_synonymer.rb +15 -11
  67. data/test/attendee/ts_text_reader.rb +12 -15
  68. data/test/attendee/ts_text_writer.rb +24 -29
  69. data/test/attendee/ts_tokenizer.rb +9 -7
  70. data/test/attendee/ts_variator.rb +4 -4
  71. data/test/attendee/ts_vector_filter.rb +24 -16
  72. data/test/attendee/ts_word_searcher.rb +20 -36
  73. data/test/{lir.csv → lir.vec} +0 -0
  74. data/test/ref/artikel.vec +943 -943
  75. data/test/ref/artikel.ven +943 -943
  76. data/test/ref/lir.non +201 -201
  77. data/test/ref/lir.seq +178 -178
  78. data/test/ref/lir.syn +49 -49
  79. data/test/ref/lir.vec +329 -0
  80. data/test/test_helper.rb +20 -36
  81. data/test/ts_database.rb +10 -10
  82. data/test/ts_language.rb +279 -319
  83. metadata +93 -104
  84. data/info/Objekte.png +0 -0
  85. data/info/Typen.png +0 -0
  86. data/info/database.png +0 -0
  87. data/info/db_small.png +0 -0
  88. data/info/download.png +0 -0
  89. data/info/kerze.png +0 -0
  90. data/info/language.png +0 -0
  91. data/info/lingo.png +0 -0
  92. data/info/logo.png +0 -0
  93. data/info/meeting.png +0 -0
  94. data/info/types.png +0 -0
  95. data/lingo-all.cfg +0 -89
  96. data/porter/stem.cfg +0 -311
  97. data/porter/stem.rb +0 -150
  98. data/test/ref/lir.csv +0 -329
  99. data/test.cfg +0 -79
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lingo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.1
4
+ version: 1.8.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,11 +10,11 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-02-19 00:00:00.000000000 Z
13
+ date: 2012-04-19 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ruby-nuggets
17
- requirement: &10045620 !ruby/object:Gem::Requirement
17
+ requirement: &12303700 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 0.8.5
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *10045620
25
+ version_requirements: *12303700
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: unicode
28
- requirement: &10045140 !ruby/object:Gem::Requirement
28
+ requirement: &12302600 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *10045140
36
+ version_requirements: *12302600
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: highline
39
- requirement: &10044660 !ruby/object:Gem::Requirement
39
+ requirement: &12317680 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: '0'
45
45
  type: :runtime
46
46
  prerelease: false
47
- version_requirements: *10044660
47
+ version_requirements: *12317680
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: diff-lcs
50
- requirement: &10044140 !ruby/object:Gem::Requirement
50
+ requirement: &12315360 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ! '>='
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 1.1.3
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *10044140
58
+ version_requirements: *12315360
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: open4
61
- requirement: &10043720 !ruby/object:Gem::Requirement
61
+ requirement: &12314340 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ! '>='
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: '0'
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *10043720
69
+ version_requirements: *12314340
70
70
  description: ! "Lingo is an open source indexing system for research and teachings.\nThe
71
71
  main functions of Lingo are:\n\n* identification of (i.e. reduction to) basic word
72
72
  form by means of\n dictionaries and suffix lists\n* algorithmic decomposition\n*
@@ -76,144 +76,133 @@ email:
76
76
  - lingo@vorhauer.de
77
77
  - jens.wille@uni-koeln.de
78
78
  executables:
79
- - lingo
80
79
  - lingoctl
80
+ - lingo
81
81
  extensions: []
82
82
  extra_rdoc_files:
83
83
  - README
84
84
  - COPYING
85
85
  - ChangeLog
86
86
  files:
87
- - lib/lingo/ctl.rb
87
+ - lib/lingo.rb
88
+ - lib/lingo/show_progress.rb
89
+ - lib/lingo/config.rb
88
90
  - lib/lingo/database.rb
89
- - lib/lingo/error.rb
90
- - lib/lingo/version.rb
91
- - lib/lingo/database/source.rb
92
- - lib/lingo/database/libcdb_store.rb
93
- - lib/lingo/database/sdbm_store.rb
94
- - lib/lingo/database/show_progress.rb
95
- - lib/lingo/database/crypter.rb
96
- - lib/lingo/database/source/multi_key.rb
97
- - lib/lingo/database/source/key_value.rb
98
- - lib/lingo/database/source/single_word.rb
99
- - lib/lingo/database/source/word_class.rb
100
- - lib/lingo/database/source/multi_value.rb
101
- - lib/lingo/database/gdbm_store.rb
102
- - lib/lingo/database/hash_store.rb
103
- - lib/lingo/cli.rb
104
- - lib/lingo/cachable.rb
105
- - lib/lingo/attendee/variator.rb
106
- - lib/lingo/attendee/debugger.rb
91
+ - lib/lingo/language/dictionary.rb
92
+ - lib/lingo/language/word_form.rb
93
+ - lib/lingo/language/lexical.rb
94
+ - lib/lingo/language/grammar.rb
95
+ - lib/lingo/language/lexical_hash.rb
96
+ - lib/lingo/language/token.rb
97
+ - lib/lingo/language/word.rb
98
+ - lib/lingo/attendee/stemmer/porter.rb
99
+ - lib/lingo/attendee/vector_filter.rb
100
+ - lib/lingo/attendee/noneword_filter.rb
107
101
  - lib/lingo/attendee/object_filter.rb
108
- - lib/lingo/attendee/synonymer.rb
109
- - lib/lingo/attendee/text_writer.rb
102
+ - lib/lingo/attendee/variator.rb
110
103
  - lib/lingo/attendee/multi_worder.rb
111
104
  - lib/lingo/attendee/text_reader.rb
105
+ - lib/lingo/attendee/synonymer.rb
106
+ - lib/lingo/attendee/word_searcher.rb
112
107
  - lib/lingo/attendee/dehyphenizer.rb
108
+ - lib/lingo/attendee/sequencer.rb
109
+ - lib/lingo/attendee/debugger.rb
110
+ - lib/lingo/attendee/text_writer.rb
111
+ - lib/lingo/attendee/stemmer.rb
113
112
  - lib/lingo/attendee/tokenizer.rb
114
113
  - lib/lingo/attendee/abbreviator.rb
115
- - lib/lingo/attendee/formatter.rb
116
- - lib/lingo/attendee/noneword_filter.rb
117
- - lib/lingo/attendee/sequencer.rb
118
114
  - lib/lingo/attendee/decomposer.rb
119
- - lib/lingo/attendee/word_searcher.rb
120
- - lib/lingo/attendee/vector_filter.rb
121
- - lib/lingo/config.rb
115
+ - lib/lingo/attendee/formatter.rb
116
+ - lib/lingo/database/hash_store.rb
117
+ - lib/lingo/database/show_progress.rb
118
+ - lib/lingo/database/sdbm_store.rb
119
+ - lib/lingo/database/source.rb
120
+ - lib/lingo/database/crypter.rb
121
+ - lib/lingo/database/source/multi_value.rb
122
+ - lib/lingo/database/source/word_class.rb
123
+ - lib/lingo/database/source/key_value.rb
124
+ - lib/lingo/database/source/multi_key.rb
125
+ - lib/lingo/database/source/single_word.rb
126
+ - lib/lingo/database/gdbm_store.rb
127
+ - lib/lingo/database/libcdb_store.rb
128
+ - lib/lingo/call.rb
129
+ - lib/lingo/attendee.rb
130
+ - lib/lingo/version.rb
131
+ - lib/lingo/ctl.rb
132
+ - lib/lingo/cli.rb
122
133
  - lib/lingo/core_ext.rb
123
- - lib/lingo/agenda_item.rb
124
134
  - lib/lingo/buffered_attendee.rb
125
- - lib/lingo/reportable.rb
135
+ - lib/lingo/agenda_item.rb
136
+ - lib/lingo/cachable.rb
126
137
  - lib/lingo/language.rb
127
- - lib/lingo/language/dictionary.rb
128
- - lib/lingo/language/word.rb
129
- - lib/lingo/language/lexical.rb
130
- - lib/lingo/language/word_form.rb
131
- - lib/lingo/language/token.rb
132
- - lib/lingo/language/grammar.rb
133
- - lib/lingo/language/lexical_hash.rb
134
- - lib/lingo/attendee.rb
135
- - lib/lingo/call.rb
136
- - lib/lingo.rb
137
- - bin/lingo
138
+ - lib/lingo/error.rb
139
+ - lib/lingo/reportable.rb
138
140
  - bin/lingoctl
141
+ - bin/lingo
139
142
  - lingo.rb
140
143
  - lingo.cfg
141
- - lingo-all.cfg
142
144
  - lingo-call.cfg
145
+ - lir.cfg
143
146
  - de.lang
144
147
  - en.lang
145
- - de/lingo-syn.txt
146
- - de/lingo-abk.txt
147
148
  - de/lingo-dic.txt
149
+ - de/lingo-abk.txt
150
+ - de/lingo-syn.txt
148
151
  - de/lingo-mul.txt
149
152
  - de/user-dic.txt
153
+ - de/test_syn.txt
154
+ - de/test_dic.txt
155
+ - de/test_syn2.txt
156
+ - de/test_singleword.txt
157
+ - de/test_mul.txt
158
+ - de/test_mul2.txt
150
159
  - en/lingo-dic.txt
160
+ - en/lingo-syn.txt
151
161
  - en/lingo-mul.txt
152
162
  - en/user-dic.txt
153
163
  - txt/artikel.txt
154
164
  - txt/artikel-en.txt
155
- - info/gpl-hdr.txt
156
- - info/kerze.png
157
- - info/meeting.png
158
- - info/lingo.png
159
- - info/types.png
160
- - info/logo.png
161
- - info/language.png
162
- - info/Typen.png
163
- - info/Objekte.png
164
- - info/download.png
165
- - info/database.png
166
- - info/db_small.png
167
- - lir.cfg
168
165
  - txt/lir.txt
169
- - porter/stem.rb
170
- - porter/stem.cfg
171
- - test.cfg
172
- - de/test_mul.txt
173
- - de/test_singleword.txt
174
- - de/test_mul2.txt
175
- - de/test_syn.txt
176
- - de/test_dic.txt
177
- - de/test_syn2.txt
178
- - TODO
179
- - README
180
166
  - ChangeLog
181
167
  - COPYING
168
+ - README
182
169
  - Rakefile
170
+ - TODO
183
171
  - spec/spec_helper.rb
184
172
  - .rspec
185
- - test/lir.csv
186
- - test/attendee/ts_abbreviator.rb
187
- - test/attendee/ts_noneword_filter.rb
188
- - test/attendee/ts_word_searcher.rb
189
- - test/attendee/ts_object_filter.rb
190
- - test/attendee/ts_vector_filter.rb
191
- - test/attendee/ts_text_writer.rb
192
- - test/attendee/ts_decomposer.rb
193
- - test/attendee/ts_sequencer.rb
194
- - test/attendee/ts_synonymer.rb
195
- - test/attendee/ts_tokenizer.rb
196
- - test/attendee/ts_variator.rb
197
- - test/attendee/ts_text_reader.rb
198
- - test/attendee/ts_multi_worder.rb
199
- - test/mul.txt
200
- - test/test_helper.rb
201
173
  - test/ref/artikel.ven
202
- - test/ref/lir.csv
203
- - test/ref/artikel.vec
204
174
  - test/ref/lir.mul
205
- - test/ref/artikel.syn
175
+ - test/ref/lir.vec
176
+ - test/ref/artikel.vec
206
177
  - test/ref/lir.syn
207
178
  - test/ref/artikel.mul
179
+ - test/ref/artikel.syn
208
180
  - test/ref/artikel.seq
209
- - test/ref/lir.seq
210
181
  - test/ref/artikel.non
211
- - test/ref/artikel.ver
212
182
  - test/ref/lir.non
183
+ - test/ref/lir.seq
184
+ - test/ref/artikel.ver
185
+ - test/ts_language.rb
213
186
  - test/lir2.txt
214
- - test/ts_database.rb
187
+ - test/attendee/ts_noneword_filter.rb
188
+ - test/attendee/ts_text_writer.rb
189
+ - test/attendee/ts_sequencer.rb
190
+ - test/attendee/ts_object_filter.rb
191
+ - test/attendee/ts_text_reader.rb
192
+ - test/attendee/ts_multi_worder.rb
193
+ - test/attendee/ts_variator.rb
194
+ - test/attendee/ts_decomposer.rb
195
+ - test/attendee/ts_abbreviator.rb
196
+ - test/attendee/ts_stemmer.rb
197
+ - test/attendee/ts_tokenizer.rb
198
+ - test/attendee/ts_vector_filter.rb
199
+ - test/attendee/ts_word_searcher.rb
200
+ - test/attendee/ts_synonymer.rb
201
+ - test/lir.vec
202
+ - test/test_helper.rb
215
203
  - test/lir.txt
216
- - test/ts_language.rb
204
+ - test/mul.txt
205
+ - test/ts_database.rb
217
206
  homepage: http://lex-lingo.de
218
207
  licenses: []
219
208
  post_install_message:
@@ -223,7 +212,7 @@ rdoc_options:
223
212
  - --line-numbers
224
213
  - --all
225
214
  - --title
226
- - lingo Application documentation (v1.8.1)
215
+ - lingo Application documentation (v1.8.2)
227
216
  - --main
228
217
  - README
229
218
  require_paths:
data/info/Objekte.png DELETED
Binary file
data/info/Typen.png DELETED
Binary file
data/info/database.png DELETED
Binary file
data/info/db_small.png DELETED
Binary file
data/info/download.png DELETED
Binary file
data/info/kerze.png DELETED
Binary file
data/info/language.png DELETED
Binary file
data/info/lingo.png DELETED
Binary file
data/info/logo.png DELETED
Binary file
data/info/meeting.png DELETED
Binary file
data/info/types.png DELETED
Binary file
data/lingo-all.cfg DELETED
@@ -1,89 +0,0 @@
1
- #
2
- # Lingo-Konfiguration
3
- #
4
- ---
5
- meeting:
6
-
7
- attendees:
8
-
9
- ########################################
10
- # Text bereitstellen
11
- #
12
-
13
- # Angegebene Datei zeilenweise einlesen und verarbeitet
14
- - text_reader: { files: '$(files)' }
15
-
16
-
17
- ########################################
18
- # Inhalte verarbeiten
19
- #
20
-
21
- # Zeile in einzelnen Sinnbestandteile (Token) zerlegen
22
- - tokenizer: { }
23
-
24
- # Abkürzungen erkennen und auflösen
25
- # - abbreviator: { source: 'sys-abk' }
26
-
27
- # Verbleibende Token im Wörterbuch suchen
28
- - word_searcher: { source: 'sys-dic', mode: 'first' }
29
-
30
- # Schreibweisen variieren und erneut suchen
31
- # - variator: { source: 'sys-dic' }
32
-
33
- # Bindestrichergänzungen rekonstruieren
34
- # - dehyphenizer: { source: 'sys-dic' }
35
-
36
- # Nicht erkannte Wörter auf Kompositum testen
37
- # - decomposer: { source: 'sys-dic' }
38
-
39
- # Mehrwortgruppen im Strom erkennen
40
- # - multi_worder: { stopper: 'PUNC,OTHR', source: 'sys-mul' }
41
-
42
- # Wortsequenzen anhand von Regeln identifizieren
43
- # - sequencer: { stopper: 'PUNC,OTHR' }
44
-
45
- # Relationierungen einfügen
46
- # - synonymer: { skip: '?,t', source: 'sys-syn', out: syn }
47
-
48
-
49
- ########################################
50
- # Datenstrom anzeigen
51
- #
52
- - debugger: { eval: 'true', ceval: 'cmd!="EOL"', prompt: 'lex:) ' }
53
-
54
-
55
- ########################################
56
- # Ergebnisse ausgeben
57
- #
58
-
59
- # Erstelle Datei mit Endung .log für Datenstrom
60
- # - vector_filter: { in: syn, debug: 'true', prompt: 'lex:) ' }
61
- # - text_writer: { ext: log, sep: "\n" }
62
-
63
- # Erstelle Datei mit Endung .non für nicht erkannte Wörter
64
- # - noneword_filter: { in: syn }
65
- # - text_writer: { ext: non, sep: "\n" }
66
-
67
- # Erstelle Datei mit Endung .vec für erkannte Indexterme
68
- # - vector_filter: { in: syn, lexicals: '^[ksavem]$' }
69
- # - text_writer: { ext: vec, sep: "\n" }
70
-
71
- # Erstelle Datei mit Endung .ven für erkannte Indexterme mit absoluter Häufigkeit
72
- # - vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: 'term_abs' }
73
- # - text_writer: { ext: ven, sep: "\n" }
74
-
75
- # Erstelle Datei mit Endung .ver für erkannte Indexterme mit relativer Häufigkeit
76
- # - vector_filter: { in: syn, lexicals: '^[ksavem]$', sort: 'term_rel' }
77
- # - text_writer: { ext: ver, sep: "\n" }
78
-
79
- # Erstelle Datei mit Endung .mul für erkannte Mehrwortgruppen
80
- # - vector_filter: { in: syn, lexicals: m }
81
- # - text_writer: { ext: mul, sep: "\n" }
82
-
83
- # Erstelle Datei mit Endung .seq für erkannte Wortsequenzen
84
- # - vector_filter: { in: syn, lexicals: q, sort: 'term_abs' }
85
- # - text_writer: { ext: seq, sep: "\n" }
86
-
87
- # Erstelle Datei mit Endung .syn für erkannte Synonyme
88
- # - vector_filter: { in: syn, lexicals: y, sort: 'term_abs' }
89
- # - text_writer: { ext: syn, sep: "\n" }