lingo 1.8.6 → 1.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +40 -4
  3. data/README +22 -51
  4. data/Rakefile +3 -17
  5. data/config/lingo.cfg +24 -15
  6. data/config/lir.cfg +25 -16
  7. data/dict/de/test_muh.txt +6 -0
  8. data/dict/en/lingo-dic.txt +2 -3
  9. data/lang/de.lang +10 -9
  10. data/lang/en.lang +1 -1
  11. data/lib/lingo.rb +4 -4
  12. data/lib/lingo/attendee.rb +27 -7
  13. data/lib/lingo/attendee/analysis_filter.rb +81 -0
  14. data/lib/lingo/attendee/debug_filter.rb +42 -0
  15. data/lib/lingo/attendee/debugger.rb +2 -11
  16. data/lib/lingo/attendee/decomposer.rb +6 -3
  17. data/lib/lingo/attendee/formatter.rb +6 -6
  18. data/lib/lingo/attendee/hal_filter.rb +94 -0
  19. data/lib/lingo/attendee/lsi_filter.rb +99 -0
  20. data/lib/lingo/attendee/multi_worder.rb +69 -43
  21. data/lib/lingo/attendee/sequencer.rb +32 -19
  22. data/lib/lingo/attendee/synonymer.rb +2 -2
  23. data/lib/lingo/attendee/text_reader.rb +63 -92
  24. data/lib/lingo/attendee/text_writer.rb +12 -21
  25. data/lib/lingo/attendee/tokenizer.rb +32 -21
  26. data/lib/lingo/attendee/variator.rb +3 -3
  27. data/lib/lingo/attendee/vector_filter.rb +7 -9
  28. data/lib/lingo/attendee/word_searcher.rb +3 -3
  29. data/lib/lingo/buffered_attendee.rb +3 -36
  30. data/lib/lingo/config.rb +1 -1
  31. data/lib/lingo/ctl.rb +7 -155
  32. data/lib/lingo/ctl/analysis.rb +136 -0
  33. data/lib/lingo/ctl/files.rb +86 -0
  34. data/lib/lingo/ctl/other.rb +140 -0
  35. data/lib/lingo/database.rb +64 -60
  36. data/lib/lingo/database/crypter.rb +7 -5
  37. data/lib/lingo/error.rb +5 -4
  38. data/lib/lingo/language.rb +13 -5
  39. data/lib/lingo/language/grammar.rb +13 -7
  40. data/lib/lingo/language/token.rb +6 -0
  41. data/lib/lingo/language/word.rb +23 -36
  42. data/lib/lingo/language/word_form.rb +5 -1
  43. data/lib/lingo/srv.rb +2 -2
  44. data/lib/lingo/text_utils.rb +96 -0
  45. data/lib/lingo/version.rb +1 -1
  46. data/lib/lingo/web/views/index.erb +1 -1
  47. data/test/attendee/ts_decomposer.rb +23 -5
  48. data/test/attendee/ts_multi_worder.rb +66 -0
  49. data/test/attendee/ts_sequencer.rb +28 -4
  50. data/test/attendee/ts_text_reader.rb +20 -0
  51. data/test/attendee/ts_tokenizer.rb +20 -0
  52. data/test/attendee/ts_variator.rb +1 -1
  53. data/test/attendee/ts_word_searcher.rb +39 -3
  54. data/test/lir3.txt +12 -0
  55. data/test/ref/artikel.non +1 -12
  56. data/test/ref/artikel.seq +3 -1
  57. data/test/ref/artikel.vec +1 -0
  58. data/test/ref/artikel.vef +35 -34
  59. data/test/ref/artikel.ven +8 -7
  60. data/test/ref/artikel.ver +34 -33
  61. data/test/ref/artikel.vet +2573 -2563
  62. data/test/ref/lir.non +77 -78
  63. data/test/ref/lir.seq +9 -7
  64. data/test/ref/lir.syn +1 -1
  65. data/test/ref/lir.vec +41 -41
  66. data/test/ref/lir.vef +210 -210
  67. data/test/ref/lir.ven +46 -46
  68. data/test/ref/lir.ver +72 -72
  69. data/test/ref/lir.vet +329 -329
  70. data/test/ts_database.rb +166 -62
  71. data/test/ts_language.rb +23 -23
  72. metadata +53 -34
  73. data/lib/lingo/attendee/dehyphenizer.rb +0 -120
  74. data/lib/lingo/attendee/noneword_filter.rb +0 -115
  75. data/test/attendee/ts_noneword_filter.rb +0 -15
@@ -0,0 +1,6 @@
1
+ albert einstein
2
+ john f kennedy
3
+ a priori
4
+ ableitung nicht ganzzahliger ordnung
5
+ academic learning time in physical education
6
+ juristische person
@@ -19913,7 +19913,7 @@ fodder=fodder #s|v
19913
19913
  foe=foe #s
19914
19914
  foederatus=foederatus #s
19915
19915
  foetal=foetal #a
19916
- fœtid=fœtid #a
19916
+ foetid=foetid #a
19917
19917
  foetidness=foetidness #s
19918
19918
  foetus=foetus #s
19919
19919
  fog=fog #s|v
@@ -53175,7 +53175,6 @@ vedette=vedette #s
53175
53175
  veejay=veejay #s
53176
53176
  veel=veel #v
53177
53177
  veer=veer #s|v
53178
- veg*n=veg*n #s|a
53179
53178
  vega=vega #s
53180
53179
  vegan=vegan #s|a
53181
53180
  veganism=veganism #s
@@ -55392,7 +55391,7 @@ zony=zony #s
55392
55391
  zoo=zoo #s
55393
55392
  zooarchaeology=zooarchaeology #s
55394
55393
  zoobie=zoobie #s
55395
- zoœcium=zoœcium #s
55394
+ zooecium=zooecium #s
55396
55395
  zoogeography=zoogeography #s
55397
55396
  zoolater=zoolater #s
55398
55397
  zoological=zoological #a
@@ -56,12 +56,13 @@ language:
56
56
  usr-dic: { name: de/user-dic.txt, txt-format: WordClass, separator: '=' }
57
57
 
58
58
  # Test dictionaries
59
- tst-dic: { name: de/test_dic.txt, txt-format: WordClass } # TEST: Lesen von zwei Quellen
60
- tst-syn: { name: de/test_syn.txt, txt-format: MultiValue, def-wc: y } # TEST: Mehrere Datenquellen
61
- tst-mul: { name: de/test_mul.txt, use-lex: sys-dic, def-wc: m } # TEST: Mehrere Multiwörterbücher
62
- tst-mu2: { name: de/test_mu2.txt, use-lex: sys-dic, def-wc: m } # TEST: Mehrere Multiwörterbücher
63
- tst-sgw: { name: de/test_sgw.txt, txt-format: SingleWord } # TEST: SingleWord-Format
64
- tst-gen: { name: de/test_gen.txt, txt-format: WordClass } # TEST: Genus
59
+ tst-dic: { name: de/test_dic.txt, txt-format: WordClass }
60
+ tst-syn: { name: de/test_syn.txt, txt-format: MultiValue, def-wc: y }
61
+ tst-mul: { name: de/test_mul.txt, use-lex: sys-dic, def-wc: m }
62
+ tst-mu2: { name: de/test_mu2.txt, use-lex: sys-dic, def-wc: m }
63
+ tst-muh: { name: de/test_muh.txt, txt-format: SingleWord, use-lex: sys-dic, def-wc: m, hyphenate: true }
64
+ tst-sgw: { name: de/test_sgw.txt, txt-format: SingleWord }
65
+ tst-gen: { name: de/test_gen.txt, txt-format: WordClass }
65
66
 
66
67
  compound:
67
68
  min-word-size: '7'
@@ -118,8 +119,8 @@ language:
118
119
  # SPAC = \s+
119
120
  # NUMS = [+-]?(?:\d{4,}|\d{1,3}(?:\.\d{3,3})*)(?:\.|(?:,\d+)?%?)
120
121
  # URLS = (?:www\.|mailto:|(?:news|https?|ftps?)://|\S+?[._]\S+?@\S+?\.)\S+
121
- # ABRV = (?:(?:(?:#{CHAR})+\.)+)(?:#{CHAR})+
122
- # WORD = (?:#{CHAR}|#{DIGIT}|-)+
122
+ # ABRV = (?:(?:(?:CHAR)+\.)+)(?:CHAR)+
123
+ # WORD = ALNUM(?:-*ALNUM)*
123
124
  # PUNC = [!,.:;?¡¿]
124
- # OTHR = ["$#%&'()*+\-/<=>@\[\\\]^_{|}~¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾×÷]
125
+ # OTHR = [-"$#%&'()*+\-/<=>@\[\\\]^_{|}~¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾×÷]
125
126
  # HELP = \S*
@@ -69,7 +69,7 @@ language:
69
69
  # Suffixklasse: s = Substantiv, a = Adjektiv, v = Verb, e = Eigenwort, f = Fugung
70
70
  # Suffixe je Klasse: "<suffix>['/'<ersetzung>][ <suffix>['/'<ersetzung>]]"
71
71
  - [s, 'es s ves/f ves/fe ies/y']
72
- - [a, 'er est r st ier/y iest/y']
72
+ - [a, 'er est r st ier/y iest/y ly al ally']
73
73
  - [v, 'd ed en es ing s ing/e']
74
74
  - [e, 's']
75
75
  - [f, '']
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -61,7 +61,8 @@ class Lingo
61
61
  }
62
62
 
63
63
  # Default encoding
64
- Encoding.default_external = Encoding.default_internal = ENC = 'UTF-8'.freeze
64
+ Encoding.default_external = ENC = 'UTF-8'.freeze
65
+ Encoding.default_internal = ENC unless RUBY_ENGINE == 'jruby'
65
66
 
66
67
  SEP_RE = %r{[; ,|]}
67
68
 
@@ -265,8 +266,7 @@ class Lingo
265
266
  end
266
267
 
267
268
  def invite(list = config['meeting/attendees'])
268
- supplier = Hash.nest { [] }
269
- subscriber = Hash.nest { [] }
269
+ supplier, subscriber = Hash.array, Hash.array
270
270
 
271
271
  last_link, auto_link = '', 0
272
272
 
@@ -70,6 +70,8 @@ class Lingo
70
70
 
71
71
  include Language
72
72
 
73
+ TERMINALS = [:FILE, :RECORD, :EOF]
74
+
73
75
  DEFAULT_SKIP = [TA_PUNCTUATION, TA_OTHER].join(',')
74
76
 
75
77
  def initialize(config, lingo)
@@ -124,7 +126,15 @@ class Lingo
124
126
  @config.fetch(key, default)
125
127
  end
126
128
 
127
- def get_array(key, default = nil, method = nil)
129
# Looks up a configuration value and converts it strictly to an Integer.
#
# All arguments are handed to +get_key+ unchanged. The conversion uses
# Kernel#Integer, so a value that is not a valid integer literal raises
# (ArgumentError for malformed strings, TypeError for +nil+) rather than
# silently becoming 0.
def get_int(*args)
  value = get_key(*args)
  Integer(value)
end
132
+
133
# Looks up a configuration value and converts it to a Float when possible.
#
# All arguments are handed to +get_key+ unchanged. A falsy value (+nil+ or
# +false+) or one that does not respond to +to_f+ is returned as is, which
# lets callers use +false+ as a "not set" default. Note that +to_f+ is
# lenient: a non-numeric string becomes 0.0 instead of raising.
def get_flo(*args)
  value = get_key(*args)
  return value unless value && value.respond_to?(:to_f)

  value.to_f
end
136
+
137
+ def get_ary(key, default = nil, method = nil)
128
138
  ary = get_key(key, default).split(SEP_RE)
129
139
  ary.map!(&method) if method
130
140
  ary
@@ -136,6 +146,12 @@ class Lingo
136
146
  end
137
147
  end
138
148
 
149
# Resolves a configured encoding name to an Encoding object.
#
# key::     configuration key to read (defaults to 'encoding')
# default:: value used when the key is absent (defaults to ENC)
#
# An ArgumentError raised while looking up the value or the encoding
# (e.g. an unknown encoding name) is re-raised as ConfigLoadError wrapping
# the original error. Other exception types propagate unchanged.
def get_enc(key = 'encoding', default = ENC)
  begin
    name = get_key(key, default)
    Encoding.find(name)
  rescue ArgumentError => e
    raise ConfigLoadError.new(e)
  end
end
154
+
139
155
  def dictionary(src, mod)
140
156
  Language::Dictionary.new({ 'source' => src, 'mode' => mod }, lingo)
141
157
  end
@@ -145,11 +161,11 @@ class Lingo
145
161
  end
146
162
 
147
163
  def set_dic
148
- @dic = dictionary(get_array('source'), get_key('mode', 'all'))
164
+ @dic = dictionary(get_ary('source'), get_key('mode', 'all'))
149
165
  end
150
166
 
151
167
  def set_gra
152
- @gra = grammar(get_array('source'), get_key('mode', 'all'))
168
+ @gra = grammar(get_ary('source'), get_key('mode', 'all'))
153
169
  end
154
170
 
155
171
  def warn(*msg)
@@ -166,23 +182,27 @@ class Lingo
166
182
 
167
183
  end
168
184
 
185
+ require_relative 'text_utils'
186
+
169
187
  require_relative 'buffered_attendee'
170
188
  require_relative 'deferred_attendee'
171
189
 
172
190
  require_relative 'attendee/abbreviator'
191
+ require_relative 'attendee/analysis_filter'
173
192
  require_relative 'attendee/debugger'
193
+ require_relative 'attendee/debug_filter' # < Debugger
174
194
  require_relative 'attendee/decomposer'
175
- require_relative 'attendee/dehyphenizer'
195
+ require_relative 'attendee/hal_filter'
196
+ require_relative 'attendee/lsi_filter'
176
197
  require_relative 'attendee/multi_worder'
177
- require_relative 'attendee/noneword_filter'
178
198
  require_relative 'attendee/object_filter'
179
- require_relative 'attendee/variator'
180
199
  require_relative 'attendee/sequencer'
181
200
  require_relative 'attendee/stemmer'
182
201
  require_relative 'attendee/synonymer'
183
202
  require_relative 'attendee/text_reader'
184
203
  require_relative 'attendee/text_writer'
185
- require_relative 'attendee/formatter'
204
+ require_relative 'attendee/formatter' # < TextWriter
186
205
  require_relative 'attendee/tokenizer'
206
+ require_relative 'attendee/variator'
187
207
  require_relative 'attendee/vector_filter'
188
208
  require_relative 'attendee/word_searcher'
@@ -0,0 +1,81 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'csv'
28
+
29
class Lingo

  class Attendee

    # Turns every incoming object into one or more CSV-formatted rows and
    # forwards each row as a plain string. A header row with the column
    # names is forwarded once, before the first data row.
    #
    # A Token yields a single row. Any other object is expected to respond
    # to +token+ and +lexicals+ (NOTE(review): presumably always a Word --
    # confirm against callers); it yields one row for the object itself and
    # one row per lexical.
    class AnalysisFilter < self

      # Column name => method invoked on the positional argument that
      # #forward_obj receives for that column. Frozen so the shared
      # constant cannot be modified at runtime.
      FIELDS = {
        string:   :form,
        token:    :attr,
        position: :position,
        offset:   :offset,
        word:     :attr,
        pattern:  :pattern
      }.freeze

      # Sets up the CSV writer and the pending header row. The empty row
      # separator means forwarded rows carry no line terminator.
      def init
        @csv    = CSV.new('', row_sep: '')
        @header = FIELDS.keys
      end

      # Returns :skip_command for :EOL and nil for every other command.
      def control(cmd, *)
        :skip_command if cmd == :EOL
      end

      # Forwards the row(s) describing +obj+; extra arguments are ignored.
      def process(obj, *)
        forward_header

        if obj.is_a?(Token)
          # string, token, position and offset all come from the token itself.
          forward_obj(obj, obj, obj, obj)
        else
          tok = obj.token

          # The token column stays empty; position/offset come from the
          # underlying token, word/pattern from the object itself.
          forward_obj(obj, nil, tok, tok, obj, obj)

          obj.lexicals.each { |lex|
            forward_obj(lex, nil, tok, tok, lex, obj)
          }
        end
      end

      private

      # Forwards the header row the first time it is called, then never again.
      def forward_header
        return unless @header

        header, @header = @header, nil
        forward_row(header)
      end

      # Builds a row by calling, for each column in FIELDS order, the mapped
      # method on the argument at the same position. A missing or nil
      # argument leaves that column empty.
      def forward_obj(*args)
        row = FIELDS.values.each_with_index.map { |method, index|
          arg = args[index]
          arg && arg.send(method)
        }

        forward_row(row)
      end

      # Serializes +row+ through the shared CSV writer and forwards the
      # resulting string. The string is duplicated because the underlying
      # buffer is cleared and rewound right afterwards for the next row.
      def forward_row(row)
        forward(@csv.add_row(row).string.dup)
        @csv.string.clear
        @csv.rewind
      end

    end

  end

end
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
class Lingo

  class Attendee

    # A Debugger variant that never passes objects on: it sets @filter,
    # which the inherited Debugger#process checks before forwarding.
    class DebugFilter < Debugger

      # Enables filtering, then runs the Debugger set-up with an empty
      # string argument (NOTE(review): presumably the default prompt --
      # confirm against Debugger#init). @filter is assigned before calling
      # +super+; keep that order.
      def init
        @filter = true
        super('')
      end

    end

  end

end
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -112,7 +112,7 @@ class Lingo
112
112
  end
113
113
  end
114
114
 
115
- def process(obj)
115
+ def process(obj, *)
116
116
  debug(eval(@obj_eval)) { obj.inspect }
117
117
  forward(obj) unless @filter
118
118
  end
@@ -130,15 +130,6 @@ class Lingo
130
130
 
131
131
  end
132
132
 
133
- class DebugFilter < Debugger
134
-
135
- def init
136
- @filter = true
137
- super('')
138
- end
139
-
140
- end
141
-
142
133
  end
143
134
 
144
135
  end
@@ -40,7 +40,8 @@ class Lingo
40
40
  #
41
41
  # === Mögliche Verlinkung
42
42
  # Erwartet:: Daten vom Typ *Word* (andere werden einfach durchgereicht) z.B. von Wordsearcher
43
- # Erzeugt:: Daten vom Typ *Word* (erkannte Komposita werden entsprechend erweitert) z.B. für Synonymer, Ocr_variator, Multiworder, Sequencer, Noneword_filter, Vector_filter
43
+ # Erzeugt:: Daten vom Typ *Word* (erkannte Komposita werden entsprechend erweitert) z.B. für
44
+ # Synonymer, Ocr_variator, Multiworder, Sequencer, Vector_filter
44
45
  #
45
46
  # === Parameter
46
47
  # Kursiv dargestellte Parameter sind optional (ggf. mit Angabe der Voreinstellung).
@@ -65,9 +66,9 @@ class Lingo
65
66
  # out> <Lingo|?>
66
67
  # out> :,/PUNC:
67
68
  # out> <ein = [(ein/w)]>
68
- # out> <Indexierungssystem|KOM = [(indexierungssystem/k), (indexierung/s), (system/s)]>
69
+ # out> <Indexierungssystem|COM = [(indexierungssystem/k), (indexierung/s), (system/s)]>
69
70
  # out> <mit = [(mit/w)]>
70
- # out> <Kompositumerkennung|KOM = [(kompositumerkennung/k), (erkennung/s), (kompositum/s)]>
71
+ # out> <Kompositumerkennung|COM = [(kompositumerkennung/k), (erkennung/s), (kompositum/s)]>
71
72
  # out> :./PUNC:
72
73
  # out> *EOL('test.txt')
73
74
  # out> *EOF('test.txt')
@@ -85,6 +86,8 @@ class Lingo
85
86
  def process(obj)
86
87
  if obj.is_a?(Word) && obj.unknown?
87
88
  com = @gra.find_compound(obj.form)
89
+ com.token = obj.token
90
+
88
91
  obj = com unless com.unknown?
89
92
  end
90
93
 
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -35,7 +35,7 @@ class Lingo
35
35
 
36
36
  @ext = get_key('ext', '-')
37
37
  @format = get_key('format', '%s')
38
- @map = get_key('map', Hash.nest { |k| k })
38
+ @map = get_key('map', Hash.identity)
39
39
 
40
40
  @no_puts = true
41
41
  end
@@ -44,10 +44,10 @@ class Lingo
44
44
  if obj.is_a?(WordForm)
45
45
  str = obj.form
46
46
 
47
- if obj.respond_to?(:lexicals)
48
- lex = obj.lexicals.first # TODO
49
- att = @map[lex.attr] if lex
50
- str = @format % [str, lex.form, att] if att
47
+ if obj.is_a?(Word)
48
+ # TODO: which lexical to select? (currently: first)
49
+ obj.each_lex { |lex|
50
+ att = @map[lex.attr] and str = @format % [str, lex.form, att] }
51
51
  end
52
52
  else
53
53
  str = obj.to_s
@@ -0,0 +1,94 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
class Lingo

  class Attendee

    # Feeds the lexical forms of incoming words into a Hal4R instance and,
    # whenever one of the TERMINALS commands arrives, forwards one formatted
    # line per term vector before resetting the collected data.
    #
    # Options read in #init: lexicals, skip, norm, sep, min, dim, sort and
    # window-size.
    class HalFilter < self

      def init
        require_lib('hal4r')

        @lex  = get_re('lexicals', '[sy]')
        @skip = get_ary('skip', DEFAULT_SKIP, :upcase)

        @norm = get_key('norm', true)
        @sep  = get_key('sep', '^')
        @min  = get_flo('min', false)
        @dim  = get_int('dim', 2)

        # Normalise the case on a copy: the object returned by get_key is
        # the configured value itself, so it must not be modified in place
        # (and may be frozen).
        @sort = get_key('sort', false)
        @sort = @sort.downcase if @sort.respond_to?(:downcase!)

        @hal = Hal4R.new([], get_int('window-size', Hal4R::DEFAULT_WINDOW_SIZE))
      end

      # Returns :skip_command for :EOL. For any of the TERMINALS commands
      # the collected vectors are sent, unless nothing has been collected.
      def control(cmd, *)
        case cmd
          when :EOL       then :skip_command
          when *TERMINALS then send_vectors unless @hal.empty?
        end
      end

      # Adds the downcased lexical form of +obj+ to the collection when it
      # is a Word whose attribute is not listed in the skip option.
      def process(obj)
        obj.is_a?(Word) && !@skip.include?(obj.attr) &&
          # TODO: which lexical to select? (currently: first)
          obj.lex_form(@lex) { |form| @hal << Unicode.downcase(form) }
      end

      private

      # Forwards all vectors in the format selected by the sort option,
      # then resets the collection:
      #
      # not set:: unsorted, "term weight"
      # normal::  sorted by term, term only
      # sto::     sorted by descending weight, "term {weight}"
      # other::   sorted by descending weight, "weight term"
      def send_vectors
        if @sort
          fmt = case @sort
            when 'normal' then nil
            when 'sto'    then '%s {%.5f}'
            else               '%2$.5f %1$s'
          end

          vec = []
          each_vector { |v| vec << v }

          if fmt
            # Highest weight first; equal weights are ordered by term.
            vec.sort_by { |term, weight| [-weight, term] }
              .each { |v| forward(fmt % v) }
          else
            vec.sort!.each { |term, _| forward(term) }
          end
        else
          each_vector { |v| forward('%s %.5f' % v) }
        end

        @hal.reset
      end

      # Yields a [joined terms, weight] pair for every distance reported by
      # the Hal4R instance, where the weight is the reciprocal of the
      # distance. Pairs whose weight is NaN, or below the min option when
      # one is set, are left out.
      # NOTE(review): assumes distances are Floats (Integer has no #nan?
      # and 1 / 0 would raise) -- confirm against Hal4R#each_distance.
      def each_vector
        @hal.each_distance(@norm, @dim) { |*terms, distance|
          weight = 1 / distance
          next if weight.nan? || (@min && weight < @min)

          yield [terms.join(@sep), weight]
        }
      end

    end

  end

end