lingo 1.8.6 → 1.8.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/ChangeLog +40 -4
  3. data/README +22 -51
  4. data/Rakefile +3 -17
  5. data/config/lingo.cfg +24 -15
  6. data/config/lir.cfg +25 -16
  7. data/dict/de/test_muh.txt +6 -0
  8. data/dict/en/lingo-dic.txt +2 -3
  9. data/lang/de.lang +10 -9
  10. data/lang/en.lang +1 -1
  11. data/lib/lingo.rb +4 -4
  12. data/lib/lingo/attendee.rb +27 -7
  13. data/lib/lingo/attendee/analysis_filter.rb +81 -0
  14. data/lib/lingo/attendee/debug_filter.rb +42 -0
  15. data/lib/lingo/attendee/debugger.rb +2 -11
  16. data/lib/lingo/attendee/decomposer.rb +6 -3
  17. data/lib/lingo/attendee/formatter.rb +6 -6
  18. data/lib/lingo/attendee/hal_filter.rb +94 -0
  19. data/lib/lingo/attendee/lsi_filter.rb +99 -0
  20. data/lib/lingo/attendee/multi_worder.rb +69 -43
  21. data/lib/lingo/attendee/sequencer.rb +32 -19
  22. data/lib/lingo/attendee/synonymer.rb +2 -2
  23. data/lib/lingo/attendee/text_reader.rb +63 -92
  24. data/lib/lingo/attendee/text_writer.rb +12 -21
  25. data/lib/lingo/attendee/tokenizer.rb +32 -21
  26. data/lib/lingo/attendee/variator.rb +3 -3
  27. data/lib/lingo/attendee/vector_filter.rb +7 -9
  28. data/lib/lingo/attendee/word_searcher.rb +3 -3
  29. data/lib/lingo/buffered_attendee.rb +3 -36
  30. data/lib/lingo/config.rb +1 -1
  31. data/lib/lingo/ctl.rb +7 -155
  32. data/lib/lingo/ctl/analysis.rb +136 -0
  33. data/lib/lingo/ctl/files.rb +86 -0
  34. data/lib/lingo/ctl/other.rb +140 -0
  35. data/lib/lingo/database.rb +64 -60
  36. data/lib/lingo/database/crypter.rb +7 -5
  37. data/lib/lingo/error.rb +5 -4
  38. data/lib/lingo/language.rb +13 -5
  39. data/lib/lingo/language/grammar.rb +13 -7
  40. data/lib/lingo/language/token.rb +6 -0
  41. data/lib/lingo/language/word.rb +23 -36
  42. data/lib/lingo/language/word_form.rb +5 -1
  43. data/lib/lingo/srv.rb +2 -2
  44. data/lib/lingo/text_utils.rb +96 -0
  45. data/lib/lingo/version.rb +1 -1
  46. data/lib/lingo/web/views/index.erb +1 -1
  47. data/test/attendee/ts_decomposer.rb +23 -5
  48. data/test/attendee/ts_multi_worder.rb +66 -0
  49. data/test/attendee/ts_sequencer.rb +28 -4
  50. data/test/attendee/ts_text_reader.rb +20 -0
  51. data/test/attendee/ts_tokenizer.rb +20 -0
  52. data/test/attendee/ts_variator.rb +1 -1
  53. data/test/attendee/ts_word_searcher.rb +39 -3
  54. data/test/lir3.txt +12 -0
  55. data/test/ref/artikel.non +1 -12
  56. data/test/ref/artikel.seq +3 -1
  57. data/test/ref/artikel.vec +1 -0
  58. data/test/ref/artikel.vef +35 -34
  59. data/test/ref/artikel.ven +8 -7
  60. data/test/ref/artikel.ver +34 -33
  61. data/test/ref/artikel.vet +2573 -2563
  62. data/test/ref/lir.non +77 -78
  63. data/test/ref/lir.seq +9 -7
  64. data/test/ref/lir.syn +1 -1
  65. data/test/ref/lir.vec +41 -41
  66. data/test/ref/lir.vef +210 -210
  67. data/test/ref/lir.ven +46 -46
  68. data/test/ref/lir.ver +72 -72
  69. data/test/ref/lir.vet +329 -329
  70. data/test/ts_database.rb +166 -62
  71. data/test/ts_language.rb +23 -23
  72. metadata +53 -34
  73. data/lib/lingo/attendee/dehyphenizer.rb +0 -120
  74. data/lib/lingo/attendee/noneword_filter.rb +0 -115
  75. data/test/attendee/ts_noneword_filter.rb +0 -15
@@ -0,0 +1,6 @@
1
+ albert einstein
2
+ john f kennedy
3
+ a priori
4
+ ableitung nicht ganzzahliger ordnung
5
+ academic learning time in physical education
6
+ juristische person
@@ -19913,7 +19913,7 @@ fodder=fodder #s|v
19913
19913
  foe=foe #s
19914
19914
  foederatus=foederatus #s
19915
19915
  foetal=foetal #a
19916
- fœtid=fœtid #a
19916
+ foetid=foetid #a
19917
19917
  foetidness=foetidness #s
19918
19918
  foetus=foetus #s
19919
19919
  fog=fog #s|v
@@ -53175,7 +53175,6 @@ vedette=vedette #s
53175
53175
  veejay=veejay #s
53176
53176
  veel=veel #v
53177
53177
  veer=veer #s|v
53178
- veg*n=veg*n #s|a
53179
53178
  vega=vega #s
53180
53179
  vegan=vegan #s|a
53181
53180
  veganism=veganism #s
@@ -55392,7 +55391,7 @@ zony=zony #s
55392
55391
  zoo=zoo #s
55393
55392
  zooarchaeology=zooarchaeology #s
55394
55393
  zoobie=zoobie #s
55395
- zoœcium=zoœcium #s
55394
+ zooecium=zooecium #s
55396
55395
  zoogeography=zoogeography #s
55397
55396
  zoolater=zoolater #s
55398
55397
  zoological=zoological #a
@@ -56,12 +56,13 @@ language:
56
56
  usr-dic: { name: de/user-dic.txt, txt-format: WordClass, separator: '=' }
57
57
 
58
58
  # Test dictionaries
59
- tst-dic: { name: de/test_dic.txt, txt-format: WordClass } # TEST: Lesen von zwei Quellen
60
- tst-syn: { name: de/test_syn.txt, txt-format: MultiValue, def-wc: y } # TEST: Mehrere Datenquellen
61
- tst-mul: { name: de/test_mul.txt, use-lex: sys-dic, def-wc: m } # TEST: Mehrere Multiwörterbücher
62
- tst-mu2: { name: de/test_mu2.txt, use-lex: sys-dic, def-wc: m } # TEST: Mehrere Multiwörterbücher
63
- tst-sgw: { name: de/test_sgw.txt, txt-format: SingleWord } # TEST: SingleWord-Format
64
- tst-gen: { name: de/test_gen.txt, txt-format: WordClass } # TEST: Genus
59
+ tst-dic: { name: de/test_dic.txt, txt-format: WordClass }
60
+ tst-syn: { name: de/test_syn.txt, txt-format: MultiValue, def-wc: y }
61
+ tst-mul: { name: de/test_mul.txt, use-lex: sys-dic, def-wc: m }
62
+ tst-mu2: { name: de/test_mu2.txt, use-lex: sys-dic, def-wc: m }
63
+ tst-muh: { name: de/test_muh.txt, txt-format: SingleWord, use-lex: sys-dic, def-wc: m, hyphenate: true }
64
+ tst-sgw: { name: de/test_sgw.txt, txt-format: SingleWord }
65
+ tst-gen: { name: de/test_gen.txt, txt-format: WordClass }
65
66
 
66
67
  compound:
67
68
  min-word-size: '7'
@@ -118,8 +119,8 @@ language:
118
119
  # SPAC = \s+
119
120
  # NUMS = [+-]?(?:\d{4,}|\d{1,3}(?:\.\d{3,3})*)(?:\.|(?:,\d+)?%?)
120
121
  # URLS = (?:www\.|mailto:|(?:news|https?|ftps?)://|\S+?[._]\S+?@\S+?\.)\S+
121
- # ABRV = (?:(?:(?:#{CHAR})+\.)+)(?:#{CHAR})+
122
- # WORD = (?:#{CHAR}|#{DIGIT}|-)+
122
+ # ABRV = (?:(?:(?:CHAR)+\.)+)(?:CHAR)+
123
+ # WORD = ALNUM(?:-*ALNUM)*
123
124
  # PUNC = [!,.:;?¡¿]
124
- # OTHR = ["$#%&'()*+\-/<=>@\[\\\]^_{|}~¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾×÷]
125
+ # OTHR = [-"$#%&'()*+\-/<=>@\[\\\]^_{|}~¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾×÷]
125
126
  # HELP = \S*
@@ -69,7 +69,7 @@ language:
69
69
  # Suffixklasse: s = Substantiv, a = Adjektiv, v = Verb, e = Eigenwort, f = Fugung
70
70
  # Suffixe je Klasse: "<suffix>['/'<ersetzung>][ <suffix>['/'<ersetzung>]]"
71
71
  - [s, 'es s ves/f ves/fe ies/y']
72
- - [a, 'er est r st ier/y iest/y']
72
+ - [a, 'er est r st ier/y iest/y ly al ally']
73
73
  - [v, 'd ed en es ing s ing/e']
74
74
  - [e, 's']
75
75
  - [f, '']
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -61,7 +61,8 @@ class Lingo
61
61
  }
62
62
 
63
63
  # Default encoding
64
- Encoding.default_external = Encoding.default_internal = ENC = 'UTF-8'.freeze
64
+ Encoding.default_external = ENC = 'UTF-8'.freeze
65
+ Encoding.default_internal = ENC unless RUBY_ENGINE == 'jruby'
65
66
 
66
67
  SEP_RE = %r{[; ,|]}
67
68
 
@@ -265,8 +266,7 @@ class Lingo
265
266
  end
266
267
 
267
268
  def invite(list = config['meeting/attendees'])
268
- supplier = Hash.nest { [] }
269
- subscriber = Hash.nest { [] }
269
+ supplier, subscriber = Hash.array, Hash.array
270
270
 
271
271
  last_link, auto_link = '', 0
272
272
 
@@ -70,6 +70,8 @@ class Lingo
70
70
 
71
71
  include Language
72
72
 
73
+ TERMINALS = [:FILE, :RECORD, :EOF]
74
+
73
75
  DEFAULT_SKIP = [TA_PUNCTUATION, TA_OTHER].join(',')
74
76
 
75
77
  def initialize(config, lingo)
@@ -124,7 +126,15 @@ class Lingo
124
126
  @config.fetch(key, default)
125
127
  end
126
128
 
127
- def get_array(key, default = nil, method = nil)
129
+ def get_int(*args)
130
+ Integer(get_key(*args))
131
+ end
132
+
133
+ def get_flo(*args)
134
+ ((val = get_key(*args)) && val.respond_to?(:to_f)) ? val.to_f : val
135
+ end
136
+
137
+ def get_ary(key, default = nil, method = nil)
128
138
  ary = get_key(key, default).split(SEP_RE)
129
139
  ary.map!(&method) if method
130
140
  ary
@@ -136,6 +146,12 @@ class Lingo
136
146
  end
137
147
  end
138
148
 
149
+ def get_enc(key = 'encoding', default = ENC)
150
+ Encoding.find(get_key(key, default))
151
+ rescue ArgumentError => err
152
+ raise ConfigLoadError.new(err)
153
+ end
154
+
139
155
  def dictionary(src, mod)
140
156
  Language::Dictionary.new({ 'source' => src, 'mode' => mod }, lingo)
141
157
  end
@@ -145,11 +161,11 @@ class Lingo
145
161
  end
146
162
 
147
163
  def set_dic
148
- @dic = dictionary(get_array('source'), get_key('mode', 'all'))
164
+ @dic = dictionary(get_ary('source'), get_key('mode', 'all'))
149
165
  end
150
166
 
151
167
  def set_gra
152
- @gra = grammar(get_array('source'), get_key('mode', 'all'))
168
+ @gra = grammar(get_ary('source'), get_key('mode', 'all'))
153
169
  end
154
170
 
155
171
  def warn(*msg)
@@ -166,23 +182,27 @@ class Lingo
166
182
 
167
183
  end
168
184
 
185
+ require_relative 'text_utils'
186
+
169
187
  require_relative 'buffered_attendee'
170
188
  require_relative 'deferred_attendee'
171
189
 
172
190
  require_relative 'attendee/abbreviator'
191
+ require_relative 'attendee/analysis_filter'
173
192
  require_relative 'attendee/debugger'
193
+ require_relative 'attendee/debug_filter' # < Debugger
174
194
  require_relative 'attendee/decomposer'
175
- require_relative 'attendee/dehyphenizer'
195
+ require_relative 'attendee/hal_filter'
196
+ require_relative 'attendee/lsi_filter'
176
197
  require_relative 'attendee/multi_worder'
177
- require_relative 'attendee/noneword_filter'
178
198
  require_relative 'attendee/object_filter'
179
- require_relative 'attendee/variator'
180
199
  require_relative 'attendee/sequencer'
181
200
  require_relative 'attendee/stemmer'
182
201
  require_relative 'attendee/synonymer'
183
202
  require_relative 'attendee/text_reader'
184
203
  require_relative 'attendee/text_writer'
185
- require_relative 'attendee/formatter'
204
+ require_relative 'attendee/formatter' # < TextWriter
186
205
  require_relative 'attendee/tokenizer'
206
+ require_relative 'attendee/variator'
187
207
  require_relative 'attendee/vector_filter'
188
208
  require_relative 'attendee/word_searcher'
@@ -0,0 +1,81 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'csv'
28
+
29
+ class Lingo
30
+
31
+ class Attendee
32
+
33
+ class AnalysisFilter < self
34
+
35
+ FIELDS = {
36
+ string: :form,
37
+ token: :attr,
38
+ position: :position,
39
+ offset: :offset,
40
+ word: :attr,
41
+ pattern: :pattern
42
+ }
43
+
44
+ def init
45
+ @csv, @header = CSV.new('', row_sep: ''), FIELDS.keys
46
+ end
47
+
48
+ def control(cmd, *)
49
+ :skip_command if cmd == :EOL
50
+ end
51
+
52
+ def process(obj, *)
53
+ forward_row(@header.tap { @header = nil }) if @header
54
+
55
+ obj.is_a?(Token) ?
56
+ forward_obj(obj, obj, obj, obj) : begin
57
+ tok = obj.token
58
+ forward_obj(obj, nil, tok, tok, obj, obj)
59
+ obj.lexicals.each { |lex|
60
+ forward_obj(lex, nil, tok, tok, lex, obj) }
61
+ end
62
+ end
63
+
64
+ private
65
+
66
+ def forward_obj(*args)
67
+ forward_row(FIELDS.map.with_index { |(_, method), index|
68
+ arg = args[index] and arg.send(method) })
69
+ end
70
+
71
+ def forward_row(row)
72
+ forward(@csv.add_row(row).string.dup)
73
+ @csv.string.clear
74
+ @csv.rewind
75
+ end
76
+
77
+ end
78
+
79
+ end
80
+
81
+ end
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class Lingo
28
+
29
+ class Attendee
30
+
31
+ class DebugFilter < Debugger
32
+
33
+ def init
34
+ @filter = true
35
+ super('')
36
+ end
37
+
38
+ end
39
+
40
+ end
41
+
42
+ end
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2014 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -112,7 +112,7 @@ class Lingo
112
112
  end
113
113
  end
114
114
 
115
- def process(obj)
115
+ def process(obj, *)
116
116
  debug(eval(@obj_eval)) { obj.inspect }
117
117
  forward(obj) unless @filter
118
118
  end
@@ -130,15 +130,6 @@ class Lingo
130
130
 
131
131
  end
132
132
 
133
- class DebugFilter < Debugger
134
-
135
- def init
136
- @filter = true
137
- super('')
138
- end
139
-
140
- end
141
-
142
133
  end
143
134
 
144
135
  end
@@ -40,7 +40,8 @@ class Lingo
40
40
  #
41
41
  # === Mögliche Verlinkung
42
42
  # Erwartet:: Daten vom Typ *Word* (andere werden einfach durchgereicht) z.B. von Wordsearcher
43
- # Erzeugt:: Daten vom Typ *Word* (erkannte Komposita werden entsprechend erweitert) z.B. für Synonymer, Ocr_variator, Multiworder, Sequencer, Noneword_filter, Vector_filter
43
+ # Erzeugt:: Daten vom Typ *Word* (erkannte Komposita werden entsprechend erweitert) z.B. für
44
+ # Synonymer, Ocr_variator, Multiworder, Sequencer, Vector_filter
44
45
  #
45
46
  # === Parameter
46
47
  # Kursiv dargestellte Parameter sind optional (ggf. mit Angabe der Voreinstellung).
@@ -65,9 +66,9 @@ class Lingo
65
66
  # out> <Lingo|?>
66
67
  # out> :,/PUNC:
67
68
  # out> <ein = [(ein/w)]>
68
- # out> <Indexierungssystem|KOM = [(indexierungssystem/k), (indexierung/s), (system/s)]>
69
+ # out> <Indexierungssystem|COM = [(indexierungssystem/k), (indexierung/s), (system/s)]>
69
70
  # out> <mit = [(mit/w)]>
70
- # out> <Kompositumerkennung|KOM = [(kompositumerkennung/k), (erkennung/s), (kompositum/s)]>
71
+ # out> <Kompositumerkennung|COM = [(kompositumerkennung/k), (erkennung/s), (kompositum/s)]>
71
72
  # out> :./PUNC:
72
73
  # out> *EOL('test.txt')
73
74
  # out> *EOF('test.txt')
@@ -85,6 +86,8 @@ class Lingo
85
86
  def process(obj)
86
87
  if obj.is_a?(Word) && obj.unknown?
87
88
  com = @gra.find_compound(obj.form)
89
+ com.token = obj.token
90
+
88
91
  obj = com unless com.unknown?
89
92
  end
90
93
 
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2012 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -35,7 +35,7 @@ class Lingo
35
35
 
36
36
  @ext = get_key('ext', '-')
37
37
  @format = get_key('format', '%s')
38
- @map = get_key('map', Hash.nest { |k| k })
38
+ @map = get_key('map', Hash.identity)
39
39
 
40
40
  @no_puts = true
41
41
  end
@@ -44,10 +44,10 @@ class Lingo
44
44
  if obj.is_a?(WordForm)
45
45
  str = obj.form
46
46
 
47
- if obj.respond_to?(:lexicals)
48
- lex = obj.lexicals.first # TODO
49
- att = @map[lex.attr] if lex
50
- str = @format % [str, lex.form, att] if att
47
+ if obj.is_a?(Word)
48
+ # TODO: which lexical to select? (currently: first)
49
+ obj.each_lex { |lex|
50
+ att = @map[lex.attr] and str = @format % [str, lex.form, att] }
51
51
  end
52
52
  else
53
53
  str = obj.to_s
@@ -0,0 +1,94 @@
1
+ # encoding: utf-8
2
+
3
+ #--
4
+ ###############################################################################
5
+ # #
6
+ # Lingo -- A full-featured automatic indexing system #
7
+ # #
8
+ # Copyright (C) 2005-2007 John Vorhauer #
9
+ # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
10
+ # #
11
+ # Lingo is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # Lingo is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with Lingo. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class Lingo
28
+
29
+ class Attendee
30
+
31
+ class HalFilter < self
32
+
33
+ def init
34
+ require_lib('hal4r')
35
+
36
+ @lex = get_re('lexicals', '[sy]')
37
+ @skip = get_ary('skip', DEFAULT_SKIP, :upcase)
38
+
39
+ @norm = get_key('norm', true)
40
+ @sep = get_key('sep', '^')
41
+ @min = get_flo('min', false)
42
+ @dim = get_int('dim', 2)
43
+
44
+ @sort = get_key('sort', false)
45
+ @sort.downcase! if @sort.respond_to?(:downcase!)
46
+
47
+ @hal = Hal4R.new([], get_int('window-size', Hal4R::DEFAULT_WINDOW_SIZE))
48
+ end
49
+
50
+ def control(cmd, *)
51
+ case cmd
52
+ when :EOL then :skip_command
53
+ when *TERMINALS then send_vectors unless @hal.empty?
54
+ end
55
+ end
56
+
57
+ def process(obj)
58
+ obj.is_a?(Word) && !@skip.include?(obj.attr) &&
59
+ # TODO: which lexical to select? (currently: first)
60
+ obj.lex_form(@lex) { |form| @hal << Unicode.downcase(form) }
61
+ end
62
+
63
+ private
64
+
65
+ def send_vectors
66
+ vec = []
67
+
68
+ fmt = @sort ? @sort == 'sto' ?
69
+ '%s {%.5f}' : '%2$.5f %1$s' : '%s %.5f' unless @sort == 'normal'
70
+
71
+ unless @sort
72
+ each_vector { |v| forward(fmt % v) }
73
+ else
74
+ each_vector { |v| vec << v }
75
+
76
+ !fmt ? vec.sort!.each { |v, _| forward(v) } :
77
+ vec.sort_by { |v, w| [-w, v] }.each { |v| forward(fmt % v) }
78
+
79
+ vec.clear
80
+ end
81
+
82
+ @hal.reset
83
+ end
84
+
85
+ def each_vector
86
+ @hal.each_distance(@norm, @dim) { |*t, v| v = 1 / v
87
+ yield [t.join(@sep), v] unless v.nan? || (@min && v < @min) }
88
+ end
89
+
90
+ end
91
+
92
+ end
93
+
94
+ end