lingo 1.9.0.pre1 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/ChangeLog +18 -7
  3. data/README +6 -8
  4. data/Rakefile +5 -5
  5. data/dict/en/lingo-dic.txt +52625 -15693
  6. data/lang/en.lang +2 -2
  7. data/lib/lingo.rb +15 -3
  8. data/lib/lingo/array_utils.rb +39 -0
  9. data/lib/lingo/attendee.rb +1 -3
  10. data/lib/lingo/attendee/multi_worder.rb +4 -2
  11. data/lib/lingo/attendee/sequencer.rb +122 -73
  12. data/lib/lingo/attendee/text_writer.rb +4 -6
  13. data/lib/lingo/attendee/vector_filter.rb +5 -5
  14. data/lib/lingo/cli.rb +20 -2
  15. data/lib/lingo/config.rb +4 -3
  16. data/lib/lingo/ctl.rb +2 -20
  17. data/lib/lingo/ctl/analysis.rb +3 -5
  18. data/lib/lingo/ctl/files.rb +3 -3
  19. data/lib/lingo/database.rb +26 -25
  20. data/lib/lingo/database/crypter.rb +10 -6
  21. data/lib/lingo/database/source.rb +72 -25
  22. data/lib/lingo/database/source/key_value.rb +12 -8
  23. data/lib/lingo/database/source/multi_key.rb +11 -9
  24. data/lib/lingo/database/source/multi_value.rb +10 -8
  25. data/lib/lingo/database/source/single_word.rb +10 -6
  26. data/lib/lingo/database/source/word_class.rb +43 -14
  27. data/lib/lingo/debug.rb +2 -2
  28. data/lib/lingo/error.rb +21 -5
  29. data/lib/lingo/filter.rb +1 -1
  30. data/lib/lingo/language.rb +21 -21
  31. data/lib/lingo/language/grammar.rb +4 -2
  32. data/lib/lingo/language/lexical_hash.rb +2 -14
  33. data/lib/lingo/language/word.rb +1 -5
  34. data/lib/lingo/text_utils.rb +113 -20
  35. data/lib/lingo/version.rb +1 -1
  36. data/test/attendee/ts_sequencer.rb +286 -32
  37. data/test/attendee/ts_text_reader.rb +4 -4
  38. data/test/attendee/ts_text_writer.rb +19 -5
  39. data/test/test_helper.rb +2 -0
  40. data/test/ts_database.rb +213 -14
  41. metadata +36 -24
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -25,11 +25,14 @@
25
25
  #++
26
26
 
27
27
  require 'nuggets/file/ext'
28
+ require 'nuggets/string/format'
28
29
 
29
30
  class Lingo
30
31
 
31
32
  module TextUtils
32
33
 
34
+ DEFAULT_MODE = 'rb'.freeze
35
+
33
36
  STDIN_EXT = %w[STDIN -].freeze
34
37
 
35
38
  STDOUT_EXT = %w[STDOUT -].freeze
@@ -46,51 +49,141 @@ class Lingo
46
49
  STDOUT_EXT.include?(path)
47
50
  end
48
51
 
49
- def open_stdin
50
- stdin = set_encoding(lingo.config.stdin)
51
- @progress ? StringIO.new(stdin.read) : stdin
52
+ def overwrite?(path, unlink = false)
53
+ !File.exist?(path) || if agree?("#{path} already exists. Overwrite?")
54
+ File.unlink(path) if unlink
55
+ true
56
+ end
57
+ end
58
+
59
+ def agree?(msg)
60
+ print "#{msg} (y/n) [n]: "
61
+
62
+ case stdin.gets.chomp
63
+ when /\Ano?\z/i, '' then nil
64
+ when /\Ay(?:es)?\z/i then true
65
+ else puts 'Please enter "yes" or "no".'; agree?(msg)
66
+ end
67
+ rescue Interrupt
68
+ abort ''
69
+ end
70
+
71
+ def stdin
72
+ respond_to?(:lingo, true) ? lingo.config.stdin : $stdin
73
+ end
74
+
75
+ def stdout
76
+ respond_to?(:lingo, true) ? lingo.config.stdout : $stdout
77
+ end
78
+
79
+ def open(path, mode = nil, encoding = nil, &block)
80
+ mode ||= DEFAULT_MODE
81
+
82
+ _yield_obj(case mode
83
+ when /r/ then stdin?(path) ? open_stdin(encoding) : File.exist?(path) ?
84
+ open_path(path, mode, encoding) : raise(FileNotFoundError.new(path))
85
+ when /w/ then stdout?(path) ? open_stdout(encoding) : overwrite?(path) ?
86
+ open_path(path, mode, encoding) : raise(FileExistsError.new(path))
87
+ end, &block)
88
+ end
89
+
90
+ def open_csv(path, mode = nil, options = {}, encoding = nil, &block)
91
+ _require_lib('csv')
92
+
93
+ open(path, mode, encoding) { |io|
94
+ _yield_obj(CSV.new(io, options), &block) }
95
+ end
96
+
97
+ def open_stdin(encoding = nil)
98
+ io = set_encoding(stdin, encoding)
99
+ @progress ? StringIO.new(io.read) : io
52
100
  end
53
101
 
54
- def open_stdout
55
- set_encoding(lingo.config.stdout)
102
+ def open_stdout(encoding = nil)
103
+ set_encoding(stdout, encoding)
56
104
  end
57
105
 
58
- def open_path(path, mode = 'rb')
59
- path =~ GZIP_RE ? open_gzip(path, mode) : open_file(path, mode)
106
+ def open_path(path, mode = nil, encoding = nil)
107
+ mode ||= DEFAULT_MODE
108
+
109
+ path =~ GZIP_RE ?
110
+ open_gzip(path, mode, encoding) :
111
+ open_file(path, mode, encoding)
60
112
  end
61
113
 
62
- def open_file(path, mode)
63
- File.open(path, mode, encoding: bom_encoding(mode))
114
+ def open_file(path, mode = nil, encoding = nil)
115
+ File.open(path, mode ||= DEFAULT_MODE,
116
+ encoding: bom_encoding(mode, encoding))
64
117
  end
65
118
 
66
- def open_gzip(path, mode)
67
- require_lib('zlib')
119
+ def open_gzip(path, mode = nil, encoding = nil)
120
+ _require_lib('zlib')
68
121
 
69
- case mode
122
+ case mode ||= DEFAULT_MODE
70
123
  when 'r', 'rb'
71
124
  @progress = false
72
- Zlib::GzipReader.open(path, encoding: @encoding)
125
+ Zlib::GzipReader
73
126
  when 'w', 'wb'
74
- Zlib::GzipWriter.open(path, encoding: @encoding)
127
+ Zlib::GzipWriter
75
128
  else
76
129
  raise ArgumentError, 'invalid access mode %s' % mode
77
- end
130
+ end.open(path, encoding: get_encoding(encoding))
131
+ end
132
+
133
+ def foreach(path, encoding = nil)
134
+ open(path, nil, encoding) { |io|
135
+ io.each { |line| line.chomp!; yield line } }
136
+ end
137
+
138
+ def foreach_csv(path, options = {}, encoding = nil, &block)
139
+ open_csv(path, nil, options, encoding) { |csv| csv.each(&block) }
140
+ end
141
+
142
+ def get_path(path, ext)
143
+ set_ext(path, ext).format { |directive|
144
+ case directive
145
+ when 'd', t = 't' then Time.now.strftime(t ? '%H%M%S' : '%Y%m%d')
146
+ when 'c', l = 'l' then File.chomp_ext(File.basename(
147
+ lingo.config.send("#{l ? :lang : :config}_file")))
148
+ end
149
+ }
78
150
  end
79
151
 
80
152
  def set_ext(path, ext)
81
153
  File.set_ext(path.sub(GZIP_RE, ''), ".#{ext}")
82
154
  end
83
155
 
84
- def set_encoding(io, encoding = @encoding)
85
- io.set_encoding(encoding)
156
+ def set_encoding(io, encoding = nil)
157
+ io.set_encoding(get_encoding(encoding))
86
158
  io
87
159
  end
88
160
 
89
- def bom_encoding(mode = 'r', encoding = @encoding)
90
- (mode.include?('r') || mode.include?('+')) &&
161
+ def get_encoding(encoding = nil, iv = :@encoding)
162
+ encoding ||
163
+ (instance_variable_defined?(iv) ? instance_variable_get(iv) : nil)
164
+ end
165
+
166
+ def bom_encoding(mode = 'r', encoding = nil)
167
+ encoding = get_encoding(encoding)
168
+
169
+ encoding && (mode.include?('r') || mode.include?('+')) &&
91
170
  encoding.name.start_with?('UTF-') ? "BOM|#{encoding}" : encoding
92
171
  end
93
172
 
173
+ private
174
+
175
+ def _require_lib(lib)
176
+ respond_to?(:require_lib, true) ? require_lib(lib) : require(lib)
177
+ end
178
+
179
+ def _yield_obj(obj)
180
+ !block_given? ? obj : begin
181
+ yield obj
182
+ ensure
183
+ obj.close
184
+ end
185
+ end
186
+
94
187
  end
95
188
 
96
189
  end
@@ -10,7 +10,7 @@ class Lingo
10
10
 
11
11
  # Returns array representation.
12
12
  def to_a
13
- [MAJOR, MINOR, TINY] << 'pre1'
13
+ [MAJOR, MINOR, TINY]
14
14
  end
15
15
 
16
16
  # Short-cut for version string.
@@ -4,6 +4,244 @@ require_relative '../test_helper'
4
4
 
5
5
  class TestAttendeeSequencer < AttendeeTestCase
6
6
 
7
+ def setup
8
+ @perm = [
9
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
10
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
11
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
12
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
13
+ wd('cold|IDF', 'cold|s', 'cold|a'),
14
+ wd('hot|IDF', 'hot|a'),
15
+ wd('hot|IDF', 'hot|a'),
16
+ wd('water|IDF', 'water|s', 'water|v'),
17
+ wd('warm|IDF', 'warm|a', 'warm|v'),
18
+ wd('warm|IDF', 'warm|a', 'warm|v'),
19
+ wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
20
+ wd('weather|IDF', 'weather|s', 'weather|v'),
21
+ wd('drink|IDF', 'drink|s', 'drink|v'),
22
+ wd('winter|IDF', 'winter|s', 'winter|v'),
23
+ wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
24
+ wd('fruit|IDF', 'fruit|s', 'fruit|v'),
25
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
26
+ wd('food|IDF', 'food|s'),
27
+ wd('juice|IDF', 'juice|s', 'juice|v'),
28
+ wd('flower|IDF', 'flower|s', 'flower|v'),
29
+ wd('fresh|IDF', 'fresh|s', 'fresh|a'),
30
+ wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
31
+ wd('tree|IDF', 'tree|s', 'tree|v'),
32
+ wd('meat|IDF', 'meat|s'),
33
+ wd('salad|IDF', 'salad|s'),
34
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
35
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
36
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
37
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
38
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
39
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
40
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
41
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
42
+ wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
43
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
44
+ wd('colour|IDF', 'colour|s', 'colour|v'),
45
+ wd('grey|IDF', 'grey|s'),
46
+ wd('tobacco|IDF', 'tobacco|s'),
47
+ wd('advertising|IDF', 'advertising|e'),
48
+ wd('cigarette|IDF', 'cigarette|s'),
49
+ wd('smoke|IDF', 'smoke|s', 'smoke|v'),
50
+ wd('alcohol|IDF', 'alcohol|s'),
51
+ wd('ban|IDF', 'ban|s'),
52
+ wd('coal|IDF', 'coal|s'),
53
+ wd('cigarette|IDF', 'cigarette|s'),
54
+ wd('import|IDF', 'import|s', 'import|v'),
55
+ wd('alcohol|IDF', 'alcohol|s'),
56
+ wd('textile|IDF', 'textile|s'),
57
+ wd('whiskey|IDF', 'whiskey|s'),
58
+ wd('drink|IDF', 'drink|s', 'drink|v'),
59
+ wd('whisky|IDF', 'whisky|s'),
60
+ ai('EOF|'),
61
+ ai('EOT|')
62
+ ]
63
+
64
+ @out1 = [
65
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
66
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
67
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
68
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
69
+ wd('cold|IDF', 'cold|s', 'cold|a'),
70
+ wd('hot|IDF', 'hot|a'),
71
+ wd('hot|IDF', 'hot|a'),
72
+ wd('water|IDF', 'water|s', 'water|v'),
73
+ wd('warm|IDF', 'warm|a', 'warm|v'),
74
+ wd('warm|IDF', 'warm|a', 'warm|v'),
75
+ wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
76
+ wd('weather|IDF', 'weather|s', 'weather|v'),
77
+ wd('drink|IDF', 'drink|s', 'drink|v'),
78
+ wd('winter|IDF', 'winter|s', 'winter|v'),
79
+ wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
80
+ wd('fruit|IDF', 'fruit|s', 'fruit|v'),
81
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
82
+ wd('food|IDF', 'food|s'),
83
+ wd('juice|IDF', 'juice|s', 'juice|v'),
84
+ wd('flower|IDF', 'flower|s', 'flower|v'),
85
+ wd('fresh|IDF', 'fresh|s', 'fresh|a'),
86
+ wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
87
+ wd('tree|IDF', 'tree|s', 'tree|v'),
88
+ wd('meat|IDF', 'meat|s'),
89
+ wd('salad|IDF', 'salad|s'),
90
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
91
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
92
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
93
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
94
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
95
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
96
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
97
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
98
+ wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
99
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
100
+ wd('colour|IDF', 'colour|s', 'colour|v'),
101
+ wd('grey|IDF', 'grey|s'),
102
+ wd('tobacco|IDF', 'tobacco|s'),
103
+ wd('advertising|IDF', 'advertising|e'),
104
+ wd('cigarette|IDF', 'cigarette|s'),
105
+ wd('smoke|IDF', 'smoke|s', 'smoke|v'),
106
+ wd('alcohol|IDF', 'alcohol|s'),
107
+ wd('ban|IDF', 'ban|s'),
108
+ wd('coal|IDF', 'coal|s'),
109
+ wd('cigarette|IDF', 'cigarette|s'),
110
+ wd('import|IDF', 'import|s', 'import|v'),
111
+ wd('alcohol|IDF', 'alcohol|s'),
112
+ wd('textile|IDF', 'textile|s'),
113
+ wd('whiskey|IDF', 'whiskey|s'),
114
+ wd('drink|IDF', 'drink|s', 'drink|v'),
115
+ wd('whisky|IDF', 'whisky|s'),
116
+ wd('white yellow|SEQ', 'yellow, white|q'),
117
+ wd('yellow green|SEQ', 'green, yellow|q'),
118
+ wd('green white|SEQ', 'white, green|q'),
119
+ wd('white cold|SEQ', 'cold, white|q'),
120
+ wd('hot water|SEQ', 'water, hot|q'),
121
+ wd('warm dry|SEQ', 'dry, warm|q'),
122
+ wd('dry weather|SEQ', 'weather, dry|q'),
123
+ wd('cool fruit|SEQ', 'fruit, cool|q'),
124
+ wd('vegetable food|SEQ', 'food, vegetable|q'),
125
+ wd('fresh fish|SEQ', 'fish, fresh|q'),
126
+ wd('fish tree|SEQ', 'tree, fish|q'),
127
+ wd('vegetable green|SEQ', 'green, vegetable|q'),
128
+ wd('green red|SEQ', 'red, green|q'),
129
+ wd('red red|SEQ', 'red, red|q'),
130
+ wd('red blue|SEQ', 'blue, red|q'),
131
+ wd('blue blue|SEQ', 'blue, blue|q'),
132
+ wd('blue yellow|SEQ', 'yellow, blue|q'),
133
+ wd('yellow white|SEQ', 'white, yellow|q'),
134
+ wd('white leaves|SEQ', 'leave, white|q'),
135
+ wd('yellow colour|SEQ', 'colour, yellow|q'),
136
+ wd('white yellow green|SEQ', 'green, white yellow|q'),
137
+ wd('yellow green white|SEQ', 'white, yellow green|q'),
138
+ wd('green white cold|SEQ', 'cold, green white|q'),
139
+ wd('hot hot water|SEQ', 'water, hot hot|q'),
140
+ wd('warm warm dry|SEQ', 'dry, warm warm|q'),
141
+ wd('warm dry weather|SEQ', 'weather, warm dry|q'),
142
+ wd('fresh fish tree|SEQ', 'tree, fresh fish|q'),
143
+ wd('vegetable green red|SEQ', 'red, vegetable green|q'),
144
+ wd('green red red|SEQ', 'red, green red|q'),
145
+ wd('red red blue|SEQ', 'blue, red red|q'),
146
+ wd('red blue blue|SEQ', 'blue, red blue|q'),
147
+ wd('blue blue yellow|SEQ', 'yellow, blue blue|q'),
148
+ wd('blue yellow white|SEQ', 'white, blue yellow|q'),
149
+ wd('yellow white leaves|SEQ', 'leave, yellow white|q'),
150
+ ai('EOF|'),
151
+ ai('EOT|')
152
+ ]
153
+
154
+ @out2 = [
155
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
156
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
157
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
158
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
159
+ wd('cold|IDF', 'cold|s', 'cold|a'),
160
+ wd('hot|IDF', 'hot|a'),
161
+ wd('hot|IDF', 'hot|a'),
162
+ wd('water|IDF', 'water|s', 'water|v'),
163
+ wd('warm|IDF', 'warm|a', 'warm|v'),
164
+ wd('warm|IDF', 'warm|a', 'warm|v'),
165
+ wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
166
+ wd('weather|IDF', 'weather|s', 'weather|v'),
167
+ wd('drink|IDF', 'drink|s', 'drink|v'),
168
+ wd('winter|IDF', 'winter|s', 'winter|v'),
169
+ wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
170
+ wd('fruit|IDF', 'fruit|s', 'fruit|v'),
171
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
172
+ wd('food|IDF', 'food|s'),
173
+ wd('juice|IDF', 'juice|s', 'juice|v'),
174
+ wd('flower|IDF', 'flower|s', 'flower|v'),
175
+ wd('fresh|IDF', 'fresh|s', 'fresh|a'),
176
+ wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
177
+ wd('tree|IDF', 'tree|s', 'tree|v'),
178
+ wd('meat|IDF', 'meat|s'),
179
+ wd('salad|IDF', 'salad|s'),
180
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
181
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
182
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
183
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
184
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
185
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
186
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
187
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
188
+ wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
189
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
190
+ wd('colour|IDF', 'colour|s', 'colour|v'),
191
+ wd('grey|IDF', 'grey|s'),
192
+ wd('tobacco|IDF', 'tobacco|s'),
193
+ wd('advertising|IDF', 'advertising|e'),
194
+ wd('cigarette|IDF', 'cigarette|s'),
195
+ wd('smoke|IDF', 'smoke|s', 'smoke|v'),
196
+ wd('alcohol|IDF', 'alcohol|s'),
197
+ wd('ban|IDF', 'ban|s'),
198
+ wd('coal|IDF', 'coal|s'),
199
+ wd('cigarette|IDF', 'cigarette|s'),
200
+ wd('import|IDF', 'import|s', 'import|v'),
201
+ wd('alcohol|IDF', 'alcohol|s'),
202
+ wd('textile|IDF', 'textile|s'),
203
+ wd('whiskey|IDF', 'whiskey|s'),
204
+ wd('drink|IDF', 'drink|s', 'drink|v'),
205
+ wd('whisky|IDF', 'whisky|s'),
206
+ wd('hot water|SEQ', 'water, hot|q'),
207
+ wd('warm dry|SEQ', 'dry, warm|q'),
208
+ wd('yellow colour|SEQ', 'colour, yellow|q'),
209
+ wd('white leaves|SEQ', 'leave, white|q'),
210
+ wd('yellow white|SEQ', 'white, yellow|q'),
211
+ wd('blue yellow|SEQ', 'yellow, blue|q'),
212
+ wd('blue blue|SEQ', 'blue, blue|q'),
213
+ wd('red blue|SEQ', 'blue, red|q'),
214
+ wd('red red|SEQ', 'red, red|q'),
215
+ wd('green red|SEQ', 'red, green|q'),
216
+ wd('vegetable green|SEQ', 'green, vegetable|q'),
217
+ wd('fish tree|SEQ', 'tree, fish|q'),
218
+ wd('fresh fish|SEQ', 'fish, fresh|q'),
219
+ wd('vegetable food|SEQ', 'food, vegetable|q'),
220
+ wd('cool fruit|SEQ', 'fruit, cool|q'),
221
+ wd('dry weather|SEQ', 'weather, dry|q'),
222
+ wd('white cold|SEQ', 'cold, white|q'),
223
+ wd('green white|SEQ', 'white, green|q'),
224
+ wd('yellow green|SEQ', 'green, yellow|q'),
225
+ wd('white yellow|SEQ', 'yellow, white|q'),
226
+ wd('hot hot water|SEQ', 'water, hot hot|q'),
227
+ wd('warm warm dry|SEQ', 'dry, warm warm|q'),
228
+ wd('yellow white leaves|SEQ', 'leave, yellow white|q'),
229
+ wd('blue yellow white|SEQ', 'white, blue yellow|q'),
230
+ wd('blue blue yellow|SEQ', 'yellow, blue blue|q'),
231
+ wd('red blue blue|SEQ', 'blue, red blue|q'),
232
+ wd('red red blue|SEQ', 'blue, red red|q'),
233
+ wd('green red red|SEQ', 'red, green red|q'),
234
+ wd('vegetable green red|SEQ', 'red, vegetable green|q'),
235
+ wd('fresh fish tree|SEQ', 'tree, fresh fish|q'),
236
+ wd('warm dry weather|SEQ', 'weather, warm dry|q'),
237
+ wd('green white cold|SEQ', 'cold, green white|q'),
238
+ wd('yellow green white|SEQ', 'white, yellow green|q'),
239
+ wd('white yellow green|SEQ', 'green, white yellow|q'),
240
+ ai('EOF|'),
241
+ ai('EOT|')
242
+ ]
243
+ end
244
+
7
245
  def test_basic
8
246
  meet({}, [
9
247
  # AS
@@ -23,12 +261,12 @@ class TestAttendeeSequencer < AttendeeTestCase
23
261
  wd('helle|IDF', 'hell|a'),
24
262
  wd('Sonne|IDF', 'sonne|s'),
25
263
  tk('.|PUNC'),
26
- wd('sonne, hell|SEQ', 'sonne, hell|q'),
264
+ wd('helle Sonne|SEQ', 'sonne, hell|q'),
27
265
  # AK
28
266
  wd('Der|IDF', 'der|w'),
29
267
  wd('schöne|IDF', 'schön|a'),
30
268
  wd('Sonnenuntergang|COM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
31
- wd('sonnenuntergang, schön|SEQ', 'sonnenuntergang, schön|q'),
269
+ wd('schöne Sonnenuntergang|SEQ', 'sonnenuntergang, schön|q'),
32
270
  ai('EOF|'),
33
271
  ai('EOT|')
34
272
  ])
@@ -67,14 +305,14 @@ class TestAttendeeSequencer < AttendeeTestCase
67
305
  wd('Gottes|IDF', 'gott|s'),
68
306
  wd('Turm|IDF', 'turm|s'),
69
307
  tk('.|PUNC'),
70
- wd('abbild gott|SEQ', 'abbild gott|q'),
71
- wd('gott turm|SEQ', 'gott turm|q'),
72
- wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
308
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
309
+ wd('Gottes Turm|SEQ', 'gott turm|q'),
310
+ wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
73
311
  # SS
74
312
  wd('Der|IDF', 'der|w'),
75
313
  wd('Sonne|IDF', 'sonne|s'),
76
314
  wd('Untergang|IDF', 'untergang|s'),
77
- wd('sonne untergang|SEQ', 'sonne untergang|q'),
315
+ wd('Sonne Untergang|SEQ', 'sonne untergang|q'),
78
316
  ai('EOF|'),
79
317
  ai('EOT|')
80
318
  ])
@@ -99,7 +337,7 @@ class TestAttendeeSequencer < AttendeeTestCase
99
337
  wd('Gottes|IDF', 'gott|s'),
100
338
  wd('Turm|IDF', 'turm|s'),
101
339
  tk('.|PUNC'),
102
- wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
340
+ wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
103
341
  ai('EOF|'),
104
342
  ai('EOT|')
105
343
  ])
@@ -125,15 +363,15 @@ class TestAttendeeSequencer < AttendeeTestCase
125
363
  wd('Gottes|IDF', 'gott|s'),
126
364
  wd('Turm|IDF', 'turm|s'),
127
365
  tk('.|PUNC'),
128
- wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
129
- wd('abbild gott|SEQ', 'abbild gott|q'),
130
- wd('gott turm|SEQ', 'gott turm|q'),
131
- wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
366
+ wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
367
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
368
+ wd('Gottes Turm|SEQ', 'gott turm|q'),
369
+ wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
132
370
  # SS
133
371
  wd('Abbild Gottes|MUL', 'abbild gottes|m'),
134
372
  wd('Abbild|IDF', 'abbild|s'),
135
373
  wd('Gottes|IDF', 'gott|s'),
136
- wd('abbild gott|SEQ', 'abbild gott|q'),
374
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
137
375
  ai('EOF|'),
138
376
  ai('EOT|')
139
377
  ])
@@ -162,14 +400,14 @@ class TestAttendeeSequencer < AttendeeTestCase
162
400
  wd('Gottes|IDF', 'gott|s'),
163
401
  wd('Turm|IDF', 'turm|s'),
164
402
  tk('.|PUNC'),
165
- wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
166
- wd('abbild gott|SEQ', 'abbild gott|q'),
167
- wd('gott turm|SEQ', 'gott turm|q'),
403
+ wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
404
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
405
+ wd('Gottes Turm|SEQ', 'gott turm|q'),
168
406
  # SS
169
407
  wd('Abbild Gottes|MUL', 'abbild gottes|m'),
170
408
  wd('Abbild|IDF', 'abbild|s'),
171
409
  wd('Gottes|IDF', 'gott|s'),
172
- wd('abbild gott|SEQ', 'abbild gott|q'),
410
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
173
411
  ai('EOF|'),
174
412
  ai('EOT|')
175
413
  ])
@@ -228,14 +466,14 @@ class TestAttendeeSequencer < AttendeeTestCase
228
466
  wd('Gottes|IDF', 'gott|s'),
229
467
  wd('Turm|IDF', 'turm|s'),
230
468
  tk('.|PUNC'),
231
- wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
232
- wd('abbild gott|SEQ', 'abbild gott|q'),
233
- wd('gott turm|SEQ', 'gott turm|q'),
469
+ wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
470
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
471
+ wd('Gottes Turm|SEQ', 'gott turm|q'),
234
472
  # SS
235
473
  wd('Abbild Gottes|MUL', 'abbild gottes|m'),
236
474
  wd('Abbild|IDF', 'abbild|s'),
237
475
  wd('Gottes|IDF', 'gott|s'),
238
- wd('abbild gott|SEQ', 'abbild gott|q'),
476
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
239
477
  ai('EOF|'),
240
478
  ai('EOT|')
241
479
  ])
@@ -260,10 +498,10 @@ class TestAttendeeSequencer < AttendeeTestCase
260
498
  wd('Gottes|IDF', 'gott|s'),
261
499
  wd('Turm|IDF', 'turm|s'),
262
500
  tk('.|PUNC'),
263
- wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
264
- wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
265
- #wd('abbild gott|SEQ', 'abbild gott|q'), # FIXME
266
- wd('gott turm|SEQ', 'gott turm|q'),
501
+ wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
502
+ wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
503
+ #wd('Abbild Gottes|SEQ', 'abbild gott|q'), # FIXME
504
+ wd('Gottes Turm|SEQ', 'gott turm|q'),
267
505
  ai('EOF|'),
268
506
  ai('EOT|')
269
507
  ])
@@ -288,10 +526,10 @@ class TestAttendeeSequencer < AttendeeTestCase
288
526
  wd('Gottes|IDF', 'gott|s'),
289
527
  wd('Turm|IDF', 'turm|s'),
290
528
  tk('.|PUNC'),
291
- wd('ms:abbild gottes^turm|SEQ', 'ms:abbild gottes^turm|q'),
292
- wd('sss:abbild^gott^turm|SEQ', 'sss:abbild^gott^turm|q'),
293
- #wd('ss:abbild^gott|SEQ', 'ss:abbild^gott|q'), # FIXME
294
- wd('ss:gott^turm|SEQ', 'ss:gott^turm|q'),
529
+ wd('Abbild Gottes Turm|SEQ', 'ms:abbild gottes^turm|q'),
530
+ wd('Abbild Gottes Turm|SEQ', 'sss:abbild^gott^turm|q'),
531
+ #wd('Abbild Gottes|SEQ', 'ss:abbild^gott|q'), # FIXME
532
+ wd('Gottes Turm|SEQ', 'ss:gott^turm|q'),
295
533
  ai('EOF|'),
296
534
  ai('EOT|')
297
535
  ])
@@ -316,14 +554,14 @@ class TestAttendeeSequencer < AttendeeTestCase
316
554
  wd('helle|IDF', 'hell|a'),
317
555
  wd('Sonne|IDF', 'sonne|s'),
318
556
  tk('.|PUNC'),
319
- wd('die hell (wa)|SEQ', 'die hell (wa)|q'),
320
- wd('as: sonne, hell|SEQ', 'as: sonne, hell|q'),
557
+ wd('Die helle|SEQ', 'die hell (wa)|q'),
558
+ wd('helle Sonne|SEQ', 'as: sonne, hell|q'),
321
559
  # WA + AK
322
560
  wd('Der|IDF', 'der|w'),
323
561
  wd('schöne|IDF', 'schön|a'),
324
562
  wd('Sonnenuntergang|COM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
325
- wd('der schön (wa)|SEQ', 'der schön (wa)|q'),
326
- wd('ak: sonnenuntergang, schön|SEQ', 'ak: sonnenuntergang, schön|q'),
563
+ wd('Der schöne|SEQ', 'der schön (wa)|q'),
564
+ wd('schöne Sonnenuntergang|SEQ', 'ak: sonnenuntergang, schön|q'),
327
565
  ai('EOF|'),
328
566
  ai('EOT|')
329
567
  ])
@@ -353,4 +591,20 @@ class TestAttendeeSequencer < AttendeeTestCase
353
591
  ])
354
592
  end
355
593
 
594
+ def test_many_permutations
595
+ meet({}, @perm, @out1)
596
+ end
597
+
598
+ def test_many_permutations_simple_regex1
599
+ meet({ 'sequences' => [['A[SK]', '2, 1'], ['AA[SK]', '3, 1 2']] }, @perm, @out1)
600
+ end
601
+
602
+ def test_many_permutations_simple_regex2
603
+ meet({ 'sequences' => [['A(S|K)', '2, 1'], ['AA(?:S|K)', '3, 1 2']] }, @perm, @out1)
604
+ end
605
+
606
+ def test_many_permutations_complex_regex
607
+ meet({ 'sequences' => [['A{1}(S|K)', '2, 1'], ['A{2}(S|K)', '3, 1 2']] }, @perm, @out2)
608
+ end unless ENV['LINGO_DISABLE_SLOW_TESTS'] # ~60s
609
+
356
610
  end