lingo 1.9.0.pre1 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +18 -7
  3. data/README +6 -8
  4. data/Rakefile +5 -5
  5. data/dict/en/lingo-dic.txt +52625 -15693
  6. data/lang/en.lang +2 -2
  7. data/lib/lingo.rb +15 -3
  8. data/lib/lingo/array_utils.rb +39 -0
  9. data/lib/lingo/attendee.rb +1 -3
  10. data/lib/lingo/attendee/multi_worder.rb +4 -2
  11. data/lib/lingo/attendee/sequencer.rb +122 -73
  12. data/lib/lingo/attendee/text_writer.rb +4 -6
  13. data/lib/lingo/attendee/vector_filter.rb +5 -5
  14. data/lib/lingo/cli.rb +20 -2
  15. data/lib/lingo/config.rb +4 -3
  16. data/lib/lingo/ctl.rb +2 -20
  17. data/lib/lingo/ctl/analysis.rb +3 -5
  18. data/lib/lingo/ctl/files.rb +3 -3
  19. data/lib/lingo/database.rb +26 -25
  20. data/lib/lingo/database/crypter.rb +10 -6
  21. data/lib/lingo/database/source.rb +72 -25
  22. data/lib/lingo/database/source/key_value.rb +12 -8
  23. data/lib/lingo/database/source/multi_key.rb +11 -9
  24. data/lib/lingo/database/source/multi_value.rb +10 -8
  25. data/lib/lingo/database/source/single_word.rb +10 -6
  26. data/lib/lingo/database/source/word_class.rb +43 -14
  27. data/lib/lingo/debug.rb +2 -2
  28. data/lib/lingo/error.rb +21 -5
  29. data/lib/lingo/filter.rb +1 -1
  30. data/lib/lingo/language.rb +21 -21
  31. data/lib/lingo/language/grammar.rb +4 -2
  32. data/lib/lingo/language/lexical_hash.rb +2 -14
  33. data/lib/lingo/language/word.rb +1 -5
  34. data/lib/lingo/text_utils.rb +113 -20
  35. data/lib/lingo/version.rb +1 -1
  36. data/test/attendee/ts_sequencer.rb +286 -32
  37. data/test/attendee/ts_text_reader.rb +4 -4
  38. data/test/attendee/ts_text_writer.rb +19 -5
  39. data/test/test_helper.rb +2 -0
  40. data/test/ts_database.rb +213 -14
  41. metadata +36 -24
@@ -6,7 +6,7 @@
6
6
  # Lingo -- A full-featured automatic indexing system #
7
7
  # #
8
8
  # Copyright (C) 2005-2007 John Vorhauer #
9
- # Copyright (C) 2007-2015 John Vorhauer, Jens Wille #
9
+ # Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
10
10
  # #
11
11
  # Lingo is free software; you can redistribute it and/or modify it under the #
12
12
  # terms of the GNU Affero General Public License as published by the Free #
@@ -25,11 +25,14 @@
25
25
  #++
26
26
 
27
27
  require 'nuggets/file/ext'
28
+ require 'nuggets/string/format'
28
29
 
29
30
  class Lingo
30
31
 
31
32
  module TextUtils
32
33
 
34
+ DEFAULT_MODE = 'rb'.freeze
35
+
33
36
  STDIN_EXT = %w[STDIN -].freeze
34
37
 
35
38
  STDOUT_EXT = %w[STDOUT -].freeze
@@ -46,51 +49,141 @@ class Lingo
46
49
  STDOUT_EXT.include?(path)
47
50
  end
48
51
 
49
- def open_stdin
50
- stdin = set_encoding(lingo.config.stdin)
51
- @progress ? StringIO.new(stdin.read) : stdin
52
# Decides whether +path+ may be written to.
#
# Returns +true+ when +path+ does not exist, or when it exists and the
# user confirms the prompt issued through +agree?+; in the latter case
# the file is removed first if +unlink+ is truthy. Returns +nil+ when
# the user declines.
def overwrite?(path, unlink = false)
  return true unless File.exist?(path)
  return unless agree?("#{path} already exists. Overwrite?")

  File.unlink(path) if unlink
  true
end
58
+
59
# Asks a yes/no question and reads the answer from +stdin+.
#
# The prompt is written with +print+; the default answer is "no".
# Returns +true+ for "y"/"yes" (case-insensitive) and +nil+ for
# "n"/"no", an empty line, or end of input. Any other answer prints a
# hint and asks again. An Interrupt aborts the program.
def agree?(msg)
  loop do
    print "#{msg} (y/n) [n]: "

    # gets returns nil at end of input; to_s maps that onto the empty
    # (default "no") answer instead of raising NoMethodError on chomp.
    case stdin.gets.to_s.chomp
      when /\Ano?\z/i, ''   then return nil
      when /\Ay(?:es)?\z/i  then return true
      else puts 'Please enter "yes" or "no".'
    end
  end
rescue Interrupt
  abort ''
end
70
+
71
# Input stream to read from: the configured one when the including
# object responds to +lingo+ (private methods included), otherwise the
# global +$stdin+.
def stdin
  return $stdin unless respond_to?(:lingo, true)

  lingo.config.stdin
end
74
+
75
# Output stream to write to: the configured one when the including
# object responds to +lingo+ (private methods included), otherwise the
# global +$stdout+.
def stdout
  return $stdout unless respond_to?(:lingo, true)

  lingo.config.stdout
end
78
+
79
# Opens +path+ according to +mode+ (default: DEFAULT_MODE).
#
# Read modes (anything matching /r/) return the standard input stream
# when +stdin?+ accepts +path+, otherwise the opened file; a missing
# file raises FileNotFoundError. Write modes (anything matching /w/)
# return the standard output stream when +stdout?+ accepts +path+,
# otherwise the opened file, provided +overwrite?+ allows it; if not,
# FileExistsError is raised. Any other mode raises ArgumentError.
#
# Without a block the opened object is returned; with a block it is
# yielded and closed afterwards, and the block's value is returned.
def open(path, mode = nil, encoding = nil, &block)
  mode ||= DEFAULT_MODE

  io = case mode
    when /r/
      if stdin?(path)
        open_stdin(encoding)
      elsif File.exist?(path)
        open_path(path, mode, encoding)
      else
        raise FileNotFoundError.new(path)
      end
    when /w/
      if stdout?(path)
        open_stdout(encoding)
      elsif overwrite?(path)
        open_path(path, mode, encoding)
      else
        raise FileExistsError.new(path)
      end
    else
      # Previously fell through with nil, which crashed on nil.close
      # for block callers and silently returned nil otherwise.
      raise ArgumentError, 'invalid access mode %s' % mode
  end

  _yield_obj(io, &block)
end
89
+
90
# Opens +path+ via +open+ and wraps the stream in a CSV object built
# with +options+.
#
# Without a block the CSV object is returned and stays open; the caller
# is responsible for closing it. With a block the CSV object is yielded
# and closed afterwards, and the block's value is returned.
def open_csv(path, mode = nil, options = {}, encoding = nil, &block)
  _require_lib('csv')

  # Without a block the stream must not pass through open's block form,
  # which would close it before the caller could read from the CSV.
  return CSV.new(open(path, mode, encoding), **options) unless block

  # Options are splatted as keywords; passing the hash positionally is
  # rejected by CSV.new once keyword arguments are separated (Ruby 3).
  open(path, mode, encoding) { |io|
    _yield_obj(CSV.new(io, **options), &block) }
end
96
+
97
# Returns the input stream from +stdin+ with its encoding set through
# +set_encoding+. When @progress is truthy the whole input is read up
# front and handed back as a StringIO instead.
def open_stdin(encoding = nil)
  input = set_encoding(stdin, encoding)
  return input unless @progress

  StringIO.new(input.read)
end
53
101
 
54
- def open_stdout
55
- set_encoding(lingo.config.stdout)
102
# Returns the output stream from +stdout+ with its encoding set
# through +set_encoding+.
def open_stdout(encoding = nil)
  output = stdout
  set_encoding(output, encoding)
end
57
105
 
58
- def open_path(path, mode = 'rb')
59
- path =~ GZIP_RE ? open_gzip(path, mode) : open_file(path, mode)
106
# Opens the file at +path+ with +mode+ (default: DEFAULT_MODE),
# delegating to +open_gzip+ when the path matches GZIP_RE and to
# +open_file+ otherwise.
def open_path(path, mode = nil, encoding = nil)
  mode ||= DEFAULT_MODE

  if path =~ GZIP_RE
    open_gzip(path, mode, encoding)
  else
    open_file(path, mode, encoding)
  end
end
61
113
 
62
- def open_file(path, mode)
63
- File.open(path, mode, encoding: bom_encoding(mode))
114
# Opens the plain file at +path+ with +mode+ (default: DEFAULT_MODE),
# using the encoding specification computed by +bom_encoding+.
def open_file(path, mode = nil, encoding = nil)
  mode ||= DEFAULT_MODE
  spec = bom_encoding(mode, encoding)

  File.open(path, mode, encoding: spec)
end
65
118
 
66
- def open_gzip(path, mode)
67
- require_lib('zlib')
119
# Opens the gzip-compressed file at +path+.
#
# Modes 'r'/'rb' use Zlib::GzipReader and switch @progress off; modes
# 'w'/'wb' use Zlib::GzipWriter; any other mode raises ArgumentError.
# The encoding passed on is resolved through +get_encoding+.
def open_gzip(path, mode = nil, encoding = nil)
  _require_lib('zlib')

  mode ||= DEFAULT_MODE

  gzip_class =
    case mode
      when 'r', 'rb'
        @progress = false
        Zlib::GzipReader
      when 'w', 'wb'
        Zlib::GzipWriter
      else
        raise ArgumentError, 'invalid access mode %s' % mode
    end

  gzip_class.open(path, encoding: get_encoding(encoding))
end
132
+
133
# Opens +path+ for reading via +open+ and yields each line with its
# trailing line terminator removed in place.
def foreach(path, encoding = nil)
  open(path, nil, encoding) do |io|
    io.each do |line|
      line.chomp!
      yield line
    end
  end
end
137
+
138
# Opens +path+ for reading via +open_csv+ with +options+ and passes
# +block+ to the CSV object's +each+.
def foreach_csv(path, options = {}, encoding = nil, &block)
  open_csv(path, nil, options, encoding) do |csv|
    csv.each(&block)
  end
end
141
+
142
# Builds an output path: applies +set_ext+ to +path+ and then expands
# format directives in the result through String#format.
# NOTE(review): String#format is presumably provided by
# 'nuggets/string/format'; confirm the directive syntax there.
#
# Supported directives:
# d:: current date as '%Y%m%d'
# t:: current time as '%H%M%S'
# c:: base name of the configured config file, extension removed
# l:: base name of the configured lang file, extension removed
#
# Any other directive makes the block return +nil+.
def get_path(path, ext)
  stem = lambda { |kind|
    File.chomp_ext(File.basename(lingo.config.send("#{kind}_file")))
  }

  set_ext(path, ext).format do |directive|
    case directive
      when 'd' then Time.now.strftime('%Y%m%d')
      when 't' then Time.now.strftime('%H%M%S')
      when 'c' then stem[:config]
      when 'l' then stem[:lang]
    end
  end
end
79
151
 
80
152
# Returns +path+ with any GZIP_RE match removed and its extension
# replaced by +ext+ (via File.set_ext).
def set_ext(path, ext)
  plain = path.sub(GZIP_RE, '')
  File.set_ext(plain, ".#{ext}")
end
83
155
 
84
- def set_encoding(io, encoding = @encoding)
85
- io.set_encoding(encoding)
156
# Sets the encoding of +io+ to the one resolved by +get_encoding+ and
# returns +io+ itself.
def set_encoding(io, encoding = nil)
  io.tap { |stream| stream.set_encoding(get_encoding(encoding)) }
end
88
160
 
89
- def bom_encoding(mode = 'r', encoding = @encoding)
90
- (mode.include?('r') || mode.include?('+')) &&
161
# Resolves the encoding to use: the explicit +encoding+ when truthy,
# otherwise the value of the instance variable named +iv+ if that
# variable is defined, otherwise +nil+.
def get_encoding(encoding = nil, iv = :@encoding)
  return encoding if encoding

  instance_variable_get(iv) if instance_variable_defined?(iv)
end
165
+
166
# Returns the encoding specification to open a file with.
#
# The encoding is resolved through +get_encoding+. For a UTF-*
# encoding and a mode that reads ('r' or '+'), the result is the
# string "BOM|<encoding>"; in every other case the resolved encoding
# is returned unchanged (possibly +nil+).
def bom_encoding(mode = 'r', encoding = nil)
  encoding = get_encoding(encoding)

  # to_s accepts both Encoding objects and encoding names given as
  # strings; calling #name would raise NoMethodError on a String.
  encoding && (mode.include?('r') || mode.include?('+')) &&
    encoding.to_s.upcase.start_with?('UTF-') ? "BOM|#{encoding}" : encoding
end
93
172
 
173
+ private
174
+
175
# Loads +lib+ through the including object's own +require_lib+ when it
# has one (private methods included), otherwise through plain +require+.
def _require_lib(lib)
  return require_lib(lib) if respond_to?(:require_lib, true)

  require(lib)
end
178
+
179
# Without a block, returns +obj+ untouched. With a block, yields +obj+,
# returns the block's value and closes +obj+ afterwards, even if the
# block raises.
def _yield_obj(obj)
  return obj unless block_given?

  begin
    yield obj
  ensure
    obj.close
  end
end
186
+
94
187
  end
95
188
 
96
189
  end
@@ -10,7 +10,7 @@ class Lingo
10
10
 
11
11
  # Returns array representation.
12
12
  def to_a
13
- [MAJOR, MINOR, TINY] << 'pre1'
13
+ [MAJOR, MINOR, TINY]
14
14
  end
15
15
 
16
16
  # Short-cut for version string.
@@ -4,6 +4,244 @@ require_relative '../test_helper'
4
4
 
5
5
  class TestAttendeeSequencer < AttendeeTestCase
6
6
 
7
+ def setup
8
+ @perm = [
9
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
10
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
11
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
12
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
13
+ wd('cold|IDF', 'cold|s', 'cold|a'),
14
+ wd('hot|IDF', 'hot|a'),
15
+ wd('hot|IDF', 'hot|a'),
16
+ wd('water|IDF', 'water|s', 'water|v'),
17
+ wd('warm|IDF', 'warm|a', 'warm|v'),
18
+ wd('warm|IDF', 'warm|a', 'warm|v'),
19
+ wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
20
+ wd('weather|IDF', 'weather|s', 'weather|v'),
21
+ wd('drink|IDF', 'drink|s', 'drink|v'),
22
+ wd('winter|IDF', 'winter|s', 'winter|v'),
23
+ wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
24
+ wd('fruit|IDF', 'fruit|s', 'fruit|v'),
25
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
26
+ wd('food|IDF', 'food|s'),
27
+ wd('juice|IDF', 'juice|s', 'juice|v'),
28
+ wd('flower|IDF', 'flower|s', 'flower|v'),
29
+ wd('fresh|IDF', 'fresh|s', 'fresh|a'),
30
+ wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
31
+ wd('tree|IDF', 'tree|s', 'tree|v'),
32
+ wd('meat|IDF', 'meat|s'),
33
+ wd('salad|IDF', 'salad|s'),
34
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
35
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
36
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
37
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
38
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
39
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
40
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
41
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
42
+ wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
43
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
44
+ wd('colour|IDF', 'colour|s', 'colour|v'),
45
+ wd('grey|IDF', 'grey|s'),
46
+ wd('tobacco|IDF', 'tobacco|s'),
47
+ wd('advertising|IDF', 'advertising|e'),
48
+ wd('cigarette|IDF', 'cigarette|s'),
49
+ wd('smoke|IDF', 'smoke|s', 'smoke|v'),
50
+ wd('alcohol|IDF', 'alcohol|s'),
51
+ wd('ban|IDF', 'ban|s'),
52
+ wd('coal|IDF', 'coal|s'),
53
+ wd('cigarette|IDF', 'cigarette|s'),
54
+ wd('import|IDF', 'import|s', 'import|v'),
55
+ wd('alcohol|IDF', 'alcohol|s'),
56
+ wd('textile|IDF', 'textile|s'),
57
+ wd('whiskey|IDF', 'whiskey|s'),
58
+ wd('drink|IDF', 'drink|s', 'drink|v'),
59
+ wd('whisky|IDF', 'whisky|s'),
60
+ ai('EOF|'),
61
+ ai('EOT|')
62
+ ]
63
+
64
+ @out1 = [
65
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
66
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
67
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
68
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
69
+ wd('cold|IDF', 'cold|s', 'cold|a'),
70
+ wd('hot|IDF', 'hot|a'),
71
+ wd('hot|IDF', 'hot|a'),
72
+ wd('water|IDF', 'water|s', 'water|v'),
73
+ wd('warm|IDF', 'warm|a', 'warm|v'),
74
+ wd('warm|IDF', 'warm|a', 'warm|v'),
75
+ wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
76
+ wd('weather|IDF', 'weather|s', 'weather|v'),
77
+ wd('drink|IDF', 'drink|s', 'drink|v'),
78
+ wd('winter|IDF', 'winter|s', 'winter|v'),
79
+ wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
80
+ wd('fruit|IDF', 'fruit|s', 'fruit|v'),
81
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
82
+ wd('food|IDF', 'food|s'),
83
+ wd('juice|IDF', 'juice|s', 'juice|v'),
84
+ wd('flower|IDF', 'flower|s', 'flower|v'),
85
+ wd('fresh|IDF', 'fresh|s', 'fresh|a'),
86
+ wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
87
+ wd('tree|IDF', 'tree|s', 'tree|v'),
88
+ wd('meat|IDF', 'meat|s'),
89
+ wd('salad|IDF', 'salad|s'),
90
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
91
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
92
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
93
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
94
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
95
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
96
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
97
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
98
+ wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
99
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
100
+ wd('colour|IDF', 'colour|s', 'colour|v'),
101
+ wd('grey|IDF', 'grey|s'),
102
+ wd('tobacco|IDF', 'tobacco|s'),
103
+ wd('advertising|IDF', 'advertising|e'),
104
+ wd('cigarette|IDF', 'cigarette|s'),
105
+ wd('smoke|IDF', 'smoke|s', 'smoke|v'),
106
+ wd('alcohol|IDF', 'alcohol|s'),
107
+ wd('ban|IDF', 'ban|s'),
108
+ wd('coal|IDF', 'coal|s'),
109
+ wd('cigarette|IDF', 'cigarette|s'),
110
+ wd('import|IDF', 'import|s', 'import|v'),
111
+ wd('alcohol|IDF', 'alcohol|s'),
112
+ wd('textile|IDF', 'textile|s'),
113
+ wd('whiskey|IDF', 'whiskey|s'),
114
+ wd('drink|IDF', 'drink|s', 'drink|v'),
115
+ wd('whisky|IDF', 'whisky|s'),
116
+ wd('white yellow|SEQ', 'yellow, white|q'),
117
+ wd('yellow green|SEQ', 'green, yellow|q'),
118
+ wd('green white|SEQ', 'white, green|q'),
119
+ wd('white cold|SEQ', 'cold, white|q'),
120
+ wd('hot water|SEQ', 'water, hot|q'),
121
+ wd('warm dry|SEQ', 'dry, warm|q'),
122
+ wd('dry weather|SEQ', 'weather, dry|q'),
123
+ wd('cool fruit|SEQ', 'fruit, cool|q'),
124
+ wd('vegetable food|SEQ', 'food, vegetable|q'),
125
+ wd('fresh fish|SEQ', 'fish, fresh|q'),
126
+ wd('fish tree|SEQ', 'tree, fish|q'),
127
+ wd('vegetable green|SEQ', 'green, vegetable|q'),
128
+ wd('green red|SEQ', 'red, green|q'),
129
+ wd('red red|SEQ', 'red, red|q'),
130
+ wd('red blue|SEQ', 'blue, red|q'),
131
+ wd('blue blue|SEQ', 'blue, blue|q'),
132
+ wd('blue yellow|SEQ', 'yellow, blue|q'),
133
+ wd('yellow white|SEQ', 'white, yellow|q'),
134
+ wd('white leaves|SEQ', 'leave, white|q'),
135
+ wd('yellow colour|SEQ', 'colour, yellow|q'),
136
+ wd('white yellow green|SEQ', 'green, white yellow|q'),
137
+ wd('yellow green white|SEQ', 'white, yellow green|q'),
138
+ wd('green white cold|SEQ', 'cold, green white|q'),
139
+ wd('hot hot water|SEQ', 'water, hot hot|q'),
140
+ wd('warm warm dry|SEQ', 'dry, warm warm|q'),
141
+ wd('warm dry weather|SEQ', 'weather, warm dry|q'),
142
+ wd('fresh fish tree|SEQ', 'tree, fresh fish|q'),
143
+ wd('vegetable green red|SEQ', 'red, vegetable green|q'),
144
+ wd('green red red|SEQ', 'red, green red|q'),
145
+ wd('red red blue|SEQ', 'blue, red red|q'),
146
+ wd('red blue blue|SEQ', 'blue, red blue|q'),
147
+ wd('blue blue yellow|SEQ', 'yellow, blue blue|q'),
148
+ wd('blue yellow white|SEQ', 'white, blue yellow|q'),
149
+ wd('yellow white leaves|SEQ', 'leave, yellow white|q'),
150
+ ai('EOF|'),
151
+ ai('EOT|')
152
+ ]
153
+
154
+ @out2 = [
155
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
156
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
157
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
158
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
159
+ wd('cold|IDF', 'cold|s', 'cold|a'),
160
+ wd('hot|IDF', 'hot|a'),
161
+ wd('hot|IDF', 'hot|a'),
162
+ wd('water|IDF', 'water|s', 'water|v'),
163
+ wd('warm|IDF', 'warm|a', 'warm|v'),
164
+ wd('warm|IDF', 'warm|a', 'warm|v'),
165
+ wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
166
+ wd('weather|IDF', 'weather|s', 'weather|v'),
167
+ wd('drink|IDF', 'drink|s', 'drink|v'),
168
+ wd('winter|IDF', 'winter|s', 'winter|v'),
169
+ wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
170
+ wd('fruit|IDF', 'fruit|s', 'fruit|v'),
171
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
172
+ wd('food|IDF', 'food|s'),
173
+ wd('juice|IDF', 'juice|s', 'juice|v'),
174
+ wd('flower|IDF', 'flower|s', 'flower|v'),
175
+ wd('fresh|IDF', 'fresh|s', 'fresh|a'),
176
+ wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
177
+ wd('tree|IDF', 'tree|s', 'tree|v'),
178
+ wd('meat|IDF', 'meat|s'),
179
+ wd('salad|IDF', 'salad|s'),
180
+ wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
181
+ wd('green|IDF', 'green|s', 'green|a', 'green|v'),
182
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
183
+ wd('red|IDF', 'red|s', 'red|a', 'red|v'),
184
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
185
+ wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
186
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
187
+ wd('white|IDF', 'white|s', 'white|a', 'white|v'),
188
+ wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
189
+ wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
190
+ wd('colour|IDF', 'colour|s', 'colour|v'),
191
+ wd('grey|IDF', 'grey|s'),
192
+ wd('tobacco|IDF', 'tobacco|s'),
193
+ wd('advertising|IDF', 'advertising|e'),
194
+ wd('cigarette|IDF', 'cigarette|s'),
195
+ wd('smoke|IDF', 'smoke|s', 'smoke|v'),
196
+ wd('alcohol|IDF', 'alcohol|s'),
197
+ wd('ban|IDF', 'ban|s'),
198
+ wd('coal|IDF', 'coal|s'),
199
+ wd('cigarette|IDF', 'cigarette|s'),
200
+ wd('import|IDF', 'import|s', 'import|v'),
201
+ wd('alcohol|IDF', 'alcohol|s'),
202
+ wd('textile|IDF', 'textile|s'),
203
+ wd('whiskey|IDF', 'whiskey|s'),
204
+ wd('drink|IDF', 'drink|s', 'drink|v'),
205
+ wd('whisky|IDF', 'whisky|s'),
206
+ wd('hot water|SEQ', 'water, hot|q'),
207
+ wd('warm dry|SEQ', 'dry, warm|q'),
208
+ wd('yellow colour|SEQ', 'colour, yellow|q'),
209
+ wd('white leaves|SEQ', 'leave, white|q'),
210
+ wd('yellow white|SEQ', 'white, yellow|q'),
211
+ wd('blue yellow|SEQ', 'yellow, blue|q'),
212
+ wd('blue blue|SEQ', 'blue, blue|q'),
213
+ wd('red blue|SEQ', 'blue, red|q'),
214
+ wd('red red|SEQ', 'red, red|q'),
215
+ wd('green red|SEQ', 'red, green|q'),
216
+ wd('vegetable green|SEQ', 'green, vegetable|q'),
217
+ wd('fish tree|SEQ', 'tree, fish|q'),
218
+ wd('fresh fish|SEQ', 'fish, fresh|q'),
219
+ wd('vegetable food|SEQ', 'food, vegetable|q'),
220
+ wd('cool fruit|SEQ', 'fruit, cool|q'),
221
+ wd('dry weather|SEQ', 'weather, dry|q'),
222
+ wd('white cold|SEQ', 'cold, white|q'),
223
+ wd('green white|SEQ', 'white, green|q'),
224
+ wd('yellow green|SEQ', 'green, yellow|q'),
225
+ wd('white yellow|SEQ', 'yellow, white|q'),
226
+ wd('hot hot water|SEQ', 'water, hot hot|q'),
227
+ wd('warm warm dry|SEQ', 'dry, warm warm|q'),
228
+ wd('yellow white leaves|SEQ', 'leave, yellow white|q'),
229
+ wd('blue yellow white|SEQ', 'white, blue yellow|q'),
230
+ wd('blue blue yellow|SEQ', 'yellow, blue blue|q'),
231
+ wd('red blue blue|SEQ', 'blue, red blue|q'),
232
+ wd('red red blue|SEQ', 'blue, red red|q'),
233
+ wd('green red red|SEQ', 'red, green red|q'),
234
+ wd('vegetable green red|SEQ', 'red, vegetable green|q'),
235
+ wd('fresh fish tree|SEQ', 'tree, fresh fish|q'),
236
+ wd('warm dry weather|SEQ', 'weather, warm dry|q'),
237
+ wd('green white cold|SEQ', 'cold, green white|q'),
238
+ wd('yellow green white|SEQ', 'white, yellow green|q'),
239
+ wd('white yellow green|SEQ', 'green, white yellow|q'),
240
+ ai('EOF|'),
241
+ ai('EOT|')
242
+ ]
243
+ end
244
+
7
245
  def test_basic
8
246
  meet({}, [
9
247
  # AS
@@ -23,12 +261,12 @@ class TestAttendeeSequencer < AttendeeTestCase
23
261
  wd('helle|IDF', 'hell|a'),
24
262
  wd('Sonne|IDF', 'sonne|s'),
25
263
  tk('.|PUNC'),
26
- wd('sonne, hell|SEQ', 'sonne, hell|q'),
264
+ wd('helle Sonne|SEQ', 'sonne, hell|q'),
27
265
  # AK
28
266
  wd('Der|IDF', 'der|w'),
29
267
  wd('schöne|IDF', 'schön|a'),
30
268
  wd('Sonnenuntergang|COM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
31
- wd('sonnenuntergang, schön|SEQ', 'sonnenuntergang, schön|q'),
269
+ wd('schöne Sonnenuntergang|SEQ', 'sonnenuntergang, schön|q'),
32
270
  ai('EOF|'),
33
271
  ai('EOT|')
34
272
  ])
@@ -67,14 +305,14 @@ class TestAttendeeSequencer < AttendeeTestCase
67
305
  wd('Gottes|IDF', 'gott|s'),
68
306
  wd('Turm|IDF', 'turm|s'),
69
307
  tk('.|PUNC'),
70
- wd('abbild gott|SEQ', 'abbild gott|q'),
71
- wd('gott turm|SEQ', 'gott turm|q'),
72
- wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
308
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
309
+ wd('Gottes Turm|SEQ', 'gott turm|q'),
310
+ wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
73
311
  # SS
74
312
  wd('Der|IDF', 'der|w'),
75
313
  wd('Sonne|IDF', 'sonne|s'),
76
314
  wd('Untergang|IDF', 'untergang|s'),
77
- wd('sonne untergang|SEQ', 'sonne untergang|q'),
315
+ wd('Sonne Untergang|SEQ', 'sonne untergang|q'),
78
316
  ai('EOF|'),
79
317
  ai('EOT|')
80
318
  ])
@@ -99,7 +337,7 @@ class TestAttendeeSequencer < AttendeeTestCase
99
337
  wd('Gottes|IDF', 'gott|s'),
100
338
  wd('Turm|IDF', 'turm|s'),
101
339
  tk('.|PUNC'),
102
- wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
340
+ wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
103
341
  ai('EOF|'),
104
342
  ai('EOT|')
105
343
  ])
@@ -125,15 +363,15 @@ class TestAttendeeSequencer < AttendeeTestCase
125
363
  wd('Gottes|IDF', 'gott|s'),
126
364
  wd('Turm|IDF', 'turm|s'),
127
365
  tk('.|PUNC'),
128
- wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
129
- wd('abbild gott|SEQ', 'abbild gott|q'),
130
- wd('gott turm|SEQ', 'gott turm|q'),
131
- wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
366
+ wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
367
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
368
+ wd('Gottes Turm|SEQ', 'gott turm|q'),
369
+ wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
132
370
  # SS
133
371
  wd('Abbild Gottes|MUL', 'abbild gottes|m'),
134
372
  wd('Abbild|IDF', 'abbild|s'),
135
373
  wd('Gottes|IDF', 'gott|s'),
136
- wd('abbild gott|SEQ', 'abbild gott|q'),
374
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
137
375
  ai('EOF|'),
138
376
  ai('EOT|')
139
377
  ])
@@ -162,14 +400,14 @@ class TestAttendeeSequencer < AttendeeTestCase
162
400
  wd('Gottes|IDF', 'gott|s'),
163
401
  wd('Turm|IDF', 'turm|s'),
164
402
  tk('.|PUNC'),
165
- wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
166
- wd('abbild gott|SEQ', 'abbild gott|q'),
167
- wd('gott turm|SEQ', 'gott turm|q'),
403
+ wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
404
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
405
+ wd('Gottes Turm|SEQ', 'gott turm|q'),
168
406
  # SS
169
407
  wd('Abbild Gottes|MUL', 'abbild gottes|m'),
170
408
  wd('Abbild|IDF', 'abbild|s'),
171
409
  wd('Gottes|IDF', 'gott|s'),
172
- wd('abbild gott|SEQ', 'abbild gott|q'),
410
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
173
411
  ai('EOF|'),
174
412
  ai('EOT|')
175
413
  ])
@@ -228,14 +466,14 @@ class TestAttendeeSequencer < AttendeeTestCase
228
466
  wd('Gottes|IDF', 'gott|s'),
229
467
  wd('Turm|IDF', 'turm|s'),
230
468
  tk('.|PUNC'),
231
- wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
232
- wd('abbild gott|SEQ', 'abbild gott|q'),
233
- wd('gott turm|SEQ', 'gott turm|q'),
469
+ wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
470
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
471
+ wd('Gottes Turm|SEQ', 'gott turm|q'),
234
472
  # SS
235
473
  wd('Abbild Gottes|MUL', 'abbild gottes|m'),
236
474
  wd('Abbild|IDF', 'abbild|s'),
237
475
  wd('Gottes|IDF', 'gott|s'),
238
- wd('abbild gott|SEQ', 'abbild gott|q'),
476
+ wd('Abbild Gottes|SEQ', 'abbild gott|q'),
239
477
  ai('EOF|'),
240
478
  ai('EOT|')
241
479
  ])
@@ -260,10 +498,10 @@ class TestAttendeeSequencer < AttendeeTestCase
260
498
  wd('Gottes|IDF', 'gott|s'),
261
499
  wd('Turm|IDF', 'turm|s'),
262
500
  tk('.|PUNC'),
263
- wd('abbild gottes turm|SEQ', 'abbild gottes turm|q'),
264
- wd('abbild gott turm|SEQ', 'abbild gott turm|q'),
265
- #wd('abbild gott|SEQ', 'abbild gott|q'), # FIXME
266
- wd('gott turm|SEQ', 'gott turm|q'),
501
+ wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
502
+ wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
503
+ #wd('Abbild Gottes|SEQ', 'abbild gott|q'), # FIXME
504
+ wd('Gottes Turm|SEQ', 'gott turm|q'),
267
505
  ai('EOF|'),
268
506
  ai('EOT|')
269
507
  ])
@@ -288,10 +526,10 @@ class TestAttendeeSequencer < AttendeeTestCase
288
526
  wd('Gottes|IDF', 'gott|s'),
289
527
  wd('Turm|IDF', 'turm|s'),
290
528
  tk('.|PUNC'),
291
- wd('ms:abbild gottes^turm|SEQ', 'ms:abbild gottes^turm|q'),
292
- wd('sss:abbild^gott^turm|SEQ', 'sss:abbild^gott^turm|q'),
293
- #wd('ss:abbild^gott|SEQ', 'ss:abbild^gott|q'), # FIXME
294
- wd('ss:gott^turm|SEQ', 'ss:gott^turm|q'),
529
+ wd('Abbild Gottes Turm|SEQ', 'ms:abbild gottes^turm|q'),
530
+ wd('Abbild Gottes Turm|SEQ', 'sss:abbild^gott^turm|q'),
531
+ #wd('Abbild Gottes|SEQ', 'ss:abbild^gott|q'), # FIXME
532
+ wd('Gottes Turm|SEQ', 'ss:gott^turm|q'),
295
533
  ai('EOF|'),
296
534
  ai('EOT|')
297
535
  ])
@@ -316,14 +554,14 @@ class TestAttendeeSequencer < AttendeeTestCase
316
554
  wd('helle|IDF', 'hell|a'),
317
555
  wd('Sonne|IDF', 'sonne|s'),
318
556
  tk('.|PUNC'),
319
- wd('die hell (wa)|SEQ', 'die hell (wa)|q'),
320
- wd('as: sonne, hell|SEQ', 'as: sonne, hell|q'),
557
+ wd('Die helle|SEQ', 'die hell (wa)|q'),
558
+ wd('helle Sonne|SEQ', 'as: sonne, hell|q'),
321
559
  # WA + AK
322
560
  wd('Der|IDF', 'der|w'),
323
561
  wd('schöne|IDF', 'schön|a'),
324
562
  wd('Sonnenuntergang|COM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
325
- wd('der schön (wa)|SEQ', 'der schön (wa)|q'),
326
- wd('ak: sonnenuntergang, schön|SEQ', 'ak: sonnenuntergang, schön|q'),
563
+ wd('Der schöne|SEQ', 'der schön (wa)|q'),
564
+ wd('schöne Sonnenuntergang|SEQ', 'ak: sonnenuntergang, schön|q'),
327
565
  ai('EOF|'),
328
566
  ai('EOT|')
329
567
  ])
@@ -353,4 +591,20 @@ class TestAttendeeSequencer < AttendeeTestCase
353
591
  ])
354
592
  end
355
593
 
594
# Runs the @perm fixture through +meet+ with an empty configuration and
# expects @out1.
def test_many_permutations
  config = {}
  meet(config, @perm, @out1)
end
597
+
598
# Same fixture and expectation as the default run, with the sequences
# spelled out using character classes.
def test_many_permutations_simple_regex1
  sequences = [['A[SK]', '2, 1'], ['AA[SK]', '3, 1 2']]
  meet({ 'sequences' => sequences }, @perm, @out1)
end
601
+
602
# Same fixture and expectation as the default run, with the sequences
# spelled out using (non-)capturing alternation groups.
def test_many_permutations_simple_regex2
  sequences = [['A(S|K)', '2, 1'], ['AA(?:S|K)', '3, 1 2']]
  meet({ 'sequences' => sequences }, @perm, @out1)
end
605
+
606
# Runs the @perm fixture with sequences that use repetition
# quantifiers and expects @out2. Only defined when the
# LINGO_DISABLE_SLOW_TESTS environment variable is unset.
def test_many_permutations_complex_regex
  sequences = [['A{1}(S|K)', '2, 1'], ['A{2}(S|K)', '3, 1 2']]
  meet({ 'sequences' => sequences }, @perm, @out2)
end unless ENV['LINGO_DISABLE_SLOW_TESTS'] # ~60s
609
+
356
610
  end