lingo 1.9.0.pre1 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +18 -7
- data/README +6 -8
- data/Rakefile +5 -5
- data/dict/en/lingo-dic.txt +52625 -15693
- data/lang/en.lang +2 -2
- data/lib/lingo.rb +15 -3
- data/lib/lingo/array_utils.rb +39 -0
- data/lib/lingo/attendee.rb +1 -3
- data/lib/lingo/attendee/multi_worder.rb +4 -2
- data/lib/lingo/attendee/sequencer.rb +122 -73
- data/lib/lingo/attendee/text_writer.rb +4 -6
- data/lib/lingo/attendee/vector_filter.rb +5 -5
- data/lib/lingo/cli.rb +20 -2
- data/lib/lingo/config.rb +4 -3
- data/lib/lingo/ctl.rb +2 -20
- data/lib/lingo/ctl/analysis.rb +3 -5
- data/lib/lingo/ctl/files.rb +3 -3
- data/lib/lingo/database.rb +26 -25
- data/lib/lingo/database/crypter.rb +10 -6
- data/lib/lingo/database/source.rb +72 -25
- data/lib/lingo/database/source/key_value.rb +12 -8
- data/lib/lingo/database/source/multi_key.rb +11 -9
- data/lib/lingo/database/source/multi_value.rb +10 -8
- data/lib/lingo/database/source/single_word.rb +10 -6
- data/lib/lingo/database/source/word_class.rb +43 -14
- data/lib/lingo/debug.rb +2 -2
- data/lib/lingo/error.rb +21 -5
- data/lib/lingo/filter.rb +1 -1
- data/lib/lingo/language.rb +21 -21
- data/lib/lingo/language/grammar.rb +4 -2
- data/lib/lingo/language/lexical_hash.rb +2 -14
- data/lib/lingo/language/word.rb +1 -5
- data/lib/lingo/text_utils.rb +113 -20
- data/lib/lingo/version.rb +1 -1
- data/test/attendee/ts_sequencer.rb +286 -32
- data/test/attendee/ts_text_reader.rb +4 -4
- data/test/attendee/ts_text_writer.rb +19 -5
- data/test/test_helper.rb +2 -0
- data/test/ts_database.rb +213 -14
- metadata +36 -24
data/lib/lingo/text_utils.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -25,11 +25,14 @@
|
|
25
25
|
#++
|
26
26
|
|
27
27
|
require 'nuggets/file/ext'
|
28
|
+
require 'nuggets/string/format'
|
28
29
|
|
29
30
|
class Lingo
|
30
31
|
|
31
32
|
module TextUtils
|
32
33
|
|
34
|
+
DEFAULT_MODE = 'rb'.freeze
|
35
|
+
|
33
36
|
STDIN_EXT = %w[STDIN -].freeze
|
34
37
|
|
35
38
|
STDOUT_EXT = %w[STDOUT -].freeze
|
@@ -46,51 +49,141 @@ class Lingo
|
|
46
49
|
STDOUT_EXT.include?(path)
|
47
50
|
end
|
48
51
|
|
49
|
-
def
|
50
|
-
|
51
|
-
|
52
|
+
def overwrite?(path, unlink = false)
|
53
|
+
!File.exist?(path) || if agree?("#{path} already exists. Overwrite?")
|
54
|
+
File.unlink(path) if unlink
|
55
|
+
true
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def agree?(msg)
|
60
|
+
print "#{msg} (y/n) [n]: "
|
61
|
+
|
62
|
+
case stdin.gets.chomp
|
63
|
+
when /\Ano?\z/i, '' then nil
|
64
|
+
when /\Ay(?:es)?\z/i then true
|
65
|
+
else puts 'Please enter "yes" or "no".'; agree?(msg)
|
66
|
+
end
|
67
|
+
rescue Interrupt
|
68
|
+
abort ''
|
69
|
+
end
|
70
|
+
|
71
|
+
def stdin
|
72
|
+
respond_to?(:lingo, true) ? lingo.config.stdin : $stdin
|
73
|
+
end
|
74
|
+
|
75
|
+
def stdout
|
76
|
+
respond_to?(:lingo, true) ? lingo.config.stdout : $stdout
|
77
|
+
end
|
78
|
+
|
79
|
+
def open(path, mode = nil, encoding = nil, &block)
|
80
|
+
mode ||= DEFAULT_MODE
|
81
|
+
|
82
|
+
_yield_obj(case mode
|
83
|
+
when /r/ then stdin?(path) ? open_stdin(encoding) : File.exist?(path) ?
|
84
|
+
open_path(path, mode, encoding) : raise(FileNotFoundError.new(path))
|
85
|
+
when /w/ then stdout?(path) ? open_stdout(encoding) : overwrite?(path) ?
|
86
|
+
open_path(path, mode, encoding) : raise(FileExistsError.new(path))
|
87
|
+
end, &block)
|
88
|
+
end
|
89
|
+
|
90
|
+
def open_csv(path, mode = nil, options = {}, encoding = nil, &block)
|
91
|
+
_require_lib('csv')
|
92
|
+
|
93
|
+
open(path, mode, encoding) { |io|
|
94
|
+
_yield_obj(CSV.new(io, options), &block) }
|
95
|
+
end
|
96
|
+
|
97
|
+
def open_stdin(encoding = nil)
|
98
|
+
io = set_encoding(stdin, encoding)
|
99
|
+
@progress ? StringIO.new(io.read) : io
|
52
100
|
end
|
53
101
|
|
54
|
-
def open_stdout
|
55
|
-
set_encoding(
|
102
|
+
def open_stdout(encoding = nil)
|
103
|
+
set_encoding(stdout, encoding)
|
56
104
|
end
|
57
105
|
|
58
|
-
def open_path(path, mode =
|
59
|
-
|
106
|
+
def open_path(path, mode = nil, encoding = nil)
|
107
|
+
mode ||= DEFAULT_MODE
|
108
|
+
|
109
|
+
path =~ GZIP_RE ?
|
110
|
+
open_gzip(path, mode, encoding) :
|
111
|
+
open_file(path, mode, encoding)
|
60
112
|
end
|
61
113
|
|
62
|
-
def open_file(path, mode)
|
63
|
-
File.open(path, mode
|
114
|
+
def open_file(path, mode = nil, encoding = nil)
|
115
|
+
File.open(path, mode ||= DEFAULT_MODE,
|
116
|
+
encoding: bom_encoding(mode, encoding))
|
64
117
|
end
|
65
118
|
|
66
|
-
def open_gzip(path, mode)
|
67
|
-
|
119
|
+
def open_gzip(path, mode = nil, encoding = nil)
|
120
|
+
_require_lib('zlib')
|
68
121
|
|
69
|
-
case mode
|
122
|
+
case mode ||= DEFAULT_MODE
|
70
123
|
when 'r', 'rb'
|
71
124
|
@progress = false
|
72
|
-
Zlib::GzipReader
|
125
|
+
Zlib::GzipReader
|
73
126
|
when 'w', 'wb'
|
74
|
-
Zlib::GzipWriter
|
127
|
+
Zlib::GzipWriter
|
75
128
|
else
|
76
129
|
raise ArgumentError, 'invalid access mode %s' % mode
|
77
|
-
end
|
130
|
+
end.open(path, encoding: get_encoding(encoding))
|
131
|
+
end
|
132
|
+
|
133
|
+
def foreach(path, encoding = nil)
|
134
|
+
open(path, nil, encoding) { |io|
|
135
|
+
io.each { |line| line.chomp!; yield line } }
|
136
|
+
end
|
137
|
+
|
138
|
+
def foreach_csv(path, options = {}, encoding = nil, &block)
|
139
|
+
open_csv(path, nil, options, encoding) { |csv| csv.each(&block) }
|
140
|
+
end
|
141
|
+
|
142
|
+
def get_path(path, ext)
|
143
|
+
set_ext(path, ext).format { |directive|
|
144
|
+
case directive
|
145
|
+
when 'd', t = 't' then Time.now.strftime(t ? '%H%M%S' : '%Y%m%d')
|
146
|
+
when 'c', l = 'l' then File.chomp_ext(File.basename(
|
147
|
+
lingo.config.send("#{l ? :lang : :config}_file")))
|
148
|
+
end
|
149
|
+
}
|
78
150
|
end
|
79
151
|
|
80
152
|
def set_ext(path, ext)
|
81
153
|
File.set_ext(path.sub(GZIP_RE, ''), ".#{ext}")
|
82
154
|
end
|
83
155
|
|
84
|
-
def set_encoding(io, encoding =
|
85
|
-
io.set_encoding(encoding)
|
156
|
+
def set_encoding(io, encoding = nil)
|
157
|
+
io.set_encoding(get_encoding(encoding))
|
86
158
|
io
|
87
159
|
end
|
88
160
|
|
89
|
-
def
|
90
|
-
|
161
|
+
def get_encoding(encoding = nil, iv = :@encoding)
|
162
|
+
encoding ||
|
163
|
+
(instance_variable_defined?(iv) ? instance_variable_get(iv) : nil)
|
164
|
+
end
|
165
|
+
|
166
|
+
def bom_encoding(mode = 'r', encoding = nil)
|
167
|
+
encoding = get_encoding(encoding)
|
168
|
+
|
169
|
+
encoding && (mode.include?('r') || mode.include?('+')) &&
|
91
170
|
encoding.name.start_with?('UTF-') ? "BOM|#{encoding}" : encoding
|
92
171
|
end
|
93
172
|
|
173
|
+
private
|
174
|
+
|
175
|
+
def _require_lib(lib)
|
176
|
+
respond_to?(:require_lib, true) ? require_lib(lib) : require(lib)
|
177
|
+
end
|
178
|
+
|
179
|
+
def _yield_obj(obj)
|
180
|
+
!block_given? ? obj : begin
|
181
|
+
yield obj
|
182
|
+
ensure
|
183
|
+
obj.close
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
94
187
|
end
|
95
188
|
|
96
189
|
end
|
data/lib/lingo/version.rb
CHANGED
[hunk for this file is not included in this excerpt]
data/test/attendee/ts_sequencer.rb
CHANGED
@@ -4,6 +4,244 @@ require_relative '../test_helper'
|
|
4
4
|
|
5
5
|
class TestAttendeeSequencer < AttendeeTestCase
|
6
6
|
|
7
|
+
def setup
|
8
|
+
@perm = [
|
9
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
10
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
11
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
12
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
13
|
+
wd('cold|IDF', 'cold|s', 'cold|a'),
|
14
|
+
wd('hot|IDF', 'hot|a'),
|
15
|
+
wd('hot|IDF', 'hot|a'),
|
16
|
+
wd('water|IDF', 'water|s', 'water|v'),
|
17
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
18
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
19
|
+
wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
|
20
|
+
wd('weather|IDF', 'weather|s', 'weather|v'),
|
21
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
22
|
+
wd('winter|IDF', 'winter|s', 'winter|v'),
|
23
|
+
wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
|
24
|
+
wd('fruit|IDF', 'fruit|s', 'fruit|v'),
|
25
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
26
|
+
wd('food|IDF', 'food|s'),
|
27
|
+
wd('juice|IDF', 'juice|s', 'juice|v'),
|
28
|
+
wd('flower|IDF', 'flower|s', 'flower|v'),
|
29
|
+
wd('fresh|IDF', 'fresh|s', 'fresh|a'),
|
30
|
+
wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
|
31
|
+
wd('tree|IDF', 'tree|s', 'tree|v'),
|
32
|
+
wd('meat|IDF', 'meat|s'),
|
33
|
+
wd('salad|IDF', 'salad|s'),
|
34
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
35
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
36
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
37
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
38
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
39
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
40
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
41
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
42
|
+
wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
|
43
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
44
|
+
wd('colour|IDF', 'colour|s', 'colour|v'),
|
45
|
+
wd('grey|IDF', 'grey|s'),
|
46
|
+
wd('tobacco|IDF', 'tobacco|s'),
|
47
|
+
wd('advertising|IDF', 'advertising|e'),
|
48
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
49
|
+
wd('smoke|IDF', 'smoke|s', 'smoke|v'),
|
50
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
51
|
+
wd('ban|IDF', 'ban|s'),
|
52
|
+
wd('coal|IDF', 'coal|s'),
|
53
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
54
|
+
wd('import|IDF', 'import|s', 'import|v'),
|
55
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
56
|
+
wd('textile|IDF', 'textile|s'),
|
57
|
+
wd('whiskey|IDF', 'whiskey|s'),
|
58
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
59
|
+
wd('whisky|IDF', 'whisky|s'),
|
60
|
+
ai('EOF|'),
|
61
|
+
ai('EOT|')
|
62
|
+
]
|
63
|
+
|
64
|
+
@out1 = [
|
65
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
66
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
67
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
68
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
69
|
+
wd('cold|IDF', 'cold|s', 'cold|a'),
|
70
|
+
wd('hot|IDF', 'hot|a'),
|
71
|
+
wd('hot|IDF', 'hot|a'),
|
72
|
+
wd('water|IDF', 'water|s', 'water|v'),
|
73
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
74
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
75
|
+
wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
|
76
|
+
wd('weather|IDF', 'weather|s', 'weather|v'),
|
77
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
78
|
+
wd('winter|IDF', 'winter|s', 'winter|v'),
|
79
|
+
wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
|
80
|
+
wd('fruit|IDF', 'fruit|s', 'fruit|v'),
|
81
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
82
|
+
wd('food|IDF', 'food|s'),
|
83
|
+
wd('juice|IDF', 'juice|s', 'juice|v'),
|
84
|
+
wd('flower|IDF', 'flower|s', 'flower|v'),
|
85
|
+
wd('fresh|IDF', 'fresh|s', 'fresh|a'),
|
86
|
+
wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
|
87
|
+
wd('tree|IDF', 'tree|s', 'tree|v'),
|
88
|
+
wd('meat|IDF', 'meat|s'),
|
89
|
+
wd('salad|IDF', 'salad|s'),
|
90
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
91
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
92
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
93
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
94
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
95
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
96
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
97
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
98
|
+
wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
|
99
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
100
|
+
wd('colour|IDF', 'colour|s', 'colour|v'),
|
101
|
+
wd('grey|IDF', 'grey|s'),
|
102
|
+
wd('tobacco|IDF', 'tobacco|s'),
|
103
|
+
wd('advertising|IDF', 'advertising|e'),
|
104
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
105
|
+
wd('smoke|IDF', 'smoke|s', 'smoke|v'),
|
106
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
107
|
+
wd('ban|IDF', 'ban|s'),
|
108
|
+
wd('coal|IDF', 'coal|s'),
|
109
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
110
|
+
wd('import|IDF', 'import|s', 'import|v'),
|
111
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
112
|
+
wd('textile|IDF', 'textile|s'),
|
113
|
+
wd('whiskey|IDF', 'whiskey|s'),
|
114
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
115
|
+
wd('whisky|IDF', 'whisky|s'),
|
116
|
+
wd('white yellow|SEQ', 'yellow, white|q'),
|
117
|
+
wd('yellow green|SEQ', 'green, yellow|q'),
|
118
|
+
wd('green white|SEQ', 'white, green|q'),
|
119
|
+
wd('white cold|SEQ', 'cold, white|q'),
|
120
|
+
wd('hot water|SEQ', 'water, hot|q'),
|
121
|
+
wd('warm dry|SEQ', 'dry, warm|q'),
|
122
|
+
wd('dry weather|SEQ', 'weather, dry|q'),
|
123
|
+
wd('cool fruit|SEQ', 'fruit, cool|q'),
|
124
|
+
wd('vegetable food|SEQ', 'food, vegetable|q'),
|
125
|
+
wd('fresh fish|SEQ', 'fish, fresh|q'),
|
126
|
+
wd('fish tree|SEQ', 'tree, fish|q'),
|
127
|
+
wd('vegetable green|SEQ', 'green, vegetable|q'),
|
128
|
+
wd('green red|SEQ', 'red, green|q'),
|
129
|
+
wd('red red|SEQ', 'red, red|q'),
|
130
|
+
wd('red blue|SEQ', 'blue, red|q'),
|
131
|
+
wd('blue blue|SEQ', 'blue, blue|q'),
|
132
|
+
wd('blue yellow|SEQ', 'yellow, blue|q'),
|
133
|
+
wd('yellow white|SEQ', 'white, yellow|q'),
|
134
|
+
wd('white leaves|SEQ', 'leave, white|q'),
|
135
|
+
wd('yellow colour|SEQ', 'colour, yellow|q'),
|
136
|
+
wd('white yellow green|SEQ', 'green, white yellow|q'),
|
137
|
+
wd('yellow green white|SEQ', 'white, yellow green|q'),
|
138
|
+
wd('green white cold|SEQ', 'cold, green white|q'),
|
139
|
+
wd('hot hot water|SEQ', 'water, hot hot|q'),
|
140
|
+
wd('warm warm dry|SEQ', 'dry, warm warm|q'),
|
141
|
+
wd('warm dry weather|SEQ', 'weather, warm dry|q'),
|
142
|
+
wd('fresh fish tree|SEQ', 'tree, fresh fish|q'),
|
143
|
+
wd('vegetable green red|SEQ', 'red, vegetable green|q'),
|
144
|
+
wd('green red red|SEQ', 'red, green red|q'),
|
145
|
+
wd('red red blue|SEQ', 'blue, red red|q'),
|
146
|
+
wd('red blue blue|SEQ', 'blue, red blue|q'),
|
147
|
+
wd('blue blue yellow|SEQ', 'yellow, blue blue|q'),
|
148
|
+
wd('blue yellow white|SEQ', 'white, blue yellow|q'),
|
149
|
+
wd('yellow white leaves|SEQ', 'leave, yellow white|q'),
|
150
|
+
ai('EOF|'),
|
151
|
+
ai('EOT|')
|
152
|
+
]
|
153
|
+
|
154
|
+
@out2 = [
|
155
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
156
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
157
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
158
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
159
|
+
wd('cold|IDF', 'cold|s', 'cold|a'),
|
160
|
+
wd('hot|IDF', 'hot|a'),
|
161
|
+
wd('hot|IDF', 'hot|a'),
|
162
|
+
wd('water|IDF', 'water|s', 'water|v'),
|
163
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
164
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
165
|
+
wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
|
166
|
+
wd('weather|IDF', 'weather|s', 'weather|v'),
|
167
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
168
|
+
wd('winter|IDF', 'winter|s', 'winter|v'),
|
169
|
+
wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
|
170
|
+
wd('fruit|IDF', 'fruit|s', 'fruit|v'),
|
171
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
172
|
+
wd('food|IDF', 'food|s'),
|
173
|
+
wd('juice|IDF', 'juice|s', 'juice|v'),
|
174
|
+
wd('flower|IDF', 'flower|s', 'flower|v'),
|
175
|
+
wd('fresh|IDF', 'fresh|s', 'fresh|a'),
|
176
|
+
wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
|
177
|
+
wd('tree|IDF', 'tree|s', 'tree|v'),
|
178
|
+
wd('meat|IDF', 'meat|s'),
|
179
|
+
wd('salad|IDF', 'salad|s'),
|
180
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
181
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
182
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
183
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
184
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
185
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
186
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
187
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
188
|
+
wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
|
189
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
190
|
+
wd('colour|IDF', 'colour|s', 'colour|v'),
|
191
|
+
wd('grey|IDF', 'grey|s'),
|
192
|
+
wd('tobacco|IDF', 'tobacco|s'),
|
193
|
+
wd('advertising|IDF', 'advertising|e'),
|
194
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
195
|
+
wd('smoke|IDF', 'smoke|s', 'smoke|v'),
|
196
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
197
|
+
wd('ban|IDF', 'ban|s'),
|
198
|
+
wd('coal|IDF', 'coal|s'),
|
199
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
200
|
+
wd('import|IDF', 'import|s', 'import|v'),
|
201
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
202
|
+
wd('textile|IDF', 'textile|s'),
|
203
|
+
wd('whiskey|IDF', 'whiskey|s'),
|
204
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
205
|
+
wd('whisky|IDF', 'whisky|s'),
|
206
|
+
wd('hot water|SEQ', 'water, hot|q'),
|
207
|
+
wd('warm dry|SEQ', 'dry, warm|q'),
|
208
|
+
wd('yellow colour|SEQ', 'colour, yellow|q'),
|
209
|
+
wd('white leaves|SEQ', 'leave, white|q'),
|
210
|
+
wd('yellow white|SEQ', 'white, yellow|q'),
|
211
|
+
wd('blue yellow|SEQ', 'yellow, blue|q'),
|
212
|
+
wd('blue blue|SEQ', 'blue, blue|q'),
|
213
|
+
wd('red blue|SEQ', 'blue, red|q'),
|
214
|
+
wd('red red|SEQ', 'red, red|q'),
|
215
|
+
wd('green red|SEQ', 'red, green|q'),
|
216
|
+
wd('vegetable green|SEQ', 'green, vegetable|q'),
|
217
|
+
wd('fish tree|SEQ', 'tree, fish|q'),
|
218
|
+
wd('fresh fish|SEQ', 'fish, fresh|q'),
|
219
|
+
wd('vegetable food|SEQ', 'food, vegetable|q'),
|
220
|
+
wd('cool fruit|SEQ', 'fruit, cool|q'),
|
221
|
+
wd('dry weather|SEQ', 'weather, dry|q'),
|
222
|
+
wd('white cold|SEQ', 'cold, white|q'),
|
223
|
+
wd('green white|SEQ', 'white, green|q'),
|
224
|
+
wd('yellow green|SEQ', 'green, yellow|q'),
|
225
|
+
wd('white yellow|SEQ', 'yellow, white|q'),
|
226
|
+
wd('hot hot water|SEQ', 'water, hot hot|q'),
|
227
|
+
wd('warm warm dry|SEQ', 'dry, warm warm|q'),
|
228
|
+
wd('yellow white leaves|SEQ', 'leave, yellow white|q'),
|
229
|
+
wd('blue yellow white|SEQ', 'white, blue yellow|q'),
|
230
|
+
wd('blue blue yellow|SEQ', 'yellow, blue blue|q'),
|
231
|
+
wd('red blue blue|SEQ', 'blue, red blue|q'),
|
232
|
+
wd('red red blue|SEQ', 'blue, red red|q'),
|
233
|
+
wd('green red red|SEQ', 'red, green red|q'),
|
234
|
+
wd('vegetable green red|SEQ', 'red, vegetable green|q'),
|
235
|
+
wd('fresh fish tree|SEQ', 'tree, fresh fish|q'),
|
236
|
+
wd('warm dry weather|SEQ', 'weather, warm dry|q'),
|
237
|
+
wd('green white cold|SEQ', 'cold, green white|q'),
|
238
|
+
wd('yellow green white|SEQ', 'white, yellow green|q'),
|
239
|
+
wd('white yellow green|SEQ', 'green, white yellow|q'),
|
240
|
+
ai('EOF|'),
|
241
|
+
ai('EOT|')
|
242
|
+
]
|
243
|
+
end
|
244
|
+
|
7
245
|
def test_basic
|
8
246
|
meet({}, [
|
9
247
|
# AS
|
@@ -23,12 +261,12 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
23
261
|
wd('helle|IDF', 'hell|a'),
|
24
262
|
wd('Sonne|IDF', 'sonne|s'),
|
25
263
|
tk('.|PUNC'),
|
26
|
-
wd('
|
264
|
+
wd('helle Sonne|SEQ', 'sonne, hell|q'),
|
27
265
|
# AK
|
28
266
|
wd('Der|IDF', 'der|w'),
|
29
267
|
wd('schöne|IDF', 'schön|a'),
|
30
268
|
wd('Sonnenuntergang|COM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
|
31
|
-
wd('
|
269
|
+
wd('schöne Sonnenuntergang|SEQ', 'sonnenuntergang, schön|q'),
|
32
270
|
ai('EOF|'),
|
33
271
|
ai('EOT|')
|
34
272
|
])
|
@@ -67,14 +305,14 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
67
305
|
wd('Gottes|IDF', 'gott|s'),
|
68
306
|
wd('Turm|IDF', 'turm|s'),
|
69
307
|
tk('.|PUNC'),
|
70
|
-
wd('
|
71
|
-
wd('
|
72
|
-
wd('
|
308
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
309
|
+
wd('Gottes Turm|SEQ', 'gott turm|q'),
|
310
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
|
73
311
|
# SS
|
74
312
|
wd('Der|IDF', 'der|w'),
|
75
313
|
wd('Sonne|IDF', 'sonne|s'),
|
76
314
|
wd('Untergang|IDF', 'untergang|s'),
|
77
|
-
wd('
|
315
|
+
wd('Sonne Untergang|SEQ', 'sonne untergang|q'),
|
78
316
|
ai('EOF|'),
|
79
317
|
ai('EOT|')
|
80
318
|
])
|
@@ -99,7 +337,7 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
99
337
|
wd('Gottes|IDF', 'gott|s'),
|
100
338
|
wd('Turm|IDF', 'turm|s'),
|
101
339
|
tk('.|PUNC'),
|
102
|
-
wd('
|
340
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
|
103
341
|
ai('EOF|'),
|
104
342
|
ai('EOT|')
|
105
343
|
])
|
@@ -125,15 +363,15 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
125
363
|
wd('Gottes|IDF', 'gott|s'),
|
126
364
|
wd('Turm|IDF', 'turm|s'),
|
127
365
|
tk('.|PUNC'),
|
128
|
-
wd('
|
129
|
-
wd('
|
130
|
-
wd('
|
131
|
-
wd('
|
366
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
|
367
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
368
|
+
wd('Gottes Turm|SEQ', 'gott turm|q'),
|
369
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
|
132
370
|
# SS
|
133
371
|
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
134
372
|
wd('Abbild|IDF', 'abbild|s'),
|
135
373
|
wd('Gottes|IDF', 'gott|s'),
|
136
|
-
wd('
|
374
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
137
375
|
ai('EOF|'),
|
138
376
|
ai('EOT|')
|
139
377
|
])
|
@@ -162,14 +400,14 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
162
400
|
wd('Gottes|IDF', 'gott|s'),
|
163
401
|
wd('Turm|IDF', 'turm|s'),
|
164
402
|
tk('.|PUNC'),
|
165
|
-
wd('
|
166
|
-
wd('
|
167
|
-
wd('
|
403
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
|
404
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
405
|
+
wd('Gottes Turm|SEQ', 'gott turm|q'),
|
168
406
|
# SS
|
169
407
|
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
170
408
|
wd('Abbild|IDF', 'abbild|s'),
|
171
409
|
wd('Gottes|IDF', 'gott|s'),
|
172
|
-
wd('
|
410
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
173
411
|
ai('EOF|'),
|
174
412
|
ai('EOT|')
|
175
413
|
])
|
@@ -228,14 +466,14 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
228
466
|
wd('Gottes|IDF', 'gott|s'),
|
229
467
|
wd('Turm|IDF', 'turm|s'),
|
230
468
|
tk('.|PUNC'),
|
231
|
-
wd('
|
232
|
-
wd('
|
233
|
-
wd('
|
469
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
|
470
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
471
|
+
wd('Gottes Turm|SEQ', 'gott turm|q'),
|
234
472
|
# SS
|
235
473
|
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
236
474
|
wd('Abbild|IDF', 'abbild|s'),
|
237
475
|
wd('Gottes|IDF', 'gott|s'),
|
238
|
-
wd('
|
476
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
239
477
|
ai('EOF|'),
|
240
478
|
ai('EOT|')
|
241
479
|
])
|
@@ -260,10 +498,10 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
260
498
|
wd('Gottes|IDF', 'gott|s'),
|
261
499
|
wd('Turm|IDF', 'turm|s'),
|
262
500
|
tk('.|PUNC'),
|
263
|
-
wd('
|
264
|
-
wd('
|
265
|
-
#wd('
|
266
|
-
wd('
|
501
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
|
502
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
|
503
|
+
#wd('Abbild Gottes|SEQ', 'abbild gott|q'), # FIXME
|
504
|
+
wd('Gottes Turm|SEQ', 'gott turm|q'),
|
267
505
|
ai('EOF|'),
|
268
506
|
ai('EOT|')
|
269
507
|
])
|
@@ -288,10 +526,10 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
288
526
|
wd('Gottes|IDF', 'gott|s'),
|
289
527
|
wd('Turm|IDF', 'turm|s'),
|
290
528
|
tk('.|PUNC'),
|
291
|
-
wd('
|
292
|
-
wd('
|
293
|
-
#wd('
|
294
|
-
wd('
|
529
|
+
wd('Abbild Gottes Turm|SEQ', 'ms:abbild gottes^turm|q'),
|
530
|
+
wd('Abbild Gottes Turm|SEQ', 'sss:abbild^gott^turm|q'),
|
531
|
+
#wd('Abbild Gottes|SEQ', 'ss:abbild^gott|q'), # FIXME
|
532
|
+
wd('Gottes Turm|SEQ', 'ss:gott^turm|q'),
|
295
533
|
ai('EOF|'),
|
296
534
|
ai('EOT|')
|
297
535
|
])
|
@@ -316,14 +554,14 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
316
554
|
wd('helle|IDF', 'hell|a'),
|
317
555
|
wd('Sonne|IDF', 'sonne|s'),
|
318
556
|
tk('.|PUNC'),
|
319
|
-
wd('
|
320
|
-
wd('
|
557
|
+
wd('Die helle|SEQ', 'die hell (wa)|q'),
|
558
|
+
wd('helle Sonne|SEQ', 'as: sonne, hell|q'),
|
321
559
|
# WA + AK
|
322
560
|
wd('Der|IDF', 'der|w'),
|
323
561
|
wd('schöne|IDF', 'schön|a'),
|
324
562
|
wd('Sonnenuntergang|COM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
|
325
|
-
wd('
|
326
|
-
wd('
|
563
|
+
wd('Der schöne|SEQ', 'der schön (wa)|q'),
|
564
|
+
wd('schöne Sonnenuntergang|SEQ', 'ak: sonnenuntergang, schön|q'),
|
327
565
|
ai('EOF|'),
|
328
566
|
ai('EOT|')
|
329
567
|
])
|
@@ -353,4 +591,20 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
353
591
|
])
|
354
592
|
end
|
355
593
|
|
594
|
+
def test_many_permutations
|
595
|
+
meet({}, @perm, @out1)
|
596
|
+
end
|
597
|
+
|
598
|
+
def test_many_permutations_simple_regex1
|
599
|
+
meet({ 'sequences' => [['A[SK]', '2, 1'], ['AA[SK]', '3, 1 2']] }, @perm, @out1)
|
600
|
+
end
|
601
|
+
|
602
|
+
def test_many_permutations_simple_regex2
|
603
|
+
meet({ 'sequences' => [['A(S|K)', '2, 1'], ['AA(?:S|K)', '3, 1 2']] }, @perm, @out1)
|
604
|
+
end
|
605
|
+
|
606
|
+
def test_many_permutations_complex_regex
|
607
|
+
meet({ 'sequences' => [['A{1}(S|K)', '2, 1'], ['A{2}(S|K)', '3, 1 2']] }, @perm, @out2)
|
608
|
+
end unless ENV['LINGO_DISABLE_SLOW_TESTS'] # ~60s
|
609
|
+
|
356
610
|
end
|