lingo 1.9.0.pre1 → 1.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +18 -7
- data/README +6 -8
- data/Rakefile +5 -5
- data/dict/en/lingo-dic.txt +52625 -15693
- data/lang/en.lang +2 -2
- data/lib/lingo.rb +15 -3
- data/lib/lingo/array_utils.rb +39 -0
- data/lib/lingo/attendee.rb +1 -3
- data/lib/lingo/attendee/multi_worder.rb +4 -2
- data/lib/lingo/attendee/sequencer.rb +122 -73
- data/lib/lingo/attendee/text_writer.rb +4 -6
- data/lib/lingo/attendee/vector_filter.rb +5 -5
- data/lib/lingo/cli.rb +20 -2
- data/lib/lingo/config.rb +4 -3
- data/lib/lingo/ctl.rb +2 -20
- data/lib/lingo/ctl/analysis.rb +3 -5
- data/lib/lingo/ctl/files.rb +3 -3
- data/lib/lingo/database.rb +26 -25
- data/lib/lingo/database/crypter.rb +10 -6
- data/lib/lingo/database/source.rb +72 -25
- data/lib/lingo/database/source/key_value.rb +12 -8
- data/lib/lingo/database/source/multi_key.rb +11 -9
- data/lib/lingo/database/source/multi_value.rb +10 -8
- data/lib/lingo/database/source/single_word.rb +10 -6
- data/lib/lingo/database/source/word_class.rb +43 -14
- data/lib/lingo/debug.rb +2 -2
- data/lib/lingo/error.rb +21 -5
- data/lib/lingo/filter.rb +1 -1
- data/lib/lingo/language.rb +21 -21
- data/lib/lingo/language/grammar.rb +4 -2
- data/lib/lingo/language/lexical_hash.rb +2 -14
- data/lib/lingo/language/word.rb +1 -5
- data/lib/lingo/text_utils.rb +113 -20
- data/lib/lingo/version.rb +1 -1
- data/test/attendee/ts_sequencer.rb +286 -32
- data/test/attendee/ts_text_reader.rb +4 -4
- data/test/attendee/ts_text_writer.rb +19 -5
- data/test/test_helper.rb +2 -0
- data/test/ts_database.rb +213 -14
- metadata +36 -24
data/lib/lingo/text_utils.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# Lingo -- A full-featured automatic indexing system #
|
7
7
|
# #
|
8
8
|
# Copyright (C) 2005-2007 John Vorhauer #
|
9
|
-
# Copyright (C) 2007-
|
9
|
+
# Copyright (C) 2007-2016 John Vorhauer, Jens Wille #
|
10
10
|
# #
|
11
11
|
# Lingo is free software; you can redistribute it and/or modify it under the #
|
12
12
|
# terms of the GNU Affero General Public License as published by the Free #
|
@@ -25,11 +25,14 @@
|
|
25
25
|
#++
|
26
26
|
|
27
27
|
require 'nuggets/file/ext'
|
28
|
+
require 'nuggets/string/format'
|
28
29
|
|
29
30
|
class Lingo
|
30
31
|
|
31
32
|
module TextUtils
|
32
33
|
|
34
|
+
DEFAULT_MODE = 'rb'.freeze
|
35
|
+
|
33
36
|
STDIN_EXT = %w[STDIN -].freeze
|
34
37
|
|
35
38
|
STDOUT_EXT = %w[STDOUT -].freeze
|
@@ -46,51 +49,141 @@ class Lingo
|
|
46
49
|
STDOUT_EXT.include?(path)
|
47
50
|
end
|
48
51
|
|
49
|
-
def
|
50
|
-
|
51
|
-
|
52
|
+
def overwrite?(path, unlink = false)
|
53
|
+
!File.exist?(path) || if agree?("#{path} already exists. Overwrite?")
|
54
|
+
File.unlink(path) if unlink
|
55
|
+
true
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def agree?(msg)
|
60
|
+
print "#{msg} (y/n) [n]: "
|
61
|
+
|
62
|
+
case stdin.gets.chomp
|
63
|
+
when /\Ano?\z/i, '' then nil
|
64
|
+
when /\Ay(?:es)?\z/i then true
|
65
|
+
else puts 'Please enter "yes" or "no".'; agree?(msg)
|
66
|
+
end
|
67
|
+
rescue Interrupt
|
68
|
+
abort ''
|
69
|
+
end
|
70
|
+
|
71
|
+
def stdin
|
72
|
+
respond_to?(:lingo, true) ? lingo.config.stdin : $stdin
|
73
|
+
end
|
74
|
+
|
75
|
+
def stdout
|
76
|
+
respond_to?(:lingo, true) ? lingo.config.stdout : $stdout
|
77
|
+
end
|
78
|
+
|
79
|
+
def open(path, mode = nil, encoding = nil, &block)
|
80
|
+
mode ||= DEFAULT_MODE
|
81
|
+
|
82
|
+
_yield_obj(case mode
|
83
|
+
when /r/ then stdin?(path) ? open_stdin(encoding) : File.exist?(path) ?
|
84
|
+
open_path(path, mode, encoding) : raise(FileNotFoundError.new(path))
|
85
|
+
when /w/ then stdout?(path) ? open_stdout(encoding) : overwrite?(path) ?
|
86
|
+
open_path(path, mode, encoding) : raise(FileExistsError.new(path))
|
87
|
+
end, &block)
|
88
|
+
end
|
89
|
+
|
90
|
+
def open_csv(path, mode = nil, options = {}, encoding = nil, &block)
|
91
|
+
_require_lib('csv')
|
92
|
+
|
93
|
+
open(path, mode, encoding) { |io|
|
94
|
+
_yield_obj(CSV.new(io, options), &block) }
|
95
|
+
end
|
96
|
+
|
97
|
+
def open_stdin(encoding = nil)
|
98
|
+
io = set_encoding(stdin, encoding)
|
99
|
+
@progress ? StringIO.new(io.read) : io
|
52
100
|
end
|
53
101
|
|
54
|
-
def open_stdout
|
55
|
-
set_encoding(
|
102
|
+
def open_stdout(encoding = nil)
|
103
|
+
set_encoding(stdout, encoding)
|
56
104
|
end
|
57
105
|
|
58
|
-
def open_path(path, mode =
|
59
|
-
|
106
|
+
def open_path(path, mode = nil, encoding = nil)
|
107
|
+
mode ||= DEFAULT_MODE
|
108
|
+
|
109
|
+
path =~ GZIP_RE ?
|
110
|
+
open_gzip(path, mode, encoding) :
|
111
|
+
open_file(path, mode, encoding)
|
60
112
|
end
|
61
113
|
|
62
|
-
def open_file(path, mode)
|
63
|
-
File.open(path, mode
|
114
|
+
def open_file(path, mode = nil, encoding = nil)
|
115
|
+
File.open(path, mode ||= DEFAULT_MODE,
|
116
|
+
encoding: bom_encoding(mode, encoding))
|
64
117
|
end
|
65
118
|
|
66
|
-
def open_gzip(path, mode)
|
67
|
-
|
119
|
+
def open_gzip(path, mode = nil, encoding = nil)
|
120
|
+
_require_lib('zlib')
|
68
121
|
|
69
|
-
case mode
|
122
|
+
case mode ||= DEFAULT_MODE
|
70
123
|
when 'r', 'rb'
|
71
124
|
@progress = false
|
72
|
-
Zlib::GzipReader
|
125
|
+
Zlib::GzipReader
|
73
126
|
when 'w', 'wb'
|
74
|
-
Zlib::GzipWriter
|
127
|
+
Zlib::GzipWriter
|
75
128
|
else
|
76
129
|
raise ArgumentError, 'invalid access mode %s' % mode
|
77
|
-
end
|
130
|
+
end.open(path, encoding: get_encoding(encoding))
|
131
|
+
end
|
132
|
+
|
133
|
+
def foreach(path, encoding = nil)
|
134
|
+
open(path, nil, encoding) { |io|
|
135
|
+
io.each { |line| line.chomp!; yield line } }
|
136
|
+
end
|
137
|
+
|
138
|
+
def foreach_csv(path, options = {}, encoding = nil, &block)
|
139
|
+
open_csv(path, nil, options, encoding) { |csv| csv.each(&block) }
|
140
|
+
end
|
141
|
+
|
142
|
+
def get_path(path, ext)
|
143
|
+
set_ext(path, ext).format { |directive|
|
144
|
+
case directive
|
145
|
+
when 'd', t = 't' then Time.now.strftime(t ? '%H%M%S' : '%Y%m%d')
|
146
|
+
when 'c', l = 'l' then File.chomp_ext(File.basename(
|
147
|
+
lingo.config.send("#{l ? :lang : :config}_file")))
|
148
|
+
end
|
149
|
+
}
|
78
150
|
end
|
79
151
|
|
80
152
|
def set_ext(path, ext)
|
81
153
|
File.set_ext(path.sub(GZIP_RE, ''), ".#{ext}")
|
82
154
|
end
|
83
155
|
|
84
|
-
def set_encoding(io, encoding =
|
85
|
-
io.set_encoding(encoding)
|
156
|
+
def set_encoding(io, encoding = nil)
|
157
|
+
io.set_encoding(get_encoding(encoding))
|
86
158
|
io
|
87
159
|
end
|
88
160
|
|
89
|
-
def
|
90
|
-
|
161
|
+
def get_encoding(encoding = nil, iv = :@encoding)
|
162
|
+
encoding ||
|
163
|
+
(instance_variable_defined?(iv) ? instance_variable_get(iv) : nil)
|
164
|
+
end
|
165
|
+
|
166
|
+
def bom_encoding(mode = 'r', encoding = nil)
|
167
|
+
encoding = get_encoding(encoding)
|
168
|
+
|
169
|
+
encoding && (mode.include?('r') || mode.include?('+')) &&
|
91
170
|
encoding.name.start_with?('UTF-') ? "BOM|#{encoding}" : encoding
|
92
171
|
end
|
93
172
|
|
173
|
+
private
|
174
|
+
|
175
|
+
def _require_lib(lib)
|
176
|
+
respond_to?(:require_lib, true) ? require_lib(lib) : require(lib)
|
177
|
+
end
|
178
|
+
|
179
|
+
def _yield_obj(obj)
|
180
|
+
!block_given? ? obj : begin
|
181
|
+
yield obj
|
182
|
+
ensure
|
183
|
+
obj.close
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
94
187
|
end
|
95
188
|
|
96
189
|
end
|
data/lib/lingo/version.rb
CHANGED
(hunk not captured in this extract)
data/test/attendee/ts_sequencer.rb
CHANGED
@@ -4,6 +4,244 @@ require_relative '../test_helper'
|
|
4
4
|
|
5
5
|
class TestAttendeeSequencer < AttendeeTestCase
|
6
6
|
|
7
|
+
def setup
|
8
|
+
@perm = [
|
9
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
10
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
11
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
12
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
13
|
+
wd('cold|IDF', 'cold|s', 'cold|a'),
|
14
|
+
wd('hot|IDF', 'hot|a'),
|
15
|
+
wd('hot|IDF', 'hot|a'),
|
16
|
+
wd('water|IDF', 'water|s', 'water|v'),
|
17
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
18
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
19
|
+
wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
|
20
|
+
wd('weather|IDF', 'weather|s', 'weather|v'),
|
21
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
22
|
+
wd('winter|IDF', 'winter|s', 'winter|v'),
|
23
|
+
wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
|
24
|
+
wd('fruit|IDF', 'fruit|s', 'fruit|v'),
|
25
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
26
|
+
wd('food|IDF', 'food|s'),
|
27
|
+
wd('juice|IDF', 'juice|s', 'juice|v'),
|
28
|
+
wd('flower|IDF', 'flower|s', 'flower|v'),
|
29
|
+
wd('fresh|IDF', 'fresh|s', 'fresh|a'),
|
30
|
+
wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
|
31
|
+
wd('tree|IDF', 'tree|s', 'tree|v'),
|
32
|
+
wd('meat|IDF', 'meat|s'),
|
33
|
+
wd('salad|IDF', 'salad|s'),
|
34
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
35
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
36
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
37
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
38
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
39
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
40
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
41
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
42
|
+
wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
|
43
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
44
|
+
wd('colour|IDF', 'colour|s', 'colour|v'),
|
45
|
+
wd('grey|IDF', 'grey|s'),
|
46
|
+
wd('tobacco|IDF', 'tobacco|s'),
|
47
|
+
wd('advertising|IDF', 'advertising|e'),
|
48
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
49
|
+
wd('smoke|IDF', 'smoke|s', 'smoke|v'),
|
50
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
51
|
+
wd('ban|IDF', 'ban|s'),
|
52
|
+
wd('coal|IDF', 'coal|s'),
|
53
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
54
|
+
wd('import|IDF', 'import|s', 'import|v'),
|
55
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
56
|
+
wd('textile|IDF', 'textile|s'),
|
57
|
+
wd('whiskey|IDF', 'whiskey|s'),
|
58
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
59
|
+
wd('whisky|IDF', 'whisky|s'),
|
60
|
+
ai('EOF|'),
|
61
|
+
ai('EOT|')
|
62
|
+
]
|
63
|
+
|
64
|
+
@out1 = [
|
65
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
66
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
67
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
68
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
69
|
+
wd('cold|IDF', 'cold|s', 'cold|a'),
|
70
|
+
wd('hot|IDF', 'hot|a'),
|
71
|
+
wd('hot|IDF', 'hot|a'),
|
72
|
+
wd('water|IDF', 'water|s', 'water|v'),
|
73
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
74
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
75
|
+
wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
|
76
|
+
wd('weather|IDF', 'weather|s', 'weather|v'),
|
77
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
78
|
+
wd('winter|IDF', 'winter|s', 'winter|v'),
|
79
|
+
wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
|
80
|
+
wd('fruit|IDF', 'fruit|s', 'fruit|v'),
|
81
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
82
|
+
wd('food|IDF', 'food|s'),
|
83
|
+
wd('juice|IDF', 'juice|s', 'juice|v'),
|
84
|
+
wd('flower|IDF', 'flower|s', 'flower|v'),
|
85
|
+
wd('fresh|IDF', 'fresh|s', 'fresh|a'),
|
86
|
+
wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
|
87
|
+
wd('tree|IDF', 'tree|s', 'tree|v'),
|
88
|
+
wd('meat|IDF', 'meat|s'),
|
89
|
+
wd('salad|IDF', 'salad|s'),
|
90
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
91
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
92
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
93
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
94
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
95
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
96
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
97
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
98
|
+
wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
|
99
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
100
|
+
wd('colour|IDF', 'colour|s', 'colour|v'),
|
101
|
+
wd('grey|IDF', 'grey|s'),
|
102
|
+
wd('tobacco|IDF', 'tobacco|s'),
|
103
|
+
wd('advertising|IDF', 'advertising|e'),
|
104
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
105
|
+
wd('smoke|IDF', 'smoke|s', 'smoke|v'),
|
106
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
107
|
+
wd('ban|IDF', 'ban|s'),
|
108
|
+
wd('coal|IDF', 'coal|s'),
|
109
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
110
|
+
wd('import|IDF', 'import|s', 'import|v'),
|
111
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
112
|
+
wd('textile|IDF', 'textile|s'),
|
113
|
+
wd('whiskey|IDF', 'whiskey|s'),
|
114
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
115
|
+
wd('whisky|IDF', 'whisky|s'),
|
116
|
+
wd('white yellow|SEQ', 'yellow, white|q'),
|
117
|
+
wd('yellow green|SEQ', 'green, yellow|q'),
|
118
|
+
wd('green white|SEQ', 'white, green|q'),
|
119
|
+
wd('white cold|SEQ', 'cold, white|q'),
|
120
|
+
wd('hot water|SEQ', 'water, hot|q'),
|
121
|
+
wd('warm dry|SEQ', 'dry, warm|q'),
|
122
|
+
wd('dry weather|SEQ', 'weather, dry|q'),
|
123
|
+
wd('cool fruit|SEQ', 'fruit, cool|q'),
|
124
|
+
wd('vegetable food|SEQ', 'food, vegetable|q'),
|
125
|
+
wd('fresh fish|SEQ', 'fish, fresh|q'),
|
126
|
+
wd('fish tree|SEQ', 'tree, fish|q'),
|
127
|
+
wd('vegetable green|SEQ', 'green, vegetable|q'),
|
128
|
+
wd('green red|SEQ', 'red, green|q'),
|
129
|
+
wd('red red|SEQ', 'red, red|q'),
|
130
|
+
wd('red blue|SEQ', 'blue, red|q'),
|
131
|
+
wd('blue blue|SEQ', 'blue, blue|q'),
|
132
|
+
wd('blue yellow|SEQ', 'yellow, blue|q'),
|
133
|
+
wd('yellow white|SEQ', 'white, yellow|q'),
|
134
|
+
wd('white leaves|SEQ', 'leave, white|q'),
|
135
|
+
wd('yellow colour|SEQ', 'colour, yellow|q'),
|
136
|
+
wd('white yellow green|SEQ', 'green, white yellow|q'),
|
137
|
+
wd('yellow green white|SEQ', 'white, yellow green|q'),
|
138
|
+
wd('green white cold|SEQ', 'cold, green white|q'),
|
139
|
+
wd('hot hot water|SEQ', 'water, hot hot|q'),
|
140
|
+
wd('warm warm dry|SEQ', 'dry, warm warm|q'),
|
141
|
+
wd('warm dry weather|SEQ', 'weather, warm dry|q'),
|
142
|
+
wd('fresh fish tree|SEQ', 'tree, fresh fish|q'),
|
143
|
+
wd('vegetable green red|SEQ', 'red, vegetable green|q'),
|
144
|
+
wd('green red red|SEQ', 'red, green red|q'),
|
145
|
+
wd('red red blue|SEQ', 'blue, red red|q'),
|
146
|
+
wd('red blue blue|SEQ', 'blue, red blue|q'),
|
147
|
+
wd('blue blue yellow|SEQ', 'yellow, blue blue|q'),
|
148
|
+
wd('blue yellow white|SEQ', 'white, blue yellow|q'),
|
149
|
+
wd('yellow white leaves|SEQ', 'leave, yellow white|q'),
|
150
|
+
ai('EOF|'),
|
151
|
+
ai('EOT|')
|
152
|
+
]
|
153
|
+
|
154
|
+
@out2 = [
|
155
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
156
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
157
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
158
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
159
|
+
wd('cold|IDF', 'cold|s', 'cold|a'),
|
160
|
+
wd('hot|IDF', 'hot|a'),
|
161
|
+
wd('hot|IDF', 'hot|a'),
|
162
|
+
wd('water|IDF', 'water|s', 'water|v'),
|
163
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
164
|
+
wd('warm|IDF', 'warm|a', 'warm|v'),
|
165
|
+
wd('dry|IDF', 'dry|s', 'dry|a', 'dry|v'),
|
166
|
+
wd('weather|IDF', 'weather|s', 'weather|v'),
|
167
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
168
|
+
wd('winter|IDF', 'winter|s', 'winter|v'),
|
169
|
+
wd('cool|IDF', 'cool|s', 'cool|a', 'cool|v'),
|
170
|
+
wd('fruit|IDF', 'fruit|s', 'fruit|v'),
|
171
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
172
|
+
wd('food|IDF', 'food|s'),
|
173
|
+
wd('juice|IDF', 'juice|s', 'juice|v'),
|
174
|
+
wd('flower|IDF', 'flower|s', 'flower|v'),
|
175
|
+
wd('fresh|IDF', 'fresh|s', 'fresh|a'),
|
176
|
+
wd('fish|IDF', 'fish|s', 'fish|a', 'fish|v'),
|
177
|
+
wd('tree|IDF', 'tree|s', 'tree|v'),
|
178
|
+
wd('meat|IDF', 'meat|s'),
|
179
|
+
wd('salad|IDF', 'salad|s'),
|
180
|
+
wd('vegetable|IDF', 'vegetable|s', 'vegetable|a'),
|
181
|
+
wd('green|IDF', 'green|s', 'green|a', 'green|v'),
|
182
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
183
|
+
wd('red|IDF', 'red|s', 'red|a', 'red|v'),
|
184
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
185
|
+
wd('blue|IDF', 'blue|s', 'blue|a', 'blue|v'),
|
186
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
187
|
+
wd('white|IDF', 'white|s', 'white|a', 'white|v'),
|
188
|
+
wd('leaves|IDF', 'leave|s', 'leaf|s', 'leave|v'),
|
189
|
+
wd('yellow|IDF', 'yellow|s', 'yellow|a', 'yellow|v'),
|
190
|
+
wd('colour|IDF', 'colour|s', 'colour|v'),
|
191
|
+
wd('grey|IDF', 'grey|s'),
|
192
|
+
wd('tobacco|IDF', 'tobacco|s'),
|
193
|
+
wd('advertising|IDF', 'advertising|e'),
|
194
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
195
|
+
wd('smoke|IDF', 'smoke|s', 'smoke|v'),
|
196
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
197
|
+
wd('ban|IDF', 'ban|s'),
|
198
|
+
wd('coal|IDF', 'coal|s'),
|
199
|
+
wd('cigarette|IDF', 'cigarette|s'),
|
200
|
+
wd('import|IDF', 'import|s', 'import|v'),
|
201
|
+
wd('alcohol|IDF', 'alcohol|s'),
|
202
|
+
wd('textile|IDF', 'textile|s'),
|
203
|
+
wd('whiskey|IDF', 'whiskey|s'),
|
204
|
+
wd('drink|IDF', 'drink|s', 'drink|v'),
|
205
|
+
wd('whisky|IDF', 'whisky|s'),
|
206
|
+
wd('hot water|SEQ', 'water, hot|q'),
|
207
|
+
wd('warm dry|SEQ', 'dry, warm|q'),
|
208
|
+
wd('yellow colour|SEQ', 'colour, yellow|q'),
|
209
|
+
wd('white leaves|SEQ', 'leave, white|q'),
|
210
|
+
wd('yellow white|SEQ', 'white, yellow|q'),
|
211
|
+
wd('blue yellow|SEQ', 'yellow, blue|q'),
|
212
|
+
wd('blue blue|SEQ', 'blue, blue|q'),
|
213
|
+
wd('red blue|SEQ', 'blue, red|q'),
|
214
|
+
wd('red red|SEQ', 'red, red|q'),
|
215
|
+
wd('green red|SEQ', 'red, green|q'),
|
216
|
+
wd('vegetable green|SEQ', 'green, vegetable|q'),
|
217
|
+
wd('fish tree|SEQ', 'tree, fish|q'),
|
218
|
+
wd('fresh fish|SEQ', 'fish, fresh|q'),
|
219
|
+
wd('vegetable food|SEQ', 'food, vegetable|q'),
|
220
|
+
wd('cool fruit|SEQ', 'fruit, cool|q'),
|
221
|
+
wd('dry weather|SEQ', 'weather, dry|q'),
|
222
|
+
wd('white cold|SEQ', 'cold, white|q'),
|
223
|
+
wd('green white|SEQ', 'white, green|q'),
|
224
|
+
wd('yellow green|SEQ', 'green, yellow|q'),
|
225
|
+
wd('white yellow|SEQ', 'yellow, white|q'),
|
226
|
+
wd('hot hot water|SEQ', 'water, hot hot|q'),
|
227
|
+
wd('warm warm dry|SEQ', 'dry, warm warm|q'),
|
228
|
+
wd('yellow white leaves|SEQ', 'leave, yellow white|q'),
|
229
|
+
wd('blue yellow white|SEQ', 'white, blue yellow|q'),
|
230
|
+
wd('blue blue yellow|SEQ', 'yellow, blue blue|q'),
|
231
|
+
wd('red blue blue|SEQ', 'blue, red blue|q'),
|
232
|
+
wd('red red blue|SEQ', 'blue, red red|q'),
|
233
|
+
wd('green red red|SEQ', 'red, green red|q'),
|
234
|
+
wd('vegetable green red|SEQ', 'red, vegetable green|q'),
|
235
|
+
wd('fresh fish tree|SEQ', 'tree, fresh fish|q'),
|
236
|
+
wd('warm dry weather|SEQ', 'weather, warm dry|q'),
|
237
|
+
wd('green white cold|SEQ', 'cold, green white|q'),
|
238
|
+
wd('yellow green white|SEQ', 'white, yellow green|q'),
|
239
|
+
wd('white yellow green|SEQ', 'green, white yellow|q'),
|
240
|
+
ai('EOF|'),
|
241
|
+
ai('EOT|')
|
242
|
+
]
|
243
|
+
end
|
244
|
+
|
7
245
|
def test_basic
|
8
246
|
meet({}, [
|
9
247
|
# AS
|
@@ -23,12 +261,12 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
23
261
|
wd('helle|IDF', 'hell|a'),
|
24
262
|
wd('Sonne|IDF', 'sonne|s'),
|
25
263
|
tk('.|PUNC'),
|
26
|
-
wd('
|
264
|
+
wd('helle Sonne|SEQ', 'sonne, hell|q'),
|
27
265
|
# AK
|
28
266
|
wd('Der|IDF', 'der|w'),
|
29
267
|
wd('schöne|IDF', 'schön|a'),
|
30
268
|
wd('Sonnenuntergang|COM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
|
31
|
-
wd('
|
269
|
+
wd('schöne Sonnenuntergang|SEQ', 'sonnenuntergang, schön|q'),
|
32
270
|
ai('EOF|'),
|
33
271
|
ai('EOT|')
|
34
272
|
])
|
@@ -67,14 +305,14 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
67
305
|
wd('Gottes|IDF', 'gott|s'),
|
68
306
|
wd('Turm|IDF', 'turm|s'),
|
69
307
|
tk('.|PUNC'),
|
70
|
-
wd('
|
71
|
-
wd('
|
72
|
-
wd('
|
308
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
309
|
+
wd('Gottes Turm|SEQ', 'gott turm|q'),
|
310
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
|
73
311
|
# SS
|
74
312
|
wd('Der|IDF', 'der|w'),
|
75
313
|
wd('Sonne|IDF', 'sonne|s'),
|
76
314
|
wd('Untergang|IDF', 'untergang|s'),
|
77
|
-
wd('
|
315
|
+
wd('Sonne Untergang|SEQ', 'sonne untergang|q'),
|
78
316
|
ai('EOF|'),
|
79
317
|
ai('EOT|')
|
80
318
|
])
|
@@ -99,7 +337,7 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
99
337
|
wd('Gottes|IDF', 'gott|s'),
|
100
338
|
wd('Turm|IDF', 'turm|s'),
|
101
339
|
tk('.|PUNC'),
|
102
|
-
wd('
|
340
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
|
103
341
|
ai('EOF|'),
|
104
342
|
ai('EOT|')
|
105
343
|
])
|
@@ -125,15 +363,15 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
125
363
|
wd('Gottes|IDF', 'gott|s'),
|
126
364
|
wd('Turm|IDF', 'turm|s'),
|
127
365
|
tk('.|PUNC'),
|
128
|
-
wd('
|
129
|
-
wd('
|
130
|
-
wd('
|
131
|
-
wd('
|
366
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
|
367
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
368
|
+
wd('Gottes Turm|SEQ', 'gott turm|q'),
|
369
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
|
132
370
|
# SS
|
133
371
|
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
134
372
|
wd('Abbild|IDF', 'abbild|s'),
|
135
373
|
wd('Gottes|IDF', 'gott|s'),
|
136
|
-
wd('
|
374
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
137
375
|
ai('EOF|'),
|
138
376
|
ai('EOT|')
|
139
377
|
])
|
@@ -162,14 +400,14 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
162
400
|
wd('Gottes|IDF', 'gott|s'),
|
163
401
|
wd('Turm|IDF', 'turm|s'),
|
164
402
|
tk('.|PUNC'),
|
165
|
-
wd('
|
166
|
-
wd('
|
167
|
-
wd('
|
403
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
|
404
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
405
|
+
wd('Gottes Turm|SEQ', 'gott turm|q'),
|
168
406
|
# SS
|
169
407
|
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
170
408
|
wd('Abbild|IDF', 'abbild|s'),
|
171
409
|
wd('Gottes|IDF', 'gott|s'),
|
172
|
-
wd('
|
410
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
173
411
|
ai('EOF|'),
|
174
412
|
ai('EOT|')
|
175
413
|
])
|
@@ -228,14 +466,14 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
228
466
|
wd('Gottes|IDF', 'gott|s'),
|
229
467
|
wd('Turm|IDF', 'turm|s'),
|
230
468
|
tk('.|PUNC'),
|
231
|
-
wd('
|
232
|
-
wd('
|
233
|
-
wd('
|
469
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
|
470
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
471
|
+
wd('Gottes Turm|SEQ', 'gott turm|q'),
|
234
472
|
# SS
|
235
473
|
wd('Abbild Gottes|MUL', 'abbild gottes|m'),
|
236
474
|
wd('Abbild|IDF', 'abbild|s'),
|
237
475
|
wd('Gottes|IDF', 'gott|s'),
|
238
|
-
wd('
|
476
|
+
wd('Abbild Gottes|SEQ', 'abbild gott|q'),
|
239
477
|
ai('EOF|'),
|
240
478
|
ai('EOT|')
|
241
479
|
])
|
@@ -260,10 +498,10 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
260
498
|
wd('Gottes|IDF', 'gott|s'),
|
261
499
|
wd('Turm|IDF', 'turm|s'),
|
262
500
|
tk('.|PUNC'),
|
263
|
-
wd('
|
264
|
-
wd('
|
265
|
-
#wd('
|
266
|
-
wd('
|
501
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gottes turm|q'),
|
502
|
+
wd('Abbild Gottes Turm|SEQ', 'abbild gott turm|q'),
|
503
|
+
#wd('Abbild Gottes|SEQ', 'abbild gott|q'), # FIXME
|
504
|
+
wd('Gottes Turm|SEQ', 'gott turm|q'),
|
267
505
|
ai('EOF|'),
|
268
506
|
ai('EOT|')
|
269
507
|
])
|
@@ -288,10 +526,10 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
288
526
|
wd('Gottes|IDF', 'gott|s'),
|
289
527
|
wd('Turm|IDF', 'turm|s'),
|
290
528
|
tk('.|PUNC'),
|
291
|
-
wd('
|
292
|
-
wd('
|
293
|
-
#wd('
|
294
|
-
wd('
|
529
|
+
wd('Abbild Gottes Turm|SEQ', 'ms:abbild gottes^turm|q'),
|
530
|
+
wd('Abbild Gottes Turm|SEQ', 'sss:abbild^gott^turm|q'),
|
531
|
+
#wd('Abbild Gottes|SEQ', 'ss:abbild^gott|q'), # FIXME
|
532
|
+
wd('Gottes Turm|SEQ', 'ss:gott^turm|q'),
|
295
533
|
ai('EOF|'),
|
296
534
|
ai('EOT|')
|
297
535
|
])
|
@@ -316,14 +554,14 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
316
554
|
wd('helle|IDF', 'hell|a'),
|
317
555
|
wd('Sonne|IDF', 'sonne|s'),
|
318
556
|
tk('.|PUNC'),
|
319
|
-
wd('
|
320
|
-
wd('
|
557
|
+
wd('Die helle|SEQ', 'die hell (wa)|q'),
|
558
|
+
wd('helle Sonne|SEQ', 'as: sonne, hell|q'),
|
321
559
|
# WA + AK
|
322
560
|
wd('Der|IDF', 'der|w'),
|
323
561
|
wd('schöne|IDF', 'schön|a'),
|
324
562
|
wd('Sonnenuntergang|COM', 'sonnenuntergang|k', 'sonne|s+', 'untergang|s+'),
|
325
|
-
wd('
|
326
|
-
wd('
|
563
|
+
wd('Der schöne|SEQ', 'der schön (wa)|q'),
|
564
|
+
wd('schöne Sonnenuntergang|SEQ', 'ak: sonnenuntergang, schön|q'),
|
327
565
|
ai('EOF|'),
|
328
566
|
ai('EOT|')
|
329
567
|
])
|
@@ -353,4 +591,20 @@ class TestAttendeeSequencer < AttendeeTestCase
|
|
353
591
|
])
|
354
592
|
end
|
355
593
|
|
594
|
+
def test_many_permutations
|
595
|
+
meet({}, @perm, @out1)
|
596
|
+
end
|
597
|
+
|
598
|
+
def test_many_permutations_simple_regex1
|
599
|
+
meet({ 'sequences' => [['A[SK]', '2, 1'], ['AA[SK]', '3, 1 2']] }, @perm, @out1)
|
600
|
+
end
|
601
|
+
|
602
|
+
def test_many_permutations_simple_regex2
|
603
|
+
meet({ 'sequences' => [['A(S|K)', '2, 1'], ['AA(?:S|K)', '3, 1 2']] }, @perm, @out1)
|
604
|
+
end
|
605
|
+
|
606
|
+
def test_many_permutations_complex_regex
|
607
|
+
meet({ 'sequences' => [['A{1}(S|K)', '2, 1'], ['A{2}(S|K)', '3, 1 2']] }, @perm, @out2)
|
608
|
+
end unless ENV['LINGO_DISABLE_SLOW_TESTS'] # ~60s
|
609
|
+
|
356
610
|
end
|