scout-gear 7.2.0 → 8.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +51 -6
- data/VERSION +1 -1
- data/bin/scout +6 -3
- data/lib/rbbt-scout.rb +1 -0
- data/lib/scout/cmd.rb +1 -1
- data/lib/scout/concurrent_stream.rb +33 -29
- data/lib/scout/config.rb +1 -1
- data/lib/scout/exceptions.rb +1 -0
- data/lib/scout/log/color.rb +4 -2
- data/lib/scout/log/progress/report.rb +1 -1
- data/lib/scout/log/progress/util.rb +71 -2
- data/lib/scout/log/progress.rb +1 -1
- data/lib/scout/log/trap.rb +107 -0
- data/lib/scout/log.rb +56 -21
- data/lib/scout/meta_extension.rb +13 -6
- data/lib/scout/misc/digest.rb +1 -1
- data/lib/scout/misc/format.rb +12 -0
- data/lib/scout/misc/helper.rb +31 -0
- data/lib/scout/misc/insist.rb +1 -1
- data/lib/scout/misc/monitor.rb +12 -1
- data/lib/scout/misc/system.rb +10 -0
- data/lib/scout/misc.rb +1 -0
- data/lib/scout/named_array.rb +65 -3
- data/lib/scout/open/lock/lockfile.rb +587 -0
- data/lib/scout/open/lock.rb +28 -2
- data/lib/scout/open/remote.rb +4 -0
- data/lib/scout/open/stream.rb +111 -42
- data/lib/scout/open/util.rb +13 -3
- data/lib/scout/path/find.rb +9 -1
- data/lib/scout/path/util.rb +35 -0
- data/lib/scout/persist/serialize.rb +18 -5
- data/lib/scout/persist.rb +60 -30
- data/lib/scout/resource/path.rb +53 -0
- data/lib/scout/resource/produce.rb +0 -8
- data/lib/scout/resource/util.rb +2 -1
- data/lib/scout/semaphore.rb +8 -1
- data/lib/scout/tmpfile.rb +7 -8
- data/lib/scout/tsv/attach.rb +177 -0
- data/lib/scout/tsv/change_id.rb +40 -0
- data/lib/scout/tsv/dumper.rb +85 -54
- data/lib/scout/tsv/index.rb +188 -20
- data/lib/scout/tsv/open.rb +182 -0
- data/lib/scout/tsv/parser.rb +200 -118
- data/lib/scout/tsv/path.rb +5 -6
- data/lib/scout/tsv/persist/adapter.rb +26 -37
- data/lib/scout/tsv/persist/fix_width_table.rb +327 -0
- data/lib/scout/tsv/persist/serialize.rb +117 -0
- data/lib/scout/tsv/persist/tokyocabinet.rb +6 -3
- data/lib/scout/tsv/persist.rb +4 -2
- data/lib/scout/tsv/transformer.rb +141 -0
- data/lib/scout/tsv/traverse.rb +136 -37
- data/lib/scout/tsv/util/filter.rb +312 -0
- data/lib/scout/tsv/util/process.rb +73 -0
- data/lib/scout/tsv/util/reorder.rb +81 -0
- data/lib/scout/tsv/util/select.rb +265 -0
- data/lib/scout/tsv/util/unzip.rb +86 -0
- data/lib/scout/tsv/util.rb +126 -19
- data/lib/scout/tsv.rb +28 -5
- data/lib/scout/work_queue/socket.rb +6 -1
- data/lib/scout/work_queue/worker.rb +5 -2
- data/lib/scout/work_queue.rb +15 -8
- data/lib/scout/workflow/definition.rb +29 -2
- data/lib/scout/workflow/step/dependencies.rb +24 -4
- data/lib/scout/workflow/step/info.rb +40 -5
- data/lib/scout/workflow/step/progress.rb +14 -0
- data/lib/scout/workflow/step/provenance.rb +8 -7
- data/lib/scout/workflow/step/status.rb +45 -0
- data/lib/scout/workflow/step.rb +104 -33
- data/lib/scout/workflow/task/inputs.rb +14 -20
- data/lib/scout/workflow/task.rb +86 -47
- data/lib/scout/workflow/usage.rb +10 -6
- data/scout-gear.gemspec +30 -3
- data/scout_commands/workflow/task +37 -9
- data/scout_commands/workflow/task_old +2 -2
- data/test/scout/open/test_stream.rb +61 -59
- data/test/scout/path/test_find.rb +10 -1
- data/test/scout/resource/test_produce.rb +15 -0
- data/test/scout/test_meta_extension.rb +25 -0
- data/test/scout/test_named_array.rb +18 -0
- data/test/scout/test_persist.rb +67 -0
- data/test/scout/test_tmpfile.rb +1 -1
- data/test/scout/test_tsv.rb +222 -3
- data/test/scout/test_work_queue.rb +21 -18
- data/test/scout/tsv/persist/test_adapter.rb +11 -1
- data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
- data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
- data/test/scout/tsv/test_attach.rb +227 -0
- data/test/scout/tsv/test_change_id.rb +98 -0
- data/test/scout/tsv/test_dumper.rb +1 -1
- data/test/scout/tsv/test_index.rb +127 -3
- data/test/scout/tsv/test_open.rb +167 -0
- data/test/scout/tsv/test_parser.rb +45 -3
- data/test/scout/tsv/test_persist.rb +9 -0
- data/test/scout/tsv/test_transformer.rb +108 -0
- data/test/scout/tsv/test_traverse.rb +195 -3
- data/test/scout/tsv/test_util.rb +24 -0
- data/test/scout/tsv/util/test_filter.rb +188 -0
- data/test/scout/tsv/util/test_process.rb +47 -0
- data/test/scout/tsv/util/test_reorder.rb +94 -0
- data/test/scout/tsv/util/test_select.rb +58 -0
- data/test/scout/tsv/util/test_unzip.rb +112 -0
- data/test/scout/work_queue/test_socket.rb +0 -1
- data/test/scout/work_queue/test_worker.rb +63 -6
- data/test/scout/workflow/step/test_load.rb +3 -3
- data/test/scout/workflow/step/test_status.rb +31 -0
- data/test/scout/workflow/task/test_inputs.rb +14 -14
- data/test/scout/workflow/test_step.rb +13 -13
- data/test/scout/workflow/test_task.rb +168 -32
- data/test/scout/workflow/test_usage.rb +33 -6
- data/test/test_helper.rb +3 -1
- metadata +29 -2
@@ -0,0 +1,227 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
require 'scout/tsv'
|
5
|
+
|
6
|
+
class TestTSVAttach < Test::Unit::TestCase
|
7
|
+
def test_attach_simple
|
8
|
+
content1 =<<-EOF
|
9
|
+
#: :sep=" "
|
10
|
+
#ID ValueA ValueB
|
11
|
+
row1 a|aa|aaa b
|
12
|
+
row2 A B
|
13
|
+
EOF
|
14
|
+
|
15
|
+
content2 =<<-EOF
|
16
|
+
#: :sep=" "
|
17
|
+
#ID ValueB OtherID
|
18
|
+
row1 b Id1|Id2
|
19
|
+
row3 B Id3
|
20
|
+
EOF
|
21
|
+
|
22
|
+
TmpFile.with_file(content1) do |filename1|
|
23
|
+
TmpFile.with_file(content2) do |filename2|
|
24
|
+
tsv = TSV.open(filename1)
|
25
|
+
other = TSV.open(filename2)
|
26
|
+
tsv.attach other, :complete => true
|
27
|
+
assert_equal %w(Id1 Id2), tsv["row1"]["OtherID"]
|
28
|
+
assert_equal %w(Id3), tsv["row3"]["OtherID"]
|
29
|
+
assert_equal %w(B), tsv["row3"]["ValueB"]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_attach_by_key
|
35
|
+
content1 =<<-EOF
|
36
|
+
#: :sep=" "
|
37
|
+
#ID ValueA ValueB
|
38
|
+
row1 A1|A11 B1|B11
|
39
|
+
row2 A2|A22 B2|B22
|
40
|
+
EOF
|
41
|
+
|
42
|
+
content2 =<<-EOF
|
43
|
+
#: :sep=" "
|
44
|
+
#ID ValueB OtherID
|
45
|
+
row1 B1|B11 Id1|Id11
|
46
|
+
row2.2 B2|B22|B222 Id2.2|Id22.2|Id222.2
|
47
|
+
row3 B3 Id3
|
48
|
+
EOF
|
49
|
+
|
50
|
+
TmpFile.with_file(content1) do |filename1|
|
51
|
+
TmpFile.with_file(content2) do |filename2|
|
52
|
+
tsv = TSV.open(filename1)
|
53
|
+
other = TSV.open(filename2)
|
54
|
+
tsv.attach other, complete: true, match_key: "ValueB"
|
55
|
+
assert_equal %w(A1 A11), tsv["row1"]["ValueA"]
|
56
|
+
assert_equal %w(B1 B11), tsv["row1"]["ValueB"]
|
57
|
+
assert_equal %w(Id1 Id11), tsv["row1"]["OtherID"]
|
58
|
+
assert_equal %w(Id2.2 Id22.2), tsv["row2"]["OtherID"]
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_attach_by_reorder
|
64
|
+
content1 =<<-EOF
|
65
|
+
#: :sep=" "
|
66
|
+
#ID ValueA ValueB
|
67
|
+
row1 A1|A11 B1|B11
|
68
|
+
row2 A2|A22 B2|B22
|
69
|
+
EOF
|
70
|
+
|
71
|
+
content2 =<<-EOF
|
72
|
+
#: :sep=" "
|
73
|
+
#ValueB ID OtherID
|
74
|
+
B1 row1|row1.1 Id1|Id11
|
75
|
+
B2 row2 Id2.2|Id22.2|Id222.2
|
76
|
+
B3 row3 Id3
|
77
|
+
EOF
|
78
|
+
|
79
|
+
TmpFile.with_file(content1) do |filename1|
|
80
|
+
TmpFile.with_file(content2) do |filename2|
|
81
|
+
tsv = TSV.open(filename1)
|
82
|
+
other = TSV.open(filename2)
|
83
|
+
tsv.attach other, match_key: "ID", one2one: false
|
84
|
+
assert_equal %w(A1 A11), tsv["row1"]["ValueA"]
|
85
|
+
assert_equal %w(B1 B11), tsv["row1"]["ValueB"]
|
86
|
+
assert_equal %w(Id1 Id11), tsv["row1"]["OtherID"]
|
87
|
+
assert_equal %w(Id2.2 Id22.2 Id222.2), tsv["row2"]["OtherID"]
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
|
93
|
+
def test_attach_same_key
|
94
|
+
content1 =<<-EOF
|
95
|
+
#ID ValueA ValueB
|
96
|
+
row1 a|aa|aaa b
|
97
|
+
row2 A B
|
98
|
+
EOF
|
99
|
+
|
100
|
+
content2 =<<-EOF
|
101
|
+
#ID ValueB OtherID
|
102
|
+
row1 b Id1|Id2
|
103
|
+
row3 B Id3
|
104
|
+
EOF
|
105
|
+
|
106
|
+
tsv1 = tsv2 = nil
|
107
|
+
TmpFile.with_file(content1) do |filename|
|
108
|
+
tsv1 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
109
|
+
end
|
110
|
+
|
111
|
+
TmpFile.with_file(content2) do |filename|
|
112
|
+
tsv2 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
113
|
+
end
|
114
|
+
|
115
|
+
tsv1.attach tsv2, fields: "OtherID"
|
116
|
+
|
117
|
+
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
118
|
+
assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
|
119
|
+
|
120
|
+
TmpFile.with_file(content1) do |filename|
|
121
|
+
tsv1 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
122
|
+
end
|
123
|
+
|
124
|
+
tsv1.attach tsv2
|
125
|
+
|
126
|
+
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
127
|
+
|
128
|
+
tsv1 = tsv2 = nil
|
129
|
+
TmpFile.with_file(content1) do |filename|
|
130
|
+
tsv1 = TSV.open(File.open(filename), type: :list, :sep => /\s+/)
|
131
|
+
end
|
132
|
+
|
133
|
+
TmpFile.with_file(content2) do |filename|
|
134
|
+
tsv2 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
135
|
+
end
|
136
|
+
|
137
|
+
tsv1.attach tsv2, fields: "OtherID"
|
138
|
+
|
139
|
+
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
140
|
+
assert_equal "Id1", tsv1["row1"]["OtherID"]
|
141
|
+
end
|
142
|
+
|
143
|
+
def test_attach_source_field
|
144
|
+
content1 =<<-EOF
|
145
|
+
#Id ValueA ValueB
|
146
|
+
row1 a|aa|aaa b
|
147
|
+
row2 A B
|
148
|
+
EOF
|
149
|
+
|
150
|
+
content2 =<<-EOF
|
151
|
+
#ValueB OtherID
|
152
|
+
b Id1|Id2
|
153
|
+
B Id3
|
154
|
+
EOF
|
155
|
+
|
156
|
+
tsv1 = tsv2 = nil
|
157
|
+
TmpFile.with_file(content1) do |filename|
|
158
|
+
tsv1 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
159
|
+
end
|
160
|
+
|
161
|
+
TmpFile.with_file(content2) do |filename|
|
162
|
+
tsv2 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
163
|
+
end
|
164
|
+
|
165
|
+
tsv1.attach tsv2, bar: true
|
166
|
+
|
167
|
+
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
168
|
+
assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
|
169
|
+
|
170
|
+
TmpFile.with_file(content1) do |filename|
|
171
|
+
tsv1 = TSV.open(File.open(filename), type: :list, :sep => /\s+/)
|
172
|
+
end
|
173
|
+
|
174
|
+
tsv1.attach tsv2
|
175
|
+
|
176
|
+
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
177
|
+
assert_equal "Id1", tsv1["row1"]["OtherID"]
|
178
|
+
end
|
179
|
+
|
180
|
+
def test_attach_transformer
|
181
|
+
content1 =<<-EOF
|
182
|
+
#: :sep=" "
|
183
|
+
#ID ValueA ValueB
|
184
|
+
row1 a|aa|aaa b
|
185
|
+
row2 A B
|
186
|
+
EOF
|
187
|
+
|
188
|
+
content2 =<<-EOF
|
189
|
+
#: :sep=" "
|
190
|
+
#ID ValueB OtherID
|
191
|
+
row1 b Id1|Id2
|
192
|
+
row3 B Id3
|
193
|
+
EOF
|
194
|
+
|
195
|
+
TmpFile.with_file(content1) do |filename1|
|
196
|
+
TmpFile.with_file(content2) do |filename2|
|
197
|
+
out = TSV.attach filename1, filename2, target: :stream, bar: false
|
198
|
+
tsv = out.tsv
|
199
|
+
assert_equal %w(Id1 Id2), tsv["row1"]["OtherID"]
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
def test_attach_flexible_names
|
205
|
+
content1 =<<-EOF
|
206
|
+
#: :sep=" "
|
207
|
+
#ID ValueA ValueB
|
208
|
+
row1 a|aa|aaa b
|
209
|
+
row2 A B
|
210
|
+
EOF
|
211
|
+
|
212
|
+
content2 =<<-EOF
|
213
|
+
#: :sep=" "
|
214
|
+
#Identifiers(ID) OtherID
|
215
|
+
row1 Id1|Id2
|
216
|
+
row3 Id3
|
217
|
+
EOF
|
218
|
+
|
219
|
+
TmpFile.with_file(content1) do |filename1|
|
220
|
+
TmpFile.with_file(content2) do |filename2|
|
221
|
+
out = TSV.attach filename1, filename2, target: :stream, bar: false
|
222
|
+
tsv = out.tsv
|
223
|
+
assert_equal %w(Id1 Id2), tsv["row1"]["OtherID"]
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
227
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
require 'scout/tsv'
|
5
|
+
class TestChangeID < Test::Unit::TestCase
|
6
|
+
def test_simple_reorder
|
7
|
+
content1 =<<-EOF
|
8
|
+
#: :sep=" "
|
9
|
+
#ID ValueA ValueB
|
10
|
+
row1 A1|A11 B1|B11
|
11
|
+
row2 A2|A22 B2|B22
|
12
|
+
EOF
|
13
|
+
|
14
|
+
tsv = TSV.open StringIO.new(content1)
|
15
|
+
|
16
|
+
res = tsv.change_key "ValueA", keep: true
|
17
|
+
assert_equal ["row1"], res["A1"]["ID"]
|
18
|
+
assert_equal ["row1"], res["A11"]["ID"]
|
19
|
+
assert_equal ["row2"], res["A2"]["ID"]
|
20
|
+
|
21
|
+
res = tsv.change_key "ValueA", keep: false, one2one: true
|
22
|
+
assert_equal ["B1"], res["A1"]["ValueB"]
|
23
|
+
assert_equal ["B11"], res["A11"]["ValueB"]
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_simple_reorder_file
|
27
|
+
content1 =<<-EOF
|
28
|
+
#: :sep=" "
|
29
|
+
#ID ValueA ValueB
|
30
|
+
row1 A1|A11 B1|B11
|
31
|
+
row2 A2|A22 B2|B22
|
32
|
+
EOF
|
33
|
+
|
34
|
+
TmpFile.with_file(content1) do |file1|
|
35
|
+
res = TSV.change_key file1, "ValueA", keep: true
|
36
|
+
assert_equal ["row1"], res["A1"]["ID"]
|
37
|
+
assert_equal ["row1"], res["A11"]["ID"]
|
38
|
+
assert_equal ["row2"], res["A2"]["ID"]
|
39
|
+
assert_equal ["B1","B11"], res["A1"]["ValueB"]
|
40
|
+
|
41
|
+
res = TSV.change_key file1, "ValueA", one2one: true, keep: true
|
42
|
+
assert_equal ["row1"], res["A1"]["ID"]
|
43
|
+
assert_equal ["row1"], res["A11"]["ID"]
|
44
|
+
assert_equal ["row2"], res["A2"]["ID"]
|
45
|
+
assert_equal ["B1"], res["A1"]["ValueB"]
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_change_key_identifiers
|
50
|
+
content1 =<<-EOF
|
51
|
+
#: :sep=" "
|
52
|
+
#ID ValueA ValueB
|
53
|
+
row1 A1|A11 B1|B11
|
54
|
+
row2 A2|A22 B2|B22
|
55
|
+
EOF
|
56
|
+
|
57
|
+
identifiers_content =<<-EOF
|
58
|
+
#: :sep=" "
|
59
|
+
#ID ValueC ValueD
|
60
|
+
row1 C1|C11 D1|D11
|
61
|
+
row2 C2|C22 D2|D22
|
62
|
+
EOF
|
63
|
+
|
64
|
+
|
65
|
+
tsv = TSV.open StringIO.new(content1)
|
66
|
+
identifiers = TSV.open StringIO.new(identifiers_content)
|
67
|
+
|
68
|
+
res = tsv.change_key "ValueC", identifiers: identifiers, keep: true
|
69
|
+
assert_equal ["row1"], res["C1"]["ID"]
|
70
|
+
assert_equal ["row1"], res["C11"]["ID"]
|
71
|
+
assert_equal ["row2"], res["C2"]["ID"]
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_change_id_identifiers
|
75
|
+
content1 =<<-EOF
|
76
|
+
#: :sep=" "
|
77
|
+
#ID ValueA ValueB
|
78
|
+
row1 A1|A11 B1|B11
|
79
|
+
row2 A2|A22 B2|B22
|
80
|
+
EOF
|
81
|
+
|
82
|
+
identifiers_content =<<-EOF
|
83
|
+
#: :sep=" "
|
84
|
+
#ID ValueC ValueD
|
85
|
+
row1 C1|C11 D1|D11
|
86
|
+
row2 C2|C22 D2|D22
|
87
|
+
EOF
|
88
|
+
|
89
|
+
|
90
|
+
tsv = TSV.open StringIO.new(content1)
|
91
|
+
identifiers = TSV.open StringIO.new(identifiers_content)
|
92
|
+
|
93
|
+
res = tsv.change_id "ValueA", "ValueC", identifiers: identifiers
|
94
|
+
assert_equal ["C1","C11"], res["row1"]["ValueC"]
|
95
|
+
assert_equal ["C2","C22"], res["row2"]["ValueC"]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
@@ -1,8 +1,25 @@
|
|
1
1
|
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
2
|
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
3
|
|
4
|
+
require 'scout/tsv'
|
4
5
|
class TestTSVIndex < Test::Unit::TestCase
|
5
|
-
def test_index
|
6
|
+
def load_segment_data(data)
|
7
|
+
tsv = TSV.open(data, type: :list, :sep=>":", :cast => proc{|e| e =~ /(\s*)(_*)/; ($1.length..($1.length + $2.length - 1))})
|
8
|
+
|
9
|
+
tsv = tsv.add_field "Start" do |key, values|
|
10
|
+
values["Range"].first
|
11
|
+
end
|
12
|
+
|
13
|
+
tsv = tsv.add_field "End" do |key, values|
|
14
|
+
values["Range"].last
|
15
|
+
end
|
16
|
+
|
17
|
+
tsv = tsv.slice ["Start", "End"]
|
18
|
+
|
19
|
+
tsv
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_index
|
6
23
|
content =<<-'EOF'
|
7
24
|
#: :sep=/\s+/#:type=:double#:merge=:concat
|
8
25
|
#Id ValueA ValueB OtherID
|
@@ -20,12 +37,45 @@ row2 a b id3
|
|
20
37
|
end
|
21
38
|
|
22
39
|
TmpFile.with_file(content) do |filename|
|
23
|
-
index = TSV.index(filename, :target => "ValueB", :fields => "OtherID")
|
40
|
+
index = TSV.index(filename, :target => "ValueB", :fields => ["OtherID"])
|
24
41
|
assert_equal 'B', index["a"]
|
25
42
|
assert_nil index["B"]
|
26
43
|
end
|
27
44
|
end
|
28
45
|
|
46
|
+
def test_from_tsv
|
47
|
+
content =<<-'EOF'
|
48
|
+
#: :sep=/\s+/#:type=:double#:merge=:concat
|
49
|
+
#Id ValueA ValueB OtherID
|
50
|
+
row1 a|aa|aaa b Id1|Id2
|
51
|
+
row2 A|b B Id3|a
|
52
|
+
row2 a b id3
|
53
|
+
EOF
|
54
|
+
|
55
|
+
TmpFile.with_file(content) do |filename|
|
56
|
+
tsv = TSV.open(filename)
|
57
|
+
index = TSV.index(tsv, :target => "ValueB")
|
58
|
+
assert_equal 'b', index["a"]
|
59
|
+
assert_equal 'B', index["B"]
|
60
|
+
assert_equal 'b', index["b"]
|
61
|
+
|
62
|
+
index = tsv.index(:target => "ValueB")
|
63
|
+
assert_equal 'b', index["a"]
|
64
|
+
assert_equal 'B', index["B"]
|
65
|
+
assert_equal 'b', index["b"]
|
66
|
+
|
67
|
+
|
68
|
+
index = TSV.index(tsv, :target => "ValueB", :fields => "OtherID")
|
69
|
+
assert_equal 'B', index["a"]
|
70
|
+
assert_nil index["B"]
|
71
|
+
|
72
|
+
index = tsv.index(:target => "ValueB", :fields => "OtherID")
|
73
|
+
assert_equal 'B', index["a"]
|
74
|
+
assert_nil index["B"]
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
|
29
79
|
def test_persist
|
30
80
|
content =<<-'EOF'
|
31
81
|
#: :sep=/\s+/#:type=:double#:merge=:concat
|
@@ -35,7 +85,7 @@ row2 A B Id3|a
|
|
35
85
|
row2 a b id3
|
36
86
|
EOF
|
37
87
|
tsv = TmpFile.with_file(content) do |filename|
|
38
|
-
index = TSV.index(filename, :target => "ValueB", :persist => true)
|
88
|
+
index = TSV.index(filename, :target => "ValueB", :persist => true, bar: true)
|
39
89
|
assert_equal 'b', index["row1"]
|
40
90
|
assert_equal 'b', index["a"]
|
41
91
|
assert_equal 'b', index["aaa"]
|
@@ -43,6 +93,80 @@ row2 a b id3
|
|
43
93
|
end
|
44
94
|
end
|
45
95
|
|
96
|
+
def test_range_index
|
97
|
+
data =<<-EOF
|
98
|
+
# 012345678901234567890
|
99
|
+
#ID:Range
|
100
|
+
a: ______
|
101
|
+
b: ______
|
102
|
+
c: _______
|
103
|
+
d: ____
|
104
|
+
e: ______
|
105
|
+
f: ___
|
106
|
+
g: ____
|
107
|
+
EOF
|
108
|
+
TmpFile.with_file(data) do |datafile|
|
109
|
+
tsv = load_segment_data(datafile)
|
110
|
+
f = tsv.range_index("Start", "End", :persist => true)
|
111
|
+
|
112
|
+
assert_equal %w(), f[0].sort
|
113
|
+
assert_equal %w(b), f[1].sort
|
114
|
+
assert_equal %w(), f[20].sort
|
115
|
+
assert_equal %w(), f[(20..100)].sort
|
116
|
+
assert_equal %w(a b d), f[3].sort
|
117
|
+
assert_equal %w(a b c d e), f[(3..4)].sort
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
def test_pos_index
|
122
|
+
data =<<-EOF
|
123
|
+
# 012345678901234567890
|
124
|
+
#ID:Range
|
125
|
+
a: ______
|
126
|
+
b: ______
|
127
|
+
c: _______
|
128
|
+
d: ____
|
129
|
+
e: ______
|
130
|
+
f: ___
|
131
|
+
g: ____
|
132
|
+
EOF
|
133
|
+
TmpFile.with_file(data) do |datafile|
|
134
|
+
tsv = load_segment_data(datafile)
|
135
|
+
f = tsv.pos_index("Start", :persist => true)
|
136
|
+
|
137
|
+
assert_equal %w(), f[0].sort
|
138
|
+
assert_equal %w(a c d e), f[(2..4)].sort
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def test_range_index_persistent
|
143
|
+
data =<<-EOF
|
144
|
+
# 012345678901234567890
|
145
|
+
#ID:Range
|
146
|
+
a: ______
|
147
|
+
b: ______
|
148
|
+
c: _______
|
149
|
+
d: ____
|
150
|
+
e: ______
|
151
|
+
f: ___
|
152
|
+
g: ____
|
153
|
+
EOF
|
154
|
+
TmpFile.with_file(data) do |datafile|
|
155
|
+
load_segment_data(datafile)
|
156
|
+
TmpFile.with_file(load_segment_data(datafile).to_s) do |tsvfile|
|
157
|
+
f = TSV.range_index(tsvfile, "Start", "End", :persist => true)
|
158
|
+
|
159
|
+
assert_equal %w(), f[0].sort
|
160
|
+
assert_equal %w(b), f[1].sort
|
161
|
+
assert_equal %w(), f[20].sort
|
162
|
+
assert_equal %w(), f[(20..100)].sort
|
163
|
+
assert_equal %w(a b d), f[3].sort
|
164
|
+
assert_equal %w(a b c d e), f[(3..4)].sort
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
|
46
170
|
def __test_speed
|
47
171
|
content =<<-'EOF'
|
48
172
|
#: :sep=/\s+/#:type=:double#:merge=:concat
|
@@ -0,0 +1,167 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
require 'scout/tsv'
|
5
|
+
class TestOpenTraverse < Test::Unit::TestCase
|
6
|
+
def test_array
|
7
|
+
num_lines = 100
|
8
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
9
|
+
|
10
|
+
r = TSV.traverse lines, :into => [] do |l|
|
11
|
+
l + "-" + Process.pid.to_s
|
12
|
+
end
|
13
|
+
|
14
|
+
assert_equal num_lines, r.length
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_array_cpus
|
18
|
+
num_lines = 1000
|
19
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
20
|
+
|
21
|
+
r = TSV.traverse lines, :into => [], :cpus => 2 do |l|
|
22
|
+
l + "-" + Process.pid.to_s
|
23
|
+
end
|
24
|
+
|
25
|
+
assert_equal num_lines, r.length
|
26
|
+
assert_equal 2, r.collect{|l| l.split("-").last}.uniq.length
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_tsv_cpus
|
30
|
+
num_lines = 10000
|
31
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
32
|
+
|
33
|
+
tsv = TSV.setup({}, key_field: "Line", :fields => %w(Prefix Number), :type => :list)
|
34
|
+
lines.each do |line|
|
35
|
+
tsv[line] = ["LINE", line.split("-").last]
|
36
|
+
end
|
37
|
+
|
38
|
+
r = TSV.traverse tsv, :into => [], :cpus => 2, :bar => {desc: "Process", severity: 0} do |l,v|
|
39
|
+
pre, num = v
|
40
|
+
pre + "-" + num.to_s + "-" + Process.pid.to_s
|
41
|
+
end
|
42
|
+
|
43
|
+
assert_equal num_lines, r.length
|
44
|
+
assert_equal 2, r.collect{|l| l.split("-").last}.uniq.length
|
45
|
+
assert_equal "LINE", r.collect{|l| l.split("-").first}.first
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_into_stream
|
49
|
+
num_lines = 100
|
50
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
51
|
+
|
52
|
+
r = TSV.traverse lines, :into => :stream do |l|
|
53
|
+
l + "-" + Process.pid.to_s
|
54
|
+
end
|
55
|
+
|
56
|
+
assert_equal num_lines, r.read.split("\n").length
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_into_stream_error
|
60
|
+
num_lines = 100
|
61
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
62
|
+
|
63
|
+
assert_raise ScoutException do
|
64
|
+
Log.with_severity 7 do
|
65
|
+
i = 0
|
66
|
+
r = TSV.traverse lines, :into => :stream, cpus: 3 do |l|
|
67
|
+
raise ScoutException if i > 10
|
68
|
+
i += 1
|
69
|
+
l + "-" + Process.pid.to_s
|
70
|
+
end
|
71
|
+
|
72
|
+
r.read
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def test_into_dumper_error
|
78
|
+
num_lines = 100
|
79
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
80
|
+
|
81
|
+
assert_raise ScoutException do
|
82
|
+
i = 0
|
83
|
+
Log.with_severity 7 do
|
84
|
+
dumper = TSV::Dumper.new :key_field => "Key", :fields => ["Value"], :type => :single
|
85
|
+
dumper.init
|
86
|
+
dumper = TSV.traverse lines, :into => dumper, :cpus => 3 do |l|
|
87
|
+
raise ScoutException if i > 10
|
88
|
+
i += 1
|
89
|
+
value = l + "-" + Process.pid.to_s
|
90
|
+
|
91
|
+
[i.to_s, value]
|
92
|
+
end
|
93
|
+
ppp dumper.stream.read
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def test_traverse_line
|
99
|
+
text=<<-EOF
|
100
|
+
#: :sep=" "
|
101
|
+
#Row LabelA LabelB LabelC
|
102
|
+
row1 A B C
|
103
|
+
row1 a b c
|
104
|
+
row2 AA BB CC
|
105
|
+
row2 aa bb cc
|
106
|
+
EOF
|
107
|
+
|
108
|
+
TmpFile.with_file(text) do |file|
|
109
|
+
lines = Open.traverse file, :type => :line, :into => [] do |line|
|
110
|
+
line
|
111
|
+
end
|
112
|
+
assert_include lines, "row2 AA BB CC"
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def test_collapse_stream
|
117
|
+
text=<<-EOF
|
118
|
+
#: :sep=" "
|
119
|
+
#Row LabelA LabelB LabelC
|
120
|
+
row1 A B C
|
121
|
+
row1 a b c
|
122
|
+
row2 AA BB CC
|
123
|
+
row2 aa bb cc
|
124
|
+
EOF
|
125
|
+
|
126
|
+
s = StringIO.new text
|
127
|
+
collapsed = TSV.collapse_stream(s)
|
128
|
+
tsv = TSV.open collapsed
|
129
|
+
assert_equal ["A", "a"], tsv["row1"][0]
|
130
|
+
assert_equal ["BB", "bb"], tsv["row2"][1]
|
131
|
+
end
|
132
|
+
|
133
|
+
def test_cpus_error_dumper
|
134
|
+
num_lines = 100
|
135
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
136
|
+
|
137
|
+
dumper = TSV::Dumper.new :key_field => "Key", :fields => ["Field"], type: :single
|
138
|
+
dumper.init
|
139
|
+
assert_raise ScoutException do
|
140
|
+
Log.with_severity 0 do
|
141
|
+
i = 0
|
142
|
+
TSV.traverse lines, :into => dumper, cpus: 3 do |l|
|
143
|
+
raise ScoutException if i > 10
|
144
|
+
i += 1
|
145
|
+
[Process.pid.to_s, l + "-" + Process.pid.to_s]
|
146
|
+
end
|
147
|
+
|
148
|
+
end
|
149
|
+
ppp dumper.stream.read
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_step_travese_cpus
|
154
|
+
|
155
|
+
size = 1000
|
156
|
+
step = Step.new tmpdir.step[__method__] do
|
157
|
+
lines = size.times.collect{|i| "line-#{i}" }
|
158
|
+
Open.traverse lines, :type => :array, :into => :stream, :cpus => 3 do |line|
|
159
|
+
line.reverse
|
160
|
+
end
|
161
|
+
end
|
162
|
+
step.type = :array
|
163
|
+
|
164
|
+
assert_equal size, step.run.length
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|