scout-gear 7.2.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +51 -6
- data/VERSION +1 -1
- data/bin/scout +6 -3
- data/lib/rbbt-scout.rb +1 -0
- data/lib/scout/cmd.rb +1 -1
- data/lib/scout/concurrent_stream.rb +33 -29
- data/lib/scout/config.rb +1 -1
- data/lib/scout/exceptions.rb +1 -0
- data/lib/scout/log/color.rb +4 -2
- data/lib/scout/log/progress/report.rb +1 -1
- data/lib/scout/log/progress/util.rb +71 -2
- data/lib/scout/log/progress.rb +1 -1
- data/lib/scout/log/trap.rb +107 -0
- data/lib/scout/log.rb +56 -21
- data/lib/scout/meta_extension.rb +13 -6
- data/lib/scout/misc/digest.rb +1 -1
- data/lib/scout/misc/format.rb +12 -0
- data/lib/scout/misc/helper.rb +31 -0
- data/lib/scout/misc/insist.rb +1 -1
- data/lib/scout/misc/monitor.rb +12 -1
- data/lib/scout/misc/system.rb +10 -0
- data/lib/scout/misc.rb +1 -0
- data/lib/scout/named_array.rb +65 -3
- data/lib/scout/open/lock/lockfile.rb +587 -0
- data/lib/scout/open/lock.rb +28 -2
- data/lib/scout/open/remote.rb +4 -0
- data/lib/scout/open/stream.rb +111 -42
- data/lib/scout/open/util.rb +13 -3
- data/lib/scout/path/find.rb +9 -1
- data/lib/scout/path/util.rb +35 -0
- data/lib/scout/persist/serialize.rb +18 -5
- data/lib/scout/persist.rb +60 -30
- data/lib/scout/resource/path.rb +53 -0
- data/lib/scout/resource/produce.rb +0 -8
- data/lib/scout/resource/util.rb +2 -1
- data/lib/scout/semaphore.rb +8 -1
- data/lib/scout/tmpfile.rb +7 -8
- data/lib/scout/tsv/attach.rb +177 -0
- data/lib/scout/tsv/change_id.rb +40 -0
- data/lib/scout/tsv/dumper.rb +85 -54
- data/lib/scout/tsv/index.rb +188 -20
- data/lib/scout/tsv/open.rb +182 -0
- data/lib/scout/tsv/parser.rb +200 -118
- data/lib/scout/tsv/path.rb +5 -6
- data/lib/scout/tsv/persist/adapter.rb +26 -37
- data/lib/scout/tsv/persist/fix_width_table.rb +327 -0
- data/lib/scout/tsv/persist/serialize.rb +117 -0
- data/lib/scout/tsv/persist/tokyocabinet.rb +6 -3
- data/lib/scout/tsv/persist.rb +4 -2
- data/lib/scout/tsv/transformer.rb +141 -0
- data/lib/scout/tsv/traverse.rb +136 -37
- data/lib/scout/tsv/util/filter.rb +312 -0
- data/lib/scout/tsv/util/process.rb +73 -0
- data/lib/scout/tsv/util/reorder.rb +81 -0
- data/lib/scout/tsv/util/select.rb +265 -0
- data/lib/scout/tsv/util/unzip.rb +86 -0
- data/lib/scout/tsv/util.rb +126 -19
- data/lib/scout/tsv.rb +28 -5
- data/lib/scout/work_queue/socket.rb +6 -1
- data/lib/scout/work_queue/worker.rb +5 -2
- data/lib/scout/work_queue.rb +15 -8
- data/lib/scout/workflow/definition.rb +29 -2
- data/lib/scout/workflow/step/dependencies.rb +24 -4
- data/lib/scout/workflow/step/info.rb +40 -5
- data/lib/scout/workflow/step/progress.rb +14 -0
- data/lib/scout/workflow/step/provenance.rb +8 -7
- data/lib/scout/workflow/step/status.rb +45 -0
- data/lib/scout/workflow/step.rb +104 -33
- data/lib/scout/workflow/task/inputs.rb +14 -20
- data/lib/scout/workflow/task.rb +86 -47
- data/lib/scout/workflow/usage.rb +10 -6
- data/scout-gear.gemspec +30 -3
- data/scout_commands/workflow/task +37 -9
- data/scout_commands/workflow/task_old +2 -2
- data/test/scout/open/test_stream.rb +61 -59
- data/test/scout/path/test_find.rb +10 -1
- data/test/scout/resource/test_produce.rb +15 -0
- data/test/scout/test_meta_extension.rb +25 -0
- data/test/scout/test_named_array.rb +18 -0
- data/test/scout/test_persist.rb +67 -0
- data/test/scout/test_tmpfile.rb +1 -1
- data/test/scout/test_tsv.rb +222 -3
- data/test/scout/test_work_queue.rb +21 -18
- data/test/scout/tsv/persist/test_adapter.rb +11 -1
- data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
- data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
- data/test/scout/tsv/test_attach.rb +227 -0
- data/test/scout/tsv/test_change_id.rb +98 -0
- data/test/scout/tsv/test_dumper.rb +1 -1
- data/test/scout/tsv/test_index.rb +127 -3
- data/test/scout/tsv/test_open.rb +167 -0
- data/test/scout/tsv/test_parser.rb +45 -3
- data/test/scout/tsv/test_persist.rb +9 -0
- data/test/scout/tsv/test_transformer.rb +108 -0
- data/test/scout/tsv/test_traverse.rb +195 -3
- data/test/scout/tsv/test_util.rb +24 -0
- data/test/scout/tsv/util/test_filter.rb +188 -0
- data/test/scout/tsv/util/test_process.rb +47 -0
- data/test/scout/tsv/util/test_reorder.rb +94 -0
- data/test/scout/tsv/util/test_select.rb +58 -0
- data/test/scout/tsv/util/test_unzip.rb +112 -0
- data/test/scout/work_queue/test_socket.rb +0 -1
- data/test/scout/work_queue/test_worker.rb +63 -6
- data/test/scout/workflow/step/test_load.rb +3 -3
- data/test/scout/workflow/step/test_status.rb +31 -0
- data/test/scout/workflow/task/test_inputs.rb +14 -14
- data/test/scout/workflow/test_step.rb +13 -13
- data/test/scout/workflow/test_task.rb +168 -32
- data/test/scout/workflow/test_usage.rb +33 -6
- data/test/test_helper.rb +3 -1
- metadata +29 -2
@@ -0,0 +1,227 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
require 'scout/tsv'
|
5
|
+
|
6
|
+
class TestTSVAttach < Test::Unit::TestCase
|
7
|
+
def test_attach_simple
|
8
|
+
content1 =<<-EOF
|
9
|
+
#: :sep=" "
|
10
|
+
#ID ValueA ValueB
|
11
|
+
row1 a|aa|aaa b
|
12
|
+
row2 A B
|
13
|
+
EOF
|
14
|
+
|
15
|
+
content2 =<<-EOF
|
16
|
+
#: :sep=" "
|
17
|
+
#ID ValueB OtherID
|
18
|
+
row1 b Id1|Id2
|
19
|
+
row3 B Id3
|
20
|
+
EOF
|
21
|
+
|
22
|
+
TmpFile.with_file(content1) do |filename1|
|
23
|
+
TmpFile.with_file(content2) do |filename2|
|
24
|
+
tsv = TSV.open(filename1)
|
25
|
+
other = TSV.open(filename2)
|
26
|
+
tsv.attach other, :complete => true
|
27
|
+
assert_equal %w(Id1 Id2), tsv["row1"]["OtherID"]
|
28
|
+
assert_equal %w(Id3), tsv["row3"]["OtherID"]
|
29
|
+
assert_equal %w(B), tsv["row3"]["ValueB"]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_attach_by_key
|
35
|
+
content1 =<<-EOF
|
36
|
+
#: :sep=" "
|
37
|
+
#ID ValueA ValueB
|
38
|
+
row1 A1|A11 B1|B11
|
39
|
+
row2 A2|A22 B2|B22
|
40
|
+
EOF
|
41
|
+
|
42
|
+
content2 =<<-EOF
|
43
|
+
#: :sep=" "
|
44
|
+
#ID ValueB OtherID
|
45
|
+
row1 B1|B11 Id1|Id11
|
46
|
+
row2.2 B2|B22|B222 Id2.2|Id22.2|Id222.2
|
47
|
+
row3 B3 Id3
|
48
|
+
EOF
|
49
|
+
|
50
|
+
TmpFile.with_file(content1) do |filename1|
|
51
|
+
TmpFile.with_file(content2) do |filename2|
|
52
|
+
tsv = TSV.open(filename1)
|
53
|
+
other = TSV.open(filename2)
|
54
|
+
tsv.attach other, complete: true, match_key: "ValueB"
|
55
|
+
assert_equal %w(A1 A11), tsv["row1"]["ValueA"]
|
56
|
+
assert_equal %w(B1 B11), tsv["row1"]["ValueB"]
|
57
|
+
assert_equal %w(Id1 Id11), tsv["row1"]["OtherID"]
|
58
|
+
assert_equal %w(Id2.2 Id22.2), tsv["row2"]["OtherID"]
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_attach_by_reorder
|
64
|
+
content1 =<<-EOF
|
65
|
+
#: :sep=" "
|
66
|
+
#ID ValueA ValueB
|
67
|
+
row1 A1|A11 B1|B11
|
68
|
+
row2 A2|A22 B2|B22
|
69
|
+
EOF
|
70
|
+
|
71
|
+
content2 =<<-EOF
|
72
|
+
#: :sep=" "
|
73
|
+
#ValueB ID OtherID
|
74
|
+
B1 row1|row1.1 Id1|Id11
|
75
|
+
B2 row2 Id2.2|Id22.2|Id222.2
|
76
|
+
B3 row3 Id3
|
77
|
+
EOF
|
78
|
+
|
79
|
+
TmpFile.with_file(content1) do |filename1|
|
80
|
+
TmpFile.with_file(content2) do |filename2|
|
81
|
+
tsv = TSV.open(filename1)
|
82
|
+
other = TSV.open(filename2)
|
83
|
+
tsv.attach other, match_key: "ID", one2one: false
|
84
|
+
assert_equal %w(A1 A11), tsv["row1"]["ValueA"]
|
85
|
+
assert_equal %w(B1 B11), tsv["row1"]["ValueB"]
|
86
|
+
assert_equal %w(Id1 Id11), tsv["row1"]["OtherID"]
|
87
|
+
assert_equal %w(Id2.2 Id22.2 Id222.2), tsv["row2"]["OtherID"]
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
|
93
|
+
def test_attach_same_key
|
94
|
+
content1 =<<-EOF
|
95
|
+
#ID ValueA ValueB
|
96
|
+
row1 a|aa|aaa b
|
97
|
+
row2 A B
|
98
|
+
EOF
|
99
|
+
|
100
|
+
content2 =<<-EOF
|
101
|
+
#ID ValueB OtherID
|
102
|
+
row1 b Id1|Id2
|
103
|
+
row3 B Id3
|
104
|
+
EOF
|
105
|
+
|
106
|
+
tsv1 = tsv2 = nil
|
107
|
+
TmpFile.with_file(content1) do |filename|
|
108
|
+
tsv1 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
109
|
+
end
|
110
|
+
|
111
|
+
TmpFile.with_file(content2) do |filename|
|
112
|
+
tsv2 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
113
|
+
end
|
114
|
+
|
115
|
+
tsv1.attach tsv2, fields: "OtherID"
|
116
|
+
|
117
|
+
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
118
|
+
assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
|
119
|
+
|
120
|
+
TmpFile.with_file(content1) do |filename|
|
121
|
+
tsv1 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
122
|
+
end
|
123
|
+
|
124
|
+
tsv1.attach tsv2
|
125
|
+
|
126
|
+
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
127
|
+
|
128
|
+
tsv1 = tsv2 = nil
|
129
|
+
TmpFile.with_file(content1) do |filename|
|
130
|
+
tsv1 = TSV.open(File.open(filename), type: :list, :sep => /\s+/)
|
131
|
+
end
|
132
|
+
|
133
|
+
TmpFile.with_file(content2) do |filename|
|
134
|
+
tsv2 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
135
|
+
end
|
136
|
+
|
137
|
+
tsv1.attach tsv2, fields: "OtherID"
|
138
|
+
|
139
|
+
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
140
|
+
assert_equal "Id1", tsv1["row1"]["OtherID"]
|
141
|
+
end
|
142
|
+
|
143
|
+
def test_attach_source_field
|
144
|
+
content1 =<<-EOF
|
145
|
+
#Id ValueA ValueB
|
146
|
+
row1 a|aa|aaa b
|
147
|
+
row2 A B
|
148
|
+
EOF
|
149
|
+
|
150
|
+
content2 =<<-EOF
|
151
|
+
#ValueB OtherID
|
152
|
+
b Id1|Id2
|
153
|
+
B Id3
|
154
|
+
EOF
|
155
|
+
|
156
|
+
tsv1 = tsv2 = nil
|
157
|
+
TmpFile.with_file(content1) do |filename|
|
158
|
+
tsv1 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
159
|
+
end
|
160
|
+
|
161
|
+
TmpFile.with_file(content2) do |filename|
|
162
|
+
tsv2 = TSV.open(File.open(filename), type: :double, :sep => /\s+/)
|
163
|
+
end
|
164
|
+
|
165
|
+
tsv1.attach tsv2, bar: true
|
166
|
+
|
167
|
+
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
168
|
+
assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
|
169
|
+
|
170
|
+
TmpFile.with_file(content1) do |filename|
|
171
|
+
tsv1 = TSV.open(File.open(filename), type: :list, :sep => /\s+/)
|
172
|
+
end
|
173
|
+
|
174
|
+
tsv1.attach tsv2
|
175
|
+
|
176
|
+
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
177
|
+
assert_equal "Id1", tsv1["row1"]["OtherID"]
|
178
|
+
end
|
179
|
+
|
180
|
+
def test_attach_transformer
|
181
|
+
content1 =<<-EOF
|
182
|
+
#: :sep=" "
|
183
|
+
#ID ValueA ValueB
|
184
|
+
row1 a|aa|aaa b
|
185
|
+
row2 A B
|
186
|
+
EOF
|
187
|
+
|
188
|
+
content2 =<<-EOF
|
189
|
+
#: :sep=" "
|
190
|
+
#ID ValueB OtherID
|
191
|
+
row1 b Id1|Id2
|
192
|
+
row3 B Id3
|
193
|
+
EOF
|
194
|
+
|
195
|
+
TmpFile.with_file(content1) do |filename1|
|
196
|
+
TmpFile.with_file(content2) do |filename2|
|
197
|
+
out = TSV.attach filename1, filename2, target: :stream, bar: false
|
198
|
+
tsv = out.tsv
|
199
|
+
assert_equal %w(Id1 Id2), tsv["row1"]["OtherID"]
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
def test_attach_flexible_names
|
205
|
+
content1 =<<-EOF
|
206
|
+
#: :sep=" "
|
207
|
+
#ID ValueA ValueB
|
208
|
+
row1 a|aa|aaa b
|
209
|
+
row2 A B
|
210
|
+
EOF
|
211
|
+
|
212
|
+
content2 =<<-EOF
|
213
|
+
#: :sep=" "
|
214
|
+
#Identifiers(ID) OtherID
|
215
|
+
row1 Id1|Id2
|
216
|
+
row3 Id3
|
217
|
+
EOF
|
218
|
+
|
219
|
+
TmpFile.with_file(content1) do |filename1|
|
220
|
+
TmpFile.with_file(content2) do |filename2|
|
221
|
+
out = TSV.attach filename1, filename2, target: :stream, bar: false
|
222
|
+
tsv = out.tsv
|
223
|
+
assert_equal %w(Id1 Id2), tsv["row1"]["OtherID"]
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
227
|
+
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
require 'scout/tsv'
|
5
|
+
class TestChangeID < Test::Unit::TestCase
|
6
|
+
def test_simple_reorder
|
7
|
+
content1 =<<-EOF
|
8
|
+
#: :sep=" "
|
9
|
+
#ID ValueA ValueB
|
10
|
+
row1 A1|A11 B1|B11
|
11
|
+
row2 A2|A22 B2|B22
|
12
|
+
EOF
|
13
|
+
|
14
|
+
tsv = TSV.open StringIO.new(content1)
|
15
|
+
|
16
|
+
res = tsv.change_key "ValueA", keep: true
|
17
|
+
assert_equal ["row1"], res["A1"]["ID"]
|
18
|
+
assert_equal ["row1"], res["A11"]["ID"]
|
19
|
+
assert_equal ["row2"], res["A2"]["ID"]
|
20
|
+
|
21
|
+
res = tsv.change_key "ValueA", keep: false, one2one: true
|
22
|
+
assert_equal ["B1"], res["A1"]["ValueB"]
|
23
|
+
assert_equal ["B11"], res["A11"]["ValueB"]
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_simple_reorder_file
|
27
|
+
content1 =<<-EOF
|
28
|
+
#: :sep=" "
|
29
|
+
#ID ValueA ValueB
|
30
|
+
row1 A1|A11 B1|B11
|
31
|
+
row2 A2|A22 B2|B22
|
32
|
+
EOF
|
33
|
+
|
34
|
+
TmpFile.with_file(content1) do |file1|
|
35
|
+
res = TSV.change_key file1, "ValueA", keep: true
|
36
|
+
assert_equal ["row1"], res["A1"]["ID"]
|
37
|
+
assert_equal ["row1"], res["A11"]["ID"]
|
38
|
+
assert_equal ["row2"], res["A2"]["ID"]
|
39
|
+
assert_equal ["B1","B11"], res["A1"]["ValueB"]
|
40
|
+
|
41
|
+
res = TSV.change_key file1, "ValueA", one2one: true, keep: true
|
42
|
+
assert_equal ["row1"], res["A1"]["ID"]
|
43
|
+
assert_equal ["row1"], res["A11"]["ID"]
|
44
|
+
assert_equal ["row2"], res["A2"]["ID"]
|
45
|
+
assert_equal ["B1"], res["A1"]["ValueB"]
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_change_key_identifiers
|
50
|
+
content1 =<<-EOF
|
51
|
+
#: :sep=" "
|
52
|
+
#ID ValueA ValueB
|
53
|
+
row1 A1|A11 B1|B11
|
54
|
+
row2 A2|A22 B2|B22
|
55
|
+
EOF
|
56
|
+
|
57
|
+
identifiers_content =<<-EOF
|
58
|
+
#: :sep=" "
|
59
|
+
#ID ValueC ValueD
|
60
|
+
row1 C1|C11 D1|D11
|
61
|
+
row2 C2|C22 D2|D22
|
62
|
+
EOF
|
63
|
+
|
64
|
+
|
65
|
+
tsv = TSV.open StringIO.new(content1)
|
66
|
+
identifiers = TSV.open StringIO.new(identifiers_content)
|
67
|
+
|
68
|
+
res = tsv.change_key "ValueC", identifiers: identifiers, keep: true
|
69
|
+
assert_equal ["row1"], res["C1"]["ID"]
|
70
|
+
assert_equal ["row1"], res["C11"]["ID"]
|
71
|
+
assert_equal ["row2"], res["C2"]["ID"]
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_change_id_identifiers
|
75
|
+
content1 =<<-EOF
|
76
|
+
#: :sep=" "
|
77
|
+
#ID ValueA ValueB
|
78
|
+
row1 A1|A11 B1|B11
|
79
|
+
row2 A2|A22 B2|B22
|
80
|
+
EOF
|
81
|
+
|
82
|
+
identifiers_content =<<-EOF
|
83
|
+
#: :sep=" "
|
84
|
+
#ID ValueC ValueD
|
85
|
+
row1 C1|C11 D1|D11
|
86
|
+
row2 C2|C22 D2|D22
|
87
|
+
EOF
|
88
|
+
|
89
|
+
|
90
|
+
tsv = TSV.open StringIO.new(content1)
|
91
|
+
identifiers = TSV.open StringIO.new(identifiers_content)
|
92
|
+
|
93
|
+
res = tsv.change_id "ValueA", "ValueC", identifiers: identifiers
|
94
|
+
assert_equal ["C1","C11"], res["row1"]["ValueC"]
|
95
|
+
assert_equal ["C2","C22"], res["row2"]["ValueC"]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
@@ -1,8 +1,25 @@
|
|
1
1
|
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
2
|
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
3
|
|
4
|
+
require 'scout/tsv'
|
4
5
|
class TestTSVIndex < Test::Unit::TestCase
|
5
|
-
def
|
6
|
+
def load_segment_data(data)
|
7
|
+
tsv = TSV.open(data, type: :list, :sep=>":", :cast => proc{|e| e =~ /(\s*)(_*)/; ($1.length..($1.length + $2.length - 1))})
|
8
|
+
|
9
|
+
tsv = tsv.add_field "Start" do |key, values|
|
10
|
+
values["Range"].first
|
11
|
+
end
|
12
|
+
|
13
|
+
tsv = tsv.add_field "End" do |key, values|
|
14
|
+
values["Range"].last
|
15
|
+
end
|
16
|
+
|
17
|
+
tsv = tsv.slice ["Start", "End"]
|
18
|
+
|
19
|
+
tsv
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_index
|
6
23
|
content =<<-'EOF'
|
7
24
|
#: :sep=/\s+/#:type=:double#:merge=:concat
|
8
25
|
#Id ValueA ValueB OtherID
|
@@ -20,12 +37,45 @@ row2 a b id3
|
|
20
37
|
end
|
21
38
|
|
22
39
|
TmpFile.with_file(content) do |filename|
|
23
|
-
index = TSV.index(filename, :target => "ValueB", :fields => "OtherID")
|
40
|
+
index = TSV.index(filename, :target => "ValueB", :fields => ["OtherID"])
|
24
41
|
assert_equal 'B', index["a"]
|
25
42
|
assert_nil index["B"]
|
26
43
|
end
|
27
44
|
end
|
28
45
|
|
46
|
+
def test_from_tsv
|
47
|
+
content =<<-'EOF'
|
48
|
+
#: :sep=/\s+/#:type=:double#:merge=:concat
|
49
|
+
#Id ValueA ValueB OtherID
|
50
|
+
row1 a|aa|aaa b Id1|Id2
|
51
|
+
row2 A|b B Id3|a
|
52
|
+
row2 a b id3
|
53
|
+
EOF
|
54
|
+
|
55
|
+
TmpFile.with_file(content) do |filename|
|
56
|
+
tsv = TSV.open(filename)
|
57
|
+
index = TSV.index(tsv, :target => "ValueB")
|
58
|
+
assert_equal 'b', index["a"]
|
59
|
+
assert_equal 'B', index["B"]
|
60
|
+
assert_equal 'b', index["b"]
|
61
|
+
|
62
|
+
index = tsv.index(:target => "ValueB")
|
63
|
+
assert_equal 'b', index["a"]
|
64
|
+
assert_equal 'B', index["B"]
|
65
|
+
assert_equal 'b', index["b"]
|
66
|
+
|
67
|
+
|
68
|
+
index = TSV.index(tsv, :target => "ValueB", :fields => "OtherID")
|
69
|
+
assert_equal 'B', index["a"]
|
70
|
+
assert_nil index["B"]
|
71
|
+
|
72
|
+
index = tsv.index(:target => "ValueB", :fields => "OtherID")
|
73
|
+
assert_equal 'B', index["a"]
|
74
|
+
assert_nil index["B"]
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
|
29
79
|
def test_persist
|
30
80
|
content =<<-'EOF'
|
31
81
|
#: :sep=/\s+/#:type=:double#:merge=:concat
|
@@ -35,7 +85,7 @@ row2 A B Id3|a
|
|
35
85
|
row2 a b id3
|
36
86
|
EOF
|
37
87
|
tsv = TmpFile.with_file(content) do |filename|
|
38
|
-
index = TSV.index(filename, :target => "ValueB", :persist => true)
|
88
|
+
index = TSV.index(filename, :target => "ValueB", :persist => true, bar: true)
|
39
89
|
assert_equal 'b', index["row1"]
|
40
90
|
assert_equal 'b', index["a"]
|
41
91
|
assert_equal 'b', index["aaa"]
|
@@ -43,6 +93,80 @@ row2 a b id3
|
|
43
93
|
end
|
44
94
|
end
|
45
95
|
|
96
|
+
def test_range_index
|
97
|
+
data =<<-EOF
|
98
|
+
# 012345678901234567890
|
99
|
+
#ID:Range
|
100
|
+
a: ______
|
101
|
+
b: ______
|
102
|
+
c: _______
|
103
|
+
d: ____
|
104
|
+
e: ______
|
105
|
+
f: ___
|
106
|
+
g: ____
|
107
|
+
EOF
|
108
|
+
TmpFile.with_file(data) do |datafile|
|
109
|
+
tsv = load_segment_data(datafile)
|
110
|
+
f = tsv.range_index("Start", "End", :persist => true)
|
111
|
+
|
112
|
+
assert_equal %w(), f[0].sort
|
113
|
+
assert_equal %w(b), f[1].sort
|
114
|
+
assert_equal %w(), f[20].sort
|
115
|
+
assert_equal %w(), f[(20..100)].sort
|
116
|
+
assert_equal %w(a b d), f[3].sort
|
117
|
+
assert_equal %w(a b c d e), f[(3..4)].sort
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
def test_pos_index
|
122
|
+
data =<<-EOF
|
123
|
+
# 012345678901234567890
|
124
|
+
#ID:Range
|
125
|
+
a: ______
|
126
|
+
b: ______
|
127
|
+
c: _______
|
128
|
+
d: ____
|
129
|
+
e: ______
|
130
|
+
f: ___
|
131
|
+
g: ____
|
132
|
+
EOF
|
133
|
+
TmpFile.with_file(data) do |datafile|
|
134
|
+
tsv = load_segment_data(datafile)
|
135
|
+
f = tsv.pos_index("Start", :persist => true)
|
136
|
+
|
137
|
+
assert_equal %w(), f[0].sort
|
138
|
+
assert_equal %w(a c d e), f[(2..4)].sort
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def test_range_index_persistent
|
143
|
+
data =<<-EOF
|
144
|
+
# 012345678901234567890
|
145
|
+
#ID:Range
|
146
|
+
a: ______
|
147
|
+
b: ______
|
148
|
+
c: _______
|
149
|
+
d: ____
|
150
|
+
e: ______
|
151
|
+
f: ___
|
152
|
+
g: ____
|
153
|
+
EOF
|
154
|
+
TmpFile.with_file(data) do |datafile|
|
155
|
+
load_segment_data(datafile)
|
156
|
+
TmpFile.with_file(load_segment_data(datafile).to_s) do |tsvfile|
|
157
|
+
f = TSV.range_index(tsvfile, "Start", "End", :persist => true)
|
158
|
+
|
159
|
+
assert_equal %w(), f[0].sort
|
160
|
+
assert_equal %w(b), f[1].sort
|
161
|
+
assert_equal %w(), f[20].sort
|
162
|
+
assert_equal %w(), f[(20..100)].sort
|
163
|
+
assert_equal %w(a b d), f[3].sort
|
164
|
+
assert_equal %w(a b c d e), f[(3..4)].sort
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
|
46
170
|
def __test_speed
|
47
171
|
content =<<-'EOF'
|
48
172
|
#: :sep=/\s+/#:type=:double#:merge=:concat
|
@@ -0,0 +1,167 @@
|
|
1
|
+
require File.expand_path(__FILE__).sub(%r(/test/.*), '/test/test_helper.rb')
|
2
|
+
require File.expand_path(__FILE__).sub(%r(.*/test/), '').sub(/test_(.*)\.rb/,'\1')
|
3
|
+
|
4
|
+
require 'scout/tsv'
|
5
|
+
class TestOpenTraverse < Test::Unit::TestCase
|
6
|
+
def test_array
|
7
|
+
num_lines = 100
|
8
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
9
|
+
|
10
|
+
r = TSV.traverse lines, :into => [] do |l|
|
11
|
+
l + "-" + Process.pid.to_s
|
12
|
+
end
|
13
|
+
|
14
|
+
assert_equal num_lines, r.length
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_array_cpus
|
18
|
+
num_lines = 1000
|
19
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
20
|
+
|
21
|
+
r = TSV.traverse lines, :into => [], :cpus => 2 do |l|
|
22
|
+
l + "-" + Process.pid.to_s
|
23
|
+
end
|
24
|
+
|
25
|
+
assert_equal num_lines, r.length
|
26
|
+
assert_equal 2, r.collect{|l| l.split("-").last}.uniq.length
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_tsv_cpus
|
30
|
+
num_lines = 10000
|
31
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
32
|
+
|
33
|
+
tsv = TSV.setup({}, key_field: "Line", :fields => %w(Prefix Number), :type => :list)
|
34
|
+
lines.each do |line|
|
35
|
+
tsv[line] = ["LINE", line.split("-").last]
|
36
|
+
end
|
37
|
+
|
38
|
+
r = TSV.traverse tsv, :into => [], :cpus => 2, :bar => {desc: "Process", severity: 0} do |l,v|
|
39
|
+
pre, num = v
|
40
|
+
pre + "-" + num.to_s + "-" + Process.pid.to_s
|
41
|
+
end
|
42
|
+
|
43
|
+
assert_equal num_lines, r.length
|
44
|
+
assert_equal 2, r.collect{|l| l.split("-").last}.uniq.length
|
45
|
+
assert_equal "LINE", r.collect{|l| l.split("-").first}.first
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_into_stream
|
49
|
+
num_lines = 100
|
50
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
51
|
+
|
52
|
+
r = TSV.traverse lines, :into => :stream do |l|
|
53
|
+
l + "-" + Process.pid.to_s
|
54
|
+
end
|
55
|
+
|
56
|
+
assert_equal num_lines, r.read.split("\n").length
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_into_stream_error
|
60
|
+
num_lines = 100
|
61
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
62
|
+
|
63
|
+
assert_raise ScoutException do
|
64
|
+
Log.with_severity 7 do
|
65
|
+
i = 0
|
66
|
+
r = TSV.traverse lines, :into => :stream, cpus: 3 do |l|
|
67
|
+
raise ScoutException if i > 10
|
68
|
+
i += 1
|
69
|
+
l + "-" + Process.pid.to_s
|
70
|
+
end
|
71
|
+
|
72
|
+
r.read
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def test_into_dumper_error
|
78
|
+
num_lines = 100
|
79
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
80
|
+
|
81
|
+
assert_raise ScoutException do
|
82
|
+
i = 0
|
83
|
+
Log.with_severity 7 do
|
84
|
+
dumper = TSV::Dumper.new :key_field => "Key", :fields => ["Value"], :type => :single
|
85
|
+
dumper.init
|
86
|
+
dumper = TSV.traverse lines, :into => dumper, :cpus => 3 do |l|
|
87
|
+
raise ScoutException if i > 10
|
88
|
+
i += 1
|
89
|
+
value = l + "-" + Process.pid.to_s
|
90
|
+
|
91
|
+
[i.to_s, value]
|
92
|
+
end
|
93
|
+
ppp dumper.stream.read
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def test_traverse_line
|
99
|
+
text=<<-EOF
|
100
|
+
#: :sep=" "
|
101
|
+
#Row LabelA LabelB LabelC
|
102
|
+
row1 A B C
|
103
|
+
row1 a b c
|
104
|
+
row2 AA BB CC
|
105
|
+
row2 aa bb cc
|
106
|
+
EOF
|
107
|
+
|
108
|
+
TmpFile.with_file(text) do |file|
|
109
|
+
lines = Open.traverse file, :type => :line, :into => [] do |line|
|
110
|
+
line
|
111
|
+
end
|
112
|
+
assert_include lines, "row2 AA BB CC"
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def test_collapse_stream
|
117
|
+
text=<<-EOF
|
118
|
+
#: :sep=" "
|
119
|
+
#Row LabelA LabelB LabelC
|
120
|
+
row1 A B C
|
121
|
+
row1 a b c
|
122
|
+
row2 AA BB CC
|
123
|
+
row2 aa bb cc
|
124
|
+
EOF
|
125
|
+
|
126
|
+
s = StringIO.new text
|
127
|
+
collapsed = TSV.collapse_stream(s)
|
128
|
+
tsv = TSV.open collapsed
|
129
|
+
assert_equal ["A", "a"], tsv["row1"][0]
|
130
|
+
assert_equal ["BB", "bb"], tsv["row2"][1]
|
131
|
+
end
|
132
|
+
|
133
|
+
def test_cpus_error_dumper
|
134
|
+
num_lines = 100
|
135
|
+
lines = num_lines.times.collect{|i| "line-#{i}" }
|
136
|
+
|
137
|
+
dumper = TSV::Dumper.new :key_field => "Key", :fields => ["Field"], type: :single
|
138
|
+
dumper.init
|
139
|
+
assert_raise ScoutException do
|
140
|
+
Log.with_severity 0 do
|
141
|
+
i = 0
|
142
|
+
TSV.traverse lines, :into => dumper, cpus: 3 do |l|
|
143
|
+
raise ScoutException if i > 10
|
144
|
+
i += 1
|
145
|
+
[Process.pid.to_s, l + "-" + Process.pid.to_s]
|
146
|
+
end
|
147
|
+
|
148
|
+
end
|
149
|
+
ppp dumper.stream.read
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_step_travese_cpus
|
154
|
+
|
155
|
+
size = 1000
|
156
|
+
step = Step.new tmpdir.step[__method__] do
|
157
|
+
lines = size.times.collect{|i| "line-#{i}" }
|
158
|
+
Open.traverse lines, :type => :array, :into => :stream, :cpus => 3 do |line|
|
159
|
+
line.reverse
|
160
|
+
end
|
161
|
+
end
|
162
|
+
step.type = :array
|
163
|
+
|
164
|
+
assert_equal size, step.run.length
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|