scout-gear 6.0.0 → 7.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +465 -432
  3. data/VERSION +1 -1
  4. data/bin/scout +5 -1
  5. data/lib/rbbt-scout.rb +5 -0
  6. data/lib/scout/concurrent_stream.rb +6 -2
  7. data/lib/scout/config.rb +168 -0
  8. data/lib/scout/exceptions.rb +9 -0
  9. data/lib/scout/indiferent_hash/options.rb +1 -0
  10. data/lib/scout/indiferent_hash.rb +4 -2
  11. data/lib/scout/log/color.rb +31 -2
  12. data/lib/scout/log/progress/report.rb +1 -0
  13. data/lib/scout/log/progress/util.rb +3 -1
  14. data/lib/scout/log/progress.rb +7 -3
  15. data/lib/scout/log.rb +8 -3
  16. data/lib/scout/misc/digest.rb +1 -3
  17. data/lib/scout/misc/monitor.rb +3 -0
  18. data/lib/scout/misc/system.rb +15 -0
  19. data/lib/scout/misc.rb +1 -0
  20. data/lib/scout/named_array.rb +68 -0
  21. data/lib/scout/open/stream.rb +58 -26
  22. data/lib/scout/path/find.rb +27 -3
  23. data/lib/scout/path/util.rb +7 -4
  24. data/lib/scout/persist/serialize.rb +7 -14
  25. data/lib/scout/persist.rb +21 -1
  26. data/lib/scout/resource/produce.rb +7 -94
  27. data/lib/scout/resource/software.rb +176 -0
  28. data/lib/scout/tsv/dumper.rb +107 -0
  29. data/lib/scout/tsv/index.rb +49 -0
  30. data/lib/scout/tsv/parser.rb +317 -0
  31. data/lib/scout/tsv/path.rb +13 -0
  32. data/lib/scout/tsv/persist/adapter.rb +348 -0
  33. data/lib/scout/tsv/persist/tokyocabinet.rb +113 -0
  34. data/lib/scout/tsv/persist.rb +15 -0
  35. data/lib/scout/tsv/traverse.rb +48 -0
  36. data/lib/scout/tsv/util.rb +24 -0
  37. data/lib/scout/tsv.rb +27 -0
  38. data/lib/scout/work_queue/worker.rb +16 -11
  39. data/lib/scout/work_queue.rb +63 -21
  40. data/lib/scout/workflow/definition.rb +93 -4
  41. data/lib/scout/workflow/step/config.rb +18 -0
  42. data/lib/scout/workflow/step/dependencies.rb +40 -0
  43. data/lib/scout/workflow/step/file.rb +15 -0
  44. data/lib/scout/workflow/step/info.rb +33 -6
  45. data/lib/scout/workflow/step/provenance.rb +148 -0
  46. data/lib/scout/workflow/step.rb +70 -20
  47. data/lib/scout/workflow/task.rb +5 -4
  48. data/lib/scout/workflow/usage.rb +1 -1
  49. data/lib/scout/workflow.rb +11 -3
  50. data/lib/scout-gear.rb +1 -0
  51. data/lib/scout.rb +1 -0
  52. data/scout-gear.gemspec +38 -3
  53. data/scout_commands/find +1 -1
  54. data/scout_commands/workflow/task +16 -10
  55. data/share/software/install_helpers +523 -0
  56. data/test/scout/log/test_progress.rb +0 -2
  57. data/test/scout/misc/test_system.rb +21 -0
  58. data/test/scout/open/test_stream.rb +160 -1
  59. data/test/scout/path/test_find.rb +14 -7
  60. data/test/scout/resource/test_software.rb +24 -0
  61. data/test/scout/test_config.rb +66 -0
  62. data/test/scout/test_meta_extension.rb +10 -0
  63. data/test/scout/test_named_array.rb +19 -0
  64. data/test/scout/test_persist.rb +35 -0
  65. data/test/scout/test_semaphore.rb +1 -1
  66. data/test/scout/test_tmpfile.rb +2 -2
  67. data/test/scout/test_tsv.rb +74 -0
  68. data/test/scout/test_work_queue.rb +63 -8
  69. data/test/scout/tsv/persist/test_adapter.rb +34 -0
  70. data/test/scout/tsv/persist/test_tokyocabinet.rb +92 -0
  71. data/test/scout/tsv/test_dumper.rb +44 -0
  72. data/test/scout/tsv/test_index.rb +64 -0
  73. data/test/scout/tsv/test_parser.rb +173 -0
  74. data/test/scout/tsv/test_persist.rb +36 -0
  75. data/test/scout/tsv/test_traverse.rb +9 -0
  76. data/test/scout/tsv/test_util.rb +0 -0
  77. data/test/scout/work_queue/test_worker.rb +49 -1
  78. data/test/scout/workflow/step/test_dependencies.rb +25 -0
  79. data/test/scout/workflow/step/test_info.rb +15 -17
  80. data/test/scout/workflow/step/test_load.rb +16 -18
  81. data/test/scout/workflow/step/test_provenance.rb +25 -0
  82. data/test/scout/workflow/test_step.rb +206 -10
  83. data/test/scout/workflow/test_task.rb +0 -3
  84. data/test/test_helper.rb +6 -0
  85. metadata +37 -2
@@ -0,0 +1,107 @@
1
module TSV
  # Writes key/value entries as TSV text into the write end of a pipe obtained
  # from Open.pipe; the read end is exposed through #stream.
  class Dumper
    # Builds the header text of a TSV: an optional preamble followed by the
    # fields line.
    #
    # key_field::  name of the key column; "ID" is used when it is nil
    # fields::     array of field names; no fields line is written when nil
    # entry_hash:: optional Hash that may provide :sep, :preamble and
    #              :header_hash
    #
    # Returns a String, which is empty when there is nothing to write.
    def self.header_lines(key_field, fields, entry_hash = nil)
      # The separator defaults to a tab even when no options Hash is given.
      # It used to stay nil in that case, making the fields line raise a
      # TypeError for any non-empty list of fields.
      sep = "\t"
      preamble = nil
      header_hash = nil

      if Hash === entry_hash
        sep = entry_hash[:sep] if entry_hash[:sep]
        preamble = entry_hash[:preamble]
        header_hash = entry_hash[:header_hash]
      end

      header_hash = "#" if header_hash.nil?

      # Without an explicit preamble, the options themselves are serialized
      # into a "#: " line (key_field and fields are blanked out first).
      if preamble.nil? && entry_hash && entry_hash.values.compact.any?
        preamble = "#: " + Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) + "\n"
      end

      str = +""
      str << preamble.strip << "\n" if preamble && !preamble.empty?
      if fields
        if fields.empty?
          str << header_hash << (key_field || "ID").to_s << "\n"
        else
          str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
        end
      end

      str
    end

    # Builds a header from an options Hash read through
    # IndiferentHash.process_options (:key_field, :fields, :sep, :header_hash
    # and :preamble; sep defaults to a tab, header_hash to "#" and preamble
    # to true).
    #
    # Returns the "#: " options line and the fields line joined by a newline;
    # either part is omitted when it does not apply.
    def self.header(options = {})
      key_field, fields, sep, header_hash, preamble = IndiferentHash.process_options options,
        :key_field, :fields, :sep, :header_hash, :preamble,
        :sep => "\t", :header_hash => "#", :preamble => true

      fields_str = nil
      unless fields.nil? || key_field.nil?
        fields_str = "#{header_hash}#{key_field}#{sep}#{fields * sep}"
      end

      preamble_str = nil
      if preamble && options.values.compact.any?
        preamble_str = "#: " + IndiferentHash.hash2string(options)
      end

      [preamble_str, fields_str].compact * "\n"
    end

    attr_accessor :options

    # options may carry :sep (default tab) and :type (default :double); the
    # options Hash is also kept and later used to build the header in #init.
    def initialize(options = {})
      @sep, @type = IndiferentHash.process_options options,
        :sep, :type,
        :sep => "\t", :type => :double
      @options = options
      @sout, @sin = Open.pipe
      # Each end of the pipe is registered with the other one as its pair.
      ConcurrentStream.setup(@sin, :pair => @sout)
      ConcurrentStream.setup(@sout, :pair => @sin)
    end

    # Writes the header, if there is one, into the pipe.
    def init
      header = Dumper.header(@options.merge(:type => @type, :sep => @sep))
      @sin.puts header if header && !header.empty?
    end

    # Writes one entry. The expected shape of value follows the dumper type:
    # a single value for :single, an array for :list and :flat, and an array
    # of arrays for :double (inner values are joined with "|").
    def add(key, value)
      case @type
      when :single
        @sin.puts key + @sep + value
      when :list, :flat
        @sin.puts key + @sep + value * @sep
      when :double
        @sin.puts key + @sep + value.collect { |v| v * "|" } * @sep
      end
    end

    # Closes the write end of the pipe and then joins it.
    def close
      @sin.close
      @sin.join
    end

    # The read end of the pipe.
    def stream
      @sout
    end

    # Aborts the write end of the pipe, passing along the exception if any.
    def abort(exception = nil)
      @sin.abort(exception)
    end
  end

  # Returns a stream with this TSV dumped as text. Entries are written from a
  # separate thread while the caller reads from the returned stream.
  def stream
    # A leftover debugging call (iii) that printed the extension attributes on
    # every dump was removed from here.
    dumper = TSV::Dumper.new self.extension_attr_hash
    dumper.init
    Thread.new do
      Thread.current["name"] = "Dumper thread"
      self.each do |k, v|
        dumper.add k, v
      end
      dumper.close
    end
    dumper.stream
  end

  # The complete text dump of this TSV.
  def to_s
    stream.read
  end
end
@@ -0,0 +1,49 @@
1
+ require_relative 'parser'
2
module TSV
  # Builds an index that maps every value found in the fields of tsv_file to
  # the entry key it appears under, using the +target+ column as key.
  #
  # target::  column used as key when parsing the file (default 0)
  # order::   when true, a value that appears under several keys is assigned
  #           the key found in the lowest field position; otherwise the first
  #           key encountered for the value is kept
  # kwargs::  :persist (default false) and :persist_type (default "HDB") are
  #           read through IndiferentHash.process_options, :type is discarded,
  #           and the rest is handed to TSV.parse and Persist.persist
  #
  # The work runs inside Persist.persist; when that yields a filename the
  # index is a ScoutCabinet opened on it, otherwise a plain Hash.
  def self.index(tsv_file, target: 0, order: true, **kwargs)
    persist, type = IndiferentHash.process_options kwargs,
      :persist, :persist_type,
      :persist => false, :persist_type => "HDB"
    kwargs.delete :type

    persist_options = kwargs.merge(:persist => persist, :persist_prefix => "Index")

    Persist.persist(tsv_file, type, persist_options) do |filename|
      index =
        if filename
          cabinet = ScoutCabinet.open(filename, true, type)
          TSV.setup(cabinet, :type => :single)
          cabinet.extend TSVAdapter
          cabinet
        else
          TSV.setup({}, :type => :single)
        end

      parsed =
        if order
          # value => list, indexed by field position, of the keys seen there
          keys_by_position = {}
          result = Open.open(tsv_file) do |file|
            TSV.parse file, key_field: target, type: :double, **kwargs do |k, values|
              # The each_with_index call must stay last: its return value is
              # what the block hands back to the parser.
              values.each_with_index do |list, i|
                list.each do |e|
                  slots = (keys_by_position[e] ||= [])
                  (slots[i] ||= []) << k
                end
              end
            end
          end
          keys_by_position.each do |e, slots|
            index[e] = slots.flatten.compact.uniq.first
          end
          result
        else
          Open.open(tsv_file) do |file|
            TSV.parse file, key_field: target, type: :flat, **kwargs do |k, values|
              # As above, the each call is the value returned by the block.
              values.each do |e|
                index[e] = k unless index.include?(e)
              end
            end
          end
        end

      # The roles are swapped in the index: the fields of the parsed file
      # name its key and the key field of the parsed file is its only field.
      index.key_field = parsed.fields * ", "
      index.fields = [parsed.key_field]
      index
    end
  end
end
@@ -0,0 +1,317 @@
1
+ require_relative '../named_array'
2
module TSV
  # Applies the method named by +cast+ to value, or to each of its elements
  # (recursively) when value is an Array.
  def self.cast_value(value, cast)
    if Array === value
      value.collect { |e| cast_value(e, cast) }
    else
      value.send(cast)
    end
  end

  # Splits one line into its key and values.
  #
  # type::      :list (array of values), :single (first value only), :flat or
  #             :double (each value split again by sep2)
  # key::       position of the key column (default 0)
  # positions:: positions of the columns to keep as values; when nil all
  #             columns except the key are kept
  # sep::       column separator
  # sep2::      separator of the values inside a column for :double
  # cast::      optional method name applied to every value
  #
  # Returns [key, items]. The key is itself split by sep2 for :double when
  # key is not 0 or positions are given.
  def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil)
    # The -1 limit keeps trailing empty columns
    items = line.split(sep, -1)

    if positions.nil? && key == 0
      key = items.shift
    elsif positions.nil?
      key = items.delete_at(key)
      key = key.split(sep2) if type == :double
    else
      key, items = items[key], items.values_at(*positions)
      key = key.split(sep2) if type == :double
    end

    # NOTE(review): :flat wraps the values in an extra array, while the
    # [:flat, *] conversions in parse_stream read as if they received the
    # plain list of values; confirm which of the two is intended.
    items = case type
            when :list
              items
            when :single
              items.first
            when :flat
              [items]
            when :double
              items.collect { |i| i.split(sep2, -1) }
            end

    items = cast_value(items, cast) if cast

    [key, items]
  end

  # Reads lines from stream until it is exhausted, parses each one with
  # parse_line and collects the results into data.
  #
  # data::        object receiving the entries; a new Hash when nil
  # source_type:: type used to parse each line; defaults to type
  # type::        type of the stored values; parsed values are converted from
  #               source_type to type
  # merge::       only honored for :double; when set, values of repeated keys
  #               are combined field by field (:concat appends in place to the
  #               arrays already stored)
  # one2one::     when a line has several keys, give each key the value found
  #               at its own position in every field (or the first one)
  # fix::         pass each line through Misc.fixutf8
  # bar::         progress bar specification for Log::ProgressBar.new_bar
  # first_line::  line to process before reading from the stream
  # kargs::       forwarded to parse_line
  #
  # When a block is given it is called with each key and its values, and its
  # result is stored instead, unless that result is nil or data is false.
  #
  # Returns data.
  def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, **kargs, &block)
    bar = Log::ProgressBar.new_bar(bar) if bar

    source_type = type if source_type.nil?

    data = {} if data.nil?
    merge = false if type != :double
    line = first_line || stream.gets
    while line
      begin
        # Only the line terminator is removed: stripping all surrounding
        # whitespace would also discard leading and trailing empty columns,
        # which parse_line takes care to preserve.
        line = line.chomp
        line = Misc.fixutf8(line) if fix
        bar.tick if bar
        key, items = parse_line(line, type: source_type, **kargs)

        if Array === key
          keys = key
          if one2one
            key_items = keys.length.times.collect { |i| items.collect { |list| [list[i] || list[0]] } }
          else
            key_items = false
          end
        else
          keys = [key]
          key_items = false
        end

        keys.each_with_index do |current_key, i|
          these_items = key_items ? key_items[i] : items

          these_items = case [source_type, type]
                        when [:single, :single]
                          these_items
                        when [:list, :single]
                          these_items.first
                        when [:flat, :single]
                          these_items.first
                        when [:double, :single]
                          these_items.first.first
                        when [:single, :list]
                          [these_items]
                        when [:list, :list]
                          these_items
                        when [:flat, :list]
                          these_items
                        when [:double, :list]
                          these_items.collect { |l| l.first }
                        when [:single, :flat]
                          [these_items]
                        when [:list, :flat]
                          these_items
                        when [:flat, :flat]
                          these_items
                        when [:double, :flat]
                          these_items.flatten
                        when [:single, :double]
                          [[these_items]]
                        when [:list, :double]
                          these_items.collect { |l| [l] }
                        when [:flat, :double]
                          [these_items]
                        when [:double, :double]
                          these_items
                        end

          if block_given?
            res = block.call(current_key, these_items)
            data[current_key] = res unless res.nil? || FalseClass === data
            next
          end

          if !merge || !data.include?(current_key)
            data[current_key] = these_items
          elsif merge == :concat
            current = data[current_key]
            these_items.each_with_index do |new_values, j|
              next if new_values.empty?
              # A field missing from the stored entry starts as an empty list
              (current[j] ||= []).concat(new_values)
            end
          else
            current = data[current_key]
            # Every field is rebuilt, so one that is empty in the new line
            # keeps the values accumulated so far instead of becoming nil.
            length = [current.length, these_items.length].max
            data[current_key] = Array.new(length) { |j| (current[j] || []) + (these_items[j] || []) }
          end
        end
      ensure
        # The next line is read even when processing the current one raises
        line = stream.gets
      end
    end
    data
  ensure
    Log::ProgressBar.remove_bar(bar) if bar
  end

  # Reads the header of a TSV from stream.
  #
  # fix::         pass header lines through Misc.fixutf8
  # header_hash:: prefix of header lines ("#" by default); true treats the
  #               first line as the fields line whatever its prefix, and an
  #               empty String does the same without reading any further
  # sep::         column separator, overridden by a :sep entry in the options
  #               line
  #
  # Returns [options, key_field, fields, first_line, preamble], where options
  # come from an initial "<header_hash>: ..." line parsed with
  # IndiferentHash.string2hash, first_line is the first line after the header
  # and preamble holds the header lines that precede the fields line.
  # An exhausted stream yields just an empty Hash.
  #
  # Raises RuntimeError when stream is a closed IO.
  def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
    raise "Closed stream" if IO === stream && stream.closed?

    options = {}
    preamble = []

    line = stream.gets
    return {} if line.nil?
    # The terminator is always removed, so that field names come out clean
    # with fix: false as well; fix only controls the encoding clean-up.
    line = line.chomp
    line = Misc.fixutf8(line) if fix

    # Options line. header_hash is escaped so it is matched literally.
    if String === header_hash && (m = line.match(/^#{Regexp.escape(header_hash)}: (.*)/))
      options = IndiferentHash.string2hash m.captures.first.chomp
      line = stream.gets
      if line
        line = line.chomp
        line = Misc.fixutf8(line) if fix
      end
    end

    sep = options[:sep] if options[:sep]

    # Fields line: the last header line found provides the field names
    preamble << line if line
    while line && (TrueClass === header_hash || (String === header_hash && line.start_with?(header_hash)))
      fields = line.split(sep, -1)
      key_field = fields.shift
      key_field = key_field.sub(header_hash, '') if String === header_hash && !header_hash.empty?

      line = (header_hash != "" ? stream.gets : nil)
      if line
        line = line.chomp
        line = Misc.fixutf8(line) if fix
      end
      preamble << line if line
      break if TrueClass === header_hash || header_hash == ""
    end

    # Drops the last two collected lines (in the regular case, the fields
    # line and the first data line)
    preamble = preamble[0..-3] * "\n"

    line ||= stream.gets

    first_line = line

    [options, key_field, fields, first_line, preamble]
  end

  # Names of the keyword parameters accepted by parse_line and parse_stream
  KEY_PARAMETERS = (method(:parse_line).parameters + method(:parse_stream).parameters)
    .select { |kind, _name| kind == :key }
    .map { |_kind, name| name }

  # Reads the header of a TSV source when created and keeps the stream ready
  # to traverse its entries.
  class Parser
    attr_accessor :stream, :options, :key_field, :fields, :first_line, :preamble

    # file is used directly when it is an IO and opened with Open.open
    # otherwise. The remaining parameters are those of TSV.parse_header.
    def initialize(file, fix: true, header_hash: "#", sep: "\t")
      if IO === file
        @stream = file
      else
        @stream = Open.open(file)
      end
      @options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix: fix, header_hash: header_hash, sep: sep)
      @options[:sep] = sep if @options[:sep].nil?
    end

    # Key field name followed by the field names, as found in the header.
    def all_fields
      [@key_field] + @fields
    end

    # Parses the entries of the stream with TSV.parse_stream.
    #
    # key_field:: field to use as key, resolved with NamedArray.identify_name
    # fields::    fields to keep as values, resolved the same way
    # kwargs::    forwarded to TSV.parse_stream; header options that match one
    #             of KEY_PARAMETERS are added unless already given
    #
    # filename and namespace are accepted but not used here.
    #
    # Returns the data set up as a TSV, or the parser itself when no :data
    # was given (entries are then only passed to the block).
    def traverse(key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
      if fields
        all_field_names ||= [@key_field] + @fields
        positions = NamedArray.identify_name(all_field_names, fields)
        kwargs[:positions] = positions
        field_names = all_field_names.values_at(*positions)
      else
        field_names = @fields
      end

      if key_field
        all_field_names ||= [@key_field] + @fields
        key = NamedArray.identify_name(all_field_names, key_field)
        kwargs[:key] = key
        key_field_name = all_field_names[key]
        # The values are every column except the new key, so that is the
        # name to leave out (not the key field of the file).
        field_names = all_field_names - [key_field_name] if fields.nil?
      else
        key_field_name = @key_field
      end

      @options.each do |option, value|
        option = option.to_sym
        next unless KEY_PARAMETERS.include? option
        kwargs[option] = value unless kwargs.include?(option)
      end

      kwargs[:source_type] = @options[:type]
      kwargs[:data] = false if kwargs[:data].nil?

      data = TSV.parse_stream(@stream, first_line: @first_line, **kwargs, &block)

      # The type comes from the parsing options; it used to be read from an
      # instance variable that is never assigned.
      TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => kwargs[:type]) if data

      data || self
    end
  end

  # Parses a TSV from stream (anything accepted by TSV::Parser.new).
  #
  # fix, header_hash and sep are used to read the header; kwargs take
  # precedence over the options found in it and are passed to
  # TSV::Parser#traverse. The type defaults to :double and data to a new
  # Hash. A data object that responds to persistence_class is set up as a TSV
  # and extended with TSVAdapter before parsing.
  #
  # Returns the data with type, filename and namespace assigned.
  def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, **kwargs, &block)
    parser = TSV::Parser.new stream, fix: fix, header_hash: header_hash, sep: sep
    kwargs = parser.options.merge(kwargs)

    type = kwargs[:type] ||= :double
    if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
      TSV.setup(data, type: type)
      data.extend TSVAdapter
    end

    kwargs[:data] = {} if kwargs[:data].nil?

    data = parser.traverse(**kwargs, &block)
    data.type = type
    data.filename = filename
    data.namespace = namespace
    data
  end

  #def self.parse_alt(stream, key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
  #  options, key_field_name, field_names, first_line, preamble = parse_header(stream)

  #  if fields
  #    all_field_names ||= [key_field_name] + field_names
  #    positions = NamedArray.identify_name(all_field_names, fields)
  #    kwargs[:positions] = positions
  #    field_names = all_field_names.values_at *positions
  #  end

  #  if key_field
  #    all_field_names ||= [key_field_name] + field_names
  #    key = NamedArray.identify_name(all_field_names, key_field)
  #    kwargs[:key] = key
  #    key_field_name = all_field_names[key]
  #    if fields.nil?
  #      field_names = all_field_names - [key_field_name]
  #    end
  #  end

  #  options.each do |option,value|
  #    option = option.to_sym
  #    next unless KEY_PARAMETERS.include? option
  #    kwargs[option] = value unless kwargs.include?(option)
  #  end

  #  kwargs[:source_type] = options[:type]

  #  type = kwargs[:type] ||= :double
  #  if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
  #    TSV.setup(data, type: type, key_field: key_field_name, fields: field_names)
  #    data.extend TSVAdapter
  #  end

  #  data = parse_stream(stream, first_line: first_line, **kwargs, &block)

  #  TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => type, filename: filename, namespace: namespace)

  #  data
  #end

end
@@ -0,0 +1,13 @@
1
module Path
  # Opens this path with TSV.open, forwarding every argument. The located
  # path is used when it exists; otherwise the same path with a 'tsv'
  # extension is located and used instead.
  def tsv(...)
    direct = find
    target = direct.exists? ? direct : set_extension('tsv').find
    TSV.open(target, ...)
  end

  # Builds an index of this path with TSV.index, forwarding every argument.
  # The file is chosen in the same way as in #tsv.
  def index(...)
    direct = find
    target = direct.exists? ? direct : set_extension('tsv').find
    TSV.index(target, ...)
  end
end