scout-gear 6.0.0 → 7.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +465 -432
  3. data/VERSION +1 -1
  4. data/bin/scout +5 -1
  5. data/lib/rbbt-scout.rb +5 -0
  6. data/lib/scout/concurrent_stream.rb +6 -2
  7. data/lib/scout/config.rb +168 -0
  8. data/lib/scout/exceptions.rb +9 -0
  9. data/lib/scout/indiferent_hash/options.rb +1 -0
  10. data/lib/scout/indiferent_hash.rb +4 -2
  11. data/lib/scout/log/color.rb +31 -2
  12. data/lib/scout/log/progress/report.rb +1 -0
  13. data/lib/scout/log/progress/util.rb +3 -1
  14. data/lib/scout/log/progress.rb +7 -3
  15. data/lib/scout/log.rb +8 -3
  16. data/lib/scout/misc/digest.rb +1 -3
  17. data/lib/scout/misc/monitor.rb +3 -0
  18. data/lib/scout/misc/system.rb +15 -0
  19. data/lib/scout/misc.rb +1 -0
  20. data/lib/scout/named_array.rb +68 -0
  21. data/lib/scout/open/stream.rb +58 -26
  22. data/lib/scout/path/find.rb +27 -3
  23. data/lib/scout/path/util.rb +7 -4
  24. data/lib/scout/persist/serialize.rb +7 -14
  25. data/lib/scout/persist.rb +21 -1
  26. data/lib/scout/resource/produce.rb +7 -94
  27. data/lib/scout/resource/software.rb +176 -0
  28. data/lib/scout/tsv/dumper.rb +107 -0
  29. data/lib/scout/tsv/index.rb +49 -0
  30. data/lib/scout/tsv/parser.rb +317 -0
  31. data/lib/scout/tsv/path.rb +13 -0
  32. data/lib/scout/tsv/persist/adapter.rb +348 -0
  33. data/lib/scout/tsv/persist/tokyocabinet.rb +113 -0
  34. data/lib/scout/tsv/persist.rb +15 -0
  35. data/lib/scout/tsv/traverse.rb +48 -0
  36. data/lib/scout/tsv/util.rb +24 -0
  37. data/lib/scout/tsv.rb +27 -0
  38. data/lib/scout/work_queue/worker.rb +16 -11
  39. data/lib/scout/work_queue.rb +63 -21
  40. data/lib/scout/workflow/definition.rb +93 -4
  41. data/lib/scout/workflow/step/config.rb +18 -0
  42. data/lib/scout/workflow/step/dependencies.rb +40 -0
  43. data/lib/scout/workflow/step/file.rb +15 -0
  44. data/lib/scout/workflow/step/info.rb +33 -6
  45. data/lib/scout/workflow/step/provenance.rb +148 -0
  46. data/lib/scout/workflow/step.rb +70 -20
  47. data/lib/scout/workflow/task.rb +5 -4
  48. data/lib/scout/workflow/usage.rb +1 -1
  49. data/lib/scout/workflow.rb +11 -3
  50. data/lib/scout-gear.rb +1 -0
  51. data/lib/scout.rb +1 -0
  52. data/scout-gear.gemspec +38 -3
  53. data/scout_commands/find +1 -1
  54. data/scout_commands/workflow/task +16 -10
  55. data/share/software/install_helpers +523 -0
  56. data/test/scout/log/test_progress.rb +0 -2
  57. data/test/scout/misc/test_system.rb +21 -0
  58. data/test/scout/open/test_stream.rb +160 -1
  59. data/test/scout/path/test_find.rb +14 -7
  60. data/test/scout/resource/test_software.rb +24 -0
  61. data/test/scout/test_config.rb +66 -0
  62. data/test/scout/test_meta_extension.rb +10 -0
  63. data/test/scout/test_named_array.rb +19 -0
  64. data/test/scout/test_persist.rb +35 -0
  65. data/test/scout/test_semaphore.rb +1 -1
  66. data/test/scout/test_tmpfile.rb +2 -2
  67. data/test/scout/test_tsv.rb +74 -0
  68. data/test/scout/test_work_queue.rb +63 -8
  69. data/test/scout/tsv/persist/test_adapter.rb +34 -0
  70. data/test/scout/tsv/persist/test_tokyocabinet.rb +92 -0
  71. data/test/scout/tsv/test_dumper.rb +44 -0
  72. data/test/scout/tsv/test_index.rb +64 -0
  73. data/test/scout/tsv/test_parser.rb +173 -0
  74. data/test/scout/tsv/test_persist.rb +36 -0
  75. data/test/scout/tsv/test_traverse.rb +9 -0
  76. data/test/scout/tsv/test_util.rb +0 -0
  77. data/test/scout/work_queue/test_worker.rb +49 -1
  78. data/test/scout/workflow/step/test_dependencies.rb +25 -0
  79. data/test/scout/workflow/step/test_info.rb +15 -17
  80. data/test/scout/workflow/step/test_load.rb +16 -18
  81. data/test/scout/workflow/step/test_provenance.rb +25 -0
  82. data/test/scout/workflow/test_step.rb +206 -10
  83. data/test/scout/workflow/test_task.rb +0 -3
  84. data/test/test_helper.rb +6 -0
  85. metadata +37 -2
@@ -0,0 +1,107 @@
1
+ module TSV
2
# Writes key/value entries as TSV text into a pipe.
#
# Entries given to #add are formatted according to the dumper type and
# written to the pipe's write end; the text is read back from #stream.
class Dumper
  # Builds header text: an optional preamble followed by the field-name line.
  #
  # key_field  - name of the key column; "ID" is used when nil.
  # fields     - Array of field names; no field line is written when nil.
  # entry_hash - optional Hash; :sep, :preamble and :header_hash are read
  #              from it. When no :preamble is given and the hash has any
  #              non-nil value, a "#: " preamble is built with
  #              Misc.hash2string.
  #
  # Returns a String, empty when there is nothing to write.
  def self.header_lines(key_field, fields, entry_hash = nil)
    # Default to a tab so the field line can be built even when no
    # entry_hash (or no :sep entry) is supplied.
    sep = "\t"
    preamble = nil
    header_hash = nil

    if Hash === entry_hash
      sep = entry_hash[:sep] if entry_hash[:sep]
      preamble = entry_hash[:preamble]
      header_hash = entry_hash[:header_hash]
    end

    header_hash = "#" if header_hash.nil?

    if preamble.nil? && entry_hash && entry_hash.values.compact.any?
      preamble = "#: " + Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) + "\n"
    end

    str = +""
    str << preamble.strip << "\n" if preamble && !preamble.empty?
    if fields
      str << header_hash << (key_field || "ID").to_s
      str << sep << (fields * sep) unless fields.empty?
      str << "\n"
    end

    str
  end

  # Builds header text from an options hash.
  #
  # :key_field, :fields, :sep, :header_hash and :preamble are extracted with
  # IndiferentHash.process_options (defaults: tab, "#", true). The field line
  # is only produced when both key field and fields are present. The "#: "
  # preamble line is produced when :preamble is truthy and the options hash
  # still holds a non-nil value (presumably the options left over after
  # extraction; confirm against process_options).
  #
  # Returns the preamble and field line joined by a newline, without a
  # trailing newline.
  def self.header(options={})
    key_field, fields, sep, header_hash, preamble = IndiferentHash.process_options options,
      :key_field, :fields, :sep, :header_hash, :preamble,
      :sep => "\t", :header_hash => "#", :preamble => true

    fields_str = nil
    fields_str = "#{header_hash}#{key_field}#{sep}#{fields*sep}" unless fields.nil? || key_field.nil?

    preamble_str = nil
    preamble_str = "#: " + IndiferentHash.hash2string(options) if preamble && options.values.compact.any?

    [preamble_str, fields_str].compact * "\n"
  end

  attr_accessor :options

  # options - Hash; :sep (default tab) and :type (default :double) are
  #           extracted, the hash itself is kept in @options.
  #
  # Opens a pipe and sets up both ends as a ConcurrentStream pair.
  def initialize(options = {})
    @sep, @type = IndiferentHash.process_options options,
      :sep, :type,
      :sep => "\t", :type => :double
    @options = options
    @sout, @sin = Open.pipe
    ConcurrentStream.setup(@sin, :pair => @sout)
    ConcurrentStream.setup(@sout, :pair => @sin)
  end

  # Writes the header (if any) to the pipe.
  def init
    header = Dumper.header(@options.merge(:type => @type, :sep => @sep))
    @sin.puts header if header and ! header.empty?
  end

  # Writes one entry, formatted according to the dumper type:
  # :single writes the value as is, :list and :flat join the values with the
  # separator, :double joins each inner list with "|" first. Any other type
  # writes nothing.
  def add(key, value)
    case @type
    when :single
      @sin.puts key + @sep + value
    when :list, :flat
      @sin.puts key + @sep + value * @sep
    when :double
      @sin.puts key + @sep + value.collect{|v| v * "|" } * @sep
    end
  end

  # Closes the write end of the pipe and joins it.
  def close
    @sin.close
    @sin.join
  end

  # Returns the read end of the pipe.
  def stream
    @sout
  end

  # Aborts the write end of the pipe, forwarding the optional exception.
  def abort(exception=nil)
    @sin.abort(exception)
  end
end
89
+
90
# Returns a stream from which the TSV text for this object can be read.
#
# A TSV::Dumper is created from extension_attr_hash and its header is
# written. The entries are then fed to the dumper from a background thread,
# and the dumper's read end is returned immediately.
def stream
  dumper = TSV::Dumper.new self.extension_attr_hash
  dumper.init
  Thread.new do
    Thread.current["name"] = "Dumper thread"
    begin
      self.each do |k,v|
        dumper.add k, v
      end
      dumper.close
    rescue StandardError => e
      # Without this the write end stays open after a failure and a reader
      # could block; hand the error to the dumper so the stream is aborted.
      dumper.abort(e)
    end
  end
  dumper.stream
end
103
+
104
# Returns the full text obtained by reading the stream built by #stream.
def to_s
  text_stream = stream
  text_stream.read
end
107
+ end
@@ -0,0 +1,49 @@
1
+ require_relative 'parser'
2
+ module TSV
3
# Builds an index that maps every value found in a TSV file to a key taken
# from the +target+ column.
#
# tsv_file - file handed to Open.open and used as the Persist.persist subject.
# target   - column to use as the key when parsing (passed as key_field).
# order    - when true, keys are collected per field position and the first
#            one after flattening wins; when false, the first key seen for a
#            value in file order wins.
# kwargs   - :persist (default false) and :persist_type (default "HDB") are
#            extracted; :type is dropped; the rest is forwarded to TSV.parse
#            and Persist.persist.
#
# Returns the index, with key_field set to the parsed field names joined by
# ", " and fields set to the parsed key field.
def self.index(tsv_file, target: 0, order: true, **kwargs)
  persist, type = IndiferentHash.process_options kwargs,
    :persist, :persist_type,
    :persist => false, :persist_type => "HDB"
  kwargs.delete :type

  persist_options = kwargs.merge(:persist => persist, :persist_prefix => "Index")

  Persist.persist(tsv_file, type, persist_options) do |filename|
    index = if filename
              cabinet = ScoutCabinet.open(filename, true, type)
              TSV.setup(cabinet, :type => :single)
              cabinet.extend TSVAdapter
              cabinet
            else
              TSV.setup({}, :type => :single)
            end

    # The parse blocks below end with the each/each_with_index call on
    # purpose: its return value is what TSV.parse_stream stores per key.
    parsed = nil
    if order
      keys_by_position = {}
      parsed = Open.open(tsv_file) do |file|
        TSV.parse(file, key_field: target, type: :double, **kwargs) do |k, values|
          values.each_with_index do |list, i|
            list.each do |e|
              slots = (keys_by_position[e] ||= [])
              (slots[i] ||= []) << k
            end
          end
        end
      end
      keys_by_position.each do |e, slots|
        index[e] = slots.flatten.compact.uniq.first
      end
    else
      parsed = Open.open(tsv_file) do |file|
        TSV.parse(file, key_field: target, type: :flat, **kwargs) do |k, values|
          values.each do |e|
            index[e] = k unless index.include?(e)
          end
        end
      end
    end

    index.key_field = parsed.fields * ", "
    index.fields = [parsed.key_field]
    index
  end
end
49
+ end
@@ -0,0 +1,317 @@
1
+ require_relative '../named_array'
2
+ module TSV
3
# Applies the method named +cast+ to +value+; Arrays are cast element by
# element, recursing into nested Arrays.
def self.cast_value(value, cast)
  return value.send(cast) unless Array === value

  value.map { |element| cast_value(element, cast) }
end
10
+
11
# Splits one line of text into a key and its values.
#
# line      - String to split on +sep+ (empty trailing fields are kept).
# type      - shape of the returned values: :list (the fields), :single (the
#             first field), :flat (the fields wrapped in one Array) or
#             :double (each field split on +sep2+).
# key       - position of the key column.
# positions - optional positions of the value columns to keep.
# sep, sep2 - field separator and in-field separator.
# cast      - optional method name applied to the values via cast_value.
#
# The key is split on +sep2+ for :double unless it is taken from the first
# column with no +positions+ given.
#
# Returns [key, values].
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil)
  columns = line.split(sep, -1)

  if !positions.nil?
    row_key = columns[key]
    columns = columns.values_at(*positions)
    row_key = row_key.split(sep2) if type == :double
  elsif key == 0
    row_key = columns.shift
  else
    row_key = columns.delete_at(key)
    row_key = row_key.split(sep2) if type == :double
  end

  values = case type
           when :list   then columns
           when :single then columns.first
           when :flat   then [columns]
           when :double then columns.map { |field| field.split(sep2, -1) }
           end

  values = cast_value(values, cast) if cast

  [row_key, values]
end
42
+
43
# Reads lines from +stream+, parses each with parse_line and stores the
# result in +data+.
#
# stream      - object responding to #gets.
# data        - container for the results; a new Hash when nil.
# source_type - type used to parse each line; defaults to +type+.
# type        - type of the stored values (:single, :list, :flat, :double).
# merge       - for :double only: when a key repeats, combine the values per
#               field position; :concat appends to the stored lists in place,
#               any other truthy value stores newly built lists.
# one2one     - when a line yields several keys, give key i the i-th value of
#               each field (falling back to the first value).
# fix         - pass each line through Misc.fixutf8.
# bar         - progress bar specification for Log::ProgressBar.new_bar.
# first_line  - a line already read from the stream, processed first.
# kargs       - forwarded to parse_line.
# block       - when given, called with (key, values); its result is stored
#               under the key unless it is nil or +data+ is false.
#
# Lines are chomped rather than stripped so trailing empty fields survive
# the split in parse_line.
#
# Returns +data+.
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, **kargs, &block)
  begin
    bar = Log::ProgressBar.new_bar(bar) if bar

    source_type = type if source_type.nil?

    data = {} if data.nil?
    merge = false if type != :double
    line = first_line || stream.gets
    while line
      begin
        line = line.chomp
        line = Misc.fixutf8(line) if fix
        bar.tick if bar
        key, items = parse_line(line, type: source_type, **kargs)

        if Array === key
          keys = key
          key_items = if one2one
                        keys.length.times.collect{|i| items.collect{|list| [list[i] || list[0]] } }
                      else
                        false
                      end
        else
          keys = [key]
          key_items = false
        end

        keys.each_with_index do |k,i|
          these_items = key_items ? key_items[i] : items

          # Convert from the shape the line was parsed in to the shape stored.
          these_items = case [source_type, type]
                        when [:single, :single]
                          these_items
                        when [:list, :single]
                          these_items.first
                        when [:flat, :single]
                          these_items.first
                        when [:double, :single]
                          these_items.first.first
                        when [:single, :list]
                          [these_items]
                        when [:list, :list]
                          these_items
                        when [:flat, :list]
                          these_items
                        when [:double, :list]
                          these_items.collect{|l| l.first }
                        when [:single, :flat]
                          [these_items]
                        when [:list, :flat]
                          these_items
                        when [:flat, :flat]
                          these_items
                        when [:double, :flat]
                          these_items.flatten
                        when [:single, :double]
                          [[these_items]]
                        when [:list, :double]
                          these_items.collect{|l| [l] }
                        when [:flat, :double]
                          [these_items]
                        when [:double, :double]
                          these_items
                        end

          if block_given?
            res = block.call(k, these_items)
            data[k] = res unless res.nil? || FalseClass === data
            next
          end

          if ! merge || ! data.include?(k)
            data[k] = these_items
          else
            current = data[k]
            if merge == :concat
              these_items.each_with_index do |new,j|
                next if new.empty?
                (current[j] ||= []).concat(new)
              end
            else
              # Start from the stored values so positions whose new field is
              # empty keep what was already there.
              merged = current.dup
              these_items.each_with_index do |new,j|
                next if new.empty?
                merged[j] = (current[j] || []) + new
              end
              data[k] = merged
            end
          end
        end
      ensure
        # Runs whether or not processing the line raised.
        line = stream.gets
      end
    end
    data
  ensure
    Log::ProgressBar.remove_bar(bar) if bar
  end
end
147
+
148
# Reads the header of a TSV stream.
#
# stream      - object responding to #gets.
# fix         - pass every header line through Misc.fixutf8.
# header_hash - String prefix marking header lines, or true to treat the
#               first line as the field line regardless of prefix; "" also
#               treats the first line as the field line.
# sep         - field separator; replaced by the :sep found in the options
#               line, if any.
#
# A first line of the form "<header_hash>: ..." is parsed as options with
# IndiferentHash.string2hash. Of the following lines starting with
# header_hash, the last one provides the key field and field names; the
# ones before it are returned joined as the preamble.
#
# Every line read here is chomped, whether or not +fix+ is set.
#
# Returns [options, key_field, fields, first_line, preamble], where
# first_line is the first line after the header. Returns {} when the stream
# is empty.
# Raises RuntimeError when given a closed IO.
def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
  raise "Closed stream" if IO === stream && stream.closed?

  options = {}
  preamble = []

  # Reads the next line, always chomped; encoding is repaired only on request.
  next_line = lambda do
    raw = stream.gets
    if raw.nil?
      nil
    else
      raw = raw.chomp
      fix ? Misc.fixutf8(raw) : raw
    end
  end

  line = next_line.call
  return {} if line.nil?

  # Process options line
  if String === header_hash && (m = line.match(/^#{Regexp.escape(header_hash)}: (.*)/))
    options = IndiferentHash.string2hash m.captures.first.chomp
    line = next_line.call
  end

  # Determine separator
  sep = options[:sep] if options[:sep]

  # Process fields line
  preamble << line if line
  while line && (TrueClass === header_hash || (String === header_hash && line.start_with?(header_hash)))
    fields = line.split(sep, -1)
    key_field = fields.shift
    key_field = key_field.sub(header_hash, '') if String === header_hash && ! header_hash.empty?

    line = (header_hash != "" ? next_line.call : nil)
    preamble << line if line
    break if TrueClass === header_hash || header_hash == ""
  end

  # Drop the last two collected lines (the field line and the line read after it).
  preamble = preamble[0..-3] * "\n"

  line ||= next_line.call

  [options, key_field, fields, line, preamble]
end
192
+
193
# Names of the optional keyword parameters accepted by parse_line and
# parse_stream, in declaration order (names shared by both appear twice).
KEY_PARAMETERS = [:parse_line, :parse_stream]
  .flat_map { |method_name| method(method_name).parameters }
  .select { |kind, _name| kind == :key }
  .map { |_kind, name| name }
200
+
201
# Reads the header of a TSV source on creation and traverses its content
# on demand.
class Parser
  attr_accessor :stream, :options, :key_field, :fields, :first_line, :preamble

  # file - an IO, used directly, or anything else, opened with Open.open.
  # fix, header_hash, sep - forwarded to TSV.parse_header.
  #
  # The separator passed in is recorded in the options unless the header
  # already declares one.
  def initialize(file, fix: true, header_hash: "#", sep: "\t")
    @stream = IO === file ? file : Open.open(file)
    @options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
    @options[:sep] = sep if @options[:sep].nil?
  end

  # Returns the key field name followed by the field names.
  def all_fields
    [@key_field] + @fields
  end

  # Parses the rest of the stream with TSV.parse_stream.
  #
  # key_field - field to use as key, resolved with NamedArray.identify_name.
  # fields    - fields to keep, resolved with NamedArray.identify_name.
  # kwargs    - forwarded to TSV.parse_stream; header options whose names are
  #             in KEY_PARAMETERS fill in the ones not given explicitly.
  #             :data defaults to false here.
  # block     - forwarded to TSV.parse_stream.
  #
  # Returns the data, set up with TSV.setup, or the parser itself when no
  # data was produced.
  def traverse(key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
    if fields
      all_field_names ||= [@key_field] + @fields
      positions = NamedArray.identify_name(all_field_names, fields)
      kwargs[:positions] = positions
      field_names = all_field_names.values_at(*positions)
    else
      field_names = @fields
    end

    if key_field
      all_field_names ||= [@key_field] + @fields
      key = NamedArray.identify_name(all_field_names, key_field)
      kwargs[:key] = key
      key_field_name = all_field_names[key]
      # The remaining fields are every column except the one now used as
      # key; the original key column becomes a regular field.
      field_names = all_field_names - [key_field_name] if fields.nil?
    else
      key_field_name = @key_field
    end

    @options.each do |option,value|
      option = option.to_sym
      next unless KEY_PARAMETERS.include? option
      kwargs[option] = value unless kwargs.include?(option)
    end

    kwargs[:source_type] = @options[:type]
    kwargs[:data] = false if kwargs[:data].nil?

    data = TSV.parse_stream(@stream, first_line: @first_line, **kwargs, &block)

    # Report the type handed to parse_stream (nil when none was given).
    TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => kwargs[:type]) if data

    data || self
  end
end
256
+
257
# Parses a TSV source in one call.
#
# stream - source handed to TSV::Parser.new.
# fix, header_hash, sep - forwarded to TSV::Parser.new.
# filename, namespace   - assigned to the result.
# kwargs - merged over the options found in the header and forwarded to
#          Parser#traverse; :type defaults to :double and :data to a new Hash.
#          A :data object responding to persistence_class is first set up as
#          a TSV and extended with TSVAdapter.
# block  - forwarded to Parser#traverse.
#
# Returns the traversal result with type, filename and namespace assigned.
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, **kwargs, &block)
  parser = TSV::Parser.new stream, fix: fix, header_hash: header_hash, sep: sep
  kwargs = parser.options.merge(kwargs)

  kwargs[:type] = :double unless kwargs[:type]
  type = kwargs[:type]

  store = kwargs[:data]
  if store && store.respond_to?(:persistence_class)
    TSV.setup(store, type: type)
    store.extend TSVAdapter
  end

  kwargs[:data] = {} if kwargs[:data].nil?

  parsed = parser.traverse(**kwargs, &block)
  parsed.type = type
  parsed.filename = filename
  parsed.namespace = namespace
  parsed
end
275
+
276
+ #def self.parse_alt(stream, key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
277
+ # options, key_field_name, field_names, first_line, preamble = parse_header(stream)
278
+
279
+ # if fields
280
+ # all_field_names ||= [key_field_name] + field_names
281
+ # positions = NamedArray.identify_name(all_field_names, fields)
282
+ # kwargs[:positions] = positions
283
+ # field_names = all_field_names.values_at *positions
284
+ # end
285
+
286
+ # if key_field
287
+ # all_field_names ||= [key_field_name] + field_names
288
+ # key = NamedArray.identify_name(all_field_names, key_field)
289
+ # kwargs[:key] = key
290
+ # key_field_name = all_field_names[key]
291
+ # if fields.nil?
292
+ # field_names = all_field_names - [key_field_name]
293
+ # end
294
+ # end
295
+
296
+ # options.each do |option,value|
297
+ # option = option.to_sym
298
+ # next unless KEY_PARAMETERS.include? option
299
+ # kwargs[option] = value unless kwargs.include?(option)
300
+ # end
301
+
302
+ # kwargs[:source_type] = options[:type]
303
+
304
+ # type = kwargs[:type] ||= :double
305
+ # if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
306
+ # TSV.setup(data, type: type, key_field: key_field_name, fields: field_names)
307
+ # data.extend TSVAdapter
308
+ # end
309
+
310
+ # data = parse_stream(stream, first_line: first_line, **kwargs, &block)
311
+
312
+ # TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => type, filename: filename, namespace: namespace)
313
+
314
+ # data
315
+ #end
316
+
317
+ end
@@ -0,0 +1,13 @@
1
# TSV helpers for Path: locate the file behind the path and hand it to TSV.
module Path
  # Opens the located file with TSV.open, forwarding all arguments. When the
  # located path does not exist, the path with a 'tsv' extension is tried.
  def tsv(...)
    candidate = self.find
    candidate = self.set_extension('tsv').find if ! candidate.exists?
    TSV.open(candidate, ...)
  end

  # Builds an index of the located file with TSV.index, forwarding all
  # arguments. When the located path does not exist, the path with a 'tsv'
  # extension is tried.
  def index(...)
    candidate = self.find
    candidate = self.set_extension('tsv').find if ! candidate.exists?
    TSV.index(candidate, ...)
  end
end