scout-gear 8.0.0 → 8.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +26 -9
  3. data/Rakefile +6 -1
  4. data/VERSION +1 -1
  5. data/bin/scout +15 -4
  6. data/doc/lib/scout/path.md +35 -0
  7. data/doc/lib/scout/workflow/task.md +13 -0
  8. data/lib/scout/cmd.rb +23 -24
  9. data/lib/scout/concurrent_stream.rb +36 -19
  10. data/lib/scout/exceptions.rb +10 -0
  11. data/lib/scout/log/color.rb +11 -11
  12. data/lib/scout/log/progress/report.rb +7 -5
  13. data/lib/scout/log/progress/util.rb +3 -0
  14. data/lib/scout/log/trap.rb +3 -3
  15. data/lib/scout/log.rb +64 -36
  16. data/lib/scout/meta_extension.rb +34 -0
  17. data/lib/scout/misc/digest.rb +11 -2
  18. data/lib/scout/misc/format.rb +12 -7
  19. data/lib/scout/misc/monitor.rb +11 -0
  20. data/lib/scout/misc/system.rb +48 -0
  21. data/lib/scout/named_array.rb +8 -0
  22. data/lib/scout/offsite/ssh.rb +171 -0
  23. data/lib/scout/offsite/step.rb +83 -0
  24. data/lib/scout/offsite/sync.rb +55 -0
  25. data/lib/scout/offsite.rb +3 -0
  26. data/lib/scout/open/lock.rb +5 -24
  27. data/lib/scout/open/remote.rb +12 -1
  28. data/lib/scout/open/stream.rb +110 -122
  29. data/lib/scout/open/util.rb +9 -0
  30. data/lib/scout/open.rb +5 -4
  31. data/lib/scout/path/find.rb +15 -10
  32. data/lib/scout/path/util.rb +5 -0
  33. data/lib/scout/persist/serialize.rb +3 -3
  34. data/lib/scout/persist.rb +1 -1
  35. data/lib/scout/resource/path.rb +4 -0
  36. data/lib/scout/resource/util.rb +10 -4
  37. data/lib/scout/tsv/dumper.rb +2 -0
  38. data/lib/scout/tsv/index.rb +28 -86
  39. data/lib/scout/tsv/open.rb +35 -14
  40. data/lib/scout/tsv/parser.rb +9 -2
  41. data/lib/scout/tsv/persist/tokyocabinet.rb +2 -0
  42. data/lib/scout/tsv/stream.rb +204 -0
  43. data/lib/scout/tsv/transformer.rb +11 -0
  44. data/lib/scout/tsv.rb +9 -2
  45. data/lib/scout/work_queue/worker.rb +2 -2
  46. data/lib/scout/work_queue.rb +36 -12
  47. data/lib/scout/workflow/definition.rb +2 -1
  48. data/lib/scout/workflow/deployment/orchestrator.rb +245 -0
  49. data/lib/scout/workflow/deployment.rb +1 -0
  50. data/lib/scout/workflow/step/dependencies.rb +37 -11
  51. data/lib/scout/workflow/step/file.rb +5 -0
  52. data/lib/scout/workflow/step/info.rb +5 -3
  53. data/lib/scout/workflow/step/load.rb +1 -1
  54. data/lib/scout/workflow/step/provenance.rb +1 -0
  55. data/lib/scout/workflow/step/status.rb +6 -8
  56. data/lib/scout/workflow/step.rb +75 -30
  57. data/lib/scout/workflow/task/dependencies.rb +114 -0
  58. data/lib/scout/workflow/task/inputs.rb +27 -13
  59. data/lib/scout/workflow/task.rb +9 -108
  60. data/lib/scout/workflow/usage.rb +40 -12
  61. data/lib/scout/workflow.rb +4 -2
  62. data/lib/scout-gear.rb +2 -0
  63. data/lib/scout.rb +6 -0
  64. data/scout-gear.gemspec +32 -7
  65. data/scout_commands/doc +37 -0
  66. data/scout_commands/find +1 -0
  67. data/scout_commands/offsite +30 -0
  68. data/scout_commands/update +29 -0
  69. data/scout_commands/workflow/info +15 -3
  70. data/scout_commands/workflow/install +102 -0
  71. data/scout_commands/workflow/task +26 -5
  72. data/test/scout/offsite/test_ssh.rb +15 -0
  73. data/test/scout/offsite/test_step.rb +33 -0
  74. data/test/scout/offsite/test_sync.rb +36 -0
  75. data/test/scout/offsite/test_task.rb +0 -0
  76. data/test/scout/resource/test_path.rb +6 -0
  77. data/test/scout/test_named_array.rb +6 -0
  78. data/test/scout/test_persist.rb +3 -2
  79. data/test/scout/test_tsv.rb +17 -0
  80. data/test/scout/test_work_queue.rb +63 -41
  81. data/test/scout/tsv/persist/test_adapter.rb +1 -1
  82. data/test/scout/tsv/test_index.rb +14 -0
  83. data/test/scout/tsv/test_parser.rb +14 -0
  84. data/test/scout/tsv/test_stream.rb +200 -0
  85. data/test/scout/tsv/test_transformer.rb +12 -0
  86. data/test/scout/workflow/deployment/test_orchestrator.rb +272 -0
  87. data/test/scout/workflow/step/test_dependencies.rb +68 -0
  88. data/test/scout/workflow/step/test_info.rb +18 -0
  89. data/test/scout/workflow/step/test_status.rb +0 -1
  90. data/test/scout/workflow/task/test_dependencies.rb +355 -0
  91. data/test/scout/workflow/task/test_inputs.rb +53 -0
  92. data/test/scout/workflow/test_definition.rb +18 -0
  93. data/test/scout/workflow/test_documentation.rb +24 -0
  94. data/test/scout/workflow/test_step.rb +109 -0
  95. data/test/scout/workflow/test_task.rb +0 -287
  96. data/test/test_scout.rb +9 -0
  97. metadata +83 -5
  98. data/scout_commands/workflow/task_old +0 -706
@@ -8,7 +8,6 @@ module Resource
8
8
 
9
9
  map_order ||= (path_maps.keys & Path.basic_map_order) + (path_maps.keys - Path.basic_map_order)
10
10
  map_order -= [:current, "current"]
11
- map_order << :current
12
11
 
13
12
  choices = []
14
13
  map_order.uniq.each do |name|
@@ -17,6 +16,7 @@ module Resource
17
16
  next if pattern.nil?
18
17
 
19
18
  pattern = pattern.sub('{PWD}', Dir.pwd)
19
+ pattern = pattern.sub('{HOME}', ENV["HOME"])
20
20
  if String === pattern and pattern.include?('{')
21
21
  regexp = "^" + pattern
22
22
  .gsub(/{(TOPLEVEL)}/,'(?<\1>[^/]+)')
@@ -35,14 +35,20 @@ module Resource
35
35
  end
36
36
  end
37
37
 
38
- Path.setup(choices.sort_by{|s| s.length }.first, self, nil, path_maps)
38
+ identified = choices.sort_by{|s| s.length }.first
39
+
40
+ Path.setup(identified || path, self, nil, path_maps)
39
41
  end
40
42
 
41
- def self.relocate(path)
42
- return path if Open.exists?(path)
43
+ def self.identify(path)
43
44
  resource = path.pkgdir if Path === path
44
45
  resource = Scout unless Resource === resource
45
46
  unlocated = resource.identify path
47
+ end
48
+
49
+ def self.relocate(path)
50
+ return path if Open.exists?(path)
51
+ unlocated = identify(path)
46
52
  unlocated.find
47
53
  end
48
54
  end
@@ -28,7 +28,9 @@ module TSV
28
28
  :sep, :type,
29
29
  :sep => "\t", :type => :double
30
30
  @options = options
31
+ @options[:type] = @type
31
32
  @sout, @sin = Open.pipe
33
+ Log.low{"Dumper pipe #{[Log.fingerprint(@sin), Log.fingerprint(@sout)] * " -> "}"}
32
34
  @initialized = false
33
35
  @mutex = Mutex.new
34
36
  ConcurrentStream.setup(@sin, pair: @sout)
@@ -21,12 +21,14 @@ module TSV
21
21
 
22
22
  tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
23
23
 
24
- bar = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}" if TrueClass === bar
24
+ log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}"
25
+ Log.low log_msg
26
+ bar = log_msg if TrueClass === bar
25
27
 
26
28
  if order
27
29
  tmp_index = {}
28
30
  include_self = fields == :all || (Array === fields) && fields.include?(target)
29
- target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :double, unnamed: true, bar: bar, **kwargs do |k,values|
31
+ target_key_field, source_field_names = Open.traverse tsv_file, type: :double, key_field: target, fields: fields, unnamed: true, bar: bar, **kwargs do |k,values|
30
32
  tmp_index[k] ||= [[k]] if include_self
31
33
  values.each_with_index do |list,i|
32
34
  i += 1 if include_self
@@ -66,19 +68,26 @@ module TSV
66
68
  TSV.index(self, *args, **kwargs, &block)
67
69
  end
68
70
 
69
- def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
71
+ def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs)
70
72
  persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
71
73
  :persist, :persist_type, :persist_update, :data_persist,
72
74
  :persist => false, :persist_type => :fwt
73
75
  kwargs.delete :type
76
+ kwargs[:unnamed] = true
74
77
 
75
- Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :prefix => "RangeIndex", :other_options => kwargs, update: persist_update)) do |filename|
78
+ Persist.persist(tsv_file, type,
79
+ :persist => persist, :prefix => "RangeIndex[#{[start_field, end_field]*"-"}]", update: persist_update,
80
+ :other_options => kwargs) do |filename|
76
81
 
77
82
  tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
78
83
 
84
+ log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{[start_field, end_field]*"-"}"
85
+ Log.low log_msg
86
+ bar = log_msg if TrueClass === bar
87
+
79
88
  max_key_size = 0
80
89
  index_data = []
81
- TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], **kwargs do |key, values|
90
+ TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], bar: bar, **kwargs do |key, values|
82
91
  key_size = key.length
83
92
  max_key_size = key_size if key_size > max_key_size
84
93
 
@@ -100,23 +109,26 @@ module TSV
100
109
  end
101
110
  end
102
111
 
103
- def range_index(*args, **kwargs, &block)
104
- TSV.range_index(self, *args, **kwargs, &block)
105
- end
106
-
107
- def self.pos_index(tsv_file, pos_field = nil, key_field: :key, **kwargs)
112
+ def self.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs)
108
113
  persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
109
114
  :persist, :persist_type, :persist_update, :data_persist,
110
115
  :persist => false, :persist_type => :fwt
111
116
  kwargs.delete :type
117
+ kwargs[:unnamed] = true
112
118
 
113
- Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, update: persist_update, :prefix => "RangeIndex", :other_options => kwargs)) do |filename|
119
+ Persist.persist(tsv_file, type,
120
+ :persist => persist, :prefix => "RangeIndex[#{pos_field}]", update: persist_update,
121
+ :other_options => kwargs) do |filename|
114
122
 
115
123
  tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
116
124
 
125
+ log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{pos_field}"
126
+ Log.low log_msg
127
+ bar = log_msg if TrueClass === bar
128
+
117
129
  max_key_size = 0
118
130
  index_data = []
119
- TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, **kwargs do |key, pos|
131
+ TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, bar: bar, **kwargs do |key, pos|
120
132
  key_size = key.length
121
133
  max_key_size = key_size if key_size > max_key_size
122
134
 
@@ -137,81 +149,11 @@ module TSV
137
149
  end
138
150
  end
139
151
 
152
+ def range_index(*args, **kwargs, &block)
153
+ TSV.range_index(self, *args, **kwargs, &block)
154
+ end
155
+
140
156
  def pos_index(*args, **kwargs, &block)
141
157
  TSV.pos_index(self, *args, **kwargs, &block)
142
158
  end
143
-
144
-
145
- #def range_index(start_field = nil, end_field = nil, options = {})
146
- # start_field ||= "Start"
147
- # end_field ||= "End"
148
-
149
- # options = Misc.add_defaults options,
150
- # :persist => false, :persist_file => nil, :persist_update => false
151
-
152
- # persist_options = Misc.pull_keys options, :persist
153
- # persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"
154
-
155
- # Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
156
- # max_key_size = 0
157
- # index_data = []
158
- # with_unnamed do
159
- # with_monitor :desc => "Creating Index Data", :step => 10000 do
160
- # through :key, [start_field, end_field] do |key, values|
161
- # key_size = key.length
162
- # max_key_size = key_size if key_size > max_key_size
163
-
164
- # start_pos, end_pos = values
165
- # if Array === start_pos
166
- # start_pos.zip(end_pos).each do |s,e|
167
- # index_data << [key, [s.to_i, e.to_i]]
168
- # end
169
- # else
170
- # index_data << [key, [start_pos.to_i, end_pos.to_i]]
171
- # end
172
- # end
173
- # end
174
- # end
175
-
176
- # index = FixWidthTable.get(:memory, max_key_size, true)
177
- # index.add_range index_data
178
- # index.read
179
- # index
180
- # end
181
- #end
182
-
183
- #def self.range_index(file, start_field = nil, end_field = nil, options = {})
184
- # start_field ||= "Start"
185
- # end_field ||= "End"
186
-
187
- # data_options = Misc.pull_keys options, :data
188
- # filename = case
189
- # when (String === file or Path === file)
190
- # file
191
- # when file.respond_to?(:filename)
192
- # file.filename
193
- # else
194
- # file.object_id.to_s
195
- # end
196
- # persist_options = Misc.pull_keys options, :persist
197
- # persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
198
-
199
- # filters = Misc.process_options options, :filters
200
-
201
- # if filters
202
- # filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]"
203
- # end
204
-
205
- # Persist.persist(filename, :fwt, persist_options) do
206
- # tsv = TSV.open(file, data_options)
207
- # if filters
208
- # tsv.filter
209
- # filters.each do |match, value|
210
- # tsv.add_filter match, value
211
- # end
212
- # end
213
-
214
- # tsv.range_index(start_field, end_field, options)
215
- # end
216
- #end
217
159
  end
@@ -34,7 +34,7 @@ module Open
34
34
  end
35
35
 
36
36
  def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, keep_open: false, **options, &block)
37
- cpus = nil if cpus == 1
37
+ cpus = nil if cpus.to_i == 1
38
38
 
39
39
  if into == :stream
40
40
  sout, sin = Open.pipe
@@ -68,7 +68,15 @@ module Open
68
68
  bar.remove($!) if bar
69
69
  end
70
70
  end
71
+
71
72
  Thread.pass until into_thread["name"]
73
+
74
+ case into
75
+ when IO
76
+ ConcurrentStream.setup into, :threads => into_thread
77
+ when TSV::Dumper
78
+ ConcurrentStream.setup into.stream, :threads => into_thread
79
+ end
72
80
  return into
73
81
  end
74
82
  end
@@ -82,27 +90,29 @@ module Open
82
90
  callback.call res
83
91
  end
84
92
 
85
- self.traverse(obj, **options) do |*args|
86
- queue.write args
87
- end
88
-
89
93
  begin
94
+ self.traverse(obj, **options) do |*args|
95
+ queue.write args
96
+ end
97
+
90
98
  queue.close
91
99
 
92
- queue.join
100
+ queue.join(false)
93
101
 
94
102
  bar.remove if bar
95
103
  return into
96
104
  rescue Exception
97
105
  bar.remove($!) if bar
106
+ queue.abort
98
107
  raise $!
108
+ ensure
109
+ queue.clean
99
110
  end
100
111
  end
101
112
 
102
113
  begin
103
114
  res = case obj
104
115
  when TSV
105
- #obj.traverse options[:key_field], options[:fields], unnamed: unnamed, **options do |k,v,f|
106
116
  obj.traverse unnamed: unnamed, **options do |k,v,f|
107
117
  res = block.call(k, v, f)
108
118
  callback.call res if callback
@@ -122,11 +132,21 @@ module Open
122
132
  raise obj.exception if obj.error?
123
133
  self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
124
134
  when IO
125
- parser = TSV::Parser.new obj
126
- parser.traverse **options do |k,v,f|
127
- res = block.call k,v,f
128
- callback.call res if callback
129
- nil
135
+ if options[:type] == :array || options[:type] == :line
136
+ Log.low "Traverse stream by lines #{Log.fingerprint obj}"
137
+ while line = obj.gets
138
+ line.strip!
139
+ res = block.call(line)
140
+ callback.call res if callback
141
+ end
142
+ else
143
+ Log.low "Traverse stream with parser #{Log.fingerprint obj}"
144
+ parser = TSV::Parser.new obj
145
+ parser.traverse **options do |k,v,f|
146
+ res = block.call k,v,f
147
+ callback.call res if callback
148
+ nil
149
+ end
130
150
  end
131
151
  when TSV::Parser
132
152
  obj.traverse **options do |k,v,f|
@@ -142,9 +162,10 @@ module Open
142
162
  end
143
163
  end
144
164
  bar.remove if bar
145
- rescue
165
+ rescue Exception => exception
166
+ exception = obj.stream_exception if (ConcurrentStream === obj) && obj.stream_exception
146
167
  bar.error if bar
147
- raise $!
168
+ raise exception
148
169
  end
149
170
 
150
171
  into || res
@@ -190,6 +190,11 @@ module TSV
190
190
  end
191
191
 
192
192
  def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
193
+ if (Path === stream) || ((String === stream) && Path.is_filename?(stream))
194
+ Open.open(stream) do |f|
195
+ return parse_header(f, fix: fix, header_hash: header_hash, sep: sep)
196
+ end
197
+ end
193
198
  raise "Closed stream" if IO === stream && stream.closed?
194
199
 
195
200
  opts = {}
@@ -240,7 +245,7 @@ module TSV
240
245
  opts[:type] = opts[:type].to_sym if opts[:type]
241
246
  opts[:cast] = opts[:cast].to_sym if opts[:cast]
242
247
 
243
- [opts, key_field, fields, first_line, preamble]
248
+ NamedArray.setup([opts, key_field, fields, first_line, preamble], %w(options key_field fields first_line preamble))
244
249
  end
245
250
 
246
251
  KEY_PARAMETERS = begin
@@ -263,7 +268,7 @@ module TSV
263
268
  @options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
264
269
  @options[:sep] = sep if @options[:sep].nil?
265
270
  @options.merge!(:key_field => @key_field, :fields => @fields)
266
- @type = type
271
+ @type = @options[:type] || type
267
272
  end
268
273
 
269
274
  def all_fields
@@ -304,6 +309,8 @@ module TSV
304
309
  field_names = @fields
305
310
  end
306
311
 
312
+ kwargs[:positions] = nil if @type == :flat
313
+
307
314
  if key_field
308
315
  if @fields
309
316
  all_field_names ||= [@key_field] + @fields
@@ -42,6 +42,8 @@ module ScoutCabinet
42
42
 
43
43
  database.open(path, tokyocabinet_class::OREADER)
44
44
 
45
+ database.define_singleton_method(:fingerprint){ "#{self.persistence_class}:#{self.persistence_path}" }
46
+
45
47
  Persist::CONNECTIONS[path] = database
46
48
 
47
49
  database
@@ -0,0 +1,204 @@
1
+ module TSV
2
+ def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, field_prefix: nil)
3
+
4
+ streams = streams.collect do |stream|
5
+ case stream
6
+ when(defined? Step and Step)
7
+ stream.stream
8
+ when Path
9
+ stream.open
10
+ when TSV::Dumper
11
+ stream.stream
12
+ else
13
+ stream
14
+ end
15
+ end.compact
16
+
17
+ num_streams = streams.length
18
+
19
+ streams = streams.collect do |stream|
20
+ Open.sort_stream(stream, memory: sort_memory)
21
+ end if sort
22
+
23
+ begin
24
+
25
+ lines =[]
26
+ fields =[]
27
+ sizes =[]
28
+ key_fields =[]
29
+ input_options =[]
30
+ empty =[]
31
+ preambles =[]
32
+ parser_types =[]
33
+
34
+ type ||= :double
35
+
36
+ streams = streams.collect do |stream|
37
+
38
+ parser = TSV::Parser.new stream, type: type
39
+
40
+ sfields = parser.fields
41
+
42
+ if field_prefix
43
+ index = streams.index stream
44
+ prefix = field_prefix[index]
45
+
46
+ sfields = sfields.collect{|f|[prefix, f]* ":"}
47
+ end
48
+
49
+ first_line = parser.first_line
50
+ first_line = nil if first_line == ""
51
+
52
+ lines << first_line
53
+ key_fields << parser.key_field
54
+ fields << sfields
55
+ sizes << sfields.length if sfields
56
+ input_options << parser.options
57
+ preambles << parser.preamble if preamble and not parser.preamble.empty?
58
+ parser_types << parser.type
59
+
60
+ empty << stream if parser.first_line.nil? || parser.first_line.empty?
61
+
62
+ stream
63
+ end
64
+
65
+
66
+ all_fields = fields.dup
67
+
68
+ key_field = key_fields.compact.first
69
+
70
+ if same_fields
71
+ fields = fields.first
72
+ else
73
+ fields = fields.compact.flatten
74
+ end
75
+
76
+ options = input_options.first
77
+ type ||= options[:type]
78
+ type ||= :list if type == :single
79
+ type ||= :double if type == :flat
80
+
81
+ preamble_txt = case preamble
82
+ when TrueClass
83
+ preambles * "\n"
84
+ when String
85
+ if preamble[0]== '+'
86
+ preambles * "\n" + "\n" + preamble[1..-1]
87
+ else
88
+ preamble
89
+ end
90
+ else
91
+ nil
92
+ end
93
+
94
+ empty_pos = empty.collect{|stream| streams.index stream}
95
+
96
+ keys =[]
97
+ parts =[]
98
+ lines.each_with_index do |line,i|
99
+ if line.nil? || line.empty?
100
+ keys[i]= nil
101
+ parts[i]= nil
102
+ else
103
+ vs = line.chomp.split(sep, -1)
104
+ key, *p = vs
105
+ keys[i]= key
106
+ parts[i]= p
107
+ end
108
+ sizes[i]||= parts[i].length-1 unless parts[i].nil?
109
+ end
110
+ done_streams =[]
111
+
112
+ dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type
113
+ dumper.init
114
+
115
+ t = Thread.new do
116
+ Thread.report_on_exception = false
117
+ Thread.current["name"] = "Paste streams"
118
+
119
+ last_min = nil
120
+ while lines.reject{|line| line.nil?}.any?
121
+ min = keys.compact.sort.first
122
+ break if min.nil?
123
+ new_values =[]
124
+
125
+ skip = all_match && keys.uniq !=[min]
126
+
127
+ keys.each_with_index do |key,i|
128
+ case key
129
+ when min
130
+ new_values << parts[i]
131
+
132
+ begin
133
+ line = lines[i]= begin
134
+ streams[i].gets
135
+ rescue
136
+ Log.exception $!
137
+ nil
138
+ end
139
+ if line.nil?
140
+ keys[i]= nil
141
+ parts[i]= nil
142
+ else
143
+ k, *p = line.chomp.split(sep, -1)
144
+ raise TryAgain if k == keys[i]
145
+ keys[i]= k
146
+ parts[i]= p.collect{|e| e.nil? ? "" : e}
147
+ end
148
+ rescue TryAgain
149
+ keys[i]= nil
150
+ parts[i]= nil
151
+ Log.debug "Skipping repeated key in stream #{i}: #{keys[i]}"
152
+ retry
153
+ end
154
+ else
155
+ p = [nil] * sizes[i]
156
+ new_values << p
157
+ end
158
+ end
159
+
160
+ next if skip
161
+
162
+ if same_fields
163
+ new_values_same = []
164
+ new_values.each do |list|
165
+ list.each_with_index do |l,i|
166
+ new_values_same[i] ||= []
167
+ new_values_same[i] << l
168
+ end
169
+ end
170
+ new_values = new_values_same
171
+ else
172
+ new_values = new_values.inject([]){|acc,l| acc.concat l }
173
+ end
174
+
175
+ dumper.add min, new_values
176
+ end
177
+
178
+ dumper.close
179
+
180
+ streams.each do |stream|
181
+ stream.close if stream.respond_to?(:close) && ! stream.closed?
182
+ stream.join if stream.respond_to? :join
183
+ end
184
+ end
185
+ rescue Aborted
186
+ Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
187
+ streams.each do |stream|
188
+ stream.abort if stream.respond_to? :abort
189
+ end
190
+ raise $!
191
+ rescue Exception
192
+ Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
193
+ streams.each do |stream|
194
+ stream.abort if stream.respond_to? :abort
195
+ end
196
+ raise $!
197
+ end
198
+
199
+ Thread.pass until t["name"]
200
+
201
+ ConcurrentStream.setup(dumper.stream, threads: [t])
202
+ end
203
+
204
+ end
@@ -137,5 +137,16 @@ module TSV
137
137
  end
138
138
  res
139
139
  end
140
+
141
+ def to_flat
142
+ res = self.annotate({})
143
+ transformer = Transformer.new self, res
144
+ transformer.type = :flat
145
+ transformer.traverse do |k,v|
146
+ v = Array === v ? v.flatten : [v]
147
+ [k, v]
148
+ end
149
+ res
150
+ end
140
151
  end
141
152
 
data/lib/scout/tsv.rb CHANGED
@@ -10,10 +10,11 @@ require_relative 'tsv/traverse'
10
10
  require_relative 'tsv/open'
11
11
  require_relative 'tsv/attach'
12
12
  require_relative 'tsv/change_id'
13
+ require_relative 'tsv/stream'
13
14
 
14
15
  module TSV
15
16
  extend MetaExtension
16
- extension_attr :key_field, :fields, :type, :filename, :namespace, :unnamed, :identifiers
17
+ extension_attr :key_field, :fields, :type, :cast, :filename, :namespace, :unnamed, :identifiers
17
18
 
18
19
  def self.str2options(str)
19
20
  field_options,_sep, rest = str.partition("#")
@@ -40,7 +41,13 @@ module TSV
40
41
  data = filename ? ScoutCabinet.open(filename, true, type) : nil
41
42
  options[:data] = data if data
42
43
  options[:filename] = file
43
- Log.debug "TSV open #{Log.fingerprint file}"
44
+
45
+ if data
46
+ Log.debug "TSV open #{Log.fingerprint file} into #{Log.fingerprint data}"
47
+ else
48
+ Log.debug "TSV open #{Log.fingerprint file}"
49
+ end
50
+
44
51
  Open.open(file, grep: grep, invert_grep: invert_grep) do |f|
45
52
  TSV.parse(f, **options)
46
53
  end
@@ -27,10 +27,10 @@ class WorkQueue
27
27
  rescue Interrupt
28
28
  rescue Exception
29
29
  output.write WorkerException.new($!, Process.pid)
30
- Process.exit! -1
30
+ exit -1
31
31
  ensure
32
32
  end
33
- Process.exit! 0
33
+ exit 0
34
34
  end
35
35
  end
36
36