scout-gear 7.3.0 → 8.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (134) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +44 -16
  3. data/Rakefile +6 -1
  4. data/VERSION +1 -1
  5. data/bin/scout +21 -7
  6. data/doc/lib/scout/path.md +35 -0
  7. data/doc/lib/scout/workflow/task.md +13 -0
  8. data/lib/rbbt-scout.rb +1 -0
  9. data/lib/scout/cmd.rb +24 -25
  10. data/lib/scout/concurrent_stream.rb +59 -39
  11. data/lib/scout/config.rb +1 -1
  12. data/lib/scout/exceptions.rb +10 -0
  13. data/lib/scout/log/color.rb +15 -12
  14. data/lib/scout/log/progress/report.rb +8 -6
  15. data/lib/scout/log/progress/util.rb +61 -54
  16. data/lib/scout/log/progress.rb +1 -1
  17. data/lib/scout/log/trap.rb +107 -0
  18. data/lib/scout/log.rb +115 -52
  19. data/lib/scout/meta_extension.rb +47 -6
  20. data/lib/scout/misc/digest.rb +12 -3
  21. data/lib/scout/misc/format.rb +24 -7
  22. data/lib/scout/misc/insist.rb +1 -1
  23. data/lib/scout/misc/monitor.rb +22 -0
  24. data/lib/scout/misc/system.rb +58 -0
  25. data/lib/scout/named_array.rb +73 -3
  26. data/lib/scout/offsite/ssh.rb +171 -0
  27. data/lib/scout/offsite/step.rb +83 -0
  28. data/lib/scout/offsite/sync.rb +55 -0
  29. data/lib/scout/offsite.rb +3 -0
  30. data/lib/scout/open/lock/lockfile.rb +587 -0
  31. data/lib/scout/open/lock.rb +9 -2
  32. data/lib/scout/open/remote.rb +16 -1
  33. data/lib/scout/open/stream.rb +146 -83
  34. data/lib/scout/open/util.rb +22 -3
  35. data/lib/scout/open.rb +5 -4
  36. data/lib/scout/path/find.rb +24 -11
  37. data/lib/scout/path/util.rb +40 -0
  38. data/lib/scout/persist/serialize.rb +19 -6
  39. data/lib/scout/persist.rb +29 -13
  40. data/lib/scout/resource/path.rb +57 -0
  41. data/lib/scout/resource/produce.rb +0 -8
  42. data/lib/scout/resource/util.rb +12 -5
  43. data/lib/scout/tmpfile.rb +7 -8
  44. data/lib/scout/tsv/attach.rb +177 -0
  45. data/lib/scout/tsv/change_id.rb +40 -0
  46. data/lib/scout/tsv/dumper.rb +74 -46
  47. data/lib/scout/tsv/index.rb +85 -87
  48. data/lib/scout/tsv/open.rb +160 -85
  49. data/lib/scout/tsv/parser.rb +142 -80
  50. data/lib/scout/tsv/path.rb +1 -2
  51. data/lib/scout/tsv/persist/adapter.rb +15 -45
  52. data/lib/scout/tsv/persist/fix_width_table.rb +3 -0
  53. data/lib/scout/tsv/persist/tokyocabinet.rb +6 -1
  54. data/lib/scout/tsv/persist.rb +4 -0
  55. data/lib/scout/tsv/stream.rb +204 -0
  56. data/lib/scout/tsv/transformer.rb +152 -0
  57. data/lib/scout/tsv/traverse.rb +96 -92
  58. data/lib/scout/tsv/util/filter.rb +9 -0
  59. data/lib/scout/tsv/util/reorder.rb +81 -0
  60. data/lib/scout/tsv/util/select.rb +78 -33
  61. data/lib/scout/tsv/util/unzip.rb +86 -0
  62. data/lib/scout/tsv/util.rb +60 -11
  63. data/lib/scout/tsv.rb +34 -4
  64. data/lib/scout/work_queue/socket.rb +6 -1
  65. data/lib/scout/work_queue/worker.rb +5 -2
  66. data/lib/scout/work_queue.rb +51 -20
  67. data/lib/scout/workflow/definition.rb +23 -3
  68. data/lib/scout/workflow/deployment/orchestrator.rb +245 -0
  69. data/lib/scout/workflow/deployment.rb +1 -0
  70. data/lib/scout/workflow/step/dependencies.rb +56 -10
  71. data/lib/scout/workflow/step/file.rb +5 -0
  72. data/lib/scout/workflow/step/info.rb +40 -7
  73. data/lib/scout/workflow/step/load.rb +1 -1
  74. data/lib/scout/workflow/step/provenance.rb +9 -7
  75. data/lib/scout/workflow/step/status.rb +43 -0
  76. data/lib/scout/workflow/step.rb +160 -49
  77. data/lib/scout/workflow/task/dependencies.rb +114 -0
  78. data/lib/scout/workflow/task/inputs.rb +40 -32
  79. data/lib/scout/workflow/task.rb +38 -102
  80. data/lib/scout/workflow/usage.rb +48 -18
  81. data/lib/scout/workflow.rb +4 -2
  82. data/lib/scout-gear.rb +2 -0
  83. data/lib/scout.rb +6 -0
  84. data/scout-gear.gemspec +52 -23
  85. data/scout_commands/doc +37 -0
  86. data/scout_commands/find +1 -0
  87. data/scout_commands/offsite +30 -0
  88. data/scout_commands/update +29 -0
  89. data/scout_commands/workflow/info +15 -3
  90. data/scout_commands/workflow/install +102 -0
  91. data/scout_commands/workflow/task +57 -9
  92. data/test/scout/offsite/test_ssh.rb +15 -0
  93. data/test/scout/offsite/test_step.rb +33 -0
  94. data/test/scout/offsite/test_sync.rb +36 -0
  95. data/test/scout/offsite/test_task.rb +0 -0
  96. data/test/scout/open/test_stream.rb +60 -58
  97. data/test/scout/path/test_find.rb +10 -1
  98. data/test/scout/resource/test_path.rb +6 -0
  99. data/test/scout/resource/test_produce.rb +15 -0
  100. data/test/scout/test_meta_extension.rb +25 -0
  101. data/test/scout/test_named_array.rb +24 -0
  102. data/test/scout/test_persist.rb +9 -2
  103. data/test/scout/test_tsv.rb +229 -2
  104. data/test/scout/test_work_queue.rb +65 -41
  105. data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
  106. data/test/scout/tsv/test_attach.rb +227 -0
  107. data/test/scout/tsv/test_change_id.rb +98 -0
  108. data/test/scout/tsv/test_dumper.rb +1 -1
  109. data/test/scout/tsv/test_index.rb +49 -3
  110. data/test/scout/tsv/test_open.rb +160 -2
  111. data/test/scout/tsv/test_parser.rb +33 -2
  112. data/test/scout/tsv/test_persist.rb +2 -0
  113. data/test/scout/tsv/test_stream.rb +200 -0
  114. data/test/scout/tsv/test_transformer.rb +120 -0
  115. data/test/scout/tsv/test_traverse.rb +88 -3
  116. data/test/scout/tsv/test_util.rb +1 -0
  117. data/test/scout/tsv/util/test_reorder.rb +94 -0
  118. data/test/scout/tsv/util/test_select.rb +25 -11
  119. data/test/scout/tsv/util/test_unzip.rb +112 -0
  120. data/test/scout/work_queue/test_socket.rb +0 -1
  121. data/test/scout/workflow/deployment/test_orchestrator.rb +272 -0
  122. data/test/scout/workflow/step/test_dependencies.rb +68 -0
  123. data/test/scout/workflow/step/test_info.rb +18 -0
  124. data/test/scout/workflow/step/test_status.rb +30 -0
  125. data/test/scout/workflow/task/test_dependencies.rb +355 -0
  126. data/test/scout/workflow/task/test_inputs.rb +67 -14
  127. data/test/scout/workflow/test_definition.rb +18 -0
  128. data/test/scout/workflow/test_documentation.rb +24 -0
  129. data/test/scout/workflow/test_step.rb +112 -3
  130. data/test/scout/workflow/test_task.rb +0 -151
  131. data/test/scout/workflow/test_usage.rb +33 -6
  132. data/test/test_scout.rb +9 -0
  133. metadata +100 -8
  134. data/scout_commands/workflow/task_old +0 -706
@@ -1,38 +1,14 @@
1
1
  module TSV
2
2
  class Dumper
3
- def self.header_lines(key_field, fields, entry_hash = nil)
4
- if Hash === entry_hash
5
- sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
6
- preamble = entry_hash[:preamble]
7
- header_hash = entry_hash[:header_hash]
8
- end
9
-
10
- header_hash = "#" if header_hash.nil?
11
-
12
- preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
13
-
14
- str = ""
15
- str << preamble.strip << "\n" if preamble and not preamble.empty?
16
- if fields
17
- if fields.empty?
18
- str << header_hash << (key_field || "ID").to_s << "\n"
19
- else
20
- str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
21
- end
22
- end
23
-
24
- str
25
- end
26
-
27
3
  def self.header(options={})
28
- key_field, fields, sep, header_hash, preamble = IndiferentHash.process_options options,
29
- :key_field, :fields, :sep, :header_hash, :preamble,
4
+ key_field, fields, sep, header_hash, preamble, unnamed = IndiferentHash.process_options options,
5
+ :key_field, :fields, :sep, :header_hash, :preamble, :unnamed,
30
6
  :sep => "\t", :header_hash => "#", :preamble => true
31
7
 
32
- if fields.nil? || key_field.nil?
8
+ if fields.nil?
33
9
  fields_str = nil
34
10
  else
35
- fields_str = "#{header_hash}#{key_field}#{sep}#{fields*sep}"
11
+ fields_str = "#{header_hash}#{key_field || "Id"}#{sep}#{fields*sep}"
36
12
  end
37
13
 
38
14
  if preamble && options.values.compact.any?
@@ -45,31 +21,64 @@ module TSV
45
21
  end
46
22
 
47
23
 
48
- attr_accessor :options
24
+ attr_accessor :options, :initialized, :type, :sep
49
25
  def initialize(options = {})
26
+ options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
50
27
  @sep, @type = IndiferentHash.process_options options,
51
28
  :sep, :type,
52
29
  :sep => "\t", :type => :double
53
30
  @options = options
31
+ @options[:type] = @type
54
32
  @sout, @sin = Open.pipe
33
+ Log.low{"Dumper pipe #{[Log.fingerprint(@sin), Log.fingerprint(@sout)] * " -> "}"}
34
+ @initialized = false
35
+ @mutex = Mutex.new
55
36
  ConcurrentStream.setup(@sin, pair: @sout)
56
37
  ConcurrentStream.setup(@sout, pair: @sin)
57
38
  end
58
39
 
59
- def init
60
- header = Dumper.header(@options.merge(:type => @type, :sep => @sep))
61
- @sin.puts header if header and ! header.empty?
40
+ def key_field
41
+ @options[:key_field]
42
+ end
43
+
44
+ def fields
45
+ @options[:fields]
62
46
  end
63
47
 
64
- def add(key, value)
48
+ def key_field=(key_field)
49
+ @options[:key_field] = key_field
50
+ end
51
+
52
+ def fields=(fields)
53
+ @options[:fields] = fields
54
+ end
55
+
56
+ def all_fields
57
+ return nil if fields.nil?
58
+ [key_field] + fields
59
+ end
65
60
 
66
- case @type
67
- when :single
68
- @sin.puts key + @sep + value
69
- when :list, :flat
70
- @sin.puts key + @sep + value * @sep
71
- when :double
72
- @sin.puts key + @sep + value.collect{|v| v * "|" } * @sep
61
+
62
+ def init(preamble: true)
63
+ header = Dumper.header(@options.merge(type: @type, sep: @sep, preamble: preamble))
64
+ @mutex.synchronize do
65
+ @initialized = true
66
+ @sin.puts header if header and ! header.empty?
67
+ end
68
+ end
69
+
70
+ def add(key, value)
71
+ @mutex.synchronize do
72
+
73
+ key = key.to_s unless String === key
74
+ case @type
75
+ when :single
76
+ @sin.puts key + @sep + value.to_s
77
+ when :list, :flat
78
+ @sin.puts key + @sep + value * @sep
79
+ when :double
80
+ @sin.puts key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep
81
+ end
73
82
  end
74
83
  end
75
84
 
@@ -85,15 +94,32 @@ module TSV
85
94
  def abort(exception=nil)
86
95
  @sin.abort(exception)
87
96
  end
97
+
98
+ def tsv(*args)
99
+ TSV.open(stream, *args)
100
+ end
101
+
102
+ def fingerprint
103
+ "Dumper:{"<< Log.fingerprint(self.all_fields|| []) << "}"
104
+ end
105
+
106
+ def digest_str
107
+ fingerprint
108
+ end
109
+
110
+ def inspect
111
+ fingerprint
112
+ end
88
113
  end
89
114
 
90
- def stream
91
- dumper = TSV::Dumper.new self.extension_attr_hash
92
- dumper.init
115
+ def dumper_stream(options = {})
116
+ preamble = IndiferentHash.process_options options, :preamble, :preamble => true
117
+ dumper = TSV::Dumper.new self.extension_attr_hash.merge(options)
93
118
  t = Thread.new do
94
119
  begin
95
120
  Thread.current.report_on_exception = true
96
121
  Thread.current["name"] = "Dumper thread"
122
+ dumper.init(preamble: preamble)
97
123
  self.each do |k,v|
98
124
  dumper.add k, v
99
125
  end
@@ -103,10 +129,12 @@ module TSV
103
129
  end
104
130
  end
105
131
  Thread.pass until t["name"]
106
- dumper.stream
132
+ s = dumper.stream
133
+ ConcurrentStream.setup(s, :threads => [t])
134
+ s
107
135
  end
108
136
 
109
- def to_s
110
- stream.read
137
+ def to_s(options = {})
138
+ dumper_stream(options).read
111
139
  end
112
140
  end
@@ -1,13 +1,16 @@
1
1
  require_relative 'parser'
2
+ require_relative 'transformer'
2
3
  require_relative 'persist/fix_width_table'
3
4
  module TSV
4
- def self.index(tsv_file, target: 0, fields: nil, order: true, **kwargs)
5
- persist, type = IndiferentHash.process_options kwargs,
6
- :persist, :persist_type,
5
+ def self.index(tsv_file, target: 0, fields: nil, order: true, bar: nil, **kwargs)
6
+ persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
7
+ :persist, :persist_type, :persist_update, :data_persist,
7
8
  :persist => false, :persist_type => "HDB"
8
9
  kwargs.delete :type
9
10
 
10
- Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :persist_prefix => "Index")) do |filename|
11
+ fields = :all if fields.nil?
12
+
13
+ Persist.persist(tsv_file, type, kwargs.merge(target: target, fields: fields, persist: persist, update: persist_update, :prefix => "Index", :other_options => kwargs)) do |filename|
11
14
  if filename
12
15
  index = ScoutCabinet.open(filename, true, type)
13
16
  TSV.setup(index, :type => :single)
@@ -16,11 +19,19 @@ module TSV
16
19
  index = TSV.setup({}, :type => :single)
17
20
  end
18
21
 
19
- dummy_data = TSV.setup({}, :key_field => "Key", :fields => ["Target"])
22
+ tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
23
+
24
+ log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}"
25
+ Log.low log_msg
26
+ bar = log_msg if TrueClass === bar
27
+
20
28
  if order
21
29
  tmp_index = {}
22
- key_field, field_names = TSV.traverse tsv_file, key_field: target, fields: fields, type: :double, into: dummy_data, unnamed: true, **kwargs do |k,values|
30
+ include_self = fields == :all || (Array === fields) && fields.include?(target)
31
+ target_key_field, source_field_names = Open.traverse tsv_file, type: :double, key_field: target, fields: fields, unnamed: true, bar: bar, **kwargs do |k,values|
32
+ tmp_index[k] ||= [[k]] if include_self
23
33
  values.each_with_index do |list,i|
34
+ i += 1 if include_self
24
35
  list.each do |e|
25
36
  tmp_index[e] ||= []
26
37
  tmp_index[e][i] ||= []
@@ -31,16 +42,24 @@ module TSV
31
42
  tmp_index.each do |e,list|
32
43
  index[e] = list.flatten.compact.uniq.first
33
44
  end
45
+
46
+ index.key_field = source_field_names * ","
47
+ index.fields = [target_key_field]
48
+
49
+ tmp_index = {}
50
+
34
51
  else
35
- key_field, field_names = TSV.traverse tsv_file, key_field: target, fields: fields, type: :flat, into: dummy_data, unnamed: true, **kwargs do |k,values|
52
+ target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :flat, unnamed: true, bar: bar, **kwargs do |k,values|
36
53
  values.each do |e|
37
54
  index[e] = k unless index.include?(e)
38
55
  end
39
56
  end
57
+
58
+ index.key_field = source_field_names * ","
59
+ index.fields = [target_key_field]
40
60
  end
41
61
 
42
- index.key_field = dummy_data.fields * ", "
43
- index.fields = [dummy_data.key_field]
62
+
44
63
  index
45
64
  end
46
65
  end
@@ -49,17 +68,26 @@ module TSV
49
68
  TSV.index(self, *args, **kwargs, &block)
50
69
  end
51
70
 
52
- def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
53
- persist, type = IndiferentHash.process_options kwargs,
54
- :persist, :persist_type,
71
+ def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs)
72
+ persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
73
+ :persist, :persist_type, :persist_update, :data_persist,
55
74
  :persist => false, :persist_type => :fwt
56
75
  kwargs.delete :type
76
+ kwargs[:unnamed] = true
57
77
 
58
- Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :persist_prefix => "Index")) do |filename|
78
+ Persist.persist(tsv_file, type,
79
+ :persist => persist, :prefix => "RangeIndex[#{[start_field, end_field]*"-"}]", update: persist_update,
80
+ :other_options => kwargs) do |filename|
81
+
82
+ tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
83
+
84
+ log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{[start_field, end_field]*"-"}"
85
+ Log.low log_msg
86
+ bar = log_msg if TrueClass === bar
59
87
 
60
88
  max_key_size = 0
61
89
  index_data = []
62
- TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field] do |key, values|
90
+ TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], bar: bar, **kwargs do |key, values|
63
91
  key_size = key.length
64
92
  max_key_size = key_size if key_size > max_key_size
65
93
 
@@ -81,81 +109,51 @@ module TSV
81
109
  end
82
110
  end
83
111
 
112
+ def self.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs)
113
+ persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
114
+ :persist, :persist_type, :persist_update, :data_persist,
115
+ :persist => false, :persist_type => :fwt
116
+ kwargs.delete :type
117
+ kwargs[:unnamed] = true
118
+
119
+ Persist.persist(tsv_file, type,
120
+ :persist => persist, :prefix => "RangeIndex[#{pos_field}]", update: persist_update,
121
+ :other_options => kwargs) do |filename|
122
+
123
+ tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
124
+
125
+ log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{pos_field}"
126
+ Log.low log_msg
127
+ bar = log_msg if TrueClass === bar
128
+
129
+ max_key_size = 0
130
+ index_data = []
131
+ TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, bar: bar, **kwargs do |key, pos|
132
+ key_size = key.length
133
+ max_key_size = key_size if key_size > max_key_size
134
+
135
+ if Array === pos
136
+ pos.zip(end_pos).each do |p|
137
+ index_pos << [key, p]
138
+ end
139
+ else
140
+ index_data << [key, pos]
141
+ end
142
+ end
143
+
144
+ filename = :memory if filename.nil?
145
+ index = FixWidthTable.get(filename, max_key_size, false)
146
+ index.add_point index_data
147
+ index.read
148
+ index
149
+ end
150
+ end
151
+
84
152
  def range_index(*args, **kwargs, &block)
85
153
  TSV.range_index(self, *args, **kwargs, &block)
86
154
  end
87
155
 
88
-
89
- #def range_index(start_field = nil, end_field = nil, options = {})
90
- # start_field ||= "Start"
91
- # end_field ||= "End"
92
-
93
- # options = Misc.add_defaults options,
94
- # :persist => false, :persist_file => nil, :persist_update => false
95
-
96
- # persist_options = Misc.pull_keys options, :persist
97
- # persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"
98
-
99
- # Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
100
- # max_key_size = 0
101
- # index_data = []
102
- # with_unnamed do
103
- # with_monitor :desc => "Creating Index Data", :step => 10000 do
104
- # through :key, [start_field, end_field] do |key, values|
105
- # key_size = key.length
106
- # max_key_size = key_size if key_size > max_key_size
107
-
108
- # start_pos, end_pos = values
109
- # if Array === start_pos
110
- # start_pos.zip(end_pos).each do |s,e|
111
- # index_data << [key, [s.to_i, e.to_i]]
112
- # end
113
- # else
114
- # index_data << [key, [start_pos.to_i, end_pos.to_i]]
115
- # end
116
- # end
117
- # end
118
- # end
119
-
120
- # index = FixWidthTable.get(:memory, max_key_size, true)
121
- # index.add_range index_data
122
- # index.read
123
- # index
124
- # end
125
- #end
126
-
127
- #def self.range_index(file, start_field = nil, end_field = nil, options = {})
128
- # start_field ||= "Start"
129
- # end_field ||= "End"
130
-
131
- # data_options = Misc.pull_keys options, :data
132
- # filename = case
133
- # when (String === file or Path === file)
134
- # file
135
- # when file.respond_to?(:filename)
136
- # file.filename
137
- # else
138
- # file.object_id.to_s
139
- # end
140
- # persist_options = Misc.pull_keys options, :persist
141
- # persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
142
-
143
- # filters = Misc.process_options options, :filters
144
-
145
- # if filters
146
- # filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]"
147
- # end
148
-
149
- # Persist.persist(filename, :fwt, persist_options) do
150
- # tsv = TSV.open(file, data_options)
151
- # if filters
152
- # tsv.filter
153
- # filters.each do |match, value|
154
- # tsv.add_filter match, value
155
- # end
156
- # end
157
-
158
- # tsv.range_index(start_field, end_field, options)
159
- # end
160
- #end
156
+ def pos_index(*args, **kwargs, &block)
157
+ TSV.pos_index(self, *args, **kwargs, &block)
158
+ end
161
159
  end
@@ -1,61 +1,56 @@
1
1
  require_relative '../open'
2
+ require_relative '../work_queue'
3
+
4
+ module MultipleResult
5
+ def self.setup(obj)
6
+ obj.extend MultipleResult
7
+ obj
8
+ end
9
+ end
10
+
2
11
  module Open
3
12
  def self.traverse_add(into, res)
4
- case into
5
- when TSV::Dumper
6
- into.add *res
7
- when TSV, Hash
8
- key, value = res
9
- into[key] = value
13
+ if Array === res && MultipleResult === res
14
+ res.each do |_res|
15
+ traverse_add into, _res
16
+ end
17
+ else
18
+ case into
19
+ when defined?(TSV::Dumper) && TSV::Dumper
20
+ into.add *res
21
+ when TSV, Hash
22
+ key, value = res
23
+ if into.type == :double
24
+ into.zip_new key, value, insitu: false
25
+ else
26
+ into[key] = value
27
+ end
28
+ when Array, Set
29
+ into << res
30
+ when IO, StringIO
31
+ into.puts res
32
+ end
10
33
  end
11
34
  end
12
35
 
13
- #def self.traverse(obj, into: nil, cpus: nil, bar: nil, **options, &block)
14
- # case obj
15
- # when TSV
16
- # obj.traverse options[:key_field], options[:fields], **options do |k,v|
17
- # res = yield k, v
18
- # end
19
- # when String
20
- # f = Open.open(obj)
21
- # self.traverse(f, into: into, cpus: cpus, bar: bar, **options, &block)
22
- # when Step
23
- # self.traverse(obj.stream, into: into, cpus: cpus, bar: bar, **options, &block)
24
- # when IO
25
- # if into && (IO === into || into.respond_to?(:stream) )
26
- # into_thread = Thread.new do
27
- # Thread.current.report_on_exception = false
28
- # Thread.current["name"] = "Traverse into"
29
- # TSV.parse obj, **options do |k,v|
30
- # begin
31
- # res = block.call k, v
32
- # traverse_add into, res
33
- # rescue
34
- # into.abort $!
35
- # end
36
- # nil
37
- # end
38
- # into.close if into.respond_to?(:close)
39
- # end
40
- # Thread.pass until into_thread
41
- # into
42
- # else
43
- # TSV.parse obj, **options do |k,v|
44
- # block.call k, v
45
- # nil
46
- # end
47
- # end
48
- # end
49
- #end
50
-
51
- def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, **options, &block)
36
+ def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, keep_open: false, **options, &block)
37
+ cpus = nil if cpus.to_i == 1
38
+
39
+ if into == :stream
40
+ sout, sin = Open.pipe
41
+ ConcurrentStream.setup(sout, :pair => sin)
42
+ ConcurrentStream.setup(sin, :pair => sout)
43
+ self.traverse(obj, into: sin, cpus: cpus, bar: bar, callback: callback, unnamed: unnamed, **options, &block)
44
+ return sout
45
+ end
52
46
 
53
47
  if into || bar
54
48
  orig_callback = callback if callback
55
- bar = Log::ProgressBar.get_obj_bar(bar, obj)
49
+ bar = Log::ProgressBar.get_obj_bar(obj, bar) if bar
50
+ bar.init if bar
56
51
  callback = proc do |res|
57
52
  bar.tick if bar
58
- traverse_add into, res if into
53
+ traverse_add into, res if into && ! res.nil?
59
54
  orig_callback.call res if orig_callback
60
55
  end
61
56
 
@@ -65,59 +60,115 @@ module Open
65
60
  Thread.current["name"] = "Traverse into"
66
61
  error = false
67
62
  begin
68
- self.traverse(obj, callback: callback, **options, &block)
69
- into.close if into.respond_to?(:close)
63
+ self.traverse(obj, callback: callback, cpus: cpus, unnamed: unnamed, **options, &block)
64
+ into.close if ! keep_open && into.respond_to?(:close)
70
65
  bar.remove if bar
71
66
  rescue Exception
72
67
  into.abort($!) if into.respond_to?(:abort)
73
68
  bar.remove($!) if bar
74
69
  end
75
70
  end
76
- Thread.pass until into_thread
71
+
72
+ Thread.pass until into_thread["name"]
73
+
74
+ case into
75
+ when IO
76
+ ConcurrentStream.setup into, :threads => into_thread
77
+ when TSV::Dumper
78
+ ConcurrentStream.setup into.stream, :threads => into_thread
79
+ end
77
80
  return into
78
81
  end
79
82
  end
80
83
 
81
- begin
82
- case obj
83
- when TSV
84
- obj.traverse options[:key_field], options[:fields], unnamed: unnamed, **options do |k,v|
85
- res = block.call(k, v)
86
- callback.call res if callback
87
- nil
88
- end
89
- when Array
90
- obj.each do |line|
91
- res = block.call(line)
92
- callback.call res if callback
93
- nil
94
- end
95
- when String
96
- f = Open.open(obj)
97
- self.traverse(f, cpus: cpus, callback: callback, **options, &block)
98
- when Step
99
- raise obj.exception if obj.error?
100
- self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
101
- when IO
102
- TSV.parse obj, **options do |k,v|
103
- res = block.call k, v
104
- callback.call res if callback
105
- nil
106
- end
107
- else
108
- TSV.parse obj, **options do |k,v|
109
- res = block.call k, v
110
- callback.call res if callback
111
- nil
84
+ if cpus
85
+ queue = WorkQueue.new cpus do |args|
86
+ block.call *args
87
+ end
88
+
89
+ queue.process do |res|
90
+ callback.call res
91
+ end
92
+
93
+ begin
94
+ self.traverse(obj, **options) do |*args|
95
+ queue.write args
112
96
  end
97
+
98
+ queue.close
99
+
100
+ queue.join(false)
101
+
102
+ bar.remove if bar
103
+ return into
104
+ rescue Exception
105
+ bar.remove($!) if bar
106
+ queue.abort
107
+ raise $!
108
+ ensure
109
+ queue.clean
113
110
  end
111
+ end
112
+
113
+ begin
114
+ res = case obj
115
+ when TSV
116
+ obj.traverse unnamed: unnamed, **options do |k,v,f|
117
+ res = block.call(k, v, f)
118
+ callback.call res if callback
119
+ nil
120
+ end
121
+ when Array
122
+ obj.each do |line|
123
+ res = block.call(line)
124
+ callback.call res if callback
125
+ nil
126
+ end
127
+ when String
128
+ obj = obj.produce_and_find if Path === obj
129
+ f = Open.open(obj)
130
+ self.traverse(f, cpus: cpus, callback: callback, **options, &block)
131
+ when Step
132
+ raise obj.exception if obj.error?
133
+ self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
134
+ when IO
135
+ if options[:type] == :array || options[:type] == :line
136
+ Log.low "Traverse stream by lines #{Log.fingerprint obj}"
137
+ while line = obj.gets
138
+ line.strip!
139
+ res = block.call(line)
140
+ callback.call res if callback
141
+ end
142
+ else
143
+ Log.low "Traverse stream with parser #{Log.fingerprint obj}"
144
+ parser = TSV::Parser.new obj
145
+ parser.traverse **options do |k,v,f|
146
+ res = block.call k,v,f
147
+ callback.call res if callback
148
+ nil
149
+ end
150
+ end
151
+ when TSV::Parser
152
+ obj.traverse **options do |k,v,f|
153
+ res = block.call k, v, f
154
+ callback.call res if callback
155
+ nil
156
+ end
157
+ else
158
+ TSV.parse obj, **options do |k,v|
159
+ res = block.call k, v
160
+ callback.call res if callback
161
+ nil
162
+ end
163
+ end
114
164
  bar.remove if bar
115
- rescue
116
- bar.abort($!) if bar
117
- raise $!
165
+ rescue Exception => exception
166
+ exception = obj.stream_exception if (ConcurrentStream === obj) && obj.stream_exception
167
+ bar.error if bar
168
+ raise exception
118
169
  end
119
170
 
120
- into
171
+ into || res
121
172
  end
122
173
  end
123
174
 
@@ -125,4 +176,28 @@ module TSV
125
176
  def self.traverse(*args, **kwargs, &block)
126
177
  Open.traverse(*args, **kwargs, &block)
127
178
  end
179
+
180
+ def self.process_stream(stream, header_hash: "#", &block)
181
+ sout = Open.open_pipe do |sin|
182
+ while line = stream.gets
183
+ break unless line.start_with?(header_hash)
184
+ sin.puts line
185
+ end
186
+ yield sin, line
187
+ end
188
+ end
189
+
190
+ def self.collapse_stream(stream, *args, **kwargs, &block)
191
+ stream = stream.stream if stream.respond_to?(:stream)
192
+ self.process_stream(stream) do |sin, line|
193
+ collapsed = Open.collapse_stream(stream, line: line)
194
+ Open.consume_stream(collapsed, false, sin)
195
+ end
196
+ end
197
+
198
+ def collapse_stream(*args, **kwargs, &block)
199
+ TSV.collapse_stream(self.dumper_stream, *args, **kwargs, &block)
200
+ end
201
+
202
+
128
203
  end