scout-gear 10.4.0 → 10.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +100 -656
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bin/scout +1 -3
- data/lib/scout/association/fields.rb +170 -0
- data/lib/scout/association/index.rb +229 -0
- data/lib/scout/association/item.rb +227 -0
- data/lib/scout/association/util.rb +7 -0
- data/lib/scout/association.rb +100 -0
- data/lib/scout/entity/format.rb +62 -0
- data/lib/scout/entity/identifiers.rb +111 -0
- data/lib/scout/entity/object.rb +20 -0
- data/lib/scout/entity/property.rb +165 -0
- data/lib/scout/entity.rb +40 -0
- data/lib/scout/offsite/step.rb +2 -2
- data/lib/scout/{tsv/persist → persist/engine}/fix_width_table.rb +25 -33
- data/lib/scout/persist/engine/packed_index.rb +100 -0
- data/lib/scout/persist/engine/sharder.rb +219 -0
- data/lib/scout/{tsv/persist → persist/engine}/tkrzw.rb +0 -17
- data/lib/scout/{tsv/persist → persist/engine}/tokyocabinet.rb +55 -31
- data/lib/scout/persist/engine.rb +4 -0
- data/lib/scout/{tsv/persist/adapter.rb → persist/tsv/adapter/base.rb} +80 -51
- data/lib/scout/persist/tsv/adapter/fix_width_table.rb +106 -0
- data/lib/scout/persist/tsv/adapter/packed_index.rb +95 -0
- data/lib/scout/persist/tsv/adapter/sharder.rb +54 -0
- data/lib/scout/persist/tsv/adapter/tkrzw.rb +18 -0
- data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +65 -0
- data/lib/scout/persist/tsv/adapter.rb +6 -0
- data/lib/scout/{tsv/persist → persist/tsv}/serialize.rb +5 -0
- data/lib/scout/persist/tsv.rb +107 -0
- data/lib/scout/tsv/annotation/repo.rb +83 -0
- data/lib/scout/tsv/annotation.rb +169 -0
- data/lib/scout/tsv/attach.rb +95 -19
- data/lib/scout/tsv/change_id/translate.rb +148 -0
- data/lib/scout/tsv/change_id.rb +3 -0
- data/lib/scout/tsv/csv.rb +85 -0
- data/lib/scout/tsv/dumper.rb +113 -25
- data/lib/scout/tsv/entity.rb +5 -0
- data/lib/scout/tsv/index.rb +88 -36
- data/lib/scout/tsv/open.rb +21 -8
- data/lib/scout/tsv/parser.rb +153 -90
- data/lib/scout/tsv/path.rb +7 -2
- data/lib/scout/tsv/stream.rb +48 -6
- data/lib/scout/tsv/transformer.rb +4 -3
- data/lib/scout/tsv/traverse.rb +26 -18
- data/lib/scout/tsv/util/process.rb +7 -0
- data/lib/scout/tsv/util/reorder.rb +25 -15
- data/lib/scout/tsv/util/select.rb +9 -1
- data/lib/scout/tsv/util/sort.rb +90 -2
- data/lib/scout/tsv/util/unzip.rb +56 -0
- data/lib/scout/tsv/util.rb +52 -5
- data/lib/scout/tsv.rb +45 -27
- data/lib/scout/work_queue/socket.rb +8 -0
- data/lib/scout/work_queue/worker.rb +22 -5
- data/lib/scout/work_queue.rb +38 -24
- data/lib/scout/workflow/definition.rb +11 -10
- data/lib/scout/workflow/deployment/orchestrator.rb +20 -3
- data/lib/scout/workflow/deployment/trace.rb +205 -0
- data/lib/scout/workflow/deployment.rb +1 -0
- data/lib/scout/workflow/documentation.rb +1 -1
- data/lib/scout/workflow/step/archive.rb +42 -0
- data/lib/scout/workflow/step/children.rb +51 -0
- data/lib/scout/workflow/step/config.rb +1 -1
- data/lib/scout/workflow/step/dependencies.rb +24 -7
- data/lib/scout/workflow/step/file.rb +19 -0
- data/lib/scout/workflow/step/info.rb +37 -9
- data/lib/scout/workflow/step/progress.rb +11 -2
- data/lib/scout/workflow/step/status.rb +8 -1
- data/lib/scout/workflow/step.rb +80 -25
- data/lib/scout/workflow/task/dependencies.rb +4 -1
- data/lib/scout/workflow/task/inputs.rb +91 -41
- data/lib/scout/workflow/task.rb +54 -57
- data/lib/scout/workflow/usage.rb +1 -1
- data/lib/scout/workflow/util.rb +4 -0
- data/lib/scout/workflow.rb +110 -13
- data/lib/scout-gear.rb +2 -0
- data/lib/scout.rb +0 -1
- data/scout-gear.gemspec +80 -23
- data/scout_commands/rbbt +2 -0
- data/test/data/person/brothers +4 -0
- data/test/data/person/identifiers +10 -0
- data/test/data/person/marriages +3 -0
- data/test/data/person/parents +6 -0
- data/test/scout/association/test_fields.rb +105 -0
- data/test/scout/association/test_index.rb +70 -0
- data/test/scout/association/test_item.rb +21 -0
- data/test/scout/entity/test_format.rb +19 -0
- data/test/scout/entity/test_identifiers.rb +58 -0
- data/test/scout/entity/test_object.rb +0 -0
- data/test/scout/entity/test_property.rb +345 -0
- data/test/scout/{tsv/persist → persist/engine}/test_fix_width_table.rb +0 -1
- data/test/scout/persist/engine/test_packed_index.rb +99 -0
- data/test/scout/persist/engine/test_sharder.rb +31 -0
- data/test/scout/persist/engine/test_tkrzw.rb +0 -0
- data/test/scout/persist/engine/test_tokyocabinet.rb +17 -0
- data/test/scout/persist/test_tsv.rb +146 -0
- data/test/scout/{tsv/persist/test_adapter.rb → persist/tsv/adapter/test_base.rb} +3 -4
- data/test/scout/persist/tsv/adapter/test_fix_width_table.rb +46 -0
- data/test/scout/persist/tsv/adapter/test_packed_index.rb +37 -0
- data/test/scout/persist/tsv/adapter/test_serialize.rb +0 -0
- data/test/scout/persist/tsv/adapter/test_sharder.rb +290 -0
- data/test/scout/{tsv/persist → persist/tsv/adapter}/test_tkrzw.rb +3 -6
- data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +282 -0
- data/test/scout/persist/tsv/test_serialize.rb +12 -0
- data/test/scout/test_association.rb +51 -0
- data/test/scout/test_entity.rb +40 -0
- data/test/scout/test_tsv.rb +33 -4
- data/test/scout/test_work_queue.rb +3 -2
- data/test/scout/test_workflow.rb +16 -15
- data/test/scout/tsv/annotation/test_repo.rb +150 -0
- data/test/scout/tsv/change_id/test_translate.rb +178 -0
- data/test/scout/tsv/test_annotation.rb +52 -0
- data/test/scout/tsv/test_attach.rb +226 -1
- data/test/scout/tsv/test_change_id.rb +25 -0
- data/test/scout/tsv/test_csv.rb +50 -0
- data/test/scout/tsv/test_dumper.rb +38 -0
- data/test/scout/tsv/test_entity.rb +0 -0
- data/test/scout/tsv/test_index.rb +82 -0
- data/test/scout/tsv/test_open.rb +44 -0
- data/test/scout/tsv/test_parser.rb +70 -0
- data/test/scout/tsv/test_stream.rb +22 -0
- data/test/scout/tsv/test_transformer.rb +27 -3
- data/test/scout/tsv/test_traverse.rb +78 -0
- data/test/scout/tsv/util/test_process.rb +16 -0
- data/test/scout/tsv/util/test_reorder.rb +67 -0
- data/test/scout/tsv/util/test_sort.rb +28 -1
- data/test/scout/tsv/util/test_unzip.rb +32 -0
- data/test/scout/work_queue/test_socket.rb +4 -1
- data/test/scout/workflow/deployment/test_orchestrator.rb +17 -26
- data/test/scout/workflow/deployment/test_trace.rb +25 -0
- data/test/scout/workflow/step/test_archive.rb +28 -0
- data/test/scout/workflow/step/test_children.rb +25 -0
- data/test/scout/workflow/step/test_info.rb +16 -0
- data/test/scout/workflow/task/test_dependencies.rb +16 -16
- data/test/scout/workflow/task/test_inputs.rb +45 -1
- data/test/scout/workflow/test_definition.rb +52 -0
- data/test/scout/workflow/test_step.rb +57 -0
- data/test/scout/workflow/test_task.rb +26 -1
- data/test/scout/workflow/test_usage.rb +4 -4
- data/test/test_helper.rb +23 -1
- metadata +71 -14
- data/lib/scout/tsv/persist.rb +0 -27
- data/test/scout/tsv/persist/test_tokyocabinet.rb +0 -120
- data/test/scout/tsv/test_persist.rb +0 -45
@@ -5,26 +5,36 @@ module TSV
|
|
5
5
|
res = self.annotate({})
|
6
6
|
res.type = kwargs[:type] if kwargs.include?(:type)
|
7
7
|
kwargs[:one2one] = one2one
|
8
|
-
key_field_name, field_names =
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
8
|
+
key_field_name, field_names = with_unnamed do
|
9
|
+
traverse key_field, fields, **kwargs do |k,v|
|
10
|
+
if res.type == :double && merge && res.include?(k)
|
11
|
+
current = res[k]
|
12
|
+
if merge == :concat
|
13
|
+
v.each_with_index do |new,i|
|
14
|
+
next if new.empty?
|
15
|
+
current[i].concat(new)
|
16
|
+
end
|
17
|
+
else
|
18
|
+
merged = []
|
19
|
+
v.each_with_index do |new,i|
|
20
|
+
next if new.empty?
|
21
|
+
merged[i] = current[i] + new
|
22
|
+
end
|
23
|
+
res[k] = merged
|
15
24
|
end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
25
|
+
elsif res.type == :flat
|
26
|
+
res[k] ||= []
|
27
|
+
if merge == :concat
|
28
|
+
res[k].concat v
|
29
|
+
else
|
30
|
+
res[k] += v
|
21
31
|
end
|
22
|
-
|
32
|
+
else
|
33
|
+
res[k] = v
|
23
34
|
end
|
24
|
-
else
|
25
|
-
res[k] = v
|
26
35
|
end
|
27
36
|
end
|
37
|
+
|
28
38
|
res.key_field = key_field_name
|
29
39
|
res.fields = field_names
|
30
40
|
res
|
@@ -16,7 +16,7 @@ module TSV
|
|
16
16
|
end
|
17
17
|
|
18
18
|
if field
|
19
|
-
field =
|
19
|
+
field = NamedArray.identify_name(fields, field) if fields && String === field
|
20
20
|
set = field == :key ? [key] : (type == :double ? values[field].split(sep) : values[field])
|
21
21
|
else
|
22
22
|
set = [key, (type == :double ? values.collect{|v| v.split(sep) } : values)]
|
@@ -262,4 +262,12 @@ module TSV
|
|
262
262
|
end
|
263
263
|
new
|
264
264
|
end
|
265
|
+
|
266
|
+
def chunked_values_at(keys, max = 5000)
|
267
|
+
Misc.ordered_divide(keys, max).inject([]) do |acc,c|
|
268
|
+
new = self.values_at(*c)
|
269
|
+
new.annotate acc if new.respond_to? :annotate and acc.empty?
|
270
|
+
acc.concat(new)
|
271
|
+
end
|
272
|
+
end
|
265
273
|
end
|
data/lib/scout/tsv/util/sort.rb
CHANGED
@@ -22,6 +22,79 @@ module TSV
|
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
25
|
+
if not block_given?
|
26
|
+
if fields == :all
|
27
|
+
if just_keys
|
28
|
+
keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
|
29
|
+
keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
|
30
|
+
else
|
31
|
+
elems.sort_by{|key, value| key }
|
32
|
+
end
|
33
|
+
else
|
34
|
+
sorted = elems.sort do |a, b|
|
35
|
+
a_value = a.last
|
36
|
+
b_value = b.last
|
37
|
+
a_empty = a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?)
|
38
|
+
b_empty = b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?)
|
39
|
+
case
|
40
|
+
when (a_empty and b_empty)
|
41
|
+
0
|
42
|
+
when a_empty
|
43
|
+
-1
|
44
|
+
when b_empty
|
45
|
+
1
|
46
|
+
when Array === a_value
|
47
|
+
if a_value.length == 1 and b_value.length == 1
|
48
|
+
a_value.first <=> b_value.first
|
49
|
+
else
|
50
|
+
a_value.length <=> b_value.length
|
51
|
+
end
|
52
|
+
else
|
53
|
+
a_value <=> b_value
|
54
|
+
end
|
55
|
+
end
|
56
|
+
if just_keys
|
57
|
+
keys = sorted.collect{|key, value| key}
|
58
|
+
keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
|
59
|
+
keys
|
60
|
+
else
|
61
|
+
sorted.collect{|key, value| [key, self[key]]}
|
62
|
+
end
|
63
|
+
end
|
64
|
+
else
|
65
|
+
if just_keys
|
66
|
+
keys = elems.sort_by(&block).collect{|key, value| key}
|
67
|
+
keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
|
68
|
+
keys
|
69
|
+
else
|
70
|
+
elems.sort_by(&block).collect{|key, value| [key, self[key]]}
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def sort(field = nil, just_keys = false, &block)
|
76
|
+
field = :all if field.nil?
|
77
|
+
|
78
|
+
if field == :all
|
79
|
+
elems = collect
|
80
|
+
else
|
81
|
+
elems = []
|
82
|
+
case type
|
83
|
+
when :single
|
84
|
+
through :key, field do |key, field|
|
85
|
+
elems << [key, field]
|
86
|
+
end
|
87
|
+
when :list, :flat
|
88
|
+
through :key, field do |key, fields|
|
89
|
+
elems << [key, fields.first]
|
90
|
+
end
|
91
|
+
when :double
|
92
|
+
through :key, field do |key, fields|
|
93
|
+
elems << [key, fields.first]
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
25
98
|
if not block_given?
|
26
99
|
if fields == :all
|
27
100
|
if just_keys
|
@@ -63,12 +136,27 @@ module TSV
|
|
63
136
|
end
|
64
137
|
else
|
65
138
|
if just_keys
|
66
|
-
keys = elems.
|
139
|
+
keys = elems.sort(&block).collect{|key, value| key}
|
67
140
|
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
|
68
141
|
keys
|
69
142
|
else
|
70
|
-
elems.
|
143
|
+
elems.sort(&block).collect{|key, value| [key, self[key]]}
|
71
144
|
end
|
72
145
|
end
|
73
146
|
end
|
147
|
+
|
148
|
+
def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
|
149
|
+
pstart = psize * (pnum - 1)
|
150
|
+
pend = psize * pnum - 1
|
151
|
+
field = :key if field == "key"
|
152
|
+
keys = sort_by(field || :key, true, &block)
|
153
|
+
keys.reverse! if reverse
|
154
|
+
|
155
|
+
if just_keys
|
156
|
+
keys[pstart..pend]
|
157
|
+
else
|
158
|
+
select :key => keys[pstart..pend]
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
74
162
|
end
|
data/lib/scout/tsv/util/unzip.rb
CHANGED
@@ -83,4 +83,60 @@ module TSV
|
|
83
83
|
def unzip(*args, **kwargs)
|
84
84
|
TSV.unzip(self, *args, **kwargs)
|
85
85
|
end
|
86
|
+
|
87
|
+
def unzip_replicates
|
88
|
+
raise "Can only unzip replicates in :double TSVs" unless type == :double
|
89
|
+
|
90
|
+
new = {}
|
91
|
+
self.with_unnamed do
|
92
|
+
through do |k,vs|
|
93
|
+
NamedArray.zip_fields(vs).each_with_index do |v,i|
|
94
|
+
new[k + "(#{i})"] = v
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
self.annotate(new)
|
100
|
+
new.type = :list
|
101
|
+
|
102
|
+
new
|
103
|
+
end
|
104
|
+
|
105
|
+
def zip(merge = false, field = "New Field", sep = ":")
|
106
|
+
new = {}
|
107
|
+
self.annotate new
|
108
|
+
|
109
|
+
new.type = :double if merge
|
110
|
+
|
111
|
+
new.with_unnamed do
|
112
|
+
if merge
|
113
|
+
self.through do |key,values|
|
114
|
+
new_key, new_value = key.split(sep)
|
115
|
+
new_values = values + [[new_value] * values.first.length]
|
116
|
+
if new.include? new_key
|
117
|
+
current = new[new_key]
|
118
|
+
current.each_with_index do |v,i|
|
119
|
+
v.concat(new_values[i])
|
120
|
+
end
|
121
|
+
else
|
122
|
+
new[new_key] = new_values
|
123
|
+
end
|
124
|
+
end
|
125
|
+
else
|
126
|
+
self.through do |key,values|
|
127
|
+
new_key, new_value = key.split(sep)
|
128
|
+
new_values = values + [new_value]
|
129
|
+
new[new_key] = new_values
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
if self.key_field and self.fields
|
135
|
+
new.key_field = self.key_field.partition(sep).first
|
136
|
+
new.fields = new.fields + [field]
|
137
|
+
end
|
138
|
+
|
139
|
+
new
|
140
|
+
end
|
141
|
+
|
86
142
|
end
|
data/lib/scout/tsv/util.rb
CHANGED
@@ -9,9 +9,43 @@ require_relative 'util/unzip'
|
|
9
9
|
require_relative 'util/sort'
|
10
10
|
require_relative 'util/melt'
|
11
11
|
module TSV
|
12
|
+
|
13
|
+
def self.field_match_counts(file, values, options = {})
|
14
|
+
options = IndiferentHash.add_defaults options, :persist_prefix => "Field_Matches"
|
15
|
+
persist_options = IndiferentHash.pull_keys options, :persist
|
16
|
+
|
17
|
+
filename = TSV === file ? file.filename : file
|
18
|
+
path = Persist.persist filename, :string, persist_options.merge(:no_load => true) do
|
19
|
+
tsv = TSV === file ? file : TSV.open(file, options)
|
20
|
+
|
21
|
+
text = ""
|
22
|
+
fields = nil
|
23
|
+
tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
|
24
|
+
names.zip(fields).each do |list, format|
|
25
|
+
list = [list] unless Array === list
|
26
|
+
list.delete_if do |name| name.empty? end
|
27
|
+
next if list.empty?
|
28
|
+
text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
|
29
|
+
end
|
30
|
+
text << [gene, tsv.key_field] * "\t" << "\n"
|
31
|
+
end
|
32
|
+
text
|
33
|
+
end
|
34
|
+
|
35
|
+
TmpFile.with_file(values.uniq * "\n", false) do |value_file|
|
36
|
+
cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
|
37
|
+
begin
|
38
|
+
TSV.open(CMD.cmd(cmd), :key_field => 1, :fields => [0], :type => :single, :cast => :to_i)
|
39
|
+
rescue
|
40
|
+
Log.exception $!
|
41
|
+
TSV.setup({}, :type => :single, :cast => :to_i)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
12
46
|
def self.identify_field(key_field, fields, name, strict: nil)
|
13
47
|
return :key if name == :key || (! strict && NamedArray.field_match(key_field, name))
|
14
|
-
name.collect!{|n| key_field
|
48
|
+
name.collect!{|n| NamedArray.field_match(key_field, n) ? :key : n } if Array === name
|
15
49
|
NamedArray.identify_name(fields, name, strict: strict)
|
16
50
|
end
|
17
51
|
|
@@ -21,12 +55,12 @@ module TSV
|
|
21
55
|
|
22
56
|
def [](key, *rest)
|
23
57
|
v = super(key, *rest)
|
24
|
-
NamedArray.setup(v, @fields, key) unless @unnamed || ! (Array === v)
|
58
|
+
NamedArray.setup(v, @fields, key) unless @unnamed || @type == :flat || ! (Array === v)
|
25
59
|
v
|
26
60
|
end
|
27
61
|
|
28
62
|
def options
|
29
|
-
|
63
|
+
annotation_hash
|
30
64
|
end
|
31
65
|
|
32
66
|
def zip_new(key, values, insitu: :lax)
|
@@ -51,7 +85,7 @@ module TSV
|
|
51
85
|
def each(*args, &block)
|
52
86
|
if block_given?
|
53
87
|
super(*args) do |k,v|
|
54
|
-
NamedArray.setup(v, @fields) unless @unnamed || ! (Array === v)
|
88
|
+
NamedArray.setup(v, @fields) unless @unnamed || @type == :flat || ! (Array === v)
|
55
89
|
block.call(k, v)
|
56
90
|
end
|
57
91
|
else
|
@@ -115,8 +149,16 @@ Example:
|
|
115
149
|
[@key_field] + @fields
|
116
150
|
end
|
117
151
|
|
152
|
+
def self.all_fields(file)
|
153
|
+
if file.respond_to?(:all_fields)
|
154
|
+
file.all_fields
|
155
|
+
else
|
156
|
+
TSV.parse_header(file)["all_fields"]
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
118
160
|
def options
|
119
|
-
self.
|
161
|
+
self.annotation_hash
|
120
162
|
end
|
121
163
|
|
122
164
|
def fingerprint
|
@@ -130,4 +172,9 @@ Example:
|
|
130
172
|
def inspect
|
131
173
|
fingerprint
|
132
174
|
end
|
175
|
+
|
176
|
+
def merge(other)
|
177
|
+
self.annotate(super(other))
|
178
|
+
end
|
179
|
+
|
133
180
|
end
|
data/lib/scout/tsv.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
require 'scout/
|
1
|
+
require 'scout/annotation'
|
2
2
|
require_relative 'tsv/util'
|
3
3
|
require_relative 'tsv/parser'
|
4
4
|
require_relative 'tsv/dumper'
|
5
5
|
require_relative 'tsv/transformer'
|
6
|
-
require_relative 'tsv
|
6
|
+
require_relative 'persist/tsv'
|
7
7
|
require_relative 'tsv/index'
|
8
8
|
require_relative 'tsv/path'
|
9
9
|
require_relative 'tsv/traverse'
|
@@ -11,10 +11,13 @@ require_relative 'tsv/open'
|
|
11
11
|
require_relative 'tsv/attach'
|
12
12
|
require_relative 'tsv/change_id'
|
13
13
|
require_relative 'tsv/stream'
|
14
|
+
require_relative 'tsv/entity'
|
15
|
+
require_relative 'tsv/annotation'
|
16
|
+
require_relative 'tsv/csv'
|
14
17
|
|
15
18
|
module TSV
|
16
|
-
extend
|
17
|
-
|
19
|
+
extend Annotation
|
20
|
+
annotation :key_field, :fields, :type, :cast, :filename, :namespace, :unnamed, :identifiers, :entity_options, :serializer
|
18
21
|
|
19
22
|
def self.str2options(str)
|
20
23
|
field_options,_sep, rest = str.partition("#")
|
@@ -59,6 +62,10 @@ module TSV
|
|
59
62
|
end
|
60
63
|
original_setup(obj, *rest, &block)
|
61
64
|
end
|
65
|
+
|
66
|
+
obj.save_annotation_hash if obj.respond_to?(:save_annotation_hash)
|
67
|
+
|
68
|
+
obj
|
62
69
|
end
|
63
70
|
end
|
64
71
|
|
@@ -68,35 +75,37 @@ module TSV
|
|
68
75
|
end
|
69
76
|
|
70
77
|
def self.open(file, options = {})
|
71
|
-
grep, invert_grep = IndiferentHash.process_options options, :grep, :invert_grep, :
|
78
|
+
grep, invert_grep, nocache, monitor, entity_options = IndiferentHash.process_options options, :grep, :invert_grep, :nocache, :monitor, :entity_options
|
72
79
|
|
73
80
|
persist_options = IndiferentHash.pull_keys options, :persist
|
74
|
-
persist_options = IndiferentHash.add_defaults persist_options, :
|
81
|
+
persist_options = IndiferentHash.add_defaults persist_options, prefix: "TSV", type: :HDB, persist: false
|
82
|
+
persist_options[:data] ||= options[:data]
|
75
83
|
|
76
84
|
file = StringIO.new file if String === file && ! (Path === file) && file.index("\n")
|
77
85
|
|
78
|
-
source_name,
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
[file, options]
|
85
|
-
end
|
86
|
-
|
87
|
-
Persist.persist(source_name, persist_options[:type], persist_options.merge(:other_options => other_options)) do |filename|
|
88
|
-
if filename
|
89
|
-
data = case persist_options[:type]
|
90
|
-
when :HDB, :BDB
|
91
|
-
ScoutCabinet.open(filename, true, persist_options[:type])
|
92
|
-
when :tkh, :tkt, :tks
|
93
|
-
ScoutTKRZW.open(filename, true, persist_options[:type])
|
94
|
-
end
|
86
|
+
source_name, options =
|
87
|
+
case file
|
88
|
+
when StringIO
|
89
|
+
[file.inspect, options]
|
90
|
+
when TSV::Parser
|
91
|
+
[file.options[:filename], file.options]
|
95
92
|
else
|
96
|
-
|
93
|
+
[file, options]
|
97
94
|
end
|
95
|
+
|
96
|
+
Persist.tsv(source_name, options, persist_options: persist_options) do |data|
|
98
97
|
options[:data] = data if data
|
99
|
-
options[:filename]
|
98
|
+
options[:filename] ||= if TSV::Parser === file
|
99
|
+
file.options[:filename]
|
100
|
+
elsif Path === file
|
101
|
+
file
|
102
|
+
elsif file.respond_to?(:filename)
|
103
|
+
file.filename
|
104
|
+
elsif Path.is_filename?(file)
|
105
|
+
file
|
106
|
+
else
|
107
|
+
nil
|
108
|
+
end
|
100
109
|
|
101
110
|
if data
|
102
111
|
Log.debug "TSV open #{Log.fingerprint file} into #{Log.fingerprint data}"
|
@@ -104,14 +113,23 @@ module TSV
|
|
104
113
|
Log.debug "TSV open #{Log.fingerprint file}"
|
105
114
|
end
|
106
115
|
|
107
|
-
if TSV::Parser === file
|
116
|
+
tsv = if TSV::Parser === file
|
108
117
|
TSV.parse(file, **options)
|
109
118
|
else
|
110
|
-
|
119
|
+
options[:tsv_invert_grep] ||= invert_grep if invert_grep
|
120
|
+
Open.open(file, grep: grep, invert_grep: invert_grep, nocache: nocache) do |f|
|
111
121
|
TSV.parse(f, **options)
|
112
122
|
end
|
113
123
|
end
|
124
|
+
|
125
|
+
tsv.entity_options = entity_options
|
126
|
+
|
127
|
+
tsv
|
114
128
|
end
|
115
129
|
end
|
130
|
+
|
131
|
+
def to_hash
|
132
|
+
self.dup
|
133
|
+
end
|
116
134
|
end
|
117
135
|
|
@@ -17,6 +17,10 @@ class WorkQueue
|
|
17
17
|
ScoutSemaphore.create_semaphore(@read_sem,1)
|
18
18
|
end
|
19
19
|
|
20
|
+
def socket_id
|
21
|
+
@key
|
22
|
+
end
|
23
|
+
|
20
24
|
def clean
|
21
25
|
@cleaned = true
|
22
26
|
@sread.close unless @sread.closed?
|
@@ -37,6 +41,10 @@ class WorkQueue
|
|
37
41
|
when nil
|
38
42
|
size_head = [0,"N"].pack 'La'
|
39
43
|
str = size_head
|
44
|
+
when Annotation::AnnotatedObject
|
45
|
+
payload = @serializer.dump(obj)
|
46
|
+
size_head = [payload.bytesize,"S"].pack 'La'
|
47
|
+
str = size_head << payload
|
40
48
|
when String
|
41
49
|
payload = obj
|
42
50
|
size_head = [payload.bytesize,"C"].pack 'La'
|
@@ -1,13 +1,24 @@
|
|
1
1
|
class WorkQueue
|
2
2
|
class Worker
|
3
|
-
attr_accessor :pid, :ignore_ouput
|
3
|
+
attr_accessor :pid, :ignore_ouput, :queue_id
|
4
4
|
def initialize(ignore_ouput = false)
|
5
5
|
@ignore_output = ignore_ouput
|
6
6
|
end
|
7
7
|
|
8
|
+
def worker_short_id
|
9
|
+
[object_id, pid].compact * "@"
|
10
|
+
end
|
11
|
+
|
12
|
+
def worker_id
|
13
|
+
[worker_short_id, queue_id] * "->"
|
14
|
+
end
|
15
|
+
|
8
16
|
def run
|
9
17
|
@pid = Process.fork do
|
10
|
-
|
18
|
+
Signal.trap("INT") do
|
19
|
+
Kernel.exit! -1
|
20
|
+
end
|
21
|
+
Log.low "Worker start #{worker_id}"
|
11
22
|
yield
|
12
23
|
end
|
13
24
|
end
|
@@ -15,6 +26,12 @@ class WorkQueue
|
|
15
26
|
def process(input, output = nil, &block)
|
16
27
|
run do
|
17
28
|
begin
|
29
|
+
if output
|
30
|
+
Open.purge_pipes(output.swrite)
|
31
|
+
else
|
32
|
+
Open.purge_pipes
|
33
|
+
end
|
34
|
+
|
18
35
|
while obj = input.read
|
19
36
|
if DoneProcessing === obj
|
20
37
|
output.write DoneProcessing.new
|
@@ -36,15 +53,15 @@ class WorkQueue
|
|
36
53
|
|
37
54
|
def abort
|
38
55
|
begin
|
39
|
-
Log.
|
40
|
-
Process.kill "INT", @pid
|
56
|
+
Log.medium "Aborting worker #{worker_id}"
|
57
|
+
Process.kill "INT", @pid
|
41
58
|
rescue Errno::ECHILD
|
42
59
|
rescue Errno::ESRCH
|
43
60
|
end
|
44
61
|
end
|
45
62
|
|
46
63
|
def join
|
47
|
-
Log.
|
64
|
+
Log.low "Joining worker #{worker_id}"
|
48
65
|
Process.waitpid @pid
|
49
66
|
end
|
50
67
|
|
data/lib/scout/work_queue.rb
CHANGED
@@ -6,18 +6,29 @@ require 'timeout'
|
|
6
6
|
class WorkQueue
|
7
7
|
attr_accessor :workers, :worker_proc, :callback
|
8
8
|
|
9
|
+
def new_worker
|
10
|
+
worker = Worker.new
|
11
|
+
worker.queue_id = queue_id
|
12
|
+
worker
|
13
|
+
end
|
14
|
+
|
9
15
|
def initialize(workers = 0, &block)
|
10
16
|
workers = workers.to_i if String === workers
|
11
17
|
@input = WorkQueue::Socket.new
|
12
18
|
@output = WorkQueue::Socket.new
|
13
|
-
@workers = workers.times.collect{
|
19
|
+
@workers = workers.times.collect{ new_worker }
|
14
20
|
@worker_proc = block
|
15
21
|
@worker_mutex = Mutex.new
|
16
22
|
@removed_workers = []
|
23
|
+
Log.medium "Starting queue #{queue_id} with workers: #{Log.fingerprint @workers.collect{|w| w.worker_short_id }} and sockets #{@input.socket_id} and #{@output.socket_id}"
|
24
|
+
end
|
25
|
+
|
26
|
+
def queue_id
|
27
|
+
[object_id, Process.pid] * "@"
|
17
28
|
end
|
18
29
|
|
19
30
|
def add_worker(&block)
|
20
|
-
worker =
|
31
|
+
worker = new_worker
|
21
32
|
@worker_mutex.synchronize do
|
22
33
|
@workers.push(worker)
|
23
34
|
if block_given?
|
@@ -41,9 +52,11 @@ class WorkQueue
|
|
41
52
|
@worker_mutex.synchronize do
|
42
53
|
worker = @workers.index{|w| w.pid == pid}
|
43
54
|
if worker
|
44
|
-
Log.low "Removed worker #{pid}"
|
45
55
|
@workers.delete_at(worker)
|
46
56
|
@removed_workers << pid
|
57
|
+
Log.low "Removed worker #{pid} from #{queue_id}"
|
58
|
+
else
|
59
|
+
Log.medium "Worker #{pid} not from #{queue_id}"
|
47
60
|
end
|
48
61
|
end
|
49
62
|
end
|
@@ -56,14 +69,14 @@ class WorkQueue
|
|
56
69
|
@reader = Thread.new(Thread.current) do |parent|
|
57
70
|
begin
|
58
71
|
Thread.current.report_on_exception = false
|
59
|
-
Thread.current["name"] = "Output reader #{
|
72
|
+
Thread.current["name"] = "Output reader #{queue_id}"
|
60
73
|
@done_workers ||= []
|
61
74
|
while true
|
62
75
|
obj = @output.read
|
63
76
|
if DoneProcessing === obj
|
64
77
|
|
65
78
|
done = @worker_mutex.synchronize do
|
66
|
-
Log.low "Worker #{obj.pid} done"
|
79
|
+
Log.low "Worker #{obj.pid} from #{queue_id} done"
|
67
80
|
@done_workers << obj.pid
|
68
81
|
@closed && @done_workers.length == @removed_workers.length + @workers.length
|
69
82
|
end
|
@@ -78,12 +91,12 @@ class WorkQueue
|
|
78
91
|
rescue DoneProcessing
|
79
92
|
rescue Aborted
|
80
93
|
rescue WorkerException
|
81
|
-
Log.error "Exception in worker #{obj.pid} in queue #{
|
94
|
+
Log.error "Exception in worker #{obj.pid} in queue #{queue_id}: #{obj.worker_exception.message}"
|
82
95
|
self.abort
|
83
96
|
@input.abort obj.worker_exception
|
84
97
|
raise obj.worker_exception
|
85
98
|
rescue
|
86
|
-
Log.error "Exception processing output in queue #{
|
99
|
+
Log.error "Exception processing output in queue #{queue_id}: #{$!.message}"
|
87
100
|
self.abort
|
88
101
|
raise $!
|
89
102
|
end
|
@@ -95,25 +108,19 @@ class WorkQueue
|
|
95
108
|
|
96
109
|
@waiter = Thread.new do
|
97
110
|
Thread.current.report_on_exception = false
|
98
|
-
Thread.current["name"] = "Worker waiter #{
|
111
|
+
Thread.current["name"] = "Worker waiter #{queue_id}"
|
99
112
|
while true
|
100
113
|
break if @worker_mutex.synchronize{ @workers.empty? }
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
remove_worker(pid) if pid
|
106
|
-
rescue Exception
|
107
|
-
Log.exception $!
|
108
|
-
end
|
109
|
-
end
|
110
|
-
rescue Timeout::Error
|
111
|
-
pids = @worker_mutex.synchronize{ @workers.collect{|w| w.pid } }
|
112
|
-
pids.each do |p|
|
113
|
-
pid, status = Process.wait2 p, Process::WNOHANG
|
114
|
+
threads = @workers.collect do |w|
|
115
|
+
t = Thread.new do
|
116
|
+
Thread.current["name"] = "Worker waiter #{queue_id} worker #{w.pid}"
|
117
|
+
pid, status = Process.wait2 w.pid
|
114
118
|
remove_worker(pid) if pid
|
115
119
|
end
|
120
|
+
Thread.pass until t["name"]
|
121
|
+
t
|
116
122
|
end
|
123
|
+
threads.each do |t| t.join end
|
117
124
|
end
|
118
125
|
end
|
119
126
|
|
@@ -131,16 +138,23 @@ class WorkQueue
|
|
131
138
|
end
|
132
139
|
|
133
140
|
def abort
|
134
|
-
Log.low "Aborting #{@workers.length} workers in queue #{
|
141
|
+
Log.low "Aborting #{@workers.length} workers in queue #{queue_id}"
|
135
142
|
@worker_mutex.synchronize do
|
136
|
-
@workers.each
|
143
|
+
@workers.each do |w|
|
144
|
+
ScoutSemaphore.post_semaphore(@output.write_sem) if @output
|
145
|
+
ScoutSemaphore.post_semaphore(@input.read_sem) if @input
|
146
|
+
w.abort
|
147
|
+
end
|
137
148
|
end
|
138
149
|
end
|
139
150
|
|
140
151
|
def close
|
141
152
|
@closed = true
|
142
153
|
@worker_mutex.synchronize{ @workers.length }.times do
|
143
|
-
|
154
|
+
begin
|
155
|
+
@input.write DoneProcessing.new() unless @input.closed_write?
|
156
|
+
rescue IOError
|
157
|
+
end
|
144
158
|
end
|
145
159
|
end
|
146
160
|
|