bio-vcf 0.9.0 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -3
- data/Gemfile.lock +44 -0
- data/README.md +151 -28
- data/VERSION +1 -1
- data/bin/bio-vcf +47 -15
- data/bio-vcf.gemspec +4 -21
- data/features/#cli.feature# +71 -0
- data/features/cli.feature +3 -3
- data/features/filter.feature +12 -0
- data/features/filter.feature~ +35 -0
- data/features/somaticsniper.feature +2 -0
- data/features/step_definitions/cli-feature.rb +5 -0
- data/features/step_definitions/somaticsniper.rb +8 -0
- data/lib/bio-vcf/pcows.rb +123 -36
- data/lib/bio-vcf/vcfgenotypefield.rb +1 -1
- data/lib/bio-vcf/vcfrecord.rb +21 -0
- data/lib/bio-vcf/vcfsample.rb +13 -0
- data/test/data/regression/eval_once-stderr.new +2 -1
- data/test/data/regression/eval_r.info.dp-stderr.new +8 -4
- data/test/data/regression/ifilter_s.dp-stderr.new +8 -4
- data/test/data/regression/pass1-stderr.new +8 -0
- data/test/data/regression/pass1.new +88 -0
- data/test/data/regression/pass1.ref +88 -0
- data/test/data/regression/r.info.dp-stderr.new +8 -4
- data/test/data/regression/rewrite.info.sample-stderr.new +8 -4
- data/test/data/regression/s.dp-stderr.new +8 -4
- data/test/data/regression/seval_s.dp-stderr.new +8 -4
- data/test/data/regression/sfilter_seval_s.dp-stderr.new +8 -4
- data/test/data/regression/thread4-stderr.new +8 -4
- data/test/data/regression/thread4_4-stderr.new +44 -15
- data/test/data/regression/vcf2json_full_header-stderr.new +8 -4
- data/test/data/regression/vcf2json_use_meta-stderr.new +8 -4
- data/test/stress/stress_test.sh +15 -0
- data/test/stress/stress_test.sh~ +8 -0
- metadata +14 -5
data/bio-vcf.gemspec
CHANGED
@@ -3,12 +3,12 @@
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "bio-vcf"
|
6
|
-
s.version =
|
6
|
+
s.version = File.read("VERSION")
|
7
7
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
9
9
|
s.authors = ["Pjotr Prins"]
|
10
10
|
# s.date = "2015-12-28"
|
11
|
-
s.description = "Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting"
|
11
|
+
s.description = "Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting (JSON, RDF etc.)"
|
12
12
|
s.email = "pjotr.public01@thebird.nl"
|
13
13
|
s.executables = ["bio-vcf"]
|
14
14
|
s.extra_rdoc_files = [
|
@@ -35,25 +35,8 @@ Gem::Specification.new do |s|
|
|
35
35
|
s.licenses = ["MIT"]
|
36
36
|
s.require_paths = ["lib"]
|
37
37
|
s.required_ruby_version = Gem::Requirement.new(">= 2.0.0")
|
38
|
-
s.rubygems_version = "2.0.3"
|
39
|
-
s.summary = "Fast multi-threaded VCF parser"
|
38
|
+
# s.rubygems_version = "2.0.3"
|
39
|
+
s.summary = "Fast multi-purpose multi-threaded VCF parser"
|
40
40
|
|
41
|
-
# if s.respond_to? :specification_version then
|
42
|
-
# s.specification_version = 4
|
43
|
-
|
44
|
-
# if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
45
|
-
# s.add_development_dependency(%q<rspec>, [">= 2.14.0"])
|
46
|
-
# s.add_development_dependency(%q<cucumber>, [">= 1.3.11"])
|
47
|
-
# s.add_development_dependency(%q<regressiontest>, [">= 0.0.3"])
|
48
|
-
# else
|
49
|
-
# s.add_dependency(%q<rspec>, [">= 2.14.0"])
|
50
|
-
# s.add_dependency(%q<cucumber>, [">= 1.3.11"])
|
51
|
-
# s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
|
52
|
-
# end
|
53
|
-
# else
|
54
|
-
# s.add_dependency(%q<rspec>, [">= 2.14.0"])
|
55
|
-
# s.add_dependency(%q<cucumber>, [">= 1.3.11"])
|
56
|
-
# s.add_dependency(%q<regressiontest>, [">= 0.0.3"])
|
57
|
-
# end
|
58
41
|
end
|
59
42
|
|
@@ -0,0 +1,71 @@
|
|
1
|
+
@cli
|
2
|
+
Feature: Command-line interface (CLI)
|
3
|
+
|
4
|
+
bio-vcf has a powerful command line interface. Here we regression test features.
|
5
|
+
|
6
|
+
Scenario: Test the info filter using dp
|
7
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
8
|
+
When I execute "./bin/bio-vcf -i --filter 'r.info.dp>100'"
|
9
|
+
Then I expect the named output to match the named output "r.info.dp"
|
10
|
+
|
11
|
+
Scenario: Test the info filter using dp and threads
|
12
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
13
|
+
When I execute "./bin/bio-vcf -i --num-threads 4 --filter 'r.info.dp>2'"
|
14
|
+
Then I expect the named output to match the named output "thread4"
|
15
|
+
|
16
|
+
Scenario: Test the info filter using dp and threads with lines
|
17
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
18
|
+
When I execute "./bin/bio-vcf -i --num-threads 4 --thread-lines 4 --filter 'r.info.dp>2'"
|
19
|
+
Then I expect the named output to match the named output "thread4_4"
|
20
|
+
|
21
|
+
Scenario: Test the sample filter using dp
|
22
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
23
|
+
When I execute "./bin/bio-vcf -i --sfilter 's.dp>20'"
|
24
|
+
Then I expect the named output to match the named output "s.dp"
|
25
|
+
|
26
|
+
Scenario: Test the include sample filter using dp
|
27
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
28
|
+
When I execute "./bin/bio-vcf -i --ifilter 's.dp>100' --seval s.dp"
|
29
|
+
Then I expect the named output to match the named output "ifilter_s.dp"
|
30
|
+
|
31
|
+
Scenario: Test the info eval using dp
|
32
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
33
|
+
When I execute "./bin/bio-vcf -i --eval 'r.info.dp'"
|
34
|
+
Then I expect the named output to match the named output "eval_r.info.dp"
|
35
|
+
|
36
|
+
Scenario: Test the sample eval using dp
|
37
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
38
|
+
When I execute "./bin/bio-vcf -i --seval 's.dp'"
|
39
|
+
Then I expect the named output to match the named output "seval_s.dp"
|
40
|
+
|
41
|
+
Scenario: Test the sample filter + eval using dp
|
42
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
43
|
+
When I execute "./bin/bio-vcf -i --sfilter 's.dp>10' --seval 's.dp'"
|
44
|
+
Then I expect the named output to match the named output "sfilter_seval_s.dp"
|
45
|
+
|
46
|
+
Scenario: Rewrite an info field
|
47
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
48
|
+
When I execute "./bin/bio-vcf --rewrite rec.info[\'sample\']=\'XXXXX\'"
|
49
|
+
Then I expect the named output to match the named output "rewrite.info.sample"
|
50
|
+
|
51
|
+
Scenario: Test eval-once
|
52
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
53
|
+
When I execute "./bin/bio-vcf --eval-once header.meta[\'GATKCommandLine\']"
|
54
|
+
Then I expect the named output to match the named output "eval_once"
|
55
|
+
|
56
|
+
Scenario: Test JSON output with header meta data
|
57
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
58
|
+
When I execute "./bin/bio-vcf --template template/vcf2json_full_header.erb"
|
59
|
+
Then I expect the named output to match the named output "vcf2json_full_header"
|
60
|
+
|
61
|
+
Scenario: Test JSON output with header meta data and query samples
|
62
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
63
|
+
When I execute "./bin/bio-vcf --template template/vcf2json_use_meta.erb"
|
64
|
+
Then I expect the named output to match the named output "vcf2json_use_meta"
|
65
|
+
|
66
|
+
Scenario: Test deadlock on failed filter with threads
|
67
|
+
Given I have input file(s) named "test/data/input/multisample.vcf"
|
68
|
+
When I execute "./bin/bio-vcf -q --timeout 4 --num-threads 4 --thread-lines 4 --filter 't.info.dp>2'"
|
69
|
+
Then I expect an error and the named output to match the named output "thread4_4_failed_filter" in under 30 seconds
|
70
|
+
|
71
|
+
|
data/features/cli.feature
CHANGED
@@ -11,12 +11,12 @@ Feature: Command-line interface (CLI)
|
|
11
11
|
Scenario: Test the info filter using dp and threads
|
12
12
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
13
13
|
When I execute "./bin/bio-vcf -i --num-threads 4 --filter 'r.info.dp>2'"
|
14
|
-
Then I expect the named output to match the named output "thread4"
|
14
|
+
Then I expect the named output to match the named output "thread4" in under 30 seconds
|
15
15
|
|
16
16
|
Scenario: Test the info filter using dp and threads with lines
|
17
17
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
18
18
|
When I execute "./bin/bio-vcf -i --num-threads 4 --thread-lines 4 --filter 'r.info.dp>2'"
|
19
|
-
Then I expect the named output to match the named output "thread4_4"
|
19
|
+
Then I expect the named output to match the named output "thread4_4" in under 30 seconds
|
20
20
|
|
21
21
|
Scenario: Test the sample filter using dp
|
22
22
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
@@ -65,7 +65,7 @@ Feature: Command-line interface (CLI)
|
|
65
65
|
|
66
66
|
Scenario: Test deadlock on failed filter with threads
|
67
67
|
Given I have input file(s) named "test/data/input/multisample.vcf"
|
68
|
-
When I execute "./bin/bio-vcf -q --timeout
|
68
|
+
When I execute "./bin/bio-vcf -q --timeout 4 --num-threads 4 --thread-lines 4 --filter 't.info.dp>2'"
|
69
69
|
Then I expect an error and the named output to match the named output "thread4_4_failed_filter" in under 30 seconds
|
70
70
|
|
71
71
|
|
@@ -0,0 +1,12 @@
|
|
1
|
+
@filter
|
2
|
+
Feature: Adding filters
|
3
|
+
|
4
|
+
bio-vcf can add soft filters. Rather than removing failing items we can
|
5
|
+
inject filter state into the FILTER field. To add state such as PASS or
|
6
|
+
LowDepth simply use a filter and the --add-filter switch. If a filter already
|
7
|
+
has state the new one is appended with a semi-colon.
|
8
|
+
|
9
|
+
Scenario: Add PASS to the FILTER field using normal and tumor dp
|
10
|
+
Given I have input file(s) named "test/data/input/somaticsniper.vcf"
|
11
|
+
When I execute "./bin/bio-vcf --add-filter PASS --filter 'r.normal.dp>5 and r.tumor.dp>7'"
|
12
|
+
Then I expect the named output to match the named output "pass1"
|
@@ -0,0 +1,35 @@
|
|
1
|
+
@meta
|
2
|
+
Feature: Parsing VCF meta information from the header
|
3
|
+
|
4
|
+
Take a header and parse that information as defined by the VCF standard.
|
5
|
+
|
6
|
+
Scenario: When parsing a header line
|
7
|
+
|
8
|
+
Given the VCF header lines
|
9
|
+
"""
|
10
|
+
##fileformat=VCFv4.1
|
11
|
+
##fileDate=20140121
|
12
|
+
##phasing=none
|
13
|
+
##reference=file:///data/GENOMES/human_GATK_GRCh37/GRCh37_gatk.fasta
|
14
|
+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
|
15
|
+
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth">
|
16
|
+
##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
|
17
|
+
##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
|
18
|
+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR
|
19
|
+
"""
|
20
|
+
When I parse the VCF header
|
21
|
+
Then I expect vcf.columns to be [CHROM','POS','ID','REF','ALT','QUAL','FILTER','INFO','FORMAT','NORMAL','TUMOR']
|
22
|
+
And I expect vcf.fileformat to be "VCFv4.1"
|
23
|
+
And I expect vcf.fileDate to be "20140121"
|
24
|
+
And I expect vcf.field['fileDate'] to be "20140121"
|
25
|
+
And I expect vcf.phasing to be "none"
|
26
|
+
And I expect vcf.reference to be "file:///data/GENOMES/human_GATK_GRCh37/GRCh37_gatk.fasta"
|
27
|
+
And I expect vcf.format['GT'] to be {"ID"=>"GT", "Number"=>"1", "Type"=>"String", "Description"=>"Genotype"}
|
28
|
+
And I expect vcf.format['DP'] to be {"ID"=>"DP", "Number"=>"1", "Type"=>"Integer", "Description"=>"Total read depth"}
|
29
|
+
And I expect vcf.format['DP4'] to be {"ID"=>"DP4", "Number"=>"4", "Type"=>"Integer", "Description"=>"# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"}
|
30
|
+
And I expect vcf.info['PM'] to be {"ID"=>"PM", "Number"=>"0", "Type"=>"Flag", "Description"=>"Variant is Precious(Clinical,Pubmed Cited)"}'
|
31
|
+
And I expect vcf.meta to contain all header meta information
|
32
|
+
|
33
|
+
Scenario: When parsing the header of somatic_sniper.vcf
|
34
|
+
|
35
|
+
Do something
|
@@ -46,6 +46,8 @@ Feature: VCF for Somatic Sniper
|
|
46
46
|
And I expect rec.tumor.amq.to_ary to be [37,37]
|
47
47
|
And I expect rec.tumor.mq to be 37
|
48
48
|
And I expect rec.tumor.ss to be 2
|
49
|
+
And I expect rec.tumor.ssc to be 33
|
50
|
+
And I expect rec.normal.ssc to be nil
|
49
51
|
# The following are additional functions
|
50
52
|
And I expect rec.call_diff to be [-4,2,-2,0]
|
51
53
|
And I expect rec.call_nuc to be "C"
|
@@ -11,6 +11,11 @@ Then(/^I expect the named output to match the named output "(.*?)"$/) do |arg1|
|
|
11
11
|
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(##BioVcf|date|"version":)').should be_truthy
|
12
12
|
end
|
13
13
|
|
14
|
+
Then(/^I expect the named output to match the named output "([^"]*)" in under (\d+) seconds$/) do |arg1, arg2|
|
15
|
+
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(##BioVcf|date|"version":)',timeout: arg2.to_i).should be_truthy
|
16
|
+
end
|
17
|
+
|
18
|
+
|
14
19
|
Then(/^I expect an error and the named output to match the named output "(.*?)" in under (\d+) seconds$/) do |arg1,arg2|
|
15
20
|
RegressionTest::CliExec::exec(@cmd,arg1,ignore: '(FATAL|Waiting|from|vcf|Options|Final pid)',should_fail: true,timeout:arg2.to_i).should be_truthy
|
16
21
|
end
|
@@ -99,6 +99,14 @@ Then(/^I expect rec\.tumor\.ss to be (\d+)$/) do |arg1|
|
|
99
99
|
end
|
100
100
|
|
101
101
|
|
102
|
+
Then(/^I expect rec\.tumor\.ssc to be (\d+)$/) do |arg1|
|
103
|
+
expect(@rec.tumor.ssc).to be 33
|
104
|
+
end
|
105
|
+
|
106
|
+
Then(/^I expect rec\.normal\.ssc to be nil$/) do
|
107
|
+
expect(@rec.normal.ssc).to be nil
|
108
|
+
end
|
109
|
+
|
102
110
|
Then(/^I expect rec.call_diff to be \[(\-\d+),(\d+),(\-\d+),(\d+)\]$/) do |arg1, arg2, arg3, arg4|
|
103
111
|
expect(@rec.call_diff).to eq [arg1.to_i,arg2.to_i,arg3.to_i,arg4.to_i]
|
104
112
|
end
|
data/lib/bio-vcf/pcows.rb
CHANGED
@@ -4,25 +4,31 @@ require 'tempfile'
|
|
4
4
|
|
5
5
|
class PCOWS
|
6
6
|
|
7
|
-
RUNNINGEXT = 'part'
|
7
|
+
RUNNINGEXT = 'part' # file extension
|
8
8
|
|
9
|
-
def initialize(num_threads,name=File.basename(__FILE__),timeout=180)
|
9
|
+
def initialize(num_threads,chunk_size,name=File.basename(__FILE__),timeout=180,quiet=false,debug=false)
|
10
10
|
num_threads = cpu_count() if not num_threads # FIXME: set to cpu_num by default
|
11
11
|
# $stderr.print "Using ",num_threads,"threads \n"
|
12
12
|
@num_threads = num_threads
|
13
|
+
@chunk_size = chunk_size
|
13
14
|
@pid_list = []
|
14
15
|
@name = name
|
15
16
|
@timeout = timeout
|
17
|
+
@quiet = quiet
|
18
|
+
@debug = debug
|
19
|
+
if @debug
|
20
|
+
$stderr.print "PCOWS running in DEBUG MODE\n"
|
21
|
+
end
|
16
22
|
if multi_threaded
|
17
23
|
@tmpdir = Dir::mktmpdir(@name+'_')
|
18
24
|
end
|
19
25
|
@last_output = 0 # counter
|
20
|
-
@output_locked =
|
26
|
+
@output_locked = false
|
21
27
|
end
|
22
28
|
|
23
|
-
# Feed the worker func and state to COWS. Note that func is a
|
24
|
-
# closure so it can pick up surrounding scope at invocation
|
25
|
-
# addition to the data captured in 'state'.
|
29
|
+
# Feed the worker 'func and state' to COWS. Note that func is a
|
30
|
+
# lambda closure so it can pick up surrounding scope at invocation
|
31
|
+
# in addition to the data captured in 'state'.
|
26
32
|
|
27
33
|
def submit_worker(func,state)
|
28
34
|
pid = nil
|
@@ -36,17 +42,30 @@ class PCOWS
|
|
36
42
|
func.call(state).each { | line | print line }
|
37
43
|
STDOUT.flush
|
38
44
|
STDOUT.close
|
45
|
+
# sleep 0.1
|
46
|
+
# f.flush
|
47
|
+
# f.close
|
48
|
+
# sleep 0.2 # interval to make sure we are done writing,
|
49
|
+
# otherwise there may be misses at the end of a
|
50
|
+
# block (maybe the f.close fixed it)
|
51
|
+
|
39
52
|
FileUtils::mv(tempfn,fn)
|
40
|
-
exit
|
53
|
+
exit(0)
|
41
54
|
end
|
55
|
+
Process.detach(pid)
|
42
56
|
else
|
43
|
-
# ----
|
57
|
+
# ---- Single threaded: call in main process and output immediately
|
44
58
|
func.call(state).each { | line | print line }
|
45
59
|
end
|
46
60
|
@pid_list << [ pid,count,fn ]
|
47
61
|
return true
|
48
62
|
end
|
49
63
|
|
64
|
+
def submit_final_worker(func,state)
|
65
|
+
@final_worker = true
|
66
|
+
submit_worker(func,state)
|
67
|
+
end
|
68
|
+
|
50
69
|
# Make sure no more than num_threads are running at the same time -
|
51
70
|
# this is achieved by checking the PID table and the running files
|
52
71
|
# in the tmpdir
|
@@ -54,7 +73,7 @@ class PCOWS
|
|
54
73
|
def wait_for_worker_slot()
|
55
74
|
return if single_threaded
|
56
75
|
Timeout.timeout(@timeout) do
|
57
|
-
|
76
|
+
printed_timeout_message = false
|
58
77
|
while true
|
59
78
|
# ---- count running pids
|
60
79
|
running = @pid_list.reduce(0) do | sum, info |
|
@@ -66,9 +85,11 @@ class PCOWS
|
|
66
85
|
end
|
67
86
|
end
|
68
87
|
return if running < @num_threads
|
69
|
-
|
70
|
-
|
71
|
-
|
88
|
+
if not printed_timeout_message
|
89
|
+
$stderr.print "Waiting for slot (timeout=#{@timeout})\n" if not @quiet
|
90
|
+
printed_timeout_message = true
|
91
|
+
end
|
92
|
+
sleep 0.1
|
72
93
|
end
|
73
94
|
end
|
74
95
|
end
|
@@ -81,8 +102,7 @@ class PCOWS
|
|
81
102
|
#
|
82
103
|
# In this implementation type==:by_line will call func for
|
83
104
|
# each line. Otherwise it is called once with the filename.
|
84
|
-
|
85
|
-
def process_output(func=nil,type = :by_line, blocking=false)
|
105
|
+
def process_output(func=nil,type=:by_line, blocking=false)
|
86
106
|
return if single_threaded
|
87
107
|
output = lambda { |fn|
|
88
108
|
if type == :by_line
|
@@ -92,53 +112,87 @@ class PCOWS
|
|
92
112
|
else
|
93
113
|
func.call(fn)
|
94
114
|
end
|
95
|
-
File.unlink(fn)
|
96
115
|
}
|
97
116
|
if @output_locked
|
117
|
+
# ---- is the other thread still running?
|
98
118
|
(pid,count,fn) = @output_locked
|
99
|
-
|
100
|
-
#
|
101
|
-
|
102
|
-
|
119
|
+
$stderr.print "Checking for output_lock on existing #{fn}\n" if not @quiet
|
120
|
+
return if File.exist?(fn) # continue because thread still processing
|
121
|
+
# Now we should remove the .keep file
|
122
|
+
if not @debug
|
123
|
+
sleep 0.1 # give it a little time
|
124
|
+
keep = fn+'.keep'
|
125
|
+
if File.exist?(keep)
|
126
|
+
$stderr.print "Removing #{keep}\n" if not @quiet
|
127
|
+
File.unlink(keep)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
@last_output += 1 # get next one in line
|
131
|
+
@output_locked = false
|
103
132
|
end
|
133
|
+
# Still processing
|
104
134
|
if info = @pid_list[@last_output]
|
105
135
|
(pid,count,fn) = info
|
106
|
-
$stderr.print "
|
136
|
+
$stderr.print "Testing for output file ",[info],"\n" if @debug
|
107
137
|
if File.exist?(fn)
|
108
138
|
# Yes! We have the next output, create outputter
|
139
|
+
@output_locked = info
|
140
|
+
$stderr.print "Set lock on ",[info],"\n" if not @quiet
|
109
141
|
if not blocking
|
142
|
+
$stderr.print "Processing output file #{fn} (non-blocking)\n" if not @quiet
|
110
143
|
pid = fork do
|
111
144
|
output.call(fn)
|
145
|
+
FileUtils::mv(fn,fn+'.keep')
|
146
|
+
# if not @debug
|
147
|
+
# $stderr.print "Removing #{fn}\n" if not @quiet
|
148
|
+
# File.unlink(fn)
|
149
|
+
# else
|
150
|
+
# FileUtils::mv(fn,fn+'.keep')
|
151
|
+
# end
|
152
|
+
|
112
153
|
exit(0)
|
113
154
|
end
|
114
|
-
|
155
|
+
Process.detach(pid)
|
115
156
|
else
|
157
|
+
$stderr.print "Processing output file #{fn} (blocking)\n" if not @quiet
|
116
158
|
output.call(fn)
|
159
|
+
if not @debug
|
160
|
+
$stderr.print "Removing #{fn}\n" if not @quiet
|
161
|
+
File.unlink(fn)
|
162
|
+
else
|
163
|
+
FileUtils::mv(fn,fn+'.keep')
|
164
|
+
end
|
117
165
|
end
|
166
|
+
else
|
167
|
+
sleep 0.2
|
118
168
|
end
|
119
169
|
end
|
120
170
|
end
|
121
171
|
|
172
|
+
# Wait for a worker to finish. While working, the child pid is writing
|
173
|
+
# a file with extension .part(ial). After completion the file is
|
174
|
+
# renamed without .part and a slot is free.
|
122
175
|
def wait_for_worker(info)
|
123
176
|
(pid,count,fn) = info
|
124
177
|
if pid_or_file_running?(pid,fn)
|
125
|
-
$stderr.print "Waiting up to #{@timeout} seconds for pid=#{pid} to complete\n"
|
178
|
+
$stderr.print "Waiting up to #{@timeout} seconds for pid=#{pid} to complete #{fn}\n" if not @quiet
|
126
179
|
begin
|
127
180
|
Timeout.timeout(@timeout) do
|
128
181
|
while not File.exist?(fn) # wait for the result to appear
|
129
182
|
sleep 0.2
|
183
|
+
return if not pid_or_file_running?(pid,fn) # worker is gone
|
130
184
|
end
|
131
185
|
end
|
132
|
-
#
|
133
|
-
raise "FATAL: child process appears to have crashed #{fn}" if not File.exist?(fn)
|
134
|
-
$stderr.print "OK pid=#{pid}, processing #{fn}\n"
|
186
|
+
# Partial file should have been renamed:
|
187
|
+
raise "FATAL: child process #{pid} appears to have crashed #{fn}" if not File.exist?(fn)
|
188
|
+
$stderr.print "OK pid=#{pid}, processing output of #{fn}\n" if not @quiet
|
135
189
|
rescue Timeout::Error
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
190
|
+
# Kill it to speed up exit
|
191
|
+
Process.kill 9, pid
|
192
|
+
Process.wait pid
|
193
|
+
$stderr.print "FATAL: child process killed because it stopped responding, pid = #{pid}, fn = #{fn}, count = #{count}\n"
|
194
|
+
$stderr.print "Bailing out"
|
195
|
+
raise
|
142
196
|
end
|
143
197
|
end
|
144
198
|
end
|
@@ -155,15 +209,41 @@ class PCOWS
|
|
155
209
|
|
156
210
|
def process_remaining_output()
|
157
211
|
return if single_threaded
|
212
|
+
$stderr.print "Processing remaining output...\n" if not @quiet
|
158
213
|
while @output_locked
|
159
214
|
sleep 0.2
|
160
|
-
process_output()
|
215
|
+
process_output() # keep trying
|
161
216
|
end
|
162
217
|
@pid_list.each do |info|
|
163
|
-
|
218
|
+
(pid,count,fn) = info
|
219
|
+
while pid_or_file_running?(pid,fn) or File.exist?(fn)
|
220
|
+
$stderr.print "Trying: ",[info],"\n" if not @quiet
|
221
|
+
process_output(nil,:by_line,true)
|
222
|
+
sleep 0.2
|
223
|
+
end
|
224
|
+
end
|
225
|
+
cleanup_tmpdir()
|
226
|
+
end
|
227
|
+
|
228
|
+
def cleanup()
|
229
|
+
@pid_list.each do |info|
|
230
|
+
(pid,count,fn) = info
|
231
|
+
if pid_running?(pid)
|
232
|
+
$stderr.print "Killing child ",[info],"\n"
|
233
|
+
begin
|
234
|
+
Process.kill 9, pid
|
235
|
+
Process.wait pid
|
236
|
+
rescue Errno::ENOENT
|
237
|
+
$stdout.puts "INFO: #{pidfile} did not exist: Errno::ENOENT" if not @quiet
|
238
|
+
rescue Errno::ESRCH
|
239
|
+
$stdout.puts "INFO: The process #{opid} did not exist: Errno::ESRCH" if not @quiet
|
240
|
+
end
|
241
|
+
end
|
242
|
+
File.unlink(fn) if File.exist?(fn)
|
243
|
+
tempfn = fn+'.'+RUNNINGEXT
|
244
|
+
File.unlink(tempfn) if File.exist?(tempfn)
|
164
245
|
end
|
165
|
-
|
166
|
-
Dir.unlink(@tmpdir) if @tmpdir
|
246
|
+
cleanup_tmpdir()
|
167
247
|
end
|
168
248
|
|
169
249
|
private
|
@@ -203,8 +283,15 @@ class PCOWS
|
|
203
283
|
# Count on MAC
|
204
284
|
return Integer `sysctl -n hw.ncpu 2>/dev/null`
|
205
285
|
end
|
206
|
-
$stderr.print "Could not determine number of CPUs"
|
286
|
+
$stderr.print "Could not determine number of CPUs" if not @quiet
|
207
287
|
1
|
208
288
|
end
|
209
289
|
|
290
|
+
def cleanup_tmpdir
|
291
|
+
if not @debug
|
292
|
+
$stderr.print "Removing dir #{@tmpdir}\n" if not @quiet
|
293
|
+
Dir.unlink(@tmpdir) if @tmpdir
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
210
297
|
end
|