coutinho_assembly 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +43 -0
- data/LICENSE.txt +21 -0
- data/README.md +43 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/coutinho_assembly.gemspec +44 -0
- data/exe/coutinho_assembly +241 -0
- data/lib/coutinho_assembly/version.rb +13 -0
- data/lib/coutinho_assembly.rb +169 -0
- metadata +130 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 75e264a04b8ea87cb37c4c5bc75ec979b1225f20
|
4
|
+
data.tar.gz: 6c3a4566c5537b5251e65b845635cbce66f74668
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7df3fd03583f50956691d7c971a1c5fec2b7e31ca5f3b801a43a1a4eaeb03c96b31e4655d6363a42773df4b24082557bb94eb5ab8cadd6940f7659275f63c0d3
|
7
|
+
data.tar.gz: 9909aa5ee252b2b276f581aa645af5fa5caab8469a17d935b5d6dba6d90c07947c1c8d021079e855012425ebfffdb3d656c9240b9660f90625134eb0934562f7
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
2
|
+
|
3
|
+
## Our Pledge
|
4
|
+
|
5
|
+
In the interest of fostering an open and welcoming environment, we as
|
6
|
+
contributors and maintainers pledge to making participation in our project and
|
7
|
+
our community a harassment-free experience for everyone, regardless of age, body
|
8
|
+
size, disability, ethnicity, gender identity and expression, level of experience,
|
9
|
+
nationality, personal appearance, race, religion, or sexual identity and
|
10
|
+
orientation.
|
11
|
+
|
12
|
+
## Our Standards
|
13
|
+
|
14
|
+
Examples of behavior that contributes to creating a positive environment
|
15
|
+
include:
|
16
|
+
|
17
|
+
* Using welcoming and inclusive language
|
18
|
+
* Being respectful of differing viewpoints and experiences
|
19
|
+
* Gracefully accepting constructive criticism
|
20
|
+
* Focusing on what is best for the community
|
21
|
+
* Showing empathy towards other community members
|
22
|
+
|
23
|
+
Examples of unacceptable behavior by participants include:
|
24
|
+
|
25
|
+
* The use of sexualized language or imagery and unwelcome sexual attention or
|
26
|
+
advances
|
27
|
+
* Trolling, insulting/derogatory comments, and personal or political attacks
|
28
|
+
* Public or private harassment
|
29
|
+
* Publishing others' private information, such as a physical or electronic
|
30
|
+
address, without explicit permission
|
31
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
32
|
+
professional setting
|
33
|
+
|
34
|
+
## Our Responsibilities
|
35
|
+
|
36
|
+
Project maintainers are responsible for clarifying the standards of acceptable
|
37
|
+
behavior and are expected to take appropriate and fair corrective action in
|
38
|
+
response to any instances of unacceptable behavior.
|
39
|
+
|
40
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
41
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
42
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
43
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
44
|
+
threatening, offensive, or harmful.
|
45
|
+
|
46
|
+
## Scope
|
47
|
+
|
48
|
+
This Code of Conduct applies both within project spaces and in public spaces
|
49
|
+
when an individual is representing the project or its community. Examples of
|
50
|
+
representing a project or community include using an official project e-mail
|
51
|
+
address, posting via an official social media account, or acting as an appointed
|
52
|
+
representative at an online or offline event. Representation of a project may be
|
53
|
+
further defined and clarified by project maintainers.
|
54
|
+
|
55
|
+
## Enforcement
|
56
|
+
|
57
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58
|
+
reported by contacting the project team at moorer@udel.edu. All
|
59
|
+
complaints will be reviewed and investigated and will result in a response that
|
60
|
+
is deemed necessary and appropriate to the circumstances. The project team is
|
61
|
+
obligated to maintain confidentiality with regard to the reporter of an incident.
|
62
|
+
Further details of specific enforcement policies may be posted separately.
|
63
|
+
|
64
|
+
Project maintainers who do not follow or enforce the Code of Conduct in good
|
65
|
+
faith may face temporary or permanent repercussions as determined by other
|
66
|
+
members of the project's leadership.
|
67
|
+
|
68
|
+
## Attribution
|
69
|
+
|
70
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
71
|
+
available at [http://contributor-covenant.org/version/1/4][version]
|
72
|
+
|
73
|
+
[homepage]: http://contributor-covenant.org
|
74
|
+
[version]: http://contributor-covenant.org/version/1/4/
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
coutinho_assembly (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
abort_if (0.2.0)
|
10
|
+
diff-lcs (1.3)
|
11
|
+
optimist (3.0.0)
|
12
|
+
rake (10.5.0)
|
13
|
+
rspec (3.8.0)
|
14
|
+
rspec-core (~> 3.8.0)
|
15
|
+
rspec-expectations (~> 3.8.0)
|
16
|
+
rspec-mocks (~> 3.8.0)
|
17
|
+
rspec-core (3.8.0)
|
18
|
+
rspec-support (~> 3.8.0)
|
19
|
+
rspec-expectations (3.8.2)
|
20
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
21
|
+
rspec-support (~> 3.8.0)
|
22
|
+
rspec-mocks (3.8.0)
|
23
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
24
|
+
rspec-support (~> 3.8.0)
|
25
|
+
rspec-support (3.8.0)
|
26
|
+
rya (0.5.1)
|
27
|
+
abort_if (~> 0.2.0)
|
28
|
+
systemu (~> 2.6, >= 2.6.5)
|
29
|
+
systemu (2.6.5)
|
30
|
+
|
31
|
+
PLATFORMS
|
32
|
+
ruby
|
33
|
+
|
34
|
+
DEPENDENCIES
|
35
|
+
bundler (~> 2.0)
|
36
|
+
coutinho_assembly!
|
37
|
+
optimist (~> 3.0)
|
38
|
+
rake (~> 10.0)
|
39
|
+
rspec (~> 3.0)
|
40
|
+
rya (~> 0.5)
|
41
|
+
|
42
|
+
BUNDLED WITH
|
43
|
+
2.0.1
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2019 Ryan Moore
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# CoutinhoAssembly
|
2
|
+
|
3
|
+
Run Coutinho's Epic Assembly Pipeline!
|
4
|
+
|
5
|
+
It's based on Coutinho's epic 2017 paper: https://doi.org/10.1038/ncomms15955
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'coutinho_assembly'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install coutinho_assembly
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
TODO: Write usage instructions here
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/mooreryan/coutinho_assembly. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
36
|
+
|
37
|
+
## License
|
38
|
+
|
39
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
40
|
+
|
41
|
+
## Code of Conduct
|
42
|
+
|
43
|
+
Everyone interacting in the CoutinhoAssembly project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/mooreryan/coutinho_assembly/blob/master/CODE_OF_CONDUCT.md).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "coutinho_assembly"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
|
2
|
+
lib = File.expand_path("../lib", __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require "coutinho_assembly/version"
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
  # Basic identity of the gem.
  spec.name          = "coutinho_assembly"
  spec.version       = CoutinhoAssembly::VERSION
  spec.authors       = ["Ryan Moore"]
  spec.email         = ["moorer@udel.edu"]

  spec.summary       = %q{Run Coutinho's assembly pipeline.}
  spec.description   = %q{Run Coutinho's assembly pipeline.}
  spec.homepage      = "https://github.com/mooreryan/coutinho_assembly"
  spec.license       = "MIT"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  #
  #   spec.metadata["homepage_uri"] = spec.homepage
  #   spec.metadata["source_code_uri"] = "TODO: Put your gem's public repo URL here."
  #   spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # exe/coutinho_assembly does `require "rya"` and `require "optimist"`, so
  # both must be installed together with the gem, not only for development.
  spec.add_runtime_dependency "optimist", "~> 3.0"
  spec.add_runtime_dependency "rya", "~> 0.5"

  spec.add_development_dependency "bundler", "~> 2.0"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
@@ -0,0 +1,241 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
Signal.trap("SIGPIPE", "EXIT")
|
4
|
+
|
5
|
+
require "coutinho_assembly"
|
6
|
+
require "rya"
|
7
|
+
require "optimist"
|
8
|
+
require "pp"
|
9
|
+
require "fileutils"
|
10
|
+
|
11
|
+
# TODO ideally, if megahit fails, try and restart it with the --continue option rather than restart it from the beginning each time.
|
12
|
+
|
13
|
+
Process.extend Rya::CoreExtensions::Process
|
14
|
+
|
15
|
+
ASSEMBLY_PRESET = "fast"
|
16
|
+
|
17
|
+
# Parse the command line. Every option below ends up as a key in +opts+.
opts = Optimist.options do
  version CoutinhoAssembly::VERSION_BANNER

  banner <<-EOS

#{CoutinhoAssembly::VERSION_BANNER}

  Run Coutinho's Epic Assembly Pipeline!

  It's based on Coutinho's epic 2017 paper: https://doi.org/10.1038/ncomms15955

  You need forward, reverse, and single currently.  I'll fix that at some point.

  --take can be used if some of your jobs failed and you want to add a prefix to the new ones, like "take_2".

  --max-attempts is the number of retries any external program gets before we just skip it and go on to the next step.

  --sampling-percentage and --num-subsamples go together.  So, --sampling-percentage 1 5 10 --num-subsamples 50 35 25 would mean 50 1% subsamples, 35 5% subsamples, and 25 10% subsamples.

  For the binary program options, either provide the path to the binary or the name of the program if the program is already on your path.

  Options:
  EOS

  # Input reads
  opt(:forward_reads,
      "Forward reads",
      type: :string,
      short: "-f")
  opt(:reverse_reads,
      "Reverse reads",
      type: :string,
      short: "-r")
  opt(:single_reads,
      "Unpaired reads",
      type: :string,
      short: "-s")

  # Output options
  opt(:out_dir,
      "Output directory",
      default: "coutinho_assembly")
  opt(:take,
      "If you want to prefix your contig files with something",
      type: :string,
      default: "take_1")

  # Sampling options (the two lists are paired up position by position)
  opt(:sampling_percentage,
      "What percent(s) do you want to subsample?",
      default: [1, 5, 10, 25, 50, 75])
  opt(:num_subsamples,
      "How many subsamples for each level?",
      default: [50, 50, 25, 15, 10, 5])

  # Pipeline options
  opt(:num_threads,
      "Number of threads to use",
      default: 1)
  opt(:max_attempts,
      "Max no. retries before giving up on a pipeline step.",
      default: 10)

  # External programs I depend on
  opt(:megahit_binary,
      "Path to megahit binary",
      default: "~/bin/megahit")
  opt(:sample_seqs_binary,
      "Path to sample_seqs binary",
      default: "~/bin/sample_seqs")
  opt(:zip_binary,
      "Path to zipping program (e.g., pigz, gzip, bzip2)",
      default: "pigz")
end
|
91
|
+
|
92
|
+
# Anonymous classes that expose the runner modules' methods as class-level
# methods (Runners.run_sample_seqs, MegahitRunners.run, ...).
Runners        = Class.new { extend CoutinhoAssembly::Runners }
MegahitRunners = Class.new { extend CoutinhoAssembly::Runners::Megahit }

forward_reads = opts[:forward_reads]
reverse_reads = opts[:reverse_reads]
single_reads  = opts[:single_reads]

# The read options have no default, so they are nil when the flag is omitted.
# File.exist?(nil) raises TypeError, so check for nil first and let the user
# see the normal abort message instead of a stack trace.
Rya::AbortIf.abort_unless(!forward_reads.nil? && File.exist?(forward_reads),
                          "--forward-reads arg does not exist")
Rya::AbortIf.abort_unless(!reverse_reads.nil? && File.exist?(reverse_reads),
                          "--reverse-reads arg does not exist")
Rya::AbortIf.abort_unless(!single_reads.nil? && File.exist?(single_reads),
                          "--single-reads arg does not exist")

take = opts[:take]

sampling_percentage = opts[:sampling_percentage]
num_subsamples      = opts[:num_subsamples]

Rya::AbortIf.abort_if(sampling_percentage.any? { |perc| perc < 1 || perc > 99 },
                      "--sampling-percentage must be between 1 and 99")

Rya::AbortIf.abort_if(num_subsamples.any? { |num| num < 1 },
                      "--num-subsamples must be at least 1")

Rya::AbortIf.abort_unless(sampling_percentage.count == num_subsamples.count,
                          "--sampling-percentage and --num-subsamples must have the same number of items")

# Pairs of [percent, number of subsamples at that percent].
sampling_info = sampling_percentage.zip num_subsamples

num_threads  = opts[:num_threads]
max_attempts = opts[:max_attempts]

Rya::AbortIf.abort_if(num_threads < 1,
                      "--num-threads must be at least 1")
Rya::AbortIf.abort_if(max_attempts < 1,
                      "--max-attempts must be at least 1")

megahit_binary     = opts[:megahit_binary]
sample_seqs_binary = opts[:sample_seqs_binary]
zip_binary         = opts[:zip_binary]

# TODO check binaries

# Only touch the file system once every option has been validated.
out_dir = opts[:out_dir]
FileUtils.mkdir_p out_dir

# Not created here; it is handed to sample_seqs as its output directory.
subsample_dir = File.join out_dir, "subsamples"

assembly_dir = File.join out_dir, "assembly_info"
FileUtils.mkdir_p assembly_dir

contigs_dir = File.join out_dir, "final_contigs"
FileUtils.mkdir_p contigs_dir

num_attempts = 0
|
149
|
+
|
150
|
+
# For every sampling level: draw the subsamples, assemble each one with
# megahit, tidy up the assembly directory, and collect the final contigs.
sampling_info.each do |(percent, num_samples)|
  # First, get the subsamples.  The out_dir is subsample_dir, the file format is like subsample_dir/percent_NN.sample_M.{1,2,U}.fq
  sample_seqs_out = nil
  Process.time_it "Sampling reads", Rya::AbortIf.logger do
    begin
      num_attempts = Process.run_until_success max_attempts do
        sample_seqs_out =
          Runners.run_sample_seqs exe:                 sample_seqs_binary,
                                  forward_reads:       forward_reads,
                                  reverse_reads:       reverse_reads,
                                  single_reads:        single_reads,
                                  out_dir:             subsample_dir,
                                  sampling_percentage: percent,
                                  num_subsamples:      num_samples
      end
    rescue Rya::MaxAttemptsExceededError
      # num_attempts is only assigned when run_until_success returns, so on
      # this path it still holds a stale value.  Report the limit instead.
      Rya::AbortIf.logger.fatal "Couldn't sample reads after #{max_attempts} attempts"

      # Without subsamples there is nothing to assemble.
      exit 1
    end
  end

  # Now run the assemblies
  contig_files = []
  sample_seqs_out.outputs[:subsample_file_names].each do |sample_num, fnames|
    Rya::AbortIf.logger.info { "Assembling sample #{sample_num}" }

    out_prefix          = sprintf "percent_%02d.sample_%d", percent, sample_num
    sample_assembly_dir = File.join assembly_dir, out_prefix

    # The directory name never carries the take prefix; only the files do.
    if take
      out_prefix = "#{take}.#{out_prefix}"
    end

    megahit_output = nil
    Process.time_it "Running megahit", Rya::AbortIf.logger do
      begin
        num_attempts = Process.run_until_success max_attempts do
          megahit_output =
            MegahitRunners.run exe:           megahit_binary,
                               forward_reads: fnames[:forward_reads],
                               reverse_reads: fnames[:reverse_reads],
                               single_reads:  fnames[:single_reads],
                               out_dir:       sample_assembly_dir,
                               out_prefix:    out_prefix,
                               num_threads:   num_threads,
                               preset:        ASSEMBLY_PRESET
        end
      rescue Rya::MaxAttemptsExceededError
        # A failed assembly is logged and skipped; the other samples still run.
        Rya::AbortIf.logger.error "Couldn't complete assembly #{out_prefix} after #{max_attempts} attempts.  You will have to rerun it manually."
      end
    end

    # Only do the cleanup if the assembly successfully completed.
    if megahit_output && megahit_output.exitstatus.zero?
      Process.time_it "Cleaning up assembly out directory", Rya::AbortIf.logger do
        begin
          num_attempts = Process.run_until_success max_attempts do
            MegahitRunners.clean_up_out_dir zip_binary:   zip_binary,
                                            assembly_dir: sample_assembly_dir,
                                            num_threads:  num_threads
          end
        rescue Rya::MaxAttemptsExceededError
          Rya::AbortIf.logger.error "Couldn't complete assembly directory cleanup for assembly #{out_prefix} after #{max_attempts} attempts.  You will have to rerun it manually."
        end
      end

      # Also add the contig files to the container.
      contig_files << megahit_output.outputs[:final_contigs]
    end
  end

  # Clean up....

  # Because we aren't sure if the zipping succeeded or not, we need to account for the fact that there might be .gz or .bz2 or something on the end of the contig file names.
  all_contig_files = contig_files.flat_map { |fname| Dir.glob("#{fname}*") }

  # Move all the final contigs files into a single directory
  FileUtils.mv all_contig_files.compact, contigs_dir

  # Remove subsamples
  FileUtils.rm_r subsample_dir if Dir.exist?(subsample_dir)
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Version and project metadata for the coutinho_assembly gem.
module CoutinhoAssembly
  VERSION = "0.1.0"

  COPYRIGHT = "2019 Ryan Moore"
  CONTACT   = "moorer@udel.edu"
  WEBSITE   = "https://github.com/mooreryan/coutinho_assembly"
  LICENSE   = "MIT"

  # Text shown for --version and at the top of the help banner.  Built with
  # an explicit join so the value does not depend on how this file is
  # indented (a multi-line string literal would embed any leading whitespace
  # of its continuation lines).
  VERSION_BANNER = [
    "# Version:   #{VERSION}",
    "# Copyright: #{COPYRIGHT}",
    "# Contact:   #{CONTACT}",
    "# License:   #{LICENSE}"
  ].join("\n")
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
require "coutinho_assembly/version"
|
2
|
+
|
3
|
+
module CoutinhoAssembly
|
4
|
+
class Error < StandardError;
|
5
|
+
end
|
6
|
+
|
7
|
+
# Value object handed back by the runner methods: the raw process status,
# its exit code, and a hash describing what the run produced.
class RunnerExit
  # outputs is a hash table with info about outdirs and outfiles from the process.
  attr_accessor :proc_status, :exitstatus, :outputs

  def initialize(proc_status, exitstatus, outputs)
    @proc_status, @exitstatus, @outputs = proc_status, exitstatus, outputs
  end
end
|
17
|
+
|
18
|
+
module Runners
|
19
|
+
module Megahit
|
20
|
+
# Copies megahit's diagnostic files into this program's log at error level.
#
# Looks for "opts.txt" and "<assembly_prefix>.log" inside +assembly_dir+ and
# logs the contents of whichever of them exist.  Missing files are skipped.
#
# @param assembly_dir [String] directory megahit wrote to
# @param assembly_prefix [String] the prefix used for the megahit log file name
def log_diagnostic_files assembly_dir, assembly_prefix
  diagnostic_fnames = [
    File.join(assembly_dir, "opts.txt"),
    File.join(assembly_dir, "#{assembly_prefix}.log")
  ]

  diagnostic_fnames.each do |fname|
    next unless File.exist? fname

    # File.read closes the handle itself; the previous File.open(...).read
    # left it open until garbage collection.
    contents = File.read(fname, mode: "rt").chomp

    Rya::AbortIf.logger.error { contents }
  end
end
|
32
|
+
|
33
|
+
# Runs megahit once; if that fails, retries once with --continue.  If the
# retry fails too, dumps megahit's diagnostic files into the log and removes
# +out_dir+ so that a retry wrapper can call this method again from scratch.
#
# @param exe [String] megahit binary (name or path)
# @param forward_reads [String] passed to megahit as -1
# @param reverse_reads [String] passed to megahit as -2
# @param single_reads [String] passed to megahit as -r
# @param out_dir [String] passed to megahit as --out-dir
# @param out_prefix [String, nil] passed as --out-prefix when given
# @param num_threads [Integer] passed as --num-cpu-threads
# @param preset [String, nil] "meta-sensitive" adds --presets meta-sensitive,
#   "fast" adds --k-list 21, anything else adds nothing
# @return [CoutinhoAssembly::RunnerExit] status of the last megahit run, with
#   outputs[:final_contigs] set to the expected contigs file path
def run(exe:,

        forward_reads: nil,
        reverse_reads: nil,
        single_reads: nil,

        out_dir: nil,
        out_prefix: nil,

        num_threads: 1,
        preset: nil)

  cmd = "#{exe} " \
        "--num-cpu-threads #{num_threads} " \
        "--out-dir #{out_dir} " \
        "-1 #{forward_reads} " \
        "-2 #{reverse_reads} " \
        "-r #{single_reads}"

  # Add the optional opts
  cmd += " --out-prefix #{out_prefix}" if out_prefix

  case preset
  when "meta-sensitive"
    cmd += " --presets meta-sensitive"
  when "fast"
    cmd += " --k-list 21"
  end

  # Run the initial assembly
  proc_status = Process.run_it cmd

  unless proc_status.exitstatus.zero?
    # The assembly failed.  Since megahit has a checkpoint continue mode, if
    # we can save the assembly by trying once more with --continue, it will
    # save time.
    cmd += " --continue"

    proc_status = Process.run_it cmd
  end

  # Now we check if the checkpoint assembly failed as well
  unless proc_status.exitstatus.zero?
    # First, dump the megahit opts and log files into the log for the
    # coutinho_assembly program.
    log_diagnostic_files out_dir, out_prefix

    # The retry wrapper will always fail if it reuses the same assembly
    # directory name, so remove the directory.
    FileUtils.rm_r out_dir if Dir.exist? out_dir
  end

  # When no --out-prefix is passed, megahit names its contigs file
  # final.contigs.fa, so report that path rather than ".contigs.fa".
  contigs_prefix = out_prefix || "final"

  outputs = {
    final_contigs: File.join(out_dir, "#{contigs_prefix}.contigs.fa")
  }

  # Return whichever proc_status was the last one to be set, either original
  # assembly or the continued assembly.
  CoutinhoAssembly::RunnerExit.new proc_status, proc_status.exitstatus, outputs
end
|
96
|
+
|
97
|
+
# Removes the intermediate contigs and zips the final contigs.  This is meant
# to be run on a completed assembly out dir.
#
# @param zip_binary [String] compression program, as a bare name or a path
# @param assembly_dir [String] the megahit output directory to tidy up
# @param num_threads [Integer, nil] thread count; only used for pigz
# @return whatever Process.run_it returns for the zip command
def clean_up_out_dir(zip_binary: nil,
                     assembly_dir: nil,
                     num_threads: nil)

  int_contig_dir = File.join assembly_dir, "intermediate_contigs"

  # Remove the intermediate contigs
  FileUtils.rm_r int_contig_dir if Dir.exist? int_contig_dir

  # Left as a glob on purpose: the shell expands it when the command runs.
  contig_glob = File.join assembly_dir, "*.contigs.fa"

  # The program may be given as a path (e.g. /usr/bin/pigz), so compare the
  # basename.  Skip -p when no thread count was supplied, because "-p "
  # followed directly by the glob is not a valid pigz invocation.
  use_pigz_threads = File.basename(zip_binary.to_s) == "pigz" && !num_threads.nil?

  cmd = if use_pigz_threads
          "#{zip_binary} -p #{num_threads} #{contig_glob}"
        else
          "#{zip_binary} #{contig_glob}"
        end

  # Zip the contigs file
  Process.run_it cmd
end
|
118
|
+
end
|
119
|
+
|
120
|
+
# Runs the sample_seqs program to draw +num_subsamples+ subsamples at
# +sampling_percentage+ percent, and reports the file names it is expected
# to write.
#
# When +out_prefix+ is not given it defaults to "percent_NN" (zero padded to
# two digits).  +random_seed+ is accepted but not passed on yet.
#
# @return [CoutinhoAssembly::RunnerExit] outputs holds :out_dir and
#   :subsample_file_names, a hash from sample number to the forward,
#   reverse, and single read file names for that sample
def run_sample_seqs(exe:,
                    forward_reads:,
                    reverse_reads:,
                    single_reads:,

                    out_dir:,
                    out_prefix: nil,

                    sampling_percentage:,
                    num_subsamples:,
                    random_seed: nil)

  # Zero pad the left for single digits.
  # TODO maybe use 3?  Will you ever take a 100% subsample?
  out_prefix ||= format("percent_%02d", sampling_percentage)

  # TODO if we want to make the 1 2 or s reads optional, we'll need to NOT pass those params to this program (it doesn't handle nil inputs)
  # TODO not passing in the random seed at all yet
  command_parts = [
    "#{exe}",
    "-1 #{forward_reads}",
    "-2 #{reverse_reads}",
    "-s #{single_reads}",
    "-p #{sampling_percentage}",
    "-n #{num_subsamples}",
    "-o #{out_dir}",
    "-b #{out_prefix}"
  ]
  cmd = command_parts.join(" ")

  # File name suffix used for each kind of read.
  suffix_for = { forward_reads: "1", reverse_reads: "2", single_reads: "U" }

  subsample_file_names = num_subsamples.times.each_with_object({}) do |sample_num, by_sample|
    by_sample[sample_num] = suffix_for.each_with_object({}) do |(read_kind, suffix), names|
      names[read_kind] = File.join(out_dir, "#{out_prefix}.sample_#{sample_num}.#{suffix}.fq")
    end
  end

  outputs = { out_dir: out_dir, subsample_file_names: subsample_file_names }

  proc_status = Process.run_it cmd

  CoutinhoAssembly::RunnerExit.new proc_status, proc_status.exitstatus, outputs
end
|
168
|
+
end
|
169
|
+
end
|
metadata
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: coutinho_assembly
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ryan Moore
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-01-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: optimist
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rya
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.5'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.5'
|
83
|
+
description: Run Coutinho's assembly pipeline.
|
84
|
+
email:
|
85
|
+
- moorer@udel.edu
|
86
|
+
executables:
|
87
|
+
- coutinho_assembly
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- ".gitignore"
|
92
|
+
- ".rspec"
|
93
|
+
- ".travis.yml"
|
94
|
+
- CODE_OF_CONDUCT.md
|
95
|
+
- Gemfile
|
96
|
+
- Gemfile.lock
|
97
|
+
- LICENSE.txt
|
98
|
+
- README.md
|
99
|
+
- Rakefile
|
100
|
+
- bin/console
|
101
|
+
- bin/setup
|
102
|
+
- coutinho_assembly.gemspec
|
103
|
+
- exe/coutinho_assembly
|
104
|
+
- lib/coutinho_assembly.rb
|
105
|
+
- lib/coutinho_assembly/version.rb
|
106
|
+
homepage: https://github.com/mooreryan/coutinho_assembly
|
107
|
+
licenses:
|
108
|
+
- MIT
|
109
|
+
metadata: {}
|
110
|
+
post_install_message:
|
111
|
+
rdoc_options: []
|
112
|
+
require_paths:
|
113
|
+
- lib
|
114
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: '0'
|
119
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
requirements: []
|
125
|
+
rubyforge_project:
|
126
|
+
rubygems_version: 2.6.14
|
127
|
+
signing_key:
|
128
|
+
specification_version: 4
|
129
|
+
summary: Run Coutinho's assembly pipeline.
|
130
|
+
test_files: []
|