dirseq 0.2.0 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile +2 -2
- data/README.md +34 -12
- data/Rakefile +3 -3
- data/VERSION +1 -1
- data/bin/dirseq +27 -7
- data/spec/script_spec.rb +12 -12
- metadata +23 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d3d1019d1157dc1d53d0d802027b439d03d8cd0aeacc2670e0e7825be6a14f06
|
4
|
+
data.tar.gz: adc9eda6d50933442e3ee82a66757ce553e2a2aae7e92628c2ed5ed6c8304cc9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4bd8397bf9a8a6190d3467daf094c1909a71d90d7f0b349990291b5168954663f11ae54f971324bb7cf53663a5f4e48bbaa7722c3343ec0075f0f3634df263d9
|
7
|
+
data.tar.gz: 5e2dedd8e96ad9699fd949fd61a02a81c556285b2f850a268dbc7702a85f5863054da17fff0c208d3bb319139382e1770970294778ac1aaeb57796d25c5fbf2f
|
data/Gemfile
CHANGED
@@ -11,8 +11,8 @@ gem "bio", "~>1.4", ">=1.4.2"
|
|
11
11
|
group :development do
|
12
12
|
#gem "shoulda", "~> 3.5"
|
13
13
|
#gem "simplecov", "~> 0.8"
|
14
|
-
gem "
|
15
|
-
gem "bundler", "~> 1
|
14
|
+
gem "juwelier", "~> 2.4", ">=2.4.9"
|
15
|
+
gem "bundler", "~> 2.1"
|
16
16
|
gem "rspec", "~> 3.0"
|
17
17
|
gem 'pry', '~>0.10'
|
18
18
|
end
|
data/README.md
CHANGED
@@ -8,18 +8,34 @@ Note: this software is under active development!
|
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
|
-
|
11
|
+
Install some prerequisites via conda, and then dirseq itself:
|
12
12
|
```sh
|
13
|
+
conda create -c bioconda -n dirseq -y ruby samtools bedtools'>'2.24
|
14
|
+
conda activate dirseq
|
13
15
|
gem install dirseq
|
14
16
|
```
|
15
|
-
|
17
|
+
|
18
|
+
The following dependencies are installed above, but for completeness of documentation, dirseq requires these dependencies, on top of the Ruby ones:
|
16
19
|
* samtools (tested with 0.1.19 and 1.0+)
|
17
20
|
* bedtools (tested with 2.24.0) - old versions won't work.
|
18
21
|
* Ruby (tested with 2.1.1)
|
19
22
|
|
20
23
|
## Usage
|
21
24
|
|
22
|
-
|
25
|
+
Example usage:
|
26
|
+
|
27
|
+
Download the example data:
|
28
|
+
```sh
|
29
|
+
git clone https://github.com/wwood/dirseq
|
30
|
+
cd dirseq
|
31
|
+
```
|
32
|
+
|
33
|
+
Then run dirseq:
|
34
|
+
```sh
|
35
|
+
dirseq --bam spec/data/eg.bam --gff spec/data/eg.gff --measure-type count
|
36
|
+
```
|
37
|
+
|
38
|
+
Full usage help:
|
23
39
|
```sh
|
24
40
|
$ dirseq -h
|
25
41
|
|
@@ -32,8 +48,14 @@ $ dirseq -h
|
|
32
48
|
|
33
49
|
Optional parameters:
|
34
50
|
|
51
|
+
--forward-read-only consider only forward reads (i.e. read1) and ignore reverse reads. [default false]
|
35
52
|
--ignore-directions ignore directionality, give overall coverage [default: false i.e. differentiate between directions]
|
36
|
-
|
53
|
+
--measure-type TYPE what to count for each gene [options: count, coverage][default: coverage]
|
54
|
+
--accepted-feature-types TYPE
|
55
|
+
Print only features of these type(s) [default CDS]
|
56
|
+
--comment-fields Print elements from the comments in the GFF file [default ID]
|
57
|
+
--sam-filter-flags Apply these samtools filters [default: -F0x100 -F0x800]
|
58
|
+
|
37
59
|
Verbosity:
|
38
60
|
|
39
61
|
-q, --quiet Run quietly, set logging to ERROR level [default INFO]
|
@@ -41,6 +63,11 @@ Verbosity:
|
|
41
63
|
--trace options Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG
|
42
64
|
```
|
43
65
|
|
66
|
+
Running on [EnrichM](https://github.com/geronimp/enrichM) output, the output columns are changed relative to [PROKKA](https://github.com/tseemann/prokka)-generated GFF files:
|
67
|
+
```sh
|
68
|
+
dirseq --bam spec/data/eg.bam --gff spec/data/eg.gff --measure-type count --comment-fields seq_id,annotations
|
69
|
+
```
|
70
|
+
|
44
71
|
## Project home page
|
45
72
|
|
46
73
|
Information on the source tree, documentation, examples, issues and
|
@@ -52,16 +79,11 @@ The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
|
|
52
79
|
|
53
80
|
## Cite
|
54
81
|
|
55
|
-
If you use this software, please cite
|
56
|
-
|
57
|
-
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
58
|
-
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
59
|
-
|
60
|
-
## Biogems.info
|
82
|
+
If you use this software, please cite
|
61
83
|
|
62
|
-
|
84
|
+
Woodcroft, B.J., Singleton, C.M., Boyd, J.A. et al. Genome-centric view of carbon processing in thawing permafrost. Nature 560, 49–54 (2018). https://doi.org/10.1038/s41586-018-0338-1
|
63
85
|
|
64
86
|
## Copyright
|
65
87
|
|
66
|
-
Copyright (c) 2014 Ben J. Woodcroft. See LICENSE.txt for further details.
|
88
|
+
Copyright (c) 2014-2021 Ben J. Woodcroft. See LICENSE.txt for further details.
|
67
89
|
|
data/Rakefile
CHANGED
@@ -11,8 +11,8 @@ rescue Bundler::BundlerError => e
|
|
11
11
|
end
|
12
12
|
require 'rake'
|
13
13
|
|
14
|
-
require 'jeweler'
|
15
|
-
Jeweler::Tasks.new do |gem|
14
|
+
require 'juwelier'
|
15
|
+
Juwelier::Tasks.new do |gem|
|
16
16
|
# gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
|
17
17
|
gem.name = "dirseq"
|
18
18
|
gem.homepage = "http://github.com/wwood/dirseq"
|
@@ -23,7 +23,7 @@ Jeweler::Tasks.new do |gem|
|
|
23
23
|
gem.authors = ["Ben J. Woodcroft"]
|
24
24
|
# dependencies defined in Gemfile
|
25
25
|
end
|
26
|
-
Jeweler::RubygemsDotOrgTasks.new
26
|
+
Juwelier::RubygemsDotOrgTasks.new
|
27
27
|
|
28
28
|
require 'rspec/core'
|
29
29
|
require 'rspec/core/rake_task'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.0
|
1
|
+
0.4.3
|
data/bin/dirseq
CHANGED
@@ -24,6 +24,8 @@ options = {
|
|
24
24
|
:count_type => COVERAGE_COUNT_TYPE,
|
25
25
|
:forward_read_only => false,
|
26
26
|
:accepted_feature_types => ['CDS'],
|
27
|
+
:comment_fields_to_print => ['ID'],
|
28
|
+
:sam_filter_flags => "-F0x100 -F0x800",
|
27
29
|
}
|
28
30
|
o = OptionParser.new do |opts|
|
29
31
|
opts.banner = "
|
@@ -50,7 +52,14 @@ o = OptionParser.new do |opts|
|
|
50
52
|
end
|
51
53
|
opts.on("--accepted-feature-types TYPE", Array,
|
52
54
|
"Print only features of these type(s) [default #{options[:accepted_feature_types].join(',')}]") do |arg|
|
53
|
-
options[:accepted_feature_types] =
|
55
|
+
options[:accepted_feature_types] = Set.new(arg)
|
56
|
+
end
|
57
|
+
opts.on("--comment-fields COMMA_SEPARATED_FIELDS", Array,
|
58
|
+
"Print elements from the comments in the GFF file [default #{options[:comment_fields_to_print].join(',')}]") do |arg|
|
59
|
+
options[:comment_fields_to_print] = arg
|
60
|
+
end
|
61
|
+
opts.on("--sam-filter-flags", "Apply these samtools filters [default: #{options[:sam_filter_flags]}]") do |arg|
|
62
|
+
options[:sam_filter_flags] = arg
|
54
63
|
end
|
55
64
|
|
56
65
|
# logger options
|
@@ -146,7 +155,7 @@ end
|
|
146
155
|
|
147
156
|
chromosome_file = Tempfile.new('bam_contigs')
|
148
157
|
log.info "Listing contigs in sorted order .."
|
149
|
-
cmd = "samtools idxstats #{bam_file.inspect} |cut -f1,2 >#{chromosome_file.path.inspect}"
|
158
|
+
cmd = "samtools idxstats #{bam_file.inspect} |cut -f1,2 |grep -v '^*' >#{chromosome_file.path.inspect}"
|
150
159
|
Bio::Commandeer.run(cmd, :log => log)
|
151
160
|
|
152
161
|
log.info "Finding featureless contigs"
|
@@ -181,7 +190,7 @@ end
|
|
181
190
|
|
182
191
|
covs_fwd = nil
|
183
192
|
if options[:ignore_directions]
|
184
|
-
cmd1 = "
|
193
|
+
cmd1 = "samtools view -u #{options[:sam_filter_flags]} #{bam_file.inspect} |bedtools coverage -b /dev/stdin -a #{gff_file.inspect} -hist"
|
185
194
|
cov_lines_fwd = Bio::Commandeer.run cmd1, :log => log
|
186
195
|
log.info "Parsing coverage profiles"
|
187
196
|
covs_fwd = get_covs.call(cov_lines_fwd)
|
@@ -194,10 +203,10 @@ else
|
|
194
203
|
if options[:count_type] == COUNT_COUNT_TYPE
|
195
204
|
bedtools_type_flag = '-counts'
|
196
205
|
end
|
197
|
-
cmdf1 = "samtools view -u #{read1_flag} #{bam_file.inspect} |bedtools coverage -sorted -g #{chromosome_file.path.inspect} -b /dev/stdin -a #{sorted_gff_file.inspect} -s #{bedtools_type_flag}"
|
198
|
-
cmdf2 = "samtools view -u #{read2_flag} #{bam_file.inspect} |bedtools coverage -sorted -g #{chromosome_file.path.inspect} -b /dev/stdin -a #{sorted_gff_file.inspect} -s #{bedtools_type_flag}"
|
199
|
-
cmdr1 = "samtools view -u #{read1_flag} #{bam_file.inspect} |bedtools coverage -sorted -g #{chromosome_file.path.inspect} -b /dev/stdin -a #{sorted_gff_file.inspect} -S #{bedtools_type_flag}"
|
200
|
-
cmdr2 = "samtools view -u #{read2_flag} #{bam_file.inspect} |bedtools coverage -sorted -g #{chromosome_file.path.inspect} -b /dev/stdin -a #{sorted_gff_file.inspect} -S #{bedtools_type_flag}"
|
206
|
+
cmdf1 = "samtools view #{options[:sam_filter_flags]} -u #{read1_flag} #{bam_file.inspect} |bedtools coverage -sorted -g #{chromosome_file.path.inspect} -b /dev/stdin -a #{sorted_gff_file.inspect} -s #{bedtools_type_flag}"
|
207
|
+
cmdf2 = "samtools view #{options[:sam_filter_flags]} -u #{read2_flag} #{bam_file.inspect} |bedtools coverage -sorted -g #{chromosome_file.path.inspect} -b /dev/stdin -a #{sorted_gff_file.inspect} -s #{bedtools_type_flag}"
|
208
|
+
cmdr1 = "samtools view #{options[:sam_filter_flags]} -u #{read1_flag} #{bam_file.inspect} |bedtools coverage -sorted -g #{chromosome_file.path.inspect} -b /dev/stdin -a #{sorted_gff_file.inspect} -S #{bedtools_type_flag}"
|
209
|
+
cmdr2 = "samtools view #{options[:sam_filter_flags]} -u #{read2_flag} #{bam_file.inspect} |bedtools coverage -sorted -g #{chromosome_file.path.inspect} -b /dev/stdin -a #{sorted_gff_file.inspect} -S #{bedtools_type_flag}"
|
201
210
|
|
202
211
|
command_to_parsed = lambda do |cmds, name|
|
203
212
|
covs_lines_initial = cmds.collect do |cmd|
|
@@ -246,6 +255,9 @@ else
|
|
246
255
|
raise
|
247
256
|
end
|
248
257
|
headers.push 'annotation'
|
258
|
+
options[:comment_fields_to_print].each do |field|
|
259
|
+
headers.push field
|
260
|
+
end
|
249
261
|
puts headers.join("\t")
|
250
262
|
|
251
263
|
covs_fwd.each do |feature, cov_fwd|
|
@@ -268,5 +280,13 @@ covs_fwd.each do |feature, cov_fwd|
|
|
268
280
|
]
|
269
281
|
to_print.push cov_rev unless options[:ignore_directions]
|
270
282
|
to_print.push product
|
283
|
+
options[:comment_fields_to_print].each do |field|
|
284
|
+
answer1 = record.attributes.select{|a| a[0] == field}
|
285
|
+
if answer1.empty?
|
286
|
+
to_print.push ''
|
287
|
+
else
|
288
|
+
to_print.push answer1[0][1]
|
289
|
+
end
|
290
|
+
end
|
271
291
|
puts to_print.join("\t")
|
272
292
|
end
|
data/spec/script_spec.rb
CHANGED
@@ -6,9 +6,9 @@ describe 'script' do
|
|
6
6
|
|
7
7
|
it "should regular mode" do
|
8
8
|
answer = %w(
|
9
|
-
contig type start end strand forward_average_coverage reverse_average_coverage annotation
|
9
|
+
contig type start end strand forward_average_coverage reverse_average_coverage annotation ID
|
10
10
|
).join("\t")+"\n"+%w(
|
11
|
-
contig_100 CDS 2 127 + 0.0 1.1428571428571428 unannotated
|
11
|
+
contig_100 CDS 2 127 + 0.0 1.1428571428571428 unannotated 40_1
|
12
12
|
).join("\t")+"\n"
|
13
13
|
|
14
14
|
found = Bio::Commandeer.run "#{path_to_script} --bam #{data_dir}/eg.bam --gff #{data_dir}/eg.gff -q"
|
@@ -20,9 +20,9 @@ describe 'script' do
|
|
20
20
|
found = Bio::Commandeer.run "#{path_to_script} --bam #{data_dir}/eg.bam --gff #{data_dir}/eg.gff -q --ignore-direction"
|
21
21
|
|
22
22
|
answer = %w(
|
23
|
-
contig type start end strand average_coverage annotation
|
23
|
+
contig type start end strand average_coverage annotation ID
|
24
24
|
).join("\t")+"\n"+%w(
|
25
|
-
contig_100 CDS 2 127 + 1.1428571428571428 unannotated
|
25
|
+
contig_100 CDS 2 127 + 1.1428571428571428 unannotated 40_1
|
26
26
|
).join("\t")+"\n"
|
27
27
|
|
28
28
|
found.should == answer
|
@@ -30,9 +30,9 @@ describe 'script' do
|
|
30
30
|
|
31
31
|
it 'should not fail when the GFF has a FASTA section' do
|
32
32
|
answer = %w(
|
33
|
-
contig type start end strand forward_average_coverage reverse_average_coverage annotation
|
33
|
+
contig type start end strand forward_average_coverage reverse_average_coverage annotation ID
|
34
34
|
).join("\t")+"\n"+%w(
|
35
|
-
contig_100 CDS 2 127 + 0.0 1.1428571428571428 unannotated
|
35
|
+
contig_100 CDS 2 127 + 0.0 1.1428571428571428 unannotated 40_1
|
36
36
|
).join("\t")+"\n"
|
37
37
|
|
38
38
|
found = Bio::Commandeer.run "#{path_to_script} --bam #{data_dir}/eg.bam --gff #{data_dir}/eg_with_fasta.gff -q"
|
@@ -42,10 +42,10 @@ describe 'script' do
|
|
42
42
|
|
43
43
|
it 'should print annotation out properly' do
|
44
44
|
answer = %w(
|
45
|
-
contig type start end strand forward_average_coverage reverse_average_coverage annotation
|
45
|
+
contig type start end strand forward_average_coverage reverse_average_coverage annotation ID
|
46
46
|
).join("\t")+"\n"+%w(
|
47
47
|
contig_100 CDS 2 127 + 0.0 1.1428571428571428 putative
|
48
|
-
).join("\t")+" methyltransferase YcgJ\n"
|
48
|
+
).join("\t")+" methyltransferase YcgJ PROKKA_00001\n"
|
49
49
|
|
50
50
|
found = Bio::Commandeer.run "#{path_to_script} --bam #{data_dir}/eg.bam --gff #{data_dir}/realer.gff -q"
|
51
51
|
|
@@ -54,10 +54,10 @@ describe 'script' do
|
|
54
54
|
|
55
55
|
it 'should print counts correctly' do
|
56
56
|
answer = %w(
|
57
|
-
contig type start end strand forward_read_count reverse_read_count annotation
|
57
|
+
contig type start end strand forward_read_count reverse_read_count annotation ID
|
58
58
|
).join("\t")+"\n"+%w(
|
59
59
|
contig_100 CDS 2 127 + 0.0 2.0 putative
|
60
|
-
).join("\t")+" methyltransferase YcgJ\n"
|
60
|
+
).join("\t")+" methyltransferase YcgJ PROKKA_00001\n"
|
61
61
|
|
62
62
|
found = Bio::Commandeer.run "#{path_to_script} --bam #{data_dir}/eg.bam --gff #{data_dir}/realer.gff -q --measure-type count"
|
63
63
|
|
@@ -66,10 +66,10 @@ describe 'script' do
|
|
66
66
|
|
67
67
|
it 'should count only the forward read when asked' do
|
68
68
|
answer = %w(
|
69
|
-
contig type start end strand forward_read_count reverse_read_count annotation
|
69
|
+
contig type start end strand forward_read_count reverse_read_count annotation ID
|
70
70
|
).join("\t")+"\n"+%w(
|
71
71
|
contig_100 CDS 2 127 + 0.0 1.0 putative
|
72
|
-
).join("\t")+" methyltransferase YcgJ\n"
|
72
|
+
).join("\t")+" methyltransferase YcgJ PROKKA_00001\n"
|
73
73
|
|
74
74
|
found = Bio::Commandeer.run "#{path_to_script} --bam #{data_dir}/eg.bam --gff #{data_dir}/realer.gff -q --measure-type count --forward-read-only"
|
75
75
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dirseq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben J. Woodcroft
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio-commandeer
|
@@ -42,50 +42,56 @@ dependencies:
|
|
42
42
|
name: bio
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: 1.4.2
|
48
45
|
- - "~>"
|
49
46
|
- !ruby/object:Gem::Version
|
50
47
|
version: '1.4'
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: 1.4.2
|
51
51
|
type: :runtime
|
52
52
|
prerelease: false
|
53
53
|
version_requirements: !ruby/object:Gem::Requirement
|
54
54
|
requirements:
|
55
|
-
- - ">="
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
version: 1.4.2
|
58
55
|
- - "~>"
|
59
56
|
- !ruby/object:Gem::Version
|
60
57
|
version: '1.4'
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 1.4.2
|
61
61
|
- !ruby/object:Gem::Dependency
|
62
|
-
name: jeweler
|
62
|
+
name: juwelier
|
63
63
|
requirement: !ruby/object:Gem::Requirement
|
64
64
|
requirements:
|
65
65
|
- - "~>"
|
66
66
|
- !ruby/object:Gem::Version
|
67
|
-
version: '2.
|
67
|
+
version: '2.4'
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: 2.4.9
|
68
71
|
type: :development
|
69
72
|
prerelease: false
|
70
73
|
version_requirements: !ruby/object:Gem::Requirement
|
71
74
|
requirements:
|
72
75
|
- - "~>"
|
73
76
|
- !ruby/object:Gem::Version
|
74
|
-
version: '2.
|
77
|
+
version: '2.4'
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: 2.4.9
|
75
81
|
- !ruby/object:Gem::Dependency
|
76
82
|
name: bundler
|
77
83
|
requirement: !ruby/object:Gem::Requirement
|
78
84
|
requirements:
|
79
85
|
- - "~>"
|
80
86
|
- !ruby/object:Gem::Version
|
81
|
-
version: '1
|
87
|
+
version: '2.1'
|
82
88
|
type: :development
|
83
89
|
prerelease: false
|
84
90
|
version_requirements: !ruby/object:Gem::Requirement
|
85
91
|
requirements:
|
86
92
|
- - "~>"
|
87
93
|
- !ruby/object:Gem::Version
|
88
|
-
version: '1
|
94
|
+
version: '2.1'
|
89
95
|
- !ruby/object:Gem::Dependency
|
90
96
|
name: rspec
|
91
97
|
requirement: !ruby/object:Gem::Requirement
|
@@ -144,7 +150,7 @@ homepage: http://github.com/wwood/dirseq
|
|
144
150
|
licenses:
|
145
151
|
- MIT
|
146
152
|
metadata: {}
|
147
|
-
post_install_message:
|
153
|
+
post_install_message:
|
148
154
|
rdoc_options: []
|
149
155
|
require_paths:
|
150
156
|
- lib
|
@@ -159,9 +165,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
159
165
|
- !ruby/object:Gem::Version
|
160
166
|
version: '0'
|
161
167
|
requirements: []
|
162
|
-
|
163
|
-
|
164
|
-
signing_key:
|
168
|
+
rubygems_version: 3.1.2
|
169
|
+
signing_key:
|
165
170
|
specification_version: 4
|
166
171
|
summary: FPKG calculator for metatranscriptomics
|
167
172
|
test_files: []
|