genome-pipeline 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/Rakefile +12 -0
- data/certs/audy.cert +19 -0
- data/genome-pipeline.gemspec +2 -0
- data/lib/genome/pipeline/dummy_filter.rb +16 -0
- data/lib/genome/pipeline/filter.rb +4 -2
- data/lib/genome/pipeline/pipeline.rb +16 -1
- data/lib/genome/pipeline/prodigal_filter.rb +8 -14
- data/lib/genome/pipeline/version.rb +1 -1
- data/lib/genome/pipeline.rb +4 -1
- data/readme.md +49 -7
- data/spec/data/genome.fasta +0 -53851
- data/spec/pipeline_spec.rb +28 -0
- data/spec/prodigal_filter_spec.rb +4 -1
- data.tar.gz.sig +2 -0
- metadata +37 -3
- metadata.gz.sig +4 -0
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
pkg/
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/Rakefile
CHANGED
@@ -1,2 +1,14 @@
|
|
1
1
|
require 'bundler/gem_tasks'
|
2
2
|
require 'rake/testtask'
|
3
|
+
|
4
|
+
Bundler.require :development
|
5
|
+
|
6
|
+
namespace :docs do
|
7
|
+
YARD::Rake::YardocTask.new do |t|
|
8
|
+
end
|
9
|
+
|
10
|
+
desc 'serve documentation with live reloading'
|
11
|
+
task :serve do
|
12
|
+
`bundle exec yard server --reload`
|
13
|
+
end
|
14
|
+
end
|
data/certs/audy.cert
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
-----BEGIN CERTIFICATE-----
|
2
|
+
MIIDIDCCAgigAwIBAgIBADANBgkqhkiG9w0BAQUFADA2MQowCAYDVQQDDAFfMRQw
|
3
|
+
EgYKCZImiZPyLGQBGRYEYWdkcjESMBAGCgmSJomT8ixkARkWAmNvMB4XDTE0MDcw
|
4
|
+
OTE5MDc1MVoXDTE1MDcwOTE5MDc1MVowNjEKMAgGA1UEAwwBXzEUMBIGCgmSJomT
|
5
|
+
8ixkARkWBGFnZHIxEjAQBgoJkiaJk/IsZAEZFgJjbzCCASIwDQYJKoZIhvcNAQEB
|
6
|
+
BQADggEPADCCAQoCggEBANcJT8ftI2ex4cCJGBTexorBoQTyVE5Zamdi51/zPNgU
|
7
|
+
1ew59izYGv8/JjMpUyTFdFbz8l7rlyeEssoz5Yf6gckxl+nlmiBxvILKu8W2ZjyF
|
8
|
+
a3ymCbyehr/1i+GPozZFyVKdIEIHLagAWej4Md2FsQDKwmaA/+5sSqu1b80R675e
|
9
|
+
Ae9o5G76GniNptiAC/QF/zYwXGcMJRJatYx1qIdK4rdZahlBaozTrI5Dl49yLOb/
|
10
|
+
fZKnRa6IZCL+DuXUViNTipPuOLvnUSPTzfnnw3dq+ybLpD2YppMFHR65gioidbeV
|
11
|
+
3rKgZau6mfzS7bne/m/SIjYTpPlYhHTExiJXhEJNzRUCAwEAAaM5MDcwCQYDVR0T
|
12
|
+
BAIwADAdBgNVHQ4EFgQUMP97xnvyjtlzDTrfwsWNNbFTIC0wCwYDVR0PBAQDAgSw
|
13
|
+
MA0GCSqGSIb3DQEBBQUAA4IBAQCuU9TA7hSQD7dytWLYfQ8S4uHreSdca4PRksxH
|
14
|
+
36iy8SpmynKIh83UxZH9Nr8xX3kVnHX9sFYqUeyQYrLsog+etwwD51C+taMRUzYU
|
15
|
+
y2ICMXGl9U9u/lecKj/kCOJE8bDhbVD9adm+ZKVqAoq0DGlJI4xarIxwzDCVZr3v
|
16
|
+
43LDK6ouZt7pt5TZ3wZbBsSYXcC4NVQHrxb+6YakULkyUFl6Ld2p6ID97hTOAHd2
|
17
|
+
b9RS3+P0pRzQyo2osQTnOup3ZAlfqtG90F/m5mSIvWeJQBwcZld9X2CvJ/DgSfvX
|
18
|
+
hArlAbz8jIiZ5FhHecQVQ6Q0sDljNdFh5N6SQp9afYfJ0Dlb
|
19
|
+
-----END CERTIFICATE-----
|
data/genome-pipeline.gemspec
CHANGED
@@ -15,6 +15,8 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.test_files = gem.files.grep(%r{^test})
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
|
18
|
+
gem.cert_chain = ['certs/audy.cert']
|
19
|
+
gem.signing_key = File.expand_path('~/.ssh/gem-private_key.pem') if $0 =~ /gem\z/
|
18
20
|
|
19
21
|
gem.post_install_message = <<msg
|
20
22
|
-------------------------------------------------
|
@@ -9,15 +9,17 @@ module Genome
|
|
9
9
|
attr_reader :result
|
10
10
|
|
11
11
|
def initialize genome
|
12
|
-
|
12
|
+
# store a deep copy of genome.
|
13
|
+
@genome = Marshal.load(Marshal.dump(genome))
|
13
14
|
end
|
14
15
|
|
15
16
|
def transform
|
17
|
+
@genome.features << @result
|
16
18
|
@genome
|
17
19
|
end
|
18
20
|
|
19
21
|
def inspect
|
20
|
-
"#<#{self.class} #{@result} results genome=#{@genome}>"
|
22
|
+
"#<#{self.class} #{@result.size} results genome=#{@genome}>"
|
21
23
|
end
|
22
24
|
|
23
25
|
end
|
@@ -1,6 +1,21 @@
|
|
1
1
|
module Genome
|
2
2
|
|
3
|
-
|
3
|
+
class Pipeline
|
4
|
+
|
5
|
+
attr_reader :steps
|
6
|
+
|
7
|
+
def initialize *steps
|
8
|
+
@steps = steps
|
9
|
+
end
|
10
|
+
|
11
|
+
def run genome
|
12
|
+
@steps.each do |step|
|
13
|
+
genome = step.new(genome).transform
|
14
|
+
end
|
15
|
+
return genome
|
16
|
+
end
|
17
|
+
|
18
|
+
|
4
19
|
end
|
5
20
|
|
6
21
|
end
|
@@ -1,28 +1,22 @@
|
|
1
|
-
require '
|
1
|
+
require 'open3'
|
2
2
|
|
3
3
|
module Genome
|
4
4
|
class Pipeline
|
5
5
|
|
6
6
|
class ProdigalFilter < Filter
|
7
7
|
|
8
|
-
attr_reader :result
|
9
|
-
|
10
8
|
def transform
|
9
|
+
@result = run_prodigal
|
10
|
+
super
|
11
|
+
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
# run prodigal
|
15
|
-
# read GFF and add add features to `genome`
|
13
|
+
def run_prodigal
|
16
14
|
@genome.fasta do |path|
|
17
|
-
|
15
|
+
stdin, stdout, stderr = Open3.popen3("prodigal -f gff -i #{path}")
|
16
|
+
Features.from_gff(stdout.readlines)
|
18
17
|
end
|
19
|
-
|
20
|
-
@result = Features.from_gff(out_file)
|
21
|
-
|
22
|
-
out_file.close
|
23
|
-
|
24
|
-
super
|
25
18
|
end
|
19
|
+
|
26
20
|
end
|
27
21
|
|
28
22
|
end
|
data/lib/genome/pipeline.rb
CHANGED
@@ -3,10 +3,13 @@ require 'bundler'
|
|
3
3
|
Bundler.require
|
4
4
|
|
5
5
|
module Genome
|
6
|
-
|
6
|
+
|
7
|
+
require 'genome/genome.rb'
|
8
|
+
require 'genome/pipeline/pipeline.rb'
|
7
9
|
|
8
10
|
class Pipeline
|
9
11
|
autoload :Filter, 'genome/pipeline/filter.rb'
|
12
|
+
autoload :DummyFilter, 'genome/pipeline/dummy_filter.rb'
|
10
13
|
autoload :ProdigalFilter, 'genome/pipeline/prodigal_filter.rb'
|
11
14
|
end
|
12
15
|
|
data/readme.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Genome::Pipeline
|
2
2
|
|
3
|
-
|
3
|
+
Genome processing filters and utilities
|
4
4
|
|
5
5
|
Inspired by [HTML::Pipeline](https://github.com/jch/html-pipeline).
|
6
6
|
|
@@ -13,34 +13,76 @@ pipeline = Genome::Pipeline.new Filter::PRODIGAL,
|
|
13
13
|
annotated_genome = pipeline[genome]
|
14
14
|
```
|
15
15
|
|
16
|
+
# Installation
|
17
|
+
|
18
|
+
With RubyGems:
|
19
|
+
|
20
|
+
```bash
|
21
|
+
$ gem install -P HighSecurity genome-pipeline
|
22
|
+
```
|
23
|
+
|
24
|
+
With Bundler:
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
gem 'genome-pipeline', '~> 0.0.1'
|
28
|
+
```
|
29
|
+
|
16
30
|
## Filters
|
17
31
|
|
18
32
|
Pipelines are made up of filters. Here is a simple filter that predicts
|
19
33
|
amino-acid coding sequences using Prodigal:
|
20
34
|
|
21
35
|
```ruby
|
22
|
-
require 'tempfile'
|
23
|
-
|
24
36
|
class ProdigalFilter < Filter
|
25
37
|
|
26
38
|
attr_reader :result
|
27
39
|
|
28
40
|
def transform
|
41
|
+
|
29
42
|
out_file = Tempfile.new 'prodigal'
|
30
43
|
|
31
44
|
# run prodigal
|
32
45
|
# read GFF and add features to `genome`
|
33
|
-
@
|
34
|
-
|
35
|
-
|
46
|
+
@result =
|
47
|
+
@genome.fasta do |path|
|
48
|
+
stdin, stdout, stderr = Open3.popen3("prodigal -f gff -i #{path}")
|
49
|
+
Features.from_gff(stdout.readlines)
|
50
|
+
end
|
36
51
|
|
37
|
-
|
52
|
+
out_file.close
|
38
53
|
|
39
54
|
super
|
40
55
|
end
|
41
56
|
end
|
42
57
|
```
|
43
58
|
|
59
|
+
## List of Filters
|
60
|
+
|
61
|
+
### Gene Prediction
|
62
|
+
|
63
|
+
- Prodigal [XX]
|
64
|
+
|
65
|
+
### TODO
|
66
|
+
|
67
|
+
Feature-finding software:
|
68
|
+
|
69
|
+
(act on genome sequence)
|
70
|
+
|
71
|
+
- Augustus
|
72
|
+
- tRNAscan
|
73
|
+
- rnammer
|
74
|
+
- snap
|
75
|
+
- prokka
|
76
|
+
- barrnap
|
77
|
+
- ARAGORN
|
78
|
+
- repeatscout
|
79
|
+
- repeatmasker
|
80
|
+
- trf
|
81
|
+
|
82
|
+
Filters that act on features:
|
83
|
+
|
84
|
+
- Quality Filter (remove features below a confidence threshold)
|
85
|
+
|
44
86
|
## Genome Object
|
45
87
|
|
46
88
|
Genomes are currently read from FASTA files and stored as objects. My future
|