genome-pipeline 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/Rakefile +12 -0
- data/certs/audy.cert +19 -0
- data/genome-pipeline.gemspec +2 -0
- data/lib/genome/pipeline/dummy_filter.rb +16 -0
- data/lib/genome/pipeline/filter.rb +4 -2
- data/lib/genome/pipeline/pipeline.rb +16 -1
- data/lib/genome/pipeline/prodigal_filter.rb +8 -14
- data/lib/genome/pipeline/version.rb +1 -1
- data/lib/genome/pipeline.rb +4 -1
- data/readme.md +49 -7
- data/spec/data/genome.fasta +0 -53851
- data/spec/pipeline_spec.rb +28 -0
- data/spec/prodigal_filter_spec.rb +4 -1
- data.tar.gz.sig +2 -0
- metadata +37 -3
- metadata.gz.sig +4 -0
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
pkg/
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/Rakefile
CHANGED
@@ -1,2 +1,14 @@
|
|
1
1
|
require 'bundler/gem_tasks'
|
2
2
|
require 'rake/testtask'
|
3
|
+
|
4
|
+
Bundler.require :development
|
5
|
+
|
6
|
+
namespace :docs do
|
7
|
+
YARD::Rake::YardocTask.new do |t|
|
8
|
+
end
|
9
|
+
|
10
|
+
desc 'serve documentation with live reloading'
|
11
|
+
task :serve do
|
12
|
+
`bundle exec yard server --reload`
|
13
|
+
end
|
14
|
+
end
|
data/certs/audy.cert
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
-----BEGIN CERTIFICATE-----
|
2
|
+
MIIDIDCCAgigAwIBAgIBADANBgkqhkiG9w0BAQUFADA2MQowCAYDVQQDDAFfMRQw
|
3
|
+
EgYKCZImiZPyLGQBGRYEYWdkcjESMBAGCgmSJomT8ixkARkWAmNvMB4XDTE0MDcw
|
4
|
+
OTE5MDc1MVoXDTE1MDcwOTE5MDc1MVowNjEKMAgGA1UEAwwBXzEUMBIGCgmSJomT
|
5
|
+
8ixkARkWBGFnZHIxEjAQBgoJkiaJk/IsZAEZFgJjbzCCASIwDQYJKoZIhvcNAQEB
|
6
|
+
BQADggEPADCCAQoCggEBANcJT8ftI2ex4cCJGBTexorBoQTyVE5Zamdi51/zPNgU
|
7
|
+
1ew59izYGv8/JjMpUyTFdFbz8l7rlyeEssoz5Yf6gckxl+nlmiBxvILKu8W2ZjyF
|
8
|
+
a3ymCbyehr/1i+GPozZFyVKdIEIHLagAWej4Md2FsQDKwmaA/+5sSqu1b80R675e
|
9
|
+
Ae9o5G76GniNptiAC/QF/zYwXGcMJRJatYx1qIdK4rdZahlBaozTrI5Dl49yLOb/
|
10
|
+
fZKnRa6IZCL+DuXUViNTipPuOLvnUSPTzfnnw3dq+ybLpD2YppMFHR65gioidbeV
|
11
|
+
3rKgZau6mfzS7bne/m/SIjYTpPlYhHTExiJXhEJNzRUCAwEAAaM5MDcwCQYDVR0T
|
12
|
+
BAIwADAdBgNVHQ4EFgQUMP97xnvyjtlzDTrfwsWNNbFTIC0wCwYDVR0PBAQDAgSw
|
13
|
+
MA0GCSqGSIb3DQEBBQUAA4IBAQCuU9TA7hSQD7dytWLYfQ8S4uHreSdca4PRksxH
|
14
|
+
36iy8SpmynKIh83UxZH9Nr8xX3kVnHX9sFYqUeyQYrLsog+etwwD51C+taMRUzYU
|
15
|
+
y2ICMXGl9U9u/lecKj/kCOJE8bDhbVD9adm+ZKVqAoq0DGlJI4xarIxwzDCVZr3v
|
16
|
+
43LDK6ouZt7pt5TZ3wZbBsSYXcC4NVQHrxb+6YakULkyUFl6Ld2p6ID97hTOAHd2
|
17
|
+
b9RS3+P0pRzQyo2osQTnOup3ZAlfqtG90F/m5mSIvWeJQBwcZld9X2CvJ/DgSfvX
|
18
|
+
hArlAbz8jIiZ5FhHecQVQ6Q0sDljNdFh5N6SQp9afYfJ0Dlb
|
19
|
+
-----END CERTIFICATE-----
|
data/genome-pipeline.gemspec
CHANGED
@@ -15,6 +15,8 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.test_files = gem.files.grep(%r{^test})
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
|
18
|
+
gem.cert_chain = ['certs/audy.cert']
|
19
|
+
gem.signing_key = File.expand_path('~/.ssh/gem-private_key.pem') if $0 =~ /gem\z/
|
18
20
|
|
19
21
|
gem.post_install_message = <<msg
|
20
22
|
-------------------------------------------------
|
@@ -9,15 +9,17 @@ module Genome
|
|
9
9
|
attr_reader :result
|
10
10
|
|
11
11
|
def initialize genome
|
12
|
-
|
12
|
+
# store a deep copy of genome.
|
13
|
+
@genome = Marshal.load(Marshal.dump(genome))
|
13
14
|
end
|
14
15
|
|
15
16
|
def transform
|
17
|
+
@genome.features << @result
|
16
18
|
@genome
|
17
19
|
end
|
18
20
|
|
19
21
|
def inspect
|
20
|
-
"#<#{self.class} #{@result} results genome=#{@genome}>"
|
22
|
+
"#<#{self.class} #{@result.size} results genome=#{@genome}>"
|
21
23
|
end
|
22
24
|
|
23
25
|
end
|
@@ -1,6 +1,21 @@
|
|
1
1
|
module Genome
|
2
2
|
|
3
|
-
|
3
|
+
class Pipeline
|
4
|
+
|
5
|
+
attr_reader :steps
|
6
|
+
|
7
|
+
def initialize *steps
|
8
|
+
@steps = steps
|
9
|
+
end
|
10
|
+
|
11
|
+
def run genome
|
12
|
+
@steps.each do |step|
|
13
|
+
genome = step.new(genome).transform
|
14
|
+
end
|
15
|
+
return genome
|
16
|
+
end
|
17
|
+
|
18
|
+
|
4
19
|
end
|
5
20
|
|
6
21
|
end
|
@@ -1,28 +1,22 @@
|
|
1
|
-
require '
|
1
|
+
require 'open3'
|
2
2
|
|
3
3
|
module Genome
|
4
4
|
class Pipeline
|
5
5
|
|
6
6
|
class ProdigalFilter < Filter
|
7
7
|
|
8
|
-
attr_reader :result
|
9
|
-
|
10
8
|
def transform
|
9
|
+
@result = run_prodigal
|
10
|
+
super
|
11
|
+
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
# run prodigal
|
15
|
-
# read GFF and add features to `genome`
|
13
|
+
def run_prodigal
|
16
14
|
@genome.fasta do |path|
|
17
|
-
|
15
|
+
stdin, stdout, stderr = Open3.popen3("prodigal -f gff -i #{path}")
|
16
|
+
Features.from_gff(stdout.readlines)
|
18
17
|
end
|
19
|
-
|
20
|
-
@result = Features.from_gff(out_file)
|
21
|
-
|
22
|
-
out_file.close
|
23
|
-
|
24
|
-
super
|
25
18
|
end
|
19
|
+
|
26
20
|
end
|
27
21
|
|
28
22
|
end
|
data/lib/genome/pipeline.rb
CHANGED
@@ -3,10 +3,13 @@ require 'bundler'
|
|
3
3
|
Bundler.require
|
4
4
|
|
5
5
|
module Genome
|
6
|
-
|
6
|
+
|
7
|
+
require 'genome/genome.rb'
|
8
|
+
require 'genome/pipeline/pipeline.rb'
|
7
9
|
|
8
10
|
class Pipeline
|
9
11
|
autoload :Filter, 'genome/pipeline/filter.rb'
|
12
|
+
autoload :DummyFilter, 'genome/pipeline/dummy_filter.rb'
|
10
13
|
autoload :ProdigalFilter, 'genome/pipeline/prodigal_filter.rb'
|
11
14
|
end
|
12
15
|
|
data/readme.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Genome::Pipeline
|
2
2
|
|
3
|
-
|
3
|
+
Genome processing filters and utilities
|
4
4
|
|
5
5
|
Inspired by [HTML::Pipeline](https://github.com/jch/html-pipeline).
|
6
6
|
|
@@ -13,34 +13,76 @@ pipeline = Genome::Pipeline.new Filter::PRODIGAL,
|
|
13
13
|
annotated_genome = pipeline[genome]
|
14
14
|
```
|
15
15
|
|
16
|
+
# Installation
|
17
|
+
|
18
|
+
With RubyGems:
|
19
|
+
|
20
|
+
```bash
|
21
|
+
$ gem install -P HighSecurity genome-pipeline
|
22
|
+
```
|
23
|
+
|
24
|
+
With Bundler:
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
gem 'genome-pipeline', '~> 0.0.1'
|
28
|
+
```
|
29
|
+
|
16
30
|
## Filters
|
17
31
|
|
18
32
|
Pipelines are made up of filters. Here is a simple filter that predicts
|
19
33
|
amino-acid coding sequences using Prodigal
|
20
34
|
|
21
35
|
```ruby
|
22
|
-
require 'tempfile'
|
23
|
-
|
24
36
|
class ProdigalFilter < Filter
|
25
37
|
|
26
38
|
attr_reader :result
|
27
39
|
|
28
40
|
def transform
|
41
|
+
|
29
42
|
out_file = Tempfile.new 'prodigal'
|
30
43
|
|
31
44
|
# run prodigal
|
32
45
|
# read GFF and add features to `genome`
|
33
|
-
@
|
34
|
-
|
35
|
-
|
46
|
+
@result =
|
47
|
+
@genome.fasta do |path|
|
48
|
+
stdin, stdout, stderr = Open3.popen3("prodigal -f gff -i #{path}")
|
49
|
+
Features.from_gff(stdout.readlines)
|
50
|
+
end
|
36
51
|
|
37
|
-
|
52
|
+
out_file.close
|
38
53
|
|
39
54
|
super
|
40
55
|
end
|
41
56
|
end
|
42
57
|
```
|
43
58
|
|
59
|
+
## List of Filters
|
60
|
+
|
61
|
+
### Gene Prediction
|
62
|
+
|
63
|
+
- Prodigal [XX]
|
64
|
+
|
65
|
+
### TODO
|
66
|
+
|
67
|
+
feature finding software:
|
68
|
+
|
69
|
+
(act on genome sequence)
|
70
|
+
|
71
|
+
- Augustus
|
72
|
+
- tRNAscan
|
73
|
+
- rnammer
|
74
|
+
- snap
|
75
|
+
- prokka
|
76
|
+
- barrnap
|
77
|
+
- ARAGORN
|
78
|
+
- repeatscout
|
79
|
+
- repeatmasker
|
80
|
+
- trf
|
81
|
+
|
82
|
+
Filters that act on features:
|
83
|
+
|
84
|
+
- Quality Filter (remove features below a confidence threshold)
|
85
|
+
|
44
86
|
## Genome Object
|
45
87
|
|
46
88
|
Genomes are currently read from FASTA files and stored as objects. My future
|