genome-pipeline 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ pkg/
data/Gemfile CHANGED
@@ -6,6 +6,7 @@ gem 'pry'
6
6
  group :development do
7
7
  gem 'rake'
8
8
  gem 'bundler'
9
+ gem 'yard'
9
10
  end
10
11
 
11
12
  group :test do
data/Gemfile.lock CHANGED
@@ -23,6 +23,7 @@ GEM
23
23
  rspec-support (~> 3.1.0)
24
24
  rspec-support (3.1.0)
25
25
  slop (3.5.0)
26
+ yard (0.8.7.4)
26
27
 
27
28
  PLATFORMS
28
29
  ruby
@@ -33,3 +34,4 @@ DEPENDENCIES
33
34
  pry
34
35
  rake
35
36
  rspec
37
+ yard
data/Rakefile CHANGED
@@ -1,2 +1,14 @@
1
1
  require 'bundler/gem_tasks'
2
2
  require 'rake/testtask'
3
+
4
+ Bundler.require :development
5
+
6
+ namespace :docs do
7
+ YARD::Rake::YardocTask.new do |t|
8
+ end
9
+
10
+ desc 'serve documentation with live reloading'
11
+ task :serve do
12
+ `bundle exec yard server --reload`
13
+ end
14
+ end
data/certs/audy.cert ADDED
@@ -0,0 +1,19 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIDIDCCAgigAwIBAgIBADANBgkqhkiG9w0BAQUFADA2MQowCAYDVQQDDAFfMRQw
3
+ EgYKCZImiZPyLGQBGRYEYWdkcjESMBAGCgmSJomT8ixkARkWAmNvMB4XDTE0MDcw
4
+ OTE5MDc1MVoXDTE1MDcwOTE5MDc1MVowNjEKMAgGA1UEAwwBXzEUMBIGCgmSJomT
5
+ 8ixkARkWBGFnZHIxEjAQBgoJkiaJk/IsZAEZFgJjbzCCASIwDQYJKoZIhvcNAQEB
6
+ BQADggEPADCCAQoCggEBANcJT8ftI2ex4cCJGBTexorBoQTyVE5Zamdi51/zPNgU
7
+ 1ew59izYGv8/JjMpUyTFdFbz8l7rlyeEssoz5Yf6gckxl+nlmiBxvILKu8W2ZjyF
8
+ a3ymCbyehr/1i+GPozZFyVKdIEIHLagAWej4Md2FsQDKwmaA/+5sSqu1b80R675e
9
+ Ae9o5G76GniNptiAC/QF/zYwXGcMJRJatYx1qIdK4rdZahlBaozTrI5Dl49yLOb/
10
+ fZKnRa6IZCL+DuXUViNTipPuOLvnUSPTzfnnw3dq+ybLpD2YppMFHR65gioidbeV
11
+ 3rKgZau6mfzS7bne/m/SIjYTpPlYhHTExiJXhEJNzRUCAwEAAaM5MDcwCQYDVR0T
12
+ BAIwADAdBgNVHQ4EFgQUMP97xnvyjtlzDTrfwsWNNbFTIC0wCwYDVR0PBAQDAgSw
13
+ MA0GCSqGSIb3DQEBBQUAA4IBAQCuU9TA7hSQD7dytWLYfQ8S4uHreSdca4PRksxH
14
+ 36iy8SpmynKIh83UxZH9Nr8xX3kVnHX9sFYqUeyQYrLsog+etwwD51C+taMRUzYU
15
+ y2ICMXGl9U9u/lecKj/kCOJE8bDhbVD9adm+ZKVqAoq0DGlJI4xarIxwzDCVZr3v
16
+ 43LDK6ouZt7pt5TZ3wZbBsSYXcC4NVQHrxb+6YakULkyUFl6Ld2p6ID97hTOAHd2
17
+ b9RS3+P0pRzQyo2osQTnOup3ZAlfqtG90F/m5mSIvWeJQBwcZld9X2CvJ/DgSfvX
18
+ hArlAbz8jIiZ5FhHecQVQ6Q0sDljNdFh5N6SQp9afYfJ0Dlb
19
+ -----END CERTIFICATE-----
@@ -15,6 +15,8 @@ Gem::Specification.new do |gem|
15
15
  gem.test_files = gem.files.grep(%r{^test})
16
16
  gem.require_paths = ["lib"]
17
17
 
18
+ gem.cert_chain = ['certs/audy.cert']
19
+ gem.signing_key = File.expand_path('~/.ssh/gem-private_key.pem') if $0 =~ /gem\z/
18
20
 
19
21
  gem.post_install_message = <<msg
20
22
  -------------------------------------------------
@@ -0,0 +1,16 @@
1
+ module Genome
2
+ class Pipeline
3
+
4
+ # dummy filter adds no processing of its own (transform only calls super)
5
+ # used for testing
6
+
7
+ class DummyFilter < Filter
8
+
9
+ def transform
10
+ super
11
+ end
12
+
13
+ end
14
+
15
+ end
16
+ end
@@ -9,15 +9,17 @@ module Genome
9
9
  attr_reader :result
10
10
 
11
11
  def initialize genome
12
- @genome = genome
12
+ # store a deep copy of genome.
13
+ @genome = Marshal.load(Marshal.dump(genome))
13
14
  end
14
15
 
15
16
  def transform
17
+ @genome.features << @result
16
18
  @genome
17
19
  end
18
20
 
19
21
  def inspect
20
- "#<#{self.class} #{@result} results genome=#{@genome}>"
22
+ "#<#{self.class} #{@result.size} results genome=#{@genome}>"
21
23
  end
22
24
 
23
25
  end
@@ -1,6 +1,21 @@
1
1
  module Genome
2
2
 
3
- module Pipeline
3
+ class Pipeline
4
+
5
+ attr_reader :steps
6
+
7
+ def initialize *steps
8
+ @steps = steps
9
+ end
10
+
11
+ def run genome
12
+ @steps.each do |step|
13
+ genome = step.new(genome).transform
14
+ end
15
+ return genome
16
+ end
17
+
18
+
4
19
  end
5
20
 
6
21
  end
@@ -1,28 +1,22 @@
1
- require 'tempfile'
1
+ require 'open3'
2
2
 
3
3
  module Genome
4
4
  class Pipeline
5
5
 
6
6
  class ProdigalFilter < Filter
7
7
 
8
- attr_reader :result
9
-
10
8
  def transform
9
+ @result = run_prodigal
10
+ super
11
+ end
11
12
 
12
- out_file = Tempfile.new 'prodigal'
13
-
14
- # run prodigal
15
- # read GFF and add add features to `genome`
13
+ def run_prodigal
16
14
  @genome.fasta do |path|
17
- `prodigal -f gff -i #{path} > #{out_file.path}`
15
+ stdin, stdout, stderr = Open3.popen3("prodigal -f gff -i #{path}")
16
+ Features.from_gff(stdout.readlines)
18
17
  end
19
-
20
- @result = Features.from_gff(out_file)
21
-
22
- out_file.close
23
-
24
- super
25
18
  end
19
+
26
20
  end
27
21
 
28
22
  end
@@ -1,5 +1,5 @@
1
1
  module Genome
2
2
  class Pipeline
3
- VERSION = '0.0.1'
3
+ VERSION = '0.0.2'
4
4
  end
5
5
  end
@@ -3,10 +3,13 @@ require 'bundler'
3
3
  Bundler.require
4
4
 
5
5
  module Genome
6
- autoload :Genome, 'genome/genome.rb'
6
+
7
+ require 'genome/genome.rb'
8
+ require 'genome/pipeline/pipeline.rb'
7
9
 
8
10
  class Pipeline
9
11
  autoload :Filter, 'genome/pipeline/filter.rb'
12
+ autoload :DummyFilter, 'genome/pipeline/dummy_filter.rb'
10
13
  autoload :ProdigalFilter, 'genome/pipeline/prodigal_filter.rb'
11
14
  end
12
15
 
data/readme.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Genome::Pipeline
2
2
 
3
- Annotate or do other things to Bacterial genomes.
3
+ Genome processing filters and utilities
4
4
 
5
5
  Inspired by [HTML::Pipeline](https://github.com/jch/html-pipeline).
6
6
 
@@ -13,34 +13,76 @@ pipeline = Genome::Pipeline.new Filter::PRODIGAL,
13
13
  annotated_genome = pipeline[genome]
14
14
  ```
15
15
 
16
+ # Installation
17
+
18
+ With RubyGems:
19
+
20
+ ```bash
21
+ $ gem install -P HighSecurity genome-pipeline
22
+ ```
23
+
24
+ With Bundler:
25
+
26
+ ```ruby
27
+ gem 'genome-pipeline', '~> 0.0.1'
28
+ ```
29
+
16
30
  ## Filters
17
31
 
18
32
  Pipelines are made up of filters. Here is a simple filter that predicts
19
33
  amino-acid coding sequences using Prodigal
20
34
 
21
35
  ```ruby
22
- require 'tempfile'
23
-
24
36
  class ProdigalFilter < Filter
25
37
 
26
38
  attr_reader :result
27
39
 
28
40
  def transform
41
+
29
42
  out_file = Tempfile.new 'prodigal'
30
43
 
31
44
  # run prodigal
32
45
  # read GFF and add features to `genome`
33
- @genome.fasta do |path|
34
- `prodigal -f gff -i #{path} > #{out_file.path}`
35
- end
46
+ @result =
47
+ @genome.fasta do |path|
48
+ stdin, stdout, stderr = Open3.popen3("prodigal -f gff -i #{path}")
49
+ Features.from_gff(stdout.readlines)
50
+ end
36
51
 
37
- @result = Features.from_gff(out_file)
52
+ out_file.close
38
53
 
39
54
  super
40
55
  end
41
56
  end
42
57
  ```
43
58
 
59
+ ## List of Filters
60
+
61
+ ### Gene Prediction
62
+
63
+ - Prodigal [XX]
64
+
65
+ ### TODO
66
+
67
+ feature finding software:
68
+
69
+ (act on genome sequence)
70
+
71
+ - Augustus
72
+ - tRNAscan
73
+ - rnammer
74
+ - snap
75
+ - prokka
76
+ - barrnap
77
+ - ARAGORN
78
+ - repeatscout
79
+ - repeatmasker
80
+ - trf
81
+
82
+ Filters that act on features:
83
+
84
+ - Quality Filter (remove features below a confidence threshold)
85
+
44
86
  ## Genome Object
45
87
 
46
88
  Genomes are currently read from FASTA files and stored as objects. My future