bio-express_beta_diversity 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 15dff506cc2fe193776349c0e11f50141ef337a8
4
- data.tar.gz: d9ca06baedd0dc7d991449d89c1925d57527f920
3
+ metadata.gz: e43852822d4a01026d598a484a4bc2612f21cfa8
4
+ data.tar.gz: a7e61d9d1ddb09cb8cdd38d3f5a2f2094178c157
5
5
  SHA512:
6
- metadata.gz: 29b6931c8c18978fe5581aae21429ab2b77eccefea585aaf7c3adff91ad415ddcf0d1b96464ff5b5851308b12bf73d24882c4e4a9e86c83986a2537eff7072a8
7
- data.tar.gz: 937c29d85243dfff76a3bb60e868ae80039f3d162321774996b77f7a5601f85ad13286bec0e7c4474e8ac535f46ce9a72de03d286375e5670f038e0228f8d895
6
+ metadata.gz: 5e4b1ad90efc62d4f3a2d1de7a90aac478f25ee44069341fc3c7d4ac4c64f3c6870bcbfd8627983972154a343384e676f7903d134233643e6e2aa6288568a8de
7
+ data.tar.gz: 904d8162046af7f527294b12912a39f8b300fa8ee084eab1655aea7d33fb1918b607c8489d94e118765e3877d3be354a23f7ddd3bf15982ecddb7a41fc9dba36
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Build Status](https://secure.travis-ci.org/wwood/bioruby-express_beta_diversity.png)](http://travis-ci.org/wwood/bioruby-express_beta_diversity)
4
4
 
5
- Ruby interface to [express beta diversity](https://github.com/dparks1134/ExpressBetaDiversity) things. Currently, functionality is limited to parsing the output distance matrices.
5
+ Ruby interface to [express beta diversity](https://github.com/dparks1134/ExpressBetaDiversity) things. Currently, functionality is limited to parsing the output distance matrices and the input "OTU table" format.
6
6
 
7
7
  Note: this software is under active development!
8
8
 
@@ -14,6 +14,7 @@ gem install bio-express_beta_diversity
14
14
 
15
15
  ## Usage
16
16
 
17
+ Parsing the distance matrix:
17
18
  ```ruby
18
19
  require 'bio-express_beta_diversity'
19
20
 
@@ -24,9 +25,18 @@ dists.distance('sample2','sample1') #=> 0.251761
24
25
 
25
26
  ```
26
27
 
28
+ Parsing the input OTU table:
29
+ ```ruby
30
+ otus = Bio::EBD::Format.parse_from_file 'my.ebd'
31
+ otus.sample_counts.keys #=> ['sample1','sample2', ..]
32
+ otus.sample_counts['sample1'] #=> [1.0,4.0,0.0,..]
33
+ otus.otu_names #=> ['otu1','otu2','otu3',..]
34
+ ```
35
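+ The entries of `otu_names` are in the same order as the counts within each array in `sample_counts.values`, i.e. `otu_names[i]` names the OTU counted at index `i` of every sample's array.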
36
+
27
37
  The API doc is online. For more code examples see the test files in
28
38
  the source tree.
29
-
39
+
30
40
  ## Project home page
31
41
 
32
42
  Information on the source tree, documentation, examples, issues and
@@ -37,7 +47,7 @@ how to contribute, see
37
47
  ## Cite
38
48
 
39
49
  If you use this software, please cite one of
40
-
50
+
41
51
  * [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
42
52
  * [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
43
53
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.1.0
@@ -11,4 +11,5 @@ end
11
11
 
12
12
 
13
13
  require 'bio-express_beta_diversity/distance_matrix.rb'
14
+ require 'bio-express_beta_diversity/ebd_format.rb'
14
15
 
@@ -0,0 +1,48 @@
1
+ require 'csv'
2
+
3
+ # Express Beta Diversity input "OTU table" format parser.
4
+ class Bio::EBD::Format
5
+ attr_accessor :otu_names
6
+
7
+ # Hash of sample names to array of counts. The counts
8
+ # are floats that correspond to the otu_names.
9
+ attr_accessor :sample_counts
10
+
11
+ def initialize
12
+ @sample_counts = {}
13
+ @otu_names = []
14
+ end
15
+
16
+ def self.parse_from_file(filename)
17
+ ebd = Bio::EBD::Format.new
18
+
19
+ # 100535 1008038
20
+ # sample1 5.0 0
21
+ # sample2 0 1.0
22
+ first_line = true
23
+ CSV.foreach(filename, :col_sep => "\t") do |row|
24
+ if first_line
25
+ # First line is the IDs of the OTUs
26
+ raise "EBD format file appears to be incorrectly formatted on the first line: #{row.inspect}" if row.length < 2
27
+ ebd.otu_names = row[1...row.length]
28
+ first_line = false
29
+ else
30
+ next if row.empty? #Ignore empty lines
31
+
32
+ # all other lines are the sample names and then number of observations of the OTUs
33
+ raise "Parse exception at this row: #{row.inspect}" unless row.length == ebd.otu_names.length+1
34
+
35
+ sample_name = row[0]
36
+ raise "Duplicate sample name detected in EBD format: #{row[0]}" if ebd.sample_counts.key?(sample_name)
37
+
38
+ ebd.sample_counts[sample_name] = row[1...row.length].collect{|count| count.to_f}
39
+ end
40
+ end
41
+
42
+ return ebd
43
+ end
44
+
45
+ def number_of_samples
46
+ @sample_counts.length
47
+ end
48
+ end
@@ -1,3 +1,4 @@
1
+ require 'tempfile'
1
2
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
3
 
3
4
  describe "BioExpressBetaDiversity" do
@@ -19,3 +20,24 @@ describe "BioExpressBetaDiversity" do
19
20
  d.distance('4451','4446').should == 0.364525
20
21
  end
21
22
  end
23
+
24
+
25
+ describe 'ebd format parser' do
26
+ it 'should parse somethign simple' do
27
+
28
+ Tempfile.open('test_ebd') do |t|
29
+ t.print "\t"
30
+ t.puts %w(100535 1008038).join "\t"
31
+ t.puts %w(sample1 5.0 0.0).join "\t"
32
+ t.puts %w(sample2 0.0 1.0).join "\t"
33
+ t.close
34
+
35
+ ebd = Bio::EBD::Format.parse_from_file t.path
36
+ ebd.otu_names.should == %w(100535 1008038)
37
+ ebd.sample_counts.should == {
38
+ 'sample1' => [5.0,0.0],
39
+ 'sample2' => [0.0,1.0],
40
+ }
41
+ end
42
+ end
43
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-express_beta_diversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben J. Woodcroft
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-12 00:00:00.000000000 Z
11
+ date: 2014-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio-logger
@@ -112,6 +112,7 @@ files:
112
112
  - VERSION
113
113
  - lib/bio-express_beta_diversity.rb
114
114
  - lib/bio-express_beta_diversity/distance_matrix.rb
115
+ - lib/bio-express_beta_diversity/ebd_format.rb
115
116
  - spec/bio-express_beta_diversity_spec.rb
116
117
  - spec/data/eg.diss
117
118
  - spec/spec_helper.rb
@@ -135,7 +136,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
136
  version: '0'
136
137
  requirements: []
137
138
  rubyforge_project:
138
- rubygems_version: 2.2.0.rc.1
139
+ rubygems_version: 2.2.0
139
140
  signing_key:
140
141
  specification_version: 4
141
142
  summary: Interface for express beta diversity