bio-express_beta_diversity 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 15dff506cc2fe193776349c0e11f50141ef337a8
4
- data.tar.gz: d9ca06baedd0dc7d991449d89c1925d57527f920
3
+ metadata.gz: e43852822d4a01026d598a484a4bc2612f21cfa8
4
+ data.tar.gz: a7e61d9d1ddb09cb8cdd38d3f5a2f2094178c157
5
5
  SHA512:
6
- metadata.gz: 29b6931c8c18978fe5581aae21429ab2b77eccefea585aaf7c3adff91ad415ddcf0d1b96464ff5b5851308b12bf73d24882c4e4a9e86c83986a2537eff7072a8
7
- data.tar.gz: 937c29d85243dfff76a3bb60e868ae80039f3d162321774996b77f7a5601f85ad13286bec0e7c4474e8ac535f46ce9a72de03d286375e5670f038e0228f8d895
6
+ metadata.gz: 5e4b1ad90efc62d4f3a2d1de7a90aac478f25ee44069341fc3c7d4ac4c64f3c6870bcbfd8627983972154a343384e676f7903d134233643e6e2aa6288568a8de
7
+ data.tar.gz: 904d8162046af7f527294b12912a39f8b300fa8ee084eab1655aea7d33fb1918b607c8489d94e118765e3877d3be354a23f7ddd3bf15982ecddb7a41fc9dba36
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Build Status](https://secure.travis-ci.org/wwood/bioruby-express_beta_diversity.png)](http://travis-ci.org/wwood/bioruby-express_beta_diversity)
4
4
 
5
- Ruby interface to [express beta diversity](https://github.com/dparks1134/ExpressBetaDiversity) things. Currently, functionality is limited to parsing the output distance matrices.
5
+ Ruby interface to [Express Beta Diversity](https://github.com/dparks1134/ExpressBetaDiversity). Currently, functionality is limited to parsing the output distance matrices and the input "OTU table" format.
6
6
 
7
7
  Note: this software is under active development!
8
8
 
@@ -14,6 +14,7 @@ gem install bio-express_beta_diversity
14
14
 
15
15
  ## Usage
16
16
 
17
+ Parsing the distance matrix:
17
18
  ```ruby
18
19
  require 'bio-express_beta_diversity'
19
20
 
@@ -24,9 +25,18 @@ dists.distance('sample2','sample1') #=> 0.251761
24
25
 
25
26
  ```
26
27
 
28
+ Parsing the input OTU table:
29
+ ```ruby
30
+ otus = Bio::EBD::Format.parse_from_file 'my.ebd'
31
+ otus.sample_counts.keys #=> ['sample1','sample2', ..]
32
+ otus.sample_counts['sample1'] #=> [1.0,4.0,0.0,..]
33
+ otus.otu_names #=> ['otu1','otu2','otu3',..]
34
+ ```
35
+ The order of `otu_names` matches the order of the counts within each array in `sample_counts.values`.
36
+
27
37
  The API doc is online. For more code examples see the test files in
28
38
  the source tree.
29
-
39
+
30
40
  ## Project home page
31
41
 
32
42
  Information on the source tree, documentation, examples, issues and
@@ -37,7 +47,7 @@ how to contribute, see
37
47
  ## Cite
38
48
 
39
49
  If you use this software, please cite one of
40
-
50
+
41
51
  * [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
42
52
  * [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
43
53
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.1.0
@@ -11,4 +11,5 @@ end
11
11
 
12
12
 
13
13
  require 'bio-express_beta_diversity/distance_matrix.rb'
14
+ require 'bio-express_beta_diversity/ebd_format.rb'
14
15
 
@@ -0,0 +1,48 @@
1
+ require 'csv'
2
+
3
+ # Express Beta Diversity input "OTU table" format parser.
4
+ class Bio::EBD::Format
5
+ attr_accessor :otu_names
6
+
7
+ # Hash of sample names to array of counts. The counts
8
+ # are floats that correspond to the otu_names.
9
+ attr_accessor :sample_counts
10
+
11
+ def initialize
12
+ @sample_counts = {}
13
+ @otu_names = []
14
+ end
15
+
16
+ def self.parse_from_file(filename)
17
+ ebd = Bio::EBD::Format.new
18
+
19
+ # 100535 1008038
20
+ # sample1 5.0 0
21
+ # sample2 0 1.0
22
+ first_line = true
23
+ CSV.foreach(filename, :col_sep => "\t") do |row|
24
+ if first_line
25
+ # First line is the IDs of the OTUs
26
+ raise "EBD format file appears to be incorrectly formatted on the first line: #{row.inspect}" if row.length < 2
27
+ ebd.otu_names = row[1...row.length]
28
+ first_line = false
29
+ else
30
+ next if row.empty? #Ignore empty lines
31
+
32
+ # all other lines are the sample names and then number of observations of the OTUs
33
+ raise "Parse exception at this row: #{row.inspect}" unless row.length == ebd.otu_names.length+1
34
+
35
+ sample_name = row[0]
36
+ raise "Duplicate sample name detected in EBD format: #{row[0]}" if ebd.sample_counts.key?(sample_name)
37
+
38
+ ebd.sample_counts[sample_name] = row[1...row.length].collect{|count| count.to_f}
39
+ end
40
+ end
41
+
42
+ return ebd
43
+ end
44
+
45
+ def number_of_samples
46
+ @sample_counts.length
47
+ end
48
+ end
@@ -1,3 +1,4 @@
1
+ require 'tempfile'
1
2
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
3
 
3
4
  describe "BioExpressBetaDiversity" do
@@ -19,3 +20,24 @@ describe "BioExpressBetaDiversity" do
19
20
  d.distance('4451','4446').should == 0.364525
20
21
  end
21
22
  end
23
+
24
+
25
+ describe 'ebd format parser' do
26
+ it 'should parse somethign simple' do
27
+
28
+ Tempfile.open('test_ebd') do |t|
29
+ t.print "\t"
30
+ t.puts %w(100535 1008038).join "\t"
31
+ t.puts %w(sample1 5.0 0.0).join "\t"
32
+ t.puts %w(sample2 0.0 1.0).join "\t"
33
+ t.close
34
+
35
+ ebd = Bio::EBD::Format.parse_from_file t.path
36
+ ebd.otu_names.should == %w(100535 1008038)
37
+ ebd.sample_counts.should == {
38
+ 'sample1' => [5.0,0.0],
39
+ 'sample2' => [0.0,1.0],
40
+ }
41
+ end
42
+ end
43
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-express_beta_diversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben J. Woodcroft
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-12 00:00:00.000000000 Z
11
+ date: 2014-01-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio-logger
@@ -112,6 +112,7 @@ files:
112
112
  - VERSION
113
113
  - lib/bio-express_beta_diversity.rb
114
114
  - lib/bio-express_beta_diversity/distance_matrix.rb
115
+ - lib/bio-express_beta_diversity/ebd_format.rb
115
116
  - spec/bio-express_beta_diversity_spec.rb
116
117
  - spec/data/eg.diss
117
118
  - spec/spec_helper.rb
@@ -135,7 +136,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
135
136
  version: '0'
136
137
  requirements: []
137
138
  rubyforge_project:
138
- rubygems_version: 2.2.0.rc.1
139
+ rubygems_version: 2.2.0
139
140
  signing_key:
140
141
  specification_version: 4
141
142
  summary: Interface for express beta diversity