bio-express_beta_diversity 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +13 -3
- data/VERSION +1 -1
- data/lib/bio-express_beta_diversity.rb +1 -0
- data/lib/bio-express_beta_diversity/ebd_format.rb +48 -0
- data/spec/bio-express_beta_diversity_spec.rb +22 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e43852822d4a01026d598a484a4bc2612f21cfa8
|
4
|
+
data.tar.gz: a7e61d9d1ddb09cb8cdd38d3f5a2f2094178c157
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e4b1ad90efc62d4f3a2d1de7a90aac478f25ee44069341fc3c7d4ac4c64f3c6870bcbfd8627983972154a343384e676f7903d134233643e6e2aa6288568a8de
|
7
|
+
data.tar.gz: 904d8162046af7f527294b12912a39f8b300fa8ee084eab1655aea7d33fb1918b607c8489d94e118765e3877d3be354a23f7ddd3bf15982ecddb7a41fc9dba36
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://secure.travis-ci.org/wwood/bioruby-express_beta_diversity.png)](http://travis-ci.org/wwood/bioruby-express_beta_diversity)
|
4
4
|
|
5
|
-
Ruby interface to [express beta diversity](https://github.com/dparks1134/ExpressBetaDiversity) things. Currently, functionality is limited to parsing the output distance matrices.
|
5
|
+
Ruby interface to [express beta diversity](https://github.com/dparks1134/ExpressBetaDiversity) things. Currently, functionality is limited to parsing the output distance matrices and the input "OTU table" format.
|
6
6
|
|
7
7
|
Note: this software is under active development!
|
8
8
|
|
@@ -14,6 +14,7 @@ gem install bio-express_beta_diversity
|
|
14
14
|
|
15
15
|
## Usage
|
16
16
|
|
17
|
+
Parsing the distance matrix:
|
17
18
|
```ruby
|
18
19
|
require 'bio-express_beta_diversity'
|
19
20
|
|
@@ -24,9 +25,18 @@ dists.distance('sample2','sample1') #=> 0.251761
|
|
24
25
|
|
25
26
|
```
|
26
27
|
|
28
|
+
Parsing the input OTU table:
|
29
|
+
```ruby
|
30
|
+
otus = Bio::EBD::Format.parse_from_file 'my.ebd'
|
31
|
+
otus.sample_counts.keys #=> ['sample1','sample2', ..]
|
32
|
+
otus.sample_counts['sample1'] #=> [1.0,4.0,0.0,..]
|
33
|
+
otus.otu_names #=> ['otu1','otu2','otu3',..]
|
34
|
+
```
|
35
|
+
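The order of `otu_names` matches the order of the counts within each array in `sample_counts`, i.e. `otus.sample_counts['sample1'][i]` is the count for `otus.otu_names[i]`.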
|
36
|
+
|
27
37
|
The API doc is online. For more code examples see the test files in
|
28
38
|
the source tree.
|
29
|
-
|
39
|
+
|
30
40
|
## Project home page
|
31
41
|
|
32
42
|
Information on the source tree, documentation, examples, issues and
|
@@ -37,7 +47,7 @@ how to contribute, see
|
|
37
47
|
## Cite
|
38
48
|
|
39
49
|
If you use this software, please cite one of
|
40
|
-
|
50
|
+
|
41
51
|
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
42
52
|
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
43
53
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
# Parser for the tab-separated "OTU table" that Express Beta Diversity
# takes as input.
#
# The first line of the file lists the OTU identifiers (its first cell is
# skipped); every following non-empty line is a sample name followed by one
# count per OTU.
class Bio::EBD::Format
  # otu_names:     Array of OTU identifiers, in header order.
  # sample_counts: Hash of sample name => Array of Float counts. The i-th
  #                count of each array belongs to the i-th entry of otu_names.
  attr_accessor :otu_names, :sample_counts

  # Start with an empty table: no OTUs and no samples.
  def initialize
    @otu_names = []
    @sample_counts = {}
  end

  # Read +filename+ (tab-separated) and return a populated Bio::EBD::Format.
  #
  # Expected layout, e.g.
  #
  #           100535  1008038
  #   sample1 5.0     0
  #   sample2 0       1.0
  #
  # Raises a RuntimeError when the first line has fewer than two fields, when
  # a sample row does not have exactly one count per OTU, or when a sample
  # name occurs more than once.
  def self.parse_from_file(filename)
    table = Bio::EBD::Format.new
    header_parsed = false

    CSV.foreach(filename, :col_sep => "\t") do |fields|
      unless header_parsed
        # The very first row carries the OTU IDs after a leading (ignored) cell.
        if fields.length < 2
          raise "EBD format file appears to be incorrectly formatted on the first line: #{fields.inspect}"
        end
        table.otu_names = fields.drop(1)
        header_parsed = true
        next
      end

      # Blank lines after the header are skipped silently.
      next if fields.empty?

      # Every sample row needs its name plus one observation per OTU.
      if fields.length != table.otu_names.length + 1
        raise "Parse exception at this row: #{fields.inspect}"
      end

      name = fields.first
      if table.sample_counts.key?(name)
        raise "Duplicate sample name detected in EBD format: #{name}"
      end

      table.sample_counts[name] = fields.drop(1).map { |cell| cell.to_f }
    end

    table
  end

  # Number of samples currently held in sample_counts.
  def number_of_samples
    @sample_counts.length
  end
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'tempfile'
|
1
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
3
|
|
3
4
|
describe "BioExpressBetaDiversity" do
|
@@ -19,3 +20,24 @@ describe "BioExpressBetaDiversity" do
|
|
19
20
|
d.distance('4451','4446').should == 0.364525
|
20
21
|
end
|
21
22
|
end
|
23
|
+
|
24
|
+
|
25
|
+
# Round-trip check for Bio::EBD::Format.parse_from_file: write a tiny OTU
# table to a temporary file, parse it, and compare the parsed structure.
describe 'ebd format parser' do
  it 'should parse somethign simple' do
    Tempfile.open('test_ebd') do |tmp|
      # Header row starts with an empty cell, followed by the OTU IDs.
      tmp.puts ['', '100535', '1008038'].join("\t")
      tmp.puts ['sample1', '5.0', '0.0'].join("\t")
      tmp.puts ['sample2', '0.0', '1.0'].join("\t")
      # Close (and thereby flush) the file before the parser reopens it by path.
      tmp.close

      parsed = Bio::EBD::Format.parse_from_file(tmp.path)

      expected_counts = {
        'sample1' => [5.0, 0.0],
        'sample2' => [0.0, 1.0],
      }
      parsed.otu_names.should == ['100535', '1008038']
      parsed.sample_counts.should == expected_counts
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-express_beta_diversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben J. Woodcroft
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio-logger
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- VERSION
|
113
113
|
- lib/bio-express_beta_diversity.rb
|
114
114
|
- lib/bio-express_beta_diversity/distance_matrix.rb
|
115
|
+
- lib/bio-express_beta_diversity/ebd_format.rb
|
115
116
|
- spec/bio-express_beta_diversity_spec.rb
|
116
117
|
- spec/data/eg.diss
|
117
118
|
- spec/spec_helper.rb
|
@@ -135,7 +136,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
136
|
version: '0'
|
136
137
|
requirements: []
|
137
138
|
rubyforge_project:
|
138
|
-
rubygems_version: 2.2.0
|
139
|
+
rubygems_version: 2.2.0
|
139
140
|
signing_key:
|
140
141
|
specification_version: 4
|
141
142
|
summary: Interface for express beta diversity
|