bio-express_beta_diversity 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -3
- data/VERSION +1 -1
- data/lib/bio-express_beta_diversity.rb +1 -0
- data/lib/bio-express_beta_diversity/ebd_format.rb +48 -0
- data/spec/bio-express_beta_diversity_spec.rb +22 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e43852822d4a01026d598a484a4bc2612f21cfa8
|
4
|
+
data.tar.gz: a7e61d9d1ddb09cb8cdd38d3f5a2f2094178c157
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e4b1ad90efc62d4f3a2d1de7a90aac478f25ee44069341fc3c7d4ac4c64f3c6870bcbfd8627983972154a343384e676f7903d134233643e6e2aa6288568a8de
|
7
|
+
data.tar.gz: 904d8162046af7f527294b12912a39f8b300fa8ee084eab1655aea7d33fb1918b607c8489d94e118765e3877d3be354a23f7ddd3bf15982ecddb7a41fc9dba36
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[Build Status](http://travis-ci.org/wwood/bioruby-express_beta_diversity)
|
4
4
|
|
5
|
-
Ruby interface to [express beta diversity](https://github.com/dparks1134/ExpressBetaDiversity) things. Currently, functionality is limited to parsing the output distance matrices.
|
5
|
+
Ruby interface to [express beta diversity](https://github.com/dparks1134/ExpressBetaDiversity) things. Currently, functionality is limited to parsing the output distance matrices, and input "OTU table" format.
|
6
6
|
|
7
7
|
Note: this software is under active development!
|
8
8
|
|
@@ -14,6 +14,7 @@ gem install bio-express_beta_diversity
|
|
14
14
|
|
15
15
|
## Usage
|
16
16
|
|
17
|
+
Parsing the distance matrix:
|
17
18
|
```ruby
|
18
19
|
require 'bio-express_beta_diversity'
|
19
20
|
|
@@ -24,9 +25,18 @@ dists.distance('sample2','sample1') #=> 0.251761
|
|
24
25
|
|
25
26
|
```
|
26
27
|
|
28
|
+
Parsing the input OTU table:
|
29
|
+
```ruby
|
30
|
+
otus = Bio::EBD::Format.parse_from_file 'my.ebd'
|
31
|
+
otus.sample_counts.keys #=> ['sample1','sample2', ..]
|
32
|
+
otus.sample_counts['sample1'] #=> [1.0,4.0,0.0,..]
|
33
|
+
otus.otu_names #=> ['otu1','otu2','otu3',..]
|
34
|
+
```
|
35
|
+
The `otu_names` correspond with the order of the `sample_counts.values`.
|
36
|
+
|
27
37
|
The API doc is online. For more code examples see the test files in
|
28
38
|
the source tree.
|
29
|
-
|
39
|
+
|
30
40
|
## Project home page
|
31
41
|
|
32
42
|
Information on the source tree, documentation, examples, issues and
|
@@ -37,7 +47,7 @@ how to contribute, see
|
|
37
47
|
## Cite
|
38
48
|
|
39
49
|
If you use this software, please cite one of
|
40
|
-
|
50
|
+
|
41
51
|
* [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
|
42
52
|
* [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
|
43
53
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.1.0
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
# Express Beta Diversity input "OTU table" format parser.
|
4
|
+
class Bio::EBD::Format
|
5
|
+
attr_accessor :otu_names
|
6
|
+
|
7
|
+
# Hash of sample names to array of counts. The counts
|
8
|
+
# are floats that correspond to the otu_names.
|
9
|
+
attr_accessor :sample_counts
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@sample_counts = {}
|
13
|
+
@otu_names = []
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.parse_from_file(filename)
|
17
|
+
ebd = Bio::EBD::Format.new
|
18
|
+
|
19
|
+
# 100535 1008038
|
20
|
+
# sample1 5.0 0
|
21
|
+
# sample2 0 1.0
|
22
|
+
first_line = true
|
23
|
+
CSV.foreach(filename, :col_sep => "\t") do |row|
|
24
|
+
if first_line
|
25
|
+
# First line is the IDs of the OTUs
|
26
|
+
raise "EBD format file appears to be incorrectly formatted on the first line: #{row.inspect}" if row.length < 2
|
27
|
+
ebd.otu_names = row[1...row.length]
|
28
|
+
first_line = false
|
29
|
+
else
|
30
|
+
next if row.empty? #Ignore empty lines
|
31
|
+
|
32
|
+
# all other lines are the sample names and then number of observations of the OTUs
|
33
|
+
raise "Parse exception at this row: #{row.inspect}" unless row.length == ebd.otu_names.length+1
|
34
|
+
|
35
|
+
sample_name = row[0]
|
36
|
+
raise "Duplicate sample name detected in EBD format: #{row[0]}" if ebd.sample_counts.key?(sample_name)
|
37
|
+
|
38
|
+
ebd.sample_counts[sample_name] = row[1...row.length].collect{|count| count.to_f}
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
return ebd
|
43
|
+
end
|
44
|
+
|
45
|
+
def number_of_samples
|
46
|
+
@sample_counts.length
|
47
|
+
end
|
48
|
+
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'tempfile'
|
1
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
3
|
|
3
4
|
describe "BioExpressBetaDiversity" do
|
@@ -19,3 +20,24 @@ describe "BioExpressBetaDiversity" do
|
|
19
20
|
d.distance('4451','4446').should == 0.364525
|
20
21
|
end
|
21
22
|
end
|
23
|
+
|
24
|
+
|
25
|
+
describe 'ebd format parser' do
|
26
|
+
it 'should parse somethign simple' do
|
27
|
+
|
28
|
+
Tempfile.open('test_ebd') do |t|
|
29
|
+
t.print "\t"
|
30
|
+
t.puts %w(100535 1008038).join "\t"
|
31
|
+
t.puts %w(sample1 5.0 0.0).join "\t"
|
32
|
+
t.puts %w(sample2 0.0 1.0).join "\t"
|
33
|
+
t.close
|
34
|
+
|
35
|
+
ebd = Bio::EBD::Format.parse_from_file t.path
|
36
|
+
ebd.otu_names.should == %w(100535 1008038)
|
37
|
+
ebd.sample_counts.should == {
|
38
|
+
'sample1' => [5.0,0.0],
|
39
|
+
'sample2' => [0.0,1.0],
|
40
|
+
}
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-express_beta_diversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben J. Woodcroft
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio-logger
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- VERSION
|
113
113
|
- lib/bio-express_beta_diversity.rb
|
114
114
|
- lib/bio-express_beta_diversity/distance_matrix.rb
|
115
|
+
- lib/bio-express_beta_diversity/ebd_format.rb
|
115
116
|
- spec/bio-express_beta_diversity_spec.rb
|
116
117
|
- spec/data/eg.diss
|
117
118
|
- spec/spec_helper.rb
|
@@ -135,7 +136,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
136
|
version: '0'
|
136
137
|
requirements: []
|
137
138
|
rubyforge_project:
|
138
|
-
rubygems_version: 2.2.0
|
139
|
+
rubygems_version: 2.2.0
|
139
140
|
signing_key:
|
140
141
|
specification_version: 4
|
141
142
|
summary: Interface for express beta diversity
|