bio-cd-hit-report 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/Gemfile +3 -7
- data/README.md +14 -15
- data/VERSION +1 -1
- data/bin/bio-cd-hit-report +43 -56
- data/lib/bio-cd-hit-report.rb +1 -1
- data/lib/bio-cd-hit-report/cd-hit-report.rb +20 -35
- data/lib/bio-cd-hit-report/cluster.rb +18 -9
- data/lib/bio-cd-hit-report/parser.rb +17 -0
- data/test/data/test.clstr +27 -0
- metadata +24 -38
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MTZhMmJjOTZlZTAzYzg3ZGUwZDdiMGNmNmI2NjRkNzAzYmE0NjM2Mw==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZDYyZmIyMjM0NzdlY2UzZjI2ZGQwYmQxOTQwOGNlYjQ3Mzk1Zjc5OA==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MzFjN2Q5YjA1Mzk4MGIxNGM1MDFjMWYwNjVmNGEyNmEzYmM1NjIxODFlZjJi
|
10
|
+
NjVmMjRmYTkwYzgwZTcyZDE0MzY5NDljZGY5NjliYTQ3ZWJlNWIyOGRlYTZi
|
11
|
+
ZGI0NjJjNjc5ZDI4NWNiN2MzYzUwNjM2Zjk2MzcwZjYwMWQ3NDY=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
NWVmYjRkMmE1YTczOGY5MGFmNTFiMDdiMjlmMTdhYTEyNmI5Njg1OTQ5NmQ3
|
14
|
+
OTgzNTAyOGY4M2Q0ZjM0Y2JhN2FlMDRhNmQ3MGIzMDZlN2Q3Y2JmMTRiOTAz
|
15
|
+
MjYxOGQyNDY1NTgxOThkMjI5Zjk5MTUwMGNhOTA5YTBlZDM5OGQ=
|
data/Gemfile
CHANGED
@@ -1,15 +1,11 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
|
-
# Add dependencies required to use your gem here.
|
3
|
-
# Example:
|
4
|
-
# gem "activesupport", ">= 2.3.5"
|
5
2
|
|
6
|
-
|
7
|
-
|
3
|
+
gem "bio", "1.4.3"
|
4
|
+
|
8
5
|
group :development do
|
9
6
|
gem "minitest"
|
10
7
|
gem "rdoc"
|
11
|
-
gem "bundler"
|
8
|
+
gem "bundler", ">=1.3.1"
|
12
9
|
gem "jeweler"
|
13
|
-
gem "bio", "1.4.2"
|
14
10
|
gem "rdoc"
|
15
11
|
end
|
data/README.md
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
-
# bio-cd-hit-report
|
1
|
+
[[#]] bio-cd-hit-report
|
2
2
|
|
3
3
|
[](http://travis-ci.org/georgeG/bioruby-cd-hit-report)
|
4
4
|
|
5
|
-
|
5
|
+
Clustering sequences with CD-HIT produces a cluster file(.clstr)
|
6
|
+
containing sequence names and their respective clusters. This plugin
|
7
|
+
provides methods for parsing this file.
|
6
8
|
|
7
|
-
Note: this
|
9
|
+
Note: this plugin is under active development!
|
8
10
|
|
9
11
|
## Installation
|
10
12
|
|
@@ -15,17 +17,11 @@ Note: this software is under active development!
|
|
15
17
|
## Usage
|
16
18
|
|
17
19
|
```ruby
|
18
|
-
|
20
|
+
require 'bio-cd-hit-report'
|
19
21
|
|
20
|
-
|
22
|
+
cluster_file = "cluster95.clstr"
|
21
23
|
report = Bio::CdHitReport.new(cluster_file)
|
22
24
|
|
23
|
-
#print the max number of sequences in a cluster for the entire dataset
|
24
|
-
puts report.max_members
|
25
|
-
|
26
|
-
#print the minimum number of sequences in a cluster for the entire dataset
|
27
|
-
puts report.min_members
|
28
|
-
|
29
25
|
#print total number of clusters in the report
|
30
26
|
puts report.total_clusters
|
31
27
|
|
@@ -37,10 +33,13 @@ Note: this software is under active development!
|
|
37
33
|
puts "#{c.name} - #{c.members}" #print cluster name/id with respective sequences in the cluster
|
38
34
|
puts c.size #print the total number of entries in the cluster
|
39
35
|
end
|
40
|
-
|
36
|
+
|
37
|
+
#print the representative sequence for each cluster
|
38
|
+
report.each_cluster do |c|
|
39
|
+
puts c.rep_seq
|
40
|
+
end
|
41
41
|
|
42
|
-
|
43
|
-
the source tree.
|
42
|
+
```
|
44
43
|
|
45
44
|
## Project home page
|
46
45
|
|
@@ -64,4 +63,4 @@ This Biogem is published at [#bio-cd-hit-report](http://biogems.info/index.html)
|
|
64
63
|
|
65
64
|
## Copyright
|
66
65
|
|
67
|
-
Copyright (c)
|
66
|
+
Copyright (c) 2013 George Githinji. See LICENSE.txt for further details.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0
|
1
|
+
0.1.0
|
data/bin/bio-cd-hit-report
CHANGED
@@ -1,74 +1,61 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
2
|
+
|
3
3
|
# BioRuby bio-cd-hit-report Plugin BioCdHitReport
|
4
|
-
# Author::
|
4
|
+
# Author:: george githinji
|
5
5
|
# Copyright:: 2012
|
6
6
|
|
7
|
-
|
7
|
+
require_relative '../lib/bio-cd-hit-report'
|
8
|
+
require 'ostruct'
|
9
|
+
require 'optparse'
|
8
10
|
|
9
|
-
|
10
|
-
print USAGE
|
11
|
-
end
|
11
|
+
options = OpenStruct.new
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
OptionParser.new do |opts|
|
14
|
+
opts.banner = 'USAGE: bio-cd-hit-report -i file.clstr [options] '
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
16
|
+
opts.on('-h', 'Display this screen') do
|
17
|
+
puts opts
|
18
|
+
exit
|
19
|
+
end
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
|
21
|
+
opts.on('-i','--infile CLUSTERFILE', 'cluster file') do |infile|
|
22
|
+
options.infile = infile
|
23
|
+
end
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
options[:example_parameter] = 'this is a parameter'
|
25
|
+
opts.on('-o','--outfile OUTPUTFILE', 'output file') do |outfile|
|
26
|
+
options.outfile = outfile
|
28
27
|
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
# TODO: your logic here, below an example
|
33
|
-
self[:example_switch] = true
|
28
|
+
|
29
|
+
opts.on('-m','--members') do
|
30
|
+
options.members = true
|
34
31
|
end
|
35
32
|
|
36
|
-
|
37
|
-
|
38
|
-
# o.on("--logger filename",String,"Log to file (default stderr)") do | name |
|
39
|
-
# Bio::Log::CLI.logger(name)
|
40
|
-
# end
|
41
|
-
#
|
42
|
-
# o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
|
43
|
-
# Bio::Log::CLI.trace(s)
|
44
|
-
# end
|
45
|
-
#
|
46
|
-
# o.on("-q", "--quiet", "Run quietly") do |q|
|
47
|
-
# Bio::Log::CLI.trace('error')
|
48
|
-
# end
|
49
|
-
#
|
50
|
-
# o.on("-v", "--verbose", "Run verbosely") do |v|
|
51
|
-
# Bio::Log::CLI.trace('info')
|
52
|
-
# end
|
53
|
-
#
|
54
|
-
# o.on("--debug", "Show debug messages") do |v|
|
55
|
-
# Bio::Log::CLI.trace('debug')
|
56
|
-
# end
|
57
|
-
|
58
|
-
o.separator ""
|
59
|
-
o.on_tail('-h', '--help', 'display this help and exit') do
|
60
|
-
options[:show_help] = true
|
33
|
+
opts.on('-c','--clusterid CLUSTERID',Integer,'cluster id') do |clusterid|
|
34
|
+
options.cluster_id = clusterid
|
61
35
|
end
|
36
|
+
|
37
|
+
end.parse!
|
38
|
+
|
39
|
+
clusterfile = options.infile
|
40
|
+
outfile = options.outfile
|
41
|
+
cluster_id = options.cluster_id
|
42
|
+
|
43
|
+
report = Bio::CdHitReport.new(clusterfile)
|
44
|
+
|
45
|
+
def print_members(report)
|
46
|
+
report.clusters.map{|c| "#{c.cluster_id}:#{c.members}"}
|
62
47
|
end
|
63
48
|
|
64
|
-
|
65
|
-
|
49
|
+
def find_members_for(report,cluster_id)
|
50
|
+
report.get_members(cluster_id)
|
51
|
+
end
|
66
52
|
|
67
|
-
# Uncomment the following when using the bio-logger
|
68
|
-
# Bio::Log::CLI.configure('bio-cd-hit-report')
|
69
53
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
54
|
+
begin
|
55
|
+
unless cluster_id.nil?
|
56
|
+
$stdout.puts find_members_for(report,cluster_id)
|
57
|
+
else
|
58
|
+
$stdout.puts print_members(report) if options.members
|
59
|
+
end
|
74
60
|
end
|
61
|
+
|
data/lib/bio-cd-hit-report.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
require_relative
|
1
|
+
require_relative 'bio-cd-hit-report/cd-hit-report'
|
@@ -1,49 +1,34 @@
|
|
1
1
|
module Bio
|
2
|
-
|
3
|
-
require_relative '
|
2
|
+
require_relative 'cluster'
|
3
|
+
require_relative 'parser'
|
4
4
|
|
5
5
|
class CdHitReport
|
6
|
+
include Enumerable
|
6
7
|
|
7
8
|
def initialize(file)
|
8
|
-
@
|
9
|
-
|
10
|
-
|
11
|
-
def each_cluster(&block)
|
12
|
-
cluster_objs.each(&block)
|
9
|
+
@report = CdHitParser.new
|
10
|
+
@report.report_file = file
|
13
11
|
end
|
14
12
|
|
15
|
-
def
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
def max_members
|
24
|
-
cluster_objs.map{|c|c.size}.max
|
13
|
+
def clusters
|
14
|
+
cls = []
|
15
|
+
@report.each do |c|
|
16
|
+
cls << c
|
17
|
+
end
|
18
|
+
cls
|
25
19
|
end
|
26
20
|
|
27
|
-
def
|
28
|
-
|
21
|
+
def each_cluster(&block)
|
22
|
+
clusters.each(&block)
|
29
23
|
end
|
30
24
|
|
31
|
-
|
32
|
-
|
33
|
-
d = raw_data.map do |line|
|
34
|
-
cluster = line.split("\n").delete_if{|x| x == ">Cluster "}
|
35
|
-
id = cluster.first
|
36
|
-
cluster.shift
|
37
|
-
#puts id.inspect
|
38
|
-
Cluster.new(id,cluster)
|
39
|
-
end
|
40
|
-
d.delete_if {|obj| obj.id.nil?}
|
25
|
+
def total_clusters
|
26
|
+
clusters.size
|
41
27
|
end
|
42
28
|
|
43
|
-
|
44
|
-
|
45
|
-
File.open(@file).readlines
|
29
|
+
def get_members(cluster_id)
|
30
|
+
clusters.select {|cluster| cluster.cluster_id == cluster_id.to_s}.map{|c|c.members}
|
46
31
|
end
|
47
|
-
|
48
|
-
end
|
49
|
-
end
|
32
|
+
alias :get_cluster :get_members
|
33
|
+
end
|
34
|
+
end
|
@@ -1,21 +1,30 @@
|
|
1
|
-
class Cluster
|
2
|
-
|
1
|
+
class Cluster
|
2
|
+
attr_accessor :name, :data
|
3
3
|
|
4
|
-
def
|
5
|
-
name
|
4
|
+
def initialize(arg={})
|
5
|
+
self.name = arg[:name]
|
6
|
+
self.data = arg[:data]
|
6
7
|
end
|
7
8
|
|
8
|
-
def
|
9
|
-
|
9
|
+
def cluster_id
|
10
|
+
name.scan(/Cluster\s(.*)/).join
|
10
11
|
end
|
11
12
|
|
12
13
|
def members
|
13
14
|
entries.join(',')
|
14
15
|
end
|
15
16
|
|
16
|
-
|
17
|
+
def representative
|
18
|
+
data.split("\n").map{|line|line.scan(/>(.+)\.{3}\s\*/)}.join
|
19
|
+
end
|
20
|
+
alias :rep_seq :representative
|
21
|
+
|
22
|
+
def size
|
23
|
+
entries.size
|
24
|
+
end
|
25
|
+
alias :length :size
|
26
|
+
|
17
27
|
def entries
|
18
|
-
data.map
|
28
|
+
data.split("\n").map{|line|line.scan(/>(.+)\.{3}/)}
|
19
29
|
end
|
20
30
|
end
|
21
|
-
|
@@ -0,0 +1,17 @@
|
|
1
|
+
class CdHitParser
|
2
|
+
attr_accessor :report_file
|
3
|
+
|
4
|
+
def each
|
5
|
+
data,header = nil, nil
|
6
|
+
File.open(report_file).each do |line|
|
7
|
+
if line[0].chr == '>'
|
8
|
+
yield Cluster.new(:name => header,:data => data) if data
|
9
|
+
data = ''
|
10
|
+
header = line[1..-1].strip
|
11
|
+
else
|
12
|
+
data << line
|
13
|
+
end
|
14
|
+
end
|
15
|
+
yield Cluster.new(:name => header, :data => data)
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
>Cluster 0
|
2
|
+
0 420nt, >B267-17_Contig1... at +/99.76%
|
3
|
+
1 456nt, >B50-25_Contig1... *
|
4
|
+
2 456nt, >B59-19_Contig1... at +/99.78%
|
5
|
+
3 456nt, >B63-12_Contig1... at +/99.56%
|
6
|
+
4 456nt, >B63-3_Contig1... at +/99.34%
|
7
|
+
>Cluster 1
|
8
|
+
0 450nt, >B189-10_Contig1... *
|
9
|
+
1 414nt, >B189-24_Contig1... at +/99.28%
|
10
|
+
2 414nt, >B189-27_Contig1... at +/99.52%
|
11
|
+
3 414nt, >B189-3_Contig1... at +/98.79%
|
12
|
+
>Cluster 2
|
13
|
+
0 447nt, >B118-11_Contig1... *
|
14
|
+
>Cluster 3
|
15
|
+
0 447nt, >B160-13_Contig1... *
|
16
|
+
1 408nt, >B160-8_Contig1... at +/99.02%
|
17
|
+
>Cluster 4
|
18
|
+
0 444nt, >B216-14_Contig1... *
|
19
|
+
>Cluster 5
|
20
|
+
0 444nt, >B41-13_Contig1... *
|
21
|
+
>Cluster 6
|
22
|
+
0 441nt, >B139-18_Contig1... *
|
23
|
+
1 441nt, >B139-26_Contig1... at +/99.77%
|
24
|
+
2 441nt, >B139-28_Contig1... at +/99.55%
|
25
|
+
3 441nt, >B170-26_Contig1... at +/98.64%
|
26
|
+
4 441nt, >B219-31_Contig1... at +/99.55%
|
27
|
+
>Cluster 7
|
metadata
CHANGED
@@ -1,36 +1,32 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-cd-hit-report
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- George Githinji
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-04-26 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
14
|
+
name: bio
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '='
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
22
|
-
type: :
|
19
|
+
version: 1.4.3
|
20
|
+
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '='
|
28
25
|
- !ruby/object:Gem::Version
|
29
|
-
version:
|
26
|
+
version: 1.4.3
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
|
-
name:
|
28
|
+
name: minitest
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
31
|
- - ! '>='
|
36
32
|
- !ruby/object:Gem::Version
|
@@ -38,15 +34,13 @@ dependencies:
|
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
38
|
- - ! '>='
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
|
-
name:
|
42
|
+
name: rdoc
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
45
|
- - ! '>='
|
52
46
|
- !ruby/object:Gem::Version
|
@@ -54,47 +48,41 @@ dependencies:
|
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
52
|
- - ! '>='
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
|
-
name:
|
56
|
+
name: bundler
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
59
|
- - ! '>='
|
68
60
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
61
|
+
version: 1.3.1
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
66
|
- - ! '>='
|
76
67
|
- !ruby/object:Gem::Version
|
77
|
-
version:
|
68
|
+
version: 1.3.1
|
78
69
|
- !ruby/object:Gem::Dependency
|
79
|
-
name:
|
70
|
+
name: jeweler
|
80
71
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
72
|
requirements:
|
83
|
-
- - '
|
73
|
+
- - ! '>='
|
84
74
|
- !ruby/object:Gem::Version
|
85
|
-
version:
|
75
|
+
version: '0'
|
86
76
|
type: :development
|
87
77
|
prerelease: false
|
88
78
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
79
|
requirements:
|
91
|
-
- - '
|
80
|
+
- - ! '>='
|
92
81
|
- !ruby/object:Gem::Version
|
93
|
-
version:
|
82
|
+
version: '0'
|
94
83
|
- !ruby/object:Gem::Dependency
|
95
84
|
name: rdoc
|
96
85
|
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
86
|
requirements:
|
99
87
|
- - ! '>='
|
100
88
|
- !ruby/object:Gem::Version
|
@@ -102,7 +90,6 @@ dependencies:
|
|
102
90
|
type: :development
|
103
91
|
prerelease: false
|
104
92
|
version_requirements: !ruby/object:Gem::Requirement
|
105
|
-
none: false
|
106
93
|
requirements:
|
107
94
|
- - ! '>='
|
108
95
|
- !ruby/object:Gem::Version
|
@@ -127,34 +114,33 @@ files:
|
|
127
114
|
- lib/bio-cd-hit-report.rb
|
128
115
|
- lib/bio-cd-hit-report/cd-hit-report.rb
|
129
116
|
- lib/bio-cd-hit-report/cluster.rb
|
117
|
+
- lib/bio-cd-hit-report/parser.rb
|
118
|
+
- test/data/test.clstr
|
130
119
|
- test/helper.rb
|
131
120
|
- test/test_bio-cd-hit-report.rb
|
132
121
|
homepage: http://github.com/georgeG/bioruby-cd-hit-report
|
133
122
|
licenses:
|
134
123
|
- MIT
|
124
|
+
metadata: {}
|
135
125
|
post_install_message:
|
136
126
|
rdoc_options: []
|
137
127
|
require_paths:
|
138
128
|
- lib
|
139
129
|
required_ruby_version: !ruby/object:Gem::Requirement
|
140
|
-
none: false
|
141
130
|
requirements:
|
142
131
|
- - ! '>='
|
143
132
|
- !ruby/object:Gem::Version
|
144
133
|
version: '0'
|
145
|
-
segments:
|
146
|
-
- 0
|
147
|
-
hash: 3441050478878586342
|
148
134
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
|
-
none: false
|
150
135
|
requirements:
|
151
136
|
- - ! '>='
|
152
137
|
- !ruby/object:Gem::Version
|
153
138
|
version: '0'
|
154
139
|
requirements: []
|
155
140
|
rubyforge_project:
|
156
|
-
rubygems_version:
|
141
|
+
rubygems_version: 2.0.3
|
157
142
|
signing_key:
|
158
|
-
specification_version:
|
143
|
+
specification_version: 4
|
159
144
|
summary: Read and manipulate CD-HIT clusters
|
160
145
|
test_files: []
|
146
|
+
has_rdoc:
|