bio-cd-hit-report 0.0.3 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MTZhMmJjOTZlZTAzYzg3ZGUwZDdiMGNmNmI2NjRkNzAzYmE0NjM2Mw==
5
+ data.tar.gz: !binary |-
6
+ ZDYyZmIyMjM0NzdlY2UzZjI2ZGQwYmQxOTQwOGNlYjQ3Mzk1Zjc5OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MzFjN2Q5YjA1Mzk4MGIxNGM1MDFjMWYwNjVmNGEyNmEzYmM1NjIxODFlZjJi
10
+ NjVmMjRmYTkwYzgwZTcyZDE0MzY5NDljZGY5NjliYTQ3ZWJlNWIyOGRlYTZi
11
+ ZGI0NjJjNjc5ZDI4NWNiN2MzYzUwNjM2Zjk2MzcwZjYwMWQ3NDY=
12
+ data.tar.gz: !binary |-
13
+ NWVmYjRkMmE1YTczOGY5MGFmNTFiMDdiMjlmMTdhYTEyNmI5Njg1OTQ5NmQ3
14
+ OTgzNTAyOGY4M2Q0ZjM0Y2JhN2FlMDRhNmQ3MGIzMDZlN2Q3Y2JmMTRiOTAz
15
+ MjYxOGQyNDY1NTgxOThkMjI5Zjk5MTUwMGNhOTA5YTBlZDM5OGQ=
data/Gemfile CHANGED
@@ -1,15 +1,11 @@
1
1
  source "http://rubygems.org"
2
- # Add dependencies required to use your gem here.
3
- # Example:
4
- # gem "activesupport", ">= 2.3.5"
5
2
 
6
- # Add dependencies to develop your gem here.
7
- # Include everything needed to run rake, tests, features, etc.
3
+ gem "bio", "1.4.3"
4
+
8
5
  group :development do
9
6
  gem "minitest"
10
7
  gem "rdoc"
11
- gem "bundler"
8
+ gem "bundler", ">=1.3.1"
12
9
  gem "jeweler"
13
- gem "bio", "1.4.2"
14
10
  gem "rdoc"
15
11
  end
data/README.md CHANGED
@@ -1,10 +1,12 @@
1
- # bio-cd-hit-report
1
+ [[#]] bio-cd-hit-report
2
2
 
3
3
  [![Build Status](https://secure.travis-ci.org/georgeG/bioruby-cd-hit-report.png)](http://travis-ci.org/georgeG/bioruby-cd-hit-report)
4
4
 
5
- A bioruby wrapper for parsing and reading CD-HIT cluster reports
5
+ Clustering sequences with CD-HIT produces a cluster file (.clstr)
6
+ containing sequence names and their respective clusters. This plugin
7
+ provides methods for parsing this file.
6
8
 
7
- Note: this software is under active development!
9
+ Note: this plugin is under active development!
8
10
 
9
11
  ## Installation
10
12
 
@@ -15,17 +17,11 @@ Note: this software is under active development!
15
17
  ## Usage
16
18
 
17
19
  ```ruby
18
- require 'bio-cd-hit-report'
20
+ require 'bio-cd-hit-report'
19
21
 
20
- cluster_file = "cluster95.clstr"
22
+ cluster_file = "cluster95.clstr"
21
23
  report = Bio::CdHitReport.new(cluster_file)
22
24
 
23
- #print the max number of sequences in a cluster for the entire dataset
24
- puts report.max_members
25
-
26
- #print the minimum number of sequences in a cluster for the entire dataset
27
- puts report.min_members
28
-
29
25
  #print total number of clusters in the report
30
26
  puts report.total_clusters
31
27
 
@@ -37,10 +33,13 @@ Note: this software is under active development!
37
33
  puts "#{c.name} - #{c.members}" #print cluster name/id with respective sequences in the cluster
38
34
  puts c.size #print the total number of entries in the cluster
39
35
  end
40
- ```
36
+
37
+ #print the representative sequence for each cluster
38
+ report.each_cluster do |c|
39
+ puts c.rep_seq
40
+ end
41
41
 
42
- The API doc is online. For more code examples see the test files in
43
- the source tree.
42
+ ```
44
43
 
45
44
  ## Project home page
46
45
 
@@ -64,4 +63,4 @@ This Biogem is published at [#bio-cd-hit-report](http://biogems.info/index.html)
64
63
 
65
64
  ## Copyright
66
65
 
67
- Copyright (c) 2012 George Githinji. See LICENSE.txt for further details.
66
+ Copyright (c) 2013 George Githinji. See LICENSE.txt for further details.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.3
1
+ 0.1.0
@@ -1,74 +1,61 @@
1
1
  #!/usr/bin/env ruby
2
- #
2
+
3
3
  # BioRuby bio-cd-hit-report Plugin BioCdHitReport
4
- # Author:: georgeG
4
+ # Author:: george githinji
5
5
  # Copyright:: 2012
6
6
 
7
- USAGE = "Describe bio-cd-hit-report"
7
+ require_relative '../lib/bio-cd-hit-report'
8
+ require 'ostruct'
9
+ require 'optparse'
8
10
 
9
- if ARGV.size == 0
10
- print USAGE
11
- end
11
+ options = OpenStruct.new
12
12
 
13
- require 'bio-cd-hit-report'
14
- require 'optparse'
13
+ OptionParser.new do |opts|
14
+ opts.banner = 'USAGE: bio-cd-hit-report -i file.clstr [options] '
15
15
 
16
- # Uncomment when using the bio-logger
17
- # require 'bio-logger'
18
- # Bio::Log::CLI.logger('stderr')
19
- # Bio::Log::CLI.trace('info')
16
+ opts.on('-h', 'Display this screen') do
17
+ puts opts
18
+ exit
19
+ end
20
20
 
21
- options = {:example_switch=>false,:show_help=>false}
22
- opts = OptionParser.new do |o|
23
- o.banner = "Usage: #{File.basename($0)} [options] reponame\ne.g. #{File.basename($0)} the-perfect-gem"
21
+ opts.on('-i','--infile CLUSTERFILE', 'cluster file') do |infile|
22
+ options.infile = infile
23
+ end
24
24
 
25
- o.on('--example_parameter [EXAMPLE_PARAMETER]', 'TODO: put a description for the PARAMETER') do |example_parameter|
26
- # TODO: your logic here, below an example
27
- options[:example_parameter] = 'this is a parameter'
25
+ opts.on('-o','--outfile OUTPUTFILE', 'output file') do |outfile|
26
+ options.outfile = outfile
28
27
  end
29
-
30
- o.separator ""
31
- o.on("--switch-example", 'TODO: put a description for the SWITCH') do
32
- # TODO: your logic here, below an example
33
- self[:example_switch] = true
28
+
29
+ opts.on('-m','--members') do
30
+ options.members = true
34
31
  end
35
32
 
36
- # Uncomment the following when using the bio-logger
37
- # o.separator ""
38
- # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
39
- # Bio::Log::CLI.logger(name)
40
- # end
41
- #
42
- # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
43
- # Bio::Log::CLI.trace(s)
44
- # end
45
- #
46
- # o.on("-q", "--quiet", "Run quietly") do |q|
47
- # Bio::Log::CLI.trace('error')
48
- # end
49
- #
50
- # o.on("-v", "--verbose", "Run verbosely") do |v|
51
- # Bio::Log::CLI.trace('info')
52
- # end
53
- #
54
- # o.on("--debug", "Show debug messages") do |v|
55
- # Bio::Log::CLI.trace('debug')
56
- # end
57
-
58
- o.separator ""
59
- o.on_tail('-h', '--help', 'display this help and exit') do
60
- options[:show_help] = true
33
+ opts.on('-c','--clusterid CLUSTERID',Integer,'cluster id') do |clusterid|
34
+ options.cluster_id = clusterid
61
35
  end
36
+
37
+ end.parse!
38
+
39
+ clusterfile = options.infile
40
+ outfile = options.outfile
41
+ cluster_id = options.cluster_id
42
+
43
+ report = Bio::CdHitReport.new(clusterfile)
44
+
45
+ def print_members(report)
46
+ report.clusters.map{|c| "#{c.cluster_id}:#{c.members}"}
62
47
  end
63
48
 
64
- begin
65
- opts.parse!(ARGV)
49
+ def find_members_for(report,cluster_id)
50
+ report.get_members(cluster_id)
51
+ end
66
52
 
67
- # Uncomment the following when using the bio-logger
68
- # Bio::Log::CLI.configure('bio-cd-hit-report')
69
53
 
70
- # TODO: your code here
71
- # use options for your logic
72
- rescue OptionParser::InvalidOption => e
73
- options[:invalid_argument] = e.message
54
+ begin
55
+ unless cluster_id.nil?
56
+ $stdout.puts find_members_for(report,cluster_id)
57
+ else
58
+ $stdout.puts print_members(report) if options.members
59
+ end
74
60
  end
61
+
@@ -1 +1 @@
1
- require_relative "bio-cd-hit-report/cd-hit-report"
1
+ require_relative 'bio-cd-hit-report/cd-hit-report'
@@ -1,49 +1,34 @@
1
1
  module Bio
2
-
3
- require_relative 'cluster.rb'
2
+ require_relative 'cluster'
3
+ require_relative 'parser'
4
4
 
5
5
  class CdHitReport
6
+ include Enumerable
6
7
 
7
8
  def initialize(file)
8
- @file = file
9
- end
10
-
11
- def each_cluster(&block)
12
- cluster_objs.each(&block)
9
+ @report = CdHitParser.new
10
+ @report.report_file = file
13
11
  end
14
12
 
15
- def total_clusters
16
- cluster_objs.size
17
- end
18
-
19
- def get_cluster(name)
20
- cluster_objs.select{|cluster| cluster.name == name.to_s}.pop.members
21
- end
22
-
23
- def max_members
24
- cluster_objs.map{|c|c.size}.max
13
+ def clusters
14
+ cls = []
15
+ @report.each do |c|
16
+ cls << c
17
+ end
18
+ cls
25
19
  end
26
20
 
27
- def min_members
28
- cluster_objs.map{|c| c.size}.min
21
+ def each_cluster(&block)
22
+ clusters.each(&block)
29
23
  end
30
24
 
31
- private
32
- def cluster_objs
33
- d = raw_data.map do |line|
34
- cluster = line.split("\n").delete_if{|x| x == ">Cluster "}
35
- id = cluster.first
36
- cluster.shift
37
- #puts id.inspect
38
- Cluster.new(id,cluster)
39
- end
40
- d.delete_if {|obj| obj.id.nil?}
25
+ def total_clusters
26
+ clusters.size
41
27
  end
42
28
 
43
-
44
- def raw_data
45
- File.open(@file).readlines
29
+ def get_members(cluster_id)
30
+ clusters.select {|cluster| cluster.cluster_id == cluster_id.to_s}.map{|c|c.members}
46
31
  end
47
-
48
- end #class
49
- end #module
32
+ alias :get_cluster :get_members
33
+ end
34
+ end
@@ -1,21 +1,30 @@
1
- class Cluster < Struct.new(:name,:data)
2
- $/ = ">Cluster "
1
+ class Cluster
2
+ attr_accessor :name, :data
3
3
 
4
- def id
5
- name
4
+ def initialize(arg={})
5
+ self.name = arg[:name]
6
+ self.data = arg[:data]
6
7
  end
7
8
 
8
- def size
9
- entries.size
9
+ def cluster_id
10
+ name.scan(/Cluster\s(.*)/).join
10
11
  end
11
12
 
12
13
  def members
13
14
  entries.join(',')
14
15
  end
15
16
 
16
- private
17
+ def representative
18
+ data.split("\n").map{|line|line.scan(/>(.+)\.{3}\s\*/)}.join
19
+ end
20
+ alias :rep_seq :representative
21
+
22
+ def size
23
+ entries.size
24
+ end
25
+ alias :length :size
26
+
17
27
  def entries
18
- data.map {|entry| entry.scan(/>(.+)\.{3}/)}.flatten
28
+ data.split("\n").map{|line|line.scan(/>(.+)\.{3}/)}
19
29
  end
20
30
  end
21
-
@@ -0,0 +1,17 @@
1
+ class CdHitParser
2
+ attr_accessor :report_file
3
+
4
+ def each
5
+ data,header = nil, nil
6
+ File.open(report_file).each do |line|
7
+ if line[0].chr == '>'
8
+ yield Cluster.new(:name => header,:data => data) if data
9
+ data = ''
10
+ header = line[1..-1].strip
11
+ else
12
+ data << line
13
+ end
14
+ end
15
+ yield Cluster.new(:name => header, :data => data)
16
+ end
17
+ end
@@ -0,0 +1,27 @@
1
+ >Cluster 0
2
+ 0 420nt, >B267-17_Contig1... at +/99.76%
3
+ 1 456nt, >B50-25_Contig1... *
4
+ 2 456nt, >B59-19_Contig1... at +/99.78%
5
+ 3 456nt, >B63-12_Contig1... at +/99.56%
6
+ 4 456nt, >B63-3_Contig1... at +/99.34%
7
+ >Cluster 1
8
+ 0 450nt, >B189-10_Contig1... *
9
+ 1 414nt, >B189-24_Contig1... at +/99.28%
10
+ 2 414nt, >B189-27_Contig1... at +/99.52%
11
+ 3 414nt, >B189-3_Contig1... at +/98.79%
12
+ >Cluster 2
13
+ 0 447nt, >B118-11_Contig1... *
14
+ >Cluster 3
15
+ 0 447nt, >B160-13_Contig1... *
16
+ 1 408nt, >B160-8_Contig1... at +/99.02%
17
+ >Cluster 4
18
+ 0 444nt, >B216-14_Contig1... *
19
+ >Cluster 5
20
+ 0 444nt, >B41-13_Contig1... *
21
+ >Cluster 6
22
+ 0 441nt, >B139-18_Contig1... *
23
+ 1 441nt, >B139-26_Contig1... at +/99.77%
24
+ 2 441nt, >B139-28_Contig1... at +/99.55%
25
+ 3 441nt, >B170-26_Contig1... at +/98.64%
26
+ 4 441nt, >B219-31_Contig1... at +/99.55%
27
+ >Cluster 7
metadata CHANGED
@@ -1,36 +1,32 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-cd-hit-report
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - George Githinji
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-09-16 00:00:00.000000000 Z
11
+ date: 2013-04-26 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
- name: minitest
14
+ name: bio
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - '='
20
18
  - !ruby/object:Gem::Version
21
- version: '0'
22
- type: :development
19
+ version: 1.4.3
20
+ type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - '='
28
25
  - !ruby/object:Gem::Version
29
- version: '0'
26
+ version: 1.4.3
30
27
  - !ruby/object:Gem::Dependency
31
- name: rdoc
28
+ name: minitest
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
31
  - - ! '>='
36
32
  - !ruby/object:Gem::Version
@@ -38,15 +34,13 @@ dependencies:
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
38
  - - ! '>='
44
39
  - !ruby/object:Gem::Version
45
40
  version: '0'
46
41
  - !ruby/object:Gem::Dependency
47
- name: bundler
42
+ name: rdoc
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
45
  - - ! '>='
52
46
  - !ruby/object:Gem::Version
@@ -54,47 +48,41 @@ dependencies:
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
52
  - - ! '>='
60
53
  - !ruby/object:Gem::Version
61
54
  version: '0'
62
55
  - !ruby/object:Gem::Dependency
63
- name: jeweler
56
+ name: bundler
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
59
  - - ! '>='
68
60
  - !ruby/object:Gem::Version
69
- version: '0'
61
+ version: 1.3.1
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
66
  - - ! '>='
76
67
  - !ruby/object:Gem::Version
77
- version: '0'
68
+ version: 1.3.1
78
69
  - !ruby/object:Gem::Dependency
79
- name: bio
70
+ name: jeweler
80
71
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
72
  requirements:
83
- - - '='
73
+ - - ! '>='
84
74
  - !ruby/object:Gem::Version
85
- version: 1.4.2
75
+ version: '0'
86
76
  type: :development
87
77
  prerelease: false
88
78
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
79
  requirements:
91
- - - '='
80
+ - - ! '>='
92
81
  - !ruby/object:Gem::Version
93
- version: 1.4.2
82
+ version: '0'
94
83
  - !ruby/object:Gem::Dependency
95
84
  name: rdoc
96
85
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
86
  requirements:
99
87
  - - ! '>='
100
88
  - !ruby/object:Gem::Version
@@ -102,7 +90,6 @@ dependencies:
102
90
  type: :development
103
91
  prerelease: false
104
92
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
93
  requirements:
107
94
  - - ! '>='
108
95
  - !ruby/object:Gem::Version
@@ -127,34 +114,33 @@ files:
127
114
  - lib/bio-cd-hit-report.rb
128
115
  - lib/bio-cd-hit-report/cd-hit-report.rb
129
116
  - lib/bio-cd-hit-report/cluster.rb
117
+ - lib/bio-cd-hit-report/parser.rb
118
+ - test/data/test.clstr
130
119
  - test/helper.rb
131
120
  - test/test_bio-cd-hit-report.rb
132
121
  homepage: http://github.com/georgeG/bioruby-cd-hit-report
133
122
  licenses:
134
123
  - MIT
124
+ metadata: {}
135
125
  post_install_message:
136
126
  rdoc_options: []
137
127
  require_paths:
138
128
  - lib
139
129
  required_ruby_version: !ruby/object:Gem::Requirement
140
- none: false
141
130
  requirements:
142
131
  - - ! '>='
143
132
  - !ruby/object:Gem::Version
144
133
  version: '0'
145
- segments:
146
- - 0
147
- hash: 3441050478878586342
148
134
  required_rubygems_version: !ruby/object:Gem::Requirement
149
- none: false
150
135
  requirements:
151
136
  - - ! '>='
152
137
  - !ruby/object:Gem::Version
153
138
  version: '0'
154
139
  requirements: []
155
140
  rubyforge_project:
156
- rubygems_version: 1.8.24
141
+ rubygems_version: 2.0.3
157
142
  signing_key:
158
- specification_version: 3
143
+ specification_version: 4
159
144
  summary: Read and manipulate CD-HIT clusters
160
145
  test_files: []
146
+ has_rdoc: