bio-cd-hit-report 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MTZhMmJjOTZlZTAzYzg3ZGUwZDdiMGNmNmI2NjRkNzAzYmE0NjM2Mw==
5
+ data.tar.gz: !binary |-
6
+ ZDYyZmIyMjM0NzdlY2UzZjI2ZGQwYmQxOTQwOGNlYjQ3Mzk1Zjc5OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MzFjN2Q5YjA1Mzk4MGIxNGM1MDFjMWYwNjVmNGEyNmEzYmM1NjIxODFlZjJi
10
+ NjVmMjRmYTkwYzgwZTcyZDE0MzY5NDljZGY5NjliYTQ3ZWJlNWIyOGRlYTZi
11
+ ZGI0NjJjNjc5ZDI4NWNiN2MzYzUwNjM2Zjk2MzcwZjYwMWQ3NDY=
12
+ data.tar.gz: !binary |-
13
+ NWVmYjRkMmE1YTczOGY5MGFmNTFiMDdiMjlmMTdhYTEyNmI5Njg1OTQ5NmQ3
14
+ OTgzNTAyOGY4M2Q0ZjM0Y2JhN2FlMDRhNmQ3MGIzMDZlN2Q3Y2JmMTRiOTAz
15
+ MjYxOGQyNDY1NTgxOThkMjI5Zjk5MTUwMGNhOTA5YTBlZDM5OGQ=
data/Gemfile CHANGED
@@ -1,15 +1,11 @@
1
1
  source "http://rubygems.org"
2
- # Add dependencies required to use your gem here.
3
- # Example:
4
- # gem "activesupport", ">= 2.3.5"
5
2
 
6
- # Add dependencies to develop your gem here.
7
- # Include everything needed to run rake, tests, features, etc.
3
+ gem "bio", "1.4.3"
4
+
8
5
  group :development do
9
6
  gem "minitest"
10
7
  gem "rdoc"
11
- gem "bundler"
8
+ gem "bundler", ">=1.3.1"
12
9
  gem "jeweler"
13
- gem "bio", "1.4.2"
14
10
  gem "rdoc"
15
11
  end
data/README.md CHANGED
@@ -1,10 +1,12 @@
1
- # bio-cd-hit-report
1
+ [[#]] bio-cd-hit-report
2
2
 
3
3
  [![Build Status](https://secure.travis-ci.org/georgeG/bioruby-cd-hit-report.png)](http://travis-ci.org/georgeG/bioruby-cd-hit-report)
4
4
 
5
- A bioruby wrapper for parsing and reading CD-HIT cluster reports
5
+ Clustering sequences with CD-HIT produces a cluster file(.clstr)
6
+ containing sequence names and their respective clusters. This plugin
7
+ provides methods for parsing this file.
6
8
 
7
- Note: this software is under active development!
9
+ Note: this plugin is under active development!
8
10
 
9
11
  ## Installation
10
12
 
@@ -15,17 +17,11 @@ Note: this software is under active development!
15
17
  ## Usage
16
18
 
17
19
  ```ruby
18
- require 'bio-cd-hit-report'
20
+ require 'bio-cd-hit-report'
19
21
 
20
- cluster_file = "cluster95.clstr"
22
+ cluster_file = "cluster95.clstr"
21
23
  report = Bio::CdHitReport.new(cluster_file)
22
24
 
23
- #print the max number of sequences in a cluster for the entire dataset
24
- puts report.max_members
25
-
26
- #print the minimum number of sequences in a cluster for the entire dataset
27
- puts report.min_members
28
-
29
25
  #print total number of clusters in the report
30
26
  puts report.total_clusters
31
27
 
@@ -37,10 +33,13 @@ Note: this software is under active development!
37
33
  puts "#{c.name} - #{c.members}" #print cluster name/id with respective sequences in the cluster
38
34
  puts c.size #print the total number of entries in the cluster
39
35
  end
40
- ```
36
+
37
+ #print the representative sequence for each cluster
38
+ report.each_cluster do |c|
39
+ puts c.rep_seq
40
+ end
41
41
 
42
- The API doc is online. For more code examples see the test files in
43
- the source tree.
42
+ ```
44
43
 
45
44
  ## Project home page
46
45
 
@@ -64,4 +63,4 @@ This Biogem is published at [#bio-cd-hit-report](http://biogems.info/index.html)
64
63
 
65
64
  ## Copyright
66
65
 
67
- Copyright (c) 2012 George Githinji. See LICENSE.txt for further details.
66
+ Copyright (c) 2013 George Githinji. See LICENSE.txt for further details.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.3
1
+ 0.1.0
@@ -1,74 +1,61 @@
1
1
  #!/usr/bin/env ruby
2
- #
2
+
3
3
  # BioRuby bio-cd-hit-report Plugin BioCdHitReport
4
- # Author:: georgeG
4
+ # Author:: george githinji
5
5
  # Copyright:: 2012
6
6
 
7
- USAGE = "Describe bio-cd-hit-report"
7
+ require_relative '../lib/bio-cd-hit-report'
8
+ require 'ostruct'
9
+ require 'optparse'
8
10
 
9
- if ARGV.size == 0
10
- print USAGE
11
- end
11
+ options = OpenStruct.new
12
12
 
13
- require 'bio-cd-hit-report'
14
- require 'optparse'
13
+ OptionParser.new do |opts|
14
+ opts.banner = 'USAGE: bio-cd-hit-report -i file.clstr [options] '
15
15
 
16
- # Uncomment when using the bio-logger
17
- # require 'bio-logger'
18
- # Bio::Log::CLI.logger('stderr')
19
- # Bio::Log::CLI.trace('info')
16
+ opts.on('-h', 'Display this screen') do
17
+ puts opts
18
+ exit
19
+ end
20
20
 
21
- options = {:example_switch=>false,:show_help=>false}
22
- opts = OptionParser.new do |o|
23
- o.banner = "Usage: #{File.basename($0)} [options] reponame\ne.g. #{File.basename($0)} the-perfect-gem"
21
+ opts.on('-i','--infile CLUSTERFILE', 'cluster file') do |infile|
22
+ options.infile = infile
23
+ end
24
24
 
25
- o.on('--example_parameter [EXAMPLE_PARAMETER]', 'TODO: put a description for the PARAMETER') do |example_parameter|
26
- # TODO: your logic here, below an example
27
- options[:example_parameter] = 'this is a parameter'
25
+ opts.on('-o','--outfile OUTPUTFILE', 'output file') do |outfile|
26
+ options.outfile = outfile
28
27
  end
29
-
30
- o.separator ""
31
- o.on("--switch-example", 'TODO: put a description for the SWITCH') do
32
- # TODO: your logic here, below an example
33
- self[:example_switch] = true
28
+
29
+ opts.on('-m','--members') do
30
+ options.members = true
34
31
  end
35
32
 
36
- # Uncomment the following when using the bio-logger
37
- # o.separator ""
38
- # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
39
- # Bio::Log::CLI.logger(name)
40
- # end
41
- #
42
- # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
43
- # Bio::Log::CLI.trace(s)
44
- # end
45
- #
46
- # o.on("-q", "--quiet", "Run quietly") do |q|
47
- # Bio::Log::CLI.trace('error')
48
- # end
49
- #
50
- # o.on("-v", "--verbose", "Run verbosely") do |v|
51
- # Bio::Log::CLI.trace('info')
52
- # end
53
- #
54
- # o.on("--debug", "Show debug messages") do |v|
55
- # Bio::Log::CLI.trace('debug')
56
- # end
57
-
58
- o.separator ""
59
- o.on_tail('-h', '--help', 'display this help and exit') do
60
- options[:show_help] = true
33
+ opts.on('-c','--clusterid CLUSTERID',Integer,'cluster id') do |clusterid|
34
+ options.cluster_id = clusterid
61
35
  end
36
+
37
+ end.parse!
38
+
39
+ clusterfile = options.infile
40
+ outfile = options.outfile
41
+ cluster_id = options.cluster_id
42
+
43
+ report = Bio::CdHitReport.new(clusterfile)
44
+
45
+ def print_members(report)
46
+ report.clusters.map{|c| "#{c.cluster_id}:#{c.members}"}
62
47
  end
63
48
 
64
- begin
65
- opts.parse!(ARGV)
49
+ def find_members_for(report,cluster_id)
50
+ report.get_members(cluster_id)
51
+ end
66
52
 
67
- # Uncomment the following when using the bio-logger
68
- # Bio::Log::CLI.configure('bio-cd-hit-report')
69
53
 
70
- # TODO: your code here
71
- # use options for your logic
72
- rescue OptionParser::InvalidOption => e
73
- options[:invalid_argument] = e.message
54
+ begin
55
+ unless cluster_id.nil?
56
+ $stdout.puts find_members_for(report,cluster_id)
57
+ else
58
+ $stdout.puts print_members(report) if options.members
59
+ end
74
60
  end
61
+
@@ -1 +1 @@
1
- require_relative "bio-cd-hit-report/cd-hit-report"
1
+ require_relative 'bio-cd-hit-report/cd-hit-report'
@@ -1,49 +1,34 @@
1
1
  module Bio
2
-
3
- require_relative 'cluster.rb'
2
+ require_relative 'cluster'
3
+ require_relative 'parser'
4
4
 
5
5
  class CdHitReport
6
+ include Enumerable
6
7
 
7
8
  def initialize(file)
8
- @file = file
9
- end
10
-
11
- def each_cluster(&block)
12
- cluster_objs.each(&block)
9
+ @report = CdHitParser.new
10
+ @report.report_file = file
13
11
  end
14
12
 
15
- def total_clusters
16
- cluster_objs.size
17
- end
18
-
19
- def get_cluster(name)
20
- cluster_objs.select{|cluster| cluster.name == name.to_s}.pop.members
21
- end
22
-
23
- def max_members
24
- cluster_objs.map{|c|c.size}.max
13
+ def clusters
14
+ cls = []
15
+ @report.each do |c|
16
+ cls << c
17
+ end
18
+ cls
25
19
  end
26
20
 
27
- def min_members
28
- cluster_objs.map{|c| c.size}.min
21
+ def each_cluster(&block)
22
+ clusters.each(&block)
29
23
  end
30
24
 
31
- private
32
- def cluster_objs
33
- d = raw_data.map do |line|
34
- cluster = line.split("\n").delete_if{|x| x == ">Cluster "}
35
- id = cluster.first
36
- cluster.shift
37
- #puts id.inspect
38
- Cluster.new(id,cluster)
39
- end
40
- d.delete_if {|obj| obj.id.nil?}
25
+ def total_clusters
26
+ clusters.size
41
27
  end
42
28
 
43
-
44
- def raw_data
45
- File.open(@file).readlines
29
+ def get_members(cluster_id)
30
+ clusters.select {|cluster| cluster.cluster_id == cluster_id.to_s}.map{|c|c.members}
46
31
  end
47
-
48
- end #class
49
- end #module
32
+ alias :get_cluster :get_members
33
+ end
34
+ end
@@ -1,21 +1,30 @@
1
- class Cluster < Struct.new(:name,:data)
2
- $/ = ">Cluster "
1
+ class Cluster
2
+ attr_accessor :name, :data
3
3
 
4
- def id
5
- name
4
+ def initialize(arg={})
5
+ self.name = arg[:name]
6
+ self.data = arg[:data]
6
7
  end
7
8
 
8
- def size
9
- entries.size
9
+ def cluster_id
10
+ name.scan(/Cluster\s(.*)/).join
10
11
  end
11
12
 
12
13
  def members
13
14
  entries.join(',')
14
15
  end
15
16
 
16
- private
17
+ def representative
18
+ data.split("\n").map{|line|line.scan(/>(.+)\.{3}\s\*/)}.join
19
+ end
20
+ alias :rep_seq :representative
21
+
22
+ def size
23
+ entries.size
24
+ end
25
+ alias :length :size
26
+
17
27
  def entries
18
- data.map {|entry| entry.scan(/>(.+)\.{3}/)}.flatten
28
+ data.split("\n").map{|line|line.scan(/>(.+)\.{3}/)}
19
29
  end
20
30
  end
21
-
@@ -0,0 +1,17 @@
1
+ class CdHitParser
2
+ attr_accessor :report_file
3
+
4
+ def each
5
+ data,header = nil, nil
6
+ File.open(report_file).each do |line|
7
+ if line[0].chr == '>'
8
+ yield Cluster.new(:name => header,:data => data) if data
9
+ data = ''
10
+ header = line[1..-1].strip
11
+ else
12
+ data << line
13
+ end
14
+ end
15
+ yield Cluster.new(:name => header, :data => data)
16
+ end
17
+ end
@@ -0,0 +1,27 @@
1
+ >Cluster 0
2
+ 0 420nt, >B267-17_Contig1... at +/99.76%
3
+ 1 456nt, >B50-25_Contig1... *
4
+ 2 456nt, >B59-19_Contig1... at +/99.78%
5
+ 3 456nt, >B63-12_Contig1... at +/99.56%
6
+ 4 456nt, >B63-3_Contig1... at +/99.34%
7
+ >Cluster 1
8
+ 0 450nt, >B189-10_Contig1... *
9
+ 1 414nt, >B189-24_Contig1... at +/99.28%
10
+ 2 414nt, >B189-27_Contig1... at +/99.52%
11
+ 3 414nt, >B189-3_Contig1... at +/98.79%
12
+ >Cluster 2
13
+ 0 447nt, >B118-11_Contig1... *
14
+ >Cluster 3
15
+ 0 447nt, >B160-13_Contig1... *
16
+ 1 408nt, >B160-8_Contig1... at +/99.02%
17
+ >Cluster 4
18
+ 0 444nt, >B216-14_Contig1... *
19
+ >Cluster 5
20
+ 0 444nt, >B41-13_Contig1... *
21
+ >Cluster 6
22
+ 0 441nt, >B139-18_Contig1... *
23
+ 1 441nt, >B139-26_Contig1... at +/99.77%
24
+ 2 441nt, >B139-28_Contig1... at +/99.55%
25
+ 3 441nt, >B170-26_Contig1... at +/98.64%
26
+ 4 441nt, >B219-31_Contig1... at +/99.55%
27
+ >Cluster 7
metadata CHANGED
@@ -1,36 +1,32 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-cd-hit-report
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - George Githinji
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-09-16 00:00:00.000000000 Z
11
+ date: 2013-04-26 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
- name: minitest
14
+ name: bio
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - '='
20
18
  - !ruby/object:Gem::Version
21
- version: '0'
22
- type: :development
19
+ version: 1.4.3
20
+ type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - '='
28
25
  - !ruby/object:Gem::Version
29
- version: '0'
26
+ version: 1.4.3
30
27
  - !ruby/object:Gem::Dependency
31
- name: rdoc
28
+ name: minitest
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
31
  - - ! '>='
36
32
  - !ruby/object:Gem::Version
@@ -38,15 +34,13 @@ dependencies:
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
38
  - - ! '>='
44
39
  - !ruby/object:Gem::Version
45
40
  version: '0'
46
41
  - !ruby/object:Gem::Dependency
47
- name: bundler
42
+ name: rdoc
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
45
  - - ! '>='
52
46
  - !ruby/object:Gem::Version
@@ -54,47 +48,41 @@ dependencies:
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
52
  - - ! '>='
60
53
  - !ruby/object:Gem::Version
61
54
  version: '0'
62
55
  - !ruby/object:Gem::Dependency
63
- name: jeweler
56
+ name: bundler
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
59
  - - ! '>='
68
60
  - !ruby/object:Gem::Version
69
- version: '0'
61
+ version: 1.3.1
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
66
  - - ! '>='
76
67
  - !ruby/object:Gem::Version
77
- version: '0'
68
+ version: 1.3.1
78
69
  - !ruby/object:Gem::Dependency
79
- name: bio
70
+ name: jeweler
80
71
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
72
  requirements:
83
- - - '='
73
+ - - ! '>='
84
74
  - !ruby/object:Gem::Version
85
- version: 1.4.2
75
+ version: '0'
86
76
  type: :development
87
77
  prerelease: false
88
78
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
79
  requirements:
91
- - - '='
80
+ - - ! '>='
92
81
  - !ruby/object:Gem::Version
93
- version: 1.4.2
82
+ version: '0'
94
83
  - !ruby/object:Gem::Dependency
95
84
  name: rdoc
96
85
  requirement: !ruby/object:Gem::Requirement
97
- none: false
98
86
  requirements:
99
87
  - - ! '>='
100
88
  - !ruby/object:Gem::Version
@@ -102,7 +90,6 @@ dependencies:
102
90
  type: :development
103
91
  prerelease: false
104
92
  version_requirements: !ruby/object:Gem::Requirement
105
- none: false
106
93
  requirements:
107
94
  - - ! '>='
108
95
  - !ruby/object:Gem::Version
@@ -127,34 +114,33 @@ files:
127
114
  - lib/bio-cd-hit-report.rb
128
115
  - lib/bio-cd-hit-report/cd-hit-report.rb
129
116
  - lib/bio-cd-hit-report/cluster.rb
117
+ - lib/bio-cd-hit-report/parser.rb
118
+ - test/data/test.clstr
130
119
  - test/helper.rb
131
120
  - test/test_bio-cd-hit-report.rb
132
121
  homepage: http://github.com/georgeG/bioruby-cd-hit-report
133
122
  licenses:
134
123
  - MIT
124
+ metadata: {}
135
125
  post_install_message:
136
126
  rdoc_options: []
137
127
  require_paths:
138
128
  - lib
139
129
  required_ruby_version: !ruby/object:Gem::Requirement
140
- none: false
141
130
  requirements:
142
131
  - - ! '>='
143
132
  - !ruby/object:Gem::Version
144
133
  version: '0'
145
- segments:
146
- - 0
147
- hash: 3441050478878586342
148
134
  required_rubygems_version: !ruby/object:Gem::Requirement
149
- none: false
150
135
  requirements:
151
136
  - - ! '>='
152
137
  - !ruby/object:Gem::Version
153
138
  version: '0'
154
139
  requirements: []
155
140
  rubyforge_project:
156
- rubygems_version: 1.8.24
141
+ rubygems_version: 2.0.3
157
142
  signing_key:
158
- specification_version: 3
143
+ specification_version: 4
159
144
  summary: Read and manipulate CD-HIT clusters
160
145
  test_files: []
146
+ has_rdoc: