bio-cd-hit-report 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode # JRuby in 1.9 mode
6
+ - rbx-19mode
7
+ # - 1.8.7
8
+ # - jruby-18mode # JRuby in 1.8 mode
9
+ # - rbx-18mode
10
+
11
+ # uncomment this line if your project needs to run something other than `rake`:
12
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# Gemfile for bio-cd-hit-report: declares where gems are fetched from and
# which gems are needed to develop, test and package this gem.
#
# Fetch over HTTPS so gem downloads cannot be tampered with in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "minitest"
  gem "rdoc" # listed once; a second identical entry was redundant
  gem "bundler"
  gem "jeweler"
  gem "bio", "1.4.2"
end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 georgeG
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,58 @@
1
+ # bio-cd-hit-report
2
+
3
+ [![Build Status](https://secure.travis-ci.org/georgeG/bioruby-cd-hit-report.png)](http://travis-ci.org/georgeG/bioruby-cd-hit-report)
4
+
5
+ A bioruby wrapper for parsing and reading CD-HIT cluster reports
6
+
7
+ Note: this software is under active development!
8
+
9
+ ## Installation
10
+
11
+ ```sh
12
+ gem install bio-cd-hit-report
13
+ ```
14
+
15
+ ## Usage
16
+
17
+ ```ruby
18
+ require 'bio-cd-hit-report'
19
+
20
+ cluster_file = "cluster95.clstr"
21
+ report = Bio::CdHitReport.new(cluster_file)
22
+
23
+ puts report.max_members #print the max number of sequences in a cluster for the entire dataset
24
+ puts report.min_members #print the minimum number of sequences in a cluster for the entire dataset
25
+ puts report.total_clusters #print total number of clusters in the report
26
+
27
+ report.each_cluster do |c|
28
+ puts "#{c.name} - #{c.members}" #print cluster name/id with respective sequences in the cluster
29
+ puts c.size #print the total number of entries in the cluster
30
+ end
31
+ ```
32
+
33
+ The API doc is online. For more code examples see the test files in
34
+ the source tree.
35
+
36
+ ## Project home page
37
+
38
+ For information on the source tree, documentation, examples, issues and
39
+ how to contribute, see
40
+
41
+ http://github.com/georgeG/bioruby-cd-hit-report
42
+
43
+ The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
44
+
45
+ ## Cite
46
+
47
+ If you use this software, please cite one of
48
+
49
+ * [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
50
+ * [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
51
+
52
+ ## Biogems.info
53
+
54
+ This Biogem is published at [#bio-cd-hit-report](http://biogems.info/index.html)
55
+
56
+ ## Copyright
57
+
58
+ Copyright (c) 2012 George Githinji. See LICENSE.txt for further details.
@@ -0,0 +1,45 @@
1
# encoding: utf-8

# Rake tasks for the bio-cd-hit-report gem: gem packaging via Jeweler,
# the test runner (default task) and RDoc generation.

require 'rubygems'
require 'bundler'

# Activate the :default and :development gem groups before loading any task
# library; abort with Bundler's own status code when gems are missing.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'rake'
require 'jeweler'

Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name        = "bio-cd-hit-report"
  spec.homepage    = "http://github.com/georgeG/bioruby-cd-hit-report"
  spec.license     = "MIT"
  spec.summary     = "A bioruby wrapper for parsing and reading CD-HIT cluster reports"
  spec.description = "A bioruby wrapper for parsing and reading CD-HIT cluster reports"
  spec.email       = "georgkam@gmail.com"
  spec.authors     = ["George Githinji"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rake/testtask'

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib' << 'test'
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

task :default => :test

require 'rdoc/task'

# `rake rdoc` writes API documentation for the README and lib/ into rdoc/.
Rake::RDocTask.new do |rdoc|
  # The title carries the contents of the VERSION file when it exists.
  version = ""
  version = File.read('VERSION') if File.exist?('VERSION')

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-cd-hit-report #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,74 @@
1
#!/usr/bin/env ruby
#
# BioRuby bio-cd-hit-report Plugin BioCdHitReport
# Author:: georgeG
# Copyright:: 2012
#
# Command-line entry point. It currently only parses the example options
# below into the `options` hash; the report logic itself is still a TODO.

USAGE = "Describe bio-cd-hit-report"

# Show the short usage text when called without any arguments.
print USAGE if ARGV.empty?

require 'bio-cd-hit-report'
require 'optparse'

# Uncomment when using the bio-logger
# require 'bio-logger'
# Bio::Log::CLI.logger('stderr')
# Bio::Log::CLI.trace('info')

options = {:example_switch=>false,:show_help=>false}
opts = OptionParser.new do |o|
  # NOTE(review): "reponame" / "the-perfect-gem" look like leftovers from the
  # project template -- confirm the intended usage line.
  o.banner = "Usage: #{File.basename($0)} [options] reponame\ne.g. #{File.basename($0)} the-perfect-gem"

  o.on('--example_parameter [EXAMPLE_PARAMETER]', 'TODO: put a description for the PARAMETER') do |example_parameter|
    # TODO: your logic here, below an example
    options[:example_parameter] = 'this is a parameter'
  end

  o.separator ""
  o.on("--switch-example", 'TODO: put a description for the SWITCH') do
    # TODO: your logic here, below an example
    # Store the switch in the options hash. Inside this block `self` is the
    # top-level object, which has no []= method, so `self[...] = true` raised
    # NoMethodError whenever the switch was given.
    options[:example_switch] = true
  end

  # Uncomment the following when using the bio-logger
  # o.separator ""
  # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
  #   Bio::Log::CLI.logger(name)
  # end
  #
  # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
  #   Bio::Log::CLI.trace(s)
  # end
  #
  # o.on("-q", "--quiet", "Run quietly") do |q|
  #   Bio::Log::CLI.trace('error')
  # end
  #
  # o.on("-v", "--verbose", "Run verbosely") do |v|
  #   Bio::Log::CLI.trace('info')
  # end
  #
  # o.on("--debug", "Show debug messages") do |v|
  #   Bio::Log::CLI.trace('debug')
  # end

  o.separator ""
  o.on_tail('-h', '--help', 'display this help and exit') do
    options[:show_help] = true
  end
end

begin
  opts.parse!(ARGV)

  # Honour -h/--help as its description promises: print the generated help
  # text and stop. Previously the flag was recorded but never acted on.
  if options[:show_help]
    puts opts
    exit
  end

  # Uncomment the following when using the bio-logger
  # Bio::Log::CLI.configure('bio-cd-hit-report')

  # TODO: your code here
  # use options for your logic
rescue OptionParser::InvalidOption => e
  # An unknown option is recorded rather than aborting the run.
  options[:invalid_argument] = e.message
end
@@ -0,0 +1,14 @@
1
+ require_relative "bio-cd-hit-report/cd-hit-report"
2
+
3
+ #cluster_file = "#{ENV['HOME']}/DBL_tags/cluster/cd-hit/cluster95.clstr"
4
+
5
+ #report = Bio::CdHitReport.new(cluster_file)
6
+ #puts report.max_members #print the max number of sequences in a cluster for the entire dataset
7
+ #puts report.min_members #print the minimum number of sequences in a cluster for the entire dataset
8
+ #puts report.total_clusters #print total number of clusters in the report
9
+
10
+ #report.each_cluster do |c|
11
+ #puts "#{c.name} - #{c.members}" #list cluster name/id with respective number of sequences in the cluster
12
+ #end
13
+
14
+
@@ -0,0 +1,42 @@
1
+ module Bio
2
+
3
+ require_relative 'cluster.rb'
4
+
5
# Reader for a CD-HIT cluster report (.clstr) file.
#
# The file is split into records on the ">Cluster " marker. Each record
# becomes one Cluster built from its first line (the cluster id) and its
# remaining lines (the member entries).
class CdHitReport
  # Marker that introduces every cluster record in the report.
  CLUSTER_MARKER = ">Cluster ".freeze

  # file - path of the cluster report. Nothing is read until the first query.
  def initialize(file)
    @file = file
  end

  # Yields every Cluster in file order. Without a block, returns the
  # Enumerator produced by Array#each.
  def each_cluster(&block)
    cluster_objs.each(&block)
  end

  # Number of clusters in the report.
  def total_clusters
    cluster_objs.size
  end

  # Size of the largest cluster, or nil when the report has no clusters.
  def max_members
    cluster_objs.map(&:size).max
  end

  # Size of the smallest cluster, or nil when the report has no clusters.
  def min_members
    cluster_objs.map(&:size).min
  end

  private

  # Parses the report on first use and caches the Cluster objects, so the
  # file is not re-read for every query.
  def cluster_objs
    @cluster_objs ||= raw_data.each_with_object([]) do |record, clusters|
      lines = record.split("\n").reject { |line| line == CLUSTER_MARKER }
      # The text in front of the first marker is a record that holds only the
      # marker itself. It used to become a Cluster with a nil name and no
      # members, which inflated total_clusters and forced min_members to 0.
      next if lines.empty?

      id, *entries = lines
      clusters << Cluster.new(id, entries)
    end
  end

  # Reads the file as marker-terminated records. The separator is passed
  # explicitly instead of relying on the global $/, and File.readlines closes
  # the file handle before returning.
  def raw_data
    File.readlines(@file, CLUSTER_MARKER)
  end
end #class
42
+ end #module
@@ -0,0 +1,21 @@
1
# One cluster of a CD-HIT report.
#
# name - the cluster id (first line of the cluster record).
# data - the raw member lines of the record; a sequence name is the text
#        between ">" and "..." on such a line.
class Cluster < Struct.new(:name,:data)
  # Load-time side effect: sets the global input record separator. It is kept
  # because readers that call readlines without an explicit separator rely on
  # it to split a report into cluster records.
  # NOTE(review): this changes the default separator for every gets/readlines
  # call in the process; prefer passing the separator explicitly.
  $/ = ">Cluster "

  # Captures the sequence name between ">" and the trailing "...".
  MEMBER_PATTERN = />(.+)\.{3}/

  # Alias for name.
  def id
    name
  end

  # Number of sequence names found in the member lines.
  def size
    entries.size
  end

  # Sequence names joined into one comma-separated String.
  def members
    entries.join(',')
  end

  private

  # Sequence names extracted from data. Array() turns a missing (nil) data
  # into an empty list, so a Cluster built without member lines reports size 0
  # instead of raising NoMethodError.
  def entries
    Array(data).flat_map { |entry| entry.scan(MEMBER_PATTERN).flatten }
  end
end
21
+
@@ -0,0 +1,19 @@
1
# Shared bootstrap for the test suite: activates the bundled gems, puts
# test/ and lib/ on the load path, loads the library and registers the
# MiniTest autorun hook.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Report the missing gems and exit with Bundler's own status code.
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'minitest/unit'

# test/ is added first, then ../lib, so lib/ ends up at the front of the path.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(test_dir)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
require 'bio-cd-hit-report'

# Reopened as a place for helpers shared by all test cases (none yet).
class MiniTest::Unit::TestCase; end

MiniTest::Unit.autorun
@@ -0,0 +1,4 @@
1
+ require 'helper'
2
+
3
# Test case for the bio-cd-hit-report library; no test methods are defined yet.
class TestBioCdHitReport < MiniTest::Unit::TestCase; end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-cd-hit-report
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - George Githinji
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: minitest
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdoc
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: jeweler
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: bio
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - '='
84
+ - !ruby/object:Gem::Version
85
+ version: 1.4.2
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - '='
92
+ - !ruby/object:Gem::Version
93
+ version: 1.4.2
94
+ - !ruby/object:Gem::Dependency
95
+ name: rdoc
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ description: A bioruby wrapper for parsing and reading CD-HIT cluster reports
111
+ email: georgkam@gmail.com
112
+ executables:
113
+ - bio-cd-hit-report
114
+ extensions: []
115
+ extra_rdoc_files:
116
+ - LICENSE.txt
117
+ - README.md
118
+ files:
119
+ - .document
120
+ - .travis.yml
121
+ - Gemfile
122
+ - LICENSE.txt
123
+ - README.md
124
+ - Rakefile
125
+ - VERSION
126
+ - bin/bio-cd-hit-report
127
+ - lib/bio-cd-hit-report.rb
128
+ - lib/bio-cd-hit-report/cd-hit-report.rb
129
+ - lib/bio-cd-hit-report/cluster.rb
130
+ - test/helper.rb
131
+ - test/test_bio-cd-hit-report.rb
132
+ homepage: http://github.com/georgeG/bioruby-cd-hit-report
133
+ licenses:
134
+ - MIT
135
+ post_install_message:
136
+ rdoc_options: []
137
+ require_paths:
138
+ - lib
139
+ required_ruby_version: !ruby/object:Gem::Requirement
140
+ none: false
141
+ requirements:
142
+ - - ! '>='
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ segments:
146
+ - 0
147
+ hash: -2084256407332630455
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ none: false
150
+ requirements:
151
+ - - ! '>='
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ requirements: []
155
+ rubyforge_project:
156
+ rubygems_version: 1.8.24
157
+ signing_key:
158
+ specification_version: 3
159
+ summary: A bioruby wrapper for parsing and reading CD-HIT cluster reports
160
+ test_files: []