bio-cd-hit-report 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,12 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode # JRuby in 1.9 mode
6
+ - rbx-19mode
7
+ # - 1.8.7
8
+ # - jruby-18mode # JRuby in 1.8 mode
9
+ # - rbx-18mode
10
+
11
+ # uncomment this line if your project needs to run something other than `rake`:
12
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# Fetch gems over TLS rather than plain HTTP.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
# Each gem is listed exactly once: the Rakefile takes the gem's dependencies
# from this file, so a repeated entry ends up repeated in the built gemspec.
group :development do
  gem "minitest"
  gem "rdoc"
  gem "bundler"
  gem "jeweler"
  gem "bio", "1.4.2"
end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 georgeG
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,58 @@
1
+ # bio-cd-hit-report
2
+
3
+ [![Build Status](https://secure.travis-ci.org/georgeG/bioruby-cd-hit-report.png)](http://travis-ci.org/georgeG/bioruby-cd-hit-report)
4
+
5
+ A bioruby wrapper for parsing and reading CD-HIT cluster reports
6
+
7
+ Note: this software is under active development!
8
+
9
+ ## Installation
10
+
11
+ ```sh
12
+ gem install bio-cd-hit-report
13
+ ```
14
+
15
+ ## Usage
16
+
17
+ ```ruby
18
+ require 'bio-cd-hit-report'
19
+
20
+ cluster_file = "cluster95.clstr"
21
+ report = Bio::CdHitReport.new(cluster_file)
22
+
23
+ puts report.max_members #print the max number of sequences in a cluster for the entire dataset
24
+ puts report.min_members #print the minimum number of sequences in a cluster for the entire dataset
25
+ puts report.total_clusters #print total number of clusters in the report
26
+
27
+ report.each_cluster do |c|
28
+ puts "#{c.name} - #{c.members}" #print cluster name/id with respective sequences in the cluster
29
+ puts c.size #print the total number of entries in the cluster
30
+ end
31
+ ```
32
+
33
+ The API doc is online. For more code examples see the test files in
34
+ the source tree.
35
+
36
+ ## Project home page
37
+
38
+ For information on the source tree, documentation, examples, issues and
39
+ how to contribute, see
40
+
41
+ http://github.com/georgeG/bioruby-cd-hit-report
42
+
43
+ The BioRuby community is on IRC server: irc.freenode.org, channel: #bioruby.
44
+
45
+ ## Cite
46
+
47
+ If you use this software, please cite one of
48
+
49
+ * [BioRuby: bioinformatics software for the Ruby programming language](http://dx.doi.org/10.1093/bioinformatics/btq475)
50
+ * [Biogem: an effective tool-based approach for scaling up open source software development in bioinformatics](http://dx.doi.org/10.1093/bioinformatics/bts080)
51
+
52
+ ## Biogems.info
53
+
54
+ This Biogem is published at [#bio-cd-hit-report](http://biogems.info/index.html)
55
+
56
+ ## Copyright
57
+
58
+ Copyright (c) 2012 George Githinji. See LICENSE.txt for further details.
@@ -0,0 +1,45 @@
1
# encoding: utf-8

require 'rubygems'
require 'bundler'

# Activate the gems of the default and development groups. If Bundler cannot,
# print its message plus a hint and exit with the status code it supplies.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => setup_error
  $stderr.puts setup_error.message, "Run `bundle install` to install missing gems"
  exit setup_error.status_code
end
require 'rake'

# Gem packaging and release tasks.
require 'jeweler'
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name        = "bio-cd-hit-report"
  spec.homepage    = "http://github.com/georgeG/bioruby-cd-hit-report"
  spec.license     = "MIT"
  spec.summary     = "A bioruby wrapper for parsing and reading CD-HIT cluster reports"
  spec.description = "A bioruby wrapper for parsing and reading CD-HIT cluster reports"
  spec.email       = "georgkam@gmail.com"
  spec.authors     = ["George Githinji"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# `rake test` runs every test/**/test_*.rb with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

# A bare `rake` runs the tests.
task default: :test

# API documentation built from the README and the library sources.
require 'rdoc/task'
Rake::RDocTask.new do |doc|
  # The title carries the contents of VERSION when that file exists.
  version =
    if File.exist?('VERSION')
      File.read('VERSION')
    else
      ""
    end

  doc.rdoc_dir = 'rdoc'
  doc.title    = "bio-cd-hit-report #{version}"
  ['README*', 'lib/**/*.rb'].each { |glob| doc.rdoc_files.include(glob) }
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,74 @@
1
#!/usr/bin/env ruby
#
# BioRuby bio-cd-hit-report Plugin BioCdHitReport
# Author:: georgeG
# Copyright:: 2012
#
# Command-line entry point. It currently only parses its options into the
# +options+ hash; the actual report logic is still marked TODO below.

USAGE = "Describe bio-cd-hit-report"

# With no arguments, show the usage text (terminated by a newline so the
# shell prompt does not end up on the same line).
puts USAGE if ARGV.empty?

require 'bio-cd-hit-report'
require 'optparse'

# Uncomment when using the bio-logger
# require 'bio-logger'
# Bio::Log::CLI.logger('stderr')
# Bio::Log::CLI.trace('info')

options = {:example_switch=>false,:show_help=>false}
opts = OptionParser.new do |o|
  o.banner = "Usage: #{File.basename($0)} [options] reponame\ne.g. #{File.basename($0)} the-perfect-gem"

  o.on('--example_parameter [EXAMPLE_PARAMETER]', 'TODO: put a description for the PARAMETER') do |example_parameter|
    # TODO: your logic here, below an example
    options[:example_parameter] = 'this is a parameter'
  end

  o.separator ""
  o.on("--switch-example", 'TODO: put a description for the SWITCH') do
    # TODO: your logic here, below an example
    # The handler runs with the top-level object as self, which has no []=;
    # the flag belongs in the options hash.
    options[:example_switch] = true
  end

  # Uncomment the following when using the bio-logger
  # o.separator ""
  # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
  #   Bio::Log::CLI.logger(name)
  # end
  #
  # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
  #   Bio::Log::CLI.trace(s)
  # end
  #
  # o.on("-q", "--quiet", "Run quietly") do |q|
  #   Bio::Log::CLI.trace('error')
  # end
  #
  # o.on("-v", "--verbose", "Run verbosely") do |v|
  #   Bio::Log::CLI.trace('info')
  # end
  #
  # o.on("--debug", "Show debug messages") do |v|
  #   Bio::Log::CLI.trace('debug')
  # end

  o.separator ""
  o.on_tail('-h', '--help', 'display this help and exit') do
    options[:show_help] = true
  end
end

begin
  opts.parse!(ARGV)

  # Honour -h/--help as its description promises: print the help and stop.
  if options[:show_help]
    puts opts
    exit
  end

  # Uncomment the following when using the bio-logger
  # Bio::Log::CLI.configure('bio-cd-hit-report')

  # TODO: your code here
  # use options for your logic
rescue OptionParser::ParseError => e
  # ParseError is the parent of InvalidOption and the other option-parsing
  # failures. Report the problem instead of silently storing it, and signal
  # failure to the calling shell.
  options[:invalid_argument] = e.message
  $stderr.puts e.message
  $stderr.puts opts
  exit 1
end
@@ -0,0 +1,14 @@
1
+ require_relative "bio-cd-hit-report/cd-hit-report"
2
+
3
+ #cluster_file = "#{ENV['HOME']}/DBL_tags/cluster/cd-hit/cluster95.clstr"
4
+
5
+ #report = Bio::CdHitReport.new(cluster_file)
6
+ #puts report.max_members #print the max number of sequences in a cluster for the entire dataset
7
+ #puts report.min_members #print the minimum number of sequences in a cluster for the entire dataset
8
+ #puts report.total_clusters #print total number of clusters in the report
9
+
10
+ #report.each_cluster do |c|
11
+ #puts "#{c.name} - #{c.members}" #print cluster name/id with respective sequences in the cluster
12
+ #end
13
+
14
+
@@ -0,0 +1,42 @@
1
module Bio

  require_relative 'cluster.rb'

  # Reads a CD-HIT cluster report and exposes its clusters.
  #
  # The file is split into records on the ">Cluster " marker. Within a
  # record the first line is used as the cluster name/id and the remaining
  # lines are handed to Cluster as its raw member data.
  class CdHitReport

    # Marker that introduces each cluster record in the report file.
    RECORD_SEPARATOR = ">Cluster "

    # file - path of the cluster report. Nothing is read until the first
    #        query, and the parsed clusters are then reused.
    def initialize(file)
      @file = file
    end

    # Yields every Cluster in file order.
    def each_cluster(&block)
      cluster_objs.each(&block)
    end

    # Number of clusters in the report.
    def total_clusters
      cluster_objs.size
    end

    # Size of the largest cluster, or nil when the report has no clusters.
    def max_members
      cluster_objs.map { |c| c.size }.max
    end

    # Size of the smallest cluster, or nil when the report has no clusters.
    def min_members
      cluster_objs.map { |c| c.size }.min
    end

    private

    # Parsed clusters, built once per report object.
    def cluster_objs
      @cluster_objs ||= parse_clusters
    end

    # Turns each raw record into a Cluster.
    #
    # A file that starts with the marker produces a first record consisting
    # of the marker alone. Such records carry no cluster and are skipped, so
    # they neither inflate total_clusters nor force min_members to 0.
    def parse_clusters
      clusters = raw_data.map do |record|
        lines = record.split("\n").reject { |line| line.empty? || line == RECORD_SEPARATOR }
        Cluster.new(lines.first, lines.drop(1)) unless lines.empty?
      end
      clusters.compact
    end

    # Raw records of the report. The separator is passed explicitly so the
    # result does not depend on the global $/, and File.readlines closes the
    # file once it has been read.
    def raw_data
      File.readlines(@file, RECORD_SEPARATOR)
    end

  end #class
end #module
@@ -0,0 +1,21 @@
1
# One cluster of a CD-HIT report.
#
# name - the cluster's name/id.
# data - the raw member lines of the cluster (an enumerable of strings).
class Cluster < Struct.new(:name,:data)
  # Executed when this class is defined: sets the process-wide input record
  # separator, so code that reads with the default separator afterwards gets
  # one chunk per ">Cluster " marker instead of one per line.
  $/ = ">Cluster "

  # The id of a cluster is its name.
  alias_method :id, :name

  # Number of member names found in the raw data.
  def size
    entries.length
  end

  # Member names as one comma-separated string.
  def members
    entries.join(",")
  end

  private

  # Member names taken from the raw lines: the text between a ">" and the
  # "..." after it. Lines without that pattern contribute nothing.
  def entries
    data.flat_map { |raw_line| raw_line.scan(/>(.+)\.{3}/).flatten }
  end
end
21
+
@@ -0,0 +1,19 @@
1
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups before loading anything
# else. If Bundler fails, print its message plus a hint and exit with the
# status code it supplies.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => setup_error
  $stderr.puts setup_error.message, "Run `bundle install` to install missing gems"
  exit setup_error.status_code
end
require 'minitest/unit'

# Prepend test/ and then lib/ to the load path (lib/ ends up first) so the
# library under test is loaded from this source tree.
test_dir = File.dirname(__FILE__)
[test_dir, File.join(test_dir, '..', 'lib')].each { |dir| $LOAD_PATH.unshift(dir) }
require 'bio-cd-hit-report'

# Reopened without additions; a place for helpers shared by all test cases.
class MiniTest::Unit::TestCase; end

# Run the loaded test cases when the process exits.
MiniTest::Unit.autorun
@@ -0,0 +1,4 @@
1
require 'helper'

# Test case for bio-cd-hit-report. It declares the class only; no test
# methods are defined yet.
class TestBioCdHitReport < MiniTest::Unit::TestCase; end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-cd-hit-report
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - George Githinji
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-14 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: minitest
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rdoc
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: jeweler
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: bio
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - '='
84
+ - !ruby/object:Gem::Version
85
+ version: 1.4.2
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - '='
92
+ - !ruby/object:Gem::Version
93
+ version: 1.4.2
94
+ - !ruby/object:Gem::Dependency
95
+ name: rdoc
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ description: A bioruby wrapper for parsing and reading CD-HIT cluster reports
111
+ email: georgkam@gmail.com
112
+ executables:
113
+ - bio-cd-hit-report
114
+ extensions: []
115
+ extra_rdoc_files:
116
+ - LICENSE.txt
117
+ - README.md
118
+ files:
119
+ - .document
120
+ - .travis.yml
121
+ - Gemfile
122
+ - LICENSE.txt
123
+ - README.md
124
+ - Rakefile
125
+ - VERSION
126
+ - bin/bio-cd-hit-report
127
+ - lib/bio-cd-hit-report.rb
128
+ - lib/bio-cd-hit-report/cd-hit-report.rb
129
+ - lib/bio-cd-hit-report/cluster.rb
130
+ - test/helper.rb
131
+ - test/test_bio-cd-hit-report.rb
132
+ homepage: http://github.com/georgeG/bioruby-cd-hit-report
133
+ licenses:
134
+ - MIT
135
+ post_install_message:
136
+ rdoc_options: []
137
+ require_paths:
138
+ - lib
139
+ required_ruby_version: !ruby/object:Gem::Requirement
140
+ none: false
141
+ requirements:
142
+ - - ! '>='
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ segments:
146
+ - 0
147
+ hash: -2084256407332630455
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ none: false
150
+ requirements:
151
+ - - ! '>='
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ requirements: []
155
+ rubyforge_project:
156
+ rubygems_version: 1.8.24
157
+ signing_key:
158
+ specification_version: 3
159
+ summary: A bioruby wrapper for parsing and reading CD-HIT cluster reports
160
+ test_files: []