qu-seqcluster 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e998132776509d1b787ab5ba02ebf81c43179ce9
4
+ data.tar.gz: ec82c798b27f9eaeb12f8554be750adb46b1a8d7
5
+ SHA512:
6
+ metadata.gz: f8027e2bfa6f14d3d9c7664f2efac98de8dc19e8786552c47ce759c2cf4a7912b8cf5b77b86112ea3b44e7ae98e01fd66be86363ee768ed22adf2f6c88ad8ccf
7
+ data.tar.gz: 51ad5ab6631c161db2e08025c6228fed584dfb640cde2cc1a956dc23e3373689708f0476b4620e8664fb3f3062a551b4ec13a84728f0ae5692d3749054ea0693
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ test
19
+ .DS_store
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in qu-seqcluster.gemspec
4
+ gemspec
5
+ gem 'qu-utils'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Wubin Qu
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,45 @@
1
+ # Qu::Seqcluster
2
+
3
+ Cluster DNA/RNA based on k-mer algorithm
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'qu-seqcluster'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install qu-seqcluster
18
+
19
+ ## Usage
20
+
21
+ ```
22
+ Usage: seqcluster -i seq.fasta -k 9 -c 0.9
23
+
24
+ Required options:
25
+
26
+ -i, --in File Input sequence file in fasta format.
27
+
28
+ Optional options:
29
+
30
+ -k, --kvalue Integer K value, default is 9.
31
+ -o, --out File Output file name for storing the results, default is screen.
32
+ -c, --cutoff Float Cutoff value for the similarity for cluster, default is 0.9.
33
+
34
+
35
+ -h, --help Show this message and quit
36
+ -v, --version Show version
37
+ ```
38
+
39
+ ## Contributing
40
+
41
+ 1. Fork it
42
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
43
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
44
+ 4. Push to the branch (`git push origin my-new-feature`)
45
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/seqcluster ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'qu/seqcluster'
4
+
5
# Turn the command-line arguments into an options object, then hand it to
# the runner, which performs the clustering and writes the report.
options = Qu::Seqcluster::Options.new(ARGV).opts
runner = Qu::Seqcluster::Runner.new(options)
runner.run
@@ -0,0 +1,13 @@
1
+ require "qu/utils"
2
+
3
+ require_relative "seqcluster/program"
4
+ require_relative "seqcluster/version"
5
+ require_relative "seqcluster/options"
6
+ require_relative "seqcluster/cluster"
7
+ require_relative "seqcluster/runner"
8
+
9
+ module Qu
10
+ module Seqcluster
11
+ # Namespace for the seqcluster tool; its components are loaded by the require_relative lines above.
12
+ end
13
+ end
@@ -0,0 +1,53 @@
1
+ require 'set'
2
+
3
module Qu
  module Seqcluster
    # Greedy k-mer clustering of the sequences stored in a FASTA file.
    #
    # Records are processed longest first. The first remaining record becomes
    # the seed of a new cluster; every other remaining record whose similarity
    # to the seed (on either strand) reaches the cutoff joins that cluster and
    # is removed from further consideration.
    class Cluster
      attr_reader :opts

      # @param opts [#in, #kvalue, #cutoff] options object; +in+ is the path of
      #   the FASTA file, +kvalue+ the k-mer length and +cutoff+ the minimum
      #   similarity a record needs to join a cluster.
      def initialize(opts)
        @opts = opts
      end

      # Reads the input file and groups its records into clusters.
      #
      # @return [Array<Array>] one array per cluster. The first element is the
      #   seed record itself; every following element is a
      #   +[record, similarity, strand]+ triple, where +strand+ is '+' or '-'.
      def find_cluster
        # The block form closes the file handle once all records are loaded.
        records = File.open(@opts.in) do |io|
          Bio::FlatFile.new(Bio::FastaFormat, io).to_a
        end
        records.sort_by! { |r| -r.seq.size }

        groups = []
        until records.empty?
          seed_record = records.shift
          group = [seed_record]
          seed_kmer_set = seed_kmer(seed_record, @opts.kvalue, 1)

          # Single pass: members are collected and dropped from the pool at
          # the same time, instead of re-scanning the group for every record.
          records = records.reject do |record|
            similarity, strand = best_strand(record, seed_kmer_set)
            matched = similarity >= @opts.cutoff
            group << [record, similarity, strand] if matched
            matched
          end

          groups << group
        end

        groups
      end

      private

      # Scores a record against the seed on both strands.
      #
      # @return [Array(Float, String)] the higher similarity and its strand;
      #   the forward strand ('+') wins when both strands score the same.
      def best_strand(record, seed_kmer_set)
        plus = cal_similarity(record.naseq, @opts.kvalue, seed_kmer_set)
        minus = cal_similarity(record.naseq.reverse_complement, @opts.kvalue, seed_kmer_set)

        plus >= minus ? [plus, '+'] : [minus, '-']
      end

      # Collects every window of length +window+ (advancing by +step+) of the
      # record's nucleotide sequence into a set of symbols.
      def seed_kmer(record, window, step)
        mer_set = Set.new
        record.naseq.window_search(window, step) { |s| mer_set.add(s.to_sym) }
        mer_set
      end

      # Fraction of the non-overlapping k-mers of +seq+ that occur in
      # +seed_kmer_set+.
      #
      # The windows are counted directly rather than derived from the value
      # returned by +window_search+. Assuming that value is the uncovered tail
      # of the sequence, the result equals the previous
      # (hits * k) / (seq.size - remainder.size) formula. A sequence that
      # yields no window at all (e.g. shorter than k) scores 0.0 instead of
      # producing NaN or failing on a missing remainder.
      #
      # @return [Float] similarity between 0.0 and 1.0
      def cal_similarity(seq, k, seed_kmer_set)
        windows = 0
        hits = 0
        seq.window_search(k, k) do |kmer|
          windows += 1
          hits += 1 if seed_kmer_set.include?(kmer.to_sym)
        end
        return 0.0 if windows.zero?

        hits.to_f / windows
      end
    end # Cluster
  end
end
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'ostruct'
5
+
6
module Qu
  module Seqcluster

    module_function

    # Builds the option set used when nothing is given on the command line.
    #
    # @return [OpenStruct] with +in+ (nil), +out+ ($stdout), +kvalue+ (9)
    #   and +cutoff+ (0.9)
    def default_options
      options = OpenStruct.new
      options.in = nil
      options.out = $stdout

      options.kvalue = 9
      options.cutoff = 0.9

      options
    end

    # Command-line option parser for the seqcluster executable.
    #
    # Usage errors (bad flag, unreadable input, missing -i) are reported on
    # $stderr and terminate the process with exit status 1. --help and
    # --version print to $stdout and exit with status 0.
    class Options

      attr_reader :opts

      # @param argv [Array<String>] raw command-line arguments
      def initialize(argv)
        @opts = parse_opts(argv)
      end

      # Parses +argv+ on top of the defaults from Seqcluster.default_options.
      #
      # @param argv [Array<String>] raw command-line arguments; recognised
      #   options are removed from a non-empty array by OptionParser#parse!
      # @return [OpenStruct] the populated options
      def parse_opts(argv)
        options = Seqcluster.default_options
        parser = build_parser(options)

        begin
          # With no arguments at all, behave as if help had been requested.
          argv = ['-h'] if argv.empty?
          parser.parse!(argv)
        rescue OptionParser::ParseError => e
          $stderr.puts e.message, "\n", parser
          exit 1
        end

        fail_with 'Error: option -i required.' if options.in.nil?

        options
      end

      private

      # Creates the OptionParser whose callbacks write into +options+.
      def build_parser(options)
        OptionParser.new do |opts|
          opts.banner = "#{PROGRAM} [#{VERSION}]: Cluster DNA/RNA based on k-mer algorithm\n" \
                        "Usage: #{PROGRAM} -i seq.fasta -k 9 -c 0.9"

          opts.separator ""
          opts.separator "Required options:"
          opts.separator ""

          opts.on('-i', '--in File', String, 'Input sequence file in fasta format.') do |value|
            fail_with "Error: #{value} is a directory." if File.directory?(value)
            # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
            fail_with "Error: #{value} does not exist." unless File.exist?(value)

            options.in = value
          end

          opts.separator ""
          opts.separator "Optional options:"
          opts.separator ""

          opts.on('-k', '--kvalue Integer', Integer, "K value, default is #{options.kvalue}.") do |value|
            options.kvalue = value
          end

          opts.on("-o", "--out File", String, "Output file name for storing the results, default is screen.") do |value|
            # The handle is opened here and handed to whoever writes the report.
            options.out = File.open(value, 'w')
          end

          opts.on('-c', '--cutoff Float', Float, "Cutoff value for the similarity for cluster, default is #{options.cutoff}.") do |value|
            options.cutoff = value
          end

          opts.separator ""
          opts.separator ""

          opts.on("-h", "--help", "Show this message and quit") do
            puts opts
            exit
          end

          opts.on("-v", "--version", "Show version") do
            puts "#{PROGRAM} #{VERSION}"
            exit
          end

          opts.separator ""
          opts.separator "Author: Wubin Qu <quwubin@gmail.com>"
          opts.separator ""
        end
      end

      # Prints +message+ to $stderr and exits with a non-zero status so that
      # shells and scripts can detect the failure.
      def fail_with(message)
        $stderr.puts message
        exit 1
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module Qu
  module Seqcluster
    # Name of the command-line program, as shown in the usage banner and in
    # the --version output. Frozen so the shared constant cannot be mutated.
    PROGRAM = 'seqcluster'.freeze
  end
end
@@ -0,0 +1,29 @@
1
+ require_relative 'options'
2
+ require_relative 'cluster'
3
+
4
module Qu
  module Seqcluster
    # Runs the clustering for one set of options and writes a plain-text
    # report to the configured output.
    class Runner
      attr_reader :opts

      # @param opts [#out] options object; +out+ must respond to +puts+.
      #   The same object is passed on to Cluster by #run.
      def initialize(opts)
        @opts = opts
      end

      # Clusters the input and prints the report.
      #
      # @return [Array<Array>] the groups produced by Cluster#find_cluster
      def run
        groups = Cluster.new(@opts).find_cluster
        output(groups)
      end

      # Writes the report: a count line, then one header line per cluster
      # followed by a tab-indented line per member.
      #
      # @param groups [Array<Array>] each group starts with the seed record;
      #   the remaining elements are +[record, similarity, strand]+ triples
      # @return [Array<Array>] +groups+, unchanged
      def output(groups)
        out = @opts.out
        out.puts "Cluster count: #{groups.size}"

        groups.each_with_index do |group, group_index|
          seed, *members = group
          out.puts "Cluster #{group_index + 1}: #{seed.entry_id}"
          members.each do |record, similarity, strand|
            out.puts "\tMember: #{record.entry_id} [#{strand}/#{similarity.round(2)}]"
          end
        end

        # The output may be a file handle that nobody closes explicitly, so
        # push buffered data out as soon as the report is complete.
        out.flush if out.respond_to?(:flush)

        groups
      end

    end
  end
end
@@ -0,0 +1,5 @@
1
module Qu
  module Seqcluster
    # Version string of the gem; also printed by the --version option.
    # Frozen so the shared constant cannot be mutated.
    VERSION = "1.0.0".freeze
  end
end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'qu/seqcluster/version'
5
+
6
# Gem specification for qu-seqcluster. The packaged file list is taken from
# the files tracked by git at build time.
Gem::Specification.new do |gem|
  # Identity
  gem.name    = "qu-seqcluster"
  gem.version = Qu::Seqcluster::VERSION
  gem.license = "MIT"

  # Authorship and description
  gem.authors     = ["Wubin Qu"]
  gem.email       = ["quwubin@gmail.com"]
  gem.summary     = %q{k-mer based cluster program}
  gem.description = %q{Cluster DNA/RNA based on k-mer algorithm}
  gem.homepage    = "https://github.com/quwubin/qu-seqcluster"

  # Contents: everything under bin/ becomes an executable, and anything under
  # test/, spec/ or features/ is registered as a test file.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Dependencies
  gem.add_runtime_dependency 'qu-utils', '~> 1.0'

  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: qu-seqcluster
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Wubin Qu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: qu-utils
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Cluster DNA/RNA based on k-mer algorithm
56
+ email:
57
+ - quwubin@gmail.com
58
+ executables:
59
+ - seqcluster
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - bin/seqcluster
69
+ - lib/qu/seqcluster.rb
70
+ - lib/qu/seqcluster/cluster.rb
71
+ - lib/qu/seqcluster/options.rb
72
+ - lib/qu/seqcluster/program.rb
73
+ - lib/qu/seqcluster/runner.rb
74
+ - lib/qu/seqcluster/version.rb
75
+ - qu-seqcluster.gemspec
76
+ homepage: https://github.com/quwubin/qu-seqcluster
77
+ licenses:
78
+ - MIT
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.2.0
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: k-mer based cluster program
100
+ test_files: []