qu-seqcluster 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e998132776509d1b787ab5ba02ebf81c43179ce9
4
+ data.tar.gz: ec82c798b27f9eaeb12f8554be750adb46b1a8d7
5
+ SHA512:
6
+ metadata.gz: f8027e2bfa6f14d3d9c7664f2efac98de8dc19e8786552c47ce759c2cf4a7912b8cf5b77b86112ea3b44e7ae98e01fd66be86363ee768ed22adf2f6c88ad8ccf
7
+ data.tar.gz: 51ad5ab6631c161db2e08025c6228fed584dfb640cde2cc1a956dc23e3373689708f0476b4620e8664fb3f3062a551b4ec13a84728f0ae5692d3749054ea0693
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ test
19
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in qu-seqcluster.gemspec
4
+ gemspec
5
+ gem 'qu-utils'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Wubin Qu
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,45 @@
1
+ # Qu::Seqcluster
2
+
3
+ Cluster DNA/RNA based on k-mer algorithm
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'qu-seqcluster'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install qu-seqcluster
18
+
19
+ ## Usage
20
+
21
+ ```
22
+ Usage: seqcluster -i seq.fasta -k 9 -c 0.9
23
+
24
+ Required options:
25
+
26
+ -i, --in File Input sequence file in fasta format.
27
+
28
+ Optional options:
29
+
30
+ -k, --kvalue Integer K value, default is 9.
31
+ -o, --out File Output file name for storing the results, default is screen.
32
+ -c, --cutoff Float Cutoff value for the similarity for cluster, default is 0.9.
33
+
34
+
35
+ -h, --help Show this message and quit
36
+ -v, --version Show version
37
+ ```
38
+
39
+ ## Contributing
40
+
41
+ 1. Fork it
42
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
43
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
44
+ 4. Push to the branch (`git push origin my-new-feature`)
45
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/seqcluster ADDED
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby

require 'qu/seqcluster'

# Command-line entry point: turn ARGV into an options object, then hand it
# to the runner, which performs the clustering and writes the report.
parsed_options = Qu::Seqcluster::Options.new(ARGV).opts
runner = Qu::Seqcluster::Runner.new(parsed_options)
runner.run
@@ -0,0 +1,13 @@
1
+ require "qu/utils"
2
+
3
+ require_relative "seqcluster/program"
4
+ require_relative "seqcluster/version"
5
+ require_relative "seqcluster/options"
6
+ require_relative "seqcluster/cluster"
7
+ require_relative "seqcluster/runner"
8
+
9
+ module Qu
10
+ module Seqcluster
11
+ # Namespace for the seqcluster library; its classes are defined in the files required above.
12
+ end
13
+ end
@@ -0,0 +1,53 @@
1
+ require 'set'
2
+
3
+ module Qu
4
+ module Seqcluster
5
# Greedy k-mer based clustering of the records in a FASTA file.
#
# The longest remaining record seeds each cluster; every other remaining
# record whose k-mer similarity to the seed (on either strand) reaches the
# cutoff joins that cluster and is removed from further consideration.
class Cluster
  attr_reader :opts

  # @param opts [#in, #kvalue, #cutoff] settings object: +in+ is the path of
  #   the FASTA file, +kvalue+ the k-mer length, +cutoff+ the minimum
  #   similarity a record needs to join a cluster.
  def initialize(opts)
    @opts = opts
  end

  # Reads the input file and partitions its records into clusters.
  #
  # @return [Array<Array>] one array per cluster. The first element is the
  #   seed record itself; every following element is a
  #   +[record, similarity, strand]+ triple where strand is '+' or '-'.
  def find_cluster
    # Block form guarantees the file handle is closed once all records are read.
    records = File.open(@opts.in) do |file|
      Bio::FlatFile.new(Bio::FastaFormat, file).to_a
    end
    # Longest sequences first, so each seed is the longest unassigned record.
    records.sort_by! { |r| -r.seq.size }

    groups = []
    until records.empty?
      seed_record = records.shift
      group = [seed_record]
      seed_kmer_set = seed_kmer(seed_record, @opts.kvalue, 1)

      # Single pass: matching records join the group, the others are kept
      # for the next seed. This replaces a second scan that rebuilt the list
      # of group members for every remaining record.
      unassigned = []
      records.each do |record|
        plus = cal_similarity(record.naseq, @opts.kvalue, seed_kmer_set)
        minus = cal_similarity(record.naseq.reverse_complement, @opts.kvalue, seed_kmer_set)

        # Ties go to the minus strand, as the comparison is strict.
        similarity, strand = plus > minus ? [plus, '+'] : [minus, '-']

        if similarity >= @opts.cutoff
          group << [record, similarity, strand]
        else
          unassigned << record
        end
      end

      groups << group
      records = unassigned
    end

    groups
  end

  private

  # Collects every k-mer of the record's nucleotide sequence.
  #
  # @param record [#naseq] sequence record
  # @param window [Integer] k-mer length
  # @param step [Integer] distance between successive window starts
  # @return [Set<Symbol>] the distinct k-mers, as symbols
  def seed_kmer(record, window, step)
    mer_set = Set.new
    record.naseq.window_search(window, step) { |s| mer_set.add(s.to_sym) }
    mer_set
  end

  # Fraction of the non-overlapping k-mers of +seq+ that occur in the seed.
  #
  # Windows are counted directly instead of deriving the covered length
  # from the value returned by +window_search+, so a sequence with no full
  # window (shorter than +k+) scores 0.0 rather than dividing by zero.
  #
  # @param seq [#window_search] sequence to score
  # @param k [Integer] k-mer length, also used as the step (no overlap)
  # @param seed_kmer_set [Set<Symbol>] k-mers of the seed sequence
  # @return [Float] similarity in the range 0.0..1.0
  def cal_similarity(seq, k, seed_kmer_set)
    windows = 0
    hits = 0
    seq.window_search(k, k) do |s|
      windows += 1
      hits += 1 if seed_kmer_set.include?(s.to_sym)
    end
    return 0.0 if windows.zero?

    # Same ratio as (hits * k) / (windows * k): matched bases over the
    # bases covered by full windows.
    hits.to_f / windows
  end
end # Cluster
52
+ end
53
+ end
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'ostruct'
5
+
6
+ module Qu
7
+ module Seqcluster
8
+
9
+ module_function
10
+
11
# Builds the settings used when the command line overrides nothing.
#
# @return [OpenStruct] with +in+ unset (nil), +out+ pointing at $stdout,
#   +kvalue+ 9 and +cutoff+ 0.9
def default_options
  OpenStruct.new(in: nil, out: $stdout, kvalue: 9, cutoff: 0.9)
end
21
+
22
# Parses the seqcluster command line into an options object.
class Options
  attr_reader :opts

  # @param argv [Array<String>] raw command-line arguments
  def initialize(argv)
    @opts = parse_opts(argv)
  end

  # Parses +argv+ on top of the default options.
  #
  # Terminates the process instead of returning when parsing cannot
  # continue: status 0 after printing help or the version, status 1 after
  # reporting an invalid or missing option on $stderr.
  #
  # @param argv [Array<String>] raw command-line arguments; an empty list
  #   is treated as a request for help
  # @return [OpenStruct] defaults overridden by the given flags
  def parse_opts(argv)
    options = Seqcluster::default_options

    OptionParser.new do |opts|
      opts.banner = "#{PROGRAM} [#{VERSION}]: Cluster DNA/RNA based on k-mer algorithm\n" \
                    "Usage: #{PROGRAM} -i seq.fasta -k 9 -c 0.9"

      opts.separator ""
      opts.separator "Required options:"
      opts.separator ""

      opts.on('-i', '--in File', String, 'Input sequence file in fasta format.') do |value|
        if File.directory?(value)
          $stderr.puts "Error: #{value} is a directory."
          exit 1
        end

        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
        unless File.exist?(value)
          $stderr.puts "Error: #{value} does not exist."
          exit 1
        end
        options.in = value
      end

      opts.separator ""
      opts.separator "Optional options:"
      opts.separator ""

      opts.on('-k', '--kvalue Integer', Integer, "K value, default is #{options.kvalue}.") do |value|
        # k is used as a window size and step, so it must be at least 1.
        raise OptionParser::InvalidArgument, "must be a positive integer" unless value > 0

        options.kvalue = value
      end

      opts.on("-o", "--out File", String, "Output file name for storing the results, default is screen.") do |value|
        options.out = File.open(value, 'w')
      end

      opts.on('-c', '--cutoff Float', Float, "Cutoff value for the similarity for cluster, default is #{options.cutoff}.") do |value|
        options.cutoff = value
      end

      opts.separator ""
      opts.separator ""

      opts.on("-h", "--help", "Show this message and quit") do
        puts opts
        exit
      end

      opts.on("-v", "--version", "Show version") do
        puts "#{PROGRAM} #{VERSION}"
        exit
      end

      opts.separator ""
      opts.separator "Author: Wubin Qu <quwubin@gmail.com>"
      opts.separator ""

      begin
        argv = ["-h"] if argv.empty?
        opts.parse!(argv)
      rescue OptionParser::ParseError => e
        $stderr.puts e.message, "\n", opts
        exit 1
      end

      if options.in.nil?
        $stderr.puts "Error: option -i required."
        exit 1
      end
    end

    options
  end
end
105
+ end
106
+ end
@@ -0,0 +1,5 @@
1
module Qu
  module Seqcluster
    # Executable name shown in the help banner and the version output.
    # Frozen so the shared constant cannot be mutated in place.
    PROGRAM = 'seqcluster'.freeze
  end
end
@@ -0,0 +1,29 @@
1
+ require_relative 'options'
2
+ require_relative 'cluster'
3
+
4
+ module Qu
5
+ module Seqcluster
6
# Runs one clustering job and writes the textual report.
class Runner
  attr_reader :opts

  # @param opts [#out] settings object; +out+ receives the report and the
  #   whole object is passed on to Cluster
  def initialize(opts)
    @opts = opts
  end

  # Clusters the input described by the options and prints the result.
  def run
    output(Cluster.new(@opts).find_cluster)
  end

  # Writes the cluster count, then each cluster's seed followed by its
  # members with strand and similarity rounded to two decimals.
  #
  # @param groups [Array<Array>] clusters; the first element of each is the
  #   seed record, the rest are +[record, similarity, strand]+ triples
  def output(groups)
    sink = @opts.out
    sink.puts "Cluster count: #{groups.size}"
    groups.each.with_index(1) do |(seed, *members), number|
      sink.puts "Cluster #{number}: #{seed.entry_id}"
      members.each do |member, score, orientation|
        sink.puts "\tMember: #{member.entry_id} [#{orientation}/#{score.round(2)}]"
      end
    end
  end

end
28
+ end
29
+ end
@@ -0,0 +1,5 @@
1
module Qu
  module Seqcluster
    # Release version of the gem; read by the gemspec and the command-line
    # banner. Frozen so the shared constant cannot be mutated in place.
    VERSION = "1.0.0".freeze
  end
end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'qu/seqcluster/version'
5
+
6
# Package definition for the qu-seqcluster gem.
Gem::Specification.new do |gem|
  # Identity
  gem.name          = "qu-seqcluster"
  gem.version       = Qu::Seqcluster::VERSION
  gem.authors       = ["Wubin Qu"]
  gem.email         = ["quwubin@gmail.com"]
  gem.description   = "Cluster DNA/RNA based on k-mer algorithm"
  gem.summary       = "k-mer based cluster program"
  gem.homepage      = "https://github.com/quwubin/qu-seqcluster"
  gem.license       = "MIT"

  # Contents: everything tracked by git; executables and tests are derived
  # from that list by path prefix.
  tracked_files     = `git ls-files`.split($/)
  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Dependencies
  gem.add_runtime_dependency 'qu-utils', '~> 1.0'

  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: qu-seqcluster
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Wubin Qu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: qu-utils
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Cluster DNA/RNA based on k-mer algorithm
56
+ email:
57
+ - quwubin@gmail.com
58
+ executables:
59
+ - seqcluster
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - bin/seqcluster
69
+ - lib/qu/seqcluster.rb
70
+ - lib/qu/seqcluster/cluster.rb
71
+ - lib/qu/seqcluster/options.rb
72
+ - lib/qu/seqcluster/program.rb
73
+ - lib/qu/seqcluster/runner.rb
74
+ - lib/qu/seqcluster/version.rb
75
+ - qu-seqcluster.gemspec
76
+ homepage: https://github.com/quwubin/qu-seqcluster
77
+ licenses:
78
+ - MIT
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.2.0
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: k-mer based cluster program
100
+ test_files: []