qu-mfeindex 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 078d0efd0389e0fb00ddb3434b227f57a329960e
4
+ data.tar.gz: 3e899029902dd46daec7ec7ba7341c7ea04f19a0
5
+ SHA512:
6
+ metadata.gz: 0196c1634bdfc2455fce27addf74687745bdb9106894d1ed84780800cabf64fc89832cf9c28f97f24e4c6be88a027a4605263bb7545c2737772bd0fbbe051ba5
7
+ data.tar.gz: 84950a7f7cc5003a464d8dcc53a3a712a56748960af70abd70b35956a00a9dd84430698002b80aac5804f9d837e40dddbf3a54d8a4bc0c5d949606c6560130bf
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ test
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
source 'https://rubygems.org'

# Runtime and development dependencies are declared in qu-mfeindex.gemspec.
gemspec

# The sibling gems are published under the names the gemspec declares
# ('qu-utils', 'qu-cmdwrapper'); 'qu/utils' and 'qu/cmdwrapper' are the
# require paths used in the library code, not gem names.
gem 'qu-utils'
gem 'qu-cmdwrapper'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Wubin Qu
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Qu::Mfeindex
2
+
3
+ DNA sequence indexer originally developed for MFEprimer-2.0
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'qu-mfeindex'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install qu-mfeindex
18
+
19
+ ## Usage
20
+
21
+ `mfeindex fasta_file [kvalue]`
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/mfeindex ADDED
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'qu/mfeindex'
4
+
5
# Split a large FASTA database into smaller FASTA files stored in the
# directory returned by Qu::Mfeindex::get_big_db_dir(big_db).
#
# Chunks are named "<basename>.<index>" and a new chunk is only started on a
# record header ('>') once the current chunk has reached
# Qu::Mfeindex::BIG_DB_SPLIT_CUTOFF bytes, so records are never cut in half.
#
# If chunks whose total size is at least the size of +big_db+ already exist,
# the user is asked whether to resplit; answering "n" (the default) returns
# the existing chunks.
#
# @param big_db [String] path of the FASTA file to split
# @return [Array<String>] chunk paths, ordered by chunk index
def split_db(big_db)
  big_db_dir = Qu::Mfeindex::get_big_db_dir(big_db)
  Dir.mkdir(big_db_dir) unless File.directory?(big_db_dir)

  prefix = File.basename(big_db)
  # Escape the prefix (file names usually contain '.') and accept multi-digit
  # indices, otherwise chunk 10 and above would be ignored.
  chunk_pattern = /\A#{Regexp.escape(prefix)}\.(\d+)\z/

  existing = Dir.entries(big_db_dir).select { |name| name =~ chunk_pattern }
  existing = existing.sort_by { |name| name[chunk_pattern, 1].to_i }
  existing = existing.map { |name| File.join(big_db_dir, name) }
  sum_size = existing.inject(0) { |total, path| total + File.size(path) }

  if !existing.empty? && sum_size >= File.size(big_db)
    $stdout.print "#{big_db} has been splited, do you want to resplit it? [y/N]:"
    answer = $stdin.gets
    choice = answer.nil? ? '' : answer.chomp.downcase
    choice = 'n' if choice.empty?
    until ['y', 'n'].include?(choice)
      $stdout.print "The choice should be 'y' or 'n':"
      answer = $stdin.gets
      # End of input: fall back to the default instead of crashing on nil.
      choice = answer.nil? ? 'n' : answer.chomp.downcase
    end

    return existing if choice == 'n'
  end

  small_file_list = []
  # Opens the next chunk for writing and records its path.
  open_chunk = lambda do
    path = File.join(big_db_dir, "#{prefix}.#{small_file_list.size}")
    small_file_list << path
    File.open(path, 'w')
  end

  fh = open_chunk.call
  begin
    # File.foreach closes the input when done.
    File.foreach(big_db) do |line|
      if line.start_with?('>') && fh.size >= Qu::Mfeindex::BIG_DB_SPLIT_CUTOFF
        fh.close
        fh = open_chunk.call
      end
      fh.write(line)
    end
  ensure
    fh.close unless fh.closed?
  end

  small_file_list
end
54
+
55
+
56
# Index a single FASTA file with the given k-mer size.
#
# When an index already exists the user is asked whether to rebuild it
# (default: no); otherwise the index is always built.
#
# @param fasta_file [String] path of the FASTA file
# @param kvalue [Integer] k-mer size passed on to the indexer
def index_db(fasta_file, kvalue)
  answer = 'y'

  if Qu::Mfeindex::db_indexed?(fasta_file)
    $stdout.print "#{fasta_file} has already been indexed, do you want to reindex? [y/N]:"
    answer = $stdin.gets.chomp.downcase
    answer = 'n' if answer.empty?
    until %w[y n].include?(answer)
      $stdout.print "The choice should be 'y' or 'n':"
      answer = $stdin.gets.chomp.downcase
    end
  end

  # The third argument is the reindex flag.
  Qu::Mfeindex::MFEprimerIndex(fasta_file, kvalue, answer == 'y')
end
75
+
76
+
77
+ usage = "Index database for MFEprimer-2.0
78
+
79
+ Usage:
80
+
81
+ #{File.basename($0)} fasta_file [kvalue]
82
+
83
+
84
+ Options:
85
+
86
+ kvalue: Default is 9 [Integer].
87
+
88
+ Contact: Wubin Qu <quwubin@gmail.com>
89
+ "
90
+
91
# Command line: fasta_file [kvalue]; kvalue defaults to 9.
case ARGV.size
when 2
  fasta_file = ARGV[0]
  kvalue = ARGV[1].to_i
when 1
  fasta_file = ARGV[0]
  kvalue = 9
else
  $stderr.puts usage
  exit 1
end

# String#to_i turns non-numeric text into 0; reject it instead of indexing
# with a meaningless k.
unless kvalue > 0
  $stderr.puts "Error: kvalue should be a positive integer.\n"
  $stderr.puts usage
  exit 1
end

# File.exists? was removed in Ruby 3.2; File.exist? works on every version.
unless File.exist?(fasta_file)
  $stderr.puts "Error: #{fasta_file} does not exist.\n"
  $stderr.puts usage
  exit 1
end

if File.size(fasta_file) > Qu::Mfeindex::BIG_DB_SPLIT_CUTOFF
  $stdout.print "#{fasta_file} is too large, do you want to split it first? [Y/n]:"
  answer = $stdin.gets
  choice = answer.nil? ? '' : answer.chomp.downcase
  choice = 'y' if choice.empty?
  until ['y', 'n'].include?(choice)
    $stdout.print "The choice should be 'y' or 'n':"
    answer = $stdin.gets
    # End of input: fall back to the default answer.
    choice = answer.nil? ? 'y' : answer.chomp.downcase
  end
else
  choice = 'n'
end

if choice == 'y'
  # Index every chunk of the split database separately.
  split_db(fasta_file).each do |small_file|
    index_db(small_file, kvalue)
  end
else
  index_db(fasta_file, kvalue)
end
@@ -0,0 +1,166 @@
1
+ require 'qu/utils'
2
+ require 'qu/cmdwrapper'
3
+
4
+ require_relative "mfeindex/data"
5
+ require_relative "mfeindex/version"
6
+ require 'json'
7
+
8
+ module Qu
9
+ module Mfeindex
10
+ # Your code goes here...
11
+
12
+ module_function
13
+
14
# Path of the directory that holds the split pieces of a large database.
#
# @param db [String] path of the original FASTA database
# @return [String] +db+ with the BIG_DB suffix appended
def get_big_db_dir(db)
  big_db_dir = db + BIG_DB
  big_db_dir
end
17
+
18
# Expand a list of databases into the list of files that should be searched.
#
# For each entry, in this order:
# * already indexed          -> kept as is
# * has a split directory    -> replaced by its chunks "<basename>.<index>",
#                               ordered by index
# * does not exist           -> error message and exit
# * larger than the cutoff   -> asks the user to run mfeindex first and exits
# * anything else            -> kept as is
#
# @param db_list [Array<String>] database paths
# @return [Array<String>] database and chunk paths
def check_db(db_list)
  new_db_list = []

  db_list.each do |db|
    big_db_dir = get_big_db_dir(db)
    if db_indexed?(db)
      new_db_list << db
    elsif File.directory?(big_db_dir)
      # Escape the basename and accept multi-digit indices so that chunks
      # numbered 10 and above are not silently dropped.
      chunk_pattern = /\A#{Regexp.escape(File.basename(db))}\.(\d+)\z/
      chunks = Dir.entries(big_db_dir).select { |name| name =~ chunk_pattern }
      chunks.sort_by { |name| name[chunk_pattern, 1].to_i }.each do |name|
        new_db_list << File.join(big_db_dir, name)
      end
    elsif !File.exist?(db) # File.exists? was removed in Ruby 3.2
      $stderr.puts "Error: #{db} is not exists."
      exit 1
    elsif File.size(db) > BIG_DB_SPLIT_CUTOFF
      $stderr.puts "Warning: #{db} is too large, please use mfeindex to index the db first."
      exit 1
    else
      new_db_list << db
    end
  end

  new_db_list
end
45
+
46
# Tell whether all three index files (SQLite3 positions, JSON record info and
# 2bit sequence) exist next to the database.
#
# @param db [String] path of the FASTA database
# @return [Boolean]
def db_indexed?(db)
  [DB_SQLITE3, DB_JSON, DB_2BIT].all? { |suffix| File.exist?(db + suffix) }
end
49
+
50
# Build the MFEprimer index files for a FASTA database.
#
# Writes, next to +fasta_file+:
# * <fasta_file> + DB_JSON    - JSON map from record number to id/desc/size
# * <fasta_file> + DB_2BIT    - 2bit file produced by Qu::Cmdwrapper::faToTwoBit
# * <fasta_file> + DB_SQLITE3 - k-mer position database written by the
#                               bundled pymfeindex script
# A temporary "<fasta_file>.unifasta" file (records renamed to their number)
# is created and removed afterwards.
#
# @param fasta_file [String] path of the FASTA database
# @param k [Integer] k-mer size handed to pymfeindex
# @param reindex [Boolean] rebuild even when the index files already exist
# @return [nil]
def MFEprimerIndex(fasta_file, k = 9, reindex = false)
  return if !reindex && db_indexed?(fasta_file)

  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  unless File.exist?(fasta_file)
    $stderr.puts "Error: #{fasta_file} is not exists."
    exit 1
  end

  info_json = {}
  uni_fasta = fasta_file + '.unifasta'

  # Block forms make sure both handles are closed.
  File.open(fasta_file) do |input|
    File.open(uni_fasta, 'w') do |fh|
      Bio::FlatFile.new(Bio::FastaFormat, input).each_with_index do |record, index|
        info_json[index] = {'id' => record.entry_name, 'desc' => record.desc, 'size' => record.naseq.size}
        fh.write ">#{index}\n#{record.naseq}\n"
      end
    end
  end

  File.open(fasta_file + DB_JSON, 'w') do |fh|
    fh.write(JSON.dump(info_json))
  end

  Qu::Cmdwrapper::faToTwoBit(uni_fasta, fasta_file + DB_2BIT)

  cmd = File.join(__dir__, 'pymfeindex')
  $stderr.puts "Begin index database: #{fasta_file}"
  # Argument-array form: no shell is involved, so paths containing spaces or
  # shell metacharacters are passed through unchanged. The script's stdout is
  # read and discarded, as the backtick call did.
  indexed = begin
    IO.popen([cmd, '-f', uni_fasta, '-k', k.to_s, '-o', fasta_file + DB_SQLITE3]) { |io| io.read }
    $?.success?
  rescue SystemCallError
    false
  end

  begin
    File.delete(uni_fasta)
  rescue SystemCallError
    if File.exist?(uni_fasta)
      $stderr.puts "You can delete the file #{uni_fasta} by hand."
    end
  end

  if indexed
    $stderr.puts "Done index database: #{fasta_file}"
  else
    $stderr.puts "Error: failed to index database: #{fasta_file}"
  end
end
86
+
87
# Decode a k-mer id back into its DNA string.
#
# The integer is written in base +base_number+, every digit is translated
# through D2I, and the result is left-padded with 'A' up to +k+ characters.
#
# @param int [Integer] k-mer id
# @param k [Integer] k-mer length
# @param base_number [Integer] numeric base of the encoding
# @return [String]
def int2dna(int, k=9, base_number=4)
  digits = int.to_s(base_number)
  bases = digits.each_char.inject('') { |seq, digit| seq + D2I[digit.to_i] }
  'A' * (k - digits.length) + bases
end
95
+
96
# Encode a DNA string as an integer (plus-strand id).
#
# The sequence is upcased and read as a number in base +base_number+, the
# first base being the most significant digit; digits come from D2I.
#
# @param dna [String] DNA sequence
# @param base_number [Integer] numeric base of the encoding
# @return [Integer]
def dna2int(dna, base_number=4)
  bases = dna.upcase
  top = bases.length - 1
  bases.each_char.with_index.inject(0) do |total, (base, index)|
    total + D2I[base] * base_number ** (top - index)
  end
end
105
+
106
# Parse one position field of the SQLite3 index.
#
# The field has the form "id:pos,pos;id:pos,...": records are separated by
# ';', the record id and its positions by ':', and positions by ','.
#
# @param data [String] raw field value
# @return [Hash{Integer => Array<Integer>}] positions keyed by record id
def split_pos(data)
  data.split(';').each_with_object({}) do |hit_record, pos_hash|
    hit_id, hit_pos = hit_record.split(':')
    pos_hash[hit_id.to_i] = hit_pos.split(',').map { |pos| pos.to_i }
  end
end
116
+
117
# Work out the k-mer size an index was built with.
#
# The pos table holds one row per possible k-mer, so k is the base-4
# logarithm of its row count. Falls back to 9 when the count cannot be read
# or converted.
#
# @param db_file [String] path of the SQLite3 index file
# @return [Integer]
def detect_kvalue(db_file)
  db = SQLite3::Database.new(db_file)

  begin
    mer_num = db.execute("select count(*) from pos")[0][0]
    # Round instead of truncating: the floating-point logarithm of an exact
    # power of 4 may come out marginally below the integer.
    Math.log(mer_num, 4).round
  rescue StandardError
    # StandardError only, so interrupts and exit requests are not swallowed.
    9
  ensure
    db.close
  end
end
129
+
130
# Determine the k-mer size shared by a set of indexed databases.
#
# Databases without an index are ignored. When the indexed ones disagree an
# error is printed and the process exits; when none is indexed the default
# of 9 is returned.
#
# @param db_list [Array<String>] database paths
# @return [Integer]
def get_kvalue(db_list)
  kmer_list = db_list.select { |db| db_indexed?(db) }
                     .map { |db| detect_kvalue(db + DB_SQLITE3) }
                     .uniq

  if kmer_list.size > 1
    # Report the databases that were passed in; no @opts instance variable is
    # assigned anywhere in this module.
    $stderr.puts "Different index kmer value among #{db_list.join(', ')}."
    exit 1
  end

  kmer_list.empty? ? 9 : kmer_list[0]
end
151
+
152
+
153
# Look up the positions of a set of k-mers in an index.
#
# @param db_file [String] path of the SQLite3 index file
# @param mer_id_list [Array<Integer>] k-mer ids to fetch
# @return [Hash{Integer => Hash}] per k-mer id, a hash that may hold :plus
#   and/or :minus, each parsed with split_pos; a strand with no hits is left out
# @raise [ArgumentError, TypeError] if an id is not an integer value
def query_sqlite3(db_file, mer_id_list)
  pos = {}
  # Ids are interpolated into the SQL text, so force them to integers first.
  id_sql = mer_id_list.map { |mer_id| Integer(mer_id) }.join(', ')

  db = SQLite3::Database.new(db_file)
  begin
    db.execute("select mer_id, plus, minus from pos where mer_id in (#{id_sql})") do |row|
      mer_id, plus, minus = row
      pos[mer_id] ||= {}
      pos[mer_id][:plus] = split_pos(plus) unless plus.nil? || plus.empty?
      pos[mer_id][:minus] = split_pos(minus) unless minus.nil? || minus.empty?
    end
  ensure
    db.close
  end

  pos
end
165
+ end
166
+ end
@@ -0,0 +1,24 @@
1
module Qu
  module Mfeindex
    # Suffixes appended to the FASTA path to name the index files.
    DB_JSON = '.uni'.freeze            # JSON record information
    DB_SQLITE3 = '.sqlite3.db'.freeze  # SQLite3 k-mer position database
    DB_2BIT = '.2bit'.freeze           # 2bit sequence file

    # Suffix of the directory that holds the pieces of a split database.
    BIG_DB = '_BIG_MFE_DB'.freeze
    # File size in bytes (1 GiB) above which a database should be split.
    BIG_DB_SPLIT_CUTOFF = 1024**3

    # Two-way lookup table: base => digit and digit => base.
    # Frozen so the shared table cannot be modified by accident.
    D2I = {
      'A' => 0,
      'T' => 3,
      'C' => 2,
      'G' => 1,
      '-' => 4, # For bubble, added by Zheyan Liu
      0 => 'A',
      1 => 'G',
      2 => 'C',
      3 => 'T',
      4 => '-', # For bubble, added by Zheyan Liu
    }.freeze

    ANTISENSE_CHARS = %w{A G C T -}.freeze
  end
end
@@ -0,0 +1,5 @@
1
module Qu
  module Mfeindex
    # Release number of the gem.
    VERSION = '1.0.0'
  end
end
data/lib/qu/pymfeindex ADDED
@@ -0,0 +1,283 @@
1
+ #!/usr/bin/env python
2
+ from __future__ import division
3
+
4
+ import os
5
+ import sys
6
+ import datetime
7
+ from time import time
8
+ from optparse import OptionParser
9
+ import sqlite3
10
+
11
+ import platform
12
+ import subprocess
13
+ import re
14
+
15
+
16
# Base -> digit of the base-4 k-mer encoding (upper and lower case accepted).
D2n_dic = dict(A=0, T=3, C=2, G=1, a=0, t=3, c=2, g=1)
# Digit -> base. A dict keeps only the last value given for a key, so listing
# the lowercase letters after the uppercase ones left only lowercase output,
# which did not match the uppercase 'A' padding used by int2DNA.
n2D_dic = {0: 'A', 1: 'G', 2: 'C', 3: 'T'}
18
+
19
+ def print_usage():
20
+ print '''
21
+ %s: Index DB for MFEprimer-2.0
22
+
23
+ Usage:
24
+
25
+ %s -f human.genomic -k 9 -o index_db_name
26
+
27
+ Author: Wubin Qu <quwubin@gmail.com>
28
+ Last updated: 2012-9-28
29
+ ''' % (os.path.basename(sys.argv[0]), os.path.basename(sys.argv[0]))
30
+
31
def optget():
    '''Parse the command line.

    -f/--file is required: without it the usage text is printed and the
    script exits. -o/--out defaults to the input file name followed by
    '.sqlite3.db'; -k/--k defaults to 9.

    Returns the optparse options object (attributes: filename, k, out).
    '''
    parser = OptionParser()
    parser.add_option("-f", "--file", dest="filename",
                      help="DNA file in fasta to be indexed")
    parser.add_option("-k", "--k", dest="k", type='int', default=9,
                      help="K mer , default is 9")
    parser.add_option("-o", "--out", dest="out",
                      help="Index db file name")

    # Positional arguments are not used.
    options = parser.parse_args()[0]

    if not options.filename:
        print_usage()
        exit()

    if not options.out:
        options.out = options.filename + '.sqlite3.db'

    return options
48
+
49
def parse_fasta_format(fh):
    '''
    Iterate over the records of a Fasta file.

    Yields one (id, description, sequence) tuple per record. Everything
    before the first '>' line is ignored, blank lines are skipped, and
    spaces and carriage returns are removed from sequence lines.
    '''
    header = None      # (id, description) of the record being collected
    chunks = []        # sequence lines of that record

    while True:
        raw = fh.readline()
        if not raw:
            break       # end of file

        text = raw.strip()
        if text.startswith('>'):
            # A new header closes the previous record, if there is one.
            if header is not None:
                yield (header[0], header[1], ''.join(chunks))
            name, sep, desc = text[1:].partition(' ')
            header = (name, desc)
            chunks = []
        elif header is not None and text:
            chunks.append(text.replace(' ', '').replace("\r", ''))

    if header is not None:
        yield (header[0], header[1], ''.join(chunks))
91
+
92
def get_free_memory_percent():
    '''Return the percentage of physical memory that is still usable.

    Mac OS: parsed from `vm_stat` and `sysctl -n hw.memsize`.
    Linux:  parsed from `free -m`.
    Any other platform: prints a notice and returns 0.
    '''
    def run(cmd):
        '''Run a command and return its standard output as text.'''
        out = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
        if not isinstance(out, str):
            out = out.decode()
        return out

    system = platform.system()

    if system == 'Darwin':
        vm = run(['vm_stat'])
        installed_memory = float(run(['sysctl', '-n', 'hw.memsize']))

        # Every vm_stat row looks like "Pages active:      12345."
        vmLines = vm.split('\n')
        sep = re.compile(r':[\s]+')
        vmStats = {}
        for row in range(1, len(vmLines) - 2):
            rowElements = sep.split(vmLines[row].strip())
            # NOTE(review): assumes 4096-byte pages - confirm on other hardware.
            vmStats[rowElements[0]] = int(rowElements[1].strip('\\.')) * 4096

        total_consumed = vmStats["Pages wired down"] + vmStats["Pages active"] + vmStats["Pages inactive"]

        return (installed_memory - total_consumed) / installed_memory * 100

    elif system == 'Linux':
        lines = run(['free', '-m']).splitlines()
        header = lines[0].split()
        items = lines[1].strip().split()    # the "Mem:" row
        total = float(items[1])

        if 'available' in header:
            # Current layout: total used free shared buff/cache available.
            # 'available' is the usable amount; adding free and buff/cache to
            # it would count memory twice. +1 skips the "Mem:" label.
            free = float(items[header.index('available') + 1])
        else:
            # Older layout: total used free shared buffers cached.
            free = float(items[3]) + float(items[5]) + float(items[6])

        return free / total * 100
    else:
        print("Sorry, currently only support Mac OS and Linux.")
        return 0
120
+
121
+
122
def insert_db(conn, mer_count, plus, minus):
    '''Insert one row per k-mer id (0 .. mer_count-1) into the pos table,
    taking the plus and minus texts from the two lists, then commit.'''
    statement = "insert into pos (mer_id, plus, minus) values (?, ?, ?)"

    mer_id = 0
    while mer_id < mer_count:
        conn.execute(statement, [mer_id, plus[mer_id], minus[mer_id]])
        mer_id += 1

    conn.commit()
128
+
129
def update_db(conn, mer_count, plus, minus):
    '''Append the in-memory plus/minus texts to the rows already stored in
    the pos table (ids 0 .. mer_count-1), then commit.

    Stored and new text are joined with ';'; when either side is empty the
    other one is used unchanged.
    '''
    def merged(stored, fresh):
        if not stored:
            return fresh
        if fresh:
            return stored + ';%s' % fresh
        return stored

    mer_id = 0
    while mer_id < mer_count:
        row = conn.execute("select plus, minus from pos where mer_id=?", [mer_id]).fetchone()
        (plus_data, minus_data) = row

        conn.execute("update pos set plus=?, minus=? where mer_id=?",
                     [merged(plus_data, plus[mer_id]), merged(minus_data, minus[mer_id]), mer_id])
        mer_id += 1

    conn.commit()
152
+
153
def baseN(num, b):
    '''convert non-negative decimal integer n to
    equivalent in another base b (2-36)

    Raises ValueError for a negative number (the recursive version never
    terminated on one).
    '''
    if num < 0:
        raise ValueError("baseN expects a non-negative integer, got %r" % (num,))
    if num == 0:
        return '0'

    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    digits = []
    while num:
        num, remainder = divmod(num, b)
        digits.append(alphabet[remainder])

    # Digits were produced least significant first.
    return ''.join(reversed(digits))
157
+
158
def int2DNA(num, k):
    '''Decode a k-mer id into its DNA string: the base-4 digits of num are
    translated through n2D_dic and left-padded with 'A' to k characters.'''
    digits = baseN(num, 4)
    bases = ''.join([n2D_dic[int(digit)] for digit in digits])
    return bases.rjust(k, 'A')
161
+
162
def DNA2int_2(seq):
    '''Convert a k-mer to a pair of non-negative integers.

    The first is the base-4 value of seq itself; the second is the base-4
    value of its reverse complement (digits are complemented as 3 - digit).
    A letter missing from D2n_dic raises KeyError.
    '''
    plus_mer = 0
    minus_mer = 0
    weight = 1      # 4 ** position, used for the reversed strand
    for letter in seq:
        code = D2n_dic[letter]
        plus_mer = plus_mer * 4 + code
        minus_mer += (3 - code) * weight
        weight *= 4

    return plus_mer, minus_mer
172
+
173
def DNA2int(seq):
    '''Convert a k-mer to a non-negative integer: the sequence read as a
    base-4 number, first letter most significant (digits from D2n_dic).'''
    top = len(seq) - 1
    return sum(D2n_dic[letter] * 4 ** (top - i) for i, letter in enumerate(seq))
181
+
182
def index(filename, k, dbname):
    '''Build the k-mer position index of a Fasta file.

    filename: Fasta file to index
    k:        k-mer length
    dbname:   SQLite3 file to write; its pos table is dropped and recreated

    For every k-mer id (0 .. 4**k - 1) the pos table stores two texts of the
    form "record:pos,pos;record:pos,...":
      plus  - end position (i + k - 1) of each occurrence of the k-mer
      minus - start position (i) of each occurrence of its reverse complement
    Positions are collected in memory and written out when free memory drops
    below 30 percent, and once more at the end.
    '''
    start = time()

    mer_count = 4**k

    conn = sqlite3.connect(dbname)
    try:
        cur = conn.cursor()
        cur.executescript('''
        drop table if exists pos;
        create table pos(
        mer_id integer primary key,
        plus text,
        minus text
        );''')

        plus = [''] * mer_count
        minus = [''] * mer_count

        is_empty = False    # True right after the lists were written out
        is_db_new = True    # True until the first write (insert vs. update)

        # The with block closes the Fasta file when parsing ends.
        with open(filename) as fasta:
            for record_id, record_desc, fasta_seq in parse_fasta_format(fasta):
                is_empty = False
                print(record_id)

                # Positions of this record only, keyed by k-mer id.
                plus_mer_list = {}
                minus_mer_list = {}

                for i in xrange(len(fasta_seq) - k + 1):
                    kmer = fasta_seq[i:(i + k)]

                    try:
                        plus_mer_id, minus_mer_id = DNA2int_2(kmer)
                    except KeyError:
                        # Skip the unrecognized base, such as 'N'
                        continue

                    if plus_mer_id in plus_mer_list:
                        plus_mer_list[plus_mer_id] += ',%i' % (i + k - 1)
                    else:
                        plus_mer_list[plus_mer_id] = str(i + k - 1)

                    if minus_mer_id in minus_mer_list:
                        minus_mer_list[minus_mer_id] += ',%i' % (i)
                    else:
                        minus_mer_list[minus_mer_id] = str(i)

                # Merge this record into the global lists.
                for mer_id, pos in plus_mer_list.items():
                    if plus[mer_id]:
                        plus[mer_id] += ';%s:%s' % (record_id, pos)
                    else:
                        plus[mer_id] = '%s:%s' % (record_id, pos)

                for mer_id, pos in minus_mer_list.items():
                    if minus[mer_id]:
                        minus[mer_id] += ';%s:%s' % (record_id, pos)
                    else:
                        minus[mer_id] = '%s:%s' % (record_id, pos)

                memory_percent = get_free_memory_percent()
                if memory_percent < 30:
                    if is_db_new:
                        insert_db(conn, mer_count, plus, minus)
                        is_db_new = False
                    else:
                        update_db(conn, mer_count, plus, minus)

                    # Empty the container
                    plus = [''] * mer_count
                    minus = [''] * mer_count
                    is_empty = True

                    print('Empty plus and minus due to the memory problem.')

        if not is_empty:
            if is_db_new:
                insert_db(conn, mer_count, plus, minus)
            else:
                update_db(conn, mer_count, plus, minus)
    finally:
        conn.close()

    print("Time used: %s" % str(datetime.timedelta(seconds=(time() - start))))
    print('Done.')
276
+
277
def main():
    '''Command-line entry point: parse the options and build the index.'''
    opts = optget()
    index(opts.filename, opts.k, opts.out)


if __name__ == "__main__":
    main()
@@ -0,0 +1,26 @@
1
# coding: utf-8
# Make lib/ loadable so the version constant can be read below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'qu/mfeindex/version'

Gem::Specification.new do |spec|
  spec.name          = "qu-mfeindex"
  spec.version       = Qu::Mfeindex::VERSION
  spec.authors       = ["Wubin Qu"]
  spec.email         = ["quwubin@gmail.com"]
  spec.description   = %q{DNA sequence indexer originally developed for MFEprimer-2.0}
  spec.summary       = %q{A DNA sequence indexer}
  spec.homepage      = "https://github.com/quwubin/qu-mfeindex"
  spec.license       = "MIT"

  # Package every file tracked by git; files under bin/ become executables.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency 'qu-utils', '~> 1.0'
  spec.add_runtime_dependency 'qu-cmdwrapper', '~> 1.0'

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: qu-mfeindex
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Wubin Qu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: qu-utils
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: qu-cmdwrapper
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: DNA sequence indexer originally developed for MFEprimer-2.0
70
+ email:
71
+ - quwubin@gmail.com
72
+ executables:
73
+ - mfeindex
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - ".gitignore"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - bin/mfeindex
83
+ - lib/qu/mfeindex.rb
84
+ - lib/qu/mfeindex/data.rb
85
+ - lib/qu/mfeindex/version.rb
86
+ - lib/qu/pymfeindex
87
+ - qu-mfeindex.gemspec
88
+ homepage: https://github.com/quwubin/qu-mfeindex
89
+ licenses:
90
+ - MIT
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.2.0
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: A DNA sequence idnexer
112
+ test_files: []