qu-mfeindex 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 078d0efd0389e0fb00ddb3434b227f57a329960e
4
+ data.tar.gz: 3e899029902dd46daec7ec7ba7341c7ea04f19a0
5
+ SHA512:
6
+ metadata.gz: 0196c1634bdfc2455fce27addf74687745bdb9106894d1ed84780800cabf64fc89832cf9c28f97f24e4c6be88a027a4605263bb7545c2737772bd0fbbe051ba5
7
+ data.tar.gz: 84950a7f7cc5003a464d8dcc53a3a712a56748960af70abd70b35956a00a9dd84430698002b80aac5804f9d837e40dddbf3a54d8a4bc0c5d949606c6560130bf
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ test
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
source 'https://rubygems.org'

# Specify your gem's dependencies in qu-mfeindex.gemspec
gemspec

# Sibling gems, spelled exactly as they are declared in qu-mfeindex.gemspec.
# A slash ('qu/utils') is the require path, not a valid gem name.
gem 'qu-utils'
gem 'qu-cmdwrapper'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Wubin Qu
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Qu::Mfeindex
2
+
3
+ DNA sequence indexer originally developed for MFEprimer-2.0
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'qu-mfeindex'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install qu-mfeindex
18
+
19
+ ## Usage
20
+
21
+ `mfeindex fasta_file [kvalue]`
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/mfeindex ADDED
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'qu/mfeindex'
4
+
5
# Split a large FASTA file into smaller FASTA files inside the directory
# returned by Qu::Mfeindex.get_big_db_dir(big_db).
#
# A new piece is only started on a header line ('>') once the current piece
# has reached Qu::Mfeindex::BIG_DB_SPLIT_CUTOFF bytes, so a record is never
# divided between two pieces. Pieces are named "<basename>.<index>".
#
# If pieces whose total size covers the source file already exist, the user
# is asked whether to split again; answering "n" (the default) returns the
# existing pieces unchanged.
#
# @param big_db [String] path of the FASTA file to split
# @return [Array<String>] paths of the pieces, ordered by piece index
def split_db(big_db)
  big_db_dir = Qu::Mfeindex.get_big_db_dir(big_db)
  Dir.mkdir(big_db_dir) unless File.directory?(big_db_dir)

  prefix = File.basename(big_db)
  # The prefix is escaped because file names normally contain '.', and \d+
  # is used so that pieces numbered 10 and above are recognised as well.
  piece_pattern = /\A#{Regexp.escape(prefix)}\.\d+\z/

  existing = Dir.entries(big_db_dir).grep(piece_pattern)
  existing = existing.sort_by { |name| name[/\d+\z/].to_i }
  existing = existing.map { |name| File.join(big_db_dir, name) }
  existing_size = existing.inject(0) { |sum, path| sum + File.size(path) }

  if existing_size >= File.size(big_db)
    $stdout.print "#{big_db} has already been split, do you want to resplit it? [y/N]:"
    # gets returns nil at end of input; treat that as accepting the default.
    choice = ($stdin.gets || '').chomp.downcase
    choice = 'n' if choice.empty?
    until %w[y n].include?(choice)
      $stdout.print "The choice should be 'y' or 'n':"
      choice = ($stdin.gets || 'n').chomp.downcase
    end

    return existing if choice == 'n'
  end

  # Drop pieces from an earlier run: a previous split may have produced more
  # pieces than this one will, and leftovers would still match the pattern.
  existing.each { |path| File.delete(path) }

  pieces = []
  open_next_piece = lambda do
    path = File.join(big_db_dir, "#{prefix}.#{pieces.size}")
    pieces << path
    File.open(path, 'w')
  end

  out = open_next_piece.call
  begin
    File.foreach(big_db) do |line|
      if line.start_with?('>') && out.size >= Qu::Mfeindex::BIG_DB_SPLIT_CUTOFF
        out.close
        out = open_next_piece.call
      end
      out.write(line)
    end
  ensure
    out.close unless out.closed?
  end

  pieces
end
54
+
55
+
56
# Index one FASTA file, asking for confirmation before rebuilding an index
# that already exists.
#
# @param fasta_file [String] path of the FASTA file to index
# @param kvalue [Integer] k-mer length handed to the indexer
# @return whatever Qu::Mfeindex.MFEprimerIndex returns
def index_db(fasta_file, kvalue)
  reindex = true

  if Qu::Mfeindex.db_indexed?(fasta_file)
    $stdout.print "#{fasta_file} has already been indexed, do you want to reindex? [y/N]:"
    # gets returns nil at end of input; treat that as accepting the default.
    choice = ($stdin.gets || '').chomp.downcase
    choice = 'n' if choice.empty?
    until %w[y n].include?(choice)
      $stdout.print "The choice should be 'y' or 'n':"
      choice = ($stdin.gets || 'n').chomp.downcase
    end
    reindex = (choice == 'y')
  end

  Qu::Mfeindex.MFEprimerIndex(fasta_file, kvalue, reindex)
end
75
+
76
+
77
# Command-line entry point: mfeindex fasta_file [kvalue]
usage = <<USAGE
Index database for MFEprimer-2.0

Usage:

    #{File.basename($0)} fasta_file [kvalue]

Options:

    kvalue: Default is 9 [Integer].

Contact: Wubin Qu <quwubin@gmail.com>
USAGE

unless [1, 2].include?(ARGV.size)
  $stderr.puts usage
  exit 1
end

fasta_file = ARGV[0]
kvalue = 9

if ARGV.size == 2
  begin
    # Strict base-10 parse: String#to_i would silently turn junk into 0.
    kvalue = Integer(ARGV[1], 10)
  rescue ArgumentError
    kvalue = 0
  end

  unless kvalue > 0
    $stderr.puts "Error: kvalue must be a positive integer, got #{ARGV[1].inspect}.\n"
    $stderr.puts usage
    exit 1
  end
end

# File.exists? was removed in Ruby 3.2; File.exist? works on every version.
unless File.exist?(fasta_file)
  $stderr.puts "Error: #{fasta_file} does not exist.\n"
  $stderr.puts usage
  exit 1
end

choice = 'n'
if File.size(fasta_file) > Qu::Mfeindex::BIG_DB_SPLIT_CUTOFF
  $stdout.print "#{fasta_file} is too large, do you want to split it first? [Y/n]:"
  # gets returns nil at end of input; treat that as accepting the default.
  choice = ($stdin.gets || '').chomp.downcase
  choice = 'y' if choice.empty?
  until %w[y n].include?(choice)
    $stdout.print "The choice should be 'y' or 'n':"
    choice = ($stdin.gets || 'y').chomp.downcase
  end
end

if choice == 'y'
  split_db(fasta_file).each { |small_file| index_db(small_file, kvalue) }
else
  index_db(fasta_file, kvalue)
end
@@ -0,0 +1,166 @@
1
+ require 'qu/utils'
2
+ require 'qu/cmdwrapper'
3
+
4
+ require_relative "mfeindex/data"
5
+ require_relative "mfeindex/version"
6
+ require 'json'
7
+
8
+ module Qu
9
+ module Mfeindex
10
+ # Your code goes here...
11
+
12
+ module_function
13
+
14
# Path of the directory that holds the split pieces of +db+: the database
# path with the BIG_DB suffix appended.
#
# @param db [String] database path
# @return [String]
def get_big_db_dir(db)
  suffix = BIG_DB
  db + suffix
end
17
+
18
# Resolve a list of database paths into the list of files that can actually
# be searched.
#
# For each entry, in this order:
# * already indexed              -> kept as is
# * a split directory exists     -> replaced by its pieces, ordered by index
# * the file does not exist      -> error message and exit
# * larger than the split cutoff -> message asking to index it first, exit
# * otherwise                    -> kept as is
#
# @param db_list [Array<String>] database paths
# @return [Array<String>] usable database paths
def check_db(db_list)
  db_list.each_with_object([]) do |db, usable|
    big_db_dir = get_big_db_dir(db)

    if db_indexed?(db)
      usable << db
    elsif File.directory?(big_db_dir)
      # The prefix is escaped because file names normally contain '.', and
      # \d+ is used so that pieces numbered 10 and above are found as well.
      piece_pattern = /\A#{Regexp.escape(File.basename(db))}\.\d+\z/
      pieces = Dir.entries(big_db_dir).grep(piece_pattern)
      pieces.sort_by { |name| name[/\d+\z/].to_i }.each do |name|
        usable << File.join(big_db_dir, name)
      end
    elsif !File.exist?(db) # File.exists? was removed in Ruby 3.2
      $stderr.puts "Error: #{db} does not exist."
      exit 1
    elsif File.size(db) > BIG_DB_SPLIT_CUTOFF
      $stderr.puts "Warning: #{db} is too large, please use mfeindex to index the db first."
      exit 1
    else
      usable << db
    end
  end
end
45
+
46
# True when all three index artefacts exist next to +db+: the SQLite3
# position database, the JSON record table and the 2bit sequence file.
#
# @param db [String] database path
# @return [Boolean]
def db_indexed?(db)
  [DB_SQLITE3, DB_JSON, DB_2BIT].all? { |suffix| File.exist?(db + suffix) }
end
49
+
50
# Build the index files for +fasta_file+:
#
# * fasta_file + DB_JSON    - JSON table: record number => id, desc, size
# * fasta_file + DB_2BIT    - written by Qu::Cmdwrapper.faToTwoBit
# * fasta_file + DB_SQLITE3 - k-mer position database written by the
#                             bundled pymfeindex script
#
# Records are first renumbered 0, 1, 2, ... into a temporary
# "<fasta_file>.unifasta" file, which is removed afterwards.
#
# @param fasta_file [String] path of the FASTA file
# @param k [Integer] k-mer length
# @param reindex [Boolean] rebuild even when the index already exists
# @return [nil]
def MFEprimerIndex(fasta_file, k = 9, reindex = false)
  return if !reindex && db_indexed?(fasta_file)

  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  unless File.exist?(fasta_file)
    $stderr.puts "Error: #{fasta_file} does not exist."
    exit 1
  end

  info_json = {}
  uni_fasta = fasta_file + '.unifasta'

  # Block forms close both handles even if parsing raises.
  File.open(fasta_file) do |input|
    File.open(uni_fasta, 'w') do |fh|
      Bio::FlatFile.new(Bio::FastaFormat, input).each_with_index do |record, index|
        info_json[index] = {'id' => record.entry_name, 'desc' => record.desc, 'size' => record.naseq.size}
        fh.write ">#{index}\n#{record.naseq}\n"
      end
    end
  end

  File.open(fasta_file + DB_JSON, 'w') do |fh|
    fh.write(JSON.dump(info_json))
  end

  Qu::Cmdwrapper::faToTwoBit(uni_fasta, fasta_file + DB_2BIT)

  cmd = File.join(__dir__, 'pymfeindex')
  $stderr.puts "Begin index database: #{fasta_file}"
  # The argv form bypasses the shell, so paths containing spaces or shell
  # metacharacters are passed through intact. The script's standard output
  # is read and discarded, as it was with backticks.
  IO.popen([cmd, '-f', uni_fasta, '-k', k.to_s, '-o', fasta_file + DB_SQLITE3], &:read)
  indexed = $?.success?

  begin
    File.delete(uni_fasta)
  rescue SystemCallError
    if File.exist?(uni_fasta)
      $stderr.puts "You can delete the file #{uni_fasta} by hand."
    end
  end

  if indexed
    $stderr.puts "Done index database: #{fasta_file}"
  else
    $stderr.puts "Error: #{cmd} failed while indexing #{fasta_file}."
  end
end
86
+
87
# Decode a k-mer id into its DNA string: the id is written in base
# +base_number+, every digit is mapped to a base through D2I, and the result
# is left-padded with 'A' up to +k+ characters.
#
# @param int [Integer] k-mer id
# @param k [Integer] k-mer length
# @param base_number [Integer] radix of the encoding
# @return [String]
def int2dna(int, k=9, base_number=4)
  digits = int.to_s(base_number)
  bases = digits.each_char.inject('') { |acc, digit| acc + D2I[digit.to_i] }
  'A' * (k - digits.length) + bases
end
95
+
96
# Encode a DNA string as an integer: the upcased sequence is read as a
# base-+base_number+ number whose digits come from D2I, most significant
# base first (plus-strand id).
#
# @param dna [String] DNA sequence
# @param base_number [Integer] radix of the encoding
# @return [Integer]
def dna2int(dna, base_number=4)
  bases = dna.upcase.chars
  top = bases.length - 1
  bases.each_with_index.inject(0) do |total, (base, offset)|
    total + D2I[base] * base_number ** (top - offset)
  end
end
105
+
106
# Parse a position string as stored in the SQLite3 index. Records are
# separated by ';', and each record has the form "<hit id>:<pos>,<pos>,...".
#
# @param data [String] e.g. "1:3,5;2:7"
# @return [Hash{Integer => Array<Integer>}] e.g. {1 => [3, 5], 2 => [7]}
def split_pos(data)
  data.split(';').each_with_object({}) do |record, positions|
    seq_id, offsets = record.split(':')
    positions[seq_id.to_i] = offsets.split(',').map { |offset| offset.to_i }
  end
end
116
+
117
# Work out the k-mer length an index was built with from the number of rows
# in its `pos` table: k = log4(row count).
#
# Falls back to 9 when the table cannot be read or the count is unusable.
#
# @param db_file [String] path of the SQLite3 index file
# @return [Integer]
def detect_kvalue(db_file)
  db = SQLite3::Database.new(db_file)

  begin
    mer_num = db.execute("select count(*) from pos")[0][0]
    # Math.log returns a Float; round it rather than truncate so a result
    # such as 8.999999 cannot come out as 8.
    Math.log(mer_num, 4).round
  rescue StandardError
    # StandardError, not Exception: interrupts and exit requests must not be
    # swallowed here.
    9
  ensure
    db.close
  end
end
129
+
130
# Return the k-mer length shared by the indexed databases in +db_list+.
#
# Databases that are not indexed are ignored. If none is indexed the default
# of 9 is returned. If the indexed databases disagree, an error is printed
# and the process exits.
#
# @param db_list [Array<String>] database paths
# @return [Integer]
def get_kvalue(db_list)
  indexed = db_list.select { |db| db_indexed?(db) }
  kvalues = indexed.map { |db| detect_kvalue(db + DB_SQLITE3) }.uniq

  if kvalues.size > 1
    # The message used to read @opts.db, which is nil in this module and
    # raised NoMethodError instead of printing the error.
    $stderr.puts "Different index kmer value among #{db_list.join(', ')}."
    exit 1
  end

  kvalues.first || 9
end
151
+
152
+
153
# Look up the stored positions of a set of k-mer ids.
#
# @param db_file [String] path of the SQLite3 index file
# @param mer_id_list [Array<Integer>] k-mer ids to look up
# @return [Hash{Integer => Hash}] mer_id => {:plus => ..., :minus => ...},
#   each value parsed by split_pos; a strand is omitted when its column is
#   empty
# @raise [ArgumentError, TypeError] if an id cannot be converted to Integer
def query_sqlite3(db_file, mer_id_list)
  pos = {}

  # The ids are interpolated into the SQL text, so coerce every one to an
  # Integer first. Bound parameters are not used because the list can be
  # longer than SQLite's host-parameter limit.
  ids = mer_id_list.map { |mer_id| Integer(mer_id) }
  return pos if ids.empty?

  db = SQLite3::Database.new(db_file)
  begin
    db.execute("select mer_id, plus, minus from pos where mer_id in (#{ids.join(', ')})") do |row|
      mer_id, plus, minus = row
      entry = (pos[mer_id] ||= {})
      # to_s makes a NULL column behave like an empty one.
      entry[:plus] = split_pos(plus) unless plus.to_s.empty?
      entry[:minus] = split_pos(minus) unless minus.to_s.empty?
    end
  ensure
    db.close
  end

  pos
end
165
+ end
166
+ end
@@ -0,0 +1,24 @@
1
module Qu
  module Mfeindex
    # Suffixes appended to a database path to name its index files.
    DB_JSON = '.uni'.freeze             # JSON table of the FASTA records
    DB_SQLITE3 = '.sqlite3.db'.freeze   # SQLite3 k-mer position database
    DB_2BIT = '.2bit'.freeze            # 2bit sequence file

    # Suffix of the directory that holds the pieces of a split database.
    BIG_DB = '_BIG_MFE_DB'.freeze

    # Size in bytes (1 GiB) above which a database is split before indexing.
    BIG_DB_SPLIT_CUTOFF = 1024**3

    # Two-way lookup between bases and their numeric codes: String keys map
    # a base to its code, Integer keys map a code back to its base.
    D2I = {
      'A' => 0,
      'T' => 3,
      'C' => 2,
      'G' => 1,
      '-' => 4, # For bubble, added by Zheyan Liu
      0 => 'A',
      1 => 'G',
      2 => 'C',
      3 => 'T',
      4 => '-', # For bubble, added by Zheyan Liu
    }.freeze

    # The five symbols, listed in the order of their D2I codes 0..4.
    ANTISENSE_CHARS = %w{A G C T -}.freeze
  end
end
@@ -0,0 +1,5 @@
1
module Qu
  module Mfeindex
    # Release version of the qu-mfeindex gem; the gemspec reads it from here.
    VERSION = '1.0.0'
  end
end
data/lib/qu/pymfeindex ADDED
@@ -0,0 +1,283 @@
1
+ #!/usr/bin/env python
2
+ from __future__ import division
3
+
4
+ import os
5
+ import sys
6
+ import datetime
7
+ from time import time
8
+ from optparse import OptionParser
9
+ import sqlite3
10
+
11
+ import platform
12
+ import subprocess
13
+ import re
14
+
15
+
16
# Base -> base-4 digit, accepting both upper- and lower-case input.
D2n_dic = dict(A=0, T=3, C=2, G=1, a=0, t=3, c=2, g=1)
# Base-4 digit -> base. One entry per digit: the previous literal repeated
# every key with a lower-case value, and the later duplicates silently won.
n2D_dic = {0: 'A', 1: 'G', 2: 'C', 3: 'T'}
18
+
19
def print_usage():
    '''Print the command-line help text to standard output.'''
    program = os.path.basename(sys.argv[0])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('''
%s: Index DB for MFEprimer-2.0

Usage:

    %s -f human.genomic -k 9 -o index_db_name

Author: Wubin Qu <quwubin@gmail.com>
Last updated: 2012-9-28
''' % (program, program))
30
+
31
def optget():
    '''Parse the command line.

    Returns the optparse options object with attributes:
      filename -- FASTA file to index (required)
      k        -- k-mer length, default 9
      out      -- index file name, default "<filename>.sqlite3.db"

    Prints the usage text and exits with status 1 when -f is missing.
    '''
    parser = OptionParser()
    parser.add_option("-f", "--file", dest = "filename", help = "DNA file in fasta to be indexed")
    parser.add_option("-k", "--k", dest = "k", type='int', help = "K mer , default is 9", default = 9)
    parser.add_option("-o", "--out", dest = "out", help = "Index db file name")

    (options, args) = parser.parse_args()

    if not options.filename:
        print_usage()
        # sys.exit rather than the interactive-only exit() helper installed
        # by the site module; non-zero because the invocation was invalid.
        sys.exit(1)

    if not options.out:
        options.out = options.filename + '.sqlite3.db'

    return options
48
+
49
def parse_fasta_format(fh):
    '''
    Iterate over the records of a FASTA stream.

    Yields (id, desc, sequence) tuples. Everything before the first header
    line ('>') is ignored. The header is split at its first space into id and
    description. Sequence lines are stripped, blank lines are skipped, and
    embedded spaces and carriage returns are removed before joining.
    '''
    header = None
    chunks = []

    for raw in fh:
        text = raw.strip()

        if text.startswith('>'):
            # A new header closes the record collected so far.
            if header is not None:
                name, _, desc = header[1:].partition(' ')
                yield (name, desc, ''.join(chunks))
            header = text
            chunks = []
        elif header is not None and text:
            chunks.append(text.replace(' ', '').replace("\r", ''))

    if header is not None:
        name, _, desc = header[1:].partition(' ')
        yield (name, desc, ''.join(chunks))
91
+
92
def get_free_memory_percent():
    '''Return the percentage (0-100) of physical memory still usable.

    Mac OS: installed memory (sysctl hw.memsize) minus the wired, active and
    inactive pages reported by vm_stat.
    Linux: MemAvailable from /proc/meminfo, or MemFree + Buffers + Cached on
    kernels that do not report MemAvailable.
    Any other platform: prints a notice and returns 0.
    '''
    system = platform.system()

    if system == 'Darwin':
        # communicate() returns bytes on Python 3; decode before parsing.
        vm_text = subprocess.Popen(['vm_stat'], stdout=subprocess.PIPE).communicate()[0].decode('utf-8', 'replace')
        mem_text = subprocess.Popen(['sysctl', '-n', 'hw.memsize'], stdout=subprocess.PIPE).communicate()[0].decode('utf-8', 'replace')
        installed_memory = float(mem_text.strip())

        # vm_stat states its page size in the header line; not every Mac
        # uses 4096-byte pages, so only fall back to that value.
        size_match = re.search(r'page size of (\d+) bytes', vm_text)
        page_size = int(size_match.group(1)) if size_match else 4096

        vm_stats = {}
        for row in vm_text.splitlines()[1:]:
            name, sep, value = row.partition(':')
            value = value.strip().rstrip('.')
            if sep and value.isdigit():
                vm_stats[name.strip()] = int(value) * page_size

        total_consumed = vm_stats["Pages wired down"] + vm_stats["Pages active"] + vm_stats["Pages inactive"]

        return (installed_memory - total_consumed) / installed_memory * 100

    elif system == 'Linux':
        # /proc/meminfo is read directly: the column layout printed by
        # `free` differs between procps versions, so fixed column positions
        # are unreliable.
        meminfo = {}
        with open('/proc/meminfo') as handle:
            for row in handle:
                name, sep, rest = row.partition(':')
                fields = rest.split()
                if sep and fields and fields[0].isdigit():
                    meminfo[name.strip()] = float(fields[0])

        total = meminfo['MemTotal']
        if 'MemAvailable' in meminfo:
            free = meminfo['MemAvailable']
        else:
            free = meminfo.get('MemFree', 0.0) + meminfo.get('Buffers', 0.0) + meminfo.get('Cached', 0.0)

        return free / total * 100
    else:
        print("Sorry, currently only support Mac OS and Linux.")
        return 0
120
+
121
+
122
def insert_db(conn, mer_count, plus, minus):
    '''Insert one row per k-mer id (0 .. mer_count-1) into table `pos`.

    conn      -- open sqlite3 connection
    mer_count -- number of k-mer ids
    plus      -- plus-strand position strings, indexed by k-mer id
    minus     -- minus-strand position strings, indexed by k-mer id

    The transaction is committed before returning.
    '''
    # range() instead of xrange(): xrange does not exist on Python 3.
    rows = ((mer_id, plus[mer_id], minus[mer_id]) for mer_id in range(mer_count))
    conn.executemany("insert into pos (mer_id, plus, minus) values (?, ?, ?)", rows)
    conn.commit()
128
+
129
def update_db(conn, mer_count, plus, minus):
    '''Append newly collected positions to the rows already in table `pos`.

    conn      -- open sqlite3 connection
    mer_count -- number of k-mer ids
    plus      -- new plus-strand position strings, indexed by k-mer id
    minus     -- new minus-strand position strings, indexed by k-mer id

    New data is joined to the stored text with ';'. The transaction is
    committed before returning.
    '''
    # range() instead of xrange(): xrange does not exist on Python 3.
    for mer_id in range(mer_count):
        new_plus = plus[mer_id]
        new_minus = minus[mer_id]
        # With nothing new for this k-mer the stored row would be written
        # back unchanged, so skip the select/update round trip.
        if not new_plus and not new_minus:
            continue

        (plus_data, minus_data) = conn.execute("select plus, minus from pos where mer_id=?", [mer_id]).fetchone()

        if not plus_data:
            plus_data = new_plus
        elif new_plus:
            plus_data += ';%s' % new_plus

        if not minus_data:
            minus_data = new_minus
        elif new_minus:
            minus_data += ';%s' % new_minus

        conn.execute("update pos set plus=?, minus=? where mer_id=?", \
                [plus_data, minus_data, mer_id])

    conn.commit()
152
+
153
def baseN(num, b):
    '''Convert the non-negative integer num to its representation in base b
    (2-36), using the digits 0-9 followed by a-z.

    Raises ValueError for a negative num or a base outside 2-36; those inputs
    previously recursed without end.
    '''
    if num < 0:
        raise ValueError("num must be non-negative, got %r" % (num,))
    if not 2 <= b <= 36:
        raise ValueError("base must be between 2 and 36, got %r" % (b,))

    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    if num == 0:
        return '0'

    # Iterative, so very large numbers cannot hit the recursion limit.
    digits = []
    while num:
        num, remainder = divmod(num, b)
        digits.append(alphabet[remainder])
    return ''.join(reversed(digits))
157
+
158
def int2DNA(num, k):
    '''Decode k-mer id num into a DNA string: its base-4 digits are mapped
    through n2D_dic and the result is left-padded with 'A' to length k.'''
    digits = baseN(num, 4)
    bases = [n2D_dic[int(digit)] for digit in digits]
    padding = 'A' * (k - len(digits))
    return padding + ''.join(bases)
161
+
162
def DNA2int_2(seq):
    '''Encode a k-mer as two non-negative integers.

    Returns (plus_mer, minus_mer): plus_mer reads seq as a base-4 number
    through D2n_dic; minus_mer does the same for the sequence read backwards
    with every digit d replaced by 3 - d. A letter missing from D2n_dic
    raises KeyError.
    '''
    top = len(seq) - 1
    codes = [D2n_dic[letter] for letter in seq]
    plus_mer = sum(code * 4 ** (top - i) for i, code in enumerate(codes))
    minus_mer = sum((3 - code) * 4 ** i for i, code in enumerate(codes))
    return plus_mer, minus_mer
172
+
173
def DNA2int(seq):
    '''Encode a k-mer as a non-negative integer by reading it as a base-4
    number through D2n_dic, first letter most significant. A letter missing
    from D2n_dic raises KeyError.'''
    plus_mer = 0
    for letter in seq:
        # Horner form of sum(code * 4 ** position).
        plus_mer = plus_mer * 4 + D2n_dic[letter]
    return plus_mer
181
+
182
def index(filename, k, dbname):
    '''Build the k-mer position index of a FASTA file.

    filename -- FASTA file to index
    k        -- k-mer length
    dbname   -- SQLite3 file to write; its table `pos` is dropped and rebuilt

    Table `pos` gets one row per possible k-mer id (4**k rows). Columns
    `plus` and `minus` hold text of the form "<record id>:<pos>,<pos>;...".
    Plus-strand positions are the index of the k-mer's last base,
    minus-strand positions the index of its first base. K-mers containing a
    letter that DNA2int_2 cannot encode (such as 'N') are skipped.

    After each record the collected data is flushed to the database if free
    memory has dropped below 30 percent.
    '''
    start = time()

    mer_count = 4**k

    # Python 2 needs the lazy xrange for chromosome-sized sequences;
    # Python 3 only has range, which is already lazy.
    try:
        lazy_range = xrange
    except NameError:
        lazy_range = range

    conn = sqlite3.connect(dbname)
    try:
        conn.executescript('''
        drop table if exists pos;
        create table pos(
        mer_id integer primary key,
        plus text,
        minus text
        );''')

        plus = ['']*mer_count
        minus = ['']*mer_count

        is_empty = False
        is_db_new = True

        with open(filename) as fasta:
            for record_id, record_desc, fasta_seq in parse_fasta_format(fasta):
                is_empty = False
                print(record_id)

                # k-mer id -> positions within this record, as strings.
                plus_hits = {}
                minus_hits = {}

                for i in lazy_range(len(fasta_seq)-k + 1):
                    kmer = fasta_seq[i:(i+k)]

                    try:
                        plus_mer_id, minus_mer_id = DNA2int_2(kmer)
                    except KeyError:
                        # Skip the unrecognized base, such as 'N'
                        continue

                    # Collect in lists and join once, instead of growing a
                    # string on every hit.
                    plus_hits.setdefault(plus_mer_id, []).append(str(i+k-1))
                    minus_hits.setdefault(minus_mer_id, []).append(str(i))

                for mer_id, hits in plus_hits.items():
                    entry = '%s:%s' % (record_id, ','.join(hits))
                    if plus[mer_id]:
                        plus[mer_id] += ';' + entry
                    else:
                        plus[mer_id] = entry

                for mer_id, hits in minus_hits.items():
                    entry = '%s:%s' % (record_id, ','.join(hits))
                    if minus[mer_id]:
                        minus[mer_id] += ';' + entry
                    else:
                        minus[mer_id] = entry

                memory_percent = get_free_memory_percent()
                if memory_percent < 30:
                    if is_db_new:
                        insert_db(conn, mer_count, plus, minus)
                        is_db_new = False
                    else:
                        update_db(conn, mer_count, plus, minus)

                    # Empty the container
                    plus = ['']*mer_count
                    minus = ['']*mer_count
                    is_empty = True

                    print('Empty plus and minus due to the memory problem.')

        if not is_empty:
            if is_db_new:
                insert_db(conn, mer_count, plus, minus)
            else:
                update_db(conn, mer_count, plus, minus)
    finally:
        conn.close()

    print("Time used: %s" % str(datetime.timedelta(seconds=(time() - start))))
    print('Done.')
276
+
277
def main():
    '''Parse the command line and build the index it describes.'''
    opts = optget()
    index(opts.filename, opts.k, opts.out)


if __name__ == "__main__":
    main()
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'qu/mfeindex/version'
5
+
6
# Gem metadata for qu-mfeindex. The file list comes from `git ls-files`, so
# the gem has to be built from inside a git checkout.
Gem::Specification.new do |spec|
  spec.name          = "qu-mfeindex"
  spec.version       = Qu::Mfeindex::VERSION
  spec.authors       = ["Wubin Qu"]
  spec.email         = ["quwubin@gmail.com"]
  spec.description   = %q{DNA sequence indexer originally developed for MFEprimer-2.0}
  spec.summary       = %q{A DNA sequence indexer}
  spec.homepage      = "https://github.com/quwubin/qu-mfeindex"
  spec.license       = "MIT"

  spec.files         = `git ls-files`.split($/)
  # Everything under bin/ is installed as an executable.
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency 'qu-utils', '~> 1.0'
  spec.add_runtime_dependency 'qu-cmdwrapper', '~> 1.0'

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: qu-mfeindex
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Wubin Qu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: qu-utils
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: qu-cmdwrapper
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: DNA sequence indexer originally developed for MFEprimer-2.0
70
+ email:
71
+ - quwubin@gmail.com
72
+ executables:
73
+ - mfeindex
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - ".gitignore"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - bin/mfeindex
83
+ - lib/qu/mfeindex.rb
84
+ - lib/qu/mfeindex/data.rb
85
+ - lib/qu/mfeindex/version.rb
86
+ - lib/qu/pymfeindex
87
+ - qu-mfeindex.gemspec
88
+ homepage: https://github.com/quwubin/qu-mfeindex
89
+ licenses:
90
+ - MIT
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.2.0
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: A DNA sequence indexer
112
+ test_files: []