cwords 0.1.3-jruby → 0.1.4-jruby

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/scripts/cwords.rb +431 -0
  2. data/scripts/cwords_mkdb.rb +84 -0
  3. metadata +4 -2
data/scripts/cwords.rb ADDED
@@ -0,0 +1,431 @@
1
+ #!/usr/bin/env jruby
2
+
3
+ ### Requires jruby, www.jruby.org
4
+
5
+ ###
6
+ ### Running sum analysis for 3 different measures of word enrichment in a sequence:
7
+ ### obs : use the observed word count
8
+ ### bin : use presence/absence of word
9
+ ### pval : use the p-value of the expected occurrences being >= the observed occurrence
10
+
11
+ srcdir = File.dirname(__FILE__)
12
+ basedir = srcdir + "/../"
13
+ libdir = basedir + '/lib/'
14
+ $LOAD_PATH << libdir
15
+
16
+ require 'wordRS-lib.rb'
17
+ require 'rubygems'
18
+ require 'progressbar'
19
+ require 'optparse'
20
+ require 'peach'
21
+ require 'java'
22
+ require libdir + 'ushuffle.jar'
23
+ java_import 'UShuffle'
24
+
25
+ #default options
26
+ options = Hash.new
27
+ options[:wordsize] = [7]
28
+ options[:split_words]=nil
29
+ options[:dbdir] = basedir + "db/"
30
+ options[:scoring_scheme] = 'pval'
31
+ options[:permutations]=50
32
+ options[:seqshuffles]=100
33
+ options[:rankfile]=nil
34
+ options[:seqfile]=nil
35
+ options[:report_words]=nil
36
+ options[:plot_words]=nil
37
+ options[:onlyanno]=nil
38
+ options[:dump]=nil
39
+ options[:testing]=nil
40
+ options[:rank_all]=nil
41
+ options[:rank_inverse]=nil
42
+ options[:rank_split_median]=nil
43
+ options[:rank_abs]=nil
44
+ options[:bg]=1 #mononucleotide shuffling
45
+ options[:threads]=1
46
+
47
+ $coptions = OptionParser.new do |opts|
48
+ opts.banner = "Usage: cwords [options]"
49
+
50
+ # analysis settings
51
+ opts.on("-c", "--scoring_scheme ARG", "scoring scheme") {|o| options[:scoring_scheme] = o}
52
+ opts.on("-p", "--permutations ARG", "number of list permutations") {|o| options[:permutations] = o.to_i}
53
+ opts.on("-q", "--shuffles ARG", "number of sequence shuffles for sequence bias correction") {|o| options[:seqshuffles] = o.to_i}
54
+ opts.on("-w", "--wordsize ARG", "wordsize") { |o| options[:wordsize] = o.split(",").map{|x| x.to_i}}
55
+ opts.on("-b", "--bg ARG", "background nucleotide model") {|o| options[:bg] = o.to_i}
56
+ opts.on("-t", "--threads ARG", "use multiple threads to parallelize computations") {|o| options[:threads] = o.to_i}
57
+ opts.on( "--split_words WORDS", "split sequence set based on occurrences of WORDS") {|o| options[:split_words] = o.split(",")}
58
+ opts.on( "--onlyanno", "only process annotated (i.e. mirbase) words") {|o| options[:onlyanno] = true}
59
+
60
+ # rank control
61
+ opts.on("-x", "--rank_all", "do not split positive and neg. values") {|o| options[:rank_all] = true}
62
+ opts.on("-m", "--rank_split_median", "split ranked list at median") {|o| options[:rank_split_median] = true}
63
+ opts.on("-i", "--rank_inverse", "inverse all ranked lists") {|o| options[:rank_inverse] = true}
64
+ opts.on("-a", "--rank_abs", "rank by absolute value") {|o| options[:rank_abs] = true}
65
+
66
+ # files and directories
67
+ opts.on("-r", "--rankfile ARG", "rank file") {|o| options[:rankfile] = o}
68
+ opts.on("-s", "--seqfile ARG", "sequence file") {|o| options[:seqfile] = o}
69
+ opts.on("-d", "--db ARG", "word database") { |o| options[:db] = o}
70
+
71
+ # output control
72
+ opts.on("-u", "--dump ARG", "dump top words") { |o| options[:dump] = o.to_i}
73
+ opts.on( "--report_words ARG", "report on words (comma separated)") {|o| options[:report_words] = o.split(',')}
74
+ opts.on( "--plot_words ARG", "only make plot files for words (comma separated)") {|o| options[:plot_words] = o.split(',')}
75
+ opts.on( "--testing", "testing mode") {|o| options[:testing] = true}
76
+ end
77
+
78
+ def show_help(msg="", code=0, io=STDOUT) # print msg followed by the option summary to io, then terminate
79
+ io.puts "#{msg}\n#{$coptions}" # $coptions is the global OptionParser built above; interpolation renders its help text
80
+ exit(code) # NOTE(review): code defaults to 0, so callers passing only a message exit with a success status
81
+ end
82
+
83
+ $coptions.parse!(ARGV)
84
+ # mandatory parameters
85
+ [:rankfile].each{|p| show_help("option '#{p}' mandatory") if options[p].nil?}
86
+ show_help("db or seqfile required") if !(options[:db] or options[:seqfile])
87
+ show_help("scoring scheme must be one of: obs,bin,pval") if !(['obs','bin','pval'].include?(options[:scoring_scheme]))
88
+
89
+ testing = options[:testing]
90
+
91
+ # get filename without directory
92
+ rankfilename = File.basename(options[:rankfile])
93
+
94
+ # hard-coded
95
+ output_top = 10
96
+
97
+ prankdir = basedir + "/db/" + options[:db] + "/" if options[:db]
98
+ annofile = basedir + "/resources/" + "word_annotation.tsv" #annotation
99
+ tidfile = basedir + "/resources/" + "genemap.tsv"
100
+ seqshuffles = 5000 # currently hardcoded for database
101
+ sequences = nil
102
+ nwords = options[:wordsize].map{|x| 4**x}.to_statarray.sum
103
+ bg=options[:bg] # TODO, make option
104
+ threads=options[:threads]
105
+
106
+ ###
107
+ ### Main program
108
+ ###
109
+
110
+ puts ">> Parameters"
111
+ options.each{|k,v| puts sprintf("%-20s: %s",k,v) if !v.nil?}
112
+
113
+ # read in mirbase seed family
114
+ word_annotation = Hash.new("") # seq => family
115
+ IO.readlines(annofile).each{|l| word_annotation[l.split("\t")[0]] = l.split("\t")[1]}
116
+
117
+ # read optional sequences
118
+ if options[:seqfile]
119
+ puts ">> reading sequences ..."
120
+ sequences = Hash.new
121
+ IO.readlines(options[:seqfile],">")[1..-1].each do |entry|
122
+ ls = entry.split("\n").map{|x| x.chomp}
123
+ # hash ensures sequence ids unique
124
+ sequences[ls[0]] = ls[1..-2].join('').downcase.gsub('u','t') # last field is ">"
125
+ end
126
+ seqshuffles = options[:seqshuffles]
127
+ end
128
+
129
+ # initialize word id hash, word sequence => word id (0..nwords-1)
130
+ wids = Hash.new
131
+ i = 0
132
+ options[:wordsize].each{|ws| ['a','g','c','t'].rep_perm(ws) {|seqa| wids[seqa.join('')]=i ; i+=1 }}
133
+
134
+ ###
135
+ ### ID mapping
136
+ ###
137
+
138
+ # pre-computed word database:
139
+ # map ids given in rankfile to internal ids
140
+ # remove rankfile entries with no match to internal id
141
+ # sequence file:
142
+ # take intersection of rank and sequence IDs
143
+
144
+ puts ">> Mapping and filtering IDs ..."
145
+
146
+ all = []
147
+ begin
148
+ idmap = Hash.new
149
+ internal_ids = nil
150
+
151
+ if sequences
152
+ internal_ids = sequences
153
+ else
154
+ IO.readlines(tidfile).each do |l|
155
+ tid = l.split(" ")[0]
156
+ l.split(" ")[1].split(",").each{|extid| idmap[extid] = tid}
157
+ end
158
+ internal_ids = idmap.invert # allowed internal ids
159
+ end
160
+
161
+ allh = Hash.new {|h,k| h[k] = []}
162
+ filtered = 0
163
+
164
+ IO.readlines(options[:rankfile]).each do |l|
165
+ l = l.split("\t")
166
+ #test if internal id or mapable external id
167
+ tid = (internal_ids.key?(l[0]) ? l[0] : idmap[l[0]])
168
+ tid.nil? ? filtered += 1 : allh[tid] << l[1].to_f
169
+ end
170
+
171
+ # filter unknown sequences
172
+ sequences.keys.each{|id| sequences.delete(id) if !allh.key?(id)} if sequences
173
+
174
+ # we currently mean-collapse ids, we could allow mean/min/max collapsing ...
175
+ all = allh.to_a.map{|tid,values| [tid,values.to_statarray.mean]}
176
+
177
+ puts "removed #{filtered} invalid transcript ids" if filtered > 0
178
+ end
179
+
180
+ allorder = Hash.new # tid => index in all
181
+ all.each_with_index{|x,i| allorder[x[0]] = i}
182
+
183
+ ###
184
+ ### Word enumeration (optional)
185
+ ###
186
+
187
+ wordscores = []
188
+ if sequences
189
+ puts ">> Enumerating words in sequences"
190
+ wordscores = Array.new(all.size) {Array.new(wids.size,0)} # {Java::short[wids.size].new}
191
+ pbar = ProgressBar.new("progress",sequences.size)
192
+ all.peach(threads) do |seqid,val|
193
+ us = UShuffle.new
194
+ seq=sequences[seqid]
195
+ seqidx=allorder[seqid]
196
+ pbar.inc
197
+ seqsize = seq.size
198
+ observed = Array.new(wids.size,0)
199
+ options[:wordsize].each{|ws| (0..seqsize-ws).each{|i| wid = wids[seq[i, ws]]; observed[wid] += 1 if not wid.nil?}}
200
+
201
+ case options[:scoring_scheme]
202
+ when "bin" then wordscores[seqidx] = observed.map{|x| x > 0 ? 1 : -1}
203
+ when "obs" then wordscores[seqidx] = observed
204
+ else
205
+ # pval, compute distribution of expected word occurrences
206
+ us.init_shuffle(seq,bg)
207
+ seqshuffles.times do |si|
208
+ seqsh = us.shuffle
209
+ expected = Array.new(wids.size,0)
210
+ options[:wordsize].each{|ws| (0..seqsize-ws).each{|i| wid = wids[seqsh[i, ws]]; expected[wid] += 1 if !wid.nil?}}
211
+ observed.each_with_index{|x,widx| wordscores[seqidx][widx] += 1 if expected[widx]>=x} # count shuffles where expected >= observed
212
+ end
213
+ end
214
+ end
215
+ pbar.finish
216
+ end
217
+
218
+ ###
219
+ ### Generate list ranking
220
+ ###
221
+
222
+ analyze = []
223
+ if options[:rank_split_median]
224
+ # we should perhaps use an :inverse option,
225
+ # reversing the two pos and neg lists
226
+ med = all.map{|x| x[1]}.to_statarray.median
227
+ pos_set = all.select{|x| x[1] > med}.sort{|a,b| b[1] <=> a[1]}
228
+ neg_set = all.select{|x| x[1] <= med}.sort{|a,b| a[1] <=> b[1]}
229
+ analyze = [[pos_set,'med_positive'],[neg_set,'med_negative']]
230
+ elsif options[:rank_all] # do not split positive and negative range
231
+ pos_set = all.sort{|a,b| b[1] <=> a[1]}
232
+ neg_set = all.sort{|a,b| a[1] <=> b[1]}
233
+ analyze = [[pos_set,'all_positive'],[neg_set,'all_negative']]
234
+ elsif options[:rank_abs] # rank by absolute values
235
+ pos_set = all.map{|x| [x[0],x[1].abs]}.sort{|a,b| b[1] <=> a[1]}
236
+ neg_set = pos_set.reverse
237
+ analyze = [[pos_set,'abs_positive'],[neg_set,'abs_negative']]
238
+ else
239
+ pos_set = all.select{|x| x[1] > 0}.sort{|a,b| b[1] <=> a[1]}
240
+ neg_set = all.select{|x| x[1] < 0}.sort{|a,b| a[1] <=> b[1]}
241
+ analyze = [[pos_set,'positive'],[neg_set,'negative']]
242
+ end
243
+
244
+ # inverse lists
245
+ analyze.map!{|set,nm| [set.reverse,nm+".inv"]} if options[:rank_inverse]
246
+
247
+ # split sequence set when --split option is given
248
+ if options[:split_words]
249
+ seqs_with_words = Hash.new
250
+
251
+ options[:split_words].each do |split_word|
252
+ begin
253
+ IO.readlines(prankdir + split_word.downcase + ".rnk").each do |x|
254
+ l = x.split("\t")
255
+ seqs_with_words[l[0]] = 1 if l[1].to_i > 0
256
+ end
257
+ rescue
258
+ warn "could not split sequences on word #{split_word}: #{$!}" # interpolate: String + Exception would raise TypeError
259
+ end
260
+ end
261
+
262
+ analyze_split = []
263
+ analyze.each do |set,nm|
264
+ analyze_split += set.partition{|x| seqs_with_words.key?(x[0])}.zip([nm+".split+"+options[:split_words].join(","),nm+".split-"+options[:split_words].join(",")])
265
+ end
266
+ analyze = analyze_split
267
+ end
268
+
269
+ ###
270
+ ### Correlation analysis
271
+ ###
272
+
273
+ puts ">> Analyzing sequence sets: " + analyze.map{|x| x[1]}.join(", ")
274
+
275
+ analyze.each do |set,nm|
276
+ ngenes = set.size
277
+ puts "\n>> Analyzing #{nm} set ...\nnumber of genes: #{ngenes}"
278
+ next if ngenes == 0
279
+ perms = []
280
+ report = []
281
+ pfdrz = []
282
+
283
+ franks = Hash.new # tid => index in set
284
+ set.each_with_index{|x,i| franks[x[0]] = i}
285
+
286
+ puts "permuting #{options[:permutations]} times ...\n"
287
+ options[:permutations].times{|i| perms << (0..set.size-1).to_a.shuffle}
288
+
289
+ pbar = ProgressBar.new("progress",nwords)
290
+ wids.to_a.sort_by{|x| x[1]}.peach(threads) do |word,wid|
291
+ pbar.inc
292
+ next if options[:onlyanno] and not word_annotation.key?(word) #only process annotated words
293
+ next if options[:plot_words] and !options[:plot_words].include?(word)
294
+
295
+ plotfile = File.new(rankfilename + ".#{word}.#{nm}.csv","w") if options[:plot_words]
296
+
297
+ score = Array.new(ngenes) # scores ordered by fold change
298
+
299
+ if sequences
300
+ score = set.map{|x| wordscores[allorder[x[0]]][wid]}
301
+ score.map!{|x| -Math.log((x+1.0)/(seqshuffles+1))} if options[:scoring_scheme] == 'pval'
302
+ else # use precomputed word database
303
+ wordcounts = IO.readlines(prankdir + word + ".rnk").map{|x| x.split("\t")}.select{|x| franks.key?(x[0])}
304
+ case options[:scoring_scheme]
305
+ when "bin" then wordcounts.each{|id,obs,gte_obs,exp| score[franks[id]] = obs.to_i == 0 ? -1 : 1}
306
+ when "obs" then wordcounts.each{|id,obs,gte_obs,exp| score[franks[id]] = obs.to_f}
307
+ when "pval" then wordcounts.each{|id,obs,gte_obs,exp| score[franks[id]] = -Math.log((gte_obs.to_f+1)/(seqshuffles+1.0))}
308
+ end
309
+ end
310
+
311
+ smean = score.to_statarray.mean
312
+ maxrs = 0
313
+ leading_edge = 0
314
+ rs = 0 #running sum
315
+ rsa = [0]
316
+ score.each_with_index do |x,i|
317
+ rs += (x-smean)
318
+ rsa << rs
319
+ if rs.abs > maxrs.abs
320
+ maxrs = rs
321
+ leading_edge = i+1
322
+ end
323
+ end
324
+
325
+ plotfile.puts(([word+".score"] + [0] + score.map{|x| x.to_e(2)}).join(",")) if options[:plot_words]
326
+ plotfile.puts(([word+".rs"] + rsa).join(",")) if options[:plot_words]
327
+
328
+ # we are only interested in pos. maxrs scores,
329
+ # because we currently analyze up/down regulated separately
330
+ next if maxrs <= 0
331
+
332
+ pmaxrs_pos = StatArray.new
333
+ perms.each_with_index do |psa,pidx|
334
+ prs = 0
335
+ prsa = [0]
336
+ pmaxrs = 0
337
+ psa.each do |i|
338
+ prs += score[i]-smean
339
+ prsa << prs
340
+ pmaxrs = prs if prs.abs > pmaxrs.abs
341
+ end
342
+ # the permuted scores are approx. symmetric around 0
343
+ pmaxrs_pos << pmaxrs.abs
344
+ plotfile.puts(([word+".rs."+pidx.to_s] + prsa).join(",")) if options[:plot_words]
345
+ end
346
+
347
+ pmean = pmaxrs_pos.mean
348
+ pstd = pmaxrs_pos.stddev
349
+
350
+ #Because the word zscore distr. can be quite different,
351
+ # we compute the deviation from the mean of the absolute dist.
352
+ # The permuted maxRS should be normally distr. (sum of random numbers)
353
+ pfdrz += pmaxrs_pos.map{|x| (x-pmean)/pstd}
354
+
355
+ #pvalue and fdr statistic for word is also computed based on abs. dist.
356
+ pval = (pmaxrs_pos.select{|x| x>=maxrs}.size+1.0)/(pmaxrs_pos.size+1)
357
+ zsc = (maxrs-pmean)/pstd
358
+
359
+ plotfile.close if options[:plot_words]
360
+ report << [wid,zsc,pval,nil,leading_edge]
361
+
362
+ end # wordsize
363
+ pbar.finish
364
+
365
+ ###
366
+ ### FDR
367
+ ###
368
+
369
+ puts "fdr calculation ..."
370
+ fdrrank = pfdrz.map{|x| [x,nil]} # [zscore,word_report_index]
371
+ report.each_with_index{|x,idx| fdrrank << [x[1],idx]}
372
+ fdrrank = fdrrank.sort_by{|x| x[0]}.reverse # sort high zscore to low zscore
373
+ nfp = pfdrz.size.to_f
374
+ ntp = report.size.to_f
375
+ word_fdrrank = Hash.new()
376
+ ifp = 0
377
+ itp = 0
378
+ fdrrank.each do |zsc,idx|
379
+ if idx.nil?
380
+ ifp += 1
381
+ else
382
+ itp += 1
383
+ fpr = ifp/nfp
384
+ tpr = itp/ntp
385
+ report[idx][3] = fpr/tpr
386
+ end
387
+ end
388
+
389
+ cutoff_fdr = [0.001,0.005,0.01,0.05,0.1,0.15,0.2,0.25,0.5]
390
+ puts ""
391
+ puts (["fdr <="] + cutoff_fdr.map{|x| x.to_s(3)} + ["total"]).join("\t")
392
+ puts (["count"] + cutoff_fdr.map{|x| report.select{|y| y[3] <= x}.size} + [report.size]).join("\t")
393
+
394
+ ###
395
+ ### Output summarization
396
+ ###
397
+
398
+ wids2 = wids.invert
399
+ report = report.sort_by{|x| x[1]}.reverse
400
+ puts "\nTop #{output_top} words"
401
+ puts ['rank','word','z-score','p-value','fdr','ledge','annotation'].map{|x| sprintf("%-10s",x)}.join('')
402
+ report[0,output_top].each_with_index do |r,i|
403
+ wd = wids2[r[0]]
404
+ s = [i+1,wd,r[1].to_s(2),r[2].to_e(2),r[3].to_e(2),r[4].to_s,word_annotation[wd]]
405
+ puts s.map{|x| sprintf("%-10s",x)}.join('')
406
+ end
407
+
408
+ if options[:report_words]
409
+ puts "......"
410
+ report.each_with_index do |r,i|
411
+ if options[:report_words].include?(r[0]) # and i > output_top
412
+ wd = wids2[r[0]]
413
+ s = [i+1,wd,r[1].to_s(2),r[2].to_e(2),r[3].to_e(2),r[4].to_s,word_annotation[wd]]
414
+ puts s.map{|x| sprintf("%-10s",x)}.join('')
415
+ end
416
+ end
417
+ end
418
+
419
+ if options[:dump]
420
+ fname = rankfilename + ".#{nm}." + options[:dump].to_s
421
+ of = File.new(fname,"w")
422
+ of.puts ['rank','word','z-score','p-value','fdr','ledge','GS size','annotation'].map{|x| sprintf("%-10s",x)}.join('')
423
+ puts "dumping top #{options[:dump]} words in file: #{fname}"
424
+ report[0..options[:dump]-1].each_with_index do |r,i|
425
+ wd = wids2[r[0]]
426
+ s = [i+1,wd,r[1].to_s(2),r[2].to_e(2),r[3].to_e(2),r[4].to_s,word_annotation[wd]]
427
+ of.puts s.map{|x| sprintf("%-10s",x)}.join('')
428
+ end
429
+ end
430
+
431
+ end
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/ruby
2
+
3
+ srcdir = File.dirname(__FILE__)
4
+ basedir = srcdir + "/../" # File.dirname has no trailing slash, so the separator must be added explicitly
5
+ libdir = basedir + 'lib/'
6
+ $LOAD_PATH << libdir
7
+
8
+ require 'wordRS-lib.rb'
9
+ require 'progressbar'
10
+ require 'optparse'
11
+ require 'fileutils'
12
+
13
+ tdir = basedir + '/tmp/'
14
+ FileUtils.mkdir_p tdir # create dir if it does not exist
15
+
16
+ ###
17
+ ### Main
18
+ ###
19
+
20
+ #default options
21
+ options = Hash.new
22
+ options[:wordsize] = [7]
23
+ options[:seqfile] = nil
24
+ options[:partitions] = 1
25
+ options[:stats] = ['p'] # p=p
26
+ options[:ruby]='jruby --fast -J-Xmx1024m'
27
+ options[:shuffles]=5000
28
+ options[:bg]=1 #mononucleotide shuffling
29
+
30
+ $coptions = OptionParser.new do |opts|
31
+ opts.on("-w", "--wordsize ARG", "wordsize") { |o| options[:wordsize] = o.split(",").map{|x| x.to_i}}
32
+ opts.on("-s", "--seqfile ARG", "sequence file") {|o| options[:seqfile] = o}
33
+ opts.on("-p", "--partitions ARG", "number of sequence partitions") {|o| options[:partitions] = o.to_i}
34
+ opts.on("-a", "--stats ARG", "sequence file") {|o| options[:stats] = o.split('')}
35
+ opts.on("-u", "--shuffle ARG", "number of shuffles") {|o| options[:shuffles] = o.to_i}
36
+ opts.on("--ruby ARG", "ruby interpreter") {|o| options[:ruby] = o}
37
+ opts.on("-b", "--bg ARG", "background nucleotide model") {|o| options[:bg] = o.to_i}
38
+ end
39
+
40
+ def show_help(msg="", code=0, io=STDOUT) # print msg followed by the option summary to io, then terminate
41
+ io.puts "#{msg}\n#{$coptions}" # $coptions is the global OptionParser built above; interpolation renders its help text
42
+ exit(code) # NOTE(review): code defaults to 0, so callers passing only a message exit with a success status
43
+ end
44
+
45
+ $coptions.parse!(ARGV)
46
+ #mandatory parameters
47
+ [:seqfile].each{ |p| show_help("option '#{p}' mandatory") if options[p].nil?}
48
+
49
+ abort("seqfile must have fasta-format") if !options[:seqfile].match(/\.fa$/) # abort prints the message and exits non-zero; exit() expects a status, not a String
50
+ dbname = File.basename(options[:seqfile],'.fa')
51
+ dbdir = basedir + "/db/" + dbname + "_bg#{options[:bg]}"
52
+ FileUtils.mkdir_p dbdir # create dir if it does not exist
53
+
54
+ n=options[:partitions]
55
+
56
+ # word id's
57
+ @seqs = IO.readlines(options[:seqfile],"\n>")
58
+ puts "#{@seqs.size} sequences"
59
+
60
+ puts "purging database ..."
61
+ options[:wordsize].each do |wordsize|
62
+ ['a','g','c','t'].rep_perm(wordsize) {|seqa| wf = "#{dbdir}/#{seqa.join('')}.rnk"; File.delete(wf) if File.exist?(wf)}
63
+ end
64
+
65
+ puts "starting #{n} processes ..."
66
+
67
+ cmd = "#{options[:ruby]} #{basedir}/scripts/wordsrus_mkdb.rb"
68
+ cmd += " -w #{options[:wordsize].join(',')} -s #{options[:seqfile]} -a #{options[:stats].join(",")} -u #{options[:shuffles]} --bg #{options[:bg]}"
69
+
70
+ stamp = Time.now.to_i
71
+
72
+ partsize = @seqs.size/n
73
+ cmds = []
74
+ (n-1).times do |i|
75
+ cmds << cmd + " -p #{(i)*(partsize)+1}-#{(i+1)*(partsize)} &> #{tdir}#{dbname}_b#{options[:bg]}_#{i+1}_#{stamp}.dbout"
76
+ end
77
+ cmds << cmd + " -p #{partsize*(n-1)+1}-#{[n*(partsize),@seqs.size].max} &> #{tdir}#{dbname}_b#{options[:bg]}_#{n}_#{stamp}.dbout"
78
+ cmds.each do |c|
79
+ p c
80
+ exec c if fork.nil?
81
+ end
82
+
83
+ puts "Jobs started."
84
+ puts "Monitor with : tail #{tdir}#{dbname}_*#{stamp}.dbout"
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 3
9
- version: 0.1.3
8
+ - 4
9
+ version: 0.1.4
10
10
  platform: jruby
11
11
  authors:
12
12
  - Anders Jacobsen
@@ -64,6 +64,8 @@ files:
64
64
  - lib/wordRS-lib.rb
65
65
  - resources/genemap.tsv
66
66
  - resources/word_annotation.tsv
67
+ - scripts/cwords.rb
68
+ - scripts/cwords_mkdb.rb
67
69
  - scripts/cluster_words.rb
68
70
  - scripts/complementary_words.rb
69
71
  has_rdoc: true