cwords 0.1.3-jruby → 0.1.4-jruby

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/scripts/cwords.rb +431 -0
  2. data/scripts/cwords_mkdb.rb +84 -0
  3. metadata +4 -2
data/scripts/cwords.rb ADDED
@@ -0,0 +1,431 @@
1
+ #!/usr/bin/env jruby
2
+
3
+ ### Requires jruby, www.jruby.org
4
+
5
+ ###
6
+ ### Running sum analysis for 3 different measures of word enrichment in a sequence:
7
+ ### obs : use the observed word count
8
+ ### bin : use presence/absence of word
9
+ ### pval : use the p-value of the expected occurrences being >= the observed occurrence
10
+
11
+ srcdir = File.dirname(__FILE__)
12
+ basedir = srcdir + "/../"
13
+ libdir = basedir + '/lib/'
14
+ $LOAD_PATH << libdir
15
+
16
+ require 'wordRS-lib.rb'
17
+ require 'rubygems'
18
+ require 'progressbar'
19
+ require 'optparse'
20
+ require 'peach'
21
+ require 'java'
22
+ require libdir + 'ushuffle.jar'
23
+ java_import 'UShuffle'
24
+
25
+ #default options
26
+ options = Hash.new
27
+ options[:wordsize] = [7]
28
+ options[:split_words]=nil
29
+ options[:dbdir] = basedir + "db/"
30
+ options[:scoring_scheme] = 'pval'
31
+ options[:permutations]=50
32
+ options[:seqshuffles]=100
33
+ options[:rankfile]=nil
34
+ options[:seqfile]=nil
35
+ options[:report_words]=nil
36
+ options[:plot_words]=nil
37
+ options[:onlyanno]=nil
38
+ options[:dump]=nil
39
+ options[:testing]=nil
40
+ options[:rank_all]=nil
41
+ options[:rank_inverse]=nil
42
+ options[:rank_split_median]=nil
43
+ options[:rank_abs]=nil
44
+ options[:bg]=1 #mononucleotide shuffling
45
+ options[:threads]=1
46
+
47
+ $coptions = OptionParser.new do |opts|
48
+ opts.banner = "Usage: cwords [options]"
49
+
50
+ # analysis settings
51
+ opts.on("-c", "--scoring_scheme ARG", "scoring scheme") {|o| options[:scoring_scheme] = o}
52
+ opts.on("-p", "--permutations ARG", "number of list permutations") {|o| options[:permutations] = o.to_i}
53
+ opts.on("-q", "--shuffles ARG", "number of sequence shuffles for sequence bias correction") {|o| options[:seqshuffles] = o.to_i}
54
+ opts.on("-w", "--wordsize ARG", "wordsize") { |o| options[:wordsize] = o.split(",").map{|x| x.to_i}}
55
+ opts.on("-b", "--bg ARG", "background nucleotide model") {|o| options[:bg] = o.to_i}
56
+ opts.on("-t", "--threads ARG", "use multiple threads to parallelize computations") {|o| options[:threads] = o.to_i}
57
+ opts.on( "--split_words WORDS", "split sequence set based on occurrences of WORDS") {|o| options[:split_words] = o.split(",")}
58
+ opts.on( "--onlyanno", "only process annotated (i.e. mirbase) words") {|o| options[:onlyanno] = true}
59
+
60
+ # rank control
61
+ opts.on("-x", "--rank_all", "do not split positive and neg. values") {|o| options[:rank_all] = true}
62
+ opts.on("-m", "--rank_split_median", "split ranked list at median") {|o| options[:rank_split_median] = true}
63
+ opts.on("-i", "--rank_inverse", "inverse all ranked lists") {|o| options[:rank_inverse] = true}
64
+ opts.on("-a", "--rank_abs", "rank by absolute value") {|o| options[:rank_abs] = true}
65
+
66
+ # files and directories
67
+ opts.on("-r", "--rankfile ARG", "rank file") {|o| options[:rankfile] = o}
68
+ opts.on("-s", "--seqfile ARG", "sequence file") {|o| options[:seqfile] = o}
69
+ opts.on("-d", "--db ARG", "word database") { |o| options[:db] = o}
70
+
71
+ # output control
72
+ opts.on("-u", "--dump ARG", "dump top words") { |o| options[:dump] = o.to_i}
73
+ opts.on( "--report_words ARG", "report on words (comma separated)") {|o| options[:report_words] = o.split(',')}
74
+ opts.on( "--plot_words ARG", "only make plot files for words (comma separated)") {|o| options[:plot_words] = o.split(',')}
75
+ opts.on( "--testing", "testing mode") {|o| options[:testing] = true}
76
+ end
77
+
78
+ def show_help(msg="", code=0, io=STDOUT)
79
+ io.puts "#{msg}\n#{$coptions}"
80
+ exit(code)
81
+ end
82
+
83
+ $coptions.parse!(ARGV)
84
+ # mandatory parameters
85
+ [:rankfile].each{|p| show_help("option '#{p}' mandatory") if options[p].nil?}
86
+ show_help("db or seqfile required") if !(options[:db] or options[:seqfile])
87
+ show_help("scoring scheme must be one of: obs,bin,pval") if !(['obs','bin','pval'].include?(options[:scoring_scheme]))
88
+
89
+ testing = options[:testing]
90
+
91
+ # get filename without directory
92
+ rankfilename = File.basename(options[:rankfile])
93
+
94
+ # hard-coded
95
+ output_top = 10
96
+
97
+ prankdir = basedir + "/db/" + options[:db] + "/" if options[:db]
98
+ annofile = basedir + "/resources/" + "word_annotation.tsv" #annotation
99
+ tidfile = basedir + "/resources/" + "genemap.tsv"
100
+ seqshuffles = 5000 # currently hardcoded for database
101
+ sequences = nil
102
+ nwords = options[:wordsize].map{|x| 4**x}.to_statarray.sum
103
+ bg=options[:bg] # TODO, make option
104
+ threads=options[:threads]
105
+
106
+ ###
107
+ ### Main program
108
+ ###
109
+
110
+ puts ">> Parameters"
111
+ options.each{|k,v| puts sprintf("%-20s: %s",k,v) if !v.nil?}
112
+
113
+ # read in mirbase seed family
114
+ word_annotation = Hash.new("") # seq => family
115
+ IO.readlines(annofile).each{|l| word_annotation[l.split("\t")[0]] = l.split("\t")[1]}
116
+
117
+ # read optional sequences
118
+ if options[:seqfile]
119
+ puts ">> reading sequences ..."
120
+ sequences = Hash.new
121
+ IO.readlines(options[:seqfile],">")[1..-1].each do |entry|
122
+ ls = entry.split("\n").map{|x| x.chomp}
123
+ # hash ensures sequence ids unique
124
+ sequences[ls[0]] = ls[1..-2].join('').downcase.gsub('u','t') # last field is ">"
125
+ end
126
+ seqshuffles = options[:seqshuffles]
127
+ end
128
+
129
+ # initialize word id hash, word sequence => word id (0..nwords-1)
130
+ wids = Hash.new
131
+ i = 0
132
+ options[:wordsize].each{|ws| ['a','g','c','t'].rep_perm(ws) {|seqa| wids[seqa.join('')]=i ; i+=1 }}
133
+
134
+ ###
135
+ ### ID mapping
136
+ ###
137
+
138
+ # pre-computed word database:
139
+ # map ids given in rankfile to internal ids
140
+ # remove rankfile entries with no match to internal id
141
+ # sequence file:
142
+ # take intersection of rank and sequence IDs
143
+
144
+ puts ">> Mapping and filtering IDs ..."
145
+
146
+ all = []
147
+ begin
148
+ idmap = Hash.new
149
+ internal_ids = nil
150
+
151
+ if sequences
152
+ internal_ids = sequences
153
+ else
154
+ IO.readlines(tidfile).each do |l|
155
+ tid = l.split(" ")[0]
156
+ l.split(" ")[1].split(",").each{|extid| idmap[extid] = tid}
157
+ end
158
+ internal_ids = idmap.invert # allowed internal ids
159
+ end
160
+
161
+ allh = Hash.new {|h,k| h[k] = []}
162
+ filtered = 0
163
+
164
+ IO.readlines(options[:rankfile]).each do |l|
165
+ l = l.split("\t")
166
+ #test if internal id or mapable external id
167
+ tid = (internal_ids.key?(l[0]) ? l[0] : idmap[l[0]])
168
+ tid.nil? ? filtered += 1 : allh[tid] << l[1].to_f
169
+ end
170
+
171
+ # filter unknown sequences
172
+ sequences.keys.each{|id| sequences.delete(id) if !allh.key?(id)} if sequences
173
+
174
+ # we currently mean-collapse ids, we could allow mean/min/max collapsing ...
175
+ all = allh.to_a.map{|tid,values| [tid,values.to_statarray.mean]}
176
+
177
+ puts "removed #{filtered} invalid transcript ids" if filtered > 0
178
+ end
179
+
180
+ allorder = Hash.new # tid => index in all
181
+ all.each_with_index{|x,i| allorder[x[0]] = i}
182
+
183
+ ###
184
+ ### Word enumeration (optional)
185
+ ###
186
+
187
+ wordscores = []
188
+ if sequences
189
+ puts ">> Enumerating words in sequences"
190
+ wordscores = Array.new(all.size) {Array.new(wids.size,0)} # {Java::short[wids.size].new}
191
+ pbar = ProgressBar.new("progress",sequences.size)
192
+ all.peach(threads) do |seqid,val|
193
+ us = UShuffle.new
194
+ seq=sequences[seqid]
195
+ seqidx=allorder[seqid]
196
+ pbar.inc
197
+ seqsize = seq.size
198
+ observed = Array.new(wids.size,0)
199
+ options[:wordsize].each{|ws| (0..seqsize-ws).each{|i| wid = wids[seq[i, ws]]; observed[wid] += 1 if not wid.nil?}}
200
+
201
+ case options[:scoring_scheme]
202
+ when "bin" then wordscores[seqidx] = observed.map{|x| x > 0 ? 1 : -1}
203
+ when "obs" then wordscores[seqidx] = observed
204
+ else
205
+ # pval, compute distribution of expected word occurrences
206
+ us.init_shuffle(seq,bg)
207
+ seqshuffles.times do |si|
208
+ seqsh = us.shuffle
209
+ expected = Array.new(wids.size,0)
210
+ options[:wordsize].each{|ws| (0..seqsize-ws).each{|i| wid = wids[seqsh[i, ws]]; expected[wid] += 1 if !wid.nil?}}
211
+ observed.each_with_index{|x,widx| wordscores[seqidx][widx] =+ 1 if expected[widx]>=x}
212
+ end
213
+ end
214
+ end
215
+ pbar.finish
216
+ end
217
+
218
+ ###
219
+ ### Generate list ranking
220
+ ###
221
+
222
+ analyze = []
223
+ if options[:rank_split_median]
224
+ # we should perhaps use an :inverse option,
225
+ # reversing the two pos and neg lists
226
+ med = all.map{|x| x[1]}.to_statarray.median
227
+ pos_set = all.select{|x| x[1] > med}.sort{|a,b| b[1] <=> a[1]}
228
+ neg_set = all.select{|x| x[1] <= med}.sort{|a,b| a[1] <=> b[1]}
229
+ analyze = [[pos_set,'med_positive'],[neg_set,'med_negative']]
230
+ elsif options[:rank_all] # do not split positive and negative range
231
+ pos_set = all.sort{|a,b| b[1] <=> a[1]}
232
+ neg_set = all.sort{|a,b| a[1] <=> b[1]}
233
+ analyze = [[pos_set,'all_positive'],[neg_set,'all_negative']]
234
+ elsif options[:rank_abs] # rank by absolute values
235
+ pos_set = all.map{|x| [x[0],x[1].abs]}.sort{|a,b| b[1] <=> a[1]}
236
+ neg_set = pos_set.reverse
237
+ analyze = [[pos_set,'abs_positive'],[neg_set,'abs_negative']]
238
+ else
239
+ pos_set = all.select{|x| x[1] > 0}.sort{|a,b| b[1] <=> a[1]}
240
+ neg_set = all.select{|x| x[1] < 0}.sort{|a,b| a[1] <=> b[1]}
241
+ analyze = [[pos_set,'positive'],[neg_set,'negative']]
242
+ end
243
+
244
+ # inverse lists
245
+ analyze.map!{|set,nm| [set.reverse,nm+".inv"]} if options[:rank_inverse]
246
+
247
+ # split sequence set when --split option is given
248
+ if options[:split_words]
249
+ seqs_with_words = Hash.new
250
+
251
+ options[:split_words].each do |split_word|
252
+ begin
253
+ IO.readlines(prankdir + split_word.downcase + ".rnk").each do |x|
254
+ l = x.split("\t")
255
+ seqs_with_words[l[0]] = 1 if l[1].to_i > 0
256
+ end
257
+ rescue
258
+ warn "could not split sequences on word #{split_word}: " + $!
259
+ end
260
+ end
261
+
262
+ analyze_split = []
263
+ analyze.each do |set,nm|
264
+ analyze_split += set.partition{|x| seqs_with_words.key?(x[0])}.zip([nm+".split+"+options[:split_words].join(","),nm+".split-"+options[:split_words].join(",")])
265
+ end
266
+ analyze = analyze_split
267
+ end
268
+
269
+ ###
270
+ ### Correlation analysis
271
+ ###
272
+
273
+ puts ">> Analyzing sequence sets: " + analyze.map{|x| x[1]}.join(", ")
274
+
275
+ analyze.each do |set,nm|
276
+ ngenes = set.size
277
+ puts "\n>> Analyzing #{nm} set ...\nnumber of genes: #{ngenes}"
278
+ next if ngenes == 0
279
+ perms = []
280
+ report = []
281
+ pfdrz = []
282
+
283
+ franks = Hash.new # tid => index in set
284
+ set.each_with_index{|x,i| franks[x[0]] = i}
285
+
286
+ puts "permuting #{options[:permutations]} times ...\n"
287
+ options[:permutations].times{|i| perms << (0..set.size-1).to_a.shuffle}
288
+
289
+ pbar = ProgressBar.new("progress",nwords)
290
+ wids.to_a.sort_by{|x| x[1]}.peach(threads) do |word,wid|
291
+ pbar.inc
292
+ next if options[:onlyanno] and not word_annotation.key?(word) #only process annotated words
293
+ next if options[:plot_words] and !options[:plot_words].include?(word)
294
+
295
+ plotfile = File.new(rankfilename + ".#{word}.#{nm}.csv","w") if options[:plot_words]
296
+
297
+ score = Array.new(ngenes) # scores ordered by fold change
298
+
299
+ if sequences
300
+ score = set.map{|x| wordscores[allorder[x[0]]][wid]}
301
+ score.map!{|x| -Math.log((x+1.0)/(seqshuffles+1))} if options[:scoring_scheme] == 'pval'
302
+ else # use precomputed word database
303
+ wordcounts = IO.readlines(prankdir + word + ".rnk").map{|x| x.split("\t")}.select{|x| franks.key?(x[0])}
304
+ case options[:scoring_scheme]
305
+ when "bin" then wordcounts.each{|id,obs,gte_obs,exp| score[franks[id]] = obs.to_i == 0 ? -1 : 1}
306
+ when "obs" then wordcounts.each{|id,obs,gte_obs,exp| score[franks[id]] = obs.to_f}
307
+ when "pval" then wordcounts.each{|id,obs,gte_obs,exp| score[franks[id]] = -Math.log((gte_obs.to_f+1)/(seqshuffles+1.0))}
308
+ end
309
+ end
310
+
311
+ smean = score.to_statarray.mean
312
+ maxrs = 0
313
+ leading_edge = 0
314
+ rs = 0 #running sum
315
+ rsa = [0]
316
+ score.each_with_index do |x,i|
317
+ rs += (x-smean)
318
+ rsa << rs
319
+ if rs.abs > maxrs.abs
320
+ maxrs = rs
321
+ leading_edge = i+1
322
+ end
323
+ end
324
+
325
+ plotfile.puts(([word+".score"] + [0] + score.map{|x| x.to_e(2)}).join(",")) if options[:plot_words]
326
+ plotfile.puts(([word+".rs"] + rsa).join(",")) if options[:plot_words]
327
+
328
+ # we are only interested in pos. maxrs scores,
329
+ # because we currently analyze up/down regulated separately
330
+ next if maxrs <= 0
331
+
332
+ pmaxrs_pos = StatArray.new
333
+ perms.each_with_index do |psa,pidx|
334
+ prs = 0
335
+ prsa = [0]
336
+ pmaxrs = 0
337
+ psa.each do |i|
338
+ prs += score[i]-smean
339
+ prsa << prs
340
+ pmaxrs = prs if prs.abs > pmaxrs.abs
341
+ end
342
+ # the permuted scores are approx. symmetric around 0
343
+ pmaxrs_pos << pmaxrs.abs
344
+ plotfile.puts(([word+".rs."+pidx.to_s] + prsa).join(",")) if options[:plot_words]
345
+ end
346
+
347
+ pmean = pmaxrs_pos.mean
348
+ pstd = pmaxrs_pos.stddev
349
+
350
+ #Because the word zscore distr. can be quite different,
351
+ # we compute the deviation from the mean of the absolute dist.
352
+ # The permuted maxRS should be normally distr. (sum of random numbers)
353
+ pfdrz += pmaxrs_pos.map{|x| (x-pmean)/pstd}
354
+
355
+ #pvalue and fdr statistic for word is also computed based on abs. dist.
356
+ pval = (pmaxrs_pos.select{|x| x>=maxrs}.size+1.0)/(pmaxrs_pos.size+1)
357
+ zsc = (maxrs-pmean)/pstd
358
+
359
+ plotfile.close if options[:plot_words]
360
+ report << [wid,zsc,pval,nil,leading_edge]
361
+
362
+ end # wordsize
363
+ pbar.finish
364
+
365
+ ###
366
+ ### FDR
367
+ ###
368
+
369
+ puts "fdr calculation ..."
370
+ fdrrank = pfdrz.map{|x| [x,nil]} # [zscore,word_report_index]
371
+ report.each_with_index{|x,idx| fdrrank << [x[1],idx]}
372
+ fdrrank = fdrrank.sort_by{|x| x[0]}.reverse # sort high zscore to low zscore
373
+ nfp = pfdrz.size.to_f
374
+ ntp = report.size.to_f
375
+ word_fdrrank = Hash.new()
376
+ ifp = 0
377
+ itp = 0
378
+ fdrrank.each do |zsc,idx|
379
+ if idx.nil?
380
+ ifp += 1
381
+ else
382
+ itp += 1
383
+ fpr = ifp/nfp
384
+ tpr = itp/ntp
385
+ report[idx][3] = fpr/tpr
386
+ end
387
+ end
388
+
389
+ cutoff_fdr = [0.001,0.005,0.01,0.05,0.1,0.15,0.2,0.25,0.5]
390
+ puts ""
391
+ puts (["fdr <="] + cutoff_fdr.map{|x| x.to_s(3)} + ["total"]).join("\t")
392
+ puts (["count"] + cutoff_fdr.map{|x| report.select{|y| y[3] <= x}.size} + [report.size]).join("\t")
393
+
394
+ ###
395
+ ### Output summarization
396
+ ###
397
+
398
+ wids2 = wids.invert
399
+ report = report.sort_by{|x| x[1]}.reverse
400
+ puts "\nTop #{output_top} words"
401
+ puts ['rank','word','z-score','p-value','fdr','ledge','annotation'].map{|x| sprintf("%-10s",x)}.join('')
402
+ report[0,output_top].each_with_index do |r,i|
403
+ wd = wids2[r[0]]
404
+ s = [i+1,wd,r[1].to_s(2),r[2].to_e(2),r[3].to_e(2),r[4].to_s,word_annotation[wd]]
405
+ puts s.map{|x| sprintf("%-10s",x)}.join('')
406
+ end
407
+
408
+ if options[:report_words]
409
+ puts "......"
410
+ report.each_with_index do |r,i|
411
+ if options[:report_words].include?(r[0]) # and i > output_top
412
+ wd = wids2[r[0]]
413
+ s = [i+1,wd,r[1].to_s(2),r[2].to_e(2),r[3].to_e(2),r[4].to_s,word_annotation[wd]]
414
+ puts s.map{|x| sprintf("%-10s",x)}.join('')
415
+ end
416
+ end
417
+ end
418
+
419
+ if options[:dump]
420
+ fname = rankfilename + ".#{nm}." + options[:dump].to_s
421
+ of = File.new(fname,"w")
422
+ of.puts ['rank','word','z-score','p-value','fdr','ledge','GS size','annotation'].map{|x| sprintf("%-10s",x)}.join('')
423
+ puts "dumping top #{options[:dump]} words in file: #{fname}"
424
+ report[0..options[:dump]-1].each_with_index do |r,i|
425
+ wd = wids2[r[0]]
426
+ s = [i+1,wd,r[1].to_s(2),r[2].to_e(2),r[3].to_e(2),r[4].to_s,word_annotation[wd]]
427
+ of.puts s.map{|x| sprintf("%-10s",x)}.join('')
428
+ end
429
+ end
430
+
431
+ end
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/ruby
2
+
3
+ srcdir = File.dirname(__FILE__)
4
+ basedir = srcdir + "../"
5
+ libdir = basedir + 'lib/'
6
+ $LOAD_PATH << libdir
7
+
8
+ require 'wordRS-lib.rb'
9
+ require 'progressbar'
10
+ require 'optparse'
11
+ require 'fileutils'
12
+
13
+ tdir = basedir + '/tmp/'
14
+ FileUtils.mkdir_p tdir # create dir if it does not exist
15
+
16
+ ###
17
+ ### Main
18
+ ###
19
+
20
+ #default options
21
+ options = Hash.new
22
+ options[:wordsize] = [7]
23
+ options[:seqfile] = nil
24
+ options[:partitions] = 1
25
+ options[:stats] = ['p'] # p=p
26
+ options[:ruby]='jruby --fast -J-Xmx1024m'
27
+ options[:shuffles]=5000
28
+ options[:bg]=1 #mononucleotide shuffling
29
+
30
+ $coptions = OptionParser.new do |opts|
31
+ opts.on("-w", "--wordsize ARG", "wordsize") { |o| options[:wordsize] = o.split(",").map{|x| x.to_i}}
32
+ opts.on("-s", "--seqfile ARG", "sequence file") {|o| options[:seqfile] = o}
33
+ opts.on("-p", "--partitions ARG", "number of sequence partitions") {|o| options[:partitions] = o.to_i}
34
+ opts.on("-a", "--stats ARG", "sequence file") {|o| options[:stats] = o.split('')}
35
+ opts.on("-u", "--shuffle ARG", "number of shuffles") {|o| options[:shuffles] = o.to_i}
36
+ opts.on("--ruby ARG", "ruby interpreter") {|o| options[:ruby] = o}
37
+ opts.on("-b", "--bg ARG", "background nucleotide model") {|o| options[:bg] = o.to_i}
38
+ end
39
+
40
+ def show_help(msg="", code=0, io=STDOUT)
41
+ io.puts "#{msg}\n#{$coptions}"
42
+ exit(code)
43
+ end
44
+
45
+ $coptions.parse!(ARGV)
46
+ #mandatory parameters
47
+ [:seqfile].each{ |p| show_help("option '#{p}' mandatory") if options[p].nil?}
48
+
49
+ exit("seqfile must have fasta-format") if !options[:seqfile].match(/.fa$/)
50
+ dbname = File.basename(options[:seqfile],'.fa')
51
+ dbdir = basedir + "/db/" + dbname + "_bg#{options[:bg]}"
52
+ FileUtils.mkdir_p dbdir # create dir if it does not exist
53
+
54
+ n=options[:partitions]
55
+
56
+ # word id's
57
+ @seqs = IO.readlines(options[:seqfile],"\n>")
58
+ puts "#{@seqs.size} sequences"
59
+
60
+ puts "purging database ..."
61
+ options[:wordsize].each do |wordsize|
62
+ ['a','g','c','t'].rep_perm(wordsize) {|seqa| wf = "#{dbdir}/#{seqa.join('')}.rnk"; File.delete(wf) if File.exist?(wf)}
63
+ end
64
+
65
+ puts "starting #{n} processes ..."
66
+
67
+ cmd = "#{options[:ruby]} #{basedir}/scripts/wordsrus_mkdb.rb"
68
+ cmd += " -w #{options[:wordsize].join(',')} -s #{options[:seqfile]} -a #{options[:stats].join(",")} -u #{options[:shuffles]} --bg #{options[:bg]}"
69
+
70
+ stamp = Time.now.to_i
71
+
72
+ partsize = @seqs.size/n
73
+ cmds = []
74
+ (n-1).times do |i|
75
+ cmds << cmd + " -p #{(i)*(partsize)+1}-#{(i+1)*(partsize)} &> #{tdir}#{dbname}_b#{options[:bg]}_#{i+1}_#{stamp}.dbout"
76
+ end
77
+ cmds << cmd + " -p #{partsize*(n-1)+1}-#{[n*(partsize),@seqs.size].max} &> #{tdir}#{dbname}_b#{options[:bg]}_#{n}_#{stamp}.dbout"
78
+ cmds.each do |c|
79
+ p c
80
+ exec c if fork.nil?
81
+ end
82
+
83
+ puts "Jobs started."
84
+ puts "Monitor with : tail #{tdir}#{dbname}_*#{stamp}.dbout"
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 3
9
- version: 0.1.3
8
+ - 4
9
+ version: 0.1.4
10
10
  platform: jruby
11
11
  authors:
12
12
  - Anders Jacobsen
@@ -64,6 +64,8 @@ files:
64
64
  - lib/wordRS-lib.rb
65
65
  - resources/genemap.tsv
66
66
  - resources/word_annotation.tsv
67
+ - scripts/cwords.rb
68
+ - scripts/cwords_mkdb.rb
67
69
  - scripts/cluster_words.rb
68
70
  - scripts/complementary_words.rb
69
71
  has_rdoc: true