immunoscore_results_aggregator 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/analyzer.rb ADDED
@@ -0,0 +1,664 @@
1
+ require 'bundler/setup'
2
+
3
+ require "mongo_mapper"
4
+ require "csv"
5
+ require "gibberish"
6
+ require 'bicrypt'
7
+ require "chronic"
8
+
9
+
10
+
11
+ require_relative "database_connection"
12
+
13
+
14
+
15
+ MongoMapper.database = DATABASE_NAME
16
+
17
+
18
# Prints a prompt and reads one line of user input.
#
# *args - forwarded unchanged to Kernel#print.
#
# Reads explicitly from $stdin: bare Kernel#gets reads from the files named
# in ARGV when any are given, so a script started with arguments would
# consume a file instead of asking the user.
#
# Returns the entered line with surrounding whitespace removed, or "" when
# standard input is already at end-of-file (gets returns nil there).
def prompt(*args)
  print(*args)
  $stdin.gets.to_s.strip
end
22
+
23
+
24
+
25
+
26
### String helpers for encryption, decryption and hashing
# Reopens the core String class (monkeypatch).
class String
  # Asks for the cypher through the top-level +prompt+ helper and stores a
  # BiCrypt instance built from it in the global $e.
  def self.set_encryption
    $e = BiCrypt.new(prompt("enter encryption cypher: "))
  end

  # Base64-decodes the receiver (re-encoded as ascii-8bit) and passes the
  # result to $e.decrypt_string. Sets up $e first when it is still nil.
  def decrypt
    String.set_encryption if $e.nil?
    raw = Base64.decode64(encode('ascii-8bit'))
    $e.decrypt_string raw
  end

  # Passes the receiver to $e.encrypt_string and returns the result
  # Base64-encoded and re-encoded as utf-8. Sets up $e first when it is nil.
  def encrypt
    String.set_encryption if $e.nil?
    cipher_text = $e.encrypt_string(self)
    Base64.encode64(cipher_text).encode('utf-8')
  end

  # Hands the receiver to Gibberish::MD5 after removing every "-" and
  # stripping surrounding whitespace (some normalization of entries is
  # necessary before hashing).
  def md5
    normalized = gsub("-", "").strip
    Gibberish::MD5(normalized)
  end
end
47
+
48
### Builds an anonymous MongoMapper document class
#
# class_name - used to name the instance variable ("@<class_name>") that
#              keeps a reference to the new class on the calling object.
#
# Returns the new class, which includes MongoMapper::Document.
def make_mongo_class class_name
  ivar = "@#{class_name}"
  klass = Class.new
  instance_variable_set ivar, klass
  klass.class_eval { include MongoMapper::Document }
  klass
end
57
+
58
### Utility for extrapolating the Mongo type from a Ruby class name
# (dates are not handled yet)
class String
  # Maps a Ruby class name to the type name used for MongoMapper keys.
  #
  # "String" maps to "String". "Fixnum", "Bignum" and "Integer" all map to
  # "Integer": since Ruby 2.4 integers report their class as "Integer", so
  # matching only "Fixnum" would return nil on every modern interpreter.
  #
  # Returns the mapped type name, or nil for any other receiver.
  def correct
    case self
    when "String"
      "String"
    when "Fixnum", "Bignum", "Integer"
      "Integer"
    end
  end
end
70
+
71
# Tempfile is used below and is not loaded by requiring csv alone.
require "tempfile"

## Array of rows to CSV table
class Array
  # Converts an array of rows into a CSV::Table.
  #
  # Every element must respond to #to_csv (plain arrays and CSV::Row both
  # do). The first element becomes the header row. The rows are written to
  # a temporary file, whose path is printed, and read back with CSV.table.
  #
  # The temporary file is removed through Tempfile#close! in an ensure
  # clause, so it is cleaned up even when parsing raises and no external
  # `rm` process is needed.
  #
  # Returns the CSV::Table produced by CSV.table.
  def to_table
    t = Tempfile.new("foo")
    begin
      each { |row| t.write row.to_csv }
      t.close
      puts t.path
      CSV.table t.path
    ensure
      t.close!
    end
  end
end
85
+
86
### Splits a name into its components
#
# text_string - a String such as "Smith, John" or "Smith".
#
# Splits on any run of whitespace and/or commas in a single pass. The
# previous split(" ").split(",") called Array#split, which core Ruby does
# not define and which never splits on a comma inside a token such as
# "Smith,John".
#
# Always returns an Array of non-empty Strings; a string containing neither
# a space nor a comma is returned as a one-element array.
def name_split text_string
  return [text_string] unless text_string.index(" ") || text_string.index(",")

  text_string.split(/[\s,]+/).reject(&:empty?)
end
96
+
97
+
98
### Removes names from surg path text
#
# text  - the String to scrub; it is modified in place with gsub!.
# names - a String or an Array of Strings holding the names to remove;
#         each entry is broken into components with name_split.
#
# Every component is removed case-insensitively. Components are escaped
# before being turned into a Regexp, so characters such as "(" or "." in a
# name are matched literally instead of raising or over-matching.
# NOTE(review): the Array branch used to call split_if_space, which is not
# defined in this file; name_split is assumed to be the intended helper.
# The names array passed in is no longer mutated.
#
# Returns text (nil when text is nil).
def names_cleaner text, names
  return text if text.nil?

  parts = Array(names).flat_map { |name| name_split name.to_s }
  parts.reject(&:empty?).each do |part|
    text.gsub! Regexp.new(Regexp.escape(part), Regexp::IGNORECASE), ""
    puts "cleaned #{part}"
  end
  text
end
112
+
113
+
114
class CSV::Row
  # Returns the row as a Hash (delegates to CSV::Row#to_hash).
  def pp
    to_hash
  end
end
120
+
121
+
122
+
123
+
124
### A modified CSV table class
# Adds column-wise encryption/decryption, hashing, name scrubbing, saving,
# row search and MongoMapper export helpers to CSV::Table.
class CSV::Table
  attr_accessor :file_path, :data_classifier

  # Replaces every value in col_name with String#encrypt of its string form.
  # Sets up the global cypher $e first when it is still nil.
  def encrypt col_name
    String.set_encryption if $e.nil?
    each { |row| row[col_name] = row[col_name].to_s.encrypt }
  end

  # Encrypts each column listed in col_names_array.
  def encrypt_col_names col_names_array
    col_names_array.each { |col_name| encrypt col_name }
  end

  # Replaces every value in col_name with String#decrypt of its string form.
  # Sets up the global cypher $e first when it is still nil.
  def decrypt col_name
    String.set_encryption if $e.nil?
    each { |row| row[col_name] = row[col_name].to_s.decrypt }
  end

  # For every row, passes the diagnosis_col text and the names_col value to
  # names_cleaner and stores the result back in diagnosis_col.
  def clean_names names_col, diagnosis_col
    each do |row|
      row[diagnosis_col] = names_cleaner row[diagnosis_col], row[names_col]
    end
  end

  # Replaces every value in col_name with String#md5 of its string form.
  def md5 col_name
    each { |row| row[col_name] = row[col_name].to_s.md5 }
  end

  # Writes the headers followed by the fields of every row to file_name.
  def save file_name
    CSV.open(file_name, "wb") do |csv|
      csv << headers
      each { |line| csv << line.fields }
    end
  end

  # Builds a new table from the rows whose col_name value matches regex.
  #
  #   z.find_rows :diagnosis_text, /.(T\d)./i
  #
  # col_name - column to search.
  # regex    - pattern each cell is matched against.
  # decrypt  - when true the column is decrypted in place before searching.
  #
  # Cells are matched on their string form: an empty field is nil and
  # numeric converters yield numbers, neither of which accepts #match.
  # The index of every matching row is printed.
  #
  # Returns the result of Array#to_table on the headers plus matching rows.
  def find_rows col_name, regex, decrypt=false
    new_table = [headers]
    self.decrypt col_name if decrypt
    self[col_name].each_with_index do |cell, i|
      next unless cell.to_s.match regex

      puts i
      new_table << self[i]
    end
    new_table.to_table
  end

  # Creates one mongo_class instance per row, copies every header/value pair
  # into it, saves it, and prints progress along with mongo_class.count.
  def to_mongo mongo_class
    each_with_index do |row, i|
      doc = mongo_class.new
      headers.each do |header|
        puts "working on #{header} in row #{i}"
        doc[header] = row[header]
      end
      doc.save
      puts "#{i}: #{mongo_class.count}"
    end
  end

  # Prints the class definition generated by the attached data_classifier.
  # NOTE(review): load_table stores false in data_classifier when building
  # the classifier fails; this call raises NoMethodError in that case.
  def mongo_code
    data_classifier.print_class
  end
  alias :print_mongo :mongo_code
end
201
+
202
### Checks whether a file is a semicolon-separated CSV
# by counting ";" and "," in the header line.
#
# file_path - path of the file to inspect.
#
# Only the first line is read, in binary mode. The previous implementation
# parsed the whole file with CSV.read just to look at its first cell, which
# raised on malformed or oddly encoded files. Prints file_path when the
# file is not empty.
#
# Returns true when the header holds more ";" than ",", false otherwise
# (including for an empty file).
def is_semicolon? file_path
  header = File.open(file_path, "rb") { |fh| fh.gets }
  return false if header.nil?

  puts file_path
  header.count(";") > header.count(",")
end
217
+
218
### Checks whether a file is tab-separated
# by counting tabs and "," in the header line.
#
# file_path - path of the file to inspect.
#
# Only the first line is read, in binary mode. The previous implementation
# parsed the whole file with CSV.read just to look at its first cell, which
# raised on malformed or oddly encoded files. Prints file_path when the
# file is not empty.
#
# Returns true when the header holds more tabs than ",", false otherwise
# (including for an empty file).
def is_tab? file_path
  header = File.open(file_path, "rb") { |fh| fh.gets }
  return false if header.nil?

  puts file_path
  header.count("\t") > header.count(",")
end
233
+
234
+
235
### Rewrites a semicolon-separated file as comma-separated
# ; => ,
#
# file_path - path of the file; it is overwritten in place.
#
# The file is parsed with CSV.table, so the rewritten file carries the
# header and value conversions CSV.table applies. The block form of
# File.open guarantees the handle is closed even when the write raises.
#
# Returns file_path.
def remove_semicolon file_path
  puts "removing semicolons in #{file_path}"
  table = CSV.table file_path, :col_sep => ";"
  File.open(file_path, "w") { |fh| fh.write table.to_csv }
  file_path
end
245
+
246
### Rewrites a tab-separated file as comma-separated
# \t => ,
#
# file_path - path of the file; it is overwritten in place.
#
# The file is parsed with CSV.table, so the rewritten file carries the
# header and value conversions CSV.table applies. When the file has an
# ".xls" extension the same CSV text is also written next to it under a
# ".csv" extension. Only the trailing extension is swapped: replacing every
# ".xls" in the path would corrupt directory names that contain it. The
# copy is written from Ruby, so paths with spaces or shell metacharacters
# are safe.
#
# Returns the ".csv" path for ".xls" input, file_path otherwise.
def remove_tabs file_path
  puts "removing tabs in #{file_path}"
  csv_text = CSV.table(file_path, :col_sep => "\t").to_csv
  File.open(file_path, "w") { |fh| fh.write csv_text }

  ext = File.extname file_path
  return file_path unless ext == ".xls"

  csv_path = file_path.chomp(ext) + ".csv"
  File.open(csv_path, "w") { |fh| fh.write csv_text }
  csv_path
end
262
+
263
+
264
### A modified CSV table class
# Adds duplicate removal.
class CSV::Table
  attr_accessor :file_path, :data_classifier

  # Removes rows whose value in col already appeared in an earlier row.
  #
  # col - the column to search through.
  #
  # The first row for each value is kept. Rows are filtered in one
  # delete_if pass with a hash of seen values; the previous version deleted
  # rows while iterating over them, which left duplicates behind once a
  # value occurred four or more times.
  #
  # Returns the table itself.
  def remove_duplicate col
    seen = {}
    delete_if do |row|
      entry = row[col]
      duplicate = seen.key?(entry)
      seen[entry] = true
      duplicate
    end
  end
end
284
+
285
+
286
+
287
# Derives a class name from a file path.
#
# Every "." becomes "_" and every "@", "%" and "-" is dropped; the basename
# of the result is then passed through String#camelize. Because the dots
# are replaced before the basename is taken, the extension stays part of
# the name.
def make_class_name file_path
  sanitized = file_path.gsub(".", "_").gsub(/[@%\-]/, "")
  File.basename(sanitized).camelize
end
291
+
292
+
293
# Guesses a MongoMapper key type from the text of a CSV cell.
module StringToMongo

  # True when text_string is nil.
  #
  # Called without an argument it falls back to the regular Object#nil?,
  # so generic code asking the module itself `nil?` keeps working even
  # though this definition shadows that method.
  def self.nil?(*args)
    return super() if args.empty?
    if args.size > 1
      raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 0..1)"
    end

    args.first == nil
  end

  # True when the whole string is an optionally signed run of digits.
  # Anchored with \A and \z so empty strings and multi-line values no
  # longer qualify.
  def self.integer? text_string
    !!(text_string.to_s =~ /\A[+-]?\d+\z/)
  end

  # True when the whole string is an optionally signed decimal number with
  # at least one digit around the point ("1.5", "1.", ".5"); a lone "."
  # does not qualify.
  def self.float? text_string
    !!(text_string.to_s =~ /\A[+-]?(\d+\.\d*|\.\d+)\z/)
  end

  # True when Chronic.parse returns something other than nil.
  def self.date? text_string
    !Chronic.parse(text_string).nil?
  end

  # Returns "Integer", "Float", "Time" or "String" for the given cell text.
  # The checks run in that order; nil and anything unrecognized map to
  # "String".
  def self.mongo_type text_string
    if nil?(text_string)
      "String"
    elsif integer?(text_string)
      "Integer"
    elsif float?(text_string)
      "Float"
    elsif date?(text_string)
      "Time"
    else
      "String"
    end
  end
end
342
+
343
# Helpers for normalizing column names.
module ColName
  # Spelled-out replacements for the digits fix_numbers handles.
  DIGIT_WORDS = {
    "1" => "one", "2" => "two", "3" => "three",
    "4" => "four", "5" => "five", "6" => "six",
    "7" => "seven", "8" => "eight", "9" => "nine"
  }.freeze

  # Returns a copy of col_name with every "." and " " replaced by "_".
  def self.fix_spaces col_name
    col_name.gsub(".","_").gsub(" ","_")
  end

  # Spells out the first digit found in col_name.
  #
  # Every occurrence of that digit is replaced with its English word,
  # modifying col_name in place. "0" and any other digits present are left
  # untouched. The digit is taken as a String: the previous
  # scan(/(\d)/)[0] produced a one-element Array, so none of the
  # `when "1"` style branches could ever match and nothing was replaced.
  #
  # Prints "no match" when there is no digit, otherwise the captured digit.
  #
  # Returns col_name.
  def self.fix_numbers col_name
    digit = col_name[/\d/]
    if digit.nil?
      puts "no match"
      return col_name
    end

    puts "captures #{digit} #{digit.class}"
    word = DIGIT_WORDS[digit]
    if word
      puts digit
      col_name.gsub!(digit, word)
    end
    col_name
  end
end
389
+
390
### Creates a MongoMapper class definition that maps a csv file
#
#   d=DataClassifier.new "/Users/carlobifulco/Dropbox/code/next_gen/hotspot2.csv"
#   d.print_class
class DataClassifier
  attr_accessor :headers, :fs_line,:keys_types,:file_name,:template

  # file_name - path of the csv file to describe.
  #
  # Reads the header row and the first data row, derives the class name
  # with make_class_name, and prepares the opening lines of the class
  # definition in @template.
  def initialize file_name="test.csv"
    @header_zip = cheap_headers file_name
    @file_name = file_name
    @class_name = make_class_name file_name
    @template="""
class #{@class_name}
include MongoMapper::Document
include DataUtilities
safe
timestamps!
"""
  end

  # Pairs each header with the matching cell of the first data row.
  #
  # Only the first two rows are read. Headers are lowercased and "." is
  # replaced by "_". Empty header cells (nil in CSV) are turned into ""
  # and then dropped together with their value instead of raising. A file
  # with only a header row yields nil sample values and an empty file
  # yields [].
  #
  # Returns an Array of [header, sample_value] pairs.
  def cheap_headers file_path
    rows = []
    CSV.foreach(file_path) do |row|
      rows << row
      break if rows.size >= 2
    end
    return [] if rows.empty?

    names = rows[0].map { |name| name.to_s.gsub(".", "_").downcase }
    sample = rows[1] || []
    names.zip(sample).reject { |name, _value| name.empty? }
  end

  # Returns a Hash mapping each header (spaces replaced by "_", as a
  # Symbol) to the type name StringToMongo.mongo_type derives from the
  # sample value. The underscored name was previously computed but never
  # used, so headers containing spaces produced unusable keys.
  def mongo_types
    @header_zip.each_with_object({}) do |(name, sample), types|
      types[name.gsub(" ", "_").to_sym] = StringToMongo.mongo_type sample
    end
  end

  # Prints the class: the template, one key line per header sorted by
  # name, and the closing "end".
  def print_class
    puts template
    mongo_types.sort_by { |name, _type| name }.each do |name, type|
      puts " key :#{name}, #{type} "
    end
    puts "end"
  end

end
447
+
448
+
449
+
450
+
451
### Removes columns with an empty header from a CSV file
# also deals with some windows encoding issues if needed
#
# file_path - path of the file; it is rewritten in place only when at
#             least one header is empty.
#
# When the first read raises, the same file is read again as windows-1251
# transcoded to utf-8. The retry previously read @file_name, which is not
# set at this level, so it could never succeed.
#
# Returns nil.
def remove_nil_headers file_path
  table =
    begin
      CSV.read(file_path, :headers => true)
    rescue
      CSV.read(file_path, :headers => true, :encoding => 'windows-1251:utf-8')
    end
  return unless table.headers.include? nil

  # Column mode makes Table#delete with an index remove a column.
  table.by_col!
  while (blank = table.headers.index(nil))
    table.delete(blank)
  end
  File.open(file_path, "w") { |fh| fh.write table.to_csv }
  nil
end
469
+
470
+
471
### Factory for new tables
# Converts semicolon- or tab-separated files to comma-separated first,
# removes columns with empty headers, and stores the file path on the
# table. A DataClassifier for the file is attached as data_classifier;
# when building it raises, false is stored instead.
#
# Returns the CSV::Table read with CSV.table.
def load_table file_path
  file_path =
    if is_semicolon?(file_path)
      remove_semicolon(file_path)
    elsif is_tab?(file_path)
      remove_tabs(file_path)
    else
      file_path
    end
  remove_nil_headers file_path

  table = CSV.table file_path
  table.file_path = file_path
  table.data_classifier =
    begin
      DataClassifier.new file_path
    rescue
      false
    end
  table
end
492
+
493
+ ### MongoLoader
494
+ # takes a class and a file csv file and then load it into Mongo
495
+ # def mongo_loader mongo_class, file_path
496
+ # counter=0
497
+ # CSV.foreach(file_path) do |row|
498
+ # #puts row
499
+ # puts counter
500
+ # if counter==0
501
+ # @headers=row
502
+ # counter+=1
503
+ # next
504
+ # end
505
+ # puts counter
506
+ # m=mongo_class.new
507
+ # row.each_with_index do |e,i|
508
+
509
+ # #puts "HEADERS: #{@headers}"
510
+ # #puts @headers[i]
511
+ # m[@headers[i].gsub(".","_").downcase]=e
512
+ # m.save
513
+
514
+
515
+ # end
516
+ # puts counter
517
+ # counter+=1
518
+ # end
519
+ # ""
520
+ # end
521
+
522
+
523
+
524
### Load a CSV file into a mongo class
# The mongo class needs to exist already.
#
# file_name   - path of the csv file, read with CSV.table.
# mongo_class - class instantiated once per row; every header/value pair
#               is assigned with []= before save is called. After each
#               row its index and mongo_class.count are printed.
def csv_to_mongo file_name="test.csv",mongo_class=TestCsv
  table = CSV.table file_name
  columns = table.headers
  table.each_with_index do |row, index|
    doc = mongo_class.new
    columns.each { |column| doc[column] = row[column] }
    doc.save
    puts "#{index}: #{mongo_class.count}"
  end
end
537
+
538
+
539
+
540
### Convenience methods for MongoMapper document instances
#
# export to csv
#
# pretty printing of keys
module DataUtilities

  ### Export csv
  #
  # file_path - the file to be exported to
  #
  # Writes the sorted key names of the receiver's class as header row,
  # then one line per record returned by the class's `all`, each value
  # converted with to_s. The headers and the csv object are printed.
  def export file_path
    CSV.open(file_path, "wb") do |csv|
      columns = self.class.keys.keys.sort
      puts columns
      csv << columns
      self.class.all.each do |record|
        csv << columns.map { |column| record[column].to_s }
      end
      puts csv
    end
  end

  # Prints "key: value" for every key of the receiver, sorted by key name.
  # Returns nil.
  def pp
    keys.keys.sort.each { |k| puts "#{k}: #{self[k]}" }
    nil
  end

  # Returns the same listing as pp as one String, each entry formatted as
  # " key: value;".
  def pp_to_s
    keys.keys.sort.map { |k| " #{k}: #{self[k]};" }.join ""
  end

end
583
+
584
+
585
# Class-level counterpart of DataUtilities#export.
module ClassDataUtilities
  # Exports every record returned by `all` to a csv file.
  #
  # file_path - the file to be exported to
  #
  # Writes the receiver's sorted key names as header row, then one line
  # per record with each value converted with to_s. The headers and the
  # csv object are printed.
  def export file_path
    CSV.open(file_path, "wb") do |csv|
      columns = keys.keys.sort
      puts columns
      csv << columns
      all.each do |record|
        csv << columns.map { |column| record[column].to_s }
      end
      puts csv
    end
  end
end
602
+
603
### Utility function to create mongomapper keys
#
# Takes the file_path of the csv file.
#
# Prints one "key :<header>, String" line per header, sorted by header.
# Returns the sorted headers.
def csv_headers_to_keys file_path
  sorted = CSV.table(file_path).headers.sort
  sorted.each { |name| puts "key :#{name}, String" }
end
613
+
614
+
615
+
616
# Tempfile is used by mongo_to_table and is not loaded by requiring csv.
require "tempfile"

### Utility for exporting a mongomapper search
class Array
  # Saves the receiver, an array of records, as a csv file.
  #
  # file_path - the file everything is saved to, in csv format.
  #
  # The header row holds the sorted key names of the first element's
  # class; each element contributes one line with its values converted
  # with to_s. An empty array produces an empty header row instead of
  # raising on nil. The headers and the csv object are printed.
  def mongo_to_csv file_path
    CSV.open(file_path, "wb") do |csv|
      headers = empty? ? [] : first.class.keys.keys.sort
      puts headers
      csv << headers
      each do |record|
        csv << headers.map { |h| record[h].to_s }
      end
      puts csv
    end
  end

  # Converts the receiver into a CSV::Table by exporting it to a temporary
  # file and reading that back with CSV.table.
  #
  # The temporary file is addressed through its path and removed with
  # Tempfile#close!. The previous `rm #{file_path}` interpolated the
  # Tempfile object rather than its path, so the file was never deleted.
  def mongo_to_table
    tmp = Tempfile.new "test"
    begin
      tmp.close
      mongo_to_csv tmp.path
      CSV.table tmp.path
    ensure
      tmp.close!
    end
  end

end
647
+
648
+
649
# Exports every Case found for a validation name to a csv file.
#
# val_name  - passed to Case.find_all_by_validation_name.
# file_name - the csv file written through Array#mongo_to_csv.
#
# The lookup runs once and its result is reused for the log line, so the
# message describes exactly the records that were exported and no second
# query is issued.
def val_to_csv val_name, file_name
  cases = Case.find_all_by_validation_name(val_name)
  cases.mongo_to_csv(file_name)
  puts "saved #{cases} in #{File.absolute_path file_name}"
end
653
+
654
### A modified CSV table class
# Adds pretty printing.
class CSV::Table
  # Prints the header row followed by every row, each in csv format.
  # Returns the table itself.
  def pp
    puts headers.to_csv
    each { |row| puts row.to_csv }
  end
end
664
+