protk 1.2.6.pre5 → 1.3.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +84 -45
  3. data/bin/add_retention_times.rb +9 -5
  4. data/bin/augustus_to_proteindb.rb +7 -11
  5. data/bin/interprophet.rb +28 -46
  6. data/bin/make_decoy.rb +16 -48
  7. data/bin/mascot_search.rb +57 -71
  8. data/bin/mascot_to_pepxml.rb +13 -26
  9. data/bin/msgfplus_search.rb +70 -107
  10. data/bin/omssa_search.rb +52 -109
  11. data/bin/peptide_prophet.rb +44 -119
  12. data/bin/pepxml_to_table.rb +24 -27
  13. data/bin/protein_prophet.rb +22 -82
  14. data/bin/protxml_to_gff.rb +22 -519
  15. data/bin/protxml_to_table.rb +2 -16
  16. data/bin/sixframe.rb +10 -32
  17. data/bin/tandem_search.rb +30 -403
  18. data/bin/tandem_to_pepxml.rb +43 -0
  19. data/bin/unimod_to_loc.rb +1 -1
  20. data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
  21. data/ext/decoymaker/extconf.rb +3 -0
  22. data/lib/protk/constants.rb +16 -2
  23. data/lib/protk/data/default_config.yml +2 -1
  24. data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
  25. data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
  26. data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
  27. data/lib/protk/data/tandem_params.xml +17 -54
  28. data/lib/protk/fastadb.rb +2 -2
  29. data/lib/protk/prophet_tool.rb +1 -1
  30. data/lib/protk/protxml_to_gff_tool.rb +474 -0
  31. data/lib/protk/search_tool.rb +58 -103
  32. data/lib/protk/setup_rakefile.rake +9 -5
  33. data/lib/protk/tandem_search_tool.rb +256 -0
  34. data/lib/protk/tool.rb +85 -104
  35. data/lib/protk.rb +1 -6
  36. metadata +24 -103
  37. data/bin/annotate_ids.rb +0 -59
  38. data/bin/asapratio.rb +0 -27
  39. data/bin/blastxml_to_table.rb +0 -119
  40. data/bin/correct_omssa_retention_times.rb +0 -27
  41. data/bin/feature_finder.rb +0 -95
  42. data/bin/file_convert.rb +0 -164
  43. data/bin/generate_omssa_loc.rb +0 -42
  44. data/bin/gffmerge.rb +0 -208
  45. data/bin/libra.rb +0 -70
  46. data/bin/toppas_pipeline.rb +0 -84
  47. data/bin/uniprot_annotation.rb +0 -141
  48. data/bin/xls_to_table.rb +0 -52
  49. data/bin/xpress.rb +0 -27
  50. data/ext/protk/decoymaker/extconf.rb +0 -3
  51. data/ext/protk/simplealign/extconf.rb +0 -3
  52. data/lib/protk/biotools_excel_converter.rb +0 -60
  53. data/lib/protk/eupathdb_gene_information_table.rb +0 -158
  54. data/lib/protk/gapped_aligner.rb +0 -264
  55. data/lib/protk/protein_annotator.rb +0 -646
  56. data/lib/protk/spreadsheet_extensions.rb +0 -79
  57. data/lib/protk/xtandem_defaults.rb +0 -11
@@ -1,646 +0,0 @@
1
- require 'rubygems'
2
- require 'spreadsheet'
3
- require 'protk/swissprot_database'
4
- require 'protk/bio_sptr_extensions'
5
- require 'protk/protxml'
6
- require 'protk/spreadsheet_extensions'
7
- require 'protk/biotools_excel_converter'
8
- require 'protk/plasmodb'
9
- require 'protk/constants'
10
-
11
-
12
# Converts protein identification results (an Excel sheet, a prot.xml file or
# a WarpLC protein list) into an Excel workbook whose rows are annotated with
# information looked up in Swissprot / PlasmoDB and hyperlinked to external
# databases.
class ProteinAnnotator

  # Probability cut-off handed to ProtXML#as_rows when the caller gives none.
  DEFAULT_MIN_PROBABILITY = 0.6

  # Ordered [entry method name, column header] pairs. The first element is
  # sent to the Swissprot entry object, the second is the header of the
  # column that receives the result. The order is the order in which the
  # columns appear in the output sheet.
  ANNOTATION_COLUMNS = [
    ['recname', "Primary Name"],
    ['cd', "CD Antigen Name"],
    ['altnames', "Alternate Names"],
    ['location', "Subcellular Location"],
    ['function', "Known Function"],
    ['similarity', "Similarity"],
    ['tissues', "Tissue Specificity"],
    ['disease', "Disease Association"],
    ['domain', "Domain"],
    ['subunit', "Sub Unit"],
    ['nextbio', "NextBio"],
    ['ipi', "IPI"],
    ['intact', "Interactions"],
    ['pride', 'Pride'],
    ['ensembl', 'Ensembl'],
    ['num_transmem', "Transmembrane Regions"],
    ['signalp', 'Signal Peptide']
  ].freeze

  # Annotation columns whose cells become hyperlinks:
  # [annotation key, column header, url prefix, url suffix].
  # The cell value is placed between prefix and suffix.
  LINKED_ANNOTATIONS = [
    ['nextbio', "NextBio", "http://www.nextbio.com/b/home/home.nb?id=", "&type=feature"],
    ['ipi', "IPI", "http://www.ebi.ac.uk/cgi-bin/dbfetch?db=IPI&id=", ""],
    ['intact', "Interactions", "http://www.ebi.ac.uk/intact/pages/interactions/interactions.xhtml?query=", "*"],
    ['pride', "Pride", "http://www.ebi.ac.uk/pride/searchSummary.do?queryTypeSelected=identification%20accession%20number&identificationAccessionNumber=", ""],
    ['ensembl', "Ensembl", "http://www.ensembl.org/Homo_sapiens/Transcript/Summary?db=core;t=", ""]
  ].freeze

  def initialize()
    @genv = Constants.new()
  end

  # The Constants object created in the constructor; used here for logging
  # and handed to the database wrappers.
  def env
    @genv
  end

  # Opens an existing Excel workbook and returns a copy of it made with
  # copyBook (a workbook extension defined outside this class), passing
  # numrows through unchanged.
  def outputBookFromExcelInput(inputFile,numrows=0)
    input_book = Spreadsheet.open(inputFile.to_s)
    input_book.copyBook(numrows)
  end

  # Builds a workbook from a prot.xml file. The pep.xml argument is accepted
  # for interface compatibility but is not read: the result is exactly what
  # outputBookFromProtXML produces for inputFileProt.
  def outputBookFromProtXMLAndPepXML(inputFileProt,inputFilePep,numrows=0)
    outputBookFromProtXML(inputFileProt,numrows)
  end

  # Builds a workbook with one worksheet holding the rows returned by
  # ProtXML#as_rows for the given prot.xml file.
  #
  # numrows          - maximum number of rows to keep; 0 (or a value not
  #                    smaller than the number of available rows) keeps all.
  # min_probability  - cut-off passed to as_rows; proteins below it are not
  #                    reported. Defaults to 0.6.
  def outputBookFromProtXML(inputFile,numrows=0,min_probability=DEFAULT_MIN_PROBABILITY)
    rows = ProtXML.new(inputFile).as_rows(min_probability)
    rows = rows[0...numrows] unless numrows==0 || numrows>=rows.length
    workbook_from_rows(rows)
  end

  # Builds a workbook from the rows that BioToolsExcelConverter extracts from
  # a biotools Excel file. numrows is accepted but not applied.
  def outputBookFromBiotoolsExcel(inputFile,numrows=0)
    workbook_from_rows(BioToolsExcelConverter.new(inputFile).get_rows)
  end

  # Reads a WarpLC protein list (XML) and returns a workbook with a single
  # column: the header 'Accession' followed by the Accession attribute of
  # every //ProteinReport/Protein element.
  #
  # numrows limits the number of cells (header included) that are kept;
  # 0 or a value larger than the available cells keeps everything.
  #
  # Raises ArgumentError when the file contains no Protein elements, which
  # also happens when the file is not a WarpLC protein list at all.
  def outputBookFromWarpLCInput(inputFile,numrows=0)
    # Block form closes the handle once the document has been parsed.
    xmldoc = File.open(inputFile) { |file| REXML::Document.new(file) }
    proteins = REXML::XPath.match(xmldoc,"//ProteinReport/Protein")

    # XPath.match answers with an empty list rather than nil when nothing
    # matches, so both cases have to be treated as "no proteins".
    if proteins.nil? || proteins.empty?
      raise ArgumentError, "No proteins found in the WarpLC Proteinlist file #{inputFile}"
    end

    accessions = proteins.collect { |el| el.attributes['Accession'] }
    accessions.insert(0,"Accession")
    accessions = accessions[0...numrows] unless numrows==0 || numrows>accessions.length

    output_book = Spreadsheet::Workbook.new
    output_sheet = output_book.create_worksheet
    output_sheet.insert_column(accessions,0)
    output_book
  end

  # True when the caller declared the input to be "excel" or when the file
  # name ends in the extension xls.
  def isExcelFile(fileName,input_type)
    input_type=="excel" || fileName.split(".").last=="xls"
  end

  # True when the caller declared the input to be "protXML" or when the file
  # name ends in .prot.xml
  def isProtXMLFile(fileName,input_type)
    input_type=="protXML" || !fileName.match(/\.prot\.xml$/).nil?
  end

  # Delegates to BioToolsExcelConverter.isBiotools; input_type is ignored.
  def isBioToolsFile(fileName,input_type)
    BioToolsExcelConverter.isBiotools(fileName)
  end

  # Replaces every cell in column colIndex whose value equals `from` with
  # `to`, across all rows of the worksheet.
  def renameValuesInColumn(workSheet,colIndex,from,to)
    workSheet.rows.each do |row|
      next if row.nil?
      row[colIndex]=to if row[colIndex]==from
    end
  end

  # True when any cell of the row, converted to a string, equals the
  # accession header name (default "Accession").
  def hasAccession(row,accessionColumnName="Accession")
    row.any? { |cell| cell.to_s==accessionColumnName }
  end

  # True for a nil row and for a row whose cells are all nil or convert to
  # the empty string.
  def row_is_empty(row)
    return true if row.nil?
    row.all? { |cell| cell.nil? || cell.to_s=="" }
  end

  # Reads inputFile, annotates each accession and writes the resulting
  # workbook to outputFile.
  #
  # inputFile            - Excel, prot.xml or WarpLC file (see load_output_book).
  # outputFile           - path the workbook is written to.
  # input_type           - optional "excel" / "protXML" override of the
  #                        file-name based type detection.
  # output_type          - accepted for interface compatibility; not used.
  # numrows              - row limit handed to the input readers.
  # accessionColumnName  - header of the column holding the accessions.
  # entrezIDColumnName   - header of an optional Entrez gene id column.
  # hiddenColumns        - header fragments; a column whose header contains
  #                        one of them is hidden.
  #
  # Raises ArgumentError when no column carries the accession header.
  def convert(inputFile,outputFile,input_type=nil,output_type="xls",numrows=0,accessionColumnName="Accession",entrezIDColumnName="Entrez.ID",hiddenColumns=[])

    @genv.log("Converting #{inputFile} to #{outputFile}",:info)

    Spreadsheet.client_encoding = 'UTF-8'

    outputBook = load_output_book(inputFile,input_type,numrows)
    outputSheet = outputBook.worksheet 0

    # Rows above the header are set aside and put back just before writing.
    keep_rows = strip_leading_rows(outputSheet,accessionColumnName)

    header = outputSheet.row 0
    accessionColumn, lastcolIndex = locate_columns(outputSheet,header,accessionColumnName)

    if accessionColumn.nil?
      raise ArgumentError, "No Accession column in input excel file. One column must have the header '#{accessionColumnName}'"
    end

    ids = accessionColumn.collect { |id| id.nil? ? "" : id }
    # Remove the 0th value because it is the header
    ids.delete_at(0)

    # Every annotation column starts with its header; one cell per id follows.
    newColumns = {}
    ANNOTATION_COLUMNS.each { |key, title| newColumns[key] = [title.dup] }

    accs, plasmodbids = fetch_swissprot_annotations(ids,newColumns)

    # Ids unknown to Swissprot (and not decoys) are tried against PlasmoDB.
    fill_from_plasmodb(plasmodbids,newColumns) if plasmodbids.any? { |id| id!="" }

    @genv.log("Done",:info)

    # Inserting in reverse at a fixed index leaves the columns in table order.
    ANNOTATION_COLUMNS.reverse_each do |key, _title|
      outputSheet.insert_column(newColumns[key],lastcolIndex)
    end

    # Now hide some columns
    (0...outputSheet.row(0).length).each do |i|
      title = header[i].to_s
      outputSheet.column(i).hidden = true if hiddenColumns.any? { |h| title.include?(h) }
    end

    # Now add hyperlinks to various columns
    @genv.log("Creating Hyperlinks",:info)

    # Map header text to column index; for duplicate headers the last wins.
    header = outputSheet.row 0
    column_index = {}
    (0...header.length).each { |i| column_index[header[i]] = i }

    accessionColumnIndex = column_index[accessionColumnName]
    ensemblColumnIndex = column_index["Ensembl"]
    entrezIDColumnIndex = column_index[entrezIDColumnName]
    entrezIDColumn = entrezIDColumnIndex.nil? ? nil : outputSheet.column(entrezIDColumnIndex)
    # Snapshot of the ids taken before the cells are overwritten with links.
    entrezIDs = entrezIDColumn.nil? ? nil : entrezIDColumn.to_a

    # [column index, annotation cells, url prefix, url suffix]
    linked = LINKED_ANNOTATIONS.collect do |key, title, prefix, suffix|
      [column_index[title], newColumns[key], prefix, suffix]
    end

    hyperlink_format = Spreadsheet::Format.new({:color => :blue,:weight => :bold,:size => 10})

    outputSheet.column(accessionColumnIndex).default_format=hyperlink_format
    linked.each { |link| outputSheet.column(link.first).default_format=hyperlink_format }
    outputSheet.column(entrezIDColumnIndex).default_format=hyperlink_format unless entrezIDColumn.nil?

    # Create all the hyperlinks (row 0 is the header)
    (1...outputSheet.rows.length).each do |rowi|
      row = outputSheet.row(rowi)

      if plasmodbids[rowi-1]!=""
        # Assume plasmodb .. and use plasmodb url
        row[accessionColumnIndex]=Spreadsheet::Link.new("http://www.plasmodb.org/plasmo/showRecord.do?name=GeneRecordClasses.GeneRecordClass&project_id=&primary_key=#{ids[rowi-1]}",plasmodbids[rowi-1])
      else
        # Otherwise assume sp
        row[accessionColumnIndex]=Spreadsheet::Link.new("http://www.uniprot.org/uniprot/#{accs[rowi-1]}.html",ids[rowi-1])
      end

      linked.each do |index, cells, prefix, suffix|
        row[index]=Spreadsheet::Link.new("#{prefix}#{cells[rowi]}#{suffix}",cells[rowi])
      end
      row.height=24

      if !entrezIDColumn.nil? && !entrezIDs[rowi].nil?
        entrez = entrezIDs[rowi].to_i.to_s
        row[entrezIDColumnIndex]=Spreadsheet::Link.new("http://www.ncbi.nlm.nih.gov/gene/#{entrez}",entrez)
      end
    end

    # Change the names of any columns to nicer values if you need to
    outputSheet.row(0)[accessionColumnIndex]="Uniprot Link"
    outputSheet.row(0)[entrezIDColumnIndex]="Entrez.ID" unless entrezIDColumn.nil?

    # Having hyperlinked existing columns we now add any additional columns.
    # Inserting a column shifts the indexes computed above, which is why this
    # is done near the end.
    unless entrezIDColumn.nil?
      @genv.log("Creating iHOP literature search link",:info)

      ihopURLs = entrezIDs.collect do |entrezid|
        "http://www.ihop-net.org/UniPub/iHOP/in?dbrefs_1=NCBI_GENE__ID|#{entrezid.to_i.to_s}"
      end

      # Goes right after the ensembl link
      columnIndex = ensemblColumnIndex+1
      outputSheet.insert_column(ihopURLs,columnIndex)

      (0...outputSheet.rows.length).each do |rowi|
        outputSheet.row(rowi)[columnIndex]=Spreadsheet::Link.new(ihopURLs[rowi],entrezIDs[rowi].to_i.to_s)
      end

      outputSheet.column(columnIndex).default_format=hyperlink_format

      # The loop above also linked row 0; give the header its proper name
      outputSheet.row(0)[columnIndex]="iHOP literature search"
    end

    @genv.log("Formatting header",:info)
    outputSheet.row(0).default_format=Spreadsheet::Format.new({ :weight => :bold,:size => 11 })

    sanitize_cells(outputSheet)

    # Put the rows that preceded the header back on top, in original order
    keep_rows.reverse_each { |row| outputSheet.insert_row(0,row) }

    # Finally write the results
    @genv.log("Writing New Workbook #{outputFile}",:info)
    outputBook.write outputFile
  end

  private

  # Creates a workbook with one worksheet containing the rows in order.
  def workbook_from_rows(rows)
    book = Spreadsheet::Workbook.new
    sheet = book.create_worksheet
    # Each row is inserted at the top, so walk the list backwards.
    rows.reverse_each { |row| sheet.insert_row(0,row) }
    book
  end

  # Picks the reader matching the input file and returns its workbook:
  # Excel (biotools or plain), prot.xml, and WarpLC as the fallback.
  def load_output_book(inputFile,input_type,numrows)
    if isExcelFile(inputFile,input_type)
      if isBioToolsFile(inputFile,input_type)
        @genv.log("Excel file was biotools",:info)
        outputBookFromBiotoolsExcel(inputFile,numrows)
      else
        @genv.log("Excel file was non biotools",:info)
        outputBookFromExcelInput(inputFile,numrows)
      end
    elsif isProtXMLFile(inputFile,input_type)
      @genv.log("Got a Prot XML File as Input",:info)
      outputBookFromProtXML(inputFile,numrows)
    else
      @genv.log("File is not prot.xml or excel .. trying WarpLCResult",:info)
      outputBookFromWarpLCInput(inputFile,numrows)
    end
  end

  # Deletes every empty row and every row above the header (the first
  # non-empty row containing the accession header name). Returns the rows
  # that preceded the header so they can be restored later.
  #
  # NOTE(review): positions are counted from the first row the worksheet
  # iterator yields; if that iterator skips unused leading rows the counted
  # positions would not be sheet indexes - confirm against the Spreadsheet
  # version in use.
  def strip_leading_rows(sheet,accession_name)
    leading = []
    doomed = []
    header_seen = false
    position = 0

    sheet.each do |row|
      empty = row_is_empty(row)
      header_seen = true if !header_seen && !empty && hasAccession(row,accession_name)
      doomed.push(position) if empty || !header_seen
      leading.push(row) unless header_seen
      position += 1
    end

    # Every deletion shifts the remaining rows up by one.
    doomed.each_with_index { |index, removed| sheet.delete_row(index-removed) }
    leading
  end

  # Scans the header row once. Returns [accession column or nil, index at
  # which the annotation columns are inserted]. A column headed "OK" is
  # renamed to "Status" and its "true"/"false" cells to
  # "Validated"/"Contaminant".
  def locate_columns(sheet,header,accession_name)
    accession_column = nil
    insert_at = 0
    last_index = nil

    (0...header.length).each do |i|
      last_index = i
      accession_column = sheet.column(i) if header[i]==accession_name

      # First empty header wins; 0 doubles as the "not found yet" marker.
      insert_at = i if header[i]=="" && insert_at==0

      if header[i]=="OK"
        header[i]="Status"
        renameValuesInColumn(sheet,i,"true","Validated")
        renameValuesInColumn(sheet,i,"false","Contaminant")
      end
    end

    # No empty header column: fall back to the index of the last column.
    insert_at = last_index if insert_at==0
    [accession_column, insert_at]
  end

  # Looks every id up in the Swissprot database and appends one cell per id
  # to each annotation column. Returns [accs, plasmodbids], two arrays with
  # exactly one entry per id:
  #   accs         - Swissprot accession, or "" when the id was not found.
  #   plasmodbids  - the id itself when it was not found and is not a decoy
  #                  (it is then assumed to be a PlasmoDB id), "" otherwise.
  def fetch_swissprot_annotations(ids,columns)
    @genv.log("Initializing database",:info)
    swissprotdb = SwissprotDatabase.new(@genv)
    @genv.log("Retrieving data for #{ids.length} entries from Swissprot database ",:info)

    accs = []
    plasmodbids = []

    $stdout.putc "\n"
    ids.each do |uniprot_id|
      # One dot per id as a progress indicator
      $stdout.putc "."
      $stdout.flush

      entry = swissprotdb.get_entry_for_name(uniprot_id)

      if entry.nil?
        @genv.log("No entry for #{uniprot_id} in uniprot database",:warn)
        columns.each_value { |cells| cells.push("") }
        accs.push("")
        # Bit of a hack. If the id is not sp and not decoy we assume it is
        # plasmodb. Decoys still get a placeholder so the list stays aligned
        # with the sheet rows.
        plasmodbids.push(uniprot_id.to_s =~ /^decoy_/ ? "" : uniprot_id)
      else
        accs.push(entry.accession)
        plasmodbids.push("")
        columns.each { |key, cells| cells.push(annotation_text(entry.send(key))) }
      end
    end
    $stdout.putc "\n"

    [accs, plasmodbids]
  end

  # Text form of an annotation value: "" for nil, elements joined with ";"
  # for an Array, to_s for anything else.
  def annotation_text(value)
    return "" if value.nil?
    value.instance_of?(Array) ? value.join(";") : value.to_s
  end

  # Looks every non-blank id up in PlasmoDB and, where an entry exists,
  # overwrites the name, location, function, signal peptide and
  # transmembrane cells of the matching row.
  def fill_from_plasmodb(plasmodbids,columns)
    $stdout.print "Searching PlasmoDB for unknown Id's\n"
    @genv.log "Searching PlasmoDB for unknown Id's", :info

    plasmodb = PlasmoDB.new(@genv)

    plasmodbids.each_with_index do |plasmodb_id, position|
      next if plasmodb_id==""
      p plasmodb_id

      entry = plasmodb.get_entry_for_name(plasmodb_id)
      next if entry.nil?

      row_index = position+1 # Cell 0 of every column is the header

      columns['recname'][row_index]=entry['Product Description']
      columns['location'][row_index]=annotated_or_predicted(entry,'GO Component')
      columns['function'][row_index]=annotated_or_predicted(entry,'GO Function')
      columns['signalp'][row_index]=entry['SignalP Peptide']
      columns['num_transmem'][row_index]=entry['# TM Domains']
    end
  end

  # The "Annotated <field>" value of a PlasmoDB entry, or the
  # "Predicted <field>" value when the annotated one is the string "null".
  def annotated_or_predicted(entry,field)
    annotated = entry["Annotated #{field}"]
    annotated!="null" ? annotated : entry["Predicted #{field}"]
  end

  # Workaround for a problem with the Spreadsheet gem when writing nil and
  # boolean cells: nil becomes "", true becomes "positive" and false becomes
  # "negative".
  def sanitize_cells(sheet)
    sheet.rows.each do |row|
      next if row.nil?
      row.each_index do |ri|
        if row[ri].nil?
          p "Encountered a nil value in the sheet converting to empty string"
          row[ri]=""
        end

        if row[ri]==true
          row[ri]="positive"
        elsif row[ri]==false
          row[ri]="negative"
        end
      end
    end
  end

end