opentox-ruby 2.0.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
- ['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment'].each do |lib|
1
+ ['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment', 'gsl'].each do |lib|
2
2
  require lib
3
3
  end
4
4
 
@@ -50,7 +50,8 @@ class Sinatra::Base
50
50
  halt task.http_code,task.to_yaml # PENDING differs from task-webservice
51
51
  when /html/
52
52
  response['Content-Type'] = "text/html"
53
- halt task.http_code,OpenTox.text_to_html(task.to_yaml, @subjectid)
53
+ # html -> task created with html form -> redirect to task uri
54
+ redirect task.uri
54
55
  else # default /uri-list/
55
56
  response['Content-Type'] = "text/uri-list"
56
57
  if task.completed?
@@ -40,8 +40,9 @@ module OpenTox
40
40
  else
41
41
  file = Tempfile.new("ot-rdfxml")
42
42
  if @dataset
43
- # do not concat /metadata to uri string, this would not work for dataset/R401577?max=3
44
43
  uri = URI::parse(@uri)
44
+ #remove params like dataset/<id>?max=3 from uri, not needed for metadata
45
+ uri.query = nil
45
46
  uri.path = File.join(uri.path,"metadata")
46
47
  uri = uri.to_s
47
48
  else
@@ -56,7 +57,7 @@ module OpenTox
56
57
  `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line|
57
58
  triple = line.to_triple
58
59
  if triple[0] == @uri
59
- if triple[1] == RDF.type # allow multiple types
60
+ if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types
60
61
  @metadata[triple[1]] = [] unless @metadata[triple[1]]
61
62
  @metadata[triple[1]] << triple[2].split('^^').first
62
63
  else
@@ -75,6 +76,9 @@ module OpenTox
75
76
  @metadata[OT.parameters] << parameter
76
77
  end
77
78
  end
79
+ #@metadata.each do |k,v|
80
+ #v = v.first if v and v.size == 1
81
+ #end
78
82
  @metadata
79
83
  end
80
84
 
@@ -82,7 +86,11 @@ module OpenTox
82
86
  # @param [String] rdf
83
87
  # @param [String] type of the info (e.g. OT.Task, OT.ErrorReport) needed to get the subject-uri
84
88
  # @return [Owl] with uri and metadata set
85
- def self.from_rdf( rdf, type )
89
+ def self.from_rdf( rdf, type, allow_multiple = false )
90
+
91
+ uris = Array.new
92
+ owls = Array.new
93
+
86
94
  # write to file, then convert with rapper into triples
87
95
  file = Tempfile.new("ot-rdfxml")
88
96
  file.puts rdf
@@ -95,20 +103,27 @@ module OpenTox
95
103
  triples.each_line do |line|
96
104
  triple = line.to_triple
97
105
  if triple[1] == RDF['type'] and triple[2]==type
98
- raise "uri already set, two uris found with type: "+type.to_s if uri
106
+ if !allow_multiple
107
+ raise "uri already set, two uris found with type: "+type.to_s if uri
108
+ end
99
109
  uri = triple[0]
110
+ uris << uri
100
111
  end
101
112
  end
102
113
  File.delete(file.path)
114
+
103
115
  # load metadata
104
- metadata = {}
105
- triples.each_line do |line|
106
- triple = line.to_triple
107
- metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type']
108
- end
109
- owl = Owl::Generic.new(uri)
110
- owl.metadata = metadata
111
- owl
116
+ uris.each { |uri|
117
+ metadata = {}
118
+ triples.each_line do |line|
119
+ triple = line.to_triple
120
+ metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type']
121
+ end
122
+ owl = Owl::Generic.new(uri)
123
+ owl.metadata = metadata
124
+ owls << owl
125
+ }
126
+ allow_multiple ? owls : owls[0]
112
127
  end
113
128
 
114
129
  # Generic parser for all OpenTox classes
@@ -228,7 +243,12 @@ module OpenTox
228
243
  file = Tempfile.new("ot-rdfxml")
229
244
  # do not concat /features to uri string, this would not work for dataset/R401577?max=3
230
245
  uri = URI::parse(@uri)
231
- uri.path = File.join(uri.path,"features")
246
+ # PENDING
247
+ # ambit models return http://host/dataset/id?feature_uris[]=sth but
248
+ # ambit dataset services do not support http://host/dataset/id/features?feature_uris[]=sth
249
+ # and features are not included in http://host/dataset/id/features
250
+ # -> load features from complete dataset
251
+ uri.path = File.join(uri.path,"features") unless @uri=~/\?(feature_uris|page|pagesize)/
232
252
  uri = uri.to_s
233
253
  file.puts OpenTox::RestClientWrapper.get uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false
234
254
  file.close
@@ -244,8 +264,13 @@ module OpenTox
244
264
  File.delete(to_delete) if to_delete
245
265
  statements.each do |triple|
246
266
  if features.include? triple[0]
247
- @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]]
248
- @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first
267
+ @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]]
268
+ if triple[1] == RDF.type
269
+ @dataset.features[triple[0]][triple[1]] = [] unless @dataset.features[triple[0]][triple[1]]
270
+ @dataset.features[triple[0]][triple[1]] << triple[2].split('^^').first
271
+ else
272
+ @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first
273
+ end
249
274
  end
250
275
  end
251
276
  @dataset.features
@@ -271,22 +296,39 @@ module OpenTox
271
296
  @duplicates = {}
272
297
  end
273
298
 
299
+ def detect_new_values(row, value_maps)
300
+ row.shift
301
+ row.each_index do |i|
302
+ value = row[i]
303
+ value_maps[i] = Hash.new if value_maps[i].nil?
304
+ value_maps[i][value].nil? ? value_maps[i][value]=0 : value_maps[i][value] += 1
305
+ end
306
+ value_maps
307
+ end
308
+
274
309
  # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
275
310
  # @param [Excel] book Excel workbook object (created with roo gem)
276
311
  # @return [OpenTox::Dataset] Dataset object with Excel data
277
312
  def load_spreadsheet(book)
278
313
  book.default_sheet = 0
279
314
  add_features book.row(1)
315
+ value_maps = Array.new
316
+ regression_features=Array.new
280
317
 
281
- # AM: fix mixed read in
282
- regression_features=false
283
318
  2.upto(book.last_row) { |i|
284
319
  row = book.row(i)
285
- regression_features = detect_regression_features row
286
- break if regression_features==true
320
+ value_maps = detect_new_values(row, value_maps)
321
+ value_maps.each_with_index { |vm,j|
322
+ if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
323
+ regression_features[j]=true
324
+ else
325
+ regression_features[j]=false
326
+ end
327
+ }
328
+ }
329
+ 2.upto(book.last_row) { |i|
330
+ add_values book.row(i), regression_features
287
331
  }
288
-
289
- 2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
290
332
  warnings
291
333
  @dataset
292
334
  end
@@ -298,21 +340,27 @@ module OpenTox
298
340
  row = 0
299
341
  input = csv.split("\n")
300
342
  add_features split_row(input.shift)
343
+ value_maps = Array.new
344
+ regression_features=Array.new
301
345
 
302
-
303
- # AM: fix mixed read in
304
- regression_features=false
305
346
  input.each { |row|
306
347
  row = split_row(row)
307
- regression_features = detect_regression_features row
308
- break if regression_features==true
348
+ value_maps = detect_new_values(row, value_maps)
349
+ value_maps.each_with_index { |vm,j|
350
+ if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
351
+ regression_features[j]=true
352
+ else
353
+ regression_features[j]=false
354
+ end
355
+ }
356
+ }
357
+ input.each { |row|
358
+ add_values split_row(row), regression_features
309
359
  }
310
- input.each { |row| add_values split_row(row),regression_features }
311
360
  warnings
312
361
  @dataset
313
362
  end
314
363
 
315
-
316
364
  private
317
365
 
318
366
  def warnings
@@ -354,20 +402,10 @@ module OpenTox
354
402
  end
355
403
  end
356
404
 
357
- def detect_regression_features row
358
- row.shift
359
- regression_features=false
360
- row.each_index do |i|
361
- value = row[i]
362
- type = feature_type(value)
363
- if type == OT.NumericFeature
364
- regression_features=true
365
- end
366
- end
367
- regression_features
368
- end
369
-
370
- def add_values(row, regression_features=false)
405
+ # Adds a row to a dataset
406
+ # @param Array A row split up as an array
407
+ # @param Array Indicator for regression for each field
408
+ def add_values(row, regression_features)
371
409
 
372
410
  smiles = row.shift
373
411
  compound = Compound.from_smiles(smiles)
@@ -381,27 +419,23 @@ module OpenTox
381
419
  row.each_index do |i|
382
420
  value = row[i]
383
421
  feature = @features[i]
384
- type = feature_type(value)
385
422
 
423
+ type = nil
424
+ if (regression_features[i])
425
+ type = feature_type(value)
426
+ if type != OT.NumericFeature
427
+ raise "Error! Expected numeric values."
428
+ end
429
+ else
430
+ type = OT.NominalFeature
431
+ end
386
432
  @feature_types[feature] << type
387
433
 
388
- if (regression_features)
434
+ case type
435
+ when OT.NumericFeature
389
436
  val = value.to_f
390
- else
391
- case type
392
- when OT.NominalFeature
393
- case value.to_s
394
- when TRUE_REGEXP
395
- val = true
396
- when FALSE_REGEXP
397
- val = false
398
- end
399
- when OT.NumericFeature
400
- val = value.to_f
401
- when OT.StringFeature
402
- val = value.to_s
403
- @activity_errors << smiles+", "+row.join(", ")
404
- end
437
+ when OT.NominalFeature
438
+ val = value.to_s
405
439
  end
406
440
  if val!=nil
407
441
  @dataset.add(compound.uri, feature, val)
@@ -413,26 +447,170 @@ module OpenTox
413
447
  end
414
448
  end
415
449
 
416
- def numeric?(value)
417
- true if Float(value) rescue false
450
+ def feature_type(value)
451
+ if OpenTox::Algorithm::numeric? value
452
+ return OT.NumericFeature
453
+ else
454
+ return OT.NominalFeature
455
+ end
456
+ end
457
+
458
+ def split_row(row)
459
+ row.chomp.gsub(/["']/,'').split(/\s*[,;\t]\s*/) # remove quotes
460
+ end
461
+
462
+ end
463
+
464
+ class Table
465
+
466
+ attr_accessor :data, :features, :compounds
467
+
468
+ def initialize
469
+ @data = {}
470
+ @activity_errors = []
471
+ end
472
+
473
+ def feature_values(feature)
474
+ @data.collect{|c, row| row[feature]}.uniq.compact
475
+ end
476
+
477
+ def feature_types(feature)
478
+ @data.collect{|c, row| feature_type(row[feature])}.uniq.compact
479
+ end
480
+
481
+ def features
482
+ @data.collect{|c,row| row.keys}.flatten.uniq
483
+ end
484
+
485
+ def clean_features
486
+ ignored_features = []
487
+ features.each do |feature|
488
+ if feature_values(feature).size > 5
489
+ if feature_types(feature).size == 1 and feature_types(feature).first == OT.NumericFeature
490
+ # REGRESSION
491
+ elsif feature_types(feature).include? OT.NumericFeature
492
+ @data.each{|c,row| row[feature] = nil unless OpenTox::Algorithm::numeric?(row[feature]) } # delete nominal features
493
+ @activity_errors << "Nominal feature values of #{feature} ignored (using numeric features for regression models)."
494
+ else
495
+ @activity_errors << "Feature #{feature} ignored (more than 5 nominal feature values and no numeric values)."
496
+ ignored_features << feature
497
+ next
498
+ end
499
+ elsif feature_values(feature).size <= 1
500
+ @activity_errors << "Feature #{feature} ignored (less than 2 feature values)."
501
+ ignored_features << feature
502
+ else
503
+ # CLASSIFICATION
504
+ end
505
+ end
506
+ ignored_features.each do |feature|
507
+ @data.each{ |c,row| row.delete feature }
508
+ end
509
+ @activity_errors
418
510
  end
419
511
 
420
- def classification?(value)
421
- !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil?
512
+ def add_to_dataset(dataset)
513
+ features.each do |feature_name|
514
+ feature_uri = File.join(dataset.uri,"feature",URI.encode(feature_name))
515
+ dataset.add_feature(feature_uri,{DC.title => feature_name})
516
+ end
517
+
518
+ @data.each do |compound,row|
519
+ unless row.empty?
520
+ row.each do |feature,value|
521
+ if OpenTox::Algorithm::numeric?(value)
522
+ value = value.to_f
523
+ elsif value.nil? or value.empty?
524
+ value = nil
525
+ else
526
+ value = value.to_s
527
+ end
528
+ feature_uri = File.join(dataset.uri,"feature",URI.encode(feature))
529
+ dataset.add(compound, feature_uri, value)
530
+ #dataset.features[feature_uri][RDF.type] = feature_types(feature)
531
+ #dataset.features[feature_uri][OT.acceptValue] = feature_values(feature)
532
+ if feature_types(feature).include? OT.NumericFeature
533
+ dataset.features[feature_uri][RDF.type] = [OT.NumericFeature]
534
+ else
535
+ dataset.features[feature_uri][RDF.type] = [OT.NominalFeature]
536
+ dataset.features[feature_uri][OT.acceptValue] = feature_values(feature)
537
+ end
538
+ end
539
+ end
540
+ end
422
541
  end
423
542
 
543
+ private
544
+
424
545
  def feature_type(value)
425
- if classification? value
426
- return OT.NominalFeature
427
- elsif numeric? value
546
+ if OpenTox::Algorithm::numeric? value
428
547
  return OT.NumericFeature
429
548
  else
430
- return OT.StringFeature
549
+ return OT.NominalFeature
431
550
  end
432
551
  end
552
+ end
553
+
554
+ # quick hack to enable sdf import via csv
555
+ # should be refactored
556
+ class Sdf
557
+
558
+ attr_accessor :dataset
559
+
560
+ def initialize
561
+ @data = {}
562
+
563
+ @compound_errors = []
564
+ @activity_errors = []
565
+ @duplicates = {}
566
+ end
567
+
568
+ def load_sdf(sdf)
569
+
570
+ obconversion = OpenBabel::OBConversion.new
571
+ obmol = OpenBabel::OBMol.new
572
+ obconversion.set_in_and_out_formats "sdf", "inchi"
573
+
574
+ table = Table.new
575
+
576
+ properties = []
577
+ sdf.each_line { |l| properties << l.to_s if l.match(/</) }
578
+ properties.uniq!
579
+ properties.sort!
580
+ properties.collect!{ |p| p.gsub(/<|>/,'').strip.chomp }
581
+
582
+ rec = 0
583
+ sdf.split(/\$\$\$\$\r*\n/).each do |s|
584
+ rec += 1
585
+ obconversion.read_string obmol, s
586
+ begin
587
+ inchi = obconversion.write_string(obmol).gsub(/\s/,'').chomp
588
+ @duplicates[inchi] = [] unless @duplicates[inchi]
589
+ @duplicates[inchi] << rec #inchi#+", "+row.join(", ")
590
+ compound = Compound.from_inchi inchi
591
+ rescue
592
+ @compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec} have been ignored! \n#{s}"
593
+ next
594
+ end
595
+ row = {}
596
+ obmol.get_data.each { |d| row[d.get_attribute] = d.get_value if properties.include?(d.get_attribute) }
597
+ table.data[compound.uri] = row
598
+ end
599
+
600
+ # finda and remove ignored_features
601
+ @activity_errors = table.clean_features
602
+ table.add_to_dataset @dataset
603
+
604
+ warnings = ''
605
+ warnings += "<p>Incorrect Smiles structures (ignored):</p>" + @compound_errors.join("<br/>") unless @compound_errors.empty?
606
+ warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
607
+ duplicate_warnings = ''
608
+ @duplicates.each {|inchi,lines| duplicate_warnings << "<p>#{lines.join('<br/>')}</p>" if lines.size > 1 }
609
+ warnings += "<p>Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty?
610
+
611
+ @dataset.metadata[OT.Warnings] = warnings
612
+ @dataset
433
613
 
434
- def split_row(row)
435
- row.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes
436
614
  end
437
615
 
438
616
  end
@@ -131,13 +131,14 @@ module OpenTox
131
131
  raise "unknown content-type for task : '"+res.content_type.to_s+"'"+" base-uri: "+base_uri.to_s+" content: "+res[0..200].to_s
132
132
  end
133
133
 
134
- LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion"
134
+ #LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion"
135
135
  task.wait_for_completion waiting_task
136
136
  unless task.completed? # maybe task was cancelled / error
137
137
  if task.errorReport
138
138
  received_error task.errorReport, task.http_code, nil, {:rest_uri => task.uri, :rest_code => task.http_code}
139
139
  else
140
- raise "task status: '"+task.status.to_s+"' but errorReport nil"
140
+ raise "status of task '"+task.uri.to_s+"' is no longer running (hasStatus is '"+task.status+
141
+ "'), but it is neither completed nor has an errorReport"
141
142
  end
142
143
  end
143
144