opentox-ruby 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- ['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment'].each do |lib|
1
+ ['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment', 'gsl'].each do |lib|
2
2
  require lib
3
3
  end
4
4
 
@@ -50,7 +50,8 @@ class Sinatra::Base
50
50
  halt task.http_code,task.to_yaml # PENDING differs from task-webservice
51
51
  when /html/
52
52
  response['Content-Type'] = "text/html"
53
- halt task.http_code,OpenTox.text_to_html(task.to_yaml, @subjectid)
53
+ # html -> task created with html form -> redirect to task uri
54
+ redirect task.uri
54
55
  else # default /uri-list/
55
56
  response['Content-Type'] = "text/uri-list"
56
57
  if task.completed?
@@ -40,8 +40,9 @@ module OpenTox
40
40
  else
41
41
  file = Tempfile.new("ot-rdfxml")
42
42
  if @dataset
43
- # do not concat /metadata to uri string, this would not work for dataset/R401577?max=3
44
43
  uri = URI::parse(@uri)
44
+ #remove params like dataset/<id>?max=3 from uri, not needed for metadata
45
+ uri.query = nil
45
46
  uri.path = File.join(uri.path,"metadata")
46
47
  uri = uri.to_s
47
48
  else
@@ -56,7 +57,7 @@ module OpenTox
56
57
  `rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line|
57
58
  triple = line.to_triple
58
59
  if triple[0] == @uri
59
- if triple[1] == RDF.type # allow multiple types
60
+ if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types
60
61
  @metadata[triple[1]] = [] unless @metadata[triple[1]]
61
62
  @metadata[triple[1]] << triple[2].split('^^').first
62
63
  else
@@ -75,6 +76,9 @@ module OpenTox
75
76
  @metadata[OT.parameters] << parameter
76
77
  end
77
78
  end
79
+ #@metadata.each do |k,v|
80
+ #v = v.first if v and v.size == 1
81
+ #end
78
82
  @metadata
79
83
  end
80
84
 
@@ -82,7 +86,11 @@ module OpenTox
82
86
  # @param [String] rdf
83
87
  # @param [String] type of the info (e.g. OT.Task, OT.ErrorReport) needed to get the subject-uri
84
88
  # @return [Owl] with uri and metadata set
85
- def self.from_rdf( rdf, type )
89
+ def self.from_rdf( rdf, type, allow_multiple = false )
90
+
91
+ uris = Array.new
92
+ owls = Array.new
93
+
86
94
  # write to file and convert with rapper into triples
87
95
  file = Tempfile.new("ot-rdfxml")
88
96
  file.puts rdf
@@ -95,20 +103,27 @@ module OpenTox
95
103
  triples.each_line do |line|
96
104
  triple = line.to_triple
97
105
  if triple[1] == RDF['type'] and triple[2]==type
98
- raise "uri already set, two uris found with type: "+type.to_s if uri
106
+ if !allow_multiple
107
+ raise "uri already set, two uris found with type: "+type.to_s if uri
108
+ end
99
109
  uri = triple[0]
110
+ uris << uri
100
111
  end
101
112
  end
102
113
  File.delete(file.path)
114
+
103
115
  # load metadata
104
- metadata = {}
105
- triples.each_line do |line|
106
- triple = line.to_triple
107
- metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type']
108
- end
109
- owl = Owl::Generic.new(uri)
110
- owl.metadata = metadata
111
- owl
116
+ uris.each { |uri|
117
+ metadata = {}
118
+ triples.each_line do |line|
119
+ triple = line.to_triple
120
+ metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type']
121
+ end
122
+ owl = Owl::Generic.new(uri)
123
+ owl.metadata = metadata
124
+ owls << owl
125
+ }
126
+ allow_multiple ? owls : owls[0]
112
127
  end
113
128
 
114
129
  # Generic parser for all OpenTox classes
@@ -228,7 +243,12 @@ module OpenTox
228
243
  file = Tempfile.new("ot-rdfxml")
229
244
  # do not concat /features to uri string, this would not work for dataset/R401577?max=3
230
245
  uri = URI::parse(@uri)
231
- uri.path = File.join(uri.path,"features")
246
+ # PENDING
247
+ # ambit models return http://host/dataset/id?feature_uris[]=sth but
248
+ # ambit dataset services do not support http://host/dataset/id/features?feature_uris[]=sth
249
+ # and features are not included in http://host/dataset/id/features
250
+ # -> load features from complete dataset
251
+ uri.path = File.join(uri.path,"features") unless @uri=~/\?(feature_uris|page|pagesize)/
232
252
  uri = uri.to_s
233
253
  file.puts OpenTox::RestClientWrapper.get uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false
234
254
  file.close
@@ -244,8 +264,13 @@ module OpenTox
244
264
  File.delete(to_delete) if to_delete
245
265
  statements.each do |triple|
246
266
  if features.include? triple[0]
247
- @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]]
248
- @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first
267
+ @dataset.features[triple[0]] = {} unless @dataset.features[triple[0]]
268
+ if triple[1] == RDF.type
269
+ @dataset.features[triple[0]][triple[1]] = [] unless @dataset.features[triple[0]][triple[1]]
270
+ @dataset.features[triple[0]][triple[1]] << triple[2].split('^^').first
271
+ else
272
+ @dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first
273
+ end
249
274
  end
250
275
  end
251
276
  @dataset.features
@@ -271,22 +296,39 @@ module OpenTox
271
296
  @duplicates = {}
272
297
  end
273
298
 
299
+ def detect_new_values(row, value_maps)
300
+ row.shift
301
+ row.each_index do |i|
302
+ value = row[i]
303
+ value_maps[i] = Hash.new if value_maps[i].nil?
304
+ value_maps[i][value].nil? ? value_maps[i][value]=0 : value_maps[i][value] += 1
305
+ end
306
+ value_maps
307
+ end
308
+
274
309
  # Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
275
310
  # @param [Excel] book Excel workbook object (created with roo gem)
276
311
  # @return [OpenTox::Dataset] Dataset object with Excel data
277
312
  def load_spreadsheet(book)
278
313
  book.default_sheet = 0
279
314
  add_features book.row(1)
315
+ value_maps = Array.new
316
+ regression_features=Array.new
280
317
 
281
- # AM: fix mixed read in
282
- regression_features=false
283
318
  2.upto(book.last_row) { |i|
284
319
  row = book.row(i)
285
- regression_features = detect_regression_features row
286
- break if regression_features==true
320
+ value_maps = detect_new_values(row, value_maps)
321
+ value_maps.each_with_index { |vm,j|
322
+ if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
323
+ regression_features[j]=true
324
+ else
325
+ regression_features[j]=false
326
+ end
327
+ }
328
+ }
329
+ 2.upto(book.last_row) { |i|
330
+ add_values book.row(i), regression_features
287
331
  }
288
-
289
- 2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
290
332
  warnings
291
333
  @dataset
292
334
  end
@@ -298,21 +340,27 @@ module OpenTox
298
340
  row = 0
299
341
  input = csv.split("\n")
300
342
  add_features split_row(input.shift)
343
+ value_maps = Array.new
344
+ regression_features=Array.new
301
345
 
302
-
303
- # AM: fix mixed read in
304
- regression_features=false
305
346
  input.each { |row|
306
347
  row = split_row(row)
307
- regression_features = detect_regression_features row
308
- break if regression_features==true
348
+ value_maps = detect_new_values(row, value_maps)
349
+ value_maps.each_with_index { |vm,j|
350
+ if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
351
+ regression_features[j]=true
352
+ else
353
+ regression_features[j]=false
354
+ end
355
+ }
356
+ }
357
+ input.each { |row|
358
+ add_values split_row(row), regression_features
309
359
  }
310
- input.each { |row| add_values split_row(row),regression_features }
311
360
  warnings
312
361
  @dataset
313
362
  end
314
363
 
315
-
316
364
  private
317
365
 
318
366
  def warnings
@@ -354,20 +402,10 @@ module OpenTox
354
402
  end
355
403
  end
356
404
 
357
- def detect_regression_features row
358
- row.shift
359
- regression_features=false
360
- row.each_index do |i|
361
- value = row[i]
362
- type = feature_type(value)
363
- if type == OT.NumericFeature
364
- regression_features=true
365
- end
366
- end
367
- regression_features
368
- end
369
-
370
- def add_values(row, regression_features=false)
405
+ # Adds a row to a dataset
406
+ # @param Array A row split up as an array
407
+ # @param Array Indicator for regression for each field
408
+ def add_values(row, regression_features)
371
409
 
372
410
  smiles = row.shift
373
411
  compound = Compound.from_smiles(smiles)
@@ -381,27 +419,23 @@ module OpenTox
381
419
  row.each_index do |i|
382
420
  value = row[i]
383
421
  feature = @features[i]
384
- type = feature_type(value)
385
422
 
423
+ type = nil
424
+ if (regression_features[i])
425
+ type = feature_type(value)
426
+ if type != OT.NumericFeature
427
+ raise "Error! Expected numeric values."
428
+ end
429
+ else
430
+ type = OT.NominalFeature
431
+ end
386
432
  @feature_types[feature] << type
387
433
 
388
- if (regression_features)
434
+ case type
435
+ when OT.NumericFeature
389
436
  val = value.to_f
390
- else
391
- case type
392
- when OT.NominalFeature
393
- case value.to_s
394
- when TRUE_REGEXP
395
- val = true
396
- when FALSE_REGEXP
397
- val = false
398
- end
399
- when OT.NumericFeature
400
- val = value.to_f
401
- when OT.StringFeature
402
- val = value.to_s
403
- @activity_errors << smiles+", "+row.join(", ")
404
- end
437
+ when OT.NominalFeature
438
+ val = value.to_s
405
439
  end
406
440
  if val!=nil
407
441
  @dataset.add(compound.uri, feature, val)
@@ -413,26 +447,170 @@ module OpenTox
413
447
  end
414
448
  end
415
449
 
416
- def numeric?(value)
417
- true if Float(value) rescue false
450
+ def feature_type(value)
451
+ if OpenTox::Algorithm::numeric? value
452
+ return OT.NumericFeature
453
+ else
454
+ return OT.NominalFeature
455
+ end
456
+ end
457
+
458
+ def split_row(row)
459
+ row.chomp.gsub(/["']/,'').split(/\s*[,;\t]\s*/) # remove quotes
460
+ end
461
+
462
+ end
463
+
464
+ class Table
465
+
466
+ attr_accessor :data, :features, :compounds
467
+
468
+ def initialize
469
+ @data = {}
470
+ @activity_errors = []
471
+ end
472
+
473
+ def feature_values(feature)
474
+ @data.collect{|c, row| row[feature]}.uniq.compact
475
+ end
476
+
477
+ def feature_types(feature)
478
+ @data.collect{|c, row| feature_type(row[feature])}.uniq.compact
479
+ end
480
+
481
+ def features
482
+ @data.collect{|c,row| row.keys}.flatten.uniq
483
+ end
484
+
485
+ def clean_features
486
+ ignored_features = []
487
+ features.each do |feature|
488
+ if feature_values(feature).size > 5
489
+ if feature_types(feature).size == 1 and feature_types(feature).first == OT.NumericFeature
490
+ # REGRESSION
491
+ elsif feature_types(feature).include? OT.NumericFeature
492
+ @data.each{|c,row| row[feature] = nil unless OpenTox::Algorithm::numeric?(row[feature]) } # delete nominal features
493
+ @activity_errors << "Nominal feature values of #{feature} ignored (using numeric features for regression models)."
494
+ else
495
+ @activity_errors << "Feature #{feature} ignored (more than 5 nominal feature values and no numeric values)."
496
+ ignored_features << feature
497
+ next
498
+ end
499
+ elsif feature_values(feature).size <= 1
500
+ @activity_errors << "Feature #{feature} ignored (less than 2 feature values)."
501
+ ignored_features << feature
502
+ else
503
+ # CLASSIFICATION
504
+ end
505
+ end
506
+ ignored_features.each do |feature|
507
+ @data.each{ |c,row| row.delete feature }
508
+ end
509
+ @activity_errors
418
510
  end
419
511
 
420
- def classification?(value)
421
- !value.to_s.strip.match(TRUE_REGEXP).nil? or !value.to_s.strip.match(FALSE_REGEXP).nil?
512
+ def add_to_dataset(dataset)
513
+ features.each do |feature_name|
514
+ feature_uri = File.join(dataset.uri,"feature",URI.encode(feature_name))
515
+ dataset.add_feature(feature_uri,{DC.title => feature_name})
516
+ end
517
+
518
+ @data.each do |compound,row|
519
+ unless row.empty?
520
+ row.each do |feature,value|
521
+ if OpenTox::Algorithm::numeric?(value)
522
+ value = value.to_f
523
+ elsif value.nil? or value.empty?
524
+ value = nil
525
+ else
526
+ value = value.to_s
527
+ end
528
+ feature_uri = File.join(dataset.uri,"feature",URI.encode(feature))
529
+ dataset.add(compound, feature_uri, value)
530
+ #dataset.features[feature_uri][RDF.type] = feature_types(feature)
531
+ #dataset.features[feature_uri][OT.acceptValue] = feature_values(feature)
532
+ if feature_types(feature).include? OT.NumericFeature
533
+ dataset.features[feature_uri][RDF.type] = [OT.NumericFeature]
534
+ else
535
+ dataset.features[feature_uri][RDF.type] = [OT.NominalFeature]
536
+ dataset.features[feature_uri][OT.acceptValue] = feature_values(feature)
537
+ end
538
+ end
539
+ end
540
+ end
422
541
  end
423
542
 
543
+ private
544
+
424
545
  def feature_type(value)
425
- if classification? value
426
- return OT.NominalFeature
427
- elsif numeric? value
546
+ if OpenTox::Algorithm::numeric? value
428
547
  return OT.NumericFeature
429
548
  else
430
- return OT.StringFeature
549
+ return OT.NominalFeature
431
550
  end
432
551
  end
552
+ end
553
+
554
+ # quick hack to enable sdf import via csv
555
+ # should be refactored
556
+ class Sdf
557
+
558
+ attr_accessor :dataset
559
+
560
+ def initialize
561
+ @data = {}
562
+
563
+ @compound_errors = []
564
+ @activity_errors = []
565
+ @duplicates = {}
566
+ end
567
+
568
+ def load_sdf(sdf)
569
+
570
+ obconversion = OpenBabel::OBConversion.new
571
+ obmol = OpenBabel::OBMol.new
572
+ obconversion.set_in_and_out_formats "sdf", "inchi"
573
+
574
+ table = Table.new
575
+
576
+ properties = []
577
+ sdf.each_line { |l| properties << l.to_s if l.match(/</) }
578
+ properties.uniq!
579
+ properties.sort!
580
+ properties.collect!{ |p| p.gsub(/<|>/,'').strip.chomp }
581
+
582
+ rec = 0
583
+ sdf.split(/\$\$\$\$\r*\n/).each do |s|
584
+ rec += 1
585
+ obconversion.read_string obmol, s
586
+ begin
587
+ inchi = obconversion.write_string(obmol).gsub(/\s/,'').chomp
588
+ @duplicates[inchi] = [] unless @duplicates[inchi]
589
+ @duplicates[inchi] << rec #inchi#+", "+row.join(", ")
590
+ compound = Compound.from_inchi inchi
591
+ rescue
592
+ @compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec} have been ignored! \n#{s}"
593
+ next
594
+ end
595
+ row = {}
596
+ obmol.get_data.each { |d| row[d.get_attribute] = d.get_value if properties.include?(d.get_attribute) }
597
+ table.data[compound.uri] = row
598
+ end
599
+
600
+ # find and remove ignored_features
601
+ @activity_errors = table.clean_features
602
+ table.add_to_dataset @dataset
603
+
604
+ warnings = ''
605
+ warnings += "<p>Incorrect Smiles structures (ignored):</p>" + @compound_errors.join("<br/>") unless @compound_errors.empty?
606
+ warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
607
+ duplicate_warnings = ''
608
+ @duplicates.each {|inchi,lines| duplicate_warnings << "<p>#{lines.join('<br/>')}</p>" if lines.size > 1 }
609
+ warnings += "<p>Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty?
610
+
611
+ @dataset.metadata[OT.Warnings] = warnings
612
+ @dataset
433
613
 
434
- def split_row(row)
435
- row.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes
436
614
  end
437
615
 
438
616
  end
@@ -131,13 +131,14 @@ module OpenTox
131
131
  raise "unknown content-type for task : '"+res.content_type.to_s+"'"+" base-uri: "+base_uri.to_s+" content: "+res[0..200].to_s
132
132
  end
133
133
 
134
- LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion"
134
+ #LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion"
135
135
  task.wait_for_completion waiting_task
136
136
  unless task.completed? # maybe task was cancelled / error
137
137
  if task.errorReport
138
138
  received_error task.errorReport, task.http_code, nil, {:rest_uri => task.uri, :rest_code => task.http_code}
139
139
  else
140
- raise "task status: '"+task.status.to_s+"' but errorReport nil"
140
+ raise "status of task '"+task.uri.to_s+"' is no longer running (hasStatus is '"+task.status+
141
+ "'), but it is neither completed nor has an errorReport"
141
142
  end
142
143
  end
143
144