opentox-ruby 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +39 -46
- data/VERSION +1 -1
- data/lib/algorithm.rb +797 -80
- data/lib/compound.rb +40 -0
- data/lib/config/config_ru.rb +2 -0
- data/lib/dataset.rb +57 -18
- data/lib/environment.rb +3 -3
- data/lib/feature.rb +15 -13
- data/lib/helper.rb +1 -2
- data/lib/model.rb +185 -82
- data/lib/opentox-ruby.rb +1 -1
- data/lib/overwrite.rb +2 -1
- data/lib/parser.rb +247 -69
- data/lib/rest_client_wrapper.rb +3 -2
- data/lib/serializer.rb +24 -10
- data/lib/task.rb +10 -3
- data/lib/to-html.rb +66 -41
- data/lib/validation.rb +93 -29
- metadata +206 -117
data/lib/opentox-ruby.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment'].each do |lib|
|
1
|
+
['rubygems', 'sinatra', 'sinatra/url_for', 'ohm', 'rest_client', 'yaml', 'cgi', 'spork', 'error', 'overwrite', 'environment', 'gsl'].each do |lib|
|
2
2
|
require lib
|
3
3
|
end
|
4
4
|
|
data/lib/overwrite.rb
CHANGED
@@ -50,7 +50,8 @@ class Sinatra::Base
|
|
50
50
|
halt task.http_code,task.to_yaml # PENDING differs from task-webservice
|
51
51
|
when /html/
|
52
52
|
response['Content-Type'] = "text/html"
|
53
|
-
|
53
|
+
# html -> task created with html form -> redirect to task uri
|
54
|
+
redirect task.uri
|
54
55
|
else # default /uri-list/
|
55
56
|
response['Content-Type'] = "text/uri-list"
|
56
57
|
if task.completed?
|
data/lib/parser.rb
CHANGED
@@ -40,8 +40,9 @@ module OpenTox
|
|
40
40
|
else
|
41
41
|
file = Tempfile.new("ot-rdfxml")
|
42
42
|
if @dataset
|
43
|
-
# do not concat /metadata to uri string, this would not work for dataset/R401577?max=3
|
44
43
|
uri = URI::parse(@uri)
|
44
|
+
#remove params like dataset/<id>?max=3 from uri, not needed for metadata
|
45
|
+
uri.query = nil
|
45
46
|
uri.path = File.join(uri.path,"metadata")
|
46
47
|
uri = uri.to_s
|
47
48
|
else
|
@@ -56,7 +57,7 @@ module OpenTox
|
|
56
57
|
`rapper -i rdfxml -o ntriples #{file.path} 2>/dev/null`.each_line do |line|
|
57
58
|
triple = line.to_triple
|
58
59
|
if triple[0] == @uri
|
59
|
-
if triple[1] == RDF.type # allow multiple types
|
60
|
+
if triple[1] == RDF.type || triple[1]==OT.predictedVariables # allow multiple types
|
60
61
|
@metadata[triple[1]] = [] unless @metadata[triple[1]]
|
61
62
|
@metadata[triple[1]] << triple[2].split('^^').first
|
62
63
|
else
|
@@ -75,6 +76,9 @@ module OpenTox
|
|
75
76
|
@metadata[OT.parameters] << parameter
|
76
77
|
end
|
77
78
|
end
|
79
|
+
#@metadata.each do |k,v|
|
80
|
+
#v = v.first if v and v.size == 1
|
81
|
+
#end
|
78
82
|
@metadata
|
79
83
|
end
|
80
84
|
|
@@ -82,7 +86,11 @@ module OpenTox
|
|
82
86
|
# @param [String] rdf
|
83
87
|
# @param [String] type of the info (e.g. OT.Task, OT.ErrorReport) needed to get the subject-uri
|
84
88
|
# @return [Owl] with uri and metadata set
|
85
|
-
def self.from_rdf( rdf, type )
|
89
|
+
def self.from_rdf( rdf, type, allow_multiple = false )
|
90
|
+
|
91
|
+
uris = Array.new
|
92
|
+
owls = Array.new
|
93
|
+
|
86
94
|
# write to file, then convert with rapper into triples
|
87
95
|
file = Tempfile.new("ot-rdfxml")
|
88
96
|
file.puts rdf
|
@@ -95,20 +103,27 @@ module OpenTox
|
|
95
103
|
triples.each_line do |line|
|
96
104
|
triple = line.to_triple
|
97
105
|
if triple[1] == RDF['type'] and triple[2]==type
|
98
|
-
|
106
|
+
if !allow_multiple
|
107
|
+
raise "uri already set, two uris found with type: "+type.to_s if uri
|
108
|
+
end
|
99
109
|
uri = triple[0]
|
110
|
+
uris << uri
|
100
111
|
end
|
101
112
|
end
|
102
113
|
File.delete(file.path)
|
114
|
+
|
103
115
|
# load metadata
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
116
|
+
uris.each { |uri|
|
117
|
+
metadata = {}
|
118
|
+
triples.each_line do |line|
|
119
|
+
triple = line.to_triple
|
120
|
+
metadata[triple[1]] = triple[2].split('^^').first if triple[0] == uri and triple[1] != RDF['type']
|
121
|
+
end
|
122
|
+
owl = Owl::Generic.new(uri)
|
123
|
+
owl.metadata = metadata
|
124
|
+
owls << owl
|
125
|
+
}
|
126
|
+
allow_multiple ? owls : owls[0]
|
112
127
|
end
|
113
128
|
|
114
129
|
# Generic parser for all OpenTox classes
|
@@ -228,7 +243,12 @@ module OpenTox
|
|
228
243
|
file = Tempfile.new("ot-rdfxml")
|
229
244
|
# do not concat /features to uri string, this would not work for dataset/R401577?max=3
|
230
245
|
uri = URI::parse(@uri)
|
231
|
-
|
246
|
+
# PENDING
|
247
|
+
# ambit models return http://host/dataset/id?feature_uris[]=sth but
|
248
|
+
# ambit dataset services do not support http://host/dataset/id/features?feature_uris[]=sth
|
249
|
+
# and features are not included in http://host/dataset/id/features
|
250
|
+
# -> load features from complete dataset
|
251
|
+
uri.path = File.join(uri.path,"features") unless @uri=~/\?(feature_uris|page|pagesize)/
|
232
252
|
uri = uri.to_s
|
233
253
|
file.puts OpenTox::RestClientWrapper.get uri,{:subjectid => subjectid,:accept => "application/rdf+xml"},nil,false
|
234
254
|
file.close
|
@@ -244,8 +264,13 @@ module OpenTox
|
|
244
264
|
File.delete(to_delete) if to_delete
|
245
265
|
statements.each do |triple|
|
246
266
|
if features.include? triple[0]
|
247
|
-
@dataset.features[triple[0]] = {} unless @dataset.features[triple[0]]
|
248
|
-
|
267
|
+
@dataset.features[triple[0]] = {} unless @dataset.features[triple[0]]
|
268
|
+
if triple[1] == RDF.type
|
269
|
+
@dataset.features[triple[0]][triple[1]] = [] unless @dataset.features[triple[0]][triple[1]]
|
270
|
+
@dataset.features[triple[0]][triple[1]] << triple[2].split('^^').first
|
271
|
+
else
|
272
|
+
@dataset.features[triple[0]][triple[1]] = triple[2].split('^^').first
|
273
|
+
end
|
249
274
|
end
|
250
275
|
end
|
251
276
|
@dataset.features
|
@@ -271,22 +296,39 @@ module OpenTox
|
|
271
296
|
@duplicates = {}
|
272
297
|
end
|
273
298
|
|
299
|
+
def detect_new_values(row, value_maps)
|
300
|
+
row.shift
|
301
|
+
row.each_index do |i|
|
302
|
+
value = row[i]
|
303
|
+
value_maps[i] = Hash.new if value_maps[i].nil?
|
304
|
+
value_maps[i][value].nil? ? value_maps[i][value]=0 : value_maps[i][value] += 1
|
305
|
+
end
|
306
|
+
value_maps
|
307
|
+
end
|
308
|
+
|
274
309
|
# Load Spreadsheet book (created with roo gem http://roo.rubyforge.org/, excel format specification: http://toxcreate.org/help)
|
275
310
|
# @param [Excel] book Excel workbook object (created with roo gem)
|
276
311
|
# @return [OpenTox::Dataset] Dataset object with Excel data
|
277
312
|
def load_spreadsheet(book)
|
278
313
|
book.default_sheet = 0
|
279
314
|
add_features book.row(1)
|
315
|
+
value_maps = Array.new
|
316
|
+
regression_features=Array.new
|
280
317
|
|
281
|
-
# AM: fix mixed read in
|
282
|
-
regression_features=false
|
283
318
|
2.upto(book.last_row) { |i|
|
284
319
|
row = book.row(i)
|
285
|
-
|
286
|
-
|
320
|
+
value_maps = detect_new_values(row, value_maps)
|
321
|
+
value_maps.each_with_index { |vm,j|
|
322
|
+
if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
|
323
|
+
regression_features[j]=true
|
324
|
+
else
|
325
|
+
regression_features[j]=false
|
326
|
+
end
|
327
|
+
}
|
328
|
+
}
|
329
|
+
2.upto(book.last_row) { |i|
|
330
|
+
add_values book.row(i), regression_features
|
287
331
|
}
|
288
|
-
|
289
|
-
2.upto(book.last_row) { |i| add_values book.row(i),regression_features }
|
290
332
|
warnings
|
291
333
|
@dataset
|
292
334
|
end
|
@@ -298,21 +340,27 @@ module OpenTox
|
|
298
340
|
row = 0
|
299
341
|
input = csv.split("\n")
|
300
342
|
add_features split_row(input.shift)
|
343
|
+
value_maps = Array.new
|
344
|
+
regression_features=Array.new
|
301
345
|
|
302
|
-
|
303
|
-
# AM: fix mixed read in
|
304
|
-
regression_features=false
|
305
346
|
input.each { |row|
|
306
347
|
row = split_row(row)
|
307
|
-
|
308
|
-
|
348
|
+
value_maps = detect_new_values(row, value_maps)
|
349
|
+
value_maps.each_with_index { |vm,j|
|
350
|
+
if vm.size > 5 # 5 is the maximum nr of classes supported by Fminer.
|
351
|
+
regression_features[j]=true
|
352
|
+
else
|
353
|
+
regression_features[j]=false
|
354
|
+
end
|
355
|
+
}
|
356
|
+
}
|
357
|
+
input.each { |row|
|
358
|
+
add_values split_row(row), regression_features
|
309
359
|
}
|
310
|
-
input.each { |row| add_values split_row(row),regression_features }
|
311
360
|
warnings
|
312
361
|
@dataset
|
313
362
|
end
|
314
363
|
|
315
|
-
|
316
364
|
private
|
317
365
|
|
318
366
|
def warnings
|
@@ -354,20 +402,10 @@ module OpenTox
|
|
354
402
|
end
|
355
403
|
end
|
356
404
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
value = row[i]
|
362
|
-
type = feature_type(value)
|
363
|
-
if type == OT.NumericFeature
|
364
|
-
regression_features=true
|
365
|
-
end
|
366
|
-
end
|
367
|
-
regression_features
|
368
|
-
end
|
369
|
-
|
370
|
-
def add_values(row, regression_features=false)
|
405
|
+
# Adds a row to a dataset
|
406
|
+
# @param Array A row split up as an array
|
407
|
+
# @param Array Indicator for regression for each field
|
408
|
+
def add_values(row, regression_features)
|
371
409
|
|
372
410
|
smiles = row.shift
|
373
411
|
compound = Compound.from_smiles(smiles)
|
@@ -381,27 +419,23 @@ module OpenTox
|
|
381
419
|
row.each_index do |i|
|
382
420
|
value = row[i]
|
383
421
|
feature = @features[i]
|
384
|
-
type = feature_type(value)
|
385
422
|
|
423
|
+
type = nil
|
424
|
+
if (regression_features[i])
|
425
|
+
type = feature_type(value)
|
426
|
+
if type != OT.NumericFeature
|
427
|
+
raise "Error! Expected numeric values."
|
428
|
+
end
|
429
|
+
else
|
430
|
+
type = OT.NominalFeature
|
431
|
+
end
|
386
432
|
@feature_types[feature] << type
|
387
433
|
|
388
|
-
|
434
|
+
case type
|
435
|
+
when OT.NumericFeature
|
389
436
|
val = value.to_f
|
390
|
-
|
391
|
-
|
392
|
-
when OT.NominalFeature
|
393
|
-
case value.to_s
|
394
|
-
when TRUE_REGEXP
|
395
|
-
val = true
|
396
|
-
when FALSE_REGEXP
|
397
|
-
val = false
|
398
|
-
end
|
399
|
-
when OT.NumericFeature
|
400
|
-
val = value.to_f
|
401
|
-
when OT.StringFeature
|
402
|
-
val = value.to_s
|
403
|
-
@activity_errors << smiles+", "+row.join(", ")
|
404
|
-
end
|
437
|
+
when OT.NominalFeature
|
438
|
+
val = value.to_s
|
405
439
|
end
|
406
440
|
if val!=nil
|
407
441
|
@dataset.add(compound.uri, feature, val)
|
@@ -413,26 +447,170 @@ module OpenTox
|
|
413
447
|
end
|
414
448
|
end
|
415
449
|
|
416
|
-
def
|
417
|
-
|
450
|
+
def feature_type(value)
|
451
|
+
if OpenTox::Algorithm::numeric? value
|
452
|
+
return OT.NumericFeature
|
453
|
+
else
|
454
|
+
return OT.NominalFeature
|
455
|
+
end
|
456
|
+
end
|
457
|
+
|
458
|
+
def split_row(row)
|
459
|
+
row.chomp.gsub(/["']/,'').split(/\s*[,;\t]\s*/) # remove quotes
|
460
|
+
end
|
461
|
+
|
462
|
+
end
|
463
|
+
|
464
|
+
class Table
|
465
|
+
|
466
|
+
attr_accessor :data, :features, :compounds
|
467
|
+
|
468
|
+
def initialize
|
469
|
+
@data = {}
|
470
|
+
@activity_errors = []
|
471
|
+
end
|
472
|
+
|
473
|
+
def feature_values(feature)
|
474
|
+
@data.collect{|c, row| row[feature]}.uniq.compact
|
475
|
+
end
|
476
|
+
|
477
|
+
def feature_types(feature)
|
478
|
+
@data.collect{|c, row| feature_type(row[feature])}.uniq.compact
|
479
|
+
end
|
480
|
+
|
481
|
+
def features
|
482
|
+
@data.collect{|c,row| row.keys}.flatten.uniq
|
483
|
+
end
|
484
|
+
|
485
|
+
def clean_features
|
486
|
+
ignored_features = []
|
487
|
+
features.each do |feature|
|
488
|
+
if feature_values(feature).size > 5
|
489
|
+
if feature_types(feature).size == 1 and feature_types(feature).first == OT.NumericFeature
|
490
|
+
# REGRESSION
|
491
|
+
elsif feature_types(feature).include? OT.NumericFeature
|
492
|
+
@data.each{|c,row| row[feature] = nil unless OpenTox::Algorithm::numeric?(row[feature]) } # delete nominal features
|
493
|
+
@activity_errors << "Nominal feature values of #{feature} ignored (using numeric features for regression models)."
|
494
|
+
else
|
495
|
+
@activity_errors << "Feature #{feature} ignored (more than 5 nominal feature values and no numeric values)."
|
496
|
+
ignored_features << feature
|
497
|
+
next
|
498
|
+
end
|
499
|
+
elsif feature_values(feature).size <= 1
|
500
|
+
@activity_errors << "Feature #{feature} ignored (less than 2 feature values)."
|
501
|
+
ignored_features << feature
|
502
|
+
else
|
503
|
+
# CLASSIFICATION
|
504
|
+
end
|
505
|
+
end
|
506
|
+
ignored_features.each do |feature|
|
507
|
+
@data.each{ |c,row| row.delete feature }
|
508
|
+
end
|
509
|
+
@activity_errors
|
418
510
|
end
|
419
511
|
|
420
|
-
def
|
421
|
-
|
512
|
+
def add_to_dataset(dataset)
|
513
|
+
features.each do |feature_name|
|
514
|
+
feature_uri = File.join(dataset.uri,"feature",URI.encode(feature_name))
|
515
|
+
dataset.add_feature(feature_uri,{DC.title => feature_name})
|
516
|
+
end
|
517
|
+
|
518
|
+
@data.each do |compound,row|
|
519
|
+
unless row.empty?
|
520
|
+
row.each do |feature,value|
|
521
|
+
if OpenTox::Algorithm::numeric?(value)
|
522
|
+
value = value.to_f
|
523
|
+
elsif value.nil? or value.empty?
|
524
|
+
value = nil
|
525
|
+
else
|
526
|
+
value = value.to_s
|
527
|
+
end
|
528
|
+
feature_uri = File.join(dataset.uri,"feature",URI.encode(feature))
|
529
|
+
dataset.add(compound, feature_uri, value)
|
530
|
+
#dataset.features[feature_uri][RDF.type] = feature_types(feature)
|
531
|
+
#dataset.features[feature_uri][OT.acceptValue] = feature_values(feature)
|
532
|
+
if feature_types(feature).include? OT.NumericFeature
|
533
|
+
dataset.features[feature_uri][RDF.type] = [OT.NumericFeature]
|
534
|
+
else
|
535
|
+
dataset.features[feature_uri][RDF.type] = [OT.NominalFeature]
|
536
|
+
dataset.features[feature_uri][OT.acceptValue] = feature_values(feature)
|
537
|
+
end
|
538
|
+
end
|
539
|
+
end
|
540
|
+
end
|
422
541
|
end
|
423
542
|
|
543
|
+
private
|
544
|
+
|
424
545
|
def feature_type(value)
|
425
|
-
if
|
426
|
-
return OT.NominalFeature
|
427
|
-
elsif numeric? value
|
546
|
+
if OpenTox::Algorithm::numeric? value
|
428
547
|
return OT.NumericFeature
|
429
548
|
else
|
430
|
-
return OT.
|
549
|
+
return OT.NominalFeature
|
431
550
|
end
|
432
551
|
end
|
552
|
+
end
|
553
|
+
|
554
|
+
# quick hack to enable sdf import via csv
|
555
|
+
# should be refactored
|
556
|
+
class Sdf
|
557
|
+
|
558
|
+
attr_accessor :dataset
|
559
|
+
|
560
|
+
def initialize
|
561
|
+
@data = {}
|
562
|
+
|
563
|
+
@compound_errors = []
|
564
|
+
@activity_errors = []
|
565
|
+
@duplicates = {}
|
566
|
+
end
|
567
|
+
|
568
|
+
def load_sdf(sdf)
|
569
|
+
|
570
|
+
obconversion = OpenBabel::OBConversion.new
|
571
|
+
obmol = OpenBabel::OBMol.new
|
572
|
+
obconversion.set_in_and_out_formats "sdf", "inchi"
|
573
|
+
|
574
|
+
table = Table.new
|
575
|
+
|
576
|
+
properties = []
|
577
|
+
sdf.each_line { |l| properties << l.to_s if l.match(/</) }
|
578
|
+
properties.uniq!
|
579
|
+
properties.sort!
|
580
|
+
properties.collect!{ |p| p.gsub(/<|>/,'').strip.chomp }
|
581
|
+
|
582
|
+
rec = 0
|
583
|
+
sdf.split(/\$\$\$\$\r*\n/).each do |s|
|
584
|
+
rec += 1
|
585
|
+
obconversion.read_string obmol, s
|
586
|
+
begin
|
587
|
+
inchi = obconversion.write_string(obmol).gsub(/\s/,'').chomp
|
588
|
+
@duplicates[inchi] = [] unless @duplicates[inchi]
|
589
|
+
@duplicates[inchi] << rec #inchi#+", "+row.join(", ")
|
590
|
+
compound = Compound.from_inchi inchi
|
591
|
+
rescue
|
592
|
+
@compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec} have been ignored! \n#{s}"
|
593
|
+
next
|
594
|
+
end
|
595
|
+
row = {}
|
596
|
+
obmol.get_data.each { |d| row[d.get_attribute] = d.get_value if properties.include?(d.get_attribute) }
|
597
|
+
table.data[compound.uri] = row
|
598
|
+
end
|
599
|
+
|
600
|
+
# find and remove ignored_features
|
601
|
+
@activity_errors = table.clean_features
|
602
|
+
table.add_to_dataset @dataset
|
603
|
+
|
604
|
+
warnings = ''
|
605
|
+
warnings += "<p>Incorrect Smiles structures (ignored):</p>" + @compound_errors.join("<br/>") unless @compound_errors.empty?
|
606
|
+
warnings += "<p>Irregular activities (ignored):</p>" + @activity_errors.join("<br/>") unless @activity_errors.empty?
|
607
|
+
duplicate_warnings = ''
|
608
|
+
@duplicates.each {|inchi,lines| duplicate_warnings << "<p>#{lines.join('<br/>')}</p>" if lines.size > 1 }
|
609
|
+
warnings += "<p>Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from <em>independent</em> experiments):</p>" + duplicate_warnings unless duplicate_warnings.empty?
|
610
|
+
|
611
|
+
@dataset.metadata[OT.Warnings] = warnings
|
612
|
+
@dataset
|
433
613
|
|
434
|
-
def split_row(row)
|
435
|
-
row.chomp.gsub(/["']/,'').split(/\s*[,;]\s*/) # remove quotes
|
436
614
|
end
|
437
615
|
|
438
616
|
end
|
data/lib/rest_client_wrapper.rb
CHANGED
@@ -131,13 +131,14 @@ module OpenTox
|
|
131
131
|
raise "unknown content-type for task : '"+res.content_type.to_s+"'"+" base-uri: "+base_uri.to_s+" content: "+res[0..200].to_s
|
132
132
|
end
|
133
133
|
|
134
|
-
LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion"
|
134
|
+
#LOGGER.debug "result is a task '"+task.uri.to_s+"', wait for completion"
|
135
135
|
task.wait_for_completion waiting_task
|
136
136
|
unless task.completed? # maybe task was cancelled / error
|
137
137
|
if task.errorReport
|
138
138
|
received_error task.errorReport, task.http_code, nil, {:rest_uri => task.uri, :rest_code => task.http_code}
|
139
139
|
else
|
140
|
-
raise "task
|
140
|
+
raise "status of task '"+task.uri.to_s+"' is no longer running (hasStatus is '"+task.status+
|
141
|
+
"'), but it is neither completed nor has an errorReport"
|
141
142
|
end
|
142
143
|
end
|
143
144
|
|