opentox-ruby 3.1.0 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +19 -9
- data/README.markdown +1 -1
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/lib/algorithm.rb +143 -37
- data/lib/compound.rb +66 -18
- data/lib/dataset.rb +38 -3
- data/lib/model.rb +36 -13
- data/lib/parser.rb +34 -19
- data/lib/r-util.rb +93 -34
- data/lib/serializer.rb +70 -22
- data/lib/stratification.R +71 -7
- data/lib/transform.rb +5 -3
- data/lib/utils.rb +356 -97
- data/lib/validation.rb +6 -4
- metadata +20 -4
data/ChangeLog
CHANGED
@@ -1,13 +1,23 @@
|
|
1
|
+
v4.0.0 2012-07-12
|
2
|
+
* fminer addition of compounds fixed
|
3
|
+
* improved performance for CSV download
|
4
|
+
* switch to opentox-ruby version 4.0.0
|
5
|
+
|
6
|
+
2012-04-20
|
7
|
+
* Support for joelib and openbabel descriptors in a completely unified interface with CDK (Ambit)
|
8
|
+
* Features can have multiple types (nominal and numeric), PC descriptors have detailed meta data
|
9
|
+
* Myriads of bugfixes to CSV download code (e.g. missing descriptors, handling of duplicates)
|
10
|
+
|
1
11
|
v3.1.0 2012-02-24
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
gauss() removed
|
12
|
+
* utils.rb: added for special routines (e.g. descriptor calculation)
|
13
|
+
* task.rb: Polling with increasing interval
|
14
|
+
* parser.rb: CSV up and download fixed
|
15
|
+
* transform.rb: routines to create machine learning data matrices
|
16
|
+
* algorithm.rb: SVM parameter grid search, cos similarity as algorithm, gauss() removed
|
8
17
|
|
9
18
|
v3.0.1 2011-10-19
|
10
|
-
|
11
|
-
|
19
|
+
* feature: model registration to ontology service
|
20
|
+
* ontology lib gets endpoints from ontology service
|
21
|
+
|
12
22
|
v3.0.0 2011-09-23
|
13
|
-
|
23
|
+
* datasets stored as json (with Yajl) to improve performance
|
data/README.markdown
CHANGED
@@ -38,4 +38,4 @@ This example shows how to create a lazar model and predict a compound, it assume
|
|
38
38
|
Copyright
|
39
39
|
---------
|
40
40
|
|
41
|
-
Copyright (c) 2009-
|
41
|
+
Copyright (c) 2009-2012 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -42,9 +42,10 @@ begin
|
|
42
42
|
gem.add_dependency "dm-migrations", "=1.1.0"
|
43
43
|
gem.add_dependency "dm-validations", "=1.1.0"
|
44
44
|
gem.add_dependency "dm-sqlite-adapter", "=1.1.0"
|
45
|
-
gem.add_dependency "ruby-plot", "=0.6.
|
45
|
+
gem.add_dependency "ruby-plot", "=0.6.1"
|
46
46
|
gem.add_dependency "gsl", "=1.14.7"
|
47
47
|
gem.add_dependency "statsample", "=1.1.0"
|
48
|
+
gem.add_dependency "redis", "=2.2.2"
|
48
49
|
|
49
50
|
gem.add_development_dependency 'jeweler'
|
50
51
|
gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
4.0.0
|
data/lib/algorithm.rb
CHANGED
@@ -56,25 +56,73 @@ module OpenTox
|
|
56
56
|
|
57
57
|
def check_params(params,per_mil,subjectid=nil)
|
58
58
|
raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
|
59
|
-
raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
|
60
|
-
@prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid
|
61
59
|
@training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid
|
60
|
+
|
61
|
+
unless params[:prediction_feature] # try to read prediction_feature from dataset
|
62
|
+
raise OpenTox::NotFoundError.new "Please provide a prediction_feature parameter" unless @training_dataset.features.size == 1
|
63
|
+
prediction_feature = OpenTox::Feature.find(@training_dataset.features.keys.first,@subjectid)
|
64
|
+
params[:prediction_feature] = prediction_feature.uri
|
65
|
+
end
|
66
|
+
@prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid
|
67
|
+
|
62
68
|
raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless @training_dataset.features and @training_dataset.features.include?(params[:prediction_feature])
|
63
69
|
|
64
70
|
unless params[:min_frequency].nil?
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
71
|
+
# check for percentage
|
72
|
+
if params[:min_frequency].include? "pc"
|
73
|
+
per_mil=params[:min_frequency].gsub(/pc/,"")
|
74
|
+
if OpenTox::Algorithm.numeric? per_mil
|
75
|
+
per_mil = per_mil.to_i * 10
|
76
|
+
else
|
77
|
+
bad_request=true
|
78
|
+
end
|
79
|
+
# check for per-mil
|
80
|
+
elsif params[:min_frequency].include? "pm"
|
81
|
+
per_mil=params[:min_frequency].gsub(/pm/,"")
|
82
|
+
if OpenTox::Algorithm.numeric? per_mil
|
83
|
+
per_mil = per_mil.to_i
|
84
|
+
else
|
85
|
+
bad_request=true
|
86
|
+
end
|
87
|
+
# set minfreq directly
|
88
|
+
else
|
89
|
+
if OpenTox::Algorithm.numeric? params[:min_frequency]
|
90
|
+
@minfreq=params[:min_frequency].to_i
|
91
|
+
LOGGER.debug "min_frequency #{@minfreq}"
|
92
|
+
else
|
93
|
+
bad_request=true
|
94
|
+
end
|
95
|
+
end
|
96
|
+
raise OpenTox::BadRequestError.new "Minimum frequency must be integer [n], or a percentage [n]pc, or a per-mil [n]pm , with n greater 0" if bad_request
|
97
|
+
end
|
98
|
+
if @minfreq.nil?
|
99
|
+
@minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,per_mil)
|
100
|
+
LOGGER.debug "min_frequency #{@minfreq} (input was #{per_mil} per-mil)"
|
69
101
|
end
|
70
102
|
end
|
71
103
|
|
72
|
-
def add_fminer_data(fminer_instance,
|
104
|
+
def add_fminer_data(fminer_instance, value_map)
|
105
|
+
|
106
|
+
|
107
|
+
# detect the number of duplicate entries (values per feature) for each compound
|
108
|
+
compound_sizes = {}
|
109
|
+
@training_dataset.compounds.each do |compound|
|
110
|
+
entries=@training_dataset.data_entries[compound]
|
111
|
+
entries.each do |feature, values|
|
112
|
+
compound_sizes[compound] || compound_sizes[compound] = []
|
113
|
+
compound_sizes[compound] << values.size unless values.size == 0
|
114
|
+
end
|
115
|
+
compound_sizes[compound].uniq!
|
116
|
+
raise "Inappropriate data for fminer" if compound_sizes[compound].size > 1
|
117
|
+
compound_sizes[compound] = compound_sizes[compound][0] # integer instead of array
|
118
|
+
end
|
73
119
|
|
74
120
|
id = 1 # fminer start id is not 0
|
75
|
-
|
121
|
+
|
122
|
+
@training_dataset.compounds.each do |compound|
|
123
|
+
entry=@training_dataset.data_entries[compound]
|
76
124
|
begin
|
77
|
-
smiles = OpenTox::Compound.
|
125
|
+
smiles = OpenTox::Compound.new(compound).to_smiles
|
78
126
|
rescue
|
79
127
|
LOGGER.warn "No resource for #{compound.to_s}"
|
80
128
|
next
|
@@ -84,32 +132,31 @@ module OpenTox
|
|
84
132
|
next
|
85
133
|
end
|
86
134
|
|
87
|
-
value_map=params[:value_map] unless params[:value_map].nil?
|
88
135
|
entry.each do |feature,values|
|
89
136
|
if feature == @prediction_feature.uri
|
90
|
-
|
91
|
-
if
|
137
|
+
(0...compound_sizes[compound]).each { |i|
|
138
|
+
if values[i].nil?
|
92
139
|
LOGGER.warn "No #{feature} activity for #{compound.to_s}."
|
93
140
|
else
|
94
141
|
if @prediction_feature.feature_type == "classification"
|
95
|
-
activity= value_map.invert[
|
142
|
+
activity= value_map.invert[values[i]].to_i # activities are mapped to 1..n
|
96
143
|
@db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect
|
97
144
|
elsif @prediction_feature.feature_type == "regression"
|
98
|
-
activity=
|
145
|
+
activity= values[i].to_f
|
99
146
|
end
|
100
147
|
begin
|
101
|
-
fminer_instance.AddCompound(smiles,id)
|
102
|
-
fminer_instance.AddActivity(activity, id)
|
148
|
+
fminer_instance.AddCompound(smiles,id) if fminer_instance
|
149
|
+
fminer_instance.AddActivity(activity, id) if fminer_instance
|
103
150
|
@all_activities[id]=activity # DV: insert global information
|
104
151
|
@compounds[id] = compound
|
105
152
|
@smi[id] = smiles
|
106
153
|
id += 1
|
107
154
|
rescue Exception => e
|
108
|
-
LOGGER.warn "Could not add " + smiles + "\t" +
|
155
|
+
LOGGER.warn "Could not add " + smiles + "\t" + values[i].to_s + " to fminer"
|
109
156
|
LOGGER.warn e.backtrace
|
110
157
|
end
|
111
158
|
end
|
112
|
-
|
159
|
+
}
|
113
160
|
end
|
114
161
|
end
|
115
162
|
end
|
@@ -380,11 +427,11 @@ module OpenTox
|
|
380
427
|
prediction = acts[0]
|
381
428
|
else
|
382
429
|
#LOGGER.debug gram_matrix.to_yaml
|
383
|
-
@r = RinRuby.new(
|
384
|
-
@r.eval "set.seed(1)"
|
430
|
+
@r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests
|
385
431
|
@r.eval "suppressPackageStartupMessages(library('caret'))" # requires R packages "caret" and "kernlab"
|
386
432
|
@r.eval "suppressPackageStartupMessages(library('doMC'))" # requires R packages "multicore"
|
387
433
|
@r.eval "registerDoMC()" # switch on parallel processing
|
434
|
+
@r.eval "set.seed(1)"
|
388
435
|
begin
|
389
436
|
|
390
437
|
# set data
|
@@ -400,7 +447,14 @@ module OpenTox
|
|
400
447
|
|
401
448
|
# prepare data
|
402
449
|
LOGGER.debug "Preparing R data ..."
|
403
|
-
@r.eval
|
450
|
+
@r.eval <<-EOR
|
451
|
+
weights=NULL
|
452
|
+
if (class(y) == 'character') {
|
453
|
+
y = factor(y)
|
454
|
+
suppressPackageStartupMessages(library('class'))
|
455
|
+
#weights=unlist(as.list(prop.table(table(y))))
|
456
|
+
}
|
457
|
+
EOR
|
404
458
|
|
405
459
|
@r.eval <<-EOR
|
406
460
|
rem = nearZeroVar(prop_matrix)
|
@@ -417,8 +471,18 @@ module OpenTox
|
|
417
471
|
|
418
472
|
# model + support vectors
|
419
473
|
LOGGER.debug "Creating R SVM model ..."
|
420
|
-
@r.eval <<-EOR
|
421
|
-
|
474
|
+
train_success = @r.eval <<-EOR
|
475
|
+
# AM: TODO: evaluate class weight effect by altering:
|
476
|
+
# AM: comment in 'weights' above run and class.weights=weights vs. class.weights=1-weights
|
477
|
+
# AM: vs
|
478
|
+
# AM: comment out 'weights' above (status quo), thereby disabling weights
|
479
|
+
model = train(prop_matrix,y,
|
480
|
+
method="svmradial",
|
481
|
+
preProcess=c("center", "scale"),
|
482
|
+
class.weights=weights,
|
483
|
+
trControl=trainControl(method="LGOCV",number=10),
|
484
|
+
tuneLength=8
|
485
|
+
)
|
422
486
|
perf = ifelse ( class(y)!='numeric', max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
|
423
487
|
EOR
|
424
488
|
|
@@ -431,6 +495,7 @@ module OpenTox
|
|
431
495
|
|
432
496
|
# censoring
|
433
497
|
prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance )
|
498
|
+
prediction = nil unless train_success
|
434
499
|
LOGGER.debug "Performance: #{sprintf("%.2f", @r.perf)}"
|
435
500
|
rescue Exception => e
|
436
501
|
LOGGER.debug "#{e.class}: #{e.message}"
|
@@ -456,30 +521,42 @@ module OpenTox
|
|
456
521
|
@r.del_missing = params[:del_missing] == true ? 1 : 0
|
457
522
|
r_result_file = params[:fds_csv_file].sub("rfe_", "rfe_R_")
|
458
523
|
@r.f_fds_r = r_result_file.to_s
|
459
|
-
|
524
|
+
|
460
525
|
# need packs 'randomForest', 'RANN'
|
461
526
|
@r.eval <<-EOR
|
462
|
-
set.seed(1)
|
463
527
|
suppressPackageStartupMessages(library('caret'))
|
464
528
|
suppressPackageStartupMessages(library('randomForest'))
|
465
529
|
suppressPackageStartupMessages(library('RANN'))
|
466
530
|
suppressPackageStartupMessages(library('doMC'))
|
467
531
|
registerDoMC()
|
468
|
-
|
532
|
+
set.seed(1)
|
533
|
+
|
469
534
|
acts = read.csv(ds_csv_file, check.names=F)
|
470
535
|
feats = read.csv(fds_csv_file, check.names=F)
|
471
536
|
ds = merge(acts, feats, by="SMILES") # duplicates features for duplicate SMILES :-)
|
472
|
-
|
537
|
+
|
473
538
|
features = ds[,(dim(acts)[2]+1):(dim(ds)[2])]
|
474
539
|
y = ds[,which(names(ds) == prediction_feature)]
|
475
|
-
|
540
|
+
|
476
541
|
# assumes a data matrix 'features' and a vector 'y' of target values
|
477
542
|
row.names(features)=NULL
|
478
|
-
|
543
|
+
|
544
|
+
# features with all values missing removed
|
545
|
+
na_col = names ( which ( apply ( features, 2, function(x) all ( is.na ( x ) ) ) ) )
|
546
|
+
features = features[,!names(features) %in% na_col]
|
547
|
+
|
548
|
+
# features with infinite values removed
|
549
|
+
inf_col = names ( which ( apply ( features, 2, function(x) any ( is.infinite ( x ) ) ) ) )
|
550
|
+
features = features[,!names(features) %in% inf_col]
|
551
|
+
|
552
|
+
# features with zero variance removed
|
553
|
+
zero_var = names ( which ( apply ( features, 2, function(x) var(x, na.rm=T) ) == 0 ) )
|
554
|
+
features = features[,!names(features) %in% zero_var]
|
555
|
+
|
479
556
|
pp = NULL
|
480
557
|
if (del_missing) {
|
481
558
|
# needed if rows should be removed
|
482
|
-
na_ids = apply(features,1,function(x)any(is.na(x)))
|
559
|
+
na_ids = apply ( features,1,function(x) any ( is.na ( x ) ) )
|
483
560
|
features = features[!na_ids,]
|
484
561
|
y = y[!na_ids]
|
485
562
|
pp = preProcess(features, method=c("scale", "center"))
|
@@ -488,17 +565,23 @@ module OpenTox
|
|
488
565
|
pp = preProcess(features, method=c("scale", "center", "knnImpute"))
|
489
566
|
}
|
490
567
|
features = predict(pp, features)
|
491
|
-
|
568
|
+
|
569
|
+
# features with nan values removed (sometimes preProcess return NaN values)
|
570
|
+
nan_col = names ( which ( apply ( features, 2, function(x) any ( is.nan ( x ) ) ) ) )
|
571
|
+
features = features[,!names(features) %in% nan_col]
|
572
|
+
|
492
573
|
# determine subsets
|
493
|
-
subsets = dim(features)[2]*c(0.
|
494
|
-
subsets = c(2,3,4,5,7
|
574
|
+
subsets = dim(features)[2]*c(0.3, 0.32, 0.34, 0.36, 0.38, 0.4, 0.42, 0.44, 0.46, 0.48, 0.5, 0.52, 0.54, 0.56, 0.58, 0.6, 0.62, 0.64, 0.66, 0.68, 0.7)
|
575
|
+
#subsets = dim(features)[2]*c(0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7)
|
576
|
+
#subsets = c(2,3,4,5,7,10,subsets)
|
577
|
+
#subsets = c(2,3,4,5,7,10,13,16,19,22,25,28,30)
|
495
578
|
subsets = unique(sort(round(subsets)))
|
496
579
|
subsets = subsets[subsets<=dim(features)[2]]
|
497
580
|
subsets = subsets[subsets>1]
|
498
|
-
|
581
|
+
|
499
582
|
# Recursive feature elimination
|
500
|
-
rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=
|
501
|
-
|
583
|
+
rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=150), sizes=subsets)
|
584
|
+
|
502
585
|
# read existing dataset and select most useful features
|
503
586
|
csv=feats[,c("SMILES", rfProfile$optVariables)]
|
504
587
|
write.csv(x=csv,file=f_fds_r, row.names=F, quote=F, na='')
|
@@ -527,7 +610,7 @@ module OpenTox
|
|
527
610
|
# @param [Hash] required keys: compound, features, feature_dataset_uri, pc_type, lib; optional key: subjectid
|
528
611
|
# @return [Hash] Hash with feature name as key and value as value
|
529
612
|
def self.lookup(params)
|
530
|
-
params[:compound].lookup(params[:features], params[:feature_dataset_uri],params[:pc_type],params[:subjectid])
|
613
|
+
params[:compound].lookup(params[:features], params[:feature_dataset_uri], params[:pc_type], params[:lib], params[:subjectid])
|
531
614
|
end
|
532
615
|
end
|
533
616
|
|
@@ -539,3 +622,26 @@ module OpenTox
|
|
539
622
|
end
|
540
623
|
end
|
541
624
|
end
|
625
|
+
|
626
|
+
class Array
|
627
|
+
# collect method extended for parallel processing.
|
628
|
+
# Note: assign return value as: ans = arr.pcollect(n) { |obj| ... }
|
629
|
+
# @param n the maximum number of child processes running at the same time (default: unlimited)
|
630
|
+
def pcollect(n = nil)
|
631
|
+
nproc = 0
|
632
|
+
result = collect do |*a|
|
633
|
+
r, w = IO.pipe
|
634
|
+
fork do
|
635
|
+
r.close
|
636
|
+
w.write( Marshal.dump( yield(*a) ) )
|
637
|
+
end
|
638
|
+
if n and (nproc+=1) >= n
|
639
|
+
Process.wait ; nproc -= 1
|
640
|
+
end
|
641
|
+
[ w.close, r ].last
|
642
|
+
end
|
643
|
+
Process.waitall
|
644
|
+
result.collect{|r| Marshal.load [ r.read, r.close ].first}
|
645
|
+
end
|
646
|
+
end
|
647
|
+
|
data/lib/compound.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
|
4
4
|
module OpenTox
|
5
5
|
|
6
|
+
require "rexml/document"
|
6
7
|
# Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure).
|
7
8
|
class Compound
|
8
9
|
|
@@ -17,16 +18,20 @@ module OpenTox
|
|
17
18
|
# @return [Compound] Compound
|
18
19
|
def initialize(uri=nil)
|
19
20
|
@uri = uri
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
if (@uri =~ URI::regexp) || @uri.nil?
|
22
|
+
case @uri
|
23
|
+
when /InChI/ # shortcut for IST services
|
24
|
+
@inchi = @uri.sub(/^.*InChI/, 'InChI')
|
25
|
+
else
|
26
|
+
@inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri
|
27
|
+
end
|
28
|
+
|
29
|
+
if @uri and @inchi.to_s.size==0
|
30
|
+
LOGGER.warn "REMOVE ABMIT HACK: no inchi for compound "+@uri.to_s+", load via smiles"
|
31
|
+
@inchi = Compound.smiles2inchi(Compound.smiles(@uri))
|
32
|
+
end
|
23
33
|
else
|
24
|
-
|
25
|
-
end
|
26
|
-
|
27
|
-
if @uri and @inchi.to_s.size==0
|
28
|
-
LOGGER.warn "REMOVE ABMIT HACK: no inchi for compound "+@uri.to_s+", load via smiles"
|
29
|
-
@inchi = Compound.smiles2inchi(Compound.smiles(@uri))
|
34
|
+
raise "Not able to create compound with uri: #{@uri}"
|
30
35
|
end
|
31
36
|
end
|
32
37
|
|
@@ -130,6 +135,47 @@ module OpenTox
|
|
130
135
|
"not available"
|
131
136
|
end
|
132
137
|
end
|
138
|
+
|
139
|
+
|
140
|
+
# Get all known compound names sorted by classification. Relies on an external service for name lookups.
|
141
|
+
# @example
|
142
|
+
# names = compound.to_names_hash
|
143
|
+
# @return [Hash] Classification => Name Array
|
144
|
+
def to_names_hash
|
145
|
+
begin
|
146
|
+
xml = RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names/xml")
|
147
|
+
xmldoc = REXML::Document.new(xml)
|
148
|
+
data = {}
|
149
|
+
|
150
|
+
xmldoc.root.elements[1].elements.each{|e|
|
151
|
+
if data.has_key?(e.attribute("classification").value) == false
|
152
|
+
data[e.attribute("classification").value] = [e.text]
|
153
|
+
else
|
154
|
+
data[e.attribute("classification").value].push(e.text)
|
155
|
+
end
|
156
|
+
}
|
157
|
+
data
|
158
|
+
rescue
|
159
|
+
"not available"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
# Load the compound names known to the AMBIT name search service into a new dataset. Relies on an external service for name lookups.
|
164
|
+
# @example
|
165
|
+
# uri = compound.to_ambit_names_hash
|
166
|
+
# @return [String] URI of the saved dataset, or "not available" if the lookup fails
|
167
|
+
def to_ambit_names_hash
|
168
|
+
begin
|
169
|
+
ds = OpenTox::Dataset.new
|
170
|
+
ds.save
|
171
|
+
ds.load_rdfxml(RestClientWrapper.get("http://apps.ideaconsult.net:8080/ambit2/query/compound/search/names?type=smiles&property=&search=#{@inchi}"))
|
172
|
+
ds.save
|
173
|
+
ds.uri
|
174
|
+
rescue
|
175
|
+
"not available"
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
133
179
|
|
134
180
|
# Match a smarts string
|
135
181
|
# @example
|
@@ -197,25 +243,28 @@ module OpenTox
|
|
197
243
|
# Lookup numerical values, returns hash with feature name as key and value as value
|
198
244
|
# @param [Array] Array of feature names
|
199
245
|
# @param [String] Feature dataset uri
|
246
|
+
# @param [String] Comma separated pc types
|
247
|
+
# @param [String] Comma separated descriptor libraries (forwarded as :lib to the descriptor calculation service)
|
200
248
|
# @return [Hash] Hash with feature name as key and value as value
|
201
|
-
|
249
|
+
def lookup(feature_array,feature_dataset_uri,pc_type,lib,subjectid=nil)
|
202
250
|
ds = OpenTox::Dataset.find(feature_dataset_uri,subjectid)
|
203
251
|
#entry = ds.data_entries[self.uri]
|
204
252
|
entry = nil
|
205
|
-
ds.data_entries.each { |c_uri, values|
|
206
|
-
|
207
|
-
|
253
|
+
ds.data_entries.each { |c_uri, values|
|
254
|
+
compound = OpenTox::Compound.new(c_uri)
|
255
|
+
if compound.to_inchi == self.to_inchi # Compare compounds by InChI
|
256
|
+
entry = ds.data_entries[c_uri]
|
208
257
|
break
|
209
258
|
end
|
210
259
|
}
|
211
260
|
LOGGER.debug "#{entry.size} entries in feature ds for query." unless entry.nil?
|
212
|
-
|
213
261
|
if entry.nil?
|
214
|
-
|
215
|
-
uri =
|
216
|
-
ds = OpenTox::Dataset.find(uri,subjectid)
|
262
|
+
temp_ds = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],subjectid); temp_ds.add_compound(self.uri); temp_uri = temp_ds.save(subjectid)
|
263
|
+
uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"], "/pc/AllDescriptors"), {:dataset_uri => temp_uri, :pc_type => pc_type, :lib => lib, :subjectid => subjectid})
|
264
|
+
ds = OpenTox::Dataset.find(uri, subjectid)
|
217
265
|
entry = ds.data_entries[self.uri]
|
218
266
|
ds.delete(subjectid)
|
267
|
+
temp_ds.delete(subjectid)
|
219
268
|
end
|
220
269
|
features = entry.keys
|
221
270
|
features.each { |feature|
|
@@ -224,7 +273,6 @@ module OpenTox
|
|
224
273
|
entry.delete(feature) unless feature == new_feature # e.g. when loading from ambit
|
225
274
|
}
|
226
275
|
#res = feature_array.collect {|v| entry[v]}
|
227
|
-
#LOGGER.debug "----- am #{entry.to_yaml}"
|
228
276
|
entry
|
229
277
|
end
|
230
278
|
|