opentox-ruby 3.1.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +19 -9
- data/README.markdown +1 -1
- data/Rakefile +2 -1
- data/VERSION +1 -1
- data/lib/algorithm.rb +143 -37
- data/lib/compound.rb +66 -18
- data/lib/dataset.rb +38 -3
- data/lib/model.rb +36 -13
- data/lib/parser.rb +34 -19
- data/lib/r-util.rb +93 -34
- data/lib/serializer.rb +70 -22
- data/lib/stratification.R +71 -7
- data/lib/transform.rb +5 -3
- data/lib/utils.rb +356 -97
- data/lib/validation.rb +6 -4
- metadata +20 -4
data/ChangeLog
CHANGED
@@ -1,13 +1,23 @@
|
|
1
|
+
v4.0.0 2012-07-12
|
2
|
+
* fminer addition of compounds fixed
|
3
|
+
* improved performance for CSV download
|
4
|
+
* switch to opentox-ruby version 4.0.0
|
5
|
+
|
6
|
+
2012-04-20
|
7
|
+
* Support for joelib and openbabel descriptors in a completely unified interface with CDK (Ambit)
|
8
|
+
* Features can have multiple types (nominal and numeric), PC descriptors have detailed meta data
|
9
|
+
* Myriads of bugfixes to CSV download code (e.g. missing descriptors, handling of duplicates)
|
10
|
+
|
1
11
|
v3.1.0 2012-02-24
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
gauss() removed
|
12
|
+
* utils.rb: added for special routines (e.g. descriptor calculation)
|
13
|
+
* task.rb: Polling with increasing interval
|
14
|
+
* parser.rb: CSV up and download fixed
|
15
|
+
* transform.rb: routines to create machine learning data matrices
|
16
|
+
* algorithm.rb: SVM parameter grid search, cos similarity as algorithm, gauss() removed
|
8
17
|
|
9
18
|
v3.0.1 2011-10-19
|
10
|
-
|
11
|
-
|
19
|
+
* feature: model registration to ontology service
|
20
|
+
* ontology lib gets endpoints from ontology service
|
21
|
+
|
12
22
|
v3.0.0 2011-09-23
|
13
|
-
|
23
|
+
* datasets stored as json (with Yajl) to improve performance
|
data/README.markdown
CHANGED
@@ -38,4 +38,4 @@ This example shows how to create a lazar model and predict a compound, it assume
|
|
38
38
|
Copyright
|
39
39
|
---------
|
40
40
|
|
41
|
-
Copyright (c) 2009-
|
41
|
+
Copyright (c) 2009-2012 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -42,9 +42,10 @@ begin
|
|
42
42
|
gem.add_dependency "dm-migrations", "=1.1.0"
|
43
43
|
gem.add_dependency "dm-validations", "=1.1.0"
|
44
44
|
gem.add_dependency "dm-sqlite-adapter", "=1.1.0"
|
45
|
-
gem.add_dependency "ruby-plot", "=0.6.
|
45
|
+
gem.add_dependency "ruby-plot", "=0.6.1"
|
46
46
|
gem.add_dependency "gsl", "=1.14.7"
|
47
47
|
gem.add_dependency "statsample", "=1.1.0"
|
48
|
+
gem.add_dependency "redis", "=2.2.2"
|
48
49
|
|
49
50
|
gem.add_development_dependency 'jeweler'
|
50
51
|
gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
4.0.0
|
data/lib/algorithm.rb
CHANGED
@@ -56,25 +56,73 @@ module OpenTox
|
|
56
56
|
|
57
57
|
def check_params(params,per_mil,subjectid=nil)
|
58
58
|
raise OpenTox::NotFoundError.new "Please submit a dataset_uri." unless params[:dataset_uri] and !params[:dataset_uri].nil?
|
59
|
-
raise OpenTox::NotFoundError.new "Please submit a prediction_feature." unless params[:prediction_feature] and !params[:prediction_feature].nil?
|
60
|
-
@prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid
|
61
59
|
@training_dataset = OpenTox::Dataset.find "#{params[:dataset_uri]}", subjectid
|
60
|
+
|
61
|
+
unless params[:prediction_feature] # try to read prediction_feature from dataset
|
62
|
+
raise OpenTox::NotFoundError.new "Please provide a prediction_feature parameter" unless @training_dataset.features.size == 1
|
63
|
+
prediction_feature = OpenTox::Feature.find(@training_dataset.features.keys.first,@subjectid)
|
64
|
+
params[:prediction_feature] = prediction_feature.uri
|
65
|
+
end
|
66
|
+
@prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid
|
67
|
+
|
62
68
|
raise OpenTox::NotFoundError.new "No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}" unless @training_dataset.features and @training_dataset.features.include?(params[:prediction_feature])
|
63
69
|
|
64
70
|
unless params[:min_frequency].nil?
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
71
|
+
# check for percentage
|
72
|
+
if params[:min_frequency].include? "pc"
|
73
|
+
per_mil=params[:min_frequency].gsub(/pc/,"")
|
74
|
+
if OpenTox::Algorithm.numeric? per_mil
|
75
|
+
per_mil = per_mil.to_i * 10
|
76
|
+
else
|
77
|
+
bad_request=true
|
78
|
+
end
|
79
|
+
# check for per-mil
|
80
|
+
elsif params[:min_frequency].include? "pm"
|
81
|
+
per_mil=params[:min_frequency].gsub(/pm/,"")
|
82
|
+
if OpenTox::Algorithm.numeric? per_mil
|
83
|
+
per_mil = per_mil.to_i
|
84
|
+
else
|
85
|
+
bad_request=true
|
86
|
+
end
|
87
|
+
# set minfreq directly
|
88
|
+
else
|
89
|
+
if OpenTox::Algorithm.numeric? params[:min_frequency]
|
90
|
+
@minfreq=params[:min_frequency].to_i
|
91
|
+
LOGGER.debug "min_frequency #{@minfreq}"
|
92
|
+
else
|
93
|
+
bad_request=true
|
94
|
+
end
|
95
|
+
end
|
96
|
+
raise OpenTox::BadRequestError.new "Minimum frequency must be integer [n], or a percentage [n]pc, or a per-mil [n]pm , with n greater 0" if bad_request
|
97
|
+
end
|
98
|
+
if @minfreq.nil?
|
99
|
+
@minfreq=OpenTox::Algorithm.min_frequency(@training_dataset,per_mil)
|
100
|
+
LOGGER.debug "min_frequency #{@minfreq} (input was #{per_mil} per-mil)"
|
69
101
|
end
|
70
102
|
end
|
71
103
|
|
72
|
-
def add_fminer_data(fminer_instance,
|
104
|
+
def add_fminer_data(fminer_instance, value_map)
|
105
|
+
|
106
|
+
|
107
|
+
# detect nr duplicates per compound
|
108
|
+
compound_sizes = {}
|
109
|
+
@training_dataset.compounds.each do |compound|
|
110
|
+
entries=@training_dataset.data_entries[compound]
|
111
|
+
entries.each do |feature, values|
|
112
|
+
compound_sizes[compound] || compound_sizes[compound] = []
|
113
|
+
compound_sizes[compound] << values.size unless values.size == 0
|
114
|
+
end
|
115
|
+
compound_sizes[compound].uniq!
|
116
|
+
raise "Inappropriate data for fminer" if compound_sizes[compound].size > 1
|
117
|
+
compound_sizes[compound] = compound_sizes[compound][0] # integer instead of array
|
118
|
+
end
|
73
119
|
|
74
120
|
id = 1 # fminer start id is not 0
|
75
|
-
|
121
|
+
|
122
|
+
@training_dataset.compounds.each do |compound|
|
123
|
+
entry=@training_dataset.data_entries[compound]
|
76
124
|
begin
|
77
|
-
smiles = OpenTox::Compound.
|
125
|
+
smiles = OpenTox::Compound.new(compound).to_smiles
|
78
126
|
rescue
|
79
127
|
LOGGER.warn "No resource for #{compound.to_s}"
|
80
128
|
next
|
@@ -84,32 +132,31 @@ module OpenTox
|
|
84
132
|
next
|
85
133
|
end
|
86
134
|
|
87
|
-
value_map=params[:value_map] unless params[:value_map].nil?
|
88
135
|
entry.each do |feature,values|
|
89
136
|
if feature == @prediction_feature.uri
|
90
|
-
|
91
|
-
if
|
137
|
+
(0...compound_sizes[compound]).each { |i|
|
138
|
+
if values[i].nil?
|
92
139
|
LOGGER.warn "No #{feature} activity for #{compound.to_s}."
|
93
140
|
else
|
94
141
|
if @prediction_feature.feature_type == "classification"
|
95
|
-
activity= value_map.invert[
|
142
|
+
activity= value_map.invert[values[i]].to_i # activities are mapped to 1..n
|
96
143
|
@db_class_sizes[activity-1].nil? ? @db_class_sizes[activity-1]=1 : @db_class_sizes[activity-1]+=1 # AM effect
|
97
144
|
elsif @prediction_feature.feature_type == "regression"
|
98
|
-
activity=
|
145
|
+
activity= values[i].to_f
|
99
146
|
end
|
100
147
|
begin
|
101
|
-
fminer_instance.AddCompound(smiles,id)
|
102
|
-
fminer_instance.AddActivity(activity, id)
|
148
|
+
fminer_instance.AddCompound(smiles,id) if fminer_instance
|
149
|
+
fminer_instance.AddActivity(activity, id) if fminer_instance
|
103
150
|
@all_activities[id]=activity # DV: insert global information
|
104
151
|
@compounds[id] = compound
|
105
152
|
@smi[id] = smiles
|
106
153
|
id += 1
|
107
154
|
rescue Exception => e
|
108
|
-
LOGGER.warn "Could not add " + smiles + "\t" +
|
155
|
+
LOGGER.warn "Could not add " + smiles + "\t" + values[i].to_s + " to fminer"
|
109
156
|
LOGGER.warn e.backtrace
|
110
157
|
end
|
111
158
|
end
|
112
|
-
|
159
|
+
}
|
113
160
|
end
|
114
161
|
end
|
115
162
|
end
|
@@ -380,11 +427,11 @@ module OpenTox
|
|
380
427
|
prediction = acts[0]
|
381
428
|
else
|
382
429
|
#LOGGER.debug gram_matrix.to_yaml
|
383
|
-
@r = RinRuby.new(
|
384
|
-
@r.eval "set.seed(1)"
|
430
|
+
@r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests
|
385
431
|
@r.eval "suppressPackageStartupMessages(library('caret'))" # requires R packages "caret" and "kernlab"
|
386
432
|
@r.eval "suppressPackageStartupMessages(library('doMC'))" # requires R packages "multicore"
|
387
433
|
@r.eval "registerDoMC()" # switch on parallel processing
|
434
|
+
@r.eval "set.seed(1)"
|
388
435
|
begin
|
389
436
|
|
390
437
|
# set data
|
@@ -400,7 +447,14 @@ module OpenTox
|
|
400
447
|
|
401
448
|
# prepare data
|
402
449
|
LOGGER.debug "Preparing R data ..."
|
403
|
-
@r.eval
|
450
|
+
@r.eval <<-EOR
|
451
|
+
weights=NULL
|
452
|
+
if (class(y) == 'character') {
|
453
|
+
y = factor(y)
|
454
|
+
suppressPackageStartupMessages(library('class'))
|
455
|
+
#weights=unlist(as.list(prop.table(table(y))))
|
456
|
+
}
|
457
|
+
EOR
|
404
458
|
|
405
459
|
@r.eval <<-EOR
|
406
460
|
rem = nearZeroVar(prop_matrix)
|
@@ -417,8 +471,18 @@ module OpenTox
|
|
417
471
|
|
418
472
|
# model + support vectors
|
419
473
|
LOGGER.debug "Creating R SVM model ..."
|
420
|
-
@r.eval <<-EOR
|
421
|
-
|
474
|
+
train_success = @r.eval <<-EOR
|
475
|
+
# AM: TODO: evaluate class weight effect by altering:
|
476
|
+
# AM: comment in 'weights' above run and class.weights=weights vs. class.weights=1-weights
|
477
|
+
# AM: vs
|
478
|
+
# AM: comment out 'weights' above (status quo), thereby disabling weights
|
479
|
+
model = train(prop_matrix,y,
|
480
|
+
method="svmradial",
|
481
|
+
preProcess=c("center", "scale"),
|
482
|
+
class.weights=weights,
|
483
|
+
trControl=trainControl(method="LGOCV",number=10),
|
484
|
+
tuneLength=8
|
485
|
+
)
|
422
486
|
perf = ifelse ( class(y)!='numeric', max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
|
423
487
|
EOR
|
424
488
|
|
@@ -431,6 +495,7 @@ module OpenTox
|
|
431
495
|
|
432
496
|
# censoring
|
433
497
|
prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance )
|
498
|
+
prediction = nil unless train_success
|
434
499
|
LOGGER.debug "Performance: #{sprintf("%.2f", @r.perf)}"
|
435
500
|
rescue Exception => e
|
436
501
|
LOGGER.debug "#{e.class}: #{e.message}"
|
@@ -456,30 +521,42 @@ module OpenTox
|
|
456
521
|
@r.del_missing = params[:del_missing] == true ? 1 : 0
|
457
522
|
r_result_file = params[:fds_csv_file].sub("rfe_", "rfe_R_")
|
458
523
|
@r.f_fds_r = r_result_file.to_s
|
459
|
-
|
524
|
+
|
460
525
|
# need packs 'randomForest', 'RANN'
|
461
526
|
@r.eval <<-EOR
|
462
|
-
set.seed(1)
|
463
527
|
suppressPackageStartupMessages(library('caret'))
|
464
528
|
suppressPackageStartupMessages(library('randomForest'))
|
465
529
|
suppressPackageStartupMessages(library('RANN'))
|
466
530
|
suppressPackageStartupMessages(library('doMC'))
|
467
531
|
registerDoMC()
|
468
|
-
|
532
|
+
set.seed(1)
|
533
|
+
|
469
534
|
acts = read.csv(ds_csv_file, check.names=F)
|
470
535
|
feats = read.csv(fds_csv_file, check.names=F)
|
471
536
|
ds = merge(acts, feats, by="SMILES") # duplicates features for duplicate SMILES :-)
|
472
|
-
|
537
|
+
|
473
538
|
features = ds[,(dim(acts)[2]+1):(dim(ds)[2])]
|
474
539
|
y = ds[,which(names(ds) == prediction_feature)]
|
475
|
-
|
540
|
+
|
476
541
|
# assumes a data matrix 'features' and a vector 'y' of target values
|
477
542
|
row.names(features)=NULL
|
478
|
-
|
543
|
+
|
544
|
+
# features with all values missing removed
|
545
|
+
na_col = names ( which ( apply ( features, 2, function(x) all ( is.na ( x ) ) ) ) )
|
546
|
+
features = features[,!names(features) %in% na_col]
|
547
|
+
|
548
|
+
# features with infinite values removed
|
549
|
+
inf_col = names ( which ( apply ( features, 2, function(x) any ( is.infinite ( x ) ) ) ) )
|
550
|
+
features = features[,!names(features) %in% inf_col]
|
551
|
+
|
552
|
+
# features with zero variance removed
|
553
|
+
zero_var = names ( which ( apply ( features, 2, function(x) var(x, na.rm=T) ) == 0 ) )
|
554
|
+
features = features[,!names(features) %in% zero_var]
|
555
|
+
|
479
556
|
pp = NULL
|
480
557
|
if (del_missing) {
|
481
558
|
# needed if rows should be removed
|
482
|
-
na_ids = apply(features,1,function(x)any(is.na(x)))
|
559
|
+
na_ids = apply ( features,1,function(x) any ( is.na ( x ) ) )
|
483
560
|
features = features[!na_ids,]
|
484
561
|
y = y[!na_ids]
|
485
562
|
pp = preProcess(features, method=c("scale", "center"))
|
@@ -488,17 +565,23 @@ module OpenTox
|
|
488
565
|
pp = preProcess(features, method=c("scale", "center", "knnImpute"))
|
489
566
|
}
|
490
567
|
features = predict(pp, features)
|
491
|
-
|
568
|
+
|
569
|
+
# features with nan values removed (sometimes preProcess return NaN values)
|
570
|
+
nan_col = names ( which ( apply ( features, 2, function(x) any ( is.nan ( x ) ) ) ) )
|
571
|
+
features = features[,!names(features) %in% nan_col]
|
572
|
+
|
492
573
|
# determine subsets
|
493
|
-
subsets = dim(features)[2]*c(0.
|
494
|
-
subsets = c(2,3,4,5,7
|
574
|
+
subsets = dim(features)[2]*c(0.3, 0.32, 0.34, 0.36, 0.38, 0.4, 0.42, 0.44, 0.46, 0.48, 0.5, 0.52, 0.54, 0.56, 0.58, 0.6, 0.62, 0.64, 0.66, 0.68, 0.7)
|
575
|
+
#subsets = dim(features)[2]*c(0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7)
|
576
|
+
#subsets = c(2,3,4,5,7,10,subsets)
|
577
|
+
#subsets = c(2,3,4,5,7,10,13,16,19,22,25,28,30)
|
495
578
|
subsets = unique(sort(round(subsets)))
|
496
579
|
subsets = subsets[subsets<=dim(features)[2]]
|
497
580
|
subsets = subsets[subsets>1]
|
498
|
-
|
581
|
+
|
499
582
|
# Recursive feature elimination
|
500
|
-
rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=
|
501
|
-
|
583
|
+
rfProfile = rfe( x=features, y=y, rfeControl=rfeControl(functions=rfFuncs, number=150), sizes=subsets)
|
584
|
+
|
502
585
|
# read existing dataset and select most useful features
|
503
586
|
csv=feats[,c("SMILES", rfProfile$optVariables)]
|
504
587
|
write.csv(x=csv,file=f_fds_r, row.names=F, quote=F, na='')
|
@@ -527,7 +610,7 @@ module OpenTox
|
|
527
610
|
# @param [Hash] required keys: compound, features, feature_dataset_uri, pc_type, lib (optional: subjectid)
|
528
611
|
# @return [Hash] Hash with matching Smarts and number of hits
|
529
612
|
def self.lookup(params)
|
530
|
-
params[:compound].lookup(params[:features], params[:feature_dataset_uri],params[:pc_type],params[:subjectid])
|
613
|
+
params[:compound].lookup(params[:features], params[:feature_dataset_uri], params[:pc_type], params[:lib], params[:subjectid])
|
531
614
|
end
|
532
615
|
end
|
533
616
|
|
@@ -539,3 +622,26 @@ module OpenTox
|
|
539
622
|
end
|
540
623
|
end
|
541
624
|
end
|
625
|
+
|
626
|
+
class Array
|
627
|
+
# collect method extended for parallel processing.
|
628
|
+
# Note: assign return value as: ans = arr.pcollect(n) { |obj| ... }
|
629
|
+
# @param n the number of processes to spawn (default: unlimited)
|
630
|
+
def pcollect(n = nil)
|
631
|
+
nproc = 0
|
632
|
+
result = collect do |*a|
|
633
|
+
r, w = IO.pipe
|
634
|
+
fork do
|
635
|
+
r.close
|
636
|
+
w.write( Marshal.dump( yield(*a) ) )
|
637
|
+
end
|
638
|
+
if n and (nproc+=1) >= n
|
639
|
+
Process.wait ; nproc -= 1
|
640
|
+
end
|
641
|
+
[ w.close, r ].last
|
642
|
+
end
|
643
|
+
Process.waitall
|
644
|
+
result.collect{|r| Marshal.load [ r.read, r.close ].first}
|
645
|
+
end
|
646
|
+
end
|
647
|
+
|
data/lib/compound.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
|
4
4
|
module OpenTox
|
5
5
|
|
6
|
+
require "rexml/document"
|
6
7
|
# Ruby wrapper for OpenTox Compound Webservices (http://opentox.org/dev/apis/api-1.2/structure).
|
7
8
|
class Compound
|
8
9
|
|
@@ -17,16 +18,20 @@ module OpenTox
|
|
17
18
|
# @return [Compound] Compound
|
18
19
|
def initialize(uri=nil)
|
19
20
|
@uri = uri
|
20
|
-
|
21
|
-
|
22
|
-
|
21
|
+
if (@uri =~ URI::regexp) || @uri.nil?
|
22
|
+
case @uri
|
23
|
+
when /InChI/ # shortcut for IST services
|
24
|
+
@inchi = @uri.sub(/^.*InChI/, 'InChI')
|
25
|
+
else
|
26
|
+
@inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri
|
27
|
+
end
|
28
|
+
|
29
|
+
if @uri and @inchi.to_s.size==0
|
30
|
+
LOGGER.warn "REMOVE ABMIT HACK: no inchi for compound "+@uri.to_s+", load via smiles"
|
31
|
+
@inchi = Compound.smiles2inchi(Compound.smiles(@uri))
|
32
|
+
end
|
23
33
|
else
|
24
|
-
|
25
|
-
end
|
26
|
-
|
27
|
-
if @uri and @inchi.to_s.size==0
|
28
|
-
LOGGER.warn "REMOVE ABMIT HACK: no inchi for compound "+@uri.to_s+", load via smiles"
|
29
|
-
@inchi = Compound.smiles2inchi(Compound.smiles(@uri))
|
34
|
+
raise "Not able to create compound with uri: #{@uri}"
|
30
35
|
end
|
31
36
|
end
|
32
37
|
|
@@ -130,6 +135,47 @@ module OpenTox
|
|
130
135
|
"not available"
|
131
136
|
end
|
132
137
|
end
|
138
|
+
|
139
|
+
|
140
|
+
# Get all known compound names grouped by classification. Relies on an external service for name lookups.
|
141
|
+
# @example
|
142
|
+
# names = compound.to_names_hash
|
143
|
+
# @return [Hash] Classification => Name Array
|
144
|
+
def to_names_hash
|
145
|
+
begin
|
146
|
+
xml = RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names/xml")
|
147
|
+
xmldoc = REXML::Document.new(xml)
|
148
|
+
data = {}
|
149
|
+
|
150
|
+
xmldoc.root.elements[1].elements.each{|e|
|
151
|
+
if data.has_key?(e.attribute("classification").value) == false
|
152
|
+
data[e.attribute("classification").value] = [e.text]
|
153
|
+
else
|
154
|
+
data[e.attribute("classification").value].push(e.text)
|
155
|
+
end
|
156
|
+
}
|
157
|
+
data
|
158
|
+
rescue
|
159
|
+
"not available"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
# Look up the compound's names via the AMBIT name search and store the result in a new dataset. Relies on an external service for name lookups.
|
164
|
+
# @example
|
165
|
+
# uri = compound.to_ambit_names_hash
|
166
|
+
# @return [String] URI of the dataset loaded from the AMBIT response, or "not available" if the lookup fails
|
167
|
+
def to_ambit_names_hash
|
168
|
+
begin
|
169
|
+
ds = OpenTox::Dataset.new
|
170
|
+
ds.save
|
171
|
+
ds.load_rdfxml(RestClientWrapper.get("http://apps.ideaconsult.net:8080/ambit2/query/compound/search/names?type=smiles&property=&search=#{@inchi}"))
|
172
|
+
ds.save
|
173
|
+
ds.uri
|
174
|
+
rescue
|
175
|
+
"not available"
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
133
179
|
|
134
180
|
# Match a smarts string
|
135
181
|
# @example
|
@@ -197,25 +243,28 @@ module OpenTox
|
|
197
243
|
# Lookup numerical values, returns hash with feature name as key and value as value
|
198
244
|
# @param [Array] Array of feature names
|
199
245
|
# @param [String] Feature dataset uri
|
246
|
+
# @param [String] Comma separated pc types
|
247
|
+
# @param [String] Comma separated descriptor libraries (forwarded as :lib to the pc/AllDescriptors service)
|
200
248
|
# @return [Hash] Hash with feature name as key and value as value
|
201
|
-
|
249
|
+
def lookup(feature_array,feature_dataset_uri,pc_type,lib,subjectid=nil)
|
202
250
|
ds = OpenTox::Dataset.find(feature_dataset_uri,subjectid)
|
203
251
|
#entry = ds.data_entries[self.uri]
|
204
252
|
entry = nil
|
205
|
-
ds.data_entries.each { |c_uri, values|
|
206
|
-
|
207
|
-
|
253
|
+
ds.data_entries.each { |c_uri, values|
|
254
|
+
compound = OpenTox::Compound.new(c_uri)
|
255
|
+
if compound.to_inchi == self.to_inchi # Compare compounds by InChI
|
256
|
+
entry = ds.data_entries[c_uri]
|
208
257
|
break
|
209
258
|
end
|
210
259
|
}
|
211
260
|
LOGGER.debug "#{entry.size} entries in feature ds for query." unless entry.nil?
|
212
|
-
|
213
261
|
if entry.nil?
|
214
|
-
|
215
|
-
uri =
|
216
|
-
ds = OpenTox::Dataset.find(uri,subjectid)
|
262
|
+
temp_ds = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],subjectid); temp_ds.add_compound(self.uri); temp_uri = temp_ds.save(subjectid)
|
263
|
+
uri = RestClientWrapper.post(File.join(CONFIG[:services]["opentox-algorithm"], "/pc/AllDescriptors"), {:dataset_uri => temp_uri, :pc_type => pc_type, :lib => lib, :subjectid => subjectid})
|
264
|
+
ds = OpenTox::Dataset.find(uri, subjectid)
|
217
265
|
entry = ds.data_entries[self.uri]
|
218
266
|
ds.delete(subjectid)
|
267
|
+
temp_ds.delete(subjectid)
|
219
268
|
end
|
220
269
|
features = entry.keys
|
221
270
|
features.each { |feature|
|
@@ -224,7 +273,6 @@ module OpenTox
|
|
224
273
|
entry.delete(feature) unless feature == new_feature # e.g. when loading from ambit
|
225
274
|
}
|
226
275
|
#res = feature_array.collect {|v| entry[v]}
|
227
|
-
#LOGGER.debug "----- am #{entry.to_yaml}"
|
228
276
|
entry
|
229
277
|
end
|
230
278
|
|