RubyGems - lazar - Versions diffs - 0.0.7 → 0.0.9 - Mend

lazar 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

checksums.yaml +4 -4
data/.gitignore +3 -0
data/README.md +2 -1
data/VERSION +1 -1
data/ext/lazar/extconf.rb +15 -76
data/ext/lazar/rinstall.R +9 -0
data/lazar.gemspec +7 -7
data/lib/classification.rb +5 -78
data/lib/compound.rb +201 -44
data/lib/crossvalidation.rb +224 -121
data/lib/dataset.rb +83 -93
data/lib/error.rb +1 -1
data/lib/experiment.rb +99 -0
data/lib/feature.rb +2 -54
data/lib/lazar.rb +47 -34
data/lib/leave-one-out-validation.rb +205 -0
data/lib/model.rb +131 -76
data/lib/opentox.rb +2 -2
data/lib/overwrite.rb +37 -0
data/lib/physchem.rb +133 -0
data/lib/regression.rb +117 -189
data/lib/rest-client-wrapper.rb +4 -5
data/lib/unique_descriptors.rb +6 -7
data/lib/validation.rb +63 -69
data/test/all.rb +2 -2
data/test/classification.rb +41 -0
data/test/compound.rb +116 -7
data/test/data/LOAEL_log_mg_corrected_smiles.csv +567 -567
data/test/data/LOAEL_log_mmol_corrected_smiles.csv +566 -566
data/test/data/LOAEL_mmol_corrected_smiles.csv +568 -0
data/test/data/batch_prediction.csv +25 -0
data/test/data/batch_prediction_inchi_small.csv +4 -0
data/test/data/batch_prediction_smiles_small.csv +4 -0
data/test/data/hamster_carcinogenicity.json +3 -0
data/test/data/loael.csv +568 -0
data/test/dataset-long.rb +5 -8
data/test/dataset.rb +31 -11
data/test/default_environment.rb +11 -0
data/test/descriptor.rb +26 -41
data/test/error.rb +1 -3
data/test/experiment.rb +301 -0
data/test/feature.rb +22 -10
data/test/lazar-long.rb +43 -23
data/test/lazar-physchem-short.rb +19 -16
data/test/prediction_models.rb +20 -0
data/test/regression.rb +43 -0
data/test/setup.rb +3 -1
data/test/test_environment.rb +10 -0
data/test/validation.rb +92 -26
metadata +64 -38
data/lib/SMARTS_InteLigand.txt +0 -983
data/lib/bbrc.rb +0 -165
data/lib/descriptor.rb +0 -247
data/lib/neighbor.rb +0 -25
data/lib/similarity.rb +0 -58
data/mongoid.yml +0 -8
data/test/descriptor-long.rb +0 -26
data/test/fminer-long.rb +0 -38
data/test/fminer.rb +0 -52
data/test/lazar-fminer.rb +0 -50
data/test/lazar-regression.rb +0 -27

data/lib/physchem.rb ADDED Viewed

@@ -0,0 +1,133 @@
+module OpenTox
+  # Feature for physico-chemical descriptors
+  class PhysChem < NumericFeature
+    field :library, type: String
+    field :descriptor, type: String
+    field :description, type: String
+    JAVA_DIR = File.join(File.dirname(__FILE__),"..","java")
+    CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last
+    JOELIB_JAR = File.join(JAVA_DIR,"joelib2.jar")
+    LOG4J_JAR = File.join(JAVA_DIR,"log4j.jar")
+    JMOL_JAR = File.join(JAVA_DIR,"Jmol.jar")
+    obexclude = ["cansmi","cansmiNS","formula","InChI","InChIKey","s","smarts","title","L5"]
+    OBDESCRIPTORS = Hash[OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
+      name,description = d.split(/\s+/,2)
+      ["Openbabel."+name,description] unless obexclude.include? name
+    end.compact.sort{|a,b| a[0] <=> b[0]}]
+    cdkdescriptors = {}
+    CDK_DESCRIPTIONS = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR}  CdkDescriptorInfo`)
+    CDK_DESCRIPTIONS.each do |d|
+      prefix="Cdk."+d[:java_class].split('.').last.sub(/Descriptor/,'')
+      d[:names].each { |name| cdkdescriptors[prefix+"."+name] = d[:description] }
+    end
+    CDKDESCRIPTORS = cdkdescriptors
+    # exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
+    joelibexclude = ["MoleculeHashcode","GlobalTopologicalChargeIndex"]
+    # strip Joelib messages from stdout
+    JOELIBDESCRIPTORS = Hash[YAML.load(`java -classpath #{JOELIB_JAR}:#{LOG4J_JAR}:#{JAVA_DIR}  JoelibDescriptorInfo | sed '0,/---/d'`).collect do |d|
+      name = d[:java_class].sub(/^joelib2.feature.types./,'')
+      ["Joelib."+name, "JOELIb does not provide meaningful descriptions, see java/JoelibDescriptors.java for details."] unless joelibexclude.include? name
+    end.compact.sort{|a,b| a[0] <=> b[0]}]
+    DESCRIPTORS = OBDESCRIPTORS.merge(CDKDESCRIPTORS.merge(JOELIBDESCRIPTORS))
+    require_relative "unique_descriptors.rb"
+    def self.descriptors desc=DESCRIPTORS
+      desc.collect do |name,description|
+        lib,desc = name.split('.',2)
+        self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
+      end
+    end
+    def self.unique_descriptors
+      udesc = []
+      UNIQUEDESCRIPTORS.each do |name|
+        lib,desc = name.split('.',2)
+        if lib == "Cdk"
+          CDK_DESCRIPTIONS.select{|d| desc == d[:java_class].split('.').last.sub('Descriptor','') }.first[:names].each do |n|
+            dname = "#{name}.#{n}"
+            description = DESCRIPTORS[dname]
+            udesc << self.find_or_create_by(:name => dname, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
+          end
+        else
+          description = DESCRIPTORS[name]
+          udesc << self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
+        end
+      end
+      udesc
+    end
+    def self.openbabel_descriptors
+      descriptors OBDESCRIPTORS
+    end
+    def self.cdk_descriptors
+      descriptors CDKDESCRIPTORS
+    end
+    def self.joelib_descriptors
+      descriptors JOELIBDESCRIPTORS
+    end
+    def calculate compound
+      result = send library.downcase,descriptor,compound
+      result[self.name]
+    end
+    def openbabel descriptor, compound
+      obdescriptor = OpenBabel::OBDescriptor.find_type descriptor
+      obmol = OpenBabel::OBMol.new
+      obconversion = OpenBabel::OBConversion.new
+      obconversion.set_in_format 'smi'
+      obconversion.read_string obmol, compound.smiles
+      {"#{library.capitalize}.#{descriptor}" => fix_value(obdescriptor.predict(obmol))}
+    end
+    def cdk descriptor, compound
+      java_descriptor "cdk", descriptor, compound
+    end
+    def joelib descriptor, compound
+      java_descriptor "joelib", descriptor, compound
+    end
+    private
+    def java_descriptor lib, descriptor, compound
+      sdf_3d = "/tmp/#{SecureRandom.uuid}.sdf"
+      File.open(sdf_3d,"w+"){|f| f.print compound.sdf}
+      # use java system call (rjb blocks within tasks)
+      # use Tempfiles to avoid "Argument list too long" error
+      case lib
+      when "cdk"
+        `java -classpath #{CDK_JAR}:#{JAVA_DIR}  CdkDescriptors #{sdf_3d} #{descriptor}`
+      when "joelib"
+        `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR}  JoelibDescriptors  #{sdf_3d} #{descriptor}`
+      end
+      result = YAML.load_file("#{sdf_3d}#{lib}.yaml").first
+      result.keys.each{|k| result[k] = result.delete(k)}
+      result
+    end
+    def fix_value val
+      val = val.first if val.is_a? Array and val.size == 1
+      val = nil if val == "NaN"
+      if val.numeric?
+        val = Float(val)
+        val = nil if val.nan? or val.infinite?
+      end
+      val
+    end
+  end
+end

data/lib/regression.rb CHANGED Viewed

@@ -1,223 +1,151 @@
-# TODO install R packages kernlab, caret, doMC, class, e1071
-        # log transform activities (create new dataset)
-        # scale, normalize features, might not be necessary
-        # http://stats.stackexchange.com/questions/19216/variables-are-often-adjusted-e-g-standardised-before-making-a-model-when-is
-        # http://stats.stackexchange.com/questions/7112/when-and-how-to-use-standardized-explanatory-variables-in-linear-regression
-        # zero-order correlation and the semi-partial correlation
-        # seems to be necessary for svm
-        #   http://stats.stackexchange.com/questions/77876/why-would-scaling-features-decrease-svm-performance?lq=1
-        #   http://stackoverflow.com/questions/15436367/svm-scaling-input-values
-        # use lasso or elastic net??
-        # select relevant features
-        #   remove features with a single value
-        #   remove correlated features
-        #   remove features not correlated with endpoint
 module OpenTox
   module Algorithm
     class Regression
-      def self.weighted_average neighbors
+      def self.local_weighted_average compound, params
         weighted_sum = 0.0
         sim_sum = 0.0
+        neighbors = params[:neighbors]
         neighbors.each do |row|
-          n,sim,acts = row
-          acts.each do |act|
-            weighted_sum += sim*Math.log10(act)
-            sim_sum += sim
+          sim = row["tanimoto"]
+          if row["features"][params[:prediction_feature_id].to_s]
+            row["features"][params[:prediction_feature_id].to_s].each do |act|
+              weighted_sum += sim*Math.log10(act)
+              sim_sum += sim
+            end
           end
         end
-        confidence = sim_sum/neighbors.size.to_f
         sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
-        {:value => prediction,:confidence => confidence}
+        {:value => prediction}
       end
-      def self.weighted_average_with_relevant_fingerprints neighbors
-        weighted_sum = 0.0
-        sim_sum = 0.0
-        fingerprint_features = []
-        neighbors.each do |row|
-          n,sim,acts = row
-          neighbor = Compound.find n
-          fingerprint_features += neighbor.fp4
-        end
-        fingerprint_features.uniq!
-        p fingerprint_features
-=begin
-          p n
-          acts.each do |act|
-            weighted_sum += sim*Math.log10(act)
-            sim_sum += sim
+      # TODO explicit neighbors, also for physchem
+      def self.local_fingerprint_regression  compound, params, method='pls'#, method_params="sigma=0.05"
+        neighbors = params[:neighbors]
+        return {:value => nil, :confidence => nil, :warning => "No similar compounds in the training data"} unless neighbors.size > 0
+        activities = []
+        fingerprints = {}
+        weights = []
+        fingerprint_ids = neighbors.collect{|row| Compound.find(row["_id"]).fingerprint}.flatten.uniq.sort
+        neighbors.each_with_index do |row,i|
+          neighbor = Compound.find row["_id"]
+          fingerprint = neighbor.fingerprint
+          if row["features"][params[:prediction_feature_id].to_s]
+            row["features"][params[:prediction_feature_id].to_s].each do |act|
+              activities << Math.log10(act)
+              weights << row["tanimoto"]
+              fingerprint_ids.each_with_index do |id,j|
+                fingerprints[id] ||= []
+                fingerprints[id] << fingerprint.include?(id)
+              end
+            end
           end
         end
-=end
-        confidence = sim_sum/neighbors.size.to_f
-        sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
-        {:value => prediction,:confidence => confidence}
-      end
-      # Local support vector regression from neighbors
-      # @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
-      # @return [Numeric] A prediction value.
-      def self.local_svm_regression neighbors, params={:min_train_performance => 0.1}
+        variables = []
+        data_frame = [activities]
+        fingerprints.each do |k,v|
+          unless v.uniq.size == 1
+            data_frame << v.collect{|m| m ? "T" : "F"}
+            variables << k
+          end
+        end
-        confidence = 0.0
-        prediction = nil
+        if variables.empty?
+            result = local_weighted_average(compound, params)
+            result[:warning] = "No variables for regression model. Using weighted average of similar compounds."
+            return result
-        $logger.debug "Local SVM."
-        props = neighbors.collect{|row| row[3] }
-        neighbors.shift
-        activities = neighbors.collect{|n| n[2]}
-        prediction = self.local_svm_prop( props, activities, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
-        prediction = nil if (!prediction.nil? && prediction.infinite?)
-        $logger.debug "Prediction: '#{prediction}' ('#{prediction.class}')."
-        if prediction
-          confidence = get_confidence({:sims => neighbors.collect{|n| n[1]}, :activities => activities})
         else
-          confidence = nil if prediction.nil?
+          compound_features = variables.collect{|f| compound.fingerprint.include?(f) ? "T" : "F"}
+          prediction = r_model_prediction method, data_frame, variables, weights, compound_features
+          if prediction.nil? or prediction[:value].nil?
+            prediction = local_weighted_average(compound, params)
+            prediction[:warning] = "Could not create local PLS model. Using weighted average of similar compounds."
+            return prediction
+          else
+            prediction[:prediction_interval] = [10**(prediction[:value]-1.96*prediction[:rmse]), 10**(prediction[:value]+1.96*prediction[:rmse])]
+            prediction[:value] = 10**prediction[:value]
+            prediction[:rmse] = 10**prediction[:rmse]
+            prediction
+          end
         end
-          [prediction, confidence]
       end
+      def self.local_physchem_regression  compound, params, method="plsr"#, method_params="ncomp = 4"
+        neighbors = params[:neighbors]
+        return {:value => nil, :confidence => nil, :warning => "No similar compounds in the training data"} unless neighbors.size > 0
+        return {:value => neighbors.first["features"][params[:prediction_feature_id]], :confidence => nil, :warning => "Only one similar compound in the training set"} unless neighbors.size > 1
+        activities = []
+        weights = []
+        physchem = {}
+        neighbors.each_with_index do |row,i|
+          neighbor = Compound.find row["_id"]
+          if row["features"][params[:prediction_feature_id].to_s]
+            row["features"][params[:prediction_feature_id].to_s].each do |act|
+              activities << Math.log10(act)
+              weights << row["tanimoto"] # TODO cosine ?
+              neighbor.physchem.each do |pid,v| # insert physchem only if there is an activity
+                physchem[pid] ||= []
+                physchem[pid] <<  v
+              end
+            end
+          end
+        end
-      # Local support vector prediction from neighbors.
-      # Uses propositionalized setting.
-      # Not to be called directly (use local_svm_regression or local_svm_classification).
-      # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ]
-      # @param [Array] activities, activities for neighbors.
-      # @param [Float] min_train_performance, parameter to control censoring
-      # @return [Numeric] A prediction value.
-      def self.local_svm_prop(props, activities, min_train_performance)
+        # remove properties with a single value
+        physchem.each do |pid,v|
+          physchem.delete(pid) if v.uniq.size <= 1
+        end
-        $logger.debug "Local SVM (Propositionalization / Kernlab Kernel)."
-        n_prop = props[1..-1] # is a matrix, i.e. two nested Arrays.
-        q_prop = props[0] # is an Array.
+        if physchem.empty?
+          result = local_weighted_average(compound, params)
+          result[:warning] = "No variables for regression model. Using weighted average of similar compounds."
+          return result
-        prediction = nil
-        if activities.uniq.size == 1
-          prediction = activities[0]
         else
-          t = Time.now
-          #$logger.debug gram_matrix.to_yaml
-          #@r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests
-          @r = Rserve::Connection.new#(true,false) # global R instance leads to Socket errors after a large number of requests
-          rs = []
-          ["caret", "doMC", "class"].each do |lib|
-            #raise "failed to load R-package #{lib}" unless @r.void_eval "suppressPackageStartupMessages(library('#{lib}'))"
-            rs << "suppressPackageStartupMessages(library('#{lib}'))"
+          data_frame = [activities] + physchem.keys.collect { |pid| physchem[pid] }
+          prediction = r_model_prediction method, data_frame, physchem.keys, weights, physchem.keys.collect{|pid| compound.physchem[pid]}
+          if prediction.nil?
+            prediction = local_weighted_average(compound, params)
+            prediction[:warning] = "Could not create local PLS model. Using weighted average of similar compounds."
+            return prediction
+          else
+            prediction[:value] = 10**prediction[:value]
+            prediction
           end
-          #@r.eval "registerDoMC()" # switch on parallel processing
-          rs << "registerDoMC()" # switch on parallel processing
-          #@r.eval "set.seed(1)"
-          rs << "set.seed(1)"
-          $logger.debug "Loading R packages: #{Time.now-t}"
-          t = Time.now
-          p n_prop
-          begin
-            # set data
-            rs << "n_prop <- c(#{n_prop.flatten.join(',')})"
-            rs << "n_prop <- c(#{n_prop.flatten.join(',')})"
-            rs << "n_prop_x_size <- c(#{n_prop.size})"
-            rs << "n_prop_y_size <- c(#{n_prop[0].size})"
-            rs << "y <- c(#{activities.join(',')})"
-            rs << "q_prop <- c(#{q_prop.join(',')})"
-            rs << "y = matrix(y)"
-            rs << "prop_matrix = matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=T)"
-            rs << "q_prop = matrix(q_prop, 1, n_prop_y_size, byrow=T)"
-            $logger.debug "Setting R data: #{Time.now-t}"
-            t = Time.now
-            # prepare data
-            rs << "
-              weights=NULL
-              if (!(class(y) == 'numeric')) {
-                y = factor(y)
-                weights=unlist(as.list(prop.table(table(y))))
-                weights=(weights-1)^2
-              }
-            "
-            rs << "
-              rem = nearZeroVar(prop_matrix)
-              if (length(rem) > 0) {
-                prop_matrix = prop_matrix[,-rem,drop=F]
-                q_prop = q_prop[,-rem,drop=F]
-              }
-              rem = findCorrelation(cor(prop_matrix))
-              if (length(rem) > 0) {
-                prop_matrix = prop_matrix[,-rem,drop=F]
-                q_prop = q_prop[,-rem,drop=F]
-              }
-            "
-            #p @r.eval("y").to_ruby
-            #p "weights"
-            #p @r.eval("weights").to_ruby
-            $logger.debug "Preparing R data: #{Time.now-t}"
-            t = Time.now
-            # model + support vectors
-            #train_success = @r.eval <<-EOR
-            rs << '
-              model = train(prop_matrix,y,
-                             method="svmRadial",
-                             preProcess=c("center", "scale"),
-                             class.weights=weights,
-                             trControl=trainControl(method="LGOCV",number=10),
-                             tuneLength=8
-                           )
-              perf = ifelse ( class(y)!="numeric", max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
-            '
-            File.open("/tmp/r.r","w+"){|f| f.puts rs.join("\n")}
-            p rs.join("\n")
-            p `Rscript /tmp/r.r`
-=begin
-            @r.void_eval <<-EOR
-              model = train(prop_matrix,y,
-                             method="svmRadial",
-                             #preProcess=c("center", "scale"),
-                             #class.weights=weights,
-                             #trControl=trainControl(method="LGOCV",number=10),
-                             #tuneLength=8
-                           )
-              perf = ifelse ( class(y)!='numeric', max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
-            EOR
-=end
-            $logger.debug "Creating R SVM model: #{Time.now-t}"
-            t = Time.now
-            if train_success
-              # prediction
-              @r.eval "predict(model,q_prop); p = predict(model,q_prop)" # kernlab bug: predict twice
-              #@r.eval "p = predict(model,q_prop)" # kernlab bug: predict twice
-              @r.eval "if (class(y)!='numeric') p = as.character(p)"
-              prediction = @r.p
+        end
+      end
-              # censoring
-              prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance.to_f )
-              prediction = nil if prediction =~ /NA/
-              $logger.debug "Performance: '#{sprintf("%.2f", @r.perf)}'"
-            else
-              $logger.debug "Model creation failed."
-              prediction = nil
-            end
-            $logger.debug "R Prediction: #{Time.now-t}"
-          rescue Exception => e
-            $logger.debug "#{e.class}: #{e.message}"
-            $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
-          ensure
-            #puts @r.inspect
-            #TODO: broken pipe
-            #@r.quit # free R
-          end
+      def self.r_model_prediction method, training_data, training_features, training_weights, query_feature_values
+        R.assign "weights", training_weights
+        r_data_frame = "data.frame(#{training_data.collect{|r| "c(#{r.join(',')})"}.join(', ')})"
+        R.eval "data <- #{r_data_frame}"
+        R.assign "features", training_features
+        R.eval "names(data) <- append(c('activities'),features)" #
+        begin
+          R.eval "model <- train(activities ~ ., data = data, method = '#{method}')"
+        rescue
+          return nil
         end
-        prediction
+        R.eval "fingerprint <- data.frame(rbind(c(#{query_feature_values.join ','})))"
+        R.eval "names(fingerprint) <- features"
+        R.eval "prediction <- predict(model,fingerprint)"
+        {
+          :value => R.eval("prediction").to_f,
+          :rmse => R.eval("getTrainPerf(model)$TrainRMSE").to_f,
+          :r_squared => R.eval("getTrainPerf(model)$TrainRsquared").to_f,
+        }
       end
-    end
+    end
   end
 end

data/lib/rest-client-wrapper.rb CHANGED Viewed

@@ -26,15 +26,14 @@ module OpenTox
       define_singleton_method method do |uri,payload={},headers={},waiting_task=nil|
         # check input
-        bad_request_error "Headers are not a hash: #{headers.inspect}", uri unless headers==nil or headers.is_a?(Hash)
+        bad_request_error "Headers are not a hash: #{headers.inspect} for #{uri}." unless headers==nil or headers.is_a?(Hash)
         headers[:subjectid] ||= @@subjectid
-        bad_request_error "Invalid URI: '#{uri}'", uri unless URI.valid? uri
-        #resource_not_found_error "URI '#{uri}' not found.", uri unless URI.accessible?(uri, @subjectid) unless URI.ssl?(uri)
+        bad_request_error "Invalid URI: '#{uri}'" unless URI.valid? uri
         # make sure that no header parameters are set in the payload
         [:accept,:content_type,:subjectid].each do |header|
           if defined? $aa || URI(uri).host == URI($aa[:uri]).host
           else
-            bad_request_error "#{header} should be submitted in the headers", uri if payload and payload.is_a?(Hash) and payload[header]
+            bad_request_error "#{header} should be submitted in the headers of URI: #{uri}" if payload and payload.is_a?(Hash) and payload[header]
           end
         end
@@ -72,7 +71,7 @@ module OpenTox
               msg = "Could not parse error response from rest call '#{method}' to '#{uri}':\n#{response}"
               cause = nil
             end
-            Object.method(error[:method]).call msg, uri, cause # call error method
+            Object.method(error[:method]).call "#{msg}, #{uri}, #{cause}" # call error method
           else
             response
           end

data/lib/unique_descriptors.rb CHANGED Viewed

@@ -12,7 +12,7 @@ UNIQUEDESCRIPTORS = [
   "Openbabel.HBA1", #Number of Hydrogen Bond Acceptors 1 (JoelLib)
   "Openbabel.HBA2", #Number of Hydrogen Bond Acceptors 2 (JoelLib)
   "Openbabel.HBD", #Number of Hydrogen Bond Donors (JoelLib)
-  "Openbabel.L5", #Lipinski Rule of Five
+  #"Openbabel.L5", #Lipinski Rule of Five # TODO: Openbabel.L5 returns nil, investigate!
   "Openbabel.logP", #octanol/water partition coefficient
   "Openbabel.MP", #Melting point
   "Openbabel.MR", #molar refractivity
@@ -24,7 +24,7 @@ UNIQUEDESCRIPTORS = [
   "Cdk.ALOGP", #Calculates atom additive logP and molar refractivity values as described by Ghose and Crippen and
   "Cdk.APol", #Descriptor that calculates the sum of the atomic polarizabilities (including implicit hydrogens).
   "Cdk.AcidicGroupCount", #Returns the number of acidic groups.
-  "Cdk.AminoAcidCount", #Returns the number of amino acids found in the system
+  #"Cdk.AminoAcidCount", #Returns the number of amino acids found in the system
   #"Cdk.AromaticAtomsCount", #Descriptor based on the number of aromatic atoms of a molecule.
   #"Cdk.AromaticBondsCount", #Descriptor based on the number of aromatic bonds of a molecule.
   #"Cdk.AtomCount", #Descriptor based on the number of atoms of a certain element type.
@@ -56,7 +56,7 @@ UNIQUEDESCRIPTORS = [
   "Cdk.LengthOverBreadth", #Calculates the ratio of length to breadth.
   "Cdk.LongestAliphaticChain", #Returns the number of atoms in the longest aliphatic chain
   "Cdk.MDE", #Evaluate molecular distance edge descriptors for C, N and O
-  "Cdk.MannholdLogP", #Descriptor that calculates the LogP based on a simple equation using the number of carbons and hetero atoms .
+  #"Cdk.MannholdLogP", #Descriptor that calculates the LogP based on a simple equation using the number of carbons and hetero atoms .
   "Cdk.MomentOfInertia", #Descriptor that calculates the principal moments of inertia and ratios of the principal moments. Als calculates the radius of gyration.
   "Cdk.PetitjeanNumber", #Descriptor that calculates the Petitjean Number of a molecule.
   "Cdk.PetitjeanShapeIndex", #The topological and geometric shape indices described Petitjean and Bath et al. respectively. Both measure the anisotropy in a molecule.
@@ -75,7 +75,7 @@ UNIQUEDESCRIPTORS = [
   "Joelib.count.NumberOfP", #no description available
   "Joelib.count.NumberOfO", #no description available
   "Joelib.count.NumberOfN", #no description available
-  #"Joelib.count.AromaticBonds", #no description available
+  #"Joeli#.count.AromaticBonds", #no description available
   "Joelib.count.NumberOfI", #no description available
   "Joelib.count.NumberOfF", #no description available
   "Joelib.count.NumberOfC", #no description available
@@ -91,7 +91,7 @@ UNIQUEDESCRIPTORS = [
   "Joelib.GeometricalShapeCoefficient", #no description available
   #"Joelib.MolecularWeight", #no description available
   "Joelib.FractionRotatableBonds", #no description available
-  #"Joelib.count.HBD2", #no description available
+  #"Joeli..count.HBD2", #no description available
   #"Joelib.count.HBD1", #no description available
   "Joelib.LogP", #no description available
   "Joelib.GraphShapeCoefficient", #no description available
@@ -116,5 +116,4 @@ UNIQUEDESCRIPTORS = [
   "Joelib.count.SOGroups", #no description available
   "Joelib.TopologicalDiameter", #no description available
   "Joelib.count.NumberOfHal", #no description available
-].sort
+]