RubyGems - lazar - Versions diffs - 0.0.1 - Mend

lazar 0.0.1

Files changed (98) hide show

checksums.yaml +7 -0
data/.gitignore +10 -0
data/.yardopts +4 -0
data/Gemfile +2 -0
data/LICENSE +674 -0
data/README.md +44 -0
data/Rakefile +1 -0
data/VERSION +1 -0
data/ext/lazar/extconf.rb +87 -0
data/java/CdkDescriptorInfo.class +0 -0
data/java/CdkDescriptorInfo.java +22 -0
data/java/CdkDescriptors.class +0 -0
data/java/CdkDescriptors.java +141 -0
data/java/Jmol.jar +0 -0
data/java/JoelibDescriptorInfo.class +0 -0
data/java/JoelibDescriptorInfo.java +15 -0
data/java/JoelibDescriptors.class +0 -0
data/java/JoelibDescriptors.java +60 -0
data/java/Rakefile +15 -0
data/java/cdk-1.4.19.jar +0 -0
data/java/joelib2.jar +0 -0
data/java/log4j.jar +0 -0
data/lazar.gemspec +29 -0
data/lib/SMARTS_InteLigand.txt +983 -0
data/lib/algorithm.rb +21 -0
data/lib/bbrc.rb +165 -0
data/lib/classification.rb +107 -0
data/lib/compound.rb +254 -0
data/lib/crossvalidation.rb +187 -0
data/lib/dataset.rb +334 -0
data/lib/descriptor.rb +247 -0
data/lib/error.rb +66 -0
data/lib/feature.rb +97 -0
data/lib/lazar-model.rb +170 -0
data/lib/lazar.rb +69 -0
data/lib/neighbor.rb +25 -0
data/lib/opentox.rb +22 -0
data/lib/overwrite.rb +119 -0
data/lib/regression.rb +199 -0
data/lib/rest-client-wrapper.rb +98 -0
data/lib/similarity.rb +58 -0
data/lib/unique_descriptors.rb +120 -0
data/lib/validation.rb +114 -0
data/mongoid.yml +8 -0
data/test/all.rb +5 -0
data/test/compound.rb +100 -0
data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +13553 -0
data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +436 -0
data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +568 -0
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +87 -0
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +978 -0
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +1120 -0
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +1113 -0
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +850 -0
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +829 -0
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +1198 -0
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +1505 -0
data/test/data/EPAFHM.csv +618 -0
data/test/data/EPAFHM.medi.csv +100 -0
data/test/data/EPAFHM.mini.csv +22 -0
data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +581 -0
data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +1217 -0
data/test/data/ISSCAN-multi.csv +59 -0
data/test/data/LOAEL_log_mg_corrected_smiles.csv +568 -0
data/test/data/LOAEL_log_mmol_corrected_smiles.csv +568 -0
data/test/data/acetaldehyde.sdf +14 -0
data/test/data/boiling_points.ext.sdf +11460 -0
data/test/data/cpdb_100.csv +101 -0
data/test/data/hamster_carcinogenicity.csv +86 -0
data/test/data/hamster_carcinogenicity.mini.bool_float.csv +11 -0
data/test/data/hamster_carcinogenicity.mini.bool_int.csv +11 -0
data/test/data/hamster_carcinogenicity.mini.bool_string.csv +11 -0
data/test/data/hamster_carcinogenicity.mini.csv +11 -0
data/test/data/hamster_carcinogenicity.ntriples +618 -0
data/test/data/hamster_carcinogenicity.sdf +2805 -0
data/test/data/hamster_carcinogenicity.xls +0 -0
data/test/data/hamster_carcinogenicity.yaml +352 -0
data/test/data/hamster_carcinogenicity_with_errors.csv +88 -0
data/test/data/kazius.csv +4070 -0
data/test/data/multi_cell_call.csv +1067 -0
data/test/data/multi_cell_call_no_dup.csv +1057 -0
data/test/data/multicolumn.csv +8 -0
data/test/data/rat_feature_dataset.csv +1179 -0
data/test/data/wrong_dataset.csv +8 -0
data/test/dataset-long.rb +117 -0
data/test/dataset.rb +199 -0
data/test/descriptor-long.rb +26 -0
data/test/descriptor.rb +83 -0
data/test/error.rb +24 -0
data/test/feature.rb +65 -0
data/test/fminer-long.rb +38 -0
data/test/fminer.rb +52 -0
data/test/lazar-fminer.rb +50 -0
data/test/lazar-long.rb +72 -0
data/test/lazar-physchem-short.rb +27 -0
data/test/setup.rb +6 -0
data/test/validation.rb +41 -0
metadata +212 -0

data/lib/overwrite.rb ADDED Viewed

@@ -0,0 +1,119 @@
+require "base64"
+class Object
+  # An object is blank if it's false, empty, or a whitespace string.
+  # For example, "", "   ", +nil+, [], and {} are all blank.
+  def blank?
+    respond_to?(:empty?) ? empty? : !self
+  end
+  def numeric?
+    true if Float(self) rescue false
+  end
+end
+class Numeric
+  def percent_of(n)
+    self.to_f / n.to_f * 100.0
+  end
+end
+module Enumerable
+  # @return [Array] only the duplicates of an enumerable
+  def duplicates
+    inject({}) {|h,v| h[v]=h[v].to_i+1; h}.reject{|k,v| v==1}.keys
+  end
+  # http://stackoverflow.com/questions/2562256/find-most-common-string-in-an-array
+  Enumerable.class_eval do
+    def mode
+      group_by do |e|
+        e
+      end.values.max_by(&:size).first
+    end
+  end
+end
+class String
+  # @return [String] converts camel-case to underscore-case (OpenTox::SuperModel -> open_tox/super_model)
+  def underscore
+    self.gsub(/::/, '/').
+    gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
+    gsub(/([a-z\d])([A-Z])/,'\1_\2').
+    tr("-", "_").
+    downcase
+  end
+  # convert strings to boolean values
+  # @return [TrueClass,FalseClass] true or false
+  def to_boolean
+    return true if self == true || self =~ (/(true|t|yes|y|1)$/i)
+    return false if self == false || self.nil? || self =~ (/(false|f|no|n|0)$/i)
+    bad_request_error "invalid value for Boolean: \"#{self}\""
+  end
+end
+class File
+  # @return [String] mime_type including charset using linux cmd command
+  def mime_type
+    `file -ib '#{self.path}'`.chomp
+  end
+end
+class Array
+  # Sum up the size of single arrays in an array of arrays
+  # @param [Array] Array of arrays
+  # @return [Integer] Sum of size of array elements
+  def sum_size
+    self.inject(0) { |s,a|
+      if a.respond_to?('size')
+        s+=a.size
+      else
+        internal_server_error "No size available: #{a.inspect}"
+      end
+    }
+  end
+  # For symbolic features
+  # @param [Array] Array to test.
+  # @return [Boolean] Whether the array has just one unique value.
+  def zero_variance?
+    return self.uniq.size == 1
+  end
+end
+module URI
+  def self.ssl? uri
+    URI.parse(uri).instance_of? URI::HTTPS
+  end
+  # @return [Boolean] checks if resource exists by making a HEAD-request
+  def self.accessible?(uri)
+    parsed_uri = URI.parse(uri + (OpenTox::RestClientWrapper.subjectid ? "?subjectid=#{CGI.escape OpenTox::RestClientWrapper.subjectid}" : ""))
+    http_code = URI.task?(uri) ? 600 : 400
+    http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
+    unless (URI.ssl? uri) == true
+      http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
+      request = Net::HTTP::Head.new(parsed_uri.request_uri)
+      http.request(request).code.to_i < http_code
+    else
+      http = Net::HTTP.new(parsed_uri.host, parsed_uri.port)
+      http.use_ssl = true
+      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+      request = Net::HTTP::Head.new(parsed_uri.request_uri)
+      http.request(request).code.to_i < http_code
+    end
+  rescue
+    false
+  end
+  def self.valid? uri
+    u = URI.parse(uri)
+    u.scheme!=nil and u.host!=nil
+  rescue URI::InvalidURIError
+    false
+  end
+end

data/lib/regression.rb ADDED Viewed

@@ -0,0 +1,199 @@
+# TODO install R packages kernlab, caret, doMC, class, e1071
+        # log transform activities (create new dataset)
+        # scale, normalize features, might not be necessary
+        # http://stats.stackexchange.com/questions/19216/variables-are-often-adjusted-e-g-standardised-before-making-a-model-when-is
+        # http://stats.stackexchange.com/questions/7112/when-and-how-to-use-standardized-explanatory-variables-in-linear-regression
+        # zero-order correlation and the semi-partial correlation
+        # seems to be necessary for svm
+        #   http://stats.stackexchange.com/questions/77876/why-would-scaling-features-decrease-svm-performance?lq=1
+        #   http://stackoverflow.com/questions/15436367/svm-scaling-input-values
+        # use lasso or elastic net??
+        # select relevant features
+        #   remove features with a single value
+        #   remove correlated features
+        #   remove features not correlated with endpoint
+module OpenTox
+  module Algorithm
+    class Regression
+      def self.weighted_average neighbors
+        weighted_sum = 0.0
+        sim_sum = 0.0
+        neighbors.each do |row|
+          n,sim,acts = row
+          acts.each do |act|
+            weighted_sum += sim*Math.log10(act)
+            sim_sum += sim
+          end
+        end
+        confidence = sim_sum/neighbors.size.to_f
+        sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
+        {:value => prediction,:confidence => confidence}
+      end
+      # Local support vector regression from neighbors
+      # @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
+      # @return [Numeric] A prediction value.
+      def self.local_svm_regression neighbors, params={:min_train_performance => 0.1}
+        confidence = 0.0
+        prediction = nil
+        $logger.debug "Local SVM."
+        props = neighbors.collect{|row| row[3] }
+        neighbors.shift
+        activities = neighbors.collect{|n| n[2]}
+        prediction = self.local_svm_prop( props, activities, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
+        prediction = nil if (!prediction.nil? && prediction.infinite?)
+        $logger.debug "Prediction: '#{prediction}' ('#{prediction.class}')."
+        if prediction
+          confidence = get_confidence({:sims => neighbors.collect{|n| n[1]}, :activities => activities})
+        else
+          confidence = nil if prediction.nil?
+        end
+          [prediction, confidence]
+      end
+      # Local support vector prediction from neighbors.
+      # Uses propositionalized setting.
+      # Not to be called directly (use local_svm_regression or local_svm_classification).
+      # Collects R statements (caret svmRadial training) in an array, writes them to
+      # /tmp/r.r and runs that file with Rscript.
+      # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ]
+      # @param [Array] activities, activities for neighbors.
+      # @param [Float] min_train_performance, parameter to control censoring
+      # @return [Numeric] A prediction value; nil when no prediction was produced.
+      def self.local_svm_prop(props, activities, min_train_performance)
+        $logger.debug "Local SVM (Propositionalization / Kernlab Kernel)."
+        n_prop = props[1..-1] # is a matrix, i.e. two nested Arrays.
+        q_prop = props[0] # is an Array.
+        prediction = nil
+        # all neighbors share one activity: return it without building a model
+        if activities.uniq.size == 1
+          prediction = activities[0]
+        else
+          t = Time.now
+          #$logger.debug gram_matrix.to_yaml
+          #@r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests
+          # NOTE(review): the R statements below are collected in rs and executed
+          # through Rscript, not through this connection; @r is only used again in
+          # the prediction branch further down.
+          @r = Rserve::Connection.new#(true,false) # global R instance leads to Socket errors after a large number of requests
+          # rs accumulates the lines of the R script
+          rs = []
+          ["caret", "doMC", "class"].each do |lib|
+            #raise "failed to load R-package #{lib}" unless @r.void_eval "suppressPackageStartupMessages(library('#{lib}'))"
+            rs << "suppressPackageStartupMessages(library('#{lib}'))"
+          end
+          #@r.eval "registerDoMC()" # switch on parallel processing
+          rs << "registerDoMC()" # switch on parallel processing
+          #@r.eval "set.seed(1)"
+          rs << "set.seed(1)"
+          # NOTE(review): nothing has been executed in R yet; this only times building rs
+          $logger.debug "Loading R packages: #{Time.now-t}"
+          t = Time.now
+          # NOTE(review): debug output to stdout
+          p n_prop
+          begin
+            # set data
+            # NOTE(review): the next two statements are identical
+            rs << "n_prop <- c(#{n_prop.flatten.join(',')})"
+            rs << "n_prop <- c(#{n_prop.flatten.join(',')})"
+            rs << "n_prop_x_size <- c(#{n_prop.size})"
+            rs << "n_prop_y_size <- c(#{n_prop[0].size})"
+            rs << "y <- c(#{activities.join(',')})"
+            rs << "q_prop <- c(#{q_prop.join(',')})"
+            rs << "y = matrix(y)"
+            rs << "prop_matrix = matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=T)"
+            rs << "q_prop = matrix(q_prop, 1, n_prop_y_size, byrow=T)"
+            $logger.debug "Setting R data: #{Time.now-t}"
+            t = Time.now
+            # prepare data
+            rs << "
+              weights=NULL
+              if (!(class(y) == 'numeric')) {
+                y = factor(y)
+                weights=unlist(as.list(prop.table(table(y))))
+                weights=(weights-1)^2
+              }
+            "
+            rs << "
+              rem = nearZeroVar(prop_matrix)
+              if (length(rem) > 0) {
+                prop_matrix = prop_matrix[,-rem,drop=F]
+                q_prop = q_prop[,-rem,drop=F]
+              }
+              rem = findCorrelation(cor(prop_matrix))
+              if (length(rem) > 0) {
+                prop_matrix = prop_matrix[,-rem,drop=F]
+                q_prop = q_prop[,-rem,drop=F]
+              }
+            "
+            #p @r.eval("y").to_ruby
+            #p "weights"
+            #p @r.eval("weights").to_ruby
+            $logger.debug "Preparing R data: #{Time.now-t}"
+            t = Time.now
+            # model + support vectors
+            #train_success = @r.eval <<-EOR
+            rs << '
+              model = train(prop_matrix,y,
+                             method="svmRadial",
+                             preProcess=c("center", "scale"),
+                             class.weights=weights,
+                             trControl=trainControl(method="LGOCV",number=10),
+                             tuneLength=8
+                           )
+              perf = ifelse ( class(y)!="numeric", max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
+            '
+            # NOTE(review): fixed script path in /tmp; the script and the Rscript
+            # output are only printed with p, the output is not evaluated.
+            File.open("/tmp/r.r","w+"){|f| f.puts rs.join("\n")}
+            p rs.join("\n")
+            p `Rscript /tmp/r.r`
+=begin
+            @r.void_eval <<-EOR
+              model = train(prop_matrix,y,
+                             method="svmRadial",
+                             #preProcess=c("center", "scale"),
+                             #class.weights=weights,
+                             #trControl=trainControl(method="LGOCV",number=10),
+                             #tuneLength=8
+                           )
+              perf = ifelse ( class(y)!='numeric', max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
+            EOR
+=end
+            $logger.debug "Creating R SVM model: #{Time.now-t}"
+            t = Time.now
+            # NOTE(review): train_success is not assigned anywhere in this method (its
+            # only assignment is the commented-out line above). Unless a method of
+            # that name exists elsewhere, this raises NameError, which the rescue
+            # below logs, leaving prediction nil - confirm.
+            if train_success
+              # prediction
+              @r.eval "predict(model,q_prop); p = predict(model,q_prop)" # kernlab bug: predict twice
+              #@r.eval "p = predict(model,q_prop)" # kernlab bug: predict twice
+              @r.eval "if (class(y)!='numeric') p = as.character(p)"
+              prediction = @r.p
+              # censoring
+              prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance.to_f )
+              prediction = nil if prediction =~ /NA/
+              $logger.debug "Performance: '#{sprintf("%.2f", @r.perf)}'"
+            else
+              $logger.debug "Model creation failed."
+              prediction = nil
+            end
+            $logger.debug "R Prediction: #{Time.now-t}"
+          # every exception is caught here and only written to the debug log
+          rescue Exception => e
+            $logger.debug "#{e.class}: #{e.message}"
+            $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
+          ensure
+            #puts @r.inspect
+            #TODO: broken pipe
+            #@r.quit # free R
+          end
+        end
+        prediction
+      end
+    end
+  end
+end

data/lib/rest-client-wrapper.rb ADDED Viewed

@@ -0,0 +1,98 @@
+module OpenTox
+  class RestClientWrapper
+    attr_accessor :request, :response
+    @@subjectid = nil
+    def self.subjectid=(subjectid)
+      @@subjectid = subjectid
+    end
+    def self.subjectid
+      @@subjectid
+    end
+    # REST methods
+    # Defines one class-level method per HTTP verb (head, get, post, put, delete).
+    # Raises OpenTox::Error if call fails (rescued in overwrite.rb -> halt 502)
+    # Does not wait for task to finish and returns task uri
+    # @param [String] destination URI
+    # @param [optional,Hash|String] Payload data posted to the service
+    # @param [optional,Hash] Headers with params like :accept, :content_type, :subjectid, :verify_ssl
+    # @return [RestClient::Response] REST call response
+    [:head,:get,:post,:put,:delete].each do |method|
+      define_singleton_method method do |uri,payload={},headers={},waiting_task=nil|
+        # check input
+        # NOTE(review): nil headers pass this check, but the next line indexes
+        # headers and would fail on nil.
+        bad_request_error "Headers are not a hash: #{headers.inspect}", uri unless headers==nil or headers.is_a?(Hash)
+        headers[:subjectid] ||= @@subjectid
+        bad_request_error "Invalid URI: '#{uri}'", uri unless URI.valid? uri
+        #resource_not_found_error "URI '#{uri}' not found.", uri unless URI.accessible?(uri, @subjectid) unless URI.ssl?(uri)
+        # make sure that no header parameters are set in the payload
+        [:accept,:content_type,:subjectid].each do |header|
+          # NOTE(review): defined? binds weaker than ||, so it is applied to the
+          # whole `$aa || ...` expression; that looks always truthy, which would
+          # mean the else branch below never runs - confirm the intent.
+          if defined? $aa || URI(uri).host == URI($aa[:uri]).host
+          else
+            bad_request_error "#{header} should be submitted in the headers", uri if payload and payload.is_a?(Hash) and payload[header]
+          end
+        end
+        # create request
+        args={}
+        args[:method] = method
+        args[:url] = uri
+        # verify_ssl is set to 0 unless the caller passed a non-empty :verify_ssl header
+        args[:verify_ssl] = 0 if headers[:verify_ssl].nil? || headers[:verify_ssl].empty?
+        args[:timeout] = 1800
+        args[:payload] = payload
+        headers.each{ |k,v| headers.delete(k) if v==nil } if headers #remove keys with empty values, as this can cause problems
+        args[:headers] = headers
+        $logger.debug "post to #{uri} with params #{payload.inspect.to_s[0..1000]}" if method.to_s=="post"
+        # NOTE(review): these methods are singleton methods of the class, so
+        # @request/@response are presumably stored on the class itself rather than
+        # on instances (attr_accessor :request, :response is declared for instances).
+        @request = RestClient::Request.new(args)
+        # ignore error codes from Task services (may return error codes >= 400 according to API, which causes exceptions in RestClient and RDF::Reader)
+        @response = @request.execute do |response, request, result|
+          # redirects are only followed for GET requests
+          if [301, 302, 307].include? response.code and request.method == :get
+            response.follow_redirection(request, result)
+          elsif response.code >= 400 and !URI.task?(uri)
+            #TODO add parameters to error-report
+            #parameters = request.args
+            #parameters[:headers][:subjectid] = "REMOVED" if parameters[:headers] and parameters[:headers][:subjectid]
+            #parameters[:url] = parameters[:url].gsub(/(http|https|)\:\/\/[a-zA-Z0-9\-]+\:[a-zA-Z0-9]+\@/, "REMOVED@") if parameters[:url]
+            #message += "\nREST parameters:\n#{parameters.inspect}"
+            # look up the error entry that belongs to the response code
+            # NOTE(review): error is nil for a code missing from known_errors;
+            # error[:method] below would then fail.
+            error = known_errors.collect{|e| e if e[:code] == response.code}.compact.first
+            begin # errors are returned as error reports in json, try to parse
+              # TODO: may be the reason for failure of task.rb -n test_11_wait_for_error_task
+              content = JSON.parse(response)
+              msg = content["message"].to_s
+              cause = content["errorCause"].to_s
+              raise if msg.size==0 && cause.size==0 # parsing failed
+            rescue # parsing error failed, use complete content as message
+              msg = "Could not parse error response from rest call '#{method}' to '#{uri}':\n#{response}"
+              cause = nil
+            end
+            Object.method(error[:method]).call msg, uri, cause # call error method
+          else
+            response
+          end
+        end
+      end
+    end
+    #@return [Array] of hashes with error code, method and class
+    def self.known_errors
+      errors = []
+      RestClient::STATUSES.each do |code,k|
+        if code >= 400
+          method = k.underscore.gsub(/ |'/,'_')
+          method += "_error" unless method.match(/_error$/)
+          klass = method.split("_").collect{|s| s.capitalize}.join("")
+          errors << {:code => code, :method => method.to_sym, :class => klass}
+        end
+      end
+      errors
+    end
+  end
+end

data/lib/similarity.rb ADDED Viewed

@@ -0,0 +1,58 @@
+=begin
+* Name: similarity.rb
+* Description: Similarity algorithms
+* Author: Andreas Maunz <andreas@maunz.de>
+* Date: 10/2012
+=end
+module OpenTox
+  module Algorithm
+    class Similarity
+      #TODO weighted tanimoto
+      # Tanimoto similarity
+      # @param [Array] a fingerprints of first compound
+      # @param [Array] b fingerprints of second compound
+      # @return [Float] Tanimoto similarity
+      def self.tanimoto(a,b)
+        bad_request_error "fingerprints #{a} and #{b} don't have equal size" unless a.size == b.size
+        #common = 0.0
+        #a.each_with_index do |n,i|
+          #common += 1 if n == b[i]
+        #end
+        #common/a.size
+        # TODO check if calculation speed can be improved
+        common_p_sum = 0.0
+        all_p_sum = 0.0
+        (0...a.size).each { |idx|
+          common_p_sum += [ a[idx], b[idx] ].min
+          all_p_sum += [ a[idx], b[idx] ].max
+        }
+        common_p_sum/all_p_sum
+      end
+      # Cosine similarity
+      # @param [Array] a fingerprints of first compound
+      # @param [Array] b fingerprints of second compound
+      # @return [Float] Cosine similarity, the cosine of angle enclosed between vectors a and b
+      def self.cosine(a, b)
+        val = 0.0
+        if a.size>0 and b.size>0
+          if a.size>12 && b.size>12
+            a = a[0..11]
+            b = b[0..11]
+          end
+          a_vec = a.to_gv
+          b_vec = b.to_gv
+          val = a_vec.dot(b_vec) / (a_vec.norm * b_vec.norm)
+        end
+        val
+      end
+    end
+  end
+end

data/lib/unique_descriptors.rb ADDED Viewed

@@ -0,0 +1,120 @@
+# set of non redundant descriptors, faster algorithms are preferred
+# Entries are strings prefixed with Openbabel., Cdk. or Joelib.; commented-out
+# entries are excluded from the list. The array is sorted after construction
+# (see the trailing .sort).
+# TODO:
+# select logP algorithm
+# select l5 algorithm
+# use smarts matcher for atom counts
+# check correlations
+UNIQUEDESCRIPTORS = [
+  "Openbabel.abonds", #Number of aromatic bonds
+  "Openbabel.atoms", #Number of atoms
+  "Openbabel.bonds", #Number of bonds
+  "Openbabel.dbonds", #Number of double bonds
+  "Openbabel.HBA1", #Number of Hydrogen Bond Acceptors 1 (JoelLib)
+  "Openbabel.HBA2", #Number of Hydrogen Bond Acceptors 2 (JoelLib)
+  "Openbabel.HBD", #Number of Hydrogen Bond Donors (JoelLib)
+  "Openbabel.L5", #Lipinski Rule of Five
+  "Openbabel.logP", #octanol/water partition coefficient
+  "Openbabel.MP", #Melting point
+  "Openbabel.MR", #molar refractivity
+  "Openbabel.MW", #Molecular Weight filter
+  "Openbabel.nF", #Number of Fluorine Atoms
+  "Openbabel.sbonds", #Number of single bonds
+  "Openbabel.tbonds", #Number of triple bonds
+  "Openbabel.TPSA", #topological polar surface area
+  "Cdk.ALOGP", #Calculates atom additive logP and molar refractivity values as described by Ghose and Crippen
+  "Cdk.APol", #Descriptor that calculates the sum of the atomic polarizabilities (including implicit hydrogens).
+  "Cdk.AcidicGroupCount", #Returns the number of acidic groups.
+  "Cdk.AminoAcidCount", #Returns the number of amino acids found in the system
+  #"Cdk.AromaticAtomsCount", #Descriptor based on the number of aromatic atoms of a molecule.
+  #"Cdk.AromaticBondsCount", #Descriptor based on the number of aromatic bonds of a molecule.
+  #"Cdk.AtomCount", #Descriptor based on the number of atoms of a certain element type.
+  "Cdk.AutocorrelationCharge", #The Moreau-Broto autocorrelation descriptors using partial charges
+  "Cdk.AutocorrelationMass", #The Moreau-Broto autocorrelation descriptors using atomic weight
+  "Cdk.AutocorrelationPolarizability", #The Moreau-Broto autocorrelation descriptors using polarizability
+  "Cdk.BCUT", #Eigenvalue based descriptor noted for its utility in chemical diversity described by Pearlman et al.
+  "Cdk.BPol", #Descriptor that calculates the sum of the absolute value of the difference between atomic polarizabilities of all bonded atoms in the molecule (including implicit hydrogens).
+  "Cdk.BasicGroupCount", #Returns the number of basic groups.
+  #"Cdk.BondCount", #Descriptor based on the number of bonds of a certain bond order.
+  "Cdk.CPSA", #A variety of descriptors combining surface area and partial charge information
+  "Cdk.CarbonTypes", #Characterizes the carbon connectivity in terms of hybridization
+  "Cdk.ChiChain", #Evaluates the Kier & Hall Chi chain indices of orders 3,4,5 and 6
+  "Cdk.ChiCluster", #Evaluates the Kier & Hall Chi cluster indices of orders 3,4,5,6 and 7
+  "Cdk.ChiPathCluster", #Evaluates the Kier & Hall Chi path cluster indices of orders 4,5 and 6
+  "Cdk.ChiPath", #Evaluates the Kier & Hall Chi path indices of orders 0,1,2,3,4,5,6 and 7
+  "Cdk.EccentricConnectivityIndex", #A topological descriptor combining distance and adjacency information.
+  "Cdk.FMF", #Descriptor characterizing molecular complexity in terms of its Murcko framework
+  "Cdk.FragmentComplexity", #Class that returns the complexity of a system. The complexity is defined as @cdk.cite{Nilakantan06}
+  "Cdk.GravitationalIndex", #Descriptor characterizing the mass distribution of the molecule.
+  #"Cdk.HBondAcceptorCount", #Descriptor that calculates the number of hydrogen bond acceptors.
+  #"Cdk.HBondDonorCount", #Descriptor that calculates the number of hydrogen bond donors.
+  "Cdk.HybridizationRatio", #Characterizes molecular complexity in terms of carbon hybridization states.
+  "Cdk.IPMolecularLearning", #Descriptor that evaluates the ionization potential.
+  "Cdk.KappaShapeIndices", #Descriptor that calculates Kier and Hall kappa molecular shape indices.
+  "Cdk.KierHallSmarts", #Counts the number of occurrences of the E-state fragments
+  "Cdk.LargestChain", #Returns the number of atoms in the largest chain
+  "Cdk.LargestPiSystem", #Returns the number of atoms in the largest pi chain
+  "Cdk.LengthOverBreadth", #Calculates the ratio of length to breadth.
+  "Cdk.LongestAliphaticChain", #Returns the number of atoms in the longest aliphatic chain
+  "Cdk.MDE", #Evaluate molecular distance edge descriptors for C, N and O
+  "Cdk.MannholdLogP", #Descriptor that calculates the LogP based on a simple equation using the number of carbons and hetero atoms.
+  "Cdk.MomentOfInertia", #Descriptor that calculates the principal moments of inertia and ratios of the principal moments. Also calculates the radius of gyration.
+  "Cdk.PetitjeanNumber", #Descriptor that calculates the Petitjean Number of a molecule.
+  "Cdk.PetitjeanShapeIndex", #The topological and geometric shape indices described by Petitjean and Bath et al. respectively. Both measure the anisotropy in a molecule.
+  "Cdk.RotatableBondsCount", #Descriptor that calculates the number of nonrotatable bonds on a molecule.
+  #"Cdk.RuleOfFive", #This Class contains a method that returns the number failures of the Lipinski's Rule Of Five.
+  #"Cdk.TPSA", #Calculation of topological polar surface area based on fragment contributions.
+  "Cdk.VABC", #Describes the volume of a molecule.
+  "Cdk.VAdjMa", #Descriptor that calculates the vertex adjacency information of a molecule.
+  "Cdk.WHIM", #Holistic descriptors described by Todeschini et al.
+  #"Cdk.Weight", #Descriptor based on the weight of atoms of a certain element type. If no element is specified, the returned value is the Molecular Weight
+  "Cdk.WeightedPath", #The weighted path (molecular ID) descriptors described by Randic. They characterize molecular branching.
+  "Cdk.WienerNumbers", #This class calculates Wiener path number and Wiener polarity number.
+  "Cdk.XLogP", #Prediction of logP based on the atom-type method called XLogP.
+  "Cdk.ZagrebIndex", #The sum of the squared atom degrees of all heavy atoms.
+  "Joelib.count.NumberOfS", #no description available
+  "Joelib.count.NumberOfP", #no description available
+  "Joelib.count.NumberOfO", #no description available
+  "Joelib.count.NumberOfN", #no description available
+  #"Joelib.count.AromaticBonds", #no description available
+  "Joelib.count.NumberOfI", #no description available
+  "Joelib.count.NumberOfF", #no description available
+  "Joelib.count.NumberOfC", #no description available
+  "Joelib.count.NumberOfB", #no description available
+  "Joelib.count.HydrophobicGroups", #no description available
+  #"Joelib.KierShape3", #no description available
+  #"Joelib.KierShape2", #no description available
+  #"Joelib.KierShape1", #no description available
+  #"Joelib.count.AcidicGroups", #no description available
+  "Joelib.count.AliphaticOHGroups", #no description available
+  #"Joelib.count.NumberOfAtoms", #no description available
+  "Joelib.TopologicalRadius", #no description available
+  "Joelib.GeometricalShapeCoefficient", #no description available
+  #"Joelib.MolecularWeight", #no description available
+  "Joelib.FractionRotatableBonds", #no description available
+  #"Joelib.count.HBD2", #no description available
+  #"Joelib.count.HBD1", #no description available
+  "Joelib.LogP", #no description available
+  "Joelib.GraphShapeCoefficient", #no description available
+  "Joelib.count.BasicGroups", #no description available
+  #"Joelib.count.RotatableBonds", #no description available
+  "Joelib.count.HeavyBonds", #no description available
+  "Joelib.PolarSurfaceArea", #no description available
+  #"Joelib.ZagrebIndex1", #no description available
+  "Joelib.GeometricalRadius", #no description available
+  "Joelib.count.SO2Groups", #no description available
+  "Joelib.count.AromaticOHGroups", #no description available
+  "Joelib.GeometricalDiameter", #no description available
+  #"Joelib.MolarRefractivity", #no description available
+  "Joelib.count.NumberOfCl", #no description available
+  "Joelib.count.OSOGroups", #no description available
+  "Joelib.count.NumberOfBr", #no description available
+  "Joelib.count.NO2Groups", #no description available
+  "Joelib.count.HeteroCycles", #no description available
+  #"Joelib.count.HBA2", #no description available
+  #"Joelib.count.HBA1", #no description available
+  #"Joelib.count.NumberOfBonds", #no description available
+  "Joelib.count.SOGroups", #no description available
+  "Joelib.TopologicalDiameter", #no description available
+  "Joelib.count.NumberOfHal", #no description available
+].sort