lazar 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +10 -0
  3. data/.yardopts +4 -0
  4. data/Gemfile +2 -0
  5. data/LICENSE +674 -0
  6. data/README.md +44 -0
  7. data/Rakefile +1 -0
  8. data/VERSION +1 -0
  9. data/ext/lazar/Makefile +5 -0
  10. data/java/CdkDescriptorInfo.class +0 -0
  11. data/java/CdkDescriptorInfo.java +22 -0
  12. data/java/CdkDescriptors.class +0 -0
  13. data/java/CdkDescriptors.java +141 -0
  14. data/java/Jmol.jar +0 -0
  15. data/java/JoelibDescriptorInfo.class +0 -0
  16. data/java/JoelibDescriptorInfo.java +15 -0
  17. data/java/JoelibDescriptors.class +0 -0
  18. data/java/JoelibDescriptors.java +60 -0
  19. data/java/Rakefile +15 -0
  20. data/java/cdk-1.4.19.jar +0 -0
  21. data/java/joelib2.jar +0 -0
  22. data/java/log4j.jar +0 -0
  23. data/lazar.gemspec +28 -0
  24. data/lib/SMARTS_InteLigand.txt +983 -0
  25. data/mongoid.yml +8 -0
  26. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +13553 -0
  27. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +436 -0
  28. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +568 -0
  29. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +87 -0
  30. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +978 -0
  31. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +1120 -0
  32. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +1113 -0
  33. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +850 -0
  34. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +829 -0
  35. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +1198 -0
  36. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +1505 -0
  37. data/test/data/EPAFHM.csv +618 -0
  38. data/test/data/EPAFHM.medi.csv +100 -0
  39. data/test/data/EPAFHM.mini.csv +22 -0
  40. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +581 -0
  41. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +1217 -0
  42. data/test/data/ISSCAN-multi.csv +59 -0
  43. data/test/data/LOAEL_log_mg_corrected_smiles.csv +568 -0
  44. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +568 -0
  45. data/test/data/acetaldehyde.sdf +14 -0
  46. data/test/data/boiling_points.ext.sdf +11460 -0
  47. data/test/data/cpdb_100.csv +101 -0
  48. data/test/data/hamster_carcinogenicity.csv +86 -0
  49. data/test/data/hamster_carcinogenicity.mini.bool_float.csv +11 -0
  50. data/test/data/hamster_carcinogenicity.mini.bool_int.csv +11 -0
  51. data/test/data/hamster_carcinogenicity.mini.bool_string.csv +11 -0
  52. data/test/data/hamster_carcinogenicity.mini.csv +11 -0
  53. data/test/data/hamster_carcinogenicity.ntriples +618 -0
  54. data/test/data/hamster_carcinogenicity.sdf +2805 -0
  55. data/test/data/hamster_carcinogenicity.xls +0 -0
  56. data/test/data/hamster_carcinogenicity.yaml +352 -0
  57. data/test/data/hamster_carcinogenicity_with_errors.csv +88 -0
  58. data/test/data/kazius.csv +4070 -0
  59. data/test/data/multi_cell_call.csv +1067 -0
  60. data/test/data/multi_cell_call_no_dup.csv +1057 -0
  61. data/test/data/multicolumn.csv +8 -0
  62. data/test/data/rat_feature_dataset.csv +1179 -0
  63. data/test/data/wrong_dataset.csv +8 -0
  64. metadata +108 -8
data/README.md ADDED
@@ -0,0 +1,44 @@
1
+ lazar
2
+ =====
3
+
4
+ Ruby libraries for the lazar framework
5
+
6
+ Dependencies
7
+ ------------
8
+
9
+ lazar depends on a couple of external programs and libraries. On Debian 7 "Wheezy" systems you can install them with
10
+
11
+ `sudo apt-get install build-essential ruby ruby-dev git cmake swig r-base r-base-dev r-cran-rserve openjdk-7-jre libgsl0-dev libxml2-dev zlib1g-dev libcairo2-dev`
12
+
13
+ You will also need MongoDB version 3.0 or later, but Debian "Wheezy" only provides version 2.4. Please follow the instructions at http://docs.mongodb.org/manual/tutorial/install-mongodb-on-debian/:
14
+
15
+ ```
16
+ sudo apt-key adv --keyserver keyserver.ubuntu.com --recv 7F0CEB10
17
+ echo "deb http://repo.mongodb.org/apt/debian wheezy/mongodb-org/3.0 main" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
18
+ sudo apt-get update
19
+ sudo apt-get install -y mongodb-org
20
+ ```
21
+
22
+ Installation
23
+ ------------
24
+
25
+ `gem install lazar`
26
+
27
+ Please be patient: compiling the OpenBabel and Fminer libraries can be very time-consuming. If the installation fails, you can try to install manually:
28
+
29
+ ```
30
+ git clone https://github.com/opentox/lazar.git
31
+ cd lazar
32
+ ruby ext/lazar/extconf.rb
33
+ bundle install
34
+ ```
35
+
36
+ The output should give you more verbose information that can help in debugging (e.g. to identify missing libraries).
37
+
38
+ Documentation
39
+ -------------
40
+ * [API documentation](http://rdoc.info/gems/lazar)
41
+
42
+ Copyright
43
+ ---------
44
+ Copyright (c) 2009-2015 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.6
@@ -0,0 +1,5 @@
1
# No-op Makefile: both targets only run `true`, so `make` and `make install`
# always succeed without building anything.
all install:
	true
Binary file
@@ -0,0 +1,22 @@
1
+ import java.util.*;
2
+ import org.openscience.cdk.qsar.descriptors.molecular.*;
3
+ import org.openscience.cdk.qsar.*;
4
+
5
+ class CdkDescriptorInfo {
6
+ public static void main(String[] args) {
7
+
8
+ DescriptorEngine engine = new DescriptorEngine(DescriptorEngine.MOLECULAR);
9
+
10
+ for (Iterator<IDescriptor> it = engine.getDescriptorInstances().iterator(); it.hasNext(); ) {
11
+ IDescriptor descriptor = it.next();
12
+ String cdk_class = descriptor.getClass().toString().replaceAll("class ","");
13
+ System.out.println("- :java_class: \""+cdk_class+"\"");
14
+ String description = engine.getDictionaryDefinition(cdk_class).replaceAll("^\\s+", "" ).replaceAll("\\s+$", "").replaceAll("\\s+", " ");
15
+ System.out.println(" :description: \""+description+"\"");
16
+ System.out.println(" :names:");
17
+ for (String name : descriptor.getDescriptorNames()) {
18
+ System.out.println(" - \""+name+"\"");
19
+ }
20
+ }
21
+ }
22
+ }
Binary file
@@ -0,0 +1,141 @@
1
+ import java.util.*;
2
+ import java.io.*;
3
+ import org.openscience.cdk.DefaultChemObjectBuilder;
4
+ import org.openscience.cdk.interfaces.IMolecule;
5
+ import org.openscience.cdk.io.iterator.IteratingMDLReader;
6
+ import org.openscience.cdk.qsar.*;
7
+ import org.openscience.cdk.qsar.DescriptorValue;
8
+ import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector;
9
+ import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
10
+ import org.openscience.cdk.exception.NoSuchAtomTypeException;
11
+
12
+ class CdkDescriptors {
13
+ public static void main(String[] args) {
14
+
15
+ if (args==null || args.length<2) {
16
+ System.err.println("required params: <sd-file> <descriptor1> <descriptor2(optional)> <descriptor3(optional)> ...");
17
+ System.exit(1);
18
+ }
19
+ if (! new File(args[0]).exists()){
20
+ System.err.println("file not found "+args[0]);
21
+ System.exit(1);
22
+ }
23
+
24
+ // command line descriptor params can be either "descriptorName" or "descriptorValueName"
25
+ // terminology:
26
+ // A descriptor can calculate serveral values, e.g., ALOGP produces ALOGP.ALogP, ALOGP.ALogp2, ALOGP.AMR
27
+ // "descriptorName" ALOGP
28
+ // "valueName" AMR
29
+ // "descriptorValueName" ALOGP.AMR
30
+ DescriptorEngine engine;
31
+ Set<String> classNames = new LinkedHashSet<String>(); // descriptors to be computed
32
+ Set<String> descriptorNames = new LinkedHashSet<String>(); // all values of this descriptor will be printed
33
+ Set<String> descriptorValueNames = new LinkedHashSet<String>(); // only these values of a descriptor will be printed
34
+ for (int i =1; i < args.length; i++) {
35
+ String descriptorName;
36
+ if (args[i].indexOf(".")!=-1) {
37
+ descriptorValueNames.add(args[i]);
38
+ descriptorName = args[i].substring(0,args[i].indexOf("."));
39
+ }
40
+ else {
41
+ descriptorNames.add(args[i]);
42
+ descriptorName = args[i];
43
+ }
44
+ classNames.add(getDescriptorClassName(descriptorName));
45
+ }
46
+
47
+ engine = new DescriptorEngine(new ArrayList<String>(classNames));
48
+ List<IDescriptor> instances = engine.instantiateDescriptors(new ArrayList<String>(classNames));
49
+ List<DescriptorSpecification> specs = engine.initializeSpecifications(instances);
50
+ engine.setDescriptorInstances(instances);
51
+ engine.setDescriptorSpecifications(specs);
52
+
53
+ try {
54
+ BufferedReader br = new BufferedReader(new FileReader(args[0]));
55
+ PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"cdk.yaml"));
56
+ // parse 3d sdf from file and calculate descriptors
57
+ IteratingMDLReader reader = new IteratingMDLReader( br, DefaultChemObjectBuilder.getInstance());
58
+ int c = 0;
59
+ while (reader.hasNext()) {
60
+ try {
61
+ System.out.println("computing "+(args.length-1)+" descriptors for compound "+(++c));
62
+ IMolecule molecule = (IMolecule)reader.next();
63
+ molecule = (IMolecule) AtomContainerManipulator.removeHydrogens(molecule);
64
+ try {
65
+ AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
66
+ }
67
+ catch (NoSuchAtomTypeException e) {
68
+ e.printStackTrace();
69
+ }
70
+ CDKHueckelAromaticityDetector.detectAromaticity(molecule);
71
+
72
+ engine.process(molecule);
73
+ Map<Object,Object> properties = molecule.getProperties();
74
+ Boolean first = true;
75
+ for (Map.Entry<Object, Object> entry : properties.entrySet()) {
76
+ try {
77
+ if ((entry.getKey() instanceof DescriptorSpecification) && (entry.getValue() instanceof DescriptorValue)) {
78
+ DescriptorSpecification property = (DescriptorSpecification)entry.getKey();
79
+ DescriptorValue value = (DescriptorValue)entry.getValue();
80
+ String[] values = value.getValue().toString().split(",");
81
+ for (int i = 0; i < values.length; i++) {
82
+ String cdk_class = property.getImplementationTitle();
83
+ String descriptorName = cdk_class.substring(cdk_class.lastIndexOf(".")+1).replace("Descriptor","");
84
+ String descriptorValueName = descriptorName + "." + value.getNames()[i];
85
+ if (descriptorNames.contains(descriptorName) || descriptorValueNames.contains(descriptorValueName)) {
86
+ if (first) { yaml.print("- "); first = false; }
87
+ else { yaml.print(" "); }
88
+ yaml.println("Cdk." + descriptorValueName + ": " + values[i]);
89
+ }
90
+ }
91
+ }
92
+ }
93
+ catch (ClassCastException e) { } // sdf properties are stored as molecules properties (strings), ignore them
94
+ catch (Exception e) { e.printStackTrace(); } // output nothing to yaml
95
+ }
96
+ }
97
+ catch (Exception e) {
98
+ yaml.println("- {}");
99
+ e.printStackTrace();
100
+ continue;
101
+ }
102
+ }
103
+ yaml.close();
104
+ }
105
+ catch (Exception e) { e.printStackTrace(); }
106
+ }
107
+
108
+
109
+ /** HACK to find the class for a descriptor
110
+ * problem: Descriptor is not always at the end of the class (APolDescriptor), but may be in the middle (AutocorrelationDescriptorPolarizability)
111
+ * this method makes a class-lookup using trial and error */
112
+ static String getDescriptorClassName(String descriptorName) {
113
+ String split = splitCamelCase(descriptorName)+" "; // space mark possible positions for 'Descriptor'
114
+ for(int i = split.length()-1; i>0; i--) {
115
+ if (split.charAt(i)==' ') { // iterate over all spaces, starting with the trailing one
116
+ String test = split.substring(0,i)+"Descriptor"+split.substring(i+1,split.length()); // replace current space with 'Descriptor' ..
117
+ test = test.replaceAll("\\s",""); // .. and remove other spaces
118
+ String className = "org.openscience.cdk.qsar.descriptors.molecular." + test;
119
+ try {
120
+ Class.forName(className);
121
+ return className;
122
+ } catch (ClassNotFoundException e) {}
123
+ }
124
+ }
125
+ System.err.println("Descriptor not found: "+descriptorName);
126
+ System.exit(1);
127
+ return null;
128
+ }
129
+
130
+ /** inserts space in between camel words */
131
+ static String splitCamelCase(String s) {
132
+ return s.replaceAll(
133
+ String.format("%s|%s|%s",
134
+ "(?<=[A-Z])(?=[A-Z][a-z])",
135
+ "(?<=[^A-Z])(?=[A-Z])",
136
+ "(?<=[A-Za-z])(?=[^A-Za-z])"
137
+ ),
138
+ " "
139
+ );
140
+ }
141
+ }
data/java/Jmol.jar ADDED
Binary file
Binary file
@@ -0,0 +1,15 @@
1
+ import joelib2.feature.FeatureHelper;
2
+
3
+ class JoelibDescriptorInfo {
4
+ public static void main(String[] args) {
5
+ FeatureHelper helper = FeatureHelper.instance();
6
+ System.out.println("---"); // document separator for Joelib debug messages
7
+ for (Object feature : helper.getNativeFeatures() ) {
8
+ System.out.println("- :java_class: \""+feature.toString()+"\"");
9
+ // methods for accessing feature descriptions e.g. with
10
+ // FeatureFactory.instance().getFeature(feature.toString()).getDescription().getText() or
11
+ // FeatureFactory.instance().getFeature(feature.toString()).getDescription().getHtml()
12
+ // are defunct
13
+ }
14
+ }
15
+ }
Binary file
@@ -0,0 +1,60 @@
1
+ import java.util.*;
2
+ import java.io.*;
3
+ import joelib2.feature.Feature;
4
+ import joelib2.feature.FeatureHelper;
5
+ import joelib2.feature.FeatureFactory;
6
+ import joelib2.feature.FeatureResult;
7
+ import joelib2.io.BasicIOType;
8
+ import joelib2.io.BasicIOTypeHolder;
9
+ import joelib2.io.BasicReader;
10
+ import joelib2.io.MoleculeFileHelper;
11
+ import joelib2.io.MoleculeFileIO;
12
+ import joelib2.io.MoleculeIOException;
13
+ import joelib2.molecule.BasicConformerMolecule;
14
+
15
+ class JoelibDescriptors {
16
+ public static void main(String[] args) {
17
+
18
+ String[] features = null;
19
+ features = new String[args.length-1];
20
+ System.arraycopy(args,1,features,0,args.length-1);
21
+
22
+ FeatureFactory factory = FeatureFactory.instance();
23
+ MoleculeFileIO loader = null;
24
+ String line = new String();
25
+ String sdf = new String();
26
+ try {
27
+ // parse 3d sdf from file and calculate descriptors
28
+ InputStream is = new FileInputStream(args[0]);
29
+ PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"joelib.yaml"));
30
+ BasicIOType inType = BasicIOTypeHolder.instance().getIOType("SDF");
31
+ loader = MoleculeFileHelper.getMolReader(is, inType);
32
+ BasicConformerMolecule mol = new BasicConformerMolecule(inType, inType);
33
+ while (true) {
34
+ try {
35
+ Boolean success = loader.read(mol);
36
+ if (!success) { break; } // last molecule
37
+ for (int i =0; i < features.length; i++) {
38
+ String name = "joelib2.feature.types." + features[i];
39
+ Feature feature = factory.getFeature(name);
40
+ FeatureResult result = feature.calculate(mol);
41
+ if (i == 0) { yaml.print("- "); }
42
+ else { yaml.print(" "); }
43
+ yaml.print( "Joelib."+features[i]+": " );
44
+ yaml.println( result.toString() );
45
+ }
46
+
47
+ }
48
+ catch (Exception e) {
49
+ System.err.println(e.toString());
50
+ e.printStackTrace();
51
+ }
52
+ }
53
+ yaml.close();
54
+ }
55
+ catch (Exception e) {
56
+ System.err.println(e.toString());
57
+ e.printStackTrace();
58
+ }
59
+ }
60
+ }
data/java/Rakefile ADDED
@@ -0,0 +1,15 @@
1
# Java classes to compile, each paired with the classpath javac needs for it.
java_classes = [
  ["CdkDescriptors", "cdk-1.4.19.jar"],
  ["CdkDescriptorInfo", "cdk-1.4.19.jar"],
  ["JoelibDescriptors", "joelib2.jar:."],
  ["JoelibDescriptorInfo", "joelib2.jar:."],
]

# The default task builds the .class file of every listed Java class.
task :default => java_classes.map { |name, _classpath| "#{name}.class" }

# One file task per class: recompile when the .java source is newer than the .class file.
java_classes.each do |name, classpath|
  file "#{name}.class" => "#{name}.java" do
    # sh (unlike backticks) aborts the task when javac exits with an error,
    # so a failed compilation is no longer reported as success.
    sh "javac", "-classpath", classpath, "#{name}.java"
  end
end
Binary file
data/java/joelib2.jar ADDED
Binary file
data/java/log4j.jar ADDED
Binary file
data/lazar.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+
4
# Gem specification for lazar: metadata, packaged files, the native extension
# build script and runtime dependencies.
Gem::Specification.new do |s|
  s.name        = "lazar"
  # Resolve VERSION relative to this file, so that the gemspec can be loaded
  # from any working directory (not only from the project root).
  s.version     = File.read(File.expand_path("../VERSION", __FILE__)).strip
  # One array element per author (RubyGems expects a list of names).
  s.authors     = ["Christoph Helma", "Martin Guetlein", "Andreas Maunz", "Micha Rautenberg", "David Vorgrimmler", "Denis Gebele"]
  s.email       = ["helma@in-silico.ch"]
  s.homepage    = "http://github.com/opentox/lazar"
  s.summary     = %q{Lazar framework}
  s.description = %q{Libraries for lazy structure-activity relationships and read-across.}
  s.license     = 'GPL-3'

  s.rubyforge_project = "lazar"

  # Package every file tracked by git; `git ls-files` is run in the current directory.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # extconf.rb is executed when the gem is installed
  s.extensions    = %w[ext/lazar/extconf.rb]
  s.require_paths = ["lib"]

  # specify any dependencies here; for example:
  s.add_runtime_dependency "bundler"
  s.add_runtime_dependency "rest-client"
  s.add_runtime_dependency 'nokogiri'
  s.add_runtime_dependency 'rserve-client'
  s.add_runtime_dependency "mongoid", '~> 5.0beta'

end