lazar 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +10 -0
- data/.yardopts +4 -0
- data/Gemfile +2 -0
- data/LICENSE +674 -0
- data/README.md +44 -0
- data/Rakefile +1 -0
- data/VERSION +1 -0
- data/ext/lazar/Makefile +5 -0
- data/java/CdkDescriptorInfo.class +0 -0
- data/java/CdkDescriptorInfo.java +22 -0
- data/java/CdkDescriptors.class +0 -0
- data/java/CdkDescriptors.java +141 -0
- data/java/Jmol.jar +0 -0
- data/java/JoelibDescriptorInfo.class +0 -0
- data/java/JoelibDescriptorInfo.java +15 -0
- data/java/JoelibDescriptors.class +0 -0
- data/java/JoelibDescriptors.java +60 -0
- data/java/Rakefile +15 -0
- data/java/cdk-1.4.19.jar +0 -0
- data/java/joelib2.jar +0 -0
- data/java/log4j.jar +0 -0
- data/lazar.gemspec +28 -0
- data/lib/SMARTS_InteLigand.txt +983 -0
- data/mongoid.yml +8 -0
- data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +13553 -0
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +436 -0
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +568 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +87 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +978 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +1120 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +1113 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +850 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +829 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +1198 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +1505 -0
- data/test/data/EPAFHM.csv +618 -0
- data/test/data/EPAFHM.medi.csv +100 -0
- data/test/data/EPAFHM.mini.csv +22 -0
- data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +581 -0
- data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +1217 -0
- data/test/data/ISSCAN-multi.csv +59 -0
- data/test/data/LOAEL_log_mg_corrected_smiles.csv +568 -0
- data/test/data/LOAEL_log_mmol_corrected_smiles.csv +568 -0
- data/test/data/acetaldehyde.sdf +14 -0
- data/test/data/boiling_points.ext.sdf +11460 -0
- data/test/data/cpdb_100.csv +101 -0
- data/test/data/hamster_carcinogenicity.csv +86 -0
- data/test/data/hamster_carcinogenicity.mini.bool_float.csv +11 -0
- data/test/data/hamster_carcinogenicity.mini.bool_int.csv +11 -0
- data/test/data/hamster_carcinogenicity.mini.bool_string.csv +11 -0
- data/test/data/hamster_carcinogenicity.mini.csv +11 -0
- data/test/data/hamster_carcinogenicity.ntriples +618 -0
- data/test/data/hamster_carcinogenicity.sdf +2805 -0
- data/test/data/hamster_carcinogenicity.xls +0 -0
- data/test/data/hamster_carcinogenicity.yaml +352 -0
- data/test/data/hamster_carcinogenicity_with_errors.csv +88 -0
- data/test/data/kazius.csv +4070 -0
- data/test/data/multi_cell_call.csv +1067 -0
- data/test/data/multi_cell_call_no_dup.csv +1057 -0
- data/test/data/multicolumn.csv +8 -0
- data/test/data/rat_feature_dataset.csv +1179 -0
- data/test/data/wrong_dataset.csv +8 -0
- metadata +108 -8
data/README.md
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
lazar
|
2
|
+
=====
|
3
|
+
|
4
|
+
Ruby libraries for the lazar framework
|
5
|
+
|
6
|
+
Dependencies
|
7
|
+
------------
|
8
|
+
|
9
|
+
lazar depends on a couple of external programs and libraries. On Debian 7 "Wheezy" systems you can install them with:
|
10
|
+
|
11
|
+
`sudo apt-get install build-essential ruby ruby-dev git cmake swig r-base r-base-dev r-cran-rserve openjdk-7-jre libgsl0-dev libxml2-dev zlib1g-dev libcairo2-dev`
|
12
|
+
|
13
|
+
You will also need MongoDB version 3.0 or later, but Debian "Wheezy" only provides version 2.4. Please follow the instructions at http://docs.mongodb.org/manual/tutorial/install-mongodb-on-debian/ to install a newer version:
|
14
|
+
|
15
|
+
```
|
16
|
+
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv 7F0CEB10
|
17
|
+
echo "deb http://repo.mongodb.org/apt/debian wheezy/mongodb-org/3.0 main" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
|
18
|
+
sudo apt-get update
|
19
|
+
sudo apt-get install -y mongodb-org
|
20
|
+
```
|
21
|
+
|
22
|
+
Installation
|
23
|
+
------------
|
24
|
+
|
25
|
+
`gem install lazar`
|
26
|
+
|
27
|
+
Please be patient: compiling the OpenBabel and Fminer libraries can be very time-consuming. If the installation fails, you can try to install manually:
|
28
|
+
|
29
|
+
```
|
30
|
+
git clone https://github.com/opentox/lazar.git
|
31
|
+
cd lazar
|
32
|
+
ruby ext/lazar/extconf.rb
|
33
|
+
bundle install
|
34
|
+
```
|
35
|
+
|
36
|
+
The output should give you more verbose information that can help in debugging (e.g. to identify missing libraries).
|
37
|
+
|
38
|
+
Documentation
|
39
|
+
-------------
|
40
|
+
* [API documentation](http://rdoc.info/gems/lazar)
|
41
|
+
|
42
|
+
Copyright
|
43
|
+
---------
|
44
|
+
Copyright (c) 2009-2015 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.6
|
Binary file
|
@@ -0,0 +1,22 @@
|
|
1
|
+
import java.util.*;
|
2
|
+
import org.openscience.cdk.qsar.descriptors.molecular.*;
|
3
|
+
import org.openscience.cdk.qsar.*;
|
4
|
+
|
5
|
+
class CdkDescriptorInfo {
|
6
|
+
public static void main(String[] args) {
|
7
|
+
|
8
|
+
DescriptorEngine engine = new DescriptorEngine(DescriptorEngine.MOLECULAR);
|
9
|
+
|
10
|
+
for (Iterator<IDescriptor> it = engine.getDescriptorInstances().iterator(); it.hasNext(); ) {
|
11
|
+
IDescriptor descriptor = it.next();
|
12
|
+
String cdk_class = descriptor.getClass().toString().replaceAll("class ","");
|
13
|
+
System.out.println("- :java_class: \""+cdk_class+"\"");
|
14
|
+
String description = engine.getDictionaryDefinition(cdk_class).replaceAll("^\\s+", "" ).replaceAll("\\s+$", "").replaceAll("\\s+", " ");
|
15
|
+
System.out.println(" :description: \""+description+"\"");
|
16
|
+
System.out.println(" :names:");
|
17
|
+
for (String name : descriptor.getDescriptorNames()) {
|
18
|
+
System.out.println(" - \""+name+"\"");
|
19
|
+
}
|
20
|
+
}
|
21
|
+
}
|
22
|
+
}
|
Binary file
|
@@ -0,0 +1,141 @@
|
|
1
|
+
import java.util.*;
|
2
|
+
import java.io.*;
|
3
|
+
import org.openscience.cdk.DefaultChemObjectBuilder;
|
4
|
+
import org.openscience.cdk.interfaces.IMolecule;
|
5
|
+
import org.openscience.cdk.io.iterator.IteratingMDLReader;
|
6
|
+
import org.openscience.cdk.qsar.*;
|
7
|
+
import org.openscience.cdk.qsar.DescriptorValue;
|
8
|
+
import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector;
|
9
|
+
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
|
10
|
+
import org.openscience.cdk.exception.NoSuchAtomTypeException;
|
11
|
+
|
12
|
+
class CdkDescriptors {
|
13
|
+
public static void main(String[] args) {
|
14
|
+
|
15
|
+
if (args==null || args.length<2) {
|
16
|
+
System.err.println("required params: <sd-file> <descriptor1> <descriptor2(optional)> <descriptor3(optional)> ...");
|
17
|
+
System.exit(1);
|
18
|
+
}
|
19
|
+
if (! new File(args[0]).exists()){
|
20
|
+
System.err.println("file not found "+args[0]);
|
21
|
+
System.exit(1);
|
22
|
+
}
|
23
|
+
|
24
|
+
// command line descriptor params can be either "descriptorName" or "descriptorValueName"
|
25
|
+
// terminology:
|
26
|
+
// A descriptor can calculate serveral values, e.g., ALOGP produces ALOGP.ALogP, ALOGP.ALogp2, ALOGP.AMR
|
27
|
+
// "descriptorName" ALOGP
|
28
|
+
// "valueName" AMR
|
29
|
+
// "descriptorValueName" ALOGP.AMR
|
30
|
+
DescriptorEngine engine;
|
31
|
+
Set<String> classNames = new LinkedHashSet<String>(); // descriptors to be computed
|
32
|
+
Set<String> descriptorNames = new LinkedHashSet<String>(); // all values of this descriptor will be printed
|
33
|
+
Set<String> descriptorValueNames = new LinkedHashSet<String>(); // only these values of a descriptor will be printed
|
34
|
+
for (int i =1; i < args.length; i++) {
|
35
|
+
String descriptorName;
|
36
|
+
if (args[i].indexOf(".")!=-1) {
|
37
|
+
descriptorValueNames.add(args[i]);
|
38
|
+
descriptorName = args[i].substring(0,args[i].indexOf("."));
|
39
|
+
}
|
40
|
+
else {
|
41
|
+
descriptorNames.add(args[i]);
|
42
|
+
descriptorName = args[i];
|
43
|
+
}
|
44
|
+
classNames.add(getDescriptorClassName(descriptorName));
|
45
|
+
}
|
46
|
+
|
47
|
+
engine = new DescriptorEngine(new ArrayList<String>(classNames));
|
48
|
+
List<IDescriptor> instances = engine.instantiateDescriptors(new ArrayList<String>(classNames));
|
49
|
+
List<DescriptorSpecification> specs = engine.initializeSpecifications(instances);
|
50
|
+
engine.setDescriptorInstances(instances);
|
51
|
+
engine.setDescriptorSpecifications(specs);
|
52
|
+
|
53
|
+
try {
|
54
|
+
BufferedReader br = new BufferedReader(new FileReader(args[0]));
|
55
|
+
PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"cdk.yaml"));
|
56
|
+
// parse 3d sdf from file and calculate descriptors
|
57
|
+
IteratingMDLReader reader = new IteratingMDLReader( br, DefaultChemObjectBuilder.getInstance());
|
58
|
+
int c = 0;
|
59
|
+
while (reader.hasNext()) {
|
60
|
+
try {
|
61
|
+
System.out.println("computing "+(args.length-1)+" descriptors for compound "+(++c));
|
62
|
+
IMolecule molecule = (IMolecule)reader.next();
|
63
|
+
molecule = (IMolecule) AtomContainerManipulator.removeHydrogens(molecule);
|
64
|
+
try {
|
65
|
+
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
|
66
|
+
}
|
67
|
+
catch (NoSuchAtomTypeException e) {
|
68
|
+
e.printStackTrace();
|
69
|
+
}
|
70
|
+
CDKHueckelAromaticityDetector.detectAromaticity(molecule);
|
71
|
+
|
72
|
+
engine.process(molecule);
|
73
|
+
Map<Object,Object> properties = molecule.getProperties();
|
74
|
+
Boolean first = true;
|
75
|
+
for (Map.Entry<Object, Object> entry : properties.entrySet()) {
|
76
|
+
try {
|
77
|
+
if ((entry.getKey() instanceof DescriptorSpecification) && (entry.getValue() instanceof DescriptorValue)) {
|
78
|
+
DescriptorSpecification property = (DescriptorSpecification)entry.getKey();
|
79
|
+
DescriptorValue value = (DescriptorValue)entry.getValue();
|
80
|
+
String[] values = value.getValue().toString().split(",");
|
81
|
+
for (int i = 0; i < values.length; i++) {
|
82
|
+
String cdk_class = property.getImplementationTitle();
|
83
|
+
String descriptorName = cdk_class.substring(cdk_class.lastIndexOf(".")+1).replace("Descriptor","");
|
84
|
+
String descriptorValueName = descriptorName + "." + value.getNames()[i];
|
85
|
+
if (descriptorNames.contains(descriptorName) || descriptorValueNames.contains(descriptorValueName)) {
|
86
|
+
if (first) { yaml.print("- "); first = false; }
|
87
|
+
else { yaml.print(" "); }
|
88
|
+
yaml.println("Cdk." + descriptorValueName + ": " + values[i]);
|
89
|
+
}
|
90
|
+
}
|
91
|
+
}
|
92
|
+
}
|
93
|
+
catch (ClassCastException e) { } // sdf properties are stored as molecules properties (strings), ignore them
|
94
|
+
catch (Exception e) { e.printStackTrace(); } // output nothing to yaml
|
95
|
+
}
|
96
|
+
}
|
97
|
+
catch (Exception e) {
|
98
|
+
yaml.println("- {}");
|
99
|
+
e.printStackTrace();
|
100
|
+
continue;
|
101
|
+
}
|
102
|
+
}
|
103
|
+
yaml.close();
|
104
|
+
}
|
105
|
+
catch (Exception e) { e.printStackTrace(); }
|
106
|
+
}
|
107
|
+
|
108
|
+
|
109
|
+
/** HACK to find the class for a descriptor
|
110
|
+
* problem: Descriptor is not always at the end of the class (APolDescriptor), but may be in the middle (AutocorrelationDescriptorPolarizability)
|
111
|
+
* this method makes a class-lookup using trial and error */
|
112
|
+
static String getDescriptorClassName(String descriptorName) {
|
113
|
+
String split = splitCamelCase(descriptorName)+" "; // space mark possible positions for 'Descriptor'
|
114
|
+
for(int i = split.length()-1; i>0; i--) {
|
115
|
+
if (split.charAt(i)==' ') { // iterate over all spaces, starting with the trailing one
|
116
|
+
String test = split.substring(0,i)+"Descriptor"+split.substring(i+1,split.length()); // replace current space with 'Descriptor' ..
|
117
|
+
test = test.replaceAll("\\s",""); // .. and remove other spaces
|
118
|
+
String className = "org.openscience.cdk.qsar.descriptors.molecular." + test;
|
119
|
+
try {
|
120
|
+
Class.forName(className);
|
121
|
+
return className;
|
122
|
+
} catch (ClassNotFoundException e) {}
|
123
|
+
}
|
124
|
+
}
|
125
|
+
System.err.println("Descriptor not found: "+descriptorName);
|
126
|
+
System.exit(1);
|
127
|
+
return null;
|
128
|
+
}
|
129
|
+
|
130
|
+
/** inserts space in between camel words */
|
131
|
+
static String splitCamelCase(String s) {
|
132
|
+
return s.replaceAll(
|
133
|
+
String.format("%s|%s|%s",
|
134
|
+
"(?<=[A-Z])(?=[A-Z][a-z])",
|
135
|
+
"(?<=[^A-Z])(?=[A-Z])",
|
136
|
+
"(?<=[A-Za-z])(?=[^A-Za-z])"
|
137
|
+
),
|
138
|
+
" "
|
139
|
+
);
|
140
|
+
}
|
141
|
+
}
|
data/java/Jmol.jar
ADDED
Binary file
|
Binary file
|
@@ -0,0 +1,15 @@
|
|
1
|
+
import joelib2.feature.FeatureHelper;
|
2
|
+
|
3
|
+
class JoelibDescriptorInfo {
|
4
|
+
public static void main(String[] args) {
|
5
|
+
FeatureHelper helper = FeatureHelper.instance();
|
6
|
+
System.out.println("---"); // document separator for Joelib debug messages
|
7
|
+
for (Object feature : helper.getNativeFeatures() ) {
|
8
|
+
System.out.println("- :java_class: \""+feature.toString()+"\"");
|
9
|
+
// methods for accessing feature descriptions e.g. with
|
10
|
+
// FeatureFactory.instance().getFeature(feature.toString()).getDescription().getText() or
|
11
|
+
// FeatureFactory.instance().getFeature(feature.toString()).getDescription().getHtml()
|
12
|
+
// are defunct
|
13
|
+
}
|
14
|
+
}
|
15
|
+
}
|
Binary file
|
@@ -0,0 +1,60 @@
|
|
1
|
+
import java.util.*;
|
2
|
+
import java.io.*;
|
3
|
+
import joelib2.feature.Feature;
|
4
|
+
import joelib2.feature.FeatureHelper;
|
5
|
+
import joelib2.feature.FeatureFactory;
|
6
|
+
import joelib2.feature.FeatureResult;
|
7
|
+
import joelib2.io.BasicIOType;
|
8
|
+
import joelib2.io.BasicIOTypeHolder;
|
9
|
+
import joelib2.io.BasicReader;
|
10
|
+
import joelib2.io.MoleculeFileHelper;
|
11
|
+
import joelib2.io.MoleculeFileIO;
|
12
|
+
import joelib2.io.MoleculeIOException;
|
13
|
+
import joelib2.molecule.BasicConformerMolecule;
|
14
|
+
|
15
|
+
class JoelibDescriptors {
|
16
|
+
public static void main(String[] args) {
|
17
|
+
|
18
|
+
String[] features = null;
|
19
|
+
features = new String[args.length-1];
|
20
|
+
System.arraycopy(args,1,features,0,args.length-1);
|
21
|
+
|
22
|
+
FeatureFactory factory = FeatureFactory.instance();
|
23
|
+
MoleculeFileIO loader = null;
|
24
|
+
String line = new String();
|
25
|
+
String sdf = new String();
|
26
|
+
try {
|
27
|
+
// parse 3d sdf from file and calculate descriptors
|
28
|
+
InputStream is = new FileInputStream(args[0]);
|
29
|
+
PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"joelib.yaml"));
|
30
|
+
BasicIOType inType = BasicIOTypeHolder.instance().getIOType("SDF");
|
31
|
+
loader = MoleculeFileHelper.getMolReader(is, inType);
|
32
|
+
BasicConformerMolecule mol = new BasicConformerMolecule(inType, inType);
|
33
|
+
while (true) {
|
34
|
+
try {
|
35
|
+
Boolean success = loader.read(mol);
|
36
|
+
if (!success) { break; } // last molecule
|
37
|
+
for (int i =0; i < features.length; i++) {
|
38
|
+
String name = "joelib2.feature.types." + features[i];
|
39
|
+
Feature feature = factory.getFeature(name);
|
40
|
+
FeatureResult result = feature.calculate(mol);
|
41
|
+
if (i == 0) { yaml.print("- "); }
|
42
|
+
else { yaml.print(" "); }
|
43
|
+
yaml.print( "Joelib."+features[i]+": " );
|
44
|
+
yaml.println( result.toString() );
|
45
|
+
}
|
46
|
+
|
47
|
+
}
|
48
|
+
catch (Exception e) {
|
49
|
+
System.err.println(e.toString());
|
50
|
+
e.printStackTrace();
|
51
|
+
}
|
52
|
+
}
|
53
|
+
yaml.close();
|
54
|
+
}
|
55
|
+
catch (Exception e) {
|
56
|
+
System.err.println(e.toString());
|
57
|
+
e.printStackTrace();
|
58
|
+
}
|
59
|
+
}
|
60
|
+
}
|
data/java/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Build script for the Java descriptor helpers.
# Each entry pairs a Java class name with the classpath needed to compile it.
java_classes = [
  ["CdkDescriptors", "cdk-1.4.19.jar"],
  ["CdkDescriptorInfo", "cdk-1.4.19.jar"],
  ["JoelibDescriptors", "joelib2.jar:."],
  ["JoelibDescriptorInfo", "joelib2.jar:."],
]

# The default task depends on the class file of every listed Java class.
task :default => java_classes.map { |name, _classpath| "#{name}.class" }

java_classes.each do |name, classpath|
  # File task: the class file is rebuilt from the .java source of the same name.
  file "#{name}.class" => "#{name}.java" do
    # sh echoes the command and aborts the build when javac exits non-zero;
    # backticks would discard the exit status and report a failed compile as success.
    sh "javac", "-classpath", classpath, "#{name}.java"
  end
end
|
data/java/cdk-1.4.19.jar
ADDED
Binary file
|
data/java/joelib2.jar
ADDED
Binary file
|
data/java/log4j.jar
ADDED
Binary file
|
data/lazar.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for lazar. The file list is taken from `git ls-files`,
# so the gem has to be built from a git checkout.
$:.push File.expand_path("../lib", __FILE__)

Gem::Specification.new do |s|
  s.name        = "lazar"
  # Resolve VERSION next to this file so the gemspec loads from any working directory.
  s.version     = File.read(File.expand_path("../VERSION", __FILE__)).strip
  # One array element per author instead of a single comma-joined string.
  s.authors     = [
    "Christoph Helma",
    "Martin Guetlein",
    "Andreas Maunz",
    "Micha Rautenberg",
    "David Vorgrimmler",
    "Denis Gebele"
  ]
  s.email       = ["helma@in-silico.ch"]
  s.homepage    = "http://github.com/opentox/lazar"
  s.summary     = %q{Lazar framework}
  s.description = %q{Libraries for lazy structure-activity relationships and read-across.}
  # SPDX identifier for the GPL version 3 text shipped as LICENSE.
  s.license     = 'GPL-3.0'

  # s.rubyforge_project is intentionally not set: the attribute is deprecated in RubyGems.

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Native extension build script, run by RubyGems at install time.
  s.extensions    = %w[ext/lazar/extconf.rb]
  s.require_paths = ["lib"]

  # runtime dependencies
  s.add_runtime_dependency "bundler"
  s.add_runtime_dependency "rest-client"
  s.add_runtime_dependency 'nokogiri'
  s.add_runtime_dependency 'rserve-client'
  s.add_runtime_dependency "mongoid", '~> 5.0beta'

end
|