lazar 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.yardopts +4 -0
- data/Gemfile +2 -0
- data/LICENSE +674 -0
- data/README.md +44 -0
- data/Rakefile +1 -0
- data/VERSION +1 -0
- data/ext/lazar/extconf.rb +87 -0
- data/java/CdkDescriptorInfo.class +0 -0
- data/java/CdkDescriptorInfo.java +22 -0
- data/java/CdkDescriptors.class +0 -0
- data/java/CdkDescriptors.java +141 -0
- data/java/Jmol.jar +0 -0
- data/java/JoelibDescriptorInfo.class +0 -0
- data/java/JoelibDescriptorInfo.java +15 -0
- data/java/JoelibDescriptors.class +0 -0
- data/java/JoelibDescriptors.java +60 -0
- data/java/Rakefile +15 -0
- data/java/cdk-1.4.19.jar +0 -0
- data/java/joelib2.jar +0 -0
- data/java/log4j.jar +0 -0
- data/lazar.gemspec +29 -0
- data/lib/SMARTS_InteLigand.txt +983 -0
- data/lib/algorithm.rb +21 -0
- data/lib/bbrc.rb +165 -0
- data/lib/classification.rb +107 -0
- data/lib/compound.rb +254 -0
- data/lib/crossvalidation.rb +187 -0
- data/lib/dataset.rb +334 -0
- data/lib/descriptor.rb +247 -0
- data/lib/error.rb +66 -0
- data/lib/feature.rb +97 -0
- data/lib/lazar-model.rb +170 -0
- data/lib/lazar.rb +69 -0
- data/lib/neighbor.rb +25 -0
- data/lib/opentox.rb +22 -0
- data/lib/overwrite.rb +119 -0
- data/lib/regression.rb +199 -0
- data/lib/rest-client-wrapper.rb +98 -0
- data/lib/similarity.rb +58 -0
- data/lib/unique_descriptors.rb +120 -0
- data/lib/validation.rb +114 -0
- data/mongoid.yml +8 -0
- data/test/all.rb +5 -0
- data/test/compound.rb +100 -0
- data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +13553 -0
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +436 -0
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +568 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +87 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +978 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +1120 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +1113 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +850 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +829 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +1198 -0
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +1505 -0
- data/test/data/EPAFHM.csv +618 -0
- data/test/data/EPAFHM.medi.csv +100 -0
- data/test/data/EPAFHM.mini.csv +22 -0
- data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +581 -0
- data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +1217 -0
- data/test/data/ISSCAN-multi.csv +59 -0
- data/test/data/LOAEL_log_mg_corrected_smiles.csv +568 -0
- data/test/data/LOAEL_log_mmol_corrected_smiles.csv +568 -0
- data/test/data/acetaldehyde.sdf +14 -0
- data/test/data/boiling_points.ext.sdf +11460 -0
- data/test/data/cpdb_100.csv +101 -0
- data/test/data/hamster_carcinogenicity.csv +86 -0
- data/test/data/hamster_carcinogenicity.mini.bool_float.csv +11 -0
- data/test/data/hamster_carcinogenicity.mini.bool_int.csv +11 -0
- data/test/data/hamster_carcinogenicity.mini.bool_string.csv +11 -0
- data/test/data/hamster_carcinogenicity.mini.csv +11 -0
- data/test/data/hamster_carcinogenicity.ntriples +618 -0
- data/test/data/hamster_carcinogenicity.sdf +2805 -0
- data/test/data/hamster_carcinogenicity.xls +0 -0
- data/test/data/hamster_carcinogenicity.yaml +352 -0
- data/test/data/hamster_carcinogenicity_with_errors.csv +88 -0
- data/test/data/kazius.csv +4070 -0
- data/test/data/multi_cell_call.csv +1067 -0
- data/test/data/multi_cell_call_no_dup.csv +1057 -0
- data/test/data/multicolumn.csv +8 -0
- data/test/data/rat_feature_dataset.csv +1179 -0
- data/test/data/wrong_dataset.csv +8 -0
- data/test/dataset-long.rb +117 -0
- data/test/dataset.rb +199 -0
- data/test/descriptor-long.rb +26 -0
- data/test/descriptor.rb +83 -0
- data/test/error.rb +24 -0
- data/test/feature.rb +65 -0
- data/test/fminer-long.rb +38 -0
- data/test/fminer.rb +52 -0
- data/test/lazar-fminer.rb +50 -0
- data/test/lazar-long.rb +72 -0
- data/test/lazar-physchem-short.rb +27 -0
- data/test/setup.rb +6 -0
- data/test/validation.rb +41 -0
- metadata +212 -0
data/README.md
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
lazar
|
|
2
|
+
=====
|
|
3
|
+
|
|
4
|
+
Ruby libraries for the lazar framework
|
|
5
|
+
|
|
6
|
+
Dependencies
|
|
7
|
+
------------
|
|
8
|
+
|
|
9
|
+
lazar depends on a couple of external programs and libraries. On Debian 7 "Wheezy" systems you can install them with
|
|
10
|
+
|
|
11
|
+
`sudo apt-get install build-essential ruby ruby-dev git cmake swig r-base r-base-dev r-cran-rserve openjdk-7-jre libgsl0-dev libxml2-dev zlib1g-dev libcairo2-dev`
|
|
12
|
+
|
|
13
|
+
You will also need MongoDB version 3.0 or later, but Debian "Wheezy" only provides version 2.4. Please follow the instructions at http://docs.mongodb.org/manual/tutorial/install-mongodb-on-debian/:
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv 7F0CEB10
|
|
17
|
+
echo "deb http://repo.mongodb.org/apt/debian wheezy/mongodb-org/3.0 main" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
|
|
18
|
+
sudo apt-get update
|
|
19
|
+
sudo apt-get install -y mongodb-org
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Installation
|
|
23
|
+
------------
|
|
24
|
+
|
|
25
|
+
`gem install lazar`
|
|
26
|
+
|
|
27
|
+
Please be patient: compiling the OpenBabel and Fminer libraries can be very time-consuming. If the installation fails, you can try to install manually:
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
git clone https://github.com/opentox/lazar.git
|
|
31
|
+
cd lazar
|
|
32
|
+
ruby ext/lazar/extconf.rb
|
|
33
|
+
bundle install
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
The output should give you more verbose information that can help in debugging (e.g. to identify missing libraries).
|
|
37
|
+
|
|
38
|
+
Documentation
|
|
39
|
+
-------------
|
|
40
|
+
* [API documentation](http://rdoc.info/gems/lazar)
|
|
41
|
+
|
|
42
|
+
Copyright
|
|
43
|
+
---------
|
|
44
|
+
Copyright (c) 2009-2015 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
|
data/Rakefile
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require "bundler/gem_tasks"
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.0.1
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
require 'fileutils'
|
|
2
|
+
require 'rbconfig'
|
|
3
|
+
|
|
4
|
+
main_dir = File.expand_path(File.join(File.dirname(__FILE__),"..",".."))
|
|
5
|
+
|
|
6
|
+
# install OpenBabel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
openbabel_version = "2.3.2"
|
|
10
|
+
|
|
11
|
+
openbabel_dir = File.join main_dir, "openbabel"
|
|
12
|
+
src_dir = openbabel_dir #File.join openbabel_dir, "openbabel-#{openbabel_version}"
|
|
13
|
+
build_dir = File.join src_dir, "build"
|
|
14
|
+
install_dir = openbabel_dir
|
|
15
|
+
install_lib_dir = File.join install_dir, "lib"
|
|
16
|
+
lib_dir = File.join openbabel_dir, "lib", "openbabel"
|
|
17
|
+
ruby_src_dir = File.join src_dir, "scripts", "ruby"
|
|
18
|
+
|
|
19
|
+
begin
|
|
20
|
+
nr_processors = [`grep -c processor /proc/cpuinfo 2>/dev/null`.to_i, 1].max # parallel make jobs (speeds up compilation); backticks never raise, so clamp to 1 when /proc/cpuinfo is missing (non-Linux)
|
|
21
|
+
rescue
|
|
22
|
+
nr_processors = 1
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
FileUtils.mkdir_p openbabel_dir
|
|
26
|
+
Dir.chdir main_dir do
|
|
27
|
+
FileUtils.rm_rf src_dir
|
|
28
|
+
puts "Downloading OpenBabel sources"
|
|
29
|
+
system "git clone https://github.com/openbabel/openbabel.git"
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
FileUtils.mkdir_p build_dir
|
|
33
|
+
FileUtils.mkdir_p install_dir
|
|
34
|
+
Dir.chdir build_dir do
|
|
35
|
+
puts "Configuring OpenBabel"
|
|
36
|
+
cmake = "cmake #{src_dir} -DCMAKE_INSTALL_PREFIX=#{install_dir} -DBUILD_GUI=OFF -DENABLE_TESTS=OFF -DRUN_SWIG=ON -DRUBY_BINDINGS=ON"
|
|
37
|
+
# set rpath for local installations
|
|
38
|
+
# http://www.cmake.org/Wiki/CMake_RPATH_handling
|
|
39
|
+
# http://vtk.1045678.n5.nabble.com/How-to-force-cmake-not-to-remove-install-rpath-td5721193.html
|
|
40
|
+
cmake += " -DCMAKE_INSTALL_RPATH:STRING=\"#{install_lib_dir}\""
|
|
41
|
+
system cmake
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# local installation in gem directory
|
|
45
|
+
Dir.chdir build_dir do
|
|
46
|
+
puts "Compiling OpenBabel sources."
|
|
47
|
+
system "make -j#{nr_processors}"
|
|
48
|
+
system "make install"
|
|
49
|
+
ENV["PKG_CONFIG_PATH"] = File.dirname(File.expand_path(Dir["#{install_dir}/**/openbabel*pc"].first))
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
ob_include= File.expand_path File.join(File.dirname(__FILE__),"../../openbabel/include/openbabel-2.0")
|
|
53
|
+
ob_lib= File.expand_path File.join(File.dirname(__FILE__),"../../openbabel/lib")
|
|
54
|
+
|
|
55
|
+
# compile ruby bindings
|
|
56
|
+
=begin
|
|
57
|
+
puts "Compiling and installing OpenBabel Ruby bindings."
|
|
58
|
+
Dir.chdir ruby_src_dir do
|
|
59
|
+
# fix rpath
|
|
60
|
+
system "sed -i 's|with_ldflags.*$|with_ldflags(\"#\$LDFLAGS -dynamic -Wl,-rpath,#{install_lib_dir}\") do|' #{File.join(ruby_src_dir,'extconf.rb')}"
|
|
61
|
+
system "#{RbConfig.ruby} extconf.rb --with-openbabel-include=#{ob_include} --with-openbabel-lib=#{ob_lib}"
|
|
62
|
+
system "make -j#{nr_processors}"
|
|
63
|
+
end
|
|
64
|
+
=end
|
|
65
|
+
|
|
66
|
+
# install fminer
|
|
67
|
+
fminer_dir = File.join main_dir, "libfminer"
|
|
68
|
+
system "git clone https://github.com/amaunz/fminer2.git #{fminer_dir}" # https: GitHub no longer serves the unauthenticated git:// protocol
|
|
69
|
+
|
|
70
|
+
["libbbrc","liblast"].each do |lib|
|
|
71
|
+
FileUtils.cd File.join(fminer_dir,lib)
|
|
72
|
+
system "sed -i 's,^INCLUDE_OB.*,INCLUDE_OB\ =\ #{ob_include},g' Makefile"
|
|
73
|
+
system "sed -i 's,^LDFLAGS_OB.*,LDFLAGS_OB\ =\ #{ob_lib},g' Makefile"
|
|
74
|
+
system "sed -i 's,^INCLUDE_RB.*,INCLUDE_RB\ =\ #{RbConfig::CONFIG['rubyhdrdir']},g' Makefile"
|
|
75
|
+
# TODO fix in fminer Makefile
|
|
76
|
+
system "sed -i 's,-g, -g -I #{RbConfig::CONFIG['rubyhdrdir']} -I #{RbConfig::CONFIG['rubyarchhdrdir']} -I,' Makefile" # fix include path (CH)
|
|
77
|
+
system "sed -i '74s/$(CC)/$(CC) -Wl,-rpath,#{ob_lib.gsub('/','\/')} -L/' Makefile" # fix library path (CH)
|
|
78
|
+
system "make ruby"
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# install last-utils
|
|
82
|
+
FileUtils.cd main_dir
|
|
83
|
+
system "git clone https://github.com/amaunz/last-utils.git" # https: GitHub no longer serves the unauthenticated git:// protocol
|
|
84
|
+
FileUtils.cd File.join(main_dir,"last-utils")
|
|
85
|
+
`sed -i '8s/"openbabel", //' lu.rb`
|
|
86
|
+
|
|
87
|
+
# install R packages
|
|
Binary file
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import java.util.*;
|
|
2
|
+
import org.openscience.cdk.qsar.descriptors.molecular.*;
|
|
3
|
+
import org.openscience.cdk.qsar.*;
|
|
4
|
+
|
|
5
|
+
class CdkDescriptorInfo {
|
|
6
|
+
public static void main(String[] args) {
|
|
7
|
+
|
|
8
|
+
DescriptorEngine engine = new DescriptorEngine(DescriptorEngine.MOLECULAR);
|
|
9
|
+
|
|
10
|
+
for (Iterator<IDescriptor> it = engine.getDescriptorInstances().iterator(); it.hasNext(); ) {
|
|
11
|
+
IDescriptor descriptor = it.next();
|
|
12
|
+
String cdk_class = descriptor.getClass().toString().replaceAll("class ","");
|
|
13
|
+
System.out.println("- :java_class: \""+cdk_class+"\"");
|
|
14
|
+
String description = engine.getDictionaryDefinition(cdk_class).replaceAll("^\\s+", "" ).replaceAll("\\s+$", "").replaceAll("\\s+", " ");
|
|
15
|
+
System.out.println(" :description: \""+description+"\"");
|
|
16
|
+
System.out.println(" :names:");
|
|
17
|
+
for (String name : descriptor.getDescriptorNames()) {
|
|
18
|
+
System.out.println(" - \""+name+"\"");
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
Binary file
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import java.util.*;
|
|
2
|
+
import java.io.*;
|
|
3
|
+
import org.openscience.cdk.DefaultChemObjectBuilder;
|
|
4
|
+
import org.openscience.cdk.interfaces.IMolecule;
|
|
5
|
+
import org.openscience.cdk.io.iterator.IteratingMDLReader;
|
|
6
|
+
import org.openscience.cdk.qsar.*;
|
|
7
|
+
import org.openscience.cdk.qsar.DescriptorValue;
|
|
8
|
+
import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector;
|
|
9
|
+
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
|
|
10
|
+
import org.openscience.cdk.exception.NoSuchAtomTypeException;
|
|
11
|
+
|
|
12
|
+
class CdkDescriptors {
|
|
13
|
+
public static void main(String[] args) {
|
|
14
|
+
|
|
15
|
+
if (args==null || args.length<2) {
|
|
16
|
+
System.err.println("required params: <sd-file> <descriptor1> <descriptor2(optional)> <descriptor3(optional)> ...");
|
|
17
|
+
System.exit(1);
|
|
18
|
+
}
|
|
19
|
+
if (! new File(args[0]).exists()){
|
|
20
|
+
System.err.println("file not found "+args[0]);
|
|
21
|
+
System.exit(1);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
// command line descriptor params can be either "descriptorName" or "descriptorValueName"
|
|
25
|
+
// terminology:
|
|
26
|
+
// A descriptor can calculate several values, e.g., ALOGP produces ALOGP.ALogP, ALOGP.ALogp2, ALOGP.AMR
|
|
27
|
+
// "descriptorName" ALOGP
|
|
28
|
+
// "valueName" AMR
|
|
29
|
+
// "descriptorValueName" ALOGP.AMR
|
|
30
|
+
DescriptorEngine engine;
|
|
31
|
+
Set<String> classNames = new LinkedHashSet<String>(); // descriptors to be computed
|
|
32
|
+
Set<String> descriptorNames = new LinkedHashSet<String>(); // all values of this descriptor will be printed
|
|
33
|
+
Set<String> descriptorValueNames = new LinkedHashSet<String>(); // only these values of a descriptor will be printed
|
|
34
|
+
for (int i =1; i < args.length; i++) {
|
|
35
|
+
String descriptorName;
|
|
36
|
+
if (args[i].indexOf(".")!=-1) {
|
|
37
|
+
descriptorValueNames.add(args[i]);
|
|
38
|
+
descriptorName = args[i].substring(0,args[i].indexOf("."));
|
|
39
|
+
}
|
|
40
|
+
else {
|
|
41
|
+
descriptorNames.add(args[i]);
|
|
42
|
+
descriptorName = args[i];
|
|
43
|
+
}
|
|
44
|
+
classNames.add(getDescriptorClassName(descriptorName));
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
engine = new DescriptorEngine(new ArrayList<String>(classNames));
|
|
48
|
+
List<IDescriptor> instances = engine.instantiateDescriptors(new ArrayList<String>(classNames));
|
|
49
|
+
List<DescriptorSpecification> specs = engine.initializeSpecifications(instances);
|
|
50
|
+
engine.setDescriptorInstances(instances);
|
|
51
|
+
engine.setDescriptorSpecifications(specs);
|
|
52
|
+
|
|
53
|
+
try {
|
|
54
|
+
BufferedReader br = new BufferedReader(new FileReader(args[0]));
|
|
55
|
+
PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"cdk.yaml"));
|
|
56
|
+
// parse 3d sdf from file and calculate descriptors
|
|
57
|
+
IteratingMDLReader reader = new IteratingMDLReader( br, DefaultChemObjectBuilder.getInstance());
|
|
58
|
+
int c = 0;
|
|
59
|
+
while (reader.hasNext()) {
|
|
60
|
+
try {
|
|
61
|
+
System.out.println("computing "+(args.length-1)+" descriptors for compound "+(++c));
|
|
62
|
+
IMolecule molecule = (IMolecule)reader.next();
|
|
63
|
+
molecule = (IMolecule) AtomContainerManipulator.removeHydrogens(molecule);
|
|
64
|
+
try {
|
|
65
|
+
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
|
|
66
|
+
}
|
|
67
|
+
catch (NoSuchAtomTypeException e) {
|
|
68
|
+
e.printStackTrace();
|
|
69
|
+
}
|
|
70
|
+
CDKHueckelAromaticityDetector.detectAromaticity(molecule);
|
|
71
|
+
|
|
72
|
+
engine.process(molecule);
|
|
73
|
+
Map<Object,Object> properties = molecule.getProperties();
|
|
74
|
+
Boolean first = true;
|
|
75
|
+
for (Map.Entry<Object, Object> entry : properties.entrySet()) {
|
|
76
|
+
try {
|
|
77
|
+
if ((entry.getKey() instanceof DescriptorSpecification) && (entry.getValue() instanceof DescriptorValue)) {
|
|
78
|
+
DescriptorSpecification property = (DescriptorSpecification)entry.getKey();
|
|
79
|
+
DescriptorValue value = (DescriptorValue)entry.getValue();
|
|
80
|
+
String[] values = value.getValue().toString().split(",");
|
|
81
|
+
for (int i = 0; i < values.length; i++) {
|
|
82
|
+
String cdk_class = property.getImplementationTitle();
|
|
83
|
+
String descriptorName = cdk_class.substring(cdk_class.lastIndexOf(".")+1).replace("Descriptor","");
|
|
84
|
+
String descriptorValueName = descriptorName + "." + value.getNames()[i];
|
|
85
|
+
if (descriptorNames.contains(descriptorName) || descriptorValueNames.contains(descriptorValueName)) {
|
|
86
|
+
if (first) { yaml.print("- "); first = false; }
|
|
87
|
+
else { yaml.print(" "); }
|
|
88
|
+
yaml.println("Cdk." + descriptorValueName + ": " + values[i]);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
catch (ClassCastException e) { } // sdf properties are stored as molecules properties (strings), ignore them
|
|
94
|
+
catch (Exception e) { e.printStackTrace(); } // output nothing to yaml
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
catch (Exception e) {
|
|
98
|
+
yaml.println("- {}");
|
|
99
|
+
e.printStackTrace();
|
|
100
|
+
continue;
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
yaml.close();
|
|
104
|
+
}
|
|
105
|
+
catch (Exception e) { e.printStackTrace(); }
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
/** HACK to find the class for a descriptor
|
|
110
|
+
* problem: 'Descriptor' is not always at the end of the class name (APolDescriptor), but may be in the middle (AutocorrelationDescriptorPolarizability)
|
|
111
|
+
* this method makes a class-lookup using trial and error */
|
|
112
|
+
static String getDescriptorClassName(String descriptorName) {
|
|
113
|
+
String split = splitCamelCase(descriptorName)+" "; // space mark possible positions for 'Descriptor'
|
|
114
|
+
for(int i = split.length()-1; i>0; i--) {
|
|
115
|
+
if (split.charAt(i)==' ') { // iterate over all spaces, starting with the trailing one
|
|
116
|
+
String test = split.substring(0,i)+"Descriptor"+split.substring(i+1,split.length()); // replace current space with 'Descriptor' ..
|
|
117
|
+
test = test.replaceAll("\\s",""); // .. and remove other spaces
|
|
118
|
+
String className = "org.openscience.cdk.qsar.descriptors.molecular." + test;
|
|
119
|
+
try {
|
|
120
|
+
Class.forName(className);
|
|
121
|
+
return className;
|
|
122
|
+
} catch (ClassNotFoundException e) {}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
System.err.println("Descriptor not found: "+descriptorName);
|
|
126
|
+
System.exit(1);
|
|
127
|
+
return null;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/** inserts space in between camel words */
|
|
131
|
+
static String splitCamelCase(String s) {
|
|
132
|
+
return s.replaceAll(
|
|
133
|
+
String.format("%s|%s|%s",
|
|
134
|
+
"(?<=[A-Z])(?=[A-Z][a-z])",
|
|
135
|
+
"(?<=[^A-Z])(?=[A-Z])",
|
|
136
|
+
"(?<=[A-Za-z])(?=[^A-Za-z])"
|
|
137
|
+
),
|
|
138
|
+
" "
|
|
139
|
+
);
|
|
140
|
+
}
|
|
141
|
+
}
|
data/java/Jmol.jar
ADDED
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import joelib2.feature.FeatureHelper;
|
|
2
|
+
|
|
3
|
+
class JoelibDescriptorInfo {
|
|
4
|
+
public static void main(String[] args) {
|
|
5
|
+
FeatureHelper helper = FeatureHelper.instance();
|
|
6
|
+
System.out.println("---"); // document separator for Joelib debug messages
|
|
7
|
+
for (Object feature : helper.getNativeFeatures() ) {
|
|
8
|
+
System.out.println("- :java_class: \""+feature.toString()+"\"");
|
|
9
|
+
// methods for accessing feature descriptions e.g. with
|
|
10
|
+
// FeatureFactory.instance().getFeature(feature.toString()).getDescription().getText() or
|
|
11
|
+
// FeatureFactory.instance().getFeature(feature.toString()).getDescription().getHtml()
|
|
12
|
+
// are defunct
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
}
|
|
Binary file
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import java.util.*;
|
|
2
|
+
import java.io.*;
|
|
3
|
+
import joelib2.feature.Feature;
|
|
4
|
+
import joelib2.feature.FeatureHelper;
|
|
5
|
+
import joelib2.feature.FeatureFactory;
|
|
6
|
+
import joelib2.feature.FeatureResult;
|
|
7
|
+
import joelib2.io.BasicIOType;
|
|
8
|
+
import joelib2.io.BasicIOTypeHolder;
|
|
9
|
+
import joelib2.io.BasicReader;
|
|
10
|
+
import joelib2.io.MoleculeFileHelper;
|
|
11
|
+
import joelib2.io.MoleculeFileIO;
|
|
12
|
+
import joelib2.io.MoleculeIOException;
|
|
13
|
+
import joelib2.molecule.BasicConformerMolecule;
|
|
14
|
+
|
|
15
|
+
class JoelibDescriptors {
|
|
16
|
+
public static void main(String[] args) {
|
|
17
|
+
|
|
18
|
+
String[] features = null;
|
|
19
|
+
features = new String[args.length-1];
|
|
20
|
+
System.arraycopy(args,1,features,0,args.length-1);
|
|
21
|
+
|
|
22
|
+
FeatureFactory factory = FeatureFactory.instance();
|
|
23
|
+
MoleculeFileIO loader = null;
|
|
24
|
+
String line = new String();
|
|
25
|
+
String sdf = new String();
|
|
26
|
+
try {
|
|
27
|
+
// parse 3d sdf from file and calculate descriptors
|
|
28
|
+
InputStream is = new FileInputStream(args[0]);
|
|
29
|
+
PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"joelib.yaml"));
|
|
30
|
+
BasicIOType inType = BasicIOTypeHolder.instance().getIOType("SDF");
|
|
31
|
+
loader = MoleculeFileHelper.getMolReader(is, inType);
|
|
32
|
+
BasicConformerMolecule mol = new BasicConformerMolecule(inType, inType);
|
|
33
|
+
while (true) {
|
|
34
|
+
try {
|
|
35
|
+
Boolean success = loader.read(mol);
|
|
36
|
+
if (!success) { break; } // last molecule
|
|
37
|
+
for (int i =0; i < features.length; i++) {
|
|
38
|
+
String name = "joelib2.feature.types." + features[i];
|
|
39
|
+
Feature feature = factory.getFeature(name);
|
|
40
|
+
FeatureResult result = feature.calculate(mol);
|
|
41
|
+
if (i == 0) { yaml.print("- "); }
|
|
42
|
+
else { yaml.print(" "); }
|
|
43
|
+
yaml.print( "Joelib."+features[i]+": " );
|
|
44
|
+
yaml.println( result.toString() );
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
}
|
|
48
|
+
catch (Exception e) {
|
|
49
|
+
System.err.println(e.toString());
|
|
50
|
+
e.printStackTrace();
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
yaml.close();
|
|
54
|
+
}
|
|
55
|
+
catch (Exception e) {
|
|
56
|
+
System.err.println(e.toString());
|
|
57
|
+
e.printStackTrace();
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
data/java/Rakefile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Java class, classpath
|
|
2
|
+
java_classes = [
|
|
3
|
+
["CdkDescriptors", "cdk-1.4.19.jar"],
|
|
4
|
+
["CdkDescriptorInfo", "cdk-1.4.19.jar"],
|
|
5
|
+
["JoelibDescriptors", "joelib2.jar:."],
|
|
6
|
+
["JoelibDescriptorInfo", "joelib2.jar:."],
|
|
7
|
+
]
|
|
8
|
+
|
|
9
|
+
task :default => java_classes.collect{|c| "#{c.first}.class"}
|
|
10
|
+
|
|
11
|
+
java_classes.each do |c|
|
|
12
|
+
file "#{c.first}.class" => "#{c.first}.java" do
|
|
13
|
+
puts `javac -classpath #{c.last} #{c.first}.java`
|
|
14
|
+
end
|
|
15
|
+
end
|
data/java/cdk-1.4.19.jar
ADDED
|
Binary file
|
data/java/joelib2.jar
ADDED
|
Binary file
|
data/java/log4j.jar
ADDED
|
Binary file
|
data/lazar.gemspec
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
|
3
|
+
|
|
4
|
+
Gem::Specification.new do |s|
|
|
5
|
+
s.name = "lazar"
|
|
6
|
+
s.version = File.read("./VERSION").strip
|
|
7
|
+
s.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler, Denis Gebele"]
|
|
8
|
+
s.email = ["helma@in-silico.ch"]
|
|
9
|
+
s.homepage = "http://github.com/opentox/lazar"
|
|
10
|
+
s.summary = %q{Lazar framework}
|
|
11
|
+
s.description = %q{Libraries for lazy structure-activity relationships and read-across.}
|
|
12
|
+
s.license = 'GPL-3'
|
|
13
|
+
|
|
14
|
+
s.rubyforge_project = "lazar"
|
|
15
|
+
|
|
16
|
+
s.files = `git ls-files`.split("\n")
|
|
17
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
|
18
|
+
s.extensions = %w[ext/lazar/extconf.rb]
|
|
19
|
+
s.require_paths = ["lib"]
|
|
20
|
+
|
|
21
|
+
# specify any dependencies here; for example:
|
|
22
|
+
s.add_runtime_dependency "bundler"
|
|
23
|
+
s.add_runtime_dependency "rest-client"
|
|
24
|
+
s.add_runtime_dependency 'nokogiri'
|
|
25
|
+
#s.add_runtime_dependency "openbabel"
|
|
26
|
+
s.add_runtime_dependency 'rserve-client'
|
|
27
|
+
s.add_runtime_dependency "mongoid", '~> 5.0beta'
|
|
28
|
+
|
|
29
|
+
end
|