RubyGems - chemruby - Versions diffs - 0.9.3 → 1.1.9 - Mend

chemruby 0.9.3 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (127)

data/README +2 -2
data/Rakefile +67 -63
data/ext/extconf.rb +2 -0
data/ext/subcomp.c +461 -320
data/ext/utils.c +56 -0
data/ext/utils.h +13 -0
data/lib/chem.rb +34 -8
data/lib/chem/db.rb +8 -0
data/lib/chem/db/cansmi.rb +1 -1
data/lib/chem/db/cdx.rb +1 -1
data/lib/chem/db/cml.rb +52 -0
data/lib/chem/db/gd.rb +64 -0
data/lib/chem/db/gspan.rb +2 -2
data/lib/chem/db/kcf_rpair.rb +34 -0
data/lib/chem/db/kegg.rb +35 -1
data/lib/chem/db/mdl.rb +75 -34
data/lib/chem/db/opsin.rb +24 -0
data/lib/chem/db/pdb.rb +105 -0
data/lib/chem/db/pdf.rb +2 -0
data/lib/chem/db/pubchem.rb +1071 -88
data/lib/chem/db/rmagick.rb +5 -3
data/lib/chem/db/sdf.rb +28 -2
data/lib/chem/db/smiles/smiles.ry +27 -25
data/lib/chem/db/smiles/smiparser.rb +29 -27
data/lib/chem/db/types/type_gd.rb +35 -0
data/lib/chem/db/types/type_gspan.rb +2 -2
data/lib/chem/db/types/type_kcf.rb +19 -0
data/lib/chem/db/types/type_kegg.rb +2 -0
data/lib/chem/db/types/type_mdl.rb +1 -1
data/lib/chem/db/types/type_png.rb +5 -1
data/lib/chem/db/types/type_rdf.rb +22 -0
data/lib/chem/db/types/type_xyz.rb +1 -1
data/lib/chem/db/vector.rb +19 -3
data/lib/chem/model.rb +5 -2
data/lib/chem/utils.rb +17 -1
data/lib/chem/utils/bitdb.rb +49 -0
data/lib/chem/utils/cas.rb +28 -0
data/lib/chem/utils/cdk.rb +403 -0
data/lib/chem/utils/fingerprint.rb +98 -0
data/lib/chem/utils/geometry.rb +8 -0
data/lib/chem/utils/net.rb +303 -0
data/lib/chem/utils/once.rb +28 -0
data/lib/chem/utils/openbabel.rb +204 -0
data/lib/chem/utils/sssr.rb +33 -25
data/lib/chem/utils/sub.rb +6 -0
data/lib/chem/utils/transform.rb +9 -8
data/lib/chem/utils/ullmann.rb +138 -95
data/lib/graph.rb +5 -6
data/lib/graph/utils.rb +8 -0
data/sample/calc_maximum_common_subgraph.rb +27 -0
data/sample/calc_properties.rb +9 -0
data/sample/data/atp.mol +69 -0
data/sample/data/pioglitazone.mol +58 -0
data/sample/data/rosiglitazone.mol +55 -0
data/sample/data/troglitazone.mol +70 -0
data/sample/find_compound_by_keggapi.rb +19 -0
data/sample/generate_inchi.rb +7 -0
data/sample/generate_substructurekey.rb +11 -0
data/sample/images/ex6.rb +17 -0
data/sample/images/ex7.rb +18 -0
data/sample/iupac2mol.rb +8 -0
data/sample/kekule.rb +13 -0
data/sample/logp.rb +4 -0
data/sample/mcs.rb +13 -0
data/sample/mol2pdf.rb +8 -0
data/sample/pubchem_fetch.rb +8 -0
data/sample/pubchem_search.rb +12 -0
data/sample/rosiglitazone.mol +57 -0
data/sample/smarts.rb +10 -0
data/sample/structure_match.rb +8 -0
data/sample/structure_match_color.rb +22 -0
data/sample/thiazolidinedione.mol +19 -0
data/sample/troglitazone.mol +232 -0
data/sample/vicinity.rb +8 -0
data/test/data/CID_704.sdf +236 -0
data/test/data/CID_994.sdf +146 -0
data/test/data/db_EXPT03276.txt +321 -0
data/test/data/pioglitazone.mol +58 -0
data/test/data/rosiglitazone.mol +55 -0
data/test/data/thiazolidinedione.mol +19 -0
data/test/data/troglitazone.mol +70 -0
data/test/{test_adj.rb → tc_adj.rb} +0 -0
data/test/{test_canonical_smiles.rb → tc_canonical_smiles.rb} +0 -0
data/test/tc_casrn.rb +17 -0
data/test/tc_cdk.rb +89 -0
data/test/{test_cdx.rb → tc_cdx.rb} +0 -0
data/test/{test_chem.rb → tc_chem.rb} +0 -0
data/test/{test_cluster.rb → tc_cluster.rb} +0 -0
data/test/{test_db.rb → tc_db.rb} +0 -0
data/test/tc_develop.rb +38 -0
data/test/tc_drugbank.rb +13 -0
data/test/{test_eps.rb → tc_eps.rb} +0 -0
data/test/tc_gd.rb +8 -0
data/test/{test_geometry.rb → tc_geometry.rb} +0 -0
data/test/tc_graph.rb +15 -0
data/test/{test_gspan.rb → tc_gspan.rb} +0 -0
data/test/{test_iupac.rb → tc_iupac.rb} +0 -0
data/test/{test_kcf.rb → tc_kcf.rb} +0 -0
data/test/{test_kcf_glycan.rb → tc_kcf_glycan.rb} +0 -0
data/test/{test_kegg.rb → tc_kegg.rb} +13 -0
data/test/{test_linucs.rb → tc_linucs.rb} +0 -0
data/test/{test_mdl.rb → tc_mdl.rb} +20 -0
data/test/{test_mol2.rb → tc_mol2.rb} +1 -1
data/test/{test_morgan.rb → tc_morgan.rb} +0 -0
data/test/tc_net.rb +5 -0
data/test/tc_once.rb +29 -0
data/test/tc_openbabel.rb +57 -0
data/test/{test_pdf.rb → tc_pdf.rb} +0 -0
data/test/{test_prop.rb → tc_prop.rb} +1 -1
data/test/tc_pubchem.rb +32 -0
data/test/{test_rmagick.rb → tc_rmagick.rb} +0 -0
data/test/{test_sbdb.rb → tc_sbdb.rb} +0 -0
data/test/{test_sdf.rb → tc_sdf.rb} +2 -0
data/test/{test_smiles.rb → tc_smiles.rb} +46 -30
data/test/tc_sssr.rb +1 -0
data/test/{test_sub.rb → tc_sub.rb} +0 -0
data/test/tc_subcomp.rb +59 -0
data/test/{test_traverse.rb → tc_traverse.rb} +0 -0
data/test/{test_writer.rb → tc_writer.rb} +0 -0
data/test/{test_xyz.rb → tc_xyz.rb} +0 -0
data/test/ts_current.rb +11 -0
data/test/ts_image.rb +6 -0
data/test/ts_main.rb +12 -0
metadata +259 -194
data/lib/chem/utils/graph_db.rb +0 -146
data/test/test_sssr.rb +0 -18
data/test/test_subcomp.rb +0 -37

data/lib/chem/utils/sssr.rb CHANGED

@@ -31,31 +31,35 @@ module Chem
       end
     end
+    # Fix me! This is not sufficient
     def canonical_ring ring
-#      ring.sort # Fix me! This is not sufficient
-      ring.sort{|a, b| @atoms.index(a) <=> @atoms.index(b)}
+      ring.sort{|a, b| nodes.index(a) <=> nodes.index(b)}
     end
+    # Returns Smallest Set of Smallest Ring
     def find_sssr
-      return @sssr if @sssr
-      fullSet = []
+      fullSet = nodes.dup
       trimSet = []
       rings = []
-      @mol = {}
-#     mol = {1=>[2, 5], 2=> [1, 3], 3=> [2, 4], 4=>[3, 5], 5=>[4,1]}
-#       @nodes.each do |k, atom|
-# 	atom.set_neighbor
-# 	@mol[atom] = atom.neighbor
-#       end
+      mol = {}
+      nodes.each do |node|
+        mol[node] = []
+        adjacent_to(node).each do |bond, atom|
+          mol[node] << atom
+        end
+      end
       loop do
 	nodesN2 = []
 	smallest_degree = 10
 	smallest = nil
-	@mol.each do |k, a|
+	mol.each do |k, a|
 	  case a.length
 	  when 0
-	    @mol.delete(k)# Is this OK?
+	    mol.delete(k)# Is this OK?
 	    trimSet.push(k)
 	  when 2
 	    nodesN2.push(k)
@@ -65,36 +69,40 @@ module Chem
 	    smallest_degree = a.length
 	  end
 	end
 	case smallest_degree
 	when 1
-	  trim(smallest)
+	  trim(mol, smallest)
 	when 2
 	  nodesN2.each do |k|
 	    ring = find_smallest_ring(k)
-#	    rings.push(canonical_ring(ring)) if !rings.include?(canonical_ring(ring))
-	    rings.push(canonical_ring(ring)) if ring && !rings.include?(canonical_ring(ring))
+            if ring && !rings.include?(canonical_ring(ring))
+              rings.push(canonical_ring(ring))
+            end
 	  end
 	  nodesN2.each do |k|
-	    trim(k)
+	    trim(mol, k)
 	  end
 	when 3
 	  ring = find_smallest_ring(smallest)
-	  trim(smallest)
+	  trim(mol, smallest)
 	end
-	break if @mol.length  == 0
+	break if mol.length  == 0
       end
-      @sssr = rings
+      rings
     end
-    def trim smallest
-      if @mol.length > 0 && @mol.include?(smallest)
-	@mol[smallest].each do |n|
-	  @mol[n] = @mol[n] - [smallest]
-	  @mol.delete(smallest)
-	  @mol.delete(n) if @mol[n].length == 0
+    def trim mol, smallest
+      if mol.length > 0 && mol.include?(smallest)
+	mol[smallest].each do |n|
+	  mol[n] = mol[n] - [smallest]
+	  mol.delete(smallest)
+	  mol.delete(n) if mol[n].length == 0
 	end
       end
     end
+    private :trim
   end
 end

data/lib/chem/utils/sub.rb CHANGED

@@ -1,3 +1,9 @@
+#
+# = chem/utils/sub.rb - Subgraph
+#
+# Author::	Nobuya Tanaka <t@chemruby.org>
+#
+# Copyright::	Copyright (c) 2005, 2006 ChemRuby project
 module Chem
   module Molecule

data/lib/chem/utils/transform.rb CHANGED

@@ -8,10 +8,11 @@ module Chem
     module TwoDimension
-      def initialize
-        super
-        @pos = Vector[0.0, 0.0]
-      end
+#      attr_reader :pos
+#       def initialize
+#         super
+#         @pos = Vector[0.0, 0.0]
+#       end
       def pos ; @pos ||= Vector[@x, @y, @z] ; end
       def x ; pos[0] ; end
@@ -23,10 +24,10 @@ module Chem
     module ThreeDimension
       include TwoDimension
-      def initialize
-        super
-        @pos = Vector[0.0, 0.0]
-      end
+#       def initialize
+#         super
+#         @pos = Vector[0.0, 0.0]
+#       end
       def z ; pos[2] ; end
       def z=(z_val) ; pos[2] = z_val ; end

data/lib/chem/utils/ullmann.rb CHANGED

@@ -1,134 +1,177 @@
 #
-# = chem/utils/subgraph.rb - Subgraph isomorphism
+# = chem/utils/ullmann.rb - Subgraph isomorphism
 #
-# Author::	Nobuya Tanaka <tanaka@chemruby.org>
+# Author::	Nobuya Tanaka <t@chemruby.org>
 #
-# Copyright::	Copyright (c) 2001, 2005 ChemRuby project
+# Copyright::	Copyright (c) 2005, 2006 ChemRuby project
 #
-# $Id: ullmann.rb 139 2006-02-07 07:39:20Z tanaka $
+# $Id: ullmann.rb 180 2006-04-19 08:52:15Z tanaka $
 #
-require 'subcomp'
 $ARC = 4 # for 32-bit computer
-module Graph
+ARCH = 32
-  def adj_matrix
-    n_long = (nodes.length - 1) / 32 + 1
-    mat = Array.new(n_long * @nodes.length, 0)
-    nodes.each_with_index do |node, idx|
-      adjacent_to(node).each do |bond, node|
-        keta = nodes.index(node) / 32
-        mat[idx * n_long + keta] += 1 << (nodes.index(node) - keta * 32)
-      end
-    end
-    mat.pack("L*")
-  end
+module Chem
-  def match_by_adj_mat mat, len
-    m = Array.new("0xff", 100).pack("c*")
-    subcomp_match_by_ullmann(mat, len, self.adjacency_list, self.nodes.length, m)
-  end
+  module Molecule
-  def match_by_ullmann other, &block
-    if other.nodes.length == 1
-      self.nodes.find{|node| node.element == other.nodes[0].element}
+    def match_by_ullmann(target, &block)
+      require 'subcomp'
+      Chem.match_by_ullmann(self, target, &block)
     end
-    subcomp_match_by_ullmann(adj_matrix, nodes.length, other.adjacency_list, other.nodes.length, other.matchable(self, &block))
-  end
-  alias match match_by_ullmann
-  # returns match correspondences without duplicate
-  def match_exhaustively other
-    correspond = {}
-    result = []
-    while true
-      match = self.match_by_ullmann(other) do |a, b|
-        a.element == b.element and not (correspond[a] and correspond[a].include? b)
+    def match(target, &block)
+      ary = nil
+      if block_given?
+        ary = match_by_ullmann(target){ |i, j|
+          yield(self.nodes[i], target.nodes[j])
+        }
+      else
+        ary = match_by_ullmann(target)
       end
-      break if not match
-      result.push(match)
-      match.each_with_index do |n, m|
-        (correspond[other.nodes[n]] ||=[]).push @nodes[m]
+      ret = []
+      ary.each do |a|
+        hash = {}
+        a.each_with_index do |i, j|
+          hash[nodes[j]] = target.nodes[i]
+        end
+        hash
+        ret << hash
       end
+      ret
+    end
+    def typ_str
+      nodes.collect{|atom| atom.atomic_number}.pack("l*")
     end
-    result
-  end
-  def matchable other, exlucde = {}
-    n_long = (other.nodes.length - 1) / 32 + 1
-    mat = Array.new(n_long * @nodes.length, 0)
-    @nodes.each_with_index do |node, index|
-      other.nodes.each_with_index do |n, idx|
-        if node.element == n.element
-          keta = idx / 32
-          mat[index * n_long + keta] += 1 << (idx - keta * 32)
+    def adjacent_index
+      nodes.inject([]) do |ret, node|
+        ary = ret[nodes.index(node)] = []
+        adjacent_to(node).each do |bond, ad_node|
+          ary << nodes.index(ad_node)
         end
+        ret
       end
     end
-    mat.pack("L*")
-  end
-  #obsolete
-  def matchable_old other, exlucde = {}
-    n_long = (other.nodes.length - 1) / 32 + 1
-    row_unit = n_long * ( 32 / 8)
-    r = "\0" * 10000
-    if block_given?
-      @nodes.each_with_index do |node, index|
-        other.nodes.each_with_index do |o_node, idx|
-          if yield(node, o_node)
-            col_byte = idx / 8
-            col_bit  = idx - col_byte * 8
-            r[index * row_unit + col_byte] += (1 << col_bit)
+    def bit_mat
+      bm = BitMatrix.new(nodes.length, nodes.length)
+      if edges.length == 0
+        bm.has_matrix = false
+      else
+        adj = {}
+        nodes.each do |node|
+          adj[node] = []
+          adjacent_to(node).each do |bond, to|
+            adj[node] << to
           end
         end
-      end
-    else
-      @nodes.each_with_index do |node, index|
-        other.nodes.each_with_index do |o_node, idx|
-          if node.element == o_node.element or node.element == :R or o_node.element == :R
-            col_byte = idx / 8
-            col_bit  = idx - col_byte * 8
-            r[index * row_unit + col_byte] += (1 << col_bit)
+        nodes.each_with_index do |atom1, idx1|
+          ary = []
+          nodes.each_with_index do |atom2, idx2|
+            if adj[atom1].include?(atom2)
+              bm.set(idx1, idx2)
+            end
           end
         end
       end
+      bm
     end
-    r
   end
-  def adjacency_list
-    ret = []
-    @nodes.each do |node|
-      r = []
-      self.adjacent_to(node).each do |bond, to|
-        r << @nodes.index(to)
+  class BitMatrix
+    attr_reader :height, :widht, :n_bytes
+    attr_accessor :has_matrix
+    def initialize(height, width)
+      @height = height
+      @width  = width
+      @n_bytes = (width - 1) / ARCH + 1
+      @bits = []
+      height.times do |n|
+        @bits[n] = []
+        @n_bytes.times do |m|
+          @bits[n][m] = 0
+        end
       end
-      ret << r
+      @has_matrix = true
     end
-    ret
-  end
-  # Obsolete!?
-  def connection
-    self_adj = []
-    @nodes.each do |node|
-      i = 0
-      self.adjacent_to(node).each do |bond, to|
-        i += 1<< @nodes.index(to)
+    def set(row, col)
+      @bits[row][col / ARCH] += (1 << (col % ARCH))
+    end
+    def to_s
+      s = "     "
+      @width.times{|n| s << "%d" % (n % 10)}
+      s << "\n"
+      @bits.each_with_index do |ary, idx|
+        s << "%3d  " % idx
+        ary.each_with_index do |a, idx2|
+          s << bit_to_str(a, (idx2 == @n_bytes - 1) ? (@width % ARCH) : ARCH)
+        end
+        s << "\n"
       end
-      self_adj << i
+      s
     end
-    self_adj
+    def bit_str
+      @bits.flatten.pack("L*")
+    end
+    def bit_to_str bits, num
+      s = ""
+      num.times do |n|
+        s << (((1 << n) & bits != 0) ? "*" : ".")
+      end
+      s
+    end
+    private :bit_to_str
   end
-end
+  # Database Specification
+  # * idx file
+  # 32 bit : n_bytes
+  class CompoundDB
+    def initialize(name)
+      @current_id = 0
+      @mat = File.open(name + ".mat", "w")
+      @idx = File.open(name + ".idx", "w")
+      @typ = File.open(name + ".typ", "w")
+    end
+    def store(mol)
+      bm = mol.bit_mat
+      @current_id += 1
+      if bm.has_matrix
+        @idx.print [bm.height, bm.n_bytes, @mat.tell, 0].pack("l*")
+        @mat.print bm.bit_str
+      else
+        @idx.print [bm.height, bm.n_bytes, @mat.tell, -1].pack("l*")
+      end
+      @typ.print mol.typ_str
+      @current_id
+    end
+    def close
+      @idx.print [-1, -1, -1].pack("l*")
+      @mat.close
+      @idx.close
+      @typ.close
+    end
-module Chem
-  module Molecule
-    include Graph
   end
 end

data/lib/graph.rb CHANGED

@@ -1,7 +1,7 @@
 #
 # graph.rb - Graph
 #
-#   Copyright (C) 2003-2005 TANAKA Nobuya <tanaka@chemruby.net>
+#   Copyright (C) 2005, 2006 TANAKA Nobuya <t@chemruby.net>
 #
 # $Id: graph.rb 61 2005-10-12 09:17:39Z tanaka $
 #
@@ -9,23 +9,22 @@
 require 'graph/morgan'
 require 'graph/cluster'
+require 'graph/utils'
 module Graph
   attr_accessor :nodes, :edges, :adjacencies
   def each
-    @nodes.each do |atom|
+    nodes.each do |atom|
       yield atom
     end
   end
   def adjacent_to(atom)
-    #       instance_eval "alias :tmp_adjacent_to :adjacent_to"
-    #       instance_eval "alias :adjacent_to :adjacencies"
     if @adjacencies == nil
-      @adjacencies = {}
-      @edges.each do |bond, atom_a, atom_b|
+      @adjacencies = Hash.new
+      edges.each do |bond, atom_a, atom_b|
         (@adjacencies[atom_a] ||= []).push([bond, atom_b])
         (@adjacencies[atom_b] ||= []).push([bond, atom_a])
       end