RubyGems - chemruby - Versions diffs - 0.9.3 - Mend

chemruby 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (241) hide show

data/README +120 -0
data/Rakefile +195 -0
data/ext/extconf.rb +4 -0
data/ext/subcomp.c +416 -0
data/lib/chem.rb +130 -0
data/lib/chem/appl.rb +1 -0
data/lib/chem/appl/chem3dole.rb +36 -0
data/lib/chem/appl/tinker/nucleic.rb +40 -0
data/lib/chem/appl/tinker/tinker_reader.rb +43 -0
data/lib/chem/data.rb +4 -0
data/lib/chem/data/atomic_weight.rb +124 -0
data/lib/chem/data/character.rb +2 -0
data/lib/chem/data/electronegativity.rb +14 -0
data/lib/chem/data/periodic_table.rb +6 -0
data/lib/chem/data/prime_numbers.rb +1 -0
data/lib/chem/data/vdw_radii.rb +1 -0
data/lib/chem/db.rb +64 -0
data/lib/chem/db/cansmi.rb +234 -0
data/lib/chem/db/cdx.rb +1525 -0
data/lib/chem/db/eps.rb +164 -0
data/lib/chem/db/g98.rb +909 -0
data/lib/chem/db/gspan.rb +130 -0
data/lib/chem/db/iupac.rb +5 -0
data/lib/chem/db/iupac/a_1.rb +46 -0
data/lib/chem/db/iupac/iuparser.rb +226 -0
data/lib/chem/db/iupac/iuparser.ry +97 -0
data/lib/chem/db/iupac/postfix.rb +2 -0
data/lib/chem/db/kcf.rb +390 -0
data/lib/chem/db/kcf_glycan.rb +19 -0
data/lib/chem/db/kegg.rb +516 -0
data/lib/chem/db/linucs/linparser.rb +144 -0
data/lib/chem/db/linucs/linucs.ry +53 -0
data/lib/chem/db/mdl.rb +379 -0
data/lib/chem/db/molconnz.rb +12 -0
data/lib/chem/db/mopac.rb +88 -0
data/lib/chem/db/msi.rb +107 -0
data/lib/chem/db/pdb_dic.rb +115 -0
data/lib/chem/db/pdf.rb +131 -0
data/lib/chem/db/pubchem.rb +113 -0
data/lib/chem/db/rmagick.rb +70 -0
data/lib/chem/db/sdf.rb +37 -0
data/lib/chem/db/smbl.rb +88 -0
data/lib/chem/db/smiles.rb +2 -0
data/lib/chem/db/smiles/smiles.ry +203 -0
data/lib/chem/db/smiles/smiparser.rb +375 -0
data/lib/chem/db/swf.rb +74 -0
data/lib/chem/db/sybyl.rb +150 -0
data/lib/chem/db/tinker.rb +77 -0
data/lib/chem/db/types/type_cansmi.rb +9 -0
data/lib/chem/db/types/type_cdx.rb +24 -0
data/lib/chem/db/types/type_gspan.rb +31 -0
data/lib/chem/db/types/type_kcf.rb +28 -0
data/lib/chem/db/types/type_kcf_glycan.rb +26 -0
data/lib/chem/db/types/type_kegg.rb +92 -0
data/lib/chem/db/types/type_mdl.rb +31 -0
data/lib/chem/db/types/type_pdf.rb +33 -0
data/lib/chem/db/types/type_png.rb +31 -0
data/lib/chem/db/types/type_rxn.rb +25 -0
data/lib/chem/db/types/type_sdf.rb +25 -0
data/lib/chem/db/types/type_sybyl.rb +30 -0
data/lib/chem/db/types/type_xyz.rb +26 -0
data/lib/chem/db/vector.rb +128 -0
data/lib/chem/db/xyz.rb +39 -0
data/lib/chem/model.rb +119 -0
data/lib/chem/model/skeleton.rb +37 -0
data/lib/chem/utils.rb +11 -0
data/lib/chem/utils/geometry.rb +27 -0
data/lib/chem/utils/graph_db.rb +146 -0
data/lib/chem/utils/math.rb +17 -0
data/lib/chem/utils/prop.rb +123 -0
data/lib/chem/utils/sssr.rb +101 -0
data/lib/chem/utils/sub.rb +78 -0
data/lib/chem/utils/transform.rb +110 -0
data/lib/chem/utils/traverse.rb +37 -0
data/lib/chem/utils/ullmann.rb +134 -0
data/lib/graph.rb +41 -0
data/lib/graph/cluster.rb +20 -0
data/lib/graph/morgan.rb +38 -0
data/sample/frequent_subgraph.rb +46 -0
data/sample/images/ex1.rb +11 -0
data/sample/images/ex2.rb +4 -0
data/sample/images/ex3.rb +5 -0
data/sample/images/ex4.rb +17 -0
data/sample/images/ex5.rb +10 -0
data/sample/images/mol/adenine.mol +26 -0
data/sample/images/mol/atp.mol +69 -0
data/sample/images/temp/ex5.mol +344 -0
data/sample/kegg_db.rb +116 -0
data/setup.rb +1551 -0
data/test/all.rb +6 -0
data/test/coord_test.rb +17 -0
data/test/ctab_test.rb +31 -0
data/test/data/A_21.tar.gz +0 -0
data/test/data/A_21/aceanthrylene.cdx +0 -0
data/test/data/A_21/aceanthrylene.mol +40 -0
data/test/data/A_21/acenaphthylene.cdx +0 -0
data/test/data/A_21/acenaphthylene.mol +31 -0
data/test/data/A_21/acephenanthrylene.cdx +0 -0
data/test/data/A_21/acephenanthrylene.mol +40 -0
data/test/data/A_21/anthracene.cdx +0 -0
data/test/data/A_21/anthracene.mol +35 -0
data/test/data/A_21/as-indacene.cdx +0 -0
data/test/data/A_21/as-indacene.mol +31 -0
data/test/data/A_21/azulene.cdx +0 -0
data/test/data/A_21/azulene.mol +26 -0
data/test/data/A_21/biphenylene.cdx +0 -0
data/test/data/A_21/biphenylene.mol +31 -0
data/test/data/A_21/chrysene.cdx +0 -0
data/test/data/A_21/chrysene.mol +44 -0
data/test/data/A_21/coronen.cdx +0 -0
data/test/data/A_21/coronen.mol +59 -0
data/test/data/A_21/fluoranthene.cdx +0 -0
data/test/data/A_21/fluoranthene.mol +40 -0
data/test/data/A_21/fluorene.cdx +0 -0
data/test/data/A_21/fluorene.mol +33 -0
data/test/data/A_21/heptacene.cdx +0 -0
data/test/data/A_21/heptacene.mol +71 -0
data/test/data/A_21/heptalene.cdx +0 -0
data/test/data/A_21/heptalene.mol +30 -0
data/test/data/A_21/heptaphene.cdx +0 -0
data/test/data/A_21/heptaphene.mol +71 -0
data/test/data/A_21/hexacene.cdx +0 -0
data/test/data/A_21/hexacene.mol +62 -0
data/test/data/A_21/hexaphene.cdx +0 -0
data/test/data/A_21/hexaphene.mol +62 -0
data/test/data/A_21/indene.cdx +0 -0
data/test/data/A_21/indene.mol +24 -0
data/test/data/A_21/iupac.txt +41 -0
data/test/data/A_21/naphthacene.cdx +0 -0
data/test/data/A_21/naphthacene.mol +44 -0
data/test/data/A_21/naphthalene.cdx +0 -0
data/test/data/A_21/naphthalene.mol +26 -0
data/test/data/A_21/ovalene.cdx +0 -0
data/test/data/A_21/ovalene.mol +78 -0
data/test/data/A_21/pentacene.cdx +0 -0
data/test/data/A_21/pentacene.mol +53 -0
data/test/data/A_21/pentalene.cdx +0 -0
data/test/data/A_21/pentalene.mol +22 -0
data/test/data/A_21/pentaphene.cdx +0 -0
data/test/data/A_21/pentaphene.mol +53 -0
data/test/data/A_21/perylene.cdx +0 -0
data/test/data/A_21/perylene.mol +49 -0
data/test/data/A_21/phenalene.cdx +0 -0
data/test/data/A_21/phenalene.mol +33 -0
data/test/data/A_21/phenanthrene.cdx +0 -0
data/test/data/A_21/phenanthrene.mol +35 -0
data/test/data/A_21/picene.cdx +0 -0
data/test/data/A_21/picene.mol +53 -0
data/test/data/A_21/pleiadene.cdx +0 -0
data/test/data/A_21/pleiadene.mol +44 -0
data/test/data/A_21/pyranthrene.cdx +0 -0
data/test/data/A_21/pyranthrene.mol +72 -0
data/test/data/A_21/pyrene.cdx +0 -0
data/test/data/A_21/pyrene.mol +40 -0
data/test/data/A_21/rubicene.cdx +0 -0
data/test/data/A_21/rubicene.mol +63 -0
data/test/data/A_21/s-indacene.cdx +0 -0
data/test/data/A_21/s-indacene.mol +31 -0
data/test/data/A_21/tetraphenylene.cdx +0 -0
data/test/data/A_21/tetraphenylene.mol +57 -0
data/test/data/A_21/trinaphthylene.cdx +0 -0
data/test/data/A_21/trinaphthylene.mol +71 -0
data/test/data/A_21/triphenylene.cdx +0 -0
data/test/data/A_21/triphenylene.mol +44 -0
data/test/data/C00147.kcf +25 -0
data/test/data/G00147.kcf +13 -0
data/test/data/atp.mol +69 -0
data/test/data/cyclohexane.mol +17 -0
data/test/data/cyclohexane.ps +485 -0
data/test/data/fullerene.mol +155 -0
data/test/data/glycan +33 -0
data/test/data/hypericin.cdx +0 -0
data/test/data/hypericin.cdxml +596 -0
data/test/data/hypericin.chm +0 -0
data/test/data/hypericin.ct +85 -0
data/test/data/hypericin.f1d +0 -0
data/test/data/hypericin.f1q +0 -0
data/test/data/hypericin.gif +0 -0
data/test/data/hypericin.mol +88 -0
data/test/data/hypericin.mol2 +159 -0
data/test/data/hypericin.msm +123 -0
data/test/data/hypericin.pdf +359 -0
data/test/data/hypericin.png +0 -0
data/test/data/hypericin.ps +0 -0
data/test/data/hypericin.skc +0 -0
data/test/data/hypericin2.gif +0 -0
data/test/data/hypericin2.ps +0 -0
data/test/data/kegg/genomes/hsa/hsa_enzyme.list +4 -0
data/test/data/kegg/genomes/hsa/hsa_pfam.list +4 -0
data/test/data/kegg/ligand/mol/C00147.mol +26 -0
data/test/data/kegg/ligand/reaction +14 -0
data/test/data/kegg/ligand/reaction.lst +1 -0
data/test/data/kegg/ligand/reaction_mapformula.lst +3 -0
data/test/data/reaction +14 -0
data/test/data/reaction.lst +1 -0
data/test/data/reaction_mapformula.lst +3 -0
data/test/data/rxn/C00001.mol +6 -0
data/test/data/rxn/C00011.mol +10 -0
data/test/data/rxn/C00014.mol +6 -0
data/test/data/rxn/C01010.mol +18 -0
data/test/data/rxn/sample.rxn +50 -0
data/test/data/rxn/substitution.rxn +45 -0
data/test/data/test.eps +0 -0
data/test/data/test.mol +28 -0
data/test/data/test.sdf +143 -0
data/test/data/test.skc +0 -0
data/test/data/test.xyz +4 -0
data/test/data/test_lf.sdf +143 -0
data/test/heavy_test_pubchem.rb +16 -0
data/test/multiple_test.rb +22 -0
data/test/test_adj.rb +54 -0
data/test/test_canonical_smiles.rb +46 -0
data/test/test_cdx.rb +32 -0
data/test/test_chem.rb +18 -0
data/test/test_cluster.rb +19 -0
data/test/test_db.rb +11 -0
data/test/test_eps.rb +24 -0
data/test/test_geometry.rb +11 -0
data/test/test_gspan.rb +28 -0
data/test/test_iupac.rb +36 -0
data/test/test_kcf.rb +24 -0
data/test/test_kcf_glycan.rb +10 -0
data/test/test_kegg.rb +118 -0
data/test/test_linucs.rb +21 -0
data/test/test_mdl.rb +45 -0
data/test/test_mol2.rb +62 -0
data/test/test_morgan.rb +21 -0
data/test/test_pdf.rb +12 -0
data/test/test_prop.rb +86 -0
data/test/test_rmagick.rb +15 -0
data/test/test_sbdb.rb +23 -0
data/test/test_sdf.rb +30 -0
data/test/test_smiles.rb +84 -0
data/test/test_sssr.rb +18 -0
data/test/test_sub.rb +47 -0
data/test/test_subcomp.rb +37 -0
data/test/test_traverse.rb +29 -0
data/test/test_writer.rb +13 -0
data/test/test_xyz.rb +15 -0
data/test/type_test.rb +25 -0
metadata +290 -0

data/lib/chem/db/iupac/postfix.rb ADDED

	@@ -0,0 +1,2 @@
1	+
2	+ $reg_postfix = /(ane|anol)/ # alternation: matches the suffix "ane" or "anol"

data/lib/chem/db/kcf.rb ADDED

@@ -0,0 +1,390 @@
+#
+#
+# = chem/db/kcf.rb - KEGG Chemical Function (KCF) parser
+#
+module Chem
+  module KEGG
+    class ANumber
+      def self.open filename
+        @input = File.open(filename)
+        KCFCorrespondence.new(@input)
+      end
+    end
+    class KCFAtom
+      include Atom
+      attr_accessor :kcf_type, :atom_id, :next_atom
+      def initialize line
+        @line = line
+        @next_atom = {}
+      end
+      def x ; @x || @x = @line[22...32].to_f ; end
+      def y ; @y || @y = @line[32...42].to_f ; end
+      def kcf_type ; @kcf_type || @kcf_type = @line[16...19].strip ; end
+      def element ; @element || @element = @line[19...22].strip.intern ; end
+      def atom_id ; @atom_id || @atom_id = @line[0...16].to_i ; end
+    end
+    class KCFBond
+      include Bond
+      attr_accessor :bond_id, :property
+      def initialize line
+        @line = line
+      end
+      def bond_id  ; @bond_id  ||= @line[0...16].to_i  ; end
+      def v        ; @v        ||= @line[23...25].to_i ; end
+      def property ; @property ||= @line[27..-1]       ; end
+    end
+    class KCF
+      include Molecule
+      include Enumerable
+      def initialize input
+        @nodes = []
+        @edges = []
+        hash = {}
+        while ! /\/\/\//.match(line = input.readline)
+          case line[0...12]
+          when 'ENTRY       '
+          when 'ATOM        '
+            line.split[1].to_i.times do |n|
+              atom = KCFAtom.new input.readline
+              hash[atom.atom_id] = atom
+              @nodes.push(atom)
+            end
+          when 'BOND        '
+            line.split[1].to_i.times do |n|
+              bond = KCFBond.new input.readline
+              @edges.push([bond, hash[line[16...19].to_i], hash[line[19...23].to_i]])
+            end
+          end
+        end
+      end
+      def KCF.open filename
+        @input = File.open(filename)
+        KCF.new(@input)
+      end
+    end
+    # Streaming reader for a KEGG reaction flat file: a sequence of entries,
+    # each terminated by a "///" line, whose first 12 columns hold a keyword
+    # (or blanks on continuation lines).
+    class KeggReaction
+      # Plain record for the fields of one reaction entry.
+      class ReactionEntry
+        attr_accessor :entry, :name, :definition, :reactants, :products, :rpair, :ec, :comment, :pathway
+        def initialize
+          @comment = []
+          @name = []
+          @definition = []
+        end
+      end
+      # input:: IO-like object responding to eof? and readline.
+      def initialize(input)
+        @input = input
+      end
+      # Opens +filename+ for reading; the handle stays open because #each
+      # consumes it lazily.
+      def KeggReaction.open(filename)
+        KeggReaction.new(File.open(filename))
+      end
+      # Yields one ReactionEntry per entry in the input. Unrecognised lines
+      # are reported on stdout and skipped. Raises EOFError when the input
+      # ends without a closing "///".
+      def each
+        while ! @input.eof?
+          entry = ReactionEntry.new
+          state = :INITIAL
+          while ! /\/\/\//.match(line = @input.readline)
+            type = line[0...12]
+            # A keyword in the first 12 columns starts a new field, so the
+            # multi-line state of the previous field must not leak into it.
+            # Only blank-prefixed continuation lines inherit the state.
+            state = :INITIAL unless type.strip.empty?
+            if 'ENTRY       ' == type
+              entry.entry = line[12...-1]
+            elsif 'NAME        ' == type || state == :NAME
+              state = :NAME
+              # NOTE(review): assignment keeps only the last NAME line although
+              # ReactionEntry initialises name to an Array - confirm intent.
+              entry.name = line[12...-1]
+            elsif 'DEFINITION  '  == type || state == :DEFINITION
+              state = :DEFINITION
+              entry.definition.push(line[12...-1])
+            elsif 'EQUATION    ' == type
+              ary = line[12...-1].split('<=>')
+              entry.reactants = ary[0].split('+').collect{|mol| mol.strip}
+              entry.products = ary[1].split('+').collect{|mol| mol.strip}
+            elsif 'RPAIR       ' == type
+              entry.rpair = line[12...-1]
+            elsif 'ENZYME      ' == type
+              entry.ec = line[12...-1].split('.').collect{|n| n.to_i}
+            elsif 'COMMENT     ' == type || state == :COMMENT
+              state = :COMMENT
+              entry.comment.push(line[12...-1])
+            elsif 'PATHWAY     ' == type || state == :PATHWAY
+              # pathway lines are recognised but their content is not stored
+              state = :PATHWAY
+            else
+              puts "Error Unknown line : %s" % line
+            end
+          end
+          yield entry
+        end
+      end
+    end
+    class KCFRXN
+      def initialize reactant, product
+        @reactant = reactant
+        @product = product
+        @matched_reactants = []
+        @matched_products = []
+        @nodes = []
+      end
+      def corresponds from, to
+        @matched_reactants.push(@reactant.atoms[from])
+        @matched_products.push(@product.atoms[from])
+        @nodes.push(RXNNode.new(@reactant.atoms[from], @product.atoms[to]))
+      end
+      def setup_bonds
+        @edges = []
+        @reactant.atoms.each do |atom|
+          if atom && ! @matched_reactants.member?(atom)
+            @nodes.push(RXNNode.new(atom, nil))
+          end
+        end
+        @product.atoms.each do |atom|
+          if atom && ! @matched_products.member?(atom)
+            @nodes.push(RXNNode.new(nil, atom))
+          end
+        end
+        @reactant.bonds.each do |bond|
+          bond.e.next_atom[bond.b] = bond
+          bond.b.next_atom[bond.e] = bond
+        end
+        @product.bonds.each do |bond|
+          bond.e.next_atom[bond.b] = bond
+          bond.b.next_atom[bond.e] = bond
+        end
+        @nodes.each_with_index do |node, index|
+          index.upto(@nodes.length - 1) do |n|
+            r_edge = p_edge = nil
+            if @nodes[n].reactant_node && @nodes[n].reactant_node.next_atom.has_key?(node.reactant_node)
+              r_edge = @nodes[n].reactant_node.next_atom[node.reactant_node]
+            end
+            if @nodes[n].product_node && @nodes[n].product_node.next_atom.has_key?(node.product_node)
+              p_edge = @nodes[n].product_node.next_atom[node.product_node]
+            end
+            if r_edge || p_edge
+              edge = RXNEdge.new
+              edge.reactant_edge = r_edge
+              edge.product_edge = p_edge
+              @edges.push(edge)
+            end
+          end
+        end
+        @edges.each do |edge|
+          from = edge.reactant_edge ? edge.reactant_edge.multiplicity : 0
+          to = edge.product_edge ? edge.product_edge.multiplicity : 0
+          puts "%3d %3d" % [from, to]
+        end
+      end
+      class RXNNode
+        attr_reader :reactant_node, :product_node
+        def initialize reactant, product
+          @reactant_node = reactant
+          @product_node = product
+        end
+      end
+      class RXNEdge
+        attr_accessor :product_edge, :reactant_edge
+      end
+    end
+    class KCFCorrespondence
+      attr_reader :compounds, :correspondence
+      def initialize input
+        @name = []
+        @input = input
+        @compounds = []
+        @correspondence = {}
+        parse(input)
+      end
+      def make_rxn dir
+        reactant = KCF.open("#{dir}#{@compounds[0]}.kcf")
+        product = KCF.open("#{dir}#{@compounds[1]}.kcf")
+        rxn = KCFRXN.new(reactant, product)
+        @correspondence.each do |k, corres|
+          rxn.corresponds(corres[0][0], corres[1][0])
+        end
+        rxn.setup_bonds
+      end
+      def parse input
+        while ! /\/\/\//.match(line = input.readline)
+          case line[0...12]
+          when 'ENTRY       '
+            @no = /(\d+)/.match(line)[1].to_i
+          when 'NAME        '
+            @name.push(line[12...-1])
+          when 'COMPOUND    '
+            @compounds.push(line[12...-1])
+          when 'TYPE        '
+            @type = line[12...-1]
+          when 'ALIGN       '
+            @align = line[12...-1].to_i
+            alignment_mode = true
+          else
+            ary = line[12...-1].split
+            @correspondence[ary[0].to_i] = ary[1..2].collect{|e| a = e.split(':'); [a[0].to_i, a[1]]}
+          end
+        end
+      end
+    end
+    module Atom
+      attr_accessor :kcf_type, :kcf_prop
+      # Returns KCF formatted line
+      def kcf_line
+        if @kcf_prop
+          "%14d  %3s%2s %10.4f%10.4f #%s" % [@number, @kcf_type, @element, @x, @y, @kcf_prop]
+        else
+          "%14d  %3s%2s %10.4f%10.4f" % [@number, @kcf_type, @element, @x, @y]
+        end
+      end
+    end
+    module Bond
+      attr_accessor :kcf_prop
+      # Returns KCF formatted line
+      def kcf_line
+        if @kcf_prop
+          "%13d  %4d%4d%2d #%s" % [@number, @b.number, @e.number, @multiplicity, @kcf_prop]
+        else
+          "%13d  %4d%4d%2d" % [@number, @b.number, @e.number, @multiplicity, @kcf_prop]
+        end
+      end
+    end
+    class KCFReader
+      def KCFReader.open(file, &method)
+        input = File.open(file, 'r')
+        KCFReader.new.read(input, &method)
+      end
+      def read input, &method
+        #       0.upto(2) do |m|
+        #         0.upto(9) do |n|
+        #           print n
+        #         end
+        #       end
+        #       puts
+        status = :NEW
+        mol = KCFMolecule.new
+        input.each do |line|
+          case line[0..11]
+          when /ANUMBER/
+            mol.a_no = /A(\d+)/.match(line)[1].to_i
+          when /ENTRY/
+            entry = /C(\d+)/.match(line)[1].to_i
+          when /ATOM/
+            n_atoms = /(\d+)/.match(line)[1].to_i
+            status = :ATOM
+          when /BOND/
+            n_bonds = /(\d+)/.match(line)[1].to_i
+            status = :BOND
+          when /\/\/\//
+            if(method)
+              yield mol
+            end
+            mol = KCFMolecule.new
+            status = :NEW
+          else
+            case status
+            when :ATOM
+              atom = KCFAtom.new
+              atom.number, atom.kcf_type, atom.element, atom.x, atom.y, = line[12..-1].scanf("%d%s%s%f%f%s")
+              mol.atoms[atom.number] = atom
+            when :BOND
+              bond = KCFBond.new
+              no, b, e, bond.multiplicity, prop = line[12..-1].scanf("%d%d%d%d%s")
+              bond.b = mol.atoms[b]
+              bond.e = mol.atoms[e]
+              mol.bonds.push(bond)
+            end
+          end
+        end
+      end
+    end
+    class KCFMolecule
+      include Molecule
+      attr_accessor :a_no
+      def KCFMolecule.write_kcf molecule
+        n_atom = 1
+        molecule.atoms.each do |k, atom|
+          puts atom.kcf
+          n_atom += 1
+        end
+        n_bond = 1
+        molecule.bonds.each do |bond|
+          #            1     2   1 1 #UP
+          kcf.number = 48
+          puts bond.kcf_line
+          n_bond += 1
+        end
+      end
+      def KCFMolecule.open file
+        input = File.open(file, 'r')
+        KCFMolecule.new.read(input)
+      end
+      def read input
+        @entry = input.readline
+        number_of_atom = input.readline.split[1].to_i
+        1.upto(number_of_atom) do |n|
+          atom = KCFAtom.new
+          atom.number, atom.kcf_type, atom.element, atom.x, atom.y, = input.readline.scanf("%d%s%s%f%f%s")
+          @atoms[atom.number] = atom
+        end
+        number_of_bond = input.readline.split[1].to_i
+        1.upto(number_of_bond) do |n|
+          bond = KCFBond.new
+          no, b, e, bond.multiplicity, prop = input.readline.scanf("%d%d%d%d%s")
+          bond.b = @atoms[b]
+          bond.e = @atoms[e]
+          @bonds.push(bond)
+        end
+        self
+      end
+    end
+  end
+end

data/lib/chem/db/kcf_glycan.rb ADDED

@@ -0,0 +1,19 @@
+# = KEGG Chemical Function (KCF) glycan parser
+# Not implemented
+module Chem
+  module KEGG
+    class KCFGlycan
+      def initialize filename
+        #       filename.each do |line|
+        #         puts line
+        #       end
+      end
+    end
+  end
+end

data/lib/chem/db/kegg.rb ADDED

@@ -0,0 +1,516 @@
+#
+# = chem/db/kegg.rb - KEGG (Kyoto Encyclopedia of Genes and Genomes)
+#
+# Author::	Nobuya Tanaka <tanaka@chemruby.org>
+#
+# $Id:$
+#
+require 'chem/db/mdl'
+module Chem
+  module KEGG
+  class KeggDirectory
+    attr_reader :dir
+    def initialize dir
+      @dir = dir
+      @compounds = {}
+      @ligand_dir = File.join(@dir, "ligand")
+      @mol_dir = File.join(@ligand_dir, "mol")
+      @parsed_file = []
+    end
+    def get_organism organism, file
+      File.join(@dir, "genomes", organism, file)
+    end
+    def gene_to_pfam organism
+      filename = File.join(@dir, "genomes", organism, organism + "_pfam.list")
+      return @pfam2gene if @parsed_file.include?(filename)
+      @parsed_file.push filename
+      @gene2pfam ||= {}
+      @pfam2gene ||= {}
+      open(filename).each do |line|
+        gene, pfam = line.split("\t")
+        @gene2pfam[gene] = pfam.chop
+        (@pfam2gene[pfam.chop] ||= []).push(KeggGene.new(gene, organism, self))
+      end
+      @pfam2gene
+    end
+    def get_ec_number gene
+      @gene2enzyme ||= {}
+      @enzyme2gene ||= {}
+      filename = File.join(@dir, "genomes", gene.organism, gene.organism + "_enzyme.list")
+      return @gene2enzyme[gene.gene] if @parsed_file.include?(filename)
+      @parsed_file.push filename
+      open(filename).each do |line|
+        gn, ec = line.chop.split("\t")
+        @gene2enzyme[gn] = ec
+        @enzyme2gene[ec] = gn
+      end
+      @gene2enzyme[gene.gene]
+    end
+    def [](key)
+      case key
+      when /(R\d+)/
+        get_reaction $1
+      when /(C\d+)/
+        get_compound $1
+      when /pf:(.+)/
+        KeggPfam.new($1, self)
+      when /^([^:]{3,4}):(\d+)/
+        # gene
+        raise "Parser for Organism not implemented!"
+      when /^([^:]{3,4})/
+        # organism
+        KeggOrganism.new($1, self)
+      else
+        raise "unknown KEGG key type : #{key}"
+      end
+    end
+    def map_formula
+      @reaction_map_formula = parse_reaction_map_formula unless @reaction_map_formula
+      @reaction_map_formula
+    end
+    def parse_reaction_map_formula
+      rxns = {}
+      parser = Chem.parse_file(File.join(@dir, "ligand", "reaction_mapformula.lst"))
+      parser.each do |rxn|
+        rxns[rxn.entry] = rxn
+      end
+      rxns
+    end
+    # Private methods
+    private
+    class KeggOrganism
+      def initialize organism, kegg
+        @organism = organism
+        @kegg = kegg
+      end
+      def pfam
+        pfam2gene = @kegg.gene_to_pfam(@organism)
+        pfam2gene
+      end
+      def [](key)
+        @kegg
+      end
+    end
+    private
+    class KeggGene
+      attr_reader :organism, :gene
+      def initialize gene, organism, kegg
+        @gene = gene
+        @organism = organism
+        @kegg = kegg
+      end
+      def ec_number
+        @kegg.get_ec_number(self).inspect
+      end
+    end
+    private
+    class KeggPfam
+      def initialize pfam_key, kegg
+        @kegg = kegg
+        @pfam_key = pfam_key
+      end
+      def [](organism)
+        @kegg[organism][@pfam_key]
+      end
+    end
+    private
+    def get_compound name
+      unless @compounds[name]
+        @compounds[name] = Chem.open_mol(File.join(@dir, "ligand", "mol", name) + ".mol")
+      end
+      @compounds[name]
+    end
+    def get_reaction name
+      @reactions ||= parse_reaction
+      @reactions[name]
+    end
+    def parse_reaction
+      rxns = {}
+      parser = Chem.parse_file(File.join(@dir, "ligand", "reaction"))
+      parser.each do |reaction|
+        reaction.kegg = self
+        rxns[reaction.entry] = reaction
+      end
+      rxns
+    end
+  end
+  # Obsolete (per the original author): module-wide setting naming the
+  # folder that holds compound files. Stored in a class variable of this
+  # module; nil until a folder is assigned.
+    @@kegg_compound_folder = nil
+    # Sets the compound folder.
+    def self.kegg_compound_folder= (folder)
+      @@kegg_compound_folder = folder
+    end
+    # Returns the compound folder, or nil when none has been set.
+    def self.kegg_compound_folder
+      @@kegg_compound_folder
+    end
+    # Duplication definition!
+    class KEGGReaction
+      include Chem::Reaction
+      attr_accessor :entry, :name, :ecs, :compounds, :direction
+      def initialize
+        @ecs = []
+        @compounds = []
+      end
+      def kegg= kegg
+        @kegg = kegg
+      end
+      def map_formula
+        return nil unless @kegg.map_formula[@entry]
+        @kegg.map_formula[@entry].compounds
+      end
+    end
+    class KeggCompound
+      include Molecule
+      include Enumerable
+      include MDL::MdlMolParser
+      attr_reader :entry
+      def initialize
+        @nodes = []
+        @edges = []
+      end
+      @@entries = {}
+      def entry= entry_no
+        @entry = entry_no
+        if @@entries[entry_no] == nil
+          if Chem::Kegg.kegg_compound_folder == nil
+            raise ArgumentError.new("Chem::Kegg.kegg_compound_folder" +
+                                      " not specified")
+          end
+#           mol = KeggCompound.new
+#           mol.open(Chem::Kegg.kegg_compound_folder + entry_no + ".mol")
+          filename = File.join(Chem::Kegg.kegg_compound_folder, entry_no + ".mol")
+          mol = nil
+          if File.exist?(filename)
+            mol = Chem.open_mol(filename)
+          end
+          @@entries[entry_no] = mol
+        end
+        @fly_weight = @@entries[entry_no]
+        if @fly_weight
+          @nodes = @fly_weight.nodes
+          @edges = @fly_weight.edges
+        end
+      end
+    end
+    class KeggGlycan
+      attr_accessor :entry, :name
+    end
+    class KeggEc
+      attr_accessor :entry, :number
+    end
+    module KeggFormat
+      def compound_folder= (folder)
+        Chem::Kegg.kegg_compound_folder = folder
+      end
+      def each_entry
+        state = nil
+        str = ''
+        @input.each do |line|
+          if line[0..11] == '            '
+            str += line[12..-1]
+          else
+            yield(str, state) if state # Not first state
+            str = line[12..-1]
+            state = line[0..11].strip
+          end
+        end
+      end
+    end
+    class KeggReactionParser
+      include KeggFormat
+      include Enumerable
+      def initialize filename
+        @input = File.open(filename)
+      end
+      def parse_compounds species
+        ary = []
+        species.split(" + ").each do |mol|
+          stoichiometry = 1
+          if m = /(\d+) *[CG]/.match(mol)
+            stoichiometry = m[1].to_i
+          end
+          compound_entry = ""
+          if m = /(C\d+)/.match(mol)
+            compound_entry = m[1]
+          elsif m = /(G\d+)/.match(mol)
+            compound_entry = m[1]
+          end
+          ary.push([compound_entry, stoichiometry])
+        end
+        ary
+      end
+      def each
+        reaction = nil
+        each_entry do |str, state|
+          case state
+          when "ENTRY"
+#          reaction = Reaction.find(:first, :conditions => ["entry = ?", str.split[0]])
+#            if reaction == nil
+            reaction = KEGGReaction.new
+            reaction.entry = str.split[0]
+#          end
+          when "NAME"
+            reaction.name = str
+          when "DEFINITION"
+            #@definition = str
+          when "EQUATION"
+            c = str.split("<=>")
+            reaction.compounds << parse_compounds(c[0])
+            reaction.compounds << parse_compounds(c[1])
+          when "RPAIR"
+            # @rpair = str
+          when "ENZYME"
+            str.split.each do |e|
+              ec = KeggEc.new
+              ec.entry = "EC" + e
+              sp = e.split(".")
+              ec.number = sp.collect{|i| i.to_i}
+              reaction.ecs << ec
+            end
+          when "///"
+            #          reaction.save
+            yield reaction
+          when "PATHWAY"
+          when "COMMENT"
+          when "REFERENCE"
+          else
+            p state
+          end
+        end
+      end
+    end
+    class KeggReactionLstParser
+      include Enumerable
+      include KeggFormat
+      def initialize filename
+        @input = open(filename)
+      end
+      def each
+        @input.each do |line|
+          rxn = KEGGReaction.new
+          r_number, comps = line.split(":")
+          rxn.entry = r_number
+          cc = comps.split(/<=>/)
+          reactant = cc[0].split("+").collect do |c|
+            ary = c.split
+            #compound = KeggCompound.new
+            if ary.length == 1
+              #compound.entry = c.strip
+              [c.strip, 1]
+            else
+              #compound.entry = ary[1].strip
+              [c.strip, ary[0].to_i]
+            end
+          end
+          product = cc[1].split("+").collect do |c|
+            ary = c.split
+            #compound = KeggCompound.new
+            if ary.length == 1
+              #compound.entry = c.strip
+              [c.strip, 1]
+            else
+              #compound.entry = ary[1].strip
+              [c.strip, ary[0].to_i]
+            end
+          end
+          rxn.compounds = [reactant, product]
+          yield rxn
+        end
+      end
+    end
+    # ftp://ftp.genome.ad.jp/pub/kegg/ligand/reaction_mapformula.lst
+    class KeggReactionMapParser
+      include Enumerable
+      include KeggFormat
+      def initialize filename
+        @input = open(filename)
+        @reactions = @input.inject({}) do |ret, line|
+          ary = line.split(":")
+          ret[ary[0]] = ary[1..-1]
+          ret
+        end
+      end
+      def each
+        @reactions.each do |r_number, (map_number, comps)|
+          yield self[r_number]
+        end
+      end
+      def [](r_number)
+        return nil if @reactions[r_number] == nil
+        map_number, comps = @reactions[r_number]
+        rxn = KEGGReaction.new
+        #          r_number, map_number, comps = line.split(":")
+        rxn.entry = r_number
+        cc = comps.split(/(<?=>?)/)
+        case cc[1]
+        when "<="
+          rxn.direction = -1
+        when "<=>"
+          rxn.direction = 0
+        when "=>"
+          rxn.direction = 1
+        end
+        reactant = cc[0].split("+").collect do |c|
+          #compound = KeggCompound.new
+          #compound.entry = c.strip
+          [c.strip, 1]
+        end
+        product = cc[2].split("+").collect do |c|
+          #compound = KeggCompound.new
+          #compound.entry = c.strip
+          [c.strip, 1]
+        end
+        rxn.compounds = [reactant, product]
+        rxn
+      end
+    end
+    # Parses KEGG Glycan format
+    # http://www.genome.jp/ligand/kcam/kcam/kcf.html
+    # Not fully implemented
+    class KeggGlycanParser
+      include Enumerable
+      include KeggFormat
+      def initialize filename
+        @input = open(filename)
+      end
+      def each
+        glycan = nil
+        each_entry do |str, state|
+          case state
+          when "ENTRY"
+            glycan = KeggGlycan.new
+#            glycan = Compound.find(:first, :conditions => ["glycan_entry = ?", str.split[0]])
+            if glycan == nil
+#              glycan = Compound.new
+              glycan.entry = str.split[0]
+            end
+          when "NAME"
+            if glycan.name
+              glycan.name = glycan.name + str.split("\n").join if str
+            else
+              glycan.name = str.split("\n").join if str
+            end
+          when "///"
+            #          glycan.save
+          end
+        end
+      end
+    end
+    # Walks the fields of the file at $home + "compound" and fills in a
+    # compound record per entry: ENTRY sets the entry id, NAME the name
+    # (line breaks removed), DBLINKS the ChEBI and PubChem ids, GLYCAN the
+    # glycan entry. Nothing is saved; the save call is commented out.
+    #
+    # NOTE(review): +parse+, +Compound+ and $home are not defined in the
+    # visible part of this file - confirm they exist before calling this.
+    # Compound.find(:first, :conditions => ...) looks like an ActiveRecord
+    # style lookup - TODO confirm.
+    def self.parse_compound_file
+      compound = nil
+      parse($home + "compound") do |str, state|
+        case state
+        when "ENTRY"
+          # reuse an existing record for this entry id, else start a new one
+          compound = Compound.find(:first, :conditions => ["entry = ?", str.split[0]])
+          if compound == nil
+            compound = Compound.new
+            compound.entry = str.split[0]
+          end
+        when "NAME"
+          compound.name = str.split("\n").join if str
+        when "DBLINKS"
+          # one database reference per line; only ChEBI and PubChem are kept
+          str.split("\n").each do |line|
+            if m = /ChEBI: (\d+)/.match(line)
+              compound.chebi = m[1].to_i
+            elsif m = /PubChem: (\d+)/.match(line)
+              compound.pubchem = m[1].to_i
+            end
+          end
+        when "GLYCAN"
+          compound.glycan_entry = str
+        when "///"
+          #compound.save
+        end
+      end
+    end
+    # Iterates over every .mol file under $home + "/mol", opens it with
+    # Chem.open_mol and stores a Marshal dump of the molecule in the ctab
+    # attribute of a record +comp+ that has none yet.
+    #
+    # NOTE(review): the lookup that assigned +comp+ is commented out, so
+    # +comp+ is undefined here and the first reference raises NameError.
+    # $home and the 'util' library are not defined in the visible part of
+    # this file either - confirm before using this method.
+    def set_compounds
+      require 'util'
+      Dir.glob($home + "/mol/*.mol").each do |mol|
+        # entry id: one character followed by digits, taken from the file name
+        entry = /(.\d+).mol/.match(mol)[1]
+#        comp = KeggCompound.find(:first, :conditions => ["entry = ?", entry])
+        # from here on +mol+ is the parsed molecule, no longer the path
+        mol = Chem.open_mol(mol)
+        if comp == nil
+          puts mol
+          next
+        end
+        if comp.ctab == nil
+          comp.ctab = Marshal.dump(mol)
+          comp.save
+        end
+        #p comp
+      end
+    end
+  end
+end