sylfy 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/Rakefile +13 -0
- data/lib/sylfy.rb +12 -25
- data/lib/sylfy/add.rb +10 -0
- data/lib/sylfy/add/bio_kegg_kgml.rb +108 -0
- data/lib/sylfy/datamodel.rb +141 -0
- data/lib/sylfy/feature.rb +128 -0
- data/lib/sylfy/mathf.rb +40 -0
- data/lib/sylfy/pattern.rb +14 -0
- data/lib/sylfy/service.rb +34 -0
- data/lib/sylfy/service/biocycrest.rb +41 -0
- data/lib/sylfy/service/cactusncirest.rb +64 -0
- data/lib/sylfy/service/cbioportal.rb +156 -0
- data/lib/sylfy/service/chembl.rb +106 -0
- data/lib/sylfy/service/ebisoap.rb +55 -0
- data/lib/sylfy/service/ensemblrest.rb +64 -0
- data/lib/sylfy/service/ensemblrest/archive.rb +35 -0
- data/lib/sylfy/service/ensemblrest/comparative.rb +146 -0
- data/lib/sylfy/service/ensemblrest/xrefs.rb +73 -0
- data/lib/sylfy/service/hgncrest.rb +52 -0
- data/lib/sylfy/service/keggrest.rb +6 -19
- data/lib/sylfy/service/keggrest/conv.rb +64 -40
- data/lib/sylfy/service/keggrest/find.rb +27 -48
- data/lib/sylfy/service/keggrest/get.rb +82 -0
- data/lib/sylfy/service/keggrest/link.rb +17 -37
- data/lib/sylfy/service/keggrest/list.rb +18 -50
- data/lib/sylfy/service/lipidmaprest.rb +228 -0
- data/lib/sylfy/service/pubchem.rb +71 -0
- data/lib/sylfy/service/pubchemrest.rb +249 -0
- data/lib/sylfy/service/rest.rb +26 -0
- data/lib/sylfy/service/soapwsdl.rb +78 -0
- data/lib/sylfy/service/unichemrest.rb +106 -0
- data/lib/sylfy/utils.rb +18 -0
- data/lib/sylfy/utils/keyhash.rb +1149 -0
- data/lib/sylfy/utils/reactionkey.rb +197 -0
- data/lib/sylfy/version.rb +1 -1
- data/sylfy.gemspec +11 -15
- data/test/test_kegg_rest.rb +58 -0
- data/test/test_reactionkey.rb +37 -0
- metadata +87 -15
- data/lib/sylfy/math.rb +0 -24
- data/lib/sylfy/service/keggrest/restKegg_get.rb +0 -130
@@ -1,7 +1,3 @@
|
|
1
|
-
#
|
2
|
-
# UniSysDB library in Ruby
|
3
|
-
# Copyright (C) 2012
|
4
|
-
#
|
5
1
|
# @author Natapol Pornputtapong <natapol@chalmers.se>
|
6
2
|
#
|
7
3
|
|
@@ -12,47 +8,75 @@ module Sylfy
|
|
12
8
|
module KEGGREST
|
13
9
|
|
14
10
|
module_function
|
15
|
-
|
16
|
-
|
11
|
+
# interface to KEGG conv service
|
12
|
+
#
|
13
|
+
# @param targetdb [Symbol] please consult {http://www.kegg.jp/kegg/rest/keggapi.html}
|
14
|
+
#
|
15
|
+
# @param sourcedb [String] can be database name or specific ID(s) joined with '+'
|
16
|
+
#
|
17
|
+
# @return [Hash] results with sourcedb ID as keys
|
18
|
+
#
|
19
|
+
def conv(targetdb, *sourcedb)
|
17
20
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
help:
|
22
|
-
help: targetdb = <database>
|
23
|
-
help: For gene identifiers:
|
24
|
-
help: <kegg_db> = <org>
|
25
|
-
help: <org> = KEGG organism code or T number
|
26
|
-
help: <outside_db> = ncbi-gi | ncbi-geneid | uniprot
|
27
|
-
help:
|
28
|
-
help: For chemical substance identifiers:
|
29
|
-
help: <kegg_db> = drug | compound | glycan
|
30
|
-
help: <outside_db> = pubchem | chebi
|
31
|
-
help:
|
32
|
-
help: return a Hash
|
33
|
-
)
|
34
|
-
if id == :help
|
35
|
-
puts help
|
36
|
-
return {}
|
37
|
-
else
|
21
|
+
begin
|
22
|
+
doc = URI.parse("#{Sylfy::Service::KEGGREST::BASEURI}/conv/#{targetdb.to_s}/#{sourcedb.join("+")}").read().strip()
|
23
|
+
result = {}
|
38
24
|
|
39
|
-
|
40
|
-
|
41
|
-
result =
|
42
|
-
|
43
|
-
doc.split(/\n/).each do |line|
|
44
|
-
dat = line.chomp.split(/\t/)
|
45
|
-
result[dat[0]] = [] if !result.has_key?(dat[0])
|
46
|
-
result[dat[0]].push(dat[1])
|
47
|
-
end
|
48
|
-
|
49
|
-
return result
|
50
|
-
|
51
|
-
rescue OpenURI::HTTPError
|
52
|
-
raise Sylfy::Service::DataNotFound, "Query not found."
|
25
|
+
doc.split(/\n/).each do |line|
|
26
|
+
dat = line.chomp.split(/\t/)
|
27
|
+
result[dat[0]] = [] if !result.has_key?(dat[0])
|
28
|
+
result[dat[0]].push(dat[1])
|
53
29
|
end
|
54
30
|
|
31
|
+
return result
|
32
|
+
|
33
|
+
rescue OpenURI::HTTPError
|
34
|
+
raise Sylfy::Service::DataNotFound, "Query not found."
|
55
35
|
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
def ncbi2ko(*sourcedb)
|
40
|
+
begin
|
41
|
+
ids = sourcedb.flatten.map { |x| x =~ /^\d+$/ ? "ncbi-geneid:" + x : x}
|
42
|
+
hash1 = conv(:genes, ids)
|
43
|
+
list1 = []
|
44
|
+
hash1.each_value do |v|
|
45
|
+
list1 += v
|
46
|
+
end
|
47
|
+
hash2 = link(:ko, list1)
|
48
|
+
results = {}
|
49
|
+
hash1.each_pair do |k, vs|
|
50
|
+
results[k] ||= []
|
51
|
+
vs.each {|v| results[k] = results[k] | hash2[v] if hash2.has_key?(v)}
|
52
|
+
end
|
53
|
+
|
54
|
+
results.delete_if {|key, value| value.empty? }
|
55
|
+
|
56
|
+
return results
|
57
|
+
rescue Sylfy::Service::DataNotFound
|
58
|
+
return {}
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def ncbi2ko_org(org)
|
63
|
+
|
64
|
+
results = {}
|
65
|
+
|
66
|
+
begin
|
67
|
+
hash1 = conv(org.to_s, 'ncbi-geneid')
|
68
|
+
hash2 = link(:ko, org.to_s)
|
69
|
+
hash1.each_pair do |k, vs|
|
70
|
+
results[k] ||= []
|
71
|
+
vs.each {|v| results[k] = results[k] | hash2[v] if hash2.has_key?(v)}
|
72
|
+
end
|
73
|
+
results.delete_if {|key, value| value.empty? }
|
74
|
+
return results
|
75
|
+
rescue
|
76
|
+
return results
|
77
|
+
end
|
78
|
+
|
79
|
+
|
56
80
|
end
|
57
81
|
|
58
82
|
end
|
@@ -1,7 +1,3 @@
|
|
1
|
-
#
|
2
|
-
# UniSysDB library in Ruby
|
3
|
-
# Copyright (C) 2012
|
4
|
-
#
|
5
1
|
# @author Natapol Pornputtapong <natapol@chalmers.se>
|
6
2
|
#
|
7
3
|
|
@@ -12,28 +8,17 @@ module Sylfy
|
|
12
8
|
module KEGGREST
|
13
9
|
|
14
10
|
module_function
|
15
|
-
#
|
11
|
+
# interface to KEGG find service
|
16
12
|
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
help:
|
27
|
-
help: database = <database>
|
28
|
-
help: <database> = pathway | module | disease | drug | environ | ko | genome |
|
29
|
-
help: <org> | compound | glycan | reaction | rpair | rclass | enzyme |
|
30
|
-
help: genes | ligand
|
31
|
-
help: <org> = KEGG organism code or T number
|
32
|
-
help:
|
33
|
-
help: <option> = formula | exact_mass | mol_weight only for <database> = compound | drug
|
34
|
-
help:
|
35
|
-
help: return a List
|
36
|
-
"
|
13
|
+
# @param database [Symbol] database name can be :pathway, :module, :disease, :drug, :environ, :ko, :genome, org, :compound, :glycan, :reaction, :rpair, :rclass, :enzyme, :genes, :ligand
|
14
|
+
#
|
15
|
+
# @param query [String] query keywords; multiple keywords can be joined with a + sign or enclosed in double quotes
|
16
|
+
#
|
17
|
+
# @param option [Symbol] can be :formula, :exact_mass, :mol_weight with database = :compound, :drug
|
18
|
+
#
|
19
|
+
# @return [Hash] results keyed by the ID of each matching entry
|
20
|
+
#
|
21
|
+
def find(database, query, option=nil)
|
37
22
|
|
38
23
|
result_tag = {
|
39
24
|
:pathway => [:NAME],
|
@@ -52,31 +37,25 @@ help: return a List
|
|
52
37
|
}
|
53
38
|
result_tag.default = [:NAME, :DEFINITION, :ORTHOLOGY]
|
54
39
|
result = {}
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
dat = line.chomp.split(/\t/)
|
69
|
-
result[dat[0]] = {}
|
70
|
-
info = dat[1].split('; ')
|
71
|
-
info.each_index {|ind| result[dat[0]][result_tag[database.to_sym][ind]] = info[ind]}
|
72
|
-
end
|
40
|
+
begin
|
41
|
+
if ['compound', 'drug'].include?(database.to_s) && option != nil &&
|
42
|
+
['formula', 'exact_mass' 'mol_weight'].include?(option.to_s)
|
43
|
+
URI.parse("#{Sylfy::Service::KEGGREST::BASEURI}/find/#{database}/#{URI.escape(query)}/#{option}").read.strip.split(/\n/).each do |line|
|
44
|
+
dat = line.chomp.split(/\t/)
|
45
|
+
result[dat[0]] = { option.to_sym => dat[1]}
|
46
|
+
end
|
47
|
+
else
|
48
|
+
URI.parse("#{Sylfy::Service::KEGGREST::BASEURI}/find/#{database}/#{URI.escape(query)}").read.strip.split(/\n/).each do |line|
|
49
|
+
dat = line.chomp.split(/\t/)
|
50
|
+
result[dat[0]] = {}
|
51
|
+
info = dat[1].split('; ')
|
52
|
+
info.each_index {|ind| result[dat[0]][result_tag[database.to_sym][ind]] = info[ind]}
|
73
53
|
end
|
74
|
-
|
75
|
-
return result
|
76
|
-
rescue OpenURI::HTTPError
|
77
|
-
raise Unisys::ServiceException, "Query not found."
|
78
54
|
end
|
79
|
-
|
55
|
+
|
56
|
+
return result
|
57
|
+
rescue OpenURI::HTTPError
|
58
|
+
raise Unisys::ServiceException, "Query not found."
|
80
59
|
end
|
81
60
|
|
82
61
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# @author Natapol Pornputtapong <natapol@chalmers.se>
|
2
|
+
#
|
3
|
+
|
4
|
+
module Sylfy
|
5
|
+
|
6
|
+
module Service
|
7
|
+
|
8
|
+
module KEGGREST
|
9
|
+
|
10
|
+
module_function
|
11
|
+
# interface to KEGG get service
|
12
|
+
#
|
13
|
+
# @param dbentries [Symbol, String] can be :pathway, :brite, :module, :disease, :drug, :environ, :ko, :genome, :org, :compound, :glycan, :reaction, :rpair, :rclass, :enzyme
|
14
|
+
#
|
15
|
+
# @param option [Symbol] can be :aaseq, :ntseq, :mol, :kcf, :image, :kgml
|
16
|
+
#
|
17
|
+
# @return [Array, String, Bio::KEGG::KGML, Bio::FastaFormat] an Array of Bio::KEGG objects by default; a String if option is :mol or :kcf; a Bio::KEGG::KGML if option is :kgml; a Bio::FastaFormat if option is :aaseq or :ntseq
|
18
|
+
#
|
19
|
+
def get(dbentries, option = nil)
|
20
|
+
|
21
|
+
if option != nil && ['aaseq', 'ntseq', 'mol', 'kcf', 'kgml'].include?(option.to_s)
|
22
|
+
text = "#{dbentries}/#{option}"
|
23
|
+
else
|
24
|
+
text = "#{dbentries}"
|
25
|
+
end
|
26
|
+
|
27
|
+
begin
|
28
|
+
doc = URI.parse("#{Sylfy::Service::KEGGREST::BASEURI}/get/#{text}").read
|
29
|
+
|
30
|
+
rescue OpenURI::HTTPError
|
31
|
+
raise Sylfy::Service::DataNotFound, "Query not found."
|
32
|
+
end
|
33
|
+
|
34
|
+
if ['aaseq', 'ntseq'].include?(option.to_s)
|
35
|
+
return Bio::FastaFormat.new(doc)
|
36
|
+
elsif option.to_s == 'mol'
|
37
|
+
return doc
|
38
|
+
elsif option.to_s == 'kcf'
|
39
|
+
return doc
|
40
|
+
elsif option.to_s == 'kgml'
|
41
|
+
return Bio::KEGG::KGML.new(doc.strip)
|
42
|
+
else
|
43
|
+
result = []
|
44
|
+
|
45
|
+
doc.strip.split('///').each do |entry|
|
46
|
+
case entry
|
47
|
+
when /^ENTRY\s+[A-Z0-9]+\s+Glycan/
|
48
|
+
result.push(Bio::KEGG::GLYCAN.new(entry))
|
49
|
+
when /^ENTRY\s+[A-Z0-9]+\s+Compound/
|
50
|
+
result.push(Bio::KEGG::COMPOUND.new(entry))
|
51
|
+
when /^ENTRY\s+[A-Z0-9]+\s+CDS/
|
52
|
+
result.push(Bio::KEGG::GENES.new(entry))
|
53
|
+
when /^ENTRY\s+[A-Z0-9]+\s+Drug/
|
54
|
+
result.push(Bio::KEGG::DRUG.new(entry))
|
55
|
+
when /^ENTRY\s+[A-Z0-9]+\s+Reaction/
|
56
|
+
result.push(Bio::KEGG::REACTION.new(entry))
|
57
|
+
when /^ENTRY\s+[A-Z0-9]+\s+RPair/
|
58
|
+
result.push(entry)
|
59
|
+
when /^ENTRY\s+[A-Z0-9]+\s+Genome/
|
60
|
+
result.push(Bio::KEGG::GENOME.new(entry))
|
61
|
+
when /^ENTRY\s+[A-Z0-9]+\s+Enzyme/
|
62
|
+
result.push(Bio::KEGG::ENZYME.new(entry))
|
63
|
+
when /^ENTRY\s+[A-Z0-9]+\s+Module/
|
64
|
+
result.push(Bio::KEGG::MODULE.new(entry))
|
65
|
+
when /^ENTRY\s+[A-Z0-9]+\s+Pathway/
|
66
|
+
result.push(Bio::KEGG::PATH.new(entry))
|
67
|
+
else
|
68
|
+
result.push(entry)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
return result
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|
@@ -1,7 +1,3 @@
|
|
1
|
-
#
|
2
|
-
# UniSysDB library in Ruby
|
3
|
-
# Copyright (C) 2012
|
4
|
-
#
|
5
1
|
# @author Natapol Pornputtapong <natapol@chalmers.se>
|
6
2
|
#
|
7
3
|
|
@@ -12,44 +8,28 @@ module Sylfy
|
|
12
8
|
module KEGGREST
|
13
9
|
|
14
10
|
module_function
|
15
|
-
#
|
11
|
+
# interface to KEGG link service
|
12
|
+
#
|
13
|
+
# @param targetdb [Symbol] can be :pathway, :brite, :module, :disease, :drug, :environ, :ko, :genome, org, :compound, :glycan, :reaction, :rpair, :rclass, :enzyme
|
14
|
+
#
|
15
|
+
# @param sourcedb [Symbol] :pathway, :brite, :module, :disease, :drug, :environ, :ko, :genome, org, :compound, :glycan, :reaction, :rpair, :rclass, :enzyme, :genes
|
16
16
|
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
|
20
|
-
def link(id, targetdb = nil)
|
17
|
+
# @return [Hash] results mapping each source ID to an Array of linked target IDs
|
18
|
+
#
|
19
|
+
def link(targetdb, *sourcedb)
|
21
20
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
help: targetdb = pathway | brite | module | disease | drug | environ | ko | genome | <org> | compound | glycan | reaction | rpair | rclass | enzyme
|
26
|
-
help:
|
27
|
-
help: id = KEGG database entries involving the following databases: pathway | brite | module | disease | drug | environ | ko | genome |
|
28
|
-
help: <org> | compound | glycan | reaction | rpair | rclass | enzyme
|
29
|
-
help: <org> = KEGG organism code or T number
|
30
|
-
help:
|
31
|
-
help: <option> = aaseq | ntseq | mol | kcf | image
|
32
|
-
"
|
33
|
-
if id == :help
|
34
|
-
puts help
|
35
|
-
return {}
|
36
|
-
else
|
37
|
-
|
38
|
-
begin
|
39
|
-
doc = URI.parse("#{@@baseuri}/link/#{targetdb}/#{id}").read().strip()
|
40
|
-
result = {}
|
41
|
-
|
42
|
-
doc.each do |line|
|
43
|
-
dat = line.chomp.split(/\t/)
|
44
|
-
result[dat[0]] = result.has_key?(dat[0]) ? result[dat[0]].push(dat[1]) : [dat[1]]
|
45
|
-
end
|
21
|
+
begin
|
22
|
+
doc = URI.parse("#{Sylfy::Service::KEGGREST::BASEURI}/link/#{targetdb.to_s}/#{sourcedb.join("+")}").read().strip()
|
23
|
+
result = {}
|
46
24
|
|
47
|
-
|
48
|
-
|
49
|
-
|
25
|
+
doc.split(/\n/).each do |line|
|
26
|
+
dat = line.chomp.split(/\t/)
|
27
|
+
result[dat[0]] = result.has_key?(dat[0]) ? result[dat[0]].push(dat[1]) : [dat[1]]
|
50
28
|
end
|
51
29
|
|
52
|
-
|
30
|
+
return result
|
31
|
+
rescue OpenURI::HTTPError
|
32
|
+
raise Sylfy::Service::DataNotFound, "Query not found."
|
53
33
|
end
|
54
34
|
|
55
35
|
end
|
@@ -1,7 +1,3 @@
|
|
1
|
-
#
|
2
|
-
# UniSysDB library in Ruby
|
3
|
-
# Copyright (C) 2012
|
4
|
-
#
|
5
1
|
# @author Natapol Pornputtapong <natapol@chalmers.se>
|
6
2
|
#
|
7
3
|
|
@@ -12,55 +8,27 @@ module Sylfy
|
|
12
8
|
module KEGGREST
|
13
9
|
|
14
10
|
module_function
|
15
|
-
|
16
|
-
|
11
|
+
# interface to KEGG list service
|
12
|
+
#
|
13
|
+
# @param database [Symbol] :pathway, :module, :disease, :drug, :environ, :ko, :genome, org, :compound, :glycan, :reaction, :rpair, :rclass, :enzyme, :genes, :ligand
|
14
|
+
#
|
15
|
+
# @param org [Symbol] organism code can be :hsa, :eco, etc {http://www.kegg.jp/kegg/rest/keggapi.html}
|
16
|
+
#
|
17
|
+
# @return [Hash] results with ID and description
|
18
|
+
#
|
19
|
+
def list(database, org = nil)
|
17
20
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
help: <database> = pathway | brite | module | disease | drug | environ | ko | genome |
|
25
|
-
help: <org> | compound | glycan | reaction | rpair | rclass | enzyme | organism
|
26
|
-
help:
|
27
|
-
help: <org> = KEGG organism code or T number
|
28
|
-
help:
|
29
|
-
help:
|
30
|
-
help: type#2
|
31
|
-
help:
|
32
|
-
help: param = \"<database>/<org>\"
|
33
|
-
help: <database> = pathway | module
|
34
|
-
help: <org> = KEGG organism code
|
35
|
-
help:
|
36
|
-
help: type#3
|
37
|
-
help:
|
38
|
-
help: param = \"<dbentries>\"
|
39
|
-
help: <dbentries> = KEGG database entries involving the following databases: pathway | brite | module | disease | drug | environ | ko | genome |
|
40
|
-
help: <org> | compound | glycan | reaction | rpair | rclass | enzyme | organism
|
41
|
-
help:
|
42
|
-
help: return a List
|
43
|
-
)
|
44
|
-
|
45
|
-
if param == :help
|
46
|
-
puts help
|
47
|
-
return {}
|
48
|
-
else
|
49
|
-
|
50
|
-
begin
|
51
|
-
doc = URI.parse("#{@@baseuri}/list/#{param.to_s}").read().strip()
|
52
|
-
result = {}
|
53
|
-
|
54
|
-
doc.each do |line|
|
55
|
-
dat = line.chomp.split(/\t/, 2)
|
56
|
-
result[dat[0]] = dat[1]
|
57
|
-
end
|
58
|
-
|
59
|
-
return result
|
60
|
-
rescue OpenURI::HTTPError
|
61
|
-
raise Sylfy::Service::DataNotFound, "Data not found."
|
21
|
+
begin
|
22
|
+
doc = URI.parse("#{Sylfy::Service::KEGGREST::BASEURI}/list/#{database.to_s}#{org ? "/#{org}" : ""}").read.strip
|
23
|
+
result = {}
|
24
|
+
doc.split(/\n/).each do |line|
|
25
|
+
dat = line.chomp.split(/\t/, 2)
|
26
|
+
result[dat[0]] = dat[1]
|
62
27
|
end
|
63
28
|
|
29
|
+
return result
|
30
|
+
rescue OpenURI::HTTPError
|
31
|
+
raise Sylfy::Service::DataNotFound, "Data not found."
|
64
32
|
end
|
65
33
|
|
66
34
|
end
|
@@ -0,0 +1,228 @@
|
|
1
|
+
#
|
2
|
+
# UniSysDB library in Ruby
|
3
|
+
# Copyright (C) 2012
|
4
|
+
#
|
5
|
+
# @author Natapol Pornputtapong <natapol@chalmers.se>
|
6
|
+
#
|
7
|
+
require 'open-uri'
|
8
|
+
require 'net/http'
|
9
|
+
|
10
|
+
module Sylfy
|
11
|
+
|
12
|
+
module Service
|
13
|
+
|
14
|
+
|
15
|
+
module LipidMap
|
16
|
+
module REST
|
17
|
+
@@baseuri = "http://www.lipidmaps.org/data"
|
18
|
+
|
19
|
+
module_function
|
20
|
+
# Retrieve a single LIPID MAPS structure database (LMSD) record by its LM ID
|
21
|
+
#
|
22
|
+
# == Parameters:
|
23
|
+
# id::
|
24
|
+
# LipidMap id to search
|
25
|
+
# outputtype::
|
26
|
+
# TSV, CSV, SDF or MDLMOL [default: TSV]
|
27
|
+
|
28
|
+
def lmsdRecord(id, outputtype = "TSV")
|
29
|
+
|
30
|
+
uri = "#{@@baseuri}/LMSDRecord.php?LMID=#{id}"
|
31
|
+
uri += "&Mode=File"
|
32
|
+
uri += "&OutputType=#{["TSV", "CSV", "SDF"].include?(outputtype) ? outputtype : 'TSV'}"
|
33
|
+
uri += "&OutputDelimiter=Comma&OutputQuote=Yes&OutputColumnHeader=Yes"
|
34
|
+
|
35
|
+
begin
|
36
|
+
doc = URI.parse(uri).read().strip()
|
37
|
+
case outputtype
|
38
|
+
when 'TSV', nil
|
39
|
+
return CSV.parse(doc, :headers => true, :col_sep => "\t")
|
40
|
+
when 'CSV'
|
41
|
+
return CSV.parse(doc, :headers => true)
|
42
|
+
else
|
43
|
+
return doc
|
44
|
+
end
|
45
|
+
rescue OpenURI::HTTPError
|
46
|
+
return nil
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
# Send a search request to the LMSD search service and return the result
|
52
|
+
#
|
53
|
+
# == Parameters:
|
54
|
+
# str::
|
55
|
+
# search string
|
56
|
+
# mode::
|
57
|
+
# Search mode ProcessStrSearch, ProcessTextSearch or ProcessTextOntologySearch
|
58
|
+
# outputtype::
|
59
|
+
# TSV, CSV, SDF or MDLMOL [default: TSV]
|
60
|
+
|
61
|
+
def lmsdSearch(str, mode, outputtype = "TSV")
|
62
|
+
|
63
|
+
uri = "#{@@baseuri}/structure/LMSDSearch.php?#{str}&OutputMode=File&Mode=#{mode}"
|
64
|
+
uri += "&OutputType=#{["TSV", "CSV", "SDF"].include?(outputtype) ? outputtype : 'TSV'}"
|
65
|
+
uri += "&OutputDelimiter=Comma&OutputQuote=Yes&OutputColumnHeader=Yes"
|
66
|
+
|
67
|
+
begin
|
68
|
+
doc = URI.parse(uri).read().strip()
|
69
|
+
case outputtype
|
70
|
+
when 'TSV', nil
|
71
|
+
return CSV.parse(doc, :headers => true, :col_sep => "\t")
|
72
|
+
when 'CSV'
|
73
|
+
return CSV.parse(doc, :headers => true)
|
74
|
+
else
|
75
|
+
return doc
|
76
|
+
end
|
77
|
+
rescue OpenURI::HTTPError
|
78
|
+
return nil
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|
83
|
+
private :lmsdSearch
|
84
|
+
|
85
|
+
# Search LMSD by a single string field (LM ID, name, formula or SMILES string)
|
86
|
+
#
|
87
|
+
# == Parameters:
|
88
|
+
# string::
|
89
|
+
# search string
|
90
|
+
# field::
|
91
|
+
# field type: LMID, Name, Formula, SMILESString [default: LMID]
|
92
|
+
# exactMatch::
|
93
|
+
# Boolean [default: true]
|
94
|
+
# outputtype::
|
95
|
+
# TSV, CSV, SDF or MDLMOL [default: TSV]
|
96
|
+
|
97
|
+
def lmsdStrSearch(string, field = 'LMID', exactMatch = true, outputtype = "TSV")
|
98
|
+
|
99
|
+
searchType = exactMatch ? 'ExactMatch' : 'SubStructure'
|
100
|
+
|
101
|
+
if ['LMID', 'Name', 'Formula', 'SMILESString'].include?(field)
|
102
|
+
return lmsdSearch("#{field}=#{URI.encode(string)}&SearchType=#{searchType}", "ProcessStrSearch", outputtype)
|
103
|
+
else
|
104
|
+
raise Sylfy::Service::ParameterError, "#{field} field does not in service."
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# Search LMSD by text criteria supplied as a hash of field => value pairs
|
109
|
+
#
|
110
|
+
# == Parameters:
|
111
|
+
# hash::
|
112
|
+
# search hash
|
113
|
+
# {
|
114
|
+
# :LMID => LM_ID,
|
115
|
+
# :Name => part of any common name,
|
116
|
+
# :Formula => formula,
|
117
|
+
# :ExactMass => positive number,
|
118
|
+
# :ExactMassOffSet => positive number,
|
119
|
+
# :CoreClass => integer [1 to 8],
|
120
|
+
# 1=Fatty Acyls [FA]
|
121
|
+
# 2=Glycerolipids [GL]
|
122
|
+
# 3=Glycerophospholipids [GP]
|
123
|
+
# 4=Sphingolipids [SP]
|
124
|
+
# 5=Sterol Lipids [ST]
|
125
|
+
# 6=Prenol Lipids [PR]
|
126
|
+
# 7=Saccharolipids [SL]
|
127
|
+
# 8=Polyketides [PK]
|
128
|
+
# :MainClass => integer,
|
129
|
+
# e.g. FA10=101
|
130
|
+
# FA11=111
|
131
|
+
# ST05=505
|
132
|
+
# :SubClass => integer
|
133
|
+
# e.g. ST0402=50402
|
134
|
+
# }
|
135
|
+
#
|
136
|
+
# outputtype::
|
137
|
+
# TSV, CSV, SDF or MDLMOL [default: TSV]
|
138
|
+
|
139
|
+
def lmsdTxtSearch(hash, outputtype = "TSV")
|
140
|
+
|
141
|
+
string = ''
|
142
|
+
|
143
|
+
hash.each_pair do |k, v|
|
144
|
+
string += '&' if string != ''
|
145
|
+
string += "#{k.to_s}=#{v}"
|
146
|
+
end
|
147
|
+
|
148
|
+
return lmsdSearch(string, "ProcessTextSearch", outputtype)
|
149
|
+
end
|
150
|
+
|
151
|
+
# Search LMSD by ontology parameters (at most the first four valid criteria are used)
|
152
|
+
#
|
153
|
+
# == Parameters:
|
154
|
+
# onto::
|
155
|
+
# string = "<ontoStr>[,<ontoStr>,<ontoStr>,<ontoStr>]"
|
156
|
+
# <ontoStr> = <ParamName> <ParamModifier> <ParamValue>
|
157
|
+
# <ParamName> = carbons, doublebonds, triplebonds, rings, OH, NH2, OOH, ketones, epoxides, COOH, methyls, SH, Br, Cl, F, Methylenes, CHO, OMe, OAc, COOMe, Ester, Ether
|
158
|
+
# <ParamModifier> = eq, ge, le
|
159
|
+
# <ParamValue> = integer
|
160
|
+
# example: "carbons eq 20, triplebonds ge 2" or "carbons eq 10"
|
161
|
+
# option::
|
162
|
+
# {
|
163
|
+
# :OutputType => TSV, CSV, SDF [default: TSV],
|
164
|
+
# }
|
165
|
+
def lmsdOntoSearch(onto, outputtype = "TSV")
|
166
|
+
|
167
|
+
paramName = ["carbons", "doublebonds", "triplebonds", "rings", "OH", "NH2", "OOH", "ketones", "epoxides"]
|
168
|
+
paramName = paramName + ["COOH", "methyls", "SH", "Br", "Cl", "F", "Methylenes", "CHO", "OMe", "OAc", "COOMe", "Ester", "Ether"]
|
169
|
+
|
170
|
+
paramModifier = ["eq", "ge", "le"]
|
171
|
+
|
172
|
+
i = 1
|
173
|
+
ontostr = ""
|
174
|
+
|
175
|
+
onto.chomp.gsub(/, /, ',').split(/,/).each do |str|
|
176
|
+
dat = str.split(/ /)
|
177
|
+
if paramName.include?(dat[0]) && paramModifier.include?(dat[1]) && i < 5
|
178
|
+
ontostr += '&' if i > 1
|
179
|
+
ontostr += "OntologyParamName#{i}=#{dat[0]}&OntologyParamModifier#{i}=#{dat[1]}&OntologyParamValue#{i}=#{dat[2].to_i}"
|
180
|
+
i += 1
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
if ontostr != ''
|
185
|
+
return lmsdSearch(ontostr, "ProcessTextOntologySearch", outputtype)
|
186
|
+
else
|
187
|
+
raise Sylfy::Service::ParameterError, "ontology string error."
|
188
|
+
end
|
189
|
+
|
190
|
+
|
191
|
+
end
|
192
|
+
|
193
|
+
|
194
|
+
|
195
|
+
end
|
196
|
+
|
197
|
+
module_function
|
198
|
+
|
199
|
+
# Look up the LIPID MAPS IDs (LM_ID) of the compound described by an InChI string
|
200
|
+
#
|
201
|
+
# == Parameters:
|
202
|
+
# inchi::
|
203
|
+
# InChI string of the compound to look up
|
204
|
+
# :id, :dataPrimarySource, :xrefs, :relations :inchi, :formula, :smiles, :inchiKey :names
|
205
|
+
def idbyinchi(inchi)
|
206
|
+
result = []
|
207
|
+
if inchi =~ Sylfy::Pattern::INCHI
|
208
|
+
smile = Rubabel[inchi, :inchi].to_s(:smi)
|
209
|
+
REST::lmsdStrSearch(smile, 'SMILESString').each {|e| result.push(e["LM_ID"])} if smile != nil
|
210
|
+
else
|
211
|
+
raise Sylfy::Service::ParameterError, "#{inchi} is not InChI."
|
212
|
+
end
|
213
|
+
return result
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
#test = File.open('tmp.mol', 'w')
|
220
|
+
#test.write(Unisys::Service::RESTLipidMap.lmsdRecord('LMFA08040013', {:OutputType => 'MDLMOL'}))
|
221
|
+
#test.close
|
222
|
+
#output = `obabel -imol tmp.mol -oinchi`
|
223
|
+
#p output
|
224
|
+
#Unisys::Service::RESTLipidMap.getSmallMolecule('LMFA08040013')
|
225
|
+
#puts Unisys::Service::RESTLipidMap.lmsdRecord('LMFA08040013', {:OutputType => 'MDLMOL'})
|
226
|
+
#puts Unisys::Service::RESTLipidMap.lmsdSearchStr('LMSL02000001')
|
227
|
+
#puts Unisys::Service::RESTLipidMap.lmsdSearchTxt({:CoreClass => 1, :MainClass => 101, :SubClass => 10101})
|
228
|
+
#puts Unisys::Service::RESTLipidMap.lmsdSearchOnto('carbons eq 20')
|