rbbt-sources 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NjczYWU0NDMyM2IwZDBlYWFjNGVlNWU4NTg5ODFhMGEzYmEwZGJiYw==
5
+ data.tar.gz: !binary |-
6
+ MjUzNGFjZDJjYzk1ZGJiMjIwNzllMjA4ZDMyODI2YTQzYzhhNzU0Yg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ NGZiMjgxYzQ0OGY2MzgxYmUzMzEzN2E1NzBjNDc4MjU3YjRmZjM0OTMwMTcz
10
+ YzFmMTU4Y2FkMzI4OTljZTA2MTJhNmVhZDQzNzA2NDAwNGM4ODc0ZTAwYzEx
11
+ MDZjYzAzODEyZjc1OTlmODJhYWE5YjE3ZjI3ODNlYWZlODZmYzc=
12
+ data.tar.gz: !binary |-
13
+ NWExMTU0MGMyZWExY2U5NWI2YWJhODYzZDcxMDFkYTc0NWZjN2M3ZDAzZTRh
14
+ Njk4NTgwMDgwZWJkNjhiNWM3OTA0MDE5Y2IwZjI1OTFhYzU3YmJkZWFhN2M4
15
+ ZGY2ZTA3NGNjOTM4MDBmZWY4NmQ0ZTMzODc3NmIwMzE1MTM1YjY=
@@ -1,21 +1,31 @@
1
1
  require 'rbbt'
2
2
  require 'rbbt/resource'
3
+
3
4
  module COSMIC
4
5
  extend Resource
5
6
  self.subdir = "share/databases/COSMIC"
6
7
 
7
- COSMIC.claim COSMIC.Mutations, :proc do
8
- url = "ftp://ftp.sanger.ac.uk/pub/CGP/wgs/data_export/CosmicWGS_MutantExport_v61_260912.tsv.gz"
8
+ COSMIC.claim COSMIC.mutations, :proc do
9
+ url = "ftp://ftp.sanger.ac.uk/pub/CGP/cosmic/data_export/CosmicCompleteExport_v64_260313.tsv.gz"
9
10
 
10
- tsv = TSV.open(Open.open(url), :type => :list, :header_hash => "", :key_field => "Mutation ID", :namespace => "Hsa/jun2011")
11
+ stream = CMD.cmd('awk \'BEGIN{FS="\t"} { if ($12 != "" && $12 != "Mutation ID") { sub($12, "COSM" $12 ":" $4)}; print}\'', :in => Open.open(url), :pipe => true)
12
+ tsv = TSV.open(stream, :type => :list, :header_hash => "", :key_field => "Mutation ID", :namespace => "Hsa/jun2011")
11
13
  tsv.fields = tsv.fields.collect{|f| f == "Gene name" ? "Associated Gene Name" : f}
12
14
  tsv.add_field "Genomic Mutation" do |mid, values|
13
15
  position = values["Mutation GRCh37 genome position"]
14
16
  cds = values["Mutation CDS"]
17
+
15
18
  if position.nil? or position.empty?
16
19
  nil
17
20
  else
18
21
  position = position.split("-").first
22
+
23
+ chr, pos = position.split(":")
24
+ chr = "X" if chr == "23"
25
+ chr = "Y" if chr == "24"
26
+ chr = "M" if chr == "25"
27
+ position = [chr, pos ] * ":"
28
+
19
29
  if cds.nil?
20
30
  position
21
31
  else
@@ -52,6 +62,92 @@ module COSMIC
52
62
  end
53
63
  end
54
64
  end
55
- tsv.to_s.gsub(/^(\d)/m,'COSM\1').gsub(/(\d)-(\d)/,'\1:\2')
65
+
66
+ tsv.to_s.gsub(/(\d)-(\d)/,'\1:\2')
67
+ end
68
+
69
+ COSMIC.claim COSMIC.mutations_hg18, :proc do |filename|
70
+ require 'rbbt/sources/organism'
71
+ file = COSMIC.mutations.open
72
+ begin
73
+
74
+ while (line = file.gets) !~ /Genomic Mutation/; end
75
+ fields = line[1..-2].split("\t")
76
+ mutation_pos = fields.index "Genomic Mutation"
77
+
78
+ mutations = CMD.cmd("grep -v '^#'|cut -f #{mutation_pos + 1}|sort -u", :in => COSMIC.mutations.open).read.split("\n").select{|m| m.include? ":" }
79
+
80
+ translations = Misc.process_to_hash(mutations){|mutations| Organism.liftOver(mutations, "Hsa/jun2011", "Hsa/may2009")}
81
+
82
+ File.open(filename, 'w') do |f|
83
+ f.puts "#: :type=:list#:namespace=Hsa/may2009"
84
+ f.puts "#" + fields * "\t"
85
+ while line = file.gets do
86
+ next if line[0] == "#"[0]
87
+ line.strip!
88
+ parts = line.split("\t")
89
+ parts[mutation_pos] = translations[parts[mutation_pos]]
90
+ f.puts parts * "\t"
91
+ end
92
+ end
93
+ rescue Exception
94
+ FileUtils.rm filename if File.exists? filename
95
+ raise $!
96
+ ensure
97
+ file.close
98
+ end
99
+
100
+ nil
101
+ end
102
+
103
+
104
+ def self.rsid_index(organism, chromosome = nil)
105
+ build = Organism.hg_build(organism)
106
+
107
+ tag = [build, chromosome] * ":"
108
+ fwt = nil
109
+ Persist.persist("StaticPosIndex for COSMIC [#{ tag }]", :fwt, :persist => true) do
110
+ value_size = 0
111
+ file = COSMIC[build == "hg19" ? "mutations" : "mutations_hg18"]
112
+ chr_positions = []
113
+ begin
114
+ Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
115
+ next if line[0] == "#"[0]
116
+ rsid, mutation = line.split("\t").values_at 0, 25
117
+ next if mutation.nil? or mutation.empty?
118
+ chr, pos = mutation.split(":")
119
+ next if chr != chromosome or pos.nil? or pos.empty?
120
+ chr_positions << [rsid, pos.to_i]
121
+ value_size = rsid.length if rsid.length > value_size
122
+ end
123
+ rescue
124
+ end
125
+ fwt = FixWidthTable.new :memory, value_size
126
+ fwt.add_point(chr_positions)
127
+ fwt
128
+ end
129
+ end
130
+
131
+ def self.mutation_index(organism)
132
+ build = Organism.hg_build(organism)
133
+ file = COSMIC[build == "hg19" ? "mutations" : "mutations_hg18"]
134
+ @mutation_index ||= {}
135
+ @mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single, :persist => true
136
+ end
137
+
138
+
139
+ end
140
+
141
+ if defined? Entity
142
+ if defined? Gene and Entity === Gene
143
+ module Gene
144
+ property :COSMIC_rsids => :single2array do
145
+ COSMIC.rsid_index(organism, chromosome)[self.chr_range]
146
+ end
147
+
148
+ property :COSMIC_mutations => :single2array do
149
+ GenomicMutation.setup(COSMIC.mutation_index(organism).values_at(*self.COSMIC_rsids).uniq, "COSMIC mutations over #{self.name || self}", organism, false)
150
+ end
151
+ end
56
152
  end
57
153
  end
@@ -3,7 +3,7 @@ module NCI
3
3
  extend Resource
4
4
  self.subdir = "share/databases/NCI"
5
5
 
6
- NCI.claim NCI.root.find, :rake, Rbbt.share.install.NCI.Rakefile.find(:lib)
6
+ NCI.claim NCI.root, :rake, Rbbt.share.install.NCI.Rakefile.find(:lib)
7
7
  end
8
8
 
9
9
  if defined? Entity
@@ -5,11 +5,11 @@ module STITCH
5
5
  extend Resource
6
6
  self.subdir = "share/databases/STITCH"
7
7
 
8
- STITCH.claim STITCH.source.chemical_chemical.find, :url, "http://stitch.embl.de/download/chemical_chemical.links.detailed.v3.1.tsv.gz"
9
- STITCH.claim STITCH.source.protein_chemical.find, :url, "http://stitch.embl.de/download/protein_chemical.links.detailed.v3.1.tsv.gz"
10
- STITCH.claim STITCH.source.actions.find, :url, "http://stitch.embl.de/download/actions.v3.1.tsv.gz"
11
- STITCH.claim STITCH.source.aliases.find, :url, "http://stitch.embl.de/download/chemical.aliases.v3.1.tsv.gz"
12
- STITCH.claim STITCH.source.sources.find, :url, "http://stitch.embl.de/download/chemical.sources.v3.1.tsv.gz"
8
+ STITCH.claim STITCH.source.chemical_chemical, :url, "http://stitch.embl.de/download/chemical_chemical.links.detailed.v3.1.tsv.gz"
9
+ STITCH.claim STITCH.source.protein_chemical, :url, "http://stitch.embl.de/download/protein_chemical.links.detailed.v3.1.tsv.gz"
10
+ STITCH.claim STITCH.source.actions, :url, "http://stitch.embl.de/download/actions.v3.1.tsv.gz"
11
+ STITCH.claim STITCH.source.aliases, :url, "http://stitch.embl.de/download/chemical.aliases.v3.1.tsv.gz"
12
+ STITCH.claim STITCH.source.sources, :url, "http://stitch.embl.de/download/chemical.sources.v3.1.tsv.gz"
13
13
 
14
14
  Organism.installable_organisms.each do |organism|
15
15
  STITCH.claim STITCH.chemical_protein(organism), :proc do
@@ -10,25 +10,27 @@ module DbSNP
10
10
  URL = "ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606/VCF/common_all.vcf.gz"
11
11
 
12
12
  DbSNP.claim DbSNP.mutations_ncbi, :proc do
13
- tsv = TSV.setup({}, :key_field => "RS ID", :fields => ["Genomic Mutation"], :type => :single)
13
+ tsv = TSV.setup({}, :key_field => "RS ID", :fields => ["Genomic Mutation"], :type => :flat)
14
14
  file = Open.open(URL, :nocache => true)
15
15
  while line = file.gets do
16
16
  next if line[0] == "#"[0]
17
17
  chr, position, id, ref, alt = line.split "\t"
18
- alt = alt.split(",").first
19
- if alt[0] == ref[0]
20
- alt[0] = '+'[0]
18
+
19
+ mutations = alt.split(",").collect do |a|
20
+ if alt[0] == ref[0]
21
+ alt[0] = '+'[0]
22
+ end
23
+ [chr, position, alt] * ":"
21
24
  end
22
- mutation = [chr, position, alt] * ":"
23
25
 
24
26
  tsv.namespace = "Hsa/may2012"
25
- tsv[id] = mutation
27
+ tsv[id] = mutations
26
28
  end
27
29
 
28
30
  tsv.to_s
29
31
  end
30
32
 
31
- DbSNP.claim DbSNP.mutations, :proc do
33
+ DbSNP.claim DbSNP.rsids, :proc do |filename|
32
34
  ftp = Net::FTP.new('ftp.broadinstitute.org')
33
35
  ftp.passive = true
34
36
  ftp.login('gsapubftp-anonymous', 'devnull@nomail.org')
@@ -37,65 +39,156 @@ module DbSNP
37
39
  tmpfile = TmpFile.tmp_file + '.gz'
38
40
  ftp.getbinaryfile('dbsnp_137.hg19.vcf.gz', tmpfile, 1024)
39
41
 
40
- tsv = TSV.setup({}, :key_field => "RS ID", :fields => ["Genomic Mutation", "GMAF", "G5", "G5A", "dbSNP Build ID"], :type => :list)
41
42
  file = Open.open(tmpfile, :nocache => true)
42
- while line = file.gets do
43
- next if line[0] == "#"[0]
43
+ begin
44
+ File.open(filename, 'w') do |f|
45
+ f.puts "#: :type=:list#:namespace=Hsa/may2012"
46
+ f.puts "#" + ["RS ID", "GMAF", "G5", "G5A", "dbSNP Build ID"] * "\t"
47
+ while line = file.gets do
48
+ next if line[0] == "#"[0]
49
+
50
+ chr, position, id, ref, muts, qual, filter, info = line.split "\t"
51
+
52
+ g5 = g5a = dbsnp_build_id = gmaf = nil
53
+
54
+ gmaf = $1 if info =~ /GMAF=([0-9.]+)/
55
+ g5 = true if info =~ /\bG5\b/
56
+ g5a = true if info =~ /\bG5A\b/
57
+ dbsnp_build_id = $1 if info =~ /dbSNPBuildID=(\d+)/
44
58
 
45
- chr, position, id, ref, mut, qual, filter, info = line.split "\t"
46
-
47
- chr.sub!('chr', '')
48
-
49
- mut = mut.split(",").first
50
- case
51
- when ref == '-'
52
- mut = "+" << mut
53
- when mut == '-'
54
- mut = "-" * ref.length
55
- when (mut.length > 1 and ref.length > 1)
56
- mut = '-' * ref.length << mut
57
- when (mut.length > 1 and ref.length == 1 and mut.index(ref) == 0)
58
- mut = '+' << mut[1..-1]
59
- when (mut.length == 1 and ref.length > 1 and ref.index(mut) == 0)
60
- mut = '-' * (ref.length - 1)
61
- else
62
- mut = mut
59
+ f.puts [id, gmaf, g5, g5a, dbsnp_build_id] * "\t"
60
+ end
63
61
  end
62
+ rescue Exception
63
+ FileUtils.rm filename if File.exists? filename
64
+ raise $!
65
+ ensure
66
+ file.close
67
+ FileUtils.rm tmpfile
68
+ end
64
69
 
65
- g5 = g5a = dbsnp_build_id = gmaf = nil
66
-
67
- gmaf = $1 if info =~ /GMAF=([0-9.]+)/
68
- g5 = true if info =~ /\bG5\b/
69
- g5a = true if info =~ /\bG5A\b/
70
- dbsnp_build_id = $1 if info =~ /dbSNPBuildID=(\d+)/
70
+ nil
71
+ end
71
72
 
72
- mutation = [chr, position, mut] * ":"
73
+ DbSNP.claim DbSNP.mutations, :proc do |filename|
74
+ ftp = Net::FTP.new('ftp.broadinstitute.org')
75
+ ftp.passive = true
76
+ ftp.login('gsapubftp-anonymous', 'devnull@nomail.org')
77
+ ftp.chdir('/bundle/2.3/hg19')
73
78
 
74
- tsv.namespace = "Hsa/may2012"
79
+ tmpfile = TmpFile.tmp_file + '.gz'
80
+ ftp.getbinaryfile('dbsnp_137.hg19.vcf.gz', tmpfile, 1024)
75
81
 
76
- tsv[id] = [mutation, gmaf, g5, g5a, dbsnp_build_id]
77
- end
82
+ file = Open.open(tmpfile, :nocache => true)
83
+ begin
84
+ File.open(filename, 'w') do |f|
85
+ f.puts "#: :type=:flat#:namespace=Hsa/may2012"
86
+ f.puts "#" + ["RS ID", "Genomic Mutation"] * "\t"
87
+ while line = file.gets do
88
+ next if line[0] == "#"[0]
78
89
 
79
- FileUtils.rm tmpfile
90
+ chr, position, id, ref, muts, qual, filter, info = line.split "\t"
80
91
 
81
- tsv.to_s
92
+ chr.sub!('chr', '')
93
+
94
+ position, muts = Misc.correct_vcf_mutation(position.to_i, ref, muts)
95
+
96
+ mutations = muts.collect{|mut| [chr, position, mut] * ":" }
97
+
98
+ f.puts ([id] + mutations) * "\t"
99
+ end
100
+ end
101
+ rescue Exception
102
+ FileUtils.rm filename if File.exists? filename
103
+ raise $!
104
+ ensure
105
+ file.close
106
+ FileUtils.rm tmpfile
107
+ end
108
+
109
+ nil
82
110
  end
83
111
 
84
- DbSNP.claim DbSNP.mutations_hg18, :proc do
112
+ DbSNP.claim DbSNP.mutations_hg18, :proc do |filename|
85
113
  require 'rbbt/sources/organism'
86
114
 
87
- hg19_tsv = DbSNP.mutations.tsv :unnamed => true
88
-
89
- mutations = hg19_tsv.values
115
+ mutations = CMD.cmd("grep -v '^#'|cut -f 2|sort -u", :in => DbSNP.mutations.open).read.split("\n").collect{|l| l.split("|")}.flatten
90
116
 
91
117
  translations = Misc.process_to_hash(mutations){|mutations| Organism.liftOver(mutations, "Hsa/jun2011", "Hsa/may2009")}
118
+ begin
119
+ file = Open.open(DbSNP.mutations.find, :nocache => true)
120
+ File.open(filename, 'w') do |f|
121
+ f.puts "#: :type=:flat#:namespace=Hsa/may2009"
122
+ f.puts "#" + ["RS ID", "Genomic Mutation"] * "\t"
123
+ while line = file.gets do
124
+ next if line[0] == "#"[0]
125
+ parts = line.split("\t")
126
+ parts[1..-1] = parts[1..-1].collect{|p| translations[p]} * "|"
127
+ f.puts parts * "\t"
128
+ end
129
+ end
130
+ rescue Exception
131
+ FileUtils.rm filename if File.exists? filename
132
+ raise $!
133
+ ensure
134
+ file.close
135
+ end
136
+
137
+ nil
138
+ end
92
139
 
93
- tsv = hg19_tsv.process "Genomic Mutation" do |mutation|
94
- translations[mutation]
140
+ def self.rsid_index(organism, chromosome = nil)
141
+ build = Organism.hg_build(organism)
142
+
143
+ tag = [build, chromosome] * ":"
144
+ Persist.persist("StaticPosIndex for dbSNP [#{ tag }]", :fwt, :persist => true) do
145
+ value_size = 0
146
+ file = DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"]
147
+ chr_positions = []
148
+ Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
149
+ next if line[0] == "#"[0]
150
+ rsid, mutation = line.split("\t")
151
+ next if mutation.nil? or mutation.empty?
152
+ chr, pos = mutation.split(":")
153
+ next if chr != chromosome or pos.nil? or pos.empty?
154
+ chr_positions << [rsid, pos.to_i]
155
+ value_size = rsid.length if rsid.length > value_size
156
+ end
157
+ fwt = FixWidthTable.new :memory, value_size
158
+ fwt.add_point(chr_positions)
159
+ fwt
95
160
  end
161
+ end
96
162
 
97
- tsv.namespace = "Hsa/may2009"
163
+ def self.mutation_index(organism)
164
+ build = Organism.hg_build(organism)
165
+ file = DbSNP[build == "hg19" ? "mutations" : "mutations_hg18"]
166
+ @mutation_index ||= {}
167
+ @mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single, :persist => true
168
+ end
169
+
170
+ end
171
+
172
+ if defined? Entity
173
+ if defined? Gene and Entity === Gene
174
+ module Gene
175
+ property :dbSNP_rsids => :single2array do
176
+ DbSNP.rsid_index(organism, chromosome)[self.chr_range]
177
+ end
178
+
179
+ property :dbSNP_mutations => :single2array do
180
+ GenomicMutation.setup(DbSNP.mutation_index(organism).values_at(*self.dbSNP_rsids).compact.flatten.uniq, "dbSNP mutations over #{self.name || self}", organism, true)
181
+ end
182
+ end
183
+ end
184
+
185
+ if defined? GenomicMutation and Entity === GenomicMutation
186
+ module GenomicMutation
187
+ property :dbSNP => :array2single do
188
+ dbSNP.mutations.tsv(:persist => true, :key_field => "Genomic Mutation", :fields => ["RS ID"], :type => :single).values_at *self
189
+ end
190
+ end
98
191
 
99
- tsv.to_s
100
192
  end
101
193
  end
194
+
@@ -0,0 +1,13 @@
1
+ require 'rbbt'
2
+
3
+ module Ensembl
4
+ def self.releases
5
+ @releases ||= Rbbt.share.Ensembl.release_dates.find.tsv :key_field => "build"
6
+ end
7
+
8
+ def self.org2release(organism)
9
+ releases[organism.split("/").last || "current"]
10
+ end
11
+ end
12
+
13
+
@@ -1,24 +1,31 @@
1
1
  require 'rbbt/util/open'
2
2
  require 'rbbt/sources/organism'
3
3
  require 'rbbt/tsv'
4
+ require 'rbbt/sources/ensembl'
4
5
  require 'net/ftp'
5
6
 
6
7
  module Ensembl
7
-
8
8
 
9
- def self.releases
10
- @releases ||= Rbbt.share.Ensembl.release_dates.find.tsv :key_field => "build"
11
- end
12
-
13
9
  module FTP
14
10
 
15
11
  SERVER = "ftp.ensembl.org"
16
12
 
13
+ def self.mysql_path(release)
14
+ end
15
+
17
16
  def self.ftp_name_for(organism)
18
17
  code, build = organism.split "/"
19
18
  build ||= "current"
20
19
 
21
20
  if build.to_s == "current"
21
+ release = 'current'
22
+ name = Organism.scientific_name(organism)
23
+ ftp = Net::FTP.new(Ensembl::FTP::SERVER)
24
+ ftp.passive = true
25
+ ftp.login
26
+ ftp.chdir(File.join('pub', 'current_mysql'))
27
+ file = ftp.list(name.downcase.gsub(" ",'_') + "_core_*").collect{|l| l.split(" ").last}.last
28
+ ftp.close
22
29
  else
23
30
  release = Ensembl.releases[build]
24
31
  name = Organism.scientific_name(organism)
@@ -34,7 +41,11 @@ module Ensembl
34
41
 
35
42
  def self.ftp_directory_for(organism)
36
43
  release, ftp_name = ftp_name_for(organism)
37
- File.join('/pub/', release, 'mysql', ftp_name)
44
+ if release == 'current'
45
+ File.join('/pub/', 'current_mysql', ftp_name)
46
+ else
47
+ File.join('/pub/', release, 'mysql', ftp_name)
48
+ end
38
49
  end
39
50
 
40
51
  def self.base_url(organism)
@@ -139,10 +139,12 @@ module Entrez
139
139
  else
140
140
  filename = gene_filename geneid
141
141
 
142
+
142
143
  if FileCache.found(filename)
143
144
  return Gene.new(Open.read(FileCache.path(filename)))
144
145
  else
145
146
  xml = get_online(geneid)
147
+
146
148
  FileCache.add(filename, xml) unless FileCache.found(filename)
147
149
 
148
150
  return Gene.new(xml)
@@ -150,30 +152,30 @@ module Entrez
150
152
  end
151
153
  end
152
154
 
153
- # Counts the words in common between a chunk of text and the text
154
- # found in Entrez Gene for that particular gene. The +gene+ may be a
155
- # gene identifier or a Gene class instance.
156
- def self.gene_text_similarity(gene, text)
157
-
158
- case
159
- when Entrez::Gene === gene
160
- gene_text = gene.text
161
- when String === gene || Fixnum === gene
162
- begin
163
- gene_text = get_gene(gene).text
164
- rescue CMD::CMDError
165
- return 0
166
- end
167
- else
155
+ # Counts the words in common between a chunk of text and the text
156
+ # found in Entrez Gene for that particular gene. The +gene+ may be a
157
+ # gene identifier or a Gene class instance.
158
+ def self.gene_text_similarity(gene, text)
159
+
160
+ case
161
+ when Entrez::Gene === gene
162
+ gene_text = gene.text
163
+ when String === gene || Fixnum === gene
164
+ begin
165
+ gene_text = get_gene(gene).text
166
+ rescue CMD::CMDError
168
167
  return 0
169
168
  end
169
+ else
170
+ return 0
171
+ end
170
172
 
171
- gene_words = gene_text.words.to_set
172
- text_words = text.words.to_set
173
+ gene_words = gene_text.words.to_set
174
+ text_words = text.words.to_set
173
175
 
174
- return 0 if gene_words.empty? || text_words.empty?
176
+ return 0 if gene_words.empty? || text_words.empty?
175
177
 
176
- common = gene_words.intersection(text_words)
177
- common.length / (gene_words.length + text_words.length).to_f
178
- end
178
+ common = gene_words.intersection(text_words)
179
+ common.length / (gene_words.length + text_words.length).to_f
180
+ end
179
181
  end
@@ -1,6 +1,7 @@
1
1
  require 'rbbt'
2
2
  require 'rbbt/util/open'
3
3
  require 'rbbt/resource'
4
+ require 'rbbt/entity/gene'
4
5
 
5
6
  module Genomes1000
6
7
  extend Resource
@@ -49,4 +50,60 @@ module Genomes1000
49
50
  tsv.to_s
50
51
  end
51
52
 
53
+ def self.rsid_index(organism, chromosome = nil)
54
+ build = Organism.hg_build(organism)
55
+
56
+ tag = [build, chromosome] * ":"
57
+ Persist.persist("StaticPosIndex for Genomes1000 [#{ tag }]", :fwt, :persist => true) do
58
+ value_size = 0
59
+ file = Genomes1000[build == "hg19" ? "mutations" : "mutations_hg18"]
60
+ chr_positions = []
61
+ Open.read(CMD.cmd("grep '\t#{chromosome}:'", :in => file.open, :pipe => true)) do |line|
62
+ next if line[0] == "#"[0]
63
+ rsid, mutation = line.split("\t")
64
+ next if mutation.nil? or mutation.empty?
65
+ chr, pos = mutation.split(":")
66
+ next if chr != chromosome or pos.nil? or pos.empty?
67
+ chr_positions << [rsid, pos.to_i]
68
+ value_size = rsid.length if rsid.length > value_size
69
+ end
70
+ fwt = FixWidthTable.new :memory, value_size
71
+ fwt.add_point(chr_positions)
72
+ fwt
73
+ end
74
+ end
75
+
76
+ def self.mutation_index(organism)
77
+ build = Organism.hg_build(organism)
78
+ file = Genomes1000[build == "hg19" ? "mutations" : "mutations_hg18"]
79
+ @mutation_index ||= {}
80
+ @mutation_index[build] ||= file.tsv :persist => true, :fields => ["Genomic Mutation"], :type => :single, :persist => true
81
+ end
82
+
83
+
52
84
  end
85
+
86
+
87
+ if defined? Entity
88
+ if defined? Gene and Entity === Gene
89
+ module Gene
90
+ property :genomes_1000_rsids => :single2array do
91
+ Genomes1000.rsid_index(organism, chromosome)[self.chr_range]
92
+ end
93
+
94
+ property :genomes_1000_mutations => :single2array do
95
+ GenomicMutation.setup(Genomes1000.mutation_index(organism).values_at(*self.genomes_1000_rsids).uniq, "1000 Genomes mutations over #{self.name || self}", organism, true)
96
+ end
97
+ end
98
+ end
99
+
100
+ if defined? GenomicMutation and Entity === GenomicMutation
101
+ module GenomicMutation
102
+ property :genomes_1000 => :array2single do
103
+ Genomes1000.mutations.tsv(:persist => true, :key_field => "Genomic Mutation", :fields => ["Variant ID"], :type => :single).values_at *self
104
+ end
105
+ end
106
+ end
107
+ end
108
+
109
+
@@ -18,7 +18,7 @@ module GO
18
18
  # only the name field is used.
19
19
  def self.init
20
20
  Persist.persist_tsv(nil, 'gene_ontology', {}, :persist => true) do |info|
21
- info.serializer = :marshal if info.respond_to? :serializer and info.serializer == :type
21
+ info.serializer = :marshal if info.respond_to? :serializer
22
22
  Rbbt.share.databases.GO.gene_ontology.read.split(/\[Term\]/).each{|term|
23
23
  term_info = {}
24
24
 
@@ -37,11 +37,11 @@ module GO
37
37
  }
38
38
 
39
39
  info
40
- end
40
+ end.tap{|o| o.unnamed = true}
41
41
  end
42
42
 
43
43
  def self.info
44
- @info ||= self.init
44
+ @@info ||= self.init
45
45
  end
46
46
 
47
47
  def self.goterms
@@ -94,7 +94,7 @@ if defined? Entity
94
94
  self.annotation :organism
95
95
 
96
96
  property :name => :array2single do
97
- @name ||= GO.id2name(self)
97
+ GO.id2name(self)
98
98
  end
99
99
 
100
100
  property :genes => :array2single do |*args|
@@ -117,19 +117,19 @@ if defined? Entity
117
117
  if defined? Gene and Entity === Gene
118
118
  module Gene
119
119
  property :go_terms => :array2single do
120
- @go_terms ||= Organism.gene_go(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :merge => true, :namespace => organism).values_at *self.ensembl
120
+ @go_terms ||= Organism.gene_go(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :merge => true, :namespace => organism).chunked_values_at self.ensembl
121
121
  end
122
122
 
123
123
  property :go_bp_terms => :array2single do
124
- @go_bp_terms ||= Organism.gene_go_bp(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :merge => true, :namespace => organism).values_at *self.ensembl
124
+ @go_bp_terms ||= Organism.gene_go_bp(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :merge => true, :namespace => organism).chunked_values_at self.ensembl
125
125
  end
126
126
 
127
127
  property :go_cc_terms => :array2single do
128
- @go_cc_terms ||= Organism.gene_go_cc(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :merge => true, :namespace => organism).values_at *self.ensembl
128
+ @go_cc_terms ||= Organism.gene_go_cc(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :merge => true, :namespace => organism).chunked_values_at self.ensembl
129
129
  end
130
130
 
131
131
  property :go_mf_terms => :array2single do
132
- @go_mf_terms ||= Organism.gene_go_mf(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :merge => true, :namespace => organism).values_at *self.ensembl
132
+ @go_mf_terms ||= Organism.gene_go_mf(organism).tsv(:persist => true, :key_field => "Ensembl Gene ID", :fields => ["GO ID"], :type => :flat, :merge => true, :namespace => organism).chunked_values_at self.ensembl
133
133
  end
134
134
 
135
135
  end
@@ -46,7 +46,11 @@ module Organism
46
46
  return positions
47
47
  end
48
48
 
49
- positions_bed = positions.collect{|position| chr, pos = position.split(":").values_at(0,1); ["chr" << chr, pos.to_i-1, pos, position] * "\t"} * "\n" + "\n"
49
+ positions_bed = positions.collect{|position|
50
+ chr, pos = position.split(":").values_at(0,1)
51
+ ["chr" << chr, pos.to_i-1, pos, position] * "\t"
52
+ } * "\n" + "\n"
53
+
50
54
  new_positions = {}
51
55
 
52
56
  TmpFile.with_file(positions_bed) do |source_bed|
@@ -4,6 +4,29 @@ require 'rbbt/resource'
4
4
  require 'rbbt/entity'
5
5
  require 'rbbt/sources/InterPro'
6
6
 
7
+ InterPro.claim InterPro.pfam_names, :proc do
8
+ pfam_domains = Pfam.domains.read.split("\n").collect{|l| l.split("\t").first}.compact.flatten
9
+ tsv = nil
10
+ TmpFile.with_file(pfam_domains * "\n") do |tmpfile|
11
+ tsv = TSV.open(CMD.cmd("cut -f 4,3 | sort -u |grep -w -f #{ tmpfile }", :in => InterPro.source.protein2ipr.open, :pipe => true), :key_field => 1, :fields => [0], :type => :single)
12
+ end
13
+ tsv.key_field = "InterPro ID"
14
+ tsv.fields = ["Domain Name"]
15
+ tsv.to_s
16
+ end
17
+
18
+ InterPro.claim InterPro.pfam_equivalences, :proc do
19
+ pfam_domains = Pfam.domains.read.split("\n").collect{|l| l.split("\t").first}.compact.flatten
20
+ tsv = nil
21
+ TmpFile.with_file(pfam_domains * "\n") do |tmpfile|
22
+ tsv = TSV.open(CMD.cmd("cut -f 2,4 | sort -u |grep -w -f #{ tmpfile }", :in => InterPro.source.protein2ipr.open, :pipe => true), :key_field => 0, :fields => [1], :type => :single)
23
+ end
24
+ tsv.key_field = "InterPro ID"
25
+ tsv.fields = ["Pfam Domain"]
26
+ tsv.to_s
27
+ end
28
+
29
+
7
30
  module Pfam
8
31
  extend Resource
9
32
  self.subdir = "share/databases/Pfam"
@@ -14,7 +37,7 @@ module Pfam
14
37
  tsv.to_s
15
38
  end
16
39
 
17
- NAMES_FILE = InterPro.pfam_names.find
40
+ NAMES_FILE = InterPro.pfam_names.produce
18
41
 
19
42
  def self.name_index
20
43
  @name_index ||= TSV.open NAMES_FILE, :single, :unnamed => true
@@ -31,28 +54,6 @@ module InterPro
31
54
  end
32
55
  end
33
56
 
34
- InterPro.claim InterPro.pfam_names, :proc do
35
- pfam_domains = Pfam.domains.read.split("\n").collect{|l| l.split("\t").first}.compact.flatten
36
- tsv = nil
37
- TmpFile.with_file(pfam_domains * "\n") do |tmpfile|
38
- tsv = TSV.open(CMD.cmd("cut -f 4,3 | sort -u |grep -w -f #{ tmpfile }", :in => InterPro.source.protein2ipr.open, :pipe => true), :key_field => 1, :fields => [0], :type => :single)
39
- end
40
- tsv.key_field = "InterPro ID"
41
- tsv.fields = ["Domain Name"]
42
- tsv.to_s
43
- end
44
-
45
- InterPro.claim InterPro.pfam_equivalences, :proc do
46
- pfam_domains = Pfam.domains.read.split("\n").collect{|l| l.split("\t").first}.compact.flatten
47
- tsv = nil
48
- TmpFile.with_file(pfam_domains * "\n") do |tmpfile|
49
- tsv = TSV.open(CMD.cmd("cut -f 2,4 | sort -u |grep -w -f #{ tmpfile }", :in => InterPro.source.protein2ipr.open, :pipe => true), :key_field => 0, :fields => [1], :type => :single)
50
- end
51
- tsv.key_field = "InterPro ID"
52
- tsv.fields = ["Pfam Domain"]
53
- tsv.to_s
54
- end
55
-
56
57
 
57
58
  if defined? Entity
58
59
  module PfamDomain
@@ -54,6 +54,7 @@ module PubMed
54
54
  [:year , "Journal/JournalIssue/PubDate/Year"],
55
55
  [:month , "Journal/JournalIssue/PubDate/Month"],
56
56
  [:pages , "Pagination/MedlinePgn"],
57
+ [:author , "AuthorList/Author"],
57
58
  [:abstract , "Abstract/AbstractText"],
58
59
  ]
59
60
 
@@ -154,7 +155,7 @@ module PubMed
154
155
  end
155
156
  end
156
157
 
157
- text
158
+ Misc.fixutf8(text)
158
159
  end
159
160
 
160
161
  def bibtex
@@ -187,7 +188,9 @@ module PubMed
187
188
 
188
189
  # Join the text from title and abstract
189
190
  def text
190
- [title, abstract].join("\n")
191
+ text = [title, abstract].join("\n")
192
+
193
+ Misc.fixutf8(text)
191
194
  end
192
195
  end
193
196
 
@@ -50,16 +50,13 @@ if defined? Entity and defined? Gene and Entity === Gene
50
50
  tfs = TFacts.targets.keys
51
51
  self.name.collect{|gene| tfs.include? gene}
52
52
  end
53
- persist :_ary_is_transcription_factor?
54
53
 
55
54
  property :transcription_regulators => :array2single do
56
55
  Gene.setup(TFacts.regulators.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
57
56
  end
58
- persist :_ary_transcription_regulators
59
57
 
60
58
  property :transcription_targets => :array2single do
61
59
  Gene.setup(TFacts.targets.tsv(:persist => true).values_at(*self.name), "Associated Gene Name", self.organism)
62
60
  end
63
- persist :_ary_transcription_targets
64
61
  end
65
62
  end
@@ -1,3 +1,4 @@
1
+ require 'rbbt'
1
2
  require 'rbbt/util/open'
2
3
  require 'rbbt/resource'
3
4
  require 'rbbt/sources/cath'
@@ -33,6 +34,7 @@ module UniProt
33
34
 
34
35
 
35
36
  UNIPROT_TEXT="http://www.uniprot.org/uniprot/[PROTEIN].txt"
37
+ UNIPROT_FASTA="http://www.uniprot.org/uniprot/[PROTEIN].fasta"
36
38
  def self.pdbs(protein)
37
39
  url = UNIPROT_TEXT.sub "[PROTEIN]", protein
38
40
  text = Open.read(url)
@@ -44,15 +46,70 @@ module UniProt
44
46
  id, method, resolution, region = $1.split(";").collect{|v| v.strip}
45
47
  begin
46
48
  chains, start, eend = region.match(/(\w+)=(\d+)-(\d+)/).values_at(1,2,3)
49
+ start = start.to_i
50
+ eend = eend.to_i
51
+ start, eend = eend, start if start > eend
47
52
  rescue
48
53
  Log.warn("Error process Uniprot PDB line: #{line}")
49
54
  next
50
55
  end
51
- pdb[id.downcase] = {:method => method, :resolution => resolution, :region => (start.to_i..eend.to_i), :chains => chains}
56
+ pdb[id.downcase] = {:method => method, :resolution => resolution, :region => (start..eend), :chains => chains}
52
57
  }
53
58
  pdb
54
59
  end
55
60
 
61
+ def self.sequence(protein)
62
+ url = UNIPROT_FASTA.sub "[PROTEIN]", protein
63
+ text = Open.read(url)
64
+
65
+ text.split(/\n/).select{|line| line !~ /^>/} * ""
66
+ end
67
+
68
+ def self.features(protein)
69
+ url = UNIPROT_TEXT.sub "[PROTEIN]", protein
70
+ text = Open.read(url)
71
+
72
+ text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n"
73
+
74
+ parts = text.split(/^(FT \w+)/)
75
+ parts.shift
76
+
77
+ features = []
78
+
79
+ type = nil
80
+ parts.each do |part|
81
+ parts
82
+ if part[0..1] == "FT"
83
+ type = part.gsub(/FT\s+/,'')
84
+ next
85
+ end
86
+ value = part.gsub("\nFT", '').gsub(/\s+/, ' ')
87
+ case
88
+ when value.match(/(\d+) (\d+) (.*)/)
89
+ start, eend, description = $1, $2, $3
90
+ description.gsub(/^FT\s+/m, '')
91
+ when value.match(/(\d+) (\d+)/)
92
+ start, eend = $1, $2
93
+ description = nil
94
+ else
95
+ Log.debug "Value not understood: #{ value }"
96
+ end
97
+
98
+
99
+ feature = {
100
+ :type => type,
101
+ :start => start.to_i,
102
+ :end => eend.to_i,
103
+ :description => description,
104
+ }
105
+
106
+ features << feature
107
+ end
108
+
109
+ features
110
+ end
111
+
112
+
56
113
  def self.variants(protein)
57
114
  url = UNIPROT_TEXT.sub "[PROTEIN]", protein
58
115
  text = Open.read(url)
@@ -1,6 +1,7 @@
1
1
  #: :type=:single
2
2
  #Release build
3
- current jul2012
3
+ release-70 jan2013
4
+ release-69 oct2012
4
5
  release-68 jul2012
5
6
  release-67 may2012
6
7
  release-66 feb2012
@@ -290,7 +290,8 @@ def coding_transcripts_for_exon(exon, exon_transcripts, transcript_info)
290
290
  []
291
291
  end
292
292
 
293
- transcripts.reject{|transcript| transcript_info[transcript].first.empty?}
293
+ #transcripts.reject{|transcript| transcript_info[transcript].first.empty?}
294
+ transcripts
294
295
  end
295
296
 
296
297
  def exon_offset_in_transcript(exon, transcript, exons, transcript_exons)
@@ -440,6 +441,23 @@ file 'chromosomes' do |t|
440
441
  File.open(t.name, 'w') do |f| f.puts goterms end
441
442
  end
442
443
 
444
# Writes, one per line, the keys of the 'chromosomes' TSV whose name contains
# an underscore or a dot.
file 'blacklist_chromosomes' => 'chromosomes' do |t|
  chromosome_names = TSV.open(t.prerequisites.first).keys
  flagged = chromosome_names.select do |name|
    name.index('_') || name.index('.')
  end

  File.open(t.name, 'w') { |out| out.puts flagged.join("\n") }
end
448
+
449
# Writes the unique first-column values of the 'gene_positions' lines that
# match any pattern listed in 'blacklist_chromosomes' (via `grep -f`).
file 'blacklist_genes' => ['blacklist_chromosomes', 'gene_positions'] do |t|
  blacklist_file = t.prerequisites.first
  positions_file = t.prerequisites.last

  # NOTE(review): the result of this read is not used; confirm whether it is
  # only meant to fail early when the blacklist file cannot be read.
  Open.read(blacklist_file)

  filter = CMD.cmd("grep -f '#{blacklist_file}' | cut -f 1", :in => Open.open(positions_file))
  genes = filter.read.split("\n").uniq

  File.open(t.name, 'w') { |out| out.puts genes.join("\n") }
end
454
+
455
# Writes the unique first-column values of 'gene_positions' that are not
# listed in 'blacklist_genes', one per line.
file 'sanctioned_genes' => ['blacklist_genes', 'gene_positions'] do |t|
  positions_file = t.prerequisites.last
  blacklist_file = t.prerequisites.first

  all_genes = CMD.cmd("cut -f 1", :in => Open.open(positions_file)).read.split("\n").uniq
  blacklisted = Open.read(blacklist_file).split("\n")
  genes = all_genes - blacklisted

  File.open(t.name, 'w') { |out| out.puts genes.join("\n") }
end
459
+
460
+
443
461
  rule /^chromosome_.*/ do |t|
444
462
  chr = t.name.match(/chromosome_(.*)/)[1]
445
463
 
@@ -450,7 +468,11 @@ rule /^chromosome_.*/ do |t|
450
468
  ftp = Net::FTP.new("ftp.ensembl.org")
451
469
  ftp.passive = true
452
470
  ftp.login
453
- ftp.chdir("pub/#{ release }/fasta/")
471
+ if release.nil? or release == 'current'
472
+ ftp.chdir("pub/current_fasta/")
473
+ else
474
+ ftp.chdir("pub/#{ release }/fasta/")
475
+ end
454
476
  ftp.chdir($scientific_name.downcase.sub(" ",'_'))
455
477
  ftp.chdir('dna')
456
478
  file = ftp.nlst.select{|file| file =~ /chromosome\.#{ chr }\.fa/}.first
@@ -530,7 +552,7 @@ file 'transcript_sequence' => ["exons", "transcript_exons"] do |t|
530
552
  begin
531
553
  p = Organism.root
532
554
  p.replace File.expand_path("./chromosome_#{chr}")
533
- p.sub!(/.*\/.rbbt\//,'')
555
+ p.sub!(%r{.*/organisms/},'share/organisms/')
534
556
  p = Path.setup(p, 'rbbt', Organism)
535
557
  chr_str = p.produce.read
536
558
  rescue Exception
@@ -624,10 +646,10 @@ end
624
646
 
625
647
  file 'transcript_3utr' => ["transcript_5utr"] do |t|
626
648
  end
627
-
628
- file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr", "transcript_sequence"] do |t|
649
+ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr", "transcript_phase", "transcript_sequence"] do |t|
629
650
  transcript_5utr = TSV.open(File.expand_path('./transcript_5utr'), :unnamed => true)
630
651
  transcript_3utr = TSV.open(File.expand_path('./transcript_3utr'), :unnamed => true)
652
+ transcript_phase = TSV.open(File.expand_path('./transcript_phase'), :unnamed => true)
631
653
  transcript_sequence = TSV.open(File.expand_path('./transcript_sequence'), :unnamed => true)
632
654
  transcript_protein = TSV.open(File.expand_path('./transcripts'), :fields => ["Ensembl Protein ID"], :type => :single, :unnamed => true)
633
655
 
@@ -638,7 +660,12 @@ file 'protein_sequence' => ["transcripts", "transcript_5utr", "transcript_3utr",
638
660
  next if protein.nil? or protein.empty?
639
661
  utr5 = transcript_5utr[transcript]
640
662
  utr3 = transcript_3utr[transcript]
641
- psequence = Bio::Sequence::NA.new(sequence[utr5..sequence.length-utr3-1]).translate
663
+ phase = transcript_phase[transcript] || 0
664
+ if phase < 0
665
+ utr5 = - phase if utr5 == 0
666
+ phase = 0
667
+ end
668
+ psequence = Bio::Sequence::NA.new(("N" * phase) << sequence[utr5..sequence.length-utr3-1]).translate
642
669
  protein_sequence[protein]=psequence
643
670
  end
644
671
 
@@ -0,0 +1,14 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+
3
+ require 'rbbt/sources/gscholar'
4
+ require 'test/unit'
5
+
6
# Tests for the GoogleScholar source: both checks query GoogleScholar for a
# fixed paper title and match the stringified result against a pattern.
class TestGScholar < Test::Unit::TestCase
  # assert_match expects the pattern first and the string under test second;
  # passing them the other way round turns the string into the pattern and
  # tries to match it against a Regexp.
  def test_citation
    title = "Ten Years of Pathway Analysis: Current Approaches and Outstanding Challenges"

    assert_match(/cites/, GoogleScholar.citation_link(title).to_s)
    assert_match(/\d+/, GoogleScholar.number_cites(title).to_s)
  end

end
13
+
14
+
@@ -61,6 +61,11 @@ class TestOrganism < Test::Unit::TestCase
61
61
  assert_equal mutation_19, Organism.liftOver([mutation_18], target_build, source_build).first
62
62
  end
63
63
 
64
# Checks that the gene "Kras" set up under "Mmu/jun2011" has
# ENSG00000133703 as its only ortholog in "Hsa/jun2011".
# NOTE(review): the method name looks like a typo of "ortholog"; it is kept
# unchanged so that selecting the test by name still works.
def test_orhtolog
  require 'rbbt/entity/gene'

  source_gene = Gene.setup("Kras", "Associated Gene Name", "Mmu/jun2011")
  orthologs = source_gene.ensembl.ortholog("Hsa/jun2011")

  assert_equal ["ENSG00000133703"], orthologs
end
68
+
64
69
  #def test_genes_at_chromosome
65
70
  # pos = [12, 117799500]
66
71
  # assert_equal "ENSG00000089250", Organism::Hsa.genes_at_chromosome_positions(pos.first, pos.last)
metadata CHANGED
@@ -1,20 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
5
- prerelease:
4
+ version: 2.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Miguel Vazquez
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-01-09 00:00:00.000000000 Z
11
+ date: 2013-10-21 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rbbt-util
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - ! '>='
20
18
  - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - ! '>='
28
25
  - !ruby/object:Gem::Version
@@ -30,7 +27,6 @@ dependencies:
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: rbbt-text
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
31
  - - ! '>='
36
32
  - !ruby/object:Gem::Version
@@ -38,7 +34,6 @@ dependencies:
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
38
  - - ! '>='
44
39
  - !ruby/object:Gem::Version
@@ -46,7 +41,6 @@ dependencies:
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: libxml-ruby
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
45
  - - ! '>='
52
46
  - !ruby/object:Gem::Version
@@ -54,7 +48,6 @@ dependencies:
54
48
  type: :runtime
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
52
  - - ! '>='
60
53
  - !ruby/object:Gem::Version
@@ -62,7 +55,6 @@ dependencies:
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: bio
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
59
  - - ! '>='
68
60
  - !ruby/object:Gem::Version
@@ -70,7 +62,6 @@ dependencies:
70
62
  type: :runtime
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
66
  - - ! '>='
76
67
  - !ruby/object:Gem::Version
@@ -78,7 +69,6 @@ dependencies:
78
69
  - !ruby/object:Gem::Dependency
79
70
  name: mechanize
80
71
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
72
  requirements:
83
73
  - - ! '>='
84
74
  - !ruby/object:Gem::Version
@@ -86,7 +76,6 @@ dependencies:
86
76
  type: :runtime
87
77
  prerelease: false
88
78
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
79
  requirements:
91
80
  - - ! '>='
92
81
  - !ruby/object:Gem::Version
@@ -111,6 +100,7 @@ files:
111
100
  - lib/rbbt/sources/biomart.rb
112
101
  - lib/rbbt/sources/cath.rb
113
102
  - lib/rbbt/sources/dbSNP.rb
103
+ - lib/rbbt/sources/ensembl.rb
114
104
  - lib/rbbt/sources/ensembl_ftp.rb
115
105
  - lib/rbbt/sources/entrez.rb
116
106
  - lib/rbbt/sources/genomes1000.rb
@@ -139,37 +129,38 @@ files:
139
129
  - test/rbbt/sources/test_go.rb
140
130
  - test/rbbt/sources/test_entrez.rb
141
131
  - test/rbbt/sources/test_biomart.rb
132
+ - test/rbbt/sources/test_gscholar.rb
142
133
  - test/rbbt/sources/test_organism.rb
143
134
  - test/rbbt/sources/test_pubmed.rb
144
135
  - test/test_helper.rb
145
136
  homepage: http://github.com/mikisvaz/rbbt-sources
146
137
  licenses: []
138
+ metadata: {}
147
139
  post_install_message:
148
140
  rdoc_options: []
149
141
  require_paths:
150
142
  - lib
151
143
  required_ruby_version: !ruby/object:Gem::Requirement
152
- none: false
153
144
  requirements:
154
145
  - - ! '>='
155
146
  - !ruby/object:Gem::Version
156
147
  version: '0'
157
148
  required_rubygems_version: !ruby/object:Gem::Requirement
158
- none: false
159
149
  requirements:
160
150
  - - ! '>='
161
151
  - !ruby/object:Gem::Version
162
152
  version: '0'
163
153
  requirements: []
164
154
  rubyforge_project:
165
- rubygems_version: 1.8.24
155
+ rubygems_version: 2.0.3
166
156
  signing_key:
167
- specification_version: 3
157
+ specification_version: 4
168
158
  summary: Data sources for the Ruby Bioinformatics Toolkit (rbbt)
169
159
  test_files:
170
160
  - test/rbbt/sources/test_go.rb
171
161
  - test/rbbt/sources/test_entrez.rb
172
162
  - test/rbbt/sources/test_biomart.rb
163
+ - test/rbbt/sources/test_gscholar.rb
173
164
  - test/rbbt/sources/test_organism.rb
174
165
  - test/rbbt/sources/test_pubmed.rb
175
166
  - test/test_helper.rb