rbbt-sources 3.1.52 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19722279e8b56d1b0cb9e72001e11333e8e436d3a59bf746be3ed744afd047e1
4
- data.tar.gz: 7eca716d5a2ee5ab6ba35547230d01466417f373df775fda110cae79366b56d5
3
+ metadata.gz: 8d0830b8d8d26a21d37daf89ef7fcc0738cd05a6445608bc8c9de0284fc9e4c9
4
+ data.tar.gz: 9351cb34b08a7de18d3754c8ed2ed3924f1713fefe228f39887b6943cd042d45
5
5
  SHA512:
6
- metadata.gz: 9acfd6f4718444fed3891d431e49396d42293105767915375bf86d485e84ed58c5dbb81eff6509ced77e6ae15c38bbbcf91d52590aca6d53e0b3feb4db8c90b6
7
- data.tar.gz: 572c173002d2bad704df755542448fc294071c0849bfbac997ce254001591fb463ea8244c167066953224b1e758108ef91417a10b22c02bd0991d819efc68453
6
+ metadata.gz: 14b92f24a7278439d7026aeb70ad6aa2c5177f27d161f628c458d33cf45f7815349e54f3dc47dcc580e3a0da0f4c6fa759f22209338acba07671eb2dcef33d6a
7
+ data.tar.gz: 7d909c93f08ba0a6b7810dc77dd1ce9759d563ee4faf3078d4276df203e8b125c316f48465442628cb9a3249915dbda1d2c9e1d161b14d6b82a324568c8f0bb3
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010-2022 Miguel Vázquez García
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -2,3 +2,4 @@ may2009
2
2
  feb2014
3
3
  may2017
4
4
  apr2019
5
+ feb2021
@@ -58,8 +58,10 @@ module ClinVar
58
58
  require 'rbbt/workflow'
59
59
  Workflow.require_workflow "Sequence"
60
60
  variants = ClinVar.hg19.snv_summary.produce
61
- muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
62
- consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg19).clean.run(true)
61
+ muts = CMD.cmd("cut -f 1 #{ variants.find }", :pipe => true)
62
+ consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg19)
63
+ consequence.run
64
+ iif consequence
63
65
 
64
66
  options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
65
67
  fields = options[:fields].length
@@ -72,9 +72,10 @@ module Organism
72
72
 
73
73
  def self.hg_build(organism)
74
74
  require 'rbbt/sources/ensembl_ftp'
75
+ organism = organism.strip
75
76
  return organism if organism =~ /^hg\d\d$/
76
77
 
77
- return 'hg19' unless organism =~ /\//
78
+ return organism unless organism =~ /\//
78
79
 
79
80
  species, date = organism.split("/")
80
81
 
@@ -101,35 +102,43 @@ module Organism
101
102
  end
102
103
  end
103
104
 
104
- def self.GRC_build(organism)
105
+ def self.GRC_build(organism, with_release = false)
105
106
  require 'rbbt/sources/ensembl_ftp'
106
- return organism if organism =~ /^hg\d\d$/
107
+ return organism if organism =~ /^GRC$/
107
108
 
108
- return 'hg19' unless organism =~ /\//
109
+ if organism == "hg19" || organism == "b37"
110
+ return "GRCh37"
111
+ elsif organism == "hg38"
112
+ return "GRCh38"
113
+ end
109
114
 
110
- species, date = organism.split("/")
115
+ return self.GRC_build(default_code(organism)) unless organism =~ /\//
111
116
 
112
- case species
113
- when "Hsa"
114
- date = organism.split("/")[1]
117
+ species, date = organism.split("/")
115
118
 
116
- release = Ensembl.releases[date]
119
+ build = case species
120
+ when "Hsa"
121
+ date = organism.split("/")[1]
122
+
123
+ release = Ensembl.releases[date]
124
+
125
+ release_number = release.sub(/.*-/,'').to_i
126
+ if release_number <= 54
127
+ 'GRCh36'
128
+ elsif release_number <= 75
129
+ 'GRCh37'
130
+ else
131
+ 'GRCh38'
132
+ end
133
+ when "Mmu"
134
+ "GRCm38"
135
+ when "Rno"
136
+ "Rnor_6.0"
137
+ else
138
+ raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
139
+ end
117
140
 
118
- release_number = release.sub(/.*-/,'').to_i
119
- if release_number <= 54
120
- 'GRCh36'
121
- elsif release_number <= 75
122
- 'GRCh37'
123
- else
124
- 'GRCh38'
125
- end
126
- when "Mmu"
127
- "GRCm38"
128
- when "Rno"
129
- "Rnor_6.0"
130
- else
131
- raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
132
- end
141
+ (release_number && with_release) ? build + "." + release_number.to_s : build
133
142
  end
134
143
 
135
144
  def self.organism_for_build(build)
@@ -332,4 +341,5 @@ module Organism
332
341
 
333
342
  chromosome_sizes
334
343
  end
344
+
335
345
  end
@@ -1,5 +1,4 @@
1
1
  require 'rbbt-util'
2
- require 'libxml'
3
2
  require 'rbbt/sources/gscholar'
4
3
  require 'rbbt/util/filecache'
5
4
 
@@ -53,18 +52,24 @@ module PubMed
53
52
  [lastname.gsub(/\s/,'_'), year || "NOYEAR", abrev] * ""
54
53
  end
55
54
  def self.parse_xml(xml)
56
- parser = LibXML::XML::Parser.string(xml)
57
- pubmed = parser.parse.find("/PubmedArticle").first
58
- medline = pubmed.find("MedlineCitation").first
59
- article = medline.find("Article").first
55
+ require 'nokogiri'
56
+
57
+ #parser = LibXML::XML::Parser.string(xml)
58
+ #pubmed = parser.parse.find("/PubmedArticle").first
59
+ #medline = parser.find("MedlineCitation").first
60
+ #article = medline.find("Article").first
61
+
62
+ parser = Nokogiri.XML(xml)
63
+ medline = parser.search("MedlineCitation").first
64
+ article = medline.search("Article").first
60
65
 
61
66
  info = {}
62
67
 
63
- info[:pmid] = medline.find("PMID").first.content
68
+ info[:pmid] = medline.search("PMID").first.content
64
69
 
65
70
  XML_KEYS.each do |p|
66
71
  name, key = p
67
- node = article.find(key).first
72
+ node = article.search(key).first
68
73
 
69
74
  next if node.nil?
70
75
 
@@ -72,13 +77,13 @@ module PubMed
72
77
  end
73
78
 
74
79
  bibentry = nil
75
- info[:author] = article.find("AuthorList/Author").collect do |author|
80
+ info[:author] = article.search("AuthorList/Author").collect do |author|
76
81
  begin
77
- lastname = author.find("LastName").first.content
78
- if author.find("ForeName").first.nil?
82
+ lastname = author.search("LastName").first.content
83
+ if author.search("ForeName").first.nil?
79
84
  forename = nil
80
85
  else
81
- forename = author.find("ForeName").first.content.split(/\s/).collect{|word| if word.length == 1; then word + '.'; else word; end} * " "
86
+ forename = author.search("ForeName").first.content.split(/\s/).collect{|word| if word.length == 1; then word + '.'; else word; end} * " "
82
87
  end
83
88
  bibentry ||= make_bibentry lastname, info[:year], info[:title]
84
89
  rescue
@@ -88,7 +93,7 @@ module PubMed
88
93
 
89
94
  info[:bibentry] = bibentry.downcase if bibentry
90
95
 
91
- info[:pmc_pdf] = pubmed.find("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
96
+ info[:pmc_pdf] = parser.search("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
92
97
 
93
98
  if info[:pmc_pdf]
94
99
  info[:pmc_pdf] = PMC_PDF_URL.sub(/PMCID/, info[:pmc_pdf].content)
@@ -270,7 +275,7 @@ module PubMed
270
275
  result[pmid] = xml
271
276
  end
272
277
 
273
- ids.each{|id| next if id.nil? or result[id]; fid = id.sub(/^0+/,''); next unless result[fid]; result[id] = result[fid]}
278
+ ids.each{|id| next if id.nil? or result[id]; fid = String === id ? id.sub(/^0+/,'') : id; next unless result[fid]; result[id] = result[fid]}
274
279
  ids.each{|id| next if id.nil? or result[id]; result[id] = ""}
275
280
 
276
281
  result
@@ -33,6 +33,8 @@ module UniProt
33
33
  "Ensembl Transcript ID"
34
34
  when "Ensembl_PRO"
35
35
  "Ensembl Protein ID"
36
+ when "GeneID"
37
+ "Entrez Gene ID"
36
38
  else
37
39
  field
38
40
  end
@@ -64,6 +66,11 @@ module UniProt
64
66
  tsv.to_s
65
67
  end
66
68
 
69
+ UniProt.claim UniProt.identifiers.Rno, :proc do
70
+ url = "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/RAT_10116_idmapping.dat.gz"
71
+ tsv = UniProt.get_organism_ids(url, "Rno")
72
+ tsv.to_s
73
+ end
67
74
 
68
75
  UniProt.claim UniProt.annotated_variants, :proc do
69
76
  url = "https://www.uniprot.org/docs/humsavar.txt"
@@ -1,5 +1,8 @@
1
1
  #: :type=:single
2
2
  #Release build
3
+ release-103 feb2021
4
+ release-102 nov2020
5
+ release-101 aug2020
3
6
  release-100 apr2020
4
7
  release-99 jan2020
5
8
  release-98 sep2019
@@ -169,11 +169,11 @@ end
169
169
  file 'lexicon' => 'identifiers' do |t|
170
170
  tsv = TSV.open(t.prerequisites.first).slice(["Associated Gene Name", "Entrez Gene Name Synonyms"])
171
171
 
172
- entrez_description = Rbbt.share.databases.entrez.gene_info.tsv :grep => $taxs.collect{|tax| "^#{tax}"}, :key_field => 1, :fields => 8
172
+ entrez_description = Rbbt.share.databases.entrez.gene_info.tsv :grep => $taxs.collect{|tax| "^#{tax}"}, :fixed_grep => false, :key_field => 1, :fields => 8
173
173
  entrez_description.key_field = "Entrez Gene ID"
174
174
  entrez_description.fields = ["Entrez Gene Description"]
175
175
 
176
- tsv.attach entrez_description
176
+ tsv = tsv.attach entrez_description
177
177
  Misc.sensiblewrite(t.name, tsv.to_s)
178
178
  end
179
179
 
@@ -220,7 +220,7 @@ file 'transcript_cds' do |t|
220
220
  end
221
221
 
222
222
  file 'gene_positions' do |t|
223
- sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_positions, [])
223
+ sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_positions, [], nil, :type => :list, :namespace => Thread.current['namespace'])
224
224
 
225
225
  Misc.sensiblewrite(t.name, sequences.to_s)
226
226
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.52
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-04-06 00:00:00.000000000 Z
11
+ date: 2022-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: libxml-ruby
42
+ name: nokogiri
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -70,8 +70,10 @@ description: Data sources like PubMed, Entrez Gene, or Gene Ontology
70
70
  email: miguel.vazquez@fdi.ucm.es
71
71
  executables: []
72
72
  extensions: []
73
- extra_rdoc_files: []
73
+ extra_rdoc_files:
74
+ - LICENSE
74
75
  files:
76
+ - LICENSE
75
77
  - etc/allowed_biomart_archives
76
78
  - etc/biomart/missing_in_archive
77
79
  - etc/build_organism