rbbt-sources 3.1.52 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19722279e8b56d1b0cb9e72001e11333e8e436d3a59bf746be3ed744afd047e1
4
- data.tar.gz: 7eca716d5a2ee5ab6ba35547230d01466417f373df775fda110cae79366b56d5
3
+ metadata.gz: 973d827d476c42023692f813a588ad500da04dd27f6903e1da9c74673985b40e
4
+ data.tar.gz: ed995de19a62f17908c2b7ed3f327782c75ece60aa0f7c5f373cc9309228c8b5
5
5
  SHA512:
6
- metadata.gz: 9acfd6f4718444fed3891d431e49396d42293105767915375bf86d485e84ed58c5dbb81eff6509ced77e6ae15c38bbbcf91d52590aca6d53e0b3feb4db8c90b6
7
- data.tar.gz: 572c173002d2bad704df755542448fc294071c0849bfbac997ce254001591fb463ea8244c167066953224b1e758108ef91417a10b22c02bd0991d819efc68453
6
+ metadata.gz: cff68ce8c57381ec1581a6df61b56d40384b108030f87858ea1a735a82b8922b9d71a8b8a42930846f30186610058cbf738dccfb788ebf49e3f4d5e1b000809f
7
+ data.tar.gz: 771625cad70732c54cc2a1b798f6c0ac44004e73a9435e6699a7988a0904e987fecd9e375e6f888061b0f88a4c03ff3200322c59ca75a7f8f83dcdad8b19b156
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010-2022 Miguel Vázquez García
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -2,3 +2,4 @@ may2009
2
2
  feb2014
3
3
  may2017
4
4
  apr2019
5
+ feb2021
@@ -58,8 +58,10 @@ module ClinVar
58
58
  require 'rbbt/workflow'
59
59
  Workflow.require_workflow "Sequence"
60
60
  variants = ClinVar.hg19.snv_summary.produce
61
- muts = CMD.cmd('cut -f 1', :in => variants.open, :pipe => true)
62
- consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg19).clean.run(true)
61
+ muts = CMD.cmd("cut -f 1 #{ variants.find }", :pipe => true)
62
+ consequence = Sequence.job(:mutated_isoforms_fast, "Clinvar", :mutations => muts, :non_synonymous => true, :organism => ClinVar.organism_hg19)
63
+ consequence.run
64
+ iif consequence
63
65
 
64
66
  options = TSV.parse_header(variants).options.merge({:key_field => "Mutated Isoform"})
65
67
  fields = options[:fields].length
@@ -72,9 +72,10 @@ module Organism
72
72
 
73
73
  def self.hg_build(organism)
74
74
  require 'rbbt/sources/ensembl_ftp'
75
+ organism = organism.strip
75
76
  return organism if organism =~ /^hg\d\d$/
76
77
 
77
- return 'hg19' unless organism =~ /\//
78
+ return organism unless organism =~ /\//
78
79
 
79
80
  species, date = organism.split("/")
80
81
 
@@ -101,35 +102,43 @@ module Organism
101
102
  end
102
103
  end
103
104
 
104
- def self.GRC_build(organism)
105
+ def self.GRC_build(organism, with_release = false)
105
106
  require 'rbbt/sources/ensembl_ftp'
106
- return organism if organism =~ /^hg\d\d$/
107
+ return organism if organism =~ /^GRC$/
107
108
 
108
- return 'hg19' unless organism =~ /\//
109
+ if organism == "hg19" || organism == "b37"
110
+ return "GRCh37"
111
+ elsif organism == "hg38"
112
+ return "GRCh38"
113
+ end
109
114
 
110
- species, date = organism.split("/")
115
+ return self.GRC_build(default_code(organism)) unless organism =~ /\//
111
116
 
112
- case species
113
- when "Hsa"
114
- date = organism.split("/")[1]
117
+ species, date = organism.split("/")
115
118
 
116
- release = Ensembl.releases[date]
119
+ build = case species
120
+ when "Hsa"
121
+ date = organism.split("/")[1]
122
+
123
+ release = Ensembl.releases[date]
124
+
125
+ release_number = release.sub(/.*-/,'').to_i
126
+ if release_number <= 54
127
+ 'GRCh36'
128
+ elsif release_number <= 75
129
+ 'GRCh37'
130
+ else
131
+ 'GRCh38'
132
+ end
133
+ when "Mmu"
134
+ "GRCm38"
135
+ when "Rno"
136
+ "Rnor_6.0"
137
+ else
138
+ raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
139
+ end
117
140
 
118
- release_number = release.sub(/.*-/,'').to_i
119
- if release_number <= 54
120
- 'GRCh36'
121
- elsif release_number <= 75
122
- 'GRCh37'
123
- else
124
- 'GRCh38'
125
- end
126
- when "Mmu"
127
- "GRCm38"
128
- when "Rno"
129
- "Rnor_6.0"
130
- else
131
- raise "Only organism 'Hsa' (Homo sapiens) and Mmu (Mus musculus) supported"
132
- end
141
+ (release_number && with_release) ? build + "." + release_number.to_s : build
133
142
  end
134
143
 
135
144
  def self.organism_for_build(build)
@@ -332,4 +341,5 @@ module Organism
332
341
 
333
342
  chromosome_sizes
334
343
  end
344
+
335
345
  end
@@ -1,5 +1,4 @@
1
1
  require 'rbbt-util'
2
- require 'libxml'
3
2
  require 'rbbt/sources/gscholar'
4
3
  require 'rbbt/util/filecache'
5
4
 
@@ -53,32 +52,38 @@ module PubMed
53
52
  [lastname.gsub(/\s/,'_'), year || "NOYEAR", abrev] * ""
54
53
  end
55
54
  def self.parse_xml(xml)
56
- parser = LibXML::XML::Parser.string(xml)
57
- pubmed = parser.parse.find("/PubmedArticle").first
58
- medline = pubmed.find("MedlineCitation").first
59
- article = medline.find("Article").first
55
+ require 'nokogiri'
56
+
57
+ #parser = LibXML::XML::Parser.string(xml)
58
+ #pubmed = parser.parse.find("/PubmedArticle").first
59
+ #medline = parser.find("MedlineCitation").first
60
+ #article = medline.find("Article").first
61
+
62
+ parser = Nokogiri.XML(xml)
63
+ medline = parser.search("MedlineCitation").first
64
+ article = medline.search("Article").first
60
65
 
61
66
  info = {}
62
67
 
63
- info[:pmid] = medline.find("PMID").first.content
68
+ info[:pmid] = medline.search("PMID").first.content
64
69
 
65
70
  XML_KEYS.each do |p|
66
71
  name, key = p
67
- node = article.find(key).first
72
+ nodes = article.search(key)
68
73
 
69
- next if node.nil?
74
+ next if nodes.nil? || nodes.empty?
70
75
 
71
- info[name] = node.content
76
+ info[name] = nodes.collect{|n| n.content } * "\n\n"
72
77
  end
73
78
 
74
79
  bibentry = nil
75
- info[:author] = article.find("AuthorList/Author").collect do |author|
80
+ info[:author] = article.search("AuthorList/Author").collect do |author|
76
81
  begin
77
- lastname = author.find("LastName").first.content
78
- if author.find("ForeName").first.nil?
82
+ lastname = author.search("LastName").first.content
83
+ if author.search("ForeName").first.nil?
79
84
  forename = nil
80
85
  else
81
- forename = author.find("ForeName").first.content.split(/\s/).collect{|word| if word.length == 1; then word + '.'; else word; end} * " "
86
+ forename = author.search("ForeName").first.content.split(/\s/).collect{|word| if word.length == 1; then word + '.'; else word; end} * " "
82
87
  end
83
88
  bibentry ||= make_bibentry lastname, info[:year], info[:title]
84
89
  rescue
@@ -88,7 +93,7 @@ module PubMed
88
93
 
89
94
  info[:bibentry] = bibentry.downcase if bibentry
90
95
 
91
- info[:pmc_pdf] = pubmed.find("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
96
+ info[:pmc_pdf] = parser.search("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
92
97
 
93
98
  if info[:pmc_pdf]
94
99
  info[:pmc_pdf] = PMC_PDF_URL.sub(/PMCID/, info[:pmc_pdf].content)
@@ -270,7 +275,7 @@ module PubMed
270
275
  result[pmid] = xml
271
276
  end
272
277
 
273
- ids.each{|id| next if id.nil? or result[id]; fid = id.sub(/^0+/,''); next unless result[fid]; result[id] = result[fid]}
278
+ ids.each{|id| next if id.nil? or result[id]; fid = String === id ? id.sub(/^0+/,'') : id; next unless result[fid]; result[id] = result[fid]}
274
279
  ids.each{|id| next if id.nil? or result[id]; result[id] = ""}
275
280
 
276
281
  result
@@ -33,6 +33,8 @@ module UniProt
33
33
  "Ensembl Transcript ID"
34
34
  when "Ensembl_PRO"
35
35
  "Ensembl Protein ID"
36
+ when "GeneID"
37
+ "Entrez Gene ID"
36
38
  else
37
39
  field
38
40
  end
@@ -64,6 +66,11 @@ module UniProt
64
66
  tsv.to_s
65
67
  end
66
68
 
69
+ UniProt.claim UniProt.identifiers.Rno, :proc do
70
+ url = "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/RAT_10116_idmapping.dat.gz"
71
+ tsv = UniProt.get_organism_ids(url, "Rno")
72
+ tsv.to_s
73
+ end
67
74
 
68
75
  UniProt.claim UniProt.annotated_variants, :proc do
69
76
  url = "https://www.uniprot.org/docs/humsavar.txt"
@@ -1,5 +1,8 @@
1
1
  #: :type=:single
2
2
  #Release build
3
+ release-103 feb2021
4
+ release-102 nov2020
5
+ release-101 aug2020
3
6
  release-100 apr2020
4
7
  release-99 jan2020
5
8
  release-98 sep2019
@@ -169,11 +169,11 @@ end
169
169
  file 'lexicon' => 'identifiers' do |t|
170
170
  tsv = TSV.open(t.prerequisites.first).slice(["Associated Gene Name", "Entrez Gene Name Synonyms"])
171
171
 
172
- entrez_description = Rbbt.share.databases.entrez.gene_info.tsv :grep => $taxs.collect{|tax| "^#{tax}"}, :key_field => 1, :fields => 8
172
+ entrez_description = Rbbt.share.databases.entrez.gene_info.tsv :grep => $taxs.collect{|tax| "^#{tax}"}, :fixed_grep => false, :key_field => 1, :fields => 8
173
173
  entrez_description.key_field = "Entrez Gene ID"
174
174
  entrez_description.fields = ["Entrez Gene Description"]
175
175
 
176
- tsv.attach entrez_description
176
+ tsv = tsv.attach entrez_description
177
177
  Misc.sensiblewrite(t.name, tsv.to_s)
178
178
  end
179
179
 
@@ -220,7 +220,7 @@ file 'transcript_cds' do |t|
220
220
  end
221
221
 
222
222
  file 'gene_positions' do |t|
223
- sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_positions, [])
223
+ sequences = BioMart.tsv($biomart_db, $biomart_ensembl_gene, $biomart_gene_positions, [], nil, :type => :list, :namespace => Thread.current['namespace'])
224
224
 
225
225
  Misc.sensiblewrite(t.name, sequences.to_s)
226
226
  end
@@ -12,6 +12,12 @@ class TestPubMed < Test::Unit::TestCase
12
12
  pmids = ['16438716', 17204154]
13
13
  assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
14
14
  end
15
+
16
+ def test_get_multi_abstract
17
+ pmid = "32141403"
18
+
19
+ assert PubMed.get_article(pmid).abstract.include?("This study shows PCOS patients are at increased risk of incident schizophrenia, and the metformin treatment has a protective effect against incident schizophrenia.")
20
+ end
15
21
 
16
22
  def test_full_text
17
23
  pmid = '16438716'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.52
4
+ version: 3.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-04-06 00:00:00.000000000 Z
11
+ date: 2022-12-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: libxml-ruby
42
+ name: nokogiri
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -70,8 +70,10 @@ description: Data sources like PubMed, Entrez Gene, or Gene Ontology
70
70
  email: miguel.vazquez@fdi.ucm.es
71
71
  executables: []
72
72
  extensions: []
73
- extra_rdoc_files: []
73
+ extra_rdoc_files:
74
+ - LICENSE
74
75
  files:
76
+ - LICENSE
75
77
  - etc/allowed_biomart_archives
76
78
  - etc/biomart/missing_in_archive
77
79
  - etc/build_organism