rbbt-sources 3.2.3 → 3.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/sources/entrez.rb +26 -26
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fc74a3b7d26cc888bbd71f7aaa41918c2abb97e7a99abd027b9ea9086ee909e2
|
4
|
+
data.tar.gz: 4ed848475f09c4fcfb0c3c21cba8a515ee4005b475883ec148bb9b27808e24af
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e389de3b690237e299428a8967f8e62283cfb2be86aaf595b0360152efe0f08a294c8d853700652802885f772aeed1b74bab9f7a4e948a6c85053d300e7cf770
|
7
|
+
data.tar.gz: 11f70fd175e9c1cca781103da79f443558a5078ea2827bc2302e3a4e178f561bdc69bb7385c00e999a8067f938479d6f64baba418799e9bc32fd57bba6086566
|
data/lib/rbbt/sources/entrez.rb
CHANGED
@@ -2,7 +2,6 @@ require 'rbbt-util'
|
|
2
2
|
require 'rbbt/tsv'
|
3
3
|
require 'rbbt/resource'
|
4
4
|
require 'rbbt/util/filecache'
|
5
|
-
require 'rbbt/bow/bow'
|
6
5
|
require 'set'
|
7
6
|
|
8
7
|
module Entrez
|
@@ -15,7 +14,7 @@ module Entrez
|
|
15
14
|
|
16
15
|
taxs = [taxs] unless Array === taxs
|
17
16
|
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
18
|
-
|
17
|
+
|
19
18
|
tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
|
20
19
|
tsv.key_field = "Entrez Gene ID"
|
21
20
|
tsv.fields = ["Native ID"]
|
@@ -27,7 +26,7 @@ module Entrez
|
|
27
26
|
|
28
27
|
taxs = [taxs] unless Array === taxs
|
29
28
|
options.merge! :grep => taxs.collect{|t| "^" + t.to_s}, :fixed_grep => false
|
30
|
-
|
29
|
+
|
31
30
|
tsv = Rbbt.share.databases.entrez.gene_info.tsv :flat, options
|
32
31
|
tsv.key_field = "Entrez Gene ID"
|
33
32
|
tsv.fields = ["Associated Gene Name"]
|
@@ -43,7 +42,7 @@ module Entrez
|
|
43
42
|
|
44
43
|
Rbbt.share.databases.entrez.gene2pubmed.tsv :flat, options
|
45
44
|
end
|
46
|
-
|
45
|
+
|
47
46
|
class Gene
|
48
47
|
attr_reader :organism, :symbol, :description, :aka, :protnames, :summary, :comentaries
|
49
48
|
|
@@ -97,7 +96,7 @@ module Entrez
|
|
97
96
|
|
98
97
|
values.each do |xml|
|
99
98
|
geneid = xml.match(/<Gene-track_geneid>(\d+)/)[1]
|
100
|
-
|
99
|
+
|
101
100
|
result[geneid] = xml
|
102
101
|
end
|
103
102
|
|
@@ -114,30 +113,31 @@ module Entrez
|
|
114
113
|
end
|
115
114
|
end
|
116
115
|
|
117
|
-
# Counts the words in common between a chunk of text and the text
|
118
|
-
# found in Entrez Gene for that particular gene. The +gene+ may be a
|
119
|
-
# gene identifier or a Gene class instance.
|
120
|
-
def self.gene_text_similarity(gene, text)
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
116
|
+
# Counts the words in common between a chunk of text and the text
|
117
|
+
# found in Entrez Gene for that particular gene. The +gene+ may be a
|
118
|
+
# gene identifier or a Gene class instance.
|
119
|
+
def self.gene_text_similarity(gene, text)
|
120
|
+
require 'rbbt/bow/bow'
|
121
|
+
|
122
|
+
case
|
123
|
+
when Entrez::Gene === gene
|
124
|
+
gene_text = gene.text
|
125
|
+
when String === gene || Fixnum === gene
|
126
|
+
begin
|
127
|
+
gene_text = get_gene(gene).text
|
128
|
+
rescue NoMethodError, CMD::CMDError
|
129
|
+
return 0
|
130
|
+
end
|
131
|
+
else
|
129
132
|
return 0
|
130
133
|
end
|
131
|
-
else
|
132
|
-
return 0
|
133
|
-
end
|
134
134
|
|
135
|
-
|
136
|
-
|
135
|
+
gene_words = gene_text.words.to_set
|
136
|
+
text_words = text.words.to_set
|
137
137
|
|
138
|
-
|
138
|
+
return 0 if gene_words.empty? || text_words.empty?
|
139
139
|
|
140
|
-
|
141
|
-
|
142
|
-
end
|
140
|
+
common = gene_words.intersection(text_words)
|
141
|
+
common.length / (gene_words.length + text_words.length).to_f
|
142
|
+
end
|
143
143
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-sources
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.3
|
4
|
+
version: 3.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-03-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 4.0.0
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: mechanize
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: nokogiri
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|