miga-base 0.4.0.0 → 0.4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/dataset.rb +16 -12
- data/lib/miga/remote_dataset.rb +2 -2
- data/lib/miga/taxonomy.rb +58 -13
- data/lib/miga/version.rb +2 -2
- data/test/remote_dataset_test.rb +16 -9
- data/test/taxonomy_test.rb +35 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3c339de89529e19ad52d1d3522fbe25206045d3db3b030e4ee8c7ddf7d3c1db
|
4
|
+
data.tar.gz: d220804d9a9bc722496f271135da3c4415a77c84d40e49db5d39d015e13d9865
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 27b5d98711e579c077eda211387457578e70183a30459dc6c6e917b5fc20cdbc730d649dfd58b0a2477ea019e0577c1c4993fc6d29eaee9bbdeb77a753ddb107
|
7
|
+
data.tar.gz: 8a391e9090bdf8dbf555fcdd456c1ae7c909a69b1f59cf5eac039d6f18ffbd0b9caf04bdcad406485f914c1c14681105d086e2b36f82039102df4d6b29baace0
|
data/lib/miga/dataset.rb
CHANGED
@@ -48,8 +48,10 @@ class MiGA::Dataset < MiGA::MiGA
|
|
48
48
|
# be treated as reference (true, default) or query (false). Pass any
|
49
49
|
# additional +metadata+ as a Hash.
|
50
50
|
def initialize(project, name, is_ref=true, metadata={})
|
51
|
-
|
52
|
-
|
51
|
+
unless name.miga_name?
|
52
|
+
raise 'Invalid name, please use only alphanumerics and underscores: ' +
|
53
|
+
name.to_s
|
54
|
+
end
|
53
55
|
@project = project
|
54
56
|
@name = name
|
55
57
|
metadata[:ref] = is_ref
|
@@ -60,8 +62,9 @@ class MiGA::Dataset < MiGA::MiGA
|
|
60
62
|
##
|
61
63
|
# Save any changes you've made in the dataset.
|
62
64
|
def save
|
63
|
-
|
64
|
-
|
65
|
+
if t = metadata[:tax] and n = t[:ns] and n == 'COMMUNITY'
|
66
|
+
self.metadata[:type] = :metagenome
|
67
|
+
end
|
65
68
|
self.metadata.save
|
66
69
|
end
|
67
70
|
|
@@ -95,7 +98,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
95
98
|
# Get standard metadata values for the dataset as Array.
|
96
99
|
def info
|
97
100
|
MiGA::Dataset.INFO_FIELDS.map do |k|
|
98
|
-
(k==
|
101
|
+
(k == 'name') ? self.name : metadata[k.to_sym]
|
99
102
|
end
|
100
103
|
end
|
101
104
|
|
@@ -132,11 +135,11 @@ class MiGA::Dataset < MiGA::MiGA
|
|
132
135
|
def ignore_task?(task)
|
133
136
|
return true unless is_active?
|
134
137
|
return !metadata["run_#{task}"] unless metadata["run_#{task}"].nil?
|
135
|
-
return true if task
|
138
|
+
return true if task == :taxonomy and project.metadata[:ref_project].nil?
|
136
139
|
pattern = [true, false]
|
137
|
-
( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ]==pattern or
|
138
|
-
[@@_ONLY_MULTI_TASKS_H[task], is_multi? ]==pattern or
|
139
|
-
[@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?]==pattern )
|
140
|
+
( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ] == pattern or
|
141
|
+
[@@_ONLY_MULTI_TASKS_H[task], is_multi? ] == pattern or
|
142
|
+
[@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?] == pattern )
|
140
143
|
end
|
141
144
|
|
142
145
|
##
|
@@ -146,13 +149,14 @@ class MiGA::Dataset < MiGA::MiGA
|
|
146
149
|
# This function is currently only supported for query datasets when
|
147
150
|
# +ref_project+ is false (default), and only for reference dataset when
|
148
151
|
# +ref_project+ is true. It returns +nil+ if this analysis is not supported.
|
149
|
-
def closest_relatives(how_many=1, ref_project=false)
|
152
|
+
def closest_relatives(how_many = 1, ref_project = false)
|
150
153
|
return nil if (is_ref? != ref_project) or is_multi?
|
151
154
|
r = result(ref_project ? :taxonomy : :distances)
|
152
155
|
return nil if r.nil?
|
153
156
|
db = SQLite3::Database.new(r.file_path :aai_db)
|
154
|
-
db.execute(
|
155
|
-
|
157
|
+
db.execute(
|
158
|
+
'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
|
159
|
+
'GROUP BY seq2 ORDER BY aai DESC LIMIT ?', [name, how_many])
|
156
160
|
end
|
157
161
|
|
158
162
|
end # class MiGA::Dataset
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -42,7 +42,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
42
42
|
@db = db.to_sym
|
43
43
|
@universe = universe.to_sym
|
44
44
|
@metadata = {}
|
45
|
-
@metadata[:"#{universe}_#{db}"] = ids.join(
|
45
|
+
@metadata[:"#{universe}_#{db}"] = ids.join(',')
|
46
46
|
@@UNIVERSE.keys.include?(@universe) or
|
47
47
|
raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
|
48
48
|
@@UNIVERSE[@universe][:dbs].include?(@db) or
|
@@ -130,7 +130,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
130
130
|
def get_ncbi_taxonomy
|
131
131
|
tax_id = get_ncbi_taxid
|
132
132
|
return nil if tax_id.nil?
|
133
|
-
lineage = {}
|
133
|
+
lineage = {ns: 'ncbi'}
|
134
134
|
doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
|
135
135
|
doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
|
136
136
|
name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
|
data/lib/miga/taxonomy.rb
CHANGED
@@ -38,7 +38,9 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
38
38
|
|
39
39
|
##
|
40
40
|
# Initialize from JSON-derived Hash +o+.
|
41
|
-
def self.json_create(o)
|
41
|
+
def self.json_create(o)
|
42
|
+
new(o['str'], nil, o['alt'])
|
43
|
+
end
|
42
44
|
|
43
45
|
##
|
44
46
|
# Returns canonical rank (Symbol) for the +rank+ String.
|
@@ -63,8 +65,10 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
63
65
|
# space-delimited entries, the array is a vector of entries. Each entry can be
|
64
66
|
# either a rank:value pair (if +ranks+ is nil), or just values in the same
|
65
67
|
# order as the ranks in +ranks+. Alternatively, +str+ as a Hash with rank =>
|
66
|
-
# value pairs is also supported.
|
67
|
-
|
68
|
+
# value pairs is also supported. If +alt+ is passed, it must be an Array of
|
69
|
+
# String, Array, or Hash entries as defined above (except +ranks+ are not
|
70
|
+
# allowed).
|
71
|
+
def initialize(str, ranks = nil, alt = [])
|
68
72
|
@ranks = {}
|
69
73
|
if ranks.nil?
|
70
74
|
case str when Array, Hash
|
@@ -74,12 +78,13 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
74
78
|
end
|
75
79
|
else
|
76
80
|
ranks = ranks.split(/\s+/) unless ranks.is_a? Array
|
77
|
-
str = str.split(/\s
|
81
|
+
str = str.split(/\s+/) unless str.is_a? Array
|
78
82
|
raise "Unequal number of ranks (#{ranks.size}) " +
|
79
83
|
"and names (#{str.size}):#{ranks} => #{str}" unless
|
80
84
|
ranks.size==str.size
|
81
85
|
(0 .. str.size).each{ |i| self << "#{ranks[i]}:#{str[i]}" }
|
82
86
|
end
|
87
|
+
@alt = (alt || []).map { |i| Taxonomy.new(i) }
|
83
88
|
end
|
84
89
|
|
85
90
|
##
|
@@ -89,7 +94,7 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
89
94
|
if value.is_a? Hash
|
90
95
|
value.each_pair do |rank_i, name_i|
|
91
96
|
next if name_i.nil? or name_i == ""
|
92
|
-
@ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr(
|
97
|
+
@ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr('_',' ')
|
93
98
|
end
|
94
99
|
elsif value.is_a? Array
|
95
100
|
value.each{ |v| self << v }
|
@@ -104,6 +109,26 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
104
109
|
##
|
105
110
|
# Get +rank+ value.
|
106
111
|
def [](rank) @ranks[ rank.to_sym ] ; end
|
112
|
+
|
113
|
+
##
|
114
|
+
# Get the alternative taxonomies.
|
115
|
+
# - If +which+ is nil (default), returns all alternative taxonomies as Array
|
116
|
+
# (not including the master taxonomy).
|
117
|
+
# - If +which+ is Integer, returns the indexed taxonomy
|
118
|
+
# (starting with 0, the master taxonomy).
|
119
|
+
# - Otherwise, returns the first taxonomy with namespace +which+ (coerced as
|
120
|
+
# String), including the master taxonomy.
|
121
|
+
# In the latter two cases it can be nil.
|
122
|
+
def alternative(which = nil)
|
123
|
+
case which
|
124
|
+
when nil
|
125
|
+
@alt
|
126
|
+
when Integer
|
127
|
+
([self] + @alt)[which]
|
128
|
+
else
|
129
|
+
([self] + @alt).find{ |i| i.namespace.to_s == which.to_s }
|
130
|
+
end
|
131
|
+
end
|
107
132
|
|
108
133
|
##
|
109
134
|
# Evaluates if the loaded taxonomy includes +taxon+. It assumes that +taxon+
|
@@ -116,30 +141,50 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
116
141
|
|
117
142
|
##
|
118
143
|
# Sorted list of ranks, as an Array of two-entry Arrays (rank and value).
|
119
|
-
|
144
|
+
# If +force_ranks+ is true, it returns all standard ranks even if undefined.
|
145
|
+
# If +with_namespace+ is true, it includes also the namespace.
|
146
|
+
def sorted_ranks(force_ranks = false, with_namespace = false)
|
120
147
|
@@KNOWN_RANKS.map do |r|
|
121
|
-
|
148
|
+
next if r == :ns and not with_namespace
|
149
|
+
next if ranks[r].nil? and not force_ranks
|
150
|
+
[r, ranks[r]]
|
122
151
|
end.compact
|
123
152
|
end
|
153
|
+
|
154
|
+
##
|
155
|
+
# Namespace of the taxonomy (a String) or +nil+.
|
156
|
+
def namespace; self[ :ns ] ; end
|
124
157
|
|
125
158
|
##
|
126
159
|
# Get the most general rank as a two-entry Array (rank and value).
|
127
|
-
|
160
|
+
# If +force_ranks+ is true, it always returns the value for domain (d)
|
161
|
+
# even if undefined.
|
162
|
+
def highest(force_ranks = false)
|
163
|
+
sorted_ranks.first
|
164
|
+
end
|
128
165
|
|
129
166
|
##
|
130
167
|
# Get the most specific rank as a two-entry Array (rank and value).
|
131
|
-
|
168
|
+
# If +force_ranks+ is true, it always returns the value for dataset (ds)
|
169
|
+
# even if undefined.
|
170
|
+
def lowest(force_ranks = false)
|
171
|
+
sorted_ranks(force_ranks).last
|
172
|
+
end
|
132
173
|
|
133
174
|
##
|
134
|
-
# Generate canonical String for the taxonomy.
|
135
|
-
|
136
|
-
|
175
|
+
# Generate canonical String for the taxonomy. If +force_ranks+ is true,
|
176
|
+
# it returns all the standard ranks even if undefined.
|
177
|
+
def to_s(force_ranks = false)
|
178
|
+
sorted_ranks(force_ranks, true).
|
179
|
+
map{ |r| "#{r[0]}:#{(r[1] || '').gsub(/[\s:]/, '_')}" }.join(' ')
|
137
180
|
end
|
138
181
|
|
139
182
|
##
|
140
183
|
# Generate JSON-formatted String representing the taxonomy.
|
141
184
|
def to_json(*a)
|
142
|
-
{ JSON.create_id => self.class.name,
|
185
|
+
hsh = { JSON.create_id => self.class.name, 'str' => self.to_s }
|
186
|
+
hsh['alt'] = alternative.map(&:to_s) unless alternative.empty?
|
187
|
+
hsh.to_json(*a)
|
143
188
|
end
|
144
189
|
|
145
190
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.4,
|
13
|
+
VERSION = [0.4, 1, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2019, 8,
|
21
|
+
VERSION_DATE = Date.new(2019, 8, 17)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/test/remote_dataset_test.rb
CHANGED
@@ -24,28 +24,35 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
24
24
|
end
|
25
25
|
|
26
26
|
def test_bad_remote_dataset
|
27
|
-
assert_raise { MiGA::RemoteDataset.new(
|
28
|
-
assert_raise { MiGA::RemoteDataset.new(
|
27
|
+
assert_raise { MiGA::RemoteDataset.new('ids', :embl, :marvel) }
|
28
|
+
assert_raise { MiGA::RemoteDataset.new('ids', :google, :ebi) }
|
29
29
|
end
|
30
30
|
|
31
31
|
def test_rest
|
32
|
-
hiv2 =
|
32
|
+
hiv2 = 'M30502.1'
|
33
33
|
{embl: :ebi, nuccore: :ncbi}.each do |db, universe|
|
34
34
|
rd = MiGA::RemoteDataset.new(hiv2, db, universe)
|
35
35
|
assert_equal([hiv2], rd.ids)
|
36
|
-
omit_if(!$remote_tests,
|
36
|
+
omit_if(!$remote_tests, 'Remote access is error-prone.')
|
37
37
|
tx = rd.get_ncbi_taxonomy
|
38
|
-
|
39
|
-
assert_equal(
|
38
|
+
msg = "Failed on #{universe}:#{db}"
|
39
|
+
assert_equal(MiGA::Taxonomy, tx.class, msg)
|
40
|
+
assert_equal('Lentivirus', tx[:g], msg)
|
41
|
+
assert_equal('ns:ncbi o:Ortervirales f:Retroviridae ' \
|
42
|
+
'g:Lentivirus s:Human_immunodeficiency_virus_2', tx.to_s, msg)
|
43
|
+
assert_equal('ns:ncbi d: k: p: c: o:Ortervirales f:Retroviridae ' \
|
44
|
+
'g:Lentivirus s:Human_immunodeficiency_virus_2 ssp: str: ds:',
|
45
|
+
tx.to_s(true), msg)
|
46
|
+
assert_equal('ncbi', tx.namespace, msg)
|
40
47
|
end
|
41
48
|
end
|
42
49
|
|
43
50
|
def test_net_ftp
|
44
|
-
cjac =
|
45
|
-
n =
|
51
|
+
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/ga/GAPJ01.fasta.gz'
|
52
|
+
n = 'Cjac_L14'
|
46
53
|
rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
|
47
54
|
assert_equal([cjac], rd.ids)
|
48
|
-
omit_if(!$remote_tests,
|
55
|
+
omit_if(!$remote_tests, 'Remote access is error-prone.')
|
49
56
|
p = $p1
|
50
57
|
assert_nil(p.dataset(n))
|
51
58
|
rd.save_to(p, n)
|
data/test/taxonomy_test.rb
CHANGED
@@ -6,22 +6,32 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
6
6
|
def test_ranks
|
7
7
|
assert_respond_to(MiGA::Taxonomy, :KNOWN_RANKS)
|
8
8
|
assert(MiGA::Taxonomy.KNOWN_RANKS.include? :s)
|
9
|
-
assert_nil(MiGA::Taxonomy.normalize_rank
|
10
|
-
assert_nil(MiGA::Taxonomy.normalize_rank
|
9
|
+
assert_nil(MiGA::Taxonomy.normalize_rank 'No Rank')
|
10
|
+
assert_nil(MiGA::Taxonomy.normalize_rank 'Captain')
|
11
11
|
assert_equal(:f, MiGA::Taxonomy.normalize_rank(:Family))
|
12
12
|
end
|
13
13
|
|
14
14
|
def test_json
|
15
|
-
|
16
|
-
|
17
|
-
tx = JSON.parse(js, {:
|
15
|
+
txt = 'k:Fantasia f:Dragonaceae s:Dragonia_azura'
|
16
|
+
js = '{"json_class":"MiGA::Taxonomy","str":"' + txt + '"}'
|
17
|
+
tx = JSON.parse(js, {symbolize_names: false, create_additions: true})
|
18
18
|
assert_equal(MiGA::Taxonomy, tx.class)
|
19
|
-
assert_equal(
|
19
|
+
assert_equal('Dragonaceae', tx[:f])
|
20
20
|
assert_equal(js, tx.to_json)
|
21
21
|
end
|
22
22
|
|
23
|
+
def test_namespace
|
24
|
+
txt = 'ns:Irrealis k:Fantasia f:Dragonaceae s:Dragonia_azura'
|
25
|
+
tx = MiGA::Taxonomy.new(txt)
|
26
|
+
assert_equal(txt, tx.to_s)
|
27
|
+
assert_equal(
|
28
|
+
[[:k, 'Fantasia'],[:f, 'Dragonaceae'],[:s, 'Dragonia azura']],
|
29
|
+
tx.sorted_ranks)
|
30
|
+
assert_equal('Irrealis', tx.namespace)
|
31
|
+
end
|
32
|
+
|
23
33
|
def test_append
|
24
|
-
tx = MiGA::Taxonomy.new
|
34
|
+
tx = MiGA::Taxonomy.new ''
|
25
35
|
assert_equal("", "#{tx}")
|
26
36
|
tx << ["domain:Public","family:GNU"]
|
27
37
|
assert_equal("GNU", tx[:f])
|
@@ -44,4 +54,22 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
44
54
|
end
|
45
55
|
end
|
46
56
|
|
57
|
+
def test_alt
|
58
|
+
tx = MiGA::Taxonomy.new('ns:a s:Arnie', nil,
|
59
|
+
['ns:b s:Bernie','ns:c s:Cornie','s:Darnie'])
|
60
|
+
assert_equal('ns:a s:Arnie', tx.to_s)
|
61
|
+
assert_equal([[:s, 'Arnie']], tx.sorted_ranks)
|
62
|
+
assert_equal('ns:a s:Arnie', tx.alternative(0).to_s)
|
63
|
+
assert_equal('ns:b s:Bernie', tx.alternative(1).to_s)
|
64
|
+
assert_equal('ns:c s:Cornie', tx.alternative(:c).to_s)
|
65
|
+
assert_equal('s:Darnie', tx.alternative('').to_s)
|
66
|
+
assert_nil(tx.alternative(:x))
|
67
|
+
assert_equal(3, tx.alternative.size)
|
68
|
+
js = tx.to_json
|
69
|
+
tx_js = JSON.parse(js, {symbolize_names: false, create_additions: true})
|
70
|
+
assert_equal(tx.to_s, tx_js.to_s)
|
71
|
+
assert_equal(tx.alternative(2).to_s, tx_js.alternative(2).to_s)
|
72
|
+
assert_equal(tx.alternative.size, tx_js.alternative.size)
|
73
|
+
end
|
74
|
+
|
47
75
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|