miga-base 0.4.0.0 → 0.4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/dataset.rb +16 -12
- data/lib/miga/remote_dataset.rb +2 -2
- data/lib/miga/taxonomy.rb +58 -13
- data/lib/miga/version.rb +2 -2
- data/test/remote_dataset_test.rb +16 -9
- data/test/taxonomy_test.rb +35 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3c339de89529e19ad52d1d3522fbe25206045d3db3b030e4ee8c7ddf7d3c1db
|
4
|
+
data.tar.gz: d220804d9a9bc722496f271135da3c4415a77c84d40e49db5d39d015e13d9865
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 27b5d98711e579c077eda211387457578e70183a30459dc6c6e917b5fc20cdbc730d649dfd58b0a2477ea019e0577c1c4993fc6d29eaee9bbdeb77a753ddb107
|
7
|
+
data.tar.gz: 8a391e9090bdf8dbf555fcdd456c1ae7c909a69b1f59cf5eac039d6f18ffbd0b9caf04bdcad406485f914c1c14681105d086e2b36f82039102df4d6b29baace0
|
data/lib/miga/dataset.rb
CHANGED
@@ -48,8 +48,10 @@ class MiGA::Dataset < MiGA::MiGA
|
|
48
48
|
# be treated as reference (true, default) or query (false). Pass any
|
49
49
|
# additional +metadata+ as a Hash.
|
50
50
|
def initialize(project, name, is_ref=true, metadata={})
|
51
|
-
|
52
|
-
|
51
|
+
unless name.miga_name?
|
52
|
+
raise 'Invalid name, please use only alphanumerics and underscores: ' +
|
53
|
+
name.to_s
|
54
|
+
end
|
53
55
|
@project = project
|
54
56
|
@name = name
|
55
57
|
metadata[:ref] = is_ref
|
@@ -60,8 +62,9 @@ class MiGA::Dataset < MiGA::MiGA
|
|
60
62
|
##
|
61
63
|
# Save any changes you've made in the dataset.
|
62
64
|
def save
|
63
|
-
|
64
|
-
|
65
|
+
if t = metadata[:tax] and n = t[:ns] and n == 'COMMUNITY'
|
66
|
+
self.metadata[:type] = :metagenome
|
67
|
+
end
|
65
68
|
self.metadata.save
|
66
69
|
end
|
67
70
|
|
@@ -95,7 +98,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
95
98
|
# Get standard metadata values for the dataset as Array.
|
96
99
|
def info
|
97
100
|
MiGA::Dataset.INFO_FIELDS.map do |k|
|
98
|
-
(k==
|
101
|
+
(k == 'name') ? self.name : metadata[k.to_sym]
|
99
102
|
end
|
100
103
|
end
|
101
104
|
|
@@ -132,11 +135,11 @@ class MiGA::Dataset < MiGA::MiGA
|
|
132
135
|
def ignore_task?(task)
|
133
136
|
return true unless is_active?
|
134
137
|
return !metadata["run_#{task}"] unless metadata["run_#{task}"].nil?
|
135
|
-
return true if task
|
138
|
+
return true if task == :taxonomy and project.metadata[:ref_project].nil?
|
136
139
|
pattern = [true, false]
|
137
|
-
( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ]==pattern or
|
138
|
-
[@@_ONLY_MULTI_TASKS_H[task], is_multi? ]==pattern or
|
139
|
-
[@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?]==pattern )
|
140
|
+
( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ] == pattern or
|
141
|
+
[@@_ONLY_MULTI_TASKS_H[task], is_multi? ] == pattern or
|
142
|
+
[@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?] == pattern )
|
140
143
|
end
|
141
144
|
|
142
145
|
##
|
@@ -146,13 +149,14 @@ class MiGA::Dataset < MiGA::MiGA
|
|
146
149
|
# This function is currently only supported for query datasets when
|
147
150
|
# +ref_project+ is false (default), and only for reference dataset when
|
148
151
|
# +ref_project+ is true. It returns +nil+ if this analysis is not supported.
|
149
|
-
def closest_relatives(how_many=1, ref_project=false)
|
152
|
+
def closest_relatives(how_many = 1, ref_project = false)
|
150
153
|
return nil if (is_ref? != ref_project) or is_multi?
|
151
154
|
r = result(ref_project ? :taxonomy : :distances)
|
152
155
|
return nil if r.nil?
|
153
156
|
db = SQLite3::Database.new(r.file_path :aai_db)
|
154
|
-
db.execute(
|
155
|
-
|
157
|
+
db.execute(
|
158
|
+
'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
|
159
|
+
'GROUP BY seq2 ORDER BY aai DESC LIMIT ?', [name, how_many])
|
156
160
|
end
|
157
161
|
|
158
162
|
end # class MiGA::Dataset
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -42,7 +42,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
42
42
|
@db = db.to_sym
|
43
43
|
@universe = universe.to_sym
|
44
44
|
@metadata = {}
|
45
|
-
@metadata[:"#{universe}_#{db}"] = ids.join(
|
45
|
+
@metadata[:"#{universe}_#{db}"] = ids.join(',')
|
46
46
|
@@UNIVERSE.keys.include?(@universe) or
|
47
47
|
raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
|
48
48
|
@@UNIVERSE[@universe][:dbs].include?(@db) or
|
@@ -130,7 +130,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
130
130
|
def get_ncbi_taxonomy
|
131
131
|
tax_id = get_ncbi_taxid
|
132
132
|
return nil if tax_id.nil?
|
133
|
-
lineage = {}
|
133
|
+
lineage = {ns: 'ncbi'}
|
134
134
|
doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
|
135
135
|
doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
|
136
136
|
name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
|
data/lib/miga/taxonomy.rb
CHANGED
@@ -38,7 +38,9 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
38
38
|
|
39
39
|
##
|
40
40
|
# Initialize from JSON-derived Hash +o+.
|
41
|
-
def self.json_create(o)
|
41
|
+
def self.json_create(o)
|
42
|
+
new(o['str'], nil, o['alt'])
|
43
|
+
end
|
42
44
|
|
43
45
|
##
|
44
46
|
# Returns canonical rank (Symbol) for the +rank+ String.
|
@@ -63,8 +65,10 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
63
65
|
# space-delimited entries, the array is a vector of entries. Each entry can be
|
64
66
|
# either a rank:value pair (if +ranks+ is nil), or just values in the same
|
65
67
|
# order as the ranks in +ranks+. Alternatively, +str+ as a Hash with rank =>
|
66
|
-
# value pairs is also supported.
|
67
|
-
|
68
|
+
# value pairs is also supported. If +alt+ is passed, it must be an Array of
|
69
|
+
# String, Array, or Hash entries as defined above (except +ranks+ are not
|
70
|
+
# allowed).
|
71
|
+
def initialize(str, ranks = nil, alt = [])
|
68
72
|
@ranks = {}
|
69
73
|
if ranks.nil?
|
70
74
|
case str when Array, Hash
|
@@ -74,12 +78,13 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
74
78
|
end
|
75
79
|
else
|
76
80
|
ranks = ranks.split(/\s+/) unless ranks.is_a? Array
|
77
|
-
str = str.split(/\s
|
81
|
+
str = str.split(/\s+/) unless str.is_a? Array
|
78
82
|
raise "Unequal number of ranks (#{ranks.size}) " +
|
79
83
|
"and names (#{str.size}):#{ranks} => #{str}" unless
|
80
84
|
ranks.size==str.size
|
81
85
|
(0 .. str.size).each{ |i| self << "#{ranks[i]}:#{str[i]}" }
|
82
86
|
end
|
87
|
+
@alt = (alt || []).map { |i| Taxonomy.new(i) }
|
83
88
|
end
|
84
89
|
|
85
90
|
##
|
@@ -89,7 +94,7 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
89
94
|
if value.is_a? Hash
|
90
95
|
value.each_pair do |rank_i, name_i|
|
91
96
|
next if name_i.nil? or name_i == ""
|
92
|
-
@ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr(
|
97
|
+
@ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr('_',' ')
|
93
98
|
end
|
94
99
|
elsif value.is_a? Array
|
95
100
|
value.each{ |v| self << v }
|
@@ -104,6 +109,26 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
104
109
|
##
|
105
110
|
# Get +rank+ value.
|
106
111
|
def [](rank) @ranks[ rank.to_sym ] ; end
|
112
|
+
|
113
|
+
##
|
114
|
+
# Get the alternative taxonomies.
|
115
|
+
# - If +which+ is nil (default), returns all alternative taxonomies as Array
|
116
|
+
# (not including the master taxonomy).
|
117
|
+
# - If +which+ is Integer, returns the indexed taxonomy
|
118
|
+
# (starting with 0, the master taxonomy).
|
119
|
+
# - Otherwise, returns the first taxonomy with namespace +which+ (coerced as
|
120
|
+
# String), including the master taxonomy.
|
121
|
+
# In the latter two cases it can be nil.
|
122
|
+
def alternative(which = nil)
|
123
|
+
case which
|
124
|
+
when nil
|
125
|
+
@alt
|
126
|
+
when Integer
|
127
|
+
([self] + @alt)[which]
|
128
|
+
else
|
129
|
+
([self] + @alt).find{ |i| i.namespace.to_s == which.to_s }
|
130
|
+
end
|
131
|
+
end
|
107
132
|
|
108
133
|
##
|
109
134
|
# Evaluates if the loaded taxonomy includes +taxon+. It assumes that +taxon+
|
@@ -116,30 +141,50 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
116
141
|
|
117
142
|
##
|
118
143
|
# Sorted list of ranks, as an Array of two-entry Arrays (rank and value).
|
119
|
-
|
144
|
+
# If +force_ranks+ is true, it returns all standard ranks even if undefined.
|
145
|
+
# If +with_namespace+ is true, it includes also the namespace.
|
146
|
+
def sorted_ranks(force_ranks = false, with_namespace = false)
|
120
147
|
@@KNOWN_RANKS.map do |r|
|
121
|
-
|
148
|
+
next if r == :ns and not with_namespace
|
149
|
+
next if ranks[r].nil? and not force_ranks
|
150
|
+
[r, ranks[r]]
|
122
151
|
end.compact
|
123
152
|
end
|
153
|
+
|
154
|
+
##
|
155
|
+
# Namespace of the taxonomy (a String) or +nil+.
|
156
|
+
def namespace; self[ :ns ] ; end
|
124
157
|
|
125
158
|
##
|
126
159
|
# Get the most general rank as a two-entry Array (rank and value).
|
127
|
-
|
160
|
+
# If +force_ranks+ is true, it always returns the value for domain (d)
|
161
|
+
# even if undefined.
|
162
|
+
def highest(force_ranks = false)
|
163
|
+
sorted_ranks.first
|
164
|
+
end
|
128
165
|
|
129
166
|
##
|
130
167
|
# Get the most specific rank as a two-entry Array (rank and value).
|
131
|
-
|
168
|
+
# If +force_ranks+ is true, it always returns the value for dataset (ds)
|
169
|
+
# even if undefined.
|
170
|
+
def lowest(force_ranks = false)
|
171
|
+
sorted_ranks(force_ranks).last
|
172
|
+
end
|
132
173
|
|
133
174
|
##
|
134
|
-
# Generate cannonical String for the taxonomy.
|
135
|
-
|
136
|
-
|
175
|
+
# Generate canonical String for the taxonomy. If +force_ranks+ is true,
|
176
|
+
# it returns all the standard ranks even if undefined.
|
177
|
+
def to_s(force_ranks = false)
|
178
|
+
sorted_ranks(force_ranks, true).
|
179
|
+
map{ |r| "#{r[0]}:#{(r[1] || '').gsub(/[\s:]/, '_')}" }.join(' ')
|
137
180
|
end
|
138
181
|
|
139
182
|
##
|
140
183
|
# Generate JSON-formatted String representing the taxonomy.
|
141
184
|
def to_json(*a)
|
142
|
-
{ JSON.create_id => self.class.name,
|
185
|
+
hsh = { JSON.create_id => self.class.name, 'str' => self.to_s }
|
186
|
+
hsh['alt'] = alternative.map(&:to_s) unless alternative.empty?
|
187
|
+
hsh.to_json(*a)
|
143
188
|
end
|
144
189
|
|
145
190
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.4,
|
13
|
+
VERSION = [0.4, 1, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2019, 8,
|
21
|
+
VERSION_DATE = Date.new(2019, 8, 17)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/test/remote_dataset_test.rb
CHANGED
@@ -24,28 +24,35 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
24
24
|
end
|
25
25
|
|
26
26
|
def test_bad_remote_dataset
|
27
|
-
assert_raise { MiGA::RemoteDataset.new(
|
28
|
-
assert_raise { MiGA::RemoteDataset.new(
|
27
|
+
assert_raise { MiGA::RemoteDataset.new('ids', :embl, :marvel) }
|
28
|
+
assert_raise { MiGA::RemoteDataset.new('ids', :google, :ebi) }
|
29
29
|
end
|
30
30
|
|
31
31
|
def test_rest
|
32
|
-
hiv2 =
|
32
|
+
hiv2 = 'M30502.1'
|
33
33
|
{embl: :ebi, nuccore: :ncbi}.each do |db, universe|
|
34
34
|
rd = MiGA::RemoteDataset.new(hiv2, db, universe)
|
35
35
|
assert_equal([hiv2], rd.ids)
|
36
|
-
omit_if(!$remote_tests,
|
36
|
+
omit_if(!$remote_tests, 'Remote access is error-prone.')
|
37
37
|
tx = rd.get_ncbi_taxonomy
|
38
|
-
|
39
|
-
assert_equal(
|
38
|
+
msg = "Failed on #{universe}:#{db}"
|
39
|
+
assert_equal(MiGA::Taxonomy, tx.class, msg)
|
40
|
+
assert_equal('Lentivirus', tx[:g], msg)
|
41
|
+
assert_equal('ns:ncbi o:Ortervirales f:Retroviridae ' \
|
42
|
+
'g:Lentivirus s:Human_immunodeficiency_virus_2', tx.to_s, msg)
|
43
|
+
assert_equal('ns:ncbi d: k: p: c: o:Ortervirales f:Retroviridae ' \
|
44
|
+
'g:Lentivirus s:Human_immunodeficiency_virus_2 ssp: str: ds:',
|
45
|
+
tx.to_s(true), msg)
|
46
|
+
assert_equal('ncbi', tx.namespace, msg)
|
40
47
|
end
|
41
48
|
end
|
42
49
|
|
43
50
|
def test_net_ftp
|
44
|
-
cjac =
|
45
|
-
n =
|
51
|
+
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/ga/GAPJ01.fasta.gz'
|
52
|
+
n = 'Cjac_L14'
|
46
53
|
rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
|
47
54
|
assert_equal([cjac], rd.ids)
|
48
|
-
omit_if(!$remote_tests,
|
55
|
+
omit_if(!$remote_tests, 'Remote access is error-prone.')
|
49
56
|
p = $p1
|
50
57
|
assert_nil(p.dataset(n))
|
51
58
|
rd.save_to(p, n)
|
data/test/taxonomy_test.rb
CHANGED
@@ -6,22 +6,32 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
6
6
|
def test_ranks
|
7
7
|
assert_respond_to(MiGA::Taxonomy, :KNOWN_RANKS)
|
8
8
|
assert(MiGA::Taxonomy.KNOWN_RANKS.include? :s)
|
9
|
-
assert_nil(MiGA::Taxonomy.normalize_rank
|
10
|
-
assert_nil(MiGA::Taxonomy.normalize_rank
|
9
|
+
assert_nil(MiGA::Taxonomy.normalize_rank 'No Rank')
|
10
|
+
assert_nil(MiGA::Taxonomy.normalize_rank 'Captain')
|
11
11
|
assert_equal(:f, MiGA::Taxonomy.normalize_rank(:Family))
|
12
12
|
end
|
13
13
|
|
14
14
|
def test_json
|
15
|
-
|
16
|
-
|
17
|
-
tx = JSON.parse(js, {:
|
15
|
+
txt = 'k:Fantasia f:Dragonaceae s:Dragonia_azura'
|
16
|
+
js = '{"json_class":"MiGA::Taxonomy","str":"' + txt + '"}'
|
17
|
+
tx = JSON.parse(js, {symbolize_names: false, create_additions: true})
|
18
18
|
assert_equal(MiGA::Taxonomy, tx.class)
|
19
|
-
assert_equal(
|
19
|
+
assert_equal('Dragonaceae', tx[:f])
|
20
20
|
assert_equal(js, tx.to_json)
|
21
21
|
end
|
22
22
|
|
23
|
+
def test_namespace
|
24
|
+
txt = 'ns:Irrealis k:Fantasia f:Dragonaceae s:Dragonia_azura'
|
25
|
+
tx = MiGA::Taxonomy.new(txt)
|
26
|
+
assert_equal(txt, tx.to_s)
|
27
|
+
assert_equal(
|
28
|
+
[[:k, 'Fantasia'],[:f, 'Dragonaceae'],[:s, 'Dragonia azura']],
|
29
|
+
tx.sorted_ranks)
|
30
|
+
assert_equal('Irrealis', tx.namespace)
|
31
|
+
end
|
32
|
+
|
23
33
|
def test_append
|
24
|
-
tx = MiGA::Taxonomy.new
|
34
|
+
tx = MiGA::Taxonomy.new ''
|
25
35
|
assert_equal("", "#{tx}")
|
26
36
|
tx << ["domain:Public","family:GNU"]
|
27
37
|
assert_equal("GNU", tx[:f])
|
@@ -44,4 +54,22 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
44
54
|
end
|
45
55
|
end
|
46
56
|
|
57
|
+
def test_alt
|
58
|
+
tx = MiGA::Taxonomy.new('ns:a s:Arnie', nil,
|
59
|
+
['ns:b s:Bernie','ns:c s:Cornie','s:Darnie'])
|
60
|
+
assert_equal('ns:a s:Arnie', tx.to_s)
|
61
|
+
assert_equal([[:s, 'Arnie']], tx.sorted_ranks)
|
62
|
+
assert_equal('ns:a s:Arnie', tx.alternative(0).to_s)
|
63
|
+
assert_equal('ns:b s:Bernie', tx.alternative(1).to_s)
|
64
|
+
assert_equal('ns:c s:Cornie', tx.alternative(:c).to_s)
|
65
|
+
assert_equal('s:Darnie', tx.alternative('').to_s)
|
66
|
+
assert_nil(tx.alternative(:x))
|
67
|
+
assert_equal(3, tx.alternative.size)
|
68
|
+
js = tx.to_json
|
69
|
+
tx_js = JSON.parse(js, {symbolize_names: false, create_additions: true})
|
70
|
+
assert_equal(tx.to_s, tx_js.to_s)
|
71
|
+
assert_equal(tx.alternative(2).to_s, tx_js.alternative(2).to_s)
|
72
|
+
assert_equal(tx.alternative.size, tx_js.alternative.size)
|
73
|
+
end
|
74
|
+
|
47
75
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|