miga-base 0.4.0.0 → 0.4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 37bf8085cc9b33f88367c8134cd2b01115cfcd9e9116fab7ba3f93abc33c6d6f
4
- data.tar.gz: bdefeaa9f965ea991071a70b48239ad7129763bcb05f0cc2ebcd6d66511bd3e6
3
+ metadata.gz: c3c339de89529e19ad52d1d3522fbe25206045d3db3b030e4ee8c7ddf7d3c1db
4
+ data.tar.gz: d220804d9a9bc722496f271135da3c4415a77c84d40e49db5d39d015e13d9865
5
5
  SHA512:
6
- metadata.gz: 9e1463de777f3d77bcb8f804bdfedeb3781ccf0b36330faf21d501b1a87cf9b55f0d03f2668a1889ae165cbaaf2030ab955ad8c0f42a5aaad341a6eb7154b356
7
- data.tar.gz: 6738493c3a716f553fea6486841f612616bad38cc4091103ec490417336df327ae03046a04907b9dcaed9c5d81e17ab918d64d841a67a50b23565dc46308736b
6
+ metadata.gz: 27b5d98711e579c077eda211387457578e70183a30459dc6c6e917b5fc20cdbc730d649dfd58b0a2477ea019e0577c1c4993fc6d29eaee9bbdeb77a753ddb107
7
+ data.tar.gz: 8a391e9090bdf8dbf555fcdd456c1ae7c909a69b1f59cf5eac039d6f18ffbd0b9caf04bdcad406485f914c1c14681105d086e2b36f82039102df4d6b29baace0
data/lib/miga/dataset.rb CHANGED
@@ -48,8 +48,10 @@ class MiGA::Dataset < MiGA::MiGA
48
48
  # be treated as reference (true, default) or query (false). Pass any
49
49
  # additional +metadata+ as a Hash.
50
50
  def initialize(project, name, is_ref=true, metadata={})
51
- raise "Invalid name '#{name}', please use only alphanumerics and " +
52
- "underscores." unless name.miga_name?
51
+ unless name.miga_name?
52
+ raise 'Invalid name, please use only alphanumerics and underscores: ' +
53
+ name.to_s
54
+ end
53
55
  @project = project
54
56
  @name = name
55
57
  metadata[:ref] = is_ref
@@ -60,8 +62,9 @@ class MiGA::Dataset < MiGA::MiGA
60
62
  ##
61
63
  # Save any changes you've made in the dataset.
62
64
  def save
63
- self.metadata[:type] = :metagenome if !metadata[:tax].nil? and
64
- !metadata[:tax][:ns].nil? and metadata[:tax][:ns]=="COMMUNITY"
65
+ if t = metadata[:tax] and n = t[:ns] and n == 'COMMUNITY'
66
+ self.metadata[:type] = :metagenome
67
+ end
65
68
  self.metadata.save
66
69
  end
67
70
 
@@ -95,7 +98,7 @@ class MiGA::Dataset < MiGA::MiGA
95
98
  # Get standard metadata values for the dataset as Array.
96
99
  def info
97
100
  MiGA::Dataset.INFO_FIELDS.map do |k|
98
- (k=="name") ? self.name : metadata[k.to_sym]
101
+ (k == 'name') ? self.name : metadata[k.to_sym]
99
102
  end
100
103
  end
101
104
 
@@ -132,11 +135,11 @@ class MiGA::Dataset < MiGA::MiGA
132
135
  def ignore_task?(task)
133
136
  return true unless is_active?
134
137
  return !metadata["run_#{task}"] unless metadata["run_#{task}"].nil?
135
- return true if task==:taxonomy and project.metadata[:ref_project].nil?
138
+ return true if task == :taxonomy and project.metadata[:ref_project].nil?
136
139
  pattern = [true, false]
137
- ( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ]==pattern or
138
- [@@_ONLY_MULTI_TASKS_H[task], is_multi? ]==pattern or
139
- [@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?]==pattern )
140
+ ( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ] == pattern or
141
+ [@@_ONLY_MULTI_TASKS_H[task], is_multi? ] == pattern or
142
+ [@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?] == pattern )
140
143
  end
141
144
 
142
145
  ##
@@ -146,13 +149,14 @@ class MiGA::Dataset < MiGA::MiGA
146
149
  # This function is currently only supported for query datasets when
147
150
  # +ref_project+ is false (default), and only for reference datasets when
148
151
  # +ref_project+ is true. It returns +nil+ if this analysis is not supported.
149
- def closest_relatives(how_many=1, ref_project=false)
152
+ def closest_relatives(how_many = 1, ref_project = false)
150
153
  return nil if (is_ref? != ref_project) or is_multi?
151
154
  r = result(ref_project ? :taxonomy : :distances)
152
155
  return nil if r.nil?
153
156
  db = SQLite3::Database.new(r.file_path :aai_db)
154
- db.execute("SELECT seq2, aai FROM aai WHERE seq2 != ? " +
155
- "GROUP BY seq2 ORDER BY aai DESC LIMIT ?", [name, how_many])
157
+ db.execute(
158
+ 'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
159
+ 'GROUP BY seq2 ORDER BY aai DESC LIMIT ?', [name, how_many])
156
160
  end
157
161
 
158
162
  end # class MiGA::Dataset
@@ -42,7 +42,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
42
42
  @db = db.to_sym
43
43
  @universe = universe.to_sym
44
44
  @metadata = {}
45
- @metadata[:"#{universe}_#{db}"] = ids.join(",")
45
+ @metadata[:"#{universe}_#{db}"] = ids.join(',')
46
46
  @@UNIVERSE.keys.include?(@universe) or
47
47
  raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
48
48
  @@UNIVERSE[@universe][:dbs].include?(@db) or
@@ -130,7 +130,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
130
130
  def get_ncbi_taxonomy
131
131
  tax_id = get_ncbi_taxid
132
132
  return nil if tax_id.nil?
133
- lineage = {}
133
+ lineage = {ns: 'ncbi'}
134
134
  doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
135
135
  doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
136
136
  name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
data/lib/miga/taxonomy.rb CHANGED
@@ -38,7 +38,9 @@ class MiGA::Taxonomy < MiGA::MiGA
38
38
 
39
39
  ##
40
40
  # Initialize from JSON-derived Hash +o+.
41
- def self.json_create(o) new(o['str']) ; end
41
+ def self.json_create(o)
42
+ new(o['str'], nil, o['alt'])
43
+ end
42
44
 
43
45
  ##
44
46
  # Returns canonical rank (Symbol) for the +rank+ String.
@@ -63,8 +65,10 @@ class MiGA::Taxonomy < MiGA::MiGA
63
65
  # space-delimited entries, the array is a vector of entries. Each entry can be
64
66
  # either a rank:value pair (if +ranks+ is nil), or just values in the same
65
67
  # order as the ranks in +ranks+. Alternatively, +str+ as a Hash with rank =>
66
- # value pairs is also supported.
67
- def initialize(str, ranks = nil)
68
+ # value pairs is also supported. If +alt+ is passed, it must be an Array of
69
+ # String, Array, or Hash entries as defined above (except +ranks+ are not
70
+ # allowed).
71
+ def initialize(str, ranks = nil, alt = [])
68
72
  @ranks = {}
69
73
  if ranks.nil?
70
74
  case str when Array, Hash
@@ -74,12 +78,13 @@ class MiGA::Taxonomy < MiGA::MiGA
74
78
  end
75
79
  else
76
80
  ranks = ranks.split(/\s+/) unless ranks.is_a? Array
77
- str = str.split(/\s/) unless str.is_a? Array
81
+ str = str.split(/\s+/) unless str.is_a? Array
78
82
  raise "Unequal number of ranks (#{ranks.size}) " +
79
83
  "and names (#{str.size}):#{ranks} => #{str}" unless
80
84
  ranks.size==str.size
81
85
  (0 .. str.size).each{ |i| self << "#{ranks[i]}:#{str[i]}" }
82
86
  end
87
+ @alt = (alt || []).map { |i| Taxonomy.new(i) }
83
88
  end
84
89
 
85
90
  ##
@@ -89,7 +94,7 @@ class MiGA::Taxonomy < MiGA::MiGA
89
94
  if value.is_a? Hash
90
95
  value.each_pair do |rank_i, name_i|
91
96
  next if name_i.nil? or name_i == ""
92
- @ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr("_"," ")
97
+ @ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr('_',' ')
93
98
  end
94
99
  elsif value.is_a? Array
95
100
  value.each{ |v| self << v }
@@ -104,6 +109,26 @@ class MiGA::Taxonomy < MiGA::MiGA
104
109
  ##
105
110
  # Get +rank+ value.
106
111
  def [](rank) @ranks[ rank.to_sym ] ; end
112
+
113
+ ##
114
+ # Get the alternative taxonomies.
115
+ # - If +which+ is nil (default), returns all alternative taxonomies as Array
116
+ # (not including the master taxonomy).
117
+ # - If +which+ is Integer, returns the indexed taxonomy
118
+ # (starting with 0, the master taxonomy).
119
+ # - Otherwise, returns the first taxonomy with namespace +which+ (coerced as
120
+ # String), including the master taxonomy.
121
+ # In the latter two cases it can be nil.
122
+ def alternative(which = nil)
123
+ case which
124
+ when nil
125
+ @alt
126
+ when Integer
127
+ ([self] + @alt)[which]
128
+ else
129
+ ([self] + @alt).find{ |i| i.namespace.to_s == which.to_s }
130
+ end
131
+ end
107
132
 
108
133
  ##
109
134
  # Evaluates if the loaded taxonomy includes +taxon+. It assumes that +taxon+
@@ -116,30 +141,50 @@ class MiGA::Taxonomy < MiGA::MiGA
116
141
 
117
142
  ##
118
143
  # Sorted list of ranks, as an Array of two-entry Arrays (rank and value).
119
- def sorted_ranks
144
+ # If +force_ranks+ is true, it returns all standard ranks even if undefined.
145
+ # If +with_namespace+ is true, it includes also the namespace.
146
+ def sorted_ranks(force_ranks = false, with_namespace = false)
120
147
  @@KNOWN_RANKS.map do |r|
121
- ranks[r].nil? ? nil : [r, ranks[r]]
148
+ next if r == :ns and not with_namespace
149
+ next if ranks[r].nil? and not force_ranks
150
+ [r, ranks[r]]
122
151
  end.compact
123
152
  end
153
+
154
+ ##
155
+ # Namespace of the taxonomy (a String) or +nil+.
156
+ def namespace; self[ :ns ] ; end
124
157
 
125
158
  ##
126
159
  # Get the most general rank as a two-entry Array (rank and value).
127
- def highest; sorted_ranks.first ; end
160
+ # If +force_ranks+ is true, it always returns the value for domain (d)
161
+ # even if undefined.
162
+ def highest(force_ranks = false)
163
+ sorted_ranks.first
164
+ end
128
165
 
129
166
  ##
130
167
  # Get the most specific rank as a two-entry Array (rank and value).
131
- def lowest; sorted_ranks.last ; end
168
+ # If +force_ranks+ is true, it always returns the value for dataset (ds)
169
+ # even if undefined.
170
+ def lowest(force_ranks = false)
171
+ sorted_ranks(force_ranks).last
172
+ end
132
173
 
133
174
  ##
134
- # Generate canonical String for the taxonomy.
135
- def to_s
136
- sorted_ranks.map{ |r| "#{r[0]}:#{r[1].gsub(/[\s:]/,"_")}" }.join(" ")
175
+ # Generate canonical String for the taxonomy. If +force_ranks+ is true,
176
+ # it returns all the standard ranks even if undefined.
177
+ def to_s(force_ranks = false)
178
+ sorted_ranks(force_ranks, true).
179
+ map{ |r| "#{r[0]}:#{(r[1] || '').gsub(/[\s:]/, '_')}" }.join(' ')
137
180
  end
138
181
 
139
182
  ##
140
183
  # Generate JSON-formatted String representing the taxonomy.
141
184
  def to_json(*a)
142
- { JSON.create_id => self.class.name, "str" => self.to_s }.to_json(*a)
185
+ hsh = { JSON.create_id => self.class.name, 'str' => self.to_s }
186
+ hsh['alt'] = alternative.map(&:to_s) unless alternative.empty?
187
+ hsh.to_json(*a)
143
188
  end
144
189
 
145
190
  end
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.4, 0, 0]
13
+ VERSION = [0.4, 1, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2019, 8, 15)
21
+ VERSION_DATE = Date.new(2019, 8, 17)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -24,28 +24,35 @@ class RemoteDatasetTest < Test::Unit::TestCase
24
24
  end
25
25
 
26
26
  def test_bad_remote_dataset
27
- assert_raise { MiGA::RemoteDataset.new("ids", :embl, :marvel) }
28
- assert_raise { MiGA::RemoteDataset.new("ids", :google, :ebi) }
27
+ assert_raise { MiGA::RemoteDataset.new('ids', :embl, :marvel) }
28
+ assert_raise { MiGA::RemoteDataset.new('ids', :google, :ebi) }
29
29
  end
30
30
 
31
31
  def test_rest
32
- hiv2 = "M30502.1"
32
+ hiv2 = 'M30502.1'
33
33
  {embl: :ebi, nuccore: :ncbi}.each do |db, universe|
34
34
  rd = MiGA::RemoteDataset.new(hiv2, db, universe)
35
35
  assert_equal([hiv2], rd.ids)
36
- omit_if(!$remote_tests, "Remote access is error-prone.")
36
+ omit_if(!$remote_tests, 'Remote access is error-prone.')
37
37
  tx = rd.get_ncbi_taxonomy
38
- assert_equal(MiGA::Taxonomy, tx.class, "Failed on #{universe}:#{db}")
39
- assert_equal("Lentivirus", tx[:g], "Failed on #{universe}:#{db}")
38
+ msg = "Failed on #{universe}:#{db}"
39
+ assert_equal(MiGA::Taxonomy, tx.class, msg)
40
+ assert_equal('Lentivirus', tx[:g], msg)
41
+ assert_equal('ns:ncbi o:Ortervirales f:Retroviridae ' \
42
+ 'g:Lentivirus s:Human_immunodeficiency_virus_2', tx.to_s, msg)
43
+ assert_equal('ns:ncbi d: k: p: c: o:Ortervirales f:Retroviridae ' \
44
+ 'g:Lentivirus s:Human_immunodeficiency_virus_2 ssp: str: ds:',
45
+ tx.to_s(true), msg)
46
+ assert_equal('ncbi', tx.namespace, msg)
40
47
  end
41
48
  end
42
49
 
43
50
  def test_net_ftp
44
- cjac = "ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/ga/GAPJ01.fasta.gz"
45
- n = "Cjac_L14"
51
+ cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/ga/GAPJ01.fasta.gz'
52
+ n = 'Cjac_L14'
46
53
  rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
47
54
  assert_equal([cjac], rd.ids)
48
- omit_if(!$remote_tests, "Remote access is error-prone.")
55
+ omit_if(!$remote_tests, 'Remote access is error-prone.')
49
56
  p = $p1
50
57
  assert_nil(p.dataset(n))
51
58
  rd.save_to(p, n)
@@ -6,22 +6,32 @@ class TaxonomyTest < Test::Unit::TestCase
6
6
  def test_ranks
7
7
  assert_respond_to(MiGA::Taxonomy, :KNOWN_RANKS)
8
8
  assert(MiGA::Taxonomy.KNOWN_RANKS.include? :s)
9
- assert_nil(MiGA::Taxonomy.normalize_rank "No Rank")
10
- assert_nil(MiGA::Taxonomy.normalize_rank "Captain")
9
+ assert_nil(MiGA::Taxonomy.normalize_rank 'No Rank')
10
+ assert_nil(MiGA::Taxonomy.normalize_rank 'Captain')
11
11
  assert_equal(:f, MiGA::Taxonomy.normalize_rank(:Family))
12
12
  end
13
13
 
14
14
  def test_json
15
- js = '{"json_class":"MiGA::Taxonomy",' +
16
- '"str":"k:Fantasia f:Dragonaceae s:Dragonia_azura"}'
17
- tx = JSON.parse(js, {:symbolize_names=>false, :create_additions=>true})
15
+ txt = 'k:Fantasia f:Dragonaceae s:Dragonia_azura'
16
+ js = '{"json_class":"MiGA::Taxonomy","str":"' + txt + '"}'
17
+ tx = JSON.parse(js, {symbolize_names: false, create_additions: true})
18
18
  assert_equal(MiGA::Taxonomy, tx.class)
19
- assert_equal("Dragonaceae", tx[:f])
19
+ assert_equal('Dragonaceae', tx[:f])
20
20
  assert_equal(js, tx.to_json)
21
21
  end
22
22
 
23
+ def test_namespace
24
+ txt = 'ns:Irrealis k:Fantasia f:Dragonaceae s:Dragonia_azura'
25
+ tx = MiGA::Taxonomy.new(txt)
26
+ assert_equal(txt, tx.to_s)
27
+ assert_equal(
28
+ [[:k, 'Fantasia'],[:f, 'Dragonaceae'],[:s, 'Dragonia azura']],
29
+ tx.sorted_ranks)
30
+ assert_equal('Irrealis', tx.namespace)
31
+ end
32
+
23
33
  def test_append
24
- tx = MiGA::Taxonomy.new ""
34
+ tx = MiGA::Taxonomy.new ''
25
35
  assert_equal("", "#{tx}")
26
36
  tx << ["domain:Public","family:GNU"]
27
37
  assert_equal("GNU", tx[:f])
@@ -44,4 +54,22 @@ class TaxonomyTest < Test::Unit::TestCase
44
54
  end
45
55
  end
46
56
 
57
+ def test_alt
58
+ tx = MiGA::Taxonomy.new('ns:a s:Arnie', nil,
59
+ ['ns:b s:Bernie','ns:c s:Cornie','s:Darnie'])
60
+ assert_equal('ns:a s:Arnie', tx.to_s)
61
+ assert_equal([[:s, 'Arnie']], tx.sorted_ranks)
62
+ assert_equal('ns:a s:Arnie', tx.alternative(0).to_s)
63
+ assert_equal('ns:b s:Bernie', tx.alternative(1).to_s)
64
+ assert_equal('ns:c s:Cornie', tx.alternative(:c).to_s)
65
+ assert_equal('s:Darnie', tx.alternative('').to_s)
66
+ assert_nil(tx.alternative(:x))
67
+ assert_equal(3, tx.alternative.size)
68
+ js = tx.to_json
69
+ tx_js = JSON.parse(js, {symbolize_names: false, create_additions: true})
70
+ assert_equal(tx.to_s, tx_js.to_s)
71
+ assert_equal(tx.alternative(2).to_s, tx_js.alternative(2).to_s)
72
+ assert_equal(tx.alternative.size, tx_js.alternative.size)
73
+ end
74
+
47
75
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0.0
4
+ version: 0.4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-15 00:00:00.000000000 Z
11
+ date: 2019-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons