miga-base 0.4.0.0 → 0.4.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 37bf8085cc9b33f88367c8134cd2b01115cfcd9e9116fab7ba3f93abc33c6d6f
4
- data.tar.gz: bdefeaa9f965ea991071a70b48239ad7129763bcb05f0cc2ebcd6d66511bd3e6
3
+ metadata.gz: c3c339de89529e19ad52d1d3522fbe25206045d3db3b030e4ee8c7ddf7d3c1db
4
+ data.tar.gz: d220804d9a9bc722496f271135da3c4415a77c84d40e49db5d39d015e13d9865
5
5
  SHA512:
6
- metadata.gz: 9e1463de777f3d77bcb8f804bdfedeb3781ccf0b36330faf21d501b1a87cf9b55f0d03f2668a1889ae165cbaaf2030ab955ad8c0f42a5aaad341a6eb7154b356
7
- data.tar.gz: 6738493c3a716f553fea6486841f612616bad38cc4091103ec490417336df327ae03046a04907b9dcaed9c5d81e17ab918d64d841a67a50b23565dc46308736b
6
+ metadata.gz: 27b5d98711e579c077eda211387457578e70183a30459dc6c6e917b5fc20cdbc730d649dfd58b0a2477ea019e0577c1c4993fc6d29eaee9bbdeb77a753ddb107
7
+ data.tar.gz: 8a391e9090bdf8dbf555fcdd456c1ae7c909a69b1f59cf5eac039d6f18ffbd0b9caf04bdcad406485f914c1c14681105d086e2b36f82039102df4d6b29baace0
data/lib/miga/dataset.rb CHANGED
@@ -48,8 +48,10 @@ class MiGA::Dataset < MiGA::MiGA
48
48
  # be treated as reference (true, default) or query (false). Pass any
49
49
  # additional +metadata+ as a Hash.
50
50
  def initialize(project, name, is_ref=true, metadata={})
51
- raise "Invalid name '#{name}', please use only alphanumerics and " +
52
- "underscores." unless name.miga_name?
51
+ unless name.miga_name?
52
+ raise 'Invalid name, please use only alphanumerics and underscores: ' +
53
+ name.to_s
54
+ end
53
55
  @project = project
54
56
  @name = name
55
57
  metadata[:ref] = is_ref
@@ -60,8 +62,9 @@ class MiGA::Dataset < MiGA::MiGA
60
62
  ##
61
63
  # Save any changes you've made in the dataset.
62
64
  def save
63
- self.metadata[:type] = :metagenome if !metadata[:tax].nil? and
64
- !metadata[:tax][:ns].nil? and metadata[:tax][:ns]=="COMMUNITY"
65
+ if t = metadata[:tax] and n = t[:ns] and n == 'COMMUNITY'
66
+ self.metadata[:type] = :metagenome
67
+ end
65
68
  self.metadata.save
66
69
  end
67
70
 
@@ -95,7 +98,7 @@ class MiGA::Dataset < MiGA::MiGA
95
98
  # Get standard metadata values for the dataset as Array.
96
99
  def info
97
100
  MiGA::Dataset.INFO_FIELDS.map do |k|
98
- (k=="name") ? self.name : metadata[k.to_sym]
101
+ (k == 'name') ? self.name : metadata[k.to_sym]
99
102
  end
100
103
  end
101
104
 
@@ -132,11 +135,11 @@ class MiGA::Dataset < MiGA::MiGA
132
135
  def ignore_task?(task)
133
136
  return true unless is_active?
134
137
  return !metadata["run_#{task}"] unless metadata["run_#{task}"].nil?
135
- return true if task==:taxonomy and project.metadata[:ref_project].nil?
138
+ return true if task == :taxonomy and project.metadata[:ref_project].nil?
136
139
  pattern = [true, false]
137
- ( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ]==pattern or
138
- [@@_ONLY_MULTI_TASKS_H[task], is_multi? ]==pattern or
139
- [@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?]==pattern )
140
+ ( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ] == pattern or
141
+ [@@_ONLY_MULTI_TASKS_H[task], is_multi? ] == pattern or
142
+ [@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?] == pattern )
140
143
  end
141
144
 
142
145
  ##
@@ -146,13 +149,14 @@ class MiGA::Dataset < MiGA::MiGA
146
149
  # This function is currently only supported for query datasets when
147
150
  # +ref_project+ is false (default), and only for reference dataset when
148
151
  # +ref_project+ is true. It returns +nil+ if this analysis is not supported.
149
- def closest_relatives(how_many=1, ref_project=false)
152
+ def closest_relatives(how_many = 1, ref_project = false)
150
153
  return nil if (is_ref? != ref_project) or is_multi?
151
154
  r = result(ref_project ? :taxonomy : :distances)
152
155
  return nil if r.nil?
153
156
  db = SQLite3::Database.new(r.file_path :aai_db)
154
- db.execute("SELECT seq2, aai FROM aai WHERE seq2 != ? " +
155
- "GROUP BY seq2 ORDER BY aai DESC LIMIT ?", [name, how_many])
157
+ db.execute(
158
+ 'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
159
+ 'GROUP BY seq2 ORDER BY aai DESC LIMIT ?', [name, how_many])
156
160
  end
157
161
 
158
162
  end # class MiGA::Dataset
@@ -42,7 +42,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
42
42
  @db = db.to_sym
43
43
  @universe = universe.to_sym
44
44
  @metadata = {}
45
- @metadata[:"#{universe}_#{db}"] = ids.join(",")
45
+ @metadata[:"#{universe}_#{db}"] = ids.join(',')
46
46
  @@UNIVERSE.keys.include?(@universe) or
47
47
  raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
48
48
  @@UNIVERSE[@universe][:dbs].include?(@db) or
@@ -130,7 +130,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
130
130
  def get_ncbi_taxonomy
131
131
  tax_id = get_ncbi_taxid
132
132
  return nil if tax_id.nil?
133
- lineage = {}
133
+ lineage = {ns: 'ncbi'}
134
134
  doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
135
135
  doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
136
136
  name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
data/lib/miga/taxonomy.rb CHANGED
@@ -38,7 +38,9 @@ class MiGA::Taxonomy < MiGA::MiGA
38
38
 
39
39
  ##
40
40
  # Initialize from JSON-derived Hash +o+.
41
- def self.json_create(o) new(o['str']) ; end
41
+ def self.json_create(o)
42
+ new(o['str'], nil, o['alt'])
43
+ end
42
44
 
43
45
  ##
44
46
  # Returns canonical rank (Symbol) for the +rank+ String.
@@ -63,8 +65,10 @@ class MiGA::Taxonomy < MiGA::MiGA
63
65
  # space-delimited entries, the array is a vector of entries. Each entry can be
64
66
  # either a rank:value pair (if +ranks+ is nil), or just values in the same
65
67
  # order as the ranks in +ranks+. Alternatively, +str+ as a Hash with rank =>
66
- # value pairs is also supported.
67
- def initialize(str, ranks = nil)
68
+ # value pairs is also supported. If +alt+ is passed, it must be an Array of
69
+ # String, Array, or Hash entries as defined above (except +ranks+ are not
70
+ # allowed).
71
+ def initialize(str, ranks = nil, alt = [])
68
72
  @ranks = {}
69
73
  if ranks.nil?
70
74
  case str when Array, Hash
@@ -74,12 +78,13 @@ class MiGA::Taxonomy < MiGA::MiGA
74
78
  end
75
79
  else
76
80
  ranks = ranks.split(/\s+/) unless ranks.is_a? Array
77
- str = str.split(/\s/) unless str.is_a? Array
81
+ str = str.split(/\s+/) unless str.is_a? Array
78
82
  raise "Unequal number of ranks (#{ranks.size}) " +
79
83
  "and names (#{str.size}):#{ranks} => #{str}" unless
80
84
  ranks.size==str.size
81
85
  (0 .. str.size).each{ |i| self << "#{ranks[i]}:#{str[i]}" }
82
86
  end
87
+ @alt = (alt || []).map { |i| Taxonomy.new(i) }
83
88
  end
84
89
 
85
90
  ##
@@ -89,7 +94,7 @@ class MiGA::Taxonomy < MiGA::MiGA
89
94
  if value.is_a? Hash
90
95
  value.each_pair do |rank_i, name_i|
91
96
  next if name_i.nil? or name_i == ""
92
- @ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr("_"," ")
97
+ @ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr('_',' ')
93
98
  end
94
99
  elsif value.is_a? Array
95
100
  value.each{ |v| self << v }
@@ -104,6 +109,26 @@ class MiGA::Taxonomy < MiGA::MiGA
104
109
  ##
105
110
  # Get +rank+ value.
106
111
  def [](rank) @ranks[ rank.to_sym ] ; end
112
+
113
+ ##
114
+ # Get the alternative taxonomies.
115
+ # - If +which+ is nil (default), returns all alternative taxonomies as Array
116
+ # (not including the master taxonomy).
117
+ # - If +which+ is Integer, returns the indexed taxonomy
118
+ # (starting with 0, the master taxonomy).
119
+ # - Otherwise, returns the first taxonomy with namespace +which+ (coerced as
120
+ # String), including the master taxonomy.
121
+ # In the latter two cases it can be nil.
122
+ def alternative(which = nil)
123
+ case which
124
+ when nil
125
+ @alt
126
+ when Integer
127
+ ([self] + @alt)[which]
128
+ else
129
+ ([self] + @alt).find{ |i| i.namespace.to_s == which.to_s }
130
+ end
131
+ end
107
132
 
108
133
  ##
109
134
  # Evaluates if the loaded taxonomy includes +taxon+. It assumes that +taxon+
@@ -116,30 +141,50 @@ class MiGA::Taxonomy < MiGA::MiGA
116
141
 
117
142
  ##
118
143
  # Sorted list of ranks, as an Array of two-entry Arrays (rank and value).
119
- def sorted_ranks
144
+ # If +force_ranks+ is true, it returns all standard ranks even if undefined.
145
+ # If +with_namespace+ is true, it includes also the namespace.
146
+ def sorted_ranks(force_ranks = false, with_namespace = false)
120
147
  @@KNOWN_RANKS.map do |r|
121
- ranks[r].nil? ? nil : [r, ranks[r]]
148
+ next if r == :ns and not with_namespace
149
+ next if ranks[r].nil? and not force_ranks
150
+ [r, ranks[r]]
122
151
  end.compact
123
152
  end
153
+
154
+ ##
155
+ # Namespace of the taxonomy (a String) or +nil+.
156
+ def namespace; self[ :ns ] ; end
124
157
 
125
158
  ##
126
159
  # Get the most general rank as a two-entry Array (rank and value).
127
- def highest; sorted_ranks.first ; end
160
+ # If +force_ranks+ is true, it always returns the value for domain (d)
161
+ # even if undefined.
162
+ def highest(force_ranks = false)
163
+ sorted_ranks.first
164
+ end
128
165
 
129
166
  ##
130
167
  # Get the most specific rank as a two-entry Array (rank and value).
131
- def lowest; sorted_ranks.last ; end
168
+ # If +force_ranks+ is true, it always returns the value for dataset (ds)
169
+ # even if undefined.
170
+ def lowest(force_ranks = false)
171
+ sorted_ranks(force_ranks).last
172
+ end
132
173
 
133
174
  ##
134
- # Generate cannonical String for the taxonomy.
135
- def to_s
136
- sorted_ranks.map{ |r| "#{r[0]}:#{r[1].gsub(/[\s:]/,"_")}" }.join(" ")
175
+ # Generate canonical String for the taxonomy. If +force_ranks+ is true,
176
+ # it returns all the standard ranks even if undefined.
177
+ def to_s(force_ranks = false)
178
+ sorted_ranks(force_ranks, true).
179
+ map{ |r| "#{r[0]}:#{(r[1] || '').gsub(/[\s:]/, '_')}" }.join(' ')
137
180
  end
138
181
 
139
182
  ##
140
183
  # Generate JSON-formatted String representing the taxonomy.
141
184
  def to_json(*a)
142
- { JSON.create_id => self.class.name, "str" => self.to_s }.to_json(*a)
185
+ hsh = { JSON.create_id => self.class.name, 'str' => self.to_s }
186
+ hsh['alt'] = alternative.map(&:to_s) unless alternative.empty?
187
+ hsh.to_json(*a)
143
188
  end
144
189
 
145
190
  end
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.4, 0, 0]
13
+ VERSION = [0.4, 1, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2019, 8, 15)
21
+ VERSION_DATE = Date.new(2019, 8, 17)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -24,28 +24,35 @@ class RemoteDatasetTest < Test::Unit::TestCase
24
24
  end
25
25
 
26
26
  def test_bad_remote_dataset
27
- assert_raise { MiGA::RemoteDataset.new("ids", :embl, :marvel) }
28
- assert_raise { MiGA::RemoteDataset.new("ids", :google, :ebi) }
27
+ assert_raise { MiGA::RemoteDataset.new('ids', :embl, :marvel) }
28
+ assert_raise { MiGA::RemoteDataset.new('ids', :google, :ebi) }
29
29
  end
30
30
 
31
31
  def test_rest
32
- hiv2 = "M30502.1"
32
+ hiv2 = 'M30502.1'
33
33
  {embl: :ebi, nuccore: :ncbi}.each do |db, universe|
34
34
  rd = MiGA::RemoteDataset.new(hiv2, db, universe)
35
35
  assert_equal([hiv2], rd.ids)
36
- omit_if(!$remote_tests, "Remote access is error-prone.")
36
+ omit_if(!$remote_tests, 'Remote access is error-prone.')
37
37
  tx = rd.get_ncbi_taxonomy
38
- assert_equal(MiGA::Taxonomy, tx.class, "Failed on #{universe}:#{db}")
39
- assert_equal("Lentivirus", tx[:g], "Failed on #{universe}:#{db}")
38
+ msg = "Failed on #{universe}:#{db}"
39
+ assert_equal(MiGA::Taxonomy, tx.class, msg)
40
+ assert_equal('Lentivirus', tx[:g], msg)
41
+ assert_equal('ns:ncbi o:Ortervirales f:Retroviridae ' \
42
+ 'g:Lentivirus s:Human_immunodeficiency_virus_2', tx.to_s, msg)
43
+ assert_equal('ns:ncbi d: k: p: c: o:Ortervirales f:Retroviridae ' \
44
+ 'g:Lentivirus s:Human_immunodeficiency_virus_2 ssp: str: ds:',
45
+ tx.to_s(true), msg)
46
+ assert_equal('ncbi', tx.namespace, msg)
40
47
  end
41
48
  end
42
49
 
43
50
  def test_net_ftp
44
- cjac = "ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/ga/GAPJ01.fasta.gz"
45
- n = "Cjac_L14"
51
+ cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/ga/GAPJ01.fasta.gz'
52
+ n = 'Cjac_L14'
46
53
  rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
47
54
  assert_equal([cjac], rd.ids)
48
- omit_if(!$remote_tests, "Remote access is error-prone.")
55
+ omit_if(!$remote_tests, 'Remote access is error-prone.')
49
56
  p = $p1
50
57
  assert_nil(p.dataset(n))
51
58
  rd.save_to(p, n)
@@ -6,22 +6,32 @@ class TaxonomyTest < Test::Unit::TestCase
6
6
  def test_ranks
7
7
  assert_respond_to(MiGA::Taxonomy, :KNOWN_RANKS)
8
8
  assert(MiGA::Taxonomy.KNOWN_RANKS.include? :s)
9
- assert_nil(MiGA::Taxonomy.normalize_rank "No Rank")
10
- assert_nil(MiGA::Taxonomy.normalize_rank "Captain")
9
+ assert_nil(MiGA::Taxonomy.normalize_rank 'No Rank')
10
+ assert_nil(MiGA::Taxonomy.normalize_rank 'Captain')
11
11
  assert_equal(:f, MiGA::Taxonomy.normalize_rank(:Family))
12
12
  end
13
13
 
14
14
  def test_json
15
- js = '{"json_class":"MiGA::Taxonomy",' +
16
- '"str":"k:Fantasia f:Dragonaceae s:Dragonia_azura"}'
17
- tx = JSON.parse(js, {:symbolize_names=>false, :create_additions=>true})
15
+ txt = 'k:Fantasia f:Dragonaceae s:Dragonia_azura'
16
+ js = '{"json_class":"MiGA::Taxonomy","str":"' + txt + '"}'
17
+ tx = JSON.parse(js, {symbolize_names: false, create_additions: true})
18
18
  assert_equal(MiGA::Taxonomy, tx.class)
19
- assert_equal("Dragonaceae", tx[:f])
19
+ assert_equal('Dragonaceae', tx[:f])
20
20
  assert_equal(js, tx.to_json)
21
21
  end
22
22
 
23
+ def test_namespace
24
+ txt = 'ns:Irrealis k:Fantasia f:Dragonaceae s:Dragonia_azura'
25
+ tx = MiGA::Taxonomy.new(txt)
26
+ assert_equal(txt, tx.to_s)
27
+ assert_equal(
28
+ [[:k, 'Fantasia'],[:f, 'Dragonaceae'],[:s, 'Dragonia azura']],
29
+ tx.sorted_ranks)
30
+ assert_equal('Irrealis', tx.namespace)
31
+ end
32
+
23
33
  def test_append
24
- tx = MiGA::Taxonomy.new ""
34
+ tx = MiGA::Taxonomy.new ''
25
35
  assert_equal("", "#{tx}")
26
36
  tx << ["domain:Public","family:GNU"]
27
37
  assert_equal("GNU", tx[:f])
@@ -44,4 +54,22 @@ class TaxonomyTest < Test::Unit::TestCase
44
54
  end
45
55
  end
46
56
 
57
+ def test_alt
58
+ tx = MiGA::Taxonomy.new('ns:a s:Arnie', nil,
59
+ ['ns:b s:Bernie','ns:c s:Cornie','s:Darnie'])
60
+ assert_equal('ns:a s:Arnie', tx.to_s)
61
+ assert_equal([[:s, 'Arnie']], tx.sorted_ranks)
62
+ assert_equal('ns:a s:Arnie', tx.alternative(0).to_s)
63
+ assert_equal('ns:b s:Bernie', tx.alternative(1).to_s)
64
+ assert_equal('ns:c s:Cornie', tx.alternative(:c).to_s)
65
+ assert_equal('s:Darnie', tx.alternative('').to_s)
66
+ assert_nil(tx.alternative(:x))
67
+ assert_equal(3, tx.alternative.size)
68
+ js = tx.to_json
69
+ tx_js = JSON.parse(js, {symbolize_names: false, create_additions: true})
70
+ assert_equal(tx.to_s, tx_js.to_s)
71
+ assert_equal(tx.alternative(2).to_s, tx_js.alternative(2).to_s)
72
+ assert_equal(tx.alternative.size, tx_js.alternative.size)
73
+ end
74
+
47
75
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0.0
4
+ version: 0.4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-15 00:00:00.000000000 Z
11
+ date: 2019-08-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons