miga-base 0.3.5.0 → 0.3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c7f9c242373a48634effe2905d33ddd49c2c1bce
4
- data.tar.gz: cbdf5d91e364987718b6f0d29b57a0bab372afdc
2
+ SHA256:
3
+ metadata.gz: 25f6a339288dbdbeda1f84f5da6b5de697d3790c0b38d138a4416ff762cae936
4
+ data.tar.gz: 60187223750e983fafd6088935a912d016bbf4acafaf79b548e206c50076cc04
5
5
  SHA512:
6
- metadata.gz: d790bb6056fc556ae86915093d6d16973222009e3249f1d33351ae9e35ab11614ea9163722a63592e2e1e95a0ea1345a537f98f8ffa675ef421ce6dd1f077642
7
- data.tar.gz: d435fcda801d87f51f1dcc8d090812c7fbae12e74dfbbea5470ed573a993ad61edcf6855376f2ef3ea9acb9900407072b827852716c24ab64c4252e87a982d95
6
+ metadata.gz: 60b59ccc8fc3bf9f5a584f3221c268839ba4cd41eb3a6c16911f1808944c20100758f74908797f13ada9d2475e4fda382c1e204b257cf6c6adcfd17db694efcf
7
+ data.tar.gz: e62de30119c51dc4e92e64aec954c263f84a4820f1541f014c49d93dadd8de1efb5d331c35ecbda6ccfa79c402393894b7b6ffd52edf2362b8e117fe505ff230
data/actions/add.rb CHANGED
@@ -60,7 +60,9 @@ def cp_result(o, d, p, sym, res_sym, ext)
60
60
  r_dir = MiGA::Dataset.RESULT_DIRS[res_sym]
61
61
  r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
62
62
  ext.each_index do |i|
63
- FileUtils.cp o[sym][i], "#{r_path}#{ext[i]}" unless o[sym][i].nil?
63
+ next if o[sym][i].nil?
64
+ gz = o[sym][i] =~ /\.gz/ ? '.gz' : ''
65
+ FileUtils.cp(o[sym][i], "#{r_path}#{ext[i]}#{gz}")
64
66
  end
65
67
  File.open("#{r_path}.done", "w") { |f| f.print Time.now.to_s }
66
68
  end
data/actions/get.rb CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  require 'miga/remote_dataset'
7
7
 
8
- o = {q: true, query: false, universe: :ebi, db: :embl}
8
+ o = {q: true, query: false, universe: :ebi, db: :embl, get_md: false}
9
9
  OptionParser.new do |opt|
10
10
  opt_banner(opt)
11
11
  opt_object(opt, o, [:project, :dataset, :dataset_type])
@@ -36,6 +36,9 @@ OptionParser.new do |opt|
36
36
  'Metadata as key-value pairs separated by = and delimited by comma.',
37
37
  'Values are saved as strings except for booleans (true / false) or nil.'
38
38
  ){ |v| o[:metadata]=v }
39
+ opt.on('--get-metadata',
40
+ 'Only download and update metadata for existing datasets'
41
+ ){ |v| o[:get_md] = v }
39
42
  opt_common(opt, o)
40
43
  end.parse!
41
44
 
@@ -74,12 +77,20 @@ glob.each do |o_i|
74
77
  $stderr.puts 'Locating remote dataset.' unless o_i[:q]
75
78
  rd = MiGA::RemoteDataset.new(o_i[:ids], o_i[:db], o_i[:universe])
76
79
 
77
- $stderr.puts 'Creating dataset.' unless o_i[:q]
78
- dummy_d = MiGA::Dataset.new(p, o_i[:dataset])
79
- md = add_metadata(o_i, dummy_d).metadata.data
80
- dummy_d.remove!
81
- rd.save_to(p, o_i[:dataset], !o_i[:query], md)
82
- p.add_dataset(o_i[:dataset])
80
+ if o[:get_md]
81
+ $stderr.puts 'Updating dataset.' unless o_i[:q]
82
+ d = p.dataset(o_i[:dataset])
83
+ next if d.nil?
84
+ md = add_metadata(o_i, d).metadata.data
85
+ rd.update_metadata(d, md)
86
+ else
87
+ $stderr.puts 'Creating dataset.' unless o_i[:q]
88
+ dummy_d = MiGA::Dataset.new(p, o_i[:dataset])
89
+ md = add_metadata(o_i, dummy_d).metadata.data
90
+ dummy_d.remove!
91
+ rd.save_to(p, o_i[:dataset], !o_i[:query], md)
92
+ p.add_dataset(o_i[:dataset])
93
+ end
83
94
 
84
95
  $stderr.puts 'Done.' unless o_i[:q]
85
96
  end
data/bin/miga CHANGED
@@ -19,7 +19,7 @@ $task_desc = {
19
19
  # Datasets
20
20
  add: "Creates an empty dataset in a pre-existing MiGA project.",
21
21
  get: "Downloads a dataset from public databases into a MiGA project.",
22
- ncbi_get: "Downloads all genomes in a taxon or RefSeq from NCBI into a MiGA project.",
22
+ ncbi_get: "Downloads all genomes in a taxon from NCBI into a MiGA project.",
23
23
  rm: "Removes a dataset from an MiGA project.",
24
24
  find: "Finds unregistered datasets based on result files.",
25
25
  ln: "Link datasets (including results) from one project to another.",
@@ -137,13 +137,16 @@ def opt_common(opt, o)
137
137
  end
138
138
 
139
139
  # OptParse flags to filter lists of datasets.
140
- def opt_filter_datasets(opt, o, what=[:ref, :multi, :taxonomy])
140
+ def opt_filter_datasets(opt, o, what=[:ref, :multi, :active, :taxonomy])
141
141
  opt.on("--[no-]ref",
142
142
  "If set, uses only reference (or only non-reference) datasets."
143
143
  ){ |v| o[:ref]=v } if what.include? :ref
144
144
  opt.on("--[no-]multi",
145
145
  "If set, uses only multi-species (or only single-species) datasets."
146
146
  ){ |v| o[:multi]=v } if what.include? :multi
147
+ opt.on("--[no-]active",
148
+ "If set, uses only active (or inactive) datasets."
149
+ ){ |v| o[:active]=v } if what.include? :active
147
150
  opt.on("-t", "--taxonomy RANK:TAXON", "Filter by taxonomy."
148
151
  ){ |v| o[:taxonomy]=MiGA::Taxonomy.new v } if what.include? :taxonomy
149
152
  opt.on("-k", "--key INTEGER",
@@ -163,7 +166,8 @@ end
163
166
 
164
167
  # Filters datasets by keys set in +opt_filter_datasets+.
165
168
  def filter_datasets!(ds, o)
166
- ds.select!{|d| d.is_ref? == o[:ref] } unless o[:ref].nil?
169
+ ds.select! { |d| d.is_ref? == o[:ref] } unless o[:ref].nil?
170
+ ds.select! { |d| d.is_active? == o[:active] } unless o[:active].nil?
167
171
  ds.select! do |d|
168
172
  o[:multi] ? d.is_multi? : d.is_nonmulti?
169
173
  end unless o[:multi].nil?
@@ -122,6 +122,7 @@ module MiGA::Dataset::Result
122
122
  # the project as reference datasets.
123
123
  def cleanup_distances!
124
124
  r = get_result(:distances)
125
+ ref = project.datasets.select(&:is_ref?).select(&:is_active?).map(&:name)
125
126
  return if r.nil?
126
127
  [:haai_db, :aai_db, :ani_db].each do |db_type|
127
128
  db = r.file_path(db_type)
@@ -130,7 +131,7 @@ module MiGA::Dataset::Result
130
131
  table = db_type[-6..-4]
131
132
  val = sqlite_db.execute "select seq2 from #{table}"
132
133
  next if val.empty?
133
- (val.map{ |i| i.first } - project.dataset_names).each do |extra|
134
+ (val.map(&:first) - ref).each do |extra|
134
135
  sqlite_db.execute "delete from #{table} where seq2=?", extra
135
136
  end
136
137
  end
data/lib/miga/dataset.rb CHANGED
@@ -17,7 +17,7 @@ class MiGA::Dataset < MiGA::MiGA
17
17
  ##
18
18
  # Does the +project+ already have a dataset with that +name+?
19
19
  def exist?(project, name)
20
- project.dataset_names.include? name
20
+ not project.dataset_names_hash[name].nil?
21
21
  end
22
22
 
23
23
  ##
@@ -5,7 +5,6 @@
5
5
  # Helper module including specific functions handle datasets.
6
6
  module MiGA::Project::Dataset
7
7
 
8
-
9
8
  ##
10
9
  # Returns Array of MiGA::Dataset.
11
10
  def datasets
@@ -17,6 +16,13 @@ module MiGA::Project::Dataset
17
16
  def dataset_names
18
17
  metadata[:datasets]
19
18
  end
19
+
20
+ ##
21
+ # Returns Hash of Strings => true. Similar to +dataset_names+ but as
22
+ # Hash for efficiency.
23
+ def dataset_names_hash
24
+ @dataset_names_hash ||= Hash[dataset_names.map{ |i| [i,true] }]
25
+ end
20
26
 
21
27
  ##
22
28
  # Returns MiGA::Dataset.
data/lib/miga/project.rb CHANGED
@@ -63,6 +63,7 @@ class MiGA::Project < MiGA::MiGA
63
63
  # (Re-)load project data and metadata.
64
64
  def load
65
65
  @datasets = {}
66
+ @dataset_names_hash = nil
66
67
  @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
67
68
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
68
69
  end
@@ -50,10 +50,10 @@ class MiGA::RemoteDataset
50
50
  doc = ''
51
51
  @timeout_try = 0
52
52
  begin
53
- open(url, open_timeout: 600, read_timeout: 600) { |f| doc = f.read }
54
- rescue Net::ReadTimeout
53
+ open(url, read_timeout: 600) { |f| doc = f.read }
54
+ rescue => e
55
55
  @timeout_try += 1
56
- raise Net::ReadTimeout if @timeout_try >= 3
56
+ raise e if @timeout_try >= 3
57
57
  retry
58
58
  end
59
59
  doc
@@ -17,6 +17,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
17
17
  attr_reader :db
18
18
  # Array of IDs of the entries composing the dataset.
19
19
  attr_reader :ids
20
+ # Internal metadata hash
21
+ attr_reader :metadata
20
22
 
21
23
  ##
22
24
  # Initialize MiGA::RemoteDataset with +ids+ in database +db+ from +universe+.
@@ -25,6 +27,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
25
27
  @ids = (ids.is_a?(Array) ? ids : [ids])
26
28
  @db = db.to_sym
27
29
  @universe = universe.to_sym
30
+ @metadata = {}
31
+ @metadata[:"#{universe}_#{db}"] = ids.join(",")
28
32
  @@UNIVERSE.keys.include?(@universe) or
29
33
  raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
30
34
  @@UNIVERSE[@universe][:dbs].include?(@db) or
@@ -37,15 +41,15 @@ class MiGA::RemoteDataset < MiGA::MiGA
37
41
 
38
42
  ##
39
43
  # Save dataset to the MiGA::Project +project+ identified with +name+. +is_ref+
40
- # indicates if it should be a reference dataset, and contains +metadata+.
41
- def save_to(project, name = nil, is_ref = true, metadata = {})
44
+ # indicates if it should be a reference dataset, and contains +metadata_def+.
45
+ def save_to(project, name = nil, is_ref = true, metadata_def = {})
42
46
  name ||= ids.join('_').miga_name
43
47
  project = MiGA::Project.new(project) if project.is_a? String
44
48
  MiGA::Dataset.exist?(project, name) and
45
49
  raise "Dataset #{name} exists in the project, aborting..."
46
- metadata = get_metadata(metadata)
50
+ @metadata = get_metadata(metadata_def)
47
51
  udb = @@UNIVERSE[universe][:dbs][db]
48
- metadata["#{universe}_#{db}"] = ids.join(',')
52
+ @metadata["#{universe}_#{db}"] = ids.join(',')
49
53
  respond_to?("save_#{udb[:stage]}_to", true) or
50
54
  raise "Unexpected error: Unsupported stage #{udb[:stage]} for #{db}."
51
55
  send "save_#{udb[:stage]}_to", project, name, udb
@@ -70,15 +74,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
70
74
 
71
75
  ##
72
76
  # Get metadata from the remote location.
73
- def get_metadata(metadata = {})
77
+ def get_metadata(metadata_def = {})
78
+ metadata_def.each { |k,v| @metadata[k] = v }
74
79
  case universe
75
- when :ebi, :ncbi
80
+ when :ebi, :ncbi, :web
76
81
  # Get taxonomy
77
- metadata[:tax] = get_ncbi_taxonomy
82
+ @metadata[:tax] = get_ncbi_taxonomy
78
83
  end
79
- metadata[:"#{universe}_#{db}"] = ids.join(",")
80
- metadata = get_type_status(metadata)
81
- metadata
84
+ @metadata = get_type_status(metadata)
82
85
  end
83
86
 
84
87
  ##
@@ -118,6 +121,15 @@ class MiGA::RemoteDataset < MiGA::MiGA
118
121
 
119
122
  private
120
123
 
124
+ def get_ncbi_taxid_from_web
125
+ return nil unless metadata[:ncbi_asm]
126
+ base_url = 'https://www.ncbi.nlm.nih.gov/assembly'
127
+ doc = self.class.download_url(
128
+ "#{base_url}/#{metadata[:ncbi_asm]}?report=xml&format=text")
129
+ taxid = doc.scan(%r{&lt;Taxid&gt;(\S+)&lt;/Taxid&gt;}).first
130
+ taxid.nil? ? taxid : taxid.first
131
+ end
132
+
121
133
  def get_ncbi_taxid_from_ncbi
122
134
  doc = self.class.download(universe, db, ids, :gb).split(/\n/)
123
135
  ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 5, 0]
13
+ VERSION = [0.3, 5, 1]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/02.aai"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  echo -n "" > miga-project.log
15
- DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
15
+ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
16
16
 
17
17
  # Extract values
18
18
  echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/03.ani"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  echo -n "" > miga-project.log
15
- DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
15
+ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
16
16
 
17
17
  # Extract values
18
18
  echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/01.haai"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  echo -n "" > miga-project.log
15
- DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
15
+ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
16
16
 
17
17
  # Extract values
18
18
  echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
@@ -1 +1 @@
1
- ../../Scripts/FastA.N50.pl
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
@@ -1 +1 @@
1
- ../../Scripts/FastA.filterN.pl
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
@@ -1 +1 @@
1
- ../../Scripts/FastA.length.pl
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
@@ -1 +1 @@
1
- ../../Scripts/FastA.split.pl
1
+ utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
@@ -1 +1 @@
1
- ../../enveomics.R
1
+ utils/enveomics/Scripts/lib/../../enveomics.R
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5.0
4
+ version: 0.3.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-04 00:00:00.000000000 Z
11
+ date: 2018-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -495,7 +495,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
495
495
  version: '0'
496
496
  requirements: []
497
497
  rubyforge_project:
498
- rubygems_version: 2.5.2.3
498
+ rubygems_version: 2.7.7
499
499
  signing_key:
500
500
  specification_version: 4
501
501
  summary: MiGA