miga-base 0.3.5.0 → 0.3.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c7f9c242373a48634effe2905d33ddd49c2c1bce
4
- data.tar.gz: cbdf5d91e364987718b6f0d29b57a0bab372afdc
2
+ SHA256:
3
+ metadata.gz: 25f6a339288dbdbeda1f84f5da6b5de697d3790c0b38d138a4416ff762cae936
4
+ data.tar.gz: 60187223750e983fafd6088935a912d016bbf4acafaf79b548e206c50076cc04
5
5
  SHA512:
6
- metadata.gz: d790bb6056fc556ae86915093d6d16973222009e3249f1d33351ae9e35ab11614ea9163722a63592e2e1e95a0ea1345a537f98f8ffa675ef421ce6dd1f077642
7
- data.tar.gz: d435fcda801d87f51f1dcc8d090812c7fbae12e74dfbbea5470ed573a993ad61edcf6855376f2ef3ea9acb9900407072b827852716c24ab64c4252e87a982d95
6
+ metadata.gz: 60b59ccc8fc3bf9f5a584f3221c268839ba4cd41eb3a6c16911f1808944c20100758f74908797f13ada9d2475e4fda382c1e204b257cf6c6adcfd17db694efcf
7
+ data.tar.gz: e62de30119c51dc4e92e64aec954c263f84a4820f1541f014c49d93dadd8de1efb5d331c35ecbda6ccfa79c402393894b7b6ffd52edf2362b8e117fe505ff230
data/actions/add.rb CHANGED
@@ -60,7 +60,9 @@ def cp_result(o, d, p, sym, res_sym, ext)
60
60
  r_dir = MiGA::Dataset.RESULT_DIRS[res_sym]
61
61
  r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
62
62
  ext.each_index do |i|
63
- FileUtils.cp o[sym][i], "#{r_path}#{ext[i]}" unless o[sym][i].nil?
63
+ next if o[sym][i].nil?
64
+ gz = o[sym][i] =~ /\.gz/ ? '.gz' : ''
65
+ FileUtils.cp(o[sym][i], "#{r_path}#{ext[i]}#{gz}")
64
66
  end
65
67
  File.open("#{r_path}.done", "w") { |f| f.print Time.now.to_s }
66
68
  end
data/actions/get.rb CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  require 'miga/remote_dataset'
7
7
 
8
- o = {q: true, query: false, universe: :ebi, db: :embl}
8
+ o = {q: true, query: false, universe: :ebi, db: :embl, get_md: false}
9
9
  OptionParser.new do |opt|
10
10
  opt_banner(opt)
11
11
  opt_object(opt, o, [:project, :dataset, :dataset_type])
@@ -36,6 +36,9 @@ OptionParser.new do |opt|
36
36
  'Metadata as key-value pairs separated by = and delimited by comma.',
37
37
  'Values are saved as strings except for booleans (true / false) or nil.'
38
38
  ){ |v| o[:metadata]=v }
39
+ opt.on('--get-metadata',
40
+ 'Only download and update metadata for existing datasets'
41
+ ){ |v| o[:get_md] = v }
39
42
  opt_common(opt, o)
40
43
  end.parse!
41
44
 
@@ -74,12 +77,20 @@ glob.each do |o_i|
74
77
  $stderr.puts 'Locating remote dataset.' unless o_i[:q]
75
78
  rd = MiGA::RemoteDataset.new(o_i[:ids], o_i[:db], o_i[:universe])
76
79
 
77
- $stderr.puts 'Creating dataset.' unless o_i[:q]
78
- dummy_d = MiGA::Dataset.new(p, o_i[:dataset])
79
- md = add_metadata(o_i, dummy_d).metadata.data
80
- dummy_d.remove!
81
- rd.save_to(p, o_i[:dataset], !o_i[:query], md)
82
- p.add_dataset(o_i[:dataset])
80
+ if o[:get_md]
81
+ $stderr.puts 'Updating dataset.' unless o_i[:q]
82
+ d = p.dataset(o_i[:dataset])
83
+ next if d.nil?
84
+ md = add_metadata(o_i, d).metadata.data
85
+ rd.update_metadata(d, md)
86
+ else
87
+ $stderr.puts 'Creating dataset.' unless o_i[:q]
88
+ dummy_d = MiGA::Dataset.new(p, o_i[:dataset])
89
+ md = add_metadata(o_i, dummy_d).metadata.data
90
+ dummy_d.remove!
91
+ rd.save_to(p, o_i[:dataset], !o_i[:query], md)
92
+ p.add_dataset(o_i[:dataset])
93
+ end
83
94
 
84
95
  $stderr.puts 'Done.' unless o_i[:q]
85
96
  end
data/bin/miga CHANGED
@@ -19,7 +19,7 @@ $task_desc = {
19
19
  # Datasets
20
20
  add: "Creates an empty dataset in a pre-existing MiGA project.",
21
21
  get: "Downloads a dataset from public databases into a MiGA project.",
22
- ncbi_get: "Downloads all genomes in a taxon or RefSeq from NCBI into a MiGA project.",
22
+ ncbi_get: "Downloads all genomes in a taxon from NCBI into a MiGA project.",
23
23
  rm: "Removes a dataset from an MiGA project.",
24
24
  find: "Finds unregistered datasets based on result files.",
25
25
  ln: "Link datasets (including results) from one project to another.",
@@ -137,13 +137,16 @@ def opt_common(opt, o)
137
137
  end
138
138
 
139
139
  # OptParse flags to filter lists of datasets.
140
- def opt_filter_datasets(opt, o, what=[:ref, :multi, :taxonomy])
140
+ def opt_filter_datasets(opt, o, what=[:ref, :multi, :active, :taxonomy])
141
141
  opt.on("--[no-]ref",
142
142
  "If set, uses only reference (or only non-reference) datasets."
143
143
  ){ |v| o[:ref]=v } if what.include? :ref
144
144
  opt.on("--[no-]multi",
145
145
  "If set, uses only multi-species (or only single-species) datasets."
146
146
  ){ |v| o[:multi]=v } if what.include? :multi
147
+ opt.on("--[no-]active",
148
+ "If set, uses only active (or inactive) datasets."
149
+ ){ |v| o[:active]=v } if what.include? :active
147
150
  opt.on("-t", "--taxonomy RANK:TAXON", "Filter by taxonomy."
148
151
  ){ |v| o[:taxonomy]=MiGA::Taxonomy.new v } if what.include? :taxonomy
149
152
  opt.on("-k", "--key INTEGER",
@@ -163,7 +166,8 @@ end
163
166
 
164
167
  # Filters datasets by keys set in +opt_filter_datasets+.
165
168
  def filter_datasets!(ds, o)
166
- ds.select!{|d| d.is_ref? == o[:ref] } unless o[:ref].nil?
169
+ ds.select! { |d| d.is_ref? == o[:ref] } unless o[:ref].nil?
170
+ ds.select! { |d| d.is_active? == o[:active] } unless o[:active].nil?
167
171
  ds.select! do |d|
168
172
  o[:multi] ? d.is_multi? : d.is_nonmulti?
169
173
  end unless o[:multi].nil?
@@ -122,6 +122,7 @@ module MiGA::Dataset::Result
122
122
  # the project as reference datasets.
123
123
  def cleanup_distances!
124
124
  r = get_result(:distances)
125
+ ref = project.datasets.select(&:is_ref?).select(&:is_active?).map(&:name)
125
126
  return if r.nil?
126
127
  [:haai_db, :aai_db, :ani_db].each do |db_type|
127
128
  db = r.file_path(db_type)
@@ -130,7 +131,7 @@ module MiGA::Dataset::Result
130
131
  table = db_type[-6..-4]
131
132
  val = sqlite_db.execute "select seq2 from #{table}"
132
133
  next if val.empty?
133
- (val.map{ |i| i.first } - project.dataset_names).each do |extra|
134
+ (val.map(&:first) - ref).each do |extra|
134
135
  sqlite_db.execute "delete from #{table} where seq2=?", extra
135
136
  end
136
137
  end
data/lib/miga/dataset.rb CHANGED
@@ -17,7 +17,7 @@ class MiGA::Dataset < MiGA::MiGA
17
17
  ##
18
18
  # Does the +project+ already have a dataset with that +name+?
19
19
  def exist?(project, name)
20
- project.dataset_names.include? name
20
+ not project.dataset_names_hash[name].nil?
21
21
  end
22
22
 
23
23
  ##
@@ -5,7 +5,6 @@
5
5
  # Helper module including specific functions to handle datasets.
6
6
  module MiGA::Project::Dataset
7
7
 
8
-
9
8
  ##
10
9
  # Returns Array of MiGA::Dataset.
11
10
  def datasets
@@ -17,6 +16,13 @@ module MiGA::Project::Dataset
17
16
  def dataset_names
18
17
  metadata[:datasets]
19
18
  end
19
+
20
+ ##
21
+ # Returns Hash of Strings => true. Similar to +dataset_names+ but as
22
+ # Hash for efficiency.
23
+ def dataset_names_hash
24
+ @dataset_names_hash ||= Hash[dataset_names.map{ |i| [i,true] }]
25
+ end
20
26
 
21
27
  ##
22
28
  # Returns MiGA::Dataset.
data/lib/miga/project.rb CHANGED
@@ -63,6 +63,7 @@ class MiGA::Project < MiGA::MiGA
63
63
  # (Re-)load project data and metadata.
64
64
  def load
65
65
  @datasets = {}
66
+ @dataset_names_hash = nil
66
67
  @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
67
68
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
68
69
  end
@@ -50,10 +50,10 @@ class MiGA::RemoteDataset
50
50
  doc = ''
51
51
  @timeout_try = 0
52
52
  begin
53
- open(url, open_timeout: 600, read_timeout: 600) { |f| doc = f.read }
54
- rescue Net::ReadTimeout
53
+ open(url, read_timeout: 600) { |f| doc = f.read }
54
+ rescue => e
55
55
  @timeout_try += 1
56
- raise Net::ReadTimeout if @timeout_try >= 3
56
+ raise e if @timeout_try >= 3
57
57
  retry
58
58
  end
59
59
  doc
@@ -17,6 +17,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
17
17
  attr_reader :db
18
18
  # Array of IDs of the entries composing the dataset.
19
19
  attr_reader :ids
20
+ # Internal metadata hash
21
+ attr_reader :metadata
20
22
 
21
23
  ##
22
24
  # Initialize MiGA::RemoteDataset with +ids+ in database +db+ from +universe+.
@@ -25,6 +27,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
25
27
  @ids = (ids.is_a?(Array) ? ids : [ids])
26
28
  @db = db.to_sym
27
29
  @universe = universe.to_sym
30
+ @metadata = {}
31
+ @metadata[:"#{universe}_#{db}"] = ids.join(",")
28
32
  @@UNIVERSE.keys.include?(@universe) or
29
33
  raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
30
34
  @@UNIVERSE[@universe][:dbs].include?(@db) or
@@ -37,15 +41,15 @@ class MiGA::RemoteDataset < MiGA::MiGA
37
41
 
38
42
  ##
39
43
  # Save dataset to the MiGA::Project +project+ identified with +name+. +is_ref+
40
- # indicates if it should be a reference dataset, and contains +metadata+.
41
- def save_to(project, name = nil, is_ref = true, metadata = {})
44
+ # indicates if it should be a reference dataset, and contains +metadata_def+.
45
+ def save_to(project, name = nil, is_ref = true, metadata_def = {})
42
46
  name ||= ids.join('_').miga_name
43
47
  project = MiGA::Project.new(project) if project.is_a? String
44
48
  MiGA::Dataset.exist?(project, name) and
45
49
  raise "Dataset #{name} exists in the project, aborting..."
46
- metadata = get_metadata(metadata)
50
+ @metadata = get_metadata(metadata_def)
47
51
  udb = @@UNIVERSE[universe][:dbs][db]
48
- metadata["#{universe}_#{db}"] = ids.join(',')
52
+ @metadata["#{universe}_#{db}"] = ids.join(',')
49
53
  respond_to?("save_#{udb[:stage]}_to", true) or
50
54
  raise "Unexpected error: Unsupported stage #{udb[:stage]} for #{db}."
51
55
  send "save_#{udb[:stage]}_to", project, name, udb
@@ -70,15 +74,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
70
74
 
71
75
  ##
72
76
  # Get metadata from the remote location.
73
- def get_metadata(metadata = {})
77
+ def get_metadata(metadata_def = {})
78
+ metadata_def.each { |k,v| @metadata[k] = v }
74
79
  case universe
75
- when :ebi, :ncbi
80
+ when :ebi, :ncbi, :web
76
81
  # Get taxonomy
77
- metadata[:tax] = get_ncbi_taxonomy
82
+ @metadata[:tax] = get_ncbi_taxonomy
78
83
  end
79
- metadata[:"#{universe}_#{db}"] = ids.join(",")
80
- metadata = get_type_status(metadata)
81
- metadata
84
+ @metadata = get_type_status(metadata)
82
85
  end
83
86
 
84
87
  ##
@@ -118,6 +121,15 @@ class MiGA::RemoteDataset < MiGA::MiGA
118
121
 
119
122
  private
120
123
 
124
+ def get_ncbi_taxid_from_web
125
+ return nil unless metadata[:ncbi_asm]
126
+ base_url = 'https://www.ncbi.nlm.nih.gov/assembly'
127
+ doc = self.class.download_url(
128
+ "#{base_url}/#{metadata[:ncbi_asm]}?report=xml&format=text")
129
+ taxid = doc.scan(%r{&lt;Taxid&gt;(\S+)&lt;/Taxid&gt;}).first
130
+ taxid.nil? ? taxid : taxid.first
131
+ end
132
+
121
133
  def get_ncbi_taxid_from_ncbi
122
134
  doc = self.class.download(universe, db, ids, :gb).split(/\n/)
123
135
  ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 5, 0]
13
+ VERSION = [0.3, 5, 1]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/02.aai"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  echo -n "" > miga-project.log
15
- DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
15
+ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
16
16
 
17
17
  # Extract values
18
18
  echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/03.ani"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  echo -n "" > miga-project.log
15
- DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
15
+ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
16
16
 
17
17
  # Extract values
18
18
  echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/01.haai"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  echo -n "" > miga-project.log
15
- DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
15
+ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
16
16
 
17
17
  # Extract values
18
18
  echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
@@ -1 +1 @@
1
- ../../Scripts/FastA.N50.pl
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
@@ -1 +1 @@
1
- ../../Scripts/FastA.filterN.pl
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
@@ -1 +1 @@
1
- ../../Scripts/FastA.length.pl
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
@@ -1 +1 @@
1
- ../../Scripts/FastA.split.pl
1
+ utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
@@ -1 +1 @@
1
- ../../enveomics.R
1
+ utils/enveomics/Scripts/lib/../../enveomics.R
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5.0
4
+ version: 0.3.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-04 00:00:00.000000000 Z
11
+ date: 2018-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -495,7 +495,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
495
495
  version: '0'
496
496
  requirements: []
497
497
  rubyforge_project:
498
- rubygems_version: 2.5.2.3
498
+ rubygems_version: 2.7.7
499
499
  signing_key:
500
500
  specification_version: 4
501
501
  summary: MiGA