miga-base 0.3.13.1 → 0.4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3dfdd455be947124886bb35a9e6ebc800e63e9eae62d8d61f7e3f647732ebfd
4
- data.tar.gz: 536a76534d93cfdd50ea58e7036df5cfc01d6bb28314d42d8f43490b318ad3ee
3
+ metadata.gz: 37bf8085cc9b33f88367c8134cd2b01115cfcd9e9116fab7ba3f93abc33c6d6f
4
+ data.tar.gz: bdefeaa9f965ea991071a70b48239ad7129763bcb05f0cc2ebcd6d66511bd3e6
5
5
  SHA512:
6
- metadata.gz: 1294bcb2f772e520bfe5f289df53f8b03dc9b8cfb97d1a514753d776295a5052f2cd19d831b7e015b124d4eede94a58b518b0c4a52ed7ae2b9ba68c1e9c1ba93
7
- data.tar.gz: dfc521fe2478f0cbf8c5a4d1ac25c5a20705885a1bdd94a2445d317dfccab7018fe80e4664b929a45e2b3c773427d2bd34fed7597eb890f28ad2e99bf4632dc5
6
+ metadata.gz: 9e1463de777f3d77bcb8f804bdfedeb3781ccf0b36330faf21d501b1a87cf9b55f0d03f2668a1889ae165cbaaf2030ab955ad8c0f42a5aaad341a6eb7154b356
7
+ data.tar.gz: 6738493c3a716f553fea6486841f612616bad38cc4091103ec490417336df327ae03046a04907b9dcaed9c5d81e17ab918d64d841a67a50b23565dc46308736b
@@ -3,10 +3,32 @@
3
3
  # @package MiGA
4
4
  # @license Artistic-2.0
5
5
 
6
- o = {q: true, ref: true, update: false}
6
+ input_types = {
7
+ raw_reads_single:
8
+ ['Single raw reads in a single FastQ file',
9
+ :raw_reads, %w[.1.fastq]],
10
+ raw_reads_paired:
11
+ ['Paired raw reads in two FastQ files',
12
+ :raw_reads, %w[.1.fastq .2.fastq]],
13
+ trimmed_reads_single:
14
+ ['Single trimmed reads in a single FastA file',
15
+ :trimmed_fasta, %w[.SingleReads.fa]],
16
+ trimmed_reads_paired:
17
+ ['Paired trimmed reads in two FastA files',
18
+ :trimmed_fasta, %w[.1.fasta .2.fasta]],
19
+ trimmed_reads_interleaved:
20
+ ['Paired trimmed reads in a single FastA file',
21
+ :trimmed_fasta, %w[.CoupledReads.fa]],
22
+ assembly:
23
+ ['Assembled contigs or scaffolds in FastA format',
24
+ :assembly, %w[.LargeContigs.fna]]
25
+ }
26
+
27
+ o = {q: true, ref: true, ignore_dups: false,
28
+ regexp: /^(?:.*\/)?(.+?)(?:\..*(?:[12]|Reads|Contigs))?(?i:\.f[nastq]+)?$/}
7
29
  OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset, :dataset_type_req])
30
+ opt_banner(opt, true)
31
+ opt_object(opt, o, [:project, :dataset_opt, :dataset_type_req])
10
32
  opt.on('-q', '--query',
11
33
  'If set, the dataset is registered as a query, not a reference dataset.'
12
34
  ){ |v| o[:ref] = !v }
@@ -18,73 +40,90 @@ OptionParser.new do |opt|
18
40
  'Metadata as key-value pairs separated by = and delimited by comma.',
19
41
  'Values are saved as strings except for booleans (true / false) or nil.'
20
42
  ){ |v| o[:metadata] = v }
21
- opt.on('--update', 'Updates the dataset if it already exists.',
22
- 'Same as "miga edit".'){ o[:update] = true }
43
+ opt.on('-r', '--name-regexp REGEXP', Regexp,
44
+ 'Regular expression indicating how to extract the name from the file path.',
45
+ "By default: '#{o[:regexp]}'"){ |v| o[:regexp] = v }
46
+ opt.on('-i', '--input-type STRING',
47
+ 'Type of input data, one of the following:',
48
+ *input_types.map{ |k,v| "~ #{k}: #{v[0]}." }
49
+ ){ |v| o[:input_type] = v.downcase.to_sym }
50
+ opt.on('--ignore-dups', 'Continue with a warning if a dataset already exists.'
51
+ ){ |v| o[:ignore_dups] = v }
23
52
  opt_common(opt, o)
53
+
54
+ opt.separator 'You can create multiple datasets with a single command, ' \
55
+ 'simply pass all the files at the end (FILES...).'
56
+ opt.separator 'If -D is passed, only one dataset will be added. ' \
57
+ 'Otherwise, dataset names will be determined by the file paths (-r).'
24
58
  opt.separator ''
25
- opt.separator 'External input data'
26
- opt.on('--raw-reads FILE1,FILE2', Array,
27
- 'Comma-delimited paths to the raw reads in FastQ format.',
28
- 'One file is assumed to be single reads, two are assumed to be paired.'
29
- ){ |v| o[:raw_reads] = v }
30
- opt.on('--trimmed-fasta-single FILE', Array,
31
- 'Path to the single trimmed reads in FastA format.'
32
- ){ |v| o[:trimmed_fasta_s] = v }
33
- opt.on('--trimmed-fasta-coupled FILE1,FILE2', Array,
34
- 'Comma-delimited paths to the coupled trimmed reads in FastA format.',
35
- 'One file is assumed to be interposed, two are assumed to contain sisters.'
36
- ){ |v| o[:trimmed_fasta_c] = v }
37
- opt.on('--assembly FILE', Array,
38
- 'Path to the contigs (or scaffolds) of the assembly in FastA format.'
39
- ){ |v| o[:assembly] = v }
40
59
  end.parse!
41
60
 
42
61
  ##=> Main <=
43
- opt_require(o)
44
- opt_require_type(o, MiGA::Dataset) unless o[:update]
62
+ opt_require(o, project: '-P')
63
+ files = ARGV
64
+ file_type = nil
65
+ if files.empty?
66
+ opt_require_type(o, MiGA::Dataset)
67
+ files = [nil]
68
+ else
69
+ raise 'Please specify input type (-i).' if o[:input_type].nil?
70
+ file_type = input_types[o[:input_type]]
71
+ raise "Unrecognized input type: #{o[:input_type]}." if file_type.nil?
72
+ raise 'Some files are duplicated, files must be unique.' if
73
+ files.size != files.uniq.size
74
+ if o[:input_type].to_s =~ /_paired$/
75
+ raise 'Odd number of files incompatible with input type.' if files.size.odd?
76
+ files = Hash[*files].to_a
77
+ else
78
+ files = files.map{ |i| [i] }
79
+ end
80
+ raise 'The dataset name (-D) can only be specified with one input file.' if
81
+ files.size > 1 and not o[:dataset].nil?
82
+ end
45
83
 
46
84
  $stderr.puts 'Loading project.' unless o[:q]
47
85
  p = MiGA::Project.load(o[:project])
48
86
  raise "Impossible to load project: #{o[:project]}" if p.nil?
49
87
 
50
- raise 'Dataset already exists, aborting.' unless
51
- o[:update] or not MiGA::Dataset.exist?(p, o[:dataset])
52
- $stderr.puts 'Loading dataset.' unless o[:q]
53
- d = o[:update] ? p.dataset(o[:dataset]) :
54
- MiGA::Dataset.new(p, o[:dataset], o[:ref])
55
- raise 'Dataset does not exist.' if d.nil?
88
+ $stderr.puts 'Creating datasets:' unless o[:q]
89
+ files.each do |file|
90
+ name = o[:dataset]
91
+ if name.nil?
92
+ ref_file = file.is_a?(Array) ? file.first : file
93
+ m = o[:regexp].match(ref_file)
94
+ raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
95
+ name = m[1].miga_name
96
+ end
56
97
 
57
- in_files = [:raw_reads, :trimmed_fasta_s, :trimmed_fasta_c, :assembly]
58
- def cp_result(o, d, p, sym, res_sym, ext)
59
- return if o[sym].nil? or o[sym].empty?
60
- r_dir = MiGA::Dataset.RESULT_DIRS[res_sym]
61
- r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
62
- ext.each_index do |i|
63
- next if o[sym][i].nil?
64
- gz = o[sym][i] =~ /\.gz/ ? '.gz' : ''
65
- FileUtils.cp(o[sym][i], "#{r_path}#{ext[i]}#{gz}")
98
+ if MiGA::Dataset.exist?(p, name)
99
+ if o[:ignore_dups]
100
+ warn "Dataset already exists: #{name}."
101
+ next
102
+ else
103
+ raise "Dataset already exists: #{name}."
104
+ end
66
105
  end
67
- File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
68
- end
69
- if in_files.any? { |i| not o[i].nil? }
70
- $stderr.puts 'Copying files.' unless o[:q]
71
- # :raw_reads
72
- cp_result(o, d, p, :raw_reads, :raw_reads, %w[.1.fastq .2.fastq])
73
- # :trimmed_fasta
74
- cp_result(o, d, p, :trimmed_fasta_s, :trimmed_fasta, %w[.SingleReads.fa])
75
- if (o[:trimmed_fasta_c] || []).size > 1
76
- cp_result(o, d, p, :trimmed_fasta_c, :trimmed_fasta, %w[.1.fasta .2.fasta])
77
- else
78
- cp_result(o, d, p, :trimmed_fasta_c, :trimmed_fasta, %w[.CoupledReads.fa])
106
+
107
+ $stderr.puts "o #{name}" unless o[:q]
108
+ d = MiGA::Dataset.new(p, name, o[:ref])
109
+ raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
110
+
111
+ unless file.nil?
112
+ r_dir = MiGA::Dataset.RESULT_DIRS[ file_type[1] ]
113
+ r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
114
+ file_type[2].each_with_index do |ext, i|
115
+ gz = file[i] =~ /\.gz/ ? '.gz' : ''
116
+ FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
117
+ $stderr.puts " file: #{file[i]}" unless o[:q]
118
+ end
119
+ File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
79
120
  end
80
- # :assembly
81
- cp_result(o, d, p, :assembly, :assembly, %w[.LargeContigs.fna])
121
+
122
+ d = add_metadata(o, d)
123
+ d.save
124
+ p.add_dataset(name)
125
+ res = d.first_preprocessing(true)
126
+ $stderr.puts " result: #{res}" unless o[:q]
82
127
  end
83
128
 
84
- d = add_metadata(o, d)
85
- d.save
86
- p.add_dataset(o[:dataset]) unless o[:update]
87
- res = d.first_preprocessing(true)
88
- $stderr.puts "- #{res}" unless o[:q]
89
-
90
129
  $stderr.puts 'Done.' unless o[:q]
data/bin/miga CHANGED
@@ -84,11 +84,11 @@ end
84
84
  ##=> Functions <=
85
85
 
86
86
  # OptParse banner
87
- def opt_banner(opt)
87
+ def opt_banner(opt, files = false)
88
88
  opt.banner = <<BAN
89
89
  #{$task_desc[$task]}
90
90
 
91
- Usage: #{$0} #{$task} [options]
91
+ Usage: #{$0} #{$task} [options]#{ ' FILES...' if files}
92
92
  BAN
93
93
  opt.separator ""
94
94
  end
@@ -10,22 +10,22 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 13, 1]
13
+ VERSION = [0.4, 0, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
17
- VERSION_NAME = 'tinge'
17
+ VERSION_NAME = 'aquatint'
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2019, 8, 8)
21
+ VERSION_DATE = Date.new(2019, 8, 15)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
25
- CITATION = "Rodriguez-R et al (2018). " +
26
- "The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene " +
27
- "diversity analysis of Archaea and Bacteria at the whole genome level. " +
28
- "Nucleic Acids Research 46(W1):W282-W288. doi:10.1093/nar/gky467."
25
+ CITATION = 'Rodriguez-R et al (2018). ' \
26
+ 'The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene ' \
27
+ 'diversity analysis of Archaea and Bacteria at the whole genome level. ' \
28
+ 'Nucleic Acids Research 46(W1):W282-W288. doi:10.1093/nar/gky467.'
29
29
 
30
30
  end
31
31
 
@@ -39,12 +39,12 @@ class MiGA::MiGA
39
39
 
40
40
  ##
41
41
  # Complete version as string.
42
- def self.FULL_VERSION ; VERSION.join(".") ; end
42
+ def self.FULL_VERSION ; VERSION.join('.') ; end
43
43
 
44
44
  ##
45
45
  # Complete version with nickname and date as string.
46
46
  def self.LONG_VERSION
47
- "MiGA #{VERSION.join(".")} - #{VERSION_NAME} - #{VERSION_DATE}"
47
+ "MiGA #{VERSION.join('.')} - #{VERSION_NAME} - #{VERSION_DATE}"
48
48
  end
49
49
 
50
50
  ##
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
1
+ ../../Scripts/FastA.N50.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
1
+ ../../Scripts/FastA.filterN.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
1
+ ../../Scripts/FastA.length.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
1
+ ../../Scripts/FastA.split.pl
@@ -1 +1 @@
1
- utils/enveomics/Scripts/lib/../../enveomics.R
1
+ ../../enveomics.R
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.13.1
4
+ version: 0.4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-08 00:00:00.000000000 Z
11
+ date: 2019-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -509,8 +509,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
509
509
  - !ruby/object:Gem::Version
510
510
  version: '0'
511
511
  requirements: []
512
- rubyforge_project:
513
- rubygems_version: 2.7.6
512
+ rubygems_version: 3.0.3
514
513
  signing_key:
515
514
  specification_version: 4
516
515
  summary: MiGA