miga-base 0.3.13.1 → 0.4.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3dfdd455be947124886bb35a9e6ebc800e63e9eae62d8d61f7e3f647732ebfd
4
- data.tar.gz: 536a76534d93cfdd50ea58e7036df5cfc01d6bb28314d42d8f43490b318ad3ee
3
+ metadata.gz: 37bf8085cc9b33f88367c8134cd2b01115cfcd9e9116fab7ba3f93abc33c6d6f
4
+ data.tar.gz: bdefeaa9f965ea991071a70b48239ad7129763bcb05f0cc2ebcd6d66511bd3e6
5
5
  SHA512:
6
- metadata.gz: 1294bcb2f772e520bfe5f289df53f8b03dc9b8cfb97d1a514753d776295a5052f2cd19d831b7e015b124d4eede94a58b518b0c4a52ed7ae2b9ba68c1e9c1ba93
7
- data.tar.gz: dfc521fe2478f0cbf8c5a4d1ac25c5a20705885a1bdd94a2445d317dfccab7018fe80e4664b929a45e2b3c773427d2bd34fed7597eb890f28ad2e99bf4632dc5
6
+ metadata.gz: 9e1463de777f3d77bcb8f804bdfedeb3781ccf0b36330faf21d501b1a87cf9b55f0d03f2668a1889ae165cbaaf2030ab955ad8c0f42a5aaad341a6eb7154b356
7
+ data.tar.gz: 6738493c3a716f553fea6486841f612616bad38cc4091103ec490417336df327ae03046a04907b9dcaed9c5d81e17ab918d64d841a67a50b23565dc46308736b
@@ -3,10 +3,32 @@
3
3
  # @package MiGA
4
4
  # @license Artistic-2.0
5
5
 
6
- o = {q: true, ref: true, update: false}
6
+ input_types = {
7
+ raw_reads_single:
8
+ ['Single raw reads in a single FastQ file',
9
+ :raw_reads, %w[.1.fastq]],
10
+ raw_reads_paired:
11
+ ['Paired raw reads in two FastQ files',
12
+ :raw_reads, %w[.1.fastq .2.fastq]],
13
+ trimmed_reads_single:
14
+ ['Single trimmed reads in a single FastA file',
15
+ :trimmed_fasta, %w[.SingleReads.fa]],
16
+ trimmed_reads_paired:
17
+ ['Paired trimmed reads in two FastA files',
18
+ :trimmed_fasta, %w[.1.fasta .2.fasta]],
19
+ trimmed_reads_interleaved:
20
+ ['Paired trimmed reads in a single FastA file',
21
+ :trimmed_fasta, %w[.CoupledReads.fa]],
22
+ assembly:
23
+ ['Assembled contigs or scaffolds in FastA format',
24
+ :assembly, %w[.LargeContigs.fna]]
25
+ }
26
+
27
+ o = {q: true, ref: true, ignore_dups: false,
28
+ regexp: /^(?:.*\/)?(.+?)(?:\..*(?:[12]|Reads|Contigs))?(?i:\.f[nastq]+)?$/}
7
29
  OptionParser.new do |opt|
8
- opt_banner(opt)
9
- opt_object(opt, o, [:project, :dataset, :dataset_type_req])
30
+ opt_banner(opt, true)
31
+ opt_object(opt, o, [:project, :dataset_opt, :dataset_type_req])
10
32
  opt.on('-q', '--query',
11
33
  'If set, the dataset is registered as a query, not a reference dataset.'
12
34
  ){ |v| o[:ref] = !v }
@@ -18,73 +40,90 @@ OptionParser.new do |opt|
18
40
  'Metadata as key-value pairs separated by = and delimited by comma.',
19
41
  'Values are saved as strings except for booleans (true / false) or nil.'
20
42
  ){ |v| o[:metadata] = v }
21
- opt.on('--update', 'Updates the dataset if it already exists.',
22
- 'Same as "miga edit".'){ o[:update] = true }
43
+ opt.on('-r', '--name-regexp REGEXP', Regexp,
44
+ 'Regular expression indicating how to extract the name from the file path.',
45
+ "By default: '#{o[:regexp]}'"){ |v| o[:regexp] = v }
46
+ opt.on('-i', '--input-type STRING',
47
+ 'Type of input data, one of the following:',
48
+ *input_types.map{ |k,v| "~ #{k}: #{v[0]}." }
49
+ ){ |v| o[:input_type] = v.downcase.to_sym }
50
+ opt.on('--ignore-dups', 'Continue with a warning if a dataset already exists.'
51
+ ){ |v| o[:ignore_dups] = v }
23
52
  opt_common(opt, o)
53
+
54
+ opt.separator 'You can create multiple datasets with a single command, ' \
55
+ 'simply pass all the files at the end (FILES...).'
56
+ opt.separator 'If -D is passed, only one dataset will be added. ' \
57
+ 'Otherwise, dataset names will be determined by the file paths (-r).'
24
58
  opt.separator ''
25
- opt.separator 'External input data'
26
- opt.on('--raw-reads FILE1,FILE2', Array,
27
- 'Comma-delimited paths to the raw reads in FastQ format.',
28
- 'One file is assumed to be single reads, two are assumed to be paired.'
29
- ){ |v| o[:raw_reads] = v }
30
- opt.on('--trimmed-fasta-single FILE', Array,
31
- 'Path to the single trimmed reads in FastA format.'
32
- ){ |v| o[:trimmed_fasta_s] = v }
33
- opt.on('--trimmed-fasta-coupled FILE1,FILE2', Array,
34
- 'Comma-delimited paths to the coupled trimmed reads in FastA format.',
35
- 'One file is assumed to be interposed, two are assumed to contain sisters.'
36
- ){ |v| o[:trimmed_fasta_c] = v }
37
- opt.on('--assembly FILE', Array,
38
- 'Path to the contigs (or scaffolds) of the assembly in FastA format.'
39
- ){ |v| o[:assembly] = v }
40
59
  end.parse!
41
60
 
42
61
  ##=> Main <=
43
- opt_require(o)
44
- opt_require_type(o, MiGA::Dataset) unless o[:update]
62
+ opt_require(o, project: '-P')
63
+ files = ARGV
64
+ file_type = nil
65
+ if files.empty?
66
+ opt_require_type(o, MiGA::Dataset)
67
+ files = [nil]
68
+ else
69
+ raise 'Please specify input type (-i).' if o[:input_type].nil?
70
+ file_type = input_types[o[:input_type]]
71
+ raise "Unrecognized input type: #{o[:input_type]}." if file_type.nil?
72
+ raise 'Some files are duplicated, files must be unique.' if
73
+ files.size != files.uniq.size
74
+ if o[:input_type].to_s =~ /_paired$/
75
+ raise 'Odd number of files incompatible with input type.' if files.size.odd?
76
+ files = Hash[*files].to_a
77
+ else
78
+ files = files.map{ |i| [i] }
79
+ end
80
+ raise 'The dataset name (-D) can only be specified with one input file.' if
81
+ files.size > 1 and not o[:dataset].nil?
82
+ end
45
83
 
46
84
  $stderr.puts 'Loading project.' unless o[:q]
47
85
  p = MiGA::Project.load(o[:project])
48
86
  raise "Impossible to load project: #{o[:project]}" if p.nil?
49
87
 
50
- raise 'Dataset already exists, aborting.' unless
51
- o[:update] or not MiGA::Dataset.exist?(p, o[:dataset])
52
- $stderr.puts 'Loading dataset.' unless o[:q]
53
- d = o[:update] ? p.dataset(o[:dataset]) :
54
- MiGA::Dataset.new(p, o[:dataset], o[:ref])
55
- raise 'Dataset does not exist.' if d.nil?
88
+ $stderr.puts 'Creating datasets:' unless o[:q]
89
+ files.each do |file|
90
+ name = o[:dataset]
91
+ if name.nil?
92
+ ref_file = file.is_a?(Array) ? file.first : file
93
+ m = o[:regexp].match(ref_file)
94
+ raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
95
+ name = m[1].miga_name
96
+ end
56
97
 
57
- in_files = [:raw_reads, :trimmed_fasta_s, :trimmed_fasta_c, :assembly]
58
- def cp_result(o, d, p, sym, res_sym, ext)
59
- return if o[sym].nil? or o[sym].empty?
60
- r_dir = MiGA::Dataset.RESULT_DIRS[res_sym]
61
- r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
62
- ext.each_index do |i|
63
- next if o[sym][i].nil?
64
- gz = o[sym][i] =~ /\.gz/ ? '.gz' : ''
65
- FileUtils.cp(o[sym][i], "#{r_path}#{ext[i]}#{gz}")
98
+ if MiGA::Dataset.exist?(p, name)
99
+ if o[:ignore_dups]
100
+ warn "Dataset already exists: #{name}."
101
+ next
102
+ else
103
+ raise "Dataset already exists: #{name}."
104
+ end
66
105
  end
67
- File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
68
- end
69
- if in_files.any? { |i| not o[i].nil? }
70
- $stderr.puts 'Copying files.' unless o[:q]
71
- # :raw_reads
72
- cp_result(o, d, p, :raw_reads, :raw_reads, %w[.1.fastq .2.fastq])
73
- # :trimmed_fasta
74
- cp_result(o, d, p, :trimmed_fasta_s, :trimmed_fasta, %w[.SingleReads.fa])
75
- if (o[:trimmed_fasta_c] || []).size > 1
76
- cp_result(o, d, p, :trimmed_fasta_c, :trimmed_fasta, %w[.1.fasta .2.fasta])
77
- else
78
- cp_result(o, d, p, :trimmed_fasta_c, :trimmed_fasta, %w[.CoupledReads.fa])
106
+
107
+ $stderr.puts "o #{name}" unless o[:q]
108
+ d = MiGA::Dataset.new(p, name, o[:ref])
109
+ raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
110
+
111
+ unless file.nil?
112
+ r_dir = MiGA::Dataset.RESULT_DIRS[ file_type[1] ]
113
+ r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
114
+ file_type[2].each_with_index do |ext, i|
115
+ gz = file[i] =~ /\.gz/ ? '.gz' : ''
116
+ FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
117
+ $stderr.puts " file: #{file[i]}" unless o[:q]
118
+ end
119
+ File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
79
120
  end
80
- # :assembly
81
- cp_result(o, d, p, :assembly, :assembly, %w[.LargeContigs.fna])
121
+
122
+ d = add_metadata(o, d)
123
+ d.save
124
+ p.add_dataset(name)
125
+ res = d.first_preprocessing(true)
126
+ $stderr.puts " result: #{res}" unless o[:q]
82
127
  end
83
128
 
84
- d = add_metadata(o, d)
85
- d.save
86
- p.add_dataset(o[:dataset]) unless o[:update]
87
- res = d.first_preprocessing(true)
88
- $stderr.puts "- #{res}" unless o[:q]
89
-
90
129
  $stderr.puts 'Done.' unless o[:q]
data/bin/miga CHANGED
@@ -84,11 +84,11 @@ end
84
84
  ##=> Functions <=
85
85
 
86
86
  # OptParse banner
87
- def opt_banner(opt)
87
+ def opt_banner(opt, files = false)
88
88
  opt.banner = <<BAN
89
89
  #{$task_desc[$task]}
90
90
 
91
- Usage: #{$0} #{$task} [options]
91
+ Usage: #{$0} #{$task} [options]#{ ' FILES...' if files}
92
92
  BAN
93
93
  opt.separator ""
94
94
  end
@@ -10,22 +10,22 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 13, 1]
13
+ VERSION = [0.4, 0, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
17
- VERSION_NAME = 'tinge'
17
+ VERSION_NAME = 'aquatint'
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2019, 8, 8)
21
+ VERSION_DATE = Date.new(2019, 8, 15)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
25
- CITATION = "Rodriguez-R et al (2018). " +
26
- "The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene " +
27
- "diversity analysis of Archaea and Bacteria at the whole genome level. " +
28
- "Nucleic Acids Research 46(W1):W282-W288. doi:10.1093/nar/gky467."
25
+ CITATION = 'Rodriguez-R et al (2018). ' \
26
+ 'The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene ' \
27
+ 'diversity analysis of Archaea and Bacteria at the whole genome level. ' \
28
+ 'Nucleic Acids Research 46(W1):W282-W288. doi:10.1093/nar/gky467.'
29
29
 
30
30
  end
31
31
 
@@ -39,12 +39,12 @@ class MiGA::MiGA
39
39
 
40
40
  ##
41
41
  # Complete version as string.
42
- def self.FULL_VERSION ; VERSION.join(".") ; end
42
+ def self.FULL_VERSION ; VERSION.join('.') ; end
43
43
 
44
44
  ##
45
45
  # Complete version with nickname and date as string.
46
46
  def self.LONG_VERSION
47
- "MiGA #{VERSION.join(".")} - #{VERSION_NAME} - #{VERSION_DATE}"
47
+ "MiGA #{VERSION.join('.')} - #{VERSION_NAME} - #{VERSION_DATE}"
48
48
  end
49
49
 
50
50
  ##
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
1
+ ../../Scripts/FastA.N50.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
1
+ ../../Scripts/FastA.filterN.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
1
+ ../../Scripts/FastA.length.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
1
+ ../../Scripts/FastA.split.pl
@@ -1 +1 @@
1
- utils/enveomics/Scripts/lib/../../enveomics.R
1
+ ../../enveomics.R
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.13.1
4
+ version: 0.4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-08 00:00:00.000000000 Z
11
+ date: 2019-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -509,8 +509,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
509
509
  - !ruby/object:Gem::Version
510
510
  version: '0'
511
511
  requirements: []
512
- rubyforge_project:
513
- rubygems_version: 2.7.6
512
+ rubygems_version: 3.0.3
514
513
  signing_key:
515
514
  specification_version: 4
516
515
  summary: MiGA