miga-base 0.3.13.1 → 0.4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/add.rb +96 -57
- data/bin/miga +2 -2
- data/lib/miga/version.rb +9 -9
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37bf8085cc9b33f88367c8134cd2b01115cfcd9e9116fab7ba3f93abc33c6d6f
|
4
|
+
data.tar.gz: bdefeaa9f965ea991071a70b48239ad7129763bcb05f0cc2ebcd6d66511bd3e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e1463de777f3d77bcb8f804bdfedeb3781ccf0b36330faf21d501b1a87cf9b55f0d03f2668a1889ae165cbaaf2030ab955ad8c0f42a5aaad341a6eb7154b356
|
7
|
+
data.tar.gz: 6738493c3a716f553fea6486841f612616bad38cc4091103ec490417336df327ae03046a04907b9dcaed9c5d81e17ab918d64d841a67a50b23565dc46308736b
|
data/actions/add.rb
CHANGED
@@ -3,10 +3,32 @@
|
|
3
3
|
# @package MiGA
|
4
4
|
# @license Artistic-2.0
|
5
5
|
|
6
|
-
|
6
|
+
input_types = {
|
7
|
+
raw_reads_single:
|
8
|
+
['Single raw reads in a single FastQ file',
|
9
|
+
:raw_reads, %w[.1.fastq]],
|
10
|
+
raw_reads_paired:
|
11
|
+
['Paired raw reads in two FastQ files',
|
12
|
+
:raw_reads, %w[.1.fastq .2.fastq]],
|
13
|
+
trimmed_reads_single:
|
14
|
+
['Single trimmed reads in a single FastA file',
|
15
|
+
:trimmed_fasta, %w[.SingleReads.fa]],
|
16
|
+
trimmed_reads_paired:
|
17
|
+
['Paired trimmed reads in two FastA files',
|
18
|
+
:trimmed_fasta, %w[.1.fasta .2.fasta]],
|
19
|
+
trimmed_reads_interleaved:
|
20
|
+
['Paired trimmed reads in a single FastA file',
|
21
|
+
:trimmed_fasta, %w[.CoupledReads.fa]],
|
22
|
+
assembly:
|
23
|
+
['Assembled contigs or scaffolds in FastA format',
|
24
|
+
:assembly, %w[.LargeContigs.fna]]
|
25
|
+
}
|
26
|
+
|
27
|
+
o = {q: true, ref: true, ignore_dups: false,
|
28
|
+
regexp: /^(?:.*\/)?(.+?)(?:\..*(?:[12]|Reads|Contigs))?(?i:\.f[nastq]+)?$/}
|
7
29
|
OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :
|
30
|
+
opt_banner(opt, true)
|
31
|
+
opt_object(opt, o, [:project, :dataset_opt, :dataset_type_req])
|
10
32
|
opt.on('-q', '--query',
|
11
33
|
'If set, the dataset is registered as a query, not a reference dataset.'
|
12
34
|
){ |v| o[:ref] = !v }
|
@@ -18,73 +40,90 @@ OptionParser.new do |opt|
|
|
18
40
|
'Metadata as key-value pairs separated by = and delimited by comma.',
|
19
41
|
'Values are saved as strings except for booleans (true / false) or nil.'
|
20
42
|
){ |v| o[:metadata] = v }
|
21
|
-
opt.on('
|
22
|
-
'
|
43
|
+
opt.on('-r', '--name-regexp REGEXP', Regexp,
|
44
|
+
'Regular expression indicating how to extract the name from the file path.',
|
45
|
+
"By default: '#{o[:regexp]}'"){ |v| o[:regexp] = v }
|
46
|
+
opt.on('-i', '--input-type STRING',
|
47
|
+
'Type of input data, one of the following:',
|
48
|
+
*input_types.map{ |k,v| "~ #{k}: #{v[0]}." }
|
49
|
+
){ |v| o[:input_type] = v.downcase.to_sym }
|
50
|
+
opt.on('--ignore-dups', 'Continue with a warning if a dataset already exists.'
|
51
|
+
){ |v| o[:ignore_dups] = v }
|
23
52
|
opt_common(opt, o)
|
53
|
+
|
54
|
+
opt.separator 'You can create multiple datasets with a single command, ' \
|
55
|
+
'simply pass all the files at the end (FILES...).'
|
56
|
+
opt.separator 'If -D is passed, only one dataset will be added. ' \
|
57
|
+
'Otherwise, dataset names will be determined by the file paths (-r).'
|
24
58
|
opt.separator ''
|
25
|
-
opt.separator 'External input data'
|
26
|
-
opt.on('--raw-reads FILE1,FILE2', Array,
|
27
|
-
'Comma-delimited paths to the raw reads in FastQ format.',
|
28
|
-
'One file is assumed to be single reads, two are assumed to be paired.'
|
29
|
-
){ |v| o[:raw_reads] = v }
|
30
|
-
opt.on('--trimmed-fasta-single FILE', Array,
|
31
|
-
'Path to the single trimmed reads in FastA format.'
|
32
|
-
){ |v| o[:trimmed_fasta_s] = v }
|
33
|
-
opt.on('--trimmed-fasta-coupled FILE1,FILE2', Array,
|
34
|
-
'Comma-delimited paths to the coupled trimmed reads in FastA format.',
|
35
|
-
'One file is assumed to be interposed, two are assumed to contain sisters.'
|
36
|
-
){ |v| o[:trimmed_fasta_c] = v }
|
37
|
-
opt.on('--assembly FILE', Array,
|
38
|
-
'Path to the contigs (or scaffolds) of the assembly in FastA format.'
|
39
|
-
){ |v| o[:assembly] = v }
|
40
59
|
end.parse!
|
41
60
|
|
42
61
|
##=> Main <=
|
43
|
-
opt_require(o)
|
44
|
-
|
62
|
+
opt_require(o, project: '-P')
|
63
|
+
files = ARGV
|
64
|
+
file_type = nil
|
65
|
+
if files.empty?
|
66
|
+
opt_require_type(o, MiGA::Dataset)
|
67
|
+
files = [nil]
|
68
|
+
else
|
69
|
+
raise 'Please specify input type (-i).' if o[:input_type].nil?
|
70
|
+
file_type = input_types[o[:input_type]]
|
71
|
+
raise "Unrecognized input type: #{o[:input_type]}." if file_type.nil?
|
72
|
+
raise 'Some files are duplicated, files must be unique.' if
|
73
|
+
files.size != files.uniq.size
|
74
|
+
if o[:input_type].to_s =~ /_paired$/
|
75
|
+
raise 'Odd number of files incompatible with input type.' if files.size.odd?
|
76
|
+
files = Hash[*files].to_a
|
77
|
+
else
|
78
|
+
files = files.map{ |i| [i] }
|
79
|
+
end
|
80
|
+
raise 'The dataset name (-D) can only be specified with one input file.' if
|
81
|
+
files.size > 1 and not o[:dataset].nil?
|
82
|
+
end
|
45
83
|
|
46
84
|
$stderr.puts 'Loading project.' unless o[:q]
|
47
85
|
p = MiGA::Project.load(o[:project])
|
48
86
|
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
49
87
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
88
|
+
$stderr.puts 'Creating datasets:' unless o[:q]
|
89
|
+
files.each do |file|
|
90
|
+
name = o[:dataset]
|
91
|
+
if name.nil?
|
92
|
+
ref_file = file.is_a?(Array) ? file.first : file
|
93
|
+
m = o[:regexp].match(ref_file)
|
94
|
+
raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
|
95
|
+
name = m[1].miga_name
|
96
|
+
end
|
56
97
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
gz = o[sym][i] =~ /\.gz/ ? '.gz' : ''
|
65
|
-
FileUtils.cp(o[sym][i], "#{r_path}#{ext[i]}#{gz}")
|
98
|
+
if MiGA::Dataset.exist?(p, name)
|
99
|
+
if o[:ignore_dups]
|
100
|
+
warn "Dataset already exists: #{name}."
|
101
|
+
next
|
102
|
+
else
|
103
|
+
raise "Dataset already exists: #{name}."
|
104
|
+
end
|
66
105
|
end
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
106
|
+
|
107
|
+
$stderr.puts "o #{name}" unless o[:q]
|
108
|
+
d = MiGA::Dataset.new(p, name, o[:ref])
|
109
|
+
raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
|
110
|
+
|
111
|
+
unless file.nil?
|
112
|
+
r_dir = MiGA::Dataset.RESULT_DIRS[ file_type[1] ]
|
113
|
+
r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
|
114
|
+
file_type[2].each_with_index do |ext, i|
|
115
|
+
gz = file[i] =~ /\.gz/ ? '.gz' : ''
|
116
|
+
FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
|
117
|
+
$stderr.puts " file: #{file[i]}" unless o[:q]
|
118
|
+
end
|
119
|
+
File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
|
79
120
|
end
|
80
|
-
|
81
|
-
|
121
|
+
|
122
|
+
d = add_metadata(o, d)
|
123
|
+
d.save
|
124
|
+
p.add_dataset(name)
|
125
|
+
res = d.first_preprocessing(true)
|
126
|
+
$stderr.puts " result: #{res}" unless o[:q]
|
82
127
|
end
|
83
128
|
|
84
|
-
d = add_metadata(o, d)
|
85
|
-
d.save
|
86
|
-
p.add_dataset(o[:dataset]) unless o[:update]
|
87
|
-
res = d.first_preprocessing(true)
|
88
|
-
$stderr.puts "- #{res}" unless o[:q]
|
89
|
-
|
90
129
|
$stderr.puts 'Done.' unless o[:q]
|
data/bin/miga
CHANGED
@@ -84,11 +84,11 @@ end
|
|
84
84
|
##=> Functions <=
|
85
85
|
|
86
86
|
# OptParse banner
|
87
|
-
def opt_banner(opt)
|
87
|
+
def opt_banner(opt, files = false)
|
88
88
|
opt.banner = <<BAN
|
89
89
|
#{$task_desc[$task]}
|
90
90
|
|
91
|
-
Usage: #{$0} #{$task} [options]
|
91
|
+
Usage: #{$0} #{$task} [options]#{ ' FILES...' if files}
|
92
92
|
BAN
|
93
93
|
opt.separator ""
|
94
94
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -10,22 +10,22 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.3, 13, 1]
|
13
|
+
VERSION = [0.4, 0, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
17
|
-
VERSION_NAME = '
|
17
|
+
VERSION_NAME = 'aquatint'
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2019, 8,
|
21
|
+
VERSION_DATE = Date.new(2019, 8, 15)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
25
|
-
CITATION =
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
CITATION = 'Rodriguez-R et al (2018). ' \
|
26
|
+
'The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene ' \
|
27
|
+
'diversity analysis of Archaea and Bacteria at the whole genome level. ' \
|
28
|
+
'Nucleic Acids Research 46(W1):W282-W288. doi:10.1093/nar/gky467.'
|
29
29
|
|
30
30
|
end
|
31
31
|
|
@@ -39,12 +39,12 @@ class MiGA::MiGA
|
|
39
39
|
|
40
40
|
##
|
41
41
|
# Complete version as string.
|
42
|
-
def self.FULL_VERSION ; VERSION.join(
|
42
|
+
def self.FULL_VERSION ; VERSION.join('.') ; end
|
43
43
|
|
44
44
|
##
|
45
45
|
# Complete version with nickname and date as string.
|
46
46
|
def self.LONG_VERSION
|
47
|
-
"MiGA #{VERSION.join(
|
47
|
+
"MiGA #{VERSION.join('.')} - #{VERSION_NAME} - #{VERSION_DATE}"
|
48
48
|
end
|
49
49
|
|
50
50
|
##
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.N50.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.filterN.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.length.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.split.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../enveomics.R
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.13.1
|
4
|
+
version: 0.4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -509,8 +509,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
509
509
|
- !ruby/object:Gem::Version
|
510
510
|
version: '0'
|
511
511
|
requirements: []
|
512
|
-
|
513
|
-
rubygems_version: 2.7.6
|
512
|
+
rubygems_version: 3.0.3
|
514
513
|
signing_key:
|
515
514
|
specification_version: 4
|
516
515
|
summary: MiGA
|