miga-base 0.3.13.1 → 0.4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/add.rb +96 -57
- data/bin/miga +2 -2
- data/lib/miga/version.rb +9 -9
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37bf8085cc9b33f88367c8134cd2b01115cfcd9e9116fab7ba3f93abc33c6d6f
|
4
|
+
data.tar.gz: bdefeaa9f965ea991071a70b48239ad7129763bcb05f0cc2ebcd6d66511bd3e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e1463de777f3d77bcb8f804bdfedeb3781ccf0b36330faf21d501b1a87cf9b55f0d03f2668a1889ae165cbaaf2030ab955ad8c0f42a5aaad341a6eb7154b356
|
7
|
+
data.tar.gz: 6738493c3a716f553fea6486841f612616bad38cc4091103ec490417336df327ae03046a04907b9dcaed9c5d81e17ab918d64d841a67a50b23565dc46308736b
|
data/actions/add.rb
CHANGED
@@ -3,10 +3,32 @@
|
|
3
3
|
# @package MiGA
|
4
4
|
# @license Artistic-2.0
|
5
5
|
|
6
|
-
|
6
|
+
input_types = {
|
7
|
+
raw_reads_single:
|
8
|
+
['Single raw reads in a single FastQ file',
|
9
|
+
:raw_reads, %w[.1.fastq]],
|
10
|
+
raw_reads_paired:
|
11
|
+
['Paired raw reads in two FastQ files',
|
12
|
+
:raw_reads, %w[.1.fastq .2.fastq]],
|
13
|
+
trimmed_reads_single:
|
14
|
+
['Single trimmed reads in a single FastA file',
|
15
|
+
:trimmed_fasta, %w[.SingleReads.fa]],
|
16
|
+
trimmed_reads_paired:
|
17
|
+
['Paired trimmed reads in two FastA files',
|
18
|
+
:trimmed_fasta, %w[.1.fasta .2.fasta]],
|
19
|
+
trimmed_reads_interleaved:
|
20
|
+
['Paired trimmed reads in a single FastA file',
|
21
|
+
:trimmed_fasta, %w[.CoupledReads.fa]],
|
22
|
+
assembly:
|
23
|
+
['Assembled contigs or scaffolds in FastA format',
|
24
|
+
:assembly, %w[.LargeContigs.fna]]
|
25
|
+
}
|
26
|
+
|
27
|
+
o = {q: true, ref: true, ignore_dups: false,
|
28
|
+
regexp: /^(?:.*\/)?(.+?)(?:\..*(?:[12]|Reads|Contigs))?(?i:\.f[nastq]+)?$/}
|
7
29
|
OptionParser.new do |opt|
|
8
|
-
opt_banner(opt)
|
9
|
-
opt_object(opt, o, [:project, :
|
30
|
+
opt_banner(opt, true)
|
31
|
+
opt_object(opt, o, [:project, :dataset_opt, :dataset_type_req])
|
10
32
|
opt.on('-q', '--query',
|
11
33
|
'If set, the dataset is registered as a query, not a reference dataset.'
|
12
34
|
){ |v| o[:ref] = !v }
|
@@ -18,73 +40,90 @@ OptionParser.new do |opt|
|
|
18
40
|
'Metadata as key-value pairs separated by = and delimited by comma.',
|
19
41
|
'Values are saved as strings except for booleans (true / false) or nil.'
|
20
42
|
){ |v| o[:metadata] = v }
|
21
|
-
opt.on('
|
22
|
-
'
|
43
|
+
opt.on('-r', '--name-regexp REGEXP', Regexp,
|
44
|
+
'Regular expression indicating how to extract the name from the file path.',
|
45
|
+
"By default: '#{o[:regexp]}'"){ |v| o[:regexp] = v }
|
46
|
+
opt.on('-i', '--input-type STRING',
|
47
|
+
'Type of input data, one of the following:',
|
48
|
+
*input_types.map{ |k,v| "~ #{k}: #{v[0]}." }
|
49
|
+
){ |v| o[:input_type] = v.downcase.to_sym }
|
50
|
+
opt.on('--ignore-dups', 'Continue with a warning if a dataset already exists.'
|
51
|
+
){ |v| o[:ignore_dups] = v }
|
23
52
|
opt_common(opt, o)
|
53
|
+
|
54
|
+
opt.separator 'You can create multiple datasets with a single command, ' \
|
55
|
+
'simply pass all the files at the end (FILES...).'
|
56
|
+
opt.separator 'If -D is passed, only one dataset will be added. ' \
|
57
|
+
'Otherwise, dataset names will be determined by the file paths (-r).'
|
24
58
|
opt.separator ''
|
25
|
-
opt.separator 'External input data'
|
26
|
-
opt.on('--raw-reads FILE1,FILE2', Array,
|
27
|
-
'Comma-delimited paths to the raw reads in FastQ format.',
|
28
|
-
'One file is assumed to be single reads, two are assumed to be paired.'
|
29
|
-
){ |v| o[:raw_reads] = v }
|
30
|
-
opt.on('--trimmed-fasta-single FILE', Array,
|
31
|
-
'Path to the single trimmed reads in FastA format.'
|
32
|
-
){ |v| o[:trimmed_fasta_s] = v }
|
33
|
-
opt.on('--trimmed-fasta-coupled FILE1,FILE2', Array,
|
34
|
-
'Comma-delimited paths to the coupled trimmed reads in FastA format.',
|
35
|
-
'One file is assumed to be interposed, two are assumed to contain sisters.'
|
36
|
-
){ |v| o[:trimmed_fasta_c] = v }
|
37
|
-
opt.on('--assembly FILE', Array,
|
38
|
-
'Path to the contigs (or scaffolds) of the assembly in FastA format.'
|
39
|
-
){ |v| o[:assembly] = v }
|
40
59
|
end.parse!
|
41
60
|
|
42
61
|
##=> Main <=
|
43
|
-
opt_require(o)
|
44
|
-
|
62
|
+
opt_require(o, project: '-P')
|
63
|
+
files = ARGV
|
64
|
+
file_type = nil
|
65
|
+
if files.empty?
|
66
|
+
opt_require_type(o, MiGA::Dataset)
|
67
|
+
files = [nil]
|
68
|
+
else
|
69
|
+
raise 'Please specify input type (-i).' if o[:input_type].nil?
|
70
|
+
file_type = input_types[o[:input_type]]
|
71
|
+
raise "Unrecognized input type: #{o[:input_type]}." if file_type.nil?
|
72
|
+
raise 'Some files are duplicated, files must be unique.' if
|
73
|
+
files.size != files.uniq.size
|
74
|
+
if o[:input_type].to_s =~ /_paired$/
|
75
|
+
raise 'Odd number of files incompatible with input type.' if files.size.odd?
|
76
|
+
files = Hash[*files].to_a
|
77
|
+
else
|
78
|
+
files = files.map{ |i| [i] }
|
79
|
+
end
|
80
|
+
raise 'The dataset name (-D) can only be specified with one input file.' if
|
81
|
+
files.size > 1 and not o[:dataset].nil?
|
82
|
+
end
|
45
83
|
|
46
84
|
$stderr.puts 'Loading project.' unless o[:q]
|
47
85
|
p = MiGA::Project.load(o[:project])
|
48
86
|
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
49
87
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
88
|
+
$stderr.puts 'Creating datasets:' unless o[:q]
|
89
|
+
files.each do |file|
|
90
|
+
name = o[:dataset]
|
91
|
+
if name.nil?
|
92
|
+
ref_file = file.is_a?(Array) ? file.first : file
|
93
|
+
m = o[:regexp].match(ref_file)
|
94
|
+
raise "Cannot extract name from file: #{ref_file}" if m.nil? or m[1].nil?
|
95
|
+
name = m[1].miga_name
|
96
|
+
end
|
56
97
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
gz = o[sym][i] =~ /\.gz/ ? '.gz' : ''
|
65
|
-
FileUtils.cp(o[sym][i], "#{r_path}#{ext[i]}#{gz}")
|
98
|
+
if MiGA::Dataset.exist?(p, name)
|
99
|
+
if o[:ignore_dups]
|
100
|
+
warn "Dataset already exists: #{name}."
|
101
|
+
next
|
102
|
+
else
|
103
|
+
raise "Dataset already exists: #{name}."
|
104
|
+
end
|
66
105
|
end
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
106
|
+
|
107
|
+
$stderr.puts "o #{name}" unless o[:q]
|
108
|
+
d = MiGA::Dataset.new(p, name, o[:ref])
|
109
|
+
raise "Unexpected: Couldn't create dataset: #{name}." if d.nil?
|
110
|
+
|
111
|
+
unless file.nil?
|
112
|
+
r_dir = MiGA::Dataset.RESULT_DIRS[ file_type[1] ]
|
113
|
+
r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
|
114
|
+
file_type[2].each_with_index do |ext, i|
|
115
|
+
gz = file[i] =~ /\.gz/ ? '.gz' : ''
|
116
|
+
FileUtils.cp(file[i], "#{r_path}#{ext}#{gz}")
|
117
|
+
$stderr.puts " file: #{file[i]}" unless o[:q]
|
118
|
+
end
|
119
|
+
File.open("#{r_path}.done", 'w') { |f| f.print Time.now.to_s }
|
79
120
|
end
|
80
|
-
|
81
|
-
|
121
|
+
|
122
|
+
d = add_metadata(o, d)
|
123
|
+
d.save
|
124
|
+
p.add_dataset(name)
|
125
|
+
res = d.first_preprocessing(true)
|
126
|
+
$stderr.puts " result: #{res}" unless o[:q]
|
82
127
|
end
|
83
128
|
|
84
|
-
d = add_metadata(o, d)
|
85
|
-
d.save
|
86
|
-
p.add_dataset(o[:dataset]) unless o[:update]
|
87
|
-
res = d.first_preprocessing(true)
|
88
|
-
$stderr.puts "- #{res}" unless o[:q]
|
89
|
-
|
90
129
|
$stderr.puts 'Done.' unless o[:q]
|
data/bin/miga
CHANGED
@@ -84,11 +84,11 @@ end
|
|
84
84
|
##=> Functions <=
|
85
85
|
|
86
86
|
# OptParse banner
|
87
|
-
def opt_banner(opt)
|
87
|
+
def opt_banner(opt, files = false)
|
88
88
|
opt.banner = <<BAN
|
89
89
|
#{$task_desc[$task]}
|
90
90
|
|
91
|
-
Usage: #{$0} #{$task} [options]
|
91
|
+
Usage: #{$0} #{$task} [options]#{ ' FILES...' if files}
|
92
92
|
BAN
|
93
93
|
opt.separator ""
|
94
94
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -10,22 +10,22 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.
|
13
|
+
VERSION = [0.4, 0, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
17
|
-
VERSION_NAME = '
|
17
|
+
VERSION_NAME = 'aquatint'
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2019, 8,
|
21
|
+
VERSION_DATE = Date.new(2019, 8, 15)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
25
|
-
CITATION =
|
26
|
-
|
27
|
-
|
28
|
-
|
25
|
+
CITATION = 'Rodriguez-R et al (2018). ' \
|
26
|
+
'The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene ' \
|
27
|
+
'diversity analysis of Archaea and Bacteria at the whole genome level. ' \
|
28
|
+
'Nucleic Acids Research 46(W1):W282-W288. doi:10.1093/nar/gky467.'
|
29
29
|
|
30
30
|
end
|
31
31
|
|
@@ -39,12 +39,12 @@ class MiGA::MiGA
|
|
39
39
|
|
40
40
|
##
|
41
41
|
# Complete version as string.
|
42
|
-
def self.FULL_VERSION ; VERSION.join(
|
42
|
+
def self.FULL_VERSION ; VERSION.join('.') ; end
|
43
43
|
|
44
44
|
##
|
45
45
|
# Complete version with nickname and date as string.
|
46
46
|
def self.LONG_VERSION
|
47
|
-
"MiGA #{VERSION.join(
|
47
|
+
"MiGA #{VERSION.join('.')} - #{VERSION_NAME} - #{VERSION_DATE}"
|
48
48
|
end
|
49
49
|
|
50
50
|
##
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.N50.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.filterN.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.length.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.split.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../enveomics.R
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -509,8 +509,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
509
509
|
- !ruby/object:Gem::Version
|
510
510
|
version: '0'
|
511
511
|
requirements: []
|
512
|
-
|
513
|
-
rubygems_version: 2.7.6
|
512
|
+
rubygems_version: 3.0.3
|
514
513
|
signing_key:
|
515
514
|
specification_version: 4
|
516
515
|
summary: MiGA
|