miga-base 0.3.6.0 → 0.3.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 8b285b9906876a9f1b5366f929a4776d1689dbc1
4
- data.tar.gz: 83b6843d00417fef4a8de18e4a102ad4d1899f0d
2
+ SHA256:
3
+ metadata.gz: c629b49cedd42f76fd8c466ecbc561e915dcaeef9dbbc2140f66300ac21c4e86
4
+ data.tar.gz: 2174cd7e010340ea865b7ec251a9d8b2823a059bbcec782924052a5da0c0a247
5
5
  SHA512:
6
- metadata.gz: acfa6eb243f7fa8985cb649ab3b701db68515e8a18c221d94cb149e51cddeec49642b6176da46956765cca8f1961aa88d71b5cb9625131cf64f9287e79e173c6
7
- data.tar.gz: 4ac1f2f81854959679b53d4865efba3a36ddca14216ff9f2aef06e1c27b7b415b47c833387e73ddf3cae41369f382146f1041b9a833743d58d3aad0a954bd1ab
6
+ metadata.gz: f20e4c7312402beec67de7a458356f76bd932edbeadffdee83061d040c8eaaddf31ada6304873638237ca299b806054e7de9656ecfebaeaa7e8e5ddb83710a93
7
+ data.tar.gz: 5dcae9006b7b84d75ce05019f9ac3f6defe4305c52f574a7c3426a3f8ee098ee4cd0ecf0968d5669066886fbc77185cd477ea265df5509eaf66733e2d4dfb421
data/actions/stats.rb CHANGED
@@ -127,8 +127,6 @@ if o[:compute]
127
127
  end
128
128
  d.save
129
129
  end
130
- when :distances
131
- d.cleanup_distances! unless d.nil?
132
130
  else
133
131
  stats = nil
134
132
  end
data/lib/miga/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
- require "date"
2
+ require 'date'
3
3
 
4
4
  ##
5
5
  # High-level minimal requirements for the MiGA::MiGA class.
@@ -10,11 +10,11 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 6, 0]
13
+ VERSION = [0.3, 6, 1]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
17
- VERSION_NAME = "tinge"
17
+ VERSION_NAME = 'tinge'
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
@@ -11,6 +11,9 @@ cd "$PROJECT/data/10.clades/01.find"
11
11
  # Initialize
12
12
  miga date > "miga-project.start"
13
13
 
14
+ # Cleanup databases
15
+ ruby -I "$MIGA/lib" "$MIGA/utils/cleanup-databases.rb" "$PROJECT" "$CORES"
16
+
14
17
  # Run
15
18
  ruby -I "$MIGA/lib" "$MIGA/utils/subclades.rb" "$PROJECT" "$SCRIPT"
16
19
 
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'thread'
4
+ require 'miga'
5
+
6
+ ARGV[1] or abort "Usage: #{$0} path/to/project threads"
7
+
8
+ $stderr.puts "Cleaning databases..."
9
+ ds_list = MiGA::Project.load(ARGV[0]).datasets.
10
+ select(&:is_ref?).select(&:is_active?)
11
+
12
+ thr = ARGV[1].to_i
13
+
14
+ (0 .. thr-1).each do |t|
15
+ fork do
16
+ k = -1
17
+ ds_list.each do |i|
18
+ k = (k+1) % thr
19
+ next unless k == t
20
+ i.cleanup_distances!
21
+ end
22
+ end
23
+ end
24
+ Process.waitall
25
+
@@ -0,0 +1 @@
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
@@ -0,0 +1 @@
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
@@ -0,0 +1 @@
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
@@ -0,0 +1 @@
1
+ utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
@@ -0,0 +1 @@
1
+ utils/enveomics/Scripts/lib/../../enveomics.R
@@ -26,16 +26,18 @@ module MiGA::SubcladeRunner::Pipeline
26
26
  `ogs.mcl.rb -o '#{ogs_file}.tmp' --abc '#{abc_path}' -t '#{opts[:thr]}'`
27
27
  File.open(ogs_file, 'w') do |fh|
28
28
  File.foreach("#{ogs_file}.tmp").with_index do |ln, lno|
29
- fh.puts ln if lno != 0
29
+ fh.puts ln if lno > 0
30
30
  end
31
31
  end
32
32
  File.unlink "#{ogs_file}.tmp"
33
33
  end
34
34
 
35
35
  # Find species medoids
36
- src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
37
- `Rscript '#{src}' miga-project.dist.rdata \
38
- miga-project.ani95-medoids miga-project.ani95-clades`
36
+ if File.size? 'miga-project.dist.rdata'
37
+ src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
38
+ `Rscript '#{src}' miga-project.dist.rdata \
39
+ miga-project.ani95-medoids miga-project.ani95-clades`
40
+ end
39
41
 
40
42
  # Propose clades
41
43
  ofh = File.open('miga-project.proposed-clades', 'w')
data/utils/subclades.R CHANGED
@@ -22,11 +22,11 @@ subclades <- function(ani_file, out_base, thr = 1, ani.d = dist(0), sel = NA) {
22
22
  if(length(ani.d) == 0 && !file.exists(dist_rdata)){
23
23
  # Read from ani_file
24
24
  a <- read.table(gzfile(ani_file), sep = '\t', header = TRUE, as.is = TRUE)
25
- if(nrow(a)==0){
25
+ if(nrow(a) == 0){
26
26
  generate_empty_files(out_base)
27
27
  return(NULL)
28
28
  }
29
- if(!is.na(sel) and file.exists(sel)){
29
+ if(!is.na(sel) && file.exists(sel)){
30
30
  say('Filter selection')
31
31
  lab <- read.table(sel, sep='\t', head=FALSE, as.is=TRUE)[,1]
32
32
  a <- a[a$a %in% lab & a$b %in% lab, ]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6.0
4
+ version: 0.3.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
@@ -188,6 +188,7 @@ files:
188
188
  - test/test_helper.rb
189
189
  - utils/adapters.fa
190
190
  - utils/arch-ess-genes.rb
191
+ - utils/cleanup-databases.rb
191
192
  - utils/core-pan-plot.R
192
193
  - utils/distance/base.rb
193
194
  - utils/distance/commands.rb
@@ -341,6 +342,7 @@ files:
341
342
  - utils/enveomics/Scripts/gi2tax.rb
342
343
  - utils/enveomics/Scripts/in_silico_GA_GI.pl
343
344
  - utils/enveomics/Scripts/lib/data/essential.hmm.gz
345
+ - utils/enveomics/Scripts/lib/enveomics.R
344
346
  - utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb
345
347
  - utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb
346
348
  - utils/enveomics/Scripts/lib/enveomics_rb/og.rb
@@ -495,7 +497,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
495
497
  version: '0'
496
498
  requirements: []
497
499
  rubyforge_project:
498
- rubygems_version: 2.2.2
500
+ rubygems_version: 2.7.7
499
501
  signing_key:
500
502
  specification_version: 4
501
503
  summary: MiGA
@@ -1,56 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update: Oct 07 2015
5
- # @license: artistic license 2.0
6
- #
7
- use strict;
8
- use warnings;
9
- use List::Util qw/sum min max/;
10
-
11
- my ($seqs, $minlen, $n__) = @ARGV;
12
- $seqs or die "
13
- Description:
14
- Calculates the N50 value of a set of sequences. Alternatively, it
15
- can calculate other N** values. It also calculates the total number
16
- of sequences and the total added length.
17
-
18
- Usage:
19
- $0 seqs.fa[ minlen[ **]]
20
-
21
- seqs.fa A FastA file containing the sequences.
22
- minlen (optional) The minimum length to take into consideration.
23
- By default: 0.
24
- ** Value N** to calculate. By default: 50 (N50).
25
- ";
26
- $minlen ||= 0;
27
- $n__ ||= 50;
28
-
29
- my @len = ();
30
- open SEQ, "<", $seqs or die "Cannot open file: $seqs: $!\n";
31
- while(<SEQ>){
32
- if(/^>/){
33
- push @len, 0;
34
- }else{
35
- next if /^;/;
36
- chomp;
37
- s/\W//g;
38
- $len[-1]+=length $_;
39
- }
40
- }
41
- close SEQ;
42
- @len = sort { $a <=> $b } map { $_>=$minlen?$_:() } @len;
43
- my $tot = (sum(@len) || 0);
44
-
45
- my $thr = $n__*$tot/100;
46
- my $pos = 0;
47
- for(@len){
48
- $pos+= $_;
49
- if($pos>=$thr){
50
- print "N$n__: $_\n";
51
- last;
52
- }
53
- }
54
- print "Sequences: ".scalar(@len)."\n";
55
- print "Total length: $tot\n";
56
-
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Oct-07-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
-
11
- my($file, $content, $stretch) = @ARGV;
12
- $file or die <<HELP
13
-
14
- Description:
15
- Filter sequences by N-content and presence of long homopolymers.
16
- Usage:
17
- $0 sequences.fa [content [stretch]] > filtered.fa
18
- Where:
19
- sequences.fa Input file in FastA format
20
- content A number between 0 and 1 indicating the maximum proportion of Ns
21
- (1 to turn off, 0.5 by default)
22
- stretch A number indicating the maximum number of consecutive identical
23
- nucleotides allowed (0 to turn off, 100 by default)
24
- filtered.fa Filtered set of sequences.
25
-
26
- HELP
27
- ;
28
- ($content ||= 0.5)+=0;
29
- ($stretch ||= 100)+=0;
30
-
31
- my $good = 0;
32
- my $N = 0;
33
-
34
- FASTA: {
35
- local $/ = "\n>";
36
- open FILE, "<", $file or die "I can not open the file: $file: $!\n";
37
- SEQ: while(<FILE>){
38
- $N++;
39
- s/^;.*//gm;
40
- s/>//g;
41
- my($n,$s) = split /\n/, $_, 2;
42
- (my $clean = $s) =~ s/[^ACTGN]//g;
43
- if($content < 1){
44
- (my $Ns = $clean) =~ s/[^N]//g;
45
- next SEQ if length($Ns)>length($clean)*$content;
46
- }
47
- if($stretch > 0){
48
- for my $nuc (qw(A C T G N)){
49
- next SEQ if $clean =~ m/[$nuc]{$stretch}/;
50
- }
51
- }
52
- print ">$n\n$s\n";
53
- $good++;
54
- }
55
- close FILE;
56
- print STDERR "Total sequences: $N\nAfter filtering: $good\n";
57
- }
58
-
59
-
60
-
@@ -1,38 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M Rodriguez-R
4
- # @update Oct-07-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
-
11
- $#ARGV>=0 or die "
12
- Usage:
13
- $0 seqs.fa... > length.txt
14
-
15
- seqs.fa One or more FastA files.
16
- length.txt A table with the lengths of the sequences.
17
-
18
- ";
19
-
20
- for my $fa (@ARGV){
21
- open FA, "<", $fa or die "Cannot open file: $fa: $!\n";
22
- my $def = '';
23
- my $len = 0;
24
- while(<FA>){
25
- next if /^;/;
26
- if(m/^>(\S+)\s?/){
27
- print "$def\t$len\n" if $def;
28
- $def = $1;
29
- $len = 0;
30
- }else{
31
- s/[^A-Za-z]//g;
32
- $len+= length $_;
33
- }
34
- }
35
- print "$def\t$len\n" if $def;
36
- close FA;
37
- }
38
-
@@ -1,55 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Oct-13-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Symbol;
11
-
12
- my ($file, $base, $outN) = @ARGV;
13
-
14
- $outN ||= 12;
15
- ($file and $base) or die "
16
- Usage
17
- $0 in_file.fa out_base[ no_files]
18
-
19
- in_file.fa Input file in FastA format.
20
- out_base Prefix for the name of the output files. It will
21
- be appended with .<i>.fa, where <i> is a consecutive
22
- number starting in 1.
23
- no_files Number of files to generate. By default: 12.
24
-
25
- ";
26
-
27
-
28
- my @outSym = ();
29
- for my $i (1 .. $outN){
30
- $outSym[$i-1] = gensym;
31
- open $outSym[$i-1], ">", "$base.$i.fa" or
32
- die "I can not create the file: $base.$i.fa: $!\n";
33
- }
34
-
35
-
36
- my($i, $seq) = (-1, '');
37
- open FILE, "<", $file or die "I can not read the file: $file: $!\n";
38
- while(my $ln=<FILE>){
39
- next if $ln=~/^;/;
40
- if($ln =~ m/^>/){
41
- print { $outSym[$i % $outN] } $seq if $seq;
42
- $i++;
43
- $seq = '';
44
- }
45
- $seq.=$ln;
46
- }
47
- print { $outSym[$i % $outN] } $seq if $seq;
48
- close FILE;
49
-
50
- for(my $j=0; $j<$outN; $j++){
51
- close $outSym[$j];
52
- }
53
-
54
- print STDERR "Sequences: ".($i+1)."\nFiles: $outN\n";
55
-