RubyGems - miga-base - Versions diffs - 0.3.6.0 → 0.3.6.1 - Mend

miga-base 0.3.6.0 → 0.3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

checksums.yaml +5 -5
data/actions/stats.rb +0 -2
data/lib/miga/version.rb +3 -3
data/scripts/clade_finding.bash +3 -0
data/utils/cleanup-databases.rb +25 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
data/utils/subclade/pipeline.rb +6 -4
data/utils/subclades.R +2 -2
metadata +4 -2
data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -56
data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -60
data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -38
data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -55

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 8b285b9906876a9f1b5366f929a4776d1689dbc1
-  data.tar.gz: 83b6843d00417fef4a8de18e4a102ad4d1899f0d
+SHA256:
+  metadata.gz: c629b49cedd42f76fd8c466ecbc561e915dcaeef9dbbc2140f66300ac21c4e86
+  data.tar.gz: 2174cd7e010340ea865b7ec251a9d8b2823a059bbcec782924052a5da0c0a247
 SHA512:
-  metadata.gz: acfa6eb243f7fa8985cb649ab3b701db68515e8a18c221d94cb149e51cddeec49642b6176da46956765cca8f1961aa88d71b5cb9625131cf64f9287e79e173c6
-  data.tar.gz: 4ac1f2f81854959679b53d4865efba3a36ddca14216ff9f2aef06e1c27b7b415b47c833387e73ddf3cae41369f382146f1041b9a833743d58d3aad0a954bd1ab
+  metadata.gz: f20e4c7312402beec67de7a458356f76bd932edbeadffdee83061d040c8eaaddf31ada6304873638237ca299b806054e7de9656ecfebaeaa7e8e5ddb83710a93
+  data.tar.gz: 5dcae9006b7b84d75ce05019f9ac3f6defe4305c52f574a7c3426a3f8ee098ee4cd0ecf0968d5669066886fbc77185cd477ea265df5509eaf66733e2d4dfb421

data/actions/stats.rb CHANGED Viewed

@@ -127,8 +127,6 @@ if o[:compute]
       end
       d.save
     end
-  when :distances
-    d.cleanup_distances! unless d.nil?
   else
     stats = nil
   end

data/lib/miga/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
-require "date"
+require 'date'
 ##
 # High-level minimal requirements for the MiGA::MiGA class.
@@ -10,11 +10,11 @@ module MiGA
   # - Float representing the major.minor version.
   # - Integer representing gem releases of the current version.
   # - Integer representing minor changes that require new version number.
-  VERSION = [0.3, 6, 0]
+  VERSION = [0.3, 6, 1]
   ##
   # Nickname for the current major.minor version.
-  VERSION_NAME = "tinge"
+  VERSION_NAME = 'tinge'
   ##
   # Date of the current gem release.

data/scripts/clade_finding.bash CHANGED Viewed

@@ -11,6 +11,9 @@ cd "$PROJECT/data/10.clades/01.find"
 # Initialize
 miga date > "miga-project.start"
+# Cleanup databases
+ruby -I "$MIGA/lib" "$MIGA/utils/cleanup-databases.rb" "$PROJECT" "$CORES"
 # Run
 ruby -I "$MIGA/lib" "$MIGA/utils/subclades.rb" "$PROJECT" "$SCRIPT"

data/utils/cleanup-databases.rb ADDED Viewed

@@ -0,0 +1,25 @@
+#!/usr/bin/env ruby
+require 'thread'
+require 'miga'
+ARGV[1] or abort "Usage: #{$0} path/to/project threads"
+$stderr.puts "Cleaning databases..."
+ds_list = MiGA::Project.load(ARGV[0]).datasets.
+  select(&:is_ref?).select(&:is_active?)
+thr = ARGV[1].to_i
+(0 .. thr-1).each do |t|
+  fork do
+    k = -1
+    ds_list.each do |i|
+      k = (k+1) % thr
+      next unless k == t
+      i.cleanup_distances!
+    end
+  end
+end
+Process.waitall

data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl ADDED Viewed

	@@ -0,0 +1 @@
1	+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl

data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl ADDED Viewed

	@@ -0,0 +1 @@
1	+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl

data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl ADDED Viewed

	@@ -0,0 +1 @@
1	+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl

data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl ADDED Viewed

	@@ -0,0 +1 @@
1	+ utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl

data/utils/enveomics/Scripts/lib/enveomics.R ADDED Viewed

	@@ -0,0 +1 @@
1	+ utils/enveomics/Scripts/lib/../../enveomics.R

data/utils/subclade/pipeline.rb CHANGED Viewed

@@ -26,16 +26,18 @@ module MiGA::SubcladeRunner::Pipeline
       `ogs.mcl.rb -o '#{ogs_file}.tmp' --abc '#{abc_path}' -t '#{opts[:thr]}'`
       File.open(ogs_file, 'w') do |fh|
         File.foreach("#{ogs_file}.tmp").with_index do |ln, lno|
-          fh.puts ln if lno != 0
+          fh.puts ln if lno > 0
         end
       end
       File.unlink "#{ogs_file}.tmp"
     end
     # Find species medoids
-    src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
-    `Rscript '#{src}' miga-project.dist.rdata \
-      miga-project.ani95-medoids miga-project.ani95-clades`
+    if File.size? 'miga-project.dist.rdata'
+      src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
+      `Rscript '#{src}' miga-project.dist.rdata \
+        miga-project.ani95-medoids miga-project.ani95-clades`
+    end
     # Propose clades
     ofh = File.open('miga-project.proposed-clades', 'w')

data/utils/subclades.R CHANGED Viewed

@@ -22,11 +22,11 @@ subclades <- function(ani_file, out_base, thr = 1, ani.d = dist(0), sel = NA) {
     if(length(ani.d) == 0 && !file.exists(dist_rdata)){
       # Read from ani_file
       a <- read.table(gzfile(ani_file), sep = '\t', header = TRUE, as.is = TRUE)
-      if(nrow(a)==0){
+      if(nrow(a) == 0){
         generate_empty_files(out_base)
         return(NULL)
       }
-      if(!is.na(sel) and file.exists(sel)){
+      if(!is.na(sel) && file.exists(sel)){
         say('Filter selection')
         lab <- read.table(sel, sep='\t', head=FALSE, as.is=TRUE)[,1]
         a <- a[a$a %in% lab & a$b %in% lab, ]

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: miga-base
 version: !ruby/object:Gem::Version
-  version: 0.3.6.0
+  version: 0.3.6.1
 platform: ruby
 authors:
 - Luis M. Rodriguez-R
@@ -188,6 +188,7 @@ files:
 - test/test_helper.rb
 - utils/adapters.fa
 - utils/arch-ess-genes.rb
+- utils/cleanup-databases.rb
 - utils/core-pan-plot.R
 - utils/distance/base.rb
 - utils/distance/commands.rb
@@ -341,6 +342,7 @@ files:
 - utils/enveomics/Scripts/gi2tax.rb
 - utils/enveomics/Scripts/in_silico_GA_GI.pl
 - utils/enveomics/Scripts/lib/data/essential.hmm.gz
+- utils/enveomics/Scripts/lib/enveomics.R
 - utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb
 - utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb
 - utils/enveomics/Scripts/lib/enveomics_rb/og.rb
@@ -495,7 +497,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.2
+rubygems_version: 2.7.7
 signing_key:
 specification_version: 4
 summary: MiGA

data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl DELETED Viewed

@@ -1,56 +0,0 @@
-#!/usr/bin/env perl
-#
-# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @update: Oct 07 2015
-# @license: artistic license 2.0
-#
-use strict;
-use warnings;
-use List::Util qw/sum min max/;
-my ($seqs, $minlen, $n__) = @ARGV;
-$seqs or die "
-Description:
-   Calculates the N50 value of a set of sequences.  Alternatively, it
-   can calculate other N** values.  It also calculates the total number
-   of sequences and the total added length.
-Usage:
-   $0 seqs.fa[ minlen[ **]]
-   seqs.fa	A FastA file containing the sequences.
-   minlen	(optional) The minimum length to take into consideration.
-   		By default: 0.
-   **		Value N** to calculate.  By default: 50 (N50).
-";
-$minlen ||= 0;
-$n__    ||= 50;
-my @len = ();
-open SEQ, "<", $seqs or die "Cannot open file: $seqs: $!\n";
-while(<SEQ>){
-   if(/^>/){
-      push @len, 0;
-   }else{
-      next if /^;/;
-      chomp;
-      s/\W//g;
-      $len[-1]+=length $_;
-   }
-}
-close SEQ;
-@len = sort { $a <=> $b } map { $_>=$minlen?$_:() } @len;
-my $tot = (sum(@len) || 0);
-my $thr = $n__*$tot/100;
-my $pos = 0;
-for(@len){
-   $pos+= $_;
-   if($pos>=$thr){
-      print "N$n__: $_\n";
-      last;
-   }
-}
-print "Sequences: ".scalar(@len)."\n";
-print "Total length: $tot\n";

data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl DELETED Viewed

@@ -1,60 +0,0 @@
-#!/usr/bin/env perl
-#
-# @author  Luis M. Rodriguez-R
-# @update  Oct-07-2015
-# @license artistic license 2.0
-#
-use warnings;
-use strict;
-my($file, $content, $stretch) = @ARGV;
-$file or die <<HELP
-Description:
-   Filter sequences by N-content and presence of long homopolymers.
-Usage:
-   $0 sequences.fa [content [stretch]] > filtered.fa
-Where:
-   sequences.fa	Input file in FastA format
-   content	A number between 0 and 1 indicating the maximum proportion of Ns
-   		(1 to turn off, 0.5 by default)
-   stretch	A number indicating the maximum number of consecutive identical
-   		nucleotides allowed (0 to turn off, 100 by default)
-   filtered.fa	Filtered set of sequences.
-HELP
-;
-($content ||= 0.5)+=0;
-($stretch ||= 100)+=0;
-my $good = 0;
-my $N = 0;
-FASTA: {
-   local $/ = "\n>";
-   open FILE, "<", $file or die "I can not open the file: $file: $!\n";
-   SEQ: while(<FILE>){
-      $N++;
-      s/^;.*//gm;
-      s/>//g;
-      my($n,$s) = split /\n/, $_, 2;
-      (my $clean = $s) =~ s/[^ACTGN]//g;
-      if($content < 1){
-         (my $Ns = $clean) =~ s/[^N]//g;
-	 next SEQ if length($Ns)>length($clean)*$content;
-      }
-      if($stretch > 0){
-         for my $nuc (qw(A C T G N)){
-	    next SEQ if $clean =~ m/[$nuc]{$stretch}/;
-	 }
-      }
-      print ">$n\n$s\n";
-      $good++;
-   }
-   close FILE;
-   print STDERR "Total sequences: $N\nAfter filtering: $good\n";
-}

data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl DELETED Viewed

@@ -1,38 +0,0 @@
-#!/usr/bin/env perl
-#
-# @author  Luis M Rodriguez-R
-# @update  Oct-07-2015
-# @license artistic license 2.0
-#
-use warnings;
-use strict;
-$#ARGV>=0 or die "
-Usage:
-   $0 seqs.fa... > length.txt
-   seqs.fa	One or more FastA files.
-   length.txt	A table with the lengths of the sequences.
-";
-for my $fa (@ARGV){
-   open FA, "<", $fa or die "Cannot open file: $fa: $!\n";
-   my $def = '';
-   my $len = 0;
-   while(<FA>){
-      next if /^;/;
-      if(m/^>(\S+)\s?/){
-         print "$def\t$len\n" if $def;
-	 $def = $1;
-	 $len = 0;
-      }else{
-         s/[^A-Za-z]//g;
-	 $len+= length $_;
-      }
-   }
-   print "$def\t$len\n" if $def;
-   close FA;
-}

data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl DELETED Viewed

@@ -1,55 +0,0 @@
-#!/usr/bin/env perl
-#
-# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @update Oct-13-2015
-# @license artistic license 2.0
-#
-use warnings;
-use strict;
-use Symbol;
-my ($file, $base, $outN) = @ARGV;
-$outN ||= 12;
-($file and $base) or die "
-Usage
-   $0 in_file.fa out_base[ no_files]
-   in_file.fa	Input file in FastA format.
-   out_base	Prefix for the name of the output files.  It will
-   		be appended with .<i>.fa, where <i> is a consecutive
-		number starting in 1.
-   no_files	Number of files to generate.  By default: 12.
-";
-my @outSym = ();
-for my $i (1 .. $outN){
-   $outSym[$i-1] = gensym;
-   open $outSym[$i-1], ">", "$base.$i.fa" or
-      die "I can not create the file: $base.$i.fa: $!\n";
-}
-my($i, $seq) = (-1, '');
-open FILE, "<", $file or die "I can not read the file: $file: $!\n";
-while(my $ln=<FILE>){
-   next if $ln=~/^;/;
-   if($ln =~ m/^>/){
-      print { $outSym[$i % $outN] } $seq if $seq;
-      $i++;
-      $seq = '';
-   }
-   $seq.=$ln;
-}
-print { $outSym[$i % $outN] } $seq if $seq;
-close FILE;
-for(my $j=0; $j<$outN; $j++){
-   close $outSym[$j];
-}
-print STDERR "Sequences: ".($i+1)."\nFiles: $outN\n";