miga-base 0.3.6.0 → 0.3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 8b285b9906876a9f1b5366f929a4776d1689dbc1
4
- data.tar.gz: 83b6843d00417fef4a8de18e4a102ad4d1899f0d
2
+ SHA256:
3
+ metadata.gz: c629b49cedd42f76fd8c466ecbc561e915dcaeef9dbbc2140f66300ac21c4e86
4
+ data.tar.gz: 2174cd7e010340ea865b7ec251a9d8b2823a059bbcec782924052a5da0c0a247
5
5
  SHA512:
6
- metadata.gz: acfa6eb243f7fa8985cb649ab3b701db68515e8a18c221d94cb149e51cddeec49642b6176da46956765cca8f1961aa88d71b5cb9625131cf64f9287e79e173c6
7
- data.tar.gz: 4ac1f2f81854959679b53d4865efba3a36ddca14216ff9f2aef06e1c27b7b415b47c833387e73ddf3cae41369f382146f1041b9a833743d58d3aad0a954bd1ab
6
+ metadata.gz: f20e4c7312402beec67de7a458356f76bd932edbeadffdee83061d040c8eaaddf31ada6304873638237ca299b806054e7de9656ecfebaeaa7e8e5ddb83710a93
7
+ data.tar.gz: 5dcae9006b7b84d75ce05019f9ac3f6defe4305c52f574a7c3426a3f8ee098ee4cd0ecf0968d5669066886fbc77185cd477ea265df5509eaf66733e2d4dfb421
data/actions/stats.rb CHANGED
@@ -127,8 +127,6 @@ if o[:compute]
127
127
  end
128
128
  d.save
129
129
  end
130
- when :distances
131
- d.cleanup_distances! unless d.nil?
132
130
  else
133
131
  stats = nil
134
132
  end
data/lib/miga/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
- require "date"
2
+ require 'date'
3
3
 
4
4
  ##
5
5
  # High-level minimal requirements for the MiGA::MiGA class.
@@ -10,11 +10,11 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 6, 0]
13
+ VERSION = [0.3, 6, 1]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
17
- VERSION_NAME = "tinge"
17
+ VERSION_NAME = 'tinge'
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
@@ -11,6 +11,9 @@ cd "$PROJECT/data/10.clades/01.find"
11
11
  # Initialize
12
12
  miga date > "miga-project.start"
13
13
 
14
+ # Cleanup databases
15
+ ruby -I "$MIGA/lib" "$MIGA/utils/cleanup-databases.rb" "$PROJECT" "$CORES"
16
+
14
17
  # Run
15
18
  ruby -I "$MIGA/lib" "$MIGA/utils/subclades.rb" "$PROJECT" "$SCRIPT"
16
19
 
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'thread'
4
+ require 'miga'
5
+
6
+ ARGV[1] or abort "Usage: #{$0} path/to/project threads"
7
+
8
+ $stderr.puts "Cleaning databases..."
9
+ ds_list = MiGA::Project.load(ARGV[0]).datasets.
10
+ select(&:is_ref?).select(&:is_active?)
11
+
12
+ thr = ARGV[1].to_i
13
+
14
+ (0 .. thr-1).each do |t|
15
+ fork do
16
+ k = -1
17
+ ds_list.each do |i|
18
+ k = (k+1) % thr
19
+ next unless k == t
20
+ i.cleanup_distances!
21
+ end
22
+ end
23
+ end
24
+ Process.waitall
25
+
@@ -0,0 +1 @@
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
@@ -0,0 +1 @@
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
@@ -0,0 +1 @@
1
+ utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
@@ -0,0 +1 @@
1
+ utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
@@ -0,0 +1 @@
1
+ utils/enveomics/Scripts/lib/../../enveomics.R
@@ -26,16 +26,18 @@ module MiGA::SubcladeRunner::Pipeline
26
26
  `ogs.mcl.rb -o '#{ogs_file}.tmp' --abc '#{abc_path}' -t '#{opts[:thr]}'`
27
27
  File.open(ogs_file, 'w') do |fh|
28
28
  File.foreach("#{ogs_file}.tmp").with_index do |ln, lno|
29
- fh.puts ln if lno != 0
29
+ fh.puts ln if lno > 0
30
30
  end
31
31
  end
32
32
  File.unlink "#{ogs_file}.tmp"
33
33
  end
34
34
 
35
35
  # Find species medoids
36
- src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
37
- `Rscript '#{src}' miga-project.dist.rdata \
38
- miga-project.ani95-medoids miga-project.ani95-clades`
36
+ if File.size? 'miga-project.dist.rdata'
37
+ src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
38
+ `Rscript '#{src}' miga-project.dist.rdata \
39
+ miga-project.ani95-medoids miga-project.ani95-clades`
40
+ end
39
41
 
40
42
  # Propose clades
41
43
  ofh = File.open('miga-project.proposed-clades', 'w')
data/utils/subclades.R CHANGED
@@ -22,11 +22,11 @@ subclades <- function(ani_file, out_base, thr = 1, ani.d = dist(0), sel = NA) {
22
22
  if(length(ani.d) == 0 && !file.exists(dist_rdata)){
23
23
  # Read from ani_file
24
24
  a <- read.table(gzfile(ani_file), sep = '\t', header = TRUE, as.is = TRUE)
25
- if(nrow(a)==0){
25
+ if(nrow(a) == 0){
26
26
  generate_empty_files(out_base)
27
27
  return(NULL)
28
28
  }
29
- if(!is.na(sel) and file.exists(sel)){
29
+ if(!is.na(sel) && file.exists(sel)){
30
30
  say('Filter selection')
31
31
  lab <- read.table(sel, sep='\t', head=FALSE, as.is=TRUE)[,1]
32
32
  a <- a[a$a %in% lab & a$b %in% lab, ]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6.0
4
+ version: 0.3.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
@@ -188,6 +188,7 @@ files:
188
188
  - test/test_helper.rb
189
189
  - utils/adapters.fa
190
190
  - utils/arch-ess-genes.rb
191
+ - utils/cleanup-databases.rb
191
192
  - utils/core-pan-plot.R
192
193
  - utils/distance/base.rb
193
194
  - utils/distance/commands.rb
@@ -341,6 +342,7 @@ files:
341
342
  - utils/enveomics/Scripts/gi2tax.rb
342
343
  - utils/enveomics/Scripts/in_silico_GA_GI.pl
343
344
  - utils/enveomics/Scripts/lib/data/essential.hmm.gz
345
+ - utils/enveomics/Scripts/lib/enveomics.R
344
346
  - utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb
345
347
  - utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb
346
348
  - utils/enveomics/Scripts/lib/enveomics_rb/og.rb
@@ -495,7 +497,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
495
497
  version: '0'
496
498
  requirements: []
497
499
  rubyforge_project:
498
- rubygems_version: 2.2.2
500
+ rubygems_version: 2.7.7
499
501
  signing_key:
500
502
  specification_version: 4
501
503
  summary: MiGA
@@ -1,56 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update: Oct 07 2015
5
- # @license: artistic license 2.0
6
- #
7
- use strict;
8
- use warnings;
9
- use List::Util qw/sum min max/;
10
-
11
- my ($seqs, $minlen, $n__) = @ARGV;
12
- $seqs or die "
13
- Description:
14
- Calculates the N50 value of a set of sequences. Alternatively, it
15
- can calculate other N** values. It also calculates the total number
16
- of sequences and the total added length.
17
-
18
- Usage:
19
- $0 seqs.fa[ minlen[ **]]
20
-
21
- seqs.fa A FastA file containing the sequences.
22
- minlen (optional) The minimum length to take into consideration.
23
- By default: 0.
24
- ** Value N** to calculate. By default: 50 (N50).
25
- ";
26
- $minlen ||= 0;
27
- $n__ ||= 50;
28
-
29
- my @len = ();
30
- open SEQ, "<", $seqs or die "Cannot open file: $seqs: $!\n";
31
- while(<SEQ>){
32
- if(/^>/){
33
- push @len, 0;
34
- }else{
35
- next if /^;/;
36
- chomp;
37
- s/\W//g;
38
- $len[-1]+=length $_;
39
- }
40
- }
41
- close SEQ;
42
- @len = sort { $a <=> $b } map { $_>=$minlen?$_:() } @len;
43
- my $tot = (sum(@len) || 0);
44
-
45
- my $thr = $n__*$tot/100;
46
- my $pos = 0;
47
- for(@len){
48
- $pos+= $_;
49
- if($pos>=$thr){
50
- print "N$n__: $_\n";
51
- last;
52
- }
53
- }
54
- print "Sequences: ".scalar(@len)."\n";
55
- print "Total length: $tot\n";
56
-
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Oct-07-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
-
11
- my($file, $content, $stretch) = @ARGV;
12
- $file or die <<HELP
13
-
14
- Description:
15
- Filter sequences by N-content and presence of long homopolymers.
16
- Usage:
17
- $0 sequences.fa [content [stretch]] > filtered.fa
18
- Where:
19
- sequences.fa Input file in FastA format
20
- content A number between 0 and 1 indicating the maximum proportion of Ns
21
- (1 to turn off, 0.5 by default)
22
- stretch A number indicating the maximum number of consecutive identical
23
- nucleotides allowed (0 to turn off, 100 by default)
24
- filtered.fa Filtered set of sequences.
25
-
26
- HELP
27
- ;
28
- ($content ||= 0.5)+=0;
29
- ($stretch ||= 100)+=0;
30
-
31
- my $good = 0;
32
- my $N = 0;
33
-
34
- FASTA: {
35
- local $/ = "\n>";
36
- open FILE, "<", $file or die "I can not open the file: $file: $!\n";
37
- SEQ: while(<FILE>){
38
- $N++;
39
- s/^;.*//gm;
40
- s/>//g;
41
- my($n,$s) = split /\n/, $_, 2;
42
- (my $clean = $s) =~ s/[^ACTGN]//g;
43
- if($content < 1){
44
- (my $Ns = $clean) =~ s/[^N]//g;
45
- next SEQ if length($Ns)>length($clean)*$content;
46
- }
47
- if($stretch > 0){
48
- for my $nuc (qw(A C T G N)){
49
- next SEQ if $clean =~ m/[$nuc]{$stretch}/;
50
- }
51
- }
52
- print ">$n\n$s\n";
53
- $good++;
54
- }
55
- close FILE;
56
- print STDERR "Total sequences: $N\nAfter filtering: $good\n";
57
- }
58
-
59
-
60
-
@@ -1,38 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M Rodriguez-R
4
- # @update Oct-07-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
-
11
- $#ARGV>=0 or die "
12
- Usage:
13
- $0 seqs.fa... > length.txt
14
-
15
- seqs.fa One or more FastA files.
16
- length.txt A table with the lengths of the sequences.
17
-
18
- ";
19
-
20
- for my $fa (@ARGV){
21
- open FA, "<", $fa or die "Cannot open file: $fa: $!\n";
22
- my $def = '';
23
- my $len = 0;
24
- while(<FA>){
25
- next if /^;/;
26
- if(m/^>(\S+)\s?/){
27
- print "$def\t$len\n" if $def;
28
- $def = $1;
29
- $len = 0;
30
- }else{
31
- s/[^A-Za-z]//g;
32
- $len+= length $_;
33
- }
34
- }
35
- print "$def\t$len\n" if $def;
36
- close FA;
37
- }
38
-
@@ -1,55 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Oct-13-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Symbol;
11
-
12
- my ($file, $base, $outN) = @ARGV;
13
-
14
- $outN ||= 12;
15
- ($file and $base) or die "
16
- Usage
17
- $0 in_file.fa out_base[ no_files]
18
-
19
- in_file.fa Input file in FastA format.
20
- out_base Prefix for the name of the output files. It will
21
- be appended with .<i>.fa, where <i> is a consecutive
22
- number starting in 1.
23
- no_files Number of files to generate. By default: 12.
24
-
25
- ";
26
-
27
-
28
- my @outSym = ();
29
- for my $i (1 .. $outN){
30
- $outSym[$i-1] = gensym;
31
- open $outSym[$i-1], ">", "$base.$i.fa" or
32
- die "I can not create the file: $base.$i.fa: $!\n";
33
- }
34
-
35
-
36
- my($i, $seq) = (-1, '');
37
- open FILE, "<", $file or die "I can not read the file: $file: $!\n";
38
- while(my $ln=<FILE>){
39
- next if $ln=~/^;/;
40
- if($ln =~ m/^>/){
41
- print { $outSym[$i % $outN] } $seq if $seq;
42
- $i++;
43
- $seq = '';
44
- }
45
- $seq.=$ln;
46
- }
47
- print { $outSym[$i % $outN] } $seq if $seq;
48
- close FILE;
49
-
50
- for(my $j=0; $j<$outN; $j++){
51
- close $outSym[$j];
52
- }
53
-
54
- print STDERR "Sequences: ".($i+1)."\nFiles: $outN\n";
55
-