miga-base 1.3.13.10 → 1.3.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/miga/cli/action/browse.rb +1 -1
 - data/lib/miga/cli/action/find.rb +1 -1
 - data/lib/miga/project/result.rb +6 -1
 - data/lib/miga/version.rb +2 -2
 - data/scripts/aai_distances.bash +10 -9
 - data/scripts/ani_distances.bash +10 -9
 - data/utils/find-medoid.R +12 -7
 - data/utils/subclade/pipeline.rb +3 -2
 - data/utils/subclades.R +18 -11
 - metadata +2 -2
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 5ca873c0a599eec97f55a5226b4b0ffc524bf3ffb577ac36b951df0dc36f4ef2
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 11bac7b7c287ff99e019f4c29c5af1949e0039a3d706aa26e2e60cf3845d170d
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: b0d13451f7d4a15e64755ef6c09cde833112f1299e0ee924470bc51c0f02dea6040947d40f837ab0e3d91496423cfe8e5f2f219973afabb4cccb1979f9b3e211
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: a4f98424ab210e01bb549755ded82e0343d4080c1e15daa1545d25051bca40402d44801f14ea8eb9f978470c5892d7b96016b9a226bedaea8917012a4b854169
         
     | 
| 
         @@ -179,7 +179,7 @@ class MiGA::Cli::Action::Browse < MiGA::Cli::Action 
     | 
|
| 
       179 
179 
     | 
    
         
             
                str
         
     | 
| 
       180 
180 
     | 
    
         
             
                  .to_s.unmiga_name
         
     | 
| 
       181 
181 
     | 
    
         
             
                  .sub(/^./, &:upcase)
         
     | 
| 
       182 
     | 
    
         
            -
                  .gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|ani95|aai90| db$| ssu )/, &:upcase)
         
     | 
| 
      
 182 
     | 
    
         
            +
                  .gsub(/(Aai|Ani|Ogs|Cds|Ssu|Rds|Rda|ani95|aai90| db$| ssu )/, &:upcase)
         
     | 
| 
       183 
183 
     | 
    
         
             
                  .sub(/Haai/, 'hAAI')
         
     | 
| 
       184 
184 
     | 
    
         
             
                  .sub(/Mytaxa/, 'MyTaxa')
         
     | 
| 
       185 
185 
     | 
    
         
             
                  .sub(/ pvalue$/, ' p-value')
         
     | 
    
        data/lib/miga/cli/action/find.rb
    CHANGED
    
    
    
        data/lib/miga/project/result.rb
    CHANGED
    
    | 
         @@ -55,10 +55,13 @@ module MiGA::Project::Result 
     | 
|
| 
       55 
55 
     | 
    
         
             
              ##
         
     | 
| 
       56 
56 
     | 
    
         
             
              # Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
         
     | 
| 
       57 
57 
     | 
    
         
             
              def add_result_distances(base, _opts)
         
     | 
| 
       58 
     | 
    
         
            -
                return nil unless result_files_exist?(base,  
     | 
| 
      
 58 
     | 
    
         
            +
                return nil unless result_files_exist?(base, ['.txt']) &&
         
     | 
| 
      
 59 
     | 
    
         
            +
                  (result_files_exist?(base, ['.rds']) ||
         
     | 
| 
      
 60 
     | 
    
         
            +
                   result_files_exist?(base, ['.rda']))
         
     | 
| 
       59 
61 
     | 
    
         | 
| 
       60 
62 
     | 
    
         
             
                r = MiGA::Result.new("#{base}.json")
         
     | 
| 
       61 
63 
     | 
    
         
             
                r.add_file(:rds,    'miga-project.rds')
         
     | 
| 
      
 64 
     | 
    
         
            +
                r.add_file(:rda,    'miga-project.rda')
         
     | 
| 
       62 
65 
     | 
    
         
             
                r.add_file(:rdata,  'miga-project.Rdata') # Legacy file
         
     | 
| 
       63 
66 
     | 
    
         
             
                r.add_file(:matrix, 'miga-project.txt')
         
     | 
| 
       64 
67 
     | 
    
         
             
                r.add_file(:log,    'miga-project.log') # Legacy file
         
     | 
| 
         @@ -84,6 +87,7 @@ module MiGA::Project::Result 
     | 
|
| 
       84 
87 
     | 
    
         | 
| 
       85 
88 
     | 
    
         
             
                r = add_result_iter_clades(base)
         
     | 
| 
       86 
89 
     | 
    
         
             
                r.add_file(:aai_dist_rds, 'miga-project.dist.rds')
         
     | 
| 
      
 90 
     | 
    
         
            +
                r.add_file(:aai_dist_rda, 'miga-project.dist.rda')
         
     | 
| 
       87 
91 
     | 
    
         
             
                r.add_file(:aai_tree,     'miga-project.aai.nwk')
         
     | 
| 
       88 
92 
     | 
    
         
             
                r.add_file(:proposal,     'miga-project.proposed-clades')
         
     | 
| 
       89 
93 
     | 
    
         
             
                r.add_file(:clades_aai90, 'miga-project.aai90-clades')
         
     | 
| 
         @@ -108,6 +112,7 @@ module MiGA::Project::Result 
     | 
|
| 
       108 
112 
     | 
    
         
             
                r = add_result_iter_clades(base)
         
     | 
| 
       109 
113 
     | 
    
         
             
                r.add_file(:ani_tree, 'miga-project.ani.nwk')
         
     | 
| 
       110 
114 
     | 
    
         
             
                r.add_file(:ani_dist_rds, 'miga-project.dist.rds')
         
     | 
| 
      
 115 
     | 
    
         
            +
                r.add_file(:ani_dist_rda, 'miga-project.dist.rda')
         
     | 
| 
       111 
116 
     | 
    
         
             
                r
         
     | 
| 
       112 
117 
     | 
    
         
             
              end
         
     | 
| 
       113 
118 
     | 
    
         | 
    
        data/lib/miga/version.rb
    CHANGED
    
    | 
         @@ -12,7 +12,7 @@ module MiGA 
     | 
|
| 
       12 
12 
     | 
    
         
             
              # - String indicating release status:
         
     | 
| 
       13 
13 
     | 
    
         
             
              #   - rc* release candidate, not released as gem
         
     | 
| 
       14 
14 
     | 
    
         
             
              #   - [0-9]+ stable release, released as gem
         
     | 
| 
       15 
     | 
    
         
            -
              VERSION = [1.3,  
     | 
| 
      
 15 
     | 
    
         
            +
              VERSION = [1.3, 14, 2].freeze
         
     | 
| 
       16 
16 
     | 
    
         | 
| 
       17 
17 
     | 
    
         
             
              ##
         
     | 
| 
       18 
18 
     | 
    
         
             
              # Nickname for the current major.minor version.
         
     | 
| 
         @@ -20,7 +20,7 @@ module MiGA 
     | 
|
| 
       20 
20 
     | 
    
         | 
| 
       21 
21 
     | 
    
         
             
              ##
         
     | 
| 
       22 
22 
     | 
    
         
             
              # Date of the current gem release.
         
     | 
| 
       23 
     | 
    
         
            -
              VERSION_DATE = Date.new(2024, 4,  
     | 
| 
      
 23 
     | 
    
         
            +
              VERSION_DATE = Date.new(2024, 4, 12)
         
     | 
| 
       24 
24 
     | 
    
         | 
| 
       25 
25 
     | 
    
         
             
              ##
         
     | 
| 
       26 
26 
     | 
    
         
             
              # References of MiGA
         
     | 
    
        data/scripts/aai_distances.bash
    CHANGED
    
    | 
         @@ -40,15 +40,16 @@ rm "miga-project.txt.lno" 
     | 
|
| 
       40 
40 
     | 
    
         
             
            # R-ify
         
     | 
| 
       41 
41 
     | 
    
         
             
            cat <<R | R --vanilla
         
     | 
| 
       42 
42 
     | 
    
         
             
            file <- gzfile("miga-project.txt.gz")
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
            )
         
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
      
 43 
     | 
    
         
            +
            text <- readLines(file, n = $LNO + 1, ok = FALSE)
         
     | 
| 
      
 44 
     | 
    
         
            +
            list <- strsplit(text[-1], "\t", fixed = TRUE)
         
     | 
| 
      
 45 
     | 
    
         
            +
            a <- sapply(list, function(x) x[1])
         
     | 
| 
      
 46 
     | 
    
         
            +
            b <- sapply(list, function(x) x[2])
         
     | 
| 
      
 47 
     | 
    
         
            +
            d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
         
     | 
| 
      
 48 
     | 
    
         
            +
            save(a, b, d, file = "miga-project.rda")
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
            non_self <- a != b
         
     | 
| 
      
 51 
     | 
    
         
            +
            if(sum(non_self) > 0) {
         
     | 
| 
      
 52 
     | 
    
         
            +
              h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
         
     | 
| 
       52 
53 
     | 
    
         
             
              len <- length(h[["breaks"]])
         
     | 
| 
       53 
54 
     | 
    
         
             
              write.table(
         
     | 
| 
       54 
55 
     | 
    
         
             
                cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
         
     | 
    
        data/scripts/ani_distances.bash
    CHANGED
    
    | 
         @@ -34,15 +34,16 @@ rm "miga-project.txt.lno" 
     | 
|
| 
       34 
34 
     | 
    
         
             
            # R-ify
         
     | 
| 
       35 
35 
     | 
    
         
             
            cat <<R | R --vanilla
         
     | 
| 
       36 
36 
     | 
    
         
             
            file <- gzfile("miga-project.txt.gz")
         
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
            )
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
      
 37 
     | 
    
         
            +
            text <- readLines(file, n = $LNO + 1, ok = FALSE)
         
     | 
| 
      
 38 
     | 
    
         
            +
            list <- strsplit(text[-1], "\t", fixed = TRUE)
         
     | 
| 
      
 39 
     | 
    
         
            +
            a <- sapply(list, function(x) x[1])
         
     | 
| 
      
 40 
     | 
    
         
            +
            b <- sapply(list, function(x) x[2])
         
     | 
| 
      
 41 
     | 
    
         
            +
            d <- sapply(list, function(x) 1 - (as.numeric(x[3]) / 100))
         
     | 
| 
      
 42 
     | 
    
         
            +
            save(a, b, d, file = "miga-project.rda")
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
            non_self <- a != b
         
     | 
| 
      
 45 
     | 
    
         
            +
            if(sum(non_self) > 0) {
         
     | 
| 
      
 46 
     | 
    
         
            +
              h <- hist((1 - d[non_self]) * 100, breaks = 100, plot = FALSE)
         
     | 
| 
       46 
47 
     | 
    
         
             
              len <- length(h[["breaks"]])
         
     | 
| 
       47 
48 
     | 
    
         
             
              write.table(
         
     | 
| 
       48 
49 
     | 
    
         
             
                cbind(h[["breaks"]][-len], h[["breaks"]][-1], h[["counts"]]),
         
     | 
    
        data/utils/find-medoid.R
    CHANGED
    
    | 
         @@ -16,15 +16,14 @@ if(Sys.getenv("MIGA") == ""){ 
     | 
|
| 
       16 
16 
     | 
    
         
             
              ))
         
     | 
| 
       17 
17 
     | 
    
         
             
            }
         
     | 
| 
       18 
18 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
            find_medoids <- function ( 
     | 
| 
       20 
     | 
    
         
            -
              if( 
     | 
| 
       21 
     | 
    
         
            -
               
     | 
| 
       22 
     | 
    
         
            -
              dist <- enve.df2dist(ani.df, "a", "b", "d", default.d = max(ani.df$d) * 1.2)
         
     | 
| 
      
 19 
     | 
    
         
            +
            find_medoids <- function (a, b, d, out, clades) {
         
     | 
| 
      
 20 
     | 
    
         
            +
              if (length(d) == 0) return(NULL)
         
     | 
| 
      
 21 
     | 
    
         
            +
              dist <- enve.df2dist(cbind(a, b, d), "a", "b", "d", default.d = max(d) * 1.2)
         
     | 
| 
       23 
22 
     | 
    
         
             
              dist <- as.matrix(dist)
         
     | 
| 
       24 
23 
     | 
    
         
             
              cl <- read.table(clades, header = FALSE, sep = "\t", as.is = TRUE)[,1]
         
     | 
| 
       25 
24 
     | 
    
         
             
              cl.s <- c()
         
     | 
| 
       26 
25 
     | 
    
         
             
              medoids <- c()
         
     | 
| 
       27 
     | 
    
         
            -
              for(i in cl){
         
     | 
| 
      
 26 
     | 
    
         
            +
              for (i in cl) {
         
     | 
| 
       28 
27 
     | 
    
         
             
                lab <- strsplit(i, ",")[[1]]
         
     | 
| 
       29 
28 
     | 
    
         
             
                if(length(lab) == 1) {
         
     | 
| 
       30 
29 
     | 
    
         
             
                  lab.s <- lab
         
     | 
| 
         @@ -44,6 +43,12 @@ find_medoids <- function (ani.df, out, clades) { 
     | 
|
| 
       44 
43 
     | 
    
         | 
| 
       45 
44 
     | 
    
         
             
            #= Main
         
     | 
| 
       46 
45 
     | 
    
         
             
            cat("Finding Medoids\n")
         
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
      
 46 
     | 
    
         
            +
            if (grepl("\\.rds$", argv[1])) {
         
     | 
| 
      
 47 
     | 
    
         
            +
              ani <- readRDS(argv[1])
         
     | 
| 
      
 48 
     | 
    
         
            +
              find_medoids(ani$a, ani$b, 1 - (ani$value / 100),
         
     | 
| 
      
 49 
     | 
    
         
            +
                out = argv[2], clades = argv[3])
         
     | 
| 
      
 50 
     | 
    
         
            +
            } else {
         
     | 
| 
      
 51 
     | 
    
         
            +
              load(argv[1]) # assume .rda
         
     | 
| 
      
 52 
     | 
    
         
            +
              find_medoids(a, b, d, out = argv[2], clades = argv[3])
         
     | 
| 
      
 53 
     | 
    
         
            +
            }
         
     | 
| 
       49 
54 
     | 
    
         | 
    
        data/utils/subclade/pipeline.rb
    CHANGED
    
    | 
         @@ -48,9 +48,10 @@ module MiGA::SubcladeRunner::Pipeline 
     | 
|
| 
       48 
48 
     | 
    
         
             
                # Find genomospecies medoids
         
     | 
| 
       49 
49 
     | 
    
         
             
                src = File.expand_path('utils/find-medoid.R', MiGA::MiGA.root_path)
         
     | 
| 
       50 
50 
     | 
    
         
             
                dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
         
     | 
| 
      
 51 
     | 
    
         
            +
                dat = "../../09.distances/#{dir}/miga-project.rda"
         
     | 
| 
      
 52 
     | 
    
         
            +
                dat = "../../09.distances/#{dir}/miga-project.rds" unless File.exist?(dat)
         
     | 
| 
       51 
53 
     | 
    
         
             
                run_cmd([
         
     | 
| 
       52 
     | 
    
         
            -
                  'Rscript', src,  
     | 
| 
       53 
     | 
    
         
            -
                  'miga-project.gsp-medoids', 'miga-project.gsp-clades'
         
     | 
| 
      
 54 
     | 
    
         
            +
                  'Rscript', src, dat, 'miga-project.gsp-medoids', 'miga-project.gsp-clades'
         
     | 
| 
       54 
55 
     | 
    
         
             
                ])
         
     | 
| 
       55 
56 
     | 
    
         
             
                if File.exist? 'miga-project.gsp-clades.sorted'
         
     | 
| 
       56 
57 
     | 
    
         
             
                  File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
         
     | 
    
        data/utils/subclades.R
    CHANGED
    
    | 
         @@ -338,18 +338,25 @@ ggplotColours <- function (n = 6, h = c(0, 360) + 15, alpha = 1) { 
     | 
|
| 
       338 
338 
     | 
    
         
             
            }
         
     | 
| 
       339 
339 
     | 
    
         | 
| 
       340 
340 
     | 
    
         
             
            ani_distance <- function (ani_file, sel) {
         
     | 
| 
       341 
     | 
    
         
            -
              # Try to locate rds, otherwise read gzipped table
         
     | 
| 
       342 
     | 
    
         
            -
               
     | 
| 
       343 
     | 
    
         
            -
              if (file.exists( 
     | 
| 
       344 
     | 
    
         
            -
                 
     | 
| 
      
 341 
     | 
    
         
            +
              # Try to locate rda, then rds, and otherwise read gzipped table
         
     | 
| 
      
 342 
     | 
    
         
            +
              rda <- gsub("\\.txt\\.gz$", ".rda", ani_file)
         
     | 
| 
      
 343 
     | 
    
         
            +
              if (file.exists(rda)) {
         
     | 
| 
      
 344 
     | 
    
         
            +
                load(rda) # Should already contain `a`, `b`, and `d` as vectors
         
     | 
| 
       345 
345 
     | 
    
         
             
              } else {
         
     | 
| 
       346 
     | 
    
         
            -
                 
     | 
| 
       347 
     | 
    
         
            -
             
     | 
| 
      
 346 
     | 
    
         
            +
                rds <- gsub("\\.txt\\.gz$", ".rds", ani_file)
         
     | 
| 
      
 347 
     | 
    
         
            +
                if (file.exists(rds)) {
         
     | 
| 
      
 348 
     | 
    
         
            +
                  sim <- readRDS(rds)
         
     | 
| 
      
 349 
     | 
    
         
            +
                } else {
         
     | 
| 
      
 350 
     | 
    
         
            +
                  sim <- read.table(
         
     | 
| 
      
 351 
     | 
    
         
            +
                    gzfile(ani_file), sep = "\t", header = TRUE, as.is = TRUE
         
     | 
| 
      
 352 
     | 
    
         
            +
                  )
         
     | 
| 
      
 353 
     | 
    
         
            +
                }
         
     | 
| 
       348 
354 
     | 
    
         | 
| 
       349 
     | 
    
         
            -
             
     | 
| 
       350 
     | 
    
         
            -
             
     | 
| 
       351 
     | 
    
         
            -
             
     | 
| 
       352 
     | 
    
         
            -
             
     | 
| 
      
 355 
     | 
    
         
            +
                # Extract individual variables to deal with very large matrices
         
     | 
| 
      
 356 
     | 
    
         
            +
                a <- sim$a
         
     | 
| 
      
 357 
     | 
    
         
            +
                b <- sim$b
         
     | 
| 
      
 358 
     | 
    
         
            +
                d <- 1 - (sim$value / 100)
         
     | 
| 
      
 359 
     | 
    
         
            +
              }
         
     | 
| 
       353 
360 
     | 
    
         | 
| 
       354 
361 
     | 
    
         
             
              # If there is no data, end process
         
     | 
| 
       355 
362 
     | 
    
         
             
              if (length(a) == 0) return(NULL)
         
     | 
| 
         @@ -359,7 +366,7 @@ ani_distance <- function (ani_file, sel) { 
     | 
|
| 
       359 
366 
     | 
    
         
             
              if (!is.na(sel) && file.exists(sel)) {
         
     | 
| 
       360 
367 
     | 
    
         
             
                say("Filter selection")
         
     | 
| 
       361 
368 
     | 
    
         
             
                ids <- read.table(sel, sep = "\t", head = FALSE, as.is = TRUE)[,1]
         
     | 
| 
       362 
     | 
    
         
            -
                sel.idx <- which( 
     | 
| 
      
 369 
     | 
    
         
            +
                sel.idx <- which(a %in% ids & b %in% ids)
         
     | 
| 
       363 
370 
     | 
    
         
             
                a <- a[sel.idx]
         
     | 
| 
       364 
371 
     | 
    
         
             
                b <- b[sel.idx]
         
     | 
| 
       365 
372 
     | 
    
         
             
                d <- d[sel.idx]
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: miga-base
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.3. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 1.3.14.2
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Luis M. Rodriguez-R
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2024-04- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2024-04-12 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: daemons
         
     |