RubyGems - rbbt-dm - Versions diffs - 1.1.18 → 1.1.19 - Mend

rbbt-dm 1.1.18 → 1.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml +4 -4
data/lib/rbbt/matrix/differential.rb +1 -1
data/share/R/MA.R +545 -0
data/share/R/barcode.R +41 -0
data/share/R/heatmap.3.R +516 -0
metadata +5 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: ba8688bb3e7913ef167e240940388ab69fa33e5c
-  data.tar.gz: 40f43f13ce557e8ae994e651cd11ab7e054ae3b0
+  metadata.gz: 27188d3dbc9c8d88409655bea507f70bfd14e9e5
+  data.tar.gz: 69bc8947fb0f736721eff76d58ec6502f4cfb840
 SHA512:
-  metadata.gz: 8ed0a89e1c2d6a449b86c68536c1a55626f7a7f074714f227d8a862d1974096a40027041c254482fce4edecdd53f2518da72ce29e0ac4f106934f629fb8e29fe
-  data.tar.gz: e26cd0ce1e3ab61b2774f89f7a0c83ae63d440b4016abcb6118421a56126a21ef5f40fd1fcf3846aaa128fe508d4cffb8c89edd426affac0d36ab1002642f174
+  metadata.gz: bcd2fa58747a8373cc7cf1ccdfd2f5a27e2ed55f40152e9eb72c0ca15130db40d24fc92c554ddb6f6c1ad42cf4640b04fd8f93a605134692f9a761a7cc447da6
+  data.tar.gz: e807c21ef69d7a7aebbccb5f1710b199fe2a573c485cd2d04adddf42aff2042762aca332c54a2550ad514fd2a9816f3c2288b82bc3d49ac229eac301240a3fb3

data/lib/rbbt/matrix/differential.rb CHANGED Viewed

@@ -47,7 +47,7 @@ class Matrix
       cmd = <<-EOS
-source('#{Rbbt.share.R["MA.R"].find}')
+source('#{Rbbt.share.R["MA.R"].find(:lib)}')
 data = rbbt.dm.matrix.differential(#{ R.ruby2R data_file },
   main = #{R.ruby2R(main_samples)},

data/share/R/MA.R ADDED Viewed

@@ -0,0 +1,545 @@
+library(limma)
+#########################################################################
+# Model processing
+# Ratio
+# Per-row mean of `expr` taken across all of its columns, ignoring NA values.
+# Returns a numeric vector named after the rows of `expr`.
+rbbt.dm.matrix.differential.ratio.oneside <- function(expr){
+    row.mean <- function(values){ mean(values, na.rm = TRUE) }
+    averages <- apply(expr, 1, row.mean)
+    names(averages) <- rownames(expr)
+    averages
+}
+# Difference between the per-row means of `expr` (main group) and the per-row
+# means of `contrast`. The result is named after the rows of `expr`.
+rbbt.dm.matrix.differential.ratio.twoside <- function(expr, contrast){
+    main.mean <- rbbt.dm.matrix.differential.ratio.oneside(expr)
+    contrast.mean <- rbbt.dm.matrix.differential.ratio.oneside(contrast)
+    difference <- main.mean - contrast.mean
+    names(difference) <- rownames(expr)
+    difference
+}
+# Limma
+# One-sided limma analysis.
+#
+#   expr         - matrix (or data.frame) with one column per sample
+#   subset       - names of the columns that form the group of interest; when
+#                  NULL, lmFit is called with its default design
+#   eBayes.trend - passed to eBayes as `trend`
+#
+# Returns list(t, p.values) as produced by lmFit + eBayes. The p values of
+# entries with a negative t statistic are negated, so the sign of the p value
+# carries the direction of the change.
+rbbt.dm.matrix.differential.limma.oneside <- function(expr, subset = NULL, eBayes.trend=FALSE){
+    if (is.null(subset)){
+        fit <- lmFit(expr);
+    }else{
+        design = rep(0, dim(expr)[2]);
+        # colnames() works for both matrices and data.frames; names() is NULL
+        # for a matrix and would leave the design all zeros
+        design[colnames(expr) %in% subset] = 1;
+        # The design-based fit belongs to this branch only: `design` does not
+        # exist when no subset is given
+        fit <- lmFit(expr, design);
+    }
+    fit <- eBayes(fit, trend=eBayes.trend);
+    sign = fit$t < 0;
+    sign[is.na(sign)] = FALSE;
+    fit$p.value[sign] =  - fit$p.value[sign];
+    return(list(t= fit$t, p.values= fit$p.value));
+}
+# Two-sided limma analysis: main group against contrast group.
+#
+# The design has an `intercept` column (1 for every sample) and an `expr`
+# column (1 only for samples named in `subset.main`). The second coefficient
+# is the one reported. p values of rows with a negative t are negated.
+# NOTE(review): columns that are in neither group keep intercept = 1 and
+# expr = 0, so they are modelled together with the contrast samples - confirm
+# this is intended when the full matrix is passed in.
+rbbt.dm.matrix.differential.limma.twoside <- function(expr, subset.main, subset.contrast, eBayes.trend=FALSE){
+    sample.names <- dimnames(expr)[[2]]
+    n.samples <- dim(expr)[2]
+    design <- cbind(intercept = rep(1, n.samples), expr = rep(0, n.samples))
+    is.main <- sample.names %in% subset.main
+    is.contrast <- sample.names %in% subset.contrast
+    design[is.main, ] <- 1
+    design[is.contrast, 'intercept'] <- 1
+    model <- eBayes(lmFit(expr, design), trend = eBayes.trend)
+    negative <- model$t[, 2] < 0
+    negative[is.na(negative)] <- FALSE
+    model$p.value[negative, 2] <- -model$p.value[negative, 2]
+    list(t = model$t[, 2], p.values = model$p.value[, 2])
+}
+# Guess whether the matrix `m` still has to be log2-transformed.
+#
+#   two.channel - TRUE: answer TRUE when `m` holds no negative value
+#                 FALSE or NULL: answer TRUE when the largest value exceeds 100
+#
+# NULL is accepted because rbbt.dm.matrix.differential passes its default
+# `two.channel = NULL` straight through; `if (NULL)` would otherwise abort
+# with "argument is of length zero".
+rbbt.dm.matrix.guess.log2 <- function(m, two.channel){
+    if (is.null(two.channel)){ two.channel = FALSE }
+    if (two.channel){
+        return (sum(m < 0, na.rm = TRUE) == 0);
+    }else{
+        return (max(m, na.rm = TRUE) > 100);
+    }
+}
+# Differential analysis of a matrix stored in a TSV file.
+#
+# Loads `file` with rbbt.tsv, optionally log2-transforms it, computes the
+# per-row ratio (mean of the `main` columns, minus the mean of the `contrast`
+# columns when a contrast is given) and, when limma succeeds, signed t and
+# p values plus FDR-adjusted p values.
+#
+#   file         - path handed to rbbt.tsv
+#   main         - names of the columns of the main group
+#   contrast     - names of the columns of the contrast group; NULL selects
+#                  the one-sided analysis
+#   log2         - TRUE: log2-transform; FALSE: leave as is; NULL: decide with
+#                  rbbt.dm.matrix.guess.log2
+#   outfile      - when not NULL the result is written there with
+#                  rbbt.tsv.write and NULL is returned
+#   key.field    - name of the key field in the output (default "ID")
+#   two.channel  - forwarded to rbbt.dm.matrix.guess.log2 when log2 is NULL
+#   namespace    - namespace for the output header (default
+#                  rbbt.default_code("Hsa"))
+#   eBayes.trend - forwarded to the limma helpers
+#
+# Returns a data.frame with column `ratio` and, if limma produced usable
+# statistics, `t.values`, `p.values` and `adjusted.p.values` (negative p
+# values mark downregulation); or NULL when `outfile` is given.
+rbbt.dm.matrix.differential <- function(file, main, contrast = NULL, log2 = FALSE, outfile = NULL, key.field = NULL, two.channel = NULL, namespace = NULL, eBayes.trend = FALSE){
+    if (is.null(namespace)) namespace = rbbt.default_code("Hsa")
+    data = data.matrix(rbbt.tsv(file));
+    dimnames = dimnames(data)
+    original.dimnames = dimnames;
+    dimnames[[1]] = make.names(dimnames[[1]])
+    dimnames[[2]] = make.names(dimnames[[2]])
+    dimnames(data) <- dimnames
+    main <- make.names(main);
+    # make.names(NULL) is character(0), which would hide a missing contrast
+    # from the is.null() checks below; only sanitize a contrast that exists
+    if (! is.null(contrast)){ contrast <- make.names(contrast) }
+    one.sided = is.null(contrast)
+    ids = rownames(data);
+    if (is.null(key.field)){ key.field = "ID" }
+    if (is.null(log2)){
+        log2 = rbbt.dm.matrix.guess.log2(data, two.channel)
+    }
+    if (log2){
+        data = log2(data);
+        # log2(0) is -Inf: replace it by the smallest remaining value.
+        # which() and na.rm keep NA entries from poisoning the replacement.
+        minus.inf = which(data == -Inf)
+        if (length(minus.inf) > 0){
+            data[minus.inf] = min(data[data != -Inf], na.rm = TRUE)
+        }
+    }
+    if (one.sided){
+        ratio = rbbt.dm.matrix.differential.ratio.oneside(subset(data, select=main));
+    }else{
+        ratio = rbbt.dm.matrix.differential.ratio.twoside(subset(data, select=main), subset(data, select=contrast));
+    }
+    # First try limma on the complete matrix, then on the relevant columns
+    # only. The value of tryCatch is used as the result: an assignment made
+    # inside a handler function would stay local to that handler.
+    limma = tryCatch({
+        if (one.sided){
+            rbbt.dm.matrix.differential.limma.oneside(data, main, eBayes.trend=eBayes.trend)
+        }else{
+            rbbt.dm.matrix.differential.limma.twoside(data, main, contrast, eBayes.trend=eBayes.trend)
+        }
+    }, error=function(x){
+        cat("Limma failed for complete dataset. Trying just subset.\n", file=stderr());
+        cat(conditionMessage(x), "\n", sep="", file=stderr());
+        tryCatch({
+            if (one.sided){
+                rbbt.dm.matrix.differential.limma.oneside(subset(data, select=main), eBayes.trend=eBayes.trend)
+            }else{
+                rbbt.dm.matrix.differential.limma.twoside(subset(data, select=c(main, contrast)), main, contrast, eBayes.trend=eBayes.trend)
+            }
+        }, error=function(x){
+            cat("Limma failed for subset dataset.\n", file=stderr());
+            cat(conditionMessage(x), "\n", sep="", file=stderr());
+            NULL
+        })
+    })
+    if (! is.null(limma) && sum(is.na(limma$t)) != length(limma$t)){
+        # The one-sided helper returns one-column matrices; indexing a matrix
+        # with [ids] looks at names(), not rownames(), so flatten first
+        by.id = function(values){
+            if (is.matrix(values)){
+                flat = values[,1]
+                names(flat) = rownames(values)
+                values = flat
+            }
+            values[ids]
+        }
+        result = data.frame(ratio = ratio[ids], t.values = by.id(limma$t), p.values = by.id(limma$p.values))
+        result["adjusted.p.values"] = p.adjust(abs(result$p.values), "fdr") * sign(result$p.values)
+    }else{
+        result = data.frame(ratio = ratio)
+    }
+    rownames(result) <- original.dimnames[[1]]
+    if (is.null(outfile)){
+        return(result);
+    }else{
+        rbbt.tsv.write(outfile, result, key.field, paste(":type=:list#:cast=:to_f#:namespace=", namespace, "#comment=Negative values mark downregulation", sep=""));
+        return(NULL);
+    }
+}
+############################################################################
+############################################################################
+############################################################################
+############################################################################
+############################################################################
+# OLD STUFF
+#MA.get_order <- function(values){
+#    orders = values;
+#    orders[,] = NA;
+#
+#    for (i in 1:dim(values)[2]){
+#        positions = names(sort(values[,i],decreasing=T,na.last=NA));
+#        orders[,i] = NA;
+#        orders[positions,i] = 1:length(positions)
+#    }
+#    orders
+#}
+#
+#MA.guess.do.log2 <- function(m, two.channel){
+#    if (two.channel){
+#        return (sum(m < 0, na.rm = TRUE) == 0);
+#    }else{
+#        return (max(m, na.rm = TRUE) > 100);
+#    }
+#}
+#
+#MA.translate <- function(m, trans){
+#    trans[trans==""] = NA;
+#    trans[trans=="NO MATCH"] = NA;
+#
+#    missing = length(trans) - dim(m)[1];
+#
+## If extra genes
+#    if (missing < 0){
+#        trans = c(trans,rep(NA, - missing));
+#        missing = 0;
+#    }
+#    n = apply(m,2,function(x){
+## Complete data with missing genes
+#         x.complete = c(x,rep(NA, missing));
+#         tapply(x.complete, factor(trans), median)
+#         });
+#    n[sort(rownames(n),index.return=T)$ix,]
+#}
+#
+## Conditions
+#
+#MA.conditions.has_control <- function(x){
+#    keywords = c('none', 'control', 'normal', 'wild', 'baseline', 'untreat', 'uninfected', 'universal', 'reference', 'vehicle', 'w.t.','wt');
+#    for(keyword in keywords){
+#        control = grep(keyword, x, ignore.case = TRUE);
+#        if (any(control)){
+#            return(x[control[1]]);
+#        }
+#    }
+#    return(NULL)
+#}
+#
+#MA.condition.values <- function(values){
+#    control = MA.conditions.has_control(values);
+#
+#    values.factor = factor(values);
+#    values.levels = levels(values.factor);
+#
+## If there is a control state remove it from sorting
+#    if (!is.null(control))
+#        values.levels = values.levels[values.levels != control];
+#
+#
+## Use numeric sort if they all have numbers
+#    if (length(grep('^ *[0-9]+',values.levels,perl=TRUE)) == length(values.levels)){
+#        ix = sort(as.numeric(sub('^ *([0-9]+).*',"\\1",values.levels)), decreasing = T, index.return = TRUE)$ix
+#    }else{
+#        ix = sort(values.levels, decreasing = T, index.return = TRUE)$ix
+#    }
+#
+#    return(list(values = values.levels[ix], control = control));
+#}
+#
+#
+##########################################################################
+## Model processing
+#
+## Ratio
+#MA.ratio.two_channel <- function(m, conditions, main){
+#    main = m[,conditions==main];
+#    if (!is.null(dim(main))){
+#        main = apply(main, 1 ,function(x){mean(x, na.rm = TRUE)});
+#    }
+#    return(main);
+#}
+#
+#MA.ratio.contrast <- function(m, conditions, main, contrast){
+#    main = m[,conditions==main];
+#    if (!is.null(dim(main))){
+#        main = apply(main, 1 ,function(x){mean(x, na.rm = TRUE)});
+#    }
+#
+#    contrast = m[,conditions==contrast];
+#    if (!is.null(dim(contrast))){
+#        contrast = apply(contrast, 1 ,function(x){mean(x, na.rm = TRUE)});
+#    }
+#
+#    return (main - contrast);
+#}
+#
+#
+## Limma
+#
+#MA.limma.two_channel <- function(m, conditions, main){
+#    if (sum(conditions == main) < 3){
+#        return(NULL);
+#    }
+#
+#    design = rep(0,dim(m)[2]);
+#    design[conditions == main] = 1;
+#
+## We need to subset the columns because of a problem with NA values. This
+## might affect eBayes variance estimations, thats my guess anyway...
+#
+#    fit <- lmFit(m[,design == 1],rep(1, sum(design)));
+#
+#    tryCatch({
+#             fit <- eBayes(fit);
+#             sign = fit$t < 0;
+#             sign[is.na(sign)] = FALSE;
+#             fit$p.value[sign] =  - fit$p.value[sign];
+#             return(list(t= fit$t, p.values= fit$p.value));
+#     }, error=function(x){
+#             print("Exception caught in eBayes", file=stderr);
+#             print(x, file=stderr);
+#     })
+#
+#    return(NULL);
+#}
+#
+#MA.limma.contrast <- function(m, conditions, main, contrast){
+#    if (sum(conditions == main) + sum(conditions == contrast) < 3){
+#        return(NULL);
+#    }
+#    m = cbind(m[,conditions == main],m[,conditions == contrast]);
+#
+#    design = cbind(rep(1,dim(m)[2]), rep(0,dim(m)[2]));
+#    colnames(design) <-c('intercept', 'main');
+#    design[1:sum(conditions==main),2] = 1;
+#
+#
+#    fit <- lmFit(m,design);
+#    tryCatch({
+#             fit <- eBayes(fit);
+#             sign = fit$t[,2] < 0;
+#             sign[is.na(sign)] = FALSE;
+#             fit$p.value[sign,2] = - fit$p.value[sign,2]
+#             return(list(t= fit$t[,2], p.values= fit$p.value[,2] ));
+#    }, error=function(x){
+#             print("Exception caught in eBayes", file=stderr);
+#             print(x, file=stderr);
+#    })
+#
+#    return(NULL);
+#}
+#
+#
+##########################################################################
+## Process conditions
+#
+#MA.strip_blanks <- function(text){
+#    text = sub(' *$', '' ,text);
+#    text = sub('^ *', '' ,text);
+#
+#    return(text);
+#}
+#
+#MA.orders <- function(ratios, t){
+#    best  = vector();
+#    names = vector();
+#    for (name in colnames(ratios)){
+#        if (sum(colnames(t) == name) > 0){
+#            best = cbind(best, t[,name]);
+#            names = c(names, name);
+#        }else{
+#            best = cbind(best, ratios[,name]);
+#            names = c(names, paste(name,'[ratio]', sep=" "));
+#        }
+#    }
+#    rownames(best)   <- rownames(ratios);
+#    orders           <- as.data.frame(MA.get_order(best));
+#    colnames(orders) <- names;
+#
+#    return(orders);
+#}
+#
+#MA.process_conditions.contrasts <- function(m, conditions, two.channel){
+#    max_levels             = 10;
+#    max_levels_control     = 1;
+#
+#
+#    values = MA.condition.values(conditions);
+#
+#
+#    ratios   = vector();
+#    t       = vector();
+#    p.values = vector();
+#
+#    ratio_names = vector();
+#    t_names     = vector();
+#
+#    if (!is.null(values$control)){
+#        contrast = values$control;
+#        for (main in values$values){
+#            name =  paste(main, contrast, sep = " <=> ")
+#
+#                ratio       = MA.ratio.contrast(m, conditions, main, contrast);
+#            ratio_names = c(ratio_names, name);
+#            ratios      = cbind(ratios, ratio);
+#
+#            res      = MA.limma.contrast(m, conditions, main, contrast);
+#            if (!is.null(res)){
+#                t_names = c(t_names, name);
+#                t           = cbind(t, res$t);
+#                p.values     = cbind(p.values, res$p.values);
+#            }
+#        }
+#    }
+#
+#
+#    if (length(values$values) <= max_levels_control || (is.null(values$control) && !two.channel && length(values$values) <= max_levels )){
+#
+#        remaining = values$values;
+#        for (main in values$values){
+#            remaining = remaining[remaining != main];
+#            for (contrast in remaining){
+#                name =  paste(main, contrast, sep = " <=> ");
+#
+#                ratio       = MA.ratio.contrast(m, conditions, main, contrast);
+#                ratio_names = c(ratio_names, name);
+#                ratios      = cbind(ratios, ratio);
+#
+#                res      = MA.limma.contrast(m, conditions, main, contrast);
+#                if (!is.null(res)){
+#                    t_names  = c(t_names, name);
+#                    t        = cbind(t, res$t);
+#                    p.values = cbind(p.values, res$p.values);
+#                }
+#            }
+#        }
+#    }
+#
+#
+#    if (length(ratio_names) != 0){
+#        ratio_names = as.vector(sapply(ratio_names, MA.strip_blanks));
+#        colnames(ratios) <- ratio_names
+#    }
+#
+#    if (length(t_names) != 0){
+#        t_names = as.vector(sapply(t_names, MA.strip_blanks));
+#        colnames(t) <- t_names;
+#        colnames(p.values) <- t_names;
+#    }
+#
+#
+#    return(list(ratios = ratios, t=t, p.values = p.values));
+#}
+#
+#MA.process_conditions.two_channel <- function(m, conditions){
+#    values = MA.condition.values(conditions);
+#
+#    all_values = values$values;
+#    if (!is.null(values$control)){
+#        all_values = c(all_values, values$control);
+#    }
+#
+#
+#    ratios   = vector();
+#    t        = vector();
+#    p.values = vector();
+#
+#    ratio_names = vector();
+#    t_names     = vector();
+#
+#
+#    for (main in all_values){
+#        name =  main;
+#
+#        ratio       = MA.ratio.two_channel(m, conditions, main);
+#        ratio_names = c(ratio_names, name);
+#        ratios      = cbind(ratios, ratio);
+#
+#        res      = MA.limma.two_channel(m, conditions, main);
+#        if (!is.null(res)){
+#            t_names  = c(t_names, name);
+#            t        = cbind(t, res$t);
+#            p.values = cbind(p.values, res$p.values);
+#        }
+#    }
+#
+#    if (length(ratio_names) != 0){
+#        ratio_names = as.vector(sapply(ratio_names, MA.strip_blanks));
+#        colnames(ratios) <- ratio_names
+#    }
+#
+#    if (length(t_names) != 0){
+#        t_names = as.vector(sapply(t_names, MA.strip_blanks));
+#        colnames(t) <- t_names;
+#        colnames(p.values) <- t_names;
+#    }
+#
+#    return(list(ratios = ratios, t=t, p.values = p.values));
+#}
+#
+#
+#
+## Process microarray matrix
+#
+#MA.process <- function(m, conditions_list, two.channel = FALSE){
+#
+#    ratios   = vector();
+#    t        = vector();
+#    p.values = vector();
+#
+#    for(type in colnames(conditions_list)){
+#        conditions = conditions_list[,type]
+#
+#            if (two.channel){
+#                res = MA.process_conditions.two_channel(m, conditions);
+#                if (length(res$ratios) != 0){    colnames(res$ratios) <- sapply(colnames(res$ratios),function(x){paste(type,x,sep=": ")});     ratios   = cbind(ratios,res$ratios);}
+#                if (length(res$t) != 0){         colnames(res$t) <- sapply(colnames(res$t),function(x){paste(type,x,sep=": ")});               t        = cbind(t,res$t);}
+#                if (length(res$p.values) != 0){  colnames(res$p.values) <- sapply(colnames(res$p.values),function(x){paste(type,x,sep=": ")}); p.values = cbind(p.values,res$p.values);}
+#            }
+#
+#        res = MA.process_conditions.contrasts(m, conditions, two.channel);
+#        if (length(res$ratios) != 0){    colnames(res$ratios) <- sapply(colnames(res$ratios),function(x){paste(type,x,sep=": ")});     ratios   = cbind(ratios,res$ratios);}
+#        if (length(res$t) != 0){         colnames(res$t) <- sapply(colnames(res$t),function(x){paste(type,x,sep=": ")});               t        = cbind(t,res$t);}
+#        if (length(res$p.values) != 0){  colnames(res$p.values) <- sapply(colnames(res$p.values),function(x){paste(type,x,sep=": ")}); p.values = cbind(p.values,res$p.values);}
+#    }
+#
+#    orders <- MA.orders(ratios,t);
+#    return(list(ratios = ratios, t=t, p.values = p.values, orders=orders));
+#}
+#
+#
+#MA.save <- function(prefix, orders, ratios, t , p.values, experiments, description = NULL) {
+#    if (is.null(orders)){
+#        cat("No suitable samples for analysis\n")
+#            write(file=paste(prefix,'skip',sep="."), "No suitable samples for analysis" );
+#    } else {
+#        write.table(file=paste(prefix,'orders',sep="."), orders, sep="\t",  row.names=F, col.names=F, quote=F);
+#        write.table(file=paste(prefix,'codes',sep="."), rownames(orders), sep="\t",  row.names=F, col.names=F, quote=F);
+#        write.table(file=paste(prefix,'logratios',sep="."), ratios, sep="\t",  row.names=F, col.names=F, quote=F);
+#        write.table(file=paste(prefix,'t',sep="."), t, sep="\t",  row.names=F, col.names=F, quote=F);
+#        write.table(file=paste(prefix,'pvalues',sep="."), p.values, sep="\t",  row.names=F, col.names=F, quote=F);
+#        write.table(file=paste(prefix,'experiments',sep="."), experiments, sep="\t",  row.names=F, col.names=F, quote=F);
+#
+#        write(file=paste(prefix,'description',sep="."),  description)
+#    }
+#}
+#
+#MA.load <- function(prefix, orders = TRUE, logratios = TRUE, t = TRUE, p.values = TRUE){
+#    data = list();
+#    genes <- scan(file=paste(prefix,'codes',sep="."),sep="\n",quiet=T,what=character());
+#    experiments <- scan(file=paste(prefix,'experiments',sep="."),sep="\n",quiet=T,what=character());
+#
+#    experiments.no.ratio = experiments[- grep('ratio', experiments)];
+#
+#    if (orders){
+#        orders <- read.table(file=paste(prefix,'orders',sep="."),sep="\t");
+#        rownames(orders) <- genes;
+#        colnames(orders) <- experiments;
+#        data$orders=orders;
+#    }
+#    if (logratios){
+#        logratios <- read.table(file=paste(prefix,'logratios',sep="."),sep="\t");
+#        rownames(logratios) <- genes;
+#        colnames(logratios) <- experiments;
+#        data$logratios=logratios;
+#    }
+#    if (t){
+#        t <- read.table(file=paste(prefix,'t',sep="."),sep="\t");
+#        rownames(t) <- genes;
+#        colnames(t) <- experiments.no.ratio;
+#        data$t=t;
+#    }
+#    if (p.values){
+#        p.values <- read.table(file=paste(prefix,'pvalues',sep="."),sep="\t");
+#        rownames(p.values) <- genes;
+#        colnames(p.values) <- experiments.no.ratio;
+#        data$p.values=p.values;
+#    }
+#
+#
+#    return(data);
+#}

data/share/R/barcode.R ADDED Viewed

@@ -0,0 +1,41 @@
+# Write a "barcode" of the matrix in `matrix_file` to `output_file`.
+#
+# For every row a threshold of mean + sd.factor * sd (NA values ignored) is
+# computed; each value of the row is then written as TRUE when it lies above
+# that threshold and FALSE otherwise. The output is tab separated, with a
+# header line made of "#", `key.field` and the column names.
+#
+#   matrix_file - path handed to rbbt.tsv
+#   output_file - path of the file to write
+#   sd.factor   - number of standard deviations above the row mean
+#   key.field   - name of the key column in the header
+rbbt.GE.barcode <- function(matrix_file, output_file, sd.factor = 2, key.field = "Ensembl Gene ID"){
+  data = rbbt.tsv(matrix_file)
+  data.mean = rowMeans(data, na.rm=TRUE)
+  data.sd = apply(data, 1, sd, na.rm=TRUE)
+  # Plain named vector, so thresholds can be looked up by row name
+  data.threshold = data.mean + sd.factor * data.sd
+  names(data.threshold) = names(data.mean)
+  rm(data.mean)
+  rm(data.sd)
+  file.barcode = file(output_file, 'w')
+  # Close the connection even when writing fails half way through
+  on.exit(close(file.barcode), add = TRUE)
+  cat("#", file = file.barcode)
+  cat(key.field, file = file.barcode)
+  cat("\t", file = file.barcode)
+  cat(colnames(data), file = file.barcode, sep="\t")
+  cat("\n", file = file.barcode)
+  for (gene in rownames(data)){
+    barcode = (data[gene,] - data.threshold[gene]) > 0
+    cat(gene, file = file.barcode)
+    cat("\t", file = file.barcode)
+    cat(barcode, file = file.barcode, sep = "\t")
+    cat("\n", file = file.barcode)
+  }
+}
+# Cluster every column of the matrix in `matrix_file` with Mclust and write
+# the resulting classifications to `output_file` through rbbt.tsv.write,
+# using `key.field` as the key field name.
+# NOTE(review): names() on the apply() result sets only the first element
+# name to "Cluster"; presumably a single-column input is expected - confirm.
+rbbt.GE.activity_cluster <- function(matrix_file, output_file, key.field = "ID"){
+    library(mclust)
+    expression <- rbbt.tsv(matrix_file)
+    classify <- function(values){
+        Mclust(values)$classification
+    }
+    classes <- apply(expression, 2, classify)
+    rownames(classes) <- rownames(expression)
+    names(classes) <- "Cluster"
+    rbbt.tsv.write(output_file, classes, key.field)
+}

data/share/R/heatmap.3.R ADDED Viewed

@@ -0,0 +1,516 @@
+# FROM: https://gist.github.com/nachocab/3853004
+#
+# EXAMPLE USAGE
+# example of colsidecolors rowsidecolors (single column, single row)
+#mat <- matrix(1:100, byrow=T, nrow=10)
+#column_annotation <- sample(c("red", "blue", "green"), 10, replace=T)
+#column_annotation <- as.matrix(column_annotation)
+#colnames(column_annotation) <- c("Variable X")
+#
+#row_annotation <- sample(c("red", "blue", "green"), 10, replace=T)
+#row_annotation <- as.matrix(t(row_annotation))
+#rownames(row_annotation) <- c("Variable Y")
+#
+#heatmap.3(mat, RowSideColors=row_annotation, ColSideColors=column_annotation)
+#
+## multiple column and row
+#mat <- matrix(1:100, byrow=T, nrow=10)
+#column_annotation <- matrix(sample(c("red", "blue", "green"), 20, replace=T), ncol=2)
+#colnames(column_annotation) <- c("Variable X1", "Variable X2")
+#
+#row_annotation <- matrix(sample(c("red", "blue", "green"), 20, replace=T), nrow=2)
+#rownames(row_annotation) <- c("Variable Y1", "Variable Y2")
+#
+#heatmap.3(mat, RowSideColors=row_annotation, ColSideColors=column_annotation)
+#
+# CODE
+heatmap.3 <- function(x,
+                      Rowv = TRUE, Colv = if (symm) "Rowv" else TRUE,
+                      distfun = dist,
+                      hclustfun = hclust,
+                      dendrogram = c("both","row", "column", "none"),
+                      symm = FALSE,
+                      scale = c("none","row", "column"),
+                      na.rm = TRUE,
+                      revC = identical(Colv,"Rowv"),
+                      add.expr,
+                      breaks,
+                      symbreaks = max(x < 0, na.rm = TRUE) || scale != "none",
+                      col = "heat.colors",
+                      colsep,
+                      rowsep,
+                      sepcolor = "white",
+                      sepwidth = c(0.05, 0.05),
+                      cellnote,
+                      notecex = 1,
+                      notecol = "cyan",
+                      na.color = par("bg"),
+                      trace = c("none", "column","row", "both"),
+                      tracecol = "cyan",
+                      hline = median(breaks),
+                      vline = median(breaks),
+                      linecol = tracecol,
+                      margins = c(5,5),
+                      ColSideColors,
+                      RowSideColors,
+                      side.height.fraction=0.3,
+                      cexRow = 0.2 + 1/log10(nr),
+                      cexCol = 0.2 + 1/log10(nc),
+                      labRow = NULL,
+                      labCol = NULL,
+                      key = TRUE,
+                      keysize = 1.5,
+                      density.info = c("none", "histogram", "density"),
+                      denscol = tracecol,
+                      symkey = max(x < 0, na.rm = TRUE) || symbreaks,
+                      densadj = 0.25,
+                      main = NULL,
+                      xlab = NULL,
+                      ylab = NULL,
+                      lmat = NULL,
+                      lhei = NULL,
+                      lwid = NULL,
+                      NumColSideColors = 1,
+                      NumRowSideColors = 1,
+                      KeyValueName="Value",...){
+    invalid <- function (x) {
+      if (missing(x) || is.null(x) || length(x) == 0)
+          return(TRUE)
+      if (is.list(x))
+          return(all(sapply(x, invalid)))
+      else if (is.vector(x))
+          return(all(is.na(x)))
+      else return(FALSE)
+    }
+    x <- as.matrix(x)
+    scale01 <- function(x, low = min(x), high = max(x)) {
+        x <- (x - low)/(high - low)
+        x
+    }
+    retval <- list()
+    scale <- if (symm && missing(scale))
+        "none"
+    else match.arg(scale)
+    dendrogram <- match.arg(dendrogram)
+    trace <- match.arg(trace)
+    density.info <- match.arg(density.info)
+    if (length(col) == 1 && is.character(col))
+        col <- get(col, mode = "function")
+    if (!missing(breaks) && (scale != "none"))
+        warning("Using scale=\"row\" or scale=\"column\" when breaks are",
+            "specified can produce unpredictable results.", "Please consider using only one or the other.")
+    if (is.null(Rowv) || is.na(Rowv))
+        Rowv <- FALSE
+    if (is.null(Colv) || is.na(Colv))
+        Colv <- FALSE
+    else if (Colv == "Rowv" && !isTRUE(Rowv))
+        Colv <- FALSE
+    if (length(di <- dim(x)) != 2 || !is.numeric(x))
+        stop("`x' must be a numeric matrix")
+    nr <- di[1]
+    nc <- di[2]
+    if (nr <= 1 || nc <= 1)
+        stop("`x' must have at least 2 rows and 2 columns")
+    if (!is.numeric(margins) || length(margins) != 2)
+        stop("`margins' must be a numeric vector of length 2")
+    if (missing(cellnote))
+        cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x))
+    if (!inherits(Rowv, "dendrogram")) {
+        if (((!isTRUE(Rowv)) || (is.null(Rowv))) && (dendrogram %in%
+            c("both", "row"))) {
+            if (is.logical(Colv) && (Colv))
+                dendrogram <- "column"
+            else dedrogram <- "none"
+            warning("Discrepancy: Rowv is FALSE, while dendrogram is `",
+                dendrogram, "'. Omitting row dendogram.")
+        }
+    }
+    if (!inherits(Colv, "dendrogram")) {
+        if (((!isTRUE(Colv)) || (is.null(Colv))) && (dendrogram %in%
+            c("both", "column"))) {
+            if (is.logical(Rowv) && (Rowv))
+                dendrogram <- "row"
+            else dendrogram <- "none"
+            warning("Discrepancy: Colv is FALSE, while dendrogram is `",
+                dendrogram, "'. Omitting column dendogram.")
+        }
+    }
+    if (inherits(Rowv, "dendrogram")) {
+        ddr <- Rowv
+        rowInd <- order.dendrogram(ddr)
+    }
+    else if (is.integer(Rowv)) {
+        hcr <- hclustfun(distfun(x))
+        ddr <- as.dendrogram(hcr)
+        ddr <- reorder(ddr, Rowv)
+        rowInd <- order.dendrogram(ddr)
+        if (nr != length(rowInd))
+            stop("row dendrogram ordering gave index of wrong length")
+    }
+    else if (isTRUE(Rowv)) {
+        Rowv <- rowMeans(x, na.rm = na.rm)
+        hcr <- hclustfun(distfun(x))
+        ddr <- as.dendrogram(hcr)
+        ddr <- reorder(ddr, Rowv)
+        rowInd <- order.dendrogram(ddr)
+        if (nr != length(rowInd))
+            stop("row dendrogram ordering gave index of wrong length")
+    }
+    else {
+        rowInd <- nr:1
+    }
+    if (inherits(Colv, "dendrogram")) {
+        ddc <- Colv
+        colInd <- order.dendrogram(ddc)
+    }
+    else if (identical(Colv, "Rowv")) {
+        if (nr != nc)
+            stop("Colv = \"Rowv\" but nrow(x) != ncol(x)")
+        if (exists("ddr")) {
+            ddc <- ddr
+            colInd <- order.dendrogram(ddc)
+        }
+        else colInd <- rowInd
+    }
+    else if (is.integer(Colv)) {
+        hcc <- hclustfun(distfun(if (symm)
+            x
+        else t(x)))
+        ddc <- as.dendrogram(hcc)
+        ddc <- reorder(ddc, Colv)
+        colInd <- order.dendrogram(ddc)
+        if (nc != length(colInd))
+            stop("column dendrogram ordering gave index of wrong length")
+    }
+    else if (isTRUE(Colv)) {
+        Colv <- colMeans(x, na.rm = na.rm)
+        hcc <- hclustfun(distfun(if (symm)
+            x
+        else t(x)))
+        ddc <- as.dendrogram(hcc)
+        ddc <- reorder(ddc, Colv)
+        colInd <- order.dendrogram(ddc)
+        if (nc != length(colInd))
+            stop("column dendrogram ordering gave index of wrong length")
+    }
+    else {
+        colInd <- 1:nc
+    }
+    retval$rowInd <- rowInd
+    retval$colInd <- colInd
+    retval$call <- match.call()
+    x <- x[rowInd, colInd]
+    x.unscaled <- x
+    cellnote <- cellnote[rowInd, colInd]
+    if (is.null(labRow))
+        labRow <- if (is.null(rownames(x)))
+            (1:nr)[rowInd]
+        else rownames(x)
+    else labRow <- labRow[rowInd]
+    if (is.null(labCol))
+        labCol <- if (is.null(colnames(x)))
+            (1:nc)[colInd]
+        else colnames(x)
+    else labCol <- labCol[colInd]
+    if (scale == "row") {
+        retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)
+        x <- sweep(x, 1, rm)
+        retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)
+        x <- sweep(x, 1, sx, "/")
+    }
+    else if (scale == "column") {
+        retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)
+        x <- sweep(x, 2, rm)
+        retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)
+        x <- sweep(x, 2, sx, "/")
+    }
+    if (missing(breaks) || is.null(breaks) || length(breaks) < 1) {
+        if (missing(col) || is.function(col))
+            breaks <- 16
+        else breaks <- length(col) + 1
+    }
+    if (length(breaks) == 1) {
+        if (!symbreaks)
+            breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm),
+                length = breaks)
+        else {
+            extreme <- max(abs(x), na.rm = TRUE)
+            breaks <- seq(-extreme, extreme, length = breaks)
+        }
+    }
+    nbr <- length(breaks)
+    ncol <- length(breaks) - 1
+    if (class(col) == "function")
+        col <- col(ncol)
+    min.breaks <- min(breaks)
+    max.breaks <- max(breaks)
+    x[x < min.breaks] <- min.breaks
+    x[x > max.breaks] <- max.breaks
+    if (missing(lhei) || is.null(lhei))
+        lhei <- c(keysize, 4)
+    if (missing(lwid) || is.null(lwid))
+        lwid <- c(keysize, 4)
+    if (missing(lmat) || is.null(lmat)) {
+        lmat <- rbind(4:3, 2:1)
+        if (!missing(ColSideColors)) {
+           #if (!is.matrix(ColSideColors))
+           #stop("'ColSideColors' must be a matrix")
+            if (!is.character(ColSideColors) || nrow(ColSideColors) != nc)
+                stop("'ColSideColors' must be a matrix of nrow(x) rows")
+            lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] + 1)
+            #lhei <- c(lhei[1], 0.2, lhei[2])
+             lhei=c(lhei[1], side.height.fraction*NumColSideColors, lhei[2])
+        }
+        if (!missing(RowSideColors)) {
+            #if (!is.matrix(RowSideColors))
+            #stop("'RowSideColors' must be a matrix")
+            if (!is.character(RowSideColors) || ncol(RowSideColors) != nr)
+                stop("'RowSideColors' must be a matrix of ncol(x) columns")
+            lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) - 1), 1), lmat[,2] + 1)
+            #lwid <- c(lwid[1], 0.2, lwid[2])
+            lwid <- c(lwid[1], side.height.fraction*NumRowSideColors, lwid[2])
+        }
+        lmat[is.na(lmat)] <- 0
+    }
+    if (length(lhei) != nrow(lmat))
+        stop("lhei must have length = nrow(lmat) = ", nrow(lmat))
+    if (length(lwid) != ncol(lmat))
+        stop("lwid must have length = ncol(lmat) =", ncol(lmat))
+    op <- par(no.readonly = TRUE)
+    on.exit(par(op))
+    layout(lmat, widths = lwid, heights = lhei, respect = FALSE)
+    if (!missing(RowSideColors)) {
+        if (!is.matrix(RowSideColors)){
+                par(mar = c(margins[1], 0, 0, 0.5))
+                image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)
+        } else {
+            par(mar = c(margins[1], 0, 0, 0.5))
+            rsc = t(RowSideColors[,rowInd, drop=F])
+            rsc.colors = matrix()
+            rsc.names = names(table(rsc))
+            rsc.i = 1
+            for (rsc.name in rsc.names) {
+                rsc.colors[rsc.i] = rsc.name
+                rsc[rsc == rsc.name] = rsc.i
+                rsc.i = rsc.i + 1
+            }
+            rsc = matrix(as.numeric(rsc), nrow = dim(rsc)[1])
+            image(t(rsc), col = as.vector(rsc.colors), axes = FALSE)
+            if (length(colnames(RowSideColors)) > 0) {
+                axis(1, 0:(dim(rsc)[2] - 1)/(dim(rsc)[2] - 1), colnames(RowSideColors), las = 2, tick = FALSE)
+            }
+        }
+    }
+    if (!missing(ColSideColors)) {
+        if (!is.matrix(ColSideColors)){
+            par(mar = c(0.5, 0, 0, margins[2]))
+            image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)
+        } else {
+            par(mar = c(0.5, 0, 0, margins[2]))
+            csc = ColSideColors[colInd, , drop=F]
+            csc.colors = matrix()
+            csc.names = names(table(csc))
+            csc.i = 1
+            for (csc.name in csc.names) {
+                csc.colors[csc.i] = csc.name
+                csc[csc == csc.name] = csc.i
+                csc.i = csc.i + 1
+            }
+            csc = matrix(as.numeric(csc), nrow = dim(csc)[1])
+            image(csc, col = as.vector(csc.colors), axes = FALSE)
+            if (length(colnames(ColSideColors)) > 0) {
+                axis(2, 0:(dim(csc)[2] - 1)/max(1,(dim(csc)[2] - 1)), colnames(ColSideColors), las = 2, tick = FALSE)
+            }
+        }
+    }
+    par(mar = c(margins[1], 0, 0, margins[2]))
+    x <- t(x)
+    cellnote <- t(cellnote)
+    if (revC) {
+        iy <- nr:1
+        if (exists("ddr"))
+            ddr <- rev(ddr)
+        x <- x[, iy]
+        cellnote <- cellnote[, iy]
+    }
+    else iy <- 1:nr
+    image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 + c(0, nr), axes = FALSE, xlab = "", ylab = "", col = col, breaks = breaks, ...)
+    retval$carpet <- x
+    if (exists("ddr"))
+        retval$rowDendrogram <- ddr
+    if (exists("ddc"))
+        retval$colDendrogram <- ddc
+    retval$breaks <- breaks
+    retval$col <- col
+    if (!invalid(na.color) & any(is.na(x))) { # load library(gplots)
+        mmat <- ifelse(is.na(x), 1, NA)
+        image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "",
+            col = na.color, add = TRUE)
+    }
+    axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0,
+        cex.axis = cexCol)
+    if (!is.null(xlab))
+        mtext(xlab, side = 1, line = margins[1] - 1.25)
+    axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0,
+        cex.axis = cexRow)
+    if (!is.null(ylab))
+        mtext(ylab, side = 4, line = margins[2] - 1.25)
+    if (!missing(add.expr))
+        eval(substitute(add.expr))
+    if (!missing(colsep))
+        for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0, length(csep)), xright = csep + 0.5 + sepwidth[1], ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1, col = sepcolor, border = sepcolor)
+    if (!missing(rowsep))
+        for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) + 1 - rsep) - 0.5, xright = nrow(x) + 1, ytop = (ncol(x) + 1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1, col = sepcolor, border = sepcolor)
+    min.scale <- min(breaks)
+    max.scale <- max(breaks)
+    x.scaled <- scale01(t(x), min.scale, max.scale)
+    if (trace %in% c("both", "column")) {
+        retval$vline <- vline
+        vline.vals <- scale01(vline, min.scale, max.scale)
+        for (i in colInd) {
+            if (!is.null(vline)) {
+                abline(v = i - 0.5 + vline.vals, col = linecol,
+                  lty = 2)
+            }
+            xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5
+            xv <- c(xv[1], xv)
+            yv <- 1:length(xv) - 0.5
+            lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
+        }
+    }
+    if (trace %in% c("both", "row")) {
+        retval$hline <- hline
+        hline.vals <- scale01(hline, min.scale, max.scale)
+        for (i in rowInd) {
+            if (!is.null(hline)) {
+                abline(h = i + hline, col = linecol, lty = 2)
+            }
+            yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5
+            yv <- rev(c(yv[1], yv))
+            xv <- length(yv):1 - 0.5
+            lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
+        }
+    }
+    if (!missing(cellnote))
+        text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote),
+            col = notecol, cex = notecex)
+    par(mar = c(margins[1], 0, 0, 0))
+    if (dendrogram %in% c("both", "row")) {
+        plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
+    }
+    else plot.new()
+    par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2]))
+    if (dendrogram %in% c("both", "column")) {
+        plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
+    }
+    else plot.new()
+    if (!is.null(main))
+        title(main, cex.main = 1.5 * op[["cex.main"]])
+    if (key) {
+        par(mar = c(5, 4, 2, 1), cex = 0.75)
+        tmpbreaks <- breaks
+        if (symkey) {
+            max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)
+            min.raw <- -max.raw
+            tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)
+            tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)
+        }
+        else {
+            min.raw <- min(x, na.rm = TRUE)
+            max.raw <- max(x, na.rm = TRUE)
+        }
+        z <- seq(min.raw, max.raw, length = length(col))
+        image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks,
+            xaxt = "n", yaxt = "n")
+        par(usr = c(0, 1, 0, 1))
+        lv <- pretty(breaks)
+        xv <- scale01(as.numeric(lv), min.raw, max.raw)
+        axis(1, at = xv, labels = lv)
+        if (scale == "row")
+            mtext(side = 1, "Row Z-Score", line = 2)
+        else if (scale == "column")
+            mtext(side = 1, "Column Z-Score", line = 2)
+        else mtext(side = 1, KeyValueName, line = 2)
+        if (density.info == "density") {
+            dens <- density(x, adjust = densadj, na.rm = TRUE)
+            omit <- dens$x < min(breaks) | dens$x > max(breaks)
+            dens$x <- dens$x[-omit]
+            dens$y <- dens$y[-omit]
+            dens$x <- scale01(dens$x, min.raw, max.raw)
+            lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol,
+                lwd = 1)
+            axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y))
+            title("Color Key\nand Density Plot")
+            par(cex = 0.5)
+            mtext(side = 2, "Density", line = 2)
+        }
+        else if (density.info == "histogram") {
+            h <- hist(x, plot = FALSE, breaks = breaks)
+            hx <- scale01(breaks, min.raw, max.raw)
+            hy <- c(h$counts, h$counts[length(h$counts)])
+            lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s",
+                col = denscol)
+            axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy))
+            title("Color Key\nand Histogram")
+            par(cex = 0.5)
+            mtext(side = 2, "Count", line = 2)
+        }
+        else title("Color Key")
+    }
+    else plot.new()
+    retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)],
+        high = retval$breaks[-1], color = retval$col)
+    invisible(retval)
+}
+# Render `data` as a column-scaled heatmap into a PNG file.
+#
+# Arguments:
+#   filename, width, height: passed to png() to open the output device.
+#   data: coerced with as.matrix(). NaN becomes NA; rows and columns that are
+#       entirely NA are dropped, then every row still holding an NA.
+#   take_log: when TRUE, each column without any value <= 0 is replaced by its
+#       natural log; columns containing a value <= 0 are left untouched.
+#   keys, colors: when `keys` is not NULL a legend is drawn at "bottomleft"
+#       using `keys` as labels and `colors` as fill.
+#   ...: forwarded to heatmap.3().
+#
+# Called for its side effect (the PNG file); returns NULL invisibly.
+rbbt.heatmap.3 <- function(filename, width, height, data, take_log=FALSE, keys=NULL, colors=NULL, ...){
+    # Quieted by MV
+    # library() rather than require(): a missing package fails right here,
+    # before any device is opened, instead of returning FALSE and failing later.
+    library(gplots, quietly = TRUE, warn.conflicts = FALSE)
+    library(pls, quietly = TRUE, warn.conflicts = FALSE)
+    # Graphics parameters are per-device, so nothing has to be saved/restored
+    # around the PNG device; calling par() with no device open would only
+    # create a spurious default device.
+    png(filename=filename, width=width, height=height)
+    png.dev <- dev.cur()
+    # Close the PNG device even when heatmap.3() or legend() raise an error.
+    on.exit(dev.off(png.dev), add = TRUE)
+    #par(cex.lab=0.5, cex=0.5, ...)
+    data <- as.matrix(data)
+    data[is.nan(data)] <- NA
+    # drop = FALSE keeps a matrix when a single row or column survives, so the
+    # rowSums()/colSums() calls below keep working.
+    data <- data[rowSums(!is.na(data)) != 0, colSums(!is.na(data)) != 0, drop = FALSE]
+    data <- data[rowSums(is.na(data)) == 0, , drop = FALSE]
+    if (take_log){
+        # Iterate by position: selecting by name skips every column when the
+        # matrix has no colnames and hits the first column twice when two
+        # columns share a name.
+        for (study in seq_len(ncol(data))){
+            values <- data[, study]
+            if (!any(values <= 0)){
+                data[, study] <- log(values)
+            }
+        }
+        data <- data[, colSums(is.na(data)) == 0, drop = FALSE]
+    }
+    #data = stdize(data)
+    heatmap.3(data, margins = c(20,5), scale='column', na.rm=TRUE, ...)
+    if (!is.null(keys)){
+        legend("bottomleft",legend=keys, fill=colors, border=FALSE, bty="n", y.intersp = 1.7, cex=1.7)
+    }
+    invisible(NULL)
+}

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-dm
 version: !ruby/object:Gem::Version
-  version: 1.1.18
+  version: 1.1.19
 platform: ruby
 authors:
 - Miguel Vazquez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-25 00:00:00.000000000 Z
+date: 2015-12-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rbbt-util
@@ -104,6 +104,9 @@ files:
 - lib/rbbt/statistics/rank_product.rb
 - lib/rbbt/vector/model.rb
 - lib/rbbt/vector/model/svm.rb
+- share/R/MA.R
+- share/R/barcode.R
+- share/R/heatmap.3.R
 - test/rbbt/network/test_paths.rb
 - test/rbbt/statistics/test_fdr.rb
 - test/rbbt/statistics/test_hypergeometric.rb