npm - @platforma-open/milaboratories.mixcr-clonotyping-2.workflow - Versions diffs - 3.9.0 → 3.10.0 - Mend

@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 3.9.0 → 3.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/.turbo/turbo-build.log +1 -1
package/CHANGELOG.md +6 -0
package/dist/tengo/lib/qc-report-columns.lib.tengo +67 -1
package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
package/dist/tengo/tpl/calculate-preset-info.plj.gz +0 -0
package/dist/tengo/tpl/export-report.plj.gz +0 -0
package/dist/tengo/tpl/list-presets.plj.gz +0 -0
package/dist/tengo/tpl/main.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
package/dist/tengo/tpl/prerun.plj.gz +0 -0
package/dist/tengo/tpl/process-single-cell.plj.gz +0 -0
package/dist/tengo/tpl/process.plj.gz +0 -0
package/dist/tengo/tpl/test.columns-calculate.plj.gz +0 -0
package/dist/tengo/tpl/test.columns.test.plj.gz +0 -0
package/package.json +1 -1
package/src/export-report.tpl.tengo +171 -4
package/src/process.tpl.tengo +15 -7
package/src/qc-report-columns.lib.tengo +67 -1

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,6 +1,6 @@
  WARN  Issue while reading "/home/runner/work/mixcr-clonotyping/mixcr-clonotyping/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
-> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.9.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
+> @platforma-open/milaboratories.mixcr-clonotyping-2.workflow@3.10.0 build /home/runner/work/mixcr-clonotyping/mixcr-clonotyping/workflow
 > rm -rf dist && pl-tengo check && pl-tengo build
   info: Skipping unknown file type: test/columns.test.ts

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,11 @@
 # @platforma-open/milaboratories.mixcr-clonotyping.workflow
+## 3.10.0
+### Minor Changes
+- d2b6d24: fix number of reads, clonotypes, cells in report table
 ## 3.9.0
 ### Minor Changes

package/dist/tengo/lib/qc-report-columns.lib.tengo CHANGED Viewed

@@ -600,6 +600,30 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
         }
     }]
+    if !isSingleCell {
+        bulkOrder := 107800
+        for chain in chains {
+            baseColumns = baseColumns + [{
+                column: "clonotypesByChain." + chain,
+                id: "clonotypes-by-chain-" + chain,
+                allowNA: true,
+                naRegex: "NaN",
+                spec: {
+                    name: "mixcr.com/reports/bulk/clonotypesByChain/" + chain,
+                    valueType: "Long",
+                    annotations: {
+                        "pl7.app/min": "0",
+                        "pl7.app/table/orderPriority": string(bulkOrder),
+                        "pl7.app/table/visibility": "default",
+                        "pl7.app/label": "Clonotypes by Chain " + chain
+                    }
+                }
+            }]
+            bulkOrder -= 100
+        }
+    }
     dataWithUmiColumns := [    {
         column: "refineTags.UMI.outputCount",
         id: "refine-tags-umi-output-count",
@@ -733,9 +757,51 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
         }
     }
     ]
+    singleCellColumns := []
-    singleCellColumns := [
+    singleCellColumns += [{
+        column: "scCellsTotal",
+        id: "sc-cells-total",
+        allowNA: true,
+        naRegex: "NaN",
+        spec: {
+            name: "mixcr.com/reports/singleCell/pairedCellsTotal",
+            valueType: "Long",
+            annotations: {
+                "pl7.app/min": "0",
+                "pl7.app/table/orderPriority": "107900",
+                "pl7.app/table/visibility": "default",
+                "pl7.app/label": "Total number of cells (with paired chains)"
+            }
+        }
+    }]
+    n := 107800
+    for chain in chains {
+        singleCellColumns += [{
+            column: "clonotypesByChain." + chain,
+            id: "clonotypes-by-chain-" + chain,
+            allowNA: true,
+            naRegex: "NaN",
+            spec: {
+                name: "mixcr.com/reports/singleCell/clonotypesByChain/" + chain,
+                valueType: "Long",
+                annotations: {
+                    "pl7.app/min": "0",
+                    "pl7.app/table/orderPriority": string(n),
+                    "pl7.app/table/visibility": "default",
+                    "pl7.app/label": "Clonotypes by Chain " + chain
+                }
+            }
+        }]
+        n -= 100
+    }
+    singleCellColumns += [

package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/calculate-preset-info.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/export-report.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/list-presets.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-analyze.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/mixcr-export.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/prerun.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process-single-cell.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/test.columns-calculate.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/test.columns.test.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@platforma-open/milaboratories.mixcr-clonotyping-2.workflow",
-  "version": "3.9.0",
+  "version": "3.10.0",
   "description": "Tengo-based template",
   "dependencies": {
     "@platforma-sdk/workflow-tengo": "^5.3.3",

package/src/export-report.tpl.tengo CHANGED Viewed

@@ -29,10 +29,12 @@ self.body(func(inputs) {
     chains := inputs.chains
     library := inputs.library
     isLibraryFileGzipped := inputs.isLibraryFileGzipped
+    clonotypeTablesData := inputs.clonotypeTablesData
     isSingleCell := len(presetSpecForBack.cellTags) > 0
 	hasUmi := !is_undefined(presetSpecForBack.umiTags) && len(presetSpecForBack.umiTags) > 0
     cellTags := presetSpecForBack.cellTags
+    singleCellChainTsvsData := inputs.singleCellChainTsvsData
     chainInfos := {
 	"IGHeavy": { mixcrFilter: "IGH", name: "IG Heavy", shortName: "Heavy" },
@@ -97,9 +99,174 @@ self.body(func(inputs) {
         pt.col("fileName").strSlice(0, pt.col("fileName").strLenChars().minus(5)).alias("sampleId")
     )
-    // For now, let's just use the processed DataFrame without zero column filtering
-    // since the columns() method is not available in this pTabler version
-    finalDf := processedDf
+    // Calculate real number of exported (productive) clonotypes per sample
+    // Use existing clonotype tables produced earlier; per-sample counts and read sums across chains
+    countDfs := []
+    for chain in chains {
+        chainData := clonotypeTablesData[chain]
+        if is_undefined(chainData) { continue }
+        for key, clonesFile in chainData.inputs() {
+            sampleId := json.decode(key)[0]
+            dfCountSource := wf.frame(clonesFile, { xsvType: "tsv", inferSchema: false, schema: [ { column: "readCount", type: "Double" } ] })
+            dfCount := dfCountSource.select(
+                pt.lit(sampleId).alias("sampleId"),
+                pt.col("clonotypeKey").count().alias("exportedClonotypes"),
+                pt.col("readCount").round().cast("Long").sum().alias("readsUsedInClonotypes")
+            )
+            countDfs = append(countDfs, dfCount)
+        }
+    }
+    countsDf := undefined
+    if len(countDfs) > 1 { countsDf = pt.concat(countDfs) } else { countsDf = countDfs[0] }
+    aggregatedCounts := countsDf.groupBy("sampleId").agg(
+        pt.col("exportedClonotypes").sum().alias("exportedClonotypes"),
+        pt.col("readsUsedInClonotypes").sum().alias("readsUsedInClonotypesNew")
+    )
+    // Join the per-sample counts onto the report; totalClonotypes and readsUsedInClonotypes are overwritten from them in the finalize step below
+    joinedDf := processedDf.join(aggregatedCounts, { how: "left", on: ["sampleId"] })
+    // Per-chain clonotype counts
+    perChainJoined := joinedDf
+    for chain in chains {
+        chainData := clonotypeTablesData[chain]
+        chainCol := "clonotypesByChain." + chain
+        if is_undefined(chainData) {
+            perChainJoined = perChainJoined.withColumns(pt.lit(0).alias(chainCol))
+            continue
+        }
+        perChainDfs := []
+        for key, clonesFile in chainData.inputs() {
+            sampleId := json.decode(key)[0]
+            dfSrc := wf.frame(clonesFile, { xsvType: "tsv", inferSchema: false })
+            dfCnt := dfSrc.select(
+                pt.lit(sampleId).alias("sampleId"),
+                pt.col("clonotypeKey").count().alias("__chainCount")
+            )
+            perChainDfs = append(perChainDfs, dfCnt)
+        }
+        if len(perChainDfs) == 0 {
+            perChainJoined = perChainJoined.withColumns(pt.lit(0).alias(chainCol))
+            continue
+        }
+        chainCountsDf := len(perChainDfs) > 1 ? pt.concat(perChainDfs) : perChainDfs[0]
+        chainAgg := chainCountsDf.groupBy("sampleId").agg(
+            pt.col("__chainCount").sum().alias(chainCol)
+        )
+        perChainJoined = perChainJoined.join(chainAgg, { how: "left", on: ["sampleId"] })
+    }
+    // Single-cell: compute per-sample cell pairing stats (both chains vs one chain)
+    if isSingleCell && !is_undefined(singleCellChainTsvsData) {
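+        // Two chains are expected per receptor; if there are more, a cell still counts as paired when its cellKey appears in more than one distinct chain of the same sample
+        // Build a long table with one (sampleId, cellKey, chain) row per record of each per-chain TSV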
+        scDfs := []
+        maps.forEach(singleCellChainTsvsData, func(chainName, chainFiles) {
+            maps.forEach(chainFiles.inputs(), func(key, f) {
+                sampleId := json.decode(key)[0]
+                df := wf.frame(f, { xsvType: "tsv", inferSchema: false, schema: [ { column: "cellKey", type: "String" } ] })
+                df2 := df.select(
+                    pt.lit(sampleId).alias("sampleId"),
+                    pt.col("cellKey").alias("cellKey"),
+                    pt.lit(chainName).alias("chain")
+                )
+                scDfs = append(scDfs, df2)
+            })
+        })
+        if len(scDfs) > 0 {
+            scAll := len(scDfs) > 1 ? pt.concat(scDfs) : scDfs[0]
+            // Count cells per sample across all chains (unique cellKey)
+            cellsPerSample := scAll.groupBy("sampleId").agg(pt.col("cellKey").nUnique().alias("scCellsTotal"))
+            // Cells paired across different chains: require the same cellKey to appear in >1 distinct chains per sample
+            cellsPerSampleChainCounts := scAll.groupBy("sampleId", "cellKey").agg(pt.col("chain").nUnique().alias("_numChains"))
+            bothChainCells := cellsPerSampleChainCounts.filter(pt.col("_numChains").gt(1)).groupBy("sampleId").agg(pt.col("cellKey").count().alias("scCellsBothChains"))
+            pairedKeys := cellsPerSampleChainCounts.filter(pt.col("_numChains").gt(1)).select(pt.col("sampleId"), pt.col("cellKey"))
+            perChainJoined = perChainJoined.join(cellsPerSample, { how: "left", on: ["sampleId"] })
+            perChainJoined = perChainJoined.join(bothChainCells, { how: "left", on: ["sampleId"] })
+            // Recompute per-chain clonotype counts using only paired cells
+            perChainPairedJoined := perChainJoined
+            totalPairedParts := []
+            for chain in chains {
+                chainFiles := singleCellChainTsvsData[chain]
+                chainColPaired := "clonotypesByChain." + chain + ".paired"
+                if is_undefined(chainFiles) {
+                    perChainPairedJoined = perChainPairedJoined.withColumns(pt.lit(0).alias(chainColPaired))
+                    continue
+                }
+                parts := []
+                maps.forEach(chainFiles.inputs(), func(key, f) {
+                    sampleId := json.decode(key)[0]
+                    dfc := wf.frame(f, { xsvType: "tsv", inferSchema: false, schema: [ { column: "cellKey", type: "String" }, { column: "clonotypeKey", type: "String" } ] })
+                    dfc2 := dfc.select(pt.lit(sampleId).alias("sampleId"), pt.col("cellKey"), pt.col("clonotypeKey"))
+                    // join with paired keys for this sample
+                    dfcJoined := dfc2.join(pairedKeys, { how: "inner", on: ["sampleId", "cellKey"] })
+                    parts = append(parts, dfcJoined.select(pt.col("sampleId"), pt.col("clonotypeKey")))
+                })
+                if len(parts) == 0 {
+                    perChainPairedJoined = perChainPairedJoined.withColumns(pt.lit(0).alias(chainColPaired))
+                    continue
+                }
+                partsDf := len(parts) > 1 ? pt.concat(parts) : parts[0]
+                chainAggPaired := partsDf.groupBy("sampleId").agg(pt.col("clonotypeKey").nUnique().alias(chainColPaired))
+                perChainPairedJoined = perChainPairedJoined.join(chainAggPaired, { how: "left", on: ["sampleId"] })
+                totalPairedParts = append(totalPairedParts, chainAggPaired.select(pt.col("sampleId"), pt.col(chainColPaired).alias("__pairedPart")))
+            }
+            if len(totalPairedParts) > 0 {
+                totalPairedDf := len(totalPairedParts) > 1 ? pt.concat(totalPairedParts) : totalPairedParts[0]
+                totalPairedAgg := totalPairedDf.groupBy("sampleId").agg(pt.col("__pairedPart").sum().alias("exportedClonotypesPaired"))
+                perChainJoined = perChainPairedJoined.join(totalPairedAgg, { how: "left", on: ["sampleId"] })
+            } else {
+                perChainJoined = perChainPairedJoined.withColumns(pt.lit(0).alias("exportedClonotypesPaired"))
+            }
+        } else {
+            perChainJoined = perChainJoined.withColumns(pt.lit(0).alias("scCellsTotal"), pt.lit(0).alias("scCellsBothChains"))
+        }
+    }
+    // Finalize: cast/fill totals and per-chain counts
+    finalDf := perChainJoined
+    if isSingleCell {
+        // Use paired-only totals and per-chain counts
+        // Replace per-chain columns from ".paired" variants and set total from exportedClonotypesPaired
+        for chain in chains {
+            colPaired := "clonotypesByChain." + chain + ".paired"
+            col := "clonotypesByChain." + chain
+            finalDf = finalDf.withColumns(pt.col(colPaired).fillNull(0).cast("Long").alias(col))
+        }
+        finalDf = finalDf.withColumns(
+            pt.col("exportedClonotypesPaired").fillNull(0).cast("Long").alias("totalClonotypes"),
+            pt.col("readsUsedInClonotypesNew").fillNull(0).cast("Long").alias("readsUsedInClonotypes")
+        )
+    } else {
+        finalDf = finalDf.withColumns(
+            pt.col("exportedClonotypes").fillNull(0).cast("Long").alias("totalClonotypes"),
+            pt.col("readsUsedInClonotypesNew").fillNull(0).cast("Long").alias("readsUsedInClonotypes")
+        )
+        for chain in chains {
+            col := "clonotypesByChain." + chain
+            finalDf = finalDf.withColumns(
+                pt.col(col).fillNull(0).cast("Long").alias(col)
+            )
+        }
+    }
+    for chain in chains {
+        col := "clonotypesByChain." + chain
+        finalDf = finalDf.withColumns(
+            pt.col(col).fillNull(0).cast("Long").alias(col)
+        )
+    }
+    if isSingleCell {
+        // Keep only paired cells count and name it scCellsTotal
+        finalDf = finalDf.withColumns(
+            pt.col("scCellsBothChains").fillNull(0).cast("Long").alias("scCellsTotal")
+        )
+    }
     // Save the final DataFrame back to TSV
     finalDf.save("qc-report-processed.tsv", {
@@ -111,7 +278,7 @@ self.body(func(inputs) {
     tsvFile := wfResult.getFile("qc-report-processed.tsv")
-    qcReportColumns := qcReportColumns(hasUmi, isSingleCell, sampleIdAxisSpec, chainsForMixcr, cellTags)
+    qcReportColumns := qcReportColumns(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellTags)
     reportColumnsSpec := qcReportColumns.reportColumnsSpec
     qcReportTable := xsv.importFile(

package/src/process.tpl.tengo CHANGED Viewed

@@ -319,7 +319,9 @@ self.body(func(inputs) {
 		return schema
 	}
-	clonotypeTables := pframes.pFrameBuilder()
+    clonotypeTables := pframes.pFrameBuilder()
+    singleCellChainTsvs := {}
+    clonotypeTablesData := {}
 	resultsToCache := {}
 	for chain in chains {
@@ -456,8 +458,9 @@ self.body(func(inputs) {
 			} ]
 		}
-		resultsToCache["clonotypeTable/" + chain] = exportResults.outputData("clonotypeTable")
-		clonotypeTables.add(chain, exportResults.outputSpec("clonotypeTable"), exportResults.outputData("clonotypeTable"))
+        resultsToCache["clonotypeTable/" + chain] = exportResults.outputData("clonotypeTable")
+        clonotypeTablesData[chain] = exportResults.outputData("clonotypeTable")
+        clonotypeTables.add(chain, exportResults.outputSpec("clonotypeTable"), exportResults.outputData("clonotypeTable"))
 		aggregateByCloneKey := pframes.processColumn(
 			exportResults.output("clonotypeTable"),
@@ -485,7 +488,7 @@ self.body(func(inputs) {
 			}
 		)
-		if isSingleCell {
+        if isSingleCell {
 			// collecting results for future single cell processing
 			perChainResults[chain] = {
 				tsvForSingleCell: exportResults.output("clonotypeTableForSingleCell"),
@@ -495,6 +498,9 @@ self.body(func(inputs) {
 			// caching intermediate results until the block is removed
 			resultsToCache["clonotypeTableForSingleCell/" + chain] = exportResults.outputData("clonotypeTableForSingleCell")
 			resultsToCache["clonotypeProperties/" + chain] = aggregateByCloneKey.outputData("clonotypeProperties")
+            // collect per-chain single-cell TSVs for QC report stats
+            singleCellChainTsvs[chain] = exportResults.outputData("clonotypeTableForSingleCell")
 		} else {
 			// only adding data outputs if we are in bulk mode
 			exportResults.addXsvOutputToBuilder(clonotypes, "byCloneKeyBySample", "byCloneKeyBySample/" + chain + "/")
@@ -693,13 +699,15 @@ self.body(func(inputs) {
 		}
 	}
-	qcReportTable := render.create(exportReportTpl, {
+    qcReportTable := render.create(exportReportTpl, {
 		clnsData: mixcrResults.outputData("clns"),
 		presetSpecForBack: presetSpecForBack,
 		sampleIdAxisSpec: sampleIdAxisSpec,
-		chains: chains,
+        chains: chains,
 		library: library,
-		isLibraryFileGzipped: isLibraryFileGzipped
+		isLibraryFileGzipped: isLibraryFileGzipped,
+        clonotypeTablesData: clonotypeTablesData,
+		singleCellChainTsvsData: singleCellChainTsvs
 	})
 	return {

package/src/qc-report-columns.lib.tengo CHANGED Viewed

@@ -600,6 +600,30 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
         }
     }]
+    // Add per-chain clonotype counts for bulk datasets (avoid duplicates in single-cell)
+    if !isSingleCell {
+        bulkOrder := 107800
+        for chain in chains {
+            baseColumns = baseColumns + [{
+                column: "clonotypesByChain." + chain,
+                id: "clonotypes-by-chain-" + chain,
+                allowNA: true,
+                naRegex: "NaN",
+                spec: {
+                    name: "mixcr.com/reports/bulk/clonotypesByChain/" + chain,
+                    valueType: "Long",
+                    annotations: {
+                        "pl7.app/min": "0",
+                        "pl7.app/table/orderPriority": string(bulkOrder),
+                        "pl7.app/table/visibility": "default",
+                        "pl7.app/label": "Clonotypes by Chain " + chain
+                    }
+                }
+            }]
+            bulkOrder -= 100
+        }
+    }
     dataWithUmiColumns := [    {
         column: "refineTags.UMI.outputCount",
         id: "refine-tags-umi-output-count",
@@ -733,9 +757,51 @@ getQcReportColumns := func(hasUmi, isSingleCell, sampleIdAxisSpec, chains, cellT
         }
     }
     ]
+    singleCellColumns := []
+    // Paired cells total (single-cell): number of cells with distinct chains per sample
+    singleCellColumns += [{
+        column: "scCellsTotal",
+        id: "sc-cells-total",
+        allowNA: true,
+        naRegex: "NaN",
+        spec: {
+            name: "mixcr.com/reports/singleCell/pairedCellsTotal",
+            valueType: "Long",
+            annotations: {
+                "pl7.app/min": "0",
+                "pl7.app/table/orderPriority": "107900",
+                "pl7.app/table/visibility": "default",
+                "pl7.app/label": "Total number of cells (with paired chains)"
+            }
+        }
+    }]
+    // Per-chain clonotype counts (paired-only in single-cell), one column per available chain
+    n := 107800
+    for chain in chains {
+        singleCellColumns += [{
+            column: "clonotypesByChain." + chain,
+            id: "clonotypes-by-chain-" + chain,
+            allowNA: true,
+            naRegex: "NaN",
+            spec: {
+                name: "mixcr.com/reports/singleCell/clonotypesByChain/" + chain,
+                valueType: "Long",
+                annotations: {
+                    "pl7.app/min": "0",
+                    "pl7.app/table/orderPriority": string(n),
+                    "pl7.app/table/visibility": "default",
+                    "pl7.app/label": "Clonotypes by Chain " + chain
+                }
+            }
+        }]
+        n -= 100
+    }
     // Single Cell columns
-    singleCellColumns := [
+    singleCellColumns += [
     // {
     //     column: "align.readsWithChain.IGH",
     //     id: "align-reads-with-chain-igh",