npm - @platforma-open/milaboratories.mixcr-shm-trees.workflow - Versions diffs - 2.2.0 → 2.3.0 - Mend

@platforma-open/milaboratories.mixcr-shm-trees.workflow 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/CHANGELOG.md +10 -0
package/dist/tengo/lib/prepare-donor-column.lib.tengo +23 -23
package/dist/tengo/tpl/main.plj.gz +0 -0
package/dist/tengo/tpl/process.plj.gz +0 -0
package/dist/tengo/tpl/reconstruct-shm-trees.plj.gz +0 -0
package/package.json +7 -7
package/src/main.tpl.tengo +1 -1
package/src/prepare-donor-column.lib.tengo +23 -23
package/src/process.tpl.tengo +22 -12
package/src/reconstruct-shm-trees.tpl.tengo +55 -3

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,15 @@
 # @platforma-open/milaboratories.mixcr-shm-trees.workflow
+## 2.3.0
+### Minor Changes
+- 34af50b: Optional downsampling of clonesets before building SHM trees
+### Patch Changes
+- 61c4b6b: SDK upgrade
 ## 2.2.0
 ### Minor Changes

package/dist/tengo/lib/prepare-donor-column.lib.tengo CHANGED Viewed

@@ -9,7 +9,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
     // we need to form a pColumn with two axes:
 	//    axes[0]: donorId
 	//    axes[1]: sampleId
-	//    axes[2]: mixcrBlockId
+	//    axes[2]: mixcrclonotypingBlockId
 	//      value: fileRef resource
 	// we have:
@@ -21,34 +21,34 @@ groupDataByDonorId := func(donorColumn, datasets) {
 	//          value: fileRef resource
     donorColumnSpec := donorColumn.get("spec").getDataAsJson()
-    domain := {}
-    if !is_undefined(donorColumnSpec["domain"]) {
-            domain = donorColumnSpec["domain"]
-    }
+    sampleIdAxis := donorColumnSpec.axesSpec[0]
     resultSpec := {
+        kind: "PColumn",
+        name: "mixcr.com/clns",
+        valueType: "File",
         // annotations and domain could differ between datasets
-        "axesSpec": [
+        axesSpec: [
             {
-                "annotations": donorColumnSpec["annotations"],
-                "domain": domain,
-                "name": donorColumnSpec["name"],
-                "type": donorColumnSpec["valueType"]
+                name: donorColumnSpec.name,
+                type: donorColumnSpec.valueType,
+                domain: donorColumnSpec.domain,
+                annotations: donorColumnSpec.annotations
             },
-            donorColumnSpec["axesSpec"][0],
+            sampleIdAxis,
             {
-                "annotations": {
+                name: "pl7.app/block",
+                type: "String",
+                annotations: {
                     "pl7.app/label": "Clonotyping block id"
-                },
-                "name": "pl7.app/blockId",
-                "type": "String"
+                }
             }
-        ],
-        "kind": "PColumn",
-        "name": "mixcr.com/clns",
-        "valueType": "File"
+        ]
     }
+    // creating sample to donor map
     sampleToDonor := {}
@@ -62,11 +62,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
     dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
     // collect all the clns files that we have into pColumn
-	for blockId, dataset in datasets {
+	for clonotypingBlockId, dataset in datasets {
 		for sKey, fileRef in dataset.get("data").inputs() {
             sampleId := json.decode(sKey)[0]
             donor := sampleToDonor[sampleId]
-            dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
+            dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
 		}
     }

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/reconstruct-shm-trees.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,20 +1,20 @@
 {
   "name": "@platforma-open/milaboratories.mixcr-shm-trees.workflow",
-  "version": "2.2.0",
+  "version": "2.3.0",
   "type": "module",
   "description": "Tengo-based template",
   "//": {
     "build": "node ./scripts/build-static.mjs src/pfconv_params.json src/pfconv_params.lib.tengo && rm -rf dist && pl-tengo check && pl-tengo build && ./create_tags.sh"
   },
   "devDependencies": {
-    "@platforma-sdk/tengo-builder": "^1.16.1",
-    "@platforma-sdk/workflow-tengo": "^2.2.0",
-    "@milaboratories/software-pframes-conv": "^2.0.1",
+    "@platforma-sdk/tengo-builder": "^1.17.3",
+    "@platforma-sdk/workflow-tengo": "^2.6.0",
+    "@milaboratories/software-pframes-conv": "^2.1.2",
     "@platforma-open/milaboratories.software-small-binaries": "^1.14.6",
     "@platforma-open/milaboratories.software-mixcr": "4.7.0-133-develop",
-    "@platforma-sdk/test": "^1.9.0",
-    "vitest": "^2.1.5",
-    "typescript": "~5.5.4"
+    "@platforma-sdk/test": "^1.17.0",
+    "vitest": "^2.1.8",
+    "typescript": "~5.6.3"
   },
   "scripts": {
     "build": "rm -rf dist && pl-tengo check && pl-tengo build",

package/src/main.tpl.tengo CHANGED Viewed

@@ -35,7 +35,7 @@ wf.body(func(args) {
 		datasets: datasets,
 		donorColumn: donorColumn,
 		params: {
-			seed: args.seed
+			downsampling: args.downsampling
 		}
 	})

package/src/prepare-donor-column.lib.tengo CHANGED Viewed

@@ -9,7 +9,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
     // we need to form a pColumn with two axes:
 	//    axes[0]: donorId
 	//    axes[1]: sampleId
-	//    axes[2]: mixcrBlockId
+	//    axes[2]: mixcrclonotypingBlockId
 	//      value: fileRef resource
 	// we have:
@@ -21,34 +21,34 @@ groupDataByDonorId := func(donorColumn, datasets) {
 	//          value: fileRef resource
     donorColumnSpec := donorColumn.get("spec").getDataAsJson()
-    domain := {}
-    if !is_undefined(donorColumnSpec["domain"]) {
-            domain = donorColumnSpec["domain"]
-    }
+    sampleIdAxis := donorColumnSpec.axesSpec[0]
     resultSpec := {
+        kind: "PColumn",
+        name: "mixcr.com/clns",
+        valueType: "File",
         // annotations and domain could differ between datasets
-        "axesSpec": [
+        axesSpec: [
             {
-                "annotations": donorColumnSpec["annotations"],
-                "domain": domain,
-                "name": donorColumnSpec["name"],
-                "type": donorColumnSpec["valueType"]
+                name: donorColumnSpec.name,
+                type: donorColumnSpec.valueType,
+                domain: donorColumnSpec.domain,
+                annotations: donorColumnSpec.annotations
             },
-            donorColumnSpec["axesSpec"][0],
+            sampleIdAxis,
             {
-                "annotations": {
+                name: "pl7.app/block",
+                type: "String",
+                annotations: {
                     "pl7.app/label": "Clonotyping block id"
-                },
-                "name": "pl7.app/blockId",
-                "type": "String"
+                }
             }
-        ],
-        "kind": "PColumn",
-        "name": "mixcr.com/clns",
-        "valueType": "File"
+        ]
     }
+    // creating sample to donor map
     sampleToDonor := {}
@@ -62,11 +62,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
     dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
     // collect all the clns files that we have into pColumn
-	for blockId, dataset in datasets {
+	for clonotypingBlockId, dataset in datasets {
 		for sKey, fileRef in dataset.get("data").inputs() {
             sampleId := json.decode(sKey)[0]
             donor := sampleToDonor[sampleId]
-            dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
+            dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
 		}
     }

package/src/process.tpl.tengo CHANGED Viewed

@@ -2,6 +2,7 @@ self := import("@platforma-sdk/workflow-tengo:tpl")
 llPFrames := import("@platforma-sdk/workflow-tengo:pframes.ll")
 ll := import("@platforma-sdk/workflow-tengo:ll")
+maps := import("@platforma-sdk/workflow-tengo:maps")
 assets := import("@platforma-sdk/workflow-tengo:assets")
 xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
 text := import("text")
@@ -22,32 +23,38 @@ self.awaitState("params", "ResourceReady")
 self.body(func(inputs) {
 	// overall description of data that we have.
     dataDescription := {
-		"hasUmiTags": false,
-		"hasCellTags": false,
+		hasUmiTags: false,
+		hasCellTags: false,
 		// will be filled
-		"coveredFeatures": [],
-		"cellsAssembled": false
+		coveredFeatures: [],
+		cellsAssembled: false
 	}
+	// clonotypingBlockId -> "bulk" | "sc"
+	datasetTypes := {}
 	assemblingFeature := ""
-	for _, dataset in inputs.datasets {
+	for clonotypingBlockId, dataset in inputs.datasets {
 		presetAnnotations := dataset.get("spec").getDataAsJson()["annotations"]
+		datasetTypes[clonotypingBlockId] = "bulk"
 		if presetAnnotations["mixcr.com/cellTags"] != "" {
-			dataDescription["hasCellTags"] = true
+			dataDescription.hasCellTags = true
 		}
 		if presetAnnotations["mixcr.com/umiTags"] != "" {
-			dataDescription["hasUmiTags"] = true
+			dataDescription.hasUmiTags = true
 		}
 		if presetAnnotations["mixcr.com/cellsAssembled"] == "true" {
-			dataDescription["cellsAssembled"] = true
+			dataDescription.cellsAssembled = true
+			datasetTypes[clonotypingBlockId] = "sc"
 		}
-		dataDescription["coveredFeatures"] = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
+		dataDescription.coveredFeatures = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
 		// check that assemblingFeature feature is the same. If so, coveredFeatures will be the same too
 		if (assemblingFeature == "") {
 			assemblingFeature = dataDescription["mixcr.com/assemblingFeature"]
 		} else if (assemblingFeature != dataDescription["mixcr.com/assemblingFeature"]) {
-			ll.panic("Assmble features should be the same for process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
+			ll.panic("Assmble features should be the same to process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
 		}
 	}
@@ -62,7 +69,7 @@ self.body(func(inputs) {
 	// TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
 	mixcrResults := llPFrames.aggregate(
 		// files to iterate through
-		dataGroupedByDonorId["data"],
+		dataGroupedByDonorId.data,
 		// columns not to combine - sampleId and mixcrBlockId
 		[1, 2],
 		reconstructShmTreesTpl,
@@ -106,7 +113,10 @@ self.body(func(inputs) {
 			"shmTreeTableOptions": shmTreeTableOptions["cmdArgs"],
 			"shmTreeNodesTableOptions": shmTreeNodesTableOptions["cmdArgs"],
 			"shmTreeNodesWithClonesTableOptions": shmTreeNodesWithClonesTableOptions["cmdArgs"],
-			"globalParams": inputs.params
+			"globalParams": maps.merge(
+				inputs.params,
+				{ datasetTypes: datasetTypes }
+			)
 		}
 	)

package/src/reconstruct-shm-trees.tpl.tengo CHANGED Viewed

@@ -24,6 +24,13 @@ progressPrefix := "[==PROGRESS==]"
 self.body(func(inputs) {
 	inputData := inputs[pConstants.VALUE_FIELD_NAME]
     globalParams := inputs.globalParams
+    datasetTypes := globalParams.datasetTypes
+    downsampling := globalParams.downsampling
+    ll.print("__THE_LOG__ " + json.encode(datasetTypes))
+    ll.print("__THE_LOG__ " + json.encode(downsampling))
+    ll.assert(!is_undefined(datasetTypes), "datasetTypes undefined")
     allelesCmdBuilder := exec.builder().
         printErrStreamToStdout().
@@ -50,8 +57,10 @@ self.body(func(inputs) {
         // file name should encode axis values. It will be parsed by xsv.importFileMap afterwards to restore axis for clones data
         fileName := sampleId + "___" + clonotypingBlockId + ".clns"
         toProcess = append(toProcess, {
-            "fileName": fileName,
-            "input": inputFile
+            clonotypingBlockId: clonotypingBlockId,
+            sampleId: sampleId,
+            fileName: fileName,
+            input: inputFile
         })
     }
@@ -63,6 +72,49 @@ self.body(func(inputs) {
     alleles := allelesCmdBuilder.run()
+    for input in toProcess {
+        input.alleles = alleles.getFile("alleles/" + input.fileName)
+    }
+    if !is_undefined(downsampling) {
+        downsamplingParam := ""
+        if downsampling.type == "CountReadsFixed" {
+            downsamplingParam = "count-reads-fixed-" + string(downsampling.number)
+        } else if downsampling.type == "CountMoleculesFixed" {
+            downsamplingParam = "count-molecule-fixed-" + string(downsampling.number)
+        } else if downsampling.type == "TopClonotypesByReads" {
+            downsamplingParam = "top-reads-" + string(downsampling.number)
+        } else if downsampling.type == "TopClonotypesByMolecules" {
+            downsamplingParam = "top-molecule-" + string(downsampling.number)
+        } else if downsampling.type == "CumulativeTopClonotypesByReads" {
+            downsamplingParam = "cumtop-reads-" + string(downsampling.percent)
+        } else if downsampling.type == "CumulativeTopClonotypesByMolecules" {
+            downsamplingParam = "cumtop-molecule-" + string(downsampling.percent)
+        } else {
+            ll.panic("Unknown downsampling type: " + downsampling.type)
+        }
+        ll.print("__THE_LOG__ " + downsamplingParam)
+        for input in toProcess {
+            if datasetTypes[input.clonotypingBlockId] == "bulk" {
+                downsamplingCmd := exec.builder().
+                    printErrStreamToStdout().
+                    secret("MI_LICENSE", "MI_LICENSE").
+                    env("MI_PROGRESS_PREFIX", progressPrefix).
+                    software(mixcrSw).
+                    arg("downsample").
+                    arg("--downsampling").
+                    arg(downsamplingParam).
+                    arg("clones.clns").
+                    addFile("clones.clns", input.alleles).
+                    saveFile("clones.downsampled.clns").
+                    run()
+                input.alleles = downsamplingCmd.getFile("clones.downsampled.clns")
+            }
+        }
+    }
     shmTreesCmdBuilder := exec.builder().
         printErrStreamToStdout().
         secret("MI_LICENSE", "MI_LICENSE").
@@ -80,7 +132,7 @@ self.body(func(inputs) {
     for input in toProcess {
         shmTreesCmdBuilder.
-            addFile(input.fileName, alleles.getFile("alleles/" + input.fileName)).
+            addFile(input.fileName, input.alleles).
             arg(input.fileName)
     }