npm - @platforma-open/milaboratories.mixcr-shm-trees.workflow - Versions diffs - 2.2.0 → 2.3.1 - Mend

@platforma-open/milaboratories.mixcr-shm-trees.workflow 2.2.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/CHANGELOG.md +19 -0
package/dist/tengo/lib/export-settings.lib.tengo +24 -1
package/dist/tengo/lib/prepare-donor-column.lib.tengo +23 -23
package/dist/tengo/tpl/main.plj.gz +0 -0
package/dist/tengo/tpl/process.plj.gz +0 -0
package/dist/tengo/tpl/reconstruct-shm-trees.plj.gz +0 -0
package/package.json +7 -7
package/src/export-settings.lib.tengo +24 -1
package/src/main.tpl.tengo +1 -1
package/src/prepare-donor-column.lib.tengo +23 -23
package/src/process.tpl.tengo +27 -14
package/src/reconstruct-shm-trees.tpl.tengo +55 -3

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,24 @@
 # @platforma-open/milaboratories.mixcr-shm-trees.workflow
+## 2.3.1
+### Patch Changes
+- 1edb8af: - Fix for column name "Number of clones" -> "Number of nodes"
+  - Added "Distance to parent" column
+  - "Distance to parent" column is now used as distance measure for tree visualization by default
+  - Assembling feature (i.e. VDJRegion) is added as individual export column
+## 2.3.0
+### Minor Changes
+- 34af50b: Optional downsampling of clonesets before building SHM trees
+### Patch Changes
+- 61c4b6b: SDK upgrade
 ## 2.2.0
 ### Minor Changes

package/dist/tengo/lib/export-settings.lib.tengo CHANGED Viewed

@@ -101,7 +101,7 @@ shmTreeTableOptions := func(dataDescription) {
             "name": "pl7.app/vdj/numberOfNodesWithClones",
             "valueType": "Long",
             "annotations": {
-                "pl7.app/label": "Number of clones"
+                "pl7.app/label": "Number of nodes"
             }
         }
     })
@@ -306,8 +306,31 @@ shmTreeNodesTableOptions := func(dataDescription) {
         "spec": {
             "name": "pl7.app/dendrogram/distance",
             "valueType": "Double",
+            "domain": {
+                "pl7.app/dendrogram/distance/from": "germline"
+            },
             "annotations": {
                 "pl7.app/label": "Distanse from germline",
+                "pl7.app/dendrogram/distance/from": "germline", // change to domain only, once can be selected in graphmaker
+                "pl7.app/dendrogram/isDistance": "true"
+            }
+        }
+    })
+    cmdArgs = append(cmdArgs, "-distance", "parent")
+    columns = append(columns, {
+        "column": "DistanceFromParent",
+        "id": "distance-from-parent",
+        "allowNA": true,
+        "spec": {
+            "name": "pl7.app/dendrogram/distance",
+            "valueType": "Double",
+            "domain": {
+                "pl7.app/dendrogram/distance/from": "parent"
+            },
+            "annotations": {
+                "pl7.app/label": "Distanse from parent",
+                "pl7.app/dendrogram/distance/from": "parent", // change to domain only, once can be selected in graphmaker
                 "pl7.app/dendrogram/isDistance": "true"
             }
         }

package/dist/tengo/lib/prepare-donor-column.lib.tengo CHANGED Viewed

@@ -9,7 +9,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
     // we need to form a pColumn with three axes:
 	//    axes[0]: donorId
 	//    axes[1]: sampleId
-	//    axes[2]: mixcrBlockId
+	//    axes[2]: clonotypingBlockId
 	//      value: fileRef resource
 	// we have:
@@ -21,34 +21,34 @@ groupDataByDonorId := func(donorColumn, datasets) {
 	//          value: fileRef resource
     donorColumnSpec := donorColumn.get("spec").getDataAsJson()
-    domain := {}
-    if !is_undefined(donorColumnSpec["domain"]) {
-            domain = donorColumnSpec["domain"]
-    }
+    sampleIdAxis := donorColumnSpec.axesSpec[0]
     resultSpec := {
+        kind: "PColumn",
+        name: "mixcr.com/clns",
+        valueType: "File",
         // annotations and domain could differ between datasets
-        "axesSpec": [
+        axesSpec: [
             {
-                "annotations": donorColumnSpec["annotations"],
-                "domain": domain,
-                "name": donorColumnSpec["name"],
-                "type": donorColumnSpec["valueType"]
+                name: donorColumnSpec.name,
+                type: donorColumnSpec.valueType,
+                domain: donorColumnSpec.domain,
+                annotations: donorColumnSpec.annotations
             },
-            donorColumnSpec["axesSpec"][0],
+            sampleIdAxis,
             {
-                "annotations": {
+                name: "pl7.app/block",
+                type: "String",
+                annotations: {
                     "pl7.app/label": "Clonotyping block id"
-                },
-                "name": "pl7.app/blockId",
-                "type": "String"
+                }
             }
-        ],
-        "kind": "PColumn",
-        "name": "mixcr.com/clns",
-        "valueType": "File"
+        ]
     }
+    // creating sample to donor map
     sampleToDonor := {}
@@ -62,11 +62,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
     dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
     // collect all the clns files that we have into pColumn
-	for blockId, dataset in datasets {
+	for clonotypingBlockId, dataset in datasets {
 		for sKey, fileRef in dataset.get("data").inputs() {
             sampleId := json.decode(sKey)[0]
             donor := sampleToDonor[sampleId]
-            dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
+            dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
 		}
     }

package/dist/tengo/tpl/main.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/process.plj.gz CHANGED Viewed

Binary file

package/dist/tengo/tpl/reconstruct-shm-trees.plj.gz CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,20 +1,20 @@
 {
   "name": "@platforma-open/milaboratories.mixcr-shm-trees.workflow",
-  "version": "2.2.0",
+  "version": "2.3.1",
   "type": "module",
   "description": "Tengo-based template",
   "//": {
     "build": "node ./scripts/build-static.mjs src/pfconv_params.json src/pfconv_params.lib.tengo && rm -rf dist && pl-tengo check && pl-tengo build && ./create_tags.sh"
   },
   "devDependencies": {
-    "@platforma-sdk/tengo-builder": "^1.16.1",
-    "@platforma-sdk/workflow-tengo": "^2.2.0",
-    "@milaboratories/software-pframes-conv": "^2.0.1",
+    "@platforma-sdk/tengo-builder": "^1.17.3",
+    "@platforma-sdk/workflow-tengo": "^2.6.0",
+    "@milaboratories/software-pframes-conv": "^2.1.2",
     "@platforma-open/milaboratories.software-small-binaries": "^1.14.6",
     "@platforma-open/milaboratories.software-mixcr": "4.7.0-133-develop",
-    "@platforma-sdk/test": "^1.9.0",
-    "vitest": "^2.1.5",
-    "typescript": "~5.5.4"
+    "@platforma-sdk/test": "^1.17.0",
+    "vitest": "^2.1.8",
+    "typescript": "~5.6.3"
   },
   "scripts": {
     "build": "rm -rf dist && pl-tengo check && pl-tengo build",

package/src/export-settings.lib.tengo CHANGED Viewed

@@ -101,7 +101,7 @@ shmTreeTableOptions := func(dataDescription) {
             "name": "pl7.app/vdj/numberOfNodesWithClones",
             "valueType": "Long",
             "annotations": {
-                "pl7.app/label": "Number of clones"
+                "pl7.app/label": "Number of nodes"
             }
         }
     })
@@ -306,8 +306,31 @@ shmTreeNodesTableOptions := func(dataDescription) {
         "spec": {
             "name": "pl7.app/dendrogram/distance",
             "valueType": "Double",
+            "domain": {
+                "pl7.app/dendrogram/distance/from": "germline"
+            },
             "annotations": {
                 "pl7.app/label": "Distanse from germline",
+                "pl7.app/dendrogram/distance/from": "germline", // change to domain only, once can be selected in graphmaker
+                "pl7.app/dendrogram/isDistance": "true"
+            }
+        }
+    })
+    cmdArgs = append(cmdArgs, "-distance", "parent")
+    columns = append(columns, {
+        "column": "DistanceFromParent",
+        "id": "distance-from-parent",
+        "allowNA": true,
+        "spec": {
+            "name": "pl7.app/dendrogram/distance",
+            "valueType": "Double",
+            "domain": {
+                "pl7.app/dendrogram/distance/from": "parent"
+            },
+            "annotations": {
+                "pl7.app/label": "Distanse from parent",
+                "pl7.app/dendrogram/distance/from": "parent", // change to domain only, once can be selected in graphmaker
                 "pl7.app/dendrogram/isDistance": "true"
             }
         }

package/src/main.tpl.tengo CHANGED Viewed

@@ -35,7 +35,7 @@ wf.body(func(args) {
 		datasets: datasets,
 		donorColumn: donorColumn,
 		params: {
-			seed: args.seed
+			downsampling: args.downsampling
 		}
 	})

package/src/prepare-donor-column.lib.tengo CHANGED Viewed

@@ -9,7 +9,7 @@ groupDataByDonorId := func(donorColumn, datasets) {
     // we need to form a pColumn with three axes:
 	//    axes[0]: donorId
 	//    axes[1]: sampleId
-	//    axes[2]: mixcrBlockId
+	//    axes[2]: clonotypingBlockId
 	//      value: fileRef resource
 	// we have:
@@ -21,34 +21,34 @@ groupDataByDonorId := func(donorColumn, datasets) {
 	//          value: fileRef resource
     donorColumnSpec := donorColumn.get("spec").getDataAsJson()
-    domain := {}
-    if !is_undefined(donorColumnSpec["domain"]) {
-            domain = donorColumnSpec["domain"]
-    }
+    sampleIdAxis := donorColumnSpec.axesSpec[0]
     resultSpec := {
+        kind: "PColumn",
+        name: "mixcr.com/clns",
+        valueType: "File",
         // annotations and domain could differ between datasets
-        "axesSpec": [
+        axesSpec: [
             {
-                "annotations": donorColumnSpec["annotations"],
-                "domain": domain,
-                "name": donorColumnSpec["name"],
-                "type": donorColumnSpec["valueType"]
+                name: donorColumnSpec.name,
+                type: donorColumnSpec.valueType,
+                domain: donorColumnSpec.domain,
+                annotations: donorColumnSpec.annotations
             },
-            donorColumnSpec["axesSpec"][0],
+            sampleIdAxis,
             {
-                "annotations": {
+                name: "pl7.app/block",
+                type: "String",
+                annotations: {
                     "pl7.app/label": "Clonotyping block id"
-                },
-                "name": "pl7.app/blockId",
-                "type": "String"
+                }
             }
-        ],
-        "kind": "PColumn",
-        "name": "mixcr.com/clns",
-        "valueType": "File"
+        ]
     }
+    // creating sample to donor map
     sampleToDonor := {}
@@ -62,11 +62,11 @@ groupDataByDonorId := func(donorColumn, datasets) {
     dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
     // collect all the clns files that we have into pColumn
-	for blockId, dataset in datasets {
+	for clonotypingBlockId, dataset in datasets {
 		for sKey, fileRef in dataset.get("data").inputs() {
             sampleId := json.decode(sKey)[0]
             donor := sampleToDonor[sampleId]
-            dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
+            dataBuilder.createInputField(json.encode([donor, sampleId, clonotypingBlockId])).set(fileRef)
 		}
     }

package/src/process.tpl.tengo CHANGED Viewed

@@ -2,6 +2,7 @@ self := import("@platforma-sdk/workflow-tengo:tpl")
 llPFrames := import("@platforma-sdk/workflow-tengo:pframes.ll")
 ll := import("@platforma-sdk/workflow-tengo:ll")
+maps := import("@platforma-sdk/workflow-tengo:maps")
 assets := import("@platforma-sdk/workflow-tengo:assets")
 xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
 text := import("text")
@@ -22,35 +23,44 @@ self.awaitState("params", "ResourceReady")
 self.body(func(inputs) {
 	// overall description of data that we have.
     dataDescription := {
-		"hasUmiTags": false,
-		"hasCellTags": false,
+		hasUmiTags: false,
+		hasCellTags: false,
 		// will be filled
-		"coveredFeatures": [],
-		"cellsAssembled": false
+		coveredFeatures: [],
+		cellsAssembled: false
 	}
+	// clonotypingBlockId -> "bulk" | "sc"
+	datasetTypes := {}
 	assemblingFeature := ""
-	for _, dataset in inputs.datasets {
+	for clonotypingBlockId, dataset in inputs.datasets {
 		presetAnnotations := dataset.get("spec").getDataAsJson()["annotations"]
+		datasetTypes[clonotypingBlockId] = "bulk"
 		if presetAnnotations["mixcr.com/cellTags"] != "" {
-			dataDescription["hasCellTags"] = true
+			dataDescription.hasCellTags = true
 		}
 		if presetAnnotations["mixcr.com/umiTags"] != "" {
-			dataDescription["hasUmiTags"] = true
+			dataDescription.hasUmiTags = true
 		}
 		if presetAnnotations["mixcr.com/cellsAssembled"] == "true" {
-			dataDescription["cellsAssembled"] = true
+			dataDescription.cellsAssembled = true
+			datasetTypes[clonotypingBlockId] = "sc"
 		}
-		dataDescription["coveredFeatures"] = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
+		dataDescription.coveredFeatures = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
 		// check that assemblingFeature feature is the same. If so, coveredFeatures will be the same too
 		if (assemblingFeature == "") {
-			assemblingFeature = dataDescription["mixcr.com/assemblingFeature"]
-		} else if (assemblingFeature != dataDescription["mixcr.com/assemblingFeature"]) {
-			ll.panic("Assmble features should be the same for process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
+			assemblingFeature = presetAnnotations["mixcr.com/assemblingFeature"]
+		} else if (assemblingFeature != presetAnnotations["mixcr.com/assemblingFeature"]) {
+			ll.panic("Assmble features should be the same to process tress. Got " + assemblingFeature + " and " + presetAnnotations["mixcr.com/assemblingFeature"])
 		}
 	}
+	// adding assembling feature to the list of covered features
+	dataDescription.coveredFeatures = append(dataDescription.coveredFeatures, assemblingFeature)
 	// there should be a join call on pFrames, but it's not implemented, so we will do it by hand
 	dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
@@ -62,7 +72,7 @@ self.body(func(inputs) {
 	// TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
 	mixcrResults := llPFrames.aggregate(
 		// files to iterate through
-		dataGroupedByDonorId["data"],
+		dataGroupedByDonorId.data,
 		// columns not to combine - sampleId and clonotypingBlockId
 		[1, 2],
 		reconstructShmTreesTpl,
@@ -106,7 +116,10 @@ self.body(func(inputs) {
 			"shmTreeTableOptions": shmTreeTableOptions["cmdArgs"],
 			"shmTreeNodesTableOptions": shmTreeNodesTableOptions["cmdArgs"],
 			"shmTreeNodesWithClonesTableOptions": shmTreeNodesWithClonesTableOptions["cmdArgs"],
-			"globalParams": inputs.params
+			"globalParams": maps.merge(
+				inputs.params,
+				{ datasetTypes: datasetTypes }
+			)
 		}
 	)

package/src/reconstruct-shm-trees.tpl.tengo CHANGED Viewed

@@ -24,6 +24,13 @@ progressPrefix := "[==PROGRESS==]"
 self.body(func(inputs) {
 	inputData := inputs[pConstants.VALUE_FIELD_NAME]
     globalParams := inputs.globalParams
+    datasetTypes := globalParams.datasetTypes
+    downsampling := globalParams.downsampling
+    ll.print("__THE_LOG__ " + json.encode(datasetTypes))
+    ll.print("__THE_LOG__ " + json.encode(downsampling))
+    ll.assert(!is_undefined(datasetTypes), "datasetTypes undefined")
     allelesCmdBuilder := exec.builder().
         printErrStreamToStdout().
@@ -50,8 +57,10 @@ self.body(func(inputs) {
         // file name should encode axis values. It will be parsed by xsv.importFileMap afterwards to restore axis for clones data
         fileName := sampleId + "___" + clonotypingBlockId + ".clns"
         toProcess = append(toProcess, {
-            "fileName": fileName,
-            "input": inputFile
+            clonotypingBlockId: clonotypingBlockId,
+            sampleId: sampleId,
+            fileName: fileName,
+            input: inputFile
         })
     }
@@ -63,6 +72,49 @@ self.body(func(inputs) {
     alleles := allelesCmdBuilder.run()
+    for input in toProcess {
+        input.alleles = alleles.getFile("alleles/" + input.fileName)
+    }
+    if !is_undefined(downsampling) {
+        downsamplingParam := ""
+        if downsampling.type == "CountReadsFixed" {
+            downsamplingParam = "count-reads-fixed-" + string(downsampling.number)
+        } else if downsampling.type == "CountMoleculesFixed" {
+            downsamplingParam = "count-molecule-fixed-" + string(downsampling.number)
+        } else if downsampling.type == "TopClonotypesByReads" {
+            downsamplingParam = "top-reads-" + string(downsampling.number)
+        } else if downsampling.type == "TopClonotypesByMolecules" {
+            downsamplingParam = "top-molecule-" + string(downsampling.number)
+        } else if downsampling.type == "CumulativeTopClonotypesByReads" {
+            downsamplingParam = "cumtop-reads-" + string(downsampling.percent)
+        } else if downsampling.type == "CumulativeTopClonotypesByMolecules" {
+            downsamplingParam = "cumtop-molecule-" + string(downsampling.percent)
+        } else {
+            ll.panic("Unknown downsampling type: " + downsampling.type)
+        }
+        ll.print("__THE_LOG__ " + downsamplingParam)
+        for input in toProcess {
+            if datasetTypes[input.clonotypingBlockId] == "bulk" {
+                downsamplingCmd := exec.builder().
+                    printErrStreamToStdout().
+                    secret("MI_LICENSE", "MI_LICENSE").
+                    env("MI_PROGRESS_PREFIX", progressPrefix).
+                    software(mixcrSw).
+                    arg("downsample").
+                    arg("--downsampling").
+                    arg(downsamplingParam).
+                    arg("clones.clns").
+                    addFile("clones.clns", input.alleles).
+                    saveFile("clones.downsampled.clns").
+                    run()
+                input.alleles = downsamplingCmd.getFile("clones.downsampled.clns")
+            }
+        }
+    }
     shmTreesCmdBuilder := exec.builder().
         printErrStreamToStdout().
         secret("MI_LICENSE", "MI_LICENSE").
@@ -80,7 +132,7 @@ self.body(func(inputs) {
     for input in toProcess {
         shmTreesCmdBuilder.
-            addFile(input.fileName, alleles.getFile("alleles/" + input.fileName)).
+            addFile(input.fileName, input.alleles).
             arg(input.fileName)
     }