@platforma-open/milaboratories.mixcr-shm-trees.workflow 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ wf := import("@platforma-sdk/workflow-tengo:workflow")
2
+
3
+ render := import("@platforma-sdk/workflow-tengo:render")
4
+ assets := import("@platforma-sdk/workflow-tengo:assets")
5
+ ll := import("@platforma-sdk/workflow-tengo:ll")
6
+ pframes := import("@platforma-sdk/workflow-tengo:pframes")
7
+
8
+ processTpl := assets.importTemplate(":process")
9
+
10
+ wf.body(func(args) {
11
+ if is_undefined(args.donorColumn) {
12
+ ll.panic("No donor column")
13
+ }
14
+
15
+ if (len(args.datasetColumns) == 0) {
16
+ ll.panic("No datasets to process")
17
+ }
18
+
19
+ // we cannot use an array as a request for waiting (see below), so we store datasets in a dictionary
20
+ datasets := {}
21
+ for datasetRef in args.datasetColumns {
22
+ if is_undefined(datasetRef) {
23
+ ll.panic("Dataset is undefined")
24
+ }
25
+ // it's the blockId of a MiXCR block; we assume that it produces only one clns column
26
+ datasets[datasetRef.blockId] = wf.resolve(datasetRef)
27
+ }
28
+
29
+ donorColumn := wf.resolve(args.donorColumn)
30
+
31
+ // The problem is that the refs for the data are not resolved.
32
+ // To deal with it, we call resolve, which will return a feature.
33
+ // Then, to resolve the feature, we call another template where we describe what to wait for
34
+ results := render.createEphemeral(processTpl, {
35
+ datasets: datasets,
36
+ donorColumn: donorColumn,
37
+ params: {
38
+ seed: args.seed
39
+ }
40
+ })
41
+
42
+ return {
43
+ outputs: {
44
+ "trees": results.output("trees"),
45
+ "treeNodes": results.output("treeNodes"),
46
+ "treeNodesWithClones": results.output("treeNodesWithClones"),
47
+
48
+ "tsvs": results.output("tsvs"),
49
+ "allelesLogs": results.output("allelesLogs"),
50
+ "treesLogs": results.output("treesLogs"),
51
+
52
+ // files should be explicitly published, otherwise they will not be accessible from the GUI
53
+ // that's why pframes.exportColumnData
54
+ "allelesReports": pframes.exportColumnData(results.output("allelesReports")),
55
+ "treesReports": pframes.exportColumnData(results.output("treesReports")),
56
+ "allelesReportsJson": pframes.exportColumnData(results.output("allelesReportsJson")),
57
+ "treesReportsJson": pframes.exportColumnData(results.output("treesReportsJson"))
58
+ },
59
+ exports: {}
60
+ }
61
+ })
@@ -0,0 +1,78 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+ smart := import("@platforma-sdk/workflow-tengo:smart")
3
+
4
+ json := import("json")
5
+
6
+ _P_COLUMN_DATA_RESOURCE_MAP := { Name: "PColumnData/ResourceMap", Version: "1" }
7
+
8
+ groupDataByDonorId := func(donorColumn, datasets) {
9
+ // we need to form a pColumn with two axes:
10
+ // axes[0]: donorId
11
+ // axes[1]: sampleId
12
+ // axes[2]: mixcrBlockId
13
+ // value: fileRef resource
14
+
15
+ // we have:
16
+ // column of donorIds:
17
+ // axes[0]: sampleId
18
+ // value: donorId
19
+ // several columns of clns:
20
+ // axes[0]: sampleId
21
+ // value: fileRef resource
22
+
23
+ donorColumnSpec := donorColumn.get("spec").getDataAsJson()
24
+
25
+ resultSpec := {
26
+ // annotations and domain could differ between datasets
27
+ "axesSpec": [
28
+ {
29
+ "annotations": donorColumnSpec["annotations"],
30
+ "domain": donorColumnSpec["domain"],
31
+ "name": donorColumnSpec["name"],
32
+ "type": donorColumnSpec["valueType"]
33
+ },
34
+ donorColumnSpec["axesSpec"][0],
35
+ {
36
+ "annotations": {
37
+ "pl7.app/label": "Clonotyping block id"
38
+ },
39
+ "name": "pl7.app/blockId",
40
+ "type": "String"
41
+ }
42
+ ],
43
+ "kind": "PColumn",
44
+ "name": "mixcr.com/clns",
45
+ "valueType": "File"
46
+ }
47
+
48
+
49
+ sampleToDonor := {}
50
+
51
+ // columns with meta can be fetched as data directly
52
+ for k, v in donorColumn.get("data").getDataAsJson()["data"] {
53
+ sampleId := json.decode(k)[0]
54
+ sampleToDonor[sampleId] = v
55
+ }
56
+
57
+ // build pColumn by hand
58
+ dataBuilder := smart.structBuilder(_P_COLUMN_DATA_RESOURCE_MAP, json.encode({ keyLength: 3 }))
59
+
60
+ // collect all the clns files that we have into pColumn
61
+ for blockId, dataset in datasets {
62
+ for sKey, fileRef in dataset.get("data").inputs() {
63
+ sampleId := json.decode(sKey)[0]
64
+ donor := sampleToDonor[sampleId]
65
+ dataBuilder.createInputField(json.encode([donor, sampleId, blockId])).set(fileRef)
66
+ }
67
+ }
68
+
69
+ return {
70
+ spec: resultSpec,
71
+ data: dataBuilder.lockAndBuild()
72
+ }
73
+ }
74
+
75
+ // to use the file as a library, we should explicitly export functions
76
+ export ll.toStrict({
77
+ groupDataByDonorId: groupDataByDonorId
78
+ })
@@ -0,0 +1,155 @@
1
+ self := import("@platforma-sdk/workflow-tengo:tpl")
2
+
3
+ llPFrames := import("@platforma-sdk/workflow-tengo:pframes.ll")
4
+ ll := import("@platforma-sdk/workflow-tengo:ll")
5
+ assets := import("@platforma-sdk/workflow-tengo:assets")
6
+ xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
7
+ text := import("text")
8
+ exportSettings := import(":export-settings")
9
+ prepareDonorColumn := import(":prepare-donor-column")
10
+ pframes := import("@platforma-sdk/workflow-tengo:pframes")
11
+
12
+ reconstructShmTreesTpl := assets.importTemplate(":reconstruct-shm-trees")
13
+
14
+ // this template should run only after all inputs have been resolved
15
+ // we don't need to wait for file content, just refs
16
+ self.awaitState("datasets", { wildcard: "*" }, "data", "InputsLocked")
17
+ // but we need spec already
18
+ self.awaitState("datasets", { wildcard: "*" }, "spec", "ResourceReady")
19
+ self.awaitState("donorColumn", "ResourceReady")
20
+ self.awaitState("params", "ResourceReady")
21
+
22
+ self.body(func(inputs) {
23
+ // overall description of data that we have.
24
+ dataDescription := {
25
+ "hasUmiTags": false,
26
+ "hasCellTags": false,
27
+ // will be filled
28
+ "coveredFeatures": []
29
+ }
30
+
31
+ assemblingFeature := ""
32
+ for _, dataset in inputs.datasets {
33
+ presetAnnotations := dataset.get("spec").getDataAsJson()["annotations"]
34
+
35
+ if presetAnnotations["mixcr.com/cellTags"] != "" {
36
+ dataDescription["hasCellTags"] = true
37
+ }
38
+ if presetAnnotations["mixcr.com/umiTags"] != "" {
39
+ dataDescription["hasUmiTags"] = true
40
+ }
41
+ dataDescription["coveredFeatures"] = text.re_split(',', presetAnnotations["mixcr.com/coveredFeaturesOnExport"])
42
+ // check that the assembling feature is the same. If so, coveredFeatures will be the same too
43
+ if (assemblingFeature == "") {
44
+ assemblingFeature = dataDescription["mixcr.com/assemblingFeature"]
45
+ } else if (assemblingFeature != dataDescription["mixcr.com/assemblingFeature"]) {
46
+ ll.panic("Assmble features should be the same for process tress. Got " + assemblingFeature + " and " + dataDescription["mixcr.com/assemblingFeature"])
47
+ }
48
+ }
49
+
50
+ // there should be a call to join on pFrames, but it's not implemented, so we do it by hand
51
+ dataGroupedByDonorId := prepareDonorColumn.groupDataByDonorId(inputs.donorColumn, inputs.datasets)
52
+
53
+ // collect params for running export commands and to parse result tsv files into pColumns
54
+ shmTreeTableOptions := exportSettings.shmTreeTableOptions(dataDescription, false)
55
+ shmTreeNodesTableOptions := exportSettings.shmTreeNodesTableOptions(dataDescription, false)
56
+ shmTreeNodesWithClonesTableOptions := exportSettings.shmTreeNodesWithClonesTableOptions(dataDescription, inputs.donorColumn, false)
57
+
58
+ // TODO that call is too low level. Should be replaced with something that works with pColumns, not data only
59
+ mixcrResults := llPFrames.aggregate(
60
+ // files to iterate through
61
+ dataGroupedByDonorId["data"],
62
+ // columns not to combine - sampleId and mixcrBlockId
63
+ [1, 2],
64
+ reconstructShmTreesTpl,
65
+ // all the outputs that should be gathered
66
+ [
67
+ {
68
+ "name": "trees",
69
+ "type": "Resource"
70
+ }, {
71
+ "name": "treeNodes",
72
+ "type": "Resource"
73
+ }, {
74
+ "name": "treeNodesWithClones",
75
+ "type": "Resource"
76
+ }, {
77
+ "name": "tsvs",
78
+ "type": "Resource"
79
+ }, {
80
+ "name": "allelesLog",
81
+ "type": "Resource"
82
+ }, {
83
+ "name": "treesLog",
84
+ "type": "Resource"
85
+ }, {
86
+ "name": "allelesReport",
87
+ "type": "Resource"
88
+ }, {
89
+ "name": "treesReport",
90
+ "type": "Resource"
91
+ }, {
92
+ "name": "allelesReportJson",
93
+ "type": "Resource"
94
+ }, {
95
+ "name": "treesReportJson",
96
+ "type": "Resource"
97
+ }
98
+ ],
99
+ false,
100
+ // inputs
101
+ {
102
+ "shmTreeTableOptions": shmTreeTableOptions["cmdArgs"],
103
+ "shmTreeNodesTableOptions": shmTreeNodesTableOptions["cmdArgs"],
104
+ "shmTreeNodesWithClonesTableOptions": shmTreeNodesWithClonesTableOptions["cmdArgs"],
105
+ "globalParams": inputs.params
106
+ }
107
+ )
108
+
109
+ // donorId axis is inherited from dataGroupedByDonorId and we should specify it explicitly (other axes will be supplied by pfconvParams)
110
+ additionalArgsForImportTsv := {
111
+ additionalAxesSpec: dataGroupedByDonorId["spec"]["axesSpec"][:1]
112
+ }
113
+
114
+ trees := xsv.importFileMap(
115
+ mixcrResults.output("trees"),
116
+ "tsv",
117
+ shmTreeTableOptions["pfconvParams"],
118
+ additionalArgsForImportTsv
119
+ )
120
+
121
+ treeNodes := xsv.importFileMap(
122
+ mixcrResults.output("treeNodes"),
123
+ "tsv",
124
+ shmTreeNodesTableOptions["pfconvParams"],
125
+ additionalArgsForImportTsv
126
+ )
127
+
128
+ treeNodesWithClones := xsv.importFileMap(
129
+ mixcrResults.output("treeNodesWithClones"),
130
+ "tsv",
131
+ shmTreeNodesWithClonesTableOptions["pfconvParams"],
132
+ additionalArgsForImportTsv
133
+ )
134
+
135
+ tsvs := mixcrResults.output("tsvs")
136
+
137
+ return {
138
+ // combine columns into pFrame
139
+ "trees": pframes.exportFrame(trees),
140
+ // combine columns into pFrame
141
+ "treeNodes": pframes.exportFrame(treeNodes),
142
+ // combine columns into pFrame
143
+ "treeNodesWithClones": pframes.exportFrame(treeNodesWithClones),
144
+
145
+ "tsvs": tsvs,
146
+
147
+ "allelesLogs": mixcrResults.output("allelesLog"),
148
+ "treesLogs": mixcrResults.output("treesLog"),
149
+
150
+ "allelesReports": mixcrResults.output("allelesReport"),
151
+ "treesReports": mixcrResults.output("treesReport"),
152
+ "allelesReportsJson": mixcrResults.output("allelesReportJson"),
153
+ "treesReportsJson": mixcrResults.output("treesReportJson")
154
+ }
155
+ })
@@ -0,0 +1,182 @@
1
+ self := import("@platforma-sdk/workflow-tengo:tpl")
2
+ pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
3
+ ll := import("@platforma-sdk/workflow-tengo:ll")
4
+ assets := import("@platforma-sdk/workflow-tengo:assets")
5
+ exec := import("@platforma-sdk/workflow-tengo:exec")
6
+
7
+ json := import("json")
8
+
9
+ // for use in the aggregate function, we must specify all outputs that will be used
10
+ self.defineOutputs(
11
+ "trees", "treeNodes", "treeNodesWithClones",
12
+ "tsvs",
13
+ "allelesLog", "treesLog",
14
+ "allelesReport", "treesReport",
15
+ "allelesReportJson", "treesReportJson"
16
+ )
17
+
18
+ // import MiXCR as a software to use
19
+ mixcrSw := assets.importSoftware("@platforma-open/milaboratories.software-mixcr:main")
20
+
21
+ // env for MiXCR to format progress messages
22
+ progressPrefix := "[==PROGRESS==]"
23
+
24
+ self.body(func(inputs) {
25
+ inputData := inputs[pConstants.VALUE_FIELD_NAME]
26
+ globalParams := inputs.globalParams
27
+
28
+ allelesCmdBuilder := exec.builder().
29
+ printErrStreamToStdout().
30
+ secret("MI_LICENSE", "MI_LICENSE").
31
+ env("MI_PROGRESS_PREFIX", progressPrefix).
32
+ software(mixcrSw).
33
+ arg("findAlleles").
34
+ arg("--report").arg("report.txt").
35
+ saveFile("report.txt").
36
+ arg("--json-report").arg("report.json").
37
+ saveFile("report.json").
38
+ // template specifies where result files will be written
39
+ arg("--output-template").arg("alleles/{file_name}.clns")
40
+
41
+ if !is_undefined(globalParams.seed) {
42
+ allelesCmdBuilder.env("SEED", globalParams.seed)
43
+ }
44
+
45
+ toProcess := []
46
+ for sKey, inputFile in inputData.inputs() {
47
+ key := json.decode(sKey)
48
+ sampleId := key[0]
49
+ clonotypingBlockId := key[1]
50
+ // file name should encode axis values. It will be parsed by xsv.importFileMap afterwards to restore axis for clones data
51
+ fileName := sampleId + "___" + clonotypingBlockId + ".clns"
52
+ toProcess = append(toProcess, {
53
+ "fileName": fileName,
54
+ "input": inputFile
55
+ })
56
+ }
57
+
58
+ for input in toProcess {
59
+ allelesCmdBuilder.addFile(input.fileName, input.input).
60
+ arg(input.fileName).
61
+ saveFile("alleles/" + input.fileName)
62
+ }
63
+
64
+ alleles := allelesCmdBuilder.run()
65
+
66
+ shmTreesCmdBuilder := exec.builder().
67
+ printErrStreamToStdout().
68
+ secret("MI_LICENSE", "MI_LICENSE").
69
+ env("MI_PROGRESS_PREFIX", progressPrefix).
70
+ software(mixcrSw).
71
+ arg("findShmTrees").
72
+ arg("--report").arg("report.txt").
73
+ saveFile("report.txt").
74
+ arg("--json-report").arg("report.json").
75
+ saveFile("report.json").
76
+ // TODO support single cell trees
77
+ arg("--dont-combine-tree-by-cells")
78
+
79
+ if !is_undefined(globalParams.seed) {
80
+ shmTreesCmdBuilder.env("SEED", globalParams.seed)
81
+ }
82
+
83
+ for input in toProcess {
84
+ shmTreesCmdBuilder.
85
+ addFile(input.fileName, alleles.getFile("alleles/" + input.fileName)).
86
+ arg(input.fileName)
87
+ }
88
+
89
+ shmTreesCmdBuilder.arg("output.shmt").saveFile("output.shmt")
90
+
91
+ shmTrees := shmTreesCmdBuilder.run()
92
+
93
+
94
+ // export trees without nodes
95
+ shmTreeExportsCmdBuilder := exec.builder().
96
+ printErrStreamToStdout().
97
+ inLightQueue().
98
+ secret("MI_LICENSE", "MI_LICENSE").
99
+ env("MI_PROGRESS_PREFIX", progressPrefix).
100
+ software(mixcrSw).
101
+ arg("exportShmTrees")
102
+
103
+ for arg in inputs.shmTreeTableOptions {
104
+ shmTreeExportsCmdBuilder = shmTreeExportsCmdBuilder.arg(arg)
105
+ }
106
+
107
+ shmTreeExportsCmdBuilder = shmTreeExportsCmdBuilder.
108
+ arg("input.shmt").
109
+ arg("output.tsv").
110
+ addFile("input.shmt", shmTrees.getFile("output.shmt")).
111
+ saveFile("output.tsv")
112
+
113
+ shmTreeExports := shmTreeExportsCmdBuilder.run()
114
+
115
+
116
+ // export tree nodes with data uniq for the node
117
+ shmTreeNodesExportsCmdBuilder := exec.builder().
118
+ printErrStreamToStdout().
119
+ inLightQueue().
120
+ secret("MI_LICENSE", "MI_LICENSE").
121
+ env("MI_PROGRESS_PREFIX", progressPrefix).
122
+ software(mixcrSw).
123
+ arg("exportShmTreesWithNodes")
124
+
125
+ for arg in inputs.shmTreeNodesTableOptions {
126
+ shmTreeNodesExportsCmdBuilder = shmTreeNodesExportsCmdBuilder.arg(arg)
127
+ }
128
+
129
+ shmTreeNodesExportsCmdBuilder = shmTreeNodesExportsCmdBuilder.
130
+ arg("input.shmt").
131
+ arg("output.tsv").
132
+ addFile("input.shmt", shmTrees.getFile("output.shmt")).
133
+ saveFile("output.tsv")
134
+
135
+ shmTreeNodesExports := shmTreeNodesExportsCmdBuilder.run()
136
+
137
+
138
+
139
+ // export nodes with clones. For each node could be several clones
140
+ shmTreeNodesWithClonesExportsCmdBuilder := exec.builder().
141
+ printErrStreamToStdout().
142
+ inLightQueue().
143
+ secret("MI_LICENSE", "MI_LICENSE").
144
+ env("MI_PROGRESS_PREFIX", progressPrefix).
145
+ software(mixcrSw).
146
+ arg("exportShmTreesWithNodes").
147
+ // don't export nodes that don't have clones
148
+ arg("--only-observed")
149
+
150
+ for arg in inputs.shmTreeNodesWithClonesTableOptions {
151
+ shmTreeNodesWithClonesExportsCmdBuilder = shmTreeNodesWithClonesExportsCmdBuilder.arg(arg)
152
+ }
153
+
154
+ shmTreeNodesWithClonesExportsCmdBuilder = shmTreeNodesWithClonesExportsCmdBuilder.
155
+ arg("input.shmt").
156
+ arg("output.tsv").
157
+ addFile("input.shmt", shmTrees.getFile("output.shmt")).
158
+ saveFile("output.tsv")
159
+
160
+ shmTreeNodesWithClonesExports := shmTreeNodesWithClonesExportsCmdBuilder.run()
161
+
162
+ return {
163
+ "trees": shmTreeExports.getFile("output.tsv"),
164
+ "treeNodes": shmTreeNodesExports.getFile("output.tsv"),
165
+ "treeNodesWithClones": shmTreeNodesWithClonesExports.getFile("output.tsv"),
166
+
167
+ "tsvs": {
168
+ "trees": shmTreeExports.getFile("output.tsv"),
169
+ "treeNodes": shmTreeNodesExports.getFile("output.tsv"),
170
+ "treeNodesWithClones": shmTreeNodesWithClonesExports.getFile("output.tsv")
171
+ },
172
+
173
+ "allelesLog": alleles.getStdoutStream(),
174
+ "treesLog": shmTrees.getStdoutStream(),
175
+
176
+ "allelesReport": alleles.getFile("report.txt"),
177
+ "treesReport": shmTrees.getFile("report.txt"),
178
+
179
+ "allelesReportJson": alleles.getFile("report.json"),
180
+ "treesReportJson": shmTrees.getFile("report.json")
181
+ }
182
+ })
package/tsconfig.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "es2022",
4
+ "module": "commonjs",
5
+ "moduleResolution": "node",
6
+ "esModuleInterop": true,
7
+ "strict": true,
8
+ "outDir": "./dist",
9
+ "rootDir": "./src",
10
+ "sourceMap": true,
11
+ "declaration": true
12
+ },
13
+ "types": [],
14
+ "include": ["src/**/*"],
15
+ "exclude": ["node_modules", "dist"]
16
+ }
@@ -0,0 +1,9 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
+ export default defineConfig({
4
+ test: {
5
+ watch: false,
6
+ maxConcurrency: 3,
7
+ testTimeout: 5000
8
+ }
9
+ });