@platforma-open/milaboratories.top-antibodies.workflow 1.10.5 → 1.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +22 -0
- package/dist/tengo/tpl/filter-and-sample.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/package.json +7 -7
- package/src/main.tpl.tengo +3 -316
- package/src/prerun.tpl.tengo +121 -4
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.top-antibodies.workflow@1.10.5 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
|
|
3
|
+
> @platforma-open/milaboratories.top-antibodies.workflow@1.11.1 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
|
|
4
4
|
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
Processing "src/filter-and-sample.tpl.tengo"...
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.top-antibodies.workflow
|
|
2
2
|
|
|
3
|
+
## 1.11.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- edbd894: technical release
|
|
8
|
+
- 6dc2d2b: technical release
|
|
9
|
+
- e581493: technical release
|
|
10
|
+
- 1c26f0d: technical release
|
|
11
|
+
- Updated dependencies [edbd894]
|
|
12
|
+
- Updated dependencies [6dc2d2b]
|
|
13
|
+
- Updated dependencies [e581493]
|
|
14
|
+
- Updated dependencies [1c26f0d]
|
|
15
|
+
- @platforma-open/milaboratories.top-antibodies.sample-clonotypes@1.4.4
|
|
16
|
+
- @platforma-open/milaboratories.top-antibodies.spectratype@1.4.4
|
|
17
|
+
- @platforma-open/milaboratories.top-antibodies.umap@1.1.4
|
|
18
|
+
|
|
19
|
+
## 1.11.0
|
|
20
|
+
|
|
21
|
+
### Minor Changes
|
|
22
|
+
|
|
23
|
+
- 67443d9: Move all calculations to prerun
|
|
24
|
+
|
|
3
25
|
## 1.10.5
|
|
4
26
|
|
|
5
27
|
### Patch Changes
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.top-antibodies.workflow",
|
|
3
|
-
"version": "1.10.5",
|
|
3
|
+
"version": "1.11.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Block Workflow",
|
|
6
6
|
"dependencies": {
|
|
7
|
-
"@platforma-sdk/workflow-tengo": "^5.
|
|
8
|
-
"@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "1.4.
|
|
9
|
-
"@platforma-open/milaboratories.top-antibodies.
|
|
10
|
-
"@platforma-open/milaboratories.top-antibodies.
|
|
7
|
+
"@platforma-sdk/workflow-tengo": "^5.3.3",
|
|
8
|
+
"@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "1.4.4",
|
|
9
|
+
"@platforma-open/milaboratories.top-antibodies.spectratype": "1.4.4",
|
|
10
|
+
"@platforma-open/milaboratories.top-antibodies.umap": "1.1.4"
|
|
11
11
|
},
|
|
12
12
|
"devDependencies": {
|
|
13
|
-
"@platforma-sdk/tengo-builder": "^2.
|
|
14
|
-
"@platforma-sdk/test": "^1.
|
|
13
|
+
"@platforma-sdk/tengo-builder": "^2.3.0",
|
|
14
|
+
"@platforma-sdk/test": "^1.44.7",
|
|
15
15
|
"vitest": "^2.1.8"
|
|
16
16
|
},
|
|
17
17
|
"scripts": {
|
package/src/main.tpl.tengo
CHANGED
|
@@ -1,326 +1,13 @@
|
|
|
1
|
-
// light block with no workflow
|
|
2
1
|
wf := import("@platforma-sdk/workflow-tengo:workflow")
|
|
3
|
-
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
4
2
|
assets:= import("@platforma-sdk/workflow-tengo:assets")
|
|
5
|
-
xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
|
|
6
|
-
pframes := import("@platforma-sdk/workflow-tengo:pframes")
|
|
7
|
-
spectratypeConv := import(":pf-spectratype-conv")
|
|
8
|
-
vjUsageConv := import(":pf-vj-usage-conv")
|
|
9
|
-
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
10
|
-
render := import("@platforma-sdk/workflow-tengo:render")
|
|
11
|
-
|
|
12
|
-
filterAndSampleTpl := assets.importTemplate(":filter-and-sample")
|
|
13
3
|
|
|
14
4
|
// Set prerun template for clonotype filtering
|
|
15
5
|
wf.setPreRun(assets.importTemplate(":prerun"))
|
|
16
6
|
|
|
17
|
-
wf.prepare(func(args) {
|
|
18
|
-
// We need a table with cluster ID (optional) | clonotype id | selected ranking columns
|
|
19
|
-
bundleBuilder := wf.createPBundleBuilder()
|
|
20
|
-
bundleBuilder.ignoreMissingDomains() // to make query work for both bulk and single cell data
|
|
21
|
-
bundleBuilder.addAnchor("main", args.inputAnchor)
|
|
22
|
-
|
|
23
|
-
if len(args.rankingOrder) > 0 {
|
|
24
|
-
for col in args.rankingOrder {
|
|
25
|
-
bundleBuilder.addAnchor(col.value.anchorName, col.value.anchorRef)
|
|
26
|
-
bundleBuilder.addSingle(col.value.column)
|
|
27
|
-
}
|
|
28
|
-
} else {
|
|
29
|
-
bundleBuilder.addAnchor(args.rankingOrderDefault.value.anchorName,
|
|
30
|
-
args.rankingOrderDefault.value.anchorRef)
|
|
31
|
-
bundleBuilder.addSingle(args.rankingOrderDefault.value.column)
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
// Load filter columns
|
|
35
|
-
if len(args.filters) > 0 {
|
|
36
|
-
for filter in args.filters {
|
|
37
|
-
if filter.value != undefined {
|
|
38
|
-
bundleBuilder.addAnchor(filter.value.anchorName, filter.value.anchorRef)
|
|
39
|
-
bundleBuilder.addSingle(filter.value.column)
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
// Add linker column
|
|
46
|
-
bundleBuilder.addMulti({
|
|
47
|
-
axes: [{ anchor: "main", idx: 1 }], // this will do partial axes match (unlike in the model)
|
|
48
|
-
annotations: { "pl7.app/isLinkerColumn": "true" },
|
|
49
|
-
partialAxesMatch: true
|
|
50
|
-
}, "linkers")
|
|
51
|
-
|
|
52
|
-
// Add cluster size columns from clustering blocks
|
|
53
|
-
bundleBuilder.addMulti({
|
|
54
|
-
name: "pl7.app/vdj/clustering/clusterSize",
|
|
55
|
-
partialAxesMatch: true
|
|
56
|
-
}, "clusterSizes")
|
|
57
|
-
|
|
58
|
-
// Add CDR3 sequences
|
|
59
|
-
bundleBuilder.addMulti({
|
|
60
|
-
axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
|
|
61
|
-
name: "pl7.app/vdj/sequence",
|
|
62
|
-
domain: {
|
|
63
|
-
"pl7.app/alphabet": "aminoacid",
|
|
64
|
-
"pl7.app/vdj/feature": "CDR3" // Specify CDR3 feature
|
|
65
|
-
}
|
|
66
|
-
}, "cdr3Sequences") // New collection name for CDR3 sequences
|
|
67
|
-
|
|
68
|
-
// Add V gene
|
|
69
|
-
bundleBuilder.addMulti({
|
|
70
|
-
axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
|
|
71
|
-
name: "pl7.app/vdj/geneHit",
|
|
72
|
-
domain: {
|
|
73
|
-
"pl7.app/vdj/reference": "VGene"
|
|
74
|
-
}
|
|
75
|
-
}, "VGenes")
|
|
76
|
-
|
|
77
|
-
// Add J gene
|
|
78
|
-
bundleBuilder.addMulti({
|
|
79
|
-
axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
|
|
80
|
-
name: "pl7.app/vdj/geneHit",
|
|
81
|
-
domain: {
|
|
82
|
-
"pl7.app/vdj/reference": "JGene"
|
|
83
|
-
}
|
|
84
|
-
}, "JGenes")
|
|
85
|
-
|
|
7
|
+
wf.body(func(args) {
|
|
86
8
|
return {
|
|
87
|
-
|
|
9
|
+
outputs: {},
|
|
10
|
+
exports: {}
|
|
88
11
|
}
|
|
89
12
|
})
|
|
90
13
|
|
|
91
|
-
wf.body(func(args) {
|
|
92
|
-
|
|
93
|
-
// Input arguments
|
|
94
|
-
columns := args.columns
|
|
95
|
-
datasetSpec := columns.getSpec(args.inputAnchor)
|
|
96
|
-
topClonotypes := args.topClonotypes
|
|
97
|
-
|
|
98
|
-
// Needed conditional variable
|
|
99
|
-
isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
|
|
100
|
-
|
|
101
|
-
// Output container
|
|
102
|
-
outputs := {}
|
|
103
|
-
|
|
104
|
-
// Build clonotype table csv for filtering script
|
|
105
|
-
cloneTable := pframes.csvFileBuilder()
|
|
106
|
-
cloneTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
|
|
107
|
-
|
|
108
|
-
// Add filter columns to table
|
|
109
|
-
// Filter structure {id: UI id, value: AnchoredColumnId, filter: Filter criteria, isExpanded: boolean (UI state)}
|
|
110
|
-
// AnchoredColumnId {anchorRef: PlRef, anchorName: string, column: SUniversalPColumnId (unique column id)}
|
|
111
|
-
addedAxes := [] // Keep track of axes that are added to the table to prevent duplicates
|
|
112
|
-
filterMap := {} // Map column headers to filter criteria
|
|
113
|
-
rankingMap := {} // Map column headers to ranking order (increasing/decreasing)
|
|
114
|
-
if len(args.filters) > 0 {
|
|
115
|
-
for i, filter in args.filters {
|
|
116
|
-
if filter.value != undefined {
|
|
117
|
-
// Columns added here might also be in ranking list, so we add default IDs
|
|
118
|
-
cloneTable.add(columns.getColumn(filter.value.column),
|
|
119
|
-
{header: "Filter_" + string(i), id: "filter_" + string(i)})
|
|
120
|
-
// Store reference value and filter type associated to this column
|
|
121
|
-
filterMap["Filter_" + string(i)] = filter.filter
|
|
122
|
-
|
|
123
|
-
// If column does not have main anchor axis we have to include theirs (columns coming from clustering blocks for example)
|
|
124
|
-
colsSpec := columns.getSpec(filter.value.column)
|
|
125
|
-
axesNames := slices.map(colsSpec.axesSpec, func (a) {return a.name})
|
|
126
|
-
if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
|
|
127
|
-
for na, ax in colsSpec.axesSpec {
|
|
128
|
-
if ax.name != datasetSpec.axesSpec[1].name {
|
|
129
|
-
cloneTable.setAxisHeader(ax.name, "cluster_" + string(i) + string(na))
|
|
130
|
-
addedAxes = append(addedAxes, ax.name)
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
// Add ranking columns to table
|
|
139
|
-
if len(args.rankingOrder) > 0 {
|
|
140
|
-
for i, col in args.rankingOrder {
|
|
141
|
-
cloneTable.add(columns.getColumn(col.value.column), {header: "Col" + string(i)})
|
|
142
|
-
// Store ranking order for this column
|
|
143
|
-
rankingMap["Col" + string(i)] = col.rankingOrder
|
|
144
|
-
|
|
145
|
-
// If column does not have main anchor axis we have to include theirs (columns coming from clustering blocks for example)
|
|
146
|
-
colsSpec := columns.getSpec(col.value.column)
|
|
147
|
-
axesNames := slices.map(colsSpec.axesSpec, func (a) {return a.name})
|
|
148
|
-
if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
|
|
149
|
-
for na, ax in colsSpec.axesSpec {
|
|
150
|
-
if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) { // Prevent duplicates
|
|
151
|
-
cloneTable.setAxisHeader(ax.name, "cluster_" + string(i) + string(na))
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
} else {
|
|
157
|
-
i := 0
|
|
158
|
-
cloneTable.add(columns.getColumn(args.rankingOrderDefault.value.column), {header: "Col" + string(i)})
|
|
159
|
-
// Store default ranking order
|
|
160
|
-
rankingMap["Col" + string(i)] = args.rankingOrderDefault.rankingOrder
|
|
161
|
-
|
|
162
|
-
// If column does not have main anchor axis we have to include theirs
|
|
163
|
-
colsSpec := columns.getSpec(args.rankingOrderDefault.value.column)
|
|
164
|
-
axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
|
|
165
|
-
if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
|
|
166
|
-
for na, ax in colsSpec.axesSpec {
|
|
167
|
-
if ax.name != datasetSpec.axesSpec[1].name {
|
|
168
|
-
cloneTable.setAxisHeader(ax.name, "cluster_" + string(i) + string(na))
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
// Add linker columns when needed
|
|
175
|
-
linkerAxisSpec := {} // Map cluster axis names to specs. Is it needed????
|
|
176
|
-
if len(columns.getColumns("linkers")) > 0 {
|
|
177
|
-
for i, col in columns.getColumns("linkers") {
|
|
178
|
-
if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
|
|
179
|
-
cloneTable.add(col, {header: "linker." + string(i)})
|
|
180
|
-
cloneTable.setAxisHeader(col.spec.axesSpec[0].name, "cluster_" + string(i))
|
|
181
|
-
linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[0]
|
|
182
|
-
} else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
|
|
183
|
-
cloneTable.add(col, {header: "linker." + string(i)})
|
|
184
|
-
cloneTable.setAxisHeader(col.spec.axesSpec[1].name, "cluster_" + string(i))
|
|
185
|
-
linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[1]
|
|
186
|
-
}
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
// Add cluster size columns if available
|
|
191
|
-
if len(columns.getColumns("clusterSizes")) > 0 {
|
|
192
|
-
for i, col in columns.getColumns("clusterSizes") {
|
|
193
|
-
cloneTable.add(col, {header: "clusterSize." + string(i)})
|
|
194
|
-
// Add the cluster axis header
|
|
195
|
-
for axisIdx, axis in col.spec.axesSpec {
|
|
196
|
-
if axis.name != datasetSpec.axesSpec[1].name {
|
|
197
|
-
cloneTable.setAxisHeader(axis.name, "clusterAxis_" + string(i) + "_" + string(axisIdx))
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
cloneTable.mem("16GiB")
|
|
204
|
-
cloneTable.cpu(1)
|
|
205
|
-
cloneTable = cloneTable.build()
|
|
206
|
-
|
|
207
|
-
// Use render.create to call the filtering and sampling clonotypes template
|
|
208
|
-
filterSampleResult := render.create(filterAndSampleTpl, {
|
|
209
|
-
inputAnchor: args.inputAnchor,
|
|
210
|
-
cloneTable: cloneTable,
|
|
211
|
-
rankingOrder: args.rankingOrder,
|
|
212
|
-
rankingOrderDefault: args.rankingOrderDefault,
|
|
213
|
-
filters: args.filters,
|
|
214
|
-
filterMap: filterMap,
|
|
215
|
-
rankingMap: rankingMap,
|
|
216
|
-
datasetSpec: datasetSpec,
|
|
217
|
-
topClonotypes: args.topClonotypes
|
|
218
|
-
})
|
|
219
|
-
|
|
220
|
-
// Get the filtered and sampled clonotypes P-frame and CSV from the template result
|
|
221
|
-
finalClonotypesCsv := filterSampleResult.output("finalClonotypesCsv", 24 * 60 * 60 * 1000)
|
|
222
|
-
// outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
|
|
223
|
-
|
|
224
|
-
////////// CDR3 Length Calculation //////////
|
|
225
|
-
|
|
226
|
-
cdr3SeqTable := pframes.tsvFileBuilder()
|
|
227
|
-
cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
|
|
228
|
-
|
|
229
|
-
// Must deal with multiple CDR3 sequences (two for each cell in single cell data)
|
|
230
|
-
// Chain will be added in the header as cdr3Sequence.chain and used in python script
|
|
231
|
-
// Notice chain is in spec.domain for single cell data and spec.axesSpec[0].domain for bulk data
|
|
232
|
-
|
|
233
|
-
// Helper function to add chain information to the headers dynamically
|
|
234
|
-
chainMapping := {
|
|
235
|
-
"IG": { "A": "Heavy", "B": "Light" },
|
|
236
|
-
"TCRAB": { "A": "TRA", "B": "TRB" },
|
|
237
|
-
"TCRGD": { "A": "TRG", "B": "TRD" }
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
makeHeaderName := func(col, baseHeaderName, isSingleCell) {
|
|
241
|
-
if isSingleCell {
|
|
242
|
-
chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
|
|
243
|
-
receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
|
|
244
|
-
chainLabel := chainMapping[receptor][chain]
|
|
245
|
-
return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
|
|
246
|
-
} else {
|
|
247
|
-
// For bulk, if chain info is available (e.g. IGH, IGK, IGL)
|
|
248
|
-
chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
|
|
249
|
-
if chainFromDomain != undefined {
|
|
250
|
-
return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
return baseHeaderName
|
|
254
|
-
};
|
|
255
|
-
|
|
256
|
-
// Process CDR3 sequences
|
|
257
|
-
cdr3Sequences := columns.getColumns("cdr3Sequences")
|
|
258
|
-
|
|
259
|
-
for col in cdr3Sequences {
|
|
260
|
-
headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
|
|
261
|
-
cdr3SeqTable.add(col, {header: headerName})
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
// Process V genes
|
|
265
|
-
vGenes := columns.getColumns("VGenes")
|
|
266
|
-
|
|
267
|
-
for col in vGenes {
|
|
268
|
-
headerName := makeHeaderName(col, "vGene", isSingleCell)
|
|
269
|
-
cdr3SeqTable.add(col, {header: headerName})
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
// Process J genes
|
|
273
|
-
jGenes := columns.getColumns("JGenes")
|
|
274
|
-
|
|
275
|
-
for col in jGenes {
|
|
276
|
-
headerName := makeHeaderName(col, "jGene", isSingleCell)
|
|
277
|
-
cdr3SeqTable.add(col, {header: headerName})
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
cdr3SeqTable.mem("16GiB")
|
|
281
|
-
cdr3SeqTable.cpu(1)
|
|
282
|
-
cdr3SeqTableBuilt := cdr3SeqTable.build()
|
|
283
|
-
|
|
284
|
-
cdr3VspectratypeCmd := exec.builder().
|
|
285
|
-
software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
|
|
286
|
-
mem("16GiB").
|
|
287
|
-
cpu(1).
|
|
288
|
-
addFile("cdr3_sequences_input.tsv", cdr3SeqTableBuilt).
|
|
289
|
-
arg("--input_tsv").arg("cdr3_sequences_input.tsv").
|
|
290
|
-
arg("--spectratype_tsv").arg("spectratype.tsv").
|
|
291
|
-
arg("--vj_usage_tsv").arg("vj_usage.tsv") // no dot here
|
|
292
|
-
|
|
293
|
-
// Add top clonotypes argument and file to the builder if provided
|
|
294
|
-
if finalClonotypesCsv != undefined {
|
|
295
|
-
cdr3VspectratypeCmd = cdr3VspectratypeCmd.
|
|
296
|
-
arg("--final_clonotypes_csv").arg("finalClonotypes.csv").
|
|
297
|
-
addFile("finalClonotypes.csv", finalClonotypesCsv)
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
cdr3VspectratypeCmd = cdr3VspectratypeCmd. // continue building the command
|
|
301
|
-
saveFile("spectratype.tsv").
|
|
302
|
-
saveFile("vj_usage.tsv").
|
|
303
|
-
printErrStreamToStdout().
|
|
304
|
-
saveStdoutContent().
|
|
305
|
-
cache(24 * 60 * 60 * 1000).
|
|
306
|
-
run()
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
// Spectratype PFrame structure is [chain][cdr3Length][vGene] -> count
|
|
310
|
-
|
|
311
|
-
cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("spectratype.tsv"),
|
|
312
|
-
"tsv", spectratypeConv.getColumns(),
|
|
313
|
-
{cpu: 1, mem: "16GiB"})
|
|
314
|
-
outputs["cdr3VspectratypePf"] = pframes.exportFrame(cdr3VspectratypePf)
|
|
315
|
-
|
|
316
|
-
// For vjUsage structure is [chain][vGene][jGene] -> count
|
|
317
|
-
vjUsagePf := xsv.importFile(cdr3VspectratypeCmd.getFile("vj_usage.tsv"),
|
|
318
|
-
"tsv", vjUsageConv.getColumns(),
|
|
319
|
-
{cpu: 1, mem: "16GiB"})
|
|
320
|
-
outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
|
|
321
|
-
|
|
322
|
-
return {
|
|
323
|
-
outputs: outputs,
|
|
324
|
-
exports: {}
|
|
325
|
-
}
|
|
326
|
-
})
|
package/src/prerun.tpl.tengo
CHANGED
|
@@ -1,14 +1,24 @@
|
|
|
1
1
|
// Prerun template for clonotype filtering
|
|
2
2
|
wf := import("@platforma-sdk/workflow-tengo:workflow")
|
|
3
|
-
|
|
4
|
-
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
3
|
+
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
5
4
|
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
5
|
+
xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
|
|
6
6
|
pframes := import("@platforma-sdk/workflow-tengo:pframes")
|
|
7
7
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
8
|
+
render := import("@platforma-sdk/workflow-tengo:render")
|
|
9
|
+
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
10
|
+
|
|
11
|
+
spectratypeConv := import(":pf-spectratype-conv")
|
|
12
|
+
vjUsageConv := import(":pf-vj-usage-conv")
|
|
8
13
|
|
|
9
14
|
filterAndSampleTpl := assets.importTemplate(":filter-and-sample")
|
|
10
15
|
|
|
11
16
|
wf.prepare(func(args){
|
|
17
|
+
if is_undefined(args.inputAnchor) {
|
|
18
|
+
return {
|
|
19
|
+
columns: wf.createPBundleBuilder().build()
|
|
20
|
+
}
|
|
21
|
+
}
|
|
12
22
|
// We need a table with cluster ID (optional) | clonotype id | selected ranking columns
|
|
13
23
|
bundleBuilder := wf.createPBundleBuilder()
|
|
14
24
|
bundleBuilder.ignoreMissingDomains() // to make query work for both bulk and single cell data
|
|
@@ -98,8 +108,13 @@ wf.body(func(args) {
|
|
|
98
108
|
outputs := {}
|
|
99
109
|
|
|
100
110
|
if !is_undefined(args.inputAnchor) {
|
|
111
|
+
// Input arguments
|
|
101
112
|
columns := args.columns
|
|
102
113
|
datasetSpec := columns.getSpec(args.inputAnchor)
|
|
114
|
+
topClonotypes := args.topClonotypes
|
|
115
|
+
|
|
116
|
+
// Needed conditional variable
|
|
117
|
+
isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
|
|
103
118
|
|
|
104
119
|
////////// Clonotype Filtering //////////
|
|
105
120
|
// Build clonotype table
|
|
@@ -217,17 +232,119 @@ wf.body(func(args) {
|
|
|
217
232
|
filterSampleResult := render.create(filterAndSampleTpl, {
|
|
218
233
|
inputAnchor: args.inputAnchor,
|
|
219
234
|
cloneTable: cloneTable,
|
|
220
|
-
topClonotypes: args.topClonotypes,
|
|
221
235
|
rankingOrder: args.rankingOrder,
|
|
222
236
|
rankingOrderDefault: args.rankingOrderDefault,
|
|
223
237
|
filters: args.filters,
|
|
224
238
|
filterMap: filterMap,
|
|
225
239
|
rankingMap: rankingMap,
|
|
226
|
-
datasetSpec: datasetSpec
|
|
240
|
+
datasetSpec: datasetSpec,
|
|
241
|
+
topClonotypes: args.topClonotypes
|
|
227
242
|
})
|
|
228
243
|
|
|
229
244
|
// Get the filtered clonotypes from the template result
|
|
230
245
|
outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
|
|
246
|
+
|
|
247
|
+
// Get the filtered and sampled clonotypes P-frame and CSV from the template result
|
|
248
|
+
finalClonotypesCsv := filterSampleResult.output("finalClonotypesCsv", 24 * 60 * 60 * 1000)
|
|
249
|
+
// outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
|
|
250
|
+
|
|
251
|
+
////////// CDR3 Length Calculation //////////
|
|
252
|
+
|
|
253
|
+
cdr3SeqTable := pframes.tsvFileBuilder()
|
|
254
|
+
cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
|
|
255
|
+
|
|
256
|
+
// Must deal with multiple CDR3 sequences (two for each cell in single cell data)
|
|
257
|
+
// Chain will be added in the header as cdr3Sequence.chain and used in python script
|
|
258
|
+
// Notice chain is in spec.domain for single cell data and spec.axesSpec[0].domain for bulk data
|
|
259
|
+
|
|
260
|
+
// Helper function to add chain information to the headers dynamically
|
|
261
|
+
chainMapping := {
|
|
262
|
+
"IG": { "A": "Heavy", "B": "Light" },
|
|
263
|
+
"TCRAB": { "A": "TRA", "B": "TRB" },
|
|
264
|
+
"TCRGD": { "A": "TRG", "B": "TRD" }
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
makeHeaderName := func(col, baseHeaderName, isSingleCell) {
|
|
268
|
+
if isSingleCell {
|
|
269
|
+
chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
|
|
270
|
+
receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
|
|
271
|
+
chainLabel := chainMapping[receptor][chain]
|
|
272
|
+
return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
|
|
273
|
+
} else {
|
|
274
|
+
// For bulk, if chain info is available (e.g. IGH, IGK, IGL)
|
|
275
|
+
chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
|
|
276
|
+
if chainFromDomain != undefined {
|
|
277
|
+
return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
return baseHeaderName
|
|
281
|
+
};
|
|
282
|
+
|
|
283
|
+
// Process CDR3 sequences
|
|
284
|
+
cdr3Sequences := columns.getColumns("cdr3Sequences")
|
|
285
|
+
|
|
286
|
+
for col in cdr3Sequences {
|
|
287
|
+
headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
|
|
288
|
+
cdr3SeqTable.add(col, {header: headerName})
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
// Process V genes
|
|
292
|
+
vGenes := columns.getColumns("VGenes")
|
|
293
|
+
|
|
294
|
+
for col in vGenes {
|
|
295
|
+
headerName := makeHeaderName(col, "vGene", isSingleCell)
|
|
296
|
+
cdr3SeqTable.add(col, {header: headerName})
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
// Process J genes
|
|
300
|
+
jGenes := columns.getColumns("JGenes")
|
|
301
|
+
|
|
302
|
+
for col in jGenes {
|
|
303
|
+
headerName := makeHeaderName(col, "jGene", isSingleCell)
|
|
304
|
+
cdr3SeqTable.add(col, {header: headerName})
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
cdr3SeqTable.mem("16GiB")
|
|
308
|
+
cdr3SeqTable.cpu(1)
|
|
309
|
+
cdr3SeqTableBuilt := cdr3SeqTable.build()
|
|
310
|
+
|
|
311
|
+
cdr3VspectratypeCmd := exec.builder().
|
|
312
|
+
software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
|
|
313
|
+
mem("16GiB").
|
|
314
|
+
cpu(1).
|
|
315
|
+
addFile("cdr3_sequences_input.tsv", cdr3SeqTableBuilt).
|
|
316
|
+
arg("--input_tsv").arg("cdr3_sequences_input.tsv").
|
|
317
|
+
arg("--spectratype_tsv").arg("spectratype.tsv").
|
|
318
|
+
arg("--vj_usage_tsv").arg("vj_usage.tsv") // no dot here
|
|
319
|
+
|
|
320
|
+
// Add top clonotypes argument and file to the builder if provided
|
|
321
|
+
if finalClonotypesCsv != undefined {
|
|
322
|
+
cdr3VspectratypeCmd = cdr3VspectratypeCmd.
|
|
323
|
+
arg("--final_clonotypes_csv").arg("finalClonotypes.csv").
|
|
324
|
+
addFile("finalClonotypes.csv", finalClonotypesCsv)
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
cdr3VspectratypeCmd = cdr3VspectratypeCmd. // continue building the command
|
|
328
|
+
saveFile("spectratype.tsv").
|
|
329
|
+
saveFile("vj_usage.tsv").
|
|
330
|
+
printErrStreamToStdout().
|
|
331
|
+
saveStdoutContent().
|
|
332
|
+
cache(24 * 60 * 60 * 1000).
|
|
333
|
+
run()
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
// Spectratype PFrame structure is [chain][cdr3Length][vGene] -> count
|
|
337
|
+
|
|
338
|
+
cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("spectratype.tsv"),
|
|
339
|
+
"tsv", spectratypeConv.getColumns(),
|
|
340
|
+
{cpu: 1, mem: "16GiB"})
|
|
341
|
+
outputs["cdr3VspectratypePf"] = pframes.exportFrame(cdr3VspectratypePf)
|
|
342
|
+
|
|
343
|
+
// For vjUsage structure is [chain][vGene][jGene] -> count
|
|
344
|
+
vjUsagePf := xsv.importFile(cdr3VspectratypeCmd.getFile("vj_usage.tsv"),
|
|
345
|
+
"tsv", vjUsageConv.getColumns(),
|
|
346
|
+
{cpu: 1, mem: "16GiB"})
|
|
347
|
+
outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
|
|
231
348
|
}
|
|
232
349
|
|
|
233
350
|
return {
|