@platforma-open/milaboratories.mixcr-clonotyping-2.workflow 2.18.2 → 2.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +3 -1
- package/CHANGELOG.md +13 -0
- package/dist/tengo/lib/calculate-export-specs.lib.tengo +77 -47
- package/dist/tengo/lib/clonotype-label.lib.tengo +121 -0
- package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
- package/dist/tengo/tpl/calculate-preset-info.plj.gz +0 -0
- package/dist/tengo/tpl/list-presets.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/dist/tengo/tpl/process-single-cell.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/test.columns-calculate.plj.gz +0 -0
- package/dist/tengo/tpl/test.columns.test.plj.gz +0 -0
- package/package.json +6 -9
- package/src/aggregate-by-clonotype-key.tpl.tengo +55 -49
- package/src/calculate-export-specs.lib.tengo +77 -47
- package/src/clonotype-label.lib.tengo +121 -0
- package/src/mixcr-export.tpl.tengo +46 -101
- package/src/process-single-cell.tpl.tengo +259 -75
- package/src/process.tpl.tengo +41 -9
|
@@ -4,14 +4,14 @@ pConstants := import("@platforma-sdk/workflow-tengo:pframes.constants")
|
|
|
4
4
|
assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
5
5
|
exec := import("@platforma-sdk/workflow-tengo:exec")
|
|
6
6
|
maps := import("@platforma-sdk/workflow-tengo:maps")
|
|
7
|
-
|
|
7
|
+
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
8
|
+
clonotypeLabel := import(":clonotype-label")
|
|
8
9
|
json := import("json")
|
|
10
|
+
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
9
11
|
|
|
10
12
|
self.defineOutputs("abundanceTsv", "clonotypeTsv", "propertiesAPrimaryTsv", "propertiesASecondaryTsv", "propertiesBPrimaryTsv", "propertiesBSecondaryTsv")
|
|
11
13
|
|
|
12
|
-
|
|
13
|
-
scPreprocessingSw := assets.importSoftware("@platforma-open/milaboratories.mixcr-clonotyping-2.single-cell-scripts:preprocessing")
|
|
14
|
-
scOutputProcessingSw := assets.importSoftware("@platforma-open/milaboratories.mixcr-clonotyping-2.single-cell-scripts:output-processing")
|
|
14
|
+
ptablerSw := assets.importSoftware("@platforma-open/milaboratories.software-ptabler:main")
|
|
15
15
|
|
|
16
16
|
self.body(func(inputs) {
|
|
17
17
|
byCellTagA := inputs[pConstants.VALUE_FIELD_NAME]
|
|
@@ -22,93 +22,277 @@ self.body(func(inputs) {
|
|
|
22
22
|
propertiesA := inputs.propertiesA
|
|
23
23
|
propertiesB := inputs.propertiesB
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
25
|
+
mainAbundanceColumn := inputs.params.mainAbundanceColumn
|
|
26
|
+
mainIsProductiveColumn := inputs.params.mainIsProductiveColumn
|
|
27
|
+
|
|
28
|
+
schemaPerClonotypeNoAggregates := inputs.params.schemaPerClonotypeNoAggregates
|
|
29
|
+
|
|
30
|
+
//
|
|
31
|
+
// Preprocessing
|
|
32
|
+
//
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Universal preprocessing step for A and B chain files
|
|
36
|
+
* @param byCellTag: Map<string:[sampleId], tsv_by_cell_tag>
|
|
37
|
+
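* @return the "output.tsv" file: all samples concatenated, with added sampleId and chainRank columns, keeping only rows whose abundance rank within (sampleId, cellKey) is <= 2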
|
|
38
|
+
*/
|
|
39
|
+
preprocessByCell := func(byCellTag) {
|
|
40
|
+
wf := pt.workflow()
|
|
41
|
+
sampleDataFrames := []
|
|
42
|
+
|
|
43
|
+
inputMap := byCellTag.inputs()
|
|
44
|
+
maps.forEach(inputMap, func(sKey, inputFile) {
|
|
45
|
+
key := json.decode(sKey)
|
|
46
|
+
ll.assert(len(key) == 1, "preprocessByCell: byCellTag key should have one element, got %v", key)
|
|
47
|
+
sampleId := key[0]
|
|
48
|
+
|
|
49
|
+
// Create a DataFrame for the current sample's file
|
|
50
|
+
sampleDf := wf.frame(inputFile, {
|
|
51
|
+
xsvType: "tsv",
|
|
52
|
+
schema: [
|
|
53
|
+
{ column: "cellKey", type: "String" },
|
|
54
|
+
{ column: "clonotypeKey", type: "String" },
|
|
55
|
+
{ column: mainAbundanceColumn, type: "Long" }, // Ensure mainAbundanceColumn is treated as Long
|
|
56
|
+
{ column: mainIsProductiveColumn, type: "String" } // Keep as String for direct comparison
|
|
57
|
+
],
|
|
58
|
+
inferSchema: false // Disable further inference so columns not defined in schema are interpreted as strings
|
|
59
|
+
}).withColumns(
|
|
60
|
+
pt.col("cellKey"), pt.col("clonotypeKey"), pt.col(mainAbundanceColumn), pt.col(mainIsProductiveColumn)
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
// Add sampleId column
|
|
64
|
+
sampleDfWithId := sampleDf.addColumns(
|
|
65
|
+
pt.lit(sampleId).alias("sampleId")
|
|
66
|
+
)
|
|
67
|
+
sampleDataFrames = append(sampleDataFrames, sampleDfWithId)
|
|
68
|
+
})
|
|
69
|
+
|
|
70
|
+
ll.assert(len(sampleDataFrames) > 0, "No input files to process in preprocessByCell")
|
|
71
|
+
|
|
72
|
+
concatenatedDf := pt.concat(sampleDataFrames)
|
|
73
|
+
|
|
74
|
+
// Add rawChainRank column
|
|
75
|
+
dfWithRawRank := concatenatedDf.withColumns(
|
|
76
|
+
pt.rank(pt.col(mainAbundanceColumn), {descending: true}).
|
|
77
|
+
over([pt.col("sampleId"), pt.col("cellKey")]).
|
|
78
|
+
alias("rawChainRank")
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
// Filter by rawChainRank <= 2
|
|
82
|
+
dfFilteredByRawRank := dfWithRawRank.filter(
|
|
83
|
+
pt.col("rawChainRank").le(2)
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
// Prepare expressions for chainRank ordering
|
|
87
|
+
// 1. isProductiveNumeric: "True" -> 0, any other value -> 1 (so productive chains come first in an ascending sort)
|
|
88
|
+
isProductiveNumericExpr := pt.when(pt.col(mainIsProductiveColumn).eq("True")).
|
|
89
|
+
then(pt.lit(0)).
|
|
90
|
+
otherwise(pt.lit(1))
|
|
34
91
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
92
|
+
// 2. negativeAbundance: mainAbundanceColumn * -1 (for ascending sort by this, meaning descending by original abundance)
|
|
93
|
+
negativeAbundanceExpr := pt.col(mainAbundanceColumn).multiply(-1)
|
|
94
|
+
|
|
95
|
+
// Add chainRank column and remove temp column rawChainRank
|
|
96
|
+
dfWithChainRank := dfFilteredByRawRank.withColumns(
|
|
97
|
+
pt.rank([
|
|
98
|
+
isProductiveNumericExpr,
|
|
99
|
+
negativeAbundanceExpr
|
|
100
|
+
], {descending: false}).
|
|
101
|
+
over([pt.col("sampleId"), pt.col("cellKey")]).
|
|
102
|
+
alias("chainRank")
|
|
103
|
+
).withoutColumns("rawChainRank")
|
|
104
|
+
|
|
105
|
+
dfWithChainRank.save("output.tsv")
|
|
106
|
+
|
|
107
|
+
// Run the workflow
|
|
108
|
+
runResult := wf.run()
|
|
109
|
+
return runResult.getFile("output.tsv")
|
|
41
110
|
}
|
|
42
111
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
112
|
+
chainAOutput := preprocessByCell(byCellTagA)
|
|
113
|
+
chainBOutput := preprocessByCell(byCellTagB)
|
|
114
|
+
|
|
115
|
+
//
|
|
116
|
+
// Cell grouping - Reimplemented with PTabler pt API
|
|
117
|
+
//
|
|
118
|
+
cellGroupingWf := pt.workflow()
|
|
46
119
|
|
|
47
|
-
|
|
48
|
-
|
|
120
|
+
chainATableDf := cellGroupingWf.frame(chainAOutput, {xsvType: "tsv"})
|
|
121
|
+
chainBTableDf := cellGroupingWf.frame(chainBOutput, {xsvType: "tsv"})
|
|
122
|
+
|
|
123
|
+
// Dynamically generate filter steps for chains A and B, ranks 1 and 2
|
|
124
|
+
// Store resulting DataFrames in a map for easier access
|
|
125
|
+
rankedDfs := {}
|
|
126
|
+
for chainData in [{prefix: "a", df: chainATableDf}, {prefix: "b", df: chainBTableDf}] {
|
|
127
|
+
for rankVal in [1, 2] {
|
|
128
|
+
dfKey := "chain_" + chainData.prefix + "_rank" + string(rankVal) + "_df"
|
|
129
|
+
rankedDfs[dfKey] = chainData.df.filter(pt.col("chainRank").eq(rankVal))
|
|
130
|
+
}
|
|
49
131
|
}
|
|
50
132
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
133
|
+
chainAMergedDf := rankedDfs["chain_a_rank1_df"].join(rankedDfs["chain_a_rank2_df"], {
|
|
134
|
+
how: "full",
|
|
135
|
+
on: ["sampleId", "cellKey"],
|
|
136
|
+
coalesce: true,
|
|
137
|
+
leftColumns: [{ column: "clonotypeKey", rename: "clonotypeKeyA1" }],
|
|
138
|
+
rightColumns: [{ column: "clonotypeKey", rename: "clonotypeKeyA2" }]
|
|
139
|
+
})
|
|
140
|
+
|
|
141
|
+
chainBMergedDf := rankedDfs["chain_b_rank1_df"].join(rankedDfs["chain_b_rank2_df"], {
|
|
142
|
+
how: "full",
|
|
143
|
+
on: ["sampleId", "cellKey"],
|
|
144
|
+
coalesce: true,
|
|
145
|
+
leftColumns: [{ column: "clonotypeKey", rename: "clonotypeKeyB1" }],
|
|
146
|
+
rightColumns: [{ column: "clonotypeKey", rename: "clonotypeKeyB2" }]
|
|
147
|
+
})
|
|
148
|
+
|
|
149
|
+
allChainsMergedDf := chainAMergedDf.join(chainBMergedDf, {
|
|
150
|
+
how: "full",
|
|
151
|
+
on: ["sampleId", "cellKey"],
|
|
152
|
+
coalesce: true
|
|
153
|
+
})
|
|
154
|
+
|
|
155
|
+
scClonotypeKeyExpr := pt.concatStr(
|
|
156
|
+
[
|
|
157
|
+
pt.col("clonotypeKeyA1").fillNull("NA"),
|
|
158
|
+
pt.col("clonotypeKeyA2").fillNull("NA"),
|
|
159
|
+
pt.col("clonotypeKeyB1").fillNull("NA"),
|
|
160
|
+
pt.col("clonotypeKeyB2").fillNull("NA")
|
|
161
|
+
],
|
|
162
|
+
{delimiter: "#"}
|
|
163
|
+
).hash("sha256", "base64_alphanumeric", 120).alias("scClonotypeKey")
|
|
164
|
+
|
|
165
|
+
allChainsMergedWithScKeyDf := allChainsMergedDf.withColumns(scClonotypeKeyExpr)
|
|
166
|
+
|
|
167
|
+
filterCondition := pt.and(
|
|
168
|
+
pt.col("clonotypeKeyA1").isNotNull(),
|
|
169
|
+
pt.col("clonotypeKeyB1").isNotNull()
|
|
170
|
+
)
|
|
171
|
+
allChainsFilteredDf := allChainsMergedWithScKeyDf.filter(filterCondition)
|
|
172
|
+
|
|
173
|
+
clonotypeTableDf := allChainsFilteredDf.groupBy(
|
|
174
|
+
"scClonotypeKey", "clonotypeKeyA1", "clonotypeKeyA2", "clonotypeKeyB1", "clonotypeKeyB2"
|
|
175
|
+
).agg(
|
|
176
|
+
pt.col("sampleId").nUnique().alias("sampleCount")
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
clonotypeTableDf = clonotypeLabel.addClonotypeLabelColumnsPt(clonotypeTableDf, "scClonotypeKey", "clonotypeLabel", pt)
|
|
180
|
+
|
|
181
|
+
clonotypeTableDf.save("clonotype.tsv")
|
|
182
|
+
|
|
183
|
+
cellCountsDf := allChainsFilteredDf.groupBy("sampleId", "scClonotypeKey").agg(
|
|
184
|
+
pt.col("cellKey").count().alias("uniqueCellCount")
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
cellCountsWithFractionDf := cellCountsDf.withColumns(
|
|
188
|
+
pt.col("uniqueCellCount").truediv(
|
|
189
|
+
pt.col("uniqueCellCount").sum().over("sampleId")
|
|
190
|
+
).alias("uniqueCellFraction")
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
cellCountsWithFractionDf.save("abundance.tsv")
|
|
194
|
+
|
|
195
|
+
cellGroupingRunResult := cellGroupingWf.run()
|
|
196
|
+
|
|
197
|
+
clonotypeTsv := cellGroupingRunResult.getFile("clonotype.tsv")
|
|
198
|
+
abundanceTsv := cellGroupingRunResult.getFile("abundance.tsv")
|
|
199
|
+
|
|
200
|
+
//
|
|
201
|
+
// Output processing - Reimplemented with PTabler pt API
|
|
202
|
+
//
|
|
78
203
|
|
|
79
204
|
propertiesAFile := propertiesA.inputs()["[]"]
|
|
80
205
|
propertiesBFile := propertiesB.inputs()["[]"]
|
|
81
206
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
207
|
+
outputProcessingWf := pt.workflow()
|
|
208
|
+
|
|
209
|
+
// Schema for clonotype.tsv (main_clonotypes table)
|
|
210
|
+
clonotypeTableSchema := [
|
|
211
|
+
{ column: "scClonotypeKey", type: "String" },
|
|
212
|
+
{ column: "clonotypeKeyA1", type: "String" },
|
|
213
|
+
{ column: "clonotypeKeyA2", type: "String" },
|
|
214
|
+
{ column: "clonotypeKeyB1", type: "String" },
|
|
215
|
+
{ column: "clonotypeKeyB2", type: "String" },
|
|
216
|
+
{ column: "sampleCount", type: "Int" }
|
|
217
|
+
]
|
|
218
|
+
|
|
219
|
+
mainClonotypesDf := outputProcessingWf.frame(clonotypeTsv, {
|
|
220
|
+
xsvType: "tsv",
|
|
221
|
+
schema: clonotypeTableSchema,
|
|
222
|
+
inferSchema: false
|
|
223
|
+
})
|
|
224
|
+
|
|
225
|
+
propsASchema := [{ column: "clonotypeKey", type: "String" }]
|
|
226
|
+
propsADf := outputProcessingWf.frame(propertiesAFile, {
|
|
227
|
+
xsvType: "tsv",
|
|
228
|
+
schema: propsASchema,
|
|
229
|
+
inferSchema: false
|
|
230
|
+
})
|
|
231
|
+
|
|
232
|
+
propsBSchema := [{ column: "clonotypeKey", type: "String" }]
|
|
233
|
+
propsBDf := outputProcessingWf.frame(propertiesBFile, {
|
|
234
|
+
xsvType: "tsv",
|
|
235
|
+
id: "props_b",
|
|
236
|
+
schema: propsBSchema,
|
|
237
|
+
inferSchema: false
|
|
238
|
+
})
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
clonotypeColumnNames := []
|
|
242
|
+
for cc in schemaPerClonotypeNoAggregates {
|
|
243
|
+
clonotypeColumnNames = append(clonotypeColumnNames, cc.column)
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
finalOutputColumns := ["scClonotypeKey", "clonotypeKey"] + clonotypeColumnNames
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
chainMappings := [
|
|
250
|
+
{ chainKeyCol: "clonotypeKeyA1", propsTable: "props_a", internalOutTable: "props_a1_joined", finalOutFile: "properties_a_primary.tsv" },
|
|
251
|
+
{ chainKeyCol: "clonotypeKeyA2", propsTable: "props_a", internalOutTable: "props_a2_joined", finalOutFile: "properties_a_secondary.tsv" },
|
|
252
|
+
{ chainKeyCol: "clonotypeKeyB1", propsTable: "props_b", internalOutTable: "props_b1_joined", finalOutFile: "properties_b_primary.tsv" },
|
|
253
|
+
{ chainKeyCol: "clonotypeKeyB2", propsTable: "props_b", internalOutTable: "props_b2_joined", finalOutFile: "properties_b_secondary.tsv" }
|
|
254
|
+
]
|
|
255
|
+
|
|
256
|
+
propsMapDfs := {
|
|
257
|
+
"props_a": propsADf,
|
|
258
|
+
"props_b": propsBDf
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
for mapping in chainMappings {
|
|
262
|
+
filteredClonotypesDf := mainClonotypesDf.filter(
|
|
263
|
+
pt.col(mapping.chainKeyCol).isNotNull()
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
filteredClonotypesWithKeyDf := filteredClonotypesDf.withColumns(
|
|
267
|
+
pt.col(mapping.chainKeyCol).alias("clonotypeKey")
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
leftDfForJoin := propsMapDfs[mapping.propsTable]
|
|
271
|
+
|
|
272
|
+
joinedDf := leftDfForJoin.join(filteredClonotypesWithKeyDf, {
|
|
273
|
+
how: "inner",
|
|
274
|
+
on: ["clonotypeKey"]
|
|
275
|
+
})
|
|
276
|
+
|
|
277
|
+
joinedDf.save(mapping.finalOutFile, {
|
|
278
|
+
columns: finalOutputColumns,
|
|
279
|
+
xsvType: "tsv"
|
|
280
|
+
})
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
outputProcessingRunResult := outputProcessingWf.run()
|
|
101
284
|
|
|
102
285
|
return {
|
|
103
286
|
// must have sampleId and scClonotypeKey columns
|
|
104
287
|
abundanceTsv: abundanceTsv,
|
|
105
|
-
|
|
288
|
+
|
|
289
|
+
// used for aggregates (i.e. sampleCount and clonotypeLabel)
|
|
106
290
|
clonotypeTsv: clonotypeTsv,
|
|
107
291
|
|
|
108
292
|
// each of the properties files below must have a scClonotypeKey column
|
|
109
|
-
propertiesAPrimaryTsv:
|
|
110
|
-
propertiesASecondaryTsv:
|
|
111
|
-
propertiesBPrimaryTsv:
|
|
112
|
-
propertiesBSecondaryTsv:
|
|
293
|
+
propertiesAPrimaryTsv: outputProcessingRunResult.getFile(chainMappings[0].finalOutFile),
|
|
294
|
+
propertiesASecondaryTsv: outputProcessingRunResult.getFile(chainMappings[1].finalOutFile),
|
|
295
|
+
propertiesBPrimaryTsv: outputProcessingRunResult.getFile(chainMappings[2].finalOutFile),
|
|
296
|
+
propertiesBSecondaryTsv: outputProcessingRunResult.getFile(chainMappings[3].finalOutFile)
|
|
113
297
|
}
|
|
114
298
|
})
|
package/src/process.tpl.tengo
CHANGED
|
@@ -7,6 +7,7 @@ assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
|
7
7
|
pframes := import("@platforma-sdk/workflow-tengo:pframes")
|
|
8
8
|
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
9
9
|
maps := import("@platforma-sdk/workflow-tengo:maps")
|
|
10
|
+
sets := import("@platforma-sdk/workflow-tengo:sets")
|
|
10
11
|
|
|
11
12
|
calculateExportSpecs := import(":calculate-export-specs")
|
|
12
13
|
|
|
@@ -200,9 +201,9 @@ self.body(func(inputs) {
|
|
|
200
201
|
|
|
201
202
|
columnsSpecPerSample := exportSpecs.columnsSpecPerSample
|
|
202
203
|
columnsSpecPerSampleSc := exportSpecs.columnsSpecPerSampleSc
|
|
203
|
-
|
|
204
|
+
columnsSpecPerClonotypeNoAggregates := exportSpecs.columnsSpecPerClonotypeNoAggregates
|
|
205
|
+
columnsSpecPerClonotypeAggregates := exportSpecs.columnsSpecPerClonotypeAggregates
|
|
204
206
|
columnsSpecPerClonotypeSc := exportSpecs.columnsSpecPerClonotypeSc
|
|
205
|
-
// columnsSpec := exportSpecs.columnsSpec
|
|
206
207
|
|
|
207
208
|
clonotypeKeyColumns := exportSpecs.clonotypeKeyColumns
|
|
208
209
|
clonotypeKeyArgs := exportSpecs.clonotypeKeyArgs
|
|
@@ -214,8 +215,13 @@ self.body(func(inputs) {
|
|
|
214
215
|
|
|
215
216
|
exportArgs := exportSpecs.exportArgs
|
|
216
217
|
|
|
218
|
+
mainIsProductiveColumn := exportSpecs.mainIsProductiveColumn
|
|
219
|
+
mainIsProductiveArgs := exportSpecs.mainIsProductiveArgs
|
|
220
|
+
|
|
217
221
|
mainAbundanceColumnNormalized := exportSpecs.mainAbundanceColumnNormalized
|
|
218
222
|
mainAbundanceColumnUnnormalized := exportSpecs.mainAbundanceColumnUnnormalized
|
|
223
|
+
mainAbundanceColumnNormalizedArgs := exportSpecs.mainAbundanceColumnNormalizedArgs
|
|
224
|
+
mainAbundanceColumnUnnormalizedArgs := exportSpecs.mainAbundanceColumnUnnormalizedArgs
|
|
219
225
|
|
|
220
226
|
if is_undefined(axesByClonotypeKey) {
|
|
221
227
|
ll.panic("Absent clonotype key not supported")
|
|
@@ -284,6 +290,19 @@ self.body(func(inputs) {
|
|
|
284
290
|
|
|
285
291
|
perChainResults := {}
|
|
286
292
|
|
|
293
|
+
// Maps column specs to schema entries ({ column, type: spec.valueType }), keeping only the first occurrence of each column name
|
|
294
|
+
columnsToSchema := func(columns) {
|
|
295
|
+
schema := []
|
|
296
|
+
columnsAdded := {}
|
|
297
|
+
for col in columns {
|
|
298
|
+
if !columnsAdded[col.column] {
|
|
299
|
+
schema += [ { column: col.column, type: col.spec.valueType } ]
|
|
300
|
+
columnsAdded[col.column] = true
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
return schema
|
|
304
|
+
}
|
|
305
|
+
|
|
287
306
|
for chain in chains {
|
|
288
307
|
chainInfo := chainInfos[chain]
|
|
289
308
|
ll.assert(!is_undefined(chainInfo), "chainInfo not found for chain %v", chain)
|
|
@@ -356,6 +375,14 @@ self.body(func(inputs) {
|
|
|
356
375
|
chains: chainInfo.mixcrFilter,
|
|
357
376
|
clonotypeKeyColumns: clonotypeKeyColumns,
|
|
358
377
|
clonotypeKeyArgs: clonotypeKeyArgs,
|
|
378
|
+
|
|
379
|
+
mainIsProductiveColumn: mainIsProductiveColumn,
|
|
380
|
+
mainIsProductiveArgs: mainIsProductiveArgs,
|
|
381
|
+
mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
|
|
382
|
+
mainAbundanceColumnUnnormalized: mainAbundanceColumnUnnormalized,
|
|
383
|
+
mainAbundanceColumnNormalizedArgs: mainAbundanceColumnNormalizedArgs,
|
|
384
|
+
mainAbundanceColumnUnnormalizedArgs: mainAbundanceColumnUnnormalizedArgs,
|
|
385
|
+
|
|
359
386
|
cellTagColumns: cellTagColumns,
|
|
360
387
|
exportArgs: exportArgs,
|
|
361
388
|
isLibraryFileGzipped: isLibraryFileGzipped
|
|
@@ -392,7 +419,7 @@ self.body(func(inputs) {
|
|
|
392
419
|
xsvType: "tsv",
|
|
393
420
|
settings: {
|
|
394
421
|
axes: axesByClonotypeKeyWithChain,
|
|
395
|
-
columns:
|
|
422
|
+
columns: columnsSpecPerClonotypeNoAggregates + columnsSpecPerClonotypeAggregates,
|
|
396
423
|
storageFormat: "Binary",
|
|
397
424
|
partitionKeyLength: 0
|
|
398
425
|
},
|
|
@@ -411,9 +438,9 @@ self.body(func(inputs) {
|
|
|
411
438
|
params: {
|
|
412
439
|
mainAbundanceColumnNormalized: mainAbundanceColumnNormalized,
|
|
413
440
|
mainAbundanceColumnUnnormalized: mainAbundanceColumnUnnormalized,
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
441
|
+
schemaPerClonotypeNoAggregates: columnsToSchema(columnsSpecPerClonotypeNoAggregates),
|
|
442
|
+
schemaPerClonotypeAggregates: columnsToSchema(columnsSpecPerClonotypeAggregates),
|
|
443
|
+
schemaPerSample: columnsToSchema(columnsSpecPerSample)
|
|
417
444
|
}
|
|
418
445
|
}
|
|
419
446
|
}
|
|
@@ -480,7 +507,7 @@ self.body(func(inputs) {
|
|
|
480
507
|
|
|
481
508
|
// for the secondary chain keep only the aaSeqCDR3 column
|
|
482
509
|
columnsSpecPerClonotypeSecondary := []
|
|
483
|
-
for col in
|
|
510
|
+
for col in columnsSpecPerClonotypeNoAggregates {
|
|
484
511
|
if col.column == "aaSeqCDR3" {
|
|
485
512
|
columnsSpecPerClonotypeSecondary += [ col ]
|
|
486
513
|
}
|
|
@@ -530,7 +557,7 @@ self.body(func(inputs) {
|
|
|
530
557
|
xsvType: "tsv",
|
|
531
558
|
settings: {
|
|
532
559
|
axes: axesByScClonotypeKeyWithReceptor,
|
|
533
|
-
columns: transformSpecs(isPrimary ?
|
|
560
|
+
columns: transformSpecs(isPrimary ? columnsSpecPerClonotypeNoAggregates : columnsSpecPerClonotypeSecondary, {
|
|
534
561
|
spec: {
|
|
535
562
|
domain: {
|
|
536
563
|
"pl7.app/vdj/scClonotypeChain": chainLetterU,
|
|
@@ -563,7 +590,12 @@ self.body(func(inputs) {
|
|
|
563
590
|
extra: {
|
|
564
591
|
byCellTagB: perChainResults[chainB].tsvForSingleCell.data,
|
|
565
592
|
propertiesA: perChainResults[chainA].clonotypeProperties.data,
|
|
566
|
-
propertiesB: perChainResults[chainB].clonotypeProperties.data
|
|
593
|
+
propertiesB: perChainResults[chainB].clonotypeProperties.data,
|
|
594
|
+
params: {
|
|
595
|
+
mainAbundanceColumn: mainAbundanceColumnUnnormalized,
|
|
596
|
+
mainIsProductiveColumn: mainIsProductiveColumn,
|
|
597
|
+
schemaPerClonotypeNoAggregates: columnsToSchema(columnsSpecPerClonotypeNoAggregates)
|
|
598
|
+
}
|
|
567
599
|
}
|
|
568
600
|
}
|
|
569
601
|
)
|