@platforma-open/milaboratories.top-antibodies.workflow 1.13.2 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,324 @@
1
+ // Data utility functions for clonotype filtering and processing
2
+ slices := import("@platforma-sdk/workflow-tengo:slices")
3
+ pt := import("@platforma-sdk/workflow-tengo:pt")
4
+ ll := import("@platforma-sdk/workflow-tengo:ll")
5
+ times := import("times")
6
+ text := import("text")
7
+
8
// makeHeaderName appends chain information to a column header so that
// per-chain columns end up with distinct header names.
//
// Parameters:
//   col            - column object; only col.spec.domain and
//                    col.spec.axesSpec[0].domain are read
//   baseHeaderName - header prefix, e.g. "cdr3Sequence"
//   isSingleCell   - truthy: derive the suffix from the single-cell chain slot
//                    and receptor; falsy: use the bulk chain domain value
//
// Returns baseHeaderName + "." + <suffix>, or baseHeaderName unchanged when no
// chain information is available.
makeHeaderName := func(col, baseHeaderName, isSingleCell) {
	// receptor -> (single-cell chain slot -> chain label used in headers)
	chainMapping := {
		"IG": { "A": "Heavy", "B": "Light" },
		"TCRAB": { "A": "TRA", "B": "TRB" },
		"TCRGD": { "A": "TRG", "B": "TRD" }
	}

	suffix := undefined
	if isSingleCell {
		chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
		receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
		if chain != undefined {
			// Guard every lookup: indexing a map with undefined is a runtime
			// error, and an unknown receptor must not leak "<undefined>" into
			// the header (that would make chains A and B share one header).
			labels := receptor == undefined ? undefined : chainMapping[receptor]
			suffix = labels == undefined ? undefined : labels[chain]
			if suffix == undefined {
				// Unknown receptor or chain slot: fall back to the raw chain
				// id so headers of different chains stay distinct.
				suffix = chain
			}
		}
	} else {
		// For bulk, chain info may be available on the first axis (e.g. "IGH", "IGK", "IGL")
		suffix = col.spec.axesSpec[0].domain["pl7.app/vdj/chain"]
	}

	if suffix == undefined {
		return baseHeaderName
	}
	return baseHeaderName + "." + suffix // e.g., "cdr3Sequence.Heavy", "cdr3Sequence.IGH"
}
30
+
31
// Prepare clonotype data: filters, ranking columns, linkers, cluster sizes.
//
// Parameters:
//   filters             - list of filter entries; entries with an undefined
//                         `value` are skipped. Reads `value.column` and `filter`.
//   rankingOrder        - list of ranking entries; entries with an undefined
//                         `value` are skipped. Reads `value.column` and `rankingOrder`.
//   rankingOrderDefault - ranking entry used only when no valid entry exists
//                         in rankingOrder
//   columns             - column bundle exposing getColumn(id), getSpec(id)
//                         and getColumns(name)
//   datasetSpec         - dataset spec; axesSpec[1] is treated as the main
//                         (anchor) axis
//
// Returns a map with:
//   structuredMap  - header -> { spec, data } for every column added
//   axisRenames    - original axis name -> alias
//   filterMap      - "Filter_<i>" -> copy of the filter spec plus `valueType`
//   rankingMap     - "Col<i>" -> ranking order
//   addedCols      - true when at least one column was put into structuredMap
//   linkerAxisSpec - alias -> spec of the non-anchor axis of each linker
prepareClonotypeData := func(filters, rankingOrder, rankingOrderDefault, columns, datasetSpec) {
	structuredMap := {}
	axisRenames := {}
	filterMap := {}
	rankingMap := {}
	linkerAxisSpec := {}
	addedAxes := []
	addedCols := false

	anchorAxisName := datasetSpec.axesSpec[1].name

	// If a column does not carry the main anchor axis, all of its axes have to
	// be included: register the alias "cluster_<idx><axisPosition>" for each
	// of them, except the ones listed in skipAxes.
	// Returns the names of the axes that were aliased.
	// NOTE(review): the filter and ranking loops share this alias scheme, so
	// different axes could receive the same alias (e.g. filter 0 and ranking
	// column 0) - confirm whether that can occur in practice.
	aliasForeignAxes := func(colSpec, idx, skipAxes) {
		aliased := []
		axesNames := slices.map(colSpec.axesSpec, func(a) { return a.name })
		if slices.hasElement(axesNames, anchorAxisName) {
			return aliased
		}
		for pos, ax in colSpec.axesSpec {
			if ax.name == anchorAxisName || slices.hasElement(skipAxes, ax.name) {
				continue
			}
			axisRenames[ax.name] = "cluster_" + string(idx) + string(pos)
			aliased = append(aliased, ax.name)
		}
		return aliased
	}

	// Add filters to table
	for i, filter in filters {
		if filter.value == undefined {
			continue
		}
		// Columns added here might also be in ranking list, so we add default IDs
		key := "Filter_" + string(i)
		col := columns.getColumn(filter.value.column)
		colSpec := columns.getSpec(filter.value.column)
		structuredMap[key] = { spec: col.spec, data: col.data }
		addedCols = true

		// Store reference value and filter type associated to this column.
		// The spec is copied so that adding `valueType` does not modify the
		// caller's filter object.
		filterSpec := {}
		for k, v in filter.filter {
			filterSpec[k] = v
		}
		filterSpec["valueType"] = colSpec.valueType
		filterMap[key] = filterSpec

		// Remember the aliased axes so the ranking loop does not re-alias them
		for axisName in aliasForeignAxes(colSpec, i, []) {
			addedAxes = append(addedAxes, axisName)
		}
	}

	// Add ranking columns to table
	validRanks := false
	for i, rankCol in rankingOrder {
		if rankCol.value == undefined {
			continue
		}
		validRanks = true
		key := "Col" + string(i)
		col := columns.getColumn(rankCol.value.column)
		structuredMap[key] = { spec: col.spec, data: col.data }
		addedCols = true
		// Store ranking order for this column
		rankingMap[key] = rankCol.rankingOrder

		// Axes already aliased by a filter column keep their filter alias
		aliasForeignAxes(columns.getSpec(rankCol.value.column), i, addedAxes)
	}

	// If we didn't have any ranking column or none of them were valid
	// @TODO: this is a temporary patch for the issue where rankingOrderDefault
	// is not defined by the time prerun works
	if !validRanks && rankingOrderDefault.value != undefined {
		col := columns.getColumn(rankingOrderDefault.value.column)
		structuredMap["Col0"] = { spec: col.spec, data: col.data }
		addedCols = true
		// Store default ranking order
		rankingMap["Col0"] = rankingOrderDefault.rankingOrder

		aliasForeignAxes(columns.getSpec(rankingOrderDefault.value.column), 0, [])
	}

	// Get linker columns if needed
	for i, col in columns.getColumns("linkers") {
		// Position of the linker axis that is NOT the anchor axis
		otherAxisIdx := undefined
		if anchorAxisName == col.spec.axesSpec[1].name {
			otherAxisIdx = 0
		} else if anchorAxisName == col.spec.axesSpec[0].name {
			otherAxisIdx = 1
		}
		if otherAxisIdx == undefined {
			// Linker is not attached to the anchor axis: nothing is added, so
			// it must not mark the table as having columns.
			continue
		}

		otherAxis := col.spec.axesSpec[otherAxisIdx]
		axisAlias := "cluster_" + string(i)
		structuredMap["linker." + string(i)] = { spec: col.spec, data: col.data }
		axisRenames[otherAxis.name] = axisAlias
		linkerAxisSpec[axisAlias] = otherAxis
		addedCols = true
	}

	// Add cluster size columns if available
	for i, col in columns.getColumns("clusterSizes") {
		structuredMap["clusterSize." + string(i)] = { spec: col.spec, data: col.data }
		addedCols = true
		// Add the cluster axis header
		for axisIdx, axis in col.spec.axesSpec {
			if axis.name != anchorAxisName {
				axisRenames[axis.name] = "clusterAxis_" + string(i) + "_" + string(axisIdx)
			}
		}
	}

	return {
		structuredMap: structuredMap,
		axisRenames: axisRenames,
		filterMap: filterMap,
		rankingMap: rankingMap,
		addedCols: addedCols,
		linkerAxisSpec: linkerAxisSpec
	}
}
163
+
164
// Prepare CDR3 sequence data: CDR3 sequences, V genes, J genes.
//
// Parameters:
//   columns      - column bundle exposing getColumns(name); the groups
//                  "cdr3Sequences", "VGenes" and "JGenes" are read
//   datasetSpec  - dataset spec; axesSpec[1] is renamed to "clonotypeKey"
//   isSingleCell - forwarded to makeHeaderName; when truthy, only CDR3 columns
//                  whose chain index domain is "primary" are kept
//
// Returns { structuredMap, axisRenames }, where structuredMap maps a header
// name to { spec, data }.
prepareCdr3Data := func(columns, datasetSpec, isSingleCell) {
	tableColumns := {}
	renames := {}
	renames[datasetSpec.axesSpec[1].name] = "clonotypeKey"

	// CDR3 sequences
	for col in columns.getColumns("cdr3Sequences") {
		header := makeHeaderName(col, "cdr3Sequence", isSingleCell)
		keep := !isSingleCell || col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary"
		if keep {
			tableColumns[header] = { spec: col.spec, data: col.data }
		}
	}

	// V genes, then J genes: [column group, base header name].
	// NOTE(review): unlike CDR3 sequences these are not restricted to the
	// "primary" chain index in single-cell mode - confirm this is intended.
	geneGroups := [["VGenes", "vGene"], ["JGenes", "jGene"]]
	for group in geneGroups {
		for col in columns.getColumns(group[0]) {
			header := makeHeaderName(col, group[1], isSingleCell)
			tableColumns[header] = { spec: col.spec, data: col.data }
		}
	}

	return {
		structuredMap: tableColumns,
		axisRenames: renames
	}
}
205
+
206
// Ptabler-based filtering implementation.
// Replicates the logic from filter.py.
//
// Parameters:
//   structuredMap - header -> { spec, data }, used as the input frame
//   axisRenames   - original axis name -> alias to project the axis under
//   filterMap     - column header -> filter spec; reads `type`, `reference`
//                   and `valueType`
//   datasetSpec   - dataset spec; axesSpec[1] becomes the "clonotypeKey" axis
//
// Returns { filteredParquet, pframe } taken from the executed ptabler workflow.
filterClonotypes := func(structuredMap, axisRenames, filterMap, datasetSpec) {
	// filter type -> function building the predicate from (column, reference)
	predicateBuilders := {
		"number_greaterThan": func(c, ref) { return c.gt(ref) },
		"number_greaterThanOrEqualTo": func(c, ref) { return c.ge(ref) },
		"number_lessThan": func(c, ref) { return c.lt(ref) },
		"number_lessThanOrEqualTo": func(c, ref) { return c.le(ref) },
		"number_equals": func(c, ref) { return c.eq(ref) },
		"number_notEquals": func(c, ref) { return c.neq(ref) },
		"string_equals": func(c, ref) { return c.eq(string(ref)) },
		"string_notEquals": func(c, ref) { return c.neq(string(ref)) },
		"string_contains": func(c, ref) { return c.strContains(string(ref), {literal: true}) },
		"string_doesNotContain": func(c, ref) { return c.strContains(string(ref), {literal: true}).not() }
	}

	// Helper function to build filter predicate from filter spec
	buildFilterPredicate := func(columnName, filterSpec) {
		filterType := filterSpec["type"]
		column := pt.col(columnName)
		build := predicateBuilders[filterType]
		if build == undefined {
			ll.panic("Unknown filter type: %s", filterType)
			return undefined
		}
		return build(column, filterSpec["reference"])
	}

	mainAxis := datasetSpec.axesSpec[1]

	// Projection: main clonotypeKey axis first, then the other renamed axes,
	// then every column of the input map.
	// NOTE(review): axes and columns are appended in map-iteration order -
	// confirm that the resulting order is stable enough for downstream use.
	projection := [pt.axis(mainAxis.name).alias("clonotypeKey")]
	for origAxis, aliasName in axisRenames {
		projection = append(projection, pt.axis(origAxis).alias(aliasName))
	}
	for colName, _ in structuredMap {
		projection = append(projection, pt.col(colName))
	}

	// Start ptabler workflow; the cacheInputs argument is one day in milliseconds
	oneDayMs := 24 * 60 * 60 * 1000
	wfPt := pt.workflow().cacheInputs(oneDayMs)

	// Create initial frame with axis renames
	dfPt := wfPt.frame(pt.p.full(structuredMap)).select(projection...)

	// Step 1: Apply filters from filterMap (matches filter.py lines 123-164).
	// A filter is applied only when its type family matches the column data
	// type: "string_*" filters for "String" columns, "number_*" for all others.
	filterPredicates := []
	for filterColName, filterSpec in filterMap {
		requiredPrefix := filterSpec["valueType"] == "String" ? "string_" : "number_"
		if text.has_prefix(filterSpec["type"], requiredPrefix) {
			filterPredicates = append(filterPredicates, buildFilterPredicate(filterColName, filterSpec))
		}
	}
	if len(filterPredicates) > 0 {
		dfPt = dfPt.filter(filterPredicates...)
	}

	// Step 2: Add "top" column with value 1 (matches filter.py line 211)
	dfPt = dfPt.withColumns(pt.lit(1).alias("top"))

	// Spec of the saved frame: one axis (clonotypeKey) and the "top" column
	topColumnSpec := {
		name: "pl7.app/vdj/sampling-column",
		valueType: "Int",
		domain: {},
		annotations: {
			"pl7.app/label": "Sampling column",
			"pl7.app/table/visibility": "optional",
			"pl7.app/isSubset": "true"
		}
	}
	frameParams := {
		axes: [{ column: "clonotypeKey", spec: mainAxis }],
		columns: [{ column: "top", spec: topColumnSpec }]
	}

	// Save both the parquet file and the pframe, then execute the workflow
	dfPt.save("filteredClonotypes.parquet")
	dfPt.saveFrameDirect("filteredClonotypes", frameParams)
	wfPtResult := wfPt.run()

	return {
		filteredParquet: wfPtResult.getFile("filteredClonotypes.parquet"),
		pframe: wfPtResult.getFrameDirect("filteredClonotypes")
	}
}
317
+
318
// Public API of this module: the header-naming helper, the two data
// preparation functions and the ptabler-based clonotype filter.
export ll.toStrict({
	filterClonotypes: filterClonotypes,
	makeHeaderName: makeHeaderName,
	prepareCdr3Data: prepareCdr3Data,
	prepareClonotypeData: prepareClonotypeData
})
324
+
@@ -19,7 +19,7 @@ getColumns := func(datasetSpec, featureName, bulkChain) {
19
19
  annotations: {
20
20
  "pl7.app/label": "KABAT sequence " + featureName + " Heavy",
21
21
  "pl7.app/table/orderPriority": "10",
22
- "pl7.app/table/visibility": "default"
22
+ "pl7.app/table/visibility": "default"
23
23
  }
24
24
  }
25
25
  },
@@ -34,7 +34,7 @@ getColumns := func(datasetSpec, featureName, bulkChain) {
34
34
  annotations: {
35
35
  "pl7.app/label": "KABAT positions " + featureName + " Heavy",
36
36
  "pl7.app/table/orderPriority": "9",
37
- "pl7.app/table/visibility": "optional"
37
+ "pl7.app/table/visibility": "optional"
38
38
  }
39
39
  }
40
40
  }
@@ -53,7 +53,7 @@ getColumns := func(datasetSpec, featureName, bulkChain) {
53
53
  annotations: {
54
54
  "pl7.app/label": "KABAT sequence " + featureName + " Light",
55
55
  "pl7.app/table/orderPriority": "8",
56
- "pl7.app/table/visibility": "default"
56
+ "pl7.app/table/visibility": "default"
57
57
  }
58
58
  }
59
59
  },
@@ -68,7 +68,7 @@ getColumns := func(datasetSpec, featureName, bulkChain) {
68
68
  annotations: {
69
69
  "pl7.app/label": "KABAT positions " + featureName + " Light",
70
70
  "pl7.app/table/orderPriority": "7",
71
- "pl7.app/table/visibility": "optional"
71
+ "pl7.app/table/visibility": "optional"
72
72
  }
73
73
  }
74
74
  }
@@ -128,4 +128,3 @@ export ll.toStrict({
128
128
  getColumns: getColumns
129
129
  })
130
130
 
131
-
@@ -57,3 +57,4 @@ getColumns := func() {
57
57
  export ll.toStrict({
58
58
  getColumns: getColumns
59
59
  })
60
+
@@ -52,3 +52,4 @@ getColumns := func() {
52
52
  export ll.toStrict({
53
53
  getColumns: getColumns
54
54
  })
55
+
@@ -48,3 +48,4 @@ getColumns := func(datasetSpec, addRanking) {
48
48
  export ll.toStrict({
49
49
  getColumns: getColumns
50
50
  })
51
+