@platforma-open/milaboratories.top-antibodies.workflow 1.14.0 → 1.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,324 +0,0 @@
1
-
2
- slices := import("@platforma-sdk/workflow-tengo:slices")
3
- pt := import("@platforma-sdk/workflow-tengo:pt")
4
- ll := import("@platforma-sdk/workflow-tengo:ll")
5
- times := import("times")
6
- text := import("text")
7
-
8
-
9
// makeHeaderName appends chain information to a base column header.
//
// Parameters:
//   col            - column object; reads col.spec.domain and col.spec.axesSpec[0].domain
//   baseHeaderName - header prefix, e.g. "cdr3Sequence"
//   isSingleCell   - true for single-cell data, false for bulk data
//
// Returns:
//   - single-cell: baseHeaderName + "." + chain label (e.g. "cdr3Sequence.Heavy").
//     When the receptor/chain pair is not in the label table the raw chain id is
//     used instead (e.g. "cdr3Sequence.A"), so distinct chains never collapse into
//     the same "<undefined>" header.
//   - bulk: baseHeaderName + "." + chain taken from the first axis domain
//     (e.g. "cdr3Sequence.IGH").
//   - baseHeaderName alone when no chain information is available.
makeHeaderName := func(col, baseHeaderName, isSingleCell) {
    if !isSingleCell {
        // Bulk data: the chain (e.g. "IGH", "IGK") is read from the first axis domain
        bulkChain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"]
        if bulkChain != undefined {
            return baseHeaderName + "." + bulkChain
        }
        return baseHeaderName
    }

    // Human-readable labels for the single-cell chain ids of every known receptor
    chainMapping := {
        "IG": { "A": "Heavy", "B": "Light" },
        "TCRAB": { "A": "TRA", "B": "TRB" },
        "TCRGD": { "A": "TRG", "B": "TRD" }
    }

    chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g. "A", "B"
    receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g. "IG", "TCRAB", "TCRGD"

    // Indexing a map with an undefined key is a runtime error, so look the label
    // up only when both keys are present.
    chainLabel := undefined
    if receptor != undefined && chain != undefined {
        chainLabel = chainMapping[receptor][chain]
    }
    if chainLabel == undefined {
        // Unknown receptor or chain id: keep the raw chain id so headers stay distinct
        chainLabel = chain
    }
    if chainLabel == undefined {
        return baseHeaderName
    }
    return baseHeaderName + "." + chainLabel
}
30
-
31
-
32
// prepareClonotypeData collects the columns needed for clonotype filtering and ranking.
//
// Parameters:
//   filters             - list of filter entries; entries with an undefined `value` are skipped.
//                         Each used entry provides `value.column` and a `filter` map.
//   rankingOrder        - list of ranking entries; entries with an undefined `value` are skipped.
//                         Each used entry provides `value.column` and `rankingOrder`.
//   rankingOrderDefault - ranking entry used only when no entry of rankingOrder is usable
//   columns             - column bundle exposing getColumn(id), getSpec(id) and getColumns(name)
//   datasetSpec         - dataset column spec; its second axis (axesSpec[1]) is the main axis
//
// Returns a map with:
//   structuredMap  - header -> { spec, data } ("Filter_<i>", "Col<i>", "linker.<i>", "clusterSize.<i>")
//   axisRenames    - original axis name -> alias for every additional (non-main) axis
//   filterMap      - "Filter_<i>" -> copy of the filter map extended with the column `valueType`
//   rankingMap     - "Col<i>" -> ranking order
//   addedCols      - true when at least one column was put into structuredMap
//   linkerAxisSpec - linker axis alias -> spec of the axis on the other side of the linker
prepareClonotypeData := func(filters, rankingOrder, rankingOrderDefault, columns, datasetSpec) {
    structuredMap := {}
    axisRenames := {}
    filterMap := {}
    rankingMap := {}
    linkerAxisSpec := {}
    addedAxes := []
    addedCols := false

    mainAxisName := datasetSpec.axesSpec[1].name

    // When a column does not carry the main axis, register an alias
    // ("cluster_<idx><axis position>") for each of its axes, except the names listed
    // in axesToSkip. Returns the names of the axes that received an alias.
    aliasForeignAxes := func(colSpec, idx, axesToSkip) {
        aliased := []
        axesNames := slices.map(colSpec.axesSpec, func(a) { return a.name })
        if slices.hasElement(axesNames, mainAxisName) {
            return aliased
        }
        for na, ax in colSpec.axesSpec {
            if ax.name == mainAxisName || slices.hasElement(axesToSkip, ax.name) {
                continue
            }
            axisRenames[ax.name] = "cluster_" + string(idx) + string(na)
            aliased = append(aliased, ax.name)
        }
        return aliased
    }

    // Filter columns. They may also appear in the ranking list, hence the dedicated "Filter_" ids.
    for i, filter in filters {
        if filter.value == undefined {
            continue
        }
        header := "Filter_" + string(i)
        col := columns.getColumn(filter.value.column)
        colSpec := columns.getSpec(filter.value.column)

        structuredMap[header] = { spec: col.spec, data: col.data }
        addedCols = true

        // Work on a copy so that the caller's filter object is not modified
        filterSpec := copy(filter.filter)
        filterSpec["valueType"] = colSpec.valueType
        filterMap[header] = filterSpec

        // Remember the axes aliased here so that ranking columns do not re-alias them
        for axisName in aliasForeignAxes(colSpec, i, []) {
            addedAxes = append(addedAxes, axisName)
        }
    }

    // Ranking columns
    validRanks := false
    for i, rankCol in rankingOrder {
        if rankCol.value == undefined {
            continue
        }
        validRanks = true
        header := "Col" + string(i)
        col := columns.getColumn(rankCol.value.column)

        structuredMap[header] = { spec: col.spec, data: col.data }
        addedCols = true
        rankingMap[header] = rankCol.rankingOrder

        // Axes already aliased by a filter column keep the alias given there
        aliasForeignAxes(columns.getSpec(rankCol.value.column), i, addedAxes)
    }

    // No usable ranking column: fall back to the default one.
    // @TODO: temporary patch for the case where rankingOrderDefault is not yet
    // defined by the time prerun works.
    if !validRanks && rankingOrderDefault.value != undefined {
        header := "Col" + string(0)
        col := columns.getColumn(rankingOrderDefault.value.column)

        structuredMap[header] = { spec: col.spec, data: col.data }
        addedCols = true
        rankingMap[header] = rankingOrderDefault.rankingOrder

        aliasForeignAxes(columns.getSpec(rankingOrderDefault.value.column), 0, [])
    }

    // Linker columns: one of their two axes must be the main axis, the other one gets an alias
    for i, col in columns.getColumns("linkers") {
        otherAxisIdx := -1
        if mainAxisName == col.spec.axesSpec[1].name {
            otherAxisIdx = 0
        } else if mainAxisName == col.spec.axesSpec[0].name {
            otherAxisIdx = 1
        }
        if otherAxisIdx < 0 {
            // Linker is not attached to the main axis: nothing is added for it
            continue
        }

        otherAxis := col.spec.axesSpec[otherAxisIdx]
        axisAlias := "cluster_" + string(i)
        structuredMap["linker." + string(i)] = { spec: col.spec, data: col.data }
        axisRenames[otherAxis.name] = axisAlias
        linkerAxisSpec[axisAlias] = otherAxis
        addedCols = true
    }

    // Cluster size columns: every non-main axis gets a "clusterAxis_<i>_<axis position>" alias
    for i, col in columns.getColumns("clusterSizes") {
        structuredMap["clusterSize." + string(i)] = { spec: col.spec, data: col.data }
        addedCols = true
        for axisIdx, axis in col.spec.axesSpec {
            if axis.name != mainAxisName {
                axisRenames[axis.name] = "clusterAxis_" + string(i) + "_" + string(axisIdx)
            }
        }
    }

    return {
        structuredMap: structuredMap,
        axisRenames: axisRenames,
        filterMap: filterMap,
        rankingMap: rankingMap,
        addedCols: addedCols,
        linkerAxisSpec: linkerAxisSpec
    }
}
163
-
164
-
165
// prepareCdr3Data gathers the CDR3 sequence, V gene and J gene columns.
//
// Parameters:
//   columns      - column bundle; the "cdr3Sequences", "VGenes" and "JGenes" groups are read
//   datasetSpec  - dataset column spec; its second axis is renamed to "clonotypeKey"
//   isSingleCell - true for single-cell data
//
// Returns a map with:
//   structuredMap - header (see makeHeaderName) -> { spec, data }
//   axisRenames   - main axis name -> "clonotypeKey"
prepareCdr3Data := func(columns, datasetSpec, isSingleCell) {
    collected := {}

    // Adds every column of the given group under its generated header. With
    // primaryOnly set, single-cell columns are kept only when their
    // "pl7.app/vdj/scClonotypeChain/index" domain value is "primary".
    collect := func(groupName, baseHeaderName, primaryOnly) {
        for col in columns.getColumns(groupName) {
            headerName := makeHeaderName(col, baseHeaderName, isSingleCell)
            if primaryOnly && isSingleCell && col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] != "primary" {
                continue
            }
            collected[headerName] = { spec: col.spec, data: col.data }
        }
    }

    collect("cdr3Sequences", "cdr3Sequence", true)
    // NOTE(review): V and J gene columns are not restricted to the primary chain here;
    // presumably the "VGenes"/"JGenes" groups already contain a single column per chain - confirm.
    collect("VGenes", "vGene", false)
    collect("JGenes", "jGene", false)

    renames := {}
    renames[datasetSpec.axesSpec[1].name] = "clonotypeKey"

    return {
        structuredMap: collected,
        axisRenames: renames
    }
}
205
-
206
-
207
-
208
// filterClonotypes runs the ptabler-based clonotype filtering.
//
// Parameters:
//   structuredMap - header -> { spec, data } of all input columns
//   axisRenames   - original axis name -> alias for additional axes
//   filterMap     - column header -> filter map with "type", "reference" and "valueType"
//   datasetSpec   - dataset column spec; its second axis becomes the "clonotypeKey" axis
//
// Returns a map with:
//   filteredParquet - the "filteredClonotypes.parquet" file of the workflow result
//   pframe          - the "filteredClonotypes" frame of the workflow result
filterClonotypes := func(structuredMap, axisRenames, filterMap, datasetSpec) {
    // Predicate constructors keyed by filter type. String filters convert the
    // reference value to a string and match substrings literally.
    predicateBuilders := {
        "number_greaterThan": func(c, ref) { return c.gt(ref) },
        "number_greaterThanOrEqualTo": func(c, ref) { return c.ge(ref) },
        "number_lessThan": func(c, ref) { return c.lt(ref) },
        "number_lessThanOrEqualTo": func(c, ref) { return c.le(ref) },
        "number_equals": func(c, ref) { return c.eq(ref) },
        "number_notEquals": func(c, ref) { return c.neq(ref) },
        "string_equals": func(c, ref) { return c.eq(string(ref)) },
        "string_notEquals": func(c, ref) { return c.neq(string(ref)) },
        "string_contains": func(c, ref) { return c.strContains(string(ref), {literal: true}) },
        "string_doesNotContain": func(c, ref) { return c.strContains(string(ref), {literal: true}).not() }
    }

    // Builds the predicate expression for one filter; panics on an unknown filter type
    buildFilterPredicate := func(columnName, filterSpec) {
        filterType := filterSpec["type"]
        referenceValue := filterSpec["reference"]
        col := pt.col(columnName)

        build := predicateBuilders[filterType]
        if build == undefined {
            ll.panic("Unknown filter type: %s", filterType)
            return undefined
        }
        return build(col, referenceValue)
    }

    mainAxisSpec := datasetSpec.axesSpec[1]

    // Projection: main axis first (as "clonotypeKey"), then the renamed axes, then all columns
    projection := [pt.axis(mainAxisSpec.name).alias("clonotypeKey")]
    for origAxis, aliasName in axisRenames {
        projection = append(projection, pt.axis(origAxis).alias(aliasName))
    }
    for colName, _ in structuredMap {
        projection = append(projection, pt.col(colName))
    }

    // 24 * 60 * 60 * 1000 - presumably one day expressed in milliseconds; confirm against the pt API
    inputCacheTime := 24 * 60 * 60 * 1000
    wfPt := pt.workflow().cacheInputs(inputCacheTime)
    dfPt := wfPt.frame(pt.p.full(structuredMap)).select(projection...)

    // Step 1: apply the filters. A filter is used only when its kind matches the column
    // value type: "string_*" filters for "String" columns, "number_*" filters for all
    // other value types (same rule as in filter.py).
    filterPredicates := []
    for filterColName, filterSpec in filterMap {
        expectedPrefix := filterSpec["valueType"] == "String" ? "string_" : "number_"
        if text.has_prefix(filterSpec["type"], expectedPrefix) {
            filterPredicates = append(filterPredicates, buildFilterPredicate(filterColName, filterSpec))
        }
    }
    if len(filterPredicates) > 0 {
        dfPt = dfPt.filter(filterPredicates...)
    }

    // Step 2: add the constant "top" column (value 1) to every remaining row
    dfPt = dfPt.withColumns(pt.lit(1).alias("top"))

    // Spec of the "top" column in the exported frame
    samplingColumnSpec := {
        name: "pl7.app/vdj/sampling-column",
        valueType: "Int",
        domain: {},
        annotations: {
            "pl7.app/label": "Sampling column",
            "pl7.app/table/visibility": "optional",
            "pl7.app/isSubset": "true"
        }
    }
    frameParams := {
        axes: [{
            column: "clonotypeKey",
            spec: mainAxisSpec
        }],
        columns: [{
            column: "top",
            spec: samplingColumnSpec
        }]
    }

    // Save the result both as a parquet file and as a pframe, then run the workflow
    dfPt.save("filteredClonotypes.parquet")
    dfPt.saveFrameDirect("filteredClonotypes", frameParams)
    wfPtResult := wfPt.run()

    return {
        filteredParquet: wfPtResult.getFile("filteredClonotypes.parquet"),
        pframe: wfPtResult.getFrameDirect("filteredClonotypes")
    }
}
317
-
318
// Functions exposed by this module; the map is passed through ll.toStrict before export.
moduleApi := {
    makeHeaderName: makeHeaderName,
    prepareClonotypeData: prepareClonotypeData,
    prepareCdr3Data: prepareCdr3Data,
    filterClonotypes: filterClonotypes
}

export ll.toStrict(moduleApi)
324
-
@@ -1,324 +0,0 @@
1
- // Data utility functions for clonotype filtering and processing
2
- slices := import("@platforma-sdk/workflow-tengo:slices")
3
- pt := import("@platforma-sdk/workflow-tengo:pt")
4
- ll := import("@platforma-sdk/workflow-tengo:ll")
5
- times := import("times")
6
- text := import("text")
7
-
8
// makeHeaderName appends chain information to a base column header.
//
// Parameters:
//   col            - column object; reads col.spec.domain and col.spec.axesSpec[0].domain
//   baseHeaderName - header prefix, e.g. "cdr3Sequence"
//   isSingleCell   - true for single-cell data, false for bulk data
//
// Returns:
//   - single-cell: baseHeaderName + "." + chain label (e.g. "cdr3Sequence.Heavy").
//     When the receptor/chain pair is not in the label table the raw chain id is
//     used instead (e.g. "cdr3Sequence.A"), so distinct chains never collapse into
//     the same "<undefined>" header.
//   - bulk: baseHeaderName + "." + chain taken from the first axis domain
//     (e.g. "cdr3Sequence.IGH").
//   - baseHeaderName alone when no chain information is available.
makeHeaderName := func(col, baseHeaderName, isSingleCell) {
    if !isSingleCell {
        // Bulk data: the chain (e.g. "IGH", "IGK") is read from the first axis domain
        bulkChain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"]
        if bulkChain != undefined {
            return baseHeaderName + "." + bulkChain
        }
        return baseHeaderName
    }

    // Human-readable labels for the single-cell chain ids of every known receptor
    chainMapping := {
        "IG": { "A": "Heavy", "B": "Light" },
        "TCRAB": { "A": "TRA", "B": "TRB" },
        "TCRGD": { "A": "TRG", "B": "TRD" }
    }

    chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g. "A", "B"
    receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g. "IG", "TCRAB", "TCRGD"

    // Indexing a map with an undefined key is a runtime error, so look the label
    // up only when both keys are present.
    chainLabel := undefined
    if receptor != undefined && chain != undefined {
        chainLabel = chainMapping[receptor][chain]
    }
    if chainLabel == undefined {
        // Unknown receptor or chain id: keep the raw chain id so headers stay distinct
        chainLabel = chain
    }
    if chainLabel == undefined {
        return baseHeaderName
    }
    return baseHeaderName + "." + chainLabel
}
30
-
31
// prepareClonotypeData collects the columns needed for clonotype filtering and ranking:
// filter columns, ranking columns, linkers and cluster sizes.
//
// Parameters:
//   filters             - list of filter entries; entries with an undefined `value` are skipped.
//                         Each used entry provides `value.column` and a `filter` map.
//   rankingOrder        - list of ranking entries; entries with an undefined `value` are skipped.
//                         Each used entry provides `value.column` and `rankingOrder`.
//   rankingOrderDefault - ranking entry used only when no entry of rankingOrder is usable
//   columns             - column bundle exposing getColumn(id), getSpec(id) and getColumns(name)
//   datasetSpec         - dataset column spec; its second axis (axesSpec[1]) is the main axis
//
// Returns a map with:
//   structuredMap  - header -> { spec, data } ("Filter_<i>", "Col<i>", "linker.<i>", "clusterSize.<i>")
//   axisRenames    - original axis name -> alias for every additional (non-main) axis
//   filterMap      - "Filter_<i>" -> copy of the filter map extended with the column `valueType`
//   rankingMap     - "Col<i>" -> ranking order
//   addedCols      - true when at least one column was put into structuredMap
//   linkerAxisSpec - linker axis alias -> spec of the axis on the other side of the linker
prepareClonotypeData := func(filters, rankingOrder, rankingOrderDefault, columns, datasetSpec) {
    structuredMap := {}
    axisRenames := {}
    filterMap := {}
    rankingMap := {}
    linkerAxisSpec := {}
    addedAxes := []
    addedCols := false

    mainAxisName := datasetSpec.axesSpec[1].name

    // When a column does not carry the main axis, register an alias
    // ("cluster_<idx><axis position>") for each of its axes, except the names listed
    // in axesToSkip. Returns the names of the axes that received an alias.
    aliasForeignAxes := func(colSpec, idx, axesToSkip) {
        aliased := []
        axesNames := slices.map(colSpec.axesSpec, func(a) { return a.name })
        if slices.hasElement(axesNames, mainAxisName) {
            return aliased
        }
        for na, ax in colSpec.axesSpec {
            if ax.name == mainAxisName || slices.hasElement(axesToSkip, ax.name) {
                continue
            }
            axisRenames[ax.name] = "cluster_" + string(idx) + string(na)
            aliased = append(aliased, ax.name)
        }
        return aliased
    }

    // Filter columns. They may also appear in the ranking list, hence the dedicated "Filter_" ids.
    for i, filter in filters {
        if filter.value == undefined {
            continue
        }
        header := "Filter_" + string(i)
        col := columns.getColumn(filter.value.column)
        colSpec := columns.getSpec(filter.value.column)

        structuredMap[header] = { spec: col.spec, data: col.data }
        addedCols = true

        // Store the reference value and filter type for this column. Work on a copy
        // so that the caller's filter object is not modified.
        filterSpec := copy(filter.filter)
        filterSpec["valueType"] = colSpec.valueType
        filterMap[header] = filterSpec

        // Remember the axes aliased here so that ranking columns do not re-alias them
        for axisName in aliasForeignAxes(colSpec, i, []) {
            addedAxes = append(addedAxes, axisName)
        }
    }

    // Ranking columns
    validRanks := false
    for i, rankCol in rankingOrder {
        if rankCol.value == undefined {
            continue
        }
        validRanks = true
        header := "Col" + string(i)
        col := columns.getColumn(rankCol.value.column)

        structuredMap[header] = { spec: col.spec, data: col.data }
        addedCols = true
        rankingMap[header] = rankCol.rankingOrder

        // Axes already aliased by a filter column keep the alias given there
        aliasForeignAxes(columns.getSpec(rankCol.value.column), i, addedAxes)
    }

    // No ranking column was given, or none of them were valid: fall back to the default one.
    // @TODO: temporary patch for the case where rankingOrderDefault is not yet
    // defined by the time prerun works.
    if !validRanks && rankingOrderDefault.value != undefined {
        header := "Col" + string(0)
        col := columns.getColumn(rankingOrderDefault.value.column)

        structuredMap[header] = { spec: col.spec, data: col.data }
        addedCols = true
        rankingMap[header] = rankingOrderDefault.rankingOrder

        aliasForeignAxes(columns.getSpec(rankingOrderDefault.value.column), 0, [])
    }

    // Linker columns: one of their two axes must be the main axis, the other one gets an alias
    for i, col in columns.getColumns("linkers") {
        otherAxisIdx := -1
        if mainAxisName == col.spec.axesSpec[1].name {
            otherAxisIdx = 0
        } else if mainAxisName == col.spec.axesSpec[0].name {
            otherAxisIdx = 1
        }
        if otherAxisIdx < 0 {
            // Linker is not attached to the main axis: nothing is added for it
            continue
        }

        otherAxis := col.spec.axesSpec[otherAxisIdx]
        axisAlias := "cluster_" + string(i)
        structuredMap["linker." + string(i)] = { spec: col.spec, data: col.data }
        axisRenames[otherAxis.name] = axisAlias
        linkerAxisSpec[axisAlias] = otherAxis
        addedCols = true
    }

    // Cluster size columns: every non-main axis gets a "clusterAxis_<i>_<axis position>" alias
    for i, col in columns.getColumns("clusterSizes") {
        structuredMap["clusterSize." + string(i)] = { spec: col.spec, data: col.data }
        addedCols = true
        for axisIdx, axis in col.spec.axesSpec {
            if axis.name != mainAxisName {
                axisRenames[axis.name] = "clusterAxis_" + string(i) + "_" + string(axisIdx)
            }
        }
    }

    return {
        structuredMap: structuredMap,
        axisRenames: axisRenames,
        filterMap: filterMap,
        rankingMap: rankingMap,
        addedCols: addedCols,
        linkerAxisSpec: linkerAxisSpec
    }
}
163
-
164
// prepareCdr3Data gathers the CDR3 sequence, V gene and J gene columns.
//
// Parameters:
//   columns      - column bundle; the "cdr3Sequences", "VGenes" and "JGenes" groups are read
//   datasetSpec  - dataset column spec; its second axis is renamed to "clonotypeKey"
//   isSingleCell - true for single-cell data
//
// Returns a map with:
//   structuredMap - header (see makeHeaderName) -> { spec, data }
//   axisRenames   - main axis name -> "clonotypeKey"
prepareCdr3Data := func(columns, datasetSpec, isSingleCell) {
    collected := {}

    // Adds every column of the given group under its generated header. With
    // primaryOnly set, single-cell columns are kept only when their
    // "pl7.app/vdj/scClonotypeChain/index" domain value is "primary".
    collect := func(groupName, baseHeaderName, primaryOnly) {
        for col in columns.getColumns(groupName) {
            headerName := makeHeaderName(col, baseHeaderName, isSingleCell)
            if primaryOnly && isSingleCell && col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] != "primary" {
                continue
            }
            collected[headerName] = { spec: col.spec, data: col.data }
        }
    }

    // CDR3 sequences
    collect("cdr3Sequences", "cdr3Sequence", true)
    // V genes and J genes.
    // NOTE(review): these are not restricted to the primary chain; presumably the
    // "VGenes"/"JGenes" groups already contain a single column per chain - confirm.
    collect("VGenes", "vGene", false)
    collect("JGenes", "jGene", false)

    renames := {}
    renames[datasetSpec.axesSpec[1].name] = "clonotypeKey"

    return {
        structuredMap: collected,
        axisRenames: renames
    }
}
205
-
206
// filterClonotypes is the ptabler-based filtering implementation; it replicates
// the logic from filter.py.
//
// Parameters:
//   structuredMap - header -> { spec, data } of all input columns
//   axisRenames   - original axis name -> alias for additional axes
//   filterMap     - column header -> filter map with "type", "reference" and "valueType"
//   datasetSpec   - dataset column spec; its second axis becomes the "clonotypeKey" axis
//
// Returns a map with:
//   filteredParquet - the "filteredClonotypes.parquet" file of the workflow result
//   pframe          - the "filteredClonotypes" frame of the workflow result
filterClonotypes := func(structuredMap, axisRenames, filterMap, datasetSpec) {
    // Predicate constructors keyed by filter type. String filters convert the
    // reference value to a string and match substrings literally.
    predicateBuilders := {
        "number_greaterThan": func(c, ref) { return c.gt(ref) },
        "number_greaterThanOrEqualTo": func(c, ref) { return c.ge(ref) },
        "number_lessThan": func(c, ref) { return c.lt(ref) },
        "number_lessThanOrEqualTo": func(c, ref) { return c.le(ref) },
        "number_equals": func(c, ref) { return c.eq(ref) },
        "number_notEquals": func(c, ref) { return c.neq(ref) },
        "string_equals": func(c, ref) { return c.eq(string(ref)) },
        "string_notEquals": func(c, ref) { return c.neq(string(ref)) },
        "string_contains": func(c, ref) { return c.strContains(string(ref), {literal: true}) },
        "string_doesNotContain": func(c, ref) { return c.strContains(string(ref), {literal: true}).not() }
    }

    // Builds the predicate expression for one filter; panics on an unknown filter type
    buildFilterPredicate := func(columnName, filterSpec) {
        filterType := filterSpec["type"]
        referenceValue := filterSpec["reference"]
        col := pt.col(columnName)

        build := predicateBuilders[filterType]
        if build == undefined {
            ll.panic("Unknown filter type: %s", filterType)
            return undefined
        }
        return build(col, referenceValue)
    }

    mainAxisSpec := datasetSpec.axesSpec[1]

    // Projection: main axis first (as "clonotypeKey"), then the renamed axes, then all columns
    projection := [pt.axis(mainAxisSpec.name).alias("clonotypeKey")]
    for origAxis, aliasName in axisRenames {
        projection = append(projection, pt.axis(origAxis).alias(aliasName))
    }
    for colName, _ in structuredMap {
        projection = append(projection, pt.col(colName))
    }

    // Start the ptabler workflow.
    // 24 * 60 * 60 * 1000 - presumably one day expressed in milliseconds; confirm against the pt API
    inputCacheTime := 24 * 60 * 60 * 1000
    wfPt := pt.workflow().cacheInputs(inputCacheTime)

    // Initial frame with the axis renames applied
    dfPt := wfPt.frame(pt.p.full(structuredMap)).select(projection...)

    // Step 1: apply the filters. A filter is used only when its kind matches the column
    // value type: "string_*" filters for "String" columns, "number_*" filters for all
    // other value types (same rule as in filter.py).
    filterPredicates := []
    for filterColName, filterSpec in filterMap {
        expectedPrefix := filterSpec["valueType"] == "String" ? "string_" : "number_"
        if text.has_prefix(filterSpec["type"], expectedPrefix) {
            filterPredicates = append(filterPredicates, buildFilterPredicate(filterColName, filterSpec))
        }
    }
    if len(filterPredicates) > 0 {
        dfPt = dfPt.filter(filterPredicates...)
    }

    // Step 2: add the constant "top" column (value 1) to every remaining row
    dfPt = dfPt.withColumns(pt.lit(1).alias("top"))

    // Spec of the "top" column in the exported frame
    samplingColumnSpec := {
        name: "pl7.app/vdj/sampling-column",
        valueType: "Int",
        domain: {},
        annotations: {
            "pl7.app/label": "Sampling column",
            "pl7.app/table/visibility": "optional",
            "pl7.app/isSubset": "true"
        }
    }
    frameParams := {
        axes: [{
            column: "clonotypeKey",
            spec: mainAxisSpec
        }],
        columns: [{
            column: "top",
            spec: samplingColumnSpec
        }]
    }

    // Save the result both as a parquet file and as a pframe, then run the workflow
    dfPt.save("filteredClonotypes.parquet")
    dfPt.saveFrameDirect("filteredClonotypes", frameParams)
    wfPtResult := wfPt.run()

    return {
        filteredParquet: wfPtResult.getFile("filteredClonotypes.parquet"),
        pframe: wfPtResult.getFrameDirect("filteredClonotypes")
    }
}
317
-
318
// Functions exposed by this module; the map is passed through ll.toStrict before export.
moduleApi := {
    makeHeaderName: makeHeaderName,
    prepareClonotypeData: prepareClonotypeData,
    prepareCdr3Data: prepareCdr3Data,
    filterClonotypes: filterClonotypes
}

export ll.toStrict(moduleApi)
324
-