@platforma-open/milaboratories.top-antibodies.workflow 1.13.2 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +11 -11
- package/CHANGELOG.md +15 -0
- package/dist/index.cjs +0 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +0 -1
- package/dist/tengo/lib/libs.data-utils.lib.tengo +324 -0
- package/dist/tengo/lib/{pf-kabat-conv.lib.tengo → libs.pf-kabat-conv.lib.tengo} +4 -5
- package/dist/tengo/lib/{pf-spectratype-conv.lib.tengo → libs.pf-spectratype-conv.lib.tengo} +1 -0
- package/{src/pf-vj-usage-conv.lib.tengo → dist/tengo/lib/libs.pf-vj-usage-conv.lib.tengo} +1 -0
- package/dist/tengo/lib/{sampled-cols-conv.lib.tengo → libs.sampled-cols-conv.lib.tengo} +1 -0
- package/dist/tengo/tpl/assembling-fasta.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/prerun.plj.gz +0 -0
- package/package.json +9 -9
- package/src/assembling-fasta.tpl.tengo +4 -4
- package/src/libs/data-utils.lib.tengo +324 -0
- package/src/{pf-kabat-conv.lib.tengo → libs/pf-kabat-conv.lib.tengo} +4 -5
- package/src/{pf-spectratype-conv.lib.tengo → libs/pf-spectratype-conv.lib.tengo} +1 -0
- package/{dist/tengo/lib → src/libs}/pf-vj-usage-conv.lib.tengo +1 -0
- package/src/{sampled-cols-conv.lib.tengo → libs/sampled-cols-conv.lib.tengo} +1 -0
- package/src/prerun.tpl.tengo +71 -213
- package/dist/tengo/tpl/filter-and-sample.plj.gz +0 -0
- package/src/filter-and-sample.tpl.tengo +0 -81
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
// Data utility functions for clonotype filtering and processing
|
|
2
|
+
slices := import("@platforma-sdk/workflow-tengo:slices")
|
|
3
|
+
pt := import("@platforma-sdk/workflow-tengo:pt")
|
|
4
|
+
ll := import("@platforma-sdk/workflow-tengo:ll")
|
|
5
|
+
times := import("times")
|
|
6
|
+
text := import("text")
|
|
7
|
+
|
|
8
|
+
// Helper function to add chain information to the headers dynamically.
//
// Params:
//   col            - column wrapper exposing spec.domain and spec.axesSpec
//   baseHeaderName - base header, e.g. "cdr3Sequence"
//   isSingleCell   - true: chain label derived from scClonotypeChain + receptor
//                    domains; false: chain taken from the bulk chain axis domain
//
// Returns baseHeaderName suffixed with the chain label when one can be
// derived (e.g. "cdr3Sequence.Heavy", "cdr3Sequence.IGH"), otherwise
// baseHeaderName unchanged.
makeHeaderName := func(col, baseHeaderName, isSingleCell) {
    // receptor -> scClonotypeChain ("A"/"B") -> human-readable chain label
    chainMapping := {
        "IG": { "A": "Heavy", "B": "Light" },
        "TCRAB": { "A": "TRA", "B": "TRB" },
        "TCRGD": { "A": "TRG", "B": "TRD" }
    }

    if isSingleCell {
        chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
        receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
        // Guard against receptor/chain values outside the hard-coded mapping:
        // a missing map key yields undefined in tengo, and indexing or
        // concatenating undefined would blow up at runtime. Fall back to the
        // plain base name instead.
        receptorMap := chainMapping[receptor]
        if receptorMap != undefined {
            chainLabel := receptorMap[chain]
            if chainLabel != undefined {
                return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
            }
        }
    } else {
        // For bulk, if chain info is available (e.g. IGH, IGK, IGL)
        chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
        if chainFromDomain != undefined {
            return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
        }
    }
    return baseHeaderName
}
|
|
30
|
+
|
|
31
|
+
// Prepare clonotype data: filters, ranking columns, linkers, cluster sizes.
//
// Collects every column required by the downstream ptabler filtering step
// into one map, plus the axis-rename bookkeeping needed to join everything
// on the anchor clonotype axis (datasetSpec.axesSpec[1]).
//
// Params:
//   filters             - list of { value: { column }, filter } entries;
//                         entries with value == undefined are skipped
//   rankingOrder        - list of { value: { column }, rankingOrder } entries
//   rankingOrderDefault - fallback ranking entry used when no rankingOrder
//                         entry is valid
//   columns             - project column accessor (getColumn / getSpec /
//                         getColumns) -- exact contract lives in the SDK
//   datasetSpec         - anchor dataset spec; axesSpec[1] is assumed to be
//                         the clonotype key axis
//
// Returns a map with:
//   structuredMap:  key -> { spec, data } for every collected column
//   axisRenames:    original axis name -> alias used in the ptabler frame
//   filterMap:      "Filter_<i>" -> filter spec, extended with the source
//                   column's valueType (used later for type validation)
//   rankingMap:     "Col<i>" -> ranking order for that column
//   addedCols:      true when at least one column was collected
//   linkerAxisSpec: alias -> original axis spec for linker axes
prepareClonotypeData := func(filters, rankingOrder, rankingOrderDefault, columns, datasetSpec) {
    structuredMap := {}
    axisRenames := {}
    filterMap := {}
    rankingMap := {}
    addedAxes := []     // original axis names already scheduled for rename
    addedCols := false
    linkerAxisSpec := {}

    // Add Filters to table
    if len(filters) > 0 {
        for i, filter in filters {
            if filter.value != undefined {
                // Columns added here might also be in ranking list, so we add default IDs
                col := columns.getColumn(filter.value.column)
                structuredMap["Filter_" + string(i)] = { spec: col.spec, data: col.data }
                addedCols = true
                // Store reference value and filter type associated to this column
                filterMap["Filter_" + string(i)] = filter.filter
                filterMap["Filter_" + string(i)]["valueType"] = columns.getSpec(filter.value.column).valueType

                // If column does not have main anchor axis we have to include theirs
                colsSpec := columns.getSpec(filter.value.column)
                axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
                if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
                    for na, ax in colsSpec.axesSpec {
                        if ax.name != datasetSpec.axesSpec[1].name {
                            axisAlias := "cluster_" + string(i) + string(na)
                            axisRenames[ax.name] = axisAlias
                            addedAxes = append(addedAxes, ax.name)
                        }
                    }
                }
            }
        }
    }

    // Add ranking columns to table
    validRanks := false
    if len(rankingOrder) > 0 {
        for i, rankCol in rankingOrder {
            if rankCol.value != undefined {
                validRanks = true
                col := columns.getColumn(rankCol.value.column)
                structuredMap["Col" + string(i)] = { spec: col.spec, data: col.data }
                addedCols = true
                // Store ranking order for this column
                rankingMap["Col" + string(i)] = rankCol.rankingOrder

                // If column does not have main anchor axis we have to include theirs
                colsSpec := columns.getSpec(rankCol.value.column)
                axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
                if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
                    for na, ax in colsSpec.axesSpec {
                        // NOTE(review): unlike the filter loop above, axes renamed
                        // here are not appended to addedAxes, so two ranking
                        // columns sharing an extra axis would each overwrite the
                        // alias -- confirm last-one-wins is intended.
                        if ax.name != datasetSpec.axesSpec[1].name && !slices.hasElement(addedAxes, ax.name) {
                            axisAlias := "cluster_" + string(i) + string(na)
                            axisRenames[ax.name] = axisAlias
                        }
                    }
                }
            }
        }
    }
    // If we didn't have any ranking column or all where not valid
    if !validRanks {
        // @TODO: this is a temporal patch for issue where rankingOrderDefault
        // are not defined by the time prerun works
        if rankingOrderDefault.value != undefined {
            i := 0
            col := columns.getColumn(rankingOrderDefault.value.column)
            structuredMap["Col" + string(i)] = { spec: col.spec, data: col.data }
            addedCols = true
            // Store default ranking order
            rankingMap["Col" + string(i)] = rankingOrderDefault.rankingOrder

            // If column does not have main anchor axis we have to include theirs
            colsSpec := columns.getSpec(rankingOrderDefault.value.column)
            axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
            if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
                for na, ax in colsSpec.axesSpec {
                    if ax.name != datasetSpec.axesSpec[1].name {
                        axisAlias := "cluster_" + string(i) + string(na)
                        axisRenames[ax.name] = axisAlias
                    }
                }
            }
        }
    }

    // Get linker columns if needed.
    // The anchor clonotype axis may sit at either position of a linker
    // column; whichever side it is on, the *other* axis is renamed to
    // "cluster_<i>" and its spec is remembered in linkerAxisSpec.
    // NOTE(review): the "cluster_<i>" alias pattern can collide with the
    // "cluster_<i><na>" aliases built above for some index combinations --
    // verify aliases stay unique in practice.
    if len(columns.getColumns("linkers")) > 0 {
        for i, col in columns.getColumns("linkers") {
            if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
                structuredMap["linker." + string(i)] = { spec: col.spec, data: col.data }
                axisAlias := "cluster_" + string(i)
                axisRenames[col.spec.axesSpec[0].name] = axisAlias
                linkerAxisSpec[axisAlias] = col.spec.axesSpec[0]
            } else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
                structuredMap["linker." + string(i)] = { spec: col.spec, data: col.data }
                axisAlias := "cluster_" + string(i)
                axisRenames[col.spec.axesSpec[1].name] = axisAlias
                linkerAxisSpec[axisAlias] = col.spec.axesSpec[1]
            }
            addedCols = true
        }
    }

    // Add cluster size columns if available
    if len(columns.getColumns("clusterSizes")) > 0 {
        for i, col in columns.getColumns("clusterSizes") {
            structuredMap["clusterSize." + string(i)] = { spec: col.spec, data: col.data }
            addedCols = true
            // Add the cluster axis header: rename every non-anchor axis
            for axisIdx, axis in col.spec.axesSpec {
                if axis.name != datasetSpec.axesSpec[1].name {
                    axisAlias := "clusterAxis_" + string(i) + "_" + string(axisIdx)
                    axisRenames[axis.name] = axisAlias
                }
            }
        }
    }

    return {
        structuredMap: structuredMap,
        axisRenames: axisRenames,
        filterMap: filterMap,
        rankingMap: rankingMap,
        addedCols: addedCols,
        linkerAxisSpec: linkerAxisSpec
    }
}
|
|
163
|
+
|
|
164
|
+
// Prepare CDR3 sequence data: CDR3 sequences, V genes, J genes.
//
// Gathers the CDR3 sequence, V-gene and J-gene columns under chain-aware
// header names (via makeHeaderName) and renames the anchor clonotype axis
// (datasetSpec.axesSpec[1]) to "clonotypeKey".
//
// Returns { structuredMap: header -> { spec, data }, axisRenames }.
prepareCdr3Data := func(columns, datasetSpec, isSingleCell) {
    seqMap := {}
    renames := {}
    renames[datasetSpec.axesSpec[1].name] = "clonotypeKey"

    // Register one column under its chain-aware header name.
    addEntry := func(col, base) {
        seqMap[makeHeaderName(col, base, isSingleCell)] = { spec: col.spec, data: col.data }
    }

    // CDR3 sequences; for single-cell data only the primary chain is kept.
    for col in columns.getColumns("cdr3Sequences") {
        if !isSingleCell || col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
            addEntry(col, "cdr3Sequence")
        }
    }

    // V genes
    for col in columns.getColumns("VGenes") {
        addEntry(col, "vGene")
    }

    // J genes
    for col in columns.getColumns("JGenes") {
        addEntry(col, "jGene")
    }

    return {
        structuredMap: seqMap,
        axisRenames: renames
    }
}
|
|
205
|
+
|
|
206
|
+
// Ptabler-based filtering implementation
// Replicates the logic from filter.py
//
// Builds a ptabler workflow that (1) projects the frame with the requested
// axis renames, (2) applies every type-compatible predicate from filterMap,
// and (3) adds a constant "top" sampling column, then materialises the
// result both as a parquet file and as a pframe.
//
// Params:
//   structuredMap - key -> { spec, data } columns (from prepareClonotypeData)
//   axisRenames   - original axis name -> alias
//   filterMap     - column key -> filter spec ({ type, reference, valueType })
//   datasetSpec   - anchor dataset spec; axesSpec[1] is the clonotype key axis
//
// Returns { filteredParquet, pframe }.
filterClonotypes := func(structuredMap, axisRenames, filterMap, datasetSpec) {
    // Helper function to build filter predicate from filter spec.
    // Panics on an unrecognised filter type.
    buildFilterPredicate := func(columnName, filterSpec) {
        filterType := filterSpec["type"]
        referenceValue := filterSpec["reference"]
        col := pt.col(columnName)

        if filterType == "number_greaterThan" {
            return col.gt(referenceValue)
        } else if filterType == "number_greaterThanOrEqualTo" {
            return col.ge(referenceValue)
        } else if filterType == "number_lessThan" {
            return col.lt(referenceValue)
        } else if filterType == "number_lessThanOrEqualTo" {
            return col.le(referenceValue)
        } else if filterType == "number_equals" {
            return col.eq(referenceValue)
        } else if filterType == "number_notEquals" {
            return col.neq(referenceValue)
        } else if filterType == "string_equals" {
            return col.eq(string(referenceValue))
        } else if filterType == "string_notEquals" {
            return col.neq(string(referenceValue))
        } else if filterType == "string_contains" {
            // literal: true -> plain substring match, not a regex
            return col.strContains(string(referenceValue), {literal: true})
        } else if filterType == "string_doesNotContain" {
            return col.strContains(string(referenceValue), {literal: true}).not()
        }
        ll.panic("Unknown filter type: %s", filterType)
        return undefined // unreachable; panic above aborts execution
    }

    // Build projection with axis renames and column selections
    projection := []
    // Add main clonotypeKey axis first
    projection = append(projection, pt.axis(datasetSpec.axesSpec[1].name).alias("clonotypeKey"))
    // Add other renamed axes
    for origAxis, aliasName in axisRenames {
        projection = append(projection, pt.axis(origAxis).alias(aliasName))
    }
    // Add all columns
    for colName, _ in structuredMap {
        projection = append(projection, pt.col(colName))
    }

    // Start ptabler workflow; inputs cached for 24h (value is milliseconds)
    wfPt := pt.workflow().cacheInputs(24 * 60 * 60 * 1000)

    // Create initial frame with axis renames
    dfPt := wfPt.frame(pt.p.full(structuredMap)).select(projection...)

    // Step 1: Apply filters from filterMap (matches filter.py lines 123-164)
    // Only apply filters if the data type matches the filter type:
    // string_* filters require a String column, number_* filters any other
    // valueType; mismatched combinations are silently skipped.
    filterPredicates := []
    for filterColName, filterSpec in filterMap {
        filterType := filterSpec["type"]
        valueType := filterSpec["valueType"]

        // Validate filter type matches data type (same logic as filter.py)
        isValidFilter := false
        if valueType == "String" && text.has_prefix(filterType, "string_") {
            isValidFilter = true
        } else if valueType != "String" && text.has_prefix(filterType, "number_") {
            isValidFilter = true
        }

        if isValidFilter {
            predicate := buildFilterPredicate(filterColName, filterSpec)
            filterPredicates = append(filterPredicates, predicate)
        }
    }

    if len(filterPredicates) > 0 {
        dfPt = dfPt.filter(filterPredicates...)
    }

    // Step 2: Add "top" column with value 1 (matches filter.py line 211)
    dfPt = dfPt.withColumns(pt.lit(1).alias("top"))

    // Frame parameters for the pframe export: the clonotypeKey axis plus the
    // constant "top" sampling column defined above.
    frameParams := {
        axes: [{
            column: "clonotypeKey",
            spec: datasetSpec.axesSpec[1]
        }],
        columns: [{
            column: "top",
            spec: {
                name: "pl7.app/vdj/sampling-column",
                valueType: "Int",
                domain: {},
                annotations: {
                    "pl7.app/label": "Sampling column",
                    "pl7.app/table/visibility": "optional",
                    "pl7.app/isSubset": "true"
                }
            }
        }]
    }

    // Save both the parquet file and the pframe
    dfPt.save("filteredClonotypes.parquet")
    dfPt.saveFrameDirect("filteredClonotypes", frameParams)
    wfPtResult := wfPt.run()

    return {
        filteredParquet: wfPtResult.getFile("filteredClonotypes.parquet"),
        pframe: wfPtResult.getFrameDirect("filteredClonotypes")
    }
}
|
|
317
|
+
|
|
318
|
+
// Public API of this library; ll.toStrict makes unknown-key access an error.
export ll.toStrict({
    makeHeaderName: makeHeaderName,
    prepareClonotypeData: prepareClonotypeData,
    prepareCdr3Data: prepareCdr3Data,
    filterClonotypes: filterClonotypes
})
|
|
324
|
+
|
|
@@ -19,7 +19,7 @@ getColumns := func(datasetSpec, featureName, bulkChain) {
|
|
|
19
19
|
annotations: {
|
|
20
20
|
"pl7.app/label": "KABAT sequence " + featureName + " Heavy",
|
|
21
21
|
"pl7.app/table/orderPriority": "10",
|
|
22
|
-
|
|
22
|
+
"pl7.app/table/visibility": "default"
|
|
23
23
|
}
|
|
24
24
|
}
|
|
25
25
|
},
|
|
@@ -34,7 +34,7 @@ getColumns := func(datasetSpec, featureName, bulkChain) {
|
|
|
34
34
|
annotations: {
|
|
35
35
|
"pl7.app/label": "KABAT positions " + featureName + " Heavy",
|
|
36
36
|
"pl7.app/table/orderPriority": "9",
|
|
37
|
-
|
|
37
|
+
"pl7.app/table/visibility": "optional"
|
|
38
38
|
}
|
|
39
39
|
}
|
|
40
40
|
}
|
|
@@ -53,7 +53,7 @@ getColumns := func(datasetSpec, featureName, bulkChain) {
|
|
|
53
53
|
annotations: {
|
|
54
54
|
"pl7.app/label": "KABAT sequence " + featureName + " Light",
|
|
55
55
|
"pl7.app/table/orderPriority": "8",
|
|
56
|
-
|
|
56
|
+
"pl7.app/table/visibility": "default"
|
|
57
57
|
}
|
|
58
58
|
}
|
|
59
59
|
},
|
|
@@ -68,7 +68,7 @@ getColumns := func(datasetSpec, featureName, bulkChain) {
|
|
|
68
68
|
annotations: {
|
|
69
69
|
"pl7.app/label": "KABAT positions " + featureName + " Light",
|
|
70
70
|
"pl7.app/table/orderPriority": "7",
|
|
71
|
-
|
|
71
|
+
"pl7.app/table/visibility": "optional"
|
|
72
72
|
}
|
|
73
73
|
}
|
|
74
74
|
}
|
|
@@ -128,4 +128,3 @@ export ll.toStrict({
|
|
|
128
128
|
getColumns: getColumns
|
|
129
129
|
})
|
|
130
130
|
|
|
131
|
-
|