@platforma-open/milaboratories.top-antibodies.workflow 1.11.1 → 1.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.top-antibodies.workflow@1.11.1 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
3
+ > @platforma-open/milaboratories.top-antibodies.workflow@1.11.3 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/filter-and-sample.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @platforma-open/milaboratories.top-antibodies.workflow
2
2
 
3
+ ## 1.11.3
4
+
5
+ ### Patch Changes
6
+
7
+ - 44895be: Support parquet format
8
+
9
+ ## 1.11.2
10
+
11
+ ### Patch Changes
12
+
13
 - 65e8749: Minor bug fixes and SDK update
14
+
3
15
  ## 1.11.1
4
16
 
5
17
  ### Patch Changes
@@ -49,7 +49,7 @@ getColumns := func() {
49
49
  }
50
50
  }
51
51
  ],
52
- storageFormat: "Binary",
52
+ storageFormat: "Parquet",
53
53
  partitionKeyLength: 0
54
54
  }
55
55
  }
@@ -44,7 +44,7 @@ getColumns := func() {
44
44
  }
45
45
  }
46
46
  ],
47
- storageFormat: "Binary",
47
+ storageFormat: "Parquet",
48
48
  partitionKeyLength: 0
49
49
  }
50
50
  }
@@ -40,7 +40,7 @@ getColumns := func(datasetSpec, addRanking) {
40
40
  spec: datasetSpec.axesSpec[1]
41
41
  }],
42
42
  columns: columns,
43
- storageFormat: "Binary",
43
+ storageFormat: "Parquet",
44
44
  partitionKeyLength: 0
45
45
  }
46
46
  }
Binary file
Binary file
package/package.json CHANGED
@@ -1,17 +1,17 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.top-antibodies.workflow",
3
- "version": "1.11.1",
3
+ "version": "1.11.3",
4
4
  "type": "module",
5
5
  "description": "Block Workflow",
6
6
  "dependencies": {
7
- "@platforma-sdk/workflow-tengo": "^5.3.3",
7
+ "@platforma-sdk/workflow-tengo": "^5.4.2",
8
8
  "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "1.4.4",
9
9
  "@platforma-open/milaboratories.top-antibodies.spectratype": "1.4.4",
10
10
  "@platforma-open/milaboratories.top-antibodies.umap": "1.1.4"
11
11
  },
12
12
  "devDependencies": {
13
- "@platforma-sdk/tengo-builder": "^2.3.0",
14
- "@platforma-sdk/test": "^1.44.7",
13
+ "@platforma-sdk/tengo-builder": "^2.3.2",
14
+ "@platforma-sdk/test": "^1.44.19",
15
15
  "vitest": "^2.1.8"
16
16
  },
17
17
  "scripts": {
@@ -49,7 +49,7 @@ getColumns := func() {
49
49
  }
50
50
  }
51
51
  ],
52
- storageFormat: "Binary",
52
+ storageFormat: "Parquet",
53
53
  partitionKeyLength: 0
54
54
  }
55
55
  }
@@ -44,7 +44,7 @@ getColumns := func() {
44
44
  }
45
45
  }
46
46
  ],
47
- storageFormat: "Binary",
47
+ storageFormat: "Parquet",
48
48
  partitionKeyLength: 0
49
49
  }
50
50
  }
@@ -125,12 +125,14 @@ wf.body(func(args) {
125
125
  addedAxes := []
126
126
  filterMap := {}
127
127
  rankingMap := {}
128
+ addedCols := false
128
129
  if len(args.filters) > 0 {
129
130
  for i, filter in args.filters {
130
131
  if filter.value != undefined {
131
132
  // Columns added here might also be in ranking list, so we add default IDs
132
133
  cloneTable.add(columns.getColumn(filter.value.column),
133
134
  {header: "Filter_" + string(i), id: "filter_" + string(i)})
135
+ addedCols = true
134
136
  // Store reference value and filter type associated to this column
135
137
  filterMap["Filter_" + string(i)] = filter.filter
136
138
 
@@ -156,6 +158,7 @@ wf.body(func(args) {
156
158
  if col.value != undefined {
157
159
  validRanks = true
158
160
  cloneTable.add(columns.getColumn(col.value.column), {header: "Col" + string(i)})
161
+ addedCols = true
159
162
  // Store ranking order for this column
160
163
  rankingMap["Col" + string(i)] = col.rankingOrder
161
164
 
@@ -179,6 +182,7 @@ wf.body(func(args) {
179
182
  if args.rankingOrderDefault.value != undefined {
180
183
  i := 0
181
184
  cloneTable.add(columns.getColumn(args.rankingOrderDefault.value.column), {header: "Col" + string(i)})
185
+ addedCols = true
182
186
  // Store default ranking order
183
187
  rankingMap["Col" + string(i)] = args.rankingOrderDefault.rankingOrder
184
188
 
@@ -208,6 +212,7 @@ wf.body(func(args) {
208
212
  cloneTable.setAxisHeader(col.spec.axesSpec[1].name, "cluster_" + string(i))
209
213
  linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[1]
210
214
  }
215
+ addedCols = true
211
216
  }
212
217
  }
213
218
 
@@ -215,6 +220,7 @@ wf.body(func(args) {
215
220
  if len(columns.getColumns("clusterSizes")) > 0 {
216
221
  for i, col in columns.getColumns("clusterSizes") {
217
222
  cloneTable.add(col, {header: "clusterSize." + string(i)})
223
+ addedCols = true
218
224
  // Add the cluster axis header
219
225
  for axisIdx, axis in col.spec.axesSpec {
220
226
  if axis.name != datasetSpec.axesSpec[1].name {
@@ -224,127 +230,132 @@ wf.body(func(args) {
224
230
  }
225
231
  }
226
232
 
227
- cloneTable.mem("16GiB")
228
- cloneTable.cpu(1)
229
- cloneTable = cloneTable.build()
230
-
231
- // Use render.create to call the filter-clonotypes template
232
- filterSampleResult := render.create(filterAndSampleTpl, {
233
- inputAnchor: args.inputAnchor,
234
- cloneTable: cloneTable,
235
- rankingOrder: args.rankingOrder,
236
- rankingOrderDefault: args.rankingOrderDefault,
237
- filters: args.filters,
238
- filterMap: filterMap,
239
- rankingMap: rankingMap,
240
- datasetSpec: datasetSpec,
241
- topClonotypes: args.topClonotypes
242
- })
243
-
244
- // Get the filtered clonotypes from the template result
245
- outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
246
-
247
- // Get the filtered and sampled clonotypes P-frame and CSV from the template result
248
- finalClonotypesCsv := filterSampleResult.output("finalClonotypesCsv", 24 * 60 * 60 * 1000)
249
- // outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
250
-
251
- ////////// CDR3 Length Calculation //////////
252
-
253
- cdr3SeqTable := pframes.tsvFileBuilder()
254
- cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
255
-
256
- // Must deal with multiple CDR3 sequences (two for each cell in single cell data)
257
- // Chain will be added in the header as cdr3Sequence.chain and used in python script
258
- // Notice chain is in spec.domain for single cell data and spec.axesSpec[0].domain for bulk data
259
-
260
- // Helper function to add chain information to the headers dynamically
261
- chainMapping := {
262
- "IG": { "A": "Heavy", "B": "Light" },
263
- "TCRAB": { "A": "TRA", "B": "TRB" },
264
- "TCRGD": { "A": "TRG", "B": "TRD" }
265
- }
233
+ // Continue only if at least one column was added
234
+ // This check prevents a transient, intermittent error while filters are
235
+ // still being processed, and possibly in other situations too
236
+ if addedCols {
237
+ cloneTable.mem("16GiB")
238
+ cloneTable.cpu(1)
239
+ cloneTable = cloneTable.build()
240
+
241
+ // Use render.create to call the filter-clonotypes template
242
+ filterSampleResult := render.create(filterAndSampleTpl, {
243
+ inputAnchor: args.inputAnchor,
244
+ cloneTable: cloneTable,
245
+ rankingOrder: args.rankingOrder,
246
+ rankingOrderDefault: args.rankingOrderDefault,
247
+ filters: args.filters,
248
+ filterMap: filterMap,
249
+ rankingMap: rankingMap,
250
+ datasetSpec: datasetSpec,
251
+ topClonotypes: args.topClonotypes
252
+ })
253
+
254
+ // Get the filtered clonotypes from the template result
255
+ outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
266
256
 
267
- makeHeaderName := func(col, baseHeaderName, isSingleCell) {
268
- if isSingleCell {
269
- chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
270
- receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
271
- chainLabel := chainMapping[receptor][chain]
272
- return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
273
- } else {
274
- // For bulk, if chain info is available (e.g. IGH, IGK, IGL)
275
- chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
276
- if chainFromDomain != undefined {
277
- return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
278
- }
257
+ // Get the filtered and sampled clonotypes P-frame and CSV from the template result
258
+ finalClonotypesCsv := filterSampleResult.output("finalClonotypesCsv", 24 * 60 * 60 * 1000)
259
+ // outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
260
+
261
+ ////////// CDR3 Length Calculation //////////
262
+
263
+ cdr3SeqTable := pframes.tsvFileBuilder()
264
+ cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
265
+
266
+ // Must deal with multiple CDR3 sequences (two for each cell in single cell data)
267
+ // Chain will be added in the header as cdr3Sequence.chain and used in python script
268
+ // Notice chain is in spec.domain for single cell data and spec.axesSpec[0].domain for bulk data
269
+
270
+ // Helper function to add chain information to the headers dynamically
271
+ chainMapping := {
272
+ "IG": { "A": "Heavy", "B": "Light" },
273
+ "TCRAB": { "A": "TRA", "B": "TRB" },
274
+ "TCRGD": { "A": "TRG", "B": "TRD" }
279
275
  }
280
- return baseHeaderName
281
- };
282
276
 
283
- // Process CDR3 sequences
284
- cdr3Sequences := columns.getColumns("cdr3Sequences")
277
+ makeHeaderName := func(col, baseHeaderName, isSingleCell) {
278
+ if isSingleCell {
279
+ chain := col.spec.domain["pl7.app/vdj/scClonotypeChain"] // e.g., "A", "B"
280
+ receptor := col.spec.axesSpec[0].domain["pl7.app/vdj/receptor"] // e.g., "IG", "TCRAB", "TCRGD"
281
+ chainLabel := chainMapping[receptor][chain]
282
+ return baseHeaderName + "." + chainLabel // e.g., "cdr3Sequence.Heavy"
283
+ } else {
284
+ // For bulk, if chain info is available (e.g. IGH, IGK, IGL)
285
+ chainFromDomain := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g. "IGH", "IGK"
286
+ if chainFromDomain != undefined {
287
+ return baseHeaderName + "." + chainFromDomain // e.g., "cdr3Sequence.IGH"
288
+ }
289
+ }
290
+ return baseHeaderName
291
+ };
285
292
 
286
- for col in cdr3Sequences {
287
- headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
288
- cdr3SeqTable.add(col, {header: headerName})
289
- }
293
+ // Process CDR3 sequences
294
+ cdr3Sequences := columns.getColumns("cdr3Sequences")
290
295
 
291
- // Process V genes
292
- vGenes := columns.getColumns("VGenes")
296
+ for col in cdr3Sequences {
297
+ headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
298
+ cdr3SeqTable.add(col, {header: headerName})
299
+ }
293
300
 
294
- for col in vGenes {
295
- headerName := makeHeaderName(col, "vGene", isSingleCell)
296
- cdr3SeqTable.add(col, {header: headerName})
297
- }
301
+ // Process V genes
302
+ vGenes := columns.getColumns("VGenes")
298
303
 
299
- // Process J genes
300
- jGenes := columns.getColumns("JGenes")
304
+ for col in vGenes {
305
+ headerName := makeHeaderName(col, "vGene", isSingleCell)
306
+ cdr3SeqTable.add(col, {header: headerName})
307
+ }
301
308
 
302
- for col in jGenes {
303
- headerName := makeHeaderName(col, "jGene", isSingleCell)
304
- cdr3SeqTable.add(col, {header: headerName})
305
- }
309
+ // Process J genes
310
+ jGenes := columns.getColumns("JGenes")
306
311
 
307
- cdr3SeqTable.mem("16GiB")
308
- cdr3SeqTable.cpu(1)
309
- cdr3SeqTableBuilt := cdr3SeqTable.build()
310
-
311
- cdr3VspectratypeCmd := exec.builder().
312
- software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
313
- mem("16GiB").
314
- cpu(1).
315
- addFile("cdr3_sequences_input.tsv", cdr3SeqTableBuilt).
316
- arg("--input_tsv").arg("cdr3_sequences_input.tsv").
317
- arg("--spectratype_tsv").arg("spectratype.tsv").
318
- arg("--vj_usage_tsv").arg("vj_usage.tsv") // no dot here
319
-
320
- // Add top clonotypes argument and file to the builder if provided
321
- if finalClonotypesCsv != undefined {
322
- cdr3VspectratypeCmd = cdr3VspectratypeCmd.
323
- arg("--final_clonotypes_csv").arg("finalClonotypes.csv").
324
- addFile("finalClonotypes.csv", finalClonotypesCsv)
325
- }
312
+ for col in jGenes {
313
+ headerName := makeHeaderName(col, "jGene", isSingleCell)
314
+ cdr3SeqTable.add(col, {header: headerName})
315
+ }
316
+
317
+ cdr3SeqTable.mem("16GiB")
318
+ cdr3SeqTable.cpu(1)
319
+ cdr3SeqTableBuilt := cdr3SeqTable.build()
320
+
321
+ cdr3VspectratypeCmd := exec.builder().
322
+ software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
323
+ mem("16GiB").
324
+ cpu(1).
325
+ addFile("cdr3_sequences_input.tsv", cdr3SeqTableBuilt).
326
+ arg("--input_tsv").arg("cdr3_sequences_input.tsv").
327
+ arg("--spectratype_tsv").arg("spectratype.tsv").
328
+ arg("--vj_usage_tsv").arg("vj_usage.tsv") // no dot here
329
+
330
+ // Add top clonotypes argument and file to the builder if provided
331
+ if finalClonotypesCsv != undefined {
332
+ cdr3VspectratypeCmd = cdr3VspectratypeCmd.
333
+ arg("--final_clonotypes_csv").arg("finalClonotypes.csv").
334
+ addFile("finalClonotypes.csv", finalClonotypesCsv)
335
+ }
326
336
 
327
- cdr3VspectratypeCmd = cdr3VspectratypeCmd. // continue building the command
328
- saveFile("spectratype.tsv").
329
- saveFile("vj_usage.tsv").
330
- printErrStreamToStdout().
331
- saveStdoutContent().
332
- cache(24 * 60 * 60 * 1000).
333
- run()
337
+ cdr3VspectratypeCmd = cdr3VspectratypeCmd. // continue building the command
338
+ saveFile("spectratype.tsv").
339
+ saveFile("vj_usage.tsv").
340
+ printErrStreamToStdout().
341
+ saveStdoutContent().
342
+ cache(24 * 60 * 60 * 1000).
343
+ run()
334
344
 
335
345
 
336
- // Spectratype PFrame structure is [chain][cdr3Length][vGene] -> count
346
+ // Spectratype PFrame structure is [chain][cdr3Length][vGene] -> count
337
347
 
338
- cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("spectratype.tsv"),
339
- "tsv", spectratypeConv.getColumns(),
340
- {cpu: 1, mem: "16GiB"})
341
- outputs["cdr3VspectratypePf"] = pframes.exportFrame(cdr3VspectratypePf)
348
+ cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("spectratype.tsv"),
349
+ "tsv", spectratypeConv.getColumns(),
350
+ {cpu: 1, mem: "16GiB"})
351
+ outputs["cdr3VspectratypePf"] = pframes.exportFrame(cdr3VspectratypePf)
342
352
 
343
- // For vjUsage structure is [chain][vGene][jGene] -> count
344
- vjUsagePf := xsv.importFile(cdr3VspectratypeCmd.getFile("vj_usage.tsv"),
345
- "tsv", vjUsageConv.getColumns(),
346
- {cpu: 1, mem: "16GiB"})
347
- outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
353
+ // For vjUsage structure is [chain][vGene][jGene] -> count
354
+ vjUsagePf := xsv.importFile(cdr3VspectratypeCmd.getFile("vj_usage.tsv"),
355
+ "tsv", vjUsageConv.getColumns(),
356
+ {cpu: 1, mem: "16GiB"})
357
+ outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
358
+ }
348
359
  }
349
360
 
350
361
  return {
@@ -40,7 +40,7 @@ getColumns := func(datasetSpec, addRanking) {
40
40
  spec: datasetSpec.axesSpec[1]
41
41
  }],
42
42
  columns: columns,
43
- storageFormat: "Binary",
43
+ storageFormat: "Parquet",
44
44
  partitionKeyLength: 0
45
45
  }
46
46
  }