@platforma-open/milaboratories.top-antibodies.workflow 1.11.2 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,23 @@
1
1
   WARN  Issue while reading "/home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.top-antibodies.workflow@1.11.2 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
3
+ > @platforma-open/milaboratories.top-antibodies.workflow@1.12.0 build /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
+ Processing "src/assembling-fasta.tpl.tengo"...
6
7
  Processing "src/filter-and-sample.tpl.tengo"...
7
8
  Processing "src/main.tpl.tengo"...
9
+ Processing "src/pf-kabat-conv.lib.tengo"...
8
10
  Processing "src/pf-spectratype-conv.lib.tengo"...
9
11
  Processing "src/pf-vj-usage-conv.lib.tengo"...
10
12
  Processing "src/prerun.tpl.tengo"...
11
13
  Processing "src/sampled-cols-conv.lib.tengo"...
12
14
  No syntax errors found.
13
15
  info: Compiling 'dist'...
16
+ info: - writing /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow/dist/tengo/lib/pf-kabat-conv.lib.tengo
14
17
  info: - writing /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow/dist/tengo/lib/pf-spectratype-conv.lib.tengo
15
18
  info: - writing /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow/dist/tengo/lib/pf-vj-usage-conv.lib.tengo
16
19
  info: - writing /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow/dist/tengo/lib/sampled-cols-conv.lib.tengo
20
+ info: - writing /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow/dist/tengo/tpl/assembling-fasta.plj.gz
17
21
  info: - writing /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow/dist/tengo/tpl/filter-and-sample.plj.gz
18
22
  info: - writing /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow/dist/tengo/tpl/prerun.plj.gz
19
23
  info: - writing /home/runner/work/antibody-tcr-lead-selection/antibody-tcr-lead-selection/workflow/dist/tengo/tpl/main.plj.gz
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # @platforma-open/milaboratories.top-antibodies.workflow
2
2
 
3
+ ## 1.12.0
4
+
5
+ ### Minor Changes
6
+
7
+ - ccc8076: kabat numbering added
8
+
9
+ ### Patch Changes
10
+
11
+ - Updated dependencies [ccc8076]
12
+ - @platforma-open/milaboratories.top-antibodies.sample-clonotypes@1.5.0
13
+ - @platforma-open/milaboratories.top-antibodies.assembling-fasta@1.1.0
14
+ - @platforma-open/milaboratories.top-antibodies.anarci-kabat@1.1.0
15
+ - @platforma-open/milaboratories.top-antibodies.spectratype@1.5.0
16
+
17
+ ## 1.11.3
18
+
19
+ ### Patch Changes
20
+
21
+ - 44895be: Support parquet format
22
+
3
23
  ## 1.11.2
4
24
 
5
25
  ### Patch Changes
package/dist/index.cjs CHANGED
@@ -1,4 +1,5 @@
1
1
  module.exports = { Templates: {
2
+ 'assembling-fasta': { type: 'from-file', path: require.resolve('./tengo/tpl/assembling-fasta.plj.gz') },
2
3
  'filter-and-sample': { type: 'from-file', path: require.resolve('./tengo/tpl/filter-and-sample.plj.gz') },
3
4
  'prerun': { type: 'from-file', path: require.resolve('./tengo/tpl/prerun.plj.gz') },
4
5
  'main': { type: 'from-file', path: require.resolve('./tengo/tpl/main.plj.gz') }
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  declare type TemplateFromFile = { readonly type: "from-file"; readonly path: string; };
2
- declare type TplName = "filter-and-sample" | "prerun" | "main";
2
+ declare type TplName = "assembling-fasta" | "filter-and-sample" | "prerun" | "main";
3
3
  declare const Templates: Record<TplName, TemplateFromFile>;
4
4
  export { Templates };
package/dist/index.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { resolve } from 'node:path';
2
2
  export const Templates = {
3
+ 'assembling-fasta': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/assembling-fasta.plj.gz') },
3
4
  'filter-and-sample': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/filter-and-sample.plj.gz') },
4
5
  'prerun': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/prerun.plj.gz') },
5
6
  'main': { type: 'from-file', path: resolve(import.meta.dirname, './tengo/tpl/main.plj.gz') }
@@ -0,0 +1,131 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+
3
+ getColumns := func(datasetSpec, featureName, bulkChain) {
4
+ isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
5
+
6
+
7
+ featSuf := ""
8
+ if !is_undefined(featureName) && featureName != "" { featSuf = "." + string(featureName) }
9
+
10
+ cols := [
11
+ {
12
+ column: "kabatSequence_H",
13
+ spec: {
14
+ name: "pl7.app/vdj/kabatSequence" + featSuf,
15
+ valueType: "String",
16
+ domain: {
17
+ "pl7.app/vdj/chain": "IGHeavy"
18
+ },
19
+ annotations: {
20
+ "pl7.app/label": "KABAT sequence " + featureName + " Heavy",
21
+ "pl7.app/table/orderPriority": "10",
22
+ "pl7.app/table/visibility": "default"
23
+ }
24
+ }
25
+ },
26
+ {
27
+ column: "kabatPositions_H",
28
+ spec: {
29
+ name: "pl7.app/vdj/kabatPositions" + featSuf,
30
+ valueType: "String",
31
+ domain: {
32
+ "pl7.app/vdj/chain": "IGHeavy"
33
+ },
34
+ annotations: {
35
+ "pl7.app/label": "KABAT positions " + featureName + " Heavy",
36
+ "pl7.app/table/orderPriority": "9",
37
+ "pl7.app/table/visibility": "optional"
38
+ }
39
+ }
40
+ }
41
+ ]
42
+
43
+ if isSingleCell {
44
+ cols = cols + [
45
+ {
46
+ column: "kabatSequence_KL",
47
+ spec: {
48
+ name: "pl7.app/vdj/kabatSequence" + featSuf,
49
+ valueType: "String",
50
+ domain: {
51
+ "pl7.app/vdj/chain": "IGLight"
52
+ },
53
+ annotations: {
54
+ "pl7.app/label": "KABAT sequence " + featureName + " Light",
55
+ "pl7.app/table/orderPriority": "8",
56
+ "pl7.app/table/visibility": "default"
57
+ }
58
+ }
59
+ },
60
+ {
61
+ column: "kabatPositions_KL",
62
+ spec: {
63
+ name: "pl7.app/vdj/kabatPositions" + featSuf,
64
+ valueType: "String",
65
+ domain: {
66
+ "pl7.app/vdj/chain": "IGLight"
67
+ },
68
+ annotations: {
69
+ "pl7.app/label": "KABAT positions " + featureName + " Light",
70
+ "pl7.app/table/orderPriority": "7",
71
+ "pl7.app/table/visibility": "optional"
72
+ }
73
+ }
74
+ }
75
+ ]
76
+ } else {
77
+
78
+ if bulkChain == "KL" {
79
+ cols = [
80
+ {
81
+ column: "kabatSequence_KL",
82
+ spec: {
83
+ name: "pl7.app/vdj/kabatSequence" + featSuf,
84
+ valueType: "String",
85
+ domain: {
86
+ "pl7.app/vdj/chain": "IGLight"
87
+ },
88
+ annotations: {
89
+ "pl7.app/label": "KABAT sequence " + featureName + " Light",
90
+ "pl7.app/table/orderPriority": "8",
91
+ "pl7.app/table/visibility": "default"
92
+ }
93
+ }
94
+ },
95
+ {
96
+ column: "kabatPositions_KL",
97
+ spec: {
98
+ name: "pl7.app/vdj/kabatPositions" + featSuf,
99
+ valueType: "String",
100
+ domain: {
101
+ "pl7.app/vdj/chain": "IGLight"
102
+ },
103
+ annotations: {
104
+ "pl7.app/label": "KABAT positions " + featureName + " Light",
105
+ "pl7.app/table/orderPriority": "7",
106
+ "pl7.app/table/visibility": "optional"
107
+ }
108
+ }
109
+ }
110
+ ]
111
+ }
112
+ }
113
+
114
+ return {
115
+ axes: [
116
+ {
117
+ column: "clonotypeKey",
118
+ spec: datasetSpec.axesSpec[1]
119
+ }
120
+ ],
121
+ columns: cols,
122
+ storageFormat: "Parquet",
123
+ partitionKeyLength: 0
124
+ }
125
+ }
126
+
127
+ export ll.toStrict({
128
+ getColumns: getColumns
129
+ })
130
+
131
+
@@ -49,7 +49,7 @@ getColumns := func() {
49
49
  }
50
50
  }
51
51
  ],
52
- storageFormat: "Binary",
52
+ storageFormat: "Parquet",
53
53
  partitionKeyLength: 0
54
54
  }
55
55
  }
@@ -44,7 +44,7 @@ getColumns := func() {
44
44
  }
45
45
  }
46
46
  ],
47
- storageFormat: "Binary",
47
+ storageFormat: "Parquet",
48
48
  partitionKeyLength: 0
49
49
  }
50
50
  }
@@ -40,7 +40,7 @@ getColumns := func(datasetSpec, addRanking) {
40
40
  spec: datasetSpec.axesSpec[1]
41
41
  }],
42
42
  columns: columns,
43
- storageFormat: "Binary",
43
+ storageFormat: "Parquet",
44
44
  partitionKeyLength: 0
45
45
  }
46
46
  }
Binary file
Binary file
package/package.json CHANGED
@@ -1,17 +1,20 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.top-antibodies.workflow",
3
- "version": "1.11.2",
3
+ "version": "1.12.0",
4
4
  "type": "module",
5
5
  "description": "Block Workflow",
6
6
  "dependencies": {
7
- "@platforma-sdk/workflow-tengo": "^5.3.8",
7
+ "@platforma-sdk/workflow-tengo": "^5.5.1",
8
+ "@platforma-open/milaboratories.software-anarci": "^0.0.3",
9
+ "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "1.5.0",
10
+ "@platforma-open/milaboratories.top-antibodies.spectratype": "1.5.0",
8
11
  "@platforma-open/milaboratories.top-antibodies.umap": "1.1.4",
9
- "@platforma-open/milaboratories.top-antibodies.spectratype": "1.4.4",
10
- "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "1.4.4"
12
+ "@platforma-open/milaboratories.top-antibodies.assembling-fasta": "1.1.0",
13
+ "@platforma-open/milaboratories.top-antibodies.anarci-kabat": "1.1.0"
11
14
  },
12
15
  "devDependencies": {
13
- "@platforma-sdk/tengo-builder": "^2.3.2",
14
- "@platforma-sdk/test": "^1.44.14",
16
+ "@platforma-sdk/tengo-builder": "^2.3.3",
17
+ "@platforma-sdk/test": "^1.45.1",
15
18
  "vitest": "^2.1.8"
16
19
  },
17
20
  "scripts": {
@@ -0,0 +1,92 @@
1
+ self := import("@platforma-sdk/workflow-tengo:tpl")
2
+ exec := import("@platforma-sdk/workflow-tengo:exec")
3
+ assets := import("@platforma-sdk/workflow-tengo:assets")
4
+ wf := import("@platforma-sdk/workflow-tengo:workflow")
5
+ pt := import("@platforma-sdk/workflow-tengo:pt")
6
+ pframes := import("@platforma-sdk/workflow-tengo:pframes")
7
+ anarciSw := assets.importSoftware("@platforma-open/milaboratories.software-anarci:main")
8
+
9
+ self.defineOutputs("kabat")
10
+
11
+ self.body(func(inputs) {
12
+
13
+ inputTsv := inputs.inputTsv
14
+ keyColumn := inputs.keyColumn // "clonotypeKey" or "scClonotypeKey"
15
+ finalClonotypesCsv := inputs.finalClonotypesCsv // optional
16
+ isSingleCell := inputs.isSingleCell // boolean
17
+ bulkChain := inputs.bulkChain // "H" or "KL" when !isSingleCell
18
+
19
+ cmd := exec.builder().
20
+ software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.assembling-fasta:main")).
21
+ cpu(1).
22
+ mem("4GiB").
23
+ addFile("assembling.tsv", inputTsv).
24
+ arg("--input_tsv").arg("assembling.tsv").
25
+ arg("--key_column").arg(keyColumn).
26
+ arg("--output_fasta").arg("assembling.fasta")
27
+
28
+ if finalClonotypesCsv != undefined {
29
+ cmd = cmd.addFile("finalClonotypes.csv", finalClonotypesCsv).
30
+ arg("--final_clonotypes_csv").arg("finalClonotypes.csv")
31
+ }
32
+
33
+ cmd = cmd.saveFile("assembling.fasta").
34
+ printErrStreamToStdout().
35
+ saveStdoutContent().
36
+ cache(24 * 60 * 60 * 1000).
37
+ run()
38
+
39
+ anarciFileNameBulk := "anarci.csv_" + bulkChain + ".csv"
40
+
41
+ anarciBuilder := exec.builder().
42
+ software(anarciSw).
43
+ arg("-i").arg("assembling.fasta").
44
+ arg("--scheme").arg("kabat").
45
+ arg("--ncpu").argWithVar("{system.cpu}").
46
+ arg("-o").arg("anarci.csv").arg("--csv").
47
+ addFile("assembling.fasta", cmd.getFile("assembling.fasta"))
48
+ if isSingleCell {
49
+ anarciBuilder = anarciBuilder.saveFile("anarci.csv_H.csv").saveFile("anarci.csv_KL.csv")
50
+ } else {
51
+ anarciBuilder = anarciBuilder.saveFile(anarciFileNameBulk)
52
+ }
53
+ anarciBuilder = anarciBuilder.
54
+ printErrStreamToStdout().
55
+ saveStdoutContent().
56
+ cache(24 * 60 * 60 * 1000).
57
+ run()
58
+
59
+ kabatSw := assets.importSoftware("@platforma-open/milaboratories.top-antibodies.anarci-kabat:main")
60
+ kabatExec := exec.builder().
61
+ software(kabatSw)
62
+ if isSingleCell {
63
+ kabatExec = kabatExec.addFile("anarci.csv_KL.csv", anarciBuilder.getFile("anarci.csv_KL.csv")).
64
+ arg("--kl_csv").arg("anarci.csv_KL.csv").
65
+ addFile("anarci.csv_H.csv", anarciBuilder.getFile("anarci.csv_H.csv")).
66
+ arg("--h_csv").arg("anarci.csv_H.csv")
67
+ } else {
68
+ if bulkChain == "H" {
69
+ kabatExec = kabatExec.addFile("anarci.csv_H.csv", anarciBuilder.getFile("anarci.csv_H.csv")).
70
+ arg("--h_csv").arg("anarci.csv_H.csv")
71
+ } else {
72
+ kabatExec = kabatExec.addFile("anarci.csv_KL.csv", anarciBuilder.getFile("anarci.csv_KL.csv")).
73
+ arg("--kl_csv").arg("anarci.csv_KL.csv")
74
+ }
75
+ }
76
+ kabatExec = kabatExec.
77
+ arg("--out_tsv").arg("kabat.tsv").
78
+ saveFile("kabat.tsv").
79
+ printErrStreamToStdout().
80
+ saveStdoutContent().
81
+ cache(24 * 60 * 60 * 1000).
82
+ run()
83
+
84
+ kabat := kabatExec.getFile("kabat.tsv")
85
+
86
+ return {
87
+ kabat: kabat
88
+ // kabatPf: pframes.exportFrame(kabatDf)
89
+ }
90
+ })
91
+
92
+
@@ -0,0 +1,131 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+
3
+ getColumns := func(datasetSpec, featureName, bulkChain) {
4
+ isSingleCell := datasetSpec.axesSpec[1].name == "pl7.app/vdj/scClonotypeKey"
5
+
6
+ // Compose feature suffix for spec names
7
+ featSuf := ""
8
+ if !is_undefined(featureName) && featureName != "" { featSuf = "." + string(featureName) }
9
+
10
+ cols := [
11
+ {
12
+ column: "kabatSequence_H",
13
+ spec: {
14
+ name: "pl7.app/vdj/kabatSequence" + featSuf,
15
+ valueType: "String",
16
+ domain: {
17
+ "pl7.app/vdj/chain": "IGHeavy"
18
+ },
19
+ annotations: {
20
+ "pl7.app/label": "KABAT sequence " + featureName + " Heavy",
21
+ "pl7.app/table/orderPriority": "10",
22
+ "pl7.app/table/visibility": "default"
23
+ }
24
+ }
25
+ },
26
+ {
27
+ column: "kabatPositions_H",
28
+ spec: {
29
+ name: "pl7.app/vdj/kabatPositions" + featSuf,
30
+ valueType: "String",
31
+ domain: {
32
+ "pl7.app/vdj/chain": "IGHeavy"
33
+ },
34
+ annotations: {
35
+ "pl7.app/label": "KABAT positions " + featureName + " Heavy",
36
+ "pl7.app/table/orderPriority": "9",
37
+ "pl7.app/table/visibility": "optional"
38
+ }
39
+ }
40
+ }
41
+ ]
42
+
43
+ if isSingleCell {
44
+ cols = cols + [
45
+ {
46
+ column: "kabatSequence_KL",
47
+ spec: {
48
+ name: "pl7.app/vdj/kabatSequence" + featSuf,
49
+ valueType: "String",
50
+ domain: {
51
+ "pl7.app/vdj/chain": "IGLight"
52
+ },
53
+ annotations: {
54
+ "pl7.app/label": "KABAT sequence " + featureName + " Light",
55
+ "pl7.app/table/orderPriority": "8",
56
+ "pl7.app/table/visibility": "default"
57
+ }
58
+ }
59
+ },
60
+ {
61
+ column: "kabatPositions_KL",
62
+ spec: {
63
+ name: "pl7.app/vdj/kabatPositions" + featSuf,
64
+ valueType: "String",
65
+ domain: {
66
+ "pl7.app/vdj/chain": "IGLight"
67
+ },
68
+ annotations: {
69
+ "pl7.app/label": "KABAT positions " + featureName + " Light",
70
+ "pl7.app/table/orderPriority": "7",
71
+ "pl7.app/table/visibility": "optional"
72
+ }
73
+ }
74
+ }
75
+ ]
76
+ } else {
77
+ // bulk: include only heavy or only light according to bulkChain
78
+ if bulkChain == "KL" {
79
+ cols = [
80
+ {
81
+ column: "kabatSequence_KL",
82
+ spec: {
83
+ name: "pl7.app/vdj/kabatSequence" + featSuf,
84
+ valueType: "String",
85
+ domain: {
86
+ "pl7.app/vdj/chain": "IGLight"
87
+ },
88
+ annotations: {
89
+ "pl7.app/label": "KABAT sequence " + featureName + " Light",
90
+ "pl7.app/table/orderPriority": "8",
91
+ "pl7.app/table/visibility": "default"
92
+ }
93
+ }
94
+ },
95
+ {
96
+ column: "kabatPositions_KL",
97
+ spec: {
98
+ name: "pl7.app/vdj/kabatPositions" + featSuf,
99
+ valueType: "String",
100
+ domain: {
101
+ "pl7.app/vdj/chain": "IGLight"
102
+ },
103
+ annotations: {
104
+ "pl7.app/label": "KABAT positions " + featureName + " Light",
105
+ "pl7.app/table/orderPriority": "7",
106
+ "pl7.app/table/visibility": "optional"
107
+ }
108
+ }
109
+ }
110
+ ]
111
+ }
112
+ }
113
+
114
+ return {
115
+ axes: [
116
+ {
117
+ column: "clonotypeKey",
118
+ spec: datasetSpec.axesSpec[1]
119
+ }
120
+ ],
121
+ columns: cols,
122
+ storageFormat: "Parquet",
123
+ partitionKeyLength: 0
124
+ }
125
+ }
126
+
127
+ export ll.toStrict({
128
+ getColumns: getColumns
129
+ })
130
+
131
+
@@ -49,7 +49,7 @@ getColumns := func() {
49
49
  }
50
50
  }
51
51
  ],
52
- storageFormat: "Binary",
52
+ storageFormat: "Parquet",
53
53
  partitionKeyLength: 0
54
54
  }
55
55
  }
@@ -44,7 +44,7 @@ getColumns := func() {
44
44
  }
45
45
  }
46
46
  ],
47
- storageFormat: "Binary",
47
+ storageFormat: "Parquet",
48
48
  partitionKeyLength: 0
49
49
  }
50
50
  }
@@ -7,6 +7,7 @@ pframes := import("@platforma-sdk/workflow-tengo:pframes")
7
7
  slices := import("@platforma-sdk/workflow-tengo:slices")
8
8
  render := import("@platforma-sdk/workflow-tengo:render")
9
9
  ll := import("@platforma-sdk/workflow-tengo:ll")
10
+ kabatConv := import(":pf-kabat-conv")
10
11
 
11
12
  spectratypeConv := import(":pf-spectratype-conv")
12
13
  vjUsageConv := import(":pf-vj-usage-conv")
@@ -97,6 +98,13 @@ wf.prepare(func(args){
97
98
  "pl7.app/vdj/reference": "JGene"
98
99
  }
99
100
  }, "JGenes")
101
+
102
+ // Add assembling feature aminoacid sequences (bulk, sc, scFv)
103
+ bundleBuilder.addMulti({
104
+ axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
105
+ annotations: { "pl7.app/vdj/isAssemblingFeature": "true" },
106
+ domain: { "pl7.app/alphabet": "aminoacid" }
107
+ }, "assemblingAaSeqs")
100
108
 
101
109
  return {
102
110
  columns: bundleBuilder.build()
@@ -259,7 +267,7 @@ wf.body(func(args) {
259
267
  // outputs["sampledRows"] = filterSampleResult.output("sampledRows", 24 * 60 * 60 * 1000)
260
268
 
261
269
  ////////// CDR3 Length Calculation //////////
262
-
270
+
263
271
  cdr3SeqTable := pframes.tsvFileBuilder()
264
272
  cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
265
273
 
@@ -295,7 +303,13 @@ wf.body(func(args) {
295
303
 
296
304
  for col in cdr3Sequences {
297
305
  headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
298
- cdr3SeqTable.add(col, {header: headerName})
306
+ if isSingleCell {
307
+ if col.spec.domain["pl7.app/vdj/scClonotypeChain/index"] == "primary" {
308
+ cdr3SeqTable.add(col, {header: headerName})
309
+ }
310
+ } else {
311
+ cdr3SeqTable.add(col, {header: headerName})
312
+ }
299
313
  }
300
314
 
301
315
  // Process V genes
@@ -355,6 +369,55 @@ wf.body(func(args) {
355
369
  "tsv", vjUsageConv.getColumns(),
356
370
  {cpu: 1, mem: "16GiB"})
357
371
  outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
372
+
373
+ if args.kabatNumbering == true {
374
+ ////////// Assembling AA sequences //////////
375
+ assemSeqTable := pframes.tsvFileBuilder()
376
+ keyHeader := "clonotypeKey"
377
+ assemSeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, keyHeader)
378
+
379
+ seqCols := columns.getColumns("assemblingAaSeqs")
380
+ for col in seqCols {
381
+ headerName := makeHeaderName(col, "assemblingFeature", isSingleCell)
382
+ assemSeqTable.add(col, {header: headerName})
383
+ }
384
+
385
+ assemSeqTable.mem("16GiB")
386
+ assemSeqTable.cpu(1)
387
+ assemSeqTableBuilt := assemSeqTable.build()
388
+
389
+ // Convert assembling feature sequences to FASTA via sub-template
390
+ assemFastaTpl := assets.importTemplate(":assembling-fasta")
391
+ bulkChain := undefined
392
+ if !isSingleCell {
393
+ // infer bulk chain by header names of incoming seq columns (domain uses IGHeavy / IGLight)
394
+ chainDetected := "KL"
395
+ for col in seqCols {
396
+ ch := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"] // e.g., IGHeavy, IGLight
397
+ if ch == "IGHeavy" { chainDetected = "H"; break }
398
+ if ch == "IGLight" { chainDetected = "KL" }
399
+ }
400
+ bulkChain = chainDetected
401
+ }
402
+ assem := render.create(assemFastaTpl, {
403
+ inputTsv: assemSeqTableBuilt,
404
+ keyColumn: "clonotypeKey",
405
+ finalClonotypesCsv: finalClonotypesCsv,
406
+ isSingleCell: isSingleCell,
407
+ bulkChain: bulkChain
408
+ })
409
+ //outputs["assemblingAnarci"] = assem.output("anarci", 24 * 60 * 60 * 1000)
410
+ kabatFile := assem.output("kabat", 24 * 60 * 60 * 1000)
411
+ // Derive feature name from assembling feature columns (prefer first column's feature)
412
+ featName := ""
413
+ if len(seqCols) > 0 {
414
+ f := seqCols[0].spec.domain["pl7.app/vdj/feature"]
415
+ if f != undefined { featName = f }
416
+ }
417
+ // Convert kabat.tsv to PFrame with proper specs (bulk: select heavy/light)
418
+ kabatPf := xsv.importFile(kabatFile, "tsv", kabatConv.getColumns(datasetSpec, featName, bulkChain), {cpu: 1, mem: "8GiB"})
419
+ outputs["assemblingKabatPf"] = pframes.exportFrame(kabatPf)
420
+ }
358
421
  }
359
422
  }
360
423
 
@@ -40,7 +40,7 @@ getColumns := func(datasetSpec, addRanking) {
40
40
  spec: datasetSpec.axesSpec[1]
41
41
  }],
42
42
  columns: columns,
43
- storageFormat: "Binary",
43
+ storageFormat: "Parquet",
44
44
  partitionKeyLength: 0
45
45
  }
46
46
  }