@platforma-open/milaboratories.top-antibodies.workflow 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,19 @@
1
1
   WARN  Issue while reading "/home/runner/work/top-antibodies/top-antibodies/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.top-antibodies.workflow@1.2.0 build /home/runner/work/top-antibodies/top-antibodies/workflow
3
+ > @platforma-open/milaboratories.top-antibodies.workflow@1.3.0 build /home/runner/work/top-antibodies/top-antibodies/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/main.tpl.tengo"...
7
+ Processing "src/pf-spectratype-conv.lib.tengo"...
7
8
  Processing "src/pf-umap-conv.lib.tengo"...
9
+ Processing "src/pf-vj-usage-conv.lib.tengo"...
8
10
  Processing "src/sampled-cols-conv.lib.tengo"...
9
11
  Processing "src/sampled-cols-umap-conv.lib.tengo"...
10
12
  No syntax errors found.
11
13
  info: Compiling 'dist'...
14
+ info: - writing /home/runner/work/top-antibodies/top-antibodies/workflow/dist/tengo/lib/pf-spectratype-conv.lib.tengo
12
15
  info: - writing /home/runner/work/top-antibodies/top-antibodies/workflow/dist/tengo/lib/pf-umap-conv.lib.tengo
16
+ info: - writing /home/runner/work/top-antibodies/top-antibodies/workflow/dist/tengo/lib/pf-vj-usage-conv.lib.tengo
13
17
  info: - writing /home/runner/work/top-antibodies/top-antibodies/workflow/dist/tengo/lib/sampled-cols-conv.lib.tengo
14
18
  info: - writing /home/runner/work/top-antibodies/top-antibodies/workflow/dist/tengo/lib/sampled-cols-umap-conv.lib.tengo
15
19
  info: - writing /home/runner/work/top-antibodies/top-antibodies/workflow/dist/tengo/tpl/main.plj.gz
package/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # @platforma-open/milaboratories.top-antibodies.workflow
2
2
 
3
+ ## 1.3.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 2e24f7a: Disable default normalization in VJ usage plot and change spectratype/VJ usage script to run on top clonotypes if provided
8
+
9
+ ### Patch Changes
10
+
11
+ - Updated dependencies [2e24f7a]
12
+ - @platforma-open/milaboratories.top-antibodies.spectratype@1.3.0
13
+
14
+ ## 1.2.1
15
+
16
+ ### Patch Changes
17
+
18
+ - Updated dependencies [6443da1]
19
+ - @platforma-open/milaboratories.top-antibodies.spectratype@1.2.0
20
+
3
21
  ## 1.2.0
4
22
 
5
23
  ### Minor Changes
@@ -0,0 +1,59 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+
3
+ getColumns := func() {
4
+ return {
5
+ "axes": [
6
+ {
7
+ "column": "chain",
8
+ "spec": {
9
+ "name": "pl7.app/vdj/chain",
10
+ "type": "String",
11
+ "annotations": { "pl7.app/label": "CDR3 chain" }
12
+ }
13
+ },
14
+ {
15
+ "column": "cdr3Length",
16
+ "spec": {
17
+ "name": "pl7.app/vdj/sequenceLength",
18
+ "type": "Int",
19
+ "domain": {
20
+ "pl7.app/vdj/feature": "CDR3",
21
+ "pl7.app/alphabet": "aminoacid"
22
+ },
23
+ "annotations": { "pl7.app/label": "CDR3 aa Length" }
24
+ }
25
+ },
26
+ {
27
+ "column": "vGene",
28
+ "spec": {
29
+ "name": "pl7.app/vdj/geneHit",
30
+ "type": "String",
31
+ "domain": { "pl7.app/vdj/reference": "VGene" },
32
+ "annotations": {
33
+ "pl7.app/label": "Best V gene"
34
+ }
35
+ }
36
+ }
37
+ ],
38
+ "columns": [
39
+ {
40
+ "column": "count",
41
+ "spec": {
42
+ "name": "pl7.app/vdj/vSpectratype",
43
+ "valueType": "Int",
44
+ "domain": {
45
+ "pl7.app/vdj/feature": "CDR3",
46
+ "pl7.app/alphabet": "aminoacid"
47
+ },
48
+ "annotations": { "pl7.app/label": "CDR3 V Spectratype" }
49
+ }
50
+ }
51
+ ],
52
+ "storageFormat": "Binary",
53
+ "partitionKeyLength": 0
54
+ }
55
+ }
56
+
57
+ export ll.toStrict({
58
+ getColumns: getColumns
59
+ })
@@ -0,0 +1,54 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+
3
+ getColumns := func() {
4
+ return {
5
+ "axes": [
6
+ {
7
+ "column": "chain",
8
+ "spec": {
9
+ "name": "pl7.app/vdj/chain",
10
+ "type": "String",
11
+ "annotations": { "pl7.app/label": "CDR3 chain" }
12
+ }
13
+ },
14
+ {
15
+ "column": "vGene",
16
+ "spec": {
17
+ "name": "pl7.app/vdj/geneHit",
18
+ "type": "String",
19
+ "domain": { "pl7.app/vdj/reference": "VGene" },
20
+ "annotations": {
21
+ "pl7.app/label": "Best V gene"
22
+ }
23
+ }
24
+ },
25
+ {
26
+ "column": "jGene",
27
+ "spec": {
28
+ "name": "pl7.app/vdj/geneHit",
29
+ "type": "String",
30
+ "domain": { "pl7.app/vdj/reference": "JGene" },
31
+ "annotations": {
32
+ "pl7.app/label": "Best J gene"
33
+ }
34
+ }
35
+ }
36
+ ],
37
+ "columns": [
38
+ {
39
+ "column": "count",
40
+ "spec": {
41
+ "name": "pl7.app/vdj/vjGeneUsage",
42
+ "valueType": "Int",
43
+ "annotations": { "pl7.app/label": "V/J usage" }
44
+ }
45
+ }
46
+ ],
47
+ "storageFormat": "Binary",
48
+ "partitionKeyLength": 0
49
+ }
50
+ }
51
+
52
+ export ll.toStrict({
53
+ getColumns: getColumns
54
+ })
Binary file
package/package.json CHANGED
@@ -1,17 +1,17 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.top-antibodies.workflow",
3
- "version": "1.2.0",
3
+ "version": "1.3.0",
4
4
  "type": "module",
5
5
  "description": "Block Workflow",
6
6
  "dependencies": {
7
- "@platforma-sdk/workflow-tengo": "^4.3.2",
7
+ "@platforma-sdk/workflow-tengo": "^4.6.1",
8
+ "@platforma-open/milaboratories.top-antibodies.spectratype": "1.3.0",
8
9
  "@platforma-open/milaboratories.top-antibodies.sample-clonotypes": "1.0.1",
9
- "@platforma-open/milaboratories.top-antibodies.spectratype": "1.1.0",
10
10
  "@platforma-open/milaboratories.top-antibodies.umap": "1.0.1"
11
11
  },
12
12
  "devDependencies": {
13
- "@platforma-sdk/tengo-builder": "^2.1.3",
14
- "@platforma-sdk/test": "^1.30.24",
13
+ "@platforma-sdk/tengo-builder": "^2.1.5",
14
+ "@platforma-sdk/test": "^1.31.16",
15
15
  "vitest": "^2.1.8"
16
16
  },
17
17
  "scripts": {
@@ -6,6 +6,8 @@ xsv := import("@platforma-sdk/workflow-tengo:pframes.xsv")
6
6
  pframes := import("@platforma-sdk/workflow-tengo:pframes")
7
7
  sampledColsConv := import(":sampled-cols-conv")
8
8
  sampledColsUmapConv := import(":sampled-cols-umap-conv")
9
+ spectratypeConv := import(":pf-spectratype-conv")
10
+ vjUsageConv := import(":pf-vj-usage-conv")
9
11
  slices := import("@platforma-sdk/workflow-tengo:slices")
10
12
  umapConv := import(":pf-umap-conv")
11
13
 
@@ -17,7 +19,8 @@ wf.prepare(func(args){
17
19
  bundleBuilder.addAnchor("main", args.inputAnchor)
18
20
 
19
21
  for col in args.rankingOrder {
20
- bundleBuilder.addSingle(col)
22
+ bundleBuilder.addAnchor(col.value.anchorName, col.value.anchorRef)
23
+ bundleBuilder.addSingle(col.value.column)
21
24
  }
22
25
 
23
26
  // Add linker column
@@ -58,6 +61,15 @@ wf.prepare(func(args){
58
61
  }
59
62
  }, "VGenes")
60
63
 
64
+ // Add J gene
65
+ bundleBuilder.addMulti({
66
+ axes: [{ anchor: "main", idx: 1 }], // Clonotype axis
67
+ name: "pl7.app/vdj/geneHit",
68
+ domain: {
69
+ "pl7.app/vdj/reference": "JGene"
70
+ }
71
+ }, "JGenes")
72
+
61
73
  return {
62
74
  columns: bundleBuilder.build()
63
75
  }
@@ -80,14 +92,14 @@ wf.body(func(args) {
80
92
  ////////// Clonotype Filtering //////////
81
93
 
82
94
  // Build clonotype table
83
- cloneTable := columns.xsvTableBuilder()
95
+ cloneTable := pframes.csvFileBuilder()
84
96
  cloneTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
85
97
 
86
98
  for i, col in args.rankingOrder {
87
- cloneTable.add(col, {header: "Col" + string(i)})
99
+ cloneTable.add(columns.getColumn(col.value.column), {header: "Col" + string(i)})
88
100
 
89
101
  // If the column does not have the main anchor axis, we have to include its own axes
90
- colsSpec := columns.getSpec(col)
102
+ colsSpec := columns.getSpec(col.value.column)
91
103
  axesNames := slices.map(colsSpec.axesSpec, func (a) { return a.name})
92
104
  if !slices.hasElement(axesNames, datasetSpec.axesSpec[1].name) {
93
105
  for na, ax in colsSpec.axesSpec {
@@ -102,16 +114,18 @@ wf.body(func(args) {
102
114
  linkerAxisSpec := {}
103
115
  for i, col in columns.getColumns("linkers") {
104
116
  if datasetSpec.axesSpec[1].name == col.spec.axesSpec[1].name {
105
- cloneTable.add(col.key, {header: "linker." + string(i)})
117
+ cloneTable.add(col, {header: "linker." + string(i)})
106
118
  cloneTable.setAxisHeader(col.spec.axesSpec[0].name, "cluster_" + string(i))
107
119
  linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[0]
108
120
  } else if datasetSpec.axesSpec[1].name == col.spec.axesSpec[0].name {
109
- cloneTable.add(col.key, {header: "linker." + string(i)})
121
+ cloneTable.add(col, {header: "linker." + string(i)})
110
122
  cloneTable.setAxisHeader(col.spec.axesSpec[1].name, "cluster_" + string(i))
111
123
  linkerAxisSpec["cluster_" + string(i)] = col.spec.axesSpec[1]
112
124
  }
113
125
  }
114
- cloneTable = cloneTable.build("csv")
126
+ cloneTable = cloneTable.build()
127
+
128
+ topClonotypesCsv := undefined
115
129
 
116
130
  if topClonotypes != undefined {
117
131
  // Run sampling script
@@ -126,33 +140,36 @@ wf.body(func(args) {
126
140
  saveStdoutContent().
127
141
  cache(24 * 60 * 60 * 1000).
128
142
  run()
129
-
143
+
144
+ // Save top clonotypes CSV file
145
+ topClonotypesCsv = sampleClones.getFile("sampledClonotypes_top.csv")
146
+
130
147
  // Store outputs
131
148
  sampledColsParams := sampledColsConv.getColumns(datasetSpec, linkerAxisSpec)
132
- sampledColumnsPf := xsv.importFile(sampleClones.getFile("sampledClonotypes_top.csv"), "csv", sampledColsParams)
149
+ sampledColumnsPf := xsv.importFile(topClonotypesCsv, "csv", sampledColsParams)
133
150
  outputs["sampledRows"] = pframes.exportFrame(sampledColumnsPf)
134
151
 
135
152
  // Prepare filter col subset for UMAP
136
153
  // Use different params here to avoid taking the cluster axis
137
154
  sampledColsUmapParams := sampledColsUmapConv.getColumns(datasetSpec)
138
- sampledColsUmapPf := xsv.importFile(sampleClones.getFile("sampledClonotypes_top.csv"), "csv", sampledColsUmapParams)
155
+ sampledColsUmapPf := xsv.importFile(topClonotypesCsv, "csv", sampledColsUmapParams)
139
156
  outputs["sampledRowsUmap"] = pframes.exportFrame(sampledColsUmapPf)
140
157
  }
141
158
 
142
159
  ////////// UMAP //////////
143
160
  // Generate input TSV with Clonotype ID and aa sequence
144
- umapTable := columns.xsvTableBuilder()
161
+ umapTable := pframes.tsvFileBuilder()
145
162
  umapTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
146
163
  for col in columns.getColumns("aaSequence") {
147
164
  if isSingleCell {
148
165
  chainLabel := col.spec.domain["pl7.app/vdj/scClonotypeChain"]
149
- umapTable.add(col.key, {header: "aaSequence." + chainLabel})
166
+ umapTable.add(col, {header: "aaSequence." + chainLabel})
150
167
  } else {
151
168
  chainLabel := col.spec.axesSpec[0].domain["pl7.app/vdj/chain"]
152
- umapTable.add(col.key, {header: "aaSequence." + chainLabel})
169
+ umapTable.add(col, {header: "aaSequence." + chainLabel})
153
170
  }
154
171
  }
155
- umapTable = umapTable.build("tsv")
172
+ umapTable = umapTable.build()
156
173
 
157
174
  // UMAP script should go here
158
175
  umapClones := exec.builder().
@@ -172,7 +189,7 @@ wf.body(func(args) {
172
189
 
173
190
  ////////// CDR3 Length Calculation //////////
174
191
 
175
- cdr3SeqTable := columns.xsvTableBuilder()
192
+ cdr3SeqTable := pframes.tsvFileBuilder()
176
193
  cdr3SeqTable.setAxisHeader(datasetSpec.axesSpec[1].name, "clonotypeKey")
177
194
 
178
195
  // Must deal with multiple CDR3 sequences (two for each cell in single cell data)
@@ -207,7 +224,7 @@ wf.body(func(args) {
207
224
 
208
225
  for col in cdr3Sequences {
209
226
  headerName := makeHeaderName(col, "cdr3Sequence", isSingleCell)
210
- cdr3SeqTable.add(col.key, {header: headerName})
227
+ cdr3SeqTable.add(col, {header: headerName})
211
228
  }
212
229
 
213
230
  // Process V genes
@@ -215,92 +232,50 @@ wf.body(func(args) {
215
232
 
216
233
  for col in vGenes {
217
234
  headerName := makeHeaderName(col, "vGene", isSingleCell)
218
- cdr3SeqTable.add(col.key, {header: headerName})
235
+ cdr3SeqTable.add(col, {header: headerName})
236
+ }
237
+
238
+ // Process J genes
239
+ jGenes := columns.getColumns("JGenes")
240
+
241
+ for col in jGenes {
242
+ headerName := makeHeaderName(col, "jGene", isSingleCell)
243
+ cdr3SeqTable.add(col, {header: headerName})
219
244
  }
220
245
 
221
- cdr3SeqTableBuilt := cdr3SeqTable.build("tsv")
246
+ cdr3SeqTableBuilt := cdr3SeqTable.build()
222
247
 
223
248
  cdr3VspectratypeCmd := exec.builder().
224
249
  software(assets.importSoftware("@platforma-open/milaboratories.top-antibodies.spectratype:main")).
225
250
  addFile("cdr3_sequences_input.tsv", cdr3SeqTableBuilt).
226
251
  arg("--input_tsv").arg("cdr3_sequences_input.tsv").
227
- arg("--output_tsv").arg("cdr3_lengths.tsv").
228
- saveFile("cdr3_lengths.tsv").
252
+ arg("--spectratype_tsv").arg("spectratype.tsv").
253
+ arg("--vj_usage_tsv").arg("vj_usage.tsv") // no dot here
254
+
255
+ // Add top clonotypes argument and file to the builder if provided
256
+ if topClonotypes != undefined {
257
+ cdr3VspectratypeCmd = cdr3VspectratypeCmd.
258
+ arg("--top_clonotypes_csv").arg("topClonotypes.csv").
259
+ addFile("topClonotypes.csv", topClonotypesCsv)
260
+ }
261
+
262
+ cdr3VspectratypeCmd = cdr3VspectratypeCmd. // continue building the command
263
+ saveFile("spectratype.tsv").
264
+ saveFile("vj_usage.tsv").
229
265
  printErrStreamToStdout().
230
266
  saveStdoutContent().
231
267
  cache(24 * 60 * 60 * 1000).
232
268
  run()
233
269
 
234
270
 
235
- // For spectratype structure is:
236
- // [chain][cdr3Length][vGene] -> count
271
+ // The spectratype structure is [chain][cdr3Length][vGene] -> count
237
272
 
238
- // Get the spec for the vGene column
239
- vGeneSpec := columns.getSpec(vGenes[0].key)
240
-
241
- // Set up the axes for the output data
242
- axes := [
243
- {
244
- column: "chain",
245
- spec: {
246
- name: "pl7.app/vdj/chain",
247
- type: "String", // For axis it is type, not valueType
248
- annotations: { "pl7.app/label": "CDR3 chain" }
249
- }
250
- },
251
- {
252
- column: "cdr3Length",
253
- spec: {
254
- name: "pl7.app/vdj/sequenceLength",
255
- type: "Int",
256
- domain: {
257
- "pl7.app/vdj/feature": "CDR3",
258
- "pl7.app/alphabet": "aminoacid"
259
- },
260
- annotations: { "pl7.app/label": "CDR3 aa Length" }
261
- }
262
- },
263
- {
264
- column: "vGene",
265
- spec: {
266
- name: "pl7.app/vdj/geneHit",
267
- type: "String",
268
- domain: vGeneSpec.domain,
269
- annotations: {
270
- "pl7.app/label": "Best V gene"
271
- }
272
- }
273
- }
274
-
275
- ]
276
-
277
-
278
- spectratypeColumns := [
279
- {
280
- column: "count",
281
- spec: {
282
- name: "pl7.app/vdj/vSpectratype",
283
- valueType: "Int",
284
- domain: {
285
- "pl7.app/vdj/feature": "CDR3",
286
- "pl7.app/alphabet": "aminoacid"
287
- },
288
- annotations: { "pl7.app/label": "CDR3 V Spectratype" }
289
- }
290
- }
291
- ]
292
-
293
- spectratypeSpec := {
294
- axes: axes,
295
- columns: spectratypeColumns,
296
- storageFormat: "Binary",
297
- partitionKeyLength: 0
298
- }
299
-
300
- cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("cdr3_lengths.tsv"), "tsv", spectratypeSpec)
301
-
273
+ cdr3VspectratypePf := xsv.importFile(cdr3VspectratypeCmd.getFile("spectratype.tsv"), "tsv", spectratypeConv.getColumns())
302
274
  outputs["cdr3VspectratypePf"] = pframes.exportFrame(cdr3VspectratypePf)
303
- //ll.print("CDR3 lengths PFrame imported.")
275
+
276
+ // The vjUsage structure is [chain][vGene][jGene] -> count
277
+ vjUsagePf := xsv.importFile(cdr3VspectratypeCmd.getFile("vj_usage.tsv"), "tsv", vjUsageConv.getColumns())
278
+ outputs["vjUsagePf"] = pframes.exportFrame(vjUsagePf)
304
279
 
305
280
  return {
306
281
  outputs: outputs,
@@ -0,0 +1,59 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+
3
+ getColumns := func() {
4
+ return {
5
+ "axes": [
6
+ {
7
+ "column": "chain",
8
+ "spec": {
9
+ "name": "pl7.app/vdj/chain",
10
+ "type": "String",
11
+ "annotations": { "pl7.app/label": "CDR3 chain" }
12
+ }
13
+ },
14
+ {
15
+ "column": "cdr3Length",
16
+ "spec": {
17
+ "name": "pl7.app/vdj/sequenceLength",
18
+ "type": "Int",
19
+ "domain": {
20
+ "pl7.app/vdj/feature": "CDR3",
21
+ "pl7.app/alphabet": "aminoacid"
22
+ },
23
+ "annotations": { "pl7.app/label": "CDR3 aa Length" }
24
+ }
25
+ },
26
+ {
27
+ "column": "vGene",
28
+ "spec": {
29
+ "name": "pl7.app/vdj/geneHit",
30
+ "type": "String",
31
+ "domain": { "pl7.app/vdj/reference": "VGene" },
32
+ "annotations": {
33
+ "pl7.app/label": "Best V gene"
34
+ }
35
+ }
36
+ }
37
+ ],
38
+ "columns": [
39
+ {
40
+ "column": "count",
41
+ "spec": {
42
+ "name": "pl7.app/vdj/vSpectratype",
43
+ "valueType": "Int",
44
+ "domain": {
45
+ "pl7.app/vdj/feature": "CDR3",
46
+ "pl7.app/alphabet": "aminoacid"
47
+ },
48
+ "annotations": { "pl7.app/label": "CDR3 V Spectratype" }
49
+ }
50
+ }
51
+ ],
52
+ "storageFormat": "Binary",
53
+ "partitionKeyLength": 0
54
+ }
55
+ }
56
+
57
+ export ll.toStrict({
58
+ getColumns: getColumns
59
+ })
@@ -0,0 +1,54 @@
1
+ ll := import("@platforma-sdk/workflow-tengo:ll")
2
+
3
+ getColumns := func() {
4
+ return {
5
+ "axes": [
6
+ {
7
+ "column": "chain",
8
+ "spec": {
9
+ "name": "pl7.app/vdj/chain",
10
+ "type": "String",
11
+ "annotations": { "pl7.app/label": "CDR3 chain" }
12
+ }
13
+ },
14
+ {
15
+ "column": "vGene",
16
+ "spec": {
17
+ "name": "pl7.app/vdj/geneHit",
18
+ "type": "String",
19
+ "domain": { "pl7.app/vdj/reference": "VGene" },
20
+ "annotations": {
21
+ "pl7.app/label": "Best V gene"
22
+ }
23
+ }
24
+ },
25
+ {
26
+ "column": "jGene",
27
+ "spec": {
28
+ "name": "pl7.app/vdj/geneHit",
29
+ "type": "String",
30
+ "domain": { "pl7.app/vdj/reference": "JGene" },
31
+ "annotations": {
32
+ "pl7.app/label": "Best J gene"
33
+ }
34
+ }
35
+ }
36
+ ],
37
+ "columns": [
38
+ {
39
+ "column": "count",
40
+ "spec": {
41
+ "name": "pl7.app/vdj/vjGeneUsage",
42
+ "valueType": "Int",
43
+ "annotations": { "pl7.app/label": "V/J usage" }
44
+ }
45
+ }
46
+ ],
47
+ "storageFormat": "Binary",
48
+ "partitionKeyLength": 0
49
+ }
50
+ }
51
+
52
+ export ll.toStrict({
53
+ getColumns: getColumns
54
+ })