@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow 1.19.7 → 1.19.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.7 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
3
+ > @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.9 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow
2
2
 
3
+ ## 1.19.9
4
+
5
+ ### Patch Changes
6
+
7
+ - 2149d28: Fix column naming for range assembling features (e.g. CDR1:CDR3, FR2:FR4) without imputation.
8
+
9
+ When using a range assembling feature without "Impute non-covered part", the workflow would fail with
10
+ "column nSeqVDJRegion does not exist in export" because VDJRegion is never exported for non-full-range features.
11
+
12
+ Changes:
13
+
14
+ - Use the assembling feature itself as clonotype key column when VDJRegion is unavailable
15
+ - Fix column naming to match MiXCR output format (e.g. `CDR1_TO_FR4` instead of `{CDR1Begin:FR4End}`)
16
+ - Add unit tests covering column naming for all assembling feature variants with/without imputation
17
+
18
+ ## 1.19.8
19
+
20
+ ### Patch Changes
21
+
22
+ - cd0f414: Support custom assembling feature and imputation in amplicon alignment
23
+
3
24
  ## 1.19.7
4
25
 
5
26
  ### Patch Changes
@@ -37,11 +37,6 @@ toCombinedDomainValue := func(spec) {
37
37
 
38
38
 
39
39
 
40
- assemblingFeature := "VDJRegion"
41
- productiveFeature := "VDJRegion"
42
- coreVFeature := "{FR1Begin:FR3End}"
43
- coreJFeature := "FR4"
44
- splitByC := false
45
40
 
46
41
 
47
42
  formatId := func(input) {
@@ -57,22 +52,142 @@ addSpec := func(columns, additionalSpec) {
57
52
  })
58
53
  }
59
54
 
55
+ parseAssemblingFeature := func(assemblingFeature) {
56
+ if assemblingFeature == "VDJRegion" || assemblingFeature == "CDR3" {
57
+ return {
58
+ imputed: [],
59
+ nonImputed: assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"],
60
+ coreGeneFeatures: {
61
+ V: "{FR1Begin:FR3End}",
62
+ J: "FR4"
63
+ }
64
+ }
65
+ }
66
+
67
+ be := text.split(assemblingFeature, ":")
68
+ if len(be) != 2 {
69
+ ll.panic("assemblingFeature must be in the format of 'begin:end', got " + assemblingFeature)
70
+ }
71
+ begin := be[0]
72
+ end := be[1]
73
+
74
+ features := ["FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"]
75
+
76
+ iBegin := -1
77
+ iEnd := -1
78
+
79
+ for i, f in features {
80
+ if f == begin {
81
+ iBegin = i
82
+ }
83
+ if f == end {
84
+ iEnd = i
85
+ }
86
+ }
87
+
88
+ if iBegin == -1 || iEnd == -1 {
89
+ ll.panic("begin or end not found in features: " + assemblingFeature)
90
+ }
91
+
92
+ if iBegin > iEnd {
93
+ ll.panic("begin is after end: " + assemblingFeature)
94
+ }
95
+
96
+ imputed := []
97
+ nonImputed := []
98
+
99
+ for i := 0; i < iBegin; i++ {
100
+ imputed = append(imputed, features[i])
101
+ }
102
+
103
+ for i := iEnd + 1; i < len(features); i++ {
104
+ imputed = append(imputed, features[i])
105
+ }
106
+
107
+ for i := iBegin; i <= iEnd; i++ {
108
+ nonImputed = append(nonImputed, features[i])
109
+ }
110
+
111
+ coreVFeature := undefined
112
+ coreJFeature := undefined
113
+
114
+ if begin != "CDR3" {
115
+ coreVFeature = "{"+begin+"Begin:FR3End}"
116
+ }
117
+
118
+ if end == "FR4" {
119
+ coreJFeature = "FR4"
120
+ }
121
+
122
+ if begin == "FR1" && end == "FR4" {
123
+ nonImputed = append(nonImputed, "VDJRegion")
124
+ } else {
125
+ imputed = append(imputed, "VDJRegion")
126
+ }
127
+
128
+ return {
129
+ imputed: imputed,
130
+ nonImputed: nonImputed,
131
+ coreGeneFeatures: {
132
+ V: coreVFeature,
133
+ J: coreJFeature
134
+ }
135
+ }
136
+ }
137
+
60
138
  calculateExportSpecs := func(presetSpecForBack, blockId) {
61
139
 
62
- assemblingFeature = presetSpecForBack.assemblingFeature
140
+ assemblingFeature := presetSpecForBack.assemblingFeature
141
+ imputeGermline := presetSpecForBack.imputeGermline
142
+ if is_undefined(imputeGermline) {
143
+ imputeGermline = false
144
+ }
63
145
 
64
146
  splitByC := false
65
147
 
66
- productiveFeature := assemblingFeature
67
- coreGeneFeatures := {
68
- V: "{FR1Begin:FR3End}",
69
- J: "FR4"
148
+ parsedFeature := parseAssemblingFeature(assemblingFeature)
149
+
150
+ imputedFeaturesMap := {}
151
+ for f in parsedFeature.imputed {
152
+ imputedFeaturesMap[f] = true
153
+ }
154
+
155
+ formatAssemblingFeature := func(fstr) {
156
+ if fstr == "VDJRegion" || fstr == "CDR3" {
157
+ return fstr
158
+ }
159
+ parts := text.split(fstr, ":")
160
+ if len(parts) == 1 {
161
+ return "{" + parts[0] + "Begin:" + parts[0] + "End}"
162
+ }
163
+ return "{" + parts[0] + "Begin:" + parts[1] + "End}"
164
+ }
165
+
166
+ productiveFeature := formatAssemblingFeature(assemblingFeature)
167
+
168
+
169
+
170
+
171
+
172
+
173
+ outputProductiveFeature := productiveFeature
174
+ if assemblingFeature != "VDJRegion" && assemblingFeature != "CDR3" {
175
+ parts := text.split(assemblingFeature, ":")
176
+ if len(parts) == 2 && parts[1] == "FR4" {
177
+
178
+ outputProductiveFeature = parts[0] + "_TO_FR4"
179
+ }
70
180
  }
181
+
182
+ coreGeneFeatures := parsedFeature.coreGeneFeatures
71
183
 
72
184
 
73
185
  anchorFeature := assemblingFeature
74
186
 
75
- features := assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"]
187
+ features := parsedFeature.nonImputed
188
+ if imputeGermline {
189
+ features = features + parsedFeature.imputed
190
+ }
76
191
 
77
192
  clonotypeKeyColumns := []
78
193
  clonotypeKeyArgs := []
@@ -84,18 +199,67 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
84
199
  [ "-jGene" ]
85
200
  ]
86
201
  } else {
87
- clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
88
- clonotypeKeyArgs = [
89
- [ "-nFeature", "VDJRegion" ],
90
- [ "-vGene" ],
91
- [ "-jGene" ]
92
- ]
202
+
203
+
204
+ vdjIsAssemblingFeature := is_undefined(imputedFeaturesMap["VDJRegion"])
205
+
206
+ if vdjIsAssemblingFeature {
207
+
208
+ clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
209
+ clonotypeKeyArgs = [
210
+ [ "-nFeature", "VDJRegion" ],
211
+ [ "-vGene" ],
212
+ [ "-jGene" ]
213
+ ]
214
+ } else {
215
+
216
+
217
+
218
+
219
+
220
+ keyColName := "nSeq" + outputProductiveFeature
221
+ clonotypeKeyColumns = [keyColName, "bestVGene", "bestJGene"]
222
+ clonotypeKeyArgs = [
223
+ [ "-nFeature", productiveFeature ],
224
+ [ "-vGene" ],
225
+ [ "-jGene" ]
226
+ ]
227
+ }
93
228
  }
94
229
 
95
230
  columnsSpecPerSample := []
96
231
  columnsSpecPerClonotypeNoAggregates := []
97
232
  mutationColumns := []
98
233
 
234
+
235
+
236
+
237
+ needsAssemblingFeatureExport := assemblingFeature != "CDR3" && assemblingFeature != "VDJRegion" && !is_undefined(imputedFeaturesMap["VDJRegion"])
238
+ if needsAssemblingFeatureExport {
239
+ featureIdL := text.to_lower(formatId(assemblingFeature))
240
+ keyColName := "nSeq" + outputProductiveFeature
241
+ columnsSpecPerClonotypeNoAggregates += [ {
242
+ column: keyColName,
243
+ id: "n-seq-" + featureIdL,
244
+ naRegex: "region_not_covered",
245
+ spec: {
246
+ name: "pl7.app/vdj/sequence",
247
+ valueType: "String",
248
+ domain: {
249
+ "pl7.app/vdj/feature": outputProductiveFeature,
250
+ "pl7.app/alphabet": "nucleotide"
251
+ },
252
+ annotations: a(80100, false, {
253
+ "pl7.app/vdj/isAssemblingFeature": "true",
254
+ "pl7.app/vdj/isMainSequence": "false",
255
+ "pl7.app/vdj/imputed": "false",
256
+ "pl7.app/table/fontFamily": "monospace",
257
+ "pl7.app/label": outputProductiveFeature + " nt"
258
+ })
259
+ }
260
+ } ]
261
+ }
262
+
99
263
  clonotypeLabelColumn := {
100
264
  column: "clonotypeLabel",
101
265
  id: "clonotype-label",
@@ -113,6 +277,11 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
113
277
  exportArgs := []
114
278
 
115
279
 
280
+ if needsAssemblingFeatureExport {
281
+ exportArgs += [ [ "-nFeature", productiveFeature ] ]
282
+ }
283
+
284
+
116
285
 
117
286
  hasUmi := !is_undefined(presetSpecForBack) && !is_undefined(presetSpecForBack.umiTags) && len(presetSpecForBack.umiTags) > 0
118
287
 
@@ -314,100 +483,113 @@ inFrameFeatures := {
314
483
  "CDR3": "CDR3"
315
484
  }
316
485
 
317
- for featureU in features {
318
- featureL := text.to_lower(formatId(featureU))
319
- for isAminoAcid in [true, false] {
320
- featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
321
- if is_undefined(featureInFrameU) {
322
- featureInFrameU = featureU
323
- }
324
- featureInFrameL := text.to_lower(formatId(featureInFrameU))
486
+ for isImputed in [false, true] {
487
+ featuresList := isImputed ? parsedFeature.imputed : parsedFeature.nonImputed
488
+ if len(featuresList) == 0 {
489
+ continue
490
+ }
491
+ if isImputed && !imputeGermline {
492
+ continue
493
+ }
325
494
 
326
- alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
327
- alphabetShort := isAminoAcid ? "aa" : "nt"
328
- alphabetShortMixcr := isAminoAcid ? "aa" : "n"
329
- columnName := alphabetShortMixcr + "Seq" + featureInFrameU
330
- visibility := featureU == "VDJRegion" || featureU == "CDR3"
331
- if featureU == "CDR3" {
332
- cdr3SeqColumns += [ columnName ]
333
- }
334
- if isAminoAcid {
335
- aminoAcidSeqColumns += [ columnName ]
336
- aminoAcidSeqColumnPairs += [ {
337
- aa: columnName,
338
- nt: "nSeq" + featureU
339
- } ]
340
- }
341
- columnsSpecPerClonotypeNoAggregates += [ {
342
- column: columnName,
343
- id: alphabetShortMixcr + "-seq-" + featureInFrameL,
344
- naRegex: "region_not_covered",
345
- spec: {
346
- name: "pl7.app/vdj/sequence",
347
- valueType: "String",
348
- domain: {
349
- "pl7.app/vdj/feature": featureInFrameU,
350
- "pl7.app/alphabet": alphabet
351
- },
352
- annotations: a(orderP, visibility, {
353
- "pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
354
- "pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
355
- "pl7.app/vdj/imputed": "false",
356
- "pl7.app/table/fontFamily": "monospace",
357
- "pl7.app/label": featureInFrameU + " " + alphabetShort
358
- })
359
- }
360
- } ]
361
- exportArgs += [ [ "-" + alphabetShortMixcr + "Feature", featureInFrameU ] ]
362
- orderP -= 100
363
-
495
+ imputedU := isImputed ? "Imputed" : ""
496
+ imputedL := text.to_lower(imputedU)
364
497
 
365
- if featureU == assemblingFeature {
366
- for annotationType in annotationTypes {
367
- columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
368
- columnsSpecPerClonotypeNoAggregates += [ {
498
+ for featureU in featuresList {
499
+ featureL := text.to_lower(formatId(featureU))
500
+ for isAminoAcid in [true, false] {
501
+ featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
502
+ if is_undefined(featureInFrameU) {
503
+ featureInFrameU = featureU
504
+ }
505
+ featureInFrameL := text.to_lower(formatId(featureInFrameU))
506
+
507
+ alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
508
+ alphabetShort := isAminoAcid ? "aa" : "nt"
509
+ alphabetShortMixcr := isAminoAcid ? "aa" : "n"
510
+ columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
511
+ visibility := featureU == "VDJRegion" || featureU == "CDR3"
512
+ if featureU == "CDR3" {
513
+ cdr3SeqColumns += [ columnName ]
514
+ }
515
+ if isAminoAcid {
516
+ aminoAcidSeqColumns += [ columnName ]
517
+ aminoAcidSeqColumnPairs += [ {
518
+ aa: columnName,
519
+ nt: "nSeq" + imputedU + featureU
520
+ } ]
521
+ }
522
+ columnsSpecPerClonotypeNoAggregates += [ {
369
523
  column: columnName,
370
- id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
524
+ id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
371
525
  naRegex: "region_not_covered",
372
526
  spec: {
373
- name: "pl7.app/vdj/sequence/annotation",
527
+ name: "pl7.app/vdj/sequence",
374
528
  valueType: "String",
375
529
  domain: {
376
530
  "pl7.app/vdj/feature": featureInFrameU,
377
- "pl7.app/alphabet": alphabet,
378
- "pl7.app/sequence/annotation/type": annotationType
531
+ "pl7.app/alphabet": alphabet
379
532
  },
380
- annotations: a(orderP, undefined, {
381
- "pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
382
- "pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
383
- "pl7.app/sequence/isAnnotation": "true"
533
+ annotations: a(orderP, visibility, {
534
+ "pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
535
+ "pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
536
+ "pl7.app/vdj/imputed": string(isImputed),
537
+ "pl7.app/table/fontFamily": "monospace",
538
+ "pl7.app/label": (isImputed ? "Imputed " : "") + featureInFrameU + " " + alphabetShort
384
539
  })
385
540
  }
386
541
  } ]
387
- exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
388
- orderP -= 100
389
- }
390
- }
391
-
542
+ exportArgs += [ [ "-" + alphabetShortMixcr + "Feature" + imputedU, featureInFrameU ] ]
543
+ orderP -= 100
544
+
392
545
 
393
- if featureU == "CDR3" {
394
- columnsSpecPerClonotypeNoAggregates += [ {
395
- column: alphabetShortMixcr + "Length" + featureU,
396
- id: alphabetShortMixcr + "-length-" + featureL,
397
- naRegex: "region_not_covered",
398
- spec: {
399
- name: "pl7.app/vdj/sequenceLength",
400
- valueType: "Int",
401
- domain: {
402
- "pl7.app/vdj/feature": featureU,
403
- "pl7.app/alphabet": alphabet
404
- },
405
- annotations: a(orderP, false, {
406
- "pl7.app/label": "Length of " + featureU + " " + alphabetShort
407
- })
546
+ if !isImputed && featureU == assemblingFeature {
547
+ for annotationType in annotationTypes {
548
+ columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
549
+ columnsSpecPerClonotypeNoAggregates += [ {
550
+ column: columnName,
551
+ id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
552
+ naRegex: "region_not_covered",
553
+ spec: {
554
+ name: "pl7.app/vdj/sequence/annotation",
555
+ valueType: "String",
556
+ domain: {
557
+ "pl7.app/vdj/feature": featureInFrameU,
558
+ "pl7.app/alphabet": alphabet,
559
+ "pl7.app/sequence/annotation/type": annotationType
560
+ },
561
+ annotations: a(orderP, undefined, {
562
+ "pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
563
+ "pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
564
+ "pl7.app/sequence/isAnnotation": "true"
565
+ })
566
+ }
567
+ } ]
568
+ exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
569
+ orderP -= 100
408
570
  }
409
- } ]
410
- exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
571
+ }
572
+
573
+
574
+ if !isImputed && featureU == "CDR3" {
575
+ columnsSpecPerClonotypeNoAggregates += [ {
576
+ column: alphabetShortMixcr + "Length" + featureU,
577
+ id: alphabetShortMixcr + "-length-" + featureL,
578
+ naRegex: "region_not_covered",
579
+ spec: {
580
+ name: "pl7.app/vdj/sequenceLength",
581
+ valueType: "Int",
582
+ domain: {
583
+ "pl7.app/vdj/feature": featureU,
584
+ "pl7.app/alphabet": alphabet
585
+ },
586
+ annotations: a(orderP, false, {
587
+ "pl7.app/label": "Length of " + featureU + " " + alphabetShort
588
+ })
589
+ }
590
+ } ]
591
+ exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
592
+ }
411
593
  }
412
594
  }
413
595
  }
@@ -510,7 +692,7 @@ inFrameFeatures := {
510
692
  }
511
693
 
512
694
 
513
- if assemblingFeature == "VDJRegion" {
695
+ if assemblingFeature == "VDJRegion" || assemblingFeature == "FR1:FR4" {
514
696
  orderP = 9500
515
697
 
516
698
 
@@ -793,11 +975,11 @@ inFrameFeatures := {
793
975
  visibility: false
794
976
  }
795
977
  ]
796
- mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
978
+ mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + outputProductiveFeature
797
979
  mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
798
980
  for variant in flagColumnVariants {
799
981
  columnsSpecPerClonotypeNoAggregates += [ {
800
- column: variant.columnPrefix + productiveFeature,
982
+ column: variant.columnPrefix + outputProductiveFeature,
801
983
  id: variant.id,
802
984
  allowNA: false,
803
985
  spec: {
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow",
3
- "version": "1.19.7",
3
+ "version": "1.19.9",
4
4
  "description": "MiXCR Amplicon Alignment Workflow",
5
5
  "type": "module",
6
6
  "dependencies": {
@@ -37,11 +37,6 @@ toCombinedDomainValue := func(spec) {
37
37
  * - V: formatted identifier for the V gene core feature
38
38
  * - J: formatted identifier for the J gene core feature (if applicable)
39
39
  */
40
- assemblingFeature := "VDJRegion"
41
- productiveFeature := "VDJRegion"
42
- coreVFeature := "{FR1Begin:FR3End}"
43
- coreJFeature := "FR4"
44
- splitByC := false
45
40
 
46
41
  // sometimes we need to format assembling feature to be used in column ids
47
42
  formatId := func(input) {
@@ -57,22 +52,142 @@ addSpec := func(columns, additionalSpec) {
57
52
  })
58
53
  }
59
54
 
55
+ parseAssemblingFeature := func(assemblingFeature) {
56
+ if assemblingFeature == "VDJRegion" || assemblingFeature == "CDR3" {
57
+ return {
58
+ imputed: [],
59
+ nonImputed: assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"],
60
+ coreGeneFeatures: {
61
+ V: "{FR1Begin:FR3End}",
62
+ J: "FR4"
63
+ }
64
+ }
65
+ }
66
+
67
+ be := text.split(assemblingFeature, ":")
68
+ if len(be) != 2 {
69
+ ll.panic("assemblingFeature must be in the format of 'begin:end', got " + assemblingFeature)
70
+ }
71
+ begin := be[0]
72
+ end := be[1]
73
+
74
+ features := ["FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"]
75
+
76
+ iBegin := -1
77
+ iEnd := -1
78
+
79
+ for i, f in features {
80
+ if f == begin {
81
+ iBegin = i
82
+ }
83
+ if f == end {
84
+ iEnd = i
85
+ }
86
+ }
87
+
88
+ if iBegin == -1 || iEnd == -1 {
89
+ ll.panic("begin or end not found in features: " + assemblingFeature)
90
+ }
91
+
92
+ if iBegin > iEnd {
93
+ ll.panic("begin is after end: " + assemblingFeature)
94
+ }
95
+
96
+ imputed := []
97
+ nonImputed := []
98
+
99
+ for i := 0; i < iBegin; i++ {
100
+ imputed = append(imputed, features[i])
101
+ }
102
+
103
+ for i := iEnd + 1; i < len(features); i++ {
104
+ imputed = append(imputed, features[i])
105
+ }
106
+
107
+ for i := iBegin; i <= iEnd; i++ {
108
+ nonImputed = append(nonImputed, features[i])
109
+ }
110
+
111
+ coreVFeature := undefined
112
+ coreJFeature := undefined
113
+
114
+ if begin != "CDR3" {
115
+ coreVFeature = "{"+begin+"Begin:FR3End}"
116
+ }
117
+
118
+ if end == "FR4" {
119
+ coreJFeature = "FR4"
120
+ }
121
+
122
+ if begin == "FR1" && end == "FR4" {
123
+ nonImputed = append(nonImputed, "VDJRegion")
124
+ } else {
125
+ imputed = append(imputed, "VDJRegion")
126
+ }
127
+
128
+ return {
129
+ imputed: imputed,
130
+ nonImputed: nonImputed,
131
+ coreGeneFeatures: {
132
+ V: coreVFeature,
133
+ J: coreJFeature
134
+ }
135
+ }
136
+ }
137
+
60
138
  calculateExportSpecs := func(presetSpecForBack, blockId) {
61
139
 
62
- assemblingFeature = presetSpecForBack.assemblingFeature
140
+ assemblingFeature := presetSpecForBack.assemblingFeature
141
+ imputeGermline := presetSpecForBack.imputeGermline
142
+ if is_undefined(imputeGermline) {
143
+ imputeGermline = false
144
+ }
63
145
 
64
146
  splitByC := false
65
147
 
66
- productiveFeature := assemblingFeature
67
- coreGeneFeatures := {
68
- V: "{FR1Begin:FR3End}",
69
- J: "FR4"
148
+ parsedFeature := parseAssemblingFeature(assemblingFeature)
149
+
150
+ imputedFeaturesMap := {}
151
+ for f in parsedFeature.imputed {
152
+ imputedFeaturesMap[f] = true
70
153
  }
71
154
 
155
+ formatAssemblingFeature := func(fstr) {
156
+ if fstr == "VDJRegion" || fstr == "CDR3" {
157
+ return fstr
158
+ }
159
+ parts := text.split(fstr, ":")
160
+ if len(parts) == 1 {
161
+ return "{" + parts[0] + "Begin:" + parts[0] + "End}"
162
+ }
163
+ return "{" + parts[0] + "Begin:" + parts[1] + "End}"
164
+ }
165
+
166
+ productiveFeature := formatAssemblingFeature(assemblingFeature)
167
+
168
+ // MiXCR column naming for range features:
169
+ // - Ranges ending at FR4 have named aliases: CDR1_TO_FR4, FR2_TO_FR4, CDR2_TO_FR4, FR3_TO_FR4
170
+ // (defined in repseqio GeneFeature.java)
171
+ // - All other ranges use {XBegin:YEnd} format (e.g. {CDR1Begin:CDR3End})
172
+ // - Simple features (CDR3, VDJRegion) use their name directly
173
+ outputProductiveFeature := productiveFeature
174
+ if assemblingFeature != "VDJRegion" && assemblingFeature != "CDR3" {
175
+ parts := text.split(assemblingFeature, ":")
176
+ if len(parts) == 2 && parts[1] == "FR4" {
177
+ // MiXCR has a named alias for this range
178
+ outputProductiveFeature = parts[0] + "_TO_FR4"
179
+ }
180
+ }
181
+
182
+ coreGeneFeatures := parsedFeature.coreGeneFeatures
183
+
72
184
  // column with nucleotide sequence of this feature will be marked as anchor
73
185
  anchorFeature := assemblingFeature
74
186
 
75
- features := assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"]
187
+ features := parsedFeature.nonImputed
188
+ if imputeGermline {
189
+ features = features + parsedFeature.imputed
190
+ }
76
191
 
77
192
  clonotypeKeyColumns := []
78
193
  clonotypeKeyArgs := []
@@ -84,18 +199,67 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
84
199
  [ "-jGene" ]
85
200
  ]
86
201
  } else {
87
- clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
88
- clonotypeKeyArgs = [
89
- [ "-nFeature", "VDJRegion" ],
90
- [ "-vGene" ],
91
- [ "-jGene" ]
92
- ]
202
+ // VDJRegion is the assembling feature itself only when it's NOT in the imputed list
203
+ // (e.g. VDJRegion or FR1:FR4 as the assembling feature)
204
+ vdjIsAssemblingFeature := is_undefined(imputedFeaturesMap["VDJRegion"])
205
+
206
+ if vdjIsAssemblingFeature {
207
+ // VDJRegion IS the assembling feature, use it directly as the key
208
+ clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
209
+ clonotypeKeyArgs = [
210
+ [ "-nFeature", "VDJRegion" ],
211
+ [ "-vGene" ],
212
+ [ "-jGene" ]
213
+ ]
214
+ } else {
215
+ // Range feature where VDJRegion is NOT the assembling feature (e.g. CDR1:CDR3, FR2:FR4)
216
+ // Always use the assembling feature itself as the key, even with imputation enabled.
217
+ // Imputed VDJRegion is NOT guaranteed unique per clone (two clones with different
218
+ // assembling feature sequences can produce the same imputed VDJRegion).
219
+ // The assembling feature sequence IS unique by definition (it defines the clone).
220
+ keyColName := "nSeq" + outputProductiveFeature
221
+ clonotypeKeyColumns = [keyColName, "bestVGene", "bestJGene"]
222
+ clonotypeKeyArgs = [
223
+ [ "-nFeature", productiveFeature ],
224
+ [ "-vGene" ],
225
+ [ "-jGene" ]
226
+ ]
227
+ }
93
228
  }
94
229
 
95
230
  columnsSpecPerSample := []
96
231
  columnsSpecPerClonotypeNoAggregates := []
97
232
  mutationColumns := []
98
233
 
234
+ // For range features where VDJRegion is not the assembling feature, we need to export
235
+ // the combined assembling feature sequence column explicitly (individual features are
236
+ // exported in the loop below, but the combined feature like {CDR1Begin:CDR3End} is not)
237
+ needsAssemblingFeatureExport := assemblingFeature != "CDR3" && assemblingFeature != "VDJRegion" && !is_undefined(imputedFeaturesMap["VDJRegion"])
238
+ if needsAssemblingFeatureExport {
239
+ featureIdL := text.to_lower(formatId(assemblingFeature))
240
+ keyColName := "nSeq" + outputProductiveFeature
241
+ columnsSpecPerClonotypeNoAggregates += [ {
242
+ column: keyColName,
243
+ id: "n-seq-" + featureIdL,
244
+ naRegex: "region_not_covered",
245
+ spec: {
246
+ name: "pl7.app/vdj/sequence",
247
+ valueType: "String",
248
+ domain: {
249
+ "pl7.app/vdj/feature": outputProductiveFeature,
250
+ "pl7.app/alphabet": "nucleotide"
251
+ },
252
+ annotations: a(80100, false, {
253
+ "pl7.app/vdj/isAssemblingFeature": "true",
254
+ "pl7.app/vdj/isMainSequence": "false",
255
+ "pl7.app/vdj/imputed": "false",
256
+ "pl7.app/table/fontFamily": "monospace",
257
+ "pl7.app/label": outputProductiveFeature + " nt"
258
+ })
259
+ }
260
+ } ]
261
+ }
262
+
99
263
  clonotypeLabelColumn := {
100
264
  column: "clonotypeLabel",
101
265
  id: "clonotype-label",
@@ -112,6 +276,11 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
112
276
  // array of array of arg groups
113
277
  exportArgs := []
114
278
 
279
+ // Add the assembling feature export arg if needed (column spec was added above)
280
+ if needsAssemblingFeatureExport {
281
+ exportArgs += [ [ "-nFeature", productiveFeature ] ]
282
+ }
283
+
115
284
  // Abundance - reads by default; switch to UMI columns if umiTags are present
116
285
 
117
286
  hasUmi := !is_undefined(presetSpecForBack) && !is_undefined(presetSpecForBack.umiTags) && len(presetSpecForBack.umiTags) > 0
@@ -314,100 +483,113 @@ inFrameFeatures := {
314
483
  "CDR3": "CDR3"
315
484
  }
316
485
 
317
- for featureU in features {
318
- featureL := text.to_lower(formatId(featureU))
319
- for isAminoAcid in [true, false] {
320
- featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
321
- if is_undefined(featureInFrameU) {
322
- featureInFrameU = featureU
323
- }
324
- featureInFrameL := text.to_lower(formatId(featureInFrameU))
486
+ for isImputed in [false, true] {
487
+ featuresList := isImputed ? parsedFeature.imputed : parsedFeature.nonImputed
488
+ if len(featuresList) == 0 {
489
+ continue
490
+ }
491
+ if isImputed && !imputeGermline {
492
+ continue
493
+ }
325
494
 
326
- alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
327
- alphabetShort := isAminoAcid ? "aa" : "nt"
328
- alphabetShortMixcr := isAminoAcid ? "aa" : "n"
329
- columnName := alphabetShortMixcr + "Seq" + featureInFrameU
330
- visibility := featureU == "VDJRegion" || featureU == "CDR3"
331
- if featureU == "CDR3" {
332
- cdr3SeqColumns += [ columnName ]
333
- }
334
- if isAminoAcid {
335
- aminoAcidSeqColumns += [ columnName ]
336
- aminoAcidSeqColumnPairs += [ {
337
- aa: columnName,
338
- nt: "nSeq" + featureU
339
- } ]
340
- }
341
- columnsSpecPerClonotypeNoAggregates += [ {
342
- column: columnName,
343
- id: alphabetShortMixcr + "-seq-" + featureInFrameL,
344
- naRegex: "region_not_covered",
345
- spec: {
346
- name: "pl7.app/vdj/sequence",
347
- valueType: "String",
348
- domain: {
349
- "pl7.app/vdj/feature": featureInFrameU,
350
- "pl7.app/alphabet": alphabet
351
- },
352
- annotations: a(orderP, visibility, {
353
- "pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
354
- "pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
355
- "pl7.app/vdj/imputed": "false",
356
- "pl7.app/table/fontFamily": "monospace",
357
- "pl7.app/label": featureInFrameU + " " + alphabetShort
358
- })
359
- }
360
- } ]
361
- exportArgs += [ [ "-" + alphabetShortMixcr + "Feature", featureInFrameU ] ]
362
- orderP -= 100
363
-
364
- // Adding sequence annotation columns for assembling feature
365
- if featureU == assemblingFeature {
366
- for annotationType in annotationTypes {
367
- columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
368
- columnsSpecPerClonotypeNoAggregates += [ {
495
+ imputedU := isImputed ? "Imputed" : ""
496
+ imputedL := text.to_lower(imputedU)
497
+
498
+ for featureU in featuresList {
499
+ featureL := text.to_lower(formatId(featureU))
500
+ for isAminoAcid in [true, false] {
501
+ featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
502
+ if is_undefined(featureInFrameU) {
503
+ featureInFrameU = featureU
504
+ }
505
+ featureInFrameL := text.to_lower(formatId(featureInFrameU))
506
+
507
+ alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
508
+ alphabetShort := isAminoAcid ? "aa" : "nt"
509
+ alphabetShortMixcr := isAminoAcid ? "aa" : "n"
510
+ columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
511
+ visibility := featureU == "VDJRegion" || featureU == "CDR3"
512
+ if featureU == "CDR3" {
513
+ cdr3SeqColumns += [ columnName ]
514
+ }
515
+ if isAminoAcid {
516
+ aminoAcidSeqColumns += [ columnName ]
517
+ aminoAcidSeqColumnPairs += [ {
518
+ aa: columnName,
519
+ nt: "nSeq" + imputedU + featureU
520
+ } ]
521
+ }
522
+ columnsSpecPerClonotypeNoAggregates += [ {
369
523
  column: columnName,
370
- id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
524
+ id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
371
525
  naRegex: "region_not_covered",
372
526
  spec: {
373
- name: "pl7.app/vdj/sequence/annotation",
527
+ name: "pl7.app/vdj/sequence",
374
528
  valueType: "String",
375
529
  domain: {
376
530
  "pl7.app/vdj/feature": featureInFrameU,
377
- "pl7.app/alphabet": alphabet,
378
- "pl7.app/sequence/annotation/type": annotationType
531
+ "pl7.app/alphabet": alphabet
379
532
  },
380
- annotations: a(orderP, undefined, {
381
- "pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
382
- "pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
383
- "pl7.app/sequence/isAnnotation": "true"
533
+ annotations: a(orderP, visibility, {
534
+ "pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
535
+ "pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
536
+ "pl7.app/vdj/imputed": string(isImputed),
537
+ "pl7.app/table/fontFamily": "monospace",
538
+ "pl7.app/label": (isImputed ? "Imputed " : "") + featureInFrameU + " " + alphabetShort
384
539
  })
385
540
  }
386
541
  } ]
387
- exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
388
- orderP -= 100
389
- }
390
- }
391
-
392
- // For now calculate length only for CDR3 to keep the number of columns manageable
393
- if featureU == "CDR3" {
394
- columnsSpecPerClonotypeNoAggregates += [ {
395
- column: alphabetShortMixcr + "Length" + featureU,
396
- id: alphabetShortMixcr + "-length-" + featureL,
397
- naRegex: "region_not_covered",
398
- spec: {
399
- name: "pl7.app/vdj/sequenceLength",
400
- valueType: "Int",
401
- domain: {
402
- "pl7.app/vdj/feature": featureU,
403
- "pl7.app/alphabet": alphabet
404
- },
405
- annotations: a(orderP, false, {
406
- "pl7.app/label": "Length of " + featureU + " " + alphabetShort
407
- })
542
+ exportArgs += [ [ "-" + alphabetShortMixcr + "Feature" + imputedU, featureInFrameU ] ]
543
+ orderP -= 100
544
+
545
+ // Adding sequence annotation columns for assembling feature
546
+ if !isImputed && featureU == assemblingFeature {
547
+ for annotationType in annotationTypes {
548
+ columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
549
+ columnsSpecPerClonotypeNoAggregates += [ {
550
+ column: columnName,
551
+ id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
552
+ naRegex: "region_not_covered",
553
+ spec: {
554
+ name: "pl7.app/vdj/sequence/annotation",
555
+ valueType: "String",
556
+ domain: {
557
+ "pl7.app/vdj/feature": featureInFrameU,
558
+ "pl7.app/alphabet": alphabet,
559
+ "pl7.app/sequence/annotation/type": annotationType
560
+ },
561
+ annotations: a(orderP, undefined, {
562
+ "pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
563
+ "pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
564
+ "pl7.app/sequence/isAnnotation": "true"
565
+ })
566
+ }
567
+ } ]
568
+ exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
569
+ orderP -= 100
408
570
  }
409
- } ]
410
- exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
571
+ }
572
+
573
+ // For now calculate length only for CDR3 to keep the number of columns manageable
574
+ if !isImputed && featureU == "CDR3" {
575
+ columnsSpecPerClonotypeNoAggregates += [ {
576
+ column: alphabetShortMixcr + "Length" + featureU,
577
+ id: alphabetShortMixcr + "-length-" + featureL,
578
+ naRegex: "region_not_covered",
579
+ spec: {
580
+ name: "pl7.app/vdj/sequenceLength",
581
+ valueType: "Int",
582
+ domain: {
583
+ "pl7.app/vdj/feature": featureU,
584
+ "pl7.app/alphabet": alphabet
585
+ },
586
+ annotations: a(orderP, false, {
587
+ "pl7.app/label": "Length of " + featureU + " " + alphabetShort
588
+ })
589
+ }
590
+ } ]
591
+ exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
592
+ }
411
593
  }
412
594
  }
413
595
  }
@@ -509,8 +691,8 @@ inFrameFeatures := {
509
691
  }
510
692
  }
511
693
 
512
- // All nucleotide mutations count for each feature (only for VDJRegion assembling feature)
513
- if assemblingFeature == "VDJRegion" {
694
+ // All nucleotide mutations count for each feature (only for VDJRegion or FR1:FR4 assembling feature)
695
+ if assemblingFeature == "VDJRegion" || assemblingFeature == "FR1:FR4" {
514
696
  orderP = 9500
515
697
 
516
698
  // MixCR -allNMutationsCount exports columns for all features between specified boundaries.
@@ -793,11 +975,11 @@ inFrameFeatures := {
793
975
  visibility: false
794
976
  }
795
977
  ]
796
- mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
978
+ mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + outputProductiveFeature
797
979
  mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
798
980
  for variant in flagColumnVariants {
799
981
  columnsSpecPerClonotypeNoAggregates += [ {
800
- column: variant.columnPrefix + productiveFeature,
982
+ column: variant.columnPrefix + outputProductiveFeature,
801
983
  id: variant.id,
802
984
  allowNA: false,
803
985
  spec: {
@@ -69,6 +69,7 @@ wf.body(func(args) {
69
69
  cloneClusteringMode: cloneClusteringMode,
70
70
  tagPattern: args.tagPattern,
71
71
  assemblingFeature: args.assemblingFeature,
72
+ imputeGermline: args.imputeGermline,
72
73
  badQualityThreshold: args.badQualityThreshold,
73
74
  stopCodonTypes: args.stopCodonTypes,
74
75
  stopCodonReplacements: args.stopCodonReplacements
@@ -11,6 +11,7 @@ assets := import("@platforma-sdk/workflow-tengo:assets")
11
11
  pcolumn := import("@platforma-sdk/workflow-tengo:pframes.pcolumn")
12
12
  times := import("times")
13
13
  maps := import("@platforma-sdk/workflow-tengo:maps")
14
+ text := import("text")
14
15
 
15
16
  json := import("json")
16
17
 
@@ -69,8 +70,19 @@ self.body(func(inputs) {
69
70
  mixcrCmdBuilder.arg("generic-amplicon")
70
71
  }
71
72
 
73
+ formatAssemblingFeature := func(fstr) {
74
+ if fstr == "VDJRegion" || fstr == "CDR3" {
75
+ return fstr
76
+ }
77
+ parts := text.split(fstr, ":")
78
+ if len(parts) == 1 {
79
+ return "{" + parts[0] + "Begin:" + parts[0] + "End}"
80
+ }
81
+ return "{" + parts[0] + "Begin:" + parts[1] + "End}"
82
+ }
83
+
72
84
  mixcrCmdBuilder.
73
- arg("--assemble-clonotypes-by").arg(params.assemblingFeature).
85
+ arg("--assemble-clonotypes-by").arg(formatAssemblingFeature(params.assemblingFeature)).
74
86
  arg("--species").arg("custom").
75
87
  arg("--library").arg("library.json").
76
88
  addFile("library.json", params.referenceLibrary).
@@ -90,6 +90,7 @@ self.body(func(inputs) {
90
90
  // Use calculateExportSpecs for output columns
91
91
  presetSpecForBack := {
92
92
  assemblingFeature: params.assemblingFeature,
93
+ imputeGermline: params.imputeGermline,
93
94
  splitByC: true,
94
95
  umiTags: hasUMI ? umiTags : undefined,
95
96
  cellTags: []
@@ -221,6 +222,7 @@ self.body(func(inputs) {
221
222
  hasUMI: hasUMI,
222
223
  tagPattern: tagPattern,
223
224
  assemblingFeature: params.assemblingFeature,
225
+ imputeGermline: params.imputeGermline,
224
226
  badQualityThreshold: params.badQualityThreshold
225
227
  }, { removeUndefs: true }),
226
228
  limitInput: limitInput