@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow 1.19.6 → 1.19.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.6 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
3
+ > @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.8 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow
2
2
 
3
+ ## 1.19.8
4
+
5
+ ### Patch Changes
6
+
7
+ - cd0f414: Support custom assembling feature and imputation in amplicon alignment
8
+
9
+ ## 1.19.7
10
+
11
+ ### Patch Changes
12
+
13
+ - Updated dependencies [0b08dfc]
14
+ - @platforma-open/milaboratories.mixcr-amplicon-alignment.software@1.2.0
15
+
3
16
  ## 1.19.6
4
17
 
5
18
  ### Patch Changes
@@ -37,11 +37,6 @@ toCombinedDomainValue := func(spec) {
37
37
 
38
38
 
39
39
 
40
- assemblingFeature := "VDJRegion"
41
- productiveFeature := "VDJRegion"
42
- coreVFeature := "{FR1Begin:FR3End}"
43
- coreJFeature := "FR4"
44
- splitByC := false
45
40
 
46
41
 
47
42
  formatId := func(input) {
@@ -57,22 +52,130 @@ addSpec := func(columns, additionalSpec) {
57
52
  })
58
53
  }
59
54
 
55
+ parseAssemblingFeature := func(assemblingFeature) {
56
+ if assemblingFeature == "VDJRegion" || assemblingFeature == "CDR3" {
57
+ return {
58
+ imputed: [],
59
+ nonImputed: assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"],
60
+ coreGeneFeatures: {
61
+ V: "{FR1Begin:FR3End}",
62
+ J: "FR4"
63
+ }
64
+ }
65
+ }
66
+
67
+ be := text.split(assemblingFeature, ":")
68
+ if len(be) != 2 {
69
+ ll.panic("assemblingFeature must be in the format of 'begin:end', got " + assemblingFeature)
70
+ }
71
+ begin := be[0]
72
+ end := be[1]
73
+
74
+ features := ["FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"]
75
+
76
+ iBegin := -1
77
+ iEnd := -1
78
+
79
+ for i, f in features {
80
+ if f == begin {
81
+ iBegin = i
82
+ }
83
+ if f == end {
84
+ iEnd = i
85
+ }
86
+ }
87
+
88
+ if iBegin == -1 || iEnd == -1 {
89
+ ll.panic("begin or end not found in features: " + assemblingFeature)
90
+ }
91
+
92
+ if iBegin > iEnd {
93
+ ll.panic("begin is after end: " + assemblingFeature)
94
+ }
95
+
96
+ imputed := []
97
+ nonImputed := []
98
+
99
+ for i := 0; i < iBegin; i++ {
100
+ imputed = append(imputed, features[i])
101
+ }
102
+
103
+ for i := iEnd + 1; i < len(features); i++ {
104
+ imputed = append(imputed, features[i])
105
+ }
106
+
107
+ for i := iBegin; i <= iEnd; i++ {
108
+ nonImputed = append(nonImputed, features[i])
109
+ }
110
+
111
+ coreVFeature := undefined
112
+ coreJFeature := undefined
113
+
114
+ if begin != "CDR3" {
115
+ coreVFeature = "{"+begin+"Begin:FR3End}"
116
+ }
117
+
118
+ if end == "FR4" {
119
+ coreJFeature = "FR4"
120
+ }
121
+
122
+ if begin == "FR1" && end == "FR4" {
123
+ nonImputed = append(nonImputed, "VDJRegion")
124
+ } else {
125
+ imputed = append(imputed, "VDJRegion")
126
+ }
127
+
128
+ return {
129
+ imputed: imputed,
130
+ nonImputed: nonImputed,
131
+ coreGeneFeatures: {
132
+ V: coreVFeature,
133
+ J: coreJFeature
134
+ }
135
+ }
136
+ }
137
+
60
138
  calculateExportSpecs := func(presetSpecForBack, blockId) {
61
139
 
62
- assemblingFeature = presetSpecForBack.assemblingFeature
140
+ assemblingFeature := presetSpecForBack.assemblingFeature
141
+ imputeGermline := presetSpecForBack.imputeGermline
142
+ if is_undefined(imputeGermline) {
143
+ imputeGermline = false
144
+ }
63
145
 
64
146
  splitByC := false
65
147
 
66
- productiveFeature := assemblingFeature
67
- coreGeneFeatures := {
68
- V: "{FR1Begin:FR3End}",
69
- J: "FR4"
148
+ parsedFeature := parseAssemblingFeature(assemblingFeature)
149
+
150
+ imputedFeaturesMap := {}
151
+ for f in parsedFeature.imputed {
152
+ imputedFeaturesMap[f] = true
70
153
  }
71
154
 
155
+ formatAssemblingFeature := func(fstr) {
156
+ if fstr == "VDJRegion" || fstr == "CDR3" {
157
+ return fstr
158
+ }
159
+ parts := text.split(fstr, ":")
160
+ if len(parts) == 1 {
161
+ return "{" + parts[0] + "Begin:" + parts[0] + "End}"
162
+ }
163
+ return "{" + parts[0] + "Begin:" + parts[1] + "End}"
164
+ }
165
+
166
+ productiveFeature := formatAssemblingFeature(assemblingFeature)
167
+
168
+ outputProductiveFeature := productiveFeature
169
+
170
+ coreGeneFeatures := parsedFeature.coreGeneFeatures
171
+
72
172
 
73
173
  anchorFeature := assemblingFeature
74
174
 
75
- features := assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"]
175
+ features := parsedFeature.nonImputed
176
+ if imputeGermline {
177
+ features = features + parsedFeature.imputed
178
+ }
76
179
 
77
180
  clonotypeKeyColumns := []
78
181
  clonotypeKeyArgs := []
@@ -84,9 +187,13 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
84
187
  [ "-jGene" ]
85
188
  ]
86
189
  } else {
87
- clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
190
+ isVdjImputed := !is_undefined(imputedFeaturesMap["VDJRegion"]) && imputeGermline
191
+ vdjColName := "nSeq" + (isVdjImputed ? "Imputed" : "") + "VDJRegion"
192
+ vdjArgLabel := "-nFeature" + (isVdjImputed ? "Imputed" : "")
193
+
194
+ clonotypeKeyColumns = [vdjColName, "bestVGene", "bestJGene"]
88
195
  clonotypeKeyArgs = [
89
- [ "-nFeature", "VDJRegion" ],
196
+ [ vdjArgLabel, "VDJRegion" ],
90
197
  [ "-vGene" ],
91
198
  [ "-jGene" ]
92
199
  ]
@@ -314,100 +421,113 @@ inFrameFeatures := {
314
421
  "CDR3": "CDR3"
315
422
  }
316
423
 
317
- for featureU in features {
318
- featureL := text.to_lower(formatId(featureU))
319
- for isAminoAcid in [true, false] {
320
- featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
321
- if is_undefined(featureInFrameU) {
322
- featureInFrameU = featureU
323
- }
324
- featureInFrameL := text.to_lower(formatId(featureInFrameU))
424
+ for isImputed in [false, true] {
425
+ featuresList := isImputed ? parsedFeature.imputed : parsedFeature.nonImputed
426
+ if len(featuresList) == 0 {
427
+ continue
428
+ }
429
+ if isImputed && !imputeGermline {
430
+ continue
431
+ }
325
432
 
326
- alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
327
- alphabetShort := isAminoAcid ? "aa" : "nt"
328
- alphabetShortMixcr := isAminoAcid ? "aa" : "n"
329
- columnName := alphabetShortMixcr + "Seq" + featureInFrameU
330
- visibility := featureU == "VDJRegion" || featureU == "CDR3"
331
- if featureU == "CDR3" {
332
- cdr3SeqColumns += [ columnName ]
333
- }
334
- if isAminoAcid {
335
- aminoAcidSeqColumns += [ columnName ]
336
- aminoAcidSeqColumnPairs += [ {
337
- aa: columnName,
338
- nt: "nSeq" + featureU
339
- } ]
340
- }
341
- columnsSpecPerClonotypeNoAggregates += [ {
342
- column: columnName,
343
- id: alphabetShortMixcr + "-seq-" + featureInFrameL,
344
- naRegex: "region_not_covered",
345
- spec: {
346
- name: "pl7.app/vdj/sequence",
347
- valueType: "String",
348
- domain: {
349
- "pl7.app/vdj/feature": featureInFrameU,
350
- "pl7.app/alphabet": alphabet
351
- },
352
- annotations: a(orderP, visibility, {
353
- "pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
354
- "pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
355
- "pl7.app/vdj/imputed": "false",
356
- "pl7.app/table/fontFamily": "monospace",
357
- "pl7.app/label": featureInFrameU + " " + alphabetShort
358
- })
359
- }
360
- } ]
361
- exportArgs += [ [ "-" + alphabetShortMixcr + "Feature", featureInFrameU ] ]
362
- orderP -= 100
363
-
433
+ imputedU := isImputed ? "Imputed" : ""
434
+ imputedL := text.to_lower(imputedU)
364
435
 
365
- if featureU == assemblingFeature {
366
- for annotationType in annotationTypes {
367
- columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
368
- columnsSpecPerClonotypeNoAggregates += [ {
436
+ for featureU in featuresList {
437
+ featureL := text.to_lower(formatId(featureU))
438
+ for isAminoAcid in [true, false] {
439
+ featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
440
+ if is_undefined(featureInFrameU) {
441
+ featureInFrameU = featureU
442
+ }
443
+ featureInFrameL := text.to_lower(formatId(featureInFrameU))
444
+
445
+ alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
446
+ alphabetShort := isAminoAcid ? "aa" : "nt"
447
+ alphabetShortMixcr := isAminoAcid ? "aa" : "n"
448
+ columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
449
+ visibility := featureU == "VDJRegion" || featureU == "CDR3"
450
+ if featureU == "CDR3" {
451
+ cdr3SeqColumns += [ columnName ]
452
+ }
453
+ if isAminoAcid {
454
+ aminoAcidSeqColumns += [ columnName ]
455
+ aminoAcidSeqColumnPairs += [ {
456
+ aa: columnName,
457
+ nt: "nSeq" + imputedU + featureU
458
+ } ]
459
+ }
460
+ columnsSpecPerClonotypeNoAggregates += [ {
369
461
  column: columnName,
370
- id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
462
+ id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
371
463
  naRegex: "region_not_covered",
372
464
  spec: {
373
- name: "pl7.app/vdj/sequence/annotation",
465
+ name: "pl7.app/vdj/sequence",
374
466
  valueType: "String",
375
467
  domain: {
376
468
  "pl7.app/vdj/feature": featureInFrameU,
377
- "pl7.app/alphabet": alphabet,
378
- "pl7.app/sequence/annotation/type": annotationType
469
+ "pl7.app/alphabet": alphabet
379
470
  },
380
- annotations: a(orderP, undefined, {
381
- "pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
382
- "pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
383
- "pl7.app/sequence/isAnnotation": "true"
471
+ annotations: a(orderP, visibility, {
472
+ "pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
473
+ "pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
474
+ "pl7.app/vdj/imputed": string(isImputed),
475
+ "pl7.app/table/fontFamily": "monospace",
476
+ "pl7.app/label": (isImputed ? "Imputed " : "") + featureInFrameU + " " + alphabetShort
384
477
  })
385
478
  }
386
479
  } ]
387
- exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
388
- orderP -= 100
389
- }
390
- }
391
-
480
+ exportArgs += [ [ "-" + alphabetShortMixcr + "Feature" + imputedU, featureInFrameU ] ]
481
+ orderP -= 100
482
+
392
483
 
393
- if featureU == "CDR3" {
394
- columnsSpecPerClonotypeNoAggregates += [ {
395
- column: alphabetShortMixcr + "Length" + featureU,
396
- id: alphabetShortMixcr + "-length-" + featureL,
397
- naRegex: "region_not_covered",
398
- spec: {
399
- name: "pl7.app/vdj/sequenceLength",
400
- valueType: "Int",
401
- domain: {
402
- "pl7.app/vdj/feature": featureU,
403
- "pl7.app/alphabet": alphabet
404
- },
405
- annotations: a(orderP, false, {
406
- "pl7.app/label": "Length of " + featureU + " " + alphabetShort
407
- })
484
+ if !isImputed && featureU == assemblingFeature {
485
+ for annotationType in annotationTypes {
486
+ columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
487
+ columnsSpecPerClonotypeNoAggregates += [ {
488
+ column: columnName,
489
+ id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
490
+ naRegex: "region_not_covered",
491
+ spec: {
492
+ name: "pl7.app/vdj/sequence/annotation",
493
+ valueType: "String",
494
+ domain: {
495
+ "pl7.app/vdj/feature": featureInFrameU,
496
+ "pl7.app/alphabet": alphabet,
497
+ "pl7.app/sequence/annotation/type": annotationType
498
+ },
499
+ annotations: a(orderP, undefined, {
500
+ "pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
501
+ "pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
502
+ "pl7.app/sequence/isAnnotation": "true"
503
+ })
504
+ }
505
+ } ]
506
+ exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
507
+ orderP -= 100
408
508
  }
409
- } ]
410
- exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
509
+ }
510
+
511
+
512
+ if !isImputed && featureU == "CDR3" {
513
+ columnsSpecPerClonotypeNoAggregates += [ {
514
+ column: alphabetShortMixcr + "Length" + featureU,
515
+ id: alphabetShortMixcr + "-length-" + featureL,
516
+ naRegex: "region_not_covered",
517
+ spec: {
518
+ name: "pl7.app/vdj/sequenceLength",
519
+ valueType: "Int",
520
+ domain: {
521
+ "pl7.app/vdj/feature": featureU,
522
+ "pl7.app/alphabet": alphabet
523
+ },
524
+ annotations: a(orderP, false, {
525
+ "pl7.app/label": "Length of " + featureU + " " + alphabetShort
526
+ })
527
+ }
528
+ } ]
529
+ exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
530
+ }
411
531
  }
412
532
  }
413
533
  }
@@ -510,7 +630,7 @@ inFrameFeatures := {
510
630
  }
511
631
 
512
632
 
513
- if assemblingFeature == "VDJRegion" {
633
+ if assemblingFeature == "VDJRegion" || assemblingFeature == "FR1:FR4" {
514
634
  orderP = 9500
515
635
 
516
636
 
@@ -793,11 +913,11 @@ inFrameFeatures := {
793
913
  visibility: false
794
914
  }
795
915
  ]
796
- mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
916
+ mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + outputProductiveFeature
797
917
  mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
798
918
  for variant in flagColumnVariants {
799
919
  columnsSpecPerClonotypeNoAggregates += [ {
800
- column: variant.columnPrefix + productiveFeature,
920
+ column: variant.columnPrefix + outputProductiveFeature,
801
921
  id: variant.id,
802
922
  allowNA: false,
803
923
  spec: {
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,13 +1,13 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow",
3
- "version": "1.19.6",
3
+ "version": "1.19.8",
4
4
  "description": "MiXCR Amplicon Alignment Workflow",
5
5
  "type": "module",
6
6
  "dependencies": {
7
7
  "@platforma-sdk/workflow-tengo": "5.8.2",
8
8
  "@platforma-open/milaboratories.software-mixcr": "4.7.0-309-develop",
9
9
  "@platforma-open/milaboratories.software-repseqio": "^2.5.0-13-master",
10
- "@platforma-open/milaboratories.mixcr-amplicon-alignment.software": "1.1.0"
10
+ "@platforma-open/milaboratories.mixcr-amplicon-alignment.software": "1.2.0"
11
11
  },
12
12
  "devDependencies": {
13
13
  "@platforma-sdk/tengo-builder": "2.4.17"
@@ -37,11 +37,6 @@ toCombinedDomainValue := func(spec) {
37
37
  * - V: formatted identifier for the V gene core feature
38
38
  * - J: formatted identifier for the J gene core feature (if applicable)
39
39
  */
40
- assemblingFeature := "VDJRegion"
41
- productiveFeature := "VDJRegion"
42
- coreVFeature := "{FR1Begin:FR3End}"
43
- coreJFeature := "FR4"
44
- splitByC := false
45
40
 
46
41
  // sometimes we need to format assembling feature to be used in column ids
47
42
  formatId := func(input) {
@@ -57,22 +52,130 @@ addSpec := func(columns, additionalSpec) {
57
52
  })
58
53
  }
59
54
 
55
+ parseAssemblingFeature := func(assemblingFeature) {
56
+ if assemblingFeature == "VDJRegion" || assemblingFeature == "CDR3" {
57
+ return {
58
+ imputed: [],
59
+ nonImputed: assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"],
60
+ coreGeneFeatures: {
61
+ V: "{FR1Begin:FR3End}",
62
+ J: "FR4"
63
+ }
64
+ }
65
+ }
66
+
67
+ be := text.split(assemblingFeature, ":")
68
+ if len(be) != 2 {
69
+ ll.panic("assemblingFeature must be in the format of 'begin:end', got " + assemblingFeature)
70
+ }
71
+ begin := be[0]
72
+ end := be[1]
73
+
74
+ features := ["FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"]
75
+
76
+ iBegin := -1
77
+ iEnd := -1
78
+
79
+ for i, f in features {
80
+ if f == begin {
81
+ iBegin = i
82
+ }
83
+ if f == end {
84
+ iEnd = i
85
+ }
86
+ }
87
+
88
+ if iBegin == -1 || iEnd == -1 {
89
+ ll.panic("begin or end not found in features: " + assemblingFeature)
90
+ }
91
+
92
+ if iBegin > iEnd {
93
+ ll.panic("begin is after end: " + assemblingFeature)
94
+ }
95
+
96
+ imputed := []
97
+ nonImputed := []
98
+
99
+ for i := 0; i < iBegin; i++ {
100
+ imputed = append(imputed, features[i])
101
+ }
102
+
103
+ for i := iEnd + 1; i < len(features); i++ {
104
+ imputed = append(imputed, features[i])
105
+ }
106
+
107
+ for i := iBegin; i <= iEnd; i++ {
108
+ nonImputed = append(nonImputed, features[i])
109
+ }
110
+
111
+ coreVFeature := undefined
112
+ coreJFeature := undefined
113
+
114
+ if begin != "CDR3" {
115
+ coreVFeature = "{"+begin+"Begin:FR3End}"
116
+ }
117
+
118
+ if end == "FR4" {
119
+ coreJFeature = "FR4"
120
+ }
121
+
122
+ if begin == "FR1" && end == "FR4" {
123
+ nonImputed = append(nonImputed, "VDJRegion")
124
+ } else {
125
+ imputed = append(imputed, "VDJRegion")
126
+ }
127
+
128
+ return {
129
+ imputed: imputed,
130
+ nonImputed: nonImputed,
131
+ coreGeneFeatures: {
132
+ V: coreVFeature,
133
+ J: coreJFeature
134
+ }
135
+ }
136
+ }
137
+
60
138
  calculateExportSpecs := func(presetSpecForBack, blockId) {
61
139
 
62
- assemblingFeature = presetSpecForBack.assemblingFeature
140
+ assemblingFeature := presetSpecForBack.assemblingFeature
141
+ imputeGermline := presetSpecForBack.imputeGermline
142
+ if is_undefined(imputeGermline) {
143
+ imputeGermline = false
144
+ }
63
145
 
64
146
  splitByC := false
65
147
 
66
- productiveFeature := assemblingFeature
67
- coreGeneFeatures := {
68
- V: "{FR1Begin:FR3End}",
69
- J: "FR4"
148
+ parsedFeature := parseAssemblingFeature(assemblingFeature)
149
+
150
+ imputedFeaturesMap := {}
151
+ for f in parsedFeature.imputed {
152
+ imputedFeaturesMap[f] = true
70
153
  }
71
154
 
155
+ formatAssemblingFeature := func(fstr) {
156
+ if fstr == "VDJRegion" || fstr == "CDR3" {
157
+ return fstr
158
+ }
159
+ parts := text.split(fstr, ":")
160
+ if len(parts) == 1 {
161
+ return "{" + parts[0] + "Begin:" + parts[0] + "End}"
162
+ }
163
+ return "{" + parts[0] + "Begin:" + parts[1] + "End}"
164
+ }
165
+
166
+ productiveFeature := formatAssemblingFeature(assemblingFeature)
167
+
168
+ outputProductiveFeature := productiveFeature
169
+
170
+ coreGeneFeatures := parsedFeature.coreGeneFeatures
171
+
72
172
  // column with nucleotide sequence of this feature will be marked as anchor
73
173
  anchorFeature := assemblingFeature
74
174
 
75
- features := assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"]
175
+ features := parsedFeature.nonImputed
176
+ if imputeGermline {
177
+ features = features + parsedFeature.imputed
178
+ }
76
179
 
77
180
  clonotypeKeyColumns := []
78
181
  clonotypeKeyArgs := []
@@ -84,9 +187,13 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
84
187
  [ "-jGene" ]
85
188
  ]
86
189
  } else {
87
- clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
190
+ isVdjImputed := !is_undefined(imputedFeaturesMap["VDJRegion"]) && imputeGermline
191
+ vdjColName := "nSeq" + (isVdjImputed ? "Imputed" : "") + "VDJRegion"
192
+ vdjArgLabel := "-nFeature" + (isVdjImputed ? "Imputed" : "")
193
+
194
+ clonotypeKeyColumns = [vdjColName, "bestVGene", "bestJGene"]
88
195
  clonotypeKeyArgs = [
89
- [ "-nFeature", "VDJRegion" ],
196
+ [ vdjArgLabel, "VDJRegion" ],
90
197
  [ "-vGene" ],
91
198
  [ "-jGene" ]
92
199
  ]
@@ -314,100 +421,113 @@ inFrameFeatures := {
314
421
  "CDR3": "CDR3"
315
422
  }
316
423
 
317
- for featureU in features {
318
- featureL := text.to_lower(formatId(featureU))
319
- for isAminoAcid in [true, false] {
320
- featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
321
- if is_undefined(featureInFrameU) {
322
- featureInFrameU = featureU
323
- }
324
- featureInFrameL := text.to_lower(formatId(featureInFrameU))
424
+ for isImputed in [false, true] {
425
+ featuresList := isImputed ? parsedFeature.imputed : parsedFeature.nonImputed
426
+ if len(featuresList) == 0 {
427
+ continue
428
+ }
429
+ if isImputed && !imputeGermline {
430
+ continue
431
+ }
325
432
 
326
- alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
327
- alphabetShort := isAminoAcid ? "aa" : "nt"
328
- alphabetShortMixcr := isAminoAcid ? "aa" : "n"
329
- columnName := alphabetShortMixcr + "Seq" + featureInFrameU
330
- visibility := featureU == "VDJRegion" || featureU == "CDR3"
331
- if featureU == "CDR3" {
332
- cdr3SeqColumns += [ columnName ]
333
- }
334
- if isAminoAcid {
335
- aminoAcidSeqColumns += [ columnName ]
336
- aminoAcidSeqColumnPairs += [ {
337
- aa: columnName,
338
- nt: "nSeq" + featureU
339
- } ]
340
- }
341
- columnsSpecPerClonotypeNoAggregates += [ {
342
- column: columnName,
343
- id: alphabetShortMixcr + "-seq-" + featureInFrameL,
344
- naRegex: "region_not_covered",
345
- spec: {
346
- name: "pl7.app/vdj/sequence",
347
- valueType: "String",
348
- domain: {
349
- "pl7.app/vdj/feature": featureInFrameU,
350
- "pl7.app/alphabet": alphabet
351
- },
352
- annotations: a(orderP, visibility, {
353
- "pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
354
- "pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
355
- "pl7.app/vdj/imputed": "false",
356
- "pl7.app/table/fontFamily": "monospace",
357
- "pl7.app/label": featureInFrameU + " " + alphabetShort
358
- })
359
- }
360
- } ]
361
- exportArgs += [ [ "-" + alphabetShortMixcr + "Feature", featureInFrameU ] ]
362
- orderP -= 100
363
-
364
- // Adding sequence annotation columns for assembling feature
365
- if featureU == assemblingFeature {
366
- for annotationType in annotationTypes {
367
- columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
368
- columnsSpecPerClonotypeNoAggregates += [ {
433
+ imputedU := isImputed ? "Imputed" : ""
434
+ imputedL := text.to_lower(imputedU)
435
+
436
+ for featureU in featuresList {
437
+ featureL := text.to_lower(formatId(featureU))
438
+ for isAminoAcid in [true, false] {
439
+ featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
440
+ if is_undefined(featureInFrameU) {
441
+ featureInFrameU = featureU
442
+ }
443
+ featureInFrameL := text.to_lower(formatId(featureInFrameU))
444
+
445
+ alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
446
+ alphabetShort := isAminoAcid ? "aa" : "nt"
447
+ alphabetShortMixcr := isAminoAcid ? "aa" : "n"
448
+ columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
449
+ visibility := featureU == "VDJRegion" || featureU == "CDR3"
450
+ if featureU == "CDR3" {
451
+ cdr3SeqColumns += [ columnName ]
452
+ }
453
+ if isAminoAcid {
454
+ aminoAcidSeqColumns += [ columnName ]
455
+ aminoAcidSeqColumnPairs += [ {
456
+ aa: columnName,
457
+ nt: "nSeq" + imputedU + featureU
458
+ } ]
459
+ }
460
+ columnsSpecPerClonotypeNoAggregates += [ {
369
461
  column: columnName,
370
- id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
462
+ id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
371
463
  naRegex: "region_not_covered",
372
464
  spec: {
373
- name: "pl7.app/vdj/sequence/annotation",
465
+ name: "pl7.app/vdj/sequence",
374
466
  valueType: "String",
375
467
  domain: {
376
468
  "pl7.app/vdj/feature": featureInFrameU,
377
- "pl7.app/alphabet": alphabet,
378
- "pl7.app/sequence/annotation/type": annotationType
469
+ "pl7.app/alphabet": alphabet
379
470
  },
380
- annotations: a(orderP, undefined, {
381
- "pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
382
- "pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
383
- "pl7.app/sequence/isAnnotation": "true"
471
+ annotations: a(orderP, visibility, {
472
+ "pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
473
+ "pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
474
+ "pl7.app/vdj/imputed": string(isImputed),
475
+ "pl7.app/table/fontFamily": "monospace",
476
+ "pl7.app/label": (isImputed ? "Imputed " : "") + featureInFrameU + " " + alphabetShort
384
477
  })
385
478
  }
386
479
  } ]
387
- exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
388
- orderP -= 100
389
- }
390
- }
391
-
392
- // For now calculate length only for CDR3 to keep the number of columns manageable
393
- if featureU == "CDR3" {
394
- columnsSpecPerClonotypeNoAggregates += [ {
395
- column: alphabetShortMixcr + "Length" + featureU,
396
- id: alphabetShortMixcr + "-length-" + featureL,
397
- naRegex: "region_not_covered",
398
- spec: {
399
- name: "pl7.app/vdj/sequenceLength",
400
- valueType: "Int",
401
- domain: {
402
- "pl7.app/vdj/feature": featureU,
403
- "pl7.app/alphabet": alphabet
404
- },
405
- annotations: a(orderP, false, {
406
- "pl7.app/label": "Length of " + featureU + " " + alphabetShort
407
- })
480
+ exportArgs += [ [ "-" + alphabetShortMixcr + "Feature" + imputedU, featureInFrameU ] ]
481
+ orderP -= 100
482
+
483
+ // Adding sequence annotation columns for assembling feature
484
+ if !isImputed && featureU == assemblingFeature {
485
+ for annotationType in annotationTypes {
486
+ columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
487
+ columnsSpecPerClonotypeNoAggregates += [ {
488
+ column: columnName,
489
+ id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
490
+ naRegex: "region_not_covered",
491
+ spec: {
492
+ name: "pl7.app/vdj/sequence/annotation",
493
+ valueType: "String",
494
+ domain: {
495
+ "pl7.app/vdj/feature": featureInFrameU,
496
+ "pl7.app/alphabet": alphabet,
497
+ "pl7.app/sequence/annotation/type": annotationType
498
+ },
499
+ annotations: a(orderP, undefined, {
500
+ "pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
501
+ "pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
502
+ "pl7.app/sequence/isAnnotation": "true"
503
+ })
504
+ }
505
+ } ]
506
+ exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
507
+ orderP -= 100
408
508
  }
409
- } ]
410
- exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
509
+ }
510
+
511
+ // For now calculate length only for CDR3 to keep the number of columns manageable
512
+ if !isImputed && featureU == "CDR3" {
513
+ columnsSpecPerClonotypeNoAggregates += [ {
514
+ column: alphabetShortMixcr + "Length" + featureU,
515
+ id: alphabetShortMixcr + "-length-" + featureL,
516
+ naRegex: "region_not_covered",
517
+ spec: {
518
+ name: "pl7.app/vdj/sequenceLength",
519
+ valueType: "Int",
520
+ domain: {
521
+ "pl7.app/vdj/feature": featureU,
522
+ "pl7.app/alphabet": alphabet
523
+ },
524
+ annotations: a(orderP, false, {
525
+ "pl7.app/label": "Length of " + featureU + " " + alphabetShort
526
+ })
527
+ }
528
+ } ]
529
+ exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
530
+ }
411
531
  }
412
532
  }
413
533
  }
@@ -509,8 +629,8 @@ inFrameFeatures := {
509
629
  }
510
630
  }
511
631
 
512
- // All nucleotide mutations count for each feature (only for VDJRegion assembling feature)
513
- if assemblingFeature == "VDJRegion" {
632
+ // All nucleotide mutations count for each feature (only for VDJRegion or FR1:FR4 assembling feature)
633
+ if assemblingFeature == "VDJRegion" || assemblingFeature == "FR1:FR4" {
514
634
  orderP = 9500
515
635
 
516
636
  // MixCR -allNMutationsCount exports columns for all features between specified boundaries.
@@ -793,11 +913,11 @@ inFrameFeatures := {
793
913
  visibility: false
794
914
  }
795
915
  ]
796
- mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + productiveFeature
916
+ mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + outputProductiveFeature
797
917
  mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
798
918
  for variant in flagColumnVariants {
799
919
  columnsSpecPerClonotypeNoAggregates += [ {
800
- column: variant.columnPrefix + productiveFeature,
920
+ column: variant.columnPrefix + outputProductiveFeature,
801
921
  id: variant.id,
802
922
  allowNA: false,
803
923
  spec: {
@@ -69,6 +69,7 @@ wf.body(func(args) {
69
69
  cloneClusteringMode: cloneClusteringMode,
70
70
  tagPattern: args.tagPattern,
71
71
  assemblingFeature: args.assemblingFeature,
72
+ imputeGermline: args.imputeGermline,
72
73
  badQualityThreshold: args.badQualityThreshold,
73
74
  stopCodonTypes: args.stopCodonTypes,
74
75
  stopCodonReplacements: args.stopCodonReplacements
@@ -11,6 +11,7 @@ assets := import("@platforma-sdk/workflow-tengo:assets")
11
11
  pcolumn := import("@platforma-sdk/workflow-tengo:pframes.pcolumn")
12
12
  times := import("times")
13
13
  maps := import("@platforma-sdk/workflow-tengo:maps")
14
+ text := import("text")
14
15
 
15
16
  json := import("json")
16
17
 
@@ -69,8 +70,19 @@ self.body(func(inputs) {
69
70
  mixcrCmdBuilder.arg("generic-amplicon")
70
71
  }
71
72
 
73
+ formatAssemblingFeature := func(fstr) {
74
+ if fstr == "VDJRegion" || fstr == "CDR3" {
75
+ return fstr
76
+ }
77
+ parts := text.split(fstr, ":")
78
+ if len(parts) == 1 {
79
+ return "{" + parts[0] + "Begin:" + parts[0] + "End}"
80
+ }
81
+ return "{" + parts[0] + "Begin:" + parts[1] + "End}"
82
+ }
83
+
72
84
  mixcrCmdBuilder.
73
- arg("--assemble-clonotypes-by").arg(params.assemblingFeature).
85
+ arg("--assemble-clonotypes-by").arg(formatAssemblingFeature(params.assemblingFeature)).
74
86
  arg("--species").arg("custom").
75
87
  arg("--library").arg("library.json").
76
88
  addFile("library.json", params.referenceLibrary).
@@ -90,6 +90,7 @@ self.body(func(inputs) {
90
90
  // Use calculateExportSpecs for output columns
91
91
  presetSpecForBack := {
92
92
  assemblingFeature: params.assemblingFeature,
93
+ imputeGermline: params.imputeGermline,
93
94
  splitByC: true,
94
95
  umiTags: hasUMI ? umiTags : undefined,
95
96
  cellTags: []
@@ -221,6 +222,7 @@ self.body(func(inputs) {
221
222
  hasUMI: hasUMI,
222
223
  tagPattern: tagPattern,
223
224
  assemblingFeature: params.assemblingFeature,
225
+ imputeGermline: params.imputeGermline,
224
226
  badQualityThreshold: params.badQualityThreshold
225
227
  }, { removeUndefs: true }),
226
228
  limitInput: limitInput