@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow 1.19.6 → 1.19.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +13 -0
- package/dist/tengo/lib/calculate-export-specs.lib.tengo +216 -96
- package/dist/tengo/tpl/aggregate-by-clonotype-key.plj.gz +0 -0
- package/dist/tengo/tpl/export-report.plj.gz +0 -0
- package/dist/tengo/tpl/main.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-analyze.plj.gz +0 -0
- package/dist/tengo/tpl/mixcr-export.plj.gz +0 -0
- package/dist/tengo/tpl/process.plj.gz +0 -0
- package/dist/tengo/tpl/repseqio-library.plj.gz +0 -0
- package/package.json +2 -2
- package/src/calculate-export-specs.lib.tengo +219 -99
- package/src/main.tpl.tengo +1 -0
- package/src/mixcr-analyze.tpl.tengo +13 -1
- package/src/process.tpl.tengo +2 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
WARN Issue while reading "/home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
2
|
|
|
3
|
-
> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.6 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
|
|
3
|
+
> @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.8 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
|
|
4
4
|
> rm -rf dist && pl-tengo check && pl-tengo build
|
|
5
5
|
|
|
6
6
|
Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow
|
|
2
2
|
|
|
3
|
+
## 1.19.8
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- cd0f414: Support custom assembling feature and imputation in amplicon alignment
|
|
8
|
+
|
|
9
|
+
## 1.19.7
|
|
10
|
+
|
|
11
|
+
### Patch Changes
|
|
12
|
+
|
|
13
|
+
- Updated dependencies [0b08dfc]
|
|
14
|
+
- @platforma-open/milaboratories.mixcr-amplicon-alignment.software@1.2.0
|
|
15
|
+
|
|
3
16
|
## 1.19.6
|
|
4
17
|
|
|
5
18
|
### Patch Changes
|
|
@@ -37,11 +37,6 @@ toCombinedDomainValue := func(spec) {
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
assemblingFeature := "VDJRegion"
|
|
41
|
-
productiveFeature := "VDJRegion"
|
|
42
|
-
coreVFeature := "{FR1Begin:FR3End}"
|
|
43
|
-
coreJFeature := "FR4"
|
|
44
|
-
splitByC := false
|
|
45
40
|
|
|
46
41
|
|
|
47
42
|
formatId := func(input) {
|
|
@@ -57,22 +52,130 @@ addSpec := func(columns, additionalSpec) {
|
|
|
57
52
|
})
|
|
58
53
|
}
|
|
59
54
|
|
|
55
|
+
parseAssemblingFeature := func(assemblingFeature) {
|
|
56
|
+
if assemblingFeature == "VDJRegion" || assemblingFeature == "CDR3" {
|
|
57
|
+
return {
|
|
58
|
+
imputed: [],
|
|
59
|
+
nonImputed: assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"],
|
|
60
|
+
coreGeneFeatures: {
|
|
61
|
+
V: "{FR1Begin:FR3End}",
|
|
62
|
+
J: "FR4"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
be := text.split(assemblingFeature, ":")
|
|
68
|
+
if len(be) != 2 {
|
|
69
|
+
ll.panic("assemblingFeature must be in the format of 'begin:end', got " + assemblingFeature)
|
|
70
|
+
}
|
|
71
|
+
begin := be[0]
|
|
72
|
+
end := be[1]
|
|
73
|
+
|
|
74
|
+
features := ["FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"]
|
|
75
|
+
|
|
76
|
+
iBegin := -1
|
|
77
|
+
iEnd := -1
|
|
78
|
+
|
|
79
|
+
for i, f in features {
|
|
80
|
+
if f == begin {
|
|
81
|
+
iBegin = i
|
|
82
|
+
}
|
|
83
|
+
if f == end {
|
|
84
|
+
iEnd = i
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
if iBegin == -1 || iEnd == -1 {
|
|
89
|
+
ll.panic("begin or end not found in features: " + assemblingFeature)
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
if iBegin > iEnd {
|
|
93
|
+
ll.panic("begin is after end: " + assemblingFeature)
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
imputed := []
|
|
97
|
+
nonImputed := []
|
|
98
|
+
|
|
99
|
+
for i := 0; i < iBegin; i++ {
|
|
100
|
+
imputed = append(imputed, features[i])
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
for i := iEnd + 1; i < len(features); i++ {
|
|
104
|
+
imputed = append(imputed, features[i])
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
for i := iBegin; i <= iEnd; i++ {
|
|
108
|
+
nonImputed = append(nonImputed, features[i])
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
coreVFeature := undefined
|
|
112
|
+
coreJFeature := undefined
|
|
113
|
+
|
|
114
|
+
if begin != "CDR3" {
|
|
115
|
+
coreVFeature = "{"+begin+"Begin:FR3End}"
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if end == "FR4" {
|
|
119
|
+
coreJFeature = "FR4"
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
if begin == "FR1" && end == "FR4" {
|
|
123
|
+
nonImputed = append(nonImputed, "VDJRegion")
|
|
124
|
+
} else {
|
|
125
|
+
imputed = append(imputed, "VDJRegion")
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return {
|
|
129
|
+
imputed: imputed,
|
|
130
|
+
nonImputed: nonImputed,
|
|
131
|
+
coreGeneFeatures: {
|
|
132
|
+
V: coreVFeature,
|
|
133
|
+
J: coreJFeature
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
60
138
|
calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
61
139
|
|
|
62
|
-
assemblingFeature
|
|
140
|
+
assemblingFeature := presetSpecForBack.assemblingFeature
|
|
141
|
+
imputeGermline := presetSpecForBack.imputeGermline
|
|
142
|
+
if is_undefined(imputeGermline) {
|
|
143
|
+
imputeGermline = false
|
|
144
|
+
}
|
|
63
145
|
|
|
64
146
|
splitByC := false
|
|
65
147
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
148
|
+
parsedFeature := parseAssemblingFeature(assemblingFeature)
|
|
149
|
+
|
|
150
|
+
imputedFeaturesMap := {}
|
|
151
|
+
for f in parsedFeature.imputed {
|
|
152
|
+
imputedFeaturesMap[f] = true
|
|
70
153
|
}
|
|
71
154
|
|
|
155
|
+
formatAssemblingFeature := func(fstr) {
|
|
156
|
+
if fstr == "VDJRegion" || fstr == "CDR3" {
|
|
157
|
+
return fstr
|
|
158
|
+
}
|
|
159
|
+
parts := text.split(fstr, ":")
|
|
160
|
+
if len(parts) == 1 {
|
|
161
|
+
return "{" + parts[0] + "Begin:" + parts[0] + "End}"
|
|
162
|
+
}
|
|
163
|
+
return "{" + parts[0] + "Begin:" + parts[1] + "End}"
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
productiveFeature := formatAssemblingFeature(assemblingFeature)
|
|
167
|
+
|
|
168
|
+
outputProductiveFeature := productiveFeature
|
|
169
|
+
|
|
170
|
+
coreGeneFeatures := parsedFeature.coreGeneFeatures
|
|
171
|
+
|
|
72
172
|
|
|
73
173
|
anchorFeature := assemblingFeature
|
|
74
174
|
|
|
75
|
-
features :=
|
|
175
|
+
features := parsedFeature.nonImputed
|
|
176
|
+
if imputeGermline {
|
|
177
|
+
features = features + parsedFeature.imputed
|
|
178
|
+
}
|
|
76
179
|
|
|
77
180
|
clonotypeKeyColumns := []
|
|
78
181
|
clonotypeKeyArgs := []
|
|
@@ -84,9 +187,13 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
84
187
|
[ "-jGene" ]
|
|
85
188
|
]
|
|
86
189
|
} else {
|
|
87
|
-
|
|
190
|
+
isVdjImputed := !is_undefined(imputedFeaturesMap["VDJRegion"]) && imputeGermline
|
|
191
|
+
vdjColName := "nSeq" + (isVdjImputed ? "Imputed" : "") + "VDJRegion"
|
|
192
|
+
vdjArgLabel := "-nFeature" + (isVdjImputed ? "Imputed" : "")
|
|
193
|
+
|
|
194
|
+
clonotypeKeyColumns = [vdjColName, "bestVGene", "bestJGene"]
|
|
88
195
|
clonotypeKeyArgs = [
|
|
89
|
-
[
|
|
196
|
+
[ vdjArgLabel, "VDJRegion" ],
|
|
90
197
|
[ "-vGene" ],
|
|
91
198
|
[ "-jGene" ]
|
|
92
199
|
]
|
|
@@ -314,100 +421,113 @@ inFrameFeatures := {
|
|
|
314
421
|
"CDR3": "CDR3"
|
|
315
422
|
}
|
|
316
423
|
|
|
317
|
-
for
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
424
|
+
for isImputed in [false, true] {
|
|
425
|
+
featuresList := isImputed ? parsedFeature.imputed : parsedFeature.nonImputed
|
|
426
|
+
if len(featuresList) == 0 {
|
|
427
|
+
continue
|
|
428
|
+
}
|
|
429
|
+
if isImputed && !imputeGermline {
|
|
430
|
+
continue
|
|
431
|
+
}
|
|
325
432
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
alphabetShortMixcr := isAminoAcid ? "aa" : "n"
|
|
329
|
-
columnName := alphabetShortMixcr + "Seq" + featureInFrameU
|
|
330
|
-
visibility := featureU == "VDJRegion" || featureU == "CDR3"
|
|
331
|
-
if featureU == "CDR3" {
|
|
332
|
-
cdr3SeqColumns += [ columnName ]
|
|
333
|
-
}
|
|
334
|
-
if isAminoAcid {
|
|
335
|
-
aminoAcidSeqColumns += [ columnName ]
|
|
336
|
-
aminoAcidSeqColumnPairs += [ {
|
|
337
|
-
aa: columnName,
|
|
338
|
-
nt: "nSeq" + featureU
|
|
339
|
-
} ]
|
|
340
|
-
}
|
|
341
|
-
columnsSpecPerClonotypeNoAggregates += [ {
|
|
342
|
-
column: columnName,
|
|
343
|
-
id: alphabetShortMixcr + "-seq-" + featureInFrameL,
|
|
344
|
-
naRegex: "region_not_covered",
|
|
345
|
-
spec: {
|
|
346
|
-
name: "pl7.app/vdj/sequence",
|
|
347
|
-
valueType: "String",
|
|
348
|
-
domain: {
|
|
349
|
-
"pl7.app/vdj/feature": featureInFrameU,
|
|
350
|
-
"pl7.app/alphabet": alphabet
|
|
351
|
-
},
|
|
352
|
-
annotations: a(orderP, visibility, {
|
|
353
|
-
"pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
|
|
354
|
-
"pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
|
|
355
|
-
"pl7.app/vdj/imputed": "false",
|
|
356
|
-
"pl7.app/table/fontFamily": "monospace",
|
|
357
|
-
"pl7.app/label": featureInFrameU + " " + alphabetShort
|
|
358
|
-
})
|
|
359
|
-
}
|
|
360
|
-
} ]
|
|
361
|
-
exportArgs += [ [ "-" + alphabetShortMixcr + "Feature", featureInFrameU ] ]
|
|
362
|
-
orderP -= 100
|
|
363
|
-
|
|
433
|
+
imputedU := isImputed ? "Imputed" : ""
|
|
434
|
+
imputedL := text.to_lower(imputedU)
|
|
364
435
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
436
|
+
for featureU in featuresList {
|
|
437
|
+
featureL := text.to_lower(formatId(featureU))
|
|
438
|
+
for isAminoAcid in [true, false] {
|
|
439
|
+
featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
|
|
440
|
+
if is_undefined(featureInFrameU) {
|
|
441
|
+
featureInFrameU = featureU
|
|
442
|
+
}
|
|
443
|
+
featureInFrameL := text.to_lower(formatId(featureInFrameU))
|
|
444
|
+
|
|
445
|
+
alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
|
|
446
|
+
alphabetShort := isAminoAcid ? "aa" : "nt"
|
|
447
|
+
alphabetShortMixcr := isAminoAcid ? "aa" : "n"
|
|
448
|
+
columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
|
|
449
|
+
visibility := featureU == "VDJRegion" || featureU == "CDR3"
|
|
450
|
+
if featureU == "CDR3" {
|
|
451
|
+
cdr3SeqColumns += [ columnName ]
|
|
452
|
+
}
|
|
453
|
+
if isAminoAcid {
|
|
454
|
+
aminoAcidSeqColumns += [ columnName ]
|
|
455
|
+
aminoAcidSeqColumnPairs += [ {
|
|
456
|
+
aa: columnName,
|
|
457
|
+
nt: "nSeq" + imputedU + featureU
|
|
458
|
+
} ]
|
|
459
|
+
}
|
|
460
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
369
461
|
column: columnName,
|
|
370
|
-
id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
|
|
462
|
+
id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
|
|
371
463
|
naRegex: "region_not_covered",
|
|
372
464
|
spec: {
|
|
373
|
-
name: "pl7.app/vdj/sequence/annotation",
|
|
465
|
+
name: "pl7.app/vdj/sequence",
|
|
374
466
|
valueType: "String",
|
|
375
467
|
domain: {
|
|
376
468
|
"pl7.app/vdj/feature": featureInFrameU,
|
|
377
|
-
"pl7.app/alphabet": alphabet,
|
|
378
|
-
"pl7.app/sequence/annotation/type": annotationType
|
|
469
|
+
"pl7.app/alphabet": alphabet
|
|
379
470
|
},
|
|
380
|
-
annotations: a(orderP,
|
|
381
|
-
"pl7.app/
|
|
382
|
-
"pl7.app/
|
|
383
|
-
"pl7.app/
|
|
471
|
+
annotations: a(orderP, visibility, {
|
|
472
|
+
"pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
|
|
473
|
+
"pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
|
|
474
|
+
"pl7.app/vdj/imputed": string(isImputed),
|
|
475
|
+
"pl7.app/table/fontFamily": "monospace",
|
|
476
|
+
"pl7.app/label": (isImputed ? "Imputed " : "") + featureInFrameU + " " + alphabetShort
|
|
384
477
|
})
|
|
385
478
|
}
|
|
386
479
|
} ]
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
}
|
|
391
|
-
|
|
480
|
+
exportArgs += [ [ "-" + alphabetShortMixcr + "Feature" + imputedU, featureInFrameU ] ]
|
|
481
|
+
orderP -= 100
|
|
482
|
+
|
|
392
483
|
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
484
|
+
if !isImputed && featureU == assemblingFeature {
|
|
485
|
+
for annotationType in annotationTypes {
|
|
486
|
+
columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
|
|
487
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
488
|
+
column: columnName,
|
|
489
|
+
id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
|
|
490
|
+
naRegex: "region_not_covered",
|
|
491
|
+
spec: {
|
|
492
|
+
name: "pl7.app/vdj/sequence/annotation",
|
|
493
|
+
valueType: "String",
|
|
494
|
+
domain: {
|
|
495
|
+
"pl7.app/vdj/feature": featureInFrameU,
|
|
496
|
+
"pl7.app/alphabet": alphabet,
|
|
497
|
+
"pl7.app/sequence/annotation/type": annotationType
|
|
498
|
+
},
|
|
499
|
+
annotations: a(orderP, undefined, {
|
|
500
|
+
"pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
|
|
501
|
+
"pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
|
|
502
|
+
"pl7.app/sequence/isAnnotation": "true"
|
|
503
|
+
})
|
|
504
|
+
}
|
|
505
|
+
} ]
|
|
506
|
+
exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
|
|
507
|
+
orderP -= 100
|
|
408
508
|
}
|
|
409
|
-
}
|
|
410
|
-
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
if !isImputed && featureU == "CDR3" {
|
|
513
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
514
|
+
column: alphabetShortMixcr + "Length" + featureU,
|
|
515
|
+
id: alphabetShortMixcr + "-length-" + featureL,
|
|
516
|
+
naRegex: "region_not_covered",
|
|
517
|
+
spec: {
|
|
518
|
+
name: "pl7.app/vdj/sequenceLength",
|
|
519
|
+
valueType: "Int",
|
|
520
|
+
domain: {
|
|
521
|
+
"pl7.app/vdj/feature": featureU,
|
|
522
|
+
"pl7.app/alphabet": alphabet
|
|
523
|
+
},
|
|
524
|
+
annotations: a(orderP, false, {
|
|
525
|
+
"pl7.app/label": "Length of " + featureU + " " + alphabetShort
|
|
526
|
+
})
|
|
527
|
+
}
|
|
528
|
+
} ]
|
|
529
|
+
exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
|
|
530
|
+
}
|
|
411
531
|
}
|
|
412
532
|
}
|
|
413
533
|
}
|
|
@@ -510,7 +630,7 @@ inFrameFeatures := {
|
|
|
510
630
|
}
|
|
511
631
|
|
|
512
632
|
|
|
513
|
-
if assemblingFeature == "VDJRegion" {
|
|
633
|
+
if assemblingFeature == "VDJRegion" || assemblingFeature == "FR1:FR4" {
|
|
514
634
|
orderP = 9500
|
|
515
635
|
|
|
516
636
|
|
|
@@ -793,11 +913,11 @@ inFrameFeatures := {
|
|
|
793
913
|
visibility: false
|
|
794
914
|
}
|
|
795
915
|
]
|
|
796
|
-
mainIsProductiveColumn := flagColumnVariants[0].columnPrefix +
|
|
916
|
+
mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + outputProductiveFeature
|
|
797
917
|
mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
|
|
798
918
|
for variant in flagColumnVariants {
|
|
799
919
|
columnsSpecPerClonotypeNoAggregates += [ {
|
|
800
|
-
column: variant.columnPrefix +
|
|
920
|
+
column: variant.columnPrefix + outputProductiveFeature,
|
|
801
921
|
id: variant.id,
|
|
802
922
|
allowNA: false,
|
|
803
923
|
spec: {
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow",
|
|
3
|
-
"version": "1.19.6",
|
|
3
|
+
"version": "1.19.8",
|
|
4
4
|
"description": "MiXCR Amplicon Alignment Workflow",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"dependencies": {
|
|
7
7
|
"@platforma-sdk/workflow-tengo": "5.8.2",
|
|
8
8
|
"@platforma-open/milaboratories.software-mixcr": "4.7.0-309-develop",
|
|
9
9
|
"@platforma-open/milaboratories.software-repseqio": "^2.5.0-13-master",
|
|
10
|
-
"@platforma-open/milaboratories.mixcr-amplicon-alignment.software": "1.
|
|
10
|
+
"@platforma-open/milaboratories.mixcr-amplicon-alignment.software": "1.2.0"
|
|
11
11
|
},
|
|
12
12
|
"devDependencies": {
|
|
13
13
|
"@platforma-sdk/tengo-builder": "2.4.17"
|
|
@@ -37,11 +37,6 @@ toCombinedDomainValue := func(spec) {
|
|
|
37
37
|
* - V: formatted identifier for the V gene core feature
|
|
38
38
|
* - J: formatted identifier for the J gene core feature (if applicable)
|
|
39
39
|
*/
|
|
40
|
-
assemblingFeature := "VDJRegion"
|
|
41
|
-
productiveFeature := "VDJRegion"
|
|
42
|
-
coreVFeature := "{FR1Begin:FR3End}"
|
|
43
|
-
coreJFeature := "FR4"
|
|
44
|
-
splitByC := false
|
|
45
40
|
|
|
46
41
|
// sometimes we need to format assembling feature to be used in column ids
|
|
47
42
|
formatId := func(input) {
|
|
@@ -57,22 +52,130 @@ addSpec := func(columns, additionalSpec) {
|
|
|
57
52
|
})
|
|
58
53
|
}
|
|
59
54
|
|
|
55
|
+
parseAssemblingFeature := func(assemblingFeature) {
|
|
56
|
+
if assemblingFeature == "VDJRegion" || assemblingFeature == "CDR3" {
|
|
57
|
+
return {
|
|
58
|
+
imputed: [],
|
|
59
|
+
nonImputed: assemblingFeature == "CDR3" ? ["CDR3"] : ["CDR1", "FR1", "FR2", "CDR2", "FR3", "CDR3", "FR4", "VDJRegion"],
|
|
60
|
+
coreGeneFeatures: {
|
|
61
|
+
V: "{FR1Begin:FR3End}",
|
|
62
|
+
J: "FR4"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
be := text.split(assemblingFeature, ":")
|
|
68
|
+
if len(be) != 2 {
|
|
69
|
+
ll.panic("assemblingFeature must be in the format of 'begin:end', got " + assemblingFeature)
|
|
70
|
+
}
|
|
71
|
+
begin := be[0]
|
|
72
|
+
end := be[1]
|
|
73
|
+
|
|
74
|
+
features := ["FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"]
|
|
75
|
+
|
|
76
|
+
iBegin := -1
|
|
77
|
+
iEnd := -1
|
|
78
|
+
|
|
79
|
+
for i, f in features {
|
|
80
|
+
if f == begin {
|
|
81
|
+
iBegin = i
|
|
82
|
+
}
|
|
83
|
+
if f == end {
|
|
84
|
+
iEnd = i
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
if iBegin == -1 || iEnd == -1 {
|
|
89
|
+
ll.panic("begin or end not found in features: " + assemblingFeature)
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
if iBegin > iEnd {
|
|
93
|
+
ll.panic("begin is after end: " + assemblingFeature)
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
imputed := []
|
|
97
|
+
nonImputed := []
|
|
98
|
+
|
|
99
|
+
for i := 0; i < iBegin; i++ {
|
|
100
|
+
imputed = append(imputed, features[i])
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
for i := iEnd + 1; i < len(features); i++ {
|
|
104
|
+
imputed = append(imputed, features[i])
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
for i := iBegin; i <= iEnd; i++ {
|
|
108
|
+
nonImputed = append(nonImputed, features[i])
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
coreVFeature := undefined
|
|
112
|
+
coreJFeature := undefined
|
|
113
|
+
|
|
114
|
+
if begin != "CDR3" {
|
|
115
|
+
coreVFeature = "{"+begin+"Begin:FR3End}"
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if end == "FR4" {
|
|
119
|
+
coreJFeature = "FR4"
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
if begin == "FR1" && end == "FR4" {
|
|
123
|
+
nonImputed = append(nonImputed, "VDJRegion")
|
|
124
|
+
} else {
|
|
125
|
+
imputed = append(imputed, "VDJRegion")
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return {
|
|
129
|
+
imputed: imputed,
|
|
130
|
+
nonImputed: nonImputed,
|
|
131
|
+
coreGeneFeatures: {
|
|
132
|
+
V: coreVFeature,
|
|
133
|
+
J: coreJFeature
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
60
138
|
calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
61
139
|
|
|
62
|
-
assemblingFeature
|
|
140
|
+
assemblingFeature := presetSpecForBack.assemblingFeature
|
|
141
|
+
imputeGermline := presetSpecForBack.imputeGermline
|
|
142
|
+
if is_undefined(imputeGermline) {
|
|
143
|
+
imputeGermline = false
|
|
144
|
+
}
|
|
63
145
|
|
|
64
146
|
splitByC := false
|
|
65
147
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
148
|
+
parsedFeature := parseAssemblingFeature(assemblingFeature)
|
|
149
|
+
|
|
150
|
+
imputedFeaturesMap := {}
|
|
151
|
+
for f in parsedFeature.imputed {
|
|
152
|
+
imputedFeaturesMap[f] = true
|
|
70
153
|
}
|
|
71
154
|
|
|
155
|
+
formatAssemblingFeature := func(fstr) {
|
|
156
|
+
if fstr == "VDJRegion" || fstr == "CDR3" {
|
|
157
|
+
return fstr
|
|
158
|
+
}
|
|
159
|
+
parts := text.split(fstr, ":")
|
|
160
|
+
if len(parts) == 1 {
|
|
161
|
+
return "{" + parts[0] + "Begin:" + parts[0] + "End}"
|
|
162
|
+
}
|
|
163
|
+
return "{" + parts[0] + "Begin:" + parts[1] + "End}"
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
productiveFeature := formatAssemblingFeature(assemblingFeature)
|
|
167
|
+
|
|
168
|
+
outputProductiveFeature := productiveFeature
|
|
169
|
+
|
|
170
|
+
coreGeneFeatures := parsedFeature.coreGeneFeatures
|
|
171
|
+
|
|
72
172
|
// column with nucleotide sequence of this feature will be marked as anchor
|
|
73
173
|
anchorFeature := assemblingFeature
|
|
74
174
|
|
|
75
|
-
features :=
|
|
175
|
+
features := parsedFeature.nonImputed
|
|
176
|
+
if imputeGermline {
|
|
177
|
+
features = features + parsedFeature.imputed
|
|
178
|
+
}
|
|
76
179
|
|
|
77
180
|
clonotypeKeyColumns := []
|
|
78
181
|
clonotypeKeyArgs := []
|
|
@@ -84,9 +187,13 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
|
|
|
84
187
|
[ "-jGene" ]
|
|
85
188
|
]
|
|
86
189
|
} else {
|
|
87
|
-
|
|
190
|
+
isVdjImputed := !is_undefined(imputedFeaturesMap["VDJRegion"]) && imputeGermline
|
|
191
|
+
vdjColName := "nSeq" + (isVdjImputed ? "Imputed" : "") + "VDJRegion"
|
|
192
|
+
vdjArgLabel := "-nFeature" + (isVdjImputed ? "Imputed" : "")
|
|
193
|
+
|
|
194
|
+
clonotypeKeyColumns = [vdjColName, "bestVGene", "bestJGene"]
|
|
88
195
|
clonotypeKeyArgs = [
|
|
89
|
-
[
|
|
196
|
+
[ vdjArgLabel, "VDJRegion" ],
|
|
90
197
|
[ "-vGene" ],
|
|
91
198
|
[ "-jGene" ]
|
|
92
199
|
]
|
|
@@ -314,100 +421,113 @@ inFrameFeatures := {
|
|
|
314
421
|
"CDR3": "CDR3"
|
|
315
422
|
}
|
|
316
423
|
|
|
317
|
-
for
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
424
|
+
for isImputed in [false, true] {
|
|
425
|
+
featuresList := isImputed ? parsedFeature.imputed : parsedFeature.nonImputed
|
|
426
|
+
if len(featuresList) == 0 {
|
|
427
|
+
continue
|
|
428
|
+
}
|
|
429
|
+
if isImputed && !imputeGermline {
|
|
430
|
+
continue
|
|
431
|
+
}
|
|
325
432
|
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
"
|
|
351
|
-
}
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
"pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
|
|
355
|
-
"pl7.app/vdj/imputed": "false",
|
|
356
|
-
"pl7.app/table/fontFamily": "monospace",
|
|
357
|
-
"pl7.app/label": featureInFrameU + " " + alphabetShort
|
|
358
|
-
})
|
|
359
|
-
}
|
|
360
|
-
} ]
|
|
361
|
-
exportArgs += [ [ "-" + alphabetShortMixcr + "Feature", featureInFrameU ] ]
|
|
362
|
-
orderP -= 100
|
|
363
|
-
|
|
364
|
-
// Adding sequence annotation columns for assembling feature
|
|
365
|
-
if featureU == assemblingFeature {
|
|
366
|
-
for annotationType in annotationTypes {
|
|
367
|
-
columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
|
|
368
|
-
columnsSpecPerClonotypeNoAggregates += [ {
|
|
433
|
+
imputedU := isImputed ? "Imputed" : ""
|
|
434
|
+
imputedL := text.to_lower(imputedU)
|
|
435
|
+
|
|
436
|
+
for featureU in featuresList {
|
|
437
|
+
featureL := text.to_lower(formatId(featureU))
|
|
438
|
+
for isAminoAcid in [true, false] {
|
|
439
|
+
featureInFrameU := isAminoAcid ? inFrameFeatures[featureU] : featureU
|
|
440
|
+
if is_undefined(featureInFrameU) {
|
|
441
|
+
featureInFrameU = featureU
|
|
442
|
+
}
|
|
443
|
+
featureInFrameL := text.to_lower(formatId(featureInFrameU))
|
|
444
|
+
|
|
445
|
+
alphabet := isAminoAcid ? "aminoacid" : "nucleotide"
|
|
446
|
+
alphabetShort := isAminoAcid ? "aa" : "nt"
|
|
447
|
+
alphabetShortMixcr := isAminoAcid ? "aa" : "n"
|
|
448
|
+
columnName := alphabetShortMixcr + "Seq" + imputedU + featureInFrameU
|
|
449
|
+
visibility := featureU == "VDJRegion" || featureU == "CDR3"
|
|
450
|
+
if featureU == "CDR3" {
|
|
451
|
+
cdr3SeqColumns += [ columnName ]
|
|
452
|
+
}
|
|
453
|
+
if isAminoAcid {
|
|
454
|
+
aminoAcidSeqColumns += [ columnName ]
|
|
455
|
+
aminoAcidSeqColumnPairs += [ {
|
|
456
|
+
aa: columnName,
|
|
457
|
+
nt: "nSeq" + imputedU + featureU
|
|
458
|
+
} ]
|
|
459
|
+
}
|
|
460
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
369
461
|
column: columnName,
|
|
370
|
-
id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
|
|
462
|
+
id: alphabetShortMixcr + "-seq-" + featureInFrameL + (isImputed ? "-imputed" : ""),
|
|
371
463
|
naRegex: "region_not_covered",
|
|
372
464
|
spec: {
|
|
373
|
-
name: "pl7.app/vdj/sequence/annotation",
|
|
465
|
+
name: "pl7.app/vdj/sequence",
|
|
374
466
|
valueType: "String",
|
|
375
467
|
domain: {
|
|
376
468
|
"pl7.app/vdj/feature": featureInFrameU,
|
|
377
|
-
"pl7.app/alphabet": alphabet,
|
|
378
|
-
"pl7.app/sequence/annotation/type": annotationType
|
|
469
|
+
"pl7.app/alphabet": alphabet
|
|
379
470
|
},
|
|
380
|
-
annotations: a(orderP,
|
|
381
|
-
"pl7.app/
|
|
382
|
-
"pl7.app/
|
|
383
|
-
"pl7.app/
|
|
471
|
+
annotations: a(orderP, visibility, {
|
|
472
|
+
"pl7.app/vdj/isAssemblingFeature": featureU == anchorFeature ? "true" : "false",
|
|
473
|
+
"pl7.app/vdj/isMainSequence": featureU == anchorFeature ? "true" : "false",
|
|
474
|
+
"pl7.app/vdj/imputed": string(isImputed),
|
|
475
|
+
"pl7.app/table/fontFamily": "monospace",
|
|
476
|
+
"pl7.app/label": (isImputed ? "Imputed " : "") + featureInFrameU + " " + alphabetShort
|
|
384
477
|
})
|
|
385
478
|
}
|
|
386
479
|
} ]
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
480
|
+
exportArgs += [ [ "-" + alphabetShortMixcr + "Feature" + imputedU, featureInFrameU ] ]
|
|
481
|
+
orderP -= 100
|
|
482
|
+
|
|
483
|
+
// Adding sequence annotation columns for assembling feature
|
|
484
|
+
if !isImputed && featureU == assemblingFeature {
|
|
485
|
+
for annotationType in annotationTypes {
|
|
486
|
+
columnName := alphabetShortMixcr + "AnnotationOf" + annotationType + "For" + featureInFrameU
|
|
487
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
488
|
+
column: columnName,
|
|
489
|
+
id: alphabetShortMixcr + "-annotation-" + annotationType + "-" + featureInFrameL,
|
|
490
|
+
naRegex: "region_not_covered",
|
|
491
|
+
spec: {
|
|
492
|
+
name: "pl7.app/vdj/sequence/annotation",
|
|
493
|
+
valueType: "String",
|
|
494
|
+
domain: {
|
|
495
|
+
"pl7.app/vdj/feature": featureInFrameU,
|
|
496
|
+
"pl7.app/alphabet": alphabet,
|
|
497
|
+
"pl7.app/sequence/annotation/type": annotationType
|
|
498
|
+
},
|
|
499
|
+
annotations: a(orderP, undefined, {
|
|
500
|
+
"pl7.app/label": annotationType + " annotation for " + featureInFrameU + " " + alphabetShort,
|
|
501
|
+
"pl7.app/sequence/annotation/mapping": annotationMappings[annotationType],
|
|
502
|
+
"pl7.app/sequence/isAnnotation": "true"
|
|
503
|
+
})
|
|
504
|
+
}
|
|
505
|
+
} ]
|
|
506
|
+
exportArgs += [ [ "-" + alphabetShortMixcr + "AnnotationString", annotationType, featureInFrameU ] ]
|
|
507
|
+
orderP -= 100
|
|
408
508
|
}
|
|
409
|
-
}
|
|
410
|
-
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
// For now calculate length only for CDR3 to keep the number of columns manageable
|
|
512
|
+
if !isImputed && featureU == "CDR3" {
|
|
513
|
+
columnsSpecPerClonotypeNoAggregates += [ {
|
|
514
|
+
column: alphabetShortMixcr + "Length" + featureU,
|
|
515
|
+
id: alphabetShortMixcr + "-length-" + featureL,
|
|
516
|
+
naRegex: "region_not_covered",
|
|
517
|
+
spec: {
|
|
518
|
+
name: "pl7.app/vdj/sequenceLength",
|
|
519
|
+
valueType: "Int",
|
|
520
|
+
domain: {
|
|
521
|
+
"pl7.app/vdj/feature": featureU,
|
|
522
|
+
"pl7.app/alphabet": alphabet
|
|
523
|
+
},
|
|
524
|
+
annotations: a(orderP, false, {
|
|
525
|
+
"pl7.app/label": "Length of " + featureU + " " + alphabetShort
|
|
526
|
+
})
|
|
527
|
+
}
|
|
528
|
+
} ]
|
|
529
|
+
exportArgs += [ [ "-" + alphabetShortMixcr + "Length", featureU ] ]
|
|
530
|
+
}
|
|
411
531
|
}
|
|
412
532
|
}
|
|
413
533
|
}
|
|
@@ -509,8 +629,8 @@ inFrameFeatures := {
|
|
|
509
629
|
}
|
|
510
630
|
}
|
|
511
631
|
|
|
512
|
-
// All nucleotide mutations count for each feature (only for VDJRegion assembling feature)
|
|
513
|
-
if assemblingFeature == "VDJRegion" {
|
|
632
|
+
// All nucleotide mutations count for each feature (only for VDJRegion or FR1:FR4 assembling feature)
|
|
633
|
+
if assemblingFeature == "VDJRegion" || assemblingFeature == "FR1:FR4" {
|
|
514
634
|
orderP = 9500
|
|
515
635
|
|
|
516
636
|
// MixCR -allNMutationsCount exports columns for all features between specified boundaries.
|
|
@@ -793,11 +913,11 @@ inFrameFeatures := {
|
|
|
793
913
|
visibility: false
|
|
794
914
|
}
|
|
795
915
|
]
|
|
796
|
-
mainIsProductiveColumn := flagColumnVariants[0].columnPrefix +
|
|
916
|
+
mainIsProductiveColumn := flagColumnVariants[0].columnPrefix + outputProductiveFeature
|
|
797
917
|
mainIsProductiveArgs := [ [ flagColumnVariants[0].arg, productiveFeature ] ]
|
|
798
918
|
for variant in flagColumnVariants {
|
|
799
919
|
columnsSpecPerClonotypeNoAggregates += [ {
|
|
800
|
-
column: variant.columnPrefix +
|
|
920
|
+
column: variant.columnPrefix + outputProductiveFeature,
|
|
801
921
|
id: variant.id,
|
|
802
922
|
allowNA: false,
|
|
803
923
|
spec: {
|
package/src/main.tpl.tengo
CHANGED
|
@@ -69,6 +69,7 @@ wf.body(func(args) {
|
|
|
69
69
|
cloneClusteringMode: cloneClusteringMode,
|
|
70
70
|
tagPattern: args.tagPattern,
|
|
71
71
|
assemblingFeature: args.assemblingFeature,
|
|
72
|
+
imputeGermline: args.imputeGermline,
|
|
72
73
|
badQualityThreshold: args.badQualityThreshold,
|
|
73
74
|
stopCodonTypes: args.stopCodonTypes,
|
|
74
75
|
stopCodonReplacements: args.stopCodonReplacements
|
|
@@ -11,6 +11,7 @@ assets := import("@platforma-sdk/workflow-tengo:assets")
|
|
|
11
11
|
pcolumn := import("@platforma-sdk/workflow-tengo:pframes.pcolumn")
|
|
12
12
|
times := import("times")
|
|
13
13
|
maps := import("@platforma-sdk/workflow-tengo:maps")
|
|
14
|
+
text := import("text")
|
|
14
15
|
|
|
15
16
|
json := import("json")
|
|
16
17
|
|
|
@@ -69,8 +70,19 @@ self.body(func(inputs) {
|
|
|
69
70
|
mixcrCmdBuilder.arg("generic-amplicon")
|
|
70
71
|
}
|
|
71
72
|
|
|
73
|
+
formatAssemblingFeature := func(fstr) {
|
|
74
|
+
if fstr == "VDJRegion" || fstr == "CDR3" {
|
|
75
|
+
return fstr
|
|
76
|
+
}
|
|
77
|
+
parts := text.split(fstr, ":")
|
|
78
|
+
if len(parts) == 1 {
|
|
79
|
+
return "{" + parts[0] + "Begin:" + parts[0] + "End}"
|
|
80
|
+
}
|
|
81
|
+
return "{" + parts[0] + "Begin:" + parts[1] + "End}"
|
|
82
|
+
}
|
|
83
|
+
|
|
72
84
|
mixcrCmdBuilder.
|
|
73
|
-
arg("--assemble-clonotypes-by").arg(params.assemblingFeature).
|
|
85
|
+
arg("--assemble-clonotypes-by").arg(formatAssemblingFeature(params.assemblingFeature)).
|
|
74
86
|
arg("--species").arg("custom").
|
|
75
87
|
arg("--library").arg("library.json").
|
|
76
88
|
addFile("library.json", params.referenceLibrary).
|
package/src/process.tpl.tengo
CHANGED
|
@@ -90,6 +90,7 @@ self.body(func(inputs) {
|
|
|
90
90
|
// Use calculateExportSpecs for output columns
|
|
91
91
|
presetSpecForBack := {
|
|
92
92
|
assemblingFeature: params.assemblingFeature,
|
|
93
|
+
imputeGermline: params.imputeGermline,
|
|
93
94
|
splitByC: true,
|
|
94
95
|
umiTags: hasUMI ? umiTags : undefined,
|
|
95
96
|
cellTags: []
|
|
@@ -221,6 +222,7 @@ self.body(func(inputs) {
|
|
|
221
222
|
hasUMI: hasUMI,
|
|
222
223
|
tagPattern: tagPattern,
|
|
223
224
|
assemblingFeature: params.assemblingFeature,
|
|
225
|
+
imputeGermline: params.imputeGermline,
|
|
224
226
|
badQualityThreshold: params.badQualityThreshold
|
|
225
227
|
}, { removeUndefs: true }),
|
|
226
228
|
limitInput: limitInput
|