@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow 1.19.8 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
   WARN  Issue while reading "/home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
2
 
3
- > @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.19.8 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
3
+ > @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow@1.20.0 build /home/runner/work/mixcr-amplicon-alignment/mixcr-amplicon-alignment/workflow
4
4
  > rm -rf dist && pl-tengo check && pl-tengo build
5
5
 
6
6
  Processing "src/aggregate-by-clonotype-key.tpl.tengo"...
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @platforma-open/milaboratories.mixcr-amplicon-alignment.workflow
2
2
 
3
+ ## 1.20.0
4
+
5
+ ### Minor Changes
6
+
7
+ - e2b65c7: Support custom reference library file
8
+
9
+ ## 1.19.9
10
+
11
+ ### Patch Changes
12
+
13
+ - 2149d28: Fix column naming for range assembling features (e.g. CDR1:CDR3, FR2:FR4) without imputation.
14
+
15
+ When using a range assembling feature without "Impute non-covered part", the workflow would fail with
16
+ "column nSeqVDJRegion does not exist in export" because VDJRegion is never exported for non-full-range features.
17
+
18
+ Changes:
19
+
20
+ - Use the assembling feature itself as clonotype key column when VDJRegion is unavailable
21
+ - Fix column naming to match MiXCR output format (e.g. `CDR1_TO_FR4` instead of `{CDR1Begin:FR4End}`)
22
+ - Add unit tests covering column naming for all assembling feature variants with/without imputation
23
+
3
24
  ## 1.19.8
4
25
 
5
26
  ### Patch Changes
@@ -164,8 +164,20 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
164
164
  }
165
165
 
166
166
  productiveFeature := formatAssemblingFeature(assemblingFeature)
167
-
167
+
168
+
169
+
170
+
171
+
172
+
168
173
  outputProductiveFeature := productiveFeature
174
+ if assemblingFeature != "VDJRegion" && assemblingFeature != "CDR3" {
175
+ parts := text.split(assemblingFeature, ":")
176
+ if len(parts) == 2 && parts[1] == "FR4" {
177
+
178
+ outputProductiveFeature = parts[0] + "_TO_FR4"
179
+ }
180
+ }
169
181
 
170
182
  coreGeneFeatures := parsedFeature.coreGeneFeatures
171
183
 
@@ -187,22 +199,67 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
187
199
  [ "-jGene" ]
188
200
  ]
189
201
  } else {
190
- isVdjImputed := !is_undefined(imputedFeaturesMap["VDJRegion"]) && imputeGermline
191
- vdjColName := "nSeq" + (isVdjImputed ? "Imputed" : "") + "VDJRegion"
192
- vdjArgLabel := "-nFeature" + (isVdjImputed ? "Imputed" : "")
193
202
 
194
- clonotypeKeyColumns = [vdjColName, "bestVGene", "bestJGene"]
195
- clonotypeKeyArgs = [
196
- [ vdjArgLabel, "VDJRegion" ],
197
- [ "-vGene" ],
198
- [ "-jGene" ]
199
- ]
203
+
204
+ vdjIsAssemblingFeature := is_undefined(imputedFeaturesMap["VDJRegion"])
205
+
206
+ if vdjIsAssemblingFeature {
207
+
208
+ clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
209
+ clonotypeKeyArgs = [
210
+ [ "-nFeature", "VDJRegion" ],
211
+ [ "-vGene" ],
212
+ [ "-jGene" ]
213
+ ]
214
+ } else {
215
+
216
+
217
+
218
+
219
+
220
+ keyColName := "nSeq" + outputProductiveFeature
221
+ clonotypeKeyColumns = [keyColName, "bestVGene", "bestJGene"]
222
+ clonotypeKeyArgs = [
223
+ [ "-nFeature", productiveFeature ],
224
+ [ "-vGene" ],
225
+ [ "-jGene" ]
226
+ ]
227
+ }
200
228
  }
201
229
 
202
230
  columnsSpecPerSample := []
203
231
  columnsSpecPerClonotypeNoAggregates := []
204
232
  mutationColumns := []
205
233
 
234
+
235
+
236
+
237
+ needsAssemblingFeatureExport := assemblingFeature != "CDR3" && assemblingFeature != "VDJRegion" && !is_undefined(imputedFeaturesMap["VDJRegion"])
238
+ if needsAssemblingFeatureExport {
239
+ featureIdL := text.to_lower(formatId(assemblingFeature))
240
+ keyColName := "nSeq" + outputProductiveFeature
241
+ columnsSpecPerClonotypeNoAggregates += [ {
242
+ column: keyColName,
243
+ id: "n-seq-" + featureIdL,
244
+ naRegex: "region_not_covered",
245
+ spec: {
246
+ name: "pl7.app/vdj/sequence",
247
+ valueType: "String",
248
+ domain: {
249
+ "pl7.app/vdj/feature": outputProductiveFeature,
250
+ "pl7.app/alphabet": "nucleotide"
251
+ },
252
+ annotations: a(80100, false, {
253
+ "pl7.app/vdj/isAssemblingFeature": "true",
254
+ "pl7.app/vdj/isMainSequence": "false",
255
+ "pl7.app/vdj/imputed": "false",
256
+ "pl7.app/table/fontFamily": "monospace",
257
+ "pl7.app/label": outputProductiveFeature + " nt"
258
+ })
259
+ }
260
+ } ]
261
+ }
262
+
206
263
  clonotypeLabelColumn := {
207
264
  column: "clonotypeLabel",
208
265
  id: "clonotype-label",
@@ -220,6 +277,11 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
220
277
  exportArgs := []
221
278
 
222
279
 
280
+ if needsAssemblingFeatureExport {
281
+ exportArgs += [ [ "-nFeature", productiveFeature ] ]
282
+ }
283
+
284
+
223
285
 
224
286
  hasUmi := !is_undefined(presetSpecForBack) && !is_undefined(presetSpecForBack.umiTags) && len(presetSpecForBack.umiTags) > 0
225
287
 
Binary file
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platforma-open/milaboratories.mixcr-amplicon-alignment.workflow",
3
- "version": "1.19.8",
3
+ "version": "1.20.0",
4
4
  "description": "MiXCR Amplicon Alignment Workflow",
5
5
  "type": "module",
6
6
  "dependencies": {
@@ -164,8 +164,20 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
164
164
  }
165
165
 
166
166
  productiveFeature := formatAssemblingFeature(assemblingFeature)
167
-
167
+
168
+ // MiXCR column naming for range features:
169
+ // - Ranges ending at FR4 have named aliases: CDR1_TO_FR4, FR2_TO_FR4, CDR2_TO_FR4, FR3_TO_FR4
170
+ // (defined in repseqio GeneFeature.java)
171
+ // - All other ranges use {XBegin:YEnd} format (e.g. {CDR1Begin:CDR3End})
172
+ // - Simple features (CDR3, VDJRegion) use their name directly
168
173
  outputProductiveFeature := productiveFeature
174
+ if assemblingFeature != "VDJRegion" && assemblingFeature != "CDR3" {
175
+ parts := text.split(assemblingFeature, ":")
176
+ if len(parts) == 2 && parts[1] == "FR4" {
177
+ // MiXCR has a named alias for this range
178
+ outputProductiveFeature = parts[0] + "_TO_FR4"
179
+ }
180
+ }
169
181
 
170
182
  coreGeneFeatures := parsedFeature.coreGeneFeatures
171
183
 
@@ -187,22 +199,67 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
187
199
  [ "-jGene" ]
188
200
  ]
189
201
  } else {
190
- isVdjImputed := !is_undefined(imputedFeaturesMap["VDJRegion"]) && imputeGermline
191
- vdjColName := "nSeq" + (isVdjImputed ? "Imputed" : "") + "VDJRegion"
192
- vdjArgLabel := "-nFeature" + (isVdjImputed ? "Imputed" : "")
193
-
194
- clonotypeKeyColumns = [vdjColName, "bestVGene", "bestJGene"]
195
- clonotypeKeyArgs = [
196
- [ vdjArgLabel, "VDJRegion" ],
197
- [ "-vGene" ],
198
- [ "-jGene" ]
199
- ]
202
+ // VDJRegion is the assembling feature itself only when it's NOT in the imputed list
203
+ // (e.g. VDJRegion or FR1:FR4 as the assembling feature)
204
+ vdjIsAssemblingFeature := is_undefined(imputedFeaturesMap["VDJRegion"])
205
+
206
+ if vdjIsAssemblingFeature {
207
+ // VDJRegion IS the assembling feature, use it directly as the key
208
+ clonotypeKeyColumns = ["nSeqVDJRegion", "bestVGene", "bestJGene"]
209
+ clonotypeKeyArgs = [
210
+ [ "-nFeature", "VDJRegion" ],
211
+ [ "-vGene" ],
212
+ [ "-jGene" ]
213
+ ]
214
+ } else {
215
+ // Range feature where VDJRegion is NOT the assembling feature (e.g. CDR1:CDR3, FR2:FR4)
216
+ // Always use the assembling feature itself as the key, even with imputation enabled.
217
+ // Imputed VDJRegion is NOT guaranteed unique per clone (two clones with different
218
+ // assembling feature sequences can produce the same imputed VDJRegion).
219
+ // The assembling feature sequence IS unique by definition (it defines the clone).
220
+ keyColName := "nSeq" + outputProductiveFeature
221
+ clonotypeKeyColumns = [keyColName, "bestVGene", "bestJGene"]
222
+ clonotypeKeyArgs = [
223
+ [ "-nFeature", productiveFeature ],
224
+ [ "-vGene" ],
225
+ [ "-jGene" ]
226
+ ]
227
+ }
200
228
  }
201
229
 
202
230
  columnsSpecPerSample := []
203
231
  columnsSpecPerClonotypeNoAggregates := []
204
232
  mutationColumns := []
205
233
 
234
+ // For range features where VDJRegion is not the assembling feature, we need to export
235
+ // the combined assembling feature sequence column explicitly (individual features are
236
+ // exported in the loop below, but the combined feature like {CDR1Begin:CDR3End} is not)
237
+ needsAssemblingFeatureExport := assemblingFeature != "CDR3" && assemblingFeature != "VDJRegion" && !is_undefined(imputedFeaturesMap["VDJRegion"])
238
+ if needsAssemblingFeatureExport {
239
+ featureIdL := text.to_lower(formatId(assemblingFeature))
240
+ keyColName := "nSeq" + outputProductiveFeature
241
+ columnsSpecPerClonotypeNoAggregates += [ {
242
+ column: keyColName,
243
+ id: "n-seq-" + featureIdL,
244
+ naRegex: "region_not_covered",
245
+ spec: {
246
+ name: "pl7.app/vdj/sequence",
247
+ valueType: "String",
248
+ domain: {
249
+ "pl7.app/vdj/feature": outputProductiveFeature,
250
+ "pl7.app/alphabet": "nucleotide"
251
+ },
252
+ annotations: a(80100, false, {
253
+ "pl7.app/vdj/isAssemblingFeature": "true",
254
+ "pl7.app/vdj/isMainSequence": "false",
255
+ "pl7.app/vdj/imputed": "false",
256
+ "pl7.app/table/fontFamily": "monospace",
257
+ "pl7.app/label": outputProductiveFeature + " nt"
258
+ })
259
+ }
260
+ } ]
261
+ }
262
+
206
263
  clonotypeLabelColumn := {
207
264
  column: "clonotypeLabel",
208
265
  id: "clonotype-label",
@@ -219,6 +276,11 @@ calculateExportSpecs := func(presetSpecForBack, blockId) {
219
276
  // array of array of arg groups
220
277
  exportArgs := []
221
278
 
279
+ // Add the assembling feature export arg if needed (column spec was added above)
280
+ if needsAssemblingFeatureExport {
281
+ exportArgs += [ [ "-nFeature", productiveFeature ] ]
282
+ }
283
+
222
284
  // Abundance - reads by default; switch to UMI columns if umiTags are present
223
285
 
224
286
  hasUmi := !is_undefined(presetSpecForBack) && !is_undefined(presetSpecForBack.umiTags) && len(presetSpecForBack.umiTags) > 0
@@ -41,16 +41,25 @@ wf.body(func(args) {
41
41
  "TCRDelta": { mixcrFilter: "TRG", name: "TCR Delta", shortName: "Delta" }
42
42
  }
43
43
 
44
- // Generate reference library using repseqio if both V and J genes are available
45
-
46
- repseqioResults := render.create(repseqioLibraryTpl, {
47
- vGenes: args.vGenes,
48
- jGenes: args.jGenes,
49
- chains: chainInfos[chains].mixcrFilter
50
- })
51
-
52
- referenceLibrary := repseqioResults.output("referenceLibrary")
53
- debugOutput := repseqioResults.output("debugOutput")
44
+ // Generate reference library using repseqio, or use directly imported library file
45
+ isLibraryFileGzipped := false
46
+ referenceLibrary := false
47
+ debugOutput := undefined
48
+ libraryImportHandle := undefined
49
+
50
+ if !is_undefined(args.libraryFile) {
51
+ fImport := file.importFile(args.libraryFile)
52
+ libraryImportHandle = fImport.handle
53
+ referenceLibrary = fImport.file
54
+ } else {
55
+ repseqioResults := render.create(repseqioLibraryTpl, {
56
+ vGenes: args.vGenes,
57
+ jGenes: args.jGenes,
58
+ chains: chainInfos[chains].mixcrFilter
59
+ })
60
+ referenceLibrary = repseqioResults.output("referenceLibrary")
61
+ debugOutput = repseqioResults.output("debugOutput")
62
+ }
54
63
 
55
64
 
56
65
  runMixcr := render.createEphemeral(processTpl, {
@@ -71,6 +80,7 @@ wf.body(func(args) {
71
80
  assemblingFeature: args.assemblingFeature,
72
81
  imputeGermline: args.imputeGermline,
73
82
  badQualityThreshold: args.badQualityThreshold,
83
+ isLibraryFileGzipped: isLibraryFileGzipped,
74
84
  stopCodonTypes: args.stopCodonTypes,
75
85
  stopCodonReplacements: args.stopCodonReplacements
76
86
  }, { removeUndefs: true }))
@@ -92,6 +102,23 @@ wf.body(func(args) {
92
102
  }
93
103
  }
94
104
 
105
+ if !is_undefined(args.libraryFile) {
106
+ exports.library = {
107
+ data: referenceLibrary,
108
+ spec: {
109
+ kind: "File",
110
+ name: "pl7.app/vdj/library",
111
+ domain: {
112
+ "pl7.app/vdj/libraryId": blockId
113
+ },
114
+ annotations: {
115
+ "pl7.app/vdj/isLibrary": "true",
116
+ "pl7.app/vdj/libraryFormat": (isLibraryFileGzipped ? "repseqio.json.gz" : "repseqio.json")
117
+ }
118
+ }
119
+ }
120
+ }
121
+
95
122
  outputs := {
96
123
  qc: pframes.exportColumnData(runMixcr.output("qc.data")),
97
124
  reports: pframes.exportColumnData(runMixcr.output("reports.data")),
@@ -99,9 +126,15 @@ wf.body(func(args) {
99
126
  clones: pframes.exportFrame(runMixcr.output("clones")),
100
127
  clns: runMixcr.output("clns.data"),
101
128
  progress: runMixcr.output("progress.data"),
102
- clonotypeTables: pframes.exportFrame(runMixcr.output("clonotypeTables")),
103
- debugOutput: debugOutput
129
+ clonotypeTables: pframes.exportFrame(runMixcr.output("clonotypeTables"))
130
+ }
131
+ outputs.referenceLibrary = file.exportFile(referenceLibrary)
132
+ if !is_undefined(debugOutput) {
133
+ outputs.debugOutput = debugOutput
104
134
  }
135
+ if !is_undefined(libraryImportHandle) {
136
+ outputs.libraryImportHandle = libraryImportHandle
137
+ }
105
138
 
106
139
  qcReportTable := runMixcr.output("qcReportTable")
107
140
  outputs.qcReportTable = pframes.exportFrame(qcReportTable)
@@ -81,11 +81,16 @@ self.body(func(inputs) {
81
81
  return "{" + parts[0] + "Begin:" + parts[1] + "End}"
82
82
  }
83
83
 
84
+ libraryFileName := "library.json"
85
+ if !is_undefined(params.isLibraryFileGzipped) && params.isLibraryFileGzipped {
86
+ libraryFileName = "library.json.gz"
87
+ }
88
+
84
89
  mixcrCmdBuilder.
85
90
  arg("--assemble-clonotypes-by").arg(formatAssemblingFeature(params.assemblingFeature)).
86
91
  arg("--species").arg("custom").
87
- arg("--library").arg("library.json").
88
- addFile("library.json", params.referenceLibrary).
92
+ arg("--library").arg(libraryFileName).
93
+ addFile(libraryFileName, params.referenceLibrary).
89
94
  arg("--rna").
90
95
  arg("--force-overwrite").
91
96
  arg("--rigid-left-alignment-boundary").
@@ -216,12 +216,17 @@ self.body(func(inputs) {
216
216
 
217
217
  additionalAction(mixcrCmdBuilder)
218
218
 
219
+ libraryFileName := "library.json"
220
+ if !is_undefined(params.isLibraryFileGzipped) && params.isLibraryFileGzipped {
221
+ libraryFileName = "library.json.gz"
222
+ }
223
+
219
224
  return mixcrCmdBuilder.
220
225
  arg("clones.clns").
221
226
  addFile("clones.clns", clnsFile).
222
227
  arg("clones.tsv").
223
228
  saveFile("clones.tsv").
224
- addFile("library.json", params.referenceLibrary).
229
+ addFile(libraryFileName, params.referenceLibrary).
225
230
  cacheHours(3).
226
231
  run()
227
232
  }
@@ -22,8 +22,6 @@ calculateExportSpecs := import(":calculate-export-specs")
22
22
  self.awaitState("InputsLocked")
23
23
  self.awaitState("params", "ResourceReady")
24
24
  self.awaitState("inputSpec", "ResourceReady")
25
- self.awaitState("referenceLibrary", "ResourceReady")
26
- self.awaitState("cdr3Sequences", "ResourceReady")
27
25
 
28
26
  self.body(func(inputs) {
29
27
 
@@ -223,7 +221,8 @@ self.body(func(inputs) {
223
221
  tagPattern: tagPattern,
224
222
  assemblingFeature: params.assemblingFeature,
225
223
  imputeGermline: params.imputeGermline,
226
- badQualityThreshold: params.badQualityThreshold
224
+ badQualityThreshold: params.badQualityThreshold,
225
+ isLibraryFileGzipped: params.isLibraryFileGzipped
227
226
  }, { removeUndefs: true }),
228
227
  limitInput: limitInput
229
228
  },
@@ -287,7 +286,8 @@ self.body(func(inputs) {
287
286
  cdr3SeqColumns: cdr3SeqColumns,
288
287
  stopCodonTypes: params.stopCodonTypes,
289
288
  stopCodonReplacements: params.stopCodonReplacements,
290
- perProcessMemGB: perProcessMemGB
289
+ perProcessMemGB: perProcessMemGB,
290
+ isLibraryFileGzipped: params.isLibraryFileGzipped
291
291
  }, { removeUndefs: true })
292
292
  }
293
293
  }
@@ -369,7 +369,7 @@ self.body(func(inputs) {
369
369
  sampleIdAxisSpec: sampleIdAxisSpec,
370
370
  chains: [chains],
371
371
  library: referenceLibrary,
372
- isLibraryFileGzipped: false,
372
+ isLibraryFileGzipped: params.isLibraryFileGzipped,
373
373
  clonotypeTablesData: clonotypeTablesData,
374
374
  hasUmi: hasUMI,
375
375
  umiTags: umiTags,