@datagrok/sequence-translator 1.10.16 → 1.10.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,6 +45,7 @@ interface CardOpts {
45
45
  subtitle: string;
46
46
  error?: string;
47
47
  onEdit?: () => void;
48
+ onDuplicate?: () => void;
48
49
  onRemove?: () => void;
49
50
  }
50
51
 
@@ -108,15 +109,22 @@ function buildCard(opts: CardOpts): HTMLElement {
108
109
  }});
109
110
  if (opts.onEdit) {
110
111
  const editBtn = ui.icons.edit((e: MouseEvent) => { e.stopPropagation(); opts.onEdit!(); }, 'Edit');
112
+ editBtn.style.color = 'var(--blue-3)';
111
113
  actions.appendChild(editBtn);
112
114
  }
115
+ if (opts.onDuplicate) {
116
+ const dupBtn = ui.icons.copy((e: MouseEvent) => { e.stopPropagation(); opts.onDuplicate!(); }, 'Duplicate');
117
+ dupBtn.style.color = 'var(--blue-3)';
118
+ actions.appendChild(dupBtn);
119
+ }
113
120
  if (opts.onRemove) {
114
121
  const delBtn = ui.icons.delete((e: MouseEvent) => { e.stopPropagation(); opts.onRemove!(); }, 'Remove');
122
+ delBtn.style.color = 'var(--red-3)';
115
123
  actions.appendChild(delBtn);
116
124
  }
117
125
  card.addEventListener('mouseenter', () => { actions.style.display = 'flex'; });
118
126
  card.addEventListener('mouseleave', () => { actions.style.display = 'none'; });
119
- if (opts.onEdit || opts.onRemove) card.appendChild(actions);
127
+ if (opts.onEdit || opts.onDuplicate || opts.onRemove) card.appendChild(actions);
120
128
 
121
129
  return card;
122
130
  }
@@ -197,25 +205,38 @@ async function openRGroupSketchDialog(rdkit: RDModule, initialMolfile?: string,
197
205
  const status = ui.divText('', {style: {fontSize: '11px', padding: '4px 0', minHeight: '18px'}});
198
206
  let currentSmiles: string | null = null;
199
207
  let detectedRs: number[] = [];
208
+ let isSingleAtom = false;
200
209
  let okBtn: HTMLButtonElement | null = null;
201
210
  let userTouchedR = false;
202
211
  rNumberInput.onChanged.subscribe(() => { userTouchedR = true; updateOk(); });
203
212
 
204
213
  const updateOk = () => {
205
214
  const n = rNumberInput.value;
206
- const ok = currentSmiles != null && n != null && n >= 1 && detectedRs.length === 1;
215
+ const ok = currentSmiles != null && n != null && n >= 1 &&
216
+ (detectedRs.length === 1 || isSingleAtom);
207
217
  if (okBtn) okBtn.disabled = !ok;
208
218
  };
209
219
 
210
220
  const revalidate = async () => {
211
221
  currentSmiles = await smilesFromSketcher(sk, rdkit);
212
222
  detectedRs = currentSmiles ? extractRNumbers(currentSmiles) : [];
223
+ // Probe single-atom mode only when there's no R-label — keeps the
224
+ // common labeled path free of an extra RDKit parse.
225
+ isSingleAtom = false;
226
+ if (currentSmiles && detectedRs.length === 0) {
227
+ const probe = makeRGroup(currentSmiles, rNumberInput.value ?? 1, '', rdkit);
228
+ isSingleAtom = !!probe.isSingleAtom;
229
+ }
213
230
  if (!currentSmiles) {
214
- status.innerText = 'Draw an R-group with exactly one attachment point.';
231
+ status.innerText = 'Draw an R-group with one attachment point, or a single atom (e.g. N, O, Cl).';
215
232
  status.style.color = 'var(--grey-4)';
216
- } else if (detectedRs.length === 0) {
217
- status.innerText = 'No R-group detected — add [*:N] to mark the attachment point.';
233
+ } else if (detectedRs.length === 0 && !isSingleAtom) {
234
+ status.innerText = 'No R-group detected — add [*:N] to mark the attachment point, or draw a single atom (e.g. N, O, Cl).';
218
235
  status.style.color = 'var(--red-3)';
236
+ } else if (detectedRs.length === 0 && isSingleAtom) {
237
+ const targetN = rNumberInput.value;
238
+ status.innerText = `Single atom — will be substituted into [*:${targetN ?? '?'}] in the core.`;
239
+ status.style.color = 'var(--green-2)';
219
240
  } else if (detectedRs.length > 1) {
220
241
  status.innerText = `R-group must contain exactly one attachment point. Found ${detectedRs.length}: ${detectedRs.map((n) => 'R' + n).join(', ')}.`;
221
242
  status.style.color = 'var(--red-3)';
@@ -266,6 +287,10 @@ async function openImportWizard(
266
287
  const columnInput = ui.input.choice<string>('Column', {items: [] as string[], nullable: true});
267
288
  const rNumberInput = kind === 'rgroups' ?
268
289
  ui.input.int('Target R#', {value: 1, min: 1}) : null;
290
+ if (rNumberInput) {
291
+ ui.tooltip.bind(rNumberInput.input,
292
+ 'Target R-number for these R-groups. Single-atom rows (no [*:N] label, e.g. just N or O) get substituted into the core\'s R# slot directly.');
293
+ }
269
294
  const dedupInput = ui.input.bool('Remove duplicates', {value: defaultDedup});
270
295
  ui.tooltip.bind(dedupInput.input,
271
296
  kind === 'cores' ?
@@ -361,10 +386,10 @@ async function openImportWizard(
361
386
  built.rGroups = rGroups;
362
387
  rGroups.forEach((rg, i) => {
363
388
  if (rg.error) errs.push(`R-group ${i + 1}: ${rg.error}`);
364
- items.push({
365
- smi: rg.error ? '' : rg.smiles, err: rg.error,
366
- subtitle: rg.error ? 'invalid' : `R${rg.rNumber}${rg.sourceRNumber != null && rg.sourceRNumber !== rg.rNumber ? ` (from R${rg.sourceRNumber})` : ''}`,
367
- });
389
+ const subtitle = rg.error ? 'invalid' :
390
+ rg.isSingleAtom ? `R${rg.rNumber} · atom` :
391
+ `R${rg.rNumber}${rg.sourceRNumber != null && rg.sourceRNumber !== rg.rNumber ? ` (from R${rg.sourceRNumber})` : ''}`;
392
+ items.push({smi: rg.error ? '' : rg.smiles, err: rg.error, subtitle});
368
393
  });
369
394
  countText.innerText = `${rGroups.length} R-group${rGroups.length === 1 ? '' : 's'} for R${n}` +
370
395
  (dedup && dupCount ? ` (${dupCount} duplicate${dupCount === 1 ? '' : 's'} skipped)` : '') + '.';
@@ -558,6 +583,10 @@ function buildChemEnumPanel(rdkit: RDModule, preloadCore: ChemEnumCore | null):
558
583
  const edited = await openCoreSketchDialog(rdkit, c.smiles);
559
584
  if (edited) { state.cores[i] = edited; refresh(); }
560
585
  },
586
+ onDuplicate: async () => {
587
+ const dup = await openCoreSketchDialog(rdkit, c.smiles);
588
+ if (dup) { state.cores.push(dup); refresh(); }
589
+ },
561
590
  onRemove: () => { state.cores.splice(i, 1); refresh(); },
562
591
  });
563
592
  };
@@ -625,9 +654,12 @@ function buildChemEnumPanel(rdkit: RDModule, preloadCore: ChemEnumCore | null):
625
654
  const renderer = (i: number): HTMLElement => {
626
655
  const rg = list[i];
627
656
  const remap = rg.sourceRNumber != null && rg.sourceRNumber !== rg.rNumber ? ` (from R${rg.sourceRNumber})` : '';
657
+ const subtitle = rg.error ? 'invalid' :
658
+ rg.isSingleAtom ? `r group ${i + 1} · R${rg.rNumber} · atom` :
659
+ `r group ${i + 1} · R${rg.rNumber}${remap}`;
628
660
  return buildCard({
629
661
  smiles: rg.error ? '' : rg.smiles,
630
- subtitle: rg.error ? 'invalid' : `r group ${i + 1} · R${rg.rNumber}${remap}`,
662
+ subtitle,
631
663
  error: rg.error,
632
664
  onEdit: async () => {
633
665
  const edited = await openRGroupSketchDialog(rdkit, rg.smiles, rg.rNumber);
@@ -644,6 +676,14 @@ function buildChemEnumPanel(rdkit: RDModule, preloadCore: ChemEnumCore | null):
644
676
  }
645
677
  refresh();
646
678
  },
679
+ onDuplicate: async () => {
680
+ const dup = await openRGroupSketchDialog(rdkit, rg.smiles, rg.rNumber);
681
+ if (!dup) return;
682
+ const target = state.rGroupsByNum.get(dup.rNumber) ?? [];
683
+ target.push(dup);
684
+ state.rGroupsByNum.set(dup.rNumber, target);
685
+ refresh();
686
+ },
647
687
  onRemove: () => {
648
688
  list.splice(i, 1);
649
689
  if (list.length === 0) state.rGroupsByNum.delete(n);
@@ -42,7 +42,11 @@ export interface ChemEnumCore {
42
42
  }
43
43
 
44
44
  export interface ChemEnumRGroup {
45
- /** Normalized SMILES with its single R-label remapped to the target R number. */
45
+ /**
46
+ * Normalized SMILES with its single R-label remapped to the target R number,
47
+ * OR — when {@link isSingleAtom} is set — the canonical single-atom token
48
+ * (e.g. `N`, `O`, `[N+]`) to splice into the core's `[*:N]` slot.
49
+ */
46
50
  smiles: string;
47
51
  /** SMILES as supplied (pre-normalization and pre-remap). */
48
52
  originalSmiles: string;
@@ -50,6 +54,12 @@ export interface ChemEnumRGroup {
50
54
  rNumber: number;
51
55
  /** R number as originally written in `originalSmiles` (pre-remap). */
52
56
  sourceRNumber?: number;
57
+ /**
58
+ * True when the R-group has zero R-labels and is exactly one heavy atom.
59
+ * Such groups are spliced into the core's `[*:N]` slot via plain string
60
+ * replace instead of ring-closure joining.
61
+ */
62
+ isSingleAtom?: boolean;
53
63
  id: string;
54
64
  error?: string;
55
65
  }
@@ -169,9 +179,21 @@ export function makeRGroup(
169
179
  const rNumbers = extractRNumbers(normalized);
170
180
 
171
181
  if (rNumbers.length === 0) {
182
+ // Single-atom mode: a no-R-label SMILES is acceptable iff RDKit confirms
183
+ // exactly one heavy atom. The atom is then substituted into the core's
184
+ // `[*:N]` slot by `buildJoinedSmiles` instead of ring-closure-joined.
185
+ if (rdkit) {
186
+ const atomSmi = trySingleAtomCanonical(normalized, rdkit);
187
+ if (atomSmi) {
188
+ return {
189
+ smiles: atomSmi, originalSmiles, rNumber: targetRNumber, id,
190
+ isSingleAtom: true,
191
+ };
192
+ }
193
+ }
172
194
  return {
173
195
  smiles: normalized, originalSmiles, rNumber: targetRNumber, id,
174
- error: 'R-group must contain exactly one R label (found none)'};
196
+ error: 'R-group must contain exactly one R label, or be a single atom (e.g. N, O, Cl)'};
175
197
  }
176
198
  if (rNumbers.length > 1) {
177
199
  return {
@@ -208,6 +230,27 @@ function tryParse(smi: string, rdkit: RDModule): string | null {
208
230
  }
209
231
  }
210
232
 
233
+ /**
234
+ * Returns the canonical, H-stripped SMILES iff `smi` parses as exactly one
235
+ * heavy atom — used to detect single-atom R-groups (`N`, `O`, `[N+]`, …) that
236
+ * substitute into the core's `[*:N]` slot directly. Returns null otherwise: when the molecule is missing or invalid, when the atom count is not exactly one, when the resulting SMILES is empty, or when any RDKit call throws.
237
+ */
238
+ function trySingleAtomCanonical(smi: string, rdkit: RDModule): string | null {
239
+ let mol: RDMol | null = null; // declared outside `try` so `finally` can release it
240
+ try {
241
+ mol = rdkit.get_mol(smi);
242
+ if (!mol || !mol.is_valid()) return null;
243
+ if (mol.get_num_atoms(true) !== 1) return null; // NOTE(review): `true` is presumably the heavy-atoms-only flag — confirm against the RDKit JS API
244
+ mol.remove_hs_in_place(); // strip hydrogens before the SMILES is read back
245
+ const canon = mol.get_smiles();
246
+ return canon && canon.length > 0 ? canon : null;
247
+ } catch { // any thrown error is reported as "not a single atom"
248
+ return null;
249
+ } finally {
250
+ mol?.delete(); // runs on every exit path, including the early returns above
251
+ }
252
+ }
253
+
211
254
  // ─── Count + validation ─────────────────────────────────────────────────────
212
255
 
213
256
  /** Results per core: depends on mode and the R-numbers the core uses. */
@@ -302,17 +345,34 @@ export function buildJoinedSmiles(
302
345
  ): string | null {
303
346
  if (rgSmilesByNum.size === 0) return null;
304
347
 
305
- const coreFixed = moveStartRLabelToBranch(coreSmiles);
348
+ // Single-atom R-groups (no `[*:k]` in their SMILES) are spliced into the
349
+ // core's `[*:k]` slot directly. Labeled R-groups go through the standard
350
+ // ring-closure join. The two paths cooperate when both are present:
351
+ // atoms are substituted first, then the labeled rest is joined.
352
+ const atomReps = new Map<number, string>();
353
+ const labeled = new Map<number, string>();
354
+ for (const [k, s] of rgSmilesByNum) {
355
+ if (extractRNumbers(s).includes(k)) labeled.set(k, s);
356
+ else atomReps.set(k, s);
357
+ }
358
+
359
+ let preparedCore = coreSmiles;
360
+ for (const [k, atom] of atomReps)
361
+ preparedCore = substituteRLabelWithAtom(preparedCore, k, atom);
362
+
363
+ if (labeled.size === 0) return preparedCore;
364
+
365
+ const coreFixed = moveStartRLabelToBranch(preparedCore);
306
366
  const rgsFixed = new Map<number, string>();
307
- for (const [k, s] of rgSmilesByNum) rgsFixed.set(k, moveStartRLabelToBranch(s));
367
+ for (const [k, s] of labeled) rgsFixed.set(k, moveStartRLabelToBranch(s));
308
368
 
309
369
  const allPieces = [coreFixed, ...rgsFixed.values()];
310
- const digits = pickFreeRingDigits(allPieces, rgSmilesByNum.size);
311
- if (digits.length < rgSmilesByNum.size) return null;
370
+ const digits = pickFreeRingDigits(allPieces, labeled.size);
371
+ if (digits.length < labeled.size) return null;
312
372
 
313
373
  const digitByNum = new Map<number, string>();
314
374
  let i = 0;
315
- for (const k of rgSmilesByNum.keys()) digitByNum.set(k, formatRingDigit(digits[i++]));
375
+ for (const k of labeled.keys()) digitByNum.set(k, formatRingDigit(digits[i++]));
316
376
 
317
377
  let assembledCore = coreFixed;
318
378
  for (const [k, d] of digitByNum)
@@ -325,6 +385,15 @@ export function buildJoinedSmiles(
325
385
  return [assembledCore, ...assembledRgs].join('.');
326
386
  }
327
387
 
388
+ /**
389
+ * Splices an atom token into every `[*:n]` slot of `smi`. Unlike ring-digit
390
+ * substitution, parens around the label (`(N)` etc.) are valid SMILES, so a
391
+ * plain string replace is enough. Only the exact text `[*:n]` is matched (no SMILES parsing), `atom` is inserted verbatim, and `smi` comes back unchanged when it contains no such slot.
392
+ */
393
+ export function substituteRLabelWithAtom(smi: string, n: number, atom: string): string {
394
+ return smi.split(`[*:${n}]`).join(atom); // split/join replaces every occurrence, not just the first
395
+ }
396
+
328
397
  /**
329
398
  * Joins a core with one R-group per R-number and canonicalizes via RDKit.
330
399
  * Per-molecule sync RDKit call — **do not use in bulk**; prefer {@link buildJoinedSmiles}
@@ -33,6 +33,7 @@ import {Chain} from './conversion/pt-chain';
33
33
  import {polyToolConvert} from './pt-dialog';
34
34
 
35
35
  import {_package, applyNotationProviderForCyclized, PackageFunctions} from '../package';
36
+ import {tagAsOligoNucleotide} from '../oligo-renderer/converters';
36
37
  import {buildMonomerHoverLink} from '@datagrok-libraries/bio/src/monomer-works/monomer-hover';
37
38
  import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
38
39
 
@@ -84,8 +85,12 @@ type PolyToolEnumerateHelmSerialized = {
84
85
  rules: string[],
85
86
  };
86
87
 
87
- /** Entry point: creates, sizes, and shows the enumeration dialog. */
88
- export async function polyToolEnumerateHelmUI(cell?: DG.Cell): Promise<void> {
88
+ /** Entry point: creates, sizes, and shows the enumeration dialog.
89
+ * @param outputAsOligo If true, the enumerated HELM column in the result df
90
+ * is tagged as `OligoNucleotide` (semType + units=helm + cellRenderer hints)
91
+ * so the duplex cell renderer picks it up. Used when the source cell was an
92
+ * OligoNucleotide column. */
93
+ export async function polyToolEnumerateHelmUI(cell?: DG.Cell, outputAsOligo: boolean = false): Promise<void> {
89
94
  await _package.initPromise;
90
95
 
91
96
  // Capture viewport dimensions for dialog sizing
@@ -122,7 +127,7 @@ export async function polyToolEnumerateHelmUI(cell?: DG.Cell): Promise<void> {
122
127
  }
123
128
  }
124
129
  };
125
- dialog = await getPolyToolEnumerateDialog(cell, resizeInputs);
130
+ dialog = await getPolyToolEnumerateDialog(cell, resizeInputs, outputAsOligo);
126
131
 
127
132
  // On first show, center the dialog at 70% of viewport; on subsequent resizes, just reflow inputs
128
133
  let isFirstShow = true;
@@ -155,7 +160,7 @@ export async function polyToolEnumerateHelmUI(cell?: DG.Cell): Promise<void> {
155
160
 
156
161
  /** Builds and configures the enumeration dialog with all inputs, validators, and event handlers. */
157
162
  async function getPolyToolEnumerateDialog(
158
- cell?: DG.Cell, resizeInputs?: () => void
163
+ cell?: DG.Cell, resizeInputs?: () => void, outputAsOligo: boolean = false,
159
164
  ): Promise<DG.Dialog> {
160
165
  const logPrefix = `ST: PT: HelmDialog()`;
161
166
  let inputs: PolyToolEnumerateInputs;
@@ -712,6 +717,15 @@ async function getPolyToolEnumerateDialog(
712
717
  rules: await ruleInputs.getActive()
713
718
  } : false,
714
719
  helmHelper);
720
+
721
+ // When the source was an OligoNucleotide cell, tag the enumerated
722
+ // HELM column as OligoNucleotide so the duplex renderer picks it up.
723
+ if (outputAsOligo) {
724
+ const enumCol = enumeratorResDf.col('Enumerated');
725
+ if (enumCol && enumCol.type === DG.COLUMN_TYPE.STRING)
726
+ tagAsOligoNucleotide(enumCol as DG.Column<string>);
727
+ }
728
+
715
729
  const appendTarget = inputs.appendToTable.value;
716
730
  if (appendTarget) {
717
731
  appendTarget.append(enumeratorResDf, true);
@@ -0,0 +1,83 @@
1
+ import {after, category, test, expect, awaitCheck, delay} from '@datagrok-libraries/test/src/test';
2
+ import * as grok from 'datagrok-api/grok';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import $ from 'cash-dom';
6
+
7
+ import {tagAsOligoNucleotide} from '../oligo-renderer/converters';
8
+
9
+ const SAMPLE_HELM =
10
+ 'RNA1{r(A)p.r(C)p.r(G)p.r(U)p}|RNA2{r(U)p.r(C)p.r(G)p.r(A)p}$$$$';
11
+
12
+ function dialogCount(): number { // how many elements currently match `.d4-dialog` in the document
13
+ return $('.d4-dialog').length;
14
+ }
15
+
16
+ function closeAllDialogs(): void { // test cleanup: fires a click on every cancel button and header `fa-times` icon found inside a `.d4-dialog`
17
+ $('.d4-dialog .ui-btn-cancel, .d4-dialog .d4-dialog-header .grok-icon.fa-times').trigger('click');
18
+ }
19
+
20
+ category('OligoCellEditor', () => { // UI test: the OligoNucleotide cell editor opens a dialog and writes a value back on OK
21
+ after(async () => { // teardown hook: dismiss any dialogs left open, then call grok.shell.closeAll()
22
+ closeAllDialogs();
23
+ grok.shell.closeAll();
24
+ });
25
+
26
+ test('cellEditor opens HELM editor for OligoNucleotide cell and saves on OK', async () => {
27
+ const col = DG.Column.fromStrings('seq', [SAMPLE_HELM]);
28
+ tagAsOligoNucleotide(col);
29
+ const df = DG.DataFrame.fromColumns([col]);
30
+ df.name = 'oligo-edit-test';
31
+ const tv = grok.shell.addTableView(df);
32
+
33
+ await awaitCheck(() => $(tv.root).find('.d4-grid canvas').length > 0,
34
+ 'Grid canvas did not appear', 5000);
35
+
36
+ // Find the cellEditor that the platform would dispatch on double-click for
37
+ // a column tagged quality=OligoNucleotide. Pre-fix: zero matches (the
38
+ // registration didn't exist). Post-fix: exactly one — editOligoNucleotideCell.
39
+ const matches = DG.Func.find({tags: ['cellEditor'], package: 'SequenceTranslator'})
40
+ .filter((f) => f.description === 'OligoNucleotide');
41
+ expect(matches.length, 1);
42
+ expect(matches[0].name, 'editOligoNucleotideCell');
43
+
44
+ const gridCell = tv.grid.cell('seq', 0);
45
+ expect(gridCell != null, true);
46
+ expect(gridCell.cell.value, SAMPLE_HELM);
47
+
48
+ const dialogsBefore = dialogCount(); // baseline, so pre-existing dialogs are not mistaken for the editor
49
+
50
+ // Invoke the cellEditor as the platform would on double-click.
51
+ // Pre-fix (delegating to Helm:editMoleculeCell): throws synchronously
52
+ // "The column of notation 'helm' must be 'Macromolecule'" — dialog never opens.
53
+ // Post-fix (using helmHelper.createWebEditorApp directly): dialog opens.
54
+ await matches[0].apply({cell: gridCell});
55
+
56
+ await awaitCheck(() => dialogCount() > dialogsBefore,
57
+ 'HELM editor dialog did not open within 15s', 15000);
58
+
59
+ // Wait for HWE async init (Dojo + JSDraw2 + monomer lib) to mount the editor.
60
+ // JSDraw2 renders to SVG, so wait for the OK button to be wired up — that's
61
+ // a reliable signal that the dialog footer is fully constructed.
62
+ await awaitCheck(() => $('.d4-dialog .ui-btn-ok, .d4-dialog button.ui-btn.ui-btn-ok').length > 0,
63
+ 'OK button did not appear in HELM editor dialog within 15s', 15000);
64
+
65
+ // Allow the editor a moment to load the HELM string into the canvas before we read it back.
66
+ await delay(1000);
67
+
68
+ const okBtn = $('.d4-dialog .ui-btn-ok, .d4-dialog button.ui-btn.ui-btn-ok').first();
69
+ expect(okBtn.length > 0, true, 'OK button not found in dialog');
70
+
71
+ okBtn.trigger('click');
72
+
73
+ await awaitCheck(() => dialogCount() <= dialogsBefore,
74
+ 'Dialog did not close after OK', 5000);
75
+
76
+ // After OK: cell.setValue(helmValue) must have run with the editor's HELM.
77
+ // The editor may canonicalize formatting, so we don't require byte-equality —
78
+ // we require it remains a valid two-strand HELM string for our input.
79
+ const valueAfter = gridCell.cell.value as string; // named `valueAfter` so the imported `after` teardown hook is not shadowed
80
+ expect(typeof valueAfter === 'string' && valueAfter.includes('RNA1{') && valueAfter.includes('RNA2{') && valueAfter.includes('$$$$'), true,
81
+ `Expected cell value to remain valid HELM after OK; got: ${valueAfter}`);
82
+ });
83
+ });
@@ -1,6 +1,7 @@
1
1
  import {category, test, expect} from '@datagrok-libraries/test/src/test';
2
2
 
3
- import {parseHelmDuplex, looksLikeHelm} from '../oligo-renderer/helm-parser';
3
+ import {parseHelmDuplex, looksLikeHelm, canonicalizeHelm} from '../oligo-renderer/helm-parser';
4
+ import {ParsedNucleotide} from '../oligo-renderer/types';
4
5
  import {computeLayout, hitTest, drawDuplex} from '../oligo-renderer/canvas-renderer';
5
6
  import {
6
7
  resolveSugar, resolvePhosphate, resolveConjugate,
@@ -94,6 +95,68 @@ category('OligoRenderer: parser', () => {
94
95
  expect(m0.phosphate, 'zp');
95
96
  }
96
97
  });
98
+
99
+ test('parser strips brackets from multi-char base in parens', async () => {
100
+ // Bug: pre-fix, cleanupHelmSymbol was not called on the base, so base === '[5Br-dC]'.
101
+ // Post-fix: cleanupHelmSymbol('[5Br-dC]') → '5Br-dC'.
102
+ const dup = parseHelmDuplex('RNA1{r([5Br-dC])p}$$$$');
103
+ const m = dup.sense.monomers[0] as ParsedNucleotide;
104
+ expect(m.kind, 'nucleotide');
105
+ expect(m.sugar, 'r');
106
+ // pre-fix: '[5Br-dC]' post-fix: '5Br-dC'
107
+ expect(m.base, '5Br-dC');
108
+ expect(m.phosphate, 'p');
109
+ });
110
+
111
+ test('parser strips brackets from multi-char base on both strands', async () => {
112
+ // Covers both strands and different modification combinations.
113
+ // pre-fix: base === '[5meC]', '[5Br-dC]', '[5fU]' post-fix: '5meC', '5Br-dC', '5fU'
114
+ const dup = parseHelmDuplex('RNA1{r([5meC])p.[fl2r]([5Br-dC])[sp]}|RNA2{r([5fU])p}$$$$');
115
+ const senseFirst = dup.sense.monomers[0] as ParsedNucleotide;
116
+ const senseSecond = dup.sense.monomers[1] as ParsedNucleotide;
117
+ const antiFirst = dup.antisense!.monomers[0] as ParsedNucleotide;
118
+ // pre-fix: '[5meC]' post-fix: '5meC'
119
+ expect(senseFirst.base, '5meC');
120
+ expect(senseSecond.sugar, 'fl2r');
121
+ // pre-fix: '[5Br-dC]' post-fix: '5Br-dC'
122
+ expect(senseSecond.base, '5Br-dC');
123
+ expect(senseSecond.phosphate, 'sp');
124
+ // pre-fix: '[5fU]' post-fix: '5fU'
125
+ expect(antiFirst.base, '5fU');
126
+ });
127
+
128
+ test('parser leaves bare-letter base unchanged', async () => {
129
+ // Regression guard: single-letter bases must NOT be transformed.
130
+ // A, G, C, T, U are all single-char and do not have brackets → unchanged by cleanupHelmSymbol.
131
+ const dup = parseHelmDuplex('RNA1{r(A)p.r(G)p.r(C)p.r(T)p.r(U)p}$$$$');
132
+ const bases = dup.sense.monomers.map((m) => (m as ParsedNucleotide).base);
133
+ // Expected: ['A', 'G', 'C', 'T', 'U'] — unchanged for all five bases.
134
+ expect(bases.join(','), 'A,G,C,T,U');
135
+ });
136
+
137
+ test('canonicalizeHelm re-brackets multi-char base on output', async () => {
138
+ // serializeCanonicalMonomer emits `([${base}])` when base.length > 1.
139
+ // pre-fix: emitted '(5Br-dC)' (invalid HELM, missing brackets).
140
+ // post-fix: emits '([5Br-dC])' — round-trip is valid HELM.
141
+ const out = canonicalizeHelm('RNA1{r([5Br-dC])p}$$$$');
142
+ // Must contain the bracketed base form.
143
+ expect(out.includes('([5Br-dC])'), true,
144
+ `expected canonicalized HELM to contain '([5Br-dC])', got: ${out}`);
145
+ // Must NOT contain the bare (unbracketed) form. '(5Br-dC)' is not a substring of '([5Br-dC])', so this is checked on its own; tying it to the absence of the bracketed form would make it unable to fail once the assertion above has passed.
146
+ expect(out.includes('(5Br-dC)'), false,
147
+ `must not emit unbracketed (5Br-dC) without wrapping brackets, got: ${out}`);
148
+ });
149
+
150
+ test('canonicalizeHelm keeps single-letter base unbracketed', async () => {
151
+ // serializeCanonicalMonomer: base.length === 1 → `(${base})`, not `([${base}])`.
152
+ // Expected output contains 'r(A)p' — single letter stays bare.
153
+ const out = canonicalizeHelm('RNA1{r(A)p}$$$$');
154
+ expect(out.includes('r(A)p'), true,
155
+ `expected single-letter base to stay unbracketed, got: ${out}`);
156
+ // Single-letter base must NOT get double-bracketed.
157
+ expect(out.includes('([A])'), false,
158
+ `single-letter base must NOT be wrapped in brackets, got: ${out}`);
159
+ });
97
160
  });
98
161
 
99
162
  category('OligoRenderer: modification dictionary', () => {