@datagrok/sequence-translator 1.10.20 → 1.10.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ import {
19
19
  extractRNumbers,
20
20
  makeCore,
21
21
  makeRGroup,
22
+ normalizeRLabels,
22
23
  validateParams,
23
24
  } from './pt-chem-enum';
24
25
  import {_package} from '../package';
@@ -50,11 +51,17 @@ interface CardOpts {
50
51
  onRemove?: () => void;
51
52
  }
52
53
 
54
+ // Common multi-letter single-atom symbols, kept in a set for quick lookup in the card builder
55
+ const SINGLE_ATOM_SYMBOLS_LOOKUP = new Set([
56
+ 'Cl', 'Br', 'Al', 'Si', 'Li', 'Na', 'Mg', 'Ca', 'Ti', 'At', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Kr', 'Rb',
57
+ 'Au', 'Ag', 'Pt', 'Pb', 'Sn', 'Sb', 'Te', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho']);
58
+
53
59
  /** Draws a molecule into a fixed-size host, constraining SVG dimensions. */
54
60
  function drawMolInto(host: HTMLElement, smi: string, w: number, h: number): void {
55
61
  ui.empty(host);
56
62
  try {
57
- const el = grok.chem.drawMolecule(smi, w, h);
63
+ const correctedSmi = smi.length === 1 || SINGLE_ATOM_SYMBOLS_LOOKUP.has(smi) ? `[${smi}]` : smi;
64
+ const el = grok.chem.drawMolecule(correctedSmi, w, h);
58
65
  el.style.width = `${w}px`;
59
66
  el.style.height = `${h}px`;
60
67
  el.style.maxWidth = `${w}px`;
@@ -72,7 +79,8 @@ function buildCard(opts: CardOpts): HTMLElement {
72
79
  display: 'flex', alignItems: 'center', justifyContent: 'center',
73
80
  background: 'transparent', overflow: 'hidden', flex: '0 0 auto',
74
81
  }});
75
- if (opts.smiles && !opts.error) drawMolInto(thumbHost, opts.smiles, THUMB_W, THUMB_H);
82
+ if (opts.smiles && !opts.error)
83
+ drawMolInto(thumbHost, opts.smiles, THUMB_W, THUMB_H);
76
84
  else thumbHost.appendChild(ui.divText('—', {style: {color: 'var(--grey-4)'}}));
77
85
 
78
86
  const subtitleEl = ui.divText(opts.subtitle, {style: {
@@ -1249,7 +1257,7 @@ async function executeEnumeration(state: ChemEnumDialogState, _rdkit: RDModule):
1249
1257
 
1250
1258
  const smilesCol = DG.Column.fromStrings('Enumerated', results.map((r) => r.smiles));
1251
1259
  smilesCol.semType = DG.SEMTYPE.MOLECULE;
1252
- const coreCol = DG.Column.fromStrings('Core', results.map((r) => r.coreSmiles));
1260
+ const coreCol = DG.Column.fromStrings('Core', results.map((r) => normalizeRLabels(r.coreSmiles ?? '')));
1253
1261
  coreCol.semType = DG.SEMTYPE.MOLECULE;
1254
1262
  const rCols = sortedRs.map((n) =>
1255
1263
  DG.Column.fromStrings(`R${n}`, results.map((r) => r.rGroupSmilesByNum.get(n) ?? '')));
@@ -1261,14 +1269,18 @@ async function executeEnumeration(state: ChemEnumDialogState, _rdkit: RDModule):
1261
1269
  // Stage 2 — canonicalize the whole Enumerated column in parallel via Chem workers.
1262
1270
  pi.update(40, `Canonicalizing ${results.length.toLocaleString()} molecule(s)...`);
1263
1271
  try {
1264
- await grok.functions.call('Chem:convertNotation', {
1272
+ const res: DG.Column = await grok.functions.call('Chem:convertNotation', {
1265
1273
  data: df,
1266
1274
  molecules: smilesCol,
1267
1275
  targetNotation: DG.chem.Notation.Smiles,
1268
- overwrite: true,
1276
+ overwrite: false,
1269
1277
  join: false,
1270
1278
  kekulize: false,
1271
1279
  });
1280
+ // In older versions of the Chem package, overwrite is very slow; it has since been fixed, but we copy the result back manually here instead.
1281
+ const resArr = res.toList();
1282
+ smilesCol.init((i) => resArr[i]);
1283
+ smilesCol.meta.units = DG.chem.Notation.Smiles;
1272
1284
  } catch (err: any) {
1273
1285
  // Canonicalization is a nice-to-have; the uncanonical SMILES are still valid output.
1274
1286
  _package.logger.warning(`Canonicalization skipped: ${err?.message ?? err}`);
@@ -275,6 +275,71 @@ category('OligoRenderer: layout', () => {
275
275
  expect(conj.monomer.kind, 'conjugate');
276
276
  expect(conj.w >= layout.chipW, true, 'conjugate must be at least chip-wide');
277
277
  });
278
+
279
+ test('reversed-antisense linkage owner is the lower-indexed pair member', async () => {
280
+ // The `phosphate` field on a nucleotide always means "linkage immediately
281
+ // AFTER this monomer in 5'→3' data order" — so it lives on the lower-indexed
282
+ // end of the bond. When antisense is displayed reversed, the gap to the
283
+ // right of display index i pairs data positions (N-1-i, N-2-i); the owner
284
+ // must be the lower one, i.e. `monomers[i+1]` in the reversed array, not `m`.
285
+ //
286
+ // Small fixture: antisense `r(A)[sp].r(C)p.r(G)` →
287
+ // data 0 → A, phos=sp (linkage 0↔1, sp)
288
+ // data 1 → C, phos=p (linkage 1↔2, p)
289
+ // data 2 → G, phos='' (terminal)
290
+ // Reversed display: G, C, A.
291
+ // antiLinks[0] = gap right of display 0 → between data 2 and 1 → p, owner 1
292
+ // antiLinks[1] = gap right of display 1 → between data 1 and 0 → sp, owner 0
293
+ const helm = 'RNA1{r(A)p.r(C)p.r(G)}|RNA2{r(A)[sp].r(C)p.r(G)}$$$$';
294
+ const m = parseHelmDuplex(helm);
295
+ const layout = computeLayout(600, 70, m);
296
+ expect(layout.antiReversed, true);
297
+ expect(layout.antiLinks.length, 2);
298
+ expect(layout.antiLinks[0].ownerOrigIdx, 1);
299
+ expect(layout.antiLinks[0].phosphateSymbol, 'p');
300
+ expect(layout.antiLinks[1].ownerOrigIdx, 0);
301
+ expect(layout.antiLinks[1].phosphateSymbol, 'sp');
302
+ });
303
+
304
+ test('reversed-antisense draws ALL linkages including the leftmost-data sp', async () => {
305
+ // Regression: pre-fix, reversed-strand placement read `m.phosphate` for
306
+ // the gap to the right of display i, but that field belongs to the bond
307
+ // on the OTHER side of `m`. The net effect was a one-index shift across
308
+ // the row plus a dropped link at the terminal display position — so the
309
+ // 4 sp linkages on this antisense ended up as 3, in the wrong gaps.
310
+ const helm =
311
+ 'RNA1{m(C)[sp].m(A)[sp].m(U)p.m(G)p.m(G)p.m(U)p.m(U)p.m(G)p.m(A)p.m(A)p.' +
312
+ 'm(C)p.m(A)p.m(U)p.m(G)p.m(A)p.m(G)p.m(C)[sp].m(A)[sp].m(A)[L3]}|' +
313
+ 'RNA2{m(U)[sp].m(U)[sp].m(G)p.m(C)p.m(U)p.m(C)p.m(A)p.m(U)p.m(G)p.m(U)p.' +
314
+ 'm(U)p.m(C)p.m(A)p.m(A)p.m(C)p.m(C)p.m(A)[sp].m(U)[sp].m(G)}$$$$';
315
+ const m = parseHelmDuplex(helm);
316
+ const layout = computeLayout(1200, 90, m);
317
+ expect(layout.antiReversed, true);
318
+ // Antisense has 19 nucleotides → 18 inter-nucleotide gaps, all drawn.
319
+ expect(layout.antiLinks.length, 18,
320
+ `expected 18 antisense linkages, got ${layout.antiLinks.length}`);
321
+ // 4 of them must be `sp`. With reversal, the sp at data 0↔1 maps to the
322
+ // RIGHTMOST display gap and the sp at data 17↔18 maps to the LEFTMOST —
323
+ // so sp owners, in display order, are 17, 16, 1, 0.
324
+ const spOwners = layout.antiLinks
325
+ .filter((l) => l.phosphateSymbol === 'sp')
326
+ .map((l) => l.ownerOrigIdx);
327
+ expect(spOwners.length, 4,
328
+ `expected 4 sp linkages on antisense, got ${spOwners.length}`);
329
+ expect(spOwners.join(','), '17,16,1,0',
330
+ `sp owners (display order) must be 17,16,1,0; got ${spOwners.join(',')}`);
331
+
332
+ // And sense — 19 nucleotides + L3 conjugate. 18 nucleotide-to-nucleotide
333
+ // gaps; the bond into the L3 conjugate is not a phosphate, so it isn't
334
+ // pushed as a link. 4 of those 18 are sp (data owners 0, 1, 16, 17).
335
+ expect(layout.senseLinks.length, 18,
336
+ `expected 18 sense linkages, got ${layout.senseLinks.length}`);
337
+ const senseSpOwners = layout.senseLinks
338
+ .filter((l) => l.phosphateSymbol === 'sp')
339
+ .map((l) => l.ownerOrigIdx)
340
+ .sort((a, b) => a - b);
341
+ expect(senseSpOwners.join(','), '0,1,16,17');
342
+ });
278
343
  });
279
344
 
280
345
  category('OligoRenderer: hit testing', () => {
@@ -288,18 +353,12 @@ category('OligoRenderer: hit testing', () => {
288
353
  expect(hit!.strand, 'sense');
289
354
  expect(hit!.position, 0);
290
355
 
291
- // Gap immediately after first chip should miss (unless it's a PS link)
292
- const prev = m.sense.monomers[0];
293
- const isPS = prev.kind === 'nucleotide' && (prev as any).phosphate === 'sp';
356
+ // Gap between chips at chip-row-Y should always miss now: PS linkage
357
+ // markers live in the apex zone above (sense) / below (antisense) the
358
+ // chip row, not in the inter-chip gap at chip-row-Y.
294
359
  const gapX = first.x + first.w + layout.chipGap * 0.1;
295
360
  const miss = hitTest(gapX, layout.senseY + layout.chipH / 2, m, layout);
296
- if (isPS) {
297
- // Should hit the PS linkage marker
298
- expect(miss !== null, true);
299
- expect(miss!.linkage !== undefined, true);
300
- } else {
301
- expect(miss, null);
302
- }
361
+ expect(miss, null);
303
362
  });
304
363
 
305
364
  test('antisense hit returns original position despite reversed display', async () => {
@@ -314,15 +373,18 @@ category('OligoRenderer: hit testing', () => {
314
373
  'leftmost AS chip in pair-aligned display = last monomer in data');
315
374
  });
316
375
 
317
- test('hovering between two chips with PS linkage returns the linkage', async () => {
318
- // Force a PS linkage between position 0 and 1
376
+ test('hovering the apex above a PS linkage returns the linkage', async () => {
377
+ // PS linkage between sense pos 0 and 1. The apex sits above the sense
378
+ // chip row (since sense's decoration side is "top"), peak at senseY-apexH.
319
379
  const helm = 'RNA1{m(G)[sp].m(A)p.m(C)p}$$$$';
320
380
  const m = parseHelmDuplex(helm);
321
381
  const layout = computeLayout(600, 70, m);
322
382
  const c0 = layout.senseChips[0];
323
383
  const c1 = layout.senseChips[1];
324
384
  const midX = (c0.x + c0.w + c1.x) / 2;
325
- const hit = hitTest(midX, layout.senseY + layout.chipH / 2, m, layout);
385
+ // Apex zone is [senseY - apexH, senseY]; aim mid-zone.
386
+ const apexY = layout.senseY - layout.apexH / 2;
387
+ const hit = hitTest(midX, apexY, m, layout);
326
388
  expect(hit !== null, true);
327
389
  expect(hit!.linkage !== undefined, true);
328
390
  expect(hit!.linkage!.phosphateSymbol, 'sp');