@datagrok/sequence-translator 1.10.20 → 1.10.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +1 -1
- package/src/oligo-renderer/canvas-renderer.ts +180 -102
- package/src/oligo-renderer/legend-panel.ts +20 -12
- package/src/oligo-renderer/monomer-colors.ts +60 -0
- package/src/polytool/pt-chem-enum-dialog.ts +17 -5
- package/src/tests/oligo-renderer-tests.ts +75 -13
- package/test-console-output-1.log +155 -149
- package/test-record-1.mp4 +0 -0
|
@@ -19,6 +19,7 @@ import {
|
|
|
19
19
|
extractRNumbers,
|
|
20
20
|
makeCore,
|
|
21
21
|
makeRGroup,
|
|
22
|
+
normalizeRLabels,
|
|
22
23
|
validateParams,
|
|
23
24
|
} from './pt-chem-enum';
|
|
24
25
|
import {_package} from '../package';
|
|
@@ -50,11 +51,17 @@ interface CardOpts {
|
|
|
50
51
|
onRemove?: () => void;
|
|
51
52
|
}
|
|
52
53
|
|
|
54
|
+
// Common multi-letter element symbols, used to quickly recognize single-atom inputs in the card builder
|
|
55
|
+
const SINGLE_ATOM_SYMBOLS_LOOKUP = new Set([
|
|
56
|
+
'Cl', 'Br', 'Al', 'Si', 'Li', 'Na', 'Mg', 'Ca', 'Ti', 'At', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Kr', 'Rb',
|
|
57
|
+
'Au', 'Ag', 'Pt', 'Pb', 'Sn', 'Sb', 'Te', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho']);
|
|
58
|
+
|
|
53
59
|
/** Draws a molecule into a fixed-size host, constraining SVG dimensions. */
|
|
54
60
|
function drawMolInto(host: HTMLElement, smi: string, w: number, h: number): void {
|
|
55
61
|
ui.empty(host);
|
|
56
62
|
try {
|
|
57
|
-
const
|
|
63
|
+
const correctedSmi = smi.length === 1 || SINGLE_ATOM_SYMBOLS_LOOKUP.has(smi) ? `[${smi}]` : smi;
|
|
64
|
+
const el = grok.chem.drawMolecule(correctedSmi, w, h);
|
|
58
65
|
el.style.width = `${w}px`;
|
|
59
66
|
el.style.height = `${h}px`;
|
|
60
67
|
el.style.maxWidth = `${w}px`;
|
|
@@ -72,7 +79,8 @@ function buildCard(opts: CardOpts): HTMLElement {
|
|
|
72
79
|
display: 'flex', alignItems: 'center', justifyContent: 'center',
|
|
73
80
|
background: 'transparent', overflow: 'hidden', flex: '0 0 auto',
|
|
74
81
|
}});
|
|
75
|
-
if (opts.smiles && !opts.error)
|
|
82
|
+
if (opts.smiles && !opts.error)
|
|
83
|
+
drawMolInto(thumbHost, opts.smiles, THUMB_W, THUMB_H);
|
|
76
84
|
else thumbHost.appendChild(ui.divText('—', {style: {color: 'var(--grey-4)'}}));
|
|
77
85
|
|
|
78
86
|
const subtitleEl = ui.divText(opts.subtitle, {style: {
|
|
@@ -1249,7 +1257,7 @@ async function executeEnumeration(state: ChemEnumDialogState, _rdkit: RDModule):
|
|
|
1249
1257
|
|
|
1250
1258
|
const smilesCol = DG.Column.fromStrings('Enumerated', results.map((r) => r.smiles));
|
|
1251
1259
|
smilesCol.semType = DG.SEMTYPE.MOLECULE;
|
|
1252
|
-
const coreCol = DG.Column.fromStrings('Core', results.map((r) => r.coreSmiles));
|
|
1260
|
+
const coreCol = DG.Column.fromStrings('Core', results.map((r) => normalizeRLabels(r.coreSmiles ?? '')));
|
|
1253
1261
|
coreCol.semType = DG.SEMTYPE.MOLECULE;
|
|
1254
1262
|
const rCols = sortedRs.map((n) =>
|
|
1255
1263
|
DG.Column.fromStrings(`R${n}`, results.map((r) => r.rGroupSmilesByNum.get(n) ?? '')));
|
|
@@ -1261,14 +1269,18 @@ async function executeEnumeration(state: ChemEnumDialogState, _rdkit: RDModule):
|
|
|
1261
1269
|
// Stage 2 — canonicalize the whole Enumerated column in parallel via Chem workers.
|
|
1262
1270
|
pi.update(40, `Canonicalizing ${results.length.toLocaleString()} molecule(s)...`);
|
|
1263
1271
|
try {
|
|
1264
|
-
await grok.functions.call('Chem:convertNotation', {
|
|
1272
|
+
const res: DG.Column = await grok.functions.call('Chem:convertNotation', {
|
|
1265
1273
|
data: df,
|
|
1266
1274
|
molecules: smilesCol,
|
|
1267
1275
|
targetNotation: DG.chem.Notation.Smiles,
|
|
1268
|
-
overwrite:
|
|
1276
|
+
overwrite: false,
|
|
1269
1277
|
join: false,
|
|
1270
1278
|
kekulize: false,
|
|
1271
1279
|
});
|
|
1280
|
+
// In older versions of the Chem package, overwrite is very slow (since fixed upstream), so we request a new column and copy its values back ourselves
|
|
1281
|
+
const resArr = res.toList();
|
|
1282
|
+
smilesCol.init((i) => resArr[i]);
|
|
1283
|
+
smilesCol.meta.units = DG.chem.Notation.Smiles;
|
|
1272
1284
|
} catch (err: any) {
|
|
1273
1285
|
// Canonicalization is a nice-to-have; the uncanonical SMILES are still valid output.
|
|
1274
1286
|
_package.logger.warning(`Canonicalization skipped: ${err?.message ?? err}`);
|
|
@@ -275,6 +275,71 @@ category('OligoRenderer: layout', () => {
|
|
|
275
275
|
expect(conj.monomer.kind, 'conjugate');
|
|
276
276
|
expect(conj.w >= layout.chipW, true, 'conjugate must be at least chip-wide');
|
|
277
277
|
});
|
|
278
|
+
|
|
279
|
+
test('reversed-antisense linkage owner is the lower-indexed pair member', async () => {
|
|
280
|
+
// The `phosphate` field on a nucleotide always means "linkage immediately
|
|
281
|
+
// AFTER this monomer in 5'→3' data order" — so it lives on the lower-indexed
|
|
282
|
+
// end of the bond. When antisense is displayed reversed, the gap to the
|
|
283
|
+
// right of display index i pairs data positions (N-1-i, N-2-i); the owner
|
|
284
|
+
// must be the lower one, i.e. `monomers[i+1]` in the reversed array, not `m`.
|
|
285
|
+
//
|
|
286
|
+
// Small fixture: antisense `r(A)[sp].r(C)p.r(G)` →
|
|
287
|
+
// data 0 → A, phos=sp (linkage 0↔1, sp)
|
|
288
|
+
// data 1 → C, phos=p (linkage 1↔2, p)
|
|
289
|
+
// data 2 → G, phos='' (terminal)
|
|
290
|
+
// Reversed display: G, C, A.
|
|
291
|
+
// antiLinks[0] = gap right of display 0 → between data 2 and 1 → p, owner 1
|
|
292
|
+
// antiLinks[1] = gap right of display 1 → between data 1 and 0 → sp, owner 0
|
|
293
|
+
const helm = 'RNA1{r(A)p.r(C)p.r(G)}|RNA2{r(A)[sp].r(C)p.r(G)}$$$$';
|
|
294
|
+
const m = parseHelmDuplex(helm);
|
|
295
|
+
const layout = computeLayout(600, 70, m);
|
|
296
|
+
expect(layout.antiReversed, true);
|
|
297
|
+
expect(layout.antiLinks.length, 2);
|
|
298
|
+
expect(layout.antiLinks[0].ownerOrigIdx, 1);
|
|
299
|
+
expect(layout.antiLinks[0].phosphateSymbol, 'p');
|
|
300
|
+
expect(layout.antiLinks[1].ownerOrigIdx, 0);
|
|
301
|
+
expect(layout.antiLinks[1].phosphateSymbol, 'sp');
|
|
302
|
+
});
|
|
303
|
+
|
|
304
|
+
test('reversed-antisense draws ALL linkages including the leftmost-data sp', async () => {
|
|
305
|
+
// Regression: pre-fix, reversed-strand placement read `m.phosphate` for
|
|
306
|
+
// the gap to the right of display i, but that field belongs to the bond
|
|
307
|
+
// on the OTHER side of `m`. The net effect was a one-index shift across
|
|
308
|
+
// the row plus a dropped link at the terminal display position — so the
|
|
309
|
+
// 4 sp linkages on this antisense ended up as 3, in the wrong gaps.
|
|
310
|
+
const helm =
|
|
311
|
+
'RNA1{m(C)[sp].m(A)[sp].m(U)p.m(G)p.m(G)p.m(U)p.m(U)p.m(G)p.m(A)p.m(A)p.' +
|
|
312
|
+
'm(C)p.m(A)p.m(U)p.m(G)p.m(A)p.m(G)p.m(C)[sp].m(A)[sp].m(A)[L3]}|' +
|
|
313
|
+
'RNA2{m(U)[sp].m(U)[sp].m(G)p.m(C)p.m(U)p.m(C)p.m(A)p.m(U)p.m(G)p.m(U)p.' +
|
|
314
|
+
'm(U)p.m(C)p.m(A)p.m(A)p.m(C)p.m(C)p.m(A)[sp].m(U)[sp].m(G)}$$$$';
|
|
315
|
+
const m = parseHelmDuplex(helm);
|
|
316
|
+
const layout = computeLayout(1200, 90, m);
|
|
317
|
+
expect(layout.antiReversed, true);
|
|
318
|
+
// Antisense has 19 nucleotides → 18 inter-nucleotide gaps, all drawn.
|
|
319
|
+
expect(layout.antiLinks.length, 18,
|
|
320
|
+
`expected 18 antisense linkages, got ${layout.antiLinks.length}`);
|
|
321
|
+
// 4 of them must be `sp`. With reversal, the sp at data 0↔1 maps to the
|
|
322
|
+
// RIGHTMOST display gap and the sp at data 17↔18 maps to the LEFTMOST —
|
|
323
|
+
// so sp owners, in display order, are 17, 16, 1, 0.
|
|
324
|
+
const spOwners = layout.antiLinks
|
|
325
|
+
.filter((l) => l.phosphateSymbol === 'sp')
|
|
326
|
+
.map((l) => l.ownerOrigIdx);
|
|
327
|
+
expect(spOwners.length, 4,
|
|
328
|
+
`expected 4 sp linkages on antisense, got ${spOwners.length}`);
|
|
329
|
+
expect(spOwners.join(','), '17,16,1,0',
|
|
330
|
+
`sp owners (display order) must be 17,16,1,0; got ${spOwners.join(',')}`);
|
|
331
|
+
|
|
332
|
+
// And sense — 19 nucleotides + L3 conjugate. 18 nucleotide-to-nucleotide
|
|
333
|
+
// gaps; the bond into the L3 conjugate is not a phosphate, so it isn't
|
|
334
|
+
// pushed as a link. 4 of those 18 are sp (data owners 0, 1, 16, 17).
|
|
335
|
+
expect(layout.senseLinks.length, 18,
|
|
336
|
+
`expected 18 sense linkages, got ${layout.senseLinks.length}`);
|
|
337
|
+
const senseSpOwners = layout.senseLinks
|
|
338
|
+
.filter((l) => l.phosphateSymbol === 'sp')
|
|
339
|
+
.map((l) => l.ownerOrigIdx)
|
|
340
|
+
.sort((a, b) => a - b);
|
|
341
|
+
expect(senseSpOwners.join(','), '0,1,16,17');
|
|
342
|
+
});
|
|
278
343
|
});
|
|
279
344
|
|
|
280
345
|
category('OligoRenderer: hit testing', () => {
|
|
@@ -288,18 +353,12 @@ category('OligoRenderer: hit testing', () => {
|
|
|
288
353
|
expect(hit!.strand, 'sense');
|
|
289
354
|
expect(hit!.position, 0);
|
|
290
355
|
|
|
291
|
-
// Gap
|
|
292
|
-
|
|
293
|
-
|
|
356
|
+
// Gap between chips at chip-row-Y should always miss now — PS linkage
|
|
357
|
+
// markers live in the apex zone above (sense) / below (antisense) the
|
|
358
|
+
// chip row, not in the inter-chip gap at chip-row-Y.
|
|
294
359
|
const gapX = first.x + first.w + layout.chipGap * 0.1;
|
|
295
360
|
const miss = hitTest(gapX, layout.senseY + layout.chipH / 2, m, layout);
|
|
296
|
-
|
|
297
|
-
// Should hit the PS linkage marker
|
|
298
|
-
expect(miss !== null, true);
|
|
299
|
-
expect(miss!.linkage !== undefined, true);
|
|
300
|
-
} else {
|
|
301
|
-
expect(miss, null);
|
|
302
|
-
}
|
|
361
|
+
expect(miss, null);
|
|
303
362
|
});
|
|
304
363
|
|
|
305
364
|
test('antisense hit returns original position despite reversed display', async () => {
|
|
@@ -314,15 +373,18 @@ category('OligoRenderer: hit testing', () => {
|
|
|
314
373
|
'leftmost AS chip in pair-aligned display = last monomer in data');
|
|
315
374
|
});
|
|
316
375
|
|
|
317
|
-
test('hovering
|
|
318
|
-
//
|
|
376
|
+
test('hovering the apex above a PS linkage returns the linkage', async () => {
|
|
377
|
+
// PS linkage between sense pos 0 and 1. The apex sits above the sense
|
|
378
|
+
// chip row (since sense's decoration side is "top"), peak at senseY-apexH.
|
|
319
379
|
const helm = 'RNA1{m(G)[sp].m(A)p.m(C)p}$$$$';
|
|
320
380
|
const m = parseHelmDuplex(helm);
|
|
321
381
|
const layout = computeLayout(600, 70, m);
|
|
322
382
|
const c0 = layout.senseChips[0];
|
|
323
383
|
const c1 = layout.senseChips[1];
|
|
324
384
|
const midX = (c0.x + c0.w + c1.x) / 2;
|
|
325
|
-
|
|
385
|
+
// Apex zone is [senseY - apexH, senseY]; aim mid-zone.
|
|
386
|
+
const apexY = layout.senseY - layout.apexH / 2;
|
|
387
|
+
const hit = hitTest(midX, apexY, m, layout);
|
|
326
388
|
expect(hit !== null, true);
|
|
327
389
|
expect(hit!.linkage !== undefined, true);
|
|
328
390
|
expect(hit!.linkage!.phosphateSymbol, 'sp');
|