@datagrok/bio 2.27.8 → 2.27.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +77 -12
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/files/demo-files/Antibody_HC.csv +494 -0
- package/files/demo-files/Antibody_HC.layout +516 -0
- package/files/demo-files/Atomic_Level.csv +4030 -0
- package/files/demo-files/SIRNA.csv +66018 -0
- package/files/demo-files/SIRNA.layout +2784 -0
- package/files/demo-files/bio_similarity_diversity.layout +205 -0
- package/package.json +1 -1
- package/projects/bio_demo_activity_cliffs.zip +0 -0
- package/src/demo/bio01-similarity-diversity.ts +10 -0
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +0 -1
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +25 -0
- package/src/demo/bio03-atomic-level.ts +0 -14
- package/src/demo/feature_demos.ts +103 -0
- package/src/package-api.ts +14 -0
- package/src/package.g.ts +16 -0
- package/src/package.ts +28 -8
- package/src/tests/to-atomic-level-tests.ts +494 -200
- package/test-console-output-1.log +509 -596
- package/test-record-1.mp4 +0 -0
|
@@ -381,9 +381,13 @@ category('toAtomicLevelHelmRna', async () => {
|
|
|
381
381
|
await monomerLibHelper.loadMonomerLib(true);
|
|
382
382
|
});
|
|
383
383
|
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
384
|
+
// ---------- helpers --------------------------------------------------------
|
|
385
|
+
|
|
386
|
+
/** Run the linear converter on a single HELM, returning both the molfile
|
|
387
|
+
* and canonical SMILES. The molfile is the source of truth for structural
|
|
388
|
+
* checks (atom indices, coordinates); the SMILES is kept for legacy /
|
|
389
|
+
* presence-style assertions. */
|
|
390
|
+
async function helmRnaLinear(srcHelm: string): Promise<{molfile: string; smiles: string}> {
|
|
387
391
|
const srcCsv = `seq\n${srcHelm}`;
|
|
388
392
|
const df = DG.DataFrame.fromCsv(srcCsv);
|
|
389
393
|
await grok.data.detectSemanticTypes(df);
|
|
@@ -405,206 +409,481 @@ category('toAtomicLevelHelmRna', async () => {
|
|
|
405
409
|
throw new Error(`SMILES conversion threw for HELM '${srcHelm}': ${err?.message ?? err}\n` +
|
|
406
410
|
`--- MOLFILE START ---\n${molfile}\n--- MOLFILE END ---`);
|
|
407
411
|
}
|
|
408
|
-
// RDKit signals a parse failure by returning the literal string
|
|
409
|
-
// "MALFORMED_INPUT_VALUE" — surface it together with the offending molfile.
|
|
410
412
|
if (smiles === 'MALFORMED_INPUT_VALUE' || /^MALFORMED/.test(smiles)) {
|
|
411
413
|
throw new Error(`RDKit could not parse molfile produced for HELM '${srcHelm}'.\n` +
|
|
412
414
|
`--- MOLFILE START ---\n${molfile}\n--- MOLFILE END ---`);
|
|
413
415
|
}
|
|
414
|
-
return smiles;
|
|
416
|
+
return {molfile, smiles};
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
/**
 * Builds an RDKit `RDMol` from `molfile`, passes it to `fn`, and releases the
 * native handle afterwards — on success, when `fn` throws, and also when
 * RDKit hands back a mol object that reports itself invalid.
 *
 * Always pass the produced molfile (not its SMILES round-trip): the atom
 * indices and coordinates of the molfile are the ones the tests assert on.
 *
 * @param molfile Molblock text to parse.
 * @param fn Callback receiving the live mol; its return value is passed through.
 * @returns Whatever `fn` returns.
 * @throws Error when RDKit returns nothing or an invalid mol for `molfile`.
 */
function withMol<T>(molfile: string, fn: (mol: any) => T): T {
  const mol = rdKitModule.get_mol(molfile);
  if (!mol)
    throw new Error(`RDKit refused the produced molfile:\n${molfile}`);
  try {
    // The validity check lives inside the try so that an invalid-but-allocated
    // mol is still deleted by the finally block instead of leaking.
    if (!mol.is_valid())
      throw new Error(`RDKit refused the produced molfile:\n${molfile}`);
    return fn(mol);
  } finally {
    mol.delete();
  }
}
|
|
432
|
+
|
|
433
|
+
/**
 * True iff the molecule contains at least one match of the SMARTS query.
 *
 * The query is validated before use: an unparsable SMARTS raises instead of
 * being reported as "no match", so negative assertions cannot pass vacuously
 * because of a typo in the pattern.
 *
 * @param mol Live RDKit mol (see `withMol`).
 * @param smarts SMARTS pattern to search for.
 * @returns `true` when RDKit reports a match, `false` otherwise.
 * @throws Error when RDKit cannot build a valid query mol from `smarts`.
 */
function hasSmarts(mol: any, smarts: string): boolean {
  const qmol = rdKitModule.get_qmol(smarts);
  if (!qmol)
    throw new Error(`RDKit could not parse SMARTS '${smarts}'`);
  try {
    if (!qmol.is_valid())
      throw new Error(`RDKit could not parse SMARTS '${smarts}'`);
    const raw = mol.get_substruct_match(qmol);
    // RDKit JS returns the literal '{}' when there is no match.
    return !!raw && raw !== '{}';
  } finally {
    qmol.delete();
  }
}
|
|
444
|
+
|
|
445
|
+
/**
 * Number of distinct matches of the SMARTS query in the molecule.
 *
 * `get_substruct_matches` returns either '{}' (no match), a JSON array of
 * `{atoms,bonds}` objects, or — depending on the build — a single match
 * object. All three shapes are normalised to a count.
 *
 * The query is validated before use so that an unparsable SMARTS raises
 * instead of silently counting as zero matches.
 *
 * @param mol Live RDKit mol (see `withMol`).
 * @param smarts SMARTS pattern to count.
 * @returns Match count; 0 when there is no match.
 * @throws Error when RDKit cannot build a valid query mol from `smarts`.
 */
function countSmarts(mol: any, smarts: string): number {
  const qmol = rdKitModule.get_qmol(smarts);
  if (!qmol)
    throw new Error(`RDKit could not parse SMARTS '${smarts}'`);
  try {
    if (!qmol.is_valid())
      throw new Error(`RDKit could not parse SMARTS '${smarts}'`);
    const raw = mol.get_substruct_matches(qmol);
    if (!raw || raw === '{}') return 0;
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) return parsed.length;
    // Single-match object: counts as one match if it lists any atoms.
    if (parsed !== null && typeof parsed === 'object') {
      const atoms = (parsed as {atoms?: unknown}).atoms;
      if (Array.isArray(atoms)) return atoms.length > 0 ? 1 : 0;
    }
    return 0;
  } finally {
    qmol.delete();
  }
}
|
|
464
|
+
|
|
465
|
+
/**
 * Counts the atoms of one element by running a single-atom `[#Z]` SMARTS
 * through `countSmarts`. Matching on atomic number avoids the false hits a
 * plain regex over SMILES text would give for symbols such as `[Pa]` or `Si`.
 *
 * @param mol Live RDKit mol.
 * @param atomicNumber Atomic number Z of the element to count.
 * @returns Number of atoms with that atomic number.
 */
function countAtoms(mol: any, atomicNumber: number): number {
  const elementQuery = '[#' + atomicNumber + ']';
  return countSmarts(mol, elementQuery);
}
|
|
471
|
+
|
|
472
|
+
/**
 * Shared SMARTS patterns for the structural assertions in this category.
 * Bracketed atom specs are intentionally loose, because the same atom may be
 * rendered aromatic or kekulised depending on context.
 *
 * Cholesterol's gonane core has no entry here: it is detected by the
 * ring-counting heuristic in `looksLikeSteroid` rather than one rigid pattern.
 */
const SMARTS = {
  // Phosphodiester backbone C-O-P(=O)(X)-O-C with both bridging oxygens in
  // place. X is the non-bridging substituent: OH / O- for canonical `p`,
  // SH / S- for `sp`, and so on.
  // NOTE(review): SMARTS `X` counts implicit hydrogens as connections, so the
  // `OX1H` / `SX1H` alternatives look unreachable for a P-bound atom (P-OH is
  // already covered by `OX2`) — confirm before relying on them.
  PHOSPHODIESTER: '[#6][OX2][PX4](=[OX1])([OX2,SX2,OX1H,SX1H,OX1-,SX1-])[OX2][#6]',

  // Phosphorothioate: same diester, non-bridging substituent restricted to sulfur.
  PHOSPHOROTHIOATE_DIESTER: '[#6][OX2][PX4](=[OX1])([SX2,SX1H,SX1-])[OX2][#6]',

  // Direct sp3 C-P bond. Only shows up when a bridging O on the linker's
  // R-side was (incorrectly) removed.
  DIRECT_C_P: '[CX4][PX4]',

  // Furanose: five-membered ring of four carbons and one oxygen.
  FURANOSE: '[#6;R]1[#6;R][#6;R][#6;R][O;R]1',

  // Adenine bicyclic core including the exocyclic amine (aromatic form).
  ADENINE_RING: 'n1cnc2c1ncnc2N',

  // Cytosine: 4-amino-pyrimidone.
  CYTOSINE_RING: 'Nc1ccn[cH0](=O)n1',

  // m5C: cytosine carrying a methyl at position 5.
  METHYL_CYTOSINE: '[CH3]c1cn([!#1])c(=O)nc1N',

  // 2'-fluoro marker for fl2r: simply F on a ring sp3 carbon. No other monomer
  // in these tests carries fluorine, so this is unambiguous; position-specific
  // patterns proved too sensitive to ring traversal direction.
  FLUORO_ON_FURANOSE: '[F][CX4;R]',

  // Acetamide N-C(=O)-CH3: the GalNAc / N-acetyl marker.
  N_ACETYL: '[NX3]C(=O)[CH3]',

  // LNA-only marker: a carbon that is a member of two rings (R2). Plain
  // riboses have none; LNA's bicyclic core puts C2', C3' and C4' in two rings.
  LNA_BRIDGEHEAD: '[#6;R2]',

  // Methyl ether on a ring carbon (2'-OMe, the `m` ribose marker).
  TWO_PRIME_OME: '[CH3][OX2][#6;R]',

  // Biotin's cyclic urea: five-membered ring N-C(=O)-N-C-C. The two ring
  // carbons are also shared with biotin's sulfur-containing thiolane ring,
  // but that is meant to be checked by a separate ring-S query so this
  // pattern stays robust to atom-order variations.
  BIOTIN_UREIDO: '[#7;R]1[#6;R](=[OX1])[#7;R][#6;R][#6;R]1',
} as const;
|
|
516
|
+
|
|
517
|
+
/**
 * Heuristic steroid (cholesterol / gonane) detector built from two queries:
 *
 *  1. an all-carbon five-membered ring — the gonane D ring. The other
 *     monomers in these tests always have a heteroatom in their 5-rings
 *     (sugars and LNA: O; nucleobases: N; biotin: N or S);
 *  2. at least four carbons that belong to two rings at once, which confirms
 *     a fused polycyclic system rather than an isolated cyclopentane.
 *
 * @param mol Live RDKit mol.
 * @returns `true` only when both conditions hold.
 */
function looksLikeSteroid(mol: any): boolean {
  const hasCarbocyclicFiveRing = hasSmarts(mol, '[#6]1[#6][#6][#6][#6]1');
  const sharedRingCarbons = countSmarts(mol, '[#6;R2]');
  if (!hasCarbocyclicFiveRing) return false;
  return sharedRingCarbons >= 4;
}
|
|
530
|
+
|
|
531
|
+
/**
 * Parses the atom block of a V3000 molblock into 0-indexed records.
 *
 * Only the element symbol and the x / y coordinates are kept — enough for
 * layout assertions; z, charges, isotopes, etc. are ignored on purpose.
 *
 * The `M  V30` prefix is matched with flexible whitespace for both the block
 * delimiters and the atom rows, and coordinates may be written as plain
 * decimals or in scientific notation (e.g. `1e-7`).
 *
 * @param molfile V3000 molblock text.
 * @returns Atoms in file order; an empty array when there is no
 *   `BEGIN ATOM` line. If `END ATOM` is missing, everything after
 *   `BEGIN ATOM` is scanned.
 */
function parseV3KAtoms(molfile: string): { element: string; x: number; y: number }[] {
  const atoms: { element: string; x: number; y: number }[] = [];

  const beginMatch = /^M\s+V30\s+BEGIN\s+ATOM\b/m.exec(molfile);
  if (!beginMatch) return atoms;
  const afterBegin = molfile.substring(beginMatch.index + beginMatch[0].length);
  const endMatch = /^M\s+V30\s+END\s+ATOM\b/m.exec(afterBegin);
  const block = endMatch ? afterBegin.substring(0, endMatch.index) : afterBegin;

  // Decimal with optional sign, optional fraction and optional exponent.
  const num = String.raw`[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?`;
  // The trailing lookahead makes sure the y token is consumed whole, so a
  // value like `1.5e-3` can never be truncated to `1.5`.
  const lineRe = new RegExp(
    String.raw`^M\s+V30\s+(\d+)\s+(\S+)\s+(${num})\s+(${num})(?=\s|$)`, 'gm');

  for (const m of block.matchAll(lineRe)) {
    const idx = parseInt(m[1], 10) - 1;
    // Atoms are emitted in order; a row whose index does not continue the
    // sequence is skipped so array positions always equal atom indices.
    if (idx !== atoms.length) continue;
    atoms.push({element: m[2], x: parseFloat(m[3]), y: parseFloat(m[4])});
  }
  return atoms;
}
|
|
550
|
+
|
|
551
|
+
/**
 * Runs a SMARTS against the molecule and collects every atom index that
 * appears in any match. Used to bin atoms by role (sugar / base / etc.).
 *
 * The query is validated before use so that an unparsable SMARTS raises
 * instead of producing an empty set that looks like "nothing matched".
 *
 * @param mol Live RDKit mol.
 * @param smarts SMARTS pattern to search for.
 * @returns Set of matched atom indices; empty when there is no match.
 * @throws Error when RDKit cannot build a valid query mol from `smarts`.
 */
function collectMatchedAtoms(mol: any, smarts: string): Set<number> {
  const set = new Set<number>();
  const qmol = rdKitModule.get_qmol(smarts);
  if (!qmol)
    throw new Error(`RDKit could not parse SMARTS '${smarts}'`);
  try {
    if (!qmol.is_valid())
      throw new Error(`RDKit could not parse SMARTS '${smarts}'`);
    const raw = mol.get_substruct_matches(qmol);
    if (!raw || raw === '{}') return set;
    const parsed = JSON.parse(raw);
    // Normalise the single-match-object shape to a one-element list.
    const list = Array.isArray(parsed) ? parsed : [parsed];
    for (const m of list)
      for (const a of (m?.atoms ?? [])) set.add(a as number);
  } finally {
    qmol.delete();
  }
  return set;
}
|
|
568
|
+
|
|
569
|
+
/**
 * Layout assertion: every aromatic (nucleobase) atom must sit at a strictly
 * higher Y than every atom matched by the furanose SMARTS.
 *
 * With the abnormal-sugar override the base is placed above the topmost atom
 * of the sugar cluster — including LNA's 2',4'-bridge oxygen / CH2. Without
 * the override the LNA bridge sits above the base attachment point and this
 * assertion fails.
 *
 * Coordinates come from the molblock text, atom roles from RDKit substructure
 * matches on the same molblock.
 * NOTE(review): this assumes RDKit atom indices line up with molblock atom
 * order — confirm that no atoms (e.g. explicit hydrogens) are dropped on parse.
 *
 * @param molfile V3000 molblock to check.
 * @throws Error when the atom block cannot be parsed, when no sugar or base
 *   atoms are found, or when a matched atom index has no parsed coordinates.
 */
function expectBaseAboveSugar(molfile: string): void {
  const atoms = parseV3KAtoms(molfile);
  if (atoms.length === 0) throw new Error(`failed to parse molblock atoms`);

  // Looks up Y for an RDKit atom index, failing loudly (instead of with an
  // opaque TypeError) when the index is outside the parsed atom list.
  const yOf = (i: number): number => {
    const atom = atoms[i];
    if (!atom) {
      throw new Error(
        `RDKit atom index ${i} has no parsed molblock atom (parsed ${atoms.length} atoms)`);
    }
    return atom.y;
  };

  withMol(molfile, (mol) => {
    const sugarIdx = collectMatchedAtoms(mol, SMARTS.FURANOSE);
    // Base atoms = aromatic atoms (purines and pyrimidines aromatize in
    // RDKit's perception). Sugars are sp3 and won't match `[a]`.
    const baseIdx = collectMatchedAtoms(mol, '[a]');
    if (sugarIdx.size === 0)
      throw new Error('no furanose ring atoms found — cannot verify layout');
    if (baseIdx.size === 0)
      throw new Error('no aromatic base atoms found — cannot verify layout');
    let maxSugarY = -Infinity;
    for (const i of sugarIdx) maxSugarY = Math.max(maxSugarY, yOf(i));
    let minBaseY = Infinity;
    for (const i of baseIdx) minBaseY = Math.min(minBaseY, yOf(i));
    expect(minBaseY > maxSugarY, true,
      `expected base atoms above sugar (minBaseY=${minBaseY.toFixed(3)}, ` +
      `maxSugarY=${maxSugarY.toFixed(3)})`);
  });
}
|
|
416
596
|
|
|
417
597
|
// Baseline regression for plain (unmodified) RNA HELM. The linear path has to
// yield a genuine RNA backbone: one furanose ring per nucleotide, the two
// inter-nucleotide phosphodiester linkers (C-O-P(=O)(O)-O-C) of a
// three-nucleotide chain, and a recognisable base attached to the sugars.
test('rna-canonical', async () => {
  const converted = await helmRnaLinear(`RNA1{r(A)p.r(C)p.r(G)p}$$$$`);
  withMol(converted.molfile, (rdMol) => {
    // One ribose furanose ring for each of the three nucleotides.
    const riboseRings = countSmarts(rdMol, SMARTS.FURANOSE);
    expect(riboseRings >= 3, true, 'expected ≥ 3 furanose rings, got ' + riboseRings);

    // Two r-r joints, hence at least two diesters. The trailing 3' phosphate
    // is a monoester and is not matched by the diester SMARTS.
    const linkerCount = countSmarts(rdMol, SMARTS.PHOSPHODIESTER);
    expect(linkerCount >= 2, true,
      'expected ≥ 2 inter-nucleotide phosphodiester linkers, got ' + linkerCount);

    // A direct sp3 C–P bond would mean that a bridging oxygen went missing.
    const carbonPhosphorusBonds = countSmarts(rdMol, SMARTS.DIRECT_C_P);
    expect(carbonPhosphorusBonds, 0,
      'expected 0 direct C-P bonds (chain must use C-O-P-O-C), got ' + carbonPhosphorusBonds);

    // Purine check, done with the adenine ring pattern.
    const purineRings = countSmarts(rdMol, SMARTS.ADENINE_RING);
    expect(purineRings >= 1, true, 'expected ≥ 1 purine ring, got ' + purineRings);

    // One phosphorus per nucleotide as written in the HELM.
    const phosphorusAtoms = countAtoms(rdMol, 15);
    expect(phosphorusAtoms, 3, 'expected 3 phosphorus atoms');
  });
});
|
|
428
623
|
|
|
429
|
-
// Modified base: 5-methylcytosine. The methyl has to sit on C5 of a cytosine
// ring (not merely "some methyl on some ring"), it must appear exactly once in
// the modified row, and not at all in the unmodified control.
test('rna-modified-base', async () => {
  const control = await helmRnaLinear(`RNA1{r(C)p.r(A)p}$$$$`);
  const modified = await helmRnaLinear(`RNA1{r([m5C])p.r(A)p}$$$$`);

  // Control: plain cytosine carries no 5-methyl group.
  withMol(control.molfile, (rdMol) => {
    const m5cRings = countSmarts(rdMol, SMARTS.METHYL_CYTOSINE);
    expect(m5cRings, 0, 'plain r(C) must not contain 5-methylcytosine');
  });

  // Modified: exactly one m5C ring.
  withMol(modified.molfile, (rdMol) => {
    const m5cRings = countSmarts(rdMol, SMARTS.METHYL_CYTOSINE);
    expect(m5cRings, 1, 'r([m5C]) must contain exactly one 5-methylcytosine ring');
  });
});
|
|
437
641
|
|
|
438
|
-
// Modified phosphate: phosphorothioate (Rsp). The sulfur has to be attached to
// the phosphorus of the linker between positions 0 and 1 (not just present
// somewhere in the molecule), that linker has to stay a diester with both
// bridging oxygens, and the plain `p` at position 1 must be left untouched.
test('rna-modified-phosphate', async () => {
  const control = await helmRnaLinear(`RNA1{r(A)p.r(C)p}$$$$`);
  const modified = await helmRnaLinear(`RNA1{r(A)[Rsp].r(C)p}$$$$`);

  withMol(control.molfile, (rdMol) => {
    const sulfurAtoms = countAtoms(rdMol, 16);
    expect(sulfurAtoms, 0, 'plain RNA must contain no sulfur');
    const phosphorusAtoms = countAtoms(rdMol, 15);
    expect(phosphorusAtoms, 2, 'expected 2 phosphates in plain');
    const diesters = countSmarts(rdMol, SMARTS.PHOSPHODIESTER);
    expect(diesters >= 1, true, 'plain inter-nucleotide diester must be present');
  });

  withMol(modified.molfile, (rdMol) => {
    // The sulfur is bonded to phosphorus — either as P=S or as a single-bonded S.
    let sulfurOnPhosphorus = hasSmarts(rdMol, '[PX4]=S');
    if (!sulfurOnPhosphorus)
      sulfurOnPhosphorus = hasSmarts(rdMol, '[PX4][SX2,SX1H,SX1-]');
    expect(sulfurOnPhosphorus, true, 'sulfur must be bonded to a phosphorus atom');

    // A phosphorothioate diester keeps both bridging oxygens around the P.
    const thioDiesters = countSmarts(rdMol, SMARTS.PHOSPHOROTHIOATE_DIESTER);
    expect(thioDiesters, 1, 'expected exactly one phosphorothioate diester linker');

    // Rsp plus the trailing p.
    const phosphorusAtoms = countAtoms(rdMol, 15);
    expect(phosphorusAtoms, 2, 'expected 2 phosphates in modified');

    // Regression check from the sp/Rsp fix: no direct C-P bond.
    const carbonPhosphorusBonds = countSmarts(rdMol, SMARTS.DIRECT_C_P);
    expect(carbonPhosphorusBonds, 0, 'expected zero direct C-P bonds');
  });
});
|
|
669
|
+
|
|
670
|
+
// Modified sugar: 2'-fluoro ribose (fl2r). The fluorine has to land on a ring
// carbon of the sugar, not on some arbitrary aliphatic carbon.
test('rna-modified-sugar', async () => {
  const control = await helmRnaLinear(`RNA1{r(A)p.r(C)p}$$$$`);
  const modified = await helmRnaLinear(`RNA1{[fl2r](A)p.r(C)p}$$$$`);

  withMol(control.molfile, (rdMol) => {
    const fluorineAtoms = countAtoms(rdMol, 9);
    expect(fluorineAtoms, 0, 'plain RNA must contain no fluorine');
  });

  withMol(modified.molfile, (rdMol) => {
    const fluorineAtoms = countAtoms(rdMol, 9);
    expect(fluorineAtoms, 1, 'fl2r contributes exactly one fluorine');

    // The F sits on a ring sp3 carbon.
    const ringFluorines = countSmarts(rdMol, SMARTS.FLUORO_ON_FURANOSE);
    expect(ringFluorines >= 1, true,
      'fluorine must be on a furanose ring carbon (2\'-F)');

    // Swapping one ribose for a 2'-F ribose keeps both sugar rings.
    const sugarRings = countSmarts(rdMol, SMARTS.FURANOSE);
    expect(sugarRings >= 2, true, 'expected ≥ 2 furanose rings');
  });
});
|
|
464
688
|
|
|
465
689
|
// The HELM leaves out the trailing phosphate (3'-OH terminus on the sugar).
// The splitter must break the partial `r(C)` into [r, C], assembly must not
// emit the trailing P, and the resulting counts must agree.
test('rna-no-trailing-phosphate', async () => {
  const tailed = await helmRnaLinear(`RNA1{r(A)p.r(C)p}$$$$`);
  const untailed = await helmRnaLinear(`RNA1{r(A)p.r(C)}$$$$`);

  const phosphorusIn = (rdMol: any): number => countAtoms(rdMol, 15);
  const pCountWith = withMol(tailed.molfile, phosphorusIn);
  const pCountNoTail = withMol(untailed.molfile, phosphorusIn);
  expect(pCountWith, 2, 'with trailing P: 2 phosphates (1 linker + 1 trail)');
  expect(pCountNoTail, 1, 'no trailing P: 1 phosphate (the linker only)');

  withMol(untailed.molfile, (rdMol) => {
    // The one remaining phosphate is still a proper diester: both bridging
    // oxygens are there and there is no direct C-P bond.
    const diesters = countSmarts(rdMol, SMARTS.PHOSPHODIESTER);
    expect(diesters, 1, 'inter-nucleotide diester must still be present');
    const carbonPhosphorusBonds = countSmarts(rdMol, SMARTS.DIRECT_C_P);
    expect(carbonPhosphorusBonds, 0, 'no direct C-P bond');

    // Neither sugar ring was lost.
    const sugarRings = countSmarts(rdMol, SMARTS.FURANOSE);
    expect(sugarRings, 2, 'both furanose rings present');
  });
});
|
|
483
710
|
|
|
484
711
|
// No trailing phosphate, combined with sugar, base and phosphate modifications.
test('rna-no-trailing-phosphate-with-modifications', async () => {
  const converted = await helmRnaLinear(`RNA1{[fl2r]([m5C])[Rsp].r(A)}$$$$`);
  withMol(converted.molfile, (rdMol) => {
    // A single fluorine (2'-F of the fl2r sugar), located on a ring carbon.
    const fluorineAtoms = countAtoms(rdMol, 9);
    expect(fluorineAtoms, 1, 'expected exactly 1 fluorine');
    const ringFluorines = countSmarts(rdMol, SMARTS.FLUORO_ON_FURANOSE);
    expect(ringFluorines, 1, '2\'-F must be on a furanose ring carbon');

    // One P and one S: the lone Rsp linker, with no trailing phosphate.
    const phosphorusAtoms = countAtoms(rdMol, 15);
    expect(phosphorusAtoms, 1, 'expected exactly 1 phosphorus (Rsp)');
    const sulfurAtoms = countAtoms(rdMol, 16);
    expect(sulfurAtoms, 1, 'expected exactly 1 sulfur (Rsp)');

    // The linker is a phosphorothioate diester with both bridging oxygens.
    const thioDiesters = countSmarts(rdMol, SMARTS.PHOSPHOROTHIOATE_DIESTER);
    expect(thioDiesters, 1, 'Rsp linker must remain a phosphorothioate diester');

    // The m5C base is there.
    const m5cRings = countSmarts(rdMol, SMARTS.METHYL_CYTOSINE);
    expect(m5cRings, 1, 'expected one 5-methylcytosine base');
  });
});
|
|
493
730
|
|
|
494
|
-
// Sugar, base and phosphate modifications together — an end-to-end smoke test
// in which the structural fingerprint of each modification must be detectable.
test('rna-all-modifications', async () => {
  const converted = await helmRnaLinear(`RNA1{[fl2r]([m5C])[Rsp].r(A)p}$$$$`);
  withMol(converted.molfile, (rdMol) => {
    const ringFluorines = countSmarts(rdMol, SMARTS.FLUORO_ON_FURANOSE);
    expect(ringFluorines, 1, 'fl2r: 2\'-F on furanose');

    const m5cRings = countSmarts(rdMol, SMARTS.METHYL_CYTOSINE);
    expect(m5cRings, 1, 'm5C: 5-methylcytosine');

    const thioDiesters = countSmarts(rdMol, SMARTS.PHOSPHOROTHIOATE_DIESTER);
    expect(thioDiesters, 1, 'Rsp: phosphorothioate diester');

    const phosphorusAtoms = countAtoms(rdMol, 15);
    expect(phosphorusAtoms, 2, 'two phosphates (Rsp + trailing p)');

    const sulfurAtoms = countAtoms(rdMol, 16);
    expect(sulfurAtoms, 1, 'exactly one sulfur (from Rsp)');
  });
});
|
|
501
746
|
|
|
502
747
|
// 3'-end terminal modifier (GalNAc, R1 only). In HELM it occupies the
// "phosphate" slot of the last triple, yet it is really a chain end.
// The fingerprint pinned here is GalNAc's N-acetyl group, rather than
// "any nitrogen", which thymine alone would already satisfy.
test('rna-helm-3p-terminal-galnac', async () => {
  const converted = await helmRnaLinear(`RNA1{r(T)[GalNAc]}$$$$V2.0`);
  withMol(converted.molfile, (rdMol) => {
    // GalNAc takes the trailing P slot, so there is no phosphate at all.
    const phosphorusAtoms = countAtoms(rdMol, 15);
    expect(phosphorusAtoms, 0, 'GalNAc terminus: no P expected');

    // The acetamide group contributed by GalNAc.
    const acetamides = countSmarts(rdMol, SMARTS.N_ACETYL);
    expect(acetamides >= 1, true, 'expected N-acetyl group from GalNAc');

    // GalNAc is a hexopyranose: a six-membered C-C-C-C-C-O ring, in addition
    // to the thymine ring and the ribose furanose.
    const hasPyranose = hasSmarts(rdMol, '[#6]1[#6][#6][#6][#6][O]1');
    expect(hasPyranose, true, 'expected a pyranose (6-membered) ring from GalNAc');
  });
});
|
|
515
766
|
|
|
516
767
|
// 5'-end terminal modifier (Chol, R2 only) placed at the start of the chain.
// The fingerprint of cholesterol is its gonane core: four fused rings,
// including a cyclopentane fused to a cyclohexane (the D-C ring junction).
test('rna-helm-5p-terminal-chol', async () => {
  const converted = await helmRnaLinear(`RNA1{[Chol].r(T)}$$$$V2.0`);
  withMol(converted.molfile, (rdMol) => {
    const steroidFound = looksLikeSteroid(rdMol);
    expect(steroidFound, true,
      'Chol terminus must produce the steroid (gonane) ring system');

    // Chol stands in for the first sugar, leaving only the furanose of r(T).
    const sugarRings = countSmarts(rdMol, SMARTS.FURANOSE);
    expect(sugarRings, 1, 'expected exactly 1 furanose ring (from r(T))');
  });
});
|
|
529
780
|
|
|
530
781
|
// Chol at the 5' end plus an explicit trailing phosphate — the case that
// originally failed. Chain: Chol → r(T) → P-OH, i.e. the steroid rings, one
// ribose and one phosphate.
test('rna-helm-5p-terminal-chol-with-trailing-phosphate', async () => {
  const converted = await helmRnaLinear(`RNA1{[Chol].r(T)p}$$$$V2.0`);
  withMol(converted.molfile, (rdMol) => {
    const steroidFound = looksLikeSteroid(rdMol);
    expect(steroidFound, true, 'expected steroid ring system from Chol');

    const phosphorusAtoms = countAtoms(rdMol, 15);
    expect(phosphorusAtoms, 1, 'expected exactly 1 phosphorus');

    const sugarRings = countSmarts(rdMol, SMARTS.FURANOSE);
    expect(sugarRings, 1, 'expected 1 furanose');
  });
});
|
|
539
792
|
|
|
540
793
|
// Both terminal modifiers at once: Chol at 5', GalNAc at 3', and a single
// nucleotide in between. Both terminus markers must show up, and there must
// be no phosphate.
test('rna-helm-both-terminals', async () => {
  const converted = await helmRnaLinear(`RNA1{[Chol].r(T)[GalNAc]}$$$$V2.0`);
  withMol(converted.molfile, (rdMol) => {
    const phosphorusAtoms = countAtoms(rdMol, 15);
    expect(phosphorusAtoms, 0, 'expected zero phosphates');

    const steroidFound = looksLikeSteroid(rdMol);
    expect(steroidFound, true, 'expected steroid (Chol) ring system');

    const acetamides = countSmarts(rdMol, SMARTS.N_ACETYL);
    expect(acetamides >= 1, true, 'expected N-acetyl group from GalNAc');

    // r(T) supplies the furanose; the pyranose comes from GalNAc.
    const hasPyranose = hasSmarts(rdMol, '[#6]1[#6][#6][#6][#6][O]1');
    expect(hasPyranose, true, 'expected pyranose ring from GalNAc');
  });
});
|
|
808
|
+
|
|
809
|
+
// LNA (2',4'-BNA) regression. The structural marker is the bicyclic sugar:
// ring carbons of the LNA furanose are shared with a second ring (the
// C2'-O-CH2-C4' bridge). Standard riboses produce zero such R2-shared
// atoms — so this marker is exclusive to LNA.
//
// Additionally, the depiction-level claim ("base above sugar") is verified
// by reading molblock coordinates and confirming that the base atoms sit
// higher in Y than every sugar atom.
test('rna-helm-lna-base-above-sugar', async () => {
  const {molfile, smiles} = await helmRnaLinear(`RNA1{[lna](A)p.[lna](T)}$$$$V2.0`);
  // Single connected fragment: a '.' in the SMILES would mean the chain fell
  // apart. (The `[*]` query below only proves the molecule is non-empty.)
  expect(smiles.indexOf('.') === -1, true,
    `expected single connected fragment, got: ${smiles}`);
  withMol(molfile, (mol) => {
    expect(hasSmarts(mol, '[*]'), true, 'molecule must be non-empty');
    // LNA-specific bicyclic sugar: ring carbons shared between two rings.
    // Two LNA sugars with three such carbons each; the threshold is ≥ 4.
    const r2 = countSmarts(mol, SMARTS.LNA_BRIDGEHEAD);
    expect(r2 >= 4, true,
      `expected ≥ 4 ring carbons in 2 rings (LNA bicyclic), got ${r2}`);
    // Inter-nucleotide phosphodiester present, no direct C-P.
    expect(countSmarts(mol, SMARTS.PHOSPHODIESTER) >= 1, true,
      'expected ≥ 1 phosphodiester linker');
    expect(countSmarts(mol, SMARTS.DIRECT_C_P), 0,
      'expected zero direct C-P bonds');
    // Adenine + thymine present (thymine accepted in kekulised or aromatic form).
    expect(countSmarts(mol, SMARTS.ADENINE_RING) >= 1, true,
      'expected adenine ring (purine)');
    expect(hasSmarts(mol, '[CH3][#6]1=[#6][#7]([!#1])[#6](=O)[#7][#6]1=O') ||
      hasSmarts(mol, 'Cc1cn([!#1])c(=O)[nH]c1=O'),
    true, 'expected thymine ring (5-methyluracil)');
  });
  // Depiction: base atoms above sugar atoms in Y.
  expectBaseAboveSugar(molfile);
});
|
|
573
842
|
|
|
574
843
|
// Oxygen-count regression for GalNAc. The R1 placeholder atom (turned into
// 'O' from the "OH" cap) used to be left in the assembly, which put a stray
// OH on the chain-attach carbon. lna(T)GalNAc has a known oxygen count, and
// that extra OH would raise it to 11.
test('rna-helm-3p-terminal-galnac-no-extra-oh', async () => {
  const converted = await helmRnaLinear(`RNA1{[lna](T)[GalNAc]}$$$$V2.0`);
  withMol(converted.molfile, (rdMol) => {
    // Oxygens are counted as atoms through RDKit, so ring-closure digits or
    // bracketed atoms in a SMILES string cannot skew the number.
    const oxygenAtoms = countAtoms(rdMol, 8);
    expect(oxygenAtoms, 10, 'expected exactly 10 oxygen atoms in lna(T)GalNAc');

    // GalNAc occupies the trailing P slot: no phosphate.
    const phosphorusAtoms = countAtoms(rdMol, 15);
    expect(phosphorusAtoms, 0, 'expected no phosphate');

    // The N-acetyl group of GalNAc survived.
    const hasAcetamide = hasSmarts(rdMol, SMARTS.N_ACETYL);
    expect(hasAcetamide, true, 'expected GalNAc N-acetyl group');

    // The LNA sugar is still bicyclic.
    const sharedRingCarbons = countSmarts(rdMol, SMARTS.LNA_BRIDGEHEAD);
    expect(sharedRingCarbons >= 2, true, 'expected LNA bicyclic bridgeheads');
  });
});
|
|
588
864
|
|
|
589
865
|
// sp (and other phosphates whose R-cap is H) used to break the chain:
// removeHydrogen deleted the H placeholder, so terminalNodes[0] ended up
// pointing at an atom that no longer existed and the result was a molecule
// made of two disconnected fragments. The fix promotes the H cap to an O so
// the chain bond attaches to a real atom, which makes the linker a true
// phosphorothioate diester.
test('rna-helm-h-cap-phosphate-sp-connects', async () => {
  const converted = await helmRnaLinear(`RNA1{r(T)[sp].r(A)}$$$$V2.0`);
  const smiles = converted.smiles;

  // A '.' in the SMILES separates fragments, so its absence is the canonical
  // single-fragment check.
  const singleFragment = !smiles.includes('.');
  expect(singleFragment, true, 'expected single connected fragment, got: ' + smiles);

  withMol(converted.molfile, (rdMol) => {
    // The sulfur is bonded to the phosphorus rather than floating elsewhere.
    const sulfurOnPhosphorus = countSmarts(rdMol, '[PX4][SX2,SX1H,SX1-]');
    expect(sulfurOnPhosphorus, 1, 'sp\'s sulfur must be on its phosphorus');

    const phosphorusAtoms = countAtoms(rdMol, 15);
    expect(phosphorusAtoms, 1, 'one phosphorus from the sp linker');

    const sulfurAtoms = countAtoms(rdMol, 16);
    expect(sulfurAtoms, 1, 'one sulfur from the sp linker');

    const thioDiesters = countSmarts(rdMol, SMARTS.PHOSPHOROTHIOATE_DIESTER);
    expect(thioDiesters, 1,
      'sp linker must be a phosphorothioate diester (C-O-P-O-C)');

    const carbonPhosphorusBonds = countSmarts(rdMol, SMARTS.DIRECT_C_P);
    expect(carbonPhosphorusBonds, 0, 'no direct C-P bond');
  });
});
|
|
609
888
|
|
|
610
889
|
// Regression: H-cap phosphates (sp et al.) used to drop the bridging O
|
|
@@ -617,79 +896,94 @@ category('toAtomicLevelHelmRna', async () => {
|
|
|
617
896
|
// Use m(2'-OMe ribose) so we can also verify the methoxy group survives
|
|
618
897
|
// the sp chain assembly.
|
|
619
898
|
test('rna-helm-sp-bridging-o-preserved', async () => {
|
|
620
|
-
const
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
899
|
+
const {molfile} = await helmRnaLinear(`RNA1{m(A)[sp].r(A)[sp]}$$$$V2.0`);
|
|
900
|
+
withMol(molfile, (mol) => {
|
|
901
|
+
// Element counts via RDKit (no SMILES regex).
|
|
902
|
+
expect(countAtoms(mol, 15), 2, 'expected exactly 2 phosphorus atoms');
|
|
903
|
+
expect(countAtoms(mol, 16), 2, 'expected exactly 2 sulfur atoms');
|
|
904
|
+
// Each P carries its own sulfur (not floating somewhere else).
|
|
905
|
+
expect(countSmarts(mol, '[PX4][SX2,SX1H,SX1-]'), 2,
|
|
906
|
+
'both sulfurs must be bonded to a phosphorus atom');
|
|
907
|
+
// Inter-nucleotide sp is a phosphorothioate diester (bridging O on
|
|
908
|
+
// both sides). The trailing sp is a monoester (P-O-cap on the 3'
|
|
909
|
+
// side), so we expect exactly ONE diester match.
|
|
910
|
+
expect(countSmarts(mol, SMARTS.PHOSPHOROTHIOATE_DIESTER), 1,
|
|
911
|
+
'inter-nucleotide sp must remain a phosphorothioate diester');
|
|
912
|
+
// Bridging-O presence on the 5' side of every phosphorothioate.
|
|
913
|
+
// The diester P has two C-O-P matches (5' and 3' bridges) and the
|
|
914
|
+
// terminal monoester P has one — total 3 matches across both linkers.
|
|
915
|
+
// The bug we guard against (lost 3'-O) would drop this to 1 or 2.
|
|
916
|
+
expect(countSmarts(mol, '[CX4][OX2][PX4](=[OX1])[SX2,SX1H,SX1-]'), 3,
|
|
917
|
+
'every C-O-P-P=O-S match must be present (3: 2 from diester, 1 from monoester)');
|
|
918
|
+
// No direct C-P bond anywhere (the bug we're guarding against).
|
|
919
|
+
expect(countSmarts(mol, SMARTS.DIRECT_C_P), 0,
|
|
920
|
+
'expected zero direct C-P bonds — bridging O must be present');
|
|
921
|
+
// Methoxy group on the m sugar must survive — exactly one (m only at
|
|
922
|
+
// position 0). 2'-OMe = OCH3 on a ring carbon. The 2nd nucleotide is
|
|
923
|
+
// r(A), no methoxy.
|
|
924
|
+
expect(countSmarts(mol, SMARTS.TWO_PRIME_OME), 1,
|
|
925
|
+
'expected exactly one 2\'-OMe group on the m sugar');
|
|
926
|
+
});
|
|
642
927
|
});
|
|
643
928
|
|
|
644
929
|
// R-group swap heuristic: a single-R-group terminal monomer can be placed
|
|
645
930
|
// at either end of a HELM chain, even if its R-group label "should" only
|
|
646
931
|
// belong at one end. The conversion swaps rNodes so the existing
|
|
647
|
-
// TERMINAL_5P/3P role logic still works.
|
|
932
|
+
// TERMINAL_5P/3P role logic still works. Each test asserts the terminal
|
|
933
|
+
// monomer's STRUCTURAL fingerprint as well as topology.
|
|
648
934
|
//
|
|
649
935
|
// Bio (R1 only) — naturally a 3'-terminal, but we accept it at 5' too.
|
|
936
|
+
// Biotin's fingerprint is its bicyclic head: a thiolane (C-C-C-C-S 5-mem
|
|
937
|
+
// ring) fused to an imidazolidone (N-C(=O)-N 5-mem ring with two NH).
|
|
650
938
|
test('rna-helm-bio-terminal-at-end', async () => {
|
|
651
|
-
const
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
939
|
+
const {molfile} = await helmRnaLinear(`RNA1{r(T)[Bio]}$$$$V2.0`);
|
|
940
|
+
withMol(molfile, (mol) => {
|
|
941
|
+
expect(countAtoms(mol, 15), 0, 'Bio terminus: no phosphate');
|
|
942
|
+
// Biotin's cyclic urea (ureido) ring.
|
|
943
|
+
expect(hasSmarts(mol, SMARTS.BIOTIN_UREIDO), true,
|
|
944
|
+
'expected biotin ureido (cyclic urea) ring system');
|
|
945
|
+
// Biotin's thiolane: a sulfur in a ring.
|
|
946
|
+
expect(hasSmarts(mol, '[#16;R]'), true,
|
|
947
|
+
'expected ring sulfur (biotin\'s thiolane)');
|
|
948
|
+
// r(T) sugar still present.
|
|
949
|
+
expect(countSmarts(mol, SMARTS.FURANOSE), 1, 'expected the r(T) furanose');
|
|
950
|
+
});
|
|
660
951
|
});
|
|
661
952
|
|
|
662
953
|
test('rna-helm-bio-terminal-at-start', async () => {
|
|
663
|
-
const
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
954
|
+
const {molfile} = await helmRnaLinear(`RNA1{[Bio].r(T)}$$$$V2.0`);
|
|
955
|
+
withMol(molfile, (mol) => {
|
|
956
|
+
expect(countAtoms(mol, 15), 0, 'no phosphates');
|
|
957
|
+
// Biotin's ureido + ring-S marker (the thiolane).
|
|
958
|
+
expect(hasSmarts(mol, SMARTS.BIOTIN_UREIDO), true,
|
|
959
|
+
'expected biotin ureido ring system at the 5\' end');
|
|
960
|
+
expect(hasSmarts(mol, '[#16;R]'), true,
|
|
961
|
+
'expected biotin\'s thiolane ring sulfur');
|
|
962
|
+
// r(T) sugar still present and connected (single fragment via R-swap).
|
|
963
|
+
expect(countSmarts(mol, SMARTS.FURANOSE), 1, 'expected the r(T) furanose');
|
|
964
|
+
});
|
|
672
965
|
});
|
|
673
966
|
|
|
674
967
|
// Chol (R2 only) — naturally a 5'-terminal, but we accept it at 3' too.
|
|
968
|
+
// Chol's structural fingerprint is the steroid 4-ring core plus a
|
|
969
|
+
// ring-fused junction, see `looksLikeSteroid()`.
|
|
675
970
|
test('rna-helm-chol-terminal-at-start', async () => {
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
971
|
+
const {molfile} = await helmRnaLinear(`RNA1{[Chol].r(T)}$$$$V2.0`);
|
|
972
|
+
withMol(molfile, (mol) => {
|
|
973
|
+
expect(looksLikeSteroid(mol), true,
|
|
974
|
+
'expected steroid (gonane) ring system from Chol at 5\'');
|
|
975
|
+
expect(countSmarts(mol, SMARTS.FURANOSE), 1, 'expected one r(T) furanose');
|
|
976
|
+
});
|
|
681
977
|
});
|
|
682
978
|
|
|
683
979
|
test('rna-helm-chol-terminal-at-end', async () => {
|
|
684
|
-
const
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
const pCount = (smiles.match(/P/g) || []).length;
|
|
692
|
-
expect(pCount, 0, `expected 0 phosphates with Chol terminal: ${smiles}`);
|
|
980
|
+
const {molfile} = await helmRnaLinear(`RNA1{r(T)[Chol]}$$$$V2.0`);
|
|
981
|
+
withMol(molfile, (mol) => {
|
|
982
|
+
expect(countAtoms(mol, 15), 0, 'no phosphate when Chol replaces trailing P');
|
|
983
|
+
expect(looksLikeSteroid(mol), true,
|
|
984
|
+
'expected steroid (gonane) ring system from Chol at 3\'');
|
|
985
|
+
expect(countSmarts(mol, SMARTS.FURANOSE), 1, 'expected one r(T) furanose');
|
|
986
|
+
});
|
|
693
987
|
});
|
|
694
988
|
});
|
|
695
989
|
|