@datagrok/bio 2.27.6 → 2.27.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.27.6",
8
+ "version": "2.27.8",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,7 +44,7 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.64.1",
47
+ "@datagrok-libraries/bio": "^5.65.1",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.9",
49
49
  "@datagrok-libraries/math": "^1.2.6",
50
50
  "@datagrok-libraries/ml": "^6.10.11",
@@ -498,6 +498,199 @@ category('toAtomicLevelHelmRna', async () => {
498
498
  expect(/S/.test(smiles), true, `expected sulfur: ${smiles}`);
499
499
  expect(/P/.test(smiles), true, `expected phosphorus: ${smiles}`);
500
500
  });
501
+
502
+ // 3'-end terminal modifier (GalNAc, R1 only). HELM puts it in the
503
+ // "phosphate" slot of the last triple, but it's actually a chain end.
504
+ // Expectations: chain ends at GalNAc (no extra OH cap), no phosphate
505
+ // at all, GalNAc structural features (acetamide N) are present.
506
+ test('rna-helm-3p-terminal-galnac', async () => {
507
+ const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[GalNAc]}$$$$V2.0`);
508
+ const pCount = (smiles.match(/P/g) || []).length;
509
+ expect(pCount, 0, `expected 0 phosphates (GalNAc replaces P): ${smiles}`);
510
+ expect(/N/.test(smiles), true, `expected nitrogen from GalNAc acetamide: ${smiles}`);
511
+ // Sanity: SMILES should not be RDKit's parse-failure sentinel.
512
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
513
+ `valid SMILES expected: ${smiles}`);
514
+ });
515
+
516
+ // 5'-end terminal modifier (Chol, R2 only) at the start of the chain.
517
+ // HELM puts Chol where the first sugar would be. With no trailing P,
518
+ // the chain is Chol → r(T)-3'-OH.
519
+ test('rna-helm-5p-terminal-chol', async () => {
520
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)}$$$$V2.0`);
521
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
522
+ `valid SMILES expected: ${smiles}`);
523
+ // Cholesterol has 4 fused rings (3 six-membered + 1 five-membered) — sanity-check
524
+ // by requiring that each of the digits 1-4 appears somewhere in the SMILES
525
+ // (the cholesterol fragment alone uses ring-closure labels 1-4).
526
+ expect(/1/.test(smiles) && /2/.test(smiles) && /3/.test(smiles) && /4/.test(smiles), true,
527
+ `expected cholesterol ring fragments: ${smiles}`);
528
+ });
529
+
530
+ // Chol at 5' with explicit trailing phosphate (the original failing case).
531
+ // Chain: Chol → r(T) → P-OH. Should produce exactly 1 phosphate.
532
+ test('rna-helm-5p-terminal-chol-with-trailing-phosphate', async () => {
533
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)p}$$$$V2.0`);
534
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
535
+ `valid SMILES expected: ${smiles}`);
536
+ const pCount = (smiles.match(/P/g) || []).length;
537
+ expect(pCount, 1, `expected exactly 1 phosphate: ${smiles}`);
538
+ });
539
+
540
+ // Both terminals at once: Chol at 5', GalNAc at 3', single nucleotide
541
+ // in between. No phosphates anywhere.
542
+ test('rna-helm-both-terminals', async () => {
543
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)[GalNAc]}$$$$V2.0`);
544
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
545
+ `valid SMILES expected: ${smiles}`);
546
+ const pCount = (smiles.match(/P/g) || []).length;
547
+ expect(pCount, 0, `expected 0 phosphates with both terminals: ${smiles}`);
548
+ });
549
+
550
+ // LNA (2,4-BNA) regression. The 2,4-O-CH2 bridge sits ABOVE C1' once the
551
+ // sugar is oriented with R1/R2 atoms horizontal, so the natural R3 vector
552
+ // points sideways instead of up. Without the abnormal-sugar override the
553
+ // base ends up sideways from the sugar (or worse, overlapping it). The
554
+ // assertion here is structural: must produce a valid single-fragment
555
+ // SMILES with the LNA-specific bridge oxygen plus the normal nucleoside
556
+ // features. Coordinates aren't checked — only connectivity.
557
+ test('rna-helm-lna-base-above-sugar', async () => {
558
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[lna](A)p.[lna](T)}$$$$V2.0`);
559
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
560
+ `valid SMILES expected: ${smiles}`);
561
+ expect(smiles.indexOf('.') === -1, true,
562
+ `expected single fragment: ${smiles}`);
563
+ // Sanity check for nitrogens — adenine brings 5 (4 ring + 1 NH2) and
564
+ // thymine brings 2 (both ring), so at least 7 total. Match both
565
+ // uppercase (N) and lowercase aromatic (n, [nH]) — N atoms in heterocyclic
566
+ // SMILES are written lowercase when aromatic.
567
+ const nCount = (smiles.match(/[Nn]/g) || []).length;
568
+ expect(nCount >= 7, true, `expected at least 7 nitrogens: ${smiles}`);
569
+ // One inter-nucleotide phosphate.
570
+ const pCount = (smiles.match(/P/g) || []).length;
571
+ expect(pCount, 1, `expected exactly 1 phosphate: ${smiles}`);
572
+ });
573
+
574
+ // GalNAc oxygen-count regression. Previously the R1 placeholder atom
575
+ // (substituted to 'O' from the "OH" cap) was left in the assembly,
576
+ // adding a stray OH on the chain-attach carbon. lna(T)GalNAc has known
577
+ // expected SMILES with exactly 10 oxygens.
578
+ test('rna-helm-3p-terminal-galnac-no-extra-oh', async () => {
579
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[lna](T)[GalNAc]}$$$$V2.0`);
580
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
581
+ `valid SMILES expected: ${smiles}`);
582
+ // Count oxygen atoms — every uppercase O in the SMILES string is matched,
583
+ // including bracketed forms such as [O-] or [OH]; aromatic 'o' and
584
+ // ring-closure digits are not matched by this regex.
585
+ const oCount = (smiles.match(/O/g) || []).length;
586
+ expect(oCount, 10, `expected exactly 10 oxygen atoms in lna-T-GalNAc: ${smiles}`);
587
+ });
588
+
589
+ // sp (and similar phosphates with R-cap = H) used to disconnect the chain
590
+ // because the H placeholder was removed by removeHydrogen, leaving
591
+ // terminalNodes[0] pointing at the now-deleted atom. The result was a
592
+ // SMILES with two disconnected fragments separated by '.'. The fix:
593
+ // when the cap is H, leave terminalNodes[0] at its original
594
+ // setTerminalNodes value (the atom previously bonded to R1, e.g. P) so
595
+ // the chain bond goes there directly.
596
+ test('rna-helm-h-cap-phosphate-sp-connects', async () => {
597
+ const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[sp].r(A)}$$$$V2.0`);
598
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
599
+ `valid SMILES expected: ${smiles}`);
600
+ // No '.' → single connected fragment.
601
+ expect(smiles.indexOf('.') === -1, true,
602
+ `expected single fragment (no '.' separator): ${smiles}`);
603
+ // Exactly one phosphorus from the sp linker.
604
+ const pCount = (smiles.match(/P/g) || []).length;
605
+ expect(pCount, 1, `expected exactly 1 phosphate: ${smiles}`);
606
+ // sp carries a sulfur on the phosphate.
607
+ expect(/S/.test(smiles), true, `expected sulfur from sp: ${smiles}`);
608
+ });
609
+
610
+ // Regression: H-cap phosphates (sp et al.) used to drop the bridging O
611
+ // on the 3' side of the linkage. The previous sugar's 3'-O is removed
612
+ // unconditionally during sugar processing on the assumption that the
613
+ // following linker brings its own bridging oxygen via the R1 cap; with
614
+ // an H cap that assumption breaks and the chain ended up as
615
+ // C3'-P(=O)(SH)-O-C5' instead of the proper C3'-O-P(=O)(SH)-O-C5'.
616
+ // The fix promotes the H cap to an O so the bridging atom always exists.
617
+ // Use m(2'-OMe ribose) so we can also verify the methoxy group survives
618
+ // the sp chain assembly.
619
+ test('rna-helm-sp-bridging-o-preserved', async () => {
620
+ const smiles = await helmRnaLinearToSmiles(`RNA1{m(A)[sp].r(A)[sp]}$$$$V2.0`);
621
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
622
+ `valid SMILES expected: ${smiles}`);
623
+ // Single connected fragment.
624
+ expect(smiles.indexOf('.') === -1, true,
625
+ `expected single fragment: ${smiles}`);
626
+ // 2 sp linkers → 2 phosphorus atoms.
627
+ const pCount = (smiles.match(/P/g) || []).length;
628
+ expect(pCount, 2, `expected exactly 2 phosphates: ${smiles}`);
629
+ // 2 sulfurs (one per sp).
630
+ const sCount = (smiles.match(/S/g) || []).length;
631
+ expect(sCount, 2, `expected exactly 2 sulfurs (one per sp): ${smiles}`);
632
+ // No C-P bond — every P should be bordered by O on both chain sides.
633
+ // A P preceded directly by an aliphatic carbon (lowercase 'c' is
634
+ // aromatic; capital 'C' is aliphatic) means the bridging O was lost.
635
+ expect(/C\d*P|CP|cP/.test(smiles), false,
636
+ `expected no direct C-P bond (bridging O missing): ${smiles}`);
637
+ // 2' methoxy on m must survive — methyl ether oxygen plus three oxygens
638
+ // per nucleotide gives plenty of O atoms; the structural assertion
639
+ // above is the strict one. Sanity-check that a methyl ether (OC) is
640
+ // present somewhere.
641
+ expect(/OC|CO/.test(smiles), true, `expected methoxy fragment: ${smiles}`);
642
+ });
643
+
644
+ // R-group swap heuristic: a single-R-group terminal monomer can be placed
645
+ // at either end of a HELM chain, even if its R-group label "should" only
646
+ // belong at one end. The conversion swaps rNodes so the existing
647
+ // TERMINAL_5P/3P role logic still works.
648
+ //
649
+ // Bio (R1 only) — naturally a 3'-terminal, but we accept it at 5' too.
650
+ test('rna-helm-bio-terminal-at-end', async () => {
651
+ const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[Bio]}$$$$V2.0`);
652
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
653
+ `valid SMILES expected: ${smiles}`);
654
+ // Bio replaces the trailing P → no phosphate at all.
655
+ const pCount = (smiles.match(/P/g) || []).length;
656
+ expect(pCount, 0, `expected 0 phosphates with Bio terminal: ${smiles}`);
657
+ // Single connected fragment.
658
+ expect(smiles.indexOf('.') === -1, true,
659
+ `expected single fragment: ${smiles}`);
660
+ });
661
+
662
+ test('rna-helm-bio-terminal-at-start', async () => {
663
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[Bio].r(T)}$$$$V2.0`);
664
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
665
+ `valid SMILES expected: ${smiles}`);
666
+ // Single connected fragment (Bio at start must connect to following sugar).
667
+ expect(smiles.indexOf('.') === -1, true,
668
+ `expected single fragment: ${smiles}`);
669
+ // No phosphates (Bio doesn't carry P, no trailing p in HELM).
670
+ const pCount = (smiles.match(/P/g) || []).length;
671
+ expect(pCount, 0, `expected 0 phosphates: ${smiles}`);
672
+ });
673
+
674
+ // Chol (R2 only) — naturally a 5'-terminal, but we accept it at 3' too.
675
+ test('rna-helm-chol-terminal-at-start', async () => {
676
+ // Already covered by rna-helm-5p-terminal-chol; this is the canonical case.
677
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)}$$$$V2.0`);
678
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
679
+ `valid SMILES expected: ${smiles}`);
680
+ expect(smiles.indexOf('.') === -1, true, `expected single fragment: ${smiles}`);
681
+ });
682
+
683
+ test('rna-helm-chol-terminal-at-end', async () => {
684
+ const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[Chol]}$$$$V2.0`);
685
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
686
+ `valid SMILES expected: ${smiles}`);
687
+ // Single connected fragment.
688
+ expect(smiles.indexOf('.') === -1, true,
689
+ `expected single fragment: ${smiles}`);
690
+ // Chol replaces the trailing P → no phosphate.
691
+ const pCount = (smiles.match(/P/g) || []).length;
692
+ expect(pCount, 0, `expected 0 phosphates with Chol terminal: ${smiles}`);
693
+ });
501
694
  });
502
695
 
503
696
 
@@ -9,5 +9,5 @@ export const LIB_SETTINGS_FOR_TESTS: UserLibSettings =
9
9
  {explicit: ['HELMCoreLibrary.json', 'polytool-lib.json'], exclude: [], duplicateMonomerPreferences: {}};
10
10
 
11
11
  /** Summary for settings {@link LIB_SETTINGS_FOR_TESTS} */
12
- export const monomerLibForTestsSummary: MonomerLibSummaryType = {'PEPTIDE': 334, 'RNA': 383, 'CHEM': 0};
12
+ export const monomerLibForTestsSummary: MonomerLibSummaryType = {'PEPTIDE': 334, 'RNA': 390, 'CHEM': 0};
13
13
 
@@ -167,10 +167,11 @@ export class MonomerLibBase implements IMonomerLibBase {
167
167
 
168
168
  /** Get or create {@link Monomer} object (in case it is missing in monomer library current config) */
169
169
  let m: Monomer | null = this.getMonomer(pt, elem);
170
- if (m && biotype == HelmTypes.LINKER && (m[REQ.RGROUPS]?.length ?? 0) < 2) {
171
- // Web Editor expects null
172
- return null;
173
- }
170
+ // Terminal linkers may have only one R-group, so they are no longer rejected here.
171
+ // if (m && biotype == HelmTypes.LINKER && (m[REQ.RGROUPS]?.length ?? 0) < 2) {
172
+ // // Web Editor expects null
173
+ // return null;
174
+ // }
174
175
  if (m && biotype == HelmTypes.SUGAR && (m[REQ.RGROUPS]?.length ?? 0) < 3) {
175
176
  // Web Editor expects null
176
177
  return null;
@@ -24,8 +24,16 @@ export async function toAtomicLevelSingle(sequence: DG.SemanticValue): Promise<{
24
24
  errorText = 'No sequence handler found';
25
25
  return {errorText, mol: ''};
26
26
  }
27
- if ((seqSh.getSplitted(sequence.cell.rowIndex, 60)?.length ?? 100) > 50) {
28
- errorText = 'Maximum number of monomers is 50';
27
+
28
+ let maxLength = 50;
29
+ if (seqSh.isHelm()) {
30
+ const splitted = seqSh.getSplitted(sequence.cell.rowIndex);
31
+ if (!splitted.graphInfo?.polymerTypes?.some((pt) => pt !== 'RNA'))
32
+ maxLength = 150;
33
+ }
34
+
35
+ if ((seqSh.getSplitted(sequence.cell.rowIndex)?.length ?? 100) > maxLength) {
36
+ errorText = 'Maximum number of monomers is ' + maxLength;
29
37
  return {errorText, mol: ''};
30
38
  }
31
39
  const singleValCol = DG.Column.fromStrings('singleVal', [sequence.value]);
@@ -73,8 +81,11 @@ export async function toAtomicLevelWidget(sequence: DG.SemanticValue): Promise<D
73
81
  const molSemanticValue = DG.SemanticValue.fromValueType(res.mol, DG.SEMTYPE.MOLECULE);
74
82
  const panel = ui.panels.infoPanel(molSemanticValue);
75
83
  let molPanel: DG.Widget | null = null;
76
- if (panel)
77
- molPanel = DG.Widget.fromRoot(panel.root);
84
+ if (panel) {
85
+ const acc = ui.accordion('Sequence Molfile details');
86
+ acc.addPane('Explore', () => panel.root);
87
+ molPanel = DG.Widget.fromRoot(acc.root);
88
+ }
78
89
 
79
90
 
80
91
  const root = grok.chem.drawMolecule(res.mol, 300, 300, false);