@datagrok/bio 2.27.6 → 2.27.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CREDITS.md +246 -0
- package/dist/455.js +1 -1
- package/dist/455.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/files/monomer-libraries/HELMCoreLibrary.json +175 -0
- package/files/tests/to-atomic-level-dna-fasta-output.csv +7152 -7152
- package/package.json +2 -2
- package/src/tests/to-atomic-level-tests.ts +193 -0
- package/src/utils/monomer-lib/consts.ts +1 -1
- package/src/utils/monomer-lib/monomer-lib-base.ts +5 -4
- package/src/widgets/to-atomic-level-widget.ts +15 -4
- package/test-console-output-1.log +643 -605
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.27.6",
|
|
8
|
+
"version": "2.27.8",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,7 +44,7 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.65.1",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.9",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.6",
|
|
50
50
|
"@datagrok-libraries/ml": "^6.10.11",
|
|
@@ -498,6 +498,199 @@ category('toAtomicLevelHelmRna', async () => {
|
|
|
498
498
|
expect(/S/.test(smiles), true, `expected sulfur: ${smiles}`);
|
|
499
499
|
expect(/P/.test(smiles), true, `expected phosphorus: ${smiles}`);
|
|
500
500
|
});
|
|
501
|
+
|
|
502
|
+
// 3'-end terminal modifier (GalNAc, R1 only). HELM puts it in the
|
|
503
|
+
// "phosphate" slot of the last triple, but it's actually a chain end.
|
|
504
|
+
// Expectations: chain ends at GalNAc (no extra OH cap), no phosphate
|
|
505
|
+
// at all, GalNAc structural features (acetamide N) are present.
|
|
506
|
+
test('rna-helm-3p-terminal-galnac', async () => {
|
|
507
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[GalNAc]}$$$$V2.0`);
|
|
508
|
+
const pCount = (smiles.match(/P/g) || []).length;
|
|
509
|
+
expect(pCount, 0, `expected 0 phosphates (GalNAc replaces P): ${smiles}`);
|
|
510
|
+
expect(/N/.test(smiles), true, `expected nitrogen from GalNAc acetamide: ${smiles}`);
|
|
511
|
+
// Sanity: SMILES should not be RDKit's parse-failure sentinel.
|
|
512
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
513
|
+
`valid SMILES expected: ${smiles}`);
|
|
514
|
+
});
|
|
515
|
+
|
|
516
|
+
// 5'-end terminal modifier (Chol, R2 only) at the start of the chain.
|
|
517
|
+
// HELM puts Chol where the first sugar would be. With no trailing P,
|
|
518
|
+
// the chain is Chol → r(T)-3'-OH.
|
|
519
|
+
test('rna-helm-5p-terminal-chol', async () => {
|
|
520
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)}$$$$V2.0`);
|
|
521
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
522
|
+
`valid SMILES expected: ${smiles}`);
|
|
523
|
+
// Cholesterol has 4 fused rings (3 six-membered + 1 five-membered) — sanity-check
|
|
524
|
+
// by requiring at least 4 non-aromatic ring closures (digits 1-4) in the SMILES
|
|
525
|
+
// (cholesterol fragment alone uses ring closures 1-4).
|
|
526
|
+
expect(/1/.test(smiles) && /2/.test(smiles) && /3/.test(smiles) && /4/.test(smiles), true,
|
|
527
|
+
`expected cholesterol ring fragments: ${smiles}`);
|
|
528
|
+
});
|
|
529
|
+
|
|
530
|
+
// Chol at 5' with explicit trailing phosphate (the original failing case).
|
|
531
|
+
// Chain: Chol → r(T) → P-OH. Should produce exactly 1 phosphate.
|
|
532
|
+
test('rna-helm-5p-terminal-chol-with-trailing-phosphate', async () => {
|
|
533
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)p}$$$$V2.0`);
|
|
534
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
535
|
+
`valid SMILES expected: ${smiles}`);
|
|
536
|
+
const pCount = (smiles.match(/P/g) || []).length;
|
|
537
|
+
expect(pCount, 1, `expected exactly 1 phosphate: ${smiles}`);
|
|
538
|
+
});
|
|
539
|
+
|
|
540
|
+
// Both terminals at once: Chol at 5', GalNAc at 3', single nucleotide
|
|
541
|
+
// in between. No phosphates anywhere.
|
|
542
|
+
test('rna-helm-both-terminals', async () => {
|
|
543
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)[GalNAc]}$$$$V2.0`);
|
|
544
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
545
|
+
`valid SMILES expected: ${smiles}`);
|
|
546
|
+
const pCount = (smiles.match(/P/g) || []).length;
|
|
547
|
+
expect(pCount, 0, `expected 0 phosphates with both terminals: ${smiles}`);
|
|
548
|
+
});
|
|
549
|
+
|
|
550
|
+
// LNA (2,4-BNA) regression. The 2,4-O-CH2 bridge sits ABOVE C1' once the
|
|
551
|
+
// sugar is oriented with R1/R2 atoms horizontal, so the natural R3 vector
|
|
552
|
+
// points sideways instead of up. Without the abnormal-sugar override the
|
|
553
|
+
// base ends up sideways from the sugar (or worse, overlapping it). The
|
|
554
|
+
// assertion here is structural: must produce a valid single-fragment
|
|
555
|
+
// SMILES with the LNA-specific bridge oxygen plus the normal nucleoside
|
|
556
|
+
// features. Coordinates aren't checked — only connectivity.
|
|
557
|
+
test('rna-helm-lna-base-above-sugar', async () => {
|
|
558
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{[lna](A)p.[lna](T)}$$$$V2.0`);
|
|
559
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
560
|
+
`valid SMILES expected: ${smiles}`);
|
|
561
|
+
expect(smiles.indexOf('.') === -1, true,
|
|
562
|
+
`expected single fragment: ${smiles}`);
|
|
563
|
+
// Sanity check for nitrogens — adenine brings 5 (4 ring + 1 NH2) and
|
|
564
|
+
// thymine brings 2 (both ring), so at least 7 total. Match both
|
|
565
|
+
// uppercase (N, [nH]) and lowercase aromatic (n) — N atoms in heterocyclic
|
|
566
|
+
// SMILES are written lowercase when aromatic.
|
|
567
|
+
const nCount = (smiles.match(/[Nn]/g) || []).length;
|
|
568
|
+
expect(nCount >= 7, true, `expected at least 7 nitrogens: ${smiles}`);
|
|
569
|
+
// One inter-nucleotide phosphate.
|
|
570
|
+
const pCount = (smiles.match(/P/g) || []).length;
|
|
571
|
+
expect(pCount, 1, `expected exactly 1 phosphate: ${smiles}`);
|
|
572
|
+
});
|
|
573
|
+
|
|
574
|
+
// GalNAc oxygen-count regression. Previously the R1 placeholder atom
|
|
575
|
+
// (substituted to 'O' from the "OH" cap) was left in the assembly,
|
|
576
|
+
// adding a stray OH on the chain-attach carbon. lna(T)GalNAc has known
|
|
577
|
+
// expected SMILES with exactly 10 oxygens.
|
|
578
|
+
test('rna-helm-3p-terminal-galnac-no-extra-oh', async () => {
|
|
579
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{[lna](T)[GalNAc]}$$$$V2.0`);
|
|
580
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
581
|
+
`valid SMILES expected: ${smiles}`);
|
|
582
|
+
// Count OXYGEN ATOMS only — uppercase O outside of brackets in standard
|
|
583
|
+
// SMILES denotes a non-aromatic oxygen. Ring-closure digits and atoms
|
|
584
|
+
// inside [] don't match this regex.
|
|
585
|
+
const oCount = (smiles.match(/O/g) || []).length;
|
|
586
|
+
expect(oCount, 10, `expected exactly 10 oxygen atoms in lna-T-GalNAc: ${smiles}`);
|
|
587
|
+
});
|
|
588
|
+
|
|
589
|
+
// sp (and similar phosphates with R-cap = H) used to disconnect the chain
|
|
590
|
+
// because the H placeholder was removed by removeHydrogen, leaving
|
|
591
|
+
// terminalNodes[0] pointing at the now-deleted atom. The result was a
|
|
592
|
+
// SMILES with two disconnected fragments separated by '.'. The fix:
|
|
593
|
+
// when the cap is H, leave terminalNodes[0] at its original
|
|
594
|
+
// setTerminalNodes value (the atom previously bonded to R1, e.g. P) so
|
|
595
|
+
// the chain bond goes there directly.
|
|
596
|
+
test('rna-helm-h-cap-phosphate-sp-connects', async () => {
|
|
597
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[sp].r(A)}$$$$V2.0`);
|
|
598
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
599
|
+
`valid SMILES expected: ${smiles}`);
|
|
600
|
+
// No '.' → single connected fragment.
|
|
601
|
+
expect(smiles.indexOf('.') === -1, true,
|
|
602
|
+
`expected single fragment (no '.' separator): ${smiles}`);
|
|
603
|
+
// Exactly one phosphorus from the sp linker.
|
|
604
|
+
const pCount = (smiles.match(/P/g) || []).length;
|
|
605
|
+
expect(pCount, 1, `expected exactly 1 phosphate: ${smiles}`);
|
|
606
|
+
// sp carries a sulfur on the phosphate.
|
|
607
|
+
expect(/S/.test(smiles), true, `expected sulfur from sp: ${smiles}`);
|
|
608
|
+
});
|
|
609
|
+
|
|
610
|
+
// Regression: H-cap phosphates (sp et al.) used to drop the bridging O
|
|
611
|
+
// on the 3' side of the linkage. The previous sugar's 3'-O is removed
|
|
612
|
+
// unconditionally during sugar processing on the assumption that the
|
|
613
|
+
// following linker brings its own bridging oxygen via the R1 cap; with
|
|
614
|
+
// an H cap that assumption breaks and the chain ended up as
|
|
615
|
+
// C3'-P(=O)(SH)-O-C5' instead of the proper C3'-O-P(=O)(SH)-O-C5'.
|
|
616
|
+
// The fix promotes the H cap to an O so the bridging atom always exists.
|
|
617
|
+
// Use m(2'-OMe ribose) so we can also verify the methoxy group survives
|
|
618
|
+
// the sp chain assembly.
|
|
619
|
+
test('rna-helm-sp-bridging-o-preserved', async () => {
|
|
620
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{m(A)[sp].r(A)[sp]}$$$$V2.0`);
|
|
621
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
622
|
+
`valid SMILES expected: ${smiles}`);
|
|
623
|
+
// Single connected fragment.
|
|
624
|
+
expect(smiles.indexOf('.') === -1, true,
|
|
625
|
+
`expected single fragment: ${smiles}`);
|
|
626
|
+
// 2 sp linkers → 2 phosphorus atoms.
|
|
627
|
+
const pCount = (smiles.match(/P/g) || []).length;
|
|
628
|
+
expect(pCount, 2, `expected exactly 2 phosphates: ${smiles}`);
|
|
629
|
+
// 2 sulfurs (one per sp).
|
|
630
|
+
const sCount = (smiles.match(/S/g) || []).length;
|
|
631
|
+
expect(sCount, 2, `expected exactly 2 sulfurs (one per sp): ${smiles}`);
|
|
632
|
+
// No C-P bond — every P should be bordered by O on both chain sides.
|
|
633
|
+
// A P preceded directly by an aliphatic carbon (lowercase 'c' is
|
|
634
|
+
// aromatic; capital 'C' is sp3) means the bridging O was lost.
|
|
635
|
+
expect(/C\d*P|CP|cP/.test(smiles), false,
|
|
636
|
+
`expected no direct C-P bond (bridging O missing): ${smiles}`);
|
|
637
|
+
// 2' methoxy on m must survive — methyl ether oxygen plus three oxygens
|
|
638
|
+
// per nucleotide gives plenty of O atoms; the structural assertion
|
|
639
|
+
// above is the strict one. Sanity-check that a methyl ether (OC) is
|
|
640
|
+
// present somewhere.
|
|
641
|
+
expect(/OC|CO/.test(smiles), true, `expected methoxy fragment: ${smiles}`);
|
|
642
|
+
});
|
|
643
|
+
|
|
644
|
+
// R-group swap heuristic: a single-R-group terminal monomer can be placed
|
|
645
|
+
// at either end of a HELM chain, even if its R-group label "should" only
|
|
646
|
+
// belong at one end. The conversion swaps rNodes so the existing
|
|
647
|
+
// TERMINAL_5P/3P role logic still works.
|
|
648
|
+
//
|
|
649
|
+
// Bio (R1 only) — naturally a 3'-terminal, but we accept it at 5' too.
|
|
650
|
+
test('rna-helm-bio-terminal-at-end', async () => {
|
|
651
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[Bio]}$$$$V2.0`);
|
|
652
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
653
|
+
`valid SMILES expected: ${smiles}`);
|
|
654
|
+
// Bio replaces the trailing P → no phosphate at all.
|
|
655
|
+
const pCount = (smiles.match(/P/g) || []).length;
|
|
656
|
+
expect(pCount, 0, `expected 0 phosphates with Bio terminal: ${smiles}`);
|
|
657
|
+
// Single connected fragment.
|
|
658
|
+
expect(smiles.indexOf('.') === -1, true,
|
|
659
|
+
`expected single fragment: ${smiles}`);
|
|
660
|
+
});
|
|
661
|
+
|
|
662
|
+
test('rna-helm-bio-terminal-at-start', async () => {
|
|
663
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{[Bio].r(T)}$$$$V2.0`);
|
|
664
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
665
|
+
`valid SMILES expected: ${smiles}`);
|
|
666
|
+
// Single connected fragment (Bio at start must connect to following sugar).
|
|
667
|
+
expect(smiles.indexOf('.') === -1, true,
|
|
668
|
+
`expected single fragment: ${smiles}`);
|
|
669
|
+
// No phosphates (Bio doesn't carry P, no trailing p in HELM).
|
|
670
|
+
const pCount = (smiles.match(/P/g) || []).length;
|
|
671
|
+
expect(pCount, 0, `expected 0 phosphates: ${smiles}`);
|
|
672
|
+
});
|
|
673
|
+
|
|
674
|
+
// Chol (R2 only) — naturally a 5'-terminal, but we accept it at 3' too.
|
|
675
|
+
test('rna-helm-chol-terminal-at-start', async () => {
|
|
676
|
+
// Already covered by rna-helm-5p-terminal-chol; this is the canonical case.
|
|
677
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)}$$$$V2.0`);
|
|
678
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
679
|
+
`valid SMILES expected: ${smiles}`);
|
|
680
|
+
expect(smiles.indexOf('.') === -1, true, `expected single fragment: ${smiles}`);
|
|
681
|
+
});
|
|
682
|
+
|
|
683
|
+
test('rna-helm-chol-terminal-at-end', async () => {
|
|
684
|
+
const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[Chol]}$$$$V2.0`);
|
|
685
|
+
expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
|
|
686
|
+
`valid SMILES expected: ${smiles}`);
|
|
687
|
+
// Single connected fragment.
|
|
688
|
+
expect(smiles.indexOf('.') === -1, true,
|
|
689
|
+
`expected single fragment: ${smiles}`);
|
|
690
|
+
// Chol replaces the trailing P → no phosphate.
|
|
691
|
+
const pCount = (smiles.match(/P/g) || []).length;
|
|
692
|
+
expect(pCount, 0, `expected 0 phosphates with Chol terminal: ${smiles}`);
|
|
693
|
+
});
|
|
501
694
|
});
|
|
502
695
|
|
|
503
696
|
|
|
@@ -9,5 +9,5 @@ export const LIB_SETTINGS_FOR_TESTS: UserLibSettings =
|
|
|
9
9
|
{explicit: ['HELMCoreLibrary.json', 'polytool-lib.json'], exclude: [], duplicateMonomerPreferences: {}};
|
|
10
10
|
|
|
11
11
|
/** Summary for settings {@link LIB_SETTINGS_FOR_TESTS} */
|
|
12
|
-
export const monomerLibForTestsSummary: MonomerLibSummaryType = {'PEPTIDE': 334, 'RNA':
|
|
12
|
+
export const monomerLibForTestsSummary: MonomerLibSummaryType = {'PEPTIDE': 334, 'RNA': 390, 'CHEM': 0};
|
|
13
13
|
|
|
@@ -167,10 +167,11 @@ export class MonomerLibBase implements IMonomerLibBase {
|
|
|
167
167
|
|
|
168
168
|
/** Get or create {@link Monomer} object (in case it is missing in monomer library current config) */
|
|
169
169
|
let m: Monomer | null = this.getMonomer(pt, elem);
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
170
|
+
// there can be linkers that have 1 rgroup that are terminal, so we allow that.
|
|
171
|
+
// if (m && biotype == HelmTypes.LINKER && (m[REQ.RGROUPS]?.length ?? 0) < 2) {
|
|
172
|
+
// // Web Editor expects null
|
|
173
|
+
// return null;
|
|
174
|
+
// }
|
|
174
175
|
if (m && biotype == HelmTypes.SUGAR && (m[REQ.RGROUPS]?.length ?? 0) < 3) {
|
|
175
176
|
// Web Editor expects null
|
|
176
177
|
return null;
|
|
@@ -24,8 +24,16 @@ export async function toAtomicLevelSingle(sequence: DG.SemanticValue): Promise<{
|
|
|
24
24
|
errorText = 'No sequence handler found';
|
|
25
25
|
return {errorText, mol: ''};
|
|
26
26
|
}
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
|
|
28
|
+
let maxLength = 50;
|
|
29
|
+
if (seqSh.isHelm()) {
|
|
30
|
+
const splitted = seqSh.getSplitted(sequence.cell.rowIndex);
|
|
31
|
+
if (!splitted.graphInfo?.polymerTypes?.some((pt) => pt !== 'RNA'))
|
|
32
|
+
maxLength = 150;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
if ((seqSh.getSplitted(sequence.cell.rowIndex)?.length ?? 100) > maxLength) {
|
|
36
|
+
errorText = 'Maximum number of monomers is ' + maxLength;
|
|
29
37
|
return {errorText, mol: ''};
|
|
30
38
|
}
|
|
31
39
|
const singleValCol = DG.Column.fromStrings('singleVal', [sequence.value]);
|
|
@@ -73,8 +81,11 @@ export async function toAtomicLevelWidget(sequence: DG.SemanticValue): Promise<D
|
|
|
73
81
|
const molSemanticValue = DG.SemanticValue.fromValueType(res.mol, DG.SEMTYPE.MOLECULE);
|
|
74
82
|
const panel = ui.panels.infoPanel(molSemanticValue);
|
|
75
83
|
let molPanel: DG.Widget | null = null;
|
|
76
|
-
if (panel)
|
|
77
|
-
|
|
84
|
+
if (panel) {
|
|
85
|
+
const acc = ui.accordion('Sequence Molfile details');
|
|
86
|
+
acc.addPane('Explore', () => panel.root);
|
|
87
|
+
molPanel = DG.Widget.fromRoot(acc.root);
|
|
88
|
+
}
|
|
78
89
|
|
|
79
90
|
|
|
80
91
|
const root = grok.chem.drawMolecule(res.mol, 300, 300, false);
|