@datagrok/bio 2.27.6 → 2.27.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18925,5 +18925,180 @@
18925
18925
  "meta": {},
18926
18926
  "author": "Pistoia Alliance HELM project",
18927
18927
  "createDate": null
18928
+ },
18929
+ {
18930
+ "symbol": "GalNAc",
18931
+ "name": "N-Acetyl-D-galactosamine",
18932
+ "molfile": "\n RDKit 2D\n\n 15 15 0 0 0 0 0 0 0 0999 V2000\n 4.4167 1.5500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.1177 0.8000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.1177 -0.7000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1.8187 1.5500 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 0.5196 0.8000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.5196 -0.7000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.8187 -1.4500 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -0.7794 -1.4500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.7794 -2.9500 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -2.0785 -0.7000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -3.3775 -1.4500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -4.6765 -0.7000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -2.0785 0.8000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -0.7794 1.5500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.7794 3.0500 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\n 2 3 2 0\n 2 4 1 0\n 4 5 1 0\n 5 6 1 0\n 6 7 1 0\n 6 8 1 0\n 8 9 1 0\n 8 10 1 0\n 10 11 1 0\n 11 12 1 0\n 10 13 1 0\n 13 14 1 0\n 14 15 1 0\n 14 5 1 0\nM RGP 1 15 1\nM END\n",
18933
+ "smiles": "CC(=O)NC1C(O)C(O)C(CO)OC1[*:1]",
18934
+ "polymerType": "RNA",
18935
+ "monomerType": "Undefined",
18936
+ "id": 0,
18937
+ "rgroups": [
18938
+ {
18939
+ "alternateId": "R1-OH",
18940
+ "capGroupName": "OH",
18941
+ "capGroupSMILES": "O[*:1]",
18942
+ "label": "R1"
18943
+ }
18944
+ ],
18945
+ "author": "Admin",
18946
+ "createDate": "2026-05-01T12:39:24.944Z",
18947
+ "meta": {
18948
+ "pubchemCID": 35717,
18949
+ "description": "Liver-targeting GalNAc sugar — single-arm form. Asialoglycoprotein receptor ligand.",
18950
+ "source": "PubChem CID 35717",
18951
+ "note": "Molfile from PubChem; R-group atom not embedded — for visualization only."
18952
+ }
18953
+ },
18954
+ {
18955
+ "symbol": "L3",
18956
+ "name": "Triantennary GalNAc-L3 conjugate",
18957
+ "molfile": "\n RDKit 2D\n\n121124 0 0 0 0 0 0 0 0999 V2000\n -1.0705 16.4252 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.0668 17.5399 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1.4004 17.2281 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.4041 18.3428 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.0922 19.8100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.3913 20.5600 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.5481 22.0518 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 4.5060 19.5563 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.8959 18.1860 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 4.6459 16.8870 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 6.1459 16.8870 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 3.8959 15.5879 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.6459 14.2889 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.8959 12.9898 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.6459 11.6908 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.8959 10.3918 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.6459 9.0927 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.8959 7.7937 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.6459 6.4946 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.8959 5.1956 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.6459 3.8966 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.8959 2.5975 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.3959 2.5975 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 4.6459 1.2985 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 3.8959 -0.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.1949 -0.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 6.4940 -0.0005 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 7.7930 -0.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 9.0921 -0.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 10.3911 -0.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 10.3911 -2.2505 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 11.6901 -0.0005 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 12.9892 -0.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 14.2882 -0.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 15.5872 -0.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 16.8863 -0.0005 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 18.1853 -0.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 18.1853 -2.2505 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 19.4844 -0.0005 0.0000 C 0 0 0 0 
0 0 0 0 0 0 0 0\n 20.7834 -0.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 22.0824 -0.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 23.3815 -0.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 24.6805 -0.0005 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 25.9795 -0.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 27.2786 -0.0005 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 28.5776 -0.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 29.8767 -0.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 29.8767 1.4995 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 28.5776 -2.2505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 29.8767 -3.0005 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 27.2786 -3.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 27.2786 -4.5005 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 25.9795 -2.2505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 24.6805 -3.0005 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 24.6805 -4.5005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 23.3815 -5.2505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 25.9795 -5.2505 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 2.5969 0.7495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2978 -0.0005 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -0.0012 0.7495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.3003 -0.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.5993 0.7495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.5993 2.2495 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -3.8983 -0.0005 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n -5.1974 0.7495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -6.4964 -0.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -7.7954 0.7495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -9.0945 -0.0005 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n -10.3935 0.7495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -10.3935 2.2495 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -11.6926 -0.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -12.9916 0.7495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -14.2906 -0.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -15.5897 0.7495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -16.8887 -0.0005 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -18.1877 0.7495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -18.1877 2.2495 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -19.4868 2.9995 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 
0\n -19.4868 4.4995 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -18.1877 5.2495 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -20.7858 2.2495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -22.0849 2.9995 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -20.7858 0.7495 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -22.0849 -0.0005 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -19.4868 -0.0005 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -19.4868 -1.5005 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n -20.7858 -2.2505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -20.7858 -3.7505 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -22.0849 -1.5005 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 3.1459 -1.2996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.6459 -1.2996 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 0.8959 -2.5986 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.6041 -2.5986 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.3541 -3.8977 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.6041 -5.1967 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -2.8541 -3.8977 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n -3.6041 -5.1967 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.1041 -5.1967 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.8541 -6.4957 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -7.3541 -6.4957 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n -8.1041 -7.7948 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -7.3541 -9.0938 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -9.6041 -7.7948 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -10.3541 -9.0938 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -11.8541 -9.0938 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -12.6041 -10.3928 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -14.1041 -10.3928 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -14.8541 -11.6919 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -14.1041 -12.9909 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -14.8541 -14.2900 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -14.1041 -15.5890 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -12.6041 -15.5890 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -16.3541 -14.2900 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -17.1041 -15.5890 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -17.1041 -12.9909 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -18.6041 -12.9909 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -16.3541 -11.6919 0.0000 C 0 0 
0 0 0 0 0 0 0 0 0 0\n -17.1041 -10.3928 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n -18.6041 -10.3928 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -19.3541 -9.0938 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -19.3541 -11.6919 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\n 2 3 1 0\n 3 4 1 0\n 4 5 1 0\n 5 6 1 0\n 6 7 1 0\n 6 8 1 0\n 8 9 1 0\n 9 10 1 0\n 10 11 2 0\n 10 12 1 0\n 12 13 1 0\n 13 14 1 0\n 14 15 1 0\n 15 16 1 0\n 16 17 1 0\n 17 18 1 0\n 18 19 1 0\n 19 20 1 0\n 20 21 1 0\n 21 22 1 0\n 22 23 2 0\n 22 24 1 0\n 24 25 1 0\n 25 26 1 0\n 26 27 1 0\n 27 28 1 0\n 28 29 1 0\n 29 30 1 0\n 30 31 2 0\n 30 32 1 0\n 32 33 1 0\n 33 34 1 0\n 34 35 1 0\n 35 36 1 0\n 36 37 1 0\n 37 38 2 0\n 37 39 1 0\n 39 40 1 0\n 40 41 1 0\n 41 42 1 0\n 42 43 1 0\n 43 44 1 0\n 44 45 1 0\n 45 46 1 0\n 46 47 1 0\n 47 48 1 0\n 46 49 1 0\n 49 50 1 0\n 49 51 1 0\n 51 52 1 0\n 51 53 1 0\n 53 54 1 0\n 54 55 1 0\n 55 56 1 0\n 55 57 2 0\n 25 58 1 0\n 58 59 1 0\n 59 60 1 0\n 60 61 1 0\n 61 62 1 0\n 62 63 2 0\n 62 64 1 0\n 64 65 1 0\n 65 66 1 0\n 66 67 1 0\n 67 68 1 0\n 68 69 1 0\n 69 70 2 0\n 69 71 1 0\n 71 72 1 0\n 72 73 1 0\n 73 74 1 0\n 74 75 1 0\n 75 76 1 0\n 76 77 1 0\n 77 78 1 0\n 78 79 1 0\n 79 80 1 0\n 78 81 1 0\n 81 82 1 0\n 81 83 1 0\n 83 84 1 0\n 83 85 1 0\n 85 86 1 0\n 86 87 1 0\n 87 88 1 0\n 87 89 2 0\n 25 90 1 0\n 90 91 1 0\n 91 92 1 0\n 92 93 1 0\n 93 94 1 0\n 94 95 2 0\n 94 96 1 0\n 96 97 1 0\n 97 98 1 0\n 98 99 1 0\n 99100 1 0\n100101 1 0\n101102 2 0\n101103 1 0\n103104 1 0\n104105 1 0\n105106 1 0\n106107 1 0\n107108 1 0\n108109 1 0\n109110 1 0\n110111 1 0\n111112 1 0\n110113 1 0\n113114 1 0\n113115 1 0\n115116 1 0\n115117 1 0\n117118 1 0\n118119 1 0\n119120 1 0\n119121 2 0\n 9 4 1 0\n 53 44 1 0\n 85 76 1 0\n117108 1 0\nM RGP 1 7 1\nM END\n",
18958
+ "smiles": "COCC1CC([*:1])CN1C(=O)CCCCCCCCCCC(=O)NC(COCCC(=O)NCCCNC(=O)CCCCOC1OC(CO)C(O)C(O)C1NC(C)=O)(COCCC(=O)NCCCNC(=O)CCCCOC1OC(CO)C(O)C(O)C1NC(C)=O)COCCC(=O)NCCCNC(=O)CCCCOC1OC(CO)C(O)C(O)C1NC(C)=O",
18959
+ "polymerType": "RNA",
18960
+ "monomerType": "Undefined",
18961
+ "id": 1,
18962
+ "rgroups": [
18963
+ {
18964
+ "alternateId": "R1-OH",
18965
+ "capGroupName": "OH",
18966
+ "capGroupSMILES": "O[*:1]",
18967
+ "label": "R1"
18968
+ }
18969
+ ],
18970
+ "author": "Admin",
18971
+ "createDate": "2026-05-01T12:40:24.469Z",
18972
+ "meta": {
18973
+ "pubchemCID": 117789597,
18974
+ "description": "Trivalent GalNAc cluster (Inclisiran / Vutrisiran-style 3'-terminal conjugate).",
18975
+ "source": "PubChem CID 117789597",
18976
+ "note": "Molfile from PubChem; R-group atom not embedded — for visualization only."
18977
+ }
18978
+ },
18979
+ {
18980
+ "symbol": "Chol",
18981
+ "name": "Cholesterol",
18982
+ "molfile": "\n RDKit 2D\n\n 28 31 0 0 0 0 0 0 0 0999 V2000\n -7.5125 2.9728 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -7.0490 1.5462 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -8.0527 0.4315 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.5817 1.2343 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.1182 -0.1923 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -3.6510 -0.5041 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -3.1875 -1.9307 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -4.1912 -3.0454 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.7202 -2.2426 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.1101 -3.6129 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.3816 -3.4561 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.6935 -1.9889 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.9925 -1.2389 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.2916 -1.9889 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.5906 -1.2389 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.5906 0.2611 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.8897 1.0111 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.8897 2.5111 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 7.1887 3.2611 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 4.5906 3.2611 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.2916 2.5111 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.2916 1.0111 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.9925 1.7611 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.9925 0.2611 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.6935 1.0111 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.6055 0.2611 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.6055 -1.2389 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.9758 -0.6288 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\n 2 3 1 0\n 2 4 1 0\n 4 5 1 0\n 5 6 1 0\n 6 7 1 0\n 7 8 1 0\n 7 9 1 0\n 9 10 1 0\n 10 11 1 0\n 11 12 1 0\n 12 13 1 0\n 13 14 1 0\n 14 15 1 0\n 15 16 2 0\n 16 17 1 0\n 17 18 1 0\n 18 19 1 0\n 18 20 1 0\n 20 21 1 0\n 21 22 1 0\n 22 23 1 0\n 22 24 1 0\n 24 25 1 0\n 25 26 1 0\n 26 27 1 0\n 27 28 1 0\n 27 9 1 0\n 27 12 1 0\n 24 13 1 0\n 22 16 1 0\nM RGP 1 19 2\nM END\n",
18983
+ "smiles": "CC(C)CCCC(C)C1CCC2C3CC=C4CC([*:2])CCC4(C)C3CCC12C",
18984
+ "polymerType": "RNA",
18985
+ "monomerType": "Undefined",
18986
+ "id": 2,
18987
+ "rgroups": [
18988
+ {
18989
+ "alternateId": "R2-OH",
18990
+ "capGroupName": "OH",
18991
+ "capGroupSMILES": "O[*:2]",
18992
+ "label": "R2"
18993
+ }
18994
+ ],
18995
+ "author": "Admin",
18996
+ "createDate": "2026-05-01T12:41:45.930Z",
18997
+ "meta": {
18998
+ "pubchemCID": 5997,
18999
+ "description": "Cholesterol delivery moiety. Common 3' or 5' lipid conjugate for hepatocyte / lipoprotein-mediated delivery.",
19000
+ "source": "PubChem CID 5997",
19001
+ "note": "Molfile from PubChem; R-group atom not embedded — for visualization only."
19002
+ }
19003
+ },
19004
+ {
19005
+ "symbol": "Bio",
19006
+ "name": "D-Biotin",
19007
+ "molfile": "\n RDKit 2D\n\n 16 17 0 0 0 0 0 0 0 0999 V2000\n 5.5832 -2.1308 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 4.4685 -1.1271 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.6253 0.3647 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 3.2550 0.9748 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.5050 2.2738 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.0378 1.9620 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0\n 0.8810 0.4702 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.4181 -0.2798 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.7171 0.4702 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -3.0162 -0.2798 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -4.3152 0.4702 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.6142 -0.2798 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.6142 -1.7798 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -6.9133 0.4702 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 2.2513 -0.1399 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.0013 -1.4390 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 2 0\n 2 3 1 0\n 3 4 1 0\n 4 5 1 0\n 5 6 1 0\n 6 7 1 0\n 7 8 1 0\n 8 9 1 0\n 9 10 1 0\n 10 11 1 0\n 11 12 1 0\n 12 13 2 0\n 12 14 1 0\n 7 15 1 0\n 15 16 1 0\n 16 2 1 0\n 15 4 1 0\nM RGP 1 14 1\nM END\n",
19008
+ "smiles": "O=C1NC2CSC(CCCCC(=O)[*:1])C2N1",
19009
+ "polymerType": "RNA",
19010
+ "monomerType": "Undefined",
19011
+ "id": 3,
19012
+ "rgroups": [
19013
+ {
19014
+ "alternateId": "R1-OH",
19015
+ "capGroupName": "OH",
19016
+ "capGroupSMILES": "O[*:1]",
19017
+ "label": "R1"
19018
+ }
19019
+ ],
19020
+ "author": "Admin",
19021
+ "createDate": "2026-05-01T12:42:04.506Z",
19022
+ "meta": {
19023
+ "pubchemCID": 171548,
19024
+ "description": "Biotin tag — affinity / pull-down handle, also used as a delivery moiety.",
19025
+ "source": "PubChem CID 171548",
19026
+ "note": "Molfile from PubChem; R-group atom not embedded — for visualization only."
19027
+ }
19028
+ },
19029
+ {
19030
+ "symbol": "Toc",
19031
+ "name": "alpha-Tocopherol",
19032
+ "molfile": "\n RDKit 2D\n\n 31 32 0 0 0 0 0 0 0 0999 V2000\n -1.0421 -0.5934 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.5193 -0.8538 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -3.0324 -2.2634 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.0682 -3.4124 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -4.5096 -2.5239 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.0226 -3.9334 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n -5.4738 -1.3748 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -6.9510 -1.6353 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -4.9607 0.0348 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -3.4835 0.2952 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.9705 1.7048 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -3.9347 2.8538 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -4.4477 4.2634 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.6356 3.6038 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.3366 2.8538 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.0376 3.6038 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2615 2.8538 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.5605 3.6038 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2615 1.3538 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.5605 0.6038 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.5605 -0.8962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.8596 -1.6462 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.8596 -3.1462 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.1586 -0.8962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 6.4576 -1.6462 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 7.7567 -0.8962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 9.0557 -1.6462 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 10.3547 -0.8962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 9.0557 -3.1462 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.4119 2.5934 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.9249 1.1838 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\n 2 3 2 0\n 3 4 1 0\n 3 5 1 0\n 5 6 1 0\n 5 7 2 0\n 7 8 1 0\n 7 9 1 0\n 9 10 2 0\n 10 11 1 0\n 11 12 1 0\n 12 13 1 0\n 12 14 1 0\n 14 15 1 0\n 15 16 1 0\n 16 17 1 0\n 17 18 1 0\n 17 19 1 0\n 19 20 1 0\n 20 21 1 0\n 21 22 1 0\n 22 23 1 0\n 22 24 1 0\n 24 25 1 0\n 25 26 1 0\n 26 27 1 0\n 27 28 1 0\n 27 29 1 0\n 12 30 1 0\n 30 31 1 0\n 10 2 1 0\n 31 9 1 0\nM RGP 1 6 1\nM 
END\n",
19033
+ "smiles": "Cc1c(C)c([*:1])c(C)c2c1OC(C)(CCCC(C)CCCC(C)CCCC(C)C)CC2",
19034
+ "polymerType": "RNA",
19035
+ "monomerType": "Undefined",
19036
+ "id": 4,
19037
+ "rgroups": [
19038
+ {
19039
+ "alternateId": "R1-OH",
19040
+ "capGroupName": "OH",
19041
+ "capGroupSMILES": "O[*:1]",
19042
+ "label": "R1"
19043
+ }
19044
+ ],
19045
+ "author": "Admin",
19046
+ "createDate": "2026-05-01T12:43:04.558Z",
19047
+ "meta": {
19048
+ "pubchemCID": 14985,
19049
+ "description": "Vitamin E delivery moiety. Used in tocopherol-conjugated siRNA for systemic / liver delivery.",
19050
+ "source": "PubChem CID 14985",
19051
+ "note": "Molfile from PubChem; R-group atom not embedded — for visualization only."
19052
+ }
19053
+ },
19054
+ {
19055
+ "symbol": "Pal",
19056
+ "name": "Palmitic acid",
19057
+ "molfile": "\n RDKit 2D\n\n 18 17 0 0 0 0 0 0 0 0999 V2000\n 10.8975 -0.4583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 9.5984 0.2917 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 8.2994 -0.4583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 7.0004 0.2917 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.7013 -0.4583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.4023 0.2917 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.1033 -0.4583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.8042 0.2917 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.5052 -0.4583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.7939 0.2917 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.0929 -0.4583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -3.3919 0.2917 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -4.6910 -0.4583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.9900 0.2917 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -7.2890 -0.4583 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -8.5881 0.2917 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -8.5881 1.7917 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n -9.8871 -0.4583 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\n 2 3 1 0\n 3 4 1 0\n 4 5 1 0\n 5 6 1 0\n 6 7 1 0\n 7 8 1 0\n 8 9 1 0\n 9 10 1 0\n 10 11 1 0\n 11 12 1 0\n 12 13 1 0\n 13 14 1 0\n 14 15 1 0\n 15 16 1 0\n 16 17 2 0\n 16 18 1 0\nM RGP 1 18 1\nM END\n",
19058
+ "smiles": "CCCCCCCCCCCCCCCC(=O)[*:1]",
19059
+ "polymerType": "RNA",
19060
+ "monomerType": "Undefined",
19061
+ "id": 5,
19062
+ "rgroups": [
19063
+ {
19064
+ "alternateId": "R1-OH",
19065
+ "capGroupName": "OH",
19066
+ "capGroupSMILES": "O[*:1]",
19067
+ "label": "R1"
19068
+ }
19069
+ ],
19070
+ "author": "Admin",
19071
+ "createDate": "2026-05-01T12:43:13.428Z",
19072
+ "meta": {
19073
+ "pubchemCID": 985,
19074
+ "description": "Lipophilic palmitoyl conjugate (C16 fatty acid).",
19075
+ "source": "PubChem CID 985",
19076
+ "note": "Molfile from PubChem; R-group atom not embedded — for visualization only."
19077
+ }
19078
+ },
19079
+ {
19080
+ "symbol": "DBCO",
19081
+ "name": "Dibenzocyclooctyne",
19082
+ "molfile": "\n RDKit 2D\n\n 32 37 0 0 0 0 0 0 0 0999 V2000\n 7.5117 -5.2438 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 6.0445 -5.5556 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.0408 -4.4409 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.5043 -3.0143 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.5006 -1.8996 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.0334 -2.2115 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.4233 -3.5818 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.9315 -3.4250 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 0.6196 -1.9578 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n -0.6794 -1.2078 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.6794 0.2922 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.9784 1.0422 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.9784 2.5422 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -3.2775 3.2922 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -4.5765 2.5422 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -5.8756 3.2922 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n -5.8756 4.7922 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -7.1746 5.5422 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -8.4736 4.7922 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n -9.7727 5.5422 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n -8.4736 3.2922 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -7.1746 2.5422 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -4.5765 1.0422 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -3.2775 0.2922 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.6196 1.0422 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.9187 0.2922 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1.9187 -1.2078 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 4.9641 -0.4730 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 6.4314 -0.1612 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 7.4351 -1.2759 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 6.9715 -2.7025 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 7.9752 -3.8172 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 2 0\n 2 3 1 0\n 3 4 2 0\n 4 5 1 0\n 5 6 1 0\n 6 7 1 0\n 7 8 2 0\n 8 9 1 0\n 9 10 1 0\n 10 11 2 0\n 11 12 1 0\n 12 13 2 0\n 13 14 1 0\n 14 15 2 0\n 15 16 1 0\n 16 17 1 0\n 17 18 1 0\n 18 19 1 0\n 19 20 1 0\n 19 21 1 0\n 21 22 1 0\n 15 23 1 0\n 23 24 2 0\n 11 25 1 0\n 25 26 2 0\n 26 27 1 0\n 5 28 2 0\n 28 29 1 0\n 29 30 2 0\n 30 31 
1 0\n 31 32 2 0\n 32 1 1 0\n 31 4 1 0\n 27 6 2 0\n 27 9 1 0\n 24 12 1 0\n 22 16 1 0\nM RGP 1 20 1\nM END\n",
19083
+ "smiles": "c1ccc2c(-c3cnn4cc(-c5ccc(N6CCN([*:1])CC6)cc5)cnc34)ccnc2c1",
19084
+ "polymerType": "RNA",
19085
+ "monomerType": "Undefined",
19086
+ "id": 6,
19087
+ "rgroups": [
19088
+ {
19089
+ "alternateId": "R1-OH",
19090
+ "capGroupName": "OH",
19091
+ "capGroupSMILES": "O[*:1]",
19092
+ "label": "R1"
19093
+ }
19094
+ ],
19095
+ "author": "Admin",
19096
+ "createDate": "2026-05-01T12:43:53.749Z",
19097
+ "meta": {
19098
+ "pubchemCID": 25195294,
19099
+ "description": "DBCO group for strain-promoted azide-alkyne click (SPAAC) conjugation.",
19100
+ "source": "PubChem CID 25195294",
19101
+ "note": "Molfile from PubChem; R-group atom not embedded — for visualization only."
18928
19102
  }
19103
+ }
18929
19104
  ]
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.27.6",
8
+ "version": "2.27.7",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,7 +44,7 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.64.1",
47
+ "@datagrok-libraries/bio": "^5.65.0",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.9",
49
49
  "@datagrok-libraries/math": "^1.2.6",
50
50
  "@datagrok-libraries/ml": "^6.10.11",
@@ -498,6 +498,141 @@ category('toAtomicLevelHelmRna', async () => {
498
498
  expect(/S/.test(smiles), true, `expected sulfur: ${smiles}`);
499
499
  expect(/P/.test(smiles), true, `expected phosphorus: ${smiles}`);
500
500
  });
501
+
502
+ // 3'-end terminal modifier (GalNAc, R1 only). HELM puts it in the
503
+ // "phosphate" slot of the last triple, but it's actually a chain end.
504
+ // Expectations: chain ends at GalNAc (no extra OH cap), no phosphate
505
+ // at all, GalNAc structural features (acetamide N) are present.
506
+ test('rna-helm-3p-terminal-galnac', async () => {
507
+ const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[GalNAc]}$$$$V2.0`);
508
+ const pCount = (smiles.match(/P/g) || []).length;
509
+ expect(pCount, 0, `expected 0 phosphates (GalNAc replaces P): ${smiles}`);
510
+ expect(/N/.test(smiles), true, `expected nitrogen from GalNAc acetamide: ${smiles}`);
511
+ // Sanity: SMILES should not be RDKit's parse-failure sentinel.
512
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
513
+ `valid SMILES expected: ${smiles}`);
514
+ });
515
+
516
+ // 5'-end terminal modifier (Chol, R2 only) at the start of the chain.
517
+ // HELM puts Chol where the first sugar would be. With no trailing P,
518
+ // the chain is Chol → r(T)-3'-OH.
519
+ test('rna-helm-5p-terminal-chol', async () => {
520
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)}$$$$V2.0`);
521
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
522
+ `valid SMILES expected: ${smiles}`);
523
+ // Cholesterol has 4 fused rings (3 six-membered + 1 five-membered). Weak sanity check:
524
+ // each of the digit characters 1-4 must appear somewhere in the SMILES (the cholesterol
525
+ // fragment alone uses ring closures 1-4); the digits are not tied to that fragment.
526
+ expect(/1/.test(smiles) && /2/.test(smiles) && /3/.test(smiles) && /4/.test(smiles), true,
527
+ `expected cholesterol ring fragments: ${smiles}`);
528
+ });
529
+
530
+ // Chol at 5' with explicit trailing phosphate (the original failing case).
531
+ // Chain: Chol → r(T) → P-OH. Should produce exactly 1 phosphate.
532
+ test('rna-helm-5p-terminal-chol-with-trailing-phosphate', async () => {
533
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)p}$$$$V2.0`);
534
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
535
+ `valid SMILES expected: ${smiles}`);
536
+ const pCount = (smiles.match(/P/g) || []).length;
537
+ expect(pCount, 1, `expected exactly 1 phosphate: ${smiles}`);
538
+ });
539
+
540
+ // Both terminals at once: Chol at 5', GalNAc at 3', single nucleotide
541
+ // in between. No phosphates anywhere.
542
+ test('rna-helm-both-terminals', async () => {
543
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)[GalNAc]}$$$$V2.0`);
544
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
545
+ `valid SMILES expected: ${smiles}`);
546
+ const pCount = (smiles.match(/P/g) || []).length;
547
+ expect(pCount, 0, `expected 0 phosphates with both terminals: ${smiles}`);
548
+ });
549
+
550
+ // GalNAc oxygen-count regression. Previously the R1 placeholder atom
551
+ // (substituted to 'O' from the "OH" cap) was left in the assembly,
552
+ // adding a stray OH on the chain-attach carbon. lna(T)GalNAc has known
553
+ // expected SMILES with exactly 10 oxygens.
554
+ test('rna-helm-3p-terminal-galnac-no-extra-oh', async () => {
555
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[lna](T)[GalNAc]}$$$$V2.0`);
556
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
557
+ `valid SMILES expected: ${smiles}`);
558
+ // Count uppercase 'O' characters as a proxy for non-aromatic oxygen atoms.
559
+ // Ring-closure digits and aromatic 'o' are not counted, but note that the regex
560
+ // DOES match an 'O' inside bracket atoms (e.g. [O-], [OH]) as well.
561
+ const oCount = (smiles.match(/O/g) || []).length;
562
+ expect(oCount, 10, `expected exactly 10 oxygen atoms in lna-T-GalNAc: ${smiles}`);
563
+ });
564
+
565
+ // sp (and similar phosphates with R-cap = H) used to disconnect the chain
566
+ // because the H placeholder was removed by removeHydrogen, leaving
567
+ // terminalNodes[0] pointing at the now-deleted atom. The result was a
568
+ // SMILES with two disconnected fragments separated by '.'. The fix:
569
+ // when the cap is H, leave terminalNodes[0] at its original
570
+ // setTerminalNodes value (the atom previously bonded to R1, e.g. P) so
571
+ // the chain bond goes there directly.
572
+ test('rna-helm-h-cap-phosphate-sp-connects', async () => {
573
+ const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[sp].r(A)}$$$$V2.0`);
574
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
575
+ `valid SMILES expected: ${smiles}`);
576
+ // No '.' → single connected fragment.
577
+ expect(smiles.indexOf('.') === -1, true,
578
+ `expected single fragment (no '.' separator): ${smiles}`);
579
+ // Exactly one phosphorus from the sp linker.
580
+ const pCount = (smiles.match(/P/g) || []).length;
581
+ expect(pCount, 1, `expected exactly 1 phosphate: ${smiles}`);
582
+ // sp carries a sulfur on the phosphate.
583
+ expect(/S/.test(smiles), true, `expected sulfur from sp: ${smiles}`);
584
+ });
585
+
586
+ // R-group swap heuristic: a single-R-group terminal monomer can be placed
587
+ // at either end of a HELM chain, even if its R-group label "should" only
588
+ // belong at one end. The conversion swaps rNodes so the existing
589
+ // TERMINAL_5P/3P role logic still works.
590
+ //
591
+ // Bio (R1 only) — naturally a 3'-terminal, but we accept it at 5' too.
592
+ test('rna-helm-bio-terminal-at-end', async () => {
593
+ const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[Bio]}$$$$V2.0`);
594
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
595
+ `valid SMILES expected: ${smiles}`);
596
+ // Bio replaces the trailing P → no phosphate at all.
597
+ const pCount = (smiles.match(/P/g) || []).length;
598
+ expect(pCount, 0, `expected 0 phosphates with Bio terminal: ${smiles}`);
599
+ // Single connected fragment.
600
+ expect(smiles.indexOf('.') === -1, true,
601
+ `expected single fragment: ${smiles}`);
602
+ });
603
+
604
+ test('rna-helm-bio-terminal-at-start', async () => {
605
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[Bio].r(T)}$$$$V2.0`);
606
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
607
+ `valid SMILES expected: ${smiles}`);
608
+ // Single connected fragment (Bio at start must connect to following sugar).
609
+ expect(smiles.indexOf('.') === -1, true,
610
+ `expected single fragment: ${smiles}`);
611
+ // No phosphates (Bio doesn't carry P, no trailing p in HELM).
612
+ const pCount = (smiles.match(/P/g) || []).length;
613
+ expect(pCount, 0, `expected 0 phosphates: ${smiles}`);
614
+ });
615
+
616
+ // Chol (R2 only) — naturally a 5'-terminal, but we accept it at 3' too.
617
+ test('rna-helm-chol-terminal-at-start', async () => {
618
+ // Already covered by rna-helm-5p-terminal-chol; this is the canonical case.
619
+ const smiles = await helmRnaLinearToSmiles(`RNA1{[Chol].r(T)}$$$$V2.0`);
620
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
621
+ `valid SMILES expected: ${smiles}`);
622
+ expect(smiles.indexOf('.') === -1, true, `expected single fragment: ${smiles}`);
623
+ });
624
+
625
+ test('rna-helm-chol-terminal-at-end', async () => {
626
+ const smiles = await helmRnaLinearToSmiles(`RNA1{r(T)[Chol]}$$$$V2.0`);
627
+ expect(smiles !== 'MALFORMED_INPUT_VALUE' && smiles.length > 10, true,
628
+ `valid SMILES expected: ${smiles}`);
629
+ // Single connected fragment.
630
+ expect(smiles.indexOf('.') === -1, true,
631
+ `expected single fragment: ${smiles}`);
632
+ // Chol replaces the trailing P → no phosphate.
633
+ const pCount = (smiles.match(/P/g) || []).length;
634
+ expect(pCount, 0, `expected 0 phosphates with Chol terminal: ${smiles}`);
635
+ });
501
636
  });
502
637
 
503
638
 
@@ -9,5 +9,5 @@ export const LIB_SETTINGS_FOR_TESTS: UserLibSettings =
9
9
  {explicit: ['HELMCoreLibrary.json', 'polytool-lib.json'], exclude: [], duplicateMonomerPreferences: {}};
10
10
 
11
11
  /** Summary for settings {@link LIB_SETTINGS_FOR_TESTS} */
12
- export const monomerLibForTestsSummary: MonomerLibSummaryType = {'PEPTIDE': 334, 'RNA': 383, 'CHEM': 0};
12
+ export const monomerLibForTestsSummary: MonomerLibSummaryType = {'PEPTIDE': 334, 'RNA': 390, 'CHEM': 0};
13
13
 
@@ -167,10 +167,11 @@ export class MonomerLibBase implements IMonomerLibBase {
167
167
 
168
168
  /** Get or create {@link Monomer} object (in case it is missing in monomer library current config) */
169
169
  let m: Monomer | null = this.getMonomer(pt, elem);
170
- if (m && biotype == HelmTypes.LINKER && (m[REQ.RGROUPS]?.length ?? 0) < 2) {
171
- // Web Editor expects null
172
- return null;
173
- }
170
+ // Terminal linkers may legitimately have a single R-group, so the "< 2 R-groups" rejection below is disabled.
171
+ // if (m && biotype == HelmTypes.LINKER && (m[REQ.RGROUPS]?.length ?? 0) < 2) {
172
+ // // Web Editor expects null
173
+ // return null;
174
+ // }
174
175
  if (m && biotype == HelmTypes.SUGAR && (m[REQ.RGROUPS]?.length ?? 0) < 3) {
175
176
  // Web Editor expects null
176
177
  return null;
@@ -24,8 +24,16 @@ export async function toAtomicLevelSingle(sequence: DG.SemanticValue): Promise<{
24
24
  errorText = 'No sequence handler found';
25
25
  return {errorText, mol: ''};
26
26
  }
27
- if ((seqSh.getSplitted(sequence.cell.rowIndex, 60)?.length ?? 100) > 50) {
28
- errorText = 'Maximum number of monomers is 50';
27
+
28
+ let maxLength = 50;
29
+ if (seqSh.isHelm()) {
30
+ const splitted = seqSh.getSplitted(sequence.cell.rowIndex);
31
+ if (!splitted.graphInfo?.polymerTypes?.some((pt) => pt !== 'RNA'))
32
+ maxLength = 150;
33
+ }
34
+
35
+ if ((seqSh.getSplitted(sequence.cell.rowIndex)?.length ?? 100) > maxLength) {
36
+ errorText = 'Maximum number of monomers is ' + maxLength;
29
37
  return {errorText, mol: ''};
30
38
  }
31
39
  const singleValCol = DG.Column.fromStrings('singleVal', [sequence.value]);
@@ -73,8 +81,11 @@ export async function toAtomicLevelWidget(sequence: DG.SemanticValue): Promise<D
73
81
  const molSemanticValue = DG.SemanticValue.fromValueType(res.mol, DG.SEMTYPE.MOLECULE);
74
82
  const panel = ui.panels.infoPanel(molSemanticValue);
75
83
  let molPanel: DG.Widget | null = null;
76
- if (panel)
77
- molPanel = DG.Widget.fromRoot(panel.root);
84
+ if (panel) {
85
+ const acc = ui.accordion('Sequence Molfile details');
86
+ acc.addPane('Explore', () => panel.root);
87
+ molPanel = DG.Widget.fromRoot(acc.root);
88
+ }
78
89
 
79
90
 
80
91
  const root = grok.chem.drawMolecule(res.mol, 300, 300, false);