npm - @datagrok/bio - Versions diffs - 1.7.9 → 1.7.13 - Mend

@datagrok/bio 1.7.9 → 1.7.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/detectors.js +15 -1
package/dist/package-test.js +634 -156
package/dist/package.js +562 -147
package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +3 -3
package/files/tests/testSpgi100.csv +8437 -0
package/files/tests/testUnichemSources.csv +36 -0
package/package.json +10 -10
package/scripts/embed.py +13 -0
package/src/const.ts +5 -0
package/src/package.ts +94 -4
package/src/tests/activity-cliffs-tests.ts +3 -0
package/src/tests/convert-test.ts +42 -9
package/src/tests/detectors-test.ts +37 -4
package/src/tests/renderers-test.ts +1 -1
package/src/utils/cell-renderer.ts +114 -1
package/src/utils/constants.ts +2 -1
package/src/utils/convert.ts +13 -10
package/src/utils/utils.ts +53 -3
package/src/widgets/representations.ts +54 -0
package/{test-Bio-34f75e5127b8-0a8a5821.html → test-Bio-34f75e5127b8-781e9df9.html} +20 -12

package/files/tests/testUnichemSources.csv ADDED Viewed

@@ -0,0 +1,36 @@
+src_id,name,description,current_release_u,created,last_updated,userstamp,number_or_date,name_long,src_details,src_url,base_id_url,base_id_url_available,aux_required,aux_for_url,aux_description,private,name_label,src_class,keys_only,update_min,update_opt,update_max,update_j,update_comments,synonyms
+1,chembl,A database of bioactive drug-like small molecules and bioactivities abstracted from the scientific literature.,17,31-DEC-2010 09:00:01,01-JUN-2018 00:00:00,autoloader,number,ChEMBL,Standard InChIs and Keys provided on ftp site for each release.,https://www.ebi.ac.uk/chembl/,https://www.ebi.ac.uk/chembldb/compound/inspect/,1,0,0,none,0,ChEMBL,2,0,280,290,330,0,approx 6 monthly updates,0
+3,pdb,"The European resource for the collection, organisation and dissemination of data on biological macromolecular structures, including structures of small molecule ligands for proteins.",229,28-SEP-2011 09:00:01,31-DEC-2018 00:00:00,autoloader,date,PDBe (Protein Data Bank Europe),Standard InChIs and Keys provided by direct querying of Oracle DB.,http://www.ebi.ac.uk/pdbe/,http://www.ebi.ac.uk/pdbe-srv/pdbechem/chemicalCompound/show/,1,0,0,none,0,PDBe,2,0,6,7,14,0,weekly updates,0
+2,drugbank,"A database that combines drug (i.e. chemical, pharmacological and pharmaceutical) data with drug target (i.e. sequence, structure, and pathway) information.",32,28-MAY-2011 09:00:01,06-JUL-2018 00:00:00,autoloader,date,DrugBank,Standard InChIs and Keys provided within sd file on ftp site for each release.,http://drugbank.ca/,http://www.drugbank.ca/drugs/,1,0,0,none,0,DrugBank,2,0,90,95,130,0,Quarterly updates,
+5,pubchem_dotf,A subset of the PubChem DB: from the original depositor 'drugs of the future' (Prous).,2,25-OCT-2011 07:00:00,29-JUL-2013 00:00:00,autoloader,date,PubChem ('Drugs of the Future' subset),"Mol files for SIDs downloaded manually, via PubChem interface, and Standard InChIs and Keys generated by InChI software. SIDs used as identifiers.",http://pubchem.ncbi.nlm.nih.gov/sources/sources.cgi,http://pubchem.ncbi.nlm.nih.gov/substance/,1,0,0,none,0,PubChem: Drugs of the Future ,2,0,100000,100000,100000,0,Considered a static deposition into pubchem,
+4,gtopdb,"The IUPHAR (International Union of Basic and Clinical Pharmacology)/BPS (British Pharmacological Society) Guide to PHARMACOLOGY database contains structures of small molecule ligands, peptides and antibodies, with their affinities at protein targets.",17,21-OCT-2011 09:00:01,21-NOV-2018 00:00:00,autoloader,date,Guide to Pharmacology,Standard InChIs and Keys available for download at http://www.guidetopharmacology.org/download.jsp,http://www.guidetopharmacology.org,http://www.guidetopharmacology.org/GRAC/LigandDisplayForward?ligandId=,1,0,0,none,0,Guide to Pharmacology,2,0,90,95,130,0,Quarterly updates,
+11,ibm,"The data are provided by IBM-NIH and include all chemistry extracted by means of text and image mining from the patent corpus (USPTO, WIPO and EPO) for patent documents published through 31-12-2010. Identifiers in UniChem are IBM compound identifiers.",2,16-APR-2012 09:00:01,11-FEB-2015 00:00:00,autoloader,date,IBM strategic IP insight platform and the National Institutes of Health,InChIs and InChI keys were generated from SMILES in house.,http://www-935.ibm.com/services/us/gbs/bao/siip/nih/,http://www-935.ibm.com/services/us/gbs/bao/siip/nih/?sid=,1,0,0,none,0,IBM Patent System,2,0,100000,100000,100000,0,static set,
+6,kegg_ligand,"KEGG LIGAND is a composite DB consisting of COMPOUND, GLYCAN, REACTION, RPAIR, RCLASS, and ENZYME DBs, whose entries are identified by C, G, R, RP, RC, and EC numbers, respectively.",1,25-JUN-2011 09:00:01,27-JUN-2011 00:00:00,autoloader,date,KEGG (Kyoto Encyclopedia of Genes and Genomes) Ligand,Mol files were downloaded manually prior to this download becoming private. Standard InChIs and Keys generated by InChI software.,http://www.genome.jp/kegg/ligand.html,http://www.genome.jp/dbget-bin/www_bget?,1,0,0,none,0,KEGG Ligand,2,0,100000,100000,100000,0,downloads now no longer available,
+9,zinc,"A free database of commercially-available compounds for virtual screening, provided by the Shoichet Laboratory in the Department of Pharmaceutical Chemistry at the University of California, San Francisco (UCSF).  [Irwin and Shoichet, J. Chem. Inf. Model. 2005;45(1):177-82]",6,11-MAR-2012 09:00:01,21-AUG-2017 00:00:00,autoloader,date,ZINC,"novirtual subset of ZINC15, as a file containing inchis and keys from http://files.docking.org/export/unichem/",http://zinc15.docking.org,http://zinc15.docking.org/substances/,1,0,0,none,0,ZINC,3,0,1290,1295,1305,1,monthly updates,
+8,nih_ncc,Collections of plated arrays of small molecules that have a history of use in human clinical trials. Assembled by the National Institutes of Health (NIH) through the Molecular Libraries Roadmap Initiative,1,26-NOV-2011 09:00:01,02-DEC-2011 00:00:00,autoloader,date,NIH Clinical Collection,Mol files downloaded manually and Standard InChIs and Keys generated by InChI software,http://nihsmr.evotec.com/evotec/,,0,0,0,none,0,NIH Clinical Collection,2,0,100000,100000,100000,0,"static set, not updated",
+10,emolecules,"A free chemical structure search engine containing millions of public domain structures. Pricing, availabilities, and vendor information requires an eMolecules Plus subscription.",1,13-MAR-2012 09:00:01,15-MAR-2012 00:00:00,autoloader,date,eMolecules,"Downloaded as an SD file from source, Converted to InChI and INChIKeys by UniChem",https://www.emolecules.com/,https://www.emolecules.com/cgi-bin/more?vid=,1,0,0,none,0,eMolecules,3,0,100000,100000,100000,0,downloads now no longer available,
+12,atlas,"The Gene Expression Atlas is a semantically enriched database of meta-analysis based summary statistics over a curated subset of ArrayExpress Archive, servicing queries for condition-specific gene expression patterns as well as broader exploratory searches for biologically interesting genes/samples.",1,18-JUN-2012 09:00:01,28-SEP-2012 00:00:00,autoloader,date,Gene Expression Atlas,Currently extracted from compound names.,http://www.ebi.ac.uk/gxa/home,http://www.ebi.ac.uk/gxa/query?conditionQuery=,1,0,0,none,0,Atlas,2,0,100000,100000,100000,0,manual updates,0
+7,chebi,ChEBI is a freely available dictionary of molecular entities focused on 'small' chemical compounds,62,24-NOV-2011 09:00:01,04-JAN-2019 00:00:00,autoloader,date,ChEBI (Chemical Entities of Biological Interest).,Std InChis (but no keys) provided on ftp site. Keys generated by UniChem. 'all star' compounds downloaded,http://www.ebi.ac.uk/chebi/downloadsForward.do,http://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A,1,0,0,none,0,ChEBI,2,0,25,30,50,0,monthly updates,
+14,fdasrs,"The primary goal of the FDA/USP Substance Registration System (SRS) is to unambiguously define all substances present in regulated products. Once a substance has been defined, the SRS assigns a strong identifier that is permanently associated with the substance: a UNII (Unique Ingredient Identifier). This is a a non-proprietary, free, unique, unambiguous, nonsemantic, alphanumeric identifier based on a substances molecular structure and/or descriptive information.",20,05-APR-2013 00:00:00,07-NOV-2018 00:00:00,autoloader,date,FDA/USP Substance Registration System (SRS),Download of InChIKeys in file UNII Data  from http://fdasis.nlm.nih.gov/srs/jsp/srs/uniiListDownload.jsp,http://fdasis.nlm.nih.gov/srs/srs.jsp,http://fdasis.nlm.nih.gov/srs/ProxyServlet?mergeData=true&objectHandle=DBMaint&APPLICATION_NAME=fdasrs&actionHandle=default&nextPage=jsp/srs/ResultScreen.jsp&TXTSUPERLISTID=,1,0,0,none,0,FDA SRS,2,1,90,95,130,0,presumed to be quartely updates,
+15,surechembl,"SureChEMBL automatically extracts chemistry from the full text of all major patent authorities. Compounds are derived from either chemical names found in text or in chemical depictions. All SureChEMBL compounds are included, except those failing UniChem loading rules.",207,05-APR-2013 00:00:00,01-JAN-2019 00:00:00,autoloader,date,SureChEMBL,Standard InChIs and InChIKeys provided by a direct feed from the SureChEMBL database,https://www.surechembl.org/search/,https://www.surechembl.org/chemical/,1,0,0,none,0,SureChEMBL,2,0,5,7,9,1,weekly,
+21,pubchem_tpharma,A subset of the PubChem DB: from the original depositor 'Thomson Pharma'.,1,29-JUL-2013 00:00:00,30-JUL-2013 00:00:00,autoloader,date,PubChem ('Thomson Pharma' subset),"Mol files for SIDs downloaded manually, via PubChem interface, and Standard InChIs and Keys generated by InChI software. SIDs used as identifiers.",http://www.thomson-pharma.com/,http://pubchem.ncbi.nlm.nih.gov/substance/,1,0,0,none,0,PubChem: Thomson Pharma ,2,0,100000,100000,100000,0,Considered a static deposition into pubchem,
+22,pubchem,A database of normalized PubChem compounds (CIDs) from the PubChem Database.,112,21-OCT-2013 00:00:00,25-DEC-2018 00:00:00,autoloader,date,PubChem Compounds,Standard InChIs and Keys provided on ftp site.,http://pubchem.ncbi.nlm.nih.gov,http://pubchem.ncbi.nlm.nih.gov/compound/,1,0,0,none,0,PubChem,2,0,12,14,18,1,fortnightly updates,
+27,recon,A biochemical knowledge-base on human metabolism,1,13-OCT-2014 00:00:00,13-OCT-2014 00:00:00,autoloader,date,Recon,Standard InChIs and Keys provided by email,http://vmh.uni.lu,http://vmh.uni.lu/#metabolite/,1,0,0,none,0,Recon,2,0,100000,100000,100000,0,none,
+28,molport,MolPort. A database designed to assist users find commercial sources of compounds. Access requires (free) registration. Only stock compounds included from Nov 2017.,15,17-OCT-2014 00:00:00,13-DEC-2018 00:00:00,autoloader,date,MolPort,"Standard InChIs and Keys provided on from MolPort ftp site, access on request.",https://www.molport.com/shop/index,https://www.molport.com/shop/molecule-link/,1,0,0,none,0,MolPort,2,0,25,30,40,1,monthly updates,
+31,bindingdb,"A public, web-accessible database of measured binding affinities, focusing chiefly on the interactions of proteins considered to be drug-targets with small, drug-like molecules",48,27-FEB-2015 00:00:00,02-JAN-2019 00:00:00,autoloader,date,BindingDB,Standard InChIs and Keys available within a tsv file from download page.,https://www.bindingdb.org/bind/index.jsp,http://www.bindingdb.org/bind/chemsearch/marvin/MolStructure.jsp?monomerid=,1,0,0,none,0,BindingDB,2,0,27,30,34,0,Monthly updates,
+29,nikkaji,Nakkaji (The Japan Chemical Substance Dictionary) is an organic compound dictionary database prepared by the Japan Science and Technology Agency (JST).,4,15-JAN-2015 00:00:00,09-APR-2018 00:00:00,autoloader,date,Nikkaji,Standard InChIs and Keys available from ftp site,http://jglobal.jst.go.jp/en/,http://jglobal.jst.go.jp/en/redirect?Nikkaji_No=,1,0,0,none,0,Nikkaji,2,0,350,365,380,1,annual updates,
+32,comptox,"The foundation of chemical safety testing relies on chemistry information such as high-quality chemical structures and physicochemical properties. This information is used by scientists to predict the potential health risks of chemicals.The CompTox Dashboard is part of a suite of dashboards developed by EPA to help evaluate the safety of chemicals. It provides access to a variety of data and information on over 700,000 chemicals currently in use and of interest to environmental researchers. Within the CompTox Dashboard, users can access chemical structures, experimental and predicted physicochemical and toxicity data, and additional links to relevant websites and applications. It maps curated physicochemical property data associated with chemical substances to their corresponding chemical structures",2,12-JUL-2016 00:00:00,26-NOV-2018 00:00:00,autoloader,date,EPA (Environmental Protection Agency) CompTox Dashboard,Standard InChIs and Keys obtained from download page,https://comptox.epa.gov/dashboard/,https://comptox.epa.gov/dashboard/,1,0,0,none,0,EPA CompTox Dashboard,2,0,100000,100000,100000,0,none,
+33,lipidmaps,"LIPID Metabolites And Pathways Strategy (LIPID MAPS) is a multi-institutional effort created to identify and quantitate, using a systems biology approach and sophisticated mass spectrometers, all of the major, and many minor, lipid species in mammalian cells, as well as to quantitate the changes in these species in response to perturbation",9,07-DEC-2016 00:00:00,07-DEC-2018 00:00:00,autoloader,date,LipidMaps,Standard InChIs and Keys obtained from download page,http://www.lipidmaps.org,http://www.lipidmaps.org/data/LMSDRecord.php?LMID=,1,0,0,none,0,LipidMaps,2,0,90,95,130,0,Quarterly,
+35,carotenoiddb,"A Database of information on naturally occurring carotenoids from many organisms, extracted  from the literature.",10,19-MAY-2017 00:00:00,11-JUL-2018 00:00:00,autoloader,date,Carotenoid Database,Standard InChIs and Keys available as a download,http://carotenoiddb.jp/index.html,http://carotenoiddb.jp/Entries/,1,0,0,none,0,CarotenoidDB,2,0,27,30,34,0,monthly,
+36,metabolights,"A database for Metabolomics experiments and derived information. The database is cross-species, cross-technique and covers metabolite structures and their reference spectra as well as their biological roles, locations and concentrations, and experimental data from metabolic experiments.",8,12-JUN-2017 00:00:00,06-DEC-2018 00:00:00,autoloader,date,Metabolights,Standard InChIs and Keys available from FTP site,http://www.ebi.ac.uk/metabolights/,http://www.ebi.ac.uk/metabolights/,1,0,0,none,0,Metabolights,2,0,25,30,50,0,monthly,
+37,brenda,A comprehensive Enzyme Information system containing enzyme functional data extracted directly from the primary literature.,3,03-JUL-2017 00:00:00,30-AUG-2018 00:00:00,autoloader,date,Brenda,Standard InChIKeys available as a download,http://www.brenda-enzymes.org/index.php,http://www.brenda-enzymes.org/ligand.php?brenda_ligand_id=,1,0,0,none,0,Brenda,2,1,180,190,230,0,"6 monthly, Jan and July",
+17,pharmgkb,PharmGKB (Pharmacogenomics Knowledgebase) is a comprehensive resource that curates knowledge about the impact of genetic variation on drug response for clinicians and researchers.,20,24-APR-2013 00:00:00,06-DEC-2018 00:00:00,autoloader,date,PharmGKB,drugs.zip file from from download site https://www.pharmgkb.org/downloads/. Smiles contained in this file Converted to Std InChIs internally,http://www.pharmgkb.org,http://www.pharmgkb.org/drug/,1,0,0,none,0,PharmGKB,2,0,25,30,40,0,monthly updates,
+18,hmdb,"The Human Metabolome Database (HMDB) is a freely available electronic database containing detailed information about small molecule metabolites found in the human body. It is intended to be used for applications in metabolomics, clinical chemistry, biomarker discovery and general education. The database is designed to contain or link three kinds of data: 1) chemical data, 2) clinical data, and 3) molecular biology/biochemistry data",10,06-JUN-2013 00:00:00,18-JUL-2018 00:00:00,autoloader,date,Human Metabolome Database (HMDB),"SD file download from source, converted to InChIs within UniChem",http://www.hmdb.ca,http://www.hmdb.ca/metabolites/,1,0,0,none,0,Human Metabolome Database,2,0,60,65,70,0,presumed to be annual,
+24,nmrshiftdb2,"An NMR database (web database) for organic structures and their nuclear magnetic resonance (nmr) spectra. It allows for spectrum prediction (13C, 1H and other nuclei) as well as for searching spectra, structures and other properties. Last not least, it features peer-reviewed submission of datasets by its users.",58,01-MAY-2014 07:07:07,18-DEC-2018 00:00:00,autoloader,date,NMRShiftDB,Standard InChI and Keys available for download at http://nmrshiftdb.nmr.uni-koeln.de/nmrshiftdb2unichem.txt,http://nmrshiftdb.nmr.uni-koeln.de/portal/media-type/html/user/anon/page/default.psml/js_pane/P-Home,http://nmrshiftdb.org/molecule/,1,0,0,none,0,NMRShiftDB,2,0,25,30,40,0,monthly updates,
+25,lincs,The LINCS DCIC facilitates and standardized the information relevant to LINCS assays as described in http://www.lincsproject.org/data/data-standards/,13,11-JUL-2014 00:00:00,12-SEP-2018 00:00:00,autoloader,date,Library of Integrated Network-based Cellular Signatures,Standard InChIs and Keys downloadable fromhttp://lincs-dcic.org/metadata/SmallMolecules,http://www.lincsproject.org/,http://identifiers.org/lincs.smallmolecule/,1,0,0,none,0,LINCS,2,0,90,95,130,0,quarterly updates,
+39,chemicalbook,An online knowledge-base of chemicals and a platform of the Chinese domestic vendors in chemical industry,8,06-JUN-2018 14:00:00,03-JAN-2019 00:00:00,autoloader,date,ChemicalBook,Standard InChIs and Keys provided by email.,https://www.chemicalbook.com,https://www.chemicalbook.com/ChemicalProductProperty_EN_,1,0,0,none,0,ChemicalBook,2,0,25,30,40,0,approx 3 monthly updates,0
+20,selleck,"Selleck Chemicals is a supplier of biochemical products, including over 1,000 inhibitor products",1,25-JUL-2013 00:00:00,26-JUL-2013 00:00:00,autoloader,date,Selleck,SD file provided by email. InChIs generated by UniChem,http://www.selleckchem.com,http://www.selleckchem.com/products/,1,1,1,Auxiliary data required to create URL. Append to bse URL instead of src_compound_id,0,Selleck,1,0,100000,100000,100000,0,Need to evaluate - currently default static set,
+23,mcule,An online drug discovery platform with virtual screening and molecular modelling services.,2,17-JAN-2014 00:00:00,29-FEB-2016 00:00:00,autoloader,date,Mcule,Standard InChIs and Keys provided by email.,https://mcule.com,https://mcule.com/,1,0,0,none,0,Mcule,3,0,100000,100000,100000,1,quarterly updates,
+26,actor,ACToR (Aggregated Computational Toxicology Resource),1,02-OCT-2014 00:00:00,06-OCT-2014 00:00:00,autoloader,date,ACToR,Standard InChIs and Keys generated from SMILES from DB download,https://actor.epa.gov,http://actor.epa.gov/actor/chemical.xhtml?casrn=,1,0,0,none,0,ACToR,2,0,100000,100000,100000,0,none,
+34,drugcentral,"DrugCentral is an online drug information resource created and maintained by Division of Translational Informatics at University of New Mexico, providing information on active ingredients chemical entities, pharmaceutical products, drug mode of action, indications, pharmacologic action",5,24-FEB-2017 00:00:00,09-NOV-2018 00:00:00,autoloader,date,DrugCentral,Standard InChIs and Keys available as a download.,http://drugcentral.org,http://drugcentral.org/drugcard/,1,0,0,none,0,DrugCentral,2,0,90,95,130,0,Quarterly,
+38,rhea,An expert curated resource of biochemical reactions designed for the annotation of enzymes and genome-scale metabolic networks and models,14,15-NOV-2017 00:00:00,14-DEC-2018 00:00:00,autoloader,date,Rhea,src_compound_ids  extracted from ChEBI download file.,http://www.rhea-db.org,http://www.rhea-db.org/searchresults?q=CHEBI:,1,0,0,none,0,Rhea,2,0,29,30,32,0,monthly,

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "@datagrok/bio",
   "beta": false,
   "friendlyName": "Bio",
-  "version": "1.7.9",
+  "version": "1.7.13",
   "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
   "repository": {
     "type": "git",
@@ -11,25 +11,25 @@
   },
   "dependencies": {
     "@biowasm/aioli": ">=2.4.0",
-    "@datagrok-libraries/bio": "^2.8.4",
-    "@datagrok-libraries/utils": "^1.0.0",
-    "@datagrok-libraries/ml": "^2.0.10",
+    "@datagrok-libraries/bio": "^2.8.6",
+    "@datagrok-libraries/ml": "^3.0.0",
+    "@datagrok-libraries/utils": "^1.4.0",
     "cash-dom": "latest",
-    "datagrok-api": "^1.4.12",
-    "dayjs": "latest",
+    "datagrok-api": "^1.5.1",
+    "dayjs": "^1.11.4",
+    "openchemlib": "6.0.1",
     "rxjs": "^6.5.5",
     "ts-loader": "^9.2.5",
-    "typescript": "^4.4.2",
-    "openchemlib": "6.0.1"
+    "typescript": "^4.4.2"
   },
   "devDependencies": {
     "@types/jest": "^27.0.0",
     "@typescript-eslint/eslint-plugin": "latest",
     "@typescript-eslint/parser": "latest",
-    "eslint": "latest",
+    "eslint": "^8.20.0",
     "eslint-config-google": "latest",
     "jest": "^27.0.0",
-    "jest-html-reporter": "^3.5.0",
+    "jest-html-reporter": "^3.6.0",
     "puppeteer": "^13.7.0",
     "ts-jest": "^27.0.0",
     "webpack": "latest",

package/scripts/embed.py ADDED Viewed

@@ -0,0 +1,13 @@
+#name: Embed
+#language: python
+#input: string molecule
+#output: string sdf
+from rdkit.Chem import AllChem
+from rdkit import Chem
+mol = AllChem.MolFromMolBlock(molecule) if ("M  END" in molecule) else AllChem.MolFromSmiles(molecule)
+AllChem.EmbedMolecule(mol, AllChem.ETKDG())
+#AllChem.UFFOptimizeMolecule(mol)
+#mol = Chem.RemoveHs(mol)
+sdf = Chem.MolToMolBlock(mol)

package/src/const.ts CHANGED Viewed

@@ -23,3 +23,8 @@ export const CAP_GROUP_NAME = 'capGroupName';
 export const RGROUP_LABEL = 'label';
 export const MONOMER_SYMBOL = 'symbol';
 export const SDF_MONOMER_NAME = 'MonomerName';
+// range of hex numbers used in the PepSea library to encode monomers
+export const MONOMER_ENCODE_MIN = 0x100;
+export const MONOMER_ENCODE_MAX = 0x40A;

package/src/package.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import * as DG from 'datagrok-api/dg';
 export const _package = new DG.Package();
+import {AlignedSequenceDifferenceCellRenderer, AminoAcidsCellRenderer} from './utils/cell-renderer';
 import {WebLogo, SeqColStats} from '@datagrok-libraries/bio/src/viewers/web-logo';
 import {VdRegionsViewer} from './viewers/vd-regions-viewer';
 import {runKalign, testMSAEnoughMemory} from './utils/multiple-sequence-alignment';
@@ -15,10 +16,12 @@ import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
 import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
 import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
 import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
-import {createJsonMonomerLibFromSdf, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
+import {createJsonMonomerLibFromSdf, encodeMonomers, getMolfilesFromSeq, HELM_CORE_LIB_FILENAME} from './utils/utils';
 import {getMacroMol} from './utils/atomic-works';
 import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
 import {convert} from './utils/convert';
+import {lru} from './utils/cell-renderer';
+import {representationsWidget} from './widgets/representations';
 //tags: init
 export async function initBio(): Promise<void> {
@@ -29,6 +32,11 @@ export async function initBio(): Promise<void> {
   });
 }
+//name: Lru
+//description: Returns the lru object imported from ./utils/cell-renderer
+//output: object lruCache
+export function Lru() {
+  return lru;
+}
 //name: macromoleculeSequenceCellRenderer
 //tags: cellRenderer
@@ -104,7 +112,9 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
   similarity: number, methodName: string): Promise<void> {
   if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
     return;
+  const encodedCol = encodeMonomers(macroMolecule);
+  if (!encodedCol)
+    return;
   const axesNames = getEmbeddingColsNames(df);
   const options = {
     'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
@@ -113,6 +123,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
   await getActivityCliffs(
     df,
     macroMolecule,
+    encodedCol,
     axesNames,
     'Activity cliffs',
     activities,
@@ -138,10 +149,12 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
   similarityMetric: string = 'Levenshtein', plotEmbeddings: boolean): Promise<void> {
   if (!checkInputColumn(macroMolecule, 'Activity Cliffs'))
     return;
+  const encodedCol = encodeMonomers(macroMolecule);
+  if (!encodedCol)
+    return;
   const embedColsNames = getEmbeddingColsNames(table);
   const chemSpaceParams = {
-    seqCol: macroMolecule,
+    seqCol: encodedCol,
     methodName: methodName,
     similarityMetric: similarityMetric,
     embedAxesNames: embedColsNames
@@ -267,6 +280,17 @@ function parseMacromolecule(
   return seqArray.join('');
 }
+//name: Representations
+//tags: panel, widgets
+//input: cell macroMolecule {semType: Macromolecule}
+//output: widget result
+export async function peptideMolecule(macroMolecule: DG.Cell): Promise<DG.Widget> {
+  // The monomer library is read from the package files as text and parsed as JSON on every call.
+  const libText = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
+  const monomers: any[] = JSON.parse(libText);
+  return representationsWidget(macroMolecule, monomers);
+}
 //name: importFasta
 //description: Opens FASTA file
 //tags: file-handler
@@ -340,3 +364,69 @@ export function importFasta(fileContent: string): DG.DataFrame [] {
 export function convertPanel(col: DG.Column): void {
   convert(col);
 }
+//name: aminoAcidsCellRenderer
+//description: Creates a new AminoAcidsCellRenderer, registered for the aminoAcids cell type
+//tags: cellRenderer
+//meta.cellType: aminoAcids
+//output: grid_cell_renderer result
+export function aminoAcidsCellRenderer(): AminoAcidsCellRenderer {
+  return new AminoAcidsCellRenderer();
+}
+//name: alignedSequenceDifferenceCellRenderer
+//description: Creates a new AlignedSequenceDifferenceCellRenderer, registered for the alignedSequenceDifference cell type
+//tags: cellRenderer
+//meta.cellType: alignedSequenceDifference
+//output: grid_cell_renderer result
+export function alignedSequenceDifferenceCellRenderer(): AlignedSequenceDifferenceCellRenderer {
+  return new AlignedSequenceDifferenceCellRenderer();
+}
+//name: testDetectMacromolecule
+//description: Runs Bio:detectMacromolecule on every column of every .csv file found under the given path
+//input: string path {choices: ['Demo:Files/', 'System:AppData/']}
+//output: dataframe result
+export async function testDetectMacromolecule(path: string): Promise<DG.DataFrame> {
+  const pi = DG.TaskBarProgressIndicator.create('Test detectMacromolecule...');
+  // One row per column detected as Macromolecule, plus one row per file that failed to load or scan.
+  const res: Record<string, unknown>[] = [];
+  try {
+    const fileList = await grok.dapi.files.list(path, true, '');
+    const fileListToTest = fileList.filter((fi) => fi.fileName.endsWith('.csv'));
+    let readyCount = 0;
+    for (const fileInfo of fileListToTest) {
+      try {
+        // NOTE(review): assumes fileInfo.fullPath is relative to `path` - confirm against the files API.
+        const csv = await grok.dapi.files.readAsText(path + fileInfo.fullPath);
+        const df = DG.DataFrame.fromCsv(csv);
+        for (const col of df.columns) {
+          const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
+          if (semType === DG.SEMTYPE.MACROMOLECULE) {
+            res.push({
+              file: fileInfo.path, result: 'detected', column: col.name,
+              message: `units: ${col.getTag('units')}`
+            });
+          }
+        }
+      } catch (err: unknown) {
+        // A single broken file must not abort the scan: record the failure and continue.
+        // String(err) also copes with a thrown null/undefined, where .toString() would throw.
+        res.push({
+          file: fileInfo.path, result: 'error', column: null,
+          message: err instanceof Error ? err.message : String(err),
+        });
+      } finally {
+        readyCount += 1;
+        pi.update(100 * readyCount / fileListToTest.length, `Test ${fileInfo.fileName}`);
+      }
+    }
+    // Report the path that was actually scanned rather than a fixed 'Demo:Files'.
+    grok.shell.info(`Test ${path} for detectMacromolecule finished.`);
+  } finally {
+    // Close the indicator even when listing the files throws.
+    pi.close();
+  }
+  // The original non-null assertion implies fromObjects may return nothing (presumably for an
+  // empty list - confirm); fall back to an empty frame so setting the name cannot fail.
+  const resDf = DG.DataFrame.fromObjects(res) ?? DG.DataFrame.create(0);
+  resDf.name = `datasets_detectMacromolecule_${path}`;
+  return resDf;
+}

package/src/tests/activity-cliffs-tests.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import {readDataframe} from './utils';
 import {getEmbeddingColsNames, sequenceSpace} from '../utils/sequence-space';
 import {drawTooltip, sequenceGetSimilarities} from '../utils/sequence-activity-cliffs';
 import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
+import { encodeMonomers } from '../utils/utils';
 category('activityCliffs', async () => {
@@ -31,9 +32,11 @@ category('activityCliffs', async () => {
     const options = {
       'SPE': {cycles: 2000, lambda: 1.0, dlambda: 0.0005},
     };
+    const encodedCol = encodeMonomers(actCliffsDf.col('MSA')!) as DG.Column;
     const scatterPlot = await getActivityCliffs(
       actCliffsDf,
       actCliffsDf.col('MSA')!,
+      encodedCol,
       axesNames,
       'Activity cliffs',
       actCliffsDf.col('Activity')!,

package/src/tests/convert-test.ts CHANGED Viewed

@@ -4,7 +4,8 @@ import * as grok from 'datagrok-api/grok';
 import * as DG from 'datagrok-api/dg';
 import {ConverterFunc} from './types';
-import {NOTATION, NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
+import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
+import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
 // import {mmSemType} from '../const';
 // import {importFasta} from '../package';
@@ -25,7 +26,14 @@ category('converters', () => {
     fastaGaps = 'fastaGaps',
     separatorGaps = 'separatorGaps',
-    helmGaps = 'helmGaps'
+    helmGaps = 'helmGaps',
+    helmLoneDeoxyribose = 'helmLoneDeoxyribose',
+    helmLoneRibose = 'helmLoneRibose',
+    helmLonePhosphorus = 'helmLonePhosphorus',
+    fastaLoneDeoxyribose = 'fastaLoneDeoxyribose',
+    fastaLoneRibose = 'fastaLoneRibose',
+    fastaLonePhosphorus = 'fastaLonePhosphorus',
   }
   const _csvTxts: { [key: string]: string } = {
@@ -74,7 +82,6 @@ RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$
 RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$
 RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$
 `,
     fastaGaps: `seq
 FW-PH-EYY
 FYNRQWYV-
@@ -89,6 +96,21 @@ F/K/P//Q//S/E/Y/V
 PEPTIDE1{F.W.*.P.H.*.E.Y.Y}$$$
 PEPTIDE1{F.Y.N.R.Q.W.Y.V.*}$$$
 PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$
+`,
+    helmLoneDeoxyribose: `seq
+DNA1{D(A).D(C).D(G).D(T).D(C)}$$$
+DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}$$$
+DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}$$$
+`,
+    helmLoneRibose: `seq
+RNA1{R(A).R(C).R(G).R(U).R(C)}$$$
+RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}$$$
+RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}$$$
+`,
+    helmLonePhosphorus: `seq
+RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$
+RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}$$$
+RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
 `,
   };
@@ -195,24 +217,35 @@ PEPTIDE1{F.K.P.*.Q.*.S.E.Y.V}$$$
   // HELM tests
   // helm -> fasta
-  test('HelmDnaToFasta', async () => {
+  test('testHelmDnaToFasta', async () => {
     await _testConvert(Samples.helmDna, converter(NOTATION.FASTA), Samples.fastaDna);
   });
-  test('HelmRnaToFasta', async () => {
+  test('testHelmRnaToFasta', async () => {
     await _testConvert(Samples.helmRna, converter(NOTATION.FASTA), Samples.fastaRna);
   });
-  test('HelmPtToFasta', async () => {
+  test('testHelmPtToFasta', async () => {
     await _testConvert(Samples.helmPt, converter(NOTATION.FASTA), Samples.fastaPt);
   });
   // helm -> separator
-  test('HelmDnaToSeparator', async () => {
+  test('testHelmDnaToSeparator', async () => {
     await _testConvert(Samples.helmDna, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
   });
-  test('HelmRnaToSeparator', async () => {
+  test('testHelmRnaToSeparator', async () => {
     await _testConvert(Samples.helmRna, converter(NOTATION.SEPARATOR, '*'), Samples.separatorRna);
   });
-  test('HelmPtToSeparator', async () => {
+  test('testHelmPtToSeparator', async () => {
     await _testConvert(Samples.helmPt, converter(NOTATION.SEPARATOR, '-'), Samples.separatorPt);
   });
+  // helm miscellaneous
+  test('testHelmLoneRibose', async () => {
+    await _testConvert(Samples.helmLoneRibose, converter(NOTATION.FASTA), Samples.fastaRna);
+  });
+  test('testHelmLoneDeoxyribose', async () => {
+    await _testConvert(Samples.helmLoneDeoxyribose, converter(NOTATION.SEPARATOR, '/'), Samples.separatorDna);
+  });
+  test('testHelmLonePhosphorus', async () => {
+    await _testConvert(Samples.helmLonePhosphorus, converter(NOTATION.FASTA), Samples.fastaRna);
+  });
 });

package/src/tests/detectors-test.ts CHANGED Viewed

@@ -115,6 +115,8 @@ MWRSWY-CKHP
     testSmiles2Csv = 'testSmiles2Csv',
     testCerealCsv = 'testCerealCsv',
     testActivityCliffsCsv = 'testActivityCliffsCsv',
+    testSpgi100 = 'testSpgi100',
+    testUnichemSources = 'testUnichemSources',
   }
   const samples: { [key: string]: string } = {
@@ -132,6 +134,8 @@ MWRSWY-CKHP
     'testSmiles2Csv': 'System:AppData/Bio/tests/testSmiles2.csv',
     'testActivityCliffsCsv': 'System:AppData/Bio/tests/testActivityCliffs.csv', // smiles
     'testCerealCsv': 'System:AppData/Bio/tests/testCereal.csv',
+    'testSpgi100': 'System:AppData/Bio/tests/testSpgi100.csv',
+    'testUnichemSources': 'System:AppData/Bio/tests/testUnichemSources.csv',
   };
   const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
@@ -141,7 +145,7 @@ MWRSWY-CKHP
       if (!(key in _samplesDfs)) {
         _samplesDfs[key] = (async (): Promise<DG.DataFrame> => {
           const df: DG.DataFrame = await readFile(samples[key]);
-          await grok.data.detectSemanticTypes(df);
+          // await grok.data.detectSemanticTypes(df);
           return df;
         })();
       }
@@ -318,19 +322,48 @@ MWRSWY-CKHP
   test('samplesTestCerealNegativeCerealName', async () => {
     await _testNeg(readSamples(Samples.testCerealCsv), 'cereal_name');
   });
+  test('samplesTestSpgi100NegativeStereoCategory', async () => {
+    await _testNeg(readSamples(Samples.testSpgi100), 'Stereo Category');
+  });
+  test('samplesTestSpgi100NegativeScaffoldNames', async () => {
+    await _testNeg(readSamples(Samples.testSpgi100), 'Scaffold Names');
+  });
+  test('samplesTestSpgi100NegativePrimaryScaffoldName', async () => {
+    await _testNeg(readSamples(Samples.testSpgi100), 'Primary Scaffold Name');
+  });
+  test('samplesTestUnichemSourcesNegativeSrcUrl', async () => {
+    await _testNeg(readSamples(Samples.testUnichemSources), 'src_url');
+  });
+  test('samplesTestUnichemSourcesNegativeBaseIdUrl', async () => {
+    await _testNeg(readSamples(Samples.testUnichemSources), 'base_id_url');
+  });
 });
 export async function _testNeg(readDf: DfReaderFunc, colName: string) { // Negative detector test: throws when column `colName` ends up with the MACROMOLECULE semantic type.
   const df: DG.DataFrame = await readDf();
   const col: DG.Column = df.col(colName)!; // NOTE(review): non-null assertion — a misspelled colName is not checked here.
-  expect(col.semType === DG.SEMTYPE.MACROMOLECULE, false);
+  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col}); // invoke the detector function directly on this column
+  if (semType) // only overwrite col.semType when the call returned a truthy value
+    col.semType = semType;
+  if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
+    const msg = `Negative test detected semType='${col.semType}', units='${col.getTag(DG.TAGS.UNITS)}'.`; // include the units tag in the failure message
+    throw new Error(msg);
+    // col.semType = '';
+    // col.setTag(DG.TAGS.UNITS, '');
+    // col.setTag('separator', '');
+  }
 }
 export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
   const df: DG.DataFrame = await readDf();
   const col: DG.Column = df.col(colName)!;
+  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
+  if (semType)
+    col.semType = semType;
   expect(col.semType === DG.SEMTYPE.MACROMOLECULE, true);
   expect(col.getTag(DG.TAGS.UNITS), units);
   if (separator)

package/src/tests/renderers-test.ts CHANGED Viewed

@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
 import {importFasta, multipleSequenceAlignmentAny} from '../package';
 import {readDataframe} from './utils';
 import {convertDo} from '../utils/convert';
-import {NOTATION} from '@datagrok-libraries/bio/src/utils/notation-converter';
+import {NOTATION} from '@datagrok-libraries/bio/src/utils/units-handler';
 category('renderers', () => {
   let tvList: DG.TableView[];

package/src/utils/cell-renderer.ts CHANGED Viewed

@@ -7,7 +7,7 @@ import {SplitterFunc, WebLogo} from '@datagrok-libraries/bio/src/viewers/web-log
 import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
 import * as ui from 'datagrok-api/ui';
-const lru = new DG.LruCache<any, any>();
+export const lru = new DG.LruCache<any, any>();
 const undefinedColor = 'rgb(100,100,100)';
 const grayColor = '#808080'
@@ -155,6 +155,11 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
         if (!formula) {
           gridCell.element = ui.divText(gridCell.cell.value, {style: {color: 'red'}});
         }
+        const molWeight = Math.round(canvas.getMolWeight() * 100) / 100;
+        const coef = Math.round(canvas.getExtinctionCoefficient(true) * 100) / 100;
+        const molfile = canvas.getMolfile();
+        const result = formula + ', ' + molWeight + ', ' + coef + ', ' + molfile;
+        lru.set(gridCell.cell.value, result);
         return;
       }
       if (monomers.size > 0) {
@@ -222,3 +227,111 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
     }
   }
 }
+export class AminoAcidsCellRenderer extends DG.GridCellRenderer {
+  get name(): string {return 'aminoAcidsCR';}
+  get cellType(): string {return C.SEM_TYPES.AMINO_ACIDS;}
+  get defaultHeight(): number {return 15;}
+  get defaultWidth(): number {return 30;}
+  /**
+     * Cell renderer function: clips drawing to the cell rectangle and passes the cell value to printLeftOrCentered
+     * together with the color that the column's palette returns for it; a falsy cell value is replaced by '-'.
+     * @param {CanvasRenderingContext2D} g Canvas rendering context.
+     * @param {number} x x coordinate on the canvas.
+     * @param {number} y y coordinate on the canvas.
+     * @param {number} w width of the cell.
+     * @param {number} h height of the cell.
+     * @param {DG.GridCell} gridCell Grid cell.
+     * @param {DG.GridCellStyle} cellStyle Cell style (not read by this method).
+     */
+  render(
+    g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
+    cellStyle: DG.GridCellStyle): void {
+    y -= 2; // NOTE(review): moves the drawing origin up by 2; the reason is not visible in this file section
+    g.save(); // paired with g.restore() at the end so the clip and font settings do not leak to other cells
+    g.beginPath();
+    g.rect(x, y, w, h);
+    g.clip(); // restrict drawing to the rectangle defined on the previous line
+    g.font = `12px monospace`;
+    g.textBaseline = 'top';
+    const palette = getPalleteByType(gridCell.tableColumn!.tags[C.TAGS.ALPHABET]); // palette is chosen by the column's alphabet tag
+    const s: string = gridCell.cell.value ? gridCell.cell.value : '-'; // '-' stands in for a falsy (empty/null) value
+    const color = palette.get(s);
+    printLeftOrCentered(x, y, w, h, g, s, color, 0, false);
+    g.restore();
+  }
+}
+export class AlignedSequenceDifferenceCellRenderer extends DG.GridCellRenderer {
+  get name(): string {return 'alignedSequenceDifferenceCR';}
+  get cellType(): string {return C.SEM_TYPES.ALIGNED_SEQUENCE_DIFFERENCE;}
+  get defaultHeight(): number {return 30;}
+  get defaultWidth(): number {return 230;}
+  /**
+   * Cell renderer function. The cell value is read as two sequences joined by '#', each split by the column's
+   * separator tag; equal positions are printed once, differing positions are printed one above the other.
+   * An empty value, a value without '#', or a shorter second sequence is treated as having empty monomers.
+   * @param {CanvasRenderingContext2D} g Canvas rendering context.
+   * @param {number} x x coordinate on the canvas.
+   * @param {number} y y coordinate on the canvas.
+   * @param {number} w width of the cell.
+   * @param {number} h height of the cell.
+   * @param {DG.GridCell} gridCell Grid cell.
+   * @param {DG.GridCellStyle} cellStyle Cell style (not read by this method).
+   * @memberof AlignedSequenceDifferenceCellRenderer
+   */
+  render(
+    g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
+    cellStyle: DG.GridCellStyle): void {
+    const grid = gridCell.grid;
+    const cell = gridCell.cell;
+    w = grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x; // never draw wider than the canvas space to the right of x
+    g.save(); // paired with g.restore() below so the clip and font settings do not leak
+    g.beginPath();
+    g.rect(x, y, w, h);
+    g.clip();
+    g.font = '12px monospace';
+    g.textBaseline = 'top';
+    const s: string = cell.value ?? '';
+    //TODO: can this be replaced/merged with splitSequence?
+    const [s1 = '', s2 = ''] = s.split('#'); // defaults: ''.split('#') has one element, so s2 would be undefined and s2.split() would throw
+    const separator = gridCell.tableColumn!.tags[C.TAGS.SEPARATOR];
+    const subParts1 = s1.split(separator);
+    const subParts2 = s2.split(separator);
+    const [text] = processSequence(subParts1);
+    const textSize = g.measureText(text.join(''));
+    let updatedX = Math.max(x, x + (w - (textSize.width + subParts1.length * 4)) / 2); // centre the row horizontally; 4 is the per-monomer gap added in the loop
+    // 28 is the height of the two substitutions on top of each other + space
+    const updatedY = Math.max(y, y + (h - 28) / 2);
+    const palette = getPalleteByType(gridCell.tableColumn!.tags[C.TAGS.ALPHABET]);
+    for (let i = 0; i < subParts1.length; i++) {
+      const amino1 = subParts1[i];
+      const amino2 = subParts2[i] ?? ''; // the second sequence may be shorter than the first; avoid passing undefined on
+      const color1 = palette.get(amino1);
+      const color2 = palette.get(amino2);
+      if (amino1 !== amino2) {
+        const vShift = 7;
+        const subX0 = printLeftOrCentered(updatedX, updatedY - vShift, w, h, g, amino1, color1, 0, true);
+        const subX1 = printLeftOrCentered(updatedX, updatedY + vShift, w, h, g, amino2, color2, 0, true);
+        updatedX = Math.max(subX1, subX0); // continue after the wider of the two stacked monomers
+      } else
+        updatedX = printLeftOrCentered(updatedX, updatedY, w, h, g, amino1, color1, 0, true, 0.5);
+      updatedX += 4;
+    }
+    g.restore();
+  }
+}

package/src/utils/constants.ts CHANGED Viewed

@@ -17,8 +17,9 @@ export enum CATEGORIES {
 export enum TAGS {
   AAR = 'AAR',
   POSITION = 'Pos',
-  SEPARATOR = 'monomer-separator',
+  SEPARATOR = 'separator', // NOTE(review): key changed from 'monomer-separator'; presumably columns tagged under the old key no longer match — confirm
   SELECTION = 'selection',
+  ALPHABET = 'alphabet', // presumably the column tag the cell renderers read as C.TAGS.ALPHABET — confirm that C aliases this module
 }
 export enum SEM_TYPES {