npm - @datagrok/sequence-translator - Versions diffs - 1.0.17 → 1.1.0 - Mend

@datagrok/sequence-translator 1.0.17 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)

package/.eslintrc.json +4 -3
package/CHANGELOG.md +3 -0
package/detectors.js +8 -0
package/dist/package-test.js +2 -73079
package/dist/package-test.js.map +1 -0
package/dist/package.js +2 -72284
package/dist/package.js.map +1 -0
package/files/axolabs-style.json +97 -0
package/files/codes-to-symbols.json +66 -0
package/files/formats-to-helm.json +59 -0
package/files/linkers.json +22 -0
package/files/monomer-lib.json +1094 -0
package/link-bio +7 -0
package/package.json +30 -28
package/scripts/build-monomer-lib.py +391 -122
package/src/demo/demo-st-ui.ts +71 -0
package/src/demo/handle-error.ts +12 -0
package/src/model/axolabs/axolabs-tab.ts +111 -0
package/src/model/axolabs/const.ts +33 -0
package/src/{axolabs-tab → model/axolabs}/draw-svg.ts +1 -1
package/src/{axolabs-tab → model/axolabs}/helpers.ts +7 -5
package/src/model/const.ts +19 -0
package/src/model/data-loading-utils/const.ts +8 -0
package/src/model/data-loading-utils/json-loader.ts +38 -0
package/src/model/data-loading-utils/types.ts +30 -0
package/src/model/format-translation/const.ts +8 -0
package/src/model/format-translation/conversion-utils.ts +48 -0
package/src/model/format-translation/format-converter.ts +107 -0
package/src/model/helpers.ts +12 -0
package/src/model/monomer-lib/const.ts +3 -0
package/src/model/monomer-lib/lib-wrapper.ts +106 -0
package/src/model/parsing-validation/format-detector.ts +57 -0
package/src/model/parsing-validation/sequence-validator.ts +52 -0
package/src/model/sequence-to-structure-utils/const.ts +1 -0
package/src/{utils/structures-works → model/sequence-to-structure-utils}/mol-transformations.ts +33 -41
package/src/model/sequence-to-structure-utils/monomer-code-parser.ts +92 -0
package/src/model/sequence-to-structure-utils/sdf-tab.ts +94 -0
package/src/model/sequence-to-structure-utils/sequence-to-molfile.ts +409 -0
package/src/package.ts +104 -92
package/src/tests/const.ts +17 -0
package/src/tests/smiles-tests.ts +32 -457
package/src/view/const/main-tab.ts +3 -0
package/src/view/const/view.ts +10 -0
package/src/view/css/axolabs-tab.css +1 -0
package/src/view/css/colored-text-input.css +27 -0
package/src/view/css/main-tab.css +46 -0
package/src/view/css/sdf-tab.css +39 -0
package/src/view/monomer-lib-viewer/viewer.ts +22 -0
package/src/view/tabs/axolabs.ts +720 -0
package/src/view/tabs/main.ts +174 -0
package/src/view/tabs/sdf.ts +173 -0
package/src/view/utils/app-info-dialog.ts +18 -0
package/src/view/utils/colored-input/colored-text-input.ts +56 -0
package/src/view/utils/colored-input/input-painters.ts +44 -0
package/src/view/utils/draw-molecule.ts +86 -0
package/src/view/utils/molecule-img.ts +106 -0
package/src/view/view.ts +129 -0
package/tsconfig.json +12 -18
package/webpack.config.js +17 -4
package/README.md +0 -84
package/css/style.css +0 -18
package/img/Sequence Translator Axolabs.png +0 -0
package/jest.config.js +0 -33
package/setup-unlink-clean.cmd +0 -14
package/setup-unlink-clean.sh +0 -21
package/setup.cmd +0 -14
package/setup.sh +0 -37
package/src/__jest__/remote.test.ts +0 -77
package/src/__jest__/test-node.ts +0 -97
package/src/apps/oligo-sd-file-app.ts +0 -58
package/src/autostart/calculations.ts +0 -40
package/src/autostart/constants.ts +0 -37
package/src/autostart/registration.ts +0 -306
package/src/axolabs-tab/axolabs-tab.ts +0 -873
package/src/axolabs-tab/define-pattern.ts +0 -874
package/src/hardcode-to-be-eliminated/ICDs.ts +0 -3
package/src/hardcode-to-be-eliminated/IDPs.ts +0 -3
package/src/hardcode-to-be-eliminated/const.ts +0 -5
package/src/hardcode-to-be-eliminated/constants.ts +0 -101
package/src/hardcode-to-be-eliminated/converters.ts +0 -323
package/src/hardcode-to-be-eliminated/map.ts +0 -720
package/src/hardcode-to-be-eliminated/salts.ts +0 -2
package/src/hardcode-to-be-eliminated/sources.ts +0 -3
package/src/hardcode-to-be-eliminated/users.ts +0 -3
package/src/main-tab/main-tab.ts +0 -210
package/src/sdf-tab/sdf-tab.ts +0 -163
package/src/sdf-tab/sequence-codes-tools.ts +0 -347
package/src/utils/const.ts +0 -0
package/src/utils/helpers.ts +0 -28
package/src/utils/parse.ts +0 -27
package/src/utils/sdf-add-columns.ts +0 -118
package/src/utils/sdf-save-table.ts +0 -56
package/src/utils/structures-works/draw-molecule.ts +0 -84
package/src/utils/structures-works/from-monomers.ts +0 -266
package/test-SequenceTranslator-6288c2fbe346-695b7b55.html +0 -259
package/vendors/openchemlib-full.js +0 -293

package/link-bio ADDED

@@ -0,0 +1,7 @@
+#!/bin/bash
+cd ../../libraries/bio
+./cleanup && npm i && npm run build && npm link
+cd -
+npm link @datagrok-libraries/bio

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
   "name": "@datagrok/sequence-translator",
   "friendlyName": "Sequence Translator",
-  "version": "1.0.17",
+  "version": "1.1.0",
   "author": {
     "name": "Alexey Choposky",
     "email": "achopovsky@datagrok.ai"
@@ -13,12 +13,13 @@
     "directory": "packages/SequenceTranslator"
   },
   "dependencies": {
+    "@datagrok-libraries/chem-meta": "^1.0.9",
     "@datagrok-libraries/utils": "^1.17.2",
+    "@datagrok-libraries/tutorials": "^1.3.2",
+    "cash-dom": "^8.1.0",
+    "datagrok-api": "^1.10.2",
     "@types/react": "^18.0.15",
-    "@datagrok-libraries/bio": "^5.11.1",
-    "@deck.gl/core": "8.8.12",
-    "@luma.gl/core": "8.5.17",
-    "datagrok-api": "^1.8.2",
+    "@datagrok-libraries/bio": "^5.32.1",
     "datagrok-tools": "^4.1.2",
     "npm": "^8.11.0",
     "openchemlib": "6.0.1",
@@ -27,26 +28,23 @@
     "typescript": "^4.7.4"
   },
   "devDependencies": {
-    "@types/jest": "^27.0.0",
     "@types/jquery": "^3.5.14",
-    "@typescript-eslint/eslint-plugin": "^4.29.1",
-    "@typescript-eslint/parser": "^4.29.1",
-    "cash-dom": "^8.1.0",
-    "eslint": "^7.32.0",
-    "eslint-config-google": "^0.14.0",
-    "jest": "^27.0.0",
-    "jest-html-reporter": "^3.5.0",
-    "puppeteer": "^13.7.0",
-    "ts-jest": "^27.0.0",
-    "webpack": "^5.31.0",
-    "webpack-cli": "^4.6.0",
     "@types/js-yaml": "^4.0.5",
-    "js-yaml": "^4.1.0",
     "@types/node-fetch": "^2.6.2",
-    "node-fetch": "^2.6.7"
-  },
-  "grokDependencies": {
-    "@datagrok/chem": "1.3.32"
+    "@types/react": "^18.0.15",
+    "@typescript-eslint/eslint-plugin": "latest",
+    "@typescript-eslint/parser": "parser",
+    "css-loader": "^6.7.3",
+    "datagrok-tools": "^4.7.10",
+    "eslint": "^7.32.0",
+    "eslint-config-google": "latest",
+    "style-loader": "^3.3.1",
+    "ts-loader": "^9.3.1",
+    "typescript": "^4.7.4",
+    "webpack": "^5.75.0",
+    "webpack-cli": "latest",
+    "@datagrok/chem": "1.5.7",
+    "@datagrok/bio": "^2.1.12"
   },
   "scripts": {
     "link-api": "npm link datagrok-api",
@@ -59,13 +57,17 @@
     "release-sequencetranslator-public": "grok publish public --release",
     "debug-sequencetranslator-local": "grok publish local",
     "release-sequencetranslator-local": "grok publish local --release",
-    "test": "jest",
-    "test-dev": "set HOST=dev && jest",
-    "test-local": "set HOST=localhost && jest"
+    "lint": "eslint \"./src/**/*.ts\"",
+    "lint-fix": "eslint \"./src/**/*.ts\" --fix",
+    "test": "grok test",
+    "test-dev": "grok test --host dev",
+    "test-local": "grok test --host localhost"
   },
-  "sources": [
-    "css/style.css",
-    "vendors/openchemlib-full.js"
+  "canEdit": [
+    "Developers"
+  ],
+  "canView": [
+    "All users"
   ],
   "category": "Bioinformatics"
 }

package/scripts/build-monomer-lib.py CHANGED

@@ -1,178 +1,447 @@
+# pylint: disable=no-member
+import os.path
+import sys
 from io import TextIOWrapper
+from typing import Optional
 from rdkit import Chem
+from rdkit.Chem.rdchem import Mol
 import orjson
 import click
-from click_default_group import DefaultGroup
-from rdkit.Chem.rdchem import Mol
-def molAddCollection(mol: Mol, name: str, title: str = None) -> str:
+BEGIN_ATOM_LINE = 'M  V30 BEGIN ATOM'
+END_ATOM_LINE = 'M  V30 END ATOM'
+BEGIN_BOND_LINE = 'M  V30 BEGIN BOND'
+END_BOND_LINE = 'M  V30 END BOND'
+BEGIN_COLLECTION_LINE = 'M  V30 BEGIN COLLECTION'
+END_COLLECTION_LINE = 'M  V30 END COLLECTION'
+COLLECTION_STEABS_LINE = 'M  V30 MDLV30/STEABS'
+IDX_OF_FIRST_VALUE = 7
+NUM_OF_BOND_POSITIONAL_ARGS = 4
+CFG = "CFG="
+def mol_add_collection(mol: Mol,
+                       name: str,
+                       title: Optional[str] = None,
+                       src_mol: Optional[str] = None) -> str:
     """
     Get and postprocess (atom's CFG, title, e.t.c.) molblock
-    :param mol:    Mol molecule structure / object
-    :param name:   Monomer name to add to molblock title
-    :param title:  title to replace in Chem.MolToMolBlock() string output
-    :return:       molblock string
+    :param mol:      Mol molecule structure / object
+    :param name:     Monomer name to add to molblock title string
+    :param title:    Title to replace in Chem.MolToMolBlock() string output
+    :param src_mol:  Source molblock data, to restore optional CFG
+    :return:         molblock string
     """
     res: str = Chem.MolToMolBlock(mol, forceV3000=True)  # MolToMolFile
-    mb_line_list: list[str] = res.split('\n')
+    molblock_line_list: list[str] = res.split('\n')
     if title:
-        mb_line_list[1] = title
+        molblock_line_list[1] = title
-    if name and name not in mb_line_list[1]:
-        mb_line_list[1] += '|' + name
+    if name and name not in molblock_line_list[1]:
+        molblock_line_list[1] += '|' + name
-    end_bond_idx: int = mb_line_list.index('M  V30 END BOND')
     chirality = [atom.GetChiralTag() for atom in mol.GetAtoms()]
-    begin_atom_idx = mb_line_list.index('M  V30 BEGIN ATOM')
-    end_atom_idx = mb_line_list.index('M  V30 END ATOM')
-    for atom_idx in range(1, end_atom_idx - begin_atom_idx):
-        line_idx = begin_atom_idx + atom_idx
-        atom_ch = chirality[atom_idx - 1]
-        if atom_ch != Chem.rdchem.CHI_UNSPECIFIED:
-            mb_line_list[line_idx] += " CFG={0}".format(int(atom_ch))
-    steabs: list[int] = [i + 1 for (i, ch) in enumerate(chirality) if ch != Chem.rdchem.CHI_UNSPECIFIED]
-    if len(steabs) > 0:
-        steabs_str: str = "M  V30 MDLV30/STEABS ATOMS=({count} {list})" \
-            .format(count=len(steabs), list=' '.join([str(idx) for idx in steabs]))
-        mb_line_list = mb_line_list[:(end_bond_idx + 1)] + \
-                       ["M  V30 BEGIN COLLECTION", steabs_str, "M  V30 END COLLECTION"] + \
-                       mb_line_list[(end_bond_idx + 1):]
-    return '\n'.join(mb_line_list)
+    # preserve chirality for bonds from src_mol
+    tgt_mol_file_map = MolFileMap.parse(res)
+    steabs = []
+    if src_mol:
+        src_mol_file_map = MolFileMap.parse(src_mol)
+        if len(tgt_mol_file_map.mol_file.atom_list) != len(src_mol_file_map.mol_file.atom_list):
+            raise ValueError(f"Atoms count of src and tgt differs for monomer '{name}'.")
+        # restore bond cfg values lost/transformed by rdkit
+        for (src_bond_idx0, (bond_key, src_bond)) in enumerate(src_mol_file_map.mol_file.bonds.items()):
+            if src_bond.cfg:
+                if bond_key not in tgt_mol_file_map.mol_file.bonds:
+                    raise KeyError(f"Bond key '{bond_key}' not found in tgt bonds.")
+                tgt_bond: MolFileBond = tgt_mol_file_map.mol_file.bonds[bond_key]
+                tgt_bond_cfg_str: str = ' '.join(tgt_bond.cfg)
+                src_bond_cfg_str: str = ' '.join(src_bond.cfg)
+                if tgt_bond_cfg_str != src_bond_cfg_str:
+                    molblock_line_list[tgt_mol_file_map.begin_bond_idx + tgt_bond.bond_idx] += f" {src_bond_cfg_str}"
+        # remove bond cfg values added by rdkit
+        for (tgt_bond_idx0, (bond_key, tgt_bond)) in enumerate(tgt_mol_file_map.mol_file.bonds.items()):
+            if tgt_bond.cfg:
+                if bond_key not in src_mol_file_map.mol_file.bonds:
+                    raise KeyError(f"Bond key '{bond_key}' not found in src bonds.")
+                src_bond: MolFileBond = src_mol_file_map.mol_file.bonds[bond_key]
+                src_bond_cfg_str: str = ' '.join(src_bond.cfg)
+                tgt_bond_cfg_str: str = ' '.join(tgt_bond.cfg)
+                if tgt_bond_cfg_str != src_bond_cfg_str:
+                    new_line = molblock_line_list[tgt_mol_file_map.begin_bond_idx + tgt_bond.bond_idx].replace(tgt_bond_cfg_str, "")
+                    molblock_line_list[tgt_mol_file_map.begin_bond_idx + tgt_bond.bond_idx] = new_line
+        for (tgt_atom_idx0, tgt_atom) in enumerate(tgt_mol_file_map.mol_file.atom_list):
+            src_atom = src_mol_file_map.mol_file.atom_list[tgt_atom_idx0]
+            atom_chirality = chirality[tgt_atom_idx0]
+            if src_atom.cfg:
+                molblock_line_list[tgt_mol_file_map.begin_atom_idx + tgt_atom_idx0 + 1] += " {0}".format(
+                    ' '.join(src_atom.cfg))
+                steabs.append(tgt_atom_idx0 + 1)
+            elif atom_chirality != Chem.rdchem.CHI_UNSPECIFIED:
+                molblock_line_list[tgt_mol_file_map.begin_atom_idx + tgt_atom_idx0 + 1] += " CFG={0}".format(int(atom_chirality))
+                steabs.append(tgt_atom_idx0 + 1)
+            elif src_atom.atom_idx in src_mol_file_map.mol_file.collection_steabs:
+                raise KeyError(f"Source STEABS atom '{src_atom}' not accounted")
+            elif tgt_atom.atom_idx in tgt_mol_file_map.mol_file.collection_steabs:
+                raise KeyError(f"Target STEABS atom '{tgt_atom}' not accounted")
-def molfile2molfile(src_mol: str, name: str) -> str:
-    mol: Mol = Chem.MolFromMolBlock(src_mol)
-    src_mf_lines = src_mol.split('\n')
-    title = src_mf_lines[1]
-    return molAddCollection(mol, name, title=title)
-def smiles2molfile(smiles: str, name: str) -> str:
-    mol: Mol = Chem.MolFromSmiles(smiles)
-    return molAddCollection(mol, name)
-CodesType = dict[str, dict[str, list[str]]]
+    if len(steabs) > 0:
+        steabs_str: str = COLLECTION_STEABS_LINE + " ATOMS=({count} {list})".format(
+            count=len(steabs),
+            list=' '.join([str(idx) for idx in steabs]))
+        if tgt_mol_file_map.collection_steabs_idx:
+            molblock_line_list[tgt_mol_file_map.collection_steabs_idx] = steabs_str
+        elif tgt_mol_file_map.begin_collection_idx is not None:
+            tgt_collection_steabs_idx = tgt_mol_file_map.begin_collection_idx + 1
+            molblock_line_list = molblock_line_list[:tgt_collection_steabs_idx] + \
+                           [steabs_str] + \
+                           molblock_line_list[tgt_collection_steabs_idx:]
+        else:
+            tgt_collection_idx = tgt_mol_file_map.end_bond_idx + 1
+            molblock_line_list = molblock_line_list[:tgt_collection_idx] + \
+                           [BEGIN_COLLECTION_LINE, steabs_str, END_COLLECTION_LINE] + \
+                           molblock_line_list[tgt_collection_idx:]
+    return '\n'.join(molblock_line_list)
+def prepare_molblock(src_molblock: str, name: str) -> str:
+    """Loads mol from src_mol str. Fixed title, adds chirality to atoms and preserves chirality for bonds."""
+    # Using sanitize=False leads to unwanted moving stereo (invalid?) CFGs to other bonds
+    mol: Mol = Chem.MolFromMolBlock(src_molblock, removeHs=False)
+    src_molblock_lines = src_molblock.split('\n')
+    title = src_molblock_lines[1]
+    return mol_add_collection(mol, name, title=title, src_mol=src_molblock)
 class Monomer:
-    def __init__(self,
-                 symbol: str, name: str, molfile: str, smiles: str,
-                 codes: CodesType):
+    def __init__(self, symbol: str, name: str, molfile: str, smiles: str,
+                 meta: dict):
         self.monomerType = 'Backbone'
         self.smiles = smiles
         self.name = name
         self.author = 'SequenceTranslator'
-        self.molfile = molfile2molfile(molfile, name) if molfile else smiles2molfile(smiles, name)
-        self.naturalAnalog = ''
-        self.rgroups = [
-            {
-                "capGroupSmiles": "O[*:1]",
-                "alternateId": "R1-OH",
-                "capGroupName": "OH",
-                "label": "R1"
-            },
-            {
-                "capGroupSmiles": "O[*:2]",
-                "alternateId": "R2-OH",
-                "capGroupName": "OH",
-                "label": "R2"
-            }]
+        self.molfile = prepare_molblock(molfile, name)
+        self.rgroups = [{
+            "capGroupSmiles": "O[*:1]",
+            "alternateId": "R1-OH",
+            "capGroupName": "OH",
+            "label": "R1"
+        }, {
+            "capGroupSmiles": "O[*:2]",
+            "alternateId": "R2-OH",
+            "capGroupName": "OH",
+            "label": "R2"
+        }]
         self.createDate = None
         self.id = 0
         self.polymerType = 'RNA'
         self.symbol = symbol
-        self.codes: CodesType = codes
+        self.meta = meta
     @staticmethod
     def from_json(src_json: {}):
         obj = Monomer(src_json['symbol'], src_json['name'],
                       src_json['molfile'], src_json['smiles'],
-                      src_json['codes'])
+                      src_json['meta'])
         return obj
     def to_json(self):
         return {
-            'monomerType': self.monomerType,
-            'smiles': self.smiles,
+            'symbol': self.symbol,
             'name': self.name,
-            'author': self.author,
             'molfile': self.molfile,
-            'naturalAnalog': self.naturalAnalog,
-            'rgroups': self.rgroups,
-            'createDate': self.createDate,
+            'author': self.author,
             'id': self.id,
+            'rgroups': self.rgroups,
+            'smiles': self.smiles,
             'polymerType': self.polymerType,
-            'symbol': self.symbol,
-            'codes': self.codes,
+            'monomerType': self.monomerType,
+            'createDate': self.createDate,
+            'meta': self.meta,
         }
-def codes2monomers(codes_json: {}) -> dict[str, Monomer]:
-    monomers_res: dict[str, Monomer] = {}
-    for (codes_src, src_dict) in codes_json.items():
-        for (codes_type, monomers_dict) in src_dict.items():
-            for (codes_code, monomer_json) in monomers_dict.items():
-                monomer_name = monomer_json['name']
-                if monomer_name not in monomers_res:
-                    symbol = monomer_json['name']
-                    name = monomer_json['name']
-                    smiles = monomer_json['SMILES']
-                    monomers_res[monomer_name] = Monomer(symbol, name, None, smiles, {})
-                codes = monomers_res[monomer_name].codes
-                if codes_src not in codes:
-                    codes[codes_src] = {}
-                if codes_type not in codes[codes_src]:
-                    codes[codes_src][codes_type] = [];
-                codes[codes_src][codes_type].append(codes_code)
-    return monomers_res
-@click.group(cls=DefaultGroup, default='main')
-def cli():
-    pass
-@cli.command()
-@click.pass_context
-@click.option('--initial', 'initial_f',
-              help='Initial monomers source file.',
-              type=click.File('r', 'utf-8'))
-@click.option('--lib', 'lib_f',
+class MolFileAtom:
+    """
+    Wrapper for data extracted from molfile atom line
+    """
+    def __init__(self, v3k_atom_line: str):
+        self._atom_line = v3k_atom_line
+        self._atom_line_splitted: [] = self.\
+            _atom_line[IDX_OF_FIRST_VALUE:].split(' ')
+        self._atom_idx = int(self._atom_line_splitted[0].strip())
+        # we cannot use positional argument for cfg for it is a kwarg
+        cfg_item = list(filter(
+            lambda x: x.startswith(CFG), self._atom_line_splitted
+            ))
+        self._cfg = cfg_item
+    @property
+    def atom_line_str(self) -> str:
+        return self._atom_line
+    @property
+    def atom_idx(self):
+        return self._atom_idx
+    @property
+    def atom_line_splitted(self) -> list[str]:
+        return self.atom_line_splitted
+    @property
+    def cfg(self) -> list[str]:
+        return self._cfg
+    @property
+    def cfg_int(self) -> int:
+        return self._cfg
+    def __str__(self):
+        return self._atom_line
+    def __repr__(self):
+        return str(self)
+class MolFileBond:
+    """
+    Wrapper for data extracted from molfile bond line
+    """
+    def __init__(self, v3k_bond_line: str):
+        self._bond_line = v3k_bond_line
+        self._bond_line_splitted: [] = self.\
+            _bond_line[IDX_OF_FIRST_VALUE:].split(' ')
+        self._bond_idx = int(self._bond_line_splitted[0].strip())
+        self._key = self._bond_line_splitted[0:NUM_OF_BOND_POSITIONAL_ARGS]
+        cfg_item = list(filter(
+            lambda x: x.startswith(CFG), self._bond_line_splitted
+            ))
+        self._cfg = cfg_item
+    @property
+    def bond_line(self) -> str:
+        return self._bond_line
+    @property
+    def bond_idx(self):
+        return self._bond_idx
+    @property
+    def bond_line_splitted(self) -> list[str]:
+        return self._bond_line_splitted
+    @property
+    def key(self):
+        return self._key
+    @property
+    def cfg(self) -> list[str]:
+        return self._cfg
+    def __str__(self):
+        return self._bond_line
+    def __repr__(self):
+        return str(self)
+class MolFileV3K:
+    """
+    Wrapper for data extracted from molfile
+    """
+    def __init__(
+            self, title: str, atom_list: list[MolFileAtom],
+            bond_list: list[MolFileBond],
+            collection_steabs: list[int] = None
+            ):
+        self._title = title
+        self._atom_list = atom_list
+        self._bond_list = bond_list
+        self.collection_steabs = [] if collection_steabs is None \
+            else collection_steabs
+        self._bonds: dict = {}
+        for bond in self._bond_list:
+            # list is unhashable type, but tuple is
+            bond_key = tuple((int(v) for v in bond.key))
+            self._bonds[bond_key] = bond
+    @property
+    def atom_list(self):
+        return self._atom_list
+    @property
+    def bond_list(self):
+        return self._bond_list
+    @property
+    def bonds(self) -> dict:
+        return self._bonds
+class MolFileMap:
+    def __init__(self, src: str, mol_file_obj: MolFileV3K,
+                 atom_block_idx_boundaries: tuple[int, int],
+                 bond_block_idx_boundaries: tuple[int, int],
+                 collection_idx_boundaries: tuple[int, int] = None,
+                 collection_steabs_idx: int = None):
+        self._src = src
+        self._mol_file = mol_file_obj
+        self.begin_atom_idx = atom_block_idx_boundaries[0]
+        self.end_atom_idx = atom_block_idx_boundaries[1]
+        self.begin_bond_idx = bond_block_idx_boundaries[0]
+        self.end_bond_idx = bond_block_idx_boundaries[1]
+        self.begin_collection_idx = None if collection_idx_boundaries is None \
+            else collection_idx_boundaries[0]
+        self.end_collection_idx = None if collection_idx_boundaries is None \
+            else collection_idx_boundaries[1]
+        self.collection_steabs_idx = collection_steabs_idx
+    @property
+    def src(self):
+        return self._src
+    @property
+    def mol_file(self):
+        return self._mol_file
+    @staticmethod
+    def parse(molblock_src: str):
+        molblock_line_list: list[str] = \
+            [line.rstrip() for line in molblock_src.split('\n')]
+        title: str = molblock_line_list[1]
+        def get_idx_boundaries(begin_str: str, end_str: str):
+            return tuple([
+                molblock_line_list.index(begin_str),
+                molblock_line_list.index(end_str)
+                ])
+        def get_wrapper_list(
+                begin_idx: int, end_idx: int, wrapper_constructor
+                ):
+            """
+            For the list of atom/bond wrapper objects
+            """
+            item_count = end_idx - begin_idx - 1  # for atoms or bonds
+            wrapper_list = [None] * item_count
+            for item_idx in range(1, item_count + 1):
+                line_idx = begin_idx + item_idx
+                line = molblock_line_list[line_idx]
+                item = wrapper_constructor(line)
+                wrapper_list[item_idx - 1] = item
+            return wrapper_list
+        atom_block_idx_boundaries = get_idx_boundaries(
+                BEGIN_ATOM_LINE, END_ATOM_LINE)
+        bond_block_idx_boundaries = get_idx_boundaries(
+                BEGIN_BOND_LINE, END_BOND_LINE)
+        atom_list = get_wrapper_list(
+                atom_block_idx_boundaries[0],
+                atom_block_idx_boundaries[1], MolFileAtom)
+        bond_list = get_wrapper_list(
+                bond_block_idx_boundaries[0],
+                bond_block_idx_boundaries[1], MolFileBond)
+        collection_idx_boundaries = None
+        collection_steabs_idx = None
+        collection_steabs: list[int] = []
+        if BEGIN_COLLECTION_LINE in molblock_line_list and END_COLLECTION_LINE in molblock_line_list:
+            collection_idx_boundaries = get_idx_boundaries(
+                    BEGIN_COLLECTION_LINE, END_COLLECTION_LINE)
+            collection_count: int = collection_idx_boundaries[1] - \
+                collection_idx_boundaries[0] - 1
+            for collection_idx in range(1, collection_count + 1):
+                line_idx = collection_idx_boundaries[0] + collection_idx
+                collection_line = molblock_line_list[line_idx]
+                if collection_line.startswith(COLLECTION_STEABS_LINE):
+                    steabs_str = collection_line[len(COLLECTION_STEABS_LINE + " ATOMS=("):-1]
+                    collection_steabs = [int(atom_num_str.strip()) for atom_num_str in steabs_str.split(' ')[1:]]
+                    collection_steabs_idx = line_idx
+                else:
+                    raise ValueError(f"Unexpected collection line '{collection_line}'.")
+        mol_file = MolFileV3K(title, atom_list, bond_list, collection_steabs)
+        return MolFileMap(
+                molblock_src, mol_file,
+                atom_block_idx_boundaries, bond_block_idx_boundaries,
+                collection_idx_boundaries, collection_steabs_idx)
+def compile_object_for_monomer(monomer_name: str):
+    """
+    Compile HELM library object for the given monomers from files
+    """
+    default = monomer_name + '/default.json'
+    meta = monomer_name + '/meta.json'
+    molfile = monomer_name + '/molfile.mol'
+    for file in [default, meta, molfile]:
+        if not os.path.isfile(file):
+            raise FileNotFoundError(file)
+    monomer_json = {}
+    default_json = {}
+    meta_json = {}
+    with open(default, 'r') as default_json_file:
+        default_json_str = default_json_file.read()
+        default_json = orjson.loads(default_json_str)
+    with open(meta, 'r') as meta_json_file:
+        meta_json_str = meta_json_file.read()
+        meta_json = orjson.loads(meta_json_str)
+    monomer_json = {**default_json, 'meta': meta_json}
+    with open(molfile, 'r') as monomer_mol_f:
+        monomer_mol_lines = [line.rstrip() for line in monomer_mol_f.readlines()]
+        monomer_mol_txt = '\n'.join(monomer_mol_lines)
+        monomer_json['molfile'] = monomer_mol_txt
+    # print(monomer_json)
+    return monomer_json
+@click.command()
+@click.option('--lib',
+              'output_library',
               help='Output library (HELM format) file.',
               type=click.File('wb', 'utf-8'))
-@click.option('--add', 'add_f_list', multiple=True,
-              help='Additional libraries to build.',
+@click.option('--add-list',
+              'monomer_list_file',
+              multiple=False,
+              help='File with list of monomer names',
               type=click.File('r', 'utf-8'))
-def main(ctx, initial_f: TextIOWrapper, lib_f: TextIOWrapper, add_f_list: list[TextIOWrapper]):
-    initial_json_str = initial_f.read()
-    initial_json = orjson.loads(initial_json_str)
+def main(output_library: TextIOWrapper,
+         monomer_list_file: TextIOWrapper):
+    name_to_monomer_dict: dict[str, Monomer] = {}
+    monomer_name_list = []
+    for monomer_name in [m for m in monomer_list_file.read().split('\n') if m]:
+        monomer_name_list.append(monomer_name)
-    monomers: dict[str, Monomer] = codes2monomers(initial_json)
+    print(monomer_name_list)
-    for add_f in add_f_list:
-        add_json_str = add_f.read()
-        add_json = orjson.loads(add_json_str)
-        for add_m in add_json:
-            m = Monomer.from_json(add_m)
-            monomers[m.name] = m
+    for monomer_name in monomer_name_list:
+        # trying to load mol data if file with .mol extension exists
+        monomer_obj = compile_object_for_monomer(monomer_name)
+        try:
+            monomer_obj = Monomer.from_json(monomer_obj)
+            name_to_monomer_dict[monomer_obj.name] = monomer_obj
+        except Exception as ex:
+            sys.stderr.write(f"Invalid monomer '{monomer_obj['name']}' error:\n{str(ex)}")
-    add_json = [m.to_json() for m in monomers.values()]
+    resulting_json = [obj.to_json() for obj in name_to_monomer_dict.values()]
+    resulting_json = sorted(resulting_json, key=lambda x: x['name'])
-    lib_json_txt = orjson.dumps(add_json, option=orjson.OPT_INDENT_2)
-    lib_f.write(lib_json_txt)
-    k = 11
+    lib_json_txt = orjson.dumps(resulting_json, option=orjson.OPT_INDENT_2)
+    output_library.write(lib_json_txt)
 if __name__ == '__main__':
-    cli()
+    main()