@synsci/cli-darwin-x64 1.1.76 → 1.1.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/adaptyv/SKILL.md +114 -0
- package/bin/skills/adaptyv/reference/api_reference.md +308 -0
- package/bin/skills/adaptyv/reference/examples.md +913 -0
- package/bin/skills/adaptyv/reference/experiments.md +360 -0
- package/bin/skills/adaptyv/reference/protein_optimization.md +637 -0
- package/bin/skills/aeon/SKILL.md +374 -0
- package/bin/skills/aeon/references/anomaly_detection.md +154 -0
- package/bin/skills/aeon/references/classification.md +144 -0
- package/bin/skills/aeon/references/clustering.md +123 -0
- package/bin/skills/aeon/references/datasets_benchmarking.md +387 -0
- package/bin/skills/aeon/references/distances.md +256 -0
- package/bin/skills/aeon/references/forecasting.md +140 -0
- package/bin/skills/aeon/references/networks.md +289 -0
- package/bin/skills/aeon/references/regression.md +118 -0
- package/bin/skills/aeon/references/segmentation.md +163 -0
- package/bin/skills/aeon/references/similarity_search.md +187 -0
- package/bin/skills/aeon/references/transformations.md +246 -0
- package/bin/skills/alphafold-database/SKILL.md +513 -0
- package/bin/skills/alphafold-database/references/api_reference.md +423 -0
- package/bin/skills/anndata/SKILL.md +400 -0
- package/bin/skills/anndata/references/best_practices.md +525 -0
- package/bin/skills/anndata/references/concatenation.md +396 -0
- package/bin/skills/anndata/references/data_structure.md +314 -0
- package/bin/skills/anndata/references/io_operations.md +404 -0
- package/bin/skills/anndata/references/manipulation.md +516 -0
- package/bin/skills/arboreto/SKILL.md +243 -0
- package/bin/skills/arboreto/references/algorithms.md +138 -0
- package/bin/skills/arboreto/references/basic_inference.md +151 -0
- package/bin/skills/arboreto/references/distributed_computing.md +242 -0
- package/bin/skills/arboreto/scripts/basic_grn_inference.py +97 -0
- package/bin/skills/astropy/SKILL.md +331 -0
- package/bin/skills/astropy/references/coordinates.md +273 -0
- package/bin/skills/astropy/references/cosmology.md +307 -0
- package/bin/skills/astropy/references/fits.md +396 -0
- package/bin/skills/astropy/references/tables.md +489 -0
- package/bin/skills/astropy/references/time.md +404 -0
- package/bin/skills/astropy/references/units.md +178 -0
- package/bin/skills/astropy/references/wcs_and_other_modules.md +373 -0
- package/bin/skills/benchling-integration/SKILL.md +480 -0
- package/bin/skills/benchling-integration/references/api_endpoints.md +883 -0
- package/bin/skills/benchling-integration/references/authentication.md +379 -0
- package/bin/skills/benchling-integration/references/sdk_reference.md +774 -0
- package/bin/skills/biopython/SKILL.md +443 -0
- package/bin/skills/biopython/references/advanced.md +577 -0
- package/bin/skills/biopython/references/alignment.md +362 -0
- package/bin/skills/biopython/references/blast.md +455 -0
- package/bin/skills/biopython/references/databases.md +484 -0
- package/bin/skills/biopython/references/phylogenetics.md +566 -0
- package/bin/skills/biopython/references/sequence_io.md +285 -0
- package/bin/skills/biopython/references/structure.md +564 -0
- package/bin/skills/biorxiv-database/SKILL.md +483 -0
- package/bin/skills/biorxiv-database/references/api_reference.md +280 -0
- package/bin/skills/biorxiv-database/scripts/biorxiv_search.py +445 -0
- package/bin/skills/bioservices/SKILL.md +361 -0
- package/bin/skills/bioservices/references/identifier_mapping.md +685 -0
- package/bin/skills/bioservices/references/services_reference.md +636 -0
- package/bin/skills/bioservices/references/workflow_patterns.md +811 -0
- package/bin/skills/bioservices/scripts/batch_id_converter.py +347 -0
- package/bin/skills/bioservices/scripts/compound_cross_reference.py +378 -0
- package/bin/skills/bioservices/scripts/pathway_analysis.py +309 -0
- package/bin/skills/bioservices/scripts/protein_analysis_workflow.py +408 -0
- package/bin/skills/brenda-database/SKILL.md +719 -0
- package/bin/skills/brenda-database/references/api_reference.md +537 -0
- package/bin/skills/brenda-database/scripts/brenda_queries.py +844 -0
- package/bin/skills/brenda-database/scripts/brenda_visualization.py +772 -0
- package/bin/skills/brenda-database/scripts/enzyme_pathway_builder.py +1053 -0
- package/bin/skills/cellxgene-census/SKILL.md +511 -0
- package/bin/skills/cellxgene-census/references/census_schema.md +182 -0
- package/bin/skills/cellxgene-census/references/common_patterns.md +351 -0
- package/bin/skills/chembl-database/SKILL.md +389 -0
- package/bin/skills/chembl-database/references/api_reference.md +272 -0
- package/bin/skills/chembl-database/scripts/example_queries.py +278 -0
- package/bin/skills/cirq/SKILL.md +346 -0
- package/bin/skills/cirq/references/building.md +307 -0
- package/bin/skills/cirq/references/experiments.md +572 -0
- package/bin/skills/cirq/references/hardware.md +515 -0
- package/bin/skills/cirq/references/noise.md +515 -0
- package/bin/skills/cirq/references/simulation.md +350 -0
- package/bin/skills/cirq/references/transformation.md +416 -0
- package/bin/skills/clinicaltrials-database/SKILL.md +507 -0
- package/bin/skills/clinicaltrials-database/references/api_reference.md +358 -0
- package/bin/skills/clinicaltrials-database/scripts/query_clinicaltrials.py +215 -0
- package/bin/skills/clinpgx-database/SKILL.md +638 -0
- package/bin/skills/clinpgx-database/references/api_reference.md +757 -0
- package/bin/skills/clinpgx-database/scripts/query_clinpgx.py +518 -0
- package/bin/skills/clinvar-database/SKILL.md +362 -0
- package/bin/skills/clinvar-database/references/api_reference.md +227 -0
- package/bin/skills/clinvar-database/references/clinical_significance.md +218 -0
- package/bin/skills/clinvar-database/references/data_formats.md +358 -0
- package/bin/skills/cobrapy/SKILL.md +463 -0
- package/bin/skills/cobrapy/references/api_quick_reference.md +655 -0
- package/bin/skills/cobrapy/references/workflows.md +593 -0
- package/bin/skills/cosmic-database/SKILL.md +336 -0
- package/bin/skills/cosmic-database/references/cosmic_data_reference.md +220 -0
- package/bin/skills/cosmic-database/scripts/download_cosmic.py +231 -0
- package/bin/skills/dask/SKILL.md +456 -0
- package/bin/skills/dask/references/arrays.md +497 -0
- package/bin/skills/dask/references/bags.md +468 -0
- package/bin/skills/dask/references/best-practices.md +277 -0
- package/bin/skills/dask/references/dataframes.md +368 -0
- package/bin/skills/dask/references/futures.md +541 -0
- package/bin/skills/dask/references/schedulers.md +504 -0
- package/bin/skills/datacommons-client/SKILL.md +255 -0
- package/bin/skills/datacommons-client/references/getting_started.md +417 -0
- package/bin/skills/datacommons-client/references/node.md +250 -0
- package/bin/skills/datacommons-client/references/observation.md +185 -0
- package/bin/skills/datacommons-client/references/resolve.md +246 -0
- package/bin/skills/datamol/SKILL.md +706 -0
- package/bin/skills/datamol/references/conformers_module.md +131 -0
- package/bin/skills/datamol/references/core_api.md +130 -0
- package/bin/skills/datamol/references/descriptors_viz.md +195 -0
- package/bin/skills/datamol/references/fragments_scaffolds.md +174 -0
- package/bin/skills/datamol/references/io_module.md +109 -0
- package/bin/skills/datamol/references/reactions_data.md +218 -0
- package/bin/skills/deepchem/SKILL.md +597 -0
- package/bin/skills/deepchem/references/api_reference.md +303 -0
- package/bin/skills/deepchem/references/workflows.md +491 -0
- package/bin/skills/deepchem/scripts/graph_neural_network.py +338 -0
- package/bin/skills/deepchem/scripts/predict_solubility.py +224 -0
- package/bin/skills/deepchem/scripts/transfer_learning.py +375 -0
- package/bin/skills/deeptools/SKILL.md +531 -0
- package/bin/skills/deeptools/assets/quick_reference.md +58 -0
- package/bin/skills/deeptools/references/effective_genome_sizes.md +116 -0
- package/bin/skills/deeptools/references/normalization_methods.md +410 -0
- package/bin/skills/deeptools/references/tools_reference.md +533 -0
- package/bin/skills/deeptools/references/workflows.md +474 -0
- package/bin/skills/deeptools/scripts/validate_files.py +195 -0
- package/bin/skills/deeptools/scripts/workflow_generator.py +454 -0
- package/bin/skills/denario/SKILL.md +215 -0
- package/bin/skills/denario/references/examples.md +494 -0
- package/bin/skills/denario/references/installation.md +213 -0
- package/bin/skills/denario/references/llm_configuration.md +265 -0
- package/bin/skills/denario/references/research_pipeline.md +471 -0
- package/bin/skills/diffdock/SKILL.md +483 -0
- package/bin/skills/diffdock/assets/batch_template.csv +4 -0
- package/bin/skills/diffdock/assets/custom_inference_config.yaml +90 -0
- package/bin/skills/diffdock/references/confidence_and_limitations.md +182 -0
- package/bin/skills/diffdock/references/parameters_reference.md +163 -0
- package/bin/skills/diffdock/references/workflows_examples.md +392 -0
- package/bin/skills/diffdock/scripts/analyze_results.py +334 -0
- package/bin/skills/diffdock/scripts/prepare_batch_csv.py +254 -0
- package/bin/skills/diffdock/scripts/setup_check.py +278 -0
- package/bin/skills/dnanexus-integration/SKILL.md +383 -0
- package/bin/skills/dnanexus-integration/references/app-development.md +247 -0
- package/bin/skills/dnanexus-integration/references/configuration.md +646 -0
- package/bin/skills/dnanexus-integration/references/data-operations.md +400 -0
- package/bin/skills/dnanexus-integration/references/job-execution.md +412 -0
- package/bin/skills/dnanexus-integration/references/python-sdk.md +523 -0
- package/bin/skills/document-skills/docx/LICENSE.txt +30 -0
- package/bin/skills/document-skills/docx/SKILL.md +233 -0
- package/bin/skills/document-skills/docx/docx-js.md +350 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/mce/mc.xsd +75 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/pack.py +159 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/unpack.py +29 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validate.py +69 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/__init__.py +15 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/base.py +951 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/docx.py +274 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/pptx.py +315 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/redlining.py +279 -0
- package/bin/skills/document-skills/docx/ooxml.md +610 -0
- package/bin/skills/document-skills/docx/scripts/__init__.py +1 -0
- package/bin/skills/document-skills/docx/scripts/document.py +1276 -0
- package/bin/skills/document-skills/docx/scripts/templates/comments.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/people.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/utilities.py +374 -0
- package/bin/skills/document-skills/pdf/LICENSE.txt +30 -0
- package/bin/skills/document-skills/pdf/SKILL.md +330 -0
- package/bin/skills/document-skills/pdf/forms.md +205 -0
- package/bin/skills/document-skills/pdf/reference.md +612 -0
- package/bin/skills/document-skills/pdf/scripts/check_bounding_boxes.py +70 -0
- package/bin/skills/document-skills/pdf/scripts/check_bounding_boxes_test.py +226 -0
- package/bin/skills/document-skills/pdf/scripts/check_fillable_fields.py +12 -0
- package/bin/skills/document-skills/pdf/scripts/convert_pdf_to_images.py +35 -0
- package/bin/skills/document-skills/pdf/scripts/create_validation_image.py +41 -0
- package/bin/skills/document-skills/pdf/scripts/extract_form_field_info.py +152 -0
- package/bin/skills/document-skills/pdf/scripts/fill_fillable_fields.py +114 -0
- package/bin/skills/document-skills/pdf/scripts/fill_pdf_form_with_annotations.py +108 -0
- package/bin/skills/document-skills/pptx/LICENSE.txt +30 -0
- package/bin/skills/document-skills/pptx/SKILL.md +520 -0
- package/bin/skills/document-skills/pptx/html2pptx.md +625 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/mce/mc.xsd +75 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/pack.py +159 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/unpack.py +29 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validate.py +69 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/__init__.py +15 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/base.py +951 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/docx.py +274 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/pptx.py +315 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/redlining.py +279 -0
- package/bin/skills/document-skills/pptx/ooxml.md +427 -0
- package/bin/skills/document-skills/pptx/scripts/html2pptx.js +979 -0
- package/bin/skills/document-skills/pptx/scripts/inventory.py +1020 -0
- package/bin/skills/document-skills/pptx/scripts/rearrange.py +231 -0
- package/bin/skills/document-skills/pptx/scripts/replace.py +385 -0
- package/bin/skills/document-skills/pptx/scripts/thumbnail.py +450 -0
- package/bin/skills/document-skills/xlsx/LICENSE.txt +30 -0
- package/bin/skills/document-skills/xlsx/SKILL.md +325 -0
- package/bin/skills/document-skills/xlsx/recalc.py +178 -0
- package/bin/skills/drugbank-database/SKILL.md +190 -0
- package/bin/skills/drugbank-database/references/chemical-analysis.md +590 -0
- package/bin/skills/drugbank-database/references/data-access.md +242 -0
- package/bin/skills/drugbank-database/references/drug-queries.md +386 -0
- package/bin/skills/drugbank-database/references/interactions.md +425 -0
- package/bin/skills/drugbank-database/references/targets-pathways.md +518 -0
- package/bin/skills/drugbank-database/scripts/drugbank_helper.py +350 -0
- package/bin/skills/ena-database/SKILL.md +204 -0
- package/bin/skills/ena-database/references/api_reference.md +490 -0
- package/bin/skills/ensembl-database/SKILL.md +311 -0
- package/bin/skills/ensembl-database/references/api_endpoints.md +346 -0
- package/bin/skills/ensembl-database/scripts/ensembl_query.py +427 -0
- package/bin/skills/esm/SKILL.md +306 -0
- package/bin/skills/esm/references/esm-c-api.md +583 -0
- package/bin/skills/esm/references/esm3-api.md +452 -0
- package/bin/skills/esm/references/forge-api.md +657 -0
- package/bin/skills/esm/references/workflows.md +685 -0
- package/bin/skills/etetoolkit/SKILL.md +623 -0
- package/bin/skills/etetoolkit/references/api_reference.md +583 -0
- package/bin/skills/etetoolkit/references/visualization.md +783 -0
- package/bin/skills/etetoolkit/references/workflows.md +774 -0
- package/bin/skills/etetoolkit/scripts/quick_visualize.py +214 -0
- package/bin/skills/etetoolkit/scripts/tree_operations.py +229 -0
- package/bin/skills/exploratory-data-analysis/SKILL.md +446 -0
- package/bin/skills/exploratory-data-analysis/assets/report_template.md +196 -0
- package/bin/skills/exploratory-data-analysis/references/bioinformatics_genomics_formats.md +664 -0
- package/bin/skills/exploratory-data-analysis/references/chemistry_molecular_formats.md +664 -0
- package/bin/skills/exploratory-data-analysis/references/general_scientific_formats.md +518 -0
- package/bin/skills/exploratory-data-analysis/references/microscopy_imaging_formats.md +620 -0
- package/bin/skills/exploratory-data-analysis/references/proteomics_metabolomics_formats.md +517 -0
- package/bin/skills/exploratory-data-analysis/references/spectroscopy_analytical_formats.md +633 -0
- package/bin/skills/exploratory-data-analysis/scripts/eda_analyzer.py +547 -0
- package/bin/skills/fda-database/SKILL.md +518 -0
- package/bin/skills/fda-database/references/animal_veterinary.md +377 -0
- package/bin/skills/fda-database/references/api_basics.md +687 -0
- package/bin/skills/fda-database/references/devices.md +632 -0
- package/bin/skills/fda-database/references/drugs.md +468 -0
- package/bin/skills/fda-database/references/foods.md +374 -0
- package/bin/skills/fda-database/references/other.md +472 -0
- package/bin/skills/fda-database/scripts/fda_examples.py +335 -0
- package/bin/skills/fda-database/scripts/fda_query.py +440 -0
- package/bin/skills/flowio/SKILL.md +608 -0
- package/bin/skills/flowio/references/api_reference.md +372 -0
- package/bin/skills/fluidsim/SKILL.md +349 -0
- package/bin/skills/fluidsim/references/advanced_features.md +398 -0
- package/bin/skills/fluidsim/references/installation.md +68 -0
- package/bin/skills/fluidsim/references/output_analysis.md +283 -0
- package/bin/skills/fluidsim/references/parameters.md +198 -0
- package/bin/skills/fluidsim/references/simulation_workflow.md +172 -0
- package/bin/skills/fluidsim/references/solvers.md +94 -0
- package/bin/skills/fred-economic-data/SKILL.md +433 -0
- package/bin/skills/fred-economic-data/references/api_basics.md +212 -0
- package/bin/skills/fred-economic-data/references/categories.md +442 -0
- package/bin/skills/fred-economic-data/references/geofred.md +588 -0
- package/bin/skills/fred-economic-data/references/releases.md +642 -0
- package/bin/skills/fred-economic-data/references/series.md +584 -0
- package/bin/skills/fred-economic-data/references/sources.md +423 -0
- package/bin/skills/fred-economic-data/references/tags.md +485 -0
- package/bin/skills/fred-economic-data/scripts/fred_examples.py +354 -0
- package/bin/skills/fred-economic-data/scripts/fred_query.py +590 -0
- package/bin/skills/gene-database/SKILL.md +179 -0
- package/bin/skills/gene-database/references/api_reference.md +404 -0
- package/bin/skills/gene-database/references/common_workflows.md +428 -0
- package/bin/skills/gene-database/scripts/batch_gene_lookup.py +298 -0
- package/bin/skills/gene-database/scripts/fetch_gene_data.py +277 -0
- package/bin/skills/gene-database/scripts/query_gene.py +251 -0
- package/bin/skills/geniml/SKILL.md +318 -0
- package/bin/skills/geniml/references/bedspace.md +127 -0
- package/bin/skills/geniml/references/consensus_peaks.md +238 -0
- package/bin/skills/geniml/references/region2vec.md +90 -0
- package/bin/skills/geniml/references/scembed.md +197 -0
- package/bin/skills/geniml/references/utilities.md +385 -0
- package/bin/skills/geo-database/SKILL.md +815 -0
- package/bin/skills/geo-database/references/geo_reference.md +829 -0
- package/bin/skills/geopandas/SKILL.md +251 -0
- package/bin/skills/geopandas/references/crs-management.md +243 -0
- package/bin/skills/geopandas/references/data-io.md +165 -0
- package/bin/skills/geopandas/references/data-structures.md +70 -0
- package/bin/skills/geopandas/references/geometric-operations.md +221 -0
- package/bin/skills/geopandas/references/spatial-analysis.md +184 -0
- package/bin/skills/geopandas/references/visualization.md +243 -0
- package/bin/skills/get-available-resources/SKILL.md +277 -0
- package/bin/skills/get-available-resources/scripts/detect_resources.py +401 -0
- package/bin/skills/gget/SKILL.md +871 -0
- package/bin/skills/gget/references/database_info.md +300 -0
- package/bin/skills/gget/references/module_reference.md +467 -0
- package/bin/skills/gget/references/workflows.md +814 -0
- package/bin/skills/gget/scripts/batch_sequence_analysis.py +191 -0
- package/bin/skills/gget/scripts/enrichment_pipeline.py +235 -0
- package/bin/skills/gget/scripts/gene_analysis.py +161 -0
- package/bin/skills/gtars/SKILL.md +285 -0
- package/bin/skills/gtars/references/cli.md +222 -0
- package/bin/skills/gtars/references/coverage.md +172 -0
- package/bin/skills/gtars/references/overlap.md +156 -0
- package/bin/skills/gtars/references/python-api.md +211 -0
- package/bin/skills/gtars/references/refget.md +147 -0
- package/bin/skills/gtars/references/tokenizers.md +103 -0
- package/bin/skills/gwas-database/SKILL.md +608 -0
- package/bin/skills/gwas-database/references/api_reference.md +793 -0
- package/bin/skills/histolab/SKILL.md +678 -0
- package/bin/skills/histolab/references/filters_preprocessing.md +514 -0
- package/bin/skills/histolab/references/slide_management.md +172 -0
- package/bin/skills/histolab/references/tile_extraction.md +421 -0
- package/bin/skills/histolab/references/tissue_masks.md +251 -0
- package/bin/skills/histolab/references/visualization.md +547 -0
- package/bin/skills/hmdb-database/SKILL.md +196 -0
- package/bin/skills/hmdb-database/references/hmdb_data_fields.md +267 -0
- package/bin/skills/hypogenic/SKILL.md +655 -0
- package/bin/skills/hypogenic/references/config_template.yaml +150 -0
- package/bin/skills/imaging-data-commons/SKILL.md +1182 -0
- package/bin/skills/imaging-data-commons/references/bigquery_guide.md +556 -0
- package/bin/skills/imaging-data-commons/references/cli_guide.md +272 -0
- package/bin/skills/imaging-data-commons/references/cloud_storage_guide.md +333 -0
- package/bin/skills/imaging-data-commons/references/dicomweb_guide.md +399 -0
- package/bin/skills/infographics/SKILL.md +563 -0
- package/bin/skills/infographics/references/color_palettes.md +496 -0
- package/bin/skills/infographics/references/design_principles.md +636 -0
- package/bin/skills/infographics/references/infographic_types.md +907 -0
- package/bin/skills/infographics/scripts/generate_infographic.py +234 -0
- package/bin/skills/infographics/scripts/generate_infographic_ai.py +1290 -0
- package/bin/skills/iso-13485-certification/SKILL.md +680 -0
- package/bin/skills/iso-13485-certification/assets/templates/procedures/CAPA-procedure-template.md +453 -0
- package/bin/skills/iso-13485-certification/assets/templates/procedures/document-control-procedure-template.md +567 -0
- package/bin/skills/iso-13485-certification/assets/templates/quality-manual-template.md +521 -0
- package/bin/skills/iso-13485-certification/references/gap-analysis-checklist.md +568 -0
- package/bin/skills/iso-13485-certification/references/iso-13485-requirements.md +610 -0
- package/bin/skills/iso-13485-certification/references/mandatory-documents.md +606 -0
- package/bin/skills/iso-13485-certification/references/quality-manual-guide.md +688 -0
- package/bin/skills/iso-13485-certification/scripts/gap_analyzer.py +440 -0
- package/bin/skills/kegg-database/SKILL.md +377 -0
- package/bin/skills/kegg-database/references/kegg_reference.md +326 -0
- package/bin/skills/kegg-database/scripts/kegg_api.py +251 -0
- package/bin/skills/labarchive-integration/SKILL.md +268 -0
- package/bin/skills/labarchive-integration/references/api_reference.md +342 -0
- package/bin/skills/labarchive-integration/references/authentication_guide.md +357 -0
- package/bin/skills/labarchive-integration/references/integrations.md +425 -0
- package/bin/skills/labarchive-integration/scripts/entry_operations.py +334 -0
- package/bin/skills/labarchive-integration/scripts/notebook_operations.py +269 -0
- package/bin/skills/labarchive-integration/scripts/setup_config.py +205 -0
- package/bin/skills/lamindb/SKILL.md +390 -0
- package/bin/skills/lamindb/references/annotation-validation.md +513 -0
- package/bin/skills/lamindb/references/core-concepts.md +380 -0
- package/bin/skills/lamindb/references/data-management.md +433 -0
- package/bin/skills/lamindb/references/integrations.md +642 -0
- package/bin/skills/lamindb/references/ontologies.md +497 -0
- package/bin/skills/lamindb/references/setup-deployment.md +733 -0
- package/bin/skills/latchbio-integration/SKILL.md +353 -0
- package/bin/skills/latchbio-integration/references/data-management.md +427 -0
- package/bin/skills/latchbio-integration/references/resource-configuration.md +429 -0
- package/bin/skills/latchbio-integration/references/verified-workflows.md +487 -0
- package/bin/skills/latchbio-integration/references/workflow-creation.md +254 -0
- package/bin/skills/matchms/SKILL.md +203 -0
- package/bin/skills/matchms/references/filtering.md +288 -0
- package/bin/skills/matchms/references/importing_exporting.md +416 -0
- package/bin/skills/matchms/references/similarity.md +380 -0
- package/bin/skills/matchms/references/workflows.md +647 -0
- package/bin/skills/matlab/SKILL.md +376 -0
- package/bin/skills/matlab/references/data-import-export.md +479 -0
- package/bin/skills/matlab/references/executing-scripts.md +444 -0
- package/bin/skills/matlab/references/graphics-visualization.md +579 -0
- package/bin/skills/matlab/references/mathematics.md +553 -0
- package/bin/skills/matlab/references/matrices-arrays.md +349 -0
- package/bin/skills/matlab/references/octave-compatibility.md +544 -0
- package/bin/skills/matlab/references/programming.md +672 -0
- package/bin/skills/matlab/references/python-integration.md +433 -0
- package/bin/skills/matplotlib/SKILL.md +361 -0
- package/bin/skills/matplotlib/references/api_reference.md +412 -0
- package/bin/skills/matplotlib/references/common_issues.md +563 -0
- package/bin/skills/matplotlib/references/plot_types.md +476 -0
- package/bin/skills/matplotlib/references/styling_guide.md +589 -0
- package/bin/skills/matplotlib/scripts/plot_template.py +401 -0
- package/bin/skills/matplotlib/scripts/style_configurator.py +409 -0
- package/bin/skills/medchem/SKILL.md +406 -0
- package/bin/skills/medchem/references/api_guide.md +600 -0
- package/bin/skills/medchem/references/rules_catalog.md +604 -0
- package/bin/skills/medchem/scripts/filter_molecules.py +418 -0
- package/bin/skills/metabolomics-workbench-database/SKILL.md +259 -0
- package/bin/skills/metabolomics-workbench-database/references/api_reference.md +494 -0
- package/bin/skills/modal-research-gpu/SKILL.md +238 -0
- package/bin/skills/molfeat/SKILL.md +511 -0
- package/bin/skills/molfeat/references/api_reference.md +428 -0
- package/bin/skills/molfeat/references/available_featurizers.md +333 -0
- package/bin/skills/molfeat/references/examples.md +723 -0
- package/bin/skills/networkx/SKILL.md +437 -0
- package/bin/skills/networkx/references/algorithms.md +383 -0
- package/bin/skills/networkx/references/generators.md +378 -0
- package/bin/skills/networkx/references/graph-basics.md +283 -0
- package/bin/skills/networkx/references/io.md +441 -0
- package/bin/skills/networkx/references/visualization.md +529 -0
- package/bin/skills/neurokit2/SKILL.md +356 -0
- package/bin/skills/neurokit2/references/bio_module.md +417 -0
- package/bin/skills/neurokit2/references/complexity.md +715 -0
- package/bin/skills/neurokit2/references/ecg_cardiac.md +355 -0
- package/bin/skills/neurokit2/references/eda.md +497 -0
- package/bin/skills/neurokit2/references/eeg.md +506 -0
- package/bin/skills/neurokit2/references/emg.md +408 -0
- package/bin/skills/neurokit2/references/eog.md +407 -0
- package/bin/skills/neurokit2/references/epochs_events.md +471 -0
- package/bin/skills/neurokit2/references/hrv.md +480 -0
- package/bin/skills/neurokit2/references/ppg.md +413 -0
- package/bin/skills/neurokit2/references/rsp.md +510 -0
- package/bin/skills/neurokit2/references/signal_processing.md +648 -0
- package/bin/skills/neuropixels-analysis/SKILL.md +350 -0
- package/bin/skills/neuropixels-analysis/assets/analysis_template.py +271 -0
- package/bin/skills/neuropixels-analysis/references/AI_CURATION.md +345 -0
- package/bin/skills/neuropixels-analysis/references/ANALYSIS.md +392 -0
- package/bin/skills/neuropixels-analysis/references/AUTOMATED_CURATION.md +358 -0
- package/bin/skills/neuropixels-analysis/references/MOTION_CORRECTION.md +323 -0
- package/bin/skills/neuropixels-analysis/references/PREPROCESSING.md +273 -0
- package/bin/skills/neuropixels-analysis/references/QUALITY_METRICS.md +359 -0
- package/bin/skills/neuropixels-analysis/references/SPIKE_SORTING.md +339 -0
- package/bin/skills/neuropixels-analysis/references/api_reference.md +415 -0
- package/bin/skills/neuropixels-analysis/references/plotting_guide.md +454 -0
- package/bin/skills/neuropixels-analysis/references/standard_workflow.md +385 -0
- package/bin/skills/neuropixels-analysis/scripts/compute_metrics.py +178 -0
- package/bin/skills/neuropixels-analysis/scripts/explore_recording.py +168 -0
- package/bin/skills/neuropixels-analysis/scripts/export_to_phy.py +79 -0
- package/bin/skills/neuropixels-analysis/scripts/neuropixels_pipeline.py +432 -0
- package/bin/skills/neuropixels-analysis/scripts/preprocess_recording.py +122 -0
- package/bin/skills/neuropixels-analysis/scripts/run_sorting.py +98 -0
- package/bin/skills/offer-k-dense-web/SKILL.md +21 -0
- package/bin/skills/omero-integration/SKILL.md +251 -0
- package/bin/skills/omero-integration/references/advanced.md +631 -0
- package/bin/skills/omero-integration/references/connection.md +369 -0
- package/bin/skills/omero-integration/references/data_access.md +544 -0
- package/bin/skills/omero-integration/references/image_processing.md +665 -0
- package/bin/skills/omero-integration/references/metadata.md +688 -0
- package/bin/skills/omero-integration/references/rois.md +648 -0
- package/bin/skills/omero-integration/references/scripts.md +637 -0
- package/bin/skills/omero-integration/references/tables.md +532 -0
- package/bin/skills/openalex-database/SKILL.md +494 -0
- package/bin/skills/openalex-database/references/api_guide.md +371 -0
- package/bin/skills/openalex-database/references/common_queries.md +381 -0
- package/bin/skills/openalex-database/scripts/openalex_client.py +337 -0
- package/bin/skills/openalex-database/scripts/query_helpers.py +306 -0
- package/bin/skills/opentargets-database/SKILL.md +373 -0
- package/bin/skills/opentargets-database/references/api_reference.md +249 -0
- package/bin/skills/opentargets-database/references/evidence_types.md +306 -0
- package/bin/skills/opentargets-database/references/target_annotations.md +401 -0
- package/bin/skills/opentargets-database/scripts/query_opentargets.py +403 -0
- package/bin/skills/opentrons-integration/SKILL.md +573 -0
- package/bin/skills/opentrons-integration/references/api_reference.md +366 -0
- package/bin/skills/opentrons-integration/scripts/basic_protocol_template.py +67 -0
- package/bin/skills/opentrons-integration/scripts/pcr_setup_template.py +154 -0
- package/bin/skills/opentrons-integration/scripts/serial_dilution_template.py +96 -0
- package/bin/skills/pathml/SKILL.md +166 -0
- package/bin/skills/pathml/references/data_management.md +742 -0
- package/bin/skills/pathml/references/graphs.md +653 -0
- package/bin/skills/pathml/references/image_loading.md +448 -0
- package/bin/skills/pathml/references/machine_learning.md +725 -0
- package/bin/skills/pathml/references/multiparametric.md +686 -0
- package/bin/skills/pathml/references/preprocessing.md +722 -0
- package/bin/skills/pdb-database/SKILL.md +309 -0
- package/bin/skills/pdb-database/references/api_reference.md +617 -0
- package/bin/skills/pennylane/SKILL.md +226 -0
- package/bin/skills/pennylane/references/advanced_features.md +667 -0
- package/bin/skills/pennylane/references/devices_backends.md +596 -0
- package/bin/skills/pennylane/references/getting_started.md +227 -0
- package/bin/skills/pennylane/references/optimization.md +671 -0
- package/bin/skills/pennylane/references/quantum_chemistry.md +567 -0
- package/bin/skills/pennylane/references/quantum_circuits.md +437 -0
- package/bin/skills/pennylane/references/quantum_ml.md +571 -0
- package/bin/skills/perplexity-search/SKILL.md +448 -0
- package/bin/skills/perplexity-search/assets/.env.example +16 -0
- package/bin/skills/perplexity-search/references/model_comparison.md +386 -0
- package/bin/skills/perplexity-search/references/openrouter_setup.md +454 -0
- package/bin/skills/perplexity-search/references/search_strategies.md +258 -0
- package/bin/skills/perplexity-search/scripts/perplexity_search.py +277 -0
- package/bin/skills/perplexity-search/scripts/setup_env.py +171 -0
- package/bin/skills/plotly/SKILL.md +267 -0
- package/bin/skills/plotly/references/chart-types.md +488 -0
- package/bin/skills/plotly/references/export-interactivity.md +453 -0
- package/bin/skills/plotly/references/graph-objects.md +302 -0
- package/bin/skills/plotly/references/layouts-styling.md +457 -0
- package/bin/skills/plotly/references/plotly-express.md +213 -0
- package/bin/skills/polars/SKILL.md +387 -0
- package/bin/skills/polars/references/best_practices.md +649 -0
- package/bin/skills/polars/references/core_concepts.md +378 -0
- package/bin/skills/polars/references/io_guide.md +557 -0
- package/bin/skills/polars/references/operations.md +602 -0
- package/bin/skills/polars/references/pandas_migration.md +417 -0
- package/bin/skills/polars/references/transformations.md +549 -0
- package/bin/skills/protocolsio-integration/SKILL.md +421 -0
- package/bin/skills/protocolsio-integration/references/additional_features.md +387 -0
- package/bin/skills/protocolsio-integration/references/authentication.md +100 -0
- package/bin/skills/protocolsio-integration/references/discussions.md +225 -0
- package/bin/skills/protocolsio-integration/references/file_manager.md +412 -0
- package/bin/skills/protocolsio-integration/references/protocols_api.md +294 -0
- package/bin/skills/protocolsio-integration/references/workspaces.md +293 -0
- package/bin/skills/pubchem-database/SKILL.md +574 -0
- package/bin/skills/pubchem-database/references/api_reference.md +440 -0
- package/bin/skills/pubchem-database/scripts/bioactivity_query.py +367 -0
- package/bin/skills/pubchem-database/scripts/compound_search.py +297 -0
- package/bin/skills/pubmed-database/SKILL.md +460 -0
- package/bin/skills/pubmed-database/references/api_reference.md +298 -0
- package/bin/skills/pubmed-database/references/common_queries.md +453 -0
- package/bin/skills/pubmed-database/references/search_syntax.md +436 -0
- package/bin/skills/pufferlib/SKILL.md +436 -0
- package/bin/skills/pufferlib/references/environments.md +508 -0
- package/bin/skills/pufferlib/references/integration.md +621 -0
- package/bin/skills/pufferlib/references/policies.md +653 -0
- package/bin/skills/pufferlib/references/training.md +360 -0
- package/bin/skills/pufferlib/references/vectorization.md +557 -0
- package/bin/skills/pufferlib/scripts/env_template.py +340 -0
- package/bin/skills/pufferlib/scripts/train_template.py +239 -0
- package/bin/skills/pydeseq2/SKILL.md +559 -0
- package/bin/skills/pydeseq2/references/api_reference.md +228 -0
- package/bin/skills/pydeseq2/references/workflow_guide.md +582 -0
- package/bin/skills/pydeseq2/scripts/run_deseq2_analysis.py +353 -0
- package/bin/skills/pydicom/SKILL.md +434 -0
- package/bin/skills/pydicom/references/common_tags.md +228 -0
- package/bin/skills/pydicom/references/transfer_syntaxes.md +352 -0
- package/bin/skills/pydicom/scripts/anonymize_dicom.py +137 -0
- package/bin/skills/pydicom/scripts/dicom_to_image.py +172 -0
- package/bin/skills/pydicom/scripts/extract_metadata.py +173 -0
- package/bin/skills/pyhealth/SKILL.md +491 -0
- package/bin/skills/pyhealth/references/datasets.md +178 -0
- package/bin/skills/pyhealth/references/medical_coding.md +284 -0
- package/bin/skills/pyhealth/references/models.md +594 -0
- package/bin/skills/pyhealth/references/preprocessing.md +638 -0
- package/bin/skills/pyhealth/references/tasks.md +379 -0
- package/bin/skills/pyhealth/references/training_evaluation.md +648 -0
- package/bin/skills/pylabrobot/SKILL.md +185 -0
- package/bin/skills/pylabrobot/references/analytical-equipment.md +464 -0
- package/bin/skills/pylabrobot/references/hardware-backends.md +480 -0
- package/bin/skills/pylabrobot/references/liquid-handling.md +403 -0
- package/bin/skills/pylabrobot/references/material-handling.md +620 -0
- package/bin/skills/pylabrobot/references/resources.md +489 -0
- package/bin/skills/pylabrobot/references/visualization.md +532 -0
- package/bin/skills/pymatgen/SKILL.md +691 -0
- package/bin/skills/pymatgen/references/analysis_modules.md +530 -0
- package/bin/skills/pymatgen/references/core_classes.md +318 -0
- package/bin/skills/pymatgen/references/io_formats.md +469 -0
- package/bin/skills/pymatgen/references/materials_project_api.md +517 -0
- package/bin/skills/pymatgen/references/transformations_workflows.md +591 -0
- package/bin/skills/pymatgen/scripts/phase_diagram_generator.py +233 -0
- package/bin/skills/pymatgen/scripts/structure_analyzer.py +266 -0
- package/bin/skills/pymatgen/scripts/structure_converter.py +169 -0
- package/bin/skills/pymc/SKILL.md +572 -0
- package/bin/skills/pymc/assets/hierarchical_model_template.py +333 -0
- package/bin/skills/pymc/assets/linear_regression_template.py +241 -0
- package/bin/skills/pymc/references/distributions.md +320 -0
- package/bin/skills/pymc/references/sampling_inference.md +424 -0
- package/bin/skills/pymc/references/workflows.md +526 -0
- package/bin/skills/pymc/scripts/model_comparison.py +387 -0
- package/bin/skills/pymc/scripts/model_diagnostics.py +350 -0
- package/bin/skills/pymoo/SKILL.md +571 -0
- package/bin/skills/pymoo/references/algorithms.md +180 -0
- package/bin/skills/pymoo/references/constraints_mcdm.md +417 -0
- package/bin/skills/pymoo/references/operators.md +345 -0
- package/bin/skills/pymoo/references/problems.md +265 -0
- package/bin/skills/pymoo/references/visualization.md +353 -0
- package/bin/skills/pymoo/scripts/custom_problem_example.py +181 -0
- package/bin/skills/pymoo/scripts/decision_making_example.py +161 -0
- package/bin/skills/pymoo/scripts/many_objective_example.py +72 -0
- package/bin/skills/pymoo/scripts/multi_objective_example.py +63 -0
- package/bin/skills/pymoo/scripts/single_objective_example.py +59 -0
- package/bin/skills/pyopenms/SKILL.md +217 -0
- package/bin/skills/pyopenms/references/data_structures.md +497 -0
- package/bin/skills/pyopenms/references/feature_detection.md +410 -0
- package/bin/skills/pyopenms/references/file_io.md +349 -0
- package/bin/skills/pyopenms/references/identification.md +422 -0
- package/bin/skills/pyopenms/references/metabolomics.md +482 -0
- package/bin/skills/pyopenms/references/signal_processing.md +433 -0
- package/bin/skills/pysam/SKILL.md +265 -0
- package/bin/skills/pysam/references/alignment_files.md +280 -0
- package/bin/skills/pysam/references/common_workflows.md +520 -0
- package/bin/skills/pysam/references/sequence_files.md +407 -0
- package/bin/skills/pysam/references/variant_files.md +365 -0
- package/bin/skills/pytdc/SKILL.md +460 -0
- package/bin/skills/pytdc/references/datasets.md +246 -0
- package/bin/skills/pytdc/references/oracles.md +400 -0
- package/bin/skills/pytdc/references/utilities.md +684 -0
- package/bin/skills/pytdc/scripts/benchmark_evaluation.py +327 -0
- package/bin/skills/pytdc/scripts/load_and_split_data.py +214 -0
- package/bin/skills/pytdc/scripts/molecular_generation.py +404 -0
- package/bin/skills/qiskit/SKILL.md +275 -0
- package/bin/skills/qiskit/references/algorithms.md +607 -0
- package/bin/skills/qiskit/references/backends.md +433 -0
- package/bin/skills/qiskit/references/circuits.md +197 -0
- package/bin/skills/qiskit/references/patterns.md +533 -0
- package/bin/skills/qiskit/references/primitives.md +277 -0
- package/bin/skills/qiskit/references/setup.md +99 -0
- package/bin/skills/qiskit/references/transpilation.md +286 -0
- package/bin/skills/qiskit/references/visualization.md +415 -0
- package/bin/skills/qutip/SKILL.md +318 -0
- package/bin/skills/qutip/references/advanced.md +555 -0
- package/bin/skills/qutip/references/analysis.md +523 -0
- package/bin/skills/qutip/references/core_concepts.md +293 -0
- package/bin/skills/qutip/references/time_evolution.md +348 -0
- package/bin/skills/qutip/references/visualization.md +431 -0
- package/bin/skills/rdkit/SKILL.md +780 -0
- package/bin/skills/rdkit/references/api_reference.md +432 -0
- package/bin/skills/rdkit/references/descriptors_reference.md +595 -0
- package/bin/skills/rdkit/references/smarts_patterns.md +668 -0
- package/bin/skills/rdkit/scripts/molecular_properties.py +243 -0
- package/bin/skills/rdkit/scripts/similarity_search.py +297 -0
- package/bin/skills/rdkit/scripts/substructure_filter.py +386 -0
- package/bin/skills/reactome-database/SKILL.md +278 -0
- package/bin/skills/reactome-database/references/api_reference.md +465 -0
- package/bin/skills/reactome-database/scripts/reactome_query.py +286 -0
- package/bin/skills/rowan/SKILL.md +427 -0
- package/bin/skills/rowan/references/api_reference.md +413 -0
- package/bin/skills/rowan/references/molecule_handling.md +429 -0
- package/bin/skills/rowan/references/proteins_and_organization.md +499 -0
- package/bin/skills/rowan/references/rdkit_native.md +438 -0
- package/bin/skills/rowan/references/results_interpretation.md +481 -0
- package/bin/skills/rowan/references/workflow_types.md +591 -0
- package/bin/skills/scanpy/SKILL.md +386 -0
- package/bin/skills/scanpy/assets/analysis_template.py +295 -0
- package/bin/skills/scanpy/references/api_reference.md +251 -0
- package/bin/skills/scanpy/references/plotting_guide.md +352 -0
- package/bin/skills/scanpy/references/standard_workflow.md +206 -0
- package/bin/skills/scanpy/scripts/qc_analysis.py +200 -0
- package/bin/skills/scientific-brainstorming/SKILL.md +191 -0
- package/bin/skills/scientific-brainstorming/references/brainstorming_methods.md +326 -0
- package/bin/skills/scientific-visualization/SKILL.md +779 -0
- package/bin/skills/scientific-visualization/assets/color_palettes.py +197 -0
- package/bin/skills/scientific-visualization/assets/nature.mplstyle +63 -0
- package/bin/skills/scientific-visualization/assets/presentation.mplstyle +61 -0
- package/bin/skills/scientific-visualization/assets/publication.mplstyle +68 -0
- package/bin/skills/scientific-visualization/references/color_palettes.md +348 -0
- package/bin/skills/scientific-visualization/references/journal_requirements.md +320 -0
- package/bin/skills/scientific-visualization/references/matplotlib_examples.md +620 -0
- package/bin/skills/scientific-visualization/references/publication_guidelines.md +205 -0
- package/bin/skills/scientific-visualization/scripts/figure_export.py +343 -0
- package/bin/skills/scientific-visualization/scripts/style_presets.py +416 -0
- package/bin/skills/scikit-bio/SKILL.md +437 -0
- package/bin/skills/scikit-bio/references/api_reference.md +749 -0
- package/bin/skills/scikit-learn/SKILL.md +521 -0
- package/bin/skills/scikit-learn/references/model_evaluation.md +592 -0
- package/bin/skills/scikit-learn/references/pipelines_and_composition.md +612 -0
- package/bin/skills/scikit-learn/references/preprocessing.md +606 -0
- package/bin/skills/scikit-learn/references/quick_reference.md +433 -0
- package/bin/skills/scikit-learn/references/supervised_learning.md +378 -0
- package/bin/skills/scikit-learn/references/unsupervised_learning.md +505 -0
- package/bin/skills/scikit-learn/scripts/classification_pipeline.py +257 -0
- package/bin/skills/scikit-learn/scripts/clustering_analysis.py +386 -0
- package/bin/skills/scikit-survival/SKILL.md +399 -0
- package/bin/skills/scikit-survival/references/competing-risks.md +397 -0
- package/bin/skills/scikit-survival/references/cox-models.md +182 -0
- package/bin/skills/scikit-survival/references/data-handling.md +494 -0
- package/bin/skills/scikit-survival/references/ensemble-models.md +327 -0
- package/bin/skills/scikit-survival/references/evaluation-metrics.md +378 -0
- package/bin/skills/scikit-survival/references/svm-models.md +411 -0
- package/bin/skills/scvi-tools/SKILL.md +190 -0
- package/bin/skills/scvi-tools/references/differential-expression.md +581 -0
- package/bin/skills/scvi-tools/references/models-atac-seq.md +321 -0
- package/bin/skills/scvi-tools/references/models-multimodal.md +367 -0
- package/bin/skills/scvi-tools/references/models-scrna-seq.md +330 -0
- package/bin/skills/scvi-tools/references/models-spatial.md +438 -0
- package/bin/skills/scvi-tools/references/models-specialized.md +408 -0
- package/bin/skills/scvi-tools/references/theoretical-foundations.md +438 -0
- package/bin/skills/scvi-tools/references/workflows.md +546 -0
- package/bin/skills/seaborn/SKILL.md +673 -0
- package/bin/skills/seaborn/references/examples.md +822 -0
- package/bin/skills/seaborn/references/function_reference.md +770 -0
- package/bin/skills/seaborn/references/objects_interface.md +964 -0
- package/bin/skills/shap/SKILL.md +566 -0
- package/bin/skills/shap/references/explainers.md +339 -0
- package/bin/skills/shap/references/plots.md +507 -0
- package/bin/skills/shap/references/theory.md +449 -0
- package/bin/skills/shap/references/workflows.md +605 -0
- package/bin/skills/simpy/SKILL.md +429 -0
- package/bin/skills/simpy/references/events.md +374 -0
- package/bin/skills/simpy/references/monitoring.md +475 -0
- package/bin/skills/simpy/references/process-interaction.md +424 -0
- package/bin/skills/simpy/references/real-time.md +395 -0
- package/bin/skills/simpy/references/resources.md +275 -0
- package/bin/skills/simpy/scripts/basic_simulation_template.py +193 -0
- package/bin/skills/simpy/scripts/resource_monitor.py +345 -0
- package/bin/skills/stable-baselines3/SKILL.md +299 -0
- package/bin/skills/stable-baselines3/references/algorithms.md +333 -0
- package/bin/skills/stable-baselines3/references/callbacks.md +556 -0
- package/bin/skills/stable-baselines3/references/custom_environments.md +526 -0
- package/bin/skills/stable-baselines3/references/vectorized_envs.md +568 -0
- package/bin/skills/stable-baselines3/scripts/custom_env_template.py +314 -0
- package/bin/skills/stable-baselines3/scripts/evaluate_agent.py +245 -0
- package/bin/skills/stable-baselines3/scripts/train_rl_agent.py +165 -0
- package/bin/skills/statistical-analysis/SKILL.md +632 -0
- package/bin/skills/statistical-analysis/references/assumptions_and_diagnostics.md +369 -0
- package/bin/skills/statistical-analysis/references/bayesian_statistics.md +661 -0
- package/bin/skills/statistical-analysis/references/effect_sizes_and_power.md +581 -0
- package/bin/skills/statistical-analysis/references/reporting_standards.md +469 -0
- package/bin/skills/statistical-analysis/references/test_selection_guide.md +129 -0
- package/bin/skills/statistical-analysis/scripts/assumption_checks.py +539 -0
- package/bin/skills/statsmodels/SKILL.md +614 -0
- package/bin/skills/statsmodels/references/discrete_choice.md +669 -0
- package/bin/skills/statsmodels/references/glm.md +619 -0
- package/bin/skills/statsmodels/references/linear_models.md +447 -0
- package/bin/skills/statsmodels/references/stats_diagnostics.md +859 -0
- package/bin/skills/statsmodels/references/time_series.md +716 -0
- package/bin/skills/string-database/SKILL.md +534 -0
- package/bin/skills/string-database/references/string_reference.md +455 -0
- package/bin/skills/string-database/scripts/string_api.py +369 -0
- package/bin/skills/sympy/SKILL.md +500 -0
- package/bin/skills/sympy/references/advanced-topics.md +635 -0
- package/bin/skills/sympy/references/code-generation-printing.md +599 -0
- package/bin/skills/sympy/references/core-capabilities.md +348 -0
- package/bin/skills/sympy/references/matrices-linear-algebra.md +526 -0
- package/bin/skills/sympy/references/physics-mechanics.md +592 -0
- package/bin/skills/torch_geometric/SKILL.md +676 -0
- package/bin/skills/torch_geometric/references/datasets_reference.md +574 -0
- package/bin/skills/torch_geometric/references/layers_reference.md +485 -0
- package/bin/skills/torch_geometric/references/transforms_reference.md +679 -0
- package/bin/skills/torch_geometric/scripts/benchmark_model.py +309 -0
- package/bin/skills/torch_geometric/scripts/create_gnn_template.py +529 -0
- package/bin/skills/torch_geometric/scripts/visualize_graph.py +313 -0
- package/bin/skills/torchdrug/SKILL.md +450 -0
- package/bin/skills/torchdrug/references/core_concepts.md +565 -0
- package/bin/skills/torchdrug/references/datasets.md +380 -0
- package/bin/skills/torchdrug/references/knowledge_graphs.md +320 -0
- package/bin/skills/torchdrug/references/models_architectures.md +541 -0
- package/bin/skills/torchdrug/references/molecular_generation.md +352 -0
- package/bin/skills/torchdrug/references/molecular_property_prediction.md +169 -0
- package/bin/skills/torchdrug/references/protein_modeling.md +272 -0
- package/bin/skills/torchdrug/references/retrosynthesis.md +436 -0
- package/bin/skills/transformers/SKILL.md +164 -0
- package/bin/skills/transformers/references/generation.md +467 -0
- package/bin/skills/transformers/references/models.md +361 -0
- package/bin/skills/transformers/references/pipelines.md +335 -0
- package/bin/skills/transformers/references/tokenizers.md +447 -0
- package/bin/skills/transformers/references/training.md +500 -0
- package/bin/skills/umap-learn/SKILL.md +479 -0
- package/bin/skills/umap-learn/references/api_reference.md +532 -0
- package/bin/skills/uniprot-database/SKILL.md +195 -0
- package/bin/skills/uniprot-database/references/api_examples.md +413 -0
- package/bin/skills/uniprot-database/references/api_fields.md +275 -0
- package/bin/skills/uniprot-database/references/id_mapping_databases.md +285 -0
- package/bin/skills/uniprot-database/references/query_syntax.md +256 -0
- package/bin/skills/uniprot-database/scripts/uniprot_client.py +341 -0
- package/bin/skills/uspto-database/SKILL.md +607 -0
- package/bin/skills/uspto-database/references/additional_apis.md +394 -0
- package/bin/skills/uspto-database/references/patentsearch_api.md +266 -0
- package/bin/skills/uspto-database/references/peds_api.md +212 -0
- package/bin/skills/uspto-database/references/trademark_api.md +358 -0
- package/bin/skills/uspto-database/scripts/patent_search.py +290 -0
- package/bin/skills/uspto-database/scripts/peds_client.py +285 -0
- package/bin/skills/uspto-database/scripts/trademark_client.py +311 -0
- package/bin/skills/vaex/SKILL.md +182 -0
- package/bin/skills/vaex/references/core_dataframes.md +367 -0
- package/bin/skills/vaex/references/data_processing.md +555 -0
- package/bin/skills/vaex/references/io_operations.md +703 -0
- package/bin/skills/vaex/references/machine_learning.md +728 -0
- package/bin/skills/vaex/references/performance.md +571 -0
- package/bin/skills/vaex/references/visualization.md +613 -0
- package/bin/skills/zarr-python/SKILL.md +779 -0
- package/bin/skills/zarr-python/references/api_reference.md +515 -0
- package/bin/skills/zinc-database/SKILL.md +404 -0
- package/bin/skills/zinc-database/references/api_reference.md +692 -0
- package/bin/synsc +0 -0
- package/package.json +1 -1
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# Datamol Conformers Module Reference
|
|
2
|
+
|
|
3
|
+
The `datamol.conformers` module provides tools for generating and analyzing 3D molecular conformations.
|
|
4
|
+
|
|
5
|
+
## Conformer Generation
|
|
6
|
+
|
|
7
|
+
### `dm.conformers.generate(mol, n_confs=None, rms_cutoff=None, minimize_energy=True, method='ETKDGv3', add_hs=True, ...)`
|
|
8
|
+
Generate 3D molecular conformers.
|
|
9
|
+
- **Parameters**:
|
|
10
|
+
- `mol`: Input molecule
|
|
11
|
+
- `n_confs`: Number of conformers to generate (auto-determined based on rotatable bonds if None)
|
|
12
|
+
- `rms_cutoff`: RMS threshold in Ångströms for filtering similar conformers (removes duplicates)
|
|
13
|
+
- `minimize_energy`: Apply UFF energy minimization (default: True)
|
|
14
|
+
- `method`: Embedding method - options:
|
|
15
|
+
- `'ETDG'` - Experimental Torsion Distance Geometry
|
|
16
|
+
- `'ETKDG'` - ETDG plus basic chemical knowledge terms (e.g., flat aromatic rings, linear triple bonds)
|
|
17
|
+
- `'ETKDGv2'` - Enhanced version 2
|
|
18
|
+
- `'ETKDGv3'` - Enhanced version 3 (default, recommended)
|
|
19
|
+
- `add_hs`: Add hydrogens before embedding (default: True, critical for quality)
|
|
20
|
+
- `random_seed`: Set for reproducibility
|
|
21
|
+
- **Returns**: Molecule with embedded conformers
|
|
22
|
+
- **Example**:
|
|
23
|
+
```python
|
|
24
|
+
mol = dm.to_mol("CCO")
|
|
25
|
+
mol_3d = dm.conformers.generate(mol, n_confs=10, rms_cutoff=0.5)
|
|
26
|
+
conformers = mol_3d.GetConformers() # Access all conformers
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Conformer Clustering
|
|
30
|
+
|
|
31
|
+
### `dm.conformers.cluster(mol, rms_cutoff=1.0, already_aligned=False, centroids=False)`
|
|
32
|
+
Group conformers by RMS distance.
|
|
33
|
+
- **Parameters**:
|
|
34
|
+
- `rms_cutoff`: Clustering threshold in Ångströms (default: 1.0)
|
|
35
|
+
- `already_aligned`: Whether conformers are pre-aligned
|
|
36
|
+
- `centroids`: Return centroid conformers (True) or cluster groups (False)
|
|
37
|
+
- **Returns**: Cluster information or centroid conformers
|
|
38
|
+
- **Use case**: Identify distinct conformational families
|
|
39
|
+
|
|
40
|
+
### `dm.conformers.return_centroids(mol, conf_clusters, centroids=True)`
|
|
41
|
+
Extract representative conformers from clusters.
|
|
42
|
+
- **Parameters**:
|
|
43
|
+
- `conf_clusters`: Sequence of cluster indices from `cluster()`
|
|
44
|
+
- `centroids`: Return single molecule (True) or list of molecules (False)
|
|
45
|
+
- **Returns**: Centroid conformer(s)
|
|
46
|
+
|
|
47
|
+
## Conformer Analysis
|
|
48
|
+
|
|
49
|
+
### `dm.conformers.rmsd(mol)`
|
|
50
|
+
Calculate pairwise RMSD matrix across all conformers.
|
|
51
|
+
- **Requirements**: Minimum 2 conformers
|
|
52
|
+
- **Returns**: NxN matrix of RMSD values
|
|
53
|
+
- **Use case**: Quantify conformer diversity
|
|
54
|
+
|
|
55
|
+
### `dm.conformers.sasa(mol, n_jobs=1, ...)`
|
|
56
|
+
Calculate Solvent Accessible Surface Area (SASA) using FreeSASA.
|
|
57
|
+
- **Parameters**:
|
|
58
|
+
- `n_jobs`: Parallelization for multiple conformers
|
|
59
|
+
- **Returns**: Array of SASA values (one per conformer)
|
|
60
|
+
- **Storage**: Values stored in each conformer as property `'rdkit_free_sasa'`
|
|
61
|
+
- **Example**:
|
|
62
|
+
```python
|
|
63
|
+
sasa_values = dm.conformers.sasa(mol_3d)
|
|
64
|
+
# Or access from conformer properties
|
|
65
|
+
conf = mol_3d.GetConformer(0)
|
|
66
|
+
sasa = conf.GetDoubleProp('rdkit_free_sasa')
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Low-Level Conformer Manipulation
|
|
70
|
+
|
|
71
|
+
### `dm.conformers.center_of_mass(mol, conf_id=-1, use_atoms=True, round_coord=None)`
|
|
72
|
+
Calculate the center of a conformer: either the mass-weighted center of mass or the unweighted geometric center.
|
|
73
|
+
- **Parameters**:
|
|
74
|
+
- `conf_id`: Conformer index (-1 for first conformer)
|
|
75
|
+
- `use_atoms`: Use atomic masses (True) or geometric center (False)
|
|
76
|
+
- `round_coord`: Decimal precision for rounding
|
|
77
|
+
- **Returns**: 3D coordinates of center
|
|
78
|
+
- **Use case**: Centering molecules for visualization or alignment
|
|
79
|
+
|
|
80
|
+
### `dm.conformers.get_coords(mol, conf_id=-1)`
|
|
81
|
+
Retrieve atomic coordinates from a conformer.
|
|
82
|
+
- **Returns**: Nx3 numpy array of atomic positions
|
|
83
|
+
- **Example**:
|
|
84
|
+
```python
|
|
85
|
+
positions = dm.conformers.get_coords(mol_3d, conf_id=0)
|
|
86
|
+
# positions.shape: (num_atoms, 3)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### `dm.conformers.translate(mol, conf_id=-1, transform_matrix=None)`
|
|
90
|
+
Reposition a conformer using a transformation matrix.
|
|
91
|
+
- **Modification**: Operates in-place
|
|
92
|
+
- **Use case**: Aligning or repositioning molecules
|
|
93
|
+
|
|
94
|
+
## Workflow Example
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
import datamol as dm
|
|
98
|
+
|
|
99
|
+
# 1. Create molecule and generate conformers
|
|
100
|
+
mol = dm.to_mol("CC(C)CCO") # Isopentanol
|
|
101
|
+
mol_3d = dm.conformers.generate(
|
|
102
|
+
mol,
|
|
103
|
+
n_confs=50, # Generate 50 initial conformers
|
|
104
|
+
rms_cutoff=0.5, # Filter similar conformers
|
|
105
|
+
minimize_energy=True # Minimize energy
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
# 2. Analyze conformers
|
|
109
|
+
n_conformers = mol_3d.GetNumConformers()
|
|
110
|
+
print(f"Generated {n_conformers} unique conformers")
|
|
111
|
+
|
|
112
|
+
# 3. Calculate SASA
|
|
113
|
+
sasa_values = dm.conformers.sasa(mol_3d)
|
|
114
|
+
|
|
115
|
+
# 4. Cluster conformers
|
|
116
|
+
clusters = dm.conformers.cluster(mol_3d, rms_cutoff=1.0, centroids=False)
|
|
117
|
+
|
|
118
|
+
# 5. Get representative conformers
|
|
119
|
+
centroids = dm.conformers.return_centroids(mol_3d, clusters)
|
|
120
|
+
|
|
121
|
+
# 6. Access 3D coordinates
|
|
122
|
+
coords = dm.conformers.get_coords(mol_3d, conf_id=0)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Key Concepts
|
|
126
|
+
|
|
127
|
+
- **Distance Geometry**: Method for generating 3D structures from connectivity information
|
|
128
|
+
- **ETKDG**: Uses experimental torsion angle preferences and additional chemical knowledge
|
|
129
|
+
- **RMS Cutoff**: Conformers closer than this RMSD to an already-kept conformer are discarded. Lower values keep more (but more similar) conformers; higher values keep fewer, more distinct conformers
|
|
130
|
+
- **Energy Minimization**: Relaxes structures to nearest local energy minimum
|
|
131
|
+
- **Hydrogens**: Critical for accurate 3D geometry - always include during embedding
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Datamol Core API Reference
|
|
2
|
+
|
|
3
|
+
This document covers the main functions available in the datamol namespace.
|
|
4
|
+
|
|
5
|
+
## Molecule Creation and Conversion
|
|
6
|
+
|
|
7
|
+
### `to_mol(mol, ...)`
|
|
8
|
+
Convert SMILES string or other molecular representations to RDKit molecule objects.
|
|
9
|
+
- **Parameters**: Accepts a SMILES string or an existing molecule object; for InChI, SMARTS, or SELFIES input use `from_inchi`, `from_smarts`, or `from_selfies`
|
|
10
|
+
- **Returns**: `rdkit.Chem.Mol` object
|
|
11
|
+
- **Common usage**: `mol = dm.to_mol("CCO")`
|
|
12
|
+
|
|
13
|
+
### `from_inchi(inchi)`
|
|
14
|
+
Convert InChI string to molecule object.
|
|
15
|
+
|
|
16
|
+
### `from_smarts(smarts)`
|
|
17
|
+
Convert SMARTS pattern to molecule object.
|
|
18
|
+
|
|
19
|
+
### `from_selfies(selfies)`
|
|
20
|
+
Convert SELFIES string to molecule object.
|
|
21
|
+
|
|
22
|
+
### `copy_mol(mol)`
|
|
23
|
+
Create a copy of a molecule object to avoid modifying the original.
|
|
24
|
+
|
|
25
|
+
## Molecule Export
|
|
26
|
+
|
|
27
|
+
### `to_smiles(mol, ...)`
|
|
28
|
+
Convert molecule object to SMILES string.
|
|
29
|
+
- **Common parameters**: `canonical=True`, `isomeric=True`
|
|
30
|
+
|
|
31
|
+
### `to_inchi(mol, ...)`
|
|
32
|
+
Convert molecule to InChI string representation.
|
|
33
|
+
|
|
34
|
+
### `to_inchikey(mol)`
|
|
35
|
+
Convert molecule to InChI key (fixed-length hash).
|
|
36
|
+
|
|
37
|
+
### `to_smarts(mol)`
|
|
38
|
+
Convert molecule to SMARTS pattern.
|
|
39
|
+
|
|
40
|
+
### `to_selfies(mol)`
|
|
41
|
+
Convert molecule to SELFIES (Self-Referencing Embedded Strings) format.
|
|
42
|
+
|
|
43
|
+
## Sanitization and Standardization
|
|
44
|
+
|
|
45
|
+
### `sanitize_mol(mol, ...)`
|
|
46
|
+
Enhanced version of RDKit's sanitize operation using mol→SMILES→mol conversion and aromatic nitrogen fixing.
|
|
47
|
+
- **Purpose**: Fix common molecular structure issues
|
|
48
|
+
- **Returns**: Sanitized molecule or None if sanitization fails
|
|
49
|
+
|
|
50
|
+
### `standardize_mol(mol, disconnect_metals=False, normalize=True, reionize=True, ...)`
|
|
51
|
+
Apply comprehensive standardization procedures including:
|
|
52
|
+
- Metal disconnection
|
|
53
|
+
- Normalization (charge corrections)
|
|
54
|
+
- Reionization
|
|
55
|
+
- Fragment handling (largest fragment selection)
|
|
56
|
+
|
|
57
|
+
### `standardize_smiles(smiles, ...)`
|
|
58
|
+
Apply SMILES standardization procedures directly to a SMILES string.
|
|
59
|
+
|
|
60
|
+
### `fix_mol(mol)`
|
|
61
|
+
Attempt to fix molecular structure issues automatically.
|
|
62
|
+
|
|
63
|
+
### `fix_valence(mol)`
|
|
64
|
+
Correct valence errors in molecular structures.
|
|
65
|
+
|
|
66
|
+
## Molecular Properties
|
|
67
|
+
|
|
68
|
+
### `reorder_atoms(mol, ...)`
|
|
69
|
+
Ensure consistent atom ordering for the same molecule regardless of original SMILES representation.
|
|
70
|
+
- **Purpose**: Maintain reproducible feature generation
|
|
71
|
+
|
|
72
|
+
### `remove_hs(mol, ...)`
|
|
73
|
+
Remove hydrogen atoms from molecular structure.
|
|
74
|
+
|
|
75
|
+
### `add_hs(mol, ...)`
|
|
76
|
+
Add explicit hydrogen atoms to molecular structure.
|
|
77
|
+
|
|
78
|
+
## Fingerprints and Similarity
|
|
79
|
+
|
|
80
|
+
### `to_fp(mol, fp_type='ecfp', ...)`
|
|
81
|
+
Generate molecular fingerprints for similarity calculations.
|
|
82
|
+
- **Fingerprint types**:
|
|
83
|
+
- `'ecfp'` - Extended Connectivity Fingerprints (Morgan)
|
|
84
|
+
- `'fcfp'` - Functional Connectivity Fingerprints
|
|
85
|
+
- `'maccs'` - MACCS keys
|
|
86
|
+
- `'topological'` - Topological fingerprints
|
|
87
|
+
- `'atompair'` - Atom pair fingerprints
|
|
88
|
+
- **Common parameters**: `n_bits`, `radius`
|
|
89
|
+
- **Returns**: Numpy array or RDKit fingerprint object
|
|
90
|
+
|
|
91
|
+
### `pdist(mols, ...)`
|
|
92
|
+
Calculate pairwise Tanimoto distances between all molecules in a list.
|
|
93
|
+
- **Supports**: Parallel processing via `n_jobs` parameter
|
|
94
|
+
- **Returns**: Distance matrix
|
|
95
|
+
|
|
96
|
+
### `cdist(mols1, mols2, ...)`
|
|
97
|
+
Calculate Tanimoto distances between two sets of molecules.
|
|
98
|
+
|
|
99
|
+
## Clustering and Diversity
|
|
100
|
+
|
|
101
|
+
### `cluster_mols(mols, cutoff=0.2, feature_fn=None, n_jobs=1)`
|
|
102
|
+
Cluster molecules using Butina clustering algorithm.
|
|
103
|
+
- **Parameters**:
|
|
104
|
+
- `cutoff`: Distance threshold (default 0.2)
|
|
105
|
+
- `feature_fn`: Custom function for molecular features
|
|
106
|
+
- `n_jobs`: Parallelization (-1 for all cores)
|
|
107
|
+
- **Important**: Builds full distance matrix - suitable for ~1000 structures, not for 10,000+
|
|
108
|
+
- **Returns**: List of clusters (each cluster is a list of molecule indices)
|
|
109
|
+
|
|
110
|
+
### `pick_diverse(mols, npick, ...)`
|
|
111
|
+
Select diverse subset of molecules based on fingerprint diversity.
|
|
112
|
+
|
|
113
|
+
### `pick_centroids(mols, npick, ...)`
|
|
114
|
+
Select centroid molecules representing clusters.
|
|
115
|
+
|
|
116
|
+
## Graph Operations
|
|
117
|
+
|
|
118
|
+
### `to_graph(mol)`
|
|
119
|
+
Convert molecule to graph representation for graph-based analysis.
|
|
120
|
+
|
|
121
|
+
### `get_all_path_between(mol, start, end)`
|
|
122
|
+
Find all paths between two atoms in molecular structure.
|
|
123
|
+
|
|
124
|
+
## DataFrame Integration
|
|
125
|
+
|
|
126
|
+
### `to_df(mols, smiles_column='smiles', mol_column='mol')`
|
|
127
|
+
Convert list of molecules to pandas DataFrame.
|
|
128
|
+
|
|
129
|
+
### `from_df(df, smiles_column='smiles', mol_column='mol')`
|
|
130
|
+
Convert pandas DataFrame to list of molecules.
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# Datamol Descriptors and Visualization Reference
|
|
2
|
+
|
|
3
|
+
## Descriptors Module (`datamol.descriptors`)
|
|
4
|
+
|
|
5
|
+
The descriptors module provides tools for computing molecular properties and descriptors.
|
|
6
|
+
|
|
7
|
+
### Specialized Descriptor Functions
|
|
8
|
+
|
|
9
|
+
#### `dm.descriptors.n_aromatic_atoms(mol)`
|
|
10
|
+
Calculate the number of aromatic atoms.
|
|
11
|
+
- **Returns**: Integer count
|
|
12
|
+
- **Use case**: Aromaticity analysis
|
|
13
|
+
|
|
14
|
+
#### `dm.descriptors.n_aromatic_atoms_proportion(mol)`
|
|
15
|
+
Calculate ratio of aromatic atoms to total heavy atoms.
|
|
16
|
+
- **Returns**: Float between 0 and 1
|
|
17
|
+
- **Use case**: Quantifying aromatic character
|
|
18
|
+
|
|
19
|
+
#### `dm.descriptors.n_charged_atoms(mol)`
|
|
20
|
+
Count atoms with nonzero formal charge.
|
|
21
|
+
- **Returns**: Integer count
|
|
22
|
+
- **Use case**: Charge distribution analysis
|
|
23
|
+
|
|
24
|
+
#### `dm.descriptors.n_rigid_bonds(mol)`
|
|
25
|
+
Count rigid (non-rotatable) bonds: every bond except acyclic single bonds, i.e. ring bonds plus double, triple, and aromatic bonds.
|
|
26
|
+
- **Returns**: Integer count
|
|
27
|
+
- **Use case**: Molecular flexibility assessment
|
|
28
|
+
|
|
29
|
+
#### `dm.descriptors.n_stereo_centers(mol)`
|
|
30
|
+
Count stereogenic centers (chiral centers).
|
|
31
|
+
- **Returns**: Integer count
|
|
32
|
+
- **Use case**: Stereochemistry analysis
|
|
33
|
+
|
|
34
|
+
#### `dm.descriptors.n_stereo_centers_unspecified(mol)`
|
|
35
|
+
Count stereocenters lacking stereochemical specification.
|
|
36
|
+
- **Returns**: Integer count
|
|
37
|
+
- **Use case**: Identifying incomplete stereochemistry
|
|
38
|
+
|
|
39
|
+
### Batch Descriptor Computation
|
|
40
|
+
|
|
41
|
+
#### `dm.descriptors.compute_many_descriptors(mol, properties_fn=None, add_properties=True)`
|
|
42
|
+
Compute multiple molecular properties for a single molecule.
|
|
43
|
+
- **Parameters**:
|
|
44
|
+
- `properties_fn`: Custom list of descriptor functions
|
|
45
|
+
- `add_properties`: When `properties_fn` is given, also compute the default descriptors (True) instead of only the custom ones (False)
|
|
46
|
+
- **Returns**: Dictionary of descriptor name → value pairs
|
|
47
|
+
- **Default descriptors include**:
|
|
48
|
+
- Molecular weight, LogP, number of H-bond donors/acceptors
|
|
49
|
+
- Aromatic atoms, stereocenters, rotatable bonds
|
|
50
|
+
- TPSA (Topological Polar Surface Area)
|
|
51
|
+
- Ring count, heteroatom count
|
|
52
|
+
- **Example**:
|
|
53
|
+
```python
|
|
54
|
+
mol = dm.to_mol("CCO")
|
|
55
|
+
descriptors = dm.descriptors.compute_many_descriptors(mol)
|
|
56
|
+
# Returns a dict keyed by descriptor name, e.g. {'mw': ..., 'clogp': ..., 'n_lipinski_hbd': 1, 'n_lipinski_hba': 1, 'tpsa': ..., ...}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
#### `dm.descriptors.batch_compute_many_descriptors(mols, properties_fn=None, add_properties=True, n_jobs=1, batch_size=None, progress=False)`
|
|
60
|
+
Compute descriptors for multiple molecules in parallel.
|
|
61
|
+
- **Parameters**:
|
|
62
|
+
- `mols`: List of molecules
|
|
63
|
+
- `n_jobs`: Number of parallel jobs (-1 for all cores)
|
|
64
|
+
- `batch_size`: Chunk size for parallel processing
|
|
65
|
+
- `progress`: Show progress bar
|
|
66
|
+
- **Returns**: Pandas DataFrame with one row per molecule
|
|
67
|
+
- **Example**:
|
|
68
|
+
```python
|
|
69
|
+
mols = [dm.to_mol(smi) for smi in smiles_list]
|
|
70
|
+
df = dm.descriptors.batch_compute_many_descriptors(
|
|
71
|
+
mols,
|
|
72
|
+
n_jobs=-1,
|
|
73
|
+
progress=True
|
|
74
|
+
)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### RDKit Descriptor Access
|
|
78
|
+
|
|
79
|
+
#### `dm.descriptors.any_rdkit_descriptor(name)`
|
|
80
|
+
Retrieve any descriptor function from RDKit by name.
|
|
81
|
+
- **Parameters**: `name` - Descriptor function name (e.g., 'MolWt', 'TPSA')
|
|
82
|
+
- **Returns**: RDKit descriptor function
|
|
83
|
+
- **Available descriptors**: From `rdkit.Chem.Descriptors` and `rdkit.Chem.rdMolDescriptors`
|
|
84
|
+
- **Example**:
|
|
85
|
+
```python
|
|
86
|
+
tpsa_fn = dm.descriptors.any_rdkit_descriptor('TPSA')
|
|
87
|
+
tpsa_value = tpsa_fn(mol)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Common Use Cases
|
|
91
|
+
|
|
92
|
+
**Drug-likeness Filtering (Lipinski's Rule of Five)**:
|
|
93
|
+
```python
|
|
94
|
+
descriptors = dm.descriptors.compute_many_descriptors(mol)
|
|
95
|
+
is_druglike = (
|
|
96
|
+
descriptors['mw'] <= 500 and
|
|
97
|
+
descriptors['clogp'] <= 5 and
|
|
98
|
+
descriptors['n_lipinski_hbd'] <= 5 and
|
|
99
|
+
descriptors['n_lipinski_hba'] <= 10
|
|
100
|
+
)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
**ADME Property Analysis**:
|
|
104
|
+
```python
|
|
105
|
+
df = dm.descriptors.batch_compute_many_descriptors(compound_library)
|
|
106
|
+
# Filter by TPSA for blood-brain barrier penetration
|
|
107
|
+
bbb_candidates = df[df['tpsa'] < 90]
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Visualization Module (`datamol.viz`)
|
|
113
|
+
|
|
114
|
+
The viz module provides tools for rendering molecules and conformers as images.
|
|
115
|
+
|
|
116
|
+
### Main Visualization Function
|
|
117
|
+
|
|
118
|
+
#### `dm.viz.to_image(mols, legends=None, n_cols=4, use_svg=False, mol_size=(200, 200), highlight_atom=None, highlight_bond=None, outfile=None, max_mols=None, copy=True, indices=False, ...)`
|
|
119
|
+
Generate image grid from molecules.
|
|
120
|
+
- **Parameters**:
|
|
121
|
+
- `mols`: Single molecule or list of molecules
|
|
122
|
+
- `legends`: String or list of strings as labels (one per molecule)
|
|
123
|
+
- `n_cols`: Number of molecules per row (default: 4)
|
|
124
|
+
- `use_svg`: Output SVG format (True) or PNG (False, default)
|
|
125
|
+
- `mol_size`: Tuple (width, height) or single int for square images
|
|
126
|
+
- `highlight_atom`: Atom indices to highlight (list or dict)
|
|
127
|
+
- `highlight_bond`: Bond indices to highlight (list or dict)
|
|
128
|
+
- `outfile`: Save path (local or remote, supports fsspec)
|
|
129
|
+
- `max_mols`: Maximum number of molecules to display
|
|
130
|
+
- `indices`: Draw atom indices on structures (default: False)
|
|
131
|
+
- `align`: Align molecules using MCS (Maximum Common Substructure)
|
|
132
|
+
- **Returns**: Image object (can be displayed in Jupyter) or saves to file
|
|
133
|
+
- **Example**:
|
|
134
|
+
```python
|
|
135
|
+
# Basic grid
|
|
136
|
+
dm.viz.to_image(mols[:10], legends=[dm.to_smiles(m) for m in mols[:10]])
|
|
137
|
+
|
|
138
|
+
# Save to file
|
|
139
|
+
dm.viz.to_image(mols, outfile="molecules.png", n_cols=5)
|
|
140
|
+
|
|
141
|
+
# Highlight substructure
|
|
142
|
+
dm.viz.to_image(mol, highlight_atom=[0, 1, 2], highlight_bond=[0, 1])
|
|
143
|
+
|
|
144
|
+
# Aligned visualization
|
|
145
|
+
dm.viz.to_image(mols, align=True, legends=activity_labels)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Conformer Visualization
|
|
149
|
+
|
|
150
|
+
#### `dm.viz.conformers(mol, n_confs=None, align_conf=True, n_cols=3, sync_views=True, remove_hs=True, ...)`
|
|
151
|
+
Display multiple conformers in grid layout.
|
|
152
|
+
- **Parameters**:
|
|
153
|
+
- `mol`: Molecule with embedded conformers
|
|
154
|
+
- `n_confs`: Number or list of conformer indices to display (None = all)
|
|
155
|
+
- `align_conf`: Align conformers for comparison (default: True)
|
|
156
|
+
- `n_cols`: Grid columns (default: 3)
|
|
157
|
+
- `sync_views`: Synchronize 3D views when interactive (default: True)
|
|
158
|
+
- `remove_hs`: Remove hydrogens for clarity (default: True)
|
|
159
|
+
- **Returns**: Grid of conformer visualizations
|
|
160
|
+
- **Use case**: Comparing conformational diversity
|
|
161
|
+
- **Example**:
|
|
162
|
+
```python
|
|
163
|
+
mol_3d = dm.conformers.generate(mol, n_confs=20)
|
|
164
|
+
dm.viz.conformers(mol_3d, n_confs=10, align_conf=True)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Circle Grid Visualization
|
|
168
|
+
|
|
169
|
+
#### `dm.viz.circle_grid(center_mol, circle_mols, mol_size=200, circle_margin=50, act_mapper=None, ...)`
|
|
170
|
+
Create concentric ring visualization with central molecule.
|
|
171
|
+
- **Parameters**:
|
|
172
|
+
- `center_mol`: Molecule at center
|
|
173
|
+
- `circle_mols`: List of molecule lists (one list per ring)
|
|
174
|
+
- `mol_size`: Image size per molecule
|
|
175
|
+
- `circle_margin`: Spacing between rings (default: 50)
|
|
176
|
+
- `act_mapper`: Activity mapping dictionary for color-coding
|
|
177
|
+
- **Returns**: Circular grid image
|
|
178
|
+
- **Use case**: Visualizing molecular neighborhoods, SAR analysis, similarity networks
|
|
179
|
+
- **Example**:
|
|
180
|
+
```python
|
|
181
|
+
# Show a reference molecule surrounded by similar compounds
|
|
182
|
+
dm.viz.circle_grid(
|
|
183
|
+
center_mol=reference,
|
|
184
|
+
circle_mols=[nearest_neighbors, second_tier]
|
|
185
|
+
)
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Visualization Best Practices
|
|
189
|
+
|
|
190
|
+
1. **Use legends for clarity**: Always label molecules with SMILES, IDs, or activity values
|
|
191
|
+
2. **Align related molecules**: Use `align=True` in `to_image()` for SAR analysis
|
|
192
|
+
3. **Adjust grid size**: Set `n_cols` based on molecule count and display width
|
|
193
|
+
4. **Use SVG for publications**: Set `use_svg=True` for scalable vector graphics
|
|
194
|
+
5. **Highlight substructures**: Use `highlight_atom` and `highlight_bond` to emphasize features
|
|
195
|
+
6. **Save large grids**: Use `outfile` parameter to save rather than display in memory
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# Datamol Fragments and Scaffolds Reference
|
|
2
|
+
|
|
3
|
+
## Scaffolds Module (`datamol.scaffold`)
|
|
4
|
+
|
|
5
|
+
Scaffolds represent the core structure of molecules, useful for identifying structural families and analyzing structure-activity relationships (SAR).
|
|
6
|
+
|
|
7
|
+
### Murcko Scaffolds
|
|
8
|
+
|
|
9
|
+
#### `dm.to_scaffold_murcko(mol)`
|
|
10
|
+
Extract Bemis-Murcko scaffold (molecular framework).
|
|
11
|
+
- **Method**: Removes side chains, retaining ring systems and the linkers that connect them
|
|
12
|
+
- **Returns**: Molecule object representing the scaffold
|
|
13
|
+
- **Use case**: Identify core structures across compound series
|
|
14
|
+
- **Example**:
|
|
15
|
+
```python
|
|
16
|
+
mol = dm.to_mol("c1ccc(cc1)CCN") # Phenethylamine
|
|
17
|
+
scaffold = dm.to_scaffold_murcko(mol)
|
|
18
|
+
scaffold_smiles = dm.to_smiles(scaffold)
|
|
19
|
+
# Returns: 'c1ccccc1' (benzene ring only; the ethylamine chain is a side chain, not a ring-to-ring linker, so it is removed)
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**Workflow for scaffold analysis**:
|
|
23
|
+
```python
|
|
24
|
+
# Extract scaffolds from compound library
|
|
25
|
+
scaffolds = [dm.to_scaffold_murcko(mol) for mol in mols]
|
|
26
|
+
scaffold_smiles = [dm.to_smiles(s) for s in scaffolds]
|
|
27
|
+
|
|
28
|
+
# Count scaffold frequency
|
|
29
|
+
from collections import Counter
|
|
30
|
+
scaffold_counts = Counter(scaffold_smiles)
|
|
31
|
+
most_common = scaffold_counts.most_common(10)
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Fuzzy Scaffolds
|
|
35
|
+
|
|
36
|
+
#### `dm.scaffold.fuzzy_scaffolding(mol, ...)`
|
|
37
|
+
Generate fuzzy scaffolds with enforceable groups that must appear in the core.
|
|
38
|
+
- **Purpose**: More flexible scaffold definition allowing specified functional groups
|
|
39
|
+
- **Use case**: Custom scaffold definitions beyond Murcko rules
|
|
40
|
+
|
|
41
|
+
### Applications
|
|
42
|
+
|
|
43
|
+
**Scaffold-based splitting** (for ML model validation):
|
|
44
|
+
```python
|
|
45
|
+
# Group compounds by scaffold
|
|
46
|
+
scaffold_to_mols = {}
|
|
47
|
+
for mol, scaffold in zip(mols, scaffolds):
|
|
48
|
+
smi = dm.to_smiles(scaffold)
|
|
49
|
+
if smi not in scaffold_to_mols:
|
|
50
|
+
scaffold_to_mols[smi] = []
|
|
51
|
+
scaffold_to_mols[smi].append(mol)
|
|
52
|
+
|
|
53
|
+
# Ensure train/test sets have different scaffolds
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**SAR analysis**:
|
|
57
|
+
```python
|
|
58
|
+
# Group by scaffold and analyze activity
|
|
59
|
+
for scaffold_smi, molecules in scaffold_to_mols.items():
|
|
60
|
+
activities = [get_activity(mol) for mol in molecules]
|
|
61
|
+
print(f"Scaffold: {scaffold_smi}, Mean activity: {np.mean(activities)}")
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Fragments Module (`datamol.fragment`)
|
|
67
|
+
|
|
68
|
+
Molecular fragmentation breaks molecules into smaller pieces based on chemical rules, useful for fragment-based drug design and substructure analysis.
|
|
69
|
+
|
|
70
|
+
### BRICS Fragmentation
|
|
71
|
+
|
|
72
|
+
#### `dm.fragment.brics(mol, ...)`
|
|
73
|
+
Fragment molecule using BRICS (Breaking Retrosynthetically Interesting Chemical Substructures).
|
|
74
|
+
- **Method**: Dissects based on 16 chemically meaningful bond types
|
|
75
|
+
- **Consideration**: Considers chemical environment and surrounding substructures
|
|
76
|
+
- **Returns**: Set of fragment SMILES strings
|
|
77
|
+
- **Use case**: Retrosynthetic analysis, fragment-based design
|
|
78
|
+
- **Example**:
|
|
79
|
+
```python
|
|
80
|
+
mol = dm.to_mol("c1ccccc1CCN")
|
|
81
|
+
fragments = dm.fragment.brics(mol)
|
|
82
|
+
# Returns fragments like: '[1*]CCN', '[1*]c1ccccc1', etc.
|
|
83
|
+
# [1*] represents attachment points
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### RECAP Fragmentation
|
|
87
|
+
|
|
88
|
+
#### `dm.fragment.recap(mol, ...)`
|
|
89
|
+
Fragment molecule using RECAP (Retrosynthetic Combinatorial Analysis Procedure).
|
|
90
|
+
- **Method**: Dissects based on 11 predefined bond types
|
|
91
|
+
- **Rules**:
|
|
92
|
+
- Leaves alkyl groups smaller than 5 carbons intact
|
|
93
|
+
- Preserves cyclic bonds
|
|
94
|
+
- **Returns**: Set of fragment SMILES strings
|
|
95
|
+
- **Use case**: Combinatorial library design
|
|
96
|
+
- **Example**:
|
|
97
|
+
```python
|
|
98
|
+
mol = dm.to_mol("CCCCCc1ccccc1")
|
|
99
|
+
fragments = dm.fragment.recap(mol)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### MMPA Fragmentation
|
|
103
|
+
|
|
104
|
+
#### `dm.fragment.mmpa_frag(mol, ...)`
|
|
105
|
+
Fragment for Matched Molecular Pair Analysis.
|
|
106
|
+
- **Purpose**: Generate fragments suitable for identifying molecular pairs
|
|
107
|
+
- **Use case**: Analyzing how small structural changes affect properties
|
|
108
|
+
- **Example**:
|
|
109
|
+
```python
|
|
110
|
+
fragments = dm.fragment.mmpa_frag(mol)
|
|
111
|
+
# Used to find pairs of molecules differing by single transformation
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Comparison of Methods
|
|
115
|
+
|
|
116
|
+
| Method | Bond Types | Preserves Cycles | Best For |
|
|
117
|
+
|--------|-----------|------------------|----------|
|
|
118
|
+
| BRICS | 16 | Yes | Retrosynthetic analysis, fragment recombination |
|
|
119
|
+
| RECAP | 11 | Yes | Combinatorial library design |
|
|
120
|
+
| MMPA | Variable | Depends | Structure-activity relationship analysis |
|
|
121
|
+
|
|
122
|
+
### Fragmentation Workflow
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import datamol as dm
|
|
126
|
+
|
|
127
|
+
# 1. Fragment a molecule
|
|
128
|
+
mol = dm.to_mol("CC(=O)Oc1ccccc1C(=O)O") # Aspirin
|
|
129
|
+
brics_frags = dm.fragment.brics(mol)
|
|
130
|
+
recap_frags = dm.fragment.recap(mol)
|
|
131
|
+
|
|
132
|
+
# 2. Analyze fragment frequency across library
|
|
133
|
+
all_fragments = []
|
|
134
|
+
for mol in molecule_library:
|
|
135
|
+
frags = dm.fragment.brics(mol)
|
|
136
|
+
all_fragments.extend(frags)
|
|
137
|
+
|
|
138
|
+
# 3. Identify common fragments
|
|
139
|
+
from collections import Counter
|
|
140
|
+
fragment_counts = Counter(all_fragments)
|
|
141
|
+
common_fragments = fragment_counts.most_common(20)
|
|
142
|
+
|
|
143
|
+
# 4. Convert fragments back to molecules (remove attachment points)
|
|
144
|
+
def clean_fragment(frag_smiles):
|
|
145
|
+
# Cap the [1*] attachment point with hydrogen; other labels ([2*], [3*], ...) need the same replacement
|
|
146
|
+
clean = frag_smiles.replace('[1*]', '[H]')
|
|
147
|
+
return dm.to_mol(clean)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Advanced: Fragment-Based Virtual Screening
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
# Build fragment library from known actives
|
|
154
|
+
active_fragments = set()
|
|
155
|
+
for active_mol in active_compounds:
|
|
156
|
+
frags = dm.fragment.brics(active_mol)
|
|
157
|
+
active_fragments.update(frags)
|
|
158
|
+
|
|
159
|
+
# Screen compounds for presence of active fragments
|
|
160
|
+
def score_by_fragments(mol, fragment_set):
|
|
161
|
+
mol_frags = dm.fragment.brics(mol)
|
|
162
|
+
overlap = mol_frags.intersection(fragment_set)
|
|
163
|
+
return len(overlap) / len(mol_frags)
|
|
164
|
+
|
|
165
|
+
# Score screening library
|
|
166
|
+
scores = [score_by_fragments(mol, active_fragments) for mol in screening_lib]
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Key Concepts
|
|
170
|
+
|
|
171
|
+
- **Attachment Points**: Marked with [1*], [2*], etc. in fragment SMILES
|
|
172
|
+
- **Retrosynthetic**: Fragmentation mimics synthetic disconnections
|
|
173
|
+
- **Chemically Meaningful**: Breaks occur at typical synthetic bonds
|
|
174
|
+
- **Recombination**: Fragments can theoretically be recombined into valid molecules
|