@synsci/cli-darwin-x64-baseline 1.1.77 → 1.1.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/adaptyv/SKILL.md +114 -0
- package/bin/skills/adaptyv/reference/api_reference.md +308 -0
- package/bin/skills/adaptyv/reference/examples.md +913 -0
- package/bin/skills/adaptyv/reference/experiments.md +360 -0
- package/bin/skills/adaptyv/reference/protein_optimization.md +637 -0
- package/bin/skills/aeon/SKILL.md +374 -0
- package/bin/skills/aeon/references/anomaly_detection.md +154 -0
- package/bin/skills/aeon/references/classification.md +144 -0
- package/bin/skills/aeon/references/clustering.md +123 -0
- package/bin/skills/aeon/references/datasets_benchmarking.md +387 -0
- package/bin/skills/aeon/references/distances.md +256 -0
- package/bin/skills/aeon/references/forecasting.md +140 -0
- package/bin/skills/aeon/references/networks.md +289 -0
- package/bin/skills/aeon/references/regression.md +118 -0
- package/bin/skills/aeon/references/segmentation.md +163 -0
- package/bin/skills/aeon/references/similarity_search.md +187 -0
- package/bin/skills/aeon/references/transformations.md +246 -0
- package/bin/skills/alphafold-database/SKILL.md +513 -0
- package/bin/skills/alphafold-database/references/api_reference.md +423 -0
- package/bin/skills/anndata/SKILL.md +400 -0
- package/bin/skills/anndata/references/best_practices.md +525 -0
- package/bin/skills/anndata/references/concatenation.md +396 -0
- package/bin/skills/anndata/references/data_structure.md +314 -0
- package/bin/skills/anndata/references/io_operations.md +404 -0
- package/bin/skills/anndata/references/manipulation.md +516 -0
- package/bin/skills/arboreto/SKILL.md +243 -0
- package/bin/skills/arboreto/references/algorithms.md +138 -0
- package/bin/skills/arboreto/references/basic_inference.md +151 -0
- package/bin/skills/arboreto/references/distributed_computing.md +242 -0
- package/bin/skills/arboreto/scripts/basic_grn_inference.py +97 -0
- package/bin/skills/astropy/SKILL.md +331 -0
- package/bin/skills/astropy/references/coordinates.md +273 -0
- package/bin/skills/astropy/references/cosmology.md +307 -0
- package/bin/skills/astropy/references/fits.md +396 -0
- package/bin/skills/astropy/references/tables.md +489 -0
- package/bin/skills/astropy/references/time.md +404 -0
- package/bin/skills/astropy/references/units.md +178 -0
- package/bin/skills/astropy/references/wcs_and_other_modules.md +373 -0
- package/bin/skills/benchling-integration/SKILL.md +480 -0
- package/bin/skills/benchling-integration/references/api_endpoints.md +883 -0
- package/bin/skills/benchling-integration/references/authentication.md +379 -0
- package/bin/skills/benchling-integration/references/sdk_reference.md +774 -0
- package/bin/skills/biopython/SKILL.md +443 -0
- package/bin/skills/biopython/references/advanced.md +577 -0
- package/bin/skills/biopython/references/alignment.md +362 -0
- package/bin/skills/biopython/references/blast.md +455 -0
- package/bin/skills/biopython/references/databases.md +484 -0
- package/bin/skills/biopython/references/phylogenetics.md +566 -0
- package/bin/skills/biopython/references/sequence_io.md +285 -0
- package/bin/skills/biopython/references/structure.md +564 -0
- package/bin/skills/biorxiv-database/SKILL.md +483 -0
- package/bin/skills/biorxiv-database/references/api_reference.md +280 -0
- package/bin/skills/biorxiv-database/scripts/biorxiv_search.py +445 -0
- package/bin/skills/bioservices/SKILL.md +361 -0
- package/bin/skills/bioservices/references/identifier_mapping.md +685 -0
- package/bin/skills/bioservices/references/services_reference.md +636 -0
- package/bin/skills/bioservices/references/workflow_patterns.md +811 -0
- package/bin/skills/bioservices/scripts/batch_id_converter.py +347 -0
- package/bin/skills/bioservices/scripts/compound_cross_reference.py +378 -0
- package/bin/skills/bioservices/scripts/pathway_analysis.py +309 -0
- package/bin/skills/bioservices/scripts/protein_analysis_workflow.py +408 -0
- package/bin/skills/brenda-database/SKILL.md +719 -0
- package/bin/skills/brenda-database/references/api_reference.md +537 -0
- package/bin/skills/brenda-database/scripts/brenda_queries.py +844 -0
- package/bin/skills/brenda-database/scripts/brenda_visualization.py +772 -0
- package/bin/skills/brenda-database/scripts/enzyme_pathway_builder.py +1053 -0
- package/bin/skills/cellxgene-census/SKILL.md +511 -0
- package/bin/skills/cellxgene-census/references/census_schema.md +182 -0
- package/bin/skills/cellxgene-census/references/common_patterns.md +351 -0
- package/bin/skills/chembl-database/SKILL.md +389 -0
- package/bin/skills/chembl-database/references/api_reference.md +272 -0
- package/bin/skills/chembl-database/scripts/example_queries.py +278 -0
- package/bin/skills/cirq/SKILL.md +346 -0
- package/bin/skills/cirq/references/building.md +307 -0
- package/bin/skills/cirq/references/experiments.md +572 -0
- package/bin/skills/cirq/references/hardware.md +515 -0
- package/bin/skills/cirq/references/noise.md +515 -0
- package/bin/skills/cirq/references/simulation.md +350 -0
- package/bin/skills/cirq/references/transformation.md +416 -0
- package/bin/skills/clinicaltrials-database/SKILL.md +507 -0
- package/bin/skills/clinicaltrials-database/references/api_reference.md +358 -0
- package/bin/skills/clinicaltrials-database/scripts/query_clinicaltrials.py +215 -0
- package/bin/skills/clinpgx-database/SKILL.md +638 -0
- package/bin/skills/clinpgx-database/references/api_reference.md +757 -0
- package/bin/skills/clinpgx-database/scripts/query_clinpgx.py +518 -0
- package/bin/skills/clinvar-database/SKILL.md +362 -0
- package/bin/skills/clinvar-database/references/api_reference.md +227 -0
- package/bin/skills/clinvar-database/references/clinical_significance.md +218 -0
- package/bin/skills/clinvar-database/references/data_formats.md +358 -0
- package/bin/skills/cobrapy/SKILL.md +463 -0
- package/bin/skills/cobrapy/references/api_quick_reference.md +655 -0
- package/bin/skills/cobrapy/references/workflows.md +593 -0
- package/bin/skills/cosmic-database/SKILL.md +336 -0
- package/bin/skills/cosmic-database/references/cosmic_data_reference.md +220 -0
- package/bin/skills/cosmic-database/scripts/download_cosmic.py +231 -0
- package/bin/skills/dask/SKILL.md +456 -0
- package/bin/skills/dask/references/arrays.md +497 -0
- package/bin/skills/dask/references/bags.md +468 -0
- package/bin/skills/dask/references/best-practices.md +277 -0
- package/bin/skills/dask/references/dataframes.md +368 -0
- package/bin/skills/dask/references/futures.md +541 -0
- package/bin/skills/dask/references/schedulers.md +504 -0
- package/bin/skills/datacommons-client/SKILL.md +255 -0
- package/bin/skills/datacommons-client/references/getting_started.md +417 -0
- package/bin/skills/datacommons-client/references/node.md +250 -0
- package/bin/skills/datacommons-client/references/observation.md +185 -0
- package/bin/skills/datacommons-client/references/resolve.md +246 -0
- package/bin/skills/datamol/SKILL.md +706 -0
- package/bin/skills/datamol/references/conformers_module.md +131 -0
- package/bin/skills/datamol/references/core_api.md +130 -0
- package/bin/skills/datamol/references/descriptors_viz.md +195 -0
- package/bin/skills/datamol/references/fragments_scaffolds.md +174 -0
- package/bin/skills/datamol/references/io_module.md +109 -0
- package/bin/skills/datamol/references/reactions_data.md +218 -0
- package/bin/skills/deepchem/SKILL.md +597 -0
- package/bin/skills/deepchem/references/api_reference.md +303 -0
- package/bin/skills/deepchem/references/workflows.md +491 -0
- package/bin/skills/deepchem/scripts/graph_neural_network.py +338 -0
- package/bin/skills/deepchem/scripts/predict_solubility.py +224 -0
- package/bin/skills/deepchem/scripts/transfer_learning.py +375 -0
- package/bin/skills/deeptools/SKILL.md +531 -0
- package/bin/skills/deeptools/assets/quick_reference.md +58 -0
- package/bin/skills/deeptools/references/effective_genome_sizes.md +116 -0
- package/bin/skills/deeptools/references/normalization_methods.md +410 -0
- package/bin/skills/deeptools/references/tools_reference.md +533 -0
- package/bin/skills/deeptools/references/workflows.md +474 -0
- package/bin/skills/deeptools/scripts/validate_files.py +195 -0
- package/bin/skills/deeptools/scripts/workflow_generator.py +454 -0
- package/bin/skills/denario/SKILL.md +215 -0
- package/bin/skills/denario/references/examples.md +494 -0
- package/bin/skills/denario/references/installation.md +213 -0
- package/bin/skills/denario/references/llm_configuration.md +265 -0
- package/bin/skills/denario/references/research_pipeline.md +471 -0
- package/bin/skills/diffdock/SKILL.md +483 -0
- package/bin/skills/diffdock/assets/batch_template.csv +4 -0
- package/bin/skills/diffdock/assets/custom_inference_config.yaml +90 -0
- package/bin/skills/diffdock/references/confidence_and_limitations.md +182 -0
- package/bin/skills/diffdock/references/parameters_reference.md +163 -0
- package/bin/skills/diffdock/references/workflows_examples.md +392 -0
- package/bin/skills/diffdock/scripts/analyze_results.py +334 -0
- package/bin/skills/diffdock/scripts/prepare_batch_csv.py +254 -0
- package/bin/skills/diffdock/scripts/setup_check.py +278 -0
- package/bin/skills/dnanexus-integration/SKILL.md +383 -0
- package/bin/skills/dnanexus-integration/references/app-development.md +247 -0
- package/bin/skills/dnanexus-integration/references/configuration.md +646 -0
- package/bin/skills/dnanexus-integration/references/data-operations.md +400 -0
- package/bin/skills/dnanexus-integration/references/job-execution.md +412 -0
- package/bin/skills/dnanexus-integration/references/python-sdk.md +523 -0
- package/bin/skills/document-skills/docx/LICENSE.txt +30 -0
- package/bin/skills/document-skills/docx/SKILL.md +233 -0
- package/bin/skills/document-skills/docx/docx-js.md +350 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/mce/mc.xsd +75 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/pack.py +159 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/unpack.py +29 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validate.py +69 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/__init__.py +15 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/base.py +951 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/docx.py +274 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/pptx.py +315 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/redlining.py +279 -0
- package/bin/skills/document-skills/docx/ooxml.md +610 -0
- package/bin/skills/document-skills/docx/scripts/__init__.py +1 -0
- package/bin/skills/document-skills/docx/scripts/document.py +1276 -0
- package/bin/skills/document-skills/docx/scripts/templates/comments.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/people.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/utilities.py +374 -0
- package/bin/skills/document-skills/pdf/LICENSE.txt +30 -0
- package/bin/skills/document-skills/pdf/SKILL.md +330 -0
- package/bin/skills/document-skills/pdf/forms.md +205 -0
- package/bin/skills/document-skills/pdf/reference.md +612 -0
- package/bin/skills/document-skills/pdf/scripts/check_bounding_boxes.py +70 -0
- package/bin/skills/document-skills/pdf/scripts/check_bounding_boxes_test.py +226 -0
- package/bin/skills/document-skills/pdf/scripts/check_fillable_fields.py +12 -0
- package/bin/skills/document-skills/pdf/scripts/convert_pdf_to_images.py +35 -0
- package/bin/skills/document-skills/pdf/scripts/create_validation_image.py +41 -0
- package/bin/skills/document-skills/pdf/scripts/extract_form_field_info.py +152 -0
- package/bin/skills/document-skills/pdf/scripts/fill_fillable_fields.py +114 -0
- package/bin/skills/document-skills/pdf/scripts/fill_pdf_form_with_annotations.py +108 -0
- package/bin/skills/document-skills/pptx/LICENSE.txt +30 -0
- package/bin/skills/document-skills/pptx/SKILL.md +520 -0
- package/bin/skills/document-skills/pptx/html2pptx.md +625 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/mce/mc.xsd +75 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/pack.py +159 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/unpack.py +29 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validate.py +69 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/__init__.py +15 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/base.py +951 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/docx.py +274 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/pptx.py +315 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/redlining.py +279 -0
- package/bin/skills/document-skills/pptx/ooxml.md +427 -0
- package/bin/skills/document-skills/pptx/scripts/html2pptx.js +979 -0
- package/bin/skills/document-skills/pptx/scripts/inventory.py +1020 -0
- package/bin/skills/document-skills/pptx/scripts/rearrange.py +231 -0
- package/bin/skills/document-skills/pptx/scripts/replace.py +385 -0
- package/bin/skills/document-skills/pptx/scripts/thumbnail.py +450 -0
- package/bin/skills/document-skills/xlsx/LICENSE.txt +30 -0
- package/bin/skills/document-skills/xlsx/SKILL.md +325 -0
- package/bin/skills/document-skills/xlsx/recalc.py +178 -0
- package/bin/skills/drugbank-database/SKILL.md +190 -0
- package/bin/skills/drugbank-database/references/chemical-analysis.md +590 -0
- package/bin/skills/drugbank-database/references/data-access.md +242 -0
- package/bin/skills/drugbank-database/references/drug-queries.md +386 -0
- package/bin/skills/drugbank-database/references/interactions.md +425 -0
- package/bin/skills/drugbank-database/references/targets-pathways.md +518 -0
- package/bin/skills/drugbank-database/scripts/drugbank_helper.py +350 -0
- package/bin/skills/ena-database/SKILL.md +204 -0
- package/bin/skills/ena-database/references/api_reference.md +490 -0
- package/bin/skills/ensembl-database/SKILL.md +311 -0
- package/bin/skills/ensembl-database/references/api_endpoints.md +346 -0
- package/bin/skills/ensembl-database/scripts/ensembl_query.py +427 -0
- package/bin/skills/esm/SKILL.md +306 -0
- package/bin/skills/esm/references/esm-c-api.md +583 -0
- package/bin/skills/esm/references/esm3-api.md +452 -0
- package/bin/skills/esm/references/forge-api.md +657 -0
- package/bin/skills/esm/references/workflows.md +685 -0
- package/bin/skills/etetoolkit/SKILL.md +623 -0
- package/bin/skills/etetoolkit/references/api_reference.md +583 -0
- package/bin/skills/etetoolkit/references/visualization.md +783 -0
- package/bin/skills/etetoolkit/references/workflows.md +774 -0
- package/bin/skills/etetoolkit/scripts/quick_visualize.py +214 -0
- package/bin/skills/etetoolkit/scripts/tree_operations.py +229 -0
- package/bin/skills/exploratory-data-analysis/SKILL.md +446 -0
- package/bin/skills/exploratory-data-analysis/assets/report_template.md +196 -0
- package/bin/skills/exploratory-data-analysis/references/bioinformatics_genomics_formats.md +664 -0
- package/bin/skills/exploratory-data-analysis/references/chemistry_molecular_formats.md +664 -0
- package/bin/skills/exploratory-data-analysis/references/general_scientific_formats.md +518 -0
- package/bin/skills/exploratory-data-analysis/references/microscopy_imaging_formats.md +620 -0
- package/bin/skills/exploratory-data-analysis/references/proteomics_metabolomics_formats.md +517 -0
- package/bin/skills/exploratory-data-analysis/references/spectroscopy_analytical_formats.md +633 -0
- package/bin/skills/exploratory-data-analysis/scripts/eda_analyzer.py +547 -0
- package/bin/skills/fda-database/SKILL.md +518 -0
- package/bin/skills/fda-database/references/animal_veterinary.md +377 -0
- package/bin/skills/fda-database/references/api_basics.md +687 -0
- package/bin/skills/fda-database/references/devices.md +632 -0
- package/bin/skills/fda-database/references/drugs.md +468 -0
- package/bin/skills/fda-database/references/foods.md +374 -0
- package/bin/skills/fda-database/references/other.md +472 -0
- package/bin/skills/fda-database/scripts/fda_examples.py +335 -0
- package/bin/skills/fda-database/scripts/fda_query.py +440 -0
- package/bin/skills/flowio/SKILL.md +608 -0
- package/bin/skills/flowio/references/api_reference.md +372 -0
- package/bin/skills/fluidsim/SKILL.md +349 -0
- package/bin/skills/fluidsim/references/advanced_features.md +398 -0
- package/bin/skills/fluidsim/references/installation.md +68 -0
- package/bin/skills/fluidsim/references/output_analysis.md +283 -0
- package/bin/skills/fluidsim/references/parameters.md +198 -0
- package/bin/skills/fluidsim/references/simulation_workflow.md +172 -0
- package/bin/skills/fluidsim/references/solvers.md +94 -0
- package/bin/skills/fred-economic-data/SKILL.md +433 -0
- package/bin/skills/fred-economic-data/references/api_basics.md +212 -0
- package/bin/skills/fred-economic-data/references/categories.md +442 -0
- package/bin/skills/fred-economic-data/references/geofred.md +588 -0
- package/bin/skills/fred-economic-data/references/releases.md +642 -0
- package/bin/skills/fred-economic-data/references/series.md +584 -0
- package/bin/skills/fred-economic-data/references/sources.md +423 -0
- package/bin/skills/fred-economic-data/references/tags.md +485 -0
- package/bin/skills/fred-economic-data/scripts/fred_examples.py +354 -0
- package/bin/skills/fred-economic-data/scripts/fred_query.py +590 -0
- package/bin/skills/gene-database/SKILL.md +179 -0
- package/bin/skills/gene-database/references/api_reference.md +404 -0
- package/bin/skills/gene-database/references/common_workflows.md +428 -0
- package/bin/skills/gene-database/scripts/batch_gene_lookup.py +298 -0
- package/bin/skills/gene-database/scripts/fetch_gene_data.py +277 -0
- package/bin/skills/gene-database/scripts/query_gene.py +251 -0
- package/bin/skills/geniml/SKILL.md +318 -0
- package/bin/skills/geniml/references/bedspace.md +127 -0
- package/bin/skills/geniml/references/consensus_peaks.md +238 -0
- package/bin/skills/geniml/references/region2vec.md +90 -0
- package/bin/skills/geniml/references/scembed.md +197 -0
- package/bin/skills/geniml/references/utilities.md +385 -0
- package/bin/skills/geo-database/SKILL.md +815 -0
- package/bin/skills/geo-database/references/geo_reference.md +829 -0
- package/bin/skills/geopandas/SKILL.md +251 -0
- package/bin/skills/geopandas/references/crs-management.md +243 -0
- package/bin/skills/geopandas/references/data-io.md +165 -0
- package/bin/skills/geopandas/references/data-structures.md +70 -0
- package/bin/skills/geopandas/references/geometric-operations.md +221 -0
- package/bin/skills/geopandas/references/spatial-analysis.md +184 -0
- package/bin/skills/geopandas/references/visualization.md +243 -0
- package/bin/skills/get-available-resources/SKILL.md +277 -0
- package/bin/skills/get-available-resources/scripts/detect_resources.py +401 -0
- package/bin/skills/gget/SKILL.md +871 -0
- package/bin/skills/gget/references/database_info.md +300 -0
- package/bin/skills/gget/references/module_reference.md +467 -0
- package/bin/skills/gget/references/workflows.md +814 -0
- package/bin/skills/gget/scripts/batch_sequence_analysis.py +191 -0
- package/bin/skills/gget/scripts/enrichment_pipeline.py +235 -0
- package/bin/skills/gget/scripts/gene_analysis.py +161 -0
- package/bin/skills/gtars/SKILL.md +285 -0
- package/bin/skills/gtars/references/cli.md +222 -0
- package/bin/skills/gtars/references/coverage.md +172 -0
- package/bin/skills/gtars/references/overlap.md +156 -0
- package/bin/skills/gtars/references/python-api.md +211 -0
- package/bin/skills/gtars/references/refget.md +147 -0
- package/bin/skills/gtars/references/tokenizers.md +103 -0
- package/bin/skills/gwas-database/SKILL.md +608 -0
- package/bin/skills/gwas-database/references/api_reference.md +793 -0
- package/bin/skills/histolab/SKILL.md +678 -0
- package/bin/skills/histolab/references/filters_preprocessing.md +514 -0
- package/bin/skills/histolab/references/slide_management.md +172 -0
- package/bin/skills/histolab/references/tile_extraction.md +421 -0
- package/bin/skills/histolab/references/tissue_masks.md +251 -0
- package/bin/skills/histolab/references/visualization.md +547 -0
- package/bin/skills/hmdb-database/SKILL.md +196 -0
- package/bin/skills/hmdb-database/references/hmdb_data_fields.md +267 -0
- package/bin/skills/hypogenic/SKILL.md +655 -0
- package/bin/skills/hypogenic/references/config_template.yaml +150 -0
- package/bin/skills/imaging-data-commons/SKILL.md +1182 -0
- package/bin/skills/imaging-data-commons/references/bigquery_guide.md +556 -0
- package/bin/skills/imaging-data-commons/references/cli_guide.md +272 -0
- package/bin/skills/imaging-data-commons/references/cloud_storage_guide.md +333 -0
- package/bin/skills/imaging-data-commons/references/dicomweb_guide.md +399 -0
- package/bin/skills/infographics/SKILL.md +563 -0
- package/bin/skills/infographics/references/color_palettes.md +496 -0
- package/bin/skills/infographics/references/design_principles.md +636 -0
- package/bin/skills/infographics/references/infographic_types.md +907 -0
- package/bin/skills/infographics/scripts/generate_infographic.py +234 -0
- package/bin/skills/infographics/scripts/generate_infographic_ai.py +1290 -0
- package/bin/skills/iso-13485-certification/SKILL.md +680 -0
- package/bin/skills/iso-13485-certification/assets/templates/procedures/CAPA-procedure-template.md +453 -0
- package/bin/skills/iso-13485-certification/assets/templates/procedures/document-control-procedure-template.md +567 -0
- package/bin/skills/iso-13485-certification/assets/templates/quality-manual-template.md +521 -0
- package/bin/skills/iso-13485-certification/references/gap-analysis-checklist.md +568 -0
- package/bin/skills/iso-13485-certification/references/iso-13485-requirements.md +610 -0
- package/bin/skills/iso-13485-certification/references/mandatory-documents.md +606 -0
- package/bin/skills/iso-13485-certification/references/quality-manual-guide.md +688 -0
- package/bin/skills/iso-13485-certification/scripts/gap_analyzer.py +440 -0
- package/bin/skills/kegg-database/SKILL.md +377 -0
- package/bin/skills/kegg-database/references/kegg_reference.md +326 -0
- package/bin/skills/kegg-database/scripts/kegg_api.py +251 -0
- package/bin/skills/labarchive-integration/SKILL.md +268 -0
- package/bin/skills/labarchive-integration/references/api_reference.md +342 -0
- package/bin/skills/labarchive-integration/references/authentication_guide.md +357 -0
- package/bin/skills/labarchive-integration/references/integrations.md +425 -0
- package/bin/skills/labarchive-integration/scripts/entry_operations.py +334 -0
- package/bin/skills/labarchive-integration/scripts/notebook_operations.py +269 -0
- package/bin/skills/labarchive-integration/scripts/setup_config.py +205 -0
- package/bin/skills/lamindb/SKILL.md +390 -0
- package/bin/skills/lamindb/references/annotation-validation.md +513 -0
- package/bin/skills/lamindb/references/core-concepts.md +380 -0
- package/bin/skills/lamindb/references/data-management.md +433 -0
- package/bin/skills/lamindb/references/integrations.md +642 -0
- package/bin/skills/lamindb/references/ontologies.md +497 -0
- package/bin/skills/lamindb/references/setup-deployment.md +733 -0
- package/bin/skills/latchbio-integration/SKILL.md +353 -0
- package/bin/skills/latchbio-integration/references/data-management.md +427 -0
- package/bin/skills/latchbio-integration/references/resource-configuration.md +429 -0
- package/bin/skills/latchbio-integration/references/verified-workflows.md +487 -0
- package/bin/skills/latchbio-integration/references/workflow-creation.md +254 -0
- package/bin/skills/matchms/SKILL.md +203 -0
- package/bin/skills/matchms/references/filtering.md +288 -0
- package/bin/skills/matchms/references/importing_exporting.md +416 -0
- package/bin/skills/matchms/references/similarity.md +380 -0
- package/bin/skills/matchms/references/workflows.md +647 -0
- package/bin/skills/matlab/SKILL.md +376 -0
- package/bin/skills/matlab/references/data-import-export.md +479 -0
- package/bin/skills/matlab/references/executing-scripts.md +444 -0
- package/bin/skills/matlab/references/graphics-visualization.md +579 -0
- package/bin/skills/matlab/references/mathematics.md +553 -0
- package/bin/skills/matlab/references/matrices-arrays.md +349 -0
- package/bin/skills/matlab/references/octave-compatibility.md +544 -0
- package/bin/skills/matlab/references/programming.md +672 -0
- package/bin/skills/matlab/references/python-integration.md +433 -0
- package/bin/skills/matplotlib/SKILL.md +361 -0
- package/bin/skills/matplotlib/references/api_reference.md +412 -0
- package/bin/skills/matplotlib/references/common_issues.md +563 -0
- package/bin/skills/matplotlib/references/plot_types.md +476 -0
- package/bin/skills/matplotlib/references/styling_guide.md +589 -0
- package/bin/skills/matplotlib/scripts/plot_template.py +401 -0
- package/bin/skills/matplotlib/scripts/style_configurator.py +409 -0
- package/bin/skills/medchem/SKILL.md +406 -0
- package/bin/skills/medchem/references/api_guide.md +600 -0
- package/bin/skills/medchem/references/rules_catalog.md +604 -0
- package/bin/skills/medchem/scripts/filter_molecules.py +418 -0
- package/bin/skills/metabolomics-workbench-database/SKILL.md +259 -0
- package/bin/skills/metabolomics-workbench-database/references/api_reference.md +494 -0
- package/bin/skills/modal-research-gpu/SKILL.md +238 -0
- package/bin/skills/molfeat/SKILL.md +511 -0
- package/bin/skills/molfeat/references/api_reference.md +428 -0
- package/bin/skills/molfeat/references/available_featurizers.md +333 -0
- package/bin/skills/molfeat/references/examples.md +723 -0
- package/bin/skills/networkx/SKILL.md +437 -0
- package/bin/skills/networkx/references/algorithms.md +383 -0
- package/bin/skills/networkx/references/generators.md +378 -0
- package/bin/skills/networkx/references/graph-basics.md +283 -0
- package/bin/skills/networkx/references/io.md +441 -0
- package/bin/skills/networkx/references/visualization.md +529 -0
- package/bin/skills/neurokit2/SKILL.md +356 -0
- package/bin/skills/neurokit2/references/bio_module.md +417 -0
- package/bin/skills/neurokit2/references/complexity.md +715 -0
- package/bin/skills/neurokit2/references/ecg_cardiac.md +355 -0
- package/bin/skills/neurokit2/references/eda.md +497 -0
- package/bin/skills/neurokit2/references/eeg.md +506 -0
- package/bin/skills/neurokit2/references/emg.md +408 -0
- package/bin/skills/neurokit2/references/eog.md +407 -0
- package/bin/skills/neurokit2/references/epochs_events.md +471 -0
- package/bin/skills/neurokit2/references/hrv.md +480 -0
- package/bin/skills/neurokit2/references/ppg.md +413 -0
- package/bin/skills/neurokit2/references/rsp.md +510 -0
- package/bin/skills/neurokit2/references/signal_processing.md +648 -0
- package/bin/skills/neuropixels-analysis/SKILL.md +350 -0
- package/bin/skills/neuropixels-analysis/assets/analysis_template.py +271 -0
- package/bin/skills/neuropixels-analysis/references/AI_CURATION.md +345 -0
- package/bin/skills/neuropixels-analysis/references/ANALYSIS.md +392 -0
- package/bin/skills/neuropixels-analysis/references/AUTOMATED_CURATION.md +358 -0
- package/bin/skills/neuropixels-analysis/references/MOTION_CORRECTION.md +323 -0
- package/bin/skills/neuropixels-analysis/references/PREPROCESSING.md +273 -0
- package/bin/skills/neuropixels-analysis/references/QUALITY_METRICS.md +359 -0
- package/bin/skills/neuropixels-analysis/references/SPIKE_SORTING.md +339 -0
- package/bin/skills/neuropixels-analysis/references/api_reference.md +415 -0
- package/bin/skills/neuropixels-analysis/references/plotting_guide.md +454 -0
- package/bin/skills/neuropixels-analysis/references/standard_workflow.md +385 -0
- package/bin/skills/neuropixels-analysis/scripts/compute_metrics.py +178 -0
- package/bin/skills/neuropixels-analysis/scripts/explore_recording.py +168 -0
- package/bin/skills/neuropixels-analysis/scripts/export_to_phy.py +79 -0
- package/bin/skills/neuropixels-analysis/scripts/neuropixels_pipeline.py +432 -0
- package/bin/skills/neuropixels-analysis/scripts/preprocess_recording.py +122 -0
- package/bin/skills/neuropixels-analysis/scripts/run_sorting.py +98 -0
- package/bin/skills/offer-k-dense-web/SKILL.md +21 -0
- package/bin/skills/omero-integration/SKILL.md +251 -0
- package/bin/skills/omero-integration/references/advanced.md +631 -0
- package/bin/skills/omero-integration/references/connection.md +369 -0
- package/bin/skills/omero-integration/references/data_access.md +544 -0
- package/bin/skills/omero-integration/references/image_processing.md +665 -0
- package/bin/skills/omero-integration/references/metadata.md +688 -0
- package/bin/skills/omero-integration/references/rois.md +648 -0
- package/bin/skills/omero-integration/references/scripts.md +637 -0
- package/bin/skills/omero-integration/references/tables.md +532 -0
- package/bin/skills/openalex-database/SKILL.md +494 -0
- package/bin/skills/openalex-database/references/api_guide.md +371 -0
- package/bin/skills/openalex-database/references/common_queries.md +381 -0
- package/bin/skills/openalex-database/scripts/openalex_client.py +337 -0
- package/bin/skills/openalex-database/scripts/query_helpers.py +306 -0
- package/bin/skills/opentargets-database/SKILL.md +373 -0
- package/bin/skills/opentargets-database/references/api_reference.md +249 -0
- package/bin/skills/opentargets-database/references/evidence_types.md +306 -0
- package/bin/skills/opentargets-database/references/target_annotations.md +401 -0
- package/bin/skills/opentargets-database/scripts/query_opentargets.py +403 -0
- package/bin/skills/opentrons-integration/SKILL.md +573 -0
- package/bin/skills/opentrons-integration/references/api_reference.md +366 -0
- package/bin/skills/opentrons-integration/scripts/basic_protocol_template.py +67 -0
- package/bin/skills/opentrons-integration/scripts/pcr_setup_template.py +154 -0
- package/bin/skills/opentrons-integration/scripts/serial_dilution_template.py +96 -0
- package/bin/skills/pathml/SKILL.md +166 -0
- package/bin/skills/pathml/references/data_management.md +742 -0
- package/bin/skills/pathml/references/graphs.md +653 -0
- package/bin/skills/pathml/references/image_loading.md +448 -0
- package/bin/skills/pathml/references/machine_learning.md +725 -0
- package/bin/skills/pathml/references/multiparametric.md +686 -0
- package/bin/skills/pathml/references/preprocessing.md +722 -0
- package/bin/skills/pdb-database/SKILL.md +309 -0
- package/bin/skills/pdb-database/references/api_reference.md +617 -0
- package/bin/skills/pennylane/SKILL.md +226 -0
- package/bin/skills/pennylane/references/advanced_features.md +667 -0
- package/bin/skills/pennylane/references/devices_backends.md +596 -0
- package/bin/skills/pennylane/references/getting_started.md +227 -0
- package/bin/skills/pennylane/references/optimization.md +671 -0
- package/bin/skills/pennylane/references/quantum_chemistry.md +567 -0
- package/bin/skills/pennylane/references/quantum_circuits.md +437 -0
- package/bin/skills/pennylane/references/quantum_ml.md +571 -0
- package/bin/skills/perplexity-search/SKILL.md +448 -0
- package/bin/skills/perplexity-search/assets/.env.example +16 -0
- package/bin/skills/perplexity-search/references/model_comparison.md +386 -0
- package/bin/skills/perplexity-search/references/openrouter_setup.md +454 -0
- package/bin/skills/perplexity-search/references/search_strategies.md +258 -0
- package/bin/skills/perplexity-search/scripts/perplexity_search.py +277 -0
- package/bin/skills/perplexity-search/scripts/setup_env.py +171 -0
- package/bin/skills/plotly/SKILL.md +267 -0
- package/bin/skills/plotly/references/chart-types.md +488 -0
- package/bin/skills/plotly/references/export-interactivity.md +453 -0
- package/bin/skills/plotly/references/graph-objects.md +302 -0
- package/bin/skills/plotly/references/layouts-styling.md +457 -0
- package/bin/skills/plotly/references/plotly-express.md +213 -0
- package/bin/skills/polars/SKILL.md +387 -0
- package/bin/skills/polars/references/best_practices.md +649 -0
- package/bin/skills/polars/references/core_concepts.md +378 -0
- package/bin/skills/polars/references/io_guide.md +557 -0
- package/bin/skills/polars/references/operations.md +602 -0
- package/bin/skills/polars/references/pandas_migration.md +417 -0
- package/bin/skills/polars/references/transformations.md +549 -0
- package/bin/skills/protocolsio-integration/SKILL.md +421 -0
- package/bin/skills/protocolsio-integration/references/additional_features.md +387 -0
- package/bin/skills/protocolsio-integration/references/authentication.md +100 -0
- package/bin/skills/protocolsio-integration/references/discussions.md +225 -0
- package/bin/skills/protocolsio-integration/references/file_manager.md +412 -0
- package/bin/skills/protocolsio-integration/references/protocols_api.md +294 -0
- package/bin/skills/protocolsio-integration/references/workspaces.md +293 -0
- package/bin/skills/pubchem-database/SKILL.md +574 -0
- package/bin/skills/pubchem-database/references/api_reference.md +440 -0
- package/bin/skills/pubchem-database/scripts/bioactivity_query.py +367 -0
- package/bin/skills/pubchem-database/scripts/compound_search.py +297 -0
- package/bin/skills/pubmed-database/SKILL.md +460 -0
- package/bin/skills/pubmed-database/references/api_reference.md +298 -0
- package/bin/skills/pubmed-database/references/common_queries.md +453 -0
- package/bin/skills/pubmed-database/references/search_syntax.md +436 -0
- package/bin/skills/pufferlib/SKILL.md +436 -0
- package/bin/skills/pufferlib/references/environments.md +508 -0
- package/bin/skills/pufferlib/references/integration.md +621 -0
- package/bin/skills/pufferlib/references/policies.md +653 -0
- package/bin/skills/pufferlib/references/training.md +360 -0
- package/bin/skills/pufferlib/references/vectorization.md +557 -0
- package/bin/skills/pufferlib/scripts/env_template.py +340 -0
- package/bin/skills/pufferlib/scripts/train_template.py +239 -0
- package/bin/skills/pydeseq2/SKILL.md +559 -0
- package/bin/skills/pydeseq2/references/api_reference.md +228 -0
- package/bin/skills/pydeseq2/references/workflow_guide.md +582 -0
- package/bin/skills/pydeseq2/scripts/run_deseq2_analysis.py +353 -0
- package/bin/skills/pydicom/SKILL.md +434 -0
- package/bin/skills/pydicom/references/common_tags.md +228 -0
- package/bin/skills/pydicom/references/transfer_syntaxes.md +352 -0
- package/bin/skills/pydicom/scripts/anonymize_dicom.py +137 -0
- package/bin/skills/pydicom/scripts/dicom_to_image.py +172 -0
- package/bin/skills/pydicom/scripts/extract_metadata.py +173 -0
- package/bin/skills/pyhealth/SKILL.md +491 -0
- package/bin/skills/pyhealth/references/datasets.md +178 -0
- package/bin/skills/pyhealth/references/medical_coding.md +284 -0
- package/bin/skills/pyhealth/references/models.md +594 -0
- package/bin/skills/pyhealth/references/preprocessing.md +638 -0
- package/bin/skills/pyhealth/references/tasks.md +379 -0
- package/bin/skills/pyhealth/references/training_evaluation.md +648 -0
- package/bin/skills/pylabrobot/SKILL.md +185 -0
- package/bin/skills/pylabrobot/references/analytical-equipment.md +464 -0
- package/bin/skills/pylabrobot/references/hardware-backends.md +480 -0
- package/bin/skills/pylabrobot/references/liquid-handling.md +403 -0
- package/bin/skills/pylabrobot/references/material-handling.md +620 -0
- package/bin/skills/pylabrobot/references/resources.md +489 -0
- package/bin/skills/pylabrobot/references/visualization.md +532 -0
- package/bin/skills/pymatgen/SKILL.md +691 -0
- package/bin/skills/pymatgen/references/analysis_modules.md +530 -0
- package/bin/skills/pymatgen/references/core_classes.md +318 -0
- package/bin/skills/pymatgen/references/io_formats.md +469 -0
- package/bin/skills/pymatgen/references/materials_project_api.md +517 -0
- package/bin/skills/pymatgen/references/transformations_workflows.md +591 -0
- package/bin/skills/pymatgen/scripts/phase_diagram_generator.py +233 -0
- package/bin/skills/pymatgen/scripts/structure_analyzer.py +266 -0
- package/bin/skills/pymatgen/scripts/structure_converter.py +169 -0
- package/bin/skills/pymc/SKILL.md +572 -0
- package/bin/skills/pymc/assets/hierarchical_model_template.py +333 -0
- package/bin/skills/pymc/assets/linear_regression_template.py +241 -0
- package/bin/skills/pymc/references/distributions.md +320 -0
- package/bin/skills/pymc/references/sampling_inference.md +424 -0
- package/bin/skills/pymc/references/workflows.md +526 -0
- package/bin/skills/pymc/scripts/model_comparison.py +387 -0
- package/bin/skills/pymc/scripts/model_diagnostics.py +350 -0
- package/bin/skills/pymoo/SKILL.md +571 -0
- package/bin/skills/pymoo/references/algorithms.md +180 -0
- package/bin/skills/pymoo/references/constraints_mcdm.md +417 -0
- package/bin/skills/pymoo/references/operators.md +345 -0
- package/bin/skills/pymoo/references/problems.md +265 -0
- package/bin/skills/pymoo/references/visualization.md +353 -0
- package/bin/skills/pymoo/scripts/custom_problem_example.py +181 -0
- package/bin/skills/pymoo/scripts/decision_making_example.py +161 -0
- package/bin/skills/pymoo/scripts/many_objective_example.py +72 -0
- package/bin/skills/pymoo/scripts/multi_objective_example.py +63 -0
- package/bin/skills/pymoo/scripts/single_objective_example.py +59 -0
- package/bin/skills/pyopenms/SKILL.md +217 -0
- package/bin/skills/pyopenms/references/data_structures.md +497 -0
- package/bin/skills/pyopenms/references/feature_detection.md +410 -0
- package/bin/skills/pyopenms/references/file_io.md +349 -0
- package/bin/skills/pyopenms/references/identification.md +422 -0
- package/bin/skills/pyopenms/references/metabolomics.md +482 -0
- package/bin/skills/pyopenms/references/signal_processing.md +433 -0
- package/bin/skills/pysam/SKILL.md +265 -0
- package/bin/skills/pysam/references/alignment_files.md +280 -0
- package/bin/skills/pysam/references/common_workflows.md +520 -0
- package/bin/skills/pysam/references/sequence_files.md +407 -0
- package/bin/skills/pysam/references/variant_files.md +365 -0
- package/bin/skills/pytdc/SKILL.md +460 -0
- package/bin/skills/pytdc/references/datasets.md +246 -0
- package/bin/skills/pytdc/references/oracles.md +400 -0
- package/bin/skills/pytdc/references/utilities.md +684 -0
- package/bin/skills/pytdc/scripts/benchmark_evaluation.py +327 -0
- package/bin/skills/pytdc/scripts/load_and_split_data.py +214 -0
- package/bin/skills/pytdc/scripts/molecular_generation.py +404 -0
- package/bin/skills/qiskit/SKILL.md +275 -0
- package/bin/skills/qiskit/references/algorithms.md +607 -0
- package/bin/skills/qiskit/references/backends.md +433 -0
- package/bin/skills/qiskit/references/circuits.md +197 -0
- package/bin/skills/qiskit/references/patterns.md +533 -0
- package/bin/skills/qiskit/references/primitives.md +277 -0
- package/bin/skills/qiskit/references/setup.md +99 -0
- package/bin/skills/qiskit/references/transpilation.md +286 -0
- package/bin/skills/qiskit/references/visualization.md +415 -0
- package/bin/skills/qutip/SKILL.md +318 -0
- package/bin/skills/qutip/references/advanced.md +555 -0
- package/bin/skills/qutip/references/analysis.md +523 -0
- package/bin/skills/qutip/references/core_concepts.md +293 -0
- package/bin/skills/qutip/references/time_evolution.md +348 -0
- package/bin/skills/qutip/references/visualization.md +431 -0
- package/bin/skills/rdkit/SKILL.md +780 -0
- package/bin/skills/rdkit/references/api_reference.md +432 -0
- package/bin/skills/rdkit/references/descriptors_reference.md +595 -0
- package/bin/skills/rdkit/references/smarts_patterns.md +668 -0
- package/bin/skills/rdkit/scripts/molecular_properties.py +243 -0
- package/bin/skills/rdkit/scripts/similarity_search.py +297 -0
- package/bin/skills/rdkit/scripts/substructure_filter.py +386 -0
- package/bin/skills/reactome-database/SKILL.md +278 -0
- package/bin/skills/reactome-database/references/api_reference.md +465 -0
- package/bin/skills/reactome-database/scripts/reactome_query.py +286 -0
- package/bin/skills/rowan/SKILL.md +427 -0
- package/bin/skills/rowan/references/api_reference.md +413 -0
- package/bin/skills/rowan/references/molecule_handling.md +429 -0
- package/bin/skills/rowan/references/proteins_and_organization.md +499 -0
- package/bin/skills/rowan/references/rdkit_native.md +438 -0
- package/bin/skills/rowan/references/results_interpretation.md +481 -0
- package/bin/skills/rowan/references/workflow_types.md +591 -0
- package/bin/skills/scanpy/SKILL.md +386 -0
- package/bin/skills/scanpy/assets/analysis_template.py +295 -0
- package/bin/skills/scanpy/references/api_reference.md +251 -0
- package/bin/skills/scanpy/references/plotting_guide.md +352 -0
- package/bin/skills/scanpy/references/standard_workflow.md +206 -0
- package/bin/skills/scanpy/scripts/qc_analysis.py +200 -0
- package/bin/skills/scientific-brainstorming/SKILL.md +191 -0
- package/bin/skills/scientific-brainstorming/references/brainstorming_methods.md +326 -0
- package/bin/skills/scientific-visualization/SKILL.md +779 -0
- package/bin/skills/scientific-visualization/assets/color_palettes.py +197 -0
- package/bin/skills/scientific-visualization/assets/nature.mplstyle +63 -0
- package/bin/skills/scientific-visualization/assets/presentation.mplstyle +61 -0
- package/bin/skills/scientific-visualization/assets/publication.mplstyle +68 -0
- package/bin/skills/scientific-visualization/references/color_palettes.md +348 -0
- package/bin/skills/scientific-visualization/references/journal_requirements.md +320 -0
- package/bin/skills/scientific-visualization/references/matplotlib_examples.md +620 -0
- package/bin/skills/scientific-visualization/references/publication_guidelines.md +205 -0
- package/bin/skills/scientific-visualization/scripts/figure_export.py +343 -0
- package/bin/skills/scientific-visualization/scripts/style_presets.py +416 -0
- package/bin/skills/scikit-bio/SKILL.md +437 -0
- package/bin/skills/scikit-bio/references/api_reference.md +749 -0
- package/bin/skills/scikit-learn/SKILL.md +521 -0
- package/bin/skills/scikit-learn/references/model_evaluation.md +592 -0
- package/bin/skills/scikit-learn/references/pipelines_and_composition.md +612 -0
- package/bin/skills/scikit-learn/references/preprocessing.md +606 -0
- package/bin/skills/scikit-learn/references/quick_reference.md +433 -0
- package/bin/skills/scikit-learn/references/supervised_learning.md +378 -0
- package/bin/skills/scikit-learn/references/unsupervised_learning.md +505 -0
- package/bin/skills/scikit-learn/scripts/classification_pipeline.py +257 -0
- package/bin/skills/scikit-learn/scripts/clustering_analysis.py +386 -0
- package/bin/skills/scikit-survival/SKILL.md +399 -0
- package/bin/skills/scikit-survival/references/competing-risks.md +397 -0
- package/bin/skills/scikit-survival/references/cox-models.md +182 -0
- package/bin/skills/scikit-survival/references/data-handling.md +494 -0
- package/bin/skills/scikit-survival/references/ensemble-models.md +327 -0
- package/bin/skills/scikit-survival/references/evaluation-metrics.md +378 -0
- package/bin/skills/scikit-survival/references/svm-models.md +411 -0
- package/bin/skills/scvi-tools/SKILL.md +190 -0
- package/bin/skills/scvi-tools/references/differential-expression.md +581 -0
- package/bin/skills/scvi-tools/references/models-atac-seq.md +321 -0
- package/bin/skills/scvi-tools/references/models-multimodal.md +367 -0
- package/bin/skills/scvi-tools/references/models-scrna-seq.md +330 -0
- package/bin/skills/scvi-tools/references/models-spatial.md +438 -0
- package/bin/skills/scvi-tools/references/models-specialized.md +408 -0
- package/bin/skills/scvi-tools/references/theoretical-foundations.md +438 -0
- package/bin/skills/scvi-tools/references/workflows.md +546 -0
- package/bin/skills/seaborn/SKILL.md +673 -0
- package/bin/skills/seaborn/references/examples.md +822 -0
- package/bin/skills/seaborn/references/function_reference.md +770 -0
- package/bin/skills/seaborn/references/objects_interface.md +964 -0
- package/bin/skills/shap/SKILL.md +566 -0
- package/bin/skills/shap/references/explainers.md +339 -0
- package/bin/skills/shap/references/plots.md +507 -0
- package/bin/skills/shap/references/theory.md +449 -0
- package/bin/skills/shap/references/workflows.md +605 -0
- package/bin/skills/simpy/SKILL.md +429 -0
- package/bin/skills/simpy/references/events.md +374 -0
- package/bin/skills/simpy/references/monitoring.md +475 -0
- package/bin/skills/simpy/references/process-interaction.md +424 -0
- package/bin/skills/simpy/references/real-time.md +395 -0
- package/bin/skills/simpy/references/resources.md +275 -0
- package/bin/skills/simpy/scripts/basic_simulation_template.py +193 -0
- package/bin/skills/simpy/scripts/resource_monitor.py +345 -0
- package/bin/skills/stable-baselines3/SKILL.md +299 -0
- package/bin/skills/stable-baselines3/references/algorithms.md +333 -0
- package/bin/skills/stable-baselines3/references/callbacks.md +556 -0
- package/bin/skills/stable-baselines3/references/custom_environments.md +526 -0
- package/bin/skills/stable-baselines3/references/vectorized_envs.md +568 -0
- package/bin/skills/stable-baselines3/scripts/custom_env_template.py +314 -0
- package/bin/skills/stable-baselines3/scripts/evaluate_agent.py +245 -0
- package/bin/skills/stable-baselines3/scripts/train_rl_agent.py +165 -0
- package/bin/skills/statistical-analysis/SKILL.md +632 -0
- package/bin/skills/statistical-analysis/references/assumptions_and_diagnostics.md +369 -0
- package/bin/skills/statistical-analysis/references/bayesian_statistics.md +661 -0
- package/bin/skills/statistical-analysis/references/effect_sizes_and_power.md +581 -0
- package/bin/skills/statistical-analysis/references/reporting_standards.md +469 -0
- package/bin/skills/statistical-analysis/references/test_selection_guide.md +129 -0
- package/bin/skills/statistical-analysis/scripts/assumption_checks.py +539 -0
- package/bin/skills/statsmodels/SKILL.md +614 -0
- package/bin/skills/statsmodels/references/discrete_choice.md +669 -0
- package/bin/skills/statsmodels/references/glm.md +619 -0
- package/bin/skills/statsmodels/references/linear_models.md +447 -0
- package/bin/skills/statsmodels/references/stats_diagnostics.md +859 -0
- package/bin/skills/statsmodels/references/time_series.md +716 -0
- package/bin/skills/string-database/SKILL.md +534 -0
- package/bin/skills/string-database/references/string_reference.md +455 -0
- package/bin/skills/string-database/scripts/string_api.py +369 -0
- package/bin/skills/sympy/SKILL.md +500 -0
- package/bin/skills/sympy/references/advanced-topics.md +635 -0
- package/bin/skills/sympy/references/code-generation-printing.md +599 -0
- package/bin/skills/sympy/references/core-capabilities.md +348 -0
- package/bin/skills/sympy/references/matrices-linear-algebra.md +526 -0
- package/bin/skills/sympy/references/physics-mechanics.md +592 -0
- package/bin/skills/torch_geometric/SKILL.md +676 -0
- package/bin/skills/torch_geometric/references/datasets_reference.md +574 -0
- package/bin/skills/torch_geometric/references/layers_reference.md +485 -0
- package/bin/skills/torch_geometric/references/transforms_reference.md +679 -0
- package/bin/skills/torch_geometric/scripts/benchmark_model.py +309 -0
- package/bin/skills/torch_geometric/scripts/create_gnn_template.py +529 -0
- package/bin/skills/torch_geometric/scripts/visualize_graph.py +313 -0
- package/bin/skills/torchdrug/SKILL.md +450 -0
- package/bin/skills/torchdrug/references/core_concepts.md +565 -0
- package/bin/skills/torchdrug/references/datasets.md +380 -0
- package/bin/skills/torchdrug/references/knowledge_graphs.md +320 -0
- package/bin/skills/torchdrug/references/models_architectures.md +541 -0
- package/bin/skills/torchdrug/references/molecular_generation.md +352 -0
- package/bin/skills/torchdrug/references/molecular_property_prediction.md +169 -0
- package/bin/skills/torchdrug/references/protein_modeling.md +272 -0
- package/bin/skills/torchdrug/references/retrosynthesis.md +436 -0
- package/bin/skills/transformers/SKILL.md +164 -0
- package/bin/skills/transformers/references/generation.md +467 -0
- package/bin/skills/transformers/references/models.md +361 -0
- package/bin/skills/transformers/references/pipelines.md +335 -0
- package/bin/skills/transformers/references/tokenizers.md +447 -0
- package/bin/skills/transformers/references/training.md +500 -0
- package/bin/skills/umap-learn/SKILL.md +479 -0
- package/bin/skills/umap-learn/references/api_reference.md +532 -0
- package/bin/skills/uniprot-database/SKILL.md +195 -0
- package/bin/skills/uniprot-database/references/api_examples.md +413 -0
- package/bin/skills/uniprot-database/references/api_fields.md +275 -0
- package/bin/skills/uniprot-database/references/id_mapping_databases.md +285 -0
- package/bin/skills/uniprot-database/references/query_syntax.md +256 -0
- package/bin/skills/uniprot-database/scripts/uniprot_client.py +341 -0
- package/bin/skills/uspto-database/SKILL.md +607 -0
- package/bin/skills/uspto-database/references/additional_apis.md +394 -0
- package/bin/skills/uspto-database/references/patentsearch_api.md +266 -0
- package/bin/skills/uspto-database/references/peds_api.md +212 -0
- package/bin/skills/uspto-database/references/trademark_api.md +358 -0
- package/bin/skills/uspto-database/scripts/patent_search.py +290 -0
- package/bin/skills/uspto-database/scripts/peds_client.py +285 -0
- package/bin/skills/uspto-database/scripts/trademark_client.py +311 -0
- package/bin/skills/vaex/SKILL.md +182 -0
- package/bin/skills/vaex/references/core_dataframes.md +367 -0
- package/bin/skills/vaex/references/data_processing.md +555 -0
- package/bin/skills/vaex/references/io_operations.md +703 -0
- package/bin/skills/vaex/references/machine_learning.md +728 -0
- package/bin/skills/vaex/references/performance.md +571 -0
- package/bin/skills/vaex/references/visualization.md +613 -0
- package/bin/skills/zarr-python/SKILL.md +779 -0
- package/bin/skills/zarr-python/references/api_reference.md +515 -0
- package/bin/skills/zinc-database/SKILL.md +404 -0
- package/bin/skills/zinc-database/references/api_reference.md +692 -0
- package/bin/synsc +0 -0
- package/package.json +1 -1
|
@@ -0,0 +1,684 @@
|
|
|
1
|
+
# TDC Utilities and Data Functions
|
|
2
|
+
|
|
3
|
+
This document provides comprehensive documentation for TDC's data processing, evaluation, and utility functions.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
TDC provides utilities organized into four main categories:
|
|
8
|
+
1. **Dataset Splits** - Train/validation/test partitioning strategies
|
|
9
|
+
2. **Model Evaluation** - Standardized performance metrics
|
|
10
|
+
3. **Data Processing** - Molecule conversion, filtering, and transformation
|
|
11
|
+
4. **Entity Retrieval** - Database queries and conversions
|
|
12
|
+
|
|
13
|
+
## 1. Dataset Splits
|
|
14
|
+
|
|
15
|
+
Dataset splitting is crucial for evaluating model generalization. TDC provides multiple splitting strategies designed for therapeutic ML.
|
|
16
|
+
|
|
17
|
+
### Basic Split Usage
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from tdc.single_pred import ADME
|
|
21
|
+
|
|
22
|
+
data = ADME(name='Caco2_Wang')
|
|
23
|
+
|
|
24
|
+
# Get split with default parameters
|
|
25
|
+
split = data.get_split()
|
|
26
|
+
# Returns: {'train': DataFrame, 'valid': DataFrame, 'test': DataFrame}
|
|
27
|
+
|
|
28
|
+
# Customize split parameters
|
|
29
|
+
split = data.get_split(
|
|
30
|
+
method='scaffold',
|
|
31
|
+
seed=42,
|
|
32
|
+
frac=[0.7, 0.1, 0.2]
|
|
33
|
+
)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Split Methods
|
|
37
|
+
|
|
38
|
+
#### Random Split
|
|
39
|
+
Random shuffling of data - suitable for general ML tasks.
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
split = data.get_split(method='random', seed=1)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
**When to use:**
|
|
46
|
+
- Baseline model evaluation
|
|
47
|
+
- When chemical/temporal structure is not important
|
|
48
|
+
- Quick prototyping
|
|
49
|
+
|
|
50
|
+
**Not recommended for:**
|
|
51
|
+
- Realistic drug discovery scenarios
|
|
52
|
+
- Evaluating generalization to new chemical matter
|
|
53
|
+
|
|
54
|
+
#### Scaffold Split
|
|
55
|
+
Splits based on molecular scaffolds (Bemis-Murcko scaffolds) - ensures test molecules are structurally distinct from training molecules.
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
split = data.get_split(method='scaffold', seed=1)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
**When to use:**
|
|
62
|
+
- Default for most single prediction tasks
|
|
63
|
+
- Evaluating generalization to new chemical series
|
|
64
|
+
- Realistic drug discovery scenarios
|
|
65
|
+
|
|
66
|
+
**How it works:**
|
|
67
|
+
1. Extract Bemis-Murcko scaffold from each molecule
|
|
68
|
+
2. Group molecules by scaffold
|
|
69
|
+
3. Assign scaffolds to train/valid/test sets
|
|
70
|
+
4. Ensure test molecules have scaffolds not seen during training
|
|
71
|
+
|
|
72
|
+
#### Cold Splits (DTI/DDI Tasks)
|
|
73
|
+
For multi-instance prediction, cold splits ensure the test set contains unseen drugs, targets, or both.
|
|
74
|
+
|
|
75
|
+
**Cold Drug Split:**
|
|
76
|
+
```python
|
|
77
|
+
from tdc.multi_pred import DTI
|
|
78
|
+
data = DTI(name='BindingDB_Kd')
|
|
79
|
+
split = data.get_split(method='cold_drug', seed=1)
|
|
80
|
+
```
|
|
81
|
+
- Test set contains drugs not seen during training
|
|
82
|
+
- Evaluates generalization to new compounds
|
|
83
|
+
|
|
84
|
+
**Cold Target Split:**
|
|
85
|
+
```python
|
|
86
|
+
split = data.get_split(method='cold_target', seed=1)
|
|
87
|
+
```
|
|
88
|
+
- Test set contains targets not seen during training
|
|
89
|
+
- Evaluates generalization to new proteins
|
|
90
|
+
|
|
91
|
+
**Cold Drug-Target Split:**
|
|
92
|
+
```python
|
|
93
|
+
split = data.get_split(method='cold_drug_target', seed=1)
|
|
94
|
+
```
|
|
95
|
+
- Test set contains drug-target pairs in which both the drug and the target are unseen during training
|
|
96
|
+
- Most challenging evaluation scenario
|
|
97
|
+
|
|
98
|
+
#### Temporal Split
|
|
99
|
+
For datasets with temporal information - ensures test data is from later time points.
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
split = data.get_split(method='temporal', seed=1)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
**When to use:**
|
|
106
|
+
- Datasets with time stamps
|
|
107
|
+
- Simulating prospective prediction
|
|
108
|
+
- Clinical trial outcome prediction
|
|
109
|
+
|
|
110
|
+
### Custom Split Fractions
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
# 80% train, 10% valid, 10% test
|
|
114
|
+
split = data.get_split(method='scaffold', frac=[0.8, 0.1, 0.1])
|
|
115
|
+
|
|
116
|
+
# 70% train, 15% valid, 15% test
|
|
117
|
+
split = data.get_split(method='scaffold', frac=[0.7, 0.15, 0.15])
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Stratified Splits
|
|
121
|
+
|
|
122
|
+
For classification tasks with imbalanced labels:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
split = data.get_split(method='scaffold', stratified=True)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Maintains label distribution across train/valid/test sets.
|
|
129
|
+
|
|
130
|
+
## 2. Model Evaluation
|
|
131
|
+
|
|
132
|
+
TDC provides standardized evaluation metrics for different task types.
|
|
133
|
+
|
|
134
|
+
### Basic Evaluator Usage
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from tdc import Evaluator
|
|
138
|
+
|
|
139
|
+
# Initialize evaluator
|
|
140
|
+
evaluator = Evaluator(name='ROC-AUC')
|
|
141
|
+
|
|
142
|
+
# Evaluate predictions
|
|
143
|
+
score = evaluator(y_true, y_pred)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Classification Metrics
|
|
147
|
+
|
|
148
|
+
#### ROC-AUC
|
|
149
|
+
Receiver Operating Characteristic - Area Under Curve
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
evaluator = Evaluator(name='ROC-AUC')
|
|
153
|
+
score = evaluator(y_true, y_pred_proba)
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
**Best for:**
|
|
157
|
+
- Binary classification
|
|
158
|
+
- Imbalanced datasets
|
|
159
|
+
- Overall discriminative ability
|
|
160
|
+
|
|
161
|
+
**Range:** 0-1 (higher is better, 0.5 is random)
|
|
162
|
+
|
|
163
|
+
#### PR-AUC
|
|
164
|
+
Precision-Recall Area Under Curve
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
evaluator = Evaluator(name='PR-AUC')
|
|
168
|
+
score = evaluator(y_true, y_pred_proba)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
**Best for:**
|
|
172
|
+
- Highly imbalanced datasets
|
|
173
|
+
- When positive class is rare
|
|
174
|
+
- Complements ROC-AUC
|
|
175
|
+
|
|
176
|
+
**Range:** 0-1 (higher is better)
|
|
177
|
+
|
|
178
|
+
#### F1 Score
|
|
179
|
+
Harmonic mean of precision and recall
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
evaluator = Evaluator(name='F1')
|
|
183
|
+
score = evaluator(y_true, y_pred_binary)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
**Best for:**
|
|
187
|
+
- Balance between precision and recall
|
|
188
|
+
- Multi-class classification
|
|
189
|
+
|
|
190
|
+
**Range:** 0-1 (higher is better)
|
|
191
|
+
|
|
192
|
+
#### Accuracy
|
|
193
|
+
Fraction of correct predictions
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
evaluator = Evaluator(name='Accuracy')
|
|
197
|
+
score = evaluator(y_true, y_pred_binary)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
**Best for:**
|
|
201
|
+
- Balanced datasets
|
|
202
|
+
- Simple baseline metric
|
|
203
|
+
|
|
204
|
+
**Not recommended for:** Imbalanced datasets
|
|
205
|
+
|
|
206
|
+
#### Cohen's Kappa
|
|
207
|
+
Agreement between predictions and ground truth, accounting for chance
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
evaluator = Evaluator(name='Kappa')
|
|
211
|
+
score = evaluator(y_true, y_pred_binary)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
**Range:** -1 to 1 (higher is better, 0 is chance-level agreement)
|
|
215
|
+
|
|
216
|
+
### Regression Metrics
|
|
217
|
+
|
|
218
|
+
#### RMSE - Root Mean Squared Error
|
|
219
|
+
```python
|
|
220
|
+
evaluator = Evaluator(name='RMSE')
|
|
221
|
+
score = evaluator(y_true, y_pred)
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
**Best for:**
|
|
225
|
+
- Continuous predictions
|
|
226
|
+
- Penalizes large errors heavily
|
|
227
|
+
|
|
228
|
+
**Range:** 0-∞ (lower is better)
|
|
229
|
+
|
|
230
|
+
#### MAE - Mean Absolute Error
|
|
231
|
+
```python
|
|
232
|
+
evaluator = Evaluator(name='MAE')
|
|
233
|
+
score = evaluator(y_true, y_pred)
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
**Best for:**
|
|
237
|
+
- Continuous predictions
|
|
238
|
+
- More robust to outliers than RMSE
|
|
239
|
+
|
|
240
|
+
**Range:** 0-∞ (lower is better)
|
|
241
|
+
|
|
242
|
+
#### R² - Coefficient of Determination
|
|
243
|
+
```python
|
|
244
|
+
evaluator = Evaluator(name='R2')
|
|
245
|
+
score = evaluator(y_true, y_pred)
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
**Best for:**
|
|
249
|
+
- Variance explained by model
|
|
250
|
+
- Comparing different models
|
|
251
|
+
|
|
252
|
+
**Range:** -∞ to 1 (higher is better, 1 is perfect)
|
|
253
|
+
|
|
254
|
+
#### MSE - Mean Squared Error
|
|
255
|
+
```python
|
|
256
|
+
evaluator = Evaluator(name='MSE')
|
|
257
|
+
score = evaluator(y_true, y_pred)
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
**Range:** 0-∞ (lower is better)
|
|
261
|
+
|
|
262
|
+
### Ranking Metrics
|
|
263
|
+
|
|
264
|
+
#### Spearman Correlation
|
|
265
|
+
Rank correlation coefficient
|
|
266
|
+
|
|
267
|
+
```python
|
|
268
|
+
evaluator = Evaluator(name='Spearman')
|
|
269
|
+
score = evaluator(y_true, y_pred)
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
**Best for:**
|
|
273
|
+
- Ranking tasks
|
|
274
|
+
- Non-linear relationships
|
|
275
|
+
- Ordinal data
|
|
276
|
+
|
|
277
|
+
**Range:** -1 to 1 (higher is better)
|
|
278
|
+
|
|
279
|
+
#### Pearson Correlation
|
|
280
|
+
Linear correlation coefficient
|
|
281
|
+
|
|
282
|
+
```python
|
|
283
|
+
evaluator = Evaluator(name='Pearson')
|
|
284
|
+
score = evaluator(y_true, y_pred)
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
**Best for:**
|
|
288
|
+
- Linear relationships
|
|
289
|
+
- Continuous data
|
|
290
|
+
|
|
291
|
+
**Range:** -1 to 1 (higher is better)
|
|
292
|
+
|
|
293
|
+
### Multi-Label Classification
|
|
294
|
+
|
|
295
|
+
```python
|
|
296
|
+
evaluator = Evaluator(name='Micro-F1')
|
|
297
|
+
score = evaluator(y_true_multilabel, y_pred_multilabel)
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
Available: `Micro-F1`, `Macro-F1`, `Micro-AUPR`, `Macro-AUPR`
|
|
301
|
+
|
|
302
|
+
### Benchmark Group Evaluation
|
|
303
|
+
|
|
304
|
+
For benchmark groups, evaluation requires multiple seeds:
|
|
305
|
+
|
|
306
|
+
```python
|
|
307
|
+
from tdc.benchmark_group import admet_group
|
|
308
|
+
|
|
309
|
+
group = admet_group(path='data/')
|
|
310
|
+
benchmark = group.get('Caco2_Wang')
|
|
311
|
+
|
|
312
|
+
# Predictions must be dict with seeds as keys
|
|
313
|
+
predictions = {}
|
|
314
|
+
for seed in [1, 2, 3, 4, 5]:
|
|
315
|
+
    # Train model and predict
|
|
316
|
+
    predictions[seed] = model_predictions
|
|
317
|
+
|
|
318
|
+
# Evaluate with mean and std across seeds
|
|
319
|
+
results = group.evaluate(predictions)
|
|
320
|
+
print(results) # {'Caco2_Wang': [mean_score, std_score]}
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
## 3. Data Processing
|
|
324
|
+
|
|
325
|
+
TDC provides 11 comprehensive data processing utilities.
|
|
326
|
+
|
|
327
|
+
### Molecule Format Conversion
|
|
328
|
+
|
|
329
|
+
Convert between ~15 molecular representations.
|
|
330
|
+
|
|
331
|
+
```python
|
|
332
|
+
from tdc.chem_utils import MolConvert
|
|
333
|
+
|
|
334
|
+
# SMILES to PyTorch Geometric
|
|
335
|
+
converter = MolConvert(src='SMILES', dst='PyG')
|
|
336
|
+
pyg_graph = converter('CC(C)Cc1ccc(cc1)C(C)C(O)=O')
|
|
337
|
+
|
|
338
|
+
# SMILES to DGL
|
|
339
|
+
converter = MolConvert(src='SMILES', dst='DGL')
|
|
340
|
+
dgl_graph = converter('CC(C)Cc1ccc(cc1)C(C)C(O)=O')
|
|
341
|
+
|
|
342
|
+
# SMILES to Morgan Fingerprint (ECFP)
|
|
343
|
+
converter = MolConvert(src='SMILES', dst='ECFP')
|
|
344
|
+
fingerprint = converter('CC(C)Cc1ccc(cc1)C(C)C(O)=O')
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
**Available formats:**
|
|
348
|
+
- **Text**: SMILES, SELFIES, InChI
|
|
349
|
+
- **Fingerprints**: ECFP (Morgan), MACCS, RDKit, AtomPair, TopologicalTorsion
|
|
350
|
+
- **Graphs**: PyG (PyTorch Geometric), DGL (Deep Graph Library)
|
|
351
|
+
- **3D**: Graph3D, Coulomb Matrix, Distance Matrix
|
|
352
|
+
|
|
353
|
+
**Batch conversion:**
|
|
354
|
+
```python
|
|
355
|
+
converter = MolConvert(src='SMILES', dst='PyG')
|
|
356
|
+
graphs = converter(['SMILES1', 'SMILES2', 'SMILES3'])
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
### Molecule Filters
|
|
360
|
+
|
|
361
|
+
Remove non-drug-like molecules using curated chemical rules.
|
|
362
|
+
|
|
363
|
+
```python
|
|
364
|
+
from tdc.chem_utils import MolFilter
|
|
365
|
+
|
|
366
|
+
# Initialize filter with rules
|
|
367
|
+
mol_filter = MolFilter(
|
|
368
|
+
rules=['PAINS', 'BMS'], # Chemical filter rules
|
|
369
|
+
property_filters_dict={
|
|
370
|
+
'MW': (150, 500), # Molecular weight range
|
|
371
|
+
'LogP': (-0.4, 5.6), # Lipophilicity range
|
|
372
|
+
'HBD': (0, 5), # H-bond donors
|
|
373
|
+
'HBA': (0, 10) # H-bond acceptors
|
|
374
|
+
}
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
# Filter molecules
|
|
378
|
+
filtered_smiles = mol_filter(smiles_list)
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
**Available filter rules:**
|
|
382
|
+
- `PAINS` - Pan-Assay Interference Compounds
|
|
383
|
+
- `BMS` - Bristol-Myers Squibb HTS deck filters
|
|
384
|
+
- `Glaxo` - GlaxoSmithKline filters
|
|
385
|
+
- `Dundee` - University of Dundee filters
|
|
386
|
+
- `Inpharmatica` - Inpharmatica filters
|
|
387
|
+
- `LINT` - Pfizer LINT filters
|
|
388
|
+
|
|
389
|
+
### Label Distribution Visualization
|
|
390
|
+
|
|
391
|
+
```python
|
|
392
|
+
# Visualize label distribution
|
|
393
|
+
data.label_distribution()
|
|
394
|
+
|
|
395
|
+
# Print statistics
|
|
396
|
+
data.print_stats()
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
Displays a histogram and computes the mean, median, and standard deviation for continuous labels.
|
|
400
|
+
|
|
401
|
+
### Label Binarization
|
|
402
|
+
|
|
403
|
+
Convert continuous labels to binary using a threshold.
|
|
404
|
+
|
|
405
|
+
```python
|
|
406
|
+
from tdc.utils import binarize
|
|
407
|
+
|
|
408
|
+
# Binarize with threshold
|
|
409
|
+
binary_labels = binarize(y_continuous, threshold=5.0, order='ascending')
|
|
410
|
+
# order='ascending': values >= threshold become 1
|
|
411
|
+
# order='descending': values <= threshold become 1
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
### Label Units Conversion
|
|
415
|
+
|
|
416
|
+
Transform between measurement units.
|
|
417
|
+
|
|
418
|
+
```python
|
|
419
|
+
from tdc.chem_utils import label_transform
|
|
420
|
+
|
|
421
|
+
# Convert nM to pKd
|
|
422
|
+
y_pkd = label_transform(y_nM, from_unit='nM', to_unit='p')
|
|
423
|
+
|
|
424
|
+
# Convert μM to nM
|
|
425
|
+
y_nM = label_transform(y_uM, from_unit='uM', to_unit='nM')
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
**Available conversions:**
|
|
429
|
+
- Binding affinity: nM, μM, pKd, pKi, pIC50
|
|
430
|
+
- Log transformations
|
|
431
|
+
- Natural log conversions
|
|
432
|
+
|
|
433
|
+
### Label Meaning
|
|
434
|
+
|
|
435
|
+
Get interpretable descriptions for labels.
|
|
436
|
+
|
|
437
|
+
```python
|
|
438
|
+
# Get label mapping
|
|
439
|
+
label_map = data.get_label_map(name='DrugBank')
|
|
440
|
+
print(label_map)
|
|
441
|
+
# {0: 'No interaction', 1: 'Increased effect', 2: 'Decreased effect', ...}
|
|
442
|
+
```
|
|
443
|
+
|
|
444
|
+
### Data Balancing
|
|
445
|
+
|
|
446
|
+
Handle class imbalance via over/under-sampling.
|
|
447
|
+
|
|
448
|
+
```python
|
|
449
|
+
from tdc.utils import balance
|
|
450
|
+
|
|
451
|
+
# Oversample minority class
|
|
452
|
+
X_balanced, y_balanced = balance(X, y, method='oversample')
|
|
453
|
+
|
|
454
|
+
# Undersample majority class
|
|
455
|
+
X_balanced, y_balanced = balance(X, y, method='undersample')
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
### Graph Transformation for Pair Data
|
|
459
|
+
|
|
460
|
+
Convert paired data to graph representations.
|
|
461
|
+
|
|
462
|
+
```python
|
|
463
|
+
from tdc.utils import create_graph_from_pairs
|
|
464
|
+
|
|
465
|
+
# Create graph from drug-drug pairs
|
|
466
|
+
graph = create_graph_from_pairs(
|
|
467
|
+
pairs=ddi_pairs, # [(drug1, drug2, label), ...]
|
|
468
|
+
format='edge_list' # or 'PyG', 'DGL'
|
|
469
|
+
)
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
### Negative Sampling
|
|
473
|
+
|
|
474
|
+
Generate negative samples for binary tasks.
|
|
475
|
+
|
|
476
|
+
```python
|
|
477
|
+
from tdc.utils import negative_sample
|
|
478
|
+
|
|
479
|
+
# Generate negative samples for DTI
|
|
480
|
+
negative_pairs = negative_sample(
|
|
481
|
+
positive_pairs=known_interactions,
|
|
482
|
+
all_drugs=drug_list,
|
|
483
|
+
all_targets=target_list,
|
|
484
|
+
ratio=1.0 # Negative:positive ratio
|
|
485
|
+
)
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
**Use cases:**
|
|
489
|
+
- Drug-target interaction prediction
|
|
490
|
+
- Drug-drug interaction tasks
|
|
491
|
+
- Creating balanced datasets
|
|
492
|
+
|
|
493
|
+
### Entity Retrieval
|
|
494
|
+
|
|
495
|
+
Convert between database identifiers.
|
|
496
|
+
|
|
497
|
+
#### PubChem CID to SMILES
|
|
498
|
+
```python
|
|
499
|
+
from tdc.utils import cid2smiles
|
|
500
|
+
|
|
501
|
+
smiles = cid2smiles(2244) # Aspirin
|
|
502
|
+
# Returns: 'CC(=O)Oc1ccccc1C(=O)O'
|
|
503
|
+
```
|
|
504
|
+
|
|
505
|
+
#### UniProt ID to Amino Acid Sequence
|
|
506
|
+
```python
|
|
507
|
+
from tdc.utils import uniprot2seq
|
|
508
|
+
|
|
509
|
+
sequence = uniprot2seq('P12345')
|
|
510
|
+
# Returns: 'MVKVYAPASS...'
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
#### Batch Retrieval
|
|
514
|
+
```python
|
|
515
|
+
# Multiple CIDs
|
|
516
|
+
smiles_list = [cid2smiles(cid) for cid in [2244, 5090, 6323]]
|
|
517
|
+
|
|
518
|
+
# Multiple UniProt IDs
|
|
519
|
+
sequences = [uniprot2seq(uid) for uid in ['P12345', 'Q9Y5S9']]
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
## 4. Advanced Utilities
|
|
523
|
+
|
|
524
|
+
### Retrieve Dataset Names
|
|
525
|
+
|
|
526
|
+
```python
|
|
527
|
+
from tdc.utils import retrieve_dataset_names
|
|
528
|
+
|
|
529
|
+
# Get all datasets for a task
|
|
530
|
+
adme_datasets = retrieve_dataset_names('ADME')
|
|
531
|
+
dti_datasets = retrieve_dataset_names('DTI')
|
|
532
|
+
tox_datasets = retrieve_dataset_names('Tox')
|
|
533
|
+
|
|
534
|
+
print(f"ADME datasets: {adme_datasets}")
|
|
535
|
+
```
|
|
536
|
+
|
|
537
|
+
### Fuzzy Search
|
|
538
|
+
|
|
539
|
+
TDC supports fuzzy matching for dataset names:
|
|
540
|
+
|
|
541
|
+
```python
|
|
542
|
+
from tdc.single_pred import ADME
|
|
543
|
+
|
|
544
|
+
# These all work (typo-tolerant)
|
|
545
|
+
data = ADME(name='Caco2_Wang')
|
|
546
|
+
data = ADME(name='caco2_wang')
|
|
547
|
+
data = ADME(name='Caco2') # Partial match
|
|
548
|
+
```
|
|
549
|
+
|
|
550
|
+
### Data Format Options
|
|
551
|
+
|
|
552
|
+
```python
|
|
553
|
+
# Pandas DataFrame (default)
|
|
554
|
+
df = data.get_data(format='df')
|
|
555
|
+
|
|
556
|
+
# Dictionary
|
|
557
|
+
data_dict = data.get_data(format='dict')
|
|
558
|
+
|
|
559
|
+
# DeepPurpose format (for DeepPurpose library)
|
|
560
|
+
dp_format = data.get_data(format='DeepPurpose')
|
|
561
|
+
|
|
562
|
+
# PyG/DGL graphs (if applicable)
|
|
563
|
+
graphs = data.get_data(format='PyG')
|
|
564
|
+
```
|
|
565
|
+
|
|
566
|
+
### Data Loader Utilities
|
|
567
|
+
|
|
568
|
+
```python
|
|
569
|
+
from tdc.utils import create_fold
|
|
570
|
+
|
|
571
|
+
# Create cross-validation folds
|
|
572
|
+
folds = create_fold(data, fold=5, seed=42)
|
|
573
|
+
# Returns list of (train_idx, test_idx) tuples
|
|
574
|
+
|
|
575
|
+
# Iterate through folds
|
|
576
|
+
for i, (train_idx, test_idx) in enumerate(folds):
|
|
577
|
+
    train_data = data.iloc[train_idx]
|
|
578
|
+
    test_data = data.iloc[test_idx]
|
|
579
|
+
    # Train and evaluate
|
|
580
|
+
```
|
|
581
|
+
|
|
582
|
+
## Common Workflows
|
|
583
|
+
|
|
584
|
+
### Workflow 1: Complete Data Pipeline
|
|
585
|
+
|
|
586
|
+
```python
|
|
587
|
+
from tdc.single_pred import ADME
|
|
588
|
+
from tdc import Evaluator
|
|
589
|
+
from tdc.chem_utils import MolConvert, MolFilter
|
|
590
|
+
|
|
591
|
+
# 1. Load data
|
|
592
|
+
data = ADME(name='Caco2_Wang')
|
|
593
|
+
|
|
594
|
+
# 2. Filter molecules
|
|
595
|
+
mol_filter = MolFilter(rules=['PAINS'])
|
|
596
|
+
filtered_data = data.get_data()
|
|
597
|
+
filtered_data = filtered_data[
|
|
598
|
+
    filtered_data['Drug'].apply(lambda x: bool(mol_filter([x])))
|
|
599
|
+
]
|
|
600
|
+
|
|
601
|
+
# 3. Split data
|
|
602
|
+
split = data.get_split(method='scaffold', seed=42)
|
|
603
|
+
train, valid, test = split['train'], split['valid'], split['test']
|
|
604
|
+
|
|
605
|
+
# 4. Convert to graph representations
|
|
606
|
+
converter = MolConvert(src='SMILES', dst='PyG')
|
|
607
|
+
train_graphs = converter(train['Drug'].tolist())
|
|
608
|
+
|
|
609
|
+
# 5. Train model (user implements)
|
|
610
|
+
# model.fit(train_graphs, train['Y'])
|
|
611
|
+
|
|
612
|
+
# 6. Evaluate
|
|
613
|
+
evaluator = Evaluator(name='MAE')
|
|
614
|
+
# score = evaluator(test['Y'], predictions)
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
### Workflow 2: Multi-Task Learning Preparation
|
|
618
|
+
|
|
619
|
+
```python
|
|
620
|
+
from tdc.benchmark_group import admet_group
|
|
621
|
+
from tdc.chem_utils import MolConvert
|
|
622
|
+
|
|
623
|
+
# Load benchmark group
|
|
624
|
+
group = admet_group(path='data/')
|
|
625
|
+
|
|
626
|
+
# Get multiple datasets
|
|
627
|
+
datasets = ['Caco2_Wang', 'HIA_Hou', 'Bioavailability_Ma']
|
|
628
|
+
all_data = {}
|
|
629
|
+
|
|
630
|
+
for dataset_name in datasets:
|
|
631
|
+
    benchmark = group.get(dataset_name)
|
|
632
|
+
    all_data[dataset_name] = benchmark
|
|
633
|
+
|
|
634
|
+
# Prepare for multi-task learning
|
|
635
|
+
converter = MolConvert(src='SMILES', dst='ECFP')
|
|
636
|
+
# Process each dataset...
|
|
637
|
+
```
|
|
638
|
+
|
|
639
|
+
### Workflow 3: DTI Cold Split Evaluation
|
|
640
|
+
|
|
641
|
+
```python
|
|
642
|
+
from tdc.multi_pred import DTI
|
|
643
|
+
from tdc import Evaluator
|
|
644
|
+
|
|
645
|
+
# Load DTI data
|
|
646
|
+
data = DTI(name='BindingDB_Kd')
|
|
647
|
+
|
|
648
|
+
# Cold drug split
|
|
649
|
+
split = data.get_split(method='cold_drug', seed=42)
|
|
650
|
+
train, test = split['train'], split['test']
|
|
651
|
+
|
|
652
|
+
# Verify no drug overlap
|
|
653
|
+
train_drugs = set(train['Drug_ID'])
|
|
654
|
+
test_drugs = set(test['Drug_ID'])
|
|
655
|
+
assert len(train_drugs & test_drugs) == 0, "Drug leakage detected!"
|
|
656
|
+
|
|
657
|
+
# Train and evaluate
|
|
658
|
+
# model.fit(train)
|
|
659
|
+
evaluator = Evaluator(name='RMSE')
|
|
660
|
+
# score = evaluator(test['Y'], predictions)
|
|
661
|
+
```
|
|
662
|
+
|
|
663
|
+
## Best Practices
|
|
664
|
+
|
|
665
|
+
1. **Always use meaningful splits** - Use scaffold or cold splits for realistic evaluation
|
|
666
|
+
2. **Multiple seeds** - Run experiments with multiple seeds for robust results
|
|
667
|
+
3. **Appropriate metrics** - Choose metrics that match your task and dataset characteristics
|
|
668
|
+
4. **Data filtering** - Remove PAINS and non-drug-like molecules before training
|
|
669
|
+
5. **Format conversion** - Convert molecules to appropriate format for your model
|
|
670
|
+
6. **Batch processing** - Use batch operations for efficiency with large datasets
|
|
671
|
+
|
|
672
|
+
## Performance Tips
|
|
673
|
+
|
|
674
|
+
- Convert molecules in batch mode for faster processing
|
|
675
|
+
- Cache converted representations to avoid recomputation
|
|
676
|
+
- Use appropriate data formats for your framework (PyG, DGL, etc.)
|
|
677
|
+
- Filter data early in the pipeline to reduce computation
|
|
678
|
+
|
|
679
|
+
## References
|
|
680
|
+
|
|
681
|
+
- TDC Documentation: https://tdc.readthedocs.io
|
|
682
|
+
- Data Functions: https://tdcommons.ai/fct_overview/
|
|
683
|
+
- Evaluation Metrics: https://tdcommons.ai/functions/model_eval/
|
|
684
|
+
- Data Splits: https://tdcommons.ai/functions/data_split/
|