@synsci/cli-darwin-x64-baseline 1.1.76 → 1.1.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/adaptyv/SKILL.md +114 -0
- package/bin/skills/adaptyv/reference/api_reference.md +308 -0
- package/bin/skills/adaptyv/reference/examples.md +913 -0
- package/bin/skills/adaptyv/reference/experiments.md +360 -0
- package/bin/skills/adaptyv/reference/protein_optimization.md +637 -0
- package/bin/skills/aeon/SKILL.md +374 -0
- package/bin/skills/aeon/references/anomaly_detection.md +154 -0
- package/bin/skills/aeon/references/classification.md +144 -0
- package/bin/skills/aeon/references/clustering.md +123 -0
- package/bin/skills/aeon/references/datasets_benchmarking.md +387 -0
- package/bin/skills/aeon/references/distances.md +256 -0
- package/bin/skills/aeon/references/forecasting.md +140 -0
- package/bin/skills/aeon/references/networks.md +289 -0
- package/bin/skills/aeon/references/regression.md +118 -0
- package/bin/skills/aeon/references/segmentation.md +163 -0
- package/bin/skills/aeon/references/similarity_search.md +187 -0
- package/bin/skills/aeon/references/transformations.md +246 -0
- package/bin/skills/alphafold-database/SKILL.md +513 -0
- package/bin/skills/alphafold-database/references/api_reference.md +423 -0
- package/bin/skills/anndata/SKILL.md +400 -0
- package/bin/skills/anndata/references/best_practices.md +525 -0
- package/bin/skills/anndata/references/concatenation.md +396 -0
- package/bin/skills/anndata/references/data_structure.md +314 -0
- package/bin/skills/anndata/references/io_operations.md +404 -0
- package/bin/skills/anndata/references/manipulation.md +516 -0
- package/bin/skills/arboreto/SKILL.md +243 -0
- package/bin/skills/arboreto/references/algorithms.md +138 -0
- package/bin/skills/arboreto/references/basic_inference.md +151 -0
- package/bin/skills/arboreto/references/distributed_computing.md +242 -0
- package/bin/skills/arboreto/scripts/basic_grn_inference.py +97 -0
- package/bin/skills/astropy/SKILL.md +331 -0
- package/bin/skills/astropy/references/coordinates.md +273 -0
- package/bin/skills/astropy/references/cosmology.md +307 -0
- package/bin/skills/astropy/references/fits.md +396 -0
- package/bin/skills/astropy/references/tables.md +489 -0
- package/bin/skills/astropy/references/time.md +404 -0
- package/bin/skills/astropy/references/units.md +178 -0
- package/bin/skills/astropy/references/wcs_and_other_modules.md +373 -0
- package/bin/skills/benchling-integration/SKILL.md +480 -0
- package/bin/skills/benchling-integration/references/api_endpoints.md +883 -0
- package/bin/skills/benchling-integration/references/authentication.md +379 -0
- package/bin/skills/benchling-integration/references/sdk_reference.md +774 -0
- package/bin/skills/biopython/SKILL.md +443 -0
- package/bin/skills/biopython/references/advanced.md +577 -0
- package/bin/skills/biopython/references/alignment.md +362 -0
- package/bin/skills/biopython/references/blast.md +455 -0
- package/bin/skills/biopython/references/databases.md +484 -0
- package/bin/skills/biopython/references/phylogenetics.md +566 -0
- package/bin/skills/biopython/references/sequence_io.md +285 -0
- package/bin/skills/biopython/references/structure.md +564 -0
- package/bin/skills/biorxiv-database/SKILL.md +483 -0
- package/bin/skills/biorxiv-database/references/api_reference.md +280 -0
- package/bin/skills/biorxiv-database/scripts/biorxiv_search.py +445 -0
- package/bin/skills/bioservices/SKILL.md +361 -0
- package/bin/skills/bioservices/references/identifier_mapping.md +685 -0
- package/bin/skills/bioservices/references/services_reference.md +636 -0
- package/bin/skills/bioservices/references/workflow_patterns.md +811 -0
- package/bin/skills/bioservices/scripts/batch_id_converter.py +347 -0
- package/bin/skills/bioservices/scripts/compound_cross_reference.py +378 -0
- package/bin/skills/bioservices/scripts/pathway_analysis.py +309 -0
- package/bin/skills/bioservices/scripts/protein_analysis_workflow.py +408 -0
- package/bin/skills/brenda-database/SKILL.md +719 -0
- package/bin/skills/brenda-database/references/api_reference.md +537 -0
- package/bin/skills/brenda-database/scripts/brenda_queries.py +844 -0
- package/bin/skills/brenda-database/scripts/brenda_visualization.py +772 -0
- package/bin/skills/brenda-database/scripts/enzyme_pathway_builder.py +1053 -0
- package/bin/skills/cellxgene-census/SKILL.md +511 -0
- package/bin/skills/cellxgene-census/references/census_schema.md +182 -0
- package/bin/skills/cellxgene-census/references/common_patterns.md +351 -0
- package/bin/skills/chembl-database/SKILL.md +389 -0
- package/bin/skills/chembl-database/references/api_reference.md +272 -0
- package/bin/skills/chembl-database/scripts/example_queries.py +278 -0
- package/bin/skills/cirq/SKILL.md +346 -0
- package/bin/skills/cirq/references/building.md +307 -0
- package/bin/skills/cirq/references/experiments.md +572 -0
- package/bin/skills/cirq/references/hardware.md +515 -0
- package/bin/skills/cirq/references/noise.md +515 -0
- package/bin/skills/cirq/references/simulation.md +350 -0
- package/bin/skills/cirq/references/transformation.md +416 -0
- package/bin/skills/clinicaltrials-database/SKILL.md +507 -0
- package/bin/skills/clinicaltrials-database/references/api_reference.md +358 -0
- package/bin/skills/clinicaltrials-database/scripts/query_clinicaltrials.py +215 -0
- package/bin/skills/clinpgx-database/SKILL.md +638 -0
- package/bin/skills/clinpgx-database/references/api_reference.md +757 -0
- package/bin/skills/clinpgx-database/scripts/query_clinpgx.py +518 -0
- package/bin/skills/clinvar-database/SKILL.md +362 -0
- package/bin/skills/clinvar-database/references/api_reference.md +227 -0
- package/bin/skills/clinvar-database/references/clinical_significance.md +218 -0
- package/bin/skills/clinvar-database/references/data_formats.md +358 -0
- package/bin/skills/cobrapy/SKILL.md +463 -0
- package/bin/skills/cobrapy/references/api_quick_reference.md +655 -0
- package/bin/skills/cobrapy/references/workflows.md +593 -0
- package/bin/skills/cosmic-database/SKILL.md +336 -0
- package/bin/skills/cosmic-database/references/cosmic_data_reference.md +220 -0
- package/bin/skills/cosmic-database/scripts/download_cosmic.py +231 -0
- package/bin/skills/dask/SKILL.md +456 -0
- package/bin/skills/dask/references/arrays.md +497 -0
- package/bin/skills/dask/references/bags.md +468 -0
- package/bin/skills/dask/references/best-practices.md +277 -0
- package/bin/skills/dask/references/dataframes.md +368 -0
- package/bin/skills/dask/references/futures.md +541 -0
- package/bin/skills/dask/references/schedulers.md +504 -0
- package/bin/skills/datacommons-client/SKILL.md +255 -0
- package/bin/skills/datacommons-client/references/getting_started.md +417 -0
- package/bin/skills/datacommons-client/references/node.md +250 -0
- package/bin/skills/datacommons-client/references/observation.md +185 -0
- package/bin/skills/datacommons-client/references/resolve.md +246 -0
- package/bin/skills/datamol/SKILL.md +706 -0
- package/bin/skills/datamol/references/conformers_module.md +131 -0
- package/bin/skills/datamol/references/core_api.md +130 -0
- package/bin/skills/datamol/references/descriptors_viz.md +195 -0
- package/bin/skills/datamol/references/fragments_scaffolds.md +174 -0
- package/bin/skills/datamol/references/io_module.md +109 -0
- package/bin/skills/datamol/references/reactions_data.md +218 -0
- package/bin/skills/deepchem/SKILL.md +597 -0
- package/bin/skills/deepchem/references/api_reference.md +303 -0
- package/bin/skills/deepchem/references/workflows.md +491 -0
- package/bin/skills/deepchem/scripts/graph_neural_network.py +338 -0
- package/bin/skills/deepchem/scripts/predict_solubility.py +224 -0
- package/bin/skills/deepchem/scripts/transfer_learning.py +375 -0
- package/bin/skills/deeptools/SKILL.md +531 -0
- package/bin/skills/deeptools/assets/quick_reference.md +58 -0
- package/bin/skills/deeptools/references/effective_genome_sizes.md +116 -0
- package/bin/skills/deeptools/references/normalization_methods.md +410 -0
- package/bin/skills/deeptools/references/tools_reference.md +533 -0
- package/bin/skills/deeptools/references/workflows.md +474 -0
- package/bin/skills/deeptools/scripts/validate_files.py +195 -0
- package/bin/skills/deeptools/scripts/workflow_generator.py +454 -0
- package/bin/skills/denario/SKILL.md +215 -0
- package/bin/skills/denario/references/examples.md +494 -0
- package/bin/skills/denario/references/installation.md +213 -0
- package/bin/skills/denario/references/llm_configuration.md +265 -0
- package/bin/skills/denario/references/research_pipeline.md +471 -0
- package/bin/skills/diffdock/SKILL.md +483 -0
- package/bin/skills/diffdock/assets/batch_template.csv +4 -0
- package/bin/skills/diffdock/assets/custom_inference_config.yaml +90 -0
- package/bin/skills/diffdock/references/confidence_and_limitations.md +182 -0
- package/bin/skills/diffdock/references/parameters_reference.md +163 -0
- package/bin/skills/diffdock/references/workflows_examples.md +392 -0
- package/bin/skills/diffdock/scripts/analyze_results.py +334 -0
- package/bin/skills/diffdock/scripts/prepare_batch_csv.py +254 -0
- package/bin/skills/diffdock/scripts/setup_check.py +278 -0
- package/bin/skills/dnanexus-integration/SKILL.md +383 -0
- package/bin/skills/dnanexus-integration/references/app-development.md +247 -0
- package/bin/skills/dnanexus-integration/references/configuration.md +646 -0
- package/bin/skills/dnanexus-integration/references/data-operations.md +400 -0
- package/bin/skills/dnanexus-integration/references/job-execution.md +412 -0
- package/bin/skills/dnanexus-integration/references/python-sdk.md +523 -0
- package/bin/skills/document-skills/docx/LICENSE.txt +30 -0
- package/bin/skills/document-skills/docx/SKILL.md +233 -0
- package/bin/skills/document-skills/docx/docx-js.md +350 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/mce/mc.xsd +75 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bin/skills/document-skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/pack.py +159 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/unpack.py +29 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validate.py +69 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/__init__.py +15 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/base.py +951 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/docx.py +274 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/pptx.py +315 -0
- package/bin/skills/document-skills/docx/ooxml/scripts/validation/redlining.py +279 -0
- package/bin/skills/document-skills/docx/ooxml.md +610 -0
- package/bin/skills/document-skills/docx/scripts/__init__.py +1 -0
- package/bin/skills/document-skills/docx/scripts/document.py +1276 -0
- package/bin/skills/document-skills/docx/scripts/templates/comments.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/templates/people.xml +3 -0
- package/bin/skills/document-skills/docx/scripts/utilities.py +374 -0
- package/bin/skills/document-skills/pdf/LICENSE.txt +30 -0
- package/bin/skills/document-skills/pdf/SKILL.md +330 -0
- package/bin/skills/document-skills/pdf/forms.md +205 -0
- package/bin/skills/document-skills/pdf/reference.md +612 -0
- package/bin/skills/document-skills/pdf/scripts/check_bounding_boxes.py +70 -0
- package/bin/skills/document-skills/pdf/scripts/check_bounding_boxes_test.py +226 -0
- package/bin/skills/document-skills/pdf/scripts/check_fillable_fields.py +12 -0
- package/bin/skills/document-skills/pdf/scripts/convert_pdf_to_images.py +35 -0
- package/bin/skills/document-skills/pdf/scripts/create_validation_image.py +41 -0
- package/bin/skills/document-skills/pdf/scripts/extract_form_field_info.py +152 -0
- package/bin/skills/document-skills/pdf/scripts/fill_fillable_fields.py +114 -0
- package/bin/skills/document-skills/pdf/scripts/fill_pdf_form_with_annotations.py +108 -0
- package/bin/skills/document-skills/pptx/LICENSE.txt +30 -0
- package/bin/skills/document-skills/pptx/SKILL.md +520 -0
- package/bin/skills/document-skills/pptx/html2pptx.md +625 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/mce/mc.xsd +75 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bin/skills/document-skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/pack.py +159 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/unpack.py +29 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validate.py +69 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/__init__.py +15 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/base.py +951 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/docx.py +274 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/pptx.py +315 -0
- package/bin/skills/document-skills/pptx/ooxml/scripts/validation/redlining.py +279 -0
- package/bin/skills/document-skills/pptx/ooxml.md +427 -0
- package/bin/skills/document-skills/pptx/scripts/html2pptx.js +979 -0
- package/bin/skills/document-skills/pptx/scripts/inventory.py +1020 -0
- package/bin/skills/document-skills/pptx/scripts/rearrange.py +231 -0
- package/bin/skills/document-skills/pptx/scripts/replace.py +385 -0
- package/bin/skills/document-skills/pptx/scripts/thumbnail.py +450 -0
- package/bin/skills/document-skills/xlsx/LICENSE.txt +30 -0
- package/bin/skills/document-skills/xlsx/SKILL.md +325 -0
- package/bin/skills/document-skills/xlsx/recalc.py +178 -0
- package/bin/skills/drugbank-database/SKILL.md +190 -0
- package/bin/skills/drugbank-database/references/chemical-analysis.md +590 -0
- package/bin/skills/drugbank-database/references/data-access.md +242 -0
- package/bin/skills/drugbank-database/references/drug-queries.md +386 -0
- package/bin/skills/drugbank-database/references/interactions.md +425 -0
- package/bin/skills/drugbank-database/references/targets-pathways.md +518 -0
- package/bin/skills/drugbank-database/scripts/drugbank_helper.py +350 -0
- package/bin/skills/ena-database/SKILL.md +204 -0
- package/bin/skills/ena-database/references/api_reference.md +490 -0
- package/bin/skills/ensembl-database/SKILL.md +311 -0
- package/bin/skills/ensembl-database/references/api_endpoints.md +346 -0
- package/bin/skills/ensembl-database/scripts/ensembl_query.py +427 -0
- package/bin/skills/esm/SKILL.md +306 -0
- package/bin/skills/esm/references/esm-c-api.md +583 -0
- package/bin/skills/esm/references/esm3-api.md +452 -0
- package/bin/skills/esm/references/forge-api.md +657 -0
- package/bin/skills/esm/references/workflows.md +685 -0
- package/bin/skills/etetoolkit/SKILL.md +623 -0
- package/bin/skills/etetoolkit/references/api_reference.md +583 -0
- package/bin/skills/etetoolkit/references/visualization.md +783 -0
- package/bin/skills/etetoolkit/references/workflows.md +774 -0
- package/bin/skills/etetoolkit/scripts/quick_visualize.py +214 -0
- package/bin/skills/etetoolkit/scripts/tree_operations.py +229 -0
- package/bin/skills/exploratory-data-analysis/SKILL.md +446 -0
- package/bin/skills/exploratory-data-analysis/assets/report_template.md +196 -0
- package/bin/skills/exploratory-data-analysis/references/bioinformatics_genomics_formats.md +664 -0
- package/bin/skills/exploratory-data-analysis/references/chemistry_molecular_formats.md +664 -0
- package/bin/skills/exploratory-data-analysis/references/general_scientific_formats.md +518 -0
- package/bin/skills/exploratory-data-analysis/references/microscopy_imaging_formats.md +620 -0
- package/bin/skills/exploratory-data-analysis/references/proteomics_metabolomics_formats.md +517 -0
- package/bin/skills/exploratory-data-analysis/references/spectroscopy_analytical_formats.md +633 -0
- package/bin/skills/exploratory-data-analysis/scripts/eda_analyzer.py +547 -0
- package/bin/skills/fda-database/SKILL.md +518 -0
- package/bin/skills/fda-database/references/animal_veterinary.md +377 -0
- package/bin/skills/fda-database/references/api_basics.md +687 -0
- package/bin/skills/fda-database/references/devices.md +632 -0
- package/bin/skills/fda-database/references/drugs.md +468 -0
- package/bin/skills/fda-database/references/foods.md +374 -0
- package/bin/skills/fda-database/references/other.md +472 -0
- package/bin/skills/fda-database/scripts/fda_examples.py +335 -0
- package/bin/skills/fda-database/scripts/fda_query.py +440 -0
- package/bin/skills/flowio/SKILL.md +608 -0
- package/bin/skills/flowio/references/api_reference.md +372 -0
- package/bin/skills/fluidsim/SKILL.md +349 -0
- package/bin/skills/fluidsim/references/advanced_features.md +398 -0
- package/bin/skills/fluidsim/references/installation.md +68 -0
- package/bin/skills/fluidsim/references/output_analysis.md +283 -0
- package/bin/skills/fluidsim/references/parameters.md +198 -0
- package/bin/skills/fluidsim/references/simulation_workflow.md +172 -0
- package/bin/skills/fluidsim/references/solvers.md +94 -0
- package/bin/skills/fred-economic-data/SKILL.md +433 -0
- package/bin/skills/fred-economic-data/references/api_basics.md +212 -0
- package/bin/skills/fred-economic-data/references/categories.md +442 -0
- package/bin/skills/fred-economic-data/references/geofred.md +588 -0
- package/bin/skills/fred-economic-data/references/releases.md +642 -0
- package/bin/skills/fred-economic-data/references/series.md +584 -0
- package/bin/skills/fred-economic-data/references/sources.md +423 -0
- package/bin/skills/fred-economic-data/references/tags.md +485 -0
- package/bin/skills/fred-economic-data/scripts/fred_examples.py +354 -0
- package/bin/skills/fred-economic-data/scripts/fred_query.py +590 -0
- package/bin/skills/gene-database/SKILL.md +179 -0
- package/bin/skills/gene-database/references/api_reference.md +404 -0
- package/bin/skills/gene-database/references/common_workflows.md +428 -0
- package/bin/skills/gene-database/scripts/batch_gene_lookup.py +298 -0
- package/bin/skills/gene-database/scripts/fetch_gene_data.py +277 -0
- package/bin/skills/gene-database/scripts/query_gene.py +251 -0
- package/bin/skills/geniml/SKILL.md +318 -0
- package/bin/skills/geniml/references/bedspace.md +127 -0
- package/bin/skills/geniml/references/consensus_peaks.md +238 -0
- package/bin/skills/geniml/references/region2vec.md +90 -0
- package/bin/skills/geniml/references/scembed.md +197 -0
- package/bin/skills/geniml/references/utilities.md +385 -0
- package/bin/skills/geo-database/SKILL.md +815 -0
- package/bin/skills/geo-database/references/geo_reference.md +829 -0
- package/bin/skills/geopandas/SKILL.md +251 -0
- package/bin/skills/geopandas/references/crs-management.md +243 -0
- package/bin/skills/geopandas/references/data-io.md +165 -0
- package/bin/skills/geopandas/references/data-structures.md +70 -0
- package/bin/skills/geopandas/references/geometric-operations.md +221 -0
- package/bin/skills/geopandas/references/spatial-analysis.md +184 -0
- package/bin/skills/geopandas/references/visualization.md +243 -0
- package/bin/skills/get-available-resources/SKILL.md +277 -0
- package/bin/skills/get-available-resources/scripts/detect_resources.py +401 -0
- package/bin/skills/gget/SKILL.md +871 -0
- package/bin/skills/gget/references/database_info.md +300 -0
- package/bin/skills/gget/references/module_reference.md +467 -0
- package/bin/skills/gget/references/workflows.md +814 -0
- package/bin/skills/gget/scripts/batch_sequence_analysis.py +191 -0
- package/bin/skills/gget/scripts/enrichment_pipeline.py +235 -0
- package/bin/skills/gget/scripts/gene_analysis.py +161 -0
- package/bin/skills/gtars/SKILL.md +285 -0
- package/bin/skills/gtars/references/cli.md +222 -0
- package/bin/skills/gtars/references/coverage.md +172 -0
- package/bin/skills/gtars/references/overlap.md +156 -0
- package/bin/skills/gtars/references/python-api.md +211 -0
- package/bin/skills/gtars/references/refget.md +147 -0
- package/bin/skills/gtars/references/tokenizers.md +103 -0
- package/bin/skills/gwas-database/SKILL.md +608 -0
- package/bin/skills/gwas-database/references/api_reference.md +793 -0
- package/bin/skills/histolab/SKILL.md +678 -0
- package/bin/skills/histolab/references/filters_preprocessing.md +514 -0
- package/bin/skills/histolab/references/slide_management.md +172 -0
- package/bin/skills/histolab/references/tile_extraction.md +421 -0
- package/bin/skills/histolab/references/tissue_masks.md +251 -0
- package/bin/skills/histolab/references/visualization.md +547 -0
- package/bin/skills/hmdb-database/SKILL.md +196 -0
- package/bin/skills/hmdb-database/references/hmdb_data_fields.md +267 -0
- package/bin/skills/hypogenic/SKILL.md +655 -0
- package/bin/skills/hypogenic/references/config_template.yaml +150 -0
- package/bin/skills/imaging-data-commons/SKILL.md +1182 -0
- package/bin/skills/imaging-data-commons/references/bigquery_guide.md +556 -0
- package/bin/skills/imaging-data-commons/references/cli_guide.md +272 -0
- package/bin/skills/imaging-data-commons/references/cloud_storage_guide.md +333 -0
- package/bin/skills/imaging-data-commons/references/dicomweb_guide.md +399 -0
- package/bin/skills/infographics/SKILL.md +563 -0
- package/bin/skills/infographics/references/color_palettes.md +496 -0
- package/bin/skills/infographics/references/design_principles.md +636 -0
- package/bin/skills/infographics/references/infographic_types.md +907 -0
- package/bin/skills/infographics/scripts/generate_infographic.py +234 -0
- package/bin/skills/infographics/scripts/generate_infographic_ai.py +1290 -0
- package/bin/skills/iso-13485-certification/SKILL.md +680 -0
- package/bin/skills/iso-13485-certification/assets/templates/procedures/CAPA-procedure-template.md +453 -0
- package/bin/skills/iso-13485-certification/assets/templates/procedures/document-control-procedure-template.md +567 -0
- package/bin/skills/iso-13485-certification/assets/templates/quality-manual-template.md +521 -0
- package/bin/skills/iso-13485-certification/references/gap-analysis-checklist.md +568 -0
- package/bin/skills/iso-13485-certification/references/iso-13485-requirements.md +610 -0
- package/bin/skills/iso-13485-certification/references/mandatory-documents.md +606 -0
- package/bin/skills/iso-13485-certification/references/quality-manual-guide.md +688 -0
- package/bin/skills/iso-13485-certification/scripts/gap_analyzer.py +440 -0
- package/bin/skills/kegg-database/SKILL.md +377 -0
- package/bin/skills/kegg-database/references/kegg_reference.md +326 -0
- package/bin/skills/kegg-database/scripts/kegg_api.py +251 -0
- package/bin/skills/labarchive-integration/SKILL.md +268 -0
- package/bin/skills/labarchive-integration/references/api_reference.md +342 -0
- package/bin/skills/labarchive-integration/references/authentication_guide.md +357 -0
- package/bin/skills/labarchive-integration/references/integrations.md +425 -0
- package/bin/skills/labarchive-integration/scripts/entry_operations.py +334 -0
- package/bin/skills/labarchive-integration/scripts/notebook_operations.py +269 -0
- package/bin/skills/labarchive-integration/scripts/setup_config.py +205 -0
- package/bin/skills/lamindb/SKILL.md +390 -0
- package/bin/skills/lamindb/references/annotation-validation.md +513 -0
- package/bin/skills/lamindb/references/core-concepts.md +380 -0
- package/bin/skills/lamindb/references/data-management.md +433 -0
- package/bin/skills/lamindb/references/integrations.md +642 -0
- package/bin/skills/lamindb/references/ontologies.md +497 -0
- package/bin/skills/lamindb/references/setup-deployment.md +733 -0
- package/bin/skills/latchbio-integration/SKILL.md +353 -0
- package/bin/skills/latchbio-integration/references/data-management.md +427 -0
- package/bin/skills/latchbio-integration/references/resource-configuration.md +429 -0
- package/bin/skills/latchbio-integration/references/verified-workflows.md +487 -0
- package/bin/skills/latchbio-integration/references/workflow-creation.md +254 -0
- package/bin/skills/matchms/SKILL.md +203 -0
- package/bin/skills/matchms/references/filtering.md +288 -0
- package/bin/skills/matchms/references/importing_exporting.md +416 -0
- package/bin/skills/matchms/references/similarity.md +380 -0
- package/bin/skills/matchms/references/workflows.md +647 -0
- package/bin/skills/matlab/SKILL.md +376 -0
- package/bin/skills/matlab/references/data-import-export.md +479 -0
- package/bin/skills/matlab/references/executing-scripts.md +444 -0
- package/bin/skills/matlab/references/graphics-visualization.md +579 -0
- package/bin/skills/matlab/references/mathematics.md +553 -0
- package/bin/skills/matlab/references/matrices-arrays.md +349 -0
- package/bin/skills/matlab/references/octave-compatibility.md +544 -0
- package/bin/skills/matlab/references/programming.md +672 -0
- package/bin/skills/matlab/references/python-integration.md +433 -0
- package/bin/skills/matplotlib/SKILL.md +361 -0
- package/bin/skills/matplotlib/references/api_reference.md +412 -0
- package/bin/skills/matplotlib/references/common_issues.md +563 -0
- package/bin/skills/matplotlib/references/plot_types.md +476 -0
- package/bin/skills/matplotlib/references/styling_guide.md +589 -0
- package/bin/skills/matplotlib/scripts/plot_template.py +401 -0
- package/bin/skills/matplotlib/scripts/style_configurator.py +409 -0
- package/bin/skills/medchem/SKILL.md +406 -0
- package/bin/skills/medchem/references/api_guide.md +600 -0
- package/bin/skills/medchem/references/rules_catalog.md +604 -0
- package/bin/skills/medchem/scripts/filter_molecules.py +418 -0
- package/bin/skills/metabolomics-workbench-database/SKILL.md +259 -0
- package/bin/skills/metabolomics-workbench-database/references/api_reference.md +494 -0
- package/bin/skills/modal-research-gpu/SKILL.md +238 -0
- package/bin/skills/molfeat/SKILL.md +511 -0
- package/bin/skills/molfeat/references/api_reference.md +428 -0
- package/bin/skills/molfeat/references/available_featurizers.md +333 -0
- package/bin/skills/molfeat/references/examples.md +723 -0
- package/bin/skills/networkx/SKILL.md +437 -0
- package/bin/skills/networkx/references/algorithms.md +383 -0
- package/bin/skills/networkx/references/generators.md +378 -0
- package/bin/skills/networkx/references/graph-basics.md +283 -0
- package/bin/skills/networkx/references/io.md +441 -0
- package/bin/skills/networkx/references/visualization.md +529 -0
- package/bin/skills/neurokit2/SKILL.md +356 -0
- package/bin/skills/neurokit2/references/bio_module.md +417 -0
- package/bin/skills/neurokit2/references/complexity.md +715 -0
- package/bin/skills/neurokit2/references/ecg_cardiac.md +355 -0
- package/bin/skills/neurokit2/references/eda.md +497 -0
- package/bin/skills/neurokit2/references/eeg.md +506 -0
- package/bin/skills/neurokit2/references/emg.md +408 -0
- package/bin/skills/neurokit2/references/eog.md +407 -0
- package/bin/skills/neurokit2/references/epochs_events.md +471 -0
- package/bin/skills/neurokit2/references/hrv.md +480 -0
- package/bin/skills/neurokit2/references/ppg.md +413 -0
- package/bin/skills/neurokit2/references/rsp.md +510 -0
- package/bin/skills/neurokit2/references/signal_processing.md +648 -0
- package/bin/skills/neuropixels-analysis/SKILL.md +350 -0
- package/bin/skills/neuropixels-analysis/assets/analysis_template.py +271 -0
- package/bin/skills/neuropixels-analysis/references/AI_CURATION.md +345 -0
- package/bin/skills/neuropixels-analysis/references/ANALYSIS.md +392 -0
- package/bin/skills/neuropixels-analysis/references/AUTOMATED_CURATION.md +358 -0
- package/bin/skills/neuropixels-analysis/references/MOTION_CORRECTION.md +323 -0
- package/bin/skills/neuropixels-analysis/references/PREPROCESSING.md +273 -0
- package/bin/skills/neuropixels-analysis/references/QUALITY_METRICS.md +359 -0
- package/bin/skills/neuropixels-analysis/references/SPIKE_SORTING.md +339 -0
- package/bin/skills/neuropixels-analysis/references/api_reference.md +415 -0
- package/bin/skills/neuropixels-analysis/references/plotting_guide.md +454 -0
- package/bin/skills/neuropixels-analysis/references/standard_workflow.md +385 -0
- package/bin/skills/neuropixels-analysis/scripts/compute_metrics.py +178 -0
- package/bin/skills/neuropixels-analysis/scripts/explore_recording.py +168 -0
- package/bin/skills/neuropixels-analysis/scripts/export_to_phy.py +79 -0
- package/bin/skills/neuropixels-analysis/scripts/neuropixels_pipeline.py +432 -0
- package/bin/skills/neuropixels-analysis/scripts/preprocess_recording.py +122 -0
- package/bin/skills/neuropixels-analysis/scripts/run_sorting.py +98 -0
- package/bin/skills/offer-k-dense-web/SKILL.md +21 -0
- package/bin/skills/omero-integration/SKILL.md +251 -0
- package/bin/skills/omero-integration/references/advanced.md +631 -0
- package/bin/skills/omero-integration/references/connection.md +369 -0
- package/bin/skills/omero-integration/references/data_access.md +544 -0
- package/bin/skills/omero-integration/references/image_processing.md +665 -0
- package/bin/skills/omero-integration/references/metadata.md +688 -0
- package/bin/skills/omero-integration/references/rois.md +648 -0
- package/bin/skills/omero-integration/references/scripts.md +637 -0
- package/bin/skills/omero-integration/references/tables.md +532 -0
- package/bin/skills/openalex-database/SKILL.md +494 -0
- package/bin/skills/openalex-database/references/api_guide.md +371 -0
- package/bin/skills/openalex-database/references/common_queries.md +381 -0
- package/bin/skills/openalex-database/scripts/openalex_client.py +337 -0
- package/bin/skills/openalex-database/scripts/query_helpers.py +306 -0
- package/bin/skills/opentargets-database/SKILL.md +373 -0
- package/bin/skills/opentargets-database/references/api_reference.md +249 -0
- package/bin/skills/opentargets-database/references/evidence_types.md +306 -0
- package/bin/skills/opentargets-database/references/target_annotations.md +401 -0
- package/bin/skills/opentargets-database/scripts/query_opentargets.py +403 -0
- package/bin/skills/opentrons-integration/SKILL.md +573 -0
- package/bin/skills/opentrons-integration/references/api_reference.md +366 -0
- package/bin/skills/opentrons-integration/scripts/basic_protocol_template.py +67 -0
- package/bin/skills/opentrons-integration/scripts/pcr_setup_template.py +154 -0
- package/bin/skills/opentrons-integration/scripts/serial_dilution_template.py +96 -0
- package/bin/skills/pathml/SKILL.md +166 -0
- package/bin/skills/pathml/references/data_management.md +742 -0
- package/bin/skills/pathml/references/graphs.md +653 -0
- package/bin/skills/pathml/references/image_loading.md +448 -0
- package/bin/skills/pathml/references/machine_learning.md +725 -0
- package/bin/skills/pathml/references/multiparametric.md +686 -0
- package/bin/skills/pathml/references/preprocessing.md +722 -0
- package/bin/skills/pdb-database/SKILL.md +309 -0
- package/bin/skills/pdb-database/references/api_reference.md +617 -0
- package/bin/skills/pennylane/SKILL.md +226 -0
- package/bin/skills/pennylane/references/advanced_features.md +667 -0
- package/bin/skills/pennylane/references/devices_backends.md +596 -0
- package/bin/skills/pennylane/references/getting_started.md +227 -0
- package/bin/skills/pennylane/references/optimization.md +671 -0
- package/bin/skills/pennylane/references/quantum_chemistry.md +567 -0
- package/bin/skills/pennylane/references/quantum_circuits.md +437 -0
- package/bin/skills/pennylane/references/quantum_ml.md +571 -0
- package/bin/skills/perplexity-search/SKILL.md +448 -0
- package/bin/skills/perplexity-search/assets/.env.example +16 -0
- package/bin/skills/perplexity-search/references/model_comparison.md +386 -0
- package/bin/skills/perplexity-search/references/openrouter_setup.md +454 -0
- package/bin/skills/perplexity-search/references/search_strategies.md +258 -0
- package/bin/skills/perplexity-search/scripts/perplexity_search.py +277 -0
- package/bin/skills/perplexity-search/scripts/setup_env.py +171 -0
- package/bin/skills/plotly/SKILL.md +267 -0
- package/bin/skills/plotly/references/chart-types.md +488 -0
- package/bin/skills/plotly/references/export-interactivity.md +453 -0
- package/bin/skills/plotly/references/graph-objects.md +302 -0
- package/bin/skills/plotly/references/layouts-styling.md +457 -0
- package/bin/skills/plotly/references/plotly-express.md +213 -0
- package/bin/skills/polars/SKILL.md +387 -0
- package/bin/skills/polars/references/best_practices.md +649 -0
- package/bin/skills/polars/references/core_concepts.md +378 -0
- package/bin/skills/polars/references/io_guide.md +557 -0
- package/bin/skills/polars/references/operations.md +602 -0
- package/bin/skills/polars/references/pandas_migration.md +417 -0
- package/bin/skills/polars/references/transformations.md +549 -0
- package/bin/skills/protocolsio-integration/SKILL.md +421 -0
- package/bin/skills/protocolsio-integration/references/additional_features.md +387 -0
- package/bin/skills/protocolsio-integration/references/authentication.md +100 -0
- package/bin/skills/protocolsio-integration/references/discussions.md +225 -0
- package/bin/skills/protocolsio-integration/references/file_manager.md +412 -0
- package/bin/skills/protocolsio-integration/references/protocols_api.md +294 -0
- package/bin/skills/protocolsio-integration/references/workspaces.md +293 -0
- package/bin/skills/pubchem-database/SKILL.md +574 -0
- package/bin/skills/pubchem-database/references/api_reference.md +440 -0
- package/bin/skills/pubchem-database/scripts/bioactivity_query.py +367 -0
- package/bin/skills/pubchem-database/scripts/compound_search.py +297 -0
- package/bin/skills/pubmed-database/SKILL.md +460 -0
- package/bin/skills/pubmed-database/references/api_reference.md +298 -0
- package/bin/skills/pubmed-database/references/common_queries.md +453 -0
- package/bin/skills/pubmed-database/references/search_syntax.md +436 -0
- package/bin/skills/pufferlib/SKILL.md +436 -0
- package/bin/skills/pufferlib/references/environments.md +508 -0
- package/bin/skills/pufferlib/references/integration.md +621 -0
- package/bin/skills/pufferlib/references/policies.md +653 -0
- package/bin/skills/pufferlib/references/training.md +360 -0
- package/bin/skills/pufferlib/references/vectorization.md +557 -0
- package/bin/skills/pufferlib/scripts/env_template.py +340 -0
- package/bin/skills/pufferlib/scripts/train_template.py +239 -0
- package/bin/skills/pydeseq2/SKILL.md +559 -0
- package/bin/skills/pydeseq2/references/api_reference.md +228 -0
- package/bin/skills/pydeseq2/references/workflow_guide.md +582 -0
- package/bin/skills/pydeseq2/scripts/run_deseq2_analysis.py +353 -0
- package/bin/skills/pydicom/SKILL.md +434 -0
- package/bin/skills/pydicom/references/common_tags.md +228 -0
- package/bin/skills/pydicom/references/transfer_syntaxes.md +352 -0
- package/bin/skills/pydicom/scripts/anonymize_dicom.py +137 -0
- package/bin/skills/pydicom/scripts/dicom_to_image.py +172 -0
- package/bin/skills/pydicom/scripts/extract_metadata.py +173 -0
- package/bin/skills/pyhealth/SKILL.md +491 -0
- package/bin/skills/pyhealth/references/datasets.md +178 -0
- package/bin/skills/pyhealth/references/medical_coding.md +284 -0
- package/bin/skills/pyhealth/references/models.md +594 -0
- package/bin/skills/pyhealth/references/preprocessing.md +638 -0
- package/bin/skills/pyhealth/references/tasks.md +379 -0
- package/bin/skills/pyhealth/references/training_evaluation.md +648 -0
- package/bin/skills/pylabrobot/SKILL.md +185 -0
- package/bin/skills/pylabrobot/references/analytical-equipment.md +464 -0
- package/bin/skills/pylabrobot/references/hardware-backends.md +480 -0
- package/bin/skills/pylabrobot/references/liquid-handling.md +403 -0
- package/bin/skills/pylabrobot/references/material-handling.md +620 -0
- package/bin/skills/pylabrobot/references/resources.md +489 -0
- package/bin/skills/pylabrobot/references/visualization.md +532 -0
- package/bin/skills/pymatgen/SKILL.md +691 -0
- package/bin/skills/pymatgen/references/analysis_modules.md +530 -0
- package/bin/skills/pymatgen/references/core_classes.md +318 -0
- package/bin/skills/pymatgen/references/io_formats.md +469 -0
- package/bin/skills/pymatgen/references/materials_project_api.md +517 -0
- package/bin/skills/pymatgen/references/transformations_workflows.md +591 -0
- package/bin/skills/pymatgen/scripts/phase_diagram_generator.py +233 -0
- package/bin/skills/pymatgen/scripts/structure_analyzer.py +266 -0
- package/bin/skills/pymatgen/scripts/structure_converter.py +169 -0
- package/bin/skills/pymc/SKILL.md +572 -0
- package/bin/skills/pymc/assets/hierarchical_model_template.py +333 -0
- package/bin/skills/pymc/assets/linear_regression_template.py +241 -0
- package/bin/skills/pymc/references/distributions.md +320 -0
- package/bin/skills/pymc/references/sampling_inference.md +424 -0
- package/bin/skills/pymc/references/workflows.md +526 -0
- package/bin/skills/pymc/scripts/model_comparison.py +387 -0
- package/bin/skills/pymc/scripts/model_diagnostics.py +350 -0
- package/bin/skills/pymoo/SKILL.md +571 -0
- package/bin/skills/pymoo/references/algorithms.md +180 -0
- package/bin/skills/pymoo/references/constraints_mcdm.md +417 -0
- package/bin/skills/pymoo/references/operators.md +345 -0
- package/bin/skills/pymoo/references/problems.md +265 -0
- package/bin/skills/pymoo/references/visualization.md +353 -0
- package/bin/skills/pymoo/scripts/custom_problem_example.py +181 -0
- package/bin/skills/pymoo/scripts/decision_making_example.py +161 -0
- package/bin/skills/pymoo/scripts/many_objective_example.py +72 -0
- package/bin/skills/pymoo/scripts/multi_objective_example.py +63 -0
- package/bin/skills/pymoo/scripts/single_objective_example.py +59 -0
- package/bin/skills/pyopenms/SKILL.md +217 -0
- package/bin/skills/pyopenms/references/data_structures.md +497 -0
- package/bin/skills/pyopenms/references/feature_detection.md +410 -0
- package/bin/skills/pyopenms/references/file_io.md +349 -0
- package/bin/skills/pyopenms/references/identification.md +422 -0
- package/bin/skills/pyopenms/references/metabolomics.md +482 -0
- package/bin/skills/pyopenms/references/signal_processing.md +433 -0
- package/bin/skills/pysam/SKILL.md +265 -0
- package/bin/skills/pysam/references/alignment_files.md +280 -0
- package/bin/skills/pysam/references/common_workflows.md +520 -0
- package/bin/skills/pysam/references/sequence_files.md +407 -0
- package/bin/skills/pysam/references/variant_files.md +365 -0
- package/bin/skills/pytdc/SKILL.md +460 -0
- package/bin/skills/pytdc/references/datasets.md +246 -0
- package/bin/skills/pytdc/references/oracles.md +400 -0
- package/bin/skills/pytdc/references/utilities.md +684 -0
- package/bin/skills/pytdc/scripts/benchmark_evaluation.py +327 -0
- package/bin/skills/pytdc/scripts/load_and_split_data.py +214 -0
- package/bin/skills/pytdc/scripts/molecular_generation.py +404 -0
- package/bin/skills/qiskit/SKILL.md +275 -0
- package/bin/skills/qiskit/references/algorithms.md +607 -0
- package/bin/skills/qiskit/references/backends.md +433 -0
- package/bin/skills/qiskit/references/circuits.md +197 -0
- package/bin/skills/qiskit/references/patterns.md +533 -0
- package/bin/skills/qiskit/references/primitives.md +277 -0
- package/bin/skills/qiskit/references/setup.md +99 -0
- package/bin/skills/qiskit/references/transpilation.md +286 -0
- package/bin/skills/qiskit/references/visualization.md +415 -0
- package/bin/skills/qutip/SKILL.md +318 -0
- package/bin/skills/qutip/references/advanced.md +555 -0
- package/bin/skills/qutip/references/analysis.md +523 -0
- package/bin/skills/qutip/references/core_concepts.md +293 -0
- package/bin/skills/qutip/references/time_evolution.md +348 -0
- package/bin/skills/qutip/references/visualization.md +431 -0
- package/bin/skills/rdkit/SKILL.md +780 -0
- package/bin/skills/rdkit/references/api_reference.md +432 -0
- package/bin/skills/rdkit/references/descriptors_reference.md +595 -0
- package/bin/skills/rdkit/references/smarts_patterns.md +668 -0
- package/bin/skills/rdkit/scripts/molecular_properties.py +243 -0
- package/bin/skills/rdkit/scripts/similarity_search.py +297 -0
- package/bin/skills/rdkit/scripts/substructure_filter.py +386 -0
- package/bin/skills/reactome-database/SKILL.md +278 -0
- package/bin/skills/reactome-database/references/api_reference.md +465 -0
- package/bin/skills/reactome-database/scripts/reactome_query.py +286 -0
- package/bin/skills/rowan/SKILL.md +427 -0
- package/bin/skills/rowan/references/api_reference.md +413 -0
- package/bin/skills/rowan/references/molecule_handling.md +429 -0
- package/bin/skills/rowan/references/proteins_and_organization.md +499 -0
- package/bin/skills/rowan/references/rdkit_native.md +438 -0
- package/bin/skills/rowan/references/results_interpretation.md +481 -0
- package/bin/skills/rowan/references/workflow_types.md +591 -0
- package/bin/skills/scanpy/SKILL.md +386 -0
- package/bin/skills/scanpy/assets/analysis_template.py +295 -0
- package/bin/skills/scanpy/references/api_reference.md +251 -0
- package/bin/skills/scanpy/references/plotting_guide.md +352 -0
- package/bin/skills/scanpy/references/standard_workflow.md +206 -0
- package/bin/skills/scanpy/scripts/qc_analysis.py +200 -0
- package/bin/skills/scientific-brainstorming/SKILL.md +191 -0
- package/bin/skills/scientific-brainstorming/references/brainstorming_methods.md +326 -0
- package/bin/skills/scientific-visualization/SKILL.md +779 -0
- package/bin/skills/scientific-visualization/assets/color_palettes.py +197 -0
- package/bin/skills/scientific-visualization/assets/nature.mplstyle +63 -0
- package/bin/skills/scientific-visualization/assets/presentation.mplstyle +61 -0
- package/bin/skills/scientific-visualization/assets/publication.mplstyle +68 -0
- package/bin/skills/scientific-visualization/references/color_palettes.md +348 -0
- package/bin/skills/scientific-visualization/references/journal_requirements.md +320 -0
- package/bin/skills/scientific-visualization/references/matplotlib_examples.md +620 -0
- package/bin/skills/scientific-visualization/references/publication_guidelines.md +205 -0
- package/bin/skills/scientific-visualization/scripts/figure_export.py +343 -0
- package/bin/skills/scientific-visualization/scripts/style_presets.py +416 -0
- package/bin/skills/scikit-bio/SKILL.md +437 -0
- package/bin/skills/scikit-bio/references/api_reference.md +749 -0
- package/bin/skills/scikit-learn/SKILL.md +521 -0
- package/bin/skills/scikit-learn/references/model_evaluation.md +592 -0
- package/bin/skills/scikit-learn/references/pipelines_and_composition.md +612 -0
- package/bin/skills/scikit-learn/references/preprocessing.md +606 -0
- package/bin/skills/scikit-learn/references/quick_reference.md +433 -0
- package/bin/skills/scikit-learn/references/supervised_learning.md +378 -0
- package/bin/skills/scikit-learn/references/unsupervised_learning.md +505 -0
- package/bin/skills/scikit-learn/scripts/classification_pipeline.py +257 -0
- package/bin/skills/scikit-learn/scripts/clustering_analysis.py +386 -0
- package/bin/skills/scikit-survival/SKILL.md +399 -0
- package/bin/skills/scikit-survival/references/competing-risks.md +397 -0
- package/bin/skills/scikit-survival/references/cox-models.md +182 -0
- package/bin/skills/scikit-survival/references/data-handling.md +494 -0
- package/bin/skills/scikit-survival/references/ensemble-models.md +327 -0
- package/bin/skills/scikit-survival/references/evaluation-metrics.md +378 -0
- package/bin/skills/scikit-survival/references/svm-models.md +411 -0
- package/bin/skills/scvi-tools/SKILL.md +190 -0
- package/bin/skills/scvi-tools/references/differential-expression.md +581 -0
- package/bin/skills/scvi-tools/references/models-atac-seq.md +321 -0
- package/bin/skills/scvi-tools/references/models-multimodal.md +367 -0
- package/bin/skills/scvi-tools/references/models-scrna-seq.md +330 -0
- package/bin/skills/scvi-tools/references/models-spatial.md +438 -0
- package/bin/skills/scvi-tools/references/models-specialized.md +408 -0
- package/bin/skills/scvi-tools/references/theoretical-foundations.md +438 -0
- package/bin/skills/scvi-tools/references/workflows.md +546 -0
- package/bin/skills/seaborn/SKILL.md +673 -0
- package/bin/skills/seaborn/references/examples.md +822 -0
- package/bin/skills/seaborn/references/function_reference.md +770 -0
- package/bin/skills/seaborn/references/objects_interface.md +964 -0
- package/bin/skills/shap/SKILL.md +566 -0
- package/bin/skills/shap/references/explainers.md +339 -0
- package/bin/skills/shap/references/plots.md +507 -0
- package/bin/skills/shap/references/theory.md +449 -0
- package/bin/skills/shap/references/workflows.md +605 -0
- package/bin/skills/simpy/SKILL.md +429 -0
- package/bin/skills/simpy/references/events.md +374 -0
- package/bin/skills/simpy/references/monitoring.md +475 -0
- package/bin/skills/simpy/references/process-interaction.md +424 -0
- package/bin/skills/simpy/references/real-time.md +395 -0
- package/bin/skills/simpy/references/resources.md +275 -0
- package/bin/skills/simpy/scripts/basic_simulation_template.py +193 -0
- package/bin/skills/simpy/scripts/resource_monitor.py +345 -0
- package/bin/skills/stable-baselines3/SKILL.md +299 -0
- package/bin/skills/stable-baselines3/references/algorithms.md +333 -0
- package/bin/skills/stable-baselines3/references/callbacks.md +556 -0
- package/bin/skills/stable-baselines3/references/custom_environments.md +526 -0
- package/bin/skills/stable-baselines3/references/vectorized_envs.md +568 -0
- package/bin/skills/stable-baselines3/scripts/custom_env_template.py +314 -0
- package/bin/skills/stable-baselines3/scripts/evaluate_agent.py +245 -0
- package/bin/skills/stable-baselines3/scripts/train_rl_agent.py +165 -0
- package/bin/skills/statistical-analysis/SKILL.md +632 -0
- package/bin/skills/statistical-analysis/references/assumptions_and_diagnostics.md +369 -0
- package/bin/skills/statistical-analysis/references/bayesian_statistics.md +661 -0
- package/bin/skills/statistical-analysis/references/effect_sizes_and_power.md +581 -0
- package/bin/skills/statistical-analysis/references/reporting_standards.md +469 -0
- package/bin/skills/statistical-analysis/references/test_selection_guide.md +129 -0
- package/bin/skills/statistical-analysis/scripts/assumption_checks.py +539 -0
- package/bin/skills/statsmodels/SKILL.md +614 -0
- package/bin/skills/statsmodels/references/discrete_choice.md +669 -0
- package/bin/skills/statsmodels/references/glm.md +619 -0
- package/bin/skills/statsmodels/references/linear_models.md +447 -0
- package/bin/skills/statsmodels/references/stats_diagnostics.md +859 -0
- package/bin/skills/statsmodels/references/time_series.md +716 -0
- package/bin/skills/string-database/SKILL.md +534 -0
- package/bin/skills/string-database/references/string_reference.md +455 -0
- package/bin/skills/string-database/scripts/string_api.py +369 -0
- package/bin/skills/sympy/SKILL.md +500 -0
- package/bin/skills/sympy/references/advanced-topics.md +635 -0
- package/bin/skills/sympy/references/code-generation-printing.md +599 -0
- package/bin/skills/sympy/references/core-capabilities.md +348 -0
- package/bin/skills/sympy/references/matrices-linear-algebra.md +526 -0
- package/bin/skills/sympy/references/physics-mechanics.md +592 -0
- package/bin/skills/torch_geometric/SKILL.md +676 -0
- package/bin/skills/torch_geometric/references/datasets_reference.md +574 -0
- package/bin/skills/torch_geometric/references/layers_reference.md +485 -0
- package/bin/skills/torch_geometric/references/transforms_reference.md +679 -0
- package/bin/skills/torch_geometric/scripts/benchmark_model.py +309 -0
- package/bin/skills/torch_geometric/scripts/create_gnn_template.py +529 -0
- package/bin/skills/torch_geometric/scripts/visualize_graph.py +313 -0
- package/bin/skills/torchdrug/SKILL.md +450 -0
- package/bin/skills/torchdrug/references/core_concepts.md +565 -0
- package/bin/skills/torchdrug/references/datasets.md +380 -0
- package/bin/skills/torchdrug/references/knowledge_graphs.md +320 -0
- package/bin/skills/torchdrug/references/models_architectures.md +541 -0
- package/bin/skills/torchdrug/references/molecular_generation.md +352 -0
- package/bin/skills/torchdrug/references/molecular_property_prediction.md +169 -0
- package/bin/skills/torchdrug/references/protein_modeling.md +272 -0
- package/bin/skills/torchdrug/references/retrosynthesis.md +436 -0
- package/bin/skills/transformers/SKILL.md +164 -0
- package/bin/skills/transformers/references/generation.md +467 -0
- package/bin/skills/transformers/references/models.md +361 -0
- package/bin/skills/transformers/references/pipelines.md +335 -0
- package/bin/skills/transformers/references/tokenizers.md +447 -0
- package/bin/skills/transformers/references/training.md +500 -0
- package/bin/skills/umap-learn/SKILL.md +479 -0
- package/bin/skills/umap-learn/references/api_reference.md +532 -0
- package/bin/skills/uniprot-database/SKILL.md +195 -0
- package/bin/skills/uniprot-database/references/api_examples.md +413 -0
- package/bin/skills/uniprot-database/references/api_fields.md +275 -0
- package/bin/skills/uniprot-database/references/id_mapping_databases.md +285 -0
- package/bin/skills/uniprot-database/references/query_syntax.md +256 -0
- package/bin/skills/uniprot-database/scripts/uniprot_client.py +341 -0
- package/bin/skills/uspto-database/SKILL.md +607 -0
- package/bin/skills/uspto-database/references/additional_apis.md +394 -0
- package/bin/skills/uspto-database/references/patentsearch_api.md +266 -0
- package/bin/skills/uspto-database/references/peds_api.md +212 -0
- package/bin/skills/uspto-database/references/trademark_api.md +358 -0
- package/bin/skills/uspto-database/scripts/patent_search.py +290 -0
- package/bin/skills/uspto-database/scripts/peds_client.py +285 -0
- package/bin/skills/uspto-database/scripts/trademark_client.py +311 -0
- package/bin/skills/vaex/SKILL.md +182 -0
- package/bin/skills/vaex/references/core_dataframes.md +367 -0
- package/bin/skills/vaex/references/data_processing.md +555 -0
- package/bin/skills/vaex/references/io_operations.md +703 -0
- package/bin/skills/vaex/references/machine_learning.md +728 -0
- package/bin/skills/vaex/references/performance.md +571 -0
- package/bin/skills/vaex/references/visualization.md +613 -0
- package/bin/skills/zarr-python/SKILL.md +779 -0
- package/bin/skills/zarr-python/references/api_reference.md +515 -0
- package/bin/skills/zinc-database/SKILL.md +404 -0
- package/bin/skills/zinc-database/references/api_reference.md +692 -0
- package/bin/synsc +0 -0
- package/package.json +1 -1
|
@@ -0,0 +1,814 @@
|
|
|
1
|
+
# gget Workflow Examples
|
|
2
|
+
|
|
3
|
+
Extended workflow examples demonstrating how to combine multiple gget modules for common bioinformatics tasks.
|
|
4
|
+
|
|
5
|
+
## Table of Contents
|
|
6
|
+
1. [Complete Gene Analysis Pipeline](#complete-gene-analysis-pipeline)
|
|
7
|
+
2. [Comparative Structural Biology](#comparative-structural-biology)
|
|
8
|
+
3. [Cancer Genomics Analysis](#cancer-genomics-analysis)
|
|
9
|
+
4. [Single-Cell Expression Analysis](#single-cell-expression-analysis)
|
|
10
|
+
5. [Building Reference Transcriptomes](#building-reference-transcriptomes)
|
|
11
|
+
6. [Mutation Impact Assessment](#mutation-impact-assessment)
|
|
12
|
+
7. [Drug Target Discovery](#drug-target-discovery)
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Complete Gene Analysis Pipeline
|
|
17
|
+
|
|
18
|
+
Comprehensive analysis of a gene from discovery to functional annotation.
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
import gget
import pandas as pd

# Complete gene analysis pipeline: search -> annotate -> sequences ->
# expression -> co-expression -> enrichment -> disease links -> orthologs.
# Every step calls a gget module, so a working network connection is needed.

# Step 1: Search for genes of interest
print("Step 1: Searching for GABA receptor genes...")
search_results = gget.search(
    ["GABA", "receptor", "alpha"],
    species="homo_sapiens",
    andor="and",
)
print(f"Found {len(search_results)} genes")

# Step 2: Get detailed information
print("\nStep 2: Getting detailed information...")
gene_ids = search_results["ensembl_id"].tolist()[:5]  # Top 5 genes
if not gene_ids:
    # Later steps index gene_ids[0] / .iloc[0]; stop early with a clear message.
    raise SystemExit("No genes matched the search terms; nothing to analyze.")

gene_info = gget.info(gene_ids, pdb=True)
# gget.info names its columns "primary_gene_name" and "ensembl_description"
# (there is no plain "gene_name" / "description" column in its output).
print(gene_info[["ensembl_id", "primary_gene_name", "uniprot_id", "ensembl_description"]])
gene_names = gene_info["primary_gene_name"].tolist()

# Step 3: Retrieve sequences
print("\nStep 3: Retrieving sequences...")
nucleotide_seqs = gget.seq(gene_ids)
protein_seqs = gget.seq(gene_ids, translate=True)

# Save sequences.
# gget.seq returns a list of FASTA lines (header, sequence, header, ...),
# so join them into text before writing; file.write() rejects a list.
with open("gaba_receptors_nt.fasta", "w") as f:
    f.write("\n".join(nucleotide_seqs) + "\n")
with open("gaba_receptors_aa.fasta", "w") as f:
    f.write("\n".join(protein_seqs) + "\n")

# Step 4: Get expression data
print("\nStep 4: Getting tissue expression...")
for gene_name in gene_names:
    expr_data = gget.archs4(gene_name, which="tissue")
    print(f"\n{gene_name} expression:")
    print(expr_data.head())

# Step 5: Find correlated genes
print("\nStep 5: Finding correlated genes...")
correlated = gget.archs4(gene_names[0], which="correlation")
correlated_top = correlated.head(20)
print(correlated_top)

# Step 6: Enrichment analysis on correlated genes
print("\nStep 6: Performing enrichment analysis...")
gene_list = correlated_top["gene_symbol"].tolist()
enrichment = gget.enrichr(gene_list, database="ontology", plot=True)
print(enrichment.head(10))

# Step 7: Get disease associations
print("\nStep 7: Getting disease associations...")
# NOTE(review): pairing gene_ids with gene_names assumes gget.info returned
# one row per requested ID, in the requested order - confirm for your input.
for gene_id, gene_name in zip(gene_ids[:3], gene_names[:3]):
    diseases = gget.opentargets(gene_id, resource="diseases", limit=5)
    print(f"\n{gene_name} disease associations:")
    print(diseases)

# Step 8: Check for orthologs
print("\nStep 8: Finding orthologs...")
orthologs = gget.bgee(gene_ids[0], type="orthologs")
print(orthologs)

print("\nComplete gene analysis pipeline finished!")
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Comparative Structural Biology
|
|
85
|
+
|
|
86
|
+
Compare protein structures across species and analyze functional motifs.
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
import gget
|
|
90
|
+
|
|
91
|
+
# Define genes for comparison
|
|
92
|
+
human_gene = "ENSG00000169174" # PCSK9
|
|
93
|
+
mouse_gene = "ENSMUSG00000044254" # Pcsk9
|
|
94
|
+
|
|
95
|
+
print("Comparative Structural Biology Workflow")
|
|
96
|
+
print("=" * 50)
|
|
97
|
+
|
|
98
|
+
# Step 1: Get gene information
|
|
99
|
+
print("\n1. Getting gene information...")
|
|
100
|
+
human_info = gget.info([human_gene])
|
|
101
|
+
mouse_info = gget.info([mouse_gene])
|
|
102
|
+
|
|
103
|
+
print(f"Human: {human_info['gene_name'].iloc[0]}")
|
|
104
|
+
print(f"Mouse: {mouse_info['gene_name'].iloc[0]}")
|
|
105
|
+
|
|
106
|
+
# Step 2: Retrieve protein sequences
|
|
107
|
+
print("\n2. Retrieving protein sequences...")
|
|
108
|
+
human_seq = gget.seq(human_gene, translate=True)
|
|
109
|
+
mouse_seq = gget.seq(mouse_gene, translate=True)
|
|
110
|
+
|
|
111
|
+
# Save to file for alignment
|
|
112
|
+
with open("pcsk9_sequences.fasta", "w") as f:
|
|
113
|
+
f.write(human_seq)
|
|
114
|
+
f.write("\n")
|
|
115
|
+
f.write(mouse_seq)
|
|
116
|
+
|
|
117
|
+
# Step 3: Align sequences
|
|
118
|
+
print("\n3. Aligning sequences...")
|
|
119
|
+
alignment = gget.muscle("pcsk9_sequences.fasta")
|
|
120
|
+
print("Alignment completed. Visualizing in ClustalW format:")
|
|
121
|
+
print(alignment)
|
|
122
|
+
|
|
123
|
+
# Step 4: Get existing structures from PDB
|
|
124
|
+
print("\n4. Searching PDB for existing structures...")
|
|
125
|
+
# Search by sequence using BLAST
|
|
126
|
+
pdb_results = gget.blast(human_seq, database="pdbaa", limit=5)
|
|
127
|
+
print("Top PDB matches:")
|
|
128
|
+
print(pdb_results[["Description", "Max Score", "Query Coverage"]])
|
|
129
|
+
|
|
130
|
+
# Download top structure
|
|
131
|
+
if len(pdb_results) > 0:
|
|
132
|
+
# Extract PDB ID from description (usually format: "PDB|XXXX|...")
|
|
133
|
+
pdb_id = pdb_results.iloc[0]["Description"].split("|")[1]
|
|
134
|
+
print(f"\nDownloading PDB structure: {pdb_id}")
|
|
135
|
+
gget.pdb(pdb_id, save=True)
|
|
136
|
+
|
|
137
|
+
# Step 5: Predict AlphaFold structures
|
|
138
|
+
print("\n5. Predicting structures with AlphaFold...")
|
|
139
|
+
# Note: This requires gget setup alphafold and is computationally intensive
|
|
140
|
+
# Uncomment to run:
|
|
141
|
+
# human_structure = gget.alphafold(human_seq, plot=True)
|
|
142
|
+
# mouse_structure = gget.alphafold(mouse_seq, plot=True)
|
|
143
|
+
print("(AlphaFold prediction skipped - uncomment to run)")
|
|
144
|
+
|
|
145
|
+
# Step 6: Identify functional motifs
|
|
146
|
+
print("\n6. Identifying functional motifs with ELM...")
|
|
147
|
+
# Note: Requires gget setup elm
|
|
148
|
+
# Uncomment to run:
|
|
149
|
+
# human_ortholog_df, human_regex_df = gget.elm(human_seq)
|
|
150
|
+
# print("Human PCSK9 functional motifs:")
|
|
151
|
+
# print(human_regex_df)
|
|
152
|
+
print("(ELM analysis skipped - uncomment to run)")
|
|
153
|
+
|
|
154
|
+
# Step 7: Get orthology information
|
|
155
|
+
print("\n7. Getting orthology information from Bgee...")
|
|
156
|
+
orthologs = gget.bgee(human_gene, type="orthologs")
|
|
157
|
+
print("PCSK9 orthologs:")
|
|
158
|
+
print(orthologs)
|
|
159
|
+
|
|
160
|
+
print("\nComparative structural biology workflow completed!")
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## Cancer Genomics Analysis
|
|
166
|
+
|
|
167
|
+
Analyze cancer-associated genes and their mutations.
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
import gget
|
|
171
|
+
import matplotlib.pyplot as plt
# pandas is needed further down, where the per-gene search results are
# collected into a DataFrame via pd.DataFrame(gene_search).
import pandas as pd
|
|
172
|
+
|
|
173
|
+
print("Cancer Genomics Analysis Workflow")
|
|
174
|
+
print("=" * 50)
|
|
175
|
+
|
|
176
|
+
# Step 1: Search for cancer-related genes
|
|
177
|
+
print("\n1. Searching for breast cancer genes...")
|
|
178
|
+
genes = gget.search(["breast", "cancer", "BRCA"],
|
|
179
|
+
species="homo_sapiens",
|
|
180
|
+
andor="or",
|
|
181
|
+
limit=20)
|
|
182
|
+
print(f"Found {len(genes)} genes")
|
|
183
|
+
|
|
184
|
+
# Focus on specific genes
|
|
185
|
+
target_genes = ["BRCA1", "BRCA2", "TP53", "PIK3CA", "ESR1"]
|
|
186
|
+
print(f"\nAnalyzing: {', '.join(target_genes)}")
|
|
187
|
+
|
|
188
|
+
# Step 2: Get gene information
|
|
189
|
+
print("\n2. Getting gene information...")
|
|
190
|
+
gene_search = []
|
|
191
|
+
for gene in target_genes:
|
|
192
|
+
result = gget.search([gene], species="homo_sapiens", limit=1)
|
|
193
|
+
if len(result) > 0:
|
|
194
|
+
gene_search.append(result.iloc[0])
|
|
195
|
+
|
|
196
|
+
gene_df = pd.DataFrame(gene_search)
|
|
197
|
+
gene_ids = gene_df["ensembl_id"].tolist()
|
|
198
|
+
|
|
199
|
+
# Step 3: Get disease associations
|
|
200
|
+
print("\n3. Getting disease associations from OpenTargets...")
|
|
201
|
+
for gene_id, gene_name in zip(gene_ids, target_genes):
|
|
202
|
+
print(f"\n{gene_name} disease associations:")
|
|
203
|
+
diseases = gget.opentargets(gene_id, resource="diseases", limit=3)
|
|
204
|
+
print(diseases[["disease_name", "overall_score"]])
|
|
205
|
+
|
|
206
|
+
# Step 4: Get drug associations
|
|
207
|
+
print("\n4. Getting drug associations...")
|
|
208
|
+
for gene_id, gene_name in zip(gene_ids[:3], target_genes[:3]):
|
|
209
|
+
print(f"\n{gene_name} drug associations:")
|
|
210
|
+
drugs = gget.opentargets(gene_id, resource="drugs", limit=3)
|
|
211
|
+
if len(drugs) > 0:
|
|
212
|
+
print(drugs[["drug_name", "drug_type", "max_phase_for_all_diseases"]])
|
|
213
|
+
|
|
214
|
+
# Step 5: Search cBioPortal for studies
|
|
215
|
+
print("\n5. Searching cBioPortal for breast cancer studies...")
|
|
216
|
+
studies = gget.cbio_search(["breast", "cancer"])
|
|
217
|
+
print(f"Found {len(studies)} studies")
|
|
218
|
+
print(studies[:5])
|
|
219
|
+
|
|
220
|
+
# Step 6: Create cancer genomics heatmap
|
|
221
|
+
print("\n6. Creating cancer genomics heatmap...")
|
|
222
|
+
if len(studies) > 0:
|
|
223
|
+
# Select relevant studies
|
|
224
|
+
selected_studies = studies[:2] # Top 2 studies
|
|
225
|
+
|
|
226
|
+
gget.cbio_plot(
|
|
227
|
+
selected_studies,
|
|
228
|
+
target_genes,
|
|
229
|
+
stratification="cancer_type",
|
|
230
|
+
variation_type="mutation_occurrences",
|
|
231
|
+
show=False
|
|
232
|
+
)
|
|
233
|
+
print("Heatmap saved to ./gget_cbio_figures/")
|
|
234
|
+
|
|
235
|
+
# Step 7: Query COSMIC database (requires setup)
|
|
236
|
+
print("\n7. Querying COSMIC database...")
|
|
237
|
+
# Note: Requires COSMIC account and database download
|
|
238
|
+
# Uncomment to run:
|
|
239
|
+
# for gene in target_genes[:2]:
|
|
240
|
+
# cosmic_results = gget.cosmic(
|
|
241
|
+
# gene,
|
|
242
|
+
# cosmic_tsv_path="cosmic_cancer.tsv",
|
|
243
|
+
# limit=10
|
|
244
|
+
# )
|
|
245
|
+
# print(f"\n{gene} mutations in COSMIC:")
|
|
246
|
+
# print(cosmic_results)
|
|
247
|
+
print("(COSMIC query skipped - requires database download)")
|
|
248
|
+
|
|
249
|
+
# Step 8: Enrichment analysis
|
|
250
|
+
print("\n8. Performing pathway enrichment...")
|
|
251
|
+
enrichment = gget.enrichr(target_genes, database="pathway", plot=True)
|
|
252
|
+
print("\nTop enriched pathways:")
|
|
253
|
+
print(enrichment.head(10))
|
|
254
|
+
|
|
255
|
+
print("\nCancer genomics analysis completed!")
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
---
|
|
259
|
+
|
|
260
|
+
## Single-Cell Expression Analysis
|
|
261
|
+
|
|
262
|
+
Analyze single-cell RNA-seq data for specific cell types and tissues.
|
|
263
|
+
|
|
264
|
+
```python
|
|
265
|
+
import gget
|
|
266
|
+
# numpy is needed by the expression-statistics step, which calls np.median().
import numpy as np
import scanpy as sc
|
|
267
|
+
|
|
268
|
+
print("Single-Cell Expression Analysis Workflow")
|
|
269
|
+
print("=" * 50)
|
|
270
|
+
|
|
271
|
+
# Note: Requires gget setup cellxgene
|
|
272
|
+
|
|
273
|
+
# Step 1: Define genes and cell types of interest
|
|
274
|
+
genes_of_interest = ["ACE2", "TMPRSS2", "CD4", "CD8A"]
|
|
275
|
+
tissue = "lung"
|
|
276
|
+
cell_types = ["type ii pneumocyte", "macrophage", "t cell"]
|
|
277
|
+
|
|
278
|
+
print(f"\nAnalyzing genes: {', '.join(genes_of_interest)}")
|
|
279
|
+
print(f"Tissue: {tissue}")
|
|
280
|
+
print(f"Cell types: {', '.join(cell_types)}")
|
|
281
|
+
|
|
282
|
+
# Step 2: Get metadata first
|
|
283
|
+
print("\n1. Retrieving metadata...")
|
|
284
|
+
metadata = gget.cellxgene(
|
|
285
|
+
gene=genes_of_interest,
|
|
286
|
+
tissue=tissue,
|
|
287
|
+
species="homo_sapiens",
|
|
288
|
+
meta_only=True
|
|
289
|
+
)
|
|
290
|
+
print(f"Found {len(metadata)} datasets")
|
|
291
|
+
print(metadata.head())
|
|
292
|
+
|
|
293
|
+
# Step 3: Download count matrices
|
|
294
|
+
print("\n2. Downloading single-cell data...")
|
|
295
|
+
# Note: This can be a large download
|
|
296
|
+
adata = gget.cellxgene(
|
|
297
|
+
gene=genes_of_interest,
|
|
298
|
+
tissue=tissue,
|
|
299
|
+
species="homo_sapiens",
|
|
300
|
+
census_version="stable"
|
|
301
|
+
)
|
|
302
|
+
print(f"AnnData shape: {adata.shape}")
|
|
303
|
+
print(f"Genes: {adata.n_vars}")
|
|
304
|
+
print(f"Cells: {adata.n_obs}")
|
|
305
|
+
|
|
306
|
+
# Step 4: Basic QC and filtering with scanpy
|
|
307
|
+
print("\n3. Performing quality control...")
|
|
308
|
+
# The download above was restricted to genes_of_interest (gene=...), so each
# cell can have at most len(genes_of_interest) detected genes. A 200-gene
# minimum would therefore discard every cell; keep cells that express at
# least one of the queried genes instead.
# NOTE(review): raise this threshold again if the query is changed to fetch
# the full transcriptome.
sc.pp.filter_cells(adata, min_genes=1)
|
|
309
|
+
sc.pp.filter_genes(adata, min_cells=3)
|
|
310
|
+
print(f"After QC - Cells: {adata.n_obs}, Genes: {adata.n_vars}")
|
|
311
|
+
|
|
312
|
+
# Step 5: Normalize and log-transform
|
|
313
|
+
print("\n4. Normalizing data...")
|
|
314
|
+
sc.pp.normalize_total(adata, target_sum=1e4)
|
|
315
|
+
sc.pp.log1p(adata)
|
|
316
|
+
|
|
317
|
+
# Step 6: Calculate gene expression statistics
|
|
318
|
+
print("\n5. Calculating expression statistics...")
|
|
319
|
+
for gene in genes_of_interest:
|
|
320
|
+
if gene in adata.var_names:
|
|
321
|
+
expr = adata[:, gene].X.toarray().flatten()
|
|
322
|
+
print(f"\n{gene} expression:")
|
|
323
|
+
print(f" Mean: {expr.mean():.3f}")
|
|
324
|
+
print(f" Median: {np.median(expr):.3f}")
|
|
325
|
+
print(f" % expressing: {(expr > 0).sum() / len(expr) * 100:.1f}%")
|
|
326
|
+
|
|
327
|
+
# Step 7: Get tissue expression from ARCHS4 for comparison
|
|
328
|
+
print("\n6. Getting bulk tissue expression from ARCHS4...")
|
|
329
|
+
for gene in genes_of_interest:
|
|
330
|
+
tissue_expr = gget.archs4(gene, which="tissue")
|
|
331
|
+
lung_expr = tissue_expr[tissue_expr["tissue"] == "lung"]
|
|
332
|
+
if len(lung_expr) > 0:
|
|
333
|
+
print(f"\n{gene} in lung (ARCHS4):")
|
|
334
|
+
print(f" Median: {lung_expr['median'].iloc[0]:.3f}")
|
|
335
|
+
|
|
336
|
+
# Step 8: Enrichment analysis
|
|
337
|
+
print("\n7. Performing enrichment analysis...")
|
|
338
|
+
enrichment = gget.enrichr(genes_of_interest, database="celltypes", plot=True)
|
|
339
|
+
print("\nTop cell type associations:")
|
|
340
|
+
print(enrichment.head(10))
|
|
341
|
+
|
|
342
|
+
# Step 9: Get disease associations
|
|
343
|
+
print("\n8. Getting disease associations...")
|
|
344
|
+
for gene in genes_of_interest:
|
|
345
|
+
gene_search = gget.search([gene], species="homo_sapiens", limit=1)
|
|
346
|
+
if len(gene_search) > 0:
|
|
347
|
+
gene_id = gene_search["ensembl_id"].iloc[0]
|
|
348
|
+
diseases = gget.opentargets(gene_id, resource="diseases", limit=3)
|
|
349
|
+
print(f"\n{gene} disease associations:")
|
|
350
|
+
print(diseases[["disease_name", "overall_score"]])
|
|
351
|
+
|
|
352
|
+
print("\nSingle-cell expression analysis completed!")
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
---
|
|
356
|
+
|
|
357
|
+
## Building Reference Transcriptomes
|
|
358
|
+
|
|
359
|
+
Prepare reference data for RNA-seq analysis pipelines.
|
|
360
|
+
|
|
361
|
+
```bash
|
|
362
|
+
#!/bin/bash
|
|
363
|
+
# Reference transcriptome building workflow
|
|
364
|
+
|
|
365
|
+
echo "Reference Transcriptome Building Workflow"
|
|
366
|
+
echo "=========================================="
|
|
367
|
+
|
|
368
|
+
# Step 1: List available species
|
|
369
|
+
echo -e "\n1. Listing available species..."
|
|
370
|
+
gget ref --list_species > available_species.txt
|
|
371
|
+
echo "Available species saved to available_species.txt"
|
|
372
|
+
|
|
373
|
+
# Step 2: Download reference files for human
|
|
374
|
+
echo -e "\n2. Downloading human reference files..."
|
|
375
|
+
SPECIES="homo_sapiens"
|
|
376
|
+
RELEASE=110 # Specify release for reproducibility
|
|
377
|
+
|
|
378
|
+
# Download GTF annotation
|
|
379
|
+
echo "Downloading GTF annotation..."
|
|
380
|
+
gget ref -w gtf -r $RELEASE -d $SPECIES -o human_ref_gtf.json
|
|
381
|
+
|
|
382
|
+
# Download cDNA sequences
|
|
383
|
+
echo "Downloading cDNA sequences..."
|
|
384
|
+
gget ref -w cdna -r $RELEASE -d $SPECIES -o human_ref_cdna.json
|
|
385
|
+
|
|
386
|
+
# Download protein sequences
|
|
387
|
+
echo "Downloading protein sequences..."
|
|
388
|
+
gget ref -w pep -r $RELEASE -d $SPECIES -o human_ref_pep.json
|
|
389
|
+
|
|
390
|
+
# Step 3: Build kallisto index (if kallisto is installed)
|
|
391
|
+
echo -e "\n3. Building kallisto index..."
|
|
392
|
+
if command -v kallisto &> /dev/null; then
    # Locate the downloaded cDNA FASTA with a glob loop rather than parsing
    # `ls` output: this copes with spaces in file names and with several
    # matching files (the first match is used).
    CDNA_FILE=""
    for candidate in *.cdna.all.fa.gz; do
        # When nothing matches, the unexpanded pattern itself is iterated;
        # the -f test filters that case out.
        if [ -f "$candidate" ]; then
            CDNA_FILE="$candidate"
            break
        fi
    done

    if [ -n "$CDNA_FILE" ]; then
        # Only report success when kallisto actually exits cleanly.
        if kallisto index -i transcriptome.idx "$CDNA_FILE"; then
            echo "Kallisto index created: transcriptome.idx"
        else
            echo "kallisto index failed for $CDNA_FILE" >&2
        fi
    else
        echo "cDNA FASTA file not found"
    fi
else
    echo "kallisto not installed, skipping index building"
fi
|
|
404
|
+
|
|
405
|
+
# Step 4: Download genome for alignment-based methods
|
|
406
|
+
echo -e "\n4. Downloading genome sequence..."
|
|
407
|
+
gget ref -w dna -r $RELEASE -d $SPECIES -o human_ref_dna.json
|
|
408
|
+
|
|
409
|
+
# Step 5: Get gene information for genes of interest
|
|
410
|
+
echo -e "\n5. Getting information for specific genes..."
|
|
411
|
+
gget search -s $SPECIES "TP53 BRCA1 BRCA2" -o key_genes.csv
|
|
412
|
+
|
|
413
|
+
echo -e "\nReference transcriptome building completed!"
|
|
414
|
+
```
|
|
415
|
+
|
|
416
|
+
```python
|
|
417
|
+
# Python version
|
|
418
|
+
import gget
|
|
419
|
+
import json
|
|
420
|
+
|
|
421
|
+
print("Reference Transcriptome Building Workflow")
|
|
422
|
+
print("=" * 50)
|
|
423
|
+
|
|
424
|
+
# Configuration
|
|
425
|
+
species = "homo_sapiens"
|
|
426
|
+
release = 110
|
|
427
|
+
genes_of_interest = ["TP53", "BRCA1", "BRCA2", "MYC", "EGFR"]
|
|
428
|
+
|
|
429
|
+
# Step 1: Get reference information
|
|
430
|
+
print("\n1. Getting reference information...")
|
|
431
|
+
ref_info = gget.ref(species, release=release)
|
|
432
|
+
|
|
433
|
+
# Save reference information
|
|
434
|
+
with open("reference_info.json", "w") as f:
|
|
435
|
+
json.dump(ref_info, f, indent=2)
|
|
436
|
+
print("Reference information saved to reference_info.json")
|
|
437
|
+
|
|
438
|
+
# Step 2: Download specific files
|
|
439
|
+
print("\n2. Downloading reference files...")
|
|
440
|
+
# GTF annotation
|
|
441
|
+
gget.ref(species, which="gtf", release=release, download=True)
|
|
442
|
+
# cDNA sequences
|
|
443
|
+
gget.ref(species, which="cdna", release=release, download=True)
|
|
444
|
+
|
|
445
|
+
# Step 3: Get information for genes of interest
|
|
446
|
+
print(f"\n3. Getting information for {len(genes_of_interest)} genes...")
|
|
447
|
+
gene_data = []
|
|
448
|
+
for gene in genes_of_interest:
|
|
449
|
+
result = gget.search([gene], species=species, limit=1)
|
|
450
|
+
if len(result) > 0:
|
|
451
|
+
gene_data.append(result.iloc[0])
|
|
452
|
+
|
|
453
|
+
# Get detailed info
|
|
454
|
+
if gene_data:
|
|
455
|
+
gene_ids = [g["ensembl_id"] for g in gene_data]
|
|
456
|
+
detailed_info = gget.info(gene_ids)
|
|
457
|
+
detailed_info.to_csv("genes_of_interest_info.csv", index=False)
|
|
458
|
+
print("Gene information saved to genes_of_interest_info.csv")
|
|
459
|
+
|
|
460
|
+
# Step 4: Get sequences
|
|
461
|
+
print("\n4. Retrieving sequences...")
|
|
462
|
+
sequences_nt = gget.seq(gene_ids)
|
|
463
|
+
sequences_aa = gget.seq(gene_ids, translate=True)
|
|
464
|
+
|
|
465
|
+
with open("key_genes_nucleotide.fasta", "w") as f:
|
|
466
|
+
f.write(sequences_nt)
|
|
467
|
+
with open("key_genes_protein.fasta", "w") as f:
|
|
468
|
+
f.write(sequences_aa)
|
|
469
|
+
|
|
470
|
+
print("\nReference transcriptome building completed!")
|
|
471
|
+
print(f"Files created:")
|
|
472
|
+
print(" - reference_info.json")
|
|
473
|
+
print(" - genes_of_interest_info.csv")
|
|
474
|
+
print(" - key_genes_nucleotide.fasta")
|
|
475
|
+
print(" - key_genes_protein.fasta")
|
|
476
|
+
```
|
|
477
|
+
|
|
478
|
+
---
|
|
479
|
+
|
|
480
|
+
## Mutation Impact Assessment
|
|
481
|
+
|
|
482
|
+
Analyze the impact of genetic mutations on protein structure and function.
|
|
483
|
+
|
|
484
|
+
```python
|
|
485
|
+
import gget
|
|
486
|
+
import pandas as pd
|
|
487
|
+
|
|
488
|
+
print("Mutation Impact Assessment Workflow")
|
|
489
|
+
print("=" * 50)
|
|
490
|
+
|
|
491
|
+
# Define mutations to analyze
|
|
492
|
+
mutations = [
|
|
493
|
+
{"gene": "TP53", "mutation": "c.818G>A", "description": "R273H hotspot"},
|
|
494
|
+
{"gene": "EGFR", "mutation": "c.2573T>G", "description": "L858R activating"},
|
|
495
|
+
]
|
|
496
|
+
|
|
497
|
+
# Step 1: Get gene information
|
|
498
|
+
print("\n1. Getting gene information...")
|
|
499
|
+
for mut in mutations:
|
|
500
|
+
results = gget.search([mut["gene"]], species="homo_sapiens", limit=1)
|
|
501
|
+
if len(results) > 0:
|
|
502
|
+
mut["ensembl_id"] = results["ensembl_id"].iloc[0]
|
|
503
|
+
print(f"{mut['gene']}: {mut['ensembl_id']}")
|
|
504
|
+
|
|
505
|
+
# Step 2: Get sequences
|
|
506
|
+
print("\n2. Retrieving wild-type sequences...")
|
|
507
|
+
for mut in mutations:
|
|
508
|
+
# Get nucleotide sequence
|
|
509
|
+
nt_seq = gget.seq(mut["ensembl_id"])
|
|
510
|
+
mut["wt_sequence"] = nt_seq
|
|
511
|
+
|
|
512
|
+
# Get protein sequence
|
|
513
|
+
aa_seq = gget.seq(mut["ensembl_id"], translate=True)
|
|
514
|
+
mut["wt_protein"] = aa_seq
|
|
515
|
+
|
|
516
|
+
# Step 3: Generate mutated sequences
|
|
517
|
+
print("\n3. Generating mutated sequences...")
|
|
518
|
+
# Create mutation dataframe for gget mutate
|
|
519
|
+
mut_df = pd.DataFrame({
|
|
520
|
+
"seq_ID": [m["gene"] for m in mutations],
|
|
521
|
+
"mutation": [m["mutation"] for m in mutations]
|
|
522
|
+
})
|
|
523
|
+
|
|
524
|
+
# For each mutation
|
|
525
|
+
for mut in mutations:
|
|
526
|
+
# Extract sequence from FASTA
|
|
527
|
+
lines = mut["wt_sequence"].split("\n")
|
|
528
|
+
seq = "".join(lines[1:])
|
|
529
|
+
|
|
530
|
+
# Create single mutation df
|
|
531
|
+
single_mut = pd.DataFrame({
|
|
532
|
+
"seq_ID": [mut["gene"]],
|
|
533
|
+
"mutation": [mut["mutation"]]
|
|
534
|
+
})
|
|
535
|
+
|
|
536
|
+
# Generate mutated sequence
|
|
537
|
+
mutated = gget.mutate([seq], mutations=single_mut)
|
|
538
|
+
mut["mutated_sequence"] = mutated
|
|
539
|
+
|
|
540
|
+
print("Mutated sequences generated")
|
|
541
|
+
|
|
542
|
+
# Step 4: Get existing structure information
|
|
543
|
+
print("\n4. Getting structure information...")
|
|
544
|
+
for mut in mutations:
|
|
545
|
+
# Get info with PDB IDs
|
|
546
|
+
info = gget.info([mut["ensembl_id"]], pdb=True)
|
|
547
|
+
|
|
548
|
+
if "pdb_id" in info.columns and pd.notna(info["pdb_id"].iloc[0]):
|
|
549
|
+
pdb_ids = info["pdb_id"].iloc[0].split(";")
|
|
550
|
+
print(f"\n{mut['gene']} PDB structures: {', '.join(pdb_ids[:3])}")
|
|
551
|
+
|
|
552
|
+
# Download first structure
|
|
553
|
+
if len(pdb_ids) > 0:
|
|
554
|
+
pdb_id = pdb_ids[0].strip()
|
|
555
|
+
mut["pdb_id"] = pdb_id
|
|
556
|
+
gget.pdb(pdb_id, save=True)
|
|
557
|
+
else:
|
|
558
|
+
print(f"\n{mut['gene']}: No PDB structure available")
|
|
559
|
+
mut["pdb_id"] = None
|
|
560
|
+
|
|
561
|
+
# Step 5: Predict structures with AlphaFold (optional)
|
|
562
|
+
print("\n5. Predicting structures with AlphaFold...")
|
|
563
|
+
# Note: Requires gget setup alphafold and is computationally intensive
|
|
564
|
+
# Uncomment to run:
|
|
565
|
+
# for mut in mutations:
|
|
566
|
+
# print(f"Predicting {mut['gene']} wild-type structure...")
|
|
567
|
+
# wt_structure = gget.alphafold(mut["wt_protein"])
|
|
568
|
+
#
|
|
569
|
+
# print(f"Predicting {mut['gene']} mutant structure...")
|
|
570
|
+
# # Would need to translate mutated sequence first
|
|
571
|
+
# # mutant_structure = gget.alphafold(mutated_protein)
|
|
572
|
+
print("(AlphaFold prediction skipped - uncomment to run)")
|
|
573
|
+
|
|
574
|
+
# Step 6: Find functional motifs
|
|
575
|
+
print("\n6. Identifying functional motifs...")
|
|
576
|
+
# Note: Requires gget setup elm
|
|
577
|
+
# Uncomment to run:
|
|
578
|
+
# for mut in mutations:
|
|
579
|
+
# ortholog_df, regex_df = gget.elm(mut["wt_protein"])
|
|
580
|
+
# print(f"\n{mut['gene']} functional motifs:")
|
|
581
|
+
# print(regex_df)
|
|
582
|
+
print("(ELM analysis skipped - uncomment to run)")
|
|
583
|
+
|
|
584
|
+
# Step 7: Get disease associations
|
|
585
|
+
print("\n7. Getting disease associations...")
|
|
586
|
+
for mut in mutations:
|
|
587
|
+
diseases = gget.opentargets(
|
|
588
|
+
mut["ensembl_id"],
|
|
589
|
+
resource="diseases",
|
|
590
|
+
limit=5
|
|
591
|
+
)
|
|
592
|
+
print(f"\n{mut['gene']} ({mut['description']}) disease associations:")
|
|
593
|
+
print(diseases[["disease_name", "overall_score"]])
|
|
594
|
+
|
|
595
|
+
# Step 8: Query COSMIC for mutation frequency
|
|
596
|
+
print("\n8. Querying COSMIC database...")
|
|
597
|
+
# Note: Requires COSMIC database download
|
|
598
|
+
# Uncomment to run:
|
|
599
|
+
# for mut in mutations:
|
|
600
|
+
# cosmic_results = gget.cosmic(
|
|
601
|
+
# mut["mutation"],
|
|
602
|
+
# cosmic_tsv_path="cosmic_cancer.tsv",
|
|
603
|
+
# limit=10
|
|
604
|
+
# )
|
|
605
|
+
# print(f"\n{mut['gene']} {mut['mutation']} in COSMIC:")
|
|
606
|
+
# print(cosmic_results)
|
|
607
|
+
print("(COSMIC query skipped - requires database download)")
|
|
608
|
+
|
|
609
|
+
print("\nMutation impact assessment completed!")
|
|
610
|
+
```
|
|
611
|
+
|
|
612
|
+
---
|
|
613
|
+
|
|
614
|
+
## Drug Target Discovery
|
|
615
|
+
|
|
616
|
+
Identify and validate potential drug targets for specific diseases.
|
|
617
|
+
|
|
618
|
+
```python
|
|
619
|
+
import gget
|
|
620
|
+
import pandas as pd
|
|
621
|
+
|
|
622
|
+
print("Drug Target Discovery Workflow")
|
|
623
|
+
print("=" * 50)
|
|
624
|
+
|
|
625
|
+
# Step 1: Search for disease-related genes
|
|
626
|
+
disease = "alzheimer"
|
|
627
|
+
print(f"\n1. Searching for {disease} disease genes...")
|
|
628
|
+
genes = gget.search([disease], species="homo_sapiens", limit=50)
|
|
629
|
+
print(f"Found {len(genes)} potential genes")
|
|
630
|
+
|
|
631
|
+
# Step 2: Get detailed information
|
|
632
|
+
print("\n2. Getting detailed gene information...")
|
|
633
|
+
gene_ids = genes["ensembl_id"].tolist()[:20] # Top 20
|
|
634
|
+
gene_info = gget.info(gene_ids[:10]) # Limit to avoid timeout
|
|
635
|
+
|
|
636
|
+
# Step 3: Get disease associations from OpenTargets
|
|
637
|
+
print("\n3. Getting disease associations...")
|
|
638
|
+
disease_scores = []
|
|
639
|
+
for gene_id, gene_name in zip(gene_info["ensembl_id"], gene_info["gene_name"]):
|
|
640
|
+
diseases = gget.opentargets(gene_id, resource="diseases", limit=10)
|
|
641
|
+
|
|
642
|
+
# Filter for Alzheimer's disease
|
|
643
|
+
alzheimer = diseases[diseases["disease_name"].str.contains("Alzheimer", case=False, na=False)]
|
|
644
|
+
|
|
645
|
+
if len(alzheimer) > 0:
|
|
646
|
+
disease_scores.append({
|
|
647
|
+
"ensembl_id": gene_id,
|
|
648
|
+
"gene_name": gene_name,
|
|
649
|
+
"disease_score": alzheimer["overall_score"].max()
|
|
650
|
+
})
|
|
651
|
+
|
|
652
|
+
# Name the columns explicitly so that an empty disease_scores list still
# produces a frame with a "disease_score" column. Without them, sort_values
# raises KeyError when no gene has an Alzheimer association.
disease_df = pd.DataFrame(
    disease_scores,
    columns=["ensembl_id", "gene_name", "disease_score"],
).sort_values("disease_score", ascending=False)
|
|
653
|
+
print("\nTop disease-associated genes:")
|
|
654
|
+
print(disease_df.head(10))
|
|
655
|
+
|
|
656
|
+
# Step 4: Get tractability information
|
|
657
|
+
print("\n4. Assessing target tractability...")
|
|
658
|
+
top_targets = disease_df.head(5)
|
|
659
|
+
for _, row in top_targets.iterrows():
|
|
660
|
+
tractability = gget.opentargets(
|
|
661
|
+
row["ensembl_id"],
|
|
662
|
+
resource="tractability"
|
|
663
|
+
)
|
|
664
|
+
print(f"\n{row['gene_name']} tractability:")
|
|
665
|
+
print(tractability)
|
|
666
|
+
|
|
667
|
+
# Step 5: Get expression data
|
|
668
|
+
print("\n5. Getting tissue expression data...")
|
|
669
|
+
for _, row in top_targets.iterrows():
|
|
670
|
+
# Brain expression from OpenTargets
|
|
671
|
+
expression = gget.opentargets(
|
|
672
|
+
row["ensembl_id"],
|
|
673
|
+
resource="expression",
|
|
674
|
+
filter_tissue="brain"
|
|
675
|
+
)
|
|
676
|
+
print(f"\n{row['gene_name']} brain expression:")
|
|
677
|
+
print(expression)
|
|
678
|
+
|
|
679
|
+
# Tissue expression from ARCHS4
|
|
680
|
+
tissue_expr = gget.archs4(row["gene_name"], which="tissue")
|
|
681
|
+
brain_expr = tissue_expr[tissue_expr["tissue"].str.contains("brain", case=False, na=False)]
|
|
682
|
+
print(f"ARCHS4 brain expression:")
|
|
683
|
+
print(brain_expr)
|
|
684
|
+
|
|
685
|
+
# Step 6: Check for existing drugs
|
|
686
|
+
print("\n6. Checking for existing drugs...")
|
|
687
|
+
for _, row in top_targets.iterrows():
|
|
688
|
+
drugs = gget.opentargets(row["ensembl_id"], resource="drugs", limit=5)
|
|
689
|
+
print(f"\n{row['gene_name']} drug associations:")
|
|
690
|
+
if len(drugs) > 0:
|
|
691
|
+
print(drugs[["drug_name", "drug_type", "max_phase_for_all_diseases"]])
|
|
692
|
+
else:
|
|
693
|
+
print("No drugs found")
|
|
694
|
+
|
|
695
|
+
# Step 7: Get protein-protein interactions
|
|
696
|
+
print("\n7. Getting protein-protein interactions...")
|
|
697
|
+
for _, row in top_targets.iterrows():
|
|
698
|
+
interactions = gget.opentargets(
|
|
699
|
+
row["ensembl_id"],
|
|
700
|
+
resource="interactions",
|
|
701
|
+
limit=10
|
|
702
|
+
)
|
|
703
|
+
print(f"\n{row['gene_name']} interacts with:")
|
|
704
|
+
if len(interactions) > 0:
|
|
705
|
+
print(interactions[["gene_b_symbol", "interaction_score"]])
|
|
706
|
+
|
|
707
|
+
# Step 8: Enrichment analysis
|
|
708
|
+
print("\n8. Performing pathway enrichment...")
|
|
709
|
+
gene_list = top_targets["gene_name"].tolist()
|
|
710
|
+
enrichment = gget.enrichr(gene_list, database="pathway", plot=True)
|
|
711
|
+
print("\nTop enriched pathways:")
|
|
712
|
+
print(enrichment.head(10))
|
|
713
|
+
|
|
714
|
+
# Step 9: Get structure information
|
|
715
|
+
print("\n9. Getting structure information...")
|
|
716
|
+
for _, row in top_targets.iterrows():
|
|
717
|
+
info = gget.info([row["ensembl_id"]], pdb=True)
|
|
718
|
+
|
|
719
|
+
if "pdb_id" in info.columns and pd.notna(info["pdb_id"].iloc[0]):
|
|
720
|
+
pdb_ids = info["pdb_id"].iloc[0].split(";")
|
|
721
|
+
print(f"\n{row['gene_name']} PDB structures: {', '.join(pdb_ids[:3])}")
|
|
722
|
+
else:
|
|
723
|
+
print(f"\n{row['gene_name']}: No PDB structure available")
|
|
724
|
+
# Could predict with AlphaFold
|
|
725
|
+
print(f" Consider AlphaFold prediction")
|
|
726
|
+
|
|
727
|
+
# Step 10: Generate target summary report
|
|
728
|
+
print("\n10. Generating target summary report...")
|
|
729
|
+
report = []
|
|
730
|
+
for _, row in top_targets.iterrows():
|
|
731
|
+
report.append({
|
|
732
|
+
"Gene": row["gene_name"],
|
|
733
|
+
"Ensembl ID": row["ensembl_id"],
|
|
734
|
+
"Disease Score": row["disease_score"],
|
|
735
|
+
"Target Status": "High Priority"
|
|
736
|
+
})
|
|
737
|
+
|
|
738
|
+
report_df = pd.DataFrame(report)
|
|
739
|
+
report_df.to_csv("drug_targets_report.csv", index=False)
|
|
740
|
+
print("\nTarget report saved to drug_targets_report.csv")
|
|
741
|
+
|
|
742
|
+
print("\nDrug target discovery workflow completed!")
|
|
743
|
+
```
|
|
744
|
+
|
|
745
|
+
---
|
|
746
|
+
|
|
747
|
+
## Tips for Workflow Development
|
|
748
|
+
|
|
749
|
+
### Error Handling
|
|
750
|
+
```python
|
|
751
|
+
import gget
|
|
752
|
+
|
|
753
|
+
def safe_gget_call(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and return its result, or ``None`` on error.

    Any ``Exception`` raised by the call is reported on stdout and swallowed,
    so a single failing query does not abort a multi-step workflow.

    Args:
        func: Callable to invoke (for example ``gget.search``).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns, or ``None`` if it raised.
    """
    try:
        return func(*args, **kwargs)
    except Exception as e:
        # Some callables (e.g. functools.partial objects) have no __name__;
        # fall back to repr() so that reporting the error cannot itself raise.
        name = getattr(func, "__name__", repr(func))
        print(f"Error in {name}: {str(e)}")
        return None
|
|
761
|
+
|
|
762
|
+
# Usage
|
|
763
|
+
result = safe_gget_call(gget.search, ["ACE2"], species="homo_sapiens")
|
|
764
|
+
if result is not None:
|
|
765
|
+
print(result)
|
|
766
|
+
```
|
|
767
|
+
|
|
768
|
+
### Rate Limiting
|
|
769
|
+
```python
|
|
770
|
+
import time
|
|
771
|
+
import gget
|
|
772
|
+
|
|
773
|
+
def rate_limited_queries(gene_ids, delay=1):
    """Query ``gget.info`` once per gene, pausing between requests.

    Args:
        gene_ids: Iterable of gene identifiers; each one is passed to
            ``gget.info`` in its own single-element list.
        delay: Seconds to sleep between consecutive queries. No sleep
            happens after the final query.

    Returns:
        A single DataFrame with the per-gene results concatenated and
        re-indexed, or an empty DataFrame when ``gene_ids`` is empty.
    """
    # Imported locally because this snippet's imports do not include pandas.
    import pandas as pd

    gene_ids = list(gene_ids)
    if not gene_ids:
        # pd.concat([]) raises ValueError, so short-circuit empty input.
        return pd.DataFrame()

    results = []
    total = len(gene_ids)
    for i, gene_id in enumerate(gene_ids):
        print(f"Querying {i+1}/{total}: {gene_id}")
        results.append(gget.info([gene_id]))

        if i < total - 1:  # Don't sleep after last query
            time.sleep(delay)

    return pd.concat(results, ignore_index=True)
|
|
785
|
+
```
|
|
786
|
+
|
|
787
|
+
### Caching Results
|
|
788
|
+
```python
|
|
789
|
+
import os
|
|
790
|
+
import pickle
|
|
791
|
+
import gget
|
|
792
|
+
|
|
793
|
+
def cached_gget(cache_file, func, *args, **kwargs):
    """Return ``func(*args, **kwargs)``, caching the result on disk with pickle.

    If ``cache_file`` already exists, its unpickled contents are returned and
    ``func`` is not called. Otherwise ``func`` is called and its result is
    pickled to ``cache_file`` before being returned.

    Args:
        cache_file: Path of the pickle file used as the cache.
        func: Callable to invoke on a cache miss (for example ``gget.info``).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The cached or freshly computed result.

    Raises:
        Exception: Whatever ``func`` or pickling raises; in that case no
            cache file is left behind.
    """
    if os.path.exists(cache_file):
        print(f"Loading from cache: {cache_file}")
        # SECURITY: pickle.load can execute arbitrary code. Only point
        # cache_file at files this workflow wrote itself.
        with open(cache_file, "rb") as f:
            return pickle.load(f)

    result = func(*args, **kwargs)

    # Write to a temporary file and rename it into place, so a failed or
    # interrupted dump never leaves a truncated cache that breaks later loads.
    tmp_file = f"{cache_file}.tmp"
    try:
        with open(tmp_file, "wb") as f:
            pickle.dump(result, f)
        os.replace(tmp_file, cache_file)
    except Exception:
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
        raise
    print(f"Saved to cache: {cache_file}")

    return result
|
|
807
|
+
|
|
808
|
+
# Usage
|
|
809
|
+
result = cached_gget("ace2_info.pkl", gget.info, ["ENSG00000130234"])
|
|
810
|
+
```
|
|
811
|
+
|
|
812
|
+
---
|
|
813
|
+
|
|
814
|
+
These workflows demonstrate how to combine multiple gget modules for comprehensive bioinformatics analyses. Adapt them to your specific research questions and data types.
|