smftools 0.1.7__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {smftools-0.1.7 → smftools-0.2.3}/PKG-INFO +14 -9
- {smftools-0.1.7 → smftools-0.2.3}/README.md +6 -6
- smftools-0.2.3/docs/source/basic_usage.md +114 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/installation.md +6 -3
- {smftools-0.1.7 → smftools-0.2.3}/experiment_config.csv +2 -7
- {smftools-0.1.7 → smftools-0.2.3}/pyproject.toml +10 -2
- {smftools-0.1.7 → smftools-0.2.3}/requirements.txt +7 -2
- {smftools-0.1.7 → smftools-0.2.3}/smftools/__init__.py +7 -6
- smftools-0.2.3/smftools/_version.py +1 -0
- smftools-0.2.3/smftools/cli/cli_flows.py +94 -0
- smftools-0.2.3/smftools/cli/hmm_adata.py +338 -0
- smftools-0.2.3/smftools/cli/load_adata.py +577 -0
- smftools-0.2.3/smftools/cli/preprocess_adata.py +363 -0
- smftools-0.2.3/smftools/cli/spatial_adata.py +564 -0
- smftools-0.2.3/smftools/cli_entry.py +435 -0
- smftools-0.2.3/smftools/config/__init__.py +1 -0
- smftools-0.2.3/smftools/config/conversion.yaml +38 -0
- smftools-0.2.3/smftools/config/deaminase.yaml +61 -0
- smftools-0.2.3/smftools/config/default.yaml +264 -0
- smftools-0.2.3/smftools/config/direct.yaml +41 -0
- smftools-0.2.3/smftools/config/discover_input_files.py +115 -0
- smftools-0.2.3/smftools/config/experiment_config.py +1288 -0
- smftools-0.2.3/smftools/hmm/HMM.py +1576 -0
- smftools-0.2.3/smftools/hmm/__init__.py +20 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/hmm}/apply_hmm_batched.py +8 -7
- smftools-0.2.3/smftools/hmm/call_hmm_peaks.py +106 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/hmm}/display_hmm.py +3 -3
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/hmm}/nucleosome_hmm_refinement.py +2 -2
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/hmm}/train_hmm.py +1 -1
- smftools-0.2.3/smftools/informatics/__init__.py +20 -0
- smftools-0.2.3/smftools/informatics/archived/deaminase_smf.py +132 -0
- smftools-0.2.3/smftools/informatics/archived/fast5_to_pod5.py +43 -0
- smftools-0.2.3/smftools/informatics/archived/helpers/archived/__init__.py +71 -0
- smftools-0.2.3/smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +126 -0
- smftools-0.2.3/smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +87 -0
- smftools-0.2.3/smftools/informatics/archived/helpers/archived/bam_qc.py +213 -0
- smftools-0.2.3/smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +90 -0
- smftools-0.2.3/smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +259 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/count_aligned_reads.py +2 -2
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/demux_and_index_BAM.py +8 -10
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/extract_base_identities.py +30 -4
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/extract_mods.py +15 -13
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/extract_read_features_from_bam.py +4 -2
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/find_conversion_sites.py +5 -4
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/generate_converted_FASTA.py +2 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/get_chromosome_lengths.py +9 -8
- smftools-0.2.3/smftools/informatics/archived/helpers/archived/index_fasta.py +24 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/make_modbed.py +1 -2
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/modQC.py +2 -2
- smftools-0.2.3/smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +250 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/separate_bam_by_bc.py +8 -7
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/split_and_index_BAM.py +8 -12
- smftools-0.2.3/smftools/informatics/archived/subsample_fasta_from_bed.py +49 -0
- smftools-0.2.3/smftools/informatics/bam_functions.py +812 -0
- smftools-0.2.3/smftools/informatics/basecalling.py +67 -0
- smftools-0.2.3/smftools/informatics/bed_functions.py +366 -0
- smftools-0.2.3/smftools/informatics/binarize_converted_base_identities.py +172 -0
- smftools-0.1.7/smftools/informatics/helpers/converted_BAM_to_adata_II.py → smftools-0.2.3/smftools/informatics/converted_BAM_to_adata.py +198 -50
- smftools-0.2.3/smftools/informatics/fasta_functions.py +255 -0
- smftools-0.2.3/smftools/informatics/h5ad_functions.py +197 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics}/modkit_extract_to_adata.py +147 -61
- smftools-0.2.3/smftools/informatics/modkit_functions.py +129 -0
- smftools-0.2.3/smftools/informatics/ohe.py +160 -0
- smftools-0.2.3/smftools/informatics/pod5_functions.py +224 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics}/run_multiqc.py +5 -2
- smftools-0.2.3/smftools/machine_learning/__init__.py +12 -0
- smftools-0.2.3/smftools/machine_learning/data/__init__.py +2 -0
- smftools-0.2.3/smftools/machine_learning/data/anndata_data_module.py +234 -0
- smftools-0.2.3/smftools/machine_learning/evaluation/__init__.py +2 -0
- smftools-0.2.3/smftools/machine_learning/evaluation/eval_utils.py +31 -0
- smftools-0.2.3/smftools/machine_learning/evaluation/evaluators.py +223 -0
- smftools-0.2.3/smftools/machine_learning/inference/__init__.py +3 -0
- smftools-0.2.3/smftools/machine_learning/inference/inference_utils.py +27 -0
- smftools-0.2.3/smftools/machine_learning/inference/lightning_inference.py +68 -0
- smftools-0.2.3/smftools/machine_learning/inference/sklearn_inference.py +55 -0
- smftools-0.2.3/smftools/machine_learning/inference/sliding_window_inference.py +114 -0
- smftools-0.2.3/smftools/machine_learning/models/base.py +295 -0
- smftools-0.2.3/smftools/machine_learning/models/cnn.py +138 -0
- smftools-0.2.3/smftools/machine_learning/models/lightning_base.py +345 -0
- smftools-0.2.3/smftools/machine_learning/models/mlp.py +26 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/machine_learning}/models/positional.py +3 -2
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/machine_learning}/models/rnn.py +2 -1
- smftools-0.2.3/smftools/machine_learning/models/sklearn_models.py +273 -0
- smftools-0.2.3/smftools/machine_learning/models/transformer.py +303 -0
- smftools-0.2.3/smftools/machine_learning/training/__init__.py +2 -0
- smftools-0.2.3/smftools/machine_learning/training/train_lightning_model.py +135 -0
- smftools-0.2.3/smftools/machine_learning/training/train_sklearn_model.py +114 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/plotting/__init__.py +4 -1
- smftools-0.2.3/smftools/plotting/autocorrelation_plotting.py +609 -0
- smftools-0.2.3/smftools/plotting/general_plotting.py +1357 -0
- smftools-0.2.3/smftools/plotting/hmm_plotting.py +260 -0
- smftools-0.2.3/smftools/plotting/qc_plotting.py +270 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/__init__.py +15 -8
- smftools-0.2.3/smftools/preprocessing/add_read_length_and_mapping_qc.py +129 -0
- smftools-0.2.3/smftools/preprocessing/append_base_context.py +122 -0
- smftools-0.2.3/smftools/preprocessing/append_binary_layer_by_base_context.py +143 -0
- smftools-0.2.3/smftools/preprocessing/binarize.py +17 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/binarize_on_Youden.py +2 -2
- smftools-0.2.3/smftools/preprocessing/calculate_complexity_II.py +248 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/calculate_coverage.py +10 -1
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/calculate_position_Youden.py +1 -1
- smftools-0.2.3/smftools/preprocessing/calculate_read_modification_stats.py +101 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/clean_NaN.py +17 -1
- smftools-0.2.3/smftools/preprocessing/filter_reads_on_length_quality_mapping.py +158 -0
- smftools-0.2.3/smftools/preprocessing/filter_reads_on_modification_thresholds.py +352 -0
- smftools-0.2.3/smftools/preprocessing/flag_duplicate_reads.py +1351 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/invert_adata.py +12 -5
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/load_sample_sheet.py +19 -4
- smftools-0.2.3/smftools/readwrite.py +1130 -0
- smftools-0.2.3/smftools/tools/__init__.py +20 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/tools/calculate_umap.py +5 -5
- {smftools-0.1.7 → smftools-0.2.3}/smftools/tools/general_tools.py +3 -3
- smftools-0.2.3/smftools/tools/position_stats.py +601 -0
- smftools-0.2.3/smftools/tools/read_stats.py +184 -0
- smftools-0.2.3/smftools/tools/spatial_autocorrelation.py +562 -0
- smftools-0.1.7/docs/source/basic_usage.md +0 -75
- smftools-0.1.7/smftools/_version.py +0 -1
- smftools-0.1.7/smftools/informatics/__init__.py +0 -16
- smftools-0.1.7/smftools/informatics/fast5_to_pod5.py +0 -21
- smftools-0.1.7/smftools/informatics/helpers/LoadExperimentConfig.py +0 -75
- smftools-0.1.7/smftools/informatics/helpers/__init__.py +0 -74
- smftools-0.1.7/smftools/informatics/helpers/align_and_sort_BAM.py +0 -59
- smftools-0.1.7/smftools/informatics/helpers/aligned_BAM_to_bed.py +0 -74
- smftools-0.1.7/smftools/informatics/helpers/bam_qc.py +0 -66
- smftools-0.1.7/smftools/informatics/helpers/bed_to_bigwig.py +0 -39
- smftools-0.1.7/smftools/informatics/helpers/binarize_converted_base_identities.py +0 -79
- smftools-0.1.7/smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -55
- smftools-0.1.7/smftools/informatics/helpers/index_fasta.py +0 -12
- smftools-0.1.7/smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +0 -53
- smftools-0.1.7/smftools/informatics/load_adata.py +0 -182
- smftools-0.1.7/smftools/informatics/readwrite.py +0 -106
- smftools-0.1.7/smftools/informatics/subsample_fasta_from_bed.py +0 -47
- smftools-0.1.7/smftools/plotting/general_plotting.py +0 -205
- smftools-0.1.7/smftools/preprocessing/append_C_context.py +0 -82
- smftools-0.1.7/smftools/preprocessing/calculate_converted_read_methylation_stats.py +0 -94
- smftools-0.1.7/smftools/preprocessing/filter_converted_reads_on_methylation.py +0 -44
- smftools-0.1.7/smftools/preprocessing/filter_reads_on_length.py +0 -51
- smftools-0.1.7/smftools/preprocessing/flag_duplicate_reads.py +0 -149
- smftools-0.1.7/smftools/preprocessing/make_dirs.py +0 -21
- smftools-0.1.7/smftools/readwrite.py +0 -198
- smftools-0.1.7/smftools/tools/__init__.py +0 -49
- smftools-0.1.7/smftools/tools/call_hmm_peaks.py +0 -105
- smftools-0.1.7/smftools/tools/data/__init__.py +0 -2
- smftools-0.1.7/smftools/tools/data/anndata_data_module.py +0 -90
- smftools-0.1.7/smftools/tools/inference/__init__.py +0 -1
- smftools-0.1.7/smftools/tools/inference/lightning_inference.py +0 -41
- smftools-0.1.7/smftools/tools/models/base.py +0 -14
- smftools-0.1.7/smftools/tools/models/cnn.py +0 -34
- smftools-0.1.7/smftools/tools/models/lightning_base.py +0 -41
- smftools-0.1.7/smftools/tools/models/mlp.py +0 -17
- smftools-0.1.7/smftools/tools/models/sklearn_models.py +0 -40
- smftools-0.1.7/smftools/tools/models/transformer.py +0 -133
- smftools-0.1.7/smftools/tools/position_stats.py +0 -239
- smftools-0.1.7/smftools/tools/read_stats.py +0 -70
- smftools-0.1.7/smftools/tools/training/__init__.py +0 -1
- smftools-0.1.7/smftools/tools/training/train_lightning_model.py +0 -47
- {smftools-0.1.7 → smftools-0.2.3}/.gitattributes +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/.gitignore +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/.readthedocs.yaml +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/CONTRIBUTING.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/LICENSE +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/Makefile +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/make.bat +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/_static/converted_BAM_to_adata.png +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/_static/modkit_extract_to_adata.png +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/_static/smftools-1.svg +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/_static/smftools-1.tif +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/_static/smftools_informatics_diagram.pdf +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/_static/smftools_informatics_diagram.png +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/_static/smftools_preprocessing_diagram.png +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/_templates/tmp +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/api/datasets.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/api/index.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/api/informatics.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/api/preprocessing.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/api/tools.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/conf.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/contributors.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/dev/index.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/index.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/references.bib +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/references.rst +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/release-notes/0.1.0.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/release-notes/index.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/requirements.txt +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/docs/source/tutorials/index.md +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/notebooks/Kissiov_and_McKenna_2025_example_notebook.ipynb +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/notebooks/Kissiov_and_McKenna_2025_sample_sheet.csv +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/sample_sheet.csv +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/_settings.py +0 -0
- {smftools-0.1.7/smftools/tools/evaluation → smftools-0.2.3/smftools/cli}/__init__.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/datasets/F1_sample_sheet.csv +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/datasets/__init__.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/datasets/datasets.py +0 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/hmm}/calculate_distances.py +0 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/hmm}/hmm_readwrite.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/informatics/archived/bam_conversion.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/informatics/archived/bam_direct.py +0 -0
- {smftools-0.1.7/smftools/informatics → smftools-0.2.3/smftools/informatics/archived}/basecall_pod5s.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/informatics/archived/basecalls_to_adata.py +0 -0
- {smftools-0.1.7/smftools/informatics → smftools-0.2.3/smftools/informatics/archived}/conversion_smf.py +0 -0
- {smftools-0.1.7/smftools/informatics → smftools-0.2.3/smftools/informatics/archived}/direct_smf.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/canoncall.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/converted_BAM_to_adata.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/extract_read_lengths_from_bed.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/extract_readnames_from_BAM.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/get_native_references.py +0 -0
- {smftools-0.1.7/smftools/informatics → smftools-0.2.3/smftools/informatics/archived}/helpers/archived/informatics.py +0 -0
- {smftools-0.1.7/smftools/informatics → smftools-0.2.3/smftools/informatics/archived}/helpers/archived/load_adata.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/modcall.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/ohe_batching.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/ohe_layers_decode.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/one_hot_decode.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics/archived/helpers/archived}/one_hot_encode.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/informatics/archived/print_bam_query_seq.py +0 -0
- {smftools-0.1.7/smftools/informatics → smftools-0.2.3/smftools/informatics/archived}/subsample_pod5.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/informatics}/complement_base_list.py +0 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/machine_learning}/data/preprocessing.py +0 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/machine_learning}/models/__init__.py +0 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/machine_learning}/models/wrappers.py +0 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/machine_learning}/utils/__init__.py +0 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/machine_learning}/utils/device.py +0 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/machine_learning}/utils/grl.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/plotting/classifiers.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/plotting/position_stats.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/archives/mark_duplicates.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/archives/preprocessing.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/archives/remove_duplicates.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/binary_layers_to_ohe.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/calculate_complexity.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/calculate_consensus.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/calculate_pairwise_differences.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/calculate_pairwise_hamming_distances.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/calculate_read_length_stats.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/filter_adata_by_nan_proportion.py +0 -0
- {smftools-0.1.7/smftools/informatics/helpers → smftools-0.2.3/smftools/preprocessing}/make_dirs.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/min_non_diagonal.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/recipes.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/preprocessing/subsample_adata.py +0 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/tools/archived}/apply_hmm.py +0 -0
- {smftools-0.1.7/smftools/tools → smftools-0.2.3/smftools/tools/archived}/classifiers.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/tools/archived/classify_methylated_features.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/tools/archived/classify_non_methylated_features.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/tools/archived/subset_adata_v1.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/tools/archived/subset_adata_v2.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/tools/cluster_adata_on_methylation.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/smftools/tools/subset_adata.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/tests/datasets/test_datasets.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/tests/informatics/helpers/test_LoadExperimentConfig.py +0 -0
- {smftools-0.1.7 → smftools-0.2.3}/tests/test_readwrite.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: smftools
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Single Molecule Footprinting Analysis in Python.
|
|
5
5
|
Project-URL: Source, https://github.com/jkmckenna/smftools
|
|
6
6
|
Project-URL: Documentation, https://smftools.readthedocs.io/
|
|
@@ -43,9 +43,11 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
43
43
|
Classifier: Programming Language :: Python :: 3.12
|
|
44
44
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
45
45
|
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
46
|
-
Requires-Python:
|
|
46
|
+
Requires-Python: <3.13,>=3.9
|
|
47
47
|
Requires-Dist: anndata>=0.10.0
|
|
48
48
|
Requires-Dist: biopython>=1.79
|
|
49
|
+
Requires-Dist: captum
|
|
50
|
+
Requires-Dist: click
|
|
49
51
|
Requires-Dist: fastcluster
|
|
50
52
|
Requires-Dist: hydra-core
|
|
51
53
|
Requires-Dist: igraph
|
|
@@ -57,15 +59,18 @@ Requires-Dist: numpy<2,>=1.22.0
|
|
|
57
59
|
Requires-Dist: omegaconf
|
|
58
60
|
Requires-Dist: pandas>=1.4.2
|
|
59
61
|
Requires-Dist: pod5>=0.1.21
|
|
60
|
-
Requires-Dist:
|
|
62
|
+
Requires-Dist: pybedtools>=0.12.0
|
|
63
|
+
Requires-Dist: pybigwig>=0.3.24
|
|
61
64
|
Requires-Dist: pyfaidx>=0.8.0
|
|
62
65
|
Requires-Dist: pysam>=0.19.1
|
|
63
66
|
Requires-Dist: scanpy>=1.9
|
|
64
67
|
Requires-Dist: scikit-learn>=1.0.2
|
|
65
68
|
Requires-Dist: scipy>=1.7.3
|
|
66
69
|
Requires-Dist: seaborn>=0.11
|
|
70
|
+
Requires-Dist: shap
|
|
67
71
|
Requires-Dist: torch>=1.9.0
|
|
68
72
|
Requires-Dist: tqdm
|
|
73
|
+
Requires-Dist: upsetplot
|
|
69
74
|
Requires-Dist: wandb
|
|
70
75
|
Provides-Extra: docs
|
|
71
76
|
Requires-Dist: ipython>=7.20; extra == 'docs'
|
|
@@ -98,12 +103,9 @@ While most genomic data structures handle low-coverage data (<100X) along large
|
|
|
98
103
|
|
|
99
104
|
## Dependencies
|
|
100
105
|
The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools:
|
|
101
|
-
1) [Dorado](https://github.com/nanoporetech/dorado) ->
|
|
102
|
-
2) [
|
|
103
|
-
3) [
|
|
104
|
-
4) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting summary statistics and read level methylation calls from modified BAM files
|
|
105
|
-
5) [Bedtools](https://github.com/arq5x/bedtools2) -> For generating Bedgraphs from BAM alignment files.
|
|
106
|
-
6) [BedGraphToBigWig](https://genome.ucsc.edu/goldenPath/help/bigWig.html) -> For converting BedGraphs to BigWig files for IGV sessions.
|
|
106
|
+
1) [Dorado](https://github.com/nanoporetech/dorado) -> Basecalling, alignment, demultiplexing.
|
|
107
|
+
2) [Minimap2](https://github.com/lh3/minimap2) -> Alignment if not using dorado.
|
|
108
|
+
3) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting read level methylation metrics from modified BAM files.
|
|
107
109
|
|
|
108
110
|
## Modules
|
|
109
111
|
### Informatics: Processes raw Nanopore/Illumina data from SMF experiments into an AnnData object.
|
|
@@ -118,6 +120,9 @@ The following CLI tools need to be installed and configured before using the inf
|
|
|
118
120
|
|
|
119
121
|
## Announcements
|
|
120
122
|
|
|
123
|
+
### 11/05/25 - Version 0.2.1 is available through PyPI
|
|
124
|
+
Version 0.2.1 makes the core workflow (smftools load) a command line tool that takes in an experiment_config.csv file for input/output and parameter management.
|
|
125
|
+
|
|
121
126
|
### 05/29/25 - Version 0.1.6 is available through PyPI.
|
|
122
127
|
Informatics, preprocessing, tools, plotting modules have core functionality that is approaching stability on MacOS(Intel/Silicon) and Linux(Ubuntu). I will work on improving documentation/tutorials shortly. The base PyTorch/Scikit-Learn ML-infrastructure is going through some organizational changes to work with PyTorch Lightning, Hydra, and WanDB to facilitate organizational scaling, multi-device usage, and logging.
|
|
123
128
|
|
|
@@ -9,12 +9,9 @@ While most genomic data structures handle low-coverage data (<100X) along large
|
|
|
9
9
|
|
|
10
10
|
## Dependencies
|
|
11
11
|
The following CLI tools need to be installed and configured before using the informatics (smftools.inform) module of smftools:
|
|
12
|
-
1) [Dorado](https://github.com/nanoporetech/dorado) ->
|
|
13
|
-
2) [
|
|
14
|
-
3) [
|
|
15
|
-
4) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting summary statistics and read level methylation calls from modified BAM files
|
|
16
|
-
5) [Bedtools](https://github.com/arq5x/bedtools2) -> For generating Bedgraphs from BAM alignment files.
|
|
17
|
-
6) [BedGraphToBigWig](https://genome.ucsc.edu/goldenPath/help/bigWig.html) -> For converting BedGraphs to BigWig files for IGV sessions.
|
|
12
|
+
1) [Dorado](https://github.com/nanoporetech/dorado) -> Basecalling, alignment, demultiplexing.
|
|
13
|
+
2) [Minimap2](https://github.com/lh3/minimap2) -> Alignment if not using dorado.
|
|
14
|
+
3) [Modkit](https://github.com/nanoporetech/modkit) -> Extracting read level methylation metrics from modified BAM files.
|
|
18
15
|
|
|
19
16
|
## Modules
|
|
20
17
|
### Informatics: Processes raw Nanopore/Illumina data from SMF experiments into an AnnData object.
|
|
@@ -29,6 +26,9 @@ The following CLI tools need to be installed and configured before using the inf
|
|
|
29
26
|
|
|
30
27
|
## Announcements
|
|
31
28
|
|
|
29
|
+
### 11/05/25 - Version 0.2.1 is available through PyPI
|
|
30
|
+
Version 0.2.1 makes the core workflow (smftools load) a command line tool that takes in an experiment_config.csv file for input/output and parameter management.
|
|
31
|
+
|
|
32
32
|
### 05/29/25 - Version 0.1.6 is available through PyPI.
|
|
33
33
|
Informatics, preprocessing, tools, plotting modules have core functionality that is approaching stability on MacOS(Intel/Silicon) and Linux(Ubuntu). I will work on improving documentation/tutorials shortly. The base PyTorch/Scikit-Learn ML-infrastructure is going through some organizational changes to work with PyTorch Lightning, Hydra, and WanDB to facilitate organizational scaling, multi-device usage, and logging.
|
|
34
34
|
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Basic Usage
|
|
2
|
+
|
|
3
|
+
## Load Usage
|
|
4
|
+
|
|
5
|
+
Many use cases for smftools begin here. For most users, the call below will be sufficient to convert any raw SMF dataset from Nanopore/Illumina to an AnnData object:
|
|
6
|
+
|
|
7
|
+
```shell
|
|
8
|
+
smftools load "/Path_to_experiment_config.csv"
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
This command takes a user-provided config file that handles:
|
|
12
|
+
- I/O paths (with data input path, FASTA path, optional BED path for subsampling FASTA, and a data output path)
|
|
13
|
+
- Experiment info (SMF modality, sequencer type, barcoding kit if nanopore, sample sheet with metadata mapping)
|
|
14
|
+
- Options to override default workflow parameters from smftools/config. Params are handled from default.yaml -> modality_type.yaml -> user passed config.csv.
|
|
15
|
+
|
|
16
|
+
## Preprocess Usage
|
|
17
|
+
|
|
18
|
+
This command performs preprocessing on the anndata object. It automatically runs the load command under the hood if starting from raw data.
|
|
19
|
+
|
|
20
|
+
```shell
|
|
21
|
+
smftools preprocess "/Path_to_experiment_config.csv"
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Spatial Usage
|
|
25
|
+
|
|
26
|
+
This command performs spatial analysis on the anndata object. It automatically runs the load command and preprocessing under the hood if they have not been already run.
|
|
27
|
+
|
|
28
|
+
```shell
|
|
29
|
+
smftools spatial "/Path_to_experiment_config.csv"
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## HMM Usage
|
|
33
|
+
|
|
34
|
+
This command performs HMM-based feature annotation on the anndata object. It automatically runs the load command and preprocessing under the hood if they have not already been run.
|
|
35
|
+
|
|
36
|
+
```shell
|
|
37
|
+
smftools hmm "/Path_to_experiment_config.csv"
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Batch Usage
|
|
41
|
+
|
|
42
|
+
This command performs batch processing of any of the above commands across multiple experiments. It takes in a tsv, txt, or csv file listing the paths of experiment-specific config csvs.
|
|
43
|
+
```shell
|
|
44
|
+
smftools batch preprocess "/Path_to_experiment_config_path_list.csv"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Concatenate Usage
|
|
48
|
+
|
|
49
|
+
This command concatenates multiple h5ad files and saves them to a new output. The h5ads to concatenate are provided as a txt, tsv, or csv file of paths.
|
|
50
|
+
```shell
|
|
51
|
+
smftools concatenate output.h5ad "/Path_to_h5ad_path_list.csv"
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Reading AnnData objects created by smftools
|
|
55
|
+
|
|
56
|
+
After creating an AnnData object holding your experiment's SMF data, you can load the AnnData object like so:
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
import smftools as smf
|
|
60
|
+
input_adata = "/Path_to_experiment_AnnData.h5ad.gz"
|
|
61
|
+
adata = smf.safe_read_h5ad(input_adata)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
This custom read function optionally takes a directory of pickle files, written by the safe_write_h5ad function, that hold data types which cannot normally be saved directly in HDF5 format.
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
If you don't have an AnnData object yet, but want to play with the downstream Preprocessing, Tools, and Plotting modules, you can load a pre-loaded SMF dataset.
|
|
68
|
+
|
|
69
|
+
Currently, you can do this with our lab's in vitro dCas9 binding kinetics dataset, which comes from a Hia5 SMF experiment processed with direct m6A high-accuracy basecalls:
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
adata = smf.datasets.dCas9_kinetics()
|
|
73
|
+
adata.obs_names_make_unique()
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Alternatively, you can do this with our lab's M.CviPI SMF test data in F1-hybrid natural killer cells generated by NEB EMseq conversion followed by canonical basecalling:
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
adata = smf.datasets.Kissiov_and_McKenna_2025()
|
|
80
|
+
adata.obs_names_make_unique()
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Writing out AnnData objects to save analysis progress
|
|
84
|
+
|
|
85
|
+
After preprocessing and downstream analysis of the AnnData object, you can save the AnnData object at any step like so:
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
import smftools as smf
|
|
89
|
+
from pathlib import Path
|
|
90
|
+
|
|
91
|
+
output_dir = Path('/Path_to_output_directory')
|
|
92
|
+
output_adata = 'analyzed_adata.h5ad.gz'
|
|
93
|
+
final_output_path = output_dir / output_adata
|
|
94
|
+
smf.safe_write_h5ad(adata, final_output_path, compression='gzip')
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
This custom save function will make a directory of pickle files for data types that cannot normally be saved directly in HDF5 format.
|
|
98
|
+
|
|
99
|
+
## Troubleshooting
|
|
100
|
+
For more advanced usage and help with troubleshooting, note that the API documentation and tutorials for each of the modules are still being developed.
|
|
101
|
+
However, you can currently learn about the functions contained within the module by calling:
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
smf.inform.__all__
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
This lists the functions within any given module. If you want to see the associated docstring for a given function, here is an example:
|
|
108
|
+
|
|
109
|
+
```
|
|
110
|
+
print(smf.inform.load_adata.__doc__)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
These docstrings will provide a brief description of the function and also tell you the input parameters and what the function returns.
|
|
114
|
+
In some cases, usage examples will also be provided in the docstring in the form of doctests.
|
|
@@ -16,7 +16,7 @@ conda activate smftools
|
|
|
16
16
|
pip install smftools
|
|
17
17
|
```
|
|
18
18
|
|
|
19
|
-
Ensure that you can access dorado,
|
|
19
|
+
Ensure that you can access dorado, modkit, and minimap2 executables from the terminal in this environment.
|
|
20
20
|
You may need to add them to $PATH if they are not globally configured.
|
|
21
21
|
For example, if you want to check if dorado is executable, simply run this in the terminal:
|
|
22
22
|
|
|
@@ -24,10 +24,10 @@ For example, if you want to check if dorado is executable, simply run this in th
|
|
|
24
24
|
dorado
|
|
25
25
|
```
|
|
26
26
|
|
|
27
|
-
On Mac OSX, the following can be used to congigure
|
|
27
|
+
On Mac OSX, the following can be used to configure minimap2 (with brew) and BedGraphToBigWig (with wget).
|
|
28
28
|
|
|
29
29
|
```shell
|
|
30
|
-
brew install
|
|
30
|
+
brew install minimap2
|
|
31
31
|
wget http://hgdownload.soe.ucsc.edu/admin/exe/macOSX.x86_64/bedGraphToBigWig
|
|
32
32
|
chmod +x bedGraphToBigWig
|
|
33
33
|
sudo mv bedGraphToBigWig /usr/local/bin/
|
|
@@ -47,7 +47,10 @@ A python virtual environment can be created as an alternative to conda. I like t
|
|
|
47
47
|
```shell
|
|
48
48
|
python -m venv venv-smftools
|
|
49
49
|
source venv-smftools/bin/activate
|
|
50
|
+
pip install --upgrade pip
|
|
50
51
|
pip install .
|
|
52
|
+
pip install ipykernel jupyter
|
|
53
|
+
python -m ipykernel install --user --name=venv-smftools --display-name "Python (smftools)"
|
|
51
54
|
```
|
|
52
55
|
|
|
53
56
|
Subsequent use of the installed version of smftools can be run by changing to the smftools directory and activating the venv:
|
|
@@ -5,15 +5,10 @@ fasta,/path_to_fasta.fasta,Path to initial FASTA file,,str
|
|
|
5
5
|
fasta_regions_of_interest,/path_to_bed.bed,Path to a bed file to subsample the fasta on.,,str
|
|
6
6
|
output_directory,/outputs,Directory to act as root for all analysis outputs,,str
|
|
7
7
|
experiment_name,,An experiment name for the final h5ad file,,str
|
|
8
|
+
model_dir,/path_to_dorado_model_dir,Path to the directory containing the dorado basecalling models,,str
|
|
8
9
|
model,None,The dorado basecalling model to use,,str
|
|
9
10
|
barcode_kit,SQK-NBD114-24,The barcoding kit used for the experiment,,str
|
|
10
11
|
mapping_threshold,0.05,Minimum proportion of reads mapping to a reference to further use that reference (Ranges from 0-1 as a proportion of mapped reads),,float
|
|
11
|
-
filter_threshold,0.8,Minimum probability to call a canonical base identity,,float
|
|
12
|
-
m6A_threshold,0.8,Minimum probability to flag m6A as True,,float
|
|
13
|
-
m5C_threshold,0.8,Minimum probability to flag m5C as True,,float
|
|
14
|
-
hm5C_threshold,0.8,Minimum probability to flag hm5C as True,,float
|
|
15
12
|
mod_list,[5mC_5hmC],Modified base names for Dorado,"""6mA"", ""5mC_5hmC""",list
|
|
16
13
|
batch_size,4,number of samples to analyze at a time,,int
|
|
17
|
-
conversion_types,[5mC],Types of modification types to use in conversion SMF,"5mC', '6mA'",list
|
|
18
|
-
barcode_both_ends,TRUE,whether to require both ends of a read to be barcoded for demultiplexing,,bool
|
|
19
|
-
trim,FALSE,whether to trim barcodes and adapters from reads during demultiplexing,,bool
|
|
14
|
+
conversion_types,[5mC],Types of modification types to use in conversion SMF,"'5mC', '6mA'",list
|
|
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "smftools"
|
|
7
7
|
description = "Single Molecule Footprinting Analysis in Python."
|
|
8
|
-
requires-python = ">=3.9"
|
|
8
|
+
requires-python = ">=3.9,<3.13"
|
|
9
9
|
license = { file = "LICENSE" }
|
|
10
10
|
authors = [
|
|
11
11
|
{name = "Joseph McKenna"}
|
|
@@ -42,6 +42,8 @@ classifiers = [
|
|
|
42
42
|
dependencies = [
|
|
43
43
|
"anndata>=0.10.0",
|
|
44
44
|
"biopython>=1.79",
|
|
45
|
+
"captum",
|
|
46
|
+
"click",
|
|
45
47
|
"fastcluster",
|
|
46
48
|
"hydra-core",
|
|
47
49
|
"igraph",
|
|
@@ -53,15 +55,18 @@ dependencies = [
|
|
|
53
55
|
"omegaconf",
|
|
54
56
|
"pandas>=1.4.2",
|
|
55
57
|
"pod5>=0.1.21",
|
|
56
|
-
"pomegranate>=1.0.0",
|
|
57
58
|
"pyfaidx>=0.8.0",
|
|
59
|
+
"pybedtools>=0.12.0",
|
|
60
|
+
"pyBigWig>=0.3.24",
|
|
58
61
|
"pysam>=0.19.1",
|
|
59
62
|
"scanpy>=1.9",
|
|
60
63
|
"scikit-learn>=1.0.2",
|
|
61
64
|
"scipy>=1.7.3",
|
|
65
|
+
"shap",
|
|
62
66
|
"seaborn>=0.11",
|
|
63
67
|
"torch>=1.9.0",
|
|
64
68
|
"tqdm",
|
|
69
|
+
"upsetplot",
|
|
65
70
|
"wandb"
|
|
66
71
|
]
|
|
67
72
|
dynamic = ["version"]
|
|
@@ -70,6 +75,9 @@ dynamic = ["version"]
|
|
|
70
75
|
Source = "https://github.com/jkmckenna/smftools"
|
|
71
76
|
Documentation = "https://smftools.readthedocs.io/"
|
|
72
77
|
|
|
78
|
+
[project.scripts]
|
|
79
|
+
smftools = "smftools.cli_entry:cli"
|
|
80
|
+
|
|
73
81
|
[project.optional-dependencies]
|
|
74
82
|
tests = [
|
|
75
83
|
"pytest",
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# Essential packages
|
|
2
2
|
anndata>=0.10.0
|
|
3
3
|
biopython>=1.79
|
|
4
|
+
captum
|
|
5
|
+
click
|
|
4
6
|
fastcluster
|
|
5
7
|
hydra-core
|
|
6
8
|
leidenalg
|
|
@@ -14,13 +16,16 @@ numpy>=1.22.0,<2
|
|
|
14
16
|
omegaconf
|
|
15
17
|
pandas>=1.4.2
|
|
16
18
|
pod5>=0.1.21
|
|
17
|
-
|
|
19
|
+
pybedtools>=0.12.0
|
|
20
|
+
pyBigWig>=0.3.24
|
|
18
21
|
pyfaidx>=0.8.0
|
|
19
22
|
pysam>=0.19.1
|
|
20
|
-
scanpy>=1.
|
|
23
|
+
scanpy>=1.11
|
|
21
24
|
scikit-learn>=1.0.2
|
|
22
25
|
scipy>=1.7.3
|
|
23
26
|
seaborn>=0.11
|
|
27
|
+
shap
|
|
24
28
|
torch>=1.9.0
|
|
25
29
|
tqdm
|
|
30
|
+
upsetplot
|
|
26
31
|
wandb
|
|
@@ -4,12 +4,13 @@ import logging
|
|
|
4
4
|
import warnings
|
|
5
5
|
|
|
6
6
|
from . import informatics as inform
|
|
7
|
+
from . import machine_learning as ml
|
|
8
|
+
from . import plotting as pl
|
|
7
9
|
from . import preprocessing as pp
|
|
8
10
|
from . import tools as tl
|
|
9
|
-
from . import plotting as pl
|
|
10
|
-
from . import readwrite, datasets
|
|
11
|
-
from .readwrite import adata_to_df, safe_write_h5ad, merge_barcoded_anndatas
|
|
12
11
|
|
|
12
|
+
from . import cli, config, datasets, hmm
|
|
13
|
+
from .readwrite import adata_to_df, safe_write_h5ad, safe_read_h5ad, merge_barcoded_anndatas_core
|
|
13
14
|
|
|
14
15
|
from importlib.metadata import version
|
|
15
16
|
|
|
@@ -19,11 +20,11 @@ __version__ = version(package_name)
|
|
|
19
20
|
__all__ = [
|
|
20
21
|
"adata_to_df",
|
|
21
22
|
"inform",
|
|
23
|
+
"ml",
|
|
22
24
|
"pp",
|
|
23
25
|
"tl",
|
|
24
26
|
"pl",
|
|
25
|
-
"
|
|
26
|
-
"datasets",
|
|
27
|
+
"datasets",
|
|
27
28
|
"safe_write_h5ad",
|
|
28
|
-
"
|
|
29
|
+
"safe_read_h5ad"
|
|
29
30
|
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.3"
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
def flow_I(config_path):
    """
    High-level function to call for converting raw sequencing data to an adata object.
    Command line accesses this through smftools load <config_path>
    Works for nanopore pod5, fast5, and unaligned modBAM data types for direct SMF workflows.
    Works for nanopore pod5, fast5, unaligned BAM for conversion SMF workflows.
    Also works for illumina fastq and unaligned BAM for conversion SMF workflows.

    The function runs three stages in order, each driven by the same config file:
    load_adata -> preprocess_adata -> spatial_adata.

    Parameters:
        config_path (str): A string representing the file path to the experiment configuration csv file.

    Returns:
        None
    """
    # Imports are kept function-local, as in the original, so importing this
    # module stays cheap. Only names that are actually used are imported.
    from datetime import datetime
    from importlib import resources
    from pathlib import Path

    from ..config import ExperimentConfig, LoadExperimentConfig
    from ..readwrite import make_dirs
    from .load_adata import load_adata
    from .preprocess_adata import preprocess_adata
    from .spatial_adata import spatial_adata

    date_str = datetime.today().strftime("%y%m%d")

    ################################### 1) General params and input organization ###################################
    # Load experiment config parameters into local variables
    loader = LoadExperimentConfig(config_path)
    defaults_dir = resources.files("smftools").joinpath("config")
    cfg, report = ExperimentConfig.from_var_dict(loader.var_dict, date_str=date_str, defaults_dir=defaults_dir)

    # General config variable init - Necessary user passed inputs.
    # Reading these up front also surfaces a missing/invalid config field
    # before any of the pipeline stages start.
    smf_modality = cfg.smf_modality  # Specifies if the data is conversion SMF, direct methylation detection SMF, or deaminase SMF. Necessary.
    input_data_path = Path(cfg.input_data_path)  # Path to a directory of POD5s/FAST5s or to a BAM/FASTQ file. Necessary.
    output_directory = Path(cfg.output_directory)  # Path to the output directory to make for the analysis. Necessary.
    fasta = Path(cfg.fasta)  # Path to reference FASTA. Necessary.
    split_dir = Path(cfg.split_dir)  # Relative path to directory for demultiplexing reads
    split_path = output_directory / split_dir  # Absolute path to directory for demultiplexing reads

    # Make initial output directory
    make_dirs([output_directory])

    bam_suffix = cfg.bam_suffix
    strands = cfg.strands

    # General config variable init - Optional user passed inputs for enzyme base specificity
    mod_target_bases = cfg.mod_target_bases  # Nucleobases of interest that may be modified. ['GpC', 'CpG', 'C', 'A']

    # Conversion/deamination specific variable init
    conversion_types = cfg.conversion_types  # 5mC
    conversions = cfg.conversions

    # Common Anndata accession params
    reference_column = cfg.reference_column

    # If conversion_types is passed, extend the conversions. A new list is
    # built so the list held by cfg is not mutated in place.
    if conversion_types:
        conversions = list(conversions) + list(conversion_types)

    ############################################### smftools load start ###############################################
    initial_adata, initial_adata_path = load_adata(config_path)

    # Initial adata path info
    initial_backup_dir = initial_adata_path.parent / 'adata_accessory_data'
    ############################################### smftools load end ###############################################

    ############################################### smftools preprocess start ###############################################
    pp_adata, pp_adata_path, pp_dedup_adata, pp_dup_rem_adata_path = preprocess_adata(config_path)

    # Preprocessed adata path info.
    # NOTE(review): this recomputes the path returned by preprocess_adata above;
    # confirm both agree. The basename is joined onto the *parent directory* of
    # the initial adata file, since initial_adata_path is itself a file path.
    pp_adata_basename = initial_adata_path.with_suffix("").name + '_preprocessed.h5ad.gz'
    pp_adata_path = initial_adata_path.parent / pp_adata_basename
    pp_backup_dir = pp_adata_path.parent / 'pp_adata_accessory_data'

    # Preprocessed duplicate removed adata path info
    pp_dup_rem_adata_basename = pp_adata_path.with_suffix("").name + '_duplicates_removed.h5ad.gz'
    pp_dup_rem_adata_path = pp_adata_path.parent / pp_dup_rem_adata_basename
    pp_dup_rem_backup_dir = pp_adata_path.parent / 'pp_dup_rem_adata_accessory_data'
    ############################################### smftools preprocess end ###############################################

    ############################################### smftools spatial start ###############################################
    # Preprocessed duplicate removed adata with basic analyses appended path info
    basic_analyzed_adata_basename = pp_dup_rem_adata_path.with_suffix("").name + '_analyzed_I.h5ad.gz'
    basic_analyzed_adata_path = pp_dup_rem_adata_path.parent / basic_analyzed_adata_basename
    basic_analyzed_backup_dir = pp_dup_rem_adata_path.parent / 'duplicate_removed_analyzed_adata_I_accessory_data'

    # Bind the result to a distinct name so the imported spatial_adata function
    # is not shadowed by its own return value.
    spatial_result_adata, spatial_adata_path = spatial_adata(config_path)
    ############################################### smftools spatial end ###############################################
|