smftools 0.1.6__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {smftools-0.1.6 → smftools-0.1.7}/.gitignore +1 -0
- {smftools-0.1.6 → smftools-0.1.7}/PKG-INFO +5 -2
- {smftools-0.1.6 → smftools-0.1.7}/README.md +4 -1
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/installation.md +4 -4
- {smftools-0.1.6 → smftools-0.1.7}/pyproject.toml +1 -1
- smftools-0.1.7/smftools/_version.py +1 -0
- smftools-0.1.7/smftools/tools/data/__init__.py +2 -0
- smftools-0.1.7/smftools/tools/data/anndata_data_module.py +90 -0
- smftools-0.1.7/smftools/tools/inference/__init__.py +1 -0
- smftools-0.1.7/smftools/tools/inference/lightning_inference.py +41 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/__init__.py +3 -1
- smftools-0.1.7/smftools/tools/models/lightning_base.py +41 -0
- smftools-0.1.7/smftools/tools/models/sklearn_models.py +40 -0
- smftools-0.1.7/smftools/tools/training/__init__.py +1 -0
- smftools-0.1.7/smftools/tools/training/train_lightning_model.py +47 -0
- smftools-0.1.6/smftools/_version.py +0 -1
- smftools-0.1.6/smftools/tools/data/__init__.py +0 -1
- smftools-0.1.6/smftools/tools/inference/__init__.py +0 -0
- smftools-0.1.6/smftools/tools/models/sklearn_models.py +0 -0
- smftools-0.1.6/smftools/tools/training/__init__.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/.gitattributes +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/.readthedocs.yaml +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/CONTRIBUTING.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/LICENSE +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/Makefile +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/make.bat +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/converted_BAM_to_adata.png +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/modkit_extract_to_adata.png +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/smftools-1.svg +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/smftools-1.tif +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/smftools_informatics_diagram.pdf +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/smftools_informatics_diagram.png +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/_static/smftools_preprocessing_diagram.png +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/_templates/tmp +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/api/datasets.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/api/index.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/api/informatics.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/api/preprocessing.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/api/tools.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/basic_usage.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/conf.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/contributors.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/dev/index.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/index.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/references.bib +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/references.rst +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/release-notes/0.1.0.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/release-notes/index.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/requirements.txt +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/docs/source/tutorials/index.md +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/experiment_config.csv +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/notebooks/Kissiov_and_McKenna_2025_example_notebook.ipynb +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/notebooks/Kissiov_and_McKenna_2025_sample_sheet.csv +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/requirements.txt +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/sample_sheet.csv +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/__init__.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/_settings.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/datasets/F1_sample_sheet.csv +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/datasets/__init__.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/datasets/datasets.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/__init__.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/archived/bam_conversion.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/archived/bam_direct.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/archived/basecalls_to_adata.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/archived/print_bam_query_seq.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/basecall_pod5s.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/conversion_smf.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/direct_smf.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/fast5_to_pod5.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/LoadExperimentConfig.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/__init__.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/align_and_sort_BAM.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/aligned_BAM_to_bed.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/archived/informatics.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/archived/load_adata.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/bam_qc.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/bed_to_bigwig.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/binarize_converted_base_identities.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/canoncall.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/complement_base_list.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/converted_BAM_to_adata.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/converted_BAM_to_adata_II.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/count_aligned_reads.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/demux_and_index_BAM.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_base_identities.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_mods.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_read_features_from_bam.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_read_lengths_from_bed.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_readnames_from_BAM.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/find_conversion_sites.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/generate_converted_FASTA.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/get_chromosome_lengths.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/get_native_references.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/index_fasta.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/make_dirs.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/make_modbed.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/modQC.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/modcall.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/modkit_extract_to_adata.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/ohe_batching.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/ohe_layers_decode.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/one_hot_decode.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/one_hot_encode.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/plot_read_length_and_coverage_histograms.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/run_multiqc.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/separate_bam_by_bc.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/split_and_index_BAM.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/load_adata.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/readwrite.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/subsample_fasta_from_bed.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/subsample_pod5.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/plotting/__init__.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/plotting/classifiers.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/plotting/general_plotting.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/plotting/position_stats.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/__init__.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/append_C_context.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/archives/mark_duplicates.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/archives/preprocessing.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/archives/remove_duplicates.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/binarize_on_Youden.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/binary_layers_to_ohe.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_complexity.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_consensus.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_converted_read_methylation_stats.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_coverage.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_pairwise_differences.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_pairwise_hamming_distances.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_position_Youden.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_read_length_stats.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/clean_NaN.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/filter_adata_by_nan_proportion.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/filter_converted_reads_on_methylation.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/filter_reads_on_length.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/flag_duplicate_reads.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/invert_adata.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/load_sample_sheet.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/make_dirs.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/min_non_diagonal.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/recipes.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/subsample_adata.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/readwrite.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/__init__.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/apply_hmm.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/apply_hmm_batched.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/archived/classify_methylated_features.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/archived/classify_non_methylated_features.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/archived/subset_adata_v1.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/archived/subset_adata_v2.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/calculate_distances.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/calculate_umap.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/call_hmm_peaks.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/classifiers.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/cluster_adata_on_methylation.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/data/preprocessing.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/display_hmm.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/evaluation/__init__.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/general_tools.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/hmm_readwrite.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/base.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/cnn.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/mlp.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/positional.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/rnn.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/transformer.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/models/wrappers.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/nucleosome_hmm_refinement.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/position_stats.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/read_stats.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/subset_adata.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/train_hmm.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/utils/__init__.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/utils/device.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/smftools/tools/utils/grl.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/tests/datasets/test_datasets.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/tests/informatics/helpers/test_LoadExperimentConfig.py +0 -0
- {smftools-0.1.6 → smftools-0.1.7}/tests/test_readwrite.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: smftools
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.7
|
|
4
4
|
Summary: Single Molecule Footprinting Analysis in Python.
|
|
5
5
|
Project-URL: Source, https://github.com/jkmckenna/smftools
|
|
6
6
|
Project-URL: Documentation, https://smftools.readthedocs.io/
|
|
@@ -118,7 +118,10 @@ The following CLI tools need to be installed and configured before using the inf
|
|
|
118
118
|
|
|
119
119
|
## Announcements
|
|
120
120
|
|
|
121
|
-
###
|
|
121
|
+
### 05/29/25 - Version 0.1.6 is available through PyPI.
|
|
122
|
+
Informatics, preprocessing, tools, plotting modules have core functionality that is approaching stability on MacOS(Intel/Silicon) and Linux(Ubuntu). I will work on improving documentation/tutorials shortly. The base PyTorch/Scikit-Learn ML-infrastructure is going through some organizational changes to work with PyTorch Lightning, Hydra, and WandB to facilitate organizational scaling, multi-device usage, and logging.
|
|
123
|
+
|
|
124
|
+
### 10/01/24 - More recent versions are being updated frequently. Installation from source over PyPI is recommended!
|
|
122
125
|
|
|
123
126
|
### 09/09/24 - The version 0.1.1 package ([smftools-0.1.1](https://pypi.org/project/smftools/)) is installable through pypi!
|
|
124
127
|
The informatics module has been bumped to alpha-phase status. This module can deal with POD5s and unaligned BAMS from nanopore conversion and direct SMF experiments, as well as FASTQs from Illumina conversion SMF experiments. Primary output from this module is an AnnData object containing all relevant SMF data, which is compatible with all downstream smftools modules. The other modules are still in pre-alpha phase. Preprocessing, Tools, and Plotting modules should be promoted to alpha-phase within the next month or so.
|
|
@@ -29,7 +29,10 @@ The following CLI tools need to be installed and configured before using the inf
|
|
|
29
29
|
|
|
30
30
|
## Announcements
|
|
31
31
|
|
|
32
|
-
###
|
|
32
|
+
### 05/29/25 - Version 0.1.6 is available through PyPI.
|
|
33
|
+
Informatics, preprocessing, tools, plotting modules have core functionality that is approaching stability on MacOS(Intel/Silicon) and Linux(Ubuntu). I will work on improving documentation/tutorials shortly. The base PyTorch/Scikit-Learn ML-infrastructure is going through some organizational changes to work with PyTorch Lightning, Hydra, and WandB to facilitate organizational scaling, multi-device usage, and logging.
|
|
34
|
+
|
|
35
|
+
### 10/01/24 - More recent versions are being updated frequently. Installation from source over PyPI is recommended!
|
|
33
36
|
|
|
34
37
|
### 09/09/24 - The version 0.1.1 package ([smftools-0.1.1](https://pypi.org/project/smftools/)) is installable through pypi!
|
|
35
38
|
The informatics module has been bumped to alpha-phase status. This module can deal with POD5s and unaligned BAMS from nanopore conversion and direct SMF experiments, as well as FASTQs from Illumina conversion SMF experiments. Primary output from this module is an AnnData object containing all relevant SMF data, which is compatible with all downstream smftools modules. The other modules are still in pre-alpha phase. Preprocessing, Tools, and Plotting modules should be promoted to alpha-phase within the next month or so.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# Installation
|
|
2
2
|
|
|
3
|
-
## PyPi version
|
|
3
|
+
## PyPi version - Easiest starting point
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Install smftools from [PyPI](https://pypi.org/project/smftools):
|
|
6
6
|
|
|
7
7
|
```shell
|
|
8
8
|
pip install smftools
|
|
@@ -33,7 +33,7 @@ chmod +x bedGraphToBigWig
|
|
|
33
33
|
sudo mv bedGraphToBigWig /usr/local/bin/
|
|
34
34
|
```
|
|
35
35
|
|
|
36
|
-
## Development Version
|
|
36
|
+
## Development Version - recommended to use this method for the most up-to-date versions
|
|
37
37
|
|
|
38
38
|
Clone smftools from source and change into the smftools directory:
|
|
39
39
|
|
|
@@ -42,7 +42,7 @@ git clone https://github.com/jkmckenna/smftools.git
|
|
|
42
42
|
cd smftools
|
|
43
43
|
```
|
|
44
44
|
|
|
45
|
-
A virtual environment can be created
|
|
45
|
+
A Python virtual environment can be created as an alternative to conda. I like to do venv-smftools-X.X.X to keep a separate venv for each version:
|
|
46
46
|
|
|
47
47
|
```shell
|
|
48
48
|
python -m venv venv-smftools
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.7"
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
from torch.utils.data import DataLoader, TensorDataset, random_split
|
|
3
|
+
import pytorch_lightning as pl
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
class AnnDataModule(pl.LightningDataModule):
    """LightningDataModule serving feature/label tensors from an AnnData object.

    Features are read from ``adata.X``, a named layer, or an ``obsm`` entry and
    (outside inference mode) paired with labels from ``adata.obs[label_col]``.
    A train/val split is either created once (and optionally persisted to CSV)
    or reloaded from a previously saved split so runs are reproducible.
    """

    def __init__(self, adata, tensor_source="X", tensor_key=None, label_col="labels",
                 batch_size=64, train_frac=0.7, random_seed=42,
                 split_col='train_val_split', split_save_path=None,
                 load_existing_split=False, inference_mode=False):
        """
        Parameters
        ----------
        adata : AnnData
            Annotated data matrix supplying features (and labels).
        tensor_source : str
            Where to read features from: "X", "layers", or "obsm".
        tensor_key : str or None
            Key into ``adata.layers`` / ``adata.obsm`` when applicable.
        label_col : str
            Column in ``adata.obs`` holding class labels (ignored in
            inference mode).
        batch_size : int
            Batch size used by every dataloader.
        train_frac : float
            Fraction of observations assigned to the training split.
        random_seed : int
            Seed controlling the random train/val split.
        split_col : str
            ``obs`` column used to record "train"/"val" membership.
        split_save_path : str or None
            CSV path used to save or load the split assignment.
        load_existing_split : bool
            If True, read the split from ``split_save_path`` instead of
            creating a new one (``split_save_path`` is then required).
        inference_mode : bool
            If True, only build an unlabeled dataset for prediction.
        """
        super().__init__()
        self.adata = adata
        self.tensor_source = tensor_source
        self.tensor_key = tensor_key
        self.label_col = label_col
        self.batch_size = batch_size
        self.train_frac = train_frac
        self.random_seed = random_seed
        self.split_col = split_col
        self.split_save_path = split_save_path
        self.load_existing_split = load_existing_split
        self.inference_mode = inference_mode

    def setup(self, stage=None):
        """Materialize tensors and the train/val (or inference) datasets."""
        # Load the feature matrix from the requested slot.
        if self.tensor_source == "X":
            X = self.adata.X
        elif self.tensor_source == "layers":
            assert self.tensor_key in self.adata.layers, f"Layer '{self.tensor_key}' not found."
            X = self.adata.layers[self.tensor_key]
        elif self.tensor_source == "obsm":
            assert self.tensor_key in self.adata.obsm, f"obsm key '{self.tensor_key}' not found."
            X = self.adata.obsm[self.tensor_key]
        else:
            raise ValueError(f"Invalid tensor_source: {self.tensor_source}")

        # adata slots are frequently scipy-sparse; torch.tensor cannot ingest
        # those (or np.matrix) directly, so densify/normalize first.
        if hasattr(X, "toarray"):
            X = X.toarray()
        X_tensor = torch.tensor(np.asarray(X), dtype=torch.float32)

        if self.inference_mode:
            # Unlabeled observations only; consumed by predict_dataloader().
            self.infer_dataset = TensorDataset(X_tensor)

        else:
            # Load and integer-encode labels.
            y = self.adata.obs[self.label_col]
            if y.dtype.name == 'category':
                y = y.cat.codes
            y_tensor = torch.tensor(y.values, dtype=torch.long)

            # Reuse a previously saved split, keyed by obs_names.
            if self.load_existing_split:
                if self.split_save_path is None:
                    # Fail loudly instead of letting pd.read_csv(None) raise
                    # an opaque error.
                    raise ValueError("split_save_path is required when load_existing_split=True.")
                split_df = pd.read_csv(self.split_save_path, index_col=0)
                assert self.split_col in split_df.columns, f"'{self.split_col}' column missing in split file."
                self.adata.obs[self.split_col] = split_df.loc[self.adata.obs_names][self.split_col].values

            # If no split exists, create one and record it in obs.
            if self.split_col not in self.adata.obs:
                full_dataset = TensorDataset(X_tensor, y_tensor)
                n_train = int(self.train_frac * len(full_dataset))
                n_val = len(full_dataset) - n_train
                self.train_set, self.val_set = random_split(
                    full_dataset, [n_train, n_val],
                    generator=torch.Generator().manual_seed(self.random_seed)
                )
                # Default everything to "val", then overwrite the train rows.
                # random_split always returns Subset objects exposing .indices.
                split_array = np.full(len(self.adata), "val", dtype=object)
                split_array[self.train_set.indices] = "train"
                self.adata.obs[self.split_col] = split_array

                # Persist the assignment for reproducible reuse.
                if self.split_save_path:
                    self.adata.obs[[self.split_col]].to_csv(self.split_save_path)
            else:
                # Rebuild datasets from the recorded split labels.
                split_labels = self.adata.obs[self.split_col].values
                train_mask = split_labels == "train"
                val_mask = split_labels == "val"
                self.train_set = TensorDataset(X_tensor[train_mask], y_tensor[train_mask])
                self.val_set = TensorDataset(X_tensor[val_mask], y_tensor[val_mask])

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return DataLoader(self.train_set, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Deterministic loader over the validation split."""
        return DataLoader(self.val_set, batch_size=self.batch_size)

    def predict_dataloader(self):
        """Loader over all observations; only valid in inference mode."""
        if not self.inference_mode:
            raise RuntimeError("predict_dataloader only available in inference mode.")
        return DataLoader(self.infer_dataset, batch_size=self.batch_size)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .lightning_inference import run_lightning_inference
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import numpy as np
|
|
4
|
+
from pytorch_lightning import Trainer
|
|
5
|
+
|
|
6
|
+
def run_lightning_inference(
    adata,
    model,
    datamodule,
    label_col="labels",
    prefix="model"
):
    """Run batched prediction with a Lightning model and annotate ``adata``.

    Predictions are written into ``adata.obs`` (class index, class label,
    and max probability, plus one probability column per class) and the
    full (N, C) probability matrix is stored in ``adata.obsm``.

    Parameters
    ----------
    adata : AnnData
        Object to annotate; ``obs[label_col]`` must be categorical so the
        class names and their ordering are well defined.
    model : pl.LightningModule
        Trained model whose predict step yields per-batch class
        probabilities of shape (batch, n_classes).
    datamodule : pl.LightningDataModule
        DataModule providing ``predict_dataloader``.
    label_col : str
        Categorical obs column defining class names.
    prefix : str
        Prefix for every column/key written back to ``adata``.

    Raises
    ------
    ValueError
        If ``label_col`` is missing or not categorical.
    """
    # Class names come from the categorical dtype so column order is stable.
    # NOTE: pd.api.types.is_categorical_dtype is deprecated (pandas >= 2.1);
    # check the dtype class directly instead.
    if label_col in adata.obs and isinstance(adata.obs[label_col].dtype, pd.CategoricalDtype):
        class_labels = adata.obs[label_col].cat.categories.tolist()
    else:
        raise ValueError("label_col must be a categorical column in adata.obs")

    # Single-device predict run with logging/checkpointing disabled.
    trainer = Trainer(accelerator="auto", devices=1, logger=False, enable_checkpointing=False)
    preds = trainer.predict(model, datamodule=datamodule)
    probs = torch.cat(preds, dim=0).cpu().numpy()  # (N, C)
    pred_class_idx = probs.argmax(axis=1)
    pred_class_labels = [class_labels[i] for i in pred_class_idx]
    pred_class_probs = probs[np.arange(len(probs)), pred_class_idx]

    # Namespace every output column by model prefix and label column.
    full_prefix = f"{prefix}_{label_col}"

    # Scalar predictions per observation.
    adata.obs[f"{full_prefix}_pred"] = pred_class_idx
    adata.obs[f"{full_prefix}_pred_label"] = pd.Categorical(pred_class_labels, categories=class_labels)
    adata.obs[f"{full_prefix}_pred_prob"] = pred_class_probs

    # One probability column per class for easy plotting/filtering.
    for i, class_name in enumerate(class_labels):
        adata.obs[f"{full_prefix}_prob_{class_name}"] = probs[:, i]

    # Full probability matrix in obsm.
    adata.obsm[f"{full_prefix}_pred_prob_all"] = probs
|
|
@@ -4,4 +4,6 @@ from .cnn import CNNClassifier
|
|
|
4
4
|
from .rnn import RNNClassifier
|
|
5
5
|
from .transformer import BaseTransformer, TransformerClassifier, DANNTransformerClassifier, MaskedTransformerPretrainer
|
|
6
6
|
from .positional import PositionalEncoding
|
|
7
|
-
from .wrappers import ScaledModel
|
|
7
|
+
from .wrappers import ScaledModel
|
|
8
|
+
from .lightning_base import TorchClassifierWrapper
|
|
9
|
+
from .sklearn_models import SklearnModelWrapper
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import pytorch_lightning as pl
|
|
3
|
+
|
|
4
|
+
class TorchClassifierWrapper(pl.LightningModule):
    """Lightning wrapper adding train/val loops to any torch classifier.

    The wrapped ``model`` maps a feature batch to raw class logits; the loss
    defaults to cross-entropy and the optimizer to AdamW.
    """

    def __init__(
        self,
        model: torch.nn.Module,
        optimizer_cls=torch.optim.AdamW,
        optimizer_kwargs=None,
        criterion_cls=torch.nn.CrossEntropyLoss,
        criterion_kwargs=None,
        lr: float = 1e-3,
    ):
        """
        Parameters
        ----------
        model : torch.nn.Module
            Classifier producing logits of shape (batch, n_classes).
        optimizer_cls : type
            Optimizer class instantiated in ``configure_optimizers``.
        optimizer_kwargs : dict or None
            Extra optimizer keyword arguments; may include ``lr``, which
            then overrides the ``lr`` parameter.
        criterion_cls : type
            Loss class, instantiated once with ``criterion_kwargs``.
        criterion_kwargs : dict or None
            Keyword arguments for the loss constructor.
        lr : float
            Default learning rate.
        """
        super().__init__()
        self.model = model
        # Log every hyperparameter except the (non-serializable) model itself.
        self.save_hyperparameters(ignore=['model'])
        self.optimizer_cls = optimizer_cls
        self.optimizer_kwargs = optimizer_kwargs or {}
        self.criterion = criterion_cls(**(criterion_kwargs or {}))
        self.lr = lr

    def forward(self, x):
        """Return raw logits from the wrapped model."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """One optimization step; logs ``train_loss``."""
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute validation loss and accuracy; logs ``val_loss``/``val_acc``."""
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log_dict({"val_loss": loss, "val_acc": acc}, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Instantiate the optimizer over all trainable parameters.

        An ``lr`` entry inside ``optimizer_kwargs`` takes precedence over
        ``self.lr``; previously passing both raised a duplicate-keyword
        TypeError.
        """
        opt_kwargs = {"lr": self.lr, **self.optimizer_kwargs}
        return self.optimizer_cls(self.parameters(), **opt_kwargs)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
2
|
+
from sklearn.naive_bayes import GaussianNB
|
|
3
|
+
from sklearn.metrics import (
|
|
4
|
+
roc_curve, precision_recall_curve, auc, f1_score, confusion_matrix
|
|
5
|
+
)
|
|
6
|
+
from sklearn.utils.class_weight import compute_class_weight
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
class SklearnModelWrapper:
    """Thin adapter giving scikit-learn estimators a uniform interface.

    Wraps any estimator exposing ``fit`` / ``predict`` / ``predict_proba``
    and adds a binary-classification ``evaluate`` helper.
    """

    def __init__(self, model):
        self.model = model  # underlying sklearn-style estimator

    def fit(self, X_train, y_train):
        """Fit the wrapped estimator in place."""
        self.model.fit(X_train, y_train)

    def predict(self, X):
        """Return hard class predictions."""
        return self.model.predict(X)

    def predict_proba(self, X):
        """Return per-class probability estimates."""
        return self.model.predict_proba(X)

    def evaluate(self, X_test, y_test):
        """Score the estimator on a held-out binary test set.

        Returns a dict with ROC and PR curves, F1, ROC-AUC, PR-AUC, the
        positive-frequency-normalized PR-AUC, and the confusion matrix.
        Assumes binary labels with the positive class in column 1 of
        ``predict_proba``.
        """
        pos_scores = self.predict_proba(X_test)[:, 1]
        hard_preds = self.predict(X_test)

        # Threshold-sweep curves from the positive-class scores.
        fpr, tpr, _ = roc_curve(y_test, pos_scores)
        precision, recall, _ = precision_recall_curve(y_test, pos_scores)

        roc_area = auc(fpr, tpr)
        pr_area = auc(recall, precision)
        # Normalize PR-AUC by the positive-class base rate so values are
        # comparable across datasets with different class balance.
        base_rate = np.mean(y_test == 1)

        return {
            "fpr": fpr,
            "tpr": tpr,
            "precision": precision,
            "recall": recall,
            "f1": f1_score(y_test, hard_preds),
            "auc": roc_area,
            "pr_auc": pr_area,
            "pr_auc_norm": pr_area / base_rate,
            "confusion_matrix": confusion_matrix(y_test, hard_preds),
        }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .train_lightning_model import train_lightning_model
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
from pytorch_lightning import Trainer
|
|
3
|
+
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
|
|
4
|
+
|
|
5
|
+
def train_lightning_model(
    model,
    datamodule,
    max_epochs=20,
    patience=5,
    monitor_metric="val_loss",
    checkpoint_path=None,
):
    """Fit a LightningModule with early stopping and optional checkpointing.

    Parameters
    ----------
    model : pl.LightningModule
        Model to train.
    datamodule : pl.LightningDataModule
        Provides the train/val dataloaders.
    max_epochs : int
        Upper bound on training epochs.
    patience : int
        Early-stopping patience in epochs without improvement.
    monitor_metric : str
        Logged metric to monitor; lower is treated as better.
    checkpoint_path : str or None
        Directory for saving the best checkpoint; disabled when falsy.

    Returns
    -------
    Trainer
        The fitted Trainer instance.
    """
    # Pick the best available single device: CUDA > Apple MPS > CPU.
    if torch.cuda.is_available():
        accelerator = "gpu"
    elif torch.backends.mps.is_available():
        accelerator = "mps"
    else:
        accelerator = "cpu"
    devices = 1

    # Stop early once the monitored metric plateaus.
    callbacks = [EarlyStopping(monitor=monitor_metric, patience=patience, mode="min")]
    if checkpoint_path:
        # Keep only the single best checkpoint by the monitored metric.
        best_ckpt = ModelCheckpoint(
            dirpath=checkpoint_path,
            filename="{epoch}-{val_loss:.4f}",
            monitor=monitor_metric,
            save_top_k=1,
            mode="min",
        )
        callbacks.append(best_ckpt)

    trainer = Trainer(
        max_epochs=max_epochs,
        callbacks=callbacks,
        accelerator=accelerator,
        devices=devices,
        log_every_n_steps=10,
    )
    trainer.fit(model, datamodule=datamodule)

    return trainer
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.1.6"
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
from .preprocessing import random_fill_nans
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/binarize_converted_base_identities.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_read_features_from_bam.py
RENAMED
|
File without changes
|
{smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_read_lengths_from_bed.py
RENAMED
|
File without changes
|
{smftools-0.1.6 → smftools-0.1.7}/smftools/informatics/helpers/extract_readnames_from_BAM.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/calculate_pairwise_hamming_distances.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{smftools-0.1.6 → smftools-0.1.7}/smftools/preprocessing/filter_converted_reads_on_methylation.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{smftools-0.1.6 → smftools-0.1.7}/smftools/tools/archived/classify_non_methylated_features.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|