smftools 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {smftools-0.3.1 → smftools-0.3.2}/.github/workflows/ci.yml +6 -6
- {smftools-0.3.1 → smftools-0.3.2}/.gitignore +3 -0
- smftools-0.3.2/AGENTS.md +172 -0
- smftools-0.3.2/CLAUDE.md +3 -0
- {smftools-0.3.1 → smftools-0.3.2}/PKG-INFO +3 -1
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/basic_usage.md +38 -4
- smftools-0.3.2/docs/source/tutorials/cli_usage.md +136 -0
- {smftools-0.3.1 → smftools-0.3.2}/pyproject.toml +2 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/_version.py +1 -1
- smftools-0.3.2/smftools/cli/chimeric_adata.py +1563 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/cli/helpers.py +18 -2
- {smftools-0.3.1 → smftools-0.3.2}/smftools/cli/hmm_adata.py +18 -1
- smftools-0.3.2/smftools/cli/latent_adata.py +773 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/cli/load_adata.py +2 -2
- {smftools-0.3.1 → smftools-0.3.2}/smftools/cli/preprocess_adata.py +32 -93
- smftools-0.3.2/smftools/cli/recipes.py +26 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/cli/spatial_adata.py +23 -109
- smftools-0.3.2/smftools/cli/variant_adata.py +423 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/cli_entry.py +41 -5
- {smftools-0.3.1 → smftools-0.3.2}/smftools/config/conversion.yaml +0 -10
- {smftools-0.3.1 → smftools-0.3.2}/smftools/config/deaminase.yaml +3 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/config/default.yaml +49 -13
- {smftools-0.3.1 → smftools-0.3.2}/smftools/config/experiment_config.py +96 -3
- {smftools-0.3.1 → smftools-0.3.2}/smftools/constants.py +4 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/hmm/call_hmm_peaks.py +1 -1
- smftools-0.3.2/smftools/informatics/binarize_converted_base_identities.py +99 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/converted_BAM_to_adata.py +53 -13
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/h5ad_functions.py +83 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/modkit_extract_to_adata.py +4 -0
- smftools-0.3.2/smftools/plotting/__init__.py +56 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/plotting/autocorrelation_plotting.py +22 -4
- smftools-0.3.2/smftools/plotting/chimeric_plotting.py +1893 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/plotting/classifiers.py +28 -14
- smftools-0.3.2/smftools/plotting/general_plotting.py +64 -0
- smftools-0.3.2/smftools/plotting/hmm_plotting.py +1961 -0
- smftools-0.3.2/smftools/plotting/latent_plotting.py +804 -0
- smftools-0.3.2/smftools/plotting/plotting_utils.py +243 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/plotting/position_stats.py +16 -8
- smftools-0.3.2/smftools/plotting/preprocess_plotting.py +281 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/plotting/qc_plotting.py +8 -3
- smftools-0.3.2/smftools/plotting/spatial_plotting.py +1134 -0
- smftools-0.3.2/smftools/plotting/variant_plotting.py +1231 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/__init__.py +3 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/append_base_context.py +1 -1
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/append_mismatch_frequency_sites.py +35 -6
- smftools-0.3.2/smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
- smftools-0.3.2/smftools/preprocessing/append_variant_call_layer.py +480 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/flag_duplicate_reads.py +4 -4
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/invert_adata.py +1 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/readwrite.py +109 -85
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/__init__.py +6 -0
- smftools-0.3.2/smftools/tools/calculate_knn.py +121 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/calculate_nmf.py +18 -7
- smftools-0.3.2/smftools/tools/calculate_pca.py +180 -0
- smftools-0.3.2/smftools/tools/calculate_umap.py +102 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/position_stats.py +4 -4
- smftools-0.3.2/smftools/tools/rolling_nn_distance.py +872 -0
- smftools-0.3.2/smftools/tools/sequence_alignment.py +140 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/tensor_factorization.py +52 -4
- smftools-0.3.2/tests/conftest.py +13 -0
- smftools-0.3.2/tests/unit/test_annotate_zero_hamming_segments_parent_layer.py +122 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_append_mismatch_frequency_sites.py +12 -2
- smftools-0.3.2/tests/unit/test_append_reference_strand_quality_stats.py +78 -0
- smftools-0.3.2/tests/unit/test_append_sequence_mismatch_annotations.py +27 -0
- smftools-0.3.2/tests/unit/test_append_variant_segment_layer.py +94 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_calculate_nmf.py +22 -0
- smftools-0.3.2/tests/unit/test_chimeric_adata_mod_hamming_flag.py +24 -0
- smftools-0.3.2/tests/unit/test_chimeric_adata_span_layer.py +72 -0
- smftools-0.3.2/tests/unit/test_chimeric_adata_top_segments.py +36 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_combined_hmm_length_clustermap_barplot.py +1 -1
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_combined_raw_clustermap_barplot_nan_ignore.py +3 -3
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_hmm_clustermap_colormap.py +2 -1
- smftools-0.3.2/tests/unit/test_hmm_variant_overlay_index_mapping.py +86 -0
- smftools-0.3.2/tests/unit/test_latent_adata_var_filters.py +88 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_length_layer_subclass_mapping.py +2 -1
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_methylation_fraction_nan_handling.py +2 -1
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_plot_cp_sequence_components.py +28 -0
- smftools-0.3.2/tests/unit/test_plot_hamming_span_trio.py +65 -0
- smftools-0.3.2/tests/unit/test_plot_mismatch_base_frequency_by_position.py +196 -0
- smftools-0.3.2/tests/unit/test_plot_pca_components.py +27 -0
- smftools-0.3.2/tests/unit/test_plot_rolling_nn_and_layer.py +121 -0
- smftools-0.3.2/tests/unit/test_plot_rolling_nn_and_two_layers.py +74 -0
- smftools-0.3.2/tests/unit/test_plot_sequence_integer_encoding_clustermaps.py +82 -0
- smftools-0.3.2/tests/unit/test_plot_variant_segment_clustermaps.py +66 -0
- smftools-0.3.2/tests/unit/test_plot_zero_hamming_pair_counts.py +45 -0
- smftools-0.3.1/tests/unit/test_plot_rolling_nn_and_layer.py → smftools-0.3.2/tests/unit/test_plot_zero_hamming_span_and_layer.py +20 -12
- smftools-0.3.2/tests/unit/test_rolling_nn_distance.py +368 -0
- smftools-0.3.2/tests/unit/test_tensor_factorization.py +151 -0
- smftools-0.3.2/tests/unit/test_variant_adata_overlay_config_forwarding.py +93 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/tools/test_calculate_umap.py +49 -8
- smftools-0.3.2/tests/unit/tools/test_sequence_alignment.py +39 -0
- smftools-0.3.1/AGENTS.md +0 -69
- smftools-0.3.1/docs/source/tutorials/cli_usage.md +0 -91
- smftools-0.3.1/smftools/cli/latent_adata.py +0 -318
- smftools-0.3.1/smftools/informatics/binarize_converted_base_identities.py +0 -186
- smftools-0.3.1/smftools/plotting/__init__.py +0 -42
- smftools-0.3.1/smftools/plotting/general_plotting.py +0 -3368
- smftools-0.3.1/smftools/plotting/hmm_plotting.py +0 -377
- smftools-0.3.1/smftools/tools/calculate_umap.py +0 -186
- smftools-0.3.1/smftools/tools/rolling_nn_distance.py +0 -235
- smftools-0.3.1/tests/unit/test_plot_sequence_integer_encoding_clustermaps.py +0 -40
- smftools-0.3.1/tests/unit/test_rolling_nn_distance.py +0 -232
- smftools-0.3.1/tests/unit/test_tensor_factorization.py +0 -65
- {smftools-0.3.1 → smftools-0.3.2}/.gitattributes +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/.pre-commit-config.yaml +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/.readthedocs.yaml +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/CONTRIBUTING.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/LICENSE +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/README.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/Makefile +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/make.bat +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/_static/converted_BAM_to_adata.png +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/_static/modkit_extract_to_adata.png +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/_static/smftools-1.svg +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/_static/smftools-1.tif +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/_static/smftools_informatics_diagram.pdf +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/_static/smftools_informatics_diagram.png +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/_static/smftools_preprocessing_diagram.png +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/_templates/tmp +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/api/datasets.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/api/index.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/api/informatics.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/api/preprocessing.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/api/tools.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/cli.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/conf.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/contributors.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/dev/index.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/index.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/installation.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/references.bib +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/references.rst +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/release-notes/0.1.0.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/release-notes/0.1.1.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/release-notes/0.1.6.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/release-notes/0.2.1.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/release-notes/0.2.3.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/release-notes/0.3.0.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/release-notes/index.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/requirements.txt +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/schema/anndata_schema.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/tutorials/experiment_config.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/docs/source/tutorials/index.md +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/experiment_config.csv +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/notebooks/Kissiov_and_McKenna_2025_example_notebook.ipynb +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/notebooks/Kissiov_and_McKenna_2025_sample_sheet.csv +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/requirements.txt +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/sample_sheet.csv +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/_settings.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/cli/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/cli/archived/cli_flows.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/config/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/config/direct.yaml +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/config/discover_input_files.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/datasets/F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/datasets/F1_sample_sheet.csv +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/datasets/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/datasets/dCas9_m6A_invitro_kinetics.h5ad.gz +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/datasets/datasets.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/hmm/HMM.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/hmm/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/hmm/archived/apply_hmm_batched.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/hmm/archived/calculate_distances.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/hmm/archived/call_hmm_peaks.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/hmm/archived/train_hmm.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/hmm/display_hmm.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/hmm/hmm_readwrite.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/hmm/nucleosome_hmm_refinement.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/bam_conversion.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/bam_direct.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/basecall_pod5s.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/basecalls_to_adata.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/conversion_smf.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/deaminase_smf.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/direct_smf.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/fast5_to_pod5.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/bam_qc.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/canoncall.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/count_aligned_reads.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/extract_base_identities.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/extract_mods.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/find_conversion_sites.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/get_native_references.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/index_fasta.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/informatics.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/load_adata.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/make_modbed.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/modQC.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/modcall.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/ohe_batching.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/one_hot_decode.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/one_hot_encode.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/print_bam_query_seq.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/subsample_fasta_from_bed.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/archived/subsample_pod5.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/bam_functions.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/basecalling.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/bed_functions.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/complement_base_list.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/fasta_functions.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/modkit_functions.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/ohe.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/pod5_functions.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/run_multiqc.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/informatics/sequence_encoding.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/logging_utils.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/data/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/data/anndata_data_module.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/data/preprocessing.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/evaluation/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/evaluation/eval_utils.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/evaluation/evaluators.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/inference/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/inference/inference_utils.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/inference/lightning_inference.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/inference/sklearn_inference.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/inference/sliding_window_inference.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/models/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/models/base.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/models/cnn.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/models/lightning_base.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/models/mlp.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/models/positional.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/models/rnn.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/models/sklearn_models.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/models/transformer.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/models/wrappers.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/training/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/training/train_lightning_model.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/training/train_sklearn_model.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/utils/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/utils/device.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/machine_learning/utils/grl.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/metadata.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/optional_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/append_binary_layer_by_base_context.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/archived/calculate_complexity.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/archived/mark_duplicates.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/archived/preprocessing.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/archived/remove_duplicates.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/binarize.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/binarize_on_Youden.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/binary_layers_to_ohe.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/calculate_complexity_II.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/calculate_consensus.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/calculate_coverage.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/calculate_pairwise_differences.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/calculate_pairwise_hamming_distances.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/calculate_position_Youden.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/calculate_read_length_stats.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/calculate_read_modification_stats.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/clean_NaN.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/filter_adata_by_nan_proportion.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/filter_reads_on_length_quality_mapping.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/filter_reads_on_modification_thresholds.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/load_sample_sheet.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/make_dirs.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/min_non_diagonal.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/recipes.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/reindex_references_adata.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/preprocessing/subsample_adata.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/schema/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/schema/anndata_schema_v1.yaml +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/archived/apply_hmm.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/archived/classifiers.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/archived/classify_methylated_features.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/archived/classify_non_methylated_features.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/archived/subset_adata_v1.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/archived/subset_adata_v2.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/calculate_leiden.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/cluster_adata_on_methylation.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/general_tools.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/read_stats.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/spatial_autocorrelation.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/smftools/tools/subset_adata.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/_test_inputs/_test_bed_I.bed +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/_test_inputs/_test_fasta_I.fa +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/_test_inputs/_test_fasta_I.fa.fai +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/_test_inputs/_test_pod5_I.pod5 +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/_test_inputs/test_experiment_config_conversion_I.csv +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/_test_inputs/test_experiment_config_deaminase_I.csv +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/_test_inputs/test_experiment_config_direct_I.csv +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/e2e/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/e2e/cli/test_load_adata.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/e2e/cli/test_spatial_adata.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/integration/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/cli/test_cli_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/config/test_config_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/datasets/test_datasets_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/hmm/test_hmm_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/import_helpers.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/informatics/test_informatics_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/machine_learning/data/test_data_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/machine_learning/evaluation/test_evaluation_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/machine_learning/inference/test_inference_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/machine_learning/models/test_models_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/machine_learning/training/test_training_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/machine_learning/utils/test_utils_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/plotting/test_plotting_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/preprocessing/test_preprocessing_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/test_smftools_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/smoke/tools/test_tools_imports.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/__init__.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/config/test_LoadExperimentConfig.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/datasets/test_datasets.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/hmm/test_mask_read_span.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/informatics/test_bam_base_identities.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/informatics/test_bam_read_tags.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/informatics/test_bam_secondary_supplementary.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/informatics/test_h5ad_secondary_supplementary.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/informatics/test_modkit_sequence_batch_files.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/informatics/test_modkit_sequence_encoding.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/informatics/test_tool_backends.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_combined_hmm_length_clustermap_outputs.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_combined_hmm_raw_clustermap_nan_fill.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_combined_raw_clustermap_nan_fill.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_metadata.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_plot_hmm_size_contours_feature_ranges.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_plot_hmm_size_contours_nan_values.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_plot_nmf_components.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_plot_read_span_quality_clustermaps.py +0 -0
- {smftools-0.3.1 → smftools-0.3.2}/tests/unit/test_readwrite.py +0 -0
|
@@ -2,9 +2,9 @@ name: CI
|
|
|
2
2
|
|
|
3
3
|
on:
|
|
4
4
|
push:
|
|
5
|
-
branches: ["main"
|
|
5
|
+
branches: ["main"]
|
|
6
6
|
pull_request:
|
|
7
|
-
branches: ["main"
|
|
7
|
+
branches: ["main"]
|
|
8
8
|
|
|
9
9
|
concurrency:
|
|
10
10
|
group: ${{ github.workflow }}-${{ github.ref }}
|
|
@@ -49,7 +49,7 @@ jobs:
|
|
|
49
49
|
- name: Lint with ruff
|
|
50
50
|
run: ruff check --output-format=github .
|
|
51
51
|
|
|
52
|
-
|
|
52
|
+
pytest:
|
|
53
53
|
runs-on: ubuntu-latest
|
|
54
54
|
strategy:
|
|
55
55
|
fail-fast: false
|
|
@@ -74,9 +74,9 @@ jobs:
|
|
|
74
74
|
- name: Install dependencies
|
|
75
75
|
run: |
|
|
76
76
|
python -m pip install --upgrade pip
|
|
77
|
-
python -m pip install .[dev]
|
|
78
|
-
- name: Run
|
|
79
|
-
run: pytest -m smoke -q
|
|
77
|
+
python -m pip install .[dev,torch,plotting]
|
|
78
|
+
- name: Run pytest
|
|
79
|
+
run: pytest -m "smoke" -q
|
|
80
80
|
|
|
81
81
|
docs:
|
|
82
82
|
runs-on: ubuntu-latest
|
smftools-0.3.2/AGENTS.md
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
This file tells coding agents (including OpenAI's Codex, Anthropic's Claude Code, and Google's Gemini) how to work in this repo.
|
|
4
|
+
|
|
5
|
+
- For AGENTS.md or CLAUDE.md files:
|
|
6
|
+
- Agents can read from these files.
|
|
7
|
+
- Agents can never edit these files.
|
|
8
|
+
|
|
9
|
+
## Goals
|
|
10
|
+
- Make minimal, correct changes.
|
|
11
|
+
- Prefer small PRs / diffs.
|
|
12
|
+
- Keep behavior stable unless the task explicitly requests changes.
|
|
13
|
+
- Generate production grade, scalable code.
|
|
14
|
+
|
|
15
|
+
## Prompt interface
|
|
16
|
+
- When asked about a problem or task, first read all files relevant to the task's scope.
|
|
17
|
+
- Describe the problem given the context.
|
|
18
|
+
- Formulate a plan to address the problem within scope.
|
|
19
|
+
- Refine the plan with user input.
|
|
20
|
+
- Implement code after being told to proceed.
|
|
21
|
+
|
|
22
|
+
## Repo orientation
|
|
23
|
+
- Read existing patterns before inventing new ones.
|
|
24
|
+
- Don’t refactor broadly unless asked.
|
|
25
|
+
- If you’re unsure about intended behavior, look for tests or docs first.
|
|
26
|
+
- If behavior is not clear after reading tests and docs, look at the Click commands section in this file.
|
|
27
|
+
- Ignore all files in any directory named "archived".
|
|
28
|
+
- User defined parameters exist within src/smftools/config.
|
|
29
|
+
- Parameters are inherited from default.yaml -> MODALITY.yaml -> user_defined_config.csv
|
|
30
|
+
- Frequently used non user defined variables should exist within src/smftools/constants.py
|
|
31
|
+
- Logging functionality is defined within src/smftools/logging_utils.py
|
|
32
|
+
- Optional dependency handling is defined within src/smftools/optional_imports.py
|
|
33
|
+
- Frequently used I/O functionality is defined within src/smftools/readwrite.py
|
|
34
|
+
- CLI functionality is provided through click and is defined within:
|
|
35
|
+
- src/smftools/cli_entry.py
|
|
36
|
+
- Modules of the src/smftools/cli subpackage
|
|
37
|
+
- RTD documentation organization through smftools/docs
|
|
38
|
+
- Pytest testing within smftools/tests
|
|
39
|
+
|
|
40
|
+
## Project dependencies
|
|
41
|
+
- A core set of dependencies is required for the project.
|
|
42
|
+
- Various optional dependencies are provided for:
|
|
43
|
+
- Optional functional modules of the package (ont, plotting, ml-base, ml-extended, umap, qc)
|
|
44
|
+
- If available, a Python version of a CLI tool is preferred (Such as for Samtools, Bedtools, BedGraphToBigWig).
|
|
45
|
+
- torch is listed as an extra dependency, but is currently required.
|
|
46
|
+
- All dependencies can be installed with `pip install -e ".[all]"`
|
|
47
|
+
- Certain command line tools are currently needed for certain functionalities within smftools load:
|
|
48
|
+
- dorado: Used for nanopore basecalling from POD5/FAST5 files to BAM.
|
|
49
|
+
- dorado/minimap2: Used for alignment of reads to reference.
|
|
50
|
+
- dorado: Used for demultiplexing of nanopore derived BAMs.
|
|
51
|
+
- modkit: Used for extracting modification probabilities from MM/ML BAM tags for native smf modality.
|
|
52
|
+
|
|
53
|
+
## Setup
|
|
54
|
+
- Use current environment if the core dependencies are installed.
|
|
55
|
+
- If dependencies are not found, create a venv in smftools/venvs/ directory:
|
|
56
|
+
- `python3 -m venv .temp-venv && source .temp-venv/bin/activate`
|
|
57
|
+
- Install the core dependencies and development dependencies for testing/formatting/linting:
|
|
58
|
+
- `pip install -e ".[dev,torch]"`
|
|
59
|
+
- If code is raising dependency errors and they are in the optional dependencies:
|
|
60
|
+
- `pip install -e ".[EXTRA_DEPENDENCY_NAME]"`
|
|
61
|
+
|
|
62
|
+
## How to run checks
|
|
63
|
+
- Smoke tests: `pytest -m smoke -q`
|
|
64
|
+
- Unit tests: `pytest -m unit -q`
|
|
65
|
+
- Integration tests: `pytest -m integration -q`
|
|
66
|
+
- E2E tests: `pytest -m e2e -q`
|
|
67
|
+
- Coverage (if configured): `pytest --cov`
|
|
68
|
+
- Lint: `ruff check .`
|
|
69
|
+
- Format: `ruff format .`
|
|
70
|
+
- Type-check (if configured): `mypy .`
|
|
71
|
+
|
|
72
|
+
## Coding conventions
|
|
73
|
+
- Follow existing style and module layout.
|
|
74
|
+
- Prefer clear, explicit code over cleverness.
|
|
75
|
+
- Prefer modular functionality to facilitate testing and future development.
|
|
76
|
+
- Do not over-parameterize functions when possible.
|
|
77
|
+
- For function parameters that a user may want to tune, use the config management strategy.
|
|
78
|
+
- Use constants.py when appropriate.
|
|
79
|
+
- Annotate code blocks to describe functionality.
|
|
80
|
+
- Add/adjust tests for bug fixes and new behavior.
|
|
81
|
+
- Keep public APIs backward compatible unless explicitly changing them.
|
|
82
|
+
- Python:
|
|
83
|
+
- Use type hints for new/modified functions where reasonable.
|
|
84
|
+
- Use Google style docstring format.
|
|
85
|
+
- Avoid heavy dependencies unless necessary.
|
|
86
|
+
- Use typing.TYPE_CHECKING and annotations.
|
|
87
|
+
- In docstring of new functions, define the purpose of the function and what it does.
|
|
88
|
+
|
|
89
|
+
## Testing expectations
|
|
90
|
+
- New functionality must include tests.
|
|
91
|
+
- If tests are flaky or slow, note it and scope the change.
|
|
92
|
+
|
|
93
|
+
## Logging & secrets
|
|
94
|
+
- Don’t log secrets, tokens, or PII.
|
|
95
|
+
- Never hardcode credentials.
|
|
96
|
+
- If sample keys are needed, use obvious placeholders like `YOUR_API_KEY_HERE`.
|
|
97
|
+
|
|
98
|
+
## Git / PR hygiene
|
|
99
|
+
- Keep commits focused.
|
|
100
|
+
- Update docs/changelog if behavior or user-facing CLI changes.
|
|
101
|
+
- If you change a CLI flag or config schema, add a migration note.
|
|
102
|
+
|
|
103
|
+
## If something fails
|
|
104
|
+
- If a command fails, paste the full error and summarize likely causes.
|
|
105
|
+
- Don’t “fix” by deleting tests or weakening assertions unless explicitly instructed.
|
|
106
|
+
|
|
107
|
+
## Click commands and their primary intent. Look in docs first, and underneath if the task is still not clear.
|
|
108
|
+
- smftools load:
|
|
109
|
+
- Take a variety of raw sequencing input options (FASTQs, POD5s, BAMs) from a single molecule footprinting experiment.
|
|
110
|
+
- Determine the smf modality specified by the user (conversion, deaminase, native).
|
|
111
|
+
- Handle FASTA inputs
|
|
112
|
+
- Basecall the files using dorado if needed.
|
|
113
|
+
- Align the reads using dorado or minimap2.
|
|
114
|
+
- Sort/Index/Demultiplex BAMs.
|
|
115
|
+
- BAM QC.
|
|
116
|
+
- Extract Base modification probabilities for native smf modality
|
|
117
|
+
- Load an AnnData object containing:
|
|
118
|
+
- adata.X with a read X position matrix of SMF data.
|
|
119
|
+
- adata.layers with:
|
|
120
|
+
- integer encoded DNA sequences of each read.
|
|
121
|
+
- mismatch encodings of DNA sequence vs reference for each read.
|
|
122
|
+
- Base Q-scores for each read.
|
|
123
|
+
- Read span masks indicating where the read aligned.
|
|
124
|
+
- adata.var with per Reference_strand FASTA bases across positions.
|
|
125
|
+
- adata.var_names being positional indexes within each read.
|
|
126
|
+
- adata.obs_names being read names.
|
|
127
|
+
- adata.obs with read level metadata
|
|
128
|
+
- adata.uns with various unstructured data metrics.
|
|
129
|
+
- Run multiqc on the BAM qc files.
|
|
130
|
+
- Directory temp file cleanup.
|
|
131
|
+
- Write out the adata, its backup accessory data, and csv files of obs, var, and keys.
|
|
132
|
+
- smftools preprocess:
|
|
133
|
+
- Requires the adata produced by smftools load.
|
|
134
|
+
- Adds various QC metrics and performs data preprocessing and filtering.
|
|
135
|
+
- Read length, quality, and mapping based QC.
|
|
136
|
+
- Per reference position level QC.
|
|
137
|
+
- Appending base context for each reference.
|
|
138
|
+
- Binarization of SMF probabilities for the native smf modality
|
|
139
|
+
- NaN filling strategies in adata.layers.
|
|
140
|
+
- Read level modification QC and filtering.
|
|
141
|
+
- Duplicate detection and complexity analysis for conversion/deaminase modalities.
|
|
142
|
+
- Visualizing read spans and base quality clustermaps.
|
|
143
|
+
- Optionally inverts the adata along the var-axis.
|
|
144
|
+
- Optionally reindexes var.
|
|
145
|
+
- smftools variant:
|
|
146
|
+
- Requires at least a preprocessed adata object.
|
|
147
|
+
- Calculates per position mismatch frequencies/types for each reference/sample.
|
|
148
|
+
- Optional variant site labeling if comparing two references.
|
|
149
|
+
- Visualizes sequence encodings and mismatch encodings with clustermaps.
|
|
150
|
+
- smftools chimeric:
|
|
151
|
+
- Requires at least a preprocessed adata object.
|
|
152
|
+
- Meant to detect putative PCR chimeras.
|
|
153
|
+
- smftools spatial:
|
|
154
|
+
- Requires at least a preprocessed adata object.
|
|
155
|
+
- Basic spatial signal analyses.
|
|
156
|
+
- Clustermaps to visualize smf signal per reference/sample.
|
|
157
|
+
- Spatial autocorrelation.
|
|
158
|
+
- Position x position correlation matrices (Pearson, Binary covariance, chi2, relative risk)
|
|
159
|
+
- smftools hmm:
|
|
160
|
+
- Requires at least a preprocessed adata object.
|
|
161
|
+
- Fits/saves/applies HMM to adata to label putative molecular features.
|
|
162
|
+
- Creates adata.layers that hold binary masks of each feature class/subclass.
|
|
163
|
+
- Creates adata.layers that hold HMM emission probabilities.
|
|
164
|
+
- Visualizes HMM layers with clustermaps.
|
|
165
|
+
- Performs peak calling on HMM layers and labels reads with the features in obs.
|
|
166
|
+
- smftools latent:
|
|
167
|
+
- Requires at least a preprocessed adata object.
|
|
168
|
+
- Generates latent representations of the smf data.
|
|
169
|
+
- PCA/KNN/UMAP/NMF/CP decomposition strategies.
|
|
170
|
+
- Represents full sequences.
|
|
171
|
+
- Represents modified sites only.
|
|
172
|
+
- Represents non-modified sites only.
|
smftools-0.3.2/CLAUDE.md
ADDED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: smftools
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Single Molecule Footprinting Analysis in Python.
|
|
5
5
|
Project-URL: Source, https://github.com/jkmckenna/smftools
|
|
6
6
|
Project-URL: Documentation, https://smftools.readthedocs.io/
|
|
@@ -55,6 +55,7 @@ Provides-Extra: all
|
|
|
55
55
|
Requires-Dist: captum; extra == 'all'
|
|
56
56
|
Requires-Dist: fastcluster; extra == 'all'
|
|
57
57
|
Requires-Dist: hydra-core; extra == 'all'
|
|
58
|
+
Requires-Dist: igraph; extra == 'all'
|
|
58
59
|
Requires-Dist: leidenalg; extra == 'all'
|
|
59
60
|
Requires-Dist: lightning; extra == 'all'
|
|
60
61
|
Requires-Dist: matplotlib>=3.6.2; extra == 'all'
|
|
@@ -69,6 +70,7 @@ Requires-Dist: pysam>=0.19.1; extra == 'all'
|
|
|
69
70
|
Requires-Dist: scikit-learn>=1.0.2; extra == 'all'
|
|
70
71
|
Requires-Dist: seaborn>=0.11; extra == 'all'
|
|
71
72
|
Requires-Dist: shap; extra == 'all'
|
|
73
|
+
Requires-Dist: tensorly; extra == 'all'
|
|
72
74
|
Requires-Dist: torch>=1.9.0; extra == 'all'
|
|
73
75
|
Requires-Dist: umap-learn>=0.5.5; extra == 'all'
|
|
74
76
|
Requires-Dist: upsetplot; extra == 'all'
|
|
@@ -17,7 +17,7 @@ This command takes a user passed config file handling:
|
|
|
17
17
|
|
|
18
18
|
## Preprocess Usage
|
|
19
19
|
|
|
20
|
-
This command performs preprocessing on the anndata object.
|
|
20
|
+
This command performs preprocessing on the anndata object.
|
|
21
21
|
|
|
22
22
|
```shell
|
|
23
23
|
smftools preprocess "/Path_to_experiment_config.csv"
|
|
@@ -25,19 +25,36 @@ smftools preprocess "/Path_to_experiment_config.csv"
|
|
|
25
25
|
|
|
26
26
|

|
|
27
27
|
|
|
28
|
+
|
|
29
|
+
## Variant Usage
|
|
30
|
+
|
|
31
|
+
This command performs DNA sequence variation based analyses on the anndata object.
|
|
32
|
+
|
|
33
|
+
```shell
|
|
34
|
+
smftools variant "/Path_to_experiment_config.csv"
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Chimeric Usage
|
|
38
|
+
|
|
39
|
+
This command performs putative PCR chimera detection on the anndata object.
|
|
40
|
+
|
|
41
|
+
```shell
|
|
42
|
+
smftools chimeric "/Path_to_experiment_config.csv"
|
|
43
|
+
```
|
|
44
|
+
|
|
28
45
|
## Spatial Usage
|
|
29
46
|
|
|
30
|
-
This command performs spatial analysis on the anndata object.
|
|
47
|
+
This command performs spatial analysis on the anndata object.
|
|
31
48
|
|
|
32
49
|
```shell
|
|
33
50
|
smftools spatial "/Path_to_experiment_config.csv"
|
|
34
51
|
```
|
|
35
52
|
|
|
36
|
-
- Currently Includes: Position X Position correlation matrices,
|
|
53
|
+
- Currently Includes: Position X Position correlation matrices, read x position clustermaps, and spatial autocorrelation.
|
|
37
54
|
|
|
38
55
|
## HMM Usage
|
|
39
56
|
|
|
40
|
-
This command performs hmm based feature annotation on the anndata object.
|
|
57
|
+
This command performs hmm based feature annotation on the anndata object.
|
|
41
58
|
|
|
42
59
|
```shell
|
|
43
60
|
smftools hmm "/Path_to_experiment_config.csv"
|
|
@@ -45,6 +62,23 @@ smftools hmm "/Path_to_experiment_config.csv"
|
|
|
45
62
|
|
|
46
63
|
- Main outputs will be stored in adata.layers
|
|
47
64
|
|
|
65
|
+
|
|
66
|
+
## Latent Usage
|
|
67
|
+
|
|
68
|
+
This command constructs various latent representations of the anndata object.
|
|
69
|
+
|
|
70
|
+
```shell
|
|
71
|
+
smftools latent "/Path_to_experiment_config.csv"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Full Usage
|
|
75
|
+
|
|
76
|
+
This command is a wrapper that sequentially runs load, preprocess, variant, chimeric, spatial, hmm, latent workflows.
|
|
77
|
+
|
|
78
|
+
```shell
|
|
79
|
+
smftools full "/Path_to_experiment_config.csv"
|
|
80
|
+
```
|
|
81
|
+
|
|
48
82
|
## Batch Usage
|
|
49
83
|
|
|
50
84
|
This command performs batch processing of any of the above commands across multiple experiments. It takes in a tsv, txt, or csv of experiment specific config csvs.
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# Command line tutorials
|
|
2
|
+
|
|
3
|
+
## Quick start
|
|
4
|
+
|
|
5
|
+
Most CLI workflows start with an experiment configuration CSV that points to your data, FASTA, and
|
|
6
|
+
output directory. Once the configuration is ready, you can run commands such as:
|
|
7
|
+
|
|
8
|
+
```shell
|
|
9
|
+
smftools load /path/to/experiment_config.csv
|
|
10
|
+
smftools preprocess /path/to/experiment_config.csv
|
|
11
|
+
smftools full /path/to/experiment_config.csv
|
|
12
|
+
smftools batch full /path/to/config_paths.csv
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Each command will create (or reuse) stage-specific AnnData files in the output directory. Later
|
|
16
|
+
commands reuse results from earlier stages unless you explicitly force a redo via configuration
|
|
17
|
+
flags.
|
|
18
|
+
|
|
19
|
+
## What each command does
|
|
20
|
+
|
|
21
|
+
### `smftools load`
|
|
22
|
+
|
|
23
|
+
The load command builds the raw AnnData object from your raw sequencing data. It:
|
|
24
|
+
|
|
25
|
+
- Handles input formats (fast5/pod5/fastq/bam).
|
|
26
|
+
- Performs basecalling, alignment, demultiplexing, and BAM QC.
|
|
27
|
+
- Optionally generates BED/bigWig outputs for alignment summaries.
|
|
28
|
+
- Constructs the raw AnnData object (Single molecules x Positional coordinates).
|
|
29
|
+
- adata.X contains binarized modification data (conversion/deaminase), or modification probabilities (native).
|
|
30
|
+
- Adds basic read-level QC annotations (Read start, end, length, mean quality).
|
|
31
|
+
- Adds layers encoding read DNA sequences, base quality scores, base mismatches.
|
|
32
|
+
- Maintains BAM Tags/Flags in adata.obs.
|
|
33
|
+
- Writes the raw AnnData to the canonical output path and runs MultiQC.
|
|
34
|
+
- Optionally deletes intermediate BAMs, H5ADs, and TSVs.
|
|
35
|
+
|
|
36
|
+
### `smftools preprocess`
|
|
37
|
+
|
|
38
|
+
The preprocess command performs QC, binarization, filtering, and duplicate detection. It:
|
|
39
|
+
|
|
40
|
+
- Requires an Anndata created by smftools load.
|
|
41
|
+
- Loads sample sheet metadata (if provided).
|
|
42
|
+
- Generates read length/quality QC plots and filters reads on these metrics.
|
|
43
|
+
- Binarizes direct-modification calls based on thresholds (hard or fit thresholds).
|
|
44
|
+
- Cleans NaNs from adata.X and stores in adata.layers (nan0_0minus1, nan_half).
|
|
45
|
+
- Computes positional coverage and base-context annotations (GpC, CpG, ambiguous, other C, any C).
|
|
46
|
+
- Calculates read modification statistics and QC plots.
|
|
47
|
+
- Filters reads based on modification thresholds.
|
|
48
|
+
- Adds base-context binary modification layers.
|
|
49
|
+
- Optionally inverts and reindexes the data along the var (positions) axis.
|
|
50
|
+
- Flags duplicate reads based on nearest neighbor hamming distance of overlapping valid sites (Conversion/deamination).
|
|
51
|
+
- Performs complexity analyses using duplicate read clusters and Lander/Waterman fits (conversion/deamination workflows).
|
|
52
|
+
- Visualizes read span masks and base quality scores with clustermaps.
|
|
53
|
+
- Writes preprocessed (duplicates flagged, but kept) and preprocessed/deduplicated AnnData outputs.
|
|
54
|
+
|
|
55
|
+
### `smftools variant`
|
|
56
|
+
|
|
57
|
+
The variant command focuses on DNA sequence variation analyses. It:
|
|
58
|
+
|
|
59
|
+
- Requires at least a preprocessed AnnData object.
|
|
60
|
+
- Calculates position level variation frequencies per reference/sample.
|
|
61
|
+
- Generates z-scores for variant occurrence given read level Q-scores and assuming uniform Palt transitions.
|
|
62
|
+
- Visualizes read DNA sequence encodings and mismatch encodings.
|
|
63
|
+
|
|
64
|
+
### `smftools chimeric`
|
|
65
|
+
|
|
66
|
+
The chimeric command is meant to find putative PCR chimeras. It:
|
|
67
|
+
|
|
68
|
+
- Requires at least a preprocessed AnnData object.
|
|
69
|
+
- Performs sliding window nearest neighbor hamming distance analysis per read.
|
|
70
|
+
- Visualizes the windowed nearest neighbor hamming distances per read.
|
|
71
|
+
- Assembles maximum spanning intervals of 0-hamming distance neighbors per read within the reference/sample.
|
|
72
|
+
- In progress.
|
|
73
|
+
|
|
74
|
+
### `smftools spatial`
|
|
75
|
+
|
|
76
|
+
The spatial command runs downstream spatial analyses on the preprocessed data. It:
|
|
77
|
+
|
|
78
|
+
- Requires at least a preprocessed AnnData object.
|
|
79
|
+
- Optionally loads sample sheet metadata.
|
|
80
|
+
- Optionally inverts and reindexes the data along the positions axis.
|
|
81
|
+
- Generates clustermaps for preprocessed (and deduplicated) AnnData.
|
|
82
|
+
- Computes spatial autocorrelation, rolling metrics, and grid summaries.
|
|
83
|
+
- Generates positionwise correlation matrices.
|
|
84
|
+
- Writes the spatial AnnData output.
|
|
85
|
+
|
|
86
|
+
### `smftools hmm`
|
|
87
|
+
|
|
88
|
+
The hmm command adds HMM-based feature annotation and summary plots. It:
|
|
89
|
+
|
|
90
|
+
- Requires at least a preprocessed AnnData object.
|
|
91
|
+
- Fits or reuses HMM models for configured feature sets.
|
|
92
|
+
- Annotates AnnData with HMM-derived feature layers (State layers and probability layers)
|
|
93
|
+
- Calls HMM feature peaks and writes peak-calling outputs.
|
|
94
|
+
- Generates clustermaps, bulk feature traces, and fragment size distribution plots for HMM layers.
|
|
95
|
+
- Writes the HMM AnnData output.
|
|
96
|
+
|
|
97
|
+
### `smftools latent`
|
|
98
|
+
|
|
99
|
+
The latent command constructs latent representations of the data. It:
|
|
100
|
+
|
|
101
|
+
- Requires at least a preprocessed AnnData object.
|
|
102
|
+
- Runs various dimensionality reduction and graph construction modalities:
|
|
103
|
+
- Principal component analysis (PCA)
|
|
104
|
+
- K-nearest neighbor (KNN)
|
|
105
|
+
- Uniform manifold approximation and projection (UMAP)
|
|
106
|
+
- Non-negative matrix factorization (NMF)
|
|
107
|
+
- Canonical polyadic decomposition (PARAFAC)
|
|
108
|
+
|
|
109
|
+
### `smftools full`
|
|
110
|
+
|
|
111
|
+
The full command is a workflow wrapper. It runs the following sequentially:
|
|
112
|
+
|
|
113
|
+
- Load / preprocess / variant / chimeric / spatial / hmm / latent.
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
## Batch processing
|
|
117
|
+
|
|
118
|
+
Use the batch command to run a single task across multiple experiments.
|
|
119
|
+
|
|
120
|
+
```shell
|
|
121
|
+
smftools batch preprocess /path/to/config_paths.csv
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
The batch command accepts:
|
|
125
|
+
|
|
126
|
+
- **CSV/TSV** tables with a column of config paths (default column name: `config_path`).
|
|
127
|
+
- **TXT** files with one config path per line.
|
|
128
|
+
|
|
129
|
+
You can override the column name or delimiter if needed:
|
|
130
|
+
|
|
131
|
+
```shell
|
|
132
|
+
smftools batch spatial /path/to/configs.tsv --column my_config --sep $'\t'
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Each path is validated; missing configs are skipped with a message, while valid configs run the
|
|
136
|
+
requested task in sequence.
|
|
@@ -147,6 +147,7 @@ torch = [
|
|
|
147
147
|
all = [
|
|
148
148
|
# cluster
|
|
149
149
|
"fastcluster",
|
|
150
|
+
"igraph",
|
|
150
151
|
"leidenalg",
|
|
151
152
|
|
|
152
153
|
# informatics
|
|
@@ -172,6 +173,7 @@ all = [
|
|
|
172
173
|
"omegaconf",
|
|
173
174
|
"scikit-learn>=1.0.2",
|
|
174
175
|
"shap",
|
|
176
|
+
"tensorly",
|
|
175
177
|
"torch>=1.9.0",
|
|
176
178
|
"wandb",
|
|
177
179
|
|