boltznet 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. boltznet-0.3.0/LICENSE +16 -0
  2. boltznet-0.3.0/PKG-INFO +115 -0
  3. boltznet-0.3.0/README.md +92 -0
  4. boltznet-0.3.0/pyproject.toml +45 -0
  5. boltznet-0.3.0/setup.cfg +4 -0
  6. boltznet-0.3.0/src/boltznet/__init__.py +3 -0
  7. boltznet-0.3.0/src/boltznet/_internal/CONFIG.py +51 -0
  8. boltznet-0.3.0/src/boltznet/_internal/aln.py +1323 -0
  9. boltznet-0.3.0/src/boltznet/_internal/base_data_type.py +107 -0
  10. boltznet-0.3.0/src/boltznet/_internal/browser.py +538 -0
  11. boltznet-0.3.0/src/boltznet/_internal/browser_util.py +344 -0
  12. boltznet-0.3.0/src/boltznet/_internal/coverage.py +96 -0
  13. boltznet-0.3.0/src/boltznet/_internal/factory.py +63 -0
  14. boltznet-0.3.0/src/boltznet/_internal/fileDataType.py +332 -0
  15. boltznet-0.3.0/src/boltznet/_internal/genomes.py +304 -0
  16. boltznet-0.3.0/src/boltznet/_internal/model_create.py +1711 -0
  17. boltznet-0.3.0/src/boltznet/_internal/model_disect.py +1190 -0
  18. boltznet-0.3.0/src/boltznet/_internal/model_predict.py +2254 -0
  19. boltznet-0.3.0/src/boltznet/_internal/model_predict_create.py +263 -0
  20. boltznet-0.3.0/src/boltznet/_internal/model_predict_models.py +132 -0
  21. boltznet-0.3.0/src/boltznet/_internal/sequence.py +467 -0
  22. boltznet-0.3.0/src/boltznet/_internal/tf_util.py +2160 -0
  23. boltznet-0.3.0/src/boltznet/_internal/util.py +732 -0
  24. boltznet-0.3.0/src/boltznet/boltznet_tf.py +328 -0
  25. boltznet-0.3.0/src/boltznet/selftest.py +32 -0
  26. boltznet-0.3.0/src/boltznet/test_module.py +14 -0
  27. boltznet-0.3.0/src/boltznet/testdata/__init__.py +0 -0
  28. boltznet-0.3.0/src/boltznet/testdata/ecoli.gff +6447 -0
  29. boltznet-0.3.0/src/boltznet/testdata/promoters.fa +1185 -0
  30. boltznet-0.3.0/src/boltznet.egg-info/PKG-INFO +115 -0
  31. boltznet-0.3.0/src/boltznet.egg-info/SOURCES.txt +33 -0
  32. boltznet-0.3.0/src/boltznet.egg-info/dependency_links.txt +1 -0
  33. boltznet-0.3.0/src/boltznet.egg-info/entry_points.txt +3 -0
  34. boltznet-0.3.0/src/boltznet.egg-info/requires.txt +14 -0
  35. boltznet-0.3.0/src/boltznet.egg-info/top_level.txt +1 -0
boltznet-0.3.0/LICENSE ADDED
@@ -0,0 +1,16 @@
1
+ Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)
2
+
3
+ Copyright (c) 2025 James Galagan
4
+
5
+ You are free to:
6
+ - Share — copy and redistribute the material in any medium or format
7
+ - Adapt — remix, transform, and build upon the material
8
+ - Use — for research and academic purposes
9
+
10
+ Under the following terms:
11
+ - Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made.
12
+ - NonCommercial — You may not use the material for commercial purposes without prior written permission.
13
+
14
+ Full license text: https://creativecommons.org/licenses/by-nc/4.0/
15
+
16
+ For commercial licensing inquiries, please contact: you@example.com
@@ -0,0 +1,115 @@
1
+ Metadata-Version: 2.4
2
+ Name: boltznet
3
+ Version: 0.3.0
4
+ Summary: BoltzNet
5
+ Requires-Python: >=3.9
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+ Requires-Dist: numpy<2,>=1.26
9
+ Requires-Dist: pandas<3,>=1.5
10
+ Requires-Dist: tensorflow<3,>=2.15
11
+ Requires-Dist: logomaker
12
+ Requires-Dist: biopython
13
+ Requires-Dist: dna-features-viewer
14
+ Requires-Dist: seaborn
15
+ Requires-Dist: scipy
16
+ Requires-Dist: scikit-learn
17
+ Requires-Dist: setproctitle
18
+ Requires-Dist: bokeh
19
+ Requires-Dist: bcbio-gff
20
+ Requires-Dist: keras<3,>2.12.0
21
+ Requires-Dist: platformdirs
22
+ Dynamic: license-file
23
+
24
+ # boltznet
25
+
26
+ BoltzNet is a biophysically designed neural network that learns a quantitative model of TF-DNA binding energy from ChIP-Seq data. BoltzNet mirrors a quantitative biophysical model and provides directly interpretable predictions genome-wide at nucleotide resolution. We have performed ChIP-Seq mapping of genome-wide DNA binding for 139 E. coli TFs. From these data we have generated BoltzNet models for 124 TFs.
27
+
28
+ The BoltzNet models are described in our [publication](https://rdcu.be/ek2Sq) and through the companion [website](https://boltznet.bu.edu):
29
+
30
+ https://boltznet.bu.edu
31
+
32
+ This python package provides a high-level object interface for downloading pretrained models, running predictions on DNA sequences, and visualizing results.
33
+
34
+ ## Installation
35
+
36
+ Create a conda environment and activate it. Then run:
37
+
38
+ ```
39
+ pip install boltznet
40
+ boltznet-init
41
+ ```
42
+
43
+ This installs the package and downloads available models to the package cache dir. To perform selftests, run
44
+
45
+ ```
46
+ boltznet-selftest
47
+ ```
48
+
49
+ This builds a model on all TFs, performs predictions on a set of ecoli promoter sequences, and then generates and saves a plot as selftest_pdhR,pdhR-aceE-aceF-lpd.png. It basically runs the example code in Usage below.
50
+
51
+
52
+ ## USAGE
53
+
54
+ ```
55
+ from boltznet import boltznet_tf
56
+
57
+ ####################################
58
+ # create a tfmodel on all TFs that have been loaded into the package cache
59
+ ####################################
60
+ tfmodel=boltznet_tf.create()
61
+
62
+ ####################################################
63
+ # load sequences from fasta file and run predictions
64
+ # Returns a np.array of predictions at each position on both
65
+ # strands of each sequence for all TFs
66
+ #
67
+ # The numpy array has shape:
68
+ # (nseqs,2,seqlen,numtfs)
69
+ # - nseqs: number of sequences
70
+ # - 2: forward and reverse strands
71
+ # - seqlen: length of each sequence
72
+ # - numtfs: number of models
73
+ ####################################################
74
+ fa_name='teset.fa'
75
+ y=tfmodel(fastafile=fa_name)
76
+
77
+ ####################################################
78
+ # load annotations for the sequences for plotting
79
+ ####################################################
80
+ gff_name='ecoli.gff'
81
+ tfmodel.loadGff(gff_name)
82
+
83
+ ####################################################
84
+ # Plot the predictions for sequences by sequence index or sequence name patterns
85
+ # Below will plot sequence number 76 as well as any sequences that
86
+ # contain chaC or pdhR in the name. But will not plot the same sequence twice
87
+ # If savefilename is None, generate plots in a window
88
+ # If savefilename is given, generate plots named savefilename_<seqid>.png
89
+ ####################################################
90
+ tfmodel.plotPrediction(inds=[76],seqnames=['chaC','pdhR'],model_names=None,seqlogo=False,baseseq=False, maxN=3, savefilename='test')
91
+
92
+ ```
93
+
94
+ ## Test data
95
+
96
+ The package comes bundled with two datafiles that can be used for testing:
97
+ - promoters.fa: a fasta file with a small subset of promoters (see https://boltznet.bu.edu/ecoli/promoters)
98
+ - ecoli.gff: a gff file with annotations of genes and known binding sites
99
+
100
+ You can retrieve and use these data files with code like the following:
101
+
102
+ ```
103
+ from importlib import resources
104
+ import boltznet.testdata as testdata_pkg
105
+
106
+ fa_name=resources.files(testdata_pkg).joinpath('promoters.fa')
107
+
108
+ gff_name=resources.files(testdata_pkg).joinpath('ecoli.gff')
109
+ ```
110
+
111
+ ## Citation
112
+
113
+ The code for BoltzNet is freely available for academic use. BoltzNet can be used by molecular biologists seeking to quantitatively predict TF binding, by synthetic biologists seeking to predictively engineer new regulatory interactions, and by computational biologists seeking to develop biophysically motivated bioinformatic tools.
114
+
115
+ - Lally, Patrick, Gómez-Romero, Laura, Tierrafría, Víctor H., Aquino, Patricia, Rioualen, Claire, Zhang, Xiaoman, Kim, Sunyoung, Baniulyte, Gabriele, Plitnick, Jonathan, Smith, Carol, Babu, Mohan, Collado-Vides, Julio, Wade, Joseph, Galagan, James E. (2025) Predictive Biophysical Neural Network Modeling of a Compendium of in vivo Transcription Factor DNA Binding Profiles for Escherichia coli. Nature Communications
@@ -0,0 +1,92 @@
1
+ # boltznet
2
+
3
+ BoltzNet is a biophysically designed neural network that learns a quantitative model of TF-DNA binding energy from ChIP-Seq data. BoltzNet mirrors a quantitative biophysical model and provides directly interpretable predictions genome-wide at nucleotide resolution. We have performed ChIP-Seq mapping of genome-wide DNA binding for 139 E. coli TFs. From these data we have generated BoltzNet models for 124 TFs.
4
+
5
+ The BoltzNet models are described in our [publication](https://rdcu.be/ek2Sq) and through the companion [website](https://boltznet.bu.edu):
6
+
7
+ https://boltznet.bu.edu
8
+
9
+ This python package provides a high-level object interface for downloading pretrained models, running predictions on DNA sequences, and visualizing results.
10
+
11
+ ## Installation
12
+
13
+ Create a conda environment and activate it. Then run:
14
+
15
+ ```
16
+ pip install boltznet
17
+ boltznet-init
18
+ ```
19
+
20
+ This installs the package and downloads available models to the package cache dir. To perform selftests, run
21
+
22
+ ```
23
+ boltznet-selftest
24
+ ```
25
+
26
+ This builds a model on all TFs, performs predictions on a set of ecoli promoter sequences, and then generates and saves a plot as selftest_pdhR,pdhR-aceE-aceF-lpd.png. It basically runs the example code in Usage below.
27
+
28
+
29
+ ## USAGE
30
+
31
+ ```
32
+ from boltznet import boltznet_tf
33
+
34
+ ####################################
35
+ # create a tfmodel on all TFs that have been loaded into the package cache
36
+ ####################################
37
+ tfmodel=boltznet_tf.create()
38
+
39
+ ####################################################
40
+ # load sequences from fasta file and run predictions
41
+ # Returns a np.array of predictions at each position on both
42
+ # strands of each sequence for all TFs
43
+ #
44
+ # The numpy array has shape:
45
+ # (nseqs,2,seqlen,numtfs)
46
+ # - nseqs: number of sequences
47
+ # - 2: forward and reverse strands
48
+ # - seqlen: length of each sequence
49
+ # - numtfs: number of models
50
+ ####################################################
51
+ fa_name='teset.fa'
52
+ y=tfmodel(fastafile=fa_name)
53
+
54
+ ####################################################
55
+ # load annotations for the sequences for plotting
56
+ ####################################################
57
+ gff_name='ecoli.gff'
58
+ tfmodel.loadGff(gff_name)
59
+
60
+ ####################################################
61
+ # Plot the predictions for sequences by sequence index or sequence name patterns
62
+ # Below will plot sequence number 76 as well as any sequences that
63
+ # contain chaC or pdhR in the name. But will not plot the same sequence twice
64
+ # If savefilename is None, generate plots in a window
65
+ # If savefilename is given, generate plots named savefilename_<seqid>.png
66
+ ####################################################
67
+ tfmodel.plotPrediction(inds=[76],seqnames=['chaC','pdhR'],model_names=None,seqlogo=False,baseseq=False, maxN=3, savefilename='test')
68
+
69
+ ```
70
+
71
+ ## Test data
72
+
73
+ The package comes bundled with two datafiles that can be used for testing:
74
+ - promoters.fa: a fasta file with a small subset of promoters (see https://boltznet.bu.edu/ecoli/promoters)
75
+ - ecoli.gff: a gff file with annotations of genes and known binding sites
76
+
77
+ You can retrieve and use these data files with code like the following:
78
+
79
+ ```
80
+ from importlib import resources
81
+ import boltznet.testdata as testdata_pkg
82
+
83
+ fa_name=resources.files(testdata_pkg).joinpath('promoters.fa')
84
+
85
+ gff_name=resources.files(testdata_pkg).joinpath('ecoli.gff')
86
+ ```
87
+
88
+ ## Citation
89
+
90
+ The code for BoltzNet is freely available for academic use. BoltzNet can be used by molecular biologists seeking to quantitatively predict TF binding, by synthetic biologists seeking to predictively engineer new regulatory interactions, and by computational biologists seeking to develop biophysically motivated bioinformatic tools.
91
+
92
+ - Lally, Patrick, Gómez-Romero, Laura, Tierrafría, Víctor H., Aquino, Patricia, Rioualen, Claire, Zhang, Xiaoman, Kim, Sunyoung, Baniulyte, Gabriele, Plitnick, Jonathan, Smith, Carol, Babu, Mohan, Collado-Vides, Julio, Wade, Joseph, Galagan, James E. (2025) Predictive Biophysical Neural Network Modeling of a Compendium of in vivo Transcription Factor DNA Binding Profiles for Escherichia coli. Nature Communications
@@ -0,0 +1,45 @@
1
+ # -*- coding: utf-8 -*-
2
+ [build-system]
3
+ requires = ["setuptools>=68", "wheel"]
4
+ build-backend = "setuptools.build_meta"
5
+
6
+ [project]
7
+ name = "boltznet"
8
+ version = "0.3.0"
9
+ requires-python = ">=3.9"
10
+ description = "BoltzNet"
11
+ readme = "README.md"
12
+ dependencies = [
13
+ "numpy>=1.26,<2", # lib-style
14
+ "pandas>=1.5,<3",
15
+ "tensorflow>=2.15,<3",
16
+ "logomaker",
17
+ "biopython",
18
+ 'dna-features-viewer',
19
+ 'seaborn',
20
+ 'scipy',
21
+ 'scikit-learn',
22
+ 'setproctitle',
23
+ 'bokeh',
24
+ 'bcbio-gff',
25
+ 'keras>2.12.0,<3',
26
+ 'platformdirs'
27
+
28
+ ]
29
+
30
+ [tool.setuptools]
31
+ package-dir = {"" = "src"}
32
+ include-package-data = true # if you ship any data files
33
+
34
+ [tool.setuptools.packages.find]
35
+ where = ["src"]
36
+ include = ["boltznet", "boltznet.*"]
37
+
38
+ # Tell setuptools to include your test data files in the wheel
39
+ [tool.setuptools.package-data]
40
+ boltznet = ["testdata/*"]
41
+
42
+ [project.scripts]
43
+ boltznet-init = "boltznet.boltznet_tf:__download_all_models__" # downloads index.json
44
+ boltznet-selftest = "boltznet.selftest:main" # runs a selftest
45
+
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Only export the public submodule name
3
+ __all__ = ["boltznet_tf"]
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Configuration file that can be modified for each user
5
+ Primarily to indicate locations of files etc
6
+
7
+ @author: jgalag
8
+ """
9
+
10
+ # SERVER LOCATIONS
11
+
12
+ server_data_root = '/Volumes/eng_research_galagan/seqdata/projects/ecoli'
13
+ '''Mounted server location of seqdata/projects/ecoli'''
14
+ server_projects_root = '/Volumes/eng_research_galagan/seqdata/projects/'
15
+ '''Mounted server location of seqdata/projects/'''
16
+
17
+ server_meme_root = '%s/downstream_files/analysis/meme' % server_data_root
18
+ '''Mounted server location of meme analysis directory'''
19
+
20
+ boltznet_comparison_server='/Volumes/eng_research_galagan/seqdata/projects/boltzNetComparisons/'
21
+
22
+ new_regulon_dir='/Volumes/eng_research_galagan/seqdata/projects/ecoli/downstream_files/analysis/known_motifs/Regulon-v12-confirmed'
23
+ '''Location of regulonDB meme file'''
24
+
25
+
26
+ # LOCAL PLACES
27
+
28
+ model_dir = '/Users/jgalag/Dropbox/Python/CNNModels/models'
29
+ '''Location for models'''
30
+
31
+ model_json_dir = '%s/json'%model_dir
32
+ '''Location for model json files'''
33
+
34
+
35
+ bokeh_temp_dir = '/Users/jgalag/Dropbox/Python/CNNModels/bokeh_temp'
36
+ '''Temp dir for browsers'''
37
+
38
+ browser_dir = '/Users/jgalag/Dropbox/Python/CNNModels/browsers'
39
+ '''Standard location for TF genome browsers and model browsers'''
40
+
41
+ data_dir = '/Users/jgalag/Dropbox/Python/CNNModels/source/datafiles'
42
+ '''Location of datafiles including genome files and geneview files'''
43
+
44
+ # figpath='/Users/jgalag/Dropbox/Projects/Papers/Ecoli Papers/Model Paper/resub'
45
+ figpath='/Users/jgalag/Dropbox/Projects/Papers/Ecoli Papers/Boltznet/NatureFormat/Nat Comm Files/Final Submission/figures'
46
+
47
+ file_dir = '/Users/jgalag/Dropbox/Python/CNNModels/input_files'
48
+ '''Location of Files for import and export'''
49
+
50
+ source_dir ='/Users/jgalag/Dropbox/Python/CNNModels/source'
51
+ '''Location of source folder'''