LZGraphs 2.3.1__tar.gz → 3.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lzgraphs-2.3.1 → lzgraphs-3.0.1}/LICENSE +1 -1
- lzgraphs-3.0.1/MANIFEST.in +7 -0
- lzgraphs-3.0.1/Makefile +125 -0
- lzgraphs-3.0.1/PKG-INFO +246 -0
- lzgraphs-3.0.1/README.md +202 -0
- lzgraphs-3.0.1/include/lzgraph/analytics.h +105 -0
- lzgraphs-3.0.1/include/lzgraph/common.h +231 -0
- lzgraphs-3.0.1/include/lzgraph/crc32c.h +19 -0
- lzgraphs-3.0.1/include/lzgraph/diversity.h +116 -0
- lzgraphs-3.0.1/include/lzgraph/edge_builder.h +49 -0
- lzgraphs-3.0.1/include/lzgraph/features.h +55 -0
- lzgraphs-3.0.1/include/lzgraph/forward.h +107 -0
- lzgraphs-3.0.1/include/lzgraph/gene_data.h +85 -0
- lzgraphs-3.0.1/include/lzgraph/genomic_simulate.h +5 -0
- lzgraphs-3.0.1/include/lzgraph/graph.h +167 -0
- lzgraphs-3.0.1/include/lzgraph/graph_ops.h +66 -0
- lzgraphs-3.0.1/include/lzgraph/hash_map.h +58 -0
- lzgraphs-3.0.1/include/lzgraph/io.h +107 -0
- lzgraphs-3.0.1/include/lzgraph/lz76.h +99 -0
- lzgraphs-3.0.1/include/lzgraph/occupancy.h +56 -0
- lzgraphs-3.0.1/include/lzgraph/pgen_dist.h +67 -0
- lzgraphs-3.0.1/include/lzgraph/posterior.h +41 -0
- lzgraphs-3.0.1/include/lzgraph/rng.h +41 -0
- lzgraphs-3.0.1/include/lzgraph/sharing.h +48 -0
- lzgraphs-3.0.1/include/lzgraph/simulate.h +138 -0
- lzgraphs-3.0.1/include/lzgraph/string_pool.h +46 -0
- lzgraphs-3.0.1/include/lzgraph/walk_dict.h +232 -0
- lzgraphs-3.0.1/include/lzgraph/wynn.h +26 -0
- lzgraphs-3.0.1/lib/analytics/analytics.c +291 -0
- lzgraphs-3.0.1/lib/analytics/diversity.c +288 -0
- lzgraphs-3.0.1/lib/analytics/features.c +199 -0
- lzgraphs-3.0.1/lib/analytics/pgen_dist.c +259 -0
- lzgraphs-3.0.1/lib/core/common.c +91 -0
- lzgraphs-3.0.1/lib/core/crc32c.c +33 -0
- lzgraphs-3.0.1/lib/core/hash_map.c +173 -0
- lzgraphs-3.0.1/lib/core/rng.c +20 -0
- lzgraphs-3.0.1/lib/core/string_pool.c +117 -0
- lzgraphs-3.0.1/lib/core/wynn.c +72 -0
- lzgraphs-3.0.1/lib/graph/csr_graph.c +1854 -0
- lzgraphs-3.0.1/lib/graph/edge_builder.c +70 -0
- lzgraphs-3.0.1/lib/graph/forward.c +87 -0
- lzgraphs-3.0.1/lib/graph/gene_data.c +81 -0
- lzgraphs-3.0.1/lib/graph/graph_ops.c +466 -0
- lzgraphs-3.0.1/lib/graph/lz76.c +289 -0
- lzgraphs-3.0.1/lib/io/io.c +632 -0
- lzgraphs-3.0.1/lib/occupancy/occupancy.c +213 -0
- lzgraphs-3.0.1/lib/occupancy/sharing.c +199 -0
- lzgraphs-3.0.1/lib/posterior/posterior.c +174 -0
- lzgraphs-3.0.1/lib/simulation/genomic_simulate.c +267 -0
- lzgraphs-3.0.1/lib/simulation/simulate.c +323 -0
- {lzgraphs-2.3.1 → lzgraphs-3.0.1}/pyproject.toml +21 -39
- lzgraphs-3.0.1/setup.cfg +4 -0
- lzgraphs-3.0.1/setup.py +44 -0
- lzgraphs-3.0.1/src/LZGraphs/__init__.py +94 -0
- lzgraphs-3.0.1/src/LZGraphs/_clzgraph.c +1473 -0
- lzgraphs-3.0.1/src/LZGraphs/_errors.py +49 -0
- lzgraphs-3.0.1/src/LZGraphs/_graph.py +584 -0
- lzgraphs-3.0.1/src/LZGraphs/_io.py +598 -0
- lzgraphs-3.0.1/src/LZGraphs/_pgen_dist.py +75 -0
- lzgraphs-3.0.1/src/LZGraphs/_simulation_result.py +44 -0
- lzgraphs-3.0.1/src/LZGraphs/cli.py +804 -0
- lzgraphs-3.0.1/src/LZGraphs.egg-info/PKG-INFO +246 -0
- lzgraphs-3.0.1/src/LZGraphs.egg-info/SOURCES.txt +79 -0
- lzgraphs-3.0.1/src/LZGraphs.egg-info/entry_points.txt +2 -0
- {lzgraphs-2.3.1 → lzgraphs-3.0.1}/src/LZGraphs.egg-info/requires.txt +2 -9
- lzgraphs-3.0.1/tests/test_analytics.py +144 -0
- lzgraphs-3.0.1/tests/test_cli.py +219 -0
- lzgraphs-3.0.1/tests/test_deep_audit.py +258 -0
- lzgraphs-3.0.1/tests/test_graph.py +314 -0
- lzgraphs-3.0.1/tests/test_io.py +60 -0
- lzgraphs-3.0.1/tests/test_known_distributions.py +779 -0
- lzgraphs-3.0.1/tests/test_lzpgen.py +55 -0
- lzgraphs-3.0.1/tests/test_mathematician_audit.py +278 -0
- lzgraphs-3.0.1/tests/test_model_audit.py +301 -0
- lzgraphs-3.0.1/tests/test_operations.py +184 -0
- lzgraphs-3.0.1/tests/test_simulate.py +88 -0
- lzgraphs-3.0.1/tests/test_statistical_validity.py +303 -0
- lzgraphs-2.3.1/MANIFEST.in +0 -5
- lzgraphs-2.3.1/PKG-INFO +0 -401
- lzgraphs-2.3.1/README.md +0 -350
- lzgraphs-2.3.1/requirements.txt +0 -5
- lzgraphs-2.3.1/setup.cfg +0 -7
- lzgraphs-2.3.1/setup.py +0 -40
- lzgraphs-2.3.1/src/LZGraphs/__init__.py +0 -203
- lzgraphs-2.3.1/src/LZGraphs/_fast_walk.c +0 -321
- lzgraphs-2.3.1/src/LZGraphs/bag_of_words/__init__.py +0 -3
- lzgraphs-2.3.1/src/LZGraphs/bag_of_words/bow_encoder.py +0 -185
- lzgraphs-2.3.1/src/LZGraphs/constants.py +0 -6
- lzgraphs-2.3.1/src/LZGraphs/exceptions/__init__.py +0 -550
- lzgraphs-2.3.1/src/LZGraphs/graphs/__init__.py +0 -6
- lzgraphs-2.3.1/src/LZGraphs/graphs/amino_acid_positional.py +0 -699
- lzgraphs-2.3.1/src/LZGraphs/graphs/edge_data.py +0 -222
- lzgraphs-2.3.1/src/LZGraphs/graphs/graph_operations.py +0 -106
- lzgraphs-2.3.1/src/LZGraphs/graphs/lz_graph_base.py +0 -1066
- lzgraphs-2.3.1/src/LZGraphs/graphs/naive.py +0 -343
- lzgraphs-2.3.1/src/LZGraphs/graphs/nucleotide_double_positional.py +0 -384
- lzgraphs-2.3.1/src/LZGraphs/metrics/__init__.py +0 -80
- lzgraphs-2.3.1/src/LZGraphs/metrics/convenience.py +0 -90
- lzgraphs-2.3.1/src/LZGraphs/metrics/diversity.py +0 -367
- lzgraphs-2.3.1/src/LZGraphs/metrics/entropy.py +0 -1007
- lzgraphs-2.3.1/src/LZGraphs/metrics/pgen_distribution.py +0 -351
- lzgraphs-2.3.1/src/LZGraphs/metrics/saturation.py +0 -445
- lzgraphs-2.3.1/src/LZGraphs/mixins/__init__.py +0 -8
- lzgraphs-2.3.1/src/LZGraphs/mixins/bayesian_posterior.py +0 -277
- lzgraphs-2.3.1/src/LZGraphs/mixins/gene_logic.py +0 -122
- lzgraphs-2.3.1/src/LZGraphs/mixins/gene_prediction.py +0 -215
- lzgraphs-2.3.1/src/LZGraphs/mixins/graph_topology.py +0 -59
- lzgraphs-2.3.1/src/LZGraphs/mixins/lzpgen_distribution.py +0 -457
- lzgraphs-2.3.1/src/LZGraphs/mixins/random_walk.py +0 -151
- lzgraphs-2.3.1/src/LZGraphs/mixins/serialization.py +0 -628
- lzgraphs-2.3.1/src/LZGraphs/mixins/walk_analysis.py +0 -177
- lzgraphs-2.3.1/src/LZGraphs/py.typed +0 -0
- lzgraphs-2.3.1/src/LZGraphs/utilities/__init__.py +0 -13
- lzgraphs-2.3.1/src/LZGraphs/utilities/decomposition.py +0 -71
- lzgraphs-2.3.1/src/LZGraphs/utilities/helpers.py +0 -50
- lzgraphs-2.3.1/src/LZGraphs/utilities/misc.py +0 -133
- lzgraphs-2.3.1/src/LZGraphs/visualization/__init__.py +0 -18
- lzgraphs-2.3.1/src/LZGraphs/visualization/visualize.py +0 -234
- lzgraphs-2.3.1/src/LZGraphs.egg-info/PKG-INFO +0 -401
- lzgraphs-2.3.1/src/LZGraphs.egg-info/SOURCES.txt +0 -66
- lzgraphs-2.3.1/tests/test_aap_lzgraph.py +0 -306
- lzgraphs-2.3.1/tests/test_abundance.py +0 -796
- lzgraphs-2.3.1/tests/test_analytical_distribution.py +0 -481
- lzgraphs-2.3.1/tests/test_base_class_methods.py +0 -320
- lzgraphs-2.3.1/tests/test_bow_encoder.py +0 -307
- lzgraphs-2.3.1/tests/test_diversity_theory.py +0 -686
- lzgraphs-2.3.1/tests/test_flexible_input.py +0 -213
- lzgraphs-2.3.1/tests/test_graph_operations.py +0 -264
- lzgraphs-2.3.1/tests/test_lzpgen_distribution.py +0 -267
- lzgraphs-2.3.1/tests/test_metrics.py +0 -507
- lzgraphs-2.3.1/tests/test_naive_lzgraph.py +0 -141
- lzgraphs-2.3.1/tests/test_ndp_lzgraph.py +0 -156
- lzgraphs-2.3.1/tests/test_new_features.py +0 -396
- lzgraphs-2.3.1/tests/test_pgen_fixes.py +0 -225
- lzgraphs-2.3.1/tests/test_serialization.py +0 -411
- lzgraphs-2.3.1/tests/test_simulate.py +0 -225
- lzgraphs-2.3.1/tests/test_utilities.py +0 -296
- {lzgraphs-2.3.1 → lzgraphs-3.0.1}/CHANGELOG.md +0 -0
- {lzgraphs-2.3.1 → lzgraphs-3.0.1}/CONTRIBUTING.md +0 -0
- {lzgraphs-2.3.1 → lzgraphs-3.0.1}/src/LZGraphs.egg-info/dependency_links.txt +0 -0
- {lzgraphs-2.3.1 → lzgraphs-3.0.1}/src/LZGraphs.egg-info/top_level.txt +0 -0
lzgraphs-3.0.1/Makefile
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
CC = gcc
|
|
2
|
+
CFLAGS = -O2 -Wall -Wextra -std=c11 -Iinclude
|
|
3
|
+
LDFLAGS = -lm
|
|
4
|
+
|
|
5
|
+
# Collect all .c files under lib/
|
|
6
|
+
SRCS = $(shell find lib -name '*.c')
|
|
7
|
+
OBJS = $(patsubst lib/%.c, build/%.o, $(SRCS))
|
|
8
|
+
|
|
9
|
+
.PHONY: all clean test
|
|
10
|
+
|
|
11
|
+
all: build/liblzgraph.a
|
|
12
|
+
|
|
13
|
+
# Static library
|
|
14
|
+
build/liblzgraph.a: $(OBJS)
|
|
15
|
+
ar rcs $@ $^
|
|
16
|
+
|
|
17
|
+
# Compile each .c → .o, mirroring the lib/ directory structure in build/
|
|
18
|
+
build/%.o: lib/%.c | build_dirs
|
|
19
|
+
$(CC) $(CFLAGS) -c $< -o $@
|
|
20
|
+
|
|
21
|
+
# Create build subdirectories matching lib/
|
|
22
|
+
build_dirs:
|
|
23
|
+
@mkdir -p $(sort $(dir $(OBJS)))
|
|
24
|
+
|
|
25
|
+
# Test runners
|
|
26
|
+
test: test_core test_graph test_forward test_simulate test_analytics test_occupancy test_io_posterior test_pgen_dist test_variants test_gene_data test_sharing test_graph_ops test_diversity test_features test_genomic_simulate test_walk_dict
|
|
27
|
+
|
|
28
|
+
test_core: build/test_core
|
|
29
|
+
./build/test_core
|
|
30
|
+
|
|
31
|
+
test_graph: build/test_graph
|
|
32
|
+
./build/test_graph
|
|
33
|
+
|
|
34
|
+
build/test_core: tests/c_unit/test_core.c build/liblzgraph.a | build_dirs
|
|
35
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
36
|
+
|
|
37
|
+
build/test_graph: tests/c_unit/test_graph.c build/liblzgraph.a | build_dirs
|
|
38
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
39
|
+
|
|
40
|
+
test_forward: build/test_forward
|
|
41
|
+
./build/test_forward
|
|
42
|
+
|
|
43
|
+
build/test_forward: tests/c_unit/test_forward.c build/liblzgraph.a | build_dirs
|
|
44
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
45
|
+
|
|
46
|
+
test_simulate: build/test_simulate
|
|
47
|
+
./build/test_simulate
|
|
48
|
+
|
|
49
|
+
build/test_simulate: tests/c_unit/test_simulate.c build/liblzgraph.a | build_dirs
|
|
50
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
51
|
+
|
|
52
|
+
test_analytics: build/test_analytics
|
|
53
|
+
./build/test_analytics
|
|
54
|
+
|
|
55
|
+
build/test_analytics: tests/c_unit/test_analytics.c build/liblzgraph.a | build_dirs
|
|
56
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
57
|
+
|
|
58
|
+
test_occupancy: build/test_occupancy
|
|
59
|
+
./build/test_occupancy
|
|
60
|
+
|
|
61
|
+
build/test_occupancy: tests/c_unit/test_occupancy.c build/liblzgraph.a | build_dirs
|
|
62
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
63
|
+
|
|
64
|
+
test_io_posterior: build/test_io_posterior
|
|
65
|
+
./build/test_io_posterior
|
|
66
|
+
|
|
67
|
+
build/test_io_posterior: tests/c_unit/test_io_posterior.c build/liblzgraph.a | build_dirs
|
|
68
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
69
|
+
|
|
70
|
+
test_pgen_dist: build/test_pgen_dist
|
|
71
|
+
./build/test_pgen_dist
|
|
72
|
+
|
|
73
|
+
build/test_pgen_dist: tests/c_unit/test_pgen_dist.c build/liblzgraph.a | build_dirs
|
|
74
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
75
|
+
|
|
76
|
+
test_variants: build/test_variants
|
|
77
|
+
./build/test_variants
|
|
78
|
+
|
|
79
|
+
build/test_variants: tests/c_unit/test_variants.c build/liblzgraph.a | build_dirs
|
|
80
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
81
|
+
|
|
82
|
+
test_gene_data: build/test_gene_data
|
|
83
|
+
./build/test_gene_data
|
|
84
|
+
|
|
85
|
+
build/test_gene_data: tests/c_unit/test_gene_data.c build/liblzgraph.a | build_dirs
|
|
86
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
87
|
+
|
|
88
|
+
test_sharing: build/test_sharing
|
|
89
|
+
./build/test_sharing
|
|
90
|
+
|
|
91
|
+
build/test_sharing: tests/c_unit/test_sharing.c build/liblzgraph.a | build_dirs
|
|
92
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
93
|
+
|
|
94
|
+
test_graph_ops: build/test_graph_ops
|
|
95
|
+
./build/test_graph_ops
|
|
96
|
+
|
|
97
|
+
build/test_graph_ops: tests/c_unit/test_graph_ops.c build/liblzgraph.a | build_dirs
|
|
98
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
99
|
+
|
|
100
|
+
test_diversity: build/test_diversity
|
|
101
|
+
./build/test_diversity
|
|
102
|
+
|
|
103
|
+
build/test_diversity: tests/c_unit/test_diversity.c build/liblzgraph.a | build_dirs
|
|
104
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
105
|
+
|
|
106
|
+
test_features: build/test_features
|
|
107
|
+
./build/test_features
|
|
108
|
+
|
|
109
|
+
build/test_features: tests/c_unit/test_features.c build/liblzgraph.a | build_dirs
|
|
110
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
111
|
+
|
|
112
|
+
test_genomic_simulate: build/test_genomic_simulate
|
|
113
|
+
./build/test_genomic_simulate
|
|
114
|
+
|
|
115
|
+
build/test_genomic_simulate: tests/c_unit/test_genomic_simulate.c build/liblzgraph.a | build_dirs
|
|
116
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
117
|
+
|
|
118
|
+
test_walk_dict: build/test_walk_dict
|
|
119
|
+
./build/test_walk_dict
|
|
120
|
+
|
|
121
|
+
build/test_walk_dict: tests/c_unit/test_walk_dict.c build/liblzgraph.a | build_dirs
|
|
122
|
+
$(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
|
|
123
|
+
|
|
124
|
+
clean:
|
|
125
|
+
rm -rf build
|
lzgraphs-3.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: LZGraphs
|
|
3
|
+
Version: 3.0.1
|
|
4
|
+
Summary: High-performance LZ76 compression graphs for immune receptor repertoire analysis
|
|
5
|
+
Author-email: Thomas Konstantinovsky <thomaskon90@gmail.com>
|
|
6
|
+
Maintainer-email: Thomas Konstantinovsky <thomaskon90@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/MuteJester/LZGraphs
|
|
9
|
+
Project-URL: Documentation, https://mutejester.github.io/LZGraphs/
|
|
10
|
+
Project-URL: Repository, https://github.com/MuteJester/LZGraphs
|
|
11
|
+
Project-URL: Issues, https://github.com/MuteJester/LZGraphs/issues
|
|
12
|
+
Project-URL: Changelog, https://github.com/MuteJester/LZGraphs/blob/master/CHANGELOG.md
|
|
13
|
+
Keywords: Graph Theory,Immunology,Analytics,Biology,T-cell,Repertoire,CDR3,Bioinformatics,LZ76,Lempel-Ziv
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
19
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
20
|
+
Classifier: Programming Language :: Python :: 3
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
25
|
+
Classifier: Programming Language :: C
|
|
26
|
+
Classifier: Typing :: Typed
|
|
27
|
+
Requires-Python: >=3.9
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Requires-Dist: numpy>=1.20
|
|
31
|
+
Provides-Extra: viz
|
|
32
|
+
Requires-Dist: matplotlib>=3.7; extra == "viz"
|
|
33
|
+
Requires-Dist: seaborn>=0.12; extra == "viz"
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
37
|
+
Requires-Dist: scipy>=1.9; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
39
|
+
Requires-Dist: build>=1.0; extra == "dev"
|
|
40
|
+
Provides-Extra: docs
|
|
41
|
+
Requires-Dist: mkdocs-material>=9.5; extra == "docs"
|
|
42
|
+
Requires-Dist: mkdocstrings[python]>=0.24; extra == "docs"
|
|
43
|
+
Dynamic: license-file
|
|
44
|
+
|
|
45
|
+
<p align="center">
|
|
46
|
+
<a href="https://github.com/MuteJester/LZGraphs">
|
|
47
|
+
<img src="https://raw.githubusercontent.com/MuteJester/LZGraphs/master/docs/images/lzglogo2.png" alt="LZGraphs" width="400">
|
|
48
|
+
</a>
|
|
49
|
+
</p>
|
|
50
|
+
|
|
51
|
+
<p align="center">
|
|
52
|
+
<strong>High-performance LZ76 compression graphs for immune receptor repertoire analysis</strong>
|
|
53
|
+
</p>
|
|
54
|
+
|
|
55
|
+
<p align="center">
|
|
56
|
+
<a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/v/LZGraphs.svg" alt="PyPI"></a>
|
|
57
|
+
<a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/pyversions/LZGraphs.svg" alt="Python"></a>
|
|
58
|
+
<a href="https://github.com/MuteJester/LZGraphs/blob/master/LICENSE"><img src="https://img.shields.io/github/license/MuteJester/LZGraphs.svg" alt="License"></a>
|
|
59
|
+
<a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/dm/LZGraphs.svg" alt="Downloads"></a>
|
|
60
|
+
<a href="https://github.com/MuteJester/LZGraphs/stargazers"><img src="https://img.shields.io/github/stars/MuteJester/LZGraphs.svg" alt="Stars"></a>
|
|
61
|
+
</p>
|
|
62
|
+
|
|
63
|
+
<p align="center">
|
|
64
|
+
<a href="https://MuteJester.github.io/LZGraphs/"><strong>Documentation</strong></a> ·
|
|
65
|
+
<a href="https://MuteJester.github.io/LZGraphs/getting-started/quickstart/"><strong>Quick Start</strong></a> ·
|
|
66
|
+
<a href="https://MuteJester.github.io/LZGraphs/api/lzgraph/"><strong>API Reference</strong></a> ·
|
|
67
|
+
<a href="https://github.com/MuteJester/LZGraphs/issues">Report Bug</a>
|
|
68
|
+
</p>
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
**LZGraphs** is a Python library that transforms T-cell and B-cell receptor CDR3 sequences into probabilistic directed graphs using the Lempel-Ziv 76 compression algorithm. Built on a C core with Python bindings, it provides:
|
|
73
|
+
|
|
74
|
+
- **Exact generation probabilities** for any CDR3 sequence
|
|
75
|
+
- **LZ76-constrained sequence simulation** that guarantees valid outputs
|
|
76
|
+
- **Analytical diversity metrics** (Hill numbers, richness predictions, sharing spectra)
|
|
77
|
+
- **Graph algebra** (union, intersection, difference) for repertoire comparison
|
|
78
|
+
- **ML feature extraction** with fixed-size vectors for classification pipelines
|
|
79
|
+
- **Bayesian posterior personalization** to adapt population models to individuals
|
|
80
|
+
- **A CLI tool** (`lzg`) for terminal-based analysis
|
|
81
|
+
|
|
82
|
+
## Quick Start
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pip install LZGraphs
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
from LZGraphs import LZGraph
|
|
90
|
+
|
|
91
|
+
# Build a graph from CDR3 amino acid sequences
|
|
92
|
+
graph = LZGraph(
|
|
93
|
+
['CASSLEPSGGTDTQYF', 'CASSDTSGGTDTQYF', 'CASSLEPQTFTDTFFF',
|
|
94
|
+
'CASSLGQGSTEAFF', 'CASSLGIRRT'],
|
|
95
|
+
variant='aap',
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Score a sequence
|
|
99
|
+
log_p = graph.lzpgen('CASSLEPSGGTDTQYF')
|
|
100
|
+
print(f"log P(gen) = {log_p:.2f}")
|
|
101
|
+
|
|
102
|
+
# Simulate new sequences
|
|
103
|
+
result = graph.simulate(1000, seed=42)
|
|
104
|
+
print(f"Generated {len(result)} sequences")
|
|
105
|
+
|
|
106
|
+
# Diversity
|
|
107
|
+
print(f"D(1) = {graph.effective_diversity():.1f}")
|
|
108
|
+
print(f"D(2) = {graph.hill_number(2):.1f}")
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### With gene annotation
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
graph = LZGraph(
|
|
115
|
+
sequences,
|
|
116
|
+
variant='aap',
|
|
117
|
+
v_genes=['TRBV16-1*01', 'TRBV1-1*01', ...],
|
|
118
|
+
j_genes=['TRBJ1-2*01', 'TRBJ1-5*01', ...],
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
# Gene-constrained simulation
|
|
122
|
+
result = graph.simulate(100, sample_genes=True, seed=42)
|
|
123
|
+
print(result.v_genes[0], result.j_genes[0])
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Command line
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
lzg build repertoire.tsv -o rep.lzg
|
|
130
|
+
lzg score rep.lzg sequences.txt
|
|
131
|
+
lzg diversity rep.lzg
|
|
132
|
+
lzg simulate rep.lzg -n 10000 --seed 42
|
|
133
|
+
lzg compare healthy.lzg disease.lzg
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Graph Variants
|
|
137
|
+
|
|
138
|
+
One unified `LZGraph` class with three encoding schemes:
|
|
139
|
+
|
|
140
|
+
| Variant | Input | Node format | Best for |
|
|
141
|
+
|---------|-------|-------------|----------|
|
|
142
|
+
| `'aap'` | Amino acid CDR3 | `C_2`, `SL_6` | Most TCR/BCR analysis |
|
|
143
|
+
| `'ndp'` | Nucleotide CDR3 | `TG0_4` | Nucleotide-level analysis |
|
|
144
|
+
| `'naive'` | Any strings | `C`, `SL` | Motif discovery, ML features |
|
|
145
|
+
|
|
146
|
+
## Key Capabilities
|
|
147
|
+
|
|
148
|
+
### Scoring & Simulation
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
# Log-probability of a sequence
|
|
152
|
+
graph.lzpgen('CASSLEPSGGTDTQYF') # single
|
|
153
|
+
graph.lzpgen(['seq1', 'seq2', 'seq3']) # batch → np.ndarray
|
|
154
|
+
|
|
155
|
+
# Simulate with optional gene constraints
|
|
156
|
+
result = graph.simulate(1000, seed=42)
|
|
157
|
+
result = graph.simulate(100, v_gene='TRBV5-1*01', j_gene='TRBJ2-7*01')
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Diversity & Analytics
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
graph.effective_diversity() # exp(Shannon entropy)
|
|
164
|
+
graph.hill_number(2) # inverse Simpson
|
|
165
|
+
graph.hill_numbers([0, 1, 2, 5]) # multiple orders → np.ndarray
|
|
166
|
+
graph.predicted_richness(100_000) # expected unique seqs at depth
|
|
167
|
+
graph.predicted_overlap(10000, 50000) # expected shared sequences
|
|
168
|
+
graph.pgen_distribution() # analytical Gaussian mixture
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Graph Algebra
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
combined = graph_a | graph_b # union
|
|
175
|
+
shared = graph_a & graph_b # intersection
|
|
176
|
+
unique_a = graph_a - graph_b # difference
|
|
177
|
+
personal = population.posterior(patient_seqs, kappa=10.0) # Bayesian update
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Repertoire Comparison
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
from LZGraphs import jensen_shannon_divergence
|
|
184
|
+
jsd = jensen_shannon_divergence(graph_a, graph_b) # 0 = identical, 1 = maximally different
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### ML Feature Extraction
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
# Project any repertoire into a fixed reference space
|
|
191
|
+
features = reference.feature_aligned(LZGraph(sample_seqs, variant='aap'))
|
|
192
|
+
stats = graph.feature_stats() # 15-element summary vector
|
|
193
|
+
profile = graph.feature_mass_profile() # position-based mass distribution
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Serialization
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
graph.save('repertoire.lzg') # fast binary format
|
|
200
|
+
loaded = LZGraph.load('repertoire.lzg')
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Documentation
|
|
204
|
+
|
|
205
|
+
Full documentation with tutorials, concept guides, and API reference:
|
|
206
|
+
|
|
207
|
+
**[https://MuteJester.github.io/LZGraphs/](https://MuteJester.github.io/LZGraphs/)**
|
|
208
|
+
|
|
209
|
+
- [Quick Start](https://MuteJester.github.io/LZGraphs/getting-started/quickstart/) — build your first graph in 5 minutes
|
|
210
|
+
- [Tutorials](https://MuteJester.github.io/LZGraphs/tutorials/) — graph construction, sequence analysis, diversity metrics
|
|
211
|
+
- [API Reference](https://MuteJester.github.io/LZGraphs/api/lzgraph/) — complete class and function reference
|
|
212
|
+
- [CLI Reference](https://MuteJester.github.io/LZGraphs/api/cli/) — terminal tool documentation
|
|
213
|
+
|
|
214
|
+
## Citation
|
|
215
|
+
|
|
216
|
+
If you use LZGraphs in your research, please cite:
|
|
217
|
+
|
|
218
|
+
```bibtex
|
|
219
|
+
@article{konstantinovsky2023lzgraphs,
|
|
220
|
+
title={A Novel Approach to T-Cell Receptor Beta Chain (TCRB) Repertoire Encoding
|
|
221
|
+
Using Lossless String Compression},
|
|
222
|
+
author={Konstantinovsky, Thomas and Nagar, Maor and Louzoun, Yoram},
|
|
223
|
+
journal={Bioinformatics},
|
|
224
|
+
year={2023},
|
|
225
|
+
publisher={Oxford University Press}
|
|
226
|
+
}
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## Contributing
|
|
230
|
+
|
|
231
|
+
Contributions are welcome. Please open an issue or submit a pull request.
|
|
232
|
+
|
|
233
|
+
1. Fork the repository
|
|
234
|
+
2. Create a feature branch (`git checkout -b feature/my-feature`)
|
|
235
|
+
3. Commit your changes
|
|
236
|
+
4. Push and open a Pull Request
|
|
237
|
+
|
|
238
|
+
## License
|
|
239
|
+
|
|
240
|
+
MIT License. See [LICENSE](https://github.com/MuteJester/LZGraphs/blob/master/LICENSE) for details.
|
|
241
|
+
|
|
242
|
+
## Contact
|
|
243
|
+
|
|
244
|
+
Thomas Konstantinovsky — [thomaskon90@gmail.com](mailto:thomaskon90@gmail.com)
|
|
245
|
+
|
|
246
|
+
[GitHub](https://github.com/MuteJester/LZGraphs) · [PyPI](https://pypi.org/project/LZGraphs/) · [Documentation](https://MuteJester.github.io/LZGraphs/)
|
lzgraphs-3.0.1/README.md
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<a href="https://github.com/MuteJester/LZGraphs">
|
|
3
|
+
<img src="https://raw.githubusercontent.com/MuteJester/LZGraphs/master/docs/images/lzglogo2.png" alt="LZGraphs" width="400">
|
|
4
|
+
</a>
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<strong>High-performance LZ76 compression graphs for immune receptor repertoire analysis</strong>
|
|
9
|
+
</p>
|
|
10
|
+
|
|
11
|
+
<p align="center">
|
|
12
|
+
<a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/v/LZGraphs.svg" alt="PyPI"></a>
|
|
13
|
+
<a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/pyversions/LZGraphs.svg" alt="Python"></a>
|
|
14
|
+
<a href="https://github.com/MuteJester/LZGraphs/blob/master/LICENSE"><img src="https://img.shields.io/github/license/MuteJester/LZGraphs.svg" alt="License"></a>
|
|
15
|
+
<a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/dm/LZGraphs.svg" alt="Downloads"></a>
|
|
16
|
+
<a href="https://github.com/MuteJester/LZGraphs/stargazers"><img src="https://img.shields.io/github/stars/MuteJester/LZGraphs.svg" alt="Stars"></a>
|
|
17
|
+
</p>
|
|
18
|
+
|
|
19
|
+
<p align="center">
|
|
20
|
+
<a href="https://MuteJester.github.io/LZGraphs/"><strong>Documentation</strong></a> ·
|
|
21
|
+
<a href="https://MuteJester.github.io/LZGraphs/getting-started/quickstart/"><strong>Quick Start</strong></a> ·
|
|
22
|
+
<a href="https://MuteJester.github.io/LZGraphs/api/lzgraph/"><strong>API Reference</strong></a> ·
|
|
23
|
+
<a href="https://github.com/MuteJester/LZGraphs/issues">Report Bug</a>
|
|
24
|
+
</p>
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
**LZGraphs** is a Python library that transforms T-cell and B-cell receptor CDR3 sequences into probabilistic directed graphs using the Lempel-Ziv 76 compression algorithm. Built on a C core with Python bindings, it provides:
|
|
29
|
+
|
|
30
|
+
- **Exact generation probabilities** for any CDR3 sequence
|
|
31
|
+
- **LZ76-constrained sequence simulation** that guarantees valid outputs
|
|
32
|
+
- **Analytical diversity metrics** (Hill numbers, richness predictions, sharing spectra)
|
|
33
|
+
- **Graph algebra** (union, intersection, difference) for repertoire comparison
|
|
34
|
+
- **ML feature extraction** with fixed-size vectors for classification pipelines
|
|
35
|
+
- **Bayesian posterior personalization** to adapt population models to individuals
|
|
36
|
+
- **A CLI tool** (`lzg`) for terminal-based analysis
|
|
37
|
+
|
|
38
|
+
## Quick Start
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install LZGraphs
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from LZGraphs import LZGraph
|
|
46
|
+
|
|
47
|
+
# Build a graph from CDR3 amino acid sequences
|
|
48
|
+
graph = LZGraph(
|
|
49
|
+
['CASSLEPSGGTDTQYF', 'CASSDTSGGTDTQYF', 'CASSLEPQTFTDTFFF',
|
|
50
|
+
'CASSLGQGSTEAFF', 'CASSLGIRRT'],
|
|
51
|
+
variant='aap',
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# Score a sequence
|
|
55
|
+
log_p = graph.lzpgen('CASSLEPSGGTDTQYF')
|
|
56
|
+
print(f"log P(gen) = {log_p:.2f}")
|
|
57
|
+
|
|
58
|
+
# Simulate new sequences
|
|
59
|
+
result = graph.simulate(1000, seed=42)
|
|
60
|
+
print(f"Generated {len(result)} sequences")
|
|
61
|
+
|
|
62
|
+
# Diversity
|
|
63
|
+
print(f"D(1) = {graph.effective_diversity():.1f}")
|
|
64
|
+
print(f"D(2) = {graph.hill_number(2):.1f}")
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### With gene annotation
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
graph = LZGraph(
|
|
71
|
+
sequences,
|
|
72
|
+
variant='aap',
|
|
73
|
+
v_genes=['TRBV16-1*01', 'TRBV1-1*01', ...],
|
|
74
|
+
j_genes=['TRBJ1-2*01', 'TRBJ1-5*01', ...],
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
# Gene-constrained simulation
|
|
78
|
+
result = graph.simulate(100, sample_genes=True, seed=42)
|
|
79
|
+
print(result.v_genes[0], result.j_genes[0])
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Command line
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
lzg build repertoire.tsv -o rep.lzg
|
|
86
|
+
lzg score rep.lzg sequences.txt
|
|
87
|
+
lzg diversity rep.lzg
|
|
88
|
+
lzg simulate rep.lzg -n 10000 --seed 42
|
|
89
|
+
lzg compare healthy.lzg disease.lzg
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Graph Variants
|
|
93
|
+
|
|
94
|
+
One unified `LZGraph` class with three encoding schemes:
|
|
95
|
+
|
|
96
|
+
| Variant | Input | Node format | Best for |
|
|
97
|
+
|---------|-------|-------------|----------|
|
|
98
|
+
| `'aap'` | Amino acid CDR3 | `C_2`, `SL_6` | Most TCR/BCR analysis |
|
|
99
|
+
| `'ndp'` | Nucleotide CDR3 | `TG0_4` | Nucleotide-level analysis |
|
|
100
|
+
| `'naive'` | Any strings | `C`, `SL` | Motif discovery, ML features |
|
|
101
|
+
|
|
102
|
+
## Key Capabilities
|
|
103
|
+
|
|
104
|
+
### Scoring & Simulation
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
# Log-probability of a sequence
|
|
108
|
+
graph.lzpgen('CASSLEPSGGTDTQYF') # single
|
|
109
|
+
graph.lzpgen(['seq1', 'seq2', 'seq3']) # batch → np.ndarray
|
|
110
|
+
|
|
111
|
+
# Simulate with optional gene constraints
|
|
112
|
+
result = graph.simulate(1000, seed=42)
|
|
113
|
+
result = graph.simulate(100, v_gene='TRBV5-1*01', j_gene='TRBJ2-7*01')
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Diversity & Analytics
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
graph.effective_diversity() # exp(Shannon entropy)
|
|
120
|
+
graph.hill_number(2) # inverse Simpson
|
|
121
|
+
graph.hill_numbers([0, 1, 2, 5]) # multiple orders → np.ndarray
|
|
122
|
+
graph.predicted_richness(100_000) # expected unique seqs at depth
|
|
123
|
+
graph.predicted_overlap(10000, 50000) # expected shared sequences
|
|
124
|
+
graph.pgen_distribution() # analytical Gaussian mixture
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Graph Algebra
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
combined = graph_a | graph_b # union
|
|
131
|
+
shared = graph_a & graph_b # intersection
|
|
132
|
+
unique_a = graph_a - graph_b # difference
|
|
133
|
+
personal = population.posterior(patient_seqs, kappa=10.0) # Bayesian update
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Repertoire Comparison
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
from LZGraphs import jensen_shannon_divergence
|
|
140
|
+
jsd = jensen_shannon_divergence(graph_a, graph_b) # 0 = identical, 1 = maximally different
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### ML Feature Extraction
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
# Project any repertoire into a fixed reference space
|
|
147
|
+
features = reference.feature_aligned(LZGraph(sample_seqs, variant='aap'))
|
|
148
|
+
stats = graph.feature_stats() # 15-element summary vector
|
|
149
|
+
profile = graph.feature_mass_profile() # position-based mass distribution
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Serialization
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
graph.save('repertoire.lzg') # fast binary format
|
|
156
|
+
loaded = LZGraph.load('repertoire.lzg')
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Documentation
|
|
160
|
+
|
|
161
|
+
Full documentation with tutorials, concept guides, and API reference:
|
|
162
|
+
|
|
163
|
+
**[https://MuteJester.github.io/LZGraphs/](https://MuteJester.github.io/LZGraphs/)**
|
|
164
|
+
|
|
165
|
+
- [Quick Start](https://MuteJester.github.io/LZGraphs/getting-started/quickstart/) — build your first graph in 5 minutes
|
|
166
|
+
- [Tutorials](https://MuteJester.github.io/LZGraphs/tutorials/) — graph construction, sequence analysis, diversity metrics
|
|
167
|
+
- [API Reference](https://MuteJester.github.io/LZGraphs/api/lzgraph/) — complete class and function reference
|
|
168
|
+
- [CLI Reference](https://MuteJester.github.io/LZGraphs/api/cli/) — terminal tool documentation
|
|
169
|
+
|
|
170
|
+
## Citation
|
|
171
|
+
|
|
172
|
+
If you use LZGraphs in your research, please cite:
|
|
173
|
+
|
|
174
|
+
```bibtex
|
|
175
|
+
@article{konstantinovsky2023lzgraphs,
|
|
176
|
+
title={A Novel Approach to T-Cell Receptor Beta Chain (TCRB) Repertoire Encoding
|
|
177
|
+
Using Lossless String Compression},
|
|
178
|
+
author={Konstantinovsky, Thomas and Nagar, Maor and Louzoun, Yoram},
|
|
179
|
+
journal={Bioinformatics},
|
|
180
|
+
year={2023},
|
|
181
|
+
publisher={Oxford University Press}
|
|
182
|
+
}
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Contributing
|
|
186
|
+
|
|
187
|
+
Contributions are welcome. Please open an issue or submit a pull request.
|
|
188
|
+
|
|
189
|
+
1. Fork the repository
|
|
190
|
+
2. Create a feature branch (`git checkout -b feature/my-feature`)
|
|
191
|
+
3. Commit your changes
|
|
192
|
+
4. Push and open a Pull Request
|
|
193
|
+
|
|
194
|
+
## License
|
|
195
|
+
|
|
196
|
+
MIT License. See [LICENSE](https://github.com/MuteJester/LZGraphs/blob/master/LICENSE) for details.
|
|
197
|
+
|
|
198
|
+
## Contact
|
|
199
|
+
|
|
200
|
+
Thomas Konstantinovsky — [thomaskon90@gmail.com](mailto:thomaskon90@gmail.com)
|
|
201
|
+
|
|
202
|
+
[GitHub](https://github.com/MuteJester/LZGraphs) · [PyPI](https://pypi.org/project/LZGraphs/) · [Documentation](https://MuteJester.github.io/LZGraphs/)
|