LZGraphs 2.3.1__tar.gz → 3.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. {lzgraphs-2.3.1 → lzgraphs-3.0.1}/LICENSE +1 -1
  2. lzgraphs-3.0.1/MANIFEST.in +7 -0
  3. lzgraphs-3.0.1/Makefile +125 -0
  4. lzgraphs-3.0.1/PKG-INFO +246 -0
  5. lzgraphs-3.0.1/README.md +202 -0
  6. lzgraphs-3.0.1/include/lzgraph/analytics.h +105 -0
  7. lzgraphs-3.0.1/include/lzgraph/common.h +231 -0
  8. lzgraphs-3.0.1/include/lzgraph/crc32c.h +19 -0
  9. lzgraphs-3.0.1/include/lzgraph/diversity.h +116 -0
  10. lzgraphs-3.0.1/include/lzgraph/edge_builder.h +49 -0
  11. lzgraphs-3.0.1/include/lzgraph/features.h +55 -0
  12. lzgraphs-3.0.1/include/lzgraph/forward.h +107 -0
  13. lzgraphs-3.0.1/include/lzgraph/gene_data.h +85 -0
  14. lzgraphs-3.0.1/include/lzgraph/genomic_simulate.h +5 -0
  15. lzgraphs-3.0.1/include/lzgraph/graph.h +167 -0
  16. lzgraphs-3.0.1/include/lzgraph/graph_ops.h +66 -0
  17. lzgraphs-3.0.1/include/lzgraph/hash_map.h +58 -0
  18. lzgraphs-3.0.1/include/lzgraph/io.h +107 -0
  19. lzgraphs-3.0.1/include/lzgraph/lz76.h +99 -0
  20. lzgraphs-3.0.1/include/lzgraph/occupancy.h +56 -0
  21. lzgraphs-3.0.1/include/lzgraph/pgen_dist.h +67 -0
  22. lzgraphs-3.0.1/include/lzgraph/posterior.h +41 -0
  23. lzgraphs-3.0.1/include/lzgraph/rng.h +41 -0
  24. lzgraphs-3.0.1/include/lzgraph/sharing.h +48 -0
  25. lzgraphs-3.0.1/include/lzgraph/simulate.h +138 -0
  26. lzgraphs-3.0.1/include/lzgraph/string_pool.h +46 -0
  27. lzgraphs-3.0.1/include/lzgraph/walk_dict.h +232 -0
  28. lzgraphs-3.0.1/include/lzgraph/wynn.h +26 -0
  29. lzgraphs-3.0.1/lib/analytics/analytics.c +291 -0
  30. lzgraphs-3.0.1/lib/analytics/diversity.c +288 -0
  31. lzgraphs-3.0.1/lib/analytics/features.c +199 -0
  32. lzgraphs-3.0.1/lib/analytics/pgen_dist.c +259 -0
  33. lzgraphs-3.0.1/lib/core/common.c +91 -0
  34. lzgraphs-3.0.1/lib/core/crc32c.c +33 -0
  35. lzgraphs-3.0.1/lib/core/hash_map.c +173 -0
  36. lzgraphs-3.0.1/lib/core/rng.c +20 -0
  37. lzgraphs-3.0.1/lib/core/string_pool.c +117 -0
  38. lzgraphs-3.0.1/lib/core/wynn.c +72 -0
  39. lzgraphs-3.0.1/lib/graph/csr_graph.c +1854 -0
  40. lzgraphs-3.0.1/lib/graph/edge_builder.c +70 -0
  41. lzgraphs-3.0.1/lib/graph/forward.c +87 -0
  42. lzgraphs-3.0.1/lib/graph/gene_data.c +81 -0
  43. lzgraphs-3.0.1/lib/graph/graph_ops.c +466 -0
  44. lzgraphs-3.0.1/lib/graph/lz76.c +289 -0
  45. lzgraphs-3.0.1/lib/io/io.c +632 -0
  46. lzgraphs-3.0.1/lib/occupancy/occupancy.c +213 -0
  47. lzgraphs-3.0.1/lib/occupancy/sharing.c +199 -0
  48. lzgraphs-3.0.1/lib/posterior/posterior.c +174 -0
  49. lzgraphs-3.0.1/lib/simulation/genomic_simulate.c +267 -0
  50. lzgraphs-3.0.1/lib/simulation/simulate.c +323 -0
  51. {lzgraphs-2.3.1 → lzgraphs-3.0.1}/pyproject.toml +21 -39
  52. lzgraphs-3.0.1/setup.cfg +4 -0
  53. lzgraphs-3.0.1/setup.py +44 -0
  54. lzgraphs-3.0.1/src/LZGraphs/__init__.py +94 -0
  55. lzgraphs-3.0.1/src/LZGraphs/_clzgraph.c +1473 -0
  56. lzgraphs-3.0.1/src/LZGraphs/_errors.py +49 -0
  57. lzgraphs-3.0.1/src/LZGraphs/_graph.py +584 -0
  58. lzgraphs-3.0.1/src/LZGraphs/_io.py +598 -0
  59. lzgraphs-3.0.1/src/LZGraphs/_pgen_dist.py +75 -0
  60. lzgraphs-3.0.1/src/LZGraphs/_simulation_result.py +44 -0
  61. lzgraphs-3.0.1/src/LZGraphs/cli.py +804 -0
  62. lzgraphs-3.0.1/src/LZGraphs.egg-info/PKG-INFO +246 -0
  63. lzgraphs-3.0.1/src/LZGraphs.egg-info/SOURCES.txt +79 -0
  64. lzgraphs-3.0.1/src/LZGraphs.egg-info/entry_points.txt +2 -0
  65. {lzgraphs-2.3.1 → lzgraphs-3.0.1}/src/LZGraphs.egg-info/requires.txt +2 -9
  66. lzgraphs-3.0.1/tests/test_analytics.py +144 -0
  67. lzgraphs-3.0.1/tests/test_cli.py +219 -0
  68. lzgraphs-3.0.1/tests/test_deep_audit.py +258 -0
  69. lzgraphs-3.0.1/tests/test_graph.py +314 -0
  70. lzgraphs-3.0.1/tests/test_io.py +60 -0
  71. lzgraphs-3.0.1/tests/test_known_distributions.py +779 -0
  72. lzgraphs-3.0.1/tests/test_lzpgen.py +55 -0
  73. lzgraphs-3.0.1/tests/test_mathematician_audit.py +278 -0
  74. lzgraphs-3.0.1/tests/test_model_audit.py +301 -0
  75. lzgraphs-3.0.1/tests/test_operations.py +184 -0
  76. lzgraphs-3.0.1/tests/test_simulate.py +88 -0
  77. lzgraphs-3.0.1/tests/test_statistical_validity.py +303 -0
  78. lzgraphs-2.3.1/MANIFEST.in +0 -5
  79. lzgraphs-2.3.1/PKG-INFO +0 -401
  80. lzgraphs-2.3.1/README.md +0 -350
  81. lzgraphs-2.3.1/requirements.txt +0 -5
  82. lzgraphs-2.3.1/setup.cfg +0 -7
  83. lzgraphs-2.3.1/setup.py +0 -40
  84. lzgraphs-2.3.1/src/LZGraphs/__init__.py +0 -203
  85. lzgraphs-2.3.1/src/LZGraphs/_fast_walk.c +0 -321
  86. lzgraphs-2.3.1/src/LZGraphs/bag_of_words/__init__.py +0 -3
  87. lzgraphs-2.3.1/src/LZGraphs/bag_of_words/bow_encoder.py +0 -185
  88. lzgraphs-2.3.1/src/LZGraphs/constants.py +0 -6
  89. lzgraphs-2.3.1/src/LZGraphs/exceptions/__init__.py +0 -550
  90. lzgraphs-2.3.1/src/LZGraphs/graphs/__init__.py +0 -6
  91. lzgraphs-2.3.1/src/LZGraphs/graphs/amino_acid_positional.py +0 -699
  92. lzgraphs-2.3.1/src/LZGraphs/graphs/edge_data.py +0 -222
  93. lzgraphs-2.3.1/src/LZGraphs/graphs/graph_operations.py +0 -106
  94. lzgraphs-2.3.1/src/LZGraphs/graphs/lz_graph_base.py +0 -1066
  95. lzgraphs-2.3.1/src/LZGraphs/graphs/naive.py +0 -343
  96. lzgraphs-2.3.1/src/LZGraphs/graphs/nucleotide_double_positional.py +0 -384
  97. lzgraphs-2.3.1/src/LZGraphs/metrics/__init__.py +0 -80
  98. lzgraphs-2.3.1/src/LZGraphs/metrics/convenience.py +0 -90
  99. lzgraphs-2.3.1/src/LZGraphs/metrics/diversity.py +0 -367
  100. lzgraphs-2.3.1/src/LZGraphs/metrics/entropy.py +0 -1007
  101. lzgraphs-2.3.1/src/LZGraphs/metrics/pgen_distribution.py +0 -351
  102. lzgraphs-2.3.1/src/LZGraphs/metrics/saturation.py +0 -445
  103. lzgraphs-2.3.1/src/LZGraphs/mixins/__init__.py +0 -8
  104. lzgraphs-2.3.1/src/LZGraphs/mixins/bayesian_posterior.py +0 -277
  105. lzgraphs-2.3.1/src/LZGraphs/mixins/gene_logic.py +0 -122
  106. lzgraphs-2.3.1/src/LZGraphs/mixins/gene_prediction.py +0 -215
  107. lzgraphs-2.3.1/src/LZGraphs/mixins/graph_topology.py +0 -59
  108. lzgraphs-2.3.1/src/LZGraphs/mixins/lzpgen_distribution.py +0 -457
  109. lzgraphs-2.3.1/src/LZGraphs/mixins/random_walk.py +0 -151
  110. lzgraphs-2.3.1/src/LZGraphs/mixins/serialization.py +0 -628
  111. lzgraphs-2.3.1/src/LZGraphs/mixins/walk_analysis.py +0 -177
  112. lzgraphs-2.3.1/src/LZGraphs/py.typed +0 -0
  113. lzgraphs-2.3.1/src/LZGraphs/utilities/__init__.py +0 -13
  114. lzgraphs-2.3.1/src/LZGraphs/utilities/decomposition.py +0 -71
  115. lzgraphs-2.3.1/src/LZGraphs/utilities/helpers.py +0 -50
  116. lzgraphs-2.3.1/src/LZGraphs/utilities/misc.py +0 -133
  117. lzgraphs-2.3.1/src/LZGraphs/visualization/__init__.py +0 -18
  118. lzgraphs-2.3.1/src/LZGraphs/visualization/visualize.py +0 -234
  119. lzgraphs-2.3.1/src/LZGraphs.egg-info/PKG-INFO +0 -401
  120. lzgraphs-2.3.1/src/LZGraphs.egg-info/SOURCES.txt +0 -66
  121. lzgraphs-2.3.1/tests/test_aap_lzgraph.py +0 -306
  122. lzgraphs-2.3.1/tests/test_abundance.py +0 -796
  123. lzgraphs-2.3.1/tests/test_analytical_distribution.py +0 -481
  124. lzgraphs-2.3.1/tests/test_base_class_methods.py +0 -320
  125. lzgraphs-2.3.1/tests/test_bow_encoder.py +0 -307
  126. lzgraphs-2.3.1/tests/test_diversity_theory.py +0 -686
  127. lzgraphs-2.3.1/tests/test_flexible_input.py +0 -213
  128. lzgraphs-2.3.1/tests/test_graph_operations.py +0 -264
  129. lzgraphs-2.3.1/tests/test_lzpgen_distribution.py +0 -267
  130. lzgraphs-2.3.1/tests/test_metrics.py +0 -507
  131. lzgraphs-2.3.1/tests/test_naive_lzgraph.py +0 -141
  132. lzgraphs-2.3.1/tests/test_ndp_lzgraph.py +0 -156
  133. lzgraphs-2.3.1/tests/test_new_features.py +0 -396
  134. lzgraphs-2.3.1/tests/test_pgen_fixes.py +0 -225
  135. lzgraphs-2.3.1/tests/test_serialization.py +0 -411
  136. lzgraphs-2.3.1/tests/test_simulate.py +0 -225
  137. lzgraphs-2.3.1/tests/test_utilities.py +0 -296
  138. {lzgraphs-2.3.1 → lzgraphs-3.0.1}/CHANGELOG.md +0 -0
  139. {lzgraphs-2.3.1 → lzgraphs-3.0.1}/CONTRIBUTING.md +0 -0
  140. {lzgraphs-2.3.1 → lzgraphs-3.0.1}/src/LZGraphs.egg-info/dependency_links.txt +0 -0
  141. {lzgraphs-2.3.1 → lzgraphs-3.0.1}/src/LZGraphs.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2021 Tom Chen (tomchen.org)
3
+ Copyright (c) 2021-2026 Thomas Konstantinovsky
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -0,0 +1,7 @@
1
+ include pyproject.toml
2
+ include *.md
3
+ include LICENSE
4
+ include Makefile
5
+ recursive-include include *.h
6
+ recursive-include lib *.c
7
+ recursive-include src/LZGraphs *.c
@@ -0,0 +1,125 @@
1
+ CC = gcc
2
+ CFLAGS = -O2 -Wall -Wextra -std=c11 -Iinclude
3
+ LDFLAGS = -lm
4
+
5
+ # Collect all .c files under lib/
6
+ SRCS = $(shell find lib -name '*.c')
7
+ OBJS = $(patsubst lib/%.c, build/%.o, $(SRCS))
8
+
9
+ .PHONY: all clean test
10
+
11
+ all: build/liblzgraph.a
12
+
13
+ # Static library
14
+ build/liblzgraph.a: $(OBJS)
15
+ ar rcs $@ $^
16
+
17
+ # Compile each .c → .o, mirroring the lib/ directory structure in build/
18
+ build/%.o: lib/%.c | build_dirs
19
+ $(CC) $(CFLAGS) -c $< -o $@
20
+
21
+ # Create build subdirectories matching lib/
22
+ build_dirs:
23
+ @mkdir -p $(sort $(dir $(OBJS)))
24
+
25
+ # Test runners
26
+ test: test_core test_graph test_forward test_simulate test_analytics test_occupancy test_io_posterior test_pgen_dist test_variants test_gene_data test_sharing test_graph_ops test_diversity test_features test_genomic_simulate test_walk_dict
27
+
28
+ test_core: build/test_core
29
+ ./build/test_core
30
+
31
+ test_graph: build/test_graph
32
+ ./build/test_graph
33
+
34
+ build/test_core: tests/c_unit/test_core.c build/liblzgraph.a | build_dirs
35
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
36
+
37
+ build/test_graph: tests/c_unit/test_graph.c build/liblzgraph.a | build_dirs
38
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
39
+
40
+ test_forward: build/test_forward
41
+ ./build/test_forward
42
+
43
+ build/test_forward: tests/c_unit/test_forward.c build/liblzgraph.a | build_dirs
44
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
45
+
46
+ test_simulate: build/test_simulate
47
+ ./build/test_simulate
48
+
49
+ build/test_simulate: tests/c_unit/test_simulate.c build/liblzgraph.a | build_dirs
50
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
51
+
52
+ test_analytics: build/test_analytics
53
+ ./build/test_analytics
54
+
55
+ build/test_analytics: tests/c_unit/test_analytics.c build/liblzgraph.a | build_dirs
56
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
57
+
58
+ test_occupancy: build/test_occupancy
59
+ ./build/test_occupancy
60
+
61
+ build/test_occupancy: tests/c_unit/test_occupancy.c build/liblzgraph.a | build_dirs
62
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
63
+
64
+ test_io_posterior: build/test_io_posterior
65
+ ./build/test_io_posterior
66
+
67
+ build/test_io_posterior: tests/c_unit/test_io_posterior.c build/liblzgraph.a | build_dirs
68
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
69
+
70
+ test_pgen_dist: build/test_pgen_dist
71
+ ./build/test_pgen_dist
72
+
73
+ build/test_pgen_dist: tests/c_unit/test_pgen_dist.c build/liblzgraph.a | build_dirs
74
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
75
+
76
+ test_variants: build/test_variants
77
+ ./build/test_variants
78
+
79
+ build/test_variants: tests/c_unit/test_variants.c build/liblzgraph.a | build_dirs
80
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
81
+
82
+ test_gene_data: build/test_gene_data
83
+ ./build/test_gene_data
84
+
85
+ build/test_gene_data: tests/c_unit/test_gene_data.c build/liblzgraph.a | build_dirs
86
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
87
+
88
+ test_sharing: build/test_sharing
89
+ ./build/test_sharing
90
+
91
+ build/test_sharing: tests/c_unit/test_sharing.c build/liblzgraph.a | build_dirs
92
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
93
+
94
+ test_graph_ops: build/test_graph_ops
95
+ ./build/test_graph_ops
96
+
97
+ build/test_graph_ops: tests/c_unit/test_graph_ops.c build/liblzgraph.a | build_dirs
98
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
99
+
100
+ test_diversity: build/test_diversity
101
+ ./build/test_diversity
102
+
103
+ build/test_diversity: tests/c_unit/test_diversity.c build/liblzgraph.a | build_dirs
104
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
105
+
106
+ test_features: build/test_features
107
+ ./build/test_features
108
+
109
+ build/test_features: tests/c_unit/test_features.c build/liblzgraph.a | build_dirs
110
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
111
+
112
+ test_genomic_simulate: build/test_genomic_simulate
113
+ ./build/test_genomic_simulate
114
+
115
+ build/test_genomic_simulate: tests/c_unit/test_genomic_simulate.c build/liblzgraph.a | build_dirs
116
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
117
+
118
+ test_walk_dict: build/test_walk_dict
119
+ ./build/test_walk_dict
120
+
121
+ build/test_walk_dict: tests/c_unit/test_walk_dict.c build/liblzgraph.a | build_dirs
122
+ $(CC) $(CFLAGS) $< -Lbuild -llzgraph $(LDFLAGS) -o $@
123
+
124
+ clean:
125
+ rm -rf build
@@ -0,0 +1,246 @@
1
+ Metadata-Version: 2.4
2
+ Name: LZGraphs
3
+ Version: 3.0.1
4
+ Summary: High-performance LZ76 compression graphs for immune receptor repertoire analysis
5
+ Author-email: Thomas Konstantinovsky <thomaskon90@gmail.com>
6
+ Maintainer-email: Thomas Konstantinovsky <thomaskon90@gmail.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/MuteJester/LZGraphs
9
+ Project-URL: Documentation, https://mutejester.github.io/LZGraphs/
10
+ Project-URL: Repository, https://github.com/MuteJester/LZGraphs
11
+ Project-URL: Issues, https://github.com/MuteJester/LZGraphs/issues
12
+ Project-URL: Changelog, https://github.com/MuteJester/LZGraphs/blob/master/CHANGELOG.md
13
+ Keywords: Graph Theory,Immunology,Analytics,Biology,T-cell,Repertoire,CDR3,Bioinformatics,LZ76,Lempel-Ziv
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
19
+ Classifier: License :: OSI Approved :: MIT License
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.9
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3.12
25
+ Classifier: Programming Language :: C
26
+ Classifier: Typing :: Typed
27
+ Requires-Python: >=3.9
28
+ Description-Content-Type: text/markdown
29
+ License-File: LICENSE
30
+ Requires-Dist: numpy>=1.20
31
+ Provides-Extra: viz
32
+ Requires-Dist: matplotlib>=3.7; extra == "viz"
33
+ Requires-Dist: seaborn>=0.12; extra == "viz"
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest>=7.0; extra == "dev"
36
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
37
+ Requires-Dist: scipy>=1.9; extra == "dev"
38
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
39
+ Requires-Dist: build>=1.0; extra == "dev"
40
+ Provides-Extra: docs
41
+ Requires-Dist: mkdocs-material>=9.5; extra == "docs"
42
+ Requires-Dist: mkdocstrings[python]>=0.24; extra == "docs"
43
+ Dynamic: license-file
44
+
45
+ <p align="center">
46
+ <a href="https://github.com/MuteJester/LZGraphs">
47
+ <img src="docs/images/lzglogo2.png" alt="LZGraphs" width="400">
48
+ </a>
49
+ </p>
50
+
51
+ <p align="center">
52
+ <strong>High-performance LZ76 compression graphs for immune receptor repertoire analysis</strong>
53
+ </p>
54
+
55
+ <p align="center">
56
+ <a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/v/LZGraphs.svg" alt="PyPI"></a>
57
+ <a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/pyversions/LZGraphs.svg" alt="Python"></a>
58
+ <a href="https://github.com/MuteJester/LZGraphs/blob/master/LICENSE"><img src="https://img.shields.io/github/license/MuteJester/LZGraphs.svg" alt="License"></a>
59
+ <a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/dm/LZGraphs.svg" alt="Downloads"></a>
60
+ <a href="https://github.com/MuteJester/LZGraphs/stargazers"><img src="https://img.shields.io/github/stars/MuteJester/LZGraphs.svg" alt="Stars"></a>
61
+ </p>
62
+
63
+ <p align="center">
64
+ <a href="https://MuteJester.github.io/LZGraphs/"><strong>Documentation</strong></a> &nbsp;&middot;&nbsp;
65
+ <a href="https://MuteJester.github.io/LZGraphs/getting-started/quickstart/"><strong>Quick Start</strong></a> &nbsp;&middot;&nbsp;
66
+ <a href="https://MuteJester.github.io/LZGraphs/api/lzgraph/"><strong>API Reference</strong></a> &nbsp;&middot;&nbsp;
67
+ <a href="https://github.com/MuteJester/LZGraphs/issues">Report Bug</a>
68
+ </p>
69
+
70
+ ---
71
+
72
+ **LZGraphs** is a Python library that transforms T-cell and B-cell receptor CDR3 sequences into probabilistic directed graphs using the Lempel-Ziv 76 compression algorithm. Built on a C core with Python bindings, it provides:
73
+
74
+ - **Exact generation probabilities** for any CDR3 sequence
75
+ - **LZ76-constrained sequence simulation** that guarantees valid outputs
76
+ - **Analytical diversity metrics** (Hill numbers, richness predictions, sharing spectra)
77
+ - **Graph algebra** (union, intersection, difference) for repertoire comparison
78
+ - **ML feature extraction** with fixed-size vectors for classification pipelines
79
+ - **Bayesian posterior personalization** to adapt population models to individuals
80
+ - **A CLI tool** (`lzg`) for terminal-based analysis
81
+
82
+ ## Quick Start
83
+
84
+ ```bash
85
+ pip install LZGraphs
86
+ ```
87
+
88
+ ```python
89
+ from LZGraphs import LZGraph
90
+
91
+ # Build a graph from CDR3 amino acid sequences
92
+ graph = LZGraph(
93
+ ['CASSLEPSGGTDTQYF', 'CASSDTSGGTDTQYF', 'CASSLEPQTFTDTFFF',
94
+ 'CASSLGQGSTEAFF', 'CASSLGIRRT'],
95
+ variant='aap',
96
+ )
97
+
98
+ # Score a sequence
99
+ log_p = graph.lzpgen('CASSLEPSGGTDTQYF')
100
+ print(f"log P(gen) = {log_p:.2f}")
101
+
102
+ # Simulate new sequences
103
+ result = graph.simulate(1000, seed=42)
104
+ print(f"Generated {len(result)} sequences")
105
+
106
+ # Diversity
107
+ print(f"D(1) = {graph.effective_diversity():.1f}")
108
+ print(f"D(2) = {graph.hill_number(2):.1f}")
109
+ ```
110
+
111
+ ### With gene annotation
112
+
113
+ ```python
114
+ graph = LZGraph(
115
+ sequences,
116
+ variant='aap',
117
+ v_genes=['TRBV16-1*01', 'TRBV1-1*01', ...],
118
+ j_genes=['TRBJ1-2*01', 'TRBJ1-5*01', ...],
119
+ )
120
+
121
+ # Gene-constrained simulation
122
+ result = graph.simulate(100, sample_genes=True, seed=42)
123
+ print(result.v_genes[0], result.j_genes[0])
124
+ ```
125
+
126
+ ### Command line
127
+
128
+ ```bash
129
+ lzg build repertoire.tsv -o rep.lzg
130
+ lzg score rep.lzg sequences.txt
131
+ lzg diversity rep.lzg
132
+ lzg simulate rep.lzg -n 10000 --seed 42
133
+ lzg compare healthy.lzg disease.lzg
134
+ ```
135
+
136
+ ## Graph Variants
137
+
138
+ One unified `LZGraph` class with three encoding schemes:
139
+
140
+ | Variant | Input | Node format | Best for |
141
+ |---------|-------|-------------|----------|
142
+ | `'aap'` | Amino acid CDR3 | `C_2`, `SL_6` | Most TCR/BCR analysis |
143
+ | `'ndp'` | Nucleotide CDR3 | `TG0_4` | Nucleotide-level analysis |
144
+ | `'naive'` | Any strings | `C`, `SL` | Motif discovery, ML features |
145
+
146
+ ## Key Capabilities
147
+
148
+ ### Scoring & Simulation
149
+
150
+ ```python
151
+ # Log-probability of a sequence
152
+ graph.lzpgen('CASSLEPSGGTDTQYF') # single
153
+ graph.lzpgen(['seq1', 'seq2', 'seq3']) # batch → np.ndarray
154
+
155
+ # Simulate with optional gene constraints
156
+ result = graph.simulate(1000, seed=42)
157
+ result = graph.simulate(100, v_gene='TRBV5-1*01', j_gene='TRBJ2-7*01')
158
+ ```
159
+
160
+ ### Diversity & Analytics
161
+
162
+ ```python
163
+ graph.effective_diversity() # exp(Shannon entropy)
164
+ graph.hill_number(2) # inverse Simpson
165
+ graph.hill_numbers([0, 1, 2, 5]) # multiple orders → np.ndarray
166
+ graph.predicted_richness(100_000) # expected unique seqs at depth
167
+ graph.predicted_overlap(10000, 50000) # expected shared sequences
168
+ graph.pgen_distribution() # analytical Gaussian mixture
169
+ ```
170
+
171
+ ### Graph Algebra
172
+
173
+ ```python
174
+ combined = graph_a | graph_b # union
175
+ shared = graph_a & graph_b # intersection
176
+ unique_a = graph_a - graph_b # difference
177
+ personal = population.posterior(patient_seqs, kappa=10.0) # Bayesian update
178
+ ```
179
+
180
+ ### Repertoire Comparison
181
+
182
+ ```python
183
+ from LZGraphs import jensen_shannon_divergence
184
+ jsd = jensen_shannon_divergence(graph_a, graph_b) # 0 = identical, 1 = maximally different
185
+ ```
186
+
187
+ ### ML Feature Extraction
188
+
189
+ ```python
190
+ # Project any repertoire into a fixed reference space
191
+ features = reference.feature_aligned(LZGraph(sample_seqs, variant='aap'))
192
+ stats = graph.feature_stats() # 15-element summary vector
193
+ profile = graph.feature_mass_profile() # position-based mass distribution
194
+ ```
195
+
196
+ ### Serialization
197
+
198
+ ```python
199
+ graph.save('repertoire.lzg') # fast binary format
200
+ loaded = LZGraph.load('repertoire.lzg')
201
+ ```
202
+
203
+ ## Documentation
204
+
205
+ Full documentation with tutorials, concept guides, and API reference:
206
+
207
+ **[https://MuteJester.github.io/LZGraphs/](https://MuteJester.github.io/LZGraphs/)**
208
+
209
+ - [Quick Start](https://MuteJester.github.io/LZGraphs/getting-started/quickstart/) — build your first graph in 5 minutes
210
+ - [Tutorials](https://MuteJester.github.io/LZGraphs/tutorials/) — graph construction, sequence analysis, diversity metrics
211
+ - [API Reference](https://MuteJester.github.io/LZGraphs/api/lzgraph/) — complete class and function reference
212
+ - [CLI Reference](https://MuteJester.github.io/LZGraphs/api/cli/) — terminal tool documentation
213
+
214
+ ## Citation
215
+
216
+ If you use LZGraphs in your research, please cite:
217
+
218
+ ```bibtex
219
+ @article{konstantinovsky2023lzgraphs,
220
+ title={A Novel Approach to T-Cell Receptor Beta Chain (TCRB) Repertoire Encoding
221
+ Using Lossless String Compression},
222
+ author={Konstantinovsky, Thomas and Nagar, Maor and Louzoun, Yoram},
223
+ journal={Bioinformatics},
224
+ year={2023},
225
+ publisher={Oxford University Press}
226
+ }
227
+ ```
228
+
229
+ ## Contributing
230
+
231
+ Contributions are welcome. Please open an issue or submit a pull request.
232
+
233
+ 1. Fork the repository
234
+ 2. Create a feature branch (`git checkout -b feature/my-feature`)
235
+ 3. Commit your changes
236
+ 4. Push and open a Pull Request
237
+
238
+ ## License
239
+
240
+ MIT License. See [LICENSE](LICENSE) for details.
241
+
242
+ ## Contact
243
+
244
+ Thomas Konstantinovsky — [thomaskon90@gmail.com](mailto:thomaskon90@gmail.com)
245
+
246
+ [GitHub](https://github.com/MuteJester/LZGraphs) · [PyPI](https://pypi.org/project/LZGraphs/) · [Documentation](https://MuteJester.github.io/LZGraphs/)
@@ -0,0 +1,202 @@
1
+ <p align="center">
2
+ <a href="https://github.com/MuteJester/LZGraphs">
3
+ <img src="docs/images/lzglogo2.png" alt="LZGraphs" width="400">
4
+ </a>
5
+ </p>
6
+
7
+ <p align="center">
8
+ <strong>High-performance LZ76 compression graphs for immune receptor repertoire analysis</strong>
9
+ </p>
10
+
11
+ <p align="center">
12
+ <a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/v/LZGraphs.svg" alt="PyPI"></a>
13
+ <a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/pyversions/LZGraphs.svg" alt="Python"></a>
14
+ <a href="https://github.com/MuteJester/LZGraphs/blob/master/LICENSE"><img src="https://img.shields.io/github/license/MuteJester/LZGraphs.svg" alt="License"></a>
15
+ <a href="https://pypi.org/project/LZGraphs/"><img src="https://img.shields.io/pypi/dm/LZGraphs.svg" alt="Downloads"></a>
16
+ <a href="https://github.com/MuteJester/LZGraphs/stargazers"><img src="https://img.shields.io/github/stars/MuteJester/LZGraphs.svg" alt="Stars"></a>
17
+ </p>
18
+
19
+ <p align="center">
20
+ <a href="https://MuteJester.github.io/LZGraphs/"><strong>Documentation</strong></a> &nbsp;&middot;&nbsp;
21
+ <a href="https://MuteJester.github.io/LZGraphs/getting-started/quickstart/"><strong>Quick Start</strong></a> &nbsp;&middot;&nbsp;
22
+ <a href="https://MuteJester.github.io/LZGraphs/api/lzgraph/"><strong>API Reference</strong></a> &nbsp;&middot;&nbsp;
23
+ <a href="https://github.com/MuteJester/LZGraphs/issues">Report Bug</a>
24
+ </p>
25
+
26
+ ---
27
+
28
+ **LZGraphs** is a Python library that transforms T-cell and B-cell receptor CDR3 sequences into probabilistic directed graphs using the Lempel-Ziv 76 compression algorithm. Built on a C core with Python bindings, it provides:
29
+
30
+ - **Exact generation probabilities** for any CDR3 sequence
31
+ - **LZ76-constrained sequence simulation** that guarantees valid outputs
32
+ - **Analytical diversity metrics** (Hill numbers, richness predictions, sharing spectra)
33
+ - **Graph algebra** (union, intersection, difference) for repertoire comparison
34
+ - **ML feature extraction** with fixed-size vectors for classification pipelines
35
+ - **Bayesian posterior personalization** to adapt population models to individuals
36
+ - **A CLI tool** (`lzg`) for terminal-based analysis
37
+
38
+ ## Quick Start
39
+
40
+ ```bash
41
+ pip install LZGraphs
42
+ ```
43
+
44
+ ```python
45
+ from LZGraphs import LZGraph
46
+
47
+ # Build a graph from CDR3 amino acid sequences
48
+ graph = LZGraph(
49
+ ['CASSLEPSGGTDTQYF', 'CASSDTSGGTDTQYF', 'CASSLEPQTFTDTFFF',
50
+ 'CASSLGQGSTEAFF', 'CASSLGIRRT'],
51
+ variant='aap',
52
+ )
53
+
54
+ # Score a sequence
55
+ log_p = graph.lzpgen('CASSLEPSGGTDTQYF')
56
+ print(f"log P(gen) = {log_p:.2f}")
57
+
58
+ # Simulate new sequences
59
+ result = graph.simulate(1000, seed=42)
60
+ print(f"Generated {len(result)} sequences")
61
+
62
+ # Diversity
63
+ print(f"D(1) = {graph.effective_diversity():.1f}")
64
+ print(f"D(2) = {graph.hill_number(2):.1f}")
65
+ ```
66
+
67
+ ### With gene annotation
68
+
69
+ ```python
70
+ graph = LZGraph(
71
+ sequences,
72
+ variant='aap',
73
+ v_genes=['TRBV16-1*01', 'TRBV1-1*01', ...],
74
+ j_genes=['TRBJ1-2*01', 'TRBJ1-5*01', ...],
75
+ )
76
+
77
+ # Gene-constrained simulation
78
+ result = graph.simulate(100, sample_genes=True, seed=42)
79
+ print(result.v_genes[0], result.j_genes[0])
80
+ ```
81
+
82
+ ### Command line
83
+
84
+ ```bash
85
+ lzg build repertoire.tsv -o rep.lzg
86
+ lzg score rep.lzg sequences.txt
87
+ lzg diversity rep.lzg
88
+ lzg simulate rep.lzg -n 10000 --seed 42
89
+ lzg compare healthy.lzg disease.lzg
90
+ ```
91
+
92
+ ## Graph Variants
93
+
94
+ One unified `LZGraph` class with three encoding schemes:
95
+
96
+ | Variant | Input | Node format | Best for |
97
+ |---------|-------|-------------|----------|
98
+ | `'aap'` | Amino acid CDR3 | `C_2`, `SL_6` | Most TCR/BCR analysis |
99
+ | `'ndp'` | Nucleotide CDR3 | `TG0_4` | Nucleotide-level analysis |
100
+ | `'naive'` | Any strings | `C`, `SL` | Motif discovery, ML features |
101
+
102
+ ## Key Capabilities
103
+
104
+ ### Scoring & Simulation
105
+
106
+ ```python
107
+ # Log-probability of a sequence
108
+ graph.lzpgen('CASSLEPSGGTDTQYF') # single
109
+ graph.lzpgen(['seq1', 'seq2', 'seq3']) # batch → np.ndarray
110
+
111
+ # Simulate with optional gene constraints
112
+ result = graph.simulate(1000, seed=42)
113
+ result = graph.simulate(100, v_gene='TRBV5-1*01', j_gene='TRBJ2-7*01')
114
+ ```
115
+
116
+ ### Diversity & Analytics
117
+
118
+ ```python
119
+ graph.effective_diversity() # exp(Shannon entropy)
120
+ graph.hill_number(2) # inverse Simpson
121
+ graph.hill_numbers([0, 1, 2, 5]) # multiple orders → np.ndarray
122
+ graph.predicted_richness(100_000) # expected unique seqs at depth
123
+ graph.predicted_overlap(10000, 50000) # expected shared sequences
124
+ graph.pgen_distribution() # analytical Gaussian mixture
125
+ ```
126
+
127
+ ### Graph Algebra
128
+
129
+ ```python
130
+ combined = graph_a | graph_b # union
131
+ shared = graph_a & graph_b # intersection
132
+ unique_a = graph_a - graph_b # difference
133
+ personal = population.posterior(patient_seqs, kappa=10.0) # Bayesian update
134
+ ```
135
+
136
+ ### Repertoire Comparison
137
+
138
+ ```python
139
+ from LZGraphs import jensen_shannon_divergence
140
+ jsd = jensen_shannon_divergence(graph_a, graph_b) # 0 = identical, 1 = maximally different
141
+ ```
142
+
143
+ ### ML Feature Extraction
144
+
145
+ ```python
146
+ # Project any repertoire into a fixed reference space
147
+ features = reference.feature_aligned(LZGraph(sample_seqs, variant='aap'))
148
+ stats = graph.feature_stats() # 15-element summary vector
149
+ profile = graph.feature_mass_profile() # position-based mass distribution
150
+ ```
151
+
152
+ ### Serialization
153
+
154
+ ```python
155
+ graph.save('repertoire.lzg') # fast binary format
156
+ loaded = LZGraph.load('repertoire.lzg')
157
+ ```
158
+
159
+ ## Documentation
160
+
161
+ Full documentation with tutorials, concept guides, and API reference:
162
+
163
+ **[https://MuteJester.github.io/LZGraphs/](https://MuteJester.github.io/LZGraphs/)**
164
+
165
+ - [Quick Start](https://MuteJester.github.io/LZGraphs/getting-started/quickstart/) — build your first graph in 5 minutes
166
+ - [Tutorials](https://MuteJester.github.io/LZGraphs/tutorials/) — graph construction, sequence analysis, diversity metrics
167
+ - [API Reference](https://MuteJester.github.io/LZGraphs/api/lzgraph/) — complete class and function reference
168
+ - [CLI Reference](https://MuteJester.github.io/LZGraphs/api/cli/) — terminal tool documentation
169
+
170
+ ## Citation
171
+
172
+ If you use LZGraphs in your research, please cite:
173
+
174
+ ```bibtex
175
+ @article{konstantinovsky2023lzgraphs,
176
+ title={A Novel Approach to T-Cell Receptor Beta Chain (TCRB) Repertoire Encoding
177
+ Using Lossless String Compression},
178
+ author={Konstantinovsky, Thomas and Nagar, Maor and Louzoun, Yoram},
179
+ journal={Bioinformatics},
180
+ year={2023},
181
+ publisher={Oxford University Press}
182
+ }
183
+ ```
184
+
185
+ ## Contributing
186
+
187
+ Contributions are welcome. Please open an issue or submit a pull request.
188
+
189
+ 1. Fork the repository
190
+ 2. Create a feature branch (`git checkout -b feature/my-feature`)
191
+ 3. Commit your changes
192
+ 4. Push and open a Pull Request
193
+
194
+ ## License
195
+
196
+ MIT License. See [LICENSE](LICENSE) for details.
197
+
198
+ ## Contact
199
+
200
+ Thomas Konstantinovsky — [thomaskon90@gmail.com](mailto:thomaskon90@gmail.com)
201
+
202
+ [GitHub](https://github.com/MuteJester/LZGraphs) · [PyPI](https://pypi.org/project/LZGraphs/) · [Documentation](https://MuteJester.github.io/LZGraphs/)