msdrg 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. msdrg-0.1.0/LICENSE +21 -0
  2. msdrg-0.1.0/MANIFEST.in +10 -0
  3. msdrg-0.1.0/PKG-INFO +311 -0
  4. msdrg-0.1.0/README.md +289 -0
  5. msdrg-0.1.0/data/bin/base_drg_descriptions.bin +0 -0
  6. msdrg-0.1.0/data/bin/cluster_info.bin +0 -0
  7. msdrg-0.1.0/data/bin/cluster_map.bin +0 -0
  8. msdrg-0.1.0/data/bin/diagnosis.bin +0 -0
  9. msdrg-0.1.0/data/bin/drg_descriptions.bin +0 -0
  10. msdrg-0.1.0/data/bin/drg_formulas.bin +0 -0
  11. msdrg-0.1.0/data/bin/dx_patterns.bin +0 -0
  12. msdrg-0.1.0/data/bin/exclusion_groups.bin +0 -0
  13. msdrg-0.1.0/data/bin/exclusion_ids.bin +0 -0
  14. msdrg-0.1.0/data/bin/gender_mdcs.bin +0 -0
  15. msdrg-0.1.0/data/bin/hac_descriptions.bin +0 -0
  16. msdrg-0.1.0/data/bin/hac_formulas.bin +0 -0
  17. msdrg-0.1.0/data/bin/hac_operands.bin +0 -0
  18. msdrg-0.1.0/data/bin/mdc_descriptions.bin +0 -0
  19. msdrg-0.1.0/data/bin/pr_patterns.bin +0 -0
  20. msdrg-0.1.0/data/bin/procedure_attributes.bin +0 -0
  21. msdrg-0.1.0/msdrg/__init__.py +26 -0
  22. msdrg-0.1.0/msdrg/grouper.py +238 -0
  23. msdrg-0.1.0/msdrg.egg-info/PKG-INFO +311 -0
  24. msdrg-0.1.0/msdrg.egg-info/SOURCES.txt +67 -0
  25. msdrg-0.1.0/msdrg.egg-info/dependency_links.txt +1 -0
  26. msdrg-0.1.0/msdrg.egg-info/top_level.txt +1 -0
  27. msdrg-0.1.0/pyproject.toml +38 -0
  28. msdrg-0.1.0/scripts/analyze_clusters.py +63 -0
  29. msdrg-0.1.0/scripts/build_wheels.py +325 -0
  30. msdrg-0.1.0/scripts/compile_clusters.py +185 -0
  31. msdrg-0.1.0/scripts/compile_descriptions.py +69 -0
  32. msdrg-0.1.0/scripts/compile_diagnosis.py +108 -0
  33. msdrg-0.1.0/scripts/compile_drg_formulas.py +153 -0
  34. msdrg-0.1.0/scripts/compile_exclusion_groups.py +58 -0
  35. msdrg-0.1.0/scripts/compile_gender_mdcs.py +62 -0
  36. msdrg-0.1.0/scripts/compile_hac.py +183 -0
  37. msdrg-0.1.0/scripts/compile_patterns.py +108 -0
  38. msdrg-0.1.0/scripts/compile_simple_maps.py +63 -0
  39. msdrg-0.1.0/scripts/extract_data.py +252 -0
  40. msdrg-0.1.0/scripts/import_to_sqlite.py +92 -0
  41. msdrg-0.1.0/scripts/setup_data.sh +42 -0
  42. msdrg-0.1.0/setup.cfg +4 -0
  43. msdrg-0.1.0/setup.py +198 -0
  44. msdrg-0.1.0/zig_src/build.zig +54 -0
  45. msdrg-0.1.0/zig_src/main.zig +62 -0
  46. msdrg-0.1.0/zig_src/src/c_api.zig +276 -0
  47. msdrg-0.1.0/zig_src/src/chain.zig +126 -0
  48. msdrg-0.1.0/zig_src/src/cluster.zig +322 -0
  49. msdrg-0.1.0/zig_src/src/code_map.zig +151 -0
  50. msdrg-0.1.0/zig_src/src/common.zig +135 -0
  51. msdrg-0.1.0/zig_src/src/description.zig +145 -0
  52. msdrg-0.1.0/zig_src/src/diagnosis.zig +168 -0
  53. msdrg-0.1.0/zig_src/src/exclusion.zig +137 -0
  54. msdrg-0.1.0/zig_src/src/final_grouping_test.zig +177 -0
  55. msdrg-0.1.0/zig_src/src/formula.zig +503 -0
  56. msdrg-0.1.0/zig_src/src/gender.zig +155 -0
  57. msdrg-0.1.0/zig_src/src/grouping.zig +892 -0
  58. msdrg-0.1.0/zig_src/src/grouping_test.zig +457 -0
  59. msdrg-0.1.0/zig_src/src/hac.zig +706 -0
  60. msdrg-0.1.0/zig_src/src/integration_test.zig +422 -0
  61. msdrg-0.1.0/zig_src/src/json_api.zig +189 -0
  62. msdrg-0.1.0/zig_src/src/marking.zig +1291 -0
  63. msdrg-0.1.0/zig_src/src/marking_test.zig +270 -0
  64. msdrg-0.1.0/zig_src/src/models.zig +477 -0
  65. msdrg-0.1.0/zig_src/src/msdrg.zig +352 -0
  66. msdrg-0.1.0/zig_src/src/msdrg_data.zig +130 -0
  67. msdrg-0.1.0/zig_src/src/pattern.zig +129 -0
  68. msdrg-0.1.0/zig_src/src/preprocess.zig +547 -0
  69. msdrg-0.1.0/zig_src/src/preprocess_test.zig +466 -0
msdrg-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 LibrePPS
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
msdrg-0.1.0/MANIFEST.in ADDED
@@ -0,0 +1,10 @@
1
+ include LICENSE
2
+ include README.md
3
+ include pyproject.toml
4
+ include setup.py
5
+ recursive-include msdrg *.py
6
+ recursive-include msdrg/data *.bin
7
+ recursive-include zig_src *.zig build.zig
8
+ recursive-include scripts *.py *.sh
9
+ recursive-include data/bin *.bin
10
+ recursive-exclude msdrg/_lib *
msdrg-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,311 @@
1
+ Metadata-Version: 2.4
2
+ Name: msdrg
3
+ Version: 0.1.0
4
+ Summary: High-performance MS-DRG (Medicare Severity Diagnosis Related Groups) grouper
5
+ Author: MZ-DRG Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Bedrock-Billing/mz-drg
8
+ Project-URL: Repository, https://github.com/Bedrock-Billing/mz-drg
9
+ Keywords: drg,ms-drg,grouper,healthcare,medicare
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Healthcare Industry
12
+ Classifier: Operating System :: POSIX :: Linux
13
+ Classifier: Operating System :: MacOS :: MacOS X
14
+ Classifier: Operating System :: Microsoft :: Windows
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Zig
17
+ Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Dynamic: license-file
22
+
23
+ # mz-drg
24
+
25
+ **A high-performance MS-DRG grouper written in Zig with Python bindings.**
26
+
27
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
28
+ [![Zig](https://img.shields.io/badge/Zig-0.16-orange.svg)](https://ziglang.org)
29
+ [![Python](https://img.shields.io/badge/Python-3.11+-green.svg)](https://python.org)
30
+
31
+ ---
32
+
33
+ mz-drg is an open-source reimplementation of the CMS MS-DRG (Medicare Severity Diagnosis Related Groups) classification engine, written in [Zig](https://ziglang.org) and callable from Python. It takes patient claim data — diagnoses, procedures, demographics — and assigns the appropriate DRG, MDC, severity, and return codes.
34
+
35
+ **It has been validated on 50,000+ claims against the reference Java grouper, with a 100% match rate.**
36
+
37
+ ## Why mz-drg?
38
+
39
+ The official CMS MS-DRG grouper is a Java application. While accurate, it comes with practical limitations:
40
+
41
+ | | Java Grouper | mz-drg |
42
+ |---|---|---|
43
+ | **Startup** | JVM warmup, seconds | Instant |
44
+ | **Throughput (tested on a Ryzen 5 5600U laptop)** | ~500 claims/sec | ~7,000+ claims/sec |
45
+ | **Memory** | JVM heap overhead | Minimal, memory-mapped data |
46
+ | **Dependencies** | JRE 17+, classpath management | Single shared library |
47
+ | **Python integration** | JPype bridge (fragile) | Native ctypes (simple) |
48
+ | **Embedding** | Requires JVM process | C ABI, any language |
49
+
50
+ mz-drg is not a black-box reimplementation. The grouping logic — preprocessing, exclusion handling, diagnosis clustering, severity assignment, formula evaluation, rerouting, marking, and final grouping — is ported line-by-line from the decompiled Java source and validated claim-by-claim against the original.
51
+
52
+ ## Quick start
53
+
54
+ ### Install
55
+
56
+ ```bash
57
+ pip install msdrg
58
+ ```
59
+
60
+ > **Requires Zig 0.16+** at build time. Install from [ziglang.org/download](https://ziglang.org/download/) or set the `ZIG` environment variable to point to your zig binary.
61
+
62
+ ### Use
63
+
64
+ ```python
65
+ import msdrg
66
+
67
+ with msdrg.MsdrgGrouper() as grouper:
68
+     result = grouper.group({
69
+         "version": 431,
70
+         "age": 65,
71
+         "sex": 0,
72
+         "discharge_status": 1,
73
+         "pdx": {"code": "I5020"},
74
+         "sdx": [{"code": "E1165"}],
75
+         "procedures": []
76
+     })
77
+
78
+ print(result["final_drg"]) # 293
79
+ print(result["final_mdc"]) # 5
80
+ print(result["final_drg_description"]) # "Heart Failure and Shock without CC/MCC"
81
+ ```
82
+
83
+ ### Helper function
84
+
85
+ ```python
86
+ import msdrg
87
+
88
+ claim = msdrg.create_claim(
89
+ version=431,
90
+ age=65,
91
+ sex=0,
92
+ discharge_status=1,
93
+ pdx="I5020",
94
+ sdx=["E1165", "I10"],
95
+ procedures=["02703DZ"],
96
+ )
97
+
98
+ with msdrg.MsdrgGrouper() as g:
99
+     result = g.group(claim)
100
+ ```
101
+
102
+ ## Input format
103
+
104
+ The `group()` method accepts a dictionary:
105
+
106
+ ```python
107
+ {
108
+ "version": 431, # MS-DRG version (e.g. 400, 410, 421, 431)
109
+ "age": 65, # Patient age in years
110
+ "sex": 0, # 0=Male, 1=Female, 2=Unknown
111
+ "discharge_status": 1, # 1=Home/Self Care, 20=Died
112
+ "pdx": { # Principal diagnosis (required)
113
+ "code": "I5020",
114
+ "poa": "Y" # Present on Admission: Y/N/U/W (optional)
115
+ },
116
+ "admit_dx": { # Admission diagnosis (optional)
117
+ "code": "R0602"
118
+ },
119
+ "sdx": [ # Secondary diagnoses (optional)
120
+ {"code": "E1165", "poa": "Y"},
121
+ {"code": "I10", "poa": "Y"}
122
+ ],
123
+ "procedures": [ # Procedure codes (optional)
124
+ {"code": "02703DZ"}
125
+ ]
126
+ }
127
+ ```
128
+
129
+ ## Output format
130
+
131
+ `group()` returns a dictionary:
132
+
133
+ ```python
134
+ {
135
+ "initial_drg": 293,
136
+ "final_drg": 293,
137
+ "initial_mdc": 5,
138
+ "final_mdc": 5,
139
+ "initial_drg_description": "Heart Failure and Shock without CC/MCC",
140
+ "final_drg_description": "Heart Failure and Shock without CC/MCC",
141
+ "initial_mdc_description": "Diseases and Disorders of the Circulatory System",
142
+ "final_mdc_description": "Diseases and Disorders of the Circulatory System",
143
+ "return_code": "OK",
144
+ "pdx_output": {
145
+ "code": "I5020",
146
+ "mdc": 5,
147
+ "severity": "CC",
148
+ "drg_impact": "BOTH",
149
+ "poa_error": "POA_NOT_CHECKED",
150
+ "flags": ["VALID", "MARKED_FOR_INITIAL", "MARKED_FOR_FINAL"]
151
+ },
152
+ "sdx_output": [...],
153
+ "proc_output": [...]
154
+ }
155
+ ```
156
+
157
+ ## Supported DRG versions
158
+
159
+ | Version | CMS Fiscal Year |
160
+ |---------|----------------|
161
+ | 400 | FY 2023 (Oct 2022 – Mar 2023) |
162
+ | 401 | FY 2023 (Apr 2023 – Sep 2023) |
163
+ | 410 | FY 2024 (Oct 2023 – Mar 2024) |
164
+ | 411 | FY 2024 (Apr 2024 – Sep 2024) |
165
+ | 420 | FY 2025 (Oct 2024 – Mar 2025) |
166
+ | 421 | FY 2025 (Apr 2025 – Sep 2025) |
167
+ | 430 | FY 2026 (Oct 2025 – Mar 2026) |
168
+ | 431 | FY 2026 (Apr 2026 – Sep 2026) |
169
+
170
+ Pass the version number in the claim's `version` field.
171
+
172
+ ## Architecture
173
+
174
+ ```
175
+ ┌─────────────────────────────────────────────────┐
176
+ │ Python (msdrg) │
177
+ │ ctypes ──► C API (c_api.zig) │
178
+ │ │ │
179
+ │ ▼ │
180
+ │ GrouperChain (data loader + version router) │
181
+ │ │ │
182
+ │ ▼ │
183
+ │ Chain of Links: │
184
+ │ ┌──────────────────────────────────────────┐ │
185
+ │ │ Preprocess → Exclusions → Grouping │ │
186
+ │ │ ↓ ↓ ↓ │ │
187
+ │ │ Attributes Cluster Map Formulas │ │
188
+ │ │ ↓ ↓ ↓ │ │
189
+ │ │ Diagnosis Marking Final DRG │ │
190
+ │ └──────────────────────────────────────────┘ │
191
+ │ │ │
192
+ │ ▼ │
193
+ │ Memory-mapped binary data (16 .bin files) │
194
+ └─────────────────────────────────────────────────┘
195
+ ```
196
+
197
+ The grouper loads 16 precompiled binary data files at startup (diagnosis definitions, DRG formulas, cluster maps, exclusion groups, etc.) via memory mapping. The grouping pipeline is a chain of composable processors, each transforming the claim context. This design mirrors the original Java architecture for validation purposes.
198
+
199
+ ## Building from source
200
+
201
+ ### Prerequisites
202
+
203
+ - **Zig 0.16+** — [download](https://ziglang.org/download/) or via package manager
204
+ - **Python 3.11+**
205
+ - **uv** (recommended) or **pip**
206
+
207
+ ### Setup
208
+
209
+ ```bash
210
+ git clone https://github.com/Bedrock-Billing/mz-drg.git
211
+ cd mz-drg
212
+
213
+ # Create venv and install
214
+ python3 -m venv .venv
215
+ source .venv/bin/activate
216
+ pip install -e .
217
+ ```
218
+
219
+ This compiles the Zig shared library and bundles the data files into the Python package.
220
+
221
+ ### Run tests
222
+
223
+ ```bash
224
+ # Zig unit tests (27 tests)
225
+ cd zig_src && zig build test
226
+
227
+ # Python smoke test
228
+ python -c "import msdrg; print(msdrg.MsdrgGrouper().group({'version': 431, 'age': 65, 'sex': 0, 'discharge_status': 1, 'pdx': {'code': 'I5020'}, 'sdx': [], 'procedures': []}))"
229
+ ```
230
+
231
+ ### Data pipeline
232
+
233
+ The binary data files (`data/bin/*.bin`) are prebuilt and included in the repository. To regenerate them from the raw CMS CSVs:
234
+
235
+ ```bash
236
+ bash scripts/setup_data.sh
237
+ ```
238
+
239
+ This runs extract → import → compile → zig build in sequence. See `scripts/` for individual steps.
240
+
241
+ ## Comparison testing
242
+
243
+ The `tests/` directory contains tools for validating mz-drg against the reference Java grouper.
244
+
245
+ ```bash
246
+ # Generate random test claims
247
+ python tests/generate_test_claims.py --count 1000 --out tests/claims.json
248
+
249
+ # Compare Java vs Zig output
250
+ python tests/compare_groupers.py --file tests/claims.json
251
+
252
+ # Benchmark both
253
+ python tests/compare_groupers.py --file tests/claims.json --benchmark
254
+ ```
255
+
256
+ > The Java comparison requires JDK 17+ and the reference JARs in `jars/`. This is only needed for validation — the Python package itself has no Java dependency.
257
+
258
+ ## C API
259
+
260
+ mz-drg exposes a C ABI for integration with any language. See `zig_src/src/c_api.zig` for the full API.
261
+
262
+ ```c
263
+ // Initialize (loads all data, pre-builds chains)
264
+ void* ctx = msdrg_context_init("/path/to/data/bin");
265
+
266
+ // Group a claim via JSON
267
+ const char* result_json = msdrg_group_json(ctx, "{\"version\":431,...}");
268
+
269
+ // Free
270
+ msdrg_string_free(result_json);
271
+ msdrg_context_free(ctx);
272
+ ```
273
+
274
+ Functions are thread-safe after initialization. The context is immutable and can be shared across threads.
275
+
276
+ ## Project structure
277
+
278
+ ```
279
+ mz-drg/
280
+ ├── msdrg/ # Python package
281
+ │ ├── __init__.py
282
+ │ └── grouper.py # MsdrgGrouper class
283
+ ├── zig_src/ # Zig source
284
+ │ ├── build.zig
285
+ │ ├── main.zig
286
+ │ └── src/
287
+ │ ├── c_api.zig # C ABI exports
288
+ │ ├── json_api.zig # JSON in/out
289
+ │ ├── msdrg.zig # GrouperChain + version routing
290
+ │ ├── chain.zig # Composable processor chain
291
+ │ ├── models.zig # Data models
292
+ │ ├── preprocess.zig # Exclusion & attribute handling
293
+ │ ├── grouping.zig # DRG formula matching
294
+ │ ├── marking.zig # Code marking logic
295
+ │ ├── hac.zig # Hospital-Acquired Conditions
296
+ │ └── ... # 20+ modules, ~8,500 lines
297
+ ├── data/bin/ # Prebuilt binary data (16 files)
298
+ ├── scripts/ # Data extraction & compilation
299
+ ├── tests/ # Comparison & benchmark tools
300
+ ├── python_client/ # Legacy Python wrapper
301
+ ├── pyproject.toml
302
+ └── setup.py
303
+ ```
304
+
305
+ ## License
306
+
307
+ MIT — see [LICENSE](LICENSE).
308
+
309
+ ## Acknowledgments
310
+
311
+ This project is intended for healthcare IT professionals who need a fast, embeddable, and auditable DRG classification engine.
msdrg-0.1.0/README.md ADDED
@@ -0,0 +1,289 @@
1
+ # mz-drg
2
+
3
+ **A high-performance MS-DRG grouper written in Zig with Python bindings.**
4
+
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
+ [![Zig](https://img.shields.io/badge/Zig-0.16-orange.svg)](https://ziglang.org)
7
+ [![Python](https://img.shields.io/badge/Python-3.11+-green.svg)](https://python.org)
8
+
9
+ ---
10
+
11
+ mz-drg is an open-source reimplementation of the CMS MS-DRG (Medicare Severity Diagnosis Related Groups) classification engine, written in [Zig](https://ziglang.org) and callable from Python. It takes patient claim data — diagnoses, procedures, demographics — and assigns the appropriate DRG, MDC, severity, and return codes.
12
+
13
+ **It has been validated on 50,000+ claims against the reference Java grouper, with a 100% match rate.**
14
+
15
+ ## Why mz-drg?
16
+
17
+ The official CMS MS-DRG grouper is a Java application. While accurate, it comes with practical limitations:
18
+
19
+ | | Java Grouper | mz-drg |
20
+ |---|---|---|
21
+ | **Startup** | JVM warmup, seconds | Instant |
22
+ | **Throughput (tested on a Ryzen 5 5600U laptop)** | ~500 claims/sec | ~7,000+ claims/sec |
23
+ | **Memory** | JVM heap overhead | Minimal, memory-mapped data |
24
+ | **Dependencies** | JRE 17+, classpath management | Single shared library |
25
+ | **Python integration** | JPype bridge (fragile) | Native ctypes (simple) |
26
+ | **Embedding** | Requires JVM process | C ABI, any language |
27
+
28
+ mz-drg is not a black-box reimplementation. The grouping logic — preprocessing, exclusion handling, diagnosis clustering, severity assignment, formula evaluation, rerouting, marking, and final grouping — is ported line-by-line from the decompiled Java source and validated claim-by-claim against the original.
29
+
30
+ ## Quick start
31
+
32
+ ### Install
33
+
34
+ ```bash
35
+ pip install msdrg
36
+ ```
37
+
38
+ > **Requires Zig 0.16+** at build time. Install from [ziglang.org/download](https://ziglang.org/download/) or set the `ZIG` environment variable to point to your zig binary.
39
+
40
+ ### Use
41
+
42
+ ```python
43
+ import msdrg
44
+
45
+ with msdrg.MsdrgGrouper() as grouper:
46
+     result = grouper.group({
47
+         "version": 431,
48
+         "age": 65,
49
+         "sex": 0,
50
+         "discharge_status": 1,
51
+         "pdx": {"code": "I5020"},
52
+         "sdx": [{"code": "E1165"}],
53
+         "procedures": []
54
+     })
55
+
56
+ print(result["final_drg"]) # 293
57
+ print(result["final_mdc"]) # 5
58
+ print(result["final_drg_description"]) # "Heart Failure and Shock without CC/MCC"
59
+ ```
60
+
61
+ ### Helper function
62
+
63
+ ```python
64
+ import msdrg
65
+
66
+ claim = msdrg.create_claim(
67
+ version=431,
68
+ age=65,
69
+ sex=0,
70
+ discharge_status=1,
71
+ pdx="I5020",
72
+ sdx=["E1165", "I10"],
73
+ procedures=["02703DZ"],
74
+ )
75
+
76
+ with msdrg.MsdrgGrouper() as g:
77
+     result = g.group(claim)
78
+ ```
79
+
80
+ ## Input format
81
+
82
+ The `group()` method accepts a dictionary:
83
+
84
+ ```python
85
+ {
86
+ "version": 431, # MS-DRG version (e.g. 400, 410, 421, 431)
87
+ "age": 65, # Patient age in years
88
+ "sex": 0, # 0=Male, 1=Female, 2=Unknown
89
+ "discharge_status": 1, # 1=Home/Self Care, 20=Died
90
+ "pdx": { # Principal diagnosis (required)
91
+ "code": "I5020",
92
+ "poa": "Y" # Present on Admission: Y/N/U/W (optional)
93
+ },
94
+ "admit_dx": { # Admission diagnosis (optional)
95
+ "code": "R0602"
96
+ },
97
+ "sdx": [ # Secondary diagnoses (optional)
98
+ {"code": "E1165", "poa": "Y"},
99
+ {"code": "I10", "poa": "Y"}
100
+ ],
101
+ "procedures": [ # Procedure codes (optional)
102
+ {"code": "02703DZ"}
103
+ ]
104
+ }
105
+ ```
106
+
107
+ ## Output format
108
+
109
+ `group()` returns a dictionary:
110
+
111
+ ```python
112
+ {
113
+ "initial_drg": 293,
114
+ "final_drg": 293,
115
+ "initial_mdc": 5,
116
+ "final_mdc": 5,
117
+ "initial_drg_description": "Heart Failure and Shock without CC/MCC",
118
+ "final_drg_description": "Heart Failure and Shock without CC/MCC",
119
+ "initial_mdc_description": "Diseases and Disorders of the Circulatory System",
120
+ "final_mdc_description": "Diseases and Disorders of the Circulatory System",
121
+ "return_code": "OK",
122
+ "pdx_output": {
123
+ "code": "I5020",
124
+ "mdc": 5,
125
+ "severity": "CC",
126
+ "drg_impact": "BOTH",
127
+ "poa_error": "POA_NOT_CHECKED",
128
+ "flags": ["VALID", "MARKED_FOR_INITIAL", "MARKED_FOR_FINAL"]
129
+ },
130
+ "sdx_output": [...],
131
+ "proc_output": [...]
132
+ }
133
+ ```
134
+
135
+ ## Supported DRG versions
136
+
137
+ | Version | CMS Fiscal Year |
138
+ |---------|----------------|
139
+ | 400 | FY 2023 (Oct 2022 – Mar 2023) |
140
+ | 401 | FY 2023 (Apr 2023 – Sep 2023) |
141
+ | 410 | FY 2024 (Oct 2023 – Mar 2024) |
142
+ | 411 | FY 2024 (Apr 2024 – Sep 2024) |
143
+ | 420 | FY 2025 (Oct 2024 – Mar 2025) |
144
+ | 421 | FY 2025 (Apr 2025 – Sep 2025) |
145
+ | 430 | FY 2026 (Oct 2025 – Mar 2026) |
146
+ | 431 | FY 2026 (Apr 2026 – Sep 2026) |
147
+
148
+ Pass the version number in the claim's `version` field.
149
+
150
+ ## Architecture
151
+
152
+ ```
153
+ ┌─────────────────────────────────────────────────┐
154
+ │ Python (msdrg) │
155
+ │ ctypes ──► C API (c_api.zig) │
156
+ │ │ │
157
+ │ ▼ │
158
+ │ GrouperChain (data loader + version router) │
159
+ │ │ │
160
+ │ ▼ │
161
+ │ Chain of Links: │
162
+ │ ┌──────────────────────────────────────────┐ │
163
+ │ │ Preprocess → Exclusions → Grouping │ │
164
+ │ │ ↓ ↓ ↓ │ │
165
+ │ │ Attributes Cluster Map Formulas │ │
166
+ │ │ ↓ ↓ ↓ │ │
167
+ │ │ Diagnosis Marking Final DRG │ │
168
+ │ └──────────────────────────────────────────┘ │
169
+ │ │ │
170
+ │ ▼ │
171
+ │ Memory-mapped binary data (16 .bin files) │
172
+ └─────────────────────────────────────────────────┘
173
+ ```
174
+
175
+ The grouper loads 16 precompiled binary data files at startup (diagnosis definitions, DRG formulas, cluster maps, exclusion groups, etc.) via memory mapping. The grouping pipeline is a chain of composable processors, each transforming the claim context. This design mirrors the original Java architecture for validation purposes.
176
+
177
+ ## Building from source
178
+
179
+ ### Prerequisites
180
+
181
+ - **Zig 0.16+** — [download](https://ziglang.org/download/) or via package manager
182
+ - **Python 3.11+**
183
+ - **uv** (recommended) or **pip**
184
+
185
+ ### Setup
186
+
187
+ ```bash
188
+ git clone https://github.com/Bedrock-Billing/mz-drg.git
189
+ cd mz-drg
190
+
191
+ # Create venv and install
192
+ python3 -m venv .venv
193
+ source .venv/bin/activate
194
+ pip install -e .
195
+ ```
196
+
197
+ This compiles the Zig shared library and bundles the data files into the Python package.
198
+
199
+ ### Run tests
200
+
201
+ ```bash
202
+ # Zig unit tests (27 tests)
203
+ cd zig_src && zig build test
204
+
205
+ # Python smoke test
206
+ python -c "import msdrg; print(msdrg.MsdrgGrouper().group({'version': 431, 'age': 65, 'sex': 0, 'discharge_status': 1, 'pdx': {'code': 'I5020'}, 'sdx': [], 'procedures': []}))"
207
+ ```
208
+
209
+ ### Data pipeline
210
+
211
+ The binary data files (`data/bin/*.bin`) are prebuilt and included in the repository. To regenerate them from the raw CMS CSVs:
212
+
213
+ ```bash
214
+ bash scripts/setup_data.sh
215
+ ```
216
+
217
+ This runs extract → import → compile → zig build in sequence. See `scripts/` for individual steps.
218
+
219
+ ## Comparison testing
220
+
221
+ The `tests/` directory contains tools for validating mz-drg against the reference Java grouper.
222
+
223
+ ```bash
224
+ # Generate random test claims
225
+ python tests/generate_test_claims.py --count 1000 --out tests/claims.json
226
+
227
+ # Compare Java vs Zig output
228
+ python tests/compare_groupers.py --file tests/claims.json
229
+
230
+ # Benchmark both
231
+ python tests/compare_groupers.py --file tests/claims.json --benchmark
232
+ ```
233
+
234
+ > The Java comparison requires JDK 17+ and the reference JARs in `jars/`. This is only needed for validation — the Python package itself has no Java dependency.
235
+
236
+ ## C API
237
+
238
+ mz-drg exposes a C ABI for integration with any language. See `zig_src/src/c_api.zig` for the full API.
239
+
240
+ ```c
241
+ // Initialize (loads all data, pre-builds chains)
242
+ void* ctx = msdrg_context_init("/path/to/data/bin");
243
+
244
+ // Group a claim via JSON
245
+ const char* result_json = msdrg_group_json(ctx, "{\"version\":431,...}");
246
+
247
+ // Free
248
+ msdrg_string_free(result_json);
249
+ msdrg_context_free(ctx);
250
+ ```
251
+
252
+ Functions are thread-safe after initialization. The context is immutable and can be shared across threads.
253
+
254
+ ## Project structure
255
+
256
+ ```
257
+ mz-drg/
258
+ ├── msdrg/ # Python package
259
+ │ ├── __init__.py
260
+ │ └── grouper.py # MsdrgGrouper class
261
+ ├── zig_src/ # Zig source
262
+ │ ├── build.zig
263
+ │ ├── main.zig
264
+ │ └── src/
265
+ │ ├── c_api.zig # C ABI exports
266
+ │ ├── json_api.zig # JSON in/out
267
+ │ ├── msdrg.zig # GrouperChain + version routing
268
+ │ ├── chain.zig # Composable processor chain
269
+ │ ├── models.zig # Data models
270
+ │ ├── preprocess.zig # Exclusion & attribute handling
271
+ │ ├── grouping.zig # DRG formula matching
272
+ │ ├── marking.zig # Code marking logic
273
+ │ ├── hac.zig # Hospital-Acquired Conditions
274
+ │ └── ... # 20+ modules, ~8,500 lines
275
+ ├── data/bin/ # Prebuilt binary data (16 files)
276
+ ├── scripts/ # Data extraction & compilation
277
+ ├── tests/ # Comparison & benchmark tools
278
+ ├── python_client/ # Legacy Python wrapper
279
+ ├── pyproject.toml
280
+ └── setup.py
281
+ ```
282
+
283
+ ## License
284
+
285
+ MIT — see [LICENSE](LICENSE).
286
+
287
+ ## Acknowledgments
288
+
289
+ This project is intended for healthcare IT professionals who need a fast, embeddable, and auditable DRG classification engine.
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file