msdrg 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- msdrg-0.1.0/LICENSE +21 -0
- msdrg-0.1.0/MANIFEST.in +10 -0
- msdrg-0.1.0/PKG-INFO +311 -0
- msdrg-0.1.0/README.md +289 -0
- msdrg-0.1.0/data/bin/base_drg_descriptions.bin +0 -0
- msdrg-0.1.0/data/bin/cluster_info.bin +0 -0
- msdrg-0.1.0/data/bin/cluster_map.bin +0 -0
- msdrg-0.1.0/data/bin/diagnosis.bin +0 -0
- msdrg-0.1.0/data/bin/drg_descriptions.bin +0 -0
- msdrg-0.1.0/data/bin/drg_formulas.bin +0 -0
- msdrg-0.1.0/data/bin/dx_patterns.bin +0 -0
- msdrg-0.1.0/data/bin/exclusion_groups.bin +0 -0
- msdrg-0.1.0/data/bin/exclusion_ids.bin +0 -0
- msdrg-0.1.0/data/bin/gender_mdcs.bin +0 -0
- msdrg-0.1.0/data/bin/hac_descriptions.bin +0 -0
- msdrg-0.1.0/data/bin/hac_formulas.bin +0 -0
- msdrg-0.1.0/data/bin/hac_operands.bin +0 -0
- msdrg-0.1.0/data/bin/mdc_descriptions.bin +0 -0
- msdrg-0.1.0/data/bin/pr_patterns.bin +0 -0
- msdrg-0.1.0/data/bin/procedure_attributes.bin +0 -0
- msdrg-0.1.0/msdrg/__init__.py +26 -0
- msdrg-0.1.0/msdrg/grouper.py +238 -0
- msdrg-0.1.0/msdrg.egg-info/PKG-INFO +311 -0
- msdrg-0.1.0/msdrg.egg-info/SOURCES.txt +67 -0
- msdrg-0.1.0/msdrg.egg-info/dependency_links.txt +1 -0
- msdrg-0.1.0/msdrg.egg-info/top_level.txt +1 -0
- msdrg-0.1.0/pyproject.toml +38 -0
- msdrg-0.1.0/scripts/analyze_clusters.py +63 -0
- msdrg-0.1.0/scripts/build_wheels.py +325 -0
- msdrg-0.1.0/scripts/compile_clusters.py +185 -0
- msdrg-0.1.0/scripts/compile_descriptions.py +69 -0
- msdrg-0.1.0/scripts/compile_diagnosis.py +108 -0
- msdrg-0.1.0/scripts/compile_drg_formulas.py +153 -0
- msdrg-0.1.0/scripts/compile_exclusion_groups.py +58 -0
- msdrg-0.1.0/scripts/compile_gender_mdcs.py +62 -0
- msdrg-0.1.0/scripts/compile_hac.py +183 -0
- msdrg-0.1.0/scripts/compile_patterns.py +108 -0
- msdrg-0.1.0/scripts/compile_simple_maps.py +63 -0
- msdrg-0.1.0/scripts/extract_data.py +252 -0
- msdrg-0.1.0/scripts/import_to_sqlite.py +92 -0
- msdrg-0.1.0/scripts/setup_data.sh +42 -0
- msdrg-0.1.0/setup.cfg +4 -0
- msdrg-0.1.0/setup.py +198 -0
- msdrg-0.1.0/zig_src/build.zig +54 -0
- msdrg-0.1.0/zig_src/main.zig +62 -0
- msdrg-0.1.0/zig_src/src/c_api.zig +276 -0
- msdrg-0.1.0/zig_src/src/chain.zig +126 -0
- msdrg-0.1.0/zig_src/src/cluster.zig +322 -0
- msdrg-0.1.0/zig_src/src/code_map.zig +151 -0
- msdrg-0.1.0/zig_src/src/common.zig +135 -0
- msdrg-0.1.0/zig_src/src/description.zig +145 -0
- msdrg-0.1.0/zig_src/src/diagnosis.zig +168 -0
- msdrg-0.1.0/zig_src/src/exclusion.zig +137 -0
- msdrg-0.1.0/zig_src/src/final_grouping_test.zig +177 -0
- msdrg-0.1.0/zig_src/src/formula.zig +503 -0
- msdrg-0.1.0/zig_src/src/gender.zig +155 -0
- msdrg-0.1.0/zig_src/src/grouping.zig +892 -0
- msdrg-0.1.0/zig_src/src/grouping_test.zig +457 -0
- msdrg-0.1.0/zig_src/src/hac.zig +706 -0
- msdrg-0.1.0/zig_src/src/integration_test.zig +422 -0
- msdrg-0.1.0/zig_src/src/json_api.zig +189 -0
- msdrg-0.1.0/zig_src/src/marking.zig +1291 -0
- msdrg-0.1.0/zig_src/src/marking_test.zig +270 -0
- msdrg-0.1.0/zig_src/src/models.zig +477 -0
- msdrg-0.1.0/zig_src/src/msdrg.zig +352 -0
- msdrg-0.1.0/zig_src/src/msdrg_data.zig +130 -0
- msdrg-0.1.0/zig_src/src/pattern.zig +129 -0
- msdrg-0.1.0/zig_src/src/preprocess.zig +547 -0
- msdrg-0.1.0/zig_src/src/preprocess_test.zig +466 -0
msdrg-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 LibrePPS
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
msdrg-0.1.0/MANIFEST.in
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
include LICENSE
|
|
2
|
+
include README.md
|
|
3
|
+
include pyproject.toml
|
|
4
|
+
include setup.py
|
|
5
|
+
recursive-include msdrg *.py
|
|
6
|
+
recursive-include msdrg/data *.bin
|
|
7
|
+
recursive-include zig_src *.zig build.zig
|
|
8
|
+
recursive-include scripts *.py *.sh
|
|
9
|
+
recursive-include data/bin *.bin
|
|
10
|
+
recursive-exclude msdrg/_lib *
|
msdrg-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: msdrg
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: High-performance MS-DRG (Medicare Severity Diagnosis Related Groups) grouper
|
|
5
|
+
Author: MZ-DRG Contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Bedrock-Billing/mz-drg
|
|
8
|
+
Project-URL: Repository, https://github.com/Bedrock-Billing/mz-drg
|
|
9
|
+
Keywords: drg,ms-drg,grouper,healthcare,medicare
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Healthcare Industry
|
|
12
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
13
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
14
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Zig
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# mz-drg
|
|
24
|
+
|
|
25
|
+
**A high-performance MS-DRG grouper written in Zig with Python bindings.**
|
|
26
|
+
|
|
27
|
+
[License: MIT](LICENSE)
|
|
28
|
+
[Zig 0.16+](https://ziglang.org)
|
|
29
|
+
[Python 3.11+](https://python.org)
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
mz-drg is an open-source reimplementation of the CMS MS-DRG (Medicare Severity Diagnosis Related Groups) classification engine, written in [Zig](https://ziglang.org) and callable from Python. It takes patient claim data — diagnoses, procedures, demographics — and assigns the appropriate DRG, MDC, severity, and return codes.
|
|
34
|
+
|
|
35
|
+
**It has been validated on 50,000+ claims against the reference Java grouper, with a 100% match rate.**
|
|
36
|
+
|
|
37
|
+
## Why mz-drg?
|
|
38
|
+
|
|
39
|
+
The official CMS MS-DRG grouper is a Java application. While accurate, it comes with practical limitations:
|
|
40
|
+
|
|
41
|
+
| | Java Grouper | mz-drg |
|
|
42
|
+
|---|---|---|
|
|
43
|
+
| **Startup** | JVM warmup, seconds | Instant |
|
|
44
|
+
| **Throughput (tested on a Ryzen 5 5600U laptop)** | ~500 claims/sec | ~7,000+ claims/sec |
|
|
45
|
+
| **Memory** | JVM heap overhead | Minimal, memory-mapped data |
|
|
46
|
+
| **Dependencies** | JRE 17+, classpath management | Single shared library |
|
|
47
|
+
| **Python integration** | JPype bridge (fragile) | Native ctypes (simple) |
|
|
48
|
+
| **Embedding** | Requires JVM process | C ABI, any language |
|
|
49
|
+
|
|
50
|
+
mz-drg is not a black-box reimplementation. The grouping logic — preprocessing, exclusion handling, diagnosis clustering, severity assignment, formula evaluation, rerouting, marking, and final grouping — is ported line-by-line from the decompiled Java source and validated claim-by-claim against the original.
|
|
51
|
+
|
|
52
|
+
## Quick start
|
|
53
|
+
|
|
54
|
+
### Install
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install msdrg
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
> **Requires Zig 0.16+** at build time. Install from [ziglang.org/download](https://ziglang.org/download/) or set the `ZIG` environment variable to point to your zig binary.
|
|
61
|
+
|
|
62
|
+
### Use
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
import msdrg
|
|
66
|
+
|
|
67
|
+
with msdrg.MsdrgGrouper() as grouper:
|
|
68
|
+
result = grouper.group({
|
|
69
|
+
"version": 431,
|
|
70
|
+
"age": 65,
|
|
71
|
+
"sex": 0,
|
|
72
|
+
"discharge_status": 1,
|
|
73
|
+
"pdx": {"code": "I5020"},
|
|
74
|
+
"sdx": [{"code": "E1165"}],
|
|
75
|
+
"procedures": []
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
print(result["final_drg"]) # 293
|
|
79
|
+
print(result["final_mdc"]) # 5
|
|
80
|
+
print(result["final_drg_description"]) # "Heart Failure and Shock without CC/MCC"
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Helper function
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
import msdrg
|
|
87
|
+
|
|
88
|
+
claim = msdrg.create_claim(
|
|
89
|
+
version=431,
|
|
90
|
+
age=65,
|
|
91
|
+
sex=0,
|
|
92
|
+
discharge_status=1,
|
|
93
|
+
pdx="I5020",
|
|
94
|
+
sdx=["E1165", "I10"],
|
|
95
|
+
procedures=["02703DZ"],
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
with msdrg.MsdrgGrouper() as g:
|
|
99
|
+
result = g.group(claim)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Input format
|
|
103
|
+
|
|
104
|
+
The `group()` method accepts a dictionary:
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
{
|
|
108
|
+
"version": 431, # MS-DRG version (e.g. 400, 410, 421, 431)
|
|
109
|
+
"age": 65, # Patient age in years
|
|
110
|
+
"sex": 0, # 0=Male, 1=Female, 2=Unknown
|
|
111
|
+
"discharge_status": 1, # 1=Home/Self Care, 20=Died
|
|
112
|
+
"pdx": { # Principal diagnosis (required)
|
|
113
|
+
"code": "I5020",
|
|
114
|
+
"poa": "Y" # Present on Admission: Y/N/U/W (optional)
|
|
115
|
+
},
|
|
116
|
+
"admit_dx": { # Admission diagnosis (optional)
|
|
117
|
+
"code": "R0602"
|
|
118
|
+
},
|
|
119
|
+
"sdx": [ # Secondary diagnoses (optional)
|
|
120
|
+
{"code": "E1165", "poa": "Y"},
|
|
121
|
+
{"code": "I10", "poa": "Y"}
|
|
122
|
+
],
|
|
123
|
+
"procedures": [ # Procedure codes (optional)
|
|
124
|
+
{"code": "02703DZ"}
|
|
125
|
+
]
|
|
126
|
+
}
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Output format
|
|
130
|
+
|
|
131
|
+
`group()` returns a dictionary:
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
{
|
|
135
|
+
"initial_drg": 293,
|
|
136
|
+
"final_drg": 293,
|
|
137
|
+
"initial_mdc": 5,
|
|
138
|
+
"final_mdc": 5,
|
|
139
|
+
"initial_drg_description": "Heart Failure and Shock without CC/MCC",
|
|
140
|
+
"final_drg_description": "Heart Failure and Shock without CC/MCC",
|
|
141
|
+
"initial_mdc_description": "Diseases and Disorders of the Circulatory System",
|
|
142
|
+
"final_mdc_description": "Diseases and Disorders of the Circulatory System",
|
|
143
|
+
"return_code": "OK",
|
|
144
|
+
"pdx_output": {
|
|
145
|
+
"code": "I5020",
|
|
146
|
+
"mdc": 5,
|
|
147
|
+
"severity": "CC",
|
|
148
|
+
"drg_impact": "BOTH",
|
|
149
|
+
"poa_error": "POA_NOT_CHECKED",
|
|
150
|
+
"flags": ["VALID", "MARKED_FOR_INITIAL", "MARKED_FOR_FINAL"]
|
|
151
|
+
},
|
|
152
|
+
"sdx_output": [...],
|
|
153
|
+
"proc_output": [...]
|
|
154
|
+
}
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Supported DRG versions
|
|
158
|
+
|
|
159
|
+
| Version | CMS Fiscal Year |
|
|
160
|
+
|---------|----------------|
|
|
161
|
+
| 400 | FY 2023 (Oct 2022 – Mar 2023) |
|
|
162
|
+
| 401 | FY 2023 (Apr 2023 – Sep 2023) |
|
|
163
|
+
| 410 | FY 2024 (Oct 2023 – Mar 2024) |
|
|
164
|
+
| 411 | FY 2024 (Apr 2024 – Sep 2024) |
|
|
165
|
+
| 420 | FY 2025 (Oct 2024 – Mar 2025) |
|
|
166
|
+
| 421 | FY 2025 (Apr 2025 – Sep 2025) |
|
|
167
|
+
| 430 | FY 2026 (Oct 2025 – Mar 2026) |
|
|
168
|
+
| 431 | FY 2026 (Apr 2026 – Sep 2026) |
|
|
169
|
+
|
|
170
|
+
Pass the version number in the claim's `version` field.
|
|
171
|
+
|
|
172
|
+
## Architecture
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
┌─────────────────────────────────────────────────┐
|
|
176
|
+
│ Python (msdrg) │
|
|
177
|
+
│ ctypes ──► C API (c_api.zig) │
|
|
178
|
+
│ │ │
|
|
179
|
+
│ ▼ │
|
|
180
|
+
│ GrouperChain (data loader + version router) │
|
|
181
|
+
│ │ │
|
|
182
|
+
│ ▼ │
|
|
183
|
+
│ Chain of Links: │
|
|
184
|
+
│ ┌──────────────────────────────────────────┐ │
|
|
185
|
+
│ │ Preprocess → Exclusions → Grouping │ │
|
|
186
|
+
│ │ ↓ ↓ ↓ │ │
|
|
187
|
+
│ │ Attributes Cluster Map Formulas │ │
|
|
188
|
+
│ │ ↓ ↓ ↓ │ │
|
|
189
|
+
│ │ Diagnosis Marking Final DRG │ │
|
|
190
|
+
│ └──────────────────────────────────────────┘ │
|
|
191
|
+
│ │ │
|
|
192
|
+
│ ▼ │
|
|
193
|
+
│ Memory-mapped binary data (16 .bin files) │
|
|
194
|
+
└─────────────────────────────────────────────────┘
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
The grouper loads 16 precompiled binary data files at startup (diagnosis definitions, DRG formulas, cluster maps, exclusion groups, etc.) via memory mapping. The grouping pipeline is a chain of composable processors, each transforming the claim context. This design mirrors the original Java architecture for validation purposes.
|
|
198
|
+
|
|
199
|
+
## Building from source
|
|
200
|
+
|
|
201
|
+
### Prerequisites
|
|
202
|
+
|
|
203
|
+
- **Zig 0.16+** — [download](https://ziglang.org/download/) or via package manager
|
|
204
|
+
- **Python 3.11+**
|
|
205
|
+
- **uv** (recommended) or **pip**
|
|
206
|
+
|
|
207
|
+
### Setup
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
git clone https://github.com/Bedrock-Billing/mz-drg.git
|
|
211
|
+
cd mz-drg
|
|
212
|
+
|
|
213
|
+
# Create venv and install
|
|
214
|
+
python3 -m venv .venv
|
|
215
|
+
source .venv/bin/activate
|
|
216
|
+
pip install -e .
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
This compiles the Zig shared library and bundles the data files into the Python package.
|
|
220
|
+
|
|
221
|
+
### Run tests
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
# Zig unit tests (27 tests)
|
|
225
|
+
cd zig_src && zig build test
|
|
226
|
+
|
|
227
|
+
# Python smoke test
|
|
228
|
+
python -c "import msdrg; print(msdrg.MsdrgGrouper().group({'version': 431, 'age': 65, 'sex': 0, 'discharge_status': 1, 'pdx': {'code': 'I5020'}, 'sdx': [], 'procedures': []}))"
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### Data pipeline
|
|
232
|
+
|
|
233
|
+
The binary data files (`data/bin/*.bin`) are prebuilt and included in the repository. To regenerate them from the raw CMS CSVs:
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
bash scripts/setup_data.sh
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
This runs extract → import → compile → zig build in sequence. See `scripts/` for individual steps.
|
|
240
|
+
|
|
241
|
+
## Comparison testing
|
|
242
|
+
|
|
243
|
+
The `tests/` directory contains tools for validating mz-drg against the reference Java grouper.
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
# Generate random test claims
|
|
247
|
+
python tests/generate_test_claims.py --count 1000 --out tests/claims.json
|
|
248
|
+
|
|
249
|
+
# Compare Java vs Zig output
|
|
250
|
+
python tests/compare_groupers.py --file tests/claims.json
|
|
251
|
+
|
|
252
|
+
# Benchmark both
|
|
253
|
+
python tests/compare_groupers.py --file tests/claims.json --benchmark
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
> The Java comparison requires JDK 17+ and the reference JARs in `jars/`. This is only needed for validation — the Python package itself has no Java dependency.
|
|
257
|
+
|
|
258
|
+
## C API
|
|
259
|
+
|
|
260
|
+
mz-drg exposes a C ABI for integration with any language. See `zig_src/src/c_api.zig` for the full API.
|
|
261
|
+
|
|
262
|
+
```c
|
|
263
|
+
// Initialize (loads all data, pre-builds chains)
|
|
264
|
+
void* ctx = msdrg_context_init("/path/to/data/bin");
|
|
265
|
+
|
|
266
|
+
// Group a claim via JSON
|
|
267
|
+
const char* result_json = msdrg_group_json(ctx, "{\"version\":431,...}");
|
|
268
|
+
|
|
269
|
+
// Free
|
|
270
|
+
msdrg_string_free(result_json);
|
|
271
|
+
msdrg_context_free(ctx);
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Functions are thread-safe after initialization. The context is immutable and can be shared across threads.
|
|
275
|
+
|
|
276
|
+
## Project structure
|
|
277
|
+
|
|
278
|
+
```
|
|
279
|
+
mz-drg/
|
|
280
|
+
├── msdrg/ # Python package
|
|
281
|
+
│ ├── __init__.py
|
|
282
|
+
│ └── grouper.py # MsdrgGrouper class
|
|
283
|
+
├── zig_src/ # Zig source
|
|
284
|
+
│ ├── build.zig
|
|
285
|
+
│ ├── main.zig
|
|
286
|
+
│ └── src/
|
|
287
|
+
│ ├── c_api.zig # C ABI exports
|
|
288
|
+
│ ├── json_api.zig # JSON in/out
|
|
289
|
+
│ ├── msdrg.zig # GrouperChain + version routing
|
|
290
|
+
│ ├── chain.zig # Composable processor chain
|
|
291
|
+
│ ├── models.zig # Data models
|
|
292
|
+
│ ├── preprocess.zig # Exclusion & attribute handling
|
|
293
|
+
│ ├── grouping.zig # DRG formula matching
|
|
294
|
+
│ ├── marking.zig # Code marking logic
|
|
295
|
+
│ ├── hac.zig # Hospital-Acquired Conditions
|
|
296
|
+
│ └── ... # 20+ modules, ~8,500 lines
|
|
297
|
+
├── data/bin/ # Prebuilt binary data (16 files)
|
|
298
|
+
├── scripts/ # Data extraction & compilation
|
|
299
|
+
├── tests/ # Comparison & benchmark tools
|
|
300
|
+
├── python_client/ # Legacy Python wrapper
|
|
301
|
+
├── pyproject.toml
|
|
302
|
+
└── setup.py
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
## License
|
|
306
|
+
|
|
307
|
+
MIT — see [LICENSE](LICENSE).
|
|
308
|
+
|
|
309
|
+
## Acknowledgments
|
|
310
|
+
|
|
311
|
+
This project is intended for healthcare IT professionals who need a fast, embeddable, and auditable DRG classification engine.
|
msdrg-0.1.0/README.md
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
# mz-drg
|
|
2
|
+
|
|
3
|
+
**A high-performance MS-DRG grouper written in Zig with Python bindings.**
|
|
4
|
+
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
[Zig 0.16+](https://ziglang.org)
|
|
7
|
+
[Python 3.11+](https://python.org)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
mz-drg is an open-source reimplementation of the CMS MS-DRG (Medicare Severity Diagnosis Related Groups) classification engine, written in [Zig](https://ziglang.org) and callable from Python. It takes patient claim data — diagnoses, procedures, demographics — and assigns the appropriate DRG, MDC, severity, and return codes.
|
|
12
|
+
|
|
13
|
+
**It has been validated on 50,000+ claims against the reference Java grouper, with a 100% match rate.**
|
|
14
|
+
|
|
15
|
+
## Why mz-drg?
|
|
16
|
+
|
|
17
|
+
The official CMS MS-DRG grouper is a Java application. While accurate, it comes with practical limitations:
|
|
18
|
+
|
|
19
|
+
| | Java Grouper | mz-drg |
|
|
20
|
+
|---|---|---|
|
|
21
|
+
| **Startup** | JVM warmup, seconds | Instant |
|
|
22
|
+
| **Throughput (tested on a Ryzen 5 5600U laptop)** | ~500 claims/sec | ~7,000+ claims/sec |
|
|
23
|
+
| **Memory** | JVM heap overhead | Minimal, memory-mapped data |
|
|
24
|
+
| **Dependencies** | JRE 17+, classpath management | Single shared library |
|
|
25
|
+
| **Python integration** | JPype bridge (fragile) | Native ctypes (simple) |
|
|
26
|
+
| **Embedding** | Requires JVM process | C ABI, any language |
|
|
27
|
+
|
|
28
|
+
mz-drg is not a black-box reimplementation. The grouping logic — preprocessing, exclusion handling, diagnosis clustering, severity assignment, formula evaluation, rerouting, marking, and final grouping — is ported line-by-line from the decompiled Java source and validated claim-by-claim against the original.
|
|
29
|
+
|
|
30
|
+
## Quick start
|
|
31
|
+
|
|
32
|
+
### Install
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install msdrg
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
> **Requires Zig 0.16+** at build time. Install from [ziglang.org/download](https://ziglang.org/download/) or set the `ZIG` environment variable to point to your zig binary.
|
|
39
|
+
|
|
40
|
+
### Use
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
import msdrg
|
|
44
|
+
|
|
45
|
+
with msdrg.MsdrgGrouper() as grouper:
|
|
46
|
+
result = grouper.group({
|
|
47
|
+
"version": 431,
|
|
48
|
+
"age": 65,
|
|
49
|
+
"sex": 0,
|
|
50
|
+
"discharge_status": 1,
|
|
51
|
+
"pdx": {"code": "I5020"},
|
|
52
|
+
"sdx": [{"code": "E1165"}],
|
|
53
|
+
"procedures": []
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
print(result["final_drg"]) # 293
|
|
57
|
+
print(result["final_mdc"]) # 5
|
|
58
|
+
print(result["final_drg_description"]) # "Heart Failure and Shock without CC/MCC"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Helper function
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
import msdrg
|
|
65
|
+
|
|
66
|
+
claim = msdrg.create_claim(
|
|
67
|
+
version=431,
|
|
68
|
+
age=65,
|
|
69
|
+
sex=0,
|
|
70
|
+
discharge_status=1,
|
|
71
|
+
pdx="I5020",
|
|
72
|
+
sdx=["E1165", "I10"],
|
|
73
|
+
procedures=["02703DZ"],
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
with msdrg.MsdrgGrouper() as g:
|
|
77
|
+
result = g.group(claim)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Input format
|
|
81
|
+
|
|
82
|
+
The `group()` method accepts a dictionary:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
{
|
|
86
|
+
"version": 431, # MS-DRG version (e.g. 400, 410, 421, 431)
|
|
87
|
+
"age": 65, # Patient age in years
|
|
88
|
+
"sex": 0, # 0=Male, 1=Female, 2=Unknown
|
|
89
|
+
"discharge_status": 1, # 1=Home/Self Care, 20=Died
|
|
90
|
+
"pdx": { # Principal diagnosis (required)
|
|
91
|
+
"code": "I5020",
|
|
92
|
+
"poa": "Y" # Present on Admission: Y/N/U/W (optional)
|
|
93
|
+
},
|
|
94
|
+
"admit_dx": { # Admission diagnosis (optional)
|
|
95
|
+
"code": "R0602"
|
|
96
|
+
},
|
|
97
|
+
"sdx": [ # Secondary diagnoses (optional)
|
|
98
|
+
{"code": "E1165", "poa": "Y"},
|
|
99
|
+
{"code": "I10", "poa": "Y"}
|
|
100
|
+
],
|
|
101
|
+
"procedures": [ # Procedure codes (optional)
|
|
102
|
+
{"code": "02703DZ"}
|
|
103
|
+
]
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Output format
|
|
108
|
+
|
|
109
|
+
`group()` returns a dictionary:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
{
|
|
113
|
+
"initial_drg": 293,
|
|
114
|
+
"final_drg": 293,
|
|
115
|
+
"initial_mdc": 5,
|
|
116
|
+
"final_mdc": 5,
|
|
117
|
+
"initial_drg_description": "Heart Failure and Shock without CC/MCC",
|
|
118
|
+
"final_drg_description": "Heart Failure and Shock without CC/MCC",
|
|
119
|
+
"initial_mdc_description": "Diseases and Disorders of the Circulatory System",
|
|
120
|
+
"final_mdc_description": "Diseases and Disorders of the Circulatory System",
|
|
121
|
+
"return_code": "OK",
|
|
122
|
+
"pdx_output": {
|
|
123
|
+
"code": "I5020",
|
|
124
|
+
"mdc": 5,
|
|
125
|
+
"severity": "CC",
|
|
126
|
+
"drg_impact": "BOTH",
|
|
127
|
+
"poa_error": "POA_NOT_CHECKED",
|
|
128
|
+
"flags": ["VALID", "MARKED_FOR_INITIAL", "MARKED_FOR_FINAL"]
|
|
129
|
+
},
|
|
130
|
+
"sdx_output": [...],
|
|
131
|
+
"proc_output": [...]
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Supported DRG versions
|
|
136
|
+
|
|
137
|
+
| Version | CMS Fiscal Year |
|
|
138
|
+
|---------|----------------|
|
|
139
|
+
| 400 | FY 2023 (Oct 2022 – Mar 2023) |
|
|
140
|
+
| 401 | FY 2023 (Apr 2023 – Sep 2023) |
|
|
141
|
+
| 410 | FY 2024 (Oct 2023 – Mar 2024) |
|
|
142
|
+
| 411 | FY 2024 (Apr 2024 – Sep 2024) |
|
|
143
|
+
| 420 | FY 2025 (Oct 2024 – Mar 2025) |
|
|
144
|
+
| 421 | FY 2025 (Apr 2025 – Sep 2025) |
|
|
145
|
+
| 430 | FY 2026 (Oct 2025 – Mar 2026) |
|
|
146
|
+
| 431 | FY 2026 (Apr 2026 – Sep 2026) |
|
|
147
|
+
|
|
148
|
+
Pass the version number in the claim's `version` field.
|
|
149
|
+
|
|
150
|
+
## Architecture
|
|
151
|
+
|
|
152
|
+
```
|
|
153
|
+
┌─────────────────────────────────────────────────┐
|
|
154
|
+
│ Python (msdrg) │
|
|
155
|
+
│ ctypes ──► C API (c_api.zig) │
|
|
156
|
+
│ │ │
|
|
157
|
+
│ ▼ │
|
|
158
|
+
│ GrouperChain (data loader + version router) │
|
|
159
|
+
│ │ │
|
|
160
|
+
│ ▼ │
|
|
161
|
+
│ Chain of Links: │
|
|
162
|
+
│ ┌──────────────────────────────────────────┐ │
|
|
163
|
+
│ │ Preprocess → Exclusions → Grouping │ │
|
|
164
|
+
│ │ ↓ ↓ ↓ │ │
|
|
165
|
+
│ │ Attributes Cluster Map Formulas │ │
|
|
166
|
+
│ │ ↓ ↓ ↓ │ │
|
|
167
|
+
│ │ Diagnosis Marking Final DRG │ │
|
|
168
|
+
│ └──────────────────────────────────────────┘ │
|
|
169
|
+
│ │ │
|
|
170
|
+
│ ▼ │
|
|
171
|
+
│ Memory-mapped binary data (16 .bin files) │
|
|
172
|
+
└─────────────────────────────────────────────────┘
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
The grouper loads 16 precompiled binary data files at startup (diagnosis definitions, DRG formulas, cluster maps, exclusion groups, etc.) via memory mapping. The grouping pipeline is a chain of composable processors, each transforming the claim context. This design mirrors the original Java architecture for validation purposes.
|
|
176
|
+
|
|
177
|
+
## Building from source
|
|
178
|
+
|
|
179
|
+
### Prerequisites
|
|
180
|
+
|
|
181
|
+
- **Zig 0.16+** — [download](https://ziglang.org/download/) or via package manager
|
|
182
|
+
- **Python 3.11+**
|
|
183
|
+
- **uv** (recommended) or **pip**
|
|
184
|
+
|
|
185
|
+
### Setup
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
git clone https://github.com/Bedrock-Billing/mz-drg.git
|
|
189
|
+
cd mz-drg
|
|
190
|
+
|
|
191
|
+
# Create venv and install
|
|
192
|
+
python3 -m venv .venv
|
|
193
|
+
source .venv/bin/activate
|
|
194
|
+
pip install -e .
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
This compiles the Zig shared library and bundles the data files into the Python package.
|
|
198
|
+
|
|
199
|
+
### Run tests
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
# Zig unit tests (27 tests)
|
|
203
|
+
cd zig_src && zig build test
|
|
204
|
+
|
|
205
|
+
# Python smoke test
|
|
206
|
+
python -c "import msdrg; print(msdrg.MsdrgGrouper().group({'version': 431, 'age': 65, 'sex': 0, 'discharge_status': 1, 'pdx': {'code': 'I5020'}, 'sdx': [], 'procedures': []}))"
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Data pipeline
|
|
210
|
+
|
|
211
|
+
The binary data files (`data/bin/*.bin`) are prebuilt and included in the repository. To regenerate them from the raw CMS CSVs:
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
bash scripts/setup_data.sh
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
This runs extract → import → compile → zig build in sequence. See `scripts/` for individual steps.
|
|
218
|
+
|
|
219
|
+
## Comparison testing
|
|
220
|
+
|
|
221
|
+
The `tests/` directory contains tools for validating mz-drg against the reference Java grouper.
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
# Generate random test claims
|
|
225
|
+
python tests/generate_test_claims.py --count 1000 --out tests/claims.json
|
|
226
|
+
|
|
227
|
+
# Compare Java vs Zig output
|
|
228
|
+
python tests/compare_groupers.py --file tests/claims.json
|
|
229
|
+
|
|
230
|
+
# Benchmark both
|
|
231
|
+
python tests/compare_groupers.py --file tests/claims.json --benchmark
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
> The Java comparison requires JDK 17+ and the reference JARs in `jars/`. This is only needed for validation — the Python package itself has no Java dependency.
|
|
235
|
+
|
|
236
|
+
## C API
|
|
237
|
+
|
|
238
|
+
mz-drg exposes a C ABI for integration with any language. See `zig_src/src/c_api.zig` for the full API.
|
|
239
|
+
|
|
240
|
+
```c
|
|
241
|
+
// Initialize (loads all data, pre-builds chains)
|
|
242
|
+
void* ctx = msdrg_context_init("/path/to/data/bin");
|
|
243
|
+
|
|
244
|
+
// Group a claim via JSON
|
|
245
|
+
const char* result_json = msdrg_group_json(ctx, "{\"version\":431,...}");
|
|
246
|
+
|
|
247
|
+
// Free
|
|
248
|
+
msdrg_string_free(result_json);
|
|
249
|
+
msdrg_context_free(ctx);
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
Functions are thread-safe after initialization. The context is immutable and can be shared across threads.
|
|
253
|
+
|
|
254
|
+
## Project structure
|
|
255
|
+
|
|
256
|
+
```
|
|
257
|
+
mz-drg/
|
|
258
|
+
├── msdrg/ # Python package
|
|
259
|
+
│ ├── __init__.py
|
|
260
|
+
│ └── grouper.py # MsdrgGrouper class
|
|
261
|
+
├── zig_src/ # Zig source
|
|
262
|
+
│ ├── build.zig
|
|
263
|
+
│ ├── main.zig
|
|
264
|
+
│ └── src/
|
|
265
|
+
│ ├── c_api.zig # C ABI exports
|
|
266
|
+
│ ├── json_api.zig # JSON in/out
|
|
267
|
+
│ ├── msdrg.zig # GrouperChain + version routing
|
|
268
|
+
│ ├── chain.zig # Composable processor chain
|
|
269
|
+
│ ├── models.zig # Data models
|
|
270
|
+
│ ├── preprocess.zig # Exclusion & attribute handling
|
|
271
|
+
│ ├── grouping.zig # DRG formula matching
|
|
272
|
+
│ ├── marking.zig # Code marking logic
|
|
273
|
+
│ ├── hac.zig # Hospital-Acquired Conditions
|
|
274
|
+
│ └── ... # 20+ modules, ~8,500 lines
|
|
275
|
+
├── data/bin/ # Prebuilt binary data (16 files)
|
|
276
|
+
├── scripts/ # Data extraction & compilation
|
|
277
|
+
├── tests/ # Comparison & benchmark tools
|
|
278
|
+
├── python_client/ # Legacy Python wrapper
|
|
279
|
+
├── pyproject.toml
|
|
280
|
+
└── setup.py
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
## License
|
|
284
|
+
|
|
285
|
+
MIT — see [LICENSE](LICENSE).
|
|
286
|
+
|
|
287
|
+
## Acknowledgments
|
|
288
|
+
|
|
289
|
+
This project is intended for healthcare IT professionals who need a fast, embeddable, and auditable DRG classification engine.
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|