hccinfhir 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hccinfhir/__init__.py +2 -0
- hccinfhir/datamodels.py +102 -9
- hccinfhir/defaults.py +31 -0
- hccinfhir/extractor_834.py +530 -0
- hccinfhir/hccinfhir.py +28 -13
- hccinfhir/model_calculate.py +29 -25
- hccinfhir/model_coefficients.py +2 -29
- hccinfhir/model_dx_to_cc.py +6 -11
- hccinfhir/model_hierarchies.py +6 -35
- hccinfhir/sample_files/sample_834_01.txt +1 -0
- hccinfhir/samples.py +50 -5
- hccinfhir/utils.py +217 -44
- hccinfhir-0.2.0.dist-info/METADATA +946 -0
- {hccinfhir-0.1.8.dist-info → hccinfhir-0.2.0.dist-info}/RECORD +16 -13
- hccinfhir-0.1.8.dist-info/METADATA +0 -782
- {hccinfhir-0.1.8.dist-info → hccinfhir-0.2.0.dist-info}/WHEEL +0 -0
- {hccinfhir-0.1.8.dist-info → hccinfhir-0.2.0.dist-info}/licenses/LICENSE +0 -0
hccinfhir/utils.py
CHANGED
|
@@ -1,74 +1,247 @@
|
|
|
1
|
-
from typing import Set, Dict, Tuple
|
|
1
|
+
from typing import Set, Dict, Tuple, Optional
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
import importlib.resources
|
|
3
4
|
from hccinfhir.datamodels import ModelName, ProcFilteringFilename, DxCCMappingFilename
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
|
|
7
|
+
def resolve_data_file(file_path: str) -> str:
    """
    Resolve data file location with clear search priority.

    Priority:
    1. Absolute path (if provided)
    2. Relative to current working directory
    3. Package data directory (``hccinfhir.data``)

    Args:
        file_path: Filename or path to the file (e.g., "ra_dx_to_cc_2026.csv"
                   or "/custom/path/file.csv")

    Returns:
        Full path to the resolved file

    Raises:
        FileNotFoundError: If file cannot be found in any location
    """
    path = Path(file_path)

    # 1. An absolute path is used as given; no fallback search is attempted.
    if path.is_absolute():
        if path.exists():
            return str(path)
        raise FileNotFoundError(f"File not found: {path}")

    # 2. Check current working directory
    cwd_path = Path.cwd() / file_path
    if cwd_path.exists():
        return str(cwd_path)

    # 3. Check package data directory
    try:
        with importlib.resources.path('hccinfhir.data', file_path) as pkg_path:
            if pkg_path.exists():
                return str(pkg_path)
    except (FileNotFoundError, TypeError, ValueError, ImportError):
        # Any failure of the package lookup means "not found there":
        #   - ImportError/ModuleNotFoundError: the data package is not importable
        #   - ValueError: the name is not accepted as a resource name (some
        #     Python versions reject names containing path separators)
        # Fall through so callers always get the documented FileNotFoundError.
        pass

    raise FileNotFoundError(
        f"File '{file_path}' not found in:\n"
        f"  - Current directory: {Path.cwd()}\n"
        f"  - Package data: hccinfhir.data"
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def load_is_chronic(file_path: str) -> Dict[Tuple[str, ModelName], bool]:
    """
    Load a CSV file into a dictionary mapping (cc, model_name) to a boolean value indicating whether the HCC is chronic.

    The first line is treated as a header and skipped. Every other line must
    contain exactly four comma-separated fields, in this order:
    hcc, is_chronic, model_version, model_domain. Lines with a different
    number of fields are silently skipped. When the same (cc, model_name)
    appears more than once, the first occurrence wins.

    Args:
        file_path: Filename or path to the CSV file

    Returns:
        Dictionary mapping (cc, model_name) to boolean chronic indicator

    Raises:
        FileNotFoundError: If file cannot be found
        RuntimeError: If file cannot be loaded or parsed
    """
    try:
        resolved_path = resolve_data_file(file_path)
        with open(resolved_path, "r", encoding="utf-8") as file:
            content = file.read()
    except FileNotFoundError as e:
        # Chain explicitly so the original lookup failure stays the cause.
        raise FileNotFoundError(f"Could not load is_chronic mapping: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Error loading mapping file '{file_path}': {e}") from e

    mapping: Dict[Tuple[str, ModelName], bool] = {}
    for line in content.splitlines()[1:]:  # Skip header
        try:
            hcc, is_chronic, model_version, model_domain = line.strip().split(',')
        except ValueError:
            continue  # Skip malformed lines (wrong number of fields)

        # Keys use the bare CC identifier, without the "HCC" text.
        cc = hcc.replace('HCC', '')
        model_name = f"{model_domain} Model {model_version}"
        # setdefault keeps the first value seen for a duplicated key.
        mapping.setdefault((cc, model_name), is_chronic == 'Y')

    return mapping
|
|
27
91
|
|
|
28
|
-
def load_proc_filtering(
|
|
92
|
+
def load_proc_filtering(file_path: ProcFilteringFilename) -> Set[str]:
    """
    Load a single-column CSV file into a set of strings.

    Every non-blank line of the file becomes one entry (no header line is
    skipped). Surrounding whitespace is stripped and blank lines are ignored,
    so the result never contains an empty string or whitespace-padded entries.

    Args:
        file_path: Filename or path to the CSV file

    Returns:
        Set of strings from the CSV file

    Raises:
        FileNotFoundError: If file cannot be found
        RuntimeError: If file cannot be loaded
    """
    try:
        resolved_path = resolve_data_file(file_path)
        with open(resolved_path, "r", encoding="utf-8") as file:
            content = file.read()
    except FileNotFoundError as e:
        # Chain explicitly so the original lookup failure stays the cause.
        raise FileNotFoundError(f"Could not load proc_filtering file: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Error loading file '{file_path}': {e}") from e

    stripped_lines = (line.strip() for line in content.splitlines())
    return {entry for entry in stripped_lines if entry}
|
|
116
|
+
|
|
117
|
+
def load_dx_to_cc_mapping(file_path: DxCCMappingFilename) -> Dict[Tuple[str, ModelName], Set[str]]:
    """
    Load diagnosis to CC mapping from a CSV file.
    Expected format: diagnosis_code,cc,model_name

    The first line is treated as a header and skipped. Lines that do not
    contain exactly three comma-separated fields are silently skipped.
    A diagnosis code listed several times for the same model accumulates
    all of its CCs in one set.

    Args:
        file_path: Filename or path to the CSV file

    Returns:
        Dictionary mapping (diagnosis_code, model_name) to a set of CC codes

    Raises:
        FileNotFoundError: If file cannot be found
        RuntimeError: If file cannot be loaded or parsed
    """
    try:
        resolved_path = resolve_data_file(file_path)
        with open(resolved_path, "r", encoding="utf-8") as file:
            content = file.read()
    except FileNotFoundError as e:
        # Chain explicitly so the original lookup failure stays the cause.
        raise FileNotFoundError(f"Could not load dx_to_cc mapping: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Error loading mapping file '{file_path}': {e}") from e

    mapping: Dict[Tuple[str, ModelName], Set[str]] = {}
    for line in content.splitlines()[1:]:  # Skip header
        try:
            diagnosis_code, cc, model_name = line.strip().split(',')
        except ValueError:
            continue  # Skip malformed lines (wrong number of fields)
        mapping.setdefault((diagnosis_code, model_name), set()).add(cc)

    return mapping
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def load_hierarchies(file_path: str) -> Dict[Tuple[str, ModelName], Set[str]]:
    """
    Load hierarchies from a CSV file.
    Expected format: cc_parent,cc_child,model_domain,model_version,...

    The first line is treated as a header and skipped. Only the first four
    comma-separated fields of each line are used; extra columns are ignored
    and lines with fewer than four fields are silently skipped.

    The model name is built as "<model_domain> Model <model_version>", with
    a "CMS-HCC " prefix added when model_domain is exactly "ESRD".

    Args:
        file_path: Filename or path to the CSV file

    Returns:
        Dictionary mapping (cc_parent, model_name) to a set of child CCs

    Raises:
        FileNotFoundError: If file cannot be found
        RuntimeError: If file cannot be loaded or parsed
    """
    try:
        resolved_path = resolve_data_file(file_path)
        with open(resolved_path, "r", encoding="utf-8") as file:
            content = file.read()
    except FileNotFoundError as e:
        # Chain explicitly so the original lookup failure stays the cause.
        raise FileNotFoundError(f"Could not load hierarchies: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Error loading hierarchies file '{file_path}': {e}") from e

    hierarchies: Dict[Tuple[str, ModelName], Set[str]] = {}
    for line in content.splitlines()[1:]:  # Skip header
        parts = line.strip().split(',')
        if len(parts) < 4:
            continue  # Skip malformed lines
        cc_parent, cc_child, model_domain, model_version = parts[:4]

        # Construct model name based on domain
        if model_domain == 'ESRD':
            model_name = f"CMS-HCC {model_domain} Model {model_version}"
        else:
            model_name = f"{model_domain} Model {model_version}"

        hierarchies.setdefault((cc_parent, model_name), set()).add(cc_child)

    return hierarchies
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def load_coefficients(file_path: str) -> Dict[Tuple[str, ModelName], float]:
    """
    Load coefficients from a CSV file.
    Expected format: coefficient,value,model_domain,model_version

    The first line is treated as a header and skipped. Only the first four
    comma-separated fields of each line are used. Lines with fewer than four
    fields, or whose value is not a valid float, are silently skipped.
    When a (coefficient, model_name) key repeats, the last row wins.

    The model name is built as "<model_domain> Model V<NN>", where <NN> is
    the last two characters of model_version, with a "CMS-HCC " prefix added
    when model_domain is exactly "ESRD". Coefficient names are lower-cased.

    Args:
        file_path: Filename or path to the CSV file

    Returns:
        Dictionary mapping (coefficient_name, model_name) to float value

    Raises:
        FileNotFoundError: If file cannot be found
        RuntimeError: If file cannot be loaded or parsed
    """
    try:
        resolved_path = resolve_data_file(file_path)
        with open(resolved_path, "r", encoding="utf-8") as file:
            content = file.read()
    except FileNotFoundError as e:
        # Chain explicitly so the original lookup failure stays the cause.
        raise FileNotFoundError(f"Could not load coefficients: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Error loading coefficients file '{file_path}': {e}") from e

    coefficients: Dict[Tuple[str, ModelName], float] = {}
    for line in content.splitlines()[1:]:  # Skip header
        parts = line.strip().split(',')
        if len(parts) < 4:
            continue  # Skip malformed lines
        coefficient, value, model_domain, model_version = parts[:4]

        try:
            numeric_value = float(value)
        except ValueError:
            continue  # Skip rows whose value is not numeric

        # Construct model name based on domain
        version_suffix = model_version[-2:]
        if model_domain == 'ESRD':
            model_name = f"CMS-HCC {model_domain} Model V{version_suffix}"
        else:
            model_name = f"{model_domain} Model V{version_suffix}"

        coefficients[(coefficient.lower(), model_name)] = numeric_value

    return coefficients
|