hccinfhir 0.1.8__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hccinfhir/utils.py CHANGED
@@ -1,74 +1,247 @@
1
- from typing import Set, Dict, Tuple
1
+ from typing import Set, Dict, Tuple, Optional
2
+ from pathlib import Path
2
3
  import importlib.resources
3
4
  from hccinfhir.datamodels import ModelName, ProcFilteringFilename, DxCCMappingFilename
4
5
 
5
- def load_is_chronic(filename: str) -> Dict[Tuple[str, ModelName], bool]:
6
+
7
+ def resolve_data_file(file_path: str) -> str:
8
+ """
9
+ Resolve data file location with clear search priority.
10
+
11
+ Priority:
12
+ 1. Absolute path (if provided)
13
+ 2. Relative to current working directory
14
+ 3. Package data directory
15
+
16
+ Args:
17
+ file_path: Filename or path to the file (e.g., "ra_dx_to_cc_2026.csv"
18
+ or "/custom/path/file.csv")
19
+
20
+ Returns:
21
+ Full path to the resolved file
22
+
23
+ Raises:
24
+ FileNotFoundError: If file cannot be found in any location
25
+ """
26
+ path = Path(file_path)
27
+
28
+ # 1. If absolute path provided, use it directly
29
+ if path.is_absolute():
30
+ if path.exists():
31
+ return str(path)
32
+ raise FileNotFoundError(f"File not found: {path}")
33
+
34
+ # 2. Check current working directory
35
+ cwd_path = Path.cwd() / file_path
36
+ if cwd_path.exists():
37
+ return str(cwd_path)
38
+
39
+ # 3. Check package data directory
40
+ try:
41
+ with importlib.resources.path('hccinfhir.data', file_path) as pkg_path:
42
+ if pkg_path.exists():
43
+ return str(pkg_path)
44
+ except (FileNotFoundError, TypeError):
45
+ pass
46
+
47
+ raise FileNotFoundError(
48
+ f"File '{file_path}' not found in:\n"
49
+ f" - Current directory: {Path.cwd()}\n"
50
+ f" - Package data: hccinfhir.data"
51
+ )
52
+
53
+
54
+ def load_is_chronic(file_path: str) -> Dict[Tuple[str, ModelName], bool]:
6
55
  """
7
56
  Load a CSV file into a dictionary mapping (cc, model_name) to a boolean value indicating whether the HCC is chronic.
57
+
58
+ Args:
59
+ file_path: Filename or path to the CSV file
60
+
61
+ Returns:
62
+ Dictionary mapping (cc, model_name) to boolean chronic indicator
63
+
64
+ Raises:
65
+ FileNotFoundError: If file cannot be found
66
+ RuntimeError: If file cannot be loaded or parsed
8
67
  """
9
68
  mapping: Dict[Tuple[str, ModelName], bool] = {}
69
+
10
70
  try:
11
- with importlib.resources.open_text('hccinfhir.data', filename) as f:
12
- for line in f.readlines()[1:]: # Skip header
13
- try:
14
- hcc, is_chronic, model_version, model_domain = line.strip().split(',')
15
- cc = hcc.replace('HCC', '')
16
- model_name = f"{model_domain} Model {model_version}"
17
- key = (cc, model_name)
18
- if key not in mapping:
19
- mapping[key] = (is_chronic == 'Y')
20
- except ValueError:
21
- continue # Skip malformed lines
71
+ resolved_path = resolve_data_file(file_path)
72
+ with open(resolved_path, "r", encoding="utf-8") as file:
73
+ content = file.read()
74
+ except FileNotFoundError as e:
75
+ raise FileNotFoundError(f"Could not load is_chronic mapping: {e}")
22
76
  except Exception as e:
23
- print(f"Error loading mapping file: {e}")
24
- return {}
25
-
77
+ raise RuntimeError(f"Error loading mapping file '{file_path}': {e}")
78
+
79
+ for line in content.splitlines()[1:]: # Skip header
80
+ try:
81
+ hcc, is_chronic, model_version, model_domain = line.strip().split(',')
82
+ cc = hcc.replace('HCC', '')
83
+ model_name = f"{model_domain} Model {model_version}"
84
+ key = (cc, model_name)
85
+ if key not in mapping:
86
+ mapping[key] = (is_chronic == 'Y')
87
+ except ValueError:
88
+ continue # Skip malformed lines
89
+
26
90
  return mapping
27
91
 
28
- def load_proc_filtering(filename: ProcFilteringFilename) -> Set[str]:
92
+ def load_proc_filtering(file_path: ProcFilteringFilename) -> Set[str]:
29
93
  """
30
94
  Load a single-column CSV file into a set of strings.
31
-
95
+
32
96
  Args:
33
- filename: Name of the CSV file in the hccinfhir.data package
34
-
97
+ file_path: Filename or path to the CSV file
98
+
35
99
  Returns:
36
100
  Set of strings from the CSV file
101
+
102
+ Raises:
103
+ FileNotFoundError: If file cannot be found
104
+ RuntimeError: If file cannot be loaded
37
105
  """
38
106
  try:
39
- with importlib.resources.open_text('hccinfhir.data', filename) as f:
40
- return set(f.read().splitlines())
107
+ resolved_path = resolve_data_file(file_path)
108
+ with open(resolved_path, "r", encoding="utf-8") as file:
109
+ content = file.read()
110
+ except FileNotFoundError as e:
111
+ raise FileNotFoundError(f"Could not load proc_filtering file: {e}")
41
112
  except Exception as e:
42
- print(f"Error loading {filename}: {e}")
43
- return set()
113
+ raise RuntimeError(f"Error loading file '{file_path}': {e}")
44
114
 
45
- def load_dx_to_cc_mapping(filename: DxCCMappingFilename) -> Dict[Tuple[str, ModelName], Set[str]]:
115
+ return set(content.splitlines())
116
+
117
+ def load_dx_to_cc_mapping(file_path: DxCCMappingFilename) -> Dict[Tuple[str, ModelName], Set[str]]:
46
118
  """
47
119
  Load diagnosis to CC mapping from a CSV file.
48
120
  Expected format: diagnosis_code,cc,model_name
49
-
121
+
50
122
  Args:
51
- filename: Name of the CSV file in the hccinfhir.data package
52
-
123
+ file_path: Filename or path to the CSV file
124
+
53
125
  Returns:
54
126
  Dictionary mapping (diagnosis_code, model_name) to a set of CC codes
127
+
128
+ Raises:
129
+ FileNotFoundError: If file cannot be found
130
+ RuntimeError: If file cannot be loaded or parsed
55
131
  """
56
132
  mapping: Dict[Tuple[str, ModelName], Set[str]] = {}
57
-
133
+
134
+ try:
135
+ resolved_path = resolve_data_file(file_path)
136
+ with open(resolved_path, "r", encoding="utf-8") as file:
137
+ content = file.read()
138
+ except FileNotFoundError as e:
139
+ raise FileNotFoundError(f"Could not load dx_to_cc mapping: {e}")
140
+ except Exception as e:
141
+ raise RuntimeError(f"Error loading mapping file '{file_path}': {e}")
142
+
143
+ for line in content.splitlines()[1:]: # Skip header
144
+ try:
145
+ diagnosis_code, cc, model_name = line.strip().split(',')
146
+ key = (diagnosis_code, model_name)
147
+ if key not in mapping:
148
+ mapping[key] = {cc}
149
+ else:
150
+ mapping[key].add(cc)
151
+ except ValueError:
152
+ continue # Skip malformed lines
153
+
154
+ return mapping
155
+
156
+
157
+ def load_hierarchies(file_path: str) -> Dict[Tuple[str, ModelName], Set[str]]:
158
+ """
159
+ Load hierarchies from a CSV file.
160
+ Expected format: cc_parent,cc_child,model_domain,model_version,...
161
+
162
+ Args:
163
+ file_path: Filename or path to the CSV file
164
+
165
+ Returns:
166
+ Dictionary mapping (cc_parent, model_name) to a set of child CCs
167
+
168
+ Raises:
169
+ FileNotFoundError: If file cannot be found
170
+ RuntimeError: If file cannot be loaded or parsed
171
+ """
172
+ hierarchies: Dict[Tuple[str, ModelName], Set[str]] = {}
173
+
58
174
  try:
59
- with importlib.resources.open_text('hccinfhir.data', filename) as f:
60
- for line in f.readlines()[1:]: # Skip header
61
- try:
62
- diagnosis_code, cc, model_name = line.strip().split(',')
63
- key = (diagnosis_code, model_name)
64
- if key not in mapping:
65
- mapping[key] = {cc}
66
- else:
67
- mapping[key].add(cc)
68
- except ValueError:
69
- continue # Skip malformed lines
175
+ resolved_path = resolve_data_file(file_path)
176
+ with open(resolved_path, "r", encoding="utf-8") as file:
177
+ content = file.read()
178
+ except FileNotFoundError as e:
179
+ raise FileNotFoundError(f"Could not load hierarchies: {e}")
70
180
  except Exception as e:
71
- print(f"Error loading mapping file: {e}")
72
- return {}
73
-
74
- return mapping
181
+ raise RuntimeError(f"Error loading hierarchies file '{file_path}': {e}")
182
+
183
+ for line in content.splitlines()[1:]: # Skip header
184
+ try:
185
+ parts = line.strip().split(',')
186
+ cc_parent, cc_child, model_domain, model_version = parts[0], parts[1], parts[2], parts[3]
187
+
188
+ # Construct model name based on domain
189
+ if model_domain == 'ESRD':
190
+ model_name = f"CMS-HCC {model_domain} Model {model_version}"
191
+ else:
192
+ model_name = f"{model_domain} Model {model_version}"
193
+
194
+ key = (cc_parent, model_name)
195
+ if key not in hierarchies:
196
+ hierarchies[key] = {cc_child}
197
+ else:
198
+ hierarchies[key].add(cc_child)
199
+ except (ValueError, IndexError):
200
+ continue # Skip malformed lines
201
+
202
+ return hierarchies
203
+
204
+
205
+ def load_coefficients(file_path: str) -> Dict[Tuple[str, ModelName], float]:
206
+ """
207
+ Load coefficients from a CSV file.
208
+ Expected format: coefficient,value,model_domain,model_version
209
+
210
+ Args:
211
+ file_path: Filename or path to the CSV file
212
+
213
+ Returns:
214
+ Dictionary mapping (coefficient_name, model_name) to float value
215
+
216
+ Raises:
217
+ FileNotFoundError: If file cannot be found
218
+ RuntimeError: If file cannot be loaded or parsed
219
+ """
220
+ coefficients: Dict[Tuple[str, ModelName], float] = {}
221
+
222
+ try:
223
+ resolved_path = resolve_data_file(file_path)
224
+ with open(resolved_path, "r", encoding="utf-8") as file:
225
+ content = file.read()
226
+ except FileNotFoundError as e:
227
+ raise FileNotFoundError(f"Could not load coefficients: {e}")
228
+ except Exception as e:
229
+ raise RuntimeError(f"Error loading coefficients file '{file_path}': {e}")
230
+
231
+ for line in content.splitlines()[1:]: # Skip header
232
+ try:
233
+ parts = line.strip().split(',')
234
+ coefficient, value, model_domain, model_version = parts[0], parts[1], parts[2], parts[3]
235
+
236
+ # Construct model name based on domain
237
+ if model_domain == 'ESRD':
238
+ model_name = f"CMS-HCC {model_domain} Model V{model_version[-2:]}"
239
+ else:
240
+ model_name = f"{model_domain} Model V{model_version[-2:]}"
241
+
242
+ key = (coefficient.lower(), model_name)
243
+ coefficients[key] = float(value)
244
+ except (ValueError, IndexError):
245
+ continue # Skip malformed lines
246
+
247
+ return coefficients