hccinfhir 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hccinfhir/__init__.py +2 -1
- hccinfhir/data/ra_labels_2026.csv +784 -0
- hccinfhir/datamodels.py +138 -6
- hccinfhir/defaults.py +3 -1
- hccinfhir/extractor_834.py +552 -359
- hccinfhir/hccinfhir.py +10 -10
- hccinfhir/model_calculate.py +18 -2
- hccinfhir/sample_files/sample_834_02.txt +1 -0
- hccinfhir/sample_files/sample_834_03.txt +1 -0
- hccinfhir/sample_files/sample_834_04.txt +1 -0
- hccinfhir/sample_files/sample_834_05.txt +1 -0
- hccinfhir/sample_files/sample_834_06.txt +1 -0
- hccinfhir/utils.py +68 -1
- {hccinfhir-0.2.1.dist-info → hccinfhir-0.2.3.dist-info}/METADATA +147 -5
- {hccinfhir-0.2.1.dist-info → hccinfhir-0.2.3.dist-info}/RECORD +17 -11
- {hccinfhir-0.2.1.dist-info → hccinfhir-0.2.3.dist-info}/WHEEL +0 -0
- {hccinfhir-0.2.1.dist-info → hccinfhir-0.2.3.dist-info}/licenses/LICENSE +0 -0
hccinfhir/hccinfhir.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Dict, Any, Union, Optional, Tuple, Set
|
|
1
|
+
from typing import List, Dict, Any, Union, Optional, Tuple, Set, Iterable
|
|
2
2
|
from hccinfhir.extractor import extract_sld_list
|
|
3
3
|
from hccinfhir.filter import apply_filter
|
|
4
4
|
from hccinfhir.model_calculate import calculate_raf
|
|
@@ -184,16 +184,16 @@ class HCCInFHIR:
|
|
|
184
184
|
# Create new result with service data included
|
|
185
185
|
return raf_result.model_copy(update={'service_level_data': standardized_data})
|
|
186
186
|
|
|
187
|
-
def calculate_from_diagnosis(self, diagnosis_codes:
|
|
187
|
+
def calculate_from_diagnosis(self, diagnosis_codes: Iterable[str],
|
|
188
188
|
demographics: Union[Demographics, Dict[str, Any]],
|
|
189
189
|
prefix_override: Optional[PrefixOverride] = None,
|
|
190
190
|
maci: float = 0.0,
|
|
191
191
|
norm_factor: float = 1.0,
|
|
192
192
|
frailty_score: float = 0.0) -> RAFResult:
|
|
193
|
-
"""Calculate RAF scores from
|
|
193
|
+
"""Calculate RAF scores from diagnosis codes.
|
|
194
194
|
|
|
195
195
|
Args:
|
|
196
|
-
diagnosis_codes:
|
|
196
|
+
diagnosis_codes: Iterable of diagnosis codes (list, tuple, numpy array, etc.)
|
|
197
197
|
demographics: Demographics information
|
|
198
198
|
prefix_override: Optional prefix to override auto-detected demographic prefix.
|
|
199
199
|
Use when demographic categorization is incorrect (e.g., ESRD patients with orec=0).
|
|
@@ -201,14 +201,14 @@ class HCCInFHIR:
|
|
|
201
201
|
norm_factor: Normalization factor (default 1.0)
|
|
202
202
|
frailty_score: Frailty adjustment score (default 0.0)
|
|
203
203
|
|
|
204
|
-
|
|
205
|
-
|
|
204
|
+
Returns:
|
|
205
|
+
RAFResult object containing calculated scores
|
|
206
206
|
"""
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
207
|
+
# Convert to list to ensure consistent handling downstream
|
|
208
|
+
diagnosis_list = list(diagnosis_codes) if diagnosis_codes is not None else []
|
|
209
|
+
|
|
210
210
|
demographics = self._ensure_demographics(demographics)
|
|
211
211
|
raf_result = self._calculate_raf_from_demographics_and_dx_codes(
|
|
212
|
-
|
|
212
|
+
diagnosis_list, demographics, prefix_override, maci, norm_factor, frailty_score
|
|
213
213
|
)
|
|
214
214
|
return raf_result
|
hccinfhir/model_calculate.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from typing import List, Union, Dict, Tuple, Set, Optional
|
|
2
|
-
from hccinfhir.datamodels import ModelName, RAFResult, PrefixOverride
|
|
2
|
+
from hccinfhir.datamodels import ModelName, RAFResult, PrefixOverride, HCCDetail
|
|
3
3
|
from hccinfhir.model_demographics import categorize_demographics
|
|
4
4
|
from hccinfhir.model_dx_to_cc import apply_mapping
|
|
5
5
|
from hccinfhir.model_hierarchies import apply_hierarchies
|
|
6
6
|
from hccinfhir.model_coefficients import apply_coefficients
|
|
7
7
|
from hccinfhir.model_interactions import apply_interactions
|
|
8
|
-
from hccinfhir.defaults import dx_to_cc_default, hierarchies_default, is_chronic_default, coefficients_default
|
|
8
|
+
from hccinfhir.defaults import dx_to_cc_default, hierarchies_default, is_chronic_default, coefficients_default, labels_default
|
|
9
9
|
|
|
10
10
|
def calculate_raf(diagnosis_codes: List[str],
|
|
11
11
|
model_name: ModelName = "CMS-HCC Model V28",
|
|
@@ -23,6 +23,7 @@ def calculate_raf(diagnosis_codes: List[str],
|
|
|
23
23
|
is_chronic_mapping: Dict[Tuple[str, ModelName], bool] = is_chronic_default,
|
|
24
24
|
hierarchies_mapping: Dict[Tuple[str, ModelName], Set[str]] = hierarchies_default,
|
|
25
25
|
coefficients_mapping: Dict[Tuple[str, ModelName], float] = coefficients_default,
|
|
26
|
+
labels_mapping: Dict[Tuple[str, ModelName], str] = labels_default,
|
|
26
27
|
prefix_override: Optional[PrefixOverride] = None,
|
|
27
28
|
maci: float = 0.0,
|
|
28
29
|
norm_factor: float = 1.0,
|
|
@@ -47,6 +48,7 @@ def calculate_raf(diagnosis_codes: List[str],
|
|
|
47
48
|
is_chronic_mapping: Mapping of HCCs to a chronic flag for the selected model; defaults to packaged mappings.
|
|
48
49
|
hierarchies_mapping: Mapping of parent HCCs to child HCCs for hierarchical rules; defaults to packaged 2026 mappings.
|
|
49
50
|
coefficients_mapping: Mapping of coefficient names to values; defaults to packaged 2026 mappings.
|
|
51
|
+
labels_mapping: Mapping of (cc, model_name) to human-readable HCC labels; defaults to packaged 2026 mappings.
|
|
50
52
|
prefix_override: Optional prefix to override auto-detected demographic prefix.
|
|
51
53
|
Use when demographic categorization from orec/crec is incorrect.
|
|
52
54
|
Common values: 'DI_' (ESRD Dialysis), 'DNE_' (ESRD Dialysis New Enrollee),
|
|
@@ -136,6 +138,19 @@ def calculate_raf(diagnosis_codes: List[str],
|
|
|
136
138
|
risk_score_hcc = risk_score - risk_score_demographics
|
|
137
139
|
risk_score_payment = risk_score * (1 - maci) / norm_factor + frailty_score
|
|
138
140
|
|
|
141
|
+
# Build HCC details with labels and chronic status
|
|
142
|
+
hcc_details = []
|
|
143
|
+
for hcc in hcc_set:
|
|
144
|
+
label = labels_mapping.get((hcc, model_name))
|
|
145
|
+
is_chronic = is_chronic_mapping.get((hcc, model_name), False)
|
|
146
|
+
coef = coefficients.get(hcc)
|
|
147
|
+
hcc_details.append(HCCDetail(
|
|
148
|
+
hcc=hcc,
|
|
149
|
+
label=label,
|
|
150
|
+
is_chronic=is_chronic,
|
|
151
|
+
coefficient=coef
|
|
152
|
+
))
|
|
153
|
+
|
|
139
154
|
return RAFResult(
|
|
140
155
|
risk_score=risk_score,
|
|
141
156
|
risk_score_demographics=risk_score_demographics,
|
|
@@ -143,6 +158,7 @@ def calculate_raf(diagnosis_codes: List[str],
|
|
|
143
158
|
risk_score_hcc=risk_score_hcc,
|
|
144
159
|
risk_score_payment=risk_score_payment,
|
|
145
160
|
hcc_list=list(hcc_set),
|
|
161
|
+
hcc_details=hcc_details,
|
|
146
162
|
cc_to_dx=cc_to_dx,
|
|
147
163
|
coefficients=coefficients,
|
|
148
164
|
interactions=interactions,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ISA*00* *00* *ZZ*CADHCS_5010_834*30*999999991 *250124*1927*^*00501*000000001*0*P*:~GS*BE*CADHCS_5010_834*999999991*20250124*192730*10000001*X*005010X220A1~ST*834*0001*005010X220A1~BGN*00*DHCS834-DA-20250124-Sample PACE-001*20250124*19273000****2~QTY*TO*1~N1*P5*California Department of Health Care Services.........*FI*999999990~N1*IN*Sample PACE Inc.*FI*999999991~INS*Y*18*001*AI*A*E**AC~REF*0F*randomParticipantID~REF*1L*randomCIN1~REF*17*;;202501;~REF*23*4;20200101;;~REF*3H*19;10;randomCaseNum1;;~REF*6O*;W;Y;;60;~REF*ZZ*01051;41609;;;;30451;41651;;;~NM1*IL*1*randomLastName*randomFirstName*A~PER*IP**TE*randomPhone~N3*randomStreetAddress1~N4*LOS ANGELES CA*CA*90019**CY*19~DMG*D8*randomDoB*F**7~LUI*LD*SPA**7~HD*021**LTC*010;51~DTP*348*D8*20250101~DTP*349*D8*20250131~REF*17*N;;;;;;;;;;;;;1~REF*9V*9;9;0~REF*CE*10;401;;;;;;~REF*RB*10~REF*ZX*19~REF*ZZ*;;10~SE*29*0001~GE*1*10000001~IEA*1*000000001~
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ISA*00* *00* *ZZ*CADHCS_5010_834*30*999999992 *250812*1936*^*00501*000000002*0*P*:~GS*BE*CADHCS_5010_834*999999992*20250812*193641*10000002*X*005010X220A1~ST*834*0001*005010X220A1~BGN*00*DHCS834-DA-20250812-Sample South LA PACE-957-001*20250812*19364100****2~QTY*TO*1~N1*P5*California Department of Health Care Services.........*FI*999999990~N1*IN*Sample South LA PACE Inc.*FI*999999992~INS*Y*18*001*AI*A*C**AC~REF*0F*randomMemberId~REF*1L*randomCIN2~REF*17*;;202508;~REF*23*2;20200201;;;~REF*3H*19;60;randomCaseNum2;;~REF*6O*;W;Y;;08;~REF*DX*H9999;006;20250201;;;~REF*F6*randomMbi~REF*QQ*;;202001;202001~REF*ZZ*95701;;;;;35201;;;;~NM1*IL*1*randomLastName*randomFirstName~PER*IP**TE*randomPhone~N3*randomAddress1~N4*LONG BEACH CA*CA*90813**CY*19~DMG*D8*randomDoB*F**7~LUI*LD*SPA**7~HD*021**LTC*957;01~DTP*348*D8*20250801~DTP*349*D8*20250831~REF*17*F;;;;;;;;;;;;;1~REF*9V*2;2;2~REF*CE*60;401;80;401;;;;~REF*RB*60~REF*ZX*19~REF*ZZ*;;10~SE*32*0001~GE*1*10000002~IEA*1*000000002~
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ISA*00* *00* *ZZ*CADHCS_5010_834*30*999999992 *251022*2000*^*00501*000000003*0*P*:~GS*BE*CADHCS_5010_834*999999992*20251022*200019*10000003*X*005010X220A1~ST*834*0001*005010X220A1~BGN*00*DHCS834-DA-20251022-Sample South LA PACE-957-001*20251022*20001900****2~QTY*TO*1~N1*P5*California Department of Health Care Services.........*FI*999999990~N1*IN*Sample South LA PACE Inc.*FI*999999992~INS*Y*18*001*AI*A*C**AC~REF*0F*randomMemberId~REF*1L*randomCIN3~REF*17*202605;;202510;~REF*23*2;20200301;;;~REF*3H*19;16;randomCode;;08~REF*6O*;A;Y;;29;~REF*DX*H9999;001;20251101;;;~REF*F6*randomMbi~REF*QQ*;;202002;202002~REF*ZZ*95701;;;;;20101;;;;~NM1*IL*1*randomLastName*randomFirstName~PER*IP**TE*randomPhone~N3*randomAddress1~N4*LOS ANGELES CA*CA*90044**CY*19~DMG*D8*randomDoB*F**:RET:2054-5~HD*021**LTC*957;01~DTP*348*D8*20251001~DTP*349*D8*20251031~REF*17*F;;;;1~REF*9V*3;2;2~REF*CE*16;401;80;401;;;;~REF*RB*16~REF*ZX*19~REF*ZZ*;;10~SE*31*0001~GE*1*10000003~IEA*1*000000003~
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ISA*00* *00* *ZZ*CADHCS_5010_834*30*999999992 *251023*1959*^*00501*000000004*0*P*:~GS*BE*CADHCS_5010_834*999999992*20251023*195928*10000004*X*005010X220A1~ST*834*0001*005010X220A1~BGN*00*DHCS834-DA-20251023-Sample South LA PACE-957-001*20251023*19592800****2~QTY*TO*1~N1*P5*California Department of Health Care Services.........*FI*999999990~N1*IN*Sample South LA PACE Inc.*FI*999999992~INS*Y*18*001*AI*A*C**AC~REF*0F*randomMemberId~REF*1L*randomCIN4~REF*17*202601;;202510;~REF*23*6;20200401;20200101;;~REF*3H*19;17;randomId1;;07~REF*6O*;W;Y;;19;~REF*DX*H9999;006;20230401;;;~REF*F6*randomId2~REF*QQ*;;202003;202003~REF*ZZ*957P4;;;;;;;;;~NM1*IL*1*randomLastName*randomFirstName~PER*IP**TE*randomPhone~N3*randomAddress1~N4*LONG BEACH CA*CA*90810**CY*19~DMG*D8*randomDoB*F**:RET:2135-2~HD*001**LTC*957;P4~DTP*348*D8*20251001~DTP*349*D8*20250930~AMT*R*1237~REF*17*F;;;;1~REF*9V*3;1;0~REF*CE*17;501;2K;691;;;;~REF*ZX*19~REF*ZZ*;;10~SE*31*0001~GE*1*10000004~IEA*1*000000004~
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ISA*00* *00* *ZZ*CADHCS_5010_834*30*999999991 *250206*2008*^*00501*000000005*0*P*:~GS*BE*CADHCS_5010_834*999999991*20250206*200823*10000005*X*005010X220A1~ST*834*0001*005010X220A1~BGN*00*DHCS834-DA-20250206-Sample PACE-001*20250206*20082300****2~QTY*TO*2~N1*P5*California Department of Health Care Services.........*FI*999999990~N1*IN*Sample PACE Inc.*FI*999999991~INS*Y*18*001*AI*A*E**AC~REF*0F*randomMemberId~REF*1L*randomCIN5~REF*17*;;202502;~REF*23*3;20200501;;~REF*3H*19;60;randomCaseNum1;;~REF*6O*;A;Y;D;67;~REF*Q4*randomProviderId;~REF*ZZ*01059;;;;;010S1;;;;~NM1*IL*1*randomLName1*randomFName1~PER*IP**TE*randomPhone1~N3*randomFullAddress1~N4*LOS ANGELES CA*CA*90037**CY*19~DMG*D8*randomDoB1*F**:RET:2054-5~NM1*31*1~N3*randomAddress~N4*LOS ANGELES CA*CA*90037~HD*001**LTC*010;59~DTP*348*D8*20250201~DTP*349*D8*20250131~REF*17*N;;;;;;;;;;;;;1~REF*CE*60;001;80;891;;;;~REF*RB*60~REF*ZX*19~REF*ZZ*;;10~HD*021**LTC*010;S1~DTP*348*D8*20250101~DTP*349*D8*20250131~REF*17*N;;;;;;;;;;;;;1~REF*CE*60;401;80;891;;;;~REF*RB*60~REF*ZX*19~REF*ZZ*;;11~INS*Y*18*001*AI*A*C**AC~REF*0F*randomMemberId2~REF*1L*randomCIN6~REF*17*;;202502;~REF*23*9;20200601;;~REF*3H*19;10;randomCaseNum2;;~REF*6O*;A;Y;;25;~REF*DX*;;;S5617;D635;20240901~REF*F6*randomId1~REF*QQ*;;202004;202004~REF*ZZ*01001;;;;;30401;;;;~NM1*IL*1*randomLName2*randomFName2*M~PER*IP**TE*randomPhone2~N3*randomAddress2~N4*LOS ANGELES CA*CA*90029**CY*19~DMG*D8*randomDoB2*M**:RET:2135-2~HD*021**LTC*010;01~DTP*348*D8*20250201~DTP*349*D8*20250228~REF*17*D;;;;;;;;;;;;;1~REF*9V*2;2;2~REF*CE*10;401;9G;999;80;401;;~REF*RB*10~REF*ZX*19~REF*ZZ*;;10~SE*64*0001~GE*1*10000005~IEA*1*000000005~
|
hccinfhir/utils.py
CHANGED
|
@@ -244,4 +244,71 @@ def load_coefficients(file_path: str) -> Dict[Tuple[str, ModelName], float]:
|
|
|
244
244
|
except (ValueError, IndexError):
|
|
245
245
|
continue # Skip malformed lines
|
|
246
246
|
|
|
247
|
-
return coefficients
|
|
247
|
+
return coefficients
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def load_labels(file_path: str) -> Dict[Tuple[str, ModelName], str]:
    """
    Load HCC labels from a CSV file.
    Expected format: cc,label,model_domain,model_version,...

    The first line is treated as a header and skipped. Each remaining line is
    parsed with the standard ``csv`` module, so a label may be quoted and may
    contain commas (or doubled quotes) without disturbing the columns that
    follow it. Lines with fewer than four columns are skipped. When the same
    (cc, model_name) key appears more than once, the first label wins.

    Args:
        file_path: Filename or path to the CSV file

    Returns:
        Dictionary mapping (cc, model_name) to label string

    Raises:
        FileNotFoundError: If file cannot be found
        RuntimeError: If the file cannot be resolved or read for any other reason
    """
    # Local import: keeps this function self-contained with respect to the
    # module's import block.
    import csv

    labels: Dict[Tuple[str, ModelName], str] = {}

    try:
        resolved_path = resolve_data_file(file_path)
        with open(resolved_path, "r", encoding="utf-8") as file:
            content = file.read()
    except FileNotFoundError as e:
        raise FileNotFoundError(f"Could not load labels: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Error loading labels file '{file_path}': {e}") from e

    for line in content.splitlines()[1:]:  # Skip header
        try:
            # Parse one line at a time so a single bad line can be skipped
            # without aborting the whole load.
            parts = next(csv.reader([line.strip()]), [])
        except csv.Error:
            continue  # Skip malformed lines

        if len(parts) < 4:
            continue
        cc_raw, label, model_domain, model_version = parts[:4]

        # Strip the prefix to get just the number. 'RxHCC' must be removed
        # before 'HCC'; the other order turns 'RxHCC12' into 'Rx12'.
        cc = cc_raw.replace('RxHCC', '').replace('HCC', '')

        # Construct model name based on domain; only ESRD carries the
        # extra "CMS-HCC" lead-in.
        if model_domain == 'ESRD':
            model_name = f"CMS-HCC {model_domain} Model {model_version}"
        else:
            model_name = f"{model_domain} Model {model_version}"

        # Keep the first label seen for a given key.
        labels.setdefault((cc, model_name), label)

    return labels
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: hccinfhir
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: HCC Algorithm for FHIR Resources
|
|
5
5
|
Project-URL: Homepage, https://github.com/mimilabs/hccinfhir
|
|
6
6
|
Project-URL: Issues, https://github.com/mimilabs/hccinfhir/issues
|
|
@@ -10,6 +10,7 @@ Classifier: Operating System :: OS Independent
|
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Requires-Python: >=3.8
|
|
12
12
|
Requires-Dist: pydantic>=2.10.3
|
|
13
|
+
Requires-Dist: typing-extensions>=4.6.0
|
|
13
14
|
Description-Content-Type: text/markdown
|
|
14
15
|
|
|
15
16
|
# HCCInFHIR
|
|
@@ -62,6 +63,7 @@ print(f"HCCs: {result.hcc_list}")
|
|
|
62
63
|
- [Demographic Prefix Override](#demographic-prefix-override)
|
|
63
64
|
- [Custom File Path Resolution](#custom-file-path-resolution)
|
|
64
65
|
- [Batch Processing](#batch-processing)
|
|
66
|
+
- [Large-Scale Processing with Databricks](#large-scale-processing-with-databricks)
|
|
65
67
|
- [Converting to Dictionaries](#converting-to-dictionaries)
|
|
66
68
|
- [Sample Data](#sample-data)
|
|
67
69
|
- [Testing](#testing)
|
|
@@ -78,7 +80,7 @@ print(f"HCCs: {result.hcc_list}")
|
|
|
78
80
|
- **Custom Data Files**: Full support for custom coefficients, mappings, and hierarchies
|
|
79
81
|
- **Flexible File Resolution**: Absolute paths, relative paths, or bundled data files
|
|
80
82
|
- **Type-Safe**: Built on Pydantic with full type hints
|
|
81
|
-
- **Well-Tested**:
|
|
83
|
+
- **Well-Tested**: 189 comprehensive tests covering all features
|
|
82
84
|
|
|
83
85
|
## 📊 Data Sources & Use Cases
|
|
84
86
|
|
|
@@ -834,6 +836,146 @@ with open("risk_scores.json", "w") as f:
|
|
|
834
836
|
json.dump(results, f, indent=2)
|
|
835
837
|
```
|
|
836
838
|
|
|
839
|
+
### Large-Scale Processing with Databricks
|
|
840
|
+
|
|
841
|
+
For processing millions of beneficiaries, use PySpark's `pandas_udf` for distributed computation. The hccinfhir logic is well-suited for batch operations with clear, simple transformations.
|
|
842
|
+
|
|
843
|
+
**Performance Benchmark**:
|
|
844
|
+
|
|
845
|
+

|
|
846
|
+
|
|
847
|
+
*Tested with ACO data on Databricks Runtime 17.3 LTS, Worker: i3.4xlarge (122GB, 16 cores)*
|
|
848
|
+
|
|
849
|
+
The chart shows execution time varies based on condition complexity - members with more diagnoses require additional internal processing loops. While the relationship isn't perfectly linear, **1 million members can be processed in under 2 minutes** with this configuration.
|
|
850
|
+
|
|
851
|
+
```python
|
|
852
|
+
from pyspark.sql import SparkSession
|
|
853
|
+
from pyspark.sql.types import StructType, StructField, FloatType, ArrayType, StringType
|
|
854
|
+
from pyspark.sql import functions as F
|
|
855
|
+
from pyspark.sql.functions import pandas_udf
|
|
856
|
+
import pandas as pd
|
|
857
|
+
|
|
858
|
+
from hccinfhir import HCCInFHIR, Demographics
|
|
859
|
+
|
|
860
|
+
# Define the return schema
|
|
861
|
+
hcc_schema = StructType([
|
|
862
|
+
StructField("risk_score", FloatType(), True),
|
|
863
|
+
StructField("risk_score_demographics", FloatType(), True),
|
|
864
|
+
StructField("risk_score_chronic_only", FloatType(), True),
|
|
865
|
+
StructField("risk_score_hcc", FloatType(), True),
|
|
866
|
+
StructField("hcc_list", ArrayType(StringType()), True)
|
|
867
|
+
])
|
|
868
|
+
|
|
869
|
+
# Initialize processor (will be serialized to each executor)
|
|
870
|
+
hcc_processor = HCCInFHIR(model_name="CMS-HCC Model V28")
|
|
871
|
+
|
|
872
|
+
# Create the pandas UDF
|
|
873
|
+
@pandas_udf(hcc_schema)
def calculate_hcc(
    age_series: pd.Series,
    sex_series: pd.Series,
    diagnosis_series: pd.Series
) -> pd.DataFrame:
    """Score one batch of rows and return one output row per input row.

    Each row builds a Demographics from age and sex and is scored with the
    module-level ``hcc_processor``. A row that raises is reported with
    ``print`` and emitted with every output field set to None.
    """
    score_columns = (
        'risk_score',
        'risk_score_demographics',
        'risk_score_chronic_only',
        'risk_score_hcc',
    )
    rows = []

    for age, sex, diagnosis_codes in zip(age_series, sex_series, diagnosis_series):
        try:
            # diagnosis_codes can be passed directly - accepts any iterable including numpy arrays
            scored = hcc_processor.calculate_from_diagnosis(
                diagnosis_codes,
                Demographics(age=int(age), sex=sex),
            )
            row = {column: float(getattr(scored, column)) for column in score_columns}
            row['hcc_list'] = scored.hcc_list
        except Exception as e:
            # Log error and return nulls for failed rows
            print(f"ERROR processing row: {e}")
            row = dict.fromkeys(score_columns + ('hcc_list',))
        rows.append(row)

    return pd.DataFrame(rows)
|
|
907
|
+
|
|
908
|
+
# Apply the UDF to your DataFrame
|
|
909
|
+
# Assumes df has columns: age, patient_gender, diagnosis_codes (array of strings)
|
|
910
|
+
df = df.withColumn(
|
|
911
|
+
"hcc_results",
|
|
912
|
+
calculate_hcc(
|
|
913
|
+
F.col("age"),
|
|
914
|
+
F.col("patient_gender"),
|
|
915
|
+
F.col("diagnosis_codes")
|
|
916
|
+
)
|
|
917
|
+
)
|
|
918
|
+
|
|
919
|
+
# Expand the struct into separate columns
|
|
920
|
+
df = df.select(
|
|
921
|
+
"*",
|
|
922
|
+
F.col("hcc_results.risk_score").alias("risk_score"),
|
|
923
|
+
F.col("hcc_results.risk_score_demographics").alias("risk_score_demographics"),
|
|
924
|
+
F.col("hcc_results.risk_score_chronic_only").alias("risk_score_chronic_only"),
|
|
925
|
+
F.col("hcc_results.risk_score_hcc").alias("risk_score_hcc"),
|
|
926
|
+
F.col("hcc_results.hcc_list").alias("hcc_list")
|
|
927
|
+
).drop("hcc_results")
|
|
928
|
+
```
|
|
929
|
+
|
|
930
|
+
**Performance Tips**:
|
|
931
|
+
- **Repartition** your DataFrame before applying the UDF to balance workload across executors
|
|
932
|
+
- **Cache** the processor initialization by defining it at module level
|
|
933
|
+
- **Batch size**: pandas_udf processes data in batches; Spark handles optimal batch sizing automatically
|
|
934
|
+
- **Install hccinfhir** on all cluster nodes: `%pip install hccinfhir` in a notebook cell or add to cluster init script
|
|
935
|
+
|
|
936
|
+
**Extended Schema with Demographics**:
|
|
937
|
+
|
|
938
|
+
```python
|
|
939
|
+
# Include additional demographic parameters
|
|
940
|
+
@pandas_udf(hcc_schema)
def calculate_hcc_full(
    age_series: pd.Series,
    sex_series: pd.Series,
    dual_status_series: pd.Series,
    diagnosis_series: pd.Series
) -> pd.DataFrame:
    """Score one batch of rows, including dual-eligibility status.

    Each row builds a Demographics from age, sex and dual status; a falsy
    dual status falls back to "00". The row is scored with the module-level
    ``hcc_processor``. A row that raises is reported with ``print`` and
    emitted with every output field set to None, so the batch still returns
    one output row per input row.
    """
    results = []

    for age, sex, dual_status, diagnosis_codes in zip(
        age_series, sex_series, dual_status_series, diagnosis_series
    ):
        try:
            demographics = Demographics(
                age=int(age),
                sex=sex,
                dual_elgbl_cd=dual_status if dual_status else "00"
            )
            result = hcc_processor.calculate_from_diagnosis(diagnosis_codes, demographics)

            results.append({
                'risk_score': float(result.risk_score),
                'risk_score_demographics': float(result.risk_score_demographics),
                'risk_score_chronic_only': float(result.risk_score_chronic_only),
                'risk_score_hcc': float(result.risk_score_hcc),
                'hcc_list': result.hcc_list
            })
        except Exception as e:
            # Log error and return nulls for failed rows, so a failure is
            # visible rather than appearing only as a null result.
            print(f"ERROR processing row: {e}")
            results.append({
                'risk_score': None,
                'risk_score_demographics': None,
                'risk_score_chronic_only': None,
                'risk_score_hcc': None,
                'hcc_list': None
            })

    return pd.DataFrame(results)
|
|
977
|
+
```
|
|
978
|
+
|
|
837
979
|
### Converting to Dictionaries
|
|
838
980
|
|
|
839
981
|
All Pydantic models support dictionary conversion for JSON serialization, database storage, or legacy code:
|
|
@@ -895,8 +1037,8 @@ result = processor.run([eob], demographics) # Note: [eob] not eob
|
|
|
895
1037
|
claim = get_837_sample(0) # Cases 0-12 (returns string)
|
|
896
1038
|
claims = get_837_sample_list([0, 1, 2]) # Returns list
|
|
897
1039
|
|
|
898
|
-
# X12 834 enrollment samples
|
|
899
|
-
enrollment_834 = get_834_sample(1) #
|
|
1040
|
+
# X12 834 enrollment samples (6 CA DHCS scenarios)
|
|
1041
|
+
enrollment_834 = get_834_sample(1) # Cases 1-6 available (returns string)
|
|
900
1042
|
|
|
901
1043
|
# List all available samples
|
|
902
1044
|
info = list_available_samples()
|
|
@@ -914,7 +1056,7 @@ hatch shell
|
|
|
914
1056
|
# Install in development mode
|
|
915
1057
|
pip install -e .
|
|
916
1058
|
|
|
917
|
-
# Run all tests (
|
|
1059
|
+
# Run all tests (189 tests)
|
|
918
1060
|
pytest tests/
|
|
919
1061
|
|
|
920
1062
|
# Run specific test file
|
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
hccinfhir/__init__.py,sha256=
|
|
1
|
+
hccinfhir/__init__.py,sha256=3aFYtjTklZJg3wIlnMJNgfDBaDCfKXVlYsacdsZ9L4I,1113
|
|
2
2
|
hccinfhir/constants.py,sha256=C4Vyjtzgyd4Jm2I2X6cTYQZLe-jAMC8boUcy-7OXQDQ,8473
|
|
3
|
-
hccinfhir/datamodels.py,sha256=
|
|
4
|
-
hccinfhir/defaults.py,sha256=
|
|
3
|
+
hccinfhir/datamodels.py,sha256=xGh9E5RVi4vONhtIZw2XiaFwVLc5UK027trY31YMUWc,15457
|
|
4
|
+
hccinfhir/defaults.py,sha256=aKdXPhf9bYUzpGvXM1GIXZaKxqkKInt3v9meLB9fWog,1394
|
|
5
5
|
hccinfhir/extractor.py,sha256=xL9c2VT-e2I7_c8N8j4Og42UEgVuCzyn9WFp3ntM5Ro,1822
|
|
6
|
-
hccinfhir/extractor_834.py,sha256=
|
|
6
|
+
hccinfhir/extractor_834.py,sha256=zH2nOUJvIJvbDLf6HJWmwCw2yAjT-6RCJyuH4kmIKIQ,27862
|
|
7
7
|
hccinfhir/extractor_837.py,sha256=fGsvBTWIj9dsHLGGR67AdlYDSsFi5qnSVlTgwkL1f-E,15334
|
|
8
8
|
hccinfhir/extractor_fhir.py,sha256=wUN3vTm1oTZ-KvfcDebnpQMxAC-7YlRKv12Wrv3p85A,8490
|
|
9
9
|
hccinfhir/filter.py,sha256=j_yD2g6RBXVUV9trKkWzsQ35x3fRvfKUPvEXKUefI64,2007
|
|
10
|
-
hccinfhir/hccinfhir.py,sha256=
|
|
11
|
-
hccinfhir/model_calculate.py,sha256=
|
|
10
|
+
hccinfhir/hccinfhir.py,sha256=NydnH3WBvuyskn76hY70LpUS6XuIEoax_kip1mgfpHw,11225
|
|
11
|
+
hccinfhir/model_calculate.py,sha256=_TUWNVUsBym0pre3wltXvRuipQaONQ0QBfWPFNAeDsQ,8347
|
|
12
12
|
hccinfhir/model_coefficients.py,sha256=5n3QzHX6FJ3MlO0cV9NS7Bqt-lxzVvT_M3zFaWq6Gng,4685
|
|
13
13
|
hccinfhir/model_demographics.py,sha256=nImKtJCq1HkR9w2GU8aikybJFgow71CPufBRV8Jn7fM,8932
|
|
14
14
|
hccinfhir/model_dx_to_cc.py,sha256=Yjc6xKI-jMXsbOzS_chc4NI15Bwagb7BwZZ8cKQaTbk,1540
|
|
15
15
|
hccinfhir/model_hierarchies.py,sha256=cboUnSHZZfOxA8QZKV4QIE-32duElssML32OqYT-65g,1542
|
|
16
16
|
hccinfhir/model_interactions.py,sha256=g6jK27Xu8RQUHS3lk4sk2v6w6wqd52mdbGn0BsnR7Pk,21394
|
|
17
17
|
hccinfhir/samples.py,sha256=2VSWS81cv9EnaHqK7sd6CjwG6FUI9E--5wHgD000REI,9952
|
|
18
|
-
hccinfhir/utils.py,sha256=
|
|
18
|
+
hccinfhir/utils.py,sha256=hQgHjuOcEQcnxemTZwqFBHWvLC5-C1Gup9cDXEYlZjE,10770
|
|
19
19
|
hccinfhir/data/__init__.py,sha256=SGiSkpGrnxbvtEFMMlk82NFHOE50hFXcgKwKUSuVZUg,45
|
|
20
20
|
hccinfhir/data/hcc_is_chronic.csv,sha256=Bwd-RND6SdEsKP-assoBaXnjUJAuDXhSkwWlymux72Y,19701
|
|
21
21
|
hccinfhir/data/hcc_is_chronic_without_esrd_model.csv,sha256=eVVI4_8mQNkiBiNO3kattfT_zfcV18XgmiltdzZEXSo,17720
|
|
@@ -29,8 +29,14 @@ hccinfhir/data/ra_eligible_cpt_hcpcs_2025.csv,sha256=-tMvv2su5tsSbGUh6fZZCMUEkXI
|
|
|
29
29
|
hccinfhir/data/ra_eligible_cpt_hcpcs_2026.csv,sha256=EYGN7k_rgCpJe59lL_yNInUcCkdETDWGSFTXII3LZ0Y,40497
|
|
30
30
|
hccinfhir/data/ra_hierarchies_2025.csv,sha256=HQSPNloe6mvvwMgv8ZwYAfWKkT2b2eUvm4JQy6S_mVQ,13045
|
|
31
31
|
hccinfhir/data/ra_hierarchies_2026.csv,sha256=A6ZQZb0rpRWrySBB_KA5S4PGtMxWuzB2guU3aBE09v0,19596
|
|
32
|
+
hccinfhir/data/ra_labels_2026.csv,sha256=YstfP7s-3ZwjP4I_GYPPj3_yn-PQK3Q0Q_MVYZhsfjY,50248
|
|
32
33
|
hccinfhir/sample_files/__init__.py,sha256=SGiSkpGrnxbvtEFMMlk82NFHOE50hFXcgKwKUSuVZUg,45
|
|
33
34
|
hccinfhir/sample_files/sample_834_01.txt,sha256=J2HMXfY6fAFpV36rvLQ3QymRRS2TPqf3TQY6CNS7TrE,1627
|
|
35
|
+
hccinfhir/sample_files/sample_834_02.txt,sha256=vSvjM69kKfOW9e-8dvlO9zDcRPpOD7LmekLu68z4aB4,926
|
|
36
|
+
hccinfhir/sample_files/sample_834_03.txt,sha256=pD4UTUFCEHxKu3bz3ZZdWo5b8Y1UWTXAo7PmFjWsukU,999
|
|
37
|
+
hccinfhir/sample_files/sample_834_04.txt,sha256=1Cv8kN7At1ce60kXBGlMr5DhLMsuw2clnVDi02mvBJA,991
|
|
38
|
+
hccinfhir/sample_files/sample_834_05.txt,sha256=hjQ5SEgj0cCNZyWas1-sVL9gm8m3rC4R65JHIcbAPRk,993
|
|
39
|
+
hccinfhir/sample_files/sample_834_06.txt,sha256=oC3e4UMmmhVKXI8eB7SlLyJ8kZX4NemlzI_WGznO-48,1659
|
|
34
40
|
hccinfhir/sample_files/sample_837_0.txt,sha256=eggrD259uHa05z2dfxWBpUDseSDp_AQcLyN_adpHyTw,5295
|
|
35
41
|
hccinfhir/sample_files/sample_837_1.txt,sha256=E155MdemSDYoXokuTXUZ6Br_RGGedYv5t5dh-eMRmuk,1322
|
|
36
42
|
hccinfhir/sample_files/sample_837_10.txt,sha256=zSJXI78vHAksA7FFQEVLvepefdpMM2_AexLyoDimV3Q,1129
|
|
@@ -48,7 +54,7 @@ hccinfhir/sample_files/sample_eob_1.json,sha256=_NGSVR2ysFpx-DcTvyga6dFCzhQ8Vi9f
|
|
|
48
54
|
hccinfhir/sample_files/sample_eob_2.json,sha256=FcnJcx0ApOczxjJ_uxVLzCep9THfNf4xs9Yf7hxk8e4,1769
|
|
49
55
|
hccinfhir/sample_files/sample_eob_200.ndjson,sha256=CxpjeQ1DCMUzZILaM68UEhfxO0p45YGhDDoCZeq8PxU,1917986
|
|
50
56
|
hccinfhir/sample_files/sample_eob_3.json,sha256=4BW4wOMBEEU9RDfJR15rBEvk0KNHyuMEh3e055y87Hc,2306
|
|
51
|
-
hccinfhir-0.2.
|
|
52
|
-
hccinfhir-0.2.
|
|
53
|
-
hccinfhir-0.2.
|
|
54
|
-
hccinfhir-0.2.
|
|
57
|
+
hccinfhir-0.2.3.dist-info/METADATA,sha256=YHHcOAObdo2gWJtPmP6y05-EXeXHpuE40W1pdUXlydw,37132
|
|
58
|
+
hccinfhir-0.2.3.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
|
59
|
+
hccinfhir-0.2.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
60
|
+
hccinfhir-0.2.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|