hccinfhir 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hccinfhir/datamodels.py CHANGED
@@ -93,6 +93,22 @@ PrefixOverride = Literal[
93
93
  "Rx_NE_LTI_", # New Enrollee, Long-Term Institutionalized
94
94
  ]
95
95
 
96
class HCCDetail(BaseModel):
    """
    One HCC category together with its descriptive metadata.

    Attributes:
        hcc: HCC code (e.g., "18", "85"); required
        label: Human-readable description (e.g., "Diabetes with Chronic Complications");
            defaults to None
        is_chronic: Whether this HCC is considered a chronic condition; defaults to False
        coefficient: The coefficient value applied for this HCC in the RAF calculation;
            defaults to None
    """
    # Required field: Ellipsis marks "no default".
    hcc: str = Field(..., description="HCC code (e.g., '18', '85')")
    label: Optional[str] = Field(
        default=None,
        description="Human-readable HCC description",
    )
    is_chronic: bool = Field(
        default=False,
        description="Whether this HCC is a chronic condition",
    )
    coefficient: Optional[float] = Field(
        default=None,
        description="Coefficient value for this HCC",
    )
110
+
111
+
96
112
  class ServiceLevelData(BaseModel):
97
113
  """
98
114
  Represents standardized service-level data extracted from healthcare claims.
@@ -167,6 +183,7 @@ class RAFResult(BaseModel):
167
183
  risk_score_hcc: float = Field(..., description="HCC conditions risk score")
168
184
  risk_score_payment: float = Field(..., description="Payment RAF score (adjusted for MACI, normalization, and frailty)")
169
185
  hcc_list: List[str] = Field(default_factory=list, description="List of active HCC categories")
186
+ hcc_details: List[HCCDetail] = Field(default_factory=list, description="Detailed HCC information with labels and chronic status")
170
187
  cc_to_dx: Dict[str, Set[str]] = Field(default_factory=dict, description="Condition categories mapped to diagnosis codes")
171
188
  coefficients: Dict[str, float] = Field(default_factory=dict, description="Applied model coefficients")
172
189
  interactions: Dict[str, float] = Field(default_factory=dict, description="Disease interaction coefficients")
hccinfhir/defaults.py CHANGED
@@ -15,7 +15,8 @@ from hccinfhir.utils import (
15
15
  load_hierarchies,
16
16
  load_is_chronic,
17
17
  load_coefficients,
18
- load_proc_filtering
18
+ load_proc_filtering,
19
+ load_labels
19
20
  )
20
21
 
21
22
  # Load all default data files once at module import time
@@ -29,3 +30,4 @@ hierarchies_default: Dict[Tuple[str, ModelName], Set[str]] = load_hierarchies('r
29
30
  is_chronic_default: Dict[Tuple[str, ModelName], bool] = load_is_chronic('hcc_is_chronic.csv')
30
31
  coefficients_default: Dict[Tuple[str, ModelName], float] = load_coefficients('ra_coefficients_2026.csv')
31
32
  proc_filtering_default: Set[str] = load_proc_filtering('ra_eligible_cpt_hcpcs_2026.csv')
33
+ labels_default: Dict[Tuple[str, ModelName], str] = load_labels('ra_labels_2026.csv')
@@ -1,29 +1,23 @@
1
- from typing import List, Optional, Dict
1
+ from typing import List, Optional, Dict, Any, Tuple
2
2
  from pydantic import BaseModel
3
3
  from datetime import datetime, date
4
4
  from hccinfhir.datamodels import Demographics, EnrollmentData
5
+ from hccinfhir.constants import (
6
+ VALID_DUAL_CODES,
7
+ FULL_BENEFIT_DUAL_CODES,
8
+ PARTIAL_BENEFIT_DUAL_CODES,
9
+ VALID_OREC_VALUES,
10
+ VALID_CREC_VALUES,
11
+ X12_SEX_CODE_MAPPING,
12
+ NON_DUAL_CODE,
13
+ map_medicare_status_to_dual_code,
14
+ map_aid_code_to_dual_status,
15
+ )
5
16
 
6
17
  TRANSACTION_TYPES = {
7
18
  "005010X220A1": "834", # Benefit Enrollment and Maintenance
8
19
  }
9
20
 
10
- # California Medi-Cal Aid Codes mapping to dual eligibility status
11
- MEDI_CAL_AID_CODES = {
12
- # Full Benefit Dual (QMB Plus, SLMB Plus)
13
- '4N': '02', # QMB Plus - Aged
14
- '4P': '02', # QMB Plus - Disabled
15
- '5B': '04', # SLMB Plus - Aged
16
- '5D': '04', # SLMB Plus - Disabled
17
-
18
- # Partial Benefit Dual (QMB Only, SLMB Only, QI)
19
- '4M': '01', # QMB Only - Aged
20
- '4O': '01', # QMB Only - Disabled
21
- '5A': '03', # SLMB Only - Aged
22
- '5C': '03', # SLMB Only - Disabled
23
- '5E': '06', # QI - Aged
24
- '5F': '06', # QI - Disabled
25
- }
26
-
27
21
  class MemberContext(BaseModel):
28
22
  """Tracks member-level data across segments within 834 transaction"""
29
23
  # Identifiers
@@ -95,7 +89,7 @@ def is_medicaid_terminated(enrollment: EnrollmentData) -> bool:
95
89
  """Check if Medicaid coverage is being terminated (maintenance type 024)"""
96
90
  return enrollment.maintenance_type == '024'
97
91
 
98
- def medicaid_status_summary(enrollment: EnrollmentData) -> Dict[str, any]:
92
+ def medicaid_status_summary(enrollment: EnrollmentData) -> Dict[str, Any]:
99
93
  """Get summary of Medicaid coverage status for monitoring
100
94
 
101
95
  Args:
@@ -157,45 +151,28 @@ def get_segment_value(segment: List[str], index: int, default: Optional[str] = N
157
151
  pass
158
152
  return default
159
153
 
160
- def map_medicare_status_to_dual_code(status: Optional[str]) -> Optional[str]:
161
- """Map Medicare status codes to dual eligibility codes
154
def parse_composite_ref_value(value: str) -> str:
    """Extract the identifier from a semicolon-delimited REF value.

    Some REF values arrive packed as 'qualifier;id;...' rather than as a bare
    ID. Example: REF*23*9;20061234; where 9 is the ID type qualifier.
    NOTE(review): the semicolon looks like a sender-specific convention inside
    the element value (X12 declares its component separator in ISA16) - confirm
    against the trading partner's companion guide.

    Args:
        value: Raw REF segment value (e.g., '9;20061234;' or '20061234')

    Returns:
        The last non-empty sub-element (the actual ID). A falsy value
        ('' or None) or a value without semicolons is returned unchanged.
        A value made up only of semicolons yields ''.
    """
    if not value or ';' not in value:
        return value

    sub_elements = [part for part in value.split(';') if part]
    # Separators with no payload carry no ID: return '' instead of handing the
    # raw separator string back to the caller as if it were an identifier.
    return sub_elements[-1] if sub_elements else ''
193
175
 
194
- def map_aid_code_to_dual_status(aid_code: Optional[str]) -> Optional[str]:
195
- """Map California Medi-Cal aid code to dual eligibility status"""
196
- if not aid_code:
197
- return None
198
- return MEDI_CAL_AID_CODES.get(aid_code)
199
176
 
200
177
  def determine_dual_status(member: MemberContext) -> str:
201
178
  """Intelligently derive dual eligibility code from available data
@@ -208,19 +185,19 @@ def determine_dual_status(member: MemberContext) -> str:
208
185
  5. Default to non-dual ('00')
209
186
  """
210
187
  # Priority 1: Explicit dual_elgbl_cd
211
- if member.dual_elgbl_cd and member.dual_elgbl_cd in ['01','02','03','04','05','06','08']:
188
+ if member.dual_elgbl_cd and member.dual_elgbl_cd in VALID_DUAL_CODES:
212
189
  return member.dual_elgbl_cd
213
190
 
214
191
  # Priority 2: California aid code mapping
215
192
  if member.medi_cal_aid_code:
216
193
  dual_code = map_aid_code_to_dual_status(member.medi_cal_aid_code)
217
- if dual_code:
194
+ if dual_code != NON_DUAL_CODE:
218
195
  return dual_code
219
196
 
220
197
  # Priority 3: Medicare status code
221
198
  if member.medicare_status_code:
222
199
  dual_code = map_medicare_status_to_dual_code(member.medicare_status_code)
223
- if dual_code:
200
+ if dual_code != NON_DUAL_CODE:
224
201
  return dual_code
225
202
 
226
203
  # Priority 4: Both Medicare and Medicaid coverage present
@@ -229,9 +206,9 @@ def determine_dual_status(member: MemberContext) -> str:
229
206
  return '08'
230
207
 
231
208
  # Default: Non-dual
232
- return '00'
209
+ return NON_DUAL_CODE
233
210
 
234
- def classify_dual_benefit_level(dual_code: str) -> tuple[bool, bool]:
211
+ def classify_dual_benefit_level(dual_code: str) -> Tuple[bool, bool]:
235
212
  """Classify as Full Benefit Dual (FBD) or Partial Benefit Dual (PBD)
236
213
 
237
214
  Full Benefit Dual codes: 02, 04, 08
@@ -242,11 +219,8 @@ def classify_dual_benefit_level(dual_code: str) -> tuple[bool, bool]:
242
219
  - Uses CPA_ (Community, Partial Benefit Dual, Aged) prefix
243
220
  - Uses CPD_ (Community, Partial Benefit Dual, Disabled) prefix
244
221
  """
245
- full_benefit_codes = {'02', '04', '08'}
246
- partial_benefit_codes = {'01', '03', '05', '06'}
247
-
248
- is_fbd = dual_code in full_benefit_codes
249
- is_pbd = dual_code in partial_benefit_codes
222
+ is_fbd = dual_code in FULL_BENEFIT_DUAL_CODES
223
+ is_pbd = dual_code in PARTIAL_BENEFIT_DUAL_CODES
250
224
 
251
225
  return is_fbd, is_pbd
252
226
 
@@ -284,7 +258,7 @@ def parse_834_enrollment(segments: List[List[str]]) -> List[EnrollmentData]:
284
258
  enrollments = []
285
259
  member = MemberContext()
286
260
 
287
- for i, segment in enumerate(segments):
261
+ for segment in segments:
288
262
  if len(segment) < 2:
289
263
  continue
290
264
 
@@ -330,11 +304,11 @@ def parse_834_enrollment(segments: List[List[str]]) -> List[EnrollmentData]:
330
304
 
331
305
  # Medicaid Identifiers
332
306
  elif qualifier == '1D': # Medicaid/Recipient ID
333
- member.medicaid_id = value
307
+ member.medicaid_id = parse_composite_ref_value(value)
334
308
  member.has_medicaid = True
335
309
  elif qualifier == '23': # Medicaid Recipient ID (alternative)
336
310
  if not member.medicaid_id:
337
- member.medicaid_id = value
311
+ member.medicaid_id = parse_composite_ref_value(value)
338
312
  member.has_medicaid = True
339
313
 
340
314
  # California Medi-Cal Specific
@@ -345,13 +319,13 @@ def parse_834_enrollment(segments: List[List[str]]) -> List[EnrollmentData]:
345
319
 
346
320
  # Custom dual eligibility indicators
347
321
  elif qualifier == 'F5': # Dual Eligibility Code (custom)
348
- if value in ['01','02','03','04','05','06','08']:
322
+ if value in VALID_DUAL_CODES:
349
323
  member.dual_elgbl_cd = value
350
324
  elif qualifier == 'DX': # OREC (custom)
351
- if value in ['0','1','2','3']:
325
+ if value in VALID_OREC_VALUES:
352
326
  member.orec = value
353
327
  elif qualifier == 'DY': # CREC (custom)
354
- if value in ['0','1','2','3']:
328
+ if value in VALID_CREC_VALUES:
355
329
  member.crec = value
356
330
  elif qualifier == 'EJ': # Low Income Subsidy indicator
357
331
  member.low_income = (value.upper() in ['Y', 'YES', '1', 'TRUE'])
@@ -376,8 +350,8 @@ def parse_834_enrollment(segments: List[List[str]]) -> List[EnrollmentData]:
376
350
 
377
351
  # DMG03 = Gender Code
378
352
  sex = get_segment_value(segment, 3)
379
- if sex in ['M', 'F', '1', '2']:
380
- member.sex = 'M' if sex in ['M', '1'] else 'F'
353
+ if sex in X12_SEX_CODE_MAPPING:
354
+ member.sex = X12_SEX_CODE_MAPPING[sex]
381
355
 
382
356
  # ===== DTP - Date Time Periods =====
383
357
  elif seg_id == 'DTP' and len(segment) >= 4:
@@ -440,6 +414,13 @@ def parse_834_enrollment(segments: List[List[str]]) -> List[EnrollmentData]:
440
414
  member.has_medicare = True
441
415
  member.has_medicaid = True
442
416
 
417
+ # Detect LTI (Long Term Institutionalized)
418
+ if any(keyword in combined for keyword in [
419
+ 'LTC', 'LONG TERM CARE', 'LONG-TERM CARE', 'NURSING HOME',
420
+ 'SKILLED NURSING', 'SNF', 'INSTITUTIONALIZED'
421
+ ]):
422
+ member.lti = True
423
+
443
424
  # Don't forget last member
444
425
  if member.member_id or member.has_medicare or member.has_medicaid:
445
426
  enrollments.append(create_enrollment_data(member))
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Dict
1
+ from typing import List, Optional, Dict, Tuple
2
2
  from pydantic import BaseModel
3
3
  from hccinfhir.datamodels import ServiceLevelData
4
4
 
@@ -65,7 +65,7 @@ def parse_diagnosis_codes(segment: List[str]) -> Dict[str, str]:
65
65
  dx_lookup[str(pos)] = code
66
66
  return dx_lookup
67
67
 
68
- def process_service_line(segments: List[List[str]], start_index: int) -> tuple[Optional[str], Optional[str]]:
68
+ def process_service_line(segments: List[List[str]], start_index: int) -> Tuple[Optional[str], Optional[str]]:
69
69
  """Extract NDC and service date from service line segments"""
70
70
  ndc = None
71
71
  service_date = None
hccinfhir/hccinfhir.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import List, Dict, Any, Union, Optional, Tuple, Set
1
+ from typing import List, Dict, Any, Union, Optional, Tuple, Set, Iterable
2
2
  from hccinfhir.extractor import extract_sld_list
3
3
  from hccinfhir.filter import apply_filter
4
4
  from hccinfhir.model_calculate import calculate_raf
@@ -184,16 +184,16 @@ class HCCInFHIR:
184
184
  # Create new result with service data included
185
185
  return raf_result.model_copy(update={'service_level_data': standardized_data})
186
186
 
187
- def calculate_from_diagnosis(self, diagnosis_codes: List[str],
187
+ def calculate_from_diagnosis(self, diagnosis_codes: Iterable[str],
188
188
  demographics: Union[Demographics, Dict[str, Any]],
189
189
  prefix_override: Optional[PrefixOverride] = None,
190
190
  maci: float = 0.0,
191
191
  norm_factor: float = 1.0,
192
192
  frailty_score: float = 0.0) -> RAFResult:
193
- """Calculate RAF scores from a list of diagnosis codes.
193
+ """Calculate RAF scores from diagnosis codes.
194
194
 
195
195
  Args:
196
- diagnosis_codes: List of diagnosis codes
196
+ diagnosis_codes: Iterable of diagnosis codes (list, tuple, numpy array, etc.)
197
197
  demographics: Demographics information
198
198
  prefix_override: Optional prefix to override auto-detected demographic prefix.
199
199
  Use when demographic categorization is incorrect (e.g., ESRD patients with orec=0).
@@ -201,14 +201,14 @@ class HCCInFHIR:
201
201
  norm_factor: Normalization factor (default 1.0)
202
202
  frailty_score: Frailty adjustment score (default 0.0)
203
203
 
204
- Raises:
205
- ValueError: If diagnosis_codes is empty or not a list
204
+ Returns:
205
+ RAFResult object containing calculated scores
206
206
  """
207
- if not isinstance(diagnosis_codes, list):
208
- raise ValueError("diagnosis_codes must be a list")
209
-
207
+ # Convert to list to ensure consistent handling downstream
208
+ diagnosis_list = list(diagnosis_codes) if diagnosis_codes is not None else []
209
+
210
210
  demographics = self._ensure_demographics(demographics)
211
211
  raf_result = self._calculate_raf_from_demographics_and_dx_codes(
212
- diagnosis_codes, demographics, prefix_override, maci, norm_factor, frailty_score
212
+ diagnosis_list, demographics, prefix_override, maci, norm_factor, frailty_score
213
213
  )
214
214
  return raf_result
@@ -1,11 +1,11 @@
1
1
  from typing import List, Union, Dict, Tuple, Set, Optional
2
- from hccinfhir.datamodels import ModelName, RAFResult, PrefixOverride
2
+ from hccinfhir.datamodels import ModelName, RAFResult, PrefixOverride, HCCDetail
3
3
  from hccinfhir.model_demographics import categorize_demographics
4
4
  from hccinfhir.model_dx_to_cc import apply_mapping
5
5
  from hccinfhir.model_hierarchies import apply_hierarchies
6
6
  from hccinfhir.model_coefficients import apply_coefficients
7
7
  from hccinfhir.model_interactions import apply_interactions
8
- from hccinfhir.defaults import dx_to_cc_default, hierarchies_default, is_chronic_default, coefficients_default
8
+ from hccinfhir.defaults import dx_to_cc_default, hierarchies_default, is_chronic_default, coefficients_default, labels_default
9
9
 
10
10
  def calculate_raf(diagnosis_codes: List[str],
11
11
  model_name: ModelName = "CMS-HCC Model V28",
@@ -23,6 +23,7 @@ def calculate_raf(diagnosis_codes: List[str],
23
23
  is_chronic_mapping: Dict[Tuple[str, ModelName], bool] = is_chronic_default,
24
24
  hierarchies_mapping: Dict[Tuple[str, ModelName], Set[str]] = hierarchies_default,
25
25
  coefficients_mapping: Dict[Tuple[str, ModelName], float] = coefficients_default,
26
+ labels_mapping: Dict[Tuple[str, ModelName], str] = labels_default,
26
27
  prefix_override: Optional[PrefixOverride] = None,
27
28
  maci: float = 0.0,
28
29
  norm_factor: float = 1.0,
@@ -47,6 +48,7 @@ def calculate_raf(diagnosis_codes: List[str],
47
48
  is_chronic_mapping: Mapping of HCCs to a chronic flag for the selected model; defaults to packaged mappings.
48
49
  hierarchies_mapping: Mapping of parent HCCs to child HCCs for hierarchical rules; defaults to packaged 2026 mappings.
49
50
  coefficients_mapping: Mapping of coefficient names to values; defaults to packaged 2026 mappings.
51
+ labels_mapping: Mapping of (cc, model_name) to human-readable HCC labels; defaults to packaged 2026 mappings.
50
52
  prefix_override: Optional prefix to override auto-detected demographic prefix.
51
53
  Use when demographic categorization from orec/crec is incorrect.
52
54
  Common values: 'DI_' (ESRD Dialysis), 'DNE_' (ESRD Dialysis New Enrollee),
@@ -136,6 +138,19 @@ def calculate_raf(diagnosis_codes: List[str],
136
138
  risk_score_hcc = risk_score - risk_score_demographics
137
139
  risk_score_payment = risk_score * (1 - maci) / norm_factor + frailty_score
138
140
 
141
+ # Build HCC details with labels and chronic status
142
+ hcc_details = []
143
+ for hcc in hcc_set:
144
+ label = labels_mapping.get((hcc, model_name))
145
+ is_chronic = is_chronic_mapping.get((hcc, model_name), False)
146
+ coef = coefficients.get(hcc)
147
+ hcc_details.append(HCCDetail(
148
+ hcc=hcc,
149
+ label=label,
150
+ is_chronic=is_chronic,
151
+ coefficient=coef
152
+ ))
153
+
139
154
  return RAFResult(
140
155
  risk_score=risk_score,
141
156
  risk_score_demographics=risk_score_demographics,
@@ -143,6 +158,7 @@ def calculate_raf(diagnosis_codes: List[str],
143
158
  risk_score_hcc=risk_score_hcc,
144
159
  risk_score_payment=risk_score_payment,
145
160
  hcc_list=list(hcc_set),
161
+ hcc_details=hcc_details,
146
162
  cc_to_dx=cc_to_dx,
147
163
  coefficients=coefficients,
148
164
  interactions=interactions,
@@ -1,4 +1,4 @@
1
- from typing import Dict, Tuple, Optional
1
+ from typing import Dict, Tuple, Optional, Set
2
2
  from hccinfhir.datamodels import ModelName, Demographics, PrefixOverride
3
3
 
4
4
  def get_coefficent_prefix(demographics: Demographics,
@@ -65,7 +65,7 @@ def get_coefficent_prefix(demographics: Demographics,
65
65
 
66
66
 
67
67
  def apply_coefficients(demographics: Demographics,
68
- hcc_set: set[str],
68
+ hcc_set: Set[str],
69
69
  interactions: dict,
70
70
  model_name: ModelName,
71
71
  coefficients: Dict[Tuple[str, ModelName], float],
@@ -1,5 +1,18 @@
1
1
  from typing import Union, Optional
2
2
  from hccinfhir.datamodels import Demographics, PrefixOverride
3
+ from hccinfhir.constants import (
4
+ FULL_BENEFIT_DUAL_CODES,
5
+ PARTIAL_BENEFIT_DUAL_CODES,
6
+ OREC_ESRD_CODES,
7
+ CREC_ESRD_CODES,
8
+ ESRD_PREFIXES,
9
+ NEW_ENROLLEE_PREFIXES,
10
+ COMMUNITY_PREFIXES,
11
+ INSTITUTIONAL_PREFIXES,
12
+ FULL_BENEFIT_DUAL_PREFIXES,
13
+ PARTIAL_BENEFIT_DUAL_PREFIXES,
14
+ NON_DUAL_PREFIXES,
15
+ )
3
16
 
4
17
  def categorize_demographics(age: Union[int, float],
5
18
  sex: str,
@@ -75,56 +88,40 @@ def categorize_demographics(age: Union[int, float],
75
88
  disabled = age < 65 and (orec is not None and orec != "0")
76
89
  orig_disabled = (orec is not None and orec == '1') and not disabled
77
90
 
78
- # Reference: https://resdac.org/cms-data/variables/medicare-medicaid-dual-eligibility-code-january
79
- # Full benefit dual codes
80
- fbd_codes = {'02', '04', '08'}
81
-
82
- # Partial benefit dual codes
83
- pbd_codes = {'01', '03', '05', '06'}
84
-
85
- is_fbd = dual_elgbl_cd in fbd_codes
86
- is_pbd = dual_elgbl_cd in pbd_codes
91
+ # Reference: https://resdac.org/cms-data/variables/medicare-medicaid-dual-eligibility-code-january
92
+ is_fbd = dual_elgbl_cd in FULL_BENEFIT_DUAL_CODES
93
+ is_pbd = dual_elgbl_cd in PARTIAL_BENEFIT_DUAL_CODES
87
94
 
88
- esrd_orec = orec in {'2', '3', '6'}
89
- esrd_crec = crec in {'2', '3'} if crec else False
95
+ # ESRD detection from OREC/CREC (CMS official codes: 2=ESRD, 3=DIB+ESRD)
96
+ esrd_orec = orec in OREC_ESRD_CODES
97
+ esrd_crec = crec in CREC_ESRD_CODES if crec else False
90
98
  esrd = esrd_orec or esrd_crec
91
99
 
92
100
  # Override demographics based on prefix_override
93
101
  if prefix_override:
94
- # ESRD model prefixes
95
- esrd_prefixes = {'DI_', 'DNE_', 'GI_', 'GNE_', 'GFPA_', 'GFPN_', 'GNPA_', 'GNPN_'}
96
- # CMS-HCC new enrollee prefixes
97
- new_enrollee_prefixes = {'NE_', 'SNPNE_', 'DNE_', 'GNE_'}
98
- # CMS-HCC community prefixes
99
- community_prefixes = {'CNA_', 'CND_', 'CFA_', 'CFD_', 'CPA_', 'CPD_'}
100
- # Institutionalized prefix
101
- institutional_prefixes = {'INS_', 'GI_'}
102
-
103
- # TODO: RxHCC prefixes
104
-
105
102
  # Set esrd flag
106
- if prefix_override in esrd_prefixes:
103
+ if prefix_override in ESRD_PREFIXES:
107
104
  esrd = True
108
105
 
109
106
  # Set new_enrollee flag
110
- if prefix_override in new_enrollee_prefixes:
107
+ if prefix_override in NEW_ENROLLEE_PREFIXES:
111
108
  new_enrollee = True
112
- elif prefix_override in community_prefixes or prefix_override in institutional_prefixes:
109
+ elif prefix_override in COMMUNITY_PREFIXES or prefix_override in INSTITUTIONAL_PREFIXES:
113
110
  new_enrollee = False
114
111
 
115
112
  # Set dual eligibility flags based on prefix
116
- if prefix_override in {'CFA_', 'CFD_', 'GFPA_', 'GFPN_'}:
113
+ if prefix_override in FULL_BENEFIT_DUAL_PREFIXES:
117
114
  is_fbd = True
118
115
  is_pbd = False
119
- elif prefix_override in {'CPA_', 'CPD_'}:
116
+ elif prefix_override in PARTIAL_BENEFIT_DUAL_PREFIXES:
120
117
  is_fbd = False
121
118
  is_pbd = True
122
- elif prefix_override in {'CNA_', 'CND_', 'GNPA_', 'GNPN_'}:
119
+ elif prefix_override in NON_DUAL_PREFIXES:
123
120
  is_fbd = False
124
121
  is_pbd = False
125
122
 
126
123
  # Set lti flag based on prefix
127
- if prefix_override in institutional_prefixes:
124
+ if prefix_override in INSTITUTIONAL_PREFIXES:
128
125
  lti = True
129
126
 
130
127
  result_dict = {
@@ -1,7 +1,7 @@
1
1
  from hccinfhir.datamodels import Demographics, ModelName
2
- from typing import Optional
2
+ from typing import Optional, List, Set, Dict
3
3
 
4
- def has_any_hcc(hcc_list: list[str], hcc_set: set[str]) -> int:
4
def has_any_hcc(hcc_list: List[str], hcc_set: Set[str]) -> int:
    """Return 1 when at least one code in hcc_list is also in hcc_set, else 0."""
    overlap = hcc_set.intersection(hcc_list)
    return 1 if overlap else 0
7
7
 
@@ -81,7 +81,7 @@ def create_dual_interactions(demographics: Demographics) -> dict:
81
81
 
82
82
  return interactions
83
83
 
84
- def create_hcc_counts(hcc_set: set[str]) -> dict:
84
+ def create_hcc_counts(hcc_set: Set[str]) -> Dict:
85
85
  """Creates HCC count variables"""
86
86
  counts = {}
87
87
  hcc_count = len(hcc_set)
@@ -95,7 +95,7 @@ def create_hcc_counts(hcc_set: set[str]) -> dict:
95
95
 
96
96
  return counts
97
97
 
98
- def get_diagnostic_categories(model_name: ModelName, hcc_set: set[str]) -> dict:
98
+ def get_diagnostic_categories(model_name: ModelName, hcc_set: Set[str]) -> Dict:
99
99
  """Creates disease categories based on model version"""
100
100
  categories = {}
101
101
 
@@ -343,9 +343,9 @@ def create_disease_interactions(model_name: ModelName,
343
343
 
344
344
  return interactions
345
345
 
346
- def apply_interactions(demographics: Demographics,
347
- hcc_set: set[str],
348
- model_name: ModelName = "CMS-HCC Model V28") -> dict:
346
+ def apply_interactions(demographics: Demographics,
347
+ hcc_set: Set[str],
348
+ model_name: ModelName = "CMS-HCC Model V28") -> Dict:
349
349
  """
350
350
  Calculate HCC interactions across CMS models. Handles CMS-HCC, ESRD, and RxHCC models.
351
351
  """
hccinfhir/utils.py CHANGED
@@ -244,4 +244,71 @@ def load_coefficients(file_path: str) -> Dict[Tuple[str, ModelName], float]:
244
244
  except (ValueError, IndexError):
245
245
  continue # Skip malformed lines
246
246
 
247
- return coefficients
247
+ return coefficients
248
+
249
+
250
def load_labels(file_path: str) -> Dict[Tuple[str, ModelName], str]:
    """
    Load HCC labels from a CSV file.
    Expected format: cc,label,model_domain,model_version,...

    The first line is treated as a header and skipped. Rows with fewer than
    four columns are ignored. When the same (cc, model_name) pair appears more
    than once, the first label wins.

    Args:
        file_path: Filename or path to the CSV file

    Returns:
        Dictionary mapping (cc, model_name) to label string. cc has a leading
        'RxHCC' or 'HCC' prefix removed; model_name is
        'CMS-HCC ESRD Model <version>' for the ESRD domain and
        '<domain> Model <version>' for every other domain.

    Raises:
        FileNotFoundError: If file cannot be found
        RuntimeError: If file cannot be loaded or parsed
    """
    # Local import: keeps this function self-contained with respect to the
    # module's import block.
    import csv

    try:
        resolved_path = resolve_data_file(file_path)
        with open(resolved_path, "r", encoding="utf-8") as file:
            content = file.read()
    except FileNotFoundError as e:
        raise FileNotFoundError(f"Could not load labels: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Error loading labels file '{file_path}': {e}") from e

    labels: Dict[Tuple[str, ModelName], str] = {}

    try:
        # csv.reader handles quoted labels (with or without embedded commas)
        # and doubled quotes, which naive splitting on ',' gets wrong.
        for fields in csv.reader(content.splitlines()[1:]):  # Skip header
            if len(fields) < 4:
                continue  # Skip blank or malformed lines

            cc_raw, label, model_domain, model_version = (
                field.strip() for field in fields[:4]
            )

            # Strip the category prefix to get just the number. The longer
            # prefix must be tried first: 'RxHCC12' also contains 'HCC', and
            # removing that first would leave 'Rx12'.
            cc = cc_raw
            for prefix in ("RxHCC", "HCC"):
                if cc.startswith(prefix):
                    cc = cc[len(prefix):]
                    break

            # Only the ESRD domain carries the extra 'CMS-HCC' lead-in.
            if model_domain == 'ESRD':
                model_name = f"CMS-HCC {model_domain} Model {model_version}"
            else:
                model_name = f"{model_domain} Model {model_version}"

            # First occurrence of a key wins.
            labels.setdefault((cc, model_name), label)
    except csv.Error as e:
        raise RuntimeError(f"Error parsing labels file '{file_path}': {e}") from e

    return labels