hccinfhir 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. hccinfhir/data/ra_coefficients_2025.csv +6352 -0
  2. hccinfhir/data/ra_dx_to_cc_2025.csv +53952 -0
  3. hccinfhir/data/ra_eligible_cpt_hcpcs_2024.csv +6697 -0
  4. hccinfhir/data/ra_eligible_cpt_hcpcs_2025.csv +6725 -0
  5. hccinfhir/data/ra_hierarchies_2025.csv +487 -0
  6. hccinfhir/datamodels.py +101 -0
  7. hccinfhir/extractor.py +3 -3
  8. hccinfhir/extractor_837.py +1 -2
  9. hccinfhir/extractor_fhir.py +1 -1
  10. hccinfhir/filter.py +3 -5
  11. hccinfhir/hccinfhir.py +149 -0
  12. hccinfhir/model_calculate.py +95 -0
  13. hccinfhir/model_coefficients.py +143 -0
  14. hccinfhir/model_demographics.py +191 -0
  15. hccinfhir/model_dx_to_cc.py +54 -0
  16. hccinfhir/model_hierarchies.py +70 -0
  17. hccinfhir/model_interactions.py +342 -0
  18. hccinfhir/samples/__init__.py +2 -0
  19. hccinfhir/utils.py +51 -0
  20. {hccinfhir-0.0.3.dist-info → hccinfhir-0.0.5.dist-info}/METADATA +147 -9
  21. hccinfhir-0.0.5.dist-info/RECORD +42 -0
  22. hccinfhir/models.py +0 -44
  23. hccinfhir-0.0.3.dist-info/RECORD +0 -28
  24. /hccinfhir/{data → samples}/sample_837_0.txt +0 -0
  25. /hccinfhir/{data → samples}/sample_837_1.txt +0 -0
  26. /hccinfhir/{data → samples}/sample_837_10.txt +0 -0
  27. /hccinfhir/{data → samples}/sample_837_11.txt +0 -0
  28. /hccinfhir/{data → samples}/sample_837_2.txt +0 -0
  29. /hccinfhir/{data → samples}/sample_837_3.txt +0 -0
  30. /hccinfhir/{data → samples}/sample_837_4.txt +0 -0
  31. /hccinfhir/{data → samples}/sample_837_5.txt +0 -0
  32. /hccinfhir/{data → samples}/sample_837_6.txt +0 -0
  33. /hccinfhir/{data → samples}/sample_837_7.txt +0 -0
  34. /hccinfhir/{data → samples}/sample_837_8.txt +0 -0
  35. /hccinfhir/{data → samples}/sample_837_9.txt +0 -0
  36. /hccinfhir/{data → samples}/sample_eob_1.json +0 -0
  37. /hccinfhir/{data → samples}/sample_eob_2.json +0 -0
  38. /hccinfhir/{data → samples}/sample_eob_200.ndjson +0 -0
  39. /hccinfhir/{data → samples}/sample_eob_3.json +0 -0
  40. {hccinfhir-0.0.3.dist-info → hccinfhir-0.0.5.dist-info}/WHEEL +0 -0
  41. {hccinfhir-0.0.3.dist-info → hccinfhir-0.0.5.dist-info}/licenses/LICENSE +0 -0
hccinfhir/hccinfhir.py ADDED
@@ -0,0 +1,149 @@
1
+ from typing import List, Dict, Any, Union
2
+ from hccinfhir.extractor import extract_sld_list
3
+ from hccinfhir.filter import apply_filter
4
+ from hccinfhir.model_calculate import calculate_raf
5
+ from hccinfhir.datamodels import Demographics, ServiceLevelData, RAFResult, ModelName, ProcFilteringFilename, DxCCMappingFilename
6
+ from hccinfhir.utils import load_proc_filtering, load_dx_to_cc_mapping
7
+
8
class HCCInFHIR:
    """
    Main class for processing FHIR EOB resources into HCC risk scores.

    This class integrates the extraction, filtering, and calculation components
    of the hccinfhir library.
    """

    def __init__(self,
                 filter_claims: bool = True,
                 model_name: ModelName = "CMS-HCC Model V28",
                 proc_filtering_filename: ProcFilteringFilename = "ra_eligible_cpt_hcpcs_2025.csv",
                 dx_cc_mapping_filename: DxCCMappingFilename = "ra_dx_to_cc_2025.csv"):
        """
        Initialize the HCCInFHIR processor.

        Args:
            filter_claims: Whether to apply filtering rules to claims. Default is True.
            model_name: The name of the model to use for the calculation. Default is "CMS-HCC Model V28".
            proc_filtering_filename: The filename of the professional cpt filtering file. Default is "ra_eligible_cpt_hcpcs_2025.csv".
            dx_cc_mapping_filename: The filename of the dx to cc mapping file. Default is "ra_dx_to_cc_2025.csv".
        """
        self.filter_claims = filter_claims
        self.model_name = model_name
        self.proc_filtering_filename = proc_filtering_filename
        self.dx_cc_mapping_filename = dx_cc_mapping_filename
        # Both lookup tables are loaded once here and reused by every call.
        self.professional_cpt = load_proc_filtering(proc_filtering_filename)
        self.dx_to_cc_mapping = load_dx_to_cc_mapping(dx_cc_mapping_filename)

    def _ensure_demographics(self, demographics: Union[Demographics, Dict[str, Any]]) -> Demographics:
        """Convert demographics dict to Demographics object if needed."""
        if isinstance(demographics, Demographics):
            return demographics
        return Demographics(**demographics)

    def _calculate_raf_from_demographics(self, diagnosis_codes: List[str],
                                         demographics: Demographics) -> RAFResult:
        """Calculate RAF score using demographics data."""
        return calculate_raf(
            diagnosis_codes=diagnosis_codes,
            model_name=self.model_name,
            age=demographics.age,
            sex=demographics.sex,
            dual_elgbl_cd=demographics.dual_elgbl_cd,
            orec=demographics.orec,
            crec=demographics.crec,
            new_enrollee=demographics.new_enrollee,
            snp=demographics.snp,
            low_income=demographics.low_income,
            graft_months=demographics.graft_months,
            dx_to_cc_mapping=self.dx_to_cc_mapping
        )

    def _get_unique_diagnosis_codes(self, service_data: List[ServiceLevelData]) -> List[str]:
        """Extract unique diagnosis codes from service level data.

        Codes are returned in first-seen order. De-duplicating through a dict
        (rather than a set) keeps the result stable from run to run, so the
        'diagnosis_codes' list in the final result is reproducible.
        """
        return list(dict.fromkeys(
            code for sld in service_data for code in sld.claim_diagnosis_codes
        ))

    def run(self, eob_list: List[Dict[str, Any]],
            demographics: Union[Demographics, Dict[str, Any]]) -> RAFResult:
        """Process EOB resources and calculate RAF scores.

        Args:
            eob_list: List of EOB resources
            demographics: Demographics information

        Returns:
            RAFResult object containing calculated scores and processed data

        Raises:
            ValueError: If eob_list is not a list
        """
        if not isinstance(eob_list, list):
            raise ValueError("eob_list must be a list; if no eob, pass empty list")

        demographics = self._ensure_demographics(demographics)

        # Extract and filter service level data
        sld_list = extract_sld_list(eob_list)
        if self.filter_claims:
            sld_list = apply_filter(sld_list, self.professional_cpt)

        # Calculate RAF score
        unique_dx_codes = self._get_unique_diagnosis_codes(sld_list)
        raf_result = self._calculate_raf_from_demographics(unique_dx_codes, demographics)
        raf_result['service_level_data'] = sld_list
        return raf_result

    def run_from_service_data(self, service_data: List[Union[ServiceLevelData, Dict[str, Any]]],
                              demographics: Union[Demographics, Dict[str, Any]]) -> RAFResult:
        """Calculate RAF scores from already-extracted service level data.

        Args:
            service_data: Non-empty list whose items are ServiceLevelData
                objects or dicts that can be expanded into ServiceLevelData
            demographics: Demographics information

        Returns:
            RAFResult object with 'service_level_data' set to the standardized
            records (filtered when filter_claims is enabled)

        Raises:
            ValueError: If service_data is not a list, is empty, or contains an
                item that cannot be converted to ServiceLevelData
        """
        demographics = self._ensure_demographics(demographics)

        if not isinstance(service_data, list):
            raise ValueError("Service data must be a list of service records")

        if not service_data:
            raise ValueError("Service data list cannot be empty")

        # Standardize service data with better error handling
        standardized_data = []
        for idx, item in enumerate(service_data):
            try:
                if isinstance(item, dict):
                    standardized_data.append(ServiceLevelData(**item))
                elif isinstance(item, ServiceLevelData):
                    standardized_data.append(item)
                else:
                    raise TypeError("Service data item must be a dictionary or ServiceLevelData object")
            except (KeyError, TypeError, ValueError) as e:
                # Chain the original exception so the root cause stays visible
                # in the traceback.
                raise ValueError(
                    f"Invalid service data at index {idx}: {str(e)}. "
                    "Required fields: claim_type, claim_diagnosis_codes, procedure_code, service_date"
                ) from e

        if self.filter_claims:
            standardized_data = apply_filter(standardized_data,
                                             professional_cpt=self.professional_cpt)

        # Calculate RAF score
        unique_dx_codes = self._get_unique_diagnosis_codes(standardized_data)
        raf_result = self._calculate_raf_from_demographics(unique_dx_codes, demographics)
        raf_result['service_level_data'] = standardized_data

        return raf_result

    def calculate_from_diagnosis(self, diagnosis_codes: List[str],
                                 demographics: Union[Demographics, Dict[str, Any]]) -> RAFResult:
        """Calculate RAF scores from a list of diagnosis codes.

        Args:
            diagnosis_codes: List of diagnosis codes
            demographics: Demographics information

        Returns:
            RAFResult object produced by the RAF calculation

        Raises:
            ValueError: If diagnosis_codes is empty or not a list
        """
        if not isinstance(diagnosis_codes, list):
            raise ValueError("diagnosis_codes must be a list")
        if not diagnosis_codes:
            raise ValueError("diagnosis_codes list cannot be empty")

        demographics = self._ensure_demographics(demographics)
        raf_result = self._calculate_raf_from_demographics(diagnosis_codes, demographics)
        return raf_result
@@ -0,0 +1,95 @@
1
+ from typing import List, Union, Dict, Tuple, Set
2
+ from hccinfhir.datamodels import ModelName, RAFResult
3
+ from hccinfhir.model_demographics import categorize_demographics
4
+ from hccinfhir.model_dx_to_cc import apply_mapping
5
+ from hccinfhir.model_hierarchies import apply_hierarchies
6
+ from hccinfhir.model_coefficients import apply_coefficients
7
+ from hccinfhir.model_interactions import apply_interactions
8
+ from hccinfhir.utils import load_dx_to_cc_mapping
9
+
10
# Load default mappings from csv file.
# The mapping is loaded once at import time and serves as the default value of
# the ``dx_to_cc_mapping`` parameter of calculate_raf.
mapping_file_default = 'ra_dx_to_cc_2025.csv'
dx_to_cc_default = load_dx_to_cc_mapping(mapping_file_default)
13
+
14
def calculate_raf(diagnosis_codes: List[str],
                  model_name: ModelName = "CMS-HCC Model V28",
                  age: Union[int, float] = 65,
                  sex: str = 'F',
                  dual_elgbl_cd: str = 'NA',
                  orec: str = '0',
                  crec: str = '0',
                  new_enrollee: bool = False,
                  snp: bool = False,
                  low_income: bool = False,
                  graft_months: int = None,
                  dx_to_cc_mapping: Dict[Tuple[str, ModelName], Set[str]] = dx_to_cc_default) -> RAFResult:
    """
    Compute the Risk Adjustment Factor (RAF) for one beneficiary.

    The pipeline is: categorize demographics, map diagnoses to CCs, apply
    hierarchies, derive interactions, look up coefficients, and sum them.

    Args:
        diagnosis_codes: List of ICD-10 diagnosis codes
        model_name: Name of the HCC model to use
        age: Patient's age
        sex: Patient's sex ('M' or 'F', or '1' or '2')
        dual_elgbl_cd: Dual eligibility code
        orec: Original reason for entitlement code
        crec: Current reason for entitlement code
        new_enrollee: Whether the patient is a new enrollee
        snp: Special Needs Plan indicator
        low_income: Low income subsidy indicator
        graft_months: Number of months since transplant
        dx_to_cc_mapping: Mapping of (diagnosis code, model name) to CCs;
            defaults to the table loaded at import time

    Returns:
        Dictionary with the keys 'risk_score', 'hcc_list', 'cc_to_dx',
        'coefficients', 'interactions', 'demographics', 'model_name',
        'version' and 'diagnosis_codes'

    Raises:
        ValueError: If age is not a non-negative number or sex is not one of
            'M', 'F', '1', '2'
    """
    # Validate inputs up front; the isinstance test must short-circuit before
    # the comparison so non-numeric ages never reach `<`.
    age_is_number = isinstance(age, (int, float))
    if not age_is_number or age < 0:
        raise ValueError("Age must be a non-negative number")

    if sex not in ('M', 'F', '1', '2'):
        raise ValueError("Sex must be 'M' or 'F' or '1' or '2'")

    # Pick the demographic categorization version from the model family.
    if 'RxHCC' in model_name:
        version = 'V4'
    elif 'HHS-HCC' in model_name:  # not implemented yet
        version = 'V6'
    else:
        version = 'V2'

    demographics = categorize_demographics(
        age, sex, dual_elgbl_cd, orec, crec, version,
        new_enrollee, snp, low_income, graft_months,
    )

    cc_to_dx = apply_mapping(diagnosis_codes, model_name, dx_to_cc_mapping=dx_to_cc_mapping)
    hcc_set = apply_hierarchies(set(cc_to_dx.keys()), model_name)
    interactions = apply_interactions(demographics, hcc_set, model_name)
    coefficients = apply_coefficients(demographics, hcc_set, interactions, model_name)

    result = {
        'risk_score': sum(coefficients.values()),
        'hcc_list': list(hcc_set),
        'cc_to_dx': cc_to_dx,
        'coefficients': coefficients,
        'interactions': interactions,
        'demographics': demographics,
        'model_name': model_name,
        'version': version,
        'diagnosis_codes': diagnosis_codes,
    }
    return result
93
+
94
+
95
+
@@ -0,0 +1,143 @@
1
+ from typing import Dict, Tuple
2
+ import importlib.resources
3
+ from hccinfhir.datamodels import ModelName, Demographics
4
+
5
# Build the default coefficient table from the packaged csv file at import time.
coefficients_file_default = 'ra_coefficients_2025.csv'
coefficients_default: Dict[Tuple[str, ModelName], float] = {}  # (lower-cased coefficient name, model_name) -> value

try:
    with importlib.resources.open_text('hccinfhir.data', coefficients_file_default) as f:
        next(f, None)  # Skip header
        for line in f:
            try:
                coefficient, value, model_domain, model_version = line.strip().split(',')
                # ESRD rows get the "CMS-HCC" prefix in the model name; all other
                # domains are used as-is. The version is the last two characters.
                if model_domain == 'ESRD':
                    model_domain = f"CMS-HCC {model_domain}"
                model_name = f"{model_domain} Model V{model_version[-2:]}"

                # A later row with the same key overwrites an earlier one.
                key = (coefficient.lower(), model_name)
                coefficients_default[key] = float(value)
            except ValueError:
                continue  # Skip malformed lines
except Exception as e:
    print(f"Error loading mapping file: {e}")
    coefficients_default = {}
29
+
30
def get_coefficent_prefix(demographics: Demographics,
                          model_name: ModelName = "CMS-HCC Model V28") -> str:
    """
    Return the coefficient-name prefix for a beneficiary under a given model.

    Args:
        demographics: Demographics object containing beneficiary information
        model_name: Model name; names containing 'ESRD' or 'RxHCC' select the
            corresponding prefix family, anything else uses the CMS-HCC one

    Returns:
        String prefix used to look up coefficients for this beneficiary type
    """
    if 'ESRD' in model_name:
        if not demographics.esrd:
            # Transplant case
            if demographics.graft_months in [1, 2, 3]:
                return f'TRANSPLANT_KIDNEY_ONLY_{demographics.graft_months}M'
            # No ESRD-specific prefix applies: fall through to the CMS-HCC
            # defaults at the bottom of the function.
        elif demographics.graft_months is None:
            # Dialysis case
            return 'DNE_' if demographics.new_enrollee else 'DI_'
        elif demographics.lti:
            # Functioning graft, institutional
            return 'GI_'
        elif demographics.new_enrollee:
            # Functioning graft, new enrollee
            return 'GNE_'
        else:
            # Community functioning graft
            dual_part = 'F' if demographics.fbd else 'NP'
            age_part = 'A' if demographics.age >= 65 else 'N'
            return 'G' + dual_part + age_part + '_'

    elif 'RxHCC' in model_name:
        if demographics.lti:
            return 'Rx_NE_LTI_' if demographics.new_enrollee else 'Rx_CE_LTI_'

        if demographics.new_enrollee:
            return 'Rx_NE_Lo_' if demographics.low_income else 'Rx_NE_NoLo_'

        # Community case
        income_part = 'Low' if demographics.low_income else 'NoLow'
        age_part = 'Aged' if demographics.age >= 65 else 'NoAged'
        return 'Rx_CE_' + income_part + age_part + '_'

    # Default CMS-HCC Model
    if demographics.lti:
        return 'INS_'

    if demographics.new_enrollee:
        return 'SNPNE_' if demographics.snp else 'NE_'

    # Community case
    if demographics.fbd:
        dual_part = 'F'
    elif demographics.pbd:
        dual_part = 'P'
    else:
        dual_part = 'N'
    age_part = 'A' if demographics.age >= 65 else 'D'
    return 'C' + dual_part + age_part + '_'
90
+
91
+
92
def apply_coefficients(demographics: Demographics,
                       hcc_set: set[str],
                       interactions: dict,
                       model_name: ModelName = "CMS-HCC Model V28",
                       coefficients: Dict[Tuple[str, ModelName], float] = coefficients_default) -> dict:
    """Look up the coefficient for every variable that applies to a beneficiary.

    Three groups of variables are considered, in this order: the demographic
    category, each HCC in ``hcc_set``, and each interaction whose value is at
    least 1. A variable is included in the result only when its prefixed,
    lower-cased name exists in ``coefficients`` for ``model_name``.

    Args:
        demographics: Demographics object containing patient characteristics
        hcc_set: Set of HCC codes present for the patient
        interactions: Dictionary of interaction variables and their values
        model_name: Name of the risk adjustment model to use (default: "CMS-HCC Model V28")
        coefficients: Dictionary mapping (variable, model) tuples to coefficient values
            (default: coefficients_default)

    Returns:
        Dictionary keyed by the demographic category, HCC code, or interaction
        name, with the matching coefficient as the value
    """
    prefix = get_coefficent_prefix(demographics, model_name)

    # (key used in the result, un-normalized variable name used for lookup)
    candidates = [(demographics.category, f"{prefix}{demographics.category}")]
    candidates.extend((hcc, f"{prefix}HCC{hcc}") for hcc in hcc_set)
    candidates.extend(
        (name, f"{prefix}{name}")
        for name, flag in interactions.items()
        if not (flag < 1)  # only interactions that are switched on
    )

    output = {}
    for result_key, variable in candidates:
        lookup = (variable.lower(), model_name)
        if lookup in coefficients:
            output[result_key] = coefficients[lookup]

    return output
143
+
@@ -0,0 +1,191 @@
1
+ from typing import Union
2
+ from hccinfhir.datamodels import Demographics
3
+
4
def categorize_demographics(age: Union[int, float],
                            sex: str,
                            dual_elgbl_cd: str = None,
                            orec: str = None,
                            crec: str = None,
                            version: str = 'V2',
                            new_enrollee: bool = False,
                            snp: bool = False,
                            low_income: bool = False,
                            graft_months: int = None
                            ) -> Demographics:
    """
    Categorize a beneficiary's demographics into risk adjustment categories.

    This function takes demographic information about a beneficiary and returns a Demographics
    object containing derived fields used in risk adjustment models.

    Args:
        age: Beneficiary age (integer or float, fractional part is dropped)
        sex: Beneficiary sex ('M'/'F' or '1'/'2')
        dual_elgbl_cd: Dual eligibility code
        orec: Original reason for entitlement code (required for 'V2'/'V4')
        crec: Current reason for entitlement code
        version: Version of categorization to use ('V2', 'V4', 'V6')
        new_enrollee: Whether beneficiary is a new enrollee
        snp: Whether beneficiary is in a Special Needs Plan
        low_income: Low income indicator, copied to the result unchanged
        graft_months: Graft months value, copied to the result unchanged

    Returns:
        Demographics object containing derived fields like age/sex category,
        disability status, dual status flags, etc.

    Raises:
        ValueError: If age is negative or non-numeric, if sex is invalid, if
            orec is missing for 'V2'/'V4', or if version is not recognized
    """
    if not isinstance(age, (int, float)):
        raise ValueError("Age must be a number")

    if age < 0:
        raise ValueError("Age must be non-negative")

    # Work with whole years from here on.
    age = int(age)
    non_aged = age <= 64

    # Standardize sex input
    if sex in ('M', '1'):
        std_sex = '1'  # For V2/V4
        v6_sex = 'M'   # For V6
    elif sex in ('F', '2'):
        std_sex = '2'  # For V2/V4
        v6_sex = 'F'   # For V6
    else:
        raise ValueError("Sex must be 'M', 'F', '1', or '2'")

    # Determine if person is disabled or originally disabled
    disabled = age < 65 and (orec is not None and orec != "0")
    orig_disabled = (orec is not None and orec == '1') and not disabled

    # Reference: https://resdac.org/cms-data/variables/medicare-medicaid-dual-eligibility-code-january
    # Full benefit dual codes
    fbd_codes = {'02', '04', '08'}

    # Partial benefit dual codes
    pbd_codes = {'01', '03', '05', '06'}

    is_fbd = dual_elgbl_cd in fbd_codes
    is_pbd = dual_elgbl_cd in pbd_codes

    esrd_orec = orec in {'2', '3', '6'}
    esrd_crec = crec in {'2', '3'} if crec else False
    esrd = esrd_orec or esrd_crec

    result_dict = {
        'version': version,
        'non_aged': non_aged,
        'orig_disabled': orig_disabled,
        'disabled': disabled,
        'age': age,
        'sex': std_sex if version in ('V2', 'V4') else v6_sex,
        'dual_elgbl_cd': dual_elgbl_cd,
        'orec': orec,
        'crec': crec,
        'new_enrollee': new_enrollee,
        'snp': snp,
        'fbd': is_fbd,
        'pbd': is_pbd,
        'esrd': esrd,
        'graft_months': graft_months,
        'low_income': low_income
    }

    # V6 Logic (ACA Population)
    if version == 'V6':
        age_ranges = [
            (0, 0, '0_0'),
            (1, 1, '1_1'),
            (2, 4, '2_4'),
            (5, 9, '5_9'),
            (10, 14, '10_14'),
            (15, 20, '15_20'),
            (21, 24, '21_24'),
            (25, 29, '25_29'),
            (30, 34, '30_34'),
            (35, 39, '35_39'),
            (40, 44, '40_44'),
            (45, 49, '45_49'),
            (50, 54, '50_54'),
            (55, 59, '55_59'),
            (60, float('inf'), '60_GT')
        ]

        # Bounds are inclusive on both ends and cover every non-negative age.
        for low, high, label in age_ranges:
            if low <= age <= high:
                result_dict['category'] = f"{v6_sex}AGE_LAST_{label}"
                return Demographics(**result_dict)

    # V2/V4 Logic (Medicare Population)
    elif version in ('V2', 'V4'):
        if orec is None:
            raise ValueError("OREC is required for V2/V4 categorization")

        # New enrollee logic
        if new_enrollee:
            prefix = 'NEF' if std_sex == '2' else 'NEM'

            if age <= 34:
                category = f'{prefix}0_34'
            elif 34 < age <= 44:
                category = f'{prefix}35_44'
            elif 44 < age <= 54:
                category = f'{prefix}45_54'
            elif 54 < age <= 59:
                category = f'{prefix}55_59'
            elif (59 < age <= 63) or (age == 64 and orec != '0'):
                category = f'{prefix}60_64'
            elif (age == 64 and orec == '0') or age == 65:
                # A 64-year-old whose OREC is '0' is grouped with age 65.
                category = f'{prefix}65'
            elif age == 66:
                category = f'{prefix}66'
            elif age == 67:
                category = f'{prefix}67'
            elif age == 68:
                category = f'{prefix}68'
            elif age == 69:
                category = f'{prefix}69'
            elif 69 < age <= 74:
                category = f'{prefix}70_74'
            elif 74 < age <= 79:
                category = f'{prefix}75_79'
            elif 79 < age <= 84:
                category = f'{prefix}80_84'
            elif 84 < age <= 89:
                category = f'{prefix}85_89'
            elif 89 < age <= 94:
                category = f'{prefix}90_94'
            else:
                category = f'{prefix}95_GT'

        else:
            prefix = 'F' if std_sex == '2' else 'M'
            # (inclusive upper age bound, label). Only the upper bound is tested
            # so that age 0 lands in '0_34'; the former exclusive lower bound of
            # 0 rejected it with "Unable to categorize age". Ages above the last
            # bound are '95_GT'.
            age_bands = [
                (34, '0_34'),
                (44, '35_44'),
                (54, '45_54'),
                (59, '55_59'),
                (64, '60_64'),
                (69, '65_69'),
                (74, '70_74'),
                (79, '75_79'),
                (84, '80_84'),
                (89, '85_89'),
                (94, '90_94'),
            ]
            suffix = next((label for upper, label in age_bands if age <= upper), '95_GT')
            category = f'{prefix}{suffix}'

        result_dict['category'] = category
        return Demographics(**result_dict)

    else:
        raise ValueError("Version must be 'V2', 'V4', or 'V6'")
@@ -0,0 +1,54 @@
1
+ from typing import List, Dict, Set, Tuple, Optional
2
+ from hccinfhir.datamodels import ModelName
3
+ from hccinfhir.utils import load_dx_to_cc_mapping
4
+
5
# Load default mappings from csv file.
# The mapping is loaded once at import time and serves as the default value of
# the ``dx_to_cc_mapping`` parameter of get_cc and apply_mapping.
mapping_file_default = 'ra_dx_to_cc_2025.csv'
dx_to_cc_default = load_dx_to_cc_mapping(mapping_file_default)
8
+
9
def get_cc(
    diagnosis_code: str,
    model_name: ModelName = "CMS-HCC Model V28",
    dx_to_cc_mapping: Dict[Tuple[str, ModelName], Set[str]] = dx_to_cc_default
) -> Optional[Set[str]]:
    """
    Look up the CCs for a single diagnosis code under one model.

    The code is used exactly as given; no case or punctuation normalization
    happens here.

    Args:
        diagnosis_code: ICD-10 diagnosis code
        model_name: HCC model name to use for mapping
        dx_to_cc_mapping: Optional custom mapping dictionary keyed by
            (diagnosis code, model name)

    Returns:
        The mapped set of CCs if the (code, model) pair is present, None otherwise
    """
    lookup_key = (diagnosis_code, model_name)
    return dx_to_cc_mapping.get(lookup_key)
26
+
27
def apply_mapping(
    diagnoses: List[str],
    model_name: ModelName = "CMS-HCC Model V28",
    dx_to_cc_mapping: Dict[Tuple[str, ModelName], Set[str]] = dx_to_cc_default
) -> Dict[str, Set[str]]:
    """
    Group diagnosis codes by the CCs they map to.

    Each distinct input code is upper-cased and stripped of '.' characters
    before lookup; codes with no mapping are ignored.

    Args:
        diagnoses: List of ICD-10 diagnosis codes
        model_name: HCC model name to use for mapping
        dx_to_cc_mapping: Optional custom mapping dictionary

    Returns:
        Dictionary mapping each CC to the set of normalized diagnosis codes
        that map to it
    """
    grouped: Dict[str, Set[str]] = {}

    normalized_codes = (code.upper().replace('.', '') for code in set(diagnoses))
    for dx in normalized_codes:
        ccs = get_cc(dx, model_name, dx_to_cc_mapping)
        if ccs is None:
            continue
        for cc in ccs:
            grouped.setdefault(cc, set()).add(dx)

    return grouped