PyPI - hccinfhir - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

hccinfhir 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

hccinfhir/__init__.py +47 -1
hccinfhir/data/ra_eligible_cpt_hcpcs_2026.csv +5130 -5107
hccinfhir/data/ra_hierarchies_2026.csv +12 -0
hccinfhir/extractor.py +2 -2
hccinfhir/extractor_837.py +95 -28
hccinfhir/filter.py +19 -21
hccinfhir/hccinfhir.py +2 -1
hccinfhir/sample_utils.py +252 -0
hccinfhir/samples.py +252 -0
{hccinfhir-0.1.1.dist-info → hccinfhir-0.1.3.dist-info}/METADATA +32 -3
{hccinfhir-0.1.1.dist-info → hccinfhir-0.1.3.dist-info}/RECORD +13 -11
{hccinfhir-0.1.1.dist-info → hccinfhir-0.1.3.dist-info}/WHEEL +0 -0
{hccinfhir-0.1.1.dist-info → hccinfhir-0.1.3.dist-info}/licenses/LICENSE +0 -0

hccinfhir/data/ra_hierarchies_2026.csv CHANGED Viewed

@@ -330,6 +330,12 @@ cc_parent,cc_child,model_domain,model_version,model_fullname
 19,21,CMS-HCC,V28,V28115H1
 19,22,CMS-HCC,V28,V28115H1
 19,23,CMS-HCC,V28,V28115H1
+191,180,CMS-HCC,V28,V28115H1
+191,181,CMS-HCC,V28,V28115H1
+191,182,CMS-HCC,V28,V28115H1
+191,192,CMS-HCC,V28,V28115H1
+191,253,CMS-HCC,V28,V28115H1
+191,254,CMS-HCC,V28,V28115H1
 195,196,CMS-HCC,V28,V28115H1
 20,21,CMS-HCC,V28,V28115H1
 20,22,CMS-HCC,V28,V28115H1
@@ -340,6 +346,12 @@ cc_parent,cc_child,model_domain,model_version,model_fullname
 211,213,CMS-HCC,V28,V28115H1
 212,213,CMS-HCC,V28,V28115H1
 22,23,CMS-HCC,V28,V28115H1
+221,222,CMS-HCC,V28,V28115H1
+221,223,CMS-HCC,V28,V28115H1
+221,224,CMS-HCC,V28,V28115H1
+221,225,CMS-HCC,V28,V28115H1
+221,226,CMS-HCC,V28,V28115H1
+221,227,CMS-HCC,V28,V28115H1
 223,224,CMS-HCC,V28,V28115H1
 223,225,CMS-HCC,V28,V28115H1
 223,226,CMS-HCC,V28,V28115H1

hccinfhir/extractor.py CHANGED Viewed

@@ -36,11 +36,11 @@ def extract_sld(
         raise ValueError(f'Format must be either "837" or "fhir", got {format}')
-def extract_sld_list(data: Union[List[str], List[dict]], format: Literal["837", "fhir"] = "fhir") -> List[ServiceLevelData]:
+def extract_sld_list(data: Union[List[str], List[dict]],
+                     format: Literal["837", "fhir"] = "fhir") -> List[ServiceLevelData]:
     """Extract SLDs from a list of FHIR EOBs"""
     output = []
     for item in data:
         try:
             output.extend(extract_sld(item, format))
         except TypeError as e:

hccinfhir/extractor_837.py CHANGED Viewed

@@ -38,9 +38,11 @@ def parse_amount(amount_str: str) -> Optional[float]:
     except (ValueError, TypeError):
         return None
-def get_segment_value(segment: List[str], index: int) -> Optional[str]:
+def get_segment_value(segment: List[str],
+                      index: int,
+                      default: Optional[str] = None) -> Optional[str]:
     """Safely get value from segment at given index"""
-    return segment[index] if len(segment) > index else None
+    return segment[index] if len(segment) > index else default
 def parse_diagnosis_codes(segment: List[str]) -> Dict[str, str]:
     """Extract diagnosis codes from HI segment"""
@@ -49,7 +51,11 @@ def parse_diagnosis_codes(segment: List[str]) -> Dict[str, str]:
         if ':' not in element:
             continue
         qualifier, code = element.split(':')[:2]
-        if qualifier in ['ABK', 'ABF']:  # ICD-10 qualifiers
+        if qualifier in {'ABK', 'ABF', 'ABJ'}:  # ICD-10 qualifiers
+            # ABK: Primary Diagnosis
+            # ABF: Secondary Diagnosis
+            # ABJ: Admitting Diagnosis
+            # NOTE: In Risk Adjustment, we do not differentiate between primary and secondary diagnoses
             dx_lookup[str(pos)] = code
     return dx_lookup
@@ -61,17 +67,35 @@ def process_service_line(segments: List[List[str]], start_index: int) -> tuple[O
     for seg in segments[start_index:]:
         if seg[0] in ['LX', 'CLM', 'SE']:
             break
-        if seg[0] == 'LIN' and len(seg) > 3 and seg[2] == 'N4':
-            ndc = seg[3]
-        elif seg[0] == 'DTP' and seg[1] == '472':
-            service_date = parse_date(seg[3])
+        if len(seg) > 3:
+            if seg[0] == 'LIN' and seg[2] == 'N4':
+                ndc = seg[3]
+            elif (seg[0] == 'DTP' and
+                  seg[1] in {'472', '434'} and
+                  seg[2].endswith('D8')):
+                # 472: Service Date
+                # 434: From Date in 837I
+                # These are not included currently: 435: To Date in 837I, 096 Discharge Date
+                if seg[3]:
+                    service_date = parse_date(seg[3][:8] if len(seg[3]) >= 8 else seg[3])
         if ndc and service_date:
             break
     return ndc, service_date
 def extract_sld_837(content: str) -> List[ServiceLevelData]:
-    """Extract service level data from 837 Professional or Institutional claims"""
+    """Extract service level data from 837 Professional or Institutional claims
+    Structure:
+    Billing Provider (2000A)
+    └── Subscriber (2000B)
+        └── Patient (2000C) [if needed]
+            └── Claim (2300)
+                ├── Service Line 1 (2400)
+                ├── Service Line 2 (2400)
+                └── Service Line N (2400)
+    """
     if not content:
         raise ValueError("Input X12 data cannot be empty")
@@ -100,7 +124,7 @@ def extract_sld_837(content: str) -> List[ServiceLevelData]:
         seg_id = segment[0]
         # Process NM1 segments (Provider and Patient info)
-        if seg_id == 'NM1':
+        if seg_id == 'NM1' and len(segment) > 1:
             if segment[1] == 'IL':  # Subscriber/Patient
                 current_data.patient_id = get_segment_value(segment, 9)
                 in_claim_loop = False
@@ -112,7 +136,7 @@ def extract_sld_837(content: str) -> List[ServiceLevelData]:
                 current_data.billing_provider_npi = get_segment_value(segment, 9)
         # Process Provider Specialty
-        elif seg_id == 'PRV' and segment[1] == 'PE' and in_rendering_provider_loop:
+        elif seg_id == 'PRV' and len(segment) > 1 and segment[1] == 'PE' and in_rendering_provider_loop:
             current_data.provider_specialty = get_segment_value(segment, 3)
         # Process Claim Information
@@ -122,29 +146,72 @@ def extract_sld_837(content: str) -> List[ServiceLevelData]:
             current_data.claim_id = segment[1] if len(segment) > 1 else None
             # Parse facility and service type for institutional claims
-            if claim_type == "837I" and len(segment) > 5 and ':' in segment[5]:
-                current_data.facility_type = segment[5][0]
+            if claim_type == "837I" and len(segment) > 5 and segment[5] and ':' in segment[5]:
+                current_data.facility_type = segment[5][0] if segment[5] else None
                 current_data.service_type = segment[5][1] if len(segment[5]) > 1 else None
         # Process Diagnosis Codes
         elif seg_id == 'HI' and in_claim_loop:
-            current_data.dx_lookup = parse_diagnosis_codes(segment)
+            # In 837I, there can be multiple HI segments in the claim
+            # Also, in 837I, diagnosis position does not matter
+            # We will use continuous numbering for diagnosis codes
+            # use the last dx_lookup position as the starting position, and update
+            hi_segment = parse_diagnosis_codes(segment)
+            hi_segment_realigned = {
+                str(int(pos) + len(current_data.dx_lookup)): code
+                for pos, code in hi_segment.items()
+            }
+            current_data.dx_lookup.update(hi_segment_realigned)
         # Process Service Lines
+        #
+        # SV1 (Professional Services):
+        #   SV101 (Required) - Procedure Code Composite: HC qualifier + 5-digit HCPCS code, supports up to 4 HCPCS modifiers
+        #   SV102 (Required) - Charge Amount: Format 99999999.99
+        #   SV103 (Required) - Unit Type: F2 (International Unit) or UN (Units)
+        #   SV104 (Required) - Unit Count: Format 9999.99 (decimals allowed)
+        #   SV105 (Situational) - Place of Service Code: Required for First Steps claims
+        #   SV107 (Situational) - Diagnosis Code Pointer: Links to HI segment in 2300 loop, valid values 1-8
+        #
+        # SV2 (Institutional Services):
+        #   SV201 (Required) - Revenue Code: Facility-specific revenue code for service rendered
+        #   SV202 (Required) - Procedure Code Composite: HC qualifier + 5-digit HCPCS code, supports up to 4 HCPCS modifiers
+        #   SV203 (Required) - Charge Amount: Format 99999999.99
+        #   SV204 (Required) - Unit Type: DA (Days) or UN (Units)
+        #   SV205 (Required) - Unit Count: Format 9999999.999 (whole numbers only - fractional quantities not recognized)
+        #   NOTE: Diagnosis Code Pointer is not supported for SV2
+        #
         elif seg_id in ['SV1', 'SV2'] and in_claim_loop:
-            # Parse procedure info
-            proc_info = segment[1].split(':')
-            procedure_code = proc_info[1] if len(proc_info) > 1 else None
-            modifiers = proc_info[2:] if len(proc_info) > 2 else []
-            # Get diagnosis pointers and linked diagnoses
-            dx_pointer_pos = 7 if seg_id == 'SV1' else 11
-            dx_pointers = get_segment_value(segment, dx_pointer_pos)
-            linked_diagnoses = [
-                current_data.dx_lookup[pointer]
-                for pointer in (dx_pointers.split(',') if dx_pointers else [])
-                if pointer in current_data.dx_lookup
-            ]
+            linked_diagnoses = []
+            if seg_id == 'SV1':
+                # SV1 Professional Service: SV101=procedure, SV104=quantity, SV105=place_of_service
+                proc_info = get_segment_value(segment, 1, '').split(':')
+                procedure_code = proc_info[1] if len(proc_info) > 1 else None
+                modifiers = proc_info[2:] if len(proc_info) > 2 else []
+                quantity = parse_amount(get_segment_value(segment, 4))
+                place_of_service = get_segment_value(segment, 5)
+                # Get diagnosis pointers and linked diagnoses
+                dx_pointers = get_segment_value(segment, 7, '')
+                linked_diagnoses = [
+                    current_data.dx_lookup[pointer]
+                    for pointer in (dx_pointers.split(':') if dx_pointers else [])
+                    if pointer in current_data.dx_lookup
+                ]
+            else:
+                # SV2 Institutional Service: SV201=revenue, SV202=procedure, SV205=quantity
+                # Revenue code in SV201
+                revenue_code = get_segment_value(segment, 1)
+                # Procedure code in SV202
+                proc_info = get_segment_value(segment, 2, '').split(':')
+                procedure_code = proc_info[1] if len(proc_info) > 1 else None
+                modifiers = proc_info[2:] if len(proc_info) > 2 else []
+                # Quantity in SV205
+                quantity = parse_amount(get_segment_value(segment, 5))
+                place_of_service = None  # Not applicable for institutional
+                # linked diagnoses are not supported for SV2
             # Get service line details
             ndc, service_date = process_service_line(segments, i)
@@ -154,7 +221,7 @@ def extract_sld_837(content: str) -> List[ServiceLevelData]:
                 claim_id=current_data.claim_id,
                 procedure_code=procedure_code,
                 linked_diagnosis_codes=linked_diagnoses,
-                claim_diagnosis_codes=list(current_data.dx_lookup.values()),
+                claim_diagnosis_codes=list(current_data.dx_lookup.values()), # this is used for risk adjustment
                 claim_type=current_data.claim_type,
                 provider_specialty=current_data.provider_specialty,
                 performing_provider_npi=current_data.performing_provider_npi,
@@ -163,8 +230,8 @@ def extract_sld_837(content: str) -> List[ServiceLevelData]:
                 facility_type=current_data.facility_type,
                 service_type=current_data.service_type,
                 service_date=service_date,
-                place_of_service=get_segment_value(segment, 6) if seg_id == 'SV1' else None,
-                quantity=parse_amount(get_segment_value(segment, 4)),
+                place_of_service=place_of_service,
+                quantity=quantity,
                 modifiers=modifiers,
                 ndc=ndc,
                 allowed_amount=None

hccinfhir/filter.py CHANGED Viewed

@@ -3,39 +3,37 @@ from hccinfhir.datamodels import ServiceLevelData
 from hccinfhir.utils import load_proc_filtering
 # use import importlib.resources to load the professional_cpt_fn file as a list of strings
-professional_cpt_default_fn = 'ra_eligible_cpt_hcpcs_2023.csv'
+professional_cpt_default_fn = 'ra_eligible_cpt_hcpcs_2025.csv'
 professional_cpt_default = load_proc_filtering(professional_cpt_default_fn)
 def apply_filter(
     data: List[ServiceLevelData],
     inpatient_tob: Set[str] = {'11X', '41X'},
-    outpatient_tob: Set[str] = {'12X', '13X', '43X', '71X', '73X', '76X', '77X', '85X'},
+    outpatient_tob: Set[str] = {'12X', '13X', '43X', '71X', '73X', '76X', '77X', '85X', '87X'},
     professional_cpt: Set[str] = professional_cpt_default
 ) -> List[ServiceLevelData]:
     # tob (Type of Bill) Filter is based on:
     # https://www.hhs.gov/guidance/sites/default/files/hhs-guidance-documents/2012181486-wq-092916_ra_webinar_slides_5cr_092816.pdf
-    # https://www.hhs.gov/guidance/sites/default/files/hhs-guidance-documents/final%20industry%20memo%20medicare%20filtering%20logic%2012%2022%2015_85.pdf
+    # https://www.hhs.gov/guidance/sites/default/files/hhs-guidance-documents/FinalEncounterDataDiagnosisFilteringLogic.pdf
+    # https://www.cms.gov/files/document/encounterdatasystemedit20495andedit01415andtob87x07162021.pdf for 87X
+    # NOTE: If no facility_type or service_type, then the claim is professional, in our implementation.
+    # NOTE: The original CMS logic is for the "record" level, not the service level.
+    #  Thus, when preparing the service level data, put all diagnosis codes into the diagnosis field.
-    # Break down the inpatient ToB into facility and service types
-    inpatient_facility_types = {tob[0] for tob in inpatient_tob}
-    inpatient_service_types = {tob[1] for tob in inpatient_tob}
-    # Break down the outpatient ToB into facility and service types
-    outpatient_facility_types = {tob[0] for tob in outpatient_tob}
-    outpatient_service_types = {tob[1] for tob in outpatient_tob}
-    # If ServiceLevelData has a facility_type and service_type, then filter the data based on the facility_type and service_type
-    # If not, then filter the data based on the CPT code
     filtered_data = []
     for item in data:
-        if item.facility_type and item.service_type:
-            if item.facility_type in inpatient_facility_types and item.service_type in inpatient_service_types:
-                filtered_data.append(item)
-            elif (item.facility_type in outpatient_facility_types and
-                  item.service_type in outpatient_service_types and
-                  item.procedure_code in professional_cpt):
+        item_tob = '?' if item.facility_type is None else item.facility_type
+        item_tob += '?' if item.service_type is None else item.service_type
+        item_tob += 'X'
+        if '?' in item_tob: # professional claims
+            if item.procedure_code in professional_cpt:
                 filtered_data.append(item)
         else:
-            if item.procedure_code in professional_cpt:
-                filtered_data.append(item)
+            if item_tob in inpatient_tob:
+                filtered_data.append(item)
+            elif item_tob in outpatient_tob:
+                if item.procedure_code in professional_cpt:
+                    filtered_data.append(item)
     return filtered_data

hccinfhir/hccinfhir.py CHANGED Viewed

@@ -81,8 +81,9 @@ class HCCInFHIR:
         # Extract and filter service level data
         sld_list = extract_sld_list(eob_list)
         if self.filter_claims:
-            sld_list = apply_filter(sld_list, self.professional_cpt)
+            sld_list = apply_filter(sld_list, professional_cpt=self.professional_cpt)
         # Calculate RAF score
         unique_dx_codes = self._get_unique_diagnosis_codes(sld_list)

hccinfhir/sample_utils.py ADDED Viewed

@@ -0,0 +1,252 @@
+"""
+Sample Data Module for HCCInFHIR
+This module provides easy access to sample data files for testing and demonstration purposes.
+End users can call functions to retrieve sample EOB (Explanation of Benefits) and 837 claim data.
+"""
+import importlib.resources
+import json
+from typing import List, Dict, Any, Union, Optional
+from pathlib import Path
+class SampleData:
+    """
+    A class that provides access to sample data files included with the HCCInFHIR package.
+    This class allows end users to easily retrieve sample EOB and 837 claim data
+    for testing, development, and demonstration purposes.
+    """
+    @staticmethod
+    def get_eob_sample(case_number: int = 1) -> Dict[str, Any]:
+        """
+        Retrieve a specific EOB sample by case number.
+        Args:
+            case_number: The case number (1, 2, or 3). Default is 1.
+        Returns:
+            A dictionary containing the EOB data
+        Raises:
+            ValueError: If case_number is not 1, 2, or 3
+            FileNotFoundError: If the sample file cannot be found
+        Example:
+            >>> sample_data = SampleData.get_eob_sample(1)
+            >>> print(sample_data['resourceType'])
+            'ExplanationOfBenefit'
+        """
+        if case_number not in [1, 2, 3]:
+            raise ValueError("case_number must be 1, 2, or 3")
+        try:
+            with importlib.resources.open_text('hccinfhir.samples', f'sample_eob_{case_number}.json') as f:
+                return json.load(f)
+        except FileNotFoundError:
+            raise FileNotFoundError(f"Sample EOB case {case_number} not found")
+    @staticmethod
+    def get_eob_sample_list(limit: Optional[int] = None) -> List[Dict[str, Any]]:
+        """
+        Retrieve a list of EOB samples from the large sample file.
+        Args:
+            limit: Maximum number of samples to return. If None, returns all 200 samples.
+        Returns:
+            A list of EOB data dictionaries
+        Raises:
+            FileNotFoundError: If the sample file cannot be found
+        Example:
+            >>> # Get first 10 samples
+            >>> samples = SampleData.get_eob_sample_list(limit=10)
+            >>> print(len(samples))
+            10
+            >>> # Get all 200 samples
+            >>> all_samples = SampleData.get_eob_sample_list()
+            >>> print(len(all_samples))
+            200
+        """
+        try:
+            output = []
+            with importlib.resources.open_text('hccinfhir.samples', 'sample_eob_200.ndjson') as f:
+                for i, line in enumerate(f):
+                    if limit is not None and i >= limit:
+                        break
+                    eob_data = json.loads(line)
+                    output.append(eob_data)
+            return output
+        except FileNotFoundError:
+            raise FileNotFoundError("Sample EOB list file not found")
+    @staticmethod
+    def get_837_sample(case_number: int = 0) -> str:
+        """
+        Retrieve a specific 837 claim sample by case number.
+        Args:
+            case_number: The case number (0 through 11). Default is 0.
+        Returns:
+            A string containing the 837 X12 claim data
+        Raises:
+            ValueError: If case_number is not between 0 and 11
+            FileNotFoundError: If the sample file cannot be found
+        Example:
+            >>> sample_837 = SampleData.get_837_sample(0)
+            >>> print("ISA" in sample_837)
+            True
+        """
+        if case_number < 0 or case_number > 11:
+            raise ValueError("case_number must be between 0 and 11")
+        try:
+            with importlib.resources.open_text('hccinfhir.samples', f'sample_837_{case_number}.txt') as f:
+                return f.read()
+        except FileNotFoundError:
+            raise FileNotFoundError(f"Sample 837 case {case_number} not found")
+    @staticmethod
+    def get_837_sample_list(case_numbers: Optional[List[int]] = None) -> List[str]:
+        """
+        Retrieve multiple 837 claim samples.
+        Args:
+            case_numbers: List of case numbers to retrieve. If None, returns all 12 samples.
+        Returns:
+            A list of 837 X12 claim data strings
+        Raises:
+            ValueError: If any case_number is not between 0 and 11
+            FileNotFoundError: If any sample file cannot be found
+        Example:
+            >>> # Get specific cases
+            >>> samples = SampleData.get_837_sample_list([0, 1, 2])
+            >>> print(len(samples))
+            3
+            >>> # Get all samples
+            >>> all_samples = SampleData.get_837_sample_list()
+            >>> print(len(all_samples))
+            12
+        """
+        if case_numbers is None:
+            case_numbers = list(range(12))  # 0 through 11
+        # Validate case numbers
+        for case_num in case_numbers:
+            if case_num < 0 or case_num > 11:
+                raise ValueError(f"case_number {case_num} must be between 0 and 11")
+        output = []
+        for case_num in case_numbers:
+            try:
+                with importlib.resources.open_text('hccinfhir.samples', f'sample_837_{case_num}.txt') as f:
+                    output.append(f.read())
+            except FileNotFoundError:
+                raise FileNotFoundError(f"Sample 837 case {case_num} not found")
+        return output
+    @staticmethod
+    def list_available_samples() -> Dict[str, Any]:
+        """
+        Get information about all available sample data.
+        Returns:
+            A dictionary containing information about available samples
+        Example:
+            >>> info = SampleData.list_available_samples()
+            >>> print(info['eob_samples'])
+            ['sample_eob_1.json', 'sample_eob_2.json', 'sample_eob_3.json', 'sample_eob_200.ndjson']
+        """
+        return {
+            "eob_samples": [
+                "sample_eob_1.json",
+                "sample_eob_2.json",
+                "sample_eob_3.json",
+                "sample_eob_200.ndjson"
+            ],
+            "eob_case_numbers": [1, 2, 3],
+            "eob_list_size": 200,
+            "837_samples": [f"sample_837_{i}.txt" for i in range(12)],
+            "837_case_numbers": list(range(12)),
+            "description": {
+                "eob": "Explanation of Benefits (FHIR resources) for testing HCC calculations",
+                "837": "X12 837 claim data for testing claim processing"
+            }
+        }
+# Convenience functions for easy access
+def get_eob_sample(case_number: int = 1) -> Dict[str, Any]:
+    """
+    Convenience function to get an EOB sample.
+    Args:
+        case_number: The case number (1, 2, or 3). Default is 1.
+    Returns:
+        A dictionary containing the EOB data
+    """
+    return SampleData.get_eob_sample(case_number)
+def get_eob_sample_list(limit: Optional[int] = None) -> List[Dict[str, Any]]:
+    """
+    Convenience function to get a list of EOB samples.
+    Args:
+        limit: Maximum number of samples to return. If None, returns all 200 samples.
+    Returns:
+        A list of EOB data dictionaries
+    """
+    return SampleData.get_eob_sample_list(limit)
+def get_837_sample(case_number: int = 0) -> str:
+    """
+    Convenience function to get an 837 claim sample.
+    Args:
+        case_number: The case number (0 through 11). Default is 0.
+    Returns:
+        A string containing the 837 X12 claim data
+    """
+    return SampleData.get_837_sample(case_number)
+def get_837_sample_list(case_numbers: Optional[List[int]] = None) -> List[str]:
+    """
+    Convenience function to get multiple 837 claim samples.
+    Args:
+        case_numbers: List of case numbers to retrieve. If None, returns all 12 samples.
+    Returns:
+        A list of 837 X12 claim data strings
+    """
+    return SampleData.get_837_sample_list(case_numbers)
+def list_available_samples() -> Dict[str, Any]:
+    """
+    Convenience function to get information about available samples.
+    Returns:
+        A dictionary containing information about available samples
+    """
+    return SampleData.list_available_samples()

hccinfhir 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

hccinfhir 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl