hccinfhir 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/PKG-INFO +145 -3
  2. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/README.md +143 -2
  3. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/__init__.py +2 -1
  4. hccinfhir-0.2.2/hccinfhir/constants.py +240 -0
  5. hccinfhir-0.2.2/hccinfhir/data/ra_labels_2026.csv +784 -0
  6. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/datamodels.py +17 -0
  7. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/defaults.py +3 -1
  8. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/extractor_834.py +52 -71
  9. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/extractor_837.py +2 -2
  10. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/hccinfhir.py +10 -10
  11. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/model_calculate.py +18 -2
  12. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/model_coefficients.py +2 -2
  13. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/model_demographics.py +26 -29
  14. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/model_interactions.py +7 -7
  15. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/utils.py +68 -1
  16. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/pyproject.toml +5 -2
  17. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/.gitignore +0 -0
  18. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/LICENSE +0 -0
  19. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/__init__.py +0 -0
  20. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/hcc_is_chronic.csv +0 -0
  21. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/hcc_is_chronic_without_esrd_model.csv +0 -0
  22. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/ra_coefficients_2025.csv +0 -0
  23. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/ra_coefficients_2026.csv +0 -0
  24. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/ra_dx_to_cc_2025.csv +0 -0
  25. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/ra_dx_to_cc_2026.csv +0 -0
  26. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/ra_eligible_cpt_hcpcs_2023.csv +0 -0
  27. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/ra_eligible_cpt_hcpcs_2024.csv +0 -0
  28. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/ra_eligible_cpt_hcpcs_2025.csv +0 -0
  29. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/ra_eligible_cpt_hcpcs_2026.csv +0 -0
  30. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/ra_hierarchies_2025.csv +0 -0
  31. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/data/ra_hierarchies_2026.csv +0 -0
  32. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/extractor.py +0 -0
  33. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/extractor_fhir.py +0 -0
  34. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/filter.py +0 -0
  35. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/model_dx_to_cc.py +0 -0
  36. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/model_hierarchies.py +0 -0
  37. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/__init__.py +0 -0
  38. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_834_01.txt +0 -0
  39. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_0.txt +0 -0
  40. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_1.txt +0 -0
  41. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_10.txt +0 -0
  42. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_11.txt +0 -0
  43. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_12.txt +0 -0
  44. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_2.txt +0 -0
  45. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_3.txt +0 -0
  46. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_4.txt +0 -0
  47. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_5.txt +0 -0
  48. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_6.txt +0 -0
  49. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_7.txt +0 -0
  50. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_8.txt +0 -0
  51. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_837_9.txt +0 -0
  52. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_eob_1.json +0 -0
  53. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_eob_2.json +0 -0
  54. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_eob_200.ndjson +0 -0
  55. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/sample_files/sample_eob_3.json +0 -0
  56. {hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/samples.py +0 -0
{hccinfhir-0.2.0 → hccinfhir-0.2.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hccinfhir
-Version: 0.2.0
+Version: 0.2.2
 Summary: HCC Algorithm for FHIR Resources
 Project-URL: Homepage, https://github.com/mimilabs/hccinfhir
 Project-URL: Issues, https://github.com/mimilabs/hccinfhir/issues
@@ -10,6 +10,7 @@ Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.8
 Requires-Dist: pydantic>=2.10.3
+Requires-Dist: typing-extensions>=4.6.0
 Description-Content-Type: text/markdown
 
 # HCCInFHIR
@@ -62,6 +63,7 @@ print(f"HCCs: {result.hcc_list}")
 - [Demographic Prefix Override](#demographic-prefix-override)
 - [Custom File Path Resolution](#custom-file-path-resolution)
 - [Batch Processing](#batch-processing)
+- [Large-Scale Processing with Databricks](#large-scale-processing-with-databricks)
 - [Converting to Dictionaries](#converting-to-dictionaries)
 - [Sample Data](#sample-data)
 - [Testing](#testing)
@@ -78,7 +80,7 @@ print(f"HCCs: {result.hcc_list}")
 - **Custom Data Files**: Full support for custom coefficients, mappings, and hierarchies
 - **Flexible File Resolution**: Absolute paths, relative paths, or bundled data files
 - **Type-Safe**: Built on Pydantic with full type hints
-- **Well-Tested**: 155 comprehensive tests covering all features
+- **Well-Tested**: 181 comprehensive tests covering all features
 
 ## 📊 Data Sources & Use Cases
 
@@ -834,6 +836,146 @@ with open("risk_scores.json", "w") as f:
     json.dump(results, f, indent=2)
 ```
 
+### Large-Scale Processing with Databricks
+
+For processing millions of beneficiaries, use PySpark's `pandas_udf` for distributed computation. The hccinfhir logic is well-suited for batch operations with clear, simple transformations.
+
+**Performance Benchmark**:
+
+![Databricks Performance Chart](hccinfhir_pandas_udf_performance_chart.png)
+
+*Tested with ACO data on Databricks Runtime 17.3 LTS, Worker: i3.4xlarge (122GB, 16 cores)*
+
+The chart shows execution time varies based on condition complexity - members with more diagnoses require additional internal processing loops. While the relationship isn't perfectly linear, **1 million members can be processed in under 2 minutes** with this configuration.
+
+```python
+from pyspark.sql import SparkSession
+from pyspark.sql.types import StructType, StructField, FloatType, ArrayType, StringType
+from pyspark.sql import functions as F
+from pyspark.sql.functions import pandas_udf
+import pandas as pd
+
+from hccinfhir import HCCInFHIR, Demographics
+
+# Define the return schema
+hcc_schema = StructType([
+    StructField("risk_score", FloatType(), True),
+    StructField("risk_score_demographics", FloatType(), True),
+    StructField("risk_score_chronic_only", FloatType(), True),
+    StructField("risk_score_hcc", FloatType(), True),
+    StructField("hcc_list", ArrayType(StringType()), True)
+])
+
+# Initialize processor (will be serialized to each executor)
+hcc_processor = HCCInFHIR(model_name="CMS-HCC Model V28")
+
+# Create the pandas UDF
+@pandas_udf(hcc_schema)
+def calculate_hcc(
+    age_series: pd.Series,
+    sex_series: pd.Series,
+    diagnosis_series: pd.Series
+) -> pd.DataFrame:
+    results = []
+
+    for age, sex, diagnosis_codes in zip(age_series, sex_series, diagnosis_series):
+        try:
+            demographics = Demographics(age=int(age), sex=sex)
+
+            # diagnosis_codes can be passed directly - accepts any iterable including numpy arrays
+            result = hcc_processor.calculate_from_diagnosis(diagnosis_codes, demographics)
+
+            results.append({
+                'risk_score': float(result.risk_score),
+                'risk_score_demographics': float(result.risk_score_demographics),
+                'risk_score_chronic_only': float(result.risk_score_chronic_only),
+                'risk_score_hcc': float(result.risk_score_hcc),
+                'hcc_list': result.hcc_list
+            })
+        except Exception as e:
+            # Log error and return nulls for failed rows
+            print(f"ERROR processing row: {e}")
+            results.append({
+                'risk_score': None,
+                'risk_score_demographics': None,
+                'risk_score_chronic_only': None,
+                'risk_score_hcc': None,
+                'hcc_list': None
+            })
+
+    return pd.DataFrame(results)
+
+# Apply the UDF to your DataFrame
+# Assumes df has columns: age, patient_gender, diagnosis_codes (array of strings)
+df = df.withColumn(
+    "hcc_results",
+    calculate_hcc(
+        F.col("age"),
+        F.col("patient_gender"),
+        F.col("diagnosis_codes")
+    )
+)
+
+# Expand the struct into separate columns
+df = df.select(
+    "*",
+    F.col("hcc_results.risk_score").alias("risk_score"),
+    F.col("hcc_results.risk_score_demographics").alias("risk_score_demographics"),
+    F.col("hcc_results.risk_score_chronic_only").alias("risk_score_chronic_only"),
+    F.col("hcc_results.risk_score_hcc").alias("risk_score_hcc"),
+    F.col("hcc_results.hcc_list").alias("hcc_list")
+).drop("hcc_results")
+```
+
+**Performance Tips**:
+- **Repartition** your DataFrame before applying the UDF to balance workload across executors
+- **Cache** the processor initialization by defining it at module level
+- **Batch size**: pandas_udf processes data in batches; Spark handles optimal batch sizing automatically
+- **Install hccinfhir** on all cluster nodes: `%pip install hccinfhir` in a notebook cell or add to cluster init script
+
+**Extended Schema with Demographics**:
+
+```python
+# Include additional demographic parameters
+@pandas_udf(hcc_schema)
+def calculate_hcc_full(
+    age_series: pd.Series,
+    sex_series: pd.Series,
+    dual_status_series: pd.Series,
+    diagnosis_series: pd.Series
+) -> pd.DataFrame:
+    results = []
+
+    for age, sex, dual_status, diagnosis_codes in zip(
+        age_series, sex_series, dual_status_series, diagnosis_series
+    ):
+        try:
+            demographics = Demographics(
+                age=int(age),
+                sex=sex,
+                dual_elgbl_cd=dual_status if dual_status else "00"
+            )
+            result = hcc_processor.calculate_from_diagnosis(diagnosis_codes, demographics)
+
+            results.append({
+                'risk_score': float(result.risk_score),
+                'risk_score_demographics': float(result.risk_score_demographics),
+                'risk_score_chronic_only': float(result.risk_score_chronic_only),
+                'risk_score_hcc': float(result.risk_score_hcc),
+                'hcc_list': result.hcc_list
+            })
+        except Exception as e:
+            results.append({
+                'risk_score': None,
+                'risk_score_demographics': None,
+                'risk_score_chronic_only': None,
+                'risk_score_hcc': None,
+                'hcc_list': None
+            })
+
+    return pd.DataFrame(results)
+```
+
 ### Converting to Dictionaries
 
 All Pydantic models support dictionary conversion for JSON serialization, database storage, or legacy code:
@@ -914,7 +1056,7 @@ hatch shell
 # Install in development mode
 pip install -e .
 
-# Run all tests (155 tests)
+# Run all tests (181 tests)
 pytest tests/
 
 # Run specific test file
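
As a quick illustration of the "Repartition" tip from the new Databricks section (this sketch is editorial, not part of the diff; it assumes the `calculate_hcc` UDF and the `age`/`patient_gender`/`diagnosis_codes` columns from the README example, and the table name and partition count are hypothetical):

```python
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()

# Hypothetical source table; any DataFrame with the expected columns works.
df = spark.table("member_diagnoses")

# Balance the workload across executors before applying the pandas UDF,
# per the README's performance tips.
df = df.repartition(spark.sparkContext.defaultParallelism * 2)

df = df.withColumn(
    "hcc_results",
    calculate_hcc(F.col("age"), F.col("patient_gender"), F.col("diagnosis_codes")),
)
```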
{hccinfhir-0.2.0 → hccinfhir-0.2.2}/README.md
The README.md diff is a verbatim copy of the README portion of the PKG-INFO diff above (new table-of-contents entry, test count updated from 155 to 181, and the new "Large-Scale Processing with Databricks" section), offset by the package metadata header; it is not repeated here.
{hccinfhir-0.2.0 → hccinfhir-0.2.2}/hccinfhir/__init__.py
@@ -9,7 +9,7 @@ from .hccinfhir import HCCInFHIR
 from .extractor import extract_sld, extract_sld_list
 from .filter import apply_filter
 from .model_calculate import calculate_raf
-from .datamodels import Demographics, ServiceLevelData, RAFResult, ModelName
+from .datamodels import Demographics, ServiceLevelData, RAFResult, ModelName, HCCDetail
 
 # Sample data functions
 from .samples import (
@@ -37,6 +37,7 @@ __all__ = [
     "ServiceLevelData",
     "RAFResult",
     "ModelName",
+    "HCCDetail",
 
     # Sample data
     "SampleData",
hccinfhir-0.2.2/hccinfhir/constants.py (new file)
@@ -0,0 +1,240 @@
+"""
+CMS Risk Adjustment Domain Constants
+
+This module contains constants used across the HCC risk adjustment system,
+including dual eligibility codes, OREC/CREC values, and state-specific mappings.
+
+References:
+    - CMS Rate Announcement and Call Letter
+    - Medicare Advantage Enrollment and Disenrollment Guidance
+    - X12 834 Implementation Guides
+"""
+
+from typing import Set, Dict
+
+# =============================================================================
+# DUAL ELIGIBILITY CODES
+# =============================================================================
+# CMS Dual Eligibility Status Codes (Medicare + Medicaid)
+# Used in coefficient prefix selection (CNA_, CFA_, CPA_, etc.)
+
+VALID_DUAL_CODES: Set[str] = {'00', '01', '02', '03', '04', '05', '06', '08'}
+
+# Non-Dual Eligible
+NON_DUAL_CODE: str = '00'
+
+# Full Benefit Dual Eligible (receive both Medicare and full Medicaid benefits)
+# Uses CFA_ (Community, Full Benefit Dual, Aged) or CFD_ (Disabled) prefixes
+FULL_BENEFIT_DUAL_CODES: Set[str] = {
+    '02',  # QMB Plus (Qualified Medicare Beneficiary Plus)
+    '04',  # SLMB Plus (Specified Low-Income Medicare Beneficiary Plus)
+    '08',  # Other Full Benefit Dual Eligible
+}
+
+# Partial Benefit Dual Eligible (Medicare + limited Medicaid)
+# Uses CPA_ (Community, Partial Benefit Dual, Aged) or CPD_ (Disabled) prefixes
+PARTIAL_BENEFIT_DUAL_CODES: Set[str] = {
+    '01',  # QMB Only
+    '03',  # SLMB Only
+    '05',  # QDWI (Qualified Disabled and Working Individual)
+    '06',  # QI (Qualifying Individual)
+}
+
+# =============================================================================
+# OREC - Original Reason for Entitlement Code
+# =============================================================================
+# Determines if beneficiary has ESRD and affects coefficient prefix selection
+
+VALID_OREC_VALUES: Set[str] = {'0', '1', '2', '3'}
+
+OREC_DESCRIPTIONS: Dict[str, str] = {
+    '0': 'Old Age and Survivors Insurance (OASI)',
+    '1': 'Disability Insurance Benefits (DIB)',
+    '2': 'ESRD - End-Stage Renal Disease',
+    '3': 'DIB and ESRD',
+}
+
+# OREC codes indicating ESRD status (per CMS documentation)
+OREC_ESRD_CODES: Set[str] = {'2', '3'}
+
+# =============================================================================
+# CREC - Current Reason for Entitlement Code
+# =============================================================================
+# Current entitlement status (may differ from OREC)
+
+VALID_CREC_VALUES: Set[str] = {'0', '1', '2', '3'}
+
+CREC_DESCRIPTIONS: Dict[str, str] = {
+    '0': 'Old Age and Survivors Insurance (OASI)',
+    '1': 'Disability Insurance Benefits (DIB)',
+    '2': 'ESRD - End-Stage Renal Disease',
+    '3': 'DIB and ESRD',
+}
+
+# CREC codes indicating ESRD status
+CREC_ESRD_CODES: Set[str] = {'2', '3'}
+
+# =============================================================================
+# COEFFICIENT PREFIX GROUPS
+# =============================================================================
+# Used for prefix_override logic in model_demographics.py
+
+# ESRD model prefixes
+ESRD_PREFIXES: Set[str] = {'DI_', 'DNE_', 'GI_', 'GNE_', 'GFPA_', 'GFPN_', 'GNPA_', 'GNPN_'}
+
+# CMS-HCC new enrollee prefixes
+NEW_ENROLLEE_PREFIXES: Set[str] = {'NE_', 'SNPNE_', 'DNE_', 'GNE_'}
+
+# CMS-HCC community prefixes
+COMMUNITY_PREFIXES: Set[str] = {'CNA_', 'CND_', 'CFA_', 'CFD_', 'CPA_', 'CPD_'}
+
+# Institutionalized prefixes
+INSTITUTIONAL_PREFIXES: Set[str] = {'INS_', 'GI_'}
+
+# Full Benefit Dual prefixes
+FULL_BENEFIT_DUAL_PREFIXES: Set[str] = {'CFA_', 'CFD_', 'GFPA_', 'GFPN_'}
+
+# Partial Benefit Dual prefixes
+PARTIAL_BENEFIT_DUAL_PREFIXES: Set[str] = {'CPA_', 'CPD_'}
+
+# Non-Dual prefixes
+NON_DUAL_PREFIXES: Set[str] = {'CNA_', 'CND_', 'GNPA_', 'GNPN_'}
+
+# =============================================================================
+# DEMOGRAPHIC CODES
+# =============================================================================
+
+VALID_SEX_CODES: Set[str] = {'M', 'F'}
+
+# X12 834 Gender Code mappings
+X12_SEX_CODE_MAPPING: Dict[str, str] = {
+    'M': 'M',
+    'F': 'F',
+    '1': 'M',  # X12 numeric code
+    '2': 'F',  # X12 numeric code
+}
+
+# =============================================================================
+# X12 834 MAINTENANCE TYPE CODES
+# =============================================================================
+# INS03 - Maintenance Type Code
+
+MAINTENANCE_TYPE_CHANGE: str = '001'
+MAINTENANCE_TYPE_ADD: str = '021'
+MAINTENANCE_TYPE_CANCEL: str = '024'
+MAINTENANCE_TYPE_REINSTATE: str = '025'
+
+MAINTENANCE_TYPE_DESCRIPTIONS: Dict[str, str] = {
+    '001': 'Change',
+    '021': 'Addition',
+    '024': 'Cancellation/Termination',
+    '025': 'Reinstatement',
+}
+
+# =============================================================================
+# STATE-SPECIFIC MAPPINGS
+# =============================================================================
+
+# -----------------------------------------------------------------------------
+# California DHCS Medi-Cal Aid Codes
+# -----------------------------------------------------------------------------
+# Maps California-specific aid codes to CMS dual eligibility codes
+# Source: California DHCS 834 Implementation Guide
+
+MEDI_CAL_AID_CODES: Dict[str, str] = {
+    # Full Benefit Dual (QMB Plus, SLMB Plus)
+    '4N': '02',  # QMB Plus - Aged
+    '4P': '02',  # QMB Plus - Disabled
+    '5B': '04',  # SLMB Plus - Aged
+    '5D': '04',  # SLMB Plus - Disabled
+
+    # Partial Benefit Dual (QMB Only, SLMB Only, QI)
+    '4M': '01',  # QMB Only - Aged
+    '4O': '01',  # QMB Only - Disabled
+    '5A': '03',  # SLMB Only - Aged
+    '5C': '03',  # SLMB Only - Disabled
+    '5E': '06',  # QI - Aged
+    '5F': '06',  # QI - Disabled
+}
+
+# -----------------------------------------------------------------------------
+# Medicare Status Code Mappings
+# -----------------------------------------------------------------------------
+# Maps Medicare status codes (from various sources) to CMS dual eligibility codes
+# Used in X12 834 REF*ABB segment and other payer files
+
+MEDICARE_STATUS_CODE_MAPPING: Dict[str, str] = {
+    # QMB - Qualified Medicare Beneficiary
+    'QMB': '01',      # QMB Only (Partial)
+    'QMBONLY': '01',
+    'QMBPLUS': '02',  # QMB Plus (Full Benefit)
+    'QMB+': '02',
+
+    # SLMB - Specified Low-Income Medicare Beneficiary
+    'SLMB': '03',      # SLMB Only (Partial)
+    'SLMBONLY': '03',
+    'SLMBPLUS': '04',  # SLMB Plus (Full Benefit)
+    'SLMB+': '04',
+
+    # Other dual eligibility programs
+    'QDWI': '05',       # Qualified Disabled and Working Individual
+    'QI': '06',         # Qualifying Individual
+    'QI1': '06',
+    'FBDE': '08',       # Full Benefit Dual Eligible (Other)
+    'OTHERFULL': '08',
+}
+
+# =============================================================================
+# HELPER FUNCTIONS
+# =============================================================================
+
+def is_full_benefit_dual(dual_code: str) -> bool:
+    """Check if dual eligibility code is Full Benefit Dual"""
+    return dual_code in FULL_BENEFIT_DUAL_CODES
+
+def is_partial_benefit_dual(dual_code: str) -> bool:
+    """Check if dual eligibility code is Partial Benefit Dual"""
+    return dual_code in PARTIAL_BENEFIT_DUAL_CODES
+
+def is_esrd_by_orec(orec: str) -> bool:
+    """Check if OREC indicates ESRD status"""
+    return orec in OREC_ESRD_CODES
+
+def is_esrd_by_crec(crec: str) -> bool:
+    """Check if CREC indicates ESRD status"""
+    return crec in CREC_ESRD_CODES
+
+def normalize_medicare_status_code(status: str) -> str:
+    """Normalize Medicare status code (uppercase, no spaces/hyphens)"""
+    if not status:
+        return ''
+    return status.upper().replace(' ', '').replace('-', '')
+
+def map_medicare_status_to_dual_code(status: str) -> str:
+    """Map Medicare status code to dual eligibility code
+
+    Args:
+        status: Medicare status code (e.g., 'QMB Plus', 'SLMB', 'QI')
+
+    Returns:
+        Dual eligibility code ('01'-'08') or '00' if not found
+    """
+    if not status:
+        return NON_DUAL_CODE
+
+    normalized = normalize_medicare_status_code(status)
+    return MEDICARE_STATUS_CODE_MAPPING.get(normalized, NON_DUAL_CODE)
+
+def map_aid_code_to_dual_status(aid_code: str) -> str:
+    """Map California Medi-Cal aid code to dual eligibility code
+
+    Args:
+        aid_code: California aid code (e.g., '4N', '5B')
+
+    Returns:
+        Dual eligibility code ('01'-'08') or '00' if not found
+    """
+    if not aid_code:
+        return NON_DUAL_CODE
+
+    return MEDI_CAL_AID_CODES.get(aid_code, NON_DUAL_CODE)
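
A short usage sketch for the new constants module (assumes hccinfhir 0.2.2 is installed; the expected values follow directly from the mapping tables above, and `community_prefix` is a hypothetical helper written here for illustration, not part of the package):

```python
from hccinfhir.constants import (
    FULL_BENEFIT_DUAL_CODES,
    PARTIAL_BENEFIT_DUAL_CODES,
    is_esrd_by_orec,
    is_full_benefit_dual,
    map_aid_code_to_dual_status,
    map_medicare_status_to_dual_code,
)

# Status strings are normalized before lookup ('QMB Plus' -> 'QMBPLUS' -> '02').
assert map_medicare_status_to_dual_code("QMB Plus") == "02"
assert map_medicare_status_to_dual_code("unknown") == "00"   # falls back to non-dual

# California Medi-Cal aid codes map to CMS dual codes ('5B' is SLMB Plus - Aged).
assert map_aid_code_to_dual_status("5B") == "04"

# Category and ESRD checks used by coefficient-prefix selection.
assert is_full_benefit_dual("02") and not is_full_benefit_dual("01")
assert is_esrd_by_orec("2")

# Hypothetical illustration (not in the package): how the community prefix
# groups documented above could map a dual code to a coefficient prefix.
def community_prefix(dual_code: str, disabled: bool) -> str:
    if dual_code in FULL_BENEFIT_DUAL_CODES:
        return "CFD_" if disabled else "CFA_"   # full benefit dual
    if dual_code in PARTIAL_BENEFIT_DUAL_CODES:
        return "CPD_" if disabled else "CPA_"   # partial benefit dual
    return "CND_" if disabled else "CNA_"       # non-dual

assert community_prefix("02", disabled=False) == "CFA_"
```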