hccinfhir 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hccinfhir
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: HCC Algorithm for FHIR Resources
5
5
  Project-URL: Homepage, https://github.com/mimilabs/hccinfhir
6
6
  Project-URL: Issues, https://github.com/mimilabs/hccinfhir/issues
@@ -10,6 +10,7 @@ Classifier: Operating System :: OS Independent
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Requires-Python: >=3.8
12
12
  Requires-Dist: pydantic>=2.10.3
13
+ Requires-Dist: typing-extensions>=4.6.0
13
14
  Description-Content-Type: text/markdown
14
15
 
15
16
  # HCCInFHIR
@@ -62,6 +63,7 @@ print(f"HCCs: {result.hcc_list}")
62
63
  - [Demographic Prefix Override](#demographic-prefix-override)
63
64
  - [Custom File Path Resolution](#custom-file-path-resolution)
64
65
  - [Batch Processing](#batch-processing)
66
+ - [Large-Scale Processing with Databricks](#large-scale-processing-with-databricks)
65
67
  - [Converting to Dictionaries](#converting-to-dictionaries)
66
68
  - [Sample Data](#sample-data)
67
69
  - [Testing](#testing)
@@ -78,7 +80,7 @@ print(f"HCCs: {result.hcc_list}")
78
80
  - **Custom Data Files**: Full support for custom coefficients, mappings, and hierarchies
79
81
  - **Flexible File Resolution**: Absolute paths, relative paths, or bundled data files
80
82
  - **Type-Safe**: Built on Pydantic with full type hints
81
- - **Well-Tested**: 155 comprehensive tests covering all features
83
+ - **Well-Tested**: 181 comprehensive tests covering all features
82
84
 
83
85
  ## 📊 Data Sources & Use Cases
84
86
 
@@ -834,6 +836,146 @@ with open("risk_scores.json", "w") as f:
834
836
  json.dump(results, f, indent=2)
835
837
  ```
836
838
 
839
+ ### Large-Scale Processing with Databricks
840
+
841
+ To process millions of beneficiaries, use PySpark's `pandas_udf` to distribute the computation across the cluster. The hccinfhir logic is well suited to batch operations because it consists of clear, simple transformations.
842
+
843
+ **Performance Benchmark**:
844
+
845
+ ![Databricks Performance Chart](hccinfhir_pandas_udf_performance_chart.png)
846
+
847
+ *Tested with ACO data on Databricks Runtime 17.3 LTS, Worker: i3.4xlarge (122GB, 16 cores)*
848
+
849
+ The chart shows that execution time varies with condition complexity: members with more diagnoses require additional internal processing loops. Although the relationship is not perfectly linear, **1 million members can be processed in under 2 minutes** with this configuration.
850
+
851
+ ```python
852
+ from pyspark.sql import SparkSession
853
+ from pyspark.sql.types import StructType, StructField, FloatType, ArrayType, StringType
854
+ from pyspark.sql import functions as F
855
+ from pyspark.sql.functions import pandas_udf
856
+ import pandas as pd
857
+
858
+ from hccinfhir import HCCInFHIR, Demographics
859
+
860
+ # Define the return schema
861
+ hcc_schema = StructType([
862
+ StructField("risk_score", FloatType(), True),
863
+ StructField("risk_score_demographics", FloatType(), True),
864
+ StructField("risk_score_chronic_only", FloatType(), True),
865
+ StructField("risk_score_hcc", FloatType(), True),
866
+ StructField("hcc_list", ArrayType(StringType()), True)
867
+ ])
868
+
869
+ # Initialize processor (will be serialized to each executor)
870
+ hcc_processor = HCCInFHIR(model_name="CMS-HCC Model V28")
871
+
872
+ # Create the pandas UDF
873
+ @pandas_udf(hcc_schema)
874
+ def calculate_hcc(
875
+ age_series: pd.Series,
876
+ sex_series: pd.Series,
877
+ diagnosis_series: pd.Series
878
+ ) -> pd.DataFrame:
879
+ results = []
880
+
881
+ for age, sex, diagnosis_codes in zip(age_series, sex_series, diagnosis_series):
882
+ try:
883
+ demographics = Demographics(age=int(age), sex=sex)
884
+
885
+ # diagnosis_codes can be passed directly - accepts any iterable including numpy arrays
886
+ result = hcc_processor.calculate_from_diagnosis(diagnosis_codes, demographics)
887
+
888
+ results.append({
889
+ 'risk_score': float(result.risk_score),
890
+ 'risk_score_demographics': float(result.risk_score_demographics),
891
+ 'risk_score_chronic_only': float(result.risk_score_chronic_only),
892
+ 'risk_score_hcc': float(result.risk_score_hcc),
893
+ 'hcc_list': result.hcc_list
894
+ })
895
+ except Exception as e:
896
+ # Log error and return nulls for failed rows
897
+ print(f"ERROR processing row: {e}")
898
+ results.append({
899
+ 'risk_score': None,
900
+ 'risk_score_demographics': None,
901
+ 'risk_score_chronic_only': None,
902
+ 'risk_score_hcc': None,
903
+ 'hcc_list': None
904
+ })
905
+
906
+ return pd.DataFrame(results)
907
+
908
+ # Apply the UDF to your DataFrame
909
+ # Assumes df has columns: age, patient_gender, diagnosis_codes (array of strings)
910
+ df = df.withColumn(
911
+ "hcc_results",
912
+ calculate_hcc(
913
+ F.col("age"),
914
+ F.col("patient_gender"),
915
+ F.col("diagnosis_codes")
916
+ )
917
+ )
918
+
919
+ # Expand the struct into separate columns
920
+ df = df.select(
921
+ "*",
922
+ F.col("hcc_results.risk_score").alias("risk_score"),
923
+ F.col("hcc_results.risk_score_demographics").alias("risk_score_demographics"),
924
+ F.col("hcc_results.risk_score_chronic_only").alias("risk_score_chronic_only"),
925
+ F.col("hcc_results.risk_score_hcc").alias("risk_score_hcc"),
926
+ F.col("hcc_results.hcc_list").alias("hcc_list")
927
+ ).drop("hcc_results")
928
+ ```
929
+
930
+ **Performance Tips**:
931
+ - **Repartition** your DataFrame before applying the UDF to balance workload across executors
932
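+ - **Reuse** a single processor: create `hcc_processor` once at module level (outside the UDF body), as in the example above, rather than constructing it inside the UDF on every call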
933
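+ - **Batch size**: pandas_udf processes data in Arrow record batches; the batch size is governed by `spark.sql.execution.arrow.maxRecordsPerBatch` (default 10,000 rows), and the default usually does not need tuning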
934
+ - **Install hccinfhir** on all cluster nodes: `%pip install hccinfhir` in a notebook cell or add to cluster init script
935
+
936
+ **Extended Inputs with Additional Demographics** (same return schema, one more input column):
937
+
938
+ ```python
939
+ # Include additional demographic parameters
940
+ @pandas_udf(hcc_schema)
941
+ def calculate_hcc_full(
942
+ age_series: pd.Series,
943
+ sex_series: pd.Series,
944
+ dual_status_series: pd.Series,
945
+ diagnosis_series: pd.Series
946
+ ) -> pd.DataFrame:
947
+ results = []
948
+
949
+ for age, sex, dual_status, diagnosis_codes in zip(
950
+ age_series, sex_series, dual_status_series, diagnosis_series
951
+ ):
952
+ try:
953
+ demographics = Demographics(
954
+ age=int(age),
955
+ sex=sex,
956
+ dual_elgbl_cd=dual_status if dual_status else "00"
957
+ )
958
+ result = hcc_processor.calculate_from_diagnosis(diagnosis_codes, demographics)
959
+
960
+ results.append({
961
+ 'risk_score': float(result.risk_score),
962
+ 'risk_score_demographics': float(result.risk_score_demographics),
963
+ 'risk_score_chronic_only': float(result.risk_score_chronic_only),
964
+ 'risk_score_hcc': float(result.risk_score_hcc),
965
+ 'hcc_list': result.hcc_list
966
+ })
967
+ except Exception as e:
968
+ results.append({
969
+ 'risk_score': None,
970
+ 'risk_score_demographics': None,
971
+ 'risk_score_chronic_only': None,
972
+ 'risk_score_hcc': None,
973
+ 'hcc_list': None
974
+ })
975
+
976
+ return pd.DataFrame(results)
977
+ ```
978
+
837
979
  ### Converting to Dictionaries
838
980
 
839
981
  All Pydantic models support dictionary conversion for JSON serialization, database storage, or legacy code:
@@ -914,7 +1056,7 @@ hatch shell
914
1056
  # Install in development mode
915
1057
  pip install -e .
916
1058
 
917
- # Run all tests (155 tests)
1059
+ # Run all tests (181 tests)
918
1060
  pytest tests/
919
1061
 
920
1062
  # Run specific test file
@@ -1,20 +1,21 @@
1
- hccinfhir/__init__.py,sha256=CKpYTUSzZdP3s1eB74w5JTe9OS3MtcvuUkv6ymgSyic,1085
2
- hccinfhir/datamodels.py,sha256=NULDYb57R61v4EklOI_AAIuC1-OkLFH1InbAad48dZM,10601
3
- hccinfhir/defaults.py,sha256=tMNym0R6Nr6ibKTqOu6N1vLcdekL0ZmHyDNIOCOMsP4,1292
1
+ hccinfhir/__init__.py,sha256=3aFYtjTklZJg3wIlnMJNgfDBaDCfKXVlYsacdsZ9L4I,1113
2
+ hccinfhir/constants.py,sha256=C4Vyjtzgyd4Jm2I2X6cTYQZLe-jAMC8boUcy-7OXQDQ,8473
3
+ hccinfhir/datamodels.py,sha256=X9ZHPCfxI12o88AFmeomzSxh5fA6tsDOAmTlXbdncJ8,11471
4
+ hccinfhir/defaults.py,sha256=aKdXPhf9bYUzpGvXM1GIXZaKxqkKInt3v9meLB9fWog,1394
4
5
  hccinfhir/extractor.py,sha256=xL9c2VT-e2I7_c8N8j4Og42UEgVuCzyn9WFp3ntM5Ro,1822
5
- hccinfhir/extractor_834.py,sha256=vODcD53iU5ZwQsSbBE8Gix9D-0fz-EhwkmjqRO6LML8,19541
6
- hccinfhir/extractor_837.py,sha256=D60gUFtMk2S0NrJ0iq3ENo35yIwBmBQvF5TurJgRIa8,15327
6
+ hccinfhir/extractor_834.py,sha256=dIqovUOWm_7k_c6sUqTIzQua_kTQ8dLGy3-4-LECW3Y,18855
7
+ hccinfhir/extractor_837.py,sha256=fGsvBTWIj9dsHLGGR67AdlYDSsFi5qnSVlTgwkL1f-E,15334
7
8
  hccinfhir/extractor_fhir.py,sha256=wUN3vTm1oTZ-KvfcDebnpQMxAC-7YlRKv12Wrv3p85A,8490
8
9
  hccinfhir/filter.py,sha256=j_yD2g6RBXVUV9trKkWzsQ35x3fRvfKUPvEXKUefI64,2007
9
- hccinfhir/hccinfhir.py,sha256=rCnExvxZGKi1vLD4cHQ0nzPAGV6e-8C15MtJ2p7zAAk,11160
10
- hccinfhir/model_calculate.py,sha256=KSeZjKYBCfBYYIWOIckDg941OC8050MX2F7BZ2l3V8g,7663
11
- hccinfhir/model_coefficients.py,sha256=--8Gh5gYJez1v0cBA1ZqDn0QI6On-ia1-wPr-mqbRFs,4680
12
- hccinfhir/model_demographics.py,sha256=CR4WC8XVq-CI1nYJoVFc5-KXTw-pKoVlHkHqfnXlnj0,9121
10
+ hccinfhir/hccinfhir.py,sha256=NydnH3WBvuyskn76hY70LpUS6XuIEoax_kip1mgfpHw,11225
11
+ hccinfhir/model_calculate.py,sha256=_TUWNVUsBym0pre3wltXvRuipQaONQ0QBfWPFNAeDsQ,8347
12
+ hccinfhir/model_coefficients.py,sha256=5n3QzHX6FJ3MlO0cV9NS7Bqt-lxzVvT_M3zFaWq6Gng,4685
13
+ hccinfhir/model_demographics.py,sha256=nImKtJCq1HkR9w2GU8aikybJFgow71CPufBRV8Jn7fM,8932
13
14
  hccinfhir/model_dx_to_cc.py,sha256=Yjc6xKI-jMXsbOzS_chc4NI15Bwagb7BwZZ8cKQaTbk,1540
14
15
  hccinfhir/model_hierarchies.py,sha256=cboUnSHZZfOxA8QZKV4QIE-32duElssML32OqYT-65g,1542
15
- hccinfhir/model_interactions.py,sha256=xdsTuc3ii8U_MaPpYv0SnR4eR_w72eHKUJn1mwmZHm4,21379
16
+ hccinfhir/model_interactions.py,sha256=g6jK27Xu8RQUHS3lk4sk2v6w6wqd52mdbGn0BsnR7Pk,21394
16
17
  hccinfhir/samples.py,sha256=2VSWS81cv9EnaHqK7sd6CjwG6FUI9E--5wHgD000REI,9952
17
- hccinfhir/utils.py,sha256=9ki4o1wXyAYYr8BR9Skkz0PKL_1H_HYNV4LalEsASE0,8260
18
+ hccinfhir/utils.py,sha256=hQgHjuOcEQcnxemTZwqFBHWvLC5-C1Gup9cDXEYlZjE,10770
18
19
  hccinfhir/data/__init__.py,sha256=SGiSkpGrnxbvtEFMMlk82NFHOE50hFXcgKwKUSuVZUg,45
19
20
  hccinfhir/data/hcc_is_chronic.csv,sha256=Bwd-RND6SdEsKP-assoBaXnjUJAuDXhSkwWlymux72Y,19701
20
21
  hccinfhir/data/hcc_is_chronic_without_esrd_model.csv,sha256=eVVI4_8mQNkiBiNO3kattfT_zfcV18XgmiltdzZEXSo,17720
@@ -28,6 +29,7 @@ hccinfhir/data/ra_eligible_cpt_hcpcs_2025.csv,sha256=-tMvv2su5tsSbGUh6fZZCMUEkXI
28
29
  hccinfhir/data/ra_eligible_cpt_hcpcs_2026.csv,sha256=EYGN7k_rgCpJe59lL_yNInUcCkdETDWGSFTXII3LZ0Y,40497
29
30
  hccinfhir/data/ra_hierarchies_2025.csv,sha256=HQSPNloe6mvvwMgv8ZwYAfWKkT2b2eUvm4JQy6S_mVQ,13045
30
31
  hccinfhir/data/ra_hierarchies_2026.csv,sha256=A6ZQZb0rpRWrySBB_KA5S4PGtMxWuzB2guU3aBE09v0,19596
32
+ hccinfhir/data/ra_labels_2026.csv,sha256=YstfP7s-3ZwjP4I_GYPPj3_yn-PQK3Q0Q_MVYZhsfjY,50248
31
33
  hccinfhir/sample_files/__init__.py,sha256=SGiSkpGrnxbvtEFMMlk82NFHOE50hFXcgKwKUSuVZUg,45
32
34
  hccinfhir/sample_files/sample_834_01.txt,sha256=J2HMXfY6fAFpV36rvLQ3QymRRS2TPqf3TQY6CNS7TrE,1627
33
35
  hccinfhir/sample_files/sample_837_0.txt,sha256=eggrD259uHa05z2dfxWBpUDseSDp_AQcLyN_adpHyTw,5295
@@ -47,7 +49,7 @@ hccinfhir/sample_files/sample_eob_1.json,sha256=_NGSVR2ysFpx-DcTvyga6dFCzhQ8Vi9f
47
49
  hccinfhir/sample_files/sample_eob_2.json,sha256=FcnJcx0ApOczxjJ_uxVLzCep9THfNf4xs9Yf7hxk8e4,1769
48
50
  hccinfhir/sample_files/sample_eob_200.ndjson,sha256=CxpjeQ1DCMUzZILaM68UEhfxO0p45YGhDDoCZeq8PxU,1917986
49
51
  hccinfhir/sample_files/sample_eob_3.json,sha256=4BW4wOMBEEU9RDfJR15rBEvk0KNHyuMEh3e055y87Hc,2306
50
- hccinfhir-0.2.0.dist-info/METADATA,sha256=bRJ_IYdfpYNRnKLYeqyhVlKTob5K69SeCiC7FGzX8vM,31674
51
- hccinfhir-0.2.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
52
- hccinfhir-0.2.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
53
- hccinfhir-0.2.0.dist-info/RECORD,,
52
+ hccinfhir-0.2.2.dist-info/METADATA,sha256=PS0FbHZTjJ4ALMCISUyDf9h2hWCzNZvgThIVI8rdey0,37122
53
+ hccinfhir-0.2.2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
54
+ hccinfhir-0.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
55
+ hccinfhir-0.2.2.dist-info/RECORD,,