hccinfhir 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hccinfhir/hccinfhir.py CHANGED
@@ -1,4 +1,4 @@
- from typing import List, Dict, Any, Union, Optional, Tuple, Set
+ from typing import List, Dict, Any, Union, Optional, Tuple, Set, Iterable
  from hccinfhir.extractor import extract_sld_list
  from hccinfhir.filter import apply_filter
  from hccinfhir.model_calculate import calculate_raf
@@ -184,16 +184,16 @@ class HCCInFHIR:
          # Create new result with service data included
          return raf_result.model_copy(update={'service_level_data': standardized_data})

-     def calculate_from_diagnosis(self, diagnosis_codes: List[str],
+     def calculate_from_diagnosis(self, diagnosis_codes: Iterable[str],
                                    demographics: Union[Demographics, Dict[str, Any]],
                                    prefix_override: Optional[PrefixOverride] = None,
                                    maci: float = 0.0,
                                    norm_factor: float = 1.0,
                                    frailty_score: float = 0.0) -> RAFResult:
-         """Calculate RAF scores from a list of diagnosis codes.
+         """Calculate RAF scores from diagnosis codes.

          Args:
-             diagnosis_codes: List of diagnosis codes
+             diagnosis_codes: Iterable of diagnosis codes (list, tuple, numpy array, etc.)
              demographics: Demographics information
              prefix_override: Optional prefix to override auto-detected demographic prefix.
                  Use when demographic categorization is incorrect (e.g., ESRD patients with orec=0).
@@ -201,14 +201,14 @@ class HCCInFHIR:
              norm_factor: Normalization factor (default 1.0)
              frailty_score: Frailty adjustment score (default 0.0)

-         Raises:
-             ValueError: If diagnosis_codes is empty or not a list
+         Returns:
+             RAFResult object containing calculated scores
          """
-         if not isinstance(diagnosis_codes, list):
-             raise ValueError("diagnosis_codes must be a list")
-
+         # Convert to list to ensure consistent handling downstream
+         diagnosis_list = list(diagnosis_codes) if diagnosis_codes is not None else []
+
          demographics = self._ensure_demographics(demographics)
          raf_result = self._calculate_raf_from_demographics_and_dx_codes(
-             diagnosis_codes, demographics, prefix_override, maci, norm_factor, frailty_score
+             diagnosis_list, demographics, prefix_override, maci, norm_factor, frailty_score
          )
          return raf_result
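The change above relaxes `calculate_from_diagnosis` from `List[str]` to `Iterable[str]` and drops the old `ValueError` on non-list input. A minimal sketch of the new call surface, using the `HCCInFHIR` and `Demographics` classes shown in the README below; the diagnosis codes are illustrative:

```python
from hccinfhir import HCCInFHIR, Demographics

processor = HCCInFHIR(model_name="CMS-HCC Model V28")
demographics = Demographics(age=70, sex="F")

# 0.2.1 required a plain list; 0.2.3 converts any iterable internally
result = processor.calculate_from_diagnosis(("E119", "I5030"), demographics)         # tuple
result = processor.calculate_from_diagnosis(iter(["E119", "I5030"]), demographics)   # any iterator
print(result.risk_score, result.hcc_list)
```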
hccinfhir/model_calculate.py CHANGED
@@ -1,11 +1,11 @@
  from typing import List, Union, Dict, Tuple, Set, Optional
- from hccinfhir.datamodels import ModelName, RAFResult, PrefixOverride
+ from hccinfhir.datamodels import ModelName, RAFResult, PrefixOverride, HCCDetail
  from hccinfhir.model_demographics import categorize_demographics
  from hccinfhir.model_dx_to_cc import apply_mapping
  from hccinfhir.model_hierarchies import apply_hierarchies
  from hccinfhir.model_coefficients import apply_coefficients
  from hccinfhir.model_interactions import apply_interactions
- from hccinfhir.defaults import dx_to_cc_default, hierarchies_default, is_chronic_default, coefficients_default
+ from hccinfhir.defaults import dx_to_cc_default, hierarchies_default, is_chronic_default, coefficients_default, labels_default

  def calculate_raf(diagnosis_codes: List[str],
                    model_name: ModelName = "CMS-HCC Model V28",
@@ -23,6 +23,7 @@ def calculate_raf(diagnosis_codes: List[str],
                    is_chronic_mapping: Dict[Tuple[str, ModelName], bool] = is_chronic_default,
                    hierarchies_mapping: Dict[Tuple[str, ModelName], Set[str]] = hierarchies_default,
                    coefficients_mapping: Dict[Tuple[str, ModelName], float] = coefficients_default,
+                   labels_mapping: Dict[Tuple[str, ModelName], str] = labels_default,
                    prefix_override: Optional[PrefixOverride] = None,
                    maci: float = 0.0,
                    norm_factor: float = 1.0,
@@ -47,6 +48,7 @@ def calculate_raf(diagnosis_codes: List[str],
        is_chronic_mapping: Mapping of HCCs to a chronic flag for the selected model; defaults to packaged mappings.
        hierarchies_mapping: Mapping of parent HCCs to child HCCs for hierarchical rules; defaults to packaged 2026 mappings.
        coefficients_mapping: Mapping of coefficient names to values; defaults to packaged 2026 mappings.
+       labels_mapping: Mapping of (cc, model_name) to human-readable HCC labels; defaults to packaged 2026 mappings.
        prefix_override: Optional prefix to override auto-detected demographic prefix.
            Use when demographic categorization from orec/crec is incorrect.
            Common values: 'DI_' (ESRD Dialysis), 'DNE_' (ESRD Dialysis New Enrollee),
@@ -136,6 +138,19 @@ def calculate_raf(diagnosis_codes: List[str],
      risk_score_hcc = risk_score - risk_score_demographics
      risk_score_payment = risk_score * (1 - maci) / norm_factor + frailty_score

+     # Build HCC details with labels and chronic status
+     hcc_details = []
+     for hcc in hcc_set:
+         label = labels_mapping.get((hcc, model_name))
+         is_chronic = is_chronic_mapping.get((hcc, model_name), False)
+         coef = coefficients.get(hcc)
+         hcc_details.append(HCCDetail(
+             hcc=hcc,
+             label=label,
+             is_chronic=is_chronic,
+             coefficient=coef
+         ))
+
      return RAFResult(
          risk_score=risk_score,
          risk_score_demographics=risk_score_demographics,
@@ -143,6 +158,7 @@
          risk_score_hcc=risk_score_hcc,
          risk_score_payment=risk_score_payment,
          hcc_list=list(hcc_set),
+         hcc_details=hcc_details,
          cc_to_dx=cc_to_dx,
          coefficients=coefficients,
          interactions=interactions,
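With this change, `RAFResult` gains an `hcc_details` list. A minimal sketch of consuming it, with field names taken from the `HCCDetail(...)` construction above; the diagnosis codes are illustrative:

```python
from hccinfhir import HCCInFHIR, Demographics

processor = HCCInFHIR(model_name="CMS-HCC Model V28")
result = processor.calculate_from_diagnosis(["E119", "I5030"], Demographics(age=70, sex="F"))

for detail in result.hcc_details:
    # hcc, label, is_chronic, and coefficient mirror the fields populated in calculate_raf
    status = "chronic" if detail.is_chronic else "non-chronic"
    print(f"HCC {detail.hcc}: {detail.label} ({status}, coef={detail.coefficient})")
```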
hccinfhir/sample_files/sample_834_02.txt ADDED
@@ -0,0 +1 @@
+ ISA*00* *00* *ZZ*CADHCS_5010_834*30*999999991 *250124*1927*^*00501*000000001*0*P*:~GS*BE*CADHCS_5010_834*999999991*20250124*192730*10000001*X*005010X220A1~ST*834*0001*005010X220A1~BGN*00*DHCS834-DA-20250124-Sample PACE-001*20250124*19273000****2~QTY*TO*1~N1*P5*California Department of Health Care Services.........*FI*999999990~N1*IN*Sample PACE Inc.*FI*999999991~INS*Y*18*001*AI*A*E**AC~REF*0F*randomParticipantID~REF*1L*randomCIN1~REF*17*;;202501;~REF*23*4;20200101;;~REF*3H*19;10;randomCaseNum1;;~REF*6O*;W;Y;;60;~REF*ZZ*01051;41609;;;;30451;41651;;;~NM1*IL*1*randomLastName*randomFirstName*A~PER*IP**TE*randomPhone~N3*randomStreetAddress1~N4*LOS ANGELES CA*CA*90019**CY*19~DMG*D8*randomDoB*F**7~LUI*LD*SPA**7~HD*021**LTC*010;51~DTP*348*D8*20250101~DTP*349*D8*20250131~REF*17*N;;;;;;;;;;;;;1~REF*9V*9;9;0~REF*CE*10;401;;;;;;~REF*RB*10~REF*ZX*19~REF*ZZ*;;10~SE*29*0001~GE*1*10000001~IEA*1*000000001~
hccinfhir/sample_files/sample_834_03.txt ADDED
@@ -0,0 +1 @@
+ ISA*00* *00* *ZZ*CADHCS_5010_834*30*999999992 *250812*1936*^*00501*000000002*0*P*:~GS*BE*CADHCS_5010_834*999999992*20250812*193641*10000002*X*005010X220A1~ST*834*0001*005010X220A1~BGN*00*DHCS834-DA-20250812-Sample South LA PACE-957-001*20250812*19364100****2~QTY*TO*1~N1*P5*California Department of Health Care Services.........*FI*999999990~N1*IN*Sample South LA PACE Inc.*FI*999999992~INS*Y*18*001*AI*A*C**AC~REF*0F*randomMemberId~REF*1L*randomCIN2~REF*17*;;202508;~REF*23*2;20200201;;;~REF*3H*19;60;randomCaseNum2;;~REF*6O*;W;Y;;08;~REF*DX*H9999;006;20250201;;;~REF*F6*randomMbi~REF*QQ*;;202001;202001~REF*ZZ*95701;;;;;35201;;;;~NM1*IL*1*randomLastName*randomFirstName~PER*IP**TE*randomPhone~N3*randomAddress1~N4*LONG BEACH CA*CA*90813**CY*19~DMG*D8*randomDoB*F**7~LUI*LD*SPA**7~HD*021**LTC*957;01~DTP*348*D8*20250801~DTP*349*D8*20250831~REF*17*F;;;;;;;;;;;;;1~REF*9V*2;2;2~REF*CE*60;401;80;401;;;;~REF*RB*60~REF*ZX*19~REF*ZZ*;;10~SE*32*0001~GE*1*10000002~IEA*1*000000002~
hccinfhir/sample_files/sample_834_04.txt ADDED
@@ -0,0 +1 @@
+ ISA*00* *00* *ZZ*CADHCS_5010_834*30*999999992 *251022*2000*^*00501*000000003*0*P*:~GS*BE*CADHCS_5010_834*999999992*20251022*200019*10000003*X*005010X220A1~ST*834*0001*005010X220A1~BGN*00*DHCS834-DA-20251022-Sample South LA PACE-957-001*20251022*20001900****2~QTY*TO*1~N1*P5*California Department of Health Care Services.........*FI*999999990~N1*IN*Sample South LA PACE Inc.*FI*999999992~INS*Y*18*001*AI*A*C**AC~REF*0F*randomMemberId~REF*1L*randomCIN3~REF*17*202605;;202510;~REF*23*2;20200301;;;~REF*3H*19;16;randomCode;;08~REF*6O*;A;Y;;29;~REF*DX*H9999;001;20251101;;;~REF*F6*randomMbi~REF*QQ*;;202002;202002~REF*ZZ*95701;;;;;20101;;;;~NM1*IL*1*randomLastName*randomFirstName~PER*IP**TE*randomPhone~N3*randomAddress1~N4*LOS ANGELES CA*CA*90044**CY*19~DMG*D8*randomDoB*F**:RET:2054-5~HD*021**LTC*957;01~DTP*348*D8*20251001~DTP*349*D8*20251031~REF*17*F;;;;1~REF*9V*3;2;2~REF*CE*16;401;80;401;;;;~REF*RB*16~REF*ZX*19~REF*ZZ*;;10~SE*31*0001~GE*1*10000003~IEA*1*000000003~
hccinfhir/sample_files/sample_834_05.txt ADDED
@@ -0,0 +1 @@
+ ISA*00* *00* *ZZ*CADHCS_5010_834*30*999999992 *251023*1959*^*00501*000000004*0*P*:~GS*BE*CADHCS_5010_834*999999992*20251023*195928*10000004*X*005010X220A1~ST*834*0001*005010X220A1~BGN*00*DHCS834-DA-20251023-Sample South LA PACE-957-001*20251023*19592800****2~QTY*TO*1~N1*P5*California Department of Health Care Services.........*FI*999999990~N1*IN*Sample South LA PACE Inc.*FI*999999992~INS*Y*18*001*AI*A*C**AC~REF*0F*randomMemberId~REF*1L*randomCIN4~REF*17*202601;;202510;~REF*23*6;20200401;20200101;;~REF*3H*19;17;randomId1;;07~REF*6O*;W;Y;;19;~REF*DX*H9999;006;20230401;;;~REF*F6*randomId2~REF*QQ*;;202003;202003~REF*ZZ*957P4;;;;;;;;;~NM1*IL*1*randomLastName*randomFirstName~PER*IP**TE*randomPhone~N3*randomAddress1~N4*LONG BEACH CA*CA*90810**CY*19~DMG*D8*randomDoB*F**:RET:2135-2~HD*001**LTC*957;P4~DTP*348*D8*20251001~DTP*349*D8*20250930~AMT*R*1237~REF*17*F;;;;1~REF*9V*3;1;0~REF*CE*17;501;2K;691;;;;~REF*ZX*19~REF*ZZ*;;10~SE*31*0001~GE*1*10000004~IEA*1*000000004~
hccinfhir/sample_files/sample_834_06.txt ADDED
@@ -0,0 +1 @@
+ ISA*00* *00* *ZZ*CADHCS_5010_834*30*999999991 *250206*2008*^*00501*000000005*0*P*:~GS*BE*CADHCS_5010_834*999999991*20250206*200823*10000005*X*005010X220A1~ST*834*0001*005010X220A1~BGN*00*DHCS834-DA-20250206-Sample PACE-001*20250206*20082300****2~QTY*TO*2~N1*P5*California Department of Health Care Services.........*FI*999999990~N1*IN*Sample PACE Inc.*FI*999999991~INS*Y*18*001*AI*A*E**AC~REF*0F*randomMemberId~REF*1L*randomCIN5~REF*17*;;202502;~REF*23*3;20200501;;~REF*3H*19;60;randomCaseNum1;;~REF*6O*;A;Y;D;67;~REF*Q4*randomProviderId;~REF*ZZ*01059;;;;;010S1;;;;~NM1*IL*1*randomLName1*randomFName1~PER*IP**TE*randomPhone1~N3*randomFullAddress1~N4*LOS ANGELES CA*CA*90037**CY*19~DMG*D8*randomDoB1*F**:RET:2054-5~NM1*31*1~N3*randomAddress~N4*LOS ANGELES CA*CA*90037~HD*001**LTC*010;59~DTP*348*D8*20250201~DTP*349*D8*20250131~REF*17*N;;;;;;;;;;;;;1~REF*CE*60;001;80;891;;;;~REF*RB*60~REF*ZX*19~REF*ZZ*;;10~HD*021**LTC*010;S1~DTP*348*D8*20250101~DTP*349*D8*20250131~REF*17*N;;;;;;;;;;;;;1~REF*CE*60;401;80;891;;;;~REF*RB*60~REF*ZX*19~REF*ZZ*;;11~INS*Y*18*001*AI*A*C**AC~REF*0F*randomMemberId2~REF*1L*randomCIN6~REF*17*;;202502;~REF*23*9;20200601;;~REF*3H*19;10;randomCaseNum2;;~REF*6O*;A;Y;;25;~REF*DX*;;;S5617;D635;20240901~REF*F6*randomId1~REF*QQ*;;202004;202004~REF*ZZ*01001;;;;;30401;;;;~NM1*IL*1*randomLName2*randomFName2*M~PER*IP**TE*randomPhone2~N3*randomAddress2~N4*LOS ANGELES CA*CA*90029**CY*19~DMG*D8*randomDoB2*M**:RET:2135-2~HD*021**LTC*010;01~DTP*348*D8*20250201~DTP*349*D8*20250228~REF*17*D;;;;;;;;;;;;;1~REF*9V*2;2;2~REF*CE*10;401;9G;999;80;401;;~REF*RB*10~REF*ZX*19~REF*ZZ*;;10~SE*64*0001~GE*1*10000005~IEA*1*000000005~
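The five files above are the new CA DHCS 834 scenarios referenced in the README's sample-data section. A minimal sketch of loading them; the `hccinfhir.samples` import path is an assumption based on the `samples.py` module listed in the RECORD:

```python
from hccinfhir.samples import get_834_sample, list_available_samples  # import path assumed

# Cases 1-6 are available in 0.2.3; each returns the raw X12 834 string
for case in range(1, 7):
    enrollment_834 = get_834_sample(case)
    print(case, enrollment_834[:50])

print(list_available_samples())
```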
hccinfhir/utils.py CHANGED
@@ -244,4 +244,71 @@ def load_coefficients(file_path: str) -> Dict[Tuple[str, ModelName], float]:
          except (ValueError, IndexError):
              continue  # Skip malformed lines

-     return coefficients
+     return coefficients
+
+
+ def load_labels(file_path: str) -> Dict[Tuple[str, ModelName], str]:
+     """
+     Load HCC labels from a CSV file.
+     Expected format: cc,label,model_domain,model_version,...
+
+     Args:
+         file_path: Filename or path to the CSV file
+
+     Returns:
+         Dictionary mapping (cc, model_name) to label string
+
+     Raises:
+         FileNotFoundError: If file cannot be found
+         RuntimeError: If file cannot be loaded or parsed
+     """
+     labels: Dict[Tuple[str, ModelName], str] = {}
+
+     try:
+         resolved_path = resolve_data_file(file_path)
+         with open(resolved_path, "r", encoding="utf-8") as file:
+             content = file.read()
+     except FileNotFoundError as e:
+         raise FileNotFoundError(f"Could not load labels: {e}")
+     except Exception as e:
+         raise RuntimeError(f"Error loading labels file '{file_path}': {e}")
+
+     for line in content.splitlines()[1:]:  # Skip header
+         try:
+             parts = line.strip().split(',')
+             if len(parts) < 4:
+                 continue
+             cc_raw, label, model_domain, model_version = parts[0], parts[1], parts[2], parts[3]
+
+             # Strip the RxHCC/HCC prefix to get just the number
+             # (RxHCC first; replacing 'HCC' first would leave a stray 'Rx')
+             cc = cc_raw.replace('RxHCC', '').replace('HCC', '')
+
+             # Handle quoted labels with embedded commas
+             if label.startswith('"') and not label.endswith('"'):
+                 label_parts = [label]
+                 for i, p in enumerate(parts[2:], start=2):
+                     label_parts.append(p)
+                     if p.endswith('"'):
+                         # Recalculate domain and version after the quoted label
+                         model_domain = parts[i + 1] if len(parts) > i + 1 else ''
+                         model_version = parts[i + 2] if len(parts) > i + 2 else ''
+                         break
+                 label = ','.join(label_parts)
+             label = label.strip('"')
+
+             # Construct the model name based on domain
+             if model_domain == 'ESRD':
+                 model_name = f"CMS-HCC {model_domain} Model {model_version}"
+             else:
+                 model_name = f"{model_domain} Model {model_version}"
+
+             key = (cc, model_name)
+             if key not in labels:
+                 labels[key] = label
+         except (ValueError, IndexError):
+             continue  # Skip malformed lines
+
+     return labels
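A minimal usage sketch for the new `load_labels`, assuming the bundled `ra_labels_2026.csv` (listed in the RECORD below) resolves via `resolve_data_file`, and that the `model_domain` column holds values like "CMS-HCC"; the key shape follows the docstring and prefix stripping above:

```python
from hccinfhir.utils import load_labels

labels = load_labels("ra_labels_2026.csv")  # bundled 2026 labels file (name from RECORD)
# Keys are (cc, model_name); cc is the bare number after stripping the HCC/RxHCC prefix
print(labels.get(("19", "CMS-HCC Model V28")))  # label string for HCC 19, or None if absent
```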
hccinfhir-0.2.1.dist-info/METADATA → hccinfhir-0.2.3.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: hccinfhir
- Version: 0.2.1
+ Version: 0.2.3
  Summary: HCC Algorithm for FHIR Resources
  Project-URL: Homepage, https://github.com/mimilabs/hccinfhir
  Project-URL: Issues, https://github.com/mimilabs/hccinfhir/issues
@@ -10,6 +10,7 @@ Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3
  Requires-Python: >=3.8
  Requires-Dist: pydantic>=2.10.3
+ Requires-Dist: typing-extensions>=4.6.0
  Description-Content-Type: text/markdown

  # HCCInFHIR
@@ -62,6 +63,7 @@ print(f"HCCs: {result.hcc_list}")
  - [Demographic Prefix Override](#demographic-prefix-override)
  - [Custom File Path Resolution](#custom-file-path-resolution)
  - [Batch Processing](#batch-processing)
+ - [Large-Scale Processing with Databricks](#large-scale-processing-with-databricks)
  - [Converting to Dictionaries](#converting-to-dictionaries)
  - [Sample Data](#sample-data)
  - [Testing](#testing)
@@ -78,7 +80,7 @@ print(f"HCCs: {result.hcc_list}")
  - **Custom Data Files**: Full support for custom coefficients, mappings, and hierarchies
  - **Flexible File Resolution**: Absolute paths, relative paths, or bundled data files
  - **Type-Safe**: Built on Pydantic with full type hints
- - **Well-Tested**: 155 comprehensive tests covering all features
+ - **Well-Tested**: 189 comprehensive tests covering all features

  ## 📊 Data Sources & Use Cases

@@ -834,6 +836,146 @@ with open("risk_scores.json", "w") as f:
      json.dump(results, f, indent=2)
  ```

+ ### Large-Scale Processing with Databricks
+
+ For processing millions of beneficiaries, use PySpark's `pandas_udf` for distributed computation. The hccinfhir logic is well suited to batch operations with clear, simple transformations.
+
+ **Performance Benchmark**:
+
+ ![Databricks Performance Chart](hccinfhir_pandas_udf_performance_chart.png)
+
+ *Tested with ACO data on Databricks Runtime 17.3 LTS, Worker: i3.4xlarge (122GB, 16 cores)*
+
+ The chart shows that execution time varies with condition complexity: members with more diagnoses require additional internal processing loops. While the relationship isn't perfectly linear, **1 million members can be processed in under 2 minutes** with this configuration.
+
+ ```python
+ from pyspark.sql import SparkSession
+ from pyspark.sql.types import StructType, StructField, FloatType, ArrayType, StringType
+ from pyspark.sql import functions as F
+ from pyspark.sql.functions import pandas_udf
+ import pandas as pd
+
+ from hccinfhir import HCCInFHIR, Demographics
+
+ # Define the return schema
+ hcc_schema = StructType([
+     StructField("risk_score", FloatType(), True),
+     StructField("risk_score_demographics", FloatType(), True),
+     StructField("risk_score_chronic_only", FloatType(), True),
+     StructField("risk_score_hcc", FloatType(), True),
+     StructField("hcc_list", ArrayType(StringType()), True)
+ ])
+
+ # Initialize processor (will be serialized to each executor)
+ hcc_processor = HCCInFHIR(model_name="CMS-HCC Model V28")
+
+ # Create the pandas UDF
+ @pandas_udf(hcc_schema)
+ def calculate_hcc(
+     age_series: pd.Series,
+     sex_series: pd.Series,
+     diagnosis_series: pd.Series
+ ) -> pd.DataFrame:
+     results = []
+
+     for age, sex, diagnosis_codes in zip(age_series, sex_series, diagnosis_series):
+         try:
+             demographics = Demographics(age=int(age), sex=sex)
+
+             # diagnosis_codes can be passed directly; any iterable works, including numpy arrays
+             result = hcc_processor.calculate_from_diagnosis(diagnosis_codes, demographics)
+
+             results.append({
+                 'risk_score': float(result.risk_score),
+                 'risk_score_demographics': float(result.risk_score_demographics),
+                 'risk_score_chronic_only': float(result.risk_score_chronic_only),
+                 'risk_score_hcc': float(result.risk_score_hcc),
+                 'hcc_list': result.hcc_list
+             })
+         except Exception as e:
+             # Log error and return nulls for failed rows
+             print(f"ERROR processing row: {e}")
+             results.append({
+                 'risk_score': None,
+                 'risk_score_demographics': None,
+                 'risk_score_chronic_only': None,
+                 'risk_score_hcc': None,
+                 'hcc_list': None
+             })
+
+     return pd.DataFrame(results)
+
+ # Apply the UDF to your DataFrame
+ # Assumes df has columns: age, patient_gender, diagnosis_codes (array of strings)
+ df = df.withColumn(
+     "hcc_results",
+     calculate_hcc(
+         F.col("age"),
+         F.col("patient_gender"),
+         F.col("diagnosis_codes")
+     )
+ )
+
+ # Expand the struct into separate columns
+ df = df.select(
+     "*",
+     F.col("hcc_results.risk_score").alias("risk_score"),
+     F.col("hcc_results.risk_score_demographics").alias("risk_score_demographics"),
+     F.col("hcc_results.risk_score_chronic_only").alias("risk_score_chronic_only"),
+     F.col("hcc_results.risk_score_hcc").alias("risk_score_hcc"),
+     F.col("hcc_results.hcc_list").alias("hcc_list")
+ ).drop("hcc_results")
+ ```
+
+ **Performance Tips**:
+ - **Repartition** your DataFrame before applying the UDF to balance the workload across executors
+ - **Cache** the processor initialization by defining it at module level
+ - **Batch size**: `pandas_udf` processes data in batches; Spark handles optimal batch sizing automatically
+ - **Install hccinfhir** on all cluster nodes: `%pip install hccinfhir` in a notebook cell, or add it to a cluster init script
+
+ **Extended Schema with Demographics**:
+
+ ```python
+ # Include additional demographic parameters
+ @pandas_udf(hcc_schema)
+ def calculate_hcc_full(
+     age_series: pd.Series,
+     sex_series: pd.Series,
+     dual_status_series: pd.Series,
+     diagnosis_series: pd.Series
+ ) -> pd.DataFrame:
+     results = []
+
+     for age, sex, dual_status, diagnosis_codes in zip(
+         age_series, sex_series, dual_status_series, diagnosis_series
+     ):
+         try:
+             demographics = Demographics(
+                 age=int(age),
+                 sex=sex,
+                 dual_elgbl_cd=dual_status if dual_status else "00"
+             )
+             result = hcc_processor.calculate_from_diagnosis(diagnosis_codes, demographics)
+
+             results.append({
+                 'risk_score': float(result.risk_score),
+                 'risk_score_demographics': float(result.risk_score_demographics),
+                 'risk_score_chronic_only': float(result.risk_score_chronic_only),
+                 'risk_score_hcc': float(result.risk_score_hcc),
+                 'hcc_list': result.hcc_list
+             })
+         except Exception:
+             results.append({
+                 'risk_score': None,
+                 'risk_score_demographics': None,
+                 'risk_score_chronic_only': None,
+                 'risk_score_hcc': None,
+                 'hcc_list': None
+             })
+
+     return pd.DataFrame(results)
+ ```
+
  ### Converting to Dictionaries

  All Pydantic models support dictionary conversion for JSON serialization, database storage, or legacy code:
@@ -895,8 +1037,8 @@ result = processor.run([eob], demographics)  # Note: [eob] not eob
  claim = get_837_sample(0)  # Cases 0-12 (returns string)
  claims = get_837_sample_list([0, 1, 2])  # Returns list

- # X12 834 enrollment samples
- enrollment_834 = get_834_sample(1)  # Currently only case 1 available (returns string)
+ # X12 834 enrollment samples (6 CA DHCS scenarios)
+ enrollment_834 = get_834_sample(1)  # Cases 1-6 available (returns string)

  # List all available samples
  info = list_available_samples()
@@ -914,7 +1056,7 @@ hatch shell
  # Install in development mode
  pip install -e .

- # Run all tests (155 tests)
+ # Run all tests (189 tests)
  pytest tests/

  # Run specific test file
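As a follow-up to the Databricks section's repartition tip, a minimal sketch under stated assumptions: the source table name and partition multiplier are illustrative, `spark` is the session Databricks provides in notebooks, and `calculate_hcc` is the UDF defined in the README above.

```python
# Hypothetical source table and partition count; repartition() before the UDF is the point
n_partitions = spark.sparkContext.defaultParallelism * 4  # a few partitions per core

df = (
    spark.table("member_diagnoses")   # hypothetical input with age/patient_gender/diagnosis_codes
         .repartition(n_partitions)   # balance rows across executors before applying the UDF
         .withColumn(
             "hcc_results",
             calculate_hcc(F.col("age"), F.col("patient_gender"), F.col("diagnosis_codes")),
         )
)
```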
hccinfhir-0.2.1.dist-info/RECORD → hccinfhir-0.2.3.dist-info/RECORD RENAMED
@@ -1,21 +1,21 @@
- hccinfhir/__init__.py,sha256=CKpYTUSzZdP3s1eB74w5JTe9OS3MtcvuUkv6ymgSyic,1085
+ hccinfhir/__init__.py,sha256=3aFYtjTklZJg3wIlnMJNgfDBaDCfKXVlYsacdsZ9L4I,1113
  hccinfhir/constants.py,sha256=C4Vyjtzgyd4Jm2I2X6cTYQZLe-jAMC8boUcy-7OXQDQ,8473
- hccinfhir/datamodels.py,sha256=NULDYb57R61v4EklOI_AAIuC1-OkLFH1InbAad48dZM,10601
- hccinfhir/defaults.py,sha256=tMNym0R6Nr6ibKTqOu6N1vLcdekL0ZmHyDNIOCOMsP4,1292
+ hccinfhir/datamodels.py,sha256=xGh9E5RVi4vONhtIZw2XiaFwVLc5UK027trY31YMUWc,15457
+ hccinfhir/defaults.py,sha256=aKdXPhf9bYUzpGvXM1GIXZaKxqkKInt3v9meLB9fWog,1394
  hccinfhir/extractor.py,sha256=xL9c2VT-e2I7_c8N8j4Og42UEgVuCzyn9WFp3ntM5Ro,1822
- hccinfhir/extractor_834.py,sha256=dIqovUOWm_7k_c6sUqTIzQua_kTQ8dLGy3-4-LECW3Y,18855
+ hccinfhir/extractor_834.py,sha256=zH2nOUJvIJvbDLf6HJWmwCw2yAjT-6RCJyuH4kmIKIQ,27862
  hccinfhir/extractor_837.py,sha256=fGsvBTWIj9dsHLGGR67AdlYDSsFi5qnSVlTgwkL1f-E,15334
  hccinfhir/extractor_fhir.py,sha256=wUN3vTm1oTZ-KvfcDebnpQMxAC-7YlRKv12Wrv3p85A,8490
  hccinfhir/filter.py,sha256=j_yD2g6RBXVUV9trKkWzsQ35x3fRvfKUPvEXKUefI64,2007
- hccinfhir/hccinfhir.py,sha256=rCnExvxZGKi1vLD4cHQ0nzPAGV6e-8C15MtJ2p7zAAk,11160
- hccinfhir/model_calculate.py,sha256=KSeZjKYBCfBYYIWOIckDg941OC8050MX2F7BZ2l3V8g,7663
+ hccinfhir/hccinfhir.py,sha256=NydnH3WBvuyskn76hY70LpUS6XuIEoax_kip1mgfpHw,11225
+ hccinfhir/model_calculate.py,sha256=_TUWNVUsBym0pre3wltXvRuipQaONQ0QBfWPFNAeDsQ,8347
  hccinfhir/model_coefficients.py,sha256=5n3QzHX6FJ3MlO0cV9NS7Bqt-lxzVvT_M3zFaWq6Gng,4685
  hccinfhir/model_demographics.py,sha256=nImKtJCq1HkR9w2GU8aikybJFgow71CPufBRV8Jn7fM,8932
  hccinfhir/model_dx_to_cc.py,sha256=Yjc6xKI-jMXsbOzS_chc4NI15Bwagb7BwZZ8cKQaTbk,1540
  hccinfhir/model_hierarchies.py,sha256=cboUnSHZZfOxA8QZKV4QIE-32duElssML32OqYT-65g,1542
  hccinfhir/model_interactions.py,sha256=g6jK27Xu8RQUHS3lk4sk2v6w6wqd52mdbGn0BsnR7Pk,21394
  hccinfhir/samples.py,sha256=2VSWS81cv9EnaHqK7sd6CjwG6FUI9E--5wHgD000REI,9952
- hccinfhir/utils.py,sha256=9ki4o1wXyAYYr8BR9Skkz0PKL_1H_HYNV4LalEsASE0,8260
+ hccinfhir/utils.py,sha256=hQgHjuOcEQcnxemTZwqFBHWvLC5-C1Gup9cDXEYlZjE,10770
  hccinfhir/data/__init__.py,sha256=SGiSkpGrnxbvtEFMMlk82NFHOE50hFXcgKwKUSuVZUg,45
  hccinfhir/data/hcc_is_chronic.csv,sha256=Bwd-RND6SdEsKP-assoBaXnjUJAuDXhSkwWlymux72Y,19701
  hccinfhir/data/hcc_is_chronic_without_esrd_model.csv,sha256=eVVI4_8mQNkiBiNO3kattfT_zfcV18XgmiltdzZEXSo,17720
@@ -29,8 +29,14 @@ hccinfhir/data/ra_eligible_cpt_hcpcs_2025.csv,sha256=-tMvv2su5tsSbGUh6fZZCMUEkXI
  hccinfhir/data/ra_eligible_cpt_hcpcs_2026.csv,sha256=EYGN7k_rgCpJe59lL_yNInUcCkdETDWGSFTXII3LZ0Y,40497
  hccinfhir/data/ra_hierarchies_2025.csv,sha256=HQSPNloe6mvvwMgv8ZwYAfWKkT2b2eUvm4JQy6S_mVQ,13045
  hccinfhir/data/ra_hierarchies_2026.csv,sha256=A6ZQZb0rpRWrySBB_KA5S4PGtMxWuzB2guU3aBE09v0,19596
+ hccinfhir/data/ra_labels_2026.csv,sha256=YstfP7s-3ZwjP4I_GYPPj3_yn-PQK3Q0Q_MVYZhsfjY,50248
  hccinfhir/sample_files/__init__.py,sha256=SGiSkpGrnxbvtEFMMlk82NFHOE50hFXcgKwKUSuVZUg,45
  hccinfhir/sample_files/sample_834_01.txt,sha256=J2HMXfY6fAFpV36rvLQ3QymRRS2TPqf3TQY6CNS7TrE,1627
+ hccinfhir/sample_files/sample_834_02.txt,sha256=vSvjM69kKfOW9e-8dvlO9zDcRPpOD7LmekLu68z4aB4,926
+ hccinfhir/sample_files/sample_834_03.txt,sha256=pD4UTUFCEHxKu3bz3ZZdWo5b8Y1UWTXAo7PmFjWsukU,999
+ hccinfhir/sample_files/sample_834_04.txt,sha256=1Cv8kN7At1ce60kXBGlMr5DhLMsuw2clnVDi02mvBJA,991
+ hccinfhir/sample_files/sample_834_05.txt,sha256=hjQ5SEgj0cCNZyWas1-sVL9gm8m3rC4R65JHIcbAPRk,993
+ hccinfhir/sample_files/sample_834_06.txt,sha256=oC3e4UMmmhVKXI8eB7SlLyJ8kZX4NemlzI_WGznO-48,1659
  hccinfhir/sample_files/sample_837_0.txt,sha256=eggrD259uHa05z2dfxWBpUDseSDp_AQcLyN_adpHyTw,5295
  hccinfhir/sample_files/sample_837_1.txt,sha256=E155MdemSDYoXokuTXUZ6Br_RGGedYv5t5dh-eMRmuk,1322
  hccinfhir/sample_files/sample_837_10.txt,sha256=zSJXI78vHAksA7FFQEVLvepefdpMM2_AexLyoDimV3Q,1129
@@ -48,7 +54,7 @@ hccinfhir/sample_files/sample_eob_1.json,sha256=_NGSVR2ysFpx-DcTvyga6dFCzhQ8Vi9f
  hccinfhir/sample_files/sample_eob_2.json,sha256=FcnJcx0ApOczxjJ_uxVLzCep9THfNf4xs9Yf7hxk8e4,1769
  hccinfhir/sample_files/sample_eob_200.ndjson,sha256=CxpjeQ1DCMUzZILaM68UEhfxO0p45YGhDDoCZeq8PxU,1917986
  hccinfhir/sample_files/sample_eob_3.json,sha256=4BW4wOMBEEU9RDfJR15rBEvk0KNHyuMEh3e055y87Hc,2306
- hccinfhir-0.2.1.dist-info/METADATA,sha256=FrzAPidGEXS8l-1vO_QRlemPZegXavDpn--TkmwQBxY,31674
- hccinfhir-0.2.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
- hccinfhir-0.2.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- hccinfhir-0.2.1.dist-info/RECORD,,
+ hccinfhir-0.2.3.dist-info/METADATA,sha256=YHHcOAObdo2gWJtPmP6y05-EXeXHpuE40W1pdUXlydw,37132
+ hccinfhir-0.2.3.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+ hccinfhir-0.2.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ hccinfhir-0.2.3.dist-info/RECORD,,