cdiscbuilder 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -149,9 +149,13 @@ class SQLDerivation(BaseDerivation):
149
149
  # The original code raised ValueError here.
150
150
  raise ValueError(f"Column {source} not found in target dataset")
151
151
 
152
- # Apply mapping if present (legacy / optimization for local cols)
153
- if "mapping" in derivation:
154
- series = self._apply_mapping(series, derivation["mapping"])
152
+ # Auto-Strip Whitespace
153
+ if series.dtype == pl.Utf8:
154
+ series = series.str.strip_chars()
155
+
156
+ # Apply mapping if present (legacy / optimization for local cols)
157
+ if "mapping" in derivation:
158
+ series = self._apply_mapping(series, derivation["mapping"])
155
159
 
156
160
  return series
157
161
 
@@ -425,16 +429,22 @@ class SQLDerivation(BaseDerivation):
425
429
  # Calculate distance to target date
426
430
  if target_col in subject_data.columns and date_col in subject_data.columns:
427
431
  # Get target date (should be same for all rows of this subject)
428
- target_date = subject_data[target_col][0]
429
-
430
432
  # Calculate date differences and find closest
431
- # Handle partial dates by using strptime with appropriate format
433
+ # Handle multiple formats: "YYYY-MM-DD HH:MM:SS", "YYYY-MM-DDTHH:MM:SS" (ISO), or "YYYY-MM-DD"
434
+ # We convert everything to Datetime for precision comparison (ms level)
435
+ def parse_to_datetime(expr: pl.Expr) -> pl.Expr:
436
+ return (
437
+ expr.str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S", strict=False)
438
+ .fill_null(expr.str.strptime(pl.Datetime, "%Y-%m-%dT%H:%M:%S", strict=False))
439
+ .fill_null(expr.str.strptime(pl.Date, "%Y-%m-%d", strict=False).cast(pl.Datetime))
440
+ )
441
+
432
442
  with_diff = subject_data.with_columns(
433
443
  (
434
- pl.col(date_col).str.strptime(pl.Date, "%Y-%m-%d", strict=False)
435
- - pl.lit(target_date).str.strptime(pl.Date, "%Y-%m-%d", strict=False)
444
+ parse_to_datetime(pl.col(date_col))
445
+ - parse_to_datetime(pl.lit(target_date))
436
446
  )
437
- .dt.total_days()
447
+ .dt.total_milliseconds()
438
448
  .abs()
439
449
  .alias("date_diff")
440
450
  )
@@ -111,6 +111,10 @@ class FindingProcessor:
111
111
  elif source_expr in source_df.columns:
112
112
  series = source_df[source_expr]
113
113
 
114
+ # Auto-Strip
115
+ if series is not None and pd.api.types.is_object_dtype(series):
116
+ series = series.astype(str).str.strip().replace('nan', None)
117
+
114
118
  if regex_extract and series is not None:
115
119
  # Extract group 1
116
120
  series = series.astype(str).str.extract(regex_extract)[0]
@@ -185,6 +185,13 @@ class GeneralProcessor:
185
185
  # Source defined but not found.
186
186
  print(f"Warning: Source column '{source_expr}' not found for '{domain_name}.{target_col}'. Filling with NaN.")
187
187
  series = pd.Series([None] * len(pivoted))
188
+
189
+ # Auto-Strip Whitespace for strings
190
+ if series is not None and pd.api.types.is_object_dtype(series):
191
+ try:
192
+ series = series.astype(str).str.strip().replace('nan', None)
193
+ except:
194
+ pass
188
195
  else:
189
196
  print(f"Warning: No source or literal defined for '{domain_name}.{target_col}'. Filling with NaN.")
190
197
  series = pd.Series([None] * len(pivoted))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cdiscbuilder
3
- Version: 1.1.5
3
+ Version: 1.1.7
4
4
  Summary: A package to convert ODM XML to SDTM/ADaM Datasets
5
5
  Author-email: Ming-Chun Chen <hellomingchun@gmail.com>
6
6
  Requires-Python: >=3.8
@@ -7,7 +7,7 @@ cdiscbuilder/adam/adam_derivation/engine.py,sha256=5Wk_tc3c6nRKBiiL5gE7YXxvdPgXV
7
7
  cdiscbuilder/adam/adam_derivation/derivations/__init__.py,sha256=iu7xzlQF_uK_5nwYw5ovEsBDNdaX0S8oVFYYxcxheTc,213
8
8
  cdiscbuilder/adam/adam_derivation/derivations/base.py,sha256=thbi0UD17Z8hUA5G6np_wWDcFth-UmoY7moS2KzvIps,1242
9
9
  cdiscbuilder/adam/adam_derivation/derivations/function_derivation.py,sha256=mPBet5JR10RS9euaS9ZLE1GCMRS3Grp9Gc3hdiQOVDI,7167
10
- cdiscbuilder/adam/adam_derivation/derivations/sql_derivation.py,sha256=rXk7vvvMozgAQqFZs2IM5VPGreRFoDqxRkmJ8T62x9s,22603
10
+ cdiscbuilder/adam/adam_derivation/derivations/sql_derivation.py,sha256=vy8u8Et9-DF-rObHjxTWsoaMp95z5cZdqTtP69HGbyM,23166
11
11
  cdiscbuilder/adam/adam_derivation/functions/__init__.py,sha256=Cv4pLpgHTr9i2zf7owbpZwMyxm8JLFmX4J31dvX0jYg,1773
12
12
  cdiscbuilder/adam/adam_derivation/functions/get_bmi.py,sha256=RR1lQpRSDO4cEkhxjnPutAy_xfVvfEwmhYR90XK2eMM,229
13
13
  cdiscbuilder/adam/adam_derivation/loaders/__init__.py,sha256=eUnUKzyahIaLIqjn39sYzXvzPpfabZoipkPdQ5mA9IE,116
@@ -42,16 +42,16 @@ cdiscbuilder/sdtm/engine/functions.py,sha256=SdyhFftFoxIzL-pzIcjVsf9CbZmxKKo7J1J
42
42
  cdiscbuilder/sdtm/engine/processor.py,sha256=7ohnIY8KEGxUA0ENZm4_3yKkxaJzkboRTeL3d5AX-Ac,5040
43
43
  cdiscbuilder/sdtm/engine/validate.py,sha256=vGUirL3yPk6a9D6MlRpmHf54tB-PMNH7FMKCxsXROgQ,3799
44
44
  cdiscbuilder/sdtm/engine/classes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
- cdiscbuilder/sdtm/engine/classes/finding.py,sha256=DtOdQWZU_68jVOyZNec2HXnMl8UfNBK1xiI781AYuoo,8302
46
- cdiscbuilder/sdtm/engine/classes/general.py,sha256=0-KP2mNfwRjeunDt-ZY0q2t-_k7bSwCglXrotsPN3kI,17082
45
+ cdiscbuilder/sdtm/engine/classes/finding.py,sha256=g_16w_PBHIZLe8jH1bpxY-QKzaSAXrtAhFYOIetDCRU,8530
46
+ cdiscbuilder/sdtm/engine/classes/general.py,sha256=htUgE12XL_hGRW5VEQrfXlX_-j5KQwvEHgoXIa4TbCE,17433
47
47
  cdiscbuilder/sdtm/loader/__init__.py,sha256=ZL0EV0TnNNvOslawOpRK0wkzIjVeiPzY2IpfyiN0lgw,104
48
48
  cdiscbuilder/sdtm/loader/load.py,sha256=30x9fAEjnuRq313SRq9I7mBwrWl4sEhiKkmrr9x1cjo,6171
49
49
  cdiscbuilder/sdtm/loader/tests/__init__.py,sha256=ujCd31KMb52gvkWkVBBogLBda3aiRmbqS0nRS-yTnk0,69
50
50
  cdiscbuilder/sdtm/loader/tests/test_load.py,sha256=yrXFQlzH_EkN6HxJc7ItFie8xqwrs2XxwNu2wGaxjok,11180
51
51
  cdiscbuilder/tlf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- cdiscbuilder-1.1.5.dist-info/licenses/LICENSE,sha256=Da4Pm8tEdB1ycKkAkF2EwfHd-8l0wrJ_uwejzfBRHFE,1092
53
- cdiscbuilder-1.1.5.dist-info/METADATA,sha256=EvG3y-fCShEnUX2cPuOVOSj-PQBH7FpMXEu6-J4SF-Y,356
54
- cdiscbuilder-1.1.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
55
- cdiscbuilder-1.1.5.dist-info/entry_points.txt,sha256=KsuiJD4hsfjRqDkdvWgLFPeQCz7QBl73RA665BmF0Pw,53
56
- cdiscbuilder-1.1.5.dist-info/top_level.txt,sha256=3x5ENgnXBENgXKp5Zsht_4_OmA5wLVvDuk0UEkYOGNw,13
57
- cdiscbuilder-1.1.5.dist-info/RECORD,,
52
+ cdiscbuilder-1.1.7.dist-info/licenses/LICENSE,sha256=Da4Pm8tEdB1ycKkAkF2EwfHd-8l0wrJ_uwejzfBRHFE,1092
53
+ cdiscbuilder-1.1.7.dist-info/METADATA,sha256=P20HXeNmjdmImD2_DfSLw3FlBddDH0MNO4CHJu4XzAs,356
54
+ cdiscbuilder-1.1.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
55
+ cdiscbuilder-1.1.7.dist-info/entry_points.txt,sha256=KsuiJD4hsfjRqDkdvWgLFPeQCz7QBl73RA665BmF0Pw,53
56
+ cdiscbuilder-1.1.7.dist-info/top_level.txt,sha256=3x5ENgnXBENgXKp5Zsht_4_OmA5wLVvDuk0UEkYOGNw,13
57
+ cdiscbuilder-1.1.7.dist-info/RECORD,,