cdiscbuilder 1.1.5__tar.gz → 1.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {cdiscbuilder-1.1.5/src/cdiscbuilder.egg-info → cdiscbuilder-1.1.7}/PKG-INFO +1 -1
  2. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/pyproject.toml +1 -1
  3. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/derivations/sql_derivation.py +19 -9
  4. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/engine/classes/finding.py +4 -0
  5. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/engine/classes/general.py +7 -0
  6. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7/src/cdiscbuilder.egg-info}/PKG-INFO +1 -1
  7. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/LICENSE +0 -0
  8. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/MANIFEST.in +0 -0
  9. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/README.md +0 -0
  10. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/setup.cfg +0 -0
  11. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/__init__.py +0 -0
  12. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/__init__.py +0 -0
  13. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/__init__.py +0 -0
  14. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/derivations/__init__.py +0 -0
  15. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/derivations/base.py +0 -0
  16. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/derivations/function_derivation.py +0 -0
  17. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/engine.py +0 -0
  18. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/functions/__init__.py +0 -0
  19. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/functions/get_bmi.py +0 -0
  20. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/loaders/__init__.py +0 -0
  21. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/loaders/sdtm_loader.py +0 -0
  22. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/utils/__init__.py +0 -0
  23. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_derivation/utils/logger.py +0 -0
  24. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/__init__.py +0 -0
  25. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/adam_spec.py +0 -0
  26. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/merge_yaml.py +0 -0
  27. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/schema_validator.py +0 -0
  28. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/__init__.py +0 -0
  29. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/organization/adsl_common.yaml +0 -0
  30. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/project/adsl_project.yaml +0 -0
  31. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/schema.yaml +0 -0
  32. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study1/adam_study1.yaml +0 -0
  33. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study1/adsl_study1.yaml +0 -0
  34. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study1/final_adsl_study1.yaml +0 -0
  35. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study2/adam_study2.yaml +0 -0
  36. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study2/adsl_study2.yaml +0 -0
  37. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study2/final_adsl_study2.yaml +0 -0
  38. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/test_adam_spec.py +0 -0
  39. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/test_merge_yaml.py +0 -0
  40. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_spec/tests/test_schema_validator.py +0 -0
  41. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_validation/__init__.py +0 -0
  42. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/adam_validation/data_validator.py +0 -0
  43. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/adam/schema.yaml +0 -0
  44. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/cli.py +0 -0
  45. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/__init__.py +0 -0
  46. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/engine/__init__.py +0 -0
  47. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/engine/classes/__init__.py +0 -0
  48. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/engine/config.py +0 -0
  49. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/engine/functions.py +0 -0
  50. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/engine/processor.py +0 -0
  51. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/engine/validate.py +0 -0
  52. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/loader/__init__.py +0 -0
  53. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/loader/load.py +0 -0
  54. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/loader/tests/__init__.py +0 -0
  55. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/loader/tests/test_load.py +0 -0
  56. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/odm_parser.py +0 -0
  57. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/sdtm/sdtm.py +0 -0
  58. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder/tlf/__init__.py +0 -0
  59. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder.egg-info/SOURCES.txt +0 -0
  60. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder.egg-info/dependency_links.txt +0 -0
  61. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder.egg-info/entry_points.txt +0 -0
  62. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder.egg-info/requires.txt +0 -0
  63. {cdiscbuilder-1.1.5 → cdiscbuilder-1.1.7}/src/cdiscbuilder.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cdiscbuilder
3
- Version: 1.1.5
3
+ Version: 1.1.7
4
4
  Summary: A package to convert ODM XML to SDTM/ADaM Datasets
5
5
  Author-email: Ming-Chun Chen <hellomingchun@gmail.com>
6
6
  Requires-Python: >=3.8
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "cdiscbuilder"
7
- version = "1.1.5"
7
+ version = "1.1.7"
8
8
  description = "A package to convert ODM XML to SDTM/ADaM Datasets"
9
9
  authors = [{name = "Ming-Chun Chen", email = "hellomingchun@gmail.com"}]
10
10
  dependencies = [
@@ -149,9 +149,13 @@ class SQLDerivation(BaseDerivation):
149
149
  # The original code raised ValueError here.
150
150
  raise ValueError(f"Column {source} not found in target dataset")
151
151
 
152
- # Apply mapping if present (legacy / optimization for local cols)
153
- if "mapping" in derivation:
154
- series = self._apply_mapping(series, derivation["mapping"])
152
+ # Auto-Strip Whitespace
153
+ if series.dtype == pl.Utf8:
154
+ series = series.str.strip_chars()
155
+
156
+ # Apply mapping if present (legacy / optimization for local cols)
157
+ if "mapping" in derivation:
158
+ series = self._apply_mapping(series, derivation["mapping"])
155
159
 
156
160
  return series
157
161
 
@@ -425,16 +429,22 @@ class SQLDerivation(BaseDerivation):
425
429
  # Calculate distance to target date
426
430
  if target_col in subject_data.columns and date_col in subject_data.columns:
427
431
  # Get target date (should be same for all rows of this subject)
428
- target_date = subject_data[target_col][0]
429
-
430
432
  # Calculate date differences and find closest
431
- # Handle partial dates by using strptime with appropriate format
433
+ # Handle multiple formats: "YYYY-MM-DD HH:MM:SS", "YYYY-MM-DDTHH:MM:SS" (ISO), or "YYYY-MM-DD"
434
+ # We convert everything to Datetime for precision comparison (ms level)
435
+ def parse_to_datetime(expr: pl.Expr) -> pl.Expr:
436
+ return (
437
+ expr.str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S", strict=False)
438
+ .fill_null(expr.str.strptime(pl.Datetime, "%Y-%m-%dT%H:%M:%S", strict=False))
439
+ .fill_null(expr.str.strptime(pl.Date, "%Y-%m-%d", strict=False).cast(pl.Datetime))
440
+ )
441
+
432
442
  with_diff = subject_data.with_columns(
433
443
  (
434
- pl.col(date_col).str.strptime(pl.Date, "%Y-%m-%d", strict=False)
435
- - pl.lit(target_date).str.strptime(pl.Date, "%Y-%m-%d", strict=False)
444
+ parse_to_datetime(pl.col(date_col))
445
+ - parse_to_datetime(pl.lit(target_date))
436
446
  )
437
- .dt.total_days()
447
+ .dt.total_milliseconds()
438
448
  .abs()
439
449
  .alias("date_diff")
440
450
  )
@@ -111,6 +111,10 @@ class FindingProcessor:
111
111
  elif source_expr in source_df.columns:
112
112
  series = source_df[source_expr]
113
113
 
114
+ # Auto-Strip
115
+ if series is not None and pd.api.types.is_object_dtype(series):
116
+ series = series.astype(str).str.strip().replace('nan', None)
117
+
114
118
  if regex_extract and series is not None:
115
119
  # Extract group 1
116
120
  series = series.astype(str).str.extract(regex_extract)[0]
@@ -185,6 +185,13 @@ class GeneralProcessor:
185
185
  # Source defined but not found.
186
186
  print(f"Warning: Source column '{source_expr}' not found for '{domain_name}.{target_col}'. Filling with NaN.")
187
187
  series = pd.Series([None] * len(pivoted))
188
+
189
+ # Auto-Strip Whitespace for strings
190
+ if series is not None and pd.api.types.is_object_dtype(series):
191
+ try:
192
+ series = series.astype(str).str.strip().replace('nan', None)
193
+ except:
194
+ pass
188
195
  else:
189
196
  print(f"Warning: No source or literal defined for '{domain_name}.{target_col}'. Filling with NaN.")
190
197
  series = pd.Series([None] * len(pivoted))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cdiscbuilder
3
- Version: 1.1.5
3
+ Version: 1.1.7
4
4
  Summary: A package to convert ODM XML to SDTM/ADaM Datasets
5
5
  Author-email: Ming-Chun Chen <hellomingchun@gmail.com>
6
6
  Requires-Python: >=3.8
File without changes
File without changes
File without changes
File without changes