cdiscbuilder 1.2.3__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. {cdiscbuilder-1.2.3/src/cdiscbuilder.egg-info → cdiscbuilder-1.3.1}/PKG-INFO +1 -1
  2. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/pyproject.toml +2 -2
  3. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/__init__.py +1 -1
  4. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/functions/__init__.py +14 -0
  5. cdiscbuilder-1.3.1/src/cdiscbuilder/functions/calculate_study_day.py +33 -0
  6. cdiscbuilder-1.3.1/src/cdiscbuilder/functions/coalesce.py +22 -0
  7. cdiscbuilder-1.3.1/src/cdiscbuilder/functions/get_dose_dates.py +86 -0
  8. cdiscbuilder-1.3.1/src/cdiscbuilder/functions/get_earliest_informed_consent_date.py +93 -0
  9. cdiscbuilder-1.3.1/src/cdiscbuilder/functions/get_last_participation_date.py +55 -0
  10. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/classes/base.py +4 -1
  11. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/classes/findings.py +1 -1
  12. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/classes/general.py +106 -19
  13. cdiscbuilder-1.3.1/src/cdiscbuilder/sdtm/engine/processor.py +285 -0
  14. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/tests/test_general.py +47 -1
  15. cdiscbuilder-1.3.1/src/cdiscbuilder/sdtm/engine/tests/test_iso8601.py +39 -0
  16. cdiscbuilder-1.3.1/src/cdiscbuilder/sdtm/engine/tests/test_processor.py +615 -0
  17. cdiscbuilder-1.3.1/src/cdiscbuilder/sdtm/engine/utils/iso8601.py +97 -0
  18. cdiscbuilder-1.3.1/src/cdiscbuilder/sdtm/sdtm.py +153 -0
  19. cdiscbuilder-1.3.1/src/cdiscbuilder/sdtm/specs/schema.yaml +92 -0
  20. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1/src/cdiscbuilder.egg-info}/PKG-INFO +1 -1
  21. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder.egg-info/SOURCES.txt +7 -0
  22. cdiscbuilder-1.2.3/src/cdiscbuilder/functions/calculate_study_day.py +0 -23
  23. cdiscbuilder-1.2.3/src/cdiscbuilder/sdtm/engine/processor.py +0 -144
  24. cdiscbuilder-1.2.3/src/cdiscbuilder/sdtm/engine/tests/test_processor.py +0 -179
  25. cdiscbuilder-1.2.3/src/cdiscbuilder/sdtm/sdtm.py +0 -71
  26. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/LICENSE +0 -0
  27. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/MANIFEST.in +0 -0
  28. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/README.md +0 -0
  29. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/setup.cfg +0 -0
  30. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/__init__.py +0 -0
  31. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/__init__.py +0 -0
  32. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/derivations/__init__.py +0 -0
  33. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/derivations/base.py +0 -0
  34. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/derivations/function_derivation.py +0 -0
  35. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/derivations/sql_derivation.py +0 -0
  36. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/engine.py +0 -0
  37. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/functions/__init__.py +0 -0
  38. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/functions/get_bmi.py +0 -0
  39. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/loaders/__init__.py +0 -0
  40. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/loaders/sdtm_loader.py +0 -0
  41. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/tests/test_engine.py +0 -0
  42. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/utils/__init__.py +0 -0
  43. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_derivation/utils/logger.py +0 -0
  44. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/__init__.py +0 -0
  45. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/adam_spec.py +0 -0
  46. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/merge_yaml.py +0 -0
  47. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/schema_validator.py +0 -0
  48. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/__init__.py +0 -0
  49. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/organization/adsl_common.yaml +0 -0
  50. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/project/adsl_project.yaml +0 -0
  51. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/schema.yaml +0 -0
  52. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study1/adam_study1.yaml +0 -0
  53. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study1/adsl_study1.yaml +0 -0
  54. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study1/final_adsl_study1.yaml +0 -0
  55. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study2/adam_study2.yaml +0 -0
  56. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study2/adsl_study2.yaml +0 -0
  57. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/data/scenarios/study2/final_adsl_study2.yaml +0 -0
  58. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/test_adam_spec.py +0 -0
  59. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/test_merge_yaml.py +0 -0
  60. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_spec/tests/test_schema_validator.py +0 -0
  61. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_validation/__init__.py +0 -0
  62. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/adam_validation/data_validator.py +0 -0
  63. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/adam/schema.yaml +0 -0
  64. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/cli.py +0 -0
  65. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/functions/extract_value.py +0 -0
  66. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/functions/get_bmi.py +0 -0
  67. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/__init__.py +0 -0
  68. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/__init__.py +0 -0
  69. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/classes/__init__.py +0 -0
  70. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/classes/events.py +0 -0
  71. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/classes/interventions.py +0 -0
  72. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/classes/special_purpose.py +0 -0
  73. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/config.py +0 -0
  74. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/tests/test_boundary_standardization.py +0 -0
  75. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/tests/test_config.py +0 -0
  76. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/tests/test_findings.py +0 -0
  77. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/tests/test_metadata_extractor.py +0 -0
  78. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/tests/test_validate.py +0 -0
  79. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/engine/validate.py +0 -0
  80. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/loader/__init__.py +0 -0
  81. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/loader/load.py +0 -0
  82. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/loader/tests/__init__.py +0 -0
  83. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/loader/tests/test_load.py +0 -0
  84. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/sdtm/odm_parser.py +0 -0
  85. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder/tlf/__init__.py +0 -0
  86. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder.egg-info/dependency_links.txt +0 -0
  87. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder.egg-info/entry_points.txt +0 -0
  88. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder.egg-info/requires.txt +0 -0
  89. {cdiscbuilder-1.2.3 → cdiscbuilder-1.3.1}/src/cdiscbuilder.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cdiscbuilder
3
- Version: 1.2.3
3
+ Version: 1.3.1
4
4
  Summary: A package to convert ODM XML to SDTM/ADaM Datasets
5
5
  Author-email: Ming-Chun Chen <hellomingchun@gmail.com>
6
6
  Requires-Python: >=3.9
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "cdiscbuilder"
7
- version = "1.2.3"
7
+ version = "1.3.1"
8
8
  description = "A package to convert ODM XML to SDTM/ADaM Datasets"
9
9
  readme = "README.md"
10
10
  authors = [{name = "Ming-Chun Chen", email = "hellomingchun@gmail.com"}]
@@ -20,7 +20,7 @@ requires-python = ">=3.9"
20
20
  cdisc-sdtm = "cdiscbuilder.cli:main"
21
21
 
22
22
  [tool.setuptools.package-data]
23
- cdiscbuilder = ["adam/schema.yaml", "adam/adam_spec/tests/data/scenarios/**/*.yaml"]
23
+ cdiscbuilder = ["adam/schema.yaml", "adam/adam_spec/tests/data/scenarios/**/*.yaml", "sdtm/specs/schema.yaml"]
24
24
 
25
25
  [tool.setuptools.packages.find]
26
26
  where = ["src"]
@@ -1,4 +1,4 @@
1
- __version__ = "1.2.3"
1
+ __version__ = "1.3.1"
2
2
 
3
3
  from . import adam as adam
4
4
  from . import tlf as tlf
@@ -6,6 +6,10 @@ Maps short function names to full module paths for cleaner specifications.
6
6
  from .get_bmi import get_bmi
7
7
  from .calculate_study_day import calculate_study_day
8
8
  from .extract_value import extract_value
9
+ from .get_dose_dates import get_first_dose_date, get_last_dose_date
10
+ from .coalesce import coalesce
11
+ from .get_last_participation_date import get_last_participation_date
12
+ from .get_earliest_informed_consent_date import get_earliest_informed_consent_date
9
13
 
10
14
  # Function registry mapping short names to full paths
11
15
  FUNCTION_REGISTRY = {
@@ -15,6 +19,11 @@ FUNCTION_REGISTRY = {
15
19
  # SDTM functions
16
20
  "calculate_study_day": "cdiscbuilder.functions.calculate_study_day.calculate_study_day",
17
21
  "extract_value": "cdiscbuilder.functions.extract_value.extract_value",
22
+ "get_first_dose_date": "cdiscbuilder.functions.get_dose_dates.get_first_dose_date",
23
+ "get_last_dose_date": "cdiscbuilder.functions.get_dose_dates.get_last_dose_date",
24
+ "coalesce": "cdiscbuilder.functions.coalesce.coalesce",
25
+ "get_last_participation_date": "cdiscbuilder.functions.get_last_participation_date.get_last_participation_date",
26
+ "get_earliest_informed_consent_date": "cdiscbuilder.functions.get_earliest_informed_consent_date.get_earliest_informed_consent_date",
18
27
  }
19
28
 
20
29
 
@@ -61,6 +70,11 @@ __all__ = [
61
70
  "get_bmi",
62
71
  "calculate_study_day",
63
72
  "extract_value",
73
+ "get_first_dose_date",
74
+ "get_last_dose_date",
75
+ "coalesce",
76
+ "get_last_participation_date",
77
+ "get_earliest_informed_consent_date",
64
78
  "get_function_path",
65
79
  "list_available_functions",
66
80
  "register_function",
@@ -0,0 +1,33 @@
1
+ import pandas as pd
2
+
3
+
4
def calculate_study_day(date_series, rfstdtc_series):
    """
    Calculate the SDTM Study Day (--DY) for each row.

    SDTM rule:
    - If date is on or after RFSTDTC: (date - RFSTDTC) + 1
    - If date is before RFSTDTC: (date - RFSTDTC)
    - There is no Day 0.
    - Partial dates (missing day or month) cannot be used to calculate
      study day and yield a missing value.

    Only the calendar-date portion (the first 10 characters, YYYY-MM-DD) of
    each value is used. Any time or UTC-offset suffix is ignored, so the date
    is taken exactly as recorded instead of being shifted to another calendar
    day by a timezone conversion.

    Args:
        date_series: Series of ISO 8601 date/datetime values.
        rfstdtc_series: Series of ISO 8601 reference start dates; it is
            subtracted from ``date_series`` using pandas index alignment.

    Returns:
        Series of nullable integers (``Int64``); missing where either date
        is absent, partial, or unparseable.
    """

    def _calendar_date(series):
        # Values shorter than 10 characters are partial dates (e.g. 'YYYY-MM')
        # or missing markers such as 'None'/'nan'; they must not be parsed,
        # otherwise the first day of the month/year would be assumed.
        text = series.astype(str)
        date_part = text.str.slice(0, 10).where(text.str.len() >= 10)
        # A fixed format keeps parsing independent of the first element's
        # precision and rejects anything that is not YYYY-MM-DD.
        return pd.to_datetime(date_part, format="%Y-%m-%d", errors="coerce")

    d = _calendar_date(date_series)
    rf = _calendar_date(rfstdtc_series)

    # Whole-day difference; NaN where either side is missing
    diff = (d - rf).dt.days

    # Apply SDTM rule (no Day 0): keep negative differences, shift the rest by +1.
    # Missing differences stay missing because NaN + 1 is NaN.
    dy = diff.where(diff < 0, diff + 1)

    # Cast to Int64 (nullable integer)
    return dy.astype("Int64")
@@ -0,0 +1,22 @@
1
+ import pandas as pd
2
+
3
def coalesce(*series_list, **kwargs):
    """
    Return the first non-null value across a list of Pandas Series.
    Similar to SQL COALESCE.

    The result always keeps the index (and therefore the row count) of the
    first series. Later series are aligned to it by index label; labels they
    do not contain simply contribute no value.

    Args:
        *series_list: One or more Series, in priority order. Any argument
            after the first that is not a Series is treated as a constant
            and broadcast to every row.
        **kwargs: Accepted but not used by this function.

    Returns:
        Series with the index of the first argument.

    Raises:
        ValueError: If no positional argument is given.
    """
    if not series_list:
        raise ValueError("coalesce requires at least one argument")

    # Start with the first series; copy so the caller's data is never mutated
    result = series_list[0].copy()

    for candidate in series_list[1:]:
        if not isinstance(candidate, pd.Series):
            # Literal fallback: broadcast to every row of the result
            candidate = pd.Series([candidate] * len(result), index=result.index)
        elif not candidate.index.equals(result.index):
            # Align to the result's rows so a differently indexed fallback
            # cannot add rows to the output.
            candidate = candidate.reindex(result.index)

        # Keep existing values; take the candidate only where still missing
        result = result.where(result.notna(), candidate)

    return result
@@ -0,0 +1,86 @@
1
+ import pandas as pd
2
+
3
def _get_dose_date(usubjid_series, built_domains, mode="first", **kwargs):
    """
    Core logic to find the first or last dose date from EX or EC domains
    where the dose > 0.

    Args:
        usubjid_series: Series of USUBJID values; the result follows its index.
        built_domains: Mapping of domain name to DataFrame (may be None/empty).
        mode: "first" uses --STDTC and the earliest value per subject; any
            other value uses --ENDTC and the latest value per subject.
        **kwargs:
            ex_dose_col: Dose column in EX (default "EXDOSE").
            ec_dose_col: Dose column in EC (default "ECDOSE").

    Returns:
        Series aligned to ``usubjid_series`` holding the original date string
        of the selected record, or a missing value when none is found.
    """

    def _no_result():
        return pd.Series([None] * len(usubjid_series), index=usubjid_series.index)

    if not built_domains:
        return _no_result()

    date_suffix = "STDTC" if mode == "first" else "ENDTC"

    frames = []
    for domain, dose_kwarg in (("EX", "ex_dose_col"), ("EC", "ec_dose_col")):
        df = built_domains.get(domain)
        if df is None or df.empty or "USUBJID" not in df.columns:
            continue

        date_col = f"{domain}{date_suffix}"
        if date_col not in df.columns:
            continue

        # When the dose column is absent every record is kept; otherwise only
        # records with a numeric dose strictly greater than zero qualify.
        dose_col = kwargs.get(dose_kwarg, f"{domain}DOSE")
        rows = df
        if dose_col in df.columns:
            dose = pd.to_numeric(df[dose_col], errors="coerce")
            rows = df[dose > 0]

        frames.append(rows[["USUBJID", date_col]].rename(columns={date_col: "DATE"}))

    if not frames:
        return _no_result()

    combined = pd.concat(frames, ignore_index=True).dropna(subset=["DATE"])
    if combined.empty:
        return _no_result()

    # Parse for chronological comparison. format="ISO8601" lets values of
    # different precision ("2026-05-01" next to "2026-05-01T08:00") parse
    # together; with an inferred format, values that do not match the first
    # one's layout would be coerced to NaT and silently dropped.
    parsed = pd.to_datetime(combined["DATE"], format="ISO8601", errors="coerce", utc=True)

    # Drop rows that could not be parsed as dates
    combined = combined.assign(DATETIME=parsed).dropna(subset=["DATETIME"])
    if combined.empty:
        return _no_result()

    # Locate the row holding the min/max datetime for each subject
    per_subject = combined.groupby("USUBJID")["DATETIME"]
    idx = per_subject.idxmin() if mode == "first" else per_subject.idxmax()

    # Return the original string (keeping any 'T' time part), not the parsed value
    res = combined.loc[idx].set_index("USUBJID")["DATE"]

    # Map back to the exact input series sequence
    return usubjid_series.map(res)


def get_first_dose_date(usubjid_series, built_domains=None, **kwargs):
    """
    Calculates RFXSTDTC (First Study Treatment Date).
    Extracts minimum start date from EX or EC domains where dose > 0.
    """
    return _get_dose_date(usubjid_series, built_domains, mode="first", **kwargs)


def get_last_dose_date(usubjid_series, built_domains=None, **kwargs):
    """
    Calculates RFXENDTC (Last Study Treatment Date).
    Extracts maximum end date from EX or EC domains where dose > 0.
    """
    return _get_dose_date(usubjid_series, built_domains, mode="last", **kwargs)
@@ -0,0 +1,93 @@
1
+ import pandas as pd
2
+
3
def get_earliest_informed_consent_date(usubjid_series, built_domains=None, df_long=None, **kwargs):
    """
    Calculates RFICDTC (Date/Time of Informed Consent).
    Can scan the built DS domain OR raw df_long (to avoid circular dependencies).

    kwargs:
        raw_mode: bool. If True, searches df_long instead of built_domains (default: False)
        raw_formoid: If raw_mode=True, the FormOID to filter on (optional)
        term_col: The column (or ItemOID) to check for consent terms
            (default: 'DSDECOD' in built mode, 'DSTERM' in raw mode)
        consent_terms: Term or list of terms indicating consent, compared
            case-insensitively (default: ['INFORMED CONSENT OBTAINED'])
        date_col: The date column (or ItemOID) to extract
            (default: 'DSSTDTC' in built mode, 'DSSTDAT' in raw mode)

    Returns:
        Series aligned to ``usubjid_series`` holding the original date string
        of the earliest consent record, or a missing value when none is found.

    Raw mode notes:
        - Term rows and date rows are matched per subject only (not per
          ItemGroupRepeatKey): every ``date_col`` value of a subject that has
          a consent term is a candidate and the earliest one is returned.
        - A USUBJID is matched to a SubjectKey by its '-'-separated suffixes,
          longest first (so 'STUDY-01-001' matches SubjectKey '01-001' before
          '001'); a USUBJID without a matching suffix gets no value.
    """

    def _no_result():
        return pd.Series([None] * len(usubjid_series), index=usubjid_series.index)

    def _earliest_per_key(frame, key_col, value_col):
        # Unique positional index so idxmin labels identify exactly one row
        frame = frame.reset_index(drop=True)
        # format="ISO8601" parses values of differing precision together;
        # an inferred format would coerce non-matching layouts to NaT.
        parsed = pd.to_datetime(frame[value_col], format="ISO8601", errors="coerce", utc=True)
        frame = frame.assign(DATETIME=parsed).dropna(subset=["DATETIME"])
        if frame.empty:
            return None
        idx = frame.groupby(key_col)["DATETIME"].idxmin()
        # Return the original string, not the parsed timestamp
        return frame.loc[idx].set_index(key_col)[value_col]

    raw_mode = kwargs.get("raw_mode", False)
    consent_terms = kwargs.get("consent_terms", ["INFORMED CONSENT OBTAINED"])
    if isinstance(consent_terms, str):
        # A bare string would otherwise be iterated character by character
        consent_terms = [consent_terms]
    upper_terms = [str(t).upper() for t in consent_terms]

    if raw_mode:
        if df_long is None or df_long.empty:
            return _no_result()

        term_col = kwargs.get("term_col", "DSTERM")
        date_col = kwargs.get("date_col", "DSSTDAT")
        formoid = kwargs.get("raw_formoid")

        # Same "nothing to report" outcome as built mode when columns are absent
        required = {"ItemOID", "Value", "SubjectKey"}
        if formoid:
            required.add("FormOID")
        if not required.issubset(df_long.columns):
            return _no_result()

        subset = df_long
        if formoid:
            subset = df_long[df_long["FormOID"] == formoid]

        # Subjects that have at least one consent term recorded
        term_mask = (subset["ItemOID"] == term_col) & (
            subset["Value"].astype(str).str.upper().isin(upper_terms)
        )
        consent_subjects = subset[term_mask]["SubjectKey"].unique()

        # All date_col values recorded for those subjects
        date_mask = (subset["ItemOID"] == date_col) & (subset["SubjectKey"].isin(consent_subjects))
        valid = subset[date_mask]
        if valid.empty:
            return _no_result()

        # USUBJID fragments are strings, so compare against string SubjectKeys
        valid = valid.assign(SubjectKey=valid["SubjectKey"].astype(str))
        earliest = _earliest_per_key(valid, "SubjectKey", "Value")
        if earliest is None:
            return _no_result()

        lookup = earliest.to_dict()

        def _match(usubjid):
            parts = str(usubjid).split("-")
            # Longest suffix first, down to the last segment
            for start in range(len(parts)):
                candidate = "-".join(parts[start:])
                if candidate in lookup:
                    return lookup[candidate]
            return None

        return usubjid_series.map(_match)

    # --- BUILT DOMAIN MODE ---
    if not built_domains:
        return _no_result()

    ds = built_domains.get("DS")
    if ds is None or ds.empty or "USUBJID" not in ds.columns:
        return _no_result()

    term_col = kwargs.get("term_col", "DSDECOD")
    date_col = kwargs.get("date_col", "DSSTDTC")

    if term_col not in ds.columns or date_col not in ds.columns:
        return _no_result()

    mask = ds[term_col].astype(str).str.upper().isin(upper_terms)
    valid = ds[mask]
    if valid.empty:
        return _no_result()

    earliest = _earliest_per_key(valid, "USUBJID", date_col)
    if earliest is None:
        return _no_result()

    return usubjid_series.map(earliest)
@@ -0,0 +1,55 @@
1
+ import pandas as pd
2
+
3
def get_last_participation_date(usubjid_series, built_domains=None, **kwargs):
    """
    Calculates RFENDTC (Reference End Date).
    Finds the absolute maximum date across specified domains and date columns
    for each subject.

    kwargs:
        domain_dates: dictionary mapping domain names to a list of date columns
            to check (a single column name may be given as a string).
            Defaults to scanning DS, EX, EC, AE, and SV.

    Returns:
        Series aligned to ``usubjid_series`` holding the original string of the
        latest date found for each subject, or a missing value when none is found.
    """

    def _no_result():
        return pd.Series([None] * len(usubjid_series), index=usubjid_series.index)

    if not built_domains:
        return _no_result()

    # Default domains and columns to scan if not explicitly provided
    domain_dates = kwargs.get("domain_dates")
    if domain_dates is None:
        domain_dates = {
            "DS": ["DSSTDTC"],
            "EX": ["EXSTDTC", "EXENDTC"],
            "EC": ["ECSTDTC", "ECENDTC"],
            "AE": ["AESTDTC", "AEENDTC"],
            "SV": ["SVSTDTC", "SVENDTC"],
        }

    frames = []
    for domain, cols in domain_dates.items():
        df = built_domains.get(domain)
        if df is None or df.empty or "USUBJID" not in df.columns:
            continue
        if isinstance(cols, str):
            # A bare column name would otherwise be iterated per character
            cols = [cols]
        # Stack every requested date column into one long USUBJID/DATE table
        # so a single per-subject maximum can be taken across all of them.
        frames.extend(
            df[["USUBJID", col]].dropna(subset=[col]).rename(columns={col: "DATE"})
            for col in cols
            if col in df.columns
        )

    if not frames:
        return _no_result()

    combined = pd.concat(frames, ignore_index=True)

    # Parse for chronological comparison. format="ISO8601" lets dates and
    # datetimes of differing precision parse together; with an inferred format,
    # values not matching the first one's layout would be coerced to NaT.
    parsed = pd.to_datetime(combined["DATE"], format="ISO8601", errors="coerce", utc=True)
    combined = combined.assign(DATETIME=parsed).dropna(subset=["DATETIME"])

    if combined.empty:
        return _no_result()

    # Find the index of the max datetime per subject
    idx = combined.groupby("USUBJID")["DATETIME"].idxmax()

    # Extract the exact string (preserving any 'T' time component)
    res = combined.loc[idx].set_index("USUBJID")["DATE"]

    # Map back to the input series sequence
    return usubjid_series.map(res)
@@ -7,7 +7,7 @@ class BaseProcessor(ABC):
7
7
  self.class_name = "GENERAL"
8
8
 
9
9
  @abstractmethod
10
- def process(self, domain_name, sources, df_long, default_keys):
10
+ def process(self, domain_name, sources, df_long, default_keys, built_domains=None):
11
11
  """Main entry point for processing a domain."""
12
12
  pass
13
13
 
@@ -55,6 +55,9 @@ class BaseProcessor(ABC):
55
55
  series = pd.to_datetime(
56
56
  series, errors="coerce", format="mixed"
57
57
  ).dt.strftime("%Y-%m-%d")
58
+ elif target_type == "iso8601":
59
+ from cdiscbuilder.sdtm.engine.utils.iso8601 import parse_iso8601
60
+ series = series.apply(parse_iso8601)
58
61
  elif target_type == "str":
59
62
  series = series.astype(str).replace("nan", None)
60
63
  except Exception as e:
@@ -10,7 +10,7 @@ class FindingsProcessor:
10
10
  def __init__(self):
11
11
  self.class_name = "FINDINGS"
12
12
 
13
- def process(self, domain_name, sources, df_long, default_keys, custom_to_standard=None):
13
+ def process(self, domain_name, sources, df_long, default_keys, custom_to_standard=None, built_domains=None):
14
14
  domain_dfs = []
15
15
 
16
16
  for settings in sources:
@@ -59,7 +59,54 @@ class GeneralProcessor:
59
59
 
60
60
  return expanded_list
61
61
 
62
- def process(self, domain_name, sources, df_long, default_keys, custom_to_standard=None):
62
+ def _resolve_cross_domain(self, source_expr, col_config, final_df, pivoted, built_domains):
63
+ """
64
+ Resolve a cross-domain reference (e.g., 'DM.RFSTDTC') by merging from built_domains.
65
+ Returns (series, resolved) where resolved is True if successfully resolved.
66
+ """
67
+ if not (isinstance(source_expr, str) and "." in source_expr):
68
+ return None, False
69
+
70
+ ref_domain, ref_col = source_expr.split(".", 1)
71
+ if not (ref_domain.isupper() and 2 <= len(ref_domain) <= 4):
72
+ return None, False
73
+
74
+ if not built_domains or ref_domain not in built_domains:
75
+ print(f"Warning: Referenced domain '{ref_domain}' not available for cross-domain ref '{source_expr}'")
76
+ return pd.Series([None] * len(pivoted)), True
77
+
78
+ ref_df = built_domains[ref_domain]
79
+ if ref_col not in ref_df.columns:
80
+ print(f"Warning: Column '{ref_col}' not found in domain '{ref_domain}' for cross-domain ref '{source_expr}'")
81
+ return pd.Series([None] * len(pivoted)), True
82
+
83
+ # Determine merge key
84
+ merge_key = col_config.get("merge_on", ["USUBJID"]) if isinstance(col_config, dict) else ["USUBJID"]
85
+ if isinstance(merge_key, str):
86
+ merge_key = [merge_key]
87
+
88
+ # Validate merge keys exist in both DataFrames
89
+ valid_keys = [k for k in merge_key if k in final_df.columns and k in ref_df.columns]
90
+
91
+ if not valid_keys:
92
+ print(f"Warning: Merge keys {merge_key} missing for cross-domain ref '{source_expr}'")
93
+ return pd.Series([None] * len(pivoted)), True
94
+
95
+ # Get unique ref values to avoid duplicating rows
96
+ ref_cols_needed = valid_keys + [ref_col]
97
+ ref_subset = ref_df[ref_cols_needed].drop_duplicates(subset=valid_keys)
98
+
99
+ # Merge into final_df temporarily
100
+ merged = final_df[valid_keys].merge(ref_subset, on=valid_keys, how="left")
101
+ series = merged[ref_col]
102
+ series.index = final_df.index # Re-align index
103
+
104
+ match_count = series.notna().sum()
105
+ print(f" ↳ Resolved cross-domain ref: {source_expr} ({match_count} matches via {valid_keys})")
106
+
107
+ return series, True
108
+
109
+ def process(self, domain_name, sources, df_long, default_keys, custom_to_standard=None, built_domains=None):
63
110
  domain_dfs = []
64
111
 
65
112
  # Pre-expand sources if they contain lists
@@ -214,35 +261,27 @@ class GeneralProcessor:
214
261
  elif isinstance(col_config, dict) and col_config.get("function"):
215
262
  func_name = col_config.get("function")
216
263
  args = col_config.get("args", [])
264
+ kwargs = col_config.get("kwargs", {})
217
265
 
218
266
  # Resolve Args
219
267
  arg_series = []
220
268
  for arg in args:
221
- # Support cross-domain lookup? For now support local columns in final_df or pivoted
269
+ # Support cross-domain lookup
222
270
  if arg in final_df.columns:
223
271
  arg_series.append(final_df[arg])
224
272
  elif arg in pivoted.columns:
225
273
  arg_series.append(pivoted[arg])
226
274
  else:
227
- # Try loading from DM if it looks like DM.RFSTDTC
275
+ # Try cross-domain resolution
228
276
  if isinstance(arg, str) and "." in arg:
229
- dname, vname = arg.split(".")
230
- # Only DM supported for now as reference
231
- if dname == "DM":
232
-
233
- # Locate output_dir? (We don't have output_dir here easily)
234
- # For now, let's assume RFSTDTC was merged into DM block or AE block already
235
- # Or we pass it in.
236
- # For the demo, let's assume RFSTDTC is in the dataset or handled as a string
237
- print(
238
- f"Warning: Cross-domain arg {arg} resolution not fully implemented in GeneralProcessor"
239
- )
240
- arg_series.append(pd.Series([None] * len(pivoted)))
277
+ cross_series, resolved = self._resolve_cross_domain(
278
+ arg, {}, final_df, pivoted, built_domains
279
+ )
280
+ if resolved and cross_series is not None:
281
+ arg_series.append(cross_series)
241
282
  else:
242
283
  arg_series.append(pd.Series([None] * len(pivoted)))
243
284
  else:
244
- # Not found locally, and not a string with a dot. Treat as a literal or unresolved?
245
- # Wait, ADaM's args might be string constants. Let's just append None for now as before.
246
285
  arg_series.append(pd.Series([None] * len(pivoted)))
247
286
 
248
287
  import importlib
@@ -272,8 +311,18 @@ class GeneralProcessor:
272
311
  raise ImportError(f"Function {fname} not found")
273
312
 
274
313
  try:
314
+ import inspect
275
315
  func = _load_function(func_name)
276
- series = func(*arg_series)
316
+ sig = inspect.signature(func)
317
+
318
+ func_kwargs = kwargs.copy()
319
+ if "built_domains" in sig.parameters:
320
+ func_kwargs["built_domains"] = built_domains
321
+ if "df_long" in sig.parameters:
322
+ func_kwargs["df_long"] = df_long
323
+
324
+ series = func(*arg_series, **func_kwargs)
325
+
277
326
  if not isinstance(series, pd.Series):
278
327
  series = pd.Series(series)
279
328
  except Exception as e:
@@ -282,11 +331,49 @@ class GeneralProcessor:
282
331
  )
283
332
  series = pd.Series([None] * len(pivoted))
284
333
 
334
+ elif isinstance(col_config, dict) and col_config.get("conditions"):
335
+ import numpy as np
336
+ conditions_config = col_config.get("conditions")
337
+
338
+ # Create an evaluation context combining raw domain data and current final_df
339
+ eval_df = pivoted.copy()
340
+ for c in final_df.columns:
341
+ eval_df[c] = final_df[c]
342
+
343
+ cond_list = []
344
+ choice_list = []
345
+
346
+ for cond in conditions_config:
347
+ expr = cond.get("if")
348
+ then_val = cond.get("then")
349
+ try:
350
+ # Evaluate condition string
351
+ mask = eval_df.eval(expr)
352
+ cond_list.append(mask)
353
+ choice_list.append(then_val)
354
+ except Exception as e:
355
+ print(f"Warning: Failed to evaluate condition '{expr}': {e}")
356
+ cond_list.append(pd.Series(False, index=eval_df.index))
357
+ choice_list.append(then_val)
358
+
359
+ default_val = col_config.get("default", None)
360
+ if cond_list:
361
+ # np.select evaluates conditions in order
362
+ series = pd.Series(np.select(cond_list, choice_list, default=default_val), index=eval_df.index)
363
+ else:
364
+ series = pd.Series([default_val] * len(eval_df), index=eval_df.index)
365
+
285
366
  elif literal_expr is not None:
286
367
  # Explicit literal value
287
368
  series = pd.Series([literal_expr] * len(pivoted))
288
369
  elif source_expr:
289
- if source_expr in pivoted.columns:
370
+ # Check for cross-domain reference first
371
+ cross_series, resolved = self._resolve_cross_domain(
372
+ source_expr, col_config, final_df, pivoted, built_domains
373
+ )
374
+ if resolved:
375
+ series = cross_series
376
+ elif source_expr in pivoted.columns:
290
377
  series = pivoted[source_expr].copy()
291
378
  elif source_expr in final_df.columns:
292
379
  series = final_df[source_expr].copy()