csrlite 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
csrlite/common/plan.py CHANGED
@@ -6,17 +6,19 @@ using YAML plans with template inheritance and keyword resolution.
6
6
  """
7
7
 
8
8
  import itertools
9
- from dataclasses import dataclass, field, fields
9
+ import logging
10
10
  from pathlib import Path
11
11
  from typing import Any, Dict, List, Optional, cast
12
12
 
13
13
  import polars as pl
14
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
14
15
 
15
16
  from .yaml_loader import YamlInheritanceLoader
16
17
 
18
+ logger: logging.Logger = logging.getLogger(__name__)
17
19
 
18
- @dataclass
19
- class Keyword:
20
+
21
+ class Keyword(BaseModel):
20
22
  """Base keyword definition."""
21
23
 
22
24
  name: str
@@ -24,54 +26,60 @@ class Keyword:
24
26
  description: Optional[str] = None
25
27
 
26
28
 
27
- @dataclass
28
29
class Population(Keyword):
    """Analysis population keyword.

    Carries a row-selection expression used to subset the source dataset
    to the analysis population.
    """

    # SQL-style filter expression (applied via polars.sql_expr elsewhere);
    # empty string means "no filtering".
    filter: str = ""
32
33
 
33
34
 
34
- @dataclass
35
35
class Observation(Keyword):
    """Observation/timepoint keyword.

    Identifies which observation records (e.g. a time window) belong to
    an analysis.
    """

    # SQL-style filter expression (applied via polars.sql_expr elsewhere);
    # empty string means "no filtering".
    filter: str = ""
39
39
 
40
40
 
41
- @dataclass
42
41
class Parameter(Keyword):
    """Parameter definition with filter.

    The ``terms`` field supports dynamic title generation:

    - ``terms["before"]``: "serious" -> "Serious Adverse Events"
    - ``terms["after"]``: "resulting in death" -> "Adverse Events Resulting in Death"
    """

    # SQL-style filter expression selecting this parameter's records;
    # empty string means "no filtering".
    filter: str = ""
    # Optional "before"/"after" fragments used for dynamic title generation.
    terms: Optional[Dict[str, str]] = None
    indent: int = 0  # Indentation level for hierarchical display
53
47
 
54
48
 
55
- @dataclass
56
49
class Group(Keyword):
    """Treatment group definition.

    Maps a grouping variable (e.g. a treatment-arm column) to its levels
    and the display labels for those levels.
    """

    # Name of the dataset column holding the grouping values.
    variable: str = ""
    # Observed levels of ``variable`` included in the analysis.
    level: List[str] = Field(default_factory=list)
    # Display labels, parallel to ``level``.
    group_label: List[str] = Field(default_factory=list)

    # pyre-ignore[56]
    @field_validator("group_label", mode="before")
    @classmethod
    def set_group_label(cls, v: Any, info: Any) -> Any:
        """Normalize a missing/None ``group_label`` to an empty list.

        Falling back to another field's value (the old registry-level
        ``label`` fallback) is handled before construction by the caller;
        here we only guarantee a list-typed default.
        """
        return v or []
70
+
71
+
72
class DataSource(BaseModel):
    """Data source definition."""

    # polars.DataFrame is not a pydantic-native type; permit it as a field.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: str
    path: str
    # Populated lazily when datasets are loaded; excluded from
    # serialization because it is runtime state, not plan metadata.
    dataframe: Optional[pl.DataFrame] = Field(default=None, exclude=True)
71
80
 
72
81
 
73
- @dataclass
74
- class AnalysisPlan:
82
+ class AnalysisPlan(BaseModel):
75
83
  """Individual analysis plan specification."""
76
84
 
77
85
  analysis: str
@@ -91,37 +99,48 @@ class AnalysisPlan:
91
99
  return "_".join(parts)
92
100
 
93
101
 
94
- class KeywordRegistry:
102
+ class KeywordRegistry(BaseModel):
95
103
  """Registry for managing keywords."""
96
104
 
97
- def __init__(self) -> None:
98
- self.populations: Dict[str, Population] = {}
99
- self.observations: Dict[str, Observation] = {}
100
- self.parameters: Dict[str, Parameter] = {}
101
- self.groups: Dict[str, Group] = {}
102
- self.data_sources: Dict[str, DataSource] = {}
105
+ populations: Dict[str, Population] = Field(default_factory=dict)
106
+ observations: Dict[str, Observation] = Field(default_factory=dict)
107
+ parameters: Dict[str, Parameter] = Field(default_factory=dict)
108
+ groups: Dict[str, Group] = Field(default_factory=dict)
109
+ data_sources: Dict[str, DataSource] = Field(default_factory=dict)
103
110
 
104
111
  def load_from_dict(self, data: Dict[str, Any]) -> None:
105
112
  """Load keywords from a dictionary."""
106
- self._load_keyword_type(data, "population", Population, self.populations)
107
- self._load_keyword_type(data, "observation", Observation, self.observations)
108
- self._load_keyword_type(data, "parameter", Parameter, self.parameters)
109
- self._load_keyword_type(data, "group", Group, self.groups)
110
- self._load_keyword_type(data, "data", DataSource, self.data_sources)
111
-
112
- def _load_keyword_type(
113
- self, data: Dict[str, Any], key: str, keyword_class: Any, target_dict: Dict[str, Any]
114
- ) -> None:
115
- """Generic method to load a type of keyword."""
116
- for item_data in data.get(key, []):
117
- if keyword_class == Group and "group_label" not in item_data:
118
- item_data["group_label"] = item_data.get("label", [])
119
-
120
- expected_fields = {f.name for f in fields(keyword_class) if f.init}
121
- filtered_data = {k: v for k, v in item_data.items() if k in expected_fields}
122
-
123
- instance = keyword_class(**filtered_data)
124
- target_dict[instance.name] = instance
113
+ # We manually load so we can handle the dict-to-list-of-models transformation
114
+ # and the specific logic for defaults.
115
+
116
+ for item in data.get("population", []):
117
+ pop_item = Population(**item)
118
+ self.populations[pop_item.name] = pop_item
119
+
120
+ for item in data.get("observation", []):
121
+ obs_item = Observation(**item)
122
+ self.observations[obs_item.name] = obs_item
123
+
124
+ for item in data.get("parameter", []):
125
+ param_item = Parameter(**item)
126
+ self.parameters[param_item.name] = param_item
127
+
128
+ for item in data.get("group", []):
129
+ # Special handling for Group where 'label' might be a list (for group_label)
130
+ # but Keyword.label expects a string.
131
+ if "label" in item and isinstance(item["label"], list):
132
+ if "group_label" not in item:
133
+ item["group_label"] = item["label"]
134
+ # Remove label from item to avoid validation error on Keyword.label
135
+ # or set it to a joined string if a label is really needed
136
+ del item["label"]
137
+
138
+ group_item = Group(**item)
139
+ self.groups[group_item.name] = group_item
140
+
141
+ for item in data.get("data", []):
142
+ ds_item = DataSource(**item)
143
+ self.data_sources[ds_item.name] = ds_item
125
144
 
126
145
  def get_population(self, name: str) -> Optional[Population]:
127
146
  return self.populations.get(name)
@@ -228,11 +247,10 @@ class StudyPlan:
228
247
  df = pl.read_parquet(path)
229
248
  self.datasets[name] = df
230
249
  data_source.dataframe = df
231
- print(f"Successfully loaded dataset '{name}' from '{path}'")
250
+ logger.info(f"Successfully loaded dataset '{name}' from '{path}'")
232
251
  except Exception as e:
233
- print(
234
- f"Warning: Could not load dataset '{name}' from '{data_source.path}'. "
235
- f"Reason: {e}"
252
+ logger.warning(
253
+ f"Could not load dataset '{name}' from '{data_source.path}'. Reason: {e}"
236
254
  )
237
255
 
238
256
  def get_plan_df(self) -> pl.DataFrame:
@@ -306,34 +324,28 @@ class StudyPlan:
306
324
 
307
325
  def print(self) -> None:
308
326
  """Print comprehensive study plan information using Polars DataFrames."""
309
- print("ADaM Metadata:")
327
+ logger.info("ADaM Metadata:")
310
328
 
311
329
  if (df := self.get_dataset_df()) is not None:
312
- print("\nData Sources:")
313
- print(df)
330
+ logger.info(f"\nData Sources:\n{df}")
314
331
 
315
332
  if (df := self.get_population_df()) is not None:
316
- print("\nAnalysis Population Type:")
317
- print(df)
333
+ logger.info(f"\nAnalysis Population Type:\n{df}")
318
334
 
319
335
  if (df := self.get_observation_df()) is not None:
320
- print("\nAnalysis Observation Type:")
321
- print(df)
336
+ logger.info(f"\nAnalysis Observation Type:\n{df}")
322
337
 
323
338
  if (df := self.get_parameter_df()) is not None:
324
- print("\nAnalysis Parameter Type:")
325
- print(df)
339
+ logger.info(f"\nAnalysis Parameter Type:\n{df}")
326
340
 
327
341
  if (df := self.get_group_df()) is not None:
328
- print("\nAnalysis Groups:")
329
- print(df)
342
+ logger.info(f"\nAnalysis Groups:\n{df}")
330
343
 
331
344
  if (df := self.get_plan_df()) is not None:
332
- print("\nAnalysis Plans:")
333
- print(df)
345
+ logger.info(f"\nAnalysis Plans:\n{df}")
334
346
 
335
347
  def __str__(self) -> str:
336
- study_name = self.study_data.get("study", Dict[str, Any]()).get("name", "Unknown")
348
+ study_name = self.study_data.get("study", {}).get("name", "Unknown")
337
349
  condensed_plans = len(self.study_data.get("plans", []))
338
350
  individual_analyses = len(self.get_plan_df())
339
351
  return (
csrlite/common/rtf.py ADDED
@@ -0,0 +1,85 @@
1
+ # pyre-strict
2
+ from typing import Any
3
+
4
+ import polars as pl
5
+ from rtflite import RTFBody, RTFColumnHeader, RTFDocument, RTFFootnote, RTFPage, RTFSource, RTFTitle
6
+
7
+
8
def create_rtf_table_n_pct(
    df: pl.DataFrame,
    col_header_1: list[str],
    col_header_2: list[str] | None,
    col_widths: list[float] | None,
    title: list[str] | str,
    footnote: list[str] | str | None,
    source: list[str] | str | None,
    borders_2: bool = True,
    orientation: str = "landscape",
) -> RTFDocument:
    """
    Create a standardized RTF table document with 1 or 2 header rows.

    Args:
        df: Polars DataFrame containing the table data.
        col_header_1: List of strings for the first header row.
        col_header_2: Optional list of strings for the second header row.
        col_widths: Optional list of relative column widths. Defaults to equal widths.
        title: Title string or list of title strings.
        footnote: Footnote string or list of footnote strings.
        source: Source string or list of source strings.
        borders_2: Whether to show borders for the second header row. Defaults to True.
        orientation: Page orientation, "landscape" or "portrait". Defaults to "landscape".

    Returns:
        RTFDocument object.
    """
    n_cols = len(df.columns)

    # Default to equal relative widths when none are supplied.
    if col_widths is None:
        col_widths = [1.0] * n_cols

    # First column (row labels) left-justified, all others centered.
    # Hoisted once instead of rebuilding the same list for each component.
    justification: list[str] = ["l"] + ["c"] * (n_cols - 1)

    # Normalize metadata to list form.
    title_list = [title] if isinstance(title, str) else title
    footnote_list = [footnote] if isinstance(footnote, str) else (footnote or [])
    source_list = [source] if isinstance(source, str) else (source or [])

    headers = [
        RTFColumnHeader(
            text=col_header_1,
            col_rel_width=col_widths,
            text_justification=justification,
        )
    ]

    if col_header_2:
        h2_kwargs: dict[str, Any] = {
            "text": col_header_2,
            "col_rel_width": col_widths,
            "text_justification": justification,
        }
        if borders_2:
            h2_kwargs["border_left"] = ["single"]
            h2_kwargs["border_top"] = [""]

        headers.append(RTFColumnHeader(**h2_kwargs))

    rtf_components: dict[str, Any] = {
        "df": df,
        "rtf_page": RTFPage(orientation=orientation),
        "rtf_title": RTFTitle(text=title_list),
        "rtf_column_header": headers,
        "rtf_body": RTFBody(
            col_rel_width=col_widths,
            text_justification=justification,
            border_left=["single"] * n_cols,
        ),
    }

    # Footnote/source blocks are optional; only attach them when non-empty.
    if footnote_list:
        rtf_components["rtf_footnote"] = RTFFootnote(text=footnote_list)

    if source_list:
        rtf_components["rtf_source"] = RTFSource(text=source_list)

    return RTFDocument(**rtf_components)
csrlite/common/utils.py CHANGED
@@ -4,11 +4,11 @@ import polars as pl
4
4
 
5
5
  def apply_common_filters(
6
6
  population: pl.DataFrame,
7
- observation: pl.DataFrame,
7
+ observation: pl.DataFrame | None,
8
8
  population_filter: str | None,
9
9
  observation_filter: str | None,
10
10
  parameter_filter: str | None = None,
11
- ) -> tuple[pl.DataFrame, pl.DataFrame]:
11
+ ) -> tuple[pl.DataFrame, pl.DataFrame | None]:
12
12
  """
13
13
  Apply standard population, observation, and parameter filters.
14
14
 
@@ -23,11 +23,11 @@ def apply_common_filters(
23
23
 
24
24
  # Apply observation filter
25
25
  observation_filtered = observation
26
- if observation_filter:
26
+ if observation_filter and observation_filtered is not None:
27
27
  observation_filtered = observation_filtered.filter(pl.sql_expr(observation_filter))
28
28
 
29
29
  # Apply parameter filter
30
- if parameter_filter:
30
+ if parameter_filter and observation_filtered is not None:
31
31
  observation_filtered = observation_filtered.filter(pl.sql_expr(parameter_filter))
32
32
 
33
33
  return population_filtered, observation_filtered