pyqicharts 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyqicharts/__init__.py ADDED
@@ -0,0 +1,51 @@
1
+ """pyqicharts: Quality Improvement and SPC charts for Python.
2
+
3
+ The package root re-exports the supported public API. Internal helper functions
4
+ remain inside their modules so future releases can evolve implementation
5
+ details without forcing users to change imports.
6
+ """
7
+ from .core import QicResult, qic
8
+ from .advanced import autocorrelation_diagnostics, funnel_table, interactive_signal_review, overdispersion_test, rare_event_limits, risk_adjusted_summary, seasonality_diagnostics
9
+ from .chart_selector import ChartSuggestion, suggest_chart
10
+ from .datasets import days_between_falls_with_harm, days_between_serious_incidents, get_sample_data_path, infections_between_events, risk_adjusted_infection_rates, risk_adjusted_readmissions, sample_healthcare_qi_data, sample_subgroup_measurements
11
+ from .education import chart_selection_tutorial, interpretation_mistakes, simulate_process, teaching_dataset
12
+ from .export import create_report_bundle, export_excel, export_png, export_powerpoint
13
+ from .interactive import qic_altair, qic_plotly, qic_widget
14
+ from .localization import TERMINOLOGY, get_terminology, localize_interpretation
15
+ from .nelson import nelson_rule_signals, shewhart_rule_signals
16
+ from .xmr_rules import XmrRuleConfig, xmr_signals
17
+ from .pareto import ParetoResult, pareto_chart, paretochart
18
+ from .powerbi import (
19
+ intervention_metadata_table,
20
+ kpi_table,
21
+ improvement_output_table,
22
+ improvement_warning_table,
23
+ interpretation_table,
24
+ phase_metadata_table,
25
+ powerbi_table,
26
+ signal_table,
27
+ special_cause_summary_table,
28
+ spc_summary_table,
29
+ target_metadata_table,
30
+ )
31
+ from .presets import PRESETS, get_preset, list_presets
32
+ from .reporting import create_qi_report_pack, create_report_pack_from_config, load_chart_config, run_chart_batch
33
+ from .rules import AnhoejResult, anhoej_rules
34
+ from .signals import SIGNAL_SCHEMA_VERSION, Signal, signals_to_frame
35
+ from .tables import CHART_ALIASES, VALID_CHARTS, qic_table, pareto_table
36
+ from .themes import Theme, get_theme, list_themes
37
+ from .validation import compare_to_expected, read_validation_csv, validation_manifest, validation_summary
38
+
39
# Public API exported by ``from pyqicharts import *``. Every name listed here
# is imported at the top of this module from one of the package submodules.
__all__ = [
    "AnhoejResult", "CHART_ALIASES", "ChartSuggestion", "XmrRuleConfig", "PRESETS", "ParetoResult", "QicResult", "TERMINOLOGY", "Theme", "VALID_CHARTS",
    "anhoej_rules", "autocorrelation_diagnostics", "chart_selection_tutorial", "compare_to_expected", "create_report_bundle", "days_between_falls_with_harm",
    "create_qi_report_pack", "create_report_pack_from_config",
    "days_between_serious_incidents", "export_excel", "export_png",
    "export_powerpoint", "funnel_table", "get_preset", "get_sample_data_path", "get_terminology", "get_theme", "interactive_signal_review", "interpretation_mistakes", "intervention_metadata_table", "kpi_table", "list_presets", "list_themes", "load_chart_config", "localize_interpretation", "nelson_rule_signals", "interpretation_table", "improvement_output_table", "improvement_warning_table", "overdispersion_test", "pareto_table",
    "pareto_chart", "paretochart", "powerbi_table", "qic", "qic_table", "infections_between_events", "rare_event_limits", "risk_adjusted_summary", "run_chart_batch", "seasonality_diagnostics", "simulate_process", "teaching_dataset", "validation_manifest", "validation_summary", "xmr_signals",
    "risk_adjusted_infection_rates", "risk_adjusted_readmissions",
    "sample_healthcare_qi_data", "sample_subgroup_measurements", "shewhart_rule_signals",
    "phase_metadata_table", "read_validation_csv", "signal_table", "signals_to_frame", "special_cause_summary_table", "spc_summary_table",
    "qic_altair", "qic_plotly", "qic_widget", "suggest_chart", "target_metadata_table", "SIGNAL_SCHEMA_VERSION", "Signal",
]
# Version string of this package release.
__version__ = "2.0.0"
pyqicharts/advanced.py ADDED
@@ -0,0 +1,150 @@
1
+ """Advanced exploratory helpers for SPC review workflows.
2
+
3
+ These functions are deliberately table-first. They help analysts inspect
4
+ autocorrelation, seasonality, overdispersion, risk adjustment and funnel-style
5
+ variation without changing the core ``qic()`` chart API.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import math
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+
15
def autocorrelation_diagnostics(values, lag: int = 1) -> pd.DataFrame:
    """Build a one-row table describing the autocorrelation at a given lag.

    ``values`` is converted to a float series and missing entries are removed
    before the correlation is computed.

    Returns a DataFrame with the columns ``lag``, ``observations`` (count of
    non-missing values) and ``autocorrelation``.

    Raises ``ValueError`` when ``lag`` is below 1 or when there are not more
    usable observations than ``lag``.
    """
    observed = pd.Series(values, dtype="float64").dropna().reset_index(drop=True)
    n_observed = len(observed)
    if lag < 1:
        raise ValueError("lag must be at least 1.")
    if n_observed <= lag:
        raise ValueError("At least lag + 1 numeric observations are required for autocorrelation diagnostics.")
    row = {
        "lag": lag,
        "observations": n_observed,
        "autocorrelation": float(observed.autocorr(lag=lag)),
    }
    return pd.DataFrame([row])
25
+
26
+
27
def seasonality_diagnostics(data: pd.DataFrame, x: str, y: str, period: int) -> pd.DataFrame:
    """Estimate average seasonal indices for a repeated period.

    Each row is assigned a season position (``row number % period``) based on
    its position in ``data`` as supplied. Rows are not re-sorted by ``x``.

    Parameters:
        data: Input frame; must contain the ``x`` and ``y`` columns.
        x: Name of the time/order column (only checked for existence here).
        y: Name of the measure column; non-numeric entries are treated as missing.
        period: Length of the seasonal cycle, 2 or greater.

    Returns a DataFrame with ``season_position``, ``seasonal_mean`` and
    ``seasonal_index`` (seasonal mean divided by the overall mean, or NaN
    when the overall mean is zero).

    Raises ``ValueError`` for an invalid period, missing columns, or when no
    numeric values remain.
    """

    if period < 2:
        raise ValueError("period must be 2 or greater.")
    if x not in data.columns or y not in data.columns:
        raise ValueError("seasonality_diagnostics requires existing x and y columns.")
    frame = data[[x, y]].copy()
    frame[y] = pd.to_numeric(frame[y], errors="coerce")
    # Assign the season position from the original row order *before* missing
    # values are removed. Numbering after the drop would shift the phase of
    # every observation that follows a gap.
    frame["season_position"] = np.arange(len(frame)) % period
    frame = frame.dropna(subset=[y]).reset_index(drop=True)
    if frame.empty:
        raise ValueError("No numeric values are available for seasonality diagnostics.")
    overall = frame[y].mean()
    out = frame.groupby("season_position", as_index=False)[y].mean()
    out["seasonal_index"] = out[y] / overall if overall else np.nan
    return out.rename(columns={y: "seasonal_mean"})
44
+
45
+
46
def overdispersion_test(data: pd.DataFrame, y: str, denominator: str | None = None) -> pd.DataFrame:
    """Summarise the variance-to-mean ratio of a count or rate column.

    When ``denominator`` is given, ``y`` is divided by it first; zero
    denominators are treated as missing. Non-numeric entries are also treated
    as missing and dropped.

    Returns a one-row DataFrame with ``observations``, ``mean``, ``variance``
    (sample variance, ``ddof=1``) and ``dispersion_ratio`` (NaN when the mean
    is zero).

    Raises ``ValueError`` when a named column is absent or fewer than two
    usable observations remain.
    """
    if y not in data.columns:
        raise ValueError(f"Column {y!r} was not found for overdispersion testing.")
    measure = pd.to_numeric(data[y], errors="coerce")
    if denominator:
        if denominator not in data.columns:
            raise ValueError(f"Denominator column {denominator!r} was not found.")
        exposure = pd.to_numeric(data[denominator], errors="coerce").replace(0, np.nan)
        measure = measure / exposure
    measure = measure.dropna()
    n_observed = len(measure)
    if n_observed < 2:
        raise ValueError("At least two numeric observations are required for overdispersion testing.")
    centre = float(measure.mean())
    spread = float(measure.var(ddof=1))
    if centre:
        ratio = spread / centre
    else:
        ratio = np.nan
    summary = {
        "observations": n_observed,
        "mean": centre,
        "variance": spread,
        "dispersion_ratio": ratio,
    }
    return pd.DataFrame([summary])
64
+
65
+
66
def funnel_table(data: pd.DataFrame, numerator: str, denominator: str, group: str | None = None, z: float = 3.0) -> pd.DataFrame:
    """Build a funnel-plot style table of proportions against pooled limits.

    Rows with non-numeric numerators/denominators or non-positive
    denominators are discarded. When ``group`` is supplied, numerators and
    denominators are summed within each group first. The centre is the pooled
    proportion and the limits are ``centre ± z * sqrt(centre * (1 - centre) / n)``,
    clipped to the range 0..1.

    Returns the working frame with the added columns ``rate``, ``centre``,
    ``lcl``, ``ucl`` and ``outside_limits``.

    Raises ``ValueError`` for a missing column or when no usable rows remain.
    """
    required = [numerator, denominator]
    if group:
        required.append(group)
    for name in required:
        if name not in data.columns:
            raise ValueError(f"Column {name!r} was not found for funnel table generation.")

    work = data.copy()
    for name in (numerator, denominator):
        work[name] = pd.to_numeric(work[name], errors="coerce")
    usable = work[numerator].notna() & work[denominator].notna() & (work[denominator] > 0)
    work = work[usable]
    if work.empty:
        raise ValueError("No rows remain after requiring numeric numerator values and positive denominators.")
    if group:
        work = work.groupby(group, as_index=False)[[numerator, denominator]].sum()

    pooled = work[numerator].sum() / work[denominator].sum()
    sigma = np.sqrt(pooled * (1 - pooled) / work[denominator])
    lower = (pooled - z * sigma).clip(lower=0)
    upper = (pooled + z * sigma).clip(upper=1)

    result = work.copy()
    result["rate"] = result[numerator] / result[denominator]
    result["centre"] = pooled
    result["lcl"] = lower
    result["ucl"] = upper
    below = result["rate"] < result["lcl"]
    above = result["rate"] > result["ucl"]
    result["outside_limits"] = below | above
    return result.reset_index(drop=True)
90
+
91
+
92
def rare_event_limits(values, method: str = "geometric") -> pd.DataFrame:
    """Compute a centre and approximate limits for intervals between rare events.

    ``values`` is converted to a float series with missing entries removed.
    The centre is the mean interval and the lower limit is always 0. The
    upper limit is ``3 * centre`` for ``"geometric"`` and
    ``centre * ln(1 / 0.00135)`` for ``"exponential"``.

    Returns a one-row DataFrame with ``method``, ``centre``, ``lcl``, ``ucl``.

    Raises ``ValueError`` for negative intervals, no intervals, or an
    unsupported ``method`` (checked in that order).
    """
    intervals = pd.Series(values, dtype="float64").dropna()
    if (intervals < 0).any():
        raise ValueError("Rare-event intervals must be non-negative.")
    if intervals.empty:
        raise ValueError("At least one interval is required for rare-event limits.")
    centre = float(intervals.mean())
    if method == "geometric":
        upper = centre * 3.0
    elif method == "exponential":
        upper = centre * math.log(1 / 0.00135)
    else:
        raise ValueError("method must be 'geometric' or 'exponential'.")
    limits = {"method": method, "centre": centre, "lcl": 0.0, "ucl": upper}
    return pd.DataFrame([limits])
106
+
107
+
108
def risk_adjusted_summary(data: pd.DataFrame, observed: str, expected: str, denominator: str | None = None) -> pd.DataFrame:
    """Total observed and expected values and report their ratio.

    Rows where either value is missing or non-numeric are dropped before
    totalling. The ratio divides the observed total by the expected total and
    is NaN when that expected total is zero. ``zero_expected_rows`` counts
    rows whose expected value equals zero. When ``denominator`` is supplied,
    its numeric total over the retained rows is added as ``denominator_total``.

    Raises ``ValueError`` for a missing column or when no usable rows remain.
    """
    needed = [observed, expected]
    if denominator:
        needed.append(denominator)
    for name in needed:
        if name not in data.columns:
            raise ValueError(f"Column {name!r} was not found for risk-adjusted summary.")

    work = data.copy()
    for name in (observed, expected):
        work[name] = pd.to_numeric(work[name], errors="coerce")
    work = work.dropna(subset=[observed, expected])
    if work.empty:
        raise ValueError("No rows have usable observed and expected values.")

    observed_total = work[observed].sum()
    # Zeros are masked so they cannot contribute to the divisor.
    divisor = work[expected].replace(0, np.nan).sum()
    if divisor:
        ratio = float(observed_total / divisor)
    else:
        ratio = np.nan

    summary = {
        "observed_total": float(observed_total),
        "expected_total": float(work[expected].sum()),
        "observed_expected_ratio": ratio,
        "zero_expected_rows": int((work[expected] == 0).sum()),
    }
    if denominator:
        exposure = pd.to_numeric(work[denominator], errors="coerce")
        summary["denominator_total"] = float(exposure.sum())
    return pd.DataFrame([summary])
131
+
132
+
133
def interactive_signal_review(chart) -> pd.DataFrame:
    """Return a compact table for reviewing detected signals interactively.

    ``chart`` must expose ``table`` (a DataFrame) and ``x`` (the name of the
    x-axis column). Rows where the ``signal`` column is true are kept and a
    ``suggested_review`` message is attached, chosen from the row's
    ``special_cause_type`` when that column exists.

    Returns a DataFrame limited to the x column, ``plot_value``,
    ``signal_rule`` and ``suggested_review`` (only those present). An empty
    frame with those four columns is returned when there is no ``signal``
    column or no signalled rows.
    """

    review_columns = [chart.x, "plot_value", "signal_rule", "suggested_review"]
    table = chart.table.copy()
    if "signal" not in table:
        return pd.DataFrame(columns=review_columns)
    signals = table[table["signal"]].copy()
    if signals.empty:
        return pd.DataFrame(columns=review_columns)

    guidance = {
        "improvement": "Check whether the change is real, understood and sustainable.",
        "concern": "Review context, data quality and potential causes promptly.",
        "neutral": "Treat as a signal for learning before judging direction.",
    }
    if "special_cause_type" in signals:
        cause_types = signals["special_cause_type"].astype(str)
    else:
        # DataFrame.get would hand back the plain default string here, which
        # has no ``astype``; build an aligned series so the generic review
        # message below is used instead of raising AttributeError.
        cause_types = pd.Series("signal", index=signals.index)
    signals["suggested_review"] = cause_types.map(guidance).fillna(
        "Review the point and surrounding process context."
    )
    return signals[[column for column in review_columns if column in signals.columns]].reset_index(drop=True)
pyqicharts/chart_selector.py ADDED
@@ -0,0 +1,55 @@
1
+ """Guided chart selection helpers.
2
+
3
+ The selector is intentionally rule-of-thumb based. It is meant to help users
4
+ choose a sensible starting chart, not to replace analyst judgement.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+
10
+
11
@dataclass(frozen=True)
class ChartSuggestion:
    """A chart recommendation with a plain-English explanation."""

    # Chart key to pass as ``chart=...`` (for example "p", "u", "xbar").
    chart: str
    # One-sentence justification for the recommendation.
    reason: str
    # Example ``qic(...)`` call, as text, illustrating the suggested chart.
    example: str


def suggest_chart(
    kind: str | None = None,
    *,
    numerator: str | None = None,
    denominator: str | None = None,
    subgroup: bool = False,
    rare_event: bool = False,
    time_between: bool = False,
    expected: str | None = None,
) -> ChartSuggestion:
    """Suggest a chart type from common QI/SPC data shapes.

    Parameters are deliberately lightweight so the helper works in scripts,
    notebooks, Excel Companion workflows and documentation examples.

    ``kind`` is matched case-insensitively with surrounding whitespace
    removed and hyphens/spaces treated as underscores. Checks run in this
    order, first match wins: expected values (with, then without, a
    denominator), ``rare_event``, ``time_between``, subgroup data,
    proportions, rates, counts, run charts, and finally an Individuals chart
    as the default.

    A numerator/denominator pair is treated as a proportion unless ``kind``
    names a rate, in which case the U chart branch is used.
    """

    key = (kind or "").strip().lower().replace("-", "_").replace(" ", "_")
    # One shared set for both the P-chart exclusion and the U-chart branch,
    # so every rate-style kind is routed to "u" even when a numerator and
    # denominator are supplied.
    rate_kinds = {"rate", "count_per_unit", "events_per_unit"}
    if expected and denominator:
        return ChartSuggestion(
            "p_prime",
            "Observed and expected values with a denominator usually indicate a risk-adjusted proportion.",
            "qic(data, x='week', y='observed', expected='expected', denominator='n', chart='p_prime')",
        )
    if expected:
        return ChartSuggestion(
            "u_prime",
            "Observed and expected values without a clear proportion denominator can be reviewed as an observed/expected rate.",
            "qic(data, x='week', y='observed', expected='expected', chart='u_prime')",
        )
    if rare_event:
        return ChartSuggestion(
            "g",
            "Counts between rare events are suited to a G chart.",
            "qic(data, x='event_number', y='cases_between_events', chart='g')",
        )
    if time_between:
        return ChartSuggestion(
            "t",
            "Time between events is suited to a T chart.",
            "qic(data, x='event_number', y='days_between_events', chart='t')",
        )
    if subgroup or key in {"subgroup", "xbar", "x_bar"}:
        return ChartSuggestion(
            "xbar",
            "Repeated measurements in subgroups are usually reviewed with Xbar/S charts.",
            "qic(data, x='subgroup', y='value', chart='xbar')",
        )
    if key in {"proportion", "percentage", "percent", "binary"} or (numerator and denominator and key not in rate_kinds):
        return ChartSuggestion(
            "p",
            "Events out of opportunities are usually a P chart when the result is a proportion.",
            "qic(data, x='week', y='events', denominator='opportunities', chart='p')",
        )
    if key in rate_kinds:
        return ChartSuggestion(
            "u",
            "Events per unit of opportunity are usually a U chart.",
            "qic(data, x='week', y='events', denominator='bed_days', chart='u')",
        )
    if key in {"count", "counts", "defects"}:
        return ChartSuggestion(
            "c",
            "Counts with a broadly constant area of opportunity are usually a C chart.",
            "qic(data, x='week', y='count', chart='c')",
        )
    if key in {"run", "median"}:
        return ChartSuggestion(
            "run",
            "Use a run chart for a simple time series when you do not need control limits.",
            "qic(data, x='week', y='value', chart='run')",
        )
    return ChartSuggestion(
        "i",
        "A continuous measure over time is usually a good starting point for an Individuals chart.",
        "qic(data, x='week', y='value', chart='i')",
    )
pyqicharts/cli.py ADDED
@@ -0,0 +1,106 @@
1
+ """Command-line interface for simple automated chart/report creation."""
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import pandas as pd
9
+
10
+ from .core import qic
11
+ from .export import export_excel, export_png, export_powerpoint
12
+ from .reporting import create_report_pack_from_config, load_chart_config
13
+ from .tables import qic_table
14
+
15
+
16
def _add_chart_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the arguments shared by the single-chart ``chart`` command.

    Adds the positional ``data`` argument followed by the ``--x``, ``--y``,
    ``--chart``, ``--denominator``, ``--expected``, ``--output`` and
    ``--title`` options; ``--x``, ``--y`` and ``--output`` are required.
    """
    # Declared as data so the registration order (which drives --help
    # ordering) is visible at a glance.
    argument_specs = (
        ("data", {"help": "Input CSV file."}),
        ("--x", {"required": True, "help": "X-axis/date/week column."}),
        ("--y", {"required": True, "help": "Measure column."}),
        ("--chart", {"default": "i", "help": "Chart type, for example i, p, u, c, run."}),
        ("--denominator", {"help": "Denominator column for P/U charts."}),
        ("--expected", {"help": "Expected column for P-prime/U-prime charts."}),
        ("--output", {"required": True, "help": "Output file path: .xlsx, .pptx or .png."}),
        ("--title", {"help": "Optional chart/report title."}),
    )
    for flag, options in argument_specs:
        parser.add_argument(flag, **options)
27
+
28
+
29
def _run_chart(args: argparse.Namespace) -> int:
    """Build one chart from a CSV file and export it in the requested format.

    The export format is chosen from the ``--output`` file extension
    (case-insensitive): ``.xlsx``, ``.pptx`` or ``.png``. Returns 0 on
    success and raises ``ValueError`` for any other extension. The CSV is
    read and the chart is built before the extension is checked.
    """
    frame = pd.read_csv(args.data)
    result = qic(
        frame,
        args.x,
        args.y,
        chart=args.chart,
        denominator=args.denominator,
        expected=args.expected,
        title=args.title,
    )
    destination = Path(args.output)
    extension = destination.suffix.lower()
    if extension not in (".xlsx", ".pptx", ".png"):
        raise ValueError("Output must end with .xlsx, .pptx or .png. Choose one of those file extensions.")
    if extension == ".xlsx":
        export_excel(result, destination)
    elif extension == ".pptx":
        # Only the PowerPoint export is given the title explicitly.
        export_powerpoint(result, destination, title=args.title)
    else:
        export_png(result, destination)
    return 0
45
+
46
+
47
def _run_report(args: argparse.Namespace) -> int:
    """Generate a multi-chart report folder from a configuration file.

    Returns 0 when the reported ``successful_charts`` equals ``chart_count``
    (both default to 0 when absent from the returned metadata). Otherwise
    raises ``ValueError`` pointing at the metadata's ``batch_summary`` entry.
    """
    summary = create_report_pack_from_config(args.config, args.output)
    succeeded = summary.get("successful_charts", 0)
    attempted = summary.get("chart_count", 0)
    if succeeded == attempted:
        return 0
    raise ValueError(f"Report completed with errors. See {summary['batch_summary']} for details.")
54
+
55
+
56
def _run_validate(args: argparse.Namespace) -> int:
    """Check that a CSV plus a chart configuration yields a calculation table.

    Only one chart specification is checked: the first entry of the config's
    ``charts`` list when that is a non-empty list, otherwise the config
    itself. Returns 0 on success and raises ``ValueError`` when the resulting
    table is empty.
    """
    frame = pd.read_csv(args.data)
    config = load_chart_config(args.config)
    candidates = config.get("charts", [config])
    if isinstance(candidates, list) and candidates:
        spec = candidates[0]
    else:
        spec = config
    table_options = {
        "x": spec.get("x"),
        "y": spec.get("y"),
        "chart": spec.get("chart", "i"),
        "denominator": spec.get("denominator"),
        "expected": spec.get("expected"),
    }
    table = qic_table(frame, **table_options)
    if table.empty:
        raise ValueError("Validation produced an empty calculation table. Check the CSV rows and selected columns.")
    return 0
74
+
75
+
76
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and run the selected sub-command.

    Three sub-commands are registered: ``chart``, ``report`` and
    ``validate``. Any exception raised by a sub-command is printed to stderr
    as ``pyqicharts error: ...`` and converted to exit status 2; otherwise
    the sub-command's own return value is used.
    """
    parser = argparse.ArgumentParser(prog="pyqicharts", description="Create QI/SPC charts from CSV data.")
    commands = parser.add_subparsers(dest="command", required=True)

    _add_chart_arguments(commands.add_parser("chart", help="Create one chart from one CSV file."))

    report_cmd = commands.add_parser("report", help="Create a report folder from a chart config file.")
    report_cmd.add_argument("config", help="JSON or simple YAML chart configuration file.")
    report_cmd.add_argument("--output", required=True, help="Output report folder.")

    validate_cmd = commands.add_parser("validate", help="Check that a CSV and chart config can be calculated.")
    validate_cmd.add_argument("data", help="Input CSV file.")
    validate_cmd.add_argument("--config", required=True, help="JSON or simple YAML chart configuration file.")

    args = parser.parse_args(argv)
    handlers = {
        "chart": _run_chart,
        "report": _run_report,
        "validate": _run_validate,
    }
    handler = handlers.get(args.command)
    if handler is None:
        return 0
    try:
        return handler(args)
    except Exception as exc:  # noqa: BLE001 - CLI should present concise user-facing errors.
        print(f"pyqicharts error: {exc}", file=sys.stderr)
        return 2


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
pyqicharts/core.py ADDED
@@ -0,0 +1,204 @@
1
+ """Matplotlib chart construction for pyqicharts.
2
+
3
+ `qic()` is intentionally a thin plotting wrapper around `qic_table()`. The
4
+ calculation table is always retained on the returned `QicResult`, so users can
5
+ move between scripts, notebooks, Excel exports and Power BI without rerunning
6
+ separate calculation logic.
7
+ """
8
+ from __future__ import annotations
9
+ from dataclasses import dataclass
10
+ from typing import Optional
11
+ import matplotlib.pyplot as plt
12
+ import numpy as np
13
+ import pandas as pd
14
+ from .rules import AnhoejResult, anhoej_rules
15
+ from .signals import table_signals
16
+ from .tables import qic_table
17
+ from .themes import get_theme
18
+
19
@dataclass
class QicResult:
    """Bundle of inputs, calculated values and plot objects produced by `qic()`."""

    data: pd.DataFrame
    chart: str
    x: str
    y: str
    centre: float
    centre_label: str
    lcl: Optional[float]
    ucl: Optional[float]
    anhoej: Optional[AnhoejResult]
    signals: pd.Series
    table: pd.DataFrame
    figure: object
    axes: object

    def save_png(self, path: str, dpi: int = 150):
        """Write the chart to ``path`` as a PNG via ``export_png``."""
        # Imported lazily inside the method rather than at module import time.
        from .export import export_png

        return export_png(self, path, dpi=dpi)

    def summary(self) -> dict:
        """Return the headline chart values as a plain dictionary.

        Contains ``chart``, ``centre_label``, ``centre``, ``lcl``, ``ucl`` and
        ``signals`` (the integer sum of the signal series). ``anhoej`` is
        included only when an Anhoej result is attached.
        """
        headline_fields = ("chart", "centre_label", "centre", "lcl", "ucl")
        summary = {name: getattr(self, name) for name in headline_fields}
        summary["signals"] = int(self.signals.sum())
        if self.anhoej is None:
            return summary
        summary["anhoej"] = self.anhoej
        return summary

    def signal_table(self) -> pd.DataFrame:
        """Return the detected signals as produced by ``table_signals``."""
        return table_signals(self.table, self.chart, self.x)

    def show(self):
        """Display figures through ``matplotlib.pyplot.show()``."""
        plt.show()
49
+
50
def _normalise_chart_name(chart: str) -> str:
    """Normalise public chart aliases to the internal chart key.

    The name is stripped of surrounding whitespace, lower-cased, and hyphens
    and spaces are converted to underscores before the alias lookup. Names
    without an alias are returned in that normalised form.
    """
    # Keys are written in normalised form (underscores, no hyphens) because
    # the lookup happens after hyphens are replaced.
    aliases = {
        "individuals": "i",
        "movingrange": "mr",
        "moving_range": "mr",
        "count": "c",
        "proportion": "p",
        "rate": "u",
        "rare_event": "g",
        "time_between": "t",
        "p'": "p_prime",
        "pprime": "p_prime",
        "u'": "u_prime",
        "uprime": "u_prime",
        "x_bar": "xbar",
    }
    # Strip first so padded input such as " X-Bar " does not turn into
    # "_x_bar_" and miss the alias table.
    key = chart.strip().lower().replace("-", "_").replace(" ", "_")
    return aliases.get(key, key)
54
+
55
def _scalar_or_none(series: pd.Series) -> float | None:
    """Return the first non-missing entry of ``series`` as a float, else ``None``."""
    present = series.dropna()
    if present.empty:
        return None
    return float(present.iloc[0])
59
+
60
+
61
def _signal_annotation_label(row: pd.Series) -> str:
    """Pick the annotation text for a signalled point.

    The first non-missing, non-blank value among ``special_cause_label``,
    ``special_cause_rule`` and ``signal_rule`` is used. Text longer than 48
    characters is cut to 45 characters plus ``...``. Falls back to
    ``"Signal"`` when none of the fields is usable.
    """
    preferred_fields = ("special_cause_label", "special_cause_rule", "signal_rule")
    for field in preferred_fields:
        candidate = row.get(field, "")
        if pd.isna(candidate):
            continue
        text = str(candidate).strip()
        if not text:
            continue
        if len(text) > 48:
            return text[:45] + "..."
        return text
    return "Signal"
70
+
71
+
72
def qic(
    data: pd.DataFrame,
    x: str,
    y: str,
    chart: str = "run",
    denominator: str | None = None,
    expected: str | None = None,
    title: str | None = None,
    figsize: tuple[int,int] = (10,5),
    theme: str = "default",
    improvement: str | None = None,
    shift_points: int = 6,
    trend_points: int = 6,
    baseline_points: int | None = None,
    recalculation_points: list | None = None,
    target: float | int | None = None,
    interventions: list[dict] | None = None,
    step_changes: list[dict] | None = None,
    freeze_points: list | None = None,
    break_points: list | None = None,
    exclude_points: list | None = None,
    phases: list[dict] | None = None,
    rules: str | None = None,
    method: str = "anhoej",
    baseline_start=None,
    baseline_end=None,
    freeze: list | None = None,
    breaks: list | None = None,
    exclude: list | None = None,
    recalculate_after: list | None = None,
    targets=None,
    annotate_signals: bool = True,
    exclude_mr_outliers: bool = False,
) -> QicResult:
    """Create a QI/SPC chart.

    Supports run, I, MR, C, P, U, Xbar, S, G, T, P-prime and U-prime charts. P and U charts
    require a denominator column. Individuals charts include healthcare QI-style
    special cause colouring and interpretation, plus baseline, recalculation,
    target, intervention and step-change metadata.

    All calculation is delegated to ``qic_table``; this function only plots
    the returned table. Every parameter except ``title``, ``figsize``,
    ``theme`` and ``annotate_signals`` is forwarded to ``qic_table``
    unchanged (``chart`` is forwarded after alias normalisation).

    Plot-only parameters:
        title: Figure title; defaults to ``"<CHART> chart of <y>"``.
        figsize: Size passed to ``plt.subplots``.
        theme: Theme name resolved through ``get_theme``.
        annotate_signals: When true, each signalled point gets a text label.

    ``target`` is also drawn as a horizontal line when it is not ``None``.

    Returns a ``QicResult`` holding a copy of ``data``, the calculation
    table, the first non-missing centre/LCL/UCL values, the signal column,
    the Matplotlib figure and axes, and (for run charts only) the result of
    ``anhoej_rules`` applied to ``table[y]``.
    """
    chart_key = _normalise_chart_name(chart); style = get_theme(theme)
    table = qic_table(
        data=data,
        x=x,
        y=y,
        chart=chart_key,
        denominator=denominator,
        expected=expected,
        improvement=improvement,
        shift_points=shift_points,
        trend_points=trend_points,
        baseline_points=baseline_points,
        recalculation_points=recalculation_points,
        target=target,
        interventions=interventions,
        step_changes=step_changes,
        freeze_points=freeze_points,
        break_points=break_points,
        exclude_points=exclude_points,
        phases=phases,
        rules=rules,
        method=method,
        baseline_start=baseline_start,
        baseline_end=baseline_end,
        freeze=freeze,
        breaks=breaks,
        exclude=exclude,
        recalculate_after=recalculate_after,
        targets=targets,
        exclude_mr_outliers=exclude_mr_outliers,
    )
    fig, ax = plt.subplots(figsize=figsize)
    # The y-axis label describes what "plot_value" represents for this chart
    # type; chart types not listed below keep the raw column name.
    ylabel = y
    if chart_key == "mr": ylabel = f"Moving range of {y}"
    elif chart_key == "p": ylabel = f"Proportion of {y}"
    elif chart_key == "u": ylabel = f"Rate of {y}"
    elif chart_key == "g": ylabel = f"Cases between events: {y}"
    elif chart_key == "t": ylabel = f"Time between events: {y}"
    elif chart_key in {"p_prime", "u_prime"}: ylabel = f"Observed / expected: {y}"
    elif chart_key == "xbar": ylabel = f"Subgroup mean of {y}"
    elif chart_key == "s": ylabel = f"Subgroup standard deviation of {y}"
    ax.plot(table[x], table["plot_value"], marker="o", linewidth=1.8, color=style.line, markerfacecolor=style.marker, markeredgecolor=style.marker)
    # Signalled points are over-plotted as larger markers on top of the line.
    signal_rows = table[table["signal"]]
    if not signal_rows.empty:
        if "special_cause_type" in signal_rows:
            # One scatter call per special-cause type so each type gets its
            # own colour and a single legend entry.
            plotted_labels = set()
            for signal_type, rows in signal_rows.groupby("special_cause_type", dropna=False):
                label = str(signal_type).title() if signal_type else "Signal"
                # NOTE(review): str() of a missing value is the truthy text
                # "nan", so the style.signal fallback only applies to an
                # empty string; confirm qic_table always fills this column.
                color = str(rows["special_cause_colour"].iloc[0]) or style.signal
                ax.scatter(rows[x], rows["plot_value"], s=90, color=color, marker="o", zorder=5, label=None if label in plotted_labels else label)
                plotted_labels.add(label)
        else:
            ax.scatter(signal_rows[x], signal_rows["plot_value"], s=90, color=style.signal, marker="o", zorder=5, label="Signal")
        if annotate_signals:
            # Annotation text is deliberately derived from the calculated table
            # fields so saved PNGs, notebooks and downstream exports tell the
            # same story as `qic_table(...)`.
            for _, row in signal_rows.iterrows():
                ax.annotate(
                    _signal_annotation_label(row),
                    xy=(row[x], row["plot_value"]),
                    xytext=(6, 10),
                    textcoords="offset points",
                    fontsize=8,
                    color="#231F20",
                    arrowprops={"arrowstyle": "->", "color": "#425563", "lw": 0.8},
                )
    # Headline values for the returned QicResult: the first non-missing entry
    # of each column, regardless of segment.
    centre = _scalar_or_none(table["centre"]); lcl = _scalar_or_none(table["lcl"]); ucl = _scalar_or_none(table["ucl"]); centre_label = str(table["centre_label"].iloc[0]) if len(table) else "Centre"
    # Segment-aware horizontal lines let baseline/recalculation periods share
    # one plot without pretending a single limit applies to every segment.
    # NOTE(review): without a "segment_id" column this groups by the row
    # index (one group per distinct index label); confirm qic_table always
    # provides segment_id.
    for segment_id, rows in table.groupby("segment_id" if "segment_id" in table else table.index, sort=True):
        first_x = rows[x].iloc[0]; last_x = rows[x].iloc[-1]
        seg_centre = _scalar_or_none(rows["centre"]); seg_lcl = _scalar_or_none(rows["lcl"]); seg_ucl = _scalar_or_none(rows["ucl"])
        # Segment 1 keeps the plain legend labels; later segments get " S<n>".
        label_suffix = "" if segment_id == 1 else f" S{segment_id}"
        if seg_centre is not None and not np.isnan(seg_centre): ax.hlines(seg_centre, first_x, last_x, linestyle="--", linewidth=1.4, color=style.centre, label=centre_label + label_suffix)
        if seg_lcl is not None and not np.isnan(seg_lcl): ax.hlines(seg_lcl, first_x, last_x, linestyle=":", linewidth=1.2, color=style.limits, label="LCL" + label_suffix)
        if seg_ucl is not None and not np.isnan(seg_ucl): ax.hlines(seg_ucl, first_x, last_x, linestyle=":", linewidth=1.2, color=style.limits, label="UCL" + label_suffix)
    if target is not None:
        ax.axhline(target, linestyle="-.", linewidth=1.2, color="#330072", label="Target")
    # Interventions and step changes are marked with vertical lines, with the
    # optional label text rotated alongside the line.
    if "intervention" in table:
        for _, row in table[table["intervention"]].iterrows():
            ax.axvline(row[x], linestyle="-", linewidth=1.0, color="#425563", alpha=0.65)
            if row["intervention_label"]:
                ax.text(row[x], row["plot_value"], str(row["intervention_label"]), rotation=90, va="bottom", ha="right", fontsize=8)
    if "step_change" in table:
        for _, row in table[table["step_change"]].iterrows():
            ax.axvline(row[x], linestyle="--", linewidth=1.0, color="#007F3B", alpha=0.75)
            if row["step_change_label"]:
                ax.text(row[x], row["plot_value"], str(row["step_change_label"]), rotation=90, va="bottom", ha="left", fontsize=8)
    ax.set_xlabel(x); ax.set_ylabel(ylabel); ax.set_title(title or f"{chart_key.upper()} chart of {y}"); ax.grid(True, alpha=style.grid_alpha); ax.legend(loc="best"); fig.tight_layout()
    # Anhoej run-chart rules are evaluated only for run charts.
    anhoej = anhoej_rules(table[y]) if chart_key == "run" else None
    return QicResult(data.copy(), chart_key, x, y, centre if centre is not None else float("nan"), centre_label, lcl, ucl, anhoej, table["signal"], table, fig, ax)
pyqicharts/datasets.py ADDED
@@ -0,0 +1,96 @@
1
+ """Small example and sample datasets for pyqicharts.
2
+
3
+ The bundled datasets are synthetic. They are designed for examples, tests and
4
+ new-user exploration, not for clinical benchmarking.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from importlib import resources
9
+ from pathlib import Path
10
+
11
+ import pandas as pd
12
+
13
+
14
def get_sample_data_path(filename: str) -> Path:
    """Locate a bundled sample-data file inside the installed package.

    The file is looked up in the ``sample_data`` directory of the
    ``pyqicharts`` package through ``importlib.resources``, so no assumption
    is made about where the package is installed.

    Returns the location as a ``pathlib.Path``. Raises ``FileNotFoundError``
    when no such file exists.
    """
    package_root = resources.files("pyqicharts")
    candidate = package_root.joinpath("sample_data").joinpath(filename)
    if candidate.is_file():
        return Path(candidate)
    raise FileNotFoundError(f"Sample dataset not found: {filename}")
28
+
29
+
30
def infections_between_events() -> pd.DataFrame:
    """Build the synthetic cases-between-infections dataset used for G charts.

    Returns twelve rows with ``case_number`` (1..12) and
    ``cases_between_events``.
    """
    cases_between = [22, 18, 25, 21, 19, 24, 85, 20, 23, 17, 2, 21]
    columns = {
        "case_number": range(1, len(cases_between) + 1),
        "cases_between_events": cases_between,
    }
    return pd.DataFrame(columns)
39
+
40
+
41
def sample_healthcare_qi_data() -> pd.DataFrame:
    """Load the bundled ``sample_healthcare_qi_data.csv`` file into a DataFrame."""
    csv_path = get_sample_data_path("sample_healthcare_qi_data.csv")
    return pd.read_csv(csv_path)
45
+
46
+
47
def sample_subgroup_measurements() -> pd.DataFrame:
    """Load the bundled ``sample_subgroup_measurements.csv`` file into a DataFrame."""
    csv_path = get_sample_data_path("sample_subgroup_measurements.csv")
    return pd.read_csv(csv_path)
51
+
52
+
53
def days_between_serious_incidents() -> pd.DataFrame:
    """Build the synthetic days-between-serious-incidents dataset used for T charts.

    Returns twelve rows with ``event_number`` (1..12) and
    ``days_between_events``.
    """
    day_gaps = [31, 27, 35, 29, 33, 30, 120, 28, 32, 26, 1, 34]
    columns = {
        "event_number": range(1, len(day_gaps) + 1),
        "days_between_events": day_gaps,
    }
    return pd.DataFrame(columns)
62
+
63
+
64
def days_between_falls_with_harm() -> pd.DataFrame:
    """Build the synthetic days-between-falls-with-harm dataset used for T charts.

    Returns twelve rows with ``event_number`` (1..12) and
    ``days_between_events``.
    """
    day_gaps = [14, 17, 12, 16, 15, 13, 19, 18, 55, 11, 15, 2]
    columns = {
        "event_number": range(1, len(day_gaps) + 1),
        "days_between_events": day_gaps,
    }
    return pd.DataFrame(columns)
73
+
74
+
75
def risk_adjusted_readmissions() -> pd.DataFrame:
    """Build the synthetic observed/expected readmissions dataset used for P-prime charts.

    Returns twelve rows with ``month`` (1..12), ``observed`` and ``expected``.
    """
    observed_counts = [18, 21, 19, 22, 20, 24, 23, 42, 21, 20, 18, 19]
    expected_counts = [20.0, 20.5, 19.8, 21.0, 20.2, 21.5, 22.0, 22.1, 21.0, 20.6, 19.7, 20.1]
    columns = {
        "month": range(1, len(observed_counts) + 1),
        "observed": observed_counts,
        "expected": expected_counts,
    }
    return pd.DataFrame(columns)
85
+
86
+
87
def risk_adjusted_infection_rates() -> pd.DataFrame:
    """Build the synthetic observed/expected infection dataset used for U-prime charts.

    Returns twelve rows with ``month`` (1..12), ``observed`` and ``expected``.
    """
    observed_counts = [4, 5, 3, 4, 6, 5, 4, 12, 5, 4, 3, 5]
    expected_counts = [4.5, 4.8, 4.2, 4.4, 4.9, 5.1, 4.7, 5.0, 4.6, 4.5, 4.3, 4.8]
    columns = {
        "month": range(1, len(observed_counts) + 1),
        "observed": observed_counts,
        "expected": expected_counts,
    }
    return pd.DataFrame(columns)