pyqicharts 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyqicharts/__init__.py +51 -0
- pyqicharts/advanced.py +150 -0
- pyqicharts/chart_selector.py +55 -0
- pyqicharts/cli.py +106 -0
- pyqicharts/core.py +204 -0
- pyqicharts/datasets.py +96 -0
- pyqicharts/education.py +87 -0
- pyqicharts/export.py +177 -0
- pyqicharts/interactive.py +58 -0
- pyqicharts/localization.py +58 -0
- pyqicharts/nelson.py +157 -0
- pyqicharts/pareto.py +38 -0
- pyqicharts/powerbi.py +370 -0
- pyqicharts/presets.py +51 -0
- pyqicharts/reporting.py +231 -0
- pyqicharts/rules.py +193 -0
- pyqicharts/sample_data/sample_healthcare_qi_data.csv +13 -0
- pyqicharts/sample_data/sample_subgroup_measurements.csv +25 -0
- pyqicharts/signals.py +97 -0
- pyqicharts/tables.py +686 -0
- pyqicharts/themes.py +39 -0
- pyqicharts/validation.py +149 -0
- pyqicharts/xmr_rules.py +276 -0
- pyqicharts-2.0.0.dist-info/METADATA +131 -0
- pyqicharts-2.0.0.dist-info/RECORD +37 -0
- pyqicharts-2.0.0.dist-info/WHEEL +5 -0
- pyqicharts-2.0.0.dist-info/entry_points.txt +3 -0
- pyqicharts-2.0.0.dist-info/licenses/LICENSE +21 -0
- pyqicharts-2.0.0.dist-info/top_level.txt +2 -0
- pyqicharts_excel/__init__.py +35 -0
- pyqicharts_excel/config.py +235 -0
- pyqicharts_excel/examples.py +20 -0
- pyqicharts_excel/excel_io.py +265 -0
- pyqicharts_excel/outputs.py +166 -0
- pyqicharts_excel/ribbon.py +36 -0
- pyqicharts_excel/runner.py +218 -0
- pyqicharts_excel/validation.py +81 -0
pyqicharts/__init__.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""pyqicharts: Quality Improvement and SPC charts for Python.
|
|
2
|
+
|
|
3
|
+
The package root re-exports the supported public API. Internal helper functions
|
|
4
|
+
remain inside their modules so future releases can evolve implementation
|
|
5
|
+
details without forcing users to change imports.
|
|
6
|
+
"""
|
|
7
|
+
from .core import QicResult, qic
|
|
8
|
+
from .advanced import autocorrelation_diagnostics, funnel_table, interactive_signal_review, overdispersion_test, rare_event_limits, risk_adjusted_summary, seasonality_diagnostics
|
|
9
|
+
from .chart_selector import ChartSuggestion, suggest_chart
|
|
10
|
+
from .datasets import days_between_falls_with_harm, days_between_serious_incidents, get_sample_data_path, infections_between_events, risk_adjusted_infection_rates, risk_adjusted_readmissions, sample_healthcare_qi_data, sample_subgroup_measurements
|
|
11
|
+
from .education import chart_selection_tutorial, interpretation_mistakes, simulate_process, teaching_dataset
|
|
12
|
+
from .export import create_report_bundle, export_excel, export_png, export_powerpoint
|
|
13
|
+
from .interactive import qic_altair, qic_plotly, qic_widget
|
|
14
|
+
from .localization import TERMINOLOGY, get_terminology, localize_interpretation
|
|
15
|
+
from .nelson import nelson_rule_signals, shewhart_rule_signals
|
|
16
|
+
from .xmr_rules import XmrRuleConfig, xmr_signals
|
|
17
|
+
from .pareto import ParetoResult, pareto_chart, paretochart
|
|
18
|
+
from .powerbi import (
|
|
19
|
+
intervention_metadata_table,
|
|
20
|
+
kpi_table,
|
|
21
|
+
improvement_output_table,
|
|
22
|
+
improvement_warning_table,
|
|
23
|
+
interpretation_table,
|
|
24
|
+
phase_metadata_table,
|
|
25
|
+
powerbi_table,
|
|
26
|
+
signal_table,
|
|
27
|
+
special_cause_summary_table,
|
|
28
|
+
spc_summary_table,
|
|
29
|
+
target_metadata_table,
|
|
30
|
+
)
|
|
31
|
+
from .presets import PRESETS, get_preset, list_presets
|
|
32
|
+
from .reporting import create_qi_report_pack, create_report_pack_from_config, load_chart_config, run_chart_batch
|
|
33
|
+
from .rules import AnhoejResult, anhoej_rules
|
|
34
|
+
from .signals import SIGNAL_SCHEMA_VERSION, Signal, signals_to_frame
|
|
35
|
+
from .tables import CHART_ALIASES, VALID_CHARTS, qic_table, pareto_table
|
|
36
|
+
from .themes import Theme, get_theme, list_themes
|
|
37
|
+
from .validation import compare_to_expected, read_validation_csv, validation_manifest, validation_summary
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
"AnhoejResult", "CHART_ALIASES", "ChartSuggestion", "XmrRuleConfig", "PRESETS", "ParetoResult", "QicResult", "TERMINOLOGY", "Theme", "VALID_CHARTS",
|
|
41
|
+
"anhoej_rules", "autocorrelation_diagnostics", "chart_selection_tutorial", "compare_to_expected", "create_report_bundle", "days_between_falls_with_harm",
|
|
42
|
+
"create_qi_report_pack", "create_report_pack_from_config",
|
|
43
|
+
"days_between_serious_incidents", "export_excel", "export_png",
|
|
44
|
+
"export_powerpoint", "funnel_table", "get_preset", "get_sample_data_path", "get_terminology", "get_theme", "interactive_signal_review", "interpretation_mistakes", "intervention_metadata_table", "kpi_table", "list_presets", "list_themes", "load_chart_config", "localize_interpretation", "nelson_rule_signals", "interpretation_table", "improvement_output_table", "improvement_warning_table", "overdispersion_test", "pareto_table",
|
|
45
|
+
"pareto_chart", "paretochart", "powerbi_table", "qic", "qic_table", "infections_between_events", "rare_event_limits", "risk_adjusted_summary", "run_chart_batch", "seasonality_diagnostics", "simulate_process", "teaching_dataset", "validation_manifest", "validation_summary", "xmr_signals",
|
|
46
|
+
"risk_adjusted_infection_rates", "risk_adjusted_readmissions",
|
|
47
|
+
"sample_healthcare_qi_data", "sample_subgroup_measurements", "shewhart_rule_signals",
|
|
48
|
+
"phase_metadata_table", "read_validation_csv", "signal_table", "signals_to_frame", "special_cause_summary_table", "spc_summary_table",
|
|
49
|
+
"qic_altair", "qic_plotly", "qic_widget", "suggest_chart", "target_metadata_table", "SIGNAL_SCHEMA_VERSION", "Signal",
|
|
50
|
+
]
|
|
51
|
+
__version__ = "2.0.0"
|
pyqicharts/advanced.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Advanced exploratory helpers for SPC review workflows.
|
|
2
|
+
|
|
3
|
+
These functions are deliberately table-first. They help analysts inspect
|
|
4
|
+
autocorrelation, seasonality, overdispersion, risk adjustment and funnel-style
|
|
5
|
+
variation without changing the core ``qic()`` chart API.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import math
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def autocorrelation_diagnostics(values, lag: int = 1) -> pd.DataFrame:
    """Build a one-row table with the autocorrelation of ``values`` at ``lag``.

    ``values`` is converted to a float64 Series and missing entries are
    dropped before the calculation, so ``observations`` counts only the
    values that were actually used.

    Raises:
        ValueError: if ``lag`` is below 1, or if there are not more usable
            observations than ``lag``.
    """
    cleaned = pd.Series(values, dtype="float64").dropna().reset_index(drop=True)
    n_obs = len(cleaned)
    if lag < 1:
        raise ValueError("lag must be at least 1.")
    if n_obs <= lag:
        raise ValueError("At least lag + 1 numeric observations are required for autocorrelation diagnostics.")
    row = {
        "lag": lag,
        "observations": n_obs,
        "autocorrelation": float(cleaned.autocorr(lag=lag)),
    }
    # One list per column keeps the result a single-row frame.
    return pd.DataFrame({name: [value] for name, value in row.items()})
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def seasonality_diagnostics(data: pd.DataFrame, x: str, y: str, period: int) -> pd.DataFrame:
    """Estimate average seasonal indices for a repeated period.

    Rows are assumed to already be in time order; the ``x`` column is only
    required to exist and is not used for sorting. Each row is assigned a
    season position ``row_number % period`` based on its position in
    ``data``.

    Args:
        data: Input frame containing the ``x`` and ``y`` columns.
        x: Name of the time/order column (must exist in ``data``).
        y: Name of the measure column; non-numeric entries are treated as
            missing.
        period: Length of the seasonal cycle in rows (2 or greater).

    Returns:
        A frame with one row per observed season position and the columns
        ``season_position``, ``seasonal_mean`` and ``seasonal_index``
        (seasonal mean divided by the overall mean, or NaN when the overall
        mean is zero).

    Raises:
        ValueError: if ``period`` is below 2, a column is missing, or no
            numeric ``y`` values remain.
    """
    if period < 2:
        raise ValueError("period must be 2 or greater.")
    if x not in data.columns or y not in data.columns:
        raise ValueError("seasonality_diagnostics requires existing x and y columns.")
    frame = data[[x, y]].copy()
    frame[y] = pd.to_numeric(frame[y], errors="coerce")
    # Assign the season position BEFORE dropping missing values. Numbering
    # the rows after the drop would shift every later observation into the
    # wrong season as soon as a single value is missing.
    frame["season_position"] = np.arange(len(frame)) % period
    frame = frame.dropna(subset=[y]).reset_index(drop=True)
    if frame.empty:
        raise ValueError("No numeric values are available for seasonality diagnostics.")
    overall = frame[y].mean()
    out = frame.groupby("season_position", as_index=False)[y].mean()
    # A zero overall mean makes the index undefined rather than infinite.
    out["seasonal_index"] = out[y] / overall if overall else np.nan
    return out.rename(columns={y: "seasonal_mean"})
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def overdispersion_test(data: pd.DataFrame, y: str, denominator: str | None = None) -> pd.DataFrame:
    """Summarise the variance-to-mean ratio of a count or rate column.

    When ``denominator`` is given, each ``y`` value is divided by its
    denominator first; zero denominators are treated as missing. Rows that
    end up non-numeric or missing are excluded.

    Returns:
        A one-row frame with ``observations``, ``mean``, ``variance``
        (sample variance, ``ddof=1``) and ``dispersion_ratio`` (variance
        divided by mean, or NaN when the mean is zero).

    Raises:
        ValueError: if a named column is missing or fewer than two usable
            observations remain.
    """
    if y not in data.columns:
        raise ValueError(f"Column {y!r} was not found for overdispersion testing.")
    measure = pd.to_numeric(data[y], errors="coerce")
    if denominator:
        if denominator not in data.columns:
            raise ValueError(f"Denominator column {denominator!r} was not found.")
        # Zero exposure cannot produce a rate, so it becomes missing.
        exposure = pd.to_numeric(data[denominator], errors="coerce").replace(0, np.nan)
        measure = measure / exposure
    measure = measure.dropna()
    n_obs = len(measure)
    if n_obs < 2:
        raise ValueError("At least two numeric observations are required for overdispersion testing.")
    mean = float(measure.mean())
    variance = float(measure.var(ddof=1))
    if mean:
        ratio = variance / mean
    else:
        ratio = np.nan
    summary = {
        "observations": [n_obs],
        "mean": [mean],
        "variance": [variance],
        "dispersion_ratio": [ratio],
    }
    return pd.DataFrame(summary)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def funnel_table(data: pd.DataFrame, numerator: str, denominator: str, group: str | None = None, z: float = 3.0) -> pd.DataFrame:
    """Build a funnel-plot style table of proportions with binomial limits.

    Rows with a non-numeric numerator or denominator, or a denominator that
    is not positive, are removed. When ``group`` is given, numerators and
    denominators are summed per group first. The centre is the pooled
    proportion (total numerator / total denominator) and each row gets
    limits at ``centre +/- z * sqrt(centre * (1 - centre) / denominator)``,
    clipped to the range 0 to 1.

    Returns:
        The working frame with added ``rate``, ``centre``, ``lcl``, ``ucl``
        and boolean ``outside_limits`` columns.

    Raises:
        ValueError: if a named column is missing or no usable rows remain.
    """
    required = [numerator, denominator]
    if group:
        required.append(group)
    for column in required:
        if column not in data.columns:
            raise ValueError(f"Column {column!r} was not found for funnel table generation.")

    working = data.copy()
    for column in (numerator, denominator):
        working[column] = pd.to_numeric(working[column], errors="coerce")
    working = working.dropna(subset=[numerator, denominator])
    working = working[working[denominator] > 0]
    if working.empty:
        raise ValueError("No rows remain after requiring numeric numerator values and positive denominators.")
    if group:
        working = working.groupby(group, as_index=False)[[numerator, denominator]].sum()

    pooled = working[numerator].sum() / working[denominator].sum()
    # Standard error shrinks as the row's own denominator grows.
    standard_error = np.sqrt(pooled * (1 - pooled) / working[denominator])
    margin = z * standard_error

    out = working.copy()
    out["rate"] = out[numerator] / out[denominator]
    out["centre"] = pooled
    out["lcl"] = (pooled - margin).clip(lower=0)
    out["ucl"] = (pooled + margin).clip(upper=1)
    below = out["rate"] < out["lcl"]
    above = out["rate"] > out["ucl"]
    out["outside_limits"] = below | above
    return out.reset_index(drop=True)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def rare_event_limits(values, method: str = "geometric") -> pd.DataFrame:
    """Compute a centre line and approximate limits for intervals between rare events.

    ``values`` is converted to float64 and missing entries are dropped. The
    centre is the mean interval, the lower limit is always 0, and the upper
    limit is the centre multiplied by 3 for ``"geometric"`` or by
    ``log(1 / 0.00135)`` for ``"exponential"``.

    Raises:
        ValueError: if any interval is negative, no intervals remain, or
            ``method`` is not one of the two supported names.
    """
    intervals = pd.Series(values, dtype="float64").dropna()
    if (intervals < 0).any():
        raise ValueError("Rare-event intervals must be non-negative.")
    if intervals.empty:
        raise ValueError("At least one interval is required for rare-event limits.")
    centre = float(intervals.mean())
    # Upper-limit multiplier applied to the mean for each supported method.
    upper_multiplier = {
        "geometric": 3.0,
        "exponential": math.log(1 / 0.00135),
    }
    if method not in upper_multiplier:
        raise ValueError("method must be 'geometric' or 'exponential'.")
    limits = {
        "method": [method],
        "centre": [centre],
        "lcl": [0.0],
        "ucl": [centre * upper_multiplier[method]],
    }
    return pd.DataFrame(limits)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def risk_adjusted_summary(data: pd.DataFrame, observed: str, expected: str, denominator: str | None = None) -> pd.DataFrame:
    """Total the observed and expected columns and report their ratio.

    Rows where either the observed or expected value is non-numeric or
    missing are excluded. The result is a one-row frame with
    ``observed_total``, ``expected_total``, ``observed_expected_ratio``
    (NaN when the expected total is zero) and ``zero_expected_rows``; a
    ``denominator_total`` column is added when ``denominator`` is given.

    Raises:
        ValueError: if a named column is missing or no usable rows remain.
    """
    needed = [observed, expected]
    if denominator:
        needed.append(denominator)
    for column in needed:
        if column not in data.columns:
            raise ValueError(f"Column {column!r} was not found for risk-adjusted summary.")

    frame = data.copy()
    for column in (observed, expected):
        frame[column] = pd.to_numeric(frame[column], errors="coerce")
    frame = frame.dropna(subset=[observed, expected])
    if frame.empty:
        raise ValueError("No rows have usable observed and expected values.")

    observed_total = frame[observed].sum()
    # Zero expected values are blanked before summing for the ratio's divisor.
    divisor = frame[expected].replace(0, np.nan).sum()
    if divisor:
        ratio = float(observed_total / divisor)
    else:
        ratio = np.nan

    summary = {
        "observed_total": float(observed_total),
        "expected_total": float(frame[expected].sum()),
        "observed_expected_ratio": ratio,
        "zero_expected_rows": int((frame[expected] == 0).sum()),
    }
    if denominator:
        exposure = pd.to_numeric(frame[denominator], errors="coerce")
        summary["denominator_total"] = float(exposure.sum())
    return pd.DataFrame([summary])
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def interactive_signal_review(chart) -> pd.DataFrame:
    """Return a compact table for reviewing detected signals interactively.

    Args:
        chart: An object exposing ``table`` (a DataFrame) and ``x`` (the
            name of the x-axis column), such as the result of ``qic()``.

    Returns:
        One row per flagged point with, where present, the x column,
        ``plot_value`` and ``signal_rule``, plus a ``suggested_review``
        prompt. The prompt depends on ``special_cause_type`` when that
        column exists; otherwise a generic prompt is used. An empty frame
        with the expected columns is returned when there is no ``signal``
        column or no flagged rows.
    """
    table = chart.table.copy()
    columns = [chart.x, "plot_value", "signal_rule", "suggested_review"]
    if "signal" not in table:
        return pd.DataFrame(columns=columns)
    signals = table[table["signal"]].copy()
    if signals.empty:
        return pd.DataFrame(columns=columns)

    fallback = "Review the point and surrounding process context."
    prompts = {
        "improvement": "Check whether the change is real, understood and sustainable.",
        "concern": "Review context, data quality and potential causes promptly.",
        "neutral": "Treat as a signal for learning before judging direction.",
    }
    if "special_cause_type" in signals:
        signals["suggested_review"] = signals["special_cause_type"].astype(str).map(prompts).fillna(fallback)
    else:
        # DataFrame.get() would hand back a plain string default here, which
        # has no .astype(); tables without a special-cause classification
        # simply get the generic prompt.
        signals["suggested_review"] = fallback
    return signals[[column for column in columns if column in signals.columns]].reset_index(drop=True)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Guided chart selection helpers.
|
|
2
|
+
|
|
3
|
+
The selector is intentionally rule-of-thumb based. It is meant to help users
|
|
4
|
+
choose a sensible starting chart, not to replace analyst judgement.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class ChartSuggestion:
    """A chart recommendation with a plain-English explanation.

    Instances are immutable (the dataclass is frozen) and are returned by
    ``suggest_chart()``.
    """

    # Chart key to pass as ``chart=`` to ``qic()``, for example "p" or "xbar".
    chart: str
    # Short sentence explaining why this chart type fits the described data.
    reason: str
    # Example ``qic(...)`` call, as text, showing how to draw the suggested chart.
    example: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def suggest_chart(
    kind: str | None = None,
    *,
    numerator: str | None = None,
    denominator: str | None = None,
    subgroup: bool = False,
    rare_event: bool = False,
    time_between: bool = False,
    expected: str | None = None,
) -> ChartSuggestion:
    """Suggest a chart type from common QI/SPC data shapes.

    Parameters are deliberately lightweight so the helper works in scripts,
    notebooks, Excel Companion workflows and documentation examples.

    The checks run in a fixed priority order and the first match wins:
    expected values, rare events, time between events, subgroups,
    proportions, rates, counts, run charts, and finally an Individuals
    chart as the default.

    Args:
        kind: Optional free-text description of the data such as
            ``"proportion"``, ``"rate"`` or ``"events per unit"``. Case,
            surrounding whitespace, hyphens and spaces are normalised.
        numerator: Name of a numerator column, if the data has one.
        denominator: Name of a denominator column, if the data has one.
        subgroup: True when each time point has repeated measurements.
        rare_event: True when the measure counts cases between rare events.
        time_between: True when the measure is the time between events.
        expected: Name of an expected-value column for risk-adjusted data.

    Returns:
        A ``ChartSuggestion`` with the chart key, a reason and an example call.
    """
    key = (kind or "").strip().lower().replace("-", "_").replace(" ", "_")
    # One shared definition of the "rate" kinds: the proportion guard below
    # must exclude exactly the kinds that the U-chart branch accepts, or a
    # rate description with numerator/denominator columns is misread as P.
    rate_kinds = {"rate", "count_per_unit", "events_per_unit"}
    proportion_kinds = {"proportion", "percentage", "percent", "binary"}

    if expected and denominator:
        return ChartSuggestion("p_prime", "Observed and expected values with a denominator usually indicate a risk-adjusted proportion.", "qic(data, x='week', y='observed', expected='expected', denominator='n', chart='p_prime')")
    if expected:
        return ChartSuggestion("u_prime", "Observed and expected values without a clear proportion denominator can be reviewed as an observed/expected rate.", "qic(data, x='week', y='observed', expected='expected', chart='u_prime')")
    if rare_event:
        return ChartSuggestion("g", "Counts between rare events are suited to a G chart.", "qic(data, x='event_number', y='cases_between_events', chart='g')")
    if time_between:
        return ChartSuggestion("t", "Time between events is suited to a T chart.", "qic(data, x='event_number', y='days_between_events', chart='t')")
    if subgroup or key in {"subgroup", "xbar", "x_bar"}:
        return ChartSuggestion("xbar", "Repeated measurements in subgroups are usually reviewed with Xbar/S charts.", "qic(data, x='subgroup', y='value', chart='xbar')")
    if key in proportion_kinds or (numerator and denominator and key not in rate_kinds):
        return ChartSuggestion("p", "Events out of opportunities are usually a P chart when the result is a proportion.", "qic(data, x='week', y='events', denominator='opportunities', chart='p')")
    if key in rate_kinds:
        return ChartSuggestion("u", "Events per unit of opportunity are usually a U chart.", "qic(data, x='week', y='events', denominator='bed_days', chart='u')")
    if key in {"count", "counts", "defects"}:
        return ChartSuggestion("c", "Counts with a broadly constant area of opportunity are usually a C chart.", "qic(data, x='week', y='count', chart='c')")
    if key in {"run", "median"}:
        return ChartSuggestion("run", "Use a run chart for a simple time series when you do not need control limits.", "qic(data, x='week', y='value', chart='run')")
    return ChartSuggestion("i", "A continuous measure over time is usually a good starting point for an Individuals chart.", "qic(data, x='week', y='value', chart='i')")
|
pyqicharts/cli.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Command-line interface for simple automated chart/report creation."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from .core import qic
|
|
11
|
+
from .export import export_excel, export_png, export_powerpoint
|
|
12
|
+
from .reporting import create_report_pack_from_config, load_chart_config
|
|
13
|
+
from .tables import qic_table
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _add_chart_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the positional CSV path and the options of the ``chart`` command."""
    parser.add_argument("data", help="Input CSV file.")
    # (flag, add_argument keyword options) in the order they appear in --help.
    option_specs = (
        ("--x", {"required": True, "help": "X-axis/date/week column."}),
        ("--y", {"required": True, "help": "Measure column."}),
        ("--chart", {"default": "i", "help": "Chart type, for example i, p, u, c, run."}),
        ("--denominator", {"help": "Denominator column for P/U charts."}),
        ("--expected", {"help": "Expected column for P-prime/U-prime charts."}),
        ("--output", {"required": True, "help": "Output file path: .xlsx, .pptx or .png."}),
        ("--title", {"help": "Optional chart/report title."}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _run_chart(args: argparse.Namespace) -> int:
    """Create a single chart and write the requested output file.

    The output format is chosen from the extension of ``args.output``
    (case-insensitive): ``.xlsx``, ``.pptx`` or ``.png``.

    Args:
        args: Parsed ``chart`` sub-command arguments (``data``, ``x``, ``y``,
            ``chart``, ``denominator``, ``expected``, ``output``, ``title``).

    Returns:
        0 on success.

    Raises:
        ValueError: if the output extension is not supported.
    """
    output = Path(args.output)
    suffix = output.suffix.lower()
    # Reject an unsupported extension before reading the CSV or building the
    # chart, so the user gets this message immediately and it cannot be
    # masked by an unrelated data or calculation error.
    if suffix not in {".xlsx", ".pptx", ".png"}:
        raise ValueError("Output must end with .xlsx, .pptx or .png. Choose one of those file extensions.")

    data = pd.read_csv(args.data)
    chart = qic(data, args.x, args.y, chart=args.chart, denominator=args.denominator, expected=args.expected, title=args.title)
    if suffix == ".xlsx":
        export_excel(chart, output)
    elif suffix == ".pptx":
        export_powerpoint(chart, output, title=args.title)
    else:
        export_png(chart, output)
    return 0
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _run_report(args: argparse.Namespace) -> int:
    """Build the report folder described by ``args.config`` into ``args.output``.

    Returns 0 when the number of successful charts equals the chart count
    reported in the returned metadata (missing counts are treated as 0).

    Raises:
        ValueError: if the two counts differ; the message points at the
            metadata's ``batch_summary`` entry.
    """
    metadata = create_report_pack_from_config(args.config, args.output)
    succeeded = metadata.get("successful_charts", 0)
    attempted = metadata.get("chart_count", 0)
    if succeeded == attempted:
        return 0
    raise ValueError(f"Report completed with errors. See {metadata['batch_summary']} for details.")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _run_validate(args: argparse.Namespace) -> int:
    """Check that the CSV plus the chart configuration yields a calculation table.

    Only one chart specification is checked: the first entry of the
    config's ``charts`` list when that is a non-empty list, otherwise the
    config itself.

    Returns 0 on success.

    Raises:
        ValueError: if the calculation table comes back empty.
    """
    data = pd.read_csv(args.data)
    config = load_chart_config(args.config)
    specs = config.get("charts", [config])
    if isinstance(specs, list) and specs:
        spec = specs[0]
    else:
        spec = config
    # Only the column/chart selectors are taken from the spec; a missing
    # chart type falls back to "i".
    options = {
        "x": spec.get("x"),
        "y": spec.get("y"),
        "chart": spec.get("chart", "i"),
        "denominator": spec.get("denominator"),
        "expected": spec.get("expected"),
    }
    table = qic_table(data, **options)
    if table.empty:
        raise ValueError("Validation produced an empty calculation table. Check the CSV rows and selected columns.")
    return 0
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and run the selected sub-command.

    Three sub-commands are registered: ``chart``, ``report`` and
    ``validate``. Any exception raised while running one is printed to
    stderr as a one-line message.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read the
            process arguments.

    Returns:
        The sub-command's return value, or 2 when it raised.
    """
    parser = argparse.ArgumentParser(prog="pyqicharts", description="Create QI/SPC charts from CSV data.")
    subparsers = parser.add_subparsers(dest="command", required=True)

    _add_chart_arguments(subparsers.add_parser("chart", help="Create one chart from one CSV file."))

    report_parser = subparsers.add_parser("report", help="Create a report folder from a chart config file.")
    report_parser.add_argument("config", help="JSON or simple YAML chart configuration file.")
    report_parser.add_argument("--output", required=True, help="Output report folder.")

    validate_parser = subparsers.add_parser("validate", help="Check that a CSV and chart config can be calculated.")
    validate_parser.add_argument("data", help="Input CSV file.")
    validate_parser.add_argument("--config", required=True, help="JSON or simple YAML chart configuration file.")

    args = parser.parse_args(argv)
    handlers = {
        "chart": _run_chart,
        "report": _run_report,
        "validate": _run_validate,
    }
    handler = handlers.get(args.command)
    if handler is None:
        return 0
    try:
        return handler(args)
    except Exception as exc:  # noqa: BLE001 - CLI should present concise user-facing errors.
        print(f"pyqicharts error: {exc}", file=sys.stderr)
        return 2
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
if __name__ == "__main__": # pragma: no cover
|
|
106
|
+
raise SystemExit(main())
|
pyqicharts/core.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""Matplotlib chart construction for pyqicharts.
|
|
2
|
+
|
|
3
|
+
`qic()` is intentionally a thin plotting wrapper around `qic_table()`. The
|
|
4
|
+
calculation table is always retained on the returned `QicResult`, so users can
|
|
5
|
+
move between scripts, notebooks, Excel exports and Power BI without rerunning
|
|
6
|
+
separate calculation logic.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Optional
|
|
11
|
+
import matplotlib.pyplot as plt
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
from .rules import AnhoejResult, anhoej_rules
|
|
15
|
+
from .signals import table_signals
|
|
16
|
+
from .tables import qic_table
|
|
17
|
+
from .themes import get_theme
|
|
18
|
+
|
|
19
|
+
@dataclass
class QicResult:
    """Bundle of everything ``qic()`` produces for one chart.

    Holds a copy of the input data, the chart key and column names, the
    centre line and limits read from the calculation table, the signal
    flags, the calculation table itself and the Matplotlib figure and axes.
    """

    data: pd.DataFrame
    chart: str
    x: str
    y: str
    centre: float
    centre_label: str
    lcl: Optional[float]
    ucl: Optional[float]
    anhoej: Optional[AnhoejResult]
    signals: pd.Series
    table: pd.DataFrame
    figure: object
    axes: object

    def save_png(self, path: str, dpi: int = 150):
        """Write the chart to ``path`` as a PNG via ``export_png``."""
        # Imported here rather than at module level.
        from .export import export_png

        return export_png(self, path, dpi=dpi)

    def summary(self) -> dict:
        """Return the headline chart values as a plain dictionary.

        The ``anhoej`` entry is only included when an Anhoej result is present.
        """
        overview = {
            "chart": self.chart,
            "centre_label": self.centre_label,
            "centre": self.centre,
            "lcl": self.lcl,
            "ucl": self.ucl,
            "signals": int(self.signals.sum()),
        }
        if self.anhoej is None:
            return overview
        overview["anhoej"] = self.anhoej
        return overview

    def signal_table(self) -> pd.DataFrame:
        """Return the detected signals as produced by ``table_signals``."""
        return table_signals(self.table, self.chart, self.x)

    def show(self):
        """Display figures through ``matplotlib.pyplot.show()``."""
        plt.show()
|
|
49
|
+
|
|
50
|
+
def _normalise_chart_name(chart: str) -> str:
    """Normalise public chart aliases to the internal chart key.

    The name is stripped of surrounding whitespace, lower-cased, and has
    hyphens and spaces replaced by underscores before the alias lookup.
    Names without an alias are returned in that normalised form.
    """
    # Strip first: otherwise " i " would become "_i_" and miss every alias.
    key = chart.strip().lower().replace("-", "_").replace(" ", "_")
    # Alias keys are written in the already-normalised form (no hyphens or
    # spaces), because that is the only form a lookup can ever see.
    aliases = {
        "individuals": "i",
        "movingrange": "mr",
        "moving_range": "mr",
        "count": "c",
        "proportion": "p",
        "rate": "u",
        "rare_event": "g",
        "time_between": "t",
        "p'": "p_prime",
        "pprime": "p_prime",
        "u'": "u_prime",
        "uprime": "u_prime",
        "x_bar": "xbar",
    }
    return aliases.get(key, key)
|
|
54
|
+
|
|
55
|
+
def _scalar_or_none(series: pd.Series) -> float | None:
    """Return the first non-missing entry of ``series`` as a float, or None if there is none."""
    present = series.dropna()
    if present.empty:
        return None
    return float(present.iloc[0])
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _signal_annotation_label(row: pd.Series) -> str:
    """Pick the annotation text for one flagged table row.

    The first non-missing, non-blank value among ``special_cause_label``,
    ``special_cause_rule`` and ``signal_rule`` is used, trimmed of
    surrounding whitespace and shortened to 45 characters plus ``...``
    when longer than 48. ``"Signal"`` is the fallback.
    """
    preferred = ["special_cause_label", "special_cause_rule", "signal_rule"]
    # Lazy lookup keeps the most specific column first and stops at the first hit.
    for value in (row.get(name, "") for name in preferred):
        if pd.isna(value):
            continue
        text = str(value).strip()
        if not text:
            continue
        if len(text) > 48:
            return text[:45] + "..."
        return text
    return "Signal"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def qic(
    data: pd.DataFrame,
    x: str,
    y: str,
    chart: str = "run",
    denominator: str | None = None,
    expected: str | None = None,
    title: str | None = None,
    figsize: tuple[int, int] = (10, 5),
    theme: str = "default",
    improvement: str | None = None,
    shift_points: int = 6,
    trend_points: int = 6,
    baseline_points: int | None = None,
    recalculation_points: list | None = None,
    target: float | int | None = None,
    interventions: list[dict] | None = None,
    step_changes: list[dict] | None = None,
    freeze_points: list | None = None,
    break_points: list | None = None,
    exclude_points: list | None = None,
    phases: list[dict] | None = None,
    rules: str | None = None,
    method: str = "anhoej",
    baseline_start=None,
    baseline_end=None,
    freeze: list | None = None,
    breaks: list | None = None,
    exclude: list | None = None,
    recalculate_after: list | None = None,
    targets=None,
    annotate_signals: bool = True,
    exclude_mr_outliers: bool = False,
) -> QicResult:
    """Create a QI/SPC chart.

    Supports run, I, MR, C, P, U, Xbar, S, G, T, P-prime and U-prime charts. P and U charts
    require a denominator column. Individuals charts include healthcare QI-style
    special cause colouring and interpretation, plus baseline, recalculation,
    target, intervention and step-change metadata.

    All calculations are delegated to ``qic_table()``; this function only
    draws the resulting table with Matplotlib and wraps both in a
    ``QicResult``.

    Args:
        data: Input frame; a copy is stored on the result.
        x: Column plotted on the x-axis.
        y: Measure column.
        chart: Chart type or alias; normalised before use.
        title: Chart title; defaults to ``"<CHART> chart of <y>"``.
        figsize: Matplotlib figure size.
        theme: Theme name looked up with ``get_theme()``.
        target: Optional value drawn as a horizontal target line.
        annotate_signals: When True, each flagged point gets a text label.

    Every other keyword argument is forwarded unchanged to ``qic_table()``.

    Returns:
        A ``QicResult`` carrying the data copy, calculation table, figure and axes.
    """

    def has_text(value) -> bool:
        """Return True when a table cell holds a non-missing, non-blank value."""
        return bool(pd.notna(value)) and bool(str(value).strip())

    chart_key = _normalise_chart_name(chart)
    style = get_theme(theme)
    table = qic_table(
        data=data,
        x=x,
        y=y,
        chart=chart_key,
        denominator=denominator,
        expected=expected,
        improvement=improvement,
        shift_points=shift_points,
        trend_points=trend_points,
        baseline_points=baseline_points,
        recalculation_points=recalculation_points,
        target=target,
        interventions=interventions,
        step_changes=step_changes,
        freeze_points=freeze_points,
        break_points=break_points,
        exclude_points=exclude_points,
        phases=phases,
        rules=rules,
        method=method,
        baseline_start=baseline_start,
        baseline_end=baseline_end,
        freeze=freeze,
        breaks=breaks,
        exclude=exclude,
        recalculate_after=recalculate_after,
        targets=targets,
        exclude_mr_outliers=exclude_mr_outliers,
    )
    fig, ax = plt.subplots(figsize=figsize)

    # Chart types that plot something derived from y get a descriptive axis label.
    ylabel = {
        "mr": f"Moving range of {y}",
        "p": f"Proportion of {y}",
        "u": f"Rate of {y}",
        "g": f"Cases between events: {y}",
        "t": f"Time between events: {y}",
        "p_prime": f"Observed / expected: {y}",
        "u_prime": f"Observed / expected: {y}",
        "xbar": f"Subgroup mean of {y}",
        "s": f"Subgroup standard deviation of {y}",
    }.get(chart_key, y)

    ax.plot(table[x], table["plot_value"], marker="o", linewidth=1.8, color=style.line, markerfacecolor=style.marker, markeredgecolor=style.marker)

    signal_rows = table[table["signal"]]
    if not signal_rows.empty:
        if "special_cause_type" in signal_rows:
            plotted_labels = set()
            for signal_type, rows in signal_rows.groupby("special_cause_type", dropna=False):
                # A missing (NaN) type is truthy, so it must be checked
                # explicitly or the legend would show the label "Nan".
                label = str(signal_type).title() if has_text(signal_type) else "Signal"
                # Likewise a missing colour would otherwise become the
                # string "nan"; fall back to the theme's signal colour.
                colour_value = rows["special_cause_colour"].iloc[0] if "special_cause_colour" in rows else None
                color = str(colour_value) if has_text(colour_value) else style.signal
                ax.scatter(rows[x], rows["plot_value"], s=90, color=color, marker="o", zorder=5, label=None if label in plotted_labels else label)
                plotted_labels.add(label)
        else:
            ax.scatter(signal_rows[x], signal_rows["plot_value"], s=90, color=style.signal, marker="o", zorder=5, label="Signal")
        if annotate_signals:
            # Annotation text is deliberately derived from the calculated table
            # fields so saved PNGs, notebooks and downstream exports tell the
            # same story as `qic_table(...)`.
            for _, row in signal_rows.iterrows():
                ax.annotate(
                    _signal_annotation_label(row),
                    xy=(row[x], row["plot_value"]),
                    xytext=(6, 10),
                    textcoords="offset points",
                    fontsize=8,
                    color="#231F20",
                    arrowprops={"arrowstyle": "->", "color": "#425563", "lw": 0.8},
                )

    centre = _scalar_or_none(table["centre"])
    lcl = _scalar_or_none(table["lcl"])
    ucl = _scalar_or_none(table["ucl"])
    centre_label = str(table["centre_label"].iloc[0]) if len(table) else "Centre"

    # Segment-aware horizontal lines let baseline/recalculation periods share
    # one plot without pretending a single limit applies to every segment.
    if "segment_id" in table:
        segments = table.groupby("segment_id", sort=True)
    else:
        # Without segment information the whole table is one segment.
        # Grouping by the row index instead would treat every row as its own
        # segment: zero-length lines and one legend entry per row.
        segments = [(1, table)] if len(table) else []
    for segment_id, rows in segments:
        first_x = rows[x].iloc[0]
        last_x = rows[x].iloc[-1]
        seg_centre = _scalar_or_none(rows["centre"])
        seg_lcl = _scalar_or_none(rows["lcl"])
        seg_ucl = _scalar_or_none(rows["ucl"])
        label_suffix = "" if segment_id == 1 else f" S{segment_id}"
        if seg_centre is not None and not np.isnan(seg_centre):
            ax.hlines(seg_centre, first_x, last_x, linestyle="--", linewidth=1.4, color=style.centre, label=centre_label + label_suffix)
        if seg_lcl is not None and not np.isnan(seg_lcl):
            ax.hlines(seg_lcl, first_x, last_x, linestyle=":", linewidth=1.2, color=style.limits, label="LCL" + label_suffix)
        if seg_ucl is not None and not np.isnan(seg_ucl):
            ax.hlines(seg_ucl, first_x, last_x, linestyle=":", linewidth=1.2, color=style.limits, label="UCL" + label_suffix)

    if target is not None:
        ax.axhline(target, linestyle="-.", linewidth=1.2, color="#330072", label="Target")

    if "intervention" in table:
        for _, row in table[table["intervention"]].iterrows():
            ax.axvline(row[x], linestyle="-", linewidth=1.0, color="#425563", alpha=0.65)
            # Skip missing labels so a NaN cell is not drawn as the text "nan".
            intervention_label = row.get("intervention_label")
            if has_text(intervention_label):
                ax.text(row[x], row["plot_value"], str(intervention_label), rotation=90, va="bottom", ha="right", fontsize=8)
    if "step_change" in table:
        for _, row in table[table["step_change"]].iterrows():
            ax.axvline(row[x], linestyle="--", linewidth=1.0, color="#007F3B", alpha=0.75)
            step_change_label = row.get("step_change_label")
            if has_text(step_change_label):
                ax.text(row[x], row["plot_value"], str(step_change_label), rotation=90, va="bottom", ha="left", fontsize=8)

    ax.set_xlabel(x)
    ax.set_ylabel(ylabel)
    ax.set_title(title or f"{chart_key.upper()} chart of {y}")
    ax.grid(True, alpha=style.grid_alpha)
    ax.legend(loc="best")
    fig.tight_layout()

    # Anhoej run rules are only evaluated for run charts.
    anhoej = anhoej_rules(table[y]) if chart_key == "run" else None
    return QicResult(data.copy(), chart_key, x, y, centre if centre is not None else float("nan"), centre_label, lcl, ucl, anhoej, table["signal"], table, fig, ax)
|
pyqicharts/datasets.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Small example and sample datasets for pyqicharts.
|
|
2
|
+
|
|
3
|
+
The bundled datasets are synthetic. They are designed for examples, tests and
|
|
4
|
+
new-user exploration, not for clinical benchmarking.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from importlib import resources
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_sample_data_path(filename: str) -> Path:
    """Return a real filesystem path for a bundled sample-data CSV.

    The sample CSVs live inside the importable ``pyqicharts`` package, under
    ``sample_data``, so they are available after installation from a wheel or
    source distribution. ``importlib.resources`` is asked where the installed
    package resources are, avoiding brittle assumptions about the user's
    ``site-packages`` path.

    Parameters
    ----------
    filename:
        Name of the CSV inside the package's ``sample_data`` directory.

    Returns
    -------
    pathlib.Path
        A path that can be opened with ordinary file APIs. When the package
        is unpacked on disk this is the resource itself. When the package is
        served by another loader (for example a zip archive), the resource is
        extracted to a temporary file that is removed at interpreter exit.

    Raises
    ------
    FileNotFoundError
        If ``filename`` is not a file inside ``sample_data``.
    """
    import atexit
    from contextlib import ExitStack

    resource = resources.files("pyqicharts").joinpath("sample_data").joinpath(filename)
    if not resource.is_file():
        raise FileNotFoundError(f"Sample dataset not found: {filename}")

    # Common case: the package is unpacked on disk and the traversable is
    # already a concrete path, so nothing needs to be extracted.
    if isinstance(resource, Path):
        return Path(resource)

    # A non-filesystem traversable (e.g. ``zipfile.Path``) is not path-like,
    # so ``Path(resource)`` would raise TypeError. ``as_file`` materialises it;
    # the context is kept open until exit so the returned path stays valid.
    cleanup = ExitStack()
    atexit.register(cleanup.close)
    return Path(cleanup.enter_context(resources.as_file(resource)))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def infections_between_events() -> pd.DataFrame:
    """Build the synthetic cases-between-infection dataset for a G chart.

    The frame has two columns: ``case_number`` (1 to 12) and
    ``cases_between_events``. The counts are hard-coded and include one
    unusually long gap (85) and one unusually short gap (2).
    """
    gaps = [22, 18, 25, 21, 19, 24, 85, 20, 23, 17, 2, 21]
    columns = {
        # One sequential case number per recorded gap.
        "case_number": range(1, len(gaps) + 1),
        "cases_between_events": gaps,
    }
    return pd.DataFrame(columns)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def sample_healthcare_qi_data() -> pd.DataFrame:
    """Load the bundled healthcare QI sample CSV into a DataFrame.

    The file ``sample_healthcare_qi_data.csv`` is located through
    ``get_sample_data_path`` and parsed with pandas' default CSV settings.
    """
    csv_path = get_sample_data_path("sample_healthcare_qi_data.csv")
    return pd.read_csv(csv_path)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def sample_subgroup_measurements() -> pd.DataFrame:
    """Load the bundled subgroup-measurement sample CSV into a DataFrame.

    The file ``sample_subgroup_measurements.csv`` is located through
    ``get_sample_data_path`` and parsed with pandas' default CSV settings.
    """
    csv_path = get_sample_data_path("sample_subgroup_measurements.csv")
    return pd.read_csv(csv_path)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def days_between_serious_incidents() -> pd.DataFrame:
    """Build the synthetic days-between-serious-incidents dataset for a T chart.

    The frame has two columns: ``event_number`` (1 to 12) and
    ``days_between_events``. The intervals are hard-coded and include one
    very long gap (120) and one very short gap (1).
    """
    intervals = [31, 27, 35, 29, 33, 30, 120, 28, 32, 26, 1, 34]
    columns = {
        # One sequential event number per recorded interval.
        "event_number": range(1, len(intervals) + 1),
        "days_between_events": intervals,
    }
    return pd.DataFrame(columns)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def days_between_falls_with_harm() -> pd.DataFrame:
    """Build the synthetic days-between-falls-with-harm dataset for a T chart.

    The frame has two columns: ``event_number`` (1 to 12) and
    ``days_between_events``. The intervals are hard-coded and include one
    very long gap (55) and one very short gap (2).
    """
    intervals = [14, 17, 12, 16, 15, 13, 19, 18, 55, 11, 15, 2]
    columns = {
        # One sequential event number per recorded interval.
        "event_number": range(1, len(intervals) + 1),
        "days_between_events": intervals,
    }
    return pd.DataFrame(columns)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def risk_adjusted_readmissions() -> pd.DataFrame:
    """Build the synthetic observed/expected readmissions dataset for a P-prime chart.

    The frame has three columns: ``month`` (1 to 12), ``observed`` (integer
    counts) and ``expected`` (float values). All values are hard-coded; month
    8 has an observed count of 42 against an expected value of 22.1.
    """
    observed_counts = [18, 21, 19, 22, 20, 24, 23, 42, 21, 20, 18, 19]
    expected_counts = [20.0, 20.5, 19.8, 21.0, 20.2, 21.5, 22.0, 22.1, 21.0, 20.6, 19.7, 20.1]
    columns = {
        # One sequential month number per observed count.
        "month": range(1, len(observed_counts) + 1),
        "observed": observed_counts,
        "expected": expected_counts,
    }
    return pd.DataFrame(columns)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def risk_adjusted_infection_rates() -> pd.DataFrame:
    """Build the synthetic observed/expected infection dataset for a U-prime chart.

    The frame has three columns: ``month`` (1 to 12), ``observed`` (integer
    counts) and ``expected`` (float values). All values are hard-coded; month
    8 has an observed count of 12 against an expected value of 5.0.
    """
    observed_counts = [4, 5, 3, 4, 6, 5, 4, 12, 5, 4, 3, 5]
    expected_counts = [4.5, 4.8, 4.2, 4.4, 4.9, 5.1, 4.7, 5.0, 4.6, 4.5, 4.3, 4.8]
    columns = {
        # One sequential month number per observed count.
        "month": range(1, len(observed_counts) + 1),
        "observed": observed_counts,
        "expected": expected_counts,
    }
    return pd.DataFrame(columns)
|