sdsa 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sdsa/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "1.1.0"
File without changes
@@ -0,0 +1,92 @@
1
+ """Column policy model + apply function.
2
+
3
+ A ColumnPolicy describes how one column should be transformed. The pipeline
4
+ iterates over policies and invokes the matching primitive or DP mechanism.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from typing import Any, Literal
9
+
10
+ import polars as pl
11
+ from pydantic import BaseModel, Field, field_validator
12
+
13
+ from . import primitives as prim
14
+
15
# Closed set of transformation names a ColumnPolicy may request.
Action = Literal[
    "retain",
    "mask",
    "hash",
    "tokenize",
    "redact",
    "numeric_bin",
    "date_truncate",
    "string_truncate",
    "dp_laplace",
    "drop",
]
21
+
22
+
23
class PolicyApplicationError(ValueError):
    """Raised when a column policy cannot be applied to a DataFrame."""
25
+
26
+
27
class ColumnPolicy(BaseModel):
    """Declarative description of how a single column is to be transformed."""

    column: str = Field(min_length=1)
    action: Action
    # Action-specific options (e.g. `bin_width` for numeric_bin).
    # For dp_laplace: caller must supply `epsilon` and column bounds `lower`/`upper`.
    params: dict[str, Any] = Field(default_factory=dict)
    is_quasi_identifier: bool = False

    @field_validator("column")
    @classmethod
    def validate_column(cls, value: str) -> str:
        """Reject names containing line breaks or NUL, or longer than 200 chars."""
        forbidden = ("\n", "\r", "\x00")
        if any(ch in value for ch in forbidden):
            raise ValueError("column names must not contain newlines or null bytes")
        if len(value) > 200:
            raise ValueError("column names must not exceed 200 characters")
        return value
43
+
44
+
45
def apply_policy(df: pl.DataFrame, policy: ColumnPolicy, hmac_key: bytes) -> pl.DataFrame:
    """Apply a single non-DP policy to a DataFrame. DP is applied separately.

    Args:
        df: Frame holding the column to transform.
        policy: Column name, action and action-specific params.
        hmac_key: Key passed to the keyed primitives (`hash`, `tokenize`).

    Returns:
        `df` unchanged when the column is absent or the action is `retain` /
        `dp_laplace`; otherwise a frame with the column dropped or replaced
        by its transformed version.

    Raises:
        PolicyApplicationError: a required param is missing, a param value is
            unusable, or the action is not recognised.
    """
    col = policy.column
    if col not in df.columns:
        # A policy naming a column the frame does not have is a no-op.
        return df
    s = df[col]
    action = policy.action
    p = policy.params

    try:
        if action == "retain":
            return df
        if action == "drop":
            return df.drop(col)
        if action == "mask":
            # Coerce to int the same way the string_truncate branch coerces
            # `keep`, so values such as 2.0 or "2" are handled uniformly.
            out = prim.mask(s, keep_prefix=int(p.get("keep_prefix", 0)),
                            keep_suffix=int(p.get("keep_suffix", 0)),
                            mask_char=p.get("mask_char", "*"))
        elif action == "hash":
            out = prim.hmac_hash(s, hmac_key)
        elif action == "tokenize":
            out = prim.tokenize(s, hmac_key, prefix=p.get("prefix", "tok_"))
        elif action == "redact":
            out = prim.redact(s, replacement=p.get("replacement", "[REDACTED]"))
        elif action == "numeric_bin":
            if "bin_width" not in p:
                raise PolicyApplicationError(
                    f"column '{col}' with action 'numeric_bin' requires param 'bin_width'"
                )
            out = prim.numeric_bin(s, bin_width=float(p["bin_width"]))
        elif action == "date_truncate":
            out = prim.date_truncate(s, granularity=p.get("granularity", "month"))
        elif action == "string_truncate":
            out = prim.string_truncate(s, keep=int(p.get("keep", 3)),
                                       pad_char=p.get("pad_char", "*"))
        elif action == "dp_laplace":
            # Applied by the DP pass, not here.
            return df
        else:
            raise PolicyApplicationError(f"unknown action {action}")
    except PolicyApplicationError:
        raise
    except (KeyError, TypeError, ValueError, OverflowError) as e:
        # OverflowError: float() of an integer too large for a float, or
        # int() of an infinite float, would otherwise escape this handler.
        raise PolicyApplicationError(
            f"invalid params for column '{col}' action '{action}': {e}"
        ) from e

    return df.with_columns(out.alias(col))
@@ -0,0 +1,195 @@
1
+ """Per-column anonymization primitives.
2
+
3
+ Each function takes a Polars Series and returns a transformed Series.
4
+ Row count is preserved except for suppression (done by the k-anonymity step).
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from decimal import Decimal, ROUND_FLOOR
9
+ import hashlib
10
+ import hmac
11
+ import math
12
+ import secrets
13
+ from datetime import date, datetime
14
+
15
+ import polars as pl
16
+
17
+ # --- direct-identifier primitives --------------------------------------------
18
+
19
def mask(series: pl.Series, keep_prefix: int = 0, keep_suffix: int = 0,
         mask_char: str = "*") -> pl.Series:
    """Mask each value, optionally leaving a visible prefix and/or suffix.

    Nulls and empty strings pass through unchanged. Every other value is
    stringified and has at least one character replaced by `mask_char`: when
    keep_prefix + keep_suffix would cover the whole value, both are scaled
    down proportionally (rounding down) so a short value such as "hi" with
    keep_prefix=5 cannot leak unchanged.

    Raises:
        ValueError: a keep count is negative or `mask_char` is empty.
    """
    if keep_prefix < 0 or keep_suffix < 0:
        raise ValueError("keep_prefix and keep_suffix must be >= 0")
    if not mask_char:
        raise ValueError("mask_char must be a non-empty string")

    def _mask_one(value):
        if value is None:
            return None
        text = str(value)
        length = len(text)
        if length == 0:
            return text
        head, tail = keep_prefix, keep_suffix
        if head + tail >= length:
            # Privacy invariant: shrink the visible parts so that together
            # they cover at most length - 1 characters.
            budget = max(length - 1, 0)
            requested = head + tail
            if requested > 0:
                ratio = budget / requested
                head, tail = int(head * ratio), int(tail * ratio)
            else:
                head = tail = 0
        head = min(head, length)
        tail = min(tail, max(length - head, 0))
        hidden = mask_char * (length - head - tail)
        visible_tail = text[length - tail:] if tail else ""
        return text[:head] + hidden + visible_tail

    return series.map_elements(_mask_one, return_dtype=pl.Utf8)
59
+
60
+
61
def hmac_hash(series: pl.Series, key: bytes) -> pl.Series:
    """Replace each non-null value with the first 16 hex chars of its HMAC-SHA256.

    Values are stringified and UTF-8 encoded before hashing; nulls stay null.
    The digest is keyed, so it resists rainbow tables.
    """
    def _digest16(value):
        if value is None:
            return None
        mac = hmac.new(key, digestmod=hashlib.sha256)
        mac.update(str(value).encode("utf-8"))
        return mac.hexdigest()[:16]

    return series.map_elements(_digest16, return_dtype=pl.Utf8)
69
+
70
+
71
def tokenize(series: pl.Series, key: bytes, prefix: str = "tok_") -> pl.Series:
    """Replace each non-null value with `prefix` + 12 hex chars of its HMAC-SHA256.

    The same value and key always produce the same token; nulls stay null.
    The HMAC is keyed, so tokens resist rainbow tables.
    """
    def _token(value):
        if value is None:
            return None
        mac = hmac.new(key, digestmod=hashlib.sha256)
        mac.update(str(value).encode("utf-8"))
        short = mac.hexdigest()[:12]
        return f"{prefix}{short}"

    return series.map_elements(_token, return_dtype=pl.Utf8)
79
+
80
+
81
def redact(series: pl.Series, replacement: str = "[REDACTED]") -> pl.Series:
    """Return a Utf8 series where every non-null value is `replacement`; nulls stay null."""
    values = []
    for item in series:
        values.append(None if item is None else replacement)
    return pl.Series(series.name, values, dtype=pl.Utf8)
84
+
85
+
86
+ # --- generalization primitives -----------------------------------------------
87
+
88
def numeric_bin(series: pl.Series, bin_width: float) -> pl.Series:
    """Equal-width binning: value → "[lo, lo+width)" label.

    Bucket edges are computed with Decimal arithmetic on the textual form of
    each value so labels do not pick up binary floating-point noise.

    Args:
        series: Column of numeric (or numeric-looking) values.
        bin_width: Width of every bucket; must be finite and > 0.

    Returns:
        A Utf8 series of bucket labels. Nulls and non-finite values (NaN,
        ±inf) become null.

    Raises:
        ValueError: `bin_width` is not a finite positive number, or a value in
            the column cannot be interpreted as a number.
    """
    from decimal import InvalidOperation  # only needed by this function

    # NaN slips past a plain `<= 0` comparison and infinity makes the Decimal
    # arithmetic below raise InvalidOperation, so reject both up front.
    if isinstance(bin_width, float) and not math.isfinite(bin_width):
        raise ValueError("bin_width must be a finite number")
    if bin_width <= 0:
        raise ValueError("bin_width must be > 0")
    step = Decimal(str(bin_width))

    def _fmt_decimal(value: Decimal) -> str:
        # Decimal keeps the sign of zero; without this, -0.0 would be labelled
        # "-0" and land in a different bucket label than 0.0.
        if value.is_zero():
            return "0"
        normalized = format(value.normalize(), "f")
        if "." in normalized:
            normalized = normalized.rstrip("0").rstrip(".")
        return normalized or "0"

    def _not_numeric() -> ValueError:
        # The offending value is deliberately left out of the message.
        return ValueError(
            f"numeric_bin cannot convert a value in column '{series.name}' to float; "
            "column must contain numeric data"
        )

    def _bin(v):
        if v is None:
            return None
        try:
            fv = float(v)
        except (TypeError, ValueError):
            raise _not_numeric() from None
        if not math.isfinite(fv):
            return None
        try:
            dec_value = Decimal(str(v))
        except (InvalidOperation, TypeError, ValueError):
            # float() accepts some values whose str() Decimal rejects
            # (e.g. booleans); report them like any other non-numeric value.
            raise _not_numeric() from None
        bucket = (dec_value / step).to_integral_value(rounding=ROUND_FLOOR)
        lo = bucket * step
        hi = lo + step
        return f"[{_fmt_decimal(lo)}, {_fmt_decimal(hi)})"

    return series.map_elements(_bin, return_dtype=pl.Utf8)
118
+
119
+
120
def date_truncate(series: pl.Series, granularity: str = "month") -> pl.Series:
    """Truncate dates/datetimes to year / month / day, returned as text.

    Date and Datetime columns are formatted directly. Time columns are
    rejected. Any other column is handled element by element: date/datetime
    objects are used as-is and everything else gets a best-effort dateutil
    parse of its string form. A value that cannot be parsed raises rather
    than being passed through (which would leak full-resolution dates).

    Raises:
        ValueError: unknown granularity, a pl.Time column, or an unparseable
            non-null value.
    """
    if granularity not in ("year", "month", "day"):
        raise ValueError("granularity must be year/month/day")

    dtype = series.dtype
    if dtype == pl.Time:
        raise ValueError(
            f"date_truncate does not support pl.Time columns ('{series.name}'); "
            "use a Date or Datetime column"
        )
    if dtype in (pl.Date, pl.Datetime):
        patterns = {"year": "%Y", "month": "%Y-%m", "day": "%F"}
        return series.dt.strftime(patterns[granularity]).alias(series.name)

    from dateutil import parser as _date_parser

    def _truncate(value):
        if value is None:
            return None
        if isinstance(value, datetime):
            day = value.date()
        elif isinstance(value, date):
            day = value
        else:
            # Best-effort string parse. We intentionally raise on failure —
            # silently stringifying the value would leak it.
            try:
                day = _date_parser.parse(str(value)).date()
            except (ValueError, TypeError, OverflowError) as e:
                raise ValueError(
                    f"date_truncate cannot parse a value in column '{series.name}' as a date "
                    f"(row value withheld for privacy): {e}"
                ) from e
        if granularity == "day":
            return day.isoformat()
        if granularity == "year":
            return f"{day.year:04d}"
        return f"{day.year:04d}-{day.month:02d}"

    return series.map_elements(_truncate, return_dtype=pl.Utf8)
166
+
167
+
168
def string_truncate(series: pl.Series, keep: int = 3, pad_char: str = "*") -> pl.Series:
    """Keep the first `keep` chars of each value and pad the rest (ZIP 12345 → 123**).

    Output length equals input length. For any non-empty value at least one
    character is replaced, even when keep >= len(value), so short values such
    as two-letter state codes never pass through unmasked. Nulls and empty
    strings are returned unchanged.

    Raises:
        ValueError: `keep` is negative or `pad_char` is not exactly one character.
    """
    if keep < 0:
        raise ValueError("keep must be >= 0")
    if len(pad_char) != 1:
        raise ValueError("pad_char must be a single character")

    def _truncate(value):
        if value is None:
            return None
        text = str(value)
        length = len(text)
        if length == 0:
            return text
        # Cap the visible part at length - 1 so something is always hidden.
        visible = min(keep, max(length - 1, 0))
        padding = pad_char * (length - visible)
        return text[:visible] + padding

    return series.map_elements(_truncate, return_dtype=pl.Utf8)
190
+
191
+
192
+ # --- utility -----------------------------------------------------------------
193
+
194
def new_session_key() -> bytes:
    """Return 32 cryptographically strong random bytes."""
    key_length = 32
    return secrets.token_bytes(key_length)
sdsa/api/__init__.py ADDED
File without changes
sdsa/api/routes.py ADDED
@@ -0,0 +1,352 @@
1
+ """FastAPI routes: upload → process → download → delete."""
2
+ from __future__ import annotations
3
+
4
+ import io
5
+ import threading
6
+ from typing import Any
7
+
8
+ import polars as pl
9
+ from fastapi import APIRouter, HTTPException, Response, UploadFile
10
+ from fastapi.responses import JSONResponse
11
+ from pydantic import BaseModel, Field
12
+
13
+ from ..core.config import get_config
14
+ from ..core.logging import get_logger
15
+ from ..core.session import get_store
16
+ from ..detect.pii import detect_dataframe
17
+ from ..detect.schema import infer_schema
18
+ from ..ingest import ParseError, parse_upload
19
+ from ..policy_config import PolicyConfigError, build_policy_suggestions
20
+ from ..preflight import PreflightRequest, preflight_k_anonymity
21
+ from ..pipeline import PipelineError, ProcessRequest, _derive_deterministic_key, run_pipeline
22
+ from ..report import render_markdown
23
+ from ..anonymize.policy import PolicyApplicationError, apply_policy
24
+ from ..dp.laplace import LaplaceParams, apply_laplace
25
+
26
+ # A small fixed cap so preview can never leak more than a handful of rows back
27
+ # to the client and is always cheap to render. Five rows is enough to make
28
+ # transformations legible without scaring users with a wall of data.
29
PREVIEW_ROW_LIMIT = 5

# Module-wide logger, and the router on which every endpoint in this module
# is registered (all paths are served under /api).
log = get_logger("sdsa.api")
router = APIRouter(prefix="/api")

# Ids of sessions that currently have a /process request in flight. The set is
# only read or modified while holding the lock.
_processing_sessions: set[str] = set()
_processing_lock = threading.Lock()
36
+
37
+
38
class UploadResponse(BaseModel):
    """Response body of POST /api/upload."""

    # Turn off pydantic's protected-namespace check for this model.
    model_config = {"protected_namespaces": ()}

    # Session handle and lifetime.
    session_id: str
    session_ttl_seconds: int
    session_expires_at: float
    default_k: int

    # Facts about the parsed upload.
    row_count: int
    column_count: int
    format: str
    encoding: str
    parse_meta: dict

    # Detection results; `schema_` is emitted under the key "schema".
    schema_: list[dict] = Field(..., serialization_alias="schema")
    pii_suggestions: dict[str, dict]
    policy_suggestions: dict[str, dict]

    # Stringified preview of the first rows.
    sample_columns: list[str]
    sample_rows: list[list[str | None]]
54
+
55
+
56
@router.post("/upload", response_model=UploadResponse)
async def upload(file: UploadFile) -> UploadResponse:
    """Parse an uploaded file, run detection on it and open a session.

    Responds 413 when the file is larger than the configured limit and 400
    when parsing or policy-suggestion building fails.
    """
    cfg = get_config()
    limit = cfg.max_upload_bytes
    # Read one byte more than allowed: enough to detect an oversized upload
    # without pulling the whole thing into memory.
    payload = await file.read(limit + 1)
    if len(payload) > limit:
        raise HTTPException(413, "file exceeds max upload size")

    try:
        parsed = parse_upload(file.filename or "", payload)
    except ParseError as e:
        raise HTTPException(400, str(e))

    frame = parsed.df
    detection_sample = frame.head(cfg.sample_rows_for_detection)
    schema = infer_schema(frame)
    detected = detect_dataframe(detection_sample)
    pii = {name: asdict_pii(finding) for name, finding in detected.items()}
    preview_rows = _serialize_sample(frame.head(PREVIEW_ROW_LIMIT))
    try:
        suggestions = build_policy_suggestions(schema, pii)
    except PolicyConfigError as e:
        raise HTTPException(400, str(e))

    session = get_store().create()
    session.df = frame
    session.detection = {
        "schema": schema,
        "pii": pii,
        "policy_suggestions": suggestions,
    }

    log_fields = {
        "session_id": session.session_id,
        "rows": frame.height,
        "cols": frame.width,
        "format": parsed.format,
        "encoding": parsed.encoding,
    }
    log.info("upload_complete", extra=log_fields)

    ttl = cfg.session_ttl_seconds
    return UploadResponse(
        session_id=session.session_id,
        session_ttl_seconds=ttl,
        session_expires_at=session.created_at + ttl,
        default_k=cfg.default_k,
        row_count=frame.height,
        column_count=frame.width,
        format=parsed.format,
        encoding=parsed.encoding,
        parse_meta=parsed.meta,
        schema_=schema,
        pii_suggestions=pii,
        policy_suggestions=suggestions,
        sample_columns=frame.columns,
        sample_rows=preview_rows,
    )
111
+
112
+
113
+ def _stringify_cell(v: Any) -> str | None:
114
+ if v is None:
115
+ return None
116
+ # Floats with long tails clutter the preview; trim to 6 sig digits.
117
+ if isinstance(v, float):
118
+ return f"{v:.6g}"
119
+ s = str(v)
120
+ if len(s) > 80:
121
+ return s[:77] + "…"
122
+ return s
123
+
124
+
125
def _serialize_sample(df: pl.DataFrame) -> list[list[str | None]]:
    """Turn every row of `df` into a list of display strings, one per column."""
    return [
        [_stringify_cell(cell) for cell in record]
        for record in df.iter_rows()
    ]
131
+
132
+
133
def asdict_pii(s) -> dict[str, Any]:
    """Flatten a PII finding into a plain dict, rounding confidence to 3 decimals."""
    summary: dict[str, Any] = {"kind": s.kind}
    summary["confidence"] = round(s.confidence, 3)
    summary["reason"] = s.reason
    return summary
135
+
136
+
137
class ProcessResponse(BaseModel):
    """Response body of POST /api/process/{session_id}."""

    session_id: str
    # Report produced by the pipeline run.
    report: dict
    ready_for_download: bool = True
141
+
142
+
143
class PreflightResponse(BaseModel):
    """Response body of POST /api/preflight/{session_id}."""

    session_id: str
    # Result of the k-anonymity preflight check.
    preflight: dict
146
+
147
+
148
@router.post("/process/{session_id}", response_model=ProcessResponse)
async def process(session_id: str, request: ProcessRequest) -> ProcessResponse:
    """Run the full pipeline for a session and store the CSV output and report.

    Responds 409 when the session already has a run in flight, 404 when the
    session is missing or expired (before or after the run), and 400 when the
    pipeline rejects the request.
    """
    # Claim the session; a second concurrent request for it is refused.
    with _processing_lock:
        already_running = session_id in _processing_sessions
        if not already_running:
            _processing_sessions.add(session_id)
    if already_running:
        raise HTTPException(409, "processing already in progress for this session")

    try:
        store = get_store()
        snapshot = store.checkout(session_id)
        usable = (
            snapshot is not None
            and snapshot.df is not None
            and snapshot.hmac_key is not None
        )
        if not usable:
            raise HTTPException(404, "session not found or expired")

        detection = snapshot.detection or {"schema": [], "pii": {}}
        # Best-effort clear of previous output. If the session was reaped
        # between checkout and here the snapshot is still valid — proceed
        # with the data we already have rather than failing spuriously.
        store.clear_output(session_id)

        try:
            outcome = run_pipeline(
                original=snapshot.df,
                request=request,
                session_id=session_id,
                hmac_key=snapshot.hmac_key,
                schema=detection.get("schema", []),
                pii_suggestions=detection.get("pii", {}),
            )
        except (PipelineError, PolicyApplicationError) as e:
            raise HTTPException(400, str(e))

        # Serialize the result as CSV bytes and attach them to the session.
        csv_buffer = io.BytesIO()
        outcome.df.write_csv(csv_buffer)
        stored = store.store_output(session_id, csv_buffer.getvalue(), outcome.report)
        if not stored:
            raise HTTPException(404, "session expired — please re-upload and reprocess")

        log_fields = {
            "session_id": session_id,
            "rows_out": outcome.df.height,
            "cols_out": outcome.df.width,
        }
        log.info("process_complete", extra=log_fields)

        return ProcessResponse(session_id=session_id, report=outcome.report)
    finally:
        # Always release the claim, whatever happened above.
        with _processing_lock:
            _processing_sessions.discard(session_id)
194
+
195
+
196
class PreviewResponse(BaseModel):
    """Response body of POST /api/preview/{session_id}."""

    session_id: str
    # Column names of the original sample; both row lists follow this order.
    columns: list[str]
    original: list[list[str | None]]
    # Cells of dropped columns are None.
    sanitized: list[list[str | None]]
    dropped_columns: list[str]
202
+
203
+
204
@router.post("/preview/{session_id}", response_model=PreviewResponse)
async def preview(session_id: str, request: ProcessRequest) -> PreviewResponse:
    """Return a small before/after sample under the given policies.

    Only the first PREVIEW_ROW_LIMIT rows are transformed. k-anonymity is
    skipped (it would suppress all rows of a tiny sample). DP noise is applied
    so the user sees realistic post-noise values; DP settings that are
    incomplete or unusable are skipped here instead of being reported.
    """
    snapshot = get_store().checkout(session_id)
    if snapshot is None or snapshot.df is None or snapshot.hmac_key is None:
        raise HTTPException(404, "session not found or expired")

    head = snapshot.df.head(PREVIEW_ROW_LIMIT)
    cols_in = head.columns
    cfg = get_config()
    key_name = request.deterministic_key_name

    # Apply same deterministic key derivation as pipeline/preflight.
    if key_name:
        if cfg.deployment_salt_is_ephemeral:
            raise HTTPException(400, "Deterministic mode requires SDSA_DEPLOYMENT_SALT to be set.")
        hmac_key = _derive_deterministic_key(key_name, cfg.deployment_salt)
    else:
        hmac_key = snapshot.hmac_key

    df = head.clone()
    dp_columns = {pol.column for pol in request.policies if pol.action == "dp_laplace"}
    if key_name and dp_columns:
        raise HTTPException(
            400,
            "Deterministic mode cannot be combined with DP columns (ADR-0008)."
        )

    try:
        for pol in request.policies:
            df = apply_policy(df, pol, hmac_key)

        for col in dp_columns:
            if col not in df.columns:
                continue
            params = request.dp_params.get(col) or {}
            if any(name not in params for name in ("epsilon", "lower", "upper")):
                # Preview is best-effort: skip incomplete DP configs rather than
                # error out — the Process step will surface the real error.
                continue
            try:
                eps = float(params["epsilon"])
            except (TypeError, ValueError):
                continue
            if not (cfg.epsilon_min <= eps <= cfg.epsilon_max):
                raise HTTPException(
                    400,
                    f"epsilon for '{col}' ({eps:.6g}) outside allowed range "
                    f"[{cfg.epsilon_min}, {cfg.epsilon_max}]",
                )
            try:
                laplace = LaplaceParams(
                    epsilon=eps,
                    lower=float(params["lower"]),
                    upper=float(params["upper"]),
                )
            except (TypeError, ValueError):
                continue
            if not df[col].dtype.is_numeric():
                continue
            try:
                noisy = apply_laplace(df[col], laplace)
                df = df.with_columns(noisy.alias(col))
            except ValueError:
                continue
    except PolicyApplicationError as e:
        raise HTTPException(400, str(e))

    surviving = df.columns
    dropped = [name for name in cols_in if name not in surviving]
    # Dropped columns keep their slot as None — frontend renders a marker.
    sanitized: list[list[str | None]] = [
        [
            _stringify_cell(df[name][row_idx]) if name in surviving else None
            for name in cols_in
        ]
        for row_idx in range(head.height)
    ]

    return PreviewResponse(
        session_id=session_id,
        columns=cols_in,
        original=_serialize_sample(head),
        sanitized=sanitized,
        dropped_columns=dropped,
    )
292
+
293
+
294
@router.post("/preflight/{session_id}", response_model=PreflightResponse)
async def preflight(session_id: str, request: PreflightRequest) -> PreflightResponse:
    """Run the k-anonymity preflight for a session and return its result.

    Responds 404 when the session is missing or expired and 400 when a policy
    cannot be applied.
    """
    snapshot = get_store().checkout(session_id)
    if snapshot is None or snapshot.df is None or snapshot.hmac_key is None:
        raise HTTPException(404, "session not found or expired")

    try:
        summary = preflight_k_anonymity(
            original=snapshot.df,
            request=request,
            hmac_key=snapshot.hmac_key,
        )
    except PolicyApplicationError as e:
        raise HTTPException(400, str(e))
    return PreflightResponse(session_id=session_id, preflight=summary)
310
+
311
+
312
@router.get("/download/{session_id}/data.csv")
async def download_csv(session_id: str):
    """Serve the session's stored CSV output as an attachment; 404 when absent."""
    snapshot = get_store().checkout(session_id)
    if snapshot is None:
        raise HTTPException(404, "session expired — please re-upload and reprocess")
    payload = snapshot.output_bytes
    if payload is None:
        raise HTTPException(404, "no output for session")
    return Response(
        content=payload,
        media_type="text/csv",
        headers={"Content-Disposition": 'attachment; filename="sdsa-export.csv"'},
    )
322
+
323
+
324
@router.get("/download/{session_id}/report.json")
async def download_report_json(session_id: str):
    """Serve the session's stored report as JSON; 404 when absent."""
    snapshot = get_store().checkout(session_id)
    if snapshot is None:
        raise HTTPException(404, "session expired — please re-upload and reprocess")
    report = snapshot.output_report
    if report is None:
        raise HTTPException(404, "no report for session")
    return JSONResponse(report)
333
+
334
+
335
@router.get("/download/{session_id}/report.md")
async def download_report_md(session_id: str):
    """Serve the session's stored report rendered as Markdown; 404 when absent."""
    snapshot = get_store().checkout(session_id)
    if snapshot is None:
        raise HTTPException(404, "session expired — please re-upload and reprocess")
    report = snapshot.output_report
    if report is None:
        raise HTTPException(404, "no report for session")
    return Response(
        content=render_markdown(report),
        media_type="text/markdown",
        headers={"Content-Disposition": 'attachment; filename="sdsa-report.md"'},
    )
346
+
347
+
348
@router.delete("/session/{session_id}")
async def delete_session(session_id: str):
    """Delete a session; 404 when the store reports nothing was deleted."""
    removed = get_store().delete(session_id)
    if not removed:
        raise HTTPException(404, "session not found or already deleted")
    return {"deleted": session_id}