scorepilot 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. scorepilot/__init__.py +3 -0
  2. scorepilot/api/__init__.py +1 -0
  3. scorepilot/api/datasets.py +116 -0
  4. scorepilot/api/deps.py +34 -0
  5. scorepilot/api/exploration.py +195 -0
  6. scorepilot/api/models.py +243 -0
  7. scorepilot/app.py +80 -0
  8. scorepilot/config.py +52 -0
  9. scorepilot/core/__init__.py +59 -0
  10. scorepilot/core/_pandas.py +27 -0
  11. scorepilot/core/modeling.py +135 -0
  12. scorepilot/core/pca.py +143 -0
  13. scorepilot/core/preprocessing.py +77 -0
  14. scorepilot/core/profiling.py +158 -0
  15. scorepilot/core/quality.py +187 -0
  16. scorepilot/core/schema.py +56 -0
  17. scorepilot/core/transforms.py +80 -0
  18. scorepilot/core/workset.py +181 -0
  19. scorepilot/dataset_store.py +137 -0
  20. scorepilot/db/__init__.py +19 -0
  21. scorepilot/db/models.py +58 -0
  22. scorepilot/db/repository.py +86 -0
  23. scorepilot/db/session.py +41 -0
  24. scorepilot/main.py +53 -0
  25. scorepilot/schemas.py +277 -0
  26. scorepilot/web/_app/immutable/assets/2.BqmOpLO_.css +1 -0
  27. scorepilot/web/_app/immutable/assets/3.4vnlz7rQ.css +1 -0
  28. scorepilot/web/_app/immutable/assets/4.ZtP4eHL5.css +1 -0
  29. scorepilot/web/_app/immutable/assets/5.Db_Au4kE.css +1 -0
  30. scorepilot/web/_app/immutable/assets/6.DzOtZxiW.css +1 -0
  31. scorepilot/web/_app/immutable/chunks/4R7wieXd.js +1 -0
  32. scorepilot/web/_app/immutable/chunks/BFv5X3rB.js +1 -0
  33. scorepilot/web/_app/immutable/chunks/BG11_7tn.js +1 -0
  34. scorepilot/web/_app/immutable/chunks/BvkNMq5N.js +2 -0
  35. scorepilot/web/_app/immutable/chunks/C652voOb.js +1 -0
  36. scorepilot/web/_app/immutable/chunks/CUWj1fbK.js +1 -0
  37. scorepilot/web/_app/immutable/chunks/Cxc2N8I5.js +1 -0
  38. scorepilot/web/_app/immutable/chunks/D29yIDqO.js +1 -0
  39. scorepilot/web/_app/immutable/chunks/DEBUNeTL.js +1 -0
  40. scorepilot/web/_app/immutable/chunks/DTnWt5ot.js +1 -0
  41. scorepilot/web/_app/immutable/chunks/DX4pGedH.js +2 -0
  42. scorepilot/web/_app/immutable/chunks/DagW9dn_.js +1 -0
  43. scorepilot/web/_app/immutable/chunks/DiAmW3g9.js +1 -0
  44. scorepilot/web/_app/immutable/chunks/OWCG57QL.js +1 -0
  45. scorepilot/web/_app/immutable/chunks/XL8PmYae.js +1 -0
  46. scorepilot/web/_app/immutable/chunks/i5N5regl.js +60 -0
  47. scorepilot/web/_app/immutable/entry/app.ByUizNFm.js +2 -0
  48. scorepilot/web/_app/immutable/entry/start.P518WGvt.js +1 -0
  49. scorepilot/web/_app/immutable/nodes/0.B3UdGiPr.js +1 -0
  50. scorepilot/web/_app/immutable/nodes/1.Bm29Ig6r.js +1 -0
  51. scorepilot/web/_app/immutable/nodes/2.DXWavgtc.js +1 -0
  52. scorepilot/web/_app/immutable/nodes/3.BFvNjpq_.js +3 -0
  53. scorepilot/web/_app/immutable/nodes/4.BAIJftHt.js +1 -0
  54. scorepilot/web/_app/immutable/nodes/5.CZMyJzy8.js +2 -0
  55. scorepilot/web/_app/immutable/nodes/6.VxIOLuZA.js +1 -0
  56. scorepilot/web/_app/version.json +1 -0
  57. scorepilot/web/index.html +41 -0
  58. scorepilot-0.1.2.dist-info/METADATA +173 -0
  59. scorepilot-0.1.2.dist-info/RECORD +62 -0
  60. scorepilot-0.1.2.dist-info/WHEEL +4 -0
  61. scorepilot-0.1.2.dist-info/entry_points.txt +2 -0
  62. scorepilot-0.1.2.dist-info/licenses/LICENSE +21 -0
scorepilot/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """ScorePilot: a web-based tool for PCA/PLS model analysis in chemometrics."""
2
+
3
# Keep in sync with the published distribution version (this wheel is 0.1.2).
__version__ = "0.1.2"
@@ -0,0 +1 @@
1
+ """FastAPI routers: a thin layer translating HTTP to core/db."""
@@ -0,0 +1,116 @@
1
+ """Dataset import and metadata endpoints.
2
+
3
+ Uploaded datasets live in an in-memory store keyed by a generated id. Import keeps
4
+ every column (identifiers and qualitative columns included) and infers each
5
+ column's data type. Roles like X/Y and exclusions are *not* set here - those are
6
+ modelling choices captured in a preprocessing spec.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Annotated
12
+
13
+ from fastapi import APIRouter, File, HTTPException, Query, UploadFile, status
14
+
15
+ from scorepilot.api.deps import DatasetStoreDep
16
+ from scorepilot.core import IdentifierRole
17
+ from scorepilot.dataset_store import Dataset, load_table
18
+ from scorepilot.schemas import ColumnMetaModel, ColumnUpdate, DatasetDetail
19
+
20
+ router = APIRouter(prefix="/datasets", tags=["datasets"])
21
+
22
+
23
def to_detail(dataset: Dataset) -> DatasetDetail:
    """Convert a stored ``Dataset`` into its ``DatasetDetail`` API model.

    Each column's name, data type and identifier role is copied into a
    ``ColumnMetaModel``; the dataset-level fields are passed through as-is.
    """
    column_models = []
    for meta in dataset.columns:
        column_models.append(
            ColumnMetaModel(
                name=meta.name,
                column_type=meta.column_type,
                identifier_role=meta.identifier_role,
            )
        )

    return DatasetDetail(
        dataset_id=dataset.id,
        name=dataset.name,
        source=dataset.source,
        sheet=dataset.sheet,
        sheets=dataset.sheets,
        n_rows=dataset.n_rows,
        n_columns=dataset.n_columns,
        primary_id=dataset.primary_id,
        columns=column_models,
    )
43
+
44
+
45
@router.post("", response_model=DatasetDetail, status_code=status.HTTP_201_CREATED)
async def upload_dataset(
    store: DatasetStoreDep,
    file: Annotated[UploadFile, File(description="A CSV or Excel file")],
    sheet: Annotated[str | None, Query(description="Excel sheet name")] = None,
) -> DatasetDetail:
    """Import a CSV or Excel file as a dataset.

    Parameters
    ----------
    store
        The dataset store the parsed table is added to.
    file
        The uploaded file; its name defaults to ``dataset.csv`` when the client
        sent none.
    sheet
        Optional Excel sheet name, forwarded to ``load_table``.

    Raises
    ------
    HTTPException
        400 when the file cannot be parsed or contains no columns.
    """
    # Imported locally so this handler does not depend on a module-level import.
    from fastapi.concurrency import run_in_threadpool

    raw = await file.read()
    filename = file.filename or "dataset.csv"
    try:
        # ``load_table`` is synchronous; running it directly inside this
        # coroutine would block the event loop (and every other request) while
        # a large file is parsed, so hand it to the worker threadpool.
        frame, source, sheets, used = await run_in_threadpool(
            load_table, raw, filename, sheet
        )
    except Exception as exc:
        # Deliberately broad: any parser failure is reported as a bad upload.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Could not parse file: {exc}",
        ) from exc

    if frame.shape[1] == 0:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No columns found in the uploaded file.",
        )

    dataset = store.add(filename, frame, source=source, sheets=sheets, sheet=used)
    return to_detail(dataset)
70
+
71
+
72
@router.get("", response_model=list[DatasetDetail])
def list_datasets(store: DatasetStoreDep) -> list[DatasetDetail]:
    """Return the detail model of every dataset currently held by the store."""
    return list(map(to_detail, store.list()))
76
+
77
+
78
@router.get("/{dataset_id}", response_model=DatasetDetail)
def get_dataset(dataset_id: str, store: DatasetStoreDep) -> DatasetDetail:
    """Return the metadata of a single dataset, or 404 if the id is unknown."""
    dataset = _require(store, dataset_id)
    return to_detail(dataset)
82
+
83
+
84
@router.patch("/{dataset_id}/columns/{column}", response_model=DatasetDetail)
def update_column(
    dataset_id: str, column: str, update: ColumnUpdate, store: DatasetStoreDep
) -> DatasetDetail:
    """Change a column's data type and/or identifier role.

    Fields left as ``None`` in ``update`` are not touched. Assigning the
    PRIMARY role first demotes every column that currently holds it, so at most
    one column is primary afterwards.

    Raises
    ------
    HTTPException
        404 when the dataset or the column does not exist.
    """
    dataset = _require(store, dataset_id)
    target = dataset.column(column)
    if target is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Unknown column: {column}",
        )

    new_type = update.column_type
    if new_type is not None:
        target.column_type = new_type

    new_role = update.identifier_role
    if new_role is not None:
        if new_role is IdentifierRole.PRIMARY:
            holders = [
                candidate
                for candidate in dataset.columns
                if candidate.identifier_role is IdentifierRole.PRIMARY
            ]
            for holder in holders:
                holder.identifier_role = IdentifierRole.NONE
        target.identifier_role = new_role

    return to_detail(dataset)
107
+
108
+
109
def _require(store: DatasetStoreDep, dataset_id: str) -> Dataset:
    """Look up ``dataset_id`` in ``store``; raise a 404 if it is not there."""
    found = store.get(dataset_id)
    if found is not None:
        return found
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Unknown dataset_id: {dataset_id}",
    )
scorepilot/api/deps.py ADDED
@@ -0,0 +1,34 @@
1
+ """FastAPI dependencies wiring requests to the dataset store and repository."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Iterator
6
+ from typing import Annotated
7
+
8
+ from fastapi import Depends, Request
9
+ from sqlalchemy.orm import Session
10
+
11
+ from scorepilot.dataset_store import DatasetStore
12
+ from scorepilot.db import SqlModelRepository, session_scope
13
+
14
+
15
def get_dataset_store(request: Request) -> DatasetStore:
    """Fetch the dataset store kept on the application state."""
    app_state = request.app.state
    return app_state.dataset_store
18
+
19
+
20
def get_session(request: Request) -> Iterator[Session]:
    """Yield a session opened through ``session_scope``.

    The session factory is read from the application state. Commit/rollback
    behaviour is whatever ``session_scope`` implements (documented upstream as
    committing when the request succeeds).
    """
    factory = request.app.state.session_factory
    with session_scope(factory) as session:
        yield session
24
+
25
+
26
def get_repository(
    session: Annotated[Session, Depends(get_session)],
) -> SqlModelRepository:
    """Wrap the request-scoped session in a ``SqlModelRepository``."""
    repository = SqlModelRepository(session)
    return repository
31
+
32
+
33
+ DatasetStoreDep = Annotated[DatasetStore, Depends(get_dataset_store)]
34
+ RepositoryDep = Annotated[SqlModelRepository, Depends(get_repository)]
@@ -0,0 +1,195 @@
1
+ """Exploration endpoints: data-quality, grid windows, and the variable inspector.
2
+
3
+ These are read-only views over an immutable dataset. The transform shown in the
4
+ inspector is a non-destructive *preview*; it never changes the stored data.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Annotated
10
+
11
+ import pandas as pd
12
+ from fastapi import APIRouter, HTTPException, Query, status
13
+
14
+ from scorepilot.api.deps import DatasetStoreDep
15
+ from scorepilot.core import (
16
+ PreprocessingSpec,
17
+ TransformKind,
18
+ apply_spec,
19
+ apply_transform,
20
+ histogram,
21
+ quality_report,
22
+ sequence,
23
+ suggest_transform,
24
+ variable_summary,
25
+ )
26
+ from scorepilot.core._pandas import column as get_column
27
+ from scorepilot.dataset_store import Dataset
28
+ from scorepilot.schemas import (
29
+ ColumnQualityModel,
30
+ DuplicateIdentifierModel,
31
+ GridWindow,
32
+ ObservationQualityModel,
33
+ QualityReportModel,
34
+ VariableInspector,
35
+ )
36
+
37
+ router = APIRouter(prefix="/datasets", tags=["exploration"])
38
+
39
+
40
def _require(store: DatasetStoreDep, dataset_id: str) -> Dataset:
    """Return the dataset stored under ``dataset_id`` or raise a 404."""
    match = store.get(dataset_id)
    if match is not None:
        return match
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Unknown dataset_id: {dataset_id}",
    )
48
+
49
+
50
def _format_cell(value: object) -> str | None:
    """Render one grid cell as text.

    Missing values (as judged by ``pd.isna``) become ``None``, floats are
    shortened to six significant digits, and anything else goes through
    ``str``.
    """
    is_missing = pd.isna(value)  # type: ignore[arg-type]
    if is_missing:
        return None
    return f"{value:.6g}" if isinstance(value, float) else str(value)
56
+
57
+
58
@router.get("/{dataset_id}/quality", response_model=QualityReportModel)
def get_quality(dataset_id: str, store: DatasetStoreDep) -> QualityReportModel:
    """Build the data-quality report for one dataset.

    The report is computed by ``quality_report`` from the raw frame, the column
    types and the primary identifier, then copied field by field into the
    response models. Raises a 404 when the dataset id is unknown.
    """
    dataset = _require(store, dataset_id)
    report = quality_report(dataset.raw, types=dataset.types(), primary_id=dataset.primary_id)

    duplicates = [
        DuplicateIdentifierModel(value=dup.value, rows=dup.rows)
        for dup in report.duplicate_primary_ids
    ]

    column_reports = []
    for col in report.columns:
        column_reports.append(
            ColumnQualityModel(
                name=col.name,
                n_missing=col.n_missing,
                pct_missing=col.pct_missing,
                n_invalid=col.n_invalid,
                invalid_rows=col.invalid_rows,
                exceeds_tolerance=col.exceeds_tolerance,
            )
        )

    flagged_observations = []
    for obs in report.observations_exceeding:
        flagged_observations.append(
            ObservationQualityModel(
                index=obs.index,
                identifier=obs.identifier,
                n_missing=obs.n_missing,
                pct_missing=obs.pct_missing,
            )
        )

    return QualityReportModel(
        n_rows=report.n_rows,
        n_columns=report.n_columns,
        n_missing_cells=report.n_missing_cells,
        pct_missing=report.pct_missing,
        primary_id_unique=report.primary_id_unique,
        duplicate_primary_ids=duplicates,
        columns=column_reports,
        observations_exceeding=flagged_observations,
    )
94
+
95
+
96
@router.get("/{dataset_id}/grid", response_model=GridWindow)
def get_grid(
    dataset_id: str,
    store: DatasetStoreDep,
    row_offset: Annotated[int, Query(ge=0)] = 0,
    row_limit: Annotated[int, Query(ge=1, le=1000)] = 100,
    col_offset: Annotated[int, Query(ge=0)] = 0,
    col_limit: Annotated[int, Query(ge=1, le=200)] = 50,
    form: Annotated[str, Query(pattern="^(raw|scaled)$")] = "raw",
) -> GridWindow:
    """Return a windowed block of cells for the grid, raw or scaled.

    Parameters
    ----------
    dataset_id
        Id of the dataset to read; a 404 is raised when it is unknown.
    row_offset, row_limit
        Zero-based first row and maximum number of rows of the window.
    col_offset, col_limit
        Zero-based first column and maximum number of columns of the window.
    form
        ``"raw"`` for the stored values or ``"scaled"`` for the frame produced
        by ``_display_frame``.

    Returns
    -------
    GridWindow
        Column names, row identifiers and formatted cells. Offsets past the end
        of the data yield an empty window rather than an error.
    """
    dataset = _require(store, dataset_id)
    display = _display_frame(dataset, form)

    # Slice by position on both axes. Selecting by stringified label would
    # raise KeyError for non-string labels (e.g. integer headers) and return
    # extra columns when labels are duplicated, leaving ``column_names`` and
    # ``cells`` out of step.
    window = display.iloc[
        row_offset : row_offset + row_limit,
        col_offset : col_offset + col_limit,
    ]
    column_names = [str(label) for label in window.columns]

    row_identifiers = _row_identifiers(dataset, row_offset, len(window))
    n_rows, n_cols = window.shape
    cells = [
        [_format_cell(window.iat[r, c]) for c in range(n_cols)]
        for r in range(n_rows)
    ]
    return GridWindow(
        row_offset=row_offset,
        column_names=column_names,
        row_identifiers=row_identifiers,
        cells=cells,
    )
125
+
126
+
127
def _display_frame(dataset: Dataset, form: str) -> pd.DataFrame:
    """Pick the frame the grid should show for the requested ``form``.

    Anything other than ``"scaled"`` returns the raw frame itself, as does a
    dataset without quantitative columns. Otherwise a copy of the raw frame is
    returned whose quantitative columns are replaced by the ``X`` output of
    ``apply_spec`` run over the whole dataset, so the values do not depend on
    which window is being viewed.
    """
    if form == "scaled":
        quantitative = dataset.quantitative_columns()
        if quantitative:
            spec = PreprocessingSpec(x_columns=tuple(quantitative))
            scaled = apply_spec(dataset.raw, spec).X
            display = dataset.raw.copy()
            for name in quantitative:
                display[name] = scaled[name]
            return display
    return dataset.raw
139
+
140
+
141
def _row_identifiers(dataset: Dataset, row_offset: int, count: int) -> list[str | None]:
    """Label ``count`` rows starting at ``row_offset``.

    Without a primary identifier the labels are the zero-based row positions as
    strings; otherwise they are the formatted values of the primary-id column.
    """
    primary = dataset.primary_id
    if primary is None:
        return [str(position) for position in range(row_offset, row_offset + count)]
    id_values = get_column(dataset.raw, primary).iloc[row_offset : row_offset + count]
    return [_format_cell(value) for value in id_values]
146
+
147
+
148
@router.get("/{dataset_id}/variables/{column}", response_model=VariableInspector)
def inspect_variable(
    dataset_id: str,
    column: str,
    store: DatasetStoreDep,
    transform: Annotated[TransformKind, Query()] = TransformKind.NONE,
    c1: Annotated[float, Query()] = 0.0,
    c2: Annotated[float, Query()] = 1.0,
) -> VariableInspector:
    """Return summary, histogram, and sequence for a variable (with optional preview).

    Parameters
    ----------
    dataset_id, column
        Dataset and column to inspect; a 404 is raised when either is unknown.
    transform
        Transform to preview. ``TransformKind.NONE`` shows the stored values.
    c1, c2
        Constants forwarded to ``apply_transform``.

    Notes
    -----
    The suggested transform is always derived from the untransformed column,
    while the statistics, histogram and sequence describe the previewed values.
    """
    dataset = _require(store, dataset_id)
    meta = dataset.column(column)
    if meta is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Unknown column: {column}",
        )

    series = get_column(dataset.raw, column)
    raw_summary = variable_summary(series, column_type=meta.column_type)
    suggested = suggest_transform(raw_summary)

    if transform is TransformKind.NONE:
        # No preview requested: reuse the summary computed above instead of
        # summarising the identical series a second time.
        display = series
        summary = raw_summary
    else:
        display = apply_transform(series, transform, c1=c1, c2=c2)
        summary = variable_summary(display, column_type=meta.column_type)
    counts, edges = histogram(display)

    return VariableInspector(
        name=meta.name,
        column_type=meta.column_type,
        n=summary.n,
        n_missing=summary.n_missing,
        pct_missing=summary.pct_missing,
        n_unique=summary.n_unique,
        mean=summary.mean,
        std=summary.std,
        minimum=summary.minimum,
        maximum=summary.maximum,
        median=summary.median,
        q25=summary.q25,
        q75=summary.q75,
        skewness=summary.skewness,
        min_max_ratio=summary.min_max_ratio,
        suggested_transform=suggested,
        histogram_counts=counts,
        histogram_edges=edges,
        sequence=sequence(display),
    )
@@ -0,0 +1,243 @@
1
+ """Model-fitting endpoints, the Hangar (list), and the Logbook (detail)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+
7
+ import numpy as np
8
+ from fastapi import APIRouter, HTTPException, status
9
+
10
+ from scorepilot.api.deps import DatasetStoreDep, RepositoryDep
11
+ from scorepilot.core import (
12
+ ModelDiagnostics,
13
+ PreprocessingSpec,
14
+ apply_spec,
15
+ fit_model,
16
+ )
17
+ from scorepilot.dataset_store import Dataset
18
+ from scorepilot.db import Model
19
+ from scorepilot.schemas import (
20
+ FitModelRequest,
21
+ FitPCARequest,
22
+ LoadingsPayload,
23
+ ModelDetail,
24
+ ModelDiagnosticsModel,
25
+ ModelSummary,
26
+ PCAFitResponse,
27
+ ScoresPayload,
28
+ )
29
+
30
+ router = APIRouter(prefix="/models", tags=["models"])
31
+
32
+
33
+ # --- helpers ----------------------------------------------------------------
34
+
35
+
36
def _pack_params(diag: ModelDiagnostics) -> bytes:
    """Serialise the fitted parameters into a compressed ``.npz`` byte string.

    The archive holds three arrays: ``x_loadings``, ``explained_variance`` and
    ``r2_cumulative``.
    """
    arrays = {
        "x_loadings": diag.x_loadings.to_numpy(),
        "explained_variance": diag.explained_variance,
        "r2_cumulative": np.asarray(diag.r2_cumulative),
    }
    with io.BytesIO() as sink:
        np.savez_compressed(sink, **arrays)
        return sink.getvalue()
45
+
46
+
47
def _default_spec(dataset: Dataset, spec: PreprocessingSpec | None) -> PreprocessingSpec:
    """Return ``spec`` unchanged, or a spec over all quantitative columns if it is ``None``."""
    return (
        spec
        if spec is not None
        else PreprocessingSpec(x_columns=tuple(dataset.quantitative_columns()))
    )
51
+
52
+
53
def _run_fit(
    dataset: Dataset, spec: PreprocessingSpec, kind: str, n_components: int, conf_level: float
) -> ModelDiagnostics:
    """Apply ``spec`` to the dataset's raw frame and fit a model on the resulting X/Y blocks."""
    prepared = apply_spec(dataset.raw, spec)
    x_block, y_block = prepared.X, prepared.Y
    return fit_model(x_block, y_block, kind, n_components, conf_level=conf_level)
58
+
59
+
60
def _summary(model: Model) -> ModelSummary:
    """Copy the listing fields of a stored ``Model`` into a ``ModelSummary``."""
    copied_fields = (
        "id",
        "kind",
        "name",
        "n_components",
        "parent_id",
        "dataset_id",
        "created_at",
    )
    return ModelSummary(**{field: getattr(model, field) for field in copied_fields})
70
+
71
+
72
def _diagnostics_model(diag: ModelDiagnostics) -> ModelDiagnosticsModel:
    """Translate core ``ModelDiagnostics`` into the API response model.

    Frames and arrays are converted to nested lists; ``y_loadings`` stays
    ``None`` when the diagnostics carry no Y loadings.
    """
    components = diag.component_names

    def _loadings(frame, variables) -> LoadingsPayload:
        # Shared shape for the X and Y loading payloads.
        return LoadingsPayload(
            component_names=components,
            variable_names=variables,
            data=frame.to_numpy().tolist(),
        )

    y_block = diag.y_loadings
    y_payload = None if y_block is None else _loadings(y_block, diag.y_variable_names)

    scores_payload = ScoresPayload(
        component_names=components,
        observation_names=diag.observation_names,
        data=diag.scores.to_numpy().tolist(),
    )

    return ModelDiagnosticsModel(
        kind=diag.kind,
        n_components=diag.n_components,
        conf_level=diag.conf_level,
        component_names=components,
        explained_variance=diag.explained_variance.tolist(),
        r2_per_component=diag.r2_per_component,
        r2_cumulative=diag.r2_cumulative,
        scores=scores_payload,
        x_loadings=_loadings(diag.x_loadings, diag.x_variable_names),
        y_loadings=y_payload,
        hotellings_t2=diag.hotellings_t2.to_numpy().tolist(),
        spe=diag.spe.to_numpy().tolist(),
        t2_limit=diag.t2_limit,
        spe_limit=diag.spe_limit,
        ellipse_x=diag.ellipse_x,
        ellipse_y=diag.ellipse_y,
        vip=diag.vip,
    )
107
+
108
+
109
def _require_dataset(store: DatasetStoreDep, dataset_id: str) -> Dataset:
    """Return the dataset stored under ``dataset_id``; raise a 404 when absent."""
    located = store.get(dataset_id)
    if located is not None:
        return located
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Unknown dataset_id: {dataset_id}",
    )
117
+
118
+
119
+ # --- endpoints --------------------------------------------------------------
120
+
121
+
122
@router.post("", response_model=ModelDetail, status_code=status.HTTP_201_CREATED)
def fit_model_endpoint(
    request: FitModelRequest, store: DatasetStoreDep, repository: RepositoryDep
) -> ModelDetail:
    """Fit a model variant on a dataset, store it, and return its detail view.

    When the request carries no spec, a default spec over the dataset's
    quantitative columns is used.

    Raises
    ------
    HTTPException
        404 for an unknown dataset or parent model; 422 when fitting raises
        ``ValueError``.
    """
    dataset = _require_dataset(store, request.dataset_id)

    has_parent = request.parent_id is not None
    if has_parent and repository.get(request.parent_id) is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Unknown parent_id: {request.parent_id}",
        )

    requested_spec = request.spec.to_core() if request.spec else None
    spec = _default_spec(dataset, requested_spec)
    try:
        diag = _run_fit(dataset, spec, request.kind, request.n_components, request.conf_level)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT, detail=str(exc)
        ) from exc

    record = Model(
        kind=request.kind,
        name=request.name,
        dataset_id=dataset.id,
        n_components=diag.n_components,
        preprocessing=spec.to_dict(),
        excluded_samples=list(spec.excluded_rows),
        params=_pack_params(diag),
        parent_id=request.parent_id,
    )
    model = repository.add(record)

    summary = _summary(model)
    recipe = spec.to_dict()
    excluded = list(spec.excluded_rows)
    lineage = [_summary(ancestor) for ancestor in repository.lineage(model.id)]
    return ModelDetail(
        summary=summary,
        preprocessing=recipe,
        excluded_samples=excluded,
        lineage=lineage,
        diagnostics=_diagnostics_model(diag),
    )
161
+
162
+
163
@router.get("", response_model=list[ModelSummary])
def list_models(repository: RepositoryDep) -> list[ModelSummary]:
    """Return a summary of every stored model variant (the Hangar)."""
    return list(map(_summary, repository.list()))
167
+
168
+
169
@router.get("/{model_id}", response_model=ModelDetail)
def get_model(model_id: int, store: DatasetStoreDep, repository: RepositoryDep) -> ModelDetail:
    """Return a model's Logbook: metadata, recipe, lineage, and diagnostics.

    Diagnostics are recomputed from the source dataset and stored spec. If the
    dataset is no longer in memory, or the refit raises ``ValueError``, the
    entry is returned without diagnostics.

    Raises
    ------
    HTTPException
        404 when ``model_id`` is unknown.
    """
    # Imported locally so this handler does not depend on a module-level import.
    import logging

    model = repository.get(model_id)
    if model is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail=f"Unknown model id: {model_id}"
        )

    diagnostics = None
    dataset = store.get(model.dataset_id) if model.dataset_id else None
    if dataset is not None:
        spec = PreprocessingSpec.from_dict(model.preprocessing)
        try:
            # NOTE(review): the confidence level used at fit time is not passed
            # to ``Model`` when it is created here, so the refit always uses
            # 0.95 regardless of what the original request asked for.
            diag = _run_fit(dataset, spec, model.kind, model.n_components, 0.95)
            diagnostics = _diagnostics_model(diag)
        except ValueError:
            # Best effort: still return the entry, but leave a trace instead of
            # dropping the failure silently.
            logging.getLogger(__name__).warning(
                "Could not recompute diagnostics for model %s", model_id, exc_info=True
            )

    return ModelDetail(
        summary=_summary(model),
        preprocessing=dict(model.preprocessing),
        excluded_samples=list(model.excluded_samples),
        lineage=[_summary(m) for m in repository.lineage(model.id)],
        diagnostics=diagnostics,
    )
199
+
200
+
201
@router.post("/pca", response_model=PCAFitResponse, status_code=status.HTTP_201_CREATED)
def fit_pca_model(
    request: FitPCARequest, store: DatasetStoreDep, repository: RepositoryDep
) -> PCAFitResponse:
    """Fit and store a PCA model (legacy endpoint used by the scores playground).

    Raises
    ------
    HTTPException
        404 for an unknown dataset; 422 when fitting raises ``ValueError``.
    """
    dataset = _require_dataset(store, request.dataset_id)
    requested_spec = request.spec.to_core() if request.spec else None
    spec = _default_spec(dataset, requested_spec)
    try:
        diag = _run_fit(dataset, spec, "PCA", request.n_components, request.conf_level)
    except ValueError as exc:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT, detail=str(exc)
        ) from exc

    record = Model(
        kind="PCA",
        name=request.name,
        dataset_id=dataset.id,
        n_components=diag.n_components,
        preprocessing=spec.to_dict(),
        excluded_samples=list(spec.excluded_rows),
        params=_pack_params(diag),
    )
    model = repository.add(record)

    scores_payload = ScoresPayload(
        component_names=diag.component_names,
        observation_names=diag.observation_names,
        data=diag.scores.to_numpy().tolist(),
    )
    return PCAFitResponse(
        model_id=model.id,
        kind="PCA",
        n_components=diag.n_components,
        conf_level=diag.conf_level,
        component_names=diag.component_names,
        explained_variance=diag.explained_variance.tolist(),
        r2_cumulative=diag.r2_cumulative,
        scores=scores_payload,
        hotellings_t2=diag.hotellings_t2.to_numpy().tolist(),
        spe=diag.spe.to_numpy().tolist(),
        t2_limit=diag.t2_limit,
        spe_limit=diag.spe_limit,
    )
scorepilot/app.py ADDED
@@ -0,0 +1,80 @@
1
+ """FastAPI application factory.
2
+
3
+ The packaged app is a single process: the API is served under ``/api`` and the
4
+ built static frontend at ``/``. A small ``StaticFiles`` subclass falls back to
5
+ ``index.html`` so client-side (SPA) routes resolve.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from pathlib import Path
11
+
12
+ from fastapi import FastAPI
13
+ from fastapi.staticfiles import StaticFiles
14
+ from starlette.exceptions import HTTPException as StarletteHTTPException
15
+ from starlette.types import Scope
16
+
17
+ from scorepilot.api import datasets, exploration, models
18
+ from scorepilot.config import Settings, get_settings
19
+ from scorepilot.dataset_store import DatasetStore
20
+ from scorepilot.db import Base, make_engine, make_session_factory
21
+
22
+ WEB_DIR = Path(__file__).resolve().parent / "web"
23
+
24
+
25
class SpaStaticFiles(StaticFiles):
    """Static files that fall back to ``index.html`` for client-side routes.

    This lets the single-page app own client-side routing while non-existent
    asset paths (anything with a file extension) and unknown ``api/`` paths
    still return a real 404 instead of the HTML shell.
    """

    async def get_response(self, path: str, scope: Scope):  # noqa: ANN201
        """Serve ``path``; on a 404 for a client-side route, serve ``index.html``.

        Any other HTTP error, and a 404 for a path that is not a client-side
        route, is re-raised unchanged.
        """
        try:
            return await super().get_response(path, scope)
        except StarletteHTTPException as exc:
            if exc.status_code != 404 or not self._is_client_route(path):
                raise
            return await super().get_response("index.html", scope)

    @staticmethod
    def _is_client_route(path: str) -> bool:
        """Return whether a missing ``path`` should be handed to the SPA router.

        Paths with a file extension are treated as asset requests, and paths
        under ``api`` as API calls; serving HTML for either would hide the
        error from the caller. Trade-off: a client route whose last segment
        contains a dot is treated as an asset and gets a 404.
        """
        requested = Path(path)
        if requested.suffix:
            return False
        return requested.parts[:1] != ("api",)
39
+
40
+
41
def create_app(settings: Settings | None = None) -> FastAPI:
    """Build and configure the FastAPI application.

    Parameters
    ----------
    settings
        Optional settings override (used by tests). Defaults to the cached
        environment-derived settings.

    Returns
    -------
    FastAPI
        The app with settings, engine, session factory and dataset store on
        ``app.state``, the API routers under ``/api``, and the static frontend
        mounted at ``/``.
    """
    # Imported locally so the advertised API version always follows the
    # package version instead of a second hard-coded literal.
    from scorepilot import __version__

    settings = settings or get_settings()

    engine = make_engine(settings.database_url)
    # Convenience for local/dev use: ensure tables exist. Production schema
    # changes are managed with Alembic.
    Base.metadata.create_all(engine)

    app = FastAPI(
        title="ScorePilot",
        version=__version__,
        docs_url="/api/docs",
        redoc_url=None,
        openapi_url="/api/openapi.json",
    )
    app.state.settings = settings
    app.state.engine = engine
    app.state.session_factory = make_session_factory(engine)
    app.state.dataset_store = DatasetStore()

    @app.get("/api/health", tags=["meta"])
    def health() -> dict[str, str]:
        return {"status": "ok"}

    app.include_router(datasets.router, prefix="/api")
    app.include_router(exploration.router, prefix="/api")
    app.include_router(models.router, prefix="/api")

    # Mount the static SPA last so it never shadows the API routes above.
    app.mount("/", SpaStaticFiles(directory=WEB_DIR, html=True), name="web")

    return app