@zigrivers/scaffold 3.22.0 → 3.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -7
- package/content/knowledge/data-science/README.md +23 -0
- package/content/knowledge/data-science/data-science-architecture.md +163 -0
- package/content/knowledge/data-science/data-science-conventions.md +233 -0
- package/content/knowledge/data-science/data-science-data-versioning.md +198 -0
- package/content/knowledge/data-science/data-science-dev-environment.md +159 -0
- package/content/knowledge/data-science/data-science-experiment-tracking.md +194 -0
- package/content/knowledge/data-science/data-science-model-evaluation.md +160 -0
- package/content/knowledge/data-science/data-science-notebook-discipline.md +170 -0
- package/content/knowledge/data-science/data-science-observability.md +161 -0
- package/content/knowledge/data-science/data-science-project-structure.md +178 -0
- package/content/knowledge/data-science/data-science-reproducibility.md +164 -0
- package/content/knowledge/data-science/data-science-requirements.md +151 -0
- package/content/knowledge/data-science/data-science-security.md +151 -0
- package/content/knowledge/data-science/data-science-testing.md +183 -0
- package/content/knowledge/ml/README.md +10 -0
- package/content/methodology/data-science-overlay.yml +39 -0
- package/dist/config/schema.d.ts +672 -126
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +8 -0
- package/dist/config/schema.js.map +1 -1
- package/dist/config/schema.test.js +2 -2
- package/dist/config/schema.test.js.map +1 -1
- package/dist/config/validators/data-science.d.ts +4 -0
- package/dist/config/validators/data-science.d.ts.map +1 -0
- package/dist/config/validators/data-science.js +15 -0
- package/dist/config/validators/data-science.js.map +1 -0
- package/dist/config/validators/index.d.ts.map +1 -1
- package/dist/config/validators/index.js +2 -0
- package/dist/config/validators/index.js.map +1 -1
- package/dist/core/assembly/knowledge-loader.d.ts.map +1 -1
- package/dist/core/assembly/knowledge-loader.js +6 -0
- package/dist/core/assembly/knowledge-loader.js.map +1 -1
- package/dist/core/assembly/knowledge-loader.test.js +34 -0
- package/dist/core/assembly/knowledge-loader.test.js.map +1 -1
- package/dist/e2e/project-type-overlays.test.js +73 -0
- package/dist/e2e/project-type-overlays.test.js.map +1 -1
- package/dist/project/adopt.d.ts.map +1 -1
- package/dist/project/adopt.js +3 -1
- package/dist/project/adopt.js.map +1 -1
- package/dist/project/detectors/coverage.test.d.ts +2 -0
- package/dist/project/detectors/coverage.test.d.ts.map +1 -0
- package/dist/project/detectors/coverage.test.js +78 -0
- package/dist/project/detectors/coverage.test.js.map +1 -0
- package/dist/project/detectors/data-science.d.ts +4 -0
- package/dist/project/detectors/data-science.d.ts.map +1 -0
- package/dist/project/detectors/data-science.js +32 -0
- package/dist/project/detectors/data-science.js.map +1 -0
- package/dist/project/detectors/data-science.test.d.ts +2 -0
- package/dist/project/detectors/data-science.test.d.ts.map +1 -0
- package/dist/project/detectors/data-science.test.js +62 -0
- package/dist/project/detectors/data-science.test.js.map +1 -0
- package/dist/project/detectors/disambiguate.d.ts +2 -0
- package/dist/project/detectors/disambiguate.d.ts.map +1 -1
- package/dist/project/detectors/disambiguate.js +3 -2
- package/dist/project/detectors/disambiguate.js.map +1 -1
- package/dist/project/detectors/disambiguate.test.js +10 -1
- package/dist/project/detectors/disambiguate.test.js.map +1 -1
- package/dist/project/detectors/index.d.ts.map +1 -1
- package/dist/project/detectors/index.js +2 -0
- package/dist/project/detectors/index.js.map +1 -1
- package/dist/project/detectors/library.d.ts.map +1 -1
- package/dist/project/detectors/library.js +1 -0
- package/dist/project/detectors/library.js.map +1 -1
- package/dist/project/detectors/resolve-detection.test.js +31 -0
- package/dist/project/detectors/resolve-detection.test.js.map +1 -1
- package/dist/project/detectors/types.d.ts +6 -2
- package/dist/project/detectors/types.d.ts.map +1 -1
- package/dist/project/detectors/types.js.map +1 -1
- package/dist/types/config.d.ts +8 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/wizard/copy/core.d.ts.map +1 -1
- package/dist/wizard/copy/core.js +4 -0
- package/dist/wizard/copy/core.js.map +1 -1
- package/dist/wizard/copy/data-science.d.ts +3 -0
- package/dist/wizard/copy/data-science.d.ts.map +1 -0
- package/dist/wizard/copy/data-science.js +15 -0
- package/dist/wizard/copy/data-science.js.map +1 -0
- package/dist/wizard/copy/index.d.ts.map +1 -1
- package/dist/wizard/copy/index.js +2 -0
- package/dist/wizard/copy/index.js.map +1 -1
- package/dist/wizard/copy/types.d.ts +5 -1
- package/dist/wizard/copy/types.d.ts.map +1 -1
- package/dist/wizard/copy/types.test-d.js +7 -0
- package/dist/wizard/copy/types.test-d.js.map +1 -1
- package/dist/wizard/questions.d.ts +2 -1
- package/dist/wizard/questions.d.ts.map +1 -1
- package/dist/wizard/questions.js +9 -1
- package/dist/wizard/questions.js.map +1 -1
- package/dist/wizard/questions.test.js +14 -0
- package/dist/wizard/questions.test.js.map +1 -1
- package/dist/wizard/wizard.d.ts.map +1 -1
- package/dist/wizard/wizard.js +1 -0
- package/dist/wizard/wizard.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: data-science-testing
|
|
3
|
+
description: Testing strategy for solo DS code — pytest for pure functions, pandera for DataFrame schemas at test time and at ingest boundaries, and committed CSV fixtures for deterministic tests
|
|
4
|
+
topics: [data-science, testing, pytest, pandera]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
Data-science code rots quietly. A notebook cell that worked on Tuesday's snapshot silently breaks on Friday's because an upstream column was renamed, a dtype shifted from `int64` to `float64`, or a categorical grew a new level nobody tested for. Refactors that move feature logic out of a notebook into `src/` routinely regress because there was no test pinning the old behavior. Tests catch these failures at the line that introduced them instead of at the end of a three-hour pipeline run.
|
|
8
|
+
|
|
9
|
+
## Summary
|
|
10
|
+
|
|
11
|
+
Treat DS testing as three separate layers with distinct tools. Use `pytest` for pure-function unit tests — feature engineering, metric calculations, preprocessing helpers in `src/`. Use `pandera` for DataFrame-level contracts: schemas assert column names, dtypes, value ranges, and non-null expectations, and those same schemas run both in tests and at runtime at ingest boundaries. Use committed CSV fixtures in `tests/fixtures/` loaded through pytest fixtures for deterministic, reviewable test data. Keep this doc's scope to CODE correctness; model quality (AUC, calibration, drift) belongs in `data-science-model-evaluation.md`.
|
|
12
|
+
|
|
13
|
+
## Deep Guidance
|
|
14
|
+
|
|
15
|
+
### Unit tests with pytest
|
|
16
|
+
|
|
17
|
+
Every helper in `src/` that transforms data is a pure function candidate for `pytest`. Arrange small inputs, act by calling the function, assert on the output. If a helper reaches for a database or filesystem, push that I/O out to the caller so the core logic stays testable without mocks.
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
# tests/test_features.py
|
|
21
|
+
import numpy as np
|
|
22
|
+
import pandas as pd
|
|
23
|
+
import pytest
|
|
24
|
+
from src.features import impute_missing_ages
|
|
25
|
+
|
|
26
|
+
class TestImputeMissingAges:
|
|
27
|
+
def test_fills_nan_with_median(self):
|
|
28
|
+
df = pd.DataFrame({"age": [10.0, 20.0, 30.0, np.nan]})
|
|
29
|
+
result = impute_missing_ages(df)
|
|
30
|
+
assert result["age"].isna().sum() == 0
|
|
31
|
+
assert result.loc[3, "age"] == 20.0 # median of [10, 20, 30]
|
|
32
|
+
|
|
33
|
+
def test_preserves_non_null_values(self):
|
|
34
|
+
df = pd.DataFrame({"age": [10.0, 20.0, 30.0, np.nan]})
|
|
35
|
+
result = impute_missing_ages(df)
|
|
36
|
+
pd.testing.assert_series_equal(
|
|
37
|
+
result.loc[:2, "age"], df.loc[:2, "age"], check_names=False
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
def test_all_nan_raises(self):
|
|
41
|
+
df = pd.DataFrame({"age": [np.nan, np.nan]})
|
|
42
|
+
with pytest.raises(ValueError, match="all-null"):
|
|
43
|
+
impute_missing_ages(df)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Run with `pytest -q`. Add `--cov=src` via `pytest-cov` once the project has more than a handful of helpers; aim for coverage on feature-engineering and metrics modules, not notebooks.
|
|
47
|
+
|
|
48
|
+
Four rules keep this layer productive:
|
|
49
|
+
|
|
50
|
+
- **Name tests after the behavior, not the function**: `test_fills_nan_with_median` beats `test_impute_missing_ages_1`. The name is the failure message when CI turns red.
|
|
51
|
+
- **One assertion family per test**: a test checks either output values, or output shape, or error behavior — not all three. Split into three tests. Failures point at the broken property immediately.
|
|
52
|
+
- **Use `pd.testing.assert_frame_equal` and `np.testing.assert_allclose`**: never compare DataFrames with `==` or floats with exact equality. Pass `rtol`/`atol` explicitly so the tolerance is visible in the test.
|
|
53
|
+
- **Mark slow tests**: decorate any test that loads a non-trivial dataset with `@pytest.mark.slow` and run the default suite with `-m "not slow"` so `pytest` stays under ~5 seconds on save.
|
|
54
|
+
|
|
55
|
+
### Data-frame validation with pandera
|
|
56
|
+
|
|
57
|
+
Column drift is the single most common source of silent DS bugs. `pandera` encodes a DataFrame contract once and reuses it as a test assertion and a runtime guard at ingest boundaries — the moment a CSV, parquet file, or API response becomes a DataFrame.
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
# src/schemas.py
|
|
61
|
+
import pandera.pandas as pa
|
|
62
|
+
from pandera.typing import Series
|
|
63
|
+
|
|
64
|
+
class CustomersSchema(pa.DataFrameModel):
|
|
65
|
+
customer_id: Series[int] = pa.Field(unique=True, ge=0)
|
|
66
|
+
age: Series[float] = pa.Field(ge=0, le=120, nullable=True)
|
|
67
|
+
signup_date: Series[pa.DateTime]
|
|
68
|
+
segment: Series[str] = pa.Field(isin=["free", "pro", "enterprise"])
|
|
69
|
+
|
|
70
|
+
class Config:
|
|
71
|
+
strict = True # reject unexpected columns
|
|
72
|
+
|
|
73
|
+
# src/ingest.py — runtime validation at the boundary
|
|
74
|
+
import pandas as pd
from src.schemas import CustomersSchema
|
|
75
|
+
|
|
76
|
+
def load_customers(path: str) -> pd.DataFrame:
|
|
77
|
+
df = pd.read_csv(path, parse_dates=["signup_date"])
|
|
78
|
+
return CustomersSchema.validate(df) # raises SchemaError on violation
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
The same schema doubles as a test fixture contract:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
# tests/test_ingest.py
|
|
85
|
+
import pandas as pd
|
|
86
|
+
import pytest
|
|
87
|
+
from pandera.errors import SchemaError
|
|
88
|
+
from src.ingest import load_customers
|
|
89
|
+
|
|
90
|
+
def test_rejects_invalid_segment(tmp_path):
|
|
91
|
+
bad = tmp_path / "bad.csv"
|
|
92
|
+
bad.write_text("customer_id,age,signup_date,segment\n1,30.0,2024-01-01,vip\n")
|
|
93
|
+
with pytest.raises(SchemaError, match="segment"):
|
|
94
|
+
load_customers(str(bad))
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Prefer `schema.validate(df)` calls over the `@pa.check_input` decorator — explicit validation is easier to trace in stack traces and does not hide behind import-time decoration.
|
|
98
|
+
|
|
99
|
+
Three patterns make pandera pay off:
|
|
100
|
+
|
|
101
|
+
- **Validate once at the boundary, trust downstream**: call `Schema.validate(df)` inside `load_customers`, `load_orders`, or whatever function first produces a DataFrame. Downstream code can then assume columns, dtypes, and ranges without re-checking.
|
|
102
|
+
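- **Use `lazy=True` during development**: `Schema.validate(df, lazy=True)` collects every violation instead of failing on the first, so a bad CSV can be fixed in one pass rather than one error per run. Note that lazy validation raises `pandera.errors.SchemaErrors` (plural) rather than `SchemaError`, so adjust any `except` or `pytest.raises` accordingly.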
|
|
103
|
+
- **Version schemas alongside migrations**: when a column renames or a new category lands, update the schema in the same PR as the code change. Schema drift caught in code review is cheaper than schema drift caught in production.
|
|
104
|
+
|
|
105
|
+
### Fixtures: deterministic test data
|
|
106
|
+
|
|
107
|
+
Random DataFrames in tests produce flaky failures that are painful to debug. Commit small, hand-curated CSVs to `tests/fixtures/` and load them through pytest `fixture` functions. The CSVs are reviewable in PRs, the fixtures are reusable across test modules.
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
# tests/conftest.py
|
|
111
|
+
from pathlib import Path
|
|
112
|
+
import pandas as pd
|
|
113
|
+
import pytest
|
|
114
|
+
|
|
115
|
+
FIXTURES = Path(__file__).parent / "fixtures"
|
|
116
|
+
|
|
117
|
+
@pytest.fixture
|
|
118
|
+
def customers_df() -> pd.DataFrame:
|
|
119
|
+
return pd.read_csv(FIXTURES / "customers_small.csv", parse_dates=["signup_date"])
|
|
120
|
+
|
|
121
|
+
@pytest.fixture(params=["customers_empty.csv", "customers_one_row.csv", "customers_small.csv"])
|
|
122
|
+
def customers_edge_cases(request) -> pd.DataFrame:
|
|
123
|
+
return pd.read_csv(FIXTURES / request.param, parse_dates=["signup_date"])
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Use `@pytest.mark.parametrize` to cover multiple scenarios without duplicating test bodies:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
@pytest.mark.parametrize(
|
|
130
|
+
"segment,expected_discount",
|
|
131
|
+
[("free", 0.0), ("pro", 0.1), ("enterprise", 0.2)],
|
|
132
|
+
)
|
|
133
|
+
def test_discount_by_segment(segment, expected_discount):
|
|
134
|
+
assert compute_discount(segment) == pytest.approx(expected_discount)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Keep fixture CSVs under ~50 rows. Anything larger belongs in a `data/` directory and should be generated or downloaded, not committed.
|
|
138
|
+
|
|
139
|
+
When a test genuinely needs a larger or procedurally generated DataFrame, build it deterministically with a seeded RNG inside a fixture — never inline, and never with the global `np.random` state:
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
@pytest.fixture
|
|
143
|
+
def synthetic_transactions() -> pd.DataFrame:
|
|
144
|
+
rng = np.random.default_rng(seed=42)
|
|
145
|
+
n = 1000
|
|
146
|
+
return pd.DataFrame({
|
|
147
|
+
"user_id": rng.integers(0, 100, size=n),
|
|
148
|
+
"amount": rng.lognormal(mean=3.0, sigma=1.0, size=n),
|
|
149
|
+
"ts": pd.date_range("2024-01-01", periods=n, freq="1h"),
|
|
150
|
+
})
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
A fixed seed means the same test always sees the same data, so any failure is reproducible and can be debugged instead of being dismissed as "must have been a weird random sample."
|
|
154
|
+
|
|
155
|
+
### Running the suite
|
|
156
|
+
|
|
157
|
+
Layout and commands stay boring on purpose:
|
|
158
|
+
|
|
159
|
+
```
|
|
160
|
+
tests/
|
|
161
|
+
conftest.py # shared fixtures
|
|
162
|
+
fixtures/ # small committed CSVs
|
|
163
|
+
customers_small.csv
|
|
164
|
+
customers_empty.csv
|
|
165
|
+
test_features.py # pytest for src/features.py
|
|
166
|
+
test_ingest.py # pandera + ingest tests
|
|
167
|
+
test_metrics.py # pytest for src/metrics.py
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Wire `pytest` into `pyproject.toml` so `pytest` alone runs the right suite:
|
|
171
|
+
|
|
172
|
+
```toml
|
|
173
|
+
[tool.pytest.ini_options]
|
|
174
|
+
testpaths = ["tests"]
|
|
175
|
+
addopts = "-q --strict-markers -m 'not slow'"
|
|
176
|
+
markers = ["slow: tests that load non-trivial data"]
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
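Run the fast suite on every save (a file-watcher like `pytest-watch` helps), and run `pytest -m slow` before each commit — a `-m` given on the command line overrides the `-m 'not slow'` in `addopts`, whereas plain `pytest` still skips slow tests. In CI, run the full suite unconditionally with `pytest -m ""`.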
|
|
180
|
+
|
|
181
|
+
### What NOT to test
|
|
182
|
+
|
|
183
|
+
Don't unit-test `pandas`, `numpy`, or `pandera` themselves — assume upstream libraries work and pin versions in `pyproject.toml` to catch surprises via dependency bumps, not your own test suite. Don't assert on model quality metrics here (AUC, precision, calibration); those live in `data-science-model-evaluation.md` and run on held-out data, not fixtures. Don't write tests that require a live database, S3 bucket, or trained model file — those belong in integration tests run out-of-band, not the fast `pytest` suite a developer runs on save. And don't test notebooks directly; if a notebook cell has logic worth testing, extract it to `src/` first, then test the function.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# `ml/` knowledge
|
|
2
|
+
|
|
3
|
+
Production machine-learning domain knowledge injected into universal pipeline
|
|
4
|
+
steps by `content/methodology/ml-overlay.yml`.
|
|
5
|
+
|
|
6
|
+
## Lockstep pairs with `data-science/`
|
|
7
|
+
|
|
8
|
+
Five documents here mirror documents in `content/knowledge/data-science/`. See
|
|
9
|
+
`content/knowledge/data-science/README.md` for the full pair table. Edits to
|
|
10
|
+
one side should trigger review of the other to prevent recommendation drift.
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# methodology/data-science-overlay.yml
|
|
2
|
+
name: data-science
|
|
3
|
+
description: >
|
|
4
|
+
Data science overlay — injects solo / small-team data science domain
|
|
5
|
+
knowledge into existing pipeline steps for local-first, reproducibility-first
|
|
6
|
+
analytical work and model prototyping.
|
|
7
|
+
project-type: data-science
|
|
8
|
+
|
|
9
|
+
knowledge-overrides:
|
|
10
|
+
# Foundational
|
|
11
|
+
create-prd: { append: [data-science-requirements] }
|
|
12
|
+
user-stories: { append: [data-science-requirements] }
|
|
13
|
+
coding-standards: { append: [data-science-conventions, data-science-notebook-discipline] }
|
|
14
|
+
project-structure: { append: [data-science-project-structure] }
|
|
15
|
+
dev-env-setup: { append: [data-science-dev-environment] }
|
|
16
|
+
git-workflow: { append: [data-science-reproducibility] }
|
|
17
|
+
|
|
18
|
+
# Architecture & Design
|
|
19
|
+
system-architecture: { append: [data-science-architecture] }
|
|
20
|
+
tech-stack: { append: [data-science-architecture, data-science-dev-environment] }
|
|
21
|
+
adrs: { append: [data-science-architecture] }
|
|
22
|
+
domain-modeling: { append: [data-science-data-versioning] }
|
|
23
|
+
database-schema: { append: [data-science-data-versioning] }
|
|
24
|
+
security: { append: [data-science-security] }
|
|
25
|
+
operations: { append: [data-science-experiment-tracking, data-science-observability, data-science-reproducibility] }
|
|
26
|
+
|
|
27
|
+
# Testing
|
|
28
|
+
tdd: { append: [data-science-testing] }
|
|
29
|
+
create-evals: { append: [data-science-testing, data-science-model-evaluation] }
|
|
30
|
+
|
|
31
|
+
# Reviews
|
|
32
|
+
review-architecture: { append: [data-science-architecture] }
|
|
33
|
+
review-database: { append: [data-science-data-versioning] }
|
|
34
|
+
review-security: { append: [data-science-security] }
|
|
35
|
+
review-operations: { append: [data-science-experiment-tracking, data-science-observability] }
|
|
36
|
+
review-testing: { append: [data-science-testing, data-science-model-evaluation] }
|
|
37
|
+
|
|
38
|
+
# Planning
|
|
39
|
+
implementation-plan: { append: [data-science-architecture] }
|