goldenanalysis 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. goldenanalysis-0.1.0/.gitignore +84 -0
  2. goldenanalysis-0.1.0/CHANGELOG.md +83 -0
  3. goldenanalysis-0.1.0/LICENSE +21 -0
  4. goldenanalysis-0.1.0/PKG-INFO +188 -0
  5. goldenanalysis-0.1.0/README.md +135 -0
  6. goldenanalysis-0.1.0/benchmarks/aggregate_benchmark.py +97 -0
  7. goldenanalysis-0.1.0/golden-suite.json +69 -0
  8. goldenanalysis-0.1.0/goldenanalysis/__init__.py +61 -0
  9. goldenanalysis-0.1.0/goldenanalysis/_api.py +154 -0
  10. goldenanalysis-0.1.0/goldenanalysis/_regressions.py +81 -0
  11. goldenanalysis-0.1.0/goldenanalysis/adapters/__init__.py +11 -0
  12. goldenanalysis-0.1.0/goldenanalysis/adapters/check.py +33 -0
  13. goldenanalysis-0.1.0/goldenanalysis/adapters/flow.py +25 -0
  14. goldenanalysis-0.1.0/goldenanalysis/adapters/frame.py +18 -0
  15. goldenanalysis-0.1.0/goldenanalysis/adapters/match.py +59 -0
  16. goldenanalysis-0.1.0/goldenanalysis/adapters/pipe.py +36 -0
  17. goldenanalysis-0.1.0/goldenanalysis/analyzers/__init__.py +7 -0
  18. goldenanalysis-0.1.0/goldenanalysis/analyzers/base.py +19 -0
  19. goldenanalysis-0.1.0/goldenanalysis/analyzers/cluster_dist.py +77 -0
  20. goldenanalysis-0.1.0/goldenanalysis/analyzers/frame_summary.py +72 -0
  21. goldenanalysis-0.1.0/goldenanalysis/analyzers/match_rates.py +92 -0
  22. goldenanalysis-0.1.0/goldenanalysis/analyzers/quality_rollup.py +130 -0
  23. goldenanalysis-0.1.0/goldenanalysis/cli/__init__.py +1 -0
  24. goldenanalysis-0.1.0/goldenanalysis/cli/main.py +160 -0
  25. goldenanalysis-0.1.0/goldenanalysis/core/__init__.py +1 -0
  26. goldenanalysis-0.1.0/goldenanalysis/core/_native_loader.py +67 -0
  27. goldenanalysis-0.1.0/goldenanalysis/core/aggregate.py +113 -0
  28. goldenanalysis-0.1.0/goldenanalysis/history.py +205 -0
  29. goldenanalysis-0.1.0/goldenanalysis/mcp/__init__.py +6 -0
  30. goldenanalysis-0.1.0/goldenanalysis/mcp/server.py +260 -0
  31. goldenanalysis-0.1.0/goldenanalysis/models/__init__.py +31 -0
  32. goldenanalysis-0.1.0/goldenanalysis/models/analyzer.py +43 -0
  33. goldenanalysis-0.1.0/goldenanalysis/models/policy.py +51 -0
  34. goldenanalysis-0.1.0/goldenanalysis/models/report.py +102 -0
  35. goldenanalysis-0.1.0/goldenanalysis/narrative.py +61 -0
  36. goldenanalysis-0.1.0/goldenanalysis/py.typed +0 -0
  37. goldenanalysis-0.1.0/goldenanalysis/registry.py +56 -0
  38. goldenanalysis-0.1.0/goldenanalysis/render.py +67 -0
  39. goldenanalysis-0.1.0/llms.txt +85 -0
  40. goldenanalysis-0.1.0/pyproject.toml +65 -0
  41. goldenanalysis-0.1.0/server.json +23 -0
  42. goldenanalysis-0.1.0/tests/conftest.py +19 -0
  43. goldenanalysis-0.1.0/tests/core/test_native_parity.py +161 -0
  44. goldenanalysis-0.1.0/tests/fixtures/__init__.py +54 -0
  45. goldenanalysis-0.1.0/tests/fixtures/customers_small.parquet +0 -0
  46. goldenanalysis-0.1.0/tests/fixtures/report_frame_summary.json +80 -0
  47. goldenanalysis-0.1.0/tests/integration/test_real_producers.py +131 -0
  48. goldenanalysis-0.1.0/tests/test_adapter_frame.py +19 -0
  49. goldenanalysis-0.1.0/tests/test_adapters_unit.py +79 -0
  50. goldenanalysis-0.1.0/tests/test_aggregate.py +53 -0
  51. goldenanalysis-0.1.0/tests/test_analyze.py +38 -0
  52. goldenanalysis-0.1.0/tests/test_analyze_suite.py +49 -0
  53. goldenanalysis-0.1.0/tests/test_cli.py +94 -0
  54. goldenanalysis-0.1.0/tests/test_cluster_dist.py +48 -0
  55. goldenanalysis-0.1.0/tests/test_exporters.py +64 -0
  56. goldenanalysis-0.1.0/tests/test_frame_summary.py +48 -0
  57. goldenanalysis-0.1.0/tests/test_history_jsonl.py +80 -0
  58. goldenanalysis-0.1.0/tests/test_history_sqlite.py +46 -0
  59. goldenanalysis-0.1.0/tests/test_match_rates.py +63 -0
  60. goldenanalysis-0.1.0/tests/test_mcp_server.py +84 -0
  61. goldenanalysis-0.1.0/tests/test_models.py +60 -0
  62. goldenanalysis-0.1.0/tests/test_narrative.py +46 -0
  63. goldenanalysis-0.1.0/tests/test_native_loader.py +32 -0
  64. goldenanalysis-0.1.0/tests/test_policy_models.py +29 -0
  65. goldenanalysis-0.1.0/tests/test_quality_rollup.py +56 -0
  66. goldenanalysis-0.1.0/tests/test_registry.py +29 -0
  67. goldenanalysis-0.1.0/tests/test_regression_logic.py +44 -0
  68. goldenanalysis-0.1.0/tests/test_report_schema.py +42 -0
  69. goldenanalysis-0.1.0/tests/test_scenario_regression.py +64 -0
  70. goldenanalysis-0.1.0/tests/test_smoke.py +9 -0
@@ -0,0 +1,84 @@
1
+ # Build artifacts
2
+ target/
3
+ dist/
4
+ build/
5
+ *.egg-info/
6
+ node_modules/
7
+ __pycache__/
8
+ **/.hypothesis/
9
+ .venv/
10
+ .uv-cache/
11
+
12
+ # Web UI build output (populated by scripts/build_web.py before `hatch build`).
13
+ # .gitkeep stays so the source tree exists in checkouts and the wheel's
14
+ # force-include glob has something to match.
15
+ packages/python/goldenmatch/goldenmatch/web/static/*
16
+ !packages/python/goldenmatch/goldenmatch/web/static/.gitkeep
17
+
18
+ # Playwright runtime artifacts
19
+ packages/python/goldenmatch/web/frontend/test-results/
20
+ packages/python/goldenmatch/web/frontend/playwright-report/
21
+
22
+ # YAML-edit backups (web UI's POST /api/v1/rules/save writes goldenmatch.yml.bak
23
+ # next to the file before clobbering — local-only safety net, not source).
24
+ *.yml.bak
25
+
26
+ # Steward labels — runtime-written by the inspector's review tab. Keep them
27
+ # out of git so a contributor's labels don't ride along on PRs. If you want
28
+ # seed labels for a demo project, commit a curated labels.seed.jsonl and
29
+ # rename at use time.
30
+ labels.jsonl
31
+
32
+ # Generated outputs
33
+ *_lineage.json
34
+ *_clusters.csv
35
+ # Allow committed test fixtures and demo project that mimic run outputs
36
+ !packages/python/goldenmatch/tests/web/fixtures/**
37
+ !packages/python/goldenmatch/tests/**/fixtures/**
38
+ !packages/python/goldenmatch/web/demo/**
39
+
40
+ # IDE
41
+ .vscode/
42
+ .idea/
43
+
44
+ # Turborepo
45
+ .turbo/
46
+
47
+ # Claude Code agent worktrees (transient isolated checkouts created by
48
+ # background subagents). Never tracked; project-level .claude settings can
49
+ # still be committed since only the worktrees subdir is ignored.
50
+ .claude/worktrees/
51
+
52
+ # Superpowers / manual git worktrees (isolated checkouts; never tracked)
53
+ .worktrees/
54
+
55
+ # Local profiling artifacts (per CLAUDE.md convention — cProfile dumps,
56
+ # scale-audit JSON outputs, synthetic fixtures). Documented as gitignored
57
+ # in CLAUDE.md; this entry makes that real.
58
+ .profile_tmp/
59
+ packages/python/goldenmatch/bench-dataset-v1/
60
+
61
+ # Local runtime state: Learning Memory DB, review queue, identity graph,
62
+ # cross-run autoconfig memory. The engine (and the test suite) rewrites these
63
+ # on every run, so they are never source. `.goldenmatch/memory.db` used to be
64
+ # tracked at the repo root and re-dirtied the working tree on each run; it was
65
+ # `git rm --cached`'d alongside this entry.
66
+ .goldenmatch/
67
+
68
+ # Compiled native acceleration ext (built from packages/rust/extensions/native
69
+ # via scripts/build_native.py). Platform-specific abi3 artifact, never source.
70
+ packages/python/goldenmatch/goldenmatch/_native*.so
71
+ # GoldenCheck's counterpart (packages/rust/extensions/goldencheck-native via
72
+ # scripts/build_goldencheck_native.py). Same rationale.
73
+ packages/python/goldencheck/goldencheck/_native*.so
74
+
75
+ # Benchmark datasets downloaded at runtime (DBLP-ACM, etc.) — not committed.
76
+ datasets/
77
+ # Compiled native acceleration ext for goldenflow (built from packages/rust/extensions/native-flow); same rationale as the _native*.so entries above.
78
+ packages/python/goldenflow/goldenflow/_native*.so
79
+ # Same, for goldenanalysis (built from packages/rust/extensions/analysis-native
80
+ # via scripts/build_analysis_native.py).
81
+ packages/python/goldenanalysis/goldenanalysis/_native*.so
82
+
83
+ # codebase-memory-mcp local index/snapshot (rebuilt by the SessionStart hook)
84
+ .codebase-memory/
@@ -0,0 +1,83 @@
1
+ # Changelog
2
+
3
+ All notable changes to GoldenAnalysis are documented here. The format follows
4
+ [Keep a Changelog](https://keepachangelog.com/); this project uses semantic
5
+ versioning.
6
+
7
+ ## [0.2.0] - unreleased
8
+
9
+ Phase 2a — suite consumption. Produce an `AnalysisReport` from real suite outputs.
10
+
11
+ ### Added
12
+ - `ga.analyze_match(result, *, certificate=None)` — analyze a GoldenMatch
13
+ `DedupeResult` (`match.rates` + `cluster.distribution`).
14
+ - `ga.analyze_pipeline(result)` — analyze a GoldenPipe `PipeResult`, fanning out to
15
+ every analyzer whose consumed artifacts are present.
16
+ - Analyzers: `match.rates` (pair count, match rate, threshold, recall estimate +
17
+ safe bound from a certificate, mean score, score histogram), `cluster.distribution`
18
+ (count, singleton ratio, size quantiles, reduction ratio, size histogram),
19
+ `quality.rollup` (findings totals + GoldenCheck score + GoldenFlow rows-changed /
20
+ rules-fired, degrading per-producer).
21
+ - Adapters: `match` / `flow` / `pipe` (duck-typed, no eager suite imports) and
22
+ `check` (lazy `goldencheck` import behind the `[check]` extra; pure `from_scan`
23
+ seam). They populate a standardized `AnalyzerInput.artifacts` vocabulary.
24
+
25
+ Phase 2b — cross-run. Trend + regression detection over a run history.
26
+
27
+ - `ReportHistory(backend="jsonl"|"sqlite", path=...)` — append-only store of
28
+ `AnalysisReport`s keyed by `(analysis_name, dataset, run_id)`; mirrors the
29
+ IdentityStore constructor idiom. JSONL default, SQLite optional (durable); both
30
+ stdlib, no new deps.
31
+ - `hist.trend(metric_key, dataset)` → `TrendSeries`; `hist.detect_regressions(
32
+ dataset, baseline=..., policy=...)` → flagged `Regression`s. `Baseline` is a
33
+ strategy (`rolling_median` default / `previous` / `last_known_good`); `RegressionPolicy`
34
+ carries per-metric percent gates and respects each `Metric.direction`.
35
+ - Narrative generation (`narrative.build_narrative`) — names the worst flagged
36
+ regression + co-moving metrics; `to_markdown(regressions=...)` adds the callout +
37
+ Δ column (byte-identical to Phase 1 without it).
38
+ - The `goldenanalysis trend` / `regressions` CLI commands are now real (no longer stubs),
39
+ with `--policy` and `--fail-on-regression` (CI gate).
40
+
41
+ ### Notes
42
+ - `match.recall_estimate` flows automatically once `goldenmatch.dedupe_df(...,
43
+ certify=True)` attaches a `RecallEstimate` (goldenmatch PR); `match.recall_safe_bound`
44
+ needs a labelled audit and is supplied via `certificate=`. Both degrade silently
45
+ when absent.
46
+ - `frame.summary` does not run under `analyze_pipeline` (a `PipeResult` exposes no
47
+ input frame).
48
+ - `last_known_good` baseline is v1-aliased to `previous` until a per-run health
49
+ signal exists (documented follow-up).
50
+
51
+ ## [0.1.0] - 2026-06-08
52
+
53
+ Phase 1 — Python core. The generic frame path, end to end.
54
+
55
+ ### Added
56
+ - `ga.analyze(df, analyzers=[...])` — run analyzers over a polars DataFrame and
57
+ assemble a single `AnalysisReport`. Works with zero other suite packages
58
+ installed.
59
+ - Model layer: `Metric`, `AnalysisTable`, `AnalysisReport` (`schema_version=1`
60
+ cross-surface contract anchor), and analyzer I/O types.
61
+ - `frame.summary` analyzer — row/column counts, mean null ratio, exact-duplicate
62
+ row ratio, estimated memory, and a `per_column` table.
63
+ - Pure-Python/Polars aggregation primitives (`null_ratio_per_column`,
64
+ `duplicate_row_ratio`, `histogram`, `quantile`) — the byte-identical reference
65
+ for the future Rust accelerator.
66
+ - Analyzer registry over the `goldenanalysis.analyzers` entry-point group, with an
67
+ editable-install fallback map.
68
+ - Exporters: `to_json` / `from_json` (lossless round-trip), `to_markdown`,
69
+ `to_parquet` (long-form metric frame + per-table sidecars).
70
+ - `goldenanalysis` CLI: `report` command; `trend` / `regressions` stubbed until
71
+ `0.2.0`.
72
+ - Native-loader gate (`GOLDENANALYSIS_NATIVE`) with an empty `_GATED_ON` — the
73
+ Phase 4 seam, under contract test from day one (pure-Python fallback).
74
+
75
+ ### Deferred (later phases)
76
+ - Suite adapters + `match.rates` / `cluster.distribution` / `quality.rollup`,
77
+ `ReportHistory` + regression detection + narrative (Phase 2).
78
+ - TypeScript parity port (Phase 3).
79
+ - Rust `analysis-core` / `analysis-native` accelerator (Phase 4).
80
+ - GoldenPipe terminal stage + goldensuite-mcp surfacing, and the
81
+ `publish-goldenanalysis*` workflows (Phase 5 / follow-up).
82
+
83
+ [0.1.0]: https://github.com/benseverndev-oss/goldenmatch/releases/tag/goldenanalysis-v0.1.0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ben Severn
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,188 @@
1
+ Metadata-Version: 2.4
2
+ Name: goldenanalysis
3
+ Version: 0.1.0
4
+ Summary: Read-only cross-cutting analysis, metrics, and reporting engine for the Golden Suite
5
+ Project-URL: Homepage, https://github.com/benseverndev-oss/goldenmatch
6
+ Project-URL: Repository, https://github.com/benseverndev-oss/goldenmatch
7
+ Project-URL: Documentation, https://github.com/benseverndev-oss/goldenmatch/tree/main/packages/python/goldenanalysis#readme
8
+ Project-URL: Issues, https://github.com/benseverndev-oss/goldenmatch/issues
9
+ Project-URL: Changelog, https://github.com/benseverndev-oss/goldenmatch/blob/main/packages/python/goldenanalysis/CHANGELOG.md
10
+ Project-URL: Author, https://bensevern.dev
11
+ Author-email: Ben Severn <ben@bensevern.dev>
12
+ License: MIT
13
+ License-File: LICENSE
14
+ Keywords: analysis,data-quality,drift-detection,entity-resolution,golden-suite,metrics,polars,regression-detection,reporting
15
+ Classifier: Development Status :: 3 - Alpha
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: polars>=1.0
24
+ Requires-Dist: pyarrow>=15
25
+ Requires-Dist: pydantic>=2.7
26
+ Requires-Dist: rich>=13.0
27
+ Requires-Dist: typer>=0.12
28
+ Provides-Extra: api
29
+ Requires-Dist: fastapi>=0.110; extra == 'api'
30
+ Requires-Dist: uvicorn>=0.30; extra == 'api'
31
+ Provides-Extra: check
32
+ Requires-Dist: goldencheck>=1.2.0; extra == 'check'
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
35
+ Requires-Dist: pytest>=8.0; extra == 'dev'
36
+ Requires-Dist: ruff>=0.6; extra == 'dev'
37
+ Provides-Extra: flow
38
+ Requires-Dist: goldenflow>=1.1.5; extra == 'flow'
39
+ Provides-Extra: match
40
+ Requires-Dist: goldenmatch>=1.15.0; extra == 'match'
41
+ Provides-Extra: mcp
42
+ Requires-Dist: mcp>=1.0; extra == 'mcp'
43
+ Provides-Extra: native
44
+ Requires-Dist: goldenanalysis-native>=0.1.0; extra == 'native'
45
+ Provides-Extra: pipe
46
+ Requires-Dist: goldenpipe>=1.2.0; extra == 'pipe'
47
+ Provides-Extra: suite
48
+ Requires-Dist: goldencheck>=1.2.0; extra == 'suite'
49
+ Requires-Dist: goldenflow>=1.1.5; extra == 'suite'
50
+ Requires-Dist: goldenmatch>=1.15.0; extra == 'suite'
51
+ Requires-Dist: goldenpipe>=1.2.0; extra == 'suite'
52
+ Description-Content-Type: text/markdown
53
+
54
+ # GoldenAnalysis
55
+
56
+ **Measure and report across the Golden Suite.** A read-only, cross-cutting
57
+ analysis / metrics / reporting engine: it consumes any stage's typed artifacts
58
+ (or a raw DataFrame) and emits a unified, exportable `AnalysisReport`.
59
+
60
+ > GoldenAnalysis ships the generic frame path plus suite adapters (GoldenMatch /
61
+ > GoldenCheck / GoldenFlow / GoldenPipe), cross-run trend + regression detection,
62
+ > an edge-safe TypeScript port (optional WASM), and an optional Rust accelerator
63
+ > for the heavy aggregation primitives — all documented below. See
64
+ > `docs/superpowers/specs/2026-06-08-goldenanalysis-cross-cutting-analysis-engine-design.md`
65
+ > for the design rationale.
66
+
67
+ ## Install
68
+
69
+ ```bash
70
+ pip install goldenanalysis
71
+ ```
72
+
73
+ Zero suite dependencies for the generic path — it works on any polars DataFrame
74
+ even with no other Golden package installed.
75
+
76
+ ## Quickstart
77
+
78
+ ```python
79
+ import polars as pl
80
+ import goldenanalysis as ga
81
+
82
+ df = pl.read_parquet("customers.parquet")
83
+
84
+ report = ga.analyze(df, analyzers=["frame.summary"])
85
+ print(report.to_markdown())
86
+
87
+ report.to_json("report.json")
88
+ report.to_parquet("report.parquet") # long-form metric frame + table sidecars
89
+ ```
90
+
91
+ CLI:
92
+
93
+ ```bash
94
+ goldenanalysis report customers.parquet --analyzers frame.summary --format markdown
95
+ goldenanalysis report report.json --format markdown # re-render a saved report
96
+ ```
97
+
98
+ `trend` and `regressions` operate over a saved run history (see **Cross-run** below).
99
+
100
+ ## Over the suite
101
+
102
+ With the relevant extra installed (`pip install goldenanalysis[match,check,flow,pipe]`):
103
+
104
+ ```python
105
+ # A GoldenMatch dedupe result -> match.rates + cluster.distribution
106
+ report = ga.analyze_match(dedupe_result)
107
+
108
+ # A whole-pipeline manifest -> every analyzer whose artifacts are present
109
+ report = ga.analyze_pipeline(pipe_result)
110
+ ```
111
+
112
+ `match.rates` emits `match.recall_estimate` when GoldenMatch ran
113
+ `dedupe_df(..., certify=True)` (it attaches an unsupervised `RecallEstimate`), and
114
+ `match.recall_safe_bound` when you pass an audit-calibrated certificate
115
+ (`analyze_match(result, certificate=...)`) — the safe bound needs a labelled
116
+ sample, so it can't be computed automatically. Both degrade silently when absent.
117
+
118
+ ## Cross-run — trend + regression detection
119
+
120
+ Store reports over time, then trend a metric or detect regressions without ground
121
+ truth:
122
+
123
+ ```python
124
+ hist = ga.ReportHistory(backend="jsonl", path=".golden/analysis.jsonl") # or backend="sqlite"
125
+ hist.append(report) # keyed by (dataset, run_id)
126
+
127
+ hist.trend("cluster.singleton_ratio", "customers") # -> TrendSeries
128
+
129
+ policy = ga.RegressionPolicy(default_pct=10.0, per_metric={"match.recall_safe_bound": 2.0})
130
+ regs = hist.detect_regressions("customers", baseline="rolling_median", policy=policy)
131
+ print(report.to_markdown(regs)) # callout + Δ-vs-baseline column
132
+ ```
133
+
134
+ The `Baseline` is a strategy (`rolling_median` default — immune to one noisy night
135
+ — plus `previous` / `last_known_good`), and `RegressionPolicy` thresholds are
136
+ per-metric and respect each metric's `direction` (a `higher_better` metric only
137
+ flags on a drop). CLI:
138
+
139
+ ```bash
140
+ goldenanalysis trend --metric cluster.singleton_ratio --dataset customers --history .golden/analysis.jsonl
141
+ goldenanalysis regressions --dataset customers --history .golden/analysis.jsonl \
142
+ --policy "match.recall_safe_bound=2" --fail-on-regression # exit 1 on a flagged regression (CI gate)
143
+ ```
144
+
145
+ ## GoldenCheck vs GoldenAnalysis
146
+
147
+ They are easy to confuse and are deliberately distinct:
148
+
149
+ | | GoldenCheck | GoldenAnalysis |
150
+ |---|---|---|
151
+ | **Scope** | Profiles a *single input dataset at ingest* | *Cross-cutting* over any stage's outputs |
152
+ | **Direction** | A **producer** of artifacts (scan findings) | A **consumer** of artifacts (incl. GoldenCheck's) |
153
+ | **Across runs?** | No — one dataset, one scan | Yes — trend / drift / regression over a run history |
154
+ | **Writes data?** | Suggests/applies fixes | **Never** — read-only by construction |
155
+
156
+ The hard line: **GoldenAnalysis depends on other packages' types; never the
157
+ reverse.** It sits *beside* the pipeline as a reporting step, consuming
158
+ GoldenCheck / GoldenFlow / GoldenMatch / GoldenPipe / InferMap outputs — it does
159
+ not replace GoldenCheck's ingest-time profiling, and GoldenCheck does not import
160
+ GoldenAnalysis.
161
+
162
+ ## Native accelerator (optional, `goldenanalysis[native]`)
163
+
164
+ An optional Rust accelerator for the heavy aggregation primitives, gated exactly
165
+ like `goldenmatch[native]` / `goldencheck[native]`:
166
+
167
+ ```bash
168
+ pip install goldenanalysis[native] # pulls the separate goldenanalysis-native wheel
169
+ ```
170
+
171
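+ The pure-Python path stays the **default and the byte-identical reference** (the default for a plain install; the end of this section covers what changes once the native wheel is installed). The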
172
+ compiled kernel (`analysis-core` pyo3-free + `analysis-native` abi3 wheel) mirrors
173
+ `core/aggregate.py`'s `histogram` / `quantile` value-for-value, reading input as a
174
+ Float64 Arrow array (zero-copy). The loader gate (`core/_native_loader.py`,
175
+ `GOLDENANALYSIS_NATIVE=auto|0|1`) uses a primitive only once it's in `_GATED_ON` —
176
+ which holds **`histogram` and `quantile`**: both proved byte-identical
177
+ (`tests/core/test_native_parity.py`) **and** measured **5.8–9.9x faster** than the
178
+ pure Python loop on Linux x86_64 at 1M–10M rows, *including* the list→Arrow
179
+ conversion the dispatch pays (`benchmarks/aggregate_benchmark.py` +
180
+ `bench-analysis-native.yml`). A new primitive joins only after the same two gates
181
+ clear — "it's Rust" is never enough (the goldencheck composite-key kernel was 2.5x
182
+ *slower* until the gate caught it). With `goldenanalysis[native]` installed, the
183
+ `auto` default uses the native path automatically; `GOLDENANALYSIS_NATIVE=0` forces
184
+ pure. In-tree dev build: `uv run python scripts/build_analysis_native.py`.
185
+
186
+ ## License
187
+
188
+ MIT.
@@ -0,0 +1,135 @@
1
+ # GoldenAnalysis
2
+
3
+ **Measure and report across the Golden Suite.** A read-only, cross-cutting
4
+ analysis / metrics / reporting engine: it consumes any stage's typed artifacts
5
+ (or a raw DataFrame) and emits a unified, exportable `AnalysisReport`.
6
+
7
+ > GoldenAnalysis ships the generic frame path plus suite adapters (GoldenMatch /
8
+ > GoldenCheck / GoldenFlow / GoldenPipe), cross-run trend + regression detection,
9
+ > an edge-safe TypeScript port (optional WASM), and an optional Rust accelerator
10
+ > for the heavy aggregation primitives — all documented below. See
11
+ > `docs/superpowers/specs/2026-06-08-goldenanalysis-cross-cutting-analysis-engine-design.md`
12
+ > for the design rationale.
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ pip install goldenanalysis
18
+ ```
19
+
20
+ Zero suite dependencies for the generic path — it works on any polars DataFrame
21
+ even with no other Golden package installed.
22
+
23
+ ## Quickstart
24
+
25
+ ```python
26
+ import polars as pl
27
+ import goldenanalysis as ga
28
+
29
+ df = pl.read_parquet("customers.parquet")
30
+
31
+ report = ga.analyze(df, analyzers=["frame.summary"])
32
+ print(report.to_markdown())
33
+
34
+ report.to_json("report.json")
35
+ report.to_parquet("report.parquet") # long-form metric frame + table sidecars
36
+ ```
37
+
38
+ CLI:
39
+
40
+ ```bash
41
+ goldenanalysis report customers.parquet --analyzers frame.summary --format markdown
42
+ goldenanalysis report report.json --format markdown # re-render a saved report
43
+ ```
44
+
45
+ `trend` and `regressions` operate over a saved run history (see **Cross-run** below).
46
+
47
+ ## Over the suite
48
+
49
+ With the relevant extra installed (`pip install goldenanalysis[match,check,flow,pipe]`):
50
+
51
+ ```python
52
+ # A GoldenMatch dedupe result -> match.rates + cluster.distribution
53
+ report = ga.analyze_match(dedupe_result)
54
+
55
+ # A whole-pipeline manifest -> every analyzer whose artifacts are present
56
+ report = ga.analyze_pipeline(pipe_result)
57
+ ```
58
+
59
+ `match.rates` emits `match.recall_estimate` when GoldenMatch ran
60
+ `dedupe_df(..., certify=True)` (it attaches an unsupervised `RecallEstimate`), and
61
+ `match.recall_safe_bound` when you pass an audit-calibrated certificate
62
+ (`analyze_match(result, certificate=...)`) — the safe bound needs a labelled
63
+ sample, so it can't be computed automatically. Both degrade silently when absent.
64
+
65
+ ## Cross-run — trend + regression detection
66
+
67
+ Store reports over time, then trend a metric or detect regressions without ground
68
+ truth:
69
+
70
+ ```python
71
+ hist = ga.ReportHistory(backend="jsonl", path=".golden/analysis.jsonl") # or backend="sqlite"
72
+ hist.append(report) # keyed by (dataset, run_id)
73
+
74
+ hist.trend("cluster.singleton_ratio", "customers") # -> TrendSeries
75
+
76
+ policy = ga.RegressionPolicy(default_pct=10.0, per_metric={"match.recall_safe_bound": 2.0})
77
+ regs = hist.detect_regressions("customers", baseline="rolling_median", policy=policy)
78
+ print(report.to_markdown(regs)) # callout + Δ-vs-baseline column
79
+ ```
80
+
81
+ The `Baseline` is a strategy (`rolling_median` default — immune to one noisy night
82
+ — plus `previous` / `last_known_good`), and `RegressionPolicy` thresholds are
83
+ per-metric and respect each metric's `direction` (a `higher_better` metric only
84
+ flags on a drop). CLI:
85
+
86
+ ```bash
87
+ goldenanalysis trend --metric cluster.singleton_ratio --dataset customers --history .golden/analysis.jsonl
88
+ goldenanalysis regressions --dataset customers --history .golden/analysis.jsonl \
89
+ --policy "match.recall_safe_bound=2" --fail-on-regression # exit 1 on a flagged regression (CI gate)
90
+ ```
91
+
92
+ ## GoldenCheck vs GoldenAnalysis
93
+
94
+ They are easy to confuse and are deliberately distinct:
95
+
96
+ | | GoldenCheck | GoldenAnalysis |
97
+ |---|---|---|
98
+ | **Scope** | Profiles a *single input dataset at ingest* | *Cross-cutting* over any stage's outputs |
99
+ | **Direction** | A **producer** of artifacts (scan findings) | A **consumer** of artifacts (incl. GoldenCheck's) |
100
+ | **Across runs?** | No — one dataset, one scan | Yes — trend / drift / regression over a run history |
101
+ | **Writes data?** | Suggests/applies fixes | **Never** — read-only by construction |
102
+
103
+ The hard line: **GoldenAnalysis depends on other packages' types; never the
104
+ reverse.** It sits *beside* the pipeline as a reporting step, consuming
105
+ GoldenCheck / GoldenFlow / GoldenMatch / GoldenPipe / InferMap outputs — it does
106
+ not replace GoldenCheck's ingest-time profiling, and GoldenCheck does not import
107
+ GoldenAnalysis.
108
+
109
+ ## Native accelerator (optional, `goldenanalysis[native]`)
110
+
111
+ An optional Rust accelerator for the heavy aggregation primitives, gated exactly
112
+ like `goldenmatch[native]` / `goldencheck[native]`:
113
+
114
+ ```bash
115
+ pip install goldenanalysis[native] # pulls the separate goldenanalysis-native wheel
116
+ ```
117
+
118
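+ The pure-Python path stays the **default and the byte-identical reference** (the default for a plain install; the end of this section covers what changes once the native wheel is installed). The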
119
+ compiled kernel (`analysis-core` pyo3-free + `analysis-native` abi3 wheel) mirrors
120
+ `core/aggregate.py`'s `histogram` / `quantile` value-for-value, reading input as a
121
+ Float64 Arrow array (zero-copy). The loader gate (`core/_native_loader.py`,
122
+ `GOLDENANALYSIS_NATIVE=auto|0|1`) uses a primitive only once it's in `_GATED_ON` —
123
+ which holds **`histogram` and `quantile`**: both proved byte-identical
124
+ (`tests/core/test_native_parity.py`) **and** measured **5.8–9.9x faster** than the
125
+ pure Python loop on Linux x86_64 at 1M–10M rows, *including* the list→Arrow
126
+ conversion the dispatch pays (`benchmarks/aggregate_benchmark.py` +
127
+ `bench-analysis-native.yml`). A new primitive joins only after the same two gates
128
+ clear — "it's Rust" is never enough (the goldencheck composite-key kernel was 2.5x
129
+ *slower* until the gate caught it). With `goldenanalysis[native]` installed, the
130
+ `auto` default uses the native path automatically; `GOLDENANALYSIS_NATIVE=0` forces
131
+ pure. In-tree dev build: `uv run python scripts/build_analysis_native.py`.
132
+
133
+ ## License
134
+
135
+ MIT.
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env python3
2
+ """A/B bench for the GoldenAnalysis native aggregation kernels.
3
+
4
+ Measures the median wall-clock time (5 runs by default, see ``--runs``) of ``histogram`` / ``quantile`` over a large array,
5
+ three ways, to decide the ``_native_loader._GATED_ON`` flip:
6
+
7
+ - ``pure`` -- the pure-Python reference (``core/aggregate``), a Python list in.
8
+ - ``native_raw`` -- the native kernel with the Arrow array ALREADY materialized.
9
+ This is the *frames-out ceiling*: what the kernel is worth when
10
+ a caller hands it Arrow directly (the #663 columnar world).
11
+ - ``native+conv`` -- the REALISTIC dispatch for the current call convention: a Python
12
+ list in, converted to Arrow, then the native kernel. This is what
13
+ ``aggregate.histogram`` would pay today (it receives a list).
14
+
15
+ GATE: flip ``_GATED_ON`` for a primitive ONLY if ``native+conv`` comfortably beats
16
+ ``pure``. Don't gate on ``native_raw`` -- the current call sites pass Python lists, so
17
+ the conversion is real. And don't gate on "it's Rust": the pure ``histogram`` is a
18
+ tight loop and ``quantile`` leans on C ``sorted``; the goldencheck composite-key kernel
19
+ was 2.5x SLOWER than its baseline until the gate caught it. Build the ext first
20
+ (``scripts/build_analysis_native.py``); otherwise this reports pure-only.
21
+
22
+ POLARS_SKIP_CPU_CHECK=1 uv run python \
23
+ packages/python/goldenanalysis/benchmarks/aggregate_benchmark.py --rows 1000000
24
+ """
25
+ from __future__ import annotations
26
+
27
+ import argparse
28
+ import platform
29
+ import random
30
+ import statistics
31
+ import sys
32
+ import time
33
+ from collections.abc import Callable
34
+
35
+ from goldenanalysis.core import aggregate
36
+ from goldenanalysis.core._native_loader import native_available, native_module
37
+
38
+
39
def _median_wall(fn: Callable[[], object], runs: int) -> float:
    """Return the median wall-clock duration, in seconds, of ``runs`` calls to ``fn``.

    Args:
        fn: Zero-argument callable to time; its return value is discarded.
        runs: Number of timed calls to make.

    Returns:
        The ``statistics.median`` of the per-call ``time.perf_counter`` deltas.
    """

    def _timed_call() -> float:
        # One sample: elapsed perf_counter time around a single call.
        started = time.perf_counter()
        fn()
        return time.perf_counter() - started

    return statistics.median([_timed_call() for _ in range(runs)])
46
+
47
+
48
def main() -> int:
    """Benchmark the pure-Python and native ``histogram`` / ``quantile`` paths.

    Parses ``--rows``, ``--bins``, ``--runs`` and ``--seed`` from the command
    line, draws ``--rows`` floats uniformly from ``[-1000, 1000]`` using a
    seeded ``random.Random``, and prints one timing line per primitive. When
    ``native_available()`` is true, each line also reports ``native_raw``
    (Arrow array built once, up front) and ``native+conv`` (list -> Arrow
    conversion repeated inside every timed call), each with its speed-up
    relative to ``pure``.

    Returns:
        ``0`` in all cases.
    """
    parser = argparse.ArgumentParser()
    for flag, default in (
        ("--rows", 1_000_000),
        ("--bins", 10),
        ("--runs", 5),
        ("--seed", 0),
    ):
        parser.add_argument(flag, type=int, default=default)
    args = parser.parse_args()

    rng = random.Random(args.seed)
    values = [rng.uniform(-1000.0, 1000.0) for _ in range(args.rows)]
    print(f"# {platform.system()} {platform.machine()} | python {sys.version.split()[0]}")
    print(f"rows={args.rows:,} bins={args.bins} runs={args.runs}")

    # All three stay None unless the native extension is importable.
    native_mod = None
    arrow_values = None
    pa = None
    if not native_available():
        print("native ext NOT built -> pure-only (run scripts/build_analysis_native.py to A/B)")
    else:
        import pyarrow as pa  # noqa: F811

        # Materialized once so the native_raw timing excludes the conversion.
        arrow_values = pa.array(values, type=pa.float64())
        native_mod = native_module()

    def bench(name: str, pure: Callable[[], object], native_on_arr: Callable[[object], object]) -> None:
        """Time one primitive and print its result line."""
        # NOTE(review): ``pure`` goes through ``aggregate.*``; if those functions
        # dispatch to the native kernel once a primitive is gated on, this column
        # is no longer a pure-Python timing -- confirm against core/aggregate.py.
        pure_ms = _median_wall(pure, args.runs) * 1e3
        parts = [f"{name:<10} pure={pure_ms:9.2f} ms"]
        if native_mod is not None and pa is not None:
            raw_ms = _median_wall(lambda: native_on_arr(arrow_values), args.runs) * 1e3
            # Rebuild the Arrow array inside the timed call so the conversion is paid.
            conv_ms = _median_wall(
                lambda: native_on_arr(pa.array(values, type=pa.float64())), args.runs
            ) * 1e3
            parts.append(f" native_raw={raw_ms:9.2f} ms ({pure_ms / raw_ms:5.2f}x)")
            parts.append(f" native+conv={conv_ms:9.2f} ms ({pure_ms / conv_ms:5.2f}x)")
        print("".join(parts))

    def pure_histogram() -> object:
        return aggregate.histogram(values, args.bins)

    def native_histogram(a: object) -> object:
        return native_mod.histogram(a, args.bins)

    def pure_quantile() -> object:
        return aggregate.quantile(values, 0.95)

    def native_quantile(a: object) -> object:
        return native_mod.quantile(a, 0.95)

    bench("histogram", pure_histogram, native_histogram)
    bench("quantile", pure_quantile, native_quantile)

    if native_mod is not None:
        print(
            "\n".join(
                (
                    "\nGATE: flip _GATED_ON only if native+conv (Python list in -> the current",
                    "aggregate.py call convention) comfortably beats pure. native_raw is the",
                    "frames-out ceiling (Arrow already materialized), NOT the current dispatch.",
                )
            )
        )
    return 0
94
+
95
+
96
# Script entry point: hand main()'s return value to the interpreter as the exit status.
if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,69 @@
1
+ {
2
+ "name": "Golden Suite",
3
+ "description": "Data quality toolkit -- validate, transform, deduplicate, orchestrate, and analyze",
4
+ "version": "2026-06-08",
5
+ "tools": [
6
+ {
7
+ "name": "GoldenCheck",
8
+ "purpose": "Data quality validation",
9
+ "repo": "github.com/benseverndev-oss/goldencheck",
10
+ "install": "pip install goldencheck",
11
+ "mcp": {"command": "goldencheck mcp-serve", "tools": 19},
12
+ "a2a": {"command": "goldencheck agent-serve --port 8100", "skills": 9},
13
+ "cli": "goldencheck",
14
+ "python": "from goldencheck import scan_file, validate_file"
15
+ },
16
+ {
17
+ "name": "GoldenFlow",
18
+ "purpose": "Data transformation",
19
+ "repo": "github.com/benseverndev-oss/goldenflow",
20
+ "install": "pip install goldenflow",
21
+ "mcp": {"command": "goldenflow mcp-serve", "tools": 10},
22
+ "a2a": {"command": "goldenflow agent-serve --port 8150", "skills": 6},
23
+ "cli": "goldenflow",
24
+ "python": "from goldenflow import transform_file, transform_df"
25
+ },
26
+ {
27
+ "name": "GoldenMatch",
28
+ "purpose": "Entity resolution and deduplication",
29
+ "repo": "github.com/benseverndev-oss/goldenmatch",
30
+ "install": "pip install goldenmatch",
31
+ "mcp": {"command": "goldenmatch mcp-serve", "tools": 10},
32
+ "a2a": {"command": "goldenmatch agent-serve --port 8200", "skills": 8},
33
+ "cli": "goldenmatch",
34
+ "python": "from goldenmatch import dedupe_df, match_df"
35
+ },
36
+ {
37
+ "name": "GoldenPipe",
38
+ "purpose": "Pipeline orchestrator for the suite",
39
+ "repo": "github.com/benseverndev-oss/goldenpipe",
40
+ "install": "pip install goldenpipe[golden-suite]",
41
+ "mcp": {"command": "goldenpipe mcp-serve", "tools": 4},
42
+ "a2a": {"command": "goldenpipe agent-serve --port 8250", "skills": 4},
43
+ "cli": "goldenpipe",
44
+ "python": "from goldenpipe import run, run_df"
45
+ },
46
+ {
47
+ "name": "GoldenAnalysis",
48
+ "purpose": "Read-only cross-cutting analysis, metrics, and reporting",
49
+ "repo": "github.com/benseverndev-oss/goldenmatch",
50
+ "install": "pip install goldenanalysis",
51
+ "mcp": null,
52
+ "a2a": null,
53
+ "cli": "goldenanalysis",
54
+ "python": "from goldenanalysis import analyze"
55
+ },
56
+ {
57
+ "name": "goldenmatch-extensions",
58
+ "purpose": "SQL extensions for Postgres and DuckDB",
59
+ "repo": "github.com/benseverndev-oss/goldenmatch-extensions",
60
+ "install": "pip install goldenmatch-duckdb",
61
+ "mcp": null,
62
+ "a2a": null,
63
+ "cli": null,
64
+ "python": null
65
+ }
66
+ ],
67
+ "pipeline_order": ["goldencheck", "goldenflow", "goldenmatch", "goldenanalysis"],
68
+ "ports": {"goldencheck": 8100, "goldenflow": 8150, "goldenmatch": 8200, "goldenpipe": 8250}
69
+ }