golden-suite 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
+ # Build artifacts
2
+ target/
3
+ dist/
4
+ build/
5
+ *.egg-info/
6
+ node_modules/
7
+ __pycache__/
8
+ **/.hypothesis/
9
+ .venv/
10
+ .uv-cache/
11
+
12
+ # Web UI build output (populated by scripts/build_web.py before `hatch build`).
13
+ # .gitkeep stays so the source tree exists in checkouts and the wheel's
14
+ # force-include glob has something to match.
15
+ packages/python/goldenmatch/goldenmatch/web/static/*
16
+ !packages/python/goldenmatch/goldenmatch/web/static/.gitkeep
17
+
18
+ # Playwright runtime artifacts
19
+ packages/python/goldenmatch/web/frontend/test-results/
20
+ packages/python/goldenmatch/web/frontend/playwright-report/
21
+
22
+ # vitest browser-mode runtime output (R1 cross-JS-target WASM harnesses)
23
+ **/.vitest-attachments/
24
+ packages/typescript/goldenmatch/tests/spike/__screenshots__/
25
+
26
+ # YAML-edit backups (web UI's POST /api/v1/rules/save writes goldenmatch.yml.bak
27
+ # next to the file before clobbering — local-only safety net, not source).
28
+ *.yml.bak
29
+
30
+ # Steward labels — runtime-written by the inspector's review tab. Keep them
31
+ # out of git so a contributor's labels don't ride along on PRs. If you want
32
+ # seed labels for a demo project, commit a curated labels.seed.jsonl and
33
+ # rename at use time.
34
+ labels.jsonl
35
+
36
+ # Generated outputs
37
+ *_lineage.json
38
+ *_clusters.csv
39
+ # Allow committed test fixtures and demo project that mimic run outputs
40
+ !packages/python/goldenmatch/tests/web/fixtures/**
41
+ !packages/python/goldenmatch/tests/**/fixtures/**
42
+ !packages/python/goldenmatch/web/demo/**
43
+
44
+ # IDE
45
+ .vscode/
46
+ .idea/
47
+
48
+ # Turborepo
49
+ .turbo/
50
+
51
+ # Claude Code agent worktrees (transient isolated checkouts created by
52
+ # background subagents). Never tracked; project-level .claude settings can
53
+ # still be committed since only the worktrees subdir is ignored.
54
+ .claude/worktrees/
55
+
56
+ # Superpowers / manual git worktrees (isolated checkouts; never tracked)
57
+ .worktrees/
58
+
59
+ # Local profiling artifacts (per CLAUDE.md convention — cProfile dumps,
60
+ # scale-audit JSON outputs, synthetic fixtures). Documented as gitignored
61
+ # in CLAUDE.md; this entry makes that real.
62
+ .profile_tmp/
63
+ packages/python/goldenmatch/bench-dataset-v1/
64
+
65
+ # Local runtime state: Learning Memory DB, review queue, identity graph,
66
+ # cross-run autoconfig memory. The engine (and the test suite) rewrites these
67
+ # on every run, so they are never source. `.goldenmatch/memory.db` used to be
68
+ # tracked at the repo root and re-dirtied the working tree on each run; it was
69
+ # `git rm --cached`'d alongside this entry.
70
+ .goldenmatch/
71
+
72
+ # Compiled native acceleration ext (built from packages/rust/extensions/native
73
+ # via scripts/build_native.py). Platform-specific abi3 artifact, never source.
74
+ # (.pyd = the Windows in-tree build artifact; .so = Linux/macOS.)
75
+ packages/python/goldenmatch/goldenmatch/_native*.so
76
+ packages/python/goldenmatch/goldenmatch/_native*.pyd
77
+ # maturin-develop drops the compiled ext next to the tracked __init__.py in
78
+ # python/goldenmatch_native/. Ignore ONLY the artifacts, not the source dir.
79
+ packages/rust/extensions/native/python/goldenmatch_native/*.pyd
80
+ packages/rust/extensions/native/python/goldenmatch_native/*.so
81
+ # GoldenCheck's counterpart (packages/rust/extensions/goldencheck-native via
82
+ # scripts/build_goldencheck_native.py). Same rationale.
83
+ packages/python/goldencheck/goldencheck/_native*.so
84
+
85
+ # Benchmark datasets downloaded at runtime (DBLP-ACM, etc.) — not committed.
86
+ datasets/
87
+ # Native ext counterpart for goldenflow (built from packages/rust/extensions/native-flow).
88
+ packages/python/goldenflow/goldenflow/_native*.so
89
+ # Same, for goldenanalysis (built from packages/rust/extensions/analysis-native
90
+ # via scripts/build_analysis_native.py).
91
+ packages/python/goldenanalysis/goldenanalysis/_native*.so
92
+
93
+ # codebase-memory-mcp local index/snapshot (rebuilt by the SessionStart hook)
94
+ .codebase-memory/
95
+
96
+ # scratch Modal run logs (transient)
97
+ modal_*.log
@@ -0,0 +1,151 @@
1
+ # Integrating the Golden Suite (agent guide)
2
+
3
+ You are wiring the Golden Suite into a project. This is the canonical setup — follow it
4
+ instead of re-deriving one. If you only read one thing: **`goldenpipe` is the front door.
5
+ Install `golden-suite`, drive `goldenpipe`, reach for individual tools only when you need
6
+ a single capability.**
7
+
8
+ `pip install golden-suite` gives you the whole suite **plus native acceleration, defaulted
9
+ to the perf-optimized configuration** — no env vars to set. It should never silently run
10
+ the slow pure-Python path; `golden-suite doctor` verifies that and `golden-suite optimize`
11
+ repairs it.
12
+
13
+ ## The suite in one screen
14
+
15
+ | Package | PyPI | What it does | Import |
16
+ | --- | --- | --- | --- |
17
+ | **GoldenPipe** | `goldenpipe` | Orchestrator. Chains the tools as pluggable stages. **Start here.** | `import goldenpipe as gp` |
18
+ | **GoldenMatch** | `goldenmatch` | Entity resolution: dedupe, match across sources, golden records | `import goldenmatch as gm` |
19
+ | **GoldenCheck** | `goldencheck` | Data validation — discovers rules from the data, no rule-writing | `import goldencheck` |
20
+ | **GoldenFlow** | `goldenflow` | Transform / standardize / normalize messy data | `import goldenflow` |
21
+ | **GoldenSchema** | `infermap` | Inference-driven schema mapping (import name is `infermap`) | `import infermap` |
22
+ | **GoldenAnalysis** | `goldenanalysis` | Read-only cross-cutting metrics + reporting | `import goldenanalysis` |
23
+ | `goldencheck-types` | `goldencheck-types` | Shared field-type contracts (transitive; you won't install directly) | — |
24
+ | `goldensuite-mcp` | `goldensuite-mcp` | One MCP server exposing every tool (the agent front door) | — |
25
+
26
+ Dependency shape (a clean DAG — **GoldenMatch is a leaf, not the root**):
27
+
28
+ ```
29
+ goldencheck-types ──► everything (shared contracts)
30
+ infermap (GoldenSchema) ─┐
31
+ goldenmatch ─────────────┤
32
+ goldencheck ─────────────┼──► goldenpipe ──► golden-suite (this meta-package)
33
+ goldenflow ─────────────┤ └──► goldensuite-mcp (all tools, one MCP)
34
+ goldenanalysis ──────────┘
35
+ ```
36
+
37
+ ## Install — pick ONE line
38
+
39
+ Native acceleration is **included by default** (it's a hard dependency, not an extra).
40
+
41
+ | You want... | Install |
42
+ | --- | --- |
43
+ | The whole suite + native, perf-optimized | `pip install golden-suite` |
44
+ | Suite + one MCP server for agents | `pip install "golden-suite[mcp]"` |
45
+ | Everything (suite + mcp + serving) | `pip install "golden-suite[all]"` |
46
+ | Just entity resolution | `pip install goldenmatch` |
47
+ | Just validation | `pip install goldencheck` |
48
+ | Orchestrator + the core tools it stages (check/flow/match/analysis) only | `pip install "goldenpipe[golden-suite]"` |
49
+
50
+ Supported native platforms: Linux x86_64/aarch64, macOS x86_64/arm64, Windows amd64. On a
51
+ platform without a published wheel the install **fails loudly** (by design — the suite does
52
+ not silently degrade to pure-Python). Those users install the individual pure-Python
53
+ packages directly instead of `golden-suite`.
54
+
55
+ ## Verify + repair the setup (do this after install)
56
+
57
+ ```bash
58
+ golden-suite doctor # lists every component + whether native is ACTIVE; exits non-zero if silently slow
59
+ golden-suite optimize # installs any missing native kernels for this platform, then re-verifies
60
+ ```
61
+
62
+ `doctor` is read-only and CI-safe (non-zero exit when a package is silently on the
63
+ pure-Python path). Programmatic equivalents:
64
+
65
+ ```python
66
+ from golden_suite import installed, native_status
67
+ print(installed())      # {"goldenpipe": "1.3.0", "goldenmatch": "2.5.0", ...}
68
+ print(native_status()) # per-package: native_active / silently_slow / env_mode
69
+ ```
70
+
71
+ ## Three ways to integrate (choose by consumer)
72
+
73
+ 1. **Python API** — you're inside a Python codebase. Import `goldenpipe` (or a single tool).
74
+ 2. **MCP** — the consumer is an agent/LLM. Run **one** server: `goldensuite-mcp` (installed standalone or via `pip install "golden-suite[mcp]"`). Do **not** wire six per-package MCP servers by hand.
75
+ 3. **CLI** — one-off / shell / CI. Every package ships a Typer CLI: `goldenpipe run`, `goldenmatch dedupe`, `goldencheck scan`, etc.
76
+
77
+ ## Canonical quick-starts
78
+
79
+ ### Full pipeline (validate → transform → match), one call
80
+
81
+ ```python
82
+ import goldenpipe as gp
83
+
84
+ result = gp.run("customers.csv") # zero-config
85
+ print(result.status) # "success"
86
+ print(result.check) # quality findings
87
+ print(result.transform) # what got standardized
88
+ print(result.match) # deduplicated clusters
89
+ print(result.reasoning) # why each decision was made
90
+ ```
91
+
92
+ ### Just deduplicate
93
+
94
+ ```python
95
+ import goldenmatch as gm
96
+ result = gm.dedupe("customers.csv") # zero-config
97
+ # explicit:
98
+ result = gm.dedupe("customers.csv", exact=["email"], fuzzy={"name": 0.85}, blocking=["zip"])
99
+ result.golden.write_csv("deduped.csv")
100
+ ```
101
+
102
+ ### Match two sources
103
+
104
+ ```python
105
+ result = gm.match("crm.csv", "billing.csv", fuzzy={"name": 0.85, "address": 0.80})
106
+ ```
107
+
108
+ ### Validate (rules discovered from the data)
109
+
110
+ ```python
111
+ import goldencheck
112
+ report = goldencheck.scan("customers.csv")
113
+ ```
114
+
115
+ ### Map an unknown schema to a canonical one
116
+
117
+ ```python
118
+ import infermap # GoldenSchema
119
+ mapping = infermap.infer("raw_export.csv")
120
+ ```
121
+
122
+ ### One MCP server for all of it
123
+
124
+ ```bash
125
+ pip install "golden-suite[mcp]"
126
+ goldensuite-mcp # every suite tool, one server
127
+ ```
128
+
129
+ ## Anti-patterns that cause the back-and-forth (don't do these)
130
+
131
+ - **Installing `goldenmatch` and expecting the pipeline / check / transform.** GoldenMatch is
132
+ entity resolution only. For the end-to-end flow use `goldenpipe`.
133
+ - **Hand-wiring each tool into a bespoke pipeline.** `goldenpipe` already registers every tool
134
+ as a stage (`goldencheck.scan`, `goldenflow.transform`, `goldenmatch.dedupe`,
135
+ `goldenmatch.identity_resolve`, `goldenanalysis.report`) via entry-points. Use it.
136
+ - **Running six MCP servers.** One `goldensuite-mcp` exposes them all.
137
+ - **Importing `goldenschema`.** The import name is `infermap` (PyPI/product name is GoldenSchema).
138
+ - **Assuming native is off and setting `<PKG>_NATIVE=1` "to turn it on".** It's already on by
139
+ default (`auto` runs the parity-signed-off hot paths native automatically). `=1` is a
140
+ *require-and-force* mode that also runs components NOT yet parity-signed-off (notably
141
+ goldenflow) and **can change outputs** — only use it via `golden-suite optimize --strict`
142
+ after validating parity for your workload.
143
+ - **Pinning tools to each other's versions.** They release independently. Let `golden-suite`
144
+ carry the compatible lower bounds; don't hard-pin cross-package versions in the consumer.
145
+
146
+ ## Notes
147
+
148
+ - Python 3.11–3.13. Everything is Polars-backed.
149
+ - Repo: `benseverndev-oss/goldenmatch` (monorepo — all suite packages live here under
150
+ `packages/python/<pkg>`).
151
+ - Per-tool detail: each package has its own `AGENTS.md` and `llms.txt`.
@@ -0,0 +1,40 @@
1
+ # Changelog
2
+
3
+ All notable changes to golden-suite are documented here. The format follows
4
+ [Keep a Changelog](https://keepachangelog.com/); this project uses semantic
5
+ versioning.
6
+
7
+ ## [0.1.0] - unreleased
8
+
9
+ Initial release. A one-line, perf-optimized install and a single canonical front
10
+ door for the whole Golden Suite.
11
+
12
+ ### Added
13
+ - `pip install golden-suite` pulls the whole suite — `goldenpipe[golden-suite]`
14
+ (orchestrator + check/flow/match/analysis), plus `goldenmatch`, `goldencheck`,
15
+ `goldenflow`, `infermap` (GoldenSchema), `goldenanalysis`, `goldencheck-types`.
16
+ - **Native acceleration on by default.** The four native (Rust/abi3) kernels
17
+ (`goldenmatch-native`, `goldencheck-native`, `goldenflow-native`,
18
+ `goldenanalysis-native`) are **hard dependencies**, not an opt-in extra, so the
19
+ suite defaults to the perf-optimized configuration and never silently runs the
20
+ slow pure-Python path. Wheels cover Linux x86_64/aarch64, macOS x86_64/arm64,
21
+ and Windows amd64; on an unsupported platform the install fails loudly by design.
22
+ - `golden-suite` CLI:
23
+ - `doctor` — reports every component + version and whether each native kernel is
24
+ actually active; exits non-zero when a package is silently on the pure-Python
25
+ path (CI/verification-safe).
26
+ - `optimize` — installs any missing native kernels for the current platform, then
27
+ re-verifies. `--strict` additionally emits the require-native env vars
28
+ (`<PKG>_NATIVE=1`), with a warning that strict mode force-runs components not
29
+ yet parity-signed-off (notably goldenflow) and can change outputs.
30
+ - Introspection helpers: `golden_suite.installed()` (dist -> version|None) and
31
+ `golden_suite.native_status()` (per-package `native_active` / `silently_slow` /
32
+ `env_mode`).
33
+ - Optional extras: `[mcp]` (`goldensuite-mcp` — one server for every tool),
34
+ `[agent]` (GoldenPipe tui/api/agent serving surfaces), and `[all]`.
35
+ - Integration guide for agents and humans: `AGENTS.md`, `llms.txt`, `README.md`.
36
+
37
+ ### Notes
38
+ - Ships no data-processing logic of its own — only the CLI and introspection helpers.
39
+ - Published on the `golden-suite-v*` release tag via `publish-golden-suite.yml`
40
+ (distinct from the `goldensuite-mcp-v*` tag).
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: golden-suite
3
+ Version: 0.1.0
4
+ Summary: One-line, perf-optimized install for the entire Golden Suite — goldenmatch, goldencheck, goldenflow, goldenpipe, GoldenSchema (infermap), goldenanalysis, native acceleration on by default
5
+ Project-URL: Homepage, https://github.com/benseverndev-oss/goldenmatch
6
+ Project-URL: Repository, https://github.com/benseverndev-oss/goldenmatch
7
+ Project-URL: Documentation, https://github.com/benseverndev-oss/goldenmatch/tree/main/packages/python/golden-suite#readme
8
+ Project-URL: Issues, https://github.com/benseverndev-oss/goldenmatch/issues
9
+ Project-URL: Changelog, https://github.com/benseverndev-oss/goldenmatch/blob/main/packages/python/golden-suite/CHANGELOG.md
10
+ Author-email: Ben Severn <ben@bensevern.dev>
11
+ License: MIT
12
+ Keywords: data-quality,entity-resolution,golden-suite,meta-package,pipeline,schema-inference
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: <3.14,>=3.11
23
+ Requires-Dist: goldenanalysis-native>=0.1
24
+ Requires-Dist: goldenanalysis>=0.3
25
+ Requires-Dist: goldencheck-native>=0.1
26
+ Requires-Dist: goldencheck-types>=0.1
27
+ Requires-Dist: goldencheck>=1.4
28
+ Requires-Dist: goldenflow-native>=0.1.1
29
+ Requires-Dist: goldenflow>=1.3
30
+ Requires-Dist: goldenmatch-native>=0.1.12
31
+ Requires-Dist: goldenmatch>=2.5
32
+ Requires-Dist: goldenpipe[golden-suite]>=1.3
33
+ Requires-Dist: infermap>=0.5.1
34
+ Requires-Dist: rich>=13.0
35
+ Requires-Dist: typer>=0.12
36
+ Provides-Extra: agent
37
+ Requires-Dist: goldenpipe[agent,api,tui]>=1.3; extra == 'agent'
38
+ Provides-Extra: all
39
+ Requires-Dist: goldenpipe[agent,api,tui]>=1.3; extra == 'all'
40
+ Requires-Dist: goldensuite-mcp>=0.3; extra == 'all'
41
+ Provides-Extra: dev
42
+ Requires-Dist: pytest>=8; extra == 'dev'
43
+ Requires-Dist: ruff>=0.6; extra == 'dev'
44
+ Provides-Extra: mcp
45
+ Requires-Dist: goldensuite-mcp>=0.3; extra == 'mcp'
46
+ Description-Content-Type: text/markdown
47
+
48
+ # golden-suite
49
+
50
+ One-line, perf-optimized install and single front door for the whole **Golden Suite**.
51
+
52
+ ```bash
53
+ pip install golden-suite # whole suite + native acceleration, defaulted to the fast config
54
+ golden-suite doctor # verify native is actually active
55
+ ```
56
+
57
+ This is a thin meta-package. It pulls in every suite tool **plus the native (Rust)
58
+ acceleration kernels, on by default**, and gives you (and your agents) one canonical entry
59
+ point. It ships no data-processing logic of its own — just a `doctor`/`optimize` CLI and
60
+ introspection helpers.
61
+
62
+ ## What you get
63
+
64
+ | Tool | Does | Import |
65
+ | --- | --- | --- |
66
+ | **GoldenPipe** | Orchestrator — chains the tools as pluggable stages. **Start here.** | `import goldenpipe as gp` |
67
+ | **GoldenMatch** | Entity resolution: dedupe, match, golden records | `import goldenmatch as gm` |
68
+ | **GoldenCheck** | Data validation (rules discovered from your data) | `import goldencheck` |
69
+ | **GoldenFlow** | Transform / standardize / normalize | `import goldenflow` |
70
+ | **GoldenSchema** | Inference-driven schema mapping (import name: `infermap`) | `import infermap` |
71
+ | **GoldenAnalysis** | Read-only metrics + reporting | `import goldenanalysis` |
72
+
73
+ `goldenpipe` is the front door: it adapts every other tool as a stage, so most integrations
74
+ only ever touch `goldenpipe`. `goldenmatch` is a leaf (entity resolution only), not the root.
75
+
76
+ ## Install options
77
+
78
+ Native acceleration is included by default (a hard dependency, not an extra).
79
+
80
+ | You want | Install |
81
+ | --- | --- |
82
+ | The whole suite + native | `pip install golden-suite` |
83
+ | Suite + one MCP server | `pip install "golden-suite[mcp]"` |
84
+ | Everything (suite + mcp + serving) | `pip install "golden-suite[all]"` |
85
+
86
+ Native wheels cover Linux x86_64/aarch64, macOS x86_64/arm64, Windows amd64. On an
87
+ unsupported platform the install fails loudly rather than silently degrading — install the
88
+ individual pure-Python packages directly there.
89
+
90
+ ## Quick start
91
+
92
+ ```python
93
+ import goldenpipe as gp
94
+
95
+ result = gp.run("customers.csv") # validate -> transform -> match, one call
96
+ print(result.status, result.match, result.reasoning)
97
+ ```
98
+
99
+ Verify + repair the perf setup:
100
+
101
+ ```bash
102
+ golden-suite doctor # every component + whether native is ACTIVE (non-zero exit if silently slow)
103
+ golden-suite optimize # install any missing native kernels, then re-verify
104
+ ```
105
+
106
+ ```python
107
+ from golden_suite import installed, native_status
108
+ print(installed())      # {"goldenpipe": "1.3.0", "goldenmatch": "2.5.0", ...}
109
+ print(native_status()) # per-package native_active / silently_slow / env_mode
110
+ ```
111
+
112
+ ## For agents
113
+
114
+ See [`AGENTS.md`](./AGENTS.md) and [`llms.txt`](./llms.txt) — the canonical integration guide,
115
+ including the anti-patterns that cause most of the "wrong setup" back-and-forth.
116
+
117
+ ## License
118
+
119
+ MIT. Part of the [Golden Suite monorepo](https://github.com/benseverndev-oss/goldenmatch).
@@ -0,0 +1,72 @@
1
+ # golden-suite
2
+
3
+ One-line, perf-optimized install and single front door for the whole **Golden Suite**.
4
+
5
+ ```bash
6
+ pip install golden-suite # whole suite + native acceleration, defaulted to the fast config
7
+ golden-suite doctor # verify native is actually active
8
+ ```
9
+
10
+ This is a thin meta-package. It pulls in every suite tool **plus the native (Rust)
11
+ acceleration kernels, on by default**, and gives you (and your agents) one canonical entry
12
+ point. It ships no data-processing logic of its own — just a `doctor`/`optimize` CLI and
13
+ introspection helpers.
14
+
15
+ ## What you get
16
+
17
+ | Tool | Does | Import |
18
+ | --- | --- | --- |
19
+ | **GoldenPipe** | Orchestrator — chains the tools as pluggable stages. **Start here.** | `import goldenpipe as gp` |
20
+ | **GoldenMatch** | Entity resolution: dedupe, match, golden records | `import goldenmatch as gm` |
21
+ | **GoldenCheck** | Data validation (rules discovered from your data) | `import goldencheck` |
22
+ | **GoldenFlow** | Transform / standardize / normalize | `import goldenflow` |
23
+ | **GoldenSchema** | Inference-driven schema mapping (import name: `infermap`) | `import infermap` |
24
+ | **GoldenAnalysis** | Read-only metrics + reporting | `import goldenanalysis` |
25
+
26
+ `goldenpipe` is the front door: it adapts every other tool as a stage, so most integrations
27
+ only ever touch `goldenpipe`. `goldenmatch` is a leaf (entity resolution only), not the root.
28
+
29
+ ## Install options
30
+
31
+ Native acceleration is included by default (a hard dependency, not an extra).
32
+
33
+ | You want | Install |
34
+ | --- | --- |
35
+ | The whole suite + native | `pip install golden-suite` |
36
+ | Suite + one MCP server | `pip install "golden-suite[mcp]"` |
37
+ | Everything (suite + mcp + serving) | `pip install "golden-suite[all]"` |
38
+
39
+ Native wheels cover Linux x86_64/aarch64, macOS x86_64/arm64, Windows amd64. On an
40
+ unsupported platform the install fails loudly rather than silently degrading — install the
41
+ individual pure-Python packages directly there.
42
+
43
+ ## Quick start
44
+
45
+ ```python
46
+ import goldenpipe as gp
47
+
48
+ result = gp.run("customers.csv") # validate -> transform -> match, one call
49
+ print(result.status, result.match, result.reasoning)
50
+ ```
51
+
52
+ Verify + repair the perf setup:
53
+
54
+ ```bash
55
+ golden-suite doctor # every component + whether native is ACTIVE (non-zero exit if silently slow)
56
+ golden-suite optimize # install any missing native kernels, then re-verify
57
+ ```
58
+
59
+ ```python
60
+ from golden_suite import installed, native_status
61
+ print(installed())      # {"goldenpipe": "1.3.0", "goldenmatch": "2.5.0", ...}
62
+ print(native_status()) # per-package native_active / silently_slow / env_mode
63
+ ```
64
+
65
+ ## For agents
66
+
67
+ See [`AGENTS.md`](./AGENTS.md) and [`llms.txt`](./llms.txt) — the canonical integration guide,
68
+ including the anti-patterns that cause most of the "wrong setup" back-and-forth.
69
+
70
+ ## License
71
+
72
+ MIT. Part of the [Golden Suite monorepo](https://github.com/benseverndev-oss/goldenmatch).
@@ -0,0 +1,120 @@
1
+ """Golden Suite meta-package.
2
+
3
+ One-line, perf-optimized install and a single canonical entry point for the whole
4
+ suite. Ships almost no logic of its own — just introspection helpers and a
5
+ ``golden-suite`` CLI (``doctor`` / ``optimize``). For real work, import the
6
+ individual tools (or, most of the time, just ``goldenpipe`` — the orchestrator
7
+ that adapts every other tool as a stage).
8
+
9
+ import goldenpipe as gp
10
+ result = gp.run("customers.csv") # runs check -> transform -> match end to end
11
+
12
+ Native acceleration is installed by default. Use :func:`installed` to see which
13
+ components resolved, and :func:`native_status` to see whether each native kernel
14
+ is actually active (the truth behind "am I on the fast path").
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import importlib
20
+ import os
21
+ from importlib import metadata
22
+
23
+ __version__ = "0.1.0"
24
+
25
+ # PyPI distribution name -> import module name. Keep in lockstep with pyproject deps.
26
+ _COMPONENTS: dict[str, str] = {
27
+ "goldenpipe": "goldenpipe",
28
+ "goldenmatch": "goldenmatch",
29
+ "goldencheck": "goldencheck",
30
+ "goldenflow": "goldenflow",
31
+ "infermap": "infermap", # GoldenSchema
32
+ "goldenanalysis": "goldenanalysis",
33
+ "goldencheck-types": "goldencheck_types",
34
+ "goldensuite-mcp": "goldensuite_mcp",
35
+ }
36
+
37
+ # Packages that ship an optional native (Rust/abi3) accelerator, and the pieces
38
+ # needed to reason about it WITHOUT importing the heavy top-level package:
39
+ # base package -> (native distribution, standalone native import module, env var)
40
+ # The runtime loader tries ``<pkg>._native`` (in-tree build) then
41
+ # ``<pkg>_native._native`` (the published wheel). For a pip user only the wheel
42
+ # path exists, so probing the standalone ``<pkg>_native`` module is both accurate
43
+ # and lightweight (it does not pull in polars et al.).
44
+ _NATIVE: dict[str, tuple[str, str, str]] = {
45
+ "goldenmatch": ("goldenmatch-native", "goldenmatch_native", "GOLDENMATCH_NATIVE"),
46
+ "goldencheck": ("goldencheck-native", "goldencheck_native", "GOLDENCHECK_NATIVE"),
47
+ "goldenflow": ("goldenflow-native", "goldenflow_native", "GOLDENFLOW_NATIVE"),
48
+ "goldenanalysis": (
49
+ "goldenanalysis-native",
50
+ "goldenanalysis_native",
51
+ "GOLDENANALYSIS_NATIVE",
52
+ ),
53
+ }
54
+
55
+
56
+ def _version_or_none(dist: str) -> str | None:
57
+ try:
58
+ return metadata.version(dist)
59
+ except metadata.PackageNotFoundError:
60
+ return None
61
+
62
+
63
def installed() -> dict[str, str | None]:
    """Map every suite distribution name to its installed version.

    A value of ``None`` means that component is not installed in this
    environment. Keys follow the order of ``_COMPONENTS``.
    """
    versions: dict[str, str | None] = {}
    for dist in _COMPONENTS:
        versions[dist] = _version_or_none(dist)
    return versions
70
+
71
+
72
+ def _native_importable(native_module: str) -> bool:
73
+ """Whether the standalone native wheel (e.g. ``goldenmatch_native._native``)
74
+ imports cleanly — the same path the runtime loader uses under a pip install."""
75
+ try:
76
+ mod = importlib.import_module(native_module)
77
+ except Exception: # noqa: BLE001 - any import/load failure => not available
78
+ return False
79
+ if getattr(mod, "_native", None) is not None:
80
+ return True
81
+ try: # some builds expose ``_native`` only as a submodule
82
+ importlib.import_module(f"{native_module}._native")
83
+ return True
84
+ except Exception: # noqa: BLE001
85
+ return False
86
+
87
+
88
def native_status() -> dict[str, dict[str, object]]:
    """Summarize native-acceleration state for each package in ``_NATIVE``.

    The result maps the base package name to a dict with these keys:
      - ``base_installed`` : installed version of the base package, or ``None``
      - ``native_dist``    : distribution name of the native wheel
      - ``native_version`` : installed version of the native wheel, or ``None``
      - ``native_active``  : ``True`` when the native module imports
      - ``env_var``        : name of the ``<PKG>_NATIVE`` environment variable
      - ``env_mode``       : lower-cased value of that variable (``auto`` if unset)
      - ``silently_slow``  : base installed, native NOT active, and env != "0"
                             — i.e. the runtime is silently on the pure-Python path
    """
    report: dict[str, dict[str, object]] = {}
    for pkg, spec in _NATIVE.items():
        native_dist, native_module, env_var = spec
        base_version = _version_or_none(pkg)
        wheel_version = _version_or_none(native_dist)
        active = _native_importable(native_module)
        mode = os.environ.get(env_var, "auto").lower()
        # Slow only when none of the "fine" conditions hold: base absent,
        # native live, or native explicitly disabled through the env var.
        slow = not (base_version is None or active or mode == "0")
        report[pkg] = {
            "base_installed": base_version,
            "native_dist": native_dist,
            "native_version": wheel_version,
            "native_active": active,
            "env_var": env_var,
            "env_mode": mode,
            "silently_slow": slow,
        }
    return report
118
+
119
+
120
+ __all__ = ["__version__", "installed", "native_status"]
@@ -0,0 +1,182 @@
1
+ """``golden-suite`` CLI: verify and repair the perf-optimized setup.
2
+
3
+ golden-suite doctor # report every component + whether native is actually active
4
+ golden-suite optimize # install any missing native kernels, then re-verify
5
+
6
+ ``doctor`` is read-only and exits non-zero when the runtime would silently run the
7
+ slow pure-Python path — so it doubles as a CI/verification gate. ``optimize`` acts.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json as _json
13
+ import subprocess
14
+ import sys
15
+
16
+ import typer
17
+ from rich.console import Console
18
+ from rich.table import Table
19
+
20
+ from . import __version__, installed, native_status
21
+
22
# Root Typer application; running it with no arguments shows the help text.
app = typer.Typer(
    help="Verify and repair the perf-optimized Golden Suite setup.",
    no_args_is_help=True,
    add_completion=False,
)
# Separate Rich consoles: regular output goes to stdout, failures/warnings to stderr.
_console = Console()
_err = Console(stderr=True)
29
+
30
+
31
def _components_table() -> Table:
    """Build the Rich table listing each suite distribution and its version.

    Distributions that ``installed()`` reports as ``None`` are shown as a
    yellow "not installed" cell.
    """
    grid = Table(title="Golden Suite components", title_style="bold")
    for heading in ("Package", "Version"):
        grid.add_column(heading)
    for name, ver in installed().items():
        grid.add_row(name, "[yellow]not installed[/]" if ver is None else ver)
    return grid
41
+
42
+
43
def _native_table(status: dict[str, dict[str, object]]) -> Table:
    """Render a ``native_status()`` result as a Rich table.

    Packages whose base distribution is not installed are left out, since
    there is nothing to accelerate for them.
    """

    def _verdict(info: dict[str, object]) -> str:
        # Precedence: explicit opt-out, then silent fallback, then healthy.
        if info["env_mode"] == "0":
            return "[yellow]native disabled (env=0)[/]"
        if info["silently_slow"]:
            return "[red]SILENTLY SLOW[/]"
        if info["native_active"]:
            return "[green]OK[/]"
        return "[yellow]inactive[/]"

    grid = Table(title="Native acceleration", title_style="bold")
    for heading in ("Package", "Native wheel", "Fast path", "Env", "Verdict"):
        grid.add_column(heading)

    for pkg, info in status.items():
        if info["base_installed"] is None:
            continue  # base package not installed; nothing to accelerate
        wheel = info["native_version"] or "[yellow]missing[/]"
        fast = "[green]native[/]" if info["native_active"] else "[red]pure-python[/]"
        grid.add_row(pkg, str(wheel), fast, str(info["env_mode"]), _verdict(info))
    return grid
68
+
69
+
70
@app.command()
def doctor(
    as_json: bool = typer.Option(False, "--json", help="Emit machine-readable JSON."),
    strict: bool = typer.Option(
        True,
        "--strict/--no-strict",
        help="Exit non-zero if any package is silently on the pure-Python path.",
    ),
) -> None:
    """Report every component and whether the native fast path is actually active."""
    # The docstring above doubles as the command's --help text; keep it stable.
    status = native_status()
    slow_pkgs = [name for name, info in status.items() if info["silently_slow"]]
    absent = [name for name, ver in installed().items() if ver is None]

    if not as_json:
        # Human-readable report: two tables followed by a one-line summary.
        _console.print(_components_table())
        _console.print(_native_table(status))
        if slow_pkgs:
            _err.print(
                f"\n[red]FAIL[/]: {', '.join(slow_pkgs)} installed but running "
                f"pure-Python. Run [bold]golden-suite optimize[/] to fix."
            )
        else:
            _console.print("\n[green]OK[/]: native acceleration active where expected.")
    else:
        # Machine-readable report: a single JSON document on stdout.
        payload = {
            "version": __version__,
            "installed": installed(),
            "native": status,
            "silently_slow": slow_pkgs,
            "missing_components": absent,
        }
        _console.print_json(_json.dumps(payload))

    # Exit code 1 makes the command usable as a CI gate unless --no-strict is given.
    if slow_pkgs and strict:
        raise typer.Exit(code=1)
109
+
110
+
111
@app.command()
def optimize(
    strict_runtime: bool = typer.Option(
        False,
        "--strict/--no-strict",
        help=(
            "Also emit require-native env vars (<PKG>_NATIVE=1). WARNING: strict "
            "mode forces native for components NOT yet parity-signed-off (notably "
            "goldenflow) and can change outputs. Off by default."
        ),
    ),
    dry_run: bool = typer.Option(
        False, "--dry-run", help="Show what would be installed, do nothing."
    ),
) -> None:
    """Install any missing native kernels for this platform, then re-verify."""
    # The docstring above doubles as the command's --help text; keep it stable.
    #
    # Flow: work out which native wheels are missing or not importable, pip-install
    # them with the current interpreter, optionally print the require-native env
    # exports, and finish with a non-strict ``doctor`` report. Exits with pip's
    # return code when the install fails.
    import importlib  # local import: only needed to refresh import caches below

    status = native_status()
    # A package needs repair when its base is installed but the native wheel is
    # absent or won't import on this interpreter.
    to_install = [
        str(s["native_dist"])
        for s in status.values()
        if s["base_installed"] is not None and not s["native_active"]
    ]

    if not to_install:
        _console.print("[green]Already optimal[/]: every native kernel is active.")
    else:
        _console.print(
            f"Native kernels to install: [bold]{', '.join(to_install)}[/]"
        )
        if dry_run:
            _console.print("[yellow]--dry-run[/]: nothing installed.")
        else:
            cmd = [sys.executable, "-m", "pip", "install", "--upgrade", *to_install]
            _console.print(f"$ {' '.join(cmd)}")
            result = subprocess.run(cmd, check=False)
            if result.returncode != 0:
                _err.print(
                    "[red]pip install failed[/]. On a platform without a published "
                    "wheel, native cannot be enabled — see the docs for supported "
                    "platforms."
                )
                raise typer.Exit(code=result.returncode)
            # pip ran in a child process, so this interpreter's import finders may
            # still hold directory listings from before the install. Without this
            # refresh the re-verification below can miss the freshly installed
            # modules and wrongly report them as still inactive.
            importlib.invalidate_caches()

    if strict_runtime:
        _console.print(
            "\n[bold]Require-native env[/] (add to your shell/.env to make a missing "
            "kernel raise instead of silently falling back):"
        )
        for s in status.values():
            if s["base_installed"] is not None:
                _console.print(f"  export {s['env_var']}=1")
        _err.print(
            "\n[yellow]WARNING[/]: <PKG>_NATIVE=1 forces native for components not "
            "yet parity-signed-off (e.g. goldenflow) and MAY change outputs. Only "
            "use it if you have validated parity for your workload."
        )

    if not dry_run:
        _console.print()
        # Re-verify for display only; strict=False so the report never aborts here.
        doctor(as_json=False, strict=False)
173
+
174
+
175
@app.command()
def version() -> None:
    """Print the golden-suite meta-package version."""
    # Printed through the shared stdout console, like the other commands' output.
    current = __version__
    _console.print(current)
179
+
180
+
181
+ if __name__ == "__main__": # pragma: no cover
182
+ app()
@@ -0,0 +1,94 @@
1
+ # Golden Suite
2
+
3
+ > One-line, perf-optimized install and single front door for the whole Golden Suite: entity
4
+ > resolution (GoldenMatch), data validation (GoldenCheck), transformation (GoldenFlow),
5
+ > schema mapping (GoldenSchema/infermap), read-only analysis (GoldenAnalysis), all
6
+ > orchestrated by GoldenPipe. Native (Rust) acceleration is included and on by default;
7
+ > the suite never silently runs the slow pure-Python path. Ships a tiny `golden-suite` CLI
8
+ > (doctor/optimize) plus introspection helpers.
9
+
10
+ ## Mental model
11
+ - `goldenpipe` is the orchestrator and the front door. It adapts every other tool as a
12
+ pluggable stage. Most integrations only touch `goldenpipe`.
13
+ - `goldenmatch` is a LEAF (entity resolution), not the root. Do not expect the pipeline,
14
+ validation, or transforms from it.
15
+ - `goldensuite-mcp` is one MCP server exposing every tool — the agent front door.
16
+
17
+ ## Components
18
+ - GoldenPipe (`goldenpipe`) — orchestrator; `import goldenpipe as gp`
19
+ - GoldenMatch (`goldenmatch`) — dedupe, match, golden records; `import goldenmatch as gm`
20
+ - GoldenCheck (`goldencheck`) — validation, rules discovered from data; `import goldencheck`
21
+ - GoldenFlow (`goldenflow`) — transform / standardize / normalize; `import goldenflow`
22
+ - GoldenSchema (`infermap`) — inference-driven schema mapping; `import infermap`
23
+ - GoldenAnalysis (`goldenanalysis`) — read-only metrics + reporting; `import goldenanalysis`
24
+ - `goldencheck-types` — shared field-type contracts (transitive)
25
+ - `goldensuite-mcp` — one MCP server, every tool
26
+
27
+ ## Install
28
+ - `pip install golden-suite` — whole suite + native acceleration, perf-optimized (Python)
29
+ - `pip install "golden-suite[mcp]"` — suite + one MCP server (`goldensuite-mcp`)
30
+ - `pip install "golden-suite[all]"` — suite + mcp + serving surfaces
31
+ - Single tool instead: `pip install goldenmatch` / `goldencheck` / `goldenflow` / `infermap`
32
+ - Orchestrator + core three: `pip install "goldenpipe[golden-suite]"`
33
+ - Native wheels cover Linux x86_64/aarch64, macOS x86_64/arm64, Windows amd64. On an
34
+ unsupported platform the install fails loudly (no silent pure-Python) — install the
35
+ individual pure-Python packages directly there.
36
+
37
+ ## Verify + repair the setup
38
+ - `golden-suite doctor` — every component + whether native is ACTIVE; non-zero exit if silently slow (CI-safe)
39
+ - `golden-suite optimize` — install missing native kernels for this platform, then re-verify
40
+ ```python
41
+ from golden_suite import installed, native_status
42
+ print(installed()) # dist -> version|None for every suite component
43
+ print(native_status()) # per-package native_active / silently_slow / env_mode
44
+ ```
45
+
46
+ ## Quick examples
47
+
48
+ ### Full pipeline (validate -> transform -> match), one call
49
+ ```python
50
+ import goldenpipe as gp
51
+ result = gp.run("customers.csv")
52
+ print(result.status, result.check, result.transform, result.match, result.reasoning)
53
+ ```
54
+
55
+ ### Deduplicate
56
+ ```python
57
+ import goldenmatch as gm
58
+ result = gm.dedupe("customers.csv", exact=["email"], fuzzy={"name": 0.85}, blocking=["zip"])
59
+ result.golden.write_csv("deduped.csv")
60
+ ```
61
+
62
+ ### Match two sources
63
+ ```python
64
+ result = gm.match("crm.csv", "billing.csv", fuzzy={"name": 0.85, "address": 0.80})
65
+ ```
66
+
67
+ ### Validate
68
+ ```python
69
+ import goldencheck
70
+ report = goldencheck.scan("customers.csv")
71
+ ```
72
+
73
+ ### Map an unknown schema
74
+ ```python
75
+ import infermap # GoldenSchema
76
+ mapping = infermap.infer("raw_export.csv")
77
+ ```
78
+
79
+ ### One MCP server for everything
80
+ ```bash
81
+ pip install "golden-suite[mcp]" && goldensuite-mcp
82
+ ```
83
+
84
+ ## Anti-patterns
85
+ - Installing `goldenmatch` and expecting the pipeline/check/transform — use `goldenpipe`.
86
+ - Hand-wiring each tool — `goldenpipe` already registers them as stages.
87
+ - Running six MCP servers — use one `goldensuite-mcp`.
88
+ - `import goldenschema` — the import name is `infermap`.
89
+ - Setting `<PKG>_NATIVE=1` "to enable native" — it's already on by default; `=1` forces native
90
+ for components not yet parity-signed-off (e.g. goldenflow) and can change outputs. Use `optimize --strict` only
91
+ after validating parity.
92
+
93
+ ## Repo
94
+ - Monorepo: benseverndev-oss/goldenmatch (packages/python/<pkg>). Python 3.11-3.13, Polars-backed.
@@ -0,0 +1,76 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "golden-suite"
7
+ version = "0.1.0"
8
+ description = "One-line, perf-optimized install for the entire Golden Suite — goldenmatch, goldencheck, goldenflow, goldenpipe, GoldenSchema (infermap), goldenanalysis, native acceleration on by default"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11,<3.14"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Ben Severn", email = "ben@bensevern.dev" }]
13
+ keywords = ["data-quality", "entity-resolution", "pipeline", "schema-inference", "golden-suite", "meta-package"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Software Development :: Libraries :: Python Modules",
24
+ ]
25
+
26
+ # `pip install golden-suite` gets the WHOLE suite AND the native acceleration
27
+ # kernels, in one line, defaulting to the perf-optimized configuration.
28
+ #
29
+ # `goldenpipe` is the orchestrator (adapts every tool as a stage). The native
30
+ # wheels are HARD dependencies on purpose: the suite should never silently run
31
+ # the slow pure-Python path. abi3 wheels are published for the five mainstream
32
+ # platforms (Linux x86_64/aarch64, macOS x86_64/arm64, Windows amd64); on any
33
+ # platform without a wheel the install fails LOUDLY rather than degrading.
34
+ # Under each package's default `auto` gate, the parity-signed-off hot paths then
35
+ # run native automatically — no env vars to set. `golden-suite doctor` verifies
36
+ # it; `golden-suite optimize` repairs it.
37
+ dependencies = [
38
+ # --- suite (floors track the current majors) ---
39
+ "goldenpipe[golden-suite]>=1.3", # orchestrator + check/flow/match/analysis
40
+ "goldenmatch>=2.5", # entity resolution: dedupe, match, golden records (floor is 2.5; 2.7.0 was the latest on PyPI when this floor was set)
41
+ "goldencheck>=1.4", # data validation (rules discovered from your data)
42
+ "goldenflow>=1.3", # transform / standardize / normalize
43
+ "infermap>=0.5.1", # GoldenSchema: inference-driven schema mapping
44
+ "goldenanalysis>=0.3", # read-only cross-cutting metrics + reporting
45
+ "goldencheck-types>=0.1", # shared canonical field-type contracts
46
+ # --- native acceleration (on by default; see note above) ---
47
+ "goldenmatch-native>=0.1.12", # >=0.1.12 carries perceptual + the current kernel surface
48
+ "goldencheck-native>=0.1",
49
+ "goldenflow-native>=0.1.1",
50
+ "goldenanalysis-native>=0.1",
51
+ # --- CLI (doctor / optimize) ---
52
+ "typer>=0.12",
53
+ "rich>=13.0",
54
+ ]
55
+
56
+ [project.optional-dependencies]
57
+ # One MCP server exposing every tool — the agent front door.
58
+ mcp = ["goldensuite-mcp>=0.3"]
59
+ # GoldenPipe serving surfaces (A2A + async transports + TUI + REST).
60
+ agent = ["goldenpipe[tui,api,agent]>=1.3"]
61
+ # Everything: full suite + native + MCP + serving.
62
+ all = ["golden-suite[mcp,agent]"]  # self-referential extra: pulls in this package's own mcp + agent extras
63
+ dev = ["pytest>=8", "ruff>=0.6"]  # contributor tooling (tests + lint); not included in `all`
64
+
65
+ [project.scripts]
66
+ golden-suite = "golden_suite.cli:app"  # console command `golden-suite` -> the `app` object in the `golden_suite.cli` module
67
+
68
+ [project.urls]
69
+ Homepage = "https://github.com/benseverndev-oss/goldenmatch"
70
+ Repository = "https://github.com/benseverndev-oss/goldenmatch"
71
+ Documentation = "https://github.com/benseverndev-oss/goldenmatch/tree/main/packages/python/golden-suite#readme"
72
+ Issues = "https://github.com/benseverndev-oss/goldenmatch/issues"
73
+ Changelog = "https://github.com/benseverndev-oss/goldenmatch/blob/main/packages/python/golden-suite/CHANGELOG.md"
74
+
75
+ [tool.hatch.build.targets.wheel]
76
+ packages = ["golden_suite"]  # import package is `golden_suite` (underscore); the distribution name is `golden-suite`