golden-suite 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- golden_suite-0.1.0/.gitignore +97 -0
- golden_suite-0.1.0/AGENTS.md +151 -0
- golden_suite-0.1.0/CHANGELOG.md +40 -0
- golden_suite-0.1.0/PKG-INFO +119 -0
- golden_suite-0.1.0/README.md +72 -0
- golden_suite-0.1.0/golden_suite/__init__.py +120 -0
- golden_suite-0.1.0/golden_suite/cli.py +182 -0
- golden_suite-0.1.0/llms.txt +94 -0
- golden_suite-0.1.0/pyproject.toml +76 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Build artifacts
|
|
2
|
+
target/
|
|
3
|
+
dist/
|
|
4
|
+
build/
|
|
5
|
+
*.egg-info/
|
|
6
|
+
node_modules/
|
|
7
|
+
__pycache__/
|
|
8
|
+
**/.hypothesis/
|
|
9
|
+
.venv/
|
|
10
|
+
.uv-cache/
|
|
11
|
+
|
|
12
|
+
# Web UI build output (populated by scripts/build_web.py before `hatch build`).
|
|
13
|
+
# .gitkeep stays so the source tree exists in checkouts and the wheel's
|
|
14
|
+
# force-include glob has something to match.
|
|
15
|
+
packages/python/goldenmatch/goldenmatch/web/static/*
|
|
16
|
+
!packages/python/goldenmatch/goldenmatch/web/static/.gitkeep
|
|
17
|
+
|
|
18
|
+
# Playwright runtime artifacts
|
|
19
|
+
packages/python/goldenmatch/web/frontend/test-results/
|
|
20
|
+
packages/python/goldenmatch/web/frontend/playwright-report/
|
|
21
|
+
|
|
22
|
+
# vitest browser-mode runtime output (R1 cross-JS-target WASM harnesses)
|
|
23
|
+
**/.vitest-attachments/
|
|
24
|
+
packages/typescript/goldenmatch/tests/spike/__screenshots__/
|
|
25
|
+
|
|
26
|
+
# YAML-edit backups (web UI's POST /api/v1/rules/save writes goldenmatch.yml.bak
|
|
27
|
+
# next to the file before clobbering — local-only safety net, not source).
|
|
28
|
+
*.yml.bak
|
|
29
|
+
|
|
30
|
+
# Steward labels — runtime-written by the inspector's review tab. Keep them
|
|
31
|
+
# out of git so a contributor's labels don't ride along on PRs. If you want
|
|
32
|
+
# seed labels for a demo project, commit a curated labels.seed.jsonl and
|
|
33
|
+
# rename at use time.
|
|
34
|
+
labels.jsonl
|
|
35
|
+
|
|
36
|
+
# Generated outputs
|
|
37
|
+
*_lineage.json
|
|
38
|
+
*_clusters.csv
|
|
39
|
+
# Allow committed test fixtures and demo project that mimic run outputs
|
|
40
|
+
!packages/python/goldenmatch/tests/web/fixtures/**
|
|
41
|
+
!packages/python/goldenmatch/tests/**/fixtures/**
|
|
42
|
+
!packages/python/goldenmatch/web/demo/**
|
|
43
|
+
|
|
44
|
+
# IDE
|
|
45
|
+
.vscode/
|
|
46
|
+
.idea/
|
|
47
|
+
|
|
48
|
+
# Turborepo
|
|
49
|
+
.turbo/
|
|
50
|
+
|
|
51
|
+
# Claude Code agent worktrees (transient isolated checkouts created by
|
|
52
|
+
# background subagents). Never tracked; project-level .claude settings can
|
|
53
|
+
# still be committed since only the worktrees subdir is ignored.
|
|
54
|
+
.claude/worktrees/
|
|
55
|
+
|
|
56
|
+
# Superpowers / manual git worktrees (isolated checkouts; never tracked)
|
|
57
|
+
.worktrees/
|
|
58
|
+
|
|
59
|
+
# Local profiling artifacts (per CLAUDE.md convention — cProfile dumps,
|
|
60
|
+
# scale-audit JSON outputs, synthetic fixtures). Documented as gitignored
|
|
61
|
+
# in CLAUDE.md; this entry makes that real.
|
|
62
|
+
.profile_tmp/
|
|
63
|
+
packages/python/goldenmatch/bench-dataset-v1/
|
|
64
|
+
|
|
65
|
+
# Local runtime state: Learning Memory DB, review queue, identity graph,
|
|
66
|
+
# cross-run autoconfig memory. The engine (and the test suite) rewrites these
|
|
67
|
+
# on every run, so they are never source. `.goldenmatch/memory.db` used to be
|
|
68
|
+
# tracked at the repo root and re-dirtied the working tree on each run; it was
|
|
69
|
+
# `git rm --cached`'d alongside this entry.
|
|
70
|
+
.goldenmatch/
|
|
71
|
+
|
|
72
|
+
# Compiled native acceleration ext (built from packages/rust/extensions/native
|
|
73
|
+
# via scripts/build_native.py). Platform-specific abi3 artifact, never source.
|
|
74
|
+
# (.pyd = the Windows in-tree build artifact; .so = Linux/macOS.)
|
|
75
|
+
packages/python/goldenmatch/goldenmatch/_native*.so
|
|
76
|
+
packages/python/goldenmatch/goldenmatch/_native*.pyd
|
|
77
|
+
# maturin-develop drops the compiled ext next to the tracked __init__.py in
|
|
78
|
+
# python/goldenmatch_native/. Ignore ONLY the artifacts, not the source dir.
|
|
79
|
+
packages/rust/extensions/native/python/goldenmatch_native/*.pyd
|
|
80
|
+
packages/rust/extensions/native/python/goldenmatch_native/*.so
|
|
81
|
+
# GoldenCheck's counterpart (packages/rust/extensions/goldencheck-native via
|
|
82
|
+
# scripts/build_goldencheck_native.py). Same rationale.
|
|
83
|
+
packages/python/goldencheck/goldencheck/_native*.so
|
|
84
|
+
|
|
85
|
+
# Benchmark datasets downloaded at runtime (DBLP-ACM, etc.) — not committed.
|
|
86
|
+
datasets/
|
|
87
|
+
# Same, for goldenflow (built from packages/rust/extensions/native-flow).
|
|
88
|
+
packages/python/goldenflow/goldenflow/_native*.so
|
|
89
|
+
# Same, for goldenanalysis (built from packages/rust/extensions/analysis-native
|
|
90
|
+
# via scripts/build_analysis_native.py).
|
|
91
|
+
packages/python/goldenanalysis/goldenanalysis/_native*.so
|
|
92
|
+
|
|
93
|
+
# codebase-memory-mcp local index/snapshot (rebuilt by the SessionStart hook)
|
|
94
|
+
.codebase-memory/
|
|
95
|
+
|
|
96
|
+
# scratch Modal run logs (transient)
|
|
97
|
+
modal_*.log
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Integrating the Golden Suite (agent guide)
|
|
2
|
+
|
|
3
|
+
You are wiring the Golden Suite into a project. This is the canonical setup — follow it
|
|
4
|
+
instead of re-deriving one. If you only read one thing: **`goldenpipe` is the front door.
|
|
5
|
+
Install `golden-suite`, drive `goldenpipe`, reach for individual tools only when you need
|
|
6
|
+
a single capability.**
|
|
7
|
+
|
|
8
|
+
`pip install golden-suite` gives you the whole suite **plus native acceleration, defaulted
|
|
9
|
+
to the perf-optimized configuration** — no env vars to set. It should never silently run
|
|
10
|
+
the slow pure-Python path; `golden-suite doctor` verifies that and `golden-suite optimize`
|
|
11
|
+
repairs it.
|
|
12
|
+
|
|
13
|
+
## The suite in one screen
|
|
14
|
+
|
|
15
|
+
| Package | PyPI | What it does | Import |
|
|
16
|
+
| --- | --- | --- | --- |
|
|
17
|
+
| **GoldenPipe** | `goldenpipe` | Orchestrator. Chains the tools as pluggable stages. **Start here.** | `import goldenpipe as gp` |
|
|
18
|
+
| **GoldenMatch** | `goldenmatch` | Entity resolution: dedupe, match across sources, golden records | `import goldenmatch as gm` |
|
|
19
|
+
| **GoldenCheck** | `goldencheck` | Data validation — discovers rules from the data, no rule-writing | `import goldencheck` |
|
|
20
|
+
| **GoldenFlow** | `goldenflow` | Transform / standardize / normalize messy data | `import goldenflow` |
|
|
21
|
+
| **GoldenSchema** | `infermap` | Inference-driven schema mapping (import name is `infermap`) | `import infermap` |
|
|
22
|
+
| **GoldenAnalysis** | `goldenanalysis` | Read-only cross-cutting metrics + reporting | `import goldenanalysis` |
|
|
23
|
+
| `goldencheck-types` | `goldencheck-types` | Shared field-type contracts (transitive; you won't install directly) | — |
|
|
24
|
+
| `goldensuite-mcp` | `goldensuite-mcp` | One MCP server exposing every tool (the agent front door) | — |
|
|
25
|
+
|
|
26
|
+
Dependency shape (a clean DAG — **GoldenMatch is a leaf, not the root**):
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
goldencheck-types ──► everything (shared contracts)
|
|
30
|
+
infermap (GoldenSchema) ─┐
|
|
31
|
+
goldenmatch ─────────────┤
|
|
32
|
+
goldencheck ─────────────┼──► goldenpipe ──► golden-suite (this meta-package)
|
|
33
|
+
goldenflow ─────────────┤ └──► goldensuite-mcp (all tools, one MCP)
|
|
34
|
+
goldenanalysis ──────────┘
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Install — pick ONE line
|
|
38
|
+
|
|
39
|
+
Native acceleration is **included by default** (it's a hard dependency, not an extra).
|
|
40
|
+
|
|
41
|
+
| You want... | Install |
|
|
42
|
+
| --- | --- |
|
|
43
|
+
| The whole suite + native, perf-optimized | `pip install golden-suite` |
|
|
44
|
+
| Suite + one MCP server for agents | `pip install "golden-suite[mcp]"` |
|
|
45
|
+
| Everything (suite + mcp + serving) | `pip install "golden-suite[all]"` |
|
|
46
|
+
| Just entity resolution | `pip install goldenmatch` |
|
|
47
|
+
| Just validation | `pip install goldencheck` |
|
|
48
|
+
| Orchestrator + the three core tools only | `pip install "goldenpipe[golden-suite]"` |
|
|
49
|
+
|
|
50
|
+
Supported native platforms: Linux x86_64/aarch64, macOS x86_64/arm64, Windows amd64. On a
|
|
51
|
+
platform without a published wheel the install **fails loudly** (by design — the suite does
|
|
52
|
+
not silently degrade to pure-Python). Those users install the individual pure-Python
|
|
53
|
+
packages directly instead of `golden-suite`.
|
|
54
|
+
|
|
55
|
+
## Verify + repair the setup (do this after install)
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
golden-suite doctor # lists every component + whether native is ACTIVE; exits non-zero if silently slow
|
|
59
|
+
golden-suite optimize # installs any missing native kernels for this platform, then re-verifies
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
`doctor` is read-only and CI-safe (non-zero exit when a package is silently on the
|
|
63
|
+
pure-Python path). Programmatic equivalents:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from golden_suite import installed, native_status
|
|
67
|
+
print(installed()) # {"goldenpipe": "1.3.0", "goldenmatch": "2.5.0", ...}
|
|
68
|
+
print(native_status()) # per-package: native_active / silently_slow / env_mode
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Three ways to integrate (choose by consumer)
|
|
72
|
+
|
|
73
|
+
1. **Python API** — you're inside a Python codebase. Import `goldenpipe` (or a single tool).
|
|
74
|
+
2. **MCP** — the consumer is an agent/LLM. Run **one** server: `goldensuite-mcp` (installed on its own, or via `golden-suite[mcp]`). Do **not** wire six per-package MCP servers by hand.
|
|
75
|
+
3. **CLI** — one-off / shell / CI. Every package ships a Typer CLI: `goldenpipe run`, `goldenmatch dedupe`, `goldencheck scan`, etc.
|
|
76
|
+
|
|
77
|
+
## Canonical quick-starts
|
|
78
|
+
|
|
79
|
+
### Full pipeline (validate → transform → match), one call
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import goldenpipe as gp
|
|
83
|
+
|
|
84
|
+
result = gp.run("customers.csv") # zero-config
|
|
85
|
+
print(result.status) # "success"
|
|
86
|
+
print(result.check) # quality findings
|
|
87
|
+
print(result.transform) # what got standardized
|
|
88
|
+
print(result.match) # deduplicated clusters
|
|
89
|
+
print(result.reasoning) # why each decision was made
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Just deduplicate
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
import goldenmatch as gm
|
|
96
|
+
result = gm.dedupe("customers.csv") # zero-config
|
|
97
|
+
# explicit:
|
|
98
|
+
result = gm.dedupe("customers.csv", exact=["email"], fuzzy={"name": 0.85}, blocking=["zip"])
|
|
99
|
+
result.golden.write_csv("deduped.csv")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Match two sources
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
result = gm.match("crm.csv", "billing.csv", fuzzy={"name": 0.85, "address": 0.80})
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Validate (rules discovered from the data)
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
import goldencheck
|
|
112
|
+
report = goldencheck.scan("customers.csv")
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Map an unknown schema to a canonical one
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
import infermap # GoldenSchema
|
|
119
|
+
mapping = infermap.infer("raw_export.csv")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### One MCP server for all of it
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
pip install "golden-suite[mcp]"
|
|
126
|
+
goldensuite-mcp # every suite tool, one server
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Anti-patterns that cause the back-and-forth (don't do these)
|
|
130
|
+
|
|
131
|
+
- **Installing `goldenmatch` and expecting the pipeline / check / transform.** GoldenMatch is
|
|
132
|
+
entity resolution only. For the end-to-end flow use `goldenpipe`.
|
|
133
|
+
- **Hand-wiring each tool into a bespoke pipeline.** `goldenpipe` already registers every tool
|
|
134
|
+
as a stage (`goldencheck.scan`, `goldenflow.transform`, `goldenmatch.dedupe`,
|
|
135
|
+
`goldenmatch.identity_resolve`, `goldenanalysis.report`) via entry-points. Use it.
|
|
136
|
+
- **Running six MCP servers.** One `goldensuite-mcp` exposes them all.
|
|
137
|
+
- **Importing `goldenschema`.** Both the PyPI distribution name and the import name are `infermap`; GoldenSchema is only the product name.
|
|
138
|
+
- **Assuming native is off and setting `<PKG>_NATIVE=1` "to turn it on".** It's already on by
|
|
139
|
+
default (`auto` runs the parity-signed-off hot paths native automatically). `=1` is a
|
|
140
|
+
*require-and-force* mode that also runs components NOT yet parity-signed-off (notably
|
|
141
|
+
goldenflow) and **can change outputs** — only use it via `golden-suite optimize --strict`
|
|
142
|
+
after validating parity for your workload.
|
|
143
|
+
- **Pinning tools to each other's versions.** They release independently. Let `golden-suite`
|
|
144
|
+
carry the compatible lower bounds; don't hard-pin cross-package versions in the consumer.
|
|
145
|
+
|
|
146
|
+
## Notes
|
|
147
|
+
|
|
148
|
+
- Python 3.11–3.13. Everything is Polars-backed.
|
|
149
|
+
- Repo: `benseverndev-oss/goldenmatch` (monorepo — all suite packages live here under
|
|
150
|
+
`packages/python/<pkg>`).
|
|
151
|
+
- Per-tool detail: each package has its own `AGENTS.md` and `llms.txt`.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to golden-suite are documented here. The format follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/); this project uses semantic
|
|
5
|
+
versioning.
|
|
6
|
+
|
|
7
|
+
## [0.1.0] - unreleased
|
|
8
|
+
|
|
9
|
+
Initial release. A one-line, perf-optimized install and a single canonical front
|
|
10
|
+
door for the whole Golden Suite.
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `pip install golden-suite` pulls the whole suite — `goldenpipe[golden-suite]`
|
|
14
|
+
(orchestrator + check/flow/match/analysis), plus `goldenmatch`, `goldencheck`,
|
|
15
|
+
`goldenflow`, `infermap` (GoldenSchema), `goldenanalysis`, `goldencheck-types`.
|
|
16
|
+
- **Native acceleration on by default.** The four native (Rust/abi3) kernels
|
|
17
|
+
(`goldenmatch-native`, `goldencheck-native`, `goldenflow-native`,
|
|
18
|
+
`goldenanalysis-native`) are **hard dependencies**, not an opt-in extra, so the
|
|
19
|
+
suite defaults to the perf-optimized configuration and never silently runs the
|
|
20
|
+
slow pure-Python path. Wheels cover Linux x86_64/aarch64, macOS x86_64/arm64,
|
|
21
|
+
and Windows amd64; on an unsupported platform the install fails loudly by design.
|
|
22
|
+
- `golden-suite` CLI:
|
|
23
|
+
- `doctor` — reports every component + version and whether each native kernel is
|
|
24
|
+
actually active; exits non-zero when a package is silently on the pure-Python
|
|
25
|
+
path (CI/verification-safe).
|
|
26
|
+
- `optimize` — installs any missing native kernels for the current platform, then
|
|
27
|
+
re-verifies. `--strict` additionally emits the require-native env vars
|
|
28
|
+
(`<PKG>_NATIVE=1`), with a warning that strict mode force-runs components not
|
|
29
|
+
yet parity-signed-off (notably goldenflow) and can change outputs.
|
|
30
|
+
- Introspection helpers: `golden_suite.installed()` (dist -> version|None) and
|
|
31
|
+
`golden_suite.native_status()` (per-package `native_active` / `silently_slow` /
|
|
32
|
+
`env_mode`).
|
|
33
|
+
- Optional extras: `[mcp]` (`goldensuite-mcp` — one server for every tool),
|
|
34
|
+
`[agent]` (GoldenPipe tui/api/agent serving surfaces), and `[all]`.
|
|
35
|
+
- Integration guide for agents and humans: `AGENTS.md`, `llms.txt`, `README.md`.
|
|
36
|
+
|
|
37
|
+
### Notes
|
|
38
|
+
- Ships no data-processing logic of its own beyond the CLI + introspection helpers.
|
|
39
|
+
- Published on the `golden-suite-v*` release tag via `publish-golden-suite.yml`
|
|
40
|
+
(distinct from the `goldensuite-mcp-v*` tag).
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: golden-suite
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: One-line, perf-optimized install for the entire Golden Suite — goldenmatch, goldencheck, goldenflow, goldenpipe, GoldenSchema (infermap), goldenanalysis, native acceleration on by default
|
|
5
|
+
Project-URL: Homepage, https://github.com/benseverndev-oss/goldenmatch
|
|
6
|
+
Project-URL: Repository, https://github.com/benseverndev-oss/goldenmatch
|
|
7
|
+
Project-URL: Documentation, https://github.com/benseverndev-oss/goldenmatch/tree/main/packages/python/golden-suite#readme
|
|
8
|
+
Project-URL: Issues, https://github.com/benseverndev-oss/goldenmatch/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/benseverndev-oss/goldenmatch/blob/main/packages/python/golden-suite/CHANGELOG.md
|
|
10
|
+
Author-email: Ben Severn <ben@bensevern.dev>
|
|
11
|
+
License: MIT
|
|
12
|
+
Keywords: data-quality,entity-resolution,golden-suite,meta-package,pipeline,schema-inference
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Requires-Python: <3.14,>=3.11
|
|
23
|
+
Requires-Dist: goldenanalysis-native>=0.1
|
|
24
|
+
Requires-Dist: goldenanalysis>=0.3
|
|
25
|
+
Requires-Dist: goldencheck-native>=0.1
|
|
26
|
+
Requires-Dist: goldencheck-types>=0.1
|
|
27
|
+
Requires-Dist: goldencheck>=1.4
|
|
28
|
+
Requires-Dist: goldenflow-native>=0.1.1
|
|
29
|
+
Requires-Dist: goldenflow>=1.3
|
|
30
|
+
Requires-Dist: goldenmatch-native>=0.1.12
|
|
31
|
+
Requires-Dist: goldenmatch>=2.5
|
|
32
|
+
Requires-Dist: goldenpipe[golden-suite]>=1.3
|
|
33
|
+
Requires-Dist: infermap>=0.5.1
|
|
34
|
+
Requires-Dist: rich>=13.0
|
|
35
|
+
Requires-Dist: typer>=0.12
|
|
36
|
+
Provides-Extra: agent
|
|
37
|
+
Requires-Dist: goldenpipe[agent,api,tui]>=1.3; extra == 'agent'
|
|
38
|
+
Provides-Extra: all
|
|
39
|
+
Requires-Dist: goldenpipe[agent,api,tui]>=1.3; extra == 'all'
|
|
40
|
+
Requires-Dist: goldensuite-mcp>=0.3; extra == 'all'
|
|
41
|
+
Provides-Extra: dev
|
|
42
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
43
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
44
|
+
Provides-Extra: mcp
|
|
45
|
+
Requires-Dist: goldensuite-mcp>=0.3; extra == 'mcp'
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
|
|
48
|
+
# golden-suite
|
|
49
|
+
|
|
50
|
+
One-line, perf-optimized install and single front door for the whole **Golden Suite**.
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install golden-suite # whole suite + native acceleration, defaulted to the fast config
|
|
54
|
+
golden-suite doctor # verify native is actually active
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
This is a thin meta-package. It pulls in every suite tool **plus the native (Rust)
|
|
58
|
+
acceleration kernels, on by default**, and gives you (and your agents) one canonical entry
|
|
59
|
+
point. It ships no data-processing logic of its own — just a `doctor`/`optimize` CLI and
|
|
60
|
+
introspection helpers.
|
|
61
|
+
|
|
62
|
+
## What you get
|
|
63
|
+
|
|
64
|
+
| Tool | Does | Import |
|
|
65
|
+
| --- | --- | --- |
|
|
66
|
+
| **GoldenPipe** | Orchestrator — chains the tools as pluggable stages. **Start here.** | `import goldenpipe as gp` |
|
|
67
|
+
| **GoldenMatch** | Entity resolution: dedupe, match, golden records | `import goldenmatch as gm` |
|
|
68
|
+
| **GoldenCheck** | Data validation (rules discovered from your data) | `import goldencheck` |
|
|
69
|
+
| **GoldenFlow** | Transform / standardize / normalize | `import goldenflow` |
|
|
70
|
+
| **GoldenSchema** | Inference-driven schema mapping (import name: `infermap`) | `import infermap` |
|
|
71
|
+
| **GoldenAnalysis** | Read-only metrics + reporting | `import goldenanalysis` |
|
|
72
|
+
|
|
73
|
+
`goldenpipe` is the front door: it adapts every other tool as a stage, so most integrations
|
|
74
|
+
only ever touch `goldenpipe`. `goldenmatch` is a leaf (entity resolution only), not the root.
|
|
75
|
+
|
|
76
|
+
## Install options
|
|
77
|
+
|
|
78
|
+
Native acceleration is included by default (a hard dependency, not an extra).
|
|
79
|
+
|
|
80
|
+
| You want | Install |
|
|
81
|
+
| --- | --- |
|
|
82
|
+
| The whole suite + native | `pip install golden-suite` |
|
|
83
|
+
| Suite + one MCP server | `pip install "golden-suite[mcp]"` |
|
|
84
|
+
| Everything (suite + mcp + serving) | `pip install "golden-suite[all]"` |
|
|
85
|
+
|
|
86
|
+
Native wheels cover Linux x86_64/aarch64, macOS x86_64/arm64, Windows amd64. On an
|
|
87
|
+
unsupported platform the install fails loudly rather than silently degrading — install the
|
|
88
|
+
individual pure-Python packages directly there.
|
|
89
|
+
|
|
90
|
+
## Quick start
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
import goldenpipe as gp
|
|
94
|
+
|
|
95
|
+
result = gp.run("customers.csv") # validate -> transform -> match, one call
|
|
96
|
+
print(result.status, result.match, result.reasoning)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Verify + repair the perf setup:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
golden-suite doctor # every component + whether native is ACTIVE (non-zero exit if silently slow)
|
|
103
|
+
golden-suite optimize # install any missing native kernels, then re-verify
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from golden_suite import installed, native_status
|
|
108
|
+
print(installed()) # {"goldenpipe": "1.3.0", "goldenmatch": "2.5.0", ...}
|
|
109
|
+
print(native_status()) # per-package native_active / silently_slow / env_mode
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## For agents
|
|
113
|
+
|
|
114
|
+
See [`AGENTS.md`](./AGENTS.md) and [`llms.txt`](./llms.txt) — the canonical integration guide,
|
|
115
|
+
including the anti-patterns that cause most of the "wrong setup" back-and-forth.
|
|
116
|
+
|
|
117
|
+
## License
|
|
118
|
+
|
|
119
|
+
MIT. Part of the [Golden Suite monorepo](https://github.com/benseverndev-oss/goldenmatch).
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# golden-suite
|
|
2
|
+
|
|
3
|
+
One-line, perf-optimized install and single front door for the whole **Golden Suite**.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install golden-suite # whole suite + native acceleration, defaulted to the fast config
|
|
7
|
+
golden-suite doctor # verify native is actually active
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
This is a thin meta-package. It pulls in every suite tool **plus the native (Rust)
|
|
11
|
+
acceleration kernels, on by default**, and gives you (and your agents) one canonical entry
|
|
12
|
+
point. It ships no data-processing logic of its own — just a `doctor`/`optimize` CLI and
|
|
13
|
+
introspection helpers.
|
|
14
|
+
|
|
15
|
+
## What you get
|
|
16
|
+
|
|
17
|
+
| Tool | Does | Import |
|
|
18
|
+
| --- | --- | --- |
|
|
19
|
+
| **GoldenPipe** | Orchestrator — chains the tools as pluggable stages. **Start here.** | `import goldenpipe as gp` |
|
|
20
|
+
| **GoldenMatch** | Entity resolution: dedupe, match, golden records | `import goldenmatch as gm` |
|
|
21
|
+
| **GoldenCheck** | Data validation (rules discovered from your data) | `import goldencheck` |
|
|
22
|
+
| **GoldenFlow** | Transform / standardize / normalize | `import goldenflow` |
|
|
23
|
+
| **GoldenSchema** | Inference-driven schema mapping (import name: `infermap`) | `import infermap` |
|
|
24
|
+
| **GoldenAnalysis** | Read-only metrics + reporting | `import goldenanalysis` |
|
|
25
|
+
|
|
26
|
+
`goldenpipe` is the front door: it adapts every other tool as a stage, so most integrations
|
|
27
|
+
only ever touch `goldenpipe`. `goldenmatch` is a leaf (entity resolution only), not the root.
|
|
28
|
+
|
|
29
|
+
## Install options
|
|
30
|
+
|
|
31
|
+
Native acceleration is included by default (a hard dependency, not an extra).
|
|
32
|
+
|
|
33
|
+
| You want | Install |
|
|
34
|
+
| --- | --- |
|
|
35
|
+
| The whole suite + native | `pip install golden-suite` |
|
|
36
|
+
| Suite + one MCP server | `pip install "golden-suite[mcp]"` |
|
|
37
|
+
| Everything (suite + mcp + serving) | `pip install "golden-suite[all]"` |
|
|
38
|
+
|
|
39
|
+
Native wheels cover Linux x86_64/aarch64, macOS x86_64/arm64, Windows amd64. On an
|
|
40
|
+
unsupported platform the install fails loudly rather than silently degrading — install the
|
|
41
|
+
individual pure-Python packages directly there.
|
|
42
|
+
|
|
43
|
+
## Quick start
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
import goldenpipe as gp
|
|
47
|
+
|
|
48
|
+
result = gp.run("customers.csv") # validate -> transform -> match, one call
|
|
49
|
+
print(result.status, result.match, result.reasoning)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Verify + repair the perf setup:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
golden-suite doctor # every component + whether native is ACTIVE (non-zero exit if silently slow)
|
|
56
|
+
golden-suite optimize # install any missing native kernels, then re-verify
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from golden_suite import installed, native_status
|
|
61
|
+
print(installed()) # {"goldenpipe": "1.3.0", "goldenmatch": "2.5.0", ...}
|
|
62
|
+
print(native_status()) # per-package native_active / silently_slow / env_mode
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## For agents
|
|
66
|
+
|
|
67
|
+
See [`AGENTS.md`](./AGENTS.md) and [`llms.txt`](./llms.txt) — the canonical integration guide,
|
|
68
|
+
including the anti-patterns that cause most of the "wrong setup" back-and-forth.
|
|
69
|
+
|
|
70
|
+
## License
|
|
71
|
+
|
|
72
|
+
MIT. Part of the [Golden Suite monorepo](https://github.com/benseverndev-oss/goldenmatch).
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Golden Suite meta-package.
|
|
2
|
+
|
|
3
|
+
One-line, perf-optimized install and a single canonical entry point for the whole
|
|
4
|
+
suite. Ships almost no logic of its own — just introspection helpers and a
|
|
5
|
+
``golden-suite`` CLI (``doctor`` / ``optimize``). For real work, import the
|
|
6
|
+
individual tools (or, most of the time, just ``goldenpipe`` — the orchestrator
|
|
7
|
+
that adapts every other tool as a stage).
|
|
8
|
+
|
|
9
|
+
import goldenpipe as gp
|
|
10
|
+
result = gp.run("customers.csv") # runs check -> transform -> match end to end
|
|
11
|
+
|
|
12
|
+
Native acceleration is installed by default. Use :func:`installed` to see which
|
|
13
|
+
components resolved, and :func:`native_status` to see whether each native kernel
|
|
14
|
+
is actually active (the truth behind "am I on the fast path").
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import importlib
|
|
20
|
+
import os
|
|
21
|
+
from importlib import metadata
|
|
22
|
+
|
|
23
|
+
__version__ = "0.1.0"
|
|
24
|
+
|
|
25
|
+
# PyPI distribution name -> import module name. Keep in lockstep with pyproject deps.
|
|
26
|
+
_COMPONENTS: dict[str, str] = {
|
|
27
|
+
"goldenpipe": "goldenpipe",
|
|
28
|
+
"goldenmatch": "goldenmatch",
|
|
29
|
+
"goldencheck": "goldencheck",
|
|
30
|
+
"goldenflow": "goldenflow",
|
|
31
|
+
"infermap": "infermap", # GoldenSchema
|
|
32
|
+
"goldenanalysis": "goldenanalysis",
|
|
33
|
+
"goldencheck-types": "goldencheck_types",
|
|
34
|
+
"goldensuite-mcp": "goldensuite_mcp",
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
# Packages that ship an optional native (Rust/abi3) accelerator, and the pieces
|
|
38
|
+
# needed to reason about it WITHOUT importing the heavy top-level package:
|
|
39
|
+
# base package -> (native distribution, standalone native import module, env var)
|
|
40
|
+
# The runtime loader tries ``<pkg>._native`` (in-tree build) then
|
|
41
|
+
# ``<pkg>_native._native`` (the published wheel). For a pip user only the wheel
|
|
42
|
+
# path exists, so probing the standalone ``<pkg>_native`` module is both accurate
|
|
43
|
+
# and lightweight (it does not pull in polars et al.).
|
|
44
|
+
_NATIVE: dict[str, tuple[str, str, str]] = {
|
|
45
|
+
"goldenmatch": ("goldenmatch-native", "goldenmatch_native", "GOLDENMATCH_NATIVE"),
|
|
46
|
+
"goldencheck": ("goldencheck-native", "goldencheck_native", "GOLDENCHECK_NATIVE"),
|
|
47
|
+
"goldenflow": ("goldenflow-native", "goldenflow_native", "GOLDENFLOW_NATIVE"),
|
|
48
|
+
"goldenanalysis": (
|
|
49
|
+
"goldenanalysis-native",
|
|
50
|
+
"goldenanalysis_native",
|
|
51
|
+
"GOLDENANALYSIS_NATIVE",
|
|
52
|
+
),
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _version_or_none(dist: str) -> str | None:
|
|
57
|
+
try:
|
|
58
|
+
return metadata.version(dist)
|
|
59
|
+
except metadata.PackageNotFoundError:
|
|
60
|
+
return None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def installed() -> dict[str, str | None]:
    """Report the installed version of every suite component.

    Returns a mapping ``{distribution_name: version-or-None}`` with one entry per
    key of ``_COMPONENTS``, in that order. A ``None`` value means the component is
    not installed in this environment. This is the quickest way to confirm that
    an integration actually got the intended setup.
    """
    versions: dict[str, str | None] = {}
    for dist_name in _COMPONENTS:
        versions[dist_name] = _version_or_none(dist_name)
    return versions
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _native_importable(native_module: str) -> bool:
|
|
73
|
+
"""Whether the standalone native wheel (e.g. ``goldenmatch_native._native``)
|
|
74
|
+
imports cleanly — the same path the runtime loader uses under a pip install."""
|
|
75
|
+
try:
|
|
76
|
+
mod = importlib.import_module(native_module)
|
|
77
|
+
except Exception: # noqa: BLE001 - any import/load failure => not available
|
|
78
|
+
return False
|
|
79
|
+
if getattr(mod, "_native", None) is not None:
|
|
80
|
+
return True
|
|
81
|
+
try: # some builds expose ``_native`` only as a submodule
|
|
82
|
+
importlib.import_module(f"{native_module}._native")
|
|
83
|
+
return True
|
|
84
|
+
except Exception: # noqa: BLE001
|
|
85
|
+
return False
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def native_status() -> dict[str, dict[str, object]]:
    """Per-package native-acceleration status.

    Returns one entry per key of ``_NATIVE``; each value is a dict with:

    - ``base_installed`` : the base package version, or ``None``
    - ``native_dist``    : the name of the native wheel distribution
    - ``native_version`` : the native wheel version, or ``None``
    - ``native_active``  : whether the native kernel imports (fast path live)
    - ``env_var``        : the name of the ``<PKG>_NATIVE`` environment variable
    - ``env_mode``       : that variable's value, lower-cased (``auto`` if unset)
    - ``silently_slow``  : base installed but native NOT active AND env != "0"
      — i.e. the runtime is silently on the pure-Python path
    """
    report: dict[str, dict[str, object]] = {}
    for pkg, spec in _NATIVE.items():
        native_dist, native_module, env_var = spec
        base_version = _version_or_none(pkg)
        wheel_version = _version_or_none(native_dist)
        active = _native_importable(native_module)
        mode = os.environ.get(env_var, "auto").lower()
        # Slow only when none of the "fine" conditions hold: base missing,
        # native live, or native explicitly switched off with "0".
        slow = not (base_version is None or active or mode == "0")
        report[pkg] = {
            "base_installed": base_version,
            "native_dist": native_dist,
            "native_version": wheel_version,
            "native_active": active,
            "env_var": env_var,
            "env_mode": mode,
            "silently_slow": slow,
        }
    return report
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
__all__ = ["__version__", "installed", "native_status"]
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""``golden-suite`` CLI: verify and repair the perf-optimized setup.
|
|
2
|
+
|
|
3
|
+
golden-suite doctor # report every component + whether native is actually active
|
|
4
|
+
golden-suite optimize # install any missing native kernels, then re-verify
|
|
5
|
+
|
|
6
|
+
``doctor`` is read-only and exits non-zero when the runtime would silently run the
|
|
7
|
+
slow pure-Python path — so it doubles as a CI/verification gate. ``optimize`` acts.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json as _json
|
|
13
|
+
import subprocess
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
import typer
|
|
17
|
+
from rich.console import Console
|
|
18
|
+
from rich.table import Table
|
|
19
|
+
|
|
20
|
+
from . import __version__, installed, native_status
|
|
21
|
+
|
|
22
|
+
# Root Typer application that the ``doctor`` / ``optimize`` / ``version``
# commands register on. Running it with no arguments prints help.
app = typer.Typer(
    help="Verify and repair the perf-optimized Golden Suite setup.",
    no_args_is_help=True,
    add_completion=False,
)

# Two Rich consoles: failures and warnings go to stderr, everything else to stdout.
_err = Console(stderr=True)
_console = Console()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _components_table() -> Table:
    """Build the "Golden Suite components" table from ``installed()``.

    One row per distribution: its name and version, or a yellow
    "not installed" marker when the reported version is ``None``.
    """
    grid = Table(title="Golden Suite components", title_style="bold")
    for heading in ("Package", "Version"):
        grid.add_column(heading)
    for name, ver in installed().items():
        grid.add_row(name, "[yellow]not installed[/]" if ver is None else ver)
    return grid
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _native_table(status: dict[str, dict[str, object]]) -> Table:
    """Render a ``native_status()``-shaped mapping as a Rich table.

    Packages whose ``base_installed`` is ``None`` are left out. Every other
    package gets a row with its native wheel version (or "missing"), the
    active code path, the env mode, and a verdict.
    """
    grid = Table(title="Native acceleration", title_style="bold")
    for heading in ("Package", "Native wheel", "Fast path", "Env", "Verdict"):
        grid.add_column(heading)

    for pkg, info in status.items():
        if info["base_installed"] is None:
            # Base package not installed; nothing to accelerate.
            continue

        wheel = info["native_version"] or "[yellow]missing[/]"
        active = info["native_active"]
        fast = "[green]native[/]" if active else "[red]pure-python[/]"
        mode = info["env_mode"]

        # Verdict precedence: an explicit env opt-out wins, then the
        # silently-slow flag, then a live native kernel.
        if mode == "0":
            verdict = "[yellow]native disabled (env=0)[/]"
        elif info["silently_slow"]:
            verdict = "[red]SILENTLY SLOW[/]"
        elif active:
            verdict = "[green]OK[/]"
        else:
            verdict = "[yellow]inactive[/]"

        grid.add_row(pkg, str(wheel), fast, str(mode), verdict)
    return grid
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@app.command()
def doctor(
    as_json: bool = typer.Option(False, "--json", help="Emit machine-readable JSON."),
    strict: bool = typer.Option(
        True,
        "--strict/--no-strict",
        help="Exit non-zero if any package is silently on the pure-Python path.",
    ),
) -> None:
    """Report every component and whether the native fast path is actually active."""
    # The docstring above is shown by Typer as this command's help text, so it
    # is user-facing output rather than internal documentation.
    status = native_status()
    slow = [name for name, info in status.items() if info["silently_slow"]]
    absent = [dist for dist, ver in installed().items() if ver is None]

    if not as_json:
        # Human-readable report: both tables, then a one-line summary.
        _console.print(_components_table())
        _console.print(_native_table(status))
        if slow:
            _err.print(
                f"\n[red]FAIL[/]: {', '.join(slow)} installed but running "
                f"pure-Python. Run [bold]golden-suite optimize[/] to fix."
            )
        else:
            _console.print("\n[green]OK[/]: native acceleration active where expected.")
    else:
        # Machine-readable report: a single JSON document on stdout.
        payload = {
            "version": __version__,
            "installed": installed(),
            "native": status,
            "silently_slow": slow,
            "missing_components": absent,
        }
        _console.print_json(_json.dumps(payload))

    # In strict mode a silently-slow package makes the command exit with code 1.
    if strict and slow:
        raise typer.Exit(code=1)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@app.command()
def optimize(
    strict_runtime: bool = typer.Option(
        False,
        "--strict/--no-strict",
        help=(
            "Also emit require-native env vars (<PKG>_NATIVE=1). WARNING: strict "
            "mode forces native for components NOT yet parity-signed-off (notably "
            "goldenflow) and can change outputs. Off by default."
        ),
    ),
    dry_run: bool = typer.Option(
        False, "--dry-run", help="Show what would be installed, do nothing."
    ),
) -> None:
    """Install any missing native kernels for this platform, then re-verify."""
    # The docstring above is shown by Typer as this command's help text.
    #
    # Flow:
    #   1. Collect the native wheels whose base package is installed but whose
    #      kernel is not active.
    #   2. Unless --dry-run, install them with pip under the current interpreter;
    #      a pip failure exits with pip's return code.
    #   3. With --strict, print the require-native ``export`` lines plus a warning.
    #   4. Unless --dry-run, run ``doctor`` in non-strict mode as the re-verify step.
    import importlib  # local import: only needed to refresh caches after pip runs

    status = native_status()
    # A package needs repair when its base is installed but the native wheel is
    # absent or won't import on this interpreter.
    to_install = [
        str(s["native_dist"])
        for s in status.values()
        if s["base_installed"] is not None and not s["native_active"]
    ]

    if not to_install:
        _console.print("[green]Already optimal[/]: every native kernel is active.")
    else:
        _console.print(
            f"Native kernels to install: [bold]{', '.join(to_install)}[/]"
        )
        if dry_run:
            _console.print("[yellow]--dry-run[/]: nothing installed.")
        else:
            cmd = [sys.executable, "-m", "pip", "install", "--upgrade", *to_install]
            _console.print(f"$ {' '.join(cmd)}")
            result = subprocess.run(cmd, check=False)
            if result.returncode != 0:
                _err.print(
                    "[red]pip install failed[/]. On a platform without a published "
                    "wheel, native cannot be enabled — see the docs for supported "
                    "platforms."
                )
                raise typer.Exit(code=result.returncode)
            # pip ran in a child process, so this interpreter's import finders
            # may still hold directory listings from before the install. Drop
            # them so the re-verify below can see the new wheels.
            importlib.invalidate_caches()

    if strict_runtime:
        _console.print(
            "\n[bold]Require-native env[/] (add to your shell/.env to make a missing "
            "kernel raise instead of silently falling back):"
        )
        for s in status.values():
            if s["base_installed"] is not None:
                _console.print(f" export {s['env_var']}=1")
        _err.print(
            "\n[yellow]WARNING[/]: <PKG>_NATIVE=1 forces native for components not "
            "yet parity-signed-off (e.g. goldenflow) and MAY change outputs. Only "
            "use it if you have validated parity for your workload."
        )

    if not dry_run:
        _console.print()
        # Re-verify in non-strict mode: report the result without forcing a
        # non-zero exit from ``optimize`` itself.
        doctor(as_json=False, strict=False)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@app.command()
def version() -> None:
    """Print the golden-suite meta-package version."""
    # The docstring above is shown by Typer as this command's help text.
    current = __version__
    _console.print(current)


# Allow running this module directly as a script.
if __name__ == "__main__":  # pragma: no cover
    app()
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Golden Suite
|
|
2
|
+
|
|
3
|
+
> One-line, perf-optimized install and single front door for the whole Golden Suite: entity
|
|
4
|
+
> resolution (GoldenMatch), data validation (GoldenCheck), transformation (GoldenFlow),
|
|
5
|
+
> schema mapping (GoldenSchema/infermap), read-only analysis (GoldenAnalysis), all
|
|
6
|
+
> orchestrated by GoldenPipe. Native (Rust) acceleration is included and on by default;
|
|
7
|
+
> the suite never silently runs the slow pure-Python path. Ships a tiny `golden-suite` CLI
|
|
8
|
+
> (doctor/optimize) plus introspection helpers.
|
|
9
|
+
|
|
10
|
+
## Mental model
|
|
11
|
+
- `goldenpipe` is the orchestrator and the front door. It adapts every other tool as a
|
|
12
|
+
pluggable stage. Most integrations only touch `goldenpipe`.
|
|
13
|
+
- `goldenmatch` is a LEAF (entity resolution), not the root. Do not expect the pipeline,
|
|
14
|
+
validation, or transforms from it.
|
|
15
|
+
- `goldensuite-mcp` is one MCP server exposing every tool — the agent front door.
|
|
16
|
+
|
|
17
|
+
## Components
|
|
18
|
+
- GoldenPipe (`goldenpipe`) — orchestrator; `import goldenpipe as gp`
|
|
19
|
+
- GoldenMatch (`goldenmatch`) — dedupe, match, golden records; `import goldenmatch as gm`
|
|
20
|
+
- GoldenCheck (`goldencheck`) — validation, rules discovered from data; `import goldencheck`
|
|
21
|
+
- GoldenFlow (`goldenflow`) — transform / standardize / normalize; `import goldenflow`
|
|
22
|
+
- GoldenSchema (`infermap`) — inference-driven schema mapping; `import infermap`
|
|
23
|
+
- GoldenAnalysis (`goldenanalysis`) — read-only metrics + reporting; `import goldenanalysis`
|
|
24
|
+
- `goldencheck-types` — shared field-type contracts (transitive)
|
|
25
|
+
- `goldensuite-mcp` — one MCP server, every tool
|
|
26
|
+
|
|
27
|
+
## Install
|
|
28
|
+
- `pip install golden-suite` — whole suite + native acceleration, perf-optimized (Python)
|
|
29
|
+
- `pip install "golden-suite[mcp]"` — suite + one MCP server (`goldensuite-mcp`)
|
|
30
|
+
- `pip install "golden-suite[all]"` — suite + mcp + serving surfaces
|
|
31
|
+
- Single tool instead: `pip install goldenmatch` / `goldencheck` / `goldenflow` / `infermap`
|
|
32
|
+
- Orchestrator + core three: `pip install "goldenpipe[golden-suite]"`
|
|
33
|
+
- Native wheels cover Linux x86_64/aarch64, macOS x86_64/arm64, Windows amd64. On an
|
|
34
|
+
unsupported platform the install fails loudly (no silent pure-Python) — install the
|
|
35
|
+
individual pure-Python packages directly there.
|
|
36
|
+
|
|
37
|
+
## Verify + repair the setup
|
|
38
|
+
- `golden-suite doctor` — every component + whether native is ACTIVE; non-zero exit if silently slow (CI-safe)
|
|
39
|
+
- `golden-suite optimize` — install missing native kernels for this platform, then re-verify
|
|
40
|
+
```python
|
|
41
|
+
from golden_suite import installed, native_status
|
|
42
|
+
print(installed()) # dist -> version|None for every suite component
|
|
43
|
+
print(native_status()) # per-package native_active / silently_slow / env_mode
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Quick examples
|
|
47
|
+
|
|
48
|
+
### Full pipeline (validate -> transform -> match), one call
|
|
49
|
+
```python
|
|
50
|
+
import goldenpipe as gp
|
|
51
|
+
result = gp.run("customers.csv")
|
|
52
|
+
print(result.status, result.check, result.transform, result.match, result.reasoning)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Deduplicate
|
|
56
|
+
```python
|
|
57
|
+
import goldenmatch as gm
|
|
58
|
+
result = gm.dedupe("customers.csv", exact=["email"], fuzzy={"name": 0.85}, blocking=["zip"])
|
|
59
|
+
result.golden.write_csv("deduped.csv")
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Match two sources
|
|
63
|
+
```python
|
|
64
|
+
import goldenmatch as gm
result = gm.match("crm.csv", "billing.csv", fuzzy={"name": 0.85, "address": 0.80})
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Validate
|
|
68
|
+
```python
|
|
69
|
+
import goldencheck
|
|
70
|
+
report = goldencheck.scan("customers.csv")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Map an unknown schema
|
|
74
|
+
```python
|
|
75
|
+
import infermap # GoldenSchema
|
|
76
|
+
mapping = infermap.infer("raw_export.csv")
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### One MCP server for everything
|
|
80
|
+
```bash
|
|
81
|
+
pip install "golden-suite[mcp]" && goldensuite-mcp
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Anti-patterns
|
|
85
|
+
- Installing `goldenmatch` and expecting the pipeline/check/transform — use `goldenpipe`.
|
|
86
|
+
- Hand-wiring each tool — `goldenpipe` already registers them as stages.
|
|
87
|
+
- Running six MCP servers — use one `goldensuite-mcp`.
|
|
88
|
+
- `import goldenschema` — the import name is `infermap`.
|
|
89
|
+
- Setting `<PKG>_NATIVE=1` "to enable native" — it's already on by default; `=1` force-runs
|
|
90
|
+
non-signed-off components (goldenflow) and can change outputs. Use `optimize --strict` only
|
|
91
|
+
after validating parity.
|
|
92
|
+
|
|
93
|
+
## Repo
|
|
94
|
+
- Monorepo: benseverndev-oss/goldenmatch (packages/python/<pkg>). Python 3.11-3.13, Polars-backed.
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "golden-suite"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "One-line, perf-optimized install for the entire Golden Suite — goldenmatch, goldencheck, goldenflow, goldenpipe, GoldenSchema (infermap), goldenanalysis, native acceleration on by default"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11,<3.14"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Ben Severn", email = "ben@bensevern.dev" }]
|
|
13
|
+
keywords = ["data-quality", "entity-resolution", "pipeline", "schema-inference", "golden-suite", "meta-package"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
# `pip install golden-suite` gets the WHOLE suite AND the native acceleration
|
|
27
|
+
# kernels, in one line, defaulting to the perf-optimized configuration.
|
|
28
|
+
#
|
|
29
|
+
# `goldenpipe` is the orchestrator (adapts every tool as a stage). The native
|
|
30
|
+
# wheels are HARD dependencies on purpose: the suite should never silently run
|
|
31
|
+
# the slow pure-Python path. abi3 wheels are published for the five mainstream
|
|
32
|
+
# platforms (Linux x86_64/aarch64, macOS x86_64/arm64, Windows amd64); on any
|
|
33
|
+
# platform without a wheel the install fails LOUDLY rather than degrading.
|
|
34
|
+
# Under each package's default `auto` gate, the parity-signed-off hot paths then
|
|
35
|
+
# run native automatically — no env vars to set. `golden-suite doctor` verifies
|
|
36
|
+
# it; `golden-suite optimize` repairs it.
|
|
37
|
+
dependencies = [
|
|
38
|
+
# --- suite (floors track the current majors) ---
|
|
39
|
+
"goldenpipe[golden-suite]>=1.3", # orchestrator + check/flow/match/analysis
|
|
40
|
+
"goldenmatch>=2.5", # entity resolution: dedupe, match, golden records (2.7.0 is latest on PyPI as of this floor bump)
|
|
41
|
+
"goldencheck>=1.4", # data validation (rules discovered from your data)
|
|
42
|
+
"goldenflow>=1.3", # transform / standardize / normalize
|
|
43
|
+
"infermap>=0.5.1", # GoldenSchema: inference-driven schema mapping
|
|
44
|
+
"goldenanalysis>=0.3", # read-only cross-cutting metrics + reporting
|
|
45
|
+
"goldencheck-types>=0.1", # shared canonical field-type contracts
|
|
46
|
+
# --- native acceleration (on by default; see note above) ---
|
|
47
|
+
"goldenmatch-native>=0.1.12", # >=0.1.12 carries perceptual + the current kernel surface
|
|
48
|
+
"goldencheck-native>=0.1",
|
|
49
|
+
"goldenflow-native>=0.1.1",
|
|
50
|
+
"goldenanalysis-native>=0.1",
|
|
51
|
+
# --- CLI (doctor / optimize) ---
|
|
52
|
+
"typer>=0.12",
|
|
53
|
+
"rich>=13.0",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
[project.optional-dependencies]
|
|
57
|
+
# One MCP server exposing every tool — the agent front door.
|
|
58
|
+
mcp = ["goldensuite-mcp>=0.3"]
|
|
59
|
+
# GoldenPipe serving surfaces (A2A + async transports + TUI + REST).
|
|
60
|
+
agent = ["goldenpipe[tui,api,agent]>=1.3"]
|
|
61
|
+
# Everything: full suite + native + MCP + serving.
|
|
62
|
+
all = ["golden-suite[mcp,agent]"]
|
|
63
|
+
dev = ["pytest>=8", "ruff>=0.6"]
|
|
64
|
+
|
|
65
|
+
[project.scripts]
|
|
66
|
+
golden-suite = "golden_suite.cli:app"
|
|
67
|
+
|
|
68
|
+
[project.urls]
|
|
69
|
+
Homepage = "https://github.com/benseverndev-oss/goldenmatch"
|
|
70
|
+
Repository = "https://github.com/benseverndev-oss/goldenmatch"
|
|
71
|
+
Documentation = "https://github.com/benseverndev-oss/goldenmatch/tree/main/packages/python/golden-suite#readme"
|
|
72
|
+
Issues = "https://github.com/benseverndev-oss/goldenmatch/issues"
|
|
73
|
+
Changelog = "https://github.com/benseverndev-oss/goldenmatch/blob/main/packages/python/golden-suite/CHANGELOG.md"
|
|
74
|
+
|
|
75
|
+
[tool.hatch.build.targets.wheel]
|
|
76
|
+
packages = ["golden_suite"]
|