rocky-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rocky_sdk-0.1.0/.gitignore +62 -0
- rocky_sdk-0.1.0/CLAUDE.md +92 -0
- rocky_sdk-0.1.0/PKG-INFO +103 -0
- rocky_sdk-0.1.0/README.md +79 -0
- rocky_sdk-0.1.0/pyproject.toml +59 -0
- rocky_sdk-0.1.0/src/rocky_sdk/__init__.py +48 -0
- rocky_sdk-0.1.0/src/rocky_sdk/_subprocess.py +121 -0
- rocky_sdk-0.1.0/src/rocky_sdk/client.py +1128 -0
- rocky_sdk-0.1.0/src/rocky_sdk/exceptions.py +181 -0
- rocky_sdk-0.1.0/src/rocky_sdk/py.typed +0 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types.py +1326 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/__init__.py +476 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/adapter_config_schema.py +459 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ai_contract_schema.py +59 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ai_explain_schema.py +22 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ai_schema.py +28 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ai_sync_schema.py +23 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ai_test_schema.py +28 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/apply_schema.py +42 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/archive_apply_schema.py +55 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/archive_schema.py +66 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/branch_approve_schema.py +95 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/branch_delete_schema.py +20 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/branch_list_schema.py +29 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/branch_promote_schema.py +543 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/branch_schema.py +28 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/catalog_schema.py +146 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ci_diff_schema.py +470 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ci_schema.py +89 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/column_lineage_schema.py +40 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compact_apply_schema.py +55 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compact_dedup_schema.py +129 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compact_schema.py +79 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compare_schema.py +34 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compile_schema.py +472 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compliance_schema.py +114 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/cost_schema.py +79 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/dag_run_schema.py +78 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/dag_schema.py +411 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/discover_schema.py +163 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/doctor_schema.py +41 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/drift_schema.py +28 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/estimate_schema.py +45 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/history_schema.py +84 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/hooks_list_schema.py +23 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/hooks_test_schema.py +23 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/import_dbt_schema.py +223 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/lineage_diff_schema.py +144 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/lineage_schema.py +69 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/load_schema.py +63 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/metrics_schema.py +47 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/model_history_schema.py +86 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/optimize_schema.py +48 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/plan_promote_schema.py +567 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/plan_schema.py +527 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/preview_cost_schema.py +134 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/preview_create_schema.py +83 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/preview_diff_schema.py +210 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/preview_rows_schema.py +56 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/profile_schema.py +58 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/profile_storage_schema.py +26 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/replay_schema.py +48 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/retention_status_schema.py +31 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/review_schema.py +350 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/rocky_fivetran_state_schema.py +197 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/rocky_project_schema.py +3332 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/run_schema.py +708 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/seed_schema.py +33 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/state_clear_schema_cache_schema.py +25 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/state_retention_sweep_schema.py +65 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/state_schema.py +30 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/test_adapter_schema.py +36 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/test_schema.py +103 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/trace_schema.py +55 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/validate_migration_schema.py +35 -0
- rocky_sdk-0.1.0/src/rocky_sdk/types_generated/validate_schema.py +77 -0
- rocky_sdk-0.1.0/tests/test_client.py +311 -0
- rocky_sdk-0.1.0/tests/test_types.py +72 -0
- rocky_sdk-0.1.0/uv.lock +596 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Root .gitignore — cross-cutting safety nets for the Rocky monorepo.
|
|
2
|
+
# Each subproject (engine/, integrations/dagster/, editors/vscode/, examples/playground/)
|
|
3
|
+
# has its own .gitignore tuned to its build system. Patterns here are universal.
|
|
4
|
+
|
|
5
|
+
# --- Secrets — never commit ---
|
|
6
|
+
.env
|
|
7
|
+
.env.*
|
|
8
|
+
!.env.example
|
|
9
|
+
*.pem
|
|
10
|
+
*.key
|
|
11
|
+
id_rsa*
|
|
12
|
+
secrets.*
|
|
13
|
+
credentials.*
|
|
14
|
+
|
|
15
|
+
# --- OS / editor noise ---
|
|
16
|
+
.DS_Store
|
|
17
|
+
Thumbs.db
|
|
18
|
+
*.swp
|
|
19
|
+
*.swo
|
|
20
|
+
*~
|
|
21
|
+
.idea/
|
|
22
|
+
|
|
23
|
+
# --- Rocky runtime state (any subproject) ---
|
|
24
|
+
*.redb
|
|
25
|
+
*.duckdb
|
|
26
|
+
*.duckdb.wal
|
|
27
|
+
.rocky-state*
|
|
28
|
+
|
|
29
|
+
# --- Vendored binaries (downloaded by scripts/vendor_*.sh) ---
|
|
30
|
+
vendor/
|
|
31
|
+
|
|
32
|
+
# --- Per-user Claude state (skills + shared settings.json are committed) ---
|
|
33
|
+
.claude/projects/
|
|
34
|
+
.claude/worktrees/
|
|
35
|
+
.claude/settings.local.json
|
|
36
|
+
.claude/scheduled_tasks.lock
|
|
37
|
+
|
|
38
|
+
# --- Defense-in-depth duplicates of subproject ignores ---
|
|
39
|
+
# (in case someone runs cargo / npm / uv from the wrong directory)
|
|
40
|
+
target/
|
|
41
|
+
target-linux/
|
|
42
|
+
node_modules/
|
|
43
|
+
dist/
|
|
44
|
+
out/
|
|
45
|
+
build/
|
|
46
|
+
*.vsix
|
|
47
|
+
.vscode-test/
|
|
48
|
+
.venv/
|
|
49
|
+
__pycache__/
|
|
50
|
+
*.py[cod]
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
.ruff_cache/
|
|
53
|
+
.mypy_cache/
|
|
54
|
+
|
|
55
|
+
# --- Astro build cache ---
|
|
56
|
+
docs/.astro/
|
|
57
|
+
|
|
58
|
+
# --- Logs and scratch ---
|
|
59
|
+
*.log
|
|
60
|
+
logs/
|
|
61
|
+
scratch/
|
|
62
|
+
tmp/
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# rocky-sdk
|
|
2
|
+
|
|
3
|
+
Standalone, framework-agnostic Python client for the Rocky SQL transformation
|
|
4
|
+
engine. `RockyClient` wraps the `rocky` CLI binary (subprocess + `--output json`)
|
|
5
|
+
behind typed methods and raises a `RockyError` hierarchy. For notebooks, scripts,
|
|
6
|
+
and orchestrators. The `dagster-rocky` integration is a thin Dagster adapter over
|
|
7
|
+
this client.
|
|
8
|
+
|
|
9
|
+
**Positioning:** the SDK is a typed, ergonomic client over the CLI for *human
|
|
10
|
+
Python callers*. It complements — does not replace — `rocky mcp` (the surface for
|
|
11
|
+
AI agents) and `rocky serve` (the language-agnostic HTTP surface).
|
|
12
|
+
|
|
13
|
+
## Architecture
|
|
14
|
+
|
|
15
|
+
Three layers, each independently usable:
|
|
16
|
+
|
|
17
|
+
1. **Types** (`types.py` + `types_generated/`) — Pydantic models for every Rocky
|
|
18
|
+
CLI command's JSON output. The *generated* models in `types_generated/` are
|
|
19
|
+
autogenerated from `../../schemas/*.schema.json` via `datamodel-code-generator`
|
|
20
|
+
(run `just codegen-sdk` from the monorepo root). `types.py` re-exports them
|
|
21
|
+
under both generated and legacy names and defines `parse_rocky_output()`, a
|
|
22
|
+
table-driven dispatch that auto-detects the command and returns the matching
|
|
23
|
+
model. **Do not hand-edit `types_generated/`** — it's clobbered on regen.
|
|
24
|
+
2. **Client** (`client.py`) — `RockyClient`. Binary resolution, lazy version gate
|
|
25
|
+
(`MIN_ROCKY_VERSION`), argv builders, the subprocess core (`run_cli`), the
|
|
26
|
+
`rocky serve` HTTP fallback, the governance pre-flight, and one typed method
|
|
27
|
+
per CLI command. `mirror_stderr` and `logger` are constructor settings so an
|
|
28
|
+
adapter (dagster-rocky) can preserve its own stderr-capture + logger provenance.
|
|
29
|
+
3. **Exceptions** (`exceptions.py`) — `RockyError` base + subclasses
|
|
30
|
+
(`RockyBinaryNotFoundError`, `RockyVersionError`, `RockyTimeoutError`,
|
|
31
|
+
`RockyCommandError`, `RockyPartialFailure`, `RockyOutputParseError`,
|
|
32
|
+
`RockyServerError`, `RockyGovernanceError`). Each carries structured fields so
|
|
33
|
+
a caller can rebuild a rich error (the dagster adapter rebuilds `dagster.Failure`).
|
|
34
|
+
|
|
35
|
+
Subprocess plumbing (`_subprocess.py`): single reader thread per pipe plus an
|
|
36
|
+
external watchdog that kills the process group on timeout — never
|
|
37
|
+
`communicate(timeout=)`, which races with a concurrent pipe reader. This pattern
|
|
38
|
+
is load-bearing; preserve it.
|
|
39
|
+
|
|
40
|
+
## Project structure
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
src/rocky_sdk/
|
|
44
|
+
├── __init__.py # Public API: RockyClient, exceptions, all result types
|
|
45
|
+
├── client.py # RockyClient + module-level parse/governance helpers
|
|
46
|
+
├── exceptions.py # RockyError hierarchy
|
|
47
|
+
├── _subprocess.py # reader threads, watchdog, kill, redact (framework-agnostic)
|
|
48
|
+
├── types.py # parse_rocky_output + dispatch + hand-written models
|
|
49
|
+
└── types_generated/ # Autogenerated from ../../schemas/*.schema.json (do not edit)
|
|
50
|
+
tests/
|
|
51
|
+
├── test_client.py # argv / version / subprocess / governance / HTTP (mocked binary)
|
|
52
|
+
└── test_types.py # parse_rocky_output dispatch + parse-helper error paths
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Coding standards
|
|
56
|
+
|
|
57
|
+
- Python 3.11+, `from __future__ import annotations` in all modules.
|
|
58
|
+
- Pydantic `BaseModel` for data structures. Line length 100.
|
|
59
|
+
- Ruff rules: E, F, I, N, UP, B, SIM. `types_generated/` is excluded from lint/format.
|
|
60
|
+
- Runtime dependency: `pydantic>=2.0` only — keep the SDK free of heavy deps.
|
|
61
|
+
|
|
62
|
+
## Common commands
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
uv sync --dev
|
|
66
|
+
uv run pytest -v # all tests (no binary or creds needed)
|
|
67
|
+
uv run ruff check src/ tests/
|
|
68
|
+
uv run ruff format --check src/ tests/ # CI gate
|
|
69
|
+
uv build # wheel + sdist
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Adding support for a new Rocky CLI command
|
|
73
|
+
|
|
74
|
+
1. Add the typed `*Output` struct in `engine/crates/rocky-cli/src/output.rs` and
|
|
75
|
+
register it in `commands/export_schemas.rs::schemas()`.
|
|
76
|
+
2. From the monorepo root, run `just codegen-sdk` (or `just codegen`).
|
|
77
|
+
3. Re-export the new type from `types.py` (bridge section) if needed and add a
|
|
78
|
+
`parse_rocky_output()` dispatch entry.
|
|
79
|
+
4. Add a typed method to `RockyClient` and a delegating method to
|
|
80
|
+
`RockyResource` (dagster-rocky), translating any new error shape.
|
|
81
|
+
|
|
82
|
+
## Release
|
|
83
|
+
|
|
84
|
+
Tag-namespaced: `sdk-v*` → `rocky-sdk` wheel on PyPI (`sdk-release.yml`).
|
|
85
|
+
Run `just release-sdk <version> [--publish]` to cut a release. **Release the SDK before any
|
|
86
|
+
`dagster-rocky` release that raises its `rocky-sdk>=…` floor** — the published
|
|
87
|
+
dagster wheel resolves the SDK from PyPI, not the monorepo path source.
|
|
88
|
+
|
|
89
|
+
## Git conventions
|
|
90
|
+
|
|
91
|
+
- **Never** include `Co-Authored-By` trailers. Conventional commits, scoped
|
|
92
|
+
`feat(sdk): …` / `fix(sdk): …`.
|
rocky_sdk-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rocky-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Typed Python client for the Rocky SQL transformation engine
|
|
5
|
+
Project-URL: Homepage, https://rocky-data.dev/
|
|
6
|
+
Project-URL: Repository, https://github.com/rocky-data/rocky
|
|
7
|
+
Project-URL: Documentation, https://rocky-data.dev/
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/rocky-data/rocky/issues
|
|
9
|
+
Author-email: Hugo Correia <hello@rocky-data.dev>
|
|
10
|
+
License-Expression: Apache-2.0
|
|
11
|
+
Keywords: client,data-engineering,rocky,sdk,sql,transformation
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Requires-Dist: pydantic>=2.0
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# rocky-sdk
|
|
26
|
+
|
|
27
|
+
A typed Python client for the [Rocky](https://rocky-data.dev/) SQL transformation engine.
|
|
28
|
+
|
|
29
|
+
`rocky-sdk` wraps the `rocky` CLI binary (subprocess + `--output json`) behind a
|
|
30
|
+
typed `RockyClient`. Each method builds the right argv, runs the binary, parses
|
|
31
|
+
the JSON output, and returns a Pydantic model. Failures surface as `RockyError`
|
|
32
|
+
subclasses carrying structured fields (exit code, stderr tail, version strings)
|
|
33
|
+
rather than opaque messages.
|
|
34
|
+
|
|
35
|
+
It is for **human Python callers** — notebooks, scripts, and orchestrators. The
|
|
36
|
+
[`dagster-rocky`](https://pypi.org/project/dagster-rocky/) integration is a thin
|
|
37
|
+
Dagster adapter built on this client. For AI agents, use `rocky mcp`; for a
|
|
38
|
+
language-agnostic HTTP surface, use `rocky serve`.
|
|
39
|
+
|
|
40
|
+
## Install
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install rocky-sdk
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
The `rocky` binary is not bundled — install it separately and put it on `$PATH`
|
|
47
|
+
(or pass `binary_path=`). See the
|
|
48
|
+
[releases page](https://github.com/rocky-data/rocky/releases). The SDK requires
|
|
49
|
+
engine **v1.34.0 or newer**.
|
|
50
|
+
|
|
51
|
+
## Usage
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from rocky_sdk import RockyClient
|
|
55
|
+
|
|
56
|
+
client = RockyClient(config_path="rocky.toml")
|
|
57
|
+
|
|
58
|
+
# Read-only inspection — all return typed Pydantic models
|
|
59
|
+
result = client.compile()
|
|
60
|
+
for diag in result.diagnostics:
|
|
61
|
+
print(diag.severity, diag.message)
|
|
62
|
+
|
|
63
|
+
lineage = client.lineage("customer_orders", column="email")
|
|
64
|
+
catalog = client.catalog()
|
|
65
|
+
|
|
66
|
+
# Execute a pipeline; stream live progress to a callback
|
|
67
|
+
run = client.run(filter="tenant=acme", log_callback=print)
|
|
68
|
+
print(run.summary)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Errors
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from rocky_sdk import RockyClient
|
|
75
|
+
from rocky_sdk.exceptions import RockyVersionError, RockyCommandError, RockyTimeoutError
|
|
76
|
+
|
|
77
|
+
client = RockyClient(config_path="rocky.toml", timeout_seconds=600)
|
|
78
|
+
try:
|
|
79
|
+
client.run(filter="tenant=acme")
|
|
80
|
+
except RockyTimeoutError as exc:
|
|
81
|
+
print("timed out after", exc.timeout_seconds, "s")
|
|
82
|
+
print(exc.stderr_tail)
|
|
83
|
+
except RockyCommandError as exc:
|
|
84
|
+
print("exit", exc.returncode)
|
|
85
|
+
print(exc.stderr_tail)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
`RockyError` is the base of the hierarchy:
|
|
89
|
+
|
|
90
|
+
| Exception | Raised when |
|
|
91
|
+
|---|---|
|
|
92
|
+
| `RockyBinaryNotFoundError` | the `rocky` binary is missing |
|
|
93
|
+
| `RockyVersionError` | the binary is older than the SDK's minimum |
|
|
94
|
+
| `RockyTimeoutError` | a command exceeds `timeout_seconds` |
|
|
95
|
+
| `RockyCommandError` | a command exits non-zero |
|
|
96
|
+
| `RockyPartialFailure` | a non-zero run still returned a parseable partial result (only with `allow_partial=False`) |
|
|
97
|
+
| `RockyOutputParseError` | stdout was not the expected JSON shape |
|
|
98
|
+
| `RockyServerError` | a `rocky serve` HTTP request failed |
|
|
99
|
+
| `RockyGovernanceError` | a `governance_override` would silently full-revoke |
|
|
100
|
+
|
|
101
|
+
## License
|
|
102
|
+
|
|
103
|
+
Apache-2.0
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# rocky-sdk
|
|
2
|
+
|
|
3
|
+
A typed Python client for the [Rocky](https://rocky-data.dev/) SQL transformation engine.
|
|
4
|
+
|
|
5
|
+
`rocky-sdk` wraps the `rocky` CLI binary (subprocess + `--output json`) behind a
|
|
6
|
+
typed `RockyClient`. Each method builds the right argv, runs the binary, parses
|
|
7
|
+
the JSON output, and returns a Pydantic model. Failures surface as `RockyError`
|
|
8
|
+
subclasses carrying structured fields (exit code, stderr tail, version strings)
|
|
9
|
+
rather than opaque messages.
|
|
10
|
+
|
|
11
|
+
It is for **human Python callers** — notebooks, scripts, and orchestrators. The
|
|
12
|
+
[`dagster-rocky`](https://pypi.org/project/dagster-rocky/) integration is a thin
|
|
13
|
+
Dagster adapter built on this client. For AI agents, use `rocky mcp`; for a
|
|
14
|
+
language-agnostic HTTP surface, use `rocky serve`.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install rocky-sdk
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
The `rocky` binary is not bundled — install it separately and put it on `$PATH`
|
|
23
|
+
(or pass `binary_path=`). See the
|
|
24
|
+
[releases page](https://github.com/rocky-data/rocky/releases). The SDK requires
|
|
25
|
+
engine **v1.34.0 or newer**.
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from rocky_sdk import RockyClient
|
|
31
|
+
|
|
32
|
+
client = RockyClient(config_path="rocky.toml")
|
|
33
|
+
|
|
34
|
+
# Read-only inspection — all return typed Pydantic models
|
|
35
|
+
result = client.compile()
|
|
36
|
+
for diag in result.diagnostics:
|
|
37
|
+
print(diag.severity, diag.message)
|
|
38
|
+
|
|
39
|
+
lineage = client.lineage("customer_orders", column="email")
|
|
40
|
+
catalog = client.catalog()
|
|
41
|
+
|
|
42
|
+
# Execute a pipeline; stream live progress to a callback
|
|
43
|
+
run = client.run(filter="tenant=acme", log_callback=print)
|
|
44
|
+
print(run.summary)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Errors
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from rocky_sdk import RockyClient
|
|
51
|
+
from rocky_sdk.exceptions import RockyVersionError, RockyCommandError, RockyTimeoutError
|
|
52
|
+
|
|
53
|
+
client = RockyClient(config_path="rocky.toml", timeout_seconds=600)
|
|
54
|
+
try:
|
|
55
|
+
client.run(filter="tenant=acme")
|
|
56
|
+
except RockyTimeoutError as exc:
|
|
57
|
+
print("timed out after", exc.timeout_seconds, "s")
|
|
58
|
+
print(exc.stderr_tail)
|
|
59
|
+
except RockyCommandError as exc:
|
|
60
|
+
print("exit", exc.returncode)
|
|
61
|
+
print(exc.stderr_tail)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
`RockyError` is the base of the hierarchy:
|
|
65
|
+
|
|
66
|
+
| Exception | Raised when |
|
|
67
|
+
|---|---|
|
|
68
|
+
| `RockyBinaryNotFoundError` | the `rocky` binary is missing |
|
|
69
|
+
| `RockyVersionError` | the binary is older than the SDK's minimum |
|
|
70
|
+
| `RockyTimeoutError` | a command exceeds `timeout_seconds` |
|
|
71
|
+
| `RockyCommandError` | a command exits non-zero |
|
|
72
|
+
| `RockyPartialFailure` | a non-zero run still returned a parseable partial result (only with `allow_partial=False`) |
|
|
73
|
+
| `RockyOutputParseError` | stdout was not the expected JSON shape |
|
|
74
|
+
| `RockyServerError` | a `rocky serve` HTTP request failed |
|
|
75
|
+
| `RockyGovernanceError` | a `governance_override` would silently full-revoke |
|
|
76
|
+
|
|
77
|
+
## License
|
|
78
|
+
|
|
79
|
+
Apache-2.0
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "rocky-sdk"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Typed Python client for the Rocky SQL transformation engine"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "Apache-2.0"
|
|
7
|
+
requires-python = ">=3.11"
|
|
8
|
+
authors = [{ name = "Hugo Correia", email = "hello@rocky-data.dev" }]
|
|
9
|
+
keywords = ["rocky", "data-engineering", "sql", "transformation", "client", "sdk"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"License :: OSI Approved :: Apache Software License",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: Python :: 3.11",
|
|
16
|
+
"Programming Language :: Python :: 3.12",
|
|
17
|
+
"Programming Language :: Python :: 3.13",
|
|
18
|
+
"Topic :: Database",
|
|
19
|
+
"Topic :: Software Development :: Libraries",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"pydantic>=2.0",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://rocky-data.dev/"
|
|
27
|
+
Repository = "https://github.com/rocky-data/rocky"
|
|
28
|
+
Documentation = "https://rocky-data.dev/"
|
|
29
|
+
"Bug Tracker" = "https://github.com/rocky-data/rocky/issues"
|
|
30
|
+
|
|
31
|
+
[dependency-groups]
|
|
32
|
+
dev = [
|
|
33
|
+
"datamodel-code-generator>=0.56.0",
|
|
34
|
+
"pytest>=8.0",
|
|
35
|
+
"ruff>=0.4",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[build-system]
|
|
39
|
+
requires = ["hatchling"]
|
|
40
|
+
build-backend = "hatchling.build"
|
|
41
|
+
|
|
42
|
+
[tool.hatch.build.targets.wheel]
|
|
43
|
+
packages = ["src/rocky_sdk"]
|
|
44
|
+
|
|
45
|
+
[tool.ruff]
|
|
46
|
+
target-version = "py311"
|
|
47
|
+
line-length = 100
|
|
48
|
+
# Generated code is produced by datamodel-code-generator from
|
|
49
|
+
# ../../schemas/*.schema.json via `just codegen-sdk`. Hand-editing is
|
|
50
|
+
# forbidden (it gets clobbered on next regen) and the generator's output
|
|
51
|
+
# style doesn't align with our ruff ruleset — linting it produces noise
|
|
52
|
+
# without fixable signal. Exclude from both lint and format.
|
|
53
|
+
extend-exclude = ["src/rocky_sdk/types_generated"]
|
|
54
|
+
|
|
55
|
+
[tool.ruff.lint]
|
|
56
|
+
select = ["E", "F", "I", "N", "UP", "B", "SIM"]
|
|
57
|
+
|
|
58
|
+
[tool.pytest.ini_options]
|
|
59
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""rocky-sdk: a typed Python client for the Rocky SQL transformation engine.
|
|
2
|
+
|
|
3
|
+
The SDK wraps the ``rocky`` CLI binary (subprocess + ``--output json``) behind
|
|
4
|
+
a typed :class:`~rocky_sdk.client.RockyClient`. Each method builds the right
|
|
5
|
+
argv, runs the binary, parses the JSON output, and returns a Pydantic model
|
|
6
|
+
from :mod:`rocky_sdk.types`. Errors surface as
|
|
7
|
+
:class:`~rocky_sdk.exceptions.RockyError` subclasses.
|
|
8
|
+
|
|
9
|
+
For human Python callers — notebooks, scripts, and orchestrators. The
|
|
10
|
+
``dagster-rocky`` integration is a thin Dagster adapter over this client.
|
|
11
|
+
For AI agents use ``rocky mcp``; for a language-agnostic HTTP surface use
|
|
12
|
+
``rocky serve``.
|
|
13
|
+
|
|
14
|
+
from rocky_sdk import RockyClient
|
|
15
|
+
|
|
16
|
+
client = RockyClient(config_path="rocky.toml")
|
|
17
|
+
result = client.compile()
|
|
18
|
+
run = client.run(filter="tenant=acme", log_callback=print)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from rocky_sdk.client import RockyClient

# NOTE(review): RockyGovernanceError and RockyServerError are listed in the
# README's exception table as part of the RockyError hierarchy, so they are
# re-exported here alongside the rest — confirm both are defined in
# rocky_sdk.exceptions.
from rocky_sdk.exceptions import (
    RockyBinaryNotFoundError,
    RockyCommandError,
    RockyError,
    RockyGovernanceError,
    RockyOutputParseError,
    RockyPartialFailure,
    RockyServerError,
    RockyTimeoutError,
    RockyVersionError,
)
from rocky_sdk.types import *  # noqa: F401,F403 (re-export the full typed surface)
from rocky_sdk.types import __all__ as _types_all
from rocky_sdk.types import parse_rocky_output

# Explicit public surface: the client, every exception in the hierarchy, the
# output parser, and every name that rocky_sdk.types itself declares public.
__all__ = [
    "RockyClient",
    "RockyError",
    "RockyBinaryNotFoundError",
    "RockyCommandError",
    "RockyGovernanceError",
    "RockyPartialFailure",
    "RockyServerError",
    "RockyTimeoutError",
    "RockyVersionError",
    "RockyOutputParseError",
    "parse_rocky_output",
    *_types_all,
]
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Low-level subprocess plumbing shared by :class:`rocky_sdk.client.RockyClient`.
|
|
2
|
+
|
|
3
|
+
Single-reader-per-pipe + external watchdog. Two dedicated threads are the *sole*
|
|
4
|
+
readers of ``proc.stdout`` / ``proc.stderr`` while a third watchdog thread
|
|
5
|
+
enforces the wall-clock timeout by killing the process group — never
|
|
6
|
+
``communicate(timeout=)``, which races with a concurrent pipe reader and was the
|
|
7
|
+
root cause of intermittent multi-hour hangs in production. See
|
|
8
|
+
:meth:`RockyClient._run_with_log_sink` for how the pieces fit together.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import contextlib
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
import signal
|
|
17
|
+
import subprocess
|
|
18
|
+
import sys
|
|
19
|
+
from collections.abc import Callable, Iterable
|
|
20
|
+
from typing import IO
|
|
21
|
+
|
|
22
|
+
_log = logging.getLogger("rocky_sdk")
|
|
23
|
+
|
|
24
|
+
#: Argv flags whose immediately-following value is credential-bearing or
|
|
25
|
+
#: otherwise sensitive. When the constructed argv is logged, the value of any
|
|
26
|
+
#: matching flag is masked. The subprocess itself still receives the real value
|
|
27
|
+
#: — only the log line is redacted.
|
|
28
|
+
_REDACTED_ARGV_FLAGS = frozenset({"--governance-override", "--idempotency-key"})


def redact_argv(argv: list[str]) -> list[str]:
    """Return a copy of ``argv`` with sensitive flag values masked for logging.

    Two spellings of a flag listed in :data:`_REDACTED_ARGV_FLAGS` are handled:

    * ``--flag value`` — the token *after* the flag is replaced with ``"***"``.
    * ``--flag=value`` — the token is rewritten to ``--flag=***`` so the value
      cannot slip into a log line through the single-token form.

    The input list is never mutated. The result is meant for log output only —
    never for the argv handed to the subprocess.

    Args:
        argv: The full argument vector about to be logged.

    Returns:
        A new list of the same length with sensitive values masked.
    """
    out: list[str] = []
    redact_next = False
    for token in argv:
        if redact_next:
            # Previous token was a sensitive flag: this token is its value.
            out.append("***")
            redact_next = False
            continue
        flag, sep, _value = token.partition("=")
        if sep and flag in _REDACTED_ARGV_FLAGS:
            # Single-token ``--flag=value`` form: keep the flag, mask the value.
            out.append(f"{flag}=***")
            continue
        out.append(token)
        if token in _REDACTED_ARGV_FLAGS:
            redact_next = True
    return out
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def forward_stderr_to_sink(
    stderr: Iterable[str] | None,
    log_line: Callable[[str], None],
    sink: list[str],
    *,
    mirror_to_stderr: bool = False,
) -> None:
    """Reader-thread body: drain ``stderr`` into ``sink`` and ``log_line``.

    Iterates ``stderr`` until it is exhausted. The trailing newline is stripped
    from each line; lines that are empty after stripping are skipped. Every
    remaining line is appended to ``sink`` and then passed to ``log_line``, in
    that order. This function is meant to be the only reader of the pipe.

    Args:
        stderr: Line iterable to drain; ``None`` makes the call a no-op.
        log_line: Callback invoked once per non-empty line.
        sink: List that collects the non-empty lines for the caller.
        mirror_to_stderr: When true, each line is also printed (flushed) to
            this process's ``sys.stderr``. ``OSError`` / ``ValueError`` raised
            by that print are suppressed so mirroring stays best-effort.

    An ``OSError`` / ``ValueError`` raised while reading or forwarding ends the
    loop and is logged as a warning rather than propagated.
    """
    if stderr is not None:
        try:
            for chunk in stderr:
                text = chunk.rstrip("\n")
                if text:
                    sink.append(text)
                    log_line(text)
                    if mirror_to_stderr:
                        # Best-effort mirror: a closed fd must not stop the reader.
                        with contextlib.suppress(OSError, ValueError):
                            print(text, file=sys.stderr, flush=True)
        except (OSError, ValueError) as err:
            _log.warning("rocky stderr reader terminated: %s", err)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def accumulate_stdout(stdout: IO[str] | None, sink: list[str]) -> None:
    """Reader-thread body: collect every line of ``stdout`` into ``sink``.

    Lines are appended untouched — newline included, blank lines included — so
    joining ``sink`` reproduces the text that was read. This function is meant
    to be the only reader of the pipe.

    Args:
        stdout: Text stream to drain; ``None`` makes the call a no-op.
        sink: List that receives the raw lines in read order.

    An ``OSError`` / ``ValueError`` raised while reading stops the loop and is
    logged as a warning; lines collected before the error stay in ``sink``.
    """
    if stdout is not None:
        try:
            for chunk in stdout:
                sink.append(chunk)
        except (OSError, ValueError) as err:
            _log.warning("rocky stdout accumulator terminated: %s", err)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def kill_process_group(proc: subprocess.Popen[str]) -> None:
    """Force-kill ``proc``; on POSIX, kill its whole process group.

    On POSIX the group id is looked up from ``proc.pid`` and the group is sent
    ``SIGKILL``. That reaches the child's own descendants only if the process
    was started in its own session / group (``start_new_session=True``). When
    ``os.name`` is ``"nt"`` there is no ``killpg``, so ``proc.kill()`` is used
    instead.

    ``ProcessLookupError`` / ``OSError`` are suppressed: the process may
    already have exited by the time the kill is attempted, and there is nothing
    further to do in that case.
    """
    with contextlib.suppress(ProcessLookupError, OSError):
        if os.name != "nt":
            os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
        else:
            proc.kill()
|