rocky-sdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. rocky_sdk-0.1.0/.gitignore +62 -0
  2. rocky_sdk-0.1.0/CLAUDE.md +92 -0
  3. rocky_sdk-0.1.0/PKG-INFO +103 -0
  4. rocky_sdk-0.1.0/README.md +79 -0
  5. rocky_sdk-0.1.0/pyproject.toml +59 -0
  6. rocky_sdk-0.1.0/src/rocky_sdk/__init__.py +48 -0
  7. rocky_sdk-0.1.0/src/rocky_sdk/_subprocess.py +121 -0
  8. rocky_sdk-0.1.0/src/rocky_sdk/client.py +1128 -0
  9. rocky_sdk-0.1.0/src/rocky_sdk/exceptions.py +181 -0
  10. rocky_sdk-0.1.0/src/rocky_sdk/py.typed +0 -0
  11. rocky_sdk-0.1.0/src/rocky_sdk/types.py +1326 -0
  12. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/__init__.py +476 -0
  13. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/adapter_config_schema.py +459 -0
  14. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ai_contract_schema.py +59 -0
  15. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ai_explain_schema.py +22 -0
  16. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ai_schema.py +28 -0
  17. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ai_sync_schema.py +23 -0
  18. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ai_test_schema.py +28 -0
  19. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/apply_schema.py +42 -0
  20. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/archive_apply_schema.py +55 -0
  21. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/archive_schema.py +66 -0
  22. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/branch_approve_schema.py +95 -0
  23. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/branch_delete_schema.py +20 -0
  24. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/branch_list_schema.py +29 -0
  25. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/branch_promote_schema.py +543 -0
  26. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/branch_schema.py +28 -0
  27. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/catalog_schema.py +146 -0
  28. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ci_diff_schema.py +470 -0
  29. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/ci_schema.py +89 -0
  30. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/column_lineage_schema.py +40 -0
  31. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compact_apply_schema.py +55 -0
  32. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compact_dedup_schema.py +129 -0
  33. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compact_schema.py +79 -0
  34. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compare_schema.py +34 -0
  35. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compile_schema.py +472 -0
  36. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/compliance_schema.py +114 -0
  37. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/cost_schema.py +79 -0
  38. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/dag_run_schema.py +78 -0
  39. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/dag_schema.py +411 -0
  40. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/discover_schema.py +163 -0
  41. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/doctor_schema.py +41 -0
  42. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/drift_schema.py +28 -0
  43. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/estimate_schema.py +45 -0
  44. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/history_schema.py +84 -0
  45. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/hooks_list_schema.py +23 -0
  46. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/hooks_test_schema.py +23 -0
  47. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/import_dbt_schema.py +223 -0
  48. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/lineage_diff_schema.py +144 -0
  49. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/lineage_schema.py +69 -0
  50. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/load_schema.py +63 -0
  51. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/metrics_schema.py +47 -0
  52. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/model_history_schema.py +86 -0
  53. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/optimize_schema.py +48 -0
  54. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/plan_promote_schema.py +567 -0
  55. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/plan_schema.py +527 -0
  56. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/preview_cost_schema.py +134 -0
  57. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/preview_create_schema.py +83 -0
  58. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/preview_diff_schema.py +210 -0
  59. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/preview_rows_schema.py +56 -0
  60. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/profile_schema.py +58 -0
  61. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/profile_storage_schema.py +26 -0
  62. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/replay_schema.py +48 -0
  63. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/retention_status_schema.py +31 -0
  64. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/review_schema.py +350 -0
  65. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/rocky_fivetran_state_schema.py +197 -0
  66. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/rocky_project_schema.py +3332 -0
  67. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/run_schema.py +708 -0
  68. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/seed_schema.py +33 -0
  69. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/state_clear_schema_cache_schema.py +25 -0
  70. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/state_retention_sweep_schema.py +65 -0
  71. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/state_schema.py +30 -0
  72. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/test_adapter_schema.py +36 -0
  73. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/test_schema.py +103 -0
  74. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/trace_schema.py +55 -0
  75. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/validate_migration_schema.py +35 -0
  76. rocky_sdk-0.1.0/src/rocky_sdk/types_generated/validate_schema.py +77 -0
  77. rocky_sdk-0.1.0/tests/test_client.py +311 -0
  78. rocky_sdk-0.1.0/tests/test_types.py +72 -0
  79. rocky_sdk-0.1.0/uv.lock +596 -0
@@ -0,0 +1,62 @@
1
+ # Root .gitignore — cross-cutting safety nets for the Rocky monorepo.
2
+ # Each subproject (engine/, integrations/dagster/, editors/vscode/, examples/playground/)
3
+ # has its own .gitignore tuned to its build system. Patterns here are universal.
4
+
5
+ # --- Secrets — never commit ---
6
+ .env
7
+ .env.*
8
+ !.env.example
9
+ *.pem
10
+ *.key
11
+ id_rsa*
12
+ secrets.*
13
+ credentials.*
14
+
15
+ # --- OS / editor noise ---
16
+ .DS_Store
17
+ Thumbs.db
18
+ *.swp
19
+ *.swo
20
+ *~
21
+ .idea/
22
+
23
+ # --- Rocky runtime state (any subproject) ---
24
+ *.redb
25
+ *.duckdb
26
+ *.duckdb.wal
27
+ .rocky-state*
28
+
29
+ # --- Vendored binaries (downloaded by scripts/vendor_*.sh) ---
30
+ vendor/
31
+
32
+ # --- Per-user Claude state (skills + shared settings.json are committed) ---
33
+ .claude/projects/
34
+ .claude/worktrees/
35
+ .claude/settings.local.json
36
+ .claude/scheduled_tasks.lock
37
+
38
+ # --- Defense-in-depth duplicates of subproject ignores ---
39
+ # (in case someone runs cargo / npm / uv from the wrong directory)
40
+ target/
41
+ target-linux/
42
+ node_modules/
43
+ dist/
44
+ out/
45
+ build/
46
+ *.vsix
47
+ .vscode-test/
48
+ .venv/
49
+ __pycache__/
50
+ *.py[cod]
51
+ .pytest_cache/
52
+ .ruff_cache/
53
+ .mypy_cache/
54
+
55
+ # --- Astro build cache ---
56
+ docs/.astro/
57
+
58
+ # --- Logs and scratch ---
59
+ *.log
60
+ logs/
61
+ scratch/
62
+ tmp/
@@ -0,0 +1,92 @@
1
+ # rocky-sdk
2
+
3
+ Standalone, framework-agnostic Python client for the Rocky SQL transformation
4
+ engine. `RockyClient` wraps the `rocky` CLI binary (subprocess + `--output json`)
5
+ behind typed methods and raises a `RockyError` hierarchy. For notebooks, scripts,
6
+ and orchestrators. The `dagster-rocky` integration is a thin Dagster adapter over
7
+ this client.
8
+
9
+ **Positioning:** the SDK is a typed, ergonomic client over the CLI for *human
10
+ Python callers*. It complements — does not replace — `rocky mcp` (the surface for
11
+ AI agents) and `rocky serve` (the language-agnostic HTTP surface).
12
+
13
+ ## Architecture
14
+
15
+ Three layers, each independently usable:
16
+
17
+ 1. **Types** (`types.py` + `types_generated/`) — Pydantic models for every Rocky
18
+ CLI command's JSON output. The *generated* models in `types_generated/` are
19
+ autogenerated from `../../schemas/*.schema.json` via `datamodel-code-generator`
20
+ (run `just codegen-sdk` from the monorepo root). `types.py` re-exports them
21
+ under both generated and legacy names and defines `parse_rocky_output()`, a
22
+ table-driven dispatch that auto-detects the command and returns the matching
23
+ model. **Do not hand-edit `types_generated/`** — it's clobbered on regen.
24
+ 2. **Client** (`client.py`) — `RockyClient`. Binary resolution, lazy version gate
25
+ (`MIN_ROCKY_VERSION`), argv builders, the subprocess core (`run_cli`), the
26
+ `rocky serve` HTTP fallback, the governance pre-flight, and one typed method
27
+ per CLI command. `mirror_stderr` and `logger` are constructor settings so an
28
+ adapter (dagster-rocky) can preserve its own stderr-capture + logger provenance.
29
+ 3. **Exceptions** (`exceptions.py`) — `RockyError` base + subclasses
30
+ (`RockyBinaryNotFoundError`, `RockyVersionError`, `RockyTimeoutError`,
31
+ `RockyCommandError`, `RockyPartialFailure`, `RockyOutputParseError`,
32
+ `RockyServerError`, `RockyGovernanceError`). Each carries structured fields so
33
+ a caller can rebuild a rich error (the dagster adapter rebuilds `dagster.Failure`).
34
+
35
+ Subprocess plumbing (`_subprocess.py`): single reader thread per pipe plus an
36
+ external watchdog that kills the process group on timeout — never
37
+ `communicate(timeout=)`, which races with a concurrent pipe reader. This pattern
38
+ is load-bearing; preserve it.
39
+
40
+ ## Project structure
41
+
42
+ ```
43
+ src/rocky_sdk/
44
+ ├── __init__.py # Public API: RockyClient, exceptions, all result types
45
+ ├── client.py # RockyClient + module-level parse/governance helpers
46
+ ├── exceptions.py # RockyError hierarchy
47
+ ├── _subprocess.py # reader threads, watchdog, kill, redact (framework-agnostic)
48
+ ├── types.py # parse_rocky_output + dispatch + hand-written models
49
+ └── types_generated/ # Autogenerated from ../../schemas/*.schema.json (do not edit)
50
+ tests/
51
+ ├── test_client.py # argv / version / subprocess / governance / HTTP (mocked binary)
52
+ └── test_types.py # parse_rocky_output dispatch + parse-helper error paths
53
+ ```
54
+
55
+ ## Coding standards
56
+
57
+ - Python 3.11+, `from __future__ import annotations` in all modules.
58
+ - Pydantic `BaseModel` for data structures. Line length 100.
59
+ - Ruff rules: E, F, I, N, UP, B, SIM. `types_generated/` is excluded from lint/format.
60
+ - Runtime dependency: `pydantic>=2.0` only — keep the SDK free of heavy deps.
61
+
62
+ ## Common commands
63
+
64
+ ```bash
65
+ uv sync --dev
66
+ uv run pytest -v # all tests (no binary or creds needed)
67
+ uv run ruff check src/ tests/
68
+ uv run ruff format --check src/ tests/ # CI gate
69
+ uv build # wheel + sdist
70
+ ```
71
+
72
+ ## Adding support for a new Rocky CLI command
73
+
74
+ 1. Add the typed `*Output` struct in `engine/crates/rocky-cli/src/output.rs` and
75
+ register it in `commands/export_schemas.rs::schemas()`.
76
+ 2. From the monorepo root, run `just codegen-sdk` (or `just codegen`).
77
+ 3. Re-export the new type from `types.py` (bridge section) if needed and add a
78
+ `parse_rocky_output()` dispatch entry.
79
+ 4. Add a typed method to `RockyClient` and a delegating method to
80
+ `RockyResource` (dagster-rocky), translating any new error shape.
81
+
82
+ ## Release
83
+
84
+ Tag-namespaced: `sdk-v*` → `rocky-sdk` wheel on PyPI (`sdk-release.yml`).
85
+ `just release-sdk <version> [--publish]`. **Release the SDK before any
86
+ `dagster-rocky` release that raises its `rocky-sdk>=…` floor** — the published
87
+ dagster wheel resolves the SDK from PyPI, not the monorepo path source.
88
+
89
+ ## Git conventions
90
+
91
+ - **Never** include `Co-Authored-By` trailers. Conventional commits, scoped
92
+ `feat(sdk): …` / `fix(sdk): …`.
@@ -0,0 +1,103 @@
1
+ Metadata-Version: 2.4
2
+ Name: rocky-sdk
3
+ Version: 0.1.0
4
+ Summary: Typed Python client for the Rocky SQL transformation engine
5
+ Project-URL: Homepage, https://rocky-data.dev/
6
+ Project-URL: Repository, https://github.com/rocky-data/rocky
7
+ Project-URL: Documentation, https://rocky-data.dev/
8
+ Project-URL: Bug Tracker, https://github.com/rocky-data/rocky/issues
9
+ Author-email: Hugo Correia <hello@rocky-data.dev>
10
+ License-Expression: Apache-2.0
11
+ Keywords: client,data-engineering,rocky,sdk,sql,transformation
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: pydantic>=2.0
23
+ Description-Content-Type: text/markdown
24
+
25
+ # rocky-sdk
26
+
27
+ A typed Python client for the [Rocky](https://rocky-data.dev/) SQL transformation engine.
28
+
29
+ `rocky-sdk` wraps the `rocky` CLI binary (subprocess + `--output json`) behind a
30
+ typed `RockyClient`. Each method builds the right argv, runs the binary, parses
31
+ the JSON output, and returns a Pydantic model. Failures surface as `RockyError`
32
+ subclasses carrying structured fields (exit code, stderr tail, version strings)
33
+ rather than opaque messages.
34
+
35
+ It is for **human Python callers** — notebooks, scripts, and orchestrators. The
36
+ [`dagster-rocky`](https://pypi.org/project/dagster-rocky/) integration is a thin
37
+ Dagster adapter built on this client. For AI agents, use `rocky mcp`; for a
38
+ language-agnostic HTTP surface, use `rocky serve`.
39
+
40
+ ## Install
41
+
42
+ ```bash
43
+ pip install rocky-sdk
44
+ ```
45
+
46
+ The `rocky` binary is not bundled — install it separately and put it on `$PATH`
47
+ (or pass `binary_path=`). See the
48
+ [releases page](https://github.com/rocky-data/rocky/releases). The SDK requires
49
+ engine **v1.34.0 or newer**.
50
+
51
+ ## Usage
52
+
53
+ ```python
54
+ from rocky_sdk import RockyClient
55
+
56
+ client = RockyClient(config_path="rocky.toml")
57
+
58
+ # Read-only inspection — all return typed Pydantic models
59
+ result = client.compile()
60
+ for diag in result.diagnostics:
61
+ print(diag.severity, diag.message)
62
+
63
+ lineage = client.lineage("customer_orders", column="email")
64
+ catalog = client.catalog()
65
+
66
+ # Execute a pipeline; stream live progress to a callback
67
+ run = client.run(filter="tenant=acme", log_callback=print)
68
+ print(run.summary)
69
+ ```
70
+
71
+ ### Errors
72
+
73
+ ```python
74
+ from rocky_sdk import RockyClient
75
+ from rocky_sdk.exceptions import RockyCommandError, RockyTimeoutError
76
+
77
+ client = RockyClient(config_path="rocky.toml", timeout_seconds=600)
78
+ try:
79
+ client.run(filter="tenant=acme")
80
+ except RockyTimeoutError as exc:
81
+ print("timed out after", exc.timeout_seconds, "s")
82
+ print(exc.stderr_tail)
83
+ except RockyCommandError as exc:
84
+ print("exit", exc.returncode)
85
+ print(exc.stderr_tail)
86
+ ```
87
+
88
+ `RockyError` is the base of the hierarchy:
89
+
90
+ | Exception | Raised when |
91
+ |---|---|
92
+ | `RockyBinaryNotFoundError` | the `rocky` binary is missing |
93
+ | `RockyVersionError` | the binary is older than the SDK's minimum |
94
+ | `RockyTimeoutError` | a command exceeds `timeout_seconds` |
95
+ | `RockyCommandError` | a command exits non-zero |
96
+ | `RockyPartialFailure` | a non-zero run still returned a parseable partial result (only with `allow_partial=False`) |
97
+ | `RockyOutputParseError` | stdout was not the expected JSON shape |
98
+ | `RockyServerError` | a `rocky serve` HTTP request failed |
99
+ | `RockyGovernanceError` | a `governance_override` would silently full-revoke |
100
+
101
+ ## License
102
+
103
+ Apache-2.0
@@ -0,0 +1,79 @@
1
+ # rocky-sdk
2
+
3
+ A typed Python client for the [Rocky](https://rocky-data.dev/) SQL transformation engine.
4
+
5
+ `rocky-sdk` wraps the `rocky` CLI binary (subprocess + `--output json`) behind a
6
+ typed `RockyClient`. Each method builds the right argv, runs the binary, parses
7
+ the JSON output, and returns a Pydantic model. Failures surface as `RockyError`
8
+ subclasses carrying structured fields (exit code, stderr tail, version strings)
9
+ rather than opaque messages.
10
+
11
+ It is for **human Python callers** — notebooks, scripts, and orchestrators. The
12
+ [`dagster-rocky`](https://pypi.org/project/dagster-rocky/) integration is a thin
13
+ Dagster adapter built on this client. For AI agents, use `rocky mcp`; for a
14
+ language-agnostic HTTP surface, use `rocky serve`.
15
+
16
+ ## Install
17
+
18
+ ```bash
19
+ pip install rocky-sdk
20
+ ```
21
+
22
+ The `rocky` binary is not bundled — install it separately and put it on `$PATH`
23
+ (or pass `binary_path=`). See the
24
+ [releases page](https://github.com/rocky-data/rocky/releases). The SDK requires
25
+ engine **v1.34.0 or newer**.
26
+
27
+ ## Usage
28
+
29
+ ```python
30
+ from rocky_sdk import RockyClient
31
+
32
+ client = RockyClient(config_path="rocky.toml")
33
+
34
+ # Read-only inspection — all return typed Pydantic models
35
+ result = client.compile()
36
+ for diag in result.diagnostics:
37
+ print(diag.severity, diag.message)
38
+
39
+ lineage = client.lineage("customer_orders", column="email")
40
+ catalog = client.catalog()
41
+
42
+ # Execute a pipeline; stream live progress to a callback
43
+ run = client.run(filter="tenant=acme", log_callback=print)
44
+ print(run.summary)
45
+ ```
46
+
47
+ ### Errors
48
+
49
+ ```python
50
+ from rocky_sdk import RockyClient
51
+ from rocky_sdk.exceptions import RockyCommandError, RockyTimeoutError
52
+
53
+ client = RockyClient(config_path="rocky.toml", timeout_seconds=600)
54
+ try:
55
+ client.run(filter="tenant=acme")
56
+ except RockyTimeoutError as exc:
57
+ print("timed out after", exc.timeout_seconds, "s")
58
+ print(exc.stderr_tail)
59
+ except RockyCommandError as exc:
60
+ print("exit", exc.returncode)
61
+ print(exc.stderr_tail)
62
+ ```
63
+
64
+ `RockyError` is the base of the hierarchy:
65
+
66
+ | Exception | Raised when |
67
+ |---|---|
68
+ | `RockyBinaryNotFoundError` | the `rocky` binary is missing |
69
+ | `RockyVersionError` | the binary is older than the SDK's minimum |
70
+ | `RockyTimeoutError` | a command exceeds `timeout_seconds` |
71
+ | `RockyCommandError` | a command exits non-zero |
72
+ | `RockyPartialFailure` | a non-zero run still returned a parseable partial result (only with `allow_partial=False`) |
73
+ | `RockyOutputParseError` | stdout was not the expected JSON shape |
74
+ | `RockyServerError` | a `rocky serve` HTTP request failed |
75
+ | `RockyGovernanceError` | a `governance_override` would silently full-revoke |
76
+
77
+ ## License
78
+
79
+ Apache-2.0
@@ -0,0 +1,59 @@
1
+ [project]
2
+ name = "rocky-sdk"
3
+ version = "0.1.0"
4
+ description = "Typed Python client for the Rocky SQL transformation engine"
5
+ readme = "README.md"
6
+ license = "Apache-2.0"
7
+ requires-python = ">=3.11"
8
+ authors = [{ name = "Hugo Correia", email = "hello@rocky-data.dev" }]
9
+ keywords = ["rocky", "data-engineering", "sql", "transformation", "client", "sdk"]
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "Intended Audience :: Developers",
13
+ "License :: OSI Approved :: Apache Software License",
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3.11",
16
+ "Programming Language :: Python :: 3.12",
17
+ "Programming Language :: Python :: 3.13",
18
+ "Topic :: Database",
19
+ "Topic :: Software Development :: Libraries",
20
+ ]
21
+ dependencies = [
22
+ "pydantic>=2.0",
23
+ ]
24
+
25
+ [project.urls]
26
+ Homepage = "https://rocky-data.dev/"
27
+ Repository = "https://github.com/rocky-data/rocky"
28
+ Documentation = "https://rocky-data.dev/"
29
+ "Bug Tracker" = "https://github.com/rocky-data/rocky/issues"
30
+
31
+ [dependency-groups]
32
+ dev = [
33
+ "datamodel-code-generator>=0.56.0",
34
+ "pytest>=8.0",
35
+ "ruff>=0.4",
36
+ ]
37
+
38
+ [build-system]
39
+ requires = ["hatchling"]
40
+ build-backend = "hatchling.build"
41
+
42
+ [tool.hatch.build.targets.wheel]
43
+ packages = ["src/rocky_sdk"]
44
+
45
+ [tool.ruff]
46
+ target-version = "py311"
47
+ line-length = 100
48
+ # Generated code is produced by datamodel-code-generator from
49
+ # ../../schemas/*.schema.json via `just codegen-sdk`. Hand-editing is
50
+ # forbidden (it gets clobbered on next regen) and the generator's output
51
+ # style doesn't align with our ruff ruleset — linting it produces noise
52
+ # without fixable signal. Exclude from both lint and format.
53
+ extend-exclude = ["src/rocky_sdk/types_generated"]
54
+
55
+ [tool.ruff.lint]
56
+ select = ["E", "F", "I", "N", "UP", "B", "SIM"]
57
+
58
+ [tool.pytest.ini_options]
59
+ testpaths = ["tests"]
@@ -0,0 +1,48 @@
1
+ """rocky-sdk: a typed Python client for the Rocky SQL transformation engine.
2
+
3
+ The SDK wraps the ``rocky`` CLI binary (subprocess + ``--output json``) behind
4
+ a typed :class:`~rocky_sdk.client.RockyClient`. Each method builds the right
5
+ argv, runs the binary, parses the JSON output, and returns a Pydantic model
6
+ from :mod:`rocky_sdk.types`. Errors surface as
7
+ :class:`~rocky_sdk.exceptions.RockyError` subclasses.
8
+
9
+ For human Python callers — notebooks, scripts, and orchestrators. The
10
+ ``dagster-rocky`` integration is a thin Dagster adapter over this client.
11
+ For AI agents use ``rocky mcp``; for a language-agnostic HTTP surface use
12
+ ``rocky serve``.
13
+
14
+ from rocky_sdk import RockyClient
15
+
16
+ client = RockyClient(config_path="rocky.toml")
17
+ result = client.compile()
18
+ run = client.run(filter="tenant=acme", log_callback=print)
19
+ """
20
+
21
+ from __future__ import annotations
22
+
from rocky_sdk.client import RockyClient
from rocky_sdk.exceptions import (
    RockyBinaryNotFoundError,
    RockyCommandError,
    RockyError,
    RockyGovernanceError,
    RockyOutputParseError,
    RockyPartialFailure,
    RockyServerError,
    RockyTimeoutError,
    RockyVersionError,
)
from rocky_sdk.types import *  # noqa: F401,F403 (re-export the full typed surface)
from rocky_sdk.types import __all__ as _types_all
from rocky_sdk.types import parse_rocky_output

# Public API of the package root: the client, the whole ``RockyError``
# hierarchy (so callers can catch any documented error without reaching into
# ``rocky_sdk.exceptions``), the output parser, and every name that
# ``rocky_sdk.types`` declares public.
__all__ = [
    "RockyClient",
    "RockyError",
    "RockyBinaryNotFoundError",
    "RockyCommandError",
    "RockyGovernanceError",
    "RockyPartialFailure",
    "RockyServerError",
    "RockyTimeoutError",
    "RockyVersionError",
    "RockyOutputParseError",
    "parse_rocky_output",
    *_types_all,
]
@@ -0,0 +1,121 @@
1
+ """Low-level subprocess plumbing shared by :class:`rocky_sdk.client.RockyClient`.
2
+
3
+ Single-reader-per-pipe + external watchdog. Two dedicated threads are the *sole*
4
+ readers of ``proc.stdout`` / ``proc.stderr`` while a third watchdog thread
5
+ enforces the wall-clock timeout by killing the process group — never
6
+ ``communicate(timeout=)``, which races with a concurrent pipe reader and was the
7
+ root cause of intermittent multi-hour hangs in production. See
8
+ :meth:`RockyClient._run_with_log_sink` for how the pieces fit together.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import contextlib
14
+ import logging
15
+ import os
16
+ import signal
17
+ import subprocess
18
+ import sys
19
+ from collections.abc import Callable, Iterable
20
+ from typing import IO
21
+
22
+ _log = logging.getLogger("rocky_sdk")
23
+
#: Argv flags whose value is credential-bearing or otherwise sensitive. When
#: the constructed argv is logged, the value of any matching flag is masked.
#: The subprocess itself still receives the real value — only the log line is
#: redacted.
_REDACTED_ARGV_FLAGS = frozenset({"--governance-override", "--idempotency-key"})


def redact_argv(argv: list[str]) -> list[str]:
    """Return a copy of ``argv`` with credential-bearing flag values masked.

    Walks left-to-right and masks the value of every flag listed in
    :data:`_REDACTED_ARGV_FLAGS`, in either spelling:

    * ``--flag value`` — the token *after* the flag is replaced with ``"***"``.
    * ``--flag=value`` — the token is rewritten to ``--flag=***``.

    A sensitive flag that is the last token (no value follows) is emitted
    unchanged. The input list is never mutated. For log output only — never
    for the argv handed to the subprocess.

    Args:
        argv: The full command line, one token per element.

    Returns:
        A new list of the same length, safe to write to a log.
    """
    out: list[str] = []
    redact_next = False
    for token in argv:
        if redact_next:
            # This token is the value of the sensitive flag just emitted.
            out.append("***")
            redact_next = False
            continue
        # Single-token spelling: the secret rides along after the first "=",
        # so an exact-match lookup on the whole token would miss it.
        flag, sep, _value = token.partition("=")
        if sep and flag in _REDACTED_ARGV_FLAGS:
            out.append(f"{flag}=***")
            continue
        out.append(token)
        redact_next = token in _REDACTED_ARGV_FLAGS
    return out
49
+
50
+
def forward_stderr_to_sink(
    stderr: Iterable[str] | None,
    log_line: Callable[[str], None],
    sink: list[str],
    *,
    mirror_to_stderr: bool = False,
) -> None:
    """Drain ``stderr`` line by line, fanning each line out to the caller.

    Intended as the body of the thread that is the only reader of the child's
    stderr pipe. Iterates ``stderr`` until it is exhausted; for every line that
    is non-empty once its trailing newline(s) are stripped:

    1. the stripped line is appended to ``sink``;
    2. the stripped line is passed to ``log_line``;
    3. if ``mirror_to_stderr`` is true, it is also printed (and flushed) to
       this process's ``sys.stderr``.

    Args:
        stderr: Iterable of text lines, or ``None`` (in which case nothing
            happens).
        log_line: Callback receiving each forwarded line.
        sink: List that accumulates every forwarded line.
        mirror_to_stderr: Also echo each line to ``sys.stderr``.

    ``OSError`` / ``ValueError`` raised while mirroring are suppressed so a
    closed ``sys.stderr`` cannot stop the drain. The same exception types
    raised anywhere else in the loop (reading, or inside ``log_line``) end the
    drain with a warning on the ``rocky_sdk`` logger instead of propagating.
    """
    if stderr is None:
        return
    # Both stages are lazy, so the actual reads still happen inside the ``try``.
    stripped = (chunk.rstrip("\n") for chunk in stderr)
    try:
        for text in filter(None, stripped):
            sink.append(text)
            log_line(text)
            if not mirror_to_stderr:
                continue
            with contextlib.suppress(OSError, ValueError):
                print(text, file=sys.stderr, flush=True)
    except (OSError, ValueError) as err:
        _log.warning("rocky stderr reader terminated: %s", err)
85
+
86
+
def accumulate_stdout(stdout: IO[str] | None, sink: list[str]) -> None:
    """Collect every line produced by ``stdout`` into ``sink``, verbatim.

    Intended as the body of the thread that is the only reader of the child's
    stdout pipe. Lines are appended exactly as read — newline terminators and
    blank lines included — so ``"".join(sink)`` reproduces the stream's text.

    Args:
        stdout: Text stream to drain, or ``None`` (in which case nothing
            happens).
        sink: List that receives each line in arrival order.

    An ``OSError`` / ``ValueError`` raised while reading stops the collection
    with a warning on the ``rocky_sdk`` logger; lines gathered up to that
    point stay in ``sink``.
    """
    if stdout is not None:
        collect = sink.append
        try:
            for chunk in stdout:
                collect(chunk)
        except (OSError, ValueError) as err:
            _log.warning("rocky stdout accumulator terminated: %s", err)
102
+
103
+
def kill_process_group(proc: subprocess.Popen[str]) -> None:
    """Forcefully stop ``proc`` — and, on POSIX, its whole process group.

    On Windows (``os.name == "nt"``) this is ``proc.kill()``. Everywhere else
    it looks up the process group of ``proc.pid`` and sends ``SIGKILL`` to
    that group, so any children in the same group are signalled too. For the
    group to contain only the child and its descendants, the ``Popen`` is
    expected to have been started with ``start_new_session=True``.

    Args:
        proc: The child process to terminate.

    Never raises for a process that is already gone: ``ProcessLookupError``
    and any other ``OSError`` from the lookup or the signal are suppressed,
    because the child may exit between the caller deciding to kill it and
    the kill being delivered.
    """
    # ProcessLookupError is an OSError subclass; it is named for the reader.
    with contextlib.suppress(ProcessLookupError, OSError):
        if os.name != "nt":
            group_id = os.getpgid(proc.pid)
            os.killpg(group_id, signal.SIGKILL)
        else:
            proc.kill()