phlo-sling 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phlo_sling-0.1.0/PKG-INFO +16 -0
- phlo_sling-0.1.0/README.md +111 -0
- phlo_sling-0.1.0/pyproject.toml +44 -0
- phlo_sling-0.1.0/setup.cfg +4 -0
- phlo_sling-0.1.0/src/phlo_sling/__init__.py +28 -0
- phlo_sling-0.1.0/src/phlo_sling/cli_commands.py +187 -0
- phlo_sling-0.1.0/src/phlo_sling/cli_plugin.py +25 -0
- phlo_sling-0.1.0/src/phlo_sling/connections.py +186 -0
- phlo_sling-0.1.0/src/phlo_sling/decorator.py +345 -0
- phlo_sling-0.1.0/src/phlo_sling/executor.py +224 -0
- phlo_sling-0.1.0/src/phlo_sling/plugin.py +59 -0
- phlo_sling-0.1.0/src/phlo_sling/registry.py +80 -0
- phlo_sling-0.1.0/src/phlo_sling/settings.py +40 -0
- phlo_sling-0.1.0/src/phlo_sling.egg-info/PKG-INFO +16 -0
- phlo_sling-0.1.0/src/phlo_sling.egg-info/SOURCES.txt +25 -0
- phlo_sling-0.1.0/src/phlo_sling.egg-info/dependency_links.txt +1 -0
- phlo_sling-0.1.0/src/phlo_sling.egg-info/entry_points.txt +8 -0
- phlo_sling-0.1.0/src/phlo_sling.egg-info/requires.txt +7 -0
- phlo_sling-0.1.0/src/phlo_sling.egg-info/top_level.txt +1 -0
- phlo_sling-0.1.0/tests/test_sling_assets.py +79 -0
- phlo_sling-0.1.0/tests/test_sling_cli.py +131 -0
- phlo_sling-0.1.0/tests/test_sling_connections.py +121 -0
- phlo_sling-0.1.0/tests/test_sling_decorator.py +102 -0
- phlo_sling-0.1.0/tests/test_sling_executor.py +122 -0
- phlo_sling-0.1.0/tests/test_sling_plugin.py +35 -0
- phlo_sling-0.1.0/tests/test_sling_registry.py +49 -0
- phlo_sling-0.1.0/tests/test_sling_settings.py +27 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: phlo-sling
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Sling replication ingestion provider for Phlo
|
|
5
|
+
Author-email: Phlo Team <team@phlo.dev>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Description-Content-Type: text/plain
|
|
9
|
+
Requires-Dist: phlo>=0.1.0
|
|
10
|
+
Requires-Dist: sling>=1.0.0
|
|
11
|
+
Requires-Dist: pyarrow>=21.0.0
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
14
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
15
|
+
|
|
16
|
+
Sling-based database replication ingestion provider for Phlo.
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# phlo-sling
|
|
2
|
+
|
|
3
|
+
Sling-based database replication ingestion provider for Phlo.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
`phlo-sling` wraps [Sling](https://slingdata.io/) as a complementary ingestion engine alongside `phlo-dlt`. Sling is a data movement CLI (DB→DB, File→DB, DB→File) with 30+ connectors, optimized for high-speed database replication.
|
|
8
|
+
|
|
9
|
+
Where DLT excels at API-based ingestion with schema evolution and normalisation, Sling excels at raw-speed database replication with wildcard stream selection (`my_schema.*`), incremental modes, and direct DB-to-DB transfers.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install phlo-sling
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
### Decorator-Based Replication
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
import phlo
|
|
23
|
+
from phlo_sling import phlo_sling_replication
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@phlo_sling_replication(
|
|
27
|
+
stream_name="public.users",
|
|
28
|
+
table_name="users",
|
|
29
|
+
source_conn="PHLO_POSTGRES",
|
|
30
|
+
target_conn="WAREHOUSE",
|
|
31
|
+
group="crm",
|
|
32
|
+
mode="incremental",
|
|
33
|
+
primary_key="id",
|
|
34
|
+
update_key="updated_at",
|
|
35
|
+
cron="0 */2 * * *",
|
|
36
|
+
owner="data-team",
|
|
37
|
+
)
|
|
38
|
+
def replicate_users(context):
|
|
39
|
+
"""Replicate users table from Postgres into raw.users on WAREHOUSE."""
|
|
40
|
+
return None
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Python-First File Discovery
|
|
44
|
+
|
|
45
|
+
Use `phlo_sling_assets` when you want Python logic to discover folders/files
|
|
46
|
+
first and register one Sling-backed asset per result.
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from pathlib import Path
|
|
50
|
+
|
|
51
|
+
from phlo_sling import SlingReplication, phlo_sling_assets
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@phlo_sling_assets(group="finance")
|
|
55
|
+
def discover_workbooks():
|
|
56
|
+
root = Path("/mnt/finance")
|
|
57
|
+
|
|
58
|
+
for workbook in root.rglob("*.xlsx"):
|
|
59
|
+
table_name = workbook.stem.replace("-", "_").lower()
|
|
60
|
+
yield SlingReplication(
|
|
61
|
+
stream_name=f"file://{workbook}",
|
|
62
|
+
table_name=table_name,
|
|
63
|
+
source_conn="LOCAL",
|
|
64
|
+
target_conn="WAREHOUSE",
|
|
65
|
+
object=f"raw.{table_name}",
|
|
66
|
+
mode="full-refresh",
|
|
67
|
+
source_options={"sheet": "Sheet1!A:F"},
|
|
68
|
+
description=f"Ingest workbook {workbook.name}",
|
|
69
|
+
metadata={"workbook_path": str(workbook)},
|
|
70
|
+
tags={"format": "xlsx"},
|
|
71
|
+
)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Use the original `phlo_sling_replication` decorator when you want one stable
|
|
75
|
+
asset whose function may return runtime Sling overrides such as a dynamic
|
|
76
|
+
`src_stream` or `where` clause.
|
|
77
|
+
|
|
78
|
+
### CLI Commands
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Run replication from YAML
|
|
82
|
+
phlo sling run --replication replications/pg_to_lake.yaml
|
|
83
|
+
|
|
84
|
+
# Run ad-hoc replication
|
|
85
|
+
phlo sling run --source PHLO_POSTGRES --stream public.users --target PHLO_S3 --object raw/users.parquet
|
|
86
|
+
|
|
87
|
+
# Override the inferred destination object when needed
|
|
88
|
+
phlo sling run --source PHLO_POSTGRES --stream public.users --target WAREHOUSE --object raw.users
|
|
89
|
+
|
|
90
|
+
# List connections
|
|
91
|
+
phlo sling conns
|
|
92
|
+
|
|
93
|
+
# Discover available streams
|
|
94
|
+
phlo sling discover PHLO_POSTGRES
|
|
95
|
+
phlo sling discover PHLO_POSTGRES --schema public --format json
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Configuration
|
|
99
|
+
|
|
100
|
+
The following environment variables can be used to configure Sling:
|
|
101
|
+
|
|
102
|
+
- `SLING_DEFAULT_NAMESPACE` - Default namespace for generated replication table names (default: "raw")
|
|
103
|
+
- `SLING_DEFAULT_MODE` - Default replication mode (default: "incremental")
|
|
104
|
+
- `SLING_AUTO_CONNECTIONS` - Auto-generate Sling connections from Phlo capability metadata (default: true)
|
|
105
|
+
- `PHLO_OBJECT_STORE` - Select the active `object_store` capability when more than one is installed
|
|
106
|
+
|
|
107
|
+
Notes:
|
|
108
|
+
|
|
109
|
+
- Decorator-backed replications need a real Sling destination. When `target_conn` is set and `object` is omitted, `phlo-sling` targets `<namespace>.<table_name>` automatically.
|
|
110
|
+
- If you set `SLING_AUTO_CONNECTIONS=false`, `phlo-sling` stops injecting `PHLO_POSTGRES` / `PHLO_S3` connection definitions into the environment.
|
|
111
|
+
- `PHLO_S3` now resolves from the active `object_store` capability instead of importing `phlo-minio` / `phlo-rustfs` directly. If both are installed, set `PHLO_OBJECT_STORE=minio` or `PHLO_OBJECT_STORE=rustfs`.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
# The [project] table below is only understood by setuptools 61 and newer.
requires = ["setuptools>=61", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "phlo-sling"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Sling replication ingestion provider for Phlo"
|
|
9
|
+
readme = {text = "Sling-based database replication ingestion provider for Phlo.", content-type = "text/plain"}
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "Phlo Team", email = "team@phlo.dev"},
|
|
13
|
+
]
|
|
14
|
+
license = {text = "MIT"}
|
|
15
|
+
dependencies = [
|
|
16
|
+
"phlo>=0.1.0",
|
|
17
|
+
"sling>=1.0.0",
|
|
18
|
+
"pyarrow>=21.0.0",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.optional-dependencies]
|
|
22
|
+
dev = [
|
|
23
|
+
"pytest>=7.0",
|
|
24
|
+
"ruff>=0.1.0",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.entry-points."phlo.plugins.assets"]
|
|
28
|
+
sling = "phlo_sling.plugin:SlingAssetProvider"
|
|
29
|
+
|
|
30
|
+
[project.entry-points."phlo.plugins.ingestion_providers"]
|
|
31
|
+
sling = "phlo_sling.plugin:SlingIngestionProvider"
|
|
32
|
+
|
|
33
|
+
[project.entry-points."phlo.plugins.cli"]
|
|
34
|
+
sling = "phlo_sling.cli_plugin:SlingCliPlugin"
|
|
35
|
+
|
|
36
|
+
[tool.setuptools]
|
|
37
|
+
package-dir = {"" = "src"}
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.packages.find]
|
|
40
|
+
where = ["src"]
|
|
41
|
+
|
|
42
|
+
[tool.ruff]
|
|
43
|
+
line-length = 100
|
|
44
|
+
target-version = "py311"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from phlo_sling.registry import SlingReplication
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def phlo_sling_assets(*args: Any, **kwargs: Any) -> Callable[..., Any]:
    """Forward all arguments to ``phlo_sling.decorator.phlo_sling_assets``.

    The decorator module is imported when this function is called rather
    than when the package is imported.

    Returns:
        Whatever the underlying ``phlo_sling_assets`` returns.
    """
    from phlo_sling import decorator as _decorator_module

    return _decorator_module.phlo_sling_assets(*args, **kwargs)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def phlo_sling_replication(*args: Any, **kwargs: Any) -> Callable[..., Any]:
    """Forward all arguments to ``phlo_sling.decorator.phlo_sling_replication``.

    The decorator module is imported when this function is called rather
    than when the package is imported.

    Returns:
        Whatever the underlying ``phlo_sling_replication`` returns.
    """
    from phlo_sling import decorator as _decorator_module

    return _decorator_module.phlo_sling_replication(*args, **kwargs)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_sling_assets() -> list[Any]:
    """Return the result of ``phlo_sling.decorator.get_sling_assets()``.

    The decorator module is imported when this function is called rather
    than when the package is imported.
    """
    from phlo_sling import decorator as _decorator_module

    return _decorator_module.get_sling_assets()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
__all__ = ["SlingReplication", "get_sling_assets", "phlo_sling_assets", "phlo_sling_replication"]
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""CLI commands for Sling replication management."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import subprocess
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
|
|
10
|
+
from phlo.logging import get_logger
|
|
11
|
+
from phlo_sling.connections import apply_sling_connection_env
|
|
12
|
+
from phlo_sling.settings import get_settings
|
|
13
|
+
|
|
14
|
+
logger = get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Root command group; the subcommands below attach themselves to it.
# The docstring doubles as the group's help text, so it is kept verbatim.
@click.group(name="sling")
def sling_group() -> None:
    """Sling replication commands."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@sling_group.command("run")
@click.option("--replication", "-r", type=click.Path(exists=True), help="Sling replication YAML.")
@click.option("--source", "-s", help="Source connection name.")
@click.option("--target", "-t", help="Target connection name.")
@click.option("--stream", help="Source stream (e.g., 'public.users').")
@click.option("--object", "target_object", help="Target object/table name.")
@click.option(
    "--mode",
    default=None,
    help="Replication mode. Defaults to SLING_DEFAULT_MODE when omitted.",
)
def run_command(
    replication: str | None,
    source: str | None,
    target: str | None,
    stream: str | None,
    target_object: str | None,
    mode: str | None,
) -> None:
    """Run a Sling replication.

    Either provide --replication YAML or --source/--stream/--target for ad-hoc runs.
    """
    # NOTE: the docstring above is the command's help text; keep it verbatim.
    from sling import Replication, Sling

    # Connection env vars are injected before Sling reads the environment.
    apply_sling_connection_env()
    effective_mode = mode if mode else get_settings().sling_default_mode

    # A replication file takes precedence; the ad-hoc flags are not consulted.
    if replication:
        click.echo(f"Running replication from {replication}")
        Replication(file_path=replication).run()
        return

    if not (source and stream):
        raise click.UsageError("Provide --replication YAML or --source/--stream.")
    if not target:
        raise click.UsageError("Provide --target for ad-hoc runs.")

    destination = _resolve_target_object(stream=stream, target_object=target_object)
    click.echo(f"Replicating {stream} from {source}")
    Sling(
        src_conn=source,
        src_stream=stream,
        tgt_conn=target,
        tgt_object=destination,
        mode=effective_mode,
    ).run()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@sling_group.command("conns")
@click.option("--auto/--no-auto", default=True, help="Include auto-discovered connections.")
def conns_command(auto: bool) -> None:
    """List available Sling connections.

    Shows auto-discovered connections from Phlo capability metadata and any
    connections from explicit env.yaml files.
    """
    # NOTE: the docstring above is the command's help text; keep it verbatim.
    if auto:
        from phlo_sling.connections import resolve_phlo_connections

        discovered = resolve_phlo_connections()
        if not discovered:
            click.echo("No auto-discovered connections found.")
        else:
            click.echo("Auto-discovered connections:")
            for conn_name, conn_config in discovered.items():
                kind = conn_config.get("type", "unknown")
                # Prefer "host"; fall back to "endpoint", else an empty string.
                location = conn_config.get("host") or conn_config.get("endpoint", "")
                click.echo(f" {conn_name}: {kind} ({location})")

    click.echo("\nSling native connections:")
    try:
        listing = _run_sling_cli_command(["conns", "list"])
        click.echo(listing.stdout, nl=False)
    except Exception as exc:
        # Best effort: a failing Sling CLI is reported, not raised.
        click.echo(f" Could not list native connections: {exc}")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@sling_group.command("discover")
@click.argument("connection")
@click.option("--schema", help="Filter by schema name.")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["table", "json"]),
    default="table",
    show_default=True,
    help="Output format.",
)
def discover_command(connection: str, schema: str | None, output_format: str) -> None:
    """Discover available streams from a Sling connection.

    Lists tables/views available in the source connection for use as
    stream_name in @phlo_sling_replication decorators.
    """
    # NOTE: the docstring above is the command's help text; keep it verbatim.
    #
    # Raises click.ClickException when the Sling CLI cannot be run or exits
    # with a non-zero status.
    apply_sling_connection_env()

    wants_json = output_format == "json"
    # In JSON mode the status line goes to stderr so stdout holds only the
    # JSON document and can be piped straight into a parser.
    click.echo(f"Discovering streams from {connection}...", err=wants_json)

    command = ["conns", "discover", connection]
    if schema:
        command.extend(["--pattern", f"{schema}.*"])

    try:
        result = _run_sling_cli_command(command)
        if wants_json:
            click.echo(json.dumps(_parse_discovery_output(result.stdout), indent=2))
            return

        click.echo(result.stdout, nl=False)
    except subprocess.CalledProcessError as exc:
        # str(exc) only names the command and exit status; the actual reason
        # is in the captured output, so append it when there is any.
        detail = (exc.stderr or exc.stdout or "").strip()
        message = f"Discovery failed: {exc}"
        if detail:
            message = f"{message}\n{detail}"
        raise click.ClickException(message) from exc
    except Exception as exc:
        raise click.ClickException(f"Discovery failed: {exc}") from exc
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _resolve_target_object(stream: str, target_object: str | None) -> str:
|
|
136
|
+
"""Resolve the destination object for an ad-hoc Sling run."""
|
|
137
|
+
if target_object:
|
|
138
|
+
return target_object
|
|
139
|
+
if "*" in stream:
|
|
140
|
+
raise click.UsageError("Provide --object when --stream uses a wildcard.")
|
|
141
|
+
return stream
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _get_sling_binary() -> str:
    """Locate the Sling executable.

    Returns:
        ``sling_binary_path`` from the package settings when it is set,
        otherwise ``SLING_BIN`` from the ``sling.bin`` module.
    """
    configured_path = get_settings().sling_binary_path
    if not configured_path:
        # Imported only when needed, so a configured path avoids this import.
        from sling.bin import SLING_BIN

        return SLING_BIN
    return configured_path
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _run_sling_cli_command(args: list[str]) -> subprocess.CompletedProcess[str]:
    """Run the Sling binary with ``args`` and capture its output as text.

    Args:
        args: Arguments placed after the binary path.

    Returns:
        The completed process, with ``stdout`` and ``stderr`` captured as ``str``.

    Raises:
        subprocess.CalledProcessError: If the process exits non-zero
            (``check=True``).
    """
    argv = [_get_sling_binary()]
    argv.extend(args)
    return subprocess.run(argv, check=True, capture_output=True, text=True)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _parse_discovery_output(output: str) -> list[dict[str, str]]:
|
|
166
|
+
"""Parse Sling's ASCII discovery table into JSON-serializable rows."""
|
|
167
|
+
lines = [line.rstrip() for line in output.splitlines() if line.strip()]
|
|
168
|
+
table_lines = [line for line in lines if "|" in line]
|
|
169
|
+
if len(table_lines) < 2:
|
|
170
|
+
return []
|
|
171
|
+
|
|
172
|
+
headers = [_normalize_column_name(part) for part in table_lines[0].split("|")]
|
|
173
|
+
rows: list[dict[str, str]] = []
|
|
174
|
+
for line in table_lines[1:]:
|
|
175
|
+
values = [part.strip() for part in line.split("|")]
|
|
176
|
+
if len(values) != len(headers):
|
|
177
|
+
continue
|
|
178
|
+
if all(set(value) <= {"-"} for value in values):
|
|
179
|
+
continue
|
|
180
|
+
rows.append(dict(zip(headers, values, strict=True)))
|
|
181
|
+
|
|
182
|
+
return rows
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _normalize_column_name(value: str) -> str:
|
|
186
|
+
"""Normalize discovery table headers for JSON output."""
|
|
187
|
+
return value.strip().lower().replace(" ", "_")
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""CLI plugin for Sling commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from phlo.plugins.base import CliCommandPlugin, PluginMetadata
|
|
8
|
+
from phlo_sling.cli_commands import sling_group
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SlingCliPlugin(CliCommandPlugin):
    """Phlo CLI plugin that contributes the ``sling`` command group."""

    @property
    def metadata(self) -> PluginMetadata:
        """Describe this plugin by name, version and description."""
        plugin_info = PluginMetadata(
            name="sling",
            version="0.1.0",
            description="Sling replication CLI commands for Phlo",
        )
        return plugin_info

    def get_cli_commands(self) -> list[click.Command]:
        """Return the click commands this plugin provides: the ``sling`` group."""
        commands = [sling_group]
        return commands
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Auto-generate Sling connections from Phlo capability metadata."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from collections.abc import MutableMapping
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from phlo.capabilities import list_capabilities, resolve_capability
|
|
10
|
+
from phlo.infrastructure.config import load_project_config
|
|
11
|
+
from phlo.logging import get_logger
|
|
12
|
+
from phlo_sling.settings import get_settings
|
|
13
|
+
|
|
14
|
+
logger = get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def resolve_phlo_connections() -> dict[str, dict[str, Any]]:
    """Collect the auto-generated Sling connection definitions.

    Runs the Postgres, Iceberg and S3 resolvers in that order and merges
    their results. A later resolver overwrites an earlier one on a name clash.

    Returns:
        Mapping of connection name to Sling connection config. Empty when
        the ``sling_auto_connections`` setting is falsy.
    """
    if not get_settings().sling_auto_connections:
        logger.debug("sling_auto_connections_disabled")
        return {}

    merged: dict[str, dict[str, Any]] = {}
    resolvers = (
        _resolve_postgres_connection,
        _resolve_iceberg_connection,
        _resolve_s3_connection,
    )
    for resolver in resolvers:
        merged.update(resolver())
    return merged
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _project_env_value(name: str) -> str | None:
    """Look up ``name`` in the ``env`` section of the loaded project config.

    Args:
        name: Key to read from the config's ``env`` mapping.

    Returns:
        The value when it is a non-empty string. ``None`` when the config
        cannot be loaded, ``env`` is not a dict, or the value is missing,
        empty, or not a string.
    """
    try:
        project_config = load_project_config()
    except Exception as exc:
        # Best effort: a config that cannot be loaded means "no value".
        logger.debug("project_env_lookup_failed", name=name, error=str(exc))
        return None

    env_section = project_config.get("env", {})
    if isinstance(env_section, dict):
        candidate = env_section.get(name)
        if isinstance(candidate, str) and candidate:
            return candidate
    return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _ensure_capabilities_discovered(*kinds: str) -> None:
    """Run capability discovery unless every requested kind is already registered.

    Discovery is skipped only when ``list_capabilities`` returns a non-empty
    result for all of ``kinds``. If any requested kind is missing, or no kinds
    are given, ``discover_capabilities()`` is called.

    Args:
        kinds: Capability kinds the caller is about to resolve.
    """
    # all() rather than any(): one registered kind must not stop discovery
    # of the other kinds the caller asked for.
    if kinds and all(list_capabilities(kind) for kind in kinds):
        return

    from phlo.capabilities.discovery import discover_capabilities

    discover_capabilities()
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _get_iceberg_settings():
    """Return ``phlo_iceberg.settings.get_settings()``.

    The import happens at call time, so a missing ``phlo_iceberg`` package
    surfaces here as an ``ImportError`` rather than at module import.
    """
    from phlo_iceberg import settings as _iceberg_settings_module

    return _iceberg_settings_module.get_settings()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _resolve_postgres_connection() -> dict[str, dict[str, Any]]:
    """Build the ``PHLO_POSTGRES`` Sling connection from phlo-postgres settings.

    Returns:
        ``{"PHLO_POSTGRES": {...}}`` on success, or ``{}`` when the settings
        cannot be imported or read. The failure is logged at debug level.
    """
    try:
        from phlo_postgres.settings import get_settings as get_pg_settings

        pg = get_pg_settings()
        definition = {
            "type": "postgres",
            "host": pg.postgres_host,
            "port": pg.postgres_port,
            "database": pg.postgres_db,
            "user": pg.postgres_user,
            "password": pg.postgres_password,
            # Schema falls back to "public" when the settings lack the attribute.
            "schema": getattr(pg, "postgres_schema", "public"),
        }
        return {"PHLO_POSTGRES": definition}
    except Exception as exc:  # ImportError is an Exception subclass
        logger.debug("postgres_connection_skipped", error=str(exc))
        return {}
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _resolve_iceberg_connection() -> dict[str, dict[str, Any]]:
    """Build the ``PHLO_ICEBERG`` Sling connection from phlo-iceberg settings.

    Reads the PyIceberg catalog config for the default ref and maps its
    keys onto a Sling REST-catalog connection.

    Returns:
        ``{"PHLO_ICEBERG": {...}}`` on success, or ``{}`` when the settings
        cannot be loaded or a key is missing. The failure is logged at debug level.
    """
    try:
        iceberg = _get_iceberg_settings()
        catalog = iceberg.get_pyiceberg_catalog_config(iceberg.iceberg_default_ref)
        definition = {
            "type": "iceberg",
            "catalog_type": "rest",
            "rest_uri": catalog["uri"],
            "rest_warehouse": catalog["warehouse"],
            "s3_endpoint": catalog["s3.endpoint"],
            "s3_access_key_id": catalog["s3.access-key-id"],
            "s3_secret_access_key": catalog["s3.secret-access-key"],
            "s3_region": catalog["s3.region"],
            "schema": iceberg.iceberg_default_namespace,
        }
        return {"PHLO_ICEBERG": definition}
    except Exception as exc:  # ImportError is an Exception subclass
        logger.debug("iceberg_connection_skipped", error=str(exc))
        return {}
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _resolve_s3_connection() -> dict[str, dict[str, Any]]:
    """Build the ``PHLO_S3`` Sling connection from the ``object_store`` capability.

    The capability name comes from the ``PHLO_OBJECT_STORE`` environment
    variable, falling back to the project config's ``env`` section.

    Returns:
        ``{"PHLO_S3": config}``, or ``{}`` when no capability resolves or the
        resulting config is empty.
    """
    _ensure_capabilities_discovered("object_store")

    env_key = "PHLO_OBJECT_STORE"
    requested_name = os.environ.get(env_key)
    if not requested_name:
        requested_name = _project_env_value(env_key)

    resolution = resolve_capability("object_store", requested_name)
    if resolution is None:
        logger.debug(
            "object_store_connection_skipped",
            requested_name=requested_name,
            available=list_capabilities("object_store"),
        )
        return {}

    provider = resolution.provider
    if not hasattr(provider, "to_sling_connection"):
        # No provider hook: fall back to selected keys of the capability metadata.
        wanted_keys = {"type", "endpoint", "access_key_id", "secret_access_key", "region"}
        config = {}
        for key, value in resolution.metadata.items():
            if key in wanted_keys:
                config[key] = value
    else:
        config = provider.to_sling_connection()

    if config:
        return {"PHLO_S3": config}

    logger.debug(
        "object_store_connection_missing_config",
        capability_name=resolution.name,
    )
    return {}
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def export_sling_env(connections: dict[str, dict[str, Any]]) -> dict[str, str]:
    """Serialize each connection config to a JSON string keyed by its name.

    Args:
        connections: Mapping of connection name to connection config.

    Returns:
        Mapping of the same names to the ``json.dumps`` text of each config.
    """
    import json

    return {conn_name: json.dumps(definition) for conn_name, definition in connections.items()}
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def apply_sling_connection_env(environ: MutableMapping[str, str] | None = None) -> dict[str, str]:
    """Write the resolved Sling connections into an environment mapping.

    Each variable is written with ``setdefault``, so a name already present
    in the mapping keeps its existing value.

    Args:
        environ: Mapping to mutate; ``os.environ`` is used when ``None``.

    Returns:
        Every generated variable (name to JSON string), including any that
        were not written because the name was already set.
    """
    destination = environ if environ is not None else os.environ
    generated = export_sling_env(resolve_phlo_connections())
    for var_name in generated:
        destination.setdefault(var_name, generated[var_name])
    return generated
|