phlo-clickhouse 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.4
2
+ Name: phlo-clickhouse
3
+ Version: 0.1.0
4
+ Summary: ClickHouse service and resource plugin for Phlo
5
+ Author-email: Phlo Team <team@phlo.dev>
6
+ License: MIT
7
+ Requires-Python: >=3.11
8
+ Description-Content-Type: text/plain
9
+ Requires-Dist: phlo>=0.1.0
10
+ Requires-Dist: clickhouse-connect>=0.8.0
11
+ Requires-Dist: pyyaml>=6.0.1
12
+ Requires-Dist: pandas>=2.0.0
13
+ Requires-Dist: pyarrow>=12.0.0
14
+ Provides-Extra: dbt
15
+ Requires-Dist: dbt-core>=1.8; extra == "dbt"
16
+ Requires-Dist: dbt-clickhouse>=1.8; extra == "dbt"
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=7.0; extra == "dev"
19
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
20
+
21
+ ClickHouse service and resource plugin for Phlo.
@@ -0,0 +1,72 @@
1
+ # phlo-clickhouse
2
+
3
+ ClickHouse service and resource plugin for Phlo.
4
+
5
+ ## Overview
6
+
7
+ `phlo-clickhouse` provides ClickHouse as a combined `table_store`, `query_engine`, and `publish_target` capability in Phlo. Unlike the existing bundled stack (DLT -> Iceberg -> Trino/dbt -> Postgres), ClickHouse can serve all three data plane roles in a single service.
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ pip install phlo-clickhouse
13
+ ```
14
+
15
+ ## Usage
16
+
17
+ ### Starting ClickHouse
18
+
19
+ ```bash
20
+ phlo services start --service clickhouse
21
+ ```
22
+
23
+ This starts both the ClickHouse server and the setup container that creates the default databases (`raw`, `staging`, `curated`, `marts`).
24
+
25
+ ### Running Queries
26
+
27
+ ```bash
28
+ phlo clickhouse query "SELECT version()"
29
+ phlo clickhouse query --file query.sql
30
+ ```
31
+
32
+ ### Checking Status
33
+
34
+ ```bash
35
+ phlo clickhouse status
36
+ ```
37
+
38
+ ## Configuration
39
+
40
+ The following environment variables can be used to configure ClickHouse:
41
+
42
+ | Variable | Default | Description |
43
+ |----------|---------|-------------|
44
+ | `CLICKHOUSE_VERSION` | `latest` | ClickHouse server version tag |
45
+ | `CLICKHOUSE_HTTP_PORT` | `8123` | ClickHouse HTTP interface port |
46
+ | `CLICKHOUSE_NATIVE_PORT` | `19000` | Host port published for the ClickHouse native protocol (container port 9000) |
47
+ | `CLICKHOUSE_METRICS_PORT` | `9363` | ClickHouse Prometheus metrics port |
48
+ | `CLICKHOUSE_USER` | `default` | ClickHouse default username |
49
+ | `CLICKHOUSE_PASSWORD` | | ClickHouse default user password |
50
+ | `CLICKHOUSE_DB` | `default` | Default ClickHouse database |
51
+
52
+ ## Capabilities
53
+
54
+ This plugin registers the following capabilities:
55
+
56
+ - **Table Store**: ClickHouse MergeTree engine
57
+ - **Query Engine**: ClickHouse SQL
58
+ - **Publish Target**: ClickHouse marts database
59
+
60
+ ## dbt Integration
61
+
62
+ Install with dbt support:
63
+
64
+ ```bash
65
+ pip install phlo-clickhouse[dbt]
66
+ ```
67
+
68
+ This provides the `dbt-clickhouse` adapter for running dbt transforms against ClickHouse.
69
+
70
+ ## License
71
+
72
+ MIT
@@ -0,0 +1,55 @@
1
+ [build-system]
2
+ requires = ["setuptools>=45", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "phlo-clickhouse"
7
+ version = "0.1.0"
8
+ description = "ClickHouse service and resource plugin for Phlo"
9
+ readme = {text = "ClickHouse service and resource plugin for Phlo.", content-type = "text/plain"}
10
+ requires-python = ">=3.11"
11
+ authors = [
12
+ {name = "Phlo Team", email = "team@phlo.dev"},
13
+ ]
14
+ license = {text = "MIT"}
15
+ dependencies = [
16
+ "phlo>=0.1.0",
17
+ "clickhouse-connect>=0.8.0",
18
+ "pyyaml>=6.0.1",
19
+ "pandas>=2.0.0",
20
+ "pyarrow>=12.0.0",
21
+ ]
22
+
23
+ [project.optional-dependencies]
24
+ dbt = [
25
+ "dbt-core>=1.8",
26
+ "dbt-clickhouse>=1.8",
27
+ ]
28
+ dev = [
29
+ "pytest>=7.0",
30
+ "ruff>=0.1.0",
31
+ ]
32
+
33
+ [project.entry-points."phlo.plugins.services"]
34
+ clickhouse = "phlo_clickhouse.plugin:ClickHouseServicePlugin"
35
+ clickhouse-setup = "phlo_clickhouse.plugin:ClickHouseSetupServicePlugin"
36
+
37
+ [project.entry-points."phlo.plugins.resources"]
38
+ clickhouse = "phlo_clickhouse.plugin:ClickHouseResourceProvider"
39
+
40
+ [project.entry-points."phlo.plugins.cli"]
41
+ clickhouse = "phlo_clickhouse.cli_plugin:ClickHouseCliPlugin"
42
+
43
+ [tool.setuptools]
44
+ package-dir = {"" = "src"}
45
+ include-package-data = true
46
+
47
+ [tool.setuptools.packages.find]
48
+ where = ["src"]
49
+
50
+ [tool.setuptools.package-data]
51
+ phlo_clickhouse = ["service.yaml", "clickhouse-setup.yaml"]
52
+
53
+ [tool.ruff]
54
+ line-length = 100
55
+ target-version = "py311"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,19 @@
1
+ """ClickHouse service and resource plugin package."""
2
+
3
+ from phlo_clickhouse.plugin import (
4
+ ClickHouseResourceProvider,
5
+ ClickHouseServicePlugin,
6
+ ClickHouseSetupServicePlugin,
7
+ )
8
+ from phlo_clickhouse.resource import ClickHouseResource
9
+ from phlo_clickhouse.settings import ClickHouseSettings, get_settings
10
+
11
+ __all__ = [
12
+ "ClickHouseResource",
13
+ "ClickHouseResourceProvider",
14
+ "ClickHouseServicePlugin",
15
+ "ClickHouseSetupServicePlugin",
16
+ "ClickHouseSettings",
17
+ "get_settings",
18
+ ]
19
+ __version__ = "0.1.0"
@@ -0,0 +1,154 @@
1
+ """CLI commands for the ClickHouse data plane service."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from shutil import which
7
+ from subprocess import TimeoutExpired
8
+
9
+ import click
10
+
11
+ from phlo.cli.infrastructure.command import CommandError, run_command
12
+ from phlo.cli.infrastructure.compose import compose_base_cmd
13
+ from phlo.cli.infrastructure.utils import get_project_name
14
+ from phlo.logging import get_logger
15
+
16
+ logger = get_logger(__name__)
17
+
18
+
19
+ def _read_query(*, query: str | None, file: Path | None) -> str:
20
+ """Return SQL text from inline query or file input."""
21
+ if query and file:
22
+ raise click.ClickException("Use either an inline query or --file, not both.")
23
+ if file is not None:
24
+ try:
25
+ sql = file.read_text(encoding="utf-8")
26
+ except OSError as exc:
27
+ raise click.ClickException(f"Failed to read SQL file: {file}") from exc
28
+ if sql.strip():
29
+ return sql
30
+ raise click.ClickException(f"SQL file is empty: {file}")
31
+ if query and query.strip():
32
+ return query
33
+ raise click.ClickException("Provide a SQL query argument or --file.")
34
+
35
+
36
def _ensure_phlo_dir() -> Path:
    """Return the local .phlo directory or exit with a clear error.

    Returns:
        ``<current working directory>/.phlo``.

    Raises:
        click.ClickException: If that path is missing or is not a directory.
    """
    phlo_dir = Path.cwd() / ".phlo"
    # is_dir() rather than exists(): a stray regular file named ".phlo" must
    # be rejected here instead of being handed on as if it were the directory.
    if not phlo_dir.is_dir():
        raise click.ClickException(".phlo directory not found. Run 'phlo services init' first.")
    return phlo_dir
42
+
43
+
44
def _require_docker() -> None:
    """Check that the ``docker`` binary is on PATH and ``docker info`` succeeds.

    Raises:
        click.ClickException: If the binary is missing, ``docker info`` does
            not finish within 10 seconds, or it exits with a non-zero status.
    """
    if which("docker") is None:
        raise click.ClickException("docker command not found.")

    probe_cmd = ["docker", "info"]
    try:
        # check=False: the exit status is inspected below instead of raising.
        probe = run_command(
            probe_cmd,
            timeout_seconds=10,
            capture_output=True,
            check=False,
        )
    except TimeoutExpired as exc:
        raise click.ClickException("docker info timed out.") from exc

    if probe.returncode != 0:
        raise click.ClickException("Docker is not running.")
60
+
61
+
62
# Root ``clickhouse`` click group. It has no behavior of its own; the
# subcommands in this module register themselves on it with
# ``@clickhouse_group.command``. The docstring is kept short on purpose,
# because click uses it as the group's help text.
@click.group(name="clickhouse")
def clickhouse_group() -> None:
    """Query and inspect the ClickHouse data plane service."""
65
+
66
+
67
# ``phlo clickhouse query``: runs SQL through ``clickhouse-client`` inside the
# ``clickhouse`` compose service and echoes the client's stdout unchanged.
# The function docstring is left as a single line because click shows it as
# the command's help text.
@clickhouse_group.command(name="query")
@click.argument("query", required=False)
@click.option(
    "--file",
    "query_file",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option("--format", "output_format", default="TabSeparatedRaw", show_default=True)
@click.option(
    "--timeout",
    "timeout_seconds",
    default=30,
    show_default=True,
    # A zero or negative timeout could only ever expire immediately, so it is
    # rejected at argument-parsing time.
    type=click.IntRange(min=1),
)
def clickhouse_query(
    query: str | None,
    query_file: Path | None,
    output_format: str,
    timeout_seconds: int,
) -> None:
    """Execute a SQL query against the running ClickHouse service."""
    # Validate the SQL input first: it is cheap, and a usage error should not
    # have to wait for the Docker probe to complete.
    sql = _read_query(query=query, file=query_file)
    _require_docker()
    phlo_dir = _ensure_phlo_dir()
    project_name = get_project_name()

    cmd = compose_base_cmd(phlo_dir=phlo_dir, project_name=project_name)
    cmd.extend(
        [
            "exec",
            "-T",
            "clickhouse",
            "clickhouse-client",
            "--multiquery",
            "--format",
            output_format,
            "--query",
            sql,
        ]
    )

    try:
        result = run_command(
            cmd,
            timeout_seconds=timeout_seconds,
            capture_output=True,
            check=True,
        )
    except CommandError as exc:
        # stderr may be absent on the error; fall back to the exception text
        # instead of failing with AttributeError on None.
        stderr = (exc.stderr or "").strip()
        raise click.ClickException(stderr or str(exc)) from exc
    except TimeoutExpired as exc:
        raise click.ClickException(f"Query timed out after {timeout_seconds} seconds.") from exc

    if result.stdout:
        # nl=False: the client output is echoed as-is, without an extra newline.
        click.echo(result.stdout, nl=False)
118
+
119
+
120
# ``phlo clickhouse status``: asks the running server for its version, uptime
# and current database via ``clickhouse-client`` inside the compose service.
# The function docstring is left as a single line because click shows it as
# the command's help text.
@clickhouse_group.command(name="status")
def clickhouse_status() -> None:
    """Show ClickHouse service status and basic server info."""
    _require_docker()
    phlo_dir = _ensure_phlo_dir()
    project_name = get_project_name()

    cmd = compose_base_cmd(phlo_dir=phlo_dir, project_name=project_name)
    cmd.extend(
        [
            "exec",
            "-T",
            "clickhouse",
            "clickhouse-client",
            "--query",
            "SELECT version() AS version, uptime() AS uptime_seconds, "
            "currentDatabase() AS current_database",
        ]
    )

    try:
        result = run_command(
            cmd,
            timeout_seconds=10,
            capture_output=True,
            check=True,
        )
    except CommandError as exc:
        # stderr may be absent on the error; fall back to the exception text
        # instead of failing with AttributeError on None.
        stderr = (exc.stderr or "").strip()
        raise click.ClickException(stderr or str(exc)) from exc
    except TimeoutExpired as exc:
        raise click.ClickException("Status check timed out.") from exc

    if result.stdout:
        # nl=False: the client output is echoed as-is, without an extra newline.
        click.echo(result.stdout, nl=False)
@@ -0,0 +1,24 @@
1
+ """CLI plugin for ClickHouse commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import click
6
+
7
+ from phlo.plugins.base import CliCommandPlugin, PluginMetadata
8
+
9
+ from phlo_clickhouse.cli import clickhouse_group
10
+
11
+
12
class ClickHouseCliPlugin(CliCommandPlugin):
    """CLI plugin that contributes the ``clickhouse`` command group."""

    @property
    def metadata(self) -> PluginMetadata:
        """Return the name, version and description identifying this plugin."""
        details = {
            "name": "clickhouse",
            "version": "0.1.0",
            "description": "CLI commands for ClickHouse data plane access",
        }
        return PluginMetadata(**details)

    def get_cli_commands(self) -> list[click.Command]:
        """Return the click commands this plugin provides (one group)."""
        commands: list[click.Command] = [clickhouse_group]
        return commands
@@ -0,0 +1,30 @@
1
+ # Companion service for clickhouse - creates required databases
2
+ name: clickhouse-setup
3
+ description: Initialize ClickHouse databases for data plane
4
+ category: data
5
+ default: false
6
+
7
+ image: clickhouse/clickhouse-server:${CLICKHOUSE_VERSION:-latest}
8
+
9
+ depends_on:
10
+ - clickhouse
11
+
12
+ compose:
13
+ restart: "no"
14
+ entrypoint: >
15
+ /bin/sh -c "
16
+ until clickhouse-client --host clickhouse --user $${CLICKHOUSE_USER:-default}
17
+ --password $${CLICKHOUSE_PASSWORD:-}
18
+ --query 'SELECT 1' 2>/dev/null; do
19
+ echo 'Waiting for ClickHouse...' && sleep 2;
20
+ done &&
21
+ clickhouse-client --host clickhouse --user $${CLICKHOUSE_USER:-default}
22
+ --password $${CLICKHOUSE_PASSWORD:-}
23
+ --multiquery --query '
24
+ CREATE DATABASE IF NOT EXISTS raw;
25
+ CREATE DATABASE IF NOT EXISTS staging;
26
+ CREATE DATABASE IF NOT EXISTS curated;
27
+ CREATE DATABASE IF NOT EXISTS marts;
28
+ ' &&
29
+ echo 'ClickHouse databases created successfully'
30
+ "
@@ -0,0 +1,145 @@
1
+ """ClickHouse service and resource provider plugins."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from importlib import resources
6
+ from time import perf_counter
7
+ from typing import Any
8
+
9
+ import yaml
10
+
11
+ from phlo.capabilities import (
12
+ CapabilitySupport,
13
+ PublishTargetSpec,
14
+ ResourceSpec,
15
+ TableStoreSpec,
16
+ )
17
+ from phlo.capabilities.specs import QueryEngineSpec
18
+ from phlo.logging import get_logger
19
+ from phlo.plugins import PluginMetadata, ResourceProviderPlugin, ServicePlugin
20
+ from phlo_clickhouse.publish_target import ClickHousePublishTarget
21
+ from phlo_clickhouse.resource import CLICKHOUSE_QUERY_ENGINE_SUPPORT, ClickHouseResource
22
+ from phlo_clickhouse.settings import get_settings as get_clickhouse_settings
23
+
24
+ logger = get_logger(__name__)
25
+
26
+
27
def _load_service_definition(resource_name: str, service_name: str) -> dict[str, Any]:
    """Load a YAML service definition bundled inside the ``phlo_clickhouse`` package.

    Args:
        resource_name: File name of the packaged YAML resource.
        service_name: Service label; used only as a logging field.

    Returns:
        Whatever ``yaml.safe_load`` produced for the file.

    Raises:
        Exception: Any error from reading or parsing the file is logged and
            re-raised unchanged.
    """
    started = perf_counter()

    def _elapsed_ms() -> float:
        # Milliseconds since the load began, rounded to two decimals.
        return round((perf_counter() - started) * 1000, 2)

    # Fields shared by every log event emitted for this load.
    log_fields = {"service_name": service_name, "resource_name": resource_name}

    logger.info("clickhouse_service_definition_load_started", **log_fields)
    definition_file = resources.files("phlo_clickhouse").joinpath(resource_name)
    try:
        raw_text = definition_file.read_text(encoding="utf-8")
        definition = yaml.safe_load(raw_text)
    except Exception:
        logger.error(
            "clickhouse_service_definition_load_failed",
            **log_fields,
            elapsed_ms=_elapsed_ms(),
            exc_info=True,
        )
        raise

    # Only a mapping can report a "services" count; anything else logs None.
    if isinstance(definition, dict):
        service_count = len(definition.get("services", {}))
    else:
        service_count = None

    logger.info(
        "clickhouse_service_definition_load_completed",
        **log_fields,
        elapsed_ms=_elapsed_ms(),
        service_count=service_count,
    )
    return definition
56
+
57
+
58
class ClickHouseServicePlugin(ServicePlugin):
    """Service plugin describing the ClickHouse server itself."""

    @property
    def metadata(self) -> PluginMetadata:
        """Return the plugin's identifying metadata."""
        plugin_tags = ["data", "query", "storage"]
        return PluginMetadata(
            name="clickhouse",
            version="0.1.0",
            description="ClickHouse analytical database for data plane",
            author="Phlo Team",
            tags=plugin_tags,
        )

    @property
    def service_definition(self) -> dict[str, Any]:
        """Return the definition loaded from the packaged ``service.yaml``."""
        definition = _load_service_definition("service.yaml", "clickhouse")
        return definition
74
+
75
+
76
class ClickHouseSetupServicePlugin(ServicePlugin):
    """Service plugin describing the companion database-initialization service."""

    @property
    def metadata(self) -> PluginMetadata:
        """Return the plugin's identifying metadata."""
        plugin_tags = ["data", "bootstrap"]
        return PluginMetadata(
            name="clickhouse-setup",
            version="0.1.0",
            description="Initialize ClickHouse databases for data plane",
            author="Phlo Team",
            tags=plugin_tags,
        )

    @property
    def service_definition(self) -> dict[str, Any]:
        """Return the definition loaded from the packaged ``clickhouse-setup.yaml``."""
        definition = _load_service_definition("clickhouse-setup.yaml", "clickhouse-setup")
        return definition
92
+
93
+
94
class ClickHouseResourceProvider(ResourceProviderPlugin):
    """Registers ClickHouse as resource, table store, query engine and publish target."""

    @property
    def metadata(self) -> PluginMetadata:
        """Return the plugin's identifying metadata."""
        return PluginMetadata(
            name="clickhouse",
            version="0.1.0",
            description="ClickHouse resource for Phlo",
            support=CapabilitySupport(),
        )

    def get_resources(self) -> list[ResourceSpec]:
        """Return a single resource spec named ``clickhouse``."""
        spec = ResourceSpec(name="clickhouse", resource=ClickHouseResource())
        return [spec]

    def get_table_stores(self) -> list[TableStoreSpec]:
        """Return a single table-store spec (no snapshots, schema evolution on)."""
        store_provider = ClickHouseResource()
        capabilities = CapabilitySupport(
            supports_snapshots=False,
            supports_schema_evolution=True,
        )
        store = TableStoreSpec(
            name="clickhouse",
            provider=store_provider,
            support=capabilities,
        )
        return [store]

    def get_query_engines(self) -> list[QueryEngineSpec]:
        """Return a single query-engine spec carrying connection details from settings."""
        settings = get_clickhouse_settings()
        engine_provider = ClickHouseResource()
        connection_info = {
            "host": settings.clickhouse_host,
            "port": settings.clickhouse_http_port,
            "native_port": settings.clickhouse_native_port,
            "default_database": settings.clickhouse_db,
            "service_type": "ClickHouse",
        }
        engine = QueryEngineSpec(
            name="clickhouse",
            provider=engine_provider,
            metadata=connection_info,
            support=CLICKHOUSE_QUERY_ENGINE_SUPPORT,
        )
        return [engine]

    def get_publish_targets(self) -> list[PublishTargetSpec]:
        """Return a single publish-target spec backed by ``ClickHousePublishTarget``."""
        target_provider = ClickHousePublishTarget()
        target_info = {"target_system": "clickhouse", "role": "serving"}
        target = PublishTargetSpec(
            name="clickhouse",
            provider=target_provider,
            metadata=target_info,
        )
        return [target]
@@ -0,0 +1,16 @@
1
+ """ClickHouse publish target for mart publishing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+
7
+ from phlo_clickhouse.resource import ClickHouseResource
8
+
9
+
10
@dataclass
class ClickHousePublishTarget:
    """Publish target backed by ClickHouse.

    A plain data holder: it defines no methods of its own.
    """

    # Connection wrapper; each instance gets its own ClickHouseResource built
    # with that class's defaults (default_factory, not a shared instance).
    resource: ClickHouseResource = field(default_factory=ClickHouseResource)
    # Identifier of the system this target publishes to.
    target_system: str = "clickhouse"
    # Name of the schema used by default.
    # NOTE(review): presumably the ClickHouse database marts are written to — confirm.
    default_schema: str = "marts"
@@ -0,0 +1,239 @@
1
+ """ClickHouse resource for executing queries."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ import time
8
+ from typing import TYPE_CHECKING, Any, Iterable
9
+
10
+ import clickhouse_connect
11
+ import pandas as pd
12
+
13
+ from phlo.capabilities import CapabilitySupport
14
+ from phlo.logging import get_logger
15
+ from phlo_clickhouse.settings import get_settings as get_clickhouse_settings
16
+
17
+ if TYPE_CHECKING:
18
+ from clickhouse_connect.driver import Client
19
+
20
+ logger = get_logger(__name__)
21
+
22
+ CLICKHOUSE_QUERY_ENGINE_SUPPORT = CapabilitySupport(
23
+ supports_snapshots=False,
24
+ supports_time_travel=False,
25
+ )
26
+
27
+
28
+ @dataclass
29
+ class ClickHouseResource:
30
+ """Resource wrapper for ClickHouse connections and query execution."""
31
+
32
+ host: str | None = None
33
+ port: int | None = None
34
+ user: str | None = None
35
+ password: str | None = None
36
+ database: str | None = None
37
+ secure: bool | None = None
38
+
39
+ def _settings(self):
40
+ return get_clickhouse_settings()
41
+
42
+ def get_client(self) -> "Client":
43
+ """Create and return a ClickHouse client."""
44
+ settings = self._settings()
45
+ return clickhouse_connect.get_client(
46
+ host=self.host or settings.clickhouse_host,
47
+ port=self.port or settings.clickhouse_http_port,
48
+ username=self.user or settings.clickhouse_user,
49
+ password=self.password or settings.clickhouse_password,
50
+ database=self.database or settings.clickhouse_db,
51
+ secure=self.secure if self.secure is not None else settings.clickhouse_secure,
52
+ )
53
+
54
+ def execute(self, sql: str, params: Iterable[object] | None = None) -> list[list[Any]]:
55
+ """Execute SQL and return query results."""
56
+ client = self.get_client()
57
+ try:
58
+ result = client.query(sql, parameters=list(params or []))
59
+ return result.result_rows
60
+ finally:
61
+ client.close()
62
+
63
+ def command(self, sql: str) -> Any:
64
+ """Execute a command (DDL/DML) that returns a single value or None."""
65
+ client = self.get_client()
66
+ try:
67
+ return client.command(sql)
68
+ finally:
69
+ client.close()
70
+
71
+ def wait_ready(
72
+ self,
73
+ *,
74
+ timeout: float = 60.0,
75
+ interval: float = 1.0,
76
+ ) -> None:
77
+ """Wait for ClickHouse to accept queries."""
78
+ deadline = time.monotonic() + timeout
79
+ last_error: Exception | None = None
80
+ interval = max(interval, 0.0)
81
+ settings = self._settings()
82
+ while time.monotonic() < deadline:
83
+ try:
84
+ self.command("SELECT 1")
85
+ logger.info(
86
+ "clickhouse_wait_ready_succeeded",
87
+ host=self.host or settings.clickhouse_host,
88
+ port=self.port or settings.clickhouse_http_port,
89
+ )
90
+ return
91
+ except Exception as exc: # noqa: BLE001
92
+ last_error = exc
93
+ logger.debug(
94
+ "clickhouse_wait_ready_retry",
95
+ host=self.host or settings.clickhouse_host,
96
+ port=self.port or settings.clickhouse_http_port,
97
+ retry_interval_seconds=interval,
98
+ )
99
+ time.sleep(interval)
100
+ logger.error(
101
+ "clickhouse_wait_ready_timeout",
102
+ host=self.host or settings.clickhouse_host,
103
+ port=self.port or settings.clickhouse_http_port,
104
+ timeout_seconds=timeout,
105
+ )
106
+ raise TimeoutError(f"ClickHouse not ready after {timeout:.1f}s") from last_error
107
+
108
+ def _escape_identifier(self, name: str) -> str:
109
+ """Escape a ClickHouse identifier (database, table, column) with backticks."""
110
+ return f"`{name.replace('`', '``')}`"
111
+
112
+ def ensure_table(
113
+ self,
114
+ *,
115
+ table_name: str,
116
+ schema: Any,
117
+ partition_spec: Any = None,
118
+ override_ref: str | None = None,
119
+ ) -> Any:
120
+ """Ensure a destination table exists."""
121
+ settings = self._settings()
122
+ database = self._escape_identifier(self.database or settings.clickhouse_db)
123
+ table = self._escape_identifier(table_name)
124
+
125
+ columns_def = self._schema_to_columns(schema)
126
+
127
+ partition_by = ""
128
+ if partition_spec:
129
+ partition_cols = [self._escape_identifier(p[0]) for p in partition_spec]
130
+ partition_by = f"PARTITION BY ({', '.join(partition_cols)})"
131
+
132
+ sql = f"CREATE TABLE IF NOT EXISTS {database}.{table} ({columns_def}) ENGINE = MergeTree() {partition_by} ORDER BY tuple()"
133
+
134
+ return self.command(sql)
135
+
136
+ def append_parquet(
137
+ self,
138
+ *,
139
+ table_name: str,
140
+ data_path: str | Path,
141
+ override_ref: str | None = None,
142
+ ) -> dict[str, int]:
143
+ """Append staged parquet data to a destination table."""
144
+ settings = self._settings()
145
+ database = self._escape_identifier(self.database or settings.clickhouse_db)
146
+ table = self._escape_identifier(table_name)
147
+
148
+ data_path_str = str(data_path)
149
+ df = pd.read_parquet(data_path_str)
150
+ row_count = len(df)
151
+
152
+ client = self.get_client()
153
+ try:
154
+ client.insert_df(f"{database}.{table}", df)
155
+ finally:
156
+ client.close()
157
+
158
+ return {"rows_inserted": row_count}
159
+
160
+ def merge_parquet(
161
+ self,
162
+ *,
163
+ table_name: str,
164
+ data_path: str | Path,
165
+ unique_key: str,
166
+ override_ref: str | None = None,
167
+ ) -> dict[str, int]:
168
+ """Merge staged parquet data into a destination table."""
169
+ settings = self._settings()
170
+ database = self._escape_identifier(self.database or settings.clickhouse_db)
171
+ table = self._escape_identifier(table_name)
172
+ key = self._escape_identifier(unique_key)
173
+
174
+ data_path_str = str(data_path)
175
+ df = pd.read_parquet(data_path_str)
176
+ row_count = len(df)
177
+
178
+ unique_keys = df[unique_key].tolist()
179
+ if unique_keys:
180
+ keys_str = ", ".join(f"'{k}'" for k in unique_keys)
181
+ delete_sql = f"ALTER TABLE {database}.{table} DELETE WHERE {key} IN ({keys_str})"
182
+ self.command(delete_sql)
183
+
184
+ client = self.get_client()
185
+ try:
186
+ client.insert_df(f"{database}.{table}", df)
187
+ finally:
188
+ client.close()
189
+
190
+ return {"rows_inserted": row_count, "rows_deleted": len(unique_keys)}
191
+
192
+ def _schema_to_columns(self, schema: Any) -> str:
193
+ """Convert a schema to ClickHouse column definitions."""
194
+ if hasattr(schema, "to_schema"):
195
+ schema = schema.to_schema()
196
+
197
+ if hasattr(schema, "columns"):
198
+ columns = []
199
+ for name, col in schema.columns.items():
200
+ ch_type = self._pandas_type_to_clickhouse(col.dtype)
201
+ columns.append(f"{name} {ch_type}")
202
+ return ", ".join(columns)
203
+
204
+ if hasattr(schema, "fields"):
205
+ columns = []
206
+ for field in schema.fields:
207
+ ch_type = self._python_type_to_clickhouse(field.type)
208
+ columns.append(f"{field.name} {ch_type}")
209
+ return ", ".join(columns)
210
+
211
+ raise TypeError(
212
+ f"Unsupported schema type: {type(schema).__name__}. Expected a schema with 'columns' or 'fields' attribute."
213
+ )
214
+
215
+ def _pandas_type_to_clickhouse(self, dtype: Any) -> str:
216
+ """Convert pandas dtype to ClickHouse type."""
217
+ import pandas as pd
218
+
219
+ if pd.api.types.is_integer_dtype(dtype):
220
+ return "Int64"
221
+ if pd.api.types.is_float_dtype(dtype):
222
+ return "Float64"
223
+ if pd.api.types.is_bool_dtype(dtype):
224
+ return "UInt8"
225
+ if pd.api.types.is_datetime64_any_dtype(dtype):
226
+ return "DateTime64"
227
+ if pd.api.types.is_string_dtype(dtype):
228
+ return "String"
229
+ return "String"
230
+
231
+ def _python_type_to_clickhouse(self, py_type: Any) -> str:
232
+ """Convert Python type to ClickHouse type."""
233
+ type_map = {
234
+ int: "Int64",
235
+ float: "Float64",
236
+ str: "String",
237
+ bool: "UInt8",
238
+ }
239
+ return type_map.get(py_type, "String")
@@ -0,0 +1,85 @@
1
+ name: clickhouse
2
+ description: ClickHouse analytical database for data plane
3
+ category: data
4
+ default: false
5
+
6
+ image: clickhouse/clickhouse-server:${CLICKHOUSE_VERSION:-latest}
7
+
8
+ compose:
9
+ restart: unless-stopped
10
+ labels:
11
+ phlo.metrics.enabled: "true"
12
+ phlo.metrics.port: "clickhouse:9363"
13
+ phlo.metrics.path: "/metrics"
14
+ phlo.grafana.datasource: "true"
15
+ phlo.grafana.datasource.type: "grafana-clickhouse-datasource"
16
+ phlo.grafana.datasource.name: "ClickHouse"
17
+ phlo.grafana.datasource.url: "clickhouse:9000"
18
+ traefik.enable: "true"
19
+ traefik.http.routers.clickhouse.rule: "Host(`clickhouse.${TRAEFIK_DOMAIN:-phlo.localhost}`)"
20
+ traefik.http.routers.clickhouse.entrypoints: "web"
21
+ traefik.http.services.clickhouse.loadbalancer.server.port: "8123"
22
+ environment:
23
+ CLICKHOUSE_USER: ${CLICKHOUSE_USER:-default}
24
+ CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-}
25
+ CLICKHOUSE_DB: ${CLICKHOUSE_DB:-default}
26
+ CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: ${CLICKHOUSE_ACCESS_MANAGEMENT:-1}
27
+ ports:
28
+ - "${CLICKHOUSE_HTTP_PORT:-8123}:8123"
29
+ - "${CLICKHOUSE_NATIVE_PORT:-19000}:9000"
30
+ - "${CLICKHOUSE_METRICS_PORT:-9363}:9363"
31
+ volumes:
32
+ - ./volumes/clickhouse/data:/var/lib/clickhouse
33
+ - ./volumes/clickhouse/logs:/var/log/clickhouse-server
34
+ healthcheck:
35
+ test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8123/ping"]
36
+ interval: 10s
37
+ timeout: 5s
38
+ retries: 10
39
+ start_period: 30s
40
+ ulimits:
41
+ nofile:
42
+ soft: 262144
43
+ hard: 262144
44
+
45
+ env_vars:
46
+ CLICKHOUSE_VERSION:
47
+ default: "latest"
48
+ description: ClickHouse server version tag
49
+ CLICKHOUSE_HTTP_PORT:
50
+ default: 8123
51
+ description: ClickHouse HTTP interface port
52
+ CLICKHOUSE_NATIVE_PORT:
53
+ default: 19000
54
+ description: ClickHouse native protocol port
55
+ CLICKHOUSE_METRICS_PORT:
56
+ default: 9363
57
+ description: ClickHouse Prometheus metrics port
58
+ CLICKHOUSE_USER:
59
+ default: "default"
60
+ description: ClickHouse default username
61
+ CLICKHOUSE_PASSWORD:
62
+ default: ""
63
+ description: ClickHouse default user password
64
+ secret: true
65
+ CLICKHOUSE_DB:
66
+ default: "default"
67
+ description: Default ClickHouse database
68
+ CLICKHOUSE_ACCESS_MANAGEMENT:
69
+ default: "1"
70
+ description: "Enable SQL-driven access management (1=enabled, 0=disabled)"
71
+ CLICKHOUSE_HTTPS_PORT:
72
+ default: ""
73
+ description: "HTTPS port (empty = disabled)"
74
+ CLICKHOUSE_TLS_CERT_FILE:
75
+ default: ""
76
+ description: "Path to TLS certificate file"
77
+ CLICKHOUSE_TLS_KEY_FILE:
78
+ default: ""
79
+ description: "Path to TLS private key file"
80
+ CLICKHOUSE_MAX_MEMORY_USAGE:
81
+ default: ""
82
+ description: "Maximum memory usage per query (e.g., 10000000000 for ~10GB)"
83
+ CLICKHOUSE_MAX_SERVER_MEMORY_USAGE_RATIO:
84
+ default: ""
85
+ description: "Max fraction of total RAM the server can use (e.g., 0.9)"
@@ -0,0 +1,37 @@
1
+ """ClickHouse settings."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from functools import lru_cache
6
+
7
+ from pydantic import Field
8
+
9
+ from phlo.config.base import BaseConfig
10
+
11
+
12
class ClickHouseSettings(BaseConfig):
    """Connection settings for the ClickHouse data plane."""

    clickhouse_host: str = Field(default="clickhouse", description="ClickHouse service hostname")
    clickhouse_http_port: int = Field(default=8123, description="ClickHouse HTTP interface port")
    clickhouse_native_port: int = Field(
        default=19000, description="ClickHouse native protocol port"
    )
    clickhouse_user: str = Field(default="default", description="ClickHouse username")
    clickhouse_password: str = Field(default="", description="ClickHouse password")
    clickhouse_db: str = Field(default="default", description="Default ClickHouse database")
    clickhouse_secure: bool = Field(default=False, description="Use TLS for ClickHouse connections")

    def clickhouse_http_endpoint(self) -> str:
        """Return the HTTP interface address formatted as ``host:port``."""
        host, port = self.clickhouse_host, self.clickhouse_http_port
        return f"{host}:{port}"

    def clickhouse_native_endpoint(self) -> str:
        """Return the native interface address formatted as ``host:port``."""
        host, port = self.clickhouse_host, self.clickhouse_native_port
        return f"{host}:{port}"
32
+
33
+
34
@lru_cache(maxsize=1)
def get_settings() -> ClickHouseSettings:
    """Return cached ClickHouse settings.

    Because the function takes no arguments and is wrapped in
    ``lru_cache(maxsize=1)``, the settings object is constructed on the first
    call and that same instance is returned by every later call.
    """
    return ClickHouseSettings()
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.4
2
+ Name: phlo-clickhouse
3
+ Version: 0.1.0
4
+ Summary: ClickHouse service and resource plugin for Phlo
5
+ Author-email: Phlo Team <team@phlo.dev>
6
+ License: MIT
7
+ Requires-Python: >=3.11
8
+ Description-Content-Type: text/plain
9
+ Requires-Dist: phlo>=0.1.0
10
+ Requires-Dist: clickhouse-connect>=0.8.0
11
+ Requires-Dist: pyyaml>=6.0.1
12
+ Requires-Dist: pandas>=2.0.0
13
+ Requires-Dist: pyarrow>=12.0.0
14
+ Provides-Extra: dbt
15
+ Requires-Dist: dbt-core>=1.8; extra == "dbt"
16
+ Requires-Dist: dbt-clickhouse>=1.8; extra == "dbt"
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=7.0; extra == "dev"
19
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
20
+
21
+ ClickHouse service and resource plugin for Phlo.
@@ -0,0 +1,21 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/phlo_clickhouse/__init__.py
4
+ src/phlo_clickhouse/cli.py
5
+ src/phlo_clickhouse/cli_plugin.py
6
+ src/phlo_clickhouse/clickhouse-setup.yaml
7
+ src/phlo_clickhouse/plugin.py
8
+ src/phlo_clickhouse/publish_target.py
9
+ src/phlo_clickhouse/resource.py
10
+ src/phlo_clickhouse/service.yaml
11
+ src/phlo_clickhouse/settings.py
12
+ src/phlo_clickhouse.egg-info/PKG-INFO
13
+ src/phlo_clickhouse.egg-info/SOURCES.txt
14
+ src/phlo_clickhouse.egg-info/dependency_links.txt
15
+ src/phlo_clickhouse.egg-info/entry_points.txt
16
+ src/phlo_clickhouse.egg-info/requires.txt
17
+ src/phlo_clickhouse.egg-info/top_level.txt
18
+ tests/test_clickhouse_capabilities.py
19
+ tests/test_clickhouse_plugin.py
20
+ tests/test_clickhouse_resource.py
21
+ tests/test_clickhouse_settings.py
@@ -0,0 +1,9 @@
1
+ [phlo.plugins.cli]
2
+ clickhouse = phlo_clickhouse.cli_plugin:ClickHouseCliPlugin
3
+
4
+ [phlo.plugins.resources]
5
+ clickhouse = phlo_clickhouse.plugin:ClickHouseResourceProvider
6
+
7
+ [phlo.plugins.services]
8
+ clickhouse = phlo_clickhouse.plugin:ClickHouseServicePlugin
9
+ clickhouse-setup = phlo_clickhouse.plugin:ClickHouseSetupServicePlugin
@@ -0,0 +1,13 @@
1
+ phlo>=0.1.0
2
+ clickhouse-connect>=0.8.0
3
+ pyyaml>=6.0.1
4
+ pandas>=2.0.0
5
+ pyarrow>=12.0.0
6
+
7
+ [dbt]
8
+ dbt-core>=1.8
9
+ dbt-clickhouse>=1.8
10
+
11
+ [dev]
12
+ pytest>=7.0
13
+ ruff>=0.1.0
@@ -0,0 +1 @@
1
+ phlo_clickhouse
@@ -0,0 +1,59 @@
1
+ """Tests for ClickHouse resource provider capabilities."""
2
+
3
+ from phlo_clickhouse.plugin import ClickHouseResourceProvider
4
+
5
+
6
+ def test_clickhouse_resource_provider_metadata():
7
+ """Validate ClickHouse resource provider metadata."""
8
+
9
+ provider = ClickHouseResourceProvider()
10
+ metadata = provider.metadata
11
+
12
+ assert metadata.name == "clickhouse"
13
+ assert metadata.version == "0.1.0"
14
+
15
+
16
+ def test_clickhouse_resource_provider_get_resources():
17
+ """Validate ClickHouse resource provider returns resources."""
18
+
19
+ provider = ClickHouseResourceProvider()
20
+ resources = provider.get_resources()
21
+
22
+ assert len(resources) == 1
23
+ assert resources[0].name == "clickhouse"
24
+
25
+
26
+ def test_clickhouse_resource_provider_get_table_stores():
27
+ """Validate ClickHouse resource provider returns table store specs."""
28
+
29
+ provider = ClickHouseResourceProvider()
30
+ table_stores = provider.get_table_stores()
31
+
32
+ assert len(table_stores) == 1
33
+ assert table_stores[0].name == "clickhouse"
34
+ assert table_stores[0].support.supports_snapshots is False
35
+ assert table_stores[0].support.supports_schema_evolution is True
36
+
37
+
38
+ def test_clickhouse_resource_provider_get_query_engines():
39
+ """Validate ClickHouse resource provider returns query engine specs."""
40
+
41
+ provider = ClickHouseResourceProvider()
42
+ query_engines = provider.get_query_engines()
43
+
44
+ assert len(query_engines) == 1
45
+ assert query_engines[0].name == "clickhouse"
46
+ assert query_engines[0].metadata["service_type"] == "ClickHouse"
47
+ assert query_engines[0].support.supports_snapshots is False
48
+ assert query_engines[0].support.supports_time_travel is False
49
+
50
+
51
+ def test_clickhouse_resource_provider_get_publish_targets():
52
+ """Validate ClickHouse resource provider returns publish target specs."""
53
+
54
+ provider = ClickHouseResourceProvider()
55
+ publish_targets = provider.get_publish_targets()
56
+
57
+ assert len(publish_targets) == 1
58
+ assert publish_targets[0].name == "clickhouse"
59
+ assert publish_targets[0].metadata["target_system"] == "clickhouse"
@@ -0,0 +1,26 @@
1
+ """Tests for ClickHouse service plugin."""
2
+
3
+ from phlo_clickhouse.plugin import ClickHouseServicePlugin
4
+
5
+
6
+ def test_clickhouse_service_definition():
7
+ """Validate ClickHouse service definition fields."""
8
+
9
+ plugin = ClickHouseServicePlugin()
10
+ service_definition = plugin.service_definition
11
+
12
+ assert service_definition["name"] == "clickhouse"
13
+ assert service_definition["category"] == "data"
14
+
15
+
16
+ def test_clickhouse_service_metadata():
17
+ """Validate ClickHouse service plugin metadata."""
18
+
19
+ plugin = ClickHouseServicePlugin()
20
+ metadata = plugin.metadata
21
+
22
+ assert metadata.name == "clickhouse"
23
+ assert metadata.version == "0.1.0"
24
+ assert "data" in metadata.tags
25
+ assert "query" in metadata.tags
26
+ assert "storage" in metadata.tags
@@ -0,0 +1,43 @@
1
+ """Tests for ClickHouse resource."""
2
+
3
+ from phlo_clickhouse.resource import CLICKHOUSE_QUERY_ENGINE_SUPPORT, ClickHouseResource
4
+
5
+
6
+ def test_clickhouse_resource_defaults():
7
+ """Validate ClickHouse resource default values."""
8
+
9
+ resource = ClickHouseResource()
10
+
11
+ assert resource.host is None
12
+ assert resource.port is None
13
+ assert resource.user is None
14
+ assert resource.password is None
15
+ assert resource.database is None
16
+ assert resource.secure is None
17
+
18
+
19
+ def test_clickhouse_resource_with_overrides():
20
+ """Validate ClickHouse resource with override values."""
21
+
22
+ resource = ClickHouseResource(
23
+ host="my-clickhouse",
24
+ port=9000,
25
+ user="admin",
26
+ password="secret",
27
+ database="mydb",
28
+ secure=True,
29
+ )
30
+
31
+ assert resource.host == "my-clickhouse"
32
+ assert resource.port == 9000
33
+ assert resource.user == "admin"
34
+ assert resource.password == "secret"
35
+ assert resource.database == "mydb"
36
+ assert resource.secure is True
37
+
38
+
39
+ def test_clickhouse_query_engine_support():
40
+ """Validate ClickHouse query engine support flags."""
41
+
42
+ assert CLICKHOUSE_QUERY_ENGINE_SUPPORT.supports_snapshots is False
43
+ assert CLICKHOUSE_QUERY_ENGINE_SUPPORT.supports_time_travel is False
@@ -0,0 +1,64 @@
1
+ """Tests for ClickHouse settings."""
2
+
3
+ from phlo_clickhouse.settings import ClickHouseSettings, get_settings
4
+
5
+
6
+ def test_clickhouse_settings_defaults():
7
+ """Validate ClickHouse settings default values."""
8
+
9
+ settings = ClickHouseSettings()
10
+
11
+ assert settings.clickhouse_host == "clickhouse"
12
+ assert settings.clickhouse_http_port == 8123
13
+ assert settings.clickhouse_native_port == 19000
14
+ assert settings.clickhouse_user == "default"
15
+ assert settings.clickhouse_password == ""
16
+ assert settings.clickhouse_db == "default"
17
+ assert settings.clickhouse_secure is False
18
+
19
+
20
+ def test_clickhouse_settings_http_endpoint():
21
+ """Validate ClickHouse HTTP endpoint generation."""
22
+
23
+ settings = ClickHouseSettings()
24
+
25
+ assert settings.clickhouse_http_endpoint() == "clickhouse:8123"
26
+
27
+
28
+ def test_clickhouse_settings_native_endpoint():
29
+ """Validate ClickHouse native endpoint generation."""
30
+
31
+ settings = ClickHouseSettings()
32
+
33
+ assert settings.clickhouse_native_endpoint() == "clickhouse:19000"
34
+
35
+
36
+ def test_clickhouse_settings_with_overrides():
37
+ """Validate ClickHouse settings with override values."""
38
+
39
+ settings = ClickHouseSettings(
40
+ clickhouse_host="my-host",
41
+ clickhouse_http_port=9000,
42
+ clickhouse_native_port=9001,
43
+ clickhouse_user="admin",
44
+ clickhouse_password="secret",
45
+ clickhouse_db="mydb",
46
+ clickhouse_secure=True,
47
+ )
48
+
49
+ assert settings.clickhouse_host == "my-host"
50
+ assert settings.clickhouse_http_port == 9000
51
+ assert settings.clickhouse_native_port == 9001
52
+ assert settings.clickhouse_user == "admin"
53
+ assert settings.clickhouse_password == "secret"
54
+ assert settings.clickhouse_db == "mydb"
55
+ assert settings.clickhouse_secure is True
56
+
57
+
58
+ def test_get_settings_returns_cached():
59
+ """Validate that get_settings returns cached instance."""
60
+
61
+ settings1 = get_settings()
62
+ settings2 = get_settings()
63
+
64
+ assert settings1 is settings2