phlo-clickhouse 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phlo_clickhouse-0.1.0/PKG-INFO +21 -0
- phlo_clickhouse-0.1.0/README.md +72 -0
- phlo_clickhouse-0.1.0/pyproject.toml +55 -0
- phlo_clickhouse-0.1.0/setup.cfg +4 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse/__init__.py +19 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse/cli.py +154 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse/cli_plugin.py +24 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse/clickhouse-setup.yaml +30 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse/plugin.py +145 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse/publish_target.py +16 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse/resource.py +239 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse/service.yaml +85 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse/settings.py +37 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse.egg-info/PKG-INFO +21 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse.egg-info/SOURCES.txt +21 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse.egg-info/dependency_links.txt +1 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse.egg-info/entry_points.txt +9 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse.egg-info/requires.txt +13 -0
- phlo_clickhouse-0.1.0/src/phlo_clickhouse.egg-info/top_level.txt +1 -0
- phlo_clickhouse-0.1.0/tests/test_clickhouse_capabilities.py +59 -0
- phlo_clickhouse-0.1.0/tests/test_clickhouse_plugin.py +26 -0
- phlo_clickhouse-0.1.0/tests/test_clickhouse_resource.py +43 -0
- phlo_clickhouse-0.1.0/tests/test_clickhouse_settings.py +64 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: phlo-clickhouse
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ClickHouse service and resource plugin for Phlo
|
|
5
|
+
Author-email: Phlo Team <team@phlo.dev>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Description-Content-Type: text/plain
|
|
9
|
+
Requires-Dist: phlo>=0.1.0
|
|
10
|
+
Requires-Dist: clickhouse-connect>=0.8.0
|
|
11
|
+
Requires-Dist: pyyaml>=6.0.1
|
|
12
|
+
Requires-Dist: pandas>=2.0.0
|
|
13
|
+
Requires-Dist: pyarrow>=12.0.0
|
|
14
|
+
Provides-Extra: dbt
|
|
15
|
+
Requires-Dist: dbt-core>=1.8; extra == "dbt"
|
|
16
|
+
Requires-Dist: dbt-clickhouse>=1.8; extra == "dbt"
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
19
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
20
|
+
|
|
21
|
+
ClickHouse service and resource plugin for Phlo.
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# phlo-clickhouse
|
|
2
|
+
|
|
3
|
+
ClickHouse service and resource plugin for Phlo.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
`phlo-clickhouse` provides ClickHouse as a combined `table_store`, `query_engine`, and `publish_target` capability in Phlo. Unlike the existing bundled stack (DLT -> Iceberg -> Trino/dbt -> Postgres), ClickHouse can serve all three data plane roles in a single service.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install phlo-clickhouse
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
|
|
17
|
+
### Starting ClickHouse
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
phlo services start --service clickhouse
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
This starts both the ClickHouse server and the setup container that creates the default databases (`raw`, `staging`, `curated`, `marts`).
|
|
24
|
+
|
|
25
|
+
### Running Queries
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
phlo clickhouse query "SELECT version()"
|
|
29
|
+
phlo clickhouse query --file query.sql
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Checking Status
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
phlo clickhouse status
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Configuration
|
|
39
|
+
|
|
40
|
+
The following environment variables can be used to configure ClickHouse:
|
|
41
|
+
|
|
42
|
+
| Variable | Default | Description |
|
|
43
|
+
|----------|---------|-------------|
|
|
44
|
+
| `CLICKHOUSE_VERSION` | `latest` | ClickHouse server version tag |
|
|
45
|
+
| `CLICKHOUSE_HTTP_PORT` | `8123` | ClickHouse HTTP interface port |
|
|
46
|
+
| `CLICKHOUSE_NATIVE_PORT` | `19000` | Host port mapped to the ClickHouse native protocol port (`9000` inside the container) |
|
|
47
|
+
| `CLICKHOUSE_METRICS_PORT` | `9363` | ClickHouse Prometheus metrics port |
|
|
48
|
+
| `CLICKHOUSE_USER` | `default` | ClickHouse default username |
|
|
49
|
+
| `CLICKHOUSE_PASSWORD` | _(empty)_ | Password for the user named by `CLICKHOUSE_USER` |
|
|
50
|
+
| `CLICKHOUSE_DB` | `default` | Default ClickHouse database |
|
|
51
|
+
|
|
52
|
+
## Capabilities
|
|
53
|
+
|
|
54
|
+
This plugin registers the following capabilities:
|
|
55
|
+
|
|
56
|
+
- **Table Store**: ClickHouse MergeTree engine
|
|
57
|
+
- **Query Engine**: ClickHouse SQL
|
|
58
|
+
- **Publish Target**: ClickHouse marts database
|
|
59
|
+
|
|
60
|
+
## dbt Integration
|
|
61
|
+
|
|
62
|
+
Install with dbt support:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install phlo-clickhouse[dbt]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
This provides the `dbt-clickhouse` adapter for running dbt transforms against ClickHouse.
|
|
69
|
+
|
|
70
|
+
## License
|
|
71
|
+
|
|
72
|
+
MIT
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[build-system]
# The [project] table below (PEP 621 metadata) is only understood by
# setuptools 61 and newer; older releases would build a package with no
# name, version, or dependencies.
requires = ["setuptools>=61", "wheel"]
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "phlo-clickhouse"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "ClickHouse service and resource plugin for Phlo"
|
|
9
|
+
readme = {text = "ClickHouse service and resource plugin for Phlo.", content-type = "text/plain"}
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "Phlo Team", email = "team@phlo.dev"},
|
|
13
|
+
]
|
|
14
|
+
license = {text = "MIT"}
|
|
15
|
+
dependencies = [
|
|
16
|
+
"phlo>=0.1.0",
|
|
17
|
+
"clickhouse-connect>=0.8.0",
|
|
18
|
+
"pyyaml>=6.0.1",
|
|
19
|
+
"pandas>=2.0.0",
|
|
20
|
+
"pyarrow>=12.0.0",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
dbt = [
|
|
25
|
+
"dbt-core>=1.8",
|
|
26
|
+
"dbt-clickhouse>=1.8",
|
|
27
|
+
]
|
|
28
|
+
dev = [
|
|
29
|
+
"pytest>=7.0",
|
|
30
|
+
"ruff>=0.1.0",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.entry-points."phlo.plugins.services"]
|
|
34
|
+
clickhouse = "phlo_clickhouse.plugin:ClickHouseServicePlugin"
|
|
35
|
+
clickhouse-setup = "phlo_clickhouse.plugin:ClickHouseSetupServicePlugin"
|
|
36
|
+
|
|
37
|
+
[project.entry-points."phlo.plugins.resources"]
|
|
38
|
+
clickhouse = "phlo_clickhouse.plugin:ClickHouseResourceProvider"
|
|
39
|
+
|
|
40
|
+
[project.entry-points."phlo.plugins.cli"]
|
|
41
|
+
clickhouse = "phlo_clickhouse.cli_plugin:ClickHouseCliPlugin"
|
|
42
|
+
|
|
43
|
+
[tool.setuptools]
|
|
44
|
+
package-dir = {"" = "src"}
|
|
45
|
+
include-package-data = true
|
|
46
|
+
|
|
47
|
+
[tool.setuptools.packages.find]
|
|
48
|
+
where = ["src"]
|
|
49
|
+
|
|
50
|
+
[tool.setuptools.package-data]
|
|
51
|
+
phlo_clickhouse = ["service.yaml", "clickhouse-setup.yaml"]
|
|
52
|
+
|
|
53
|
+
[tool.ruff]
|
|
54
|
+
line-length = 100
|
|
55
|
+
target-version = "py311"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""ClickHouse service and resource plugin package."""
|
|
2
|
+
|
|
3
|
+
from phlo_clickhouse.plugin import (
|
|
4
|
+
ClickHouseResourceProvider,
|
|
5
|
+
ClickHouseServicePlugin,
|
|
6
|
+
ClickHouseSetupServicePlugin,
|
|
7
|
+
)
|
|
8
|
+
from phlo_clickhouse.resource import ClickHouseResource
|
|
9
|
+
from phlo_clickhouse.settings import ClickHouseSettings, get_settings
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"ClickHouseResource",
|
|
13
|
+
"ClickHouseResourceProvider",
|
|
14
|
+
"ClickHouseServicePlugin",
|
|
15
|
+
"ClickHouseSetupServicePlugin",
|
|
16
|
+
"ClickHouseSettings",
|
|
17
|
+
"get_settings",
|
|
18
|
+
]
|
|
19
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""CLI commands for the ClickHouse data plane service."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from shutil import which
|
|
7
|
+
from subprocess import TimeoutExpired
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
|
|
11
|
+
from phlo.cli.infrastructure.command import CommandError, run_command
|
|
12
|
+
from phlo.cli.infrastructure.compose import compose_base_cmd
|
|
13
|
+
from phlo.cli.infrastructure.utils import get_project_name
|
|
14
|
+
from phlo.logging import get_logger
|
|
15
|
+
|
|
16
|
+
logger = get_logger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _read_query(*, query: str | None, file: Path | None) -> str:
|
|
20
|
+
"""Return SQL text from inline query or file input."""
|
|
21
|
+
if query and file:
|
|
22
|
+
raise click.ClickException("Use either an inline query or --file, not both.")
|
|
23
|
+
if file is not None:
|
|
24
|
+
try:
|
|
25
|
+
sql = file.read_text(encoding="utf-8")
|
|
26
|
+
except OSError as exc:
|
|
27
|
+
raise click.ClickException(f"Failed to read SQL file: {file}") from exc
|
|
28
|
+
if sql.strip():
|
|
29
|
+
return sql
|
|
30
|
+
raise click.ClickException(f"SQL file is empty: {file}")
|
|
31
|
+
if query and query.strip():
|
|
32
|
+
return query
|
|
33
|
+
raise click.ClickException("Provide a SQL query argument or --file.")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _ensure_phlo_dir() -> Path:
|
|
37
|
+
"""Return the local .phlo directory or exit with a clear error."""
|
|
38
|
+
phlo_dir = Path.cwd() / ".phlo"
|
|
39
|
+
if phlo_dir.exists():
|
|
40
|
+
return phlo_dir
|
|
41
|
+
raise click.ClickException(".phlo directory not found. Run 'phlo services init' first.")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _require_docker() -> None:
    """Ensure the ``docker`` executable exists and ``docker info`` succeeds.

    Raises:
        click.ClickException: If ``docker`` is not on PATH, if ``docker info``
            exceeds its 10-second timeout, or if it exits with a non-zero
            status.
    """
    if which("docker") is None:
        raise click.ClickException("docker command not found.")

    probe = ["docker", "info"]
    try:
        outcome = run_command(probe, timeout_seconds=10, capture_output=True, check=False)
    except TimeoutExpired as exc:
        raise click.ClickException("docker info timed out.") from exc

    # check=False above, so a failing probe is reported via the return code.
    if outcome.returncode != 0:
        raise click.ClickException("Docker is not running.")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Root click group named "clickhouse". It has no body of its own: the "query"
# and "status" subcommands register on it through `@clickhouse_group.command`.
# The docstring below is what click shows as the group's help text.
@click.group(name="clickhouse")
|
|
63
|
+
def clickhouse_group() -> None:
|
|
64
|
+
"""Query and inspect the ClickHouse data plane service."""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@clickhouse_group.command(name="query")
@click.argument("query", required=False)
@click.option(
    "--file",
    "query_file",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option("--format", "output_format", default="TabSeparatedRaw", show_default=True)
@click.option("--timeout", "timeout_seconds", default=30, show_default=True, type=int)
def clickhouse_query(
    query: str | None,
    query_file: Path | None,
    output_format: str,
    timeout_seconds: int,
) -> None:
    """Execute a SQL query against the running ClickHouse service."""
    # The docstring above is the CLI help text, so it is left untouched.
    #
    # Preconditions are checked in a fixed order (Docker, .phlo directory,
    # project name, SQL input) so the first problem reported stays the same.
    _require_docker()
    phlo_dir = _ensure_phlo_dir()
    project_name = get_project_name()
    sql = _read_query(query=query, file=query_file)

    # Arguments appended to the compose base command: run clickhouse-client
    # inside the "clickhouse" service with the requested output format.
    client_args = [
        "exec",
        "-T",
        "clickhouse",
        "clickhouse-client",
        "--multiquery",
        "--format",
        output_format,
        "--query",
        sql,
    ]
    cmd = compose_base_cmd(phlo_dir=phlo_dir, project_name=project_name)
    cmd.extend(client_args)

    try:
        result = run_command(
            cmd,
            timeout_seconds=timeout_seconds,
            capture_output=True,
            check=True,
        )
    except CommandError as exc:
        # Prefer the command's stderr; fall back to the exception text.
        message = exc.stderr.strip() or str(exc)
        raise click.ClickException(message) from exc
    except TimeoutExpired as exc:
        raise click.ClickException(f"Query timed out after {timeout_seconds} seconds.") from exc

    output = result.stdout
    if output:
        # nl=False: echo the captured output as-is without appending a newline.
        click.echo(output, nl=False)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@clickhouse_group.command(name="status")
def clickhouse_status() -> None:
    """Show ClickHouse service status and basic server info."""
    # The docstring above is the CLI help text, so it is left untouched.
    _require_docker()
    phlo_dir = _ensure_phlo_dir()
    project_name = get_project_name()

    # One-row probe: server version, uptime in seconds and current database.
    status_sql = (
        "SELECT version() AS version, uptime() AS uptime_seconds, "
        "currentDatabase() AS current_database"
    )
    client_args = ["exec", "-T", "clickhouse", "clickhouse-client", "--query", status_sql]
    cmd = compose_base_cmd(phlo_dir=phlo_dir, project_name=project_name)
    cmd.extend(client_args)

    try:
        result = run_command(cmd, timeout_seconds=10, capture_output=True, check=True)
    except CommandError as exc:
        # Prefer the command's stderr; fall back to the exception text.
        message = exc.stderr.strip() or str(exc)
        raise click.ClickException(message) from exc
    except TimeoutExpired as exc:
        raise click.ClickException("Status check timed out.") from exc

    output = result.stdout
    if output:
        click.echo(output, nl=False)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""CLI plugin for ClickHouse commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from phlo.plugins.base import CliCommandPlugin, PluginMetadata
|
|
8
|
+
|
|
9
|
+
from phlo_clickhouse.cli import clickhouse_group
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ClickHouseCliPlugin(CliCommandPlugin):
    """CLI plugin that contributes the ``clickhouse`` command group."""

    @property
    def metadata(self) -> PluginMetadata:
        """Return the plugin's static name, version and description."""
        descriptor = PluginMetadata(
            name="clickhouse",
            version="0.1.0",
            description="CLI commands for ClickHouse data plane access",
        )
        return descriptor

    def get_cli_commands(self) -> list[click.Command]:
        """Return the click commands this plugin provides (the single group)."""
        commands = [clickhouse_group]
        return commands
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Companion service for clickhouse - creates required databases
|
|
2
|
+
name: clickhouse-setup
|
|
3
|
+
description: Initialize ClickHouse databases for data plane
|
|
4
|
+
category: data
|
|
5
|
+
default: false
|
|
6
|
+
|
|
7
|
+
image: clickhouse/clickhouse-server:${CLICKHOUSE_VERSION:-latest}
|
|
8
|
+
|
|
9
|
+
depends_on:
|
|
10
|
+
- clickhouse
|
|
11
|
+
|
|
12
|
+
compose:
|
|
13
|
+
restart: "no"
|
|
14
|
+
entrypoint: >
|
|
15
|
+
/bin/sh -c "
|
|
16
|
+
until clickhouse-client --host clickhouse --user $${CLICKHOUSE_USER:-default}
|
|
17
|
+
--password $${CLICKHOUSE_PASSWORD:-}
|
|
18
|
+
--query 'SELECT 1' 2>/dev/null; do
|
|
19
|
+
echo 'Waiting for ClickHouse...' && sleep 2;
|
|
20
|
+
done &&
|
|
21
|
+
clickhouse-client --host clickhouse --user $${CLICKHOUSE_USER:-default}
|
|
22
|
+
--password $${CLICKHOUSE_PASSWORD:-}
|
|
23
|
+
--multiquery --query '
|
|
24
|
+
CREATE DATABASE IF NOT EXISTS raw;
|
|
25
|
+
CREATE DATABASE IF NOT EXISTS staging;
|
|
26
|
+
CREATE DATABASE IF NOT EXISTS curated;
|
|
27
|
+
CREATE DATABASE IF NOT EXISTS marts;
|
|
28
|
+
' &&
|
|
29
|
+
echo 'ClickHouse databases created successfully'
|
|
30
|
+
"
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""ClickHouse service and resource provider plugins."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from importlib import resources
|
|
6
|
+
from time import perf_counter
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
from phlo.capabilities import (
|
|
12
|
+
CapabilitySupport,
|
|
13
|
+
PublishTargetSpec,
|
|
14
|
+
ResourceSpec,
|
|
15
|
+
TableStoreSpec,
|
|
16
|
+
)
|
|
17
|
+
from phlo.capabilities.specs import QueryEngineSpec
|
|
18
|
+
from phlo.logging import get_logger
|
|
19
|
+
from phlo.plugins import PluginMetadata, ResourceProviderPlugin, ServicePlugin
|
|
20
|
+
from phlo_clickhouse.publish_target import ClickHousePublishTarget
|
|
21
|
+
from phlo_clickhouse.resource import CLICKHOUSE_QUERY_ENGINE_SUPPORT, ClickHouseResource
|
|
22
|
+
from phlo_clickhouse.settings import get_settings as get_clickhouse_settings
|
|
23
|
+
|
|
24
|
+
logger = get_logger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _load_service_definition(resource_name: str, service_name: str) -> dict[str, Any]:
    """Read and parse a YAML service definition shipped inside the package.

    Args:
        resource_name: File name of the YAML resource within ``phlo_clickhouse``.
        service_name: Label attached to the log events; not used for lookup.

    Returns:
        The value produced by ``yaml.safe_load`` for the resource.

    Raises:
        Exception: Any failure while reading or parsing is logged with timing
            information and then re-raised unchanged.
    """
    started_at = perf_counter()

    def elapsed_ms() -> float:
        # Milliseconds since the load began, rounded to two decimals.
        return round((perf_counter() - started_at) * 1000, 2)

    log_fields = {"service_name": service_name, "resource_name": resource_name}
    logger.info("clickhouse_service_definition_load_started", **log_fields)

    service_path = resources.files("phlo_clickhouse").joinpath(resource_name)
    try:
        raw_text = service_path.read_text(encoding="utf-8")
        data = yaml.safe_load(raw_text)
    except Exception:
        logger.error(
            "clickhouse_service_definition_load_failed",
            **log_fields,
            elapsed_ms=elapsed_ms(),
            exc_info=True,
        )
        raise

    # Only a mapping can carry a "services" entry; anything else logs None.
    if isinstance(data, dict):
        service_count = len(data.get("services", {}))
    else:
        service_count = None

    logger.info(
        "clickhouse_service_definition_load_completed",
        **log_fields,
        elapsed_ms=elapsed_ms(),
        service_count=service_count,
    )
    return data
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class ClickHouseServicePlugin(ServicePlugin):
    """Service plugin that supplies the ClickHouse server definition."""

    @property
    def metadata(self) -> PluginMetadata:
        """Return the plugin's name, version, description, author and tags."""
        descriptor = PluginMetadata(
            name="clickhouse",
            version="0.1.0",
            description="ClickHouse analytical database for data plane",
            author="Phlo Team",
            tags=["data", "query", "storage"],
        )
        return descriptor

    @property
    def service_definition(self) -> dict[str, Any]:
        """Return the parsed contents of the packaged ``service.yaml``."""
        definition = _load_service_definition("service.yaml", "clickhouse")
        return definition
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ClickHouseSetupServicePlugin(ServicePlugin):
    """Service plugin that supplies the database-initialization companion service."""

    @property
    def metadata(self) -> PluginMetadata:
        """Return the plugin's name, version, description, author and tags."""
        descriptor = PluginMetadata(
            name="clickhouse-setup",
            version="0.1.0",
            description="Initialize ClickHouse databases for data plane",
            author="Phlo Team",
            tags=["data", "bootstrap"],
        )
        return descriptor

    @property
    def service_definition(self) -> dict[str, Any]:
        """Return the parsed contents of the packaged ``clickhouse-setup.yaml``."""
        definition = _load_service_definition("clickhouse-setup.yaml", "clickhouse-setup")
        return definition
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class ClickHouseResourceProvider(ResourceProviderPlugin):
    """Resource provider that registers ClickHouse in four roles.

    The roles are: plain resource, table store, query engine and publish
    target. Each accessor builds fresh provider objects on every call.
    """

    @property
    def metadata(self) -> PluginMetadata:
        """Return the plugin descriptor with a default ``CapabilitySupport``."""
        descriptor = PluginMetadata(
            name="clickhouse",
            version="0.1.0",
            description="ClickHouse resource for Phlo",
            support=CapabilitySupport(),
        )
        return descriptor

    def get_resources(self) -> list[ResourceSpec]:
        """Return one resource spec wrapping a default ``ClickHouseResource``."""
        resource = ClickHouseResource()
        return [ResourceSpec(name="clickhouse", resource=resource)]

    def get_table_stores(self) -> list[TableStoreSpec]:
        """Return the table-store spec: schema evolution on, snapshots off."""
        provider = ClickHouseResource()
        capabilities = CapabilitySupport(
            supports_snapshots=False,
            supports_schema_evolution=True,
        )
        store = TableStoreSpec(name="clickhouse", provider=provider, support=capabilities)
        return [store]

    def get_query_engines(self) -> list[QueryEngineSpec]:
        """Return the query-engine spec, with connection details from settings."""
        settings = get_clickhouse_settings()
        provider = ClickHouseResource()
        # Descriptive metadata only; values are read from settings at call time.
        engine_metadata = {
            "host": settings.clickhouse_host,
            "port": settings.clickhouse_http_port,
            "native_port": settings.clickhouse_native_port,
            "default_database": settings.clickhouse_db,
            "service_type": "ClickHouse",
        }
        engine = QueryEngineSpec(
            name="clickhouse",
            provider=provider,
            metadata=engine_metadata,
            support=CLICKHOUSE_QUERY_ENGINE_SUPPORT,
        )
        return [engine]

    def get_publish_targets(self) -> list[PublishTargetSpec]:
        """Return the publish-target spec backed by ``ClickHousePublishTarget``."""
        target = PublishTargetSpec(
            name="clickhouse",
            provider=ClickHousePublishTarget(),
            metadata={"target_system": "clickhouse", "role": "serving"},
        )
        return [target]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""ClickHouse publish target for mart publishing."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
|
|
7
|
+
from phlo_clickhouse.resource import ClickHouseResource
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Dataclass describing ClickHouse as a publish destination. Fields:
#   resource       - ClickHouseResource; a new default-constructed one is
#                    created per instance via default_factory.
#   target_system  - defaults to "clickhouse".
#   default_schema - defaults to "marts".
# Field order defines the positional constructor signature, so keep it stable.
@dataclass
|
|
11
|
+
class ClickHousePublishTarget:
|
|
12
|
+
"""Publish target backed by ClickHouse."""
|
|
13
|
+
|
|
14
|
+
resource: ClickHouseResource = field(default_factory=ClickHouseResource)
|
|
15
|
+
target_system: str = "clickhouse"
|
|
16
|
+
default_schema: str = "marts"
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
"""ClickHouse resource for executing queries."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import time
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Iterable
|
|
9
|
+
|
|
10
|
+
import clickhouse_connect
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
from phlo.capabilities import CapabilitySupport
|
|
14
|
+
from phlo.logging import get_logger
|
|
15
|
+
from phlo_clickhouse.settings import get_settings as get_clickhouse_settings
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from clickhouse_connect.driver import Client
|
|
19
|
+
|
|
20
|
+
logger = get_logger(__name__)
|
|
21
|
+
|
|
22
|
+
# Capability flags for ClickHouse as a query engine: snapshots and time travel
# are both declared unsupported. Defined at module level so it can be imported
# elsewhere in the package.
CLICKHOUSE_QUERY_ENGINE_SUPPORT = CapabilitySupport(
|
|
23
|
+
supports_snapshots=False,
|
|
24
|
+
supports_time_travel=False,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
|
|
29
|
+
class ClickHouseResource:
|
|
30
|
+
"""Resource wrapper for ClickHouse connections and query execution."""
|
|
31
|
+
|
|
32
|
+
host: str | None = None
|
|
33
|
+
port: int | None = None
|
|
34
|
+
user: str | None = None
|
|
35
|
+
password: str | None = None
|
|
36
|
+
database: str | None = None
|
|
37
|
+
secure: bool | None = None
|
|
38
|
+
|
|
39
|
+
def _settings(self):
|
|
40
|
+
return get_clickhouse_settings()
|
|
41
|
+
|
|
42
|
+
def get_client(self) -> "Client":
|
|
43
|
+
"""Create and return a ClickHouse client."""
|
|
44
|
+
settings = self._settings()
|
|
45
|
+
return clickhouse_connect.get_client(
|
|
46
|
+
host=self.host or settings.clickhouse_host,
|
|
47
|
+
port=self.port or settings.clickhouse_http_port,
|
|
48
|
+
username=self.user or settings.clickhouse_user,
|
|
49
|
+
password=self.password or settings.clickhouse_password,
|
|
50
|
+
database=self.database or settings.clickhouse_db,
|
|
51
|
+
secure=self.secure if self.secure is not None else settings.clickhouse_secure,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
def execute(self, sql: str, params: Iterable[object] | None = None) -> list[list[Any]]:
|
|
55
|
+
"""Execute SQL and return query results."""
|
|
56
|
+
client = self.get_client()
|
|
57
|
+
try:
|
|
58
|
+
result = client.query(sql, parameters=list(params or []))
|
|
59
|
+
return result.result_rows
|
|
60
|
+
finally:
|
|
61
|
+
client.close()
|
|
62
|
+
|
|
63
|
+
def command(self, sql: str) -> Any:
|
|
64
|
+
"""Execute a command (DDL/DML) that returns a single value or None."""
|
|
65
|
+
client = self.get_client()
|
|
66
|
+
try:
|
|
67
|
+
return client.command(sql)
|
|
68
|
+
finally:
|
|
69
|
+
client.close()
|
|
70
|
+
|
|
71
|
+
def wait_ready(
|
|
72
|
+
self,
|
|
73
|
+
*,
|
|
74
|
+
timeout: float = 60.0,
|
|
75
|
+
interval: float = 1.0,
|
|
76
|
+
) -> None:
|
|
77
|
+
"""Wait for ClickHouse to accept queries."""
|
|
78
|
+
deadline = time.monotonic() + timeout
|
|
79
|
+
last_error: Exception | None = None
|
|
80
|
+
interval = max(interval, 0.0)
|
|
81
|
+
settings = self._settings()
|
|
82
|
+
while time.monotonic() < deadline:
|
|
83
|
+
try:
|
|
84
|
+
self.command("SELECT 1")
|
|
85
|
+
logger.info(
|
|
86
|
+
"clickhouse_wait_ready_succeeded",
|
|
87
|
+
host=self.host or settings.clickhouse_host,
|
|
88
|
+
port=self.port or settings.clickhouse_http_port,
|
|
89
|
+
)
|
|
90
|
+
return
|
|
91
|
+
except Exception as exc: # noqa: BLE001
|
|
92
|
+
last_error = exc
|
|
93
|
+
logger.debug(
|
|
94
|
+
"clickhouse_wait_ready_retry",
|
|
95
|
+
host=self.host or settings.clickhouse_host,
|
|
96
|
+
port=self.port or settings.clickhouse_http_port,
|
|
97
|
+
retry_interval_seconds=interval,
|
|
98
|
+
)
|
|
99
|
+
time.sleep(interval)
|
|
100
|
+
logger.error(
|
|
101
|
+
"clickhouse_wait_ready_timeout",
|
|
102
|
+
host=self.host or settings.clickhouse_host,
|
|
103
|
+
port=self.port or settings.clickhouse_http_port,
|
|
104
|
+
timeout_seconds=timeout,
|
|
105
|
+
)
|
|
106
|
+
raise TimeoutError(f"ClickHouse not ready after {timeout:.1f}s") from last_error
|
|
107
|
+
|
|
108
|
+
def _escape_identifier(self, name: str) -> str:
|
|
109
|
+
"""Escape a ClickHouse identifier (database, table, column) with backticks."""
|
|
110
|
+
return f"`{name.replace('`', '``')}`"
|
|
111
|
+
|
|
112
|
+
def ensure_table(
|
|
113
|
+
self,
|
|
114
|
+
*,
|
|
115
|
+
table_name: str,
|
|
116
|
+
schema: Any,
|
|
117
|
+
partition_spec: Any = None,
|
|
118
|
+
override_ref: str | None = None,
|
|
119
|
+
) -> Any:
|
|
120
|
+
"""Ensure a destination table exists."""
|
|
121
|
+
settings = self._settings()
|
|
122
|
+
database = self._escape_identifier(self.database or settings.clickhouse_db)
|
|
123
|
+
table = self._escape_identifier(table_name)
|
|
124
|
+
|
|
125
|
+
columns_def = self._schema_to_columns(schema)
|
|
126
|
+
|
|
127
|
+
partition_by = ""
|
|
128
|
+
if partition_spec:
|
|
129
|
+
partition_cols = [self._escape_identifier(p[0]) for p in partition_spec]
|
|
130
|
+
partition_by = f"PARTITION BY ({', '.join(partition_cols)})"
|
|
131
|
+
|
|
132
|
+
sql = f"CREATE TABLE IF NOT EXISTS {database}.{table} ({columns_def}) ENGINE = MergeTree() {partition_by} ORDER BY tuple()"
|
|
133
|
+
|
|
134
|
+
return self.command(sql)
|
|
135
|
+
|
|
136
|
+
def append_parquet(
|
|
137
|
+
self,
|
|
138
|
+
*,
|
|
139
|
+
table_name: str,
|
|
140
|
+
data_path: str | Path,
|
|
141
|
+
override_ref: str | None = None,
|
|
142
|
+
) -> dict[str, int]:
|
|
143
|
+
"""Append staged parquet data to a destination table."""
|
|
144
|
+
settings = self._settings()
|
|
145
|
+
database = self._escape_identifier(self.database or settings.clickhouse_db)
|
|
146
|
+
table = self._escape_identifier(table_name)
|
|
147
|
+
|
|
148
|
+
data_path_str = str(data_path)
|
|
149
|
+
df = pd.read_parquet(data_path_str)
|
|
150
|
+
row_count = len(df)
|
|
151
|
+
|
|
152
|
+
client = self.get_client()
|
|
153
|
+
try:
|
|
154
|
+
client.insert_df(f"{database}.{table}", df)
|
|
155
|
+
finally:
|
|
156
|
+
client.close()
|
|
157
|
+
|
|
158
|
+
return {"rows_inserted": row_count}
|
|
159
|
+
|
|
160
|
+
def merge_parquet(
|
|
161
|
+
self,
|
|
162
|
+
*,
|
|
163
|
+
table_name: str,
|
|
164
|
+
data_path: str | Path,
|
|
165
|
+
unique_key: str,
|
|
166
|
+
override_ref: str | None = None,
|
|
167
|
+
) -> dict[str, int]:
|
|
168
|
+
"""Merge staged parquet data into a destination table."""
|
|
169
|
+
settings = self._settings()
|
|
170
|
+
database = self._escape_identifier(self.database or settings.clickhouse_db)
|
|
171
|
+
table = self._escape_identifier(table_name)
|
|
172
|
+
key = self._escape_identifier(unique_key)
|
|
173
|
+
|
|
174
|
+
data_path_str = str(data_path)
|
|
175
|
+
df = pd.read_parquet(data_path_str)
|
|
176
|
+
row_count = len(df)
|
|
177
|
+
|
|
178
|
+
unique_keys = df[unique_key].tolist()
|
|
179
|
+
if unique_keys:
|
|
180
|
+
keys_str = ", ".join(f"'{k}'" for k in unique_keys)
|
|
181
|
+
delete_sql = f"ALTER TABLE {database}.{table} DELETE WHERE {key} IN ({keys_str})"
|
|
182
|
+
self.command(delete_sql)
|
|
183
|
+
|
|
184
|
+
client = self.get_client()
|
|
185
|
+
try:
|
|
186
|
+
client.insert_df(f"{database}.{table}", df)
|
|
187
|
+
finally:
|
|
188
|
+
client.close()
|
|
189
|
+
|
|
190
|
+
return {"rows_inserted": row_count, "rows_deleted": len(unique_keys)}
|
|
191
|
+
|
|
192
|
+
def _schema_to_columns(self, schema: Any) -> str:
|
|
193
|
+
"""Convert a schema to ClickHouse column definitions."""
|
|
194
|
+
if hasattr(schema, "to_schema"):
|
|
195
|
+
schema = schema.to_schema()
|
|
196
|
+
|
|
197
|
+
if hasattr(schema, "columns"):
|
|
198
|
+
columns = []
|
|
199
|
+
for name, col in schema.columns.items():
|
|
200
|
+
ch_type = self._pandas_type_to_clickhouse(col.dtype)
|
|
201
|
+
columns.append(f"{name} {ch_type}")
|
|
202
|
+
return ", ".join(columns)
|
|
203
|
+
|
|
204
|
+
if hasattr(schema, "fields"):
|
|
205
|
+
columns = []
|
|
206
|
+
for field in schema.fields:
|
|
207
|
+
ch_type = self._python_type_to_clickhouse(field.type)
|
|
208
|
+
columns.append(f"{field.name} {ch_type}")
|
|
209
|
+
return ", ".join(columns)
|
|
210
|
+
|
|
211
|
+
raise TypeError(
|
|
212
|
+
f"Unsupported schema type: {type(schema).__name__}. Expected a schema with 'columns' or 'fields' attribute."
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
def _pandas_type_to_clickhouse(self, dtype: Any) -> str:
|
|
216
|
+
"""Convert pandas dtype to ClickHouse type."""
|
|
217
|
+
import pandas as pd
|
|
218
|
+
|
|
219
|
+
if pd.api.types.is_integer_dtype(dtype):
|
|
220
|
+
return "Int64"
|
|
221
|
+
if pd.api.types.is_float_dtype(dtype):
|
|
222
|
+
return "Float64"
|
|
223
|
+
if pd.api.types.is_bool_dtype(dtype):
|
|
224
|
+
return "UInt8"
|
|
225
|
+
if pd.api.types.is_datetime64_any_dtype(dtype):
|
|
226
|
+
return "DateTime64"
|
|
227
|
+
if pd.api.types.is_string_dtype(dtype):
|
|
228
|
+
return "String"
|
|
229
|
+
return "String"
|
|
230
|
+
|
|
231
|
+
def _python_type_to_clickhouse(self, py_type: Any) -> str:
|
|
232
|
+
"""Convert Python type to ClickHouse type."""
|
|
233
|
+
type_map = {
|
|
234
|
+
int: "Int64",
|
|
235
|
+
float: "Float64",
|
|
236
|
+
str: "String",
|
|
237
|
+
bool: "UInt8",
|
|
238
|
+
}
|
|
239
|
+
return type_map.get(py_type, "String")
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Service definition for the ClickHouse server container.
# Top-level keys describe the service; `compose` holds the container settings
# and `env_vars` declares the variables interpolated below.
name: clickhouse
description: ClickHouse analytical database for data plane
category: data
default: false

# Image tag is taken from CLICKHOUSE_VERSION and falls back to "latest".
# NOTE(review): "latest" is a moving tag — consider pinning a version.
image: clickhouse/clickhouse-server:${CLICKHOUSE_VERSION:-latest}

compose:
  restart: unless-stopped
  labels:
    # Metrics scrape target: port 9363 at path /metrics.
    phlo.metrics.enabled: "true"
    phlo.metrics.port: "clickhouse:9363"
    phlo.metrics.path: "/metrics"
    # Grafana datasource labels; the URL points at the in-network native port 9000.
    phlo.grafana.datasource: "true"
    phlo.grafana.datasource.type: "grafana-clickhouse-datasource"
    phlo.grafana.datasource.name: "ClickHouse"
    phlo.grafana.datasource.url: "clickhouse:9000"
    # Traefik routes clickhouse.<TRAEFIK_DOMAIN> to the HTTP interface (8123).
    traefik.enable: "true"
    traefik.http.routers.clickhouse.rule: "Host(`clickhouse.${TRAEFIK_DOMAIN:-phlo.localhost}`)"
    traefik.http.routers.clickhouse.entrypoints: "web"
    traefik.http.services.clickhouse.loadbalancer.server.port: "8123"
  environment:
    CLICKHOUSE_USER: ${CLICKHOUSE_USER:-default}
    # NOTE(review): the password defaults to empty while ports are published
    # to the host below — confirm this is acceptable outside local development.
    CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-}
    CLICKHOUSE_DB: ${CLICKHOUSE_DB:-default}
    # The container variable is fed from CLICKHOUSE_ACCESS_MANAGEMENT (default 1).
    CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: ${CLICKHOUSE_ACCESS_MANAGEMENT:-1}
  ports:
    # Format is "<host port>:<container port>".
    - "${CLICKHOUSE_HTTP_PORT:-8123}:8123"
    # Container port 9000 is published on host port 19000 by default.
    - "${CLICKHOUSE_NATIVE_PORT:-19000}:9000"
    - "${CLICKHOUSE_METRICS_PORT:-9363}:9363"
  volumes:
    - ./volumes/clickhouse/data:/var/lib/clickhouse
    - ./volumes/clickhouse/logs:/var/log/clickhouse-server
  healthcheck:
    # Probes the HTTP /ping endpoint from inside the container.
    test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:8123/ping"]
    interval: 10s
    timeout: 5s
    retries: 10
    start_period: 30s
  ulimits:
    # Open-file limit for the server process (soft and hard set equal).
    nofile:
      soft: 262144
      hard: 262144

# Declared variables with their defaults and descriptions.
env_vars:
  CLICKHOUSE_VERSION:
    default: "latest"
    description: ClickHouse server version tag
  CLICKHOUSE_HTTP_PORT:
    default: 8123
    description: ClickHouse HTTP interface port
  CLICKHOUSE_NATIVE_PORT:
    default: 19000
    description: ClickHouse native protocol port
  CLICKHOUSE_METRICS_PORT:
    default: 9363
    description: ClickHouse Prometheus metrics port
  CLICKHOUSE_USER:
    default: "default"
    description: ClickHouse default username
  CLICKHOUSE_PASSWORD:
    default: ""
    description: ClickHouse default user password
    secret: true
  CLICKHOUSE_DB:
    default: "default"
    description: Default ClickHouse database
  CLICKHOUSE_ACCESS_MANAGEMENT:
    default: "1"
    description: "Enable SQL-driven access management (1=enabled, 0=disabled)"
  # NOTE(review): the variables from here down are not referenced anywhere in
  # the `compose` section above — presumably consumed elsewhere; verify.
  CLICKHOUSE_HTTPS_PORT:
    default: ""
    description: "HTTPS port (empty = disabled)"
  CLICKHOUSE_TLS_CERT_FILE:
    default: ""
    description: "Path to TLS certificate file"
  CLICKHOUSE_TLS_KEY_FILE:
    default: ""
    description: "Path to TLS private key file"
  CLICKHOUSE_MAX_MEMORY_USAGE:
    default: ""
    description: "Maximum memory usage per query (e.g., 10000000000 for ~10GB)"
  CLICKHOUSE_MAX_SERVER_MEMORY_USAGE_RATIO:
    default: ""
    description: "Max fraction of total RAM the server can use (e.g., 0.9)"
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""ClickHouse settings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
|
|
7
|
+
from pydantic import Field
|
|
8
|
+
|
|
9
|
+
from phlo.config.base import BaseConfig
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ClickHouseSettings(BaseConfig):
    """Connection settings for the ClickHouse data plane.

    Every field declares its own default and description. The two helper
    methods combine the host with one of the ports into ``host:port``.
    """

    clickhouse_host: str = Field(
        default="clickhouse",
        description="ClickHouse service hostname",
    )
    clickhouse_http_port: int = Field(
        default=8123,
        description="ClickHouse HTTP interface port",
    )
    # NOTE(review): service.yaml publishes container port 9000 on host port
    # 19000 by default; presumably this default targets the host-published
    # port — confirm it is right when connecting via the in-network hostname.
    clickhouse_native_port: int = Field(
        default=19000,
        description="ClickHouse native protocol port",
    )
    clickhouse_user: str = Field(
        default="default",
        description="ClickHouse username",
    )
    clickhouse_password: str = Field(
        default="",
        description="ClickHouse password",
    )
    clickhouse_db: str = Field(
        default="default",
        description="Default ClickHouse database",
    )
    clickhouse_secure: bool = Field(
        default=False,
        description="Use TLS for ClickHouse connections",
    )

    def clickhouse_http_endpoint(self) -> str:
        """Build the ``host:port`` address of the HTTP interface."""
        host, port = self.clickhouse_host, self.clickhouse_http_port
        return f"{host}:{port}"

    def clickhouse_native_endpoint(self) -> str:
        """Build the ``host:port`` address of the native interface."""
        host, port = self.clickhouse_host, self.clickhouse_native_port
        return f"{host}:{port}"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@lru_cache(maxsize=1)
def get_settings() -> ClickHouseSettings:
    """Build the ClickHouse settings once and reuse that object on later calls."""
    # lru_cache memoizes the single zero-argument result.
    settings = ClickHouseSettings()
    return settings
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: phlo-clickhouse
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ClickHouse service and resource plugin for Phlo
|
|
5
|
+
Author-email: Phlo Team <team@phlo.dev>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Description-Content-Type: text/plain
|
|
9
|
+
Requires-Dist: phlo>=0.1.0
|
|
10
|
+
Requires-Dist: clickhouse-connect>=0.8.0
|
|
11
|
+
Requires-Dist: pyyaml>=6.0.1
|
|
12
|
+
Requires-Dist: pandas>=2.0.0
|
|
13
|
+
Requires-Dist: pyarrow>=12.0.0
|
|
14
|
+
Provides-Extra: dbt
|
|
15
|
+
Requires-Dist: dbt-core>=1.8; extra == "dbt"
|
|
16
|
+
Requires-Dist: dbt-clickhouse>=1.8; extra == "dbt"
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
19
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
20
|
+
|
|
21
|
+
ClickHouse service and resource plugin for Phlo.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/phlo_clickhouse/__init__.py
|
|
4
|
+
src/phlo_clickhouse/cli.py
|
|
5
|
+
src/phlo_clickhouse/cli_plugin.py
|
|
6
|
+
src/phlo_clickhouse/clickhouse-setup.yaml
|
|
7
|
+
src/phlo_clickhouse/plugin.py
|
|
8
|
+
src/phlo_clickhouse/publish_target.py
|
|
9
|
+
src/phlo_clickhouse/resource.py
|
|
10
|
+
src/phlo_clickhouse/service.yaml
|
|
11
|
+
src/phlo_clickhouse/settings.py
|
|
12
|
+
src/phlo_clickhouse.egg-info/PKG-INFO
|
|
13
|
+
src/phlo_clickhouse.egg-info/SOURCES.txt
|
|
14
|
+
src/phlo_clickhouse.egg-info/dependency_links.txt
|
|
15
|
+
src/phlo_clickhouse.egg-info/entry_points.txt
|
|
16
|
+
src/phlo_clickhouse.egg-info/requires.txt
|
|
17
|
+
src/phlo_clickhouse.egg-info/top_level.txt
|
|
18
|
+
tests/test_clickhouse_capabilities.py
|
|
19
|
+
tests/test_clickhouse_plugin.py
|
|
20
|
+
tests/test_clickhouse_resource.py
|
|
21
|
+
tests/test_clickhouse_settings.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
[phlo.plugins.cli]
|
|
2
|
+
clickhouse = phlo_clickhouse.cli_plugin:ClickHouseCliPlugin
|
|
3
|
+
|
|
4
|
+
[phlo.plugins.resources]
|
|
5
|
+
clickhouse = phlo_clickhouse.plugin:ClickHouseResourceProvider
|
|
6
|
+
|
|
7
|
+
[phlo.plugins.services]
|
|
8
|
+
clickhouse = phlo_clickhouse.plugin:ClickHouseServicePlugin
|
|
9
|
+
clickhouse-setup = phlo_clickhouse.plugin:ClickHouseSetupServicePlugin
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
phlo_clickhouse
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Tests for ClickHouse resource provider capabilities."""
|
|
2
|
+
|
|
3
|
+
from phlo_clickhouse.plugin import ClickHouseResourceProvider
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_clickhouse_resource_provider_metadata():
    """The resource provider advertises the expected plugin name and version."""
    meta = ClickHouseResourceProvider().metadata

    assert meta.name == "clickhouse"
    assert meta.version == "0.1.0"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_clickhouse_resource_provider_get_resources():
    """Exactly one resource, named ``clickhouse``, is exposed by the provider."""
    found = ClickHouseResourceProvider().get_resources()

    assert len(found) == 1
    first = found[0]
    assert first.name == "clickhouse"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_clickhouse_resource_provider_get_table_stores():
    """One table store spec is returned, with the expected support flags."""
    stores = ClickHouseResourceProvider().get_table_stores()

    assert len(stores) == 1
    store = stores[0]
    assert store.name == "clickhouse"
    # Identity checks: the flags must be real booleans, not merely falsy/truthy.
    assert store.support.supports_snapshots is False
    assert store.support.supports_schema_evolution is True
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_clickhouse_resource_provider_get_query_engines():
    """One query engine spec is returned, with the expected metadata and flags."""
    engines = ClickHouseResourceProvider().get_query_engines()

    assert len(engines) == 1
    engine = engines[0]
    assert engine.name == "clickhouse"
    assert engine.metadata["service_type"] == "ClickHouse"
    # Identity checks: the flags must be real booleans, not merely falsy.
    assert engine.support.supports_snapshots is False
    assert engine.support.supports_time_travel is False
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_clickhouse_resource_provider_get_publish_targets():
    """One publish target spec is returned, aimed at the clickhouse system."""
    targets = ClickHouseResourceProvider().get_publish_targets()

    assert len(targets) == 1
    target = targets[0]
    assert target.name == "clickhouse"
    assert target.metadata["target_system"] == "clickhouse"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Tests for ClickHouse service plugin."""
|
|
2
|
+
|
|
3
|
+
from phlo_clickhouse.plugin import ClickHouseServicePlugin
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_clickhouse_service_definition():
    """The service definition carries the expected name and category."""
    definition = ClickHouseServicePlugin().service_definition

    expected = {"name": "clickhouse", "category": "data"}
    for key, value in expected.items():
        assert definition[key] == value
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_clickhouse_service_metadata():
    """The service plugin metadata has the expected name, version and tags."""
    meta = ClickHouseServicePlugin().metadata

    assert meta.name == "clickhouse"
    assert meta.version == "0.1.0"
    for tag in ("data", "query", "storage"):
        assert tag in meta.tags
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Tests for ClickHouse resource."""
|
|
2
|
+
|
|
3
|
+
from phlo_clickhouse.resource import CLICKHOUSE_QUERY_ENGINE_SUPPORT, ClickHouseResource
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_clickhouse_resource_defaults():
    """A resource built without arguments leaves every connection field unset."""
    resource = ClickHouseResource()

    for attr in ("host", "port", "user", "password", "database", "secure"):
        assert getattr(resource, attr) is None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_clickhouse_resource_with_overrides():
    """Explicit constructor arguments are stored on the resource unchanged."""
    overrides = {
        "host": "my-clickhouse",
        "port": 9000,
        "user": "admin",
        "password": "secret",
        "database": "mydb",
    }
    resource = ClickHouseResource(secure=True, **overrides)

    for attr, expected in overrides.items():
        assert getattr(resource, attr) == expected
    # Checked by identity so a merely truthy value would not pass.
    assert resource.secure is True
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_clickhouse_query_engine_support():
    """The module-level query engine support object disables snapshots and time travel."""
    support = CLICKHOUSE_QUERY_ENGINE_SUPPORT

    assert support.supports_snapshots is False
    assert support.supports_time_travel is False
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Tests for ClickHouse settings."""
|
|
2
|
+
|
|
3
|
+
from phlo_clickhouse.settings import ClickHouseSettings, get_settings
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_clickhouse_settings_defaults():
    """Settings built without arguments expose the documented defaults."""
    settings = ClickHouseSettings()

    expected = {
        "clickhouse_host": "clickhouse",
        "clickhouse_http_port": 8123,
        "clickhouse_native_port": 19000,
        "clickhouse_user": "default",
        "clickhouse_password": "",
        "clickhouse_db": "default",
    }
    for field_name, value in expected.items():
        assert getattr(settings, field_name) == value
    # Checked by identity so a merely falsy value would not pass.
    assert settings.clickhouse_secure is False
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_clickhouse_settings_http_endpoint():
    """Default settings yield the default host joined with the HTTP port."""
    endpoint = ClickHouseSettings().clickhouse_http_endpoint()

    assert endpoint == "clickhouse:8123"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_clickhouse_settings_native_endpoint():
    """Default settings yield the default host joined with the native port."""
    endpoint = ClickHouseSettings().clickhouse_native_endpoint()

    assert endpoint == "clickhouse:19000"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_clickhouse_settings_with_overrides():
    """Explicit keyword arguments replace every default on the settings object."""
    overrides = {
        "clickhouse_host": "my-host",
        "clickhouse_http_port": 9000,
        "clickhouse_native_port": 9001,
        "clickhouse_user": "admin",
        "clickhouse_password": "secret",
        "clickhouse_db": "mydb",
    }
    settings = ClickHouseSettings(clickhouse_secure=True, **overrides)

    for field_name, expected in overrides.items():
        assert getattr(settings, field_name) == expected
    # Checked by identity so a merely truthy value would not pass.
    assert settings.clickhouse_secure is True
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_get_settings_returns_cached():
    """Two consecutive ``get_settings()`` calls return the very same object."""
    first, second = get_settings(), get_settings()

    assert first is second
|