chqce 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chqce/__init__.py +1 -0
- chqce/cli.py +163 -0
- chqce/connection.py +55 -0
- chqce/errors.py +87 -0
- chqce/estimator.py +125 -0
- chqce/formatter.py +227 -0
- chqce/suggestions.py +156 -0
- chqce-0.1.0.dist-info/METADATA +223 -0
- chqce-0.1.0.dist-info/RECORD +13 -0
- chqce-0.1.0.dist-info/WHEEL +5 -0
- chqce-0.1.0.dist-info/entry_points.txt +2 -0
- chqce-0.1.0.dist-info/licenses/LICENSE +21 -0
- chqce-0.1.0.dist-info/top_level.txt +1 -0
chqce/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
chqce/cli.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
|
|
6
|
+
from . import __version__
|
|
7
|
+
from .connection import create_client, test_connection
|
|
8
|
+
from .estimator import QueryEstimator
|
|
9
|
+
from .formatter import console, print_header, print_result
|
|
10
|
+
from .suggestions import get_index_suggestions
|
|
11
|
+
|
|
12
|
+
_err = Console(stderr=True)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _collect_interactive() -> str:
    """Collect a multi-line SQL query from stdin.

    Submit by ending a line with ';' or typing GO on its own line.
    A lone ';' or GO entered before any query text is ignored.

    Returns:
        The entered lines joined with newlines, with surrounding
        whitespace stripped.

    Exits the process with status 0 on Ctrl+C, or when stdin reaches
    end-of-file before any line has been entered.
    """
    console.print(
        "\n[dim]Paste or type your SQL query."
        " End with [bold];[/bold] or type [bold]GO[/bold] on its own line."
        " [bold]Ctrl+C[/bold] to exit.[/dim]\n"
    )
    lines: list[str] = []
    try:
        while True:
            prefix = "[bold cyan]SQL>[/bold cyan] " if not lines else " [dim]>[/dim] "
            console.print(prefix, end="")
            try:
                line = input()
            except EOFError:
                if not lines:
                    # stdin is exhausted and nothing was typed. Returning ""
                    # would make the caller's prompt loop spin forever on a
                    # closed stdin, so treat EOF like Ctrl+C and leave.
                    console.print("\n[dim]Bye![/dim]")
                    sys.exit(0)
                # EOF after some text: submit what was collected so far.
                break
            stripped = line.strip()
            if stripped.upper() == "GO" or stripped == ";":
                if lines:
                    break
                # Terminator with nothing to submit: keep prompting.
                continue
            lines.append(line)
            if stripped.endswith(";"):
                break
    except KeyboardInterrupt:
        console.print("\n[dim]Bye![/dim]")
        sys.exit(0)

    return "\n".join(lines).strip()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _resolve_query(query: str | None, file: str | None) -> str | None:
|
|
50
|
+
"""Determine the query source, in priority order.
|
|
51
|
+
|
|
52
|
+
1. --file FILE read the query from a file (best for huge queries)
|
|
53
|
+
2. QUERY argument passed directly on the command line
|
|
54
|
+
3. piped stdin e.g. `chqce < query.sql` or `cat q.sql | chqce`
|
|
55
|
+
4. None -> caller falls back to interactive mode
|
|
56
|
+
"""
|
|
57
|
+
if file:
|
|
58
|
+
with open(file, "r", encoding="utf-8") as fh:
|
|
59
|
+
return fh.read().strip()
|
|
60
|
+
if query:
|
|
61
|
+
return query
|
|
62
|
+
# Query piped in on stdin (non-interactive).
|
|
63
|
+
if not sys.stdin.isatty():
|
|
64
|
+
data = sys.stdin.read().strip()
|
|
65
|
+
if data:
|
|
66
|
+
return data
|
|
67
|
+
return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _run(query: str, estimator: QueryEstimator, client, database: str, execute: bool) -> None:
    """Analyze one query and print its report.

    The estimate and the index suggestions are both computed while a
    status spinner is shown; the report is printed once the spinner stops.
    """
    spinner = console.status("[bold green]Analyzing…[/bold green]", spinner="dots")
    with spinner:
        result = estimator.estimate(query, execute=execute)
        suggestions = get_index_suggestions(query, client, current_database=database)
    print_result(result, suggestions)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("query", required=False)
@click.option("--file", "-f", "file", type=click.Path(exists=True, dir_okay=False),
              default=None, help="Read the query from a file (best for huge queries)")
@click.option("--host", "-H", default="localhost", envvar="CLICKHOUSE_HOST",
              show_default=True, help="ClickHouse host")
@click.option("--port", "-p", default=8123, envvar="CLICKHOUSE_PORT", type=int,
              show_default=True, help="HTTP(S) port")
@click.option("--user", "-u", default="default", envvar="CLICKHOUSE_USER",
              show_default=True, help="Username")
@click.option("--password", "-P", default="", envvar="CLICKHOUSE_PASSWORD",
              help="Password (or set CLICKHOUSE_PASSWORD)")
@click.option("--database", "-d", default="default", envvar="CLICKHOUSE_DATABASE",
              show_default=True, help="Default database")
@click.option("--max-query-size", default=0, type=int, metavar="BYTES",
              help="Raise ClickHouse max_query_size for very large queries "
                   "(server default is 262144)")
@click.option("--timeout", "-t", default=0, type=int, metavar="SECONDS",
              help="Server-side max_execution_time; query is aborted after this "
                   "many seconds (0 = unlimited)")
@click.option("--max-ast-elements", default=0, type=int, metavar="N",
              help="Raise ClickHouse max_ast_elements for queries that fail with "
                   "'AST is too big' (server default is 50000)")
@click.option("--no-execute", is_flag=True, default=False,
              help="Estimate only — do not actually run the query")
@click.version_option(__version__, "-V", "--version")
def cli(query, file, host, port, user, password, database, max_query_size,
        timeout, max_ast_elements, no_execute):
    """ClickHouse Query Cost Estimator.

    Estimates rows scanned, memory usage, and execution time for a ClickHouse
    SQL query, and suggests indexes based on WHERE-clause columns.

    \b
    The query can come from (in priority order):
    • --file query.sql best for huge / multi-line queries
    • a QUERY argument chqce "SELECT ..."
    • piped stdin chqce < query.sql
    • interactive prompt run with no query at all

    \b
    Environment variables (override defaults):
    CLICKHOUSE_HOST, CLICKHOUSE_PORT, CLICKHOUSE_USER,
    CLICKHOUSE_PASSWORD, CLICKHOUSE_DATABASE

    \b
    Examples:
    chqce "SELECT count() FROM hits WHERE EventDate = today()"
    chqce -f report.sql --no-execute
    chqce -t 600 "SELECT ... a slow query ..."
    cat report.sql | chqce --max-query-size 1048576 --max-ast-elements 500000
    """
    # Work out where the query comes from before touching the network, so a
    # bad --file fails fast.
    try:
        resolved = _resolve_query(query, file)
    except OSError as exc:
        _err.print(f"[red]Could not read query file:[/red] {exc}")
        sys.exit(1)

    connection_args = {
        "host": host,
        "port": port,
        "user": user,
        "password": password,
        "database": database,
        "max_query_size": max_query_size,
        "max_execution_time": timeout,
        "max_ast_elements": max_ast_elements,
    }
    try:
        client = create_client(**connection_args)
        ok, version_or_err = test_connection(client)
    except Exception as exc:
        _err.print(f"[red]Connection error:[/red] {exc}")
        sys.exit(1)

    # On failure the second tuple element carries the error text instead of
    # the server version.
    if not ok:
        _err.print(f"[red]Connection failed:[/red] {version_or_err}")
        sys.exit(1)

    print_header(version_or_err, host, port, database)

    estimator = QueryEstimator(client)
    execute = not no_execute

    # One-shot mode: a query was supplied by file, argument or pipe.
    if resolved:
        _run(resolved, estimator, client, database, execute)
        return

    # Interactive mode: keep prompting; _collect_interactive() ends the
    # process itself when the user presses Ctrl+C.
    while True:
        q = _collect_interactive()
        if q:
            _run(q, estimator, client, database, execute)
            console.print("[dim]" + "─" * 60 + "[/dim]")
|
chqce/connection.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import clickhouse_connect
|
|
2
|
+
from clickhouse_connect.driver import Client
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
DEFAULT_SOCKET_TIMEOUT = 300
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def create_client(
    host: str = "localhost",
    port: int = 8123,
    user: str = "default",
    password: str = "",
    database: str = "default",
    max_query_size: int = 0,
    max_execution_time: int = 0,
    max_ast_elements: int = 0,
) -> Client:
    """Build a clickhouse-connect client with optional per-session limits.

    Each of ``max_query_size``, ``max_execution_time`` and
    ``max_ast_elements`` is forwarded as a ClickHouse setting only when it
    is greater than zero. ``max_ast_elements`` is also applied to
    ``max_expanded_ast_elements``.

    Returns:
        The client returned by ``clickhouse_connect.get_client``.
    """
    # Only settings the caller actually overrode are sent to the server.
    requested = (
        ("max_query_size", max_query_size),
        ("max_execution_time", max_execution_time),
        ("max_ast_elements", max_ast_elements),
    )
    settings = {name: value for name, value in requested if value > 0}
    if "max_ast_elements" in settings:
        settings["max_expanded_ast_elements"] = max_ast_elements

    # Keep the client socket alive a bit longer than the server-side limit so
    # ClickHouse returns a clean timeout error instead of the socket dropping.
    if max_execution_time > 0:
        socket_timeout = max_execution_time + 30
    else:
        socket_timeout = DEFAULT_SOCKET_TIMEOUT

    return clickhouse_connect.get_client(
        host=host,
        port=port,
        username=user,
        password=password,
        database=database,
        connect_timeout=10,
        send_receive_timeout=socket_timeout,
        settings=settings,
    )
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_connection(client: Client) -> tuple[bool, str]:
    """Probe the server with ``SELECT version()``.

    Returns:
        ``(True, version)`` when the query succeeds, otherwise
        ``(False, error_text)`` where ``error_text`` is ``str()`` of the
        exception that was raised. This function itself never raises.
    """
    try:
        rows = client.query("SELECT version()").result_rows
        return True, rows[0][0]
    except Exception as exc:
        return False, str(exc)
|
chqce/errors.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Classify ClickHouse / client errors into actionable hints.
|
|
2
|
+
|
|
3
|
+
ClickHouse surfaces resource limits as server errors (timeout, AST too big,
|
|
4
|
+
memory, query size). We map the raw message to a short category and a hint
|
|
5
|
+
that tells the user which flag or setting can get them unstuck.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
|
|
13
|
+
class ClassifiedError:
|
|
14
|
+
category: str # machine-readable bucket
|
|
15
|
+
title: str # short human label
|
|
16
|
+
hint: Optional[str] # actionable suggestion, or None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Each rule: (category, title, list-of-substrings-to-match, hint)
|
|
20
|
+
# Substring match is case-insensitive. Order matters — first match wins.
|
|
21
|
+
_RULES = [
|
|
22
|
+
(
|
|
23
|
+
"timeout",
|
|
24
|
+
"Query timed out",
|
|
25
|
+
["timeout_exceeded", "timeout exceeded", "max_execution_time",
|
|
26
|
+
"timed out", "read timed out", "readtimeout"],
|
|
27
|
+
"The query exceeded its time budget. Try:\n"
|
|
28
|
+
" • raise the limit: --timeout 600 (seconds, 0 = unlimited)\n"
|
|
29
|
+
" • estimate without running: --no-execute\n"
|
|
30
|
+
" • narrow the query with a WHERE filter or LIMIT",
|
|
31
|
+
),
|
|
32
|
+
(
|
|
33
|
+
"ast_too_big",
|
|
34
|
+
"Query AST is too big",
|
|
35
|
+
["too_big_ast", "ast is too big", "max_ast_elements",
|
|
36
|
+
"max_expanded_ast_elements"],
|
|
37
|
+
"The parsed query has too many elements (often huge IN-lists or "
|
|
38
|
+
"deeply nested expressions). Try:\n"
|
|
39
|
+
" • raise the limit: --max-ast-elements 500000\n"
|
|
40
|
+
" • replace a long IN (1, 2, 3, …) with a subquery or a "
|
|
41
|
+
"temporary table / JOIN",
|
|
42
|
+
),
|
|
43
|
+
(
|
|
44
|
+
"parser_depth",
|
|
45
|
+
"Query nesting is too deep",
|
|
46
|
+
["too_deep_recursion", "maximum parse depth", "max_parser_depth"],
|
|
47
|
+
"The query nests deeper than the parser allows. Try:\n"
|
|
48
|
+
" • flatten deeply nested subqueries or boolean expressions\n"
|
|
49
|
+
" • raise the server setting max_parser_depth",
|
|
50
|
+
),
|
|
51
|
+
(
|
|
52
|
+
"query_size",
|
|
53
|
+
"Query text is too large",
|
|
54
|
+
["max query size exceeded", "max_query_size"],
|
|
55
|
+
"The raw query exceeds ClickHouse's max_query_size (256 KiB default). "
|
|
56
|
+
"Try:\n"
|
|
57
|
+
" • raise the limit: --max-query-size 1048576 (bytes)",
|
|
58
|
+
),
|
|
59
|
+
(
|
|
60
|
+
"memory",
|
|
61
|
+
"Query ran out of memory",
|
|
62
|
+
["memory_limit_exceeded", "memory limit", "max_memory_usage"],
|
|
63
|
+
"The query exceeded the memory budget. Try:\n"
|
|
64
|
+
" • add a WHERE filter or LIMIT to scan less data\n"
|
|
65
|
+
" • pre-aggregate, or raise the server setting max_memory_usage",
|
|
66
|
+
),
|
|
67
|
+
(
|
|
68
|
+
"read_limit",
|
|
69
|
+
"Query scans too much data",
|
|
70
|
+
["too_many_rows", "max_rows_to_read", "too_many_bytes",
|
|
71
|
+
"max_bytes_to_read"],
|
|
72
|
+
"The query would read more rows/bytes than allowed. Try:\n"
|
|
73
|
+
" • add a WHERE filter on the table's ORDER BY columns\n"
|
|
74
|
+
" • check the Index Suggestions below",
|
|
75
|
+
),
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def classify_error(message: Optional[str]) -> Optional[ClassifiedError]:
|
|
80
|
+
"""Return a ClassifiedError for a known failure, or None if unrecognized."""
|
|
81
|
+
if not message:
|
|
82
|
+
return None
|
|
83
|
+
low = message.lower()
|
|
84
|
+
for category, title, needles, hint in _RULES:
|
|
85
|
+
if any(n in low for n in needles):
|
|
86
|
+
return ClassifiedError(category=category, title=title, hint=hint)
|
|
87
|
+
return None
|
chqce/estimator.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Optional, List
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class TableEstimate:
    """One row of ``EXPLAIN ESTIMATE`` output for a single table."""

    # Where the table lives.
    database: str
    table: str
    # Counts reported for this table by the estimate.
    parts: int
    rows: int
    marks: int
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class EstimateResult:
    """Everything collected while analyzing a single query.

    Only ``query`` is required; every other field starts at an empty or
    zero value and is filled in by whichever analysis steps succeed.
    """

    query: str

    # --- EXPLAIN ESTIMATE: per-table rows plus running totals ---
    table_estimates: List[TableEstimate] = field(default_factory=list)
    total_rows: int = 0
    total_parts: int = 0
    total_marks: int = 0

    # --- EXPLAIN PLAN: plan text, one plan row per line ---
    query_plan: str = ""

    # --- Timing, all in milliseconds ---
    # time to run EXPLAIN (analysis + planning)
    explain_time_ms: float = 0.0
    # wall-clock time for full execution
    execution_time_ms: float = 0.0
    # server-side time reported by ClickHouse
    server_time_ms: float = 0.0

    # --- Statistics from actually executing the query ---
    read_rows: int = 0
    read_bytes: int = 0
    result_rows: int = 0
    result_bytes: int = 0
    memory_usage_bytes: int = 0

    was_executed: bool = False

    # --- Errors are recorded rather than raised (non-fatal) ---
    explain_error: Optional[str] = None
    execution_error: Optional[str] = None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class QueryEstimator:
    """Run EXPLAIN and (optionally) the query itself, collecting statistics.

    ``client`` must offer ``query(sql, parameters=...)`` returning an object
    with ``result_rows`` and, for executed queries, ``summary`` and
    ``query_id`` attributes.
    """

    def __init__(self, client):
        self.client = client

    @staticmethod
    def _is_select(query: str) -> bool:
        """Return True if the first real keyword is SELECT or WITH.

        Leading whitespace, ``--`` line comments and ``/* ... */`` block
        comments are skipped first, so a query file that starts with a
        header comment is still recognized. An unterminated block comment
        or a comment-only query yields False.
        """
        text = query.lstrip()
        while text:
            if text.startswith("--"):
                # Line comment: drop everything up to the end of the line.
                _, _, text = text.partition("\n")
            elif text.startswith("/*"):
                _, closer, text = text.partition("*/")
                if not closer:
                    return False
            else:
                break
            text = text.lstrip()

        words = text.split(None, 1)
        if not words:
            return False
        return words[0].upper() in ("SELECT", "WITH")

    def estimate(self, query: str, execute: bool = True) -> EstimateResult:
        """Analyze ``query`` and return whatever could be collected.

        Steps, each independent of the others failing:

        1. ``EXPLAIN ESTIMATE`` (SELECT/WITH only) - per-table rows, parts
           and marks; a failure is stored in ``explain_error``.
        2. ``EXPLAIN PLAN`` (SELECT/WITH only) - plan text; a failure is
           ignored and the plan is left empty.
        3. When ``execute`` is true, the query itself is run and timing and
           read/result statistics are recorded; a failure is stored in
           ``execution_error``.

        Args:
            query: SQL text to analyze.
            execute: run the query for real statistics when true.

        Returns:
            The populated ``EstimateResult``. Errors from the server are
            recorded on the result rather than raised.
        """
        result = EstimateResult(query=query)
        is_select = self._is_select(query)

        # Step 1: EXPLAIN ESTIMATE — rows/parts/marks per table
        if is_select:
            try:
                started = time.perf_counter()
                estimate_rows = self.client.query(f"EXPLAIN ESTIMATE {query}")
                result.explain_time_ms = (time.perf_counter() - started) * 1000

                for row in estimate_rows.result_rows:
                    table_estimate = TableEstimate(
                        database=str(row[0]),
                        table=str(row[1]),
                        parts=int(row[2]),
                        rows=int(row[3]),
                        marks=int(row[4]),
                    )
                    result.table_estimates.append(table_estimate)
                    result.total_rows += table_estimate.rows
                    result.total_parts += table_estimate.parts
                    result.total_marks += table_estimate.marks
            except Exception as exc:
                result.explain_error = str(exc)

        # Step 2: EXPLAIN PLAN — human-readable execution plan
        if is_select:
            try:
                plan_rows = self.client.query(f"EXPLAIN PLAN {query}")
                result.query_plan = "\n".join(
                    str(row[0]) for row in plan_rows.result_rows
                )
            except Exception:
                # Best effort: the plan is optional, and step 1 has already
                # recorded an error if EXPLAIN is not usable at all.
                pass

        # Step 3: Execute and collect real stats
        if execute:
            try:
                started = time.perf_counter()
                executed = self.client.query(query)
                result.execution_time_ms = (time.perf_counter() - started) * 1000
                result.was_executed = True
                result.result_rows = len(executed.result_rows)

                summary = executed.summary or {}
                result.read_rows = int(summary.get("read_rows", 0))
                result.read_bytes = int(summary.get("read_bytes", 0))
                result.result_bytes = int(summary.get("result_bytes", 0))
                elapsed_ns = int(summary.get("elapsed_ns", 0))
                if elapsed_ns:
                    result.server_time_ms = elapsed_ns / 1_000_000

                # Memory usage lives in query_log; give the flush a moment
                query_id = getattr(executed, "query_id", None)
                if query_id:
                    time.sleep(0.05)
                    try:
                        mem_res = self.client.query(
                            "SELECT memory_usage FROM system.query_log "
                            "WHERE type = 'QueryFinish' AND query_id = {qid:String} LIMIT 1",
                            parameters={"qid": query_id},
                        )
                        if mem_res.result_rows:
                            result.memory_usage_bytes = int(mem_res.result_rows[0][0])
                    except Exception:
                        # Best effort: the log row may not be flushed yet or
                        # the table may not be readable; memory stays at 0.
                        pass

            except Exception as exc:
                result.execution_error = str(exc)

        return result
|
chqce/formatter.py
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from rich import box
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
from rich.syntax import Syntax
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
|
|
9
|
+
from .errors import classify_error
|
|
10
|
+
from .estimator import EstimateResult
|
|
11
|
+
from .suggestions import IndexSuggestion
|
|
12
|
+
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# ── helpers ─────────────────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
def _fmt_bytes(n: int) -> str:
|
|
19
|
+
if n <= 0:
|
|
20
|
+
return "—"
|
|
21
|
+
for unit, threshold in (("GB", 1 << 30), ("MB", 1 << 20), ("KB", 1 << 10)):
|
|
22
|
+
if n >= threshold:
|
|
23
|
+
return f"{n / threshold:.1f} {unit}"
|
|
24
|
+
return f"{n} B"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _fmt_rows(n: int) -> str:
|
|
28
|
+
if n >= 1_000_000_000:
|
|
29
|
+
return f"{n / 1_000_000_000:.1f}B"
|
|
30
|
+
if n >= 1_000_000:
|
|
31
|
+
return f"{n / 1_000_000:.1f}M"
|
|
32
|
+
if n >= 1_000:
|
|
33
|
+
return f"{n / 1_000:.1f}K"
|
|
34
|
+
return str(n)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _fmt_ms(ms: float) -> str:
|
|
38
|
+
if ms <= 0:
|
|
39
|
+
return "—"
|
|
40
|
+
if ms >= 1_000:
|
|
41
|
+
return f"{ms / 1_000:.2f} s"
|
|
42
|
+
if ms >= 1:
|
|
43
|
+
return f"{ms:.1f} ms"
|
|
44
|
+
return f"{ms * 1_000:.0f} µs"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ── public API ───────────────────────────────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
def print_header(version: str, host: str, port: int, database: str) -> None:
    """Print the banner panel shown once after a successful connection.

    Args:
        version: ClickHouse server version string.
        host: server host the client is connected to.
        port: server port the client is connected to.
        database: default database for the session.
    """
    # Imported here so the banner always shows the installed package version
    # instead of a literal that has to be bumped by hand on every release.
    from . import __version__

    console.print()
    console.print(
        Panel(
            f"[bold cyan]ClickHouse Query Cost Estimator[/bold cyan] [dim]v{__version__}[/dim]\n"
            f"[dim]Connected to [green]{host}:{port}[/green]"
            f" · database: [green]{database}[/green]"
            f" · ClickHouse [green]{version}[/green][/dim]",
            box=box.ROUNDED,
            border_style="cyan",
        )
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def print_result(result: EstimateResult, suggestions: List[IndexSuggestion]) -> None:
    """Render the full report for one analyzed query.

    Sections, in order: query echo, errors, cost estimate, timing,
    execution stats, query plan, index suggestions. When execution failed,
    only the query echo, the errors and the index suggestions are shown.

    Args:
        result: what the estimator collected for the query.
        suggestions: index suggestions to list at the end of the report.
    """
    console.print()
    _print_query_echo(result.query)

    if result.explain_error:
        _print_error("⚠ Estimate unavailable", result.explain_error,
                     style="yellow", border="yellow")
    if result.execution_error:
        _print_error("✗ Execution error", result.execution_error,
                     style="red", border="red")
        _print_index_suggestions(suggestions)
        return

    _print_cost_estimate(result)
    _print_timing(result)
    _print_execution_stats(result)
    _print_query_plan(result.query_plan)
    _print_index_suggestions(suggestions)


def _print_query_echo(query: str) -> None:
    """Echo the query in a panel, truncated so results stay visible."""
    QUERY_ECHO_LINES = 30
    q = query.strip()
    q_lines = q.split("\n")
    if len(q_lines) > QUERY_ECHO_LINES:
        head = "\n".join(q_lines[:QUERY_ECHO_LINES])
        body = Syntax(head, "sql", theme="monokai")
        title = (
            f"[bold]Query[/bold] "
            f"[dim](showing {QUERY_ECHO_LINES} of {len(q_lines)} lines, "
            f"{len(q):,} chars)[/dim]"
        )
    else:
        body = Syntax(q, "sql", theme="monokai")
        title = "[bold]Query[/bold]"
    console.print(Panel(body, title=title, border_style="blue"))
    console.print()


def _print_cost_estimate(result: EstimateResult) -> None:
    """Print the per-table EXPLAIN ESTIMATE table; nothing if it is empty."""
    if not result.table_estimates:
        return

    t = Table(
        title="[bold]Cost Estimate [dim](from EXPLAIN ESTIMATE)[/dim][/bold]",
        box=box.SIMPLE_HEAD,
        header_style="bold magenta",
    )
    t.add_column("Database", style="cyan")
    t.add_column("Table", style="cyan")
    t.add_column("Parts", justify="right")
    t.add_column("Est. Rows", justify="right", style="yellow")
    t.add_column("Marks", justify="right")

    for te in result.table_estimates:
        t.add_row(te.database, te.table, str(te.parts), _fmt_rows(te.rows), str(te.marks))

    # A totals row only adds information when several tables are involved.
    if len(result.table_estimates) > 1:
        t.add_section()
        t.add_row(
            "[bold]Total[/bold]",
            "",
            f"[bold]{result.total_parts}[/bold]",
            f"[bold yellow]{_fmt_rows(result.total_rows)}[/bold yellow]",
            f"[bold]{result.total_marks}[/bold]",
        )
    console.print(t)
    console.print()


def _print_timing(result: EstimateResult) -> None:
    """Print the timing table for the EXPLAIN and execution phases."""
    t = Table(
        title="[bold]Timing[/bold]",
        box=box.SIMPLE_HEAD,
        header_style="bold magenta",
    )
    t.add_column("Phase", style="cyan")
    t.add_column("Time", justify="right", style="green")
    t.add_column("Notes", style="dim")

    if result.explain_time_ms > 0:
        t.add_row(
            "SQL Analyzer (EXPLAIN)",
            _fmt_ms(result.explain_time_ms),
            "parsing + plan generation",
        )
    if result.was_executed:
        t.add_row(
            "Execution (client)",
            _fmt_ms(result.execution_time_ms),
            "wall-clock including network",
        )
        if result.server_time_ms > 0:
            t.add_row(
                "Execution (server)",
                _fmt_ms(result.server_time_ms),
                "server-side only",
            )

    console.print(t)
    console.print()


def _print_execution_stats(result: EstimateResult) -> None:
    """Print read/result statistics; nothing if the query was not executed."""
    if not result.was_executed:
        return

    t = Table(
        title="[bold]Execution Stats[/bold]",
        box=box.SIMPLE_HEAD,
        header_style="bold magenta",
    )
    t.add_column("Metric", style="cyan")
    t.add_column("Value", justify="right", style="green")

    t.add_row("Rows read", _fmt_rows(result.read_rows))
    t.add_row("Bytes read", _fmt_bytes(result.read_bytes))
    t.add_row("Result rows", _fmt_rows(result.result_rows))
    t.add_row("Result size", _fmt_bytes(result.result_bytes))
    t.add_row("Peak memory", _fmt_bytes(result.memory_usage_bytes))

    console.print(t)
    console.print()


def _print_query_plan(plan: str, max_lines: int = 40) -> None:
    """Print at most ``max_lines`` lines of the plan; nothing if it is empty."""
    if not plan:
        return

    from rich.markup import escape

    lines = plan.split("\n")
    # The plan is server output, not markup. Escape it so bracketed text in
    # the plan is shown literally instead of being parsed as a Rich tag.
    body = escape("\n".join(lines[:max_lines]))
    if len(lines) > max_lines:
        body += f"\n[dim]… {len(lines) - max_lines} more lines[/dim]"
    console.print(
        Panel(body, title="[bold]Query Plan[/bold]", border_style="dim", expand=False)
    )
    console.print()
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _print_error(label: str, message: str, style: str, border: str) -> None:
    """Render an error with a classified, actionable hint when we recognize it.

    Args:
        label: short heading; may contain Rich markup-free text only.
        message: raw error text from the server or client.
        style: Rich style name applied to ``label``.
        border: Rich style name for the panel border.
    """
    from rich.markup import escape

    classified = classify_error(message)
    # The message comes from the server and may quote the user's SQL, so it
    # can contain square brackets. Escape it so Rich prints it literally
    # rather than parsing (and dropping or rejecting) it as markup.
    msg = escape(message.strip())
    body = f"[{style}]{label}[/{style}]"
    if classified:
        body += f" [bold]{classified.title}[/bold]"
    body += f"\n[dim]{msg}[/dim]"
    if classified and classified.hint:
        body += f"\n\n[bold]Suggestions[/bold]\n{classified.hint}"
    console.print(Panel(body, border_style=border, expand=False))
    console.print()
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _print_index_suggestions(suggestions: List[IndexSuggestion]) -> None:
    """Print the index-suggestion section, or a one-line note if there is none.

    Columns already in the table's key get a check mark; other columns get a
    warning mark followed by the suggested SQL when one is available.
    """
    if not suggestions:
        console.print("[dim]No index suggestions — add a WHERE clause to get recommendations.[/dim]")
        console.print()
        return

    console.rule("[bold]Index Suggestions[/bold]", style="magenta")
    console.print()

    for item in suggestions:
        covered = item.in_primary_key
        marker = "[green]✓[/green]" if covered else "[yellow]⚠[/yellow]"
        console.print(
            f" {marker} [bold]{item.column}[/bold]"
            f" [dim]on {item.table}[/dim] — {item.reason}"
        )
        # Only columns outside the key carry SQL worth showing.
        if not covered and item.suggestion:
            console.print(
                Syntax(item.suggestion, "sql", theme="monokai", padding=(0, 4))
            )
    console.print()
|
chqce/suggestions.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import List, Set
|
|
3
|
+
|
|
4
|
+
import sqlglot
|
|
5
|
+
import sqlglot.expressions as exp
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class IndexSuggestion:
    """One finding about a WHERE-clause column of a table."""

    # Table the finding refers to (as displayed to the user).
    table: str
    # Column used in the WHERE clause.
    column: str
    # Short human-readable explanation.
    reason: str
    # SQL to run, or "" when there is nothing to suggest.
    suggestion: str
    # True when the column is already part of the table's key.
    in_primary_key: bool = False
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _parse_query(query: str):
    """Extract referenced tables and WHERE-clause columns from ``query``.

    Parsing is tried with the ClickHouse dialect first and the default
    dialect second; if both fail, two empty dicts are returned.

    Returns:
        ``(tables, where_cols)`` where ``tables`` maps a lower-cased alias
        (or table name) to ``(database, table_name)`` and ``where_cols``
        maps a lower-cased column name to the set of condition kinds it
        appears in (``"equality"``, ``"range"``, ``"like"``, ``"in"``).
    """
    try:
        tree = sqlglot.parse_one(query, read="clickhouse")
    except Exception:
        try:
            tree = sqlglot.parse_one(query)
        except Exception:
            return {}, {}

    # alias/name -> (database, table_name)
    tables = {}
    for table_node in tree.find_all(exp.Table):
        name = table_node.name
        if name:
            key = (table_node.alias or name).lower()
            tables[key] = (table_node.db or "", name)

    # column_name -> set of condition kinds
    where_cols = {}
    where = tree.find(exp.Where)
    if where:
        # One pass per predicate family, in a fixed order, because the order
        # in which columns are first seen is the order of the result dict.
        passes = (
            ((exp.EQ,), "equality"),
            ((exp.Between,), "range"),
            ((exp.LT, exp.LTE, exp.GT, exp.GTE), "range"),
            ((exp.Like, exp.ILike), "like"),
            ((exp.In,), "in"),
        )
        for node_types, kind in passes:
            for predicate in where.find_all(*node_types):
                for column in predicate.find_all(exp.Column):
                    if column.name:
                        where_cols.setdefault(column.name.lower(), set()).add(kind)

    return tables, where_cols
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _get_order_by_cols(client, database: str, table: str) -> List[str]:
|
|
64
|
+
try:
|
|
65
|
+
res = client.query(
|
|
66
|
+
"SELECT sorting_key, primary_key FROM system.tables "
|
|
67
|
+
"WHERE database = {db:String} AND name = {t:String}",
|
|
68
|
+
parameters={"db": database, "t": table},
|
|
69
|
+
)
|
|
70
|
+
if not res.result_rows:
|
|
71
|
+
return []
|
|
72
|
+
sorting_key, primary_key = res.result_rows[0]
|
|
73
|
+
key = sorting_key or primary_key
|
|
74
|
+
if key:
|
|
75
|
+
return [c.strip() for c in key.split(",") if c.strip()]
|
|
76
|
+
except Exception:
|
|
77
|
+
pass
|
|
78
|
+
return []
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _get_col_type(client, database: str, table: str, column: str) -> str:
|
|
82
|
+
try:
|
|
83
|
+
res = client.query(
|
|
84
|
+
"SELECT type FROM system.columns "
|
|
85
|
+
"WHERE database={db:String} AND table={t:String} AND name={c:String}",
|
|
86
|
+
parameters={"db": database, "t": table, "c": column},
|
|
87
|
+
)
|
|
88
|
+
if res.result_rows:
|
|
89
|
+
return res.result_rows[0][0]
|
|
90
|
+
except Exception:
|
|
91
|
+
pass
|
|
92
|
+
return ""
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _best_index_type(col_type: str, conditions: Set[str]) -> str:
|
|
96
|
+
ct = col_type.lower()
|
|
97
|
+
if "like" in conditions:
|
|
98
|
+
return "tokenbf_v1(32768, 3, 0)"
|
|
99
|
+
if "range" in conditions:
|
|
100
|
+
return "minmax"
|
|
101
|
+
if "string" in ct or "fixedstring" in ct:
|
|
102
|
+
return "bloom_filter(0.01)"
|
|
103
|
+
if any(x in ct for x in ("int", "uint", "float", "decimal", "date", "datetime")):
|
|
104
|
+
return "set(100)" if "in" in conditions or "equality" in conditions else "minmax"
|
|
105
|
+
return "bloom_filter(0.01)"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_index_suggestions(
    query: str, client, current_database: str = "default"
) -> List[IndexSuggestion]:
    """Build index suggestions for the WHERE columns of *query*.

    The query is handed to ``_parse_query``, which yields the referenced
    tables and the WHERE columns with their condition kinds. For each table,
    every WHERE column is compared with the table's ORDER BY columns:

    * a column found there produces an entry flagged ``in_primary_key=True``
      with an empty ``suggestion``;
    * any other column produces an entry whose ``suggestion`` is an
      ``ALTER TABLE ... ADD INDEX`` statement, with the index type chosen by
      ``_best_index_type``.

    Args:
        query: SQL text to analyse.
        client: Client object passed through to the metadata lookups.
        current_database: Database used for tables that carry no database
            of their own.

    Returns:
        One ``IndexSuggestion`` per (table, WHERE column) pair, or an empty
        list when the parse yields no tables or no WHERE columns.
    """
    tables, where_cols = _parse_query(query)
    if not (where_cols and tables):
        return []

    found: List[IndexSuggestion] = []

    for db, table_name in tables.values():
        effective_db = db or current_database
        order_by = _get_order_by_cols(client, effective_db, table_name)
        order_by_lower = {name.lower() for name in order_by}
        full_table = table_name if not effective_db else f"{effective_db}.{table_name}"

        for col_name, conditions in where_cols.items():
            if col_name in order_by_lower:
                # Already covered by the sort key: nothing to add.
                found.append(
                    IndexSuggestion(
                        table=full_table,
                        column=col_name,
                        reason=f"already in ORDER BY {order_by}",
                        suggestion="",
                        in_primary_key=True,
                    )
                )
                continue

            col_type = _get_col_type(client, effective_db, table_name, col_name)
            idx_type = _best_index_type(col_type, conditions)
            idx_name = f"idx_{table_name}_{col_name}"
            alter_sql = (
                f"ALTER TABLE {full_table}\n"
                f" ADD INDEX {idx_name} {col_name}\n"
                f" TYPE {idx_type} GRANULARITY 4;"
            )
            # Show a placeholder when the table's ORDER BY could not be read.
            pk_label = order_by if order_by else ["(unknown)"]
            found.append(
                IndexSuggestion(
                    table=full_table,
                    column=col_name,
                    reason=f"not in ORDER BY {pk_label}",
                    suggestion=alter_sql,
                    in_primary_key=False,
                )
            )

    return found
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chqce
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ClickHouse Query Cost Estimator CLI
|
|
5
|
+
Author-email: Ahmad Darwich <darw.ahmad@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator
|
|
8
|
+
Project-URL: Repository, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator
|
|
9
|
+
Project-URL: Issues, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/issues
|
|
10
|
+
Keywords: clickhouse,sql,cli,query,cost,performance,explain
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Database
|
|
21
|
+
Classifier: Topic :: Utilities
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: clickhouse-connect>=0.7.0
|
|
26
|
+
Requires-Dist: rich>=13.0.0
|
|
27
|
+
Requires-Dist: click>=8.1.0
|
|
28
|
+
Requires-Dist: sqlglot>=20.0.0
|
|
29
|
+
Provides-Extra: test
|
|
30
|
+
Requires-Dist: pytest>=7.0; extra == "test"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
|
|
33
|
+
# clickhouse-query-cost-estimator
|
|
34
|
+
|
|
35
|
+
[CI status](https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/actions/workflows/ci.yml)
|
|
36
|
+
[PyPI version](https://pypi.org/project/chqce/)
|
|
37
|
+
[Supported Python versions](https://pypi.org/project/chqce/)
|
|
38
|
+
[License: MIT](LICENSE)
|
|
39
|
+
|
|
40
|
+
A terminal CLI that estimates the cost of a ClickHouse SQL query **before you regret running it**, and helps you tune indexes afterwards.
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
╭──────────────────────────────────────────────────────────────╮
|
|
44
|
+
│ ClickHouse Query Cost Estimator v0.1.0 │
|
|
45
|
+
│ Connected to localhost:8123 · database: default · 23.8 │
|
|
46
|
+
╰──────────────────────────────────────────────────────────────╯
|
|
47
|
+
|
|
48
|
+
Cost Estimate (from EXPLAIN ESTIMATE)
|
|
49
|
+
Database Table Parts Est. Rows Marks
|
|
50
|
+
default orders 12 4.5M 550
|
|
51
|
+
|
|
52
|
+
Timing
|
|
53
|
+
Phase Time Notes
|
|
54
|
+
SQL Analyzer (EXPLAIN) 2.1 ms parsing + plan generation
|
|
55
|
+
Execution (client) 234.5 ms wall-clock including network
|
|
56
|
+
Execution (server) 228.3 ms server-side only
|
|
57
|
+
|
|
58
|
+
Execution Stats
|
|
59
|
+
Rows read 4.5M Bytes read 1.2 GB
|
|
60
|
+
Result rows 18.2K Peak memory 45.6 MB
|
|
61
|
+
|
|
62
|
+
── Index Suggestions ──────────────────────────────────────────
|
|
63
|
+
✓ created_at — already in ORDER BY
|
|
64
|
+
⚠ user_id — not in ORDER BY
|
|
65
|
+
ALTER TABLE default.orders
|
|
66
|
+
ADD INDEX idx_orders_user_id user_id
|
|
67
|
+
TYPE bloom_filter(0.01) GRANULARITY 4;
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## What it tells you
|
|
71
|
+
|
|
72
|
+
| Metric | Source |
|
|
73
|
+
|---|---|
|
|
74
|
+
| **Estimated rows / parts / marks** | `EXPLAIN ESTIMATE` |
|
|
75
|
+
| **SQL analyzer time** | time to run `EXPLAIN PLAN` (parsing + planning) |
|
|
76
|
+
| **Execution time (client)** | wall-clock including network round-trip |
|
|
77
|
+
| **Execution time (server)** | `elapsed_ns` from `X-ClickHouse-Summary` header |
|
|
78
|
+
| **Rows / bytes read, peak memory** | `system.query_log` after execution |
|
|
79
|
+
| **Index suggestions** | `system.tables` ORDER BY vs WHERE columns |
|
|
80
|
+
|
|
81
|
+
## Installation
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
pip install chqce
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Or, for local development:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pip install -e .
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Usage
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
# Analyze a single query
|
|
97
|
+
chqce "SELECT count() FROM hits WHERE EventDate = today()"
|
|
98
|
+
|
|
99
|
+
# Interactive mode — paste any query, then type ; or GO to submit
|
|
100
|
+
chqce
|
|
101
|
+
|
|
102
|
+
# Custom connection
|
|
103
|
+
chqce --host my.ch.host --port 9123 --user admin --database analytics \
|
|
104
|
+
"SELECT count() FROM events WHERE user_id = 42"
|
|
105
|
+
|
|
106
|
+
# Estimate only — skip execution (safe for expensive/destructive queries)
|
|
107
|
+
chqce --no-execute "SELECT * FROM huge_table WHERE x > 0"
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Large queries
|
|
111
|
+
|
|
112
|
+
For big, multi-line queries (hundreds or thousands of lines) you don't want to
|
|
113
|
+
wrestle with shell quoting. Read the query from a file or pipe it in instead:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
# From a file — the cleanest option for huge queries
|
|
117
|
+
chqce -f report.sql
|
|
118
|
+
|
|
119
|
+
# Piped via stdin
|
|
120
|
+
cat report.sql | chqce
|
|
121
|
+
chqce < report.sql
|
|
122
|
+
|
|
123
|
+
# If ClickHouse rejects it with a max_query_size error, raise the limit
|
|
124
|
+
chqce -f report.sql --max-query-size 1048576 # 1 MiB
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
The query source is resolved in this priority order:
|
|
128
|
+
|
|
129
|
+
1. `--file` / `-f` — read from a file
|
|
130
|
+
2. `QUERY` argument — passed on the command line
|
|
131
|
+
3. piped **stdin** — when input isn't a terminal
|
|
132
|
+
4. interactive prompt — when nothing else is provided
|
|
133
|
+
|
|
134
|
+
The echoed query is truncated to the first 30 lines in the output, so a large
|
|
135
|
+
query never buries the results.
|
|
136
|
+
|
|
137
|
+
### Timeouts and resource limits
|
|
138
|
+
|
|
139
|
+
Heavy queries can hit server-side limits. The tool catches these, reports them
|
|
140
|
+
clearly, and tells you which flag gets you unstuck:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# Abort the query after 5 minutes instead of waiting indefinitely
|
|
144
|
+
chqce -t 300 "SELECT ... a slow aggregation ..."
|
|
145
|
+
|
|
146
|
+
# Fix "AST is too big" (e.g. a giant IN (...) list)
|
|
147
|
+
chqce -f report.sql --max-ast-elements 500000
|
|
148
|
+
|
|
149
|
+
# Fix "Max query size exceeded"
|
|
150
|
+
chqce -f report.sql --max-query-size 1048576 # 1 MiB
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
When a query fails, the error is classified and shown with suggestions. For
|
|
154
|
+
example, a timeout renders as:
|
|
155
|
+
|
|
156
|
+
```
|
|
157
|
+
╭──────────────────────────────────────────────────────────────╮
|
|
158
|
+
│ ✗ Execution error Query timed out │
|
|
159
|
+
│ Code: 159. DB::Exception: Timeout exceeded: elapsed 30 ... │
|
|
160
|
+
│ │
|
|
161
|
+
│ Suggestions │
|
|
162
|
+
│ The query exceeded its time budget. Try: │
|
|
163
|
+
│ • raise the limit: --timeout 600 (seconds, 0 = unlimited)│
|
|
164
|
+
│ • estimate without running: --no-execute │
|
|
165
|
+
│ • narrow the query with a WHERE filter or LIMIT │
|
|
166
|
+
╰──────────────────────────────────────────────────────────────╯
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Recognized failures: **timeout**, **AST too big**, **parser depth**,
|
|
170
|
+
**query size**, **memory limit**, and **read-row/byte limits**.
|
|
171
|
+
|
|
172
|
+
## Options
|
|
173
|
+
|
|
174
|
+
| Flag | Env var | Default | Description |
|
|
175
|
+
|---|---|---|---|
|
|
176
|
+
| `--file` / `-f` | — | — | Read the query from a file (best for huge queries) |
|
|
177
|
+
| `--host` / `-H` | `CLICKHOUSE_HOST` | `localhost` | ClickHouse host |
|
|
178
|
+
| `--port` / `-p` | `CLICKHOUSE_PORT` | `8123` | HTTP port |
|
|
179
|
+
| `--user` / `-u` | `CLICKHOUSE_USER` | `default` | Username |
|
|
180
|
+
| `--password` / `-P` | `CLICKHOUSE_PASSWORD` | _(empty)_ | Password |
|
|
181
|
+
| `--database` / `-d` | `CLICKHOUSE_DATABASE` | `default` | Default database |
|
|
182
|
+
| `--timeout` / `-t` | — | `0` _(unlimited)_ | Server-side `max_execution_time` in seconds |
|
|
183
|
+
| `--max-query-size` | — | _(server default 262144)_ | Raise ClickHouse `max_query_size` for very large queries |
|
|
184
|
+
| `--max-ast-elements` | — | _(server default 50000)_ | Raise ClickHouse `max_ast_elements` for queries with huge ASTs |
|
|
185
|
+
| `--no-execute` | — | `false` | Skip actual execution; estimate only |
|
|
186
|
+
|
|
187
|
+
## Interactive mode
|
|
188
|
+
|
|
189
|
+
Type or paste a multi-line query, then submit by:
|
|
190
|
+
- Ending the last line with `;`
|
|
191
|
+
- Typing `GO` on its own line
|
|
192
|
+
|
|
193
|
+
Press **Ctrl+C** to exit.
|
|
194
|
+
|
|
195
|
+
## How index suggestions work
|
|
196
|
+
|
|
197
|
+
1. The query is parsed with [sqlglot](https://github.com/tobymao/sqlglot) to extract WHERE-clause columns and condition types (equality, range, LIKE, IN).
|
|
198
|
+
2. Each referenced table's `sorting_key` (falling back to `primary_key` when the sorting key is empty) is fetched from `system.tables`.
|
|
199
|
+
3. Columns not covered by the sort key get a skip-index `ALTER TABLE` suggestion, with the type chosen by condition and column type:
|
|
200
|
+
|
|
201
|
+
| Condition | Column type | Suggested index |
|
|
202
|
+
|---|---|---|
|
|
203
|
+
| `LIKE` / `ILIKE` | any | `tokenbf_v1(32768, 3, 0)` |
|
|
204
|
+
| `>` / `<` / `BETWEEN` | any | `minmax` |
|
|
205
|
+
| `=` / `IN` | String (or any other non-numeric, non-date type) | `bloom_filter(0.01)` |
|
|
206
|
+
| `=` / `IN` | numeric / date | `set(100)` |
|
|
207
|
+
|
|
208
|
+
## Requirements
|
|
209
|
+
|
|
210
|
+
- Python ≥ 3.10
|
|
211
|
+
- ClickHouse with HTTP interface enabled (default port 8123)
|
|
212
|
+
|
|
213
|
+
## Development
|
|
214
|
+
|
|
215
|
+
Run the test suite (no ClickHouse server required — tests use a fake client):
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
pip install -e ".[test]" # or: pip install -r requirements-dev.txt
|
|
219
|
+
pytest
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
The tests live in `tests/` and cover error classification, the estimator,
|
|
223
|
+
index suggestions, output formatting, connection settings, and the CLI.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
chqce/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
|
|
2
|
+
chqce/cli.py,sha256=s3PjZsVvpIEXslKxH6rCLEsYImrkwO7ndZgFzvKGZVs,6435
|
|
3
|
+
chqce/connection.py,sha256=qMYLLV3HReHvBkcNgMDA8MzE4yQp9okFWOZveWYK_-g,1810
|
|
4
|
+
chqce/errors.py,sha256=SDw4NPGYRyk58iyaIy6ARN0Zoh2i0zpWcyinJqOjADY,3332
|
|
5
|
+
chqce/estimator.py,sha256=cB3yHXJ8uqDVQOgh_sFnMeAGp0aPYQLe1W6i6I9AOTI,4290
|
|
6
|
+
chqce/formatter.py,sha256=LChQHR2atkawtGqDwiH4dicAasV_zQRN-yD2L9tf5x8,8872
|
|
7
|
+
chqce/suggestions.py,sha256=lb6yhjXUAHSUDqMYPeemqEDInnrxzrn2UThBT46gx14,5246
|
|
8
|
+
chqce-0.1.0.dist-info/licenses/LICENSE,sha256=WzwMceuO8oWaxTqWZNRJFRIcXS4AJIInPqiba2XtI_Y,1070
|
|
9
|
+
chqce-0.1.0.dist-info/METADATA,sha256=Xj__fCoc3mQJXuCgPrse00vGCMNRO1PxjlFam8UlyTk,9014
|
|
10
|
+
chqce-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
11
|
+
chqce-0.1.0.dist-info/entry_points.txt,sha256=0RJ1jIhBZ9T0YTXHkKQP9mPowWEo9aD7SbwYEDdUf2Y,40
|
|
12
|
+
chqce-0.1.0.dist-info/top_level.txt,sha256=BMw-L4xkLh5Frcmnczp6pMbuhzlhoPL7L8rB0BfQXWs,6
|
|
13
|
+
chqce-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ahmad Darwich
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
chqce
|