chqce 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chqce/__init__.py ADDED
@@ -0,0 +1 @@
1
# Package version; cli.py passes it to click's -V/--version option.
__version__ = "0.1.0"
chqce/cli.py ADDED
@@ -0,0 +1,163 @@
1
+ import sys
2
+
3
+ import click
4
+ from rich.console import Console
5
+
6
+ from . import __version__
7
+ from .connection import create_client, test_connection
8
+ from .estimator import QueryEstimator
9
+ from .formatter import console, print_header, print_result
10
+ from .suggestions import get_index_suggestions
11
+
12
# Console bound to stderr; the CLI prints its error messages through it.
_err = Console(stderr=True)
13
+
14
+
15
def _collect_interactive() -> str:
    """Collect a multi-line SQL query from stdin.

    Submit by ending a line with ';' or typing GO on its own line. A bare
    ``GO`` or ``;`` entered before any other line is ignored.

    Returns:
        The collected lines joined with newlines, stripped of surrounding
        whitespace.

    Raises:
        SystemExit: With status 0 on Ctrl+C, or when stdin reaches
            end-of-file before any line has been entered.
    """
    console.print(
        "\n[dim]Paste or type your SQL query."
        " End with [bold];[/bold] or type [bold]GO[/bold] on its own line."
        " [bold]Ctrl+C[/bold] to exit.[/dim]\n"
    )
    lines: list[str] = []
    try:
        while True:
            prefix = "[bold cyan]SQL>[/bold cyan] " if not lines else " [dim]>[/dim] "
            console.print(prefix, end="")
            try:
                line = input()
            except EOFError:
                if lines:
                    # EOF after some input: submit what was typed so far.
                    break
                # EOF with nothing typed means stdin is exhausted (Ctrl+D or
                # an empty pipe). Returning "" here would only make the
                # interactive loop prompt again and hit EOF forever, so
                # leave the same way Ctrl+C does.
                console.print("\n[dim]Bye![/dim]")
                sys.exit(0)
            stripped = line.strip()
            if stripped.upper() == "GO" or stripped == ";":
                if lines:
                    break
                # Nothing entered yet: ignore the bare terminator.
                continue
            lines.append(line)
            if stripped.endswith(";"):
                break
    except KeyboardInterrupt:
        console.print("\n[dim]Bye![/dim]")
        sys.exit(0)

    return "\n".join(lines).strip()
47
+
48
+
49
+ def _resolve_query(query: str | None, file: str | None) -> str | None:
50
+ """Determine the query source, in priority order.
51
+
52
+ 1. --file FILE read the query from a file (best for huge queries)
53
+ 2. QUERY argument passed directly on the command line
54
+ 3. piped stdin e.g. `chqce < query.sql` or `cat q.sql | chqce`
55
+ 4. None -> caller falls back to interactive mode
56
+ """
57
+ if file:
58
+ with open(file, "r", encoding="utf-8") as fh:
59
+ return fh.read().strip()
60
+ if query:
61
+ return query
62
+ # Query piped in on stdin (non-interactive).
63
+ if not sys.stdin.isatty():
64
+ data = sys.stdin.read().strip()
65
+ if data:
66
+ return data
67
+ return None
68
+
69
+
70
def _run(query: str, estimator: QueryEstimator, client, database: str, execute: bool) -> None:
    """Analyze one query and print its report.

    The estimate and the index suggestions are computed while a status
    spinner is shown; the report is printed afterwards.
    """
    spinner = console.status("[bold green]Analyzing…[/bold green]", spinner="dots")
    with spinner:
        estimate = estimator.estimate(query, execute=execute)
        index_hints = get_index_suggestions(query, client, current_database=database)
    print_result(estimate, index_hints)
75
+
76
+
77
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("query", required=False)
@click.option(
    "--file", "-f", "file",
    type=click.Path(exists=True, dir_okay=False),
    default=None,
    help="Read the query from a file (best for huge queries)",
)
@click.option(
    "--host", "-H",
    default="localhost",
    envvar="CLICKHOUSE_HOST",
    show_default=True,
    help="ClickHouse host",
)
@click.option(
    "--port", "-p",
    default=8123,
    envvar="CLICKHOUSE_PORT",
    type=int,
    show_default=True,
    help="HTTP(S) port",
)
@click.option(
    "--user", "-u",
    default="default",
    envvar="CLICKHOUSE_USER",
    show_default=True,
    help="Username",
)
@click.option(
    "--password", "-P",
    default="",
    envvar="CLICKHOUSE_PASSWORD",
    help="Password (or set CLICKHOUSE_PASSWORD)",
)
@click.option(
    "--database", "-d",
    default="default",
    envvar="CLICKHOUSE_DATABASE",
    show_default=True,
    help="Default database",
)
@click.option(
    "--max-query-size",
    default=0,
    type=int,
    metavar="BYTES",
    help="Raise ClickHouse max_query_size for very large queries "
         "(server default is 262144)",
)
@click.option(
    "--timeout", "-t",
    default=0,
    type=int,
    metavar="SECONDS",
    help="Server-side max_execution_time; query is aborted after this "
         "many seconds (0 = unlimited)",
)
@click.option(
    "--max-ast-elements",
    default=0,
    type=int,
    metavar="N",
    help="Raise ClickHouse max_ast_elements for queries that fail with "
         "'AST is too big' (server default is 50000)",
)
@click.option(
    "--no-execute",
    is_flag=True,
    default=False,
    help="Estimate only — do not actually run the query",
)
@click.version_option(__version__, "-V", "--version")
def cli(query, file, host, port, user, password, database, max_query_size,
        timeout, max_ast_elements, no_execute):
    """ClickHouse Query Cost Estimator.

    Estimates rows scanned, memory usage, and execution time for a ClickHouse
    SQL query, and suggests indexes based on WHERE-clause columns.

    \b
    The query can come from (in priority order):
    • --file query.sql best for huge / multi-line queries
    • a QUERY argument chqce "SELECT ..."
    • piped stdin chqce < query.sql
    • interactive prompt run with no query at all

    \b
    Environment variables (override defaults):
    CLICKHOUSE_HOST, CLICKHOUSE_PORT, CLICKHOUSE_USER,
    CLICKHOUSE_PASSWORD, CLICKHOUSE_DATABASE

    \b
    Examples:
    chqce "SELECT count() FROM hits WHERE EventDate = today()"
    chqce -f report.sql --no-execute
    chqce -t 600 "SELECT ... a slow query ..."
    cat report.sql | chqce --max-query-size 1048576 --max-ast-elements 500000
    """
    # The docstring above is the --help text rendered by click; keep it as is.

    # 1. Work out where the query comes from (file, argument, pipe or none).
    try:
        sql_text = _resolve_query(query, file)
    except OSError as exc:
        _err.print(f"[red]Could not read query file:[/red] {exc}")
        sys.exit(1)

    # 2. Connect and probe the server; any failure ends the run with status 1.
    try:
        client = create_client(
            host=host,
            port=port,
            user=user,
            password=password,
            database=database,
            max_query_size=max_query_size,
            max_execution_time=timeout,
            max_ast_elements=max_ast_elements,
        )
        connected, server_info = test_connection(client)
    except Exception as exc:
        _err.print(f"[red]Connection error:[/red] {exc}")
        sys.exit(1)

    if not connected:
        # On failure the second element carries the error text.
        _err.print(f"[red]Connection failed:[/red] {server_info}")
        sys.exit(1)

    print_header(server_info, host, port, database)

    estimator = QueryEstimator(client)
    should_execute = not no_execute

    # 3a. One-shot mode: a query was supplied up front.
    if sql_text:
        _run(sql_text, estimator, client, database, should_execute)
        return

    # 3b. Interactive mode: keep prompting; empty submissions are skipped.
    while True:
        typed = _collect_interactive()
        if typed:
            _run(typed, estimator, client, database, should_execute)
            console.print("[dim]" + "─" * 60 + "[/dim]")
chqce/connection.py ADDED
@@ -0,0 +1,55 @@
1
+ import clickhouse_connect
2
+ from clickhouse_connect.driver import Client
3
+
4
+
5
# Socket send/receive timeout (seconds) used when no execution limit is given.
DEFAULT_SOCKET_TIMEOUT = 300


def create_client(
    host: str = "localhost",
    port: int = 8123,
    user: str = "default",
    password: str = "",
    database: str = "default",
    max_query_size: int = 0,
    max_execution_time: int = 0,
    max_ast_elements: int = 0,
) -> Client:
    """Build a clickhouse_connect client with optional per-session limits.

    A limit is forwarded as a ClickHouse setting only when it is greater than
    zero. ``max_ast_elements`` is sent as both ``max_ast_elements`` and
    ``max_expanded_ast_elements``.

    Returns:
        The client returned by ``clickhouse_connect.get_client``.
    """
    # Candidate settings in the order they are sent; non-positive values mean
    # "not overridden by the caller" and are dropped.
    requested = (
        ("max_query_size", max_query_size),
        ("max_execution_time", max_execution_time),
        ("max_ast_elements", max_ast_elements),
        ("max_expanded_ast_elements", max_ast_elements),
    )
    settings = {name: value for name, value in requested if value > 0}

    # Keep the client socket alive a bit longer than the server-side limit so
    # ClickHouse returns a clean timeout error instead of the socket dropping.
    socket_timeout = (
        max_execution_time + 30 if max_execution_time > 0 else DEFAULT_SOCKET_TIMEOUT
    )

    return clickhouse_connect.get_client(
        host=host,
        port=port,
        username=user,
        password=password,
        database=database,
        connect_timeout=10,
        send_receive_timeout=socket_timeout,
        settings=settings,
    )
47
+
48
+
49
def test_connection(client: Client) -> tuple[bool, str]:
    """Probe the server by running ``SELECT version()``.

    Returns:
        ``(True, version)`` when the query succeeds, otherwise
        ``(False, error_text)``. Exceptions are reported in the tuple, not
        raised.
    """
    try:
        first_row = client.query("SELECT version()").result_rows[0]
        outcome = (True, first_row[0])
    except Exception as exc:
        outcome = (False, str(exc))
    return outcome
chqce/errors.py ADDED
@@ -0,0 +1,87 @@
1
+ """Classify ClickHouse / client errors into actionable hints.
2
+
3
+ ClickHouse surfaces resource limits as server errors (timeout, AST too big,
4
+ memory, query size). We map the raw message to a short category and a hint
5
+ that tells the user which flag or setting can get them unstuck.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Optional
10
+
11
+
12
@dataclass
class ClassifiedError:
    """A recognized failure: its category, a short title and an optional hint."""

    # Machine-readable bucket.
    category: str
    # Short human label.
    title: str
    # Actionable suggestion, or None.
    hint: Optional[str]
17
+
18
+
19
# Rule table consumed by classify_error().
# Each entry is (category, title, substrings-to-match, hint). Substrings are
# written in lower case because they are compared against the lower-cased
# message. Order matters — the first matching rule wins.
_RULES = [
    (
        "timeout",
        "Query timed out",
        [
            "timeout_exceeded",
            "timeout exceeded",
            "max_execution_time",
            "timed out",
            "read timed out",
            "readtimeout",
        ],
        "The query exceeded its time budget. Try:\n"
        " • raise the limit: --timeout 600 (seconds, 0 = unlimited)\n"
        " • estimate without running: --no-execute\n"
        " • narrow the query with a WHERE filter or LIMIT",
    ),
    (
        "ast_too_big",
        "Query AST is too big",
        [
            "too_big_ast",
            "ast is too big",
            "max_ast_elements",
            "max_expanded_ast_elements",
        ],
        "The parsed query has too many elements (often huge IN-lists or "
        "deeply nested expressions). Try:\n"
        " • raise the limit: --max-ast-elements 500000\n"
        " • replace a long IN (1, 2, 3, …) with a subquery or a "
        "temporary table / JOIN",
    ),
    (
        "parser_depth",
        "Query nesting is too deep",
        [
            "too_deep_recursion",
            "maximum parse depth",
            "max_parser_depth",
        ],
        "The query nests deeper than the parser allows. Try:\n"
        " • flatten deeply nested subqueries or boolean expressions\n"
        " • raise the server setting max_parser_depth",
    ),
    (
        "query_size",
        "Query text is too large",
        [
            "max query size exceeded",
            "max_query_size",
        ],
        "The raw query exceeds ClickHouse's max_query_size (256 KiB default). "
        "Try:\n"
        " • raise the limit: --max-query-size 1048576 (bytes)",
    ),
    (
        "memory",
        "Query ran out of memory",
        [
            "memory_limit_exceeded",
            "memory limit",
            "max_memory_usage",
        ],
        "The query exceeded the memory budget. Try:\n"
        " • add a WHERE filter or LIMIT to scan less data\n"
        " • pre-aggregate, or raise the server setting max_memory_usage",
    ),
    (
        "read_limit",
        "Query scans too much data",
        [
            "too_many_rows",
            "max_rows_to_read",
            "too_many_bytes",
            "max_bytes_to_read",
        ],
        "The query would read more rows/bytes than allowed. Try:\n"
        " • add a WHERE filter on the table's ORDER BY columns\n"
        " • check the Index Suggestions below",
    ),
]


def classify_error(message: Optional[str]) -> Optional["ClassifiedError"]:
    """Map a raw error message to the first rule whose substring it contains.

    Matching is case-insensitive.

    Returns:
        A ClassifiedError built from the matching rule, or None when the
        message is empty/None or no rule matches.
    """
    if not message:
        return None

    haystack = message.lower()
    matched = next(
        (rule for rule in _RULES if any(needle in haystack for needle in rule[2])),
        None,
    )
    if matched is None:
        return None

    category, title, _needles, hint = matched
    return ClassifiedError(category=category, title=title, hint=hint)
chqce/estimator.py ADDED
@@ -0,0 +1,125 @@
1
+ import time
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional, List
4
+
5
+
6
@dataclass
class TableEstimate:
    """One row of EXPLAIN ESTIMATE output: the estimated work for one table."""

    # Database and table the row refers to.
    database: str
    table: str
    # Parts, rows and marks reported for that table.
    parts: int
    rows: int
    marks: int
13
+
14
+
15
@dataclass
class EstimateResult:
    """Everything collected while analyzing one query.

    Only ``query`` is required; every other field starts out zero/empty and
    is filled in by the step that produces it.
    """

    # The SQL text that was analyzed.
    query: str

    # --- EXPLAIN ESTIMATE: per-table rows plus their running totals ---
    table_estimates: List["TableEstimate"] = field(default_factory=list)
    total_rows: int = 0
    total_parts: int = 0
    total_marks: int = 0

    # --- EXPLAIN PLAN: plan text, one line per result row ---
    query_plan: str = ""

    # --- Timing, all in milliseconds ---
    # Time to run EXPLAIN (analysis + planning).
    explain_time_ms: float = 0.0
    # Wall-clock time for full execution.
    execution_time_ms: float = 0.0
    # Server-side time reported by ClickHouse.
    server_time_ms: float = 0.0

    # --- Execution statistics ---
    read_rows: int = 0
    read_bytes: int = 0
    result_rows: int = 0
    result_bytes: int = 0
    memory_usage_bytes: int = 0

    # True once the query itself has been run.
    was_executed: bool = False

    # --- Errors (non-fatal — other steps still run) ---
    explain_error: Optional[str] = None
    execution_error: Optional[str] = None
45
+
46
+
47
class QueryEstimator:
    """Estimate, and optionally execute, a query through a ClickHouse client.

    The client only needs a ``query(sql, parameters=...)`` method whose result
    exposes ``result_rows``; for executed queries ``summary`` and ``query_id``
    are read as well.
    """

    def __init__(self, client):
        self.client = client

    @staticmethod
    def _strip_leading_comments(query: str) -> str:
        """Return ``query`` without leading whitespace and SQL comments.

        Handles ``-- line`` and ``/* block */`` comments. An unterminated
        block comment yields an empty string.
        """
        text = query.lstrip()
        while text:
            if text.startswith("--"):
                # Drop everything up to and including the end of the line.
                text = text.partition("\n")[2]
            elif text.startswith("/*"):
                end = text.find("*/", 2)
                if end == -1:
                    return ""
                text = text[end + 2:]
            else:
                break
            text = text.lstrip()
        return text

    @staticmethod
    def _is_select(query: str) -> bool:
        """Tell whether ``query`` starts with SELECT or WITH.

        Leading comments are skipped and the keyword only has to end at a
        non-identifier character, so ``select*from t``, ``SELECT(1)`` and a
        query that opens with ``-- comment`` are all recognized, while
        ``SELECTED`` is not.
        """
        # 7 characters cover the longest keyword plus the character after it.
        head = QueryEstimator._strip_leading_comments(query)[:7].upper()
        for keyword in ("SELECT", "WITH"):
            if head.startswith(keyword):
                follower = head[len(keyword):len(keyword) + 1]
                return not (follower.isalnum() or follower == "_")
        return False

    def estimate(self, query: str, execute: bool = True) -> "EstimateResult":
        """Run the analysis steps for ``query`` and collect their results.

        Args:
            query: SQL text to analyze.
            execute: When True, the query itself is run after the EXPLAIN
                steps and its statistics are recorded.

        Returns:
            An EstimateResult. Failures are stored in ``explain_error`` /
            ``execution_error`` instead of being raised.
        """
        result = EstimateResult(query=query)

        # The EXPLAIN steps are only attempted for SELECT / WITH queries.
        if self._is_select(query):
            self._explain_estimate(query, result)
            self._explain_plan(query, result)

        if execute:
            self._execute(query, result)

        return result

    def _explain_estimate(self, query: str, result: "EstimateResult") -> None:
        """Step 1: EXPLAIN ESTIMATE — rows/parts/marks per table."""
        try:
            t0 = time.perf_counter()
            er = self.client.query(f"EXPLAIN ESTIMATE {query}")
            result.explain_time_ms = (time.perf_counter() - t0) * 1000

            for row in er.result_rows:
                te = TableEstimate(
                    database=str(row[0]),
                    table=str(row[1]),
                    parts=int(row[2]),
                    rows=int(row[3]),
                    marks=int(row[4]),
                )
                result.table_estimates.append(te)
                result.total_rows += te.rows
                result.total_parts += te.parts
                result.total_marks += te.marks
        except Exception as e:
            result.explain_error = str(e)

    def _explain_plan(self, query: str, result: "EstimateResult") -> None:
        """Step 2: EXPLAIN PLAN — human-readable execution plan."""
        try:
            pr = self.client.query(f"EXPLAIN PLAN {query}")
            result.query_plan = "\n".join(str(row[0]) for row in pr.result_rows)
        except Exception:
            # Best effort: the plan is optional, so a failure here just
            # leaves query_plan empty.
            pass

    def _execute(self, query: str, result: "EstimateResult") -> None:
        """Step 3: run the query and collect real statistics."""
        try:
            t0 = time.perf_counter()
            xr = self.client.query(query)
            result.execution_time_ms = (time.perf_counter() - t0) * 1000
            result.was_executed = True
            result.result_rows = len(xr.result_rows)

            summary = xr.summary or {}
            result.read_rows = int(summary.get("read_rows", 0))
            result.read_bytes = int(summary.get("read_bytes", 0))
            result.result_bytes = int(summary.get("result_bytes", 0))
            elapsed_ns = int(summary.get("elapsed_ns", 0))
            if elapsed_ns:
                result.server_time_ms = elapsed_ns / 1_000_000

            query_id = getattr(xr, "query_id", None)
            if query_id:
                self._lookup_memory_usage(query_id, result)
        except Exception as e:
            result.execution_error = str(e)

    def _lookup_memory_usage(self, query_id, result: "EstimateResult") -> None:
        """Fill ``memory_usage_bytes`` from system.query_log when a row exists."""
        # Memory usage lives in query_log; give the flush a moment.
        time.sleep(0.05)
        try:
            mem_res = self.client.query(
                "SELECT memory_usage FROM system.query_log "
                "WHERE type = 'QueryFinish' AND query_id = {qid:String} LIMIT 1",
                parameters={"qid": query_id},
            )
            if mem_res.result_rows:
                result.memory_usage_bytes = int(mem_res.result_rows[0][0])
        except Exception:
            # Best effort: without the log entry the value stays at 0.
            pass
chqce/formatter.py ADDED
@@ -0,0 +1,227 @@
1
+ from typing import List
2
+
3
+ from rich import box
4
+ from rich.console import Console
5
+ from rich.panel import Panel
6
+ from rich.syntax import Syntax
7
+ from rich.table import Table
8
+
9
+ from .errors import classify_error
10
+ from .estimator import EstimateResult
11
+ from .suggestions import IndexSuggestion
12
+
13
# Shared Rich console that the printing helpers in this module write to;
# cli.py imports it as well.
console = Console()
14
+
15
+
16
+ # ── helpers ─────────────────────────────────────────────────────────────────
17
+
18
def _fmt_bytes(n: int) -> str:
    """Format a byte count with a B/KB/MB/GB suffix (1024-based thresholds).

    Zero and negative values are shown as an em dash.
    """
    if n <= 0:
        return "—"
    if n >= 1 << 30:
        return f"{n / (1 << 30):.1f} GB"
    if n >= 1 << 20:
        return f"{n / (1 << 20):.1f} MB"
    if n >= 1 << 10:
        return f"{n / (1 << 10):.1f} KB"
    return f"{n} B"
25
+
26
+
27
def _fmt_rows(n: int) -> str:
    """Abbreviate a row count with a K/M/B suffix; below 1000 it is shown as is."""
    scales = (("B", 1_000_000_000), ("M", 1_000_000), ("K", 1_000))
    for suffix, scale in scales:
        if n >= scale:
            return f"{n / scale:.1f}{suffix}"
    return str(n)
35
+
36
+
37
def _fmt_ms(ms: float) -> str:
    """Format a duration given in milliseconds as µs, ms or s.

    Zero and negative values are shown as an em dash.
    """
    if ms <= 0:
        return "—"

    if ms >= 1_000:
        text = f"{ms / 1_000:.2f} s"
    elif ms >= 1:
        text = f"{ms:.1f} ms"
    else:
        text = f"{ms * 1_000:.0f} µs"
    return text
45
+
46
+
47
+ # ── public API ───────────────────────────────────────────────────────────────
48
+
49
def print_header(version: str, host: str, port: int, database: str) -> None:
    """Print the banner panel shown once after connecting.

    Args:
        version: ClickHouse server version to display.
        host: Host the client is connected to.
        port: Port the client is connected to.
        database: Database name to display.
    """
    # Read the package version at call time so the banner cannot drift from
    # the value the CLI reports for --version.
    from . import __version__

    console.print()
    console.print(
        Panel(
            f"[bold cyan]ClickHouse Query Cost Estimator[/bold cyan] [dim]v{__version__}[/dim]\n"
            f"[dim]Connected to [green]{host}:{port}[/green]"
            f" · database: [green]{database}[/green]"
            f" · ClickHouse [green]{version}[/green][/dim]",
            box=box.ROUNDED,
            border_style="cyan",
        )
    )
61
+
62
+
63
def print_result(result: EstimateResult, suggestions: List[IndexSuggestion]) -> None:
    """Render the full report for one analyzed query.

    Sections are printed in this order: the query echo, any errors, the cost
    estimate table, timing, execution statistics, the query plan and the
    index suggestions. After an execution error only the index suggestions
    follow.

    Args:
        result: Data collected for the query.
        suggestions: Index suggestions to list at the end.
    """
    # Local import: the plan text comes from the server and must not be
    # interpreted as Rich markup.
    from rich.markup import escape

    console.print()

    # ── Query display ────────────────────────────────────────────────────────
    # Truncate the echo for huge queries so results stay visible.
    QUERY_ECHO_LINES = 30
    q = result.query.strip()
    q_lines = q.split("\n")
    if len(q_lines) > QUERY_ECHO_LINES:
        head = "\n".join(q_lines[:QUERY_ECHO_LINES])
        body = Syntax(head, "sql", theme="monokai")
        title = (
            f"[bold]Query[/bold] "
            f"[dim](showing {QUERY_ECHO_LINES} of {len(q_lines)} lines, "
            f"{len(q):,} chars)[/dim]"
        )
    else:
        body = Syntax(q, "sql", theme="monokai")
        title = "[bold]Query[/bold]"
    console.print(Panel(body, title=title, border_style="blue"))
    console.print()

    # ── Errors ───────────────────────────────────────────────────────────────
    if result.explain_error:
        _print_error("⚠ Estimate unavailable", result.explain_error,
                     style="yellow", border="yellow")
    if result.execution_error:
        _print_error("✗ Execution error", result.execution_error,
                     style="red", border="red")
        # Nothing else to report after a failed run except the suggestions.
        _print_index_suggestions(suggestions)
        return

    # ── Cost estimate ────────────────────────────────────────────────────────
    if result.table_estimates:
        t = Table(
            title="[bold]Cost Estimate [dim](from EXPLAIN ESTIMATE)[/dim][/bold]",
            box=box.SIMPLE_HEAD,
            header_style="bold magenta",
        )
        t.add_column("Database", style="cyan")
        t.add_column("Table", style="cyan")
        t.add_column("Parts", justify="right")
        t.add_column("Est. Rows", justify="right", style="yellow")
        t.add_column("Marks", justify="right")

        for te in result.table_estimates:
            t.add_row(te.database, te.table, str(te.parts), _fmt_rows(te.rows), str(te.marks))

        # A totals row only adds information when several tables are listed.
        if len(result.table_estimates) > 1:
            t.add_section()
            t.add_row(
                "[bold]Total[/bold]",
                "",
                f"[bold]{result.total_parts}[/bold]",
                f"[bold yellow]{_fmt_rows(result.total_rows)}[/bold yellow]",
                f"[bold]{result.total_marks}[/bold]",
            )
        console.print(t)
        console.print()

    # ── Timing ───────────────────────────────────────────────────────────────
    t = Table(
        title="[bold]Timing[/bold]",
        box=box.SIMPLE_HEAD,
        header_style="bold magenta",
    )
    t.add_column("Phase", style="cyan")
    t.add_column("Time", justify="right", style="green")
    t.add_column("Notes", style="dim")

    if result.explain_time_ms > 0:
        t.add_row(
            "SQL Analyzer (EXPLAIN)",
            _fmt_ms(result.explain_time_ms),
            "parsing + plan generation",
        )
    if result.was_executed:
        t.add_row(
            "Execution (client)",
            _fmt_ms(result.execution_time_ms),
            "wall-clock including network",
        )
    if result.server_time_ms > 0:
        t.add_row(
            "Execution (server)",
            _fmt_ms(result.server_time_ms),
            "server-side only",
        )

    console.print(t)
    console.print()

    # ── Execution stats ───────────────────────────────────────────────────────
    if result.was_executed:
        t = Table(
            title="[bold]Execution Stats[/bold]",
            box=box.SIMPLE_HEAD,
            header_style="bold magenta",
        )
        t.add_column("Metric", style="cyan")
        t.add_column("Value", justify="right", style="green")

        t.add_row("Rows read", _fmt_rows(result.read_rows))
        t.add_row("Bytes read", _fmt_bytes(result.read_bytes))
        t.add_row("Result rows", _fmt_rows(result.result_rows))
        t.add_row("Result size", _fmt_bytes(result.result_bytes))
        t.add_row("Peak memory", _fmt_bytes(result.memory_usage_bytes))

        console.print(t)
        console.print()

    # ── Query plan ────────────────────────────────────────────────────────────
    if result.query_plan:
        lines = result.query_plan.split("\n")
        # Escape the plan so bracketed text in it is shown literally; only
        # the "more lines" footer below is meant to carry markup.
        body = escape("\n".join(lines[:40]))
        if len(lines) > 40:
            body += f"\n[dim]… {len(lines) - 40} more lines[/dim]"
        console.print(
            Panel(body, title="[bold]Query Plan[/bold]", border_style="dim", expand=False)
        )
        console.print()

    # ── Index suggestions ─────────────────────────────────────────────────────
    _print_index_suggestions(suggestions)
187
+
188
+
189
def _print_error(label: str, message: str, style: str, border: str) -> None:
    """Render an error with a classified, actionable hint when we recognize it.

    Args:
        label: Heading text, printed in ``style``.
        message: Raw error text; shown dimmed and literally.
        style: Rich style applied to the label.
        border: Rich style applied to the panel border.
    """
    # Local import: the raw message comes from the server/driver and can
    # contain square brackets, which Rich would otherwise parse as markup
    # tags (dropping text, or failing on a stray closing tag).
    from rich.markup import escape

    classified = classify_error(message)
    msg = escape(message.strip())
    body = f"[{style}]{label}[/{style}]"
    if classified:
        body += f" [bold]{classified.title}[/bold]"
    body += f"\n[dim]{msg}[/dim]"
    if classified and classified.hint:
        body += f"\n\n[bold]Suggestions[/bold]\n{classified.hint}"
    console.print(Panel(body, border_style=border, expand=False))
    console.print()
201
+
202
+
203
def _print_index_suggestions(suggestions: List[IndexSuggestion]) -> None:
    """Print the index suggestion list, or a short note when it is empty.

    Columns already in the sorting key get a green check mark, the others a
    yellow warning sign; a suggestion's SQL, when present, is printed below
    its line.
    """
    if not suggestions:
        console.print("[dim]No index suggestions — add a WHERE clause to get recommendations.[/dim]")
        console.print()
        return

    console.rule("[bold]Index Suggestions[/bold]", style="magenta")
    console.print()

    for item in suggestions:
        marker = "[green]✓[/green]" if item.in_primary_key else "[yellow]⚠[/yellow]"
        console.print(
            f" {marker} [bold]{item.column}[/bold]"
            f" [dim]on {item.table}[/dim] — {item.reason}"
        )
        if item.suggestion:
            highlighted = Syntax(item.suggestion, "sql", theme="monokai", padding=(0, 4))
            console.print(highlighted)
        console.print()
chqce/suggestions.py ADDED
@@ -0,0 +1,156 @@
1
+ from dataclasses import dataclass
2
+ from typing import List, Set
3
+
4
+ import sqlglot
5
+ import sqlglot.expressions as exp
6
+
7
+
8
@dataclass
class IndexSuggestion:
    """One index recommendation for a column used in a WHERE clause."""

    # Table the column was checked against.
    table: str
    # Column name.
    column: str
    # Short explanation shown next to the column.
    reason: str
    # SQL statement to show, or "" when there is none.
    suggestion: str
    # True when the column is already part of the table's sorting key.
    in_primary_key: bool = False
15
+
16
+
17
def _parse_query(query: str):
    """Extract the referenced tables and the WHERE-clause columns of a query.

    The query is parsed with the ClickHouse dialect first and with sqlglot's
    default dialect as a fallback.

    Returns:
        ``(tables, where_cols)`` where ``tables`` maps a lower-cased alias
        (or table name) to ``(database, table_name)`` and ``where_cols`` maps
        a lower-cased column name to the set of condition kinds it appears in
        ("equality", "range", "like", "in"). Both are empty when the query
        cannot be parsed.
    """
    tree = None
    for parse_kwargs in ({"read": "clickhouse"}, {}):
        try:
            tree = sqlglot.parse_one(query, **parse_kwargs)
            break
        except Exception:
            continue
    else:
        # Neither dialect could parse the query.
        return {}, {}

    # alias/name -> (database, table_name)
    tables: dict[str, tuple[str, str]] = {
        (table_node.alias or table_node.name).lower(): (table_node.db or "", table_node.name)
        for table_node in tree.find_all(exp.Table)
        if table_node.name
    }

    # column_name -> set of condition kinds
    where_cols: dict[str, set] = {}

    where = tree.find(exp.Where)
    if where:
        # Scanned in this order so columns are recorded in a stable order.
        condition_kinds = (
            ((exp.EQ,), "equality"),
            ((exp.Between,), "range"),
            ((exp.LT, exp.LTE, exp.GT, exp.GTE), "range"),
            ((exp.Like, exp.ILike), "like"),
            ((exp.In,), "in"),
        )
        for node_types, kind in condition_kinds:
            for condition in where.find_all(*node_types):
                for column in condition.find_all(exp.Column):
                    if column.name:
                        where_cols.setdefault(column.name.lower(), set()).add(kind)

    return tables, where_cols
61
+
62
+
63
def _get_order_by_cols(client, database: str, table: str) -> List[str]:
    """Return the table's sorting-key expressions as a list of strings.

    The key is read from ``system.tables`` (``sorting_key``, falling back to
    ``primary_key``) and split on commas. Any failure, a missing table or an
    empty key yields an empty list.
    """
    try:
        rows = client.query(
            "SELECT sorting_key, primary_key FROM system.tables "
            "WHERE database = {db:String} AND name = {t:String}",
            parameters={"db": database, "t": table},
        ).result_rows
        if rows:
            sorting_key, primary_key = rows[0]
            key_expr = sorting_key or primary_key
            if key_expr:
                pieces = (piece.strip() for piece in key_expr.split(","))
                return [piece for piece in pieces if piece]
    except Exception:
        # Best effort: the lookup is optional, so errors mean "unknown".
        pass
    return []
79
+
80
+
81
def _get_col_type(client, database: str, table: str, column: str) -> str:
    """Look up a column's type in ``system.columns``.

    Returns:
        The type string, or "" when the column is not found or the lookup
        fails.
    """
    try:
        rows = client.query(
            "SELECT type FROM system.columns "
            "WHERE database={db:String} AND table={t:String} AND name={c:String}",
            parameters={"db": database, "t": table, "c": column},
        ).result_rows
        if rows:
            return rows[0][0]
    except Exception:
        # Best effort: an unknown type is reported as "".
        pass
    return ""
93
+
94
+
95
def _best_index_type(col_type: str, conditions: Set[str]) -> str:
    """Choose a data-skipping index type from the column type and its conditions.

    The condition kinds take priority ("like", then "range"); otherwise the
    lower-cased column type decides, with ``bloom_filter(0.01)`` as the
    fallback.
    """
    if "like" in conditions:
        return "tokenbf_v1(32768, 3, 0)"
    if "range" in conditions:
        return "minmax"

    type_name = col_type.lower()
    # "fixedstring" also contains "string".
    if "string" in type_name:
        return "bloom_filter(0.01)"

    # "uint" contains "int" and "datetime" contains "date", so four probes
    # cover all six spellings.
    numeric_like = any(tag in type_name for tag in ("int", "float", "decimal", "date"))
    if not numeric_like:
        return "bloom_filter(0.01)"

    if "in" in conditions or "equality" in conditions:
        return "set(100)"
    return "minmax"
106
+
107
+
108
def get_index_suggestions(
    query: str, client, current_database: str = "default"
) -> List[IndexSuggestion]:
    """Suggest data-skipping indexes for the WHERE-clause columns of a query.

    Each WHERE column is checked against each distinct table the query
    references: columns in the table's sorting key are reported as already
    covered, the others get an ``ALTER TABLE ... ADD INDEX`` statement.

    Args:
        query: SQL text to inspect.
        client: ClickHouse client used for the system-table lookups.
        current_database: Database assumed for tables without a qualifier.

    Returns:
        The suggestions, or an empty list when the query has no WHERE
        columns or no tables (including when it cannot be parsed).
    """
    tables, where_cols = _parse_query(query)
    if not where_cols or not tables:
        return []

    # A table referenced under several aliases is reported once; dict keys
    # keep the first-seen order.
    unique_tables = dict.fromkeys(
        (db or current_database, table_name) for db, table_name in tables.values()
    )
    # WHERE columns are collected by bare name, so with several tables the
    # parse alone does not say which table a column belongs to.
    multi_table = len(unique_tables) > 1

    suggestions: List[IndexSuggestion] = []

    for effective_db, table_name in unique_tables:
        pk_cols = _get_order_by_cols(client, effective_db, table_name)
        pk_lower = {c.lower() for c in pk_cols}
        full_table = f"{effective_db}.{table_name}" if effective_db else table_name

        for col_name, conditions in where_cols.items():
            if col_name in pk_lower:
                suggestions.append(
                    IndexSuggestion(
                        table=full_table,
                        column=col_name,
                        reason=f"already in ORDER BY {pk_cols}",
                        suggestion="",
                        in_primary_key=True,
                    )
                )
                continue

            col_type = _get_col_type(client, effective_db, table_name, col_name)
            if multi_table and not col_type:
                # No type means the column was not found in this table (or
                # the lookup failed). With several tables in play it most
                # likely belongs to another one, so do not emit an ALTER
                # statement for a column this table may not have.
                continue

            idx_type = _best_index_type(col_type, conditions)
            idx_name = f"idx_{table_name}_{col_name}"
            alter_sql = (
                f"ALTER TABLE {full_table}\n"
                f" ADD INDEX {idx_name} {col_name}\n"
                f" TYPE {idx_type} GRANULARITY 4;"
            )
            pk_label = pk_cols if pk_cols else ["(unknown)"]
            suggestions.append(
                IndexSuggestion(
                    table=full_table,
                    column=col_name,
                    reason=f"not in ORDER BY {pk_label}",
                    suggestion=alter_sql,
                    in_primary_key=False,
                )
            )

    return suggestions
@@ -0,0 +1,223 @@
1
+ Metadata-Version: 2.4
2
+ Name: chqce
3
+ Version: 0.1.0
4
+ Summary: ClickHouse Query Cost Estimator CLI
5
+ Author-email: Ahmad Darwich <darw.ahmad@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator
8
+ Project-URL: Repository, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator
9
+ Project-URL: Issues, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/issues
10
+ Keywords: clickhouse,sql,cli,query,cost,performance,explain
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Database
21
+ Classifier: Topic :: Utilities
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: clickhouse-connect>=0.7.0
26
+ Requires-Dist: rich>=13.0.0
27
+ Requires-Dist: click>=8.1.0
28
+ Requires-Dist: sqlglot>=20.0.0
29
+ Provides-Extra: test
30
+ Requires-Dist: pytest>=7.0; extra == "test"
31
+ Dynamic: license-file
32
+
33
+ # clickhouse-query-cost-estimator
34
+
35
+ [![CI](https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/actions/workflows/ci.yml/badge.svg)](https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/actions/workflows/ci.yml)
36
+ [![PyPI version](https://img.shields.io/pypi/v/chqce.svg)](https://pypi.org/project/chqce/)
37
+ [![Python versions](https://img.shields.io/pypi/pyversions/chqce.svg)](https://pypi.org/project/chqce/)
38
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
39
+
40
+ A terminal CLI that estimates the cost of a ClickHouse SQL query **before you regret running it**, and helps you tune indexes afterwards.
41
+
42
+ ```
43
+ ╭──────────────────────────────────────────────────────────────╮
44
+ │ ClickHouse Query Cost Estimator v0.1.0 │
45
+ │ Connected to localhost:8123 · database: default · 23.8 │
46
+ ╰──────────────────────────────────────────────────────────────╯
47
+
48
+ Cost Estimate (from EXPLAIN ESTIMATE)
49
+ Database Table Parts Est. Rows Marks
50
+ default orders 12 4.5M 550
51
+
52
+ Timing
53
+ Phase Time Notes
54
+ SQL Analyzer (EXPLAIN) 2.1 ms parsing + plan generation
55
+ Execution (client) 234.5 ms wall-clock including network
56
+ Execution (server) 228.3 ms server-side only
57
+
58
+ Execution Stats
59
+ Rows read 4.5M Bytes read 1.2 GB
60
+ Result rows 18.2K Peak memory 45.6 MB
61
+
62
+ ── Index Suggestions ──────────────────────────────────────────
63
+ ✓ created_at — already in ORDER BY
64
+ ⚠ user_id — not in ORDER BY
65
+ ALTER TABLE default.orders
66
+ ADD INDEX idx_orders_user_id user_id
67
+ TYPE bloom_filter(0.01) GRANULARITY 4;
68
+ ```
69
+
70
+ ## What it tells you
71
+
72
+ | Metric | Source |
73
+ |---|---|
74
+ | **Estimated rows / parts / marks** | `EXPLAIN ESTIMATE` |
75
+ | **SQL analyzer time** | time to run `EXPLAIN PLAN` (parsing + planning) |
76
+ | **Execution time (client)** | wall-clock including network round-trip |
77
+ | **Execution time (server)** | `elapsed_ns` from `X-ClickHouse-Summary` header |
78
+ | **Rows / bytes read, peak memory** | `system.query_log` after execution |
79
+ | **Index suggestions** | `system.tables` ORDER BY vs WHERE columns |
80
+
81
+ ## Installation
82
+
83
+ ```bash
84
+ pip install chqce
85
+ ```
86
+
87
+ Or, for local development:
88
+
89
+ ```bash
90
+ pip install -e .
91
+ ```
92
+
93
+ ## Usage
94
+
95
+ ```bash
96
+ # Analyze a single query
97
+ chqce "SELECT count() FROM hits WHERE EventDate = today()"
98
+
99
+ # Interactive mode — paste any query, then type ; or GO to submit
100
+ chqce
101
+
102
+ # Custom connection
103
+ chqce --host my.ch.host --port 9123 --user admin --database analytics \
104
+ "SELECT count() FROM events WHERE user_id = 42"
105
+
106
+ # Estimate only — skip execution (safe for expensive/destructive queries)
107
+ chqce --no-execute "SELECT * FROM huge_table WHERE x > 0"
108
+ ```
109
+
110
+ ### Large queries
111
+
112
+ For big, multi-line queries (hundreds or thousands of lines) you don't want to
113
+ wrestle with shell quoting. Read the query from a file or pipe it in instead:
114
+
115
+ ```bash
116
+ # From a file — the cleanest option for huge queries
117
+ chqce -f report.sql
118
+
119
+ # Piped via stdin
120
+ cat report.sql | chqce
121
+ chqce < report.sql
122
+
123
+ # If ClickHouse rejects it with a max_query_size error, raise the limit
124
+ chqce -f report.sql --max-query-size 1048576 # 1 MiB
125
+ ```
126
+
127
+ The query source is resolved in this priority order:
128
+
129
+ 1. `--file` / `-f` — read from a file
130
+ 2. `QUERY` argument — passed on the command line
131
+ 3. piped **stdin** — when input isn't a terminal
132
+ 4. interactive prompt — when nothing else is provided
133
+
134
+ The echoed query is truncated to the first 30 lines in the output, so a large
135
+ query never buries the results.
136
+
137
+ ### Timeouts and resource limits
138
+
139
+ Heavy queries can hit server-side limits. The tool catches these, reports them
140
+ clearly, and tells you which flag gets you unstuck:
141
+
142
+ ```bash
143
+ # Abort the query after 5 minutes instead of waiting indefinitely
144
+ chqce -t 300 "SELECT ... a slow aggregation ..."
145
+
146
+ # Fix "AST is too big" (e.g. a giant IN (...) list)
147
+ chqce -f report.sql --max-ast-elements 500000
148
+
149
+ # Fix "Max query size exceeded"
150
+ chqce -f report.sql --max-query-size 1048576 # 1 MiB
151
+ ```
152
+
153
+ When a query fails, the error is classified and shown with suggestions. For
154
+ example, a timeout renders as:
155
+
156
+ ```
157
+ ╭──────────────────────────────────────────────────────────────╮
158
+ │ ✗ Execution error Query timed out │
159
+ │ Code: 159. DB::Exception: Timeout exceeded: elapsed 30 ... │
160
+ │ │
161
+ │ Suggestions │
162
+ │ The query exceeded its time budget. Try: │
163
+ │ • raise the limit: --timeout 600 (seconds, 0 = unlimited)│
164
+ │ • estimate without running: --no-execute │
165
+ │ • narrow the query with a WHERE filter or LIMIT │
166
+ ╰──────────────────────────────────────────────────────────────╯
167
+ ```
168
+
169
+ Recognized failures: **timeout**, **AST too big**, **parser depth**,
170
+ **query size**, **memory limit**, and **read-row/byte limits**.
171
+
172
+ ## Options
173
+
174
+ | Flag | Env var | Default | Description |
175
+ |---|---|---|---|
176
+ | `--file` / `-f` | — | — | Read the query from a file (best for huge queries) |
177
+ | `--host` / `-H` | `CLICKHOUSE_HOST` | `localhost` | ClickHouse host |
178
+ | `--port` / `-p` | `CLICKHOUSE_PORT` | `8123` | HTTP port |
179
+ | `--user` / `-u` | `CLICKHOUSE_USER` | `default` | Username |
180
+ | `--password` / `-P` | `CLICKHOUSE_PASSWORD` | _(empty)_ | Password |
181
+ | `--database` / `-d` | `CLICKHOUSE_DATABASE` | `default` | Default database |
182
+ | `--timeout` / `-t` | — | `0` _(unlimited)_ | Server-side `max_execution_time` in seconds |
183
+ | `--max-query-size` | — | _(server default 262144)_ | Raise ClickHouse `max_query_size` for very large queries |
184
+ | `--max-ast-elements` | — | _(server default 50000)_ | Raise ClickHouse `max_ast_elements` for queries with huge ASTs |
185
+ | `--no-execute` | — | `false` | Skip actual execution; estimate only |
186
+
187
+ ## Interactive mode
188
+
189
+ Type or paste a multi-line query, then submit by:
190
+ - Ending the last line with `;`
191
+ - Typing `GO` on its own line
192
+
193
+ Press **Ctrl+C** to exit.
194
+
195
+ ## How index suggestions work
196
+
197
+ 1. The query is parsed with [sqlglot](https://github.com/tobymao/sqlglot) to extract WHERE-clause columns and condition types (equality, range, LIKE, IN).
198
+ 2. Each referenced table's `sorting_key` is fetched from `system.tables`.
199
+ 3. Columns not covered by the sort key get a skip-index `ALTER TABLE` suggestion. The index type is chosen by condition and column type; the rows below are checked top to bottom and the first match wins:
200
+
201
+ | Condition | Column type | Suggested index |
202
+ |---|---|---|
203
+ | `LIKE` / `ILIKE` | any | `tokenbf_v1(32768, 3, 0)` |
204
+ | `>` / `<` / `BETWEEN` | any | `minmax` |
205
+ | `=` / `IN` | String | `bloom_filter(0.01)` |
206
+ | `=` / `IN` | numeric / date | `set(100)` |
207
+
208
+ ## Requirements
209
+
210
+ - Python ≥ 3.10
211
+ - ClickHouse with HTTP interface enabled (default port 8123)
212
+
213
+ ## Development
214
+
215
+ Run the test suite (no ClickHouse server required — tests use a fake client):
216
+
217
+ ```bash
218
+ pip install -e ".[test]" # or: pip install -r requirements-dev.txt
219
+ pytest
220
+ ```
221
+
222
+ The tests live in `tests/` and cover error classification, the estimator,
223
+ index suggestions, output formatting, connection settings, and the CLI.
@@ -0,0 +1,13 @@
1
+ chqce/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
2
+ chqce/cli.py,sha256=s3PjZsVvpIEXslKxH6rCLEsYImrkwO7ndZgFzvKGZVs,6435
3
+ chqce/connection.py,sha256=qMYLLV3HReHvBkcNgMDA8MzE4yQp9okFWOZveWYK_-g,1810
4
+ chqce/errors.py,sha256=SDw4NPGYRyk58iyaIy6ARN0Zoh2i0zpWcyinJqOjADY,3332
5
+ chqce/estimator.py,sha256=cB3yHXJ8uqDVQOgh_sFnMeAGp0aPYQLe1W6i6I9AOTI,4290
6
+ chqce/formatter.py,sha256=LChQHR2atkawtGqDwiH4dicAasV_zQRN-yD2L9tf5x8,8872
7
+ chqce/suggestions.py,sha256=lb6yhjXUAHSUDqMYPeemqEDInnrxzrn2UThBT46gx14,5246
8
+ chqce-0.1.0.dist-info/licenses/LICENSE,sha256=WzwMceuO8oWaxTqWZNRJFRIcXS4AJIInPqiba2XtI_Y,1070
9
+ chqce-0.1.0.dist-info/METADATA,sha256=Xj__fCoc3mQJXuCgPrse00vGCMNRO1PxjlFam8UlyTk,9014
10
+ chqce-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
11
+ chqce-0.1.0.dist-info/entry_points.txt,sha256=0RJ1jIhBZ9T0YTXHkKQP9mPowWEo9aD7SbwYEDdUf2Y,40
12
+ chqce-0.1.0.dist-info/top_level.txt,sha256=BMw-L4xkLh5Frcmnczp6pMbuhzlhoPL7L8rB0BfQXWs,6
13
+ chqce-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ chqce = chqce.cli:cli
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ahmad Darwich
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ chqce