chqce 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chqce-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ahmad Darwich
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
chqce-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,223 @@
1
+ Metadata-Version: 2.4
2
+ Name: chqce
3
+ Version: 0.1.0
4
+ Summary: ClickHouse Query Cost Estimator CLI
5
+ Author-email: Ahmad Darwich <darw.ahmad@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator
8
+ Project-URL: Repository, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator
9
+ Project-URL: Issues, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/issues
10
+ Keywords: clickhouse,sql,cli,query,cost,performance,explain
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Database
21
+ Classifier: Topic :: Utilities
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: clickhouse-connect>=0.7.0
26
+ Requires-Dist: rich>=13.0.0
27
+ Requires-Dist: click>=8.1.0
28
+ Requires-Dist: sqlglot>=20.0.0
29
+ Provides-Extra: test
30
+ Requires-Dist: pytest>=7.0; extra == "test"
31
+ Dynamic: license-file
32
+
33
+ # clickhouse-query-cost-estimator
34
+
35
+ [![CI](https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/actions/workflows/ci.yml/badge.svg)](https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/actions/workflows/ci.yml)
36
+ [![PyPI version](https://img.shields.io/pypi/v/chqce.svg)](https://pypi.org/project/chqce/)
37
+ [![Python versions](https://img.shields.io/pypi/pyversions/chqce.svg)](https://pypi.org/project/chqce/)
38
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
39
+
40
+ A terminal CLI that estimates the cost of a ClickHouse SQL query **before you regret running it**, and helps you tune indexes afterwards.
41
+
42
+ ```
43
+ ╭──────────────────────────────────────────────────────────────╮
44
+ │ ClickHouse Query Cost Estimator v0.1.0 │
45
+ │ Connected to localhost:8123 · database: default · 23.8 │
46
+ ╰──────────────────────────────────────────────────────────────╯
47
+
48
+ Cost Estimate (from EXPLAIN ESTIMATE)
49
+ Database Table Parts Est. Rows Marks
50
+ default orders 12 4.5M 550
51
+
52
+ Timing
53
+ Phase Time Notes
54
+ SQL Analyzer (EXPLAIN) 2.1 ms parsing + plan generation
55
+ Execution (client) 234.5 ms wall-clock including network
56
+ Execution (server) 228.3 ms server-side only
57
+
58
+ Execution Stats
59
+ Rows read 4.5M Bytes read 1.2 GB
60
+ Result rows 18.2K Peak memory 45.6 MB
61
+
62
+ ── Index Suggestions ──────────────────────────────────────────
63
+ ✓ created_at — already in ORDER BY
64
+ ⚠ user_id — not in ORDER BY
65
+ ALTER TABLE default.orders
66
+ ADD INDEX idx_orders_user_id user_id
67
+ TYPE bloom_filter(0.01) GRANULARITY 4;
68
+ ```
69
+
70
+ ## What it tells you
71
+
72
+ | Metric | Source |
73
+ |---|---|
74
+ | **Estimated rows / parts / marks** | `EXPLAIN ESTIMATE` |
75
+ | **SQL analyzer time** | time to run `EXPLAIN PLAN` (parsing + planning) |
76
+ | **Execution time (client)** | wall-clock including network round-trip |
77
+ | **Execution time (server)** | `elapsed_ns` from `X-ClickHouse-Summary` header |
78
+ | **Rows / bytes read, peak memory** | `system.query_log` after execution |
79
+ | **Index suggestions** | `system.tables` ORDER BY vs WHERE columns |
80
+
81
+ ## Installation
82
+
83
+ ```bash
84
+ pip install chqce
85
+ ```
86
+
87
+ Or, for local development:
88
+
89
+ ```bash
90
+ pip install -e .
91
+ ```
92
+
93
+ ## Usage
94
+
95
+ ```bash
96
+ # Analyze a single query
97
+ chqce "SELECT count() FROM hits WHERE EventDate = today()"
98
+
99
+ # Interactive mode — paste any query, then type ; or GO to submit
100
+ chqce
101
+
102
+ # Custom connection
103
+ chqce --host my.ch.host --port 9123 --user admin --database analytics \
104
+ "SELECT count() FROM events WHERE user_id = 42"
105
+
106
+ # Estimate only — skip execution (safe for expensive/destructive queries)
107
+ chqce --no-execute "SELECT * FROM huge_table WHERE x > 0"
108
+ ```
109
+
110
+ ### Large queries
111
+
112
+ For big, multi-line queries (hundreds or thousands of lines) you don't want to
113
+ wrestle with shell quoting. Read the query from a file or pipe it in instead:
114
+
115
+ ```bash
116
+ # From a file — the cleanest option for huge queries
117
+ chqce -f report.sql
118
+
119
+ # Piped via stdin
120
+ cat report.sql | chqce
121
+ chqce < report.sql
122
+
123
+ # If ClickHouse rejects it with a max_query_size error, raise the limit
124
+ chqce -f report.sql --max-query-size 1048576 # 1 MiB
125
+ ```
126
+
127
+ The query source is resolved in this priority order:
128
+
129
+ 1. `--file` / `-f` — read from a file
130
+ 2. `QUERY` argument — passed on the command line
131
+ 3. piped **stdin** — when input isn't a terminal
132
+ 4. interactive prompt — when nothing else is provided
133
+
134
+ The echoed query is truncated to the first 30 lines in the output, so a large
135
+ query never buries the results.
136
+
137
+ ### Timeouts and resource limits
138
+
139
+ Heavy queries can hit server-side limits. The tool catches these, reports them
140
+ clearly, and tells you which flag gets you unstuck:
141
+
142
+ ```bash
143
+ # Abort the query after 5 minutes instead of waiting indefinitely
144
+ chqce -t 300 "SELECT ... a slow aggregation ..."
145
+
146
+ # Fix "AST is too big" (e.g. a giant IN (...) list)
147
+ chqce -f report.sql --max-ast-elements 500000
148
+
149
+ # Fix "Max query size exceeded"
150
+ chqce -f report.sql --max-query-size 1048576 # 1 MiB
151
+ ```
152
+
153
+ When a query fails, the error is classified and shown with suggestions. For
154
+ example, a timeout renders as:
155
+
156
+ ```
157
+ ╭──────────────────────────────────────────────────────────────╮
158
+ │ ✗ Execution error Query timed out │
159
+ │ Code: 159. DB::Exception: Timeout exceeded: elapsed 30 ... │
160
+ │ │
161
+ │ Suggestions │
162
+ │ The query exceeded its time budget. Try: │
163
+ │ • raise the limit: --timeout 600 (seconds, 0 = unlimited)│
164
+ │ • estimate without running: --no-execute │
165
+ │ • narrow the query with a WHERE filter or LIMIT │
166
+ ╰──────────────────────────────────────────────────────────────╯
167
+ ```
168
+
169
+ Recognized failures: **timeout**, **AST too big**, **parser depth**,
170
+ **query size**, **memory limit**, and **read-row/byte limits**.
171
+
172
+ ## Options
173
+
174
+ | Flag | Env var | Default | Description |
175
+ |---|---|---|---|
176
+ | `--file` / `-f` | — | — | Read the query from a file (best for huge queries) |
177
+ | `--host` / `-H` | `CLICKHOUSE_HOST` | `localhost` | ClickHouse host |
178
+ | `--port` / `-p` | `CLICKHOUSE_PORT` | `8123` | HTTP port |
179
+ | `--user` / `-u` | `CLICKHOUSE_USER` | `default` | Username |
180
+ | `--password` / `-P` | `CLICKHOUSE_PASSWORD` | _(empty)_ | Password |
181
+ | `--database` / `-d` | `CLICKHOUSE_DATABASE` | `default` | Default database |
182
+ | `--timeout` / `-t` | — | `0` _(unlimited)_ | Server-side `max_execution_time` in seconds (`0` sets no server-side limit; the client socket timeout then stays at 300 s) |
183
+ | `--max-query-size` | — | _(server default 262144)_ | Raise ClickHouse `max_query_size` for very large queries |
184
+ | `--max-ast-elements` | — | _(server default 50000)_ | Raise ClickHouse `max_ast_elements` for queries with huge ASTs |
185
+ | `--no-execute` | — | `false` | Skip actual execution; estimate only |
186
+
187
+ ## Interactive mode
188
+
189
+ Type or paste a multi-line query, then submit by:
190
+ - Ending the last line with `;`
191
+ - Typing `GO` on its own line
192
+
193
+ Press **Ctrl+C** to exit.
194
+
195
+ ## How index suggestions work
196
+
197
+ 1. The query is parsed with [sqlglot](https://github.com/tobymao/sqlglot) to extract WHERE-clause columns and condition types (equality, range, LIKE, IN).
198
+ 2. Each referenced table's `sorting_key` is fetched from `system.tables`.
199
+ 3. Columns not covered by the sort key get a skip-index `ALTER TABLE` suggestion, with the type chosen by condition and column type:
200
+
201
+ | Condition | Column type | Suggested index |
202
+ |---|---|---|
203
+ | `LIKE` / `ILIKE` | any | `tokenbf_v1(32768, 3, 0)` |
204
+ | `>` / `<` / `BETWEEN` | any | `minmax` |
205
+ | `=` / `IN` | String | `bloom_filter(0.01)` |
206
+ | `=` / `IN` | numeric / date | `set(100)` |
207
+
208
+ ## Requirements
209
+
210
+ - Python ≥ 3.10
211
+ - ClickHouse with HTTP interface enabled (default port 8123)
212
+
213
+ ## Development
214
+
215
+ Run the test suite (no ClickHouse server required — tests use a fake client):
216
+
217
+ ```bash
218
+ pip install -e ".[test]" # or: pip install -r requirements-dev.txt
219
+ pytest
220
+ ```
221
+
222
+ The tests live in `tests/` and cover error classification, the estimator,
223
+ index suggestions, output formatting, connection settings, and the CLI.
chqce-0.1.0/README.md ADDED
@@ -0,0 +1,191 @@
1
+ # clickhouse-query-cost-estimator
2
+
3
+ [![CI](https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/actions/workflows/ci.yml/badge.svg)](https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/actions/workflows/ci.yml)
4
+ [![PyPI version](https://img.shields.io/pypi/v/chqce.svg)](https://pypi.org/project/chqce/)
5
+ [![Python versions](https://img.shields.io/pypi/pyversions/chqce.svg)](https://pypi.org/project/chqce/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
7
+
8
+ A terminal CLI that estimates the cost of a ClickHouse SQL query **before you regret running it**, and helps you tune indexes afterwards.
9
+
10
+ ```
11
+ ╭──────────────────────────────────────────────────────────────╮
12
+ │ ClickHouse Query Cost Estimator v0.1.0 │
13
+ │ Connected to localhost:8123 · database: default · 23.8 │
14
+ ╰──────────────────────────────────────────────────────────────╯
15
+
16
+ Cost Estimate (from EXPLAIN ESTIMATE)
17
+ Database Table Parts Est. Rows Marks
18
+ default orders 12 4.5M 550
19
+
20
+ Timing
21
+ Phase Time Notes
22
+ SQL Analyzer (EXPLAIN) 2.1 ms parsing + plan generation
23
+ Execution (client) 234.5 ms wall-clock including network
24
+ Execution (server) 228.3 ms server-side only
25
+
26
+ Execution Stats
27
+ Rows read 4.5M Bytes read 1.2 GB
28
+ Result rows 18.2K Peak memory 45.6 MB
29
+
30
+ ── Index Suggestions ──────────────────────────────────────────
31
+ ✓ created_at — already in ORDER BY
32
+ ⚠ user_id — not in ORDER BY
33
+ ALTER TABLE default.orders
34
+ ADD INDEX idx_orders_user_id user_id
35
+ TYPE bloom_filter(0.01) GRANULARITY 4;
36
+ ```
37
+
38
+ ## What it tells you
39
+
40
+ | Metric | Source |
41
+ |---|---|
42
+ | **Estimated rows / parts / marks** | `EXPLAIN ESTIMATE` |
43
+ | **SQL analyzer time** | time to run `EXPLAIN PLAN` (parsing + planning) |
44
+ | **Execution time (client)** | wall-clock including network round-trip |
45
+ | **Execution time (server)** | `elapsed_ns` from `X-ClickHouse-Summary` header |
46
+ | **Rows / bytes read, peak memory** | `system.query_log` after execution |
47
+ | **Index suggestions** | `system.tables` ORDER BY vs WHERE columns |
48
+
49
+ ## Installation
50
+
51
+ ```bash
52
+ pip install chqce
53
+ ```
54
+
55
+ Or, for local development:
56
+
57
+ ```bash
58
+ pip install -e .
59
+ ```
60
+
61
+ ## Usage
62
+
63
+ ```bash
64
+ # Analyze a single query
65
+ chqce "SELECT count() FROM hits WHERE EventDate = today()"
66
+
67
+ # Interactive mode — paste any query, then type ; or GO to submit
68
+ chqce
69
+
70
+ # Custom connection
71
+ chqce --host my.ch.host --port 9123 --user admin --database analytics \
72
+ "SELECT count() FROM events WHERE user_id = 42"
73
+
74
+ # Estimate only — skip execution (safe for expensive/destructive queries)
75
+ chqce --no-execute "SELECT * FROM huge_table WHERE x > 0"
76
+ ```
77
+
78
+ ### Large queries
79
+
80
+ For big, multi-line queries (hundreds or thousands of lines) you don't want to
81
+ wrestle with shell quoting. Read the query from a file or pipe it in instead:
82
+
83
+ ```bash
84
+ # From a file — the cleanest option for huge queries
85
+ chqce -f report.sql
86
+
87
+ # Piped via stdin
88
+ cat report.sql | chqce
89
+ chqce < report.sql
90
+
91
+ # If ClickHouse rejects it with a max_query_size error, raise the limit
92
+ chqce -f report.sql --max-query-size 1048576 # 1 MiB
93
+ ```
94
+
95
+ The query source is resolved in this priority order:
96
+
97
+ 1. `--file` / `-f` — read from a file
98
+ 2. `QUERY` argument — passed on the command line
99
+ 3. piped **stdin** — when input isn't a terminal
100
+ 4. interactive prompt — when nothing else is provided
101
+
102
+ The echoed query is truncated to the first 30 lines in the output, so a large
103
+ query never buries the results.
104
+
105
+ ### Timeouts and resource limits
106
+
107
+ Heavy queries can hit server-side limits. The tool catches these, reports them
108
+ clearly, and tells you which flag gets you unstuck:
109
+
110
+ ```bash
111
+ # Abort the query after 5 minutes instead of waiting indefinitely
112
+ chqce -t 300 "SELECT ... a slow aggregation ..."
113
+
114
+ # Fix "AST is too big" (e.g. a giant IN (...) list)
115
+ chqce -f report.sql --max-ast-elements 500000
116
+
117
+ # Fix "Max query size exceeded"
118
+ chqce -f report.sql --max-query-size 1048576 # 1 MiB
119
+ ```
120
+
121
+ When a query fails, the error is classified and shown with suggestions. For
122
+ example, a timeout renders as:
123
+
124
+ ```
125
+ ╭──────────────────────────────────────────────────────────────╮
126
+ │ ✗ Execution error Query timed out │
127
+ │ Code: 159. DB::Exception: Timeout exceeded: elapsed 30 ... │
128
+ │ │
129
+ │ Suggestions │
130
+ │ The query exceeded its time budget. Try: │
131
+ │ • raise the limit: --timeout 600 (seconds, 0 = unlimited)│
132
+ │ • estimate without running: --no-execute │
133
+ │ • narrow the query with a WHERE filter or LIMIT │
134
+ ╰──────────────────────────────────────────────────────────────╯
135
+ ```
136
+
137
+ Recognized failures: **timeout**, **AST too big**, **parser depth**,
138
+ **query size**, **memory limit**, and **read-row/byte limits**.
139
+
140
+ ## Options
141
+
142
+ | Flag | Env var | Default | Description |
143
+ |---|---|---|---|
144
+ | `--file` / `-f` | — | — | Read the query from a file (best for huge queries) |
145
+ | `--host` / `-H` | `CLICKHOUSE_HOST` | `localhost` | ClickHouse host |
146
+ | `--port` / `-p` | `CLICKHOUSE_PORT` | `8123` | HTTP port |
147
+ | `--user` / `-u` | `CLICKHOUSE_USER` | `default` | Username |
148
+ | `--password` / `-P` | `CLICKHOUSE_PASSWORD` | _(empty)_ | Password |
149
+ | `--database` / `-d` | `CLICKHOUSE_DATABASE` | `default` | Default database |
150
+ | `--timeout` / `-t` | — | `0` _(unlimited)_ | Server-side `max_execution_time` in seconds (`0` sets no server-side limit; the client socket timeout then stays at 300 s) |
151
+ | `--max-query-size` | — | _(server default 262144)_ | Raise ClickHouse `max_query_size` for very large queries |
152
+ | `--max-ast-elements` | — | _(server default 50000)_ | Raise ClickHouse `max_ast_elements` for queries with huge ASTs |
153
+ | `--no-execute` | — | `false` | Skip actual execution; estimate only |
154
+
155
+ ## Interactive mode
156
+
157
+ Type or paste a multi-line query, then submit by:
158
+ - Ending the last line with `;`
159
+ - Typing `GO` on its own line
160
+
161
+ Press **Ctrl+C** to exit.
162
+
163
+ ## How index suggestions work
164
+
165
+ 1. The query is parsed with [sqlglot](https://github.com/tobymao/sqlglot) to extract WHERE-clause columns and condition types (equality, range, LIKE, IN).
166
+ 2. Each referenced table's `sorting_key` is fetched from `system.tables`.
167
+ 3. Columns not covered by the sort key get a skip-index `ALTER TABLE` suggestion, with the type chosen by condition and column type:
168
+
169
+ | Condition | Column type | Suggested index |
170
+ |---|---|---|
171
+ | `LIKE` / `ILIKE` | any | `tokenbf_v1(32768, 3, 0)` |
172
+ | `>` / `<` / `BETWEEN` | any | `minmax` |
173
+ | `=` / `IN` | String | `bloom_filter(0.01)` |
174
+ | `=` / `IN` | numeric / date | `set(100)` |
175
+
176
+ ## Requirements
177
+
178
+ - Python ≥ 3.10
179
+ - ClickHouse with HTTP interface enabled (default port 8123)
180
+
181
+ ## Development
182
+
183
+ Run the test suite (no ClickHouse server required — tests use a fake client):
184
+
185
+ ```bash
186
+ pip install -e ".[test]" # or: pip install -r requirements-dev.txt
187
+ pytest
188
+ ```
189
+
190
+ The tests live in `tests/` and cover error classification, the estimator,
191
+ index suggestions, output formatting, connection settings, and the CLI.
@@ -0,0 +1 @@
1
# Package version string; the CLI imports it for its -V/--version option.
__version__ = "0.1.0"
@@ -0,0 +1,163 @@
1
+ import sys
2
+
3
+ import click
4
+ from rich.console import Console
5
+
6
+ from . import __version__
7
+ from .connection import create_client, test_connection
8
+ from .estimator import QueryEstimator
9
+ from .formatter import console, print_header, print_result
10
+ from .suggestions import get_index_suggestions
11
+
12
# Separate Rich console bound to stderr; the CLI prints its error messages here.
_err = Console(stderr=True)
13
+
14
+
15
def _collect_interactive() -> str:
    """Collect a multi-line SQL query from stdin.

    Submit by ending a line with ';' or typing GO on its own line.

    Returns:
        The entered lines joined with newlines, with surrounding whitespace
        stripped. The result can be empty when only blank lines were typed
        before end-of-input.

    Exits the process with status 0 on Ctrl+C, and also on end-of-input (EOF)
    when nothing has been typed yet. Without the EOF exit, a caller that
    re-prompts on an empty result would spin forever once stdin is exhausted,
    because every later ``input()`` call raises EOFError immediately.
    """
    console.print(
        "\n[dim]Paste or type your SQL query."
        " End with [bold];[/bold] or type [bold]GO[/bold] on its own line."
        " [bold]Ctrl+C[/bold] to exit.[/dim]\n"
    )
    lines: list[str] = []
    try:
        while True:
            # First line gets the "SQL>" prompt, continuation lines a dim ">".
            prefix = "[bold cyan]SQL>[/bold cyan] " if not lines else " [dim]>[/dim] "
            console.print(prefix, end="")
            try:
                line = input()
            except EOFError:
                if lines:
                    # EOF after some input: submit what was typed so far.
                    break
                # EOF with an empty buffer: stdin is closed, so leave cleanly
                # instead of handing the caller an empty query to retry on.
                console.print("\n[dim]Bye![/dim]")
                sys.exit(0)
            stripped = line.strip()
            if stripped.upper() == "GO" or stripped == ";":
                # A bare terminator submits the buffer; with nothing buffered
                # it is ignored and the prompt is shown again.
                if lines:
                    break
                continue
            lines.append(line)
            if stripped.endswith(";"):
                break
    except KeyboardInterrupt:
        console.print("\n[dim]Bye![/dim]")
        sys.exit(0)

    return "\n".join(lines).strip()
47
+
48
+
49
+ def _resolve_query(query: str | None, file: str | None) -> str | None:
50
+ """Determine the query source, in priority order.
51
+
52
+ 1. --file FILE read the query from a file (best for huge queries)
53
+ 2. QUERY argument passed directly on the command line
54
+ 3. piped stdin e.g. `chqce < query.sql` or `cat q.sql | chqce`
55
+ 4. None -> caller falls back to interactive mode
56
+ """
57
+ if file:
58
+ with open(file, "r", encoding="utf-8") as fh:
59
+ return fh.read().strip()
60
+ if query:
61
+ return query
62
+ # Query piped in on stdin (non-interactive).
63
+ if not sys.stdin.isatty():
64
+ data = sys.stdin.read().strip()
65
+ if data:
66
+ return data
67
+ return None
68
+
69
+
70
def _run(query: str, estimator: QueryEstimator, client, database: str, execute: bool) -> None:
    """Analyze one query and print its report.

    Runs the estimator (executing the query only when ``execute`` is true),
    looks up index suggestions using ``database`` as the current database,
    and hands both results to ``print_result``.
    """
    spinner = console.status("[bold green]Analyzing…[/bold green]", spinner="dots")
    # The spinner is shown while both lookups run; printing happens after it stops.
    with spinner:
        estimate = estimator.estimate(query, execute=execute)
        index_hints = get_index_suggestions(query, client, current_database=database)
    print_result(estimate, index_hints)
75
+
76
+
77
# Command-line entry point. Connection options fall back to the CLICKHOUSE_*
# environment variables named in each option's ``envvar``.
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("query", required=False)
@click.option("--file", "-f", "file", type=click.Path(exists=True, dir_okay=False),
              default=None, help="Read the query from a file (best for huge queries)")
@click.option("--host", "-H", default="localhost", envvar="CLICKHOUSE_HOST",
              show_default=True, help="ClickHouse host")
@click.option("--port", "-p", default=8123, envvar="CLICKHOUSE_PORT", type=int,
              show_default=True, help="HTTP(S) port")
@click.option("--user", "-u", default="default", envvar="CLICKHOUSE_USER",
              show_default=True, help="Username")
@click.option("--password", "-P", default="", envvar="CLICKHOUSE_PASSWORD",
              help="Password (or set CLICKHOUSE_PASSWORD)")
@click.option("--database", "-d", default="default", envvar="CLICKHOUSE_DATABASE",
              show_default=True, help="Default database")
@click.option("--max-query-size", default=0, type=int, metavar="BYTES",
              help="Raise ClickHouse max_query_size for very large queries "
                   "(server default is 262144)")
@click.option("--timeout", "-t", default=0, type=int, metavar="SECONDS",
              help="Server-side max_execution_time; query is aborted after this "
                   "many seconds (0 = unlimited)")
@click.option("--max-ast-elements", default=0, type=int, metavar="N",
              help="Raise ClickHouse max_ast_elements for queries that fail with "
                   "'AST is too big' (server default is 50000)")
@click.option("--no-execute", is_flag=True, default=False,
              help="Estimate only — do not actually run the query")
@click.version_option(__version__, "-V", "--version")
def cli(query, file, host, port, user, password, database, max_query_size,
        timeout, max_ast_elements, no_execute):
    """ClickHouse Query Cost Estimator.

    Estimates rows scanned, memory usage, and execution time for a ClickHouse
    SQL query, and suggests indexes based on WHERE-clause columns.

    \b
    The query can come from (in priority order):
    • --file query.sql best for huge / multi-line queries
    • a QUERY argument chqce "SELECT ..."
    • piped stdin chqce < query.sql
    • interactive prompt run with no query at all

    \b
    Environment variables (override defaults):
    CLICKHOUSE_HOST, CLICKHOUSE_PORT, CLICKHOUSE_USER,
    CLICKHOUSE_PASSWORD, CLICKHOUSE_DATABASE

    \b
    Examples:
    chqce "SELECT count() FROM hits WHERE EventDate = today()"
    chqce -f report.sql --no-execute
    chqce -t 600 "SELECT ... a slow query ..."
    cat report.sql | chqce --max-query-size 1048576 --max-ast-elements 500000
    """
    # Resolve the query source before connecting, so an unreadable --file is
    # reported without touching the server.
    try:
        resolved = _resolve_query(query, file)
    except OSError as e:
        _err.print(f"[red]Could not read query file:[/red] {e}")
        sys.exit(1)

    # Top-level boundary: any failure while building or probing the client is
    # reported as a connection error and ends the process with status 1.
    try:
        client = create_client(host=host, port=port, user=user,
                               password=password, database=database,
                               max_query_size=max_query_size,
                               max_execution_time=timeout,
                               max_ast_elements=max_ast_elements)
        ok, version_or_err = test_connection(client)
    except Exception as e:
        _err.print(f"[red]Connection error:[/red] {e}")
        sys.exit(1)

    # test_connection returns (False, error text) on failure and
    # (True, server version) on success.
    if not ok:
        _err.print(f"[red]Connection failed:[/red] {version_or_err}")
        sys.exit(1)

    print_header(version_or_err, host, port, database)

    estimator = QueryEstimator(client)
    execute = not no_execute

    if resolved:
        # One-shot mode: a query came from --file, the argument, or piped stdin.
        _run(resolved, estimator, client, database, execute)
    else:
        # Interactive mode: keep prompting; the loop has no exit of its own and
        # ends only when _collect_interactive exits the process (e.g. Ctrl+C).
        while True:
            q = _collect_interactive()
            if not q:
                continue
            _run(q, estimator, client, database, execute)
            console.print("[dim]" + "─" * 60 + "[/dim]")
@@ -0,0 +1,55 @@
1
+ import clickhouse_connect
2
+ from clickhouse_connect.driver import Client
3
+
4
+
5
# Client socket timeout, in seconds, used when no server-side
# max_execution_time is requested.
DEFAULT_SOCKET_TIMEOUT = 300
6
+
7
+
8
def create_client(
    host: str = "localhost",
    port: int = 8123,
    user: str = "default",
    password: str = "",
    database: str = "default",
    max_query_size: int = 0,
    max_execution_time: int = 0,
    max_ast_elements: int = 0,
) -> Client:
    """Create a clickhouse-connect client with optional session limits.

    Args:
        host, port, user, password, database: Connection parameters passed
            through to ``clickhouse_connect.get_client``.
        max_query_size: Sent as the ``max_query_size`` setting when > 0.
        max_execution_time: Sent as the ``max_execution_time`` setting
            (seconds) when > 0; it also determines the socket timeout.
        max_ast_elements: Sent as both ``max_ast_elements`` and
            ``max_expanded_ast_elements`` when > 0.

    Returns:
        The client returned by ``clickhouse_connect.get_client``.
    """
    # Candidate settings in the order they are sent; a value of 0 (or less)
    # means the caller did not override it, so it is left out.
    requested = (
        ("max_query_size", max_query_size),
        ("max_execution_time", max_execution_time),
        ("max_ast_elements", max_ast_elements),
        ("max_expanded_ast_elements", max_ast_elements),
    )
    session_settings = {name: value for name, value in requested if value > 0}

    # Give the socket 30 s more than the server-side limit so ClickHouse can
    # return its own timeout error before the connection is dropped.
    if max_execution_time > 0:
        io_timeout = max_execution_time + 30
    else:
        io_timeout = DEFAULT_SOCKET_TIMEOUT

    return clickhouse_connect.get_client(
        host=host,
        port=port,
        username=user,
        password=password,
        database=database,
        connect_timeout=10,
        send_receive_timeout=io_timeout,
        settings=session_settings,
    )
47
+
48
+
49
def test_connection(client: Client) -> tuple[bool, str]:
    """Probe the server by running ``SELECT version()``.

    Returns:
        ``(True, version)`` with the first column of the first result row on
        success, or ``(False, message)`` with the exception text on any
        failure. Exceptions are never propagated to the caller.
    """
    try:
        rows = client.query("SELECT version()").result_rows
        server_version = rows[0][0]
    except Exception as exc:
        return False, str(exc)
    return True, server_version