chqce 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chqce-0.1.0/LICENSE +21 -0
- chqce-0.1.0/PKG-INFO +223 -0
- chqce-0.1.0/README.md +191 -0
- chqce-0.1.0/chqce/__init__.py +1 -0
- chqce-0.1.0/chqce/cli.py +163 -0
- chqce-0.1.0/chqce/connection.py +55 -0
- chqce-0.1.0/chqce/errors.py +87 -0
- chqce-0.1.0/chqce/estimator.py +125 -0
- chqce-0.1.0/chqce/formatter.py +227 -0
- chqce-0.1.0/chqce/suggestions.py +156 -0
- chqce-0.1.0/chqce.egg-info/PKG-INFO +223 -0
- chqce-0.1.0/chqce.egg-info/SOURCES.txt +22 -0
- chqce-0.1.0/chqce.egg-info/dependency_links.txt +1 -0
- chqce-0.1.0/chqce.egg-info/entry_points.txt +2 -0
- chqce-0.1.0/chqce.egg-info/requires.txt +7 -0
- chqce-0.1.0/chqce.egg-info/top_level.txt +1 -0
- chqce-0.1.0/pyproject.toml +52 -0
- chqce-0.1.0/setup.cfg +4 -0
- chqce-0.1.0/tests/test_cli.py +106 -0
- chqce-0.1.0/tests/test_connection.py +75 -0
- chqce-0.1.0/tests/test_errors.py +51 -0
- chqce-0.1.0/tests/test_estimator.py +110 -0
- chqce-0.1.0/tests/test_formatter.py +79 -0
- chqce-0.1.0/tests/test_suggestions.py +114 -0
chqce-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ahmad Darwich
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
chqce-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chqce
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ClickHouse Query Cost Estimator CLI
|
|
5
|
+
Author-email: Ahmad Darwich <darw.ahmad@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator
|
|
8
|
+
Project-URL: Repository, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator
|
|
9
|
+
Project-URL: Issues, https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/issues
|
|
10
|
+
Keywords: clickhouse,sql,cli,query,cost,performance,explain
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Database
|
|
21
|
+
Classifier: Topic :: Utilities
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: clickhouse-connect>=0.7.0
|
|
26
|
+
Requires-Dist: rich>=13.0.0
|
|
27
|
+
Requires-Dist: click>=8.1.0
|
|
28
|
+
Requires-Dist: sqlglot>=20.0.0
|
|
29
|
+
Provides-Extra: test
|
|
30
|
+
Requires-Dist: pytest>=7.0; extra == "test"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
|
|
33
|
+
# clickhouse-query-cost-estimator
|
|
34
|
+
|
|
35
|
+
[CI status](https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/actions/workflows/ci.yml)
|
|
36
|
+
[PyPI package](https://pypi.org/project/chqce/)
|
|
37
|
+
[Supported Python versions](https://pypi.org/project/chqce/)
|
|
38
|
+
[MIT License](LICENSE)
|
|
39
|
+
|
|
40
|
+
A terminal CLI that estimates the cost of a ClickHouse SQL query **before you regret running it**, and helps you tune indexes afterwards.
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
╭──────────────────────────────────────────────────────────────╮
|
|
44
|
+
│ ClickHouse Query Cost Estimator v0.1.0 │
|
|
45
|
+
│ Connected to localhost:8123 · database: default · 23.8 │
|
|
46
|
+
╰──────────────────────────────────────────────────────────────╯
|
|
47
|
+
|
|
48
|
+
Cost Estimate (from EXPLAIN ESTIMATE)
|
|
49
|
+
Database Table Parts Est. Rows Marks
|
|
50
|
+
default orders 12 4.5M 550
|
|
51
|
+
|
|
52
|
+
Timing
|
|
53
|
+
Phase Time Notes
|
|
54
|
+
SQL Analyzer (EXPLAIN) 2.1 ms parsing + plan generation
|
|
55
|
+
Execution (client) 234.5 ms wall-clock including network
|
|
56
|
+
Execution (server) 228.3 ms server-side only
|
|
57
|
+
|
|
58
|
+
Execution Stats
|
|
59
|
+
Rows read 4.5M Bytes read 1.2 GB
|
|
60
|
+
Result rows 18.2K Peak memory 45.6 MB
|
|
61
|
+
|
|
62
|
+
── Index Suggestions ──────────────────────────────────────────
|
|
63
|
+
✓ created_at — already in ORDER BY
|
|
64
|
+
⚠ user_id — not in ORDER BY
|
|
65
|
+
ALTER TABLE default.orders
|
|
66
|
+
ADD INDEX idx_orders_user_id user_id
|
|
67
|
+
TYPE bloom_filter(0.01) GRANULARITY 4;
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## What it tells you
|
|
71
|
+
|
|
72
|
+
| Metric | Source |
|
|
73
|
+
|---|---|
|
|
74
|
+
| **Estimated rows / parts / marks** | `EXPLAIN ESTIMATE` |
|
|
75
|
+
| **SQL analyzer time** | time to run `EXPLAIN PLAN` (parsing + planning) |
|
|
76
|
+
| **Execution time (client)** | wall-clock including network round-trip |
|
|
77
|
+
| **Execution time (server)** | `elapsed_ns` from `X-ClickHouse-Summary` header |
|
|
78
|
+
| **Rows / bytes read, peak memory** | `system.query_log` after execution |
|
|
79
|
+
| **Index suggestions** | `system.tables` ORDER BY vs WHERE columns |
|
|
80
|
+
|
|
81
|
+
## Installation
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
pip install chqce
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Or, for local development:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
pip install -e .
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Usage
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
# Analyze a single query
|
|
97
|
+
chqce "SELECT count() FROM hits WHERE EventDate = today()"
|
|
98
|
+
|
|
99
|
+
# Interactive mode — paste any query, then type ; or GO to submit
|
|
100
|
+
chqce
|
|
101
|
+
|
|
102
|
+
# Custom connection
|
|
103
|
+
chqce --host my.ch.host --port 9123 --user admin --database analytics \
|
|
104
|
+
"SELECT count() FROM events WHERE user_id = 42"
|
|
105
|
+
|
|
106
|
+
# Estimate only — skip execution (safe for expensive/destructive queries)
|
|
107
|
+
chqce --no-execute "SELECT * FROM huge_table WHERE x > 0"
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Large queries
|
|
111
|
+
|
|
112
|
+
For big, multi-line queries (hundreds or thousands of lines) you don't want to
|
|
113
|
+
wrestle with shell quoting. Read the query from a file or pipe it in instead:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
# From a file — the cleanest option for huge queries
|
|
117
|
+
chqce -f report.sql
|
|
118
|
+
|
|
119
|
+
# Piped via stdin
|
|
120
|
+
cat report.sql | chqce
|
|
121
|
+
chqce < report.sql
|
|
122
|
+
|
|
123
|
+
# If ClickHouse rejects it with a max_query_size error, raise the limit
|
|
124
|
+
chqce -f report.sql --max-query-size 1048576 # 1 MiB
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
The query source is resolved in this priority order:
|
|
128
|
+
|
|
129
|
+
1. `--file` / `-f` — read from a file
|
|
130
|
+
2. `QUERY` argument — passed on the command line
|
|
131
|
+
3. piped **stdin** — when input isn't a terminal
|
|
132
|
+
4. interactive prompt — when nothing else is provided
|
|
133
|
+
|
|
134
|
+
The echoed query is truncated to the first 30 lines in the output, so a large
|
|
135
|
+
query never buries the results.
|
|
136
|
+
|
|
137
|
+
### Timeouts and resource limits
|
|
138
|
+
|
|
139
|
+
Heavy queries can hit server-side limits. The tool catches these, reports them
|
|
140
|
+
clearly, and tells you which flag gets you unstuck:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# Abort the query after 5 minutes instead of waiting indefinitely
|
|
144
|
+
chqce -t 300 "SELECT ... a slow aggregation ..."
|
|
145
|
+
|
|
146
|
+
# Fix "AST is too big" (e.g. a giant IN (...) list)
|
|
147
|
+
chqce -f report.sql --max-ast-elements 500000
|
|
148
|
+
|
|
149
|
+
# Fix "Max query size exceeded"
|
|
150
|
+
chqce -f report.sql --max-query-size 1048576 # 1 MiB
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
When a query fails, the error is classified and shown with suggestions. For
|
|
154
|
+
example, a timeout renders as:
|
|
155
|
+
|
|
156
|
+
```
|
|
157
|
+
╭──────────────────────────────────────────────────────────────╮
|
|
158
|
+
│ ✗ Execution error Query timed out │
|
|
159
|
+
│ Code: 159. DB::Exception: Timeout exceeded: elapsed 30 ... │
|
|
160
|
+
│ │
|
|
161
|
+
│ Suggestions │
|
|
162
|
+
│ The query exceeded its time budget. Try: │
|
|
163
|
+
│ • raise the limit: --timeout 600 (seconds, 0 = unlimited)│
|
|
164
|
+
│ • estimate without running: --no-execute │
|
|
165
|
+
│ • narrow the query with a WHERE filter or LIMIT │
|
|
166
|
+
╰──────────────────────────────────────────────────────────────╯
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Recognized failures: **timeout**, **AST too big**, **parser depth**,
|
|
170
|
+
**query size**, **memory limit**, and **read-row/byte limits**.
|
|
171
|
+
|
|
172
|
+
## Options
|
|
173
|
+
|
|
174
|
+
| Flag | Env var | Default | Description |
|
|
175
|
+
|---|---|---|---|
|
|
176
|
+
| `--file` / `-f` | — | — | Read the query from a file (best for huge queries) |
|
|
177
|
+
| `--host` / `-H` | `CLICKHOUSE_HOST` | `localhost` | ClickHouse host |
|
|
178
|
+
| `--port` / `-p` | `CLICKHOUSE_PORT` | `8123` | HTTP port |
|
|
179
|
+
| `--user` / `-u` | `CLICKHOUSE_USER` | `default` | Username |
|
|
180
|
+
| `--password` / `-P` | `CLICKHOUSE_PASSWORD` | _(empty)_ | Password |
|
|
181
|
+
| `--database` / `-d` | `CLICKHOUSE_DATABASE` | `default` | Default database |
|
|
182
|
+
| `--timeout` / `-t` | — | `0` _(unlimited)_ | Server-side `max_execution_time` in seconds |
|
|
183
|
+
| `--max-query-size` | — | _(server default 262144)_ | Raise ClickHouse `max_query_size` for very large queries |
|
|
184
|
+
| `--max-ast-elements` | — | _(server default 50000)_ | Raise ClickHouse `max_ast_elements` for queries with huge ASTs |
|
|
185
|
+
| `--no-execute` | — | `false` | Skip actual execution; estimate only |
|
|
186
|
+
|
|
187
|
+
## Interactive mode
|
|
188
|
+
|
|
189
|
+
Type or paste a multi-line query, then submit by:
|
|
190
|
+
- Ending the last line with `;`
|
|
191
|
+
- Typing `GO` on its own line
|
|
192
|
+
|
|
193
|
+
Press **Ctrl+C** to exit.
|
|
194
|
+
|
|
195
|
+
## How index suggestions work
|
|
196
|
+
|
|
197
|
+
1. The query is parsed with [sqlglot](https://github.com/tobymao/sqlglot) to extract WHERE-clause columns and condition types (equality, range, LIKE, IN).
|
|
198
|
+
2. Each referenced table's `sorting_key` is fetched from `system.tables`.
|
|
199
|
+
3. Columns not covered by the sort key get a skip-index `ALTER TABLE` suggestion, with the type chosen by condition and column type:
|
|
200
|
+
|
|
201
|
+
| Condition | Column type | Suggested index |
|
|
202
|
+
|---|---|---|
|
|
203
|
+
| `LIKE` / `ILIKE` | any | `tokenbf_v1(32768, 3, 0)` |
|
|
204
|
+
| `>` / `<` / `BETWEEN` | any | `minmax` |
|
|
205
|
+
| `=` / `IN` | String | `bloom_filter(0.01)` |
|
|
206
|
+
| `=` / `IN` | numeric / date | `set(100)` |
|
|
207
|
+
|
|
208
|
+
## Requirements
|
|
209
|
+
|
|
210
|
+
- Python ≥ 3.10
|
|
211
|
+
- ClickHouse with HTTP interface enabled (default port 8123)
|
|
212
|
+
|
|
213
|
+
## Development
|
|
214
|
+
|
|
215
|
+
Run the test suite (no ClickHouse server required — tests use a fake client):
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
pip install -e ".[test]" # or: pip install -r requirements-dev.txt
|
|
219
|
+
pytest
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
The tests live in `tests/` and cover error classification, the estimator,
|
|
223
|
+
index suggestions, output formatting, connection settings, and the CLI.
|
chqce-0.1.0/README.md
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# clickhouse-query-cost-estimator
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/AhmadDarwich/clickhouse-query-cost-estimator/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI package](https://pypi.org/project/chqce/)
|
|
5
|
+
[Supported Python versions](https://pypi.org/project/chqce/)
|
|
6
|
+
[MIT License](LICENSE)
|
|
7
|
+
|
|
8
|
+
A terminal CLI that estimates the cost of a ClickHouse SQL query **before you regret running it**, and helps you tune indexes afterwards.
|
|
9
|
+
|
|
10
|
+
```
|
|
11
|
+
╭──────────────────────────────────────────────────────────────╮
|
|
12
|
+
│ ClickHouse Query Cost Estimator v0.1.0 │
|
|
13
|
+
│ Connected to localhost:8123 · database: default · 23.8 │
|
|
14
|
+
╰──────────────────────────────────────────────────────────────╯
|
|
15
|
+
|
|
16
|
+
Cost Estimate (from EXPLAIN ESTIMATE)
|
|
17
|
+
Database Table Parts Est. Rows Marks
|
|
18
|
+
default orders 12 4.5M 550
|
|
19
|
+
|
|
20
|
+
Timing
|
|
21
|
+
Phase Time Notes
|
|
22
|
+
SQL Analyzer (EXPLAIN) 2.1 ms parsing + plan generation
|
|
23
|
+
Execution (client) 234.5 ms wall-clock including network
|
|
24
|
+
Execution (server) 228.3 ms server-side only
|
|
25
|
+
|
|
26
|
+
Execution Stats
|
|
27
|
+
Rows read 4.5M Bytes read 1.2 GB
|
|
28
|
+
Result rows 18.2K Peak memory 45.6 MB
|
|
29
|
+
|
|
30
|
+
── Index Suggestions ──────────────────────────────────────────
|
|
31
|
+
✓ created_at — already in ORDER BY
|
|
32
|
+
⚠ user_id — not in ORDER BY
|
|
33
|
+
ALTER TABLE default.orders
|
|
34
|
+
ADD INDEX idx_orders_user_id user_id
|
|
35
|
+
TYPE bloom_filter(0.01) GRANULARITY 4;
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## What it tells you
|
|
39
|
+
|
|
40
|
+
| Metric | Source |
|
|
41
|
+
|---|---|
|
|
42
|
+
| **Estimated rows / parts / marks** | `EXPLAIN ESTIMATE` |
|
|
43
|
+
| **SQL analyzer time** | time to run `EXPLAIN PLAN` (parsing + planning) |
|
|
44
|
+
| **Execution time (client)** | wall-clock including network round-trip |
|
|
45
|
+
| **Execution time (server)** | `elapsed_ns` from `X-ClickHouse-Summary` header |
|
|
46
|
+
| **Rows / bytes read, peak memory** | `system.query_log` after execution |
|
|
47
|
+
| **Index suggestions** | `system.tables` ORDER BY vs WHERE columns |
|
|
48
|
+
|
|
49
|
+
## Installation
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install chqce
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Or, for local development:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install -e .
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Usage
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Analyze a single query
|
|
65
|
+
chqce "SELECT count() FROM hits WHERE EventDate = today()"
|
|
66
|
+
|
|
67
|
+
# Interactive mode — paste any query, then type ; or GO to submit
|
|
68
|
+
chqce
|
|
69
|
+
|
|
70
|
+
# Custom connection
|
|
71
|
+
chqce --host my.ch.host --port 9123 --user admin --database analytics \
|
|
72
|
+
"SELECT count() FROM events WHERE user_id = 42"
|
|
73
|
+
|
|
74
|
+
# Estimate only — skip execution (safe for expensive/destructive queries)
|
|
75
|
+
chqce --no-execute "SELECT * FROM huge_table WHERE x > 0"
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Large queries
|
|
79
|
+
|
|
80
|
+
For big, multi-line queries (hundreds or thousands of lines) you don't want to
|
|
81
|
+
wrestle with shell quoting. Read the query from a file or pipe it in instead:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
# From a file — the cleanest option for huge queries
|
|
85
|
+
chqce -f report.sql
|
|
86
|
+
|
|
87
|
+
# Piped via stdin
|
|
88
|
+
cat report.sql | chqce
|
|
89
|
+
chqce < report.sql
|
|
90
|
+
|
|
91
|
+
# If ClickHouse rejects it with a max_query_size error, raise the limit
|
|
92
|
+
chqce -f report.sql --max-query-size 1048576 # 1 MiB
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
The query source is resolved in this priority order:
|
|
96
|
+
|
|
97
|
+
1. `--file` / `-f` — read from a file
|
|
98
|
+
2. `QUERY` argument — passed on the command line
|
|
99
|
+
3. piped **stdin** — when input isn't a terminal
|
|
100
|
+
4. interactive prompt — when nothing else is provided
|
|
101
|
+
|
|
102
|
+
The echoed query is truncated to the first 30 lines in the output, so a large
|
|
103
|
+
query never buries the results.
|
|
104
|
+
|
|
105
|
+
### Timeouts and resource limits
|
|
106
|
+
|
|
107
|
+
Heavy queries can hit server-side limits. The tool catches these, reports them
|
|
108
|
+
clearly, and tells you which flag gets you unstuck:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
# Abort the query after 5 minutes instead of waiting indefinitely
|
|
112
|
+
chqce -t 300 "SELECT ... a slow aggregation ..."
|
|
113
|
+
|
|
114
|
+
# Fix "AST is too big" (e.g. a giant IN (...) list)
|
|
115
|
+
chqce -f report.sql --max-ast-elements 500000
|
|
116
|
+
|
|
117
|
+
# Fix "Max query size exceeded"
|
|
118
|
+
chqce -f report.sql --max-query-size 1048576 # 1 MiB
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
When a query fails, the error is classified and shown with suggestions. For
|
|
122
|
+
example, a timeout renders as:
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
╭──────────────────────────────────────────────────────────────╮
|
|
126
|
+
│ ✗ Execution error Query timed out │
|
|
127
|
+
│ Code: 159. DB::Exception: Timeout exceeded: elapsed 30 ... │
|
|
128
|
+
│ │
|
|
129
|
+
│ Suggestions │
|
|
130
|
+
│ The query exceeded its time budget. Try: │
|
|
131
|
+
│ • raise the limit: --timeout 600 (seconds, 0 = unlimited)│
|
|
132
|
+
│ • estimate without running: --no-execute │
|
|
133
|
+
│ • narrow the query with a WHERE filter or LIMIT │
|
|
134
|
+
╰──────────────────────────────────────────────────────────────╯
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Recognized failures: **timeout**, **AST too big**, **parser depth**,
|
|
138
|
+
**query size**, **memory limit**, and **read-row/byte limits**.
|
|
139
|
+
|
|
140
|
+
## Options
|
|
141
|
+
|
|
142
|
+
| Flag | Env var | Default | Description |
|
|
143
|
+
|---|---|---|---|
|
|
144
|
+
| `--file` / `-f` | — | — | Read the query from a file (best for huge queries) |
|
|
145
|
+
| `--host` / `-H` | `CLICKHOUSE_HOST` | `localhost` | ClickHouse host |
|
|
146
|
+
| `--port` / `-p` | `CLICKHOUSE_PORT` | `8123` | HTTP port |
|
|
147
|
+
| `--user` / `-u` | `CLICKHOUSE_USER` | `default` | Username |
|
|
148
|
+
| `--password` / `-P` | `CLICKHOUSE_PASSWORD` | _(empty)_ | Password |
|
|
149
|
+
| `--database` / `-d` | `CLICKHOUSE_DATABASE` | `default` | Default database |
|
|
150
|
+
| `--timeout` / `-t` | — | `0` _(unlimited)_ | Server-side `max_execution_time` in seconds |
|
|
151
|
+
| `--max-query-size` | — | _(server default 262144)_ | Raise ClickHouse `max_query_size` for very large queries |
|
|
152
|
+
| `--max-ast-elements` | — | _(server default 50000)_ | Raise ClickHouse `max_ast_elements` for queries with huge ASTs |
|
|
153
|
+
| `--no-execute` | — | `false` | Skip actual execution; estimate only |
|
|
154
|
+
|
|
155
|
+
## Interactive mode
|
|
156
|
+
|
|
157
|
+
Type or paste a multi-line query, then submit by:
|
|
158
|
+
- Ending the last line with `;`
|
|
159
|
+
- Typing `GO` on its own line
|
|
160
|
+
|
|
161
|
+
Press **Ctrl+C** to exit.
|
|
162
|
+
|
|
163
|
+
## How index suggestions work
|
|
164
|
+
|
|
165
|
+
1. The query is parsed with [sqlglot](https://github.com/tobymao/sqlglot) to extract WHERE-clause columns and condition types (equality, range, LIKE, IN).
|
|
166
|
+
2. Each referenced table's `sorting_key` is fetched from `system.tables`.
|
|
167
|
+
3. Columns not covered by the sort key get a skip-index `ALTER TABLE` suggestion, with the type chosen by condition and column type:
|
|
168
|
+
|
|
169
|
+
| Condition | Column type | Suggested index |
|
|
170
|
+
|---|---|---|
|
|
171
|
+
| `LIKE` / `ILIKE` | any | `tokenbf_v1(32768, 3, 0)` |
|
|
172
|
+
| `>` / `<` / `BETWEEN` | any | `minmax` |
|
|
173
|
+
| `=` / `IN` | String | `bloom_filter(0.01)` |
|
|
174
|
+
| `=` / `IN` | numeric / date | `set(100)` |
|
|
175
|
+
|
|
176
|
+
## Requirements
|
|
177
|
+
|
|
178
|
+
- Python ≥ 3.10
|
|
179
|
+
- ClickHouse with HTTP interface enabled (default port 8123)
|
|
180
|
+
|
|
181
|
+
## Development
|
|
182
|
+
|
|
183
|
+
Run the test suite (no ClickHouse server required — tests use a fake client):
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
pip install -e ".[test]" # or: pip install -r requirements-dev.txt
|
|
187
|
+
pytest
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
The tests live in `tests/` and cover error classification, the estimator,
|
|
191
|
+
index suggestions, output formatting, connection settings, and the CLI.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
chqce-0.1.0/chqce/cli.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
|
|
6
|
+
from . import __version__
|
|
7
|
+
from .connection import create_client, test_connection
|
|
8
|
+
from .estimator import QueryEstimator
|
|
9
|
+
from .formatter import console, print_header, print_result
|
|
10
|
+
from .suggestions import get_index_suggestions
|
|
11
|
+
|
|
12
|
+
# Console bound to stderr; cli() reports file-read and connection failures here.
_err = Console(stderr=True)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _collect_interactive() -> str:
    """Collect a multi-line SQL query from stdin.

    Submit by ending a line with ';' or typing GO on its own line.

    Returns:
        The entered lines joined with newlines and stripped of surrounding
        whitespace. May be an empty string if only blank lines were entered
        before submitting.

    Exits the process with status 0 on Ctrl+C, and also on end-of-input
    (Ctrl+D / exhausted pipe) when nothing has been typed yet. Without the
    latter, a caller that re-prompts on an empty result would loop forever
    once stdin is at EOF.
    """
    console.print(
        "\n[dim]Paste or type your SQL query."
        " End with [bold];[/bold] or type [bold]GO[/bold] on its own line."
        " [bold]Ctrl+C[/bold] to exit.[/dim]\n"
    )
    lines: list[str] = []
    try:
        while True:
            # First line gets the "SQL>" prompt, continuation lines a dim ">".
            prefix = "[bold cyan]SQL>[/bold cyan] " if not lines else " [dim]>[/dim] "
            console.print(prefix, end="")
            try:
                line = input()
            except EOFError:
                if not lines:
                    # stdin is exhausted and there is nothing to submit:
                    # leave instead of handing back "" to be re-prompted.
                    console.print("\n[dim]Bye![/dim]")
                    sys.exit(0)
                # EOF after some input: submit what was collected.
                break
            stripped = line.strip()
            if stripped.upper() == "GO" or stripped == ";":
                # A bare terminator submits only if something was entered;
                # otherwise it is ignored and prompting continues.
                if lines:
                    break
                continue
            lines.append(line)
            if stripped.endswith(";"):
                break
    except KeyboardInterrupt:
        console.print("\n[dim]Bye![/dim]")
        sys.exit(0)

    return "\n".join(lines).strip()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _resolve_query(query: str | None, file: str | None) -> str | None:
|
|
50
|
+
"""Determine the query source, in priority order.
|
|
51
|
+
|
|
52
|
+
1. --file FILE read the query from a file (best for huge queries)
|
|
53
|
+
2. QUERY argument passed directly on the command line
|
|
54
|
+
3. piped stdin e.g. `chqce < query.sql` or `cat q.sql | chqce`
|
|
55
|
+
4. None -> caller falls back to interactive mode
|
|
56
|
+
"""
|
|
57
|
+
if file:
|
|
58
|
+
with open(file, "r", encoding="utf-8") as fh:
|
|
59
|
+
return fh.read().strip()
|
|
60
|
+
if query:
|
|
61
|
+
return query
|
|
62
|
+
# Query piped in on stdin (non-interactive).
|
|
63
|
+
if not sys.stdin.isatty():
|
|
64
|
+
data = sys.stdin.read().strip()
|
|
65
|
+
if data:
|
|
66
|
+
return data
|
|
67
|
+
return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _run(query: str, estimator: QueryEstimator, client, database: str, execute: bool) -> None:
    """Analyze one query and print the estimate together with index suggestions.

    The estimate and the suggestion lookup both happen while the
    "Analyzing…" spinner is shown; the report is printed afterwards.
    """
    spinner = console.status("[bold green]Analyzing…[/bold green]", spinner="dots")
    with spinner:
        outcome = estimator.estimate(query, execute=execute)
        index_hints = get_index_suggestions(
            query,
            client,
            current_database=database,
        )
    print_result(outcome, index_hints)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument("query", required=False)
@click.option("--file", "-f", "file", type=click.Path(exists=True, dir_okay=False),
              default=None, help="Read the query from a file (best for huge queries)")
@click.option("--host", "-H", default="localhost", envvar="CLICKHOUSE_HOST",
              show_default=True, help="ClickHouse host")
@click.option("--port", "-p", default=8123, envvar="CLICKHOUSE_PORT", type=int,
              show_default=True, help="HTTP(S) port")
@click.option("--user", "-u", default="default", envvar="CLICKHOUSE_USER",
              show_default=True, help="Username")
@click.option("--password", "-P", default="", envvar="CLICKHOUSE_PASSWORD",
              help="Password (or set CLICKHOUSE_PASSWORD)")
@click.option("--database", "-d", default="default", envvar="CLICKHOUSE_DATABASE",
              show_default=True, help="Default database")
# The three numeric limits below use IntRange(min=0): 0 keeps the "not set"
# meaning, while a negative value is rejected up front instead of being
# accepted and then silently ignored when the client settings are built.
@click.option("--max-query-size", default=0, type=click.IntRange(min=0), metavar="BYTES",
              help="Raise ClickHouse max_query_size for very large queries "
                   "(server default is 262144)")
@click.option("--timeout", "-t", default=0, type=click.IntRange(min=0), metavar="SECONDS",
              help="Server-side max_execution_time; query is aborted after this "
                   "many seconds (0 = unlimited)")
@click.option("--max-ast-elements", default=0, type=click.IntRange(min=0), metavar="N",
              help="Raise ClickHouse max_ast_elements for queries that fail with "
                   "'AST is too big' (server default is 50000)")
@click.option("--no-execute", is_flag=True, default=False,
              help="Estimate only — do not actually run the query")
@click.version_option(__version__, "-V", "--version")
def cli(query, file, host, port, user, password, database, max_query_size,
        timeout, max_ast_elements, no_execute):
    """ClickHouse Query Cost Estimator.

    Estimates rows scanned, memory usage, and execution time for a ClickHouse
    SQL query, and suggests indexes based on WHERE-clause columns.

    \b
    The query can come from (in priority order):
    • --file query.sql best for huge / multi-line queries
    • a QUERY argument chqce "SELECT ..."
    • piped stdin chqce < query.sql
    • interactive prompt run with no query at all

    \b
    Environment variables (override defaults):
    CLICKHOUSE_HOST, CLICKHOUSE_PORT, CLICKHOUSE_USER,
    CLICKHOUSE_PASSWORD, CLICKHOUSE_DATABASE

    \b
    Examples:
    chqce "SELECT count() FROM hits WHERE EventDate = today()"
    chqce -f report.sql --no-execute
    chqce -t 600 "SELECT ... a slow query ..."
    cat report.sql | chqce --max-query-size 1048576 --max-ast-elements 500000
    """
    # NOTE: the docstring above is the --help text rendered by click; the
    # "\b" markers tell click not to re-wrap the paragraph that follows.

    # Resolve the query before connecting so an unreadable file fails fast.
    try:
        resolved = _resolve_query(query, file)
    except OSError as e:
        _err.print(f"[red]Could not read query file:[/red] {e}")
        sys.exit(1)

    # Top-level boundary: any failure while building or probing the client is
    # reported on stderr and turned into exit status 1.
    try:
        client = create_client(host=host, port=port, user=user,
                               password=password, database=database,
                               max_query_size=max_query_size,
                               max_execution_time=timeout,
                               max_ast_elements=max_ast_elements)
        ok, version_or_err = test_connection(client)
    except Exception as e:
        _err.print(f"[red]Connection error:[/red] {e}")
        sys.exit(1)

    if not ok:
        # On failure the second tuple element carries the error text.
        _err.print(f"[red]Connection failed:[/red] {version_or_err}")
        sys.exit(1)

    # On success the second tuple element is the server version string.
    print_header(version_or_err, host, port, database)

    estimator = QueryEstimator(client)
    execute = not no_execute

    if resolved:
        # One-shot mode: a query came from --file, the argument, or stdin.
        _run(resolved, estimator, client, database, execute)
    else:
        # Interactive mode: keep prompting; _collect_interactive() ends the
        # process itself (sys.exit) when the user presses Ctrl+C.
        while True:
            q = _collect_interactive()
            if not q:
                continue
            _run(q, estimator, client, database, execute)
            console.print("[dim]" + "─" * 60 + "[/dim]")
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import clickhouse_connect
|
|
2
|
+
from clickhouse_connect.driver import Client
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# send_receive_timeout used by create_client() when no positive
# max_execution_time is given (presumably seconds, like max_execution_time).
DEFAULT_SOCKET_TIMEOUT = 300
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def create_client(
    host: str = "localhost",
    port: int = 8123,
    user: str = "default",
    password: str = "",
    database: str = "default",
    max_query_size: int = 0,
    max_execution_time: int = 0,
    max_ast_elements: int = 0,
) -> Client:
    """Create a clickhouse_connect client with optional per-session limits.

    Args:
        host, port, user, password, database: Connection parameters passed
            straight through to ``clickhouse_connect.get_client``.
        max_query_size: Sent as the ``max_query_size`` setting when > 0.
        max_execution_time: Sent as ``max_execution_time`` (seconds) when
            > 0; also determines the socket timeout.
        max_ast_elements: Sent as both ``max_ast_elements`` and
            ``max_expanded_ast_elements`` when > 0.

    Returns:
        The client returned by ``clickhouse_connect.get_client``.
    """
    # Candidate per-session settings; only values the caller actually
    # overrode (> 0) are forwarded, everything else stays at server default.
    requested = {
        "max_query_size": max_query_size,
        "max_execution_time": max_execution_time,
        "max_ast_elements": max_ast_elements,
        "max_expanded_ast_elements": max_ast_elements,
    }
    overrides = {name: value for name, value in requested.items() if value > 0}

    # Give the socket 30 units of slack past the server-side limit so the
    # server's own timeout error arrives before the connection is dropped.
    if max_execution_time > 0:
        socket_timeout = max_execution_time + 30
    else:
        socket_timeout = DEFAULT_SOCKET_TIMEOUT

    connection_kwargs = dict(
        host=host,
        port=port,
        username=user,
        password=password,
        database=database,
        connect_timeout=10,
        send_receive_timeout=socket_timeout,
        settings=overrides,
    )
    return clickhouse_connect.get_client(**connection_kwargs)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_connection(client: Client) -> tuple[bool, str]:
    """Probe the server by running ``SELECT version()``.

    Returns:
        ``(True, version)`` with the first cell of the first result row on
        success, or ``(False, message)`` with the stringified exception if
        the query or the row access raises.
    """
    try:
        rows = client.query("SELECT version()").result_rows
        server_version = rows[0][0]
    except Exception as exc:
        return False, str(exc)
    return True, server_version
|