chops 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chops/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """chops — ClickHouse Operations CLI."""
2
+
3
+ __version__ = "0.1.0"
chops/cli.py ADDED
@@ -0,0 +1,70 @@
1
+ """CLI entry point for chops."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import typer
6
+
7
+ from chops import __version__
8
+ from chops.commands import dq, health
9
+
10
+ app = typer.Typer(
11
+ name="chops",
12
+ help="ClickHouse Operations CLI — health checks, data quality, and observability.",
13
+ no_args_is_help=True,
14
+ )
15
+
16
+ app.add_typer(health.app, name="health", help="Cluster health and observability commands.")
17
+ app.add_typer(dq.app, name="dq", help="Data quality profiling and checks.")
18
+
19
+
20
@app.command()
def version() -> None:
    """Print the installed chops version and exit."""
    banner = f"chops {__version__}"
    typer.echo(banner)
24
+
25
+
26
@app.callback()
def main(
    ctx: typer.Context,
    host: str | None = typer.Option(
        None, "--host", "-h", envvar="CLICKHOUSE_HOST", help="ClickHouse host"
    ),
    port: int | None = typer.Option(
        None, "--port", "-p", envvar="CLICKHOUSE_PORT", help="ClickHouse HTTP port"
    ),
    user: str | None = typer.Option(
        None, "--user", "-u", envvar="CLICKHOUSE_USER", help="ClickHouse user"
    ),
    password: str | None = typer.Option(
        None, "--password", envvar="CLICKHOUSE_PASSWORD", help="ClickHouse password"
    ),
    database: str | None = typer.Option(
        None, "--database", "-d", envvar="CLICKHOUSE_DATABASE", help="Default database"
    ),
) -> None:
    """Global connection options.

    Stores the (possibly None) connection settings on ``ctx.obj`` so every
    subcommand can build a client from them; unset values fall back to
    environment variables / defaults inside ``get_client``.
    """
    ctx.ensure_object(dict)
    ctx.obj.update(
        {
            "host": host,
            "port": port,
            "user": user,
            "password": password,
            "database": database,
        }
    )
chops/client.py ADDED
@@ -0,0 +1,40 @@
1
+ """ClickHouse connection client."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from typing import Any
7
+
8
+ import clickhouse_connect
9
+ from clickhouse_connect.driver.client import Client
10
+
11
+
12
def get_client(
    host: str | None = None,
    port: int | None = None,
    user: str | None = None,
    password: str | None = None,
    database: str | None = None,
    secure: bool | None = None,
) -> Client:
    """Create a ClickHouse client from explicit args or environment variables.

    Each argument that is not supplied falls back to the matching
    ``CLICKHOUSE_*`` environment variable, then to a sensible default
    (localhost:8123, user ``default``, empty password, database ``default``,
    plain HTTP).
    """
    env = os.getenv
    # For password/database/secure an explicit "empty" value must win over the
    # environment, hence the `is None` guards rather than truthiness.
    if password is None:
        password = env("CLICKHOUSE_PASSWORD", "")
    if database is None:
        database = env("CLICKHOUSE_DATABASE", "default")
    if secure is None:
        secure = env("CLICKHOUSE_SECURE", "false").lower() == "true"
    return clickhouse_connect.get_client(
        host=host or env("CLICKHOUSE_HOST", "localhost"),
        port=port or int(env("CLICKHOUSE_PORT", "8123")),
        username=user or env("CLICKHOUSE_USER", "default"),
        password=password,
        database=database,
        secure=secure,
        connect_timeout=10,
    )
34
+
35
+
36
+ def query(client: Client, sql: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
37
+ """Execute a query and return results as list of dicts."""
38
+ result = client.query(sql, parameters=params or {})
39
+ columns = result.column_names
40
+ return [dict(zip(columns, row, strict=False)) for row in result.result_rows]
@@ -0,0 +1 @@
1
+ """chops CLI commands."""
chops/commands/dq.py ADDED
@@ -0,0 +1,345 @@
1
+ """Data quality commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json as json_mod
6
+
7
+ import typer
8
+ from clickhouse_connect.driver.client import Client
9
+ from rich.console import Console
10
+ from rich.table import Table
11
+
12
+ from chops.client import get_client, query
13
+
14
+ app = typer.Typer(no_args_is_help=True)
15
+ console = Console()
16
+
17
+
18
def _get_conn(ctx: typer.Context) -> Client:
    """Build a ClickHouse client from the connection options stored on *ctx*."""
    settings = ctx.obj or {}
    keys = ("host", "port", "user", "password", "database")
    return get_client(**{key: settings.get(key) for key in keys})
28
+
29
+
30
+ def _resolve_table(table: str, ctx_database: str | None) -> tuple[str, str]:
31
+ """Resolve 'db.table' or 'table' into (database, table)."""
32
+ if "." in table:
33
+ db, tbl = table.split(".", 1)
34
+ return db, tbl
35
+ return ctx_database or "default", table
36
+
37
+
38
@app.command()
def profile(
    ctx: typer.Context,
    table: str = typer.Argument(help="Table to profile (e.g. 'mydb.events' or 'events')"),
    output: str = typer.Option("table", "--output", "-o", help="Output format: table, json"),
    sample: int | None = typer.Option(
        None,
        "--sample",
        "-s",
        help="Sample N rows (faster for huge tables)",
    ),
) -> None:
    """Profile a table: row count, null rates, cardinality, min/max per column.

    Null percentages are computed over the rows actually scanned (the sample
    size when ``--sample`` is smaller than the table, the full row count
    otherwise). Exits 1 if the table does not exist.
    """
    client = _get_conn(ctx)
    obj = ctx.obj or {}
    db, tbl = _resolve_table(table, obj.get("database"))

    def _q(name: str) -> str:
        # Backtick-quote an identifier; ClickHouse quoted identifiers use
        # string-literal-style backslash escaping.
        return "`" + name.replace("\\", "\\\\").replace("`", "\\`") + "`"

    qualified = f"{_q(db)}.{_q(tbl)}"

    # Get columns. Server-side parameter binding instead of interpolating the
    # user-supplied db/table names into the SQL text (injection-safe).
    cols = query(
        client,
        """
        SELECT name, type
        FROM system.columns
        WHERE database = {db:String} AND table = {tbl:String}
        ORDER BY position
        """,
        {"db": db, "tbl": tbl},
    )

    if not cols:
        console.print(f"[red]Table '{db}.{tbl}' not found or has no columns.[/red]")
        raise typer.Exit(1)

    # Row count
    count_result = query(client, f"SELECT count() AS c FROM {qualified}")
    total_rows = int(count_result[0]["c"]) if count_result else 0

    # Build one wide profiling query: nulls + cardinality per column,
    # min/max for orderable (numeric/date) columns only.
    select_parts: list[str] = []
    for col in cols:
        name = col["name"]
        col_type = col["type"]
        escaped = _q(name)

        select_parts.append(f"countIf({escaped} IS NULL) AS {_q(name + '__nulls')}")
        select_parts.append(f"uniq({escaped}) AS {_q(name + '__cardinality')}")

        # Min/max only for numeric and date types
        if any(t in col_type for t in ("Int", "UInt", "Float", "Decimal", "Date", "DateTime")):
            select_parts.append(f"min({escaped}) AS {_q(name + '__min')}")
            select_parts.append(f"max({escaped}) AS {_q(name + '__max')}")

    source = f"(SELECT * FROM {qualified} LIMIT {int(sample)})" if sample else qualified

    profile_sql = f"SELECT {', '.join(select_parts)} FROM {source}"
    profile_result = query(client, profile_sql)
    stats = profile_result[0] if profile_result else {}

    # Denominator for null %: a --sample larger than the table would otherwise
    # deflate the rate, so clamp it to the actual row count.
    row_base = min(sample, total_rows) if sample else total_rows

    # Build per-column result entries keyed off the aliases above.
    results: list[dict[str, object]] = []
    for col in cols:
        name = col["name"]
        nulls = int(stats.get(f"{name}__nulls", 0))
        cardinality = int(stats.get(f"{name}__cardinality", 0))
        null_pct = round(nulls / row_base * 100, 1) if row_base > 0 else 0.0

        entry: dict[str, object] = {
            "column": name,
            "type": col["type"],
            "null_count": nulls,
            "null_pct": null_pct,
            "cardinality": cardinality,
        }

        min_val = stats.get(f"{name}__min")
        max_val = stats.get(f"{name}__max")
        if min_val is not None:
            entry["min"] = min_val
            entry["max"] = max_val

        results.append(entry)

    if output == "json":
        payload = {"table": f"{db}.{tbl}", "rows": total_rows, "columns": results}
        typer.echo(json_mod.dumps(payload, indent=2, default=str))
        return

    # Rich table output
    t = Table(title=f"Profile: {db}.{tbl} ({total_rows:,} rows)")
    t.add_column("Column", style="bold")
    t.add_column("Type", style="dim")
    t.add_column("Nulls", justify="right")
    t.add_column("Null %", justify="right")
    t.add_column("Cardinality", justify="right")
    t.add_column("Min", justify="right")
    t.add_column("Max", justify="right")

    for r in results:
        # Highlight columns with a >10% null rate.
        null_style = "red" if r["null_pct"] > 10 else ""  # type: ignore[operator]
        t.add_row(
            str(r["column"]),
            str(r["type"]),
            f"{r['null_count']:,}",
            f"[{null_style}]{r['null_pct']}%[/{null_style}]" if null_style else f"{r['null_pct']}%",
            f"{r['cardinality']:,}",
            str(r.get("min", "")),
            str(r.get("max", "")),
        )

    console.print(t)
149
+
150
+
151
@app.command()
def freshness(
    ctx: typer.Context,
    table: str = typer.Argument(help="Table to check (e.g. 'mydb.events' or 'events')"),
    column: str | None = typer.Option(
        None,
        "--column",
        "-c",
        help="DateTime column to check (auto-detected if not provided)",
    ),
    warn_minutes: int = typer.Option(60, "--warn", help="Warning threshold in minutes"),
    critical_minutes: int = typer.Option(
        1440,
        "--critical",
        help="Critical threshold in minutes (default 24h)",
    ),
) -> None:
    """Check data freshness — time since last row was inserted.

    Exit codes (CI-friendly): 0 = OK, 1 = WARNING, 2 = CRITICAL or no data.
    """
    client = _get_conn(ctx)
    obj = ctx.obj or {}
    db, tbl = _resolve_table(table, obj.get("database"))

    def _q(name: str) -> str:
        # Backtick-quote an identifier; ClickHouse quoted identifiers use
        # string-literal-style backslash escaping.
        return "`" + name.replace("\\", "\\\\").replace("`", "\\`") + "`"

    qualified = f"{_q(db)}.{_q(tbl)}"

    # Auto-detect the first DateTime column. Server-side parameter binding
    # instead of interpolating db/table into the SQL text (injection-safe).
    if not column:
        dt_cols = query(
            client,
            """
            SELECT name FROM system.columns
            WHERE database = {db:String} AND table = {tbl:String}
              AND type LIKE '%DateTime%'
            ORDER BY position
            LIMIT 1
            """,
            {"db": db, "tbl": tbl},
        )
        if not dt_cols:
            msg = f"No DateTime column found in {db}.{tbl}. Use --column to specify."
            console.print(f"[red]{msg}[/red]")
            raise typer.Exit(1)
        column = dt_cols[0]["name"]

    # The (possibly user-supplied) column name is identifier-quoted, never
    # spliced in raw.
    result = query(
        client,
        f"""
        SELECT
            max({_q(column)}) AS latest,
            dateDiff('minute', max({_q(column)}), now()) AS minutes_ago
        FROM {qualified}
        """,
    )

    if not result or result[0].get("latest") is None:
        console.print(f"[red]Table {db}.{tbl} is empty or column '{column}' has no data.[/red]")
        raise typer.Exit(2)

    row = result[0]
    minutes = int(row["minutes_ago"])
    latest = row["latest"]

    # Threshold ladder: CRITICAL (exit 2) -> WARNING (exit 1) -> OK (exit 0).
    if minutes >= critical_minutes:
        style = "bold red"
        status = "CRITICAL"
        exit_code = 2
    elif minutes >= warn_minutes:
        style = "bold yellow"
        status = "WARNING"
        exit_code = 1
    else:
        style = "bold green"
        status = "OK"
        exit_code = 0

    hours, mins = divmod(minutes, 60)
    age_str = f"{hours}h {mins}m" if hours > 0 else f"{mins}m"

    console.print(f"[{style}]{status}[/{style}] — {db}.{tbl}.{column}")
    console.print(f"  Latest: {latest}")
    console.print(f"  Age: {age_str} ago")

    if exit_code > 0:
        raise typer.Exit(exit_code)
231
+
232
+
233
@app.command()
def check(
    ctx: typer.Context,
    table: str = typer.Argument(help="Table to check (e.g. 'mydb.events' or 'events')"),
    max_null_pct: float = typer.Option(
        5.0,
        "--max-null-pct",
        help="Max null percentage before failing",
    ),
    min_rows: int | None = typer.Option(None, "--min-rows", help="Minimum expected row count"),
    output: str = typer.Option("table", "--output", "-o", help="Output format: table, json"),
) -> None:
    """Run data quality checks on a table. Returns non-zero exit code on failure.

    Exits 1 when any check fails, 2 when the table does not exist.
    """
    client = _get_conn(ctx)
    obj = ctx.obj or {}
    db, tbl = _resolve_table(table, obj.get("database"))

    def _q(name: str) -> str:
        # Backtick-quote an identifier; ClickHouse quoted identifiers use
        # string-literal-style backslash escaping.
        return "`" + name.replace("\\", "\\\\").replace("`", "\\`") + "`"

    qualified = f"{_q(db)}.{_q(tbl)}"

    # Row count
    count_result = query(client, f"SELECT count() AS c FROM {qualified}")
    total_rows = int(count_result[0]["c"]) if count_result else 0

    # Columns. Server-side parameter binding instead of interpolating the
    # user-supplied db/table names into the SQL text (injection-safe).
    cols = query(
        client,
        """
        SELECT name, type
        FROM system.columns
        WHERE database = {db:String} AND table = {tbl:String}
        ORDER BY position
        """,
        {"db": db, "tbl": tbl},
    )

    if not cols:
        console.print(f"[red]Table '{db}.{tbl}' not found.[/red]")
        raise typer.Exit(2)

    # Null checks — identifier-quote both the column reference and the alias.
    select_parts = [
        f"countIf({_q(c['name'])} IS NULL) AS {_q(c['name'] + '__nulls')}" for c in cols
    ]
    null_result = query(client, f"SELECT {', '.join(select_parts)} FROM {qualified}")
    null_stats = null_result[0] if null_result else {}

    failures: list[dict[str, object]] = []
    passes: list[dict[str, object]] = []

    # Check min rows. `is not None` throughout so an explicit `--min-rows 0`
    # is reported as a real threshold, not as "any".
    if min_rows is not None and total_rows < min_rows:
        failures.append(
            {
                "check": "min_rows",
                "column": "-",
                "expected": f">= {min_rows:,}",
                "actual": f"{total_rows:,}",
            }
        )
    else:
        passes.append(
            {
                "check": "row_count",
                "column": "-",
                "expected": f">= {min_rows:,}" if min_rows is not None else "any",
                "actual": f"{total_rows:,}",
            }
        )

    # Check null rates
    for col in cols:
        name = col["name"]
        nulls = int(null_stats.get(f"{name}__nulls", 0))
        null_pct = round(nulls / total_rows * 100, 1) if total_rows > 0 else 0.0

        entry: dict[str, object] = {
            "check": "null_rate",
            "column": name,
            "expected": f"<= {max_null_pct}%",
            "actual": f"{null_pct}%",
        }

        if null_pct > max_null_pct:
            failures.append(entry)
        else:
            passes.append(entry)

    if output == "json":
        typer.echo(
            json_mod.dumps(
                {
                    "table": f"{db}.{tbl}",
                    "rows": total_rows,
                    "passed": len(passes),
                    "failed": len(failures),
                    "failures": failures,
                },
                indent=2,
                default=str,
            )
        )
    else:
        total_checks = len(passes) + len(failures)
        status = "[green]PASSED[/green]" if not failures else "[red]FAILED[/red]"
        console.print(f"\n{status} — {db}.{tbl}: {len(passes)}/{total_checks} checks passed\n")

        if failures:
            t = Table(title="Failed Checks", border_style="red")
            t.add_column("Check", style="bold")
            t.add_column("Column")
            t.add_column("Expected")
            t.add_column("Actual", style="red")
            for f in failures:
                t.add_row(str(f["check"]), str(f["column"]), str(f["expected"]), str(f["actual"]))
            console.print(t)

    if failures:
        raise typer.Exit(1)
@@ -0,0 +1,297 @@
1
+ """Health and observability commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import typer
6
+ from clickhouse_connect.driver.client import Client
7
+ from rich.console import Console
8
+ from rich.table import Table
9
+
10
+ from chops.client import get_client, query
11
+
12
+ app = typer.Typer(no_args_is_help=True)
13
+ console = Console()
14
+
15
+
16
+ def _get_conn(ctx: typer.Context) -> Client:
17
+ """Build client from context."""
18
+ obj = ctx.obj or {}
19
+ return get_client(
20
+ host=obj.get("host"),
21
+ port=obj.get("port"),
22
+ user=obj.get("user"),
23
+ password=obj.get("password"),
24
+ database=obj.get("database"),
25
+ )
26
+
27
+
28
@app.command()
def summary(ctx: typer.Context) -> None:
    """Show cluster health summary: version, uptime, databases, tables, parts, merges, queries."""
    client = _get_conn(ctx)

    # Server version and uptime.
    info = query(client, "SELECT version() AS version, uptime() AS uptime_seconds")
    first = info[0] if info else {}
    ver = first.get("version", "?")
    total_seconds = int(first.get("uptime_seconds", 0))
    days, remainder = divmod(total_seconds, 86400)
    hours, remainder = divmod(remainder, 3600)
    mins = remainder // 60
    uptime_str = f"{days}d {hours}h {mins}m"

    def count_of(sql: str) -> object:
        # Run a single-value count query; 0 when nothing comes back.
        rows = query(client, sql)
        return rows[0]["c"] if rows else 0

    # Object counts, excluding ClickHouse's own system schemas.
    sys_dbs = "('system', 'INFORMATION_SCHEMA', 'information_schema')"
    n_databases = count_of(f"SELECT count() AS c FROM system.databases WHERE name NOT IN {sys_dbs}")
    n_tables = count_of(f"SELECT count() AS c FROM system.tables WHERE database NOT IN {sys_dbs}")
    n_parts = count_of("SELECT count() AS c FROM system.parts WHERE active")
    n_merges = count_of("SELECT count() AS c FROM system.merges")
    n_queries = count_of("SELECT count() AS c FROM system.processes WHERE is_initial_query")

    # Disk usage across all active parts.
    disk = query(
        client,
        """
        SELECT
            formatReadableSize(sum(bytes_on_disk)) AS total_size,
            sum(rows) AS total_rows
        FROM system.parts
        WHERE active
        """,
    )

    grid = Table(title="ClickHouse Health Summary", show_header=False, border_style="dim")
    grid.add_column("Metric", style="bold")
    grid.add_column("Value")

    grid.add_row("Version", str(ver))
    grid.add_row("Uptime", uptime_str)
    grid.add_row("Databases", str(n_databases))
    grid.add_row("Tables", str(n_tables))
    grid.add_row("Active parts", str(n_parts))
    grid.add_row("Running merges", str(n_merges))
    grid.add_row("Active queries", str(n_queries))
    if disk:
        grid.add_row("Total data size", str(disk[0].get("total_size", "?")))
        grid.add_row("Total rows", f"{int(disk[0].get('total_rows', 0)):,}")

    console.print(grid)
88
+
89
+
90
+ @app.command(name="table-sizes")
91
+ def table_sizes(
92
+ ctx: typer.Context,
93
+ limit: int = typer.Option(20, "--limit", "-n", help="Number of tables to show"),
94
+ database: str | None = typer.Option(None, "--database", "-d", help="Filter by database"),
95
+ ) -> None:
96
+ """Show disk usage by table, sorted by size."""
97
+ client = _get_conn(ctx)
98
+
99
+ where = "WHERE database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')"
100
+ if database:
101
+ where += f" AND database = '{database}'"
102
+
103
+ rows = query(
104
+ client,
105
+ f"""
106
+ SELECT
107
+ database,
108
+ name AS table_name,
109
+ engine,
110
+ formatReadableSize(total_bytes) AS size,
111
+ total_bytes,
112
+ formatReadableQuantity(total_rows) AS rows,
113
+ total_rows AS raw_rows,
114
+ partition_count
115
+ FROM (
116
+ SELECT
117
+ database,
118
+ table AS name,
119
+ engine,
120
+ sum(bytes_on_disk) AS total_bytes,
121
+ sum(rows) AS total_rows,
122
+ count(DISTINCT partition) AS partition_count
123
+ FROM system.parts
124
+ {where}
125
+ AND active
126
+ GROUP BY database, table, engine
127
+ )
128
+ ORDER BY total_bytes DESC
129
+ LIMIT {limit}
130
+ """,
131
+ )
132
+
133
+ table = Table(title=f"Top {limit} Tables by Size")
134
+ table.add_column("Database", style="cyan")
135
+ table.add_column("Table", style="bold")
136
+ table.add_column("Engine")
137
+ table.add_column("Size", justify="right", style="green")
138
+ table.add_column("Rows", justify="right")
139
+ table.add_column("Partitions", justify="right")
140
+
141
+ for r in rows:
142
+ table.add_row(
143
+ r["database"],
144
+ r["table_name"],
145
+ r["engine"],
146
+ r["size"],
147
+ r["rows"],
148
+ str(r["partition_count"]),
149
+ )
150
+
151
+ console.print(table)
152
+
153
+
154
+ @app.command(name="slow-queries")
155
+ def slow_queries(
156
+ ctx: typer.Context,
157
+ limit: int = typer.Option(10, "--limit", "-n", help="Number of queries to show"),
158
+ hours: int = typer.Option(24, "--hours", help="Look back N hours"),
159
+ ) -> None:
160
+ """Show slowest queries from the query log."""
161
+ client = _get_conn(ctx)
162
+
163
+ rows = query(
164
+ client,
165
+ f"""
166
+ SELECT
167
+ type,
168
+ query_duration_ms / 1000 AS duration_s,
169
+ formatReadableSize(read_bytes) AS read_size,
170
+ read_rows,
171
+ formatReadableSize(memory_usage) AS peak_memory,
172
+ user,
173
+ substring(query, 1, 120) AS query_preview
174
+ FROM system.query_log
175
+ WHERE event_time > now() - INTERVAL {hours} HOUR
176
+ AND type IN ('QueryFinish', 'ExceptionWhileProcessing')
177
+ AND query_kind = 'Select'
178
+ AND is_initial_query
179
+ ORDER BY query_duration_ms DESC
180
+ LIMIT {limit}
181
+ """,
182
+ )
183
+
184
+ table = Table(title=f"Top {limit} Slow Queries (last {hours}h)")
185
+ table.add_column("#", justify="right", style="dim")
186
+ table.add_column("Duration", justify="right", style="bold red")
187
+ table.add_column("Read", justify="right")
188
+ table.add_column("Rows", justify="right")
189
+ table.add_column("Memory", justify="right")
190
+ table.add_column("User")
191
+ table.add_column("Query", max_width=80)
192
+
193
+ for i, r in enumerate(rows, 1):
194
+ dur = f"{r['duration_s']:.1f}s"
195
+ table.add_row(
196
+ str(i),
197
+ dur,
198
+ r["read_size"],
199
+ f"{int(r['read_rows']):,}",
200
+ r["peak_memory"],
201
+ r["user"],
202
+ r["query_preview"],
203
+ )
204
+
205
+ console.print(table)
206
+
207
+
208
@app.command()
def merges(ctx: typer.Context) -> None:
    """Show currently running merges."""
    client = _get_conn(ctx)

    active = query(
        client,
        """
        SELECT
            database,
            table,
            round(progress * 100, 1) AS progress_pct,
            round(elapsed, 1) AS elapsed_s,
            num_parts,
            formatReadableSize(total_size_bytes_compressed) AS size,
            formatReadableSize(bytes_read_uncompressed) AS bytes_read,
            formatReadableSize(bytes_written_uncompressed) AS bytes_written
        FROM system.merges
        ORDER BY elapsed DESC
        """,
    )

    if not active:
        console.print("[green]No active merges.[/green]")
        return

    grid = Table(title=f"Active Merges ({len(active)})")
    grid.add_column("Database", style="cyan")
    grid.add_column("Table", style="bold")
    grid.add_column("Progress", justify="right")
    grid.add_column("Elapsed", justify="right")
    grid.add_column("Parts", justify="right")
    grid.add_column("Size", justify="right")

    for merge in active:
        grid.add_row(
            merge["database"],
            merge["table"],
            f"{merge['progress_pct']}%",
            f"{merge['elapsed_s']}s",
            str(merge["num_parts"]),
            merge["size"],
        )

    console.print(grid)
253
+
254
+
255
+ @app.command(name="running-queries")
256
+ def running_queries(ctx: typer.Context) -> None:
257
+ """Show currently running queries."""
258
+ client = _get_conn(ctx)
259
+
260
+ rows = query(
261
+ client,
262
+ """
263
+ SELECT
264
+ query_id,
265
+ user,
266
+ round(elapsed, 1) AS elapsed_s,
267
+ formatReadableSize(read_bytes) AS read_size,
268
+ read_rows,
269
+ formatReadableSize(memory_usage) AS memory,
270
+ substring(query, 1, 100) AS query_preview
271
+ FROM system.processes
272
+ WHERE is_initial_query
273
+ ORDER BY elapsed DESC
274
+ """,
275
+ )
276
+
277
+ if not rows:
278
+ console.print("[green]No running queries.[/green]")
279
+ return
280
+
281
+ table = Table(title=f"Running Queries ({len(rows)})")
282
+ table.add_column("Elapsed", justify="right", style="bold")
283
+ table.add_column("User")
284
+ table.add_column("Read", justify="right")
285
+ table.add_column("Memory", justify="right")
286
+ table.add_column("Query", max_width=80)
287
+
288
+ for r in rows:
289
+ table.add_row(
290
+ f"{r['elapsed_s']}s",
291
+ r["user"],
292
+ r["read_size"],
293
+ r["memory"],
294
+ r["query_preview"],
295
+ )
296
+
297
+ console.print(table)
@@ -0,0 +1,141 @@
1
+ Metadata-Version: 2.4
2
+ Name: chops
3
+ Version: 0.1.0
4
+ Summary: ClickHouse Operations CLI — health checks, data quality profiling, and observability
5
+ Project-URL: Homepage, https://github.com/antonio-mello-ai/chops
6
+ Project-URL: Repository, https://github.com/antonio-mello-ai/chops
7
+ Project-URL: Issues, https://github.com/antonio-mello-ai/chops/issues
8
+ Author-email: Antonio Mello <antonio.mello@felhen.com.br>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: cli,clickhouse,data-quality,database,observability,operations
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: System Administrators
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Database
23
+ Classifier: Topic :: System :: Monitoring
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: clickhouse-connect>=0.7
26
+ Requires-Dist: rich>=13.0
27
+ Requires-Dist: typer>=0.12
28
+ Provides-Extra: dev
29
+ Requires-Dist: mypy>=1.11; extra == 'dev'
30
+ Requires-Dist: pytest-asyncio>=1.0; extra == 'dev'
31
+ Requires-Dist: pytest>=8.0; extra == 'dev'
32
+ Requires-Dist: ruff>=0.6; extra == 'dev'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # chops
36
+
37
+ ClickHouse Operations CLI — health checks, data quality profiling, and observability from your terminal.
38
+
39
+ No more copy-pasting system table queries. One command to check cluster health, profile data quality, or find slow queries.
40
+
41
+ ## Quick Start
42
+
43
+ ```bash
44
+ # Run directly with uvx (no install needed)
45
+ uvx chops health summary
46
+
47
+ # Or install with pip
48
+ pip install chops
49
+ ```
50
+
51
+ ## Configuration
52
+
53
+ Set environment variables or pass flags:
54
+
55
+ ```bash
56
+ export CLICKHOUSE_HOST=localhost
57
+ export CLICKHOUSE_PORT=8123
58
+ export CLICKHOUSE_USER=default
59
+ export CLICKHOUSE_PASSWORD=
60
+ ```
61
+
62
+ Or use flags: `chops --host myserver --user admin health summary`
63
+
64
+ ## Commands
65
+
66
+ ### Health & Observability
67
+
68
+ | Command | Description |
69
+ |---------|-------------|
70
+ | `chops health summary` | Cluster overview: version, uptime, databases, tables, parts, merges, queries |
71
+ | `chops health table-sizes` | Disk usage by table, sorted by size |
72
+ | `chops health slow-queries` | Top N slowest queries from query log |
73
+ | `chops health merges` | Currently running merge operations |
74
+ | `chops health running-queries` | Active queries with elapsed time and memory |
75
+
76
+ ### Data Quality
77
+
78
+ | Command | Description |
79
+ |---------|-------------|
80
+ | `chops dq profile <table>` | Column-level profiling: null rates, cardinality, min/max |
81
+ | `chops dq check <table>` | Run quality checks with configurable thresholds (CI-friendly exit codes) |
82
+ | `chops dq freshness <table>` | Time since last row — OK/WARNING/CRITICAL with exit codes |
83
+
84
+ ## Examples
85
+
86
+ ```bash
87
+ # Quick cluster health check
88
+ chops health summary
89
+
90
+ # Find which tables are eating disk
91
+ chops health table-sizes --limit 10
92
+
93
+ # Slowest queries in the last 6 hours
94
+ chops health slow-queries --hours 6
95
+
96
+ # Profile a table's data quality
97
+ chops dq profile mydb.events
98
+
99
+ # Run quality checks in CI (non-zero exit on failure)
100
+ chops dq check mydb.events --max-null-pct 5 --min-rows 1000
101
+
102
+ # Check if a streaming table is still receiving data
103
+ chops dq freshness mydb.events --warn 60 --critical 1440
104
+
105
+ # JSON output for automation
106
+ chops dq profile mydb.events --output json
107
+ ```
108
+
109
+ ## CI/CD Integration
110
+
111
+ `chops dq check` and `chops dq freshness` return non-zero exit codes on failure, making them usable in CI pipelines:
112
+
113
+ ```yaml
114
+ - name: Data quality gate
115
+ run: |
116
+ chops dq check production.orders --max-null-pct 2 --min-rows 10000
117
+ chops dq freshness production.orders --warn 30 --critical 120
118
+ ```
119
+
120
+ ## Development
121
+
122
+ ```bash
123
+ git clone https://github.com/antonio-mello-ai/chops.git
124
+ cd chops
125
+ python -m venv .venv
126
+ source .venv/bin/activate
127
+ pip install -e ".[dev]"
128
+
129
+ # Run tests
130
+ pytest
131
+
132
+ # Lint
133
+ ruff check src/ tests/
134
+
135
+ # Type check
136
+ mypy src/
137
+ ```
138
+
139
+ ## License
140
+
141
+ MIT
@@ -0,0 +1,11 @@
1
+ chops/__init__.py,sha256=b5hTDGHQP8zhs3wtEtBryS43CeIdoRzOYx5aAP-fEds,66
2
+ chops/cli.py,sha256=FoXpYsRrnx3EV8u-RJpN4w_YmedCaXu1R0UW88FJvwg,1665
3
+ chops/client.py,sha256=NN8G_U_RP_0I36iEoEvbynKfUIoBSCwNcLu-3p3vuow,1437
4
+ chops/commands/__init__.py,sha256=Eoi6lnn2eFRky6Zttu4A3M6oxqQsKrBnRv0hHzUz-wI,26
5
+ chops/commands/dq.py,sha256=h_fCkAFbl9bO0uWxJg9ga6xhRJXHgh6zAX1UwRfTHvM,10757
6
+ chops/commands/health.py,sha256=jEbBirRfr2JggEfyWsuK2-hnSexmNVQglRXNzPcEHuE,9011
7
+ chops-0.1.0.dist-info/METADATA,sha256=QVeQepqRYDv4PE_5C4tq9ZGt6uwPBm4ZwIHS-ElcfdM,4082
8
+ chops-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
9
+ chops-0.1.0.dist-info/entry_points.txt,sha256=Gyn8uepwVskbK_q7XQg804URgD1g6yqmwlaf2k-YTfw,40
10
+ chops-0.1.0.dist-info/licenses/LICENSE,sha256=cTwIz6AWGa-AZ2FGm1UhQERg3Z1Rc46T8ci6XfTnrGo,1070
11
+ chops-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ chops = chops.cli:app
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Antonio Mello
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.