mcp-hydrolix 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp_hydrolix/log/log.yaml CHANGED
@@ -38,3 +38,7 @@ loggers:
  handlers: [ default ]
  level: INFO
  propagate: false
+ fastmcp:
+ handlers: [ default ]
+ level: INFO
+ propagate: false
mcp_hydrolix/main.py CHANGED
@@ -50,6 +50,7 @@ def main():
  host=config.mcp_bind_host,
  port=config.mcp_bind_port,
  uvicorn_config={"log_config": log_dict_config},
+ stateless_http=True,
  )
  else:
  log_dict_config = setup_logging(None, "INFO", "json")
@@ -70,6 +71,9 @@ def main():
  ).run()
  else:
  # For stdio transport, no host or port is needed
+ log_dict_config = setup_logging(None, "INFO", "json")
+ if log_dict_config:
+ lconfig.dictConfig(log_dict_config)
  mcp.run(transport=transport)


mcp_hydrolix/mcp_server.py CHANGED
@@ -1,8 +1,7 @@
- import json
  import logging
+ import re
  import signal
  from collections.abc import Sequence
- from dataclasses import asdict, is_dataclass
  from typing import Any, Final, Optional, List, cast, TypedDict

  import clickhouse_connect
@@ -13,6 +12,7 @@ from dotenv import load_dotenv
  from fastmcp import FastMCP
  from fastmcp.exceptions import ToolError
  from fastmcp.server.dependencies import get_access_token
+ from jwt import DecodeError
  from pydantic import Field
  from pydantic.dataclasses import dataclass
  from starlette.requests import Request
@@ -31,6 +31,8 @@ from mcp_hydrolix.utils import with_serializer

  @dataclass
  class Column:
+ """Column with enriched metadata: column_category, base_function, merge_function."""
+
  database: str
  table: str
  name: str
@@ -38,17 +40,18 @@ class Column:
  default_kind: Optional[str]
  default_expression: Optional[str]
  comment: Optional[str]
+ column_category: Optional[str] = None # 'aggregate', 'alias_aggregate', 'dimension'
+ base_function: Optional[str] = None
+ merge_function: Optional[str] = None


  @dataclass
  class Table:
+ """Table with summary table detection (is_summary_table=True if has aggregate columns)."""
+
  database: str
  name: str
  engine: str
- create_table_query: str
- dependencies_database: List[str]
- dependencies_table: List[str]
- engine_full: str
  sorting_key: str
  primary_key: str
  total_rows: Optional[int]
@@ -56,17 +59,25 @@ class Table:
  total_bytes_uncompressed: Optional[int]
  parts: Optional[int]
  active_parts: Optional[int]
- total_marks: Optional[int]
- columns: Optional[List[Column]] = Field([])
- comment: Optional[str] = None
+ columns: Optional[List[Column]] = Field(default_factory=list)
+ is_summary_table: bool = False
+ summary_table_info: Optional[str] = None


- @dataclass
  class HdxQueryResult(TypedDict):
  columns: List[str]
  rows: List[List[Any]]


+ @dataclass
+ class TableClassification:
+ """Result of table column classification."""
+
+ is_summary_table: bool
+ aggregate_columns: List[Column]
+ dimension_columns: List[Column]
+
+
  MCP_SERVER_NAME = "mcp-hydrolix"
  logger = logging.getLogger(MCP_SERVER_NAME)

@@ -76,14 +87,17 @@ HYDROLIX_CONFIG: Final[HydrolixConfig] = get_config()

  mcp = FastMCP(
  name=MCP_SERVER_NAME,
- auth=HydrolixCredentialChain(f"https://{HYDROLIX_CONFIG.host}/config"),
+ auth=HydrolixCredentialChain(None),
  )


  def get_request_credential() -> Optional[HydrolixCredential]:
  if (token := get_access_token()) is not None:
  if isinstance(token, AccessToken):
- return token.as_credential()
+ try:
+ return token.as_credential()
+ except DecodeError:
+ raise ValueError("The provided access token is invalid.")
  else:
  raise ValueError(
  "Found non-hydrolix access token on request -- this should be impossible!"
@@ -127,7 +141,23 @@ async def create_hydrolix_client(pool_mgr, request_credential: Optional[Hydrolix
  # allow custom hydrolix settings in CH client
  common.set_setting("invalid_setting_action", "send")
  common.set_setting("autogenerate_session_id", False)
- client_shared_pool = httputil.get_pool_manager(maxsize=HYDROLIX_CONFIG.query_pool_size, num_pools=1)
+
+ pool_kwargs: dict[str, Any] = {
+ "maxsize": HYDROLIX_CONFIG.query_pool_size,
+ "num_pools": 1,
+ "verify": HYDROLIX_CONFIG.verify,
+ }
+
+ # When verify=True, use certifi CA bundle for SSL verification
+ # This ensures we trust modern CAs like Let's Encrypt
+ if HYDROLIX_CONFIG.verify:
+ pool_kwargs["ca_cert"] = "certifi"
+ else:
+ import urllib3
+
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+ client_shared_pool = httputil.get_pool_manager(**pool_kwargs)


  def term(*args, **kwargs):
@@ -168,7 +198,7 @@ async def execute_cmd(query: str):
  client_shared_pool, get_request_credential()
  ) as client:
  res = await client.command(query)
- logger.info("Command returned executed.")
+ logger.info("Command executed successfully.")
  return res
  except Exception as err:
  logger.error(f"Error executing command: {err}")
@@ -197,22 +227,178 @@ def result_to_table(query_columns, result) -> List[Table]:
  return [Table(**dict(zip(query_columns, row))) for row in result]


- def result_to_column(query_columns, result) -> List[Column]:
- return [Column(**dict(zip(query_columns, row))) for row in result]
+ # system.tables query fields for fetching table metadata
+ SYSTEM_TABLES_FIELDS = """database, name, engine, sorting_key, primary_key, total_rows, total_bytes,
+ total_bytes_uncompressed, parts, active_parts"""
+
+
+ # Summary Table Support - Helper Functions
+
+
+ def extract_function_from_type(column_type: str) -> Optional[str]:
+ """
+ Extract aggregate function name from AggregateFunction type.
+ Examples:
+ "AggregateFunction(count, String)" -> "count"
+ "AggregateFunction(sumIf, Float64)" -> "sumIf"
+ "AggregateFunction(quantile(0.5), DateTime)" -> "quantile(0.5)"
+ "AggregateFunction(exponentialMovingAverage(0.5), UInt32)" -> "exponentialMovingAverage(0.5)"
+ "SimpleAggregateFunction(sum, Int64)" -> "sum"
+ "String" -> None
+ """
+ # Match everything from AggregateFunction( up to the comma that separates function from types
+ # This captures function names with parameters like quantile(0.5) or quantile(0.5, 0.9)
+ # Pattern: function_name or function_name(params) where params can contain commas
+ match = re.match(r"^(?:Simple)?AggregateFunction\(([^,()]+(?:\([^)]*\))?)", column_type)
+ if match:
+ return match.group(1).strip()
+ return None
+
+
+ def get_merge_function(base_function: str) -> str:
+ """
+ Generate -Merge function name from base function.
+ For parameterized functions, parameters go AFTER "Merge":
+ count -> countMerge
+ countIf -> countIfMerge
+ quantile(0.5) -> quantileMerge(0.5)
+ exponentialMovingAverage(0.5) -> exponentialMovingAverageMerge(0.5)
+ """
+ # Check if function has parameters
+ match = re.match(r"^(\w+)(\(.+\))$", base_function)
+ if match:
+ # Parameterized: quantile(0.5) -> quantileMerge(0.5)
+ func_name = match.group(1)
+ params = match.group(2)
+ return f"{func_name}Merge{params}"
+ else:
+ # Non-parameterized: count -> countMerge
+ return f"{base_function}Merge"
+
+
+ def classify_table_columns(columns: List[Column]) -> TableClassification:
+ """
+ Classify columns and determine if table is a summary table (has any aggregate columns).
+ Requires columns to be enriched first via enrich_column_metadata().
+ """
+ aggregate_columns = []
+ dimension_columns = []
+
+ for column in columns:
+ if column.column_category in ("aggregate", "alias_aggregate"):
+ aggregate_columns.append(column)
+ else:
+ dimension_columns.append(column)
+
+ return TableClassification(
+ is_summary_table=len(aggregate_columns) > 0,
+ aggregate_columns=aggregate_columns,
+ dimension_columns=dimension_columns,
+ )
+
+
+ def enrich_column_metadata(column: Column) -> Column:
+ """
+ Classify column as aggregate, alias_aggregate, or dimension and populate metadata.
+ Sets column_category, base_function, and merge_function fields.
+
+ Detection strategy:
+ 1. Check column_type for AggregateFunction/SimpleAggregateFunction (primary method)
+ 2. Check if ALIAS wrapping a -Merge function (for user-friendly shortcuts)
+ 3. Everything else is a dimension
+
+ Note: In real ClickHouse summary tables, aggregate columns ALWAYS have
+ AggregateFunction or SimpleAggregateFunction types.
+ """
+
+ type_func = extract_function_from_type(column.column_type)
+ if type_func:
+ column.column_category = "aggregate"
+ column.base_function = type_func
+ column.merge_function = get_merge_function(type_func)
+ elif (
+ column.default_kind == "ALIAS"
+ and column.default_expression
+ and "Merge(" in column.default_expression
+ ):
+ column.column_category = "alias_aggregate"
+ column.base_function = None
+ column.merge_function = None
+ # Everything else is a dimension
+ else:
+ column.column_category = "dimension"
+
+ return column


- def to_json(obj: Any) -> str:
- # This function technically returns different types:
- # - str for dataclasses (the primary use case)
- # - list/dict/Any for recursive processing during serialization
- # Type checking is suppressed for non-str returns as they're only used internally by json.dumps
- if is_dataclass(obj):
- return json.dumps(asdict(obj), default=to_json)
- elif isinstance(obj, list):
- return [to_json(item) for item in obj] # type: ignore[return-value]
- elif isinstance(obj, dict):
- return {key: to_json(value) for key, value in obj.items()} # type: ignore[return-value]
- return obj # type: ignore[return-value]
+ async def _populate_table_metadata(database: str, table: Table) -> None:
+ """Fetch and populate table with column metadata from Hydrolix.
+
+ Args:
+ database: Database name
+ table: Table object to enrich with column metadata
+ """
+ # Use DESCRIBE TABLE instead of system.columns to get full AggregateFunction types
+ # system.columns returns simplified types (like "String") but DESCRIBE returns full types
+ # ("AggregateFunction(count, Nullable(String))")
+ # Use backticks for identifiers, not format_query_value which adds quotes for VALUES
+ column_data_query = f"DESCRIBE TABLE `{database}`.`{table.name}`"
+ column_data_query_result = await execute_query(column_data_query)
+
+ # DESCRIBE TABLE returns: name, type, default_type, default_expression, comment, ...
+ # Transform results to Column objects, mapping DESCRIBE TABLE fields to Column dataclass fields
+ column_names = column_data_query_result["columns"]
+ columns = [
+ Column(
+ database=database,
+ table=table.name,
+ name=row_dict.get("name", ""),
+ column_type=row_dict.get("type", ""),
+ default_kind=row_dict.get("default_type", ""),
+ default_expression=row_dict.get("default_expression", ""),
+ comment=row_dict.get("comment", ""),
+ )
+ for row_dict in (dict(zip(column_names, row)) for row in column_data_query_result["rows"])
+ ]
+
+ # Summary Table Support: Enrich column metadata
+ # For each column, detect if it's an aggregate, alias_aggregate, or dimension
+ # and populate column_category, base_function, and merge_function fields.
+ enriched_columns = [enrich_column_metadata(col) for col in columns]
+
+ # Classify table based on enriched column metadata
+ # A table is a summary table if it has ANY aggregate columns
+ classification = classify_table_columns(enriched_columns)
+ is_summary_table = classification.is_summary_table
+
+ # Add human-readable usage guidance for LLMs querying summary tables
+ summary_table_info = None
+ if is_summary_table:
+ num_agg = len(classification.aggregate_columns)
+ num_dim = len(classification.dimension_columns)
+ summary_table_info = (
+ f"This is a SUMMARY TABLE with {num_agg} aggregate column(s) and {num_dim} dimension column(s). "
+ "Aggregate columns (column_category='aggregate') MUST be wrapped in their corresponding -Merge functions. "
+ "ALIAS aggregate columns (column_category='alias_aggregate') are pre-wrapped aggregates - use directly without -Merge. "
+ "Dimension columns (column_category='dimension') can be SELECTed directly and MUST appear in GROUP BY when mixed with aggregates. "
+ "IMPORTANT: Dimension columns may have function-like names (e.g., 'toStartOfHour(col)') - these are LITERAL column names, use them exactly as-is with backticks. "
+ "WRONG: SELECT toStartOfHour(col). RIGHT: SELECT `toStartOfHour(col)`. Also use in GROUP BY: GROUP BY `toStartOfHour(col)`. "
+ "CRITICAL RULE: If your SELECT includes ANY dimension columns (column_category='dimension') "
+ "AND ANY aggregate columns (column_category='aggregate' or 'alias_aggregate'), "
+ "you MUST include 'GROUP BY <all dimension columns from SELECT>'. "
+ "WITHOUT GROUP BY, the query will FAIL with 'NOT_AN_AGGREGATE' error. "
+ "IMPORTANT: ALIAS aggregates (column_category='alias_aggregate') are NOT dimensions - do NOT include them in GROUP BY. "
+ "Example: SELECT reqHost, cnt_all FROM table GROUP BY reqHost (reqHost=dimension, cnt_all=alias_aggregate). "
+ "CRITICAL: You MUST use the EXACT merge_function value from each aggregate column's metadata. "
+ "DO NOT infer the merge function from the column name - always check the merge_function field. "
+ "For example, if column `avgIf(col, condition)` has merge_function='avgIfMerge', "
+ "you MUST use avgIfMerge(`avgIf(col, condition)`), NOT avgMerge(...)."
+ )
+
+ # Populate table object with metadata
+ table.columns = enriched_columns
+ table.is_summary_table = is_summary_table
+ table.summary_table_info = summary_table_info


  @mcp.tool()
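As a quick reference, the two pure helpers introduced in this hunk can be exercised on the exact type strings listed in their docstrings. The sketch below copies their bodies so it runs standalone (importing mcp_server itself would run get_config() and FastMCP setup at import time); everything else is taken from the examples above.

import re
from typing import Optional

def extract_function_from_type(column_type: str) -> Optional[str]:
    # Capture the (optionally parameterized) function name inside (Simple)AggregateFunction(...)
    match = re.match(r"^(?:Simple)?AggregateFunction\(([^,()]+(?:\([^)]*\))?)", column_type)
    return match.group(1).strip() if match else None

def get_merge_function(base_function: str) -> str:
    # Insert "Merge" before the parameter list, if any: quantile(0.5) -> quantileMerge(0.5)
    match = re.match(r"^(\w+)(\(.+\))$", base_function)
    return f"{match.group(1)}Merge{match.group(2)}" if match else f"{base_function}Merge"

assert extract_function_from_type("AggregateFunction(count, String)") == "count"
assert extract_function_from_type("AggregateFunction(quantile(0.5), DateTime)") == "quantile(0.5)"
assert extract_function_from_type("SimpleAggregateFunction(sum, Int64)") == "sum"
assert extract_function_from_type("String") is None
assert get_merge_function("countIf") == "countIfMerge"
assert get_merge_function("quantile(0.5)") == "quantileMerge(0.5)"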
@@ -231,17 +417,83 @@ async def list_databases() -> List[str]:
  return databases


+ @mcp.tool()
+ async def get_table_info(database: str, table: str) -> Table:
+ """Get detailed metadata for a specific table including columns and summary table detection.
+
+ REQUIRED USAGE: Call this tool BEFORE querying ANY table to check if it's a summary
+ table and get column metadata. This is mandatory to avoid query errors.
+
+ This tool provides:
+ - is_summary_table: Boolean indicating if table has pre-aggregated data
+ - columns: List of columns with metadata:
+ - column_category: 'aggregate', 'alias_aggregate', or 'dimension'
+ - merge_function: Exact -Merge function name for aggregate columns (e.g., "countMerge")
+ - column_type: ClickHouse data type
+ - default_expression: For ALIAS columns, shows the underlying expression
+ - summary_table_info: Human-readable description for summary tables
+ - row_count, total_bytes: Table statistics
+
+ WORKFLOW for querying tables:
+ 1. Call get_table_info('database', 'table_name')
+ 2. Check is_summary_table field
+ 3. If is_summary_table=True:
+ - Read column_category and merge_function for each column
+ - Use merge_function to wrap aggregate columns in queries
+ - Example: SELECT countMerge(`count(vendor_id)`) FROM table
+ 4. If is_summary_table=False:
+ - Use standard SQL (SELECT count(*), sum(col), etc.)
+ 5. Execute query with run_select_query
+
+ For summary tables, aggregate columns MUST be wrapped with their corresponding -Merge functions
+ from the merge_function field. Querying without checking this metadata first will cause errors.
+ """
+ # Fetch table metadata (row counts, sizes, etc.)
+ query = f"""
+ SELECT {SYSTEM_TABLES_FIELDS}
+ FROM system.tables
+ WHERE database = {format_query_value(database)} AND name = {format_query_value(table)}"""
+
+ result = await execute_query(query)
+
+ if not result["rows"]:
+ raise ToolError(f"Table {database}.{table} not found")
+
+ # Create Table object from first (and only) row
+ tables = result_to_table(result["columns"], result["rows"])
+ table_obj = tables[0]
+
+ # Populate table with column metadata
+ await _populate_table_metadata(database, table_obj)
+
+ return table_obj
+
+
  @mcp.tool()
  async def list_tables(
  database: str, like: Optional[str] = None, not_like: Optional[str] = None
  ) -> List[Table]:
- """List available Hydrolix tables in a database, including schema, comment,
- row count, and column count."""
+ """List all tables in a database for exploration and discovery.
+
+ Use this tool to:
+ - Discover what tables exist in a database
+ - Filter tables by name pattern (like/not_like)
+ - Get overview of table metadata (engine, row counts, etc.)
+ - Identify which tables are summary tables (is_summary_table field)
+ - Get complete column metadata including merge_function for aggregates
+
+ Returns complete table information including columns and summary table detection
+ (same metadata as get_table_info but for all tables in the database).
+
+ NOTE: If you already know which specific table you want to query, use
+ get_table_info(database, table) instead - it's faster and returns metadata
+ for just that one table.
+
+ BEFORE querying any table from the results, check is_summary_table and column
+ metadata to build correct queries."""
  logger.info(f"Listing tables in database '{database}'")
  query = f"""
- SELECT database, name, engine, create_table_query, dependencies_database,
- dependencies_table, engine_full, sorting_key, primary_key, total_rows, total_bytes,
- total_bytes_uncompressed, parts, active_parts, total_marks, comment
+ SELECT {SYSTEM_TABLES_FIELDS}
  FROM system.tables WHERE database = {format_query_value(database)}"""
  if like:
  query += f" AND name LIKE {format_query_value(like)}"
@@ -254,19 +506,9 @@ async def list_tables(
  # Deserialize result as Table dataclass instances
  tables = result_to_table(result["columns"], result["rows"])

+ # Populate each table with column metadata
  for table in tables:
- column_data_query = f"""
- SELECT database, table, name, type AS column_type, default_kind, default_expression, comment
- FROM system.columns
- WHERE database = {format_query_value(database)} AND table = {format_query_value(table.name)}"""
- column_data_query_result = await execute_query(column_data_query)
- table.columns = [
- c
- for c in result_to_column(
- column_data_query_result["columns"],
- column_data_query_result["rows"],
- )
- ]
+ await _populate_table_metadata(database, table)

  logger.info(f"Found {len(tables)} tables")
  return tables
@@ -278,6 +520,29 @@ async def run_select_query(query: str) -> dict[str, tuple | Sequence[str | Seque
  """Run a SELECT query in a Hydrolix time-series database using the Clickhouse SQL dialect.
  Queries run using this tool will timeout after 30 seconds.

+ MANDATORY PRE-QUERY CHECK - DO THIS FIRST BEFORE EVERY QUERY:
+
+ BEFORE running ANY query on a table, you MUST call get_table_info(database, table_name)
+ to check if it's a summary table and get column metadata.
+
+ WHY: Summary tables require special -Merge functions for aggregate columns. Querying
+ without checking metadata first will cause:
+ - "Nested aggregate function" errors (if you use sum/count/avg instead of -Merge)
+ - "Cannot read AggregateFunction" errors (if you SELECT aggregate columns directly)
+ - Wrong results (if you treat aggregate columns as regular values)
+
+ REQUIRED WORKFLOW (follow this order every time):
+
+ 1. FIRST: Call get_table_info('database', 'table_name')
+ - Check is_summary_table field
+ - Read column metadata (column_category, merge_function for each column)
+
+ 2. THEN: Build query based on metadata
+ - If is_summary_table=False: use standard SQL (count, sum, avg, etc.)
+ - If is_summary_table=True: follow summary table rules below
+
+ Do NOT skip step 1. Do NOT assume a table is regular/summary without checking.
+
  The primary key on tables queried this way is always a timestamp. Queries should include either
  a LIMIT clause or a filter based on the primary key as a performance guard to ensure they return
  in a reasonable amount of time. Queries should select specific fields and avoid the use of
@@ -292,6 +557,127 @@ async def run_select_query(query: str) -> dict[str, tuple | Sequence[str | Seque
  full-text search whenever possible. When searching for substrings, the syntax `column LIKE
  '%suffix'` or `column LIKE 'prefix%'` should be used.

+ SUMMARY TABLE RULES (only apply if is_summary_table=True from get_table_info):
+
+ Summary tables contain pre-computed aggregations stored in aggregate function state columns.
+ These tables are identified by having columns with aggregate function names like count(...),
+ sum(...), avg(...), countIf(...), sumIf(...), etc.
+
+ CRITICAL RULES for querying summary tables:
+
+ 1. Raw aggregate columns (column_category='aggregate') CANNOT be SELECTed directly
+ - They store binary AggregateFunction states, not readable values
+ - Direct SELECT will cause deserialization errors
+ - MUST be wrapped in their -Merge function from get_table_info:
+ - count(vendor_id) → countMerge(`count(vendor_id)`)
+ - sum(bytes_out) → sumMerge(`sum(bytes_out)`)
+ - avg(latitude) → avgMerge(`avg(latitude)`)
+ - countIf(condition) → countIfMerge(`countIf(condition)`)
+ - ALWAYS check column.merge_function in get_table_info to get the exact function name
+ - Use backticks around column names with special characters
+
+ 2. Do NOT use standard aggregate functions (sum/count/avg) on summary table columns
+ - WRONG: SELECT sum(count_column) FROM summary_table
+ (causes "nested aggregate function" error)
+ - RIGHT: SELECT countMerge(`count_column`) FROM summary_table
+ (uses the merge function from column metadata)
+
+ 3. ALIAS aggregate columns (column_category='alias_aggregate') use directly:
+ - These are pre-defined shortcuts that already wrap -Merge functions
+ - Example: cnt_all (which is defined as ALIAS countMerge(`count()`))
+ - SELECT cnt_all directly, NO additional wrapping needed
+ - These make queries simpler and more readable
+
+ 4. Dimension columns (column_category='dimension') - use as-is with backticks:
+ - Reference them exactly as listed in column metadata
+ - Many have function-like names (e.g., `toStartOfMinute(primary_datetime)`)
+ - These are LITERAL column names, not expressions to compute
+ - WRONG: SELECT toStartOfMinute(primary_datetime) (tries to call function on non-existent base column)
+ - RIGHT: SELECT `toStartOfMinute(primary_datetime)` (selects the actual dimension column)
+ - Always use backticks for columns with special characters
+ - Can be used in SELECT, WHERE, GROUP BY, ORDER BY
+ - For time dimensions in WHERE clauses:
+ * Use simple date format: '2022-06-01' (preferred)
+ * Use full timestamp: '2022-06-01 00:00:00' (with seconds)
+ * Do NOT use partial time: '2022-06-01 00:00' (causes parse errors)
+ * Use >= and < for ranges: WHERE col >= '2022-06-01' AND col < '2022-06-02'
+
+ 5. CRITICAL: When mixing dimensions and aggregates in SELECT, you MUST use GROUP BY:
+ - SELECT only aggregates → no GROUP BY needed (aggregates entire table)
+ Example: SELECT count_vendor_id FROM table
+ - SELECT dimensions + aggregates → MUST GROUP BY all dimension columns
+ Example: SELECT pickup_dt, count_vendor_id FROM table GROUP BY pickup_dt
+ - Forgetting GROUP BY causes error: "Column X is not under aggregate function and not in GROUP BY"
+
+ 6. NEVER use SELECT * on summary tables (will cause deserialization errors)
+
+ 7. Aggregate columns can ONLY appear in SELECT:
+ - Raw aggregates: wrapped with -Merge (see column.merge_function)
+ - Alias aggregates: used directly
+ - NEVER in GROUP BY (use dimension columns only)
+
+ Summary table query patterns (after calling get_table_info first):
+
+ Pattern 1: Aggregate entire table
+ -- First: get_table_info('database', 'summary_table')
+ -- Read column.merge_function for count(column_name) = "countMerge"
+ SELECT countMerge(`count(column_name)`) as total FROM database.summary_table
+
+ Pattern 2: Aggregate with grouping by dimension
+ -- First: get_table_info('database', 'summary_table')
+ -- Read merge_function for each aggregate column
+ SELECT time_bucket_column,
+ countMerge(`count(column_name)`) as total,
+ avgMerge(`avg(other_column)`) as avg_value
+ FROM database.summary_table
+ GROUP BY time_bucket_column
+
+ Pattern 2b: Grouping with time range filter
+ -- First: get_table_info('database', 'summary_table')
+ SELECT `toStartOfMinute(datetime_field)` as time_bucket,
+ countMerge(`count(column)`) as total
+ FROM database.summary_table
+ WHERE `toStartOfMinute(datetime_field)` >= '2022-06-01'
+ AND `toStartOfMinute(datetime_field)` < '2022-06-02'
+ GROUP BY `toStartOfMinute(datetime_field)`
+ ORDER BY time_bucket DESC
+
+ Pattern 3: Multiple aggregates (no dimensions, no GROUP BY)
+ -- First: get_table_info('database', 'summary_table')
+ SELECT countMerge(`count(column_name)`) as count_result,
+ sumMerge(`sum(other_column)`) as sum_result
+ FROM database.summary_table
+
+ Pattern 4: Using ALIAS aggregate columns (no dimensions, no GROUP BY)
+ -- First: get_table_info('database', 'summary_table')
+ -- Check which columns have column_category='alias_aggregate'
+ SELECT cnt_all, sum_bytes, avg_value FROM database.summary_table
+ -- No -Merge needed, these are pre-defined aliases
+
+ Pattern 5: ALIAS aggregates with dimensions (requires GROUP BY)
+ -- First: get_table_info('database', 'summary_table')
+ SELECT time_dimension,
+ cnt_all,
+ avg_value
+ FROM database.summary_table
+ GROUP BY time_dimension
+ -- MUST include GROUP BY when mixing dimensions and aggregates
+
+ Pattern 6: Using dimensions with function-like names (common pattern)
+ -- First: get_table_info('database', 'summary_table')
+ -- See dimension column named: toStartOfMinute(primary_datetime)
+ -- WRONG: SELECT toStartOfMinute(primary_datetime) ... (tries to call function)
+ -- RIGHT: Use the literal column name with backticks
+ SELECT `toStartOfMinute(primary_datetime)` as time_bucket,
+ countMerge(`count()`) as cnt,
+ maxMerge(`max(value)`) as max_val
+ FROM database.summary_table
+ GROUP BY `toStartOfMinute(primary_datetime)`
+ ORDER BY time_bucket DESC
+ LIMIT 10
+
+ Regular table examples (non-summary):
+
  Example query. Purpose: get logs from the `application.logs` table. Primary key: `timestamp`.
  Performance guard: 10 minute recency filter.

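To tie the two tools together, here is a hypothetical client-side sketch of the workflow the docstrings above prescribe, assuming a streamable-HTTP deployment and fastmcp's Client API; the URL reuses the example host from the configuration snippet further down, and the database, table, and column names are placeholders only:

import asyncio
from fastmcp import Client

async def main() -> None:
    async with Client("https://my-hydrolix-mcp.example.com/mcp") as client:
        # Step 1: fetch column metadata and the is_summary_table flag first.
        info = await client.call_tool(
            "get_table_info", {"database": "sample_db", "table": "trips_summary"}
        )
        print(info)
        # Step 2: build the query from the returned merge_function values,
        # then run it through the run_select_query tool.
        result = await client.call_tool(
            "run_select_query",
            {"query": "SELECT countMerge(`count(vendor_id)`) AS total FROM sample_db.trips_summary"},
        )
        print(result)

asyncio.run(main())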
mcp_hydrolix/utils.py CHANGED
@@ -1,7 +1,7 @@
  import inspect
  import ipaddress
  import json
- from datetime import datetime, time
+ from datetime import datetime, time, date
  from decimal import Decimal
  from functools import wraps

@@ -16,9 +16,9 @@ class ExtendedEncoder(json.JSONEncoder):
  if isinstance(obj, ipaddress.IPv4Address):
  return str(obj)
  if isinstance(obj, datetime):
- return obj.time()
- if isinstance(obj, time):
- return obj.hour * 3600 + obj.minute * 60 + obj.second + obj.microsecond / 1_000_000
+ return obj.timestamp()
+ if isinstance(obj, (date, time)):
+ return obj.isoformat()
  if isinstance(obj, bytes):
  return obj.decode()
  if isinstance(obj, Decimal):
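The encoder change above alters how temporal values are serialized. Below is a minimal standalone sketch of just that behavior, using a hypothetical TemporalEncoder in place of the package's ExtendedEncoder (which also handles IPv4Address, bytes, and Decimal as shown above):

import json
from datetime import date, datetime, time

class TemporalEncoder(json.JSONEncoder):
    def default(self, obj):
        # datetime must be checked before date, since datetime is a date subclass.
        if isinstance(obj, datetime):
            return obj.timestamp()  # POSIX timestamp; depends on local timezone for naive values
        if isinstance(obj, (date, time)):
            return obj.isoformat()  # e.g. "2022-06-01" / "12:30:00"
        return super().default(obj)

print(json.dumps({"d": date(2022, 6, 1), "t": time(12, 30)}, cls=TemporalEncoder))
# {"d": "2022-06-01", "t": "12:30:00"}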
mcp_hydrolix-0.1.6.dist-info/METADATA → mcp_hydrolix-0.2.0.dist-info/METADATA CHANGED
@@ -1,11 +1,12 @@
  Metadata-Version: 2.4
  Name: mcp-hydrolix
- Version: 0.1.6
+ Version: 0.2.0
  Summary: An MCP server for Hydrolix.
  Project-URL: Home, https://github.com/hydrolix/mcp-hydrolix
  License-Expression: Apache-2.0
  License-File: LICENSE
  Requires-Python: >=3.13
+ Requires-Dist: certifi>=2026.1.4
  Requires-Dist: clickhouse-connect<0.11,>=0.10
  Requires-Dist: fastmcp<2.15,>=2.14
  Requires-Dist: gunicorn<24.0,>=23.0
@@ -296,7 +297,7 @@ Example `mcpServers` configuration connecting to a remote HTTP server with per-r
  {
  "mcpServers": {
  "mcp-hydrolix-remote": {
- "url": "http://my-hydrolix-mcp.example.com:8000/mcp?token=<service-account-token>"
+ "url": "https://my-hydrolix-mcp.example.com/mcp?token=<service-account-token>"
  }
  }
  }
mcp_hydrolix-0.1.6.dist-info/RECORD → mcp_hydrolix-0.2.0.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
  mcp_hydrolix/__init__.py,sha256=DnAQkvoFf_QhrDNFLOmn-nHlldPUgtdN33k3xJWthgc,225
- mcp_hydrolix/main.py,sha256=Q58yz9ykx0bilptGALXW_Lli0pR7wDNOPib34l1z8Sg,2760
+ mcp_hydrolix/main.py,sha256=W8A6EyGOcqXgkzlcl5dMId_-OenY5zXPYTOvEMl9nvE,2935
  mcp_hydrolix/mcp_env.py,sha256=For5l-G67ihJJbW4d4qpZNZvhxsIfT0AXGQsg8-3BMk,11533
- mcp_hydrolix/mcp_server.py,sha256=TSTKEaXoonNXvk65CD99eCFdIDY0pppO2Qr66SbCvUc,12402
- mcp_hydrolix/utils.py,sha256=fMGCsRa2DqlS2PMfIpD5VaHTbaxUkW7mvgArgVViXbs,2433
+ mcp_hydrolix/mcp_server.py,sha256=rn5cNVgEd6pJOVUcPvG300kvDUQSUb32aAoyOo2eKL0,30228
+ mcp_hydrolix/utils.py,sha256=G7t4lajZIsQOl_oOHUQyEqytsPJpN71WcLkv1cbxsJk,2391
  mcp_hydrolix/auth/__init__.py,sha256=Ui9pLq3Z5tH8X56T_SqACRLEU9zl1gmcONWif-GV1Ko,656
  mcp_hydrolix/auth/credentials.py,sha256=IK8w6TjNxS1K0LCKBt3xXOOI-0ogWCVAkiJuOzEJuJY,1915
  mcp_hydrolix/auth/mcp_providers.py,sha256=4lexSj6tqCgPb5GGbuG5_wIocvSvQbqx8CHNl9D6OCA,5194
  mcp_hydrolix/log/__init__.py,sha256=1K-ycdGrawELMLSBeiqE8bV3-SFJYOE0dD_U3PAP2QM,119
  mcp_hydrolix/log/log.py,sha256=6KX0oSz-BbCWUoPxbJED4sZBmbgCHa3KDrc5nYtdks4,1838
- mcp_hydrolix/log/log.yaml,sha256=ldw66lGkQjqyJ92gJqOtdP63T_3MSD_ndKU1p8Xegvs,978
+ mcp_hydrolix/log/log.yaml,sha256=uQEW_LYSur_C4h0wR_vaYOVKE0an9tXozFMpjeZS5V8,1052
  mcp_hydrolix/log/utils.py,sha256=gOnlo25-sGZydGJmr6T94Pb805RZ9LcZlLCRaVEuUv4,2099
- mcp_hydrolix-0.1.6.dist-info/METADATA,sha256=FDfNexRS-g7bFLcjaGv-H5PlJ6erx7_4V-KTvnedxUA,11096
- mcp_hydrolix-0.1.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- mcp_hydrolix-0.1.6.dist-info/entry_points.txt,sha256=vHa7F2rOCVu8lpsqR8BYbE1w8ugJSOYwX95w802Y5qE,56
- mcp_hydrolix-0.1.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- mcp_hydrolix-0.1.6.dist-info/RECORD,,
+ mcp_hydrolix-0.2.0.dist-info/METADATA,sha256=SG_bTRbvQYhTtw2dC0vaQyfyYbIe2myb_Td8Uwhy4Ng,11125
+ mcp_hydrolix-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ mcp_hydrolix-0.2.0.dist-info/entry_points.txt,sha256=vHa7F2rOCVu8lpsqR8BYbE1w8ugJSOYwX95w802Y5qE,56
+ mcp_hydrolix-0.2.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ mcp_hydrolix-0.2.0.dist-info/RECORD,,