gapless_crypto_clickhouse-7.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. gapless_crypto_clickhouse/__init__.py +147 -0
  2. gapless_crypto_clickhouse/__probe__.py +349 -0
  3. gapless_crypto_clickhouse/api.py +1032 -0
  4. gapless_crypto_clickhouse/clickhouse/__init__.py +17 -0
  5. gapless_crypto_clickhouse/clickhouse/config.py +119 -0
  6. gapless_crypto_clickhouse/clickhouse/connection.py +269 -0
  7. gapless_crypto_clickhouse/clickhouse/schema.sql +98 -0
  8. gapless_crypto_clickhouse/clickhouse/schema_validator.py +312 -0
  9. gapless_crypto_clickhouse/clickhouse_query.py +642 -0
  10. gapless_crypto_clickhouse/collectors/__init__.py +21 -0
  11. gapless_crypto_clickhouse/collectors/binance_public_data_collector.py +1994 -0
  12. gapless_crypto_clickhouse/collectors/clickhouse_bulk_loader.py +446 -0
  13. gapless_crypto_clickhouse/collectors/concurrent_collection_orchestrator.py +407 -0
  14. gapless_crypto_clickhouse/collectors/csv_format_detector.py +123 -0
  15. gapless_crypto_clickhouse/collectors/httpx_downloader.py +395 -0
  16. gapless_crypto_clickhouse/collectors/hybrid_url_generator.py +316 -0
  17. gapless_crypto_clickhouse/exceptions.py +145 -0
  18. gapless_crypto_clickhouse/gap_filling/__init__.py +1 -0
  19. gapless_crypto_clickhouse/gap_filling/safe_file_operations.py +439 -0
  20. gapless_crypto_clickhouse/gap_filling/universal_gap_filler.py +757 -0
  21. gapless_crypto_clickhouse/llms.txt +268 -0
  22. gapless_crypto_clickhouse/probe.py +235 -0
  23. gapless_crypto_clickhouse/py.typed +0 -0
  24. gapless_crypto_clickhouse/query_api.py +374 -0
  25. gapless_crypto_clickhouse/resume/__init__.py +12 -0
  26. gapless_crypto_clickhouse/resume/intelligent_checkpointing.py +383 -0
  27. gapless_crypto_clickhouse/utils/__init__.py +29 -0
  28. gapless_crypto_clickhouse/utils/error_handling.py +202 -0
  29. gapless_crypto_clickhouse/utils/etag_cache.py +194 -0
  30. gapless_crypto_clickhouse/utils/timeframe_constants.py +90 -0
  31. gapless_crypto_clickhouse/utils/timestamp_format_analyzer.py +256 -0
  32. gapless_crypto_clickhouse/utils/timestamp_utils.py +130 -0
  33. gapless_crypto_clickhouse/validation/__init__.py +36 -0
  34. gapless_crypto_clickhouse/validation/csv_validator.py +677 -0
  35. gapless_crypto_clickhouse/validation/models.py +220 -0
  36. gapless_crypto_clickhouse/validation/storage.py +502 -0
  37. gapless_crypto_clickhouse-7.1.0.dist-info/METADATA +1277 -0
  38. gapless_crypto_clickhouse-7.1.0.dist-info/RECORD +40 -0
  39. gapless_crypto_clickhouse-7.1.0.dist-info/WHEEL +4 -0
  40. gapless_crypto_clickhouse-7.1.0.dist-info/licenses/LICENSE +21 -0
gapless_crypto_clickhouse/clickhouse/__init__.py
@@ -0,0 +1,17 @@
+ """
+ ClickHouse connection and schema management for gapless-crypto-data v4.0.0.
+
+ Provides ClickHouseConnection class for database operations using clickhouse-driver.
+ Replaces QuestDB implementation (ADR-0003) for future-proofing and ecosystem maturity.
+
+ Usage:
+     from gapless_crypto_clickhouse.clickhouse import ClickHouseConnection
+
+     with ClickHouseConnection() as conn:
+         conn.execute("SELECT 1")
+ """
+
+ from .config import ClickHouseConfig
+ from .connection import ClickHouseConnection
+
+ __all__ = ["ClickHouseConnection", "ClickHouseConfig"]
gapless_crypto_clickhouse/clickhouse/config.py
@@ -0,0 +1,119 @@
+ """
+ ClickHouse configuration for gapless-crypto-data v4.0.0.
+
+ Environment variable support for connection parameters.
+ Follows the same pattern as QuestDBConfig (ADR-0003).
+
+ Error Handling: Raise and propagate (no fallback, no defaults for required params)
+ """
+
+ import os
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class ClickHouseConfig:
+     """
+     ClickHouse connection configuration.
+
+     Attributes:
+         host: ClickHouse server hostname (default: localhost)
+         port: Native protocol port (default: 9000)
+         http_port: HTTP interface port (default: 8123)
+         database: Database name (default: default)
+         user: Username (default: default)
+         password: Password (default: empty)
+         secure: Enable TLS/SSL for secure connections (default: False; required for ClickHouse Cloud)
+
+     Environment Variables:
+         CLICKHOUSE_HOST: Override host
+         CLICKHOUSE_PORT: Override native protocol port
+         CLICKHOUSE_HTTP_PORT: Override HTTP port
+         CLICKHOUSE_DATABASE: Override database name
+         CLICKHOUSE_USER: Override username
+         CLICKHOUSE_PASSWORD: Override password
+         CLICKHOUSE_SECURE: Enable TLS/SSL (set to 'true' for ClickHouse Cloud)
+
+     Example:
+         # Default configuration (localhost)
+         config = ClickHouseConfig.from_env()
+
+         # Custom configuration
+         config = ClickHouseConfig(host="clickhouse.example.com", port=9000)
+     """
+
+     host: str = "localhost"
+     port: int = 9000
+     http_port: int = 8123
+     database: str = "default"
+     user: str = "default"
+     password: str = ""
+     secure: bool = False
+
+     @classmethod
+     def from_env(cls) -> "ClickHouseConfig":
+         """
+         Create configuration from environment variables.
+
+         Returns:
+             ClickHouseConfig with values from environment or defaults
+
+         Raises:
+             ValueError: If CLICKHOUSE_PORT or CLICKHOUSE_HTTP_PORT is not a valid integer
+
+         Example:
+             export CLICKHOUSE_HOST=clickhouse.example.com
+             export CLICKHOUSE_PORT=9000
+             config = ClickHouseConfig.from_env()
+         """
+         try:
+             port = int(os.getenv("CLICKHOUSE_PORT", "9000"))
+             http_port = int(os.getenv("CLICKHOUSE_HTTP_PORT", "8123"))
+         except ValueError as e:
+             raise ValueError(
+                 f"Invalid CLICKHOUSE_PORT or CLICKHOUSE_HTTP_PORT (must be integer): {e}"
+             ) from e
+
+         return cls(
+             host=os.getenv("CLICKHOUSE_HOST", "localhost"),
+             port=port,
+             http_port=http_port,
+             database=os.getenv("CLICKHOUSE_DATABASE", "default"),
+             user=os.getenv("CLICKHOUSE_USER", "default"),
+             password=os.getenv("CLICKHOUSE_PASSWORD", ""),
+             secure=os.getenv("CLICKHOUSE_SECURE", "false").lower() in ("true", "1", "yes"),
+         )
+
+     def validate(self) -> None:
+         """
+         Validate configuration parameters.
+
+         Raises:
+             ValueError: If any parameter is invalid
+
+         Example:
+             config = ClickHouseConfig(port=-1)
+             config.validate()  # Raises ValueError
+         """
+         if not self.host:
+             raise ValueError("host cannot be empty")
+
+         if not (1 <= self.port <= 65535):
+             raise ValueError(f"port must be between 1 and 65535, got {self.port}")
+
+         if not (1 <= self.http_port <= 65535):
+             raise ValueError(f"http_port must be between 1 and 65535, got {self.http_port}")
+
+         if not self.database:
+             raise ValueError("database cannot be empty")
+
+         if not self.user:
+             raise ValueError("user cannot be empty")
+
+     def __repr__(self) -> str:
+         """String representation (password masked)."""
+         return (
+             f"ClickHouseConfig(host='{self.host}', port={self.port}, "
+             f"http_port={self.http_port}, database='{self.database}', "
+             f"user='{self.user}', password='***', secure={self.secure})"
+         )
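config.py above is entirely environment-driven. As a rough usage sketch for a ClickHouse Cloud endpoint (the hostname and password below are placeholders, and only the CLICKHOUSE_* variables documented in the docstring are assumed):

    import os

    from gapless_crypto_clickhouse.clickhouse import ClickHouseConfig

    # Placeholder endpoint and credentials -- substitute your own.
    os.environ["CLICKHOUSE_HOST"] = "abc123.us-east-1.aws.clickhouse.cloud"
    os.environ["CLICKHOUSE_HTTP_PORT"] = "8443"   # ClickHouse Cloud serves HTTPS on 8443
    os.environ["CLICKHOUSE_PASSWORD"] = "change-me"
    os.environ["CLICKHOUSE_SECURE"] = "true"      # enables TLS, as required for Cloud

    config = ClickHouseConfig.from_env()
    config.validate()   # raises ValueError for bad ports or an empty host/database/user
    print(config)       # the custom __repr__ masks the password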
gapless_crypto_clickhouse/clickhouse/connection.py
@@ -0,0 +1,269 @@
+ """
+ ClickHouse connection management for gapless-crypto-clickhouse v6.0.0.
+
+ Provides a context-managed connection to ClickHouse using clickhouse-connect with Apache Arrow support.
+ Replaces clickhouse-driver (ADR-0023) for 3x faster queries and 4x less memory.
+
+ Error Handling: Raise and propagate (no fallback, no retry, no silent failures)
+ SLOs: Availability (connection health checks), Correctness (query validation),
+       Observability (connection logging), Maintainability (standard HTTP client)
+
+ Usage:
+     from gapless_crypto_clickhouse.clickhouse import ClickHouseConnection
+
+     with ClickHouseConnection() as conn:
+         df = conn.query_dataframe("SELECT * FROM ohlcv FINAL LIMIT 10")
+         print(df)  # pandas DataFrame with Arrow-optimized internals
+ """
+
+ import logging
+ from typing import Any, Dict, List, Optional, Tuple
+
+ import clickhouse_connect
+ import pandas as pd
+
+ from .config import ClickHouseConfig
+
+ logger = logging.getLogger(__name__)
+
+
+ class ClickHouseConnection:
+     """
+     Context-managed ClickHouse connection with Apache Arrow support.
+
+     Provides execute() and query_dataframe() for queries and insert_dataframe() for bulk inserts.
+     Uses the HTTP protocol (port 8123) with Apache Arrow for zero-copy DataFrame creation.
+
+     Attributes:
+         config: ClickHouse configuration
+         client: clickhouse-connect Client instance
+
+     Error Handling:
+         - Connection failures raise Exception
+         - Query failures raise Exception
+         - No retries, no fallbacks (raise-and-propagate policy)
+
+     Performance:
+         - Arrow-optimized queries: 3x faster DataFrame creation
+         - Zero-copy when possible: 4x less memory
+         - HTTP protocol: nginx/reverse-proxy compatible
+
+     Example:
+         with ClickHouseConnection() as conn:
+             # Execute query (returns tuples)
+             result = conn.execute("SELECT COUNT(*) FROM ohlcv")
+
+             # Query DataFrame (Arrow-optimized internally)
+             df = conn.query_dataframe("SELECT * FROM ohlcv FINAL LIMIT 10")
+
+             # Insert DataFrame
+             df = pd.DataFrame({"col": [1, 2, 3]})
+             conn.insert_dataframe(df, "test_table")
+     """
+
+     def __init__(self, config: Optional[ClickHouseConfig] = None) -> None:
+         """
+         Initialize ClickHouse connection.
+
+         Args:
+             config: ClickHouse configuration (default: from environment)
+
+         Raises:
+             ValueError: If configuration is invalid
+             Exception: If connection fails
+
+         Example:
+             # Default configuration (localhost)
+             conn = ClickHouseConnection()
+
+             # Custom configuration
+             config = ClickHouseConfig(host="clickhouse.example.com")
+             conn = ClickHouseConnection(config)
+         """
+         self.config = config or ClickHouseConfig.from_env()
+         self.config.validate()
+
+         logger.info(
+             f"Initializing ClickHouse connection: {self.config.host}:{self.config.http_port} "
+             f"(HTTP protocol with Arrow support)"
+         )
+
+         try:
+             # clickhouse-connect uses HTTP protocol (port 8123 local, 8443 Cloud)
+             self.client = clickhouse_connect.get_client(
+                 host=self.config.host,
+                 port=self.config.http_port,
+                 database=self.config.database,
+                 username=self.config.user,
+                 password=self.config.password,
+                 secure=self.config.secure,  # Enable TLS/SSL for ClickHouse Cloud
+                 # Performance settings
+                 settings={
+                     "max_block_size": 100000,  # Batch size for queries
+                 },
+             )
+         except Exception as e:
+             raise Exception(
+                 f"Failed to connect to ClickHouse at {self.config.host}:{self.config.http_port}: {e}"
+             ) from e
+
+     def __enter__(self) -> "ClickHouseConnection":
+         """Context manager entry with schema validation."""
+         if not self.health_check():
+             raise Exception("ClickHouse health check failed during context manager entry")
+
+         # Schema validation (ADR-0024)
+         from .schema_validator import SchemaValidationError, SchemaValidator
+
+         try:
+             validator = SchemaValidator(self)
+             validator.validate_schema()
+             logger.info("Schema validation passed")
+         except SchemaValidationError as e:
+             logger.error(f"Schema validation failed: {e}")
+             raise
+
+         logger.debug("ClickHouse connection opened")
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+         """Context manager exit (cleanup)."""
+         if self.client:
+             self.client.close()
+             logger.debug("ClickHouse connection closed")
+
+     def health_check(self) -> bool:
+         """
+         Verify that the ClickHouse connection is alive.
+
+         Returns:
+             True if the connection is healthy, False otherwise
+
+         Example:
+             conn = ClickHouseConnection()
+             if conn.health_check():
+                 print("Connection healthy")
+         """
+         try:
+             result = self.client.command("SELECT 1")
+             if result != 1:
+                 logger.error(f"Health check failed: unexpected result {result}")
+                 return False
+             logger.debug("ClickHouse health check passed")
+             return True
+         except Exception as e:
+             logger.error(f"ClickHouse health check failed: {e}")
+             return False
+
+     def execute(self, query: str, params: Optional[Dict[str, Any]] = None) -> List[Tuple[Any, ...]]:
+         """
+         Execute a SQL query with parameter substitution.
+
+         Args:
+             query: SQL query string (use {name:Type} placeholders for clickhouse-connect)
+             params: Query parameters (dict mapping placeholder names to values)
+
+         Returns:
+             List of result tuples
+
+         Raises:
+             Exception: If query execution fails
+
+         Example:
+             # Simple query
+             result = conn.execute("SELECT 1")  # [(1,)]
+
+             # Parameterized query (clickhouse-connect format)
+             result = conn.execute(
+                 "SELECT * FROM ohlcv WHERE symbol = {symbol:String}",
+                 params={'symbol': 'BTCUSDT'}
+             )
+         """
+         try:
+             logger.debug(f"Executing query: {query[:100]}...")
+             result = self.client.query(query, parameters=params or {})
+             rows = result.result_rows
+             logger.debug(f"Query returned {len(rows)} rows")
+             return rows
+         except Exception as e:
+             raise Exception(f"Query execution failed: {query[:100]}...\nError: {e}") from e
+
+     def query_dataframe(self, query: str, params: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
+         """
+         Execute a SQL query and return the results as a pandas DataFrame with Arrow optimization.
+
+         Args:
+             query: SQL query string (use {name:Type} placeholders for clickhouse-connect)
+             params: Query parameters (dict mapping placeholder names to values)
+
+         Returns:
+             pandas DataFrame with query results (Arrow-optimized internally)
+
+         Raises:
+             Exception: If query execution fails
+
+         Performance:
+             - Arrow format enabled: 3x faster DataFrame creation
+             - Zero-copy when compatible: 4x less memory
+             - Automatic fallback if Arrow not available
+
+         Example:
+             # Simple query
+             df = conn.query_dataframe("SELECT * FROM ohlcv FINAL LIMIT 10")
+
+             # Parameterized query
+             df = conn.query_dataframe(
+                 "SELECT * FROM ohlcv FINAL WHERE symbol = {symbol:String}",
+                 params={'symbol': 'BTCUSDT'}
+             )
+         """
+         try:
+             logger.debug(f"Executing query (DataFrame, Arrow-optimized): {query[:100]}...")
+             # Use Arrow-optimized query method for 3x faster DataFrame creation
+             df = self.client.query_df_arrow(query, parameters=params or {})
+             logger.debug(f"Query returned {len(df)} rows (Arrow-optimized)")
+             return df
+         except Exception as e:
+             raise Exception(f"Query execution failed: {query[:100]}...\nError: {e}") from e
+
+     def insert_dataframe(self, df: pd.DataFrame, table: str) -> int:
+         """
+         Bulk insert a DataFrame into a ClickHouse table.
+
+         Args:
+             df: pandas DataFrame with data to insert
+             table: Target table name
+
+         Returns:
+             Number of rows inserted (0 if the DataFrame is empty)
+
+         Raises:
+             ValueError: If the DataFrame has an invalid schema
+             Exception: If the insert fails
+
+         Example:
+             df = pd.DataFrame({
+                 'timestamp': pd.to_datetime(['2024-01-01']),
+                 'symbol': ['BTCUSDT'],
+                 'open': [50000.0]
+             })
+             rows = conn.insert_dataframe(df, 'ohlcv')
+             print(f"Inserted {rows} rows")
+         """
+         if df.empty:
+             logger.warning(f"Empty DataFrame, skipping insert to {table}")
+             return 0
+
+         try:
+             logger.info(f"Inserting {len(df)} rows to {table}")
+
+             # Use standard insert (Arrow benefits are mainly on the query side)
+             self.client.insert_df(table, df)
+
+             logger.info(f"Successfully inserted {len(df)} rows to {table}")
+             return len(df)
+
+         except ValueError as e:
+             # Checked before the generic handler so schema errors are not swallowed
+             raise ValueError(f"Invalid DataFrame schema for table {table}: {e}") from e
+         except Exception as e:
+             raise Exception(f"Bulk insert failed for table {table} ({len(df)} rows): {e}") from e
gapless_crypto_clickhouse/clickhouse/schema.sql
@@ -0,0 +1,98 @@
+ -- ClickHouse Schema for gapless-crypto-data v4.0.0
+ -- ADR-0005: ClickHouse Migration for Future-Proofing
+ --
+ -- ReplacingMergeTree engine with deterministic versioning for zero-gap guarantee.
+ -- Preserves ADR-0004 futures support (instrument_type column for spot/futures).
+ --
+ -- Error Handling: Raise and propagate (no silent failures)
+ -- SLOs: Availability, Correctness (zero-gap via _version), Observability, Maintainability
+
+ CREATE TABLE IF NOT EXISTS ohlcv (
+     -- Primary timestamp (microsecond precision - ADR-0021)
+     -- Upgraded from DateTime64(3) to support Binance's 2025-01-01 format transition:
+     --   Spot data: microseconds (16 digits) after 2025-01-01
+     --   Futures data: milliseconds (13 digits), converted to microseconds during ingestion
+     timestamp DateTime64(6) CODEC(DoubleDelta, LZ4),
+
+     -- Metadata columns (low-cardinality, optimized for indexing)
+     symbol LowCardinality(String) CODEC(ZSTD(3)),            -- Trading pair (e.g., "BTCUSDT")
+     timeframe LowCardinality(String) CODEC(ZSTD(3)),         -- Timeframe (e.g., "1h", "1mo")
+     instrument_type LowCardinality(String) CODEC(ZSTD(3)),   -- 'spot' or 'futures-um' (ADR-0004, ADR-0021)
+     data_source LowCardinality(String) CODEC(ZSTD(3)),       -- 'cloudfront'
+
+     -- OHLCV data (core price/volume metrics)
+     open Float64 CODEC(Gorilla, LZ4),
+     high Float64 CODEC(Gorilla, LZ4),
+     low Float64 CODEC(Gorilla, LZ4),
+     close Float64 CODEC(Gorilla, LZ4),
+     volume Float64 CODEC(Gorilla, LZ4),
+
+     -- Additional microstructure metrics (Binance 11-column format)
+     close_time DateTime64(6) CODEC(DoubleDelta, LZ4),   -- Upgraded to microsecond precision
+     quote_asset_volume Float64 CODEC(Gorilla, LZ4),
+     number_of_trades Int64 CODEC(Delta, LZ4),
+     taker_buy_base_asset_volume Float64 CODEC(Gorilla, LZ4),
+     taker_buy_quote_asset_volume Float64 CODEC(Gorilla, LZ4),
+
+     -- Futures-specific data (ADR-0021, v3.2.0+)
+     funding_rate Nullable(Float64) CODEC(Gorilla, LZ4),  -- NULL for spot, initially NULL for futures
+
+     -- Deduplication support (application-level, preserves zero-gap guarantee)
+     _version UInt64 CODEC(Delta, LZ4),   -- Deterministic hash of row content
+     _sign Int8 DEFAULT 1                 -- ReplacingMergeTree sign (1 for active rows)
+
+ ) ENGINE = ReplacingMergeTree(_version)
+ ORDER BY (timestamp, symbol, timeframe, instrument_type)
+ PARTITION BY toYYYYMMDD(timestamp)
+ SETTINGS
+     index_granularity = 8192,         -- Default granularity (good for time-series)
+     allow_nullable_key = 0,           -- Disallow NULL in ORDER BY keys (data quality)
+     merge_with_ttl_timeout = 86400;   -- Merge within 24 hours (background deduplication)
+
+ -- Rationale:
+ -- 1. ReplacingMergeTree(_version): Handles duplicates via background merges
+ --    - _version is deterministic hash of (timestamp, OHLCV, symbol, timeframe, instrument_type)
+ --    - Identical writes → identical _version → consistent merge outcome
+ --    - Preserves zero-gap guarantee via deterministic deduplication
+ --
+ -- 2. ORDER BY composite key: (timestamp, symbol, timeframe, instrument_type)
+ --    - Optimizes queries filtering by these columns
+ --    - ClickHouse uses ORDER BY as primary key (unlike PostgreSQL)
+ --
+ -- 3. PARTITION BY toYYYYMMDD(timestamp): Daily partitions
+ --    - Matches ADR-0003 QuestDB partition strategy (PARTITION BY DAY)
+ --    - Enables efficient partition pruning for date-range queries
+ --
+ -- 4. LowCardinality(String): ClickHouse equivalent to QuestDB SYMBOL
+ --    - Optimizes storage for low-cardinality columns (symbol, timeframe, etc.)
+ --    - Automatic dictionary encoding (similar to SYMBOL capacity)
+ --
+ -- 5. CODEC compression:
+ --    - DoubleDelta: Optimized for timestamps (sequential values)
+ --    - Gorilla: Optimized for float values (OHLCV data)
+ --    - Delta: Optimized for integer sequences (number_of_trades)
+ --    - ZSTD: General-purpose compression for string columns
+ --
+ -- 6. DateTime64(6): Microsecond precision (ADR-0021)
+ --    - Upgraded from DateTime64(3) to support Binance's 2025-01-01 format transition
+ --    - Spot data: microseconds (16 digits) after 2025-01-01
+ --    - Futures data: milliseconds (13 digits), converted to microseconds during ingestion
+ --    - Universal microsecond precision prevents timestamp errors
+ --    - ClickHouse equivalent to QuestDB TIMESTAMP type
+
+ -- Zero-Gap Guarantee:
+ -- Unlike QuestDB DEDUP ENABLE UPSERT KEYS (immediate consistency),
+ -- ClickHouse uses eventual consistency (duplicates visible until merge).
+ -- Application-level deterministic versioning ensures consistent merge outcomes.
+ --
+ -- Query pattern for deduplicated results:
+ --   SELECT * FROM ohlcv FINAL WHERE symbol = 'BTCUSDT' AND timeframe = '1h';
+ --
+ -- FINAL keyword forces deduplication at query time (10-30% performance overhead).
+ -- This is an acceptable trade-off for zero-gap guarantee preservation.
+
+ -- Migration from QuestDB (ADR-0003):
+ --   QuestDB SYMBOL → ClickHouse LowCardinality(String)
+ --   QuestDB DEDUP ENABLE UPSERT KEYS → ClickHouse ReplacingMergeTree(_version)
+ --   QuestDB PARTITION BY DAY → ClickHouse PARTITION BY toYYYYMMDD(timestamp)
+ --   QuestDB PostgreSQL wire protocol → ClickHouse native protocol (clickhouse-driver)
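The zero-gap rationale above hinges on _version being a deterministic hash of the row content, so that repeated writes of the same candle always collapse to the same surviving row. The hashing code is not part of this excerpt; the sketch below only illustrates the idea, and the compute_version name, field order, and string encoding are assumptions rather than the package's actual implementation:

    import hashlib

    def compute_version(timestamp: str, symbol: str, timeframe: str, instrument_type: str,
                        open_: float, high: float, low: float, close: float, volume: float) -> int:
        """Illustrative deterministic 64-bit version for ReplacingMergeTree(_version)."""
        # Identical input rows yield identical digests, so duplicate writes merge
        # to a single row once background merges (or a FINAL query) run.
        payload = "|".join(
            [timestamp, symbol, timeframe, instrument_type]
            + [f"{v:.8f}" for v in (open_, high, low, close, volume)]
        )
        digest = hashlib.sha256(payload.encode("utf-8")).digest()
        return int.from_bytes(digest[:8], "big")  # fits ClickHouse UInt64

    row = ("2024-01-01T00:00:00.000000", "BTCUSDT", "1h", "spot",
           50000.0, 50100.0, 49900.0, 50050.0, 12.5)
    assert compute_version(*row) == compute_version(*row)  # deterministic by construction

Any stable 64-bit digest over the same fields would serve; the property that matters is that identical rows map to identical _version values, so background merges and FINAL queries converge on a single row.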