moose-lib 0.6.90__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. moose_lib/__init__.py +38 -3
  2. moose_lib/blocks.py +497 -37
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +94 -5
  5. moose_lib/config/config_file.py +44 -2
  6. moose_lib/config/runtime.py +137 -5
  7. moose_lib/data_models.py +451 -46
  8. moose_lib/dmv2/__init__.py +88 -60
  9. moose_lib/dmv2/_registry.py +3 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +56 -13
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +304 -119
  18. moose_lib/dmv2/registry.py +28 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +241 -21
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +175 -0
  24. moose_lib/dmv2/web_app_helpers.py +96 -0
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +537 -68
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +266 -156
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/METADATA +19 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +38 -1
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +210 -0
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +114 -0
  56. tests/test_web_app.py +227 -0
  57. moose_lib-0.6.90.dist-info/RECORD +0 -42
  58. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
moose_lib/dmv2/olap_table.py

@@ -4,6 +4,7 @@ OLAP table definitions for Moose Data Model v2 (dmv2).
 This module provides classes for defining and configuring OLAP tables,
 particularly for ClickHouse.
 """
+
 import json
 import warnings
 from clickhouse_connect import get_client
@@ -11,7 +12,17 @@ from clickhouse_connect.driver.client import Client
 from clickhouse_connect.driver.exceptions import ClickHouseError
 from dataclasses import dataclass
 from pydantic import BaseModel
-from typing import List, Optional, Any, Literal, Union, Tuple, TypeVar, Generic, Iterator
+from typing import (
+    List,
+    Optional,
+    Any,
+    Literal,
+    Union,
+    Tuple,
+    TypeVar,
+    Generic,
+    Iterator,
+)
 from ..blocks import ClickHouseEngines, EngineConfig
 from ..commons import Logger
 from ..config.runtime import RuntimeClickHouseConfig
@@ -20,6 +31,8 @@ from .types import TypedMooseResource, T, Cols
 from ._registry import _tables
 from ..data_models import Column, is_array_nested_type, is_nested_type, _to_columns
 from .life_cycle import LifeCycle
+from ._source_capture import get_source_file_from_stack
+
 
 @dataclass
 class InsertOptions:
@@ -32,12 +45,14 @@ class InsertOptions:
         validate: Whether to validate data against schema before insertion.
         skip_validation_on_retry: Whether to skip validation for individual records during retries.
     """
+
     allow_errors: Optional[int] = None
     allow_errors_ratio: Optional[float] = None
     strategy: Literal["fail-fast", "discard", "isolate"] = "fail-fast"
     validate: bool = True
     skip_validation_on_retry: bool = False
 
+
 @dataclass
 class FailedRecord(Generic[T]):
     """Represents a failed record during insertion with error details.
@@ -47,10 +62,12 @@ class FailedRecord(Generic[T]):
         error: The error message describing why the insertion failed.
         index: Optional index of this record in the original batch.
     """
+
     record: T
     error: str
     index: Optional[int] = None
 
+
 @dataclass
 class ValidationError:
     """Validation error for a record with detailed error information.
@@ -61,11 +78,13 @@ class ValidationError:
         index: Optional index of this record in the original batch.
        path: Optional path to the field that failed validation.
    """
+
     record: Any
     error: str
     index: Optional[int] = None
     path: Optional[str] = None
 
+
 @dataclass
 class ValidationResult(Generic[T]):
     """Result of data validation with success/failure breakdown.
@@ -75,10 +94,12 @@ class ValidationResult(Generic[T]):
         invalid: Records that failed validation with detailed error information.
         total: Total number of records processed.
     """
+
     valid: List[T]
     invalid: List[ValidationError]
     total: int
 
+
 @dataclass
 class InsertResult(Generic[T]):
     """Result of an insert operation with detailed success/failure information.
@@ -89,20 +110,32 @@ class InsertResult(Generic[T]):
         total: Total number of records processed.
         failed_records: Detailed information about failed records (if record isolation was used).
     """
+
     successful: int
     failed: int
     total: int
     failed_records: Optional[List[FailedRecord[T]]] = None
 
+
 class OlapConfig(BaseModel):
     model_config = {"extra": "forbid"} # Reject unknown fields for a clean API
-
+
     """Configuration for OLAP tables (e.g., ClickHouse tables).
 
     Attributes:
         order_by_fields: List of column names to use for the ORDER BY clause.
            Crucial for `ReplacingMergeTree` and performance.
+        order_by_expression: An arbitrary ClickHouse expression for ORDER BY. Example:
+            `order_by_expression="(id, name)"` is equivalent to order_by_fields=["id", "name"], or
+            "tuple()" for no sorting.
         partition_by: Optional PARTITION BY expression (single ClickHouse SQL expression).
+        sample_by_expression: Optional SAMPLE BY expression for data sampling (single ClickHouse SQL expression).
+            Used to enable efficient approximate query processing with SAMPLE clause.
+        primary_key_expression: Optional PRIMARY KEY expression. When specified, this overrides the primary key
+            inferred from Key[T] column annotations. This allows for complex primary keys using
+            functions (e.g., "cityHash64(id)") or different column ordering in primary key vs
+            schema definition. Note: When this is set, any Key[T] annotations on columns are
+            ignored for PRIMARY KEY generation.
         engine: The ClickHouse table engine to use. Can be either a ClickHouseEngines enum value
             (for backward compatibility) or an EngineConfig instance (recommended).
         version: Optional version string for tracking configuration changes.
@@ -110,14 +143,113 @@ class OlapConfig(BaseModel):
         life_cycle: Determines how changes in code will propagate to the resources.
         settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING.
             These are alterable settings that can be changed without recreating the table.
+        cluster: Optional cluster name for ON CLUSTER support in ClickHouse.
+            Use this to enable replicated tables across ClickHouse clusters.
+            The cluster must be defined in moose.config.toml (dev environment only).
+            Example: cluster="prod_cluster"
     """
     order_by_fields: list[str] = []
+    order_by_expression: Optional[str] = None
     partition_by: Optional[str] = None
+    sample_by_expression: Optional[str] = None
+    primary_key_expression: Optional[str] = None
     engine: Optional[Union[ClickHouseEngines, EngineConfig]] = None
     version: Optional[str] = None
     metadata: Optional[dict] = None
     life_cycle: Optional[LifeCycle] = None
     settings: Optional[dict[str, str]] = None
+    # Optional table-level TTL expression (without leading 'TTL')
+    ttl: Optional[str] = None
+    # Optional cluster name for ON CLUSTER support in ClickHouse
+    cluster: Optional[str] = None
+
+    # Optional secondary/data-skipping indexes
+    class TableIndex(BaseModel):
+        name: str
+        expression: str
+        type: str
+        arguments: list[str] = []
+        granularity: int = 1
+
+    indexes: list[TableIndex] = []
+    database: Optional[str] = None # Optional database name for multi-database support
+
+    def model_post_init(self, __context):
+        has_fields = bool(self.order_by_fields)
+        has_expr = (
+            isinstance(self.order_by_expression, str)
+            and len(self.order_by_expression) > 0
+        )
+        if has_fields and has_expr:
+            raise ValueError(
+                "Provide either order_by_fields or order_by_expression, not both."
+            )
+
+        # Validate that non-MergeTree engines don't have unsupported clauses
+        if self.engine:
+            from ..blocks import (
+                S3Engine,
+                S3QueueEngine,
+                BufferEngine,
+                DistributedEngine,
+                IcebergS3Engine,
+                KafkaEngine,
+            )
+
+            # S3QueueEngine, BufferEngine, DistributedEngine, KafkaEngine, and IcebergS3Engine don't support ORDER BY
+            # Note: S3Engine DOES support ORDER BY (unlike S3Queue)
+            engines_without_order_by = (
+                S3QueueEngine,
+                BufferEngine,
+                DistributedEngine,
+                KafkaEngine,
+                IcebergS3Engine,
+            )
+            if isinstance(self.engine, engines_without_order_by):
+                engine_name = type(self.engine).__name__
+
+                if has_fields or has_expr:
+                    raise ValueError(
+                        f"{engine_name} does not support ORDER BY clauses. "
+                        f"Remove order_by_fields or order_by_expression from your configuration."
+                    )
+
+            # All non-MergeTree engines don't support SAMPLE BY
+            engines_without_sample_by = (
+                S3Engine,
+                S3QueueEngine,
+                BufferEngine,
+                DistributedEngine,
+                KafkaEngine,
+                IcebergS3Engine,
+            )
+            if isinstance(self.engine, engines_without_sample_by):
+                engine_name = type(self.engine).__name__
+
+                if self.sample_by_expression:
+                    raise ValueError(
+                        f"{engine_name} does not support SAMPLE BY clause. "
+                        f"Remove sample_by_expression from your configuration."
+                    )
+
+            # Only S3QueueEngine, BufferEngine, DistributedEngine, KafkaEngine, and IcebergS3Engine don't support PARTITION BY
+            # S3Engine DOES support PARTITION BY
+            engines_without_partition_by = (
+                S3QueueEngine,
+                BufferEngine,
+                DistributedEngine,
+                KafkaEngine,
+                IcebergS3Engine,
+            )
+            if isinstance(self.engine, engines_without_partition_by):
+                engine_name = type(self.engine).__name__
+
+                if self.partition_by:
+                    raise ValueError(
+                        f"{engine_name} does not support PARTITION BY clause. "
+                        f"Remove partition_by from your configuration."
+                    )
+
 
 class OlapTable(TypedMooseResource, Generic[T]):
     """Represents an OLAP table (e.g., a ClickHouse table) typed with a Pydantic model.
@@ -134,6 +266,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
         model_type (type[T]): The Pydantic model associated with this table.
         kind: The kind of the table (e.g., "OlapTable").
     """
+
     config: OlapConfig
     kind: str = "OlapTable"
     _memoized_client: Optional[Client] = None
@@ -146,15 +279,77 @@ class OlapTable(TypedMooseResource, Generic[T]):
         super().__init__()
         self._set_type(name, self._get_type(kwargs))
         self.config = config
-        self.metadata = config.metadata
+
+        if config.metadata:
+            self.metadata = (
+                config.metadata.copy()
+                if isinstance(config.metadata, dict)
+                else config.metadata
+            )
+        else:
+            self.metadata = {}
+
+        if not isinstance(self.metadata, dict):
+            self.metadata = {}
+        if "source" not in self.metadata:
+            source_file = get_source_file_from_stack()
+            if source_file:
+                self.metadata["source"] = {"file": source_file}
+
         self._column_list = _to_columns(self._t)
+
+        # Create Cols instance for backward compatibility
+        # This works for both BaseModel and MooseModel
         self._cols = Cols(self._column_list)
-        _tables[name] = self
-
+
+        # NOTE: For MooseModel types, columns are also accessible directly
+        # on the model class (e.g., MyModel.field_name) thanks to the metaclass.
+        # This provides LSP autocomplete without requiring .cols access.
+
+        registry_key = f"{name}_{config.version}" if config.version else name
+        if registry_key in _tables:
+            raise ValueError(
+                f"OlapTable with name {name} and version {config.version or 'unversioned'} already exists"
+            )
+        _tables[registry_key] = self
+
+        # Validate cluster and explicit replication params are not both specified
+        if config.cluster:
+            from moose_lib.blocks import (
+                ReplicatedMergeTreeEngine,
+                ReplicatedReplacingMergeTreeEngine,
+                ReplicatedAggregatingMergeTreeEngine,
+                ReplicatedSummingMergeTreeEngine,
+                ReplicatedCollapsingMergeTreeEngine,
+                ReplicatedVersionedCollapsingMergeTreeEngine,
+            )
+
+            if isinstance(
+                config.engine,
+                (
+                    ReplicatedMergeTreeEngine,
+                    ReplicatedReplacingMergeTreeEngine,
+                    ReplicatedAggregatingMergeTreeEngine,
+                    ReplicatedSummingMergeTreeEngine,
+                    ReplicatedCollapsingMergeTreeEngine,
+                    ReplicatedVersionedCollapsingMergeTreeEngine,
+                ),
+            ):
+                if (
+                    config.engine.keeper_path is not None
+                    or config.engine.replica_name is not None
+                ):
+                    raise ValueError(
+                        f"OlapTable {name}: Cannot specify both 'cluster' and explicit replication params "
+                        f"('keeper_path' or 'replica_name'). "
+                        f"Use 'cluster' for auto-injected params, or use explicit 'keeper_path' and "
+                        f"'replica_name' without 'cluster'."
+                    )
+
         # Check if using legacy enum-based engine configuration
         if config.engine and isinstance(config.engine, ClickHouseEngines):
            logger = Logger(action="OlapTable")
-
+
             # Special case for S3Queue - more detailed migration message
             if config.engine == ClickHouseEngines.S3Queue:
                 logger.highlight(
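Two of the constructor changes in the hunk above are behavioral rather than cosmetic: tables are now registered under a name-plus-version key, and `cluster` cannot be combined with explicit replication parameters. A hedged sketch of both, reusing the hypothetical `PageView` model from the earlier example; the `ReplicatedMergeTreeEngine` keyword arguments are assumed from the attribute names checked in this diff.

```python
from moose_lib import OlapTable, OlapConfig  # assumed re-exports
from moose_lib.blocks import ReplicatedMergeTreeEngine

# Same logical table, two versions: the registry keys become "PageView_1.0"
# and "PageView_2.0", so both definitions can coexist. Re-registering an
# existing name/version pair raises ValueError.
page_views_v1 = OlapTable[PageView](
    "PageView", OlapConfig(order_by_fields=["id"], version="1.0")
)
page_views_v2 = OlapTable[PageView](
    "PageView", OlapConfig(order_by_fields=["id"], version="2.0")
)

# cluster="..." asks Moose to inject keeper_path/replica_name itself, so
# combining it with explicit replication params is rejected at construction time.
try:
    OlapTable[PageView](
        "PageViewReplicated",
        OlapConfig(
            order_by_fields=["id"],
            cluster="my_cluster",
            engine=ReplicatedMergeTreeEngine(
                keeper_path="/clickhouse/tables/{shard}/page_view",  # assumed kwarg
                replica_name="{replica}",                            # assumed kwarg
            ),
        ),
    )
except ValueError as err:
    print(err)  # "...Cannot specify both 'cluster' and explicit replication params..."
```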
@@ -174,13 +369,13 @@ class OlapTable(TypedMooseResource, Generic[T]):
                     f" New: from moose_lib.blocks import {engine_name}Engine; engine={engine_name}Engine()\n"
                     "The new API provides better type safety and configuration options."
                 )
-
+
             # Also emit a Python warning for development environments
             warnings.warn(
                 f"Table '{name}' uses deprecated ClickHouseEngines enum. "
                 f"Please migrate to engine configuration classes (e.g., {config.engine.value}Engine).",
                 DeprecationWarning,
-                stacklevel=2
+                stacklevel=2,
             )
 
     @property
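For reference, the migration the warning asks for looks roughly like this. The `ReplacingMergeTreeEngine` class name follows the `<EnumValue>Engine` convention quoted in the log message and is an assumption, as is the `ReplacingMergeTree` enum member; neither is defined in this diff.

```python
from moose_lib import OlapConfig  # assumed re-export
from moose_lib.blocks import ClickHouseEngines, ReplacingMergeTreeEngine  # engine class name assumed

# Deprecated: still accepted, but logs a migration hint and emits DeprecationWarning.
legacy = OlapConfig(engine=ClickHouseEngines.ReplacingMergeTree, order_by_fields=["id"])

# Recommended: an EngineConfig instance, which is type-checked and configurable.
current = OlapConfig(engine=ReplacingMergeTreeEngine(), order_by_fields=["id"])
```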
@@ -217,10 +412,13 @@ class OlapTable(TypedMooseResource, Generic[T]):
             A 16-character hex hash of the configuration.
         """
         import hashlib
+
+        # Use per-table database if specified, otherwise fall back to global config
+        effective_database = self.config.database or clickhouse_config.database
         config_string = (
             f"{clickhouse_config.host}:{clickhouse_config.port}:"
             f"{clickhouse_config.username}:{clickhouse_config.password}:"
-            f"{clickhouse_config.database}:{clickhouse_config.use_ssl}"
+            f"{effective_database}:{clickhouse_config.use_ssl}"
         )
         return hashlib.sha256(config_string.encode()).hexdigest()[:16]
 
@@ -255,14 +453,16 @@ class OlapTable(TypedMooseResource, Generic[T]):
 
         try:
             # Create new client with standard configuration
-            interface = 'https' if clickhouse_config.use_ssl else 'http'
+            # Use per-table database if specified, otherwise fall back to global config
+            effective_database = self.config.database or clickhouse_config.database
+            interface = "https" if clickhouse_config.use_ssl else "http"
             client = get_client(
                 interface=interface,
                 host=clickhouse_config.host,
                 port=int(clickhouse_config.port),
                 username=clickhouse_config.username,
                 password=clickhouse_config.password,
-                database=clickhouse_config.database,
+                database=effective_database,
             )
 
             # Cache the new client and config hash
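The same fallback appears in both the config hash and the client construction, which is what keeps a memoized client from being reused across databases. A tiny standalone mirror of the rule (not the library's own helper):

```python
from typing import Optional


def effective_database(table_database: Optional[str], global_database: str) -> str:
    """Mirror of the `self.config.database or clickhouse_config.database` fallback."""
    return table_database or global_database


assert effective_database("analytics", "local") == "analytics"  # per-table override wins
assert effective_database(None, "local") == "local"             # falls back to global config
```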
@@ -298,7 +498,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
         Returns:
             Tuple of (validated_data, error_message). If validation succeeds,
             validated_data will be the validated record and error_message will be None.
-            If validation fails for any reason, validated_data will be None and error_message
+            If validation fails for any reason, validated_data will be None and error_message
             will contain the error details.
         """
         try:
@@ -324,23 +524,19 @@ class OlapTable(TypedMooseResource, Generic[T]):
             if validated is not None:
                 valid.append(validated)
             else:
-                invalid.append(ValidationError(
-                    record=record,
-                    error=error or "Validation failed",
-                    index=i,
-                    path="root"
-                ))
-
-        return ValidationResult(
-            valid=valid,
-            invalid=invalid,
-            total=len(data)
-        )
+                invalid.append(
+                    ValidationError(
+                        record=record,
+                        error=error or "Validation failed",
+                        index=i,
+                        path="root",
+                    )
+                )
+
+        return ValidationResult(valid=valid, invalid=invalid, total=len(data))
 
     def _validate_insert_parameters(
-        self,
-        data: Union[List[T], Iterator[T]],
-        options: Optional[InsertOptions]
+        self, data: Union[List[T], Iterator[T]], options: Optional[InsertOptions]
     ) -> Tuple[bool, str, bool]:
         """Validate input parameters and strategy compatibility.
 
@@ -362,7 +558,9 @@ class OlapTable(TypedMooseResource, Generic[T]):
         )
 
         if is_stream and should_validate:
-            print("Warning: Validation is not supported with stream input. Validation will be skipped.")
+            print(
+                "Warning: Validation is not supported with stream input. Validation will be skipped."
+            )
 
         return is_stream, strategy, should_validate
 
@@ -371,7 +569,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
         data: List[T],
         should_validate: bool,
         strategy: str,
-        options: Optional[InsertOptions] = None
+        options: Optional[InsertOptions] = None,
     ) -> Tuple[List[T], List[ValidationError]]:
         """Perform pre-insertion validation for array data.
 
@@ -394,10 +592,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
 
         if validation_errors:
             self._handle_validation_errors(
-                validation_errors,
-                strategy,
-                data,
-                options
+                validation_errors, strategy, data, options
             )
 
         if strategy == "discard":
@@ -420,7 +615,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
         validation_errors: List[ValidationError],
         strategy: str,
         data: List[T],
-        options: Optional[InsertOptions]
+        options: Optional[InsertOptions],
     ) -> None:
         """Handle validation errors based on the specified strategy.
 
@@ -436,17 +631,13 @@ class OlapTable(TypedMooseResource, Generic[T]):
                 f"Validation failed for record at index {first_error.index}: {first_error.error}"
             )
         elif strategy == "discard":
-            self._check_validation_thresholds(
-                validation_errors,
-                len(data),
-                options
-            )
+            self._check_validation_thresholds(validation_errors, len(data), options)
 
     def _check_validation_thresholds(
         self,
         validation_errors: List[ValidationError],
         total_records: int,
-        options: Optional[InsertOptions]
+        options: Optional[InsertOptions],
     ) -> None:
         """Check if validation errors exceed configured thresholds.
 
@@ -458,15 +649,21 @@ class OlapTable(TypedMooseResource, Generic[T]):
         validation_failed_count = len(validation_errors)
         validation_failed_ratio = validation_failed_count / total_records
 
-        if (options and options.allow_errors is not None and
-            validation_failed_count > options.allow_errors):
+        if (
+            options
+            and options.allow_errors is not None
+            and validation_failed_count > options.allow_errors
+        ):
             raise ValueError(
                 f"Too many validation failures: {validation_failed_count} > {options.allow_errors}. "
                 f"Errors: {', '.join(e.error for e in validation_errors)}"
             )
 
-        if (options and options.allow_errors_ratio is not None and
-            validation_failed_ratio > options.allow_errors_ratio):
+        if (
+            options
+            and options.allow_errors_ratio is not None
+            and validation_failed_ratio > options.allow_errors_ratio
+        ):
             raise ValueError(
                 f"Validation failure ratio too high: {validation_failed_ratio:.3f} > "
                 f"{options.allow_errors_ratio}. Errors: {', '.join(e.error for e in validation_errors)}"
@@ -477,10 +674,10 @@ class OlapTable(TypedMooseResource, Generic[T]):
 
     def _with_wait_end_settings(self, settings: dict) -> dict:
         """Add wait_end_of_query setting to ensure at least once delivery for INSERT operations.
-
+
         Args:
             settings: Base settings dictionary
-
+
         Returns:
             Settings dictionary with wait_end_of_query added
         """
@@ -493,20 +690,28 @@ class OlapTable(TypedMooseResource, Generic[T]):
         validated_data: List[T],
         is_stream: bool,
         strategy: str,
-        options: Optional[InsertOptions]
+        options: Optional[InsertOptions],
     ) -> tuple[str, bytes, dict]:
         """Prepare insert options for JSONEachRow raw SQL insert, returning settings dict."""
         # Base settings for all inserts
         base_settings = {
             "date_time_input_format": "best_effort",
-            "max_insert_block_size": 100000 if is_stream else min(len(validated_data), 100000),
+            "max_insert_block_size": (
+                100000 if is_stream else min(len(validated_data), 100000)
+            ),
             "max_block_size": 65536,
             "async_insert": 1 if len(validated_data) > 1000 else 0,
             "wait_for_async_insert": 1,
         }
         settings = self._with_wait_end_settings(base_settings)
-        if (strategy == "discard" and options and
-            (options.allow_errors is not None or options.allow_errors_ratio is not None)):
+        if (
+            strategy == "discard"
+            and options
+            and (
+                options.allow_errors is not None
+                or options.allow_errors_ratio is not None
+            )
+        ):
             if options.allow_errors is not None:
                 settings["input_format_allow_errors_num"] = options.allow_errors
             if options.allow_errors_ratio is not None:
@@ -519,7 +724,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
             validated_data = [validated_data]
         dict_data = []
         for record in validated_data:
-            if hasattr(record, 'model_dump'):
+            if hasattr(record, "model_dump"):
                 record_dict = record.model_dump()
             else:
                 record_dict = record
@@ -537,7 +742,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
         validation_errors: List[ValidationError],
         is_stream: bool,
         should_validate: bool,
-        strategy: str
+        strategy: str,
     ) -> InsertResult[T]:
         """Create appropriate result based on input type.
 
@@ -553,11 +758,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
             InsertResult with appropriate counts and error information.
         """
         if is_stream:
-            return InsertResult(
-                successful=-1,
-                failed=0,
-                total=-1
-            )
+            return InsertResult(successful=-1, failed=0, total=-1)
 
         inserted_count = len(validated_data)
         total_processed = len(data) if not is_stream else inserted_count
@@ -565,32 +766,30 @@ class OlapTable(TypedMooseResource, Generic[T]):
         result = InsertResult(
             successful=inserted_count,
             failed=len(validation_errors) if should_validate else 0,
-            total=total_processed
+            total=total_processed,
         )
 
-        if (should_validate and validation_errors and strategy == "discard"):
+        if should_validate and validation_errors and strategy == "discard":
             result.failed_records = [
                 FailedRecord(
                     record=ve.record,
                     error=f"Validation error: {ve.error}",
-                    index=ve.index
-                ) for ve in validation_errors
+                    index=ve.index,
+                )
+                for ve in validation_errors
             ]
 
         return result
 
     def _retry_individual_records(
-        self,
-        client: Client,
-        records: List[T],
-        options: InsertOptions
+        self, client: Client, records: List[T], options: InsertOptions
     ) -> InsertResult[T]:
         successful: List[T] = []
         failed: List[FailedRecord[T]] = []
         table_name = quote_identifier(self._generate_table_name())
         records_dict = []
         for record in records:
-            if hasattr(record, 'model_dump'):
+            if hasattr(record, "model_dump"):
                 record_dict = record.model_dump()
             else:
                 record_dict = record
@@ -599,41 +798,42 @@ class OlapTable(TypedMooseResource, Generic[T]):
 
         RETRY_BATCH_SIZE = 10
         for i in range(0, len(records_dict), RETRY_BATCH_SIZE):
-            batch = records_dict[i:i + RETRY_BATCH_SIZE]
+            batch = records_dict[i : i + RETRY_BATCH_SIZE]
             try:
                 sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
                 base_settings = {
                     "date_time_input_format": "best_effort",
                     "max_insert_block_size": RETRY_BATCH_SIZE,
                     "max_block_size": RETRY_BATCH_SIZE,
-                    "async_insert": 0
+                    "async_insert": 0,
                 }
                 settings = self._with_wait_end_settings(base_settings)
                 json_lines = self._to_json_each_row(batch)
                 client.command(sql, data=json_lines, settings=settings)
-                successful.extend(records[i:i + RETRY_BATCH_SIZE])
+                successful.extend(records[i : i + RETRY_BATCH_SIZE])
             except ClickHouseError as batch_error:
                 for j, record_dict in enumerate(batch):
                     try:
                         sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
-                        individual_settings = self._with_wait_end_settings({
-                            "date_time_input_format": "best_effort",
-                            "async_insert": 0
-                        })
+                        individual_settings = self._with_wait_end_settings(
+                            {"date_time_input_format": "best_effort", "async_insert": 0}
+                        )
                         json_line = self._to_json_each_row([record_dict])
-                        client.command(sql, data=json_line, settings=individual_settings)
+                        client.command(
+                            sql, data=json_line, settings=individual_settings
+                        )
                         successful.append(records[i + j])
                     except ClickHouseError as error:
-                        failed.append(FailedRecord(
-                            record=records[i + j],
-                            error=str(error),
-                            index=i + j
-                        ))
+                        failed.append(
+                            FailedRecord(
+                                record=records[i + j], error=str(error), index=i + j
+                            )
+                        )
         return InsertResult(
             successful=len(successful),
             failed=len(failed),
             total=len(records),
-            failed_records=failed if failed else None
+            failed_records=failed if failed else None,
         )
 
     def _insert_array_data(
@@ -643,7 +843,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
         data: List[T],
         should_validate: bool,
         strategy: str,
-        options: Optional[InsertOptions]
+        options: Optional[InsertOptions],
     ) -> InsertResult[T]:
         """Insert array data into the table with validation and error handling.
 
@@ -659,19 +859,11 @@ class OlapTable(TypedMooseResource, Generic[T]):
             InsertResult with detailed success/failure information.
         """
         validated_data, validation_errors = self._perform_pre_insertion_validation(
-            data,
-            should_validate,
-            strategy,
-            options
+            data, should_validate, strategy, options
         )
         try:
             table_name, json_lines, settings = self._prepare_insert_options(
-                table_name,
-                data,
-                validated_data,
-                False,
-                strategy,
-                options
+                table_name, data, validated_data, False, strategy, options
             )
             sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
             client.command(sql, data=json_lines, settings=settings)
@@ -681,7 +873,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
                 validation_errors,
                 False,
                 should_validate,
-                strategy
+                strategy,
             )
         except ClickHouseError as e:
             if strategy == "fail-fast":
@@ -692,7 +884,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
             return self._retry_individual_records(
                 client,
                 validated_data if not options.skip_validation_on_retry else data,
-                options
+                options,
             )
 
     def _insert_stream(
@@ -701,7 +893,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
         table_name: str,
         data: Iterator[T],
         strategy: str,
-        options: Optional[InsertOptions]
+        options: Optional[InsertOptions],
     ) -> InsertResult[T]:
         """Insert data from an iterator into the table.
 
@@ -719,17 +911,12 @@ class OlapTable(TypedMooseResource, Generic[T]):
         total_inserted = 0
 
         _, _, settings = self._prepare_insert_options(
-            table_name,
-            data,
-            [],
-            True,
-            strategy,
-            options
+            table_name, data, [], True, strategy, options
         )
 
            for record in data:
                # Convert record to dict using model_dump if available
-                if hasattr(record, 'model_dump'):
+                if hasattr(record, "model_dump"):
                    batch.append(record.model_dump())
                else:
                    batch.append(record)
@@ -754,9 +941,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
                total_inserted += len(batch)
 
            return InsertResult(
-                successful=total_inserted,
-                failed=0,
-                total=total_inserted
+                successful=total_inserted, failed=0, total=total_inserted
            )
        except ClickHouseError as e:
            if strategy == "fail-fast":
@@ -764,9 +949,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
            raise ValueError(f"Too many errors during stream insert: {e}")
 
     def insert(
-        self,
-        data: Union[List[T], Iterator[T]],
-        options: Optional[InsertOptions] = None
+        self, data: Union[List[T], Iterator[T]], options: Optional[InsertOptions] = None
     ) -> InsertResult[T]:
         """Insert data into the table with validation and error handling.
 
@@ -818,7 +1001,9 @@ class OlapTable(TypedMooseResource, Generic[T]):
         ```
         """
         options = options or InsertOptions()
-        is_stream, strategy, should_validate = self._validate_insert_parameters(data, options)
+        is_stream, strategy, should_validate = self._validate_insert_parameters(
+            data, options
+        )
         if (is_stream and not data) or (not is_stream and not data):
             return InsertResult(successful=0, failed=0, total=0)
 
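End to end, the options and result types compose as in the sketch below, again reusing the hypothetical `page_views` table and `PageView` model from the first example; only the field names visible in this diff are relied on, and the import path is an assumption.

```python
from datetime import datetime

from moose_lib import InsertOptions  # assumed re-export of the dataclass defined above

rows = [
    PageView(id="1", path="/docs", viewed_at=datetime(2024, 1, 1)),
    PageView(id="2", path="/pricing", viewed_at=datetime(2024, 1, 2)),
]

# "discard" drops invalid records as long as the thresholds hold;
# "isolate" retries failed batches record by record and reports each failure.
result = page_views.insert(rows, InsertOptions(strategy="discard", allow_errors_ratio=0.1))

print(result.successful, result.failed, result.total)
if result.failed_records:
    for failure in result.failed_records:
        print(failure.index, failure.error)
```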
@@ -829,15 +1014,12 @@ class OlapTable(TypedMooseResource, Generic[T]):
             return self._insert_stream(client, table_name, data, strategy, options)
         else:
             return self._insert_array_data(
-                client,
-                table_name,
-                data,
-                should_validate,
-                strategy,
-                options
+                client, table_name, data, should_validate, strategy, options
             )
 
-    def _map_to_clickhouse_record(self, record: dict, columns: Optional[List[Column]] = None) -> dict:
+    def _map_to_clickhouse_record(
+        self, record: dict, columns: Optional[List[Column]] = None
+    ) -> dict:
         """
         Recursively transforms a record to match ClickHouse's JSONEachRow requirements.
 
@@ -866,8 +1048,9 @@ class OlapTable(TypedMooseResource, Generic[T]):
 
             if is_array_nested_type(data_type):
                 # For Array(Nested(...)), wrap each item in its own array and recurse
-                if (isinstance(value, list) and
-                    (len(value) == 0 or isinstance(value[0], dict))):
+                if isinstance(value, list) and (
+                    len(value) == 0 or isinstance(value[0], dict)
+                ):
                     nested_columns = data_type.element_type.columns
                     result[col.name] = [
                         [self._map_to_clickhouse_record(item, nested_columns)]
@@ -876,7 +1059,9 @@ class OlapTable(TypedMooseResource, Generic[T]):
             elif is_nested_type(data_type):
                 # For Nested struct (not array), recurse into it
                 if value and isinstance(value, dict):
-                    result[col.name] = self._map_to_clickhouse_record(value, data_type.columns)
+                    result[col.name] = self._map_to_clickhouse_record(
+                        value, data_type.columns
+                    )
             # All other types: leave as is
 
-        return result
+        return result
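The subtlest part of the JSONEachRow path is the nested-type handling just above: every element of an Array(Nested(...)) column is wrapped in its own single-element array, while a plain Nested struct is recursed into in place. A data-only illustration with a hypothetical column layout:

```python
# Hypothetical table column: `events Array(Nested(kind String, ts String))`.
record_in = {
    "id": "1",
    "events": [
        {"kind": "click", "ts": "2024-01-01T00:00:00Z"},
        {"kind": "view", "ts": "2024-01-01T00:00:01Z"},
    ],
}

# Shape produced by _map_to_clickhouse_record: each nested item wrapped in its
# own array, which is what ClickHouse expects for Array(Nested(...)) in JSONEachRow.
record_out = {
    "id": "1",
    "events": [
        [{"kind": "click", "ts": "2024-01-01T00:00:00Z"}],
        [{"kind": "view", "ts": "2024-01-01T00:00:01Z"}],
    ],
}

assert len(record_in["events"]) == len(record_out["events"])
```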