moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (59):
  1. moose_lib/__init__.py +34 -3
  2. moose_lib/blocks.py +416 -52
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +37 -30
  5. moose_lib/config/config_file.py +5 -1
  6. moose_lib/config/runtime.py +73 -34
  7. moose_lib/data_models.py +331 -61
  8. moose_lib/dmv2/__init__.py +69 -73
  9. moose_lib/dmv2/_registry.py +2 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +35 -16
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +299 -151
  18. moose_lib/dmv2/registry.py +18 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +75 -23
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +11 -6
  24. moose_lib/dmv2/web_app_helpers.py +5 -1
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +340 -56
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +233 -117
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +6 -5
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +52 -58
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +42 -40
  56. tests/test_web_app.py +11 -5
  57. moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
  58. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
@@ -4,6 +4,7 @@ OLAP table definitions for Moose Data Model v2 (dmv2).
4
4
  This module provides classes for defining and configuring OLAP tables,
5
5
  particularly for ClickHouse.
6
6
  """
7
+
7
8
  import json
8
9
  import warnings
9
10
  from clickhouse_connect import get_client
@@ -11,7 +12,17 @@ from clickhouse_connect.driver.client import Client
11
12
  from clickhouse_connect.driver.exceptions import ClickHouseError
12
13
  from dataclasses import dataclass
13
14
  from pydantic import BaseModel
14
- from typing import List, Optional, Any, Literal, Union, Tuple, TypeVar, Generic, Iterator
15
+ from typing import (
16
+ List,
17
+ Optional,
18
+ Any,
19
+ Literal,
20
+ Union,
21
+ Tuple,
22
+ TypeVar,
23
+ Generic,
24
+ Iterator,
25
+ )
15
26
  from ..blocks import ClickHouseEngines, EngineConfig
16
27
  from ..commons import Logger
17
28
  from ..config.runtime import RuntimeClickHouseConfig
@@ -20,6 +31,7 @@ from .types import TypedMooseResource, T, Cols
20
31
  from ._registry import _tables
21
32
  from ..data_models import Column, is_array_nested_type, is_nested_type, _to_columns
22
33
  from .life_cycle import LifeCycle
34
+ from ._source_capture import get_source_file_from_stack
23
35
 
24
36
 
25
37
  @dataclass
@@ -33,6 +45,7 @@ class InsertOptions:
33
45
  validate: Whether to validate data against schema before insertion.
34
46
  skip_validation_on_retry: Whether to skip validation for individual records during retries.
35
47
  """
48
+
36
49
  allow_errors: Optional[int] = None
37
50
  allow_errors_ratio: Optional[float] = None
38
51
  strategy: Literal["fail-fast", "discard", "isolate"] = "fail-fast"
@@ -49,6 +62,7 @@ class FailedRecord(Generic[T]):
49
62
  error: The error message describing why the insertion failed.
50
63
  index: Optional index of this record in the original batch.
51
64
  """
65
+
52
66
  record: T
53
67
  error: str
54
68
  index: Optional[int] = None
@@ -64,6 +78,7 @@ class ValidationError:
64
78
  index: Optional index of this record in the original batch.
65
79
  path: Optional path to the field that failed validation.
66
80
  """
81
+
67
82
  record: Any
68
83
  error: str
69
84
  index: Optional[int] = None
@@ -79,6 +94,7 @@ class ValidationResult(Generic[T]):
79
94
  invalid: Records that failed validation with detailed error information.
80
95
  total: Total number of records processed.
81
96
  """
97
+
82
98
  valid: List[T]
83
99
  invalid: List[ValidationError]
84
100
  total: int
@@ -94,6 +110,7 @@ class InsertResult(Generic[T]):
94
110
  total: Total number of records processed.
95
111
  failed_records: Detailed information about failed records (if record isolation was used).
96
112
  """
113
+
97
114
  successful: int
98
115
  failed: int
99
116
  total: int
@@ -114,6 +131,11 @@ class OlapConfig(BaseModel):
114
131
  partition_by: Optional PARTITION BY expression (single ClickHouse SQL expression).
115
132
  sample_by_expression: Optional SAMPLE BY expression for data sampling (single ClickHouse SQL expression).
116
133
  Used to enable efficient approximate query processing with SAMPLE clause.
134
+ primary_key_expression: Optional PRIMARY KEY expression. When specified, this overrides the primary key
135
+ inferred from Key[T] column annotations. This allows for complex primary keys using
136
+ functions (e.g., "cityHash64(id)") or different column ordering in primary key vs
137
+ schema definition. Note: When this is set, any Key[T] annotations on columns are
138
+ ignored for PRIMARY KEY generation.
117
139
  engine: The ClickHouse table engine to use. Can be either a ClickHouseEngines enum value
118
140
  (for backward compatibility) or an EngineConfig instance (recommended).
119
141
  version: Optional version string for tracking configuration changes.
@@ -121,11 +143,16 @@ class OlapConfig(BaseModel):
121
143
  life_cycle: Determines how changes in code will propagate to the resources.
122
144
  settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING.
123
145
  These are alterable settings that can be changed without recreating the table.
146
+ cluster: Optional cluster name for ON CLUSTER support in ClickHouse.
147
+ Use this to enable replicated tables across ClickHouse clusters.
148
+ The cluster must be defined in moose.config.toml (dev environment only).
149
+ Example: cluster="prod_cluster"
124
150
  """
125
151
  order_by_fields: list[str] = []
126
152
  order_by_expression: Optional[str] = None
127
153
  partition_by: Optional[str] = None
128
154
  sample_by_expression: Optional[str] = None
155
+ primary_key_expression: Optional[str] = None
129
156
  engine: Optional[Union[ClickHouseEngines, EngineConfig]] = None
130
157
  version: Optional[str] = None
131
158
  metadata: Optional[dict] = None
@@ -133,22 +160,95 @@ class OlapConfig(BaseModel):
133
160
  settings: Optional[dict[str, str]] = None
134
161
  # Optional table-level TTL expression (without leading 'TTL')
135
162
  ttl: Optional[str] = None
163
+ # Optional cluster name for ON CLUSTER support in ClickHouse
164
+ cluster: Optional[str] = None
136
165
 
137
166
  # Optional secondary/data-skipping indexes
138
167
  class TableIndex(BaseModel):
139
168
  name: str
140
169
  expression: str
141
170
  type: str
142
- arguments: Optional[list[str]] = None
143
- granularity: int
171
+ arguments: list[str] = []
172
+ granularity: int = 1
144
173
 
145
174
  indexes: list[TableIndex] = []
175
+ database: Optional[str] = None # Optional database name for multi-database support
146
176
 
147
177
  def model_post_init(self, __context):
148
178
  has_fields = bool(self.order_by_fields)
149
- has_expr = isinstance(self.order_by_expression, str) and len(self.order_by_expression) > 0
179
+ has_expr = (
180
+ isinstance(self.order_by_expression, str)
181
+ and len(self.order_by_expression) > 0
182
+ )
150
183
  if has_fields and has_expr:
151
- raise ValueError("Provide either order_by_fields or order_by_expression, not both.")
184
+ raise ValueError(
185
+ "Provide either order_by_fields or order_by_expression, not both."
186
+ )
187
+
188
+ # Validate that non-MergeTree engines don't have unsupported clauses
189
+ if self.engine:
190
+ from ..blocks import (
191
+ S3Engine,
192
+ S3QueueEngine,
193
+ BufferEngine,
194
+ DistributedEngine,
195
+ IcebergS3Engine,
196
+ KafkaEngine,
197
+ )
198
+
199
+ # S3QueueEngine, BufferEngine, DistributedEngine, KafkaEngine, and IcebergS3Engine don't support ORDER BY
200
+ # Note: S3Engine DOES support ORDER BY (unlike S3Queue)
201
+ engines_without_order_by = (
202
+ S3QueueEngine,
203
+ BufferEngine,
204
+ DistributedEngine,
205
+ KafkaEngine,
206
+ IcebergS3Engine,
207
+ )
208
+ if isinstance(self.engine, engines_without_order_by):
209
+ engine_name = type(self.engine).__name__
210
+
211
+ if has_fields or has_expr:
212
+ raise ValueError(
213
+ f"{engine_name} does not support ORDER BY clauses. "
214
+ f"Remove order_by_fields or order_by_expression from your configuration."
215
+ )
216
+
217
+ # All non-MergeTree engines don't support SAMPLE BY
218
+ engines_without_sample_by = (
219
+ S3Engine,
220
+ S3QueueEngine,
221
+ BufferEngine,
222
+ DistributedEngine,
223
+ KafkaEngine,
224
+ IcebergS3Engine,
225
+ )
226
+ if isinstance(self.engine, engines_without_sample_by):
227
+ engine_name = type(self.engine).__name__
228
+
229
+ if self.sample_by_expression:
230
+ raise ValueError(
231
+ f"{engine_name} does not support SAMPLE BY clause. "
232
+ f"Remove sample_by_expression from your configuration."
233
+ )
234
+
235
+ # Only S3QueueEngine, BufferEngine, DistributedEngine, KafkaEngine, and IcebergS3Engine don't support PARTITION BY
236
+ # S3Engine DOES support PARTITION BY
237
+ engines_without_partition_by = (
238
+ S3QueueEngine,
239
+ BufferEngine,
240
+ DistributedEngine,
241
+ KafkaEngine,
242
+ IcebergS3Engine,
243
+ )
244
+ if isinstance(self.engine, engines_without_partition_by):
245
+ engine_name = type(self.engine).__name__
246
+
247
+ if self.partition_by:
248
+ raise ValueError(
249
+ f"{engine_name} does not support PARTITION BY clause. "
250
+ f"Remove partition_by from your configuration."
251
+ )
152
252
 
153
253
 
154
254
  class OlapTable(TypedMooseResource, Generic[T]):
@@ -166,6 +266,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
166
266
  model_type (type[T]): The Pydantic model associated with this table.
167
267
  kind: The kind of the table (e.g., "OlapTable").
168
268
  """
269
+
169
270
  config: OlapConfig
170
271
  kind: str = "OlapTable"
171
272
  _memoized_client: Optional[Client] = None
@@ -178,9 +279,33 @@ class OlapTable(TypedMooseResource, Generic[T]):
178
279
  super().__init__()
179
280
  self._set_type(name, self._get_type(kwargs))
180
281
  self.config = config
181
- self.metadata = config.metadata
282
+
283
+ if config.metadata:
284
+ self.metadata = (
285
+ config.metadata.copy()
286
+ if isinstance(config.metadata, dict)
287
+ else config.metadata
288
+ )
289
+ else:
290
+ self.metadata = {}
291
+
292
+ if not isinstance(self.metadata, dict):
293
+ self.metadata = {}
294
+ if "source" not in self.metadata:
295
+ source_file = get_source_file_from_stack()
296
+ if source_file:
297
+ self.metadata["source"] = {"file": source_file}
298
+
182
299
  self._column_list = _to_columns(self._t)
300
+
301
+ # Create Cols instance for backward compatibility
302
+ # This works for both BaseModel and MooseModel
183
303
  self._cols = Cols(self._column_list)
304
+
305
+ # NOTE: For MooseModel types, columns are also accessible directly
306
+ # on the model class (e.g., MyModel.field_name) thanks to the metaclass.
307
+ # This provides LSP autocomplete without requiring .cols access.
308
+
184
309
  registry_key = f"{name}_{config.version}" if config.version else name
185
310
  if registry_key in _tables:
186
311
  raise ValueError(
@@ -188,6 +313,39 @@ class OlapTable(TypedMooseResource, Generic[T]):
188
313
  )
189
314
  _tables[registry_key] = self
190
315
 
316
+ # Validate cluster and explicit replication params are not both specified
317
+ if config.cluster:
318
+ from moose_lib.blocks import (
319
+ ReplicatedMergeTreeEngine,
320
+ ReplicatedReplacingMergeTreeEngine,
321
+ ReplicatedAggregatingMergeTreeEngine,
322
+ ReplicatedSummingMergeTreeEngine,
323
+ ReplicatedCollapsingMergeTreeEngine,
324
+ ReplicatedVersionedCollapsingMergeTreeEngine,
325
+ )
326
+
327
+ if isinstance(
328
+ config.engine,
329
+ (
330
+ ReplicatedMergeTreeEngine,
331
+ ReplicatedReplacingMergeTreeEngine,
332
+ ReplicatedAggregatingMergeTreeEngine,
333
+ ReplicatedSummingMergeTreeEngine,
334
+ ReplicatedCollapsingMergeTreeEngine,
335
+ ReplicatedVersionedCollapsingMergeTreeEngine,
336
+ ),
337
+ ):
338
+ if (
339
+ config.engine.keeper_path is not None
340
+ or config.engine.replica_name is not None
341
+ ):
342
+ raise ValueError(
343
+ f"OlapTable {name}: Cannot specify both 'cluster' and explicit replication params "
344
+ f"('keeper_path' or 'replica_name'). "
345
+ f"Use 'cluster' for auto-injected params, or use explicit 'keeper_path' and "
346
+ f"'replica_name' without 'cluster'."
347
+ )
348
+
191
349
  # Check if using legacy enum-based engine configuration
192
350
  if config.engine and isinstance(config.engine, ClickHouseEngines):
193
351
  logger = Logger(action="OlapTable")
@@ -217,7 +375,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
217
375
  f"Table '{name}' uses deprecated ClickHouseEngines enum. "
218
376
  f"Please migrate to engine configuration classes (e.g., {config.engine.value}Engine).",
219
377
  DeprecationWarning,
220
- stacklevel=2
378
+ stacklevel=2,
221
379
  )
222
380
 
223
381
  @property
@@ -254,10 +412,13 @@ class OlapTable(TypedMooseResource, Generic[T]):
254
412
  A 16-character hex hash of the configuration.
255
413
  """
256
414
  import hashlib
415
+
416
+ # Use per-table database if specified, otherwise fall back to global config
417
+ effective_database = self.config.database or clickhouse_config.database
257
418
  config_string = (
258
419
  f"{clickhouse_config.host}:{clickhouse_config.port}:"
259
420
  f"{clickhouse_config.username}:{clickhouse_config.password}:"
260
- f"{clickhouse_config.database}:{clickhouse_config.use_ssl}"
421
+ f"{effective_database}:{clickhouse_config.use_ssl}"
261
422
  )
262
423
  return hashlib.sha256(config_string.encode()).hexdigest()[:16]
263
424
 
@@ -292,14 +453,16 @@ class OlapTable(TypedMooseResource, Generic[T]):
292
453
 
293
454
  try:
294
455
  # Create new client with standard configuration
295
- interface = 'https' if clickhouse_config.use_ssl else 'http'
456
+ # Use per-table database if specified, otherwise fall back to global config
457
+ effective_database = self.config.database or clickhouse_config.database
458
+ interface = "https" if clickhouse_config.use_ssl else "http"
296
459
  client = get_client(
297
460
  interface=interface,
298
461
  host=clickhouse_config.host,
299
462
  port=int(clickhouse_config.port),
300
463
  username=clickhouse_config.username,
301
464
  password=clickhouse_config.password,
302
- database=clickhouse_config.database,
465
+ database=effective_database,
303
466
  )
304
467
 
305
468
  # Cache the new client and config hash
@@ -335,7 +498,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
335
498
  Returns:
336
499
  Tuple of (validated_data, error_message). If validation succeeds,
337
500
  validated_data will be the validated record and error_message will be None.
338
- If validation fails for any reason, validated_data will be None and error_message
501
+ If validation fails for any reason, validated_data will be None and error_message
339
502
  will contain the error details.
340
503
  """
341
504
  try:
@@ -361,23 +524,19 @@ class OlapTable(TypedMooseResource, Generic[T]):
361
524
  if validated is not None:
362
525
  valid.append(validated)
363
526
  else:
364
- invalid.append(ValidationError(
365
- record=record,
366
- error=error or "Validation failed",
367
- index=i,
368
- path="root"
369
- ))
370
-
371
- return ValidationResult(
372
- valid=valid,
373
- invalid=invalid,
374
- total=len(data)
375
- )
527
+ invalid.append(
528
+ ValidationError(
529
+ record=record,
530
+ error=error or "Validation failed",
531
+ index=i,
532
+ path="root",
533
+ )
534
+ )
535
+
536
+ return ValidationResult(valid=valid, invalid=invalid, total=len(data))
376
537
 
377
538
  def _validate_insert_parameters(
378
- self,
379
- data: Union[List[T], Iterator[T]],
380
- options: Optional[InsertOptions]
539
+ self, data: Union[List[T], Iterator[T]], options: Optional[InsertOptions]
381
540
  ) -> Tuple[bool, str, bool]:
382
541
  """Validate input parameters and strategy compatibility.
383
542
 
@@ -399,16 +558,18 @@ class OlapTable(TypedMooseResource, Generic[T]):
399
558
  )
400
559
 
401
560
  if is_stream and should_validate:
402
- print("Warning: Validation is not supported with stream input. Validation will be skipped.")
561
+ print(
562
+ "Warning: Validation is not supported with stream input. Validation will be skipped."
563
+ )
403
564
 
404
565
  return is_stream, strategy, should_validate
405
566
 
406
567
  def _perform_pre_insertion_validation(
407
- self,
408
- data: List[T],
409
- should_validate: bool,
410
- strategy: str,
411
- options: Optional[InsertOptions] = None
568
+ self,
569
+ data: List[T],
570
+ should_validate: bool,
571
+ strategy: str,
572
+ options: Optional[InsertOptions] = None,
412
573
  ) -> Tuple[List[T], List[ValidationError]]:
413
574
  """Perform pre-insertion validation for array data.
414
575
 
@@ -431,10 +592,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
431
592
 
432
593
  if validation_errors:
433
594
  self._handle_validation_errors(
434
- validation_errors,
435
- strategy,
436
- data,
437
- options
595
+ validation_errors, strategy, data, options
438
596
  )
439
597
 
440
598
  if strategy == "discard":
@@ -453,11 +611,11 @@ class OlapTable(TypedMooseResource, Generic[T]):
453
611
  return data, []
454
612
 
455
613
  def _handle_validation_errors(
456
- self,
457
- validation_errors: List[ValidationError],
458
- strategy: str,
459
- data: List[T],
460
- options: Optional[InsertOptions]
614
+ self,
615
+ validation_errors: List[ValidationError],
616
+ strategy: str,
617
+ data: List[T],
618
+ options: Optional[InsertOptions],
461
619
  ) -> None:
462
620
  """Handle validation errors based on the specified strategy.
463
621
 
@@ -473,17 +631,13 @@ class OlapTable(TypedMooseResource, Generic[T]):
473
631
  f"Validation failed for record at index {first_error.index}: {first_error.error}"
474
632
  )
475
633
  elif strategy == "discard":
476
- self._check_validation_thresholds(
477
- validation_errors,
478
- len(data),
479
- options
480
- )
634
+ self._check_validation_thresholds(validation_errors, len(data), options)
481
635
 
482
636
  def _check_validation_thresholds(
483
- self,
484
- validation_errors: List[ValidationError],
485
- total_records: int,
486
- options: Optional[InsertOptions]
637
+ self,
638
+ validation_errors: List[ValidationError],
639
+ total_records: int,
640
+ options: Optional[InsertOptions],
487
641
  ) -> None:
488
642
  """Check if validation errors exceed configured thresholds.
489
643
 
@@ -495,15 +649,21 @@ class OlapTable(TypedMooseResource, Generic[T]):
495
649
  validation_failed_count = len(validation_errors)
496
650
  validation_failed_ratio = validation_failed_count / total_records
497
651
 
498
- if (options and options.allow_errors is not None and
499
- validation_failed_count > options.allow_errors):
652
+ if (
653
+ options
654
+ and options.allow_errors is not None
655
+ and validation_failed_count > options.allow_errors
656
+ ):
500
657
  raise ValueError(
501
658
  f"Too many validation failures: {validation_failed_count} > {options.allow_errors}. "
502
659
  f"Errors: {', '.join(e.error for e in validation_errors)}"
503
660
  )
504
661
 
505
- if (options and options.allow_errors_ratio is not None and
506
- validation_failed_ratio > options.allow_errors_ratio):
662
+ if (
663
+ options
664
+ and options.allow_errors_ratio is not None
665
+ and validation_failed_ratio > options.allow_errors_ratio
666
+ ):
507
667
  raise ValueError(
508
668
  f"Validation failure ratio too high: {validation_failed_ratio:.3f} > "
509
669
  f"{options.allow_errors_ratio}. Errors: {', '.join(e.error for e in validation_errors)}"
@@ -514,36 +674,44 @@ class OlapTable(TypedMooseResource, Generic[T]):
514
674
 
515
675
  def _with_wait_end_settings(self, settings: dict) -> dict:
516
676
  """Add wait_end_of_query setting to ensure at least once delivery for INSERT operations.
517
-
677
+
518
678
  Args:
519
679
  settings: Base settings dictionary
520
-
680
+
521
681
  Returns:
522
682
  Settings dictionary with wait_end_of_query added
523
683
  """
524
684
  return {**settings, "wait_end_of_query": 1}
525
685
 
526
686
  def _prepare_insert_options(
527
- self,
528
- table_name: str,
529
- data: Union[List[T], Iterator[T]],
530
- validated_data: List[T],
531
- is_stream: bool,
532
- strategy: str,
533
- options: Optional[InsertOptions]
687
+ self,
688
+ table_name: str,
689
+ data: Union[List[T], Iterator[T]],
690
+ validated_data: List[T],
691
+ is_stream: bool,
692
+ strategy: str,
693
+ options: Optional[InsertOptions],
534
694
  ) -> tuple[str, bytes, dict]:
535
695
  """Prepare insert options for JSONEachRow raw SQL insert, returning settings dict."""
536
696
  # Base settings for all inserts
537
697
  base_settings = {
538
698
  "date_time_input_format": "best_effort",
539
- "max_insert_block_size": 100000 if is_stream else min(len(validated_data), 100000),
699
+ "max_insert_block_size": (
700
+ 100000 if is_stream else min(len(validated_data), 100000)
701
+ ),
540
702
  "max_block_size": 65536,
541
703
  "async_insert": 1 if len(validated_data) > 1000 else 0,
542
704
  "wait_for_async_insert": 1,
543
705
  }
544
706
  settings = self._with_wait_end_settings(base_settings)
545
- if (strategy == "discard" and options and
546
- (options.allow_errors is not None or options.allow_errors_ratio is not None)):
707
+ if (
708
+ strategy == "discard"
709
+ and options
710
+ and (
711
+ options.allow_errors is not None
712
+ or options.allow_errors_ratio is not None
713
+ )
714
+ ):
547
715
  if options.allow_errors is not None:
548
716
  settings["input_format_allow_errors_num"] = options.allow_errors
549
717
  if options.allow_errors_ratio is not None:
@@ -556,7 +724,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
556
724
  validated_data = [validated_data]
557
725
  dict_data = []
558
726
  for record in validated_data:
559
- if hasattr(record, 'model_dump'):
727
+ if hasattr(record, "model_dump"):
560
728
  record_dict = record.model_dump()
561
729
  else:
562
730
  record_dict = record
@@ -568,13 +736,13 @@ class OlapTable(TypedMooseResource, Generic[T]):
568
736
  return quote_identifier(table_name), json_lines, settings
569
737
 
570
738
  def _create_success_result(
571
- self,
572
- data: Union[List[T], Iterator[T]],
573
- validated_data: List[T],
574
- validation_errors: List[ValidationError],
575
- is_stream: bool,
576
- should_validate: bool,
577
- strategy: str
739
+ self,
740
+ data: Union[List[T], Iterator[T]],
741
+ validated_data: List[T],
742
+ validation_errors: List[ValidationError],
743
+ is_stream: bool,
744
+ should_validate: bool,
745
+ strategy: str,
578
746
  ) -> InsertResult[T]:
579
747
  """Create appropriate result based on input type.
580
748
 
@@ -590,11 +758,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
590
758
  InsertResult with appropriate counts and error information.
591
759
  """
592
760
  if is_stream:
593
- return InsertResult(
594
- successful=-1,
595
- failed=0,
596
- total=-1
597
- )
761
+ return InsertResult(successful=-1, failed=0, total=-1)
598
762
 
599
763
  inserted_count = len(validated_data)
600
764
  total_processed = len(data) if not is_stream else inserted_count
@@ -602,32 +766,30 @@ class OlapTable(TypedMooseResource, Generic[T]):
602
766
  result = InsertResult(
603
767
  successful=inserted_count,
604
768
  failed=len(validation_errors) if should_validate else 0,
605
- total=total_processed
769
+ total=total_processed,
606
770
  )
607
771
 
608
- if (should_validate and validation_errors and strategy == "discard"):
772
+ if should_validate and validation_errors and strategy == "discard":
609
773
  result.failed_records = [
610
774
  FailedRecord(
611
775
  record=ve.record,
612
776
  error=f"Validation error: {ve.error}",
613
- index=ve.index
614
- ) for ve in validation_errors
777
+ index=ve.index,
778
+ )
779
+ for ve in validation_errors
615
780
  ]
616
781
 
617
782
  return result
618
783
 
619
784
  def _retry_individual_records(
620
- self,
621
- client: Client,
622
- records: List[T],
623
- options: InsertOptions
785
+ self, client: Client, records: List[T], options: InsertOptions
624
786
  ) -> InsertResult[T]:
625
787
  successful: List[T] = []
626
788
  failed: List[FailedRecord[T]] = []
627
789
  table_name = quote_identifier(self._generate_table_name())
628
790
  records_dict = []
629
791
  for record in records:
630
- if hasattr(record, 'model_dump'):
792
+ if hasattr(record, "model_dump"):
631
793
  record_dict = record.model_dump()
632
794
  else:
633
795
  record_dict = record
@@ -636,51 +798,52 @@ class OlapTable(TypedMooseResource, Generic[T]):
636
798
 
637
799
  RETRY_BATCH_SIZE = 10
638
800
  for i in range(0, len(records_dict), RETRY_BATCH_SIZE):
639
- batch = records_dict[i:i + RETRY_BATCH_SIZE]
801
+ batch = records_dict[i : i + RETRY_BATCH_SIZE]
640
802
  try:
641
803
  sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
642
804
  base_settings = {
643
805
  "date_time_input_format": "best_effort",
644
806
  "max_insert_block_size": RETRY_BATCH_SIZE,
645
807
  "max_block_size": RETRY_BATCH_SIZE,
646
- "async_insert": 0
808
+ "async_insert": 0,
647
809
  }
648
810
  settings = self._with_wait_end_settings(base_settings)
649
811
  json_lines = self._to_json_each_row(batch)
650
812
  client.command(sql, data=json_lines, settings=settings)
651
- successful.extend(records[i:i + RETRY_BATCH_SIZE])
813
+ successful.extend(records[i : i + RETRY_BATCH_SIZE])
652
814
  except ClickHouseError as batch_error:
653
815
  for j, record_dict in enumerate(batch):
654
816
  try:
655
817
  sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
656
- individual_settings = self._with_wait_end_settings({
657
- "date_time_input_format": "best_effort",
658
- "async_insert": 0
659
- })
818
+ individual_settings = self._with_wait_end_settings(
819
+ {"date_time_input_format": "best_effort", "async_insert": 0}
820
+ )
660
821
  json_line = self._to_json_each_row([record_dict])
661
- client.command(sql, data=json_line, settings=individual_settings)
822
+ client.command(
823
+ sql, data=json_line, settings=individual_settings
824
+ )
662
825
  successful.append(records[i + j])
663
826
  except ClickHouseError as error:
664
- failed.append(FailedRecord(
665
- record=records[i + j],
666
- error=str(error),
667
- index=i + j
668
- ))
827
+ failed.append(
828
+ FailedRecord(
829
+ record=records[i + j], error=str(error), index=i + j
830
+ )
831
+ )
669
832
  return InsertResult(
670
833
  successful=len(successful),
671
834
  failed=len(failed),
672
835
  total=len(records),
673
- failed_records=failed if failed else None
836
+ failed_records=failed if failed else None,
674
837
  )
675
838
 
676
839
  def _insert_array_data(
677
- self,
678
- client: Client,
679
- table_name: str,
680
- data: List[T],
681
- should_validate: bool,
682
- strategy: str,
683
- options: Optional[InsertOptions]
840
+ self,
841
+ client: Client,
842
+ table_name: str,
843
+ data: List[T],
844
+ should_validate: bool,
845
+ strategy: str,
846
+ options: Optional[InsertOptions],
684
847
  ) -> InsertResult[T]:
685
848
  """Insert array data into the table with validation and error handling.
686
849
 
@@ -696,19 +859,11 @@ class OlapTable(TypedMooseResource, Generic[T]):
696
859
  InsertResult with detailed success/failure information.
697
860
  """
698
861
  validated_data, validation_errors = self._perform_pre_insertion_validation(
699
- data,
700
- should_validate,
701
- strategy,
702
- options
862
+ data, should_validate, strategy, options
703
863
  )
704
864
  try:
705
865
  table_name, json_lines, settings = self._prepare_insert_options(
706
- table_name,
707
- data,
708
- validated_data,
709
- False,
710
- strategy,
711
- options
866
+ table_name, data, validated_data, False, strategy, options
712
867
  )
713
868
  sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
714
869
  client.command(sql, data=json_lines, settings=settings)
@@ -718,7 +873,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
718
873
  validation_errors,
719
874
  False,
720
875
  should_validate,
721
- strategy
876
+ strategy,
722
877
  )
723
878
  except ClickHouseError as e:
724
879
  if strategy == "fail-fast":
@@ -729,16 +884,16 @@ class OlapTable(TypedMooseResource, Generic[T]):
729
884
  return self._retry_individual_records(
730
885
  client,
731
886
  validated_data if not options.skip_validation_on_retry else data,
732
- options
887
+ options,
733
888
  )
734
889
 
735
890
  def _insert_stream(
736
- self,
737
- client: Client,
738
- table_name: str,
739
- data: Iterator[T],
740
- strategy: str,
741
- options: Optional[InsertOptions]
891
+ self,
892
+ client: Client,
893
+ table_name: str,
894
+ data: Iterator[T],
895
+ strategy: str,
896
+ options: Optional[InsertOptions],
742
897
  ) -> InsertResult[T]:
743
898
  """Insert data from an iterator into the table.
744
899
 
@@ -756,17 +911,12 @@ class OlapTable(TypedMooseResource, Generic[T]):
756
911
  total_inserted = 0
757
912
 
758
913
  _, _, settings = self._prepare_insert_options(
759
- table_name,
760
- data,
761
- [],
762
- True,
763
- strategy,
764
- options
914
+ table_name, data, [], True, strategy, options
765
915
  )
766
916
 
767
917
  for record in data:
768
918
  # Convert record to dict using model_dump if available
769
- if hasattr(record, 'model_dump'):
919
+ if hasattr(record, "model_dump"):
770
920
  batch.append(record.model_dump())
771
921
  else:
772
922
  batch.append(record)
@@ -791,9 +941,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
791
941
  total_inserted += len(batch)
792
942
 
793
943
  return InsertResult(
794
- successful=total_inserted,
795
- failed=0,
796
- total=total_inserted
944
+ successful=total_inserted, failed=0, total=total_inserted
797
945
  )
798
946
  except ClickHouseError as e:
799
947
  if strategy == "fail-fast":
@@ -801,9 +949,7 @@ class OlapTable(TypedMooseResource, Generic[T]):
801
949
  raise ValueError(f"Too many errors during stream insert: {e}")
802
950
 
803
951
  def insert(
804
- self,
805
- data: Union[List[T], Iterator[T]],
806
- options: Optional[InsertOptions] = None
952
+ self, data: Union[List[T], Iterator[T]], options: Optional[InsertOptions] = None
807
953
  ) -> InsertResult[T]:
808
954
  """Insert data into the table with validation and error handling.
809
955
 
@@ -855,7 +1001,9 @@ class OlapTable(TypedMooseResource, Generic[T]):
855
1001
  ```
856
1002
  """
857
1003
  options = options or InsertOptions()
858
- is_stream, strategy, should_validate = self._validate_insert_parameters(data, options)
1004
+ is_stream, strategy, should_validate = self._validate_insert_parameters(
1005
+ data, options
1006
+ )
859
1007
  if (is_stream and not data) or (not is_stream and not data):
860
1008
  return InsertResult(successful=0, failed=0, total=0)
861
1009
 
@@ -866,15 +1014,12 @@ class OlapTable(TypedMooseResource, Generic[T]):
866
1014
  return self._insert_stream(client, table_name, data, strategy, options)
867
1015
  else:
868
1016
  return self._insert_array_data(
869
- client,
870
- table_name,
871
- data,
872
- should_validate,
873
- strategy,
874
- options
1017
+ client, table_name, data, should_validate, strategy, options
875
1018
  )
876
1019
 
877
- def _map_to_clickhouse_record(self, record: dict, columns: Optional[List[Column]] = None) -> dict:
1020
+ def _map_to_clickhouse_record(
1021
+ self, record: dict, columns: Optional[List[Column]] = None
1022
+ ) -> dict:
878
1023
  """
879
1024
  Recursively transforms a record to match ClickHouse's JSONEachRow requirements.
880
1025
 
@@ -903,8 +1048,9 @@ class OlapTable(TypedMooseResource, Generic[T]):
903
1048
 
904
1049
  if is_array_nested_type(data_type):
905
1050
  # For Array(Nested(...)), wrap each item in its own array and recurse
906
- if (isinstance(value, list) and
907
- (len(value) == 0 or isinstance(value[0], dict))):
1051
+ if isinstance(value, list) and (
1052
+ len(value) == 0 or isinstance(value[0], dict)
1053
+ ):
908
1054
  nested_columns = data_type.element_type.columns
909
1055
  result[col.name] = [
910
1056
  [self._map_to_clickhouse_record(item, nested_columns)]
@@ -913,7 +1059,9 @@ class OlapTable(TypedMooseResource, Generic[T]):
913
1059
  elif is_nested_type(data_type):
914
1060
  # For Nested struct (not array), recurse into it
915
1061
  if value and isinstance(value, dict):
916
- result[col.name] = self._map_to_clickhouse_record(value, data_type.columns)
1062
+ result[col.name] = self._map_to_clickhouse_record(
1063
+ value, data_type.columns
1064
+ )
917
1065
  # All other types: leave as is
918
1066
 
919
1067
  return result