moose-lib 0.4.223__py3-none-any.whl → 0.4.225__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@ from .types import (
18
18
  from .olap_table import (
19
19
  OlapConfig,
20
20
  OlapTable,
21
+ InsertOptions,
21
22
  )
22
23
 
23
24
  from .stream import (
@@ -94,6 +95,7 @@ __all__ = [
94
95
  # OLAP Tables
95
96
  'OlapConfig',
96
97
  'OlapTable',
98
+ 'InsertOptions',
97
99
 
98
100
  # Streams
99
101
  'StreamConfig',
@@ -4,13 +4,91 @@ OLAP table definitions for Moose Data Model v2 (dmv2).
4
4
  This module provides classes for defining and configuring OLAP tables,
5
5
  particularly for ClickHouse.
6
6
  """
7
- from typing import Optional, Dict, Any, Generic
7
+ import json
8
+ from clickhouse_connect import get_client
9
+ from clickhouse_connect.driver.client import Client
10
+ from clickhouse_connect.driver.exceptions import ClickHouseError
11
+ from dataclasses import dataclass
8
12
  from pydantic import BaseModel
9
-
13
+ from typing import List, Optional, Any, Literal, Union, Tuple, TypeVar, Generic, Iterator
10
14
  from moose_lib import ClickHouseEngines
15
+ from ..config.runtime import RuntimeClickHouseConfig
11
16
  from .types import TypedMooseResource, T
12
17
  from ._registry import _tables
13
18
 
19
@dataclass
class InsertOptions:
    """Configuration knobs for ``OlapTable.insert``.

    Attributes:
        allow_errors: Upper bound on the number of bad records tolerated
            before the insert is considered failed.
        allow_errors_ratio: Upper bound on the fraction (0.0 to 1.0) of bad
            records tolerated before the insert is considered failed.
        strategy: How insertion errors are handled — "fail-fast" aborts on
            the first error, "discard" drops bad records within the
            configured thresholds, "isolate" retries records individually
            to pinpoint failures.
        validate: Validate records against the table schema before insertion.
        skip_validation_on_retry: Skip per-record validation when records are
            retried individually under the "isolate" strategy.
    """
    allow_errors: Optional[int] = None
    allow_errors_ratio: Optional[float] = None
    strategy: Literal["fail-fast", "discard", "isolate"] = "fail-fast"
    validate: bool = True
    skip_validation_on_retry: bool = False
+
36
@dataclass
class FailedRecord(Generic[T]):
    """A record that could not be inserted, together with diagnostics.

    Attributes:
        record: The original record whose insertion failed.
        error: Human-readable description of the failure.
        index: Position of the record within the original batch, if known.
    """
    record: T
    error: str
    index: Optional[int] = None
48
+
49
@dataclass
class ValidationError:
    """Describes why a single record failed schema validation.

    Attributes:
        record: The original record that failed validation.
        error: Detailed validation error message.
        index: Position of the record within the original batch, if known.
        path: Path to the offending field, if known.
    """
    record: Any
    error: str
    index: Optional[int] = None
    path: Optional[str] = None
63
+
64
@dataclass
class ValidationResult(Generic[T]):
    """Outcome of validating a batch, partitioned by success/failure.

    Attributes:
        valid: Records that passed validation.
        invalid: Records that failed, with per-record error details.
        total: Number of records that were processed.
    """
    valid: List[T]
    invalid: List[ValidationError]
    total: int
76
+
77
@dataclass
class InsertResult(Generic[T]):
    """Summary of an insert operation.

    Attributes:
        successful: Count of records inserted successfully.
        failed: Count of records that failed to insert.
        total: Count of records processed overall.
        failed_records: Per-record failure details, populated only when
            record isolation (or discard with validation) was in effect.
    """
    successful: int
    failed: int
    total: int
    failed_records: Optional[List[FailedRecord[T]]] = None
91
+
14
92
  class OlapConfig(BaseModel):
15
93
  """Configuration for OLAP tables (e.g., ClickHouse tables).
16
94
 
@@ -48,10 +126,640 @@ class OlapTable(TypedMooseResource, Generic[T]):
48
126
  """
49
127
  config: OlapConfig
50
128
  kind: str = "OlapTable"
129
+ _memoized_client: Optional[Client] = None
130
+ _config_hash: Optional[str] = None
131
+ _cached_table_name: Optional[str] = None
51
132
 
52
133
  def __init__(self, name: str, config: OlapConfig = OlapConfig(), **kwargs):
53
134
  super().__init__()
54
135
  self._set_type(name, self._get_type(kwargs))
55
136
  self.config = config
56
137
  self.metadata = config.metadata
57
- _tables[name] = self
138
+ _tables[name] = self
139
+
140
+ def _generate_table_name(self) -> str:
141
+ """Generate the versioned table name following Moose's naming convention.
142
+
143
+ Format: {tableName}_{version_with_dots_replaced_by_underscores}
144
+
145
+ Returns:
146
+ The versioned table name.
147
+ """
148
+ if self._cached_table_name:
149
+ return self._cached_table_name
150
+
151
+ table_version = self.config.version
152
+ if not table_version:
153
+ self._cached_table_name = self.name
154
+ else:
155
+ version_suffix = table_version.replace(".", "_")
156
+ self._cached_table_name = f"{self.name}_{version_suffix}"
157
+
158
+ return self._cached_table_name
159
+
160
+ def _create_config_hash(self, clickhouse_config: RuntimeClickHouseConfig) -> str:
161
+ """Create a fast hash of the ClickHouse configuration.
162
+
163
+ Args:
164
+ clickhouse_config: The ClickHouse configuration to hash.
165
+
166
+ Returns:
167
+ A 16-character hex hash of the configuration.
168
+ """
169
+ import hashlib
170
+ config_string = (
171
+ f"{clickhouse_config.host}:{clickhouse_config.port}:"
172
+ f"{clickhouse_config.username}:{clickhouse_config.password}:"
173
+ f"{clickhouse_config.database}:{clickhouse_config.use_ssl}"
174
+ )
175
+ return hashlib.sha256(config_string.encode()).hexdigest()[:16]
176
+
177
+ def _get_memoized_client(self) -> Client:
178
+ """Get or create a memoized ClickHouse client.
179
+
180
+ The client is cached and reused across multiple insert calls for better performance.
181
+ If the configuration changes, a new client will be created.
182
+
183
+ Returns:
184
+ A ClickHouse client instance.
185
+ """
186
+ from ..config.runtime import config_registry
187
+
188
+ # Get configuration from registry (with fallback to file)
189
+ clickhouse_config = config_registry.get_clickhouse_config()
190
+
191
+ # Create a fast hash of the current configuration to detect changes
192
+ current_config_hash = self._create_config_hash(clickhouse_config)
193
+
194
+ # If we have a cached client and the config hasn't changed, reuse it
195
+ if self._memoized_client and self._config_hash == current_config_hash:
196
+ return self._memoized_client
197
+
198
+ # Close existing client if config changed
199
+ if self._memoized_client and self._config_hash != current_config_hash:
200
+ try:
201
+ self._memoized_client.close()
202
+ except Exception:
203
+ # Ignore errors when closing old client
204
+ pass
205
+
206
+ try:
207
+ # Create new client with standard configuration
208
+ interface = 'https' if clickhouse_config.use_ssl else 'http'
209
+ client = get_client(
210
+ interface=interface,
211
+ host=clickhouse_config.host,
212
+ port=int(clickhouse_config.port),
213
+ username=clickhouse_config.username,
214
+ password=clickhouse_config.password,
215
+ database=clickhouse_config.database,
216
+ )
217
+
218
+ # Cache the new client and config hash
219
+ self._memoized_client = client
220
+ self._config_hash = current_config_hash
221
+
222
+ return client
223
+ except Exception as e:
224
+ raise RuntimeError(f"Failed to create ClickHouse client: {e}")
225
+
226
+ def close_client(self) -> None:
227
+ """Close the memoized ClickHouse client if it exists.
228
+
229
+ This is useful for cleaning up connections when the table instance is no longer needed.
230
+ The client will be automatically recreated on the next insert call if needed.
231
+ """
232
+ if self._memoized_client:
233
+ try:
234
+ self._memoized_client.close()
235
+ except Exception:
236
+ # Ignore errors when closing
237
+ pass
238
+ finally:
239
+ self._memoized_client = None
240
+ self._config_hash = None
241
+
242
+ def validate_record(self, record: Any) -> Tuple[Optional[T], Optional[str]]:
243
+ """Validate a single record using Pydantic validation.
244
+
245
+ Args:
246
+ record: The record to validate.
247
+
248
+ Returns:
249
+ Tuple of (validated_data, error_message). If validation succeeds,
250
+ validated_data will be the validated record and error_message will be None.
251
+ If validation fails for any reason, validated_data will be None and error_message
252
+ will contain the error details.
253
+ """
254
+ try:
255
+ validated = self._t.model_validate(record)
256
+ return validated, None
257
+ except Exception as e:
258
+ return None, str(e)
259
+
260
+ def validate_records(self, data: List[Any]) -> ValidationResult[T]:
261
+ """Validate an array of records with comprehensive error reporting.
262
+
263
+ Args:
264
+ data: Array of records to validate.
265
+
266
+ Returns:
267
+ ValidationResult containing valid and invalid records.
268
+ """
269
+ valid: List[T] = []
270
+ invalid: List[ValidationError] = []
271
+
272
+ for i, record in enumerate(data):
273
+ validated, error = self.validate_record(record)
274
+ if validated is not None:
275
+ valid.append(validated)
276
+ else:
277
+ invalid.append(ValidationError(
278
+ record=record,
279
+ error=error or "Validation failed",
280
+ index=i,
281
+ path="root"
282
+ ))
283
+
284
+ return ValidationResult(
285
+ valid=valid,
286
+ invalid=invalid,
287
+ total=len(data)
288
+ )
289
+
290
+ def _validate_insert_parameters(
291
+ self,
292
+ data: Union[List[T], Iterator[T]],
293
+ options: Optional[InsertOptions]
294
+ ) -> Tuple[bool, str, bool]:
295
+ """Validate input parameters and strategy compatibility.
296
+
297
+ Args:
298
+ data: The data to insert (array or iterator).
299
+ options: Optional insert options.
300
+
301
+ Returns:
302
+ Tuple of (is_stream, strategy, should_validate).
303
+ """
304
+ is_stream = not isinstance(data, list)
305
+ strategy = options.strategy if options else "fail-fast"
306
+ should_validate = options.validate if options else True
307
+
308
+ if is_stream and strategy == "isolate":
309
+ raise ValueError(
310
+ "The 'isolate' error strategy is not supported with stream input. "
311
+ "Use 'fail-fast' or 'discard' instead."
312
+ )
313
+
314
+ if is_stream and should_validate:
315
+ print("Warning: Validation is not supported with stream input. Validation will be skipped.")
316
+
317
+ return is_stream, strategy, should_validate
318
+
319
+ def _perform_pre_insertion_validation(
320
+ self,
321
+ data: List[T],
322
+ should_validate: bool,
323
+ strategy: str,
324
+ options: Optional[InsertOptions] = None
325
+ ) -> Tuple[List[T], List[ValidationError]]:
326
+ """Perform pre-insertion validation for array data.
327
+
328
+ Args:
329
+ data: The data to validate.
330
+ should_validate: Whether to perform validation.
331
+ strategy: The error handling strategy.
332
+ options: Optional insert options.
333
+
334
+ Returns:
335
+ Tuple of (validated_data, validation_errors).
336
+ """
337
+ if not should_validate:
338
+ return data, []
339
+
340
+ try:
341
+ validation_result = self.validate_records(data)
342
+ validated_data = validation_result.valid
343
+ validation_errors = validation_result.invalid
344
+
345
+ if validation_errors:
346
+ self._handle_validation_errors(
347
+ validation_errors,
348
+ strategy,
349
+ data,
350
+ options
351
+ )
352
+
353
+ if strategy == "discard":
354
+ return validated_data, validation_errors
355
+ elif strategy == "isolate":
356
+ return data, validation_errors
357
+ else: # fail-fast
358
+ return validated_data, validation_errors
359
+
360
+ return validated_data, validation_errors
361
+
362
+ except Exception as validation_error:
363
+ if strategy == "fail-fast":
364
+ raise ValueError(f"Validation failed: {validation_error}")
365
+ print(f"Validation error: {validation_error}")
366
+ return data, []
367
+
368
+ def _handle_validation_errors(
369
+ self,
370
+ validation_errors: List[ValidationError],
371
+ strategy: str,
372
+ data: List[T],
373
+ options: Optional[InsertOptions]
374
+ ) -> None:
375
+ """Handle validation errors based on the specified strategy.
376
+
377
+ Args:
378
+ validation_errors: List of validation errors.
379
+ strategy: The error handling strategy.
380
+ data: The original data.
381
+ options: Optional insert options.
382
+ """
383
+ if strategy == "fail-fast":
384
+ first_error = validation_errors[0]
385
+ raise ValueError(
386
+ f"Validation failed for record at index {first_error.index}: {first_error.error}"
387
+ )
388
+ elif strategy == "discard":
389
+ self._check_validation_thresholds(
390
+ validation_errors,
391
+ len(data),
392
+ options
393
+ )
394
+
395
+ def _check_validation_thresholds(
396
+ self,
397
+ validation_errors: List[ValidationError],
398
+ total_records: int,
399
+ options: Optional[InsertOptions]
400
+ ) -> None:
401
+ """Check if validation errors exceed configured thresholds.
402
+
403
+ Args:
404
+ validation_errors: List of validation errors.
405
+ total_records: Total number of records processed.
406
+ options: Optional insert options.
407
+ """
408
+ validation_failed_count = len(validation_errors)
409
+ validation_failed_ratio = validation_failed_count / total_records
410
+
411
+ if (options and options.allow_errors is not None and
412
+ validation_failed_count > options.allow_errors):
413
+ raise ValueError(
414
+ f"Too many validation failures: {validation_failed_count} > {options.allow_errors}. "
415
+ f"Errors: {', '.join(e.error for e in validation_errors)}"
416
+ )
417
+
418
+ if (options and options.allow_errors_ratio is not None and
419
+ validation_failed_ratio > options.allow_errors_ratio):
420
+ raise ValueError(
421
+ f"Validation failure ratio too high: {validation_failed_ratio:.3f} > "
422
+ f"{options.allow_errors_ratio}. Errors: {', '.join(e.error for e in validation_errors)}"
423
+ )
424
+
425
+ def _to_json_each_row(self, records: list[dict]) -> bytes:
426
+ return "\n".join(json.dumps(r, default=str) for r in records).encode("utf-8")
427
+
428
+ def _prepare_insert_options(
429
+ self,
430
+ table_name: str,
431
+ data: Union[List[T], Iterator[T]],
432
+ validated_data: List[T],
433
+ is_stream: bool,
434
+ strategy: str,
435
+ options: Optional[InsertOptions]
436
+ ) -> tuple[str, bytes, dict]:
437
+ """Prepare insert options for JSONEachRow raw SQL insert, returning settings dict."""
438
+ # Base settings for all inserts
439
+ settings = {
440
+ "date_time_input_format": "best_effort",
441
+ "max_insert_block_size": 100000 if is_stream else min(len(validated_data), 100000),
442
+ "max_block_size": 65536,
443
+ "async_insert": 1 if len(validated_data) > 1000 else 0,
444
+ "wait_for_async_insert": 1,
445
+ }
446
+ if (strategy == "discard" and options and
447
+ (options.allow_errors is not None or options.allow_errors_ratio is not None)):
448
+ if options.allow_errors is not None:
449
+ settings["input_format_allow_errors_num"] = options.allow_errors
450
+ if options.allow_errors_ratio is not None:
451
+ settings["input_format_allow_errors_ratio"] = options.allow_errors_ratio
452
+
453
+ if is_stream:
454
+ return table_name, data, settings
455
+
456
+ if not isinstance(validated_data, list):
457
+ validated_data = [validated_data]
458
+ dict_data = []
459
+ for record in validated_data:
460
+ if hasattr(record, 'model_dump'):
461
+ dict_data.append(record.model_dump())
462
+ else:
463
+ dict_data.append(record)
464
+ if not dict_data:
465
+ return table_name, b"", settings
466
+ json_lines = self._to_json_each_row(dict_data)
467
+ return table_name, json_lines, settings
468
+
469
+ def _create_success_result(
470
+ self,
471
+ data: Union[List[T], Iterator[T]],
472
+ validated_data: List[T],
473
+ validation_errors: List[ValidationError],
474
+ is_stream: bool,
475
+ should_validate: bool,
476
+ strategy: str
477
+ ) -> InsertResult[T]:
478
+ """Create appropriate result based on input type.
479
+
480
+ Args:
481
+ data: The original data (array or stream).
482
+ validated_data: Validated data for array input.
483
+ validation_errors: List of validation errors.
484
+ is_stream: Whether the input is a stream.
485
+ should_validate: Whether validation was performed.
486
+ strategy: The error handling strategy.
487
+
488
+ Returns:
489
+ InsertResult with appropriate counts and error information.
490
+ """
491
+ if is_stream:
492
+ return InsertResult(
493
+ successful=-1,
494
+ failed=0,
495
+ total=-1
496
+ )
497
+
498
+ inserted_count = len(validated_data)
499
+ total_processed = len(data) if not is_stream else inserted_count
500
+
501
+ result = InsertResult(
502
+ successful=inserted_count,
503
+ failed=len(validation_errors) if should_validate else 0,
504
+ total=total_processed
505
+ )
506
+
507
+ if (should_validate and validation_errors and strategy == "discard"):
508
+ result.failed_records = [
509
+ FailedRecord(
510
+ record=ve.record,
511
+ error=f"Validation error: {ve.error}",
512
+ index=ve.index
513
+ ) for ve in validation_errors
514
+ ]
515
+
516
+ return result
517
+
518
    def _retry_individual_records(
        self,
        client: Client,
        records: List[T],
        options: InsertOptions
    ) -> InsertResult[T]:
        """Retry failed records in small batches, isolating the bad ones.

        Used by the "isolate" strategy after a bulk insert fails: records are
        re-inserted in batches of 10; when a batch fails, each of its records
        is inserted individually so only genuinely bad records are reported.

        Args:
            client: The ClickHouse client to use.
            records: The records to retry.
            options: Insert options in effect (currently unused here, kept
                for interface symmetry with the other insert helpers).

        Returns:
            InsertResult with per-record failure details for records that
            could not be inserted even individually.
        """
        successful: List[T] = []
        failed: List[FailedRecord[T]] = []
        table_name = self._generate_table_name()
        # Serialize once up front; keep `records` aligned by index so the
        # original (un-serialized) records can be reported on failure.
        records_dict = []
        for record in records:
            if hasattr(record, 'model_dump'):
                records_dict.append(record.model_dump())
            else:
                records_dict.append(record)

        RETRY_BATCH_SIZE = 10
        for i in range(0, len(records_dict), RETRY_BATCH_SIZE):
            batch = records_dict[i:i + RETRY_BATCH_SIZE]
            try:
                # First attempt: insert the whole small batch synchronously.
                sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
                settings = {
                    "date_time_input_format": "best_effort",
                    "max_insert_block_size": RETRY_BATCH_SIZE,
                    "max_block_size": RETRY_BATCH_SIZE,
                    "async_insert": 0
                }
                json_lines = self._to_json_each_row(batch)
                client.command(sql, data=json_lines, settings=settings)
                successful.extend(records[i:i + RETRY_BATCH_SIZE])
            except ClickHouseError as batch_error:
                # Batch failed: fall back to one-record-at-a-time inserts so
                # only the actual offenders end up in `failed`.
                for j, record_dict in enumerate(batch):
                    try:
                        sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
                        settings = {
                            "date_time_input_format": "best_effort",
                            "async_insert": 0
                        }
                        json_line = self._to_json_each_row([record_dict])
                        client.command(sql, data=json_line, settings=settings)
                        successful.append(records[i + j])
                    except ClickHouseError as error:
                        # i + j maps back to the record's position in `records`.
                        failed.append(FailedRecord(
                            record=records[i + j],
                            error=str(error),
                            index=i + j
                        ))
        return InsertResult(
            successful=len(successful),
            failed=len(failed),
            total=len(records),
            failed_records=failed if failed else None
        )
571
+
572
+ def _insert_array_data(
573
+ self,
574
+ client: Client,
575
+ table_name: str,
576
+ data: List[T],
577
+ should_validate: bool,
578
+ strategy: str,
579
+ options: Optional[InsertOptions]
580
+ ) -> InsertResult[T]:
581
+ """Insert array data into the table with validation and error handling.
582
+
583
+ Args:
584
+ client: The ClickHouse client to use.
585
+ table_name: The name of the table to insert into.
586
+ data: The original data array.
587
+ should_validate: Whether validation was performed.
588
+ strategy: The error handling strategy.
589
+ options: Optional insert options.
590
+
591
+ Returns:
592
+ InsertResult with detailed success/failure information.
593
+ """
594
+ validated_data, validation_errors = self._perform_pre_insertion_validation(
595
+ data,
596
+ should_validate,
597
+ strategy,
598
+ options
599
+ )
600
+ try:
601
+ table_name, json_lines, settings = self._prepare_insert_options(
602
+ table_name,
603
+ data,
604
+ validated_data,
605
+ False,
606
+ strategy,
607
+ options
608
+ )
609
+ sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
610
+ client.command(sql, data=json_lines, settings=settings)
611
+ return self._create_success_result(
612
+ data,
613
+ validated_data,
614
+ validation_errors,
615
+ False,
616
+ should_validate,
617
+ strategy
618
+ )
619
+ except ClickHouseError as e:
620
+ if strategy == "fail-fast":
621
+ raise ValueError(f"Insert failed: {e}")
622
+ elif strategy == "discard":
623
+ raise ValueError(f"Too many errors during insert: {e}")
624
+ else: # isolate
625
+ return self._retry_individual_records(
626
+ client,
627
+ validated_data if not options.skip_validation_on_retry else data,
628
+ options
629
+ )
630
+
631
+ def _insert_stream(
632
+ self,
633
+ client: Client,
634
+ table_name: str,
635
+ data: Iterator[T],
636
+ strategy: str,
637
+ options: Optional[InsertOptions]
638
+ ) -> InsertResult[T]:
639
+ """Insert data from an iterator into the table.
640
+
641
+ Args:
642
+ client: The ClickHouse client to use.
643
+ table_name: The name of the table to insert into.
644
+ data: An iterator that yields objects to insert.
645
+ strategy: The error handling strategy.
646
+
647
+ Returns:
648
+ InsertResult with detailed success/failure information.
649
+ """
650
+ try:
651
+ batch = []
652
+ total_inserted = 0
653
+
654
+ _, _, settings = self._prepare_insert_options(
655
+ table_name,
656
+ data,
657
+ [],
658
+ True,
659
+ strategy,
660
+ options
661
+ )
662
+
663
+ for record in data:
664
+ # Convert record to dict using model_dump if available
665
+ if hasattr(record, 'model_dump'):
666
+ batch.append(record.model_dump())
667
+ else:
668
+ batch.append(record)
669
+
670
+ if len(batch) >= 1000: # Batch size
671
+ json_lines = self._to_json_each_row(batch)
672
+ sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
673
+ client.command(sql, data=json_lines, settings=settings)
674
+ total_inserted += len(batch)
675
+ batch = []
676
+
677
+ if batch: # Insert any remaining records
678
+ json_lines = self._to_json_each_row(batch)
679
+ sql = f"INSERT INTO {table_name} FORMAT JSONEachRow"
680
+ client.command(sql, data=json_lines, settings=settings)
681
+ total_inserted += len(batch)
682
+
683
+ return InsertResult(
684
+ successful=total_inserted,
685
+ failed=0,
686
+ total=total_inserted
687
+ )
688
+ except ClickHouseError as e:
689
+ if strategy == "fail-fast":
690
+ raise ValueError(f"Stream insert failed: {e}")
691
+ raise ValueError(f"Too many errors during stream insert: {e}")
692
+
693
+ def insert(
694
+ self,
695
+ data: Union[List[T], Iterator[T]],
696
+ options: Optional[InsertOptions] = None
697
+ ) -> InsertResult[T]:
698
+ """Insert data into the table with validation and error handling.
699
+
700
+ This method provides a typed interface for inserting data into the ClickHouse table,
701
+ with comprehensive validation and error handling strategies.
702
+
703
+ Args:
704
+ data: Either an array of objects conforming to the table schema, or an iterator
705
+ that yields objects to insert (e.g., a generator function).
706
+ options: Optional configuration for error handling, validation, and insertion behavior.
707
+
708
+ Returns:
709
+ InsertResult with detailed success/failure information.
710
+
711
+ Example:
712
+ ```python
713
+ # Create an OlapTable instance
714
+ user_table = OlapTable[User]('users')
715
+
716
+ # Insert with validation
717
+ result1 = user_table.insert([
718
+ {'id': 1, 'name': 'John', 'email': 'john@example.com'},
719
+ {'id': 2, 'name': 'Jane', 'email': 'jane@example.com'}
720
+ ])
721
+
722
+ # Insert with a generator (validation not available for streams)
723
+ def user_stream():
724
+ for i in range(10):
725
+ yield User(
726
+ id=i,
727
+ name=f'User {i}',
728
+ email=f'user{i}@example.com'
729
+ )
730
+
731
+ result2 = user_table.insert(user_stream(), options=InsertOptions(strategy='fail-fast'))
732
+
733
+ # Insert with validation disabled
734
+ result3 = user_table.insert(data, options=InsertOptions(validate=False))
735
+
736
+ # Insert with error handling strategies
737
+ result4 = user_table.insert(mixed_data, options=InsertOptions(
738
+ strategy='discard',
739
+ allow_errors_ratio=0.1,
740
+ validate=True
741
+ ))
742
+
743
+ # Optional: Clean up connection when done
744
+ user_table.close_client()
745
+ ```
746
+ """
747
+ options = options or InsertOptions()
748
+ is_stream, strategy, should_validate = self._validate_insert_parameters(data, options)
749
+ if (is_stream and not data) or (not is_stream and not data):
750
+ return InsertResult(successful=0, failed=0, total=0)
751
+
752
+ client = self._get_memoized_client()
753
+ table_name = self._generate_table_name()
754
+
755
+ if is_stream:
756
+ return self._insert_stream(client, table_name, data, strategy, options)
757
+ else:
758
+ return self._insert_array_data(
759
+ client,
760
+ table_name,
761
+ data,
762
+ should_validate,
763
+ strategy,
764
+ options
765
+ )
@@ -1,9 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moose_lib
3
- Version: 0.4.223
3
+ Version: 0.4.225
4
4
  Home-page: https://www.fiveonefour.com/moose
5
5
  Author: Fiveonefour Labs Inc.
6
6
  Author-email: support@fiveonefour.com
7
+ Requires-Python: >=3.12
7
8
  Description-Content-Type: text/markdown
8
9
  Requires-Dist: pyjwt[crypto]==2.9.0
9
10
  Requires-Dist: asyncio==3.4.3
@@ -12,12 +13,14 @@ Requires-Dist: temporalio==1.9.0
12
13
  Requires-Dist: kafka-python-ng==2.2.2
13
14
  Requires-Dist: redis==6.2.0
14
15
  Requires-Dist: humanfriendly==10.0
16
+ Requires-Dist: clickhouse_connect==0.7.16
15
17
  Dynamic: author
16
18
  Dynamic: author-email
17
19
  Dynamic: description
18
20
  Dynamic: description-content-type
19
21
  Dynamic: home-page
20
22
  Dynamic: requires-dist
23
+ Dynamic: requires-python
21
24
 
22
25
  # Python Moose Lib
23
26
 
@@ -9,13 +9,13 @@ moose_lib/query_param.py,sha256=AB5BKu610Ji-h1iYGMBZKfnEFqt85rS94kzhDwhWJnc,6288
9
9
  moose_lib/tasks.py,sha256=6MXA0j7nhvQILAJVTQHCAsquwrSOi2zAevghAc_7kXs,1554
10
10
  moose_lib/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  moose_lib/clients/redis_client.py,sha256=UBCdxwgZpIOIOy2EnPyxJIAYjw_qmNwGsJQCQ66SxUI,8117
12
- moose_lib/dmv2/__init__.py,sha256=Zooo_5MCPJbkP7kLecsH-xAkJexaHHkhwiG33SiSDfU,2229
12
+ moose_lib/dmv2/__init__.py,sha256=zyGK2YDN4zQ2SMn9-SAiOuhpU46STePuJjK18LEserg,2269
13
13
  moose_lib/dmv2/_registry.py,sha256=agdZ7xzS99Caou60Q2pEErzEwyNYHqwy6XqV79eEmwg,504
14
14
  moose_lib/dmv2/consumption.py,sha256=71wdv6ZuEi8Om7aX3Lq-d6bAoc1-iw3Wudb8dHESJKI,4072
15
15
  moose_lib/dmv2/ingest_api.py,sha256=Snek9NGwaJl_BuImSWGtQq91m9D3AJ4qBoGiKZ-9yTQ,2323
16
16
  moose_lib/dmv2/ingest_pipeline.py,sha256=Y1gsvHZjlW07gMapLnBRJEsoAPv7ThvLABoLmVV7BHE,6714
17
17
  moose_lib/dmv2/materialized_view.py,sha256=kcx-sJFTM-cH3Uc1GoldgFGodjoz0AegAQEMmohdS38,3826
18
- moose_lib/dmv2/olap_table.py,sha256=P-ycgkj68zyoH70osYQGSjy9c2AgdXjs7vlF4H9yqEU,2350
18
+ moose_lib/dmv2/olap_table.py,sha256=D3qpRGMnYF0gu5FRW8E5oDBqdWMCWLRv7fWv81DURsk,28378
19
19
  moose_lib/dmv2/registry.py,sha256=AaGS6Xy0vKz-wHLPgRVxfKfSwW5KksMePjZ8N7-2OKU,2054
20
20
  moose_lib/dmv2/sql_resource.py,sha256=kUZoGqxhZMHMthtBZGYJBxTFjXkspXiWLXhJRYXgGUM,1864
21
21
  moose_lib/dmv2/stream.py,sha256=H5nzqVHIXulFNMNaGZUQnhGjNx7fIg0X95kxAO_qlls,10600
@@ -28,7 +28,7 @@ tests/__init__.py,sha256=0Gh4yzPkkC3TzBGKhenpMIxJcRhyrrCfxLSfpTZnPMQ,53
28
28
  tests/conftest.py,sha256=ZVJNbnr4DwbcqkTmePW6U01zAzE6QD0kNAEZjPG1f4s,169
29
29
  tests/test_moose.py,sha256=mBsx_OYWmL8ppDzL_7Bd7xR6qf_i3-pCIO3wm2iQNaA,2136
30
30
  tests/test_redis_client.py,sha256=d9_MLYsJ4ecVil_jPB2gW3Q5aWnavxmmjZg2uYI3LVo,3256
31
- moose_lib-0.4.223.dist-info/METADATA,sha256=dpvo83we7TyfLit7q-ZpC-uyvTNnD3LpbBSjpskCvi8,638
32
- moose_lib-0.4.223.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
33
- moose_lib-0.4.223.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
34
- moose_lib-0.4.223.dist-info/RECORD,,
31
+ moose_lib-0.4.225.dist-info/METADATA,sha256=09ZolwkUTQivLSDh1AG7PQeqsrnjwkqGw4fLUnFPtsU,729
32
+ moose_lib-0.4.225.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
33
+ moose_lib-0.4.225.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
34
+ moose_lib-0.4.225.dist-info/RECORD,,