moose-lib 0.6.90__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. moose_lib/__init__.py +38 -3
  2. moose_lib/blocks.py +497 -37
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +94 -5
  5. moose_lib/config/config_file.py +44 -2
  6. moose_lib/config/runtime.py +137 -5
  7. moose_lib/data_models.py +451 -46
  8. moose_lib/dmv2/__init__.py +88 -60
  9. moose_lib/dmv2/_registry.py +3 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +56 -13
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +304 -119
  18. moose_lib/dmv2/registry.py +28 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +241 -21
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +175 -0
  24. moose_lib/dmv2/web_app_helpers.py +96 -0
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +537 -68
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +266 -156
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/METADATA +19 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +38 -1
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +210 -0
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +114 -0
  56. tests/test_web_app.py +227 -0
  57. moose_lib-0.6.90.dist-info/RECORD +0 -42
  58. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.90.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
@@ -31,4 +31,3 @@ def clickhouse_param_type_for_value(value: Any) -> str:
31
31
  if not isinstance(value, str):
32
32
  print(f"unhandled type {type(value)}", file=sys.stderr)
33
33
  return "String"
34
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moose_lib
3
- Version: 0.6.90
3
+ Version: 0.6.283
4
4
  Home-page: https://www.fiveonefour.com/moose
5
5
  Author: Fiveonefour Labs Inc.
6
6
  Author-email: support@fiveonefour.com
@@ -15,6 +15,7 @@ Requires-Dist: humanfriendly>=10.0
15
15
  Requires-Dist: clickhouse_connect>=0.7.16
16
16
  Requires-Dist: requests>=2.32.3
17
17
  Requires-Dist: sqlglot[rs]>=27.16.3
18
+ Requires-Dist: confluent-kafka[json,schemaregistry]>=2.11.1
18
19
  Dynamic: author
19
20
  Dynamic: author-email
20
21
  Dynamic: description
@@ -26,3 +27,20 @@ Dynamic: requires-python
26
27
  # Python Moose Lib
27
28
 
28
29
  Python package which contains moose utils
30
+
31
+ ## Column Autocomplete with MooseModel
32
+
33
+ For LSP autocomplete when working with columns, use `MooseModel` instead of `BaseModel`:
34
+
35
+ ```python
36
+ from moose_lib import MooseModel, OlapTable
37
+
38
+ class User(MooseModel):
39
+ user_id: int
40
+ email: str
41
+
42
+ # Autocomplete works when typing User.user_id
43
+ query = f"SELECT {User.user_id:col}, {User.email:col} FROM users"
44
+ ```
45
+
46
+ See [MooseModel Autocomplete Guide](docs/moose-model-autocomplete.md) for details.
@@ -0,0 +1,63 @@
1
+ moose_lib/__init__.py,sha256=jvIQmk1u6fG59qsiLziJbdEL2ZZsNLQgkvgyGL9tpU0,1491
2
+ moose_lib/blocks.py,sha256=mKajYpnQ42V5fvw65-D-PEX8JcZG0MOX7BIYuY5CIxQ,25363
3
+ moose_lib/commons.py,sha256=pVan-44bZceHy-4cNxMnDlDsgbp27J5q3a-Dd_NWXiU,6574
4
+ moose_lib/data_models.py,sha256=sxd30I9Fkz9bJzg3t_Smq5ahj03fG67vXqqNs-ZDL_o,24100
5
+ moose_lib/dmv2_serializer.py,sha256=CL_Pvvg8tJOT8Qk6hywDNzY8MYGhMVdTOw8arZi3jng,49
6
+ moose_lib/internal.py,sha256=6AfSogkixdAY44IeYCeWlrdXu2XAxaiGH0TzZfKy6OI,35767
7
+ moose_lib/main.py,sha256=P9E2bg1LvVM8ZrCC-mN0DqplgzQ7h6F17onIkOmAyn0,21008
8
+ moose_lib/query_builder.py,sha256=YenSawKqXD2n-rQ74FRfNoZ2KZpBiOv7a5G3CEFm1wg,6793
9
+ moose_lib/query_param.py,sha256=9qTEjO6OLHDkuwdUtdNqw_ye-A5ny5y_AZ1S37PfKwk,7009
10
+ moose_lib/secrets.py,sha256=GhsFlIWtGbUQYqEMClrCxcvUPEXxtF-zFkjEcmbco1c,4245
11
+ moose_lib/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ moose_lib/clients/redis_client.py,sha256=S-KdZaDVWKb4t5isedjF3z9wDcBx-PaTHyS9JBNt3NQ,12115
13
+ moose_lib/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ moose_lib/config/config_file.py,sha256=a6JdyxfjIUfTcl_Xm-zA4ATMXJHVIaP-eBQpGvsz7f4,3890
15
+ moose_lib/config/runtime.py,sha256=a6pPBD96wEsJKV4LKxoFjiRtbmTtuwzS0N0IEs-s6mo,8657
16
+ moose_lib/dmv2/__init__.py,sha256=REhStLFoYuQz7-VzswoUxm8NAvaqd-HkEkM_Ybb6QPQ,3367
17
+ moose_lib/dmv2/_registry.py,sha256=iO08jI6adyyszYyt53qgYphhLhwMo9JDXqmEfLj0L1Y,741
18
+ moose_lib/dmv2/_source_capture.py,sha256=qC-foQOkhl0GIQmCk5NVBq7SUhXGWy22fMbtNFWYsAY,1222
19
+ moose_lib/dmv2/consumption.py,sha256=gJhCKKbgQ_VHiKddkiI-Z8r0bfBfeROTK1KVuII-OY4,13100
20
+ moose_lib/dmv2/ingest_api.py,sha256=j5fPjC0TnAnQLX-U9v-kpNyt2kTgPuaZ-QoFhCmuujc,2594
21
+ moose_lib/dmv2/ingest_pipeline.py,sha256=d4UND75j9GuCi2EgUpsidySB_fi_7bho251Gev9P6g8,8659
22
+ moose_lib/dmv2/life_cycle.py,sha256=7MqN1bIS9aX2wX9lgHFZtTUAf9GNrYvz6a7g5Kpw5-o,1256
23
+ moose_lib/dmv2/materialized_view.py,sha256=rMivRcHw3BuLA6hfRULSe2-DcwOTVZEJHnCjxLhwp9E,4966
24
+ moose_lib/dmv2/moose_model.py,sha256=5QGf5ep1tRa9qWA-rTNiF78id6ZLqZGRuqhNxaE0KlU,5268
25
+ moose_lib/dmv2/olap_table.py,sha256=tk1dR8_SirJLFvQS2nDpE9pHx8fhfulSCwcqoozyrtc,41803
26
+ moose_lib/dmv2/registry.py,sha256=LIx-LEyTIw2KWUjZoECIfw6jCPAsNP_kWDql-KxjizY,2955
27
+ moose_lib/dmv2/sql_resource.py,sha256=LT28uq38rJ5hZSeu8Bmvvx6kt6T4qPUfKZiCo6im9ac,2117
28
+ moose_lib/dmv2/stream.py,sha256=5HR9s9Wub7NPZNnFmU6gc-IsHMhJDyGYqicnrYH23qA,18542
29
+ moose_lib/dmv2/types.py,sha256=K7QcL2WtM0vq_yy4NTwX8MOv0VdZ0Gza6LKG5H9CnKg,4667
30
+ moose_lib/dmv2/view.py,sha256=9Me1p-1etvUkiomRxOreiUjwQbQokOd1eHsQZTtpsCE,1383
31
+ moose_lib/dmv2/web_app.py,sha256=DtG_vgCFJ2Czb7ukqsQc6JO7BvGWjqwNce8LRFt_FIk,5494
32
+ moose_lib/dmv2/web_app_helpers.py,sha256=wYHKJIl6lC7UNfw7DeLgDWOwt0OrzSXzM1a99rlSTfw,2899
33
+ moose_lib/dmv2/workflow.py,sha256=aBBRmSo5nLxG_AVWjpRDkE9WsGetF6ILMNCW9H2F40I,6456
34
+ moose_lib/streaming/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
+ moose_lib/streaming/streaming_function_runner.py,sha256=f551lwAEv8-gt3qLsZ12Rkqn1lllcCjpMQv53UKLjaw,25356
36
+ moose_lib/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
+ moose_lib/utilities/sql.py,sha256=-OvBWZWRHU817zl2fU_wFlgGLbtoK83e-b09kFW4kaE,903
38
+ tests/__init__.py,sha256=jfCJcz0MBRIZe-gEcfvbe2f-Ns8_nx9O3hVudbHGjbg,53
39
+ tests/conftest.py,sha256=CRrm2SPmuMUctPx1KufzmrhlrIL9BEIdi4iK8SI0OQA,993
40
+ tests/test_backward_compatibility.py,sha256=G8bqSbRllF6SgkFyXruZRzPy15Pk9BdCeY-6X2ND6p0,2272
41
+ tests/test_cluster_validation.py,sha256=U2SOvvwFxwW7JeeO2RtkMjjPBXzCHVR4E2WRShD07hI,2605
42
+ tests/test_codec.py,sha256=Mfn7_pjgc1jPYtXhhcKzmHhe6kURJzjlY5jV0chwYUY,2340
43
+ tests/test_column_formatting.py,sha256=eFrpg1l7KCk2kSJeQxrtifgIsgaDwyk5J7lzaGV2SHU,2057
44
+ tests/test_fixedstring.py,sha256=wQbQLEMXgWRx7B7jtLomkP6HEaNFhhfbRkfVReoJY-E,1552
45
+ tests/test_iceberg_config.py,sha256=6Xrl_jyoPhFtFEkLmBXnuGteqxU83kRqeFeHU92ch3I,3692
46
+ tests/test_int_types.py,sha256=9JOpHdKir9xRjRemS_YzWZ0mGwBXYG0WzaZzMnD0A08,6528
47
+ tests/test_kafka_config.py,sha256=2QsdAanJGKbk33jnDrrMvGLJnNAj7_ZJsJtNdQ0Rh4Y,4307
48
+ tests/test_materialized.py,sha256=rw8ae4FoY0TZzthCLrOxiznwRDH90U079pUjVJvX7MQ,2520
49
+ tests/test_metadata.py,sha256=m8BDuFFFQggPz8mBRQw0UB_K5wrtvawQrsHamlOzyNg,1131
50
+ tests/test_moose.py,sha256=jsw7tRS8FyPsVxD5xD9QvpV3axx5wOajc26yMt9boDE,2021
51
+ tests/test_moose_model.py,sha256=p2bQLreqRh-6WmthyhB2Vyc-gSHsOQsEOLnx9_RtxSA,4208
52
+ tests/test_olap_table_moosemodel.py,sha256=_ZXruANx-EvN2mZvUoVfaO-UlihKEEeXoIVyVLA-gyg,2455
53
+ tests/test_olap_table_versioning.py,sha256=XsSia_Pz_whHLVU1uT7Wm9fK55DJxpTsvzYMyj1Ihu8,6832
54
+ tests/test_query_builder.py,sha256=65JUTd9uudVjT7sJpYdUDTGhlp1s40OPEhyOVN6Ayfc,4204
55
+ tests/test_redis_client.py,sha256=t_c5s9r2CiJHlP6K_VRud5sr68urwN0iU5p2VA5D294,3225
56
+ tests/test_s3queue_config.py,sha256=m5JDL_w-73cRYp9HarB3JHQCLNwApzKyfnm4Aa4biec,14233
57
+ tests/test_secrets.py,sha256=QfkFq2fWxnFj2Wk37VIPVB7LfgizH50s7lQi4YAnZ9Q,8797
58
+ tests/test_simple_aggregate.py,sha256=aXT2QjSCByY9_ygRcoAyLhEFgKZB4z7LFJg75GU_ntQ,4072
59
+ tests/test_web_app.py,sha256=swuAEOdmbrDnKo2K9BRl0kBKIeSI2r_SfUS5ZF3x55g,6585
60
+ moose_lib-0.6.283.dist-info/METADATA,sha256=eWeFg2UNYKGaLdXgni4g7S4-8MRAQv-l3WUqDRm0qC8,1270
61
+ moose_lib-0.6.283.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
62
+ moose_lib-0.6.283.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
63
+ moose_lib-0.6.283.dist-info/RECORD,,
tests/__init__.py CHANGED
@@ -1 +1 @@
1
- # Empty __init__.py to mark this as a Python package
1
+ # Empty __init__.py to mark this as a Python package
tests/conftest.py CHANGED
@@ -3,4 +3,41 @@ import os
3
3
  import sys
4
4
 
5
5
  # Add the package root to Python path for imports
6
- sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
6
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
7
+
8
+
9
+ @pytest.fixture(autouse=True)
10
+ def clear_registries():
11
+ """Clear all global registries before each test to prevent conflicts."""
12
+ from moose_lib.dmv2._registry import (
13
+ _tables,
14
+ _streams,
15
+ _ingest_apis,
16
+ _apis,
17
+ _api_name_aliases,
18
+ _api_path_map,
19
+ _sql_resources,
20
+ _workflows,
21
+ )
22
+
23
+ # Clear all registries
24
+ _tables.clear()
25
+ _streams.clear()
26
+ _ingest_apis.clear()
27
+ _apis.clear()
28
+ _api_name_aliases.clear()
29
+ _api_path_map.clear()
30
+ _sql_resources.clear()
31
+ _workflows.clear()
32
+
33
+ yield
34
+
35
+ # Clean up after test (optional, but good practice)
36
+ _tables.clear()
37
+ _streams.clear()
38
+ _ingest_apis.clear()
39
+ _apis.clear()
40
+ _api_name_aliases.clear()
41
+ _api_path_map.clear()
42
+ _sql_resources.clear()
43
+ _workflows.clear()
@@ -0,0 +1,85 @@
1
+ """
2
+ Tests ensuring MooseModel doesn't break existing BaseModel usage
3
+ """
4
+
5
+ from pydantic import BaseModel
6
+ from moose_lib.dmv2 import OlapTable, OlapConfig, MooseModel
7
+ from moose_lib.data_models import Column
8
+
9
+
10
+ def test_basemodel_olaptable_still_works():
11
+ """Existing code using BaseModel should continue working"""
12
+
13
+ class LegacyUser(BaseModel):
14
+ user_id: int
15
+ email: str
16
+
17
+ # Old pattern still works
18
+ table = OlapTable[LegacyUser]("legacy_users")
19
+
20
+ assert table.name == "legacy_users"
21
+ assert hasattr(table, "cols")
22
+ assert isinstance(table.cols.user_id, Column)
23
+
24
+
25
+ def test_moosemodel_and_basemodel_can_coexist():
26
+ """Projects can mix MooseModel and BaseModel"""
27
+
28
+ class NewModel(MooseModel):
29
+ new_field: int
30
+
31
+ class OldModel(BaseModel):
32
+ old_field: str
33
+
34
+ new_table = OlapTable[NewModel]("new_table")
35
+ old_table = OlapTable[OldModel]("old_table")
36
+
37
+ # Both work
38
+ assert new_table.name == "new_table"
39
+ assert old_table.name == "old_table"
40
+
41
+ # New model has direct column access
42
+ assert isinstance(NewModel.new_field, Column)
43
+
44
+ # Old model doesn't (expected)
45
+ assert (
46
+ not isinstance(OldModel.old_field, Column)
47
+ if hasattr(OldModel, "old_field")
48
+ else True
49
+ )
50
+
51
+
52
+ def test_moosemodel_cols_matches_direct_access():
53
+ """MooseModel.cols.field and MooseModel.field should return same Column"""
54
+
55
+ class Analytics(MooseModel):
56
+ event_id: int
57
+ timestamp: str
58
+
59
+ # Both access methods return the same Column
60
+ direct = Analytics.event_id
61
+ via_cols = Analytics.cols.event_id
62
+
63
+ assert direct.name == via_cols.name
64
+ assert direct.data_type == via_cols.data_type
65
+
66
+
67
+ def test_existing_query_patterns_unchanged():
68
+ """Existing query patterns should work identically"""
69
+
70
+ class Metrics(MooseModel):
71
+ metric_id: int
72
+ value: float
73
+
74
+ table = OlapTable[Metrics]("metrics")
75
+
76
+ # Pattern 1: Using table.cols (existing pattern)
77
+ col_via_table = table.cols.metric_id
78
+ assert isinstance(col_via_table, Column)
79
+
80
+ # Pattern 2: Using Model.cols (also existing)
81
+ col_via_model = Metrics.cols.metric_id
82
+ assert isinstance(col_via_model, Column)
83
+
84
+ # Both are equivalent
85
+ assert col_via_table.name == col_via_model.name
@@ -0,0 +1,85 @@
1
+ """Tests for OlapTable cluster validation."""
2
+
3
+ import pytest
4
+ from moose_lib import OlapTable, OlapConfig, MergeTreeEngine, ReplicatedMergeTreeEngine
5
+ from pydantic import BaseModel
6
+
7
+
8
+ class SampleModel(BaseModel):
9
+ """Test model for cluster validation tests."""
10
+
11
+ id: str
12
+ value: int
13
+
14
+
15
+ def test_cluster_only_is_allowed():
16
+ """Test that specifying only cluster works."""
17
+ table = OlapTable[SampleModel](
18
+ "TestClusterOnly",
19
+ OlapConfig(
20
+ engine=MergeTreeEngine(),
21
+ order_by_fields=["id"],
22
+ cluster="test_cluster",
23
+ ),
24
+ )
25
+ assert table is not None
26
+
27
+
28
+ def test_explicit_params_only_is_allowed():
29
+ """Test that specifying explicit keeper_path and replica_name without cluster works."""
30
+ table = OlapTable[SampleModel](
31
+ "TestExplicitOnly",
32
+ OlapConfig(
33
+ engine=ReplicatedMergeTreeEngine(
34
+ keeper_path="/clickhouse/tables/{database}/{table}",
35
+ replica_name="{replica}",
36
+ ),
37
+ order_by_fields=["id"],
38
+ ),
39
+ )
40
+ assert table is not None
41
+
42
+
43
+ def test_cluster_and_explicit_params_raises_error():
44
+ """Test that specifying both cluster and explicit keeper_path/replica_name raises an error."""
45
+ with pytest.raises(
46
+ ValueError,
47
+ match=r"Cannot specify both 'cluster' and explicit replication params",
48
+ ):
49
+ OlapTable[SampleModel](
50
+ "TestBothClusterAndExplicit",
51
+ OlapConfig(
52
+ engine=ReplicatedMergeTreeEngine(
53
+ keeper_path="/clickhouse/tables/{database}/{table}",
54
+ replica_name="{replica}",
55
+ ),
56
+ order_by_fields=["id"],
57
+ cluster="test_cluster",
58
+ ),
59
+ )
60
+
61
+
62
+ def test_non_replicated_engine_with_cluster_is_allowed():
63
+ """Test that non-replicated engines can have a cluster specified."""
64
+ table = OlapTable[SampleModel](
65
+ "TestMergeTreeWithCluster",
66
+ OlapConfig(
67
+ engine=MergeTreeEngine(),
68
+ order_by_fields=["id"],
69
+ cluster="test_cluster",
70
+ ),
71
+ )
72
+ assert table is not None
73
+
74
+
75
+ def test_replicated_engine_without_cluster_or_explicit_params_is_allowed():
76
+ """Test that ReplicatedMergeTree without cluster or explicit params works (ClickHouse Cloud mode)."""
77
+ table = OlapTable[SampleModel](
78
+ "TestCloudMode",
79
+ OlapConfig(
80
+ engine=ReplicatedMergeTreeEngine(),
81
+ order_by_fields=["id"],
82
+ # No cluster, no keeper_path, no replica_name
83
+ ),
84
+ )
85
+ assert table is not None
tests/test_codec.py ADDED
@@ -0,0 +1,75 @@
1
+ from datetime import datetime
2
+ from typing import Annotated, Any
3
+ from pydantic import BaseModel
4
+ from moose_lib import Key, ClickHouseCodec, UInt64
5
+ from moose_lib.data_models import _to_columns
6
+
7
+
8
+ def test_codec_single():
9
+ """Test single codec annotation converts to correct ClickHouse CODEC."""
10
+
11
+ class CodecTest(BaseModel):
12
+ id: Key[str]
13
+ data: Annotated[str, ClickHouseCodec("ZSTD(3)")]
14
+
15
+ columns = _to_columns(CodecTest)
16
+ by_name = {col.name: col for col in columns}
17
+
18
+ assert by_name["data"].codec == "ZSTD(3)"
19
+ assert by_name["id"].codec is None
20
+
21
+
22
+ def test_codec_chain():
23
+ """Test codec chain annotation (Delta, LZ4)."""
24
+
25
+ class CodecChainTest(BaseModel):
26
+ timestamp: Annotated[datetime, ClickHouseCodec("Delta, LZ4")]
27
+ value: Annotated[float, ClickHouseCodec("Gorilla, ZSTD")]
28
+
29
+ columns = _to_columns(CodecChainTest)
30
+ by_name = {col.name: col for col in columns}
31
+
32
+ assert by_name["timestamp"].codec == "Delta, LZ4"
33
+ assert by_name["value"].codec == "Gorilla, ZSTD"
34
+
35
+
36
+ def test_codec_with_level():
37
+ """Test codec with compression level."""
38
+
39
+ class CodecLevelTest(BaseModel):
40
+ log_blob: Annotated[Any, ClickHouseCodec("ZSTD(3)")]
41
+ combination_hash: Annotated[list[UInt64], ClickHouseCodec("ZSTD(1)")]
42
+
43
+ columns = _to_columns(CodecLevelTest)
44
+ by_name = {col.name: col for col in columns}
45
+
46
+ assert by_name["log_blob"].codec == "ZSTD(3)"
47
+ assert by_name["combination_hash"].codec == "ZSTD(1)"
48
+
49
+
50
+ def test_codec_specialized():
51
+ """Test specialized codecs."""
52
+
53
+ class SpecializedCodecTest(BaseModel):
54
+ timestamp: Annotated[datetime, ClickHouseCodec("Delta")]
55
+ counter: Annotated[int, ClickHouseCodec("DoubleDelta")]
56
+ temperature: Annotated[float, ClickHouseCodec("Gorilla")]
57
+
58
+ columns = _to_columns(SpecializedCodecTest)
59
+ by_name = {col.name: col for col in columns}
60
+
61
+ assert by_name["timestamp"].codec == "Delta"
62
+ assert by_name["counter"].codec == "DoubleDelta"
63
+ assert by_name["temperature"].codec == "Gorilla"
64
+
65
+
66
+ def test_codec_none():
67
+ """Test codec with NONE (uncompressed)."""
68
+
69
+ class NoCodecTest(BaseModel):
70
+ data: Annotated[str, ClickHouseCodec("NONE")]
71
+
72
+ columns = _to_columns(NoCodecTest)
73
+ by_name = {col.name: col for col in columns}
74
+
75
+ assert by_name["data"].codec == "NONE"
@@ -0,0 +1,80 @@
1
+ """Tests for Column string formatting and interpolation"""
2
+
3
+ from moose_lib.data_models import Column
4
+
5
+
6
+ def test_column_str_returns_quoted_identifier():
7
+ """Column.__str__() should return backtick-quoted identifier"""
8
+ col = Column(
9
+ name="user_id",
10
+ data_type="String",
11
+ required=True,
12
+ unique=False,
13
+ primary_key=False,
14
+ )
15
+
16
+ assert str(col) == "`user_id`"
17
+
18
+
19
+ def test_column_format_spec_col():
20
+ """Column with :col format spec should return quoted identifier"""
21
+ col = Column(
22
+ name="email", data_type="String", required=True, unique=False, primary_key=False
23
+ )
24
+
25
+ result = f"{col:col}"
26
+ assert result == "`email`"
27
+
28
+
29
+ def test_column_format_spec_c():
30
+ """Column with :c format spec should return quoted identifier"""
31
+ col = Column(
32
+ name="timestamp",
33
+ data_type="DateTime",
34
+ required=True,
35
+ unique=False,
36
+ primary_key=False,
37
+ )
38
+
39
+ result = f"{col:c}"
40
+ assert result == "`timestamp`"
41
+
42
+
43
+ def test_column_format_spec_empty():
44
+ """Column with no format spec should return quoted identifier"""
45
+ col = Column(
46
+ name="count", data_type="Int64", required=True, unique=False, primary_key=False
47
+ )
48
+
49
+ result = f"{col}"
50
+ assert result == "`count`"
51
+
52
+
53
+ def test_column_with_special_chars():
54
+ """Column names with hyphens should be quoted"""
55
+ col = Column(
56
+ name="user-id",
57
+ data_type="String",
58
+ required=True,
59
+ unique=False,
60
+ primary_key=False,
61
+ )
62
+
63
+ assert str(col) == "`user-id`"
64
+
65
+
66
+ def test_column_in_fstring_interpolation():
67
+ """Column should work in f-string SQL construction"""
68
+ user_id_col = Column(
69
+ name="user_id",
70
+ data_type="String",
71
+ required=True,
72
+ unique=False,
73
+ primary_key=False,
74
+ )
75
+ email_col = Column(
76
+ name="email", data_type="String", required=True, unique=False, primary_key=False
77
+ )
78
+
79
+ query = f"SELECT {user_id_col:col}, {email_col:col} FROM users"
80
+ assert query == "SELECT `user_id`, `email` FROM users"
@@ -0,0 +1,43 @@
1
+ from datetime import datetime
2
+ from typing import Annotated
3
+ from pydantic import BaseModel
4
+ from moose_lib import Key, FixedString
5
+ from moose_lib.data_models import _to_columns
6
+
7
+
8
+ def test_fixedstring_annotation():
9
+ """Test FixedString annotation converts to correct ClickHouse type with str base type."""
10
+
11
+ class FixedStringTest(BaseModel):
12
+ id: Key[str]
13
+ created_at: datetime
14
+ md5_hash: Annotated[str, FixedString(16)]
15
+ sha256_hash: Annotated[str, FixedString(32)]
16
+ ipv6_address: Annotated[str, FixedString(16)]
17
+
18
+ columns = _to_columns(FixedStringTest)
19
+ by_name = {col.name: col for col in columns}
20
+
21
+ assert by_name["md5_hash"].data_type == "FixedString(16)"
22
+ assert by_name["sha256_hash"].data_type == "FixedString(32)"
23
+ assert by_name["ipv6_address"].data_type == "FixedString(16)"
24
+
25
+ # Verify other fields still work
26
+ assert by_name["id"].data_type == "String"
27
+ assert by_name["created_at"].data_type == "DateTime"
28
+
29
+
30
+ def test_fixedstring_different_sizes():
31
+ """Test various FixedString sizes."""
32
+
33
+ class FixedStringSizes(BaseModel):
34
+ mac_address: Annotated[str, FixedString(6)]
35
+ uuid_binary: Annotated[str, FixedString(16)]
36
+ sha512_hash: Annotated[str, FixedString(64)]
37
+
38
+ columns = _to_columns(FixedStringSizes)
39
+ by_name = {col.name: col for col in columns}
40
+
41
+ assert by_name["mac_address"].data_type == "FixedString(6)"
42
+ assert by_name["uuid_binary"].data_type == "FixedString(16)"
43
+ assert by_name["sha512_hash"].data_type == "FixedString(64)"
@@ -0,0 +1,105 @@
1
+ import pytest
2
+ from moose_lib import OlapTable, OlapConfig
3
+ from moose_lib.blocks import IcebergS3Engine
4
+ from pydantic import BaseModel
5
+
6
+
7
+ class SampleData(BaseModel):
8
+ id: str
9
+ name: str
10
+ value: int
11
+
12
+
13
+ def test_iceberg_engine_basic_creation():
14
+ """Test basic IcebergS3Engine creation with required fields"""
15
+ engine = IcebergS3Engine(path="s3://bucket/warehouse/table/", format="Parquet")
16
+ assert engine.path == "s3://bucket/warehouse/table/"
17
+ assert engine.format == "Parquet"
18
+ assert engine.aws_access_key_id is None
19
+ assert engine.aws_secret_access_key is None
20
+ assert engine.compression is None
21
+
22
+
23
+ def test_iceberg_engine_with_all_options():
24
+ """Test IcebergS3Engine with all optional configuration"""
25
+ engine = IcebergS3Engine(
26
+ path="s3://bucket/table/",
27
+ format="ORC",
28
+ aws_access_key_id="AKIATEST",
29
+ aws_secret_access_key="secret123",
30
+ compression="zstd",
31
+ )
32
+ assert engine.path == "s3://bucket/table/"
33
+ assert engine.format == "ORC"
34
+ assert engine.aws_access_key_id == "AKIATEST"
35
+ assert engine.aws_secret_access_key == "secret123"
36
+ assert engine.compression == "zstd"
37
+
38
+
39
+ def test_iceberg_engine_missing_path():
40
+ """Test that missing path raises ValueError"""
41
+ with pytest.raises(ValueError, match="IcebergS3 engine requires 'path'"):
42
+ IcebergS3Engine(path="", format="Parquet")
43
+
44
+
45
+ def test_iceberg_engine_missing_format():
46
+ """Test that missing format raises ValueError"""
47
+ with pytest.raises(ValueError, match="IcebergS3 engine requires 'format'"):
48
+ IcebergS3Engine(path="s3://bucket/table/", format="")
49
+
50
+
51
+ def test_iceberg_engine_invalid_format():
52
+ """Test that invalid format raises ValueError (only Parquet and ORC supported)"""
53
+ with pytest.raises(ValueError, match="format must be 'Parquet' or 'ORC'"):
54
+ IcebergS3Engine(path="s3://bucket/table/", format="JSON")
55
+
56
+
57
+ def test_iceberg_rejects_order_by():
58
+ """Test that IcebergS3 engine rejects ORDER BY clauses (read-only external table)"""
59
+ with pytest.raises(
60
+ ValueError, match="IcebergS3Engine does not support ORDER BY clauses"
61
+ ):
62
+ OlapConfig(
63
+ engine=IcebergS3Engine(path="s3://bucket/table/", format="Parquet"),
64
+ order_by_fields=["id"],
65
+ )
66
+
67
+
68
+ def test_iceberg_rejects_partition_by():
69
+ """Test that IcebergS3 engine rejects PARTITION BY clauses (read-only external table)"""
70
+ with pytest.raises(
71
+ ValueError, match="IcebergS3Engine does not support PARTITION BY clause"
72
+ ):
73
+ OlapConfig(
74
+ engine=IcebergS3Engine(path="s3://bucket/table/", format="Parquet"),
75
+ partition_by="toYYYYMM(timestamp)",
76
+ )
77
+
78
+
79
+ def test_iceberg_rejects_sample_by():
80
+ """Test that IcebergS3 engine rejects SAMPLE BY clauses (read-only external table)"""
81
+ with pytest.raises(
82
+ ValueError, match="IcebergS3Engine does not support SAMPLE BY clause"
83
+ ):
84
+ OlapConfig(
85
+ engine=IcebergS3Engine(path="s3://bucket/table/", format="Parquet"),
86
+ sample_by_expression="cityHash64(id)",
87
+ )
88
+
89
+
90
+ def test_iceberg_table_in_olap_table():
91
+ """Test creating OlapTable with IcebergS3Engine and custom settings"""
92
+ table = OlapTable[SampleData](
93
+ "lake_events",
94
+ OlapConfig(
95
+ engine=IcebergS3Engine(
96
+ path="s3://datalake/events/",
97
+ format="Parquet",
98
+ aws_access_key_id="AKIATEST",
99
+ aws_secret_access_key="secret123",
100
+ )
101
+ ),
102
+ )
103
+ assert table.name == "lake_events"
104
+ assert isinstance(table.config.engine, IcebergS3Engine)
105
+ assert table.config.engine.path == "s3://datalake/events/"