moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/__init__.py +34 -3
- moose_lib/blocks.py +416 -52
- moose_lib/clients/redis_client.py +26 -14
- moose_lib/commons.py +37 -30
- moose_lib/config/config_file.py +5 -1
- moose_lib/config/runtime.py +73 -34
- moose_lib/data_models.py +331 -61
- moose_lib/dmv2/__init__.py +69 -73
- moose_lib/dmv2/_registry.py +2 -1
- moose_lib/dmv2/_source_capture.py +37 -0
- moose_lib/dmv2/consumption.py +55 -32
- moose_lib/dmv2/ingest_api.py +9 -2
- moose_lib/dmv2/ingest_pipeline.py +35 -16
- moose_lib/dmv2/life_cycle.py +3 -1
- moose_lib/dmv2/materialized_view.py +24 -14
- moose_lib/dmv2/moose_model.py +165 -0
- moose_lib/dmv2/olap_table.py +299 -151
- moose_lib/dmv2/registry.py +18 -3
- moose_lib/dmv2/sql_resource.py +16 -8
- moose_lib/dmv2/stream.py +75 -23
- moose_lib/dmv2/types.py +14 -8
- moose_lib/dmv2/view.py +13 -6
- moose_lib/dmv2/web_app.py +11 -6
- moose_lib/dmv2/web_app_helpers.py +5 -1
- moose_lib/dmv2/workflow.py +37 -9
- moose_lib/internal.py +340 -56
- moose_lib/main.py +87 -56
- moose_lib/query_builder.py +18 -5
- moose_lib/query_param.py +54 -20
- moose_lib/secrets.py +122 -0
- moose_lib/streaming/streaming_function_runner.py +233 -117
- moose_lib/utilities/sql.py +0 -1
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
- moose_lib-0.6.283.dist-info/RECORD +63 -0
- tests/__init__.py +1 -1
- tests/conftest.py +6 -5
- tests/test_backward_compatibility.py +85 -0
- tests/test_cluster_validation.py +85 -0
- tests/test_codec.py +75 -0
- tests/test_column_formatting.py +80 -0
- tests/test_fixedstring.py +43 -0
- tests/test_iceberg_config.py +105 -0
- tests/test_int_types.py +211 -0
- tests/test_kafka_config.py +141 -0
- tests/test_materialized.py +74 -0
- tests/test_metadata.py +37 -0
- tests/test_moose.py +21 -30
- tests/test_moose_model.py +153 -0
- tests/test_olap_table_moosemodel.py +89 -0
- tests/test_olap_table_versioning.py +52 -58
- tests/test_query_builder.py +97 -9
- tests/test_redis_client.py +10 -3
- tests/test_s3queue_config.py +211 -110
- tests/test_secrets.py +239 -0
- tests/test_simple_aggregate.py +42 -40
- tests/test_web_app.py +11 -5
- moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
moose_lib/__init__.py,sha256=jvIQmk1u6fG59qsiLziJbdEL2ZZsNLQgkvgyGL9tpU0,1491
|
|
2
|
+
moose_lib/blocks.py,sha256=mKajYpnQ42V5fvw65-D-PEX8JcZG0MOX7BIYuY5CIxQ,25363
|
|
3
|
+
moose_lib/commons.py,sha256=pVan-44bZceHy-4cNxMnDlDsgbp27J5q3a-Dd_NWXiU,6574
|
|
4
|
+
moose_lib/data_models.py,sha256=sxd30I9Fkz9bJzg3t_Smq5ahj03fG67vXqqNs-ZDL_o,24100
|
|
5
|
+
moose_lib/dmv2_serializer.py,sha256=CL_Pvvg8tJOT8Qk6hywDNzY8MYGhMVdTOw8arZi3jng,49
|
|
6
|
+
moose_lib/internal.py,sha256=6AfSogkixdAY44IeYCeWlrdXu2XAxaiGH0TzZfKy6OI,35767
|
|
7
|
+
moose_lib/main.py,sha256=P9E2bg1LvVM8ZrCC-mN0DqplgzQ7h6F17onIkOmAyn0,21008
|
|
8
|
+
moose_lib/query_builder.py,sha256=YenSawKqXD2n-rQ74FRfNoZ2KZpBiOv7a5G3CEFm1wg,6793
|
|
9
|
+
moose_lib/query_param.py,sha256=9qTEjO6OLHDkuwdUtdNqw_ye-A5ny5y_AZ1S37PfKwk,7009
|
|
10
|
+
moose_lib/secrets.py,sha256=GhsFlIWtGbUQYqEMClrCxcvUPEXxtF-zFkjEcmbco1c,4245
|
|
11
|
+
moose_lib/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
moose_lib/clients/redis_client.py,sha256=S-KdZaDVWKb4t5isedjF3z9wDcBx-PaTHyS9JBNt3NQ,12115
|
|
13
|
+
moose_lib/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
moose_lib/config/config_file.py,sha256=a6JdyxfjIUfTcl_Xm-zA4ATMXJHVIaP-eBQpGvsz7f4,3890
|
|
15
|
+
moose_lib/config/runtime.py,sha256=a6pPBD96wEsJKV4LKxoFjiRtbmTtuwzS0N0IEs-s6mo,8657
|
|
16
|
+
moose_lib/dmv2/__init__.py,sha256=REhStLFoYuQz7-VzswoUxm8NAvaqd-HkEkM_Ybb6QPQ,3367
|
|
17
|
+
moose_lib/dmv2/_registry.py,sha256=iO08jI6adyyszYyt53qgYphhLhwMo9JDXqmEfLj0L1Y,741
|
|
18
|
+
moose_lib/dmv2/_source_capture.py,sha256=qC-foQOkhl0GIQmCk5NVBq7SUhXGWy22fMbtNFWYsAY,1222
|
|
19
|
+
moose_lib/dmv2/consumption.py,sha256=gJhCKKbgQ_VHiKddkiI-Z8r0bfBfeROTK1KVuII-OY4,13100
|
|
20
|
+
moose_lib/dmv2/ingest_api.py,sha256=j5fPjC0TnAnQLX-U9v-kpNyt2kTgPuaZ-QoFhCmuujc,2594
|
|
21
|
+
moose_lib/dmv2/ingest_pipeline.py,sha256=d4UND75j9GuCi2EgUpsidySB_fi_7bho251Gev9P6g8,8659
|
|
22
|
+
moose_lib/dmv2/life_cycle.py,sha256=7MqN1bIS9aX2wX9lgHFZtTUAf9GNrYvz6a7g5Kpw5-o,1256
|
|
23
|
+
moose_lib/dmv2/materialized_view.py,sha256=rMivRcHw3BuLA6hfRULSe2-DcwOTVZEJHnCjxLhwp9E,4966
|
|
24
|
+
moose_lib/dmv2/moose_model.py,sha256=5QGf5ep1tRa9qWA-rTNiF78id6ZLqZGRuqhNxaE0KlU,5268
|
|
25
|
+
moose_lib/dmv2/olap_table.py,sha256=tk1dR8_SirJLFvQS2nDpE9pHx8fhfulSCwcqoozyrtc,41803
|
|
26
|
+
moose_lib/dmv2/registry.py,sha256=LIx-LEyTIw2KWUjZoECIfw6jCPAsNP_kWDql-KxjizY,2955
|
|
27
|
+
moose_lib/dmv2/sql_resource.py,sha256=LT28uq38rJ5hZSeu8Bmvvx6kt6T4qPUfKZiCo6im9ac,2117
|
|
28
|
+
moose_lib/dmv2/stream.py,sha256=5HR9s9Wub7NPZNnFmU6gc-IsHMhJDyGYqicnrYH23qA,18542
|
|
29
|
+
moose_lib/dmv2/types.py,sha256=K7QcL2WtM0vq_yy4NTwX8MOv0VdZ0Gza6LKG5H9CnKg,4667
|
|
30
|
+
moose_lib/dmv2/view.py,sha256=9Me1p-1etvUkiomRxOreiUjwQbQokOd1eHsQZTtpsCE,1383
|
|
31
|
+
moose_lib/dmv2/web_app.py,sha256=DtG_vgCFJ2Czb7ukqsQc6JO7BvGWjqwNce8LRFt_FIk,5494
|
|
32
|
+
moose_lib/dmv2/web_app_helpers.py,sha256=wYHKJIl6lC7UNfw7DeLgDWOwt0OrzSXzM1a99rlSTfw,2899
|
|
33
|
+
moose_lib/dmv2/workflow.py,sha256=aBBRmSo5nLxG_AVWjpRDkE9WsGetF6ILMNCW9H2F40I,6456
|
|
34
|
+
moose_lib/streaming/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
+
moose_lib/streaming/streaming_function_runner.py,sha256=f551lwAEv8-gt3qLsZ12Rkqn1lllcCjpMQv53UKLjaw,25356
|
|
36
|
+
moose_lib/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
+
moose_lib/utilities/sql.py,sha256=-OvBWZWRHU817zl2fU_wFlgGLbtoK83e-b09kFW4kaE,903
|
|
38
|
+
tests/__init__.py,sha256=jfCJcz0MBRIZe-gEcfvbe2f-Ns8_nx9O3hVudbHGjbg,53
|
|
39
|
+
tests/conftest.py,sha256=CRrm2SPmuMUctPx1KufzmrhlrIL9BEIdi4iK8SI0OQA,993
|
|
40
|
+
tests/test_backward_compatibility.py,sha256=G8bqSbRllF6SgkFyXruZRzPy15Pk9BdCeY-6X2ND6p0,2272
|
|
41
|
+
tests/test_cluster_validation.py,sha256=U2SOvvwFxwW7JeeO2RtkMjjPBXzCHVR4E2WRShD07hI,2605
|
|
42
|
+
tests/test_codec.py,sha256=Mfn7_pjgc1jPYtXhhcKzmHhe6kURJzjlY5jV0chwYUY,2340
|
|
43
|
+
tests/test_column_formatting.py,sha256=eFrpg1l7KCk2kSJeQxrtifgIsgaDwyk5J7lzaGV2SHU,2057
|
|
44
|
+
tests/test_fixedstring.py,sha256=wQbQLEMXgWRx7B7jtLomkP6HEaNFhhfbRkfVReoJY-E,1552
|
|
45
|
+
tests/test_iceberg_config.py,sha256=6Xrl_jyoPhFtFEkLmBXnuGteqxU83kRqeFeHU92ch3I,3692
|
|
46
|
+
tests/test_int_types.py,sha256=9JOpHdKir9xRjRemS_YzWZ0mGwBXYG0WzaZzMnD0A08,6528
|
|
47
|
+
tests/test_kafka_config.py,sha256=2QsdAanJGKbk33jnDrrMvGLJnNAj7_ZJsJtNdQ0Rh4Y,4307
|
|
48
|
+
tests/test_materialized.py,sha256=rw8ae4FoY0TZzthCLrOxiznwRDH90U079pUjVJvX7MQ,2520
|
|
49
|
+
tests/test_metadata.py,sha256=m8BDuFFFQggPz8mBRQw0UB_K5wrtvawQrsHamlOzyNg,1131
|
|
50
|
+
tests/test_moose.py,sha256=jsw7tRS8FyPsVxD5xD9QvpV3axx5wOajc26yMt9boDE,2021
|
|
51
|
+
tests/test_moose_model.py,sha256=p2bQLreqRh-6WmthyhB2Vyc-gSHsOQsEOLnx9_RtxSA,4208
|
|
52
|
+
tests/test_olap_table_moosemodel.py,sha256=_ZXruANx-EvN2mZvUoVfaO-UlihKEEeXoIVyVLA-gyg,2455
|
|
53
|
+
tests/test_olap_table_versioning.py,sha256=XsSia_Pz_whHLVU1uT7Wm9fK55DJxpTsvzYMyj1Ihu8,6832
|
|
54
|
+
tests/test_query_builder.py,sha256=65JUTd9uudVjT7sJpYdUDTGhlp1s40OPEhyOVN6Ayfc,4204
|
|
55
|
+
tests/test_redis_client.py,sha256=t_c5s9r2CiJHlP6K_VRud5sr68urwN0iU5p2VA5D294,3225
|
|
56
|
+
tests/test_s3queue_config.py,sha256=m5JDL_w-73cRYp9HarB3JHQCLNwApzKyfnm4Aa4biec,14233
|
|
57
|
+
tests/test_secrets.py,sha256=QfkFq2fWxnFj2Wk37VIPVB7LfgizH50s7lQi4YAnZ9Q,8797
|
|
58
|
+
tests/test_simple_aggregate.py,sha256=aXT2QjSCByY9_ygRcoAyLhEFgKZB4z7LFJg75GU_ntQ,4072
|
|
59
|
+
tests/test_web_app.py,sha256=swuAEOdmbrDnKo2K9BRl0kBKIeSI2r_SfUS5ZF3x55g,6585
|
|
60
|
+
moose_lib-0.6.283.dist-info/METADATA,sha256=eWeFg2UNYKGaLdXgni4g7S4-8MRAQv-l3WUqDRm0qC8,1270
|
|
61
|
+
moose_lib-0.6.283.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
62
|
+
moose_lib-0.6.283.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
|
|
63
|
+
moose_lib-0.6.283.dist-info/RECORD,,
|
tests/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
# Empty __init__.py to mark this as a Python package
|
|
1
|
+
# Empty __init__.py to mark this as a Python package
|
tests/conftest.py
CHANGED
|
@@ -3,7 +3,8 @@ import os
|
|
|
3
3
|
import sys
|
|
4
4
|
|
|
5
5
|
# Add the package root to Python path for imports
|
|
6
|
-
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__),
|
|
6
|
+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
7
|
+
|
|
7
8
|
|
|
8
9
|
@pytest.fixture(autouse=True)
|
|
9
10
|
def clear_registries():
|
|
@@ -18,7 +19,7 @@ def clear_registries():
|
|
|
18
19
|
_sql_resources,
|
|
19
20
|
_workflows,
|
|
20
21
|
)
|
|
21
|
-
|
|
22
|
+
|
|
22
23
|
# Clear all registries
|
|
23
24
|
_tables.clear()
|
|
24
25
|
_streams.clear()
|
|
@@ -28,9 +29,9 @@ def clear_registries():
|
|
|
28
29
|
_api_path_map.clear()
|
|
29
30
|
_sql_resources.clear()
|
|
30
31
|
_workflows.clear()
|
|
31
|
-
|
|
32
|
+
|
|
32
33
|
yield
|
|
33
|
-
|
|
34
|
+
|
|
34
35
|
# Clean up after test (optional, but good practice)
|
|
35
36
|
_tables.clear()
|
|
36
37
|
_streams.clear()
|
|
@@ -39,4 +40,4 @@ def clear_registries():
|
|
|
39
40
|
_api_name_aliases.clear()
|
|
40
41
|
_api_path_map.clear()
|
|
41
42
|
_sql_resources.clear()
|
|
42
|
-
_workflows.clear()
|
|
43
|
+
_workflows.clear()
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests ensuring MooseModel doesn't break existing BaseModel usage
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
from moose_lib.dmv2 import OlapTable, OlapConfig, MooseModel
|
|
7
|
+
from moose_lib.data_models import Column
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_basemodel_olaptable_still_works():
|
|
11
|
+
"""Existing code using BaseModel should continue working"""
|
|
12
|
+
|
|
13
|
+
class LegacyUser(BaseModel):
|
|
14
|
+
user_id: int
|
|
15
|
+
email: str
|
|
16
|
+
|
|
17
|
+
# Old pattern still works
|
|
18
|
+
table = OlapTable[LegacyUser]("legacy_users")
|
|
19
|
+
|
|
20
|
+
assert table.name == "legacy_users"
|
|
21
|
+
assert hasattr(table, "cols")
|
|
22
|
+
assert isinstance(table.cols.user_id, Column)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_moosemodel_and_basemodel_can_coexist():
|
|
26
|
+
"""Projects can mix MooseModel and BaseModel"""
|
|
27
|
+
|
|
28
|
+
class NewModel(MooseModel):
|
|
29
|
+
new_field: int
|
|
30
|
+
|
|
31
|
+
class OldModel(BaseModel):
|
|
32
|
+
old_field: str
|
|
33
|
+
|
|
34
|
+
new_table = OlapTable[NewModel]("new_table")
|
|
35
|
+
old_table = OlapTable[OldModel]("old_table")
|
|
36
|
+
|
|
37
|
+
# Both work
|
|
38
|
+
assert new_table.name == "new_table"
|
|
39
|
+
assert old_table.name == "old_table"
|
|
40
|
+
|
|
41
|
+
# New model has direct column access
|
|
42
|
+
assert isinstance(NewModel.new_field, Column)
|
|
43
|
+
|
|
44
|
+
# Old model doesn't (expected)
|
|
45
|
+
assert (
|
|
46
|
+
not isinstance(OldModel.old_field, Column)
|
|
47
|
+
if hasattr(OldModel, "old_field")
|
|
48
|
+
else True
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_moosemodel_cols_matches_direct_access():
|
|
53
|
+
"""MooseModel.cols.field and MooseModel.field should return same Column"""
|
|
54
|
+
|
|
55
|
+
class Analytics(MooseModel):
|
|
56
|
+
event_id: int
|
|
57
|
+
timestamp: str
|
|
58
|
+
|
|
59
|
+
# Both access methods return the same Column
|
|
60
|
+
direct = Analytics.event_id
|
|
61
|
+
via_cols = Analytics.cols.event_id
|
|
62
|
+
|
|
63
|
+
assert direct.name == via_cols.name
|
|
64
|
+
assert direct.data_type == via_cols.data_type
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_existing_query_patterns_unchanged():
|
|
68
|
+
"""Existing query patterns should work identically"""
|
|
69
|
+
|
|
70
|
+
class Metrics(MooseModel):
|
|
71
|
+
metric_id: int
|
|
72
|
+
value: float
|
|
73
|
+
|
|
74
|
+
table = OlapTable[Metrics]("metrics")
|
|
75
|
+
|
|
76
|
+
# Pattern 1: Using table.cols (existing pattern)
|
|
77
|
+
col_via_table = table.cols.metric_id
|
|
78
|
+
assert isinstance(col_via_table, Column)
|
|
79
|
+
|
|
80
|
+
# Pattern 2: Using Model.cols (also existing)
|
|
81
|
+
col_via_model = Metrics.cols.metric_id
|
|
82
|
+
assert isinstance(col_via_model, Column)
|
|
83
|
+
|
|
84
|
+
# Both are equivalent
|
|
85
|
+
assert col_via_table.name == col_via_model.name
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Tests for OlapTable cluster validation."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from moose_lib import OlapTable, OlapConfig, MergeTreeEngine, ReplicatedMergeTreeEngine
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SampleModel(BaseModel):
|
|
9
|
+
"""Test model for cluster validation tests."""
|
|
10
|
+
|
|
11
|
+
id: str
|
|
12
|
+
value: int
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_cluster_only_is_allowed():
|
|
16
|
+
"""Test that specifying only cluster works."""
|
|
17
|
+
table = OlapTable[SampleModel](
|
|
18
|
+
"TestClusterOnly",
|
|
19
|
+
OlapConfig(
|
|
20
|
+
engine=MergeTreeEngine(),
|
|
21
|
+
order_by_fields=["id"],
|
|
22
|
+
cluster="test_cluster",
|
|
23
|
+
),
|
|
24
|
+
)
|
|
25
|
+
assert table is not None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_explicit_params_only_is_allowed():
|
|
29
|
+
"""Test that specifying explicit keeper_path and replica_name without cluster works."""
|
|
30
|
+
table = OlapTable[SampleModel](
|
|
31
|
+
"TestExplicitOnly",
|
|
32
|
+
OlapConfig(
|
|
33
|
+
engine=ReplicatedMergeTreeEngine(
|
|
34
|
+
keeper_path="/clickhouse/tables/{database}/{table}",
|
|
35
|
+
replica_name="{replica}",
|
|
36
|
+
),
|
|
37
|
+
order_by_fields=["id"],
|
|
38
|
+
),
|
|
39
|
+
)
|
|
40
|
+
assert table is not None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_cluster_and_explicit_params_raises_error():
|
|
44
|
+
"""Test that specifying both cluster and explicit keeper_path/replica_name raises an error."""
|
|
45
|
+
with pytest.raises(
|
|
46
|
+
ValueError,
|
|
47
|
+
match=r"Cannot specify both 'cluster' and explicit replication params",
|
|
48
|
+
):
|
|
49
|
+
OlapTable[SampleModel](
|
|
50
|
+
"TestBothClusterAndExplicit",
|
|
51
|
+
OlapConfig(
|
|
52
|
+
engine=ReplicatedMergeTreeEngine(
|
|
53
|
+
keeper_path="/clickhouse/tables/{database}/{table}",
|
|
54
|
+
replica_name="{replica}",
|
|
55
|
+
),
|
|
56
|
+
order_by_fields=["id"],
|
|
57
|
+
cluster="test_cluster",
|
|
58
|
+
),
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_non_replicated_engine_with_cluster_is_allowed():
|
|
63
|
+
"""Test that non-replicated engines can have a cluster specified."""
|
|
64
|
+
table = OlapTable[SampleModel](
|
|
65
|
+
"TestMergeTreeWithCluster",
|
|
66
|
+
OlapConfig(
|
|
67
|
+
engine=MergeTreeEngine(),
|
|
68
|
+
order_by_fields=["id"],
|
|
69
|
+
cluster="test_cluster",
|
|
70
|
+
),
|
|
71
|
+
)
|
|
72
|
+
assert table is not None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_replicated_engine_without_cluster_or_explicit_params_is_allowed():
|
|
76
|
+
"""Test that ReplicatedMergeTree without cluster or explicit params works (ClickHouse Cloud mode)."""
|
|
77
|
+
table = OlapTable[SampleModel](
|
|
78
|
+
"TestCloudMode",
|
|
79
|
+
OlapConfig(
|
|
80
|
+
engine=ReplicatedMergeTreeEngine(),
|
|
81
|
+
order_by_fields=["id"],
|
|
82
|
+
# No cluster, no keeper_path, no replica_name
|
|
83
|
+
),
|
|
84
|
+
)
|
|
85
|
+
assert table is not None
|
tests/test_codec.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Annotated, Any
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
from moose_lib import Key, ClickHouseCodec, UInt64
|
|
5
|
+
from moose_lib.data_models import _to_columns
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_codec_single():
|
|
9
|
+
"""Test single codec annotation converts to correct ClickHouse CODEC."""
|
|
10
|
+
|
|
11
|
+
class CodecTest(BaseModel):
|
|
12
|
+
id: Key[str]
|
|
13
|
+
data: Annotated[str, ClickHouseCodec("ZSTD(3)")]
|
|
14
|
+
|
|
15
|
+
columns = _to_columns(CodecTest)
|
|
16
|
+
by_name = {col.name: col for col in columns}
|
|
17
|
+
|
|
18
|
+
assert by_name["data"].codec == "ZSTD(3)"
|
|
19
|
+
assert by_name["id"].codec is None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_codec_chain():
|
|
23
|
+
"""Test codec chain annotation (Delta, LZ4)."""
|
|
24
|
+
|
|
25
|
+
class CodecChainTest(BaseModel):
|
|
26
|
+
timestamp: Annotated[datetime, ClickHouseCodec("Delta, LZ4")]
|
|
27
|
+
value: Annotated[float, ClickHouseCodec("Gorilla, ZSTD")]
|
|
28
|
+
|
|
29
|
+
columns = _to_columns(CodecChainTest)
|
|
30
|
+
by_name = {col.name: col for col in columns}
|
|
31
|
+
|
|
32
|
+
assert by_name["timestamp"].codec == "Delta, LZ4"
|
|
33
|
+
assert by_name["value"].codec == "Gorilla, ZSTD"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_codec_with_level():
|
|
37
|
+
"""Test codec with compression level."""
|
|
38
|
+
|
|
39
|
+
class CodecLevelTest(BaseModel):
|
|
40
|
+
log_blob: Annotated[Any, ClickHouseCodec("ZSTD(3)")]
|
|
41
|
+
combination_hash: Annotated[list[UInt64], ClickHouseCodec("ZSTD(1)")]
|
|
42
|
+
|
|
43
|
+
columns = _to_columns(CodecLevelTest)
|
|
44
|
+
by_name = {col.name: col for col in columns}
|
|
45
|
+
|
|
46
|
+
assert by_name["log_blob"].codec == "ZSTD(3)"
|
|
47
|
+
assert by_name["combination_hash"].codec == "ZSTD(1)"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_codec_specialized():
|
|
51
|
+
"""Test specialized codecs."""
|
|
52
|
+
|
|
53
|
+
class SpecializedCodecTest(BaseModel):
|
|
54
|
+
timestamp: Annotated[datetime, ClickHouseCodec("Delta")]
|
|
55
|
+
counter: Annotated[int, ClickHouseCodec("DoubleDelta")]
|
|
56
|
+
temperature: Annotated[float, ClickHouseCodec("Gorilla")]
|
|
57
|
+
|
|
58
|
+
columns = _to_columns(SpecializedCodecTest)
|
|
59
|
+
by_name = {col.name: col for col in columns}
|
|
60
|
+
|
|
61
|
+
assert by_name["timestamp"].codec == "Delta"
|
|
62
|
+
assert by_name["counter"].codec == "DoubleDelta"
|
|
63
|
+
assert by_name["temperature"].codec == "Gorilla"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_codec_none():
|
|
67
|
+
"""Test codec with NONE (uncompressed)."""
|
|
68
|
+
|
|
69
|
+
class NoCodecTest(BaseModel):
|
|
70
|
+
data: Annotated[str, ClickHouseCodec("NONE")]
|
|
71
|
+
|
|
72
|
+
columns = _to_columns(NoCodecTest)
|
|
73
|
+
by_name = {col.name: col for col in columns}
|
|
74
|
+
|
|
75
|
+
assert by_name["data"].codec == "NONE"
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Tests for Column string formatting and interpolation"""
|
|
2
|
+
|
|
3
|
+
from moose_lib.data_models import Column
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_column_str_returns_quoted_identifier():
|
|
7
|
+
"""Column.__str__() should return backtick-quoted identifier"""
|
|
8
|
+
col = Column(
|
|
9
|
+
name="user_id",
|
|
10
|
+
data_type="String",
|
|
11
|
+
required=True,
|
|
12
|
+
unique=False,
|
|
13
|
+
primary_key=False,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
assert str(col) == "`user_id`"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_column_format_spec_col():
|
|
20
|
+
"""Column with :col format spec should return quoted identifier"""
|
|
21
|
+
col = Column(
|
|
22
|
+
name="email", data_type="String", required=True, unique=False, primary_key=False
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
result = f"{col:col}"
|
|
26
|
+
assert result == "`email`"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_column_format_spec_c():
|
|
30
|
+
"""Column with :c format spec should return quoted identifier"""
|
|
31
|
+
col = Column(
|
|
32
|
+
name="timestamp",
|
|
33
|
+
data_type="DateTime",
|
|
34
|
+
required=True,
|
|
35
|
+
unique=False,
|
|
36
|
+
primary_key=False,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
result = f"{col:c}"
|
|
40
|
+
assert result == "`timestamp`"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_column_format_spec_empty():
|
|
44
|
+
"""Column with no format spec should return quoted identifier"""
|
|
45
|
+
col = Column(
|
|
46
|
+
name="count", data_type="Int64", required=True, unique=False, primary_key=False
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
result = f"{col}"
|
|
50
|
+
assert result == "`count`"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_column_with_special_chars():
|
|
54
|
+
"""Column names with hyphens should be quoted"""
|
|
55
|
+
col = Column(
|
|
56
|
+
name="user-id",
|
|
57
|
+
data_type="String",
|
|
58
|
+
required=True,
|
|
59
|
+
unique=False,
|
|
60
|
+
primary_key=False,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
assert str(col) == "`user-id`"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_column_in_fstring_interpolation():
|
|
67
|
+
"""Column should work in f-string SQL construction"""
|
|
68
|
+
user_id_col = Column(
|
|
69
|
+
name="user_id",
|
|
70
|
+
data_type="String",
|
|
71
|
+
required=True,
|
|
72
|
+
unique=False,
|
|
73
|
+
primary_key=False,
|
|
74
|
+
)
|
|
75
|
+
email_col = Column(
|
|
76
|
+
name="email", data_type="String", required=True, unique=False, primary_key=False
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
query = f"SELECT {user_id_col:col}, {email_col:col} FROM users"
|
|
80
|
+
assert query == "SELECT `user_id`, `email` FROM users"
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Annotated
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
from moose_lib import Key, FixedString
|
|
5
|
+
from moose_lib.data_models import _to_columns
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_fixedstring_annotation():
|
|
9
|
+
"""Test FixedString annotation converts to correct ClickHouse type with str base type."""
|
|
10
|
+
|
|
11
|
+
class FixedStringTest(BaseModel):
|
|
12
|
+
id: Key[str]
|
|
13
|
+
created_at: datetime
|
|
14
|
+
md5_hash: Annotated[str, FixedString(16)]
|
|
15
|
+
sha256_hash: Annotated[str, FixedString(32)]
|
|
16
|
+
ipv6_address: Annotated[str, FixedString(16)]
|
|
17
|
+
|
|
18
|
+
columns = _to_columns(FixedStringTest)
|
|
19
|
+
by_name = {col.name: col for col in columns}
|
|
20
|
+
|
|
21
|
+
assert by_name["md5_hash"].data_type == "FixedString(16)"
|
|
22
|
+
assert by_name["sha256_hash"].data_type == "FixedString(32)"
|
|
23
|
+
assert by_name["ipv6_address"].data_type == "FixedString(16)"
|
|
24
|
+
|
|
25
|
+
# Verify other fields still work
|
|
26
|
+
assert by_name["id"].data_type == "String"
|
|
27
|
+
assert by_name["created_at"].data_type == "DateTime"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_fixedstring_different_sizes():
|
|
31
|
+
"""Test various FixedString sizes."""
|
|
32
|
+
|
|
33
|
+
class FixedStringSizes(BaseModel):
|
|
34
|
+
mac_address: Annotated[str, FixedString(6)]
|
|
35
|
+
uuid_binary: Annotated[str, FixedString(16)]
|
|
36
|
+
sha512_hash: Annotated[str, FixedString(64)]
|
|
37
|
+
|
|
38
|
+
columns = _to_columns(FixedStringSizes)
|
|
39
|
+
by_name = {col.name: col for col in columns}
|
|
40
|
+
|
|
41
|
+
assert by_name["mac_address"].data_type == "FixedString(6)"
|
|
42
|
+
assert by_name["uuid_binary"].data_type == "FixedString(16)"
|
|
43
|
+
assert by_name["sha512_hash"].data_type == "FixedString(64)"
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from moose_lib import OlapTable, OlapConfig
|
|
3
|
+
from moose_lib.blocks import IcebergS3Engine
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SampleData(BaseModel):
|
|
8
|
+
id: str
|
|
9
|
+
name: str
|
|
10
|
+
value: int
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_iceberg_engine_basic_creation():
|
|
14
|
+
"""Test basic IcebergS3Engine creation with required fields"""
|
|
15
|
+
engine = IcebergS3Engine(path="s3://bucket/warehouse/table/", format="Parquet")
|
|
16
|
+
assert engine.path == "s3://bucket/warehouse/table/"
|
|
17
|
+
assert engine.format == "Parquet"
|
|
18
|
+
assert engine.aws_access_key_id is None
|
|
19
|
+
assert engine.aws_secret_access_key is None
|
|
20
|
+
assert engine.compression is None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_iceberg_engine_with_all_options():
|
|
24
|
+
"""Test IcebergS3Engine with all optional configuration"""
|
|
25
|
+
engine = IcebergS3Engine(
|
|
26
|
+
path="s3://bucket/table/",
|
|
27
|
+
format="ORC",
|
|
28
|
+
aws_access_key_id="AKIATEST",
|
|
29
|
+
aws_secret_access_key="secret123",
|
|
30
|
+
compression="zstd",
|
|
31
|
+
)
|
|
32
|
+
assert engine.path == "s3://bucket/table/"
|
|
33
|
+
assert engine.format == "ORC"
|
|
34
|
+
assert engine.aws_access_key_id == "AKIATEST"
|
|
35
|
+
assert engine.aws_secret_access_key == "secret123"
|
|
36
|
+
assert engine.compression == "zstd"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_iceberg_engine_missing_path():
|
|
40
|
+
"""Test that missing path raises ValueError"""
|
|
41
|
+
with pytest.raises(ValueError, match="IcebergS3 engine requires 'path'"):
|
|
42
|
+
IcebergS3Engine(path="", format="Parquet")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_iceberg_engine_missing_format():
|
|
46
|
+
"""Test that missing format raises ValueError"""
|
|
47
|
+
with pytest.raises(ValueError, match="IcebergS3 engine requires 'format'"):
|
|
48
|
+
IcebergS3Engine(path="s3://bucket/table/", format="")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_iceberg_engine_invalid_format():
|
|
52
|
+
"""Test that invalid format raises ValueError (only Parquet and ORC supported)"""
|
|
53
|
+
with pytest.raises(ValueError, match="format must be 'Parquet' or 'ORC'"):
|
|
54
|
+
IcebergS3Engine(path="s3://bucket/table/", format="JSON")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_iceberg_rejects_order_by():
|
|
58
|
+
"""Test that IcebergS3 engine rejects ORDER BY clauses (read-only external table)"""
|
|
59
|
+
with pytest.raises(
|
|
60
|
+
ValueError, match="IcebergS3Engine does not support ORDER BY clauses"
|
|
61
|
+
):
|
|
62
|
+
OlapConfig(
|
|
63
|
+
engine=IcebergS3Engine(path="s3://bucket/table/", format="Parquet"),
|
|
64
|
+
order_by_fields=["id"],
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_iceberg_rejects_partition_by():
|
|
69
|
+
"""Test that IcebergS3 engine rejects PARTITION BY clauses (read-only external table)"""
|
|
70
|
+
with pytest.raises(
|
|
71
|
+
ValueError, match="IcebergS3Engine does not support PARTITION BY clause"
|
|
72
|
+
):
|
|
73
|
+
OlapConfig(
|
|
74
|
+
engine=IcebergS3Engine(path="s3://bucket/table/", format="Parquet"),
|
|
75
|
+
partition_by="toYYYYMM(timestamp)",
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_iceberg_rejects_sample_by():
|
|
80
|
+
"""Test that IcebergS3 engine rejects SAMPLE BY clauses (read-only external table)"""
|
|
81
|
+
with pytest.raises(
|
|
82
|
+
ValueError, match="IcebergS3Engine does not support SAMPLE BY clause"
|
|
83
|
+
):
|
|
84
|
+
OlapConfig(
|
|
85
|
+
engine=IcebergS3Engine(path="s3://bucket/table/", format="Parquet"),
|
|
86
|
+
sample_by_expression="cityHash64(id)",
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def test_iceberg_table_in_olap_table():
|
|
91
|
+
"""Test creating OlapTable with IcebergS3Engine and custom settings"""
|
|
92
|
+
table = OlapTable[SampleData](
|
|
93
|
+
"lake_events",
|
|
94
|
+
OlapConfig(
|
|
95
|
+
engine=IcebergS3Engine(
|
|
96
|
+
path="s3://datalake/events/",
|
|
97
|
+
format="Parquet",
|
|
98
|
+
aws_access_key_id="AKIATEST",
|
|
99
|
+
aws_secret_access_key="secret123",
|
|
100
|
+
)
|
|
101
|
+
),
|
|
102
|
+
)
|
|
103
|
+
assert table.name == "lake_events"
|
|
104
|
+
assert isinstance(table.config.engine, IcebergS3Engine)
|
|
105
|
+
assert table.config.engine.path == "s3://datalake/events/"
|