moose-lib 0.6.148.dev3442438466__py3-none-any.whl → 0.6.283__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. moose_lib/__init__.py +34 -3
  2. moose_lib/blocks.py +416 -52
  3. moose_lib/clients/redis_client.py +26 -14
  4. moose_lib/commons.py +37 -30
  5. moose_lib/config/config_file.py +5 -1
  6. moose_lib/config/runtime.py +73 -34
  7. moose_lib/data_models.py +331 -61
  8. moose_lib/dmv2/__init__.py +69 -73
  9. moose_lib/dmv2/_registry.py +2 -1
  10. moose_lib/dmv2/_source_capture.py +37 -0
  11. moose_lib/dmv2/consumption.py +55 -32
  12. moose_lib/dmv2/ingest_api.py +9 -2
  13. moose_lib/dmv2/ingest_pipeline.py +35 -16
  14. moose_lib/dmv2/life_cycle.py +3 -1
  15. moose_lib/dmv2/materialized_view.py +24 -14
  16. moose_lib/dmv2/moose_model.py +165 -0
  17. moose_lib/dmv2/olap_table.py +299 -151
  18. moose_lib/dmv2/registry.py +18 -3
  19. moose_lib/dmv2/sql_resource.py +16 -8
  20. moose_lib/dmv2/stream.py +75 -23
  21. moose_lib/dmv2/types.py +14 -8
  22. moose_lib/dmv2/view.py +13 -6
  23. moose_lib/dmv2/web_app.py +11 -6
  24. moose_lib/dmv2/web_app_helpers.py +5 -1
  25. moose_lib/dmv2/workflow.py +37 -9
  26. moose_lib/internal.py +340 -56
  27. moose_lib/main.py +87 -56
  28. moose_lib/query_builder.py +18 -5
  29. moose_lib/query_param.py +54 -20
  30. moose_lib/secrets.py +122 -0
  31. moose_lib/streaming/streaming_function_runner.py +233 -117
  32. moose_lib/utilities/sql.py +0 -1
  33. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/METADATA +18 -1
  34. moose_lib-0.6.283.dist-info/RECORD +63 -0
  35. tests/__init__.py +1 -1
  36. tests/conftest.py +6 -5
  37. tests/test_backward_compatibility.py +85 -0
  38. tests/test_cluster_validation.py +85 -0
  39. tests/test_codec.py +75 -0
  40. tests/test_column_formatting.py +80 -0
  41. tests/test_fixedstring.py +43 -0
  42. tests/test_iceberg_config.py +105 -0
  43. tests/test_int_types.py +211 -0
  44. tests/test_kafka_config.py +141 -0
  45. tests/test_materialized.py +74 -0
  46. tests/test_metadata.py +37 -0
  47. tests/test_moose.py +21 -30
  48. tests/test_moose_model.py +153 -0
  49. tests/test_olap_table_moosemodel.py +89 -0
  50. tests/test_olap_table_versioning.py +52 -58
  51. tests/test_query_builder.py +97 -9
  52. tests/test_redis_client.py +10 -3
  53. tests/test_s3queue_config.py +211 -110
  54. tests/test_secrets.py +239 -0
  55. tests/test_simple_aggregate.py +42 -40
  56. tests/test_web_app.py +11 -5
  57. moose_lib-0.6.148.dev3442438466.dist-info/RECORD +0 -47
  58. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/WHEEL +0 -0
  59. {moose_lib-0.6.148.dev3442438466.dist-info → moose_lib-0.6.283.dist-info}/top_level.txt +0 -0
tests/test_int_types.py ADDED
@@ -0,0 +1,211 @@
+ from datetime import datetime
+ from typing import Annotated
+ from pydantic import BaseModel
+ from moose_lib import (
+     Key,
+     Int8,
+     Int16,
+     Int32,
+     Int64,
+     UInt8,
+     UInt16,
+     UInt32,
+     UInt64,
+     Float32,
+     Float64,
+ )
+ from moose_lib.data_models import _to_columns
+
+
+ def test_integer_type_aliases():
+     """Test that integer type aliases convert to correct ClickHouse types."""
+
+     class IntegerTypesTest(BaseModel):
+         id: Key[str]
+         created_at: datetime
+         tiny_int: Int8
+         small_int: Int16
+         medium_int: Int32
+         big_int: Int64
+         tiny_uint: UInt8
+         small_uint: UInt16
+         medium_uint: UInt32
+         big_uint: UInt64
+
+     columns = _to_columns(IntegerTypesTest)
+     by_name = {col.name: col for col in columns}
+
+     # Verify signed integer types
+     assert by_name["tiny_int"].data_type == "Int8"
+     assert by_name["small_int"].data_type == "Int16"
+     assert by_name["medium_int"].data_type == "Int32"
+     assert by_name["big_int"].data_type == "Int64"
+
+     # Verify unsigned integer types
+     assert by_name["tiny_uint"].data_type == "UInt8"
+     assert by_name["small_uint"].data_type == "UInt16"
+     assert by_name["medium_uint"].data_type == "UInt32"
+     assert by_name["big_uint"].data_type == "UInt64"
+
+     # Verify other fields still work
+     assert by_name["id"].data_type == "String"
+     assert by_name["created_at"].data_type == "DateTime"
+
+
+ def test_float_type_aliases():
+     """Test that float type aliases convert to correct ClickHouse types."""
+
+     class FloatTypesTest(BaseModel):
+         id: Key[str]
+         precision_float: Float32
+         double_precision_float: Float64
+
+     columns = _to_columns(FloatTypesTest)
+     by_name = {col.name: col for col in columns}
+
+     assert by_name["precision_float"].data_type == "Float32"
+     assert by_name["double_precision_float"].data_type == "Float64"
+     assert by_name["id"].data_type == "String"
+
+
+ def test_combined_numeric_types():
+     """Test combining integer and float types in a single model."""
+
+     class NumericTypesTest(BaseModel):
+         user_id: UInt64
+         age: UInt8
+         score: Int32
+         latitude: Float64
+         longitude: Float64
+         precision_value: Float32
+
+     columns = _to_columns(NumericTypesTest)
+     by_name = {col.name: col for col in columns}
+
+     assert by_name["user_id"].data_type == "UInt64"
+     assert by_name["age"].data_type == "UInt8"
+     assert by_name["score"].data_type == "Int32"
+     assert by_name["latitude"].data_type == "Float64"
+     assert by_name["longitude"].data_type == "Float64"
+     assert by_name["precision_value"].data_type == "Float32"
+
+
+ def test_integer_types_as_keys():
+     """Test that integer types can be used as primary keys."""
+
+     class IntegerKeyTest(BaseModel):
+         user_id: Key[UInt64]
+         event_id: Key[Int64]
+         name: str
+
+     columns = _to_columns(IntegerKeyTest)
+     by_name = {col.name: col for col in columns}
+
+     assert by_name["user_id"].data_type == "UInt64"
+     assert by_name["user_id"].primary_key is True
+     assert by_name["event_id"].data_type == "Int64"
+     assert by_name["event_id"].primary_key is True
+     assert by_name["name"].data_type == "String"
+
+
+ def test_optional_integer_types():
+     """Test that optional integer types work correctly."""
+     from typing import Optional
+
+     class OptionalIntTest(BaseModel):
+         required_count: UInt32
+         optional_count: Optional[UInt32]
+         optional_score: Optional[Int16]
+
+     columns = _to_columns(OptionalIntTest)
+     by_name = {col.name: col for col in columns}
+
+     assert by_name["required_count"].data_type == "UInt32"
+     assert by_name["required_count"].required is True
+
+     assert by_name["optional_count"].data_type == "UInt32"
+     assert by_name["optional_count"].required is False
+
+     assert by_name["optional_score"].data_type == "Int16"
+     assert by_name["optional_score"].required is False
+
+
+ def test_uint_common_use_cases():
+     """Test common use cases for unsigned integers."""
+
+     class CommonUIntUseCases(BaseModel):
+         # User/entity IDs (always positive)
+         user_id: UInt64
+         # Counters (always positive or zero)
+         page_views: UInt32
+         click_count: UInt32
+         # Small enums/flags (0-255)
+         status_code: UInt8
+         # Port numbers (0-65535)
+         port: UInt16
+         # Timestamps as unix epoch
+         timestamp: UInt64
+
+     columns = _to_columns(CommonUIntUseCases)
+     by_name = {col.name: col for col in columns}
+
+     assert by_name["user_id"].data_type == "UInt64"
+     assert by_name["page_views"].data_type == "UInt32"
+     assert by_name["click_count"].data_type == "UInt32"
+     assert by_name["status_code"].data_type == "UInt8"
+     assert by_name["port"].data_type == "UInt16"
+     assert by_name["timestamp"].data_type == "UInt64"
+
+
+ def test_int_common_use_cases():
+     """Test common use cases for signed integers."""
+
+     class CommonIntUseCases(BaseModel):
+         # Temperature (can be negative)
+         temperature: Int16
+         # Financial amounts (can be negative for debits)
+         balance: Int64
+         # Deltas/differences
+         delta: Int32
+         # Small range values
+         offset: Int8
+
+     columns = _to_columns(CommonIntUseCases)
+     by_name = {col.name: col for col in columns}
+
+     assert by_name["temperature"].data_type == "Int16"
+     assert by_name["balance"].data_type == "Int64"
+     assert by_name["delta"].data_type == "Int32"
+     assert by_name["offset"].data_type == "Int8"
+
+
+ def test_default_int_still_works():
+     """Test that plain int without type annotation still works as before."""
+
+     class PlainIntTest(BaseModel):
+         plain_int: int
+         typed_int: UInt32
+
+     columns = _to_columns(PlainIntTest)
+     by_name = {col.name: col for col in columns}
+
+     # Plain int should still map to "Int64" (default behavior)
+     assert by_name["plain_int"].data_type == "Int64"
+     # Typed int should map to specific type
+     assert by_name["typed_int"].data_type == "UInt32"
+
+
+ def test_default_float_still_works():
+     """Test that plain float without type annotation still works as before."""
+
+     class PlainFloatTest(BaseModel):
+         plain_float: float
+         typed_float: Float32
+
+     columns = _to_columns(PlainFloatTest)
+     by_name = {col.name: col for col in columns}
+
+     # Plain float should still map to "Float64" (default behavior)
+     assert by_name["plain_float"].data_type == "Float64"
+     # Typed float should map to specific type
+     assert by_name["typed_float"].data_type == "Float32"
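The aliases exercised above are imported from the top-level moose_lib package. As a usage sketch (not part of this diff), a user model might combine them as follows; DeviceReading and its fields are illustrative names only, and the column attributes inspected are the same ones asserted in the tests:

from pydantic import BaseModel
from moose_lib import Key, UInt8, UInt64, Float32
from moose_lib.data_models import _to_columns

# Illustrative model, not part of the package: mixes typed aliases with plain Python types.
class DeviceReading(BaseModel):
    device_id: Key[UInt64]   # expected to map to ClickHouse UInt64, flagged as primary key
    battery_pct: UInt8       # expected to map to UInt8
    temperature: Float32     # expected to map to Float32
    note: str                # plain str still maps to String

# Inspect the inferred ClickHouse columns, mirroring the assertions above.
for col in _to_columns(DeviceReading):
    print(col.name, col.data_type, col.primary_key)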
tests/test_kafka_config.py ADDED
@@ -0,0 +1,141 @@
+ """Tests for Kafka engine configuration."""
+
+ import pytest
+ from moose_lib import OlapTable, OlapConfig
+ from moose_lib.blocks import ClickHouseEngines, KafkaEngine
+ from pydantic import BaseModel
+
+
+ class SampleEvent(BaseModel):
+     event_id: str
+     user_id: str
+     timestamp: str
+
+
+ def test_kafka_engine_required_fields():
+     engine = KafkaEngine(
+         broker_list="kafka:9092",
+         topic_list="events",
+         group_name="moose_consumer",
+         format="JSONEachRow",
+     )
+     assert engine.broker_list == "kafka:9092"
+     assert engine.topic_list == "events"
+     assert engine.group_name == "moose_consumer"
+     assert engine.format == "JSONEachRow"
+
+
+ def test_kafka_engine_missing_broker_list():
+     with pytest.raises(ValueError, match="Kafka engine requires 'broker_list'"):
+         KafkaEngine(
+             broker_list="", topic_list="events", group_name="c", format="JSONEachRow"
+         )
+
+
+ def test_kafka_engine_missing_topic_list():
+     with pytest.raises(ValueError, match="Kafka engine requires 'topic_list'"):
+         KafkaEngine(
+             broker_list="kafka:9092",
+             topic_list="",
+             group_name="c",
+             format="JSONEachRow",
+         )
+
+
+ def test_kafka_engine_missing_group_name():
+     with pytest.raises(ValueError, match="Kafka engine requires 'group_name'"):
+         KafkaEngine(
+             broker_list="kafka:9092",
+             topic_list="events",
+             group_name="",
+             format="JSONEachRow",
+         )
+
+
+ def test_kafka_engine_missing_format():
+     with pytest.raises(ValueError, match="Kafka engine requires 'format'"):
+         KafkaEngine(
+             broker_list="kafka:9092", topic_list="events", group_name="c", format=""
+         )
+
+
+ def test_kafka_engine_rejects_order_by():
+     with pytest.raises(ValueError, match="KafkaEngine does not support ORDER BY"):
+         OlapTable[SampleEvent](
+             "kafka_table",
+             OlapConfig(
+                 engine=KafkaEngine(
+                     broker_list="kafka:9092",
+                     topic_list="events",
+                     group_name="c",
+                     format="JSONEachRow",
+                 ),
+                 order_by_fields=["event_id"],
+             ),
+         )
+
+
+ def test_kafka_engine_rejects_partition_by():
+     with pytest.raises(ValueError, match="KafkaEngine does not support PARTITION BY"):
+         OlapTable[SampleEvent](
+             "kafka_table",
+             OlapConfig(
+                 engine=KafkaEngine(
+                     broker_list="kafka:9092",
+                     topic_list="events",
+                     group_name="c",
+                     format="JSONEachRow",
+                 ),
+                 partition_by="toYYYYMM(timestamp)",
+             ),
+         )
+
+
+ def test_kafka_engine_rejects_sample_by():
+     with pytest.raises(ValueError, match="KafkaEngine does not support SAMPLE BY"):
+         OlapTable[SampleEvent](
+             "kafka_table",
+             OlapConfig(
+                 engine=KafkaEngine(
+                     broker_list="kafka:9092",
+                     topic_list="events",
+                     group_name="c",
+                     format="JSONEachRow",
+                 ),
+                 sample_by_expression="event_id",
+             ),
+         )
+
+
+ def test_kafka_engine_accepts_valid_config():
+     table = OlapTable[SampleEvent](
+         "kafka_table",
+         OlapConfig(
+             engine=KafkaEngine(
+                 broker_list="kafka:9092",
+                 topic_list="events",
+                 group_name="c",
+                 format="JSONEachRow",
+             ),
+             settings={"kafka_num_consumers": "2"},
+         ),
+     )
+     assert table.name == "kafka_table"
+     assert isinstance(table.config.engine, KafkaEngine)
+     assert table.config.settings["kafka_num_consumers"] == "2"
+
+
+ def test_kafka_engine_serialization():
+     from moose_lib.internal import _convert_engine_instance_to_config_dict
+
+     engine = KafkaEngine(
+         broker_list="kafka-1:9092,kafka-2:9092",
+         topic_list="events,logs",
+         group_name="moose_group",
+         format="JSONEachRow",
+     )
+     config_dict = _convert_engine_instance_to_config_dict(engine)
+
+     assert config_dict.engine == "Kafka"
+     assert config_dict.broker_list == "kafka-1:9092,kafka-2:9092"
+     assert config_dict.topic_list == "events,logs"
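For orientation, the valid-configuration path tested above could be wired up in application code roughly as follows. This is a sketch based only on the calls shown in these tests; the table name, topic, consumer group, and settings values are illustrative assumptions:

from pydantic import BaseModel
from moose_lib import OlapTable, OlapConfig
from moose_lib.blocks import KafkaEngine

# Illustrative event model; field names are assumptions for this sketch.
class ClickEvent(BaseModel):
    event_id: str
    user_id: str
    timestamp: str

# A Kafka-backed table takes no ORDER BY / PARTITION BY / SAMPLE BY,
# matching the rejection checks in the tests above.
clicks = OlapTable[ClickEvent](
    "click_events",
    OlapConfig(
        engine=KafkaEngine(
            broker_list="kafka-1:9092,kafka-2:9092",
            topic_list="clicks",
            group_name="moose_clicks",
            format="JSONEachRow",
        ),
        settings={"kafka_num_consumers": "2"},
    ),
)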
tests/test_materialized.py ADDED
@@ -0,0 +1,74 @@
+ from datetime import datetime, date
+ from typing import Annotated, Any
+ from pydantic import BaseModel
+ from moose_lib import Key, ClickHouseMaterialized, ClickHouseCodec, UInt64
+ from moose_lib.data_models import _to_columns
+ import pytest
+
+
+ def test_materialized_basic():
+     """Test basic MATERIALIZED annotation converts to correct expression."""
+
+     class MaterializedTest(BaseModel):
+         timestamp: datetime
+         event_date: Annotated[date, ClickHouseMaterialized("toDate(timestamp)")]
+
+     columns = _to_columns(MaterializedTest)
+     by_name = {col.name: col for col in columns}
+
+     assert by_name["timestamp"].materialized is None
+     assert by_name["event_date"].materialized == "toDate(timestamp)"
+
+
+ def test_materialized_hash():
+     """Test MATERIALIZED with hash function."""
+
+     class HashTest(BaseModel):
+         user_id: str
+         user_hash: Annotated[UInt64, ClickHouseMaterialized("cityHash64(user_id)")]
+
+     columns = _to_columns(HashTest)
+     by_name = {col.name: col for col in columns}
+
+     assert by_name["user_id"].materialized is None
+     assert by_name["user_hash"].materialized == "cityHash64(user_id)"
+
+
+ def test_materialized_with_codec():
+     """Test MATERIALIZED combined with CODEC."""
+
+     class MaterializedCodecTest(BaseModel):
+         log_blob: Annotated[Any, ClickHouseCodec("ZSTD(3)")]
+         combination_hash: Annotated[
+             list[UInt64],
+             ClickHouseMaterialized(
+                 "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))"
+             ),
+             ClickHouseCodec("ZSTD(1)"),
+         ]
+
+     columns = _to_columns(MaterializedCodecTest)
+     by_name = {col.name: col for col in columns}
+
+     assert by_name["log_blob"].materialized is None
+     assert by_name["log_blob"].codec == "ZSTD(3)"
+     assert (
+         by_name["combination_hash"].materialized
+         == "arrayMap(kv -> cityHash64(kv.1, kv.2), JSONExtractKeysAndValuesRaw(toString(log_blob)))"
+     )
+     assert by_name["combination_hash"].codec == "ZSTD(1)"
+
+
+ def test_materialized_mutually_exclusive_with_default():
+     """Test that MATERIALIZED and DEFAULT are mutually exclusive."""
+     from moose_lib import clickhouse_default
+
+     class BadModel(BaseModel):
+         bad_field: Annotated[
+             str,
+             clickhouse_default("'default_value'"),
+             ClickHouseMaterialized("'materialized_value'"),
+         ]
+
+     with pytest.raises(ValueError, match="cannot have both DEFAULT and MATERIALIZED"):
+         _to_columns(BadModel)
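A brief sketch of the annotation pattern these tests cover, assuming the annotations compose on other field types the same way they do on list[UInt64] above; PageView and its fields are illustrative names only:

from datetime import datetime, date
from typing import Annotated
from pydantic import BaseModel
from moose_lib import ClickHouseMaterialized, ClickHouseCodec
from moose_lib.data_models import _to_columns

# Illustrative model: a MATERIALIZED column computed from another field, plus a CODEC.
class PageView(BaseModel):
    timestamp: datetime
    url: str
    view_date: Annotated[
        date,
        ClickHouseMaterialized("toDate(timestamp)"),
        ClickHouseCodec("ZSTD(1)"),
    ]

cols = {c.name: c for c in _to_columns(PageView)}
assert cols["view_date"].materialized == "toDate(timestamp)"
assert cols["view_date"].codec == "ZSTD(1)"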
tests/test_metadata.py ADDED
@@ -0,0 +1,37 @@
+ """Tests for metadata handling in OlapTable."""
+
+ from moose_lib.dmv2 import OlapTable, OlapConfig
+ from pydantic import BaseModel
+
+
+ class SampleModel(BaseModel):
+     """Sample model for testing metadata."""
+
+     id: str
+     name: str
+
+
+ def test_respect_user_provided_source():
+     """Test that user-provided source file path is not overwritten."""
+
+     user_provided_path = "custom/path/to/model.py"
+
+     config = OlapConfig(metadata={"source": {"file": user_provided_path}})
+
+     table = OlapTable[SampleModel]("test_user_provided", config=config)
+
+     assert table.metadata is not None
+     assert table.metadata["source"]["file"] == user_provided_path
+
+
+ def test_preserve_metadata_with_auto_capture():
+     """Test that user metadata is preserved while auto-capturing source."""
+
+     config = OlapConfig(metadata={"description": "A test table"})
+
+     table = OlapTable[SampleModel]("test_preserve_metadata", config=config)
+
+     assert table.metadata is not None
+     assert isinstance(table.metadata, dict)
+     assert table.metadata["description"] == "A test table"
+     assert "test_metadata.py" in table.metadata["source"]["file"]
tests/test_moose.py CHANGED
@@ -3,8 +3,13 @@ from dataclasses import dataclass
  from typing import Optional
  from pydantic import BaseModel

- from moose_lib.query_param import convert_pydantic_definition, QueryField, ArrayType, map_params_to_class, \
-     convert_dataclass_definition
+ from moose_lib.query_param import (
+     convert_pydantic_definition,
+     QueryField,
+     ArrayType,
+     map_params_to_class,
+     convert_dataclass_definition,
+ )


  @dataclass
@@ -24,28 +29,28 @@ class QueryParamPydantic(BaseModel):

  query_fields = [
      QueryField(
-         name='optional_field',
-         data_type='Int',
+         name="optional_field",
+         data_type="Int64",
          has_default=False,
          required=False,
      ),
      QueryField(
-         name='date_field',
-         data_type='DateTime',
+         name="date_field",
+         data_type="DateTime",
          has_default=False,
          required=True,
      ),
      QueryField(
-         name='list_field',
+         name="list_field",
          data_type=ArrayType(
-             element_type='String',
+             element_type="String",
          ),
          has_default=False,
          required=True,
      ),
      QueryField(
-         name='int_field',
-         data_type='Int',
+         name="int_field",
+         data_type="Int64",
          has_default=True,
          required=False,
      ),
@@ -58,34 +63,20 @@ parsed_date = datetime.datetime.fromisoformat(datestr)
  def test_pydantic():
      assert convert_pydantic_definition(QueryParamPydantic) == query_fields
      assert map_params_to_class(
-         {
-             "date_field": [datestr],
-             "list_field": ["123"],
-             "int_field": ["1"]
-         },
+         {"date_field": [datestr], "list_field": ["123"], "int_field": ["1"]},
          query_fields,
-         QueryParamPydantic
+         QueryParamPydantic,
      ) == QueryParamPydantic(
-         optional_field=None,
-         date_field=parsed_date,
-         list_field=["123"],
-         int_field=1
+         optional_field=None, date_field=parsed_date, list_field=["123"], int_field=1
      )


  def test_dataclass():
      assert convert_dataclass_definition(QueryParamDataClass) == query_fields
      assert map_params_to_class(
-         {
-             "date_field": [datestr],
-             "list_field": ["123"],
-             "int_field": ["1"]
-         },
+         {"date_field": [datestr], "list_field": ["123"], "int_field": ["1"]},
          query_fields,
-         QueryParamDataClass
+         QueryParamDataClass,
      ) == QueryParamDataClass(
-         optional_field=None,
-         date_field=parsed_date,
-         list_field=["123"],
-         int_field=1
+         optional_field=None, date_field=parsed_date, list_field=["123"], int_field=1
      )