moose-lib 0.6.157__py3-none-any.whl → 0.6.162__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- moose_lib/__init__.py +2 -0
- moose_lib/blocks.py +107 -0
- moose_lib/dmv2/olap_table.py +34 -0
- moose_lib/internal.py +86 -3
- moose_lib/secrets.py +100 -0
- {moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/METADATA +1 -1
- {moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/RECORD +11 -9
- tests/test_s3queue_config.py +167 -38
- tests/test_secrets.py +221 -0
- {moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/WHEEL +0 -0
- {moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/top_level.txt +0 -0
moose_lib/__init__.py
CHANGED
moose_lib/blocks.py
CHANGED
@@ -14,6 +14,9 @@ class ClickHouseEngines(Enum):
     VersionedCollapsingMergeTree = "VersionedCollapsingMergeTree"
     GraphiteMergeTree = "GraphiteMergeTree"
     S3Queue = "S3Queue"
+    S3 = "S3"
+    Buffer = "Buffer"
+    Distributed = "Distributed"
     ReplicatedMergeTree = "ReplicatedMergeTree"
     ReplicatedReplacingMergeTree = "ReplicatedReplacingMergeTree"
     ReplicatedAggregatingMergeTree = "ReplicatedAggregatingMergeTree"

@@ -177,6 +180,110 @@ class S3QueueEngine(EngineConfig):
         if not self.format:
             raise ValueError("S3Queue engine requires 'format'")
 
+@dataclass
+class S3Engine(EngineConfig):
+    """Configuration for S3 engine - direct read/write from S3 storage.
+
+    Args:
+        path: S3 path to the data file(s) (e.g., 's3://bucket/path/file.json')
+        format: Data format (e.g., 'JSONEachRow', 'CSV', 'Parquet')
+        aws_access_key_id: AWS access key ID (optional, omit for public buckets)
+        aws_secret_access_key: AWS secret access key (optional, omit for public buckets)
+        compression: Compression type (e.g., 'gzip', 'zstd', 'auto')
+        partition_strategy: Optional partition strategy
+        partition_columns_in_data_file: Optional partition columns in data file
+    """
+
+    # Required fields
+    path: str
+    format: str
+
+    # Optional fields
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    compression: Optional[str] = None
+    partition_strategy: Optional[str] = None
+    partition_columns_in_data_file: Optional[str] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.path:
+            raise ValueError("S3 engine requires 'path'")
+        if not self.format:
+            raise ValueError("S3 engine requires 'format'")
+
+@dataclass
+class BufferEngine(EngineConfig):
+    """Configuration for Buffer engine - in-memory buffer that flushes to a destination table.
+
+    Args:
+        target_database: Target database name for the destination table
+        target_table: Target table name where data will be flushed
+        num_layers: Number of buffer layers (typically 16)
+        min_time: Minimum time in seconds before flushing
+        max_time: Maximum time in seconds before flushing
+        min_rows: Minimum number of rows before flushing
+        max_rows: Maximum number of rows before flushing
+        min_bytes: Minimum bytes before flushing
+        max_bytes: Maximum bytes before flushing
+        flush_time: Optional flush time in seconds
+        flush_rows: Optional flush number of rows
+        flush_bytes: Optional flush number of bytes
+    """
+
+    # Required fields
+    target_database: str
+    target_table: str
+    num_layers: int
+    min_time: int
+    max_time: int
+    min_rows: int
+    max_rows: int
+    min_bytes: int
+    max_bytes: int
+
+    # Optional fields
+    flush_time: Optional[int] = None
+    flush_rows: Optional[int] = None
+    flush_bytes: Optional[int] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.target_database:
+            raise ValueError("Buffer engine requires 'target_database'")
+        if not self.target_table:
+            raise ValueError("Buffer engine requires 'target_table'")
+
+@dataclass
+class DistributedEngine(EngineConfig):
+    """Configuration for Distributed engine - distributed table across a cluster.
+
+    Args:
+        cluster: Cluster name from the ClickHouse configuration
+        target_database: Database name on the cluster
+        target_table: Table name on the cluster
+        sharding_key: Optional sharding key expression for data distribution
+        policy_name: Optional policy name for data distribution
+    """
+
+    # Required fields
+    cluster: str
+    target_database: str
+    target_table: str
+
+    # Optional fields
+    sharding_key: Optional[str] = None
+    policy_name: Optional[str] = None
+
+    def __post_init__(self):
+        """Validate required fields"""
+        if not self.cluster:
+            raise ValueError("Distributed engine requires 'cluster'")
+        if not self.target_database:
+            raise ValueError("Distributed engine requires 'target_database'")
+        if not self.target_table:
+            raise ValueError("Distributed engine requires 'target_table'")
+
 # ==========================
 # New Table Configuration (Recommended API)
 # ==========================
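For orientation, a minimal sketch of constructing the engine configs this hunk introduces (constructor fields are taken from the dataclasses above; the bucket, table, and cluster names are placeholders):

    from moose_lib.blocks import S3Engine, BufferEngine, DistributedEngine

    # Direct S3 read/write; credentials may be omitted for public buckets.
    s3 = S3Engine(path="s3://bucket/path/file.json", format="JSONEachRow")

    # In-memory buffer that flushes to default.dest_table once a time/row/byte threshold is hit.
    buf = BufferEngine(
        target_database="default", target_table="dest_table", num_layers=16,
        min_time=10, max_time=100, min_rows=10_000, max_rows=100_000,
        min_bytes=10_000_000, max_bytes=100_000_000,
    )

    # Distributed table that fans out to default.local_table on cluster "my_cluster".
    dist = DistributedEngine(cluster="my_cluster", target_database="default", target_table="local_table")

    # __post_init__ enforces the required fields, e.g. an empty path raises
    # ValueError("S3 engine requires 'path'").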
moose_lib/dmv2/olap_table.py
CHANGED
@@ -143,6 +143,7 @@ class OlapConfig(BaseModel):
         granularity: int = 1
 
     indexes: list[TableIndex] = []
+    database: Optional[str] = None  # Optional database name for multi-database support
 
     def model_post_init(self, __context):
         has_fields = bool(self.order_by_fields)

@@ -150,6 +151,39 @@ class OlapConfig(BaseModel):
         if has_fields and has_expr:
             raise ValueError("Provide either order_by_fields or order_by_expression, not both.")
 
+        # Validate that non-MergeTree engines don't have unsupported clauses
+        if self.engine:
+            from ..blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine
+
+            # All non-MergeTree engines don't support ORDER BY
+            non_mergetree_engines = (S3Engine, S3QueueEngine, BufferEngine, DistributedEngine)
+            if isinstance(self.engine, non_mergetree_engines):
+                engine_name = type(self.engine).__name__
+
+                if has_fields or has_expr:
+                    raise ValueError(
+                        f"{engine_name} does not support ORDER BY clauses. "
+                        f"Remove order_by_fields or order_by_expression from your configuration."
+                    )
+
+                if self.sample_by_expression:
+                    raise ValueError(
+                        f"{engine_name} does not support SAMPLE BY clause. "
+                        f"Remove sample_by_expression from your configuration."
+                    )
+
+            # Only S3QueueEngine, BufferEngine, and DistributedEngine don't support PARTITION BY
+            # S3Engine DOES support PARTITION BY
+            engines_without_partition_by = (S3QueueEngine, BufferEngine, DistributedEngine)
+            if isinstance(self.engine, engines_without_partition_by):
+                engine_name = type(self.engine).__name__
+
+                if self.partition_by:
+                    raise ValueError(
+                        f"{engine_name} does not support PARTITION BY clause. "
+                        f"Remove partition_by from your configuration."
+                    )
+
 
 class OlapTable(TypedMooseResource, Generic[T]):
     """Represents an OLAP table (e.g., a ClickHouse table) typed with a Pydantic model.
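A short sketch of how this validation behaves from the caller's side, mirroring the checks above (field values and the "analytics" database name are illustrative):

    from moose_lib import OlapConfig
    from moose_lib.blocks import S3Engine, S3QueueEngine

    # S3Engine allows PARTITION BY, and the new database field selects a target database.
    ok = OlapConfig(
        engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
        partition_by="toYYYYMM(timestamp)",
        database="analytics",
    )

    # ORDER BY on a non-MergeTree engine is rejected during model_post_init.
    try:
        OlapConfig(
            engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
            order_by_fields=["id"],
        )
    except ValueError as err:
        print(err)  # "S3QueueEngine does not support ORDER BY clauses. ..."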
moose_lib/internal.py
CHANGED
@@ -128,6 +128,45 @@ class S3QueueConfigDict(BaseEngineConfigDict):
     headers: Optional[Dict[str, str]] = None
 
 
+class S3ConfigDict(BaseEngineConfigDict):
+    """Configuration for S3 engine."""
+    engine: Literal["S3"] = "S3"
+    path: str
+    format: str
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    compression: Optional[str] = None
+    partition_strategy: Optional[str] = None
+    partition_columns_in_data_file: Optional[str] = None
+
+
+class BufferConfigDict(BaseEngineConfigDict):
+    """Configuration for Buffer engine."""
+    engine: Literal["Buffer"] = "Buffer"
+    target_database: str
+    target_table: str
+    num_layers: int
+    min_time: int
+    max_time: int
+    min_rows: int
+    max_rows: int
+    min_bytes: int
+    max_bytes: int
+    flush_time: Optional[int] = None
+    flush_rows: Optional[int] = None
+    flush_bytes: Optional[int] = None
+
+
+class DistributedConfigDict(BaseEngineConfigDict):
+    """Configuration for Distributed engine."""
+    engine: Literal["Distributed"] = "Distributed"
+    cluster: str
+    target_database: str
+    target_table: str
+    sharding_key: Optional[str] = None
+    policy_name: Optional[str] = None
+
+
 # Discriminated union of all engine configurations
 EngineConfigDict = Union[
     MergeTreeConfigDict,

@@ -138,7 +177,10 @@ EngineConfigDict = Union[
     ReplicatedReplacingMergeTreeConfigDict,
     ReplicatedAggregatingMergeTreeConfigDict,
     ReplicatedSummingMergeTreeConfigDict,
-    S3QueueConfigDict
+    S3QueueConfigDict,
+    S3ConfigDict,
+    BufferConfigDict,
+    DistributedConfigDict
 ]
 
 

@@ -171,6 +213,7 @@ class TableConfig(BaseModel):
     table_settings: Optional[dict[str, str]] = None
     indexes: list[OlapConfig.TableIndex] = []
     ttl: Optional[str] = None
+    database: Optional[str] = None
 
 
 class TopicConfig(BaseModel):

@@ -463,7 +506,7 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
     Returns:
         EngineConfigDict with engine-specific configuration
     """
-    from moose_lib.blocks import S3QueueEngine
+    from moose_lib.blocks import S3QueueEngine, S3Engine, BufferEngine, DistributedEngine
 
     # Try S3Queue first
     if isinstance(engine, S3QueueEngine):

@@ -475,6 +518,45 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon
             compression=engine.compression,
             headers=engine.headers
         )
+
+    # Try S3
+    if isinstance(engine, S3Engine):
+        return S3ConfigDict(
+            path=engine.path,
+            format=engine.format,
+            aws_access_key_id=engine.aws_access_key_id,
+            aws_secret_access_key=engine.aws_secret_access_key,
+            compression=engine.compression,
+            partition_strategy=engine.partition_strategy,
+            partition_columns_in_data_file=engine.partition_columns_in_data_file
+        )
+
+    # Try Buffer
+    if isinstance(engine, BufferEngine):
+        return BufferConfigDict(
+            target_database=engine.target_database,
+            target_table=engine.target_table,
+            num_layers=engine.num_layers,
+            min_time=engine.min_time,
+            max_time=engine.max_time,
+            min_rows=engine.min_rows,
+            max_rows=engine.max_rows,
+            min_bytes=engine.min_bytes,
+            max_bytes=engine.max_bytes,
+            flush_time=engine.flush_time,
+            flush_rows=engine.flush_rows,
+            flush_bytes=engine.flush_bytes
+        )
+
+    # Try Distributed
+    if isinstance(engine, DistributedEngine):
+        return DistributedConfigDict(
+            cluster=engine.cluster,
+            target_database=engine.target_database,
+            target_table=engine.target_table,
+            sharding_key=engine.sharding_key,
+            policy_name=engine.policy_name
+        )
 
     # Try basic engines
     basic_config = _convert_basic_engine_instance(engine)

@@ -613,6 +695,7 @@ def to_infra_map() -> dict:
             table_settings=table_settings if table_settings else None,
             indexes=table.config.indexes,
             ttl=table.config.ttl,
+            database=table.config.database,
         )
 
     for name, stream in get_streams().items():

@@ -716,7 +799,7 @@ def to_infra_map() -> dict:
         web_apps=web_apps
     )
 
-    return infra_map.model_dump(by_alias=True)
+    return infra_map.model_dump(by_alias=True, exclude_none=False)
 
 
 def load_models():
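As a rough sketch of what the new conversion branches produce, assuming the config-dict classes expose their fields as attributes the way the existing S3Queue tests do (the buffer parameters below are placeholders):

    from moose_lib.blocks import BufferEngine
    from moose_lib.internal import _convert_engine_instance_to_config_dict, BufferConfigDict

    engine = BufferEngine(
        target_database="default", target_table="dest", num_layers=16,
        min_time=10, max_time=100, min_rows=10_000, max_rows=100_000,
        min_bytes=10_000_000, max_bytes=100_000_000,
    )

    cfg = _convert_engine_instance_to_config_dict(engine)
    assert isinstance(cfg, BufferConfigDict)
    assert cfg.engine == "Buffer"   # Literal discriminator set by default
    assert cfg.flush_time is None   # optional fields pass through as None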
moose_lib/secrets.py
ADDED
@@ -0,0 +1,100 @@
+"""Utilities for runtime environment variable resolution.
+
+This module provides functionality to mark values that should be resolved
+from environment variables at runtime by the Moose CLI, rather than being
+embedded at build time.
+
+Example:
+    >>> from moose_lib import S3QueueEngine, moose_runtime_env
+    >>>
+    >>> engine = S3QueueEngine(
+    ...     s3_path="s3://bucket/data/*.json",
+    ...     format="JSONEachRow",
+    ...     aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
+    ...     aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY")
+    ... )
+"""
+
+#: Prefix used to mark values for runtime environment variable resolution.
+MOOSE_RUNTIME_ENV_PREFIX = "__MOOSE_RUNTIME_ENV__:"
+
+
+def get(env_var_name: str) -> str:
+    """Marks a value to be resolved from an environment variable at runtime.
+
+    When you use this function, the value is not read immediately. Instead,
+    a special marker is created that the Moose CLI will resolve when it
+    processes your infrastructure configuration.
+
+    This is useful for:
+    - Credentials that should never be embedded in Docker images
+    - Configuration that can be rotated without rebuilding
+    - Different values for different environments (dev, staging, prod)
+    - Any runtime configuration in infrastructure elements (Tables, Topics, etc.)
+
+    Args:
+        env_var_name: Name of the environment variable to resolve
+
+    Returns:
+        A marker string that Moose CLI will resolve at runtime
+
+    Raises:
+        ValueError: If the environment variable name is empty
+
+    Example:
+        >>> # Instead of this (evaluated at build time):
+        >>> import os
+        >>> aws_key = os.environ.get("AWS_ACCESS_KEY_ID")
+        >>>
+        >>> # Use this (evaluated at runtime):
+        >>> aws_key = moose_runtime_env.get("AWS_ACCESS_KEY_ID")
+    """
+    if not env_var_name or not env_var_name.strip():
+        raise ValueError("Environment variable name cannot be empty")
+    return f"{MOOSE_RUNTIME_ENV_PREFIX}{env_var_name}"
+
+
+class MooseRuntimeEnv:
+    """Utilities for marking values to be resolved from environment variables at runtime.
+
+    This class provides a namespace for runtime environment variable resolution.
+    Use the singleton instance `moose_runtime_env` rather than instantiating this class directly.
+
+    Attributes:
+        get: Static method for creating runtime environment variable markers
+    """
+
+    @staticmethod
+    def get(env_var_name: str) -> str:
+        """Marks a value to be resolved from an environment variable at runtime.
+
+        Args:
+            env_var_name: Name of the environment variable to resolve
+
+        Returns:
+            A marker string that Moose CLI will resolve at runtime
+
+        Raises:
+            ValueError: If the environment variable name is empty
+        """
+        return get(env_var_name)
+
+
+# Export singleton instance for module-level access
+moose_runtime_env = MooseRuntimeEnv()
+
+# Legacy exports for backwards compatibility
+MooseEnvSecrets = MooseRuntimeEnv  # Deprecated: Use MooseRuntimeEnv instead
+moose_env_secrets = moose_runtime_env  # Deprecated: Use moose_runtime_env instead
+MOOSE_ENV_SECRET_PREFIX = MOOSE_RUNTIME_ENV_PREFIX  # Deprecated: Use MOOSE_RUNTIME_ENV_PREFIX instead
+
+__all__ = [
+    "moose_runtime_env",
+    "MooseRuntimeEnv",
+    "get",
+    "MOOSE_RUNTIME_ENV_PREFIX",
+    # Legacy exports (deprecated)
+    "moose_env_secrets",
+    "MooseEnvSecrets",
+    "MOOSE_ENV_SECRET_PREFIX",
+]
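In practice the marker is just a prefixed string, as the module docstring shows; a small usage sketch (bucket path and variable names are placeholders):

    from moose_lib.blocks import S3QueueEngine
    from moose_lib.secrets import moose_runtime_env, MOOSE_RUNTIME_ENV_PREFIX

    engine = S3QueueEngine(
        s3_path="s3://my-bucket/data/*.json",
        format="JSONEachRow",
        aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"),
    )

    # Nothing is read from the environment at this point; only markers are stored.
    assert engine.aws_access_key_id == "__MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID"
    assert engine.aws_secret_access_key.startswith(MOOSE_RUNTIME_ENV_PREFIX)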
{moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/RECORD
CHANGED

@@ -1,12 +1,13 @@
-moose_lib/__init__.py,sha256=
-moose_lib/blocks.py,sha256=
+moose_lib/__init__.py,sha256=1wG0Y8VGAdRpMCsTuHhmbltAUYCsGH-a8v8W3sN1gQ8,957
+moose_lib/blocks.py,sha256=TJfaBJjDbCjGBVGpcV2HwVLWxiuqBC9lhx7FXCCbfsE,18238
 moose_lib/commons.py,sha256=YDMzRO3ySa_iQRknmFwwE539KgSyjWhYJ-E3jIsfSgc,6585
 moose_lib/data_models.py,sha256=IovDWmdHdoG7M3QmMbvWg7wDzrLlU-pea3CDoF9mShA,17615
 moose_lib/dmv2_serializer.py,sha256=CL_Pvvg8tJOT8Qk6hywDNzY8MYGhMVdTOw8arZi3jng,49
-moose_lib/internal.py,sha256=
+moose_lib/internal.py,sha256=7T6PQyU2QLquSSPujO37e9t0tC_QYXpIK2CQom69li4,30453
 moose_lib/main.py,sha256=JWsgza52xEh25AyF61cO1ItJ8VXJHHz8j-4HG445Whg,20380
 moose_lib/query_builder.py,sha256=-L5p2dArBx3SBA-WZPkcCJPemKXnqJw60NHy-wn5wa4,6619
 moose_lib/query_param.py,sha256=kxcR09BMIsEg4o2qetjKrVu1YFRaLfMEzwzyGsKUpvA,6474
+moose_lib/secrets.py,sha256=upFjLorfAsiyL3F3UIl2STUFGjC6Pu0-Qe1FGs5S09c,3480
 moose_lib/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 moose_lib/clients/redis_client.py,sha256=BDYjJ582V-rW92qVQ4neZ9Pu7JtDNt8x8jBWApt1XUg,11895
 moose_lib/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -19,7 +20,7 @@ moose_lib/dmv2/ingest_api.py,sha256=XhvHHgGPXp-BuRpAALth-FRhanwy-zJQ_83Cg_RLolM,
 moose_lib/dmv2/ingest_pipeline.py,sha256=R8NRrfD9zkEzwWEJKIrNL9Nk18DdGc7wdAakae_2-7o,8459
 moose_lib/dmv2/life_cycle.py,sha256=wl0k6yzwU1MJ_fO_UkN29buoY5G6ChYZvfwigP9fVfM,1254
 moose_lib/dmv2/materialized_view.py,sha256=wTam1V5CC2rExt7YrdK_Cz4rRTONm2keKOF951LlCP4,4875
-moose_lib/dmv2/olap_table.py,sha256=
+moose_lib/dmv2/olap_table.py,sha256=AfKTH6kabyE2qzrz-9C9BdDHyQor_JOTfIqplAkP56k,37474
 moose_lib/dmv2/registry.py,sha256=5V8HRKBJXKLIi5tdIC2tKEmsGELQ1C2fBrMjns8a3ao,2947
 moose_lib/dmv2/sql_resource.py,sha256=kUZoGqxhZMHMthtBZGYJBxTFjXkspXiWLXhJRYXgGUM,1864
 moose_lib/dmv2/stream.py,sha256=pDryS1x5zMkDSJsFlGqm8quwKTl1ctAL-a_U8ySgqQI,18043

@@ -38,10 +39,11 @@ tests/test_moose.py,sha256=mBsx_OYWmL8ppDzL_7Bd7xR6qf_i3-pCIO3wm2iQNaA,2136
 tests/test_olap_table_versioning.py,sha256=ffuJsO5TYKBd2aWV0GWDmosCwL4j518Y_MqYJdrfgDY,7041
 tests/test_query_builder.py,sha256=O3imdFSaqU13kbK1jSQaHbBgynhVmJaApT8DlRqYwJU,1116
 tests/test_redis_client.py,sha256=d9_MLYsJ4ecVil_jPB2gW3Q5aWnavxmmjZg2uYI3LVo,3256
-tests/test_s3queue_config.py,sha256=
+tests/test_s3queue_config.py,sha256=TSXSTAjcAVpZuvQza2i0XWGQTeaNs4mRZeH4pFCujNY,14728
+tests/test_secrets.py,sha256=g5ZjtOyT7vTz_sRUB71Vd6s715u0gMc4C-_8m_59Mtg,8347
 tests/test_simple_aggregate.py,sha256=yyFSYHUXskA1aTcwC-KQ9XmgOikeQ8wKXzntsUBIC6w,4055
 tests/test_web_app.py,sha256=iL86sg0NS2k6nP8jIGKZvrtg0BjvaqNTRp7-4T1TTck,6557
-moose_lib-0.6.
-moose_lib-0.6.
-moose_lib-0.6.
-moose_lib-0.6.
+moose_lib-0.6.162.dist-info/METADATA,sha256=dFe4T9iDSF2Ik79cm_6VsP3OPEqGB8avr-QsA2i4rCY,827
+moose_lib-0.6.162.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+moose_lib-0.6.162.dist-info/top_level.txt,sha256=XEns2-4aCmGp2XjJAeEH9TAUcGONLnSLy6ycT9FSJh8,16
+moose_lib-0.6.162.dist-info/RECORD,,
tests/test_s3queue_config.py
CHANGED
@@ -8,7 +8,7 @@ import warnings
 from moose_lib import OlapTable, OlapConfig, ClickHouseEngines
 from moose_lib.blocks import S3QueueEngine, MergeTreeEngine, ReplacingMergeTreeEngine
 from moose_lib.internal import (
-    _convert_engine_to_config_dict,
+    _convert_engine_to_config_dict,
     EngineConfigDict,
     S3QueueConfigDict,
     MergeTreeConfigDict,

@@ -41,8 +41,8 @@ def test_olap_config_accepts_engine_config():
         aws_secret_access_key="secret123"
     )
     config = OlapConfig(
-        engine=s3_engine
-        order_by_fields
+        engine=s3_engine
+        # Note: S3QueueEngine does not support order_by_fields
     )
     assert isinstance(config.engine, S3QueueEngine)
     assert config.engine.s3_path == "s3://bucket/data/*.json"

@@ -58,14 +58,14 @@ def test_olap_table_with_s3queue_engine():
                 format="JSONEachRow",
                 compression="gzip"
             ),
-            order_by_fields
+            # Note: S3QueueEngine does not support order_by_fields
             settings={
                 "s3queue_mode": "unordered",
                 "s3queue_keeper_path": "/clickhouse/s3queue/test"
             }
         )
     )
-
+
     assert table.name == "TestS3Table"
     assert isinstance(table.config.engine, S3QueueEngine)
     assert table.config.engine.s3_path == "s3://test-bucket/logs/*.json"

@@ -83,7 +83,7 @@ def test_olap_table_with_mergetree_engines():
         )
     )
     assert isinstance(table1.config.engine, MergeTreeEngine)
-
+
     # Test with ReplacingMergeTreeEngine
     table2 = OlapTable[SampleEvent](
         "ReplacingTable",

@@ -111,10 +111,10 @@ def test_engine_conversion_to_dict():
             )
         )
     )
-
+
     # Convert engine to dict
     engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
-
+
     assert engine_dict.engine == "S3Queue"
     assert engine_dict.s3_path == "s3://bucket/data/*.parquet"
     assert engine_dict.format == "Parquet"

@@ -133,10 +133,10 @@ def test_engine_conversion_with_enum():
             engine=ClickHouseEngines.ReplacingMergeTree
         )
     )
-
+
     # Convert engine to dict
     engine_dict = _convert_engine_to_config_dict(table.config.engine, table)
-
+
     assert engine_dict.engine == "ReplacingMergeTree"
 
 

@@ -150,16 +150,16 @@ def test_backward_compatibility():
             order_by_fields=["id"]
         )
     )
-
+
     # New API with EngineConfig
     new_table = OlapTable[SampleEvent](
-        "NewTable",
+        "NewTable",
         OlapConfig(
             engine=MergeTreeEngine(),
             order_by_fields=["id"]
         )
     )
-
+
     # Both should work
     assert old_table.config.engine == ClickHouseEngines.MergeTree
     assert isinstance(new_table.config.engine, MergeTreeEngine)

@@ -169,7 +169,7 @@ def test_deprecation_warning_for_enum():
     """Test that using enum engine triggers deprecation warning."""
     with warnings.catch_warnings(record=True) as w:
         warnings.simplefilter("always")
-
+
         table = OlapTable[SampleEvent](
             "LegacyTable",
             OlapConfig(

@@ -177,7 +177,7 @@ def test_deprecation_warning_for_enum():
                 order_by_fields=["id"]
             )
         )
-
+
         # Check that a deprecation warning was issued
         assert len(w) == 1
         assert issubclass(w[0].category, DeprecationWarning)

@@ -198,12 +198,12 @@ def test_s3queue_with_all_options():
             "Authorization": "Bearer token"
         }
    )
-
+
     table = OlapTable[SampleEvent](
         "FullConfigTable",
         OlapConfig(
             engine=engine,
-            order_by_fields
+            # Note: S3QueueEngine does not support order_by_fields
             settings={
                 "s3queue_mode": "ordered",
                 "s3queue_keeper_path": "/clickhouse/s3queue/full",

@@ -212,7 +212,7 @@ def test_s3queue_with_all_options():
             }
         )
     )
-
+
     assert table.config.engine.s3_path == "s3://my-bucket/path/to/data/*.json"
     assert table.config.engine.format == "JSONEachRow"
     assert table.config.engine.compression == "gzip"

@@ -226,42 +226,49 @@ def test_s3queue_public_bucket():
         format="Parquet"
         # No AWS credentials needed for public buckets
     )
-
+
     table = OlapTable[SampleEvent](
         "PublicBucketTable",
         OlapConfig(
-            engine=engine
-            order_by_fields
+            engine=engine
+            # Note: S3QueueEngine does not support order_by_fields
         )
     )
-
+
     assert table.config.engine.aws_access_key_id is None
     assert table.config.engine.aws_secret_access_key is None
 
 
 def test_migration_from_legacy_to_new():
     """Test migration path from legacy to new API."""
-    # Legacy approach
+    # Legacy approach (with MergeTree, which supports order_by_fields)
     legacy_config = OlapConfig(
-        engine=ClickHouseEngines.
+        engine=ClickHouseEngines.MergeTree,
         order_by_fields=["timestamp"]
     )
-
-    # New approach - equivalent configuration
+
+    # New approach - equivalent configuration for MergeTree
     new_config = OlapConfig(
-        engine=
-            s3_path="s3://bucket/data/*.json",
-            format="JSONEachRow"
-        ),
+        engine=MergeTreeEngine(),
         order_by_fields=["timestamp"]
     )
-
+
     # Both should have the same order_by_fields
     assert legacy_config.order_by_fields == new_config.order_by_fields
-
+
     # Engine types should be different
     assert isinstance(legacy_config.engine, ClickHouseEngines)
-    assert isinstance(new_config.engine,
+    assert isinstance(new_config.engine, MergeTreeEngine)
+
+    # For S3Queue, the new API correctly prevents unsupported clauses
+    s3queue_config = OlapConfig(
+        engine=S3QueueEngine(
+            s3_path="s3://bucket/data/*.json",
+            format="JSONEachRow"
+        )
+        # Note: order_by_fields is not supported for S3QueueEngine
+    )
+    assert isinstance(s3queue_config.engine, S3QueueEngine)
 
 
 def test_engine_config_validation():

@@ -272,7 +279,7 @@ def test_engine_config_validation():
             s3_path="",  # Empty path should fail
             format="JSONEachRow"
         )
-
+
     with pytest.raises(ValueError, match="S3Queue engine requires 'format'"):
         S3QueueEngine(
             s3_path="s3://bucket/data/*.json",

@@ -280,21 +287,143 @@ def test_engine_config_validation():
         )
 
 
+def test_non_mergetree_engines_reject_unsupported_clauses():
+    """Test that non-MergeTree engines reject unsupported ORDER BY and SAMPLE BY clauses."""
+    from moose_lib.blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine
+
+    # Test S3Engine rejects ORDER BY
+    with pytest.raises(ValueError, match="S3Engine does not support ORDER BY clauses"):
+        OlapConfig(
+            engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
+            order_by_fields=["id"]
+        )
+
+    with pytest.raises(ValueError, match="S3Engine does not support ORDER BY clauses"):
+        OlapConfig(
+            engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
+            order_by_expression="(id, name)"
+        )
+
+    # Test S3Engine rejects SAMPLE BY
+    with pytest.raises(ValueError, match="S3Engine does not support SAMPLE BY clause"):
+        OlapConfig(
+            engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
+            sample_by_expression="cityHash64(id)"
+        )
+
+    # Test S3Engine DOES support PARTITION BY (should not raise)
+    config_s3_with_partition = OlapConfig(
+        engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow"),
+        partition_by="toYYYYMM(timestamp)"
+    )
+    assert config_s3_with_partition.partition_by == "toYYYYMM(timestamp)"
+
+    # Test S3QueueEngine rejects ORDER BY
+    with pytest.raises(ValueError, match="S3QueueEngine does not support ORDER BY clauses"):
+        OlapConfig(
+            engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
+            order_by_fields=["id"]
+        )
+
+    # Test S3QueueEngine rejects PARTITION BY (unlike S3Engine)
+    with pytest.raises(ValueError, match="S3QueueEngine does not support PARTITION BY clause"):
+        OlapConfig(
+            engine=S3QueueEngine(s3_path="s3://bucket/*.json", format="JSONEachRow"),
+            partition_by="toYYYYMM(timestamp)"
+        )
+
+    # Test BufferEngine rejects ORDER BY
+    with pytest.raises(ValueError, match="BufferEngine does not support ORDER BY clauses"):
+        OlapConfig(
+            engine=BufferEngine(
+                target_database="default",
+                target_table="dest",
+                num_layers=16,
+                min_time=10,
+                max_time=100,
+                min_rows=10000,
+                max_rows=100000,
+                min_bytes=10000000,
+                max_bytes=100000000
+            ),
+            order_by_fields=["id"]
+        )
+
+    # Test BufferEngine rejects PARTITION BY
+    with pytest.raises(ValueError, match="BufferEngine does not support PARTITION BY clause"):
+        OlapConfig(
+            engine=BufferEngine(
+                target_database="default",
+                target_table="dest",
+                num_layers=16,
+                min_time=10,
+                max_time=100,
+                min_rows=10000,
+                max_rows=100000,
+                min_bytes=10000000,
+                max_bytes=100000000
+            ),
+            partition_by="date"
+        )
+
+    # Test DistributedEngine rejects PARTITION BY
+    with pytest.raises(ValueError, match="DistributedEngine does not support PARTITION BY clause"):
+        OlapConfig(
+            engine=DistributedEngine(
+                cluster="my_cluster",
+                target_database="default",
+                target_table="local_table"
+            ),
+            partition_by="date"
+        )
+
+    # Verify that S3Engine works without unsupported clauses
+    config = OlapConfig(
+        engine=S3Engine(path="s3://bucket/file.json", format="JSONEachRow")
+    )
+    assert isinstance(config.engine, S3Engine)
+
+
+def test_mergetree_engines_still_accept_clauses():
+    """Test that MergeTree engines still accept ORDER BY, PARTITION BY, and SAMPLE BY clauses."""
+    from moose_lib.blocks import MergeTreeEngine, ReplacingMergeTreeEngine
+
+    # MergeTree should accept all clauses
+    config1 = OlapConfig(
+        engine=MergeTreeEngine(),
+        order_by_fields=["id", "timestamp"],
+        partition_by="toYYYYMM(timestamp)",
+        sample_by_expression="cityHash64(id)"
+    )
+    assert config1.order_by_fields == ["id", "timestamp"]
+    assert config1.partition_by == "toYYYYMM(timestamp)"
+    assert config1.sample_by_expression == "cityHash64(id)"
+
+    # ReplacingMergeTree should also accept these clauses
+    config2 = OlapConfig(
+        engine=ReplacingMergeTreeEngine(ver="updated_at"),
+        order_by_expression="(id, name)",
+        partition_by="date"
+    )
+    assert config2.order_by_expression == "(id, name)"
+    assert config2.partition_by == "date"
+
+
 def test_multiple_engine_types():
     """Test that different engine types can be used in the same application."""
     tables = []
-
+
     # Create tables with different engine types
     tables.append(OlapTable[SampleEvent](
         "MergeTreeTable",
         OlapConfig(engine=MergeTreeEngine())
     ))
-
+
     tables.append(OlapTable[SampleEvent](
         "ReplacingTreeTable",
         OlapConfig(engine=ReplacingMergeTreeEngine())
     ))
-
+
     tables.append(OlapTable[SampleEvent](
         "S3QueueTable",
         OlapConfig(

@@ -304,7 +433,7 @@ def test_multiple_engine_types():
             )
         )
     ))
-
+
     # Verify all tables were created with correct engine types
     assert isinstance(tables[0].config.engine, MergeTreeEngine)
     assert isinstance(tables[1].config.engine, ReplacingMergeTreeEngine)
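Rounding this out, a sketch of declaring a table on one of the new engines in the same style these tests use (the Pydantic model, table name, and cluster details are made up for illustration):

    from pydantic import BaseModel
    from moose_lib import OlapTable, OlapConfig
    from moose_lib.blocks import DistributedEngine

    class SampleEvent(BaseModel):
        id: str
        timestamp: float

    events_distributed = OlapTable[SampleEvent](
        "EventsDistributed",
        OlapConfig(
            engine=DistributedEngine(
                cluster="my_cluster",
                target_database="default",
                target_table="events_local",
                sharding_key="cityHash64(id)",
            )
            # No order_by/partition_by/sample_by: DistributedEngine rejects them.
        ),
    )
    assert isinstance(events_distributed.config.engine, DistributedEngine)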
tests/test_secrets.py
ADDED
@@ -0,0 +1,221 @@
+"""Tests for the moose_lib.secrets module.
+
+This module tests the runtime environment variable marker functionality,
+which allows users to defer secret resolution until runtime rather than
+embedding secrets at build time.
+"""
+
+import pytest
+from moose_lib.secrets import moose_runtime_env, get, MOOSE_RUNTIME_ENV_PREFIX
+
+
+class TestMooseRuntimeEnvGet:
+    """Tests for the moose_runtime_env.get() method."""
+
+    def test_creates_marker_with_correct_prefix(self):
+        """Should create a marker string with the correct prefix."""
+        var_name = "AWS_ACCESS_KEY_ID"
+        result = moose_runtime_env.get(var_name)
+
+        assert result == f"{MOOSE_RUNTIME_ENV_PREFIX}{var_name}"
+        assert result == "__MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID"
+
+    def test_handles_different_variable_names(self):
+        """Should handle different environment variable names correctly."""
+        test_cases = [
+            "AWS_SECRET_ACCESS_KEY",
+            "DATABASE_PASSWORD",
+            "API_KEY",
+            "MY_CUSTOM_SECRET",
+        ]
+
+        for var_name in test_cases:
+            result = moose_runtime_env.get(var_name)
+            assert result == f"{MOOSE_RUNTIME_ENV_PREFIX}{var_name}"
+            assert var_name in result
+
+    def test_raises_error_for_empty_string(self):
+        """Should raise ValueError for empty string."""
+        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
+            moose_runtime_env.get("")
+
+    def test_raises_error_for_whitespace_only(self):
+        """Should raise ValueError for whitespace-only string."""
+        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
+            moose_runtime_env.get(" ")
+
+    def test_raises_error_for_tabs_only(self):
+        """Should raise ValueError for string with only tabs."""
+        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
+            moose_runtime_env.get("\t\t")
+
+    def test_allows_underscores_in_variable_names(self):
+        """Should allow variable names with underscores."""
+        var_name = "MY_LONG_VAR_NAME"
+        result = moose_runtime_env.get(var_name)
+
+        assert result == f"{MOOSE_RUNTIME_ENV_PREFIX}{var_name}"
+
+    def test_allows_numbers_in_variable_names(self):
+        """Should allow variable names with numbers."""
+        var_name = "API_KEY_123"
+        result = moose_runtime_env.get(var_name)
+
+        assert result == f"{MOOSE_RUNTIME_ENV_PREFIX}{var_name}"
+
+    def test_preserves_exact_casing(self):
+        """Should preserve exact variable name casing."""
+        var_name = "MixedCase_VarName"
+        result = moose_runtime_env.get(var_name)
+
+        assert var_name in result
+        assert var_name.lower() not in result  # Ensure casing wasn't changed
+
+    def test_can_be_used_in_s3queue_config(self):
+        """Should create markers that can be used in S3Queue configuration."""
+        access_key_marker = moose_runtime_env.get("AWS_ACCESS_KEY_ID")
+        secret_key_marker = moose_runtime_env.get("AWS_SECRET_ACCESS_KEY")
+
+        config = {
+            "aws_access_key_id": access_key_marker,
+            "aws_secret_access_key": secret_key_marker,
+        }
+
+        assert "AWS_ACCESS_KEY_ID" in config["aws_access_key_id"]
+        assert "AWS_SECRET_ACCESS_KEY" in config["aws_secret_access_key"]
+
+
+class TestModuleLevelGetFunction:
+    """Tests for the module-level get() function."""
+
+    def test_module_level_get_creates_marker(self):
+        """The module-level get function should create markers."""
+        var_name = "TEST_SECRET"
+        result = get(var_name)
+
+        assert result == f"{MOOSE_RUNTIME_ENV_PREFIX}{var_name}"
+
+    def test_module_level_get_matches_class_method(self):
+        """Module-level get should produce same result as class method."""
+        var_name = "MY_SECRET"
+
+        result_module = get(var_name)
+        result_class = moose_runtime_env.get(var_name)
+
+        assert result_module == result_class
+
+    def test_module_level_get_raises_error_for_empty(self):
+        """Module-level get should raise ValueError for empty string."""
+        with pytest.raises(ValueError, match="Environment variable name cannot be empty"):
+            get("")
+
+
+class TestMooseRuntimeEnvPrefix:
+    """Tests for the MOOSE_RUNTIME_ENV_PREFIX constant."""
+
+    def test_has_expected_value(self):
+        """Should have the expected prefix value."""
+        assert MOOSE_RUNTIME_ENV_PREFIX == "__MOOSE_RUNTIME_ENV__:"
+
+    def test_is_string(self):
+        """Should be a string."""
+        assert isinstance(MOOSE_RUNTIME_ENV_PREFIX, str)
+
+    def test_is_not_empty(self):
+        """Should not be empty."""
+        assert len(MOOSE_RUNTIME_ENV_PREFIX) > 0
+
+
+class TestMarkerFormatValidation:
+    """Tests for marker format validation and parsing."""
+
+    def test_creates_easily_detectable_markers(self):
+        """Should create markers that are easily detectable."""
+        marker = moose_runtime_env.get("TEST_VAR")
+
+        assert marker.startswith("__MOOSE_RUNTIME_ENV__:")
+
+    def test_markers_can_be_split_to_extract_variable_name(self):
+        """Should create markers that can be split to extract variable name."""
+        var_name = "MY_SECRET"
+        marker = moose_runtime_env.get(var_name)
+
+        parts = marker.split(MOOSE_RUNTIME_ENV_PREFIX)
+        assert len(parts) == 2
+        assert parts[1] == var_name
+
+    def test_markers_are_json_serializable(self):
+        """Should create markers that are JSON serializable."""
+        import json
+
+        marker = moose_runtime_env.get("TEST_VAR")
+        json_str = json.dumps({"secret": marker})
+        parsed = json.loads(json_str)
+
+        assert parsed["secret"] == marker
+
+    def test_markers_work_with_dict_serialization(self):
+        """Should work correctly with dictionary serialization."""
+        marker = moose_runtime_env.get("DATABASE_PASSWORD")
+
+        config = {
+            "password": marker,
+            "other_field": "value"
+        }
+
+        # Verify the marker is preserved in the dict
+        assert config["password"] == marker
+        assert MOOSE_RUNTIME_ENV_PREFIX in config["password"]
+
+
+class TestIntegrationScenarios:
+    """Integration tests for real-world usage scenarios."""
+
+    def test_s3queue_engine_with_secrets(self):
+        """Should work correctly in S3Queue engine configuration."""
+        from moose_lib.blocks import S3QueueEngine
+
+        engine = S3QueueEngine(
+            s3_path="s3://my-bucket/data/*.json",
+            format="JSONEachRow",
+            aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"),
+            aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"),
+        )
+
+        # Verify markers were set correctly
+        assert engine.aws_access_key_id == "__MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID"
+        assert engine.aws_secret_access_key == "__MOOSE_RUNTIME_ENV__:AWS_SECRET_ACCESS_KEY"
+
+    def test_multiple_secrets_in_same_config(self):
+        """Should handle multiple secrets in the same configuration."""
+        config = {
+            "username": moose_runtime_env.get("DB_USERNAME"),
+            "password": moose_runtime_env.get("DB_PASSWORD"),
+            "api_key": moose_runtime_env.get("API_KEY"),
+        }
+
+        # All should have the correct prefix
+        for value in config.values():
+            assert value.startswith(MOOSE_RUNTIME_ENV_PREFIX)
+
+        # Each should have the correct variable name
+        assert "DB_USERNAME" in config["username"]
+        assert "DB_PASSWORD" in config["password"]
+        assert "API_KEY" in config["api_key"]
+
+    def test_mixed_secret_and_plain_values(self):
+        """Should handle mix of secret markers and plain values."""
+        config = {
+            "region": "us-east-1",  # Plain value
+            "access_key": moose_runtime_env.get("AWS_ACCESS_KEY_ID"),  # Secret
+            "bucket": "my-bucket",  # Plain value
+            "secret_key": moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"),  # Secret
+        }
+
+        # Plain values should be unchanged
+        assert config["region"] == "us-east-1"
+        assert config["bucket"] == "my-bucket"
+
+        # Secrets should have markers
+        assert MOOSE_RUNTIME_ENV_PREFIX in config["access_key"]
+        assert MOOSE_RUNTIME_ENV_PREFIX in config["secret_key"]
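On the consuming side, resolving a marker amounts to stripping the prefix and reading the named variable. A hypothetical illustration of that step, not the Moose CLI's actual implementation, just what the marker format enables:

    import os
    from moose_lib.secrets import MOOSE_RUNTIME_ENV_PREFIX

    def resolve_if_marker(value: str) -> str:
        """Return the referenced environment variable if value is a marker, else the value itself."""
        if value.startswith(MOOSE_RUNTIME_ENV_PREFIX):
            env_var_name = value[len(MOOSE_RUNTIME_ENV_PREFIX):]
            return os.environ.get(env_var_name, "")
        return value

    os.environ["AWS_ACCESS_KEY_ID"] = "AKIA-example"  # placeholder value
    assert resolve_if_marker("__MOOSE_RUNTIME_ENV__:AWS_ACCESS_KEY_ID") == "AKIA-example"
    assert resolve_if_marker("us-east-1") == "us-east-1"  # plain values pass through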
{moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/WHEEL
File without changes

{moose_lib-0.6.157.dist-info → moose_lib-0.6.162.dist-info}/top_level.txt
File without changes