@altimateai/altimate-code 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/bin/altimate +6 -0
- package/bin/altimate-code +6 -0
- package/dbt-tools/bin/altimate-dbt +2 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
- package/dbt-tools/dist/index.js +23859 -0
- package/package.json +13 -13
- package/postinstall.mjs +42 -0
- package/skills/altimate-setup/SKILL.md +31 -0
package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import typing as t
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class DataType:
    """Base class for the Spark-style SQL type objects in this module.

    Instances are value objects: two types compare equal when they are of the
    same class and carry the same attributes. The SQL name is derived from the
    class name (e.g. ``StringType`` -> ``string``) unless a subclass overrides
    ``typeName`` or ``__str__``.
    """

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}()"

    def __hash__(self) -> int:
        # Hash on the rendered SQL name so equal types hash equally.
        return hash(str(self))

    def __eq__(self, other: t.Any) -> bool:
        if not isinstance(other, self.__class__):
            return False
        return self.__dict__ == other.__dict__

    def __ne__(self, other: t.Any) -> bool:
        return not self.__eq__(other)

    def __str__(self) -> str:
        return self.typeName()

    @classmethod
    def typeName(cls) -> str:
        # Strip the trailing "Type" from the class name and lowercase the rest.
        return cls.__name__[: -len("Type")].lower()

    def simpleString(self) -> str:
        return str(self)

    def jsonValue(self) -> t.Union[str, t.Dict[str, t.Any]]:
        # Scalar types serialize to their plain SQL name; container types
        # override this to return a dict.
        return str(self)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class DataTypeWithLength(DataType):
    """Base for parameterized character types (CHAR/VARCHAR) carrying a length."""

    def __init__(self, length: int):
        # Maximum/fixed number of characters, depending on the subclass.
        self.length = length

    def __repr__(self) -> str:
        return "{}({})".format(type(self).__name__, self.length)

    def __str__(self) -> str:
        return "{}({})".format(self.typeName(), self.length)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class StringType(DataType):
    """Unbounded text; rendered as ``string``."""


class CharType(DataTypeWithLength):
    """Fixed-length character string; rendered as ``char(n)``."""


class VarcharType(DataTypeWithLength):
    """Variable-length character string; rendered as ``varchar(n)``."""


class BinaryType(DataType):
    """Byte string; rendered as ``binary``."""


class BooleanType(DataType):
    """Boolean; rendered as ``boolean``."""


class DateType(DataType):
    """Date; rendered as ``date``."""


class TimestampType(DataType):
    """Timestamp; rendered as ``timestamp``."""


class TimestampNTZType(DataType):
    """Timestamp without time zone."""

    @classmethod
    def typeName(cls) -> str:
        # The name derived from the class would be "timestampntz";
        # the SQL spelling uses an underscore.
        return "timestamp_ntz"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class DecimalType(DataType):
    """Fixed-point decimal with ``precision`` total digits and ``scale`` fractional digits."""

    def __init__(self, precision: int = 10, scale: int = 0):
        self.precision = precision
        self.scale = scale

    def _render(self) -> str:
        # Single place for the "decimal(p, s)" rendering shared by
        # simpleString and jsonValue.
        return f"decimal({self.precision}, {self.scale})"

    def simpleString(self) -> str:
        return self._render()

    def jsonValue(self) -> str:
        return self._render()

    def __repr__(self) -> str:
        return f"DecimalType({self.precision}, {self.scale})"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class DoubleType(DataType):
    """Double-precision float; rendered as ``double``."""


class FloatType(DataType):
    """Single-precision float; rendered as ``float``."""


class ByteType(DataType):
    """Byte-sized integer; rendered as ``tinyint`` (not the derived ``byte``)."""

    def __str__(self) -> str:
        return "tinyint"


class IntegerType(DataType):
    """Integer; rendered as ``int`` (not the derived ``integer``)."""

    def __str__(self) -> str:
        return "int"


class LongType(DataType):
    """Long integer; rendered as ``bigint`` (not the derived ``long``)."""

    def __str__(self) -> str:
        return "bigint"


class ShortType(DataType):
    """Short integer; rendered as ``smallint`` (not the derived ``short``)."""

    def __str__(self) -> str:
        return "smallint"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class ArrayType(DataType):
    """Array of ``elementType`` values.

    Args:
        elementType: type of each array element.
        containsNull: whether elements may be NULL.
    """

    def __init__(self, elementType: DataType, containsNull: bool = True):
        self.elementType = elementType
        self.containsNull = containsNull

    def __repr__(self) -> str:
        # Bug fix: the previous f-string interpolated a tuple and dropped the
        # closing parenthesis, producing e.g. "ArrayType((string, 'True')".
        # Render the two fields separately, matching MapType/StructField reprs.
        return f"ArrayType({self.elementType}, {str(self.containsNull)})"

    def simpleString(self) -> str:
        return f"array<{self.elementType.simpleString()}>"

    def jsonValue(self) -> t.Dict[str, t.Any]:
        return {
            "type": self.typeName(),
            "elementType": self.elementType.jsonValue(),
            "containsNull": self.containsNull,
        }
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class MapType(DataType):
    """Map from ``keyType`` keys to ``valueType`` values.

    ``valueContainsNull`` records whether map values may be NULL.
    """

    def __init__(self, keyType: DataType, valueType: DataType, valueContainsNull: bool = True):
        self.keyType = keyType
        self.valueType = valueType
        self.valueContainsNull = valueContainsNull

    def __repr__(self) -> str:
        return "MapType({}, {}, {})".format(self.keyType, self.valueType, self.valueContainsNull)

    def simpleString(self) -> str:
        return "map<{}, {}>".format(self.keyType.simpleString(), self.valueType.simpleString())

    def jsonValue(self) -> t.Dict[str, t.Any]:
        json: t.Dict[str, t.Any] = {"type": self.typeName()}
        json["keyType"] = self.keyType.jsonValue()
        json["valueType"] = self.valueType.jsonValue()
        json["valueContainsNull"] = self.valueContainsNull
        return json
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class StructField(DataType):
    """A single named, typed field inside a StructType.

    Args:
        name: column name.
        dataType: the field's type object.
        nullable: whether the column may hold NULLs.
        metadata: optional free-form metadata dict; defaults to empty.
    """

    def __init__(
        self,
        name: str,
        dataType: DataType,
        nullable: bool = True,
        metadata: t.Optional[t.Dict[str, t.Any]] = None,
    ):
        self.name = name
        self.dataType = dataType
        self.nullable = nullable
        # Store a fresh empty dict rather than None so callers can mutate it.
        self.metadata = metadata or {}

    def __repr__(self) -> str:
        return "StructField('{}', {}, {})".format(self.name, self.dataType, self.nullable)

    def simpleString(self) -> str:
        return "{}:{}".format(self.name, self.dataType.simpleString())

    def jsonValue(self) -> t.Dict[str, t.Any]:
        return {
            "name": self.name,
            "type": self.dataType.jsonValue(),
            "nullable": self.nullable,
            "metadata": self.metadata,
        }
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class StructType(DataType):
    """An ordered collection of StructFields describing a row schema."""

    def __init__(self, fields: t.Optional[t.List[StructField]] = None):
        self.fields = fields if fields else []
        # Field names, kept in the same order as `fields`.
        self.names = [field.name for field in self.fields]

    def __iter__(self) -> t.Iterator[StructField]:
        yield from self.fields

    def __len__(self) -> int:
        return len(self.fields)

    def __repr__(self) -> str:
        inner = ", ".join(str(field) for field in self)
        return f"StructType({inner})"

    def simpleString(self) -> str:
        inner = ", ".join(field.simpleString() for field in self)
        return f"struct<{inner}>"

    def jsonValue(self) -> t.Dict[str, t.Any]:
        return {"type": self.typeName(), "fields": [field.jsonValue() for field in self]}

    def fieldNames(self) -> t.List[str]:
        # Return a copy so callers can't mutate the internal list.
        return list(self.names)
|
package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
5
|
+
from sqlglot import expressions as exp
|
|
6
|
+
from sqlglot.dataframe.sql import types
|
|
7
|
+
|
|
8
|
+
if t.TYPE_CHECKING:
|
|
9
|
+
from sqlglot.dataframe.sql._typing import SchemaInput
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_column_mapping_from_schema_input(schema: SchemaInput) -> t.Dict[str, t.Optional[str]]:
    """Normalize any supported schema representation into a name -> type-string dict.

    Accepts a dict (returned as-is), a ``"name: type, ..."`` string, a
    ``types.StructType``, or any other iterable of column names (those get
    ``None`` as the type).
    """
    if isinstance(schema, dict):
        return schema

    if isinstance(schema, str):
        # "a: int, b: string" -> {"a": "int", "b": "string"}
        mapping: t.Dict[str, t.Optional[str]] = {}
        for pair in schema.split(","):
            parts = pair.strip().split(":")
            mapping[parts[0].strip()] = parts[1].strip()
        return mapping

    if isinstance(schema, types.StructType):
        return {field.name: field.dataType.simpleString() for field in schema}

    # Fallback: an iterable of bare column names with unknown types.
    return {name.strip(): None for name in schema}  # type: ignore
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_tables_from_expression_with_join(expression: exp.Select) -> t.List[exp.Table]:
    """Return every table participating in a join: the FROM table first, then one per JOIN.

    Returns an empty list when the select has no joins at all.
    """
    joins = expression.args.get("joins")
    if not joins:
        return []

    tables = [expression.args["from"].this]
    tables.extend(join.this for join in joins)
    return tables
|
package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
import typing as t
|
|
5
|
+
|
|
6
|
+
from sqlglot import expressions as exp
|
|
7
|
+
from sqlglot.dataframe.sql import functions as F
|
|
8
|
+
from sqlglot.helper import flatten
|
|
9
|
+
|
|
10
|
+
if t.TYPE_CHECKING:
|
|
11
|
+
from sqlglot.dataframe.sql._typing import ColumnOrName
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Window:
|
|
15
|
+
_JAVA_MIN_LONG = -(1 << 63) # -9223372036854775808
|
|
16
|
+
_JAVA_MAX_LONG = (1 << 63) - 1 # 9223372036854775807
|
|
17
|
+
_PRECEDING_THRESHOLD = max(-sys.maxsize, _JAVA_MIN_LONG)
|
|
18
|
+
_FOLLOWING_THRESHOLD = min(sys.maxsize, _JAVA_MAX_LONG)
|
|
19
|
+
|
|
20
|
+
unboundedPreceding: int = _JAVA_MIN_LONG
|
|
21
|
+
|
|
22
|
+
unboundedFollowing: int = _JAVA_MAX_LONG
|
|
23
|
+
|
|
24
|
+
currentRow: int = 0
|
|
25
|
+
|
|
26
|
+
@classmethod
|
|
27
|
+
def partitionBy(cls, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
|
|
28
|
+
return WindowSpec().partitionBy(*cols)
|
|
29
|
+
|
|
30
|
+
@classmethod
|
|
31
|
+
def orderBy(cls, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
|
|
32
|
+
return WindowSpec().orderBy(*cols)
|
|
33
|
+
|
|
34
|
+
@classmethod
|
|
35
|
+
def rowsBetween(cls, start: int, end: int) -> WindowSpec:
|
|
36
|
+
return WindowSpec().rowsBetween(start, end)
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
def rangeBetween(cls, start: int, end: int) -> WindowSpec:
|
|
40
|
+
return WindowSpec().rangeBetween(start, end)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class WindowSpec:
    """A window definition (partitioning, ordering, frame) wrapping an exp.Window.

    Every mutator copies the spec before extending it and returns the copy, so
    specs behave as immutable values from the caller's perspective.
    """

    def __init__(self, expression: t.Optional[exp.Expression] = None):
        # Bug fix: the previous signature used `expression: exp.Expression =
        # exp.Window()` — a mutable default evaluated once at def time and
        # shared by every no-arg instance. Build a fresh node per instance.
        self.expression = exp.Window() if expression is None else expression

    def copy(self):
        return WindowSpec(self.expression.copy())

    def sql(self, **kwargs) -> str:
        """Render this window spec as SQL in the active session's dialect."""
        # Imported lazily to avoid a circular import with the session module.
        from sqlglot.dataframe.sql.session import SparkSession

        return self.expression.sql(dialect=SparkSession().dialect, **kwargs)

    def partitionBy(self, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
        """Return a copy of this spec with *cols* appended to PARTITION BY."""
        from sqlglot.dataframe.sql.column import Column

        if isinstance(cols[0], (list, set, tuple)):
            cols = flatten(cols)  # type: ignore
        expressions = [Column.ensure_col(col).expression for col in cols]
        new_spec = self.copy()
        partition_by = new_spec.expression.args.get("partition_by", [])
        partition_by.extend(expressions)
        new_spec.expression.set("partition_by", partition_by)
        return new_spec

    def orderBy(self, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
        """Return a copy of this spec with *cols* appended to ORDER BY."""
        from sqlglot.dataframe.sql.column import Column

        if isinstance(cols[0], (list, set, tuple)):
            cols = flatten(cols)  # type: ignore
        expressions = [Column.ensure_col(col).expression for col in cols]
        new_spec = self.copy()
        if new_spec.expression.args.get("order") is None:
            new_spec.expression.set("order", exp.Order(expressions=[]))
        order_by = new_spec.expression.args["order"].expressions
        order_by.extend(expressions)
        new_spec.expression.args["order"].set("expressions", order_by)
        return new_spec

    def _calc_start_end(
        self, start: int, end: int
    ) -> t.Dict[str, t.Optional[t.Union[str, exp.Expression]]]:
        """Translate pyspark frame boundaries into exp.WindowSpec arguments.

        ``Window.currentRow`` maps to CURRENT ROW; values at or beyond the
        unbounded sentinels map to UNBOUNDED PRECEDING/FOLLOWING; anything
        else becomes a literal offset.
        """
        kwargs: t.Dict[str, t.Optional[t.Union[str, exp.Expression]]] = {
            "start_side": None,
            "end_side": None,
        }
        if start == Window.currentRow:
            kwargs["start"] = "CURRENT ROW"
        else:
            kwargs["start_side"] = "PRECEDING"
            kwargs["start"] = (
                "UNBOUNDED" if start <= Window.unboundedPreceding else F.lit(start).expression
            )
        if end == Window.currentRow:
            kwargs["end"] = "CURRENT ROW"
        else:
            kwargs["end_side"] = "FOLLOWING"
            kwargs["end"] = (
                "UNBOUNDED" if end >= Window.unboundedFollowing else F.lit(end).expression
            )
        return kwargs

    def _frame_between(self, kind: str, start: int, end: int) -> WindowSpec:
        # Shared implementation of rowsBetween / rangeBetween: merge the new
        # frame arguments into any existing exp.WindowSpec on a copied spec.
        new_spec = self.copy()
        spec_args = self._calc_start_end(start, end)
        spec_args["kind"] = kind
        existing = new_spec.expression.args.get("spec", exp.WindowSpec()).args
        new_spec.expression.set("spec", exp.WindowSpec(**{**existing, **spec_args}))
        return new_spec

    def rowsBetween(self, start: int, end: int) -> WindowSpec:
        """Return a copy of this spec with a ROWS frame from *start* to *end*."""
        return self._frame_between("ROWS", start, end)

    def rangeBetween(self, start: int, end: int) -> WindowSpec:
        """Return a copy of this spec with a RANGE frame from *start* to *end*."""
        return self._frame_between("RANGE", start, end)
|
package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# ruff: noqa: F401
|
|
2
|
+
"""
|
|
3
|
+
## Dialects
|
|
4
|
+
|
|
5
|
+
While there is a SQL standard, most SQL engines support a variation of that standard. This makes it difficult
|
|
6
|
+
to write portable SQL code. SQLGlot bridges all the different variations, called "dialects", with an extensible
|
|
7
|
+
SQL transpilation framework.
|
|
8
|
+
|
|
9
|
+
The base `sqlglot.dialects.dialect.Dialect` class implements a generic dialect that aims to be as universal as possible.
|
|
10
|
+
|
|
11
|
+
Each SQL variation has its own `Dialect` subclass, extending the corresponding `Tokenizer`, `Parser` and `Generator`
|
|
12
|
+
classes as needed.
|
|
13
|
+
|
|
14
|
+
### Implementing a custom Dialect
|
|
15
|
+
|
|
16
|
+
Creating a new SQL dialect may seem complicated at first, but it is actually quite simple in SQLGlot:
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
from sqlglot import exp
|
|
20
|
+
from sqlglot.dialects.dialect import Dialect
|
|
21
|
+
from sqlglot.generator import Generator
|
|
22
|
+
from sqlglot.tokens import Tokenizer, TokenType
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Custom(Dialect):
|
|
26
|
+
class Tokenizer(Tokenizer):
|
|
27
|
+
QUOTES = ["'", '"'] # Strings can be delimited by either single or double quotes
|
|
28
|
+
IDENTIFIERS = ["`"] # Identifiers can be delimited by backticks
|
|
29
|
+
|
|
30
|
+
# Associates certain meaningful words with tokens that capture their intent
|
|
31
|
+
KEYWORDS = {
|
|
32
|
+
**Tokenizer.KEYWORDS,
|
|
33
|
+
"INT64": TokenType.BIGINT,
|
|
34
|
+
"FLOAT64": TokenType.DOUBLE,
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
class Generator(Generator):
|
|
38
|
+
# Specifies how AST nodes, i.e. subclasses of exp.Expression, should be converted into SQL
|
|
39
|
+
TRANSFORMS = {
|
|
40
|
+
exp.Array: lambda self, e: f"[{self.expressions(e)}]",
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
# Specifies how AST nodes representing data types should be converted into SQL
|
|
44
|
+
TYPE_MAPPING = {
|
|
45
|
+
exp.DataType.Type.TINYINT: "INT64",
|
|
46
|
+
exp.DataType.Type.SMALLINT: "INT64",
|
|
47
|
+
exp.DataType.Type.INT: "INT64",
|
|
48
|
+
exp.DataType.Type.BIGINT: "INT64",
|
|
49
|
+
exp.DataType.Type.DECIMAL: "NUMERIC",
|
|
50
|
+
exp.DataType.Type.FLOAT: "FLOAT64",
|
|
51
|
+
exp.DataType.Type.DOUBLE: "FLOAT64",
|
|
52
|
+
exp.DataType.Type.BOOLEAN: "BOOL",
|
|
53
|
+
exp.DataType.Type.TEXT: "STRING",
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
The above example demonstrates how certain parts of the base `Dialect` class can be overridden to match a different
|
|
58
|
+
specification. Even though it is a fairly realistic starting point, we strongly encourage the reader to study existing
|
|
59
|
+
dialect implementations in order to understand how their various components can be modified, depending on the use-case.
|
|
60
|
+
|
|
61
|
+
----
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
import importlib
import threading

# Class names of every dialect shipped with sqlglot; drives the lazy-import
# machinery below.
DIALECTS = [
    "Athena",
    "BigQuery",
    "ClickHouse",
    "Databricks",
    "Doris",
    "Drill",
    "Druid",
    "DuckDB",
    "Dune",
    "Hive",
    "Materialize",
    "MySQL",
    "Oracle",
    "Postgres",
    "Presto",
    "PRQL",
    "Redshift",
    "RisingWave",
    "Snowflake",
    "Spark",
    "Spark2",
    "SQLite",
    "StarRocks",
    "Tableau",
    "Teradata",
    "Trino",
    "TSQL",
]

# Each dialect class lives in a module named after it in lowercase.
MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
DIALECT_MODULE_NAMES = MODULE_BY_DIALECT.values()

# Attribute name -> submodule that defines it, including the base classes
# exported from sqlglot.dialects.dialect.
MODULE_BY_ATTRIBUTE = {
    **MODULE_BY_DIALECT,
    "Dialect": "dialect",
    "Dialects": "dialect",
}

__all__ = list(MODULE_BY_ATTRIBUTE)

_import_lock = threading.Lock()


def __getattr__(name):
    """Lazily import the submodule providing *name* on first access (PEP 562)."""
    module_name = MODULE_BY_ATTRIBUTE.get(name)
    if module_name:
        # Serialize first-touch imports across threads.
        with _import_lock:
            module = importlib.import_module(f"sqlglot.dialects.{module_name}")
            return getattr(module, name)

    raise AttributeError(f"module {__name__} has no attribute {name}")
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
5
|
+
from sqlglot import exp
|
|
6
|
+
from sqlglot.dialects.trino import Trino
|
|
7
|
+
from sqlglot.dialects.hive import Hive
|
|
8
|
+
from sqlglot.tokens import TokenType
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _generate_as_hive(expression: exp.Expression) -> bool:
    """Decide whether *expression* must be rendered with Hive (vs Trino) syntax.

    Athena routes most DDL to its Hive engine; view statements and
    CREATE TABLE AS SELECT go to Trino.
    """
    if isinstance(expression, exp.Create):
        if expression.kind != "TABLE":
            # CREATE VIEW is never Hive, but CREATE SCHEMA etc. is.
            return expression.kind != "VIEW"

        properties: t.Optional[exp.Properties] = expression.args.get("properties")
        if properties and properties.find(exp.ExternalProperty):
            # CREATE EXTERNAL TABLE is Hive.
            return True

        # Any CREATE TABLE other than CREATE TABLE AS SELECT is Hive.
        return not isinstance(expression.expression, exp.Query)

    # https://docs.aws.amazon.com/athena/latest/ug/ddl-reference.html
    if isinstance(expression, (exp.Alter, exp.Drop, exp.Describe)):
        if isinstance(expression, exp.Drop) and expression.kind == "VIEW":
            # DROP VIEW is Trino (because CREATE VIEW is).
            return False

        # Everything else in this group is Hive.
        return True

    return False
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _is_iceberg_table(properties: exp.Properties) -> bool:
    """True when the properties contain a ``table_type`` set to ``iceberg``."""
    for prop in properties.expressions:
        if isinstance(prop, exp.Property) and prop.name == "table_type":
            # Only the first table_type property is considered.
            return prop.text("value").lower() == "iceberg"
    return False
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _location_property_sql(self: Athena.Generator, e: exp.LocationProperty):
    """Render LocationProperty under the name Athena expects.

    With table_type='iceberg' the property is called 'location'; otherwise
    it's 'external_location'.
    ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html
    """
    parent = e.parent
    if isinstance(parent, exp.Properties) and _is_iceberg_table(parent):
        prop_name = "location"
    else:
        prop_name = "external_location"

    return f"{prop_name}={self.sql(e, 'this')}"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByProperty) -> str:
    """Render PartitionedByProperty under the name Athena expects.

    With table_type='iceberg' the property is called 'partitioning'; with
    table_type='hive' it's 'partitioned_by'.
    ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
    """
    parent = e.parent
    prop_name = (
        "partitioning"
        if isinstance(parent, exp.Properties) and _is_iceberg_table(parent)
        else "partitioned_by"
    )
    return f"{prop_name}={self.sql(e, 'this')}"
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class Athena(Trino):
    """AWS Athena dialect.

    Athena is not simply hosted Trino: over the years AWS has bolted
    AWS-specific modifications onto several execution engines and built the
    service around them, so Athena acts more like a router that sends each SQL
    statement to an engine based on the statement type.

    As at 2024-09-10, with a workgroup on "Athena engine version 3":

    Hive engine:
      - Accepts mostly Hadoop/Hive syntax; backtick-quoted identifiers
      - Distinctive DDL (table properties, storage locations, ...) that
        differs from Trino
      - Handles *most* DDL — CREATE [EXTERNAL] TABLE (without AS SELECT),
        ALTER, DROP — with exceptions routed to Trino instead

    Trino engine:
      - Double-quoted identifiers
      - DDL involving SELECT: CREATE VIEW / DROP VIEW, CREATE TABLE AS SELECT
      - All DML: SELECT, INSERT, UPDATE, DELETE, MERGE

    Rather than inventing one universal syntax covering both engines, this
    dialect identifies which engine a query would be routed to and uses that
    engine's parser/generator.
    """

    class Tokenizer(Trino.Tokenizer):
        """Flexible enough to tokenize queries for both the Hive and Trino engines."""

        IDENTIFIERS = ['"', "`"]
        KEYWORDS = {
            **Hive.Tokenizer.KEYWORDS,
            **Trino.Tokenizer.KEYWORDS,
            "UNLOAD": TokenType.COMMAND,
        }

    class Parser(Trino.Parser):
        """Parses queries for the Athena Trino execution engine."""

        STATEMENT_PARSERS = {
            **Trino.Parser.STATEMENT_PARSERS,
            TokenType.USING: lambda self: self._parse_as_command(self._prev),
        }

    class _HiveGenerator(Hive.Generator):
        def alter_sql(self, expression: exp.Alter) -> str:
            # Athena rejects `ALTER TABLE ... ADD COLUMN`; wrap ADD actions in
            # a Schema node so they render as `ALTER TABLE ... ADD COLUMNS(...)`.
            if isinstance(expression, exp.Alter) and expression.kind == "TABLE":
                actions = expression.actions
                if actions and isinstance(actions[0], exp.ColumnDef):
                    expression.set("actions", [exp.Schema(expressions=actions)])

            return super().alter_sql(expression)

    class Generator(Trino.Generator):
        """Generates queries for the Trino engine, delegating Hive-bound DDL."""

        PROPERTIES_LOCATION = {
            **Trino.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
        }

        TRANSFORMS = {
            **Trino.Generator.TRANSFORMS,
            exp.PartitionedByProperty: _partitioned_by_property_sql,
            exp.LocationProperty: _location_property_sql,
        }

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

            # Companion generator for statements that need Hive syntax; force
            # its dialect so keyword/identifier handling matches Hive.
            hive_kwargs = {**kwargs, "dialect": "hive"}
            self._hive_generator = Athena._HiveGenerator(*args, **hive_kwargs)

        def generate(self, expression: exp.Expression, copy: bool = True) -> str:
            if _generate_as_hive(expression):
                return self._hive_generator.generate(expression, copy)

            return super().generate(expression, copy)
|