npm - altimate-code - Versions diffs - 0.5.2 → 0.5.3 - Mend

altimate-code 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101) hide show

package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py ADDED Viewed

@@ -0,0 +1,91 @@
+from __future__ import annotations
+import typing as t
+from sqlglot import exp
+from sqlglot.helper import tsort
+# Join arguments inspected by `normalize`: a join with none of these set is marked as CROSS.
+JOIN_ATTRS = ("on", "side", "kind", "using", "method")
+def optimize_joins(expression):
+    """
+    Removes cross joins if possible and reorder joins based on predicate dependencies.
+    Example:
+        >>> from sqlglot import parse_one
+        >>> optimize_joins(parse_one("SELECT * FROM x CROSS JOIN y JOIN z ON x.a = z.a AND y.a = z.a")).sql()
+        'SELECT * FROM x JOIN z ON x.a = z.a AND TRUE JOIN y ON y.a = z.a'
+    """
+    for select in expression.find_all(exp.Select):
+        references = {}
+        cross_joins = []
+        for join in select.args.get("joins", []):
+            tables = other_table_names(join)
+            if tables:
+                for table in tables:
+                    references[table] = references.get(table, []) + [join]
+            else:
+                cross_joins.append((join.alias_or_name, join))
+        for name, join in cross_joins:
+            for dep in references.get(name, []):
+                on = dep.args["on"]
+                if isinstance(on, exp.Connector):
+                    if len(other_table_names(dep)) < 2:
+                        continue
+                    operator = type(on)
+                    for predicate in on.flatten():
+                        if name in exp.column_table_names(predicate):
+                            predicate.replace(exp.true())
+                            predicate = exp._combine(
+                                [join.args.get("on"), predicate], operator, copy=False
+                            )
+                            join.on(predicate, append=False, copy=False)
+    expression = reorder_joins(expression)
+    expression = normalize(expression)
+    return expression
+def reorder_joins(expression):
+    """
+    Reorder joins by topological sort order based on predicate references.
+    """
+    for from_ in expression.find_all(exp.From):
+        parent = from_.parent
+        joins = {join.alias_or_name: join for join in parent.args.get("joins", [])}
+        dag = {name: other_table_names(join) for name, join in joins.items()}
+        parent.set(
+            "joins",
+            [joins[name] for name in tsort(dag) if name != from_.alias_or_name and name in joins],
+        )
+    return expression
+def normalize(expression):
+    """
+    Remove INNER and OUTER from joins as they are optional.
+    """
+    for join in expression.find_all(exp.Join):
+        if not any(join.args.get(k) for k in JOIN_ATTRS):
+            join.set("kind", "CROSS")
+        if join.kind == "CROSS":
+            join.set("on", None)
+        else:
+            join.set("kind", None)
+            if not join.args.get("on") and not join.args.get("using"):
+                join.set("on", exp.true())
+    return expression
+def other_table_names(join: exp.Join) -> t.Set[str]:
+    on = join.args.get("on")
+    return exp.column_table_names(on, join.alias_or_name) if on else set()

package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py ADDED Viewed

@@ -0,0 +1,94 @@
+from __future__ import annotations
+import inspect
+import typing as t
+from sqlglot import Schema, exp
+from sqlglot.dialects.dialect import DialectType
+from sqlglot.optimizer.annotate_types import annotate_types
+from sqlglot.optimizer.canonicalize import canonicalize
+from sqlglot.optimizer.eliminate_ctes import eliminate_ctes
+from sqlglot.optimizer.eliminate_joins import eliminate_joins
+from sqlglot.optimizer.eliminate_subqueries import eliminate_subqueries
+from sqlglot.optimizer.merge_subqueries import merge_subqueries
+from sqlglot.optimizer.normalize import normalize
+from sqlglot.optimizer.optimize_joins import optimize_joins
+from sqlglot.optimizer.pushdown_predicates import pushdown_predicates
+from sqlglot.optimizer.pushdown_projections import pushdown_projections
+from sqlglot.optimizer.qualify import qualify
+from sqlglot.optimizer.qualify_columns import quote_identifiers
+from sqlglot.optimizer.simplify import simplify
+from sqlglot.optimizer.unnest_subqueries import unnest_subqueries
+from sqlglot.schema import ensure_schema
+# Default rules for `optimize`, which applies them one after another in this order.
+RULES = (
+    qualify,
+    pushdown_projections,
+    normalize,
+    unnest_subqueries,
+    pushdown_predicates,
+    optimize_joins,
+    eliminate_subqueries,
+    merge_subqueries,
+    eliminate_joins,
+    eliminate_ctes,
+    quote_identifiers,
+    annotate_types,
+    canonicalize,
+    simplify,
+)
+def optimize(
+    expression: str | exp.Expression,
+    schema: t.Optional[dict | Schema] = None,
+    db: t.Optional[str | exp.Identifier] = None,
+    catalog: t.Optional[str | exp.Identifier] = None,
+    dialect: DialectType = None,
+    rules: t.Sequence[t.Callable] = RULES,
+    **kwargs,
+) -> exp.Expression:
+    """
+    Rewrite a sqlglot AST into an optimized form.
+    Args:
+        expression: expression to optimize
+        schema: database schema.
+            This can either be an instance of `sqlglot.optimizer.Schema` or a mapping in one of
+            the following forms:
+                1. {table: {col: type}}
+                2. {db: {table: {col: type}}}
+                3. {catalog: {db: {table: {col: type}}}}
+            If no schema is provided then the default schema defined at `sqlgot.schema` will be used
+        db: specify the default database, as might be set by a `USE DATABASE db` statement
+        catalog: specify the default catalog, as might be set by a `USE CATALOG c` statement
+        dialect: The dialect to parse the sql string.
+        rules: sequence of optimizer rules to use.
+            Many of the rules require tables and columns to be qualified.
+            Do not remove `qualify` from the sequence of rules unless you know what you're doing!
+        **kwargs: If a rule has a keyword argument with a same name in **kwargs, it will be passed in.
+    Returns:
+        The optimized expression.
+    """
+    schema = ensure_schema(schema, dialect=dialect)
+    possible_kwargs = {
+        "db": db,
+        "catalog": catalog,
+        "schema": schema,
+        "dialect": dialect,
+        "isolate_tables": True,  # needed for other optimizations to perform well
+        "quote_identifiers": False,
+        **kwargs,
+    }
+    optimized = exp.maybe_parse(expression, dialect=dialect, copy=True)
+    for rule in rules:
+        # Find any additional rule parameters, beyond `expression`
+        rule_params = inspect.getfullargspec(rule).args
+        rule_kwargs = {
+            param: possible_kwargs[param] for param in rule_params if param in possible_kwargs
+        }
+        optimized = rule(optimized, **rule_kwargs)
+    return optimized

package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py ADDED Viewed

@@ -0,0 +1,222 @@
+from sqlglot import exp
+from sqlglot.optimizer.normalize import normalized
+from sqlglot.optimizer.scope import build_scope, find_in_scope
+from sqlglot.optimizer.simplify import simplify
+from sqlglot import Dialect
+def pushdown_predicates(expression, dialect=None):
+    """
+    Rewrite sqlglot AST to pushdown predicates in FROMS and JOINS
+    Example:
+        >>> import sqlglot
+        >>> sql = "SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x) AS y WHERE y.a = 1"
+        >>> expression = sqlglot.parse_one(sql)
+        >>> pushdown_predicates(expression).sql()
+        'SELECT y.a AS a FROM (SELECT x.a AS a FROM x AS x WHERE x.a = 1) AS y WHERE TRUE'
+    Args:
+        expression (sqlglot.Expression): expression to optimize
+        dialect: dialect, resolved with `Dialect.get_or_raise`; it is forwarded to
+            `pushdown` and decides whether the Presto UNNEST restriction applies
+    Returns:
+        sqlglot.Expression: optimized expression (the object that was passed in)
+    """
+    # NOTE(review): imported locally, presumably to avoid a circular import - confirm
+    from sqlglot.dialects.presto import Presto
+    root = build_scope(expression)
+    dialect = Dialect.get_or_raise(dialect)
+    unnest_requires_cross_join = isinstance(dialect, Presto)
+    if root:
+        scope_ref_count = root.ref_count()
+        # Scopes are visited in the reverse of the order `root.traverse()` yields them
+        for scope in reversed(list(root.traverse())):
+            select = scope.expression
+            where = select.args.get("where")
+            if where:
+                selected_sources = scope.selected_sources
+                # join name -> position of that join in the select's join list
+                join_index = {
+                    join.alias_or_name: i for i, join in enumerate(select.args.get("joins") or [])
+                }
+                # a right join can only push down to itself and not the source FROM table
+                # presto, trino and athena don't support inner joins where the RHS is an UNNEST expression
+                pushdown_allowed = True
+                for k, (node, source) in selected_sources.items():
+                    parent = node.find_ancestor(exp.Join, exp.From)
+                    if isinstance(parent, exp.Join):
+                        if parent.side == "RIGHT":
+                            # restrict the WHERE pushdown to this right-joined source only
+                            selected_sources = {k: (node, source)}
+                            break
+                        if isinstance(node, exp.Unnest) and unnest_requires_cross_join:
+                            # skip the WHERE pushdown for this scope altogether
+                            pushdown_allowed = False
+                            break
+                if pushdown_allowed:
+                    pushdown(where.this, selected_sources, scope_ref_count, dialect, join_index)
+            # joins should only pushdown into itself, not to other joins
+            # so we limit the selected sources to only itself
+            for join in select.args.get("joins") or []:
+                name = join.alias_or_name
+                if name in scope.selected_sources:
+                    # no join_index is passed for ON conditions
+                    pushdown(
+                        join.args.get("on"),
+                        {name: scope.selected_sources[name]},
+                        scope_ref_count,
+                        dialect,
+                    )
+    return expression
+def pushdown(condition, sources, scope_ref_count, dialect, join_index=None):
+    if not condition:
+        return
+    condition = condition.replace(simplify(condition, dialect=dialect))
+    cnf_like = normalized(condition) or not normalized(condition, dnf=True)
+    predicates = list(
+        condition.flatten()
+        if isinstance(condition, exp.And if cnf_like else exp.Or)
+        else [condition]
+    )
+    if cnf_like:
+        pushdown_cnf(predicates, sources, scope_ref_count, join_index=join_index)
+    else:
+        pushdown_dnf(predicates, sources, scope_ref_count)
+def pushdown_cnf(predicates, sources, scope_ref_count, join_index=None):
+    """
+    If the predicates are in CNF like form, we can simply replace each block in the parent.
+    """
+    join_index = join_index or {}
+    for predicate in predicates:
+        for node in nodes_for_predicate(predicate, sources, scope_ref_count).values():
+            if isinstance(node, exp.Join):
+                name = node.alias_or_name
+                predicate_tables = exp.column_table_names(predicate, name)
+                # Don't push the predicate if it references tables that appear in later joins
+                this_index = join_index[name]
+                if all(join_index.get(table, -1) < this_index for table in predicate_tables):
+                    predicate.replace(exp.true())
+                    node.on(predicate, copy=False)
+                    break
+            if isinstance(node, exp.Select):
+                predicate.replace(exp.true())
+                inner_predicate = replace_aliases(node, predicate)
+                if find_in_scope(inner_predicate, exp.AggFunc):
+                    node.having(inner_predicate, copy=False)
+                else:
+                    node.where(inner_predicate, copy=False)
+def pushdown_dnf(predicates, sources, scope_ref_count):
+    """
+    If the predicates are in DNF form, we can only push down conditions that are in all blocks.
+    Additionally, we can't remove predicates from their original form.
+    Args:
+        predicates: the blocks of the DNF condition
+        sources: mapping of source name to (node, source), as consumed by `nodes_for_predicate`
+        scope_ref_count: reference counts forwarded to `nodes_for_predicate`
+    """
+    # find all the tables that predicates can be pushed down to
+    # these are tables that are referenced in all blocks of a DNF
+    # (a.x AND b.x) OR (a.y AND c.y)
+    # only table a can be pushed down
+    pushdown_tables = set()
+    for a in predicates:
+        a_tables = exp.column_table_names(a)
+        # intersect with the tables of every block, leaving only the shared ones
+        for b in predicates:
+            a_tables &= exp.column_table_names(b)
+        pushdown_tables.update(a_tables)
+    # table name -> OR of the predicates collected for that table
+    conditions = {}
+    # pushdown all predicates to their respective nodes
+    for table in sorted(pushdown_tables):
+        for predicate in predicates:
+            nodes = nodes_for_predicate(predicate, sources, scope_ref_count)
+            if table not in nodes:
+                continue
+            conditions[table] = (
+                exp.or_(conditions[table], predicate) if table in conditions else predicate
+            )
+        # `nodes` is the mapping computed for the last predicate of the loop above
+        # NOTE(review): this visits every name in `nodes` that has a condition, not only
+        # `table` - confirm that repeated attachment for earlier tables is intended
+        for name, node in nodes.items():
+            if name not in conditions:
+                continue
+            predicate = conditions[name]
+            if isinstance(node, exp.Join):
+                node.on(predicate, copy=False)
+            elif isinstance(node, exp.Select):
+                inner_predicate = replace_aliases(node, predicate)
+                # predicates containing an aggregate go to HAVING, the rest to WHERE
+                if find_in_scope(inner_predicate, exp.AggFunc):
+                    node.having(inner_predicate, copy=False)
+                else:
+                    node.where(inner_predicate, copy=False)
+def nodes_for_predicate(predicate, sources, scope_ref_count):
+    nodes = {}
+    tables = exp.column_table_names(predicate)
+    where_condition = isinstance(predicate.find_ancestor(exp.Join, exp.Where), exp.Where)
+    for table in sorted(tables):
+        node, source = sources.get(table) or (None, None)
+        # if the predicate is in a where statement we can try to push it down
+        # we want to find the root join or from statement
+        if node and where_condition:
+            node = node.find_ancestor(exp.Join, exp.From)
+        # a node can reference a CTE which should be pushed down
+        if isinstance(node, exp.From) and not isinstance(source, exp.Table):
+            with_ = source.parent.expression.args.get("with")
+            if with_ and with_.recursive:
+                return {}
+            node = source.expression
+        if isinstance(node, exp.Join):
+            if node.side and node.side != "RIGHT":
+                return {}
+            nodes[table] = node
+        elif isinstance(node, exp.Select) and len(tables) == 1:
+            # We can't push down window expressions
+            has_window_expression = any(
+                select for select in node.selects if select.find(exp.Window)
+            )
+            # we can't push down predicates to select statements if they are referenced in
+            # multiple places.
+            if (
+                not node.args.get("group")
+                and scope_ref_count[id(source)] < 2
+                and not has_window_expression
+            ):
+                nodes[table] = node
+    return nodes
+def replace_aliases(source, predicate):
+    aliases = {}
+    for select in source.selects:
+        if isinstance(select, exp.Alias):
+            aliases[select.alias] = select.this
+        else:
+            aliases[select.name] = select
+    def _replace_alias(column):
+        if isinstance(column, exp.Column) and column.name in aliases:
+            return aliases[column.name].copy()
+        return column
+    return predicate.transform(_replace_alias)

package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py ADDED Viewed

@@ -0,0 +1,172 @@
+from __future__ import annotations
+import typing as t
+from collections import defaultdict
+from sqlglot import alias, exp
+from sqlglot.optimizer.qualify_columns import Resolver
+from sqlglot.optimizer.scope import Scope, traverse_scope
+from sqlglot.schema import ensure_schema
+from sqlglot.errors import OptimizeError
+from sqlglot.helper import seq_get
+if t.TYPE_CHECKING:
+    from sqlglot._typing import E
+    from sqlglot.schema import Schema
+    from sqlglot.dialects.dialect import DialectType
+# Sentinel value stored in a scope's set of referenced columns to mean that an
+# outer query selects ALL of its columns
+SELECT_ALL = object()
+# Selection to use if selection list is empty
+def default_selection(is_agg: bool) -> exp.Alias:
+    return alias(exp.Max(this=exp.Literal.number(1)) if is_agg else "1", "_")
+def pushdown_projections(
+    expression: E,
+    schema: t.Optional[t.Dict | Schema] = None,
+    remove_unused_selections: bool = True,
+    dialect: DialectType = None,
+) -> E:
+    """
+    Rewrite sqlglot AST to remove unused columns projections.
+    Example:
+        >>> import sqlglot
+        >>> sql = "SELECT y.a AS a FROM (SELECT x.a AS a, x.b AS b FROM x) AS y"
+        >>> expression = sqlglot.parse_one(sql)
+        >>> pushdown_projections(expression).sql()
+        'SELECT y.a AS a FROM (SELECT x.a AS a FROM x) AS y'
+    Args:
+        expression (sqlglot.Expression): expression to optimize
+        schema: schema (or mapping) passed through `ensure_schema`; handed to
+            `_remove_unused_selections`
+        remove_unused_selections (bool): remove selects that are unused
+        dialect: dialect used by `ensure_schema` and to render the SQL in error messages
+    Returns:
+        sqlglot.Expression: optimized expression (the object that was passed in)
+    Raises:
+        OptimizeError: if the two sides of a set operation select a different number of columns
+    """
+    schema = ensure_schema(schema, dialect=dialect)
+    # Number of column aliases declared on the reference to a source, keyed by source
+    source_column_alias_count: t.Dict[exp.Expression | Scope, int] = {}
+    # Map of Scope to all columns being selected by outer queries.
+    referenced_columns: t.DefaultDict[Scope, t.Set[str | object]] = defaultdict(set)
+    # We build the scope tree (which is traversed in DFS postorder), then iterate
+    # over the result in reverse order. This should ensure that the set of selected
+    # columns for a particular scope are completely build by the time we get to it.
+    for scope in reversed(traverse_scope(expression)):
+        # Scopes nothing has registered columns for are treated as fully selected
+        parent_selections = referenced_columns.get(scope, {SELECT_ALL})
+        alias_count = source_column_alias_count.get(scope, 0)
+        # We can't remove columns SELECT DISTINCT nor UNION DISTINCT.
+        if scope.expression.args.get("distinct"):
+            parent_selections = {SELECT_ALL}
+        if isinstance(scope.expression, exp.SetOperation):
+            set_op = scope.expression
+            if not (set_op.kind or set_op.side):
+                # Do not optimize this set operation if it's using the BigQuery specific
+                # kind / side syntax (e.g INNER UNION ALL BY NAME) which changes the semantics of the operation
+                left, right = scope.union_scopes
+                if len(left.expression.selects) != len(right.expression.selects):
+                    scope_sql = scope.expression.sql(dialect=dialect)
+                    raise OptimizeError(
+                        f"Invalid set operation due to column mismatch: {scope_sql}."
+                    )
+                referenced_columns[left] = parent_selections
+                if any(select.is_star for select in right.expression.selects):
+                    referenced_columns[right] = parent_selections
+                elif not any(select.is_star for select in left.expression.selects):
+                    if scope.expression.args.get("by_name"):
+                        referenced_columns[right] = referenced_columns[left]
+                    else:
+                        # Columns are matched by position: keep the right-hand select
+                        # at each index whose left-hand select is still needed
+                        referenced_columns[right] = {
+                            right.expression.selects[i].alias_or_name
+                            for i, select in enumerate(left.expression.selects)
+                            if SELECT_ALL in parent_selections
+                            or select.alias_or_name in parent_selections
+                        }
+        if isinstance(scope.expression, exp.Select):
+            if remove_unused_selections:
+                _remove_unused_selections(scope, parent_selections, schema, alias_count)
+            if scope.expression.is_star:
+                continue
+            # Group columns by source name
+            selects = defaultdict(set)
+            for col in scope.columns:
+                table_name = col.table
+                col_name = col.name
+                selects[table_name].add(col_name)
+            # Push the selected columns down to the next scope
+            for name, (node, source) in scope.selected_sources.items():
+                if isinstance(source, Scope):
+                    select = seq_get(source.expression.selects, 0)
+                    # With pivots or a query transform, request every column of the source
+                    if scope.pivots or isinstance(select, exp.QueryTransform):
+                        columns = {SELECT_ALL}
+                    else:
+                        columns = selects.get(name) or set()
+                    referenced_columns[source].update(columns)
+                column_aliases = node.alias_column_names
+                if column_aliases:
+                    source_column_alias_count[source] = len(column_aliases)
+    return expression
+def _remove_unused_selections(scope, parent_selections, schema, alias_count):
+    order = scope.expression.args.get("order")
+    if order:
+        # Assume columns without a qualified table are references to output columns
+        order_refs = {c.name for c in order.find_all(exp.Column) if not c.table}
+    else:
+        order_refs = set()
+    new_selections = []
+    removed = False
+    star = False
+    is_agg = False
+    select_all = SELECT_ALL in parent_selections
+    for selection in scope.expression.selects:
+        name = selection.alias_or_name
+        if select_all or name in parent_selections or name in order_refs or alias_count > 0:
+            new_selections.append(selection)
+            alias_count -= 1
+        else:
+            if selection.is_star:
+                star = True
+            removed = True
+        if not is_agg and selection.find(exp.AggFunc):
+            is_agg = True
+    if star:
+        resolver = Resolver(scope, schema)
+        names = {s.alias_or_name for s in new_selections}
+        for name in sorted(parent_selections):
+            if name not in names:
+                new_selections.append(
+                    alias(exp.column(name, table=resolver.get_table(name)), name, copy=False)
+                )
+    # If there are no remaining selections, just select a single constant
+    if not new_selections:
+        new_selections.append(default_selection(is_agg))
+    scope.expression.select(*new_selections, append=False, copy=False)
+    if removed:
+        scope.clear_cache()

package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py ADDED Viewed

@@ -0,0 +1,104 @@
+from __future__ import annotations
+import typing as t
+from sqlglot import exp
+from sqlglot.dialects.dialect import Dialect, DialectType
+from sqlglot.optimizer.isolate_table_selects import isolate_table_selects
+from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
+from sqlglot.optimizer.qualify_columns import (
+    pushdown_cte_alias_columns as pushdown_cte_alias_columns_func,
+    qualify_columns as qualify_columns_func,
+    quote_identifiers as quote_identifiers_func,
+    validate_qualify_columns as validate_qualify_columns_func,
+)
+from sqlglot.optimizer.qualify_tables import qualify_tables
+from sqlglot.schema import Schema, ensure_schema
+def qualify(
+    expression: exp.Expression,
+    dialect: DialectType = None,
+    db: t.Optional[str] = None,
+    catalog: t.Optional[str] = None,
+    schema: t.Optional[dict | Schema] = None,
+    expand_alias_refs: bool = True,
+    expand_stars: bool = True,
+    infer_schema: t.Optional[bool] = None,
+    isolate_tables: bool = False,
+    qualify_columns: bool = True,
+    allow_partial_qualification: bool = False,
+    validate_qualify_columns: bool = True,
+    quote_identifiers: bool = True,
+    identify: bool = True,
+    infer_csv_schemas: bool = False,
+) -> exp.Expression:
+    """
+    Rewrite sqlglot AST to have normalized and qualified tables and columns.
+    This step is necessary for all further SQLGlot optimizations.
+    Example:
+        >>> import sqlglot
+        >>> schema = {"tbl": {"col": "INT"}}
+        >>> expression = sqlglot.parse_one("SELECT col FROM tbl")
+        >>> qualify(expression, schema=schema).sql()
+        'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"'
+    Args:
+        expression: Expression to qualify.
+        db: Default database name for tables.
+        catalog: Default catalog name for tables.
+        schema: Schema to infer column names and types.
+        expand_alias_refs: Whether to expand references to aliases.
+        expand_stars: Whether to expand star queries. This is a necessary step
+            for most of the optimizer's rules to work; do not set to False unless you
+            know what you're doing!
+        infer_schema: Whether to infer the schema if missing.
+        isolate_tables: Whether to isolate table selects.
+        qualify_columns: Whether to qualify columns.
+        allow_partial_qualification: Whether to allow partial qualification.
+        validate_qualify_columns: Whether to validate columns.
+        quote_identifiers: Whether to run the quote_identifiers step.
+            This step is necessary to ensure correctness for case sensitive queries.
+            But this flag is provided in case this step is performed at a later time.
+        identify: If True, quote all identifiers, else only necessary ones.
+        infer_csv_schemas: Whether to scan READ_CSV calls in order to infer the CSVs' schemas.
+    Returns:
+        The qualified expression.
+    """
+    schema = ensure_schema(schema, dialect=dialect)
+    expression = qualify_tables(
+        expression,
+        db=db,
+        catalog=catalog,
+        schema=schema,
+        dialect=dialect,
+        infer_csv_schemas=infer_csv_schemas,
+    )
+    expression = normalize_identifiers(expression, dialect=dialect)
+    if isolate_tables:
+        expression = isolate_table_selects(expression, schema=schema)
+    if Dialect.get_or_raise(dialect).PREFER_CTE_ALIAS_COLUMN:
+        expression = pushdown_cte_alias_columns_func(expression)
+    if qualify_columns:
+        expression = qualify_columns_func(
+            expression,
+            schema,
+            expand_alias_refs=expand_alias_refs,
+            expand_stars=expand_stars,
+            infer_schema=infer_schema,
+            allow_partial_qualification=allow_partial_qualification,
+        )
+    if quote_identifiers:
+        expression = quote_identifiers_func(expression, dialect=dialect, identify=identify)
+    if validate_qualify_columns:
+        validate_qualify_columns_func(expression)
+    return expression