pytrilogy 0.0.1.102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pytrilogy might be problematic.
- pytrilogy-0.0.1.102.dist-info/LICENSE.md +19 -0
- pytrilogy-0.0.1.102.dist-info/METADATA +277 -0
- pytrilogy-0.0.1.102.dist-info/RECORD +77 -0
- pytrilogy-0.0.1.102.dist-info/WHEEL +5 -0
- pytrilogy-0.0.1.102.dist-info/entry_points.txt +2 -0
- pytrilogy-0.0.1.102.dist-info/top_level.txt +1 -0
- trilogy/__init__.py +8 -0
- trilogy/compiler.py +0 -0
- trilogy/constants.py +30 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +3 -0
- trilogy/core/enums.py +270 -0
- trilogy/core/env_processor.py +33 -0
- trilogy/core/environment_helpers.py +156 -0
- trilogy/core/ergonomics.py +187 -0
- trilogy/core/exceptions.py +23 -0
- trilogy/core/functions.py +320 -0
- trilogy/core/graph_models.py +55 -0
- trilogy/core/internal.py +37 -0
- trilogy/core/models.py +3145 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +603 -0
- trilogy/core/processing/graph_utils.py +44 -0
- trilogy/core/processing/node_generators/__init__.py +25 -0
- trilogy/core/processing/node_generators/basic_node.py +71 -0
- trilogy/core/processing/node_generators/common.py +239 -0
- trilogy/core/processing/node_generators/concept_merge.py +152 -0
- trilogy/core/processing/node_generators/filter_node.py +83 -0
- trilogy/core/processing/node_generators/group_node.py +92 -0
- trilogy/core/processing/node_generators/group_to_node.py +99 -0
- trilogy/core/processing/node_generators/merge_node.py +148 -0
- trilogy/core/processing/node_generators/multiselect_node.py +189 -0
- trilogy/core/processing/node_generators/rowset_node.py +130 -0
- trilogy/core/processing/node_generators/select_node.py +328 -0
- trilogy/core/processing/node_generators/unnest_node.py +37 -0
- trilogy/core/processing/node_generators/window_node.py +85 -0
- trilogy/core/processing/nodes/__init__.py +76 -0
- trilogy/core/processing/nodes/base_node.py +251 -0
- trilogy/core/processing/nodes/filter_node.py +49 -0
- trilogy/core/processing/nodes/group_node.py +110 -0
- trilogy/core/processing/nodes/merge_node.py +326 -0
- trilogy/core/processing/nodes/select_node_v2.py +198 -0
- trilogy/core/processing/nodes/unnest_node.py +54 -0
- trilogy/core/processing/nodes/window_node.py +34 -0
- trilogy/core/processing/utility.py +278 -0
- trilogy/core/query_processor.py +331 -0
- trilogy/dialect/__init__.py +0 -0
- trilogy/dialect/base.py +679 -0
- trilogy/dialect/bigquery.py +80 -0
- trilogy/dialect/common.py +43 -0
- trilogy/dialect/config.py +55 -0
- trilogy/dialect/duckdb.py +83 -0
- trilogy/dialect/enums.py +95 -0
- trilogy/dialect/postgres.py +86 -0
- trilogy/dialect/presto.py +82 -0
- trilogy/dialect/snowflake.py +82 -0
- trilogy/dialect/sql_server.py +89 -0
- trilogy/docs/__init__.py +0 -0
- trilogy/engine.py +48 -0
- trilogy/executor.py +242 -0
- trilogy/hooks/__init__.py +0 -0
- trilogy/hooks/base_hook.py +37 -0
- trilogy/hooks/graph_hook.py +24 -0
- trilogy/hooks/query_debugger.py +133 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +176 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +2 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +1951 -0
- trilogy/parsing/render.py +483 -0
- trilogy/py.typed +0 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/trilogy.py +127 -0
- trilogy/utility.py +31 -0
trilogy/dialect/bigquery.py
ADDED
@@ -0,0 +1,80 @@
+from typing import Mapping, Callable, Any
+
+from jinja2 import Template
+
+from trilogy.core.enums import FunctionType, WindowType, UnnestMode
+from trilogy.dialect.base import BaseDialect
+
+
+WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
+
+FUNCTION_MAP = {
+    FunctionType.COUNT: lambda x: f"count({x[0]})",
+    FunctionType.SUM: lambda x: f"sum({x[0]})",
+    FunctionType.LENGTH: lambda x: f"length({x[0]})",
+    FunctionType.AVG: lambda x: f"avg({x[0]})",
+    FunctionType.LIKE: lambda x: (
+        f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
+    ),
+    FunctionType.MINUTE: lambda x: f"EXTRACT(MINUTE from {x[0]})",
+    FunctionType.SECOND: lambda x: f"EXTRACT(SECOND from {x[0]})",
+    FunctionType.HOUR: lambda x: f"EXTRACT(HOUR from {x[0]})",
+    FunctionType.DAY_OF_WEEK: lambda x: f"EXTRACT(DAYOFWEEK from {x[0]})",
+    FunctionType.DAY: lambda x: f"EXTRACT(DAY from {x[0]})",
+    FunctionType.YEAR: lambda x: f"EXTRACT(YEAR from {x[0]})",
+    FunctionType.MONTH: lambda x: f"EXTRACT(MONTH from {x[0]})",
+    FunctionType.WEEK: lambda x: f"EXTRACT(WEEK from {x[0]})",
+    FunctionType.QUARTER: lambda x: f"EXTRACT(QUARTER from {x[0]})",
+    # math
+    FunctionType.DIVIDE: lambda x: f"COALESCE(SAFE_DIVIDE({x[0]},{x[1]}),0)",
+    FunctionType.DATE_ADD: lambda x: f"DATE_ADD({x[0]}, INTERVAL {x[2]} {x[1]})",
+}
+
+FUNCTION_GRAIN_MATCH_MAP = {
+    **FUNCTION_MAP,
+    FunctionType.COUNT: lambda args: f"{args[0]}",
+    FunctionType.SUM: lambda args: f"{args[0]}",
+    FunctionType.AVG: lambda args: f"{args[0]}",
+}
+
+BQ_SQL_TEMPLATE = Template(
+    """{%- if output %}
+CREATE OR REPLACE TABLE {{ output.address.location }} AS
+{% endif %}{%- if ctes %}
+WITH {% for cte in ctes %}
+{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+SELECT
+
+{%- for select in select_columns %}
+{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
+{% if base %}FROM
+{{ base }}{% endif %}{% if joins %}
+{% for join in joins %}
+{{ join }}
+{% endfor %}{% endif %}
+{% if where %}WHERE
+{{ where }}
+{% endif %}
+{%- if group_by %}GROUP BY {% for group in group_by %}
+{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+{%- if order_by %}
+ORDER BY {% for order in order_by %}
+{{ order }}{% if not loop.last %},{% endif %}
+{% endfor %}{% endif %}
+{%- if limit is not none %}
+LIMIT {{ limit }}{% endif %}
+"""
+)
+MAX_IDENTIFIER_LENGTH = 50
+
+
+class BigqueryDialect(BaseDialect):
+    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
+    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
+    FUNCTION_GRAIN_MATCH_MAP = {
+        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
+        **FUNCTION_GRAIN_MATCH_MAP,
+    }
+    QUOTE_CHARACTER = "`"
+    SQL_TEMPLATE = BQ_SQL_TEMPLATE
+    UNNEST_MODE = UnnestMode.CROSS_JOIN
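The FUNCTION_MAP entries above are lambdas that receive a list of already-rendered argument strings and return a SQL fragment, which is how the BigQuery dialect swaps in a null-safe division. A minimal sketch of that behavior, assuming this wheel is installed (the column names are placeholders):

from trilogy.core.enums import FunctionType
from trilogy.dialect.bigquery import FUNCTION_MAP

# arguments arrive as already-rendered SQL strings
rendered = FUNCTION_MAP[FunctionType.DIVIDE](["revenue", "orders"])
print(rendered)  # COALESCE(SAFE_DIVIDE(revenue,orders),0)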
trilogy/dialect/common.py
ADDED
@@ -0,0 +1,43 @@
+from trilogy.core.models import Join, InstantiatedUnnestJoin, CTE, Concept
+from trilogy.core.enums import UnnestMode, Modifier
+from typing import Optional, Callable
+
+
+def null_wrapper(lval: str, rval: str, concept: Concept) -> str:
+    if concept.modifiers and Modifier.NULLABLE in concept.modifiers:
+        return f"(({lval} is null and {rval} is null) or ({lval} = {rval}))"
+    return f"{lval} = {rval}"
+
+
+def render_join(
+    join: Join | InstantiatedUnnestJoin,
+    quote_character: str,
+    render_func: Optional[Callable[[Concept, CTE, bool], str]] = None,
+    cte: Optional[CTE] = None,
+    unnest_mode: UnnestMode = UnnestMode.CROSS_APPLY,
+) -> str | None:
+    # {% for key in join.joinkeys %}{{ key.inner }} = {{ key.outer}}{% endfor %}
+    if isinstance(join, InstantiatedUnnestJoin):
+        if unnest_mode == UnnestMode.DIRECT:
+            return None
+        if not render_func:
+            raise ValueError("must provide a render func to build an unnest joins")
+        if not cte:
+            raise ValueError("must provide a cte to build an unnest joins")
+        if unnest_mode == UnnestMode.CROSS_JOIN:
+            return f"CROSS JOIN {render_func(join.concept, cte, False)} as {quote_character}{join.concept.safe_address}{quote_character}"
+
+        return f"FULL JOIN {render_func(join.concept, cte, False)} as unnest_wrapper({quote_character}{join.concept.safe_address}{quote_character})"
+
+    base_joinkeys = [
+        null_wrapper(
+            f"{join.left_cte.name}.{quote_character}{key.concept.safe_address}{quote_character}",
+            f"{join.right_cte.name}.{quote_character}{key.concept.safe_address}{quote_character}",
+            key.concept,
+        )
+        for key in join.joinkeys
+    ]
+    if not base_joinkeys:
+        base_joinkeys = ["1=1"]
+    joinkeys = " AND ".join(base_joinkeys)
+    return f"{join.jointype.value.upper()} JOIN {join.right_cte.name} on {joinkeys}"
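null_wrapper above only inspects the concept's modifiers list, so its two output shapes can be illustrated with a lightweight stand-in object. The SimpleNamespace values below are hypothetical placeholders rather than real Concept instances; this sketch assumes the wheel is installed:

from types import SimpleNamespace
from trilogy.core.enums import Modifier
from trilogy.dialect.common import null_wrapper

plain = SimpleNamespace(modifiers=[])                       # hypothetical stand-in for a Concept
nullable = SimpleNamespace(modifiers=[Modifier.NULLABLE])   # nullable concept stand-in

print(null_wrapper("a.id", "b.id", plain))     # a.id = b.id
print(null_wrapper("a.id", "b.id", nullable))  # ((a.id is null and b.id is null) or (a.id = b.id))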
trilogy/dialect/config.py
ADDED
@@ -0,0 +1,55 @@
+class DialectConfig:
+    pass
+
+
+class DuckDBConfig(DialectConfig):
+    def __init__(self, path: str | None = None):
+        self.path = path
+
+    def connection_string(self) -> str:
+        if not self.path:
+            return "duckdb:///:memory:"
+        return f"duckdb:///{self.path}"
+
+
+class PostgresConfig(DialectConfig):
+    def __init__(
+        self, host: str, port: int, username: str, password: str, database: str
+    ):
+        self.host = host
+        self.port = port
+        self.username = username
+        self.password = password
+        self.database = database
+
+    def connection_string(self) -> str:
+        return f"postgresql://{self.username}:{self.password}@{self.host}:{self.port}"
+
+
+class SQLServerConfig(DialectConfig):
+    def __init__(
+        self, host: str, port: int, username: str, password: str, database: str
+    ):
+        self.host = host
+        self.port = port
+        self.username = username
+        self.password = password
+        self.database = database
+
+    def connection_string(self) -> str:
+        return f"sqlserver//{self.username}:{self.password}@{self.host}:{self.port}"
+
+
+class SnowflakeConfig(DialectConfig):
+    def __init__(
+        self,
+        account: str,
+        username: str,
+        password: str,
+    ):
+        self.account = account
+        self.username = username
+        self.password = password
+
+    def connection_string(self) -> str:
+        return f"snowflake://{self.username}:{self.password}@{self.account}"
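Each config class simply formats a SQLAlchemy-style connection string; note that PostgresConfig and SQLServerConfig store a database name but do not include it in the URL as written. A quick illustration with placeholder credentials, assuming this wheel is installed:

from trilogy.dialect.config import DuckDBConfig, PostgresConfig

print(DuckDBConfig().connection_string())                     # duckdb:///:memory:
print(DuckDBConfig(path="analytics.db").connection_string())  # duckdb:///analytics.db

pg = PostgresConfig(host="localhost", port=5432, username="user", password="pw", database="prod")
print(pg.connection_string())  # postgresql://user:pw@localhost:5432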
trilogy/dialect/duckdb.py
ADDED
@@ -0,0 +1,83 @@
+from typing import Mapping, Callable, Any
+
+from jinja2 import Template
+
+from trilogy.core.enums import FunctionType, WindowType, UnnestMode
+from trilogy.dialect.base import BaseDialect
+
+WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
+
+FUNCTION_MAP = {
+    FunctionType.COUNT: lambda args: f"count({args[0]})",
+    FunctionType.SUM: lambda args: f"sum({args[0]})",
+    FunctionType.AVG: lambda args: f"avg({args[0]})",
+    FunctionType.LENGTH: lambda args: f"length({args[0]})",
+    FunctionType.LIKE: lambda args: (
+        f" CASE WHEN {args[0]} like {args[1]} THEN True ELSE False END"
+    ),
+    FunctionType.CONCAT: lambda args: (
+        f"CONCAT({','.join([f''' {str(a)} ''' for a in args])})"
+    ),
+    FunctionType.SPLIT: lambda args: (
+        f"STRING_SPLIT({','.join([f''' {str(a)} ''' for a in args])})"
+    ),
+    ## Duckdb indexes from 1, not 0
+    FunctionType.INDEX_ACCESS: lambda args: (f"{args[0]}[{args[1]}]"),
+    # datetime is aliased
+    FunctionType.CURRENT_DATETIME: lambda x: "cast(get_current_timestamp() as datetime)",
+    FunctionType.DATE_TRUNCATE: lambda x: f"date_trunc('{x[1]}', {x[0]})",
+    FunctionType.DATE_ADD: lambda x: f"date_add({x[0]}, INTERVAL {x[2]} {x[1]})",
+    FunctionType.DATE_PART: lambda x: f"date_part('{x[1]}', {x[0]})",
+    FunctionType.DATE_DIFF: lambda x: f"date_diff('{x[2]}', {x[0]}, {x[1]})",
+    FunctionType.CONCAT: lambda x: f"({' || '.join(x)})",
+}
+
+# if an aggregate function is called on a source that is at the same grain as the aggregate
+# we may return a static value
+FUNCTION_GRAIN_MATCH_MAP = {
+    **FUNCTION_MAP,
+    FunctionType.COUNT: lambda args: "1",
+    FunctionType.SUM: lambda args: f"{args[0]}",
+    FunctionType.AVG: lambda args: f"{args[0]}",
+}
+
+DUCKDB_TEMPLATE = Template(
+    """{%- if output %}
+CREATE OR REPLACE TABLE {{ output.address.location }} AS
+{% endif %}{%- if ctes %}
+WITH {% for cte in ctes %}
+{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+SELECT
+
+{%- for select in select_columns %}
+{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
+{% if base %}FROM
+{{ base }}{% endif %}{% if joins %}
+{%- for join in joins %}
+{{ join }}{% endfor %}{% endif %}
+{% if where %}WHERE
+{{ where }}
+{% endif %}
+{%- if group_by %}
+GROUP BY {% for group in group_by %}
+{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+{%- if order_by %}
+ORDER BY {% for order in order_by %}
+{{ order }}{% if not loop.last %},{% endif %}
+{% endfor %}{% endif %}
+{%- if limit is not none %}
+LIMIT ({{ limit }}){% endif %}
+"""
+)
+
+
+class DuckDBDialect(BaseDialect):
+    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
+    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
+    FUNCTION_GRAIN_MATCH_MAP = {
+        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
+        **FUNCTION_GRAIN_MATCH_MAP,
+    }
+    QUOTE_CHARACTER = '"'
+    SQL_TEMPLATE = DUCKDB_TEMPLATE
+    UNNEST_MODE = UnnestMode.DIRECT
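The FUNCTION_GRAIN_MATCH_MAP comment above is the key idea: when the upstream source is already at the aggregate's grain, the rendered expression degenerates to a constant or the bare column rather than re-aggregating. A small sketch, assuming this wheel is installed (column names are placeholders):

from trilogy.core.enums import FunctionType
from trilogy.dialect.duckdb import FUNCTION_MAP, FUNCTION_GRAIN_MATCH_MAP

print(FUNCTION_MAP[FunctionType.COUNT](["order_id"]))              # count(order_id)
print(FUNCTION_GRAIN_MATCH_MAP[FunctionType.COUNT](["order_id"]))  # 1
print(FUNCTION_GRAIN_MATCH_MAP[FunctionType.SUM](["revenue"]))     # revenue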
trilogy/dialect/enums.py
ADDED
@@ -0,0 +1,95 @@
+from enum import Enum
+from typing import List, TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+    from trilogy.hooks.base_hook import BaseHook
+    from trilogy import Executor, Environment
+
+from trilogy.dialect.config import DialectConfig
+from trilogy.constants import logger
+
+
+class Dialects(Enum):
+    BIGQUERY = "bigquery"
+    SQL_SERVER = "sql_server"
+    DUCK_DB = "duck_db"
+    PRESTO = "presto"
+    TRINO = "trino"
+    POSTGRES = "postgres"
+    SNOWFLAKE = "snowflake"
+
+    @classmethod
+    def _missing_(cls, value):
+        if value == "duckdb":
+            return cls.DUCK_DB
+        return super()._missing_(value)
+
+    def default_engine(self, conf=None):
+        if self == Dialects.BIGQUERY:
+            from sqlalchemy import create_engine
+            from google.auth import default
+            from google.cloud import bigquery
+
+            credentials, project = default()
+            client = bigquery.Client(credentials=credentials, project=project)
+            return create_engine(
+                f"bigquery://{project}?user_supplied_client=True",
+                connect_args={"client": client},
+            )
+        elif self == Dialects.SQL_SERVER:
+            from sqlalchemy import create_engine
+
+            raise NotImplementedError()
+        elif self == Dialects.DUCK_DB:
+            from sqlalchemy import create_engine
+            from trilogy.dialect.config import DuckDBConfig
+
+            if not conf:
+                conf = DuckDBConfig()
+            if not isinstance(conf, DuckDBConfig):
+                raise TypeError("Invalid dialect configuration for type duck_db")
+            return create_engine(conf.connection_string(), future=True)
+        elif self == Dialects.SNOWFLAKE:
+            from sqlalchemy import create_engine
+            from trilogy.dialect.config import SnowflakeConfig
+
+            if not isinstance(conf, SnowflakeConfig):
+                raise TypeError("Invalid dialect configuration for type snowflake")
+            return create_engine(conf.connection_string(), future=True)
+        elif self == Dialects.POSTGRES:
+            logger.warn(
+                "WARN: Using experimental postgres dialect. Most functionality will not work."
+            )
+            import importlib
+
+            spec = importlib.util.find_spec("psycopg2")
+            if spec is None:
+                raise ImportError(
+                    "postgres driver not installed. python -m pip install pypreql[postgres]"
+                )
+            from sqlalchemy import create_engine
+            from trilogy.dialect.config import PostgresConfig
+
+            if not isinstance(conf, PostgresConfig):
+                raise TypeError("Invalid dialect configuration for type postgres")
+
+            return create_engine(conf.connection_string(), future=True)
+        else:
+            raise ValueError(
+                f"Unsupported dialect {self} for default engine creation; create one explicitly."
+            )
+
+    def default_executor(
+        self,
+        environment: Optional["Environment"] = None,
+        hooks: List["BaseHook"] | None = None,
+        conf: DialectConfig | None = None,
+    ) -> "Executor":
+        from trilogy import Executor, Environment
+
+        return Executor(
+            engine=self.default_engine(conf=conf),
+            environment=environment or Environment(),
+            dialect=self,
+            hooks=hooks,
+        )
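Dialects is the user-facing entry point: _missing_ accepts the "duckdb" shorthand, default_engine builds a SQLAlchemy engine from the matching DialectConfig, and default_executor wraps it in an Executor. A sketch of typical usage, assuming this wheel plus a DuckDB SQLAlchemy driver are installed:

from trilogy.dialect.enums import Dialects

assert Dialects("duckdb") is Dialects.DUCK_DB  # alias resolved by _missing_

# no conf given, so default_engine falls back to an in-memory DuckDBConfig
executor = Dialects.DUCK_DB.default_executor()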
trilogy/dialect/postgres.py
ADDED
@@ -0,0 +1,86 @@
+from typing import Mapping, Callable, Any
+
+from jinja2 import Template
+
+from trilogy.core.enums import FunctionType, WindowType, DatePart
+from trilogy.dialect.base import BaseDialect
+
+
+def date_diff(first: str, second: str, grain: DatePart) -> str:
+    grain = DatePart(grain)
+    if grain == DatePart.YEAR:
+        return f"date_part('year', {second}) - date_part('year', {first})"
+    elif grain == DatePart.MONTH:
+        return f"12 * {date_diff(first, second, DatePart.YEAR)} + date_part('month', {second}) - date_part('month', {first})"
+    elif grain == DatePart.DAY:
+        return f"date_part('day', {second} - {first})"
+    elif grain == DatePart.HOUR:
+        return f"{date_diff(first, second, DatePart.DAY)} *24 + date_part('hour', {second} - {first})"
+    elif grain == DatePart.MINUTE:
+        return f"{date_diff(first, second, DatePart.HOUR)} *60 + date_part('minute', {second} - {first})"
+    elif grain == DatePart.SECOND:
+        return f"{date_diff(first, second, DatePart.MINUTE)} *60 + date_part('second', {second} - {first})"
+    else:
+        raise NotImplementedError(f"Date diff not implemented for grain {grain}")
+
+
+WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
+
+FUNCTION_MAP = {
+    FunctionType.SPLIT: lambda x: f"string_to_array({x[0]}, {x[1]})",
+    FunctionType.DATE_TRUNCATE: lambda x: f"date_trunc('{x[1]}', {x[0]})",
+    FunctionType.DATE_ADD: lambda x: f"({x[0]} + INTERVAL '{x[2]} {x[1]}')",
+    FunctionType.DATE_PART: lambda x: f"date_part('{x[1]}', {x[0]})",
+    FunctionType.DATE_DIFF: lambda x: date_diff(x[0], x[1], x[2]),
+    FunctionType.IS_NULL: lambda x: f"{x[0]} IS NULL",
+}
+
+FUNCTION_GRAIN_MATCH_MAP = {
+    **FUNCTION_MAP,
+    FunctionType.COUNT: lambda args: f"{args[0]}",
+    FunctionType.SUM: lambda args: f"{args[0]}",
+    FunctionType.AVG: lambda args: f"{args[0]}",
+}
+
+PG_SQL_TEMPLATE = Template(
+    """{%- if output %}
+DROP TABLE IF EXISTS {{ output.address.location }};
+CREATE TABLE {{ output.address.location }} AS
+{% endif %}{%- if ctes %}
+WITH {% for cte in ctes %}
+{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+SELECT
+
+{%- for select in select_columns %}
+{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
+{% if base %}FROM
+{{ base }}{% endif %}{% if joins %}
+{% for join in joins %}
+{{ join }}
+{% endfor %}{% endif %}
+{% if where %}WHERE
+{{ where }}
+{% endif %}
+{%- if group_by %}GROUP BY {% for group in group_by %}
+{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+{%- if order_by %}
+ORDER BY {% for order in order_by %}
+{{ order }}{% if not loop.last %},{% endif %}
+{% endfor %}{% endif %}
+{%- if limit is not none %}
+LIMIT {{ limit }}{% endif %}
+"""
+)
+
+MAX_IDENTIFIER_LENGTH = 50
+
+
+class PostgresDialect(BaseDialect):
+    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
+    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
+    FUNCTION_GRAIN_MATCH_MAP = {
+        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
+        **FUNCTION_GRAIN_MATCH_MAP,
+    }
+    QUOTE_CHARACTER = '"'
+    SQL_TEMPLATE = PG_SQL_TEMPLATE
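date_diff above renders recursively, so finer grains embed the expression for the coarser grain. The literal strings it produces for two grains, assuming this wheel is installed (column names are placeholders):

from trilogy.core.enums import DatePart
from trilogy.dialect.postgres import date_diff

print(date_diff("start_at", "end_at", DatePart.YEAR))
# date_part('year', end_at) - date_part('year', start_at)

print(date_diff("start_at", "end_at", DatePart.DAY))
# date_part('day', end_at - start_at)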
trilogy/dialect/presto.py
ADDED
@@ -0,0 +1,82 @@
+from typing import Mapping, Callable, Any
+
+from jinja2 import Template
+
+from trilogy.core.enums import FunctionType, WindowType
+from trilogy.dialect.base import BaseDialect
+
+
+WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
+
+FUNCTION_MAP = {
+    FunctionType.COUNT: lambda x: f"count({x[0]})",
+    FunctionType.SUM: lambda x: f"sum({x[0]})",
+    FunctionType.LENGTH: lambda x: f"length({x[0]})",
+    FunctionType.AVG: lambda x: f"avg({x[0]})",
+    FunctionType.LIKE: lambda x: (
+        f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
+    ),
+    FunctionType.MINUTE: lambda x: f"EXTRACT(MINUTE from {x[0]})",
+    FunctionType.SECOND: lambda x: f"EXTRACT(SECOND from {x[0]})",
+    FunctionType.HOUR: lambda x: f"EXTRACT(HOUR from {x[0]})",
+    FunctionType.DAY_OF_WEEK: lambda x: f"EXTRACT(DAYOFWEEK from {x[0]})",
+    FunctionType.DAY: lambda x: f"EXTRACT(DAY from {x[0]})",
+    FunctionType.YEAR: lambda x: f"EXTRACT(YEAR from {x[0]})",
+    FunctionType.MONTH: lambda x: f"EXTRACT(MONTH from {x[0]})",
+    FunctionType.WEEK: lambda x: f"EXTRACT(WEEK from {x[0]})",
+    FunctionType.QUARTER: lambda x: f"EXTRACT(QUARTER from {x[0]})",
+    # math
+    FunctionType.DIVIDE: lambda x: f"SAFE_DIVIDE({x[0]},{x[1]})",
+}
+
+FUNCTION_GRAIN_MATCH_MAP = {
+    **FUNCTION_MAP,
+    FunctionType.COUNT: lambda args: f"{args[0]}",
+    FunctionType.SUM: lambda args: f"{args[0]}",
+    FunctionType.AVG: lambda args: f"{args[0]}",
+}
+
+SQL_TEMPLATE = Template(
+    """{%- if output %}
+CREATE OR REPLACE TABLE {{ output.address }} AS
+{% endif %}{%- if ctes %}
+WITH {% for cte in ctes %}
+{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+SELECT
+
+{%- for select in select_columns %}
+{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
+{% if base %}FROM
+{{ base }}{% endif %}{% if joins %}
+{% for join in joins %}
+{{ join }}
+{% endfor %}{% endif %}
+{% if where %}WHERE
+{{ where }}
+{% endif %}
+{%- if group_by %}GROUP BY {% for group in group_by %}
+{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+{%- if order_by %}
+ORDER BY {% for order in order_by %}
+{{ order }}{% if not loop.last %},{% endif %}
+{% endfor %}{% endif %}
+{%- if limit is not none %}
+LIMIT {{ limit }}{% endif %}
+"""
+)
+MAX_IDENTIFIER_LENGTH = 50
+
+
+class PrestoDialect(BaseDialect):
+    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
+    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
+    FUNCTION_GRAIN_MATCH_MAP = {
+        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
+        **FUNCTION_GRAIN_MATCH_MAP,
+    }
+    QUOTE_CHARACTER = '"'
+    SQL_TEMPLATE = SQL_TEMPLATE
+
+
+class TrinoDialect(PrestoDialect):
+    pass
trilogy/dialect/snowflake.py
ADDED
@@ -0,0 +1,82 @@
+from typing import Mapping, Callable, Any
+
+from jinja2 import Template
+
+from trilogy.core.enums import FunctionType, WindowType, UnnestMode
+from trilogy.dialect.base import BaseDialect
+
+ENV_SNOWFLAKE_PW = "PREQL_SNOWFLAKE_PW"
+ENV_SNOWFLAKE_USER = "PREQL_SNOWFLAKE_USER"
+ENV_SNOWFLAKE_ACCOUNT = "PREQL_SNOWFLAKE_ACCOUNT"
+
+WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
+
+FUNCTION_MAP = {
+    FunctionType.COUNT: lambda x: f"count({x[0]})",
+    FunctionType.SUM: lambda x: f"sum({x[0]})",
+    FunctionType.LENGTH: lambda x: f"length({x[0]})",
+    FunctionType.AVG: lambda x: f"avg({x[0]})",
+    FunctionType.LIKE: lambda x: (
+        f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
+    ),
+    FunctionType.MINUTE: lambda x: f"EXTRACT(MINUTE from {x[0]})",
+    FunctionType.SECOND: lambda x: f"EXTRACT(SECOND from {x[0]})",
+    FunctionType.HOUR: lambda x: f"EXTRACT(HOUR from {x[0]})",
+    FunctionType.DAY_OF_WEEK: lambda x: f"EXTRACT(DAYOFWEEK from {x[0]})",
+    FunctionType.DAY: lambda x: f"EXTRACT(DAY from {x[0]})",
+    FunctionType.YEAR: lambda x: f"EXTRACT(YEAR from {x[0]})",
+    FunctionType.MONTH: lambda x: f"EXTRACT(MONTH from {x[0]})",
+    FunctionType.WEEK: lambda x: f"EXTRACT(WEEK from {x[0]})",
+    FunctionType.QUARTER: lambda x: f"EXTRACT(QUARTER from {x[0]})",
+    # math
+    FunctionType.DIVIDE: lambda x: f"DIV0({x[0]},{x[1]})",
+}
+
+FUNCTION_GRAIN_MATCH_MAP = {
+    **FUNCTION_MAP,
+    FunctionType.COUNT: lambda args: f"{args[0]}",
+    FunctionType.SUM: lambda args: f"{args[0]}",
+    FunctionType.AVG: lambda args: f"{args[0]}",
+}
+
+BQ_SQL_TEMPLATE = Template(
+    """{%- if output %}
+CREATE OR REPLACE TABLE {{ output.address.location }} AS
+{% endif %}{%- if ctes %}
+WITH {% for cte in ctes %}
+{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+SELECT
+
+{%- for select in select_columns %}
+{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
+{% if base %}FROM
+{{ base }}{% endif %}{% if joins %}
+{% for join in joins %}
+{{ join }}
+{% endfor %}{% endif %}
+{% if where %}WHERE
+{{ where }}
+{% endif %}
+{%- if group_by %}GROUP BY {% for group in group_by %}
+{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+{%- if order_by %}
+ORDER BY {% for order in order_by %}
+{{ order }}{% if not loop.last %},{% endif %}
+{% endfor %}{% endif %}
+{%- if limit is not none %}
+LIMIT {{ limit }}{% endif %}
+"""
+)
+MAX_IDENTIFIER_LENGTH = 50
+
+
+class SnowflakeDialect(BaseDialect):
+    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
+    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
+    FUNCTION_GRAIN_MATCH_MAP = {
+        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
+        **FUNCTION_GRAIN_MATCH_MAP,
+    }
+    QUOTE_CHARACTER = '"'
+    SQL_TEMPLATE = BQ_SQL_TEMPLATE
+    UNNEST_MODE = UnnestMode.CROSS_JOIN