pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
- pytrilogy-0.3.138.dist-info/METADATA +525 -0
- pytrilogy-0.3.138.dist-info/RECORD +182 -0
- pytrilogy-0.3.138.dist-info/WHEEL +5 -0
- pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +9 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +87 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +143 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2672 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +494 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +748 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +517 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +106 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1359 -0
- trilogy/dialect/bigquery.py +256 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +177 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +91 -0
- trilogy/dialect/presto.py +104 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +90 -0
- trilogy/dialect/sql_server.py +92 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +750 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +7 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +289 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +460 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/parallel_execution.py +483 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/trilogy.py +772 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from typing import Any, Callable, Mapping
|
|
2
|
+
|
|
3
|
+
from jinja2 import Template
|
|
4
|
+
|
|
5
|
+
from trilogy.core.enums import DatePart, FunctionType, WindowType
|
|
6
|
+
from trilogy.dialect.base import BaseDialect
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def date_diff(first: str, second: str, grain: DatePart) -> str:
    """Render a Postgres SQL expression for the difference between two
    date expressions at the requested grain.

    Postgres has no DATEDIFF, so year/month deltas are composed from
    date_part differences and finer grains build recursively on the
    next coarser grain.
    """
    grain = DatePart(grain)
    if grain == DatePart.YEAR:
        return f"date_part('year', {second}) - date_part('year', {first})"
    if grain == DatePart.MONTH:
        # Twelve months per whole-year delta plus the month-of-year delta.
        return f"12 * {date_diff(first, second, DatePart.YEAR)} + date_part('month', {second}) - date_part('month', {first})"
    if grain == DatePart.DAY:
        return f"date_part('day', {second} - {first})"
    if grain == DatePart.HOUR:
        return f"{date_diff(first, second, DatePart.DAY)} *24 + date_part('hour', {second} - {first})"
    if grain == DatePart.MINUTE:
        return f"{date_diff(first, second, DatePart.HOUR)} *60 + date_part('minute', {second} - {first})"
    if grain == DatePart.SECOND:
        return f"{date_diff(first, second, DatePart.MINUTE)} *60 + date_part('second', {second} - {first})"
    raise NotImplementedError(f"Date diff not implemented for grain {grain}")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# No Postgres-specific window overrides; BaseDialect defaults apply.
WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}

# Postgres-specific function renderings: each lambda receives the rendered
# argument strings (and their types) and returns a SQL fragment.
FUNCTION_MAP = {
    FunctionType.SPLIT: lambda x, types: f"string_to_array({x[0]}, {x[1]})",
    FunctionType.DATE_TRUNCATE: lambda x, types: f"date_trunc('{x[1]}', {x[0]})",
    FunctionType.DATE_ADD: lambda x, types: f"({x[0]} + INTERVAL '{x[2]} {x[1]}')",
    FunctionType.DATE_PART: lambda x, types: f"date_part('{x[1]}', {x[0]})",
    FunctionType.DATE_DIFF: lambda x, types: date_diff(x[0], x[1], x[2]),
    FunctionType.IS_NULL: lambda x, types: f"{x[0]} IS NULL",
}

# Variants used when an aggregate is evaluated at the same grain as its
# source, where the aggregate degenerates to a row-level expression.
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # BUG FIX: the original rendered "CASE WHEN<expr>" with no space after
    # WHEN, producing invalid SQL for COUNT_DISTINCT at matching grain.
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
}
|
|
45
|
+
|
|
46
|
+
# Postgres statement template. Unlike simpler dialect templates this supports
# persisting results: when `output` is set, the rendered SQL drops and
# recreates the target table via CREATE TABLE ... AS.
PG_SQL_TEMPLATE = Template(
    """{%- if output %}
DROP TABLE IF EXISTS {{ output.address.location }};
CREATE TABLE {{ output.address.location }} AS
{% endif %}{%- if ctes %}
WITH {% for cte in ctes %}
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
{%- if full_select -%}
{{full_select}}
{%- else -%}
SELECT
{%- for select in select_columns %}
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
{% if base %}FROM
{{ base }}{% endif %}{% if joins %}
{% for join in joins %}
{{ join }}
{% endfor %}{% endif %}
{% if where %}WHERE
{{ where }}
{% endif %}
{%- if group_by %}GROUP BY {% for group in group_by %}
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
HAVING
\t{{ having }}{% endif %}
{%- if order_by %}
ORDER BY {% for order in order_by %}
{{ order }}{% if not loop.last %},{% endif %}
{% endfor %}{% endif %}
{%- if limit is not none %}
LIMIT {{ limit }}{% endif %}{% endif %}
"""
)

# NOTE(review): not referenced in this module — presumably consumed by shared
# dialect machinery; confirm before removing.
MAX_IDENTIFIER_LENGTH = 50


class PostgresDialect(BaseDialect):
    """Postgres SQL renderer: BaseDialect behavior plus the module-level
    overrides above (later entries win on key conflicts)."""

    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
    FUNCTION_GRAIN_MATCH_MAP = {
        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
        **FUNCTION_GRAIN_MATCH_MAP,
    }
    # Postgres quotes identifiers with double quotes.
    QUOTE_CHARACTER = '"'
    SQL_TEMPLATE = PG_SQL_TEMPLATE
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from typing import Any, Callable, Mapping
|
|
2
|
+
|
|
3
|
+
from jinja2 import Template
|
|
4
|
+
|
|
5
|
+
from trilogy.core.enums import FunctionType, GroupMode, UnnestMode, WindowType
|
|
6
|
+
from trilogy.core.models.core import DataType
|
|
7
|
+
from trilogy.dialect.base import BaseDialect
|
|
8
|
+
|
|
9
|
+
# No Presto-specific window overrides; BaseDialect defaults apply.
WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}

# Presto-specific function renderings: each lambda receives the rendered
# argument strings (and their types) and returns a SQL fragment.
FUNCTION_MAP = {
    FunctionType.COUNT: lambda x, types: f"count({x[0]})",
    FunctionType.SUM: lambda x, types: f"sum({x[0]})",
    FunctionType.LENGTH: lambda x, types: f"length({x[0]})",
    FunctionType.AVG: lambda x, types: f"avg({x[0]})",
    FunctionType.INDEX_ACCESS: lambda x, types: f"element_at({x[0]},{x[1]})",
    FunctionType.MAP_ACCESS: lambda x, types: f"{x[0]}[{x[1]}]",
    FunctionType.LIKE: lambda x, types: (
        f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
    ),
    FunctionType.MINUTE: lambda x, types: f"EXTRACT(MINUTE from {x[0]})",
    FunctionType.SECOND: lambda x, types: f"EXTRACT(SECOND from {x[0]})",
    FunctionType.HOUR: lambda x, types: f"EXTRACT(HOUR from {x[0]})",
    FunctionType.DAY_OF_WEEK: lambda x, types: f"EXTRACT(DAYOFWEEK from {x[0]})",
    FunctionType.DAY: lambda x, types: f"EXTRACT(DAY from {x[0]})",
    FunctionType.YEAR: lambda x, types: f"EXTRACT(YEAR from {x[0]})",
    FunctionType.MONTH: lambda x, types: f"EXTRACT(MONTH from {x[0]})",
    FunctionType.WEEK: lambda x, types: f"EXTRACT(WEEK from {x[0]})",
    FunctionType.QUARTER: lambda x, types: f"EXTRACT(QUARTER from {x[0]})",
    # math
    FunctionType.DIVIDE: lambda x, types: f"{x[0]}/{x[1]}",
    FunctionType.DATE_ADD: lambda x, types: f"DATE_ADD('{x[1]}', {x[2]}, {x[0]})",
    FunctionType.CURRENT_DATE: lambda x, types: "CURRENT_DATE",
    FunctionType.CURRENT_DATETIME: lambda x, types: "CURRENT_TIMESTAMP",
    FunctionType.ARRAY: lambda x, types: f"ARRAY[{', '.join(x)}]",
    # regex
    FunctionType.REGEXP_CONTAINS: lambda x, types: f"REGEXP_LIKE({x[0]}, {x[1]})",
}

# Variants used when an aggregate is evaluated at the same grain as its
# source, where the aggregate degenerates to a row-level expression.
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # BUG FIX: the original rendered "CASE WHEN<expr>" with no space after
    # WHEN, producing invalid SQL for COUNT_DISTINCT at matching grain.
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
}
|
|
47
|
+
|
|
48
|
+
# Presto statement template. `output` is rendered verbatim as a prefix when
# present; otherwise this is a standard CTE + SELECT skeleton.
SQL_TEMPLATE = Template(
    """{%- if output %}
{{output}}
{% endif %}{%- if ctes %}
WITH {% for cte in ctes %}
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
{%- if full_select -%}
{{full_select}}
{%- else -%}
SELECT
{%- for select in select_columns %}
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
{% if base %}FROM
{{ base }}{% endif %}{% if joins %}
{% for join in joins %}
{{ join }}
{% endfor %}{% endif %}
{% if where %}WHERE
{{ where }}
{% endif %}
{%- if group_by %}GROUP BY {% for group in group_by %}
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
HAVING
\t{{ having }}{% endif %}
{%- if order_by %}
ORDER BY {% for order in order_by %}
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
{%- if limit is not none %}
LIMIT {{ limit }}{% endif %}{% endif %}
"""
)
# NOTE(review): not referenced in this module — presumably consumed by shared
# dialect machinery; confirm before removing.
MAX_IDENTIFIER_LENGTH = 50


class PrestoDialect(BaseDialect):
    """Presto SQL renderer: BaseDialect behavior plus the module-level
    overrides above (later entries win on key conflicts)."""

    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
    FUNCTION_GRAIN_MATCH_MAP = {
        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
        **FUNCTION_GRAIN_MATCH_MAP,
    }
    QUOTE_CHARACTER = '"'
    SQL_TEMPLATE = SQL_TEMPLATE
    # Presto type names differ from the base defaults for these two.
    DATATYPE_MAP = {
        **BaseDialect.DATATYPE_MAP,
        DataType.NUMERIC: "DECIMAL",
        DataType.STRING: "VARCHAR",
    }
    UNNEST_MODE = UnnestMode.PRESTO
    GROUP_MODE = GroupMode.BY_INDEX
    ALIAS_ORDER_REFERENCING_ALLOWED = (
        False  # some complex presto functions don't support aliasing
    )


class TrinoDialect(PrestoDialect):
    """Trino renders identically to Presto; inherits everything."""

    pass
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Any, List
|
|
3
|
+
|
|
4
|
+
from trilogy.core.models.author import ConceptRef
|
|
5
|
+
from trilogy.engine import ResultProtocol
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class MockResult(ResultProtocol):
    """In-memory, cursor-like result set used by the mock dialect.

    Rows are normalized to MockResultRow at construction. Fetch methods
    and iteration consume rows destructively, mirroring how a database
    cursor drains as it is read.
    """

    # Remaining (unconsumed) rows.
    values: list["MockResultRow"]
    # Ordered column names, as reported by keys().
    columns: list[str]

    def __init__(self, values: list[Any], columns: list[str]):
        def _coerce(item: Any) -> "MockResultRow":
            # Accept pre-built rows as-is, wrap raw dicts, reject the rest.
            if isinstance(item, MockResultRow):
                return item
            if isinstance(item, dict):
                return MockResultRow(item)
            raise ValueError(
                f"Cannot process value of type {type(item)} in MockResult"
            )

        self.columns = columns
        self.values = [_coerce(item) for item in values]

    def __iter__(self):
        # Drain rows one at a time; iterating empties the result set.
        while True:
            row = self.fetchone()
            if row is None:
                return
            yield row

    def fetchall(self):
        # Returns the live backing list (not a copy).
        return self.values

    def fetchone(self):
        # Pop the next row, or None once exhausted.
        return self.values.pop(0) if self.values else None

    def fetchmany(self, size: int):
        batch, self.values = self.values[:size], self.values[size:]
        return batch

    def keys(self):
        return self.columns

    def as_dict(self):
        # Defensive isinstance check retained even though __init__ already
        # normalizes every entry to MockResultRow.
        return [
            row.as_dict() if isinstance(row, MockResultRow) else row
            for row in self.values
        ]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class MockResultRow:
    """A single mock row: a thin wrapper over a column-name -> value dict
    supporting attribute access, item access, iteration, keys and values."""

    _values: dict[str, Any]

    def as_dict(self):
        # Exposes the backing mapping directly (not a copy).
        return self._values

    def __str__(self) -> str:
        return str(self._values)

    def __repr__(self) -> str:
        return repr(self._values)

    def __getattr__(self, name: str) -> Any:
        # Invoked only when normal attribute lookup fails: treat unknown
        # attributes as column lookups, then fall back to the default
        # machinery so genuine misses still raise AttributeError.
        data = self._values
        if name not in data:
            return super().__getattribute__(name)
        return data[name]

    def __getitem__(self, key: str) -> Any:
        return self._values[key]

    def __iter__(self):
        # Iterating a row yields its cell values in column order.
        yield from self._values.values()

    def values(self):
        return self._values.values()

    def keys(self):
        return self._values.keys()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def generate_result_set(
    columns: List[ConceptRef], output_data: list[Any]
) -> MockResult:
    """Generate a mock result set from columns and output data."""
    # Column names mirror concept addresses with namespace dots flattened
    # to underscores (valid identifiers for attribute-style row access).
    names = [x.address.replace(".", "_") for x in columns]
    # NOTE(review): zip(names, [row]) pairs only the FIRST column name with
    # each row, treating every row as one scalar value. This looks intended
    # for single-column results only — confirm against callers before using
    # with multi-column output.
    return MockResult(
        values=[dict(zip(names, [row])) for row in output_data], columns=names
    )
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from typing import Any, Callable, Mapping
|
|
2
|
+
|
|
3
|
+
from jinja2 import Template
|
|
4
|
+
|
|
5
|
+
from trilogy.core.enums import FunctionType, UnnestMode, WindowType
|
|
6
|
+
from trilogy.dialect.base import BaseDialect
|
|
7
|
+
|
|
8
|
+
# Environment variable names for Snowflake credentials.
ENV_SNOWFLAKE_PW = "PREQL_SNOWFLAKE_PW"
ENV_SNOWFLAKE_USER = "PREQL_SNOWFLAKE_USER"
ENV_SNOWFLAKE_ACCOUNT = "PREQL_SNOWFLAKE_ACCOUNT"

# No Snowflake-specific window overrides; BaseDialect defaults apply.
WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}

# Snowflake-specific function renderings: each lambda receives the rendered
# argument strings (and their types) and returns a SQL fragment.
FUNCTION_MAP = {
    FunctionType.COUNT: lambda x, types: f"count({x[0]})",
    FunctionType.SUM: lambda x, types: f"sum({x[0]})",
    FunctionType.LENGTH: lambda x, types: f"length({x[0]})",
    FunctionType.AVG: lambda x, types: f"avg({x[0]})",
    FunctionType.LIKE: lambda x, types: (
        f" CASE WHEN {x[0]} like {x[1]} THEN True ELSE False END"
    ),
    FunctionType.MINUTE: lambda x, types: f"EXTRACT(MINUTE from {x[0]})",
    FunctionType.SECOND: lambda x, types: f"EXTRACT(SECOND from {x[0]})",
    FunctionType.HOUR: lambda x, types: f"EXTRACT(HOUR from {x[0]})",
    FunctionType.DAY_OF_WEEK: lambda x, types: f"EXTRACT(DAYOFWEEK from {x[0]})",
    FunctionType.DAY: lambda x, types: f"EXTRACT(DAY from {x[0]})",
    FunctionType.YEAR: lambda x, types: f"EXTRACT(YEAR from {x[0]})",
    FunctionType.MONTH: lambda x, types: f"EXTRACT(MONTH from {x[0]})",
    FunctionType.WEEK: lambda x, types: f"EXTRACT(WEEK from {x[0]})",
    FunctionType.QUARTER: lambda x, types: f"EXTRACT(QUARTER from {x[0]})",
    # math
    FunctionType.POWER: lambda x, types: f"POWER({x[0]}, {x[1]})",
    # DIV0 returns 0 instead of erroring on division by zero.
    FunctionType.DIVIDE: lambda x, types: f"DIV0({x[0]},{x[1]})",
    FunctionType.UNNEST: lambda x, types: f"table(flatten({x[0]}))",
    FunctionType.ARRAY: lambda x, types: f"ARRAY_CONSTRUCT({', '.join(x)})",
}

# Variants used when an aggregate is evaluated at the same grain as its
# source, where the aggregate degenerates to a row-level expression.
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # BUG FIX: the original rendered "CASE WHEN<expr>" with no space after
    # WHEN, producing invalid SQL for COUNT_DISTINCT at matching grain.
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Snowflake statement template. Differences from the base skeleton: CTE names
# are double-quoted, and a RECURSIVE keyword is emitted when `recursive` is set.
SNOWFLAKE_SQL_TEMPLATE = Template(
    """{%- if output %}
{{output}}
{% endif %}{%- if ctes %}
WITH {% if recursive%}RECURSIVE{% endif %}{% for cte in ctes %}
"{{cte.name}}" as ({{cte.statement}}){% if not loop.last %},{% endif %}{% else %}
{% endfor %}{% endif %}
{%- if full_select -%}
{{full_select}}
{%- else -%}

SELECT
{%- for select in select_columns %}
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
{% if base %}FROM
{{ base }}{% endif %}{% if joins %}{% for join in joins %}
{{ join }}{% endfor %}{% endif %}
{% if where %}WHERE
{{ where }}
{% endif %}
{%- if group_by %}GROUP BY {% for group in group_by %}
{{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
HAVING
\t{{ having }}{% endif %}
{%- if order_by %}
ORDER BY {% for order in order_by %}
{{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
{%- if limit is not none %}
LIMIT {{ limit }}{% endif %}{% endif %}
"""
)
# NOTE(review): not referenced in this module — presumably consumed by shared
# dialect machinery; confirm before removing.
MAX_IDENTIFIER_LENGTH = 50


class SnowflakeDialect(BaseDialect):
    """Snowflake SQL renderer: BaseDialect behavior plus the module-level
    overrides above (later entries win on key conflicts)."""

    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
    FUNCTION_GRAIN_MATCH_MAP = {
        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
        **FUNCTION_GRAIN_MATCH_MAP,
    }
    QUOTE_CHARACTER = '"'
    SQL_TEMPLATE = SNOWFLAKE_SQL_TEMPLATE
    UNNEST_MODE = UnnestMode.SNOWFLAKE
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from typing import Any, Callable, Mapping
|
|
2
|
+
|
|
3
|
+
from jinja2 import Template
|
|
4
|
+
|
|
5
|
+
from trilogy.core.enums import FunctionType, WindowType
|
|
6
|
+
from trilogy.core.statements.execute import (
|
|
7
|
+
PROCESSED_STATEMENT_TYPES,
|
|
8
|
+
ProcessedQuery,
|
|
9
|
+
ProcessedQueryPersist,
|
|
10
|
+
)
|
|
11
|
+
from trilogy.dialect.base import BaseDialect
|
|
12
|
+
from trilogy.utility import string_to_hash
|
|
13
|
+
|
|
14
|
+
# No SQL Server-specific window overrides; BaseDialect defaults apply.
WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}

# T-SQL-specific function renderings: each lambda receives the rendered
# argument strings (and their types) and returns a SQL fragment.
FUNCTION_MAP = {
    FunctionType.COUNT: lambda args, types: f"count({args[0]})",
    FunctionType.SUM: lambda args, types: f"sum({args[0]})",
    FunctionType.AVG: lambda args, types: f"avg({args[0]})",
    FunctionType.LENGTH: lambda args, types: f"length({args[0]})",
    FunctionType.LIKE: lambda args, types: (
        f" CASE WHEN {args[0]} like {args[1]} THEN True ELSE False END"
    ),
    # NOTE(review): each argument is wrapped in single quotes, so rendered
    # expressions (not just literals) get quoted — confirm this is intended.
    FunctionType.CONCAT: lambda args, types: (
        f"CONCAT({','.join([f''' '{a}' ''' for a in args])})"
    ),
}

# if an aggregate function is called on a source that is at the same grain as the aggregate
# we may return a static value
FUNCTION_GRAIN_MATCH_MAP = {
    **FUNCTION_MAP,
    # BUG FIX: the original rendered "CASE WHEN<expr>" with no space after
    # WHEN, producing invalid SQL for COUNT_DISTINCT at matching grain.
    FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
    FunctionType.SUM: lambda args, types: f"{args[0]}",
    FunctionType.AVG: lambda args, types: f"{args[0]}",
}
|
|
38
|
+
|
|
39
|
+
# T-SQL statement template. Key difference from the ANSI base skeleton:
# row limits are rendered as SELECT TOP n (T-SQL has no LIMIT clause),
# and an optional leading comment is supported.
TSQL_TEMPLATE = Template(
    """{%- if ctes %}
WITH {% for cte in ctes %}
{{cte.name}} as ({{cte.statement}}){% if not loop.last %},{% endif %}{% endfor %}{% endif %}
{%- if full_select -%}{{full_select}}
{%- else -%}{%- if comment %}
-- {{ comment }}{%- endif -%}
SELECT
{%- if limit is not none %}
TOP {{ limit }}{% endif %}
{%- for select in select_columns %}
{{ select }}{% if not loop.last %},{% endif %}{% endfor %}
{% if base %}FROM
{{ base }}{% endif %}{% if joins %}
{% for join in joins %}
{{ join }}
{% endfor %}{% endif %}
{% if where %}WHERE
{{ where }}
{% endif %}
{%- if group_by %}
GROUP BY {% for group in group_by %}
{{group}}{% if not loop.last %},{% endif %}
{% endfor %}{% endif %}{% if having %}
HAVING
\t{{ having }}{% endif %}
{%- if order_by %}
ORDER BY {% for order in order_by %}
{{ order }}{% if not loop.last %},{% endif %}
{% endfor %}{% endif %}{% endif %}
"""
)

# SQL Server's identifier cap; CTE names longer than this are rewritten in
# compile_statement below.
MAX_IDENTIFIER_LENGTH = 128


class SqlServerDialect(BaseDialect):
    """SQL Server (T-SQL) renderer: BaseDialect behavior plus the
    module-level overrides above, with post-compile CTE name shortening."""

    WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
    FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
    FUNCTION_GRAIN_MATCH_MAP = {
        **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
        **FUNCTION_GRAIN_MATCH_MAP,
    }
    QUOTE_CHARACTER = '"'
    SQL_TEMPLATE = TSQL_TEMPLATE

    def compile_statement(self, query: PROCESSED_STATEMENT_TYPES) -> str:
        """Compile via the base dialect, then rewrite CTE names that exceed
        SQL Server's identifier-length limit to a short hash-derived alias."""
        base = super().compile_statement(query)
        if isinstance(query, (ProcessedQuery, ProcessedQueryPersist)):
            for cte in query.ctes:
                if len(cte.name) > MAX_IDENTIFIER_LENGTH:
                    # Deterministic replacement derived from a hash of the
                    # original name, so all references stay consistent.
                    new_name = f"rhash_{string_to_hash(cte.name)}"
                    # NOTE(review): plain substring replace — if one CTE name
                    # were a substring of another this could clobber the wrong
                    # occurrences. Confirm name generation precludes that.
                    base = base.replace(cte.name, new_name)
        return base
|
trilogy/engine.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from typing import Any, Generator, List, Optional, Protocol
|
|
2
|
+
|
|
3
|
+
from trilogy.core.models.environment import Environment
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ResultProtocol(Protocol):
    """Structural (duck-typed) interface for query results: the cursor-like
    subset — fetchall/fetchone/fetchmany/keys/iteration — that callers use."""

    def fetchall(self) -> List[Any]: ...

    def keys(self) -> List[str]: ...

    def fetchone(self) -> Optional[Any]: ...

    def fetchmany(self, size: int) -> List[Any]: ...

    def __iter__(self) -> Generator[Any, None, None]: ...
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class EngineConnection(Protocol):
    """Structural (duck-typed) interface for a database connection.

    Transaction hooks raise NotImplementedError by default so partial
    implementations fail loudly; close() defaults to a no-op.
    """
    # FIX: removed a stray dead `pass` statement that sat between the class
    # header and the first method.

    def execute(self, statement: str, parameters: Any | None = None) -> ResultProtocol:
        """Run a statement (optionally parameterized) and return its results."""

    def commit(self):
        raise NotImplementedError()

    def begin(self):
        raise NotImplementedError()

    def rollback(self):
        raise NotImplementedError()

    def close(self) -> None:
        # Optional for implementations; default is a no-op.
        return
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ExecutionEngine(Protocol):
    """Structural (duck-typed) interface for an engine that produces
    connections. All methods are no-op stubs by default.
    """
    # FIX: removed a stray dead `pass` statement that sat between the class
    # header and the first method.

    def connect(self) -> EngineConnection:
        """Return a connection to the underlying engine."""

    def setup(self, env: Environment, connection):
        """Hook to prepare a connection for the given environment; no-op by default."""

    def dispose(self, close: bool = True):
        """Release engine resources; no-op by default."""
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from tomllib import loads
|
|
5
|
+
|
|
6
|
+
from trilogy.dialect import (
|
|
7
|
+
BigQueryConfig,
|
|
8
|
+
DialectConfig,
|
|
9
|
+
DuckDBConfig,
|
|
10
|
+
PostgresConfig,
|
|
11
|
+
PrestoConfig,
|
|
12
|
+
SnowflakeConfig,
|
|
13
|
+
SQLServerConfig,
|
|
14
|
+
)
|
|
15
|
+
from trilogy.dialect.enums import Dialects
|
|
16
|
+
|
|
17
|
+
# Fallback parallelism used when the config file does not specify one.
DEFAULT_PARALLELISM = 4


@dataclass
class RuntimeConfig:
    """Runtime settings parsed from a TOML configuration file."""

    # Trilogy scripts to run at startup, in file order.
    startup_trilogy: list[Path]
    # Raw SQL scripts to run at startup, in file order.
    startup_sql: list[Path]
    parallelism: int = DEFAULT_PARALLELISM
    # Optional engine backend and its dialect-specific connection config.
    engine_dialect: Dialects | None = None
    engine_config: DialectConfig | None = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Dialects with a dedicated config class; dialects absent from this map
# (and engines with no raw config) resolve to engine_config=None.
_DIALECT_CONFIG_TYPES: dict[Dialects, type[DialectConfig]] = {
    Dialects.DUCK_DB: DuckDBConfig,
    Dialects.POSTGRES: PostgresConfig,
    Dialects.PRESTO: PrestoConfig,
    Dialects.SNOWFLAKE: SnowflakeConfig,
    Dialects.SQL_SERVER: SQLServerConfig,
    Dialects.BIGQUERY: BigQueryConfig,
}


def load_config_file(path: Path) -> RuntimeConfig:
    """Parse a TOML runtime-config file into a RuntimeConfig.

    Expected sections:
      [engine]  -> optional `dialect` name plus a `config` table of
                   dialect-specific keyword arguments
      [setup]   -> optional `trilogy` and `sql` lists of startup script paths
      top-level -> optional `parallelism` integer

    Raises ValueError if `dialect` is not a known Dialects value, and
    TypeError if the config table holds unexpected keys for that dialect.
    """
    config_data = loads(path.read_text())

    engine_raw: dict = config_data.get("engine", {})
    engine_config_raw = engine_raw.get("config", {})
    engine = Dialects(engine_raw.get("dialect")) if engine_raw.get("dialect") else None

    # Dispatch-table replacement for the original six-branch if/elif chain:
    # build a typed config only when a dialect is set, it has a config class,
    # and a non-empty config table was provided.
    engine_config: DialectConfig | None = None
    if engine and engine_config_raw:
        config_type = _DIALECT_CONFIG_TYPES.get(engine)
        if config_type is not None:
            engine_config = config_type(**engine_config_raw)

    setup: dict = config_data.get("setup", {})
    return RuntimeConfig(
        startup_trilogy=[Path(p) for p in setup.get("trilogy", [])],
        startup_sql=[Path(p) for p in setup.get("sql", [])],
        parallelism=config_data.get("parallelism", DEFAULT_PARALLELISM),
        engine_dialect=engine,
        engine_config=engine_config,
    )
|