pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
+++ b/trilogy/dialect/duckdb.py
@@ -0,0 +1,177 @@
+ import re
+ from typing import Any, Callable, Mapping
+
+ from jinja2 import Template
+
+ from trilogy.core.enums import FunctionType, Modifier, UnnestMode, WindowType
+ from trilogy.core.models.core import DataType
+ from trilogy.dialect.base import BaseDialect
+
+ WINDOW_FUNCTION_MAP: Mapping[WindowType, Callable[[Any, Any, Any], str]] = {}
+
+ SENTINEL_AUTO_CAPTURE_GROUP_VALUE = "-1"
+
+
+ def null_wrapper(
+     lval: str,
+     rval: str,
+     modifiers: list[Modifier],
+ ) -> str:
+     if Modifier.NULLABLE in modifiers:
+         return f"{lval} is not distinct from {rval}"
+     return f"{lval} = {rval}"
+
+
+ def generate_regex_extract(x: list[str]) -> str:
+     if str(x[2]) == SENTINEL_AUTO_CAPTURE_GROUP_VALUE:
+         regex = re.compile(x[1])
+         # with no capture groups, extract the full match (group 0); otherwise group 1
+         if regex.groups == 0:
+             search = 0
+         else:
+             search = 1
+         return f"REGEXP_EXTRACT({x[0]},{x[1]},{search})"
+     return f"REGEXP_EXTRACT({x[0]},{x[1]},{x[2]})"
+
+
+ def render_sort(args, types):
+     if len(args) == 1:
+         return f"list_sort({args[0]})"
+     order = args[1].split(" ", 1)
+     if len(order) == 1:
+         return f"list_sort({args[0]}, '{order[0]}')"
+     # split(" ", 1) yields at most two parts: sort direction and null ordering
+     return f"list_sort({args[0]}, '{order[0]}', '{order[1]}')"
+
+
+ def render_log(args):
+     if len(args) == 1:
+         return f"log({args[0]})"
+     elif len(args) == 2:
+         if int(args[1]) == 10:
+             return f"log({args[0]})"
+         else:
+             # change of base formula
+             return f"log({args[0]})/log({args[1]})"
+     else:
+         raise ValueError("log function requires 1 or 2 arguments")
+
+
+ def map_date_part_specifier(specifier: str) -> str:
+     """Map date part specifiers to DuckDB-compatible names."""
+     mapping = {
+         "day_of_week": "dow",
+         # Add other mappings if needed
+     }
+     return mapping.get(specifier, specifier)
+
+
+ FUNCTION_MAP = {
+     FunctionType.COUNT: lambda args, types: f"count({args[0]})",
+     FunctionType.SUM: lambda args, types: f"sum({args[0]})",
+     FunctionType.AVG: lambda args, types: f"avg({args[0]})",
+     FunctionType.LENGTH: lambda args, types: f"length({args[0]})",
+     FunctionType.LOG: lambda args, types: render_log(args),
+     FunctionType.LIKE: lambda args, types: (
+         f" CASE WHEN {args[0]} like {args[1]} THEN True ELSE False END"
+     ),
+     FunctionType.SPLIT: lambda args, types: (
+         f"STRING_SPLIT({','.join([f''' {str(a)} ''' for a in args])})"
+     ),
+     ## DuckDB indexes from 1, not 0
+     FunctionType.INDEX_ACCESS: lambda args, types: (f"{args[0]}[{args[1]}]"),
+     ## DuckDB uses list for array
+     FunctionType.ARRAY_DISTINCT: lambda args, types: f"list_distinct({args[0]})",
+     FunctionType.ARRAY_SUM: lambda args, types: f"list_sum({args[0]})",
+     FunctionType.ARRAY_SORT: render_sort,
+     FunctionType.ARRAY_TRANSFORM: lambda args, types: (
+         f"list_transform({args[0]}, {args[1]} -> {args[2]})"
+     ),
+     FunctionType.ARRAY_AGG: lambda args, types: f"array_agg({args[0]})",
+     # datetime is aliased
+     FunctionType.CURRENT_DATETIME: lambda x, types: "cast(get_current_timestamp() as datetime)",
+     FunctionType.DATETIME: lambda x, types: f"cast({x[0]} as datetime)",
+     FunctionType.TIMESTAMP: lambda x, types: f"cast({x[0]} as timestamp)",
+     FunctionType.DATE: lambda x, types: f"cast({x[0]} as date)",
+     FunctionType.DATE_TRUNCATE: lambda x, types: f"date_trunc('{x[1]}', {x[0]})",
+     FunctionType.DATE_ADD: lambda x, types: f"date_add({x[0]}, {x[2]} * INTERVAL 1 {x[1]})",
+     FunctionType.DATE_SUB: lambda x, types: f"date_add({x[0]}, -{x[2]} * INTERVAL 1 {x[1]})",
+     FunctionType.DATE_PART: lambda x, types: f"date_part('{map_date_part_specifier(x[1])}', {x[0]})",
+     FunctionType.DATE_DIFF: lambda x, types: f"date_diff('{x[2]}', {x[0]}, {x[1]})",
+     # CONCAT was declared twice in the original dict; only this later
+     # definition ever took effect, so the earlier duplicate is dropped
+     FunctionType.CONCAT: lambda x, types: f"({' || '.join(x)})",
+     FunctionType.DATE_LITERAL: lambda x, types: f"date '{x}'",
+     FunctionType.DATETIME_LITERAL: lambda x, types: f"datetime '{x}'",
+     FunctionType.DAY_OF_WEEK: lambda x, types: f"dayofweek({x[0]})",
+     # string
+     FunctionType.CONTAINS: lambda x, types: f"CONTAINS(LOWER({x[0]}), LOWER({x[1]}))",
+     # regexp
+     FunctionType.REGEXP_CONTAINS: lambda x, types: f"REGEXP_MATCHES({x[0]},{x[1]})",
+     FunctionType.REGEXP_EXTRACT: lambda x, types: generate_regex_extract(x),
+ }
+
+ # if an aggregate function is called on a source that is at the same grain as the aggregate,
+ # we may return a static value
+ FUNCTION_GRAIN_MATCH_MAP = {
+     **FUNCTION_MAP,
+     FunctionType.COUNT_DISTINCT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
+     FunctionType.COUNT: lambda args, types: f"CASE WHEN {args[0]} IS NOT NULL THEN 1 ELSE 0 END",
+     FunctionType.SUM: lambda args, types: f"{args[0]}",
+     FunctionType.AVG: lambda args, types: f"{args[0]}",
+     FunctionType.MAX: lambda args, types: f"{args[0]}",
+     FunctionType.MIN: lambda args, types: f"{args[0]}",
+ }
+
+ DATATYPE_MAP: dict[DataType, str] = {}
+
+
+ DUCKDB_TEMPLATE = Template(
+ """{%- if output %}
+ {{output}}
+ {% endif %}{%- if ctes %}
+ WITH {% if recursive %}RECURSIVE{% endif %}{% for cte in ctes %}
+ {{cte.name}} as (
+ {{cte.statement}}){% if not loop.last %},{% else %}
+ {% endif %}{% endfor %}{% endif %}
+ {%- if full_select -%}
+ {{full_select}}
+ {%- else -%}{%- if comment -%}
+ -- {{ comment }}
+ {%- endif %}SELECT
+ {%- for select in select_columns %}
+ {{ select }}{% if not loop.last %},{% endif %}{% endfor %}
+ {% if base %}FROM
+ {{ base }}{% endif %}{% if joins %}
+ {%- for join in joins %}
+ {{ join }}{% endfor %}{% endif %}
+ {%- if where %}
+ WHERE
+ {{ where }}
+ {% endif -%}{%- if group_by %}
+ GROUP BY {% for group in group_by %}
+ {{group}}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if having %}
+ HAVING
+ {{ having }}
+ {% endif %}{%- if order_by %}
+ ORDER BY {% for order in order_by %}
+ {{ order }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}
+ {%- if limit is not none %}
+ LIMIT ({{ limit }}){% endif %}{% endif %}
+ """
+ )
+
+
+ class DuckDBDialect(BaseDialect):
+     WINDOW_FUNCTION_MAP = {**BaseDialect.WINDOW_FUNCTION_MAP, **WINDOW_FUNCTION_MAP}
+     FUNCTION_MAP = {**BaseDialect.FUNCTION_MAP, **FUNCTION_MAP}
+     FUNCTION_GRAIN_MATCH_MAP = {
+         **BaseDialect.FUNCTION_GRAIN_MATCH_MAP,
+         **FUNCTION_GRAIN_MATCH_MAP,
+     }
+     DATATYPE_MAP = {**BaseDialect.DATATYPE_MAP, **DATATYPE_MAP}
+     QUOTE_CHARACTER = '"'
+     SQL_TEMPLATE = DUCKDB_TEMPLATE
+     UNNEST_MODE = UnnestMode.DIRECT
+     NULL_WRAPPER = staticmethod(null_wrapper)
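
A quick, illustrative sketch of how the pieces above compose (the `order_ts` column name is a made-up example; the lambdas treat `args` as already-rendered SQL fragments):

    from trilogy.core.enums import FunctionType

    # render_log falls back to the change-of-base formula for non-base-10 logs
    assert render_log(["x"]) == "log(x)"
    assert render_log(["x", "2"]) == "log(x)/log(2)"

    # FUNCTION_MAP entries take (args, types) and return a SQL fragment
    rendered = FUNCTION_MAP[FunctionType.DATE_TRUNCATE](["order_ts", "month"], [])
    assert rendered == "date_trunc('month', order_ts)"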
+++ b/trilogy/dialect/enums.py
@@ -0,0 +1,147 @@
+ from enum import Enum
+ from typing import TYPE_CHECKING, Callable, List, Optional
+
+ from trilogy.core.models.environment import Environment
+
+ if TYPE_CHECKING:
+     from trilogy import Executor
+     from trilogy.hooks.base_hook import BaseHook
+
+ from trilogy.constants import Rendering, logger
+ from trilogy.dialect.config import DialectConfig, DuckDBConfig
+
+
+ def default_factory(conf: DialectConfig, config_type):
+     from sqlalchemy import create_engine
+     from sqlalchemy.pool import NullPool
+
+     # the DuckDB IdentifierPreparer uses a global connection that is not thread safe;
+     # we monkey patch to the parent class to avoid this
+     if isinstance(conf, DuckDBConfig):
+         from duckdb_engine import DuckDBIdentifierPreparer, PGIdentifierPreparer
+
+         DuckDBIdentifierPreparer.__init__ = PGIdentifierPreparer.__init__  # type: ignore
+     engine_args = {
+         "future": True,
+         "poolclass": NullPool,
+     }
+     if not isinstance(conf, config_type):
+         raise TypeError(
+             f"Invalid dialect configuration for type {config_type.__name__}, is {type(conf)}"
+         )
+     connect_args = conf.create_connect_args()
+     if connect_args:
+         engine_args["connect_args"] = connect_args
+     return create_engine(conf.connection_string(), **engine_args)
+
+
+ class Dialects(Enum):
+     BIGQUERY = "bigquery"
+     SQL_SERVER = "sql_server"
+     DUCK_DB = "duck_db"
+     PRESTO = "presto"
+     TRINO = "trino"
+     POSTGRES = "postgres"
+     SNOWFLAKE = "snowflake"
+     DATAFRAME = "dataframe"
+
+     @classmethod
+     def _missing_(cls, value):
+         if value == "duckdb":
+             return cls.DUCK_DB
+         return super()._missing_(value)
+
+     def default_renderer(self, conf=None, _engine_factory: Callable = default_factory):
+         from trilogy.render import get_dialect_generator
+
+         return get_dialect_generator(self)
+
+     def default_engine(self, conf=None, _engine_factory: Callable = default_factory):
+         if self == Dialects.BIGQUERY:
+             from google.auth import default
+             from google.cloud import bigquery
+
+             from trilogy.dialect.config import BigQueryConfig
+
+             credentials, project = default()
+             client = bigquery.Client(credentials=credentials, project=project)
+             conf = conf or BigQueryConfig(project=project, client=client)
+             return _engine_factory(
+                 conf,
+                 BigQueryConfig,
+             )
+         elif self == Dialects.SQL_SERVER:
+             raise NotImplementedError()
+         elif self == Dialects.DUCK_DB:
+             from trilogy.dialect.config import DuckDBConfig
+
+             if not conf:
+                 conf = DuckDBConfig()
+             return _engine_factory(conf, DuckDBConfig)
+         elif self == Dialects.SNOWFLAKE:
+             from trilogy.dialect.config import SnowflakeConfig
+
+             return _engine_factory(conf, SnowflakeConfig)
+         elif self == Dialects.POSTGRES:
+             logger.warning(
+                 "WARN: Using experimental postgres dialect. Most functionality will not work."
+             )
+             import importlib.util
+
+             spec = importlib.util.find_spec("psycopg2")
+             if spec is None:
+                 raise ImportError(
+                     "postgres driver not installed. python -m pip install pypreql[postgres]"
+                 )
+             from trilogy.dialect.config import PostgresConfig
+
+             return _engine_factory(conf, PostgresConfig)
+         elif self == Dialects.PRESTO:
+             from trilogy.dialect.config import PrestoConfig
+
+             return _engine_factory(conf, PrestoConfig)
+         elif self == Dialects.TRINO:
+             from trilogy.dialect.config import TrinoConfig
+
+             return _engine_factory(conf, TrinoConfig)
+         elif self == Dialects.DATAFRAME:
+             from trilogy.dialect.config import DataFrameConfig
+             from trilogy.dialect.dataframe import DataframeConnectionWrapper
+
+             if not conf:
+                 conf = DataFrameConfig(dataframes={})
+
+             base = _engine_factory(conf, DataFrameConfig)
+
+             return DataframeConnectionWrapper(base, dataframes=conf.dataframes)
+         else:
+             raise ValueError(
+                 f"Unsupported dialect {self} for default engine creation; create one explicitly."
+             )
+
+     def default_executor(
+         self,
+         environment: Optional["Environment"] = None,
+         hooks: List["BaseHook"] | None = None,
+         conf: DialectConfig | None = None,
+         rendering: Rendering | None = None,
+         _engine_factory: Callable | None = None,
+     ) -> "Executor":
+         from trilogy import Executor
+
+         return Executor(
+             engine=self.default_engine(
+                 conf=conf, _engine_factory=_engine_factory or default_factory
+             ),
+             environment=environment or Environment(),
+             dialect=self,
+             rendering=rendering,
+             hooks=hooks,
+         )
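
A hedged usage sketch of the entry point above (assumes an environment where the duckdb_engine dependency is installed):

    from trilogy.dialect.enums import Dialects

    # builds a DuckDB-backed SQLAlchemy engine via default_factory
    # and wraps it in an Executor
    executor = Dialects.DUCK_DB.default_executor()

    # the _missing_ hook also accepts the unseparated spelling
    assert Dialects("duckdb") is Dialects.DUCK_DB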
+++ b/trilogy/dialect/metadata.py
@@ -0,0 +1,173 @@
+ from typing import Optional
+
+ from trilogy.core.enums import DatasourceState, PublishAction
+ from trilogy.core.models.datasource import Datasource
+ from trilogy.core.models.environment import Environment
+ from trilogy.core.statements.author import (
+     ConceptDeclarationStatement,
+     ImportStatement,
+     MergeStatementV2,
+ )
+ from trilogy.core.statements.execute import (
+     ProcessedPublishStatement,
+     ProcessedShowStatement,
+     ProcessedStaticValueOutput,
+     ProcessedValidateStatement,
+ )
+ from trilogy.core.validation.common import ValidationTest
+ from trilogy.dialect.base import BaseDialect
+ from trilogy.dialect.results import MockResult, generate_result_set
+
+
+ def handle_concept_declaration(query: ConceptDeclarationStatement) -> MockResult:
+     """Handle concept declaration statements without execution."""
+     concept = query.concept
+     return MockResult(
+         [
+             {
+                 "address": concept.address,
+                 "type": concept.datatype.value,
+                 "purpose": concept.purpose.value,
+                 "derivation": concept.derivation.value,
+             }
+         ],
+         ["address", "type", "purpose", "derivation"],
+     )
+
+
+ def handle_datasource(query: Datasource) -> MockResult:
+     """Handle datasource queries without execution."""
+     return MockResult(
+         [
+             {
+                 "name": query.name,
+             }
+         ],
+         ["name"],
+     )
+
+
+ def handle_import_statement(query: ImportStatement) -> MockResult:
+     """Handle import statements without execution."""
+     return MockResult(
+         [
+             {
+                 "path": query.path,
+                 "alias": query.alias,
+             }
+         ],
+         ["path", "alias"],
+     )
+
+
+ def handle_publish_statement(
+     query: ProcessedPublishStatement, environment: Environment
+ ) -> MockResult:
+     """Handle publish statements by updating environment and returning result."""
+     for x in query.targets:
+         datasource = environment.datasources.get(x)
+         if not datasource:
+             raise ValueError(f"Datasource {x} not found in environment")
+         if query.action == PublishAction.UNPUBLISH:
+             datasource.status = DatasourceState.UNPUBLISHED
+         else:
+             datasource.status = DatasourceState.PUBLISHED
+
+     return MockResult(
+         [{"published": target} for target in query.targets],
+         [
+             "published",
+         ],
+     )
+
+
+ def handle_merge_statement(
+     query: MergeStatementV2, environment: Environment
+ ) -> MockResult:
+     """Handle merge statements by updating environment and returning result."""
+     for concept in query.sources:
+         environment.merge_concept(
+             concept, query.targets[concept.address], modifiers=query.modifiers
+         )
+
+     return MockResult(
+         [
+             {
+                 "sources": ",".join([x.address for x in query.sources]),
+                 "targets": ",".join([x.address for x in query.targets.values()]),
+             }
+         ],
+         # column names must match the row keys above
+         ["sources", "targets"],
+     )
+
+
+ def handle_processed_show_statement(
+     query: ProcessedShowStatement, compiled_statements: list[str]
+ ) -> MockResult:
+     """Handle processed show statements without execution."""
+     return generate_result_set(query.output_columns, compiled_statements)
+
+
+ def raw_validation_to_result(
+     raw: list[ValidationTest], generator: Optional[BaseDialect] = None
+ ) -> Optional[MockResult]:
+     """Convert raw validation tests to mock result."""
+     if not raw:
+         return MockResult([], ["check_type", "expected", "result", "ran", "query"])
+     output = []
+     for row in raw:
+         if row.raw_query and generator and not row.generated_query:
+             try:
+                 row.generated_query = generator.compile_statement(row.raw_query)
+             except Exception as e:
+                 row.generated_query = f"Error generating query: {e}"
+         output.append(
+             {
+                 "check_type": row.check_type.value,
+                 "expected": row.expected,
+                 "result": str(row.result) if row.result else None,
+                 "ran": row.ran,
+                 "query": row.generated_query if row.generated_query else "",
+             }
+         )
+     return MockResult(output, ["check_type", "expected", "result", "ran", "query"])
+
+
+ def handle_processed_validate_statement(
+     query: ProcessedValidateStatement, dialect: BaseDialect, validate_environment_func
+ ) -> Optional[MockResult]:
+     """Handle processed validate statements."""
+     results = validate_environment_func(query.scope, query.targets)
+     return raw_validation_to_result(results, dialect)
+
+
+ def handle_show_statement_outputs(
+     statement: ProcessedShowStatement,
+     compiled_statements: list[str],
+     environment: Environment,
+     dialect: BaseDialect,
+ ) -> list[MockResult]:
+     """Handle show statement outputs without execution."""
+     output = []
+     for x in statement.output_values:
+         if isinstance(x, ProcessedStaticValueOutput):
+             output.append(generate_result_set(statement.output_columns, x.values))
+         elif compiled_statements:
+             output.append(
+                 generate_result_set(
+                     statement.output_columns,
+                     compiled_statements,
+                 )
+             )
+         elif isinstance(x, ProcessedValidateStatement):
+             from trilogy.core.validation.environment import validate_environment
+
+             raw = validate_environment(environment, x.scope, x.targets)
+             results = raw_validation_to_result(raw, dialect)
+             if results:
+                 output.append(results)
+         else:
+             raise NotImplementedError(f"Cannot show type {type(x)} in show statement")
+     return output
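
Every handler above follows the same contract: build a MockResult from rows plus ordered column names instead of executing SQL. A minimal sketch of that shape (the import path matches the file's own imports; the row values here are hypothetical):

    from trilogy.dialect.results import MockResult

    result = MockResult(
        [{"path": "std/date", "alias": "date"}],  # one dict per row
        ["path", "alias"],                        # ordered output column names
    )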
+++ b/trilogy/dialect/mock.py
@@ -0,0 +1,190 @@
+ import random
+ from datetime import date, datetime
+ from typing import TYPE_CHECKING, Any, Iterable
+
+ from trilogy.core.enums import Purpose
+ from trilogy.core.models.author import Concept, ConceptRef
+ from trilogy.core.models.core import CONCRETE_TYPES, ArrayType, DataType, TraitDataType
+ from trilogy.core.models.datasource import Address, Datasource
+ from trilogy.core.models.environment import Environment
+ from trilogy.core.statements.execute import ProcessedMockStatement
+ from trilogy.dialect.results import MockResult
+
+ if TYPE_CHECKING:
+     from pyarrow import Table
+
+ DEFAULT_SCALE_FACTOR = 100
+
+
+ def safe_name(name: str) -> str:
+     return "".join(c if c.isalnum() or c == "_" else "_" for c in name)
+
+
+ def mock_email(scale_factor: int, is_key: bool = False) -> list[str]:
+     providers = ["example.com", "test.com", "mock.com", "sample.org"]
+     if is_key:
+         return [
+             f"user{i}@{providers[i % len(providers)]}"
+             for i in range(1, scale_factor + 1)
+         ]
+     return [
+         f"user{random.randint(1, 999999)}@{random.choice(providers)}"
+         for _ in range(scale_factor)
+     ]
+
+
+ def mock_hex_code(scale_factor: int, is_key: bool = False) -> list[str]:
+     if is_key:
+         return [f"#{i:06x}" for i in range(1, scale_factor + 1)]
+     return [f"#{random.randint(0, 0xFFFFFF):06x}" for _ in range(scale_factor)]
+
+
+ def mock_datatype(
+     full_type: Any, datatype: CONCRETE_TYPES, scale_factor: int, is_key: bool = False
+ ) -> list[Any]:
+     if isinstance(full_type, TraitDataType):
+         if full_type.type == DataType.STRING:
+             # TODO: get stdlib inventory some other way?
+             if full_type.traits == ["email_address"]:
+                 # email mock function
+                 return mock_email(scale_factor, is_key)
+             elif full_type.traits == ["hex"]:
+                 return mock_hex_code(scale_factor, is_key)
+         return mock_datatype(full_type.type, full_type.type, scale_factor, is_key)
+     elif datatype == DataType.INTEGER:
+         if is_key:
+             # unique integers for keys
+             return list(range(1, scale_factor + 1))
+         return [random.randint(0, 999_999) for _ in range(scale_factor)]
+     elif datatype == DataType.STRING:
+         if is_key:
+             # unique strings for keys
+             return [f"key_{i}" for i in range(1, scale_factor + 1)]
+         return [
+             f"mock_string_{random.randint(0, 999_999)}" for _ in range(scale_factor)
+         ]
+     elif datatype == DataType.FLOAT:
+         if is_key:
+             # unique floats for keys
+             return [float(i) for i in range(1, scale_factor + 1)]
+         return [random.uniform(0, 999_999) for _ in range(scale_factor)]
+     elif datatype == DataType.NUMERIC:
+         if is_key:
+             # unique numerics for keys
+             return [float(i) for i in range(1, scale_factor + 1)]
+         return [round(random.uniform(0, 999_999), 2) for _ in range(scale_factor)]
+     elif datatype == DataType.BOOL:
+         # booleans can only have 2 unique values, so keys don't make sense here
+         return [random.choice([True, False]) for _ in range(scale_factor)]
+     elif datatype == DataType.DATE:
+         if is_key:
+             # unique dates for keys - spread across multiple months/years if needed
+             base_date = date(2023, 1, 1)
+             return [
+                 date.fromordinal(base_date.toordinal() + i) for i in range(scale_factor)
+             ]
+         return [date(2023, 1, random.randint(1, 28)) for _ in range(scale_factor)]
+     elif datatype in (DataType.DATETIME, DataType.TIMESTAMP):
+         if is_key:
+             # unique datetimes for keys - increment by seconds
+             base_dt = datetime(2023, 1, 1, 0, 0, 0)
+             return [
+                 datetime.fromtimestamp(base_dt.timestamp() + i)
+                 for i in range(scale_factor)
+             ]
+         return [
+             datetime(
+                 2023,
+                 1,
+                 1,
+                 random.randint(0, 23),
+                 random.randint(0, 59),
+                 random.randint(0, 59),
+             )
+             for _ in range(scale_factor)
+         ]
+     elif isinstance(datatype, ArrayType):
+         # arrays as keys don't typically make sense, but generate unique if requested
+         if is_key:
+             return [
+                 [mock_datatype(datatype.type, datatype.value_data_type, 5, False)[0], i]
+                 for i in range(scale_factor)
+             ]
+         return [
+             [mock_datatype(datatype.type, datatype.value_data_type, 5, False)]
+             for _ in range(scale_factor)
+         ]
+     raise NotImplementedError(f"Mocking not implemented for datatype {datatype}")
+
+
+ class MockManager:
+     def __init__(
+         self, environment: Environment, scale_factor: int = DEFAULT_SCALE_FACTOR
+     ):
+         self.environment = environment
+         self.concept_mocks: dict[str, Any] = {}
+         self.scale_factor = scale_factor
+
+     def mock_concept(self, concept: Concept | ConceptRef):
+         if concept.address in self.concept_mocks:
+             return False
+         concrete = self.environment.concepts[concept.address]
+         self.concept_mocks[concept.address] = mock_datatype(
+             concept.datatype,
+             concept.output_datatype,
+             self.scale_factor,
+             concrete.purpose == Purpose.KEY,
+         )
+         return True
+
+     def create_mock_table(
+         self, concepts: Iterable[Concept | ConceptRef], headers: list[str]
+     ) -> "Table":
+         from pyarrow import table
+
+         data = {h: self.concept_mocks[c.address] for h, c in zip(headers, concepts)}
+         return table(data)
+
+
+ def handle_processed_mock_statement(
+     query: ProcessedMockStatement, environment: Environment, executor
+ ) -> MockResult:
+     """Handle processed mock statements."""
+     # For mock statements, we can simulate some output based on targets
+     mock_manager = MockManager(environment)
+     output = []
+     for target in query.targets:
+         datasource = environment.datasources.get(target)
+         if not datasource:
+             raise ValueError(f"Datasource {target} not found in environment")
+         mock_datasource(datasource, mock_manager, executor)
+         output.append(
+             {
+                 "target": target,
+                 "status": "mocked",
+             }
+         )
+     return MockResult(output, ["target", "status"])
+
+
+ def mock_datasource(datasource: Datasource, manager: MockManager, executor):
+     concrete: list[ConceptRef] = []
+     headers: list[str] = []
+     for k, col in datasource.concrete_columns.items():
+         manager.mock_concept(col.concept)
+         concrete.append(col.concept)
+         headers.append(k)
+
+     table = manager.create_mock_table(concrete, headers)
+
+     # have duckdb load the pyarrow table
+     executor.execute_raw_sql(
+         "register(:name, :tbl)", {"name": "mock_tbl", "tbl": table}
+     )
+     address = safe_name(datasource.safe_address)
+     executor.execute_raw_sql(
+         f"""CREATE OR REPLACE TABLE {address} AS SELECT * FROM mock_tbl"""
+     )
+     # overwrite the address since we've mangled the name
+     datasource.address = Address(location=address)
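
The is_key split in mock_datatype is the load-bearing detail: key columns get deterministic, unique sequences (presumably so rows across mocked datasources can join consistently), while non-key columns are random draws. A sketch that follows directly from the branches above:

    from trilogy.core.models.core import DataType

    # keys: deterministic and unique
    assert mock_datatype(DataType.INTEGER, DataType.INTEGER, 5, is_key=True) == [1, 2, 3, 4, 5]
    assert mock_datatype(DataType.STRING, DataType.STRING, 3, is_key=True) == ["key_1", "key_2", "key_3"]

    # non-keys: random values, one per row of the scale factor
    assert len(mock_datatype(DataType.BOOL, DataType.BOOL, 10)) == 10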