pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
- pytrilogy-0.3.138.dist-info/METADATA +525 -0
- pytrilogy-0.3.138.dist-info/RECORD +182 -0
- pytrilogy-0.3.138.dist-info/WHEEL +5 -0
- pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +9 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +87 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +143 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2672 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +494 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +748 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +517 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +106 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1359 -0
- trilogy/dialect/bigquery.py +256 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +177 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +91 -0
- trilogy/dialect/presto.py +104 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +90 -0
- trilogy/dialect/sql_server.py +92 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +750 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +7 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +289 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +460 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/parallel_execution.py +483 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/trilogy.py +772 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
trilogy/core/README.md
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Implicit Filtering Design
|
|
5
|
+
|
|
6
|
+
An implicit filter is any case where the where clause of a statement references more fields than are in the output of a select.
|
|
7
|
+
|
|
8
|
+
In those cases, those concepts are "promoted" up into a virtual CTE before the final output is returned.
|
|
9
|
+
|
|
10
|
+
The wrinkle is aggregates: an aggregate must have the implicit filter pushed "inside" the aggregate via the select_context
|
|
11
|
+
to ensure that they aggregate appropriately. Applying the condition after creating the aggregate would result in an incorrect
|
|
12
|
+
aggregation result.
|
|
13
|
+
|
|
14
|
+
### Case 1 - Basic Scalar Filtering
|
|
15
|
+
|
|
16
|
+
SELECT
|
|
17
|
+
abc,
|
|
18
|
+
def
|
|
19
|
+
where
|
|
20
|
+
gde = 1
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
We should source
|
|
24
|
+
ABC,
|
|
25
|
+
DEF,
|
|
26
|
+
GDE
|
|
27
|
+
|
|
28
|
+
filter on GDE = 1;
|
|
29
|
+
|
|
30
|
+
return results with optional grouping.
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
### Case 2 - Aggregate Filtering
|
|
34
|
+
|
|
35
|
+
SELECT
|
|
36
|
+
sum(abc)
|
|
37
|
+
WHERE
|
|
38
|
+
gde = 1
|
|
39
|
+
|
|
40
|
+
We should source
|
|
41
|
+
|
|
42
|
+
abc | gde =1
|
|
43
|
+
|
|
44
|
+
;
|
|
45
|
+
|
|
46
|
+
### Case 3 - Mixed filter
|
|
47
|
+
|
|
48
|
+
SELECT
|
|
49
|
+
sum(abc) by def,
|
|
50
|
+
def,
|
|
51
|
+
gde
|
|
52
|
+
|
trilogy/core/__init__.py
ADDED
|
File without changes
|
trilogy/core/enums.py
ADDED
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
InfiniteFunctionArgs = -1
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DatasourceState(Enum):
    """String-valued states for a datasource: published or unpublished."""

    PUBLISHED = "published"
    UNPUBLISHED = "unpublished"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CreateMode(Enum):
    """String-valued creation modes: plain create, create-if-missing, or create-or-replace."""

    CREATE = "create"
    CREATE_IF_NOT_EXISTS = "create_if_not_exists"
    CREATE_OR_REPLACE = "create_or_replace"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class UnnestMode(Enum):
    """String-valued identifiers for the supported unnest strategies."""

    DIRECT = "direct"
    CROSS_APPLY = "cross_apply"
    CROSS_JOIN = "cross_join"
    CROSS_JOIN_UNNEST = "cross_join_unnest"
    CROSS_JOIN_ALIAS = "cross_join_alias"
    PRESTO = "presto"
    SNOWFLAKE = "snowflake"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class GroupMode(Enum):
    """String-valued grouping modes: automatic or by index."""

    AUTO = "auto"
    BY_INDEX = "by_index"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ConceptSource(Enum):
    """String-valued tags naming where a concept came from."""

    MANUAL = "manual"
    CTE = "cte"
    SELECT = "select"
    PERSIST_STATEMENT = "persist_statement"
    AUTO_DERIVED = "auto_derived"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class StatementType(Enum):
    """String-valued statement kinds; only ``QUERY`` is defined here."""

    QUERY = "query"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Purpose(Enum):
    """String-valued purposes that can be assigned to a concept.

    Besides the canonical values, the aliases ``"constant"`` and ``"param"``
    (matched case-insensitively) resolve to ``CONSTANT`` and ``PARAMETER``.
    """

    CONSTANT = "const"
    KEY = "key"
    PROPERTY = "property"
    UNIQUE_PROPERTY = "unique_property"
    METRIC = "metric"
    ROWSET = "rowset"
    AUTO = "auto"
    PARAMETER = "parameter"
    UNKNOWN = "unknown"

    @classmethod
    def _missing_(cls, value):
        """Resolve alias spellings that are not canonical member values.

        Returns the matching member, or ``None`` so that ``Enum`` raises its
        standard ``ValueError`` for unknown input.
        """
        # Only strings can be aliases. Without this guard a non-string lookup
        # such as Purpose(1) escaped as AttributeError from ``.lower()``.
        if not isinstance(value, str):
            return None
        lowered = value.lower()
        if lowered == "constant":
            return cls.CONSTANT
        if lowered == "param":
            return cls.PARAMETER
        return None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class Derivation(Enum):
    """String-valued tags naming how a concept is derived."""

    BASIC = "basic"
    GROUP_TO = "group_to"
    WINDOW = "window"
    AGGREGATE = "aggregate"
    FILTER = "filter"
    CONSTANT = "constant"
    UNNEST = "unnest"
    UNION = "union"
    ROOT = "root"
    ROWSET = "rowset"
    MULTISELECT = "multiselect"
    RECURSIVE = "recursive"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class Granularity(Enum):
    """String-valued granularity markers: single-row or multi-row."""

    SINGLE_ROW = "single_row"
    MULTI_ROW = "multi_row"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class PersistMode(Enum):
    """String-valued persist modes: overwrite or append."""

    OVERWRITE = "overwrite"
    APPEND = "append"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class PublishAction(Enum):
    """String-valued publish actions: publish or unpublish."""

    PUBLISH = "publish"
    UNPUBLISH = "unpublish"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class Modifier(Enum):
    """Capitalized string-valued modifiers (``"Partial"``, ``"Optional"``, ...).

    Lookup also accepts the shorthand symbols ``"~"`` (``PARTIAL``) and ``"?"``
    (``NULLABLE``), and any casing of a member value (e.g. ``"partial"``).
    Members order as HIDDEN < PARTIAL < NULLABLE < OPTIONAL.
    """

    PARTIAL = "Partial"
    OPTIONAL = "Optional"
    HIDDEN = "Hidden"
    NULLABLE = "Nullable"

    @classmethod
    def _missing_(cls, value):
        """Resolve shorthand symbols and non-canonical casing.

        Returns the matching member, or ``None`` so that ``Enum`` raises its
        standard ``ValueError`` for unknown input.
        """
        strval = str(value)
        if strval == "~":
            return cls.PARTIAL
        if strval == "?":
            return cls.NULLABLE
        # The previous code passed the capitalized text to Enum._missing_,
        # which always returns None, so "partial" or "HIDDEN" never resolved.
        # Retry the lookup with the canonical capitalization instead; the
        # inequality check stops the retry after one step.
        capitalized = strval.capitalize()
        if capitalized != strval:
            return cls(capitalized)
        return None

    def __lt__(self, other):
        """Order modifiers by the fixed precedence listed in ``order``."""
        if not isinstance(other, Modifier):
            # Let Python try the reflected comparison or raise TypeError.
            return NotImplemented
        order = [
            Modifier.HIDDEN,
            Modifier.PARTIAL,
            Modifier.NULLABLE,
            Modifier.OPTIONAL,
        ]
        return order.index(self) < order.index(other)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class JoinType(Enum):
    """String-valued join kinds; the outer-join values contain a space."""

    INNER = "inner"
    LEFT_OUTER = "left outer"
    FULL = "full"
    RIGHT_OUTER = "right outer"
    CROSS = "cross"
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class Ordering(Enum):
    """String-valued sort directions, optionally with a nulls placement suffix."""

    ASCENDING = "asc"
    DESCENDING = "desc"
    ASC_NULLS_AUTO = "asc nulls auto"
    ASC_NULLS_FIRST = "asc nulls first"
    ASC_NULLS_LAST = "asc nulls last"
    DESC_NULLS_FIRST = "desc nulls first"
    DESC_NULLS_LAST = "desc nulls last"
    DESC_NULLS_AUTO = "desc nulls auto"
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class WindowType(Enum):
    """String-valued names of the supported window function kinds."""

    ROW_NUMBER = "row_number"
    RANK = "rank"
    LAG = "lag"
    LEAD = "lead"
    SUM = "sum"
    MAX = "max"
    MIN = "min"
    AVG = "avg"
    COUNT = "count"
    COUNT_DISTINCT = "count_distinct"
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class WindowOrder(Enum):
    """Window ordering keywords.

    Note the values are ``"top"`` / ``"bottom"``, not ``"asc"`` / ``"desc"``.
    """

    ASCENDING = "top"
    DESCENDING = "bottom"
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class FunctionType(Enum):
    """String-valued identifiers for every function kind, grouped by category.

    Member values are mostly the lower-case member name; the exceptions
    visible here are ``IS_NULL = "isnull"`` and ``LENGTH = "len"``.
    """

    # custom
    CUSTOM = "custom"

    # structural
    UNNEST = "unnest"
    RECURSE_EDGE = "recurse_edge"

    UNION = "union"

    ALIAS = "alias"

    PARENTHETICAL = "parenthetical"

    # Generic
    CASE = "case"
    CAST = "cast"
    CONCAT = "concat"
    CONSTANT = "constant"
    TYPED_CONSTANT = "typed_constant"
    COALESCE = "coalesce"
    IS_NULL = "isnull"
    NULLIF = "nullif"
    BOOL = "bool"

    # COMPLEX
    INDEX_ACCESS = "index_access"
    MAP_ACCESS = "map_access"
    ATTR_ACCESS = "attr_access"
    STRUCT = "struct"
    ARRAY = "array"
    DATE_LITERAL = "date_literal"
    DATETIME_LITERAL = "datetime_literal"

    # ARRAY
    ARRAY_DISTINCT = "array_distinct"
    ARRAY_SUM = "array_sum"
    ARRAY_SORT = "array_sort"
    ARRAY_TRANSFORM = "array_transform"
    ARRAY_TO_STRING = "array_to_string"
    ARRAY_FILTER = "array_filter"
    GENERATE_ARRAY = "generate_array"

    # MAP
    MAP_KEYS = "map_keys"
    MAP_VALUES = "map_values"

    # TEXT AND MAYBE MORE
    SPLIT = "split"
    LENGTH = "len"

    # Maths
    DIVIDE = "divide"
    MULTIPLY = "multiply"
    ADD = "add"
    SUBTRACT = "subtract"
    MOD = "mod"
    ROUND = "round"
    ABS = "abs"
    SQRT = "sqrt"
    RANDOM = "random"
    FLOOR = "floor"
    CEIL = "ceil"
    LOG = "log"
    POWER = "power"

    # Aggregates
    ## group is not a real aggregate - it just means group by this + some other set of fields
    ## but is here as syntax is identical
    GROUP = "group"

    COUNT = "count"
    COUNT_DISTINCT = "count_distinct"
    SUM = "sum"
    MAX = "max"
    MIN = "min"
    AVG = "avg"
    ARRAY_AGG = "array_agg"
    BOOL_OR = "bool_or"
    BOOL_AND = "bool_and"
    ANY = "any"

    # String
    LIKE = "like"
    ILIKE = "ilike"
    LOWER = "lower"
    UPPER = "upper"
    SUBSTRING = "substring"
    STRPOS = "strpos"
    CONTAINS = "contains"
    TRIM = "trim"
    REPLACE = "replace"
    HASH = "hash"

    # STRING REGEX
    REGEXP_CONTAINS = "regexp_contains"
    REGEXP_EXTRACT = "regexp_extract"
    REGEXP_REPLACE = "regexp_replace"

    # Dates
    DATE = "date"
    DATETIME = "datetime"
    TIMESTAMP = "timestamp"

    # time
    SECOND = "second"
    MINUTE = "minute"
    HOUR = "hour"
    DAY = "day"
    DAY_OF_WEEK = "day_of_week"
    WEEK = "week"
    MONTH = "month"
    QUARTER = "quarter"
    YEAR = "year"
    MONTH_NAME = "month_name"
    DAY_NAME = "day_name"

    DATE_PART = "date_part"
    DATE_TRUNCATE = "date_truncate"
    DATE_ADD = "date_add"
    DATE_SUB = "date_sub"
    DATE_DIFF = "date_diff"
    DATE_SPINE = "date_spine"

    # Geography
    GEO_POINT = "geo_point"
    GEO_DISTANCE = "geo_distance"

    # UNIX
    UNIX_TO_TIMESTAMP = "unix_to_timestamp"

    # CONSTANTS
    CURRENT_DATE = "current_date"
    CURRENT_DATETIME = "current_datetime"
    CURRENT_TIMESTAMP = "current_timestamp"
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class FunctionClass(Enum):
    """Groupings of ``FunctionType`` members; each member's value is a list.

    Because the values are lists (unhashable), read the grouping through
    ``.value``, e.g. ``FunctionType.SUM in FunctionClass.AGGREGATE_FUNCTIONS.value``.
    """

    # Function types listed as aggregates.
    AGGREGATE_FUNCTIONS = [
        FunctionType.MAX,
        FunctionType.MIN,
        FunctionType.SUM,
        FunctionType.AVG,
        FunctionType.ARRAY_AGG,
        FunctionType.COUNT,
        FunctionType.COUNT_DISTINCT,
        FunctionType.ANY,
        FunctionType.BOOL_OR,
        FunctionType.BOOL_AND,
    ]
    # NOTE(review): CURRENT_TIMESTAMP is not listed here although CURRENT_DATE
    # and CURRENT_DATETIME are - confirm whether that omission is intentional.
    SINGLE_ROW = [
        FunctionType.CONSTANT,
        FunctionType.CURRENT_DATE,
        FunctionType.CURRENT_DATETIME,
    ]

    ONE_TO_MANY = [FunctionType.UNNEST, FunctionType.DATE_SPINE]

    RECURSIVE = [FunctionType.RECURSE_EDGE]
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
class Boolean(Enum):
    """String-valued boolean literal (``"true"`` / ``"false"``).

    Lookup also accepts the Python singletons ``True`` / ``False`` and any
    casing of the two string values.
    """

    TRUE = "true"
    FALSE = "false"

    @classmethod
    def _missing_(cls, value):
        """Map Python booleans and mixed-case strings onto the two members."""
        # Identity checks on purpose: only the real bool singletons qualify,
        # not merely truthy/falsy values such as 1 or 0.
        if value is True:
            return cls.TRUE
        if value is False:
            return cls.FALSE
        text = str(value)
        lowered = text.lower()
        # Retry once with the lower-cased text; text that is already lower
        # case falls through to None and Enum raises ValueError.
        return cls(lowered) if lowered != text else None
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
class BooleanOperator(Enum):
    """String-valued logical connectives (``"and"`` / ``"or"``), matched case-insensitively."""

    AND = "and"
    OR = "or"

    @classmethod
    def _missing_(cls, value):
        """Retry the lookup with lower-cased text; ``None`` means no match."""
        text = str(value)
        lowered = text.lower()
        if lowered == text:
            # Already lower case and still unknown: let Enum raise ValueError.
            return None
        return cls(lowered)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
class ComparisonOperator(Enum):
    """String-valued comparison operators.

    Members compare equal to their plain string value (``EQ == "="``) and hash
    like that string, so a member and its value are interchangeable as dict
    keys. Lookup is case-insensitive and accepts multi-word operators either
    as one whitespace-separated string or as a list of tokens.
    """

    LT = "<"
    GT = ">"
    EQ = "="
    IS = "is"
    IS_NOT = "is not"
    GTE = ">="
    LTE = "<="
    NE = "!="
    IN = "in"
    NOT_IN = "not in"
    # TODO: deprecate for contains?
    LIKE = "like"
    ILIKE = "ilike"
    CONTAINS = "contains"
    ELSE = "else"

    def __eq__(self, other):
        """Compare against another member or against a raw operator string."""
        if isinstance(other, str):
            return self.value == other
        if not isinstance(other, ComparisonOperator):
            return False
        return self.value == other.value

    def __hash__(self):
        """Hash by value, consistent with ``__eq__``.

        Defining ``__eq__`` alone sets ``__hash__`` to ``None``, which made
        members unusable in sets and as dict keys.
        """
        return hash(self.value)

    @classmethod
    def _missing_(cls, value):
        """Resolve token lists, whitespace-separated text and mixed casing.

        Returns the matching member, or ``None`` so that ``Enum`` raises its
        standard ``ValueError`` for unknown input.
        """
        # Split multi-word text ("NOT IN", "is  not") into tokens.
        if not isinstance(value, list) and " " in str(value):
            value = str(value).split()
        if isinstance(value, list):
            processed = [str(v).lower() for v in value]
            if processed == ["not", "in"]:
                return cls.NOT_IN
            if processed == ["is", "not"]:
                return cls.IS_NOT
            if len(processed) == 1:
                # Compare the lower-cased token. The previous code compared
                # the raw list against ["in"], so ["IN"] was rejected.
                token = processed[0]
                for member in cls:
                    if member.value == token:
                        return member
            return None
        text = str(value)
        lowered = text.lower()
        if lowered != text:
            return cls(lowered)
        return None
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
class DatePart(Enum):
    """String-valued date/time parts, matched case-insensitively for string input."""

    MONTH = "month"
    YEAR = "year"
    WEEK = "week"
    DAY = "day"
    QUARTER = "quarter"
    HOUR = "hour"
    MINUTE = "minute"
    SECOND = "second"
    DAY_OF_WEEK = "day_of_week"

    @classmethod
    def _missing_(cls, value):
        """Retry string lookups in lower case; ``None`` means no match."""
        if not isinstance(value, str):
            return None
        lowered = value.lower()
        # Only retry when lower-casing changed something, so the retry
        # cannot loop.
        return cls(lowered) if lowered != value else None
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
class SourceType(Enum):
    """String-valued tags naming the kind of source."""

    FILTER = "filter"
    SELECT = "select"
    ABSTRACT = "abstract"
    DIRECT_SELECT = "direct_select"
    GROUP = "group"
    WINDOW = "window"
    UNNEST = "unnest"
    CONSTANT = "constant"
    ROWSET = "rowset"
    MERGE = "merge"
    BASIC = "basic"
    UNION = "union"
    RECURSIVE = "recursive"
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
class ShowCategory(Enum):
    """String-valued show categories: datasources or concepts."""

    DATASOURCES = "datasources"
    CONCEPTS = "concepts"
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
class IOType(Enum):
    """String-valued I/O formats (only ``"csv"``), matched case-insensitively for string input."""

    CSV = "csv"

    @classmethod
    def _missing_(cls, value):
        """Retry string lookups in lower case; ``None`` means no match."""
        if not isinstance(value, str):
            return None
        lowered = value.lower()
        return cls(lowered) if lowered != value else None
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
class ValidationScope(Enum):
    """String-valued validation scopes, matched case-insensitively for string input."""

    ALL = "all"
    CONCEPTS = "concepts"
    DATASOURCES = "datasources"

    @classmethod
    def _missing_(cls, value):
        """Retry string lookups in lower case; ``None`` means no match."""
        if not isinstance(value, str):
            return None
        lowered = value.lower()
        return cls(lowered) if lowered != value else None
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
from trilogy.core.graph_models import (
|
|
2
|
+
ReferenceGraph,
|
|
3
|
+
concept_to_node,
|
|
4
|
+
datasource_to_node,
|
|
5
|
+
)
|
|
6
|
+
from trilogy.core.models.build import BuildConcept, BuildDatasource
|
|
7
|
+
from trilogy.core.models.build_environment import BuildEnvironment
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def add_concept(
    concept: BuildConcept,
    g: ReferenceGraph,
    concept_mapping: dict[str, BuildConcept],
    default_concept_graph: dict[str, BuildConcept],
    seen: set[str],
):
    """Add ``concept`` to ``g`` along with its arguments and pseudonyms.

    The node is registered once (tracked through ``seen``). Each concept
    argument is resolved through ``get_default_grain_concept``, added
    recursively, and linked with an edge into this concept's node. Each
    pseudonym is resolved the same way and linked in both directions.

    Raises:
        ValueError: if a concept argument is not a ``BuildConcept``.
        SyntaxError: if a pseudonym address is missing from ``concept_mapping``.
    """
    node_name = concept_to_node(concept)
    if node_name in seen:
        return
    seen.add(node_name)
    g.concepts[node_name] = concept
    g.add_node(node_name)

    # Arguments: add each one first, then draw the edge argument -> concept.
    for source in concept.concept_arguments or ():
        if not isinstance(source, BuildConcept):
            raise ValueError(
                f"Invalid non-build concept {source} passed into graph generation from {concept}"
            )
        parent = get_default_grain_concept(source, default_concept_graph)
        parent_node = concept_to_node(parent)
        add_concept(parent, g, concept_mapping, default_concept_graph, seen)
        g.add_edge(parent_node, node_name, fast=True)

    # Portion of this node's name before the first "@".
    own_prefix = node_name.split("@")[0]
    for ps_address in concept.pseudonyms:
        if ps_address not in concept_mapping:
            raise SyntaxError(f"Concept {concept} has invalid pseudonym {ps_address}")
        alias = get_default_grain_concept(
            concept_mapping[ps_address], default_concept_graph
        )
        alias_node = concept_to_node(alias)
        inbound = (alias_node, node_name)
        outbound = (node_name, alias_node)
        # Skip pairs that are already linked both ways, and pairs whose node
        # names share the same prefix before "@".
        already_linked = inbound in g.edges and outbound in g.edges
        if already_linked or alias_node.split("@")[0] == own_prefix:
            continue
        g.add_edge(alias_node, node_name, fast=True)
        g.add_edge(node_name, alias_node, fast=True)
        g.pseudonyms.add(inbound)
        g.pseudonyms.add(outbound)
        add_concept(alias, g, concept_mapping, default_concept_graph, seen)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_default_grain_concept(
    concept: BuildConcept, default_concept_graph: dict[str, BuildConcept]
) -> BuildConcept:
    """Return the default-grain version of ``concept``, memoized by address.

    ``default_concept_graph`` is the cache: on a miss the result of
    ``concept.with_default_grain()`` is stored under ``concept.address``
    before being returned.
    """
    address = concept.address
    try:
        return default_concept_graph[address]
    except KeyError:
        pass
    resolved = concept.with_default_grain()
    default_concept_graph[address] = resolved
    return resolved
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def generate_adhoc_graph(
    concepts: list[BuildConcept],
    datasources: list[BuildDatasource],
    default_concept_graph: dict[str, BuildConcept],
    restrict_to_listed: bool = False,
) -> ReferenceGraph:
    """Build a ``ReferenceGraph`` from the given concepts and datasources.

    All concepts are added through ``add_concept``. Each datasource then gets
    a node plus bidirectional edges to each of its concepts; when a concept
    differs from its default-grain version, the two are also linked both ways.

    Raises:
        ValueError: if any entry of ``concepts`` is not a ``BuildConcept``.
    """
    g = ReferenceGraph()
    concept_mapping = {item.address: item for item in concepts}
    seen: set[str] = set()

    # Validate the whole list before mutating the graph.
    for concept in concepts:
        if isinstance(concept, BuildConcept):
            continue
        raise ValueError(f"Invalid non-build concept {concept}")

    # add all parsed concepts
    for concept in concepts:
        add_concept(concept, g, concept_mapping, default_concept_graph, seen)

    for dataset in datasources:
        ds_node = datasource_to_node(dataset)
        g.add_datasource_node(ds_node, dataset)
        for concept in dataset.concepts:
            cnode = concept_to_node(concept)
            g.concepts[cnode] = concept
            g.add_node(cnode)
            # NOTE(review): cnode was added to the graph on the line above, so
            # this guard looks like it can never skip anything and
            # restrict_to_listed is effectively a no-op. Confirm the intended
            # behaviour against callers before moving the check.
            if restrict_to_listed and cnode not in g.nodes:
                continue
            g.add_edge(ds_node, cnode, fast=True)
            g.add_edge(cnode, ds_node, fast=True)
            # if there is a key on a table at a different grain
            # add an FK edge to the canonical source, if it exists
            # for example, order ID on order product table
            default = get_default_grain_concept(concept, default_concept_graph)
            if concept != default:
                dcnode = concept_to_node(default)
                g.concepts[dcnode] = default
                g.add_node(dcnode)
                g.add_edge(cnode, dcnode, fast=True)
                g.add_edge(dcnode, cnode, fast=True)
    return g
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def generate_graph(
    environment: BuildEnvironment,
) -> ReferenceGraph:
    """Build the reference graph for a whole build environment.

    Passes the environment's concepts followed by its alias-origin concepts,
    plus all of its datasources, to ``generate_adhoc_graph`` with a fresh
    default-grain cache.
    """
    all_concepts = [
        *environment.concepts.values(),
        *environment.alias_origin_lookup.values(),
    ]
    all_datasources = list(environment.datasources.values())
    grain_cache: dict[str, BuildConcept] = {}
    return generate_adhoc_graph(
        all_concepts,
        all_datasources,
        default_concept_graph=grain_cache,
    )
|