pytrilogy 0.3.148__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-312-aarch64-linux-gnu.so +0 -0
- pytrilogy-0.3.148.dist-info/METADATA +555 -0
- pytrilogy-0.3.148.dist-info/RECORD +206 -0
- pytrilogy-0.3.148.dist-info/WHEEL +5 -0
- pytrilogy-0.3.148.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.148.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +27 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +119 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +454 -0
- trilogy/core/env_processor.py +239 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1240 -0
- trilogy/core/graph_models.py +142 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2662 -0
- trilogy/core/models/build.py +2603 -0
- trilogy/core/models/build_environment.py +165 -0
- trilogy/core/models/core.py +506 -0
- trilogy/core/models/datasource.py +434 -0
- trilogy/core/models/environment.py +756 -0
- trilogy/core/models/execute.py +1213 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +270 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +207 -0
- trilogy/core/processing/node_generators/node_merge_node.py +695 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +786 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +522 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +604 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1431 -0
- trilogy/dialect/bigquery.py +314 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +159 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +376 -0
- trilogy/dialect/enums.py +149 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/__init__.py +17 -0
- trilogy/execution/config.py +119 -0
- trilogy/execution/state/__init__.py +0 -0
- trilogy/execution/state/file_state_store.py +0 -0
- trilogy/execution/state/sqllite_state_store.py +0 -0
- trilogy/execution/state/state_store.py +301 -0
- trilogy/executor.py +656 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +135 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2863 -0
- trilogy/parsing/render.py +773 -0
- trilogy/parsing/trilogy.lark +544 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +45 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +306 -0
- trilogy/scripts/common.py +430 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +387 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +311 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +555 -0
- trilogy/scripts/environment.py +59 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +472 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +748 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/refresh.py +106 -0
- trilogy/scripts/run.py +79 -0
- trilogy/scripts/serve.py +202 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +129 -0
- trilogy/scripts/trilogy.py +75 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
trilogy/core/processing/README.md
@@ -0,0 +1,94 @@

## Query Planning

Query planning is divided into three core phases.

The first phase builds an abstract node tree by looping through every combination of output concept and key in the output query grain and recursively searching for sources.

It will begin with aggregations if those exist, then window functions, then filtration functions, then rowsets, and finally look for bare selects.

Each type of complex node will generate a new recursive node search for its required parents, until a set of terminal nodes with base concept selection is reached.

A default merge node is injected between every recursion. The overall loop will terminate early if an output node is returned with all required query concepts. If not, the merge node will handle a join between the returned subtrees. If there are not multiple nodes to merge, the merge node will simply return the single parent node and prune itself from the graph.

In the second pass, each node is resolved to an abstract CTE. At this phase, CTEs that reference the same tables, parent CTEs, and filtering can be merged.

Finally, in query rendering, each CTE is rendered to a backend-appropriate query. The final CTE, or the `base`, will contain all required columns for the final output. The last select applies only query-level filters and ordering; no joins take place there.
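
To make the first phase concrete, here is a deliberately simplified sketch of the discovery loop with the merge-node injection and pruning described above. None of these names come from the package: `Node`, `generate_node_for`, and `discover` are illustrative stand-ins for the real generators under `trilogy/core/processing`.

```python
from dataclasses import dataclass, field

@dataclass
class Node:
    """Illustrative stand-in for a planner node; not the real class."""
    concepts: set                                # concepts this node can provide
    parents: list = field(default_factory=list)

def generate_node_for(concept: str) -> Node:
    """Pretend generator: the real planner dispatches on derivation type
    (aggregate, window, filter, rowset, bare select) and recurses into the
    parents each derivation requires."""
    return Node(concepts={concept})

def discover(targets: set) -> Node:
    found = []
    for concept in sorted(targets):              # loop over every output concept
        node = generate_node_for(concept)
        found.append(node)
        if targets <= node.concepts:
            return node                          # early exit: one node covers everything
    if len(found) == 1:
        return found[0]                          # a merge node with one parent prunes itself
    # otherwise inject a default merge node to join the subtrees
    all_concepts = set().union(*(n.concepts for n in found))
    return Node(concepts=all_concepts, parents=found)

if __name__ == "__main__":
    plan = discover({"user_id", "user_post_count"})
    print(f"merged {len(plan.parents)} subtrees")   # merged 2 subtrees
```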

## Aug 2023 Update

For complex derivations, propagating the "full" context upstream is an issue. Instead, we need to adjust the logic to prune the optional nodes in each search pattern.

For filter nodes -> we should have these generate a node with _just_ the filtered column + the new concept.

## Debugging

Base query derivation accepts the `DebuggingHook` defined under hooks, which will print each step of the plan to the console. This is a great first step to figure out what might be going wrong with discovery in a query.

Example usage:
```python
from preql import parse
from preql.core.query_processor import process_query
from preql.hooks.query_debugger import DebuggingHook
from preql.core.models import Select

declarations = """
key user_id int metadata(description="the description");
property user_id.display_name string metadata(description="The display name ");
property user_id.about_me string metadata(description="User provided description");


key post_id int;
metric post_count <- count(post_id);


datasource posts (
    user_id: user_id,
    id: post_id
    )
    grain (post_id)
    address bigquery-public-data.stackoverflow.post_history
;


select
    user_id,
    count(post_id) -> user_post_count
;

metric avg_user_post_count <- avg(user_post_count);


datasource users (
    id: user_id,
    display_name: display_name,
    about_me: about_me,
    )
    grain (user_id)
    address bigquery-public-data.stackoverflow.users
;


select
    avg_user_post_count
;
"""
env, parsed = parse(declarations)
select: Select = parsed[-1]

query = process_query(statement=select, environment=env, hooks=[DebuggingHook()])
```
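
Note that this snippet predates the package rename: the wheel in this diff ships its Python code under `trilogy/`, with no `preql` package at all. A hedged adaptation of the imports, assuming the corresponding modules in the file list above still expose the same names, would be:

```python
# Assumed import paths based on the module list in this wheel; verify against
# trilogy/__init__.py, trilogy/core/query_processor.py, and
# trilogy/hooks/query_debugger.py before relying on them.
from trilogy import parse
from trilogy.core.query_processor import process_query
from trilogy.hooks.query_debugger import DebuggingHook
# The Select model has moved since this README was written; its current home
# (for example trilogy/core/statements/author.py) and exact class name are
# assumptions, so the type annotation can simply be dropped.
```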
trilogy/core/processing/READMEv2.md
@@ -0,0 +1,121 @@

## Execution Plan

Query discovery is a recursive loop.

The order in which concepts are discovered is prioritized by lineage type, with an additional sort so that if one concept is derived from another, the parent is delayed in the ordering.

This ensures that the parent is typically included in the discovery of the child node's path, and results in a more 'ergonomic' query.

In rare cases, a node may return partial results. The discovery loop will then attempt to merge those.

If it cannot merge, it will attempt to discover new concepts to inject into the search path that will result in a mergeable graph.

[See concept injection for more.]
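
A toy version of that ordering rule, to make the "parent is delayed" behavior concrete. The lineage labels, priority values, and function names below are invented for illustration; the real ordering lives in the planner code under `trilogy/core/processing` and is richer than this.

```python
# Invented priorities: lower sorts first. Derived concepts (deeper lineage)
# also sort before their parents, so a parent is usually picked up while
# sourcing its child and does not need its own discovery pass.
LINEAGE_PRIORITY = {"aggregate": 0, "window": 1, "filter": 2, "rowset": 3, "root": 4}

def derivation_depth(concept, parent_of):
    depth = 0
    while concept in parent_of:
        concept = parent_of[concept]
        depth += 1
    return depth

def discovery_order(concepts, lineage_of, parent_of):
    return sorted(
        concepts,
        key=lambda c: (LINEAGE_PRIORITY[lineage_of[c]], -derivation_depth(c, parent_of)),
    )

lineage = {"avg_user_post_count": "aggregate", "user_post_count": "aggregate", "post_id": "root"}
parents = {"avg_user_post_count": "user_post_count", "user_post_count": "post_id"}
print(discovery_order(["post_id", "user_post_count", "avg_user_post_count"], lineage, parents))
# ['avg_user_post_count', 'user_post_count', 'post_id']
```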

## Filtering

Filtering via where clauses is always pushed up as high as possible by passing the condition object through to sourcing.

If at any point we have a discovery loop whose concepts already include the contents of the where clause, we need to inject the condition immediately.

If we never hit that point, filtering will be injected when we have only root or constant nodes left.
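
A minimal sketch of that injection rule, with invented names (this is not the planner's API): inject the condition as soon as a loop already covers every concept the where clause references, otherwise hold it until only root or constant nodes remain.

```python
def should_inject_condition(loop_concepts, condition_concepts, remaining_derivations):
    # The loop already covers the contents of the where clause: inject now.
    if set(condition_concepts) <= set(loop_concepts):
        return True
    # Otherwise wait until nothing but root/constant nodes is left to discover.
    return all(d in ("root", "constant") for d in remaining_derivations)

print(should_inject_condition({"x", "z"}, {"z"}, ["aggregate"]))    # True
print(should_inject_condition({"x"}, {"z"}, ["aggregate"]))         # False, keep passing it up
print(should_inject_condition({"x"}, {"z"}, ["root", "constant"]))  # True, last chance
```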

## Pseudonyms

Pseudonyms should always be handled by a node returning the pseudonymous type.

Ex: if A* has been merged into B*, and B* cannot be found but A* can, the node returning A* should return A* and let the merge loop reconcile.

To be more specific about the desired behavior:

Nodes must accurately track the *actual* concept being returned, because pseudonyms can have different rendering.

Completion checks should always check for the matching concepts _or_ matching pseudonyms.

The final query should map the output label to whatever the user actually specified.
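
A toy completion check in that spirit (the function and argument names are made up, not the package's API): a node satisfies a requirement if it returns either the concept itself or any registered pseudonym of it.

```python
def is_complete(required, returned, pseudonyms):
    """required/returned are sets of concept names; pseudonyms maps a concept
    to the set of names it has been merged with."""
    for concept in required:
        candidates = {concept} | pseudonyms.get(concept, set())
        if not candidates & returned:
            return False
    return True

# B* was merged from A*: a node that can only return A* still satisfies a
# request for B*, and the merge loop reconciles the naming afterwards.
print(is_complete({"b"}, {"a"}, {"b": {"a"}}))  # True
print(is_complete({"b"}, {"c"}, {"b": {"a"}}))  # False
```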

Things that might need work:

- Merging concepts with partial modifiers relies on datasources being updated with partial. If you merge a calculation, we should have additional tests that upstream discovery of the calculation verifies all partial constraints.

## Always pass up local optional

## pass up filter

E.g.

    where z = 2,
    x, sum(y)

Loop 1: start with sum(y); pass up x and the condition on z.

Loop 2: only scalars are left; get x, y, z via scalar resolution.
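
Written out in full select syntax (the concept names are just the placeholders from the note above, and the `y_sum` alias is invented for illustration), that example is roughly:

```
where z = 2
select
    x,
    sum(y) -> y_sum
;
```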

## With Complex in Where Clause

    where count(z) = 2,
    x, sum(y)

Loop 1: start with sum(y); pass up x and the condition on count(z).

Loop 2: only scalars are left for output: x, y. But the condition has count(z) = 2, so we need one more loop with all concepts.

Loop 3: x, y, z; get x, y, z via scalar resolution.

## Where Clause With Output Condition

This is a tricky one:

    where count(z) = a
    select
        x,
        count(z)

First pass: we need to get count(z), x AND a, because we need to evaluate the condition here. So the required set becomes x, a, count(z) by x.

Loop 1: count(z), optional x and a, no filter.

[Agg Updates]

- 18 failing tests to start
- 16 after fixing pseudonym binding
- 17 after updating select nodes
- 6 failing after using partial/full mappings
- 3 failing after fixing persisted rebind handling
- 3 [different] failing after fixing the materialized_cache
- 1 failing after fixing the order of injecting materialized roots
trilogy/core/processing/VIRTUAL_UNNEST.md
@@ -0,0 +1,30 @@
# Virtual Unnest Nodes

## The Problem

We want to create a dynamic list - let's say a list of quarters. We can use an unnest or a union to create a constant field with [1,2,3,4].

Now we have data - say quarter:sales - that doesn't have all quarters. Maybe it has {2:3, 3:4}. How do we merge this?

If we run a naive merge

```
auto quarters <- unnest([1,2,3,4]);

merge data_quarter into ~quarters;
```

default discovery will say that since the unnest has no dependencies, we can just create it de novo after fetching the sales.

So we'll end up with

    {1:3, 2:3, 3:3, 4:3, 1:4, 2:4, 3:4, 4:4}

when we want:

    {1:null, 2:3, 3:4, 4:null}
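
The mismatch is easy to reproduce outside the planner. A minimal sketch in plain Python, using the quarters/sales values from above (dicts stand in for datasources; nothing here is planner code):

```python
quarters = [1, 2, 3, 4]    # the unnest([1,2,3,4]) constant
sales = {2: 3, 3: 4}       # quarter:sales data with missing quarters

# Naive merge: the constant has no dependencies, so it is re-created after the
# sales fetch and effectively cross-joined against every sales value.
naive = {(q, s) for q in quarters for s in sales.values()}
print(sorted(naive))
# [(1, 3), (1, 4), (2, 3), (2, 4), (3, 3), (3, 4), (4, 3), (4, 4)]

# Desired merge: treat the constant list as its own datasource and left-join
# the sales data onto it.
desired = {q: sales.get(q) for q in quarters}
print(desired)
# {1: None, 2: 3, 3: 4, 4: None}
```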

## Fix

We need to regard an unnest node constant as a unique datasource.