pytrilogy 0.3.142__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- LICENSE.md +19 -0
- _preql_import_resolver/__init__.py +5 -0
- _preql_import_resolver/_preql_import_resolver.cpython-313-x86_64-linux-gnu.so +0 -0
- pytrilogy-0.3.142.dist-info/METADATA +555 -0
- pytrilogy-0.3.142.dist-info/RECORD +200 -0
- pytrilogy-0.3.142.dist-info/WHEEL +5 -0
- pytrilogy-0.3.142.dist-info/entry_points.txt +2 -0
- pytrilogy-0.3.142.dist-info/licenses/LICENSE.md +19 -0
- trilogy/__init__.py +16 -0
- trilogy/ai/README.md +10 -0
- trilogy/ai/__init__.py +19 -0
- trilogy/ai/constants.py +92 -0
- trilogy/ai/conversation.py +107 -0
- trilogy/ai/enums.py +7 -0
- trilogy/ai/execute.py +50 -0
- trilogy/ai/models.py +34 -0
- trilogy/ai/prompts.py +100 -0
- trilogy/ai/providers/__init__.py +0 -0
- trilogy/ai/providers/anthropic.py +106 -0
- trilogy/ai/providers/base.py +24 -0
- trilogy/ai/providers/google.py +146 -0
- trilogy/ai/providers/openai.py +89 -0
- trilogy/ai/providers/utils.py +68 -0
- trilogy/authoring/README.md +3 -0
- trilogy/authoring/__init__.py +148 -0
- trilogy/constants.py +113 -0
- trilogy/core/README.md +52 -0
- trilogy/core/__init__.py +0 -0
- trilogy/core/constants.py +6 -0
- trilogy/core/enums.py +443 -0
- trilogy/core/env_processor.py +120 -0
- trilogy/core/environment_helpers.py +320 -0
- trilogy/core/ergonomics.py +193 -0
- trilogy/core/exceptions.py +123 -0
- trilogy/core/functions.py +1227 -0
- trilogy/core/graph_models.py +139 -0
- trilogy/core/internal.py +85 -0
- trilogy/core/models/__init__.py +0 -0
- trilogy/core/models/author.py +2669 -0
- trilogy/core/models/build.py +2521 -0
- trilogy/core/models/build_environment.py +180 -0
- trilogy/core/models/core.py +501 -0
- trilogy/core/models/datasource.py +322 -0
- trilogy/core/models/environment.py +751 -0
- trilogy/core/models/execute.py +1177 -0
- trilogy/core/optimization.py +251 -0
- trilogy/core/optimizations/__init__.py +12 -0
- trilogy/core/optimizations/base_optimization.py +17 -0
- trilogy/core/optimizations/hide_unused_concept.py +47 -0
- trilogy/core/optimizations/inline_datasource.py +102 -0
- trilogy/core/optimizations/predicate_pushdown.py +245 -0
- trilogy/core/processing/README.md +94 -0
- trilogy/core/processing/READMEv2.md +121 -0
- trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
- trilogy/core/processing/__init__.py +0 -0
- trilogy/core/processing/concept_strategies_v3.py +508 -0
- trilogy/core/processing/constants.py +15 -0
- trilogy/core/processing/discovery_node_factory.py +451 -0
- trilogy/core/processing/discovery_utility.py +548 -0
- trilogy/core/processing/discovery_validation.py +167 -0
- trilogy/core/processing/graph_utils.py +43 -0
- trilogy/core/processing/node_generators/README.md +9 -0
- trilogy/core/processing/node_generators/__init__.py +31 -0
- trilogy/core/processing/node_generators/basic_node.py +160 -0
- trilogy/core/processing/node_generators/common.py +268 -0
- trilogy/core/processing/node_generators/constant_node.py +38 -0
- trilogy/core/processing/node_generators/filter_node.py +315 -0
- trilogy/core/processing/node_generators/group_node.py +213 -0
- trilogy/core/processing/node_generators/group_to_node.py +117 -0
- trilogy/core/processing/node_generators/multiselect_node.py +205 -0
- trilogy/core/processing/node_generators/node_merge_node.py +653 -0
- trilogy/core/processing/node_generators/recursive_node.py +88 -0
- trilogy/core/processing/node_generators/rowset_node.py +165 -0
- trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
- trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
- trilogy/core/processing/node_generators/select_merge_node.py +748 -0
- trilogy/core/processing/node_generators/select_node.py +95 -0
- trilogy/core/processing/node_generators/synonym_node.py +98 -0
- trilogy/core/processing/node_generators/union_node.py +91 -0
- trilogy/core/processing/node_generators/unnest_node.py +182 -0
- trilogy/core/processing/node_generators/window_node.py +201 -0
- trilogy/core/processing/nodes/README.md +28 -0
- trilogy/core/processing/nodes/__init__.py +179 -0
- trilogy/core/processing/nodes/base_node.py +519 -0
- trilogy/core/processing/nodes/filter_node.py +75 -0
- trilogy/core/processing/nodes/group_node.py +194 -0
- trilogy/core/processing/nodes/merge_node.py +420 -0
- trilogy/core/processing/nodes/recursive_node.py +46 -0
- trilogy/core/processing/nodes/select_node_v2.py +242 -0
- trilogy/core/processing/nodes/union_node.py +53 -0
- trilogy/core/processing/nodes/unnest_node.py +62 -0
- trilogy/core/processing/nodes/window_node.py +56 -0
- trilogy/core/processing/utility.py +823 -0
- trilogy/core/query_processor.py +596 -0
- trilogy/core/statements/README.md +35 -0
- trilogy/core/statements/__init__.py +0 -0
- trilogy/core/statements/author.py +536 -0
- trilogy/core/statements/build.py +0 -0
- trilogy/core/statements/common.py +20 -0
- trilogy/core/statements/execute.py +155 -0
- trilogy/core/table_processor.py +66 -0
- trilogy/core/utility.py +8 -0
- trilogy/core/validation/README.md +46 -0
- trilogy/core/validation/__init__.py +0 -0
- trilogy/core/validation/common.py +161 -0
- trilogy/core/validation/concept.py +146 -0
- trilogy/core/validation/datasource.py +227 -0
- trilogy/core/validation/environment.py +73 -0
- trilogy/core/validation/fix.py +256 -0
- trilogy/dialect/__init__.py +32 -0
- trilogy/dialect/base.py +1392 -0
- trilogy/dialect/bigquery.py +308 -0
- trilogy/dialect/common.py +147 -0
- trilogy/dialect/config.py +144 -0
- trilogy/dialect/dataframe.py +50 -0
- trilogy/dialect/duckdb.py +231 -0
- trilogy/dialect/enums.py +147 -0
- trilogy/dialect/metadata.py +173 -0
- trilogy/dialect/mock.py +190 -0
- trilogy/dialect/postgres.py +117 -0
- trilogy/dialect/presto.py +110 -0
- trilogy/dialect/results.py +89 -0
- trilogy/dialect/snowflake.py +129 -0
- trilogy/dialect/sql_server.py +137 -0
- trilogy/engine.py +48 -0
- trilogy/execution/config.py +75 -0
- trilogy/executor.py +568 -0
- trilogy/hooks/__init__.py +4 -0
- trilogy/hooks/base_hook.py +40 -0
- trilogy/hooks/graph_hook.py +139 -0
- trilogy/hooks/query_debugger.py +166 -0
- trilogy/metadata/__init__.py +0 -0
- trilogy/parser.py +10 -0
- trilogy/parsing/README.md +21 -0
- trilogy/parsing/__init__.py +0 -0
- trilogy/parsing/common.py +1069 -0
- trilogy/parsing/config.py +5 -0
- trilogy/parsing/exceptions.py +8 -0
- trilogy/parsing/helpers.py +1 -0
- trilogy/parsing/parse_engine.py +2813 -0
- trilogy/parsing/render.py +769 -0
- trilogy/parsing/trilogy.lark +540 -0
- trilogy/py.typed +0 -0
- trilogy/render.py +42 -0
- trilogy/scripts/README.md +9 -0
- trilogy/scripts/__init__.py +0 -0
- trilogy/scripts/agent.py +41 -0
- trilogy/scripts/agent_info.py +303 -0
- trilogy/scripts/common.py +355 -0
- trilogy/scripts/dependency/Cargo.lock +617 -0
- trilogy/scripts/dependency/Cargo.toml +39 -0
- trilogy/scripts/dependency/README.md +131 -0
- trilogy/scripts/dependency/build.sh +25 -0
- trilogy/scripts/dependency/src/directory_resolver.rs +177 -0
- trilogy/scripts/dependency/src/lib.rs +16 -0
- trilogy/scripts/dependency/src/main.rs +770 -0
- trilogy/scripts/dependency/src/parser.rs +435 -0
- trilogy/scripts/dependency/src/preql.pest +208 -0
- trilogy/scripts/dependency/src/python_bindings.rs +303 -0
- trilogy/scripts/dependency/src/resolver.rs +716 -0
- trilogy/scripts/dependency/tests/base.preql +3 -0
- trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
- trilogy/scripts/dependency/tests/customer.preql +6 -0
- trilogy/scripts/dependency/tests/main.preql +9 -0
- trilogy/scripts/dependency/tests/orders.preql +7 -0
- trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
- trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
- trilogy/scripts/dependency.py +323 -0
- trilogy/scripts/display.py +512 -0
- trilogy/scripts/environment.py +46 -0
- trilogy/scripts/fmt.py +32 -0
- trilogy/scripts/ingest.py +471 -0
- trilogy/scripts/ingest_helpers/__init__.py +1 -0
- trilogy/scripts/ingest_helpers/foreign_keys.py +123 -0
- trilogy/scripts/ingest_helpers/formatting.py +93 -0
- trilogy/scripts/ingest_helpers/typing.py +161 -0
- trilogy/scripts/init.py +105 -0
- trilogy/scripts/parallel_execution.py +713 -0
- trilogy/scripts/plan.py +189 -0
- trilogy/scripts/run.py +63 -0
- trilogy/scripts/serve.py +140 -0
- trilogy/scripts/serve_helpers/__init__.py +41 -0
- trilogy/scripts/serve_helpers/file_discovery.py +142 -0
- trilogy/scripts/serve_helpers/index_generation.py +206 -0
- trilogy/scripts/serve_helpers/models.py +38 -0
- trilogy/scripts/single_execution.py +131 -0
- trilogy/scripts/testing.py +119 -0
- trilogy/scripts/trilogy.py +68 -0
- trilogy/std/__init__.py +0 -0
- trilogy/std/color.preql +3 -0
- trilogy/std/date.preql +13 -0
- trilogy/std/display.preql +18 -0
- trilogy/std/geography.preql +22 -0
- trilogy/std/metric.preql +15 -0
- trilogy/std/money.preql +67 -0
- trilogy/std/net.preql +14 -0
- trilogy/std/ranking.preql +7 -0
- trilogy/std/report.preql +5 -0
- trilogy/std/semantic.preql +6 -0
- trilogy/utility.py +34 -0
trilogy/core/table_processor.py
ADDED
@@ -0,0 +1,66 @@
from trilogy.core.enums import Modifier
from trilogy.core.models.datasource import Address, Datasource
from trilogy.core.models.environment import Environment
from trilogy.core.statements.author import CreateStatement
from trilogy.core.statements.execute import (
    ColumnInfo,
    CreateTableInfo,
    ProcessedCreateStatement,
)


def datasource_to_create_table_info(
    datasource: Datasource,
) -> CreateTableInfo:
    address_field_map: dict[str, str] = {
        column.concept.address: column.alias  # type: ignore
        for column in datasource.columns
        if column.is_concrete
    }
    columns_info = [
        ColumnInfo(
            # the is_concrete filter guarantees alias is set
            name=col.alias,  # type: ignore
            type=col.concept.output_datatype,
            description=(
                col.concept.metadata.description if col.concept.metadata else None
            ),
            nullable=Modifier.OPTIONAL in col.modifiers,
            primary_key=col.concept.address in datasource.grain.components,
        )
        for col in datasource.columns
        if col.is_concrete
    ]

    return CreateTableInfo(
        name=(
            datasource.address.location
            if isinstance(datasource.address, Address)
            else datasource.address
        ),
        columns=columns_info,
        partition_keys=[
            address_field_map[c.address]
            for c in datasource.partition_by
            if c.address in address_field_map
        ],
    )


def process_create_statement(
    statement: CreateStatement,
    environment: Environment,
) -> ProcessedCreateStatement:
    # Process the create statement to extract table info
    targets_info = []
    for target in statement.targets:
        datasource: Datasource | None = environment.datasources.get(target)
        if not datasource:
            raise ValueError(f"Datasource {target} not found in environment.")

        create_table_info = datasource_to_create_table_info(datasource)
        targets_info.append(create_table_info)

    return ProcessedCreateStatement(
        scope=statement.scope, targets=targets_info, create_mode=statement.create_mode
    )
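One subtlety worth noting in datasource_to_create_table_info: partition keys are resolved through address_field_map, which only contains concretely bound columns, so any partition_by concept without a concrete binding is silently dropped. In plain-Python terms (illustrative only, not package code):

# address_field_map only contains concretely bound columns
address_field_map = {"order.id": "id", "order.date": "order_date"}
partition_by = ["order.date", "order.region"]  # region has no concrete binding
partition_keys = [a for a in partition_by if a in address_field_map]
assert [address_field_map[a] for a in partition_keys] == ["order_date"]  # region dropped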
trilogy/core/utility.py
ADDED
@@ -0,0 +1,8 @@
def safe_quote(string: str, quote_char: str):
    # split dotted identifiers
    # TODO: evaluate if we need smarter parsing for strings that could actually include .
    if string.startswith("https://"):
        # it's a url, no splitting
        return f"{quote_char}{string}{quote_char}"
    components = string.split(".")
    return ".".join([f"{quote_char}{component}{quote_char}" for component in components])
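For reference, a couple of illustrative calls showing the two quoting paths above (a sketch, not part of the package):

# dotted identifiers are quoted per component
assert safe_quote("schema.table", '"') == '"schema"."table"'
# URLs are treated as opaque and quoted whole
assert safe_quote("https://example.com/data.csv", '"') == '"https://example.com/data.csv"'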
trilogy/core/validation/README.md
ADDED
@@ -0,0 +1,46 @@
# Validation Behavior


## Environment

Runs all checks.

## Datasource

Runs checks for a comma-separated list of datasource names.

### Checks

- Column type bindings
- Grain

## Concepts

Runs checks for a comma-separated list of concept names.

### Checks

- Root concepts have at least one datasource binding
- Key concepts bound to datasources are correctly marked partial if they do not contain the full set of values

## Internal vs External Validation

In some cases validation requires querying the DB to get results to compare against, and at minimum requires schema access.

For example, validating bindings to a datasource requires getting all column types, which can be done per-engine via the information schema.

Validating datasource _grain_ requires either checking an enforced PK or - more generally - querying for duplicates.

For inline evaluation in trilogy, we can internally optimize and raise errors by default.

For external cases where the trilogy engine is not being used for DB access - such as for studio - we can only run the
checks that do not require DB access.

For checks that do require DB access, we can instead return the required queries along with a logical condition format and spec.

The client is then responsible for running the query and evaluating the results. This requires more work to integrate on the client side.

We don't have a canonical interchange format, so this will be brittle until we define one.

TODO: explore whether we can offload all checks to SQL. Can we, for example, do the datasource validation by unioning multiple tables together and ensuring that the target table has the max?
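To make the external flow concrete, here is a minimal sketch of how a client with its own DB connection might evaluate the pending checks returned by the validators in this package (ValidationTest and ExpectationType are defined in common.py below; render_sql and run_sql are hypothetical stand-ins for the client's own rendering and execution):

def evaluate_pending(tests: list[ValidationTest]) -> list[str]:
    problems = []
    for test in tests:
        if test.ran or test.raw_query is None:
            # already evaluated internally, or nothing to run
            continue
        sql = render_sql(test.raw_query)  # client renders the ProcessedQuery to SQL
        rows = run_sql(sql)  # client executes against its own engine
        if test.check_type == ExpectationType.ROWCOUNT:
            # e.g. grain checks expect "0" offending rows
            if test.expected is not None and test.expected.isdigit():
                if len(rows) != int(test.expected):
                    problems.append(f"rowcount mismatch: expected {test.expected}, got {len(rows)}")
    return problems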
trilogy/core/validation/__init__.py
File without changes
trilogy/core/validation/common.py
ADDED
@@ -0,0 +1,161 @@
from dataclasses import dataclass
from enum import Enum

from trilogy import Environment
from trilogy.authoring import (
    ConceptRef,
    DataType,
    Ordering,
    Purpose,
)
from trilogy.constants import MagicConstants
from trilogy.core.enums import ComparisonOperator, FunctionType
from trilogy.core.exceptions import ModelValidationError
from trilogy.core.models.build import (
    BuildCaseElse,
    BuildCaseWhen,
    BuildComparison,
    BuildConcept,
    BuildConditional,
    BuildDatasource,
    BuildFunction,
    BuildOrderBy,
    BuildOrderItem,
)
from trilogy.core.models.environment import EnvironmentConceptDict
from trilogy.core.models.execute import (
    CTE,
    QueryDatasource,
)
from trilogy.core.statements.execute import ProcessedQuery


class ExpectationType(Enum):
    LOGICAL = "logical"
    ROWCOUNT = "rowcount"
    DATA_TYPE_LIST = "data_type_list"


@dataclass
class ValidationTest:
    check_type: ExpectationType
    raw_query: ProcessedQuery | None = None
    generated_query: str | None = None
    expected: str | None = None
    result: ModelValidationError | None = None
    ran: bool = True


class ValidationType(Enum):
    DATASOURCES = "datasources"
    CONCEPTS = "concepts"


def build_order_args(concepts: list[BuildConcept]) -> list[BuildFunction]:
    order_args = []
    for concept in concepts:
        order_args.append(
            BuildFunction(
                operator=FunctionType.CASE,
                arguments=[
                    BuildCaseWhen(
                        comparison=BuildComparison(
                            left=concept,
                            operator=ComparisonOperator.IS,
                            right=MagicConstants.NULL,
                        ),
                        expr=1,
                    ),
                    BuildCaseElse(expr=0),
                ],
                output_data_type=DataType.INTEGER,
                output_purpose=Purpose.PROPERTY,
                arg_count=2,
            )
        )

    return order_args


def easy_query(
    concepts: list[BuildConcept],
    datasource: BuildDatasource,
    env: Environment,
    condition: BuildConditional | BuildComparison | None = None,
    limit: int = 100,
):
    """
    Build basic datasource specific queries.
    """
    datasource_outputs = {c.address: c for c in datasource.concepts}
    first_qds_concepts = datasource.concepts + concepts
    root_qds = QueryDatasource(
        input_concepts=first_qds_concepts,
        output_concepts=concepts,
        datasources=[datasource],
        joins=[],
        source_map={
            concept.address: (
                set([datasource]) if concept.address in datasource_outputs else set()
            )
            # include all base datasource concepts for convenience
            for concept in first_qds_concepts
        },
        grain=datasource.grain,
    )
    cte = CTE(
        name=f"datasource_{datasource.name}_base",
        source=root_qds,
        output_columns=concepts,
        source_map={
            concept.address: (
                [datasource.safe_identifier]
                if concept.address in datasource_outputs
                else []
            )
            for concept in first_qds_concepts
        },
        grain=datasource.grain,
        group_to_grain=True,
        base_alias_override=datasource.safe_identifier,
    )
    filter_cte = CTE(
        name=f"datasource_{datasource.name}_filter",
        source=QueryDatasource(
            datasources=[root_qds],
            input_concepts=cte.output_columns,
            output_concepts=cte.output_columns,
            joins=[],
            source_map={concept.address: (set([root_qds])) for concept in concepts},
            grain=cte.grain,
        ),
        parent_ctes=[cte],
        output_columns=cte.output_columns,
        source_map={
            concept.address: [cte.identifier] for concept in cte.output_columns
        },
        grain=cte.grain,
        condition=condition,
        limit=limit,
        order_by=BuildOrderBy(
            items=[
                BuildOrderItem(
                    expr=BuildFunction(
                        operator=FunctionType.SUM,
                        arguments=build_order_args(concepts),
                        output_data_type=DataType.INTEGER,
                        output_purpose=Purpose.PROPERTY,
                        arg_count=len(concepts),
                    ),
                    order=Ordering.DESCENDING,
                )
            ]
        ),
    )

    return ProcessedQuery(
        output_columns=[ConceptRef(address=concept.address) for concept in concepts],
        ctes=[cte, filter_cte],
        base=cte,
        local_concepts=EnvironmentConceptDict(**{}),
    )
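The CASE/SUM expression built by build_order_args is just a per-row null count: ordering by it descending puts the rows with the most NULL values first, so a limited sample surfaces the worst offenders. What the generated ORDER BY computes is equivalent to this plain-Python sketch (illustrative only, not package code):

def null_count(row: dict) -> int:
    # CASE WHEN value IS NULL THEN 1 ELSE 0 END, summed across the checked concepts
    return sum(1 if value is None else 0 for value in row.values())

rows = [{"a": 1, "b": None}, {"a": None, "b": None}, {"a": 1, "b": 2}]
# DESCENDING: the most-null row sorts first
assert sorted(rows, key=null_count, reverse=True)[0] == {"a": None, "b": None}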
trilogy/core/validation/concept.py
ADDED
@@ -0,0 +1,146 @@
from trilogy import Environment, Executor
from trilogy.core.enums import Derivation, Modifier, Purpose
from trilogy.core.exceptions import (
    ConceptModelValidationError,
    DatasourceColumnBindingData,
    DatasourceColumnBindingError,
)
from trilogy.core.models.build import (
    BuildConcept,
)
from trilogy.core.models.build_environment import BuildEnvironment
from trilogy.core.validation.common import ExpectationType, ValidationTest, easy_query


def validate_property_concept(
    concept: BuildConcept, exec: Executor | None = None
) -> list[ValidationTest]:
    return []


def validate_key_concept(
    concept: BuildConcept,
    env: Environment,
    build_env: BuildEnvironment,
    exec: Executor | None = None,
):
    results: list[ValidationTest] = []
    seen: dict[str, int] = {}

    count = 0
    for datasource in build_env.datasources.values():
        if concept.address in [c.address for c in datasource.concepts]:
            count += 1
    # if the concept only has one source, there is nothing to cross-check
    if count <= 1:
        return results

    for datasource in build_env.datasources.values():
        if concept.address in [c.address for c in datasource.concepts]:
            assignment = [
                x for x in datasource.columns if x.concept.address == concept.address
            ][0]
            # if the binding is partial, skip it
            if not assignment.is_complete:
                continue
            type_query = easy_query(
                concepts=[
                    # build_env.concepts[concept.address],
                    build_env.concepts[f"grain_check_{concept.safe_address}"],
                ],
                datasource=datasource,
                env=env,
                limit=1,
            )
            if exec:
                type_sql = exec.generate_sql(type_query)[-1]

                rows = exec.execute_raw_sql(type_sql).fetchall()
                seen[datasource.name] = rows[0][0] if rows else 0
            else:
                results.append(
                    ValidationTest(
                        raw_query=type_query,
                        check_type=ExpectationType.ROWCOUNT,
                        expected=f"equal_max_{concept.safe_address}",
                        result=None,
                        ran=False,
                    )
                )

    if not exec:
        return results
    max_seen: int = max([v for v in seen.values() if v is not None], default=0)
    for datasource in build_env.datasources.values():
        if concept.address in [c.address for c in datasource.concepts]:
            assignment = [
                x for x in datasource.columns if x.concept.address == concept.address
            ][0]
            err = None
            datasource_count: int = seen.get(datasource.name, 0)
            if datasource_count < max_seen and assignment.is_complete:
                err = DatasourceColumnBindingError(
                    address=datasource.identifier,
                    errors=[
                        DatasourceColumnBindingData(
                            address=concept.address,
                            value=None,
                            value_type=concept.datatype,
                            value_modifiers=[Modifier.PARTIAL],
                            actual_type=concept.datatype,
                            actual_modifiers=concept.modifiers,
                        )
                    ],
                    message=f"Key concept {concept.address} is missing values in datasource {datasource.name} (max cardinality in data {max_seen}, datasource has {seen[datasource.name]} values) but is not marked as partial.",
                )
            results.append(
                ValidationTest(
                    check_type=ExpectationType.ROWCOUNT,
                    expected=str(max_seen),
                    result=err,
                    ran=True,
                )
            )

    return results


def validate_datasources(
    concept: BuildConcept, build_env: BuildEnvironment
) -> list[ValidationTest]:
    if concept.lineage:
        return []
    for datasource in build_env.datasources.values():
        if concept.address in [c.address for c in datasource.concepts]:
            return []
    if not concept.derivation == Derivation.ROOT:
        return []
    if concept.name.startswith("__") or (
        concept.namespace and concept.namespace.startswith("__")
    ):
        return []
    return [
        ValidationTest(
            check_type=ExpectationType.LOGICAL,
            expected=None,
            result=ConceptModelValidationError(
                f"Concept {concept.address} is a root concept but has no datasources bound"
            ),
            ran=True,
        )
    ]


def validate_concept(
    concept: BuildConcept,
    env: Environment,
    build_env: BuildEnvironment,
    exec: Executor | None = None,
) -> list[ValidationTest]:
    base: list[ValidationTest] = []
    base += validate_datasources(concept, build_env)
    if concept.purpose == Purpose.PROPERTY:
        base += validate_property_concept(concept)
    elif concept.purpose == Purpose.KEY:
        base += validate_key_concept(concept, env, build_env, exec)
    return base
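The key-concept check above reduces to comparing each datasource's distinct count for the concept against the maximum observed across sources: any binding marked complete that falls short should have been marked partial. The decision rule in isolation (illustrative only):

# seen: datasource name -> distinct count of the key concept in that source
# complete: datasource name -> whether the binding is marked complete (not partial)
def flag_should_be_partial(seen: dict[str, int], complete: dict[str, bool]) -> list[str]:
    max_seen = max(seen.values(), default=0)
    # a complete binding with fewer values than the max should have been marked partial
    return [name for name, n in seen.items() if complete.get(name) and n < max_seen]

assert flag_should_be_partial(
    {"orders": 100, "returns": 40}, {"orders": True, "returns": True}
) == ["returns"]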
trilogy/core/validation/datasource.py
ADDED
@@ -0,0 +1,227 @@
from datetime import date, datetime
from decimal import Decimal
from typing import Any

from trilogy import Environment, Executor
from trilogy.authoring import (
    ArrayType,
    DataType,
    MapType,
    NumericType,
    StructType,
    TraitDataType,
    arg_to_datatype,
)
from trilogy.core.enums import ComparisonOperator, Modifier
from trilogy.core.exceptions import (
    DatasourceColumnBindingData,
    DatasourceColumnBindingError,
    DatasourceModelValidationError,
)
from trilogy.core.models.build import (
    BuildComparison,
    BuildDatasource,
)
from trilogy.core.models.build_environment import BuildEnvironment
from trilogy.core.validation.common import ExpectationType, ValidationTest, easy_query
from trilogy.utility import unique


def row_to_dict(row):
    return {key: row[key] for key in row.keys()}


def type_check(
    input: Any,
    expected_type: (
        DataType | ArrayType | StructType | MapType | NumericType | TraitDataType
    ),
    nullable: bool = True,
) -> bool:
    if input is None and nullable:
        return True

    target_type = expected_type
    # unwrap trait wrappers to the base type; recursion handles nesting
    if isinstance(target_type, TraitDataType):
        return type_check(input, target_type.data_type, nullable)

    if target_type == DataType.STRING:
        return isinstance(input, str)
    if target_type == DataType.INTEGER:
        return isinstance(input, int)
    if target_type == DataType.BIGINT:
        return isinstance(input, int)  # or check for larger int if needed
    if target_type == DataType.FLOAT or isinstance(target_type, NumericType):
        return (
            isinstance(input, float)
            or isinstance(input, int)
            or isinstance(input, Decimal)
        )
    if target_type == DataType.NUMBER:
        return isinstance(input, (int, float, Decimal))
    if target_type == DataType.NUMERIC:
        return isinstance(input, (int, float, Decimal))
    if target_type == DataType.BOOL:
        return isinstance(input, bool)
    if target_type == DataType.DATE:
        return isinstance(input, date) and not isinstance(input, datetime)
    if target_type == DataType.DATETIME:
        return isinstance(input, datetime)
    if target_type == DataType.TIMESTAMP:
        return isinstance(input, datetime)  # or timestamp type if you have one
    if target_type == DataType.UNIX_SECONDS:
        return isinstance(input, (int, float))  # Unix timestamps are numeric
    if target_type == DataType.DATE_PART:
        return isinstance(
            input, str
        )  # assuming date parts are strings like "year", "month"
    if target_type == DataType.ARRAY or isinstance(target_type, ArrayType):
        return isinstance(input, list)
    if target_type == DataType.MAP or isinstance(target_type, MapType):
        return isinstance(input, dict)
    if target_type == DataType.STRUCT or isinstance(target_type, StructType):
        return isinstance(input, dict)
    if target_type == DataType.NULL:
        return input is None
    if target_type == DataType.UNKNOWN:
        return True
    return False


def validate_datasource(
    datasource: BuildDatasource,
    env: Environment,
    build_env: BuildEnvironment,
    exec: Executor | None = None,
    fix: bool = False,
) -> list[ValidationTest]:
    results: list[ValidationTest] = []
    # we might have merged concepts, where multiple columns map to the same address
    unique_outputs = unique(
        [build_env.concepts[col.concept.address] for col in datasource.columns],
        "address",
    )
    type_query = easy_query(
        concepts=unique_outputs,
        datasource=datasource,
        env=env,
        limit=100,
    )

    rows = []
    if exec:
        type_sql = exec.generate_sql(type_query)[-1]
        try:
            rows = exec.execute_raw_sql(type_sql).fetchall()
        except Exception as e:
            results.append(
                ValidationTest(
                    raw_query=type_query,
                    generated_query=type_sql,
                    check_type=ExpectationType.LOGICAL,
                    expected="valid_sql",
                    result=DatasourceModelValidationError(
                        f"Datasource {datasource.name} failed validation. Error executing type query {type_sql}: {e}"
                    ),
                    ran=True,
                )
            )
            return results
    else:

        results.append(
            ValidationTest(
                raw_query=type_query,
                check_type=ExpectationType.LOGICAL,
                expected="datatype_match",
                result=None,
                ran=False,
            )
        )
        return results
    failures: list[DatasourceColumnBindingData] = []
    cols_with_error = set()
    for row in rows:
        for col in datasource.columns:
            actual_address = build_env.concepts[col.concept.address].safe_address
            if actual_address in cols_with_error:
                continue
            rval = row[actual_address]
            passed = type_check(rval, col.concept.datatype, col.is_nullable)
            if not passed:
                value_type = (
                    arg_to_datatype(rval) if rval is not None else col.concept.datatype
                )
                traits = None
                if isinstance(col.concept.datatype, TraitDataType):
                    traits = col.concept.datatype.traits
                if traits and not isinstance(value_type, TraitDataType):
                    value_type = TraitDataType(type=value_type, traits=traits)
                failures.append(
                    DatasourceColumnBindingData(
                        address=col.concept.address,
                        value=rval,
                        value_type=value_type,
                        value_modifiers=[Modifier.NULLABLE] if rval is None else [],
                        actual_type=col.concept.datatype,
                        actual_modifiers=col.concept.modifiers,
                    )
                )
                cols_with_error.add(actual_address)

    if failures:
        results.append(
            ValidationTest(
                check_type=ExpectationType.LOGICAL,
                expected="datatype_match",
                ran=True,
                result=DatasourceColumnBindingError(
                    address=datasource.identifier, errors=failures
                ),
            )
        )
    if not datasource.grain.components:
        return results

    # grain validation section
    query = easy_query(
        concepts=[build_env.concepts[name] for name in datasource.grain.components]
        + [build_env.concepts["grain_check"]],
        datasource=datasource,
        env=exec.environment,
        condition=BuildComparison(
            left=build_env.concepts["grain_check"],
            right=1,
            operator=ComparisonOperator.GT,
        ),
    )
    if not exec:
        results.append(
            ValidationTest(
                raw_query=query,
                check_type=ExpectationType.ROWCOUNT,
                expected="0",
                result=None,
                ran=False,
            )
        )

    else:
        sql = exec.generate_sql(query)[-1]

        rows = exec.execute_raw_sql(sql).fetchmany(10)
        if rows:
            results.append(
                ValidationTest(
                    raw_query=query,
                    generated_query=sql,
                    check_type=ExpectationType.ROWCOUNT,
                    expected="0",
                    result=DatasourceModelValidationError(
                        f"Datasource {datasource.name} failed validation. Found rows that do not conform to grain: {[row_to_dict(r) for r in rows]}"
                    ),
                    ran=True,
                )
            )

    return results
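A few illustrative type_check calls, following the rules above (a sketch, not package tests):

from datetime import date, datetime

assert type_check(None, DataType.STRING) is True  # nullable by default
assert type_check(None, DataType.STRING, nullable=False) is False
assert type_check("2024", DataType.STRING) is True
assert type_check(date(2024, 1, 1), DataType.DATE) is True
assert type_check(datetime(2024, 1, 1), DataType.DATE) is False  # datetimes are not DATEs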