pytrilogy 0.3.138__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. LICENSE.md +19 -0
  2. _preql_import_resolver/__init__.py +5 -0
  3. _preql_import_resolver/_preql_import_resolver.cpython-311-x86_64-linux-gnu.so +0 -0
  4. pytrilogy-0.3.138.dist-info/METADATA +525 -0
  5. pytrilogy-0.3.138.dist-info/RECORD +182 -0
  6. pytrilogy-0.3.138.dist-info/WHEEL +5 -0
  7. pytrilogy-0.3.138.dist-info/entry_points.txt +2 -0
  8. pytrilogy-0.3.138.dist-info/licenses/LICENSE.md +19 -0
  9. trilogy/__init__.py +9 -0
  10. trilogy/ai/README.md +10 -0
  11. trilogy/ai/__init__.py +19 -0
  12. trilogy/ai/constants.py +92 -0
  13. trilogy/ai/conversation.py +107 -0
  14. trilogy/ai/enums.py +7 -0
  15. trilogy/ai/execute.py +50 -0
  16. trilogy/ai/models.py +34 -0
  17. trilogy/ai/prompts.py +87 -0
  18. trilogy/ai/providers/__init__.py +0 -0
  19. trilogy/ai/providers/anthropic.py +106 -0
  20. trilogy/ai/providers/base.py +24 -0
  21. trilogy/ai/providers/google.py +146 -0
  22. trilogy/ai/providers/openai.py +89 -0
  23. trilogy/ai/providers/utils.py +68 -0
  24. trilogy/authoring/README.md +3 -0
  25. trilogy/authoring/__init__.py +143 -0
  26. trilogy/constants.py +113 -0
  27. trilogy/core/README.md +52 -0
  28. trilogy/core/__init__.py +0 -0
  29. trilogy/core/constants.py +6 -0
  30. trilogy/core/enums.py +443 -0
  31. trilogy/core/env_processor.py +120 -0
  32. trilogy/core/environment_helpers.py +320 -0
  33. trilogy/core/ergonomics.py +193 -0
  34. trilogy/core/exceptions.py +123 -0
  35. trilogy/core/functions.py +1227 -0
  36. trilogy/core/graph_models.py +139 -0
  37. trilogy/core/internal.py +85 -0
  38. trilogy/core/models/__init__.py +0 -0
  39. trilogy/core/models/author.py +2672 -0
  40. trilogy/core/models/build.py +2521 -0
  41. trilogy/core/models/build_environment.py +180 -0
  42. trilogy/core/models/core.py +494 -0
  43. trilogy/core/models/datasource.py +322 -0
  44. trilogy/core/models/environment.py +748 -0
  45. trilogy/core/models/execute.py +1177 -0
  46. trilogy/core/optimization.py +251 -0
  47. trilogy/core/optimizations/__init__.py +12 -0
  48. trilogy/core/optimizations/base_optimization.py +17 -0
  49. trilogy/core/optimizations/hide_unused_concept.py +47 -0
  50. trilogy/core/optimizations/inline_datasource.py +102 -0
  51. trilogy/core/optimizations/predicate_pushdown.py +245 -0
  52. trilogy/core/processing/README.md +94 -0
  53. trilogy/core/processing/READMEv2.md +121 -0
  54. trilogy/core/processing/VIRTUAL_UNNEST.md +30 -0
  55. trilogy/core/processing/__init__.py +0 -0
  56. trilogy/core/processing/concept_strategies_v3.py +508 -0
  57. trilogy/core/processing/constants.py +15 -0
  58. trilogy/core/processing/discovery_node_factory.py +451 -0
  59. trilogy/core/processing/discovery_utility.py +517 -0
  60. trilogy/core/processing/discovery_validation.py +167 -0
  61. trilogy/core/processing/graph_utils.py +43 -0
  62. trilogy/core/processing/node_generators/README.md +9 -0
  63. trilogy/core/processing/node_generators/__init__.py +31 -0
  64. trilogy/core/processing/node_generators/basic_node.py +160 -0
  65. trilogy/core/processing/node_generators/common.py +268 -0
  66. trilogy/core/processing/node_generators/constant_node.py +38 -0
  67. trilogy/core/processing/node_generators/filter_node.py +315 -0
  68. trilogy/core/processing/node_generators/group_node.py +213 -0
  69. trilogy/core/processing/node_generators/group_to_node.py +117 -0
  70. trilogy/core/processing/node_generators/multiselect_node.py +205 -0
  71. trilogy/core/processing/node_generators/node_merge_node.py +653 -0
  72. trilogy/core/processing/node_generators/recursive_node.py +88 -0
  73. trilogy/core/processing/node_generators/rowset_node.py +165 -0
  74. trilogy/core/processing/node_generators/select_helpers/__init__.py +0 -0
  75. trilogy/core/processing/node_generators/select_helpers/datasource_injection.py +261 -0
  76. trilogy/core/processing/node_generators/select_merge_node.py +748 -0
  77. trilogy/core/processing/node_generators/select_node.py +95 -0
  78. trilogy/core/processing/node_generators/synonym_node.py +98 -0
  79. trilogy/core/processing/node_generators/union_node.py +91 -0
  80. trilogy/core/processing/node_generators/unnest_node.py +182 -0
  81. trilogy/core/processing/node_generators/window_node.py +201 -0
  82. trilogy/core/processing/nodes/README.md +28 -0
  83. trilogy/core/processing/nodes/__init__.py +179 -0
  84. trilogy/core/processing/nodes/base_node.py +519 -0
  85. trilogy/core/processing/nodes/filter_node.py +75 -0
  86. trilogy/core/processing/nodes/group_node.py +194 -0
  87. trilogy/core/processing/nodes/merge_node.py +420 -0
  88. trilogy/core/processing/nodes/recursive_node.py +46 -0
  89. trilogy/core/processing/nodes/select_node_v2.py +242 -0
  90. trilogy/core/processing/nodes/union_node.py +53 -0
  91. trilogy/core/processing/nodes/unnest_node.py +62 -0
  92. trilogy/core/processing/nodes/window_node.py +56 -0
  93. trilogy/core/processing/utility.py +823 -0
  94. trilogy/core/query_processor.py +596 -0
  95. trilogy/core/statements/README.md +35 -0
  96. trilogy/core/statements/__init__.py +0 -0
  97. trilogy/core/statements/author.py +536 -0
  98. trilogy/core/statements/build.py +0 -0
  99. trilogy/core/statements/common.py +20 -0
  100. trilogy/core/statements/execute.py +155 -0
  101. trilogy/core/table_processor.py +66 -0
  102. trilogy/core/utility.py +8 -0
  103. trilogy/core/validation/README.md +46 -0
  104. trilogy/core/validation/__init__.py +0 -0
  105. trilogy/core/validation/common.py +161 -0
  106. trilogy/core/validation/concept.py +146 -0
  107. trilogy/core/validation/datasource.py +227 -0
  108. trilogy/core/validation/environment.py +73 -0
  109. trilogy/core/validation/fix.py +106 -0
  110. trilogy/dialect/__init__.py +32 -0
  111. trilogy/dialect/base.py +1359 -0
  112. trilogy/dialect/bigquery.py +256 -0
  113. trilogy/dialect/common.py +147 -0
  114. trilogy/dialect/config.py +144 -0
  115. trilogy/dialect/dataframe.py +50 -0
  116. trilogy/dialect/duckdb.py +177 -0
  117. trilogy/dialect/enums.py +147 -0
  118. trilogy/dialect/metadata.py +173 -0
  119. trilogy/dialect/mock.py +190 -0
  120. trilogy/dialect/postgres.py +91 -0
  121. trilogy/dialect/presto.py +104 -0
  122. trilogy/dialect/results.py +89 -0
  123. trilogy/dialect/snowflake.py +90 -0
  124. trilogy/dialect/sql_server.py +92 -0
  125. trilogy/engine.py +48 -0
  126. trilogy/execution/config.py +75 -0
  127. trilogy/executor.py +568 -0
  128. trilogy/hooks/__init__.py +4 -0
  129. trilogy/hooks/base_hook.py +40 -0
  130. trilogy/hooks/graph_hook.py +139 -0
  131. trilogy/hooks/query_debugger.py +166 -0
  132. trilogy/metadata/__init__.py +0 -0
  133. trilogy/parser.py +10 -0
  134. trilogy/parsing/README.md +21 -0
  135. trilogy/parsing/__init__.py +0 -0
  136. trilogy/parsing/common.py +1069 -0
  137. trilogy/parsing/config.py +5 -0
  138. trilogy/parsing/exceptions.py +8 -0
  139. trilogy/parsing/helpers.py +1 -0
  140. trilogy/parsing/parse_engine.py +2813 -0
  141. trilogy/parsing/render.py +750 -0
  142. trilogy/parsing/trilogy.lark +540 -0
  143. trilogy/py.typed +0 -0
  144. trilogy/render.py +42 -0
  145. trilogy/scripts/README.md +7 -0
  146. trilogy/scripts/__init__.py +0 -0
  147. trilogy/scripts/dependency/Cargo.lock +617 -0
  148. trilogy/scripts/dependency/Cargo.toml +39 -0
  149. trilogy/scripts/dependency/README.md +131 -0
  150. trilogy/scripts/dependency/build.sh +25 -0
  151. trilogy/scripts/dependency/src/directory_resolver.rs +162 -0
  152. trilogy/scripts/dependency/src/lib.rs +16 -0
  153. trilogy/scripts/dependency/src/main.rs +770 -0
  154. trilogy/scripts/dependency/src/parser.rs +435 -0
  155. trilogy/scripts/dependency/src/preql.pest +208 -0
  156. trilogy/scripts/dependency/src/python_bindings.rs +289 -0
  157. trilogy/scripts/dependency/src/resolver.rs +716 -0
  158. trilogy/scripts/dependency/tests/base.preql +3 -0
  159. trilogy/scripts/dependency/tests/cli_integration.rs +377 -0
  160. trilogy/scripts/dependency/tests/customer.preql +6 -0
  161. trilogy/scripts/dependency/tests/main.preql +9 -0
  162. trilogy/scripts/dependency/tests/orders.preql +7 -0
  163. trilogy/scripts/dependency/tests/test_data/base.preql +9 -0
  164. trilogy/scripts/dependency/tests/test_data/consumer.preql +1 -0
  165. trilogy/scripts/dependency.py +323 -0
  166. trilogy/scripts/display.py +460 -0
  167. trilogy/scripts/environment.py +46 -0
  168. trilogy/scripts/parallel_execution.py +483 -0
  169. trilogy/scripts/single_execution.py +131 -0
  170. trilogy/scripts/trilogy.py +772 -0
  171. trilogy/std/__init__.py +0 -0
  172. trilogy/std/color.preql +3 -0
  173. trilogy/std/date.preql +13 -0
  174. trilogy/std/display.preql +18 -0
  175. trilogy/std/geography.preql +22 -0
  176. trilogy/std/metric.preql +15 -0
  177. trilogy/std/money.preql +67 -0
  178. trilogy/std/net.preql +14 -0
  179. trilogy/std/ranking.preql +7 -0
  180. trilogy/std/report.preql +5 -0
  181. trilogy/std/semantic.preql +6 -0
  182. trilogy/utility.py +34 -0
@@ -0,0 +1,66 @@
1
+ from trilogy.core.enums import Modifier
2
+ from trilogy.core.models.datasource import Address, Datasource
3
+ from trilogy.core.models.environment import Environment
4
+ from trilogy.core.statements.author import CreateStatement
5
+ from trilogy.core.statements.execute import (
6
+ ColumnInfo,
7
+ CreateTableInfo,
8
+ ProcessedCreateStatement,
9
+ )
10
+
11
+
12
def datasource_to_create_table_info(
    datasource: Datasource,
) -> CreateTableInfo:
    """Translate a Datasource's concrete column bindings into a CreateTableInfo.

    Only concrete column bindings (column.is_concrete) are included. A column is
    marked primary_key when its concept is part of the datasource grain, and
    nullable when the binding carries Modifier.OPTIONAL.
    """
    # concept address -> bound column alias, restricted to concrete bindings
    address_field_map: dict[str, str] = {}
    for column in datasource.columns:
        if column.is_concrete:
            address_field_map[column.concept.address] = column.alias  # type: ignore

    columns_info: list[ColumnInfo] = []
    for col in datasource.columns:
        if not col.is_concrete:
            continue
        meta = col.concept.metadata
        columns_info.append(
            ColumnInfo(
                # the is_concrete filter above guarantees alias is set
                name=col.alias,  # type: ignore
                type=col.concept.output_datatype,
                description=meta.description if meta else None,
                nullable=Modifier.OPTIONAL in col.modifiers,
                primary_key=col.concept.address in datasource.grain.components,
            )
        )

    # an Address object carries the physical location; a bare string is used as-is
    if isinstance(datasource.address, Address):
        table_name = datasource.address.location
    else:
        table_name = datasource.address

    # partition keys are expressed as physical column names; concepts without a
    # concrete binding are silently skipped
    partition_keys = [
        address_field_map[c.address]
        for c in datasource.partition_by
        if c.address in address_field_map
    ]

    return CreateTableInfo(
        name=table_name,
        columns=columns_info,
        partition_keys=partition_keys,
    )
48
+
49
+
50
def process_create_statement(
    statement: CreateStatement,
    environment: Environment,
) -> ProcessedCreateStatement:
    """Resolve each create-statement target to its datasource and emit table info.

    Raises:
        ValueError: if any target name has no datasource in the environment.
    """
    targets_info: list[CreateTableInfo] = []
    for target in statement.targets:
        source: Datasource | None = environment.datasources.get(target)
        if not source:
            raise ValueError(f"Datasource {target} not found in environment.")
        targets_info.append(datasource_to_create_table_info(source))

    return ProcessedCreateStatement(
        scope=statement.scope, targets=targets_info, create_mode=statement.create_mode
    )
@@ -0,0 +1,8 @@
1
def safe_quote(string: str, quote_char: str):
    """Quote an identifier, quoting each dot-separated part individually.

    URLs (anything starting with "https://") are treated as a single opaque
    identifier and wrapped whole, never split on dots.
    """
    # TODO: evaluate if we need smarter parsing for strings that could actually include .
    if string.startswith("https://"):
        # it's a url, no splitting
        return f"{quote_char}{string}{quote_char}"
    quoted = (f"{quote_char}{part}{quote_char}" for part in string.split("."))
    return ".".join(quoted)
@@ -0,0 +1,46 @@
1
+ # Validation Behavior
2
+
3
+
4
+ ## Environment
5
+
6
+ Runs all checks.
7
+
8
+ ## Datasource
9
+
10
+ Runs checks by comma separated list of datasource names
11
+
12
+ ### Checks
13
+
14
+ - Column type bindings
15
+ - Grain
16
+
17
+ ## Concepts
18
+
19
+ Run checks by comma separated list of concept names.
20
+
21
+ ### Checks
22
+
23
+ - Root concepts have at least one datasource binding
24
+ - Key concepts bound to datasources are correctly marked partial if they do not contain the full set
25
+
26
+ ## Internal vs External Valid
27
+
28
+ Validation requires us to query the DB to get results to compare against in some cases, and minimally have schema access.
29
+
30
+ For example, validating bindings to a datasource requires getting all column types, which can be done per-engine based on information schema.
31
+
32
+ Validating datasource _grain_ requires either checking an enforced PK or - more generally - querying to see duplicates.
33
+
34
+ For inline evaluation in trilogy, we can internally optimize and raise errors by default.
35
+
36
+ For external cases where the trilogy engine is not being used for DB access - such as for studio - we instead can only validate
37
+ checks that do not require DB access.
38
+
39
+ For those that require DB access, we can instead return the required queries and some logical condition formatting and spec.
40
+
41
+ The client is responsible for then running the query and evaluating the results. This requires more work to integrate on the client side.
42
+
43
+ We don't have a canonical interchange format, so this will be brittle until we define that.
44
+
45
+ TODO: explore if we can offload all checks to SQL? Can we, for example, do the datasource validation by unioning multiple tables together and ensuring that the target table has the max?
46
+
File without changes
@@ -0,0 +1,161 @@
1
+ from dataclasses import dataclass
2
+ from enum import Enum
3
+
4
+ from trilogy import Environment
5
+ from trilogy.authoring import (
6
+ ConceptRef,
7
+ DataType,
8
+ Ordering,
9
+ Purpose,
10
+ )
11
+ from trilogy.constants import MagicConstants
12
+ from trilogy.core.enums import ComparisonOperator, FunctionType
13
+ from trilogy.core.exceptions import ModelValidationError
14
+ from trilogy.core.models.build import (
15
+ BuildCaseElse,
16
+ BuildCaseWhen,
17
+ BuildComparison,
18
+ BuildConcept,
19
+ BuildConditional,
20
+ BuildDatasource,
21
+ BuildFunction,
22
+ BuildOrderBy,
23
+ BuildOrderItem,
24
+ )
25
+ from trilogy.core.models.environment import EnvironmentConceptDict
26
+ from trilogy.core.models.execute import (
27
+ CTE,
28
+ QueryDatasource,
29
+ )
30
+ from trilogy.core.statements.execute import ProcessedQuery
31
+
32
+
33
class ExpectationType(Enum):
    """How a ValidationTest's `expected` value should be interpreted."""

    # expected is a named logical condition (e.g. "valid_sql", "datatype_match")
    LOGICAL = "logical"
    # expected is a row count (as a string) the query result must satisfy
    ROWCOUNT = "rowcount"
    # expected is a list of column data types — TODO confirm; no usage visible here
    DATA_TYPE_LIST = "data_type_list"
37
+
38
+
39
@dataclass
class ValidationTest:
    """A single validation check, plus its outcome when it has been executed."""

    # how `expected` should be interpreted
    check_type: ExpectationType
    # the trilogy query backing the check, when one is required
    raw_query: ProcessedQuery | None = None
    # SQL rendered from raw_query; populated when the check was actually executed
    generated_query: str | None = None
    # expected outcome, e.g. "0" for a rowcount or a named condition like "datatype_match"
    expected: str | None = None
    # set to a validation error when the check ran and failed; None otherwise
    result: ModelValidationError | None = None
    # False when the check could not be executed locally (no DB access) and is
    # returned for the client to run externally
    ran: bool = True
47
+
48
+
49
class ValidationType(Enum):
    """Which class of model object a validation pass targets."""

    DATASOURCES = "datasources"
    CONCEPTS = "concepts"
52
+
53
+
54
def build_order_args(concepts: list[BuildConcept]) -> list[BuildFunction]:
    """Build one null-flag expression per concept.

    Each returned expression is CASE WHEN <concept> IS NULL THEN 1 ELSE 0 END,
    suitable for summing to rank rows by how many of the concepts are NULL.
    """

    def _null_flag(target: BuildConcept) -> BuildFunction:
        # CASE WHEN target IS NULL THEN 1 ELSE 0 END
        is_null = BuildComparison(
            left=target,
            operator=ComparisonOperator.IS,
            right=MagicConstants.NULL,
        )
        return BuildFunction(
            operator=FunctionType.CASE,
            arguments=[
                BuildCaseWhen(comparison=is_null, expr=1),
                BuildCaseElse(expr=0),
            ],
            output_data_type=DataType.INTEGER,
            output_purpose=Purpose.PROPERTY,
            arg_count=2,
        )

    return [_null_flag(concept) for concept in concepts]
78
+
79
+
80
def easy_query(
    concepts: list[BuildConcept],
    datasource: BuildDatasource,
    env: Environment,
    condition: BuildConditional | BuildComparison | None = None,
    limit: int = 100,
):
    """
    Build basic datasource specific queries.

    Produces a ProcessedQuery over a single datasource with two CTEs:
      1. a base CTE selecting `concepts`, grouped to the datasource grain;
      2. a filter CTE on top applying `condition`, `limit`, and an ordering
         that puts rows with the most NULL concept values first (SUM of the
         per-concept null flags from build_order_args, descending).

    Args:
        concepts: output concepts to select.
        datasource: the single datasource to query.
        env: authoring environment (not referenced in this body; kept for callers).
        condition: optional filter applied in the outer CTE.
        limit: max rows returned by the outer CTE.
    """
    datasource_outputs = {c.address: c for c in datasource.concepts}
    first_qds_concepts = datasource.concepts + concepts
    root_qds = QueryDatasource(
        input_concepts=first_qds_concepts,
        output_concepts=concepts,
        datasources=[datasource],
        joins=[],
        source_map={
            concept.address: (
                set([datasource]) if concept.address in datasource_outputs else set()
            )
            # include all base datasource concepts for convenience; concepts not
            # present on the datasource map to an empty source set
            for concept in first_qds_concepts
        },
        grain=datasource.grain,
    )
    cte = CTE(
        name=f"datasource_{datasource.name}_base",
        source=root_qds,
        output_columns=concepts,
        source_map={
            concept.address: (
                [datasource.safe_identifier]
                if concept.address in datasource_outputs
                else []
            )
            for concept in first_qds_concepts
        },
        grain=datasource.grain,
        # collapse to the datasource grain in the base CTE
        group_to_grain=True,
        base_alias_override=datasource.safe_identifier,
    )
    filter_cte = CTE(
        name=f"datasource_{datasource.name}_filter",
        source=QueryDatasource(
            datasources=[root_qds],
            input_concepts=cte.output_columns,
            output_concepts=cte.output_columns,
            joins=[],
            source_map={concept.address: (set([root_qds])) for concept in concepts},
            grain=cte.grain,
        ),
        parent_ctes=[cte],
        output_columns=cte.output_columns,
        source_map={
            concept.address: [cte.identifier] for concept in cte.output_columns
        },
        grain=cte.grain,
        condition=condition,
        limit=limit,
        # order rows with the highest count of NULL concept values first
        order_by=BuildOrderBy(
            items=[
                BuildOrderItem(
                    expr=BuildFunction(
                        operator=FunctionType.SUM,
                        arguments=build_order_args(concepts),
                        output_data_type=DataType.INTEGER,
                        output_purpose=Purpose.PROPERTY,
                        arg_count=len(concepts),
                    ),
                    order=Ordering.DESCENDING,
                )
            ]
        ),
    )

    return ProcessedQuery(
        output_columns=[ConceptRef(address=concept.address) for concept in concepts],
        ctes=[cte, filter_cte],
        # NOTE(review): base is the inner cte rather than filter_cte — confirm
        # downstream SQL generation still renders the filter CTE's condition/limit
        base=cte,
        local_concepts=EnvironmentConceptDict(**{}),
    )
@@ -0,0 +1,146 @@
1
+ from trilogy import Environment, Executor
2
+ from trilogy.core.enums import Derivation, Modifier, Purpose
3
+ from trilogy.core.exceptions import (
4
+ ConceptModelValidationError,
5
+ DatasourceColumnBindingData,
6
+ DatasourceColumnBindingError,
7
+ )
8
+ from trilogy.core.models.build import (
9
+ BuildConcept,
10
+ )
11
+ from trilogy.core.models.build_environment import BuildEnvironment
12
+ from trilogy.core.validation.common import ExpectationType, ValidationTest, easy_query
13
+
14
+
15
def validate_property_concept(
    concept: BuildConcept, exec: Executor | None = None
) -> list[ValidationTest]:
    # Placeholder: property-concept validation is not implemented yet, so no
    # checks are produced. `exec` is accepted for interface symmetry with the
    # other validate_* functions.
    return []
19
+
20
+
21
def validate_key_concept(
    concept: BuildConcept,
    env: Environment,
    build_env: BuildEnvironment,
    exec: Executor | None = None,
) -> list[ValidationTest]:
    """Validate that a key concept bound to multiple datasources is consistently partial.

    For every datasource that binds the concept completely (not marked partial),
    a grain-check query is run to measure the concept's cardinality in that
    datasource. Any complete binding whose cardinality is below the maximum seen
    across datasources should have been marked partial and produces a
    DatasourceColumnBindingError.

    When `exec` is None, the queries cannot be run locally; un-ran
    ValidationTests carrying the raw queries are returned for external execution.
    """
    results: list[ValidationTest] = []
    # datasource name -> observed cardinality of the concept in that datasource
    seen: dict[str, int] = {}

    count = 0
    for datasource in build_env.datasources.values():
        if concept.address in [c.address for c in datasource.concepts]:
            count += 1
    # bound to at most one source: nothing to cross-check
    if count <= 1:
        return results

    for datasource in build_env.datasources.values():
        if concept.address in [c.address for c in datasource.concepts]:
            assignment = [
                x for x in datasource.columns if x.concept.address == concept.address
            ][0]
            # bindings already marked partial are expected to be incomplete; skip them
            if not assignment.is_complete:
                continue
            type_query = easy_query(
                concepts=[
                    build_env.concepts[f"grain_check_{concept.safe_address}"],
                ],
                datasource=datasource,
                env=env,
                limit=1,
            )
            if exec:
                type_sql = exec.generate_sql(type_query)[-1]

                rows = exec.execute_raw_sql(type_sql).fetchall()
                seen[datasource.name] = rows[0][0] if rows else 0
            else:
                # no executor: hand the query back for external execution
                results.append(
                    ValidationTest(
                        raw_query=type_query,
                        check_type=ExpectationType.ROWCOUNT,
                        expected=f"equal_max_{concept.safe_address}",
                        result=None,
                        ran=False,
                    )
                )

    if not exec:
        return results
    max_seen: int = max([v for v in seen.values() if v is not None], default=0)
    for datasource in build_env.datasources.values():
        if concept.address in [c.address for c in datasource.concepts]:
            assignment = [
                x for x in datasource.columns if x.concept.address == concept.address
            ][0]
            err = None
            # use a defaulted lookup consistently: partial bindings were skipped
            # above and therefore have no entry in `seen`
            datasource_count: int = seen.get(datasource.name, 0)
            if datasource_count < max_seen and assignment.is_complete:
                err = DatasourceColumnBindingError(
                    address=datasource.identifier,
                    errors=[
                        DatasourceColumnBindingData(
                            address=concept.address,
                            value=None,
                            value_type=concept.datatype,
                            value_modifiers=[Modifier.PARTIAL],
                            actual_type=concept.datatype,
                            actual_modifiers=concept.modifiers,
                        )
                    ],
                    message=f"Key concept {concept.address} is missing values in datasource {datasource.name} (max cardinality in data {max_seen}, datasource has {datasource_count} values) but is not marked as partial.",
                )
            results.append(
                ValidationTest(
                    check_type=ExpectationType.ROWCOUNT,
                    expected=str(max_seen),
                    result=err,
                    ran=True,
                )
            )

    return results
106
+
107
+
108
def validate_datasources(
    concept: BuildConcept, build_env: BuildEnvironment
) -> list[ValidationTest]:
    """Check that a root concept is bound to at least one datasource.

    Concepts with lineage, non-root concepts, and internal (dunder-prefixed)
    concepts are exempt. Returns a single failed LOGICAL test when the concept
    is unbound, otherwise an empty list.
    """
    if concept.lineage:
        return []
    for datasource in build_env.datasources.values():
        bound = [c.address for c in datasource.concepts]
        if concept.address in bound:
            return []
    if concept.derivation != Derivation.ROOT:
        return []
    internal = concept.name.startswith("__") or (
        concept.namespace and concept.namespace.startswith("__")
    )
    if internal:
        return []
    failure = ConceptModelValidationError(
        f"Concept {concept.address} is a root concept but has no datasources bound"
    )
    return [
        ValidationTest(
            check_type=ExpectationType.LOGICAL,
            expected=None,
            result=failure,
            ran=True,
        )
    ]
132
+
133
+
134
def validate_concept(
    concept: BuildConcept,
    env: Environment,
    build_env: BuildEnvironment,
    exec: Executor | None = None,
) -> list[ValidationTest]:
    """Run all validation checks applicable to a single concept.

    Datasource-binding checks always run; purpose-specific checks are then
    dispatched on the concept's purpose (PROPERTY vs KEY).
    """
    checks: list[ValidationTest] = list(validate_datasources(concept, build_env))
    if concept.purpose == Purpose.PROPERTY:
        checks.extend(validate_property_concept(concept))
    elif concept.purpose == Purpose.KEY:
        checks.extend(validate_key_concept(concept, env, build_env, exec))
    return checks
@@ -0,0 +1,227 @@
1
+ from datetime import date, datetime
2
+ from decimal import Decimal
3
+ from typing import Any
4
+
5
+ from trilogy import Environment, Executor
6
+ from trilogy.authoring import (
7
+ ArrayType,
8
+ DataType,
9
+ MapType,
10
+ NumericType,
11
+ StructType,
12
+ TraitDataType,
13
+ arg_to_datatype,
14
+ )
15
+ from trilogy.core.enums import ComparisonOperator, Modifier
16
+ from trilogy.core.exceptions import (
17
+ DatasourceColumnBindingData,
18
+ DatasourceColumnBindingError,
19
+ DatasourceModelValidationError,
20
+ )
21
+ from trilogy.core.models.build import (
22
+ BuildComparison,
23
+ BuildDatasource,
24
+ )
25
+ from trilogy.core.models.build_environment import BuildEnvironment
26
+ from trilogy.core.validation.common import ExpectationType, ValidationTest, easy_query
27
+ from trilogy.utility import unique
28
+
29
+
30
def row_to_dict(row):
    """Convert a mapping-style result row (supports .keys() and item access) to a plain dict."""
    keys = list(row.keys())
    return dict(zip(keys, (row[k] for k in keys)))
32
+
33
+
34
def type_check(
    input: Any,
    expected_type: (
        DataType | ArrayType | StructType | MapType | NumericType | TraitDataType
    ),
    nullable: bool = True,
) -> bool:
    """Return True if a raw DB value matches the expected trilogy datatype.

    Args:
        input: the raw Python value fetched from the database.
        expected_type: declared trilogy type; TraitDataType wrappers are
            unwrapped to their underlying type before checking.
        nullable: when True, None is accepted for any type.

    Returns:
        True when the value's Python type is acceptable for the declared type;
        unknown declared types return False, except DataType.UNKNOWN which
        always passes.
    """
    if input is None and nullable:
        return True

    # unwrap (possibly nested) trait wrappers to the underlying data type.
    # (was a degenerate `while ...: return recurse(...)` — an if-shaped loop)
    target_type = expected_type
    while isinstance(target_type, TraitDataType):
        target_type = target_type.data_type

    if target_type == DataType.STRING:
        return isinstance(input, str)
    if target_type == DataType.INTEGER:
        # NOTE(review): bool is a subclass of int, so True/False pass this
        # check — confirm whether booleans should be rejected here.
        return isinstance(input, int)
    if target_type == DataType.BIGINT:
        return isinstance(input, int)  # or check for larger int if needed
    if target_type == DataType.FLOAT or isinstance(target_type, NumericType):
        return isinstance(input, (float, int, Decimal))
    # NUMBER and NUMERIC accept the same Python numeric types
    if target_type == DataType.NUMBER or target_type == DataType.NUMERIC:
        return isinstance(input, (int, float, Decimal))
    if target_type == DataType.BOOL:
        return isinstance(input, bool)
    if target_type == DataType.DATE:
        # datetime subclasses date: an exact date must not be a datetime
        return isinstance(input, date) and not isinstance(input, datetime)
    if target_type == DataType.DATETIME:
        return isinstance(input, datetime)
    if target_type == DataType.TIMESTAMP:
        return isinstance(input, datetime)  # or timestamp type if you have one
    if target_type == DataType.UNIX_SECONDS:
        return isinstance(input, (int, float))  # Unix timestamps are numeric
    if target_type == DataType.DATE_PART:
        # assuming date parts are strings like "year", "month"
        return isinstance(input, str)
    if target_type == DataType.ARRAY or isinstance(target_type, ArrayType):
        return isinstance(input, list)
    if target_type == DataType.MAP or isinstance(target_type, MapType):
        return isinstance(input, dict)
    if target_type == DataType.STRUCT or isinstance(target_type, StructType):
        return isinstance(input, dict)
    if target_type == DataType.NULL:
        return input is None
    if target_type == DataType.UNKNOWN:
        return True
    return False
89
+
90
+
91
def validate_datasource(
    datasource: BuildDatasource,
    env: Environment,
    build_env: BuildEnvironment,
    exec: Executor | None = None,
    fix: bool = False,
) -> list[ValidationTest]:
    """Validate a datasource's column type bindings and its declared grain.

    Checks produced:
      1. datatype_match: sample up to 100 rows and confirm each bound column's
         values pass type_check against the concept's declared type/nullability.
      2. grain rowcount: confirm no grain combination occurs more than once.

    When `exec` is None (external validation), no SQL is run; instead the raw
    queries are returned as un-ran ValidationTests for the client to execute.
    Previously the non-exec path returned early after the datatype test, making
    the un-ran grain test below unreachable; both are now emitted.

    `fix` is accepted for interface compatibility but unused in this body.
    """
    results: list[ValidationTest] = []
    # we might have merged concepts, where both will map out to the same
    unique_outputs = unique(
        [build_env.concepts[col.concept.address] for col in datasource.columns],
        "address",
    )
    type_query = easy_query(
        concepts=unique_outputs,
        datasource=datasource,
        env=env,
        limit=100,
    )

    rows = []
    if exec:
        type_sql = exec.generate_sql(type_query)[-1]
        try:
            rows = exec.execute_raw_sql(type_sql).fetchall()
        except Exception as e:
            results.append(
                ValidationTest(
                    raw_query=type_query,
                    generated_query=type_sql,
                    check_type=ExpectationType.LOGICAL,
                    expected="valid_sql",
                    result=DatasourceModelValidationError(
                        f"Datasource {datasource.name} failed validation. Error executing type query {type_sql}: {e}"
                    ),
                    ran=True,
                )
            )
            # the query itself is broken; no further checks are meaningful
            return results
    else:
        # no executor: hand the datatype query back for external execution and
        # fall through so the grain check below is also emitted un-ran
        results.append(
            ValidationTest(
                raw_query=type_query,
                check_type=ExpectationType.LOGICAL,
                expected="datatype_match",
                result=None,
                ran=False,
            )
        )
    failures: list[DatasourceColumnBindingData] = []
    cols_with_error = set()
    for row in rows:
        for col in datasource.columns:
            actual_address = build_env.concepts[col.concept.address].safe_address
            # report at most one failure per column
            if actual_address in cols_with_error:
                continue
            rval = row[actual_address]
            passed = type_check(rval, col.concept.datatype, col.is_nullable)
            if not passed:
                value_type = (
                    arg_to_datatype(rval) if rval is not None else col.concept.datatype
                )
                # carry declared traits onto the observed type so the error
                # reports a comparable type
                traits = None
                if isinstance(col.concept.datatype, TraitDataType):
                    traits = col.concept.datatype.traits
                if traits and not isinstance(value_type, TraitDataType):
                    value_type = TraitDataType(type=value_type, traits=traits)
                failures.append(
                    DatasourceColumnBindingData(
                        address=col.concept.address,
                        value=rval,
                        value_type=value_type,
                        value_modifiers=[Modifier.NULLABLE] if rval is None else [],
                        actual_type=col.concept.datatype,
                        actual_modifiers=col.concept.modifiers,
                    )
                )
                cols_with_error.add(actual_address)

    if failures:
        results.append(
            ValidationTest(
                check_type=ExpectationType.LOGICAL,
                expected="datatype_match",
                ran=True,
                result=DatasourceColumnBindingError(
                    address=datasource.identifier, errors=failures
                ),
            )
        )
    if not datasource.grain.components:
        # no declared grain: nothing further to check
        return results

    # grain validation section
    query = easy_query(
        concepts=[build_env.concepts[name] for name in datasource.grain.components]
        + [build_env.concepts["grain_check"]],
        datasource=datasource,
        # use the env parameter: exec may be None on this path, and
        # exec.environment would raise AttributeError
        env=env,
        condition=BuildComparison(
            left=build_env.concepts["grain_check"],
            right=1,
            operator=ComparisonOperator.GT,
        ),
    )
    if not exec:
        results.append(
            ValidationTest(
                raw_query=query,
                check_type=ExpectationType.ROWCOUNT,
                expected="0",
                result=None,
                ran=False,
            )
        )

    else:
        sql = exec.generate_sql(query)[-1]

        rows = exec.execute_raw_sql(sql).fetchmany(10)
        if rows:
            results.append(
                ValidationTest(
                    raw_query=query,
                    generated_query=sql,
                    check_type=ExpectationType.ROWCOUNT,
                    expected="0",
                    result=DatasourceModelValidationError(
                        f"Datasource {datasource.name} failed validation. Found rows that do not conform to grain: {[row_to_dict(r) for r in rows]}"
                    ),
                    ran=True,
                )
            )

    return results