pytrilogy 0.0.3.95__py3-none-any.whl → 0.0.3.97__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,8 +1,9 @@
-from trilogy import Executor
-from trilogy.core.enums import Derivation, Purpose
+from trilogy import Environment, Executor
+from trilogy.core.enums import Derivation, Modifier, Purpose
 from trilogy.core.exceptions import (
     ConceptModelValidationError,
-    DatasourceModelValidationError,
+    DatasourceColumnBindingData,
+    DatasourceColumnBindingError,
 )
 from trilogy.core.models.build import (
     BuildConcept,
@@ -12,64 +13,79 @@ from trilogy.core.validation.common import ExpectationType, ValidationTest, easy
 
 
 def validate_property_concept(
-    concept: BuildConcept, generate_only: bool = False
+    concept: BuildConcept, exec: Executor | None = None
 ) -> list[ValidationTest]:
     return []
 
 
 def validate_key_concept(
     concept: BuildConcept,
+    env: Environment,
     build_env: BuildEnvironment,
-    exec: Executor,
-    generate_only: bool = False,
+    exec: Executor | None = None,
 ):
     results: list[ValidationTest] = []
-    seen = {}
+    seen: dict[str, int] = {}
     for datasource in build_env.datasources.values():
         if concept.address in [c.address for c in datasource.concepts]:
             assignment = [
                 x for x in datasource.columns if x.concept.address == concept.address
             ][0]
+            # if it's not a partial, skip it
+            if not assignment.is_complete:
+                continue
             type_query = easy_query(
                 concepts=[
                     # build_env.concepts[concept.address],
                     build_env.concepts[f"grain_check_{concept.safe_address}"],
                 ],
                 datasource=datasource,
-                env=exec.environment,
+                env=env,
                 limit=1,
             )
-            type_sql = exec.generate_sql(type_query)[-1]
+            if exec:
+                type_sql = exec.generate_sql(type_query)[-1]
 
-            rows = exec.execute_raw_sql(type_sql).fetchall()
-            if generate_only and assignment.is_complete:
+                rows = exec.execute_raw_sql(type_sql).fetchall()
+                seen[datasource.name] = rows[0][0] if rows else 0
+            else:
                 results.append(
                     ValidationTest(
-                        query=type_sql,
+                        raw_query=type_query,
                         check_type=ExpectationType.ROWCOUNT,
                         expected=f"equal_max_{concept.safe_address}",
                         result=None,
                        ran=False,
                    )
                )
-                continue
-            seen[datasource.name] = rows[0][0] if rows else None
-    if generate_only:
+
+    if not exec:
        return results
-    max_seen = max([v for v in seen.values() if v is not None], default=0)
+    max_seen: int = max([v for v in seen.values() if v is not None], default=0)
     for datasource in build_env.datasources.values():
         if concept.address in [c.address for c in datasource.concepts]:
             assignment = [
                 x for x in datasource.columns if x.concept.address == concept.address
             ][0]
             err = None
-            if (seen[datasource.name] or 0) < max_seen and assignment.is_complete:
-                err = DatasourceModelValidationError(
-                    f"Key concept {concept.address} is missing values in datasource {datasource.name} (max cardinality in data {max_seen}, datasource has {seen[datasource.name]} values) but is not marked as partial."
+            datasource_count: int = seen.get(datasource.name, 0)
+            if datasource_count < max_seen and assignment.is_complete:
+                err = DatasourceColumnBindingError(
+                    address=datasource.identifier,
+                    errors=[
+                        DatasourceColumnBindingData(
+                            address=concept.address,
+                            value=None,
+                            value_type=concept.datatype,
+                            value_modifiers=[Modifier.PARTIAL],
+                            actual_type=concept.datatype,
+                            actual_modifiers=concept.modifiers,
+                        )
+                    ],
+                    message=f"Key concept {concept.address} is missing values in datasource {datasource.name} (max cardinality in data {max_seen}, datasource has {seen[datasource.name]} values) but is not marked as partial.",
                 )
             results.append(
                 ValidationTest(
-                    query=None,
                     check_type=ExpectationType.ROWCOUNT,
                     expected=str(max_seen),
                     result=err,
@@ -96,7 +112,6 @@ def validate_datasources(
         return []
     return [
         ValidationTest(
-            query=None,
             check_type=ExpectationType.LOGICAL,
             expected=None,
             result=ConceptModelValidationError(
@@ -109,14 +124,14 @@ def validate_datasources(
 
 def validate_concept(
     concept: BuildConcept,
+    env: Environment,
     build_env: BuildEnvironment,
-    exec: Executor,
-    generate_only: bool = False,
+    exec: Executor | None = None,
 ) -> list[ValidationTest]:
     base: list[ValidationTest] = []
     base += validate_datasources(concept, build_env)
     if concept.purpose == Purpose.PROPERTY:
-        base += validate_property_concept(concept, generate_only)
+        base += validate_property_concept(concept)
     elif concept.purpose == Purpose.KEY:
-        base += validate_key_concept(concept, build_env, exec, generate_only)
+        base += validate_key_concept(concept, env, build_env, exec)
     return base
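
Not part of the diff: a minimal sketch of how the new column-binding error objects appear to fit together, using only the keyword arguments visible in the hunks above. The concept and datasource addresses are invented for illustration.

    from trilogy.authoring import arg_to_datatype
    from trilogy.core.exceptions import (
        DatasourceColumnBindingData,
        DatasourceColumnBindingError,
    )

    # One record per offending column: the value seen, the type inferred from it,
    # and the type/modifiers declared on the binding (addresses are hypothetical).
    detail = DatasourceColumnBindingData(
        address="orders.customer_id",
        value="abc",
        value_type=arg_to_datatype("abc"),
        value_modifiers=[],
        actual_type=arg_to_datatype(1),  # stand-in for the declared column type
        actual_modifiers=[],
    )
    # The per-datasource error aggregates those records, as in the validators above.
    error = DatasourceColumnBindingError(address="orders", errors=[detail])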
@@ -2,7 +2,7 @@ from datetime import date, datetime
 from decimal import Decimal
 from typing import Any
 
-from trilogy import Executor
+from trilogy import Environment, Executor
 from trilogy.authoring import (
     ArrayType,
     DataType,
@@ -10,9 +10,14 @@ from trilogy.authoring import (
     NumericType,
     StructType,
     TraitDataType,
+    arg_to_datatype,
+)
+from trilogy.core.enums import ComparisonOperator, Modifier
+from trilogy.core.exceptions import (
+    DatasourceColumnBindingData,
+    DatasourceColumnBindingError,
+    DatasourceModelValidationError,
 )
-from trilogy.core.enums import ComparisonOperator
-from trilogy.core.exceptions import DatasourceModelValidationError
 from trilogy.core.models.build import (
     BuildComparison,
     BuildDatasource,
@@ -61,12 +66,13 @@ def type_check(
 
 def validate_datasource(
     datasource: BuildDatasource,
+    env: Environment,
     build_env: BuildEnvironment,
-    exec: Executor,
-    generate_only: bool = False,
+    exec: Executor | None = None,
+    fix: bool = False,
 ) -> list[ValidationTest]:
     results: list[ValidationTest] = []
-    # we might have merged concepts, where both wil lmap out to the same
+    # we might have merged concepts, where both will map out to the same
     unique_outputs = unique(
         [build_env.concepts[col.concept.address] for col in datasource.columns],
         "address",
@@ -74,18 +80,20 @@ def validate_datasource(
     type_query = easy_query(
         concepts=unique_outputs,
         datasource=datasource,
-        env=exec.environment,
+        env=env,
         limit=100,
     )
-    type_sql = exec.generate_sql(type_query)[-1]
+
     rows = []
-    if not generate_only:
+    if exec:
+        type_sql = exec.generate_sql(type_query)[-1]
         try:
             rows = exec.execute_raw_sql(type_sql).fetchall()
         except Exception as e:
             results.append(
                 ValidationTest(
-                    query=type_sql,
+                    raw_query=type_query,
+                    generated_query=type_sql,
                     check_type=ExpectationType.LOGICAL,
                     expected="valid_sql",
                     result=DatasourceModelValidationError(
@@ -96,9 +104,10 @@ def validate_datasource(
             )
             return results
     else:
+
         results.append(
             ValidationTest(
-                query=type_sql,
+                raw_query=type_query,
                 check_type=ExpectationType.LOGICAL,
                 expected="datatype_match",
                 result=None,
@@ -106,18 +115,10 @@ def validate_datasource(
             )
         )
         return results
-    failures: list[
-        tuple[
-            str,
-            Any,
-            DataType | ArrayType | StructType | MapType | NumericType | TraitDataType,
-            bool,
-        ]
-    ] = []
+    failures: list[DatasourceColumnBindingData] = []
     cols_with_error = set()
     for row in rows:
         for col in datasource.columns:
-
             actual_address = build_env.concepts[col.concept.address].safe_address
             if actual_address in cols_with_error:
                 continue
@@ -125,27 +126,29 @@ def validate_datasource(
             passed = type_check(rval, col.concept.datatype, col.is_nullable)
             if not passed:
                 failures.append(
-                    (
-                        col.concept.address,
-                        rval,
-                        col.concept.datatype,
-                        col.is_nullable,
+                    DatasourceColumnBindingData(
+                        address=col.concept.address,
+                        value=rval,
+                        value_type=(
+                            arg_to_datatype(rval)
+                            if rval is not None
+                            else col.concept.datatype
+                        ),
+                        value_modifiers=[Modifier.NULLABLE] if rval is None else [],
+                        actual_type=col.concept.datatype,
+                        actual_modifiers=col.concept.modifiers,
                     )
                 )
                 cols_with_error.add(actual_address)
 
-    def format_failure(failure):
-        return f"Concept {failure[0]} value '{failure[1]}' does not conform to expected type {str(failure[2])} (nullable={failure[3]})"
-
     if failures:
         results.append(
             ValidationTest(
-                query=None,
                 check_type=ExpectationType.LOGICAL,
                 expected="datatype_match",
                 ran=True,
-                result=DatasourceModelValidationError(
-                    f"Datasource {datasource.name} failed validation. Found rows that do not conform to types: {[format_failure(failure) for failure in failures]}",
+                result=DatasourceColumnBindingError(
+                    address=datasource.identifier, errors=failures
                 ),
             )
         )
@@ -161,10 +164,10 @@ def validate_datasource(
             operator=ComparisonOperator.GT,
         ),
     )
-    if generate_only:
+    if not exec:
         results.append(
             ValidationTest(
-                query=exec.generate_sql(query)[-1],
+                raw_query=query,
                 check_type=ExpectationType.ROWCOUNT,
                 expected="0",
                 result=None,
@@ -179,7 +182,8 @@ def validate_datasource(
     if rows:
         results.append(
             ValidationTest(
-                query=sql,
+                raw_query=query,
+                generated_query=sql,
                 check_type=ExpectationType.ROWCOUNT,
                 expected="0",
                 result=DatasourceModelValidationError(
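
Not part of the diff: a small sketch of how a caller might read back the resulting ValidationTest objects, assuming only the fields used in the hunks above (raw_query, generated_query, check_type, expected, result, ran).

    def summarize(tests):
        # With exec=None the validators return planned tests (ran=False, raw_query
        # populated); with a live Executor, failures arrive as exceptions in `result`.
        for test in tests:
            status = "ran" if test.ran else "planned"
            print(f"[{status}] {test.check_type} expected={test.expected}")
            if test.result is not None:
                print(f"  error: {test.result}")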
@@ -12,12 +12,13 @@ from trilogy.parsing.common import function_to_concept
 
 def validate_environment(
     env: Environment,
-    exec: Executor,
     scope: ValidationScope = ValidationScope.ALL,
     targets: list[str] | None = None,
+    exec: Executor | None = None,
     generate_only: bool = False,
 ) -> list[ValidationTest]:
     # avoid mutating the environment for validation
+    generate_only = exec is None or generate_only
     env = env.duplicate()
     grain_check = function_to_concept(
         parent=Function(
@@ -51,13 +52,13 @@ def validate_environment(
         for datasource in build_env.datasources.values():
             if targets and datasource.name not in targets:
                 continue
-            results += validate_datasource(datasource, build_env, exec, generate_only)
+            results += validate_datasource(datasource, env, build_env, exec)
     if scope == ValidationScope.ALL or scope == ValidationScope.CONCEPTS:
 
         for bconcept in build_env.concepts.values():
             if targets and bconcept.address not in targets:
                 continue
-            results += validate_concept(bconcept, build_env, exec, generate_only)
+            results += validate_concept(bconcept, env, build_env, exec)
 
     # raise a nicely formatted union of all exceptions
     exceptions: list[ModelValidationError] = [e.result for e in results if e.result]
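
Not part of the diff: a sketch of the new call pattern for validate_environment, where exec is now optional and generate_only is forced on when no executor is supplied. The model file name is hypothetical.

    from trilogy import Environment
    from trilogy.core.validation.environment import validate_environment

    env = Environment()
    env, _ = env.parse(open("model.preql").read())  # hypothetical model file

    # No executor: nothing is executed; planned ValidationTests come back instead.
    tests = validate_environment(env, exec=None)
    for test in tests:
        print(test.check_type, test.expected, test.raw_query)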
@@ -0,0 +1,106 @@
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+
+from trilogy import Environment, Executor
+from trilogy.authoring import ConceptDeclarationStatement, Datasource
+from trilogy.core.exceptions import (
+    DatasourceColumnBindingData,
+    DatasourceColumnBindingError,
+)
+from trilogy.core.validation.environment import validate_environment
+from trilogy.parsing.render import Renderer
+
+
+def rewrite_file_with_errors(
+    statements: list[Any], errors: list[DatasourceColumnBindingError]
+):
+    renderer = Renderer()
+    output = []
+    ds_error_map: dict[str, list[DatasourceColumnBindingData]] = defaultdict(list)
+    concept_error_map: dict[str, list[DatasourceColumnBindingData]] = defaultdict(list)
+    for error in errors:
+        if isinstance(error, DatasourceColumnBindingError):
+            for x in error.errors:
+                if error.dataset_address not in ds_error_map:
+                    ds_error_map[error.dataset_address] = []
+                # this is by dataset address
+                if x.is_modifier_issue():
+                    ds_error_map[error.dataset_address].append(x)
+                # this is by column
+                if x.is_type_issue():
+                    concept_error_map[x.address].append(x)
+    for statement in statements:
+        if isinstance(statement, Datasource):
+            if statement.identifier in ds_error_map:
+                error_cols = ds_error_map[statement.identifier]
+                for col in statement.columns:
+                    if col.concept.address in [x.address for x in error_cols]:
+                        error_col = [
+                            x for x in error_cols if x.address == col.concept.address
+                        ][0]
+                        col.modifiers = list(
+                            set(col.modifiers + error_col.value_modifiers)
+                        )
+        elif isinstance(statement, ConceptDeclarationStatement):
+            if statement.concept.address in concept_error_map:
+                error_cols = concept_error_map[statement.concept.address]
+                statement.concept.datatype = error_cols[0].value_type
+        output.append(statement)
+
+    return renderer.render_statement_string(output)
+
+
+DEPTH_CUTOFF = 3
+
+
+def validate_and_rewrite(
+    input: Path | str, exec: Executor | None = None, depth: int = 0
+) -> str | None:
+    if depth > DEPTH_CUTOFF:
+        print(f"Reached depth cutoff of {DEPTH_CUTOFF}, stopping.")
+        return None
+    if isinstance(input, str):
+        raw = input
+        env = Environment()
+    else:
+        with open(input, "r") as f:
+            raw = f.read()
+        env = Environment(working_path=input.parent)
+    if exec:
+        env = exec.environment
+    env, statements = env.parse(raw)
+
+    validation_results = validate_environment(env, exec=exec, generate_only=True)
+
+    errors = [
+        x.result
+        for x in validation_results
+        if isinstance(x.result, DatasourceColumnBindingError)
+    ]
+
+    if not errors:
+        print("No validation errors found")
+        return None
+    print(
+        f"Found {len(errors)} validation errors, attempting to fix, current depth: {depth}..."
+    )
+    for error in errors:
+        for item in error.errors:
+            print(f"- {item.format_failure()}")
+
+    new_text = rewrite_file_with_errors(statements, errors)
+
+    while iteration := validate_and_rewrite(new_text, exec=exec, depth=depth + 1):
+        depth = depth + 1
+        if depth >= DEPTH_CUTOFF:
+            break
+        if iteration:
+            new_text = iteration
+        depth += 1
+    if isinstance(input, Path):
+        with open(input, "w") as f:
+            f.write(new_text)
+        return None
+    else:
+        return new_text
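
Not part of the diff: one way the new helper added above might be driven. The module path and file name are assumptions, since the new file's path is not shown in this diff; with exec=None it stays in generate-only mode, rewrites the file in place when it can, and returns None.

    from pathlib import Path

    # Module path is an assumption; the diff does not show where the new file lives.
    from trilogy.core.validation.fix import validate_and_rewrite

    result = validate_and_rewrite(Path("models/orders.preql"))  # hypothetical model
    if result is None:
        print("no further rewrites suggested")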
trilogy/dialect/base.py CHANGED
@@ -76,6 +76,7 @@ from trilogy.core.statements.author import (
 )
 from trilogy.core.statements.execute import (
     PROCESSED_STATEMENT_TYPES,
+    ProcessedCopyStatement,
     ProcessedQuery,
     ProcessedQueryPersist,
     ProcessedRawSQLStatement,
@@ -345,6 +346,7 @@ class BaseDialect:
     COMPLEX_DATATYPE_MAP = COMPLEX_DATATYPE_MAP
     UNNEST_MODE = UnnestMode.CROSS_APPLY
     GROUP_MODE = GroupMode.AUTO
+    EXPLAIN_KEYWORD = "EXPLAIN"
 
     def __init__(self, rendering: Rendering | None = None):
         self.rendering = rendering or CONFIG.rendering
@@ -759,6 +761,7 @@ class BaseDialect:
         elif isinstance(e, MagicConstants):
             if e == MagicConstants.NULL:
                 return "null"
+            return str(e.value)
         elif isinstance(e, date):
             return self.FUNCTION_MAP[FunctionType.DATE_LITERAL](e)
         elif isinstance(e, datetime):
@@ -1135,7 +1138,13 @@
         query: PROCESSED_STATEMENT_TYPES,
     ) -> str:
         if isinstance(query, ProcessedShowStatement):
-            return ";\n".join([str(x) for x in query.output_values])
+            return ";\n".join(
+                [
+                    f"{self.EXPLAIN_KEYWORD} {self.compile_statement(x)}"
+                    for x in query.output_values
+                    if isinstance(x, (ProcessedQuery, ProcessedCopyStatement))
+                ]
+            )
         elif isinstance(query, ProcessedRawSQLStatement):
             return query.text
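
Not part of the diff: since EXPLAIN_KEYWORD is now a class-level attribute on BaseDialect, a dialect whose engine uses a different spelling could override it. The subclass name below is invented for the example.

    from trilogy.dialect.base import BaseDialect


    class VerboseExplainDialect(BaseDialect):
        # Hypothetical override: show statements would render as
        # "EXPLAIN ANALYZE <compiled query>" for this dialect.
        EXPLAIN_KEYWORD = "EXPLAIN ANALYZE"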