@altimateai/altimate-code 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/bin/altimate +6 -0
- package/bin/altimate-code +6 -0
- package/dbt-tools/bin/altimate-dbt +2 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
- package/dbt-tools/dist/index.js +23859 -0
- package/package.json +13 -13
- package/postinstall.mjs +42 -0
- package/skills/altimate-setup/SKILL.md +31 -0
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,41 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.5.3] - 2026-03-19
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- Bundle skills, dbt-tools, and altimate-setup in shipped npm binary — skills now work in all distribution channels (npm, Homebrew, AUR, Docker) without relying on `postinstall` filesystem copies (#316)
|
|
13
|
+
- Exclude 220MB of unused `.node` binaries from dbt-tools bundle (#320)
|
|
14
|
+
- Documentation about warehouse connections updated (#318)
|
|
15
|
+
|
|
16
|
+
### Changed
|
|
17
|
+
|
|
18
|
+
- Added `altimate_change` markers to upstream-shared files and marker removal detection to CI — prevents markers from being silently stripped (#322)
|
|
19
|
+
|
|
20
|
+
## [0.5.2] - 2026-03-19
|
|
21
|
+
|
|
22
|
+
### Added
|
|
23
|
+
|
|
24
|
+
- Trace history dialog (`/trace` command) — browse, search, and open past session traces from the TUI (#297)
|
|
25
|
+
- Docs showcase examples with screenshots (#292)
|
|
26
|
+
|
|
27
|
+
### Fixed
|
|
28
|
+
|
|
29
|
+
- TUI trace dialog now respects custom `tracing.dir` config — previously always used default directory (#307)
|
|
30
|
+
- WebFetch `clearTimeout` leak — DNS failures no longer leak timer handles (#307)
|
|
31
|
+
- WebFetch User-Agent strategy inverted to honest-bot-first — reduces 403 blocks from TLS fingerprint mismatch (#303)
|
|
32
|
+
- Snowflake SDK stdout log noise suppressed in TUI via `additionalLogToConsole: false` (#305, #301)
|
|
33
|
+
- `cleanTitle` fallback in trace dialog no longer returns empty string (#307)
|
|
34
|
+
- Error logging added to `openTraceInBrowser` for debuggability (#307)
|
|
35
|
+
- `altimate_change` markers added to `webfetch.ts` for upstream merge compatibility (#307)
|
|
36
|
+
|
|
37
|
+
### Changed
|
|
38
|
+
|
|
39
|
+
- Snowflake SDK minimum version bumped to `^2.0.3` for log suppression support (#305)
|
|
40
|
+
- Removed brew from docs and README (#299)
|
|
41
|
+
- Fixed README typo (`altimate` → `altimate-code`) (#293)
|
|
42
|
+
|
|
8
43
|
## [0.5.1] - 2026-03-19
|
|
9
44
|
|
|
10
45
|
### Added
|
package/bin/altimate
CHANGED
|
@@ -33,6 +33,12 @@ function run(target) {
|
|
|
33
33
|
// Search from BOTH the binary's location AND the wrapper script's location
|
|
34
34
|
// to cover npm flat installs, pnpm isolated stores, and hoisted monorepos.
|
|
35
35
|
const env = { ...process.env }
|
|
36
|
+
|
|
37
|
+
// Export bin directory so the compiled binary can add it to PATH when
|
|
38
|
+
// spawning bash commands. This makes bundled tools (e.g. altimate-dbt)
|
|
39
|
+
// available to agents without manual PATH configuration.
|
|
40
|
+
env.ALTIMATE_BIN_DIR = scriptDir
|
|
41
|
+
|
|
36
42
|
try {
|
|
37
43
|
const resolvedTarget = fs.realpathSync(target)
|
|
38
44
|
const targetDir = path.dirname(path.dirname(resolvedTarget))
|
package/bin/altimate-code
CHANGED
|
@@ -33,6 +33,12 @@ function run(target) {
|
|
|
33
33
|
// Search from BOTH the binary's location AND the wrapper script's location
|
|
34
34
|
// to cover npm flat installs, pnpm isolated stores, and hoisted monorepos.
|
|
35
35
|
const env = { ...process.env }
|
|
36
|
+
|
|
37
|
+
// Export bin directory so the compiled binary can add it to PATH when
|
|
38
|
+
// spawning bash commands. This makes bundled tools (e.g. altimate-dbt)
|
|
39
|
+
// available to agents without manual PATH configuration.
|
|
40
|
+
env.ALTIMATE_BIN_DIR = scriptDir
|
|
41
|
+
|
|
36
42
|
try {
|
|
37
43
|
const resolvedTarget = fs.realpathSync(target)
|
|
38
44
|
const targetDir = path.dirname(path.dirname(resolvedTarget))
|
|
File without changes
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import sqlglot
|
|
2
|
+
from sqlglot.optimizer.qualify import qualify
|
|
3
|
+
from altimate.utils import map_adapter_to_dialect
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def fetch_schema(sql: str, dialect: str):
    """Return the list of output column names produced by *sql*.

    The query is parsed with sqlglot using the dialect mapped from the
    dbt adapter name, and the alias-or-name of every projection is
    collected.  Raises when a bare ``*`` projection makes the output
    schema unknowable, or when an unexpected projection kind appears.
    """
    tree = sqlglot.parse_one(sql=sql, dialect=map_adapter_to_dialect(dialect))
    names = []
    for projection in tree.selects:
        kind = projection.key
        if kind == "alias":
            names.append(projection.alias_or_name)
        elif kind == "column":
            # A plain column may still be a SELECT * — reject that case.
            if projection.args["this"].key == "star":
                raise Exception(
                    f"unable fetched schema due to star: {projection.sql(pretty=True)}"
                )
            names.append(projection.alias_or_name)
        else:
            raise Exception(
                f"unknown key '{kind}' detected for {projection.sql(pretty=True)}"
            )
    return names
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def validate_whether_sql_has_columns(sql: str, dialect: str):
    """Return True when *sql* parses and all of its columns qualify.

    Fix: the adapter name is now mapped to a sqlglot dialect once and
    used consistently for both parsing and qualification.  Previously
    the raw adapter name (e.g. "synapse", "sqlserver", "athena") was
    handed to qualify() while parse_one() received the mapped dialect,
    so the two steps could disagree.
    """
    mapped_dialect = map_adapter_to_dialect(dialect)
    try:
        parsed_query = sqlglot.parse_one(sql=sql, dialect=mapped_dialect)
        qualify(
            parsed_query,
            schema={},
            dialect=mapped_dialect,
            quote_identifiers=False,
            validate_qualify_columns=True,
        )
        return True
    except Exception:
        # Best-effort check: any parse/qualify failure means "no".
        return False
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
import sqlglot
|
|
4
|
+
from sqlglot.executor import execute
|
|
5
|
+
from sqlglot.expressions import Table
|
|
6
|
+
from sqlglot.optimizer import traverse_scope
|
|
7
|
+
from sqlglot.optimizer.qualify import qualify
|
|
8
|
+
|
|
9
|
+
# Maps a dbt adapter name to the sqlglot dialect used for parsing SQL
# generated for that adapter.  Most adapters share a name with their
# dialect; the exceptions are synapse/sqlserver (both T-SQL) and
# athena (Presto-compatible).
ADAPTER_MAPPING = {
    "bigquery": "bigquery",
    "clickhouse": "clickhouse",
    "databricks": "databricks",
    "duckdb": "duckdb",
    "hive": "hive",
    "mysql": "mysql",
    "oracle": "oracle",
    "postgres": "postgres",
    "redshift": "redshift",
    "snowflake": "snowflake",
    "spark": "spark",
    "starrocks": "starrocks",
    "teradata": "teradata",
    "trino": "trino",
    "synapse": "tsql",
    "sqlserver": "tsql",
    "doris": "doris",
    "athena": "presto",
}

# Notes appended to error descriptions when the offending substring cannot
# be highlighted in the SQL text.  NOTE(review): neither string contains a
# "{invalid_entity}" placeholder, so the .format(invalid_entity=...) calls
# made on them elsewhere are no-ops — confirm whether a placeholder was
# intended.
MULTIPLE_OCCURENCES_STR = "Unable to highlight the exact location in the SQL code due to multiple occurrences."
MAPPING_FAILED_STR = "Unable to highlight the exact location in the SQL code."
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def extract_column_name(text):
    """Pull the offending identifier out of a sqlglot error message.

    A sequence of known error-message shapes is tried in order; the
    first captured word is returned, or None when nothing matches.
    """
    known_patterns = (
        r"Column '\"(\w+)\"' could not be resolved",
        r"Unknown column: (\w+)",
        r"Column '(\w+)' could not be resolved",
        r"Unknown output column: (\w+)",
        r"Cannot automatically join: (\w+)",
    )
    for pattern in known_patterns:
        captured = re.findall(pattern, text)
        if captured:
            return captured[0]
    return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def find_single_occurrence_indices(main_string, substring):
    """Locate *substring* in *main_string*, case-insensitively.

    Returns a ``(start, end, count)`` tuple.  ``start``/``end`` are set
    only when the substring occurs exactly once; otherwise they are
    None and ``count`` reports how many matches were found (0 when the
    substring is empty or None).
    """
    haystack = main_string.lower()
    needle = substring.lower() if substring else ""

    if not needle:
        # Consistent tuple shape even for an empty/missing substring.
        return None, None, 0

    count = haystack.count(needle)
    if count != 1:
        # Ambiguous (or absent) — no reliable position to report.
        return None, None, count

    begin = haystack.find(needle)
    return begin, begin + len(needle), count
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def map_adapter_to_dialect(adapter: str):
    """Translate a dbt adapter name into a sqlglot dialect name.

    Unknown adapter names pass through unchanged, so callers may also
    hand in a sqlglot dialect name directly.
    """
    try:
        return ADAPTER_MAPPING[adapter]
    except KeyError:
        return adapter
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def get_str_position(str, row, col):
    """Convert a 1-indexed (row, col) grid position into a flat index.

    Each line before *row* contributes its length plus one newline
    character; *col* is then added on top.

    NOTE(review): the first parameter shadows the builtin ``str``; the
    name is kept to preserve the public signature.
    """
    preceding_lines = str.split("\n")[: row - 1]
    return sum(len(line) + 1 for line in preceding_lines) + col
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def get_line_and_column_from_position(text, start_index):
    """
    Finds the grid position (row and column) in a multiline string given a Python start index.
    Rows and columns are 1-indexed.

    Fix: rows were previously returned 0-indexed, contradicting this
    docstring, the 1-indexed convention of get_str_position(), and the
    1-indexed line numbers sqlglot reports (which _build_message pairs
    with this function's output).

    :param text: Multiline string.
    :param start_index: Python start index (0-indexed).
    :return: Tuple of (row, column), both 1-indexed, or (None, None)
        when start_index lies past the end of the text.
    """
    consumed = 0
    for line_number, line in enumerate(text.split("\n"), start=1):
        # Does start_index fall within this line (or on its newline)?
        if consumed + len(line) >= start_index:
            return line_number, start_index - consumed + 1
        consumed += len(line) + 1  # +1 for the newline character
    return None, None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _build_message(sql: str, error: dict):
    """Turn one sqlglot ParseError entry into an error dict.

    sqlglot reports the END of the offending token (line/col, both
    1-indexed); the start is recovered by stepping back over the
    highlight and its leading context.  When no position is available
    the description is returned alone.
    """
    description = "Failed to parse the sql query"
    line, col = error.get("line"), error.get("col")
    if not (line and col):
        return {"description": description}

    span = len(error.get("highlight", "")) + len(error.get("start_context", ""))
    end_index = get_str_position(sql, line, col)
    row, column = get_line_and_column_from_position(sql, end_index - span)
    return {
        "description": description,
        "start_position": [row, column],
        "end_position": [line, col],
    }
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def sql_parse_errors(sql: str, dialect: str):
    """Collect parse errors for *sql* under *dialect*.

    Besides outright parse failures, a query whose top-level node is a
    bare alias (e.g. ``foo AS bar``) is rejected — it parses but is not
    a meaningful statement.  Returns a list of error dicts; empty when
    the query parses cleanly.
    """
    try:
        sqlglot.transpile(sql, read=dialect)
        tree = sqlglot.parse_one(sql, read=dialect)
    except sqlglot.errors.ParseError as exc:
        return [_build_message(sql, err) for err in exc.errors]

    if isinstance(tree, sqlglot.exp.Alias):
        return [
            {
                "description": "Failed to parse the sql query.",
            }
        ]
    return []
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def get_start_and_end_position(sql: str, invalid_string: str):
    """Locate *invalid_string* in *sql* and return grid positions.

    Returns ``(start_position, end_position, occurrence_count)`` where
    the positions are ``[row, col]`` lists, or ``(None, None, count)``
    when the substring is empty, absent, or occurs more than once.

    Fix: a match at the very beginning of the SQL (flat index 0) was
    previously discarded because ``if start and end`` treats 0 as
    false; the check now tests for None explicitly.
    """
    start, end, num_occurences = find_single_occurrence_indices(sql, invalid_string)
    if start is None or end is None:
        return None, None, num_occurences
    return (
        list(get_line_and_column_from_position(sql, start)),
        list(get_line_and_column_from_position(sql, end)),
        num_occurences,
    )
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def form_error(
    error: str, invalid_entity: str, start_position, end_position, num_occurences
):
    """Package a validation error, attaching positions when reliable.

    When the entity occurs more than once, or could not be located at
    all, a human-readable note is appended to the description instead
    of source positions.
    """
    if num_occurences > 1:
        note = MULTIPLE_OCCURENCES_STR.format(invalid_entity=invalid_entity)
        return {
            "description": f"{error}\n {note}",
        }

    if start_position and end_position:
        return {
            "description": error,
            "start_position": start_position,
            "end_position": end_position,
        }

    if invalid_entity:
        note = MAPPING_FAILED_STR.format(invalid_entity=invalid_entity)
        return {
            "description": f"{error}\n {note}",
        }
    return {
        "description": error,
    }
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def validate_tables_and_columns(
    sql: str,
    dialect: str,
    schemas: dict,
):
    """Qualify the query against *schemas* and report unresolvable names.

    Returns a single-element list containing an error dict when
    sqlglot's optimizer cannot resolve a table or column, or None when
    the query qualifies cleanly.
    """
    try:
        parsed_sql = sqlglot.parse_one(sql, read=dialect)
        qualify(parsed_sql, dialect=dialect, schema=schemas)
    except sqlglot.errors.OptimizeError as e:
        error = str(e)
        # Internal sqlglot wording would confuse end users; replace it
        # with a generic message.
        if "sqlglot" in error:
            error = "Failed to validate the query."
        invalid_entity = extract_column_name(error)
        if not invalid_entity:
            # No recognizable identifier in the message — report the
            # description without source positions.
            return [
                {
                    "description": error,
                }
            ]
        start_position, end_position, num_occurences = get_start_and_end_position(
            sql, invalid_entity
        )
        # Ensure the description ends with a period before form_error
        # may append a follow-up note.
        error = error if error[-1] == "." else error + "."
        return [
            form_error(
                error, invalid_entity, start_position, end_position, num_occurences
            )
        ]

    return None
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def sql_execute_errors(
    sql: str,
    dialect: str,
    schemas: dict,
):
    """Dry-run *sql* with sqlglot's executor over empty tables.

    An empty row list is materialized for every relation in *schemas*
    so the executor can resolve each reference without real data.
    Returns a single-element list of error dicts on ExecuteError, or
    None when execution succeeds.
    """
    # Mirror the {db: {schema: {table: ...}}} tree with empty row lists.
    empty_tables = {}
    for db, db_schemas in schemas.items():
        db_entry = empty_tables.setdefault(db, {})
        for schema_name, schema_tables in db_schemas.items():
            schema_entry = db_entry.setdefault(schema_name, {})
            for table_name in schema_tables:
                schema_entry[table_name] = []

    try:
        execute(
            sql=sql,
            read=dialect,
            schema=schemas,
            tables=empty_tables,
        )
    except sqlglot.errors.ExecuteError as exc:
        return [
            {
                "description": str(exc),
            }
        ]
    return None
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def qualify_columns(expression):
    """Best-effort column qualification of a sqlglot expression.

    Returns the qualified expression, or the input unchanged when the
    optimizer cannot qualify it (e.g. missing schema information).
    """
    try:
        qualified = qualify(
            expression,
            qualify_columns=True,
            isolate_tables=True,
            validate_qualify_columns=False,
        )
    except sqlglot.errors.OptimizeError:
        return expression
    return qualified
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def parse_sql_query(sql_query, dialect):
    """Parse *sql_query* with sqlglot and return the resulting AST."""
    ast = sqlglot.parse_one(sql_query, read=dialect)
    return ast
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def extract_physical_columns(ast):
    """Extract the columns referenced from physical tables in *ast*.

    Returns a dict mapping a lowercased "db.schema.table" path to the
    set of column names used from that table.  Only fully-qualified
    tables (catalog and schema present) are included.

    Fix: sqlglot's ``Table.catalog`` / ``Table.db`` return ``""`` — not
    None — when a part is absent, so the previous ``is None`` guard
    never skipped partially-qualified tables and paths like
    ``"..orders"`` could leak through; a truthiness check is used now.
    """
    physical_columns = {}
    for scope in traverse_scope(ast):
        for column in scope.columns:
            source = scope.sources.get(column.table)
            if not isinstance(source, Table):
                continue  # CTEs/subqueries are not physical tables
            db, schema, table_name = source.catalog, source.db, source.name
            if not db or not schema:
                continue  # skip tables without full db.schema qualification
            path = f"{db}.{schema}.{table_name}".lower()
            physical_columns.setdefault(path, set()).add(column.name)
    return physical_columns
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def get_columns_used(sql_query, dialect):
    """Parse and qualify the query, then extract its physical columns."""
    qualified = qualify_columns(parse_sql_query(sql_query, dialect))
    return extract_physical_columns(qualified)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def validate_columns_present_in_schema(sql_query, dialect, schemas, model_mapping):
    """Check that every column the query references exists in the schema.

    :param schemas: nested ``{db: {schema: {table: {column: type}}}}``.
    :param model_mapping: maps a lowercase "db.schema.table" path to a
        dbt model name, used for friendlier error messages.
    :return: list of error dicts (possibly empty).

    Fixes: "propaerly" typo in the table-not-found message; the loop
    variable ``table`` is no longer rebound to its display name inside
    the column loop (the display name is resolved once, up front).
    Validation stays best-effort: any analysis failure is swallowed and
    whatever errors were gathered so far are returned.
    """
    errors = []
    # Flatten the nested schema tree into {"db.schema.table": {columns}}
    # with everything lowercased for case-insensitive comparison.
    flat_schemas = {}
    for db in schemas:
        for schema in schemas[db]:
            for table in schemas[db][schema]:
                path = f"{db}.{schema}.{table}".lower()
                flat_schemas.setdefault(path, set()).update(
                    column.lower() for column in schemas[db][schema][table]
                )
    try:
        columns_used = get_columns_used(sql_query, dialect)

        for table_path, columns_set in columns_used.items():
            if table_path not in flat_schemas:
                (
                    start_position,
                    end_position,
                    num_occurences,
                ) = get_start_and_end_position(sql_query, table_path)
                error = f"Error: Table '{table_path}' not found. This issue often occurs when a table is used directly\n in dbt instead of being referenced through the appropriate syntax.\n To resolve this, ensure that '{table_path}' is properly defined in your project and use the 'ref()' function to reference it in your models."
                errors.append(
                    form_error(
                        error, table_path, start_position, end_position, num_occurences
                    )
                )
                continue

            known_columns = flat_schemas[table_path]
            # Prefer the dbt model name over the raw path in messages.
            display_name = model_mapping.get(table_path, table_path)
            for column in columns_set:
                if column.lower() in known_columns:
                    continue
                (
                    start_position,
                    end_position,
                    num_occurences,
                ) = get_start_and_end_position(sql_query, column)
                error = f"Error: Column '{column}' not found in '{display_name}'. \nPossible causes: 1) Typo in column name. 2) Column not materialized. 3) Column not selected in parent cte."
                errors.append(
                    form_error(
                        error,
                        column,
                        start_position,
                        end_position,
                        num_occurences,
                    )
                )
    except Exception:
        # Best-effort validation: never let analysis errors crash the caller.
        pass
    return errors
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
from typing import Dict, List
|
|
2
|
+
|
|
3
|
+
from altimate.utils import (
|
|
4
|
+
map_adapter_to_dialect,
|
|
5
|
+
sql_execute_errors,
|
|
6
|
+
sql_parse_errors,
|
|
7
|
+
validate_columns_present_in_schema,
|
|
8
|
+
validate_tables_and_columns,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _get_key(
|
|
13
|
+
key: str,
|
|
14
|
+
dialect: str,
|
|
15
|
+
):
|
|
16
|
+
if dialect == "bigquery":
|
|
17
|
+
return key.lower()
|
|
18
|
+
|
|
19
|
+
if dialect == "snowflake":
|
|
20
|
+
return key.upper()
|
|
21
|
+
return key
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _build_schemas(
    models: List[Dict],
    dialect: str,
):
    """Build a nested {db: {schema: {table: {column: type}}}} mapping.

    Identifier case is normalized per-dialect via _get_key; columns
    without a declared data_type default to "string".

    TODO: Duplicated in multiple places with slight variations. Fix this.
    """
    schemas = {}
    for model in models:
        column_types = {}
        for column_info in model["columns"].values():
            column_key = _get_key(column_info["name"], dialect)
            column_types[column_key] = column_info.get("data_type", "string")

        db = _get_key(model["database"], dialect)
        schema_name = _get_key(model["schema"], dialect)
        table = _get_key(model["alias"], dialect)
        schemas.setdefault(db, {}).setdefault(schema_name, {})[table] = column_types

    return schemas
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _build_model_mapping(
|
|
54
|
+
models: List[Dict],
|
|
55
|
+
):
|
|
56
|
+
model_map = {}
|
|
57
|
+
for model in models:
|
|
58
|
+
db = model["database"]
|
|
59
|
+
schema = model["schema"]
|
|
60
|
+
table = model["alias"]
|
|
61
|
+
model_map[f"{db}.{schema}.{table}".lower()] = model["name"]
|
|
62
|
+
return model_map
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def validate_sql_from_models(
    sql: str,
    dialect: str,
    models: List[Dict],
):
    """Validate *sql* against the schemas declared by *models*.

    Runs parse-error detection, schema column checks, and optimizer
    qualification in order, returning the first category of errors
    found as ``{"error_type": ..., "errors": [...]}``.  Unexpected
    failures are reported as "sql_unknown_error"; a clean query yields
    an empty dict.
    """
    try:
        dialect = map_adapter_to_dialect(dialect)
        schemas = _build_schemas(models, dialect)
        model_mapping = _build_model_mapping(models)

        parse_errors = sql_parse_errors(sql, dialect)
        if parse_errors:
            return {
                "error_type": "sql_parse_error",
                "errors": parse_errors,
            }

        schema_errors = validate_columns_present_in_schema(
            sql, dialect, schemas, model_mapping
        )
        if schema_errors:
            return {
                "error_type": "sql_invalid_error",
                "errors": schema_errors,
            }

        qualify_errors = validate_tables_and_columns(sql, dialect, schemas)
        if qualify_errors:
            return {
                "error_type": "sql_invalid_error",
                "errors": qualify_errors,
            }

        # errors = sql_execute_errors(sql, dialect, schemas)

        # if errors:
        #     return {"error_type": "sql_execute_error", "errors": errors}

    except Exception as e:
        return {
            "error_type": "sql_unknown_error",
            "errors": [
                {"description": f"Unknown error. Cannot validate SQL. {str(e)}"}
            ],
        }
    return {}
|