run-cache 2.3.2__tar.gz → 2.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {run_cache-2.3.2 → run_cache-2.4.0}/PKG-INFO +1 -1
- run_cache-2.4.0/src/dbt_run_cache/_version.py +1 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/adapters/__init__.py +2 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/adapters/base.py +23 -0
- run_cache-2.4.0/src/dbt_run_cache/adapters/redshift.py +438 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/dev_cloner.py +5 -2
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/run_cache.py +9 -2
- run_cache-2.3.2/src/dbt_run_cache/_version.py +0 -1
- {run_cache-2.3.2 → run_cache-2.4.0}/.gitignore +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/pyproject.toml +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/__init__.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/_typing.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/adapters/bigquery.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/adapters/clock.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/adapters/common.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/adapters/databricks.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/adapters/postgres.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/adapters/snowflake.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/auth/__init__.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/auth/grpc.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/auth/oauth_clients.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/auth/sso.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/auth/sso_server.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/auth/utils.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/cli/__init__.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/cli/auth.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/cli/explainer.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/cli/main.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/config.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/decision_logger.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/dispatcher.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/errors.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/events.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/git.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/grpc/__init__.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/grpc/client.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/grpc/interceptors.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/plugin.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/profiles.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/relation.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/runner.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/selector.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/session.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/system_info.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/utils.py +0 -0
- {run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/version.py +0 -0
run_cache-2.4.0/src/dbt_run_cache/_version.py (new)

```diff
@@ -0,0 +1 @@
+__version__ = "2.4.0"
```
{run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/adapters/__init__.py

```diff
@@ -8,6 +8,7 @@ from dbt_run_cache.adapters.bigquery import BigQueryAdapterExtension
 from dbt_run_cache.adapters.postgres import PostgresAdapterExtension
 from dbt_run_cache.adapters.snowflake import SnowflakeAdapterExtension
 from dbt_run_cache.adapters.databricks import DatabricksAdapterExtension
+from dbt_run_cache.adapters.redshift import RedshiftAdapterExtension
 
 
 ADAPTER_EXTENSION_MAPPING = {
@@ -15,6 +16,7 @@ ADAPTER_EXTENSION_MAPPING = {
     "bigquery": BigQueryAdapterExtension,
     "snowflake": SnowflakeAdapterExtension,
     "databricks": DatabricksAdapterExtension,
+    "redshift": RedshiftAdapterExtension,
 }
 
 
```
{run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/adapters/base.py

```diff
@@ -52,6 +52,9 @@ class BaseAdapterExtension(abc.ABC):
     """Catalogs that should not have their last modified / view definition tracked"""
     SYSTEM_METADATA_SCHEMAS: t.List[str] = ["information_schema"]
     """Schemas that should not have their last modified / view definition tracked"""
+    IMPLEMENTS_CUSTOM_CLONE: bool = False
+    """When True, the adapter extension handles clone execution via clone() rather than
+    the caller executing the server-provided clone_sqls directly."""
 
     CLONE_CHAIN_DEPTH_LIMIT: t.Optional[int] = None
     """How many clones of clones can be created before the database throws a "Cannot have more than N chained clones"-style error.
@@ -105,6 +108,26 @@ class BaseAdapterExtension(abc.ABC):
         _, agate_table = self.adapter.execute(sql, fetch=fetch)
         return agate_table
 
+    def clone(
+        self,
+        clone_sqls: t.Iterable[str],
+        clone_source: str,
+        clone_target: str,
+    ) -> None:
+        """Execute a dialect-specific clone of clone_source into clone_target.
+
+        Only called when IMPLEMENTS_CUSTOM_CLONE is True. Subclasses that set
+        IMPLEMENTS_CUSTOM_CLONE = True must override this method.
+
+        Args:
+            clone_sqls: Server-provided fallback SQL (may be used if custom logic fails).
+            clone_source: Fully-qualified quoted source table name.
+            clone_target: Fully-qualified quoted target table name.
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__} sets IMPLEMENTS_CUSTOM_CLONE=True but does not implement clone()"
+        )
+
     def current_timestamp_utc(self) -> datetime:
         """Get the current UTC time from the database."""
         raise NotImplementedError(
```
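The new hook is a two-part contract: a class-level flag plus an overridable method. As a minimal sketch of how an extension opts in (the `ZeroCopyCloneExtension` class and its single-statement clone are hypothetical, not part of this package), a warehouse with native cloning might look like:

```python
import typing as t

from dbt_run_cache.adapters.base import BaseAdapterExtension


class ZeroCopyCloneExtension(BaseAdapterExtension):
    """Hypothetical extension for a warehouse with native zero-copy cloning."""

    IMPLEMENTS_CUSTOM_CLONE: bool = True  # callers now route clones through clone()

    def clone(
        self,
        clone_sqls: t.Iterable[str],
        clone_source: str,
        clone_target: str,
    ) -> None:
        # One native statement replaces the server-provided clone_sqls;
        # those remain available as a fallback if custom logic cannot run.
        self.adapter.execute(f"CREATE OR REPLACE TABLE {clone_target} CLONE {clone_source}")
```

The `RedshiftAdapterExtension` added below is the first in-tree implementer of this contract.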
run_cache-2.4.0/src/dbt_run_cache/adapters/redshift.py (new file, 438 lines)

```diff
@@ -0,0 +1,438 @@
+from __future__ import annotations
+
+from collections import defaultdict
+from concurrent.futures import Future
+from dataclasses import replace
+from datetime import datetime, timezone
+from multiprocessing import get_context
+import typing as t
+from dbt.adapters.sql import SQLAdapter
+from sqlglot import TokenType, tokenize
+
+
+from sqlglot import exp
+
+from dbt_run_cache import events
+from dbt_run_cache.adapters.base import BaseAdapterExtension
+from dbt_run_cache.adapters.common import (
+    build_information_schema_filter,
+    ViewDefinition,
+    group_tables_by_catalog,
+)
+from dbt_run_cache.utils import set_invocation_context
+
+
+class RedshiftAdapterExtension(BaseAdapterExtension):
+    DEFAULT_SCHEMA_NAME = "public"
+    SHOULD_RELEASE_CONNECTION: bool = True
+    SYSTEM_METADATA_SCHEMAS: t.List[str] = ["information_schema", "pg_catalog"]
+    IMPLEMENTS_CUSTOM_CLONE: bool = True
+
+    _SYS_QUERY_DETAIL_LOOKBACK_MINUTES = 30
+
+    def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+        super().__init__(*args, **kwargs)
+        self._catalog_adapters: t.Dict[str, SQLAdapter] = {}
+
+    @property
+    def supports_view_last_modified(self) -> bool:
+        return False
+
+    @property
+    def supports_current_timestamp_utc(self) -> bool:
+        return True
+
+    def current_timestamp_utc(self) -> datetime:
+        return (
+            self.execute(
+                "SELECT (SYSDATE AT TIME ZONE CURRENT_SETTING('timezone')) AT TIME ZONE 'UTC'",
+                fetch=True,
+            )
+            .rows[0][0]
+            .replace(tzinfo=timezone.utc)
+        )
+
+    def rollback(self) -> None:
+        """Roll back the current transaction for Redshift.
+
+        Redshift uses implicit transactions, so we need to rollback directly
+        on the connection handle rather than checking transaction_open flag.
+        """
+        conn = self.adapter.connections.get_if_exists()
+        if conn is not None and conn.handle:
+            try:
+                conn.handle.rollback()
+            except Exception as e:
+                events.fire_debug_event("Failed to rollback Redshift connection: {}", str(e))
+
+    def clone(
+        self,
+        clone_sqls: t.Iterable[str],
+        clone_source: str,
+        clone_target: str,
+    ) -> None:
+        """Clone clone_source into clone_target preserving PRIMARY KEY and FOREIGN KEY.
+
+        Uses SHOW TABLE to obtain the full DDL (including PK/FK constraints) then
+        executes DROP + CREATE + INSERT in a single transaction. Falls back to the
+        server-provided LIKE-based clone_sqls when SHOW TABLE is unavailable.
+        """
+        raw_ddl = self._show_table_ddl(clone_source)
+        if raw_ddl is None:
+            events.fire_debug_event(
+                "SHOW TABLE unavailable for {}; falling back to server-provided clone SQL",
+                clone_source,
+            )
+            for sql in clone_sqls:
+                self.adapter.execute(sql)
+            return
+
+        target_ddl = self._substitute_clone_table_name(raw_ddl, clone_target)
+        for sql in [
+            f"DROP TABLE IF EXISTS {clone_target}",
+            target_ddl,
+            f"INSERT INTO {clone_target} SELECT * FROM {clone_source}",
+        ]:
+            self.adapter.execute(sql)
+
+    def close(self) -> None:
+        for adapter in self._catalog_adapters.values():
+            adapter.cleanup_connections()
+        super().close()
+
+    def _show_table_ddl(self, table_fqn: str) -> t.Optional[str]:
+        """Run SHOW TABLE and return the DDL string, or None if unavailable."""
+        try:
+            result = self.execute(f"SHOW TABLE {table_fqn}", fetch=True)
+            if not result.rows:
+                return None
+            return result.rows[0][0]
+        except Exception as exc:
+            events.fire_debug_event("SHOW TABLE failed for {}: {}", table_fqn, str(exc))
+            return None
+
+    def _substitute_clone_table_name(self, ddl: str, clone_target: str) -> str:
+        """Replace the table name in a SHOW TABLE DDL string with clone_target.
+
+        Tokenizes the DDL and walks the token stream:
+        CREATE → skip LOCAL/TEMPORARY/TEMP → expect TABLE → skip IF NOT EXISTS
+        → record start of first name token, extend end across any DOT+identifier
+        pairs to cover 2- and 3-part names → splice clone_target into that span.
+
+        Accepts unquoted (VAR) and double-quoted (IDENTIFIER) name tokens.
+        Returns ddl unchanged if no CREATE TABLE header is found.
+        """
+
+        _TABLE_MODIFIER_STRINGS = {"LOCAL", "TEMPORARY", "TEMP"}
+        _NAME_TYPES = {TokenType.VAR, TokenType.IDENTIFIER}
+
+        tokens = tokenize(ddl, dialect=self.dialect)
+        len_tokens = len(tokens)
+        i = 0
+
+        while i < len_tokens and tokens[i].token_type != TokenType.CREATE:
+            i += 1
+        if i >= len_tokens:
+            return ddl
+        i += 1
+
+        while i < len_tokens and tokens[i].text.upper() in _TABLE_MODIFIER_STRINGS:
+            i += 1
+
+        if i >= len_tokens or tokens[i].token_type != TokenType.TABLE:
+            return ddl
+        i += 1
+
+        if (
+            i + 2 < len_tokens
+            and tokens[i].text.upper() == "IF"
+            and tokens[i + 1].text.upper() == "NOT"
+            and tokens[i + 2].text.upper() == "EXISTS"
+        ):
+            i += 3
+
+        if i >= len_tokens or tokens[i].token_type not in _NAME_TYPES:
+            return ddl
+
+        name_start = tokens[i].start
+        name_end = tokens[i].end
+        i += 1
+
+        while (
+            i + 1 < len_tokens
+            and tokens[i].token_type == TokenType.DOT
+            and tokens[i + 1].token_type in _NAME_TYPES
+        ):
+            name_end = tokens[i + 1].end
+            i += 2
+
+        return ddl[:name_start] + clone_target + ddl[name_end + 1 :]
+
```
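To make the token walk concrete, here is a standalone sketch of the same splice on an invented DDL string (it omits the LOCAL/TEMP and IF NOT EXISTS handling the method above performs, and assumes sqlglot is installed):

```python
from sqlglot import TokenType, tokenize

NAME_TYPES = {TokenType.VAR, TokenType.IDENTIFIER}

ddl = 'CREATE TABLE "analytics"."public"."orders" (id INTEGER, PRIMARY KEY (id))'
tokens = tokenize(ddl, dialect="redshift")

# Find CREATE, then TABLE, then the (possibly dotted, possibly quoted) name.
i = next(idx for idx, tok in enumerate(tokens) if tok.token_type == TokenType.CREATE)
while tokens[i].token_type != TokenType.TABLE:
    i += 1
i += 1

# Token.start / Token.end are inclusive character offsets into the source text.
name_start, name_end = tokens[i].start, tokens[i].end
while (
    i + 2 < len(tokens)
    and tokens[i + 1].token_type == TokenType.DOT
    and tokens[i + 2].token_type in NAME_TYPES
):
    name_end = tokens[i + 2].end  # extend across db.schema.table parts
    i += 2

print(ddl[:name_start] + '"analytics"."public"."orders__clone"' + ddl[name_end + 1 :])
# CREATE TABLE "analytics"."public"."orders__clone" (id INTEGER, PRIMARY KEY (id))
```

Splicing on character offsets rather than a regex keeps quoted identifiers (which may themselves contain dots or keywords) from being mangled.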
run_cache-2.4.0/src/dbt_run_cache/adapters/redshift.py (continued)

```diff
+    def prefetch_last_modified_epochs(self, table_fqns: t.Collection[str]) -> Future[None]:
+        """Batch-prefetch last modified timestamps for the given FQNs into the cache.
+
+        Runs SHOW TABLES per schema and the sys_query_detail join in parallel background
+        threads. Results are merged (sys_query_detail overrides SHOW TABLES) then stored
+        in the cache, so subsequent get_last_modified_epoch calls are cache hits.
+
+        Args:
+            table_fqns: Fully qualified, quoted table name strings to prefetch.
+
+        Returns:
+            A Future that completes when the cache has been populated.
+        """
+        if not table_fqns:
+            return super().prefetch_last_modified_epochs(table_fqns)
+
+        claimed_fqns: t.List[str] = []
+        tables_by_schema: t.Dict[t.Tuple[str, str], t.List[exp.Table]] = defaultdict(list)
+
+        with self._last_modified_epoch_cache as cache:
+            for raw_fqn in table_fqns:
+                table = self._to_fqn(raw_fqn)
+                fqn = self._sql(table)
+                if cache.claim_if_available(fqn):
+                    claimed_fqns.append(fqn)
+                    tables_by_schema[(table.catalog, table.db)].append(table)
+
+        if not claimed_fqns:
+            return super().prefetch_last_modified_epochs(table_fqns)
+
+        tables_by_catalog: t.Dict[str, t.List[exp.Table]] = defaultdict(list)
+        for (catalog, _), tables in tables_by_schema.items():
+            tables_by_catalog[catalog].extend(tables)
+
+        def _fetch_show_tables_thread(
+            catalog: str, schema: str, tables: t.List[exp.Table]
+        ) -> t.Dict[str, t.Optional[int]]:
+            set_invocation_context()
+            self._ensure_thread_connection("prefetch_last_modified_timestamps")
+            return self._query_show_tables_for_schema(catalog, schema, tables)
+
+        def _fetch_sys_query_detail_thread(
+            catalog: str, catalog_tables: t.List[exp.Table]
+        ) -> t.Dict[str, t.Optional[int]]:
+            set_invocation_context()
+
+            catalog_adapter = self._get_or_create_catalog_adapter(catalog)
+            try:
+                with catalog_adapter.connection_named(catalog):
+                    return self._query_sys_query_detail(catalog_tables, adapter=catalog_adapter)
+            finally:
+                self._thread_local.connection_acquired = False
+
+        show_futures = [
+            self._executor.submit(_fetch_show_tables_thread, catalog, schema, tables)
+            for (catalog, schema), tables in tables_by_schema.items()
+        ]
+        sys_futures = [
+            self._executor.submit(_fetch_sys_query_detail_thread, catalog, catalog_tables)
+            for catalog, catalog_tables in tables_by_catalog.items()
+        ]
+
+        def _wait_all() -> None:
+            merged: t.Dict[str, t.Optional[int]] = {}
+            for f in show_futures:
+                try:
+                    merged.update(f.result())
+                except Exception as exc:
+                    events.fire_warn_event_suboptimal(
+                        "Failed to prefetch SHOW TABLES for Redshift: {}", str(exc)
+                    )
+            for f in sys_futures:
+                try:
+                    merged.update(f.result())
+                except Exception as exc:
+                    events.fire_warn_event_suboptimal(
+                        "Failed to prefetch sys_query_detail for Redshift: {}", str(exc)
+                    )
+            for fqn in claimed_fqns:
+                if fqn not in merged:
+                    merged[fqn] = None
+            with self._last_modified_epoch_cache as cache:
+                cache.fulfill_many(merged)
+
+        return self._executor.submit(_wait_all)
+
```
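Stripped of the dbt plumbing, the method above is a claim → fan-out → merge-with-precedence skeleton. A self-contained toy version (the fetchers and values are invented stand-ins for the SHOW TABLES and sys_query_detail queries):

```python
import typing as t
from concurrent.futures import Future, ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=4)

def fetch_show_tables(schema: str) -> t.Dict[str, t.Optional[int]]:
    return {f"{schema}.orders": 1_700_000_000_000}  # coarse schema-level signal

def fetch_sys_query_detail(catalog: str) -> t.Dict[str, t.Optional[int]]:
    return {"public.orders": 1_700_000_123_000}  # per-write signal, wins on conflict

def prefetch(schemas: t.List[str], catalogs: t.List[str]) -> "Future[t.Dict[str, t.Optional[int]]]":
    show_futures = [executor.submit(fetch_show_tables, s) for s in schemas]
    sys_futures = [executor.submit(fetch_sys_query_detail, c) for c in catalogs]

    def wait_all() -> t.Dict[str, t.Optional[int]]:
        merged: t.Dict[str, t.Optional[int]] = {}
        for f in show_futures:  # apply the coarse signal first...
            merged.update(f.result())
        for f in sys_futures:   # ...then let the fine-grained signal override it
            merged.update(f.result())
        return merged

    # Callers receive one Future that resolves after both sources are merged.
    return executor.submit(wait_all)

print(prefetch(["public"], ["dev"]).result())  # {'public.orders': 1700000123000}
```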
run_cache-2.4.0/src/dbt_run_cache/adapters/redshift.py (continued)

```diff
+    def _fetch_last_modified_epochs(
+        self, table_batch: t.Collection[exp.Table]
+    ) -> dict[str, t.Optional[int]]:
+        if not table_batch:
+            return {}
+
+        table_fqns = [self._to_fqn(t) for t in table_batch]
+        last_modified_epochs: dict[str, t.Optional[int]] = {
+            self._sql(fqn): None for fqn in table_fqns
+        }
+
+        tables_by_schema: dict[tuple[str, str], list[exp.Table]] = defaultdict(list)
+        tables_by_catalog: dict[str, list[exp.Table]] = defaultdict(list)
+        for fqn in table_fqns:
+            tables_by_schema[(fqn.catalog, fqn.db)].append(fqn)
+            tables_by_catalog[fqn.catalog].append(fqn)
+
+        for (catalog, schema), schema_tables in tables_by_schema.items():
+            last_modified_epochs.update(
+                self._query_show_tables_for_schema(catalog, schema, schema_tables)
+            )
+
+        for catalog, catalog_tables in tables_by_catalog.items():
+            catalog_adapter = self._get_or_create_catalog_adapter(catalog)
+            with catalog_adapter.connection_named(catalog):
+                sys_results = self._query_sys_query_detail(catalog_tables, adapter=catalog_adapter)
+
+            for fqn_key, epoch in sys_results.items():
+                if fqn_key in last_modified_epochs:
+                    last_modified_epochs[fqn_key] = epoch
+
+        return last_modified_epochs
+
+    def _query_show_tables_for_schema(
+        self,
+        catalog: str,
+        schema: str,
+        tables: t.Collection[exp.Table],
+    ) -> t.Dict[str, t.Optional[int]]:
+        """Run SHOW TABLES for one schema and return an FQN→epoch dict for the given tables."""
+        result: t.Dict[str, t.Optional[int]] = {}
+        table_names = {tbl.name.lower() for tbl in tables}
+        show_result = self.execute(f"SHOW TABLES FROM SCHEMA {catalog}.{schema}", fetch=True)
+        fallback_epoch = None
+        for row in show_result:
+            if row["table_name"].lower() not in table_names:
+                continue
+            fqn_key = self._build_fqn_from_row(
+                row["database_name"], row["schema_name"], row["table_name"]
+            )
+            candidates = [
+                ts for ts in (row["last_modified_time"], row["last_altered_time"]) if ts is not None
+            ]
+            if candidates:
+                result[fqn_key] = int(
+                    max(candidates).replace(tzinfo=timezone.utc).timestamp() * 1000
+                )
+            else:
+                # Both timestamps NULL: Redshift's metadata refresh populates these via
+                # a background process with ~20 min lag. _query_sys_query_detail normally
+                # supplements via an OID-based join on insert/delete steps, but Redshift's
+                # ALTER TABLE RENAME assigns a new OID, orphaning the INSERT step's
+                # table_id — so tables built via CREATE + INSERT + RENAME come back empty
+                # from both signals.
+                #
+                # The table exists (it's in SHOW TABLES); skipping it would make downstream
+                # logic treat it as nonexistent. Record a "now" sentinel; sys_query_detail
+                # still overrides this whenever its OID join does match (e.g. direct DML).
+                if fallback_epoch is None:
+                    fallback_epoch = int(self.current_timestamp_utc().timestamp() * 1000)
+                result[fqn_key] = fallback_epoch
+        return result
+
+    def _query_sys_query_detail(
+        self, tables: t.Collection[exp.Table], adapter: SQLAdapter
+    ) -> t.Dict[str, t.Optional[int]]:
+        """Query sys_query_detail for recent writes and return an FQN→epoch dict."""
+        if not tables:
+            return {}
+
+        catalog = next(iter(tables)).catalog or self.default_catalog
+        schema_table_filter = " OR ".join(
+            f"(upper(ns.nspname) = upper('{tbl.db}') AND upper(c.relname) = upper('{tbl.name}'))"
+            for tbl in tables
+        )
+        sys_rows_sql = f"""
+            SELECT
+                ns.nspname AS schema_name,
+                c.relname AS table_name,
+                MAX(qd.end_time) AS last_updated
+            FROM pg_class c
+            JOIN pg_namespace ns ON ns.oid = c.relnamespace
+            JOIN sys_query_detail qd ON qd.table_id = c.oid
+            LEFT JOIN sys_query_history qh ON qh.query_id = qd.query_id
+            LEFT JOIN sys_transaction_history th ON th.transaction_id = qh.transaction_id
+            WHERE qd.step_name IN ('insert', 'delete')
+                AND th.status = 'committed'
+                AND qd.end_time >= DATEADD(minute, -{self._SYS_QUERY_DETAIL_LOOKBACK_MINUTES}, (SYSDATE at time zone CURRENT_SETTING('timezone')) at time zone 'UTC')
+                AND ({schema_table_filter})
+            GROUP BY 1, 2
+        """
+
+        _, agate_result = adapter.execute(sys_rows_sql, fetch=True)
+        sys_rows = agate_result.rows
+
+        result: t.Dict[str, t.Optional[int]] = {}
+        for schema_name, table_name, last_updated in sys_rows:
+            if last_updated is None:
+                continue
+            fqn_key = self._build_fqn_from_row(catalog, schema_name.strip(), table_name.strip())
+            result[fqn_key] = int(last_updated.replace(tzinfo=timezone.utc).timestamp() * 1000)
+        return result
+
```
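One detail that recurs three times above is `.replace(tzinfo=timezone.utc)` before `.timestamp()`. Both metadata sources hand back naive datetimes that already represent UTC wall-clock time, and pinning the timezone is what keeps the epoch math machine-independent, as this small illustration (invented value) shows:

```python
from datetime import datetime, timezone

naive = datetime(2024, 1, 1, 12, 0, 0)  # naive value, already UTC wall time

# Naive .timestamp() interprets the value in the process's *local* timezone,
# skewing the epoch by the machine's UTC offset.
skewed_ms = int(naive.timestamp() * 1000)

# Pinning tzinfo first yields the same epoch everywhere.
correct_ms = int(naive.replace(tzinfo=timezone.utc).timestamp() * 1000)
print(correct_ms)  # 1704110400000
```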
run_cache-2.4.0/src/dbt_run_cache/adapters/redshift.py (continued)

```diff
+    def _fetch_view_definitions(
+        self, table_batch: t.Collection[exp.Table]
+    ) -> t.Collection[ViewDefinition]:
+        if not table_batch:
+            return []
+
+        queries = []
+        # redshift supports cross-database queries, need to group/query by catalog
+        for catalog, tables in group_tables_by_catalog(table_batch, self.default_catalog).items():
+            filter_expr = build_information_schema_filter(tables, ("table_schema", "table_name"))
+            query = f"""
+                SELECT
+                    table_catalog,
+                    table_schema,
+                    table_name,
+                    view_definition
+                FROM {catalog}.information_schema.views
+                WHERE {self._sql(filter_expr)}
+            """
+
+            queries.append(query)
+
+        query = "UNION ALL\n".join(queries)
+
+        view_definitions = []
+        result_rows = self.execute(query, fetch=True).rows
+
+        for catalog, schema, name, view_definition in result_rows:
+            fqn = self._build_fqn_from_row(catalog, schema, name)
+            view_definitions.append(
+                ViewDefinition(
+                    fqn=fqn,
+                    definition=view_definition,
+                    dialect=self.dialect,
+                    default_catalog=catalog,
+                    default_schema=schema,
+                )
+            )
+
+        return view_definitions
+
+    def cache_view_definition(self, table: exp.Table, definition: str, default_schema: str) -> None:
+        # Redshift uses early-binding views by default: at CREATE VIEW time it
+        # fully resolves the view body and stores the resolved form. Reading the
+        # view back from information_schema.views therefore returns the resolved
+        # SQL, not the original — e.g. `select id from upstream as up` becomes
+        # `SELECT up.id FROM upstream up`.
+        # https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_VIEW.html
+
+        # dbt's compiled_code is the unresolved form. If we cache compiled_code
+        # in run 1 and fall through to information_schema.views in run 2 (because
+        # the view isn't in the selected set), the two runs produce different
+        # `stable_sql` for the view dependency, which breaks candidate matching on
+        # the server.
+
+        # Skip the cache write so every run sources view definitions from the
+        # warehouse via `_fetch_view_definitions`, ensuring identical
+        # canonicalization across runs.
+        return
+
+    def _get_or_create_catalog_adapter(self, catalog: str) -> SQLAdapter:
+        if catalog.lower() == self.default_catalog.lower():
+            return self.adapter
+
+        if catalog not in self._catalog_adapters:
+            creds = self.adapter.config.credentials.replace(database=catalog)
+            config = replace(self.adapter.config, credentials=creds)
+            self._catalog_adapters[catalog] = type(self.adapter)(config, get_context("spawn"))
+        return self._catalog_adapters[catalog]
```
{run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/dev_cloner.py

```diff
@@ -100,8 +100,11 @@ class DevCloner:
         clone_target: str,
     ) -> None:
         with events.downgrade_adapter_error_events():
-            for sql in clone_sqls:
-                adapter.execute(sql)
+            if self._adapter_ext.IMPLEMENTS_CUSTOM_CLONE:
+                self._adapter_ext.clone(clone_sqls, clone_source, clone_target)
+            else:
+                for sql in clone_sqls:
+                    adapter.execute(sql)
 
         # Add the created relation to the cache
         self._adapter_ext.cache_node_relation(node)
```
{run_cache-2.3.2 → run_cache-2.4.0}/src/dbt_run_cache/run_cache.py

```diff
@@ -656,8 +656,15 @@ class RunCache:
             should_run_hooks = False
             return NoRunResult(query_cache_response.request_id, failed_to_clone=True)
         with events.downgrade_adapter_error_events():
-            for sql in query_cache_response.clone_sqls:
-                self._adapter.execute(sql)
+            if self._adapter_ext.IMPLEMENTS_CUSTOM_CLONE:
+                self._adapter_ext.clone(
+                    query_cache_response.clone_sqls,
+                    query_cache_response.clone_source,
+                    query_cache_response.clone_target,
+                )
+            else:
+                for sql in query_cache_response.clone_sqls:
+                    self._adapter.execute(sql)
         self._commit_if_open()
         # when we clone a table, we put it in the relation cache so that the remainder of the
         # invocation knows it exists without hitting the db
```
run_cache-2.3.2/src/dbt_run_cache/_version.py (removed)

```diff
@@ -1 +0,0 @@
-__version__ = "2.3.2"
```
All other files listed above (+0 -0) are unchanged between 2.3.2 and 2.4.0.