fal 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fal might be problematic.
- _fal_testing/utils.py +2 -2
- dbt/adapters/fal/__init__.py +21 -0
- dbt/adapters/fal/__version__.py +1 -0
- dbt/adapters/fal/connections.py +18 -0
- dbt/adapters/fal/impl.py +93 -0
- dbt/adapters/fal/load_db_profile.py +80 -0
- dbt/adapters/fal/wrappers.py +113 -0
- dbt/adapters/fal_experimental/__init__.py +11 -0
- dbt/adapters/fal_experimental/__version__.py +1 -0
- dbt/adapters/fal_experimental/adapter.py +149 -0
- dbt/adapters/fal_experimental/adapter_support.py +234 -0
- dbt/adapters/fal_experimental/connections.py +72 -0
- dbt/adapters/fal_experimental/impl.py +240 -0
- dbt/adapters/fal_experimental/support/athena.py +92 -0
- dbt/adapters/fal_experimental/support/bigquery.py +74 -0
- dbt/adapters/fal_experimental/support/duckdb.py +28 -0
- dbt/adapters/fal_experimental/support/postgres.py +88 -0
- dbt/adapters/fal_experimental/support/redshift.py +56 -0
- dbt/adapters/fal_experimental/support/snowflake.py +76 -0
- dbt/adapters/fal_experimental/support/trino.py +26 -0
- dbt/adapters/fal_experimental/telemetry/__init__.py +1 -0
- dbt/adapters/fal_experimental/telemetry/telemetry.py +411 -0
- dbt/adapters/fal_experimental/teleport.py +192 -0
- dbt/adapters/fal_experimental/teleport_adapter_support.py +23 -0
- dbt/adapters/fal_experimental/teleport_support/duckdb.py +122 -0
- dbt/adapters/fal_experimental/teleport_support/snowflake.py +72 -0
- dbt/adapters/fal_experimental/utils/__init__.py +50 -0
- dbt/adapters/fal_experimental/utils/environments.py +302 -0
- dbt/fal/adapters/python/__init__.py +3 -0
- dbt/fal/adapters/python/connections.py +319 -0
- dbt/fal/adapters/python/impl.py +291 -0
- dbt/fal/adapters/teleport/__init__.py +3 -0
- dbt/fal/adapters/teleport/impl.py +103 -0
- dbt/fal/adapters/teleport/info.py +73 -0
- dbt/include/fal/__init__.py +3 -0
- dbt/include/fal/dbt_project.yml +5 -0
- dbt/include/fal/macros/materializations/table.sql +46 -0
- dbt/include/fal/macros/teleport_duckdb.sql +8 -0
- dbt/include/fal/macros/teleport_snowflake.sql +31 -0
- dbt/include/fal_experimental/__init__.py +3 -0
- dbt/include/fal_experimental/dbt_project.yml +5 -0
- dbt/include/fal_experimental/macros/materializations/table.sql +36 -0
- fal/__init__.py +61 -11
- fal/dbt/__init__.py +11 -0
- fal/dbt/cli/__init__.py +1 -0
- fal/{cli → dbt/cli}/args.py +7 -2
- fal/{cli → dbt/cli}/cli.py +18 -3
- fal/{cli → dbt/cli}/dbt_runner.py +1 -1
- fal/{cli → dbt/cli}/fal_runner.py +6 -6
- fal/{cli → dbt/cli}/flow_runner.py +9 -9
- fal/{cli → dbt/cli}/model_generator/model_generator.py +5 -5
- fal/{cli → dbt/cli}/selectors.py +2 -2
- fal/{fal_script.py → dbt/fal_script.py} +4 -4
- {faldbt → fal/dbt/integration}/lib.py +2 -2
- {faldbt → fal/dbt/integration}/magics.py +2 -2
- {faldbt → fal/dbt/integration}/parse.py +7 -7
- {faldbt → fal/dbt/integration}/project.py +7 -7
- fal/dbt/integration/utils/yaml_helper.py +80 -0
- fal/dbt/new/project.py +43 -0
- fal/{node_graph.py → dbt/node_graph.py} +2 -2
- fal/{packages → dbt/packages}/dependency_analysis.py +32 -38
- fal/{packages → dbt/packages}/environments/__init__.py +3 -3
- fal/{packages → dbt/packages}/environments/base.py +2 -2
- fal/{packages → dbt/packages}/environments/conda.py +3 -3
- fal/{packages → dbt/packages}/environments/virtual_env.py +3 -3
- fal/{packages → dbt/packages}/isolated_runner.py +5 -5
- fal/{planner → dbt/planner}/executor.py +4 -4
- fal/{planner → dbt/planner}/plan.py +3 -3
- fal/{planner → dbt/planner}/schedule.py +5 -5
- fal/{planner → dbt/planner}/tasks.py +5 -5
- fal/{telemetry → dbt/telemetry}/telemetry.py +4 -4
- fal/{typing.py → dbt/typing.py} +2 -2
- fal/{utils.py → dbt/utils.py} +2 -2
- {fal-0.9.2.dist-info → fal-0.9.4.dist-info}/METADATA +98 -117
- fal-0.9.4.dist-info/RECORD +91 -0
- fal-0.9.4.dist-info/entry_points.txt +4 -0
- fal/cli/__init__.py +0 -1
- fal-0.9.2.dist-info/RECORD +0 -47
- fal-0.9.2.dist-info/entry_points.txt +0 -3
- {faldbt → dbt/adapters/fal_experimental}/utils/yaml_helper.py +0 -0
- /fal/{cli → dbt/cli}/model_generator/__init__.py +0 -0
- /fal/{cli → dbt/cli}/model_generator/module_check.py +0 -0
- /fal/{feature_store → dbt/feature_store}/__init__.py +0 -0
- /fal/{feature_store → dbt/feature_store}/feature.py +0 -0
- /fal/{packages → dbt/integration}/__init__.py +0 -0
- {faldbt → fal/dbt/integration}/logger.py +0 -0
- /fal/{planner → dbt/integration/utils}/__init__.py +0 -0
- {faldbt → fal/dbt/integration}/version.py +0 -0
- /fal/{telemetry → dbt/packages}/__init__.py +0 -0
- /fal/{packages → dbt/packages}/bridge.py +0 -0
- {faldbt → fal/dbt/planner}/__init__.py +0 -0
- {faldbt/utils → fal/dbt/telemetry}/__init__.py +0 -0
- {fal-0.9.2.dist-info → fal-0.9.4.dist-info}/WHEEL +0 -0
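Most of this release is a package re-layout: the top-level `faldbt` package and `fal/cli` move under `fal/dbt`, and two new dbt adapter packages (`dbt/adapters/fal` and `dbt/adapters/fal_experimental`) are added. As a rough illustration of what the renames above imply for imports (module paths are taken from the list; no public-API guarantee is implied):

# Import-path mapping implied by the renames above (illustrative only):
#   0.9.2: faldbt.lib          ->  0.9.4: fal.dbt.integration.lib
#   0.9.2: faldbt.parse        ->  0.9.4: fal.dbt.integration.parse
#   0.9.2: fal.cli.cli         ->  0.9.4: fal.dbt.cli.cli
#   0.9.2: fal.fal_script      ->  0.9.4: fal.dbt.fal_script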
dbt/adapters/fal_experimental/adapter_support.py (new file)
@@ -0,0 +1,234 @@
import functools
from typing import Any

import pandas as pd
import sqlalchemy
from contextlib import contextmanager
from dbt.adapters.base import BaseAdapter, BaseRelation, RelationType
from dbt.adapters.base.connections import AdapterResponse, Connection
from dbt.config import RuntimeConfig
from dbt.parser.manifest import MacroManifest, Manifest
from dbt.flags import Namespace

from dbt.adapters import factory

_SQLALCHEMY_DIALECTS = {
    "sqlserver": "mssql+pyodbc",
}


def _get_alchemy_engine(adapter: BaseAdapter, connection: Connection) -> Any:
    # The following code heavily depends on the implementation
    # details of the known adapters, hence it can't work for
    # arbitrary ones.
    adapter_type = adapter.type()

    sqlalchemy_kwargs = {}
    format_url = lambda url: url

    if adapter_type == "trino":
        import dbt.adapters.fal_experimental.support.trino as support_trino

        return support_trino.create_engine(adapter)

    elif adapter_type == "sqlserver":
        sqlalchemy_kwargs["creator"] = lambda *args, **kwargs: connection.handle
        url = _SQLALCHEMY_DIALECTS.get(adapter_type, adapter_type) + "://"
        url = format_url(url)
    else:
        message = (
            f"dbt-fal does not support the {adapter_type} adapter. "
            f"If you need {adapter_type} support, you can create an issue "
            "in our GitHub repository: https://github.com/fal-ai/fal. "
            "We will look into it ASAP."
        )
        raise NotImplementedError(message)

    return sqlalchemy.create_engine(url, **sqlalchemy_kwargs)


def drop_relation_if_it_exists(adapter: BaseAdapter, relation: BaseRelation) -> None:
    if adapter.get_relation(
        database=relation.database,
        schema=relation.schema,
        identifier=relation.identifier,
    ):
        adapter.drop_relation(relation)


def write_df_to_relation(
    adapter: BaseAdapter,
    relation: BaseRelation,
    dataframe: pd.DataFrame,
    *,
    if_exists: str = "replace",
) -> AdapterResponse:
    """Generic version of write_df_to_relation. Materialize the given
    dataframe to the targeted relation on the adapter."""

    adapter_type = adapter.type()

    if adapter_type == "snowflake":
        import dbt.adapters.fal_experimental.support.snowflake as support_snowflake

        return support_snowflake.write_df_to_relation(adapter, dataframe, relation)

    elif adapter_type == "bigquery":
        import dbt.adapters.fal_experimental.support.bigquery as support_bq

        return support_bq.write_df_to_relation(adapter, dataframe, relation)

    elif adapter_type == "duckdb":
        import dbt.adapters.fal_experimental.support.duckdb as support_duckdb

        return support_duckdb.write_df_to_relation(adapter, dataframe, relation)

    elif adapter_type == "postgres":
        import dbt.adapters.fal_experimental.support.postgres as support_postgres

        return support_postgres.write_df_to_relation(adapter, dataframe, relation)

    elif adapter_type == "athena":
        import dbt.adapters.fal_experimental.support.athena as support_athena

        return support_athena.write_df_to_relation(
            adapter, dataframe, relation, if_exists
        )

    elif adapter_type == "redshift":
        import dbt.adapters.fal_experimental.support.redshift as support_redshift

        return support_redshift.write_df_to_relation(adapter, dataframe, relation)

    else:
        with new_connection(adapter, "fal:write_df_to_relation") as connection:
            # TODO: this should probably live in the materialization macro.
            temp_relation = relation.replace_path(
                identifier=f"__dbt_fal_temp_{relation.identifier}"
            )

            drop_relation_if_it_exists(adapter, temp_relation)

            alchemy_engine = _get_alchemy_engine(adapter, connection)

            # TODO: probably worth handling errors here and returning
            # a proper adapter response.
            rows_affected = dataframe.to_sql(
                con=alchemy_engine,
                name=temp_relation.identifier,
                schema=temp_relation.schema,
                if_exists=if_exists,
                index=False,
            )
            adapter.cache.add(temp_relation)
            drop_relation_if_it_exists(adapter, relation)

            adapter.rename_relation(temp_relation, relation)
            adapter.commit_if_has_connection()

            return AdapterResponse("OK", rows_affected=rows_affected)


def read_relation_as_df(adapter: BaseAdapter, relation: BaseRelation) -> pd.DataFrame:
    """Generic version of read_df_from_relation."""

    adapter_type = adapter.type()

    if adapter_type == "snowflake":
        import dbt.adapters.fal_experimental.support.snowflake as support_snowflake

        return support_snowflake.read_relation_as_df(adapter, relation)

    elif adapter_type == "bigquery":
        import dbt.adapters.fal_experimental.support.bigquery as support_bq

        return support_bq.read_relation_as_df(adapter, relation)

    elif adapter_type == "duckdb":
        import dbt.adapters.fal_experimental.support.duckdb as support_duckdb

        return support_duckdb.read_relation_as_df(adapter, relation)

    elif adapter_type == "postgres":
        import dbt.adapters.fal_experimental.support.postgres as support_postgres

        return support_postgres.read_relation_as_df(adapter, relation)

    elif adapter_type == "athena":
        import dbt.adapters.fal_experimental.support.athena as support_athena

        return support_athena.read_relation_as_df(adapter, relation)

    elif adapter_type == "redshift":
        import dbt.adapters.fal_experimental.support.redshift as support_redshift

        return support_redshift.read_relation_as_df(adapter, relation)

    else:
        with new_connection(adapter, "fal:read_relation_as_df") as connection:
            alchemy_engine = _get_alchemy_engine(adapter, connection)

            return pd.read_sql_table(
                con=alchemy_engine,
                table_name=relation.identifier,
                schema=relation.schema,
            )


def prepare_for_adapter(adapter: BaseAdapter, function: Any) -> Any:
    """Prepare the given function to be used with string-like inputs
    (for relations) on the given adapter."""

    @functools.wraps(function)
    def wrapped(quoted_relation: str, *args, **kwargs) -> Any:
        # HACK: we need to drop the quotes from the relation parts.
        # This was introduced in https://github.com/dbt-labs/dbt-core/pull/7115
        # and the recommended solution would be to create a macro `fal__resolve_model_name`,
        # but that is not possible due to a macro resolution error we get by returning the db_adapter type.
        # The overall solution could be to avoid creating a Relation and just pass the string as is to the read/write functions.
        parts = map(
            lambda part: part.strip(adapter.Relation.quote_character),
            [*quoted_relation.split(".")],
        )

        relation = adapter.Relation.create(*parts, type=RelationType.Table)
        return function(adapter, relation, *args, **kwargs)

    return wrapped


def reconstruct_adapter(
    flags: Namespace,
    config: RuntimeConfig,
    manifest: Manifest,
    macro_manifest: MacroManifest,
) -> BaseAdapter:
    from dbt.flags import set_flags
    from dbt.tracking import do_not_track

    # Avoid dbt tracking
    do_not_track()

    # Flags need to be set before any plugin is loaded
    set_flags(flags)

    # Prepare the plugin loading system to handle the adapter
    factory.load_plugin(config.credentials.type)
    factory.load_plugin(config.python_adapter_credentials.type)
    factory.register_adapter(config)

    # Initialize the adapter
    db_adapter = factory.get_adapter(config)
    db_adapter._macro_manifest_lazy = macro_manifest
    reload_adapter_cache(db_adapter, manifest)

    return db_adapter


def reload_adapter_cache(adapter: BaseAdapter, manifest: Manifest) -> None:
    with new_connection(adapter, "fal:reload_adapter_cache"):
        adapter.set_relations_cache(manifest, True)


@contextmanager
def new_connection(adapter: BaseAdapter, connection_name: str) -> Connection:
    with adapter.connection_named(connection_name):
        yield adapter.connections.get_thread_connection()
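A minimal sketch (editor's illustration, not part of this diff) of how the helpers above compose: `prepare_for_adapter` turns the relation-based `read_relation_as_df` and `write_df_to_relation` into callables that accept a quoted relation string, which is the shape handed to user-facing Python code. The `build_helpers` function and its `adapter` argument are hypothetical; `adapter` stands for an already-initialized BaseAdapter.

from dbt.adapters.fal_experimental.adapter_support import (
    prepare_for_adapter,
    read_relation_as_df,
    write_df_to_relation,
)

def build_helpers(adapter):
    # Each wrapped callable now accepts a '"db"."schema"."table"'-style
    # string and strips the quote characters before building a Relation.
    read_df = prepare_for_adapter(adapter, read_relation_as_df)
    write_df = prepare_for_adapter(adapter, write_df_to_relation)
    return read_df, write_df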
dbt/adapters/fal_experimental/connections.py (new file)
@@ -0,0 +1,72 @@
from dataclasses import dataclass
from typing import Optional
import os

from dbt.adapters.base import Credentials
from dbt.dataclass_schema import StrEnum, ExtensibleDbtClassMixin

from dbt.fal.adapters.python import PythonConnectionManager


DEFAULT_HOSTS = {
    "cloud": "api.alpha.fal.ai",
    "cloud-eu": "api.eu.fal.ai",
}


class TeleportTypeEnum(StrEnum):
    LOCAL = "local"
    REMOTE_S3 = "s3"


@dataclass
class TeleportCredentials(ExtensibleDbtClassMixin):
    type: TeleportTypeEnum

    # local
    local_path: Optional[str] = os.getcwd()

    # s3
    s3_bucket: Optional[str] = None
    s3_region: Optional[str] = None
    s3_access_key_id: Optional[str] = None
    s3_access_key: Optional[str] = None


class FalConnectionManager(PythonConnectionManager):
    TYPE = "fal_experimental"

    @classmethod
    def open(cls, connection):
        raise NotImplementedError

    def execute(self, compiled_code: str):
        raise NotImplementedError

    def cancel(self, connection):
        raise NotImplementedError


@dataclass
class FalCredentials(Credentials):
    default_environment: str = "local"
    teleport: Optional[TeleportCredentials] = None
    host: str = ""
    key_secret: str = ""
    key_id: str = ""

    # NOTE: So we are allowed to not set them in profiles.yml;
    # they are ignored for now.
    database: str = ""
    schema: str = ""

    def __post_init__(self):
        if self.host in DEFAULT_HOSTS:
            self.host = DEFAULT_HOSTS[self.host]

    @property
    def type(self):
        return "fal_experimental"

    def _connection_keys(self):
        return ()
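For orientation, a hedged sketch (not in the diff) of constructing the credential objects above directly, mirroring what dbt would build from profiles.yml. All values are hypothetical; note how `__post_init__` resolves the `cloud` alias through `DEFAULT_HOSTS`.

from dbt.adapters.fal_experimental.connections import (
    FalCredentials,
    TeleportCredentials,
    TeleportTypeEnum,
)

creds = FalCredentials(
    default_environment="local",
    host="cloud",  # rewritten to "api.alpha.fal.ai" by __post_init__
    teleport=TeleportCredentials(
        type=TeleportTypeEnum.REMOTE_S3,
        s3_bucket="my-teleport-bucket",  # hypothetical
        s3_region="us-east-1",           # hypothetical
        s3_access_key_id="AKIA...",      # hypothetical
        s3_access_key="secret",          # hypothetical
    ),
)
assert creds.host == "api.alpha.fal.ai"
assert creds.type == "fal_experimental"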
dbt/adapters/fal_experimental/impl.py (new file)
@@ -0,0 +1,240 @@
from __future__ import annotations

from contextlib import contextmanager
from typing import Iterator

from dbt.adapters.base.impl import BaseAdapter
from dbt.adapters.base.meta import AdapterMeta, available
from dbt.adapters.base.relation import BaseRelation
from dbt.contracts.connection import AdapterResponse

from dbt.fal.adapters.teleport.info import (
    TeleportInfo,
    S3TeleportInfo,
    LocalTeleportInfo,
)
from dbt.fal.adapters.teleport.impl import TeleportAdapter
from dbt.fal.adapters.python.impl import PythonAdapter
from dbt.parser.manifest import MacroManifest, Manifest, ManifestLoader

from . import telemetry

from .connections import FalConnectionManager, FalCredentials, TeleportTypeEnum

from .teleport_adapter_support import wrap_db_adapter
from .teleport import DataLocation, run_in_environment_with_teleport, run_with_teleport

from .adapter_support import reload_adapter_cache
from .adapter import run_in_environment_with_adapter, run_with_adapter

from .utils.environments import fetch_environment, db_adapter_config


class FalAdapterMixin(TeleportAdapter, metaclass=AdapterMeta):
    ConnectionManager = FalConnectionManager

    def __init__(self, config, db_adapter: BaseAdapter):
        self.config = config
        self._db_adapter = db_adapter

        self._relation_data_location_cache: DataLocation = DataLocation({})
        if self.is_teleport():
            self._wrapper = wrap_db_adapter(self._db_adapter, self.credentials.teleport)

    @classmethod
    def type(cls):
        return "fal_experimental"

    @classmethod
    def storage_formats(cls):
        return ["csv", "parquet"]

    @available
    def is_teleport(self) -> bool:
        return getattr(self.credentials, "teleport", None) is not None

    @property
    def manifest(self) -> Manifest:
        return ManifestLoader.get_full_manifest(self.config)

    @property
    def macro_manifest(self) -> MacroManifest:
        return self._db_adapter.load_macro_manifest()

    @telemetry.log_call("experimental_submit_python_job", config=True)
    def submit_python_job(
        self, parsed_model: dict, compiled_code: str
    ) -> AdapterResponse:
        """Execute the given `compiled_code` in the target environment."""
        config_dict = parsed_model["config"]

        environment_name = config_dict.get(
            "fal_environment",
            self.credentials.default_environment,
        )

        machine_type = config_dict.get(
            "fal_machine",
            "S",
        )

        environment, is_local = fetch_environment(
            self.config.project_root,
            environment_name,
            machine_type,
            self.credentials,
        )

        telemetry.log_api(
            "experimental_submit_python_job_config",
            config=self.config,
            additional_props={
                "is_teleport": self.is_teleport(),
                "environment_is_local": is_local,
            },
        )

        if self.is_teleport():
            # We need to build teleport_info because we read from the external storage;
            # we did not _localize_ the data in `teleport_from_external_storage`.
            teleport_info = self._build_teleport_info()
            if is_local:
                result_table_path = run_with_teleport(
                    code=compiled_code,
                    teleport_info=teleport_info,
                    locations=self._relation_data_location_cache,
                    config=db_adapter_config(self.config),
                )
            else:
                result_table_path = run_in_environment_with_teleport(
                    environment,
                    compiled_code,
                    teleport_info=teleport_info,
                    locations=self._relation_data_location_cache,
                    config=db_adapter_config(self.config),
                    adapter_type=self._db_adapter.type(),
                )

            relation = self._db_adapter.Relation.create(
                parsed_model["database"], parsed_model["schema"], parsed_model["alias"]
            )
            self._sync_result_table(relation)

            return AdapterResponse("OK")

        else:
            if is_local:
                return run_with_adapter(compiled_code, self._db_adapter, self.config)

            with self._invalidate_db_cache():
                return run_in_environment_with_adapter(
                    environment,
                    compiled_code,
                    db_adapter_config(self.config),
                    self.manifest,
                    self.macro_manifest,
                    self._db_adapter.type(),
                )

    @contextmanager
    def _invalidate_db_cache(self) -> Iterator[None]:
        try:
            yield
        finally:
            # Since executed Python code might alter the database
            # layout, we need to regenerate the relations cache
            # after every time we execute a Python model.
            #
            # TODO: maybe propagate a list of tuples with the changes
            # from the Python runner, so that we can tell the cache
            # manager about what is going on instead of hard-resetting
            # the cache-db.
            reload_adapter_cache(self._db_adapter, self.manifest)

    @property
    def credentials(self):
        python_creds: FalCredentials = self.config.python_adapter_credentials
        # If dbt-fal is not configured as a Python adapter,
        # maybe we should raise an error?
        assert python_creds is not None
        return python_creds

    def teleport_from_external_storage(
        self, relation: BaseRelation, relation_path: str, teleport_info: TeleportInfo
    ):
        """
        Store the teleport URLs for later use.
        """

        rel_name = teleport_info.relation_name(relation)
        self._relation_data_location_cache[rel_name] = relation_path

    def teleport_to_external_storage(
        self, relation: BaseRelation, teleport_info: TeleportInfo
    ):
        # Already in external storage; we do not have local storage.
        # Just return the path.
        return teleport_info.build_relation_path(relation)

    # TODO: cache this?
    def _build_teleport_info(self):
        teleport_creds = self.credentials.teleport
        assert teleport_creds

        teleport_format = TeleportAdapter.find_format(self, self._wrapper)

        if teleport_creds.type == TeleportTypeEnum.LOCAL:
            assert teleport_creds.local_path
            return LocalTeleportInfo(
                teleport_format, teleport_creds, teleport_creds.local_path
            )
        elif teleport_creds.type == TeleportTypeEnum.REMOTE_S3:
            assert teleport_creds.s3_bucket
            return S3TeleportInfo(
                teleport_format, teleport_creds, teleport_creds.s3_bucket, "teleport"
            )
        else:
            raise NotImplementedError(
                f"Teleport credentials of type {teleport_creds.type} are not supported"
            )

    ######
    # HACK: The following implementations are only necessary until dbt-core adds Teleport.
    #####
    @available
    def sync_teleport_relation(self, relation: BaseRelation):
        """
        Internal implementation of sync to avoid dbt-core changes.
        """
        teleport_info = self._build_teleport_info()
        data_path = self._wrapper.teleport_to_external_storage(relation, teleport_info)
        self.teleport_from_external_storage(relation, data_path, teleport_info)

    def _sync_result_table(self, relation: BaseRelation):
        """
        Internal implementation of sync to put data back into the data warehouse.
        This is necessary because Teleport is not part of dbt-core.
        Once it is, and adapters implement it, we will not sync the result table back by hand;
        instead the other adapter will call `sync_teleport`, which will automatically call
        FalAdapter's `teleport_to_external_storage` and the adapter's `teleport_from_external_storage`.
        """
        teleport_info = self._build_teleport_info()
        data_path = self.teleport_to_external_storage(relation, teleport_info)
        self._wrapper.teleport_from_external_storage(relation, data_path, teleport_info)


class FalAdapter(FalAdapterMixin, PythonAdapter):
    def __init__(self, config):
        PythonAdapter.__init__(self, config)
        FalAdapterMixin.__init__(self, config, self._db_adapter)

        telemetry.log_api(
            "experimental_init",
            config=config,
            additional_props={"is_teleport": self.is_teleport()},
        )

    @classmethod
    def is_cancelable(cls) -> bool:
        # TODO: maybe it is?
        return False
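To see where `parsed_model["config"]` comes from in `submit_python_job` above: a dbt Python model can set `fal_environment` and `fal_machine` through its config. A hedged sketch (the model and environment names are hypothetical, and the second argument to `model` is whatever session object the adapter passes in):

import pandas as pd

def model(dbt, session):
    dbt.config(
        fal_environment="ml",  # falls back to credentials.default_environment
        fal_machine="M",       # falls back to "S" when unset
    )
    df: pd.DataFrame = dbt.ref("upstream_model")
    return df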
dbt/adapters/fal_experimental/support/athena.py (new file)
@@ -0,0 +1,92 @@
from typing import Any
import six
from dbt.adapters.base.relation import BaseRelation
from dbt.contracts.connection import AdapterResponse
from dbt.adapters.fal.wrappers import FalCredentialsWrapper
import sqlalchemy
import pandas as pd
from dbt.adapters.base import BaseAdapter
from urllib.parse import quote_plus


def create_engine(adapter: BaseAdapter) -> Any:
    if isinstance(adapter.config.credentials, FalCredentialsWrapper):
        creds = adapter.config.credentials._db_creds
    else:
        # In an isolated environment, credentials are AthenaCredentials
        creds = adapter.config.credentials
    conn_str = (
        "awsathena+rest://:@athena.{region_name}.amazonaws.com:443/"
        "{schema_name}?s3_staging_dir={s3_staging_dir}"
        "&location={location}&compression=snappy"
    )
    return sqlalchemy.create_engine(
        conn_str.format(
            region_name=creds.region_name,
            schema_name=creds.schema,
            s3_staging_dir=quote_plus(creds.s3_staging_dir),
            location=quote_plus(creds.s3_staging_dir),
        )
    )


def drop_relation_if_it_exists(adapter: BaseAdapter, relation: BaseRelation) -> None:
    if adapter.get_relation(
        database=relation.database,
        schema=relation.schema,
        identifier=relation.identifier,
    ):
        adapter.drop_relation(relation)


def write_df_to_relation(adapter, dataframe, relation, if_exists) -> AdapterResponse:
    assert adapter.type() == "athena"
    if isinstance(adapter.config.credentials, FalCredentialsWrapper):
        creds = adapter.config.credentials._db_creds
    else:
        # In an isolated environment, credentials are AthenaCredentials
        creds = adapter.config.credentials

    # This is a quirk of dbt-athena-community, where they set
    # relation.schema = relation.identifier
    temp_relation = relation.replace_path(
        schema=relation.database,
        database=creds.database,
        # Athena complains when the table location contains x.__y
        identifier=f"dbt_fal_temp_{relation.schema}",
    )

    relation = temp_relation.replace_path(identifier=relation.schema)

    drop_relation_if_it_exists(adapter, temp_relation)

    alchemy_engine = create_engine(adapter)

    rows_affected = dataframe.to_sql(
        con=alchemy_engine,
        name=temp_relation.identifier,
        schema=temp_relation.schema,
        if_exists=if_exists,
        index=False,
    )

    adapter.cache.add(temp_relation)

    drop_relation_if_it_exists(adapter, relation)

    # Athena doesn't let us rename relations, so we do it by hand
    stmt = f"create table {relation} as select * from {temp_relation} with data"
    adapter.execute(six.text_type(stmt).strip())
    adapter.cache.add(relation)
    adapter.drop_relation(temp_relation)

    adapter.commit_if_has_connection()
    return AdapterResponse("OK", rows_affected=rows_affected)


def read_relation_as_df(adapter: BaseAdapter, relation: BaseRelation) -> pd.DataFrame:
    alchemy_engine = create_engine(adapter)

    # This is a dbt-athena-community quirk: table_name=relation.schema
    return pd.read_sql_table(
        con=alchemy_engine,
        table_name=relation.schema,
        schema=relation.database,
    )
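For illustration, this is the SQLAlchemy URL that `create_engine` above would produce for some hypothetical Athena credentials; note that `location` is filled from the same `s3_staging_dir` value.

from urllib.parse import quote_plus

region_name = "us-east-1"                          # hypothetical
schema_name = "analytics"                          # hypothetical
s3_staging_dir = "s3://my-bucket/athena-staging/"  # hypothetical

url = (
    "awsathena+rest://:@athena.{region_name}.amazonaws.com:443/"
    "{schema_name}?s3_staging_dir={s3_staging_dir}"
    "&location={location}&compression=snappy"
).format(
    region_name=region_name,
    schema_name=schema_name,
    s3_staging_dir=quote_plus(s3_staging_dir),
    location=quote_plus(s3_staging_dir),
)
print(url)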