fal 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fal might be problematic. Click here for more details.
- _fal_testing/utils.py +2 -2
- dbt/adapters/fal/__init__.py +21 -0
- dbt/adapters/fal/__version__.py +1 -0
- dbt/adapters/fal/connections.py +18 -0
- dbt/adapters/fal/impl.py +93 -0
- dbt/adapters/fal/load_db_profile.py +80 -0
- dbt/adapters/fal/wrappers.py +113 -0
- dbt/adapters/fal_experimental/__init__.py +11 -0
- dbt/adapters/fal_experimental/__version__.py +1 -0
- dbt/adapters/fal_experimental/adapter.py +149 -0
- dbt/adapters/fal_experimental/adapter_support.py +234 -0
- dbt/adapters/fal_experimental/connections.py +72 -0
- dbt/adapters/fal_experimental/impl.py +240 -0
- dbt/adapters/fal_experimental/support/athena.py +92 -0
- dbt/adapters/fal_experimental/support/bigquery.py +74 -0
- dbt/adapters/fal_experimental/support/duckdb.py +28 -0
- dbt/adapters/fal_experimental/support/postgres.py +88 -0
- dbt/adapters/fal_experimental/support/redshift.py +56 -0
- dbt/adapters/fal_experimental/support/snowflake.py +76 -0
- dbt/adapters/fal_experimental/support/trino.py +26 -0
- dbt/adapters/fal_experimental/telemetry/__init__.py +1 -0
- dbt/adapters/fal_experimental/telemetry/telemetry.py +411 -0
- dbt/adapters/fal_experimental/teleport.py +192 -0
- dbt/adapters/fal_experimental/teleport_adapter_support.py +23 -0
- dbt/adapters/fal_experimental/teleport_support/duckdb.py +122 -0
- dbt/adapters/fal_experimental/teleport_support/snowflake.py +72 -0
- dbt/adapters/fal_experimental/utils/__init__.py +50 -0
- dbt/adapters/fal_experimental/utils/environments.py +302 -0
- dbt/fal/adapters/python/__init__.py +3 -0
- dbt/fal/adapters/python/connections.py +319 -0
- dbt/fal/adapters/python/impl.py +291 -0
- dbt/fal/adapters/teleport/__init__.py +3 -0
- dbt/fal/adapters/teleport/impl.py +103 -0
- dbt/fal/adapters/teleport/info.py +73 -0
- dbt/include/fal/__init__.py +3 -0
- dbt/include/fal/dbt_project.yml +5 -0
- dbt/include/fal/macros/materializations/table.sql +46 -0
- dbt/include/fal/macros/teleport_duckdb.sql +8 -0
- dbt/include/fal/macros/teleport_snowflake.sql +31 -0
- dbt/include/fal_experimental/__init__.py +3 -0
- dbt/include/fal_experimental/dbt_project.yml +5 -0
- dbt/include/fal_experimental/macros/materializations/table.sql +36 -0
- fal/__init__.py +61 -11
- fal/dbt/__init__.py +11 -0
- fal/dbt/cli/__init__.py +1 -0
- fal/{cli → dbt/cli}/args.py +7 -2
- fal/{cli → dbt/cli}/cli.py +18 -3
- fal/{cli → dbt/cli}/dbt_runner.py +1 -1
- fal/{cli → dbt/cli}/fal_runner.py +6 -6
- fal/{cli → dbt/cli}/flow_runner.py +9 -9
- fal/{cli → dbt/cli}/model_generator/model_generator.py +5 -5
- fal/{cli → dbt/cli}/selectors.py +2 -2
- fal/{fal_script.py → dbt/fal_script.py} +4 -4
- {faldbt → fal/dbt/integration}/lib.py +2 -2
- {faldbt → fal/dbt/integration}/magics.py +2 -2
- {faldbt → fal/dbt/integration}/parse.py +7 -7
- {faldbt → fal/dbt/integration}/project.py +7 -7
- fal/dbt/integration/utils/yaml_helper.py +80 -0
- fal/dbt/new/project.py +43 -0
- fal/{node_graph.py → dbt/node_graph.py} +2 -2
- fal/{packages → dbt/packages}/dependency_analysis.py +32 -38
- fal/{packages → dbt/packages}/environments/__init__.py +3 -3
- fal/{packages → dbt/packages}/environments/base.py +2 -2
- fal/{packages → dbt/packages}/environments/conda.py +3 -3
- fal/{packages → dbt/packages}/environments/virtual_env.py +3 -3
- fal/{packages → dbt/packages}/isolated_runner.py +5 -5
- fal/{planner → dbt/planner}/executor.py +4 -4
- fal/{planner → dbt/planner}/plan.py +3 -3
- fal/{planner → dbt/planner}/schedule.py +5 -5
- fal/{planner → dbt/planner}/tasks.py +5 -5
- fal/{telemetry → dbt/telemetry}/telemetry.py +4 -4
- fal/{typing.py → dbt/typing.py} +2 -2
- fal/{utils.py → dbt/utils.py} +2 -2
- {fal-0.9.2.dist-info → fal-0.9.4.dist-info}/METADATA +98 -117
- fal-0.9.4.dist-info/RECORD +91 -0
- fal-0.9.4.dist-info/entry_points.txt +4 -0
- fal/cli/__init__.py +0 -1
- fal-0.9.2.dist-info/RECORD +0 -47
- fal-0.9.2.dist-info/entry_points.txt +0 -3
- {faldbt → dbt/adapters/fal_experimental}/utils/yaml_helper.py +0 -0
- /fal/{cli → dbt/cli}/model_generator/__init__.py +0 -0
- /fal/{cli → dbt/cli}/model_generator/module_check.py +0 -0
- /fal/{feature_store → dbt/feature_store}/__init__.py +0 -0
- /fal/{feature_store → dbt/feature_store}/feature.py +0 -0
- /fal/{packages → dbt/integration}/__init__.py +0 -0
- {faldbt → fal/dbt/integration}/logger.py +0 -0
- /fal/{planner → dbt/integration/utils}/__init__.py +0 -0
- {faldbt → fal/dbt/integration}/version.py +0 -0
- /fal/{telemetry → dbt/packages}/__init__.py +0 -0
- /fal/{packages → dbt/packages}/bridge.py +0 -0
- {faldbt → fal/dbt/planner}/__init__.py +0 -0
- {faldbt/utils → fal/dbt/telemetry}/__init__.py +0 -0
- {fal-0.9.2.dist-info → fal-0.9.4.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
"""
|
|
2
|
+
dbt-fal telemetry code uses source code from: https://github.com/ploomber/ploomber
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import datetime
|
|
6
|
+
import http.client as httplib
|
|
7
|
+
import warnings
|
|
8
|
+
import posthog
|
|
9
|
+
import pkg_resources
|
|
10
|
+
import yaml
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
import sys
|
|
14
|
+
import uuid
|
|
15
|
+
from functools import wraps
|
|
16
|
+
from typing import Any, List, Optional
|
|
17
|
+
import inspect
|
|
18
|
+
from dbt.config.runtime import RuntimeConfig
|
|
19
|
+
|
|
20
|
+
import platform
|
|
21
|
+
|
|
22
|
+
import atexit
|
|
23
|
+
|
|
24
|
+
# Schema version of the telemetry payload (bump when event fields change).
TELEMETRY_VERSION = "0.0.1"
# Default directory where fal keeps per-user state (uid/config files).
DEFAULT_HOME_DIR = "~/.fal"
# Sub-directory (under the home dir) holding the telemetry config files.
CONF_DIR = "stats"
# Users may relocate the fal home directory through FAL_HOME_DIR.
FAL_HOME_DIR = os.getenv("FAL_HOME_DIR", DEFAULT_HOME_DIR)

# Public (write-only) PostHog project key telemetry events are reported to.
posthog.project_api_key = "phc_Yf1tsGPPb4POvqVjelT3rPPv2c3FH91zYURyyL30Phy"

# Single id shared by every event emitted during this process invocation.
invocation_id = uuid.uuid4()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def shutdown():
    """Flush pending telemetry events and close the PostHog HTTP session."""
    posthog.shutdown()
    # HACK: while https://github.com/PostHog/posthog-python/pull/52 happens
    # posthog does not close its underlying requests session, so reach into
    # the module and close it explicitly to avoid leaking the connection pool.
    from posthog.request import _session as posthog_session

    posthog_session.close()


# Ensure buffered events are flushed when the interpreter exits.
atexit.register(shutdown)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def str_param(item: Any) -> str:
    """Validate that *item* is a string and return it unchanged.

    Raises:
        TypeError: if *item* is not a ``str``.
    """
    if isinstance(item, str):
        return item
    raise TypeError(f"Variable not supported/wrong type: {item} should be a str")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def opt_str_param(item: Any) -> Optional[str]:
    """Validate that *item* is either None or a string and return it.

    Raises:
        TypeError: if *item* is neither ``None`` nor a ``str``.
    """
    if item is not None and not isinstance(item, str):
        raise TypeError(f"Variable not supported/wrong type: {item} should be a str")
    return item
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def python_version():
    """Return the running interpreter version as 'major.minor.micro'."""
    info = sys.version_info
    return ".".join(str(part) for part in (info.major, info.minor, info.micro))
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def is_online():
    """Best-effort connectivity probe: HEAD request to www.google.com.

    Any failure (DNS, timeout, refused connection) is reported as offline;
    the connection is always closed.
    """
    connection = httplib.HTTPSConnection("www.google.com", timeout=1)
    try:
        connection.request("HEAD", "/")
    except Exception:
        return False
    else:
        return True
    finally:
        connection.close()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Will output if the code is within a container
|
|
76
|
+
def is_docker():
    """Detect whether the current process appears to run inside Docker.

    Checks for the /.dockerenv marker file, falling back to scanning
    /proc/self/cgroup for a 'docker' entry.  Any filesystem error is
    treated as "not in Docker".
    """
    try:
        if Path("/.dockerenv").exists():
            return True
        cgroup_file = Path("/proc/self/cgroup")
        return cgroup_file.exists() and any(
            "docker" in line for line in cgroup_file.read_text().splitlines()
        )
    except OSError:
        return False
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def is_github():
    """Return True if inside a GitHub Action"""
    return "GITHUB_ACTIONS" in os.environ
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def is_gitlab():
    """Return True if inside a GitLab CI"""
    return "GITLAB_CI" in os.environ
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def get_os():
    """Return the client platform name (e.g. 'Linux', 'Darwin', 'Windows')."""
    return platform.system()
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def dbt_installed_version():
    """Return the installed dbt-core version string, or None if unavailable.

    Bug fix: ``pkg_resources.get_distribution`` signals a missing package by
    raising ``DistributionNotFound`` (not ``ImportError``), so the original
    handler never fired and the lookup crashed when dbt-core was absent.
    """
    try:
        return pkg_resources.get_distribution("dbt-core").version
    except (pkg_resources.DistributionNotFound, ImportError):
        return None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_dbt_adapter_type(config: RuntimeConfig) -> str:
    """Return the ``type`` of the dbt target configured in *config*."""
    return config.to_target_dict()["type"]
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def fal_installed_version():
    """Return the installed dbt-fal version string, or None if unavailable.

    Bug fix: ``pkg_resources.get_distribution`` raises
    ``DistributionNotFound`` (not ``ImportError``) for a missing package,
    so the original handler could never catch the failure.
    """
    try:
        return pkg_resources.get_distribution("dbt-fal").version
    except (pkg_resources.DistributionNotFound, ImportError):
        return None
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def is_airflow():
    """Return True when running inside an Apache Airflow environment."""
    return any(var in os.environ for var in ("AIRFLOW_CONFIG", "AIRFLOW_HOME"))
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def get_home_dir():
    """Return the fal home directory path (as a string, possibly with '~').

    FAL_HOME_DIR already falls back to DEFAULT_HOME_DIR at import time; the
    extra guard here only covers it being set to an empty string.
    """
    return FAL_HOME_DIR or DEFAULT_HOME_DIR
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def check_dir_exist(input_location=None):
    """Ensure a directory under the fal home dir exists and return its Path.

    When *input_location* is falsy the fal home directory itself is used.
    The directory (and any missing parents) is created on first use, and
    '~' is expanded to the user's home.
    """
    base = get_home_dir()
    target = Path(base, input_location) if input_location else Path(base)
    target = target.expanduser()
    if not target.exists():
        target.mkdir(parents=True)
    return target
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def check_uid():
    """Return ``(uid, error, is_first_time)`` for the anonymous user id.

    Reads uid.yaml under the stats dir; on first use a fresh uuid4 is
    generated and persisted.  If persisting fails, ``("NO_UID", error,
    True)`` is returned so the caller can report the issue.
    """
    uid_path = Path(check_dir_exist(CONF_DIR), "uid.yaml")
    conf = read_conf_file(uid_path)  # file already exist due to version check

    if "uid" in conf.keys():
        return conf.get("uid") or "NO_UID", None, False

    new_uid = str(uuid.uuid4())
    write_error = write_conf_file(uid_path, {"uid": new_uid}, error=True)
    if write_error:
        return "NO_UID", write_error, True
    return new_uid, None, True
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def check_stats_enabled() -> bool:
    """
    Check if the user allows us to use telemetry. In order of precedence:

    1. If FAL_STATS_ENABLED is defined, check its value
    2. If DO_NOT_TRACK is defined, check its value
    3. Otherwise use the value in stats_enabled in the config.yaml file
    """
    fal_flag = os.environ.get("FAL_STATS_ENABLED")
    if fal_flag is not None:
        return fal_flag.lower().strip() not in ("0", "false", "")

    dnt_flag = os.environ.get("DO_NOT_TRACK")
    if dnt_flag is not None:
        return dnt_flag.lower().strip() not in ("1", "true")

    # Fall back to the on-disk config, creating it (opted in) on first run.
    config_path = Path(check_dir_exist(CONF_DIR), "config.yaml")
    if not config_path.exists():
        write_conf_file(config_path, {"stats_enabled": True})
        return True
    return read_conf_file(config_path).get("stats_enabled", True)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def check_first_time_usage():
    """Return True when the config file exists but no uid was written yet.

    That combination means telemetry was configured (or opted into) before
    this process ever generated an anonymous user id.
    """
    stats_dir = check_dir_exist(CONF_DIR)
    config_path = Path(stats_dir, "config.yaml")
    uid_conf = read_conf_file(Path(stats_dir, "uid.yaml"))
    return config_path.exists() and "uid" not in uid_conf.keys()
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def read_conf_file(conf_path):
    """Read a YAML mapping from *conf_path*, returning {} on any failure.

    Args:
        conf_path: pathlib.Path of the YAML file to read.

    Returns:
        The parsed mapping.  Bug fix: ``yaml.safe_load`` returns ``None``
        for an empty document, but callers (``check_uid``,
        ``check_stats_enabled``) immediately use dict methods on the
        result, so a ``None`` is normalized to ``{}`` here.
    """
    try:
        with conf_path.open("r") as file:
            conf = yaml.safe_load(file)
        return conf or {}
    except Exception as e:
        warnings.warn(f"Can't read config file {e}")
        return {}
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def write_conf_file(conf_path, to_write, error=None):
    """Serialize *to_write* as YAML into *conf_path* (created for future runs).

    Failures only emit a warning; when *error* is truthy the caught
    exception is additionally returned so the caller can inspect it,
    otherwise the function returns None.
    """
    try:
        with conf_path.open("w") as file:
            yaml.dump(to_write, file)
    except Exception as e:
        warnings.warn(f"Can't write to config file: {e}")
        if error:
            return e
    return None
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def log_api(
    action: str,
    total_runtime=None,
    config=None,
    additional_props: Optional[dict] = None,
):
    """
    This function logs through an API call, assigns parameters if missing like
    timestamp, event id and stats information.

    Args:
        action: event name (e.g. "cli_run_started").
        total_runtime: optional stringified duration of the logged action.
        config: optional dbt RuntimeConfig used to derive config hash and
            adapter type.
        additional_props: extra key/value pairs merged into the event.
    """
    # Respect the user's opt-out before doing any work.
    if not check_stats_enabled():
        return

    if not is_online():
        return

    additional_props = additional_props or {}

    event_id = uuid.uuid4()
    client_time = datetime.datetime.now()

    uid, uid_error, is_install = check_uid()

    if "NO_UID" in uid:
        additional_props["uid_issue"] = str(uid_error) if uid_error is not None else ""

    config_hash = ""
    if config is not None and hasattr(config, "hashed_name"):
        config_hash = str(config.hashed_name())

    opt_str_param(uid)
    str_param(action)

    props = {
        "tool": "dbt-fal",
        "config_hash": config_hash,
        "event_id": str(event_id),
        "invocation_id": str(invocation_id),
        "user_id": uid,
        "action": action,
        "client_time": str(client_time),
        "total_runtime": str(total_runtime),
        "python_version": python_version(),
        "dbt_version": dbt_installed_version(),
        # BUG FIX: get_dbt_adapter_type dereferences config, but config
        # defaults to None (and log_call passes None unless config=True),
        # which crashed every event emission with an AttributeError.
        "dbt_adapter": get_dbt_adapter_type(config) if config is not None else None,
        "docker_container": is_docker(),
        "airflow": is_airflow(),
        "github_action": is_github(),
        "gitlab_ci": is_gitlab(),
        "os": get_os(),
        "telemetry_version": TELEMETRY_VERSION,
        "$geoip_disable": True,  # This disables GeoIp despite the backend setting
        "$ip": None,  # This disables IP tracking
    }

    all_props = {**props, **additional_props}

    if "argv" in all_props:
        all_props["argv"] = _clean_args_list(all_props["argv"])

    if is_install:
        posthog.capture(distinct_id=uid, event="install_success", properties=all_props)

    posthog.capture(distinct_id=uid, event=action, properties=all_props)
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def log_call(action, log_args: Optional[List[str]] = None, config: bool = False):
    """Decorator factory: wrap a method and emit started/success/error events.

    Args:
        action: base event name; events are emitted as f"{action}_started",
            f"{action}_success" and f"{action}_error".
        log_args: names of the wrapped function's arguments to include in
            the event payload.  Bug fix: the default used to be a shared
            mutable ``[]`` (the classic mutable-default pitfall); it is now
            ``None`` and normalized inside the function — callers are
            unaffected.
        config: when True, read the dbt RuntimeConfig from the wrapped
            method's ``self.config`` and attach it to the events.
    """
    log_args = [] if log_args is None else log_args

    def _log_call(func):
        @wraps(func)
        def wrapper(*func_args, **func_kwargs):
            sig = inspect.signature(func).bind(*func_args, **func_kwargs)
            sig.apply_defaults()
            log_args_props = {arg: sig.arguments.get(arg) for arg in log_args}

            func_self = func_args[0]
            # Get the dbt config from the self object of the method
            dbt_config = func_self.config if config else None

            log_api(
                action=f"{action}_started",
                additional_props={
                    "argv": sys.argv,
                    "args": log_args_props,
                },
                config=dbt_config,
            )

            start = datetime.datetime.now()

            try:
                result = func(*func_args, **func_kwargs)
            except Exception as e:
                log_api(
                    action=f"{action}_error",
                    total_runtime=str(datetime.datetime.now() - start),
                    additional_props={
                        "exception": str(type(e)),
                        "argv": sys.argv,
                        "args": log_args_props,
                    },
                    config=dbt_config,
                )
                raise
            else:
                log_api(
                    action=f"{action}_success",
                    total_runtime=str(datetime.datetime.now() - start),
                    additional_props={
                        "argv": sys.argv,
                        "args": log_args_props,
                    },
                    config=dbt_config,
                )

                return result

        return wrapper

    return _log_call
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _clean_args_list(args: List[str]) -> List[str]:
|
|
363
|
+
ALLOWLIST = [
|
|
364
|
+
"--disable-logging",
|
|
365
|
+
"--project-dir",
|
|
366
|
+
"--profiles-dir",
|
|
367
|
+
"--defer",
|
|
368
|
+
"--threads",
|
|
369
|
+
"--thread",
|
|
370
|
+
"--state",
|
|
371
|
+
"--full-refresh",
|
|
372
|
+
"-s",
|
|
373
|
+
"--select",
|
|
374
|
+
"-m",
|
|
375
|
+
"--models",
|
|
376
|
+
"--model",
|
|
377
|
+
"--exclude",
|
|
378
|
+
"--selector",
|
|
379
|
+
"--all",
|
|
380
|
+
"run",
|
|
381
|
+
"dbt",
|
|
382
|
+
"-v",
|
|
383
|
+
"--version",
|
|
384
|
+
"--debug",
|
|
385
|
+
"--vars",
|
|
386
|
+
"--var",
|
|
387
|
+
"--target",
|
|
388
|
+
"build",
|
|
389
|
+
"clean",
|
|
390
|
+
"compile",
|
|
391
|
+
"debug",
|
|
392
|
+
"deps",
|
|
393
|
+
"docs",
|
|
394
|
+
"init",
|
|
395
|
+
"list",
|
|
396
|
+
"parse",
|
|
397
|
+
"seed",
|
|
398
|
+
"snapshot",
|
|
399
|
+
"source",
|
|
400
|
+
"test",
|
|
401
|
+
"rpc",
|
|
402
|
+
"run-operation",
|
|
403
|
+
]
|
|
404
|
+
REDACTED = "[REDACTED]"
|
|
405
|
+
output = []
|
|
406
|
+
for item in args:
|
|
407
|
+
if item in ALLOWLIST:
|
|
408
|
+
output.append(item)
|
|
409
|
+
else:
|
|
410
|
+
output.append(REDACTED)
|
|
411
|
+
return output
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import zipfile
|
|
4
|
+
import io
|
|
5
|
+
import functools
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from functools import partial
|
|
8
|
+
from tempfile import NamedTemporaryFile
|
|
9
|
+
from typing import Any, Callable, Dict, NewType, Optional
|
|
10
|
+
|
|
11
|
+
from dbt.config.runtime import RuntimeConfig
|
|
12
|
+
from dbt.contracts.connection import AdapterResponse
|
|
13
|
+
from dbt.flags import get_flags, Namespace
|
|
14
|
+
|
|
15
|
+
from fal_serverless import FalServerlessHost, isolated
|
|
16
|
+
from dbt.adapters.fal_experimental.connections import TeleportTypeEnum
|
|
17
|
+
from dbt.adapters.fal_experimental.utils.environments import (
|
|
18
|
+
EnvironmentDefinition,
|
|
19
|
+
get_default_pip_dependencies,
|
|
20
|
+
)
|
|
21
|
+
from dbt.adapters.fal_experimental.utils import (
|
|
22
|
+
extra_path,
|
|
23
|
+
get_fal_scripts_path,
|
|
24
|
+
retrieve_symbol,
|
|
25
|
+
)
|
|
26
|
+
from dbt.fal.adapters.teleport.info import TeleportInfo
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Maps a (lower-cased) relation name to its relative path in external storage.
DataLocation = NewType("DataLocation", Dict[str, str])
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _prepare_for_teleport(
|
|
33
|
+
function: Callable, teleport: TeleportInfo, locations: DataLocation
|
|
34
|
+
) -> Callable:
|
|
35
|
+
@functools.wraps(function)
|
|
36
|
+
def wrapped(relation: str, *args, **kwargs) -> Any:
|
|
37
|
+
relation = relation.lower()
|
|
38
|
+
return function(teleport, locations, relation, *args, **kwargs)
|
|
39
|
+
|
|
40
|
+
return wrapped
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _teleport_df_from_external_storage(
    teleport_info: TeleportInfo, locations: DataLocation, relation: str
) -> pd.DataFrame:
    """Load *relation* from the teleport external storage as a DataFrame.

    Raises RuntimeError when the relation has no recorded location or the
    teleport format is not parquet (the only format supported so far).
    """
    if relation not in locations:
        raise RuntimeError(f"Could not find url for '{relation}' in {locations}")

    # TODO: support more formats than parquet
    if teleport_info.format != "parquet":
        raise RuntimeError(f"Format {teleport_info.format} not supported")

    url = teleport_info.build_url(locations[relation])
    return pd.read_parquet(
        url, storage_options=_build_teleport_storage_options(teleport_info)
    )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _teleport_df_to_external_storage(
    teleport_info: TeleportInfo,
    locations: DataLocation,
    relation: str,
    data: pd.DataFrame,
):
    """Write *data* to external storage and record its path in *locations*.

    Returns the relative path the relation was written to.  Raises
    RuntimeError for any teleport format other than parquet.
    """
    if teleport_info.format != "parquet":
        raise RuntimeError(f"Format {teleport_info.format} not supported")

    relation_path = teleport_info.build_relation_path(relation)
    url = teleport_info.build_url(relation_path)
    data.to_parquet(
        url, storage_options=_build_teleport_storage_options(teleport_info)
    )
    # Remember where this relation lives so later reads can find it.
    locations[relation] = relation_path
    return relation_path
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _build_teleport_storage_options(teleport_info: TeleportInfo) -> Dict[str, str]:
    """Translate teleport credentials into pandas/fsspec storage options.

    S3 credentials become a key/secret mapping; local storage needs none.
    Any other storage type is rejected with a RuntimeError.
    """
    credentials = teleport_info.credentials
    if credentials.type == TeleportTypeEnum.REMOTE_S3:
        return {
            "key": credentials.s3_access_key_id,
            "secret": credentials.s3_access_key,
        }
    if credentials.type == TeleportTypeEnum.LOCAL:
        return {}
    raise RuntimeError(
        f"Teleport storage type {teleport_info.credentials.type} not supported"
    )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def run_with_teleport(
    code: str,
    teleport_info: TeleportInfo,
    locations: DataLocation,
    config: RuntimeConfig,
    local_packages: Optional[bytes] = None,
) -> str:
    """Execute the compiled model's `main` entrypoint with teleport I/O.

    Args:
        code: compiled model source; its `main` symbol (defined during
            dbt-fal's compilation) is the entrypoint.
        teleport_info: external storage description for reads/writes.
        locations: relation-name -> storage-path mapping.
        config: dbt runtime config (used to locate the fal scripts dir).
        local_packages: optional zipped fal scripts to unpack before running.
    """
    # BUG FIX: this used to be `str(get_fal_scripts_path(config))`, so every
    # Path operation below (.exists(), .parent.mkdir(), rmtree target)
    # raised AttributeError whenever local_packages was provided.  Keep the
    # Path object and stringify only for the sys.path manipulation.
    fal_scripts_path = get_fal_scripts_path(config)
    if local_packages is not None:
        if fal_scripts_path.exists():
            import shutil

            shutil.rmtree(fal_scripts_path)
        fal_scripts_path.parent.mkdir(parents=True, exist_ok=True)
        zip_file = zipfile.ZipFile(io.BytesIO(local_packages))
        zip_file.extractall(fal_scripts_path)

    # main symbol is defined during dbt-fal's compilation
    # and acts as an entrypoint for us to run the model.
    with extra_path(str(fal_scripts_path)):
        main = retrieve_symbol(code, "main")
        return main(
            read_df=_prepare_for_teleport(
                _teleport_df_from_external_storage, teleport_info, locations
            ),
            write_df=_prepare_for_teleport(
                _teleport_df_to_external_storage, teleport_info, locations
            ),
        )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def run_in_environment_with_teleport(
    environment: EnvironmentDefinition,
    code: str,
    teleport_info: TeleportInfo,
    locations: DataLocation,
    config: RuntimeConfig,
    adapter_type: str,
) -> AdapterResponse:
    """Run the 'main' function inside the given code on the
    specified environment.

    The environment_name must be defined inside fal_project.yml file
    in your project's root directory."""
    compressed_local_packages = None
    is_remote = type(environment.host) is FalServerlessHost

    deps = get_default_pip_dependencies(
        is_remote=is_remote,
        adapter_type=adapter_type,
        is_teleport=True,
    )

    fal_scripts_path = get_fal_scripts_path(config)

    if is_remote and fal_scripts_path.exists():
        # Ship the local fal scripts to the remote host as a zip archive.
        with NamedTemporaryFile() as temp_file:
            with zipfile.ZipFile(temp_file.name, "w", zipfile.ZIP_DEFLATED) as zip_file:
                for entry in fal_scripts_path.rglob("*"):
                    zip_file.write(entry, entry.relative_to(fal_scripts_path))

            compressed_local_packages = temp_file.read()

    execute_model = partial(
        run_with_teleport,
        code=code,
        teleport_info=teleport_info,
        locations=locations,
        config=config,
        local_packages=compressed_local_packages,
    )

    if environment.kind == "virtualenv":
        # BUG FIX: `requirements += deps` extended the list stored inside
        # environment.config in place, so every invocation kept appending
        # the default dependencies to the shared environment definition.
        # Build a fresh list instead.
        requirements = list(environment.config.get("requirements", [])) + deps
        isolated_function = isolated(
            kind="virtualenv", host=environment.host, requirements=requirements
        )(execute_model)
    elif environment.kind == "conda":
        # BUG FIX: `.pop("packages", [])` destructively removed the packages
        # entry from the shared environment config; copy it instead so the
        # environment definition stays intact for later runs.
        dependencies = list(environment.config.get("packages", []))
        dependencies.append({"pip": deps})
        env_dict = {
            "name": "dbt_fal_env",
            "channels": ["conda-forge", "defaults"],
            "dependencies": dependencies,
        }
        isolated_function = isolated(
            kind="conda", host=environment.host, env_dict=env_dict
        )(execute_model)
    else:
        # We should not reach this point, because environment types are validated when the
        # environment objects are created (in utils/environments.py).
        raise Exception(f"Environment type not supported: {environment.kind}")

    # Machine type is only applicable in FalServerlessHost
    if is_remote:
        isolated_function = isolated_function.on(machine_type=environment.machine_type)

    result = isolated_function()
    return result
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from dbt.adapters.base.impl import BaseAdapter
|
|
2
|
+
from dbt.adapters.fal_experimental.connections import TeleportCredentials, TeleportTypeEnum
|
|
3
|
+
|
|
4
|
+
from dbt.fal.adapters.teleport.impl import TeleportAdapter
|
|
5
|
+
|
|
6
|
+
def wrap_db_adapter(db_adapter: BaseAdapter, teleport_credentials: TeleportCredentials) -> TeleportAdapter:
    """Return a Teleport-capable wrapper around *db_adapter*.

    Adapters that already implement the Teleport interface are returned
    unchanged.  Otherwise a backend-specific wrapper is constructed for the
    supported adapters (duckdb, snowflake); anything else raises
    NotImplementedError.
    """
    if TeleportAdapter.is_teleport_adapter(db_adapter):
        return db_adapter

    adapter_type = db_adapter.type()

    # Wrap the adapter with a custom implementation
    if adapter_type == 'duckdb':
        import dbt.adapters.fal_experimental.teleport_support.duckdb as support_duckdb
        return support_duckdb.DuckDBAdapterTeleport(db_adapter, teleport_credentials)

    if adapter_type == 'snowflake':
        if teleport_credentials.type != TeleportTypeEnum.REMOTE_S3:
            raise RuntimeError("Snowflake teleporting works only with S3.")
        import dbt.adapters.fal_experimental.teleport_support.snowflake as support_snowflake
        return support_snowflake.SnowflakeAdapterTeleport(db_adapter, teleport_credentials)

    raise NotImplementedError(f"Teleport support has not been implemented for adapter {db_adapter.type()}")
|