dbt-python-imports 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ source_up_secure
2
+ layout_venv
@@ -0,0 +1,57 @@
1
+
2
+ name: Run tests
3
+
4
+ on:
5
+ push:
6
+ branches: [ main ]
7
+ pull_request:
8
+
9
+ jobs:
10
+
11
+ get-hatch-test-envs:
12
+ name: Get hatch test environments
13
+ runs-on: ubuntu-latest
14
+
15
+ steps:
16
+ - name: Checkout
17
+ uses: actions/checkout@v4
18
+ - name: Install the latest version of uv
19
+ uses: astral-sh/setup-uv@v7
20
+ with:
21
+ enable-cache: true
22
+ - name: List hatch envs
23
+ id: get-test-envs
24
+ run: |-
25
+ uv run hatch env show --json | jq -r '
26
+ with_entries(select(.key | startswith("hatch-test.")))
27
+ | "envs=\(@json)"
28
+ ' >> "$GITHUB_OUTPUT"
29
+
30
+ outputs:
31
+ envs: ${{ steps.get-test-envs.outputs.envs }}
32
+
33
+ test:
34
+ name: Run Hatch tests
35
+ needs: get-hatch-test-envs
36
+ runs-on: ubuntu-latest
37
+ strategy:
38
+ fail-fast: false
39
+ matrix:
40
+ env-name: ${{ fromJSON(needs.get-hatch-test-envs.outputs.envs).*.env-vars.ENV_NAME }}
41
+ steps:
42
+
43
+ - name: Checkout
44
+ uses: actions/checkout@v4
45
+ - name: Set up Python
46
+ uses: actions/setup-python@v5
47
+ with:
48
+ python-version: ${{ fromJSON(needs.get-hatch-test-envs.outputs.envs)[matrix.env-name].python }}
49
+
50
+ - name: Install the latest version of uv
51
+ uses: astral-sh/setup-uv@v7
52
+ with:
53
+ enable-cache: true
54
+
55
+ - name: Run tests for ${{ matrix.env-name }}
56
+ run: uv run hatch --env=${{ matrix.env-name }} test
57
+
@@ -0,0 +1,3 @@
1
+ *_cache
2
+ *.pyc
3
+ .vscode
@@ -0,0 +1,52 @@
1
+ Metadata-Version: 2.4
2
+ Name: dbt-python-imports
3
+ Version: 0.1.0
4
+ Summary: dbt plugin to add `modules.import` method to Jinja context to import arbitrary Python modules
5
+ Author-email: Tim Vergenz <vergenzt@gmail.com>
6
+ Requires-Python: >=3.8
7
+ Requires-Dist: dbt-core>=1.6
8
+ Description-Content-Type: text/markdown
9
+
10
+ # dbt-python-imports
11
+
12
+ This is a [dbt plugin][dbt-plugin] to allow importing arbitrary Python modules from Jinja templates.
13
+
14
+ [dbt-plugin]: https://github.com/dbt-labs/dbt-core/blob/fa96acb15f79ae4f10b1d78f311f5ef2f4ed645e/core/dbt/plugins/manager.py
15
+
16
+ ## Usage
17
+
18
+ 1. Install `dbt-python-imports` via PyPI into the same Python environment as dbt itself.
19
+ 2. Call `modules.import(module_name)` to import the given module from a dbt Jinja expression.
20
+
21
+ > [!warning]
22
+ > **This allows arbitrary code execution.** (That's kind of the point.)
23
+ >
24
+ > Jinja's sandboxing is not foolproof (e.g. some adapters allow arbitrary reads/writes to disk), so you already shouldn't incorporate any untrusted dbt code/packages. However the limited Jinja context available by default does currently make it *harder* to run arbitrary code.
25
+ >
26
+ > Make sure you're not installing any packages (dbt or Python) whose source you haven't inspected.
27
+
28
+ ## Example
29
+
30
+ ```console
31
+ # download the artifacts from your latest Databricks dbt job run
32
+
33
+ $ echo '
34
+ {%- macro fetch_dbt_artifacts(job_name='dbt build', extract_to=flags.TARGET_PATH ~ '/remote-state') %}
35
+ {#- https://databricks-sdk-py.readthedocs.io/en/latest/workspace/jobs/jobs.html #}
36
+ {%- set jobs_api = adapter.config.credentials.authenticate().api_client.jobs %}
37
+ {%- set job = jobs_api.list(name=job_name, limit=1) | first %}
38
+ {%- set job_run = jobs_api.list_runs(job_id=job["job_id"], limit=1, completed_only=true, expand_tasks=true) | first %}
39
+ {%- set task_run = job_run["tasks"] | first %}
40
+ {%- set dbt_output = jobs_api.get_run_output(run_id=task_run["run_id"]).dbt_output %}
41
+ {%- set download_url = dbt_output.artifacts_link %}
42
+
43
+ {%- set system = modules.import("dbt_common.clients.system") %}
44
+ {%- set tar_path = extract_to ~ '/dbt-artifacts.tar.gz' %}
45
+ {%- do system.download(download_url, tar_path) %}
46
+ {%- do system.untar_package(tar_path, extract_to) %}
47
+ {%- endmacro %}
48
+ ' > macros/fetch_dbt_artifacts.sql
49
+
50
+ $ dbt run-operation fetch_dbt_artifacts
51
+
52
+ ```
@@ -0,0 +1,43 @@
1
+ # dbt-python-imports
2
+
3
+ This is a [dbt plugin][dbt-plugin] to allow importing arbitrary Python modules from Jinja templates.
4
+
5
+ [dbt-plugin]: https://github.com/dbt-labs/dbt-core/blob/fa96acb15f79ae4f10b1d78f311f5ef2f4ed645e/core/dbt/plugins/manager.py
6
+
7
+ ## Usage
8
+
9
+ 1. Install `dbt-python-imports` via PyPI into the same Python environment as dbt itself.
10
+ 2. Call `modules.import(module_name)` to import the given module from a dbt Jinja expression.
11
+
12
+ > [!warning]
13
+ > **This allows arbitrary code execution.** (That's kind of the point.)
14
+ >
15
+ > Jinja's sandboxing is not foolproof (e.g. some adapters allow arbitrary reads/writes to disk), so you already shouldn't incorporate any untrusted dbt code/packages. However the limited Jinja context available by default does currently make it *harder* to run arbitrary code.
16
+ >
17
+ > Make sure you're not installing any packages (dbt or Python) whose source you haven't inspected.
18
+
19
+ ## Example
20
+
21
+ ```console
22
+ # download the artifacts from your latest Databricks dbt job run
23
+
24
+ $ echo '
25
+ {%- macro fetch_dbt_artifacts(job_name='dbt build', extract_to=flags.TARGET_PATH ~ '/remote-state') %}
26
+ {#- https://databricks-sdk-py.readthedocs.io/en/latest/workspace/jobs/jobs.html #}
27
+ {%- set jobs_api = adapter.config.credentials.authenticate().api_client.jobs %}
28
+ {%- set job = jobs_api.list(name=job_name, limit=1) | first %}
29
+ {%- set job_run = jobs_api.list_runs(job_id=job["job_id"], limit=1, completed_only=true, expand_tasks=true) | first %}
30
+ {%- set task_run = job_run["tasks"] | first %}
31
+ {%- set dbt_output = jobs_api.get_run_output(run_id=task_run["run_id"]).dbt_output %}
32
+ {%- set download_url = dbt_output.artifacts_link %}
33
+
34
+ {%- set system = modules.import("dbt_common.clients.system") %}
35
+ {%- set tar_path = extract_to ~ '/dbt-artifacts.tar.gz' %}
36
+ {%- do system.download(download_url, tar_path) %}
37
+ {%- do system.untar_package(tar_path, extract_to) %}
38
+ {%- endmacro %}
39
+ ' > macros/fetch_dbt_artifacts.sql
40
+
41
+ $ dbt run-operation fetch_dbt_artifacts
42
+
43
+ ```
@@ -0,0 +1,34 @@
1
+ [dirs.env]
2
+ virtual = ".venvs"
3
+
4
+ [envs.hatch-test]
5
+ installer = "uv"
6
+ dependency-groups = ["dev"]
7
+ dependencies = [
8
+ "dbt-core~={matrix:dbt-core-version}",
9
+ ]
10
+
11
+ [envs.hatch-test.env-vars]
12
+ ENV_NAME = "{env_name}"
13
+ DBT_SEND_ANONYMOUS_USAGE_STATS = "False" # https://docs.getdbt.com/reference/global-configs/usage-stats
14
+
15
+ [[envs.hatch-test.matrix]]
16
+ python = ["3.8"]
17
+ dbt-core-version = [
18
+ "1.6",
19
+ "1.7",
20
+ "1.8",
21
+ ]
22
+
23
+ [[envs.hatch-test.matrix]]
24
+ python = ["3.9"]
25
+ dbt-core-version = [
26
+ "1.9",
27
+ "1.10",
28
+ ]
29
+
30
+ [[envs.hatch-test.matrix]]
31
+ python = ["3.10"]
32
+ dbt-core-version = [
33
+ "1.11",
34
+ ]
@@ -0,0 +1,24 @@
1
+ [project]
2
+ name = "dbt-python-imports"
3
+ version = "0.1.0"
4
+ description = "dbt plugin to add `modules.import` method to Jinja context to import arbitrary Python modules"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Tim Vergenz", email = "vergenzt@gmail.com" }
8
+ ]
9
+ requires-python = ">=3.8"
10
+ dependencies = [
11
+ "dbt-core>=1.6",
12
+ ]
13
+
14
+ [dependency-groups]
15
+ dev = [
16
+ "hatch>=1.14.2",
17
+ "pytest>=8.3.5",
18
+ "dbt-postgres",
19
+ "tinypg>=0.3.0",
20
+ ]
21
+
22
+ [build-system]
23
+ requires = ["hatchling"]
24
+ build-backend = "hatchling.build"
@@ -0,0 +1,30 @@
1
+ from functools import wraps
2
+ from importlib import import_module
3
+
4
+ import dbt.context.base
5
+ from dbt.plugins.manager import dbtPlugin
6
+
7
+
8
class DbtPythonImportsPlugin(dbtPlugin):
    """dbt plugin exposing `modules.import` in the Jinja context so templates can import arbitrary Python modules."""

    def initialize(self) -> None:
        """Monkey-patch dbt's Jinja context-module factory to add an `import` entry."""
        # Keep a handle on dbt's original factory so the patched version can delegate to it.
        self._get_context_modules_orig = dbt.context.base.get_context_modules
        original = self._get_context_modules_orig

        @wraps(original)
        def patched():
            # Delegate to the saved original, then layer our extra entry on top.
            modules = dict(self._get_context_modules_orig())
            modules["import"] = self.import_module
            return modules

        # Every subsequent Jinja render now sees `modules.import`.
        dbt.context.base.get_context_modules = patched

    @staticmethod
    def import_module(module: str):
        """Import and return the named module; relative names are rejected."""
        # A relative name is meaningless without a package anchor, so fail fast.
        if module.startswith("."):
            raise ValueError("Relative imports not supported by `modules.import`!")
        return import_module(module)


# Entry point discovered by dbt's plugin manager.
plugins = [DbtPythonImportsPlugin]
@@ -0,0 +1,10 @@
1
+ test_profile:
2
+ outputs:
3
+ default:
4
+ type: postgres
5
+ host: '{{ env_var("DB_HOST") }}'
6
+ user: '{{ env_var("DB_USERNAME") }}'
7
+ password: '{{ env_var("DB_PASSWORD") }}'
8
+ port: '{{ env_var("DB_PORT") | as_native }}'
9
+ dbname: '{{ env_var("DB_NAME") }}'
10
+ schema: '{{ env_var("DB_SCHEMA") }}'
@@ -0,0 +1,206 @@
1
+ import json
2
+ import os
3
+ import re
4
+ from pathlib import Path
5
+ from textwrap import dedent
6
+ from urllib.parse import urlparse
7
+
8
+ import pytest
9
+ import tinypg
10
+ from pytest import fixture
11
+
12
# Directory holding profiles.yml (this test module's own directory).
DBT_PROFILES_DIR = Path(__file__).parent
DBT_PROFILE = "test_profile"

# Derive a valid dbt project name from the hatch env name (or the repo directory
# name as a fallback), replacing every non-word character with an underscore.
_raw_env_name = os.getenv("ENV_NAME", DBT_PROFILES_DIR.parent.name)
DBT_PROJECT_NAME = "test_" + re.sub(r"[^\w]", "_", _raw_env_name.lower())

# dbt_project.yml content; JSON is valid YAML, so dumping JSON keeps this simple.
_project_config = {
    "name": DBT_PROJECT_NAME,
    "flags": {
        "send_anonymous_usage_stats": False,
    },
    "version": "0.0.1",
    "profile": DBT_PROFILE,
    "model-paths": ["models"],
    "target-path": "target",
}
DBT_PROJECT_YML: str = json.dumps(_project_config)
29
+
30
+
31
@fixture
def temp_postgres_db_url():
    """Spin up a throwaway Postgres instance via tinypg and yield its connection URL."""
    db_ctx = tinypg.database()
    with db_ctx as url:
        yield url
35
+
36
+
37
@fixture
def dbt_project(tmp_path, temp_postgres_db_url, monkeypatch):
    """Scaffold a minimal dbt project in a temp dir, pointed at the throwaway Postgres DB.

    Writes dbt_project.yml, creates the models/ and target/ dirs, populates the
    DB_* environment variables consumed by profiles.yml from the database URL,
    and chdirs into the project. Returns the project path.
    """
    project_dir = tmp_path / DBT_PROJECT_NAME
    project_dir.mkdir()
    (project_dir / "dbt_project.yml").write_text(DBT_PROJECT_YML)
    for subdir in ("models", "target"):
        (project_dir / subdir).mkdir()

    monkeypatch.setenv("DBT_PROFILES_DIR", str(DBT_PROFILES_DIR))
    monkeypatch.setenv("DBT_PROJECT_DIR", str(project_dir))

    parts = urlparse(temp_postgres_db_url)
    env = {
        "DB_HOST": parts.hostname or "",
        "DB_USERNAME": parts.username or "",
        "DB_PASSWORD": parts.password or "",
        # NOTE(review): assumes the URL always carries an explicit port — confirm tinypg guarantees this.
        "DB_PORT": str(parts.port),
        "DB_NAME": parts.path[1:] or "",
        "DB_SCHEMA": "public",
    }
    for key, value in env.items():
        monkeypatch.setenv(key, value)
    monkeypatch.chdir(project_dir)

    return project_dir
58
+
59
+
60
def test_model_context(dbt_project: Path):
    """A model's Jinja context should expose `modules.import`."""
    model_sql = dbt_project / "models" / "my_model.sql"
    model_sql.write_text("""
    {%- set os_path = modules.import("os.path") %}
    select '{{ os_path.dirname("foo/bar/baz") }}'
    """)

    from dbt.cli.main import cli

    cli(["compile"], standalone_mode=False)

    # Compiled output lands under target/compiled/<project>/models/...
    compiled_sql = (
        dbt_project / "target/compiled" / model_sql.relative_to(dbt_project.parent)
    )
    assert compiled_sql.read_text().strip() == "select 'foo/bar'"
75
+
76
+
77
def test_properties_context(dbt_project: Path):
    """`modules.import` should be available while rendering schema-file (yml) properties.

    The model description is produced by calling the stdlib `calendar` module from
    Jinja, so the expected value is computed with the very same stdlib call rather
    than a brittle ~100-line hard-coded literal (whose exact whitespace must match
    `calendar`'s output byte-for-byte to pass).
    """
    import calendar

    model_path = dbt_project / "models" / "my_model.sql"
    model_path.write_text("select 1 as my_column")

    yml_path = dbt_project / "models" / "my_model.yml"
    yml_path.write_text(
        dedent(
            """
            version: 2
            models:
              - name: my_model
                description: |-
                  {{ modules.import("calendar").TextCalendar().formatyear(2026) }}
            """
        )
    )

    from dbt.cli.main import cli

    cli(["compile"], standalone_mode=False)

    manifest_path = dbt_project / "target" / "manifest.json"
    manifest = json.loads(manifest_path.read_text())
    description = manifest["nodes"][f"model.{DBT_PROJECT_NAME}.my_model"]["description"]
    # Compare against the same stdlib call the Jinja expression makes.
    assert description.strip() == calendar.TextCalendar().formatyear(2026).strip()
138
+
139
+
140
+
141
def test_project_yml_context(dbt_project: Path, monkeypatch):
    """`modules.import` should work while rendering `vars` in dbt_project.yml."""
    monkeypatch.setenv("EXAMPLE_URL", "https://example.org:8080/foo/bar?baz")

    project_yml = dbt_project / "dbt_project.yml"
    config = json.loads(project_yml.read_text())
    config["vars"] = {
        DBT_PROJECT_NAME: {
            "example_domain": "{{- modules.import('urllib.parse').urlparse(env_var('EXAMPLE_URL')).hostname }}"
        },
    }
    project_yml.write_text(json.dumps(config))

    model_sql = dbt_project / "models" / "my_model.sql"
    model_sql.write_text("select '{{ var('example_domain') }}'")

    from dbt.cli.main import cli

    cli(["compile"], standalone_mode=False)

    compiled_sql = (
        dbt_project / "target/compiled" / model_sql.relative_to(dbt_project.parent)
    )
    assert compiled_sql.read_text().strip() == "select 'example.org'"
169
+
170
+
171
@pytest.mark.xfail(reason="dbt doesn't call set_up_plugin_manager until after profiles.yml is rendered")
def test_profiles_yml_context(dbt_project: Path, temp_postgres_db_url: str, monkeypatch, capsys):
    """`modules.import` would ideally be usable while rendering profiles.yml.

    Expected to fail for now: dbt sets up its plugin manager only after
    profiles.yml has been rendered, so `modules.import` is not yet patched in.
    """
    # Drop the env/cwd changes made by the dbt_project fixture; this test points
    # both the project dir and the profiles dir at the scaffolded project itself.
    monkeypatch.undo()

    monkeypatch.setenv("DBT_PROJECT_DIR", str(dbt_project))
    monkeypatch.setenv("DBT_PROFILES_DIR", str(dbt_project))
    monkeypatch.setenv("DB_URI", temp_postgres_db_url + "?schema=my_schema")

    yml_path = dbt_project / "profiles.yml"
    yml_path.write_text(
        dedent(
            """
            test_profile:
              outputs:
                default:
                  type: postgres
                  host: '{%- set parse = modules.import("urllib.parse") %}{{ parse.urlparse(env_var("DB_URI")).hostname }}'
                  user: '{%- set parse = modules.import("urllib.parse") %}{{ parse.urlparse(env_var("DB_URI")).username }}'
                  password: '{%- set parse = modules.import("urllib.parse") %}{{ parse.urlparse(env_var("DB_URI")).password }}'
                  port: '{%- set parse = modules.import("urllib.parse") %}{{ parse.urlparse(env_var("DB_URI")).port }}'
                  dbname: '{%- set parse = modules.import("urllib.parse") %}{{ parse.urlparse(env_var("DB_URI")).path.split("/")[1] }}'
                  schema: '{%- set parse = modules.import("urllib.parse") %}{{ parse.parse_qs(parse.urlparse(env_var("DB_URI")).query)["schema"][0] }}'
            """
        )
    )

    model_path = dbt_project / "models" / "my_model.sql"
    model_path.write_text("select 1 as my_column")

    from dbt.cli.main import cli

    cli(["run"], standalone_mode=False)
    capsys.readouterr()  # discard `run` output so the next capture holds only `show` output

    # standalone_mode=False so click returns instead of calling sys.exit,
    # letting the assertion below actually execute (the other cli calls in this
    # module already do this).
    cli(
        ["show", "-q", "--output=json", "--inline", "select * from my_schema.my_model"],
        standalone_mode=False,
    )
    assert json.loads(capsys.readouterr().out) == {"show": [{"my_column": 1}]}