netbek-dw-lib 1.32.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. netbek_dw_lib-1.32.0/PKG-INFO +141 -0
  2. netbek_dw_lib-1.32.0/README.md +71 -0
  3. netbek_dw_lib-1.32.0/pyproject.toml +148 -0
  4. netbek_dw_lib-1.32.0/src/dw_lib/__init__.py +0 -0
  5. netbek_dw_lib-1.32.0/src/dw_lib/cloud/__init__.py +9 -0
  6. netbek_dw_lib-1.32.0/src/dw_lib/cloud/adapters/__init__.py +0 -0
  7. netbek_dw_lib-1.32.0/src/dw_lib/cloud/adapters/s3.py +49 -0
  8. netbek_dw_lib-1.32.0/src/dw_lib/cloud/types.py +12 -0
  9. netbek_dw_lib-1.32.0/src/dw_lib/cloud/utils.py +17 -0
  10. netbek_dw_lib-1.32.0/src/dw_lib/constants.py +37 -0
  11. netbek_dw_lib-1.32.0/src/dw_lib/dagster/__init__.py +0 -0
  12. netbek_dw_lib-1.32.0/src/dw_lib/dagster/asset_utils.py +118 -0
  13. netbek_dw_lib-1.32.0/src/dw_lib/database/__init__.py +40 -0
  14. netbek_dw_lib-1.32.0/src/dw_lib/database/adapters/__init__.py +0 -0
  15. netbek_dw_lib-1.32.0/src/dw_lib/database/adapters/base.py +402 -0
  16. netbek_dw_lib-1.32.0/src/dw_lib/database/adapters/clickhouse.py +417 -0
  17. netbek_dw_lib-1.32.0/src/dw_lib/database/adapters/duckdb.py +215 -0
  18. netbek_dw_lib-1.32.0/src/dw_lib/database/adapters/postgres.py +642 -0
  19. netbek_dw_lib-1.32.0/src/dw_lib/database/types.py +290 -0
  20. netbek_dw_lib-1.32.0/src/dw_lib/database/utils.py +33 -0
  21. netbek_dw_lib-1.32.0/src/dw_lib/dbt/__init__.py +1172 -0
  22. netbek_dw_lib-1.32.0/src/dw_lib/dbt/polyfactory/__init__.py +0 -0
  23. netbek_dw_lib-1.32.0/src/dw_lib/dbt/polyfactory/factories/__init__.py +0 -0
  24. netbek_dw_lib-1.32.0/src/dw_lib/dbt/polyfactory/factories/sqlmodel_factory.py +39 -0
  25. netbek_dw_lib-1.32.0/src/dw_lib/dbt/polyfactory/mixins.py +35 -0
  26. netbek_dw_lib-1.32.0/src/dw_lib/dbt/types.py +167 -0
  27. netbek_dw_lib-1.32.0/src/dw_lib/dbt/utils.py +238 -0
  28. netbek_dw_lib-1.32.0/src/dw_lib/exceptions.py +66 -0
  29. netbek_dw_lib-1.32.0/src/dw_lib/loader.py +302 -0
  30. netbek_dw_lib-1.32.0/src/dw_lib/peerdb.py +1358 -0
  31. netbek_dw_lib-1.32.0/src/dw_lib/sqlalchemy/__init__.py +0 -0
  32. netbek_dw_lib-1.32.0/src/dw_lib/sqlalchemy/clickhouse/__init__.py +0 -0
  33. netbek_dw_lib-1.32.0/src/dw_lib/sqlalchemy/clickhouse/types.py +94 -0
  34. netbek_dw_lib-1.32.0/src/dw_lib/types.py +64 -0
  35. netbek_dw_lib-1.32.0/src/dw_lib/utils/__init__.py +0 -0
  36. netbek_dw_lib-1.32.0/src/dw_lib/utils/environ.py +12 -0
  37. netbek_dw_lib-1.32.0/src/dw_lib/utils/filesystem.py +60 -0
  38. netbek_dw_lib-1.32.0/src/dw_lib/utils/profiling.py +52 -0
  39. netbek_dw_lib-1.32.0/src/dw_lib/utils/python_utils.py +20 -0
  40. netbek_dw_lib-1.32.0/src/dw_lib/utils/sqlmodel_utils.py +260 -0
  41. netbek_dw_lib-1.32.0/src/dw_lib/utils/template.py +22 -0
  42. netbek_dw_lib-1.32.0/src/dw_lib/utils/typer_utils.py +13 -0
@@ -0,0 +1,141 @@
1
+ Metadata-Version: 2.3
2
+ Name: netbek-dw-lib
3
+ Version: 1.32.0
4
+ Summary: Tools for working with Postgres, ClickHouse, and DuckDB.
5
+ Requires-Dist: jinja2>=3.1.6
6
+ Requires-Dist: lazy-loader>=0.5
7
+ Requires-Dist: packaging>=26.0
8
+ Requires-Dist: psutil>=7.1.3
9
+ Requires-Dist: pydantic>=2.12.4
10
+ Requires-Dist: pydash>=8.0.5
11
+ Requires-Dist: requests>=2.32.5
12
+ Requires-Dist: ruamel-yaml>=0.19.1
13
+ Requires-Dist: sqlalchemy>=2.0.44
14
+ Requires-Dist: sqlglot>=28.0.0,<28.1.0
15
+ Requires-Dist: sqlmodel>=0.0.27
16
+ Requires-Dist: sqlparse>=0.5.3
17
+ Requires-Dist: clickhouse-connect>=0.10.0 ; extra == 'clickhouse'
18
+ Requires-Dist: clickhouse-sqlalchemy ; extra == 'clickhouse'
19
+ Requires-Dist: sqlglot>=28.0.0,<28.1.0 ; extra == 'clickhouse'
20
+ Requires-Dist: sqlmodel>=0.0.27 ; extra == 'clickhouse'
21
+ Requires-Dist: dagster>=1.12.19 ; extra == 'dagster'
22
+ Requires-Dist: dagster-dbt>=0.28.19 ; extra == 'dagster'
23
+ Requires-Dist: clickhouse-connect>=0.10.0 ; extra == 'dbt'
24
+ Requires-Dist: clickhouse-sqlalchemy ; extra == 'dbt'
25
+ Requires-Dist: dbt-clickhouse ; extra == 'dbt'
26
+ Requires-Dist: dbt-core>=1.10.0,<1.11 ; extra == 'dbt'
27
+ Requires-Dist: livereload>=2.7.1 ; extra == 'dbt'
28
+ Requires-Dist: opentelemetry-sdk>=1.39.1 ; extra == 'dbt'
29
+ Requires-Dist: polyfactory>=3.0.0 ; extra == 'dbt'
30
+ Requires-Dist: pydantic>=2.12.4 ; extra == 'dbt'
31
+ Requires-Dist: pydash>=8.0.5 ; extra == 'dbt'
32
+ Requires-Dist: ruamel-yaml>=0.19.1 ; extra == 'dbt'
33
+ Requires-Dist: sqlglot>=28.0.0,<28.1.0 ; extra == 'dbt'
34
+ Requires-Dist: sqlmodel>=0.0.27 ; extra == 'dbt'
35
+ Requires-Dist: duckdb>=1.4.2 ; extra == 'duckdb'
36
+ Requires-Dist: duckdb-engine>=0.17.0 ; extra == 'duckdb'
37
+ Requires-Dist: sqlglot>=28.0.0,<28.1.0 ; extra == 'duckdb'
38
+ Requires-Dist: sqlmodel>=0.0.27 ; extra == 'duckdb'
39
+ Requires-Dist: boto3>=1.41.1 ; extra == 'loader'
40
+ Requires-Dist: chdb>=3.7.2 ; extra == 'loader'
41
+ Requires-Dist: jinja2>=3.1.6 ; extra == 'loader'
42
+ Requires-Dist: pydantic>=2.12.4 ; extra == 'loader'
43
+ Requires-Dist: rich>=14.2.0 ; extra == 'loader'
44
+ Requires-Dist: clickhouse-connect>=0.10.0 ; extra == 'peerdb'
45
+ Requires-Dist: clickhouse-sqlalchemy ; extra == 'peerdb'
46
+ Requires-Dist: pydantic>=2.12.4 ; extra == 'peerdb'
47
+ Requires-Dist: pydash>=8.0.5 ; extra == 'peerdb'
48
+ Requires-Dist: requests>=2.32.5 ; extra == 'peerdb'
49
+ Requires-Dist: rich>=14.2.0 ; extra == 'peerdb'
50
+ Requires-Dist: ruamel-yaml>=0.19.1 ; extra == 'peerdb'
51
+ Requires-Dist: sqlalchemy>=2.0.44 ; extra == 'peerdb'
52
+ Requires-Dist: sqlglot>=28.0.0 ; extra == 'peerdb'
53
+ Requires-Dist: sqlmodel>=0.0.27 ; extra == 'peerdb'
54
+ Requires-Dist: psycopg[binary]>=3.3.3 ; extra == 'psycopg'
55
+ Requires-Dist: sqlglot>=28.0.0,<28.1.0 ; extra == 'psycopg'
56
+ Requires-Dist: sqlmodel>=0.0.27 ; extra == 'psycopg'
57
+ Requires-Dist: psycopg2-binary>=2.9.11 ; extra == 'psycopg2'
58
+ Requires-Dist: sqlglot>=28.0.0,<28.1.0 ; extra == 'psycopg2'
59
+ Requires-Dist: sqlmodel>=0.0.27 ; extra == 'psycopg2'
60
+ Requires-Python: ==3.13.*
61
+ Provides-Extra: clickhouse
62
+ Provides-Extra: dagster
63
+ Provides-Extra: dbt
64
+ Provides-Extra: duckdb
65
+ Provides-Extra: loader
66
+ Provides-Extra: peerdb
67
+ Provides-Extra: psycopg
68
+ Provides-Extra: psycopg2
69
+ Description-Content-Type: text/markdown
70
+
71
+ # dw-lib
72
+
73
+ Tools for working with Postgres, ClickHouse, and DuckDB.
74
+
75
+ ## Development: Installation
76
+
77
+ 1. Clone the repo:
78
+
79
+ ```shell
80
+ git clone git@github.com:netbek/dw-lib.git
81
+ ```
82
+
83
+ 2. Install [Docker Engine v23 or higher](https://docs.docker.com/engine/install/) and [Docker Compose v2 or higher](https://docs.docker.com/compose/install/). Follow the links for instructions or run this script:
84
+
85
+ ```shell
86
+ ./scripts/install.sh docker
87
+ ```
88
+
89
+ 3. Install Nix:
90
+
91
+ ```shell
92
+ sh <(curl -L https://nixos.org/nix/install) --daemon
93
+ ```
94
+
95
+ 4. Configure Nix. Edit `/etc/nix/nix.conf` (for a multi-user installation) or `~/.config/nix/nix.conf` (for a single-user installation) to include the following lines:
96
+
97
+ ```shell
98
+ experimental-features = nix-command flakes
99
+ trusted-users = root <USER>
100
+ ```
101
+
102
+ Replace `<USER>` with your username on your computer.
103
+
104
+ 5. Install direnv:
105
+
106
+ ```shell
107
+ sudo apt install direnv
108
+ ```
109
+
110
+ 6. Enable direnv in your shell by adding a line to your shell configuration file.
111
+
112
+ For Bash, edit `~/.bashrc`:
113
+
114
+ ```shell
115
+ eval "$(direnv hook bash)"
116
+ ```
117
+
118
+ 7. Allow `.envrc`:
119
+
120
+ ```shell
121
+ direnv allow
122
+ ```
123
+
124
+ 8. Enter a [PyPI API token](https://pypi.org/manage/account/#api-tokens) as the password in `.pypirc`.
125
+
126
+ ## Development: Usage
127
+
128
+ Build and publish the Python distribution package:
129
+
130
+ ```shell
131
+ make bump-version [major|minor|patch]
132
+ git push
133
+ make build
134
+ git push
135
+ make create-release
136
+ make publish
137
+ ```
138
+
139
+ ## License
140
+
141
+ Copyright (c) 2025 Hein Bekker. Licensed under the GNU Affero General Public License, version 3.
@@ -0,0 +1,71 @@
1
+ # dw-lib
2
+
3
+ Tools for working with Postgres, ClickHouse, and DuckDB.
4
+
5
+ ## Development: Installation
6
+
7
+ 1. Clone the repo:
8
+
9
+ ```shell
10
+ git clone git@github.com:netbek/dw-lib.git
11
+ ```
12
+
13
+ 2. Install [Docker Engine v23 or higher](https://docs.docker.com/engine/install/) and [Docker Compose v2 or higher](https://docs.docker.com/compose/install/). Follow the links for instructions or run this script:
14
+
15
+ ```shell
16
+ ./scripts/install.sh docker
17
+ ```
18
+
19
+ 3. Install Nix:
20
+
21
+ ```shell
22
+ sh <(curl -L https://nixos.org/nix/install) --daemon
23
+ ```
24
+
25
+ 4. Configure Nix. Edit `/etc/nix/nix.conf` (for a multi-user installation) or `~/.config/nix/nix.conf` (for a single-user installation) to include the following lines:
26
+
27
+ ```shell
28
+ experimental-features = nix-command flakes
29
+ trusted-users = root <USER>
30
+ ```
31
+
32
+ Replace `<USER>` with your username on your computer.
33
+
34
+ 5. Install direnv:
35
+
36
+ ```shell
37
+ sudo apt install direnv
38
+ ```
39
+
40
+ 6. Enable direnv in your shell by adding a line to your shell configuration file.
41
+
42
+ For Bash, edit `~/.bashrc`:
43
+
44
+ ```shell
45
+ eval "$(direnv hook bash)"
46
+ ```
47
+
48
+ 7. Allow `.envrc`:
49
+
50
+ ```shell
51
+ direnv allow
52
+ ```
53
+
54
+ 8. Enter a [PyPI API token](https://pypi.org/manage/account/#api-tokens) as the password in `.pypirc`.
55
+
56
+ ## Development: Usage
57
+
58
+ Build and publish the Python distribution package:
59
+
60
+ ```shell
61
+ make bump-version [major|minor|patch]
62
+ git push
63
+ make build
64
+ git push
65
+ make create-release
66
+ make publish
67
+ ```
68
+
69
+ ## License
70
+
71
+ Copyright (c) 2025 Hein Bekker. Licensed under the GNU Affero General Public License, version 3.
@@ -0,0 +1,148 @@
1
+ [project]
2
+ name = "netbek-dw-lib"
3
+ version = "1.32.0"
4
+ description = "Tools for working with Postgres, ClickHouse, and DuckDB."
5
+ readme = "README.md"
6
+ requires-python = "==3.13.*"
7
+
8
+ # sqlglot pinned for compatibility with dagster-dbt https://github.com/dagster-io/dagster/blob/1.12.19/python_modules/libraries/dagster-dbt/pyproject.toml#L33
9
+ dependencies = [
10
+ "jinja2>=3.1.6",
11
+ "lazy-loader>=0.5",
12
+ "packaging>=26.0",
13
+ "psutil>=7.1.3",
14
+ "pydantic>=2.12.4",
15
+ "pydash>=8.0.5",
16
+ "requests>=2.32.5",
17
+ "ruamel-yaml>=0.19.1",
18
+ "sqlalchemy>=2.0.44",
19
+ "sqlglot>=28.0.0,<28.1.0",
20
+ "sqlmodel>=0.0.27",
21
+ "sqlparse>=0.5.3",
22
+ ]
23
+
24
+ [project.optional-dependencies]
25
+ clickhouse = [
26
+ "clickhouse-connect>=0.10.0",
27
+ "clickhouse-sqlalchemy",
28
+ "sqlglot>=28.0.0,<28.1.0",
29
+ "sqlmodel>=0.0.27",
30
+ ]
31
+
32
+ duckdb = [
33
+ "duckdb>=1.4.2",
34
+ "duckdb-engine>=0.17.0",
35
+ "sqlglot>=28.0.0,<28.1.0",
36
+ "sqlmodel>=0.0.27",
37
+ ]
38
+
39
+ psycopg = [
40
+ "psycopg[binary]>=3.3.3",
41
+ "sqlglot>=28.0.0,<28.1.0",
42
+ "sqlmodel>=0.0.27",
43
+ ]
44
+
45
+ psycopg2 = [
46
+ "psycopg2-binary>=2.9.11",
47
+ "sqlglot>=28.0.0,<28.1.0",
48
+ "sqlmodel>=0.0.27",
49
+ ]
50
+
51
+ dagster = [
52
+ "dagster>=1.12.19",
53
+ "dagster-dbt>=0.28.19",
54
+ ]
55
+
56
+ # dbt-core pinned for compatibility with dbt-clickhouse https://github.com/netbek/dbt-clickhouse/blob/v1.10.0-batch/pyproject.toml#L26
57
+ dbt = [
58
+ "clickhouse-connect>=0.10.0",
59
+ "clickhouse-sqlalchemy",
60
+ "dbt-clickhouse",
61
+ "dbt-core>=1.10.0,<1.11",
62
+ "livereload>=2.7.1",
63
+ "opentelemetry-sdk>=1.39.1",
64
+ "polyfactory>=3.0.0",
65
+ "pydantic>=2.12.4",
66
+ "pydash>=8.0.5",
67
+ "ruamel-yaml>=0.19.1",
68
+ "sqlglot>=28.0.0,<28.1.0",
69
+ "sqlmodel>=0.0.27",
70
+ ]
71
+
72
+ loader = [
73
+ "boto3>=1.41.1",
74
+ "chdb>=3.7.2",
75
+ "jinja2>=3.1.6",
76
+ "pydantic>=2.12.4",
77
+ "rich>=14.2.0",
78
+ ]
79
+
80
+ peerdb = [
81
+ "clickhouse-connect>=0.10.0",
82
+ "clickhouse-sqlalchemy",
83
+ "pydantic>=2.12.4",
84
+ "pydash>=8.0.5",
85
+ "requests>=2.32.5",
86
+ "rich>=14.2.0",
87
+ "ruamel-yaml>=0.19.1",
88
+ "sqlalchemy>=2.0.44",
89
+ "sqlglot>=28.0.0",
90
+ "sqlmodel>=0.0.27",
91
+ ]
92
+
93
+ [dependency-groups]
94
+ dev = [
95
+ "docker>=7.1.0",
96
+ "pre-commit>=4.5.1",
97
+ "pytest>=9.0.2",
98
+ "pytest-docker>=3.2.5",
99
+ "ruff>=0.15.0",
100
+ "twine>=6.2.0",
101
+ "ty>=0.0.23",
102
+ ]
103
+
104
+ [build-system]
105
+ requires = ["uv_build>=0.9.17,<0.12.0"]
106
+ build-backend = "uv_build"
107
+
108
+ [tool.isort]
109
+ force_alphabetical_sort = true
110
+ line_length = 100
111
+ profile = "black"
112
+ py_version = 313
113
+
114
+ [tool.ruff]
115
+ line-length = 100
116
+ target-version = "py313"
117
+ extend-exclude = ["./vendor"]
118
+
119
+ [tool.ruff.lint]
120
+ ignore = ["D100", "D101", "D102", "D103", "D104", "D107", "D205"]
121
+
122
+ # [tool.pyright]
123
+ # include = ["src", "tests"]
124
+ # pythonVersion = "3.13"
125
+ # typeCheckingMode = "basic"
126
+ # reportPrivateImportUsage = "none"
127
+
128
+ [tool.ty.rules]
129
+ invalid-method-override = "ignore" # https://github.com/astral-sh/ty/issues/2154
130
+
131
+ [tool.ty.src]
132
+ include = ["src", "tests"]
133
+
134
+ [tool.pytest]
135
+ addopts = ["-x", "--no-header", "--container-scope=module"]
136
+ filterwarnings = [
137
+ "ignore:The 'u' type code is deprecated:DeprecationWarning", # Caused by clickhouse_connect
138
+ "ignore::DeprecationWarning:dbt_common.invocation", # https://github.com/dbt-labs/dbt-core/issues/9791
139
+ "ignore::DeprecationWarning:dbt.cli.options" # https://github.com/dbt-labs/dbt-core/issues/12038
140
+ ]
141
+ markers = ["docker_compose_file", "docker_skip_wait_until_responsive"]
142
+
143
+ [tool.uv.build-backend]
144
+ module-name = "dw_lib"
145
+
146
+ [tool.uv.sources]
147
+ clickhouse-sqlalchemy = { git = "https://github.com/netbek/clickhouse-sqlalchemy.git", rev = "datetime-uuid" }
148
+ dbt-clickhouse = { git = "https://github.com/netbek/dbt-clickhouse.git", rev = "v1.10.0-batch" }
File without changes
@@ -0,0 +1,9 @@
1
+ from .adapters.s3 import S3Adapter
2
+ from .types import S3Settings
3
+ from .utils import s3_to_endpoint_uri
4
+
5
+ __all__ = [
6
+ "s3_to_endpoint_uri",
7
+ "S3Adapter",
8
+ "S3Settings",
9
+ ]
@@ -0,0 +1,49 @@
1
+ from ..types import S3Settings
2
+ from botocore.client import Config
3
+ from typing import Any
4
+
5
+ import boto3
6
+
7
+
8
class S3Adapter:
    """Small convenience wrapper around a boto3 S3 client built from ``S3Settings``."""

    def __init__(self, settings: S3Settings) -> None:
        self.settings = settings

    @property
    def url(self) -> str:
        """Return the ``s3://`` URL of the configured bucket (without any key prefix)."""
        return f"s3://{self.settings.bucket}"

    def create_client(self):
        """
        Create a new boto3 S3 client for the configured endpoint.

        The endpoint URL is ``http://`` or ``https://`` (depending on ``settings.use_ssl``)
        followed by ``settings.endpoint``. A fresh client is built on every call.
        """
        scheme = "https" if self.settings.use_ssl else "http"

        return boto3.client(
            "s3",
            endpoint_url=f"{scheme}://{self.settings.endpoint}",
            aws_access_key_id=self.settings.key_id,
            aws_secret_access_key=self.settings.secret,
            config=Config(signature_version="s3v4"),
            region_name=self.settings.region,
        )

    def can_connect(self) -> bool:
        """Return ``True`` if a ``head_bucket`` call on the configured bucket succeeds."""
        client = self.create_client()

        try:
            client.head_bucket(Bucket=self.settings.bucket)
        except Exception:
            # Deliberately broad: any failure is reported as "cannot connect" instead of raising.
            return False

        return True

    def list_objects(self, prefix: str | None = None) -> list[dict[str, Any]]:
        """
        Return the object entries of the configured bucket, optionally filtered by key prefix.

        Args:
            prefix: Only list keys starting with this prefix. ``None`` lists the whole bucket.

        Returns:
            The ``Contents`` entries of every result page, or an empty list if nothing matches.
        """
        client = self.create_client()

        # boto3 rejects ``Prefix=None`` during parameter validation, so only pass it when set.
        params: dict[str, Any] = {"Bucket": self.settings.bucket}
        if prefix is not None:
            params["Prefix"] = prefix

        # A single list_objects_v2 call returns one page only; paginate to collect every key.
        paginator = client.get_paginator("list_objects_v2")
        objects: list[dict[str, Any]] = []
        for page in paginator.paginate(**params):
            # "Contents" is absent from the response when a page has no matching objects.
            objects.extend(page.get("Contents", []))

        return objects
@@ -0,0 +1,12 @@
1
+ from pydantic import BaseModel
2
+
3
+
4
# Connection settings for an S3 or S3-compatible object store.
class S3Settings(BaseModel):
    # Access key ID (S3Adapter passes it to boto3 as aws_access_key_id).
    key_id: str
    # Secret access key (S3Adapter passes it to boto3 as aws_secret_access_key).
    secret: str
    # Region name handed to the client.
    region: str
    # Endpoint without scheme; S3Adapter prepends "http://" or "https://".
    endpoint: str
    # Selects "https" (True) or "http" (False) when the endpoint URL is built.
    use_ssl: bool
    # NOTE(review): not read by S3Adapter; presumably consumed by other S3 clients - confirm.
    url_style: str = "path"
    # Bucket that the adapter operates on.
    bucket: str
    # NOTE(review): optional key prefix; not read by S3Adapter - confirm where it is used.
    prefix: str | None = None
@@ -0,0 +1,17 @@
1
+ from urllib.parse import urlparse
2
+
3
+
4
def s3_to_endpoint_uri(s3_uri: str, endpoint: str, use_ssl: bool = False) -> str:
    """
    Convert an ``s3://bucket/key`` URI into a path-style HTTP(S) URL on ``endpoint``.

    Args:
        s3_uri: URI of the form ``s3://<bucket>/<key>``.
        endpoint: Host (and optional port) of the S3 endpoint, without a scheme.
        use_ssl: Use ``https`` when true, ``http`` otherwise.

    Returns:
        ``<scheme>://<endpoint>/<bucket>/<key>``.

    Raises:
        ValueError: If ``s3_uri`` does not use the ``s3`` scheme or has no bucket.
    """
    parsed = urlparse(s3_uri)
    # A URI such as "s3:///key" has the right scheme but no bucket; reject it rather than
    # emitting a URL with an empty path segment.
    if parsed.scheme != "s3" or not parsed.netloc:
        raise ValueError(f"Invalid S3 URI: {s3_uri}")

    scheme = "https" if use_ssl else "http"
    bucket = parsed.netloc
    path = parsed.path.lstrip("/")

    # Tolerate a trailing slash on the endpoint so the result never contains "//bucket".
    return f"{scheme}://{endpoint.rstrip('/')}/{bucket}/{path}"
@@ -0,0 +1,37 @@
1
# Python keywords, all in lower case (so the constants appear as "false", "none", "true").
PYTHON_RESERVED_WORDS = [
    # Lower-cased forms of False / None / True
    "false", "none", "true",
    # Remaining keywords in alphabetical order
    "and", "as", "assert", "async", "await",
    "break", "class", "continue",
    "def", "del",
    "elif", "else", "except",
    "finally", "for", "from",
    "global",
    "if", "import", "in", "is",
    "lambda",
    "nonlocal", "not",
    "or",
    "pass",
    "raise", "return",
    "try",
    "while", "with",
    "yield",
]
File without changes
@@ -0,0 +1,118 @@
1
+ from collections.abc import Mapping, Sequence
2
+ from dagster import (
3
+ AssetsDefinition,
4
+ DagsterRunStatus,
5
+ DefaultScheduleStatus,
6
+ define_asset_job,
7
+ RunConfig,
8
+ RunRequest,
9
+ RunsFilter,
10
+ schedule,
11
+ ScheduleEvaluationContext,
12
+ SkipReason,
13
+ )
14
+ from dagster._core.definitions.target import ExecutableDefinition
15
+ from dagster_dbt.asset_utils import (
16
+ build_dbt_asset_selection,
17
+ DBT_DEFAULT_EXCLUDE,
18
+ DBT_DEFAULT_SELECT,
19
+ DBT_DEFAULT_SELECTOR,
20
+ )
21
+
22
# Run statuses that the singleton schedules in this module treat as "still in progress".
# Upstream list this mirrors:
# https://github.com/dagster-io/dagster/blob/1.12.19/python_modules/dagster/dagster/_core/storage/dagster_run.py#L110
NOT_FINISHED_STATUSES = [
    DagsterRunStatus.QUEUED,
    DagsterRunStatus.NOT_STARTED,
    DagsterRunStatus.STARTING,
    DagsterRunStatus.STARTED,
]
29
+
30
+
31
def build_singleton_schedule(
    job: ExecutableDefinition,
    cron_schedule: str,
    schedule_name: str | None = None,
    tags: Mapping[str, str] | None = None,
    config: RunConfig | None = None,
    execution_timezone: str | None = None,
    default_status: DefaultScheduleStatus = DefaultScheduleStatus.STOPPED,
):
    """
    Build a schedule for ``job`` that skips a tick while an earlier run is still unfinished.

    On every tick the schedule looks for a run of ``job`` whose status is one of
    ``NOT_FINISHED_STATUSES``. If one exists a ``SkipReason`` is returned, otherwise a
    ``RunRequest`` carrying ``config`` and ``tags``.

    The schedule is named ``schedule_name`` or, when that is not given, ``"<job.name>_schedule"``.

    Reference: https://docs.dagster.io/guides/operate/managing-concurrency/advanced
    """
    if not schedule_name:
        schedule_name = f"{job.name}_schedule"

    @schedule(
        name=schedule_name,
        cron_schedule=cron_schedule,
        job=job,
        execution_timezone=execution_timezone,
        default_status=default_status,
    )
    def _schedule(context: ScheduleEvaluationContext):
        # One matching run is enough to know the job is busy, hence limit=1.
        unfinished = RunsFilter(job_name=job.name, statuses=NOT_FINISHED_STATUSES)
        in_flight = context.instance.get_runs(filters=unfinished, limit=1)

        if not in_flight:
            return RunRequest(run_config=config, tags=tags)

        return SkipReason(f"Skipping {job.name} because a run is already in progress.")

    return _schedule
66
+
67
+
68
def build_singleton_schedule_from_dbt_selection(
    dbt_assets: Sequence[AssetsDefinition],
    job_name: str,
    cron_schedule: str,
    dbt_select: str = DBT_DEFAULT_SELECT,
    dbt_exclude: str | None = DBT_DEFAULT_EXCLUDE,
    dbt_selector: str = DBT_DEFAULT_SELECTOR,
    schedule_name: str | None = None,
    tags: Mapping[str, str] | None = None,
    config: RunConfig | None = None,
    execution_timezone: str | None = None,
    default_status: DefaultScheduleStatus = DefaultScheduleStatus.STOPPED,
):
    """
    Returns a schedule for dbt assets that only triggers if no other instance of the job is running.

    An asset job named ``job_name`` is defined for the dbt selection, and the schedule is then
    built by ``build_singleton_schedule`` so that both builders share one skip-if-running
    implementation. The schedule is named ``schedule_name`` or ``"<job_name>_schedule"``.

    Based on https://github.com/dagster-io/dagster/blob/1.12.19/python_modules/libraries/dagster-dbt/dagster_dbt/asset_utils.py#L353
    """
    selection = build_dbt_asset_selection(
        dbt_assets,
        dbt_select=dbt_select,
        # A falsy exclude (None or "") falls back to the dagster-dbt default.
        dbt_exclude=dbt_exclude or DBT_DEFAULT_EXCLUDE,
        dbt_selector=dbt_selector,
    )
    job = define_asset_job(
        name=job_name,
        selection=selection,
        config=config,
        tags=tags,
    )

    # Delegate instead of duplicating the schedule body; the default name is passed
    # explicitly so it is derived from ``job_name`` exactly as before.
    return build_singleton_schedule(
        job=job,
        cron_schedule=cron_schedule,
        schedule_name=schedule_name or f"{job_name}_schedule",
        tags=tags,
        config=config,
        execution_timezone=execution_timezone,
        default_status=default_status,
    )
@@ -0,0 +1,40 @@
1
+ from .types import (
2
+ ClickHouseRelation,
3
+ ClickHouseSettings,
4
+ DuckDBRelation,
5
+ DuckDBSettings,
6
+ PostgresRelation,
7
+ PostgresSettings,
8
+ )
9
+ from .utils import render_statement
10
+ from typing import TYPE_CHECKING
11
+
12
+ import lazy_loader as lazy
13
+
14
+ if TYPE_CHECKING:
15
+ from .adapters.clickhouse import ClickHouseAdapter
16
+ from .adapters.duckdb import DuckDBAdapter
17
+ from .adapters.postgres import PostgresAdapter
18
+
19
+ # TODO Replace with lazy keyword in Python 3.15+ https://docs.python.org/3.15/whatsnew/3.15.html#whatsnew315-lazy-imports
20
+ __getattr__, __dir__, _ = lazy.attach(
21
+ __name__,
22
+ submod_attrs={
23
+ "adapters.clickhouse": ["ClickHouseAdapter"],
24
+ "adapters.duckdb": ["DuckDBAdapter"],
25
+ "adapters.postgres": ["PostgresAdapter"],
26
+ },
27
+ )
28
+
29
+ __all__ = [
30
+ "ClickHouseAdapter",
31
+ "ClickHouseRelation",
32
+ "ClickHouseSettings",
33
+ "DuckDBAdapter",
34
+ "DuckDBRelation",
35
+ "DuckDBSettings",
36
+ "PostgresAdapter",
37
+ "PostgresRelation",
38
+ "PostgresSettings",
39
+ "render_statement",
40
+ ]