netbek-dw-lib 1.32.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- netbek_dw_lib-1.32.0/PKG-INFO +141 -0
- netbek_dw_lib-1.32.0/README.md +71 -0
- netbek_dw_lib-1.32.0/pyproject.toml +148 -0
- netbek_dw_lib-1.32.0/src/dw_lib/__init__.py +0 -0
- netbek_dw_lib-1.32.0/src/dw_lib/cloud/__init__.py +9 -0
- netbek_dw_lib-1.32.0/src/dw_lib/cloud/adapters/__init__.py +0 -0
- netbek_dw_lib-1.32.0/src/dw_lib/cloud/adapters/s3.py +49 -0
- netbek_dw_lib-1.32.0/src/dw_lib/cloud/types.py +12 -0
- netbek_dw_lib-1.32.0/src/dw_lib/cloud/utils.py +17 -0
- netbek_dw_lib-1.32.0/src/dw_lib/constants.py +37 -0
- netbek_dw_lib-1.32.0/src/dw_lib/dagster/__init__.py +0 -0
- netbek_dw_lib-1.32.0/src/dw_lib/dagster/asset_utils.py +118 -0
- netbek_dw_lib-1.32.0/src/dw_lib/database/__init__.py +40 -0
- netbek_dw_lib-1.32.0/src/dw_lib/database/adapters/__init__.py +0 -0
- netbek_dw_lib-1.32.0/src/dw_lib/database/adapters/base.py +402 -0
- netbek_dw_lib-1.32.0/src/dw_lib/database/adapters/clickhouse.py +417 -0
- netbek_dw_lib-1.32.0/src/dw_lib/database/adapters/duckdb.py +215 -0
- netbek_dw_lib-1.32.0/src/dw_lib/database/adapters/postgres.py +642 -0
- netbek_dw_lib-1.32.0/src/dw_lib/database/types.py +290 -0
- netbek_dw_lib-1.32.0/src/dw_lib/database/utils.py +33 -0
- netbek_dw_lib-1.32.0/src/dw_lib/dbt/__init__.py +1172 -0
- netbek_dw_lib-1.32.0/src/dw_lib/dbt/polyfactory/__init__.py +0 -0
- netbek_dw_lib-1.32.0/src/dw_lib/dbt/polyfactory/factories/__init__.py +0 -0
- netbek_dw_lib-1.32.0/src/dw_lib/dbt/polyfactory/factories/sqlmodel_factory.py +39 -0
- netbek_dw_lib-1.32.0/src/dw_lib/dbt/polyfactory/mixins.py +35 -0
- netbek_dw_lib-1.32.0/src/dw_lib/dbt/types.py +167 -0
- netbek_dw_lib-1.32.0/src/dw_lib/dbt/utils.py +238 -0
- netbek_dw_lib-1.32.0/src/dw_lib/exceptions.py +66 -0
- netbek_dw_lib-1.32.0/src/dw_lib/loader.py +302 -0
- netbek_dw_lib-1.32.0/src/dw_lib/peerdb.py +1358 -0
- netbek_dw_lib-1.32.0/src/dw_lib/sqlalchemy/__init__.py +0 -0
- netbek_dw_lib-1.32.0/src/dw_lib/sqlalchemy/clickhouse/__init__.py +0 -0
- netbek_dw_lib-1.32.0/src/dw_lib/sqlalchemy/clickhouse/types.py +94 -0
- netbek_dw_lib-1.32.0/src/dw_lib/types.py +64 -0
- netbek_dw_lib-1.32.0/src/dw_lib/utils/__init__.py +0 -0
- netbek_dw_lib-1.32.0/src/dw_lib/utils/environ.py +12 -0
- netbek_dw_lib-1.32.0/src/dw_lib/utils/filesystem.py +60 -0
- netbek_dw_lib-1.32.0/src/dw_lib/utils/profiling.py +52 -0
- netbek_dw_lib-1.32.0/src/dw_lib/utils/python_utils.py +20 -0
- netbek_dw_lib-1.32.0/src/dw_lib/utils/sqlmodel_utils.py +260 -0
- netbek_dw_lib-1.32.0/src/dw_lib/utils/template.py +22 -0
- netbek_dw_lib-1.32.0/src/dw_lib/utils/typer_utils.py +13 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: netbek-dw-lib
|
|
3
|
+
Version: 1.32.0
|
|
4
|
+
Summary: Tools for working with Postgres, ClickHouse, and DuckDB.
|
|
5
|
+
Requires-Dist: jinja2>=3.1.6
|
|
6
|
+
Requires-Dist: lazy-loader>=0.5
|
|
7
|
+
Requires-Dist: packaging>=26.0
|
|
8
|
+
Requires-Dist: psutil>=7.1.3
|
|
9
|
+
Requires-Dist: pydantic>=2.12.4
|
|
10
|
+
Requires-Dist: pydash>=8.0.5
|
|
11
|
+
Requires-Dist: requests>=2.32.5
|
|
12
|
+
Requires-Dist: ruamel-yaml>=0.19.1
|
|
13
|
+
Requires-Dist: sqlalchemy>=2.0.44
|
|
14
|
+
Requires-Dist: sqlglot>=28.0.0,<28.1.0
|
|
15
|
+
Requires-Dist: sqlmodel>=0.0.27
|
|
16
|
+
Requires-Dist: sqlparse>=0.5.3
|
|
17
|
+
Requires-Dist: clickhouse-connect>=0.10.0 ; extra == 'clickhouse'
|
|
18
|
+
Requires-Dist: clickhouse-sqlalchemy ; extra == 'clickhouse'
|
|
19
|
+
Requires-Dist: sqlglot>=28.0.0,<28.1.0 ; extra == 'clickhouse'
|
|
20
|
+
Requires-Dist: sqlmodel>=0.0.27 ; extra == 'clickhouse'
|
|
21
|
+
Requires-Dist: dagster>=1.12.19 ; extra == 'dagster'
|
|
22
|
+
Requires-Dist: dagster-dbt>=0.28.19 ; extra == 'dagster'
|
|
23
|
+
Requires-Dist: clickhouse-connect>=0.10.0 ; extra == 'dbt'
|
|
24
|
+
Requires-Dist: clickhouse-sqlalchemy ; extra == 'dbt'
|
|
25
|
+
Requires-Dist: dbt-clickhouse ; extra == 'dbt'
|
|
26
|
+
Requires-Dist: dbt-core>=1.10.0,<1.11 ; extra == 'dbt'
|
|
27
|
+
Requires-Dist: livereload>=2.7.1 ; extra == 'dbt'
|
|
28
|
+
Requires-Dist: opentelemetry-sdk>=1.39.1 ; extra == 'dbt'
|
|
29
|
+
Requires-Dist: polyfactory>=3.0.0 ; extra == 'dbt'
|
|
30
|
+
Requires-Dist: pydantic>=2.12.4 ; extra == 'dbt'
|
|
31
|
+
Requires-Dist: pydash>=8.0.5 ; extra == 'dbt'
|
|
32
|
+
Requires-Dist: ruamel-yaml>=0.19.1 ; extra == 'dbt'
|
|
33
|
+
Requires-Dist: sqlglot>=28.0.0,<28.1.0 ; extra == 'dbt'
|
|
34
|
+
Requires-Dist: sqlmodel>=0.0.27 ; extra == 'dbt'
|
|
35
|
+
Requires-Dist: duckdb>=1.4.2 ; extra == 'duckdb'
|
|
36
|
+
Requires-Dist: duckdb-engine>=0.17.0 ; extra == 'duckdb'
|
|
37
|
+
Requires-Dist: sqlglot>=28.0.0,<28.1.0 ; extra == 'duckdb'
|
|
38
|
+
Requires-Dist: sqlmodel>=0.0.27 ; extra == 'duckdb'
|
|
39
|
+
Requires-Dist: boto3>=1.41.1 ; extra == 'loader'
|
|
40
|
+
Requires-Dist: chdb>=3.7.2 ; extra == 'loader'
|
|
41
|
+
Requires-Dist: jinja2>=3.1.6 ; extra == 'loader'
|
|
42
|
+
Requires-Dist: pydantic>=2.12.4 ; extra == 'loader'
|
|
43
|
+
Requires-Dist: rich>=14.2.0 ; extra == 'loader'
|
|
44
|
+
Requires-Dist: clickhouse-connect>=0.10.0 ; extra == 'peerdb'
|
|
45
|
+
Requires-Dist: clickhouse-sqlalchemy ; extra == 'peerdb'
|
|
46
|
+
Requires-Dist: pydantic>=2.12.4 ; extra == 'peerdb'
|
|
47
|
+
Requires-Dist: pydash>=8.0.5 ; extra == 'peerdb'
|
|
48
|
+
Requires-Dist: requests>=2.32.5 ; extra == 'peerdb'
|
|
49
|
+
Requires-Dist: rich>=14.2.0 ; extra == 'peerdb'
|
|
50
|
+
Requires-Dist: ruamel-yaml>=0.19.1 ; extra == 'peerdb'
|
|
51
|
+
Requires-Dist: sqlalchemy>=2.0.44 ; extra == 'peerdb'
|
|
52
|
+
Requires-Dist: sqlglot>=28.0.0 ; extra == 'peerdb'
|
|
53
|
+
Requires-Dist: sqlmodel>=0.0.27 ; extra == 'peerdb'
|
|
54
|
+
Requires-Dist: psycopg[binary]>=3.3.3 ; extra == 'psycopg'
|
|
55
|
+
Requires-Dist: sqlglot>=28.0.0,<28.1.0 ; extra == 'psycopg'
|
|
56
|
+
Requires-Dist: sqlmodel>=0.0.27 ; extra == 'psycopg'
|
|
57
|
+
Requires-Dist: psycopg2-binary>=2.9.11 ; extra == 'psycopg2'
|
|
58
|
+
Requires-Dist: sqlglot>=28.0.0,<28.1.0 ; extra == 'psycopg2'
|
|
59
|
+
Requires-Dist: sqlmodel>=0.0.27 ; extra == 'psycopg2'
|
|
60
|
+
Requires-Python: ==3.13.*
|
|
61
|
+
Provides-Extra: clickhouse
|
|
62
|
+
Provides-Extra: dagster
|
|
63
|
+
Provides-Extra: dbt
|
|
64
|
+
Provides-Extra: duckdb
|
|
65
|
+
Provides-Extra: loader
|
|
66
|
+
Provides-Extra: peerdb
|
|
67
|
+
Provides-Extra: psycopg
|
|
68
|
+
Provides-Extra: psycopg2
|
|
69
|
+
Description-Content-Type: text/markdown
|
|
70
|
+
|
|
71
|
+
# dw-lib
|
|
72
|
+
|
|
73
|
+
Tools for working with Postgres, ClickHouse, and DuckDB.
|
|
74
|
+
|
|
75
|
+
## Development: Installation
|
|
76
|
+
|
|
77
|
+
1. Clone the repo:
|
|
78
|
+
|
|
79
|
+
```shell
|
|
80
|
+
git clone git@github.com:netbek/dw-lib.git
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
2. Install [Docker Engine v23 or higher](https://docs.docker.com/engine/install/) and [Docker Compose v2 or higher](https://docs.docker.com/compose/install/). Follow the links for instructions or run this script:
|
|
84
|
+
|
|
85
|
+
```shell
|
|
86
|
+
./scripts/install.sh docker
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
3. Install Nix:
|
|
90
|
+
|
|
91
|
+
```shell
|
|
92
|
+
sh <(curl -L https://nixos.org/nix/install) --daemon
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
4. Configure Nix. Edit `/etc/nix/nix.conf` (for a multi-user installation) or `~/.config/nix/nix.conf` (for a single-user installation) to include the following lines:
|
|
96
|
+
|
|
97
|
+
```shell
|
|
98
|
+
experimental-features = nix-command flakes
|
|
99
|
+
trusted-users = root <USER>
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Replace `<USER>` with your username on your computer.
|
|
103
|
+
|
|
104
|
+
5. Install direnv:
|
|
105
|
+
|
|
106
|
+
```shell
|
|
107
|
+
sudo apt install direnv
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
6. Enable direnv in your shell by adding a line to your shell configuration file.
|
|
111
|
+
|
|
112
|
+
For Bash, edit `~/.bashrc`:
|
|
113
|
+
|
|
114
|
+
```shell
|
|
115
|
+
eval "$(direnv hook bash)"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
7. Allow `.envrc`:
|
|
119
|
+
|
|
120
|
+
```shell
|
|
121
|
+
direnv allow
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
8. Enter a [PyPI API token](https://pypi.org/manage/account/#api-tokens) as the password in `.pypirc`.
|
|
125
|
+
|
|
126
|
+
## Development: Usage
|
|
127
|
+
|
|
128
|
+
Build and publish the Python distribution package:
|
|
129
|
+
|
|
130
|
+
```shell
|
|
131
|
+
make bump-version [major|minor|patch]
|
|
132
|
+
git push
|
|
133
|
+
make build
|
|
134
|
+
git push
|
|
135
|
+
make create-release
|
|
136
|
+
make publish
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
Copyright (c) 2025 Hein Bekker. Licensed under the GNU Affero General Public License, version 3.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# dw-lib
|
|
2
|
+
|
|
3
|
+
Tools for working with Postgres, ClickHouse, and DuckDB.
|
|
4
|
+
|
|
5
|
+
## Development: Installation
|
|
6
|
+
|
|
7
|
+
1. Clone the repo:
|
|
8
|
+
|
|
9
|
+
```shell
|
|
10
|
+
git clone git@github.com:netbek/dw-lib.git
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
2. Install [Docker Engine v23 or higher](https://docs.docker.com/engine/install/) and [Docker Compose v2 or higher](https://docs.docker.com/compose/install/). Follow the links for instructions or run this script:
|
|
14
|
+
|
|
15
|
+
```shell
|
|
16
|
+
./scripts/install.sh docker
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
3. Install Nix:
|
|
20
|
+
|
|
21
|
+
```shell
|
|
22
|
+
sh <(curl -L https://nixos.org/nix/install) --daemon
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
4. Configure Nix. Edit `/etc/nix/nix.conf` (for a multi-user installation) or `~/.config/nix/nix.conf` (for a single-user installation) to include the following lines:
|
|
26
|
+
|
|
27
|
+
```shell
|
|
28
|
+
experimental-features = nix-command flakes
|
|
29
|
+
trusted-users = root <USER>
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Replace `<USER>` with your username on your computer.
|
|
33
|
+
|
|
34
|
+
5. Install direnv:
|
|
35
|
+
|
|
36
|
+
```shell
|
|
37
|
+
sudo apt install direnv
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
6. Enable direnv in your shell by adding a line to your shell configuration file.
|
|
41
|
+
|
|
42
|
+
For Bash, edit `~/.bashrc`:
|
|
43
|
+
|
|
44
|
+
```shell
|
|
45
|
+
eval "$(direnv hook bash)"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
7. Allow `.envrc`:
|
|
49
|
+
|
|
50
|
+
```shell
|
|
51
|
+
direnv allow
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
8. Enter a [PyPI API token](https://pypi.org/manage/account/#api-tokens) as the password in `.pypirc`.
|
|
55
|
+
|
|
56
|
+
## Development: Usage
|
|
57
|
+
|
|
58
|
+
Build and publish the Python distribution package:
|
|
59
|
+
|
|
60
|
+
```shell
|
|
61
|
+
make bump-version [major|minor|patch]
|
|
62
|
+
git push
|
|
63
|
+
make build
|
|
64
|
+
git push
|
|
65
|
+
make create-release
|
|
66
|
+
make publish
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## License
|
|
70
|
+
|
|
71
|
+
Copyright (c) 2025 Hein Bekker. Licensed under the GNU Affero General Public License, version 3.
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "netbek-dw-lib"
|
|
3
|
+
version = "1.32.0"
|
|
4
|
+
description = "Tools for working with Postgres, ClickHouse, and DuckDB."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = "==3.13.*"
|
|
7
|
+
|
|
8
|
+
# sqlglot pinned for compatibility with dagster-dbt https://github.com/dagster-io/dagster/blob/1.12.19/python_modules/libraries/dagster-dbt/pyproject.toml#L33
|
|
9
|
+
dependencies = [
|
|
10
|
+
"jinja2>=3.1.6",
|
|
11
|
+
"lazy-loader>=0.5",
|
|
12
|
+
"packaging>=26.0",
|
|
13
|
+
"psutil>=7.1.3",
|
|
14
|
+
"pydantic>=2.12.4",
|
|
15
|
+
"pydash>=8.0.5",
|
|
16
|
+
"requests>=2.32.5",
|
|
17
|
+
"ruamel-yaml>=0.19.1",
|
|
18
|
+
"sqlalchemy>=2.0.44",
|
|
19
|
+
"sqlglot>=28.0.0,<28.1.0",
|
|
20
|
+
"sqlmodel>=0.0.27",
|
|
21
|
+
"sqlparse>=0.5.3",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
clickhouse = [
|
|
26
|
+
"clickhouse-connect>=0.10.0",
|
|
27
|
+
"clickhouse-sqlalchemy",
|
|
28
|
+
"sqlglot>=28.0.0,<28.1.0",
|
|
29
|
+
"sqlmodel>=0.0.27",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
duckdb = [
|
|
33
|
+
"duckdb>=1.4.2",
|
|
34
|
+
"duckdb-engine>=0.17.0",
|
|
35
|
+
"sqlglot>=28.0.0,<28.1.0",
|
|
36
|
+
"sqlmodel>=0.0.27",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
psycopg = [
|
|
40
|
+
"psycopg[binary]>=3.3.3",
|
|
41
|
+
"sqlglot>=28.0.0,<28.1.0",
|
|
42
|
+
"sqlmodel>=0.0.27",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
psycopg2 = [
|
|
46
|
+
"psycopg2-binary>=2.9.11",
|
|
47
|
+
"sqlglot>=28.0.0,<28.1.0",
|
|
48
|
+
"sqlmodel>=0.0.27",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
dagster = [
|
|
52
|
+
"dagster>=1.12.19",
|
|
53
|
+
"dagster-dbt>=0.28.19",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
# dbt-core pinned for compatibility with dbt-clickhouse https://github.com/netbek/dbt-clickhouse/blob/v1.10.0-batch/pyproject.toml#L26
|
|
57
|
+
dbt = [
|
|
58
|
+
"clickhouse-connect>=0.10.0",
|
|
59
|
+
"clickhouse-sqlalchemy",
|
|
60
|
+
"dbt-clickhouse",
|
|
61
|
+
"dbt-core>=1.10.0,<1.11",
|
|
62
|
+
"livereload>=2.7.1",
|
|
63
|
+
"opentelemetry-sdk>=1.39.1",
|
|
64
|
+
"polyfactory>=3.0.0",
|
|
65
|
+
"pydantic>=2.12.4",
|
|
66
|
+
"pydash>=8.0.5",
|
|
67
|
+
"ruamel-yaml>=0.19.1",
|
|
68
|
+
"sqlglot>=28.0.0,<28.1.0",
|
|
69
|
+
"sqlmodel>=0.0.27",
|
|
70
|
+
]
|
|
71
|
+
|
|
72
|
+
loader = [
|
|
73
|
+
"boto3>=1.41.1",
|
|
74
|
+
"chdb>=3.7.2",
|
|
75
|
+
"jinja2>=3.1.6",
|
|
76
|
+
"pydantic>=2.12.4",
|
|
77
|
+
"rich>=14.2.0",
|
|
78
|
+
]
|
|
79
|
+
|
|
80
|
+
peerdb = [
|
|
81
|
+
"clickhouse-connect>=0.10.0",
|
|
82
|
+
"clickhouse-sqlalchemy",
|
|
83
|
+
"pydantic>=2.12.4",
|
|
84
|
+
"pydash>=8.0.5",
|
|
85
|
+
"requests>=2.32.5",
|
|
86
|
+
"rich>=14.2.0",
|
|
87
|
+
"ruamel-yaml>=0.19.1",
|
|
88
|
+
"sqlalchemy>=2.0.44",
|
|
89
|
+
"sqlglot>=28.0.0",
|
|
90
|
+
"sqlmodel>=0.0.27",
|
|
91
|
+
]
|
|
92
|
+
|
|
93
|
+
[dependency-groups]
|
|
94
|
+
dev = [
|
|
95
|
+
"docker>=7.1.0",
|
|
96
|
+
"pre-commit>=4.5.1",
|
|
97
|
+
"pytest>=9.0.2",
|
|
98
|
+
"pytest-docker>=3.2.5",
|
|
99
|
+
"ruff>=0.15.0",
|
|
100
|
+
"twine>=6.2.0",
|
|
101
|
+
"ty>=0.0.23",
|
|
102
|
+
]
|
|
103
|
+
|
|
104
|
+
[build-system]
|
|
105
|
+
requires = ["uv_build>=0.9.17,<0.12.0"]
|
|
106
|
+
build-backend = "uv_build"
|
|
107
|
+
|
|
108
|
+
[tool.isort]
|
|
109
|
+
force_alphabetical_sort = true
|
|
110
|
+
line_length = 100
|
|
111
|
+
profile = "black"
|
|
112
|
+
py_version = 313
|
|
113
|
+
|
|
114
|
+
[tool.ruff]
|
|
115
|
+
line-length = 100
|
|
116
|
+
target-version = "py313"
|
|
117
|
+
extend-exclude = ["./vendor"]
|
|
118
|
+
|
|
119
|
+
[tool.ruff.lint]
|
|
120
|
+
ignore = ["D100", "D101", "D102", "D103", "D104", "D107", "D205"]
|
|
121
|
+
|
|
122
|
+
# [tool.pyright]
|
|
123
|
+
# include = ["src", "tests"]
|
|
124
|
+
# pythonVersion = "3.13"
|
|
125
|
+
# typeCheckingMode = "basic"
|
|
126
|
+
# reportPrivateImportUsage = "none"
|
|
127
|
+
|
|
128
|
+
[tool.ty.rules]
|
|
129
|
+
invalid-method-override = "ignore" # https://github.com/astral-sh/ty/issues/2154
|
|
130
|
+
|
|
131
|
+
[tool.ty.src]
|
|
132
|
+
include = ["src", "tests"]
|
|
133
|
+
|
|
134
|
+
[tool.pytest]
|
|
135
|
+
addopts = ["-x", "--no-header", "--container-scope=module"]
|
|
136
|
+
filterwarnings = [
|
|
137
|
+
"ignore:The 'u' type code is deprecated:DeprecationWarning", # Caused by clickhouse_connect
|
|
138
|
+
"ignore::DeprecationWarning:dbt_common.invocation", # https://github.com/dbt-labs/dbt-core/issues/9791
|
|
139
|
+
"ignore::DeprecationWarning:dbt.cli.options" # https://github.com/dbt-labs/dbt-core/issues/12038
|
|
140
|
+
]
|
|
141
|
+
markers = ["docker_compose_file", "docker_skip_wait_until_responsive"]
|
|
142
|
+
|
|
143
|
+
[tool.uv.build-backend]
|
|
144
|
+
module-name = "dw_lib"
|
|
145
|
+
|
|
146
|
+
[tool.uv.sources]
|
|
147
|
+
clickhouse-sqlalchemy = { git = "https://github.com/netbek/clickhouse-sqlalchemy.git", rev = "datetime-uuid" }
|
|
148
|
+
dbt-clickhouse = { git = "https://github.com/netbek/dbt-clickhouse.git", rev = "v1.10.0-batch" }
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from ..types import S3Settings
|
|
2
|
+
from botocore.client import Config
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import boto3
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class S3Adapter:
    """Small helper around a boto3 S3 client built from an ``S3Settings`` object."""

    def __init__(self, settings: S3Settings) -> None:
        self.settings = settings

    @property
    def url(self) -> str:
        """Return the ``s3://`` URL of the configured bucket."""
        return f"s3://{self.settings.bucket}"

    def create_client(self):
        """Create a new boto3 S3 client for the configured endpoint.

        The endpoint URL scheme follows ``settings.use_ssl``; requests are signed
        with signature version ``s3v4``.
        """
        scheme = "https" if self.settings.use_ssl else "http"
        endpoint_url = f"{scheme}://{self.settings.endpoint}"

        return boto3.client(
            "s3",
            endpoint_url=endpoint_url,
            aws_access_key_id=self.settings.key_id,
            aws_secret_access_key=self.settings.secret,
            config=Config(signature_version="s3v4"),
            region_name=self.settings.region,
        )

    def can_connect(self) -> bool:
        """Return ``True`` if a ``head_bucket`` call on the configured bucket succeeds."""
        client = self.create_client()

        try:
            client.head_bucket(Bucket=self.settings.bucket)
        except Exception:
            # Deliberate best-effort probe: any failure is reported as "cannot connect".
            return False

        return True

    def list_objects(self, prefix: str | None = None) -> list[dict[str, Any]]:
        """List the objects in the configured bucket.

        Args:
            prefix: Only return keys starting with this prefix. When ``None``, no
                prefix filter is sent.

        Returns:
            The ``Contents`` entries of every result page, in the order returned;
            an empty list when nothing matches.
        """
        client = self.create_client()

        # boto3 rejects ``Prefix=None`` during parameter validation, so the key is
        # only included when a prefix was actually supplied.
        params: dict[str, Any] = {"Bucket": self.settings.bucket}
        if prefix is not None:
            params["Prefix"] = prefix

        # A single list_objects_v2 response is capped per page; paginate so that
        # larger listings are not silently truncated.
        paginator = client.get_paginator("list_objects_v2")

        objects: list[dict[str, Any]] = []
        for page in paginator.paginate(**params):
            # Pages without matches carry no "Contents" key.
            objects.extend(page.get("Contents", []))

        return objects
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from urllib.parse import urlparse
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def s3_to_endpoint_uri(s3_uri: str, endpoint: str, use_ssl: bool = False) -> str:
    """Rewrite an ``s3://bucket/key`` URI as a path-style URL on ``endpoint``.

    Args:
        s3_uri: URI whose scheme must be ``s3``.
        endpoint: Host (and optional port) placed after the scheme.
        use_ssl: Emit ``https`` when true, ``http`` otherwise.

    Returns:
        ``{scheme}://{endpoint}/{bucket}/{key}`` with leading slashes stripped
        from the key.

    Raises:
        ValueError: If ``s3_uri`` does not use the ``s3`` scheme.
    """
    parts = urlparse(s3_uri)
    if parts.scheme != "s3":
        raise ValueError(f"Invalid S3 URI: {s3_uri}")

    protocol = "https" if use_ssl else "http"
    key = parts.path.lstrip("/")

    # The netloc of an s3:// URI is the bucket name.
    return f"{protocol}://{endpoint}/{parts.netloc}/{key}"
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Lower-case spellings of reserved words; the first three are the lowered forms
# of the capitalised constants.
PYTHON_RESERVED_WORDS = [
    "false", "none", "true",
    "and", "as", "assert", "async", "await",
    "break",
    "class", "continue",
    "def", "del",
    "elif", "else", "except",
    "finally", "for", "from",
    "global",
    "if", "import", "in", "is",
    "lambda",
    "nonlocal", "not",
    "or",
    "pass",
    "raise", "return",
    "try",
    "while", "with",
    "yield",
]
|
|
File without changes
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from collections.abc import Mapping, Sequence
|
|
2
|
+
from dagster import (
|
|
3
|
+
AssetsDefinition,
|
|
4
|
+
DagsterRunStatus,
|
|
5
|
+
DefaultScheduleStatus,
|
|
6
|
+
define_asset_job,
|
|
7
|
+
RunConfig,
|
|
8
|
+
RunRequest,
|
|
9
|
+
RunsFilter,
|
|
10
|
+
schedule,
|
|
11
|
+
ScheduleEvaluationContext,
|
|
12
|
+
SkipReason,
|
|
13
|
+
)
|
|
14
|
+
from dagster._core.definitions.target import ExecutableDefinition
|
|
15
|
+
from dagster_dbt.asset_utils import (
|
|
16
|
+
build_dbt_asset_selection,
|
|
17
|
+
DBT_DEFAULT_EXCLUDE,
|
|
18
|
+
DBT_DEFAULT_SELECT,
|
|
19
|
+
DBT_DEFAULT_SELECTOR,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# https://github.com/dagster-io/dagster/blob/1.12.19/python_modules/dagster/dagster/_core/storage/dagster_run.py#L110
|
|
23
|
+
NOT_FINISHED_STATUSES = [
|
|
24
|
+
DagsterRunStatus.QUEUED,
|
|
25
|
+
DagsterRunStatus.NOT_STARTED,
|
|
26
|
+
DagsterRunStatus.STARTING,
|
|
27
|
+
DagsterRunStatus.STARTED,
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def build_singleton_schedule(
    job: ExecutableDefinition,
    cron_schedule: str,
    schedule_name: str | None = None,
    tags: Mapping[str, str] | None = None,
    config: RunConfig | None = None,
    execution_timezone: str | None = None,
    default_status: DefaultScheduleStatus = DefaultScheduleStatus.STOPPED,
):
    """
    Build a schedule for ``job`` that skips a tick while a run of the job is unfinished.

    On each evaluation the instance is queried for at most one run of ``job``
    whose status is in ``NOT_FINISHED_STATUSES``. If one exists, a ``SkipReason``
    is returned; otherwise a ``RunRequest`` carrying ``config`` and ``tags``.

    When ``schedule_name`` is not given, it defaults to ``"<job name>_schedule"``.

    Reference: https://docs.dagster.io/guides/operate/managing-concurrency/advanced
    """
    resolved_name = schedule_name if schedule_name else f"{job.name}_schedule"

    @schedule(
        name=resolved_name,
        job=job,
        cron_schedule=cron_schedule,
        execution_timezone=execution_timezone,
        default_status=default_status,
    )
    def _singleton_tick(context: ScheduleEvaluationContext):
        # One matching run is enough to know the job is still busy.
        unfinished_filter = RunsFilter(job_name=job.name, statuses=NOT_FINISHED_STATUSES)
        in_flight = context.instance.get_runs(filters=unfinished_filter, limit=1)

        if not in_flight:
            return RunRequest(run_config=config, tags=tags)

        return SkipReason(f"Skipping {job.name} because a run is already in progress.")

    return _singleton_tick
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def build_singleton_schedule_from_dbt_selection(
    dbt_assets: Sequence[AssetsDefinition],
    job_name: str,
    cron_schedule: str,
    dbt_select: str = DBT_DEFAULT_SELECT,
    dbt_exclude: str | None = DBT_DEFAULT_EXCLUDE,
    dbt_selector: str = DBT_DEFAULT_SELECTOR,
    schedule_name: str | None = None,
    tags: Mapping[str, str] | None = None,
    config: RunConfig | None = None,
    execution_timezone: str | None = None,
    default_status: DefaultScheduleStatus = DefaultScheduleStatus.STOPPED,
):
    """
    Returns a schedule for dbt assets that only triggers if no other instance of the job is running.

    An asset job named ``job_name`` is defined over the dbt selection, then the
    schedule is built by ``build_singleton_schedule`` so the "skip while a run is
    unfinished" logic lives in one place.

    When ``schedule_name`` is not given, it defaults to ``"<job_name>_schedule"``.
    A falsy ``dbt_exclude`` falls back to ``DBT_DEFAULT_EXCLUDE``.

    Based on https://github.com/dagster-io/dagster/blob/1.12.19/python_modules/libraries/dagster-dbt/dagster_dbt/asset_utils.py#L353
    """
    selection = build_dbt_asset_selection(
        dbt_assets,
        dbt_select=dbt_select,
        dbt_exclude=dbt_exclude or DBT_DEFAULT_EXCLUDE,
        dbt_selector=dbt_selector,
    )
    job = define_asset_job(
        name=job_name,
        selection=selection,
        config=config,
        tags=tags,
    )

    # Delegate instead of duplicating the schedule body; the name is resolved
    # here so the default stays derived from ``job_name``.
    return build_singleton_schedule(
        job=job,
        cron_schedule=cron_schedule,
        schedule_name=schedule_name or f"{job_name}_schedule",
        tags=tags,
        config=config,
        execution_timezone=execution_timezone,
        default_status=default_status,
    )
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from .types import (
|
|
2
|
+
ClickHouseRelation,
|
|
3
|
+
ClickHouseSettings,
|
|
4
|
+
DuckDBRelation,
|
|
5
|
+
DuckDBSettings,
|
|
6
|
+
PostgresRelation,
|
|
7
|
+
PostgresSettings,
|
|
8
|
+
)
|
|
9
|
+
from .utils import render_statement
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
import lazy_loader as lazy
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from .adapters.clickhouse import ClickHouseAdapter
|
|
16
|
+
from .adapters.duckdb import DuckDBAdapter
|
|
17
|
+
from .adapters.postgres import PostgresAdapter
|
|
18
|
+
|
|
19
|
+
# TODO Replace with lazy keyword in Python 3.15+ https://docs.python.org/3.15/whatsnew/3.15.html#whatsnew315-lazy-imports
|
|
20
|
+
__getattr__, __dir__, _ = lazy.attach(
|
|
21
|
+
__name__,
|
|
22
|
+
submod_attrs={
|
|
23
|
+
"adapters.clickhouse": ["ClickHouseAdapter"],
|
|
24
|
+
"adapters.duckdb": ["DuckDBAdapter"],
|
|
25
|
+
"adapters.postgres": ["PostgresAdapter"],
|
|
26
|
+
},
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"ClickHouseAdapter",
|
|
31
|
+
"ClickHouseRelation",
|
|
32
|
+
"ClickHouseSettings",
|
|
33
|
+
"DuckDBAdapter",
|
|
34
|
+
"DuckDBRelation",
|
|
35
|
+
"DuckDBSettings",
|
|
36
|
+
"PostgresAdapter",
|
|
37
|
+
"PostgresRelation",
|
|
38
|
+
"PostgresSettings",
|
|
39
|
+
"render_statement",
|
|
40
|
+
]
|
|
File without changes
|