dlt-iceberg 0.1.1__tar.gz
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dlt_iceberg-0.1.1/.github/workflows/publish.yml +99 -0
- dlt_iceberg-0.1.1/.github/workflows/test.yml +47 -0
- dlt_iceberg-0.1.1/.gitignore +29 -0
- dlt_iceberg-0.1.1/.python-version +1 -0
- dlt_iceberg-0.1.1/LICENSE +21 -0
- dlt_iceberg-0.1.1/PKG-INFO +15 -0
- dlt_iceberg-0.1.1/README.md +247 -0
- dlt_iceberg-0.1.1/TESTING.md +284 -0
- dlt_iceberg-0.1.1/docker-compose.yml +93 -0
- dlt_iceberg-0.1.1/examples/README.md +66 -0
- dlt_iceberg-0.1.1/examples/data/customers_initial.csv +6 -0
- dlt_iceberg-0.1.1/examples/data/customers_updates.csv +5 -0
- dlt_iceberg-0.1.1/examples/data/events_batch1.csv +6 -0
- dlt_iceberg-0.1.1/examples/data/events_batch2.csv +6 -0
- dlt_iceberg-0.1.1/examples/incremental_load.py +95 -0
- dlt_iceberg-0.1.1/examples/merge_load.py +105 -0
- dlt_iceberg-0.1.1/pyproject.toml +36 -0
- dlt_iceberg-0.1.1/src/dlt_iceberg/__init__.py +28 -0
- dlt_iceberg-0.1.1/src/dlt_iceberg/destination.py +400 -0
- dlt_iceberg-0.1.1/src/dlt_iceberg/destination_client.py +606 -0
- dlt_iceberg-0.1.1/src/dlt_iceberg/error_handling.py +224 -0
- dlt_iceberg-0.1.1/src/dlt_iceberg/partition_builder.py +308 -0
- dlt_iceberg-0.1.1/src/dlt_iceberg/schema_casting.py +381 -0
- dlt_iceberg-0.1.1/src/dlt_iceberg/schema_converter.py +207 -0
- dlt_iceberg-0.1.1/src/dlt_iceberg/schema_evolution.py +261 -0
- dlt_iceberg-0.1.1/tests/test_class_based_atomic.py +297 -0
- dlt_iceberg-0.1.1/tests/test_destination_e2e.py +147 -0
- dlt_iceberg-0.1.1/tests/test_destination_rest_catalog.py +681 -0
- dlt_iceberg-0.1.1/tests/test_e2e_sqlite_catalog.py +156 -0
- dlt_iceberg-0.1.1/tests/test_error_handling.py +375 -0
- dlt_iceberg-0.1.1/tests/test_merge_disposition.py +254 -0
- dlt_iceberg-0.1.1/tests/test_partition_builder.py +459 -0
- dlt_iceberg-0.1.1/tests/test_partitioning_e2e.py +303 -0
- dlt_iceberg-0.1.1/tests/test_pyiceberg_append.py +149 -0
- dlt_iceberg-0.1.1/tests/test_schema_casting.py +458 -0
- dlt_iceberg-0.1.1/tests/test_schema_converter.py +103 -0
- dlt_iceberg-0.1.1/tests/test_schema_evolution.py +381 -0
- dlt_iceberg-0.1.1/tests/test_smoke.py +128 -0
- dlt_iceberg-0.1.1/uv.lock +1386 -0
`dlt_iceberg-0.1.1/.github/workflows/publish.yml` (new file, +99):

```yaml
name: Publish to PyPI

on:
  workflow_dispatch:
    inputs:
      version_bump:
        description: 'Version bump type'
        required: true
        type: choice
        options:
          - patch
          - minor
          - major

jobs:
  publish:
    runs-on: ubuntu-latest
    permissions:
      contents: write
      id-token: write  # For PyPI trusted publishing

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install uv
        uses: astral-sh/setup-uv@v3

      - name: Get current version
        id: current_version
        run: |
          CURRENT_VERSION=$(uv run python -c "import tomllib; print(tomllib.load(open('pyproject.toml', 'rb'))['project']['version'])")
          echo "version=$CURRENT_VERSION" >> $GITHUB_OUTPUT
          echo "Current version: $CURRENT_VERSION"

      - name: Bump version
        id: bump_version
        run: |
          CURRENT="${{ steps.current_version.outputs.version }}"
          IFS='.' read -r major minor patch <<< "$CURRENT"

          case "${{ github.event.inputs.version_bump }}" in
            major)
              NEW_VERSION="$((major + 1)).0.0"
              ;;
            minor)
              NEW_VERSION="${major}.$((minor + 1)).0"
              ;;
            patch)
              NEW_VERSION="${major}.${minor}.$((patch + 1))"
              ;;
          esac

          echo "new_version=$NEW_VERSION" >> $GITHUB_OUTPUT
          echo "Bumping version: $CURRENT -> $NEW_VERSION"

      - name: Update version in pyproject.toml
        run: |
          sed -i 's/^version = ".*"/version = "${{ steps.bump_version.outputs.new_version }}"/' pyproject.toml
          cat pyproject.toml | grep "^version"

      - name: Build package
        run: |
          uv build

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_TOKEN }}
          print-hash: true

      - name: Commit version bump
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add pyproject.toml
          git commit -m "Bump version to ${{ steps.bump_version.outputs.new_version }}"
          git push

      - name: Create GitHub Release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: v${{ steps.bump_version.outputs.new_version }}
          release_name: v${{ steps.bump_version.outputs.new_version }}
          body: |
            Release version ${{ steps.bump_version.outputs.new_version }}

            Published to PyPI: https://pypi.org/project/dlt-iceberg/${{ steps.bump_version.outputs.new_version }}/
          draft: false
          prerelease: false
```
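The `Bump version` step above is plain shell arithmetic over a `MAJOR.MINOR.PATCH` string. As an illustrative mirror of that logic in Python (not part of the package, just a way to sanity-check the arithmetic locally):

```python
# Illustrative sketch: mirrors the shell version-bump logic in the workflow
# above so it can be tested outside CI. Not part of dlt-iceberg itself.
def bump_version(current: str, bump: str) -> str:
    major, minor, patch = (int(p) for p in current.split("."))
    if bump == "major":
        return f"{major + 1}.0.0"
    if bump == "minor":
        return f"{major}.{minor + 1}.0"
    if bump == "patch":
        return f"{major}.{minor}.{patch + 1}"
    raise ValueError(f"unknown bump type: {bump}")

assert bump_version("0.1.1", "patch") == "0.1.2"
assert bump_version("0.1.1", "minor") == "0.2.0"
assert bump_version("0.1.1", "major") == "1.0.0"
```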
`dlt_iceberg-0.1.1/.github/workflows/test.yml` (new file, +47):

```yaml
name: Tests

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Start Docker services
        run: docker compose up -d

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Set up Python
        run: uv python install 3.11

      - name: Install dependencies
        run: uv sync

      - name: Wait for services
        run: |
          echo "Waiting for services to be healthy..."
          timeout 60 bash -c 'until docker compose ps | grep -q "healthy"; do sleep 2; done'
          echo "Services are healthy"

      - name: Run unit tests
        run: uv run pytest tests/ -m "not integration" -v

      - name: Run integration tests
        run: uv run pytest tests/ -m integration -v -s

      - name: Run all tests
        run: uv run pytest tests/ -v

      - name: Stop Docker services
        if: always()
        run: docker compose down
```
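The `-m integration` / `-m "not integration"` selections above rely on an `integration` pytest marker being declared somewhere (pytest warns on unknown markers otherwise). The package presumably declares it in its pyproject.toml, which this hunk does not show; a minimal conftest.py registration, as a sketch, would be:

```python
# conftest.py: illustrative sketch of registering the `integration` marker
# used by the workflow's test selections. The package's actual declaration
# (likely in pyproject.toml) is not visible in this diff.
def pytest_configure(config):
    config.addinivalue_line(
        "markers", "integration: tests that require the Docker services"
    )
```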
`dlt_iceberg-0.1.1/.gitignore` (new file, +29):

```
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info

# Virtual environments
.venv

# dlt
.dlt/secrets.toml
.dlt/.sources
.dlt/pipeline_state/
*.duckdb
*.duckdb.wal

# IDE
.vscode/
.idea/
*.swp

# Testing
.pytest_cache/
.coverage

# OS
.DS_Store
```
`dlt_iceberg-0.1.1/.python-version` (new file, +1):

```
3.13
```
`dlt_iceberg-0.1.1/LICENSE` (new file, +21):

```
MIT License

Copyright (c) 2025 Sidequery

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```
`dlt_iceberg-0.1.1/PKG-INFO` (new file, +15):

```
Metadata-Version: 2.4
Name: dlt-iceberg
Version: 0.1.1
Summary: dlt custom destination for Apache Iceberg with REST catalog support
License-File: LICENSE
Requires-Python: >=3.11
Requires-Dist: boto3>=1.40.50
Requires-Dist: dlt>=1.17.1
Requires-Dist: pandas>=2.3.3
Requires-Dist: pyarrow>=21.0.0
Requires-Dist: pydantic<2.11
Requires-Dist: pyiceberg[pyiceberg-core]>=0.10.0
Requires-Dist: requests>=2.32.5
Requires-Dist: s3fs>=0.4.2
Requires-Dist: sqlalchemy>=2.0.44
```
`dlt_iceberg-0.1.1/README.md` (new file, +247):

# dlt-iceberg

A [dlt](https://dlthub.com/) destination for [Apache Iceberg](https://iceberg.apache.org/) tables using REST catalogs.

## Features

- **Atomic Multi-File Commits**: Multiple parquet files are committed as a single Iceberg snapshot per table
- **REST Catalog Support**: Works with Nessie, Polaris, AWS Glue, and Unity Catalog
- **Partitioning**: Full support for Iceberg partition transforms (temporal, bucket, truncate, identity)
- **Authentication**: OAuth2, bearer token, AWS SigV4
- **Write Dispositions**: Append, replace, merge (upsert)
- **Schema Evolution**: Automatic schema updates when columns are added
- **Retry Logic**: Exponential backoff for transient failures

## Installation

```bash
git clone https://github.com/sidequery/dlt-iceberg.git
cd dlt-iceberg
uv sync
```

## Quick Start

See the [examples/](examples/) directory for working examples.

### Incremental Load

```python
import dlt
from dlt_iceberg import iceberg_rest

@dlt.resource(name="events", write_disposition="append")
def generate_events():
    yield {"event_id": 1, "value": 100}

pipeline = dlt.pipeline(
    pipeline_name="my_pipeline",
    destination=iceberg_rest(
        catalog_uri="http://localhost:19120/iceberg/main",
        namespace="analytics",
        s3_endpoint="http://localhost:9000",
        s3_access_key_id="minioadmin",
        s3_secret_access_key="minioadmin",
        s3_region="us-east-1",
    ),
)

pipeline.run(generate_events())
```

### Merge/Upsert

```python
@dlt.resource(
    name="users",
    write_disposition="merge",
    primary_key="user_id"
)
def generate_users():
    yield {"user_id": 1, "name": "Alice", "status": "active"}

pipeline.run(generate_users())
```

## Configuration

### Nessie (Docker)

```python
iceberg_rest(
    catalog_uri="http://localhost:19120/iceberg/main",
    namespace="my_namespace",
    s3_endpoint="http://localhost:9000",
    s3_access_key_id="minioadmin",
    s3_secret_access_key="minioadmin",
    s3_region="us-east-1",
)
```

Start the services with `docker compose up -d`.

### AWS Glue

```python
iceberg_rest(
    catalog_uri="https://glue.us-east-1.amazonaws.com/iceberg",
    warehouse="<account-id>:s3tablescatalog/<bucket>",
    namespace="my_database",
    sigv4_enabled=True,
    signing_region="us-east-1",
)
```

AWS credentials are supplied via environment variables.

### Polaris

```python
iceberg_rest(
    catalog_uri="https://polaris.example.com/api/catalog",
    warehouse="s3://bucket/warehouse",
    namespace="production",
    credential="client-id:client-secret",
    oauth2_server_uri="https://polaris.example.com/api/catalog/v1/oauth/tokens",
)
```

### Unity Catalog

```python
iceberg_rest(
    catalog_uri="https://<workspace>.cloud.databricks.com/api/2.1/unity-catalog/iceberg-rest",
    warehouse="<catalog-name>",
    namespace="<schema-name>",
    token="<databricks-token>",
)
```

## Partitioning

Mark columns for partitioning using dlt column hints:

```python
@dlt.resource(
    name="events",
    columns={
        "event_date": {
            "data_type": "date",
            "partition": True,
            "partition_transform": "day",  # Optional: year, month, day, hour
        },
        "region": {
            "data_type": "text",
            "partition": True,  # Uses identity transform for strings
        },
        "user_id": {
            "data_type": "bigint",
            "partition": True,
            "partition_transform": "bucket[10]",  # Hash into 10 buckets
        }
    }
)
def events():
    ...
```

### Available Transforms

- **Temporal**: `year`, `month`, `day`, `hour` (for timestamp/date columns)
- **Identity**: No transformation (default for string/integer columns)
- **Bucket**: `bucket[N]` - hash-based partitioning into N buckets
- **Truncate**: `truncate[N]` - truncate strings/integers to width N

### Default Behavior

If `partition_transform` is not specified:
- Timestamp/date columns default to `month`
- String/integer columns default to `identity`
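These hints ultimately have to become an Iceberg partition spec. For reference, the PyIceberg equivalents of the transforms above look roughly like the following sketch; the field and source IDs here are hypothetical, and the package's `partition_builder` derives them from the actual table schema rather than hard-coding them:

```python
# Illustrative sketch: PyIceberg partition spec matching the hints above.
# IDs are hypothetical; dlt-iceberg's partition_builder resolves them from
# the real Iceberg schema.
from pyiceberg.partitioning import PartitionField, PartitionSpec
from pyiceberg.transforms import BucketTransform, DayTransform, IdentityTransform

spec = PartitionSpec(
    PartitionField(source_id=1, field_id=1000, transform=DayTransform(), name="event_date_day"),
    PartitionField(source_id=2, field_id=1001, transform=IdentityTransform(), name="region"),
    PartitionField(source_id=3, field_id=1002, transform=BucketTransform(num_buckets=10), name="user_id_bucket"),
)
```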
## Write Dispositions

### Append
```python
write_disposition="append"
```
Adds new data without modifying existing rows.

### Replace
```python
write_disposition="replace"
```
Truncates the table and inserts the new data.

### Merge
```python
write_disposition="merge"
primary_key="user_id"
```
Updates existing rows by primary key and inserts new rows.
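For context, merge on an Iceberg table corresponds to an upsert keyed on the primary key. With PyIceberg 0.9+ that operation looks roughly like the sketch below; whether dlt-iceberg calls `Table.upsert` or implements its own delete-and-append is not visible from this diff, and the catalog configuration and table name are assumed:

```python
# Illustrative sketch of the upsert a merge disposition performs, using
# PyIceberg directly. Catalog config and the analytics.users table are
# assumed for the example; this is not dlt-iceberg's internal code.
import pyarrow as pa
from pyiceberg.catalog import load_catalog

catalog = load_catalog("default")  # assumes a configured catalog named "default"
table = catalog.load_table("analytics.users")

incoming = pa.table({"user_id": [1], "name": ["Alice"], "status": ["inactive"]})
table.upsert(incoming, join_cols=["user_id"])  # update matches, insert the rest
```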
## Development

### Run Tests

```bash
# Start Docker services
docker compose up -d

# Run all tests
uv run pytest tests/ -v

# Run only unit tests
uv run pytest tests/ -v -m "not integration"

# Run only integration tests
uv run pytest tests/ -v -m integration
```

### Project Structure

```
dlt-iceberg/
├── src/dlt_iceberg/
│   ├── __init__.py            # Public API
│   ├── destination_client.py  # Class-based destination (atomic commits)
│   ├── destination.py         # Function-based destination (legacy)
│   ├── schema_converter.py    # dlt → Iceberg schema conversion
│   ├── schema_casting.py      # Arrow table casting
│   ├── schema_evolution.py    # Schema updates
│   ├── partition_builder.py   # Partition specs
│   └── error_handling.py      # Retry logic
├── tests/
│   ├── test_destination_rest_catalog.py  # Integration tests (Docker)
│   ├── test_class_based_atomic.py        # Atomic commit tests
│   ├── test_merge_disposition.py
│   ├── test_schema_evolution.py
│   └── ...
├── examples/
│   ├── incremental_load.py    # CSV incremental loading
│   ├── merge_load.py          # CSV merge/upsert
│   └── data/                  # Sample CSV files
└── docker-compose.yml         # Nessie + MinIO for testing
```

## How It Works

The class-based destination uses dlt's `JobClientBase` interface to accumulate parquet files during a load and commit them atomically in `complete_load()`:

1. dlt extracts data and writes parquet files
2. Each file is registered in module-level global state
3. After all files complete, `complete_load()` is called
4. All files for a table are combined and committed as a single Iceberg snapshot
5. Each table gets one snapshot per load

This ensures atomic commits even though dlt creates multiple client instances. A simplified sketch of the pattern follows below.
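A stripped-down illustration of that accumulate-then-commit pattern (plain Python, not the package's actual `destination_client.py`; the function names and registry shape are hypothetical):

```python
# Simplified illustration of the accumulate-then-commit pattern described
# above. The real client implements dlt's JobClientBase; everything here is
# a hypothetical sketch, not dlt-iceberg's internals.
from collections import defaultdict

import pyarrow as pa
import pyarrow.parquet as pq

# Module-level state survives across the multiple client instances dlt creates.
_PENDING_FILES: dict[tuple[str, str], list[str]] = defaultdict(list)

def register_file(load_id: str, table: str, parquet_path: str) -> None:
    """Called once per completed parquet job; only records the file."""
    _PENDING_FILES[(load_id, table)].append(parquet_path)

def complete_load(load_id: str, catalog) -> None:
    """Called once at the end of the load: one commit (snapshot) per table."""
    for (lid, table), paths in list(_PENDING_FILES.items()):
        if lid != load_id:
            continue
        # Combine every file for the table, then append in a single commit,
        # producing exactly one Iceberg snapshot for this table and load.
        combined = pa.concat_tables([pq.read_table(p) for p in paths])
        catalog.load_table(table).append(combined)
        del _PENDING_FILES[(lid, table)]
```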
## License

MIT License - see LICENSE file

## Resources

- [dlt Documentation](https://dlthub.com/docs)
- [Apache Iceberg](https://iceberg.apache.org/)
- [PyIceberg](https://py.iceberg.apache.org/)
- [Iceberg REST Spec](https://iceberg.apache.org/rest-catalog-spec/)