dlt-iceberg 0.1.4__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dlt-iceberg might be problematic. Click here for more details.
- dlt_iceberg-0.1.5/.claude/settings.local.json +9 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/PKG-INFO +2 -2
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/README.md +1 -1
- dlt_iceberg-0.1.5/examples/usgs_load.log +88 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/pyproject.toml +1 -1
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/src/dlt_iceberg/destination_client.py +1 -1
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/uv.lock +1 -1
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/.github/workflows/publish.yml +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/.github/workflows/test.yml +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/.gitignore +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/.python-version +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/LICENSE +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/TESTING.md +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/docker-compose.yml +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/examples/README.md +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/examples/data/customers_initial.csv +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/examples/data/customers_updates.csv +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/examples/data/events_batch1.csv +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/examples/data/events_batch2.csv +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/examples/incremental_load.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/examples/merge_load.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/examples/usgs_earthquakes.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/src/dlt_iceberg/__init__.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/src/dlt_iceberg/destination.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/src/dlt_iceberg/error_handling.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/src/dlt_iceberg/partition_builder.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/src/dlt_iceberg/schema_casting.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/src/dlt_iceberg/schema_converter.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/src/dlt_iceberg/schema_evolution.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_class_based_atomic.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_destination_e2e.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_destination_rest_catalog.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_e2e_sqlite_catalog.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_error_handling.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_merge_disposition.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_partition_builder.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_partitioning_e2e.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_pyiceberg_append.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_schema_casting.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_schema_converter.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_schema_evolution.py +0 -0
- {dlt_iceberg-0.1.4 → dlt_iceberg-0.1.5}/tests/test_smoke.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dlt-iceberg
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: dlt destination for Apache Iceberg with atomic multi-file commits via REST catalogs
|
|
5
5
|
Project-URL: Homepage, https://github.com/sidequery/dlt-iceberg
|
|
6
6
|
Project-URL: Repository, https://github.com/sidequery/dlt-iceberg
|
|
@@ -127,7 +127,7 @@ iceberg_rest(
|
|
|
127
127
|
# Performance tuning
|
|
128
128
|
max_retries=5, # Retry attempts for transient failures
|
|
129
129
|
retry_backoff_base=2.0, # Exponential backoff multiplier
|
|
130
|
-
merge_batch_size=
|
|
130
|
+
merge_batch_size=500000, # Rows per batch for merge operations
|
|
131
131
|
strict_casting=False, # Fail on potential data loss
|
|
132
132
|
)
|
|
133
133
|
```
|
|
@@ -95,7 +95,7 @@ iceberg_rest(
|
|
|
95
95
|
# Performance tuning
|
|
96
96
|
max_retries=5, # Retry attempts for transient failures
|
|
97
97
|
retry_backoff_base=2.0, # Exponential backoff multiplier
|
|
98
|
-
merge_batch_size=
|
|
98
|
+
merge_batch_size=500000, # Rows per batch for merge operations
|
|
99
99
|
strict_casting=False, # Fail on potential data loss
|
|
100
100
|
)
|
|
101
101
|
```
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
Reading inline script metadata from `usgs_earthquakes.py`
|
|
2
|
+
Installed 60 packages in 340ms
|
|
3
|
+
Loading 190 months of earthquake data from 2010-01-01 to 2025-10-12...
|
|
4
|
+
|
|
5
|
+
Fetching earthquakes from 2010-01-01 to 2010-02-01...
|
|
6
|
+
Retrieved 9923 earthquakes
|
|
7
|
+
Traceback (most recent call last):
|
|
8
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/extract/pipe_iterator.py", line 274, in _get_source_item
|
|
9
|
+
pipe_item = next(gen)
|
|
10
|
+
File "/Users/nico/Code/sidequery-dlt/examples/usgs_earthquakes.py", line 107, in fetch_earthquakes
|
|
11
|
+
"time": datetime.fromtimestamp(props["time"] / 1000) if props.get("time") else None,
|
|
12
|
+
^^^^^^^^
|
|
13
|
+
UnboundLocalError: cannot access local variable 'datetime' where it is not associated with a value
|
|
14
|
+
|
|
15
|
+
The above exception was the direct cause of the following exception:
|
|
16
|
+
|
|
17
|
+
Traceback (most recent call last):
|
|
18
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/pipeline/pipeline.py", line 476, in extract
|
|
19
|
+
self._extract_source(
|
|
20
|
+
~~~~~~~~~~~~~~~~~~~~^
|
|
21
|
+
extract_step,
|
|
22
|
+
^^^^^^^^^^^^^
|
|
23
|
+
...<3 lines>...
|
|
24
|
+
refresh=refresh or self.refresh,
|
|
25
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
26
|
+
)
|
|
27
|
+
^
|
|
28
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/pipeline/pipeline.py", line 1308, in _extract_source
|
|
29
|
+
load_id = extract.extract(
|
|
30
|
+
source, max_parallel_items, workers, load_package_state_update=load_package_state_update
|
|
31
|
+
)
|
|
32
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/extract/extract.py", line 473, in extract
|
|
33
|
+
self._extract_single_source(
|
|
34
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~^
|
|
35
|
+
load_id,
|
|
36
|
+
^^^^^^^^
|
|
37
|
+
...<2 lines>...
|
|
38
|
+
workers=workers,
|
|
39
|
+
^^^^^^^^^^^^^^^^
|
|
40
|
+
)
|
|
41
|
+
^
|
|
42
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/extract/extract.py", line 397, in _extract_single_source
|
|
43
|
+
for pipe_item in pipes:
|
|
44
|
+
^^^^^
|
|
45
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/extract/pipe_iterator.py", line 158, in __next__
|
|
46
|
+
pipe_item = self._get_source_item()
|
|
47
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/extract/pipe_iterator.py", line 304, in _get_source_item
|
|
48
|
+
raise ResourceExtractionError(pipe.name, gen, str(ex), "generator") from ex
|
|
49
|
+
dlt.extract.exceptions.ResourceExtractionError: In processing pipe `earthquakes`: extraction of resource `earthquakes` in `generator` `fetch_earthquakes` caused an exception: cannot access local variable 'datetime' where it is not associated with a value
|
|
50
|
+
|
|
51
|
+
The above exception was the direct cause of the following exception:
|
|
52
|
+
|
|
53
|
+
Traceback (most recent call last):
|
|
54
|
+
File "/Users/nico/Code/sidequery-dlt/examples/usgs_earthquakes.py", line 234, in <module>
|
|
55
|
+
main()
|
|
56
|
+
~~~~^^
|
|
57
|
+
File "/Users/nico/Code/sidequery-dlt/examples/usgs_earthquakes.py", line 183, in main
|
|
58
|
+
load_info = pipeline.run(earthquakes_batch())
|
|
59
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/pipeline/pipeline.py", line 223, in _wrap
|
|
60
|
+
step_info = f(self, *args, **kwargs)
|
|
61
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/pipeline/pipeline.py", line 272, in _wrap
|
|
62
|
+
return f(self, *args, **kwargs)
|
|
63
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/pipeline/pipeline.py", line 724, in run
|
|
64
|
+
self.extract(
|
|
65
|
+
~~~~~~~~~~~~^
|
|
66
|
+
data,
|
|
67
|
+
^^^^^
|
|
68
|
+
...<8 lines>...
|
|
69
|
+
loader_file_format=loader_file_format,
|
|
70
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
71
|
+
)
|
|
72
|
+
^
|
|
73
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/pipeline/pipeline.py", line 223, in _wrap
|
|
74
|
+
step_info = f(self, *args, **kwargs)
|
|
75
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/pipeline/pipeline.py", line 177, in _wrap
|
|
76
|
+
rv = f(self, *args, **kwargs)
|
|
77
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/pipeline/pipeline.py", line 163, in _wrap
|
|
78
|
+
return f(self, *args, **kwargs)
|
|
79
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/pipeline/pipeline.py", line 272, in _wrap
|
|
80
|
+
return f(self, *args, **kwargs)
|
|
81
|
+
File "/Users/nico/.cache/uv/archive-v0/6bPdJD5TjlWUrQz91CSnF/lib/python3.13/site-packages/dlt/pipeline/pipeline.py", line 497, in extract
|
|
82
|
+
raise PipelineStepFailed(
|
|
83
|
+
...<5 lines>...
|
|
84
|
+
) from exc
|
|
85
|
+
dlt.pipeline.exceptions.PipelineStepFailed: Pipeline execution failed at `step=extract` when processing package with `load_id=1760337324.066329` with exception:
|
|
86
|
+
|
|
87
|
+
<class 'dlt.extract.exceptions.ResourceExtractionError'>
|
|
88
|
+
In processing pipe `earthquakes`: extraction of resource `earthquakes` in `generator` `fetch_earthquakes` caused an exception: cannot access local variable 'datetime' where it is not associated with a value
|
|
@@ -94,7 +94,7 @@ class IcebergRestConfiguration(DestinationClientConfiguration):
|
|
|
94
94
|
strict_casting: bool = False
|
|
95
95
|
|
|
96
96
|
# Merge batch size (for upsert operations to avoid memory issues)
|
|
97
|
-
merge_batch_size: int =
|
|
97
|
+
merge_batch_size: int = 500000
|
|
98
98
|
|
|
99
99
|
|
|
100
100
|
class IcebergRestLoadJob(RunnableLoadJob):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|