dlt-iceberg 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dlt_iceberg/__init__.py +6 -0
- dlt_iceberg/adapter.py +276 -0
- dlt_iceberg/destination.py +117 -16
- dlt_iceberg/destination_client.py +455 -38
- dlt_iceberg/partition_builder.py +12 -6
- dlt_iceberg/schema_converter.py +4 -1
- dlt_iceberg/sql_client.py +222 -0
- dlt_iceberg-0.2.0.dist-info/METADATA +442 -0
- dlt_iceberg-0.2.0.dist-info/RECORD +14 -0
- {dlt_iceberg-0.1.3.dist-info → dlt_iceberg-0.2.0.dist-info}/WHEEL +1 -1
- dlt_iceberg-0.1.3.dist-info/METADATA +0 -279
- dlt_iceberg-0.1.3.dist-info/RECORD +0 -12
- {dlt_iceberg-0.1.3.dist-info → dlt_iceberg-0.2.0.dist-info}/licenses/LICENSE +0 -0
dlt_iceberg/__init__.py
CHANGED
@@ -13,6 +13,9 @@ from .destination import iceberg_rest as iceberg_rest_function_based
 # Export the class-based version as the primary destination
 iceberg_rest = iceberg_rest_class_based
 
+# Adapter for Iceberg-specific hints
+from .adapter import iceberg_adapter, iceberg_partition, PartitionTransform
+
 # Errors
 from .schema_casting import CastingError
 from .schema_evolution import SchemaEvolutionError
@@ -23,6 +26,9 @@ __all__ = [
     "iceberg_rest_function_based",
     "IcebergRestClient",
     "IcebergRestConfiguration",
+    "iceberg_adapter",
+    "iceberg_partition",
+    "PartitionTransform",
     "CastingError",
     "SchemaEvolutionError",
 ]
dlt_iceberg/adapter.py
ADDED
@@ -0,0 +1,276 @@
+"""
+Iceberg adapter for dlt resources.
+
+Provides a way to add Iceberg-specific hints to dlt resources, following
+the adapter pattern used by BigQuery, Databricks, and other destinations.
+
+Usage:
+    from dlt_iceberg import iceberg_adapter, iceberg_partition
+
+    @dlt.resource(name="events")
+    def my_events():
+        yield {"event_date": "2024-01-01", "user_id": 123}
+
+    # Partition by month on event_date and bucket user_id
+    adapted = iceberg_adapter(
+        my_events,
+        partition=[
+            iceberg_partition.month("event_date"),
+            iceberg_partition.bucket(10, "user_id"),
+        ]
+    )
+"""
+
+import logging
+from typing import Any, List, Optional, Union, cast
+from dataclasses import dataclass
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class PartitionTransform:
+    """Represents an Iceberg partition transform for a column.
+
+    Attributes:
+        column: Column name to partition on
+        transform: Transform type (identity, year, month, day, hour, bucket, truncate)
+        param: Optional parameter for bucket[N] or truncate[N]
+        name: Optional custom name for the partition field
+    """
+
+    column: str
+    transform: str
+    param: Optional[int] = None
+    name: Optional[str] = None
+
+    def to_hint_value(self) -> str:
+        """Convert to partition_transform hint value."""
+        if self.param is not None:
+            return f"{self.transform}[{self.param}]"
+        return self.transform
+
+
+class iceberg_partition:
+    """Factory for Iceberg partition transforms.
+
+    Provides static methods to create partition specifications:
+
+    - identity(column, name=None): No transformation, use value as-is
+    - year(column, name=None): Extract year from timestamp/date
+    - month(column, name=None): Extract year-month from timestamp/date
+    - day(column, name=None): Extract date from timestamp/date
+    - hour(column, name=None): Extract date-hour from timestamp
+    - bucket(num_buckets, column, name=None): Hash partition into n buckets
+    - truncate(width, column, name=None): Truncate string/number to width
+
+    Examples:
+        iceberg_partition.month("created_at")
+        iceberg_partition.month("created_at", "month_created")
+        iceberg_partition.bucket(10, "user_id")
+        iceberg_partition.bucket(10, "user_id", "user_bucket")
+        iceberg_partition.truncate(4, "email")
+    """
+
+    @staticmethod
+    def identity(column: str, name: Optional[str] = None) -> PartitionTransform:
+        """Identity transform - use column value as-is for partitioning.
+
+        Args:
+            column: Column name to partition on
+            name: Optional custom name for the partition field
+        """
+        return PartitionTransform(column=column, transform="identity", name=name)
+
+    @staticmethod
+    def year(column: str, name: Optional[str] = None) -> PartitionTransform:
+        """Year transform - partition by year extracted from timestamp/date.
+
+        Args:
+            column: Column name to partition on
+            name: Optional custom name for the partition field
+        """
+        return PartitionTransform(column=column, transform="year", name=name)
+
+    @staticmethod
+    def month(column: str, name: Optional[str] = None) -> PartitionTransform:
+        """Month transform - partition by year-month extracted from timestamp/date.
+
+        Args:
+            column: Column name to partition on
+            name: Optional custom name for the partition field
+        """
+        return PartitionTransform(column=column, transform="month", name=name)
+
+    @staticmethod
+    def day(column: str, name: Optional[str] = None) -> PartitionTransform:
+        """Day transform - partition by date extracted from timestamp/date.
+
+        Args:
+            column: Column name to partition on
+            name: Optional custom name for the partition field
+        """
+        return PartitionTransform(column=column, transform="day", name=name)
+
+    @staticmethod
+    def hour(column: str, name: Optional[str] = None) -> PartitionTransform:
+        """Hour transform - partition by date-hour extracted from timestamp.
+
+        Args:
+            column: Column name to partition on
+            name: Optional custom name for the partition field
+        """
+        return PartitionTransform(column=column, transform="hour", name=name)
+
+    @staticmethod
+    def bucket(num_buckets: int, column: str, name: Optional[str] = None) -> PartitionTransform:
+        """Bucket transform - hash partition into n buckets.
+
+        Args:
+            num_buckets: Number of buckets (must be positive)
+            column: Column name to partition on
+            name: Optional custom name for the partition field
+
+        Raises:
+            ValueError: If num_buckets is not positive
+        """
+        if num_buckets <= 0:
+            raise ValueError(f"num_buckets must be positive, got {num_buckets}")
+        return PartitionTransform(column=column, transform="bucket", param=num_buckets, name=name)
+
+    @staticmethod
+    def truncate(width: int, column: str, name: Optional[str] = None) -> PartitionTransform:
+        """Truncate transform - truncate string/number to width.
+
+        Args:
+            width: Truncation width (must be positive)
+            column: Column name to partition on
+            name: Optional custom name for the partition field
+
+        Raises:
+            ValueError: If width is not positive
+        """
+        if width <= 0:
+            raise ValueError(f"width must be positive, got {width}")
+        return PartitionTransform(column=column, transform="truncate", param=width, name=name)
+
+
+def _get_resource_for_adapter(data: Any):
+    """Get or create a DltResource from data.
+
+    Follows the pattern from dlt.destinations.utils.get_resource_for_adapter.
+    """
+    import dlt
+    from dlt.extract.resource import DltResource
+    from dlt.extract.source import DltSource
+
+    if isinstance(data, DltResource):
+        return data
+
+    if isinstance(data, DltSource):
+        if len(data.selected_resources.keys()) == 1:
+            return list(data.selected_resources.values())[0]
+        else:
+            raise ValueError(
+                "You are trying to use iceberg_adapter on a DltSource with "
+                "multiple resources. You can only use adapters on: pure data, "
+                "a DltResource, or a DltSource with a single DltResource."
+            )
+
+    resource_name = None
+    if not hasattr(data, "__name__"):
+        logger.info("Setting default resource name to 'content' for adapted resource.")
+        resource_name = "content"
+
+    return cast("DltResource", dlt.resource(data, name=resource_name))
+
+
+def iceberg_adapter(
+    data: Any,
+    partition: Optional[Union[str, PartitionTransform, List[Union[str, PartitionTransform]]]] = None,
+):
+    """
+    Apply Iceberg-specific hints to a dlt resource.
+
+    This adapter prepares data for loading into Iceberg tables by setting
+    partition specifications using Iceberg's native transforms.
+
+    Args:
+        data: A dlt resource, source (with single resource), or raw data
+        partition: Partition specification(s). Can be:
+            - A column name string (uses identity transform)
+            - A single PartitionTransform
+            - A list of column names and/or PartitionTransform objects
+            Use iceberg_partition helpers to create transforms.
+
+    Returns:
+        DltResource with Iceberg-specific hints applied
+
+    Examples:
+        # Simple identity partition by column name
+        iceberg_adapter(my_resource, partition="region")
+        iceberg_adapter(my_resource, partition=["region", "category"])
+
+        # Single partition column with month transform
+        iceberg_adapter(my_resource, partition=iceberg_partition.month("created_at"))
+
+        # Multiple partition columns with mixed specs
+        iceberg_adapter(
+            my_resource,
+            partition=[
+                iceberg_partition.day("event_date"),
+                "region",  # identity partition
+                iceberg_partition.bucket(10, "user_id"),
+            ]
+        )
+
+        # Works with raw data too
+        data = [{"id": 1, "ts": "2024-01-01"}]
+        iceberg_adapter(data, partition=iceberg_partition.month("ts"))
+    """
+    resource = _get_resource_for_adapter(data)
+
+    if partition is None:
+        return resource
+
+    # Normalize to list
+    if isinstance(partition, (str, PartitionTransform)):
+        partition_list = [partition]
+    else:
+        partition_list = partition
+
+    if not partition_list:
+        return resource
+
+    # Convert strings to identity PartitionTransforms
+    partitions: List[PartitionTransform] = []
+    for p in partition_list:
+        if isinstance(p, str):
+            partitions.append(iceberg_partition.identity(p))
+        else:
+            partitions.append(p)
+
+    # Build column hints for partitioning
+    column_hints = {}
+
+    for p in partitions:
+        if p.column not in column_hints:
+            column_hints[p.column] = {"name": p.column}
+
+        # Set partition flag using x-partition (custom hint prefix)
+        column_hints[p.column]["x-partition"] = True
+
+        # Set transform (identity is handled as default in partition_builder)
+        if p.transform != "identity":
+            column_hints[p.column]["x-partition-transform"] = p.to_hint_value()
+
+        # Set custom partition field name if provided
+        if p.name:
+            column_hints[p.column]["x-partition-name"] = p.name
+
+    # Apply hints to resource
+    resource.apply_hints(columns=column_hints)
+
+    logger.info(f"Applied Iceberg partition hints: {[p.column for p in partitions]}")
+
+    return resource
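
For orientation, a minimal end-to-end sketch of how the new adapter is meant to be used, assembled from the docstrings above. Only iceberg_adapter, iceberg_partition, and the exported iceberg_rest factory come from this diff; the resource body, the pipeline name, and the assumption that catalog credentials arrive via dlt config/secrets are illustrative, not part of the package.

import dlt
from dlt_iceberg import iceberg_rest, iceberg_adapter, iceberg_partition

@dlt.resource(name="events", primary_key="event_id")
def events():
    # hypothetical sample rows
    yield {"event_id": 1, "event_date": "2024-01-01", "user_id": 123, "region": "eu"}

# Month partition on event_date, identity partition on region,
# and a 10-bucket hash partition on user_id
partitioned_events = iceberg_adapter(
    events,
    partition=[
        iceberg_partition.month("event_date"),
        "region",
        iceberg_partition.bucket(10, "user_id"),
    ],
)

# Assumed wiring: catalog settings are expected to come from dlt config/secrets
pipeline = dlt.pipeline(pipeline_name="events_to_iceberg", destination=iceberg_rest())
pipeline.run(partitioned_events)
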
dlt_iceberg/destination.py
CHANGED
@@ -37,6 +37,87 @@ from pyiceberg.io.pyarrow import schema_to_pyarrow
 logger = logging.getLogger(__name__)
 
 
+def _get_merge_strategy(table_schema: TTableSchema) -> str:
+    """Extract merge strategy from table schema.
+
+    write_disposition can be:
+    - "merge" (string) -> use upsert (backward compatible)
+    - {"disposition": "merge", "strategy": "delete-insert"} -> explicit strategy
+
+    Returns:
+        Merge strategy: "upsert" or "delete-insert"
+    """
+    write_disposition = table_schema.get("write_disposition", "append")
+
+    if isinstance(write_disposition, dict):
+        return write_disposition.get("strategy", "delete-insert")
+
+    # String "merge" - use upsert as our default (backward compatible)
+    return "upsert"
+
+
+def _execute_delete_insert(iceberg_table, arrow_table, primary_keys: list, identifier: str):
+    """Execute delete-insert merge strategy.
+
+    Deletes rows matching primary keys in incoming data, then appends new data.
+    Uses PyIceberg transaction for atomic delete + append.
+
+    Args:
+        iceberg_table: PyIceberg table object
+        arrow_table: Arrow table with data to merge
+        primary_keys: List of primary key column names
+        identifier: Table identifier for logging
+
+    Returns:
+        Tuple of (rows_deleted_estimate, rows_inserted)
+    """
+    from pyiceberg.expressions import In, And, EqualTo, Or
+
+    # Build delete filter from primary key values in incoming data
+    if len(primary_keys) == 1:
+        pk_col = primary_keys[0]
+        pk_values = arrow_table.column(pk_col).to_pylist()
+        unique_pk_values = list(set(pk_values))
+        delete_filter = In(pk_col, unique_pk_values)
+        deleted_estimate = len(unique_pk_values)
+    else:
+        # Composite primary key - build OR of AND conditions
+        pk_tuples = set()
+        for i in range(len(arrow_table)):
+            pk_tuple = tuple(
+                arrow_table.column(pk).to_pylist()[i] for pk in primary_keys
+            )
+            pk_tuples.add(pk_tuple)
+
+        conditions = []
+        for pk_tuple in pk_tuples:
+            and_conditions = [
+                EqualTo(pk, val) for pk, val in zip(primary_keys, pk_tuple)
+            ]
+            if len(and_conditions) == 1:
+                conditions.append(and_conditions[0])
+            else:
+                conditions.append(And(*and_conditions))
+
+        if len(conditions) == 1:
+            delete_filter = conditions[0]
+        else:
+            delete_filter = Or(*conditions)
+        deleted_estimate = len(pk_tuples)
+
+    logger.info(
+        f"Delete-insert for {identifier}: deleting up to {deleted_estimate} "
+        f"matching rows, inserting {len(arrow_table)} rows"
+    )
+
+    # Execute atomic delete + append using transaction
+    with iceberg_table.transaction() as txn:
+        txn.delete(delete_filter)
+        txn.append(arrow_table)
+
+    return (deleted_estimate, len(arrow_table))
+
+
 def _iceberg_rest_handler(
     items: str,  # File path when batch_size=0
     table: TTableSchema,
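
A quick standalone check of the mapping documented in _get_merge_strategy, using hand-built minimal table schema dicts (assumed shapes, not real dlt-generated schemas):

# plain "merge" string keeps the backward-compatible upsert behaviour
assert _get_merge_strategy({"write_disposition": "merge"}) == "upsert"

# the dict form selects the strategy explicitly
assert _get_merge_strategy(
    {"write_disposition": {"disposition": "merge", "strategy": "delete-insert"}}
) == "delete-insert"
assert _get_merge_strategy(
    {"write_disposition": {"disposition": "merge", "strategy": "upsert"}}
) == "upsert"
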
@@ -270,13 +351,18 @@ def _iceberg_rest_handler(
         raise
 
     # Write data based on disposition
-
+    # Handle both string and dict write_disposition
+    disposition_type = write_disposition
+    if isinstance(write_disposition, dict):
+        disposition_type = write_disposition.get("disposition", "append")
+
+    if disposition_type == "replace":
         logger.info(f"Overwriting table {identifier}")
         iceberg_table.overwrite(arrow_table)
-    elif
+    elif disposition_type == "append":
         logger.info(f"Appending to table {identifier}")
         iceberg_table.append(arrow_table)
-    elif
+    elif disposition_type == "merge":
         # For merge, we need primary keys
         # Try multiple ways to get primary keys from dlt table schema
         primary_keys = table.get("primary_key") or table.get("x-merge-keys")
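
The same normalization, pulled out as a standalone sketch to show the values disposition_type takes for the two write_disposition shapes (the helper name and inputs are ours, for illustration only):

def _disposition_type(write_disposition):
    # mirrors the inline normalization in the hunk above
    if isinstance(write_disposition, dict):
        return write_disposition.get("disposition", "append")
    return write_disposition

assert _disposition_type("replace") == "replace"
assert _disposition_type("merge") == "merge"
assert _disposition_type({"disposition": "merge", "strategy": "delete-insert"}) == "merge"
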
@@ -296,21 +382,36 @@ def _iceberg_rest_handler(
             )
             iceberg_table.append(arrow_table)
         else:
-
-
-            # PyIceberg will automatically match rows based on join_cols (primary keys)
-            upsert_result = iceberg_table.upsert(
-                df=arrow_table,
-                join_cols=primary_keys,
-                when_matched_update_all=True,
-                when_not_matched_insert_all=True,
-            )
+            # Get merge strategy
+            merge_strategy = _get_merge_strategy(table)
             logger.info(
-                f"
-                f"{
+                f"Merging into table {identifier} on keys {primary_keys} "
+                f"using strategy: {merge_strategy}"
             )
+
+            if merge_strategy == "delete-insert":
+                # Atomic delete + insert
+                deleted, inserted = _execute_delete_insert(
+                    iceberg_table, arrow_table, primary_keys, identifier
+                )
+                logger.info(
+                    f"Delete-insert completed: ~{deleted} deleted, "
+                    f"{inserted} inserted"
+                )
+            else:
+                # Default: upsert strategy
+                upsert_result = iceberg_table.upsert(
+                    df=arrow_table,
+                    join_cols=primary_keys,
+                    when_matched_update_all=True,
+                    when_not_matched_insert_all=True,
+                )
+                logger.info(
+                    f"Upsert completed: {upsert_result.rows_updated} updated, "
+                    f"{upsert_result.rows_inserted} inserted"
+                )
     else:
-        raise ValueError(f"Unknown write disposition: {
+        raise ValueError(f"Unknown write disposition: {disposition_type}")
 
     logger.info(f"Successfully wrote {len(arrow_table)} rows to {identifier}")
     return  # Success
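
From the user's side, the strategy branch above is selected through the resource's write_disposition; a minimal sketch with assumed resource names and fields, where the plain string keeps the upsert default and the dict form opts into delete-insert:

import dlt

@dlt.resource(primary_key="user_id", write_disposition="merge")
def users_upserted():
    # plain "merge" -> handled via iceberg_table.upsert(...)
    yield {"user_id": 1, "name": "Ada"}

@dlt.resource(
    primary_key="user_id",
    write_disposition={"disposition": "merge", "strategy": "delete-insert"},
)
def users_delete_insert():
    # dict form -> handled by _execute_delete_insert(...) in a single transaction
    yield {"user_id": 1, "name": "Ada"}
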
@@ -391,7 +492,7 @@ def iceberg_rest(**kwargs):
     def _raw_capabilities_with_merge():
         """Add merge support to the destination capabilities."""
         caps = original_raw_capabilities()
-        caps.supported_merge_strategies = ["upsert"]
+        caps.supported_merge_strategies = ["delete-insert", "upsert"]
         return caps
 
     # Bind the new method to the instance