ducklake 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +51 -5
- data/lib/ducklake/client.rb +108 -4
- data/lib/ducklake/version.rb +1 -1
- data/lib/ducklake.rb +5 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98460f8bbd24007057521ad2749899aebd4e689c0183ab98551e7c0e9d7b8ffe
|
4
|
+
data.tar.gz: 726ea12c95079c99999b9415fb915e05818548bcab38647413eee44018c2a7f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 772924fb6262d3a3373eb2e92431b55dc73f81869e2aaf1a92746831c1d784f669a9e82a03a8c4d2538bda7c7980e940cd29f65c068ca996ea71a55c19d37dbd
|
7
|
+
data.tar.gz: c57170ee010ea949d4f8c250f3ff4fcc05a3153b1e25d9412ba12cd53e4ef7e866f088e2956886948757e0586f4005087e50c89fff1295404ce4c01baae79ce1
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,15 @@
|
|
1
|
+
## 0.1.3 (2025-09-23)
|
2
|
+
|
3
|
+
- Added `current_snapshot` and `last_committed_snapshot` methods
|
4
|
+
- Added `rewrite_data_files` and `delete_orphaned_files` methods
|
5
|
+
- Added `commit_message` and `commit_author` options to `transaction` method
|
6
|
+
- Added `migrate_if_required` option
|
7
|
+
- Added experimental support for encryption
|
8
|
+
|
9
|
+
## 0.1.2 (2025-08-23)
|
10
|
+
|
11
|
+
- Added `transaction` method
|
12
|
+
|
1
13
|
## 0.1.1 (2025-08-18)
|
2
14
|
|
3
15
|
- Added experimental support for Polars
|
data/README.md
CHANGED
@@ -154,7 +154,7 @@ Note: This transfers ownership to the data lake, so the file may be deleted as p
|
|
154
154
|
Update data
|
155
155
|
|
156
156
|
```ruby
|
157
|
-
ducklake.sql("UPDATE events SET name = ? WHERE id =
|
157
|
+
ducklake.sql("UPDATE events SET name = ? WHERE id = ?", ["Test", 1])
|
158
158
|
```
|
159
159
|
|
160
160
|
Delete data
|
@@ -163,12 +163,46 @@ Delete data
|
|
163
163
|
ducklake.sql("DELETE FROM events WHERE id = ?", [1])
|
164
164
|
```
|
165
165
|
|
166
|
+
Run multiple statements in a transaction
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
ducklake.transaction do
|
170
|
+
# ...
|
171
|
+
end
|
172
|
+
```
|
173
|
+
|
174
|
+
Raise `DuckLake::Rollback` to roll back
|
175
|
+
|
176
|
+
## Schema Changes
|
177
|
+
|
166
178
|
Update the schema
|
167
179
|
|
168
180
|
```ruby
|
169
181
|
ducklake.sql("ALTER TABLE events ADD COLUMN active BOOLEAN")
|
170
182
|
```
|
171
183
|
|
184
|
+
Set or remove a [partitioning key](https://ducklake.select/docs/stable/duckdb/advanced_features/partitioning)
|
185
|
+
|
186
|
+
```ruby
|
187
|
+
ducklake.sql("ALTER TABLE events SET PARTITIONED BY (name)")
|
188
|
+
# or
|
189
|
+
ducklake.sql("ALTER TABLE events RESET PARTITIONED BY")
|
190
|
+
```
|
191
|
+
|
192
|
+
## Views
|
193
|
+
|
194
|
+
Create a view
|
195
|
+
|
196
|
+
```ruby
|
197
|
+
ducklake.sql("CREATE VIEW events_view AS SELECT * FROM events")
|
198
|
+
```
|
199
|
+
|
200
|
+
Drop a view
|
201
|
+
|
202
|
+
```ruby
|
203
|
+
ducklake.sql("DROP VIEW events_view")
|
204
|
+
```
|
205
|
+
|
172
206
|
## Snapshots
|
173
207
|
|
174
208
|
Get snapshots
|
@@ -181,7 +215,7 @@ Query the data at a specific snapshot version or time
|
|
181
215
|
|
182
216
|
```ruby
|
183
217
|
ducklake.sql("SELECT * FROM events AT (VERSION => ?)", [3])
|
184
|
-
#
|
218
|
+
# or
|
185
219
|
ducklake.sql("SELECT * FROM events AT (TIMESTAMP => ?)", [Date.today - 7])
|
186
220
|
```
|
187
221
|
|
@@ -235,9 +269,9 @@ ducklake.set_option("parquet_compression", "zstd", table_name: "events")
|
|
235
269
|
|
236
270
|
## Read-Only Mode
|
237
271
|
|
238
|
-
Note: This feature is experimental
|
272
|
+
Note: This feature is experimental and does not prevent the DuckDB engine from writing files via `sql`
|
239
273
|
|
240
|
-
|
274
|
+
Attach the catalog in read-only mode
|
241
275
|
|
242
276
|
```ruby
|
243
277
|
DuckLake::Client.new(read_only: true, ...)
|
@@ -292,9 +326,19 @@ quoted_file = ducklake.quote("path/to/data.csv")
|
|
292
326
|
ducklake.sql("COPY #{quoted_table} FROM #{quoted_file}")
|
293
327
|
```
|
294
328
|
|
329
|
+
## Encryption
|
330
|
+
|
331
|
+
Note: This feature is experimental and must be set when creating the catalog
|
332
|
+
|
333
|
+
Encrypt Parquet files
|
334
|
+
|
335
|
+
```ruby
|
336
|
+
DuckLake::Client.new(encrypted: true, ...)
|
337
|
+
```
|
338
|
+
|
295
339
|
## Polars
|
296
340
|
|
297
|
-
Note: This feature is experimental and does not work on tables with schema changes
|
341
|
+
Note: This feature is experimental and does not currently work on tables with schema changes
|
298
342
|
|
299
343
|
Query the data with [Ruby Polars](https://github.com/ankane/ruby-polars)
|
300
344
|
|
@@ -368,10 +412,12 @@ bundle install
|
|
368
412
|
|
369
413
|
# Postgres
|
370
414
|
createdb ducklake_ruby_test
|
415
|
+
createdb ducklake_ruby_test2
|
371
416
|
bundle exec rake test:postgres
|
372
417
|
|
373
418
|
# MySQL and MariaDB
|
374
419
|
mysqladmin create ducklake_ruby_test
|
420
|
+
mysqladmin create ducklake_ruby_test2
|
375
421
|
bundle exec rake test:mysql
|
376
422
|
|
377
423
|
# SQLite
|
data/lib/ducklake/client.rb
CHANGED
@@ -8,7 +8,10 @@ module DuckLake
|
|
8
8
|
snapshot_time: nil,
|
9
9
|
data_inlining_row_limit: 0,
|
10
10
|
create_if_not_exists: false,
|
11
|
-
|
11
|
+
migrate_if_required: true, # TODO make false in 0.2.0
|
12
|
+
read_only: false, # experimental
|
13
|
+
override_storage_url: false, # experimental
|
14
|
+
encrypted: false # experimental
|
12
15
|
)
|
13
16
|
catalog_uri = URI.parse(catalog_url)
|
14
17
|
storage_uri = URI.parse(storage_url)
|
@@ -58,10 +61,13 @@ module DuckLake
|
|
58
61
|
|
59
62
|
attach_options = {data_path: storage_url}
|
60
63
|
attach_options[:read_only] = true if read_only
|
64
|
+
attach_options[:encrypted] = 1 if encrypted
|
61
65
|
attach_options[:snapshot_version] = snapshot_version if !snapshot_version.nil?
|
62
66
|
attach_options[:snapshot_time] = snapshot_time if !snapshot_time.nil?
|
63
67
|
attach_options[:data_inlining_row_limit] = data_inlining_row_limit if data_inlining_row_limit > 0
|
64
68
|
attach_options[:create_if_not_exists] = false unless create_if_not_exists
|
69
|
+
attach_options[:migrate_if_required] = false unless migrate_if_required
|
70
|
+
attach_options[:override_data_path] = true if override_storage_url
|
65
71
|
|
66
72
|
@catalog = "ducklake"
|
67
73
|
@storage_url = storage_url
|
@@ -106,6 +112,18 @@ module DuckLake
|
|
106
112
|
execute(sql, params)
|
107
113
|
end
|
108
114
|
|
115
|
+
def transaction(commit_message: nil, commit_author: nil)
|
116
|
+
execute("BEGIN")
|
117
|
+
begin
|
118
|
+
yield
|
119
|
+
set_commit_message(commit_message, commit_author) if commit_message || commit_author
|
120
|
+
execute("COMMIT")
|
121
|
+
rescue => e
|
122
|
+
execute("ROLLBACK")
|
123
|
+
raise e unless e.is_a?(Rollback)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
109
127
|
def attach(alias_, url)
|
110
128
|
type = nil
|
111
129
|
extension = nil
|
@@ -151,6 +169,15 @@ module DuckLake
|
|
151
169
|
symbolize_keys result
|
152
170
|
end
|
153
171
|
|
172
|
+
# experimental
|
173
|
+
# TODO use keyword arguments or range?
|
174
|
+
def table_changes(table, start_snapshot, end_snapshot)
|
175
|
+
params = [@catalog, "main", table, start_snapshot, end_snapshot]
|
176
|
+
result = execute("SELECT * FROM ducklake_table_changes(?, ?, ?, ?, ?)", params)
|
177
|
+
# only return changes between snapshots
|
178
|
+
symbolize_keys result.reject { |v| v["snapshot_id"] == start_snapshot }
|
179
|
+
end
|
180
|
+
|
154
181
|
# TODO more DDL methods?
|
155
182
|
def drop_table(table, if_exists: nil)
|
156
183
|
execute("DROP TABLE#{" IF EXISTS" if if_exists} #{quote_identifier(table)}")
|
@@ -162,6 +189,14 @@ module DuckLake
|
|
162
189
|
symbolize_keys execute("SELECT * FROM ducklake_snapshots(?)", [@catalog])
|
163
190
|
end
|
164
191
|
|
192
|
+
def current_snapshot
|
193
|
+
execute("SELECT * FROM ducklake_current_snapshot(?)", [@catalog]).rows[0][0]
|
194
|
+
end
|
195
|
+
|
196
|
+
def last_committed_snapshot
|
197
|
+
execute("SELECT * FROM ducklake_last_committed_snapshot(?)", [@catalog]).rows[0][0]
|
198
|
+
end
|
199
|
+
|
165
200
|
# https://ducklake.select/docs/stable/duckdb/usage/configuration
|
166
201
|
def options
|
167
202
|
symbolize_keys execute("SELECT * FROM ducklake_options(?)", [@catalog])
|
@@ -237,6 +272,55 @@ module DuckLake
|
|
237
272
|
symbolize_keys execute("CALL ducklake_cleanup_old_files(#{args.join(", ")})", params)
|
238
273
|
end
|
239
274
|
|
275
|
+
# https://ducklake.select/docs/stable/duckdb/maintenance/cleanup_of_files#cleanup-of-orphaned-files
|
276
|
+
def delete_orphaned_files(cleanup_all: false, older_than: nil, dry_run: false)
|
277
|
+
args = ["?"]
|
278
|
+
params = [@catalog]
|
279
|
+
|
280
|
+
if cleanup_all
|
281
|
+
args << "cleanup_all => ?"
|
282
|
+
params << cleanup_all
|
283
|
+
end
|
284
|
+
|
285
|
+
if !older_than.nil?
|
286
|
+
args << "older_than => ?"
|
287
|
+
params << older_than
|
288
|
+
end
|
289
|
+
|
290
|
+
if dry_run
|
291
|
+
args << "dry_run => ?"
|
292
|
+
params << dry_run
|
293
|
+
end
|
294
|
+
|
295
|
+
symbolize_keys execute("CALL ducklake_delete_orphaned_files(#{args.join(", ")})", params)
|
296
|
+
end
|
297
|
+
|
298
|
+
# https://ducklake.select/docs/stable/duckdb/maintenance/rewrite_data_files
|
299
|
+
def rewrite_data_files(table = nil, delete_threshold: nil)
|
300
|
+
args = ["?"]
|
301
|
+
params = [@catalog]
|
302
|
+
|
303
|
+
if !table.nil?
|
304
|
+
args << "?"
|
305
|
+
params << table
|
306
|
+
end
|
307
|
+
|
308
|
+
if !delete_threshold.nil?
|
309
|
+
args << "delete_threshold => ?"
|
310
|
+
params << delete_threshold
|
311
|
+
end
|
312
|
+
|
313
|
+
execute("CALL ducklake_rewrite_data_files(#{args.join(", ")})", params)
|
314
|
+
nil
|
315
|
+
end
|
316
|
+
|
317
|
+
# experimental
|
318
|
+
# https://ducklake.select/docs/stable/duckdb/maintenance/checkpoint
|
319
|
+
def checkpoint
|
320
|
+
execute("CHECKPOINT")
|
321
|
+
nil
|
322
|
+
end
|
323
|
+
|
240
324
|
# https://ducklake.select/docs/stable/duckdb/advanced_features/data_inlining
|
241
325
|
def flush_inlined_data(table_name: nil)
|
242
326
|
args = ["?"]
|
@@ -289,15 +373,26 @@ module DuckLake
|
|
289
373
|
end
|
290
374
|
|
291
375
|
# experimental
|
292
|
-
# TODO support schema changes
|
293
376
|
def polars(table, snapshot_version: nil, snapshot_time: nil)
|
294
377
|
files = list_files(table, snapshot_version:, snapshot_time:)
|
295
378
|
sources = files.map { |v| v[:data_file] }
|
379
|
+
# TODO support schema changes
|
380
|
+
# column_mapping = [
|
381
|
+
# "iceberg-column-mapping",
|
382
|
+
# nil
|
383
|
+
# ]
|
296
384
|
deletion_files = [
|
297
385
|
"iceberg-position-delete",
|
298
386
|
files.map.with_index.select { |v, i| v[:delete_file] }.to_h { |v, i| [i, [v[:delete_file]]] }
|
299
387
|
]
|
300
|
-
Polars.scan_parquet(
|
388
|
+
Polars.scan_parquet(
|
389
|
+
sources,
|
390
|
+
storage_options: polars_storage_options,
|
391
|
+
# allow_missing_columns: true,
|
392
|
+
# extra_columns: "ignore",
|
393
|
+
# _column_mapping: column_mapping,
|
394
|
+
_deletion_files: deletion_files
|
395
|
+
)
|
301
396
|
end
|
302
397
|
|
303
398
|
# libduckdb does not provide function
|
@@ -366,11 +461,15 @@ module DuckLake
|
|
366
461
|
|
367
462
|
def error_mapping
|
368
463
|
@error_mapping ||= {
|
464
|
+
"Binder Error: " => BinderError,
|
369
465
|
"Catalog Error: " => CatalogError,
|
370
466
|
"Conversion Error: " => ConversionError,
|
467
|
+
"Invalid Configuration Error: " => InvalidConfigurationError,
|
371
468
|
"Invalid Input Error: " => InvalidInputError,
|
372
469
|
"IO Error: " => IOError,
|
373
|
-
"Permission Error: " => PermissionError
|
470
|
+
"Not implemented Error: " => NotImplementedError,
|
471
|
+
"Permission Error: " => PermissionError,
|
472
|
+
"TransactionContext Error: " => TransactionContextError
|
374
473
|
}
|
375
474
|
end
|
376
475
|
|
@@ -409,6 +508,11 @@ module DuckLake
|
|
409
508
|
name
|
410
509
|
end
|
411
510
|
|
511
|
+
def set_commit_message(message, author)
|
512
|
+
execute("CALL ducklake_set_commit_message(?, ?, ?)", [@catalog, author, message])
|
513
|
+
nil
|
514
|
+
end
|
515
|
+
|
412
516
|
def symbolize_keys(result)
|
413
517
|
result.map { |v| v.transform_keys(&:to_sym) }
|
414
518
|
end
|
data/lib/ducklake/version.rb
CHANGED
data/lib/ducklake.rb
CHANGED
@@ -11,9 +11,14 @@ require_relative "ducklake/version"
|
|
11
11
|
|
12
12
|
module DuckLake
|
13
13
|
class Error < StandardError; end
|
14
|
+
class BinderError < Error; end
|
14
15
|
class CatalogError < Error; end
|
15
16
|
class ConversionError < Error; end
|
17
|
+
class InvalidConfigurationError < Error; end
|
16
18
|
class InvalidInputError < Error; end
|
17
19
|
class IOError < Error; end
|
20
|
+
class NotImplementedError < Error; end
|
18
21
|
class PermissionError < Error; end
|
22
|
+
class Rollback < Error; end
|
23
|
+
class TransactionContextError < Error; end
|
19
24
|
end
|