ducklake 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 451f6059faf0e7c451599b218bb9dafbbb8b80a2b54416833a5dafb85a0d1a9d
4
- data.tar.gz: a1c6c7825da3dd5a077705c1c3f106a0cc1d75bc2e60b17ccb5099f98a3bdf1a
3
+ metadata.gz: 98460f8bbd24007057521ad2749899aebd4e689c0183ab98551e7c0e9d7b8ffe
4
+ data.tar.gz: 726ea12c95079c99999b9415fb915e05818548bcab38647413eee44018c2a7f3
5
5
  SHA512:
6
- metadata.gz: 9359cf3aaf06ed4398b9bc0ff35489c0e50008cd8d3426a787339d33f541a6bce8e3a1eca4a2b88b46852545895b591d08613f88ad26902aa4d41cf8b842bc93
7
- data.tar.gz: 5bd9818d1b928aa2c0a11301c3455b89f8a4fc99912c6d1b8e52fdd151088adcc331b0ca01169df08412e151841f2f6b50f4c7b85b909ca558d6c21f0ffebd56
6
+ metadata.gz: 772924fb6262d3a3373eb2e92431b55dc73f81869e2aaf1a92746831c1d784f669a9e82a03a8c4d2538bda7c7980e940cd29f65c068ca996ea71a55c19d37dbd
7
+ data.tar.gz: c57170ee010ea949d4f8c250f3ff4fcc05a3153b1e25d9412ba12cd53e4ef7e866f088e2956886948757e0586f4005087e50c89fff1295404ce4c01baae79ce1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## 0.1.3 (2025-09-23)
2
+
3
+ - Added `current_snapshot` and `last_committed_snapshot` methods
4
+ - Added `rewrite_data_files` and `delete_orphaned_files` methods
5
+ - Added `commit_message` and `commit_author` options to `transaction` method
6
+ - Added `migrate_if_required` option
7
+ - Added experimental support for encryption
8
+
9
+ ## 0.1.2 (2025-08-23)
10
+
11
+ - Added `transaction` method
12
+
1
13
  ## 0.1.1 (2025-08-18)
2
14
 
3
15
  - Added experimental support for Polars
data/README.md CHANGED
@@ -154,7 +154,7 @@ Note: This transfers ownership to the data lake, so the file may be deleted as p
154
154
  Update data
155
155
 
156
156
  ```ruby
157
- ducklake.sql("UPDATE events SET name = ? WHERE id = 1", ["Test", 1])
157
+ ducklake.sql("UPDATE events SET name = ? WHERE id = ?", ["Test", 1])
158
158
  ```
159
159
 
160
160
  Delete data
@@ -163,12 +163,46 @@ Delete data
163
163
  ducklake.sql("DELETE FROM events WHERE id = ?", [1])
164
164
  ```
165
165
 
166
+ Run multiple statements in a transaction
167
+
168
+ ```ruby
169
+ ducklake.transaction do
170
+ # ...
171
+ end
172
+ ```
173
+
174
+ Raise `DuckLake::Rollback` inside the block to roll back the transaction without re-raising the error
175
+
176
+ ## Schema Changes
177
+
166
178
  Update the schema
167
179
 
168
180
  ```ruby
169
181
  ducklake.sql("ALTER TABLE events ADD COLUMN active BOOLEAN")
170
182
  ```
171
183
 
184
+ Set or remove a [partitioning key](https://ducklake.select/docs/stable/duckdb/advanced_features/partitioning)
185
+
186
+ ```ruby
187
+ ducklake.sql("ALTER TABLE events SET PARTITIONED BY (name)")
188
+ # or
189
+ ducklake.sql("ALTER TABLE events RESET PARTITIONED BY")
190
+ ```
191
+
192
+ ## Views
193
+
194
+ Create a view
195
+
196
+ ```ruby
197
+ ducklake.sql("CREATE VIEW events_view AS SELECT * FROM events")
198
+ ```
199
+
200
+ Drop a view
201
+
202
+ ```ruby
203
+ ducklake.sql("DROP VIEW events_view")
204
+ ```
205
+
172
206
  ## Snapshots
173
207
 
174
208
  Get snapshots
@@ -181,7 +215,7 @@ Query the data at a specific snapshot version or time
181
215
 
182
216
  ```ruby
183
217
  ducklake.sql("SELECT * FROM events AT (VERSION => ?)", [3])
184
- #
218
+ # or
185
219
  ducklake.sql("SELECT * FROM events AT (TIMESTAMP => ?)", [Date.today - 7])
186
220
  ```
187
221
 
@@ -235,9 +269,9 @@ ducklake.set_option("parquet_compression", "zstd", table_name: "events")
235
269
 
236
270
  ## Read-Only Mode
237
271
 
238
- Note: This feature is experimental
272
+ Note: This feature is experimental and does not prevent the DuckDB engine from writing files via `sql`
239
273
 
240
- Connect to the data lake in read-only mode
274
+ Attach the catalog in read-only mode
241
275
 
242
276
  ```ruby
243
277
  DuckLake::Client.new(read_only: true, ...)
@@ -292,9 +326,19 @@ quoted_file = ducklake.quote("path/to/data.csv")
292
326
  ducklake.sql("COPY #{quoted_table} FROM #{quoted_file}")
293
327
  ```
294
328
 
329
+ ## Encryption
330
+
331
+ Note: This feature is experimental and must be set when creating the catalog
332
+
333
+ Encrypt Parquet files
334
+
335
+ ```ruby
336
+ DuckLake::Client.new(encrypted: true, ...)
337
+ ```
338
+
295
339
  ## Polars
296
340
 
297
- Note: This feature is experimental and does not work on tables with schema changes
341
+ Note: This feature is experimental and does not currently work on tables with schema changes
298
342
 
299
343
  Query the data with [Ruby Polars](https://github.com/ankane/ruby-polars)
300
344
 
@@ -368,10 +412,12 @@ bundle install
368
412
 
369
413
  # Postgres
370
414
  createdb ducklake_ruby_test
415
+ createdb ducklake_ruby_test2
371
416
  bundle exec rake test:postgres
372
417
 
373
418
  # MySQL and MariaDB
374
419
  mysqladmin create ducklake_ruby_test
420
+ mysqladmin create ducklake_ruby_test2
375
421
  bundle exec rake test:mysql
376
422
 
377
423
  # SQLite
@@ -8,7 +8,10 @@ module DuckLake
8
8
  snapshot_time: nil,
9
9
  data_inlining_row_limit: 0,
10
10
  create_if_not_exists: false,
11
- read_only: false # experimental
11
+ migrate_if_required: true, # TODO make false in 0.2.0
12
+ read_only: false, # experimental
13
+ override_storage_url: false, # experimental
14
+ encrypted: false # experimental
12
15
  )
13
16
  catalog_uri = URI.parse(catalog_url)
14
17
  storage_uri = URI.parse(storage_url)
@@ -58,10 +61,13 @@ module DuckLake
58
61
 
59
62
  attach_options = {data_path: storage_url}
60
63
  attach_options[:read_only] = true if read_only
64
+ attach_options[:encrypted] = 1 if encrypted
61
65
  attach_options[:snapshot_version] = snapshot_version if !snapshot_version.nil?
62
66
  attach_options[:snapshot_time] = snapshot_time if !snapshot_time.nil?
63
67
  attach_options[:data_inlining_row_limit] = data_inlining_row_limit if data_inlining_row_limit > 0
64
68
  attach_options[:create_if_not_exists] = false unless create_if_not_exists
69
+ attach_options[:migrate_if_required] = false unless migrate_if_required
70
+ attach_options[:override_data_path] = true if override_storage_url
65
71
 
66
72
  @catalog = "ducklake"
67
73
  @storage_url = storage_url
@@ -106,6 +112,18 @@ module DuckLake
106
112
  execute(sql, params)
107
113
  end
108
114
 
115
+ def transaction(commit_message: nil, commit_author: nil)
116
+ execute("BEGIN")
117
+ begin
118
+ yield
119
+ set_commit_message(commit_message, commit_author) if commit_message || commit_author
120
+ execute("COMMIT")
121
+ rescue => e
122
+ execute("ROLLBACK")
123
+ raise e unless e.is_a?(Rollback)
124
+ end
125
+ end
126
+
109
127
  def attach(alias_, url)
110
128
  type = nil
111
129
  extension = nil
@@ -151,6 +169,15 @@ module DuckLake
151
169
  symbolize_keys result
152
170
  end
153
171
 
172
+ # experimental
173
+ # TODO use keyword arguments or range?
174
+ def table_changes(table, start_snapshot, end_snapshot)
175
+ params = [@catalog, "main", table, start_snapshot, end_snapshot]
176
+ result = execute("SELECT * FROM ducklake_table_changes(?, ?, ?, ?, ?)", params)
177
+ # only return changes between snapshots
178
+ symbolize_keys result.reject { |v| v["snapshot_id"] == start_snapshot }
179
+ end
180
+
154
181
  # TODO more DDL methods?
155
182
  def drop_table(table, if_exists: nil)
156
183
  execute("DROP TABLE#{" IF EXISTS" if if_exists} #{quote_identifier(table)}")
@@ -162,6 +189,14 @@ module DuckLake
162
189
  symbolize_keys execute("SELECT * FROM ducklake_snapshots(?)", [@catalog])
163
190
  end
164
191
 
192
+ def current_snapshot
193
+ execute("SELECT * FROM ducklake_current_snapshot(?)", [@catalog]).rows[0][0]
194
+ end
195
+
196
+ def last_committed_snapshot
197
+ execute("SELECT * FROM ducklake_last_committed_snapshot(?)", [@catalog]).rows[0][0]
198
+ end
199
+
165
200
  # https://ducklake.select/docs/stable/duckdb/usage/configuration
166
201
  def options
167
202
  symbolize_keys execute("SELECT * FROM ducklake_options(?)", [@catalog])
@@ -237,6 +272,55 @@ module DuckLake
237
272
  symbolize_keys execute("CALL ducklake_cleanup_old_files(#{args.join(", ")})", params)
238
273
  end
239
274
 
275
+ # https://ducklake.select/docs/stable/duckdb/maintenance/cleanup_of_files#cleanup-of-orphaned-files
276
+ def delete_orphaned_files(cleanup_all: false, older_than: nil, dry_run: false)
277
+ args = ["?"]
278
+ params = [@catalog]
279
+
280
+ if cleanup_all
281
+ args << "cleanup_all => ?"
282
+ params << cleanup_all
283
+ end
284
+
285
+ if !older_than.nil?
286
+ args << "older_than => ?"
287
+ params << older_than
288
+ end
289
+
290
+ if dry_run
291
+ args << "dry_run => ?"
292
+ params << dry_run
293
+ end
294
+
295
+ symbolize_keys execute("CALL ducklake_delete_orphaned_files(#{args.join(", ")})", params)
296
+ end
297
+
298
+ # https://ducklake.select/docs/stable/duckdb/maintenance/rewrite_data_files
299
+ def rewrite_data_files(table = nil, delete_threshold: nil)
300
+ args = ["?"]
301
+ params = [@catalog]
302
+
303
+ if !table.nil?
304
+ args << "?"
305
+ params << table
306
+ end
307
+
308
+ if !delete_threshold.nil?
309
+ args << "delete_threshold => ?"
310
+ params << delete_threshold
311
+ end
312
+
313
+ execute("CALL ducklake_rewrite_data_files(#{args.join(", ")})", params)
314
+ nil
315
+ end
316
+
317
+ # experimental
318
+ # https://ducklake.select/docs/stable/duckdb/maintenance/checkpoint
319
+ def checkpoint
320
+ execute("CHECKPOINT")
321
+ nil
322
+ end
323
+
240
324
  # https://ducklake.select/docs/stable/duckdb/advanced_features/data_inlining
241
325
  def flush_inlined_data(table_name: nil)
242
326
  args = ["?"]
@@ -289,15 +373,26 @@ module DuckLake
289
373
  end
290
374
 
291
375
  # experimental
292
- # TODO support schema changes
293
376
  def polars(table, snapshot_version: nil, snapshot_time: nil)
294
377
  files = list_files(table, snapshot_version:, snapshot_time:)
295
378
  sources = files.map { |v| v[:data_file] }
379
+ # TODO support schema changes
380
+ # column_mapping = [
381
+ # "iceberg-column-mapping",
382
+ # nil
383
+ # ]
296
384
  deletion_files = [
297
385
  "iceberg-position-delete",
298
386
  files.map.with_index.select { |v, i| v[:delete_file] }.to_h { |v, i| [i, [v[:delete_file]]] }
299
387
  ]
300
- Polars.scan_parquet(sources, _deletion_files: deletion_files, storage_options: polars_storage_options)
388
+ Polars.scan_parquet(
389
+ sources,
390
+ storage_options: polars_storage_options,
391
+ # allow_missing_columns: true,
392
+ # extra_columns: "ignore",
393
+ # _column_mapping: column_mapping,
394
+ _deletion_files: deletion_files
395
+ )
301
396
  end
302
397
 
303
398
  # libduckdb does not provide function
@@ -366,11 +461,15 @@ module DuckLake
366
461
 
367
462
  def error_mapping
368
463
  @error_mapping ||= {
464
+ "Binder Error: " => BinderError,
369
465
  "Catalog Error: " => CatalogError,
370
466
  "Conversion Error: " => ConversionError,
467
+ "Invalid Configuration Error: " => InvalidConfigurationError,
371
468
  "Invalid Input Error: " => InvalidInputError,
372
469
  "IO Error: " => IOError,
373
- "Permission Error: " => PermissionError
470
+ "Not implemented Error: " => NotImplementedError,
471
+ "Permission Error: " => PermissionError,
472
+ "TransactionContext Error: " => TransactionContextError
374
473
  }
375
474
  end
376
475
 
@@ -409,6 +508,11 @@ module DuckLake
409
508
  name
410
509
  end
411
510
 
511
+ def set_commit_message(message, author)
512
+ execute("CALL ducklake_set_commit_message(?, ?, ?)", [@catalog, author, message])
513
+ nil
514
+ end
515
+
412
516
  def symbolize_keys(result)
413
517
  result.map { |v| v.transform_keys(&:to_sym) }
414
518
  end
@@ -1,3 +1,3 @@
1
1
  module DuckLake
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.3"
3
3
  end
data/lib/ducklake.rb CHANGED
@@ -11,9 +11,14 @@ require_relative "ducklake/version"
11
11
 
12
12
  module DuckLake
13
13
  class Error < StandardError; end
14
+ class BinderError < Error; end
14
15
  class CatalogError < Error; end
15
16
  class ConversionError < Error; end
17
+ class InvalidConfigurationError < Error; end
16
18
  class InvalidInputError < Error; end
17
19
  class IOError < Error; end
20
+ class NotImplementedError < Error; end
18
21
  class PermissionError < Error; end
22
+ class Rollback < Error; end
23
+ class TransactionContextError < Error; end
19
24
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ducklake
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane