ducklake 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e31c96f2bcf9728685f6eb72f3cdfad44174eb28347dbe549391703f2795414
4
- data.tar.gz: 63befb2ceaad4c0587b6ef10672e687aa2c51cdc78c95451c85599595d9e83d7
3
+ metadata.gz: 9366806f0dee7b0277dfee65be6c4718c339b6e9ab53e595e1f88534d5144534
4
+ data.tar.gz: 3b3c01d575bb8d0c380b49cbbc8b6deb62cc7de5542449dc73d0fd3b5ccffb1f
5
5
  SHA512:
6
- metadata.gz: 1a62d39d7962cbdd8b60a49ba0f7a20d47bbf2a3a18061ae59330cab07b67d91d7dab533487da99cdfbfc3d2b1d7cceaf407bfdd471ffe2bee6d84d2f5567413
7
- data.tar.gz: 7032e761c77d93463beb4fb650e17fba8a8e3d54554ca5debc9f14a1625a18806c9167f5719cd7bab7079b068adae151b53a404600ac09b02eb1974cfe90c06e
6
+ metadata.gz: 2e1fb28d47b8efeedaedba54727d678bdf946efd6aac1001ae0a2ad9aa9fbdc5b28bf8df06b2a10be13f6684195e2f684eafe71c7bedb27b36eff7af36d65abf
7
+ data.tar.gz: 4feea1505445b4347e722ea31d87c6dd9ed759489550f7a69997fced7b9a45827b4beaa81c4bc5e12abb801226af82f1c4903aff5ab6fd4b0851b6772b2c83c3
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.1.2 (2025-08-23)
2
+
3
+ - Added `transaction` method
4
+
5
+ ## 0.1.1 (2025-08-18)
6
+
7
+ - Added experimental support for Polars
8
+
1
9
  ## 0.1.0 (2025-08-17)
2
10
 
3
11
  - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 Andrew Kane
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md CHANGED
@@ -1,12 +1,12 @@
1
1
  # DuckLake Ruby
2
2
 
3
- :fire: [DuckLake](https://ducklake.select/) for Ruby
3
+ :duck: [DuckLake](https://ducklake.select/) for Ruby
4
4
 
5
5
  Run your own data lake with a SQL database and file/object storage
6
6
 
7
7
  ```ruby
8
8
  DuckLake::Client.new(
9
- catalog_url: "postgres://user:pass@host:5432/db",
9
+ catalog_url: "postgres://user:pass@host:5432/dbname",
10
10
  storage_url: "s3://my-bucket/"
11
11
  )
12
12
  ```
@@ -149,7 +149,7 @@ Or [register existing data files](https://ducklake.select/docs/stable/duckdb/met
149
149
  ducklake.add_data_files("events", "data.parquet")
150
150
  ```
151
151
 
152
- Note: This transfers ownership to DuckLake, so the file can be deleted after running `cleanup_old_files`
152
+ Note: This transfers ownership to the data lake, so the file may be deleted as part of [maintenance](#maintenance)
153
153
 
154
154
  Update data
155
155
 
@@ -163,12 +163,46 @@ Delete data
163
163
  ducklake.sql("DELETE FROM events WHERE id = ?", [1])
164
164
  ```
165
165
 
166
+ Run multiple statements in a transaction
167
+
168
+ ```ruby
169
+ ducklake.transaction do
170
+ # ...
171
+ end
172
+ ```
173
+
174
+ Raise `DuckLake::Rollback` inside the block to roll back the transaction
175
+
176
+ ## Schema Changes
177
+
166
178
  Update the schema
167
179
 
168
180
  ```ruby
169
181
  ducklake.sql("ALTER TABLE events ADD COLUMN active BOOLEAN")
170
182
  ```
171
183
 
184
+ Set or remove a [partitioning key](https://ducklake.select/docs/stable/duckdb/advanced_features/partitioning)
185
+
186
+ ```ruby
187
+ ducklake.sql("ALTER TABLE events SET PARTITIONED BY (name)")
188
+ # or
189
+ ducklake.sql("ALTER TABLE events RESET PARTITIONED BY")
190
+ ```
191
+
192
+ ## Views
193
+
194
+ Create a view
195
+
196
+ ```ruby
197
+ ducklake.sql("CREATE VIEW events_view AS SELECT * FROM events")
198
+ ```
199
+
200
+ Drop a view
201
+
202
+ ```ruby
203
+ ducklake.sql("DROP VIEW events_view")
204
+ ```
205
+
172
206
  ## Snapshots
173
207
 
174
208
  Get snapshots
@@ -233,13 +267,31 @@ Or for a specific table
233
267
  ducklake.set_option("parquet_compression", "zstd", table_name: "events")
234
268
  ```
235
269
 
236
- ## Security
270
+ ## Read-Only Mode
271
+
272
+ Note: This feature is experimental and does not prevent the DuckDB engine from writing files via `sql`
273
+
274
+ Attach the catalog in read-only mode
275
+
276
+ ```ruby
277
+ DuckLake::Client.new(read_only: true, ...)
278
+ ```
279
+
280
+ Use read-only credentials for the catalog database and the storage provider, and [disable external access](#external-access)
237
281
 
238
- See [best practices](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/overview.html) for DuckDB security.
282
+ You should also consider [disabling community extensions](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/securing_extensions.html#community-extensions)
239
283
 
240
- Grant minimal permissions for the catalog database and data storage.
284
+ ```ruby
285
+ ducklake.sql("SET allow_community_extensions = false")
286
+ ```
241
287
 
242
- ### External Access
288
+ And [locking the configuration](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/overview.html#locking-configurations)
289
+
290
+ ```ruby
291
+ ducklake.sql("SET lock_configuration = true")
292
+ ```
293
+
294
+ ## External Access
243
295
 
244
296
  [Restrict external access](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/overview.html#restricting-file-access) to the DuckDB engine
245
297
 
@@ -258,6 +310,40 @@ ducklake.disable_external_access(
258
310
 
259
311
  The storage URL is automatically included in `allowed_directories`
260
312
 
313
+ ## SQL Safety
314
+
315
+ Use parameterized queries when possible
316
+
317
+ ```ruby
318
+ ducklake.sql("SELECT * FROM events WHERE id = ?", [1])
319
+ ```
320
+
321
+ For places that do not support parameters, use `quote` or `quote_identifier`
322
+
323
+ ```ruby
324
+ quoted_table = ducklake.quote_identifier("events")
325
+ quoted_file = ducklake.quote("path/to/data.csv")
326
+ ducklake.sql("COPY #{quoted_table} FROM #{quoted_file}")
327
+ ```
328
+
329
+ ## Polars
330
+
331
+ Note: This feature is experimental and does not currently work on tables with schema changes
332
+
333
+ Query the data with [Ruby Polars](https://github.com/ankane/ruby-polars)
334
+
335
+ ```ruby
336
+ ducklake.polars("events")
337
+ ```
338
+
339
+ Specify a snapshot
340
+
341
+ ```ruby
342
+ ducklake.polars("events", snapshot_version: 3)
343
+ # or
344
+ ducklake.polars("events", snapshot_time: Date.today - 7)
345
+ ```
346
+
261
347
  ## Reference
262
348
 
263
349
  Get table info
@@ -8,7 +8,7 @@ module DuckLake
8
8
  snapshot_time: nil,
9
9
  data_inlining_row_limit: 0,
10
10
  create_if_not_exists: false,
11
- _read_only: false # experimental
11
+ read_only: false # experimental
12
12
  )
13
13
  catalog_uri = URI.parse(catalog_url)
14
14
  storage_uri = URI.parse(storage_url)
@@ -30,6 +30,9 @@ module DuckLake
30
30
  raise ArgumentError, "Unsupported catalog type: #{catalog_uri.scheme}"
31
31
  end
32
32
 
33
+ @storage_scheme = storage_uri.scheme
34
+ @storage_options = storage_options.dup
35
+
33
36
  secret_options = nil
34
37
  storage_options = storage_options.dup
35
38
 
@@ -54,7 +57,7 @@ module DuckLake
54
57
  end
55
58
 
56
59
  attach_options = {data_path: storage_url}
57
- attach_options[:read_only] = true if _read_only
60
+ attach_options[:read_only] = true if read_only
58
61
  attach_options[:snapshot_version] = snapshot_version if !snapshot_version.nil?
59
62
  attach_options[:snapshot_time] = snapshot_time if !snapshot_time.nil?
60
63
  attach_options[:data_inlining_row_limit] = data_inlining_row_limit if data_inlining_row_limit > 0
@@ -63,7 +66,7 @@ module DuckLake
63
66
  @catalog = "ducklake"
64
67
  @storage_url = storage_url
65
68
 
66
- if _read_only
69
+ if read_only
67
70
  config = DuckDB::Config.new
68
71
  config["access_mode"] = "READ_ONLY"
69
72
 
@@ -103,6 +106,17 @@ module DuckLake
103
106
  execute(sql, params)
104
107
  end
105
108
 
109
+ def transaction
110
+ execute("BEGIN")
111
+ begin
112
+ yield
113
+ execute("COMMIT")
114
+ rescue => e
115
+ execute("ROLLBACK")
116
+ raise e unless e.is_a?(Rollback)
117
+ end
118
+ end
119
+
106
120
  def attach(alias_, url)
107
121
  type = nil
108
122
  extension = nil
@@ -148,6 +162,15 @@ module DuckLake
148
162
  symbolize_keys result
149
163
  end
150
164
 
165
+ # experimental
166
+ # TODO use keyword arguments or range?
167
+ def table_changes(table, start_snapshot, end_snapshot)
168
+ params = [@catalog, "main", table, start_snapshot, end_snapshot]
169
+ result = execute("SELECT * FROM ducklake_table_changes(?, ?, ?, ?, ?)", params)
170
+ # only return changes between snapshots
171
+ symbolize_keys result.reject { |v| v["snapshot_id"] == start_snapshot }
172
+ end
173
+
151
174
  # TODO more DDL methods?
152
175
  def drop_table(table, if_exists: nil)
153
176
  execute("DROP TABLE#{" IF EXISTS" if if_exists} #{quote_identifier(table)}")
@@ -285,6 +308,29 @@ module DuckLake
285
308
  nil
286
309
  end
287
310
 
311
+ # experimental
312
+ def polars(table, snapshot_version: nil, snapshot_time: nil)
313
+ files = list_files(table, snapshot_version:, snapshot_time:)
314
+ sources = files.map { |v| v[:data_file] }
315
+ # TODO support schema changes
316
+ # column_mapping = [
317
+ # "iceberg-column-mapping",
318
+ # nil
319
+ # ]
320
+ deletion_files = [
321
+ "iceberg-position-delete",
322
+ files.map.with_index.select { |v, i| v[:delete_file] }.to_h { |v, i| [i, [v[:delete_file]]] }
323
+ ]
324
+ Polars.scan_parquet(
325
+ sources,
326
+ storage_options: polars_storage_options,
327
+ # allow_missing_columns: true,
328
+ # extra_columns: "ignore",
329
+ # _column_mapping: column_mapping,
330
+ _deletion_files: deletion_files
331
+ )
332
+ end
333
+
288
334
  # libduckdb does not provide function
289
335
  # https://duckdb.org/docs/stable/sql/dialect/keywords_and_identifiers.html
290
336
  def quote_identifier(value)
@@ -355,7 +401,9 @@ module DuckLake
355
401
  "Conversion Error: " => ConversionError,
356
402
  "Invalid Input Error: " => InvalidInputError,
357
403
  "IO Error: " => IOError,
358
- "Permission Error: " => PermissionError
404
+ "Not implemented Error: " => NotImplementedError,
405
+ "Permission Error: " => PermissionError,
406
+ "TransactionContext Error: " => TransactionContextError
359
407
  }
360
408
  end
361
409
 
@@ -412,6 +460,27 @@ module DuckLake
412
460
  uri.path[1..]
413
461
  end
414
462
 
463
+ def polars_storage_options
464
+ @polars_storage_options ||= begin
465
+ storage_options = {}
466
+ extra_options = @storage_options.dup
467
+
468
+ case @storage_scheme
469
+ when "s3"
470
+ # https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html
471
+ [:aws_access_key_id, :aws_secret_access_key, :region].each do |k|
472
+ storage_options[k] = extra_options.delete(k) if extra_options.key?(k)
473
+ end
474
+ end
475
+
476
+ if extra_options.any?
477
+ raise ArgumentError, "Unsupported #{@storage_scheme || "file"} storage options: #{extra_options.keys.map(&:inspect).join(", ")}"
478
+ end
479
+
480
+ storage_options
481
+ end
482
+ end
483
+
415
484
  def quote_array(value)
416
485
  "[#{value.map { |v| quote(v) }.join(", ")}]"
417
486
  end
@@ -1,3 +1,3 @@
1
1
  module DuckLake
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.2"
3
3
  end
data/lib/ducklake.rb CHANGED
@@ -15,5 +15,8 @@ module DuckLake
15
15
  class ConversionError < Error; end
16
16
  class InvalidInputError < Error; end
17
17
  class IOError < Error; end
18
+ class NotImplementedError < Error; end
18
19
  class PermissionError < Error; end
20
+ class Rollback < Error; end
21
+ class TransactionContextError < Error; end
19
22
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ducklake
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -29,6 +29,7 @@ extensions: []
29
29
  extra_rdoc_files: []
30
30
  files:
31
31
  - CHANGELOG.md
32
+ - LICENSE.txt
32
33
  - README.md
33
34
  - lib/ducklake.rb
34
35
  - lib/ducklake/client.rb