ducklake 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e31c96f2bcf9728685f6eb72f3cdfad44174eb28347dbe549391703f2795414
4
- data.tar.gz: 63befb2ceaad4c0587b6ef10672e687aa2c51cdc78c95451c85599595d9e83d7
3
+ metadata.gz: 451f6059faf0e7c451599b218bb9dafbbb8b80a2b54416833a5dafb85a0d1a9d
4
+ data.tar.gz: a1c6c7825da3dd5a077705c1c3f106a0cc1d75bc2e60b17ccb5099f98a3bdf1a
5
5
  SHA512:
6
- metadata.gz: 1a62d39d7962cbdd8b60a49ba0f7a20d47bbf2a3a18061ae59330cab07b67d91d7dab533487da99cdfbfc3d2b1d7cceaf407bfdd471ffe2bee6d84d2f5567413
7
- data.tar.gz: 7032e761c77d93463beb4fb650e17fba8a8e3d54554ca5debc9f14a1625a18806c9167f5719cd7bab7079b068adae151b53a404600ac09b02eb1974cfe90c06e
6
+ metadata.gz: 9359cf3aaf06ed4398b9bc0ff35489c0e50008cd8d3426a787339d33f541a6bce8e3a1eca4a2b88b46852545895b591d08613f88ad26902aa4d41cf8b842bc93
7
+ data.tar.gz: 5bd9818d1b928aa2c0a11301c3455b89f8a4fc99912c6d1b8e52fdd151088adcc331b0ca01169df08412e151841f2f6b50f4c7b85b909ca558d6c21f0ffebd56
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.1.1 (2025-08-18)
2
+
3
+ - Added experimental support for Polars
4
+
1
5
  ## 0.1.0 (2025-08-17)
2
6
 
3
7
  - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 Andrew Kane
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md CHANGED
@@ -1,12 +1,12 @@
1
1
  # DuckLake Ruby
2
2
 
3
- :fire: [DuckLake](https://ducklake.select/) for Ruby
3
+ :duck: [DuckLake](https://ducklake.select/) for Ruby
4
4
 
5
5
  Run your own data lake with a SQL database and file/object storage
6
6
 
7
7
  ```ruby
8
8
  DuckLake::Client.new(
9
- catalog_url: "postgres://user:pass@host:5432/db",
9
+ catalog_url: "postgres://user:pass@host:5432/dbname",
10
10
  storage_url: "s3://my-bucket/"
11
11
  )
12
12
  ```
@@ -149,7 +149,7 @@ Or [register existing data files](https://ducklake.select/docs/stable/duckdb/met
149
149
  ducklake.add_data_files("events", "data.parquet")
150
150
  ```
151
151
 
152
- Note: This transfers ownership to DuckLake, so the file can be deleted after running `cleanup_old_files`
152
+ Note: This transfers ownership to the data lake, so the file may be deleted as part of [maintenance](#maintenance)
153
153
 
154
154
  Update data
155
155
 
@@ -233,13 +233,31 @@ Or for a specific table
233
233
  ducklake.set_option("parquet_compression", "zstd", table_name: "events")
234
234
  ```
235
235
 
236
- ## Security
236
+ ## Read-Only Mode
237
237
 
238
- See [best practices](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/overview.html) for DuckDB security.
238
+ Note: This feature is experimental
239
239
 
240
- Grant minimal permissions for the catalog database and data storage.
240
+ Connect to the data lake in read-only mode
241
241
 
242
- ### External Access
242
+ ```ruby
243
+ DuckLake::Client.new(read_only: true, ...)
244
+ ```
245
+
246
+ Use read-only credentials for catalog database and storage provider and [disable external access](#external-access)
247
+
248
+ You should also consider [disabling community extensions](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/securing_extensions.html#community-extensions)
249
+
250
+ ```ruby
251
+ ducklake.sql("SET allow_community_extensions = false")
252
+ ```
253
+
254
+ And [locking the configuration](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/overview.html#locking-configurations)
255
+
256
+ ```ruby
257
+ ducklake.sql("SET lock_configuration = true")
258
+ ```
259
+
260
+ ## External Access
243
261
 
244
262
  [Restrict external access](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/overview.html#restricting-file-access) to the DuckDB engine
245
263
 
@@ -258,6 +276,40 @@ ducklake.disable_external_access(
258
276
 
259
277
  The storage URL is automatically included in `allowed_directories`
260
278
 
279
+ ## SQL Safety
280
+
281
+ Use parameterized queries when possible
282
+
283
+ ```ruby
284
+ ducklake.sql("SELECT * FROM events WHERE id = ?", [1])
285
+ ```
286
+
287
+ For places that do not support parameters, use `quote` or `quote_identifier`
288
+
289
+ ```ruby
290
+ quoted_table = ducklake.quote_identifier("events")
291
+ quoted_file = ducklake.quote("path/to/data.csv")
292
+ ducklake.sql("COPY #{quoted_table} FROM #{quoted_file}")
293
+ ```
294
+
295
+ ## Polars
296
+
297
+ Note: This feature is experimental and does not work on tables with schema changes
298
+
299
+ Query the data with [Ruby Polars](https://github.com/ankane/ruby-polars)
300
+
301
+ ```ruby
302
+ ducklake.polars("events")
303
+ ```
304
+
305
+ Specify a snapshot
306
+
307
+ ```ruby
308
+ ducklake.polars("events", snapshot_version: 3)
309
+ # or
310
+ ducklake.polars("events", snapshot_time: Date.today - 7)
311
+ ```
312
+
261
313
  ## Reference
262
314
 
263
315
  Get table info
data/lib/ducklake/client.rb CHANGED
@@ -8,7 +8,7 @@ module DuckLake
8
8
  snapshot_time: nil,
9
9
  data_inlining_row_limit: 0,
10
10
  create_if_not_exists: false,
11
- _read_only: false # experimental
11
+ read_only: false # experimental
12
12
  )
13
13
  catalog_uri = URI.parse(catalog_url)
14
14
  storage_uri = URI.parse(storage_url)
@@ -30,6 +30,9 @@ module DuckLake
30
30
  raise ArgumentError, "Unsupported catalog type: #{catalog_uri.scheme}"
31
31
  end
32
32
 
33
+ @storage_scheme = storage_uri.scheme
34
+ @storage_options = storage_options.dup
35
+
33
36
  secret_options = nil
34
37
  storage_options = storage_options.dup
35
38
 
@@ -54,7 +57,7 @@ module DuckLake
54
57
  end
55
58
 
56
59
  attach_options = {data_path: storage_url}
57
- attach_options[:read_only] = true if _read_only
60
+ attach_options[:read_only] = true if read_only
58
61
  attach_options[:snapshot_version] = snapshot_version if !snapshot_version.nil?
59
62
  attach_options[:snapshot_time] = snapshot_time if !snapshot_time.nil?
60
63
  attach_options[:data_inlining_row_limit] = data_inlining_row_limit if data_inlining_row_limit > 0
@@ -63,7 +66,7 @@ module DuckLake
63
66
  @catalog = "ducklake"
64
67
  @storage_url = storage_url
65
68
 
66
- if _read_only
69
+ if read_only
67
70
  config = DuckDB::Config.new
68
71
  config["access_mode"] = "READ_ONLY"
69
72
 
@@ -285,6 +288,18 @@ module DuckLake
285
288
  nil
286
289
  end
287
290
 
291
+ # experimental
292
+ # TODO support schema changes
293
+ def polars(table, snapshot_version: nil, snapshot_time: nil)
294
+ files = list_files(table, snapshot_version:, snapshot_time:)
295
+ sources = files.map { |v| v[:data_file] }
296
+ deletion_files = [
297
+ "iceberg-position-delete",
298
+ files.map.with_index.select { |v, i| v[:delete_file] }.to_h { |v, i| [i, [v[:delete_file]]] }
299
+ ]
300
+ Polars.scan_parquet(sources, _deletion_files: deletion_files, storage_options: polars_storage_options)
301
+ end
302
+
288
303
  # libduckdb does not provide function
289
304
  # https://duckdb.org/docs/stable/sql/dialect/keywords_and_identifiers.html
290
305
  def quote_identifier(value)
@@ -412,6 +427,27 @@ module DuckLake
412
427
  uri.path[1..]
413
428
  end
414
429
 
430
+ def polars_storage_options
431
+ @polars_storage_options ||= begin
432
+ storage_options = {}
433
+ extra_options = @storage_options.dup
434
+
435
+ case @storage_scheme
436
+ when "s3"
437
+ # https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html
438
+ [:aws_access_key_id, :aws_secret_access_key, :region].each do |k|
439
+ storage_options[k] = extra_options.delete(k) if extra_options.key?(k)
440
+ end
441
+ end
442
+
443
+ if extra_options.any?
444
+ raise ArgumentError, "Unsupported #{@storage_scheme || "file"} storage options: #{extra_options.keys.map(&:inspect).join(", ")}"
445
+ end
446
+
447
+ storage_options
448
+ end
449
+ end
450
+
415
451
  def quote_array(value)
416
452
  "[#{value.map { |v| quote(v) }.join(", ")}]"
417
453
  end
@@ -1,3 +1,3 @@
1
1
  module DuckLake
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ducklake
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -29,6 +29,7 @@ extensions: []
29
29
  extra_rdoc_files: []
30
30
  files:
31
31
  - CHANGELOG.md
32
+ - LICENSE.txt
32
33
  - README.md
33
34
  - lib/ducklake.rb
34
35
  - lib/ducklake/client.rb