ducklake 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +59 -7
- data/lib/ducklake/client.rb +39 -3
- data/lib/ducklake/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 451f6059faf0e7c451599b218bb9dafbbb8b80a2b54416833a5dafb85a0d1a9d
|
4
|
+
data.tar.gz: a1c6c7825da3dd5a077705c1c3f106a0cc1d75bc2e60b17ccb5099f98a3bdf1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9359cf3aaf06ed4398b9bc0ff35489c0e50008cd8d3426a787339d33f541a6bce8e3a1eca4a2b88b46852545895b591d08613f88ad26902aa4d41cf8b842bc93
|
7
|
+
data.tar.gz: 5bd9818d1b928aa2c0a11301c3455b89f8a4fc99912c6d1b8e52fdd151088adcc331b0ca01169df08412e151841f2f6b50f4c7b85b909ca558d6c21f0ffebd56
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2025 Andrew Kane
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
# DuckLake Ruby
|
2
2
|
|
3
|
-
:
|
3
|
+
:duck: [DuckLake](https://ducklake.select/) for Ruby
|
4
4
|
|
5
5
|
Run your own data lake with a SQL database and file/object storage
|
6
6
|
|
7
7
|
```ruby
|
8
8
|
DuckLake::Client.new(
|
9
|
-
catalog_url: "postgres://user:pass@host:5432/
|
9
|
+
catalog_url: "postgres://user:pass@host:5432/dbname",
|
10
10
|
storage_url: "s3://my-bucket/"
|
11
11
|
)
|
12
12
|
```
|
@@ -149,7 +149,7 @@ Or [register existing data files](https://ducklake.select/docs/stable/duckdb/met
|
|
149
149
|
ducklake.add_data_files("events", "data.parquet")
|
150
150
|
```
|
151
151
|
|
152
|
-
Note: This transfers ownership to
|
152
|
+
Note: This transfers ownership to the data lake, so the file may be deleted as part of [maintenance](#maintenance)
|
153
153
|
|
154
154
|
Update data
|
155
155
|
|
@@ -233,13 +233,31 @@ Or for a specific table
|
|
233
233
|
ducklake.set_option("parquet_compression", "zstd", table_name: "events")
|
234
234
|
```
|
235
235
|
|
236
|
-
##
|
236
|
+
## Read-Only Mode
|
237
237
|
|
238
|
-
|
238
|
+
Note: This feature is experimental
|
239
239
|
|
240
|
-
|
240
|
+
Connect to the data lake in read-only mode
|
241
241
|
|
242
|
-
|
242
|
+
```ruby
|
243
|
+
DuckLake::Client.new(read_only: true, ...)
|
244
|
+
```
|
245
|
+
|
246
|
+
Use read-only credentials for catalog database and storage provider and [disable external access](#external-access)
|
247
|
+
|
248
|
+
You should also consider [disabling community extensions](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/securing_extensions.html#community-extensions)
|
249
|
+
|
250
|
+
```ruby
|
251
|
+
ducklake.sql("SET allow_community_extensions = false")
|
252
|
+
```
|
253
|
+
|
254
|
+
And [locking the configuration](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/overview.html#locking-configurations)
|
255
|
+
|
256
|
+
```ruby
|
257
|
+
ducklake.sql("SET lock_configuration = true")
|
258
|
+
```
|
259
|
+
|
260
|
+
## External Access
|
243
261
|
|
244
262
|
[Restrict external access](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/overview.html#restricting-file-access) to the DuckDB engine
|
245
263
|
|
@@ -258,6 +276,40 @@ ducklake.disable_external_access(
|
|
258
276
|
|
259
277
|
The storage URL is automatically included in `allowed_directories`
|
260
278
|
|
279
|
+
## SQL Safety
|
280
|
+
|
281
|
+
Use parameterized queries when possible
|
282
|
+
|
283
|
+
```ruby
|
284
|
+
ducklake.sql("SELECT * FROM events WHERE id = ?", [1])
|
285
|
+
```
|
286
|
+
|
287
|
+
For places that do not support parameters, use `quote` or `quote_identifier`
|
288
|
+
|
289
|
+
```ruby
|
290
|
+
quoted_table = ducklake.quote_identifier("events")
|
291
|
+
quoted_file = ducklake.quote("path/to/data.csv")
|
292
|
+
ducklake.sql("COPY #{quoted_table} FROM #{quoted_file}")
|
293
|
+
```
|
294
|
+
|
295
|
+
## Polars
|
296
|
+
|
297
|
+
Note: This feature is experimental and does not work on tables with schema changes
|
298
|
+
|
299
|
+
Query the data with [Ruby Polars](https://github.com/ankane/ruby-polars)
|
300
|
+
|
301
|
+
```ruby
|
302
|
+
ducklake.polars("events")
|
303
|
+
```
|
304
|
+
|
305
|
+
Specify a snapshot
|
306
|
+
|
307
|
+
```ruby
|
308
|
+
ducklake.polars("events", snapshot_version: 3)
|
309
|
+
# or
|
310
|
+
ducklake.polars("events", snapshot_time: Date.today - 7)
|
311
|
+
```
|
312
|
+
|
261
313
|
## Reference
|
262
314
|
|
263
315
|
Get table info
|
data/lib/ducklake/client.rb
CHANGED
@@ -8,7 +8,7 @@ module DuckLake
|
|
8
8
|
snapshot_time: nil,
|
9
9
|
data_inlining_row_limit: 0,
|
10
10
|
create_if_not_exists: false,
|
11
|
-
|
11
|
+
read_only: false # experimental
|
12
12
|
)
|
13
13
|
catalog_uri = URI.parse(catalog_url)
|
14
14
|
storage_uri = URI.parse(storage_url)
|
@@ -30,6 +30,9 @@ module DuckLake
|
|
30
30
|
raise ArgumentError, "Unsupported catalog type: #{catalog_uri.scheme}"
|
31
31
|
end
|
32
32
|
|
33
|
+
@storage_scheme = storage_uri.scheme
|
34
|
+
@storage_options = storage_options.dup
|
35
|
+
|
33
36
|
secret_options = nil
|
34
37
|
storage_options = storage_options.dup
|
35
38
|
|
@@ -54,7 +57,7 @@ module DuckLake
|
|
54
57
|
end
|
55
58
|
|
56
59
|
attach_options = {data_path: storage_url}
|
57
|
-
attach_options[:read_only] = true if
|
60
|
+
attach_options[:read_only] = true if read_only
|
58
61
|
attach_options[:snapshot_version] = snapshot_version if !snapshot_version.nil?
|
59
62
|
attach_options[:snapshot_time] = snapshot_time if !snapshot_time.nil?
|
60
63
|
attach_options[:data_inlining_row_limit] = data_inlining_row_limit if data_inlining_row_limit > 0
|
@@ -63,7 +66,7 @@ module DuckLake
|
|
63
66
|
@catalog = "ducklake"
|
64
67
|
@storage_url = storage_url
|
65
68
|
|
66
|
-
if
|
69
|
+
if read_only
|
67
70
|
config = DuckDB::Config.new
|
68
71
|
config["access_mode"] = "READ_ONLY"
|
69
72
|
|
@@ -285,6 +288,18 @@ module DuckLake
|
|
285
288
|
nil
|
286
289
|
end
|
287
290
|
|
291
|
+
# experimental
|
292
|
+
# TODO support schema changes
|
293
|
+
def polars(table, snapshot_version: nil, snapshot_time: nil)
|
294
|
+
files = list_files(table, snapshot_version:, snapshot_time:)
|
295
|
+
sources = files.map { |v| v[:data_file] }
|
296
|
+
deletion_files = [
|
297
|
+
"iceberg-position-delete",
|
298
|
+
files.map.with_index.select { |v, i| v[:delete_file] }.to_h { |v, i| [i, [v[:delete_file]]] }
|
299
|
+
]
|
300
|
+
Polars.scan_parquet(sources, _deletion_files: deletion_files, storage_options: polars_storage_options)
|
301
|
+
end
|
302
|
+
|
288
303
|
# libduckdb does not provide function
|
289
304
|
# https://duckdb.org/docs/stable/sql/dialect/keywords_and_identifiers.html
|
290
305
|
def quote_identifier(value)
|
@@ -412,6 +427,27 @@ module DuckLake
|
|
412
427
|
uri.path[1..]
|
413
428
|
end
|
414
429
|
|
430
|
+
def polars_storage_options
|
431
|
+
@polars_storage_options ||= begin
|
432
|
+
storage_options = {}
|
433
|
+
extra_options = @storage_options.dup
|
434
|
+
|
435
|
+
case @storage_scheme
|
436
|
+
when "s3"
|
437
|
+
# https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html
|
438
|
+
[:aws_access_key_id, :aws_secret_access_key, :region].each do |k|
|
439
|
+
storage_options[k] = extra_options.delete(k) if extra_options.key?(k)
|
440
|
+
end
|
441
|
+
end
|
442
|
+
|
443
|
+
if extra_options.any?
|
444
|
+
raise ArgumentError, "Unsupported #{@storage_scheme || "file"} storage options: #{extra_options.keys.map(&:inspect).join(", ")}"
|
445
|
+
end
|
446
|
+
|
447
|
+
storage_options
|
448
|
+
end
|
449
|
+
end
|
450
|
+
|
415
451
|
def quote_array(value)
|
416
452
|
"[#{value.map { |v| quote(v) }.join(", ")}]"
|
417
453
|
end
|
data/lib/ducklake/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ducklake
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
@@ -29,6 +29,7 @@ extensions: []
|
|
29
29
|
extra_rdoc_files: []
|
30
30
|
files:
|
31
31
|
- CHANGELOG.md
|
32
|
+
- LICENSE.txt
|
32
33
|
- README.md
|
33
34
|
- lib/ducklake.rb
|
34
35
|
- lib/ducklake/client.rb
|