ducklake 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +93 -7
- data/lib/ducklake/client.rb +73 -4
- data/lib/ducklake/version.rb +1 -1
- data/lib/ducklake.rb +3 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9366806f0dee7b0277dfee65be6c4718c339b6e9ab53e595e1f88534d5144534
|
4
|
+
data.tar.gz: 3b3c01d575bb8d0c380b49cbbc8b6deb62cc7de5542449dc73d0fd3b5ccffb1f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2e1fb28d47b8efeedaedba54727d678bdf946efd6aac1001ae0a2ad9aa9fbdc5b28bf8df06b2a10be13f6684195e2f684eafe71c7bedb27b36eff7af36d65abf
|
7
|
+
data.tar.gz: 4feea1505445b4347e722ea31d87c6dd9ed759489550f7a69997fced7b9a45827b4beaa81c4bc5e12abb801226af82f1c4903aff5ab6fd4b0851b6772b2c83c3
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2025 Andrew Kane
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
# DuckLake Ruby
|
2
2
|
|
3
|
-
:
|
3
|
+
:duck: [DuckLake](https://ducklake.select/) for Ruby
|
4
4
|
|
5
5
|
Run your own data lake with a SQL database and file/object storage
|
6
6
|
|
7
7
|
```ruby
|
8
8
|
DuckLake::Client.new(
|
9
|
-
catalog_url: "postgres://user:pass@host:5432/
|
9
|
+
catalog_url: "postgres://user:pass@host:5432/dbname",
|
10
10
|
storage_url: "s3://my-bucket/"
|
11
11
|
)
|
12
12
|
```
|
@@ -149,7 +149,7 @@ Or [register existing data files](https://ducklake.select/docs/stable/duckdb/met
|
|
149
149
|
ducklake.add_data_files("events", "data.parquet")
|
150
150
|
```
|
151
151
|
|
152
|
-
Note: This transfers ownership to
|
152
|
+
Note: This transfers ownership to the data lake, so the file may be deleted as part of [maintenance](#maintenance)
|
153
153
|
|
154
154
|
Update data
|
155
155
|
|
@@ -163,12 +163,46 @@ Delete data
|
|
163
163
|
ducklake.sql("DELETE FROM events WHERE id = ?", [1])
|
164
164
|
```
|
165
165
|
|
166
|
+
Run multiple statements in a transaction
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
ducklake.transaction do
|
170
|
+
# ...
|
171
|
+
end
|
172
|
+
```
|
173
|
+
|
174
|
+
Raise `DuckLake::Rollback` inside the block to roll back the transaction
|
175
|
+
|
176
|
+
## Schema Changes
|
177
|
+
|
166
178
|
Update the schema
|
167
179
|
|
168
180
|
```ruby
|
169
181
|
ducklake.sql("ALTER TABLE events ADD COLUMN active BOOLEAN")
|
170
182
|
```
|
171
183
|
|
184
|
+
Set or remove a [partitioning key](https://ducklake.select/docs/stable/duckdb/advanced_features/partitioning)
|
185
|
+
|
186
|
+
```ruby
|
187
|
+
ducklake.sql("ALTER TABLE events SET PARTITIONED BY (name)")
|
188
|
+
# or
|
189
|
+
ducklake.sql("ALTER TABLE events RESET PARTITIONED BY")
|
190
|
+
```
|
191
|
+
|
192
|
+
## Views
|
193
|
+
|
194
|
+
Create a view
|
195
|
+
|
196
|
+
```ruby
|
197
|
+
ducklake.sql("CREATE VIEW events_view AS SELECT * FROM events")
|
198
|
+
```
|
199
|
+
|
200
|
+
Drop a view
|
201
|
+
|
202
|
+
```ruby
|
203
|
+
ducklake.sql("DROP VIEW events_view")
|
204
|
+
```
|
205
|
+
|
172
206
|
## Snapshots
|
173
207
|
|
174
208
|
Get snapshots
|
@@ -233,13 +267,31 @@ Or for a specific table
|
|
233
267
|
ducklake.set_option("parquet_compression", "zstd", table_name: "events")
|
234
268
|
```
|
235
269
|
|
236
|
-
##
|
270
|
+
## Read-Only Mode
|
271
|
+
|
272
|
+
Note: This feature is experimental and does not prevent the DuckDB engine from writing files via `sql`
|
273
|
+
|
274
|
+
Attach the catalog in read-only mode
|
275
|
+
|
276
|
+
```ruby
|
277
|
+
DuckLake::Client.new(read_only: true, ...)
|
278
|
+
```
|
279
|
+
|
280
|
+
Use read-only credentials for the catalog database and storage provider, and [disable external access](#external-access)
|
237
281
|
|
238
|
-
|
282
|
+
You should also consider [disabling community extensions](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/securing_extensions.html#community-extensions)
|
239
283
|
|
240
|
-
|
284
|
+
```ruby
|
285
|
+
ducklake.sql("SET allow_community_extensions = false")
|
286
|
+
```
|
241
287
|
|
242
|
-
|
288
|
+
And [locking the configuration](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/overview.html#locking-configurations)
|
289
|
+
|
290
|
+
```ruby
|
291
|
+
ducklake.sql("SET lock_configuration = true")
|
292
|
+
```
|
293
|
+
|
294
|
+
## External Access
|
243
295
|
|
244
296
|
[Restrict external access](https://duckdb.org/docs/stable/operations_manual/securing_duckdb/overview.html#restricting-file-access) to the DuckDB engine
|
245
297
|
|
@@ -258,6 +310,40 @@ ducklake.disable_external_access(
|
|
258
310
|
|
259
311
|
The storage URL is automatically included in `allowed_directories`
|
260
312
|
|
313
|
+
## SQL Safety
|
314
|
+
|
315
|
+
Use parameterized queries when possible
|
316
|
+
|
317
|
+
```ruby
|
318
|
+
ducklake.sql("SELECT * FROM events WHERE id = ?", [1])
|
319
|
+
```
|
320
|
+
|
321
|
+
For places that do not support parameters, use `quote` or `quote_identifier`
|
322
|
+
|
323
|
+
```ruby
|
324
|
+
quoted_table = ducklake.quote_identifier("events")
|
325
|
+
quoted_file = ducklake.quote("path/to/data.csv")
|
326
|
+
ducklake.sql("COPY #{quoted_table} FROM #{quoted_file}")
|
327
|
+
```
|
328
|
+
|
329
|
+
## Polars
|
330
|
+
|
331
|
+
Note: This feature is experimental and does not currently work on tables with schema changes
|
332
|
+
|
333
|
+
Query the data with [Ruby Polars](https://github.com/ankane/ruby-polars)
|
334
|
+
|
335
|
+
```ruby
|
336
|
+
ducklake.polars("events")
|
337
|
+
```
|
338
|
+
|
339
|
+
Specify a snapshot
|
340
|
+
|
341
|
+
```ruby
|
342
|
+
ducklake.polars("events", snapshot_version: 3)
|
343
|
+
# or
|
344
|
+
ducklake.polars("events", snapshot_time: Date.today - 7)
|
345
|
+
```
|
346
|
+
|
261
347
|
## Reference
|
262
348
|
|
263
349
|
Get table info
|
data/lib/ducklake/client.rb
CHANGED
@@ -8,7 +8,7 @@ module DuckLake
|
|
8
8
|
snapshot_time: nil,
|
9
9
|
data_inlining_row_limit: 0,
|
10
10
|
create_if_not_exists: false,
|
11
|
-
|
11
|
+
read_only: false # experimental
|
12
12
|
)
|
13
13
|
catalog_uri = URI.parse(catalog_url)
|
14
14
|
storage_uri = URI.parse(storage_url)
|
@@ -30,6 +30,9 @@ module DuckLake
|
|
30
30
|
raise ArgumentError, "Unsupported catalog type: #{catalog_uri.scheme}"
|
31
31
|
end
|
32
32
|
|
33
|
+
@storage_scheme = storage_uri.scheme
|
34
|
+
@storage_options = storage_options.dup
|
35
|
+
|
33
36
|
secret_options = nil
|
34
37
|
storage_options = storage_options.dup
|
35
38
|
|
@@ -54,7 +57,7 @@ module DuckLake
|
|
54
57
|
end
|
55
58
|
|
56
59
|
attach_options = {data_path: storage_url}
|
57
|
-
attach_options[:read_only] = true if
|
60
|
+
attach_options[:read_only] = true if read_only
|
58
61
|
attach_options[:snapshot_version] = snapshot_version if !snapshot_version.nil?
|
59
62
|
attach_options[:snapshot_time] = snapshot_time if !snapshot_time.nil?
|
60
63
|
attach_options[:data_inlining_row_limit] = data_inlining_row_limit if data_inlining_row_limit > 0
|
@@ -63,7 +66,7 @@ module DuckLake
|
|
63
66
|
@catalog = "ducklake"
|
64
67
|
@storage_url = storage_url
|
65
68
|
|
66
|
-
if
|
69
|
+
if read_only
|
67
70
|
config = DuckDB::Config.new
|
68
71
|
config["access_mode"] = "READ_ONLY"
|
69
72
|
|
@@ -103,6 +106,17 @@ module DuckLake
|
|
103
106
|
execute(sql, params)
|
104
107
|
end
|
105
108
|
|
109
|
+
def transaction
|
110
|
+
execute("BEGIN")
|
111
|
+
begin
|
112
|
+
yield
|
113
|
+
execute("COMMIT")
|
114
|
+
rescue => e
|
115
|
+
execute("ROLLBACK")
|
116
|
+
raise e unless e.is_a?(Rollback)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
106
120
|
def attach(alias_, url)
|
107
121
|
type = nil
|
108
122
|
extension = nil
|
@@ -148,6 +162,15 @@ module DuckLake
|
|
148
162
|
symbolize_keys result
|
149
163
|
end
|
150
164
|
|
165
|
+
# experimental
|
166
|
+
# TODO use keyword arguments or range?
|
167
|
+
def table_changes(table, start_snapshot, end_snapshot)
|
168
|
+
params = [@catalog, "main", table, start_snapshot, end_snapshot]
|
169
|
+
result = execute("SELECT * FROM ducklake_table_changes(?, ?, ?, ?, ?)", params)
|
170
|
+
# only return changes between snapshots
|
171
|
+
symbolize_keys result.reject { |v| v["snapshot_id"] == start_snapshot }
|
172
|
+
end
|
173
|
+
|
151
174
|
# TODO more DDL methods?
|
152
175
|
def drop_table(table, if_exists: nil)
|
153
176
|
execute("DROP TABLE#{" IF EXISTS" if if_exists} #{quote_identifier(table)}")
|
@@ -285,6 +308,29 @@ module DuckLake
|
|
285
308
|
nil
|
286
309
|
end
|
287
310
|
|
311
|
+
# experimental
|
312
|
+
def polars(table, snapshot_version: nil, snapshot_time: nil)
|
313
|
+
files = list_files(table, snapshot_version:, snapshot_time:)
|
314
|
+
sources = files.map { |v| v[:data_file] }
|
315
|
+
# TODO support schema changes
|
316
|
+
# column_mapping = [
|
317
|
+
# "iceberg-column-mapping",
|
318
|
+
# nil
|
319
|
+
# ]
|
320
|
+
deletion_files = [
|
321
|
+
"iceberg-position-delete",
|
322
|
+
files.map.with_index.select { |v, i| v[:delete_file] }.to_h { |v, i| [i, [v[:delete_file]]] }
|
323
|
+
]
|
324
|
+
Polars.scan_parquet(
|
325
|
+
sources,
|
326
|
+
storage_options: polars_storage_options,
|
327
|
+
# allow_missing_columns: true,
|
328
|
+
# extra_columns: "ignore",
|
329
|
+
# _column_mapping: column_mapping,
|
330
|
+
_deletion_files: deletion_files
|
331
|
+
)
|
332
|
+
end
|
333
|
+
|
288
334
|
# libduckdb does not provide function
|
289
335
|
# https://duckdb.org/docs/stable/sql/dialect/keywords_and_identifiers.html
|
290
336
|
def quote_identifier(value)
|
@@ -355,7 +401,9 @@ module DuckLake
|
|
355
401
|
"Conversion Error: " => ConversionError,
|
356
402
|
"Invalid Input Error: " => InvalidInputError,
|
357
403
|
"IO Error: " => IOError,
|
358
|
-
"
|
404
|
+
"Not implemented Error: " => NotImplementedError,
|
405
|
+
"Permission Error: " => PermissionError,
|
406
|
+
"TransactionContext Error: " => TransactionContextError
|
359
407
|
}
|
360
408
|
end
|
361
409
|
|
@@ -412,6 +460,27 @@ module DuckLake
|
|
412
460
|
uri.path[1..]
|
413
461
|
end
|
414
462
|
|
463
|
+
def polars_storage_options
|
464
|
+
@polars_storage_options ||= begin
|
465
|
+
storage_options = {}
|
466
|
+
extra_options = @storage_options.dup
|
467
|
+
|
468
|
+
case @storage_scheme
|
469
|
+
when "s3"
|
470
|
+
# https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html
|
471
|
+
[:aws_access_key_id, :aws_secret_access_key, :region].each do |k|
|
472
|
+
storage_options[k] = extra_options.delete(k) if extra_options.key?(k)
|
473
|
+
end
|
474
|
+
end
|
475
|
+
|
476
|
+
if extra_options.any?
|
477
|
+
raise ArgumentError, "Unsupported #{@storage_scheme || "file"} storage options: #{extra_options.keys.map(&:inspect).join(", ")}"
|
478
|
+
end
|
479
|
+
|
480
|
+
storage_options
|
481
|
+
end
|
482
|
+
end
|
483
|
+
|
415
484
|
def quote_array(value)
|
416
485
|
"[#{value.map { |v| quote(v) }.join(", ")}]"
|
417
486
|
end
|
data/lib/ducklake/version.rb
CHANGED
data/lib/ducklake.rb
CHANGED
@@ -15,5 +15,8 @@ module DuckLake
|
|
15
15
|
class ConversionError < Error; end
|
16
16
|
class InvalidInputError < Error; end
|
17
17
|
class IOError < Error; end
|
18
|
+
class NotImplementedError < Error; end
|
18
19
|
class PermissionError < Error; end
|
20
|
+
class Rollback < Error; end
|
21
|
+
class TransactionContextError < Error; end
|
19
22
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ducklake
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
@@ -29,6 +29,7 @@ extensions: []
|
|
29
29
|
extra_rdoc_files: []
|
30
30
|
files:
|
31
31
|
- CHANGELOG.md
|
32
|
+
- LICENSE.txt
|
32
33
|
- README.md
|
33
34
|
- lib/ducklake.rb
|
34
35
|
- lib/ducklake/client.rb
|