slatedb 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c93234ef6d9251d54b90cd816983c1d073d159587ae655fbef8e4e3b433928a3
4
- data.tar.gz: 0b7aa7b9137ff2228f7e4153da2daafbb235ce857b51faa8d552c55f18cb7e7a
3
+ metadata.gz: 0026c9e2a5afd6e0d7fa35c10b037791910a2438cf0bcec73c9935db557c9a85
4
+ data.tar.gz: 140bdf13e24d576e87165a63f31e3a0130244f36ea1278da7da5e6703347f7c5
5
5
  SHA512:
6
- metadata.gz: 54cc3a48576fc4ca07dc6ae95a755aa6fc5dd7fa7af46eba3452f3465c74d005d9f29d06ac1cc9ddf3461d602d910d63a08f55b0d486140ed22024f3ce1e3e4e
7
- data.tar.gz: 2bd1c37ca9fcfa737bd47fd3d76b136376819b573a08e044c06114f5555d5243e8b465b7bbc8cba02f3461982cc5ea1e4195fb31e53a5f0d4dcd608d1ea79f55
6
+ metadata.gz: ae8fe00e7069801bfdf8cf90ea48bfee6a60ed1413cdbc8bef5b9a083744436a0b3d3c1e46ef51c5cbd5de85bd46c6c332ec4b016ffd6482a729fa0b3a50e531
7
+ data.tar.gz: 5e60f6509c2eae4c948751d941f6cd7389dae8e43e241d02d559c47251b26383c6a59fcedb50133d54759d9d791a9f07079f6eed37106a386358094f2f3a5b7e
data/README.md CHANGED
@@ -181,6 +181,105 @@ users = db.scan("user:").select { |k, v| v.include?("active") }
181
181
  all_entries = db.scan("").to_a
182
182
  ```
183
183
 
184
+ #### Prefix Scanning
185
+
186
+ Scan all keys with a given prefix using `scan_prefix`:
187
+
188
+ ```ruby
189
+ # Scan all keys starting with "user:"
190
+ db.scan_prefix("user:").each do |key, value|
191
+ puts "#{key}: #{value}"
192
+ end
193
+
194
+ # Block form
195
+ db.scan_prefix("order:") do |key, value|
196
+ puts "#{key}: #{value}"
197
+ end
198
+
199
+ # Works with transactions, snapshots, and readers too
200
+ db.transaction do |txn|
201
+ txn.scan_prefix("item:").each do |k, v|
202
+ puts "#{k}: #{v}"
203
+ end
204
+ end
205
+ ```
206
+
207
+ ### Merge Operations
208
+
209
+ Merge operations allow you to combine values without reading them first, which is useful for counters, append-only logs, and similar patterns:
210
+
211
+ ```ruby
212
+ # Open with a built-in merge operator
213
+ SlateDb::Database.open("/tmp/mydb", merge_operator: :string_concat) do |db|
214
+ # Merge appends to existing values (or creates if key doesn't exist)
215
+ db.merge("log", "line1\n")
216
+ db.merge("log", "line2\n")
217
+ db.merge("log", "line3\n")
218
+
219
+ db.get("log") # => "line1\nline2\nline3\n"
220
+ end
221
+
222
+ # Merge with options
223
+ db.merge("key", "value", ttl: 60_000, await_durable: false)
224
+
225
+ # Works in transactions and batches
226
+ db.transaction do |txn|
227
+ txn.merge("counter", "1")
228
+ end
229
+
230
+ db.batch do |b|
231
+ b.merge("key", "a")
232
+ .merge("key", "b")
233
+ end
234
+ ```
235
+
236
+ #### Custom Merge Operators
237
+
238
+ You can provide a Ruby Proc/lambda as a custom merge operator:
239
+
240
+ ```ruby
241
+ # Counter merge operator (adds numbers)
242
+ counter_merge = ->(key, existing, new_value) {
243
+ existing_num = existing ? existing.to_i : 0
244
+ (existing_num + new_value.to_i).to_s
245
+ }
246
+
247
+ SlateDb::Database.open("/tmp/mydb", merge_operator: counter_merge) do |db|
248
+ db.merge("visits", "1")
249
+ db.merge("visits", "1")
250
+ db.merge("visits", "1")
251
+
252
+ db.get("visits") # => "3"
253
+ end
254
+
255
+ # Max value merge operator
256
+ max_merge = ->(key, existing, new_value) {
257
+ existing_num = existing ? existing.to_i : 0
258
+ new_num = new_value.to_i
259
+ [existing_num, new_num].max.to_s
260
+ }
261
+
262
+ SlateDb::Database.open("/tmp/mydb", merge_operator: max_merge) do |db|
263
+ db.merge("high_score", "100")
264
+ db.merge("high_score", "250")
265
+ db.merge("high_score", "150")
266
+
267
+ db.get("high_score") # => "250"
268
+ end
269
+ ```
270
+
271
+ The proc receives three arguments:
272
+ - `key` - The key being merged
273
+ - `existing` - The existing value (nil if no value exists)
274
+ - `new_value` - The new merge operand
275
+
276
+ **Note:** Custom Proc merge operators work best with direct `db.merge()` calls. When used with transactions or batches, some merge operations may be processed on background threads, in which case they fall back to string concatenation instead of invoking the Proc.
277
+
278
+ #### Available Merge Operators
279
+
280
+ - `:string_concat` (or `:concat`) - Concatenates byte values (built-in)
281
+ - Any `Proc` or `lambda` - Custom merge logic
282
+
184
283
  ### Write Batches
185
284
 
186
285
  Perform multiple writes atomically:
@@ -231,18 +330,60 @@ Transaction operations:
231
330
  db.transaction do |txn|
232
331
  # Read
233
332
  value = txn.get("key")
234
-
333
+
235
334
  # Write
236
335
  txn.put("key", "value")
237
336
  txn.put("expiring", "data", ttl: 30_000)
238
-
337
+
239
338
  # Delete
240
339
  txn.delete("old_key")
241
-
340
+
242
341
  # Scan
243
342
  txn.scan("prefix:").each do |k, v|
244
343
  puts "#{k}: #{v}"
245
344
  end
345
+
346
+ # Scan with prefix
347
+ txn.scan_prefix("user:").each do |k, v|
348
+ puts "#{k}: #{v}"
349
+ end
350
+ end
351
+ ```
352
+
353
+ #### Explicit Read Tracking
354
+
355
+ In serializable transactions, use `mark_read` to explicitly track keys for conflict detection without actually reading them:
356
+
357
+ ```ruby
358
+ db.transaction(isolation: :serializable) do |txn|
359
+ # Mark keys as read for conflict detection
360
+ txn.mark_read(["key1", "key2", "key3"])
361
+
362
+ # Now if another transaction modifies key1/key2/key3,
363
+ # this transaction will fail on commit
364
+ txn.put("result", "computed_value")
365
+ end
366
+ ```
367
+
368
+ ### Checkpoints
369
+
370
+ Create durable checkpoints for backup or read-replica purposes:
371
+
372
+ ```ruby
373
+ SlateDb::Database.open("/tmp/mydb", url: "file:///tmp/mydb") do |db|
374
+ db.put("key", "value")
375
+ db.flush
376
+
377
+ # Create a checkpoint
378
+ checkpoint = db.create_checkpoint
379
+ puts "Checkpoint ID: #{checkpoint[:id]}"
380
+ puts "Manifest ID: #{checkpoint[:manifest_id]}"
381
+
382
+ # Create a named checkpoint with lifetime
383
+ checkpoint = db.create_checkpoint(
384
+ name: "before-migration",
385
+ lifetime: 3_600_000 # 1 hour in milliseconds
386
+ )
246
387
  end
247
388
  ```
248
389
 
@@ -11,13 +11,12 @@ name = "slatedb"
11
11
  crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
- slatedb = "0.9"
15
- magnus = { version = "0.8", features = ["rb-sys"] }
16
- rb-sys = { version = "0.9", features = ["stable-api-compiled-fallback"] }
17
- tokio = { version = "1", features = ["rt-multi-thread", "sync"] }
18
- bytes = "1"
19
- object_store = { version = "0.12", features = ["aws"] }
20
- url = "2"
21
- once_cell = "1"
22
- log = "0.4"
23
- uuid = "1"
14
+ slatedb = "0.10"
15
+ magnus = { version = "0.8.2", features = ["rb-sys"] }
16
+ rb-sys = { version = "0.9.123", features = ["stable-api-compiled-fallback"] }
17
+ tokio = { version = "1.47.2", features = ["rt-multi-thread", "sync"] }
18
+ bytes = "1.11.0"
19
+ url = "2.5.7"
20
+ once_cell = "1.21.3"
21
+ log = "0.4.29"
22
+ uuid = "1.19.0"
@@ -25,10 +25,10 @@ impl Admin {
25
25
  /// * `path` - The path identifier for the database
26
26
  /// * `url` - Optional object store URL
27
27
  pub fn new(path: String, url: Option<String>) -> Result<Self, Error> {
28
- let object_store: Arc<dyn object_store::ObjectStore> = if let Some(ref url) = url {
28
+ let object_store: Arc<dyn slatedb::object_store::ObjectStore> = if let Some(ref url) = url {
29
29
  block_on_result(async { resolve_object_store(url) })?
30
30
  } else {
31
- Arc::new(object_store::memory::InMemory::new())
31
+ Arc::new(slatedb::object_store::memory::InMemory::new())
32
32
  };
33
33
 
34
34
  let admin = AdminBuilder::new(path, object_store).build();
@@ -235,6 +235,7 @@ impl Admin {
235
235
  min_age,
236
236
  default_opts.compacted_options,
237
237
  ),
238
+ compactions_options: default_opts.compactions_options,
238
239
  }
239
240
  };
240
241
 
@@ -2,12 +2,15 @@ use std::sync::Arc;
2
2
 
3
3
  use magnus::prelude::*;
4
4
  use magnus::{function, method, Error, RHash, Ruby};
5
- use slatedb::config::{DurabilityLevel, PutOptions, ReadOptions, ScanOptions, Ttl, WriteOptions};
5
+ use slatedb::config::{
6
+ DurabilityLevel, MergeOptions, PutOptions, ReadOptions, ScanOptions, Ttl, WriteOptions,
7
+ };
6
8
  use slatedb::object_store::memory::InMemory;
7
9
  use slatedb::{Db, IsolationLevel};
8
10
 
9
11
  use crate::errors::invalid_argument_error;
10
12
  use crate::iterator::Iterator;
13
+ use crate::merge_ops::{parse_merge_operator, parse_merge_operator_proc};
11
14
  use crate::runtime::block_on_result;
12
15
  use crate::snapshot::Snapshot;
13
16
  use crate::transaction::Transaction;
@@ -28,18 +31,29 @@ impl Database {
28
31
  /// # Arguments
29
32
  /// * `path` - The path identifier for the database
30
33
  /// * `url` - Optional object store URL (e.g., "s3://bucket/path")
34
+ /// * `kwargs` - Additional options (merge_operator, merge_operator_proc)
31
35
  ///
32
36
  /// # Returns
33
37
  /// A new Database instance
34
- pub fn open(path: String, url: Option<String>) -> Result<Self, Error> {
38
+ pub fn open(path: String, url: Option<String>, kwargs: RHash) -> Result<Self, Error> {
39
+ // Try string-based merge operator first, then proc-based
40
+ let merge_operator = parse_merge_operator(&kwargs)?
41
+ .or(parse_merge_operator_proc(&kwargs)?);
42
+
35
43
  let db = block_on_result(async {
36
- let object_store: Arc<dyn object_store::ObjectStore> = if let Some(ref url_str) = url {
37
- resolve_object_store(url_str)?
38
- } else {
39
- Arc::new(InMemory::new())
40
- };
44
+ let object_store: Arc<dyn slatedb::object_store::ObjectStore> =
45
+ if let Some(ref url_str) = url {
46
+ resolve_object_store(url_str)?
47
+ } else {
48
+ Arc::new(InMemory::new())
49
+ };
50
+
51
+ let mut builder = Db::builder(path, object_store);
52
+ if let Some(merge_operator) = merge_operator {
53
+ builder = builder.with_merge_operator(merge_operator);
54
+ }
41
55
 
42
- Db::builder(path, object_store).build().await
56
+ builder.build().await
43
57
  })?;
44
58
 
45
59
  Ok(Self {
@@ -325,6 +339,85 @@ impl Database {
325
339
  Ok(Iterator::new(iter))
326
340
  }
327
341
 
342
+ /// Scan all keys with a given prefix.
343
+ ///
344
+ /// # Arguments
345
+ /// * `prefix` - The key prefix to scan
346
+ ///
347
+ /// # Returns
348
+ /// An Iterator over key-value pairs
349
+ pub fn scan_prefix(&self, prefix: String) -> Result<Iterator, Error> {
350
+ if prefix.is_empty() {
351
+ return Err(invalid_argument_error("prefix cannot be empty"));
352
+ }
353
+
354
+ let opts = ScanOptions::default();
355
+ let iter = block_on_result(async {
356
+ self.inner
357
+ .scan_prefix_with_options(prefix.as_bytes(), &opts)
358
+ .await
359
+ })?;
360
+
361
+ Ok(Iterator::new(iter))
362
+ }
363
+
364
+ /// Scan all keys with a given prefix with options.
365
+ ///
366
+ /// # Arguments
367
+ /// * `prefix` - The key prefix to scan
368
+ /// * `kwargs` - Keyword arguments (durability_filter, dirty, read_ahead_bytes, cache_blocks, max_fetch_tasks)
369
+ ///
370
+ /// # Returns
371
+ /// An Iterator over key-value pairs
372
+ pub fn scan_prefix_with_options(
373
+ &self,
374
+ prefix: String,
375
+ kwargs: RHash,
376
+ ) -> Result<Iterator, Error> {
377
+ if prefix.is_empty() {
378
+ return Err(invalid_argument_error("prefix cannot be empty"));
379
+ }
380
+
381
+ let mut opts = ScanOptions::default();
382
+
383
+ if let Some(df) = get_optional::<String>(&kwargs, "durability_filter")? {
384
+ opts.durability_filter = match df.as_str() {
385
+ "remote" => DurabilityLevel::Remote,
386
+ "memory" => DurabilityLevel::Memory,
387
+ other => {
388
+ return Err(invalid_argument_error(&format!(
389
+ "invalid durability_filter: {} (expected 'remote' or 'memory')",
390
+ other
391
+ )))
392
+ }
393
+ };
394
+ }
395
+
396
+ if let Some(dirty) = get_optional::<bool>(&kwargs, "dirty")? {
397
+ opts.dirty = dirty;
398
+ }
399
+
400
+ if let Some(rab) = get_optional::<usize>(&kwargs, "read_ahead_bytes")? {
401
+ opts.read_ahead_bytes = rab;
402
+ }
403
+
404
+ if let Some(cb) = get_optional::<bool>(&kwargs, "cache_blocks")? {
405
+ opts.cache_blocks = cb;
406
+ }
407
+
408
+ if let Some(mft) = get_optional::<usize>(&kwargs, "max_fetch_tasks")? {
409
+ opts.max_fetch_tasks = mft;
410
+ }
411
+
412
+ let iter = block_on_result(async {
413
+ self.inner
414
+ .scan_prefix_with_options(prefix.as_bytes(), &opts)
415
+ .await
416
+ })?;
417
+
418
+ Ok(Iterator::new(iter))
419
+ }
420
+
328
421
  /// Write a batch of operations atomically.
329
422
  ///
330
423
  /// # Arguments
@@ -355,6 +448,62 @@ impl Database {
355
448
  Ok(())
356
449
  }
357
450
 
451
+ /// Merge a value into the database.
452
+ ///
453
+ /// # Arguments
454
+ /// * `key` - The key to merge into
455
+ /// * `value` - The merge operand to apply
456
+ pub fn merge(&self, key: String, value: String) -> Result<(), Error> {
457
+ if key.is_empty() {
458
+ return Err(invalid_argument_error("key cannot be empty"));
459
+ }
460
+
461
+ let merge_opts = MergeOptions { ttl: Ttl::Default };
462
+
463
+ let write_opts = WriteOptions {
464
+ await_durable: true,
465
+ };
466
+
467
+ block_on_result(async {
468
+ self.inner
469
+ .merge_with_options(key.as_bytes(), value.as_bytes(), &merge_opts, &write_opts)
470
+ .await
471
+ })?;
472
+
473
+ Ok(())
474
+ }
475
+
476
+ /// Merge a value into the database with options.
477
+ ///
478
+ /// # Arguments
479
+ /// * `key` - The key to merge into
480
+ /// * `value` - The merge operand to apply
481
+ /// * `kwargs` - Keyword arguments (ttl, await_durable)
482
+ pub fn merge_with_options(&self, key: String, value: String, kwargs: RHash) -> Result<(), Error> {
483
+ if key.is_empty() {
484
+ return Err(invalid_argument_error("key cannot be empty"));
485
+ }
486
+
487
+ let ttl = get_optional::<u64>(&kwargs, "ttl")?;
488
+ let merge_opts = MergeOptions {
489
+ ttl: match ttl {
490
+ Some(ms) => Ttl::ExpireAfter(ms),
491
+ None => Ttl::Default,
492
+ },
493
+ };
494
+
495
+ let await_durable = get_optional::<bool>(&kwargs, "await_durable")?.unwrap_or(true);
496
+ let write_opts = WriteOptions { await_durable };
497
+
498
+ block_on_result(async {
499
+ self.inner
500
+ .merge_with_options(key.as_bytes(), value.as_bytes(), &merge_opts, &write_opts)
501
+ .await
502
+ })?;
503
+
504
+ Ok(())
505
+ }
506
+
358
507
  /// Begin a new transaction.
359
508
  ///
360
509
  /// # Arguments
@@ -389,6 +538,40 @@ impl Database {
389
538
  Ok(Snapshot::new(snap))
390
539
  }
391
540
 
541
+ /// Create a checkpoint of the database.
542
+ ///
543
+ /// # Arguments
544
+ /// * `kwargs` - Options: lifetime (ms), name
545
+ ///
546
+ /// # Returns
547
+ /// Hash with id (UUID string) and manifest_id (int)
548
+ pub fn create_checkpoint(&self, kwargs: RHash) -> Result<RHash, Error> {
549
+ use slatedb::config::{CheckpointOptions, CheckpointScope};
550
+
551
+ let lifetime = get_optional::<u64>(&kwargs, "lifetime")?
552
+ .map(std::time::Duration::from_millis);
553
+ let name = get_optional::<String>(&kwargs, "name")?;
554
+
555
+ let options = CheckpointOptions {
556
+ lifetime,
557
+ source: None,
558
+ name,
559
+ };
560
+
561
+ let result = block_on_result(async {
562
+ self.inner
563
+ .create_checkpoint(CheckpointScope::Durable, &options)
564
+ .await
565
+ })?;
566
+
567
+ let ruby = Ruby::get().expect("Ruby runtime not available");
568
+ let hash = ruby.hash_new();
569
+ hash.aset(ruby.to_symbol("id"), result.id.to_string())?;
570
+ hash.aset(ruby.to_symbol("manifest_id"), result.manifest_id)?;
571
+
572
+ Ok(hash)
573
+ }
574
+
392
575
  /// Flush the database to ensure durability.
393
576
  pub fn flush(&self) -> Result<(), Error> {
394
577
  block_on_result(async { self.inner.flush().await })?;
@@ -407,7 +590,7 @@ pub fn define_database_class(ruby: &Ruby, module: &magnus::RModule) -> Result<()
407
590
  let class = module.define_class("Database", ruby.class_object())?;
408
591
 
409
592
  // Class methods
410
- class.define_singleton_method("_open", function!(Database::open, 2))?;
593
+ class.define_singleton_method("_open", function!(Database::open, 3))?;
411
594
 
412
595
  // Instance methods - simple versions
413
596
  class.define_method("_get", method!(Database::get, 1))?;
@@ -425,16 +608,30 @@ pub fn define_database_class(ruby: &Ruby, module: &magnus::RModule) -> Result<()
425
608
  "_scan_with_options",
426
609
  method!(Database::scan_with_options, 3),
427
610
  )?;
611
+ class.define_method("_scan_prefix", method!(Database::scan_prefix, 1))?;
612
+ class.define_method(
613
+ "_scan_prefix_with_options",
614
+ method!(Database::scan_prefix_with_options, 2),
615
+ )?;
428
616
  class.define_method("_write", method!(Database::write, 1))?;
429
617
  class.define_method(
430
618
  "_write_with_options",
431
619
  method!(Database::write_with_options, 2),
432
620
  )?;
621
+ class.define_method("_merge", method!(Database::merge, 2))?;
622
+ class.define_method(
623
+ "_merge_with_options",
624
+ method!(Database::merge_with_options, 3),
625
+ )?;
433
626
  class.define_method(
434
627
  "_begin_transaction",
435
628
  method!(Database::begin_transaction, 1),
436
629
  )?;
437
630
  class.define_method("_snapshot", method!(Database::snapshot, 0))?;
631
+ class.define_method(
632
+ "_create_checkpoint",
633
+ method!(Database::create_checkpoint, 1),
634
+ )?;
438
635
  class.define_method("flush", method!(Database::flush, 0))?;
439
636
  class.define_method("close", method!(Database::close, 0))?;
440
637
 
@@ -20,6 +20,7 @@ mod admin;
20
20
  mod database;
21
21
  mod errors;
22
22
  mod iterator;
23
+ mod merge_ops;
23
24
  mod reader;
24
25
  mod runtime;
25
26
  mod snapshot;