@nxtedition/rocksdb 16.0.2 → 16.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/binding.cc CHANGED
@@ -17,6 +17,7 @@
17
17
  #include <rocksdb/status.h>
18
18
  #include <rocksdb/table.h>
19
19
  #include <rocksdb/write_batch.h>
20
+ #include <rocksdb/write_buffer_manager.h>
20
21
 
21
22
  #include <re2/re2.h>
22
23
 
@@ -29,6 +30,19 @@
29
30
  #include <thread>
30
31
  #include <vector>
31
32
 
33
+ #ifdef __linux__
34
+ #include <sys/syscall.h>
35
+ #include <unistd.h>
36
+
37
+ #include <cerrno>
38
+
39
+ // Older libc headers may lack the SYS_ alias for io_uring_setup even though
40
+ // the kernel number (__NR_) is available — keep the Linux probe a boolean.
41
+ #if !defined(SYS_io_uring_setup) && defined(__NR_io_uring_setup)
42
+ #define SYS_io_uring_setup __NR_io_uring_setup
43
+ #endif
44
+ #endif
45
+
32
46
  #include "max_rev_operator.h"
33
47
  #include "util.h"
34
48
 
@@ -54,7 +68,7 @@ class NullLogger : public rocksdb::Logger {
54
68
 
55
69
  struct Database;
56
70
  class Iterator;
57
- class Updates;
71
+ struct Updates;
58
72
 
59
73
  struct ColumnFamily {
60
74
  rocksdb::ColumnFamilyHandle* handle;
@@ -334,6 +348,12 @@ struct BaseIterator : public Closable {
334
348
  const int limit,
335
349
  rocksdb::ReadOptions readOptions = {})
336
350
  : database_(database), column_(column), reverse_(reverse), limit_(limit) {
351
+ // TODO (correctness): the +'\0' byte-successor trick below converts
352
+ // inclusive/exclusive bounds correctly only under bytewise ordering. With a
353
+ // custom CF comparator (InitOptions "comparator", e.g.
354
+ // rocksdb.ReverseBytewiseComparator) RocksDB applies these bounds with that
355
+ // comparator, silently inverting the lte/gt boundary semantics. Seek()'s
356
+ // manual bound check below uses raw bytewise Slice::compare as well.
337
357
  if (lte) {
338
358
  upper_bound_ = rocksdb::PinnableSlice();
339
359
  *upper_bound_->GetSelf() = std::move(*lte) + '\0';
@@ -440,6 +460,25 @@ struct BaseIterator : public Closable {
440
460
  return iterator_->status();
441
461
  }
442
462
 
463
+ virtual rocksdb::Status Refresh() {
464
+ assert(iterator_);
465
+ // Refresh restarts iteration, so the user `limit` budget must restart too;
466
+ // otherwise an iterator that already yielded `limit` rows returns nothing
467
+ // after a refresh even though every other piece of state was reset.
468
+ count_ = 0;
469
+ ROCKS_STATUS_RETURN(iterator_->Refresh());
470
+ // rocksdb::Iterator::Refresh invalidates the iterator (a Seek* is required
471
+ // before use), so re-establish the starting position like the constructor
472
+ // does — otherwise the next read sees Valid()==false and reports an empty
473
+ // database.
474
+ if (reverse_) {
475
+ iterator_->SeekToLast();
476
+ } else {
477
+ iterator_->SeekToFirst();
478
+ }
479
+ return iterator_->status();
480
+ }
481
+
443
482
  Database* database_;
444
483
  rocksdb::ColumnFamilyHandle* column_;
445
484
 
@@ -508,6 +547,11 @@ class Iterator final : public BaseIterator {
508
547
  return BaseIterator::Seek(target);
509
548
  }
510
549
 
550
+ rocksdb::Status Refresh() override {
551
+ first_ = true;
552
+ return BaseIterator::Refresh();
553
+ }
554
+
511
555
  static std::unique_ptr<Iterator> create(napi_env env, napi_value db, napi_value options) {
512
556
  Database* database;
513
557
  NAPI_STATUS_THROWS(napi_get_value_external(env, db, reinterpret_cast<void**>(&database)));
@@ -527,7 +571,9 @@ class Iterator final : public BaseIterator {
527
571
  int32_t limit = -1;
528
572
  NAPI_STATUS_THROWS(GetProperty(env, options, "limit", limit));
529
573
 
530
- int32_t highWaterMarkBytes = std::numeric_limits<int32_t>::max();
574
+ // 64-bit: the value flows into a size_t cap, so parsing as int32 would wrap
575
+ // any value > 2 GiB to a garbage cap. Default stays ~2 GiB (effectively no cap).
576
+ int64_t highWaterMarkBytes = std::numeric_limits<int32_t>::max();
531
577
  NAPI_STATUS_THROWS(GetProperty(env, options, "highWaterMarkBytes", highWaterMarkBytes));
532
578
 
533
579
  std::optional<std::string> lt;
@@ -644,14 +690,11 @@ class Iterator final : public BaseIterator {
644
690
  break;
645
691
  }
646
692
 
647
- if (!Increment()) {
648
- // Hit the user's `limit` option: terminal, and flag that it was a
649
- // limit rather than natural exhaustion.
650
- state.finished = true;
651
- state.limited = true;
652
- break;
653
- }
654
-
693
+ // Apply the key/value filters BEFORE charging the user `limit`, so
694
+ // `limit` counts matched (emitted) rows, not rows merely scanned and
695
+ // then discarded. Otherwise a `{ limit, keyFilter }` query could
696
+ // exhaust its budget on non-matching rows and return fewer (or zero)
697
+ // matches than exist.
655
698
  if (keyFilter_ && !re2::RE2::PartialMatch(CurrentKey().ToStringView(), *keyFilter_)) {
656
699
  continue;
657
700
  }
@@ -660,6 +703,14 @@ class Iterator final : public BaseIterator {
660
703
  continue;
661
704
  }
662
705
 
706
+ if (!Increment()) {
707
+ // Hit the user's `limit` option: terminal, and flag that it was a
708
+ // limit rather than natural exhaustion.
709
+ state.finished = true;
710
+ state.limited = true;
711
+ break;
712
+ }
713
+
663
714
  if (keys_ && values_) {
664
715
  rocksdb::PinnableSlice k;
665
716
  k.PinSelf(CurrentKey());
@@ -680,9 +731,9 @@ class Iterator final : public BaseIterator {
680
731
  v.PinSelf(CurrentValue());
681
732
  state.bytes += v.size();
682
733
  state.values.push_back(std::move(v));
683
- } else {
684
- assert(false);
685
734
  }
735
+ // keys:false + values:false is valid per abstract-level: rows still
736
+ // count, each entry surfaces as [undefined, undefined].
686
737
  state.count += 1;
687
738
  }
688
739
 
@@ -712,7 +763,8 @@ class Iterator final : public BaseIterator {
712
763
  NAPI_STATUS_RETURN(napi_get_undefined(env, &key));
713
764
  NAPI_STATUS_RETURN(Convert(env, std::move(state.values[n]), valueEncoding_, val, unsafe_));
714
765
  } else {
715
- assert(false);
766
+ NAPI_STATUS_RETURN(napi_get_undefined(env, &key));
767
+ NAPI_STATUS_RETURN(napi_get_undefined(env, &val));
716
768
  }
717
769
 
718
770
  NAPI_STATUS_RETURN(napi_set_element(env, rows, n * 2 + 0, key));
@@ -772,14 +824,8 @@ class Iterator final : public BaseIterator {
772
824
  break;
773
825
  }
774
826
 
775
- if (!Increment()) {
776
- // Hit the user's `limit` option: terminal, and flag that it was a limit
777
- // rather than natural exhaustion.
778
- NAPI_STATUS_THROWS(napi_get_boolean(env, true, &finished));
779
- NAPI_STATUS_THROWS(napi_get_boolean(env, true, &limited));
780
- break;
781
- }
782
-
827
+ // Apply the key/value filters BEFORE charging the user `limit`, so `limit`
828
+ // counts matched (emitted) rows, not rows merely scanned and discarded.
783
829
  if (keyFilter_ && !re2::RE2::PartialMatch(CurrentKey().ToStringView(), *keyFilter_)) {
784
830
  continue;
785
831
  }
@@ -788,6 +834,14 @@ class Iterator final : public BaseIterator {
788
834
  continue;
789
835
  }
790
836
 
837
+ if (!Increment()) {
838
+ // Hit the user's `limit` option: terminal, and flag that it was a limit
839
+ // rather than natural exhaustion.
840
+ NAPI_STATUS_THROWS(napi_get_boolean(env, true, &finished));
841
+ NAPI_STATUS_THROWS(napi_get_boolean(env, true, &limited));
842
+ break;
843
+ }
844
+
791
845
  napi_value key;
792
846
  napi_value val;
793
847
 
@@ -804,7 +858,8 @@ class Iterator final : public BaseIterator {
804
858
  NAPI_STATUS_THROWS(napi_get_undefined(env, &key));
805
859
  NAPI_STATUS_THROWS(Convert(env, CurrentValue(), valueEncoding_, val, unsafe_));
806
860
  } else {
807
- assert(false);
861
+ NAPI_STATUS_THROWS(napi_get_undefined(env, &key));
862
+ NAPI_STATUS_THROWS(napi_get_undefined(env, &val));
808
863
  }
809
864
 
810
865
  NAPI_STATUS_THROWS(napi_set_element(env, rows, idx++, key));
@@ -848,6 +903,11 @@ static void FinalizeDatabase(napi_env env, void* data, void* hint) {
848
903
  database->resourceNamesRef = nullptr;
849
904
  }
850
905
  database->Close();
906
+ // This external owns the Database (the bigint-handle external in db_init is
907
+ // created with no finalizer, so it never reaches here). Close() already
908
+ // released the rocksdb::DB; free the heap object itself or it leaks for the
909
+ // lifetime of the process.
910
+ delete database;
851
911
  }
852
912
  }
853
913
 
@@ -876,11 +936,27 @@ NAPI_METHOD(db_init) {
876
936
  int64_t value;
877
937
  bool lossless;
878
938
  NAPI_STATUS_THROWS(napi_get_value_bigint_int64(env, argv[0], &value, &lossless));
939
+ if (!lossless) {
940
+ napi_throw_error(env, nullptr, "invalid database handle");
941
+ return NULL;
942
+ }
879
943
 
880
944
  database = reinterpret_cast<Database*>(value);
881
945
  NAPI_STATUS_THROWS(napi_create_external(env, database, nullptr, nullptr, &result));
882
946
 
883
- // We should have an env_cleanup_hook for closing iterators...
947
+ // TODO (critical, lifetime): sharing a Database* across V8 environments (e.g.
948
+ // worker_threads) via db_get_handle is unsafe. There is no cross-env
949
+ // reference count on the rocksdb::DB, so one env's db_close() runs
950
+ // Database::Close() (freeing the DB + column handles on a worker thread)
951
+ // while another env may still be running MultiGet / iterator / updates
952
+ // against it -> use-after-free / double-free. This branch also installs no
953
+ // env_cleanup_hook or finalizer, so a tearing-down secondary env never
954
+ // detaches its iterators, and GetResourceName() dereferences a napi_ref
955
+ // (resourceNamesRef) that belongs to the originating env (cross-env ref use
956
+ // is undefined behaviour). Fix: refcount the Database lifetime across all
957
+ // wrapping envs (run the real Close()/db.reset() only when the last
958
+ // reference drops), install a cleanup hook here, and make resource names
959
+ // per-env. Until then, close() on a shared handle must be app-coordinated.
884
960
  } else {
885
961
  NAPI_STATUS_THROWS(napi_invalid_arg);
886
962
  }
@@ -916,7 +992,15 @@ NAPI_METHOD(db_query) {
916
992
  NAPI_ARGV(2);
917
993
 
918
994
  try {
919
- return Iterator::create(env, argv[0], argv[1])->nextv(env, std::numeric_limits<uint32_t>::max());
995
+ auto iterator = Iterator::create(env, argv[0], argv[1]);
996
+ // Iterator::create uses NAPI_STATUS_THROWS internally, which on an N-API
997
+ // failure schedules a pending JS exception and does `return NULL` — i.e. an empty
998
+ // unique_ptr. Dereferencing it (->nextv) would be a null deref / crash, so
999
+ // bail out and let the pending exception surface.
1000
+ if (!iterator) {
1001
+ return nullptr;
1002
+ }
1003
+ return iterator->nextv(env, std::numeric_limits<uint32_t>::max());
920
1004
  } catch (const std::exception& e) {
921
1005
  napi_throw_error(env, nullptr, e.what());
922
1006
  return nullptr;
@@ -983,6 +1067,10 @@ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
983
1067
  columnOptions.compression = rocksdb::kZSTD;
984
1068
  columnOptions.compression_opts.max_dict_bytes = 16 * 1024;
985
1069
  columnOptions.compression_opts.zstd_max_train_bytes = 16 * 1024 * 100;
1070
+ NAPI_STATUS_RETURN(GetProperty(env, options, "compressionLevel", columnOptions.compression_opts.level));
1071
+ NAPI_STATUS_RETURN(GetProperty(env, options, "maxDictBytes", columnOptions.compression_opts.max_dict_bytes));
1072
+ NAPI_STATUS_RETURN(
1073
+ GetProperty(env, options, "zstdMaxTrainBytes", columnOptions.compression_opts.zstd_max_train_bytes));
986
1074
  // TODO (perf): compression_opts.parallel_threads
987
1075
  } else {
988
1076
  columnOptions.compression = rocksdb::kNoCompression;
@@ -1040,6 +1128,10 @@ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
1040
1128
 
1041
1129
  NAPI_STATUS_RETURN(GetProperty(env, options, "optimizeFiltersForHits", columnOptions.optimize_filters_for_hits));
1042
1130
  NAPI_STATUS_RETURN(GetProperty(env, options, "periodicCompactionSeconds", columnOptions.periodic_compaction_seconds));
1131
+ // memtable_huge_page_size is a column-family option: when the DB is opened
1132
+ // with explicit column descriptors the copy read into dbOptions in db_open is
1133
+ // sliced away, so it must be settable per column to take effect at all.
1134
+ NAPI_STATUS_RETURN(GetProperty(env, options, "memTableHugePageSize", columnOptions.memtable_huge_page_size));
1043
1135
 
1044
1136
  // Compat
1045
1137
  NAPI_STATUS_RETURN(GetProperty(env, options, "enableBlobFiles", columnOptions.enable_blob_files));
@@ -1079,12 +1171,18 @@ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
1079
1171
  bool lossless;
1080
1172
  int64_t ptr;
1081
1173
  NAPI_STATUS_RETURN(napi_get_value_bigint_int64(env, handleValue, &ptr, &lossless));
1174
+ if (!lossless) {
1175
+ return napi_invalid_arg;
1176
+ }
1082
1177
 
1083
1178
  cache = *reinterpret_cast<std::shared_ptr<rocksdb::Cache>*>(ptr);
1084
1179
  } else if (cacheType == napi_bigint) {
1085
1180
  bool lossless;
1086
1181
  int64_t ptr;
1087
1182
  NAPI_STATUS_RETURN(napi_get_value_bigint_int64(env, cacheValue, &ptr, &lossless));
1183
+ if (!lossless) {
1184
+ return napi_invalid_arg;
1185
+ }
1088
1186
 
1089
1187
  cache = *reinterpret_cast<std::shared_ptr<rocksdb::Cache>*>(ptr);
1090
1188
  } else if (cacheType != napi_undefined && cacheType != napi_null) {
@@ -1093,7 +1191,9 @@ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
1093
1191
  }
1094
1192
 
1095
1193
  if (!cache) {
1096
- uint32_t cacheSize = 8 << 20;
1194
+ // uint64_t: RocksDB cache capacity is size_t; a 32-bit type silently wraps
1195
+ // requests >= 4 GiB (and 4 GiB exactly wraps to 0 -> cache disabled).
1196
+ uint64_t cacheSize = 8 << 20;
1097
1197
  double compressedRatio = 0.0;
1098
1198
 
1099
1199
  NAPI_STATUS_RETURN(GetProperty(env, options, "cacheSize", cacheSize));
@@ -1113,7 +1213,10 @@ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
1113
1213
  }
1114
1214
 
1115
1215
  {
1116
- uint32_t cacheSize = -1;
1216
+ // int64: -1 means "unset" (inherit the shared cache); a 32-bit type both
1217
+ // wraps requests >= 4 GiB and collides the unset sentinel with a real
1218
+ // 4294967295-byte request.
1219
+ int64_t cacheSize = -1;
1117
1220
  double compressedRatio = 0.0;
1118
1221
 
1119
1222
  NAPI_STATUS_RETURN(GetProperty(env, options, "cachePrepopulate", tableOptions.prepopulate_block_cache));
@@ -1141,7 +1244,9 @@ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
1141
1244
  }
1142
1245
 
1143
1246
  {
1144
- uint32_t cacheSize = -1;
1247
+ // int64: see the block-cache block above — -1 = unset, avoids 32-bit wrap
1248
+ // and the unset/4-GiB sentinel collision.
1249
+ int64_t cacheSize = -1;
1145
1250
  double compressedRatio = 0.0;
1146
1251
 
1147
1252
  NAPI_STATUS_RETURN(GetProperty(env, options, "cachePrepopulate", columnOptions.prepopulate_blob_cache));
@@ -1157,6 +1262,9 @@ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
1157
1262
  columnOptions.blob_cache = nullptr;
1158
1263
  } else if (compressedRatio > 0.0) {
1159
1264
  rocksdb::TieredCacheOptions options;
1265
+ // Match the block/main cache tiers: pin the primary tier to HyperClockCache
1266
+ // explicitly rather than letting it default to LRU.
1267
+ options.cache_type = rocksdb::PrimaryCacheType::kCacheTypeHCC;
1160
1268
  options.total_capacity = cacheSize;
1161
1269
  options.compressed_secondary_ratio = compressedRatio;
1162
1270
  options.comp_cache_opts.compression_type = rocksdb::CompressionType::kZSTD;
@@ -1299,15 +1407,25 @@ NAPI_METHOD(db_open) {
1299
1407
  NAPI_STATUS_THROWS(GetProperty(env, options, "parallelism", parallelism));
1300
1408
  dbOptions.IncreaseParallelism(parallelism);
1301
1409
 
1410
+ // IncreaseParallelism sizes the (process-wide) Env LOW pool to `parallelism`
1411
+ // but pins the HIGH pool — where every flush of every DB sharing the default
1412
+ // Env runs — at a single thread, so flushes across DBs serialize behind one
1413
+ // thread. Both pools are process-wide: the last opened DB's value wins.
1414
+ int flushParallelism = std::max(1, parallelism / 4);
1415
+ NAPI_STATUS_THROWS(GetProperty(env, options, "flushParallelism", flushParallelism));
1416
+ dbOptions.env->SetBackgroundThreads(std::max(1, flushParallelism), rocksdb::Env::HIGH);
1417
+
1302
1418
  NAPI_STATUS_THROWS(GetProperty(env, options, "walDir", dbOptions.wal_dir));
1303
1419
 
1304
- uint32_t walTTL = 0;
1420
+ // 64-bit inputs: walTTL is in ms and walSizeLimit in bytes, so a 32-bit type
1421
+ // wraps a >= ~4.3 GB size limit (or a ~49-day TTL) before the unit conversion.
1422
+ uint64_t walTTL = 0;
1305
1423
  NAPI_STATUS_THROWS(GetProperty(env, options, "walTTL", walTTL));
1306
- dbOptions.WAL_ttl_seconds = static_cast<uint32_t>(std::ceil(walTTL / 1e3));
1424
+ dbOptions.WAL_ttl_seconds = static_cast<uint64_t>(std::ceil(walTTL / 1e3));
1307
1425
 
1308
- uint32_t walSizeLimit = 0;
1426
+ uint64_t walSizeLimit = 0;
1309
1427
  NAPI_STATUS_THROWS(GetProperty(env, options, "walSizeLimit", walSizeLimit));
1310
- dbOptions.WAL_size_limit_MB = static_cast<uint32_t>(std::ceil(walSizeLimit / 1e6));
1428
+ dbOptions.WAL_size_limit_MB = static_cast<uint64_t>(std::ceil(walSizeLimit / 1e6));
1311
1429
 
1312
1430
  NAPI_STATUS_THROWS(GetProperty(env, options, "maxTotalWalSize", dbOptions.max_total_wal_size));
1313
1431
 
@@ -1363,6 +1481,34 @@ NAPI_METHOD(db_open) {
1363
1481
 
1364
1482
  NAPI_STATUS_THROWS(GetProperty(env, options, "writeBufferSize", dbOptions.db_write_buffer_size));
1365
1483
 
1484
+ {
1485
+ napi_value wbmValue;
1486
+ NAPI_STATUS_THROWS(napi_get_named_property(env, options, "writeBufferManager", &wbmValue));
1487
+
1488
+ napi_valuetype wbmType;
1489
+ NAPI_STATUS_THROWS(napi_typeof(env, wbmValue, &wbmType));
1490
+
1491
+ if (wbmType == napi_object || wbmType == napi_bigint) {
1492
+ napi_value handleValue = wbmValue;
1493
+ if (wbmType == napi_object) {
1494
+ NAPI_STATUS_THROWS(napi_get_named_property(env, wbmValue, "handle", &handleValue));
1495
+ }
1496
+
1497
+ bool lossless;
1498
+ int64_t ptr;
1499
+ NAPI_STATUS_THROWS(napi_get_value_bigint_int64(env, handleValue, &ptr, &lossless));
1500
+ if (!lossless) {
1501
+ napi_throw_error(env, nullptr, "invalid writeBufferManager handle");
1502
+ return NULL;
1503
+ }
1504
+
1505
+ dbOptions.write_buffer_manager = *reinterpret_cast<std::shared_ptr<rocksdb::WriteBufferManager>*>(ptr);
1506
+ } else if (wbmType != napi_undefined && wbmType != napi_null) {
1507
+ napi_throw_error(env, nullptr, "invalid writeBufferManager");
1508
+ return NULL;
1509
+ }
1510
+ }
1511
+
1366
1512
  NAPI_STATUS_THROWS(GetProperty(env, options, "manualWALFlush", dbOptions.manual_wal_flush));
1367
1513
  NAPI_STATUS_THROWS(GetProperty(env, options, "walManualFlush", dbOptions.manual_wal_flush));
1368
1514
 
@@ -1624,7 +1770,7 @@ NAPI_METHOD(db_get_many) {
1624
1770
  [=](auto& state, napi_env env, napi_value* result) {
1625
1771
  NAPI_STATUS_RETURN(napi_create_array_with_length(env, count, result));
1626
1772
 
1627
- for (auto n = 0; n < count; n++) {
1773
+ for (uint32_t n = 0; n < count; n++) {
1628
1774
  napi_value row;
1629
1775
  if (state.statuses[n].IsNotFound()) {
1630
1776
  NAPI_STATUS_RETURN(napi_get_undefined(env, &row));
@@ -1688,6 +1834,10 @@ NAPI_METHOD(db_clear) {
1688
1834
  *end.GetSelf() = std::move(*lt);
1689
1835
  } else {
1690
1836
  // HACK: Assume no key that starts with 0xFF is larger than 1 MB (1e6 bytes).
1837
+ // TODO (correctness): this synthetic upper bound silently leaves any key
1838
+ // >= a 1e6-byte (~1 MB, not 1 MiB) run of 0xFF bytes uncleared. Prefer DeleteRange over the full
1839
+ // keyspace (null end) or RangeBound::kInclusive on the max key instead of
1840
+ // assuming a bound.
1691
1841
  end.GetSelf()->resize(1e6);
1692
1842
  memset(end.GetSelf()->data(), 255, end.GetSelf()->size());
1693
1843
  }
@@ -1743,16 +1893,26 @@ NAPI_METHOD(db_clear) {
1743
1893
  }
1744
1894
 
1745
1895
  NAPI_METHOD(db_get_property) {
1746
- NAPI_ARGV(2);
1896
+ NAPI_ARGV(3);
1747
1897
 
1748
1898
  Database* database;
1749
1899
  NAPI_STATUS_THROWS(napi_get_value_external(env, argv[0], reinterpret_cast<void**>(&database)));
1750
1900
 
1901
+ if (!database->db) {
1902
+ napi_throw_error(env, "LEVEL_DATABASE_NOT_OPEN", "Database is not open");
1903
+ return NULL;
1904
+ }
1905
+
1751
1906
  rocksdb::PinnableSlice property;
1752
1907
  NAPI_STATUS_THROWS(GetValue(env, argv[1], property));
1753
1908
 
1909
+ // Most rocksdb properties are column-family scoped; without an explicit
1910
+ // column they answer for the default CF only.
1911
+ rocksdb::ColumnFamilyHandle* column = database->db->DefaultColumnFamily();
1912
+ NAPI_STATUS_THROWS(GetProperty(env, argv[2], "column", column));
1913
+
1754
1914
  std::string value;
1755
- database->db->GetProperty(property, &value);
1915
+ database->db->GetProperty(column, property, &value);
1756
1916
 
1757
1917
  napi_value result;
1758
1918
  NAPI_STATUS_THROWS(napi_create_string_utf8(env, value.data(), value.size(), &result));
@@ -1804,6 +1964,11 @@ NAPI_METHOD(iterator_init_sync) {
1804
1964
  napi_value result;
1805
1965
  try {
1806
1966
  auto iterator = Iterator::create(env, argv[0], argv[1]);
1967
+ // create() returns an empty unique_ptr (and a pending JS exception) on an
1968
+ // N-API failure; surface that instead of wrapping a null pointer.
1969
+ if (!iterator) {
1970
+ return nullptr;
1971
+ }
1807
1972
 
1808
1973
  NAPI_STATUS_THROWS(napi_create_external(env, iterator.get(), Finalize<Iterator>, iterator.get(), &result));
1809
1974
  iterator.release();
@@ -1815,6 +1980,22 @@ NAPI_METHOD(iterator_init_sync) {
1815
1980
  return result;
1816
1981
  }
1817
1982
 
1983
+ NAPI_METHOD(iterator_refresh_sync) {
1984
+ NAPI_ARGV(1);
1985
+
1986
+ try {
1987
+ Iterator* iterator;
1988
+ NAPI_STATUS_THROWS(napi_get_value_external(env, argv[0], reinterpret_cast<void**>(&iterator)));
1989
+
1990
+ ROCKS_STATUS_THROWS_NAPI(iterator->Refresh());
1991
+ } catch (const std::exception& e) {
1992
+ napi_throw_error(env, nullptr, e.what());
1993
+ return nullptr;
1994
+ }
1995
+
1996
+ return 0;
1997
+ }
1998
+
1818
1999
  NAPI_METHOD(iterator_seek) {
1819
2000
  NAPI_ARGV(3);
1820
2001
 
@@ -2345,6 +2526,128 @@ NAPI_METHOD(cache_get_handle) {
2345
2526
  return result;
2346
2527
  }
2347
2528
 
2529
+ NAPI_METHOD(write_buffer_manager_init) {
2530
+ NAPI_ARGV(1);
2531
+
2532
+ size_t bufferSize = 256 * 1024 * 1024; // 256 MiB
2533
+ NAPI_STATUS_THROWS(GetProperty(env, argv[0], "bufferSize", bufferSize));
2534
+
2535
+ bool allowStall = false;
2536
+ NAPI_STATUS_THROWS(GetProperty(env, argv[0], "allowStall", allowStall));
2537
+
2538
+ std::shared_ptr<rocksdb::Cache> cache;
2539
+ {
2540
+ napi_value cacheValue;
2541
+ NAPI_STATUS_THROWS(napi_get_named_property(env, argv[0], "cache", &cacheValue));
2542
+
2543
+ napi_valuetype cacheType;
2544
+ NAPI_STATUS_THROWS(napi_typeof(env, cacheValue, &cacheType));
2545
+
2546
+ if (cacheType == napi_object || cacheType == napi_bigint) {
2547
+ napi_value handleValue = cacheValue;
2548
+ if (cacheType == napi_object) {
2549
+ NAPI_STATUS_THROWS(napi_get_named_property(env, cacheValue, "handle", &handleValue));
2550
+ }
2551
+
2552
+ bool lossless;
2553
+ int64_t ptr;
2554
+ NAPI_STATUS_THROWS(napi_get_value_bigint_int64(env, handleValue, &ptr, &lossless));
2555
+ if (!lossless) {
2556
+ napi_throw_error(env, nullptr, "invalid cache handle");
2557
+ return NULL;
2558
+ }
2559
+
2560
+ cache = *reinterpret_cast<std::shared_ptr<rocksdb::Cache>*>(ptr);
2561
+ } else if (cacheType != napi_undefined && cacheType != napi_null) {
2562
+ napi_throw_error(env, nullptr, "invalid cache");
2563
+ return NULL;
2564
+ }
2565
+ }
2566
+
2567
+ auto wbm = new std::shared_ptr<rocksdb::WriteBufferManager>(
2568
+ std::make_shared<rocksdb::WriteBufferManager>(bufferSize, cache, allowStall));
2569
+
2570
+ napi_value result;
2571
+ NAPI_STATUS_THROWS(
2572
+ napi_create_external(env, wbm, Finalize<std::shared_ptr<rocksdb::WriteBufferManager>>, wbm, &result));
2573
+
2574
+ return result;
2575
+ }
2576
+
2577
+ NAPI_METHOD(write_buffer_manager_get_handle) {
2578
+ NAPI_ARGV(1);
2579
+
2580
+ std::shared_ptr<rocksdb::WriteBufferManager>* wbm;
2581
+ NAPI_STATUS_THROWS(napi_get_value_external(env, argv[0], reinterpret_cast<void**>(&wbm)));
2582
+
2583
+ napi_value result;
2584
+ NAPI_STATUS_THROWS(napi_create_bigint_int64(env, reinterpret_cast<intptr_t>(wbm), &result));
2585
+
2586
+ return result;
2587
+ }
2588
+
2589
+ NAPI_METHOD(write_buffer_manager_get_usage) {
2590
+ NAPI_ARGV(1);
2591
+
2592
+ std::shared_ptr<rocksdb::WriteBufferManager>* wbm;
2593
+ NAPI_STATUS_THROWS(napi_get_value_external(env, argv[0], reinterpret_cast<void**>(&wbm)));
2594
+
2595
+ napi_value result;
2596
+ NAPI_STATUS_THROWS(napi_create_object(env, &result));
2597
+
2598
+ napi_value memoryUsage;
2599
+ NAPI_STATUS_THROWS(napi_create_double(env, static_cast<double>((*wbm)->memory_usage()), &memoryUsage));
2600
+ NAPI_STATUS_THROWS(napi_set_named_property(env, result, "memoryUsage", memoryUsage));
2601
+
2602
+ napi_value mutableMemoryUsage;
2603
+ NAPI_STATUS_THROWS(
2604
+ napi_create_double(env, static_cast<double>((*wbm)->mutable_memtable_memory_usage()), &mutableMemoryUsage));
2605
+ NAPI_STATUS_THROWS(napi_set_named_property(env, result, "mutableMemoryUsage", mutableMemoryUsage));
2606
+
2607
+ napi_value bufferSize;
2608
+ NAPI_STATUS_THROWS(napi_create_double(env, static_cast<double>((*wbm)->buffer_size()), &bufferSize));
2609
+ NAPI_STATUS_THROWS(napi_set_named_property(env, result, "bufferSize", bufferSize));
2610
+
2611
+ return result;
2612
+ }
2613
+
2614
+ // Probes whether io_uring is actually usable in this process: RocksDB gates its
2615
+ // async MultiGet / prefetch I/O on io_uring_setup succeeding at runtime and
2616
+ // falls back to serial reads SILENTLY when the syscall is denied (seccomp — the
2617
+ // default Docker/containerd profiles since late 2023 — or the
2618
+ // kernel.io_uring_disabled sysctl) or missing (ENOSYS). io_uring_setup(0, NULL)
2619
+ // never succeeds; a functional kernel rejects the arguments (EINVAL/EFAULT)
2620
+ // while a blocked one fails with EPERM/EACCES/ENOSYS before looking at them.
2621
+ NAPI_METHOD(io_uring_available) {
2622
+ #if defined(__linux__) && defined(SYS_io_uring_setup)
2623
+ errno = 0;
2624
+ const long rc = syscall(SYS_io_uring_setup, 0, nullptr);
2625
+ const bool available = rc >= 0 || (errno != ENOSYS && errno != EPERM && errno != EACCES);
2626
+ if (rc >= 0) {
2627
+ close(static_cast<int>(rc));
2628
+ }
2629
+
2630
+ napi_value result;
2631
+ NAPI_STATUS_THROWS(napi_get_boolean(env, available, &result));
2632
+
2633
+ return result;
2634
+ #elif defined(__linux__)
2635
+ // Built without any syscall number for io_uring_setup (pre-io_uring-era
2636
+ // headers): this binary cannot use io_uring regardless of the running
2637
+ // kernel, so report it unavailable — the Linux contract stays boolean.
2638
+ napi_value result;
2639
+ NAPI_STATUS_THROWS(napi_get_boolean(env, false, &result));
2640
+
2641
+ return result;
2642
+ #else
2643
+ // Not applicable on this platform.
2644
+ napi_value result;
2645
+ NAPI_STATUS_THROWS(napi_get_null(env, &result));
2646
+
2647
+ return result;
2648
+ #endif
2649
+ }
2650
+
2348
2651
  NAPI_INIT() {
2349
2652
  NAPI_EXPORT_FUNCTION(db_init);
2350
2653
  NAPI_EXPORT_FUNCTION(db_open);
@@ -2363,6 +2666,7 @@ NAPI_INIT() {
2363
2666
  NAPI_EXPORT_FUNCTION(db_flush_wal);
2364
2667
 
2365
2668
  NAPI_EXPORT_FUNCTION(iterator_init_sync);
2669
+ NAPI_EXPORT_FUNCTION(iterator_refresh_sync);
2366
2670
  NAPI_EXPORT_FUNCTION(iterator_seek);
2367
2671
  NAPI_EXPORT_FUNCTION(iterator_seek_sync);
2368
2672
  NAPI_EXPORT_FUNCTION(iterator_close_sync);
@@ -2386,4 +2690,10 @@ NAPI_INIT() {
2386
2690
 
2387
2691
  NAPI_EXPORT_FUNCTION(cache_init);
2388
2692
  NAPI_EXPORT_FUNCTION(cache_get_handle);
2693
+
2694
+ NAPI_EXPORT_FUNCTION(write_buffer_manager_init);
2695
+ NAPI_EXPORT_FUNCTION(write_buffer_manager_get_handle);
2696
+ NAPI_EXPORT_FUNCTION(write_buffer_manager_get_usage);
2697
+
2698
+ NAPI_EXPORT_FUNCTION(io_uring_available);
2389
2699
  }
package/build.sh CHANGED
@@ -1,15 +1,29 @@
1
1
  #!/bin/bash
2
2
  set -e
3
3
 
4
+ # The Dockerfile targets x86-64 explicitly (znver3 march flags, prebuildify
5
+ # --arch x64), so the image must be built for linux/amd64 even on arm64 hosts
6
+ # (e.g. Apple Silicon), where it runs under emulation. Without this the native
7
+ # arm64 gcc rejects -march=znver3 ("unknown value 'znver3'") and the build fails.
8
+ PLATFORM=linux/amd64
9
+
10
+ # Build on the remote x86-64 docker host by default (avoids emulation on
11
+ # Apple Silicon). Override with DOCKER_HOST=... ./build.sh, or
12
+ # DOCKER_HOST= ./build.sh to use the local docker daemon.
13
+ export DOCKER_HOST="${DOCKER_HOST-ssh://nxtop@hq-test-srv1.nxt.io}"
14
+
4
15
  echo "Initializing submodules..."
5
16
  git submodule update --init
6
17
 
7
18
  echo "Building image..."
8
- docker build --iidfile prebuilds.iid .
19
+ # JOBS caps build parallelism for the memory-heavy rocksdb compile (default 8,
20
+ # see Dockerfile). Lower it (e.g. JOBS=4 ./build.sh) if the build still OOMs on
21
+ # a memory-constrained Docker, or raise it on a large host.
22
+ docker build --platform "$PLATFORM" ${JOBS:+--build-arg JOBS="$JOBS"} --iidfile prebuilds.iid .
9
23
 
10
24
  echo "Extracting prebuilds from image..."
11
25
  IMG=$(cat prebuilds.iid)
12
- ID=$(docker create $IMG)
26
+ ID=$(docker create --platform "$PLATFORM" $IMG)
13
27
  docker cp "$ID:/rocks-level/prebuilds" ./
14
28
 
15
29
  echo "Cleaning up..."
package/index.js CHANGED
@@ -6,6 +6,7 @@ const ModuleError = require('module-error')
6
6
  const binding = require('./binding')
7
7
  const { ChainedBatch } = require('./chained-batch')
8
8
  const { RocksCache } = require('./cache')
9
+ const { RocksWriteBufferManager } = require('./write-buffer-manager')
9
10
  const { Iterator } = require('./iterator')
10
11
  const fs = require('node:fs')
11
12
  const assert = require('node:assert')
@@ -224,7 +225,11 @@ class RocksLevel extends AbstractLevel {
224
225
  callback = fromCallback(callback, kPromise)
225
226
 
226
227
  try {
227
- // TODO (fix): Use batch + DeleteRange...
228
+ // TODO (perf): db_clear is a synchronous native call that blocks the event
229
+ // loop. The whole-range (limit === -1) path is a single DeleteRange (cheap),
230
+ // but the limited path iterates + writes WriteBatches on the JS thread, and
231
+ // neither is ref-counted against close(). Move to an async binding
232
+ // (runAsync) that takes a kRef like the other ops.
228
233
  binding.db_clear(this[kContext], options ?? kEmpty)
229
234
  process.nextTick(callback, null)
230
235
  } catch (err) {
@@ -290,7 +295,7 @@ class RocksLevel extends AbstractLevel {
290
295
  return binding.db_get_identity(this[kContext])
291
296
  }
292
297
 
293
- getProperty (property) {
298
+ getProperty (property, options) {
294
299
  if (typeof property !== 'string') {
295
300
  throw new TypeError("The first argument 'property' must be a string")
296
301
  }
@@ -302,7 +307,7 @@ class RocksLevel extends AbstractLevel {
302
307
  })
303
308
  }
304
309
 
305
- return binding.db_get_property(this[kContext], property)
310
+ return binding.db_get_property(this[kContext], property, options ?? kEmpty)
306
311
  }
307
312
 
308
313
  query (options, callback) {
@@ -410,3 +415,12 @@ class RocksLevel extends AbstractLevel {
410
415
 
411
416
  exports.RocksLevel = RocksLevel
412
417
  exports.RocksCache = RocksCache
418
+ exports.RocksWriteBufferManager = RocksWriteBufferManager
419
+
420
+ // Returns null on platforms where io_uring does not apply (non-Linux); boolean on
421
+ // Linux, where `false` means RocksDB's async_io silently degrades to serial
422
+ // reads (seccomp, kernel.io_uring_disabled, a kernel without io_uring, or a
423
+ // binary built without an io_uring syscall number).
424
+ exports.ioUringAvailable = function ioUringAvailable () {
425
+ return binding.io_uring_available()
426
+ }
package/iterator.js CHANGED
@@ -16,6 +16,7 @@ const kFirst = Symbol('first')
16
16
  const kPosition = Symbol('position')
17
17
  const kBusy = Symbol('busy')
18
18
  const kPendingClose = Symbol('pendingClose')
19
+ const kHasFilter = Symbol('hasFilter')
19
20
 
20
21
  const kEmpty = Object.freeze([])
21
22
 
@@ -32,6 +33,7 @@ class Iterator extends AbstractIterator {
32
33
  this[kDB] = db
33
34
  this[kBusy] = false
34
35
  this[kPendingClose] = null
36
+ this[kHasFilter] = options.keyFilter != null || options.valueFilter != null
35
37
  }
36
38
 
37
39
  [Symbol.asyncDispose] () {
@@ -85,15 +87,41 @@ class Iterator extends AbstractIterator {
85
87
  const size = this[kFirst] ? 1 : 1000
86
88
  this[kFirst] = false
87
89
 
88
- try {
89
- const { rows, finished } = binding.iterator_nextv_sync(this[kContext], size, null)
90
- this[kCache] = rows
91
- this[kFinished] = finished
92
- this[kPosition] = 0
93
-
94
- setImmediate(() => this._next(callback))
95
- } catch (err) {
96
- process.nextTick(callback, err)
90
+ if (this[kHasFilter]) {
91
+ try {
92
+ this[kDB][kRef]()
93
+ this[kBusy] = true
94
+ binding.iterator_nextv(this[kContext], size, null, (err, result) => {
95
+ this[kBusy] = false
96
+ this[kDB][kUnref]()
97
+
98
+ if (err) {
99
+ callback(err)
100
+ } else {
101
+ this[kCache] = result.rows
102
+ this[kFinished] = result.finished
103
+ this[kPosition] = 0
104
+ this._next(callback)
105
+ }
106
+
107
+ this._flushPendingClose()
108
+ })
109
+ } catch (err) {
110
+ this[kBusy] = false
111
+ this[kDB][kUnref]()
112
+ process.nextTick(callback, err)
113
+ }
114
+ } else {
115
+ try {
116
+ const { rows, finished } = binding.iterator_nextv_sync(this[kContext], size, null)
117
+ this[kCache] = rows
118
+ this[kFinished] = finished
119
+ this[kPosition] = 0
120
+
121
+ setImmediate(() => this._next(callback))
122
+ } catch (err) {
123
+ process.nextTick(callback, err)
124
+ }
97
125
  }
98
126
  }
99
127
 
@@ -126,6 +154,18 @@ class Iterator extends AbstractIterator {
126
154
 
127
155
  // nxt API
128
156
 
157
+ _refreshSync () {
158
+ assert(this[kContext])
159
+ assert(!this[kBusy])
160
+
161
+ this[kFirst] = true
162
+ this[kCache] = kEmpty
163
+ this[kFinished] = false
164
+ this[kPosition] = 0
165
+
166
+ binding.iterator_refresh_sync(this[kContext])
167
+ }
168
+
129
169
  _seekSync (target) {
130
170
  assert(this[kContext])
131
171
  assert(!this[kBusy])
@@ -177,10 +217,25 @@ class Iterator extends AbstractIterator {
177
217
  return callback[kPromise]
178
218
  }
179
219
 
220
+ _nextvCached (size) {
221
+ const end = Math.min(this[kCache].length, this[kPosition] + size * 2)
222
+ const rows = this[kCache].slice(this[kPosition], end)
223
+ this[kPosition] = end
224
+
225
+ const finished = this[kFinished] && this[kPosition] >= this[kCache].length
226
+ const limited = !finished && rows.length >= size * 2
227
+
228
+ return { rows, finished, limited }
229
+ }
230
+
180
231
  _nextvSync (size, options) {
181
232
  assert(this[kContext])
182
233
  assert(!this[kBusy])
183
234
 
235
+ if (this[kPosition] < this[kCache].length) {
236
+ return this._nextvCached(size)
237
+ }
238
+
184
239
  if (this[kFinished]) {
185
240
  return { rows: [], finished: true }
186
241
  }
@@ -198,7 +253,9 @@ class Iterator extends AbstractIterator {
198
253
  callback = fromCallback(callback, kPromise)
199
254
 
200
255
  try {
201
- if (this[kFinished]) {
256
+ if (this[kPosition] < this[kCache].length) {
257
+ process.nextTick(callback, null, this._nextvCached(size))
258
+ } else if (this[kFinished]) {
202
259
  process.nextTick(callback, null, { rows: [], finished: true })
203
260
  } else {
204
261
  this[kDB][kRef]()
@@ -227,6 +284,8 @@ class Iterator extends AbstractIterator {
227
284
  }
228
285
 
229
286
  _closeSync () {
287
+ assert(!this[kBusy])
288
+
230
289
  this[kCache] = kEmpty
231
290
 
232
291
  if (this[kContext]) {
@@ -6,6 +6,21 @@
6
6
 
7
7
  #include <iostream>
8
8
 
9
+ // Compares two length-prefixed revision operands: <0 if a < b, 0 if equal, >0 if a > b.
10
+ //
11
+ // This MUST stay byte-for-byte order-compatible with the in-memory JS comparator
12
+ // (@nxtedition/util compareRev, lib/packages/util/src/compare-rev.ts), because
13
+ // RocksDB selects the durable winner with this operator while the application
14
+ // compares the same revisions in memory with the JS one — if they disagree, the
15
+ // stored "max revision" diverges from what the app believes is the max. A 500k
16
+ // randomized fuzz (leading zeros, INF, length ties, missing dashes) confirms
17
+ // parity. Revisions are `<number>-<id>` (e.g. `12-7a00`, `INF-…`) compared as:
18
+ // 1. INF sentinel: a number beginning with 'I' is +infinity (largest).
19
+ // 2. leading zeros are skipped so `01-x` == `1-x` in magnitude.
20
+ // 3. the number is compared digit-by-digit, terminated by '-'; the side whose
21
+ // number ends first (fewer significant digits) is smaller.
22
+ // 4. then the id is compared bytewise; finally the zero-stripped length breaks
23
+ // ties (a longer number = larger revision).
9
24
  int compareRev(const rocksdb::Slice& a, const rocksdb::Slice& b) {
10
25
  if (a.empty()) {
11
26
  return b.empty() ? 0 : -1;
@@ -22,14 +37,35 @@ int compareRev(const rocksdb::Slice& a, const rocksdb::Slice& b) {
22
37
  const std::size_t endA = 1 + std::min<std::size_t>(static_cast<unsigned char>(a[0]), a.size() - 1);
23
38
  const std::size_t endB = 1 + std::min<std::size_t>(static_cast<unsigned char>(b[0]), b.size() - 1);
24
39
 
40
+ // INF-XXXX sorts above every numeric revision. Mirror the JS comparator's
41
+ // explicit sentinel rather than relying on 'I' (0x49) happening to exceed the
42
+ // digit bytes.
43
+ const bool infA = indexA < endA && a[indexA] == 'I';
44
+ const bool infB = indexB < endB && b[indexB] == 'I';
45
+ if (infA != infB) {
46
+ return infA ? 1 : -1;
47
+ }
48
+
49
+ // Skip leading zeroes, tracking the zero-stripped content length for the final
50
+ // tiebreak, so `01-x` and `1-x` compare as the same magnitude.
51
+ std::size_t lenA = endA - indexA;
52
+ std::size_t lenB = endB - indexB;
53
+ while (indexA < endA && a[indexA] == '0') {
54
+ ++indexA;
55
+ --lenA;
56
+ }
57
+ while (indexB < endB && b[indexB] == '0') {
58
+ ++indexB;
59
+ --lenB;
60
+ }
61
+
25
62
  // Compare the revision number. Compare bytes as unsigned char: rocksdb::Slice
26
63
  // operator[] returns (signed-on-most-platforms) char, so a byte >= 0x80 would
27
64
  // otherwise sort as negative and order opposite to the JS comparator, which
28
65
  // reads bytes as unsigned (Buffer[i] in 0..255). Keeping both sides unsigned
29
66
  // ensures the in-memory ordering and this durable maxRev merge agree.
30
67
  auto result = 0;
31
- const auto end = std::min(endA, endB);
32
- while (indexA < end && indexB < end) {
68
+ while (indexA < endA && indexB < endB) {
33
69
  const unsigned char ac = static_cast<unsigned char>(a[indexA++]);
34
70
  const unsigned char bc = static_cast<unsigned char>(b[indexB++]);
35
71
 
@@ -52,7 +88,7 @@ int compareRev(const rocksdb::Slice& a, const rocksdb::Slice& b) {
52
88
  }
53
89
 
54
90
  // Compare the rest (unsigned, for the same reason as the loop above).
55
- while (indexA < end && indexB < end) {
91
+ while (indexA < endA && indexB < endB) {
56
92
  const unsigned char ac = static_cast<unsigned char>(a[indexA++]);
57
93
  const unsigned char bc = static_cast<unsigned char>(b[indexB++]);
58
94
  if (ac != bc) {
@@ -60,7 +96,7 @@ int compareRev(const rocksdb::Slice& a, const rocksdb::Slice& b) {
60
96
  }
61
97
  }
62
98
 
63
- return static_cast<int>(endA) - static_cast<int>(endB);
99
+ return static_cast<int>(lenA) - static_cast<int>(lenB);
64
100
  }
65
101
 
66
102
  class MaxRevOperator : public rocksdb::MergeOperator {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nxtedition/rocksdb",
3
- "version": "16.0.2",
3
+ "version": "16.0.9",
4
4
  "description": "A low-level Node.js RocksDB binding",
5
5
  "license": "MIT",
6
6
  "main": "index.js",
@@ -9,7 +9,8 @@
9
9
  "test": "standard && (nyc -s tape test/*-test.js | faucet) && nyc report",
10
10
  "test-prebuild": "cross-env PREBUILDS_ONLY=1 npm t",
11
11
  "prebuildify": "JOBS=8 prebuildify --napi --strip",
12
- "rebuild": "JOBS=8 npm run install --build-from-source"
12
+ "rebuild": "JOBS=8 npm run install --build-from-source",
13
+ "release": "./release.sh"
13
14
  },
14
15
  "dependencies": {
15
16
  "abstract-level": "^1.0.2",
package/release.sh ADDED
@@ -0,0 +1,69 @@
1
+ #!/bin/bash
2
+ set -e
3
+
4
+ cd "$(dirname "$0")"
5
+
6
+ export DOCKER_HOST="${DOCKER_HOST:-ssh://nxtop@hq-test-srv1.nxt.io}"
7
+
8
+ # Fail fast: npm publish needs a valid login, so check before the slow builds.
9
+ if ! npm whoami --registry https://registry.npmjs.org > /dev/null 2>&1; then
10
+ echo "Not logged in to npm, run 'npm login' first." >&2
11
+ exit 1
12
+ fi
13
+
14
+ # Fail fast: npm version refuses a dirty tree, so check before the slow builds.
15
+ if [ -n "$(git status --porcelain)" ]; then
16
+ echo "Working tree is not clean, commit or stash changes first." >&2
17
+ exit 1
18
+ fi
19
+
20
+ # Fail fast: don't build/publish on a branch that's behind or diverged from origin.
21
+ BRANCH=$(git rev-parse --abbrev-ref HEAD)
22
+ echo "Fetching origin..."
23
+ git fetch origin "$BRANCH"
24
+
25
+ LOCAL=$(git rev-parse HEAD)
26
+ REMOTE=$(git rev-parse "origin/$BRANCH")
27
+ BASE=$(git merge-base HEAD "origin/$BRANCH")
28
+
29
+ if [ "$LOCAL" = "$REMOTE" ]; then
30
+ : # up to date
31
+ elif [ "$LOCAL" = "$BASE" ]; then
32
+ echo "Branch '$BRANCH' is behind origin, pull the latest changes first." >&2
33
+ exit 1
34
+ elif [ "$REMOTE" = "$BASE" ]; then
35
+ : # local is ahead, fine to push
36
+ else
37
+ echo "Branch '$BRANCH' has diverged from origin, reconcile before releasing." >&2
38
+ exit 1
39
+ fi
40
+
41
+ # Keep the local arm64 build targeting the same node version as the Docker image.
42
+ NODE_TARGET=$(sed -n 's/^FROM node:\([0-9.]*\).*/\1/p' Dockerfile)
43
+ if [ -z "$NODE_TARGET" ]; then
44
+ echo "Could not determine node version from Dockerfile." >&2
45
+ exit 1
46
+ fi
47
+
48
+ echo "Building linux prebuilds (docker)..."
49
+ ./build.sh
50
+
51
+ echo "Building darwin-arm64 prebuilds (node $NODE_TARGET)..."
52
+ JOBS=16 npx prebuildify -t "$NODE_TARGET" --napi --strip --arch arm64
53
+
54
+ read -r -p "Version bump (patch/minor/major): " BUMP
55
+ case "$BUMP" in
56
+ patch | minor | major) ;;
57
+ *)
58
+ echo "Invalid bump: '$BUMP' (expected patch, minor or major)" >&2
59
+ exit 1
60
+ ;;
61
+ esac
62
+
63
+ npm version "$BUMP"
64
+ npm publish
65
+
66
+ git push
67
+ git push --tags
68
+
69
+ echo "Published $(node -p "require('./package.json').version")."
package/util.h CHANGED
@@ -9,6 +9,7 @@
9
9
  #include <rocksdb/status.h>
10
10
 
11
11
  #include <array>
12
+ #include <memory>
12
13
  #include <optional>
13
14
  #include <string>
14
15
 
@@ -439,9 +440,18 @@ napi_status Convert(napi_env env,
439
440
  bool unsafe = false) {
440
441
  if (encoding == Encoding::Buffer) {
441
442
  if (unsafe) {
442
- auto s2 = new rocksdb::PinnableSlice(std::move(s));
443
- return napi_create_external_buffer(env, s2->size(), const_cast<char*>(s2->data()),
444
- Finalize<rocksdb::PinnableSlice>, s2, &result);
443
+ // The heap PinnableSlice is owned by the finalizer, which N-API only
444
+ // registers when the external buffer is created successfully. Hold it in a
445
+ // unique_ptr and release ownership only on success, so a failed
446
+ // napi_create_external_buffer does not leak it (and the block/memtable
447
+ // region it pinned).
448
+ auto s2 = std::make_unique<rocksdb::PinnableSlice>(std::move(s));
449
+ const auto status = napi_create_external_buffer(env, s2->size(), const_cast<char*>(s2->data()),
450
+ Finalize<rocksdb::PinnableSlice>, s2.get(), &result);
451
+ if (status == napi_ok) {
452
+ s2.release();
453
+ }
454
+ return status;
445
455
  } else {
446
456
  return napi_create_buffer_copy(env, s.size(), s.data(), nullptr, &result);
447
457
  }
package/util.js CHANGED
@@ -2,73 +2,3 @@
2
2
 
3
3
  exports.kRef = Symbol('ref')
4
4
  exports.kUnref = Symbol('unref')
5
-
6
- function handleMany (sizes, data, options) {
7
- const { valueEncoding } = options ?? {}
8
-
9
- data ??= Buffer.alloc(0)
10
- sizes ??= Buffer.alloc(0)
11
-
12
- const rows = []
13
- let offset = 0
14
- const sizes32 = new Int32Array(sizes.buffer, sizes.byteOffset, sizes.byteLength / 4)
15
- for (let n = 0; n < sizes32.length; n++) {
16
- const size = sizes32[n]
17
- const encoding = valueEncoding
18
- if (size < 0) {
19
- rows.push(undefined)
20
- } else {
21
- if (!encoding || encoding === 'buffer') {
22
- rows.push(data.subarray(offset, offset + size))
23
- } else if (encoding === 'slice') {
24
- rows.push({ buffer: data, byteOffset: offset, byteLength: size })
25
- } else {
26
- rows.push(data.toString(encoding, offset, offset + size))
27
- }
28
- offset += size
29
- if (offset & 0x7) {
30
- offset = (offset | 0x7) + 1
31
- }
32
- }
33
- }
34
-
35
- return rows
36
- }
37
- function handleNextv (err, sizes, buffer, finished, options, callback) {
38
- const { keyEncoding, valueEncoding } = options ?? {}
39
-
40
- if (err) {
41
- callback(err)
42
- } else {
43
- buffer ??= Buffer.alloc(0)
44
- sizes ??= Buffer.alloc(0)
45
-
46
- const rows = []
47
- let offset = 0
48
- const sizes32 = new Int32Array(sizes.buffer, sizes.byteOffset, sizes.byteLength / 4)
49
- for (let n = 0; n < sizes32.length; n++) {
50
- const size = sizes32[n]
51
- const encoding = n & 1 ? valueEncoding : keyEncoding
52
- if (size < 0) {
53
- rows.push(undefined)
54
- } else {
55
- if (!encoding || encoding === 'buffer') {
56
- rows.push(buffer.subarray(offset, offset + size))
57
- } else if (encoding === 'slice') {
58
- rows.push({ buffer, byteOffset: offset, byteLength: size })
59
- } else {
60
- rows.push(buffer.toString(encoding, offset, offset + size))
61
- }
62
- offset += size
63
- if (offset & 0x7) {
64
- offset = (offset | 0x7) + 1
65
- }
66
- }
67
- }
68
-
69
- callback(null, rows, finished)
70
- }
71
- }
72
-
73
- exports.handleMany = handleMany
74
- exports.handleNextv = handleNextv
@@ -0,0 +1,21 @@
1
+ 'use strict'
2
+
3
+ const binding = require('./binding')
4
+
5
+ const kWriteBufferManagerContext = Symbol('writeBufferManagerContext')
6
+
7
+ class RocksWriteBufferManager {
8
+ constructor (options = {}) {
9
+ this[kWriteBufferManagerContext] = binding.write_buffer_manager_init(options)
10
+ }
11
+
12
+ get handle () {
13
+ return binding.write_buffer_manager_get_handle(this[kWriteBufferManagerContext])
14
+ }
15
+
16
+ get usage () {
17
+ return binding.write_buffer_manager_get_usage(this[kWriteBufferManagerContext])
18
+ }
19
+ }
20
+
21
+ exports.RocksWriteBufferManager = RocksWriteBufferManager
@@ -1,35 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(npm pack:*)",
5
- "Bash(node:*)",
6
- "Bash(npm view:*)",
7
- "Bash(npm info:*)",
8
- "Bash(nm:*)",
9
- "Bash(grep:*)",
10
- "Bash(while read f)",
11
- "Bash(do basename \"$f\")",
12
- "Bash(done)",
13
- "Bash(ls:*)",
14
- "Bash(git submodule update:*)",
15
- "Bash(git submodule:*)",
16
- "Bash(yarn build:*)",
17
- "Bash(yarn rebuild:*)",
18
- "Bash(npx tape:*)",
19
- "Bash(echo \"=== In binding.gyp ===\" grep \"absl/base/internal\" binding.gyp echo \"\" echo \"=== Available \\(non-test\\) files ===\" ls deps/abseil-cpp/absl/base/internal/*.cc)",
20
- "Bash(echo \"=== Checking for abseil source requirements ===\" echo \"\" echo \"profiling sources:\" ls /Users/ronagy/GitHub/nxtedition/rocks-level/deps/abseil-cpp/absl/profiling/internal/*.cc)",
21
- "Bash(echo \"none found\" echo \"\" echo \"crc sources:\" ls /Users/ronagy/GitHub/nxtedition/rocks-level/deps/abseil-cpp/absl/crc/internal/*.cc)",
22
- "Bash(echo \"NONE INCLUDED!\" echo \"\" echo \"=== Available log sources ===\" ls /Users/ronagy/GitHub/nxtedition/rocks-level/deps/abseil-cpp/absl/log/*.cc)",
23
- "Bash(head -20 echo \"\" echo \"=== Log internal sources ===\" ls /Users/ronagy/GitHub/nxtedition/rocks-level/deps/abseil-cpp/absl/log/internal/*.cc)",
24
- "Bash(echo \"=== Required log sources \\(non-test\\) ===\" ls /Users/ronagy/GitHub/nxtedition/rocks-level/deps/abseil-cpp/absl/log/*.cc)",
25
- "Bash(CFLAGS=\"-g -O0\" CXXFLAGS=\"-g -O0\" npm run rebuild)",
26
- "Bash(node-gyp rebuild:*)",
27
- "Bash(npx node-gyp rebuild:*)",
28
- "Bash(git rm:*)",
29
- "WebFetch(domain:raw.githubusercontent.com)",
30
- "Bash(git log:*)",
31
- "WebFetch(domain:github.com)",
32
- "Bash(npm run rebuild:*)"
33
- ]
34
- }
35
- }